# Source code for bob.learn.boosting.Boosting

from ._library import BoostedMachine
import numpy
import scipy.optimize
import logging
# Module-level logger, registered under the 'bob.learn.boosting' name; the boosting trainer below reports its progress and optimizer warnings through it.
logger = logging.getLogger('bob.learn.boosting')


class Boosting:
  """ The class to boost the features from a set of training samples.

  It iteratively adds new weak models to assemble a strong classifier.
  In each round of iteration a weak machine is learned by optimizing a differentiable function.

  **Constructor Documentation**

  Keyword parameters

    weak_trainer : :py:class:`bob.learn.boosting.LUTTrainer` or :py:class:`bob.learn.boosting.StumpTrainer`
      The class to train weak machines.
      Its ``train(features, loss_gradient)`` method is called once per boosting round.

    loss_function : a class derived from :py:class:`bob.learn.boosting.LossFunction`
      The function to define the weights for the weak machines.
      Its ``loss_gradient``, ``loss_sum`` and ``loss_gradient_sum`` methods are used during training.

  """


  def __init__(self, weak_trainer, loss_function):
    self.m_trainer = weak_trainer
    self.m_loss_function = loss_function


  def get_loss_function(self):
    """Returns the loss function this trainer will use."""
    return self.m_loss_function


  def train(self, training_features, training_targets, number_of_rounds = 20, boosted_machine = None):
    """The function to train a boosting machine.

    The function boosts the training features and returns a strong classifier as a weighted combination of weak classifiers.
    In each round, the weak trainer selects a weak machine based on the gradient of the loss,
    and the weight of that weak machine is obtained by an L-BFGS minimization of the loss.

    Keyword parameters:

    training_features : uint16 <#samples, #features> or float <#samples, #features>
      Features extracted from the training samples.

    training_targets : float <#samples, #outputs>
      The values that the boosted classifier should reach for the given samples.
      A one-dimensional array is treated as a single output per sample.

    number_of_rounds : int
      The number of rounds of boosting, i.e., the number of weak classifiers to select.

    boosted_machine : :py:class:`bob.learn.boosting.BoostedMachine` or None
      The machine to add the weak machines to. If not given, a new machine is created.
      If given, it is first evaluated on the training features to initialize the strong scores.

    Returns : :py:class:`bob.learn.boosting.BoostedMachine`
      The boosted machine that is combination of the weak classifiers.
      When L-BFGS reports a problem and returns all-zero weights, training stops early
      and the machine as assembled up to that round is returned.
    """

    # Initializations: a 1D target vector is handled as a single-output problem
    if training_targets.ndim == 1:
      training_targets = training_targets[:, numpy.newaxis]

    number_of_samples = training_features.shape[0]
    number_of_outputs = training_targets.shape[1]

    strong_predicted_scores = numpy.zeros((number_of_samples, number_of_outputs))
    # uninitialized on purpose: the weak machine writes into this buffer in every round
    weak_predicted_scores = numpy.empty((number_of_samples, number_of_outputs))

    if boosted_machine is not None:
      # continue training: start from the scores of the given machine
      boosted_machine(training_features, strong_predicted_scores)
    else:
      boosted_machine = BoostedMachine()

    # Start boosting iterations for number_of_rounds rounds
    logger.info("Starting %d rounds of boosting", number_of_rounds)
    for round_index in range(number_of_rounds):

      logger.debug("Starting round %d", round_index + 1)

      # Compute the gradient of the loss function, l'(y,f(x))
      loss_gradient = self.m_loss_function.loss_gradient(training_targets, strong_predicted_scores)

      # Select the best weak machine for current round of boosting
      weak_machine = self.m_trainer.train(training_features, loss_gradient)

      # Compute the classification scores of the samples based only on the current round weak classifier (g_r)
      weak_machine(training_features, weak_predicted_scores)

      # Perform L-BFGS minimization and compute the scale (alpha_r) for current weak machine
      alpha, _, lbfgs_info = scipy.optimize.fmin_l_bfgs_b(
          func   = self.m_loss_function.loss_sum,
          x0     = numpy.zeros(number_of_outputs),
          fprime = self.m_loss_function.loss_gradient_sum,
          args   = (training_targets, strong_predicted_scores, weak_predicted_scores),
      )

      # check output of L-BFGS
      warnflag = lbfgs_info['warnflag']
      if warnflag != 0:
        msg = "too many function evaluations or too many iterations" if warnflag == 1 else lbfgs_info['task']
        if (alpha == 0).all():
          # a weak machine with zero weight contributes nothing, so stop here
          logger.warning("L-BFGS returned zero weights with error '%d': %s", warnflag, msg)
          return boosted_machine
        logger.warning("L-BFGS returned warning '%d': %s", warnflag, msg)

      # Update the prediction score after adding the score from the current weak classifier f(x) = f(x) + alpha_r*g_r
      strong_predicted_scores += alpha * weak_predicted_scores

      # Add the current weak machine into the boosting machine
      boosted_machine.add_weak_machine(weak_machine, alpha)

      logger.info("Finished round %d / %d", round_index + 1, number_of_rounds)

    return boosted_machine