Source code for bob.learn.boosting.Boosting

from ._library import BoostedMachine
import numpy
import scipy.optimize
import logging
logger = logging.getLogger('bob.learn.boosting')


class Boosting:
  """ The class to boost the features from a set of training samples.

  It iteratively adds new weak models to assemble a strong classifier.
  In each round of iteration a weak machine is learned by optimizing a differentiable function.

  **Constructor Documentation**

  Keyword parameters

    weak_trainer : :py:class:`bob.learn.boosting.LUTTrainer` or :py:class:`bob.learn.boosting.StumpTrainer`
      The class to train weak machines.

    loss_function : a class derived from :py:class:`bob.learn.boosting.LossFunction`
      The function to define the weights for the weak machines.
  """

  def __init__(self, weak_trainer, loss_function):
    self.m_trainer = weak_trainer
    self.m_loss_function = loss_function

  def get_loss_function(self):
    """Returns the loss function this trainer will use."""
    return self.m_loss_function

  def train(self, training_features, training_targets, number_of_rounds = 20, boosted_machine = None):
    """The function to train a boosting machine.

    The function boosts the training features and returns a strong classifier as a weighted combination of weak classifiers.

    Keyword parameters:

      training_features : uint16 <#samples, #features> or float <#samples, #features>
        Features extracted from the training samples.

      training_targets : float <#samples, #outputs>
        The values that the boosted classifier should reach for the given samples.

      number_of_rounds : int
        The number of rounds of boosting, i.e., the number of weak classifiers to select.

      boosted_machine : :py:class:`bob.learn.boosting.BoostedMachine` or None
        The machine to add the weak machines to. If not given, a new machine is created.

    Returns : :py:class:`bob.learn.boosting.BoostedMachine`
      The boosted machine that is the combination of the weak classifiers.
    """

    # Initializations
    if len(training_targets.shape) == 1:
      training_targets = training_targets[:, numpy.newaxis]

    number_of_samples = training_features.shape[0]
    number_of_outputs = training_targets.shape[1]

    strong_predicted_scores = numpy.zeros((number_of_samples, number_of_outputs))
    weak_predicted_scores = numpy.ndarray((number_of_samples, number_of_outputs))

    if boosted_machine is not None:
      # predict the scores of the training samples with the existing machine
      boosted_machine(training_features, strong_predicted_scores)
    else:
      boosted_machine = BoostedMachine()

    # Start boosting iterations for number_of_rounds rounds
    logger.info("Starting %d rounds of boosting" % number_of_rounds)
    for round in range(number_of_rounds):
      logger.debug("Starting round %d" % (round + 1))

      # Compute the gradient of the loss function, l'(y, f(x)), using the loss class
      loss_gradient = self.m_loss_function.loss_gradient(training_targets, strong_predicted_scores)

      # Select the best weak machine for the current round of boosting
      weak_machine = self.m_trainer.train(training_features, loss_gradient)

      # Compute the classification scores of the samples based only on the current round's weak classifier (g_r)
      weak_machine(training_features, weak_predicted_scores)

      # Perform L-BFGS minimization to compute the scale (alpha_r) for the current weak machine
      alpha, _, flags = scipy.optimize.fmin_l_bfgs_b(
        func   = self.m_loss_function.loss_sum,
        x0     = numpy.zeros(number_of_outputs),
        fprime = self.m_loss_function.loss_gradient_sum,
        args   = (training_targets, strong_predicted_scores, weak_predicted_scores),
        # disp = 1
      )

      # check the output of L-BFGS
      if flags['warnflag'] != 0:
        msg = "too many function evaluations or too many iterations" if flags['warnflag'] == 1 else flags['task']
        if (alpha == numpy.zeros(number_of_outputs)).all():
          logger.warning("L-BFGS returned zero weights with error '%d': %s" % (flags['warnflag'], msg))
          return boosted_machine
        else:
          logger.warning("L-BFGS returned warning '%d': %s" % (flags['warnflag'], msg))

      # Update the prediction scores after adding the current weak classifier: f(x) = f(x) + alpha_r * g_r(x)
      strong_predicted_scores += alpha * weak_predicted_scores

      # Add the current weak machine to the boosted machine
      boosted_machine.add_weak_machine(weak_machine, alpha)

      logger.info("Finished round %d / %d" % (round + 1, number_of_rounds))

    return boosted_machine
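

# ----------------------------------------------------------------------------
# The two sketches below are illustrative additions, not part of the original
# module.
#
# 1) Inside train(), the per-round scale alpha_r is obtained by minimizing
#    sum_i l(y_i, f(x_i) + alpha * g_r(x_i)) over alpha with L-BFGS.
#    A minimal self-contained version of that step, assuming the exponential
#    loss l(y, s) = exp(-y * s) (one of the losses shipped with this package):

def _example_alpha_step():
  """Illustrative only: optimizes the weak-machine scale for an exponential loss."""
  y = numpy.array([[ 1.], [-1.], [ 1.]])   # targets in {-1, +1}
  f = numpy.zeros((3, 1))                  # current strong scores f(x)
  g = numpy.array([[ .5], [-.2], [ .8]])   # scores of the current weak machine g_r(x)

  def loss_sum(alpha, y, f, g):
    # summed exponential loss after the update f(x) + alpha * g_r(x)
    return numpy.sum(numpy.exp(-y * (f + alpha * g)))

  def loss_gradient_sum(alpha, y, f, g):
    # derivative of the summed loss with respect to alpha
    return numpy.sum(-y * g * numpy.exp(-y * (f + alpha * g)), axis = 0)

  alpha, _, _ = scipy.optimize.fmin_l_bfgs_b(
    func = loss_sum, x0 = numpy.zeros(1), fprime = loss_gradient_sum, args = (y, f, g))
  return alpha


# 2) A usage sketch for the trainer on random two-class data.  It assumes the
#    package's StumpTrainer and ExponentialLoss classes; any weak trainer and
#    loss function with the interfaces documented above should work alike.

if __name__ == '__main__':
  import bob.learn.boosting

  features = numpy.random.randn(100, 10)                     # float features
  targets = numpy.random.choice([-1., 1.], size = (100, 1))  # targets in {-1, +1}

  trainer = Boosting(
    weak_trainer = bob.learn.boosting.StumpTrainer(),
    loss_function = bob.learn.boosting.ExponentialLoss()
  )
  machine = trainer.train(features, targets, number_of_rounds = 10)

  # the strong classifier is the weighted sum of the selected weak machines
  scores = numpy.ndarray((100, 1))
  machine(features, scores)
  logger.info("Training accuracy: %f" % numpy.mean(numpy.sign(scores) == targets))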