Source code for bob.bio.base.extractor.stacks

from bob.extension.processors import SequentialProcessor, ParallelProcessor
from .Extractor import Extractor
from bob.io.base import HDF5File


class MultipleExtractor(Extractor):
    """Base class for SequentialExtractor and ParallelExtractor. This class is
    not meant to be used directly."""

[docs] @staticmethod def get_attributes(processors): requires_training = any(p.requires_training for p in processors) split_training_data_by_client = any(p.split_training_data_by_client for p in processors) min_extractor_file_size = min(p.min_extractor_file_size for p in processors) min_feature_file_size = min(p.min_feature_file_size for p in processors) return (requires_training, split_training_data_by_client, min_extractor_file_size, min_feature_file_size)
[docs] def get_extractor_groups(self): groups = ['E_{}'.format(i + 1) for i in range(len(self.processors))] return groups
[docs] def train_one(self, e, training_data, extractor_file, apply=False): """Trains one extractor and optionally applies the extractor on the training data after training. Parameters ---------- e : :any:`Extractor` The extractor to train. The extractor should be able to save itself in an opened hdf5 file. training_data : [object] or [[object]] The data to be used for training. extractor_file : :any:`bob.io.base.HDF5File` The opened hdf5 file to save the trained extractor inside. apply : :obj:`bool`, optional If ``True``, the extractor is applied to the training data after it is trained and the data is returned. Returns ------- None or [object] or [[object]] Returns ``None`` if ``apply`` is ``False``. Otherwise, returns the transformed ``training_data``. """ if not e.requires_training: # do nothing since e does not require training! pass # if any of the extractors require splitting the data, the # split_training_data_by_client is True. elif e.split_training_data_by_client: e.train(training_data, extractor_file) # when no extractor needs splitting elif not self.split_training_data_by_client: e.train(training_data, extractor_file) # when e here wants it flat but the data is split else: # make training_data flat flat_training_data = [d for datalist in training_data for d in datalist] e.train(flat_training_data, extractor_file) if not apply: return # prepare the training data for the next extractor if self.split_training_data_by_client: training_data = [[e(d) for d in datalist] for datalist in training_data] else: training_data = [e(d) for d in training_data] return training_data
[docs] def load(self, extractor_file): if not self.requires_training: return with HDF5File(extractor_file) as f: groups = self.get_extractor_groups() for e, group in zip(self.processors, groups): f.cd(group) e.load(f) f.cd('..')
class SequentialExtractor(SequentialProcessor, MultipleExtractor): """A helper class which takes several extractors and applies them one by one sequentially. Attributes ---------- processors : list A list of extractors to apply. Examples -------- You can use this class to apply a chain of extractors on your data. For example: >>> import numpy as np >>> from functools import partial >>> from bob.bio.base.extractor import SequentialExtractor, CallableExtractor >>> raw_data = np.array([[1, 2, 3], [1, 2, 3]]) >>> seq_extractor = SequentialExtractor( ... [CallableExtractor(f) for f in ... [np.cast['float64'], lambda x: x / 2, partial(np.mean, axis=1)]]) >>> np.allclose(seq_extractor(raw_data),[ 1., 1.]) True >>> np.all(seq_extractor(raw_data) == ... np.mean(np.cast['float64'](raw_data) / 2, axis=1)) True """ def __init__(self, processors, **kwargs): (requires_training, split_training_data_by_client, min_extractor_file_size, min_feature_file_size) = \ self.get_attributes(processors) super(SequentialExtractor, self).__init__( processors=processors, requires_training=requires_training, split_training_data_by_client=split_training_data_by_client, min_extractor_file_size=min_extractor_file_size, min_feature_file_size=min_feature_file_size, **kwargs)
[docs] def train(self, training_data, extractor_file): with HDF5File(extractor_file, 'w') as f: groups = self.get_extractor_groups() for i, (e, group) in enumerate(zip(self.processors, groups)): apply = i != len(self.processors) - 1 f.create_group(group) f.cd(group) training_data = self.train_one(e, training_data, f, apply=apply) f.cd('..')
[docs] def read_feature(self, feature_file): return self.processors[-1].read_feature(feature_file)
[docs] def write_feature(self, feature, feature_file): self.processors[-1].write_feature(feature, feature_file)
class ParallelExtractor(ParallelProcessor, MultipleExtractor): """A helper class which takes several extractors and applies them on each processor separately and yields their outputs one by one. Attributes ---------- processors : list A list of extractors to apply. Examples -------- You can use this class to apply several extractors on your data and get all the results back. For example: >>> import numpy as np >>> from functools import partial >>> from bob.bio.base.extractor import ParallelExtractor, CallableExtractor >>> raw_data = np.array([[1, 2, 3], [1, 2, 3]]) >>> parallel_extractor = ParallelExtractor( ... [CallableExtractor(f) for f in ... [np.cast['float64'], lambda x: x / 2.0]]) >>> np.allclose(list(parallel_extractor(raw_data)),[[[ 1., 2., 3.],[ 1., 2., 3.]], [[ 0.5, 1. , 1.5],[ 0.5, 1. , 1.5]]]) True The data may be further processed using a :any:`SequentialExtractor`: >>> from bob.bio.base.extractor import SequentialExtractor >>> total_extractor = SequentialExtractor( ... [parallel_extractor, CallableExtractor(list), ... CallableExtractor(partial(np.concatenate, axis=1))]) >>> np.allclose(total_extractor(raw_data),[[ 1. , 2. , 3. , 0.5, 1. , 1.5],[ 1. , 2. , 3. , 0.5, 1. , 1.5]]) True """ def __init__(self, processors, **kwargs): (requires_training, split_training_data_by_client, min_extractor_file_size, min_feature_file_size) = self.get_attributes( processors) super(ParallelExtractor, self).__init__( processors=processors, requires_training=requires_training, split_training_data_by_client=split_training_data_by_client, min_extractor_file_size=min_extractor_file_size, min_feature_file_size=min_feature_file_size, **kwargs)
[docs] def train(self, training_data, extractor_file): with HDF5File(extractor_file, 'w') as f: groups = self.get_extractor_groups() for e, group in zip(self.processors, groups): f.create_group(group) f.cd(group) self.train_one(e, training_data, f, apply=False) f.cd('..')
class CallableExtractor(Extractor): """A simple extractor that takes a callable and applies that callable to the input. Attributes ---------- callable : object Anything that is callable. It will be used as an extractor in bob.bio.base. read_feature : object A callable object with the signature of ``feature = read_feature(feature_file)``. If not provided, the default implementation handles numpy arrays. write_feature : object A callable object with the signature of ``write_feature(feature, feature_file)``. If not provided, the default implementation handles numpy arrays. Examples -------- You can take any function like ``numpy.cast['float32']`` to cast your data to float32 for example. This is useful when you want to stack several extractors using the :any:`SequentialExtractor` and :any:`ParallelExtractor` classes. """ def __init__(self, callable, write_feature=None, read_feature=None, **kwargs): super(CallableExtractor, self).__init__(**kwargs) self.callable = callable if write_feature is not None: self.write_feature = write_feature if read_feature is not None: self.read_feature = read_feature def __call__(self, data): return self.callable(data)