BEAT - tutorial/pca

This algorithm is a legacy one. The API has changed since its implementation. New versions and forks will need to be updated.

Endpoint Groups 1

Algorithms have at least one input and one output. All algorithm endpoints are organized in groups. Groups are used by the platform to indicate which inputs and outputs are synchronized together. The first group is automatically synchronized with the channel defined by the block in which the algorithm is deployed.

Group: main

Endpoint Name	Data Format	Nature
image	system/array_2d_uint8/1	Input
client_id	system/uint64/1	Input
subspace_lda	tutorial/linear_machine/1	Output
subspace_pca	tutorial/linear_machine/1	Output

Parameters 2

Parameters allow users to change the configuration of an algorithm when scheduling an experiment

Name	Description	Type	Default	Range/Choices
number-of-pca-components		uint32	5
number-of-lda-components		uint32	2

import bob
import numpy

class Algorithm:
    """Train a PCA subspace, then an LDA subspace on the PCA-projected data.

    Samples are accumulated per client across successive ``process`` calls.
    Once the inputs report that no more data is available, both linear
    machines are trained and written to the ``subspace_pca`` and
    ``subspace_lda`` outputs.
    """

    def __init__(self):
        # Default component counts; ``setup`` may override them.
        self.number_of_pca_components = 5
        self.number_of_lda_components = 2
        # Maps client id -> list of flattened images seen for that client.
        self.data = {}

    def setup(self, parameters):
        """Read the optional component counts from ``parameters``.

        ``parameters`` is a mapping; the keys 'number-of-pca-components' and
        'number-of-lda-components' are looked up, and a missing key leaves
        the current value untouched. Always returns True.
        """
        self.number_of_pca_components = parameters.get(
            'number-of-pca-components', self.number_of_pca_components)
        self.number_of_lda_components = parameters.get(
            'number-of-lda-components', self.number_of_lda_components)
        return True

    def _project_data_for_lda(self, machine):
        """Project every client's samples through ``machine``.

        ``machine`` is any callable applied to one float-converted sample.
        Returns a list holding one vertically stacked array per client.
        Clients with fewer than two samples are skipped.
        """
        projected = []
        # items() instead of the Python 2-only iteritems(): same result,
        # and it also runs on Python 3.
        for client_files in self.data.values():
            # At least two files per client are required, so clients with
            # fewer are silently left out.
            if len(client_files) < 2:
                continue
            projected.append(numpy.vstack(
                [machine(feature.astype('float')) for feature in client_files]))
        return projected

    def _perform_pca(self, pca_machine, training_set):
        """Project ``training_set`` through ``pca_machine``.

        ``training_set`` is an iterable of per-client feature sequences.
        Returns a list with one vertically stacked array per client.
        This helper is not called by ``process``.
        """
        # The original body called an undefined name ``machine`` here, which
        # raised NameError on every call; the parameter is ``pca_machine``.
        return [
            numpy.vstack([pca_machine(feature) for feature in client_features])
            for client_features in training_set
        ]

    @staticmethod
    def _write_machine(output, machine):
        """Write the four linear-machine fields of ``machine`` to ``output``."""
        output.write({
            'input_subtract': machine.input_subtract,
            'input_divide': machine.input_divide,
            'weights': machine.weights,
            'biases': machine.biases,
        })

    def process(self, inputs, outputs):
        """Accumulate one sample; train and emit both machines on the last one.

        Reads ``inputs["image"]`` (flattened to a vector) and
        ``inputs["client_id"]`` and stores the image under its client id.
        When ``inputs.hasMoreData()`` is false, trains PCA on all stored
        samples, writes it to ``outputs["subspace_pca"]``, then trains LDA on
        the PCA-projected per-client data and writes it to
        ``outputs["subspace_lda"]``. Always returns True.
        """
        image = inputs["image"].data.value.flatten()
        c_id = inputs["client_id"].data.value
        self.data.setdefault(c_id, []).append(image)

        # Training only happens once the whole data set has been received.
        if inputs.hasMoreData():
            return True

        # PCA on every stored sample, one row per sample.
        data_pca = numpy.vstack(list(self.data.values()))
        data_pca = data_pca.astype('float')
        trainer = bob.trainer.PCATrainer()
        # The second return value (named eigen_values originally) is unused.
        pca_machine, _ = trainer.train(data_pca)
        del data_pca  # Reduce memory usage
        # Keep the input dimension, limit outputs to the requested count.
        pca_machine.resize(pca_machine.shape[0],
                           int(self.number_of_pca_components))
        self._write_machine(outputs["subspace_pca"], pca_machine)

        # LDA on the PCA-projected data, grouped per client.
        data_lda = self._project_data_for_lda(pca_machine)
        lda_trainer = bob.trainer.FisherLDATrainer()
        # The second return value (named lda_variances originally) is unused.
        lda_machine, _ = lda_trainer.train(data_lda)
        del data_lda  # Reduce memory usage
        lda_machine.resize(lda_machine.shape[0],
                           int(self.number_of_lda_components))
        self._write_machine(outputs["subspace_lda"], lda_machine)

        return True

xxxxxxxxxx
 
import bob
import numpy
​
​
class Algorithm:
    """Train a PCA subspace, then an LDA subspace on the PCA-projected data.

    Samples are accumulated per client across successive ``process`` calls.
    Once the inputs report that no more data is available, both linear
    machines are trained and written to the ``subspace_pca`` and
    ``subspace_lda`` outputs.
    """

    def __init__(self):
        # Default component counts; ``setup`` may override them.
        self.number_of_pca_components = 5
        self.number_of_lda_components = 2
        # Maps client id -> list of flattened images seen for that client.
        self.data = {}

    def setup(self, parameters):
        """Read the optional component counts from ``parameters``.

        ``parameters`` is a mapping; the keys 'number-of-pca-components' and
        'number-of-lda-components' are looked up, and a missing key leaves
        the current value untouched. Always returns True.
        """
        self.number_of_pca_components = parameters.get(
            'number-of-pca-components', self.number_of_pca_components)
        self.number_of_lda_components = parameters.get(
            'number-of-lda-components', self.number_of_lda_components)
        return True

    def _project_data_for_lda(self, machine):
        """Project every client's samples through ``machine``.

        ``machine`` is any callable applied to one float-converted sample.
        Returns a list holding one vertically stacked array per client.
        Clients with fewer than two samples are skipped.
        """
        projected = []
        # items() instead of the Python 2-only iteritems(): same result,
        # and it also runs on Python 3.
        for client_files in self.data.values():
            # At least two files per client are required, so clients with
            # fewer are silently left out.
            if len(client_files) < 2:
                continue
            projected.append(numpy.vstack(
                [machine(feature.astype('float')) for feature in client_files]))
        return projected

    def _perform_pca(self, pca_machine, training_set):
        """Project ``training_set`` through ``pca_machine``.

        ``training_set`` is an iterable of per-client feature sequences.
        Returns a list with one vertically stacked array per client.
        This helper is not called by ``process``.
        """
        # The original body called an undefined name ``machine`` here, which
        # raised NameError on every call; the parameter is ``pca_machine``.
        return [
            numpy.vstack([pca_machine(feature) for feature in client_features])
            for client_features in training_set
        ]

    @staticmethod
    def _write_machine(output, machine):
        """Write the four linear-machine fields of ``machine`` to ``output``."""
        output.write({
            'input_subtract': machine.input_subtract,
            'input_divide': machine.input_divide,
            'weights': machine.weights,
            'biases': machine.biases,
        })

    def process(self, inputs, outputs):
        """Accumulate one sample; train and emit both machines on the last one.

        Reads ``inputs["image"]`` (flattened to a vector) and
        ``inputs["client_id"]`` and stores the image under its client id.
        When ``inputs.hasMoreData()`` is false, trains PCA on all stored
        samples, writes it to ``outputs["subspace_pca"]``, then trains LDA on
        the PCA-projected per-client data and writes it to
        ``outputs["subspace_lda"]``. Always returns True.
        """
        image = inputs["image"].data.value.flatten()
        c_id = inputs["client_id"].data.value
        self.data.setdefault(c_id, []).append(image)

        # Training only happens once the whole data set has been received.
        if inputs.hasMoreData():
            return True

        # PCA on every stored sample, one row per sample.
        data_pca = numpy.vstack(list(self.data.values()))
        data_pca = data_pca.astype('float')
        trainer = bob.trainer.PCATrainer()
        # The second return value (named eigen_values originally) is unused.
        pca_machine, _ = trainer.train(data_pca)
        del data_pca  # Reduce memory usage
        # Keep the input dimension, limit outputs to the requested count.
        pca_machine.resize(pca_machine.shape[0],
                           int(self.number_of_pca_components))
        self._write_machine(outputs["subspace_pca"], pca_machine)

        # LDA on the PCA-projected data, grouped per client.
        data_lda = self._project_data_for_lda(pca_machine)
        lda_trainer = bob.trainer.FisherLDATrainer()
        # The second return value (named lda_variances originally) is unused.
        lda_machine, _ = lda_trainer.train(data_lda)
        del data_lda  # Reduce memory usage
        lda_machine.resize(lda_machine.shape[0],
                           int(self.number_of_lda_components))
        self._write_machine(outputs["subspace_lda"], lda_machine)

        return True

The code for this algorithm in Python
The ruler at 80 columns indicates suggested POSIX line breaks (for readability).
The editor will automatically enlarge to accommodate the entirety of your input.
Use keyboard shortcuts for search/replace and faster editing. For example, use Ctrl-F (PC) or Cmd-F (Mac) to search through this box

This algorithm performs principal component analysis (PCA) [PCA] on a given dataset using the singular value decomposition (SVD) [SVD], followed by linear discriminant analysis (LDA) [LDA].

This implementation relies on the Bob library.

The inputs are:

image: A training image as a two-dimensional array of unsigned 8-bit integers (see the endpoint table above). Each image is flattened into a vector and converted to 64-bit floats before training, so the training matrix has one row per training sample and one column per dimension of the flattened sample.
client_id: Client (class/subject) identifier as an unsigned 64 bits integer.

The outputs are subspace_pca and subspace_lda for the PCA and LDA transformation, respectively.

[SVD]

http://en.wikipedia.org/wiki/Singular_value_decomposition

[PCA]

http://en.wikipedia.org/wiki/Principal_component_analysis

[LDA]

http://en.wikipedia.org/wiki/Linear_discriminant_analysis

No experiments are using this algorithm.

Scientific Python 2.7 (0.0.4) 3

This table shows the number of times this algorithm has been successfully run using the given environment. Note this does not provide sufficient information to evaluate if the algorithm will run when submitted to different conditions.

algorithms tutorial pca_lda 5

Endpoint Groups 1

Group: main

Parameters 2

algorithms

tutorial

pca_lda

5