This version of the UBM training normalizes the data before the k-means training.

This algorithm is legacy code: the API has changed since it was implemented, so new versions and forks will need to be updated.

Algorithms have at least one input and one output. All algorithm endpoints are organized in groups. Groups are used by the platform to indicate which inputs and outputs are synchronized together. The first group is automatically synchronized with the channel defined by the block in which the algorithm is deployed.

Unnamed group

Endpoint Name   Data Format                Nature
features        system/array_2d_floats/1   Input
ubm             tutorial/gmm/1             Output

Parameters allow users to change the configuration of an algorithm when scheduling an experiment.

Name                          Description                                                           Type    Default
number-of-gaussians           Number of Gaussian components of the UBM                             uint32  100
maximum-number-of-iterations  Maximum number of iterations for the k-means and the ML training    uint32  10
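As a sketch of how these parameters reach the algorithm: the setup() method in the code below falls back to the built-in defaults whenever a parameter is not supplied. The class here only mimics that logic for illustration; it is not the platform API itself.

```python
# Minimal sketch (not the platform API): how the parameter dictionary
# passed by the platform overrides the algorithm defaults via dict.get().
class Algorithm:
    def __init__(self):
        self.number_of_gaussians = 100   # default
        self.max_iterations = 10         # default

    def setup(self, parameters):
        # Missing keys keep the defaults set in __init__
        self.number_of_gaussians = parameters.get('number-of-gaussians',
                                                  self.number_of_gaussians)
        self.max_iterations = parameters.get('maximum-number-of-iterations',
                                             self.max_iterations)
        return True


algorithm = Algorithm()
algorithm.setup({'number-of-gaussians': 256})
print(algorithm.number_of_gaussians)  # 256 (overridden)
print(algorithm.max_iterations)       # 10 (default kept)
```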
import bob
import numpy


class Algorithm:

    def __init__(self):
        self.number_of_gaussians = 100
        self.max_iterations = 10
        self.data = []

    def setup(self, parameters):
        self.number_of_gaussians = parameters.get('number-of-gaussians',
                                                  self.number_of_gaussians)
        self.max_iterations = parameters.get('maximum-number-of-iterations',
                                             self.max_iterations)
        return True

    def __normalize_std_array__(self, array):
        """Applies a unit variance normalization to an array"""

        # Initializes variables
        n_samples = array.shape[0]
        length = array.shape[1]
        mean = numpy.ndarray((length,), 'float64')
        std = numpy.ndarray((length,), 'float64')

        mean.fill(0)
        std.fill(0)

        # Computes mean and variance
        for k in range(n_samples):
            x = array[k, :].astype('float64')
            mean += x
            std += (x ** 2)

        mean /= n_samples
        std /= n_samples
        std -= (mean ** 2)
        std = std ** 0.5  # sqrt(std)

        ar_std_list = []
        for k in range(n_samples):
            ar_std_list.append(array[k, :].astype('float64') / std)
        ar_std = numpy.vstack(ar_std_list)

        return (ar_std, std)

    def __multiply_vectors_by_factors__(self, matrix, vector):
        """Used to unnormalize some data"""
        for i in range(0, matrix.shape[0]):
            for j in range(0, matrix.shape[1]):
                matrix[i, j] *= vector[j]

    def process(self, inputs, outputs):
        self.data.append(inputs["features"].data.value)

        if not inputs.hasMoreData():
            # create the array set used for training
            training_set = numpy.vstack(self.data)
            input_size = training_set.shape[1]

            # create the KMeans and UBM machines
            kmeans = bob.machine.KMeansMachine(int(self.number_of_gaussians), input_size)
            ubm = bob.machine.GMMMachine(int(self.number_of_gaussians), input_size)

            # create the KMeansTrainer
            kmeans_trainer = bob.trainer.KMeansTrainer()
            kmeans_trainer.initialization_method = bob.trainer.KMeansTrainer.RANDOM_NO_DUPLICATE
            kmeans_trainer.max_iterations = int(self.max_iterations)

            # train using the KMeansTrainer
            normalized_data, std_array = self.__normalize_std_array__(training_set)  # normalize before training
            kmeans_trainer.train(kmeans, normalized_data)

            (variances, weights) = kmeans.get_variances_and_weights_for_each_cluster(normalized_data)
            means = kmeans.means

            # undo the normalization
            self.__multiply_vectors_by_factors__(means, std_array)
            self.__multiply_vectors_by_factors__(variances, std_array ** 2)

            # initialize the GMM
            ubm.means = means
            ubm.variances = variances
            ubm.weights = weights

            # train the GMM
            trainer = bob.trainer.ML_GMMTrainer()
            trainer.max_iterations = int(self.max_iterations)
            trainer.train(ubm, training_set)

            # output the trained UBM
            outputs["ubm"].write({
                'weights':              ubm.weights,
                'means':                ubm.means,
                'variances':            ubm.variances,
                'variance_thresholds':  ubm.variance_thresholds,
            })

        return True

The code above implements this algorithm in Python.

For Gaussian Mixture Models (GMM), this algorithm implements the Universal Background Model (UBM) training described in [Reynolds2000].

First, this algorithm estimates the means, the diagonal covariance matrix, and the weights of each Gaussian component using k-means clustering. Afterwards, only the means are re-estimated using the Maximum Likelihood (ML) estimator.
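This two-stage scheme can be sketched with scikit-learn in place of the legacy Bob API: k-means supplies the initial means and cluster weights, and EM (the ML estimator) then refines the mixture. Note one difference from the code above: scikit-learn's GaussianMixture re-estimates all parameters during EM, whereas the Bob ML_GMMTrainer used here updates only the means.

```python
# Hypothetical sketch of the two-stage UBM training using scikit-learn
# instead of the legacy Bob API. The data here is synthetic.
import numpy as np
from sklearn.cluster import KMeans
from sklearn.mixture import GaussianMixture

rng = np.random.default_rng(0)
data = rng.normal(size=(500, 4))          # 500 samples, 4 dimensions
n_gaussians = 8

# Stage 1: k-means clustering gives initial means and cluster weights
kmeans = KMeans(n_clusters=n_gaussians, n_init=10, random_state=0).fit(data)
means = kmeans.cluster_centers_
weights = np.bincount(kmeans.labels_, minlength=n_gaussians) / len(data)

# Stage 2: ML (EM) refinement with a diagonal covariance model,
# initialized from the k-means statistics
ubm = GaussianMixture(n_components=n_gaussians, covariance_type='diag',
                      means_init=means, weights_init=weights,
                      max_iter=10, random_state=0).fit(data)
print(ubm.means_.shape)  # (8, 4)
```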

This version of the UBM training normalizes the input data before the k-means training.
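The normalization is a per-dimension unit-variance scaling, mirroring __normalize_std_array__ and __multiply_vectors_by_factors__ in the code above: divide each dimension by its standard deviation before clustering, then map the learned means and variances back to the original scale. A minimal NumPy sketch (the cluster statistics are stood in by global statistics here):

```python
# Sketch of the unit-variance normalization applied before k-means, and of
# how the learned statistics are un-normalized afterwards. Synthetic data.
import numpy as np

rng = np.random.default_rng(1)
data = rng.normal(loc=5.0, scale=[1.0, 10.0, 0.1], size=(1000, 3))

std = data.std(axis=0)       # per-dimension standard deviation (biased, as above)
normalized = data / std      # unit-variance data fed to k-means

# After clustering in the normalized space, un-normalize the statistics
# (global mean/variance stand in for the per-cluster means/variances):
means_norm = normalized.mean(axis=0)
vars_norm = normalized.var(axis=0)
means = means_norm * std             # back to the original scale
variances = vars_norm * std ** 2

print(np.allclose(means, data.mean(axis=0)))  # True
```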

This algorithm relies on the Bob library.

The input, features, is a training set of floating-point vectors given as a two-dimensional array of floats (64 bits): rows correspond to training samples and columns to the dimensionality of the samples. The output, ubm, is the GMM trained using the ML estimator.
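For illustration, the expected shape of the features input can be sketched as follows; the sample and dimension counts are made up:

```python
# Illustrative layout of the 'features' input: a 2D float64 array in which
# each row is one training sample. The sizes here are arbitrary examples.
import numpy as np

n_samples, n_dims = 250, 60   # e.g. 250 feature vectors of 60 coefficients
features = np.zeros((n_samples, n_dims), dtype=np.float64)
print(features.shape)   # (250, 60)
print(features.dtype)   # float64
```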

[Reynolds2000]
  D. Reynolds, T. Quatieri, R. Dunn: Speaker verification using adapted Gaussian mixture models. Digital Signal Processing 10.1 (2000): 19-41.


BEAT platform version 2.2.1b0 | © Idiap Research Institute - 2013-2025