Implements the Linear and Mel Frequency Cepstral Coefficients (LFCC and MFCC)
Algorithms have at least one input and one output. All algorithm endpoints are organized in groups. Groups are used by the platform to indicate which inputs and outputs are synchronized together. The first group is automatically synchronized with the channel defined by the block in which the algorithm is deployed.
Endpoint Name | Data Format | Nature |
---|---|---|
speech | system/array_1d_floats/1 | Input |
vad | system/array_1d_integers/1 | Input |
features | system/array_2d_floats/1 | Output |
Parameters allow users to change the configuration of an algorithm when scheduling an experiment
Name | Description | Type | Default | Range/Choices |
---|---|---|---|---|
rate | | float64 | 16000.0 | |
xxxxxxxxxx
import bob
import numpy
def normalize_std_array(vector):
    """Normalize a sequence of scalar samples to zero mean and unit variance.

    Parameters
    ----------
    vector : array-like
        ``n_samples`` values, each a scalar or a length-1 array.

    Returns
    -------
    numpy.ndarray
        ``float64`` array of shape ``(n_samples, 1)`` containing
        ``(vector[i] - mean) / std``, where ``std`` is the population
        standard deviation of the samples. If the samples are constant
        (``std`` is zero up to rounding error) the samples are only
        centred, so the result is (numerically) zero instead of NaN/inf.
        An empty input yields an empty ``(0, 1)`` array.
    """
    n_samples = len(vector)
    # One row per sample; the reshape also rejects samples that carry more
    # than one value, which this function does not support.
    samples = numpy.asarray(vector, dtype=numpy.float64).reshape(n_samples, 1)
    if n_samples == 0:
        # Nothing to normalize; avoid computing the mean of an empty array.
        return samples

    mean = samples.mean(axis=0)
    # Computing the deviation from centred data (instead of
    # E[x^2] - E[x]^2) cannot produce a negative variance through rounding.
    std = samples.std(axis=0)
    centered = samples - mean

    # A standard deviation at the level of floating-point rounding noise
    # means the input is constant: dividing by it would blow the noise up
    # (or yield NaN/inf when it is exactly zero), so skip the scaling.
    noise_floor = numpy.finfo(numpy.float64).eps * numpy.abs(samples).max()
    safe_std = numpy.where(std > noise_floor, std, 1.0)
    return centered / safe_std
class Algorithm:
    """Cepstral feature extraction restricted to frames labelled as speech.

    ``setup`` builds a ``bob.ap.Ceps`` extractor from the attributes set in
    ``__init__``. ``process`` runs it on the ``speech`` input, keeps the
    frames whose ``vad`` label equals 1, optionally normalizes every
    feature column and writes the result to the ``features`` output.
    """

    def __init__(self):
        # Extractor configuration, passed positionally to bob.ap.Ceps in
        # setup(); the window sizes are in milliseconds as the names say.
        self.win_length_ms = 20
        self.win_shift_ms = 10
        self.n_filters = 24
        self.n_ceps = 19
        self.f_min = 0
        self.f_max = 4000
        self.delta_win = 2
        self.pre_emphasis_coef = 0.95
        # Flags copied onto the extractor after construction.
        self.dct_norm = False
        self.mel_scale = True
        self.withEnergy = True
        self.withDelta = True
        self.withDeltaDelta = True
        # TODO: find a way to compute this automatically
        self.rate = 16000
        # Column indices kept from the extractor output.
        # NOTE(review): 60 presumably equals (n_ceps + energy) * 3 for
        # static + delta + delta-delta coefficients -- confirm against bob.
        self.features_mask = numpy.arange(0, 60)
        self.normalizeFeatures = True

    def setup(self, parameters):
        """Create the cepstral extractor.

        ``parameters`` is a mapping; its optional ``'rate'`` entry
        overrides the default sampling rate. Returns ``True``.
        """
        self.rate = parameters.get('rate', self.rate)
        self.extractor = bob.ap.Ceps(
            self.rate,
            self.win_length_ms,
            self.win_shift_ms,
            self.n_filters,
            self.n_ceps,
            self.f_min,
            self.f_max,
            self.delta_win,
            self.pre_emphasis_coef,
        )
        self.extractor.dct_norm = self.dct_norm
        self.extractor.mel_scale = self.mel_scale
        self.extractor.with_energy = self.withEnergy
        self.extractor.with_delta = self.withDelta
        self.extractor.with_delta_delta = self.withDeltaDelta
        return True

    def normalize_features(self, params):
        """Normalize every column of ``params`` independently.

        ``params`` is a 2D array of shape ``(n_frames, n_features)``. Each
        column is passed to ``normalize_std_array`` and the result is
        returned as a ``float64`` array of the same shape.
        """
        params = numpy.asarray(params, dtype=numpy.float64)
        normalized = numpy.empty(params.shape, dtype=numpy.float64)
        for index in range(params.shape[1]):
            # normalize_std_array returns an (n_frames, 1) array; take its
            # single column. This replaces the per-element numpy.asscalar
            # calls, a function that no longer exists in current NumPy.
            normalized[:, index] = normalize_std_array(params[:, index])[:, 0]
        return normalized

    def process(self, inputs, outputs):
        """Extract, filter and write the features of one utterance.

        Reads ``inputs["speech"].data.value`` (the signal handed to the
        extractor) and ``inputs["vad"].data.value`` (one label per frame;
        frames labelled 1 are kept). Writes ``{'value': <2D float64
        array>}`` to ``outputs["features"]`` and returns ``True``. If no
        frame is kept, a single all-zero row is written so that later
        processing steps do not receive an empty array.
        """
        float_wav = inputs["speech"].data.value
        labels = numpy.asarray(inputs["vad"].data.value)
        cepstral_features = numpy.asarray(self.extractor(float_wav))
        features_mask = self.features_mask

        # The label and frame counts may differ; only frames that have a
        # label can be selected. Truncating both to the common length
        # avoids emitting uninitialised rows when there are more labels
        # than frames and avoids indexing an empty label array.
        n_common = min(len(labels), cepstral_features.shape[0])
        is_speech = labels[:n_common] == 1
        speech_frames = cepstral_features[:n_common][is_speech]
        filtered_features = numpy.asarray(
            speech_frames[:, features_mask], dtype=numpy.float64
        )

        if self.normalizeFeatures:
            normalized_features = self.normalize_features(filtered_features)
        else:
            normalized_features = filtered_features

        if normalized_features.shape[0] == 0:
            # The original message interpolated an undefined name and
            # raised NameError precisely when this fallback was needed.
            print("Warning: no speech found in the input signal")
            # But do not keep it empty!!! This avoids errors in next steps
            normalized_features = numpy.array([numpy.zeros(len(features_mask))])

        outputs["features"].write({
            'value': numpy.vstack(normalized_features)
        })
        return True
The code for this algorithm in Python
The ruler at 80 columns indicates suggested POSIX line breaks (for readability).
The editor will automatically enlarge to accommodate the entirety of your input.
Use keyboard shortcuts for search/replace and faster editing. For example, use Ctrl-F (PC) or Cmd-F (Mac) to search through this box
This algorithm implements the MFCC and LFCC feature extraction. It relies on the Bob library.
The following parameters are set inside the algorithm and can be modified by the user: