This is a legacy algorithm: the platform API has changed since it was implemented, so new versions and forks will need to be updated to the current API.

Algorithms have at least one input and one output. All algorithm endpoints are organized in groups. Groups are used by the platform to indicate which inputs and outputs are synchronized together. The first group is automatically synchronized with the channel defined by the block in which the algorithm is deployed.

Group: group0

  Endpoint Name        Data Format                     Nature
  -------------------  ------------------------------  ------
  features             system/array_2d_floats/1        Input
  processor_uem        anthony_larcher/uemranges/1     Input
  processor_file_info  anthony_larcher/file_info_sd/1  Input
  adapted_speakers     anthony_larcher/speakers/1      Output

Group: group_train

  Endpoint Name    Data Format                       Nature
  ---------------  --------------------------------  ------
  model            anthony_larcher/array_1d_uint8/1  Input
  train_features   system/array_2d_floats/1          Input
  train_uem        anthony_larcher/uemranges/1       Input
  train_file_info  anthony_larcher/file_info_sd/1    Input
  train_speakers   anthony_larcher/speakers/1        Input
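Inside the algorithm, the endpoints of the synchronized group (group0) are read through the inputs mapping and written through the outputs mapping, while the train group is reached through data loaders. A minimal sketch of how the endpoints listed above surface in the process() method (the same access patterns as in the full code below; hypothesis stands for the value to be written):

    features = inputs["features"].data.value                  # system/array_2d_floats/1
    uem = inputs["processor_uem"].data                        # anthony_larcher/uemranges/1
    file_info = inputs["processor_file_info"].data            # anthony_larcher/file_info_sd/1
    train_loader = data_loaders.loaderOf("train_file_info")   # reaches group_train
    outputs["adapted_speakers"].write(hypothesis)             # anthony_larcher/speakers/1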
#!/usr/bin/env python
# vim: set fileencoding=utf-8 :

###################################################################################
#                                                                                 #
# Copyright (c) 2019 Idiap Research Institute, http://www.idiap.ch/               #
# Contact: beat.support@idiap.ch                                                  #
#                                                                                 #
# Redistribution and use in source and binary forms, with or without              #
# modification, are permitted provided that the following conditions are met:     #
#                                                                                 #
# 1. Redistributions of source code must retain the above copyright notice, this  #
# list of conditions and the following disclaimer.                                #
#                                                                                 #
# 2. Redistributions in binary form must reproduce the above copyright notice,    #
# this list of conditions and the following disclaimer in the documentation       #
# and/or other materials provided with the distribution.                          #
#                                                                                 #
# 3. Neither the name of the copyright holder nor the names of its contributors   #
# may be used to endorse or promote products derived from this software without   #
# specific prior written permission.                                              #
#                                                                                 #
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND #
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED   #
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE          #
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE    #
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL      #
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR      #
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER      #
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,   #
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE   #
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.            #
#                                                                                 #
###################################################################################
import numpy as np
import pickle
import sidekit
import s4d
import struct
import copy


class H5Dict(dict):
    """
    A dictionary sub-class used to exchange features between algorithms.
    It mimics the parts of the h5py file interface that SIDEKIT expects
    from a feature file, using "show/dataset" strings as keys.
    """

    def __setitem__(self, key, value):
        super().__setitem__(key, value)

    def __getitem__(self, key):
        return super().__getitem__(key)

    def __iter__(self):
        # SIDEKIT only iterates over the dataset names; "cep" is enough here
        return iter(["cep"])


class AlliesExtractor(sidekit.FeaturesExtractor):
    """
    A FeaturesExtractor used to access cepstral coefficients in SIDEKIT style.
    """

    def __init__(self, dataloader, name_dict, key="features"):
        super().__init__()
        self.dataloader = dataloader
        self.name_dict = name_dict
        self.audio_filename_structure = "{}"
        self.key = key

    def extract(self, show, channel, input_audio_filename):
        entry, _, _ = self.dataloader[self.name_dict[show]]
        cep = entry[self.key].value
        label = np.ones((cep.shape[0]), dtype=bool)

        # Fill an H5Dict following the layout of a SIDEKIT feature file:
        # column 0 holds the frame energy, the remaining columns the cepstra
        h5f = H5Dict()
        h5f[show] = iter(["cep"])
        h5f["compression"] = 0
        h5f[show + "/energy"] = cep[:, 0]
        h5f[show + "/energy_mean"] = cep[:, 0].mean()
        h5f[show + "/energy_std"] = cep[:, 0].std()
        h5f[show + "/cep"] = cep[:, 1:]
        h5f[show + "/cep_mean"] = cep[:, 1:].mean(0)  # per-coefficient mean over frames
        h5f[show + "/cep_std"] = cep[:, 1:].std(0)    # per-coefficient std over frames
        h5f[show + "/vad"] = label.astype("int8")
        return h5f


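# Minimal usage sketch (hypothetical file id "file_0042", assuming a loader
# whose entries carry "train_features" arrays of shape nb_frames x 14 with
# the frame energy in column 0):
#
#     fe = AlliesExtractor(train_loader, name_dict, "train_features")
#     h5f = fe.extract("file_0042", 0, "unused_audio_filename")
#     cep = h5f["file_0042/cep"]    # nb_frames x 13 cepstral coefficients

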
def get_s4d_model_from_text(serialized_model):
    """
    Fill a s4d.ModelIV object to use S4D.

    serialized_model is a dictionary with keys "ubm", "tv_fa", "norm_mean",
    "norm_cov", "plda_fa" and "iv_stat" (plus "global_diar", used by the caller).
    """
    model = s4d.ModelIV()
    model.ubm = serialized_model['ubm']
    model.tv = serialized_model['tv_fa'].F
    model.tv_mean = serialized_model['tv_fa'].mean
    model.tv_sigma = serialized_model['tv_fa'].Sigma
    model.sn_mean = serialized_model['norm_mean']
    model.sn_cov = serialized_model['norm_cov']
    model.plda_mean = serialized_model['plda_fa'].mean
    model.plda_f = serialized_model['plda_fa'].F
    model.plda_g = None
    model.plda_sigma = serialized_model['plda_fa'].Sigma
    model.ivectors = serialized_model['iv_stat']
    model.scores = None
    model.nb_thread = 1
    return model


def s4d_to_allies(s4d_segmentation):
    """
    Convert a s4d segmentation into the proper ALLIES format.

    :param s4d_segmentation: a tuple whose first element is a s4d.Diar object
    """
    spk = []
    st = []
    en = []
    for segment in s4d_segmentation[0]:
        spk.append(segment["cluster"])
        st.append(float(segment["start"]) / 100.)
        en.append(float(segment["stop"]) / 100.)
    hyp = {"speaker": spk, "start_time": st, "end_time": en}
    return hyp


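# Example: a s4d.Diar holding a single segment labelled "spk1" that spans
# frames 0 to 250 (at 100 frames per second) converts to
#     {"speaker": ["spk1"], "start_time": [0.0], "end_time": [2.5]}

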
def extract_iv_on_segments(model, features, seg):
    # Create a StatServer to accumulate the statistics
    tmp_ss = sidekit.StatServer(statserver_file_name=None,
                                distrib_nb=len(model.ubm.w),
                                feature_size=features.shape[1],
                                index=None,
                                ubm=model.ubm)
    tmp_ss.modelset = np.empty(len(seg), dtype="|O")
    tmp_ss.segset = np.empty(len(seg), dtype="|O")
    tmp_ss.start = np.zeros((len(seg),), dtype=np.int32)
    tmp_ss.stop = np.zeros((len(seg),), dtype=np.int32)
    tmp_ss.stat0 = np.zeros((len(seg), len(model.ubm.w)), dtype=np.float32)
    tmp_ss.stat1 = np.zeros((len(seg), model.ubm.sv_size()), dtype=np.float32)

    for idx, segment in enumerate(seg):
        start = segment['start']
        stop = segment['stop']
        data = features[start:stop, :]
        lp = model.ubm.compute_log_posterior_probabilities(data)
        pp, _ = sidekit.mixture.sum_log_probabilities(lp)
        tmp_ss.modelset[idx] = segment['cluster']
        tmp_ss.segset[idx] = segment['cluster']
        tmp_ss.start[idx] = start
        tmp_ss.stop[idx] = stop
        # Compute 0th-order statistics
        tmp_ss.stat0[idx, :] = pp.sum(0)
        # Compute 1st-order statistics
        tmp_ss.stat1[idx, :] = np.reshape(np.transpose(np.dot(data.transpose(), pp)), model.ubm.sv_size()).astype(np.float32)

    fa = sidekit.FactorAnalyser(mean=model.tv_mean, Sigma=model.tv_sigma, F=model.tv)
    ivectors = fa.extract_ivectors_single(model.ubm, tmp_ss)
    return ivectors


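# The loop in extract_iv_on_segments accumulates the classical UBM
# sufficient statistics for each segment:
#     stat0[c] = sum_t p(c | x_t)          (zero order, one value per Gaussian c)
#     stat1[c] = sum_t p(c | x_t) * x_t    (first order, concatenated over all c)
# where p(c | x_t) is the posterior probability of Gaussian c given frame x_t.

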
def run_s4d_segmentation(model,
                         global_diar,
                         features,
                         file_id):
    """
    Run the two-pass within-show diarization baseline on one file and
    return (within_diar, cluster_dict, merge).
    """
    ####################################################################################
    #  First pass of within-show diarization
    ####################################################################################
    thr_l = 2        # linear BIC threshold
    thr_h = 3        # BIC HAC threshold
    thr_vit = -250   # Viterbi penalty
    # Initialize diarization
    init_diar = s4d.segmentation.init_seg(features, file_id)
    # ## Step 1: Gaussian divergence segmentation
    seg_diar = s4d.segmentation.segmentation(features, init_diar)
    # ## Step 2: linear BIC segmentation (fusion)
    bicl_diar = s4d.segmentation.bic_linear(features, seg_diar, thr_l, sr=False)
    # ## Step 3: BIC HAC (intra-show clustering)
    hac = s4d.clustering.hac_bic.HAC_BIC(features, bicl_diar, thr_h, sr=False)
    bich_diar = hac.perform()
    # ## Step 4: Viterbi decoding
    vit_diar = s4d.viterbi.viterbi_decoding(features, bich_diar, thr_vit)

    ####################################################################################
    #  Second pass of within-show diarization
    ####################################################################################
    idmap_in = vit_diar.id_map()
    # Add delta, double delta and normalize the features
    label, threshold = sidekit.frontend.vad.vad_percentil(features[:, 0], 10)
    delta_filter = np.array([.25, .5, .25, 0, -.25, -.5, -.25])
    delta = sidekit.frontend.features.compute_delta(features, filt=delta_filter)
    features = np.column_stack((features, delta))
    double_delta = sidekit.frontend.features.compute_delta(delta, filt=delta_filter)
    features = np.column_stack((features, double_delta))
    label = sidekit.frontend.vad.label_fusion(label)
    sidekit.frontend.normfeat.cep_sliding_norm(features, label=label, win=301, center=True, reduce=True)

    # Extract i-vectors
    within_iv = extract_iv_on_segments(model, features, vit_diar)
    within_iv_backup = copy.deepcopy(within_iv)
    # Normalize i-vectors from the new file and compute within-show scores
    within_iv_mean = within_iv_backup.mean_stat_per_model()
    within_iv_mean.spectral_norm_stat1(model.sn_mean[:1], model.sn_cov[:1])
    if within_iv_mean.modelset.shape[0] > 1:
        ndx = sidekit.Ndx(models=within_iv_mean.modelset, testsegs=within_iv_mean.modelset)
        scores = sidekit.iv_scoring.fast_PLDA_scoring(within_iv_mean,
                                                      within_iv_mean,
                                                      ndx,
                                                      model.plda_mean,
                                                      model.plda_f,
                                                      model.plda_sigma,
                                                      p_known=0.0,
                                                      scaling_factor=1.0,
                                                      check_missing=False)
        # Symmetrize the score matrix
        scores.scoremat = 0.5 * (scores.scoremat + scores.scoremat.transpose())

        print("models in scores: {}".format(scores.modelset))
        print("Clusters in vit_diar: {}".format(vit_diar.unique('cluster')))
        # Within-show clustering
        within_diar, cluster_dict, merge = s4d.clustering.hac_iv.hac_iv(
            vit_diar, scores, threshold=5
        )
        print("Number of clusters in vit_diar {} and in within_diar {}".format(len(vit_diar.unique('cluster')), len(within_diar.unique('cluster'))))
        print("scores min {} and max {}".format(scores.scoremat.min(), scores.scoremat.max()))
        print("merge: {}".format(merge))
    else:
        # A single cluster was found: nothing to score or merge
        within_diar, cluster_dict, merge = vit_diar, {}, []

    return (within_diar, cluster_dict, merge)


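# run_s4d_segmentation thus returns a triple:
#     within_diar  - a s4d.Diar carrying the within-show clustering
#     cluster_dict - maps each kept cluster name to the names merged into it
#     merge        - the list of merge operations performed by the i-vector HAC

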
def unsupervised_model_adaptation(features,
                                  file_id,
                                  data_loaders,
                                  model,
                                  global_diar,
                                  current_s4d_segmentation):

    ########################################################
    # ACCESS TRAINING DATA TO ADAPT IF WANTED
    ########################################################
    # You are allowed to re-process training data all along the
    # lifelong adaptation.
    # The code below shows how to access data from the training set.

    # Create the data loaders to access all training data (not only the file_info)
    # See below how to get all training data
    train_loader = data_loaders.loaderOf("train_file_info")
    # Get the list of training files
    # We create a dictionary with file_id as keys and the index of the file in the
    # training set as value. This dictionary will be used later to access the
    # training files by file_id
    name_dict = {}
    for i in range(train_loader.count()):
        (data, _, end) = train_loader[i]
        train_file_id = data["train_file_info"].file_id
        name_dict[train_file_id] = int(i)

    # If you use an AlliesExtractor to access the features you need to instantiate it like this
    fe = AlliesExtractor(train_loader, name_dict, "train_features")

    fs_seg = sidekit.FeaturesServer(
                features_extractor=fe,
                dataset_list=["cep"],
                keep_all_features=True,
                delta=False,
                double_delta=False,
                )

    fs_acoustic = sidekit.FeaturesServer(
                features_extractor=fe,
                dataset_list=["cep"],
                keep_all_features=True,
                delta=True,
                double_delta=True,
                feat_norm="cmvn_sliding",
                )

    ########################################################
    # Shows how to access training data per INDEX
    #
    # Shows how to access features, uem and file_info from
    # a training file by using its index in the training set
    ########################################################
    file_index = 2  # index of the file we want to access
    file_id_per_index = train_loader[file_index][0]['train_file_info'].file_id
    time_stamp_per_index = train_loader[file_index][0]['train_file_info'].time_stamp
    supervision_per_index = train_loader[file_index][0]['train_file_info'].supervision
    uem_per_index = train_loader[file_index][0]['train_uem']
    features_per_index = train_loader[file_index][0]["train_features"].value

    # Access features using an AlliesExtractor
    # BEWARE: the access here is done using the file_id that you must retrieve first
    tmp = fe.extract(file_id_per_index, 0, "input_audio_filename")
    features_per_index_extractor = tmp[file_id_per_index + "/cep"]

    ########################################################
    # Shows how to access training data per file_id
    #
    # Shows how to access features, uem and file_info from
    # a training file by using its file_id
    ########################################################
    # Assume that we know the file_id; note that we stored them earlier in a dictionary
    train_file_id = list(name_dict.keys())[3]
    time_stamp_per_ID = train_loader[name_dict[train_file_id]][0]["train_file_info"].time_stamp
    supervision_per_ID = train_loader[name_dict[train_file_id]][0]["train_file_info"].supervision
    uem_per_ID = train_loader[name_dict[train_file_id]][0]["train_uem"]
    features_per_ID = train_loader[name_dict[train_file_id]][0]["train_features"].value

    # Access features using an AlliesExtractor
    # BEWARE: the access here is done using the file_id that you must retrieve first
    tmp = fe.extract(train_file_id, 0, "input_audio_filename")
    features_per_ID_extractor = tmp[train_file_id + "/cep"]


    ####################################################################################
    #  Cross-show diarization
    ####################################################################################
    # Group speakers across shows to improve the model if possible

    return model, global_diar


def generate_system_request_to_user(model, global_diar, current_s4d_segmentation):
    """
    Include here your code to ask "questions" to the human in the loop.

    The prototype of this function can be adapted to your needs as it is internal
    to this block, but it must return a request as a dictionary object as shown below.

    request_type can be either "same" or "boundary"
        if "same", the question asked to the user is: is the same speaker
                   speaking at time time_1 and time_2?
                   The cost of this question is 6s / total_file_duration
        if "boundary", the question asked to the user is: what are the
                   boundaries of the segment including time_1?
                   Note that time_2 is not used in this request.
                   The cost of this question is (|time_2 - time_1| + 6s) / total_file_duration

    time_1 must be a numpy.float32
    time_2 must be a numpy.float32
    """
    request = {
           "request_type": "same",
           "time_1": np.float32(0.5),
           "time_2": np.float32(1.5),
          }

    return request


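# Worked example of the request costs described above: for a 600-second
# file, a "same" question costs 6 / 600 = 0.01 of the file duration, while
# a "boundary" question with time_1 = 10.0 and time_2 = 25.0 costs
# (|25.0 - 10.0| + 6) / 600 = 0.035.

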
def online_adaptation(model, global_diar, features, current_s4d_segmentation, request, user_answer):
    """
    Include here your code to adapt the model according to the answer
    of the human domain expert to the request.

    The prototype of this function can be adapted to your needs as it is internal to this block.
    """

    # Case of interactive learning (the request is a fake one)

    # Case of active learning
    new_s4d_segmentation = current_s4d_segmentation

    return model, global_diar, new_s4d_segmentation


def rename_ivectors(iv_stat_server, cluster_dict):
    """
    Function used by the baseline system to rename clusters of i-vectors
    after segmentation.
    """
    # Rename i-vectors to match cluster IDs from the new segmentation
    iv = copy.deepcopy(iv_stat_server)
    reverse_dict = {}
    for k in cluster_dict.keys():
        for c in cluster_dict[k]:
            reverse_dict[c] = k

    for idx, c in enumerate(iv.modelset):
        if c in reverse_dict.keys():
            iv.modelset[idx] = reverse_dict[c]

    return iv


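# Example with a hypothetical cluster_dict {"spk1": ["spk3", "spk7"]}:
# every i-vector whose model is "spk3" or "spk7" is renamed "spk1", and
# all other entries of the modelset are left untouched.

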
class Algorithm:
    """
    Main class of the lifelong_block
    """
    def __init__(self):
        self.model = None
        self.global_diar = None

    def process(self, inputs, data_loaders, outputs, loop_channel):
        """
        Process one incoming file: run the baseline diarization, interact
        with the human in the loop if allowed, and output the hypothesis.
        """
        ########################################################
        # RECEIVE INCOMING INFORMATION FROM THE FILE TO PROCESS
        ########################################################

        # Get the model after initial training
        if self.model is None:
            model_loader = data_loaders.loaderOf("model")
            tmp_m = model_loader[0][0]['model'].value
            # Repack the uint8 array into bytes before unpickling
            pkl_after = struct.pack('{}B'.format(len(tmp_m)), *list(tmp_m))
            serialized_model = pickle.loads(pkl_after)
            # Build a s4d model from the deserialized dictionary
            self.model = get_s4d_model_from_text(serialized_model)
            self.global_diar = serialized_model["global_diar"]

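        # Note on serialization: the model arrives as a 1-D uint8 array and is
        # repacked into bytes above before unpickling. The inverse direction
        # (a sketch, assuming the training block pickles a dictionary of model
        # parts; model_dict is hypothetical) would be:
        #     buf = pickle.dumps(model_dict)
        #     uint8_model = np.frombuffer(buf, dtype=np.uint8)
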
        # Access features of the current file to process
        # features is a numpy.ndarray with dimension nb_frames x 14
        # See the documentation for details of the MFCC computation
        features = inputs["features"].data.value

        # Access incoming file information
        # See the documentation for a detailed description of the file_info
        file_info = inputs["processor_file_info"].data
        file_id = file_info.file_id
        supervision = file_info.supervision
        time_stamp = file_info.time_stamp
        # Access UEM information for the incoming file
        # See the documentation for a description of the UEM format
        uem = inputs["processor_uem"].data
        # Run diarization on the new input using the s4d baseline system
        current_s4d_segmentation = run_s4d_segmentation(self.model,
                                                        self.global_diar,
                                                        features,
                                                        file_id)
        # Convert the output from s4d into the ALLIES format
        current_hypothesis = s4d_to_allies(current_s4d_segmentation)

        ###################################################################################################
        # Interact with the human if necessary
        # This section exchanges information with the user simulation and ends up with a new hypothesis
        ###################################################################################################
        # Human assisted learning is ON when supervision is active or interactive
        human_assisted_learning = supervision in ["active", "interactive"]

        if not human_assisted_learning:
            # In this method, see how to access initial training data to adapt the model
            # for the new incoming data
            self.model, self.global_diar = unsupervised_model_adaptation(features,
                                                                         file_id,
                                                                         data_loaders,
                                                                         self.model,
                                                                         self.global_diar,
                                                                         current_s4d_segmentation
                                                                         )

        # If human assisted learning mode is on (active or interactive learning)
        while human_assisted_learning:

            # Create an empty request that is used to initiate interactive learning
            # For the case of active learning, this request is overwritten by your system itself
            request = {"request_type": "toto", "time_1": np.float32(0.0), "time_2": np.float32(0.0)}

            if supervision == "active":
                # The system can send a question to the human in the loop
                # by using an object of type request
                # The request is the question asked to the user
                request = generate_system_request_to_user(self.model, self.global_diar, current_s4d_segmentation)

            # Send the request to the user and wait for the answer
            message_to_user = {
                "file_id": file_id,  # ID of the file the question is related to
                "hypothesis": current_hypothesis,  # The current hypothesis
                "system_request": request,  # The question for the human in the loop
            }
            human_assisted_learning, user_answer = loop_channel.validate(message_to_user)

            # Take the user answer into account to generate a new hypothesis
            # and possibly update the model
            self.model, self.global_diar, current_s4d_segmentation = online_adaptation(self.model, self.global_diar, features, current_s4d_segmentation, request, user_answer)
            # Convert the output from s4d into the ALLIES format
            current_hypothesis = s4d_to_allies(current_s4d_segmentation)

        # End of human assisted learning
        # Send the current hypothesis
        end_data_index = 1
        outputs["adapted_speakers"].write(current_hypothesis)

        if not inputs.hasMoreData():
            pass
        # Always return True; it signals BEAT to continue processing
        return True


No experiments are using this algorithm.