This is a legacy algorithm: the platform API has changed since it was implemented, so new versions and forks will need to be updated.

Algorithms have at least one input and one output. All algorithm endpoints are organized in groups. Groups are used by the platform to indicate which inputs and outputs are synchronized together. The first group is automatically synchronized with the channel defined by the block in which the algorithm is deployed.

Group: group0

  Endpoint Name       | Data Format                    | Nature
  evaluator_file_info | anthony_larcher/file_info_sd/1 | Input
  evaluator_speakers  | anthony_larcher/speakers/1     | Input
  evaluator_uem       | anthony_larcher/uemranges/1    | Input
  evaluator_output    | system/float/1                 | Output
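For illustration only, the group can be thought of as a mapping from endpoint names to data formats. The sketch below is a plain Python dictionary, not the platform's actual declaration syntax (which is not shown on this page):

group0 = {
    "inputs": {
        "evaluator_file_info": "anthony_larcher/file_info_sd/1",
        "evaluator_speakers": "anthony_larcher/speakers/1",
        "evaluator_uem": "anthony_larcher/uemranges/1",
    },
    "outputs": {
        "evaluator_output": "system/float/1",
    },
}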

Parameters allow users to change the configuration of an algorithm when scheduling an experiment.

  Name                | Description                                                                      | Type    | Default | Range/Choices
  max_cost_per_file   | Maximum amount of effort the user will spend on a given file, in seconds        | float32 | 60.0    |
  request_collar_cost | Duration the user has to listen to around a point to give an answer, in seconds | float32 | 3.0     |
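As a rough sketch (assuming the platform passes parameters as a plain dictionary, as suggested by the setup() method in the code below), these values reach the algorithm like this:

algo = Algorithm()
algo.setup({"max_cost_per_file": 60.0, "request_collar_cost": 3.0})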
The code for this algorithm, in Python:

#!/usr/bin/env python
# vim: set fileencoding=utf-8 :

###################################################################################
#                                                                                 #
# Copyright (c) 2019 Idiap Research Institute, http://www.idiap.ch/               #
# Contact: beat.support@idiap.ch                                                  #
#                                                                                 #
# Redistribution and use in source and binary forms, with or without              #
# modification, are permitted provided that the following conditions are met:     #
#                                                                                 #
# 1. Redistributions of source code must retain the above copyright notice, this  #
# list of conditions and the following disclaimer.                                #
#                                                                                 #
# 2. Redistributions in binary form must reproduce the above copyright notice,    #
# this list of conditions and the following disclaimer in the documentation       #
# and/or other materials provided with the distribution.                          #
#                                                                                 #
# 3. Neither the name of the copyright holder nor the names of its contributors   #
# may be used to endorse or promote products derived from this software without   #
# specific prior written permission.                                              #
#                                                                                 #
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND #
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED   #
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE          #
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE    #
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL      #
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR      #
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER      #
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,   #
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE   #
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.            #
#                                                                                 #
###################################################################################

import numpy as np

from scipy.optimize import linear_sum_assignment


def make_spkmap(spk):
    # Map each speaker label to a contiguous integer index.
    spkmap = {}
    spkcount = 0
    for s in spk.speaker:
        if s not in spkmap:
            spkmap[s] = spkcount
            spkcount += 1
    return spkmap, spkcount


def range_to_frontiers(rng):
    # Merge overlapping ranges, then emit frontier points: ("n", t) marks the
    # end of a non-speech span (speech starts at t), ("p", t) the end of a
    # speech span.
    rng.sort()
    pos = 0
    while pos < len(rng) - 1:
        if rng[pos][1] >= rng[pos + 1][0]:
            rng[pos] = (rng[pos][0], max(rng[pos][1], rng[pos + 1][1]))
            rng.pop(pos + 1)
        else:
            pos = pos + 1
    front = []
    for r in rng:
        front.append(("n", r[0]))
        front.append(("p", r[1]))
    return front
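
# Example (illustration only, not part of the original source): overlapping
# input ranges are merged before being turned into frontier points, e.g.
#     range_to_frontiers([(0.0, 2.0), (1.5, 3.0), (5.0, 6.0)])
#     -> [("n", 0.0), ("p", 3.0), ("n", 5.0), ("p", 6.0)]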


def merge_two_frontiers(front1, front2, end1, end2):
    # Walk two frontier lists in time order and emit a combined frontier whose
    # mode string is the concatenation of both input modes; end1/end2 are the
    # modes assumed once the corresponding list is exhausted.
    frontr = []
    pos1 = 0
    pos2 = 0
    while pos1 < len(front1) or pos2 < len(front2):
        ctime = (
            front1[pos1][1]
            if pos2 == len(front2)
            else front2[pos2][1]
            if pos1 == len(front1)
            else min(front1[pos1][1], front2[pos2][1])
        )
        mode1 = end1 if pos1 == len(front1) else front1[pos1][0]
        mode2 = end2 if pos2 == len(front2) else front2[pos2][0]
        frontr.append((mode1 + mode2, ctime))
        if pos1 != len(front1) and front1[pos1][1] == ctime:
            pos1 += 1
        if pos2 != len(front2) and front2[pos2][1] == ctime:
            pos2 += 1
    return frontr


def make_merge_frontier(hyp_union, ref_union, ref_frontiers_collar):
    hr = merge_two_frontiers(hyp_union, ref_union, "n", "n")
    frontr = []
    for f in ref_frontiers_collar:
        frontr.append(merge_two_frontiers(hr, f, "nn", "n"))
    return frontr


def make_frontiers(spk, spkmap, spkcount):
    rngs = [[] for i in range(spkcount)]
    for i in range(0, len(spk.speaker)):
        spki = spkmap[spk.speaker[i]]
        rngs[spki].append((spk.start_time[i], spk.end_time[i]))
    front = []
    for r in rngs:
        front.append(range_to_frontiers(r))
    return front


def make_union_frontiers(spk):
    rngs = []
    for i in range(0, len(spk.speaker)):
        rngs.append((spk.start_time[i], spk.end_time[i]))
    return range_to_frontiers(rngs)


def frontiers_add_collar(front, collar):
    # Expand every frontier point into a "t" (transition) zone of +/- collar
    # seconds, merging zones that overlap.
    cfront = []
    for f in front:
        a = f[1] - collar
        b = f[1] + collar
        if a < 0:
            a = 0
        if len(cfront) == 0 or a > cfront[-1][1]:
            cfront.append((f[0], a))
            cfront.append(("t", b))
        else:
            cfront[-1] = ("t", b)
    return cfront
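
# Example (illustration only, not part of the original source): with a 0.25 s
# collar, the single segment [1.0, 3.0] becomes
#     frontiers_add_collar([("n", 1.0), ("p", 3.0)], 0.25)
#     -> [("n", 0.75), ("t", 1.25), ("p", 2.75), ("t", 3.25)]
# The "t" zones mark a tolerance region around each boundary in which scoring
# is more lenient.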


def make_times(front):
    # Total speech time of each frontier list.
    times = []
    for s in front:
        time = 0
        ptime = 0
        for p in s:
            if p[0] == "n":
                ptime = p[1]
            elif p[0] == "p":
                time += p[1] - ptime
        times.append(time)
    return times


def add_time(thyp, thyn, mode, eh, er, tc, efa, emiss, econf):
    if mode == "ppp":
        return eh, er + thyn, tc + thyp, efa, emiss, econf + thyn
    if mode == "ppn":
        return eh + thyp, er, tc, efa, emiss, econf
    if mode == "ppt":
        return eh, er, tc + thyp, efa, emiss, econf
    if mode == "pnn":
        return eh + thyp, er, tc, efa + thyp, emiss, econf
    if mode == "pnt":
        return eh, er, tc + thyp, efa, emiss, econf
    if mode == "npp":
        return eh, er + thyn, tc, efa, emiss + thyn, econf
    # npn npt nnn nnt
    return eh, er, tc, efa, emiss, econf


def compute_times(frontr, fronth):
    eh = 0
    er = 0
    rc = 0
    efa = 0
    emiss = 0
    econf = 0
    hpos = 0
    tbeg = 0
    thyp = 0
    hypbef = 0
    for f in frontr:
        tend = f[1]
        while hpos < len(fronth):
            dinter = min(fronth[hpos][1], tend)
            if fronth[hpos][0] == "p":
                thyp += dinter - hypbef
            if fronth[hpos][1] > tend:
                break
            hypbef = dinter
            hpos += 1
        eh, er, rc, efa, emiss, econf = add_time(
            thyp, tend - tbeg - thyp, f[0], eh, er, rc, efa, emiss, econf
        )

        if hpos < len(fronth):
            hypbef = min(fronth[hpos][1], tend)
        tbeg = tend
        thyp = 0
    while hpos < len(fronth):
        if fronth[hpos][0] == "p":
            thyp += fronth[hpos][1] - tbeg
        tbeg = fronth[hpos][1]
        hpos += 1
    eh, er, rc, efa, emiss, econf = add_time(
        thyp, 0, "pnn", eh, er, rc, efa, emiss, econf
    )
    return (
        round(eh, 3),
        round(er, 3),
        round(rc, 3),
        round(efa, 3),
        round(emiss, 3),
        round(econf, 3),
    )


def compute_miss(funion, front):
    # For each speaker frontier in `front`, amount of its speech that falls
    # outside the speech union `funion`.
    miss = []
    for f1 in front:
        hpos = 0
        tbeg = 0
        thyp = 0
        hypbef = 0
        fa = 0
        for f in funion:
            tend = f[1]
            while hpos < len(f1):
                dinter = min(f1[hpos][1], tend)
                if f1[hpos][0] == "p":
                    thyp += dinter - hypbef
                if f1[hpos][1] > tend:
                    break
                hypbef = dinter
                hpos += 1
            if f[0] == "n":
                fa += thyp
            if hpos < len(f1):
                hypbef = min(f1[hpos][1], tend)
            tbeg = tend
            thyp = 0
        while hpos < len(f1):
            if f1[hpos][0] == "p":
                thyp += f1[hpos][1] - tbeg
            tbeg = f1[hpos][1]
            hpos += 1
        fa += thyp
        fa = round(fa, 3)
        miss.append(fa)
    return miss


def accumulate_confusion(fref, fhyp, map_rh, map_hr):
    ref_spkcount = len(fref)
    hyp_spkcount = len(fhyp)
    correct_ref = [0] * ref_spkcount
    correct_hyp = [0] * hyp_spkcount
    lost_ref = [0] * ref_spkcount
    lost_hyp = [0] * hyp_spkcount
    confusion_matrix = np.zeros((ref_spkcount, hyp_spkcount), dtype="float64")
    fri = [0] * ref_spkcount
    fhi = [0] * hyp_spkcount
    cur_time = 0
    while True:
        ridx = []
        r_is_t = []
        hidx = []
        time = -1

        # Build the list of who is in the segment
        for i in range(ref_spkcount):
            if fri[i] != len(fref[i]):
                cf = fref[i][fri[i]]
                if time == -1 or cf[1] < time:
                    time = cf[1]
                if cf[0] != "n":
                    ridx.append(i)
                    r_is_t.append(cf[0] == "t")

        for i in range(hyp_spkcount):
            if fhi[i] != len(fhyp[i]):
                cf = fhyp[i][fhi[i]]
                if time == -1 or cf[1] < time:
                    time = cf[1]
                if cf[0] != "n":
                    hidx.append(i)

        if time == -1:
            break

        # Only do the computations when there's something to do
        if len(ridx) > 0 or len(hidx) > 0:
            duration = time - cur_time

            # Hyp and ref mapped together end up in correct time and are removed from the lists
            i = 0
            while i != len(ridx):
                r = ridx[i]
                h = map_rh[r]
                dropped = False
                if h != -1:
                    slot = -1
                    for j in range(len(hidx)):
                        if hidx[j] == h:
                            slot = j
                            break
                    if slot != -1:
                        correct_ref[r] += duration
                        correct_hyp[h] += duration
                        ridx.pop(i)
                        r_is_t.pop(i)
                        hidx.pop(slot)
                        dropped = True
                if not dropped:
                    i += 1

            # Ref in transition is removed from the list if mapped to some hyp
            i = 0
            while i != len(ridx):
                r = ridx[i]
                if r_is_t[i] and map_rh[r] != -1:
                    ridx.pop(i)
                    r_is_t.pop(i)
                else:
                    i += 1

            if len(hidx) == 0:
                # If there's no hyp, we're all in lost_ref
                for r in ridx:
                    lost_ref[r] += duration

            elif len(ridx) == 0:
                # If there's no ref, we're all in lost_hyp
                for h in hidx:
                    lost_hyp[h] += duration

            else:
                # Otherwise we're in confusion.  Amount of confusion time to give
                # is equal to the max of the ref and hyp times
                conf_time = max(len(ridx), len(hidx)) * duration

                # Number of slots, otoh, is equal to the product of the number of
                # refs and hyps
                conf_slots = len(ridx) * len(hidx)

                # Give the time equally in all slots
                conf_one_time = conf_time / conf_slots
                for r in ridx:
                    for h in hidx:
                        confusion_matrix[r, h] += conf_one_time

        # Step all the done segments
        for r in range(ref_spkcount):
            if fri[r] != len(fref[r]) and fref[r][fri[r]][1] == time:
                fri[r] += 1
        for h in range(hyp_spkcount):
            if fhi[h] != len(fhyp[h]) and fhyp[h][fhi[h]][1] == time:
                fhi[h] += 1
        cur_time = time

    return correct_ref, correct_hyp, lost_ref, lost_hyp, confusion_matrix

def find_common_point(f1, f2):
    # Return the midpoint of the longest interval where both frontier lists
    # are in speech at the same time.
    fr = merge_two_frontiers(f1, f2, "nn", "n")
    st = None
    en = None
    dur = None
    for i in range(1, len(fr)):
        if fr[i][0] == "pp":
            st1 = fr[i - 1][1]
            en1 = fr[i][1]
            dur1 = en1 - st1
            if dur is None or dur < dur1:
                st = st1
                en = en1
                dur = dur1
    # Should we randomize?  Let's be nice for now
    return (st + en) / 2

def find_best_information(ref, hyp, collar):
    ref_spkmap, ref_spkcount = make_spkmap(ref)
    hyp_spkmap, hyp_spkcount = make_spkmap(hyp)

    ref_frontiers = make_frontiers(ref, ref_spkmap, ref_spkcount)
    hyp_frontiers = make_frontiers(hyp, hyp_spkmap, hyp_spkcount)
    ref_frontiers_collar = []
    for front in ref_frontiers:
        ref_frontiers_collar.append(frontiers_add_collar(front, collar))

    ref_union = make_union_frontiers(ref)
    hyp_union = make_union_frontiers(hyp)

    merge_frontiers = make_merge_frontier(hyp_union, ref_union, ref_frontiers_collar)

    ref_times = make_times(ref_frontiers)
    hyp_times = make_times(hyp_frontiers)

    eh = np.zeros((ref_spkcount, hyp_spkcount), dtype="float64")
    er = np.zeros((ref_spkcount, hyp_spkcount), dtype="float64")
    tc = np.zeros((ref_spkcount, hyp_spkcount), dtype="float64")
    efa = np.zeros((ref_spkcount, hyp_spkcount), dtype="float64")
    emiss = np.zeros((ref_spkcount, hyp_spkcount), dtype="float64")
    econf = np.zeros((ref_spkcount, hyp_spkcount), dtype="float64")
    de = np.zeros((ref_spkcount, hyp_spkcount), dtype="float64")

    opt_size = max(ref_spkcount, hyp_spkcount)
    costs = np.zeros((opt_size, opt_size), dtype="float64")

    miss_hyp = compute_miss(ref_union, hyp_frontiers)
    miss_ref = compute_miss(hyp_union, ref_frontiers)

    for r in range(ref_spkcount):
        for h in range(hyp_spkcount):
            (
                eh[r, h],
                er[r, h],
                tc[r, h],
                efa[r, h],
                emiss[r, h],
                econf[r, h],
            ) = compute_times(merge_frontiers[r], hyp_frontiers[h])
            de[r, h] = (
                ref_times[r] + miss_hyp[h] - efa[r, h] - emiss[r, h] - econf[r, h]
            )
            costs[r, h] = -round(de[r, h] * 1000)

    (map1, map2) = linear_sum_assignment(costs)
    map_rh = [-1] * ref_spkcount
    map_hr = [-1] * hyp_spkcount
    for i1 in range(0, opt_size):
        i = map1[i1]
        j = map2[i1]
        if (
            i < ref_spkcount
            and j < hyp_spkcount
            and de[i, j] > 0
            and tc[i, j] > 0
        ):
            map_rh[i] = j
            map_hr[j] = i


    ref_mixed_frontiers = []
    for r in range(ref_spkcount):
        if map_rh[r] == -1:
            ref_mixed_frontiers.append(ref_frontiers[r])
        else:
            ref_mixed_frontiers.append(ref_frontiers_collar[r])

    (
        correct_ref,
        correct_hyp,
        lost_ref,
        lost_hyp,
        confusion_matrix,
    ) = accumulate_confusion(ref_mixed_frontiers, hyp_frontiers, map_rh, map_hr)

    conf = 0
    for r in range(ref_spkcount):
        for h in range(hyp_spkcount):
            conf += confusion_matrix[r, h]
    totaltime = 0
    miss = 0
    for r in range(ref_spkcount):
        totaltime += ref_times[r]
        miss += lost_ref[r]
    fa = 0
    for h in range(hyp_spkcount):
        fa += lost_hyp[h]


    best_segment_fix = None

    # Do one pass over the reference segments to see how much to gain if one hypothesis segment is corrected
    # Assume the speaker is correct if there's only one, or that the biggest is correct otherwise

    nf = len(ref_frontiers[0])
    for i in range(0, len(ref.speaker)):
        # segment boundaries
        st = ref.start_time[i]
        en = ref.end_time[i]

        # amount of silence before and after the segment
        silence_before = 0
        silence_after = 0
        for f in ref_union:
            if f[1] < st:
                silence_before = st - f[1] if f[0] == 'p' else 0
            if f[1] > en:
                silence_after = f[1] - en if f[0] == 'n' else 0
                break

        # scan the hypothesis, collate the per-speaker time
        hyptime = {}
        for j in range(0, len(hyp.speaker)):
            hst = hyp.start_time[j]
            hen = hyp.end_time[j]
            if not (hen <= st or hst >= en):
                if hen > en:
                    hen = en
                if hst < st:
                    hst = st
                hspk = hyp.speaker[j]
                if hspk in hyptime:
                    hyptime[hspk] += hen - hst
                else:
                    hyptime[hspk] = hen - hst

        # compute the fixed time under the assumption that the longest speaker is correct
        fixed_time = 0
        if len(hyptime) > 1:
            best_time = 0
            for s in hyptime:
                tm = hyptime[s]
                if tm > best_time:
                    best_time = tm
                fixed_time += tm
            fixed_time -= best_time

        # compute the time in speech in the silences on the border, and the time in silence in the speech
        if len(hyp_union) > 0:
            stf = 0
            enf = len(hyp_union) - 1
            for j in range(0, len(hyp_union)):
                if hyp_union[j][1] < st:
                    stf = j
                if hyp_union[j][1] > en:
                    enf = j
                    break
            # pre/post-segment silence
            if hyp_union[stf][1] < st and hyp_union[stf][0] == 'n':
                sp = st - silence_before
                if sp < hyp_union[stf][1]:
                    sp = hyp_union[stf][1]
                fixed_time += st - sp
            if hyp_union[enf][1] > en and hyp_union[enf][0] == 'p':
                sp = en + silence_after
                if sp > hyp_union[enf][1]:
                    sp = hyp_union[enf][1]
                fixed_time += sp - en
            # in-segment silence
            sp = st
            for j in range(stf, enf + 1):
                if hyp_union[j][1] > st and hyp_union[j][0] == 'p':
                    sp = hyp_union[j][1]
                if hyp_union[j][1] > st and hyp_union[j][1] < en and hyp_union[j][0] == 'n':
                    fixed_time += hyp_union[j][1] - sp

        else:
            # nothing in the hypothesis, all the time is fixed
            fixed_time += en - st
        if fixed_time:
            if (best_segment_fix is None) or (best_segment_fix[0] < fixed_time):
                best_segment_fix = [fixed_time, st, en]

    # find the couple of maximum confusion where both sides are mapped
    max_conf = 0
    max_conf_r = None
    max_conf_h = None
    for r in range(ref_spkcount):
        for h in range(hyp_spkcount):
            if (
                confusion_matrix[r, h] > max_conf
                and map_rh[r] != -1
                and map_hr[h] != -1
            ):
                max_conf = confusion_matrix[r, h]
                max_conf_r = r
                max_conf_h = h
    if max_conf_r is not None:
        # Of the two, pick the speaker with the maximum amount of error associated
        error_spk_ref = lost_ref[max_conf_r]
        for h in range(hyp_spkcount):
            error_spk_ref += confusion_matrix[max_conf_r, h]
        error_spk_hyp = lost_hyp[max_conf_h]
        for r in range(ref_spkcount):
            error_spk_hyp += confusion_matrix[r, max_conf_h]

        if error_spk_ref > error_spk_hyp:
            # We want to pivot on the reference, that means merging the mapped hyp
            # speaker and the mapped max error hyp speaker
            correct_point = find_common_point(
                ref_frontiers[max_conf_r], hyp_frontiers[map_rh[max_conf_r]]
            )
            bad_point = find_common_point(
                ref_frontiers[max_conf_r], hyp_frontiers[max_conf_h]
            )
            p1 = min(correct_point, bad_point)
            p2 = max(correct_point, bad_point)
            max_conf_a = {
                "answer": {"value": True},
                "response_type": "same",
                "time_1": np.float32(p1),
                "time_2": np.float32(p2),
            }
        else:
            # We want to pivot on the hypothesis, that means splitting the mapped ref
            # speaker and the mapped max error ref speaker
            correct_point = find_common_point(
                ref_frontiers[map_hr[max_conf_h]], hyp_frontiers[max_conf_h]
            )
            bad_point = find_common_point(
                ref_frontiers[max_conf_r], hyp_frontiers[max_conf_h]
            )
            p1 = min(correct_point, bad_point)
            p2 = max(correct_point, bad_point)
            max_conf_a = {
                "answer": {"value": False},
                "response_type": "same",
                "time_1": np.float32(p1),
                "time_2": np.float32(p2),
            }

    if best_segment_fix is None and max_conf == 0:
        return {
            "answer": {"value": False},
            "response_type": "stop",
            "time_1": np.float32(0.0),
            "time_2": np.float32(0.0),
        }
    if best_segment_fix is None or max_conf > best_segment_fix[0]:
        return max_conf_a
    else:
        return {
            "answer": {"value": True},
            "response_type": "boundary",
            "time_1": np.float32(best_segment_fix[1]),
            "time_2": np.float32(best_segment_fix[2]),
        }


class Algorithm:
    def __init__(self):
        pass

    def setup(self, parameters):
        self.cost = 0
        print("params", parameters)
        self.max_cost_per_file = parameters["max_cost_per_file"]
        self.request_collar_cost = parameters["request_collar_cost"]
        return True

    def compute_answer_cost(self, a):
        cost = 0
        if a["response_type"] == "same":
            time_1 = a["time_1"]
            time_2 = a["time_2"]
            if abs(time_2 - time_1) >= self.request_collar_cost:
                cost += 2 * self.request_collar_cost
            else:
                cost += (
                    max(time_1, time_2)
                    - min(time_1, time_2)
                    + self.request_collar_cost
                )
        elif a["response_type"] == "boundary":
            time_1 = a["time_1"]
            time_2 = a["time_2"]
            cost = time_2 - time_1 + self.request_collar_cost
        return cost

    def validate(self, request):
        print(self.file_info.file_id, request.file_id)
        answer = {}
        if self.cost >= self.max_cost_per_file:
            answer = {
                "answer": {"value": False},
                "response_type": "stop",
                "time_1": 0.0,
                "time_2": 0.0,
                }
        if self.file_info.supervision == "active":
            if request.system_request.request_type == "same":
                time_1 = request.system_request.time_1
                time_2 = request.system_request.time_2
                spk1 = self.find_speaker_for_time(time_1)
                spk2 = self.find_speaker_for_time(time_2)
                if abs(time_2 - time_1) >= self.request_collar_cost:
                    self.cost += 2 * self.request_collar_cost
                else:
                    self.cost += (
                        max(time_1, time_2)
                        - min(time_1, time_2)
                        + self.request_collar_cost
                    )
                print(
                    "USER: Check for same on %f (%s) vs. %f (%s)"
                    % (time_1, spk1, time_2, spk2)
                )
                answer = {
                    "answer": {"value": spk1 == spk2},
                    "response_type": "same",
                    "time_1": time_1,
                    "time_2": time_2,
                    }
            elif request.system_request.request_type == "boundary":
                time_1 = request.system_request.time_1
                st, en = self.find_segment_for_time(time_1)
                if st is not None:
                    self.cost += en - st + self.request_collar_cost
                    print(
                        "USER: Check for boundary on %f (%f - %f)"
                        % (time_1, st, en)
                    )
                    answer = {
                        "answer": {"value": True},
                        "response_type": "boundary",
                        "time_1": st,
                        "time_2": en,
                        }
                else:
                    self.cost += self.request_collar_cost
                    print(
                        "USER: Check for boundary on %f (not speech)"
                        % (time_1)
                    )
                    answer = {
                        "answer": {"value": False},
                        "response_type": "boundary",
                        "time_1": 0.0,
                        "time_2": 0.0,
                        }

        elif self.file_info.supervision == "interactive":
            answer = find_best_information(self.reference, request.hypothesis, 0.250)

        else:
            answer = {
                "response_type": "stop",
                "time_1": np.float32(0),
                "time_2": np.float32(0),
                "answer": {"value": False},
                }
        self.cost += self.compute_answer_cost(answer)
        return (answer["response_type"] == "stop", answer)

    def write(self, outputs, processor_output_name, end_data_index):
        outputs["evaluator_output"].write({"value": self.cost}, end_data_index)
        return True

    def read(self, inputs):
        self.file_info = inputs["evaluator_file_info"].data
        self.reference = inputs["evaluator_speakers"].data
        self.uem = inputs["evaluator_uem"].data
        self.cost = 0
        print(
            "user - file %s supervision %s"
            % (self.file_info.file_id, self.file_info.supervision)
        )
        return True

    def process(self, inputs, data_loaders, outputs, channel):
        print("user process called")

    def find_segment_for_time(self, time):
        for i, s in enumerate(self.reference.speaker):
            if (
                time >= self.reference.start_time[i]
                and time < self.reference.end_time[i]
            ):
                return self.reference.start_time[i], self.reference.end_time[i]
        return (None, None)

    def find_speaker_for_time(self, time):
        for i, s in enumerate(self.reference.speaker):
            #            print(i, s, time, )
            if (
                time >= self.reference.start_time[i]
                and time < self.reference.end_time[i]
            ):
                return s
        return None
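A minimal usage sketch of find_best_information (an illustration, not part of the algorithm file): the reference and hypothesis objects only need the speaker, start_time and end_time fields that the code above actually reads from the anthony_larcher/speakers/1 data. With a hypothesis that matches the reference there is nothing left to correct, so the simulated user answers with a "stop" request.

from types import SimpleNamespace

ref = SimpleNamespace(
    speaker=["A", "B"],
    start_time=[0.0, 5.0],
    end_time=[4.0, 9.0],
)
hyp = SimpleNamespace(
    speaker=["spk1", "spk2"],
    start_time=[0.0, 5.0],
    end_time=[4.0, 9.0],
)
# 0.250 is the collar used by the interactive branch of validate() above.
request = find_best_information(ref, hyp, 0.250)
print(request["response_type"])  # expected: "stop"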


Could not find any documentation for this object.
No experiments are using this algorithm.
This algorithm was never executed.