Evaluates the DER for a diarization hypothesis

This algorithm is a sequential one. The platform will call its process() method once per data incoming on its inputs.
This algorithm is splittable

Algorithms have at least one input and one output. All algorithm endpoints are organized in groups. Groups are used by the platform to indicate which inputs and outputs are synchronized together. The first group is automatically synchronized with the channel defined by the block in which the algorithm is deployed.

Group: group

Endpoint Name Data Format Nature
cost system/float/1 Input
hypothesis anthony_larcher/speakers/1 Input
reference anthony_larcher/speakers/1 Input
uem anthony_larcher/uemranges/1 Input
DER system/float/1 Output
speech_duration system/float/1 Output

Parameters allow users to change the configuration of an algorithm when scheduling an experiment

Name Description Type Default Range/Choices
collar Allowed imprecision on frontier positions float64 0.25
names Whether the evaluation should be done taking speaker names into account bool False
xxxxxxxxxx
466
 
1
# You may import any python packages that will be available in the environment you will run this algorithm in
2
# Environments can change based on the experiment's settings
3
4
import numpy as np
5
from scipy.optimize import linear_sum_assignment
6
7
8
def make_spkmap(spk):
    """Assign a dense integer index to every distinct speaker label.

    Indices are handed out in order of first appearance in ``spk.speaker``.

    Args:
        spk: object exposing a ``speaker`` iterable of hashable labels.

    Returns:
        A ``(spkmap, spkcount)`` tuple: the label -> index dictionary and
        the number of distinct labels.
    """
    spkmap = {}
    for label in spk.speaker:
        # setdefault only inserts on first sight, so len(spkmap) is the next
        # free index at that moment.
        spkmap.setdefault(label, len(spkmap))
    return spkmap, len(spkmap)
16
17
18
def range_to_frontiers(rng):
    """Turn ``(start, end)`` ranges into a sorted list of frontiers.

    Overlapping or touching ranges (``end >= next start``) are merged first.
    Each resulting range contributes two frontiers: ``("n", start)`` and
    ``("p", end)``.

    The input list is left untouched; the merge works on a sorted copy.

    Args:
        rng: iterable of ``(start, end)`` pairs, in any order.

    Returns:
        A flat list of ``(mode, time)`` tuples in increasing time order.
    """
    merged = []
    for cur in sorted(rng):
        if merged and merged[-1][1] >= cur[0]:
            # Overlaps (or touches) the previous range: extend it.
            prev = merged[-1]
            merged[-1] = (prev[0], max(prev[1], cur[1]))
        else:
            merged.append(cur)

    front = []
    for r in merged:
        front.append(("n", r[0]))
        front.append(("p", r[1]))
    return front
32
33
def filter_frontier_on_uem(front, uem):
    """Clip one frontier list to the time ranges listed in ``uem``.

    Both ``front`` and the UEM ranges are walked in parallel, in order.
    Frontiers at or before the start of the current UEM range are skipped,
    frontiers inside it are kept, and a non-"n" frontier lying past the end
    of the range is replaced by a frontier of the same mode at the range end.

    Args:
        front: list of ``(mode, time)`` frontiers in increasing time order.
        uem: object with parallel ``start_time`` / ``end_time`` sequences.

    Returns:
        A new list of ``(mode, time)`` frontiers restricted to the UEM.
    """
    starts = uem.start_time
    ends = uem.end_time
    kept = []

    def reopen(range_start):
        # A non-"n" frontier needs an opening frontier inside this UEM range.
        # Add one at the range start unless the last emitted frontier already
        # sits at or after that time.
        if not kept or kept[-1][1] < range_start:
            kept.append(("n", range_start))

    u = 0
    f = 0
    while u != len(starts) and f != len(front):
        mode = front[f][0]
        when = front[f][1]
        if not starts[u] < when:
            # Frontier at or before the start of the current range: skip it.
            f += 1
        elif ends[u] >= when:
            # Frontier inside the current range: keep it unchanged.
            if mode != "n":
                reopen(starts[u])
            kept.append((mode, when))
            f += 1
        else:
            # Frontier past the end of the current range: cut at the range
            # end and move on to the next range (the frontier is re-examined).
            if mode != "n":
                reopen(starts[u])
                kept.append((mode, ends[u]))
            u += 1
    return kept
56
57
def filter_frontiers_on_uem(front, uem):
    """Apply :func:`filter_frontier_on_uem` to every frontier list in ``front``.

    Args:
        front: list of frontier lists (one per speaker).
        uem: object with ``start_time`` / ``end_time`` sequences.

    Returns:
        A list with one UEM-clipped frontier list per input list, same order.
    """
    return [filter_frontier_on_uem(speaker_front, uem) for speaker_front in front]
62
63
def merge_two_frontiers(front1, front2, end1, end2):
    """Merge two frontier lists into one whose modes are concatenated.

    The result has one frontier per distinct time found in either input.
    Its mode is the mode of the current frontier of ``front1`` followed by
    the mode of the current frontier of ``front2``.

    Args:
        front1: first list of ``(mode, time)`` frontiers, in time order.
        front2: second list of ``(mode, time)`` frontiers, in time order.
        end1: mode string used for ``front1`` once it is exhausted.
        end2: mode string used for ``front2`` once it is exhausted.

    Returns:
        A list of ``(mode1 + mode2, time)`` frontiers in time order.
    """
    frontr = []
    len1 = len(front1)
    len2 = len(front2)
    pos1 = 0
    pos2 = 0
    while pos1 < len1 or pos2 < len2:
        done1 = pos1 == len1
        done2 = pos2 == len2

        # Next merged time: the earliest pending frontier of either list.
        if done2:
            ctime = front1[pos1][1]
        elif done1:
            ctime = front2[pos2][1]
        else:
            ctime = min(front1[pos1][1], front2[pos2][1])

        mode1 = end1 if done1 else front1[pos1][0]
        mode2 = end2 if done2 else front2[pos2][0]
        frontr.append((mode1 + mode2, ctime))

        # Consume every frontier that sits exactly at the merged time.
        if not done1 and front1[pos1][1] == ctime:
            pos1 += 1
        if not done2 and front2[pos2][1] == ctime:
            pos2 += 1
    return frontr
83
84
85
def make_merge_frontier(hyp_union, ref_union, ref_frontiers_collar):
    """Build, for each reference speaker, a three-letter merged frontier list.

    The hypothesis-union and reference-union frontiers are merged first
    (two-letter modes), then that result is merged with each entry of
    ``ref_frontiers_collar``, giving modes of the form
    ``<hyp union><ref union><ref speaker>``.

    Args:
        hyp_union: frontier list of the union of all hypothesis speakers.
        ref_union: frontier list of the union of all reference speakers.
        ref_frontiers_collar: one frontier list per reference speaker.

    Returns:
        A list with one merged frontier list per reference speaker.
    """
    hyp_ref = merge_two_frontiers(hyp_union, ref_union, "n", "n")
    return [
        merge_two_frontiers(hyp_ref, speaker_front, "nn", "n")
        for speaker_front in ref_frontiers_collar
    ]
91
92
93
def make_frontiers(spk, spkmap, spkcount):
    """Build one frontier list per speaker from a segment table.

    Args:
        spk: object with parallel ``speaker``, ``start_time`` and
            ``end_time`` sequences (one entry per segment).
        spkmap: mapping from speaker label to speaker index.
        spkcount: number of speakers, i.e. length of the returned list.

    Returns:
        A list of ``spkcount`` frontier lists, indexed by speaker index.
    """
    rngs = [[] for _ in range(spkcount)]
    for i, label in enumerate(spk.speaker):
        rngs[spkmap[label]].append((spk.start_time[i], spk.end_time[i]))
    return [range_to_frontiers(speaker_ranges) for speaker_ranges in rngs]
102
103
104
def make_union_frontiers(spk):
    """Build the frontier list of the union of all segments, speakers ignored.

    Args:
        spk: object with parallel ``speaker``, ``start_time`` and
            ``end_time`` sequences (one entry per segment).

    Returns:
        The frontier list produced by :func:`range_to_frontiers` over every
        segment of ``spk``.
    """
    rngs = [(spk.start_time[i], spk.end_time[i]) for i in range(len(spk.speaker))]
    return range_to_frontiers(rngs)
109
110
111
def frontiers_add_collar(front, collar):
    """Surround every frontier with a ``collar``-wide transition zone.

    Each frontier at time ``t`` becomes a frontier of the same mode at
    ``t - collar`` (floored at 0) followed by a ``"t"`` frontier at
    ``t + collar``. When a zone would start at or before the end of the
    previous zone, the two are fused: the previous ``"t"`` frontier is
    simply pushed to the new end.

    Args:
        front: list of ``(mode, time)`` frontiers in increasing time order.
        collar: half-width of the transition zone.

    Returns:
        A new list of ``(mode, time)`` frontiers including ``"t"`` zones.
    """
    cfront = []
    for f in front:
        zone_start = f[1] - collar
        zone_end = f[1] + collar
        if zone_start < 0:
            zone_start = 0
        if cfront and not zone_start > cfront[-1][1]:
            # Zone overlaps the previous one: extend the previous zone.
            cfront[-1] = ("t", zone_end)
        else:
            cfront.append((f[0], zone_start))
            cfront.append(("t", zone_end))
    return cfront
124
125
126
def make_times(front):
    """Compute the total "p" duration of every frontier list.

    For each list, every ``"p"`` frontier adds the time elapsed since the
    most recent ``"n"`` frontier (or since 0 if none was seen yet).
    Frontiers with any other mode are ignored.

    Args:
        front: list of frontier lists (one per speaker).

    Returns:
        A list with one accumulated duration per input frontier list.
    """
    times = []
    for speaker_front in front:
        total = 0
        opened_at = 0
        for p in speaker_front:
            if p[0] == "n":
                opened_at = p[1]
            elif p[0] == "p":
                total += p[1] - opened_at
        times.append(total)
    return times
138
139
140
def add_time(thyp, thyn, mode, eh, er, tc, efa, emiss, econf):
    """Add one interval's durations to the accumulators selected by ``mode``.

    ``mode`` is a three-letter string such as the ones built by
    ``make_merge_frontier``. The recognised values and their effect are:

    * ``"ppp"``: ``er`` and ``econf`` gain ``thyn``; ``tc`` gains ``thyp``.
    * ``"ppn"``: ``eh`` gains ``thyp``.
    * ``"ppt"`` / ``"pnt"``: ``tc`` gains ``thyp``.
    * ``"pnn"``: ``eh`` and ``efa`` gain ``thyp``.
    * ``"npp"``: ``er`` and ``emiss`` gain ``thyn``.
    * anything else (``npn``, ``npt``, ``nnn``, ``nnt``): no change.

    Args:
        thyp: first duration of the interval (hypothesis time, as passed by
            ``compute_times``).
        thyn: second duration of the interval (the remainder of the
            interval, as passed by ``compute_times``).
        mode: three-letter interval mode.
        eh, er, tc, efa, emiss, econf: current accumulator values.

    Returns:
        The updated ``(eh, er, tc, efa, emiss, econf)`` tuple.
    """
    if mode == "ppp":
        er = er + thyn
        tc = tc + thyp
        econf = econf + thyn
    elif mode == "ppn":
        eh = eh + thyp
    elif mode in ("ppt", "pnt"):
        tc = tc + thyp
    elif mode == "pnn":
        eh = eh + thyp
        efa = efa + thyp
    elif mode == "npp":
        er = er + thyn
        emiss = emiss + thyn
    # npn npt nnn nnt: nothing to accumulate
    return eh, er, tc, efa, emiss, econf
155
156
157
def compute_times(frontr, fronth):
    """Accumulate the ``add_time`` statistics of one hypothesis frontier list.

    ``frontr`` is walked interval by interval (each frontier closes the
    interval that started at the previous frontier time, or at 0). For each
    interval the amount of hypothesis "p" time falling inside it is measured
    and handed to ``add_time`` together with the rest of the interval and the
    interval's mode. Hypothesis "p" time lying after the last ``frontr``
    frontier is accounted with mode ``"pnn"``.

    Args:
        frontr: list of ``(mode, time)`` frontiers with three-letter modes.
        fronth: list of ``(mode, time)`` frontiers of one hypothesis speaker.

    Returns:
        The tuple ``(eh, er, tc, efa, emiss, econf)``, each value rounded to
        3 decimals.
    """
    # Accumulators, in add_time order: eh, er, tc, efa, emiss, econf.
    totals = (0, 0, 0, 0, 0, 0)
    hyp_len = len(fronth)
    hpos = 0     # index of the current hypothesis frontier
    tbeg = 0     # start time of the current interval
    hypbef = 0   # time up to which hypothesis time has been accounted

    for f in frontr:
        tend = f[1]
        thyp = 0
        while hpos < hyp_len:
            hyp_mode = fronth[hpos][0]
            hyp_time = fronth[hpos][1]
            dinter = min(hyp_time, tend)
            if hyp_mode == "p":
                thyp += dinter - hypbef
            if hyp_time > tend:
                # This hypothesis frontier extends past the interval: it
                # will be looked at again for the next interval.
                break
            hypbef = dinter
            hpos += 1
        totals = add_time(thyp, tend - tbeg - thyp, f[0], *totals)

        if hpos < hyp_len:
            hypbef = min(fronth[hpos][1], tend)
        tbeg = tend

    # Hypothesis frontiers remaining after the last frontr frontier.
    thyp = 0
    while hpos < hyp_len:
        if fronth[hpos][0] == "p":
            thyp += fronth[hpos][1] - tbeg
        tbeg = fronth[hpos][1]
        hpos += 1
    totals = add_time(thyp, 0, "pnn", *totals)

    return tuple(round(value, 3) for value in totals)
202
203
204
def compute_miss(funion, front):
205
    miss = []
206
    for f1 in front:
207
        hpos = 0
208
        tbeg = 0
209
        thyp = 0
210
        hypbef = 0
211
        fa = 0
212
        for f in funion:
213
            tend = f[1]
214
            while hpos < len(f1):
215
                dinter = min(f1[hpos][1], tend)
216
                if f1[hpos][0] == "p":
217
                    thyp += dinter - hypbef
218
                if f1[hpos][1] > tend:
219
                    break
220
                hypbef = dinter
221
                hpos += 1
222
            if f[0] == "n":
223
                fa += thyp
224
            if hpos < len(f1):
225
                hypbef = min(f1[hpos][1], tend)
226
            tbeg = tend
227
            thyp = 0
228
        while hpos < len(f1):
229
            if f1[hpos][0] == "p":
230
                thyp += f1[hpos][1] - tbeg
231
            tbeg = f1[hpos][1]
232
            hpos += 1
233
        fa += thyp
234
        fa = round(fa, 3)
235
        miss.append(fa)
236
    return miss
237
238
239
def accumulate_confusion(fref, fhyp, map_rh, map_hr):
    """Sweep reference and hypothesis frontiers and split time by outcome.

    Time is cut at every frontier of any speaker. In each slice, the active
    speakers are those whose current frontier has a mode other than ``"n"``.
    The slice duration is then distributed as follows:

    * a reference speaker and the hypothesis speaker it is mapped to, both
      active: correct time for both, and both leave the slice;
    * a reference speaker in transition (mode ``"t"``) that is mapped to
      some hypothesis speaker: dropped from the slice;
    * no hypothesis speaker left: lost time for each remaining reference;
    * no reference speaker left: lost time for each remaining hypothesis;
    * otherwise: ``max(#ref, #hyp) * duration`` of confusion time, shared
      equally between all (reference, hypothesis) pairs.

    Args:
        fref: one frontier list per reference speaker.
        fhyp: one frontier list per hypothesis speaker.
        map_rh: for each reference speaker, the mapped hypothesis speaker
            index, or -1 when unmapped.
        map_hr: reverse mapping; accepted for interface symmetry but not
            read by this function.

    Returns:
        ``(correct_ref, correct_hyp, lost_ref, lost_hyp, confusion_matrix)``
        where the first four are per-speaker lists and the last one is a
        ``(n_ref, n_hyp)`` float64 array.
    """
    ref_spkcount = len(fref)
    hyp_spkcount = len(fhyp)
    correct_ref = [0] * ref_spkcount
    correct_hyp = [0] * hyp_spkcount
    lost_ref = [0] * ref_spkcount
    lost_hyp = [0] * hyp_spkcount
    confusion_matrix = np.zeros((ref_spkcount, hyp_spkcount), dtype="float64")

    fri = [0] * ref_spkcount  # current frontier index of each ref speaker
    fhi = [0] * hyp_spkcount  # current frontier index of each hyp speaker
    cur_time = 0

    while True:
        active_ref = []  # (ref index, is-in-transition) pairs
        active_hyp = []  # hyp indices
        time = None      # end of the current slice: earliest pending frontier

        # Build the list of who is in the segment
        for i in range(ref_spkcount):
            if fri[i] == len(fref[i]):
                continue
            cf = fref[i][fri[i]]
            if time is None or cf[1] < time:
                time = cf[1]
            if cf[0] != "n":
                active_ref.append((i, cf[0] == "t"))

        for i in range(hyp_spkcount):
            if fhi[i] == len(fhyp[i]):
                continue
            cf = fhyp[i][fhi[i]]
            if time is None or cf[1] < time:
                time = cf[1]
            if cf[0] != "n":
                active_hyp.append(i)

        if time is None:
            # Every frontier list is exhausted.
            break

        # Only do the computations when there's something to do
        if active_ref or active_hyp:
            duration = time - cur_time

            unmatched_ref = []
            for r, in_transition in active_ref:
                h = map_rh[r]
                if h != -1:
                    if h in active_hyp:
                        # Mapped pair active together: correct time.
                        correct_ref[r] += duration
                        correct_hyp[h] += duration
                        active_hyp.remove(h)
                        continue
                    if in_transition:
                        # Mapped ref in its collar zone: not scored.
                        continue
                unmatched_ref.append(r)

            if not active_hyp:
                # If there's no hyp, we're all in lost_ref
                for r in unmatched_ref:
                    lost_ref[r] += duration
            elif not unmatched_ref:
                # If there's no ref, we're all in lost_hyp
                for h in active_hyp:
                    lost_hyp[h] += duration
            else:
                # Otherwise we're in confusion. The amount of confusion time
                # is the max of the ref and hyp times, spread equally over
                # every (ref, hyp) pair.
                conf_time = max(len(unmatched_ref), len(active_hyp)) * duration
                conf_slots = len(unmatched_ref) * len(active_hyp)
                conf_one_time = conf_time / conf_slots
                for r in unmatched_ref:
                    for h in active_hyp:
                        confusion_matrix[r, h] += conf_one_time

        # Step all the done segments
        for r in range(ref_spkcount):
            if fri[r] != len(fref[r]) and fref[r][fri[r]][1] == time:
                fri[r] += 1
        for h in range(hyp_spkcount):
            if fhi[h] != len(fhyp[h]) and fhyp[h][fhi[h]][1] == time:
                fhi[h] += 1
        cur_time = time

    return correct_ref, correct_hyp, lost_ref, lost_hyp, confusion_matrix
348
349
350
def compute_der(ref, hyp, uem, collar, cost):
    """Compute the diarization error rate of ``hyp`` against ``ref``.

    Steps:

    1. Build per-speaker and union frontiers for both sides, clipped to the
       UEM, plus collar-extended frontiers for the reference speakers.
    2. For every (reference, hypothesis) speaker pair, measure overlap
       statistics and derive a gain; find the speaker mapping that
       maximises total gain with ``linear_sum_assignment``.
    3. Sweep all frontiers with ``accumulate_confusion`` to obtain lost and
       confusion times under that mapping.

    Args:
        ref: reference segments (``speaker``, ``start_time``, ``end_time``).
        hyp: hypothesis segments (same attributes).
        uem: scored time ranges (``start_time``, ``end_time``).
        collar: half-width of the tolerance zone around reference frontiers.
        cost: extra amount added to the error time before normalisation.

    Returns:
        ``(der, totaltime)``: the error rate in percent and the total
        reference time used as denominator.

    Note:
        The division by ``totaltime`` is not guarded: if the summed
        reference time is zero the division is by zero.
    """
    ref_spkmap, ref_spkcount = make_spkmap(ref)
    hyp_spkmap, hyp_spkcount = make_spkmap(hyp)

    ref_frontiers = filter_frontiers_on_uem(
        make_frontiers(ref, ref_spkmap, ref_spkcount), uem
    )
    hyp_frontiers = filter_frontiers_on_uem(
        make_frontiers(hyp, hyp_spkmap, hyp_spkcount), uem
    )
    ref_frontiers_collar = [
        filter_frontier_on_uem(frontiers_add_collar(front, collar), uem)
        for front in ref_frontiers
    ]

    ref_union = filter_frontier_on_uem(make_union_frontiers(ref), uem)
    hyp_union = filter_frontier_on_uem(make_union_frontiers(hyp), uem)

    merge_frontiers = make_merge_frontier(hyp_union, ref_union, ref_frontiers_collar)

    ref_times = make_times(ref_frontiers)
    miss_hyp = compute_miss(ref_union, hyp_frontiers)

    pair_shape = (ref_spkcount, hyp_spkcount)
    tc = np.zeros(pair_shape, dtype="float64")
    efa = np.zeros(pair_shape, dtype="float64")
    emiss = np.zeros(pair_shape, dtype="float64")
    econf = np.zeros(pair_shape, dtype="float64")
    de = np.zeros(pair_shape, dtype="float64")

    # The assignment problem is solved on a square matrix; the padding rows
    # or columns keep a zero cost.
    opt_size = max(ref_spkcount, hyp_spkcount)
    costs = np.zeros((opt_size, opt_size), dtype="float64")

    for r in range(ref_spkcount):
        for h in range(hyp_spkcount):
            # The first two statistics (eh, er) are not needed here.
            (
                _,
                _,
                tc[r, h],
                efa[r, h],
                emiss[r, h],
                econf[r, h],
            ) = compute_times(merge_frontiers[r], hyp_frontiers[h])
            de[r, h] = (
                ref_times[r] + miss_hyp[h] - efa[r, h] - emiss[r, h] - econf[r, h]
            )
            # linear_sum_assignment minimises, so the gain is negated.
            costs[r, h] = -round(de[r, h] * 1000)

    map1, map2 = linear_sum_assignment(costs)
    map_rh = [-1] * ref_spkcount
    map_hr = [-1] * hyp_spkcount
    for i, j in zip(map1, map2):
        # Ignore padding cells and pairs with no gain or no common time.
        if (
            i < ref_spkcount
            and j < hyp_spkcount
            and de[i, j] > 0
            and tc[i, j] > 0
        ):
            map_rh[i] = j
            map_hr[j] = i

    # Mapped reference speakers are scored with their collar frontiers,
    # unmapped ones with their plain frontiers.
    ref_mixed_frontiers = [
        ref_frontiers[r] if map_rh[r] == -1 else ref_frontiers_collar[r]
        for r in range(ref_spkcount)
    ]

    (
        _correct_ref,
        _correct_hyp,
        lost_ref,
        lost_hyp,
        confusion_matrix,
    ) = accumulate_confusion(ref_mixed_frontiers, hyp_frontiers, map_rh, map_hr)

    conf = 0
    for r in range(ref_spkcount):
        for h in range(hyp_spkcount):
            conf += confusion_matrix[r, h]

    totaltime = 0
    for ref_time in ref_times:
        totaltime += ref_time

    miss = 0
    for lost in lost_ref:
        miss += lost

    fa = 0
    for lost in lost_hyp:
        fa += lost

    return 100 * (fa + miss + conf + cost) / totaltime, totaltime
441
442
443
class Algorithm:
    """BEAT algorithm block computing the DER of a diarization hypothesis.

    Reads the ``hypothesis``, ``reference``, ``uem`` and ``cost`` inputs and
    writes the ``DER`` and ``speech_duration`` outputs.
    """

    # initialise fields to store cross-input data (e.g. machines, aggregations, etc.)
    def __init__(self):
        # Same defaults as setup(), so the instance is usable even if
        # setup() has not been called yet.
        self.names = False
        self.collar = 0.25

    # do initial setup work with the given parameters for the algorithm
    def setup(self, parameters):
        """Store the ``names`` and ``collar`` parameters.

        Args:
            parameters: mapping of parameter names to values; missing keys
                fall back to ``names=False`` and ``collar=0.25``.

        Returns:
            Always ``True``.
        """
        # NOTE: names is stored but not read anywhere in this class.
        self.names = parameters.get("names", False)
        self.collar = parameters.get("collar", 0.25)
        return True

    # this will be called each time the sync'd input has more data available to be processed
    def process(self, inputs, dataloader, outputs):
        """Compute the DER for the current inputs and write both outputs.

        Args:
            inputs: mapping giving access to the ``hypothesis``,
                ``reference``, ``uem`` and ``cost`` inputs.
            dataloader: not used by this method.
            outputs: mapping giving access to the ``DER`` and
                ``speech_duration`` outputs.

        Returns:
            Always ``True``.
        """
        hyp = inputs["hypothesis"].data
        ref = inputs["reference"].data
        uem = inputs["uem"].data
        cost = inputs["cost"].data.value

        der, reftime = compute_der(ref, hyp, uem, self.collar, cost)

        outputs["DER"].write({"value": der})
        outputs["speech_duration"].write({"value": reftime})
        return True
466

The code for this algorithm in Python
The ruler at 80 columns indicates suggested POSIX line breaks (for readability).
The editor will automatically enlarge to accommodate the entirety of your input.
Use keyboard shortcuts for search/replace and faster editing. For example, use Ctrl-F (PC) or Cmd-F (Mac) to search through this box

Could not find any documentation for this object.
No experiments are using this algorithm.
Created with Raphaël 2.1.2[compare]anthony_larcher/DER_evaluation_cost/12020Mar9

This table shows the number of times this algorithm has been successfully run using the given environment. Note this does not provide sufficient information to evaluate if the algorithm will run when submitted to different conditions.

Terms of Service | Contact Information | BEAT platform version 2.2.1b0 | © Idiap Research Institute - 2013-2025