Coverage for src/bob/measure/_library.py: 28%
366 statements
1#!/usr/bin/env python
2# coding=utf-8
4"""Various functions for performance assessment
6Most of these were imported from older C++ implementations.
7"""
9import logging
11from functools import wraps
13import numpy
14import numpy.linalg
16from numba import jit, objmode
18logger = logging.getLogger(__name__)
21def _lists_to_arrays(*args, **kwargs):
22 ret, retkw = list(), dict()
23 for v in args:
24 ret.append(numpy.asarray(v) if isinstance(v, list) else v)
25 for k, v in kwargs.items():
26 retkw[k] = numpy.asarray(v) if isinstance(v, list) else v
27 return ret, retkw
30def array_jit(func):
31 jit_func = jit(func, nopython=True)
33 @wraps(jit_func)
34 def new_func(*args, **kwargs):
35 args, kwargs = _lists_to_arrays(*args, **kwargs)
36 return jit_func(*args, **kwargs)
38 new_func.jit_func = jit_func
39 return new_func
42@jit(nopython=True)
43def _log_values(points, min_power):
44 """Computes log-scaled values between :math:`10^\text{min_power}` and 1
46 Parameters
47 ==========
49 points : int
50 Number of points to consider
52 min_power : int
53 Negative integer with the minimum power
56 Returns
57 =======
59 logscale : numpy.ndarray (float, 1D)
61 A set of numbers forming a logarithm-based scale between
62 :math:`10^\text{min_power}` and 1.
64 """
65 return 10 ** (numpy.arange(1 - points, 1) / int(points / (-min_power)))
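# Illustrative sketch (not part of the original module): the expression above,
# written out for points=6 and min_power=-3, produces six log-spaced values
# ending at 1 (10**-2.5, 10**-2, ..., 10**0).
def _example_log_values():  # hypothetical helper, for illustration only
    points, min_power = 6, -3
    return 10 ** (numpy.arange(1 - points, 1) / int(points / (-min_power)))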
68@jit(nopython=True)
69def _meaningful_thresholds(negatives, positives, points, min_far, is_sorted):
70 """Returns non-repeatitive thresholds to generate ROC curves
72 This function creates a list of FAR (and FRR) values that we are
73 interesting to see on the curve. Computes thresholds for those points.
74 Sorts the thresholds so we get sorted numbers to plot on the curve and
75 returns the thresholds. Some points will be duplicate but in terms of
76 resolution and accuracy this is better than just changing the threshold
77 from ``min()`` of scores to ``max()`` of scores with equal spaces.
80 Parameters
81 ==========
83 negatives, positives : numpy.ndarray (1D, float)
85 The negative and positive scores, for which the meaningful threshold
86 will be calculated.
88 points : int
90 The number of points at which the ROC curve is calculated, which are
91 distributed uniformly in the range ``[min(negatives, positives),
92 max(negatives, positives)]``
94 min_far : int
96 Minimum FAR in terms of :math:`10^{\text{min_far}}`. This value is also
97 used for ``min_frr``. Values should be negative.
100 is_sorted : bool
102 Set this to ``True`` if both sets of scores are already sorted in
103 ascending order. If ``False``, scores will be sorted internally, which
104 will require more memory.
107 Returns
108 =======
110 thresholds : numpy.ndarray (1D, float)
112 The "meaningful" thresholds that would cause changes in the ROC.
114 """
116 half_points = points // 2
118 # if not pre-sorted, copies and sorts
119 neg = negatives if is_sorted else numpy.sort(negatives)
120 pos = positives if is_sorted else numpy.sort(positives)
122 frr_list = _log_values(half_points, min_far)
123 far_list = _log_values(points - half_points, min_far)
125 t = numpy.zeros((points,))
126 t[:half_points] = [_jit_frr_threshold(neg, pos, k, True) for k in frr_list]
127 t[half_points:] = [_jit_far_threshold(neg, pos, k, True) for k in far_list]
129 t.sort()
131 return t
134def correctly_classified_negatives(negatives, threshold):
135 """Evaluates correctly classifed negatives in a set, based on a threshold
137 This method returns an array composed of booleans that pin-point, which
138 negatives where correctly classified for the given threshold
140 The pseudo-code for this function is:
142 .. code-block:: python
144 classified = []
145 for k in negatives:
146 if k < threshold:
147 classified.append(True)
148 else:
149 classified.append(False)
152 Parameters
153 ==========
155 negatives : numpy.ndarray (1D, float)
157 The scores generated by comparing objects of different classes
159 threshold : float
161 The threshold, for which scores should be considered to be
162 correctly classified
165 Returns
166 =======
168 classified : numpy.ndarray (1D, bool)
170 The decision for each of the ``negatives``
172 """
173 return negatives < threshold
176def correctly_classified_positives(positives, threshold):
177 """Evaluates correctly classifed positives in a set, based on a threshold
179 This method returns an array composed of booleans that pin-point, which
180 positives where correctly classified for the given threshold
182 The pseudo-code for this function is:
184 .. code-block:: python
186 classified = []
187 for k in positives:
188 if k >= threshold:
189 classified.append(True)
190 else:
191 classified.append(False)
194 Parameters
195 ==========
197 positives : numpy.ndarray (1D, float)
199 The scores generated by comparing objects of different classes
201 threshold : float
203 The threshold, for which scores should be considered to be
204 correctly classified
207 Returns
208 =======
210 classified : numpy.ndarray (1D, bool)
212 The decision for each of the ``positives``
214 """
215 return positives >= threshold
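# Usage sketch (not part of the original module; scores below are made up):
# counting correct decisions on each side of a threshold with the two helpers
# above.
def _example_classification_counts():  # hypothetical helper, for illustration only
    neg = numpy.array([0.1, 0.4, 0.6])
    pos = numpy.array([0.5, 0.7, 0.9])
    true_negatives = correctly_classified_negatives(neg, 0.5).sum()  # 2
    true_positives = correctly_classified_positives(pos, 0.5).sum()  # 3
    return true_negatives, true_positives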
218def det(negatives, positives, n_points, min_far=-8):
219 """Calculates points of an Detection Error-Tradeoff (DET) curve
221 Calculates the DET curve given a set of negative and positive scores and a
222 desired number of points. Returns a two-dimensional array of doubles that
223 express on its rows:
225 You can plot the results using your preferred tool to first create a plot
226 using rows 0 and 1 from the returned value and then replace the X/Y axis
227 annotation using a pre-determined set of tickmarks as recommended by NIST.
228 The deviate scales are computed with the :py:func:`ppndf` function.
231 Parameters
232 ==========
234 negatives, positives : numpy.ndarray (1D, float)
236 The list of negative and positive scores to compute the DET for
238 n_points : int
240 The number of points on the DET curve, for which the DET should be
241 evaluated
243 min_far : :class:`int`, Optional
245 Minimum FAR in terms of :math:`10^\text{min_far}`. This value is also
246 used for ``min_frr``. Values should be negative.
249 Returns
250 =======
252 curve : numpy.ndarray (2D, float)
254 The DET curve, with the FPR in the first and the FNR in the second
255 row:
257 0. X axis values in the normal deviate scale for the false-accepts
258 1. Y axis values in the normal deviate scale for the false-rejections
260 """
261 return ppndf(roc(negatives, positives, n_points, min_far))
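# Usage sketch (not part of the original module; assumes numba is available to
# compile the jitted helpers on first use): computes a 100-point DET curve for
# synthetic Gaussian scores. Row 0 holds deviate-scaled FPR values and row 1
# deviate-scaled FNR values, ready to be plotted.
def _example_det():  # hypothetical helper, for illustration only
    rng = numpy.random.default_rng(0)
    neg = rng.normal(0.0, 1.0, 5000)
    pos = rng.normal(2.0, 1.0, 5000)
    return det(neg, pos, 100)  # shape (2, 100)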
264def pavx_1(y, ghat, index, len):
265 """Calculates the pavx_1 function
267 Calculates the pavx_1 function given a set of negative and positive scores
268 and a desired number of points. Returns a two-dimensional array of doubles
269 that express on its rows:
271 You can plot the results using your preferred tool to first create a plot
272 using rows 0 and 1 from the returned value and then replace the X/Y axis
273 annotation using a pre-determined set of tickmarks as recommended by NIST.
274 The derivative scales are computed with the :py:func:`ppndf` function.
277 Parameters
278 ==========
280 y : numpy.ndarray (1D, float)
282 The input values on which the isotonic regression is performed
284 ghat : numpy.ndarray (1D, float)
286 Working array receiving the pooled mean of each interval (filled in place)
288 index : numpy.ndarray (1D, size_t)
290 Working array receiving the left endpoint of each interval (filled in place)
292 len : numpy.ndarray (1D, size_t)
294 Working array receiving the length of each interval (filled in place)
297 Returns
298 =======
300 ci : size_t
302 The index of the last interval created after pooling
304 """
305 # Sets output and working arrays to 0
306 index[:] = 0
307 len[:] = 0
308 ghat[:] = 0.0
309 # ci is the index of the interval currently considered
310 # ghat(ci) is the mean of y-values within this interval
311 ci = 0
312 index[0] = 0
313 len[0] = 1
314 ghat[0] = y[0]
315 for j in range(1, y.shape[0]):
316 # a new index interval "j" is created:
317 ci += 1
318 index[ci] = j
319 len[ci] = 1
320 ghat[ci] = y[j]
321 while ci >= 1 and ghat[max(ci - 1, 0)] >= ghat[ci]:
322 # "pool adjacent violators"
323 nw = len[ci - 1] + len[ci]
324 ghat[ci - 1] += (len[ci] / nw) * (ghat[ci] - ghat[ci - 1])
325 len[ci - 1] = nw
326 ci -= 1
327 return ci
330def pavx_2(ghat, index, ci):
331 """Calculates the pavx_2 function
333 Calculates the pavx_2 function given the pavx_1 function.
335 Parameters
336 ==========
338 ghat : numpy.ndarray (1D, float)
340 The list of mean values calculated in the pavx_1 function
342 index : numpy.ndarray (1D, size_t)
344 The list of indices calculated in the pavx_1 function
346 ci : size_t
348 The index of the interval currently considered
350 """
351 # define ghat for all indices
352 n = ghat.shape[0]
353 while n >= 1:
354 r = numpy.array(range(index[ci], n))
355 ghat[r] = ghat[ci]
356 n = index[ci]
357 ci -= 1
358 return ghat
361def pavxWidth(input, output):
362 """Applies the Pool-Adjacent-Violators Algorithm and returns the width.
364 This is a simplified port of the isotonic regression code made available at the University of Bern website.
366 Parameters
367 ==========
369 input : array_like (float, 1D)
371 The input matrix for the PAV algorithm.
373 output : array_like (float, 1D)
375 The output matrix, must be of the same size as input
377 Returns
378 =======
380 width : array_like (uint64, 1D)
382 The widths of the pooled intervals (one entry per interval, at most the size of ``input``)
383 """
384 input = numpy.array(input)
385 output = numpy.array(output)
387 # Define working arrays: An interval of indices is represented by its left
388 # endpoint "index" and its length "len"
389 N = input.shape[0]
390 index = numpy.zeros(N, dtype=numpy.uint64)
391 len = numpy.zeros(N, dtype=numpy.uint64)
393 # First step
394 ci = pavx_1(input, output, index, len)
396 # Get the width vector
397 width = len[: ci + 1]
399 # Second step
400 pavx_2(output, index, ci)
402 return width
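# The PAV port above is auto-generated and untested (see the guard in ``rocch``
# below), so the following self-contained sketch (not part of the original
# module) illustrates the pool-adjacent-violators idea it implements: scan the
# values once and merge ("pool") adjacent blocks whenever they violate the
# non-decreasing constraint, replacing them by their mean.
def _pav_sketch(y):  # hypothetical helper, for illustration only
    y = numpy.asarray(y, dtype=float)
    means, counts = [], []
    for v in y:
        means.append(float(v))
        counts.append(1)
        # pool while the last two blocks violate monotonicity
        while len(means) > 1 and means[-2] >= means[-1]:
            total = counts[-2] + counts[-1]
            pooled = (means[-2] * counts[-2] + means[-1] * counts[-1]) / total
            means[-2:] = [pooled]
            counts[-2:] = [total]
    # expand the pooled block means back to one value per input element
    return numpy.repeat(numpy.array(means), numpy.array(counts))
# e.g. _pav_sketch([1.0, 3.0, 2.0, 4.0]) -> array([1., 2.5, 2.5, 4.])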
405def rocch(negatives, positives):
406 """Calculates the ROC Convex Hull (ROCCH) curve given a set of positive and negative scores
409 Parameters
410 ==========
412 negatives, positives : numpy.ndarray (1D, float)
414 The set of negative and positive scores to compute the curve
417 Returns
418 =======
420 curve : numpy.ndarray (2D, float)
422 The ROC curve, with the first row containing the FPR, and the second
423 row containing the FNR.
425 """
427 # Number of positive and negative scores
428 Nt = len(positives)
429 Nn = len(negatives)
430 N = Nt + Nn
432 # Creates a big array with all scores
433 scores = numpy.concatenate((positives, negatives))
435 # It is important here that scores that are the same (i.e. already in
436 # order) should NOT be swapped. "stable" has this property.
437 perturb = numpy.argsort(scores, kind="stable")
439 # Apply permutation
440 Pideal = numpy.zeros((N,))
441 Pideal[perturb < Nt] = 1.0
443 # Applies the PAVA algorithm
444 Popt = numpy.ndarray((N,))
445 raise NotImplementedError(
446 "An auto generated implementation of pavxWidth is available but no test has been done."
447 )
448 width = pavxWidth(Pideal, Popt)
450 # Allocates output
451 nbins = len(width)
452 retval = numpy.zeros((2, nbins + 1)) # FAR, FRR
454 # Fills in output
455 left = 0
456 fa = Nn
457 miss = 0
459 for i in range(nbins):
460 retval[0, i] = fa / Nn # pfa
461 retval[1, i] = miss / Nt # pmiss
462 left += int(width[i])
464 if left >= 1:
465 miss = Pideal[:left].sum()
466 else:
467 miss = 0.0
469 if Pideal.shape[0] - 1 >= left:
470 fa = N - left - Pideal[left:].sum()
471 else:
472 fa = 0
474 retval[0, nbins] = fa / Nn # pfa
475 retval[1, nbins] = miss / Nt # pmiss
477 return retval
480@array_jit
481def rocch2eer(pmiss_pfa):
482 """Calculates the threshold that is as close as possible to the equal-error-rate (EER) given the input data
485 .. todo::
487 The parameter(s) ``pmiss_pfa`` are used, but not documented.
490 Returns
491 =======
493 threshold : float
495 The computed threshold, at which the EER can be obtained
497 """
499 assert pmiss_pfa.shape[0] == 2
501 N = pmiss_pfa.shape[1]
503 eer = 0.0
504 XY = numpy.empty((2, 2))
505 one = numpy.ones((2,))
506 eerseg = 0.0
507 epsilon = numpy.finfo(numpy.float64).eps
509 for i in range(N - 1):
510 # Define XY matrix
511 XY[0, 0] = pmiss_pfa[0, i] # pfa
512 XY[1, 0] = pmiss_pfa[0, i + 1] # pfa
513 XY[0, 1] = pmiss_pfa[1, i] # pmiss
514 XY[1, 1] = pmiss_pfa[1, i + 1] # pmiss
516 # xx and yy should be sorted:
517 assert XY[1, 0] <= XY[0, 0] and XY[0, 1] <= XY[1, 1]
519 # Compute "dd"
520 abs_dd0 = abs(XY[0, 0] - XY[1, 0])
521 abs_dd1 = abs(XY[0, 1] - XY[1, 1])
523 if min(abs_dd0, abs_dd1) < epsilon:
524 eerseg = 0.0
526 else:
527 # Finds line coefficients seg s.t. XY.seg = 1
528 seg = numpy.linalg.solve(XY, one)
529 # Candidate for the EER (to be compared to current value)
530 eerseg = 1.0 / seg.sum()
532 eer = max(eer, eerseg)
534 return eer
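# Worked illustration (not part of the original module) of the per-segment step
# above: for a hull segment with endpoints (pfa, pmiss) = (0.3, 0.1) and
# (0.1, 0.3), solving XY @ seg == 1 gives the line coefficients, and
# 1 / seg.sum() is the point where pfa == pmiss on that line, i.e. 0.2.
def _example_eer_segment():  # hypothetical helper, for illustration only
    XY = numpy.array([[0.3, 0.1], [0.1, 0.3]])
    seg = numpy.linalg.solve(XY, numpy.ones((2,)))
    return 1.0 / seg.sum()  # 0.2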
537def eer_rocch(negatives, positives):
538 """Equal-error-rate (EER) given the input data, on the ROC Convex Hull
540 It replicates the EER calculation from the Bosaris toolkit
541 (https://sites.google.com/site/bosaristoolkit/).
544 Parameters
545 ==========
547 negatives : numpy.ndarray (1D, float)
549 The set of negative scores to compute the threshold
551 positives : numpy.ndarray (1D, float)
553 The set of positive scores to compute the threshold
556 Returns
557 =======
559 eer : float
561 The equal error rate, computed on the ROC convex hull
563 """
564 return rocch2eer(rocch(negatives, positives))
567@jit("float64(float64, float64, float64)", nopython=True)
568def _abs_diff(a, b, cost):
569 return abs(a - b)
572@jit("float64(float64, float64, float64)", nopython=True)
573def _weighted_err(far, frr, cost):
574 return (cost * far) + ((1.0 - cost) * frr)
577@jit(nopython=True)
578def _minimizing_threshold(negatives, positives, criterion, cost=0.5):
579 """Calculates the best threshold taking a predicate as input condition
581 This method can calculate a threshold based on a set of scores (positives
582 and negatives) given a certain minimization criterion, input as a
583 functional predicate. For a discussion on ``positive`` and ``negative`` see
584 :py:func:`farfrr`. Here, it is expected that the positives and the
585 negatives are sorted in ascending order.
587 The predicate method gives back the current minimum given false-acceptance
588 (FA) and false-rejection (FR) ratios for the input data. The API for the
589 criterion is:
591 predicate(fa_ratio : float, fr_ratio : float) -> float
593 Please note that this method will only work with single-minimum smooth
594 predicates.
596 The minimization is carried out in a data-driven way. Starting from the
597 lowest score (might be a positive or a negative), it increases the
598 threshold based on the distance between the current score and the following
599 higher score (also keeping track of duplicate scores) and computes the
600 predicate for each possible threshold.
602 Finally, that threshold is returned, for which the predicate returned the
603 lowest value.
606 Parameters
607 ==========
609 negatives : numpy.ndarray (1D, float)
610 Negative scores, sorted in ascending order
612 positives : numpy.ndarray (1D, float)
613 Positive scores, sorted in ascending order
615 criterion : str
616 A predicate from one of ("absolute-difference", "weighted-error")
618 cost : float
619 Extra cost argument to be passed to criterion
621 Returns
622 =======
624 threshold : float
625 The optimal threshold given the predicate and the scores
627 """
628 if criterion not in ("absolute-difference", "weighted-error"):
629 raise ValueError("Uknown criterion")
631 def criterium(a, b, c):
632 if criterion == "absolute-difference":
633 return _abs_diff(a, b, c)
634 else:
635 return _weighted_err(a, b, c)
637 if not len(negatives) or not len(positives):
638 raise RuntimeError(
639 "Cannot compute threshold when no positives or "
640 "no negatives are provided"
641 )
643 # iterates over all possible far and frr points and compute the predicate
644 # for each possible threshold...
645 min_predicate = 1e8
646 min_threshold = 1e8
647 current_predicate = 1e8
648 # we start with the extreme values for far and frr
649 far = 1.0
650 frr = 0.0
652 # the decrease/increase for far/frr when moving one negative/positive
653 max_neg = len(negatives)
654 far_decrease = 1.0 / max_neg
655 max_pos = len(positives)
656 frr_increase = 1.0 / max_pos
658 # we start with the threshold based on the minimum score
660 # iterates until one of these goes bananas
661 pos_it = 0
662 neg_it = 0
663 current_threshold = min(negatives[neg_it], positives[pos_it])
665 # continues until one of the two iterators reaches the end of the list
666 while pos_it < max_pos and neg_it < max_neg:
667 # compute predicate
668 current_predicate = criterium(far, frr, cost)
670 if current_predicate <= min_predicate:
671 min_predicate = current_predicate
672 min_threshold = current_threshold
674 if positives[pos_it] >= negatives[neg_it]:
675 # compute current threshold
676 current_threshold = negatives[neg_it]
677 neg_it += 1
678 far -= far_decrease
680 else: # pos_val <= neg_val
681 # compute current threshold
682 current_threshold = positives[pos_it]
683 pos_it += 1
684 frr += frr_increase
686 # skip until the next "different" value, in which case we "gain" 1 unit on
687 # the "FAR" value, since we will be counting that negative as a
688 # true negative, and not as a false positive anymore. we continue
689 # to do so for as long as the current threshold matches the current
690 # iterator.
691 while neg_it < max_neg and current_threshold == negatives[neg_it]:
692 neg_it += 1
693 far -= far_decrease
695 # skip until the next "different" value, in which case we "lose" 1 unit
696 # on the "FRR" value, since we will be counting that positive as a
697 # false negative, and not as a true positive anymore. we continue
698 # to do so for as long as the current threshold matches the current
699 # iterator.
700 while pos_it < max_pos and current_threshold == positives[pos_it]:
701 pos_it += 1
702 frr += frr_increase
704 # computes a new threshold based on the center between last and current
705 # score, if we are **not** already at the end of the score lists
706 if neg_it < max_neg or pos_it < max_pos:
707 if neg_it < max_neg and pos_it < max_pos:
708 current_threshold += min(negatives[neg_it], positives[pos_it])
709 elif neg_it < max_neg:
710 current_threshold += negatives[neg_it]
711 else:
712 current_threshold += positives[pos_it]
713 current_threshold /= 2
715 # now, we have reached the end of one list (usually the negatives) so,
716 # finally compute predicate for the last time
717 current_predicate = criterium(far, frr, cost)
718 if current_predicate < min_predicate:
719 min_predicate = current_predicate
720 min_threshold = current_threshold
722 # now we just double check choosing the threshold higher than all scores
723 # will not improve the min_predicate
724 if neg_it < max_neg or pos_it < max_pos:
725 last_threshold = current_threshold
726 if neg_it < max_neg:
727 last_threshold = numpy.nextafter(negatives[-1], negatives[-1] + 1)
728 elif pos_it < max_pos:
729 last_threshold = numpy.nextafter(positives[-1], positives[-1] + 1)
730 current_predicate = criterium(0.0, 1.0, cost)
731 if current_predicate < min_predicate:
732 min_predicate = current_predicate
733 min_threshold = last_threshold
735 # return the best threshold found
736 return min_threshold
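# Illustrative brute-force counterpart (not part of the original module) of the
# data-driven scan above: evaluate the weighted error at every candidate score
# and keep the best threshold. Much slower, but it shows what the single pass
# over the sorted scores is computing.
def _example_threshold_scan(negatives, positives, cost=0.5):  # hypothetical helper
    negatives = numpy.asarray(negatives, dtype=float)
    positives = numpy.asarray(positives, dtype=float)
    best_threshold, best_error = None, numpy.inf
    for threshold in numpy.union1d(negatives, positives):
        far = (negatives >= threshold).mean()  # false-accept rate
        frr = (positives < threshold).mean()  # false-reject rate
        error = cost * far + (1.0 - cost) * frr
        if error < best_error:
            best_threshold, best_error = threshold, error
    return best_threshold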
739def eer_threshold(negatives, positives, is_sorted=False):
740 """Calculates threshold as close as possible to the equal error rate (EER)
742 The EER should be the point where the FPR equals the FNR. Graphically, this
743 would be equivalent to the intersection between the ROC (or DET) curves and
744 the identity.
746 .. note::
748 The scores will be sorted internally, requiring the scores to be copied.
749 To avoid this copy, you can sort both sets of scores externally in
750 ascendant order, and set the ``is_sorted`` parameter to ``True``.
753 Parameters
754 ==========
756 negatives : numpy.ndarray (1D, float)
758 The set of negative scores to compute the threshold
760 positives : numpy.ndarray (1D, float)
762 The set of positive scores to compute the threshold
764 is_sorted : :py:class:`bool`, Optional
766 Set this to ``True`` if both sets of scores are already sorted in
767 ascending order. If ``False``, scores will be sorted internally, which
768 will require more memory.
771 Returns
772 =======
774 threshold : float
776 The threshold (i.e., as used in :py:func:`farfrr`) where FPR and FNR
777 are as close as possible
779 """
781 # if not pre-sorted, copies and sorts
782 neg = negatives if is_sorted else numpy.sort(negatives)
783 pos = positives if is_sorted else numpy.sort(positives)
785 return _minimizing_threshold(neg, pos, "absolute-difference")
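# Usage sketch (not part of the original module; assumes numba is available):
# with synthetic Gaussian scores, the FPR and FNR measured at the returned
# threshold should be close to each other.
def _example_eer_threshold():  # hypothetical helper, for illustration only
    rng = numpy.random.default_rng(1)
    neg = rng.normal(0.0, 1.0, 1000)
    pos = rng.normal(2.0, 1.0, 1000)
    threshold = eer_threshold(neg, pos)
    return threshold, farfrr(neg, pos, threshold)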
788@array_jit
789def epc(
790 dev_negatives,
791 dev_positives,
792 test_negatives,
793 test_positives,
794 n_points,
795 is_sorted=False,
796 thresholds=False,
797):
798 """Calculates points of an Expected Performance Curve (EPC)
800 Calculates the EPC curve given a set of positive and negative scores and a
801 desired number of points. Returns a two-dimensional
802 :py:class:`numpy.ndarray` of type float with the shape of ``(2, points)``
803 or ``(3, points)`` depending on the ``thresholds`` argument. The rows
804 correspond to the X (cost), Y (weighted error rate on the test set given
805 the min. threshold on the development set), and the thresholds which were
806 used to calculate the error (if the ``thresholds`` argument was set to
807 ``True``), respectively. Please note that, in order to calculate the EPC
808 curve, one needs two sets of data comprising a development set and a test
809 set. The minimum weighted error is calculated on the development set and
810 then applied to the test set to evaluate the weighted error rate at that
811 position.
813 The EPC curve plots the HTER on the test set for various values of 'cost'.
814 For each value of 'cost', a threshold is found that provides the minimum
815 weighted error (see :py:func:`min_weighted_error_rate_threshold`) on the
816 development set. Each threshold is consecutively applied to the test set
817 and the resulting weighted error values are plotted in the EPC.
819 The cost points in which the EPC curve are calculated are distributed
820 uniformly in the range :math:`[0.0, 1.0]`.
822 .. note::
824 It is more memory efficient, when sorted arrays of scores are provided
825 and the ``is_sorted`` parameter is set to ``True``.
828 Parameters
829 ==========
831 dev_negatives : numpy.ndarray (1D, float)
833 Negative scores on the development set
835 dev_positives : numpy.ndarray (1D, float)
837 Positive scores on the development set
839 test_negatives : numpy.ndarray (1D, float)
841 Negative scores on the test set
843 test_positives : numpy.ndarray (1D, float)
845 Positive scores on the test set
847 n_points : int
849 The number of weights for which the EPC curve should be computed
851 is_sorted : :py:class:`bool`, Optional
853 Set this to ``True`` if the development-set scores are already sorted in
854 ascending order. If ``False``, scores will be sorted internally, which
855 will require more memory.
857 thresholds : :py:class:`bool`, Optional
859 If ``True`` the function returns an array with the shape of ``(3,
860 points)`` where the third row contains the thresholds that were
861 calculated on the development set.
864 Returns
865 =======
867 curve : numpy.ndarray (2D, float)
869 The EPC curve, with the first row containing the weights and the second
870 row containing the weighted errors on the test set. If ``thresholds``
871 is ``True``, there is also a third row which contains the thresholds
872 that were calculated on the development set.
874 """
876 # if not pre-sorted, copies and sorts
877 dev_neg = dev_negatives if is_sorted else numpy.sort(dev_negatives)
878 dev_pos = dev_positives if is_sorted else numpy.sort(dev_positives)
879 # numpy.linspace is more stable than numpy.arange for non-integer steps.
880 # However, both arange and linspace are buggy in numba. Using objmode for a
881 # workaround. TODO(amir): remove objmode once
882 # https://github.com/numba/numba/issues/6768 is resolved.
883 with objmode(alpha="float64[:]"):
884 alpha = numpy.linspace(0, 1.0, n_points)
885 thres = numpy.empty_like(alpha)
886 mwer = numpy.empty_like(alpha)
887 for i, k in enumerate(alpha):
888 thres[i] = _jit_min_weighted_error_rate_threshold(
889 dev_neg, dev_pos, k, True
890 )
891 tmp = _jit_farfrr(test_negatives, test_positives, thres[i])
892 tmp2 = numpy.empty((2,))
893 tmp2[0] = tmp[0]
894 tmp2[1] = tmp[1]
895 mwer[i] = numpy.mean(tmp2)
897 if thresholds:
898 return numpy.vstack((alpha, mwer, thres))
899 return numpy.vstack((alpha, mwer))
902def f_score(negatives, positives, threshold, weight=1.0):
903 r"""Computes the F-score of the accuracy of the classification
905 The F-score is a weighted mean of precision and recall measurements, see
906 :py:func:`precision_recall`. It is computed as:
908 .. math::
910 \mathrm{\text{f-score}}(w) = (1 + w^2)\frac{\mathrm{precision}\cdot{}\mathrm{recall}}{w^2\cdot{}\mathrm{precision} + \mathrm{recall}}
912 The weight :math:`w` needs to be a non-negative real value. In case the
913 weight parameter is 1 (the default), the F-score is called the F1 score and
914 is the harmonic mean of the precision and recall values.
917 Parameters
918 ==========
920 negatives : numpy.ndarray (1D, float)
922 The set of negative scores to compute the precision and recall
924 positives : numpy.ndarray (1D, float)
926 The set of positive scores to compute the precision and recall
928 threshold : float
930 The threshold to compute the precision and recall for
932 weight : :py:class:`float`, Optional
934 The weight :math:`w` between precision and recall
937 Returns
938 =======
940 f_score : float
942 The computed f-score for the given scores and the given threshold
944 """
945 weight = weight if weight > 0 else 1
946 w2 = weight**2
947 p, r = precision_recall(negatives, positives, threshold)
948 if p == 0.0 and r == 0.0:
949 return 0.0
950 return (1 + w2) * (p * r) / ((w2 * p) + r)
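# Worked illustration (not part of the original module) of the formula above,
# with made-up precision/recall values: for p = 0.8, r = 0.5 and w = 1, the
# F1 score is 2 * (0.8 * 0.5) / (0.8 + 0.5), roughly 0.615.
def _example_f_score_formula():  # hypothetical helper, for illustration only
    p, r, w = 0.8, 0.5, 1.0
    w2 = w**2
    return (1 + w2) * (p * r) / ((w2 * p) + r)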
953@array_jit
954def far_threshold(negatives, positives, far_value=0.001, is_sorted=False):
955 """Threshold such that the real FPR is **at most** the requested ``far_value`` if possible
958 .. note::
960 The scores will be sorted internally, requiring the scores to be copied.
961 To avoid this copy, you can sort the ``negatives`` scores externally in
962 ascendant order, and set the ``is_sorted`` parameter to ``True``.
965 Parameters
966 ==========
968 negatives : numpy.ndarray (1D, float)
970 The set of negative scores to compute the FPR threshold
972 positives : numpy.ndarray (1D, float)
974 Ignored, but needs to be specified -- may be given as ``[]``
976 far_value : :py:class:`float`, Optional
978 The FPR value, for which the threshold should be computed
980 is_sorted : :py:class:`bool`, Optional
982 Set this to ``True`` if the ``negatives`` are already sorted in
983 ascending order. If ``False``, scores will be sorted internally, which
984 will require more memory.
987 Returns
988 =======
990 threshold : float
992 The threshold such that the real FPR is at most ``far_value``
994 """
996 # N.B.: Unoptimized version ported from C++
998 if far_value < 0.0 or far_value > 1.0:
999 raise RuntimeError("`far_value' must be in the interval [0.,1.]")
1001 if len(negatives) < 2:
1002 raise RuntimeError("the number of negative scores must be at least 2")
1004 epsilon = numpy.finfo(numpy.float64).eps
1005 # if not pre-sorted, copies and sorts
1006 scores = negatives if is_sorted else numpy.sort(negatives)
1008 # handles special case of far == 1 without any iterating
1009 if far_value >= (1 - epsilon):
1010 return numpy.nextafter(scores[0], scores[0] - 1)
1012 # Reverse negatives so the end is the start. This way the code below will
1013 # be very similar to the implementation in the frr_threshold function. The
1014 # implementations are not exactly the same though.
1015 scores = numpy.flip(scores)
1017 # Move towards the end of array changing the threshold until we cross the
1018 # desired FAR value. Starting with a threshold that corresponds to FAR ==
1019 # 0.
1020 total_count = len(scores)
1021 current_position = 0
1023 # since the comparison is `if score >= threshold then accept as genuine`,
1024 # we can choose the largest score value + eps as the threshold so that we
1025 # can get 0% FAR.
1026 valid_threshold = numpy.nextafter(
1027 scores[current_position], scores[current_position] + 1
1028 )
1029 current_threshold = 0.0
1031 while current_position < total_count:
1032 current_threshold = scores[current_position]
1033 # keep iterating if values are repeated
1034 while (
1035 current_position < (total_count - 1)
1036 and scores[current_position + 1] == current_threshold
1037 ):
1038 current_position += 1
1039 # All the scores up to the current position and including the current
1040 # position will be accepted falsely.
1041 future_far = (current_position + 1) / total_count
1042 if future_far > far_value:
1043 break
1044 valid_threshold = current_threshold
1045 current_position += 1
1047 return valid_threshold
1050_jit_far_threshold = far_threshold.jit_func
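# Usage sketch (not part of the original module; assumes numba is available):
# the FPR measured at the returned threshold should not exceed the requested
# ``far_value``.
def _example_far_threshold():  # hypothetical helper, for illustration only
    rng = numpy.random.default_rng(2)
    neg = rng.normal(0.0, 1.0, 10000)
    pos = rng.normal(3.0, 1.0, 10000)
    threshold = far_threshold(neg, pos, far_value=0.01)
    fpr, _ = farfrr(neg, pos, threshold)
    return threshold, fpr  # fpr should be at most 0.01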
1053@array_jit
1054def farfrr(negatives, positives, threshold):
1055 """Calculates the false-acceptance (FA) ratio and the false-rejection (FR) ratio for the given positive and negative scores and a score threshold
1057 ``positives`` holds the score information for samples that are labeled to
1058 belong to a certain class (a.k.a., 'signal' or 'client'). ``negatives``
1059 holds the score information for samples that are labeled **not** to belong
1060 to the class (a.k.a., 'noise' or 'impostor'). It is expected that
1061 'positive' scores are, at least by design, greater than 'negative' scores.
1062 So, every 'positive' value that falls below the threshold is considered a
1063 false-rejection (FR). `negative` samples that fall above the threshold are
1064 considered a false-accept (FA).
1066 Positives that fall on the threshold (exactly) are considered correctly
1067 classified. Negatives that fall on the threshold (exactly) are considered
1068 **incorrectly** classified. This is equivalent to setting the comparison like
1069 this pseudo-code:
1071 .. code-block:: python
1073 false_rejects = 0
1074 false_accepts = 0
1075 for k in positives:
1076 if k < threshold:
1077 false_rejects += 1
1078 for k in negatives:
1079 if k >= threshold:
1080 false_accepts += 1
1083 The output is in form of a tuple of two double-precision float numbers.
1084 The numbers range from 0 to 1. The first element of the pair is the false
1085 positive ratio (FPR), the second element the false negative ratio (FNR).
1087 The ``threshold`` value does not necessarily have to fall in the range
1088 covered by the input scores (negatives and positives altogether), but if it
1089 does not, the output will be either (1.0, 0.0) or (0.0, 1.0), depending on
1090 which side of the score range the threshold falls on.
1092 It is possible that scores are inverted in the negative/positive sense. In
1093 some setups the designer may have set up the system so 'positive' samples
1094 have a smaller score than the 'negative' ones. In this case, make sure you
1095 normalize the scores so positive samples have greater scores before feeding
1096 them into this method.
1099 Parameters
1100 ==========
1102 negatives : numpy.ndarray (1D, float)
1104 The scores for comparisons of objects of different classes
1106 positives : numpy.ndarray (1D, float)
1108 The scores for comparisons of objects of the same class
1110 threshold : float
1112 The threshold to separate correctly and incorrectly classified scores
1115 Returns
1116 =======
1118 far : float
1120 The False Positive Rate (FPR) for the given threshold
1122 frr : float
1124 The False Negative Rate (FNR) for the given threshold
1126 """
1128 if numpy.isnan(threshold):
1129 print("Error: Cannot compute FPR (FAR) or FNR (FRR) with NaN threshold")
1130 return (1.0, 1.0)
1132 if not len(negatives):
1133 raise RuntimeError(
1134 "Cannot compute FPR (FAR) when no negatives are given"
1135 )
1137 if not len(positives):
1138 raise RuntimeError(
1139 "Cannot compute FNR (FRR) when no positives are given"
1140 )
1142 return (negatives >= threshold).sum() / len(negatives), (
1143 positives < threshold
1144 ).sum() / len(positives)
1147_jit_farfrr = farfrr.jit_func
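# Tiny worked example (not part of the original module; scores are made up):
# with threshold 0.5, one of the four negatives is >= 0.5 and one of the four
# positives is < 0.5, so both rates come out as 0.25.
def _example_farfrr():  # hypothetical helper, for illustration only
    neg = numpy.array([0.1, 0.2, 0.3, 0.6])
    pos = numpy.array([0.4, 0.7, 0.8, 0.9])
    return farfrr(neg, pos, 0.5)  # (0.25, 0.25)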
1150@array_jit
1151def frr_threshold(negatives, positives, frr_value=0.001, is_sorted=False):
1152 """Computes the threshold such that the real FNR is **at most** the requested ``frr_value`` if possible
1155 .. note::
1157 The scores will be sorted internally, requiring the scores to be copied.
1158 To avoid this copy, you can sort the ``positives`` scores externally in
1159 ascendant order, and set the ``is_sorted`` parameter to ``True``.
1162 Parameters
1163 ==========
1165 negatives : numpy.ndarray (1D, float)
1167 Ignored, but needs to be specified -- may be given as ``[]``.
1169 positives : numpy.ndarray (1D, float)
1171 The set of positive scores to compute the FNR threshold.
1173 frr_value : :py:class:`float`, Optional
1175 The FNR value, for which the threshold should be computed.
1177 is_sorted : :py:class:`bool`, Optional
1179 Set this to ``True`` if the ``positives`` are already sorted in
1180 ascending order. If ``False``, scores will be sorted internally, which
1181 will require more memory.
1184 Returns
1185 =======
1187 threshold : float
1189 The threshold such that the real FRR is at most ``frr_value``.
1191 """
1193 # N.B.: Unoptimized version ported from C++
1195 if frr_value < 0.0 or frr_value > 1.0:
1196 raise RuntimeError("`frr_value' value must be in the interval [0.,1.]")
1198 if len(positives) < 2:
1199 raise RuntimeError("the number of positive scores must be at least 2")
1201 epsilon = numpy.finfo(numpy.float64).eps
1202 # if not pre-sorted, copies and sorts
1203 scores = positives if is_sorted else numpy.sort(positives)
1205 # handles special case of frr == 1 without any iterating
1206 if frr_value >= (1 - epsilon):
1207 return numpy.nextafter(scores[-1], scores[-1] + 1)
1209 # Move towards the end of array changing the threshold until we cross the
1210 # desired FRR value. Starting with a threshold that corresponds to FRR ==
1211 # 0.
1212 total_count = len(scores)
1213 current_position = 0
1215 # since the comparison is `if score >= threshold then accept as genuine`,
1216 # we start from a threshold just above the smallest positive score and
1217 # refine it in the loop below.
1218 valid_threshold = numpy.nextafter(
1219 scores[current_position], scores[current_position] + 1
1220 )
1221 current_threshold = 0.0
1223 while current_position < total_count:
1224 current_threshold = scores[current_position]
1225 # keep iterating if values are repeated
1226 while (
1227 current_position < (total_count - 1)
1228 and scores[current_position + 1] == current_threshold
1229 ):
1230 current_position += 1
1231 # All the scores strictly before the current position will be rejected
1232 # falsely, since they fall below the current threshold.
1233 future_frr = current_position / total_count
1234 if future_frr > frr_value:
1235 break
1236 valid_threshold = current_threshold
1237 current_position += 1
1239 return valid_threshold
1242_jit_frr_threshold = frr_threshold.jit_func
1245def min_hter_threshold(negatives, positives, is_sorted=False):
1246 """Calculates the :py:func:`min_weighted_error_rate_threshold` with ``cost=0.5``
1248 Parameters
1249 ==========
1251 negatives, positives : numpy.ndarray (1D, float)
1253 The set of negative and positive scores to compute the threshold
1255 is_sorted : :py:class:`bool`, Optional
1257 Set this to ``True`` if both sets of scores are already sorted in
1258 ascending order. If ``False``, scores will be sorted internally, which
1259 will require more memory.
1262 Returns
1263 =======
1265 threshold : float
1267 The threshold for which the weighted error rate is minimal
1269 """
1270 return min_weighted_error_rate_threshold(
1271 negatives, positives, 0.5, is_sorted
1272 )
1275@array_jit
1276def min_weighted_error_rate_threshold(
1277 negatives, positives, cost, is_sorted=False
1278):
1279 """Calculates the threshold that minimizes the error rate
1281 The ``cost`` parameter determines the relative importance between
1282 false-accepts and false-rejections. This number should be between 0 and 1
1283 and will be clipped to those extremes. The value to minimize becomes:
1284 :math:`ER_{cost} = cost * FPR + (1-cost) * FNR`. The higher the cost, the
1285 higher the importance given to **not** making mistakes classifying
1286 negatives/noise/impostors.
1288 .. note::
1290 The scores will be sorted internally, requiring the scores to be copied.
1291 To avoid this copy, you can sort both sets of scores externally in
1292 ascendant order, and set the ``is_sorted`` parameter to ``True``.
1295 Parameters
1296 ==========
1298 negatives, positives : numpy.ndarray (1D, float)
1300 The set of negative and positive scores to compute the threshold
1302 cost : float
1304 The relative cost over FPR with respect to FNR in the threshold
1305 calculation
1307 is_sorted : :py:class:`bool`, Optional
1309 Set this to ``True`` if both sets of scores are already sorted in
1310 ascending order. If ``False``, scores will be sorted internally, which
1311 will require more memory.
1314 Returns
1315 =======
1317 threshold : float
1319 The threshold for which the weighted error rate is minimal
1321 """
1323 # if not pre-sorted, copies and sorts
1324 neg = negatives if is_sorted else numpy.sort(negatives)
1325 pos = positives if is_sorted else numpy.sort(positives)
1326 if cost > 1.0:
1327 cost = 1.0
1328 elif cost < 0.0:
1329 cost = 0.0
1331 return _minimizing_threshold(neg, pos, "weighted-error", cost)
1334_jit_min_weighted_error_rate_threshold = (
1335 min_weighted_error_rate_threshold.jit_func
1336)
1339# @jit([(numba.float64[:, :],)], nopython=True)
1340def ppndf(p):
1341 """Returns the Deviate Scale equivalent of a false rejection/acceptance ratio
1343 The algorithm that calculates the deviate scale is based on function
1344 ``ppndf()`` from the NIST package DETware version 2.1, freely available on
1345 the internet. Please consult it for more details. By 20.04.2011, you could
1346 find such package `here <http://www.itl.nist.gov/iad/mig/tools/>`_.
1348 The input to this function is a cumulative probability. The output from
1349 this function is the Normal deviate that corresponds to that probability.
1350 For example:
1352 -------+--------
1353 INPUT | OUTPUT
1354 -------+--------
1355 0.001 | -3.090
1356 0.01 | -2.326
1357 0.1 | -1.282
1358 0.5 | 0.0
1359 0.9 | 1.282
1360 0.99 | 2.326
1361 0.999 | 3.090
1362 -------+--------
1365 Parameters
1366 ==========
1368 p : numpy.ndarray (2D, float)
1370 The value (usually FPR or FNR) for which the PPNDF should be calculated
1373 Returns
1374 =======
1376 ppndf : numpy.ndarray (2D, float)
1378 The normal deviate scale equivalent of the given value(s)
1380 """
1382 # threshold
1383 epsilon = numpy.finfo(numpy.float64).eps
1384 p_new = numpy.copy(p)
1385 p_new = numpy.where(p_new >= 1.0, 1.0 - epsilon, p_new)
1386 p_new = numpy.where(p_new <= 0.0, epsilon, p_new)
1388 q = p_new - 0.5
1389 abs_q_smaller = numpy.abs(q) <= 0.42
1390 abs_q_bigger = ~abs_q_smaller
1392 retval = numpy.zeros_like(p_new)
1394 # first part q<=0.42
1395 q1 = q[abs_q_smaller]
1396 r = numpy.square(q1)
1397 opt1 = (
1398 q1
1399 * (
1400 ((-25.4410604963 * r + 41.3911977353) * r + -18.6150006252) * r
1401 + 2.5066282388
1402 )
1403 / (
1404 (
1405 ((3.1308290983 * r + -21.0622410182) * r + 23.0833674374) * r
1406 + -8.4735109309
1407 )
1408 * r
1409 + 1.0
1410 )
1411 )
1412 retval[abs_q_smaller] = opt1
1414 # second part q>0.42
1415 # r = sqrt (log (0.5 - abs(q)));
1416 q2 = q[abs_q_bigger]
1417 r = p_new[abs_q_bigger]
1418 r[q2 > 0] = 1 - r[q2 > 0]
1419 if (r <= 0).any():
1420 raise RuntimeError("measure::ppndf(): r <= 0.0!")
1422 r = numpy.sqrt(-1 * numpy.log(r))
1423 opt2 = (
1424 ((2.3212127685 * r + 4.8501412713) * r + -2.2979647913) * r
1425 + -2.7871893113
1426 ) / ((1.6370678189 * r + 3.5438892476) * r + 1.0)
1427 opt2[q2 < 0] *= -1
1428 retval[abs_q_bigger] = opt2
1430 return retval
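# Usage sketch (not part of the original module): reproduces a few rows of the
# table in the docstring above; the returned values are approximate.
def _example_ppndf():  # hypothetical helper, for illustration only
    p = numpy.array([[0.001, 0.1, 0.5, 0.9, 0.999]])
    return ppndf(p)  # roughly [[-3.090, -1.282, 0.0, 1.282, 3.090]]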
1433@array_jit
1434def precision_recall(negatives, positives, threshold):
1435 r"""Calculates the precision and recall (sensitivity) values given negative and positive scores and a threshold
1437 Precision and recall are computed as:
1439 .. math::
1441 \mathrm{precision} = \frac{tp}{tp + fp}
1443 \mathrm{recall} = \frac{tp}{tp + fn}
1445 where :math:`tp` are the true positives, :math:`fp` are the false positives
1446 and :math:`fn` are the false negatives.
1448 ``positives`` holds the score information for samples that are labeled to
1449 belong to a certain class (a.k.a., 'signal' or 'client'). ``negatives``
1450 holds the score information for samples that are labeled **not** to belong
1451 to the class (a.k.a., 'noise' or 'impostor'). For more precise details
1452 about how the method considers error rates, see :py:func:`farfrr`.
1455 Parameters
1456 ==========
1458 negatives, positives : numpy.ndarray (1D, float)
1460 The set of negative and positive scores to compute the measurements
1462 threshold : float
1464 The threshold to compute the measures for
1467 Returns
1468 =======
1470 precision : float
1472 The precision value for the given negatives and positives
1474 recall : float
1476 The recall value for the given negatives and positives
1478 """
1480 if not len(positives) or not len(negatives):
1481 raise RuntimeError(
1482 "Cannot compute precision or recall when no "
1483 "positives or no negatives are given"
1484 )
1486 FP = (negatives >= threshold).sum()
1487 TP = (positives >= threshold).sum()
1488 CP = TP + FP
1489 if CP == 0:
1490 CP = 1
1491 return TP / CP, TP / len(positives)
1494_jit_precision_recall = precision_recall.jit_func
1497@array_jit
1498def precision_recall_curve(negatives, positives, n_points):
1499 """Calculates the precision-recall curve given a set of positive and negative scores and a number of desired points
1501 The points in which the curve is calculated are distributed uniformly in
1502 the range ``[min(negatives, positives), max(negatives, positives)]``
1505 Parameters
1506 ==========
1508 negatives, positives : numpy.ndarray (1D, float)
1510 The set of negative and positive scores to compute the measurements
1512 n_points : int
1514 The number of thresholds for which precision and recall should be
1515 evaluated
1518 Returns
1519 =======
1521 curve : numpy.ndarray (2D, float)
1523 2D array of floats that express the X (precision) and Y (recall)
1524 coordinates.
1526 """
1527 curve = numpy.empty((2, n_points))
1528 for i, k in enumerate(
1529 _meaningful_thresholds(negatives, positives, n_points, -8, False)
1530 ):
1531 x, y = _jit_precision_recall(negatives, positives, k)
1532 curve[0, i] = x
1533 curve[1, i] = y
1534 return curve
1543@array_jit
1544def roc(negatives, positives, n_points, min_far=-8):
1545 """Calculates points of an Receiver Operating Characteristic (ROC)
1547 Calculates the ROC curve given a set of negative and positive scores and a
1548 desired number of points.
1551 Parameters
1552 ==========
1554 negatives, positives : numpy.ndarray (1D, float)
1556 The negative and positive scores, for which the ROC curve should be
1557 calculated.
1559 n_points : int
1561 The number of points at which the ROC curve is calculated, which are
1562 distributed uniformly in the range ``[min(negatives, positives),
1563 max(negatives, positives)]``
1565 min_far : int
1567 Minimum FAR in terms of :math:`10^{\text{min_far}}`. This value is also
1568 used for ``min_frr``. Values should be negative.
1571 Returns
1572 =======
1574 curve : numpy.ndarray (2D, float)
1576 A two-dimensional array of doubles that express the X (FPR) and Y (FNR)
1577 coordinates in this order
1579 """
1581 t = _meaningful_thresholds(negatives, positives, n_points, min_far, False)
1582 curve = numpy.empty((2, len(t)))
1583 for i, k in enumerate(t):
1584 curve[:, i] = _jit_farfrr(negatives, positives, k)
1585 return curve
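# Usage sketch (not part of the original module; assumes numba is available):
# computes a 100-point ROC curve for synthetic scores; row 0 holds the FPR
# values and row 1 the FNR values, which can be passed directly to a plotting
# library.
def _example_roc():  # hypothetical helper, for illustration only
    rng = numpy.random.default_rng(3)
    neg = rng.normal(0.0, 1.0, 5000)
    pos = rng.normal(2.0, 1.0, 5000)
    return roc(neg, pos, 100)  # shape (2, 100)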
1588@array_jit
1589def roc_for_far(negatives, positives, far_list, is_sorted=False):
1590 """Calculates the ROC curve for a given set of positive and negative scores and the FPR values, for which the FNR should be computed
1592 .. note::
1594 The scores will be sorted internally, requiring the scores to be copied.
1595 To avoid this copy, you can sort both sets of scores externally in
1596 ascendant order, and set the ``is_sorted`` parameter to ``True``.
1599 Parameters
1600 ==========
1602 negatives, positives : numpy.ndarray (1D, float)
1604 The set of negative and positive scores to compute the curve
1606 far_list : numpy.ndarray (1D, float)
1608 A list of FPR values, for which the FNR values should be computed
1610 is_sorted : :py:class:`bool`, Optional
1612 Set this to ``True`` if both sets of scores are already sorted in
1613 ascending order. If ``False``, scores will be sorted internally, which
1614 will require more memory.
1617 Returns
1618 =======
1620 curve : numpy.ndarray (2D, float)
1622 The ROC curve, which holds a copy of the given FPR values in row 0, and
1623 the corresponding FNR values in row 1.
1625 """
1626 if len(negatives) == 0:
1627 raise RuntimeError("The given set of negatives is empty.")
1629 if len(positives) == 0:
1630 raise RuntimeError("The given set of positives is empty.")
1632 # if not pre-sorted, copies and sorts
1633 neg = negatives if is_sorted else numpy.sort(negatives)
1634 pos = positives if is_sorted else numpy.sort(positives)
1636 # Get the threshold for the requested far values and calculate far and frr
1637 # values based on the threshold.
1638 curve = numpy.empty((2, len(far_list)))
1639 for i, k in enumerate(far_list):
1640 curve[:, i] = _jit_farfrr(
1641 neg, pos, _jit_far_threshold(neg, pos, k, True)
1642 )
1643 return curve