Coverage for src/bob/pad/base/error_utils.py: 95%
65 statements
« prev ^ index » next coverage.py v7.6.0, created at 2024-07-12 23:40 +0200
« prev ^ index » next coverage.py v7.6.0, created at 2024-07-12 23:40 +0200
1#!/usr/bin/env python
2# Ivana Chingovska <ivana.chingovska@idiap.ch>
3# Fri Dec 7 12:33:37 CET 2012
4"""Utility functions for computation of EPSC curve and related measurement"""
6import logging
7import re
9from collections import defaultdict
11import numpy
13from bob.bio.base.score.load import _iterate_csv_score_file
14from bob.measure import (
15 eer_threshold,
16 far_threshold,
17 farfrr,
18 frr_threshold,
19 min_hter_threshold,
20)
22logger = logging.getLogger(__name__)
def calc_threshold(
    method, pos, negs, all_negs, far_value=None, is_sorted=False
):
    """Calculates the threshold based on the given method.

    Parameters
    ----------
    method : str
        The threshold criteria (case insensitive). One of:

        * ``bpcer<N>`` (e.g. ``bpcer20``): the threshold is computed with
          ``apcer_threshold`` for a desired APCER of ``1/N``, using ``pos``
          and ``negs``.
        * ``apcer<N>`` (e.g. ``apcer20``): the threshold is computed with
          ``frr_threshold`` for a desired BPCER of ``1/N``, using ``pos`` and
          ``all_negs``.
        * ``far``: ``far_threshold`` is used with ``far_value``.
        * ``eer``: ``eer_threshold`` is used.
        * ``min-hter``: ``min_hter_threshold`` is used.
    pos : ``array_like``
        The positive scores. They should be sorted!
    negs : list
        A list of array_like negative scores. Each item in the list
        corresponds to scores of one PAI. Only used by ``bpcer<N>``.
    all_negs : ``array_like``
        An array of all negative scores. This can be calculated from negs as
        well but we ask for it since you might have it already calculated.
    far_value : :obj:`float`, optional
        Required when method is ``far``; ignored by every other method.
    is_sorted : :obj:`bool`, optional
        If True, it means all scores are sorted and no sorting will happen.
        The flag is forwarded unchanged to the underlying threshold function.

    Returns
    -------
    float
        The calculated threshold.

    Raises
    ------
    ValueError
        If method is unknown, if the number following ``bpcer``/``apcer`` is
        missing, is not a number or is not strictly positive, or if method is
        ``far`` and ``far_value`` is not given.
    """
    method = method.lower()

    def _target_rate(prefix):
        # The number after the prefix is the inverse of the target error
        # rate, e.g. "bpcer20" -> 1 / 20.
        suffix = method.replace(prefix, "")
        try:
            denominator = float(suffix)
        except ValueError:
            denominator = None
        # ``not denominator > 0`` also rejects NaN, which compares false.
        if denominator is None or not denominator > 0:
            raise ValueError(
                "Invalid threshold criteria: {!r}; expected '{}<N>' with a "
                "number N > 0".format(method, prefix)
            )
        return 1 / denominator

    # "bpcer" must be tested before "apcer" to keep the original precedence.
    if "bpcer" in method:
        desired_apcer = _target_rate("bpcer")
        threshold = apcer_threshold(
            desired_apcer, pos, *negs, is_sorted=is_sorted
        )
    elif "apcer" in method:
        desired_bpcer = _target_rate("apcer")
        threshold = frr_threshold(
            all_negs, pos, desired_bpcer, is_sorted=is_sorted
        )
    elif method == "far":
        if far_value is None:
            # Fail early with a clear message instead of handing None over to
            # far_threshold.
            raise ValueError(
                "far_value must be given when the threshold criteria is 'far'"
            )
        threshold = far_threshold(all_negs, pos, far_value, is_sorted=is_sorted)
    elif method == "eer":
        threshold = eer_threshold(all_negs, pos, is_sorted=is_sorted)
    elif method == "min-hter":
        threshold = min_hter_threshold(all_negs, pos, is_sorted=is_sorted)
    else:
        raise ValueError("Unknown threshold criteria: {}".format(method))

    return threshold
def apcer_threshold(desired_apcer, pos, *negs, is_sorted=False):
    """Finds the threshold that meets a desired APCER over several PAIs.

    APCER is the maximum of the per-PAI APCER values, so one threshold is
    computed for each PAI with ``far_threshold`` and the largest of them is
    kept. The threshold is meant to make the real APCER **at most** the
    desired value.

    Parameters
    ----------
    desired_apcer : float
        The desired APCER value.
    pos : list
        An array or list of positive scores in float.
    *negs
        Negative scores, one array or list per PAI.
    is_sorted : :obj:`bool`, optional
        Set to ``True`` if ALL arrays (pos and negs) are sorted. Forwarded
        unchanged to ``far_threshold``.

    Returns
    -------
    float
        The computed threshold that satisfies the desired APCER.
    """
    # One candidate threshold per PAI; the strictest (largest) one wins.
    per_pai_thresholds = [
        far_threshold(pai_scores, pos, desired_apcer, is_sorted=is_sorted)
        for pai_scores in negs
    ]
    return max(per_pai_thresholds)
def apcer_bpcer(threshold, pos, *negs):
    """Computes the per-PAI APCER values, the overall APCER, and the BPCER
    at a given threshold.

    ``farfrr`` is evaluated once per PAI against the same positive scores.
    Its first output is collected as that PAI's APCER, and the overall APCER
    is the maximum of those values.

    Parameters
    ----------
    threshold : float
        The threshold to be used to compute the error rates.
    pos : list
        An array or list of positive scores in float.
    *negs
        Negative scores, one array or list per PAI. At least one is required.

    Returns
    -------
    tuple
        A tuple such as (list of APCER_PAI, APCER, BPCER)
    """
    assert negs, negs
    error_rates = [farfrr(pai_scores, pos, threshold) for pai_scores in negs]
    apcer_per_pai = [far for far, _frr in error_rates]
    worst_apcer = max(apcer_per_pai)
    # The second output of farfrr is taken from the last PAI; it is expected
    # to be the same for every PAI since pos and threshold do not change.
    _far, bpcer = error_rates[-1]
    return apcer_per_pai, worst_apcer, bpcer
def split_csv_pad_per_pai(
    filename, regexps=None, regexp_column="attack_type"
):
    """Returns scores for Bona-Fide samples and scores for each PAI.

    By default, the values of the ``attack_type`` column are used as
    indication for each Presentation Attack Instrument (PAI).

    For example, with default regexps and regexp_column, if you have scores
    like::

        claimed_id, test_label,              is_bonafide, attack_type, score
        001,        bona_fide_sample_1_path, True,        ,            0.9
        001,        print_sample_1_path,     False,       print,       0.6
        001,        print_sample_2_path,     False,       print,       0.6
        001,        replay_sample_1_path,    False,       replay,      0.2
        001,        replay_sample_2_path,    False,       replay,      0.2
        001,        mask_sample_1_path,      False,       mask,        0.5
        001,        mask_sample_2_path,      False,       mask,        0.5

    this function will return 1 set of positive scores, and 3 sets of negative
    scores (for each print, replay, and mask PAIs).

    Otherwise, you can provide a list of regular expressions that match each
    PAI. For example, with regexps as ['print', 'replay', 'mask'], if you have
    scores like::

        claimed_id, test_label,              is_bonafide, attack_type, score
        001,        bona_fide_sample_1_path, True,        ,            0.9
        001,        print_sample_1_path,     False,       print/1,     0.6
        001,        print_sample_2_path,     False,       print/2,     0.6
        001,        replay_sample_1_path,    False,       replay/1,    0.2
        001,        replay_sample_2_path,    False,       replay/2,    0.2
        001,        mask_sample_1_path,      False,       mask/1,      0.5
        001,        mask_sample_2_path,      False,       mask/2,      0.5

    the function will return 3 sets of negative scores (for print, replay, and
    mask PAIs, given in regexp).

    Parameters
    ----------
    filename : str
        Path to the score file.
    regexps : :obj:`list`, optional
        A list of regular expressions that match each PAI. If not given (or
        empty), the values in the column pointed by regexp_column are used to
        find scores for different PAIs. The patterns are tried in order and
        the first one that matches (``re.search``) wins.
    regexp_column : :obj:`str`, optional
        The column whose values identify the PAI. If a list of regular
        expressions is given, those patterns will be matched against the
        values in this column. default: ``attack_type``

    Returns
    -------
    tuple
        A tuple, ([positives], {'pai_name': [negatives]}), containing positive
        scores and a dict of negative scores mapping PAIs names to their
        respective scores. When regexps are given, the PAI name is the text of
        the matching pattern. Scores are stored as yielded by the CSV reader;
        no numeric conversion is done here.

    Raises
    ------
    ValueError
        If none of the given regular expressions match the values in
        regexp_column.
    KeyError
        If regexp_column is not a column of the CSV file.
    """
    pos = []
    negs = defaultdict(list)
    logger.debug(f"Loading CSV score file: '{filename}'")

    # Compile once, outside of the row loop. A separate name is used so the
    # caller's argument is never rebound.
    patterns = [re.compile(pattern) for pattern in regexps] if regexps else []

    for row in _iterate_csv_score_file(filename):
        # if it is a Bona-Fide score
        if row["is_bonafide"].lower() == "true":
            pos.append(row["score"])
            continue

        pai_value = row[regexp_column]

        # Without patterns, the raw column value names the PAI.
        if not patterns:
            negs[pai_value].append(row["score"])
            continue

        # With patterns, the first matching pattern names the PAI.
        for pattern in patterns:
            if pattern.search(pai_value):
                negs[pattern.pattern].append(row["score"])
                break
        else:  # this else is for the for loop: ``for pattern in patterns:``
            raise ValueError(
                f"No regexps: {patterns} match `{pai_value}' "
                f"from `{regexp_column}' column."
            )

    logger.debug(f"Found {len(negs)} different PAIs names: {list(negs.keys())}")
    return pos, negs
def split_csv_pad(filename):
    """Loads PAD scores from a CSV score file, splits them by attack vs
    bonafide.

    The CSV must contain a ``is_bonafide`` column with each field either
    ``True`` or ``False`` (case insensitive). Any value other than ``true``
    is treated as an attack.

    Parameters
    ----------
    filename: str
        The path to a CSV file containing all the scores.

    Returns
    -------
    tuple
        Tuple of 1D-arrays: (attack, bonafide). The negative (attacks) and
        positives (bonafide) scores, as ``numpy.float64`` arrays. An array is
        empty when the file holds no score of that kind.
    """
    logger.debug(f"Loading CSV score file: '{filename}'")

    attack = []
    bonafide = []
    for row in _iterate_csv_score_file(filename):
        if row["is_bonafide"].lower() == "true":
            bonafide.append(row["score"])
        else:
            attack.append(row["score"])

    logger.debug(
        f"Found {len(attack)} negative (attack), and "
        f"{len(bonafide)} positive (bonafide) scores."
    )

    # Cast the scores to numpy float
    return (
        numpy.array(attack, dtype=numpy.float64),
        numpy.array(bonafide, dtype=numpy.float64),
    )