Coverage for src/bob/pad/base/error_utils.py: 95%

65 statements  

« prev     ^ index     » next       coverage.py v7.6.0, created at 2024-07-12 23:40 +0200

1#!/usr/bin/env python 

2# Ivana Chingovska <ivana.chingovska@idiap.ch> 

3# Fri Dec 7 12:33:37 CET 2012 

4"""Utility functions for computation of EPSC curve and related measurement""" 

5 

6import logging 

7import re 

8 

9from collections import defaultdict 

10 

11import numpy 

12 

13from bob.bio.base.score.load import _iterate_csv_score_file 

14from bob.measure import ( 

15 eer_threshold, 

16 far_threshold, 

17 farfrr, 

18 frr_threshold, 

19 min_hter_threshold, 

20) 

21 

22logger = logging.getLogger(__name__) 

23 

24 

def calc_threshold(
    method, pos, negs, all_negs, far_value=None, is_sorted=False
):
    """Compute a decision threshold according to the requested criterion.

    Parameters
    ----------
    method : str
        Name of the criterion (case-insensitive). Names containing ``bpcer``
        or ``apcer`` carry a numeric suffix ``N`` (e.g. ``bpcer20``,
        ``apcer20``) which is converted to a target rate of ``1 / N``. The
        other accepted names are ``far``, ``eer`` and ``min-hter``.
    pos : ``array_like``
        The positive scores.
    negs : list
        A list of array_like negative scores, one item per PAI. Only used by
        the ``bpcer`` criteria.
    all_negs : ``array_like``
        All negative scores in a single array. It could be derived from
        ``negs`` but is requested separately since callers usually have it
        available already.
    far_value : :obj:`float`, optional
        Target value handed to ``far_threshold``; only used when ``method``
        is ``far``.
    is_sorted : :obj:`bool`, optional
        Forwarded unchanged to the underlying threshold function. Set it to
        True when all scores are already sorted so that no sorting happens.

    Returns
    -------
    float
        The calculated threshold.

    Raises
    ------
    ValueError
        If method is unknown.
    """
    criterion = method.lower()

    if "bpcer" in criterion:
        # The numeric suffix N selects a target APCER of 1/N.
        target_apcer = 1 / float(criterion.replace("bpcer", ""))
        return apcer_threshold(target_apcer, pos, *negs, is_sorted=is_sorted)

    if "apcer" in criterion:
        # The numeric suffix N selects a target BPCER of 1/N.
        target_bpcer = 1 / float(criterion.replace("apcer", ""))
        return frr_threshold(all_negs, pos, target_bpcer, is_sorted=is_sorted)

    if criterion == "far":
        return far_threshold(all_negs, pos, far_value, is_sorted=is_sorted)

    if criterion == "eer":
        return eer_threshold(all_negs, pos, is_sorted=is_sorted)

    if criterion == "min-hter":
        return min_hter_threshold(all_negs, pos, is_sorted=is_sorted)

    raise ValueError("Unknown threshold criteria: {}".format(criterion))

78 

79 

def apcer_threshold(desired_apcer, pos, *negs, is_sorted=False):
    """Find the threshold that meets a desired APCER across all PAIs.

    One threshold is computed per PAI with ``far_threshold`` and the largest
    of them is returned. Since APCER is the maximum of the per-PAI APCER
    values, the resulting APCER is intended to be **at most** the desired
    value.

    Parameters
    ----------
    desired_apcer : float
        The desired APCER value.
    pos : list
        An array or list of positive scores in float.
    *negs
        Negative scores, one array or list per PAI.
    is_sorted : :obj:`bool`, optional
        Set to ``True`` if ALL arrays (pos and negs) are sorted.

    Returns
    -------
    float
        The computed threshold that satisfies the desired APCER.
    """
    per_pai_thresholds = [
        far_threshold(pai_scores, pos, desired_apcer, is_sorted=is_sorted)
        for pai_scores in negs
    ]
    # The largest per-PAI threshold is the one every PAI can comply with.
    return max(per_pai_thresholds)

109 

110 

def apcer_bpcer(threshold, pos, *negs):
    """Evaluate the per-PAI APCER, the overall APCER and the BPCER at a
    given threshold.

    Parameters
    ----------
    threshold : float
        The threshold to be used to compute the error rates.
    pos : list
        An array or list of positive scores in float.
    *negs
        Negative scores, one array or list per PAI. At least one is required.

    Returns
    -------
    tuple
        A tuple such as (list of APCER_PAI, APCER, BPCER)
    """
    assert len(negs) > 0, negs
    # ``farfrr`` yields one (far, frr) pair per PAI; transpose the pairs into
    # one sequence of FARs and one sequence of FRRs.
    fars, frrs = zip(*(farfrr(pai_scores, pos, threshold) for pai_scores in negs))
    apcers = list(fars)
    # The FRR is expected to be the same for every PAI; keep the last one.
    bpcer = frrs[-1]
    return apcers, max(apcers), bpcer

137 

138 

def split_csv_pad_per_pai(
    filename, regexps=None, regexp_column="attack_type"
):
    """Returns scores for Bona-Fide samples and scores for each PAI.
    By default, the ``attack_type`` column is used as indication for each
    Presentation Attack Instrument (PAI).

    For example, with default regexps and regexp_column, if you have scores
    like::

        claimed_id, test_label,              is_bonafide, attack_type, score
        001,        bona_fide_sample_1_path, True,        ,            0.9
        001,        print_sample_1_path,     False,       print,       0.6
        001,        print_sample_2_path,     False,       print,       0.6
        001,        replay_sample_1_path,    False,       replay,      0.2
        001,        replay_sample_2_path,    False,       replay,      0.2
        001,        mask_sample_1_path,      False,       mask,        0.5
        001,        mask_sample_2_path,      False,       mask,        0.5

    this function will return 1 set of positive scores, and 3 sets of negative
    scores (for each print, replay, and mask PAIs).

    Otherwise, you can provide a list regular expressions that match each PAI.
    For example, with regexps as ['print', 'replay', 'mask'], if you have
    scores like::

        claimed_id, test_label,              is_bonafide, attack_type, score
        001,        bona_fide_sample_1_path, True,        ,            0.9
        001,        print_sample_1_path,     False,       print/1,     0.6
        001,        print_sample_2_path,     False,       print/2,     0.6
        001,        replay_sample_1_path,    False,       replay/1,    0.2
        001,        replay_sample_2_path,    False,       replay/2,    0.2
        001,        mask_sample_1_path,      False,       mask/1,      0.5
        001,        mask_sample_2_path,      False,       mask/2,      0.5

    the function will return 3 sets of negative scores (for print, replay, and
    mask PAIs, given in regexp).


    Parameters
    ----------
    filename : str
        Path to the score file.
    regexps : :obj:`list`, optional
        A list of regular expressions that match each PAI. If not given (or
        empty), the values in the column pointed by regexp_column are used
        as-is to group scores of different PAIs. When several expressions
        match a value, the first one in the list wins.
    regexp_column : :obj:`str`, optional
        Name of the column that identifies the PAI of each attack row. If a
        list of regular expressions is given, those patterns are searched in
        the values of this column. default: ``attack_type``

    Returns
    -------
    tuple
        A tuple, ([positives], {'pai_name': [negatives]}), containing positive
        scores and a dict of negative scores mapping PAIs names to their
        respective scores. When regexps are given, the keys are the pattern
        strings. The mapping is a ``defaultdict(list)``.

    Raises
    ------
    ValueError
        If none of the given regular expressions match the values in
        regexp_column.
    KeyError
        If regexp_column is not a column of the CSV file.
    """
    pos = []
    negs = defaultdict(list)
    logger.debug(f"Loading CSV score file: '{filename}'")
    # ``None`` replaces the former mutable ``[]`` default; both mean "group by
    # the raw column value". Compile once, outside of the row loop.
    patterns = [re.compile(pattern) for pattern in regexps] if regexps else []

    for row in _iterate_csv_score_file(filename):
        # Any row whose is_bonafide field reads "true" (case-insensitive) is
        # a Bona-Fide score; everything else is treated as an attack.
        if row["is_bonafide"].lower() == "true":
            pos.append(row["score"])
            continue

        pai_value = row[regexp_column]
        if not patterns:
            negs[pai_value].append(row["score"])
            continue

        # First pattern (in the given order) found in the column value.
        matched = next(
            (pattern for pattern in patterns if pattern.search(pai_value)),
            None,
        )
        if matched is None:
            raise ValueError(
                f"No regexps: {patterns} match `{pai_value}' "
                f"from `{regexp_column}' column."
            )
        negs[matched.pattern].append(row["score"])

    logger.debug(f"Found {len(negs)} different PAIs names: {list(negs.keys())}")
    return pos, negs

229 

230 

def split_csv_pad(filename):
    """Loads PAD scores from a CSV score file, splits them by attack vs
    bonafide.

    The CSV must contain a ``is_bonafide`` column with each field either
    ``True`` or ``False`` (case insensitive). Any value other than ``true``
    is counted as an attack.

    Parameters
    ----------
    filename: str
        The path to a CSV file containing all the scores.

    Returns
    -------
    tuple
        Tuple of 1D-arrays: (attack, bonafide). The negative (attacks) and
        positives (bonafide) scores, as ``numpy.float64`` arrays. A category
        without any score yields an empty array.
    """
    logger.debug(f"Loading CSV score file: '{filename}'")
    # Two explicit lists so that an absent category still produces an (empty)
    # array, independently of what the logging below happens to evaluate.
    attack_scores = []
    bonafide_scores = []
    for row in _iterate_csv_score_file(filename):
        if row["is_bonafide"].lower() == "true":
            bonafide_scores.append(row["score"])
        else:
            attack_scores.append(row["score"])
    # Note the trailing space after "and": the two fragments are concatenated.
    logger.debug(
        f"Found {len(attack_scores)} negative (attack), and "
        f"{len(bonafide_scores)} positive (bonafide) scores."
    )
    # Cast the scores to numpy float
    attack = numpy.array(attack_scores, dtype=numpy.float64)
    bonafide = numpy.array(bonafide_scores, dtype=numpy.float64)
    return attack, bonafide