Coverage for src/bob/measure/utils.py: 79%

68 statements  

« prev     ^ index     » next       coverage.py v7.0.5, created at 2023-06-16 14:10 +0200

1""" utility functions for bob.measure """ 

2 

3import logging 

4 

5import numpy 

6import scipy.stats 

7 

8LOGGER = logging.getLogger(__name__) 

9 

10 

def remove_nan(scores):
    """Remove NaN(s) from the given array of scores.

    Parameters
    ----------
    scores : :py:class:`numpy.ndarray`
        One-dimensional array of scores. Any array-like (e.g. a plain
        :py:class:`list` of floats) is accepted and converted with
        :py:func:`numpy.asarray`.

    Returns
    -------
    :py:class:`numpy.ndarray` : array without NaN(s)
    :py:class:`int` : number of NaN(s) in the input array
    :py:class:`int` : length of the input array
    """
    # Boolean-mask indexing below needs an ndarray; asarray does not copy
    # when the input already is one.
    scores = numpy.asarray(scores)
    nans = numpy.isnan(scores)
    # Count in vectorized code instead of iterating the mask with the builtin
    # ``sum``; ``int`` makes the return value match the documented type.
    sum_nans = int(numpy.count_nonzero(nans))
    total = len(scores)
    if sum_nans > 0:
        LOGGER.warning("Found %s NaNs in %s scores", sum_nans, total)
    return scores[~nans], sum_nans, total

33 

34 

def get_fta(scores):
    """Calculate the Failure To Acquire (FtA) rate.

    The FtA rate is the proportion of NaN(s) among all input scores
    (negatives and positives taken together).

    Parameters
    ----------
    scores :
        Tuple of (``negative``, ``positive``) :py:class:`numpy.ndarray`.
        The first element is treated as the negatives and the second as the
        positives.

    Returns
    -------
    (:py:class:`numpy.ndarray`, :py:class:`numpy.ndarray`): (negatives,
        positives) scores without NaN(s)
    :py:class:`float` : failure to acquire rate

    Raises
    ------
    ZeroDivisionError
        If both score arrays are empty, since the rate is then undefined.
    """
    fta_sum, fta_total = 0.0, 0.0
    cleaned = []
    # Index 0 holds the negatives, index 1 the positives.
    for index in (0, 1):
        kept, sum_nans, total = remove_nan(scores[index])
        cleaned.append(kept)
        fta_sum += sum_nans
        fta_total += total
    neg, pos = cleaned
    return ((neg, pos), fta_sum / fta_total)

59 

60 

def get_fta_list(scores):
    """Compute the NaN-free scores and FtA rate for each entry of a list.

    Parameters
    ----------
    scores: :any:`list`
        list of scores; each entry is passed to :py:func:`get_fta`

    Returns
    -------
    neg_list: :any:`list`
        list of negatives
    pos_list: :any:`list`
        list of positives
    fta_list: :any:`list`
        list of FTAs

    Raises
    ------
    ValueError
        If an entry is ``None`` or no negatives were obtained for it.
    """
    negatives, positives, ftas = [], [], []
    for entry in scores:
        # A missing entry can never yield negatives, so reject it up front.
        if entry is None:
            raise ValueError("While loading dev-score file")
        (neg, pos), fta = get_fta(entry)
        if neg is None:
            raise ValueError("While loading dev-score file")
        negatives.append(neg)
        positives.append(pos)
        ftas.append(fta)
    return (negatives, positives, ftas)

91 

92 

def get_thres(criter, neg, pos, far=None):
    """Compute the threshold for the given scores and criterion.

    Parameters
    ----------
    criter :
        Criterion: ``eer``, ``min-hter`` or ``far``
    neg : :py:class:`numpy.ndarray`
        array of negative scores
    pos : :py:class:`numpy.ndarray`
        array of positive scores
    far : optional
        FAR value passed on to ``far_threshold``; required when ``criter``
        is ``far``

    Returns
    -------
    :py:obj:`float`
        threshold

    Raises
    ------
    ValueError
        If ``criter`` is not one of the supported criteria, or if it is
        ``far`` and no ``far`` value was given.
    """
    # The threshold functions are imported lazily from the package, only for
    # the branch that needs them.
    if criter == "eer":
        from . import eer_threshold

        return eer_threshold(neg, pos)

    if criter == "min-hter":
        from . import min_hter_threshold

        return min_hter_threshold(neg, pos)

    if criter != "far":
        raise ValueError("Incorrect plotting criterion: ``%s``" % criter)

    if far is None:
        raise ValueError(
            "FAR value must be provided through "
            "``--far-value`` or ``--fpr-value`` option."
        )
    from . import far_threshold

    return far_threshold(neg, pos, far)

129 

130 

def get_colors(n):
    """Get a list of matplotlib colors.

    Parameters
    ----------
    n : :obj:`int`
        Number of colors needed

    Returns
    -------
    :any:`list`
        list of colors. For ``n <= 10`` this is always the ten color-cycle
        aliases ``"C0"`` .. ``"C9"``; for larger ``n`` it is ``n + 1`` colors
        sampled evenly from the ``magma`` colormap. In both cases the list
        holds at least ``n`` entries.
    """
    if n > 10:
        from matplotlib import pyplot

        # ``pyplot.cm.get_cmap`` is ``matplotlib.cm.get_cmap``, which was
        # deprecated in matplotlib 3.7 and removed in 3.9;
        # ``pyplot.get_cmap`` works on old and new releases alike.
        cmap = pyplot.get_cmap("magma")
        return [cmap(i) for i in numpy.linspace(0, 1.0, n + 1)]

    return ["C{}".format(index) for index in range(10)]

152 

153 

def get_linestyles(n, on=True):
    """Build a list of matplotlib linestyles.

    Parameters
    ----------
    n : :obj:`int`
        Number of linestyles needed
    on : :obj:`bool`, optional
        If false, linestyles are disabled and a list of ``n`` ``None``
        values is returned instead.

    Returns
    -------
    :any:`list`
        list of linestyles. When ``on`` is true the 13 base styles are
        repeated (doubling each time) until the list holds at least ``n``
        entries, so it may be longer than ``n``.
    """
    if not on:
        return [None] * n

    # (offset, (on, off, ...)) dash specifications.
    base_styles = [
        (0, ()),  # solid
        (0, (1, 1)),  # densely dotted
        (0, (5, 5)),  # dashed
        (0, (5, 1)),  # densely dashed
        (0, (3, 1, 1, 1, 1, 1)),  # densely dashdotdotted
        (0, (3, 10, 1, 10, 1, 10)),  # loosely dashdotdotted
        (0, (3, 5, 1, 5, 1, 5)),  # dashdotdotted
        (0, (3, 1, 1, 1)),  # densely dashdotted
        (0, (1, 5)),  # dotted
        (0, (3, 5, 1, 5)),  # dashdotted
        (0, (5, 10)),  # loosely dashed
        (0, (3, 10, 1, 10)),  # loosely dashdotted
        (0, (1, 10)),  # loosely dotted
    ]

    # Double the repetition count until the cycle is long enough.
    repeats = 1
    while n > len(base_styles) * repeats:
        repeats *= 2
    return base_styles * repeats

188 

189 

def confidence_for_indicator_variable(x, n, alpha=0.05):
    """Compute the confidence interval of a proportion estimate.

    The bounds are obtained with the Clopper-Pearson interval method, using
    quantiles of the beta distribution.

    Parameters
    ----------
    x : int
        The number of successes.
    n : int
        The number of trials.
    alpha : :obj:`float`, optional
        The 1-confidence value that you want. For example, alpha should be 0.05
        to obtain 95% confidence intervals.

    Returns
    -------
    (:obj:`float`, :obj:`float`)
        a tuple of (lower_bound, upper_bound) which
        shows the limit of your success rate: lower_bound < x/n < upper_bound
    """
    tail = alpha / 2.0
    beta = scipy.stats.beta
    low = beta.ppf(tail, x, n - x + 1)
    high = beta.ppf(1 - tail, x + 1, n - x)
    # A NaN quantile is replaced by the corresponding end of the [0, 1] range.
    low = 0 if numpy.isnan(low) else low
    high = 1 if numpy.isnan(high) else high
    return (low, high)