Coverage for src/bob/measure/utils.py: 79%
68 statements
« prev ^ index » next coverage.py v7.0.5, created at 2023-06-16 14:10 +0200
« prev ^ index » next coverage.py v7.0.5, created at 2023-06-16 14:10 +0200
1""" utility functions for bob.measure """
3import logging
5import numpy
6import scipy.stats
8LOGGER = logging.getLogger(__name__)
def remove_nan(scores):
    """Remove NaN(s) from the given array of scores.

    Parameters
    ----------
    scores : :py:class:`numpy.ndarray` or array-like
        One-dimensional collection of scores. Plain sequences are converted
        to an array first so that boolean-mask indexing works on them.

    Returns
    -------
    :py:class:`numpy.ndarray` : array without NaN(s)
    :py:class:`int` : number of NaN(s) in the input array
    :py:class:`int` : length of the input array
    """
    # asanyarray is a no-op for ndarrays (and keeps ndarray subclasses), but
    # lets lists/tuples through, which cannot be indexed with a boolean mask.
    scores = numpy.asanyarray(scores)
    nans = numpy.isnan(scores)
    # Vectorised count; the builtin ``sum`` would walk the mask element by
    # element in Python.
    sum_nans = int(numpy.count_nonzero(nans))
    total = len(scores)
    if sum_nans > 0:
        # Lazy %-style arguments: the message is only built if it is emitted.
        LOGGER.warning("Found %s NaNs in %s scores", sum_nans, total)
    return scores[~nans], sum_nans, total
def get_fta(scores):
    """Compute the Failure To Acquire (FtA) rate of a pair of score arrays.

    The FtA rate is the proportion of NaN(s) over both arrays taken together.

    Parameters
    ----------
    scores :
        Indexable pair of :py:class:`numpy.ndarray`. Element ``0`` is handled
        as the negatives and element ``1`` as the positives.

    Returns
    -------
    (:py:class:`numpy.ndarray`, :py:class:`numpy.ndarray`): the
        ``(negatives, positives)`` scores without NaN(s)
    :py:class:`float` : failure to acquire rate

    Notes
    -----
    The rate is ``nan_count / score_count`` with no guard for a zero total,
    so two empty arrays do not yield a meaningful rate.
    """
    cleaned = []
    nan_count = 0.0
    score_count = 0.0
    # Index 0 (negatives) is processed before index 1 (positives).
    for idx in (0, 1):
        kept, found, length = remove_nan(scores[idx])
        cleaned.append(kept)
        nan_count += found
        score_count += length
    return ((cleaned[0], cleaned[1]), nan_count / score_count)
def get_fta_list(scores):
    """Split a list of score pairs into negatives, positives and FtA rates.

    Each entry is passed to :py:func:`get_fta`; the three results are
    collected into parallel lists in input order.

    Parameters
    ----------
    scores: :any:`list`
        list of scores, one entry per system; no entry may be ``None``

    Returns
    -------
    neg_list: :any:`list`
        list of negatives
    pos_list: :any:`list`
        list of positives
    fta_list: :any:`list`
        list of FTAs

    Raises
    ------
    ValueError
        If an entry is ``None`` or its negatives come back as ``None``.
    """
    negatives, positives, rates = [], [], []
    for entry in scores:
        if entry is None:
            neg, pos, fta = None, None, None
        else:
            (neg, pos), fta = get_fta(entry)
        # A missing entry and missing negatives are reported the same way.
        if neg is None:
            raise ValueError("While loading dev-score file")
        negatives.append(neg)
        positives.append(pos)
        rates.append(fta)
    return (negatives, positives, rates)
def get_thres(criter, neg, pos, far=None):
    """Get the threshold for the given scores and criterion.

    Parameters
    ----------
    criter :
        Criterion, one of ``"eer"``, ``"min-hter"`` or ``"far"``
    neg : :py:class:`numpy.ndarray`
        array of negative scores
    pos : :py:class:`numpy.ndarray`
        array of positive scores
    far : optional
        value forwarded to ``far_threshold``; required when ``criter`` is
        ``"far"`` and ignored otherwise

    Returns
    -------
    :py:obj:`float`
        threshold

    Raises
    ------
    ValueError
        If ``criter`` is not a known criterion, or if it is ``"far"`` and
        ``far`` is ``None``.
    """
    # The threshold functions are imported from the enclosing package only
    # when their branch is taken.
    if criter == "eer":
        from . import eer_threshold

        return eer_threshold(neg, pos)

    if criter == "min-hter":
        from . import min_hter_threshold

        return min_hter_threshold(neg, pos)

    if criter == "far":
        if far is None:
            raise ValueError(
                "FAR value must be provided through "
                "``--far-value`` or ``--fpr-value`` option."
            )
        from . import far_threshold

        return far_threshold(neg, pos, far)

    raise ValueError("Incorrect plotting criterion: ``%s``" % criter)
def get_colors(n):
    """Get a list of matplotlib colors.

    Parameters
    ----------
    n : :obj:`int`
        Number of colors needed

    Returns
    -------
    :any:`list`
        list of colors. For ``n <= 10`` this is always the ten color-cycle
        names ``"C0"`` .. ``"C9"``. For larger ``n`` it holds ``n + 1`` RGBA
        values sampled evenly from the ``magma`` colormap.
    """
    if n > 10:
        from matplotlib import pyplot

        # ``pyplot.cm.get_cmap`` was deprecated in Matplotlib 3.7 and removed
        # in 3.9; ``pyplot.get_cmap`` is the supported lookup by name.
        cmap = pyplot.get_cmap("magma")
        # n + 1 samples are kept as in the original implementation.
        # NOTE(review): presumably so the last (lightest) color is never
        # reached when indexing 0..n-1 -- confirm.
        return [cmap(i) for i in numpy.linspace(0, 1.0, n + 1)]

    return ["C0", "C1", "C2", "C3", "C4", "C5", "C6", "C7", "C8", "C9"]
def get_linestyles(n, on=True):
    """Get a list of matplotlib linestyles.

    Parameters
    ----------
    n : :obj:`int`
        Minimum number of linestyles needed
    on : :obj:`bool`, optional
        When false, no styles are selected and ``n`` ``None`` entries are
        returned instead.

    Returns
    -------
    :any:`list`
        list of linestyles as ``(offset, dash-sequence)`` tuples. It holds at
        least ``n`` entries: the 13 base styles, repeated by doubling until
        the list is long enough.
    """
    if not on:
        return [None] * n

    base_styles = [
        (0, ()),  # solid
        (0, (1, 1)),  # densely dotted
        (0, (5, 5)),  # dashed
        (0, (5, 1)),  # densely dashed
        (0, (3, 1, 1, 1, 1, 1)),  # densely dashdotdotted
        (0, (3, 10, 1, 10, 1, 10)),  # loosely dashdotdotted
        (0, (3, 5, 1, 5, 1, 5)),  # dashdotdotted
        (0, (3, 1, 1, 1)),  # densely dashdotted
        (0, (1, 5)),  # dotted
        (0, (3, 5, 1, 5)),  # dashdotted
        (0, (5, 10)),  # loosely dashed
        (0, (3, 10, 1, 10)),  # loosely dashdotted
        (0, (1, 10)),  # loosely dotted
    ]
    # Double the repeat count until the cycle covers n entries.
    repeats = 1
    while n > len(base_styles) * repeats:
        repeats *= 2
    return base_styles * repeats
def confidence_for_indicator_variable(x, n, alpha=0.05):
    """Calculate the confidence interval of a proportion estimate.

    The Clopper-Pearson interval method is used: both bounds are quantiles of
    beta distributions parameterised by the success and failure counts.

    Parameters
    ----------
    x : int
        The number of successes.
    n : int
        The number of trials.
    alpha : :obj:`float`, optional
        The 1-confidence value that you want. For example, alpha should be 0.05
        to obtain 95% confidence intervals.

    Returns
    -------
    (:obj:`float`, :obj:`float`)
        a tuple of (lower_bound, upper_bound) which
        shows the limit of your success rate: lower_bound < x/n < upper_bound
    """
    tail = alpha / 2.0
    beta_quantile = scipy.stats.beta.ppf
    lower = beta_quantile(tail, x, n - x + 1)
    upper = beta_quantile(1 - tail, x + 1, n - x)
    # A NaN quantile (e.g. x == 0 or x == n) is replaced by the matching end
    # of the [0, 1] range.
    lower = 0 if numpy.isnan(lower) else lower
    upper = 1 if numpy.isnan(upper) else upper
    return (lower, upper)