Coverage for src/bob/bio/face/database/lfw.py: 15%
203 statements
« prev ^ index » next coverage.py v7.6.0, created at 2024-07-13 00:04 +0200
« prev ^ index » next coverage.py v7.6.0, created at 2024-07-13 00:04 +0200
1#!/usr/bin/env python
2# vim: set fileencoding=utf-8 :
3# Tiago de Freitas Pereira <tiago.pereira@idiap.ch>
4# Sat 20 Aug 15:43:10 CEST 2016
6import copy
7import logging
8import os
10from functools import partial
12import numpy as np
14from clapper.rc import UserDefaults
16import bob.io.base
18from bob.bio.base.database.utils import download_file, md5_hash
19from bob.bio.base.pipelines.abstract_classes import Database
20from bob.pipelines import DelayedSample, SampleSet
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# User configuration loaded from "bobrc.toml"; queried below for the default
# LFW data directory, annotation directory and image file extension.
rc = UserDefaults("bobrc.toml")
26class LFWDatabase(Database): # TODO Make this a CSVDatabase?
27 """
28 This package contains the access API and descriptions for the `Labeled Faces in the Wild <http://vis-www.cs.umass.edu/lfw>`_ (LFW) database.
29 It only contains the Bob_ accessor methods to use the DB directly from python, with our certified protocols.
30 The actual raw data for the database should be downloaded from the original URL (though we were not able to contact the corresponding Professor).
33 The LFW database provides two different sets (called "views").
34 The first one, called ``view1`` is used for optimizing meta-parameters of your algorithm.
35 The second one, called ``view2`` is used for benchmarking.
36 This interface supports only the ``view2`` protocol.
37 Please note that in ``view2`` there is only a ``'dev'`` group, but no ``'eval'``.
40 .. warning::
42 To use this dataset protocol, you need to have the original files of the LFW datasets.
43 Once you have it downloaded, please run the following command to set the path for Bob
45 .. code-block:: sh
47 bob config set bob.bio.face.lfw.directory [LFW PATH]
48 bob config set bob.bio.face.lfw.annotation_directory [LFW ANNOTATION_PATH] # for the annotations
52 .. code-block:: python
54 >>> from bob.bio.face.database import LFWDatabase
55 >>> lfw = LFWDatabase(protocol="view2")
56 >>>
57 >>> # Fetching the gallery
58 >>> references = lfw.references()
59 >>> # Fetching the probes
60 >>> probes = lfw.probes()
64 Parameters
65 ----------
67 protocol: str
68 One of the database protocols. Options are `view2`, `o1`, `o2` and `o3`
70 annotation_type: str
71 Type of the annotations used for face crop. Default to `eyes-center`
73 image_relative_path: str
74 LFW provides several types of image crops: some with the full image, some with a specific
75 face crop. Use this variable to set which image crop you want. Defaults to `all_images`, which means
76 no crop.
78 annotation_directory: str
79 LFW annotations path. Defaults to what is set in the variable `bob.bio.face.lfw.annotation_directory`
81 original_directory: str
82 LFW physical path. Defaults to what is set in the variable `bob.bio.face.lfw.directory`
84 annotation_issuer: str
85 Type of the annotations. Default to `funneled`. Possible types `funneled`, `idiap` or `named`
87 """
def __init__(
    self,
    protocol,
    annotation_type="eyes-center",
    image_relative_path="all_images",
    fixed_positions=None,
    original_directory=rc.get("bob.bio.face.lfw.directory"),
    extension=rc.get("bob.bio.face.lfw.extension", ".jpg"),
    annotation_directory=rc.get("bob.bio.face.lfw.annotation_directory"),
    annotation_issuer="funneled",
):
    """Set up the LFW database and load the pairs of the chosen protocol.

    Parameters
    ----------
    protocol: str
        Protocol name; must be one of :py:meth:`protocols`.
    annotation_type: str
        Forwarded unchanged to the ``Database`` base class.
    image_relative_path: str
        Sub-directory of ``original_directory`` that holds the images.
    fixed_positions:
        Forwarded unchanged to the ``Database`` base class.
    original_directory: str
        Root directory of the LFW data. A warning is logged when it is
        ``None`` or does not exist.
    extension: str
        File extension of the images.
    annotation_directory: str
        Directory with the annotations. When ``None`` or non-existent, the
        annotations are downloaded from :py:meth:`urls` and the
        sub-directory of ``annotation_issuer`` is used.
    annotation_issuer: str
        One of ``funneled``, ``idiap`` or ``named``.

    Raises
    ------
    AssertionError
        If ``protocol`` is not a known protocol.
    ValueError
        If ``annotation_issuer`` is not a known issuer.
    """
    import warnings

    # stacklevel=2 attributes the warning to the code that instantiates the
    # database instead of to this constructor.
    warnings.warn(
        "The lfw database is not yet adapted to this version of bob. Please port it or ask for it to be ported (This one actually needs to be converted to a CSVDatabase).",
        DeprecationWarning,
        stacklevel=2,
    )

    # Validate the cheap arguments first, so that a mistyped protocol or
    # issuer fails before any annotation download is attempted.
    self._check_protocol(protocol)

    if annotation_issuer not in ("funneled", "idiap", "named"):
        raise ValueError(
            f"Invalid annotation issuer: {annotation_issuer}. Possible values are `idiap`, `funneled` or `named`"
        )

    if original_directory is None or not os.path.exists(original_directory):
        logger.warning(
            f"Invalid or non existent `original_directory`: {original_directory}. "
            "Please, do `bob config set bob.bio.face.lfw.directory PATH` to set the LFW data directory."
        )

    if annotation_directory is None or not os.path.exists(
        annotation_directory
    ):
        # Downloading annotations if not exists
        annotation_urls = LFWDatabase.urls()

        logger.info(
            f"`annotation_directory`: {annotation_directory} not set. "
            f"Fetching it from {annotation_urls[0]}"
        )

        annotation_directory = download_file(
            urls=annotation_urls,
            destination_filename="lfw_annotations.tar.gz",
            checksum="c0ce6e090e19d0ed159172fcba2e8252",
            checksum_fct=md5_hash,
            extract=True,
        )

        # The archive is extracted into a `lfw_annotations` sub-directory
        # (assumes `download_file` returns a path object supporting `/`).
        annotation_directory = annotation_directory / "lfw_annotations"

        # Attaching the issuer sub-directory
        annotation_directory = os.path.join(
            annotation_directory, annotation_issuer
        )

    self.annotation_issuer = annotation_issuer
    # The annotation file extension is fixed per issuer and deliberately
    # not exposed as a parameter.
    self.annotation_extension = (
        ".jpg.pts" if annotation_issuer == "funneled" else ".pos"
    )

    self.references_dict = {}
    self.probes_dict = {}
    self.pairs = {}
    self.probe_reference_keys = {}  # Inverted pairs

    self.annotations = None
    self.original_directory = original_directory
    self.annotation_directory = annotation_directory
    self.extension = extension
    self.image_relative_path = image_relative_path

    # Path helpers: "Some_Name_0001" -> subject "Some_Name" and relative
    # path "Some_Name/Some_Name_0001".
    self.subject_id_from_filename = lambda x: "_".join(x.split("_")[0:-1])

    self.make_path_from_filename = lambda x: os.path.join(
        self.subject_id_from_filename(x), x
    )

    super().__init__(
        name="lfw",
        protocol=protocol,
        score_all_vs_all=protocol[0] == "o",
        annotation_type=annotation_type,
        fixed_positions=fixed_positions,
        memory_demanding=False,
    )

    self.load_pairs()
def _extract_funneled(self, annotation_path):
    """Read the nine facial landmarks of a funneled-image annotation file.

    Inspired by: https://gitlab.idiap.ch/bob/bob.db.lfw/-/blob/5ac22c5b77aae971de6b73cbe23f26d6a5632072/bob/db/lfw/models.py#L69

    Parameters
    ----------
    annotation_path: str
        Path of the annotation file. Only its first line is read; it must
        hold 18 whitespace-separated numbers (nine pairs).

    Returns
    -------
    dict
        Maps each landmark name to a ``(second, first)`` tuple of its pair
        (presumably ``(y, x)``; the file format is not documented here),
        plus ``leye`` and ``reye`` as the midpoint of the eye corners.

    Raises
    ------
    AssertionError
        If the first line does not contain exactly 18 values.
    """
    with open(annotation_path) as f:
        # Splitting on any whitespace tolerates the trailing newline as well
        # as trailing or repeated blanks, which `split(" ")` turns into
        # empty tokens that cannot be parsed as floats.
        values = [float(token) for token in f.readline().split()]

    assert (
        len(values) == 18
    ), f"Expected 18 values in {annotation_path}, found {len(values)}"

    locations = (
        "reyeo",
        "reyei",
        "leyei",
        "leyeo",
        "noser",
        "noset",
        "nosel",
        "mouthr",
        "mouthl",
    )
    # Each landmark occupies two consecutive values, stored swapped.
    annotations = {
        name: (values[2 * i + 1], values[2 * i])
        for i, name in enumerate(locations)
    }

    # add eye center annotations as the center between the eye corners
    for eye in ("leye", "reye"):
        inner, outer = annotations[eye + "i"], annotations[eye + "o"]
        annotations[eye] = (
            (inner[0] + outer[0]) / 2.0,
            (inner[1] + outer[1]) / 2.0,
        )

    return annotations
def _extract_idiap(self, annotation_file):
    """Read the eye positions from an Idiap-style annotation file.

    Inspired by: https://gitlab.idiap.ch/bob/bob.db.lfw/-/blob/5ac22c5b77aae971de6b73cbe23f26d6a5632072/bob/db/lfw/models.py#L81

    Every line made of exactly three space-separated fields is read as
    ``<index> <value> <value>``; all other lines are ignored. Index 3 is
    reported as ``reye`` and index 8 as ``leye``, each only when present.
    """
    points = {}
    with open(annotation_file) as handle:
        for raw in handle:
            fields = raw.split(" ")
            if len(fields) != 3:
                continue
            index, first, second = fields
            points[int(index)] = (float(first), float(second))

    # Point index -> annotation name, in the order the names are emitted.
    eye_indices = ((3, "reye"), (8, "leye"))
    return {
        name: points[index] for index, name in eye_indices if index in points
    }
def _extract_named(self, annotation_file):
    """Read named annotations as provided in the biometrics resources.

    Download them here: https://www.idiap.ch/webarchives/sites/www.idiap.ch/resource/biometric

    Every line made of exactly three space-separated fields is read as
    ``<name> <value> <value>`` and stored under ``name`` with the two values
    swapped; other lines are ignored. Both ``leye`` and ``reye`` must be
    present, otherwise the assertion fails.
    """
    found = {}
    with open(annotation_file) as handle:
        for raw in handle:
            fields = raw.split(" ")
            if len(fields) != 3:
                continue
            label, first, second = fields
            found[label] = (float(second), float(first))

    assert "leye" in found and "reye" in found

    return found
def _extract(self, annotation_file):
    """Parse ``annotation_file`` with the reader matching ``self.annotation_issuer``.

    Raises ``KeyError`` when the issuer is none of ``funneled``, ``idiap``
    or ``named``.
    """
    readers = {
        "funneled": self._extract_funneled,
        "idiap": self._extract_idiap,
        "named": self._extract_named,
    }
    reader = readers[self.annotation_issuer]
    return reader(annotation_file)
def load_pairs(self):
    """Fill ``self.pairs`` for the current protocol.

    ``view2``: reads ``<original_directory>/view2/pairs.txt`` (tab
    separated, first line skipped). Three-column lines are genuine pairs
    ``name, index, index``; other lines are read as impostor pairs
    ``name, index, name, index``. ``self.pairs`` maps the relative path of
    the first image to the list of relative paths it is paired with, and
    the inverted dictionary is rebuilt afterwards.

    ``o1``/``o2``/``o3``: scans the person directories below
    ``<original_directory>/<image_relative_path>`` and sorts the image names
    of each person into ``enroll``/``probe`` (more than three images),
    ``training-unknown``/``o1`` (two or three images) or ``o2`` (a single
    image).
    """
    if self.protocol == "view2":
        pairs_path = os.path.join(
            self.original_directory, "view2", "pairs.txt"
        )
        self.pairs = {}

        def make_filename(name, index):
            return f"{name}_{index.zfill(4)}"

        with open(pairs_path) as f:
            # Skip the first line
            next(f, None)
            for line in f:
                # A blank (e.g. trailing) line carries no pair; indexing
                # into it would raise IndexError.
                if not line.strip():
                    continue

                # Strip the line terminator once, including the "\r" of
                # CRLF files, which would otherwise end up in the file name.
                fields = line.rstrip("\r\n").split("\t")

                # Three columns: genuine pair, otherwise impostor pair
                if len(fields) == 3:
                    key_filename = make_filename(fields[0], fields[1])
                    value_filename = make_filename(fields[0], fields[2])
                else:
                    key_filename = make_filename(fields[0], fields[1])
                    value_filename = make_filename(fields[2], fields[3])

                key = self.make_path_from_filename(key_filename)
                value = self.make_path_from_filename(value_filename)

                self.pairs.setdefault(key, []).append(value)

        self._create_probe_reference_dict()

    elif self.protocol[0] == "o":
        self.pairs = {
            "enroll": {},
            "training-unknown": [],
            "probe": {},
            "o1": [],
            "o2": [],
        }
        image_root = os.path.join(
            self.original_directory, self.image_relative_path
        )
        # os.listdir returns entries in arbitrary order; sorting keeps the
        # generated protocol identical across file systems.
        for d in sorted(os.listdir(image_root)):
            dd = os.path.join(image_root, d)
            if not os.path.isdir(dd):
                continue

            # collect the image names (without extension) of this person
            images = sorted(
                os.path.splitext(i)[0]
                for i in os.listdir(dd)
                if os.path.splitext(i)[1] == self.extension
            )

            if len(images) > 3:
                # take the first three images for enrollment
                self.pairs["enroll"][d] = images[:3]
                # and the remaining images for known probes
                self.pairs["probe"][d] = images[3:]
            elif len(images) > 1:
                # take the first image as known unknown for training (ignored in our case)
                self.pairs["training-unknown"].append(images[0])
                # and the remaining as known unknown probe
                self.pairs["o1"].extend(images[1:])
            elif images:
                # one image -> use as unknown unknown probe
                self.pairs["o2"].append(images[0])
            # A directory without any matching image contributes nothing.
@staticmethod
def protocols():
    """List the accepted protocol names: ``view2`` and the open-set ``o1``-``o3``."""
    view_protocols = ["view2"]
    open_set_protocols = ["o1", "o2", "o3"]
    return view_protocols + open_set_protocols
def background_model_samples(self):
    """Return the training set of the open-set protocols o1, o2 and o3.

    The result is made of the :py:meth:`references` followed by one extra
    sample set gathering all ``training-unknown`` images under the subject
    id ``"unknown"``.

    Returns
    -------

    [bob.pipelines.SampleSet]
        The training sample sets; an empty list for any protocol whose name
        does not start with ``"o"``.

    """
    if self.protocol[0] != "o":
        return []

    known_subjects = self.references()

    # image name -> (image path, annotations or None)
    entries = {}
    for name in self.pairs["training-unknown"]:
        relative_path = self.make_path_from_filename(name)
        path = os.path.join(
            self.original_directory,
            self.image_relative_path,
            relative_path + self.extension,
        )
        annotations = None
        if self.annotation_directory is not None:
            annotations = self._extract(
                os.path.join(
                    self.annotation_directory,
                    relative_path + self.annotation_extension,
                )
            )
        entries[name] = (path, annotations)

    # all training-unknown images end up in a single "unknown" sample set
    unknown_samples = [
        DelayedSample(
            key=name,
            load=partial(bob.io.base.load, path),
            annotations=annotations,
        )
        for name, (path, annotations) in entries.items()
    ]
    unknown_set = SampleSet(
        key="unknown",
        template_id="unknown",
        subject_id="unknown",
        samples=unknown_samples,
    )
    return known_subjects + [unknown_set]
def _create_probe_reference_dict(self):
    """Invert ``self.pairs`` into ``self.probe_reference_keys``.

    Afterwards each probe key maps to the list of biometric reference keys
    that probe should be compared with. Nothing is returned, and nothing is
    done for protocols whose name starts with ``"o"``.
    """
    if self.protocol[0] == "o":
        return

    inverted = self.probe_reference_keys = {}
    for reference_key, probe_keys in self.pairs.items():
        for probe_key in probe_keys:
            inverted.setdefault(probe_key, []).append(reference_key)
def probes(self, group="dev"):
    """Return the probe sample sets of the current protocol.

    The list is built on the first call and cached in ``self.probes_dict``.
    It is cached only once it has been built completely, so a failure
    midway (e.g. a missing annotation file) does not leave a truncated list
    behind for later calls.

    ``view2``: one sample set per probe key, carrying the keys of the
    references it must be compared with. Open-set protocols: one sample set
    per known probe image, plus the ``o1`` images (protocols ``o1`` and
    ``o3``) and the ``o2`` images (protocols ``o2`` and ``o3``) with the
    subject id ``"unknown"``.

    Parameters
    ----------
    group: str
        Not used by this method.

    Returns
    -------
    [bob.pipelines.SampleSet]
    """
    if self.protocol in self.probes_dict:
        return self.probes_dict[self.protocol]

    def image_path_for(relative_path):
        return os.path.join(
            self.original_directory,
            self.image_relative_path,
            relative_path + self.extension,
        )

    def annotations_for(relative_path):
        if self.annotation_directory is None:
            return None
        return self._extract(
            os.path.join(
                self.annotation_directory,
                relative_path + self.annotation_extension,
            )
        )

    samplesets = []

    if self.protocol == "view2":
        for key in self.probe_reference_keys:
            image_path = image_path_for(key)
            annotations = annotations_for(key)
            subject_id = self.subject_id_from_filename(key)

            samplesets.append(
                SampleSet(
                    key=key,
                    template_id=key,
                    subject_id=subject_id,
                    references=copy.deepcopy(
                        self.probe_reference_keys[key]
                    ),  # deep copying to avoid bizarre issues with dask
                    samples=[
                        DelayedSample(
                            key=key,
                            template_id=key,
                            subject_id=subject_id,
                            load=partial(bob.io.base.load, image_path),
                            annotations=annotations,
                        )
                    ],
                )
            )

    elif self.protocol[0] == "o":
        # known probes first, then the unknowns selected by the protocol
        probe_list = [
            (image, subject)
            for subject, images in self.pairs["probe"].items()
            for image in images
        ]
        if self.protocol in ("o1", "o3"):
            probe_list += [(image, "unknown") for image in self.pairs["o1"]]
        if self.protocol in ("o2", "o3"):
            probe_list += [(image, "unknown") for image in self.pairs["o2"]]

        for image, subject in probe_list:
            relative_path = self.make_path_from_filename(image)
            image_path = image_path_for(relative_path)
            annotations = annotations_for(relative_path)

            # one probe sample per image
            samplesets.append(
                SampleSet(
                    key=image,
                    template_id=image,
                    subject_id=subject,
                    samples=[
                        DelayedSample(
                            key=image,
                            template_id=image,
                            load=partial(bob.io.base.load, image_path),
                            annotations=annotations,
                        )
                    ],
                )
            )

    # Publish to the cache only after every sample set was created.
    self.probes_dict[self.protocol] = samplesets
    return samplesets
def references(self, group="dev"):
    """Return the reference (enrollment) sample sets of the current protocol.

    The list is built on the first call and cached in
    ``self.references_dict``. It is cached only once it has been built
    completely, so a failure midway (e.g. a missing annotation file) does
    not leave a truncated list behind for later calls.

    ``view2``: one single-image sample set per key of ``self.pairs``.
    Open-set protocols: one sample set per enrolled subject, holding all of
    that subject's enrollment images.

    Parameters
    ----------
    group: str
        Not used by this method.

    Returns
    -------
    [bob.pipelines.SampleSet]
    """
    if self.protocol in self.references_dict:
        return self.references_dict[self.protocol]

    def image_path_for(relative_path):
        return os.path.join(
            self.original_directory,
            self.image_relative_path,
            relative_path + self.extension,
        )

    def annotations_for(relative_path):
        if self.annotation_directory is None:
            return None
        return self._extract(
            os.path.join(
                self.annotation_directory,
                relative_path + self.annotation_extension,
            )
        )

    samplesets = []

    if self.protocol == "view2":
        for key in self.pairs:
            image_path = image_path_for(key)
            annotations = annotations_for(key)
            subject_id = self.subject_id_from_filename(key)

            samplesets.append(
                SampleSet(
                    key=key,
                    template_id=key,
                    subject_id=subject_id,
                    samples=[
                        DelayedSample(
                            key=key,
                            template_id=key,
                            load=partial(bob.io.base.load, image_path),
                            subject_id=subject_id,
                            annotations=annotations,
                        )
                    ],
                )
            )

    elif self.protocol[0] == "o":
        for key, images in self.pairs["enroll"].items():
            samples = []
            for image in images:
                relative_path = self.make_path_from_filename(image)
                image_path = image_path_for(relative_path)
                annotations = annotations_for(relative_path)
                samples.append(
                    DelayedSample(
                        key=image,
                        template_id=key,
                        load=partial(bob.io.base.load, image_path),
                        annotations=annotations,
                    )
                )

            # one sample set from several (should be 3) images of the same person
            samplesets.append(
                SampleSet(
                    key=key,
                    template_id=key,
                    subject_id=key,
                    samples=samples,
                )
            )

    # Publish to the cache only after every sample set was created.
    self.references_dict[self.protocol] = samplesets
    return samplesets
def groups(self):
    """Return the available groups; only ``dev`` exists."""
    available = ("dev",)
    return list(available)
595 def all_samples(self, group="dev"):
596 self._check_group(group)
598 if self.protocol == "view2":
599 return self.references() + self.probes()
600 elif self.protocol[0] == "o":
601 return self.background_model_samples() + self.probes()
def _check_protocol(self, protocol):
    """Assert that ``protocol`` is one of :py:meth:`protocols`."""
    valid = self.protocols()
    assert protocol in valid, "Invalid protocol `{}` not in {}".format(
        protocol, valid
    )
def _check_group(self, group):
    """Assert that ``group`` is one of :py:meth:`groups`."""
    valid = self.groups()
    assert group in valid, "Invalid group `{}` not in {}".format(group, valid)
@staticmethod
def urls():
    """Return the download locations of the annotation archive, HTTPS first."""
    https_url = "https://www.idiap.ch/software/bob/data/bob/bob.bio.face/master/annotations/lfw_annotations.tar.gz"
    http_url = "http://www.idiap.ch/software/bob/data/bob/bob.bio.face/master/annotations/lfw_annotations.tar.gz"
    return [https_url, http_url]