Coverage for src/bob/bio/face/database/lfw.py: 15%

203 statements  

« prev     ^ index     » next       coverage.py v7.6.0, created at 2024-07-13 00:04 +0200

1#!/usr/bin/env python 

2# vim: set fileencoding=utf-8 : 

3# Tiago de Freitas Pereira <tiago.pereira@idiap.ch> 

4# Sat 20 Aug 15:43:10 CEST 2016 

5 

6import copy 

7import logging 

8import os 

9 

10from functools import partial 

11 

12import numpy as np 

13 

14from clapper.rc import UserDefaults 

15 

16import bob.io.base 

17 

18from bob.bio.base.database.utils import download_file, md5_hash 

19from bob.bio.base.pipelines.abstract_classes import Database 

20from bob.pipelines import DelayedSample, SampleSet 

21 

22logger = logging.getLogger(__name__) 

23rc = UserDefaults("bobrc.toml") 

24 

25 

class LFWDatabase(Database):  # TODO Make this a CSVDatabase?
    """
    This package contains the access API and descriptions for the `Labeled Faces in the Wild <http://vis-www.cs.umass.edu/lfw>`_ (LFW) database.
    It only contains the Bob_ accessor methods to use the DB directly from python, with our certified protocols.
    The actual raw data for the database should be downloaded from the original URL (though we were not able to contact the corresponding Professor).

    The LFW database provides two different sets (called "views").
    The first one, called ``view1`` is used for optimizing meta-parameters of your algorithm.
    The second one, called ``view2`` is used for benchmarking.
    Of these two views, this interface supports only ``view2``.
    Please note that in ``view2`` there is only a ``'dev'`` group, but no ``'eval'``.

    In addition to ``view2``, the open-set protocols ``o1``, ``o2`` and ``o3``
    are accepted. They are built by scanning the image directory:

    * subjects with more than three images are enrolled with their first three
      images, the remaining images are used as known probes;
    * for subjects with two or three images, the first image is a training
      sample of the ``"unknown"`` subject and the remaining ones are probes of
      protocols ``o1`` and ``o3``;
    * the image of subjects with a single image is a probe of protocols ``o2``
      and ``o3``.

    .. warning::

      To use this dataset protocol, you need to have the original files of the LFW datasets.
      Once you have it downloaded, please run the following command to set the path for Bob

        .. code-block:: sh

            bob config set bob.bio.face.lfw.directory [LFW PATH]
            bob config set bob.bio.face.lfw.annotation_directory [LFW ANNOTATION_PATH] # for the annotations


    .. code-block:: python

        >>> from bob.bio.face.database import LFWDatabase
        >>> lfw = LFWDatabase(protocol="view2")
        >>>
        >>> # Fetching the gallery
        >>> references = lfw.references()
        >>> # Fetching the probes
        >>> probes = lfw.probes()


    Parameters
    ----------

    protocol: str
        One of the database protocols. Options are `view2`, `o1`, `o2` and `o3`

    annotation_type: str
        Type of the annotations used for face crop. Default to `eyes-center`

    image_relative_path: str
        LFW provides several types of image crops. Some with the full image, some with a specific
        face crop. Use this variable to set which image crop you want. Default to `all_images`, which means
        no crop.

    fixed_positions:
        Forwarded unchanged to the base ``Database`` class. Default to `None`

    original_directory: str
        LFW physical path. Default to what is set in the variable `bob.bio.face.lfw.directory`

    extension: str
        Extension of the image files. Default to what is set in the variable
        `bob.bio.face.lfw.extension`, or `.jpg` if that variable is not set

    annotation_directory: str
        LFW annotations path. Default to what is set in the variable `bob.bio.face.lfw.annotation_directory`.
        If it is not set or does not exist, the annotations are downloaded.

    annotation_issuer: str
        Type of the annotations. Default to `funneled`. Possible types `funneled`, `idiap` or `named`

    """

    def __init__(
        self,
        protocol,
        annotation_type="eyes-center",
        image_relative_path="all_images",
        fixed_positions=None,
        original_directory=rc.get("bob.bio.face.lfw.directory"),
        extension=rc.get("bob.bio.face.lfw.extension", ".jpg"),
        annotation_directory=rc.get("bob.bio.face.lfw.annotation_directory"),
        annotation_issuer="funneled",
    ):
        import warnings

        warnings.warn(
            "The lfw database is not yet adapted to this version of bob. Please port it or ask for it to be ported (This one actually needs to be converted to a CSVDatabase).",
            DeprecationWarning,
            stacklevel=2,  # attribute the warning to the code creating the database
        )

        # Only a warning is logged here; `load_pairs` below still tries to
        # read from `original_directory` and fails if it is unusable.
        if original_directory is None or not os.path.exists(original_directory):
            logger.warning(
                "Invalid or non existent `original_directory`: %s. "
                "Please, do `bob config set bob.bio.face.lfw.directory PATH` to set the LFW data directory.",
                original_directory,
            )

        if annotation_issuer not in ("funneled", "idiap", "named"):
            raise ValueError(
                f"Invalid annotation issuer: {annotation_issuer}. Possible values are `idiap`, `funneled` or `named`"
            )

        if annotation_directory is None or not os.path.exists(
            annotation_directory
        ):
            # Downloading annotations if not exists
            annotation_urls = LFWDatabase.urls()

            logger.info(
                "`annotation_directory`: %s not set. Fetching it from %s",
                annotation_directory,
                annotation_urls[0],
            )

            extracted_directory = download_file(
                urls=annotation_urls,
                destination_filename="lfw_annotations.tar.gz",
                checksum="c0ce6e090e19d0ed159172fcba2e8252",
                checksum_fct=md5_hash,
                extract=True,
            )

            # The annotations live in the `lfw_annotations` folder of the
            # extracted archive. `os.path.join` accepts both `str` and
            # path-like objects, whatever `download_file` returns.
            annotation_directory = os.path.join(
                extracted_directory, "lfw_annotations"
            )

        # Attaching the issuer sub-directory
        annotation_directory = os.path.join(
            annotation_directory, annotation_issuer
        )

        self.annotation_issuer = annotation_issuer
        # Hard-coding the extension of the annotations
        # I don't think we need this exposed
        # Please, open an issue if otherwise
        self.annotation_extension = (
            ".jpg.pts" if annotation_issuer == "funneled" else ".pos"
        )

        self._check_protocol(protocol)

        # Caches of the sample sets, filled lazily by `references`/`probes`
        self.references_dict = {}
        self.probes_dict = {}
        self.pairs = {}
        self.probe_reference_keys = {}  # Inverted pairs

        self.annotations = None
        self.original_directory = original_directory
        self.annotation_directory = annotation_directory
        self.extension = extension
        self.image_relative_path = image_relative_path

        super().__init__(
            name="lfw",
            protocol=protocol,
            score_all_vs_all=protocol[0] == "o",
            annotation_type=annotation_type,
            fixed_positions=fixed_positions,
            memory_demanding=False,
        )

        self.load_pairs()

    @staticmethod
    def subject_id_from_filename(x):
        """Returns ``x`` without its last ``_``-separated component.

        For an image name such as ``Some_Name_0001`` this is ``Some_Name``.
        A string without any ``_`` yields the empty string.
        """
        return "_".join(x.split("_")[0:-1])

    @staticmethod
    def make_path_from_filename(x):
        """Returns the relative path ``<subject id>/<x>`` of the image name ``x``."""
        return os.path.join(LFWDatabase.subject_id_from_filename(x), x)

    def _image_path(self, relative_stem):
        """Returns the path of an image given its extension-less relative path."""
        return os.path.join(
            self.original_directory,
            self.image_relative_path,
            relative_stem + self.extension,
        )

    def _load_annotations(self, relative_stem):
        """Reads the annotations of an image given its extension-less relative path.

        Returns ``None`` when ``self.annotation_directory`` is ``None``.
        """
        if self.annotation_directory is None:
            return None

        annotation_path = os.path.join(
            self.annotation_directory,
            relative_stem + self.annotation_extension,
        )
        return self._extract(annotation_path)

    def _extract_funneled(self, annotation_path):
        """Interprets the annotation string as if it came from the funneled images.
        Inspired by: https://gitlab.idiap.ch/bob/bob.db.lfw/-/blob/5ac22c5b77aae971de6b73cbe23f26d6a5632072/bob/db/lfw/models.py#L69

        Only the first line of the file is read. It must hold 18 numbers, i.e.
        nine coordinate pairs; the two numbers of each pair are swapped when
        stored. The eye centers ``leye`` and ``reye`` are added as the middle
        of the respective eye corners.

        Raises
        ------
        AssertionError
            If the first line does not contain exactly 18 values.
        """
        with open(annotation_path) as f:
            # Splitting on any whitespace tolerates the trailing newline and
            # repeated or trailing blanks.
            splits = np.array(f.readline().split(), "float")

        # Raised explicitly so that the check also runs under `python -O`
        if len(splits) != 18:
            raise AssertionError(
                f"Expected 18 values in {annotation_path}, got {len(splits)}"
            )

        locations = (
            "reyeo",
            "reyei",
            "leyei",
            "leyeo",
            "noser",
            "noset",
            "nosel",
            "mouthr",
            "mouthl",
        )
        annotations = {
            name: (float(splits[2 * i + 1]), float(splits[2 * i]))
            for i, name in enumerate(locations)
        }

        # add eye center annotations as the center between the eye corners
        for eye in ("leye", "reye"):
            inner = annotations[eye + "i"]
            outer = annotations[eye + "o"]
            annotations[eye] = (
                (inner[0] + outer[0]) / 2.0,
                (inner[1] + outer[1]) / 2.0,
            )

        return annotations

    def _extract_idiap(self, annotation_file):
        """Interprets the annotation string as if it came from the Idiap annotations.
        Inspired by: https://gitlab.idiap.ch/bob/bob.db.lfw/-/blob/5ac22c5b77aae971de6b73cbe23f26d6a5632072/bob/db/lfw/models.py#L81

        Lines made of exactly three space-separated fields are read as
        ``<index> <value> <value>``; all other lines are ignored. Index 3 is
        stored as ``reye`` and index 8 as ``leye``, when present.
        """
        splits = {}
        with open(annotation_file) as f:
            for line in f:
                fields = line.split(" ")
                if len(fields) == 3:
                    # NOTE(review): the two values are kept in file order,
                    # whereas `_extract_funneled` and `_extract_named` swap
                    # them -- confirm against the Idiap file format.
                    splits[int(fields[0])] = (
                        float(fields[1]),
                        float(fields[2]),
                    )

        annotations = {}
        if 3 in splits:
            annotations["reye"] = splits[3]

        if 8 in splits:
            annotations["leye"] = splits[8]

        return annotations

    def _extract_named(self, annotation_file):
        """Reads the annotation files as provided in the biometrics resources.
        Download them here: https://www.idiap.ch/webarchives/sites/www.idiap.ch/resource/biometric

        Lines made of exactly three space-separated fields are read as
        ``<name> <value> <value>`` and stored under ``name`` with the two
        values swapped; all other lines are ignored.

        Raises
        ------
        AssertionError
            If ``leye`` or ``reye`` is missing from the file.
        """
        annotations = {}
        with open(annotation_file) as f:
            for line in f:
                fields = line.split(" ")
                if len(fields) == 3:
                    annotations[fields[0]] = (
                        float(fields[2]),
                        float(fields[1]),
                    )

        # Raised explicitly so that the check also runs under `python -O`
        if not all(a in annotations for a in ("leye", "reye")):
            raise AssertionError(
                f"Missing `leye` or `reye` annotation in {annotation_file}"
            )

        return annotations

    def _extract(self, annotation_file):
        """Reads ``annotation_file`` with the parser of ``self.annotation_issuer``."""
        return {
            "funneled": self._extract_funneled,
            "idiap": self._extract_idiap,
            "named": self._extract_named,
        }[self.annotation_issuer](annotation_file)

    def load_pairs(self):
        """Fills ``self.pairs`` according to ``self.protocol``.

        For ``view2`` the pairs are read from ``view2/pairs.txt`` and
        ``self.probe_reference_keys`` is rebuilt; for the open-set protocols
        the image directory is scanned.
        """
        if self.protocol == "view2":
            self._load_view2_pairs()
        elif self.protocol[0] == "o":
            self._load_open_set_pairs()

    def _load_view2_pairs(self):
        """Reads ``view2/pairs.txt`` into ``self.pairs`` (reference -> probes)."""
        pairs_path = os.path.join(self.original_directory, "view2", "pairs.txt")
        self.pairs = {}

        def make_filename(name, index):
            return f"{name}_{index.zfill(4)}"

        with open(pairs_path) as f:
            # Skip the first line
            next(f, None)

            for line in f:
                fields = line.rstrip("\n").split("\t")
                if fields == [""]:
                    # blank line, nothing to parse
                    continue

                # Three fields, genuine pairs otherwise impostor
                if len(fields) == 3:
                    key_filename = make_filename(fields[0], fields[1])
                    value_filename = make_filename(fields[0], fields[2])
                else:
                    key_filename = make_filename(fields[0], fields[1])
                    value_filename = make_filename(fields[2], fields[3])

                key = self.make_path_from_filename(key_filename)
                value = self.make_path_from_filename(value_filename)
                self.pairs.setdefault(key, []).append(value)

        self._create_probe_reference_dict()

    def _load_open_set_pairs(self):
        """Scans the image directory and sorts the images into the open-set roles."""
        self.pairs = {
            "enroll": {},
            "training-unknown": [],
            "probe": {},
            "o1": [],
            "o2": [],
        }
        image_root = os.path.join(
            self.original_directory, self.image_relative_path
        )

        # parse directory for open-set protocols; sorted so that the order of
        # the lists does not depend on the file system
        for d in sorted(os.listdir(image_root)):
            dd = os.path.join(image_root, d)
            if not os.path.isdir(dd):
                continue

            # collect the images of that subject
            images = sorted(
                os.path.splitext(i)[0]
                for i in os.listdir(dd)
                if os.path.splitext(i)[1] == self.extension
            )

            if len(images) > 3:
                # take the first three images for enrollment
                self.pairs["enroll"][d] = images[:3]
                # and the remaining images for known probes
                self.pairs["probe"][d] = images[3:]
            elif len(images) > 1:
                # take the first image as known unknown for training (ignored in our case)
                self.pairs["training-unknown"].append(images[0])
                # and the remaining as known unknown probe
                self.pairs["o1"].extend(images[1:])
            elif images:
                # one image -> use as unknown unknown probe
                self.pairs["o2"].append(images[0])
            # a directory without any matching image contributes nothing

    @staticmethod
    def protocols():
        """Returns the names of the supported protocols."""
        return ["view2", "o1", "o2", "o3"]

    def background_model_samples(self):
        """This function returns the training set for the open-set protocols o1, o2 and o3.
        It returns the :py:meth:`references` and the training samples with known unknowns, which get the subject id "unknown".

        Returns
        -------

        [bob.pipelines.SampleSet]
            The training samples, where each sampleset contains all images of one subject.
            Only the samples of the "unknown" subject are collected from several subjects.
            The list is empty for protocol ``view2``.

        """
        if self.protocol[0] != "o":
            return []

        # return a list of samplesets for each enrollment image and each known unknown training sample
        enrollmentset = self.references()

        # image paths and annotations, keyed by image name
        data = {}
        for image in self.pairs["training-unknown"]:
            stem = self.make_path_from_filename(image)
            data[image] = (self._image_path(stem), self._load_annotations(stem))

        # generate one sampleset from images of the known unknowns
        sset = SampleSet(
            key="unknown",
            template_id="unknown",
            subject_id="unknown",
            samples=[
                DelayedSample(
                    key=image,
                    load=partial(bob.io.base.load, image_path),
                    annotations=annotations,
                )
                for image, (image_path, annotations) in data.items()
            ],
        )
        return enrollmentset + [sset]

    def _create_probe_reference_dict(self):
        """
        Fills ``self.probe_reference_keys``, a dictionary whose each key (probe key) holds the list of biometric references
        where that probe should be compared with.

        Nothing is done for the open-set protocols.
        """

        if self.protocol[0] == "o":
            return

        self.probe_reference_keys = {}
        for key, values in self.pairs.items():
            for value in values:
                self.probe_reference_keys.setdefault(value, []).append(key)

    def probes(self, group="dev"):
        """Returns the probe sample sets of the current protocol.

        The list is built on the first call and cached in
        ``self.probes_dict``; ``group`` is not used.

        Returns
        -------

        [bob.pipelines.SampleSet]
            One sampleset per probe image.
        """
        if self.protocol in self.probes_dict:
            return self.probes_dict[self.protocol]

        probe_sets = []

        if self.protocol == "view2":
            for key in self.probe_reference_keys:
                image_path = self._image_path(key)
                annotations = self._load_annotations(key)

                # NOTE(review): `key` is a `<subject>/<image>` path here, so
                # the subject id below keeps the directory part -- confirm
                # this is what consumers expect.
                subject_id = self.subject_id_from_filename(key)

                probe_sets.append(
                    SampleSet(
                        key=key,
                        template_id=key,
                        subject_id=subject_id,
                        references=copy.deepcopy(
                            self.probe_reference_keys[key]
                        ),  # deep copying to avoid bizarre issues with dask
                        samples=[
                            DelayedSample(
                                key=key,
                                template_id=key,
                                subject_id=subject_id,
                                load=partial(bob.io.base.load, image_path),
                                annotations=annotations,
                            )
                        ],
                    )
                )

        elif self.protocol[0] == "o":
            # add known probes
            # collect probe samples:
            probes = [
                (image, key)
                for key in self.pairs["probe"]
                for image in self.pairs["probe"][key]
            ]
            if self.protocol in ("o1", "o3"):
                probes += [(image, "unknown") for image in self.pairs["o1"]]
            if self.protocol in ("o2", "o3"):
                probes += [(image, "unknown") for image in self.pairs["o2"]]

            for image, key in probes:
                stem = self.make_path_from_filename(image)
                image_path = self._image_path(stem)
                annotations = self._load_annotations(stem)

                # one probe sample per image
                probe_sets.append(
                    SampleSet(
                        key=image,
                        template_id=image,
                        subject_id=key,
                        samples=[
                            DelayedSample(
                                key=image,
                                template_id=image,
                                load=partial(bob.io.base.load, image_path),
                                annotations=annotations,
                            )
                        ],
                    )
                )

        # Cache only once the list is complete, so that an error raised while
        # building it does not leave a truncated list behind.
        self.probes_dict[self.protocol] = probe_sets
        return probe_sets

    def references(self, group="dev"):
        """Returns the reference (enrollment) sample sets of the current protocol.

        The list is built on the first call and cached in
        ``self.references_dict``; ``group`` is not used.

        Returns
        -------

        [bob.pipelines.SampleSet]
            For ``view2``, one sampleset per reference image; for the open-set
            protocols, one sampleset per enrolled subject.
        """
        if self.protocol in self.references_dict:
            return self.references_dict[self.protocol]

        reference_sets = []

        if self.protocol == "view2":
            for key in self.pairs:
                image_path = self._image_path(key)
                annotations = self._load_annotations(key)

                # NOTE(review): `key` is a `<subject>/<image>` path here, so
                # the subject id below keeps the directory part -- confirm
                # this is what consumers expect.
                subject_id = self.subject_id_from_filename(key)

                reference_sets.append(
                    SampleSet(
                        key=key,
                        template_id=key,
                        subject_id=subject_id,
                        samples=[
                            DelayedSample(
                                key=key,
                                template_id=key,
                                load=partial(bob.io.base.load, image_path),
                                subject_id=subject_id,
                                annotations=annotations,
                            )
                        ],
                    )
                )

        elif self.protocol[0] == "o":
            for key in self.pairs["enroll"]:
                # image paths and annotations, keyed by image name
                data = {}
                for image in self.pairs["enroll"][key]:
                    stem = self.make_path_from_filename(image)
                    data[image] = (
                        self._image_path(stem),
                        self._load_annotations(stem),
                    )

                # generate one sampleset from several (should be 3) images of the same person
                reference_sets.append(
                    SampleSet(
                        key=key,
                        template_id=key,
                        subject_id=key,
                        samples=[
                            DelayedSample(
                                key=image,
                                template_id=key,
                                load=partial(bob.io.base.load, image_path),
                                annotations=annotations,
                            )
                            for image, (image_path, annotations) in data.items()
                        ],
                    )
                )

        # Cache only once the list is complete, so that an error raised while
        # building it does not leave a truncated list behind.
        self.references_dict[self.protocol] = reference_sets
        return reference_sets

    def groups(self):
        """Returns the names of the available groups."""
        return ["dev"]

    def all_samples(self, group="dev"):
        """Returns all sample sets of the current protocol.

        These are the references followed by the probes for ``view2``, and
        the background model samples followed by the probes for the open-set
        protocols.

        Raises
        ------
        AssertionError
            If ``group`` is not one of :py:meth:`groups`.
        """
        self._check_group(group)

        if self.protocol == "view2":
            return self.references() + self.probes()
        elif self.protocol[0] == "o":
            return self.background_model_samples() + self.probes()

    def _check_protocol(self, protocol):
        """Raises an ``AssertionError`` if ``protocol`` is not supported."""
        # Raised explicitly (instead of `assert`) so that the validation also
        # runs under `python -O`; the exception type is kept for callers.
        if protocol not in self.protocols():
            raise AssertionError(
                "Invalid protocol `{}` not in {}".format(
                    protocol, self.protocols()
                )
            )

    def _check_group(self, group):
        """Raises an ``AssertionError`` if ``group`` is not available."""
        # Raised explicitly (instead of `assert`) so that the validation also
        # runs under `python -O`; the exception type is kept for callers.
        if group not in self.groups():
            raise AssertionError(
                "Invalid group `{}` not in {}".format(group, self.groups())
            )

    @staticmethod
    def urls():
        """Returns the URLs the annotation archive is downloaded from."""
        return [
            "https://www.idiap.ch/software/bob/data/bob/bob.bio.face/master/annotations/lfw_annotations.tar.gz",
            "http://www.idiap.ch/software/bob/data/bob/bob.bio.face/master/annotations/lfw_annotations.tar.gz",
        ]

618 ]