Coverage for src/bob/bio/face/database/lfw.py: 15%

1#!/usr/bin/env python

2# vim: set fileencoding=utf-8 :

3# Tiago de Freitas Pereira <tiago.pereira@idiap.ch>

4# Sat 20 Aug 15:43:10 CEST 2016

6import copy

7import logging

8import os

10from functools import partial

12import numpy as np

14from clapper.rc import UserDefaults

16import bob.io.base

18from bob.bio.base.database.utils import download_file, md5_hash

19from bob.bio.base.pipelines.abstract_classes import Database

20from bob.pipelines import DelayedSample, SampleSet

22logger = logging.getLogger(__name__)

23rc = UserDefaults("bobrc.toml")

26class LFWDatabase(Database): # TODO Make this a CSVDatabase?

27 """

28 This package contains the access API and descriptions for the `Labeled Faces in the Wild <http://vis-www.cs.umass.edu/lfw>`_ (LFW) database.

29 It only contains the Bob_ accessor methods to use the DB directly from python, with our certified protocols.

30 The actual raw data for the database should be downloaded from the original URL (though we were not able to contact the corresponding Professor).

33 The LFW database provides two different sets (called "views").

34 The first one, called ``view1`` is used for optimizing meta-parameters of your algorithm.

35 The second one, called ``view2`` is used for benchmarking.

36 This interface supports the ``view2`` protocol and the open-set protocols ``o1``, ``o2`` and ``o3``.

37 Please note that in ``view2`` there is only a ``'dev'`` group, but no ``'eval'``.

40 .. warning::

42 To use this dataset protocol, you need to have the original files of the LFW datasets.

43 Once you have it downloaded, please run the following command to set the path for Bob

45 .. code-block:: sh

47 bob config set bob.bio.face.lfw.directory [LFW PATH]

48 bob config set bob.bio.face.lfw.annotation_directory [LFW ANNOTATION_PATH] # for the annotations

52 .. code-block:: python

54 >>> from bob.bio.face.database import LFWDatabase

55 >>> lfw = LFWDatabase(protocol="view2")

56 >>>

57 >>> # Fetching the gallery

58 >>> references = lfw.references()

59 >>> # Fetching the probes

60 >>> probes = lfw.probes()

64 Parameters

65 ----------

67 protocol: str

68 One of the database protocols. Options are `view2`, `o1`, `o2` and `o3`

70 annotation_type: str

71 Type of the annotations used for face crop. Default to `eyes-center`

73 image_relative_path: str

74 LFW provides several types of image crops. Some with the full image, some with a specific

75 face crop. Use this variable to set which image crop you want. Default to `all_images`, which means

76 no crop.

78 annotation_directory: str

79 LFW annotations path. Default to what is set in the variable `bob.bio.face.lfw.annotation_directory`

81 original_directory: str

82 LFW physical path. Default to what is set in the variable `bob.bio.face.lfw.directory`

84 annotation_issuer: str

85 Type of the annotations. Default to `funneled`. Possible types `funneled`, `idiap` or `named`

87 """

def __init__(
    self,
    protocol,
    annotation_type="eyes-center",
    image_relative_path="all_images",
    fixed_positions=None,
    original_directory=rc.get("bob.bio.face.lfw.directory"),
    extension=rc.get("bob.bio.face.lfw.extension", ".jpg"),
    annotation_directory=rc.get("bob.bio.face.lfw.annotation_directory"),
    annotation_issuer="funneled",
):
    """Validate the configuration, locate the annotations and load the pairs.

    Parameters
    ----------
    protocol : str
        Protocol name, checked with ``_check_protocol``.
    annotation_type : str
        Forwarded unchanged to the parent ``Database`` constructor.
    image_relative_path : str
        Sub-directory of ``original_directory`` that holds the images.
    fixed_positions
        Forwarded unchanged to the parent ``Database`` constructor.
    original_directory : str or None
        Root directory of the LFW data. A warning is logged when it is unset
        or does not exist.
    extension : str
        File extension of the images.
    annotation_directory : str or None
        Directory holding the annotations. When unset or non-existent, the
        annotation archive is downloaded from :py:meth:`urls`.
    annotation_issuer : str
        One of ``funneled``, ``idiap`` or ``named``.

    Raises
    ------
    ValueError
        If ``annotation_issuer`` is not one of the supported values.
    """
    import warnings

    # stacklevel=2 attributes the warning to the code instantiating the
    # database instead of to this constructor.
    warnings.warn(
        "The lfw database is not yet adapted to this version of bob. Please port it or ask for it to be ported (This one actually needs to be converted to a CSVDatabase).",
        DeprecationWarning,
        stacklevel=2,
    )

    if original_directory is None or not os.path.exists(original_directory):
        logger.warning(
            f"Invalid or non existent `original_directory`: {original_directory}. "
            "Please, do `bob config set bob.bio.face.lfw.directory PATH` to set the LFW data directory."
        )

    if annotation_issuer not in ("funneled", "idiap", "named"):
        raise ValueError(
            f"Invalid annotation issuer: {annotation_issuer}. Possible values are `idiap`, `funneled` or `named`"
        )

    # Reject an unknown protocol before any download is attempted.
    self._check_protocol(protocol)

    if annotation_directory is None or not os.path.exists(
        annotation_directory
    ):
        # Downloading annotations if not exists
        annotation_urls = LFWDatabase.urls()

        logger.info(
            f"`annotation_directory`: {annotation_directory} not set. "
            f"Fetching it from {annotation_urls[0]}"
        )

        annotation_directory = download_file(
            urls=annotation_urls,
            destination_filename="lfw_annotations.tar.gz",
            checksum="c0ce6e090e19d0ed159172fcba2e8252",
            checksum_fct=md5_hash,
            extract=True,
        )

        # Removing extension
        # NOTE(review): relies on `download_file` returning a path object
        # that supports the `/` operator -- confirm.
        annotation_directory = annotation_directory / "lfw_annotations"

    # Attaching the issuer sub-directory
    annotation_directory = os.path.join(
        annotation_directory, annotation_issuer
    )

    self.annotation_issuer = annotation_issuer
    # Hard-coding the extension of the annotations
    # I don't think we need this exposed
    # Please, open an issue if otherwise
    self.annotation_extension = (
        ".jpg.pts" if annotation_issuer == "funneled" else ".pos"
    )

    self.references_dict = {}
    self.probes_dict = {}
    self.pairs = {}
    self.probe_reference_keys = {}  # Inverted pairs

    self.annotations = None
    self.original_directory = original_directory
    self.annotation_directory = annotation_directory
    self.extension = extension
    self.image_relative_path = image_relative_path

    # Some path manipulation lambdas
    # "Name_Surname_0001" -> "Name_Surname"
    self.subject_id_from_filename = lambda x: "_".join(x.split("_")[0:-1])

    # "Name_Surname_0001" -> "Name_Surname/Name_Surname_0001"
    self.make_path_from_filename = lambda x: os.path.join(
        self.subject_id_from_filename(x), x
    )

    super().__init__(
        name="lfw",
        protocol=protocol,
        # the open-set protocols (o1, o2, o3) score all probes against all references
        score_all_vs_all=protocol[0] == "o",
        annotation_type=annotation_type,
        fixed_positions=fixed_positions,
        memory_demanding=False,
    )

    self.load_pairs()

183

184 def _extract_funneled(self, annotation_path):

185 """Interprets the annotation string as if it came from the funneled images.

186 Inspired by: https://gitlab.idiap.ch/bob/bob.db.lfw/-/blob/5ac22c5b77aae971de6b73cbe23f26d6a5632072/bob/db/lfw/models.py#L69

187 """

188 with open(annotation_path) as f:

189 splits = np.array(f.readlines()[0].split(" "), "float")

190

191 assert len(splits) == 18

192 locations = [

193 "reyeo",

194 "reyei",

195 "leyei",

196 "leyeo",

197 "noser",

198 "noset",

199 "nosel",

200 "mouthr",

201 "mouthl",

202 ]

203 annotations = dict(

204 [

205 (locations[i], (float(splits[2 * i + 1]), float(splits[2 * i])))

206 for i in range(9)

207 ]

208 )

209 # add eye center annotations as the center between the eye corners

210 annotations["leye"] = (

211 (annotations["leyei"][0] + annotations["leyeo"][0]) / 2.0,

212 (annotations["leyei"][1] + annotations["leyeo"][1]) / 2.0,

213 )

214 annotations["reye"] = (

215 (annotations["reyei"][0] + annotations["reyeo"][0]) / 2.0,

216 (annotations["reyei"][1] + annotations["reyeo"][1]) / 2.0,

217 )

218

219 return annotations

220

221 def _extract_idiap(self, annotation_file):

222 """Interprets the annotation string as if it came from the Idiap annotations.

223 Inspired by: https://gitlab.idiap.ch/bob/bob.db.lfw/-/blob/5ac22c5b77aae971de6b73cbe23f26d6a5632072/bob/db/lfw/models.py#L81

224 """

225

226 annotations = {}

227 splits = {}

228 with open(annotation_file) as f:

229 for line in f.readlines():

230 line = line.split(" ")

231 if len(line) == 3:

232 # splits.append([float(line[2]), float(line[1])])

233 splits[int(line[0])] = (float(line[1]), float(line[2]))

234

235 if 3 in splits:

236 annotations["reye"] = splits[3]

237

238 if 8 in splits:

239 annotations["leye"] = splits[8]

240

241 return annotations

242

243 def _extract_named(self, annotation_file):

244 """Reads the annotation files as provided in the biometrics resources.

245 Download them here: https://www.idiap.ch/webarchives/sites/www.idiap.ch/resource/biometric

246 """

247

248 annotations = {}

249 with open(annotation_file) as f:

250 for line in f.readlines():

251 line = line.split(" ")

252 if len(line) == 3:

253 # splits.append([float(line[2]), float(line[1])])

254 annotations[line[0]] = (float(line[2]), float(line[1]))

255

256 assert all(a in annotations for a in ("leye", "reye"))

257

258 return annotations

259

260 def _extract(self, annotation_file):

261 return {

262 "funneled": self._extract_funneled,

263 "idiap": self._extract_idiap,

264 "named": self._extract_named,

265 }[self.annotation_issuer](annotation_file)

266

def load_pairs(self):
    """Fill ``self.pairs`` according to ``self.protocol``.

    ``view2``
        Reads ``<original_directory>/view2/pairs.txt``. The first line is
        skipped, blank lines are ignored, and every other line is split on
        tabs. A line with three fields ``name, i, j`` pairs two images of the
        same name; any other line is read as ``name_a, i, name_b, j``.
        ``self.pairs`` maps the relative path of the first image to the list
        of relative paths it is paired with, and
        ``_create_probe_reference_dict`` is called afterwards.

    Protocols starting with ``o`` (open-set)
        Scans every sub-directory of
        ``<original_directory>/<image_relative_path>`` in sorted order, which
        keeps the partition independent of the file system listing order.
        The sorted image names (without extension) of each sub-directory are
        distributed as follows:

        * more than three images: the first three go to ``enroll`` and the
          rest to ``probe``, both keyed by the directory name;
        * two or three images: the first goes to ``training-unknown`` and
          the rest to ``o1``;
        * exactly one image: it goes to ``o2``;
        * no image with the configured extension: the directory is skipped.

    Any other protocol leaves ``self.pairs`` untouched.
    """
    if self.protocol == "view2":
        pairs_path = os.path.join(
            self.original_directory, "view2", "pairs.txt"
        )
        self.pairs = {}

        def make_filename(name, index):
            # image indices are zero-padded to four digits in file names
            return f"{name}_{index.zfill(4)}"

        with open(pairs_path) as f:
            # Skip the first line
            next(f, None)

            for line in f:
                # A blank (typically trailing) line carries no pair
                if not line.strip():
                    continue

                fields = line.rstrip("\n").split("\t")

                # Three columns, genuine pairs otherwise impostor
                if len(fields) == 3:
                    key_filename = make_filename(fields[0], fields[1])
                    value_filename = make_filename(fields[0], fields[2])
                else:
                    key_filename = make_filename(fields[0], fields[1])
                    value_filename = make_filename(fields[2], fields[3])

                key = self.make_path_from_filename(key_filename)
                value = self.make_path_from_filename(value_filename)

                self.pairs.setdefault(key, []).append(value)

        self._create_probe_reference_dict()

    elif self.protocol[0] == "o":
        self.pairs = {
            "enroll": {},
            "training-unknown": [],
            "probe": {},
            "o1": [],
            "o2": [],
        }
        image_root = os.path.join(
            self.original_directory, self.image_relative_path
        )
        # parse directory for open-set protocols
        for d in sorted(os.listdir(image_root)):
            dd = os.path.join(image_root, d)
            if not os.path.isdir(dd):
                continue

            images = sorted(
                os.path.splitext(i)[0]
                for i in os.listdir(dd)
                if os.path.splitext(i)[1] == self.extension
            )

            if len(images) > 3:
                # take the first three images for enrollment
                self.pairs["enroll"][d] = images[:3]
                # and the remaining images for known probes
                self.pairs["probe"][d] = images[3:]
            elif len(images) > 1:
                # take the first image as known unknown for training (ignored in our case)
                self.pairs["training-unknown"].append(images[0])
                # and the remaining as known unknown probe
                self.pairs["o1"].extend(images[1:])
            elif images:
                # one image -> use as unknown unknown probe
                self.pairs["o2"].append(images[0])
            # a directory without any matching image contributes nothing

352

353 @staticmethod

354 def protocols():

355 return ["view2", "o1", "o2", "o3"]

356

def background_model_samples(self):
    """Return the training set of the open-set protocols o1, o2 and o3.

    The result is made of the :py:meth:`references` followed by one extra
    sample set, labelled ``"unknown"``, gathering the images listed under
    ``self.pairs["training-unknown"]``.

    Returns
    -------

    [bob.pipelines.SampleSet]
        The training samples, where each sampleset contains all images of one subject.
        Only the samples of the "unknown" subject are collected from several subjects.
        An empty list is returned for protocols that do not start with ``o``.

    """
    if self.protocol[0] != "o":
        return []

    # the enrollment sample sets come first
    enrolled = self.references()

    # image name -> (image path, annotations or None)
    unknown = {}
    for name in self.pairs["training-unknown"]:
        relative_path = self.make_path_from_filename(name)
        location = os.path.join(
            self.original_directory,
            self.image_relative_path,
            relative_path + self.extension,
        )
        landmarks = None
        if self.annotation_directory is not None:
            landmarks = self._extract(
                os.path.join(
                    self.annotation_directory,
                    relative_path + self.annotation_extension,
                )
            )
        unknown[name] = (location, landmarks)

    # a single sample set holds all the known-unknown training images
    unknown_samples = [
        DelayedSample(
            key=name,
            load=partial(bob.io.base.load, location),
            annotations=landmarks,
        )
        for name, (location, landmarks) in unknown.items()
    ]
    unknown_set = SampleSet(
        key="unknown",
        template_id="unknown",
        subject_id="unknown",
        samples=unknown_samples,
    )
    return enrolled + [unknown_set]

409

410 def _create_probe_reference_dict(self):

411 """

412 Returns a dictionary whose each key (probe key) holds the list of biometric references

413 where that probe should be compared with.

414 """

415

416 if self.protocol[0] == "o":

417 return

418

419 self.probe_reference_keys = {}

420 for key in self.pairs:

421 for value in self.pairs[key]:

422 if value not in self.probe_reference_keys:

423 self.probe_reference_keys[value] = []

424

425 self.probe_reference_keys[value].append(key)

426

def probes(self, group="dev"):
    """Return the probe sample sets of the current protocol.

    The list is built on the first call and cached in ``self.probes_dict``.
    It is cached only once fully built, so a failure while reading an
    annotation does not leave a truncated list behind for later calls.

    ``view2``
        One sample set per key of ``self.probe_reference_keys``, carrying a
        copy of the reference keys it has to be compared with.

    Protocols starting with ``o``
        One sample set per image of ``self.pairs["probe"]``, labelled with
        its subject, followed by the images of ``self.pairs["o1"]`` (for
        ``o1`` and ``o3``) and of ``self.pairs["o2"]`` (for ``o2`` and
        ``o3``), all labelled ``"unknown"``.

    Parameters
    ----------
    group : str
        Not used by this method.

    Returns
    -------
    [bob.pipelines.SampleSet]
        One sample set, holding a single delayed sample, per probe image.
    """
    if self.protocol in self.probes_dict:
        return self.probes_dict[self.protocol]

    def load_annotations(relative_path):
        # `relative_path` is the image path without extension
        if self.annotation_directory is None:
            return None
        return self._extract(
            os.path.join(
                self.annotation_directory,
                relative_path + self.annotation_extension,
            )
        )

    probe_sets = []

    if self.protocol == "view2":
        for key in self.probe_reference_keys:
            image_path = os.path.join(
                self.original_directory,
                self.image_relative_path,
                key + self.extension,
            )
            annotations = load_annotations(key)
            subject_id = self.subject_id_from_filename(key)

            probe_sets.append(
                SampleSet(
                    key=key,
                    template_id=key,
                    subject_id=subject_id,
                    references=copy.deepcopy(
                        self.probe_reference_keys[key]
                    ),  # deep copying to avoid bizarre issues with dask
                    samples=[
                        DelayedSample(
                            key=key,
                            template_id=key,
                            subject_id=subject_id,
                            load=partial(bob.io.base.load, image_path),
                            annotations=annotations,
                        )
                    ],
                )
            )

    elif self.protocol[0] == "o":
        # known probes first
        labelled_images = [
            (image, subject)
            for subject, images in self.pairs["probe"].items()
            for image in images
        ]
        if self.protocol in ("o1", "o3"):
            labelled_images += [
                (image, "unknown") for image in self.pairs["o1"]
            ]
        if self.protocol in ("o2", "o3"):
            labelled_images += [
                (image, "unknown") for image in self.pairs["o2"]
            ]

        for image, subject in labelled_images:
            relative_path = self.make_path_from_filename(image)
            image_path = os.path.join(
                self.original_directory,
                self.image_relative_path,
                relative_path + self.extension,
            )
            annotations = load_annotations(relative_path)

            # one probe sample per image
            probe_sets.append(
                SampleSet(
                    key=image,
                    template_id=image,
                    subject_id=subject,
                    samples=[
                        DelayedSample(
                            key=image,
                            template_id=image,
                            load=partial(bob.io.base.load, image_path),
                            annotations=annotations,
                        )
                    ],
                )
            )

    # Cache only the complete list
    self.probes_dict[self.protocol] = probe_sets
    return probe_sets

514

def references(self, group="dev"):
    """Return the reference (enrollment) sample sets of the current protocol.

    The list is built on the first call and cached in
    ``self.references_dict``. It is cached only once fully built, so a
    failure while reading an annotation does not leave a truncated list
    behind for later calls.

    ``view2``
        One sample set, holding a single delayed sample, per key of
        ``self.pairs``.

    Protocols starting with ``o``
        One sample set per subject of ``self.pairs["enroll"]``, holding one
        delayed sample per enrollment image of that subject.

    Parameters
    ----------
    group : str
        Not used by this method.

    Returns
    -------
    [bob.pipelines.SampleSet]
        The reference sample sets.
    """
    if self.protocol in self.references_dict:
        return self.references_dict[self.protocol]

    def load_annotations(relative_path):
        # `relative_path` is the image path without extension
        if self.annotation_directory is None:
            return None
        return self._extract(
            os.path.join(
                self.annotation_directory,
                relative_path + self.annotation_extension,
            )
        )

    reference_sets = []

    if self.protocol == "view2":
        for key in self.pairs:
            image_path = os.path.join(
                self.original_directory,
                self.image_relative_path,
                key + self.extension,
            )
            annotations = load_annotations(key)
            subject_id = self.subject_id_from_filename(key)

            reference_sets.append(
                SampleSet(
                    key=key,
                    template_id=key,
                    subject_id=subject_id,
                    samples=[
                        DelayedSample(
                            key=key,
                            template_id=key,
                            load=partial(bob.io.base.load, image_path),
                            subject_id=subject_id,
                            annotations=annotations,
                        )
                    ],
                )
            )

    elif self.protocol[0] == "o":
        for key, images in self.pairs["enroll"].items():
            # image name -> (image path, annotations or None)
            data = {}
            for image in images:
                relative_path = self.make_path_from_filename(image)
                image_path = os.path.join(
                    self.original_directory,
                    self.image_relative_path,
                    relative_path + self.extension,
                )
                data[image] = (image_path, load_annotations(relative_path))

            # generate one sampleset from several (should be 3) images of the same person
            reference_sets.append(
                SampleSet(
                    key=key,
                    template_id=key,
                    subject_id=key,
                    samples=[
                        DelayedSample(
                            key=image,
                            template_id=key,
                            load=partial(bob.io.base.load, image_path),
                            annotations=annotations,
                        )
                        for image, (image_path, annotations) in data.items()
                    ],
                )
            )

    # Cache only the complete list
    self.references_dict[self.protocol] = reference_sets
    return reference_sets

591

def groups(self):
    """Return the names of the available groups; only ``dev`` exists."""
    available = ["dev"]
    return available

594

def all_samples(self, group="dev"):
    """Return every sample set of ``group`` for the current protocol.

    For ``view2`` these are the references followed by the probes; for the
    protocols starting with ``o`` they are the background model samples
    followed by the probes. ``None`` is returned for any other protocol.

    Parameters
    ----------
    group : str
        Group name, validated with ``_check_group``.
    """
    self._check_group(group)

    if self.protocol == "view2":
        leading = self.references()
    elif self.protocol[0] == "o":
        leading = self.background_model_samples()
    else:
        return None

    return leading + self.probes()

602

603 def _check_protocol(self, protocol):

604 assert (

605 protocol in self.protocols()

606 ), "Invalid protocol `{}` not in {}".format(protocol, self.protocols())

607

608 def _check_group(self, group):

609 assert group in self.groups(), "Invalid group `{}` not in {}".format(

610 group, self.groups()

611 )

612

613 @staticmethod

614 def urls():

615 return [

616 "https://www.idiap.ch/software/bob/data/bob/bob.bio.face/master/annotations/lfw_annotations.tar.gz",

617 "http://www.idiap.ch/software/bob/data/bob/bob.bio.face/master/annotations/lfw_annotations.tar.gz",

618 ]