Coverage for src/bob/bio/face/annotator/faceX_106landmarks.py: 94%

205 statements  


import logging
import os
import sys

from itertools import product
from math import ceil

import numpy as np
import torch

from torchvision import transforms

from bob.bio.base.database.utils import download_file, md5_hash
from bob.io.image import bob_to_opencvbgr

from . import Base
from .mtcnn import MTCNN

logger = logging.getLogger(__name__)



# Adapted from https://github.com/biubug6/Pytorch_Retinaface
class PriorBox(object):
    """Compute the anchor (prior box) parameters used by the later decode step.

    Attributes:
        cfg (dict): testing config.
        image_size (tuple): the input image size as (height, width).
    """

    def __init__(self, cfg, image_size=None):
        """Initialize the PriorBox settings that drive anchor generation."""
        super(PriorBox, self).__init__()
        self.min_sizes = cfg["min_sizes"]
        self.steps = cfg["steps"]
        self.image_size = image_size
        # One feature map per stride level: (ceil(H / step), ceil(W / step))
        self.feature_maps = [
            [ceil(self.image_size[0] / step), ceil(self.image_size[1] / step)]
            for step in self.steps
        ]
        self.name = "s"

    def forward(self):
        anchors = []
        for k, f in enumerate(self.feature_maps):
            min_sizes = self.min_sizes[k]
            for i, j in product(range(f[0]), range(f[1])):
                for min_size in min_sizes:
                    # Anchor size and center, normalized by the image size
                    s_kx = min_size / self.image_size[1]
                    s_ky = min_size / self.image_size[0]
                    dense_cx = [
                        x * self.steps[k] / self.image_size[1]
                        for x in [j + 0.5]
                    ]
                    dense_cy = [
                        y * self.steps[k] / self.image_size[0]
                        for y in [i + 0.5]
                    ]
                    for cy, cx in product(dense_cy, dense_cx):
                        anchors += [cx, cy, s_kx, s_ky]
        # back to torch land
        output = torch.Tensor(anchors).view(-1, 4)
        return output
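

# A minimal sketch (added for illustration; `_demo_priorbox` is not part of
# the original module): with the detector config used below (steps 8/16/32,
# two min_sizes per level) and a 120x120 input, the feature maps are 15x15,
# 8x8 and 4x4, so `forward` produces (225 + 64 + 16) * 2 = 610 anchors.
def _demo_priorbox():
    cfg = {"min_sizes": [[16, 32], [64, 128], [256, 512]], "steps": [8, 16, 32]}
    priors = PriorBox(cfg, image_size=(120, 120)).forward()
    # Each row is (cx, cy, s_kx, s_ky), normalized by the image size
    assert priors.shape == (610, 4)
    return priors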



def download_faceX_model():
    urls = [
        "https://www.idiap.ch/software/bob/data/bob/bob.bio.face/master/pytorch/faceX_models.tar.gz",
        "http://www.idiap.ch/software/bob/data/bob/bob.bio.face/master/pytorch/faceX_models.tar.gz",
    ]

    filename = download_file(
        urls=urls,
        destination_sub_directory="models/pytorch/",
        destination_filename="faceX_models.tar.gz",
        checksum="eb7ec871f434d2f44e5408627d656297",
        checksum_fct=md5_hash,
        extract=True,
    )

    return filename


def add_faceX_path(filename):
    str_path = (filename / "faceX_models").as_posix()

    logger.warning(f"Adding the following path to sys.path: {str_path}")
    sys.path.insert(0, str_path)
    return str_path
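

# Typical usage (sketch): fetch the FaceX-Zoo models once and put them on
# ``sys.path`` so that the pickled models loaded below can resolve their
# modules:
#
#   filename = download_faceX_model()       # path to the downloaded archive
#   faceX_path = add_faceX_path(filename)   # <...>/faceX_models, now importable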



class FaceXDetector(Base):
    """
    Face detector taken from https://github.com/JDAI-CV/FaceX-Zoo

    This wraps the RetinaFace-based face detection model
    (``face_detection_retina.pkl``) shipped with FaceX-Zoo.
    """

    def __init__(self, device=None, one_face_only=True, **kwargs):
        self.device = torch.device("cpu") if device is None else device

        filename = download_faceX_model()
        faceX_path = add_faceX_path(filename)

        model_filename = os.path.join(
            faceX_path,
            "models",
            "face_detection",
            "face_detection_1.0",
            "face_detection_retina.pkl",
        )

        # Loading face detector
        self.model = torch.load(model_filename, map_location=self.device)
        self.one_face_only = one_face_only

        self.transforms = transforms.Compose([transforms.ToTensor()])

        # Face detection threshold
        # from: https://github.com/JDAI-CV/FaceX-Zoo/blob/db0b087e4f4d28152e172d6c8d3767a8870733b4/face_sdk/models/face_detection/face_detection_1.0/model_meta.json
        self.cfg = {
            "model_type": "retina face detect nets",
            "model_info": "some model info",
            "model_file": "face_detection_retina.pkl",
            "release_date": "20201019",
            "input_height": 120,
            "input_width": 120,
            "min_sizes": [[16, 32], [64, 128], [256, 512]],
            "steps": [8, 16, 32],
            "variance": [0.1, 0.2],
            "in_channel": 256,
            "out_channel": 256,
            "confidence_threshold": 0.7,
        }

        super(FaceXDetector, self).__init__(**kwargs)

    # Adapted from https://github.com/chainer/chainercv
    def decode(self, loc, priors, variances):
        """Decode locations from predictions using priors, undoing the
        encoding done for offset regression at train time.

        Parameters
        ----------
        loc : tensor
            Location predictions for the loc layers, shape [num_priors, 4].
        priors : tensor
            Prior boxes in center-offset form, shape [num_priors, 4].
        variances : list[float]
            Variances of the prior boxes.

        Returns
        -------
        Decoded bounding box predictions in corner form (x1, y1, x2, y2).
        """
        boxes = torch.cat((priors[:, :2], priors[:, 2:]), 1)
        # Shift the prior centers and scale the prior sizes
        boxes[:, :2] = priors[:, :2] + loc[:, :2] * variances[0] * priors[:, 2:]
        boxes[:, 2:] = priors[:, 2:] * torch.exp(loc[:, 2:] * variances[1])
        # Convert (cx, cy, w, h) to (x1, y1, x2, y2)
        boxes[:, :2] -= boxes[:, 2:] / 2
        boxes[:, 2:] += boxes[:, :2]
        return boxes
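
    # A worked example (illustrative values): with variances [0.1, 0.2], a
    # single prior (cx, cy, w, h) = (0.5, 0.5, 0.2, 0.2) and zero predicted
    # offsets,
    #
    #   loc = torch.zeros(1, 4)
    #   priors = torch.tensor([[0.5, 0.5, 0.2, 0.2]])
    #   self.decode(loc, priors, [0.1, 0.2])
    #
    # returns tensor([[0.4, 0.4, 0.6, 0.6]]): the prior itself, converted from
    # center-offset form to corner form.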


    def _preprocess(self, image):
        """Standardize the image for the detector.

        Subtracts the (B, G, R) channel means (104, 117, 123) and moves the
        image to channel-first layout.

        Returns:
            A float32 numpy array of shape (channel, height, width), and a
            scale tensor (width, height, width, height) used to map
            normalized boxes back to pixel coordinates.
        """
        if not isinstance(image, np.ndarray):
            logger.error("The input should be an ndarray read by cv2!")

        img = np.float32(image)
        scale = torch.Tensor(
            [img.shape[1], img.shape[0], img.shape[1], img.shape[0]]
        )
        img -= (104, 117, 123)
        img = img.transpose(2, 0, 1)
        return img, scale
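
    # For example (illustrative): on a 120x120 BGR frame, ``scale`` is
    # tensor([120., 120., 120., 120.]), the per-channel means are subtracted,
    # and the returned array has shape (3, 120, 120).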


    def _postprocess(self, loc, conf, scale, input_height, input_width):
        """Postprocess the raw predictions.

        Decode the detections, apply the confidence threshold and run NMS to
        keep the most appropriate detection boxes.

        Returns:
            A numpy array of shape N x (x1, y1, x2, y2, confidence), where N
            is the number of detection boxes.
        """
        priorbox = PriorBox(self.cfg, image_size=(input_height, input_width))
        priors = priorbox.forward()
        priors = priors.to(self.device)
        prior_data = priors.data
        boxes = self.decode(
            loc.data.squeeze(0), prior_data, self.cfg["variance"]
        )
        boxes = boxes * scale
        boxes = boxes.cpu().numpy()
        scores = conf.squeeze(0).data.cpu().numpy()[:, 1]

        # ignore low scores
        inds = np.where(scores > self.cfg["confidence_threshold"])[0]
        boxes = boxes[inds]
        scores = scores[inds]

        # keep top-K before NMS
        order = scores.argsort()[::-1]
        boxes = boxes[order]
        scores = scores[order]

        # do NMS
        nms_threshold = 0.2
        dets = np.hstack((boxes, scores[:, np.newaxis])).astype(
            np.float32, copy=False
        )
        keep = self.py_cpu_nms(dets, nms_threshold)
        dets = dets[keep, :]
        return dets
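
    # Note (illustrative): ``decode`` yields normalized corner boxes, so the
    # multiplication by ``scale`` = (width, height, width, height) maps them
    # to pixels; e.g. (0.25, 0.25, 0.75, 0.75) on a 120x120 input becomes
    # (30, 30, 90, 90) before thresholding and NMS.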


    # Adapted from https://github.com/biubug6/Pytorch_Retinaface
    def py_cpu_nms(self, dets, thresh):
        """Pure-Python NMS (non-maximum suppression).

        Returns:
            The indices of the boxes kept after NMS.
        """
        x1 = dets[:, 0]
        y1 = dets[:, 1]
        x2 = dets[:, 2]
        y2 = dets[:, 3]
        scores = dets[:, 4]
        areas = (x2 - x1 + 1) * (y2 - y1 + 1)
        order = scores.argsort()[::-1]
        keep = []
        while order.size > 0:
            # Keep the highest-scored remaining box...
            i = order[0]
            keep.append(i)
            # ...and drop every remaining box that overlaps it too much
            xx1 = np.maximum(x1[i], x1[order[1:]])
            yy1 = np.maximum(y1[i], y1[order[1:]])
            xx2 = np.minimum(x2[i], x2[order[1:]])
            yy2 = np.minimum(y2[i], y2[order[1:]])
            w = np.maximum(0.0, xx2 - xx1 + 1)
            h = np.maximum(0.0, yy2 - yy1 + 1)
            inter = w * h
            ovr = inter / (areas[i] + areas[order[1:]] - inter)
            inds = np.where(ovr <= thresh)[0]
            order = order[inds + 1]
        return keep
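
    # A worked example (illustrative values): with
    #
    #   dets = np.array([[0, 0, 10, 10, 0.9],
    #                    [1, 1, 11, 11, 0.8],
    #                    [20, 20, 30, 30, 0.7]])
    #
    # boxes 0 and 1 overlap with IoU = 100 / 142 ~= 0.70 > 0.2, so the
    # lower-scored box 1 is suppressed, while box 2 is disjoint and survives:
    # ``self.py_cpu_nms(dets, 0.2)`` returns [0, 2].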


    def annotate(self, image, **kwargs):
        """Run the detector on the image and process the raw predictions.

        Returns:
            A numpy array of shape N x (x1, y1, x2, y2, confidence), where N
            is the number of detection boxes (a single such row when
            ``one_face_only`` is set), or ``None`` when no face is detected.
        """

        # First thing, we need to convert the bob CxHxW
        # to the openCV HxWxC and BGR
        image = bob_to_opencvbgr(image)

        input_height, input_width, _ = image.shape
        image, scale = self._preprocess(image)
        self.model = self.model.to(self.device)
        image = torch.from_numpy(image).unsqueeze(0)
        with torch.no_grad():
            image = image.to(self.device)
            scale = scale.to(self.device)
            loc, conf, landms = self.model(image)
            dets = self._postprocess(loc, conf, scale, input_height, input_width)

        if len(dets) == 0:
            logger.error("Face not detected. Returning None")
            return None

        dets = dets[0] if self.one_face_only else dets

        return dets
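

# Sketch of the detector in use (illustrative; downloads the model on first
# instantiation):
#
#   detector = FaceXDetector()
#   det = detector.annotate(image)  # Bob RGB image, shape (3, H, W)
#   # det is (x1, y1, x2, y2, confidence), or None if no face was found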



class FaceX106Landmarks(Base):
    """
    Landmark detector taken from https://github.com/JDAI-CV/FaceX-Zoo

    We use the 106-landmark detector that was taken from
    https://github.com/Hsintao/pfld_106_face_landmarks/blob/master/models/mobilev3_pfld.py

    .. warning::
        Here we assume that the face is already detected and cropped.

    Parameters
    ----------
    use_mtcnn_detector : bool
        If set, uses the MTCNN face detector as a base for the landmark
        extractor; otherwise, uses the standard FaceX-Zoo face detector.
    """

    def __init__(self, device=None, use_mtcnn_detector=True, **kwargs):
        self.device = torch.device("cpu") if device is None else device

        filename = download_faceX_model()
        faceX_path = add_faceX_path(filename)
        self.use_mtcnn_detector = use_mtcnn_detector

        model_filename = os.path.join(
            faceX_path,
            "models",
            "face_alignment",
            "face_alignment_1.0",
            "face_landmark_pfld.pkl",
        )

        self.model = torch.load(model_filename, map_location=self.device)

        # Loading the face detector
        self.face_detector = MTCNN() if use_mtcnn_detector else FaceXDetector()

        self.transforms = transforms.Compose([transforms.ToTensor()])

        # Face alignment threshold
        # from: https://github.com/JDAI-CV/FaceX-Zoo/blob/db0b087e4f4d28152e172d6c8d3767a8870733b4/face_sdk/models/face_alignment/face_alignment_1.0/model_meta.json
        self.cfg = {
            "model_path": "models",
            "model_category": "face_alignment",
            "model_name": "face_alignment_1.0",
            "model_type": "pfld face landmark nets",
            "model_info": "some model info",
            "model_file_path": "models/face_alignment/face_alignment_1.0/face_landmark_pfld.pkl",
            "release_date": "20201023",
            "input_height": 112,
            "input_width": 112,
            "img_size": 112,
        }

        self.img_size = self.cfg["img_size"]

        super(FaceX106Landmarks, self).__init__(**kwargs)


    def annotate(self, image, **kwargs):
        """Annotates an image with 106 facial landmarks.

        The face is first located (with MTCNN or the FaceX-Zoo detector,
        depending on ``use_mtcnn_detector``), then the landmark model is run
        on the detected face region.

        Parameters
        ----------
        image : numpy.array
            An RGB image in Bob format.
        **kwargs
            Ignored.

        Returns
        -------
        numpy.ndarray
            The 106 landmarks as an array of shape (106, 2), or ``None`` if
            no face is detected.
        """

        # Detect the face
        if self.use_mtcnn_detector:
            annotations = self.face_detector.annotate(image)
            if annotations is None:
                return None

            # MTCNN annotations are (y, x) points; repack them as
            # (x1, y1, x2, y2)
            dets = [
                annotations["topleft"][1],
                annotations["topleft"][0],
                annotations["bottomright"][1],
                annotations["bottomright"][0],
            ]
        else:
            dets = self.face_detector.annotate(image.copy())

            if dets is None:
                return None

        # First thing, we need to convert the bob CxHxW
        # to the openCV HxWxC and BGR
        image = bob_to_opencvbgr(image)
        image_pre = self._preprocess(image, dets)
        self.model = self.model.to(self.device)
        image_pre = image_pre.unsqueeze(0)
        with torch.no_grad():
            image_pre = image_pre.to(self.device)
            _, landmarks_normal = self.model(image_pre)
        landmarks = self._postprocess(landmarks_normal)

        return np.array(landmarks)
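
    # Sketch of a call (illustrative):
    #
    #   annotator = FaceX106Landmarks()
    #   landmarks = annotator.annotate(image)  # Bob RGB image, shape (3, H, W)
    #   # landmarks.shape == (106, 2), or None when no face is detected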


    # Adapted from https://github.com/Hsintao/pfld_106_face_landmarks/blob/master/data/prepare.py
    def _preprocess(self, image, det):
        """Cut the face region out of the input image using the detection.

        The face detection result (``det``) gives the face position in the
        input image. From it, the face center and box size are computed, the
        image is cropped (padding with a constant border where the box leaves
        the image) and resized to the preset size.

        Returns:
            A torch tensor, the image after preprocessing, shape: (3, 112, 112).
        """
        import cv2

        if not isinstance(image, np.ndarray):
            logger.error("The input should be an ndarray read by cv2!")

        img = image.copy()
        self.image_org = image.copy()
        img = np.float32(img)

        xy = np.array([det[0], det[1]])
        zz = np.array([det[2], det[3]])
        wh = zz - xy + 1
        center = (xy + wh / 2).astype(np.int32)
        boxsize = int(np.max(wh) * 1.2)
        xy = center - boxsize // 2
        self.xy = xy
        self.boxsize = boxsize
        x1, y1 = xy
        x2, y2 = xy + boxsize
        height, width, _ = img.shape
        dx = max(0, -x1)
        dy = max(0, -y1)
        x1 = max(0, x1)
        y1 = max(0, y1)
        edx = max(0, x2 - width)
        edy = max(0, y2 - height)
        x2 = min(width, x2)
        y2 = min(height, y2)
        imageT = image[y1:y2, x1:x2]
        if dx > 0 or dy > 0 or edx > 0 or edy > 0:
            imageT = cv2.copyMakeBorder(
                imageT, dy, edy, dx, edx, cv2.BORDER_CONSTANT, 0
            )

        imageT = cv2.resize(imageT, (self.img_size, self.img_size))
        t = transforms.Compose([transforms.ToTensor()])
        img_after = t(imageT)
        return img_after
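
    # A worked example (illustrative values): for det = (10, 20, 110, 170),
    # wh = (101, 151), center = (60, 95) and boxsize = int(151 * 1.2) = 181,
    # so the crop corner is xy = center - 181 // 2 = (-30, 5); the negative
    # x is handled by cv2.copyMakeBorder padding (dx = 30) before the crop is
    # resized to 112x112.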


    def _postprocess(self, landmarks_normal):
        """Map the predicted landmarks back into the original image frame.

        Returns:
            A numpy array of shape (106, 2): the landmarks in the coordinate
            system of the original image.
        """
        landmarks_normal = landmarks_normal.cpu().numpy()
        landmarks_normal = landmarks_normal.reshape(
            landmarks_normal.shape[0], -1, 2
        )
        # Undo the crop: scale by the crop size and shift by its top-left
        landmarks = landmarks_normal[0] * [self.boxsize, self.boxsize] + self.xy
        return landmarks
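

# End-to-end sketch (added for illustration; `_demo_landmarks` is a
# hypothetical helper, not part of the original module). The landmark model
# outputs coordinates normalized to the 112x112 crop, which `_postprocess`
# maps back with `landmarks = normal * boxsize + xy`; e.g. a normalized point
# (0.5, 0.5) with boxsize 181 and xy (-30, 5) lands at (60.5, 95.5), the
# center of the crop in the original image.
def _demo_landmarks(image):
    annotator = FaceX106Landmarks(use_mtcnn_detector=True)
    landmarks = annotator.annotate(image)
    if landmarks is not None:
        assert landmarks.shape == (106, 2)
    return landmarks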