Coverage for src/bob/bio/face/annotator/faceX_106landmarks.py: 94%

205 statements  


import logging
import os
import sys

from itertools import product
from math import ceil

import numpy as np
import torch

from torchvision import transforms

from bob.bio.base.database.utils import download_file, md5_hash
from bob.io.image import bob_to_opencvbgr

from . import Base
from .mtcnn import MTCNN

logger = logging.getLogger(__name__)



# Adapted from https://github.com/biubug6/Pytorch_Retinaface
class PriorBox(object):
    """Compute the anchor (prior box) parameters used by the later decode step.

    Attributes:
        cfg (dict): testing config.
        image_size (tuple): the input image size as (height, width).
    """

    def __init__(self, cfg, image_size=None):
        """Initialize the PriorBox settings that drive anchor generation."""
        super(PriorBox, self).__init__()
        self.min_sizes = cfg["min_sizes"]
        self.steps = cfg["steps"]
        self.image_size = image_size
        # One feature map per stride level: (ceil(H / step), ceil(W / step))
        self.feature_maps = [
            [ceil(self.image_size[0] / step), ceil(self.image_size[1] / step)]
            for step in self.steps
        ]
        self.name = "s"

    def forward(self):
        anchors = []
        for k, f in enumerate(self.feature_maps):
            min_sizes = self.min_sizes[k]
            for i, j in product(range(f[0]), range(f[1])):
                for min_size in min_sizes:
                    # Anchor size and center, normalized by the image size
                    s_kx = min_size / self.image_size[1]
                    s_ky = min_size / self.image_size[0]
                    dense_cx = [
                        x * self.steps[k] / self.image_size[1]
                        for x in [j + 0.5]
                    ]
                    dense_cy = [
                        y * self.steps[k] / self.image_size[0]
                        for y in [i + 0.5]
                    ]
                    for cy, cx in product(dense_cy, dense_cx):
                        anchors += [cx, cy, s_kx, s_ky]
        # back to torch land
        output = torch.Tensor(anchors).view(-1, 4)
        return output
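

# A minimal sketch (added for illustration; `_demo_priorbox` is not part of
# the original module): with the detector config used below (steps 8/16/32,
# two min_sizes per level) and a 120x120 input, the feature maps are 15x15,
# 8x8 and 4x4, so `forward` produces (225 + 64 + 16) * 2 = 610 anchors.
def _demo_priorbox():
    cfg = {"min_sizes": [[16, 32], [64, 128], [256, 512]], "steps": [8, 16, 32]}
    priors = PriorBox(cfg, image_size=(120, 120)).forward()
    # Each row is (cx, cy, s_kx, s_ky), normalized by the image size
    assert priors.shape == (610, 4)
    return priors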



def download_faceX_model():
    urls = [
        "https://www.idiap.ch/software/bob/data/bob/bob.bio.face/master/pytorch/faceX_models.tar.gz",
        "http://www.idiap.ch/software/bob/data/bob/bob.bio.face/master/pytorch/faceX_models.tar.gz",
    ]

    filename = download_file(
        urls=urls,
        destination_sub_directory="models/pytorch/",
        destination_filename="faceX_models.tar.gz",
        checksum="eb7ec871f434d2f44e5408627d656297",
        checksum_fct=md5_hash,
        extract=True,
    )

    return filename


def add_faceX_path(filename):
    str_path = (filename / "faceX_models").as_posix()

    logger.warning(f"Adding the following path to sys.path: {str_path}")
    sys.path.insert(0, str_path)
    return str_path
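

# Typical usage (sketch): fetch the FaceX-Zoo models once and put them on
# ``sys.path`` so that the pickled models loaded below can resolve their
# modules:
#
#   filename = download_faceX_model()       # path to the downloaded archive
#   faceX_path = add_faceX_path(filename)   # <...>/faceX_models, now importable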



class FaceXDetector(Base):
    """
    Face detector taken from https://github.com/JDAI-CV/FaceX-Zoo

    This wraps the RetinaFace-based face detection model
    (``face_detection_retina.pkl``) shipped with FaceX-Zoo.
    """

    def __init__(self, device=None, one_face_only=True, **kwargs):
        self.device = torch.device("cpu") if device is None else device

        filename = download_faceX_model()
        faceX_path = add_faceX_path(filename)

        model_filename = os.path.join(
            faceX_path,
            "models",
            "face_detection",
            "face_detection_1.0",
            "face_detection_retina.pkl",
        )

        # Loading face detector
        self.model = torch.load(model_filename, map_location=self.device)
        self.one_face_only = one_face_only

        self.transforms = transforms.Compose([transforms.ToTensor()])

        # Face detection threshold
        # from: https://github.com/JDAI-CV/FaceX-Zoo/blob/db0b087e4f4d28152e172d6c8d3767a8870733b4/face_sdk/models/face_detection/face_detection_1.0/model_meta.json
        self.cfg = {
            "model_type": "retina face detect nets",
            "model_info": "some model info",
            "model_file": "face_detection_retina.pkl",
            "release_date": "20201019",
            "input_height": 120,
            "input_width": 120,
            "min_sizes": [[16, 32], [64, 128], [256, 512]],
            "steps": [8, 16, 32],
            "variance": [0.1, 0.2],
            "in_channel": 256,
            "out_channel": 256,
            "confidence_threshold": 0.7,
        }

        super(FaceXDetector, self).__init__(**kwargs)

    # Adapted from https://github.com/chainer/chainercv
    def decode(self, loc, priors, variances):
        """Decode locations from predictions using priors, undoing the
        encoding done for offset regression at train time.

        Parameters
        ----------
        loc : tensor
            Location predictions for the loc layers, shape [num_priors, 4].
        priors : tensor
            Prior boxes in center-offset form, shape [num_priors, 4].
        variances : list[float]
            Variances of the prior boxes.

        Returns
        -------
        Decoded bounding box predictions in corner form (x1, y1, x2, y2).
        """
        boxes = torch.cat((priors[:, :2], priors[:, 2:]), 1)
        # Shift the prior centers and scale the prior sizes
        boxes[:, :2] = priors[:, :2] + loc[:, :2] * variances[0] * priors[:, 2:]
        boxes[:, 2:] = priors[:, 2:] * torch.exp(loc[:, 2:] * variances[1])
        # Convert (cx, cy, w, h) to (x1, y1, x2, y2)
        boxes[:, :2] -= boxes[:, 2:] / 2
        boxes[:, 2:] += boxes[:, :2]
        return boxes
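
    # A worked example (illustrative values): with variances [0.1, 0.2], a
    # single prior (cx, cy, w, h) = (0.5, 0.5, 0.2, 0.2) and zero predicted
    # offsets,
    #
    #   loc = torch.zeros(1, 4)
    #   priors = torch.tensor([[0.5, 0.5, 0.2, 0.2]])
    #   self.decode(loc, priors, [0.1, 0.2])
    #
    # returns tensor([[0.4, 0.4, 0.6, 0.6]]): the prior itself, converted from
    # center-offset form to corner form.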


    def _preprocess(self, image):
        """Standardize the image for the detector.

        Subtracts the (B, G, R) channel means (104, 117, 123) and moves the
        image to channel-first layout.

        Returns:
            A float32 numpy array of shape (channel, height, width), and a
            scale tensor (width, height, width, height) used to map
            normalized boxes back to pixel coordinates.
        """
        if not isinstance(image, np.ndarray):
            logger.error("The input should be an ndarray read by cv2!")

        img = np.float32(image)
        scale = torch.Tensor(
            [img.shape[1], img.shape[0], img.shape[1], img.shape[0]]
        )
        img -= (104, 117, 123)
        img = img.transpose(2, 0, 1)
        return img, scale
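
    # For example (illustrative): on a 120x120 BGR frame, ``scale`` is
    # tensor([120., 120., 120., 120.]), the per-channel means are subtracted,
    # and the returned array has shape (3, 120, 120).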


    def _postprocess(self, loc, conf, scale, input_height, input_width):
        """Postprocess the raw predictions.

        Decode the detections, apply the confidence threshold and run NMS to
        keep the most appropriate detection boxes.

        Returns:
            A numpy array of shape N x (x1, y1, x2, y2, confidence), where N
            is the number of detection boxes.
        """
        priorbox = PriorBox(self.cfg, image_size=(input_height, input_width))
        priors = priorbox.forward()
        priors = priors.to(self.device)
        prior_data = priors.data
        boxes = self.decode(
            loc.data.squeeze(0), prior_data, self.cfg["variance"]
        )
        boxes = boxes * scale
        boxes = boxes.cpu().numpy()
        scores = conf.squeeze(0).data.cpu().numpy()[:, 1]

        # ignore low scores
        inds = np.where(scores > self.cfg["confidence_threshold"])[0]
        boxes = boxes[inds]
        scores = scores[inds]

        # keep top-K before NMS
        order = scores.argsort()[::-1]
        boxes = boxes[order]
        scores = scores[order]

        # do NMS
        nms_threshold = 0.2
        dets = np.hstack((boxes, scores[:, np.newaxis])).astype(
            np.float32, copy=False
        )
        keep = self.py_cpu_nms(dets, nms_threshold)
        dets = dets[keep, :]
        return dets
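
    # Note (illustrative): ``decode`` yields normalized corner boxes, so the
    # multiplication by ``scale`` = (width, height, width, height) maps them
    # to pixels; e.g. (0.25, 0.25, 0.75, 0.75) on a 120x120 input becomes
    # (30, 30, 90, 90) before thresholding and NMS.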


    # Adapted from https://github.com/biubug6/Pytorch_Retinaface
    def py_cpu_nms(self, dets, thresh):
        """Pure-Python NMS (non-maximum suppression).

        Returns:
            The indices of the boxes kept after NMS.
        """
        x1 = dets[:, 0]
        y1 = dets[:, 1]
        x2 = dets[:, 2]
        y2 = dets[:, 3]
        scores = dets[:, 4]
        areas = (x2 - x1 + 1) * (y2 - y1 + 1)
        order = scores.argsort()[::-1]
        keep = []
        while order.size > 0:
            # Keep the highest-scored remaining box...
            i = order[0]
            keep.append(i)
            # ...and drop every remaining box that overlaps it too much
            xx1 = np.maximum(x1[i], x1[order[1:]])
            yy1 = np.maximum(y1[i], y1[order[1:]])
            xx2 = np.minimum(x2[i], x2[order[1:]])
            yy2 = np.minimum(y2[i], y2[order[1:]])
            w = np.maximum(0.0, xx2 - xx1 + 1)
            h = np.maximum(0.0, yy2 - yy1 + 1)
            inter = w * h
            ovr = inter / (areas[i] + areas[order[1:]] - inter)
            inds = np.where(ovr <= thresh)[0]
            order = order[inds + 1]
        return keep
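
    # A worked example (illustrative values): with
    #
    #   dets = np.array([[0, 0, 10, 10, 0.9],
    #                    [1, 1, 11, 11, 0.8],
    #                    [20, 20, 30, 30, 0.7]])
    #
    # boxes 0 and 1 overlap with IoU = 100 / 142 ~= 0.70 > 0.2, so the
    # lower-scored box 1 is suppressed, while box 2 is disjoint and survives:
    # ``self.py_cpu_nms(dets, 0.2)`` returns [0, 2].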


    def annotate(self, image, **kwargs):
        """Run the detector on the image and process the raw predictions.

        Returns:
            A numpy array of shape N x (x1, y1, x2, y2, confidence), where N
            is the number of detection boxes (a single such row when
            ``one_face_only`` is set), or ``None`` when no face is detected.
        """

        # First thing, we need to convert the bob CxHxW
        # to the openCV HxWxC and BGR
        image = bob_to_opencvbgr(image)

        input_height, input_width, _ = image.shape
        image, scale = self._preprocess(image)
        self.model = self.model.to(self.device)
        image = torch.from_numpy(image).unsqueeze(0)
        with torch.no_grad():
            image = image.to(self.device)
            scale = scale.to(self.device)
            loc, conf, landms = self.model(image)
            dets = self._postprocess(loc, conf, scale, input_height, input_width)

        if len(dets) == 0:
            logger.error("Face not detected. Returning None")
            return None

        dets = dets[0] if self.one_face_only else dets

        return dets
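

# Sketch of the detector in use (illustrative; downloads the model on first
# instantiation):
#
#   detector = FaceXDetector()
#   det = detector.annotate(image)  # Bob RGB image, shape (3, H, W)
#   # det is (x1, y1, x2, y2, confidence), or None if no face was found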



class FaceX106Landmarks(Base):
    """
    Landmark detector taken from https://github.com/JDAI-CV/FaceX-Zoo

    We use the 106-landmark detector that was taken from
    https://github.com/Hsintao/pfld_106_face_landmarks/blob/master/models/mobilev3_pfld.py

    .. warning::
        Here we assume that the face is already detected and cropped.

    Parameters
    ----------
    use_mtcnn_detector : bool
        If set, uses the MTCNN face detector as a base for the landmark
        extractor; otherwise, uses the standard FaceX-Zoo face detector.
    """

    def __init__(self, device=None, use_mtcnn_detector=True, **kwargs):
        self.device = torch.device("cpu") if device is None else device

        filename = download_faceX_model()
        faceX_path = add_faceX_path(filename)
        self.use_mtcnn_detector = use_mtcnn_detector

        model_filename = os.path.join(
            faceX_path,
            "models",
            "face_alignment",
            "face_alignment_1.0",
            "face_landmark_pfld.pkl",
        )

        self.model = torch.load(model_filename, map_location=self.device)

        # Loading the face detector
        self.face_detector = MTCNN() if use_mtcnn_detector else FaceXDetector()

        self.transforms = transforms.Compose([transforms.ToTensor()])

        # Face alignment threshold
        # from: https://github.com/JDAI-CV/FaceX-Zoo/blob/db0b087e4f4d28152e172d6c8d3767a8870733b4/face_sdk/models/face_alignment/face_alignment_1.0/model_meta.json
        self.cfg = {
            "model_path": "models",
            "model_category": "face_alignment",
            "model_name": "face_alignment_1.0",
            "model_type": "pfld face landmark nets",
            "model_info": "some model info",
            "model_file_path": "models/face_alignment/face_alignment_1.0/face_landmark_pfld.pkl",
            "release_date": "20201023",
            "input_height": 112,
            "input_width": 112,
            "img_size": 112,
        }

        self.img_size = self.cfg["img_size"]

        super(FaceX106Landmarks, self).__init__(**kwargs)


    def annotate(self, image, **kwargs):
        """Annotates an image with 106 facial landmarks.

        The face is first located (with MTCNN or the FaceX-Zoo detector,
        depending on ``use_mtcnn_detector``), then the landmark model is run
        on the detected face region.

        Parameters
        ----------
        image : numpy.array
            An RGB image in Bob format.
        **kwargs
            Ignored.

        Returns
        -------
        numpy.ndarray
            The 106 landmarks as an array of shape (106, 2), or ``None`` if
            no face is detected.
        """

        # Detect the face
        if self.use_mtcnn_detector:
            annotations = self.face_detector.annotate(image)
            if annotations is None:
                return None

            # MTCNN annotations are (y, x) points; repack them as
            # (x1, y1, x2, y2)
            dets = [
                annotations["topleft"][1],
                annotations["topleft"][0],
                annotations["bottomright"][1],
                annotations["bottomright"][0],
            ]
        else:
            dets = self.face_detector.annotate(image.copy())

            if dets is None:
                return None

        # First thing, we need to convert the bob CxHxW
        # to the openCV HxWxC and BGR
        image = bob_to_opencvbgr(image)
        image_pre = self._preprocess(image, dets)
        self.model = self.model.to(self.device)
        image_pre = image_pre.unsqueeze(0)
        with torch.no_grad():
            image_pre = image_pre.to(self.device)
            _, landmarks_normal = self.model(image_pre)
        landmarks = self._postprocess(landmarks_normal)

        return np.array(landmarks)
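
    # Sketch of a call (illustrative):
    #
    #   annotator = FaceX106Landmarks()
    #   landmarks = annotator.annotate(image)  # Bob RGB image, shape (3, H, W)
    #   # landmarks.shape == (106, 2), or None when no face is detected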


    # Adapted from https://github.com/Hsintao/pfld_106_face_landmarks/blob/master/data/prepare.py
    def _preprocess(self, image, det):
        """Cut the face region out of the input image using the detection.

        The face detection result (``det``) gives the face position in the
        input image. From it, the face center and box size are computed, the
        image is cropped (padding with a constant border where the box leaves
        the image) and resized to the preset size.

        Returns:
            A torch tensor, the image after preprocessing, shape: (3, 112, 112).
        """
        import cv2

        if not isinstance(image, np.ndarray):
            logger.error("The input should be an ndarray read by cv2!")

        img = image.copy()
        self.image_org = image.copy()
        img = np.float32(img)

        xy = np.array([det[0], det[1]])
        zz = np.array([det[2], det[3]])
        wh = zz - xy + 1
        center = (xy + wh / 2).astype(np.int32)
        boxsize = int(np.max(wh) * 1.2)
        xy = center - boxsize // 2
        self.xy = xy
        self.boxsize = boxsize
        x1, y1 = xy
        x2, y2 = xy + boxsize
        height, width, _ = img.shape
        dx = max(0, -x1)
        dy = max(0, -y1)
        x1 = max(0, x1)
        y1 = max(0, y1)
        edx = max(0, x2 - width)
        edy = max(0, y2 - height)
        x2 = min(width, x2)
        y2 = min(height, y2)
        imageT = image[y1:y2, x1:x2]
        if dx > 0 or dy > 0 or edx > 0 or edy > 0:
            imageT = cv2.copyMakeBorder(
                imageT, dy, edy, dx, edx, cv2.BORDER_CONSTANT, 0
            )

        imageT = cv2.resize(imageT, (self.img_size, self.img_size))
        t = transforms.Compose([transforms.ToTensor()])
        img_after = t(imageT)
        return img_after
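
    # A worked example (illustrative values): for det = (10, 20, 110, 170),
    # wh = (101, 151), center = (60, 95) and boxsize = int(151 * 1.2) = 181,
    # so the crop corner is xy = center - 181 // 2 = (-30, 5); the negative
    # x is handled by cv2.copyMakeBorder padding (dx = 30) before the crop is
    # resized to 112x112.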


    def _postprocess(self, landmarks_normal):
        """Map the predicted landmarks back into the original image frame.

        Returns:
            A numpy array of shape (106, 2): the landmarks in the coordinate
            system of the original image.
        """
        landmarks_normal = landmarks_normal.cpu().numpy()
        landmarks_normal = landmarks_normal.reshape(
            landmarks_normal.shape[0], -1, 2
        )
        # Undo the crop: scale by the crop size and shift by its top-left
        landmarks = landmarks_normal[0] * [self.boxsize, self.boxsize] + self.xy
        return landmarks
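

# End-to-end sketch (added for illustration; `_demo_landmarks` is a
# hypothetical helper, not part of the original module). The landmark model
# outputs coordinates normalized to the 112x112 crop, which `_postprocess`
# maps back with `landmarks = normal * boxsize + xy`; e.g. a normalized point
# (0.5, 0.5) with boxsize 181 and xy (-30, 5) lands at (60.5, 95.5), the
# center of the crop in the original image.
def _demo_landmarks(image):
    annotator = FaceX106Landmarks(use_mtcnn_detector=True)
    landmarks = annotator.annotate(image)
    if landmarks is not None:
        assert landmarks.shape == (106, 2)
    return landmarks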