Coverage for src/bob/bio/face/annotator/faceX_106landmarks.py: 94%
205 statements
coverage.py v7.6.0, created at 2024-07-13 00:04 +0200
import logging
import os
import sys

from itertools import product
from math import ceil

import numpy as np
import torch

from torchvision import transforms

from bob.bio.base.database.utils import download_file, md5_hash
from bob.io.image import bob_to_opencvbgr

from . import Base
from .mtcnn import MTCNN

logger = logging.getLogger(__name__)
# Adapted from https://github.com/biubug6/Pytorch_Retinaface
class PriorBox(object):
    """Compute the anchor (prior box) parameters used by the later decode step.

    Attributes:
        cfg (dict): testing config.
        image_size (tuple): the input image size.
    """
    def __init__(self, cfg, image_size=None):
        """Initialize the PriorBox settings used to generate anchors."""
        super(PriorBox, self).__init__()
        self.min_sizes = cfg["min_sizes"]
        self.steps = cfg["steps"]
        self.image_size = image_size
        self.feature_maps = [
            [ceil(self.image_size[0] / step), ceil(self.image_size[1] / step)]
            for step in self.steps
        ]
        self.name = "s"
    def forward(self):
        anchors = []
        for k, f in enumerate(self.feature_maps):
            min_sizes = self.min_sizes[k]
            for i, j in product(range(f[0]), range(f[1])):
                for min_size in min_sizes:
                    s_kx = min_size / self.image_size[1]
                    s_ky = min_size / self.image_size[0]
                    dense_cx = [
                        x * self.steps[k] / self.image_size[1]
                        for x in [j + 0.5]
                    ]
                    dense_cy = [
                        y * self.steps[k] / self.image_size[0]
                        for y in [i + 0.5]
                    ]
                    for cy, cx in product(dense_cy, dense_cx):
                        anchors += [cx, cy, s_kx, s_ky]
        # back to torch land
        output = torch.Tensor(anchors).view(-1, 4)
        return output
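# A minimal usage sketch for PriorBox (comment-only; the cfg values mirror
# the face detection config defined further down in this file):
#
#     cfg = {"min_sizes": [[16, 32], [64, 128], [256, 512]], "steps": [8, 16, 32]}
#     priors = PriorBox(cfg, image_size=(120, 120)).forward()
#     # priors has shape (N, 4); each row is (cx, cy, s_kx, s_ky), normalized
#     # to the input image size.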
def download_faceX_model():
    urls = [
        "https://www.idiap.ch/software/bob/data/bob/bob.bio.face/master/pytorch/faceX_models.tar.gz",
        "http://www.idiap.ch/software/bob/data/bob/bob.bio.face/master/pytorch/faceX_models.tar.gz",
    ]

    filename = download_file(
        urls=urls,
        destination_sub_directory="models/pytorch/",
        destination_filename="faceX_models.tar.gz",
        checksum="eb7ec871f434d2f44e5408627d656297",
        checksum_fct=md5_hash,
        extract=True,
    )

    return filename
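# Note: because ``extract=True``, the returned path points at the directory
# where the tarball was extracted (it is joined with "faceX_models" in
# add_faceX_path below), not at the tarball itself.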
def add_faceX_path(filename):
    str_path = (filename / "faceX_models").as_posix()

    logger.warning(f"Prepending the following path to sys.path: {str_path}")
    sys.path.insert(0, str_path)
    return str_path
class FaceXDetector(Base):
    """
    Face detector taken from https://github.com/JDAI-CV/FaceX-Zoo

    It serves as a base for the 106-landmark detector taken from
    https://github.com/Hsintao/pfld_106_face_landmarks/blob/master/models/mobilev3_pfld.py

    .. warning::
        Here we assume that the face is already detected and cropped
    """
    def __init__(self, device=None, one_face_only=True, **kwargs):
        self.device = torch.device("cpu") if device is None else device

        filename = download_faceX_model()
        faceX_path = add_faceX_path(filename)

        model_filename = os.path.join(
            faceX_path,
            "models",
            "face_detection",
            "face_detection_1.0",
            "face_detection_retina.pkl",
        )

        # Loading the face detector
        self.model = torch.load(model_filename, map_location=self.device)
        self.one_face_only = one_face_only

        self.transforms = transforms.Compose([transforms.ToTensor()])

        # Face detection config (includes the confidence threshold)
        # from: https://github.com/JDAI-CV/FaceX-Zoo/blob/db0b087e4f4d28152e172d6c8d3767a8870733b4/face_sdk/models/face_detection/face_detection_1.0/model_meta.json
        self.cfg = {
            "model_type": "retina face detect nets",
            "model_info": "some model info",
            "model_file": "face_detection_retina.pkl",
            "release_date": "20201019",
            "input_height": 120,
            "input_width": 120,
            "min_sizes": [[16, 32], [64, 128], [256, 512]],
            "steps": [8, 16, 32],
            "variance": [0.1, 0.2],
            "in_channel": 256,
            "out_channel": 256,
            "confidence_threshold": 0.7,
        }

        super(FaceXDetector, self).__init__(**kwargs)
    # Adapted from https://github.com/chainer/chainercv
    def decode(self, loc, priors, variances):
        """Decode locations from predictions using the priors to undo the
        encoding done for offset regression at train time.

        Parameters
        ----------
        loc (tensor): location predictions for the loc layers.
            Shape: [num_priors, 4]
        priors (tensor): prior boxes in center-offset form.
            Shape: [num_priors, 4]
        variances (list[float]): variances of the prior boxes.

        Returns
        -------
        Decoded bounding box predictions.
        """
        boxes = torch.cat((priors[:, :2], priors[:, 2:]), 1)
        boxes[:, :2] = priors[:, :2] + loc[:, :2] * variances[0] * priors[:, 2:]
        boxes[:, 2:] = priors[:, 2:] * torch.exp(loc[:, 2:] * variances[1])
        boxes[:, :2] -= boxes[:, 2:] / 2
        boxes[:, 2:] += boxes[:, :2]
        return boxes
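    # The decode step follows the SSD/RetinaFace convention: given a prior
    # (p_cx, p_cy, p_w, p_h) and predicted offsets (t_x, t_y, t_w, t_h),
    #
    #     cx = p_cx + t_x * variances[0] * p_w
    #     cy = p_cy + t_y * variances[0] * p_h
    #     w  = p_w * exp(t_w * variances[1])
    #     h  = p_h * exp(t_h * variances[1])
    #
    # and the last two lines above convert (cx, cy, w, h) to corner form
    # (x_min, y_min, x_max, y_max).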
    def _preprocess(self, image):
        """Preprocess the image: mean subtraction and layout conversion.

        Returns:
            A numpy array of shape (channels, height, width) and a scale
            tensor of shape (4,).
        """
        if not isinstance(image, np.ndarray):
            logger.error("The input should be an ndarray read by cv2!")

        img = np.float32(image)
        scale = torch.Tensor(
            [img.shape[1], img.shape[0], img.shape[1], img.shape[0]]
        )
        img -= (104, 117, 123)
        img = img.transpose(2, 0, 1)
        return img, scale
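    # Note: (104, 117, 123) are the per-channel BGR means subtracted by the
    # original Pytorch_Retinaface code this was adapted from, and ``scale``
    # holds (width, height, width, height) so decoded, normalized boxes can
    # be mapped back to pixel coordinates with a single multiply.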
    def _postprocess(self, loc, conf, scale, input_height, input_width):
        """Postprocess the prediction result.

        Decode the detection result, apply the confidence threshold and run
        NMS to keep the appropriate detection boxes.

        Returns:
            A numpy array of shape N * (x_min, y_min, x_max, y_max,
            confidence), where N is the number of detection boxes.
        """
        priorbox = PriorBox(self.cfg, image_size=(input_height, input_width))
        priors = priorbox.forward()
        priors = priors.to(self.device)
        prior_data = priors.data
        boxes = self.decode(
            loc.data.squeeze(0), prior_data, self.cfg["variance"]
        )
        boxes = boxes * scale
        boxes = boxes.cpu().numpy()
        scores = conf.squeeze(0).data.cpu().numpy()[:, 1]

        # ignore low scores
        inds = np.where(scores > self.cfg["confidence_threshold"])[0]
        boxes = boxes[inds]
        scores = scores[inds]

        # keep top-K before NMS
        order = scores.argsort()[::-1]
        boxes = boxes[order]
        scores = scores[order]

        # do NMS
        nms_threshold = 0.2
        dets = np.hstack((boxes, scores[:, np.newaxis])).astype(
            np.float32, copy=False
        )
        keep = self.py_cpu_nms(dets, nms_threshold)
        dets = dets[keep, :]
        return dets
    # Adapted from https://github.com/biubug6/Pytorch_Retinaface
    def py_cpu_nms(self, dets, thresh):
        """Pure-Python NMS (non-maximum suppression).

        Returns:
            The indices of the boxes kept after NMS.
        """
        x1 = dets[:, 0]
        y1 = dets[:, 1]
        x2 = dets[:, 2]
        y2 = dets[:, 3]
        scores = dets[:, 4]

        areas = (x2 - x1 + 1) * (y2 - y1 + 1)
        order = scores.argsort()[::-1]

        keep = []
        while order.size > 0:
            i = order[0]
            keep.append(i)
            xx1 = np.maximum(x1[i], x1[order[1:]])
            yy1 = np.maximum(y1[i], y1[order[1:]])
            xx2 = np.minimum(x2[i], x2[order[1:]])
            yy2 = np.minimum(y2[i], y2[order[1:]])

            w = np.maximum(0.0, xx2 - xx1 + 1)
            h = np.maximum(0.0, yy2 - yy1 + 1)
            inter = w * h
            ovr = inter / (areas[i] + areas[order[1:]] - inter)

            inds = np.where(ovr <= thresh)[0]
            order = order[inds + 1]

        return keep
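    # A worked mini-example of py_cpu_nms (comment-only, with made-up boxes):
    #
    #     dets = np.array([[10, 10, 50, 50, 0.9],
    #                      [12, 12, 52, 52, 0.8],
    #                      [100, 100, 140, 140, 0.7]], dtype=np.float32)
    #     keep = self.py_cpu_nms(dets, 0.2)  # -> [0, 2]
    #
    # The second box has IoU ~0.83 with the first (higher-scoring) box, so it
    # is suppressed; the third box does not overlap and survives.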
    def annotate(self, image, **kwargs):
        """Run inference on the image and process the result.

        Returns:
            A numpy array of shape N * (x_min, y_min, x_max, y_max,
            confidence), where N is the number of detection boxes.
        """

        # First, convert the Bob CxHxW image
        # to the OpenCV HxWxC BGR layout
        image = bob_to_opencvbgr(image)

        input_height, input_width, _ = image.shape
        image, scale = self._preprocess(image)
        self.model = self.model.to(self.device)
        image = torch.from_numpy(image).unsqueeze(0)
        with torch.no_grad():
            image = image.to(self.device)
            scale = scale.to(self.device)
            loc, conf, landms = self.model(image)
        dets = self._postprocess(loc, conf, scale, input_height, input_width)

        if len(dets) == 0:
            logger.error("Face not detected. Returning None")
            return None

        dets = dets[0] if self.one_face_only else dets

        return dets
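# A minimal usage sketch for FaceXDetector (comment-only; assumes a
# Bob-format RGB image of shape (3, H, W), e.g. loaded with bob.io.image):
#
#     detector = FaceXDetector()       # downloads the model on first use
#     det = detector.annotate(image)   # (x_min, y_min, x_max, y_max, conf)
#     # with one_face_only=False, an (N, 5) array of all detections instead.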
class FaceX106Landmarks(Base):
    """
    Landmark detector taken from https://github.com/JDAI-CV/FaceX-Zoo

    We use the 106-landmark detector taken from
    https://github.com/Hsintao/pfld_106_face_landmarks/blob/master/models/mobilev3_pfld.py

    .. warning::
        Here we assume that the face is already detected and cropped

    Parameters
    ----------

    use_mtcnn_detector: bool
        If set, uses the MTCNN face detector as a base for the landmark
        extractor. If not, uses the standard face detector of FaceXZoo.
    """
    def __init__(self, device=None, use_mtcnn_detector=True, **kwargs):
        self.device = torch.device("cpu") if device is None else device

        filename = download_faceX_model()
        faceX_path = add_faceX_path(filename)
        self.use_mtcnn_detector = use_mtcnn_detector

        model_filename = os.path.join(
            faceX_path,
            "models",
            "face_alignment",
            "face_alignment_1.0",
            "face_landmark_pfld.pkl",
        )

        self.model = torch.load(model_filename, map_location=self.device)

        # Loading the face detector
        self.face_detector = MTCNN() if use_mtcnn_detector else FaceXDetector()

        self.transforms = transforms.Compose([transforms.ToTensor()])

        # Face alignment config
        # from: https://github.com/JDAI-CV/FaceX-Zoo/blob/db0b087e4f4d28152e172d6c8d3767a8870733b4/face_sdk/models/face_alignment/face_alignment_1.0/model_meta.json
        self.cfg = {
            "model_path": "models",
            "model_category": "face_alignment",
            "model_name": "face_alignment_1.0",
            "model_type": "pfld face landmark nets",
            "model_info": "some model info",
            "model_file_path": "models/face_alignment/face_alignment_1.0/face_landmark_pfld.pkl",
            "release_date": "20201023",
            "input_height": 112,
            "input_width": 112,
            "img_size": 112,
        }

        self.img_size = self.cfg["img_size"]

        super(FaceX106Landmarks, self).__init__(**kwargs)

    # self.detector = MTCNN(min_size=min_size, factor=factor, thresholds=thresholds)
    def annotate(self, image, **kwargs):
        """Annotates an image: detects the face (with MTCNN or the FaceXZoo
        detector, depending on ``use_mtcnn_detector``), then extracts the 106
        landmarks.

        Parameters
        ----------
        image : numpy.array
            An RGB image in Bob format.
        **kwargs
            Ignored.

        Returns
        -------
        numpy.array
            The landmark coordinates, shape (106, 2), or None if no face is
            detected.
        """

        # Detect the face
        if self.use_mtcnn_detector:
            annotations = self.face_detector.annotate(image)
            if annotations is None:
                return None

            dets = [
                annotations["topleft"][1],
                annotations["topleft"][0],
                annotations["bottomright"][1],
                annotations["bottomright"][0],
            ]
        else:
            dets = self.face_detector.annotate(image.copy())

            if dets is None:
                return None

        # First, convert the Bob CxHxW image
        # to the OpenCV HxWxC BGR layout
        image = bob_to_opencvbgr(image)
        image_pre = self._preprocess(image, dets)
        self.model = self.model.to(self.device)
        image_pre = image_pre.unsqueeze(0)
        with torch.no_grad():
            image_pre = image_pre.to(self.device)
            _, landmarks_normal = self.model(image_pre)
        landmarks = self._postprocess(landmarks_normal)

        return np.array(landmarks)
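    # A minimal usage sketch for FaceX106Landmarks (comment-only; assumes a
    # Bob-format RGB image of shape (3, H, W)):
    #
    #     annotator = FaceX106Landmarks()       # MTCNN detection by default
    #     landmarks = annotator.annotate(image)
    #     # landmarks is a (106, 2) array of coordinates in the original
    #     # image, or None if no face was found.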
    # Adapted from https://github.com/Hsintao/pfld_106_face_landmarks/blob/master/data/prepare.py
    def _preprocess(self, image, det):
        """Preprocess the input image, cropping it using the face detection
        information.

        The face detection result (``det``) gives the face position in the
        input image. After determining the face center and box size, crop
        the image and resize it to the preset size.

        Returns:
            A torch tensor, the image after preprocessing, shape: (3, 112, 112).
        """
        import cv2
        if not isinstance(image, np.ndarray):
            logger.error("The input should be an ndarray read by cv2!")

        img = image.copy()
        self.image_org = image.copy()
        img = np.float32(img)

        xy = np.array([det[0], det[1]])
        zz = np.array([det[2], det[3]])
        wh = zz - xy + 1

        center = (xy + wh / 2).astype(np.int32)
        boxsize = int(np.max(wh) * 1.2)
        xy = center - boxsize // 2
        self.xy = xy
        self.boxsize = boxsize
        x1, y1 = xy
        x2, y2 = xy + boxsize
        height, width, _ = img.shape
        dx = max(0, -x1)
        dy = max(0, -y1)
        x1 = max(0, x1)
        y1 = max(0, y1)

        edx = max(0, x2 - width)
        edy = max(0, y2 - height)
        x2 = min(width, x2)
        y2 = min(height, y2)
        imageT = image[y1:y2, x1:x2]
        if dx > 0 or dy > 0 or edx > 0 or edy > 0:
            imageT = cv2.copyMakeBorder(
                imageT, dy, edy, dx, edx, cv2.BORDER_CONSTANT, 0
            )

        imageT = cv2.resize(imageT, (self.img_size, self.img_size))
        t = transforms.Compose([transforms.ToTensor()])
        img_after = t(imageT)
        return img_after
    def _postprocess(self, landmarks_normal):
        """Map the predicted landmarks back to the original image frame.

        Returns:
            A numpy array of landmarks based on the shape of the original
            image, shape: (106, 2).
        """
        landmarks_normal = landmarks_normal.cpu().numpy()
        landmarks_normal = landmarks_normal.reshape(
            landmarks_normal.shape[0], -1, 2
        )
        landmarks = landmarks_normal[0] * [self.boxsize, self.boxsize] + self.xy
        return landmarks
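    # The model predicts landmarks normalized to [0, 1] within the square
    # crop, so the line above maps them back to original-image pixels:
    #
    #     landmark_px = landmark_normalized * boxsize + crop_top_left
    #
    # using the crop origin (self.xy) and size (self.boxsize) stored by
    # _preprocess.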