Coverage for src/bob/bio/face/pytorch/facexzoo/ 65%

369 statements  

« prev     ^ index     » next v7.6.0, created at 2024-07-13 00:04 +0200


2@author: Jun Wang 

3@date: 20201019 




7# based on: 

8# Github repo: 


10import collections 

11import math 

12import re 


14from functools import partial 


16import torch 


18from torch import nn 

19from torch.nn import ( 

20 BatchNorm1d, 

21 BatchNorm2d, 

22 Dropout, 

23 Linear, 

24 Module, 

25 Sequential, 


27from torch.nn import functional as F 

28from torch.utils import model_zoo 



31# Help functions for model architecture 



34# GlobalParams and BlockArgs: Two namedtuples 

35# Swish and MemoryEfficientSwish: Two implementations of the method 

36# round_filters and round_repeats: 

37# Functions to calculate params for scaling model width and depth ! ! ! 

38# get_width_and_height_from_size and calculate_output_image_size 

39# drop_connect: A structural design 

40# get_same_padding_conv2d: 

41# Conv2dDynamicSamePadding 

42# Conv2dStaticSamePadding 

43# get_same_padding_maxPool2d: 

44# MaxPool2dDynamicSamePadding 

45# MaxPool2dStaticSamePadding 

46# It's an additional function, not used in EfficientNet, 

47# but can be used in other model (such as EfficientDet). 


49# Parameters for the entire model (stem, all blocks, and head) 

# Hyper-parameters that apply to the whole network (stem, every block, head).
GlobalParams = collections.namedtuple(
    "GlobalParams",
    (
        "width_coefficient",
        "depth_coefficient",
        "image_size",
        "dropout_rate",
        "num_classes",
        "batch_norm_momentum",
        "batch_norm_epsilon",
        "drop_connect_rate",
        "depth_divisor",
        "min_depth",
        "include_top",
    ),
)

# Hyper-parameters that describe one stage (a group of repeated blocks).
BlockArgs = collections.namedtuple(
    "BlockArgs",
    (
        "num_repeat",
        "kernel_size",
        "stride",
        "expand_ratio",
        "input_filters",
        "output_filters",
        "se_ratio",
        "id_skip",
    ),
)

# Give every field a default of None so either tuple can be built from any
# subset of keyword arguments.
GlobalParams.__new__.__defaults__ = tuple(None for _ in GlobalParams._fields)
BlockArgs.__new__.__defaults__ = tuple(None for _ in BlockArgs._fields)



87# An ordinary implementation of Swish function 

class Swish(nn.Module):
    """Plain Swish activation, ``x * sigmoid(x)``, differentiated by autograd."""

    def forward(self, x):
        """Return ``x`` multiplied element-wise by its own sigmoid."""
        gate = torch.sigmoid(x)
        return x * gate



93# A memory-efficient implementation of Swish function 

class SwishImplementation(torch.autograd.Function):
    """Swish as a custom autograd function.

    Only the input tensor is saved for the backward pass; the sigmoid is
    recomputed there instead of being kept alive between the two passes.
    """

    @staticmethod
    def forward(ctx, i):
        """Compute ``i * sigmoid(i)`` and stash ``i`` for the backward pass."""
        ctx.save_for_backward(i)
        return i * torch.sigmoid(i)

    @staticmethod
    def backward(ctx, grad_output):
        """Scale ``grad_output`` by the derivative of Swish at the saved input."""
        (i,) = ctx.saved_tensors
        sig = torch.sigmoid(i)
        # d/di [i * sigmoid(i)] = sigmoid(i) * (1 + i * (1 - sigmoid(i)))
        local_grad = sig * (1 + i * (1 - sig))
        return grad_output * local_grad



class MemoryEfficientSwish(nn.Module):
    """Module wrapper that routes its input through ``SwishImplementation``."""

    def forward(self, x):
        """Apply the custom-autograd Swish to ``x``."""
        activated = SwishImplementation.apply(x)
        return activated



def round_filters(filters, global_params):
    """Scale a filter count by the width multiplier and snap it to the divisor.

    Reads ``width_coefficient``, ``depth_divisor`` and ``min_depth`` from
    ``global_params``.

    Args:
        filters (int): Base number of filters.
        global_params (namedtuple): Global params of the model.

    Returns:
        int: The scaled count rounded to a multiple of ``depth_divisor``;
        ``filters`` itself, unchanged, when no width multiplier is set.
    """
    width_mult = global_params.width_coefficient
    if not width_mult:
        return filters

    # TODO: modify the params names.
    #       maybe the names (width_divisor, min_width)
    #       are more suitable than (depth_divisor, min_depth).
    divisor = global_params.depth_divisor
    # A falsy min_depth (None or 0) falls back to the divisor.
    lower_bound = global_params.min_depth or divisor

    scaled = filters * width_mult
    # Round to the nearest multiple of divisor (formula transferred from the
    # official TensorFlow implementation), then apply the lower bound.
    rounded = int(scaled + divisor / 2) // divisor * divisor
    rounded = max(lower_bound, rounded)
    if rounded < 0.9 * scaled:
        # Never round down by more than 10%.
        rounded += divisor
    return int(rounded)



def round_repeats(repeats, global_params):
    """Scale a stage's repeat count by the depth multiplier.

    Reads ``depth_coefficient`` from ``global_params``.

    Args:
        repeats (int): Base ``num_repeat`` of the stage.
        global_params (namedtuple): Global params of the model.

    Returns:
        int: ``ceil(depth_coefficient * repeats)``; ``repeats`` itself,
        unchanged, when no depth multiplier is set.
    """
    depth_mult = global_params.depth_coefficient
    # Ceiling follows the formula of the official TensorFlow implementation.
    return int(math.ceil(depth_mult * repeats)) if depth_mult else repeats



def drop_connect(inputs, p, training):
    """Drop connect: zero whole samples of a batch at random during training.

    A single keep/drop decision is drawn per sample (mask shape
    ``[batch, 1, 1, 1]``), and surviving samples are divided by the keep
    probability ``1 - p``.

    Args:
        inputs (tensor): Input of this structure; the mask is 4-D, with the
            batch on the first dimension.
        p (float: 0.0~1.0): Probability of dropping a sample.
        training (bool): The running mode; nothing is dropped when False.

    Returns:
        output: ``inputs`` itself when not training, otherwise the masked and
        rescaled tensor.

    Raises:
        AssertionError: If ``p`` is outside ``[0, 1]``.
    """
    assert 0 <= p <= 1, "p must be in range of [0,1]"

    if not training:
        return inputs

    keep_prob = 1 - p
    if keep_prob == 0:
        # Every sample is dropped. Dividing by keep_prob below would turn the
        # result into NaN/inf instead of zeros, so short-circuit.
        return torch.zeros_like(inputs)

    batch_size = inputs.shape[0]

    # keep_prob + U[0, 1) floors to 1 with probability keep_prob, else to 0.
    random_tensor = keep_prob + torch.rand(
        [batch_size, 1, 1, 1], dtype=inputs.dtype, device=inputs.device
    )
    binary_tensor = torch.floor(random_tensor)

    return inputs / keep_prob * binary_tensor



def get_width_and_height_from_size(x):
    """Normalise a size specification to a pair.

    Args:
        x (int, tuple or list): Data size.

    Returns:
        size: ``(x, x)`` when ``x`` is an int; otherwise the list or tuple
        that was passed in, returned as-is.

    Raises:
        TypeError: If ``x`` is not an int, list or tuple.
    """
    if isinstance(x, int):
        return x, x
    if not isinstance(x, (list, tuple)):
        raise TypeError()
    return x



def calculate_output_image_size(input_image_size, stride):
    """Compute the output size of a 'SAME'-padded convolution with a stride.

    Necessary for static padding. Thanks to mannatsingh for pointing this out.

    Args:
        input_image_size (int, tuple or list): Size of input image.
        stride (int, tuple or list): Conv2d operation's stride. For a
            sequence only the first element is used, for both dimensions.

    Returns:
        output_image_size: A list ``[H, W]`` with each dimension equal to
        ``ceil(input / stride)``, or None when ``input_image_size`` is None.
    """
    if input_image_size is None:
        return None
    height, width = get_width_and_height_from_size(input_image_size)
    step = stride if isinstance(stride, int) else stride[0]
    return [int(math.ceil(extent / step)) for extent in (height, width)]



230# Note: 

231# The following 'SamePadding' functions make output size equal ceil(input size/stride). 

232# Only when stride equals 1, can the output size be the same as input size. 

233# Don't be confused by their function names ! ! ! 



def get_same_padding_conv2d(image_size=None):
    """Pick the 'SAME'-padding convolution class for the given image size.

    Static padding is used when an image size is supplied (it is necessary
    for ONNX exporting of models); dynamic padding otherwise.

    Args:
        image_size (int or tuple): Size of the image.

    Returns:
        ``Conv2dDynamicSamePadding`` when ``image_size`` is None, otherwise a
        ``functools.partial`` of ``Conv2dStaticSamePadding`` with
        ``image_size`` already bound.
    """
    if image_size is not None:
        return partial(Conv2dStaticSamePadding, image_size=image_size)
    return Conv2dDynamicSamePadding



class Conv2dDynamicSamePadding(nn.Conv2d):
    """2D convolution with TensorFlow-like 'SAME' padding, any image size.

    The padding is recomputed from the actual input size on every forward
    call, so the output size is ``ceil(input size / stride)``.
    """

    # Tips for 'SAME' mode padding.
    #     Given the following:
    #         i: width or height
    #         s: stride
    #         k: kernel size
    #         d: dilation
    #         p: padding
    #     Output after Conv2d:
    #         o = floor((i+p-((k-1)*d+1))/s+1)
    # If o equals i, i = floor((i+p-((k-1)*d+1))/s+1),
    # => p = (i-1)*s+((k-1)*d+1)-i

    def __init__(
        self,
        in_channels,
        out_channels,
        kernel_size,
        stride=1,
        dilation=1,
        groups=1,
        bias=True,
    ):
        """Build the convolution with its built-in padding fixed to 0."""
        super().__init__(
            in_channels,
            out_channels,
            kernel_size,
            stride,
            0,  # padding is applied manually in forward()
            dilation,
            groups,
            bias,
        )
        # forward() unpacks the stride as a (height, width) pair.
        if len(self.stride) != 2:
            self.stride = [self.stride[0]] * 2

    def forward(self, x):
        """Pad ``x`` for 'SAME' output size, then convolve it."""
        in_h, in_w = x.size()[-2:]
        k_h, k_w = self.weight.size()[-2:]
        s_h, s_w = self.stride
        d_h, d_w = self.dilation

        # The target output size depends on the stride.
        out_h = math.ceil(in_h / s_h)
        out_w = math.ceil(in_w / s_w)

        extra_h = max((out_h - 1) * s_h + (k_h - 1) * d_h + 1 - in_h, 0)
        extra_w = max((out_w - 1) * s_w + (k_w - 1) * d_w + 1 - in_w, 0)

        if extra_h > 0 or extra_w > 0:
            # An odd amount of padding puts the extra row/column at the
            # bottom/right.
            left, top = extra_w // 2, extra_h // 2
            x = F.pad(x, [left, extra_w - left, top, extra_h - top])

        return F.conv2d(
            x,
            self.weight,
            self.bias,
            self.stride,
            self.padding,
            self.dilation,
            self.groups,
        )



class Conv2dStaticSamePadding(nn.Conv2d):
    """2D convolution with TensorFlow-like 'SAME' padding, fixed image size.

    The padding module is built once in the constructor from the given
    ``image_size`` and then applied in ``forward``.
    """

    # With the same calculation as Conv2dDynamicSamePadding

    def __init__(
        self,
        in_channels,
        out_channels,
        kernel_size,
        stride=1,
        image_size=None,
        **kwargs,
    ):
        """Build the convolution and precompute its padding layer.

        ``image_size`` (int or (H, W) pair) must be given; any extra keyword
        arguments are forwarded to ``nn.Conv2d``.
        """
        super().__init__(
            in_channels, out_channels, kernel_size, stride, **kwargs
        )
        if len(self.stride) != 2:
            self.stride = [self.stride[0]] * 2

        # Calculate padding based on image size and save it
        assert image_size is not None
        if isinstance(image_size, int):
            in_h = in_w = image_size
        else:
            in_h, in_w = image_size
        k_h, k_w = self.weight.size()[-2:]
        s_h, s_w = self.stride
        d_h, d_w = self.dilation

        out_h = math.ceil(in_h / s_h)
        out_w = math.ceil(in_w / s_w)
        extra_h = max((out_h - 1) * s_h + (k_h - 1) * d_h + 1 - in_h, 0)
        extra_w = max((out_w - 1) * s_w + (k_w - 1) * d_w + 1 - in_w, 0)

        if extra_h > 0 or extra_w > 0:
            left, top = extra_w // 2, extra_h // 2
            self.static_padding = nn.ZeroPad2d(
                (left, extra_w - left, top, extra_h - top)
            )
        else:
            self.static_padding = nn.Identity()

    def forward(self, x):
        """Apply the precomputed padding, then convolve."""
        padded = self.static_padding(x)
        return F.conv2d(
            padded,
            self.weight,
            self.bias,
            self.stride,
            self.padding,
            self.dilation,
            self.groups,
        )



def get_same_padding_maxPool2d(image_size=None):
    """Pick the 'SAME'-padding max-pool class for the given image size.

    Static padding is used when an image size is supplied (it is necessary
    for ONNX exporting of models); dynamic padding otherwise.

    Args:
        image_size (int or tuple): Size of the image.

    Returns:
        ``MaxPool2dDynamicSamePadding`` when ``image_size`` is None, otherwise
        a ``functools.partial`` of ``MaxPool2dStaticSamePadding`` with
        ``image_size`` already bound.
    """
    if image_size is not None:
        return partial(MaxPool2dStaticSamePadding, image_size=image_size)
    return MaxPool2dDynamicSamePadding



class MaxPool2dDynamicSamePadding(nn.MaxPool2d):
    """2D max pooling with TensorFlow-like 'SAME' padding, any image size.

    The padding is recomputed from the actual input size on every forward
    call and applied with ``F.pad`` (zero-valued) before pooling.
    """

    def __init__(
        self,
        kernel_size,
        stride,
        padding=0,
        dilation=1,
        return_indices=False,
        ceil_mode=False,
    ):
        """Build the pooling layer and expand scalar settings to pairs."""
        super().__init__(
            kernel_size, stride, padding, dilation, return_indices, ceil_mode
        )
        # forward() unpacks and indexes these as (height, width) pairs.
        if isinstance(self.stride, int):
            self.stride = [self.stride] * 2
        if isinstance(self.kernel_size, int):
            self.kernel_size = [self.kernel_size] * 2
        if isinstance(self.dilation, int):
            self.dilation = [self.dilation] * 2

    def forward(self, x):
        """Pad ``x`` for 'SAME' output size, then max-pool it."""
        in_h, in_w = x.size()[-2:]
        k_h, k_w = self.kernel_size
        s_h, s_w = self.stride
        d_h, d_w = self.dilation[0], self.dilation[1]

        out_h = math.ceil(in_h / s_h)
        out_w = math.ceil(in_w / s_w)
        extra_h = max((out_h - 1) * s_h + (k_h - 1) * d_h + 1 - in_h, 0)
        extra_w = max((out_w - 1) * s_w + (k_w - 1) * d_w + 1 - in_w, 0)

        if extra_h > 0 or extra_w > 0:
            left, top = extra_w // 2, extra_h // 2
            x = F.pad(x, [left, extra_w - left, top, extra_h - top])

        return F.max_pool2d(
            x,
            self.kernel_size,
            self.stride,
            self.padding,
            self.dilation,
            self.ceil_mode,
            self.return_indices,
        )



class MaxPool2dStaticSamePadding(nn.MaxPool2d):
    """2D max pooling with TensorFlow-like 'SAME' padding, fixed image size.

    The padding module is built once in the constructor from the given
    ``image_size`` and then applied in ``forward``.
    """

    def __init__(self, kernel_size, stride, image_size=None, **kwargs):
        """Build the pooling layer and precompute its padding layer.

        ``image_size`` (int or (H, W) pair) must be given; any extra keyword
        arguments are forwarded to ``nn.MaxPool2d``.
        """
        super().__init__(kernel_size, stride, **kwargs)
        # The padding arithmetic below works on (height, width) pairs.
        if isinstance(self.stride, int):
            self.stride = [self.stride] * 2
        if isinstance(self.kernel_size, int):
            self.kernel_size = [self.kernel_size] * 2
        if isinstance(self.dilation, int):
            self.dilation = [self.dilation] * 2

        # Calculate padding based on image size and save it
        assert image_size is not None
        if isinstance(image_size, int):
            in_h = in_w = image_size
        else:
            in_h, in_w = image_size
        k_h, k_w = self.kernel_size
        s_h, s_w = self.stride
        d_h, d_w = self.dilation[0], self.dilation[1]

        out_h = math.ceil(in_h / s_h)
        out_w = math.ceil(in_w / s_w)
        extra_h = max((out_h - 1) * s_h + (k_h - 1) * d_h + 1 - in_h, 0)
        extra_w = max((out_w - 1) * s_w + (k_w - 1) * d_w + 1 - in_w, 0)

        if extra_h > 0 or extra_w > 0:
            left, top = extra_w // 2, extra_h // 2
            self.static_padding = nn.ZeroPad2d(
                (left, extra_w - left, top, extra_h - top)
            )
        else:
            self.static_padding = nn.Identity()

    def forward(self, x):
        """Apply the precomputed padding, then max-pool."""
        padded = self.static_padding(x)
        return F.max_pool2d(
            padded,
            self.kernel_size,
            self.stride,
            self.padding,
            self.dilation,
            self.ceil_mode,
            self.return_indices,
        )




525# Helper functions for loading model params 



528# BlockDecoder: A Class for encoding and decoding BlockArgs 

529# efficientnet_params: A function to query compound coefficient 

530# get_model_params and efficientnet: 

531# Functions to get BlockArgs and GlobalParams for efficientnet 

532# url_map and url_map_advprop: Dicts of url_map for pretrained weights 

533# load_pretrained_weights: A function to load pretrained weights 



class BlockDecoder(object):
    """Block Decoder for readability,
    straight from the official TensorFlow repository.

    Converts between the compact string notation of a stage
    (e.g. ``'r1_k3_s11_e1_i32_o16_se0.25_noskip'``) and ``BlockArgs``.
    """

    @staticmethod
    def _decode_block_string(block_string):
        """Get a block through a string notation of arguments.

        Args:
            block_string (str): A string notation of arguments.
                Examples: 'r1_k3_s11_e1_i32_o16_se0.25_noskip'.

        Returns:
            BlockArgs: The namedtuple defined at the top of this file. Its
            ``stride`` is a one-element list holding the first stride digit.

        Raises:
            AssertionError: If ``block_string`` is not a str, or the stride
                token is missing or is not one digit / two equal digits.
            KeyError: If one of the r/k/e/i/o tokens is missing.
        """
        assert isinstance(block_string, str)

        options = {}
        for op in block_string.split("_"):
            # Split each token at its first digit: 'se0.25' -> 'se', '0.25'.
            # Tokens without a digit (such as 'noskip') yield one piece and
            # are skipped here.
            splits = re.split(r"(\d.*)", op)
            if len(splits) >= 2:
                key, value = splits[:2]
                options[key] = value

        # Check stride. A missing 's' token fails this assertion instead of
        # raising KeyError from inside the check.
        stride_code = options.get("s", "")
        assert len(stride_code) == 1 or (
            len(stride_code) == 2 and stride_code[0] == stride_code[1]
        )

        return BlockArgs(
            num_repeat=int(options["r"]),
            kernel_size=int(options["k"]),
            stride=[int(options["s"][0])],
            expand_ratio=int(options["e"]),
            input_filters=int(options["i"]),
            output_filters=int(options["o"]),
            se_ratio=float(options["se"]) if "se" in options else None,
            id_skip=("noskip" not in block_string),
        )

    @staticmethod
    def _encode_block_string(block):
        """Encode a block to a string.

        Args:
            block (namedtuple): A BlockArgs type argument. ``stride`` may be
                an int or a sequence of one or two ints.

        Returns:
            block_string: A String form of BlockArgs.
        """
        # BlockArgs stores the stride in the `stride` field, as an int or a
        # short sequence; the notation always spells out two digits.
        stride = block.stride
        if isinstance(stride, int):
            strides = (stride, stride)
        else:
            strides = tuple(stride)
            if len(strides) == 1:
                strides = strides * 2

        args = [
            "r%d" % block.num_repeat,
            "k%d" % block.kernel_size,
            "s%d%d" % (strides[0], strides[1]),
            "e%s" % block.expand_ratio,
            "i%d" % block.input_filters,
            "o%d" % block.output_filters,
        ]
        # se_ratio is None for blocks decoded without an 'se' token.
        if block.se_ratio is not None and 0 < block.se_ratio <= 1:
            args.append("se%s" % block.se_ratio)
        if block.id_skip is False:
            args.append("noskip")
        return "_".join(args)

    @staticmethod
    def decode(string_list):
        """Decode a list of string notations to specify blocks inside the network.

        Args:
            string_list (list[str]): A list of strings, each string is a notation of block.

        Returns:
            blocks_args: A list of BlockArgs namedtuples of block args.
        """
        assert isinstance(string_list, list)
        return [
            BlockDecoder._decode_block_string(block_string)
            for block_string in string_list
        ]

    @staticmethod
    def encode(blocks_args):
        """Encode a list of BlockArgs to a list of strings.

        Args:
            blocks_args (list[namedtuples]): A list of BlockArgs namedtuples of block args.

        Returns:
            block_strings: A list of strings, each string is a notation of block.
        """
        return [
            BlockDecoder._encode_block_string(block) for block in blocks_args
        ]



def efficientnet_params(model_name):
    """Map EfficientNet model name to parameter coefficients.

    Args:
        model_name (str): Model name to be queried.

    Returns:
        params_dict[model_name]: A (width,depth,res,dropout) tuple.

    Raises:
        KeyError: If ``model_name`` is not one of the listed models.
    """
    # Every variant uses a 112-pixel input resolution here. The table this
    # replaces listed, for b0..b8 and l2 respectively:
    #   224, 240, 260, 300, 380, 456, 528, 600, 672, 800
    resolution = 112
    params_dict = {
        # Coefficients:   width,depth,res,dropout
        "efficientnet-b0": (1.0, 1.0, resolution, 0.2),
        "efficientnet-b1": (1.0, 1.1, resolution, 0.2),
        "efficientnet-b2": (1.1, 1.2, resolution, 0.3),
        "efficientnet-b3": (1.2, 1.4, resolution, 0.3),
        "efficientnet-b4": (1.4, 1.8, resolution, 0.4),
        "efficientnet-b5": (1.6, 2.2, resolution, 0.4),
        "efficientnet-b6": (1.8, 2.6, resolution, 0.5),
        "efficientnet-b7": (2.0, 3.1, resolution, 0.5),
        "efficientnet-b8": (2.2, 3.6, resolution, 0.5),
        "efficientnet-l2": (4.3, 5.3, resolution, 0.5),
    }
    return params_dict[model_name]



def efficientnet(
    width_coefficient=None,
    depth_coefficient=None,
    image_size=None,
    dropout_rate=0.2,
    drop_connect_rate=0.2,
    num_classes=1000,
    include_top=True,
):
    """Create BlockArgs and GlobalParams for efficientnet model.

    Args:
        width_coefficient (float)
        depth_coefficient (float)
        image_size (int)
        dropout_rate (float)
        drop_connect_rate (float)
        num_classes (int)
        include_top (bool)

        Meaning as the name suggests. Each value is stored unchanged in the
        returned GlobalParams.

    Returns:
        blocks_args, global_params.
    """
    # Stage layout of the whole model (efficientnet-b0 by default).
    # It will be modified in the construction of EfficientNet Class according to model
    block_strings = [
        "r1_k3_s11_e1_i32_o16_se0.25",
        "r2_k3_s22_e6_i16_o24_se0.25",
        "r2_k5_s22_e6_i24_o40_se0.25",
        "r3_k3_s22_e6_i40_o80_se0.25",
        "r3_k5_s11_e6_i80_o112_se0.25",
        "r4_k5_s22_e6_i112_o192_se0.25",
        "r1_k3_s11_e6_i192_o320_se0.25",
    ]
    decoded_blocks = BlockDecoder.decode(block_strings)

    # Batch-norm settings, depth divisor and min depth are fixed here; the
    # rest comes straight from the arguments.
    shared_params = GlobalParams(
        width_coefficient=width_coefficient,
        depth_coefficient=depth_coefficient,
        image_size=image_size,
        dropout_rate=dropout_rate,
        num_classes=num_classes,
        batch_norm_momentum=0.99,
        batch_norm_epsilon=1e-3,
        drop_connect_rate=drop_connect_rate,
        depth_divisor=8,
        min_depth=None,
        include_top=include_top,
    )

    return decoded_blocks, shared_params



def get_model_params(model_name, override_params):
    """Get the block args and global params for a given model name.

    Args:
        model_name (str): Model's name.
        override_params (dict): A dict to modify global_params; skipped when
            falsy.

    Returns:
        blocks_args, global_params

    Raises:
        NotImplementedError: If ``model_name`` does not start with
            ``"efficientnet"``.
    """
    if not model_name.startswith("efficientnet"):
        raise NotImplementedError(
            "model name is not pre-defined: {}".format(model_name)
        )

    width, depth, resolution, dropout = efficientnet_params(model_name)
    # note: all models have drop connect rate = 0.2
    blocks_args, global_params = efficientnet(
        width_coefficient=width,
        depth_coefficient=depth,
        dropout_rate=dropout,
        image_size=resolution,
    )

    if override_params:
        # ValueError will be raised here if override_params has fields not included in global_params.
        global_params = global_params._replace(**override_params)
    return blocks_args, global_params



758# train with Standard methods 

759# check more details in paper(EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks) 

# Every URL below is the empty string.
# NOTE(review): presumably the download links were removed; fetching
# pretrained weights by model name cannot work until they are filled in.
url_map = {"efficientnet-b%d" % index: "" for index in range(8)}

# train with Adversarial Examples(AdvProp)
# check more details in paper(Adversarial Examples Improve Image Recognition)
url_map_advprop = {"efficientnet-b%d" % index: "" for index in range(9)}



785# TODO: add the pretrained weights url map of 'efficientnet-l2' 



def load_pretrained_weights(
    model, model_name, weights_path=None, load_fc=True, advprop=False
):
    """Loads pretrained weights from weights path or download using url.

    Args:
        model (Module): The whole model of efficientnet.
        model_name (str): Model name of efficientnet.
        weights_path (None or str):
            str: path to pretrained weights file on the local disk.
            None: use pretrained weights downloaded from the Internet.
        load_fc (bool): Whether to load pretrained weights for fc layer at the end of the model.
        advprop (bool): Whether to load pretrained weights
                        trained with advprop (valid when weights_path is None).

    Raises:
        AssertionError: If the model is left with missing keys (other than
            the fc layer when ``load_fc`` is False), or the checkpoint
            contains keys the model does not have.
        KeyError: If ``weights_path`` is not a str and ``model_name`` has no
            entry in the selected url map.
    """
    if isinstance(weights_path, str):
        # NOTE(review): torch.load unpickles the file; only point this at
        # checkpoints from a trusted source.
        state_dict = torch.load(weights_path)
    else:
        # AutoAugment or Advprop (different preprocessing)
        url_map_ = url_map_advprop if advprop else url_map
        state_dict = model_zoo.load_url(url_map_[model_name])

    if load_fc:
        ret = model.load_state_dict(state_dict, strict=False)
        assert (
            not ret.missing_keys
        ), "Missing keys when loading pretrained weights: {}".format(
            ret.missing_keys
        )
    else:
        # Drop the classifier weights. A checkpoint that already lacks them
        # is accepted, since they are not wanted here anyway.
        state_dict.pop("_fc.weight", None)
        state_dict.pop("_fc.bias", None)
        ret = model.load_state_dict(state_dict, strict=False)
        assert set(ret.missing_keys) == set(
            ["_fc.weight", "_fc.bias"]
        ), "Missing keys when loading pretrained weights: {}".format(
            ret.missing_keys
        )
    # This check is about keys the model does not know, so say so.
    assert (
        not ret.unexpected_keys
    ), "Unexpected keys when loading pretrained weights: {}".format(
        ret.unexpected_keys
    )

    print("Loaded pretrained weights for {}".format(model_name))



class Flatten(Module):
    """Collapse every dimension after the first into a single one."""

    def forward(self, input):
        """Return ``input`` viewed as ``(input.size(0), -1)``."""
        leading = input.size(0)
        return input.view(leading, -1)



840# backbone ################################################################## 



843 "efficientnet-b0", 

844 "efficientnet-b1", 

845 "efficientnet-b2", 

846 "efficientnet-b3", 

847 "efficientnet-b4", 

848 "efficientnet-b5", 

849 "efficientnet-b6", 

850 "efficientnet-b7", 

851 "efficientnet-b8", 

852 # Support the construction of 'efficientnet-l2' without pretrained weights 

853 "efficientnet-l2", 




class MBConvBlock(nn.Module):
    """Mobile Inverted Residual Bottleneck Block.

    Layers, in order: optional 1x1 expansion, depthwise convolution, optional
    squeeze-and-excitation, 1x1 projection, and an optional residual
    connection with drop connect.

    Args:
        block_args (namedtuple): BlockArgs describing this block.
        global_params (namedtuple): GlobalParams; the batch-norm momentum and
            epsilon are read from it.
        image_size (tuple or list): [image_height, image_width].

    References:
        [1] (MobileNet v1)
        [2] (MobileNet v2)
        [3] (MobileNet v3)
    """

    def __init__(self, block_args, global_params, image_size=None):
        super().__init__()
        self._block_args = block_args
        # pytorch's momentum is the complement of tensorflow's
        self._bn_mom = 1 - global_params.batch_norm_momentum
        self._bn_eps = global_params.batch_norm_epsilon
        se_ratio = block_args.se_ratio
        self.has_se = (se_ratio is not None) and (0 < se_ratio <= 1)
        # whether to use skip connection and drop connect
        self.id_skip = block_args.id_skip

        # Expansion phase (Inverted Bottleneck)
        in_ch = block_args.input_filters
        mid_ch = block_args.input_filters * block_args.expand_ratio
        if block_args.expand_ratio != 1:
            conv_cls = get_same_padding_conv2d(image_size=image_size)
            self._expand_conv = conv_cls(
                in_channels=in_ch, out_channels=mid_ch, kernel_size=1, bias=False
            )
            self._bn0 = nn.BatchNorm2d(
                num_features=mid_ch, momentum=self._bn_mom, eps=self._bn_eps
            )
            # A 1x1 convolution with stride 1 leaves image_size unchanged.

        # Depthwise convolution phase
        kernel = block_args.kernel_size
        stride = block_args.stride
        conv_cls = get_same_padding_conv2d(image_size=image_size)
        self._depthwise_conv = conv_cls(
            in_channels=mid_ch,
            out_channels=mid_ch,
            groups=mid_ch,  # groups makes it depthwise
            kernel_size=kernel,
            stride=stride,
            bias=False,
        )
        self._bn1 = nn.BatchNorm2d(
            num_features=mid_ch, momentum=self._bn_mom, eps=self._bn_eps
        )
        image_size = calculate_output_image_size(image_size, stride)

        # Squeeze and Excitation layer, if desired
        if self.has_se:
            # These convolutions run on the 1x1 pooled tensor.
            conv_cls = get_same_padding_conv2d(image_size=(1, 1))
            squeezed_ch = max(1, int(block_args.input_filters * se_ratio))
            self._se_reduce = conv_cls(
                in_channels=mid_ch, out_channels=squeezed_ch, kernel_size=1
            )
            self._se_expand = conv_cls(
                in_channels=squeezed_ch, out_channels=mid_ch, kernel_size=1
            )

        # Pointwise convolution phase
        out_ch = block_args.output_filters
        conv_cls = get_same_padding_conv2d(image_size=image_size)
        self._project_conv = conv_cls(
            in_channels=mid_ch, out_channels=out_ch, kernel_size=1, bias=False
        )
        self._bn2 = nn.BatchNorm2d(
            num_features=out_ch, momentum=self._bn_mom, eps=self._bn_eps
        )
        self._swish = MemoryEfficientSwish()

    def forward(self, inputs, drop_connect_rate=None):
        """MBConvBlock's forward function.

        Args:
            inputs (tensor): Input tensor.
            drop_connect_rate (float): Drop connect rate, between 0 and 1;
                a falsy value disables drop connect.

        Returns:
            Output of this block after processing.
        """
        args = self._block_args

        # Expansion and Depthwise Convolution
        x = inputs
        if args.expand_ratio != 1:
            x = self._swish(self._bn0(self._expand_conv(inputs)))
        x = self._swish(self._bn1(self._depthwise_conv(x)))

        # Squeeze and Excitation
        if self.has_se:
            gate = F.adaptive_avg_pool2d(x, 1)
            gate = self._swish(self._se_reduce(gate))
            gate = self._se_expand(gate)
            x = torch.sigmoid(gate) * x

        # Pointwise Convolution
        x = self._bn2(self._project_conv(x))

        # Skip connection and drop connect
        use_residual = (
            self.id_skip
            and args.stride == 1
            and args.input_filters == args.output_filters
        )
        if use_residual:
            # The combination of skip connection and drop connect brings about stochastic depth.
            if drop_connect_rate:
                x = drop_connect(
                    x, p=drop_connect_rate, training=self.training
                )
            x = x + inputs  # skip connection
        return x

    def set_swish(self, memory_efficient=True):
        """Sets swish function as memory efficient (for training) or standard (for export).

        Args:
            memory_efficient (bool): Whether to use memory-efficient version of swish.
        """
        if memory_efficient:
            self._swish = MemoryEfficientSwish()
        else:
            self._swish = Swish()



1005class EfficientNet(nn.Module): 

1006 """EfficientNet model. 

1007 Most easily loaded with the .from_name or .from_pretrained methods. 


1009 Args: 

1010 blocks_args (list[namedtuple]): A list of BlockArgs to construct blocks. 

1011 global_params (namedtuple): A set of GlobalParams shared between blocks. 


1013 References: 

1014 [1] (EfficientNet) 


1016 Example: 



1019 import torch 

1020 >>> from efficientnet.model import EfficientNet 

1021 >>> inputs = torch.rand(1, 3, 224, 224) 

1022 >>> model = EfficientNet.from_pretrained('efficientnet-b0') 

1023 >>> model.eval() 

1024 >>> outputs = model(inputs) 

1025 """ 


def __init__(
    self, out_h, out_w, feat_dim, blocks_args=None, global_params=None
):
    """Assemble the stem, the MBConv blocks, the conv head and the embedding head.

    Args:
        out_h (int): Height of the feature map that ``output_layer`` flattens.
        out_w (int): Width of the feature map that ``output_layer`` flattens.
        feat_dim (int): Number of output features of ``output_layer``.
        blocks_args (list): Non-empty list of per-stage block arguments
            (each entry must support ``_replace`` and expose
            ``input_filters``, ``output_filters``, ``num_repeat`` and
            ``stride``).
        global_params: Model-wide parameters; the attributes read here are
            ``batch_norm_momentum``, ``batch_norm_epsilon``, ``image_size``,
            ``dropout_rate`` and ``num_classes``.

    Raises:
        AssertionError: If ``blocks_args`` is not a non-empty list.
    """
    super().__init__()
    assert isinstance(blocks_args, list), "blocks_args should be a list"
    assert len(blocks_args) > 0, "block args must be greater than 0"
    self._global_params = global_params
    self._blocks_args = blocks_args

    # Batch norm parameters
    bn_mom = 1 - self._global_params.batch_norm_momentum
    bn_eps = self._global_params.batch_norm_epsilon

    # Get stem static or dynamic convolution depending on image size
    image_size = global_params.image_size
    Conv2d = get_same_padding_conv2d(image_size=image_size)

    # Stem. This variant uses stride=1 where the stem previously used
    # stride=2, so the stem keeps the input resolution.
    in_channels = 3  # rgb
    out_channels = round_filters(32, self._global_params)
    self._conv_stem = Conv2d(
        in_channels, out_channels, kernel_size=3, stride=1, bias=False
    )
    self._bn0 = nn.BatchNorm2d(
        num_features=out_channels, momentum=bn_mom, eps=bn_eps
    )
    # NOTE(review): the stem above has stride 1, yet the tracked size is
    # still advanced with stride 2. Left untouched because this value is
    # handed to every block as ``image_size`` and changing it may alter
    # their padding -- confirm the intent before touching it.
    image_size = calculate_output_image_size(image_size, 2)

    # Build blocks
    self._blocks = nn.ModuleList([])
    for block_args in self._blocks_args:
        # Scale the stage's filters and repeat count with the global params.
        block_args = block_args._replace(
            input_filters=round_filters(
                block_args.input_filters, self._global_params
            ),
            output_filters=round_filters(
                block_args.output_filters, self._global_params
            ),
            num_repeat=round_repeats(
                block_args.num_repeat, self._global_params
            ),
        )

        # The first block of a stage takes care of the stride and of the
        # filter size increase.
        self._blocks.append(
            MBConvBlock(
                block_args, self._global_params, image_size=image_size
            )
        )
        image_size = calculate_output_image_size(
            image_size, block_args.stride
        )
        if block_args.num_repeat > 1:
            # Remaining blocks of the stage keep the same output size.
            block_args = block_args._replace(
                input_filters=block_args.output_filters, stride=1
            )
        for _ in range(block_args.num_repeat - 1):
            self._blocks.append(
                MBConvBlock(
                    block_args, self._global_params, image_size=image_size
                )
            )

    # Head
    in_channels = block_args.output_filters  # output of final block
    out_channels = round_filters(1280, self._global_params)
    Conv2d = get_same_padding_conv2d(image_size=image_size)
    self._conv_head = Conv2d(
        in_channels, out_channels, kernel_size=1, bias=False
    )
    self._bn1 = nn.BatchNorm2d(
        num_features=out_channels, momentum=bn_mom, eps=bn_eps
    )

    # Layers of the original classification head. ``forward`` does not use
    # them, but they are kept so the module's parameter names do not change.
    self._avg_pooling = nn.AdaptiveAvgPool2d(1)
    self._dropout = nn.Dropout(self._global_params.dropout_rate)
    self._fc = nn.Linear(out_channels, self._global_params.num_classes)
    self._swish = MemoryEfficientSwish()

    # Embedding head. It is sized from ``out_channels`` (the width of the
    # conv head) instead of a literal 1280, so it also matches when
    # ``round_filters`` scales the head width.
    self.output_layer = Sequential(
        BatchNorm2d(out_channels),
        Dropout(self._global_params.dropout_rate),
        Flatten(),
        Linear(out_channels * out_h * out_w, feat_dim),
        BatchNorm1d(feat_dim),
    )


def set_swish(self, memory_efficient=True):
    """Switch the swish activation of the model and of all its blocks.

    Args:
        memory_efficient (bool): ``True`` installs ``MemoryEfficientSwish``
            (for training); ``False`` installs the standard ``Swish``
            (for export).
    """
    if memory_efficient:
        self._swish = MemoryEfficientSwish()
    else:
        self._swish = Swish()

    # Propagate the same choice to every block.
    for mb_block in self._blocks:
        mb_block.set_swish(memory_efficient)


def extract_endpoints(self, inputs):
    """Collect intermediate feature maps at each spatial reduction.

    The input goes through the stem and then through every block. Each time
    a block shrinks dimension 2 of the feature map, the feature map from
    just before that block is stored. The output of the conv head is stored
    last.

    Args:
        inputs (tensor): Input tensor.

    Returns:
        dict: Feature maps keyed ``"reduction_1"``, ``"reduction_2"``, ...
        in the order they were collected; the last entry is the head output.
    """
    collected = {}
    block_count = len(self._blocks)
    base_rate = self._global_params.drop_connect_rate

    # Stem
    current = self._conv_stem(inputs)
    current = self._bn0(current)
    current = self._swish(current)
    previous = current

    # Blocks
    for position, mb_block in enumerate(self._blocks):
        rate = base_rate
        if rate:
            # Scale the drop-connect rate with the block's depth.
            rate *= float(position) / block_count
        current = mb_block(current, drop_connect_rate=rate)
        if current.size(2) < previous.size(2):
            key = "reduction_{}".format(len(collected) + 1)
            collected[key] = previous
        previous = current

    # Head
    current = self._conv_head(current)
    current = self._bn1(current)
    current = self._swish(current)
    collected["reduction_{}".format(len(collected) + 1)] = current

    return collected


def extract_features(self, inputs):
    """Run the convolutional part of the network (stem, blocks, head).

    Args:
        inputs (tensor): Input tensor.

    Returns:
        The activated, batch-normalised output of the head convolution.
    """
    block_count = len(self._blocks)
    base_rate = self._global_params.drop_connect_rate

    # Stem
    features = self._conv_stem(inputs)
    features = self._bn0(features)
    features = self._swish(features)

    # Blocks
    for position, mb_block in enumerate(self._blocks):
        rate = base_rate
        if rate:
            # Scale the drop-connect rate with the block's depth.
            rate *= float(position) / block_count
        features = mb_block(features, drop_connect_rate=rate)

    # Head
    features = self._conv_head(features)
    features = self._bn1(features)
    return self._swish(features)


def forward(self, inputs):
    """Compute the model output for a batch of inputs.

    The input is passed through ``extract_features`` and the resulting
    feature map is handed to ``output_layer``.

    Args:
        inputs (tensor): Input tensor.

    Returns:
        Output of ``output_layer`` applied to the extracted features.
    """
    feature_map = self.extract_features(inputs)
    return self.output_layer(feature_map)


@classmethod
def from_name(
    cls,
    model_name,
    in_channels=3,
    *,
    out_h=None,
    out_w=None,
    feat_dim=None,
    **override_params,
):
    """Create an efficientnet model according to its name.

    The constructor needs ``out_h``, ``out_w`` and ``feat_dim`` in addition
    to the block/global parameters, so they are accepted here as
    keyword-only arguments and forwarded to it.

    Args:
        model_name (str): Name for efficientnet.
        in_channels (int): Input data's channel number.
        out_h (int): Forwarded to the constructor as ``out_h``.
        out_w (int): Forwarded to the constructor as ``out_w``.
        feat_dim (int): Forwarded to the constructor as ``feat_dim``.
        override_params (other key word params):
            Params to override model's global_params.
            Optional key:
                'width_coefficient', 'depth_coefficient',
                'image_size', 'dropout_rate',
                'num_classes', 'batch_norm_momentum',
                'batch_norm_epsilon', 'drop_connect_rate',
                'depth_divisor', 'min_depth'

    Returns:
        An efficientnet model.

    Raises:
        ValueError: If ``model_name`` is not a valid model name.
        TypeError: If ``out_h``, ``out_w`` or ``feat_dim`` is not given.
    """
    cls._check_model_name_is_valid(model_name)

    head_args = (("out_h", out_h), ("out_w", out_w), ("feat_dim", feat_dim))
    missing = [name for name, value in head_args if value is None]
    if missing:
        # Same exception type the constructor call used to raise, but with
        # a message that names what the caller has to provide.
        raise TypeError(
            "from_name() missing required keyword argument(s): "
            + ", ".join(missing)
        )

    blocks_args, global_params = get_model_params(
        model_name, override_params
    )
    model = cls(out_h, out_w, feat_dim, blocks_args, global_params)
    model._change_in_channels(in_channels)
    return model


@classmethod
def from_pretrained(
    cls,
    model_name,
    weights_path=None,
    advprop=False,
    in_channels=3,
    num_classes=1000,
    **override_params,
):
    """Build a named efficientnet model and load pretrained weights into it.

    Args:
        model_name (str): Name for efficientnet.
        weights_path (None or str):
            str: path to pretrained weights file on the local disk.
            None: use pretrained weights downloaded from the Internet.
        advprop (bool):
            Whether to load pretrained weights
            trained with advprop (valid when weights_path is None).
        in_channels (int): Input data's channel number.
        num_classes (int):
            Number of categories for classification.
            It controls the output size for final linear layer.
        override_params (other key word params):
            Params to override model's global_params.
            Optional key:
                'width_coefficient', 'depth_coefficient',
                'image_size', 'dropout_rate',
                'batch_norm_momentum',
                'batch_norm_epsilon', 'drop_connect_rate',
                'depth_divisor', 'min_depth'

    Returns:
        A pretrained efficientnet model.
    """
    # The model is first built by ``from_name`` with its default input
    # channels; the stem is adapted only after the weights are loaded.
    net = cls.from_name(
        model_name, num_classes=num_classes, **override_params
    )

    # The fully connected layer is only loaded for the 1000-class setup.
    keep_fc = num_classes == 1000
    load_pretrained_weights(
        net,
        model_name,
        weights_path=weights_path,
        load_fc=keep_fc,
        advprop=advprop,
    )

    net._change_in_channels(in_channels)
    return net


@classmethod
def get_image_size(cls, model_name):
    """Look up the input image size of a named efficientnet model.

    Args:
        model_name (str): Name for efficientnet.

    Returns:
        Input image size (resolution).

    Raises:
        ValueError: If ``model_name`` is not a valid model name.
    """
    cls._check_model_name_is_valid(model_name)
    # The resolution is the third of the four values returned.
    _first, _second, resolution, _fourth = efficientnet_params(model_name)
    return resolution


@classmethod
def _check_model_name_is_valid(cls, model_name):
    """Validate a model name against ``VALID_MODELS``.

    Args:
        model_name (str): Name for efficientnet.

    Raises:
        ValueError: If ``model_name`` is not in ``VALID_MODELS``.
    """
    if model_name in VALID_MODELS:
        return
    supported = ", ".join(VALID_MODELS)
    raise ValueError("model_name should be one of: " + supported)


def _change_in_channels(self, in_channels):
    """Replace the stem convolution when the input does not have 3 channels.

    Nothing happens for ``in_channels == 3``. Otherwise ``_conv_stem`` is
    replaced by a new, freshly initialised convolution taking
    ``in_channels`` input channels.

    Args:
        in_channels (int): Input data's channel number.
    """
    if in_channels == 3:
        return

    Conv2d = get_same_padding_conv2d(
        image_size=self._global_params.image_size
    )
    out_channels = round_filters(32, self._global_params)
    # stride=1 mirrors the stem built in ``__init__``. A stride of 2 here
    # would halve every downstream feature map, which would then no longer
    # match the ``out_h``/``out_w`` that ``output_layer`` was sized for.
    self._conv_stem = Conv2d(
        in_channels, out_channels, kernel_size=3, stride=1, bias=False
    )