Coverage for src/bob/bio/face/pytorch/facexzoo/EfficientNets.py: 65%
369 statements — coverage.py v7.6.0, created at 2024-07-13 00:04 +0200
1"""
2@author: Jun Wang
3@date: 20201019
4@contact: jun21wangustc@gmail.com
5"""
7# based on:
8# Github repo: https://github.com/lukemelas/EfficientNet-PyTorch
10import collections
11import math
12import re
14from functools import partial
16import torch
18from torch import nn
19from torch.nn import (
20 BatchNorm1d,
21 BatchNorm2d,
22 Dropout,
23 Linear,
24 Module,
25 Sequential,
26)
27from torch.nn import functional as F
28from torch.utils import model_zoo

################################################################################
# Helper functions for model architecture
################################################################################

# GlobalParams and BlockArgs: two namedtuples
# Swish and MemoryEfficientSwish: two implementations of the Swish activation
# round_filters and round_repeats:
#     functions to calculate params for scaling model width and depth
# get_width_and_height_from_size and calculate_output_image_size
# drop_connect: a structural design
# get_same_padding_conv2d:
#     Conv2dDynamicSamePadding
#     Conv2dStaticSamePadding
# get_same_padding_maxPool2d:
#     MaxPool2dDynamicSamePadding
#     MaxPool2dStaticSamePadding
#     These are additional functions, not used in EfficientNet,
#     but they can be used in other models (such as EfficientDet).

# Parameters for the entire model (stem, all blocks, and head)
GlobalParams = collections.namedtuple(
    "GlobalParams",
    [
        "width_coefficient",
        "depth_coefficient",
        "image_size",
        "dropout_rate",
        "num_classes",
        "batch_norm_momentum",
        "batch_norm_epsilon",
        "drop_connect_rate",
        "depth_divisor",
        "min_depth",
        "include_top",
    ],
)

# Parameters for an individual model block
BlockArgs = collections.namedtuple(
    "BlockArgs",
    [
        "num_repeat",
        "kernel_size",
        "stride",
        "expand_ratio",
        "input_filters",
        "output_filters",
        "se_ratio",
        "id_skip",
    ],
)

# Set GlobalParams and BlockArgs's defaults
GlobalParams.__new__.__defaults__ = (None,) * len(GlobalParams._fields)
BlockArgs.__new__.__defaults__ = (None,) * len(BlockArgs._fields)


# An ordinary implementation of the Swish function
class Swish(nn.Module):
    def forward(self, x):
        return x * torch.sigmoid(x)


# A memory-efficient implementation of the Swish function
class SwishImplementation(torch.autograd.Function):
    @staticmethod
    def forward(ctx, i):
        result = i * torch.sigmoid(i)
        ctx.save_for_backward(i)
        return result

    @staticmethod
    def backward(ctx, grad_output):
        i = ctx.saved_tensors[0]
        sigmoid_i = torch.sigmoid(i)
        return grad_output * (sigmoid_i * (1 + i * (1 - sigmoid_i)))


class MemoryEfficientSwish(nn.Module):
    def forward(self, x):
        return SwishImplementation.apply(x)
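

# Hedged sanity-check sketch (this helper is illustrative and not part of the
# original module): both Swish variants compute the same forward value; the
# memory-efficient one saves memory by recomputing sigmoid(i) in backward
# instead of keeping autograd's intermediate buffers.
def _swish_equivalence_demo():
    x = torch.randn(4, 8, requires_grad=True)
    y1 = Swish()(x)
    y2 = MemoryEfficientSwish()(x)
    assert torch.allclose(y1, y2)
    # the gradient d/dx [x*sigmoid(x)] = sigmoid(x)*(1 + x*(1 - sigmoid(x)))
    # matches SwishImplementation.backward above
    y2.sum().backward()
    return x.grad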


def round_filters(filters, global_params):
    """Calculate and round the number of filters based on the width multiplier.
    Uses width_coefficient, depth_divisor and min_depth of global_params.

    Args:
        filters (int): Number of filters to be calculated.
        global_params (namedtuple): Global params of the model.

    Returns:
        new_filters: New number of filters after calculation.
    """
    multiplier = global_params.width_coefficient
    if not multiplier:
        return filters
    # TODO: modify the param names.
    # Maybe the names (width_divisor, min_width)
    # are more suitable than (depth_divisor, min_depth).
    divisor = global_params.depth_divisor
    min_depth = global_params.min_depth
    filters *= multiplier
    min_depth = (
        min_depth or divisor
    )  # pay attention to this line when using min_depth
    # follow the formula transferred from the official TensorFlow implementation
    new_filters = max(
        min_depth, int(filters + divisor / 2) // divisor * divisor
    )
    if new_filters < 0.9 * filters:  # prevent rounding by more than 10%
        new_filters += divisor
    return int(new_filters)
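

# Hedged worked example (illustrative helper, not in the original module):
# for efficientnet-b4 (width_coefficient=1.4) the stem's 32 filters scale to
# 32 * 1.4 = 44.8, which rounds to the nearest multiple of depth_divisor=8,
# giving 48.
def _round_filters_demo():
    gp = GlobalParams(width_coefficient=1.4, depth_divisor=8, min_depth=None)
    assert round_filters(32, gp) == 48
    return round_filters(32, gp)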


def round_repeats(repeats, global_params):
    """Calculate a block's repeat number based on the depth multiplier.
    Uses depth_coefficient of global_params.

    Args:
        repeats (int): num_repeat to be calculated.
        global_params (namedtuple): Global params of the model.

    Returns:
        new_repeat: New repeat number after calculation.
    """
    multiplier = global_params.depth_coefficient
    if not multiplier:
        return repeats
    # follow the formula transferred from the official TensorFlow implementation
    return int(math.ceil(multiplier * repeats))
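

# Hedged worked example (illustrative helper, not in the original module):
# for efficientnet-b2 (depth_coefficient=1.2) a stage with num_repeat=3
# becomes ceil(1.2 * 3) = ceil(3.6) = 4 blocks.
def _round_repeats_demo():
    gp = GlobalParams(depth_coefficient=1.2)
    assert round_repeats(3, gp) == 4
    return round_repeats(3, gp)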


def drop_connect(inputs, p, training):
    """Drop connect.

    Args:
        inputs (tensor: BCHW): Input of this structure.
        p (float: 0.0~1.0): Probability of drop connection.
        training (bool): The running mode.

    Returns:
        output: Output after drop connection.
    """
    assert 0 <= p <= 1, "p must be in range of [0,1]"

    if not training:
        return inputs

    batch_size = inputs.shape[0]
    keep_prob = 1 - p

    # generate binary_tensor mask according to probability (p for 0, 1-p for 1)
    random_tensor = keep_prob
    random_tensor += torch.rand(
        [batch_size, 1, 1, 1], dtype=inputs.dtype, device=inputs.device
    )
    binary_tensor = torch.floor(random_tensor)

    output = inputs / keep_prob * binary_tensor
    return output
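

# Hedged sketch (illustrative helper, not in the original module): in eval
# mode drop_connect is the identity; in training it zeroes whole samples with
# probability p and rescales survivors by 1/(1-p) so the expectation matches.
def _drop_connect_demo():
    x = torch.ones(8, 3, 4, 4)
    assert torch.equal(drop_connect(x, p=0.2, training=False), x)
    y = drop_connect(x, p=0.2, training=True)
    kept = y[y.sum(dim=(1, 2, 3)) > 0]
    # surviving samples are rescaled by 1/keep_prob = 1/0.8
    assert torch.allclose(kept, torch.full_like(kept, 1.0 / 0.8))
    return y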


def get_width_and_height_from_size(x):
    """Obtain height and width from x.

    Args:
        x (int, tuple or list): Data size.

    Returns:
        size: A tuple or list (H, W).
    """
    if isinstance(x, int):
        return x, x
    if isinstance(x, (list, tuple)):
        return x
    else:
        raise TypeError()


def calculate_output_image_size(input_image_size, stride):
    """Calculates the output image size when using Conv2dSamePadding with a stride.
    Necessary for static padding. Thanks to mannatsingh for pointing this out.

    Args:
        input_image_size (int, tuple or list): Size of input image.
        stride (int, tuple or list): Conv2d operation's stride.

    Returns:
        output_image_size: A list [H, W].
    """
    if input_image_size is None:
        return None
    image_height, image_width = get_width_and_height_from_size(input_image_size)
    stride = stride if isinstance(stride, int) else stride[0]
    image_height = int(math.ceil(image_height / stride))
    image_width = int(math.ceil(image_width / stride))
    return [image_height, image_width]
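

# Hedged worked example (illustrative helper, not in the original module):
# a 112x112 feature map passed through a stride-2 'SAME' conv comes out as
# ceil(112/2) = 56 on each side.
def _output_size_demo():
    assert calculate_output_image_size(112, 2) == [56, 56]
    assert calculate_output_image_size((7, 5), 2) == [4, 3]
    return calculate_output_image_size(112, 2)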


# Note:
# The following 'SamePadding' functions make the output size equal to
# ceil(input size / stride). Only when stride equals 1 can the output size
# be the same as the input size. Don't be confused by their names.


def get_same_padding_conv2d(image_size=None):
    """Chooses static padding if you have specified an image size, and dynamic padding otherwise.
    Static padding is necessary for ONNX exporting of models.

    Args:
        image_size (int or tuple): Size of the image.

    Returns:
        Conv2dDynamicSamePadding or Conv2dStaticSamePadding.
    """
    if image_size is None:
        return Conv2dDynamicSamePadding
    else:
        return partial(Conv2dStaticSamePadding, image_size=image_size)


class Conv2dDynamicSamePadding(nn.Conv2d):
    """2D Convolutions like TensorFlow's 'SAME' mode, for a dynamic image size.
    The padding is computed dynamically in the forward function.
    """

    # Tips for 'SAME' mode padding.
    #     Given the following:
    #         i: width or height
    #         s: stride
    #         k: kernel size
    #         d: dilation
    #         p: padding
    #     Output after Conv2d:
    #         o = floor((i + p - ((k - 1) * d + 1)) / s + 1)
    # If o equals i (the stride-1 case), i = floor((i + p - ((k - 1) * d + 1)) / s + 1),
    # => p = (i - 1) * s + ((k - 1) * d + 1) - i

    def __init__(
        self,
        in_channels,
        out_channels,
        kernel_size,
        stride=1,
        dilation=1,
        groups=1,
        bias=True,
    ):
        super().__init__(
            in_channels,
            out_channels,
            kernel_size,
            stride,
            0,
            dilation,
            groups,
            bias,
        )
        self.stride = (
            self.stride if len(self.stride) == 2 else [self.stride[0]] * 2
        )

    def forward(self, x):
        ih, iw = x.size()[-2:]
        kh, kw = self.weight.size()[-2:]
        sh, sw = self.stride
        oh, ow = math.ceil(ih / sh), math.ceil(
            iw / sw
        )  # the output size follows ceil(input / stride)
        pad_h = max(
            (oh - 1) * self.stride[0] + (kh - 1) * self.dilation[0] + 1 - ih, 0
        )
        pad_w = max(
            (ow - 1) * self.stride[1] + (kw - 1) * self.dilation[1] + 1 - iw, 0
        )
        if pad_h > 0 or pad_w > 0:
            x = F.pad(
                x,
                [
                    pad_w // 2,
                    pad_w - pad_w // 2,
                    pad_h // 2,
                    pad_h - pad_h // 2,
                ],
            )
        return F.conv2d(
            x,
            self.weight,
            self.bias,
            self.stride,
            self.padding,
            self.dilation,
            self.groups,
        )
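

# Hedged worked example (illustrative helper, not in the original module):
# for a 112x112 input, kernel 3, stride 2, dilation 1:
#     o = ceil(112 / 2) = 56
#     pad = (56 - 1) * 2 + (3 - 1) + 1 - 112 = 1
# so one row/column of zeros is added (0 before, 1 after), and the output is
# exactly 56x56, matching TensorFlow's 'SAME' behavior.
def _same_padding_demo():
    conv = Conv2dDynamicSamePadding(3, 8, kernel_size=3, stride=2, bias=False)
    out = conv(torch.randn(1, 3, 112, 112))
    assert out.shape[-2:] == (56, 56)
    return out.shape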


class Conv2dStaticSamePadding(nn.Conv2d):
    """2D Convolutions like TensorFlow's 'SAME' mode, with the given input image size.
    The padding module is calculated in the constructor, then used in forward.
    """

    # Same calculation as Conv2dDynamicSamePadding

    def __init__(
        self,
        in_channels,
        out_channels,
        kernel_size,
        stride=1,
        image_size=None,
        **kwargs,
    ):
        super().__init__(
            in_channels, out_channels, kernel_size, stride, **kwargs
        )
        self.stride = (
            self.stride if len(self.stride) == 2 else [self.stride[0]] * 2
        )

        # Calculate padding based on image size and save it
        assert image_size is not None
        ih, iw = (
            (image_size, image_size)
            if isinstance(image_size, int)
            else image_size
        )
        kh, kw = self.weight.size()[-2:]
        sh, sw = self.stride
        oh, ow = math.ceil(ih / sh), math.ceil(iw / sw)
        pad_h = max(
            (oh - 1) * self.stride[0] + (kh - 1) * self.dilation[0] + 1 - ih, 0
        )
        pad_w = max(
            (ow - 1) * self.stride[1] + (kw - 1) * self.dilation[1] + 1 - iw, 0
        )
        if pad_h > 0 or pad_w > 0:
            self.static_padding = nn.ZeroPad2d(
                (pad_w // 2, pad_w - pad_w // 2, pad_h // 2, pad_h - pad_h // 2)
            )
        else:
            self.static_padding = nn.Identity()

    def forward(self, x):
        x = self.static_padding(x)
        x = F.conv2d(
            x,
            self.weight,
            self.bias,
            self.stride,
            self.padding,
            self.dilation,
            self.groups,
        )
        return x


def get_same_padding_maxPool2d(image_size=None):
    """Chooses static padding if you have specified an image size, and dynamic padding otherwise.
    Static padding is necessary for ONNX exporting of models.

    Args:
        image_size (int or tuple): Size of the image.

    Returns:
        MaxPool2dDynamicSamePadding or MaxPool2dStaticSamePadding.
    """
    if image_size is None:
        return MaxPool2dDynamicSamePadding
    else:
        return partial(MaxPool2dStaticSamePadding, image_size=image_size)


class MaxPool2dDynamicSamePadding(nn.MaxPool2d):
    """2D MaxPooling like TensorFlow's 'SAME' mode, with a dynamic image size.
    The padding is computed dynamically in the forward function.
    """

    def __init__(
        self,
        kernel_size,
        stride,
        padding=0,
        dilation=1,
        return_indices=False,
        ceil_mode=False,
    ):
        super().__init__(
            kernel_size, stride, padding, dilation, return_indices, ceil_mode
        )
        self.stride = (
            [self.stride] * 2 if isinstance(self.stride, int) else self.stride
        )
        self.kernel_size = (
            [self.kernel_size] * 2
            if isinstance(self.kernel_size, int)
            else self.kernel_size
        )
        self.dilation = (
            [self.dilation] * 2
            if isinstance(self.dilation, int)
            else self.dilation
        )

    def forward(self, x):
        ih, iw = x.size()[-2:]
        kh, kw = self.kernel_size
        sh, sw = self.stride
        oh, ow = math.ceil(ih / sh), math.ceil(iw / sw)
        pad_h = max(
            (oh - 1) * self.stride[0] + (kh - 1) * self.dilation[0] + 1 - ih, 0
        )
        pad_w = max(
            (ow - 1) * self.stride[1] + (kw - 1) * self.dilation[1] + 1 - iw, 0
        )
        if pad_h > 0 or pad_w > 0:
            x = F.pad(
                x,
                [
                    pad_w // 2,
                    pad_w - pad_w // 2,
                    pad_h // 2,
                    pad_h - pad_h // 2,
                ],
            )
        return F.max_pool2d(
            x,
            self.kernel_size,
            self.stride,
            self.padding,
            self.dilation,
            self.ceil_mode,
            self.return_indices,
        )


class MaxPool2dStaticSamePadding(nn.MaxPool2d):
    """2D MaxPooling like TensorFlow's 'SAME' mode, with the given input image size.
    The padding module is calculated in the constructor, then used in forward.
    """

    def __init__(self, kernel_size, stride, image_size=None, **kwargs):
        super().__init__(kernel_size, stride, **kwargs)
        self.stride = (
            [self.stride] * 2 if isinstance(self.stride, int) else self.stride
        )
        self.kernel_size = (
            [self.kernel_size] * 2
            if isinstance(self.kernel_size, int)
            else self.kernel_size
        )
        self.dilation = (
            [self.dilation] * 2
            if isinstance(self.dilation, int)
            else self.dilation
        )

        # Calculate padding based on image size and save it
        assert image_size is not None
        ih, iw = (
            (image_size, image_size)
            if isinstance(image_size, int)
            else image_size
        )
        kh, kw = self.kernel_size
        sh, sw = self.stride
        oh, ow = math.ceil(ih / sh), math.ceil(iw / sw)
        pad_h = max(
            (oh - 1) * self.stride[0] + (kh - 1) * self.dilation[0] + 1 - ih, 0
        )
        pad_w = max(
            (ow - 1) * self.stride[1] + (kw - 1) * self.dilation[1] + 1 - iw, 0
        )
        if pad_h > 0 or pad_w > 0:
            self.static_padding = nn.ZeroPad2d(
                (pad_w // 2, pad_w - pad_w // 2, pad_h // 2, pad_h - pad_h // 2)
            )
        else:
            self.static_padding = nn.Identity()

    def forward(self, x):
        x = self.static_padding(x)
        x = F.max_pool2d(
            x,
            self.kernel_size,
            self.stride,
            self.padding,
            self.dilation,
            self.ceil_mode,
            self.return_indices,
        )
        return x


################################################################################
# Helper functions for loading model params
################################################################################

# BlockDecoder: a class for encoding and decoding BlockArgs
# efficientnet_params: a function to query compound coefficients
# get_model_params and efficientnet:
#     functions to get BlockArgs and GlobalParams for efficientnet
# url_map and url_map_advprop: dicts of urls for pretrained weights
# load_pretrained_weights: a function to load pretrained weights


class BlockDecoder(object):
    """Block Decoder for readability,
    straight from the official TensorFlow repository.
    """

    @staticmethod
    def _decode_block_string(block_string):
        """Get a block through a string notation of arguments.

        Args:
            block_string (str): A string notation of arguments.
                Examples: 'r1_k3_s11_e1_i32_o16_se0.25_noskip'.

        Returns:
            BlockArgs: The namedtuple defined at the top of this file.
        """
        assert isinstance(block_string, str)

        ops = block_string.split("_")
        options = {}
        for op in ops:
            splits = re.split(r"(\d.*)", op)
            if len(splits) >= 2:
                key, value = splits[:2]
                options[key] = value

        # Check stride
        assert ("s" in options and len(options["s"]) == 1) or (
            len(options["s"]) == 2 and options["s"][0] == options["s"][1]
        )

        return BlockArgs(
            num_repeat=int(options["r"]),
            kernel_size=int(options["k"]),
            stride=[int(options["s"][0])],
            expand_ratio=int(options["e"]),
            input_filters=int(options["i"]),
            output_filters=int(options["o"]),
            se_ratio=float(options["se"]) if "se" in options else None,
            id_skip=("noskip" not in block_string),
        )

    @staticmethod
    def _encode_block_string(block):
        """Encode a block to a string.

        Args:
            block (namedtuple): A BlockArgs type argument.

        Returns:
            block_string: A String form of BlockArgs.
        """
        args = [
            "r%d" % block.num_repeat,
            "k%d" % block.kernel_size,
            # BlockArgs stores a single-element stride list (the field is
            # 'stride', not 'strides'); repeat it for both spatial dimensions
            "s%d%d" % (block.stride[0], block.stride[0]),
            "e%s" % block.expand_ratio,
            "i%d" % block.input_filters,
            "o%d" % block.output_filters,
        ]
        if block.se_ratio is not None and 0 < block.se_ratio <= 1:
            args.append("se%s" % block.se_ratio)
        if block.id_skip is False:
            args.append("noskip")
        return "_".join(args)

    @staticmethod
    def decode(string_list):
        """Decode a list of string notations to specify blocks inside the network.

        Args:
            string_list (list[str]): A list of strings, each string is a notation of a block.

        Returns:
            blocks_args: A list of BlockArgs namedtuples of block args.
        """
        assert isinstance(string_list, list)
        blocks_args = []
        for block_string in string_list:
            blocks_args.append(BlockDecoder._decode_block_string(block_string))
        return blocks_args

    @staticmethod
    def encode(blocks_args):
        """Encode a list of BlockArgs to a list of strings.

        Args:
            blocks_args (list[namedtuples]): A list of BlockArgs namedtuples of block args.

        Returns:
            block_strings: A list of strings, each string is a notation of a block.
        """
        block_strings = []
        for block in blocks_args:
            block_strings.append(BlockDecoder._encode_block_string(block))
        return block_strings
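

# Hedged round-trip example (illustrative helper, not in the original module):
# decode a block string into BlockArgs and encode it back.
def _block_decoder_demo():
    args = BlockDecoder._decode_block_string("r1_k3_s11_e1_i32_o16_se0.25")
    assert args.num_repeat == 1 and args.stride == [1]
    assert args.input_filters == 32 and args.output_filters == 16
    assert BlockDecoder._encode_block_string(args) == "r1_k3_s11_e1_i32_o16_se0.25"
    return args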


def efficientnet_params(model_name):
    """Map EfficientNet model name to parameter coefficients.

    Args:
        model_name (str): Model name to be queried.

    Returns:
        params_dict[model_name]: A (width,depth,res,dropout) tuple.
    """
    # Original ImageNet coefficients, kept for reference:
    #     'efficientnet-b0': (1.0, 1.0, 224, 0.2),
    #     'efficientnet-b1': (1.0, 1.1, 240, 0.2),
    #     'efficientnet-b2': (1.1, 1.2, 260, 0.3),
    #     'efficientnet-b3': (1.2, 1.4, 300, 0.3),
    #     'efficientnet-b4': (1.4, 1.8, 380, 0.4),
    #     'efficientnet-b5': (1.6, 2.2, 456, 0.4),
    #     'efficientnet-b6': (1.8, 2.6, 528, 0.5),
    #     'efficientnet-b7': (2.0, 3.1, 600, 0.5),
    #     'efficientnet-b8': (2.2, 3.6, 672, 0.5),
    #     'efficientnet-l2': (4.3, 5.3, 800, 0.5),
    # In this file the resolution is fixed at 112 for every variant.
    params_dict = {
        # Coefficients: width,depth,res,dropout
        "efficientnet-b0": (1.0, 1.0, 112, 0.2),
        "efficientnet-b1": (1.0, 1.1, 112, 0.2),
        "efficientnet-b2": (1.1, 1.2, 112, 0.3),
        "efficientnet-b3": (1.2, 1.4, 112, 0.3),
        "efficientnet-b4": (1.4, 1.8, 112, 0.4),
        "efficientnet-b5": (1.6, 2.2, 112, 0.4),
        "efficientnet-b6": (1.8, 2.6, 112, 0.5),
        "efficientnet-b7": (2.0, 3.1, 112, 0.5),
        "efficientnet-b8": (2.2, 3.6, 112, 0.5),
        "efficientnet-l2": (4.3, 5.3, 112, 0.5),
    }
    return params_dict[model_name]
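

# Hedged example (illustrative helper, not in the original module): querying
# the compound coefficients for efficientnet-b4 under this file's 112-pixel
# resolution table.
def _efficientnet_params_demo():
    w, d, res, dropout = efficientnet_params("efficientnet-b4")
    assert (w, d, res, dropout) == (1.4, 1.8, 112, 0.4)
    return w, d, res, dropout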


def efficientnet(
    width_coefficient=None,
    depth_coefficient=None,
    image_size=None,
    dropout_rate=0.2,
    drop_connect_rate=0.2,
    num_classes=1000,
    include_top=True,
):
    """Create BlockArgs and GlobalParams for an efficientnet model.

    Args:
        width_coefficient (float)
        depth_coefficient (float)
        image_size (int)
        dropout_rate (float)
        drop_connect_rate (float)
        num_classes (int)

        Meaning as the name suggests.

    Returns:
        blocks_args, global_params.
    """
    # Block args for the whole model (efficientnet-b0 by default).
    # They will be modified in the construction of the EfficientNet class according to the model.
    blocks_args = [
        "r1_k3_s11_e1_i32_o16_se0.25",
        "r2_k3_s22_e6_i16_o24_se0.25",
        "r2_k5_s22_e6_i24_o40_se0.25",
        "r3_k3_s22_e6_i40_o80_se0.25",
        "r3_k5_s11_e6_i80_o112_se0.25",
        "r4_k5_s22_e6_i112_o192_se0.25",
        "r1_k3_s11_e6_i192_o320_se0.25",
    ]
    blocks_args = BlockDecoder.decode(blocks_args)

    global_params = GlobalParams(
        width_coefficient=width_coefficient,
        depth_coefficient=depth_coefficient,
        image_size=image_size,
        dropout_rate=dropout_rate,
        num_classes=num_classes,
        batch_norm_momentum=0.99,
        batch_norm_epsilon=1e-3,
        drop_connect_rate=drop_connect_rate,
        depth_divisor=8,
        min_depth=None,
        include_top=include_top,
    )

    return blocks_args, global_params


def get_model_params(model_name, override_params):
    """Get the block args and global params for a given model name.

    Args:
        model_name (str): Model's name.
        override_params (dict): A dict to modify global_params.

    Returns:
        blocks_args, global_params
    """
    if model_name.startswith("efficientnet"):
        w, d, s, p = efficientnet_params(model_name)
        # note: all models have drop connect rate = 0.2
        blocks_args, global_params = efficientnet(
            width_coefficient=w,
            depth_coefficient=d,
            dropout_rate=p,
            image_size=s,
        )
    else:
        raise NotImplementedError(
            "model name is not pre-defined: {}".format(model_name)
        )
    if override_params:
        # ValueError will be raised here if override_params has fields not included in global_params.
        global_params = global_params._replace(**override_params)
    return blocks_args, global_params
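

# Hedged example (illustrative helper, not in the original module): fetching
# b0's configuration and overriding the classifier size via override_params.
def _get_model_params_demo():
    blocks_args, global_params = get_model_params(
        "efficientnet-b0", {"num_classes": 512}
    )
    assert len(blocks_args) == 7
    assert global_params.num_classes == 512
    assert global_params.image_size == 112
    return blocks_args, global_params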


# Trained with standard methods.
# Check more details in the paper (EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks).
url_map = {
    "efficientnet-b0": "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b0-355c32eb.pth",
    "efficientnet-b1": "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b1-f1951068.pth",
    "efficientnet-b2": "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b2-8bb594d6.pth",
    "efficientnet-b3": "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b3-5fb5a3c3.pth",
    "efficientnet-b4": "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b4-6ed6700e.pth",
    "efficientnet-b5": "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b5-b6417697.pth",
    "efficientnet-b6": "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b6-c76e70fd.pth",
    "efficientnet-b7": "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b7-dcc49843.pth",
}

# Trained with adversarial examples (AdvProp).
# Check more details in the paper (Adversarial Examples Improve Image Recognition).
url_map_advprop = {
    "efficientnet-b0": "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/adv-efficientnet-b0-b64d5a18.pth",
    "efficientnet-b1": "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/adv-efficientnet-b1-0f3ce85a.pth",
    "efficientnet-b2": "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/adv-efficientnet-b2-6e9d97e5.pth",
    "efficientnet-b3": "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/adv-efficientnet-b3-cdd7c0f4.pth",
    "efficientnet-b4": "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/adv-efficientnet-b4-44fb3a87.pth",
    "efficientnet-b5": "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/adv-efficientnet-b5-86493f6b.pth",
    "efficientnet-b6": "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/adv-efficientnet-b6-ac80338e.pth",
    "efficientnet-b7": "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/adv-efficientnet-b7-4652b6dd.pth",
    "efficientnet-b8": "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/adv-efficientnet-b8-22a8fe65.pth",
}

# TODO: add the pretrained weights url map of 'efficientnet-l2'


def load_pretrained_weights(
    model, model_name, weights_path=None, load_fc=True, advprop=False
):
    """Loads pretrained weights from a local path, or downloads them by url.

    Args:
        model (Module): The whole model of efficientnet.
        model_name (str): Model name of efficientnet.
        weights_path (None or str):
            str: path to pretrained weights file on the local disk.
            None: use pretrained weights downloaded from the Internet.
        load_fc (bool): Whether to load pretrained weights for the fc layer at the end of the model.
        advprop (bool): Whether to load pretrained weights
            trained with advprop (valid when weights_path is None).
    """
    if isinstance(weights_path, str):
        state_dict = torch.load(weights_path)
    else:
        # AutoAugment or AdvProp (different preprocessing)
        url_map_ = url_map_advprop if advprop else url_map
        state_dict = model_zoo.load_url(url_map_[model_name])

    if load_fc:
        ret = model.load_state_dict(state_dict, strict=False)
        assert (
            not ret.missing_keys
        ), "Missing keys when loading pretrained weights: {}".format(
            ret.missing_keys
        )
    else:
        state_dict.pop("_fc.weight")
        state_dict.pop("_fc.bias")
        ret = model.load_state_dict(state_dict, strict=False)
        assert set(ret.missing_keys) == set(
            ["_fc.weight", "_fc.bias"]
        ), "Missing keys when loading pretrained weights: {}".format(
            ret.missing_keys
        )
    assert (
        not ret.unexpected_keys
    ), "Unexpected keys when loading pretrained weights: {}".format(
        ret.unexpected_keys
    )

    print("Loaded pretrained weights for {}".format(model_name))


class Flatten(Module):
    def forward(self, input):
        return input.view(input.size(0), -1)


# backbone ##################################################################

VALID_MODELS = (
    "efficientnet-b0",
    "efficientnet-b1",
    "efficientnet-b2",
    "efficientnet-b3",
    "efficientnet-b4",
    "efficientnet-b5",
    "efficientnet-b6",
    "efficientnet-b7",
    "efficientnet-b8",
    # Support the construction of 'efficientnet-l2' without pretrained weights
    "efficientnet-l2",
)


class MBConvBlock(nn.Module):
    """Mobile Inverted Residual Bottleneck Block.

    Args:
        block_args (namedtuple): BlockArgs, defined above.
        global_params (namedtuple): GlobalParams, defined above.
        image_size (tuple or list): [image_height, image_width].

    References:
        [1] https://arxiv.org/abs/1704.04861 (MobileNet v1)
        [2] https://arxiv.org/abs/1801.04381 (MobileNet v2)
        [3] https://arxiv.org/abs/1905.02244 (MobileNet v3)
    """

    def __init__(self, block_args, global_params, image_size=None):
        super().__init__()
        self._block_args = block_args
        self._bn_mom = (
            1 - global_params.batch_norm_momentum
        )  # pytorch's momentum convention differs from tensorflow's
        self._bn_eps = global_params.batch_norm_epsilon
        self.has_se = (self._block_args.se_ratio is not None) and (
            0 < self._block_args.se_ratio <= 1
        )
        self.id_skip = (
            block_args.id_skip
        )  # whether to use skip connection and drop connect

        # Expansion phase (inverted bottleneck)
        inp = self._block_args.input_filters  # number of input channels
        oup = (
            self._block_args.input_filters * self._block_args.expand_ratio
        )  # number of output channels
        if self._block_args.expand_ratio != 1:
            Conv2d = get_same_padding_conv2d(image_size=image_size)
            self._expand_conv = Conv2d(
                in_channels=inp, out_channels=oup, kernel_size=1, bias=False
            )
            self._bn0 = nn.BatchNorm2d(
                num_features=oup, momentum=self._bn_mom, eps=self._bn_eps
            )
            # image_size = calculate_output_image_size(image_size, 1) <-- this wouldn't modify image_size

        # Depthwise convolution phase
        k = self._block_args.kernel_size
        s = self._block_args.stride
        Conv2d = get_same_padding_conv2d(image_size=image_size)
        self._depthwise_conv = Conv2d(
            in_channels=oup,
            out_channels=oup,
            groups=oup,  # groups makes it depthwise
            kernel_size=k,
            stride=s,
            bias=False,
        )
        self._bn1 = nn.BatchNorm2d(
            num_features=oup, momentum=self._bn_mom, eps=self._bn_eps
        )
        image_size = calculate_output_image_size(image_size, s)

        # Squeeze and Excitation layer, if desired
        if self.has_se:
            Conv2d = get_same_padding_conv2d(image_size=(1, 1))
            num_squeezed_channels = max(
                1,
                int(self._block_args.input_filters * self._block_args.se_ratio),
            )
            self._se_reduce = Conv2d(
                in_channels=oup,
                out_channels=num_squeezed_channels,
                kernel_size=1,
            )
            self._se_expand = Conv2d(
                in_channels=num_squeezed_channels,
                out_channels=oup,
                kernel_size=1,
            )

        # Pointwise convolution phase
        final_oup = self._block_args.output_filters
        Conv2d = get_same_padding_conv2d(image_size=image_size)
        self._project_conv = Conv2d(
            in_channels=oup, out_channels=final_oup, kernel_size=1, bias=False
        )
        self._bn2 = nn.BatchNorm2d(
            num_features=final_oup, momentum=self._bn_mom, eps=self._bn_eps
        )
        self._swish = MemoryEfficientSwish()

    def forward(self, inputs, drop_connect_rate=None):
        """MBConvBlock's forward function.

        Args:
            inputs (tensor): Input tensor.
            drop_connect_rate (float): Drop connect rate (between 0 and 1).

        Returns:
            Output of this block after processing.
        """
        # Expansion and depthwise convolution
        x = inputs
        if self._block_args.expand_ratio != 1:
            x = self._expand_conv(inputs)
            x = self._bn0(x)
            x = self._swish(x)

        x = self._depthwise_conv(x)
        x = self._bn1(x)
        x = self._swish(x)

        # Squeeze and Excitation
        if self.has_se:
            x_squeezed = F.adaptive_avg_pool2d(x, 1)
            x_squeezed = self._se_reduce(x_squeezed)
            x_squeezed = self._swish(x_squeezed)
            x_squeezed = self._se_expand(x_squeezed)
            x = torch.sigmoid(x_squeezed) * x

        # Pointwise convolution
        x = self._project_conv(x)
        x = self._bn2(x)

        # Skip connection and drop connect
        input_filters, output_filters = (
            self._block_args.input_filters,
            self._block_args.output_filters,
        )
        if (
            self.id_skip
            and self._block_args.stride == 1
            and input_filters == output_filters
        ):
            # The combination of skip connection and drop connect brings about stochastic depth.
            if drop_connect_rate:
                x = drop_connect(x, p=drop_connect_rate, training=self.training)
            x = x + inputs  # skip connection
        return x

    def set_swish(self, memory_efficient=True):
        """Sets swish function as memory efficient (for training) or standard (for export).

        Args:
            memory_efficient (bool): Whether to use memory-efficient version of swish.
        """
        self._swish = MemoryEfficientSwish() if memory_efficient else Swish()
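

# Hedged end-to-end sketch for a single block (illustrative helper, not in
# the original module): build the first b0 stage block and run a forward pass.
def _mbconv_block_demo():
    blocks_args, global_params = get_model_params("efficientnet-b0", None)
    block = MBConvBlock(blocks_args[0], global_params, image_size=112).eval()
    with torch.no_grad():
        out = block(torch.randn(2, 32, 112, 112))
    # 'r1_k3_s11_e1_i32_o16_se0.25': stride 1, 32 -> 16 channels, same spatial size
    assert out.shape == (2, 16, 112, 112)
    return out.shape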


class EfficientNet(nn.Module):
    """EfficientNet model.
    Most easily loaded with the .from_name or .from_pretrained methods.

    Args:
        blocks_args (list[namedtuple]): A list of BlockArgs to construct blocks.
        global_params (namedtuple): A set of GlobalParams shared between blocks.

    References:
        [1] https://arxiv.org/abs/1905.11946 (EfficientNet)

    Example:
        >>> import torch
        >>> from efficientnet.model import EfficientNet
        >>> inputs = torch.rand(1, 3, 224, 224)
        >>> model = EfficientNet.from_pretrained('efficientnet-b0')
        >>> model.eval()
        >>> outputs = model(inputs)
    """

    def __init__(
        self, out_h, out_w, feat_dim, blocks_args=None, global_params=None
    ):
        super().__init__()
        assert isinstance(blocks_args, list), "blocks_args should be a list"
        assert len(blocks_args) > 0, "blocks_args must be non-empty"
        self._global_params = global_params
        self._blocks_args = blocks_args

        # Batch norm parameters
        bn_mom = 1 - self._global_params.batch_norm_momentum
        bn_eps = self._global_params.batch_norm_epsilon

        # Get stem static or dynamic convolution depending on image size
        image_size = global_params.image_size
        Conv2d = get_same_padding_conv2d(image_size=image_size)

        # Stem
        in_channels = 3  # rgb
        out_channels = round_filters(
            32, self._global_params
        )  # number of output channels
        # self._conv_stem = Conv2d(in_channels, out_channels, kernel_size=3, stride=2, bias=False)
        self._conv_stem = Conv2d(
            in_channels, out_channels, kernel_size=3, stride=1, bias=False
        )
        self._bn0 = nn.BatchNorm2d(
            num_features=out_channels, momentum=bn_mom, eps=bn_eps
        )
        # NOTE: the stem stride was changed from 2 to 1 (see the commented
        # line above), but the bookkeeping below still divides image_size by 2.
        image_size = calculate_output_image_size(image_size, 2)

        # Build blocks
        self._blocks = nn.ModuleList([])
        for block_args in self._blocks_args:
            # Update block input and output filters based on depth multiplier.
            block_args = block_args._replace(
                input_filters=round_filters(
                    block_args.input_filters, self._global_params
                ),
                output_filters=round_filters(
                    block_args.output_filters, self._global_params
                ),
                num_repeat=round_repeats(
                    block_args.num_repeat, self._global_params
                ),
            )

            # The first block needs to take care of stride and filter size increase.
            self._blocks.append(
                MBConvBlock(
                    block_args, self._global_params, image_size=image_size
                )
            )
            image_size = calculate_output_image_size(
                image_size, block_args.stride
            )
            if (
                block_args.num_repeat > 1
            ):  # modify block_args to keep the same output size
                block_args = block_args._replace(
                    input_filters=block_args.output_filters, stride=1
                )
            for _ in range(block_args.num_repeat - 1):
                self._blocks.append(
                    MBConvBlock(
                        block_args, self._global_params, image_size=image_size
                    )
                )
                # image_size = calculate_output_image_size(image_size, block_args.stride)  # stride = 1

        # Head
        in_channels = block_args.output_filters  # output of final block
        out_channels = round_filters(1280, self._global_params)
        # out_channels = round_filters(512, self._global_params)
        Conv2d = get_same_padding_conv2d(image_size=image_size)
        self._conv_head = Conv2d(
            in_channels, out_channels, kernel_size=1, bias=False
        )
        self._bn1 = nn.BatchNorm2d(
            num_features=out_channels, momentum=bn_mom, eps=bn_eps
        )

        # Final linear layer
        self._avg_pooling = nn.AdaptiveAvgPool2d(1)
        self._dropout = nn.Dropout(self._global_params.dropout_rate)
        self._fc = nn.Linear(out_channels, self._global_params.num_classes)
        self._swish = MemoryEfficientSwish()
        self.output_layer = Sequential(
            # NOTE: the hardcoded 1280 equals round_filters(1280, ...) only
            # when width_coefficient == 1.0 (b0/b1)
            BatchNorm2d(1280),
            # BatchNorm2d(512),
            Dropout(self._global_params.dropout_rate),
            Flatten(),
            Linear(1280 * out_h * out_w, feat_dim),
            # Linear(512 * out_h * out_w, feat_dim),
            BatchNorm1d(feat_dim),
        )

    def set_swish(self, memory_efficient=True):
        """Sets swish function as memory efficient (for training) or standard (for export).

        Args:
            memory_efficient (bool): Whether to use memory-efficient version of swish.
        """
        self._swish = MemoryEfficientSwish() if memory_efficient else Swish()
        for block in self._blocks:
            block.set_swish(memory_efficient)

    def extract_endpoints(self, inputs):
        """Use convolution layers to extract features
        from reduction levels i in [1, 2, 3, 4, 5].

        Args:
            inputs (tensor): Input tensor.

        Returns:
            Dictionary of last intermediate features
            with reduction levels i in [1, 2, 3, 4, 5].

        Example:
            >>> import torch
            >>> from efficientnet.model import EfficientNet
            >>> inputs = torch.rand(1, 3, 224, 224)
            >>> model = EfficientNet.from_pretrained('efficientnet-b0')
            >>> endpoints = model.extract_endpoints(inputs)
            >>> print(endpoints['reduction_1'].shape)  # torch.Size([1, 16, 112, 112])
            >>> print(endpoints['reduction_2'].shape)  # torch.Size([1, 24, 56, 56])
            >>> print(endpoints['reduction_3'].shape)  # torch.Size([1, 40, 28, 28])
            >>> print(endpoints['reduction_4'].shape)  # torch.Size([1, 112, 14, 14])
            >>> print(endpoints['reduction_5'].shape)  # torch.Size([1, 1280, 7, 7])
        """
        endpoints = dict()

        # Stem
        x = self._swish(self._bn0(self._conv_stem(inputs)))
        prev_x = x

        # Blocks
        for idx, block in enumerate(self._blocks):
            drop_connect_rate = self._global_params.drop_connect_rate
            if drop_connect_rate:
                drop_connect_rate *= float(idx) / len(
                    self._blocks
                )  # scale drop connect rate
            x = block(x, drop_connect_rate=drop_connect_rate)
            if prev_x.size(2) > x.size(2):
                endpoints["reduction_{}".format(len(endpoints) + 1)] = prev_x
            prev_x = x

        # Head
        x = self._swish(self._bn1(self._conv_head(x)))
        endpoints["reduction_{}".format(len(endpoints) + 1)] = x

        return endpoints

    def extract_features(self, inputs):
        """Use convolution layers to extract features.

        Args:
            inputs (tensor): Input tensor.

        Returns:
            Output of the final convolution
            layer in the efficientnet model.
        """
        # Stem
        x = self._swish(self._bn0(self._conv_stem(inputs)))

        # Blocks
        for idx, block in enumerate(self._blocks):
            drop_connect_rate = self._global_params.drop_connect_rate
            if drop_connect_rate:
                drop_connect_rate *= float(idx) / len(
                    self._blocks
                )  # scale drop connect rate
            x = block(x, drop_connect_rate=drop_connect_rate)

        # Head
        x = self._swish(self._bn1(self._conv_head(x)))

        return x

    def forward(self, inputs):
        """EfficientNet's forward function.
        Calls extract_features to extract features, then applies the
        output_layer to produce the embedding.

        Args:
            inputs (tensor): Input tensor.

        Returns:
            Output of this model after processing.
        """
        # Convolution layers
        x = self.extract_features(inputs)
        # The original pooling and classification head, kept for reference:
        #     x = self._avg_pooling(x)
        #     if self._global_params.include_top:
        #         x = x.flatten(start_dim=1)
        #         x = self._dropout(x)
        #         # x = self._fc(x)
        x = self.output_layer(x)
        return x

    @classmethod
    def from_name(cls, model_name, in_channels=3, **override_params):
        """Create an efficientnet model according to its name.

        Args:
            model_name (str): Name for efficientnet.
            in_channels (int): Input data's channel number.
            override_params (other key word params):
                Params to override model's global_params.
                Optional keys:
                    'width_coefficient', 'depth_coefficient',
                    'image_size', 'dropout_rate',
                    'num_classes', 'batch_norm_momentum',
                    'batch_norm_epsilon', 'drop_connect_rate',
                    'depth_divisor', 'min_depth'

        Returns:
            An efficientnet model.
        """
        cls._check_model_name_is_valid(model_name)
        blocks_args, global_params = get_model_params(
            model_name, override_params
        )
        # NOTE: with the modified __init__(out_h, out_w, feat_dim, ...) above,
        # this positional call no longer matches the constructor signature;
        # construct EfficientNet(out_h, out_w, feat_dim, blocks_args,
        # global_params) directly instead.
        model = cls(blocks_args, global_params)
        model._change_in_channels(in_channels)
        return model

    @classmethod
    def from_pretrained(
        cls,
        model_name,
        weights_path=None,
        advprop=False,
        in_channels=3,
        num_classes=1000,
        **override_params,
    ):
        """Create a pretrained efficientnet model according to its name.

        Args:
            model_name (str): Name for efficientnet.
            weights_path (None or str):
                str: path to pretrained weights file on the local disk.
                None: use pretrained weights downloaded from the Internet.
            advprop (bool):
                Whether to load pretrained weights
                trained with advprop (valid when weights_path is None).
            in_channels (int): Input data's channel number.
            num_classes (int):
                Number of categories for classification.
                It controls the output size of the final linear layer.
            override_params (other key word params):
                Params to override model's global_params.
                Optional keys:
                    'width_coefficient', 'depth_coefficient',
                    'image_size', 'dropout_rate',
                    'batch_norm_momentum',
                    'batch_norm_epsilon', 'drop_connect_rate',
                    'depth_divisor', 'min_depth'

        Returns:
            A pretrained efficientnet model.
        """
        model = cls.from_name(
            model_name, num_classes=num_classes, **override_params
        )
        load_pretrained_weights(
            model,
            model_name,
            weights_path=weights_path,
            load_fc=(num_classes == 1000),
            advprop=advprop,
        )
        model._change_in_channels(in_channels)
        return model

    @classmethod
    def get_image_size(cls, model_name):
        """Get the input image size for a given efficientnet model.

        Args:
            model_name (str): Name for efficientnet.

        Returns:
            Input image size (resolution).
        """
        cls._check_model_name_is_valid(model_name)
        _, _, res, _ = efficientnet_params(model_name)
        return res

    @classmethod
    def _check_model_name_is_valid(cls, model_name):
        """Validates the model name.

        Args:
            model_name (str): Name for efficientnet.

        Raises:
            ValueError: If model_name is not one of VALID_MODELS.
        """
        if model_name not in VALID_MODELS:
            raise ValueError(
                "model_name should be one of: " + ", ".join(VALID_MODELS)
            )

    def _change_in_channels(self, in_channels):
        """Adjust the model's first convolution layer to in_channels, if it does not equal 3.

        Args:
            in_channels (int): Input data's channel number.
        """
        if in_channels != 3:
            Conv2d = get_same_padding_conv2d(
                image_size=self._global_params.image_size
            )
            out_channels = round_filters(32, self._global_params)
            self._conv_stem = Conv2d(
                in_channels, out_channels, kernel_size=3, stride=2, bias=False
            )
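

# Hedged end-to-end sketch (illustrative helper, not in the original module):
# with the stride-1 stem and block strides 1,2,2,2,1,2,1, a 112x112 input is
# downsampled 16x to 7x7, so out_h = out_w = 7 here; feat_dim=512 is an
# assumed embedding size. Only b0/b1 match the hardcoded 1280-channel head.
def _efficientnet_b0_demo():
    blocks_args, global_params = get_model_params("efficientnet-b0", None)
    model = EfficientNet(7, 7, 512, blocks_args, global_params)
    model.eval()
    with torch.no_grad():
        emb = model(torch.randn(2, 3, 112, 112))
    assert emb.shape == (2, 512)
    return emb.shape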