Coverage for src/bob/bio/face/pytorch/facexzoo/GhostNet.py: 98%
118 statements
1"""
2@author: Jun Wang
3@date: 20210121
4@contact: jun21wangustc@gmail.com
5"""
7# based on:
8# https://github.com/huawei-noah/ghostnet/blob/master/ghostnet_pytorch/ghostnet.py
10# 2020.06.09-Changed for building GhostNet
11# Huawei Technologies Co., Ltd. <foss@huawei.com>
12"""
13Creates a GhostNet Model as defined in:
14GhostNet: More Features from Cheap Operations By Kai Han, Yunhe Wang, Qi Tian, Jianyuan Guo, Chunjing Xu, Chang Xu.
15https://arxiv.org/abs/1911.11907
16Modified from https://github.com/d-li14/mobilenetv3.pytorch and https://github.com/rwightman/pytorch-image-models
17"""
18import math
20import torch
21import torch.nn as nn
22import torch.nn.functional as F
24from torch.nn import (
25 BatchNorm1d,
26 BatchNorm2d,
27 Dropout,
28 Linear,
29 Module,
30 Sequential,
31)
34class Flatten(Module):
35 def forward(self, input):
36 return input.view(input.size(0), -1)


def _make_divisible(v, divisor, min_value=None):
    """
    This function is taken from the original tf repo.
    It ensures that all layers have a channel number that is divisible by 8.
    It can be seen here:
    https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py
    """
    if min_value is None:
        min_value = divisor
    new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
    # Make sure that rounding down does not reduce the value by more than 10%.
    if new_v < 0.9 * v:
        new_v += divisor
    return new_v
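
# Worked example (illustrative note, not part of the upstream source): with
# divisor=8, _make_divisible(10, 8) computes int(10 + 4) // 8 * 8 = 8, and
# because 8 < 0.9 * 10 = 9 the result is bumped up by one divisor to 16; with
# divisor=4 (as used below), _make_divisible(36.8, 4) gives
# int(38.8) // 4 * 4 = 36, which is kept since 36 >= 0.9 * 36.8.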


def hard_sigmoid(x, inplace: bool = False):
    if inplace:
        return x.add_(3.0).clamp_(0.0, 6.0).div_(6.0)
    else:
        return F.relu6(x + 3.0) / 6.0
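
# Quick numeric check (illustrative): hard_sigmoid maps x = -3 to 0, x = 0 to
# 0.5 and x = 3 to 1, i.e. relu6(x + 3) / 6 is a piecewise-linear
# approximation of a sigmoid that saturates outside [-3, 3].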


class SqueezeExcite(nn.Module):
    def __init__(
        self,
        in_chs,
        se_ratio=0.25,
        reduced_base_chs=None,
        act_layer=nn.ReLU,
        gate_fn=hard_sigmoid,
        divisor=4,
        **_,
    ):
        super(SqueezeExcite, self).__init__()
        self.gate_fn = gate_fn
        reduced_chs = _make_divisible(
            (reduced_base_chs or in_chs) * se_ratio, divisor
        )
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.conv_reduce = nn.Conv2d(in_chs, reduced_chs, 1, bias=True)
        self.act1 = act_layer(inplace=True)
        self.conv_expand = nn.Conv2d(reduced_chs, in_chs, 1, bias=True)

    def forward(self, x):
        x_se = self.avg_pool(x)
        x_se = self.conv_reduce(x_se)
        x_se = self.act1(x_se)
        x_se = self.conv_expand(x_se)
        x = x * self.gate_fn(x_se)
        return x
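
# Shape sketch (assumed example sizes, not from the original file): for an
# input x of shape (N, 64, 56, 56) and the default se_ratio=0.25, avg_pool
# yields (N, 64, 1, 1), conv_reduce maps it to (N, 16, 1, 1), conv_expand
# restores (N, 64, 1, 1), and the gated product x * gate_fn(x_se) broadcasts
# back to the full (N, 64, 56, 56) feature map.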


class ConvBnAct(nn.Module):
    def __init__(
        self, in_chs, out_chs, kernel_size, stride=1, act_layer=nn.ReLU
    ):
        super(ConvBnAct, self).__init__()
        self.conv = nn.Conv2d(
            in_chs, out_chs, kernel_size, stride, kernel_size // 2, bias=False
        )
        self.bn1 = nn.BatchNorm2d(out_chs)
        self.act1 = act_layer(inplace=True)

    def forward(self, x):
        x = self.conv(x)
        x = self.bn1(x)
        x = self.act1(x)
        return x


class GhostModule(nn.Module):
    def __init__(
        self, inp, oup, kernel_size=1, ratio=2, dw_size=3, stride=1, relu=True
    ):
        super(GhostModule, self).__init__()
        self.oup = oup
        init_channels = math.ceil(oup / ratio)
        new_channels = init_channels * (ratio - 1)

        self.primary_conv = nn.Sequential(
            nn.Conv2d(
                inp,
                init_channels,
                kernel_size,
                stride,
                kernel_size // 2,
                bias=False,
            ),
            nn.BatchNorm2d(init_channels),
            nn.ReLU(inplace=True) if relu else nn.Sequential(),
        )

        self.cheap_operation = nn.Sequential(
            nn.Conv2d(
                init_channels,
                new_channels,
                dw_size,
                1,
                dw_size // 2,
                groups=init_channels,
                bias=False,
            ),
            nn.BatchNorm2d(new_channels),
            nn.ReLU(inplace=True) if relu else nn.Sequential(),
        )

    def forward(self, x):
        x1 = self.primary_conv(x)
        x2 = self.cheap_operation(x1)
        out = torch.cat([x1, x2], dim=1)
        return out[:, : self.oup, :, :]
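
# Channel bookkeeping example (illustrative): GhostModule(inp=16, oup=64) with
# the default ratio=2 sets init_channels = ceil(64 / 2) = 32 and
# new_channels = 32 * (2 - 1) = 32, so primary_conv produces 32 feature maps,
# cheap_operation derives 32 more with a depthwise 3x3 convolution, and the
# concatenation is sliced back to the requested 64 output channels.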


class GhostBottleneck(nn.Module):
    """Ghost bottleneck w/ optional SE"""

    def __init__(
        self,
        in_chs,
        mid_chs,
        out_chs,
        dw_kernel_size=3,
        stride=1,
        act_layer=nn.ReLU,
        se_ratio=0.0,
    ):
        super(GhostBottleneck, self).__init__()
        has_se = se_ratio is not None and se_ratio > 0.0
        self.stride = stride

        # Point-wise expansion
        self.ghost1 = GhostModule(in_chs, mid_chs, relu=True)

        # Depth-wise convolution
        if self.stride > 1:
            self.conv_dw = nn.Conv2d(
                mid_chs,
                mid_chs,
                dw_kernel_size,
                stride=stride,
                padding=(dw_kernel_size - 1) // 2,
                groups=mid_chs,
                bias=False,
            )
            self.bn_dw = nn.BatchNorm2d(mid_chs)

        # Squeeze-and-excitation
        if has_se:
            self.se = SqueezeExcite(mid_chs, se_ratio=se_ratio)
        else:
            self.se = None

        # Point-wise linear projection
        self.ghost2 = GhostModule(mid_chs, out_chs, relu=False)

        # shortcut
        if in_chs == out_chs and self.stride == 1:
            self.shortcut = nn.Sequential()
        else:
            self.shortcut = nn.Sequential(
                nn.Conv2d(
                    in_chs,
                    in_chs,
                    dw_kernel_size,
                    stride=stride,
                    padding=(dw_kernel_size - 1) // 2,
                    groups=in_chs,
                    bias=False,
                ),
                nn.BatchNorm2d(in_chs),
                nn.Conv2d(in_chs, out_chs, 1, stride=1, padding=0, bias=False),
                nn.BatchNorm2d(out_chs),
            )

    def forward(self, x):
        residual = x

        # 1st ghost bottleneck
        x = self.ghost1(x)

        # Depth-wise convolution
        if self.stride > 1:
            x = self.conv_dw(x)
            x = self.bn_dw(x)

        # Squeeze-and-excitation
        if self.se is not None:
            x = self.se(x)

        # 2nd ghost bottleneck
        x = self.ghost2(x)

        x += self.shortcut(residual)
        return x
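
# Shape sketch (assumed sizes matching the first stage2 config below,
# in_chs=16, mid_chs=48, out_chs=24, stride=2): a (N, 16, 56, 56) input is
# expanded by ghost1 to (N, 48, 56, 56), downsampled by the depthwise conv to
# (N, 48, 28, 28), projected by ghost2 to (N, 24, 28, 28), and summed with the
# shortcut branch, which downsamples and re-projects the residual to the same
# (N, 24, 28, 28) shape.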


class GhostNet(nn.Module):
    def __init__(
        self, width=1.0, drop_ratio=0.2, feat_dim=512, out_h=7, out_w=7
    ):
        super(GhostNet, self).__init__()
        # setting of inverted residual blocks
        self.cfgs = [
            # k, t, c, SE, s
            # stage1
            [[3, 16, 16, 0, 1]],
            # stage2
            [[3, 48, 24, 0, 2]],
            [[3, 72, 24, 0, 1]],
            # stage3
            [[5, 72, 40, 0.25, 2]],
            [[5, 120, 40, 0.25, 1]],
            # stage4
            [[3, 240, 80, 0, 2]],
            [
                [3, 200, 80, 0, 1],
                [3, 184, 80, 0, 1],
                [3, 184, 80, 0, 1],
                [3, 480, 112, 0.25, 1],
                [3, 672, 112, 0.25, 1],
            ],
            # stage5
            [[5, 672, 160, 0.25, 2]],
            [
                [5, 960, 160, 0, 1],
                [5, 960, 160, 0.25, 1],
                [5, 960, 160, 0, 1],
                [5, 960, 160, 0.25, 1],
            ],
        ]

        # building first layer
        output_channel = _make_divisible(16 * width, 4)
        # self.conv_stem = nn.Conv2d(3, output_channel, 3, 2, 1, bias=False)
        self.conv_stem = nn.Conv2d(3, output_channel, 3, 1, 1, bias=False)
        self.bn1 = nn.BatchNorm2d(output_channel)
        self.act1 = nn.ReLU(inplace=True)
        input_channel = output_channel

        # building inverted residual blocks
        stages = []
        block = GhostBottleneck
        for cfg in self.cfgs:
            layers = []
            for k, exp_size, c, se_ratio, s in cfg:
                output_channel = _make_divisible(c * width, 4)
                hidden_channel = _make_divisible(exp_size * width, 4)
                layers.append(
                    block(
                        input_channel,
                        hidden_channel,
                        output_channel,
                        k,
                        s,
                        se_ratio=se_ratio,
                    )
                )
                input_channel = output_channel
            stages.append(nn.Sequential(*layers))

        output_channel = _make_divisible(exp_size * width, 4)
        stages.append(
            nn.Sequential(ConvBnAct(input_channel, output_channel, 1))
        )
        input_channel = output_channel

        self.blocks = nn.Sequential(*stages)

        self.output_layer = Sequential(
            BatchNorm2d(960),
            Dropout(drop_ratio),
            Flatten(),
            Linear(960 * out_h * out_w, feat_dim),  # for eye
            BatchNorm1d(feat_dim),
        )

    def forward(self, x):
        x = self.conv_stem(x)
        x = self.bn1(x)
        x = self.act1(x)
        x = self.blocks(x)
        x = self.output_layer(x)
        return x
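

# Minimal usage sketch (illustrative; the 112x112 input size is an assumption
# consistent with the stride-1 stem, the four stride-2 bottlenecks and the
# default out_h=out_w=7, i.e. an overall downsampling factor of 16):
if __name__ == "__main__":
    net = GhostNet(width=1.0, drop_ratio=0.2, feat_dim=512, out_h=7, out_w=7)
    net.eval()  # use running BatchNorm statistics and disable Dropout
    with torch.no_grad():
        dummy = torch.randn(2, 3, 112, 112)  # batch of 2 RGB face crops
        embeddings = net(dummy)
    print(embeddings.shape)  # expected: torch.Size([2, 512])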