File models/attn_can_adcrowdnet_simple.py changed (mode: 100644) (index 135c6ca..85b6f95) |
... |
... |
class AttnCanAdcrowdNetSimpleV3(nn.Module): |
392 |
392 |
nn.init.constant_(m.bias, 0) |
nn.init.constant_(m.bias, 0) |
393 |
393 |
|
|
394 |
394 |
|
|
|
395 |
|
class AttnCanAdcrowdNetSimpleV4(nn.Module):
    """
    Network made of a frozen VGG-style frontend, a spatial squeeze & excitation
    gate, a 4-scale contextual attention module and a dilated / deformable
    convolution backend that ends in a single-channel 1x1 output layer.

    compare with v3: add 1 layer (1 branch) of deformable cnn before output layer

    The output map is returned at the backend's resolution; it is not
    upsampled back to the input size (labels are downsampled instead, see the
    note at the end of ``forward``).

    Args:
        load_weights: when False (default) the freshly built layers are
            initialised with ``_initialize_weights`` and the frontend is
            filled with torchvision's pretrained VGG16 weights. When True
            neither step runs -- presumably the caller loads a checkpoint
            afterwards (TODO confirm against the training script).
    """

    def __init__(self, load_weights=False):
        super(AttnCanAdcrowdNetSimpleV4, self).__init__()
        self.frontend_feat = [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512]
        self.frontend = make_layers(self.frontend_feat)

        # freeze vgg layer
        for param in self.frontend.parameters():
            param.requires_grad = False

        self.sSE = SpatialSELayer(num_channels=512)

        # Fuses the frontend features with the contextual features (512 + 512).
        self.concat_filter_layer = nn.Conv2d(1024, 512, kernel_size=3, padding=2, dilation=2)

        # Stage 1: the multi-branch deformable convolutions (3/5/7 kernels) are
        # skipped in this version; only the dilated filter layer remains.
        self.concat_filter_layer_1 = nn.Conv2d(512, 256, kernel_size=3, padding=2, dilation=2)

        # Stage 2: two plain dilated 3x3 branches (dilation 2 and 4).
        self.dilated_conv_2_3 = nn.Conv2d(256, 128, kernel_size=3, stride=1, dilation=2, padding=2)
        self.dilated_conv_2_5 = nn.Conv2d(256, 128, kernel_size=3, stride=1, dilation=4, padding=4)
        self.concat_filter_layer_2 = nn.Conv2d(128 * 2, 128, kernel_size=3, padding=2, dilation=2)

        # Stage 3: two deformable branches (3x3 and 5x5 kernels).
        self.deform_conv_3_3 = DeformConv2d(128, 64, kernel_size=3, stride=1, padding=1)
        self.deform_conv_3_5 = DeformConv2d(128, 64, kernel_size=5, stride=1, padding=2)
        self.concat_filter_layer_3 = nn.Conv2d(64 * 2, 64, kernel_size=3, padding=2, dilation=2)

        # Stage 4 (new compared with v3): one deformable branch before the output.
        self.deform_conv_4_3 = DeformConv2d(64, 32, kernel_size=3, stride=1, padding=1)

        self.output_layer = nn.Conv2d(32, 1, kernel_size=1)

        # Per-scale 1x1 convolutions of the contextual module: convS_1 filters
        # the pooled features, convS_2 produces the per-pixel scale weights.
        self.conv1_1 = nn.Conv2d(512, 512, kernel_size=1, bias=False)
        self.conv1_2 = nn.Conv2d(512, 512, kernel_size=1, bias=False)
        self.conv2_1 = nn.Conv2d(512, 512, kernel_size=1, bias=False)
        self.conv2_2 = nn.Conv2d(512, 512, kernel_size=1, bias=False)
        self.conv3_1 = nn.Conv2d(512, 512, kernel_size=1, bias=False)
        self.conv3_2 = nn.Conv2d(512, 512, kernel_size=1, bias=False)
        self.conv6_1 = nn.Conv2d(512, 512, kernel_size=1, bias=False)
        self.conv6_2 = nn.Conv2d(512, 512, kernel_size=1, bias=False)

        if not load_weights:
            mod = models.vgg16(pretrained=True)
            self._initialize_weights()
            # Copy tensors positionally: the frontend's keys differ from
            # torchvision's, so the i-th frontend entry receives the i-th VGG16
            # entry. zip() stops at the frontend's (shorter) length.
            fsd = collections.OrderedDict(
                zip(self.frontend.state_dict().keys(), mod.state_dict().values())
            )
            self.frontend.load_state_dict(fsd)

    def _scale_context(self, fv, bins, feature_conv, weight_conv):
        """Return ``(s, w)`` for one pyramid scale of the contextual module.

        ``fv`` is average-pooled to a ``bins x bins`` grid, passed through
        ``feature_conv`` and bilinearly resized to ``fv``'s spatial size,
        giving ``s``. ``w`` is the sigmoid of ``weight_conv(s - fv)``.
        """
        pooled = feature_conv(F.adaptive_avg_pool2d(fv, (bins, bins)))
        # align_corners=False is what the deprecated F.upsample(mode='bilinear')
        # defaulted to, so the numerics are unchanged.
        scale_feat = F.interpolate(
            pooled,
            size=(fv.shape[2], fv.shape[3]),
            mode='bilinear',
            align_corners=False,
        )
        contrast = scale_feat - fv
        weight = torch.sigmoid(weight_conv(contrast))
        return scale_feat, weight

    def forward(self, x):
        """Run the network on ``x`` and return the single-channel output map.

        ``x`` is fed directly to the frontend; the result keeps the backend's
        spatial size (no final upsampling).
        """
        fv = self.frontend(x)

        # spatial squeeze & excitation
        fv = self.sSE(fv)

        # Contextual features at pyramid scales S = 1, 2, 3, 6.
        s1, w1 = self._scale_context(fv, 1, self.conv1_1, self.conv1_2)
        s2, w2 = self._scale_context(fv, 2, self.conv2_1, self.conv2_2)
        s3, w3 = self._scale_context(fv, 3, self.conv3_1, self.conv3_2)
        s6, w6 = self._scale_context(fv, 6, self.conv6_1, self.conv6_2)

        # Weighted average of the scale features; the tiny constant keeps the
        # division defined if every weight underflows to zero.
        fi = (w1 * s1 + w2 * s2 + w3 * s3 + w6 * s6) / (w1 + w2 + w3 + w6 + 1e-12)
        x = torch.cat((fv, fi), 1)
        x = F.relu(self.concat_filter_layer(x), inplace=True)

        # Stage 1 (deformable branches skipped in this version).
        x = F.relu(self.concat_filter_layer_1(x), inplace=True)

        # Stage 2: dilated branches, concatenated on the channel axis.
        x3 = self.dilated_conv_2_3(x)
        x5 = self.dilated_conv_2_5(x)
        x = F.relu(torch.cat((x3, x5), 1), inplace=True)
        x = F.relu(self.concat_filter_layer_2(x), inplace=True)

        # Stage 3: deformable branches, concatenated on the channel axis.
        x3 = self.deform_conv_3_3(x)
        x5 = self.deform_conv_3_5(x)
        x = F.relu(torch.cat((x3, x5), 1), inplace=True)
        x = F.relu(self.concat_filter_layer_3(x), inplace=True)

        # Stage 4: single deformable branch, then the 1x1 output layer.
        x = F.relu(self.deform_conv_4_3(x))
        x = self.output_layer(x)

        # Upsampling the output by 8 caused too many dimension mismatches,
        # so the labels are downsampled instead and x is returned as is.
        return x

    def _initialize_weights(self):
        """Initialise every Conv2d / BatchNorm2d reachable from this module.

        Conv2d weights are drawn from N(0, 0.01^2) and biases set to 0;
        BatchNorm2d weights are set to 1 and biases to 0.

        NOTE(review): ``self.modules()`` recurses into submodules, so any
        ``nn.Conv2d`` nested inside ``DeformConv2d`` / ``SpatialSELayer`` is
        re-initialised here as well -- confirm that is intended. The frontend
        is also touched, but ``__init__`` overwrites it with VGG16 weights
        right after this call.
        """
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.normal_(m.weight, std=0.01)
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
|
525 |
|
|
|
526 |
|
|
395 |
527 |
def make_layers(cfg, in_channels=3, batch_norm=False, dilation=False): |
def make_layers(cfg, in_channels=3, batch_norm=False, dilation=False): |
396 |
528 |
if dilation: |
if dilation: |
397 |
529 |
d_rate = 2 |
d_rate = 2 |