File args_util.py changed (mode: 100644) (index f5fd17d..6728e74) |
... |
... |
def real_args_parse(): |
67 |
67 |
parser.add_argument('--model', action="store", default="csrnet") |
parser.add_argument('--model', action="store", default="csrnet") |
68 |
68 |
|
|
69 |
69 |
# args with default value |
# args with default value |
70 |
|
parser.add_argument('--lr', action="store", default=1e-7, type=float) |
|
|
70 |
|
parser.add_argument('--lr', action="store", default=1e-8, type=float) |
71 |
71 |
parser.add_argument('--momentum', action="store", default=0.95, type=float) |
parser.add_argument('--momentum', action="store", default=0.95, type=float) |
72 |
|
parser.add_argument('--decay', action="store", default=5*1e-4, type=float) |
|
|
72 |
|
parser.add_argument('--decay', action="store", default=5*1e-3, type=float) |
73 |
73 |
parser.add_argument('--epochs', action="store", default=1, type=int) |
parser.add_argument('--epochs', action="store", default=1, type=int) |
74 |
74 |
|
|
75 |
75 |
# args.original_lr = 1e-7 |
# args.original_lr = 1e-7 |
File data_flow.py changed (mode: 100644) (index e9a9443..2d70582) |
... |
... |
def load_data_ucf_cc50_pancnn(img_path, train=True): |
74 |
74 |
gt_file = h5py.File(gt_path, 'r') |
gt_file = h5py.File(gt_path, 'r') |
75 |
75 |
target = np.asarray(gt_file['density']) |
target = np.asarray(gt_file['density']) |
76 |
76 |
|
|
|
77 |
|
if train: |
|
78 |
|
crop_size = (int(img.size[0] / 2), int(img.size[1] / 2)) |
|
79 |
|
if random.randint(0, 9) <= -1: |
|
80 |
|
|
|
81 |
|
dx = int(random.randint(0, 1) * img.size[0] * 1. / 2) |
|
82 |
|
dy = int(random.randint(0, 1) * img.size[1] * 1. / 2) |
|
83 |
|
else: |
|
84 |
|
dx = int(random.random() * img.size[0] * 1. / 2) |
|
85 |
|
dy = int(random.random() * img.size[1] * 1. / 2) |
|
86 |
|
|
|
87 |
|
img = img.crop((dx, dy, crop_size[0] + dx, crop_size[1] + dy)) |
|
88 |
|
target = target[dy:crop_size[1] + dy, dx:crop_size[0] + dx] |
|
89 |
|
|
|
90 |
|
if random.random() > 0.8: |
|
91 |
|
target = np.fliplr(target) |
|
92 |
|
img = img.transpose(Image.FLIP_LEFT_RIGHT) |
|
93 |
|
|
77 |
94 |
target1 = cv2.resize(target, (int(target.shape[1] / 8), int(target.shape[0] / 8)), |
target1 = cv2.resize(target, (int(target.shape[1] / 8), int(target.shape[0] / 8)), |
78 |
95 |
interpolation=cv2.INTER_CUBIC) * 64 |
interpolation=cv2.INTER_CUBIC) * 64 |
79 |
96 |
target2 = cv2.resize(target, (int(target.shape[1] / 16), int(target.shape[0] / 16)), |
target2 = cv2.resize(target, (int(target.shape[1] / 16), int(target.shape[0] / 16)), |
80 |
|
interpolation=cv2.INTER_CUBIC) * 64*2 |
|
|
97 |
|
interpolation=cv2.INTER_CUBIC) * 64 #*2 |
81 |
98 |
target3 = cv2.resize(target, (int(target.shape[1] / 32), int(target.shape[0] / 32)), |
target3 = cv2.resize(target, (int(target.shape[1] / 32), int(target.shape[0] / 32)), |
82 |
|
interpolation=cv2.INTER_CUBIC) * 64*4 |
|
|
99 |
|
interpolation=cv2.INTER_CUBIC) * 64 #*4 |
83 |
100 |
|
|
84 |
101 |
return img, (target1, target2, target3) |
return img, (target1, target2, target3) |
85 |
102 |
|
|
File main_pacnn_ucfcc50.py changed (mode: 100644) (index ab19946..f125b41) |
... |
... |
import os |
11 |
11 |
import cv2 |
import cv2 |
12 |
12 |
from torchvision import datasets, transforms |
from torchvision import datasets, transforms |
13 |
13 |
from data_flow import ListDataset |
from data_flow import ListDataset |
|
14 |
|
import pytorch_ssim |
14 |
15 |
|
|
15 |
16 |
if __name__ == "__main__": |
if __name__ == "__main__": |
16 |
|
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu") |
|
17 |
|
device = "cpu" |
|
|
17 |
|
device = torch.device("cuda" if torch.cuda.is_available() else "cpu") |
|
18 |
|
# device = "cpu" |
18 |
19 |
print(device) |
print(device) |
19 |
20 |
args = real_args_parse() |
args = real_args_parse() |
20 |
21 |
print(args) |
print(args) |
|
... |
... |
if __name__ == "__main__": |
40 |
41 |
batch_size=1, num_workers=4) |
batch_size=1, num_workers=4) |
41 |
42 |
|
|
42 |
43 |
# create model |
# create model |
43 |
|
net = PACNN() |
|
44 |
|
|
|
45 |
|
for train_img, label in train_loader_pacnn: |
|
46 |
|
d1_label, d2_label, d3_label = label |
|
47 |
|
d1, d2, d3 = net(train_img) |
|
48 |
|
print(d1.size()) |
|
49 |
|
print(d2.size()) |
|
50 |
|
print(d3.size()) |
|
51 |
|
print("====") |
|
52 |
|
print(d1_label.size()) |
|
53 |
|
print(d2_label.size()) |
|
54 |
|
print(d3_label.size()) |
|
55 |
|
print("done===done=====") |
|
|
44 |
|
net = PACNN().to(device) |
|
45 |
|
criterion_mse = nn.MSELoss(size_average=False).to(device) |
|
46 |
|
criterion_ssim = pytorch_ssim.SSIM(window_size=11).to(device) |
|
47 |
|
|
|
48 |
|
|
|
49 |
|
optimizer = torch.optim.SGD(net.parameters(), args.lr, |
|
50 |
|
momentum=args.momentum, |
|
51 |
|
weight_decay=args.decay) |
|
52 |
|
for e in range(3): |
|
53 |
|
loss_sum = 0 |
|
54 |
|
for train_img, label in train_loader_pacnn: |
|
55 |
|
d1_label, d2_label, d3_label = label |
|
56 |
|
d1_label = d1_label.to(device) |
|
57 |
|
d2_label = d2_label.to(device) |
|
58 |
|
d3_label = d3_label.to(device) |
|
59 |
|
d1, d2, d3 = net(train_img.to(device)) |
|
60 |
|
loss_1 = criterion_mse(d1, d1_label) + criterion_ssim(d1.unsqueeze(0), d1_label.unsqueeze(0)) |
|
61 |
|
loss_2 = criterion_mse(d2, d2_label) + criterion_ssim(d2.unsqueeze(0), d2_label.unsqueeze(0)) |
|
62 |
|
loss_3 = criterion_mse(d3, d3_label) + criterion_ssim(d3.unsqueeze(0), d3_label.unsqueeze(0)) |
|
63 |
|
|
|
64 |
|
loss = loss_1 + loss_2 + loss_3 |
|
65 |
|
loss.backward() |
|
66 |
|
optimizer.step() |
|
67 |
|
loss_sum += loss.item() |
|
68 |
|
avg_loss = loss_sum/40 |
|
69 |
|
print(avg_loss) |
File pytorch_ssim/__init__.py added (mode: 100644) (index 0000000..738e803) |
|
1 |
|
import torch |
|
2 |
|
import torch.nn.functional as F |
|
3 |
|
from torch.autograd import Variable |
|
4 |
|
import numpy as np |
|
5 |
|
from math import exp |
|
6 |
|
|
|
7 |
|
def gaussian(window_size, sigma):
    """Return a normalized 1-D Gaussian kernel.

    The kernel has ``window_size`` taps centred on index ``window_size // 2``
    and is scaled so that its entries sum to 1.

    Args:
        window_size: Number of taps in the kernel; must be a positive integer.
        sigma: Standard deviation of the Gaussian, in taps.

    Returns:
        A 1-D ``torch.Tensor`` (default float type) of length ``window_size``.

    Raises:
        ValueError: If ``window_size`` is not positive. Without this check an
            empty kernel would be returned and only fail later, far from the
            cause, when it is expanded or used in a convolution.
    """
    if window_size <= 0:
        raise ValueError(
            "window_size must be a positive integer, got {!r}".format(window_size)
        )

    center = window_size // 2
    denom = float(2 * sigma ** 2)
    # Weights are evaluated in Python floats and only then converted, exactly
    # as before, so the resulting kernel values are unchanged.
    weights = torch.Tensor(
        [exp(-(x - center) ** 2 / denom) for x in range(window_size)]
    )
    return weights / weights.sum()
|
10 |
|
|
|
11 |
|
def create_window(window_size, channel):
    """Build the 2-D Gaussian window used as a depthwise convolution kernel.

    The window is the outer product of a 1-D Gaussian (sigma fixed at 1.5)
    with itself, replicated once per channel.

    Args:
        window_size: Side length of the square window.
        channel: Number of copies of the window to produce (one per channel).

    Returns:
        A contiguous float tensor of shape
        ``(channel, 1, window_size, window_size)``.
    """
    column = gaussian(window_size, 1.5).unsqueeze(1)  # (window_size, 1)
    kernel_2d = column.mm(column.t()).float()  # outer product
    kernel_4d = kernel_2d.unsqueeze(0).unsqueeze(0)  # (1, 1, ws, ws)
    per_channel = kernel_4d.expand(channel, 1, window_size, window_size)
    # expand() yields a broadcast view; contiguous() materialises real storage.
    return Variable(per_channel.contiguous())
|
16 |
|
|
|
17 |
|
def _ssim(img1, img2, window, window_size, channel, size_average = True): |
|
18 |
|
mu1 = F.conv2d(img1, window, padding = window_size//2, groups = channel) |
|
19 |
|
mu2 = F.conv2d(img2, window, padding = window_size//2, groups = channel) |
|
20 |
|
|
|
21 |
|
mu1_sq = mu1.pow(2) |
|
22 |
|
mu2_sq = mu2.pow(2) |
|
23 |
|
mu1_mu2 = mu1*mu2 |
|
24 |
|
|
|
25 |
|
sigma1_sq = F.conv2d(img1*img1, window, padding = window_size//2, groups = channel) - mu1_sq |
|
26 |
|
sigma2_sq = F.conv2d(img2*img2, window, padding = window_size//2, groups = channel) - mu2_sq |
|
27 |
|
sigma12 = F.conv2d(img1*img2, window, padding = window_size//2, groups = channel) - mu1_mu2 |
|
28 |
|
|
|
29 |
|
C1 = 0.01**2 |
|
30 |
|
C2 = 0.03**2 |
|
31 |
|
|
|
32 |
|
ssim_map = ((2*mu1_mu2 + C1)*(2*sigma12 + C2))/((mu1_sq + mu2_sq + C1)*(sigma1_sq + sigma2_sq + C2)) |
|
33 |
|
|
|
34 |
|
if size_average: |
|
35 |
|
return ssim_map.mean() |
|
36 |
|
else: |
|
37 |
|
return ssim_map.mean(1).mean(1).mean(1) |
|
38 |
|
|
|
39 |
|
class SSIM(torch.nn.Module):
    """Module wrapper around ``_ssim`` that caches the Gaussian window.

    The window is kept as a plain attribute rather than a registered buffer,
    so ``module.to(device)`` does not move it. ``forward`` instead rebuilds
    and re-caches it whenever the input's channel count, device or dtype no
    longer match the cached window.

    NOTE(review): the returned value is a similarity (identical inputs give
    1), not a distance; a caller minimising a loss most likely wants
    ``1 - ssim`` -- confirm at the call site.

    Args:
        window_size: Side length of the Gaussian window.
        size_average: Forwarded to ``_ssim``; if true a single scalar is
            returned, otherwise one value per batch entry.
    """

    def __init__(self, window_size = 11, size_average = True):
        super(SSIM, self).__init__()
        self.window_size = window_size
        self.size_average = size_average
        # Start with a single-channel window; it is replaced lazily in
        # forward() if the first input differs.
        self.channel = 1
        self.window = create_window(window_size, self.channel)

    def forward(self, img1, img2):
        """Return the SSIM between ``img1`` and ``img2``.

        ``img1`` must be 4-D (the size is unpacked into four values, with the
        second taken as the channel count); a ``ValueError`` from the
        unpacking is raised otherwise.
        """
        (_, channel, _, _) = img1.size()

        # Compare device and dtype explicitly. The previous check compared
        # Tensor.type() strings, which are identical for tensors on different
        # CUDA devices, so a window cached on one GPU could be reused for an
        # input on another and fail inside conv2d.
        cache_is_valid = (
            channel == self.channel
            and self.window.device == img1.device
            and self.window.dtype == img1.dtype
        )

        if cache_is_valid:
            window = self.window
        else:
            window = create_window(self.window_size, channel)
            window = window.to(device=img1.device, dtype=img1.dtype)

            self.window = window
            self.channel = channel

        return _ssim(img1, img2, window, self.window_size, channel, self.size_average)
|
64 |
|
|
|
65 |
|
def ssim(img1, img2, window_size = 11, size_average = True):
    """Functional SSIM: build a fresh window for ``img1`` and evaluate ``_ssim``.

    Args:
        img1: 4-D tensor; its second dimension is used as the channel count.
        img2: Tensor compared against ``img1``.
        window_size: Side length of the Gaussian window.
        size_average: Forwarded to ``_ssim``.

    Returns:
        Whatever ``_ssim`` returns for the given ``size_average``.
    """
    _batch, channel, _rows, _cols = img1.size()

    kernel = create_window(window_size, channel)
    if img1.is_cuda:
        # Place the window on the same CUDA device as the input first.
        kernel = kernel.cuda(img1.get_device())
    kernel = kernel.type_as(img1)

    return _ssim(img1, img2, kernel, window_size, channel, size_average)