List of commits:
Subject Hash Author Date (UTC)
apex 62e72f69045f89359064cdd988e909a2d19553ea Thai Thien 2019-10-02 17:43:53
update script, debug ddb68b95389be1c1d398118677dd227a8bb2b70b Thai Thien 2019-10-02 15:52:31
add d (output density map) to loss function) a0c71bf4bf2ab7393d60b06a84db8dfbbfb1a6c2 tthien 2019-09-30 16:32:39
fix the args, add save interval for model, so we don't save them all 9fdf9daa2ac4bd12b7b62521d81e520db0debd01 tthien 2019-09-30 16:30:00
meow 1ad19a22a310992e27a26471feeb37375124d075 tthien 2019-09-29 18:25:43
fix pacnn perspective map 453ece3ccb818889ba895bfc4285f7905d33cba5 Thai Thien 2019-09-25 17:20:33
apex not work so well da8c0dd57297f972201f31d57e66897177922f48 Thai Thien 2019-09-24 17:25:59
fix data loader pacnn so it will scale up with correct number of people 11d55b50d764511f2491291f0208fee0905dec49 Thai Thien 2019-09-24 15:40:56
add comet ml a9d4b89ce594f5e241168ccafdcdf0f150ea0ebb Thai Thien 2019-09-23 17:07:58
fix pacnn avg schema c2140a96886195782e5689c24aeeb4fe7a2db7ad Thai Thien 2019-09-22 17:35:01
debug number not divisible by 8 a568fd7f294a8bd31b3db78437b4b6b51b5b41b9 Thai Thien 2019-09-22 04:36:06
pacnn 967074890d14ab0eefc277801860270a468e8f9f Thai Thien 2019-09-22 03:54:48
wip: pacnn 2192d7c7b449fecf3868877d9cfbc09bb6f7ae98 Thai Thien 2019-09-22 03:44:56
wip: pacnn 37620e5a9bc0f9516ea964ec58d9bdaa1c40ff36 Thai Thien 2019-09-22 03:14:42
fix training flow 2b87b1b26c7296b64493fdc49fedb421b249dfa3 Thai Thien 2019-09-17 18:00:35
dataset script bc5c052f5f956510ab95ef9a45434fd486c57fae Thai Thien 2019-09-16 17:21:13
evaluator ffc5bf8290ae0c469a9a18a2d061cfd1bfeee822 Thai Thien 2019-09-14 04:56:35
some more test for data loader 25173578cde7d4e9fe6c6140d1ee01caa4fcfc32 Thai Thien 2019-09-14 02:51:58
some visualize to debug data loader e4f52007616acf307bddbde79c0fb4f8c649c785 Thai Thien 2019-09-13 17:35:45
wip d7d44cad6774355bdfa45414258763f6c6a0c299 Thai Thien 2019-08-31 16:58:16
Commit 62e72f69045f89359064cdd988e909a2d19553ea - apex
Author: Thai Thien
Author date (UTC): 2019-10-02 17:43
Committer name: Thai Thien
Committer date (UTC): 2019-10-02 17:43
Parent(s): ddb68b95389be1c1d398118677dd227a8bb2b70b
Signing key:
Tree: 66740c5bed6e99eeca8fcfccad6e71dda13edc13
File Lines added Lines deleted
main_pacnn.py 35 12
train_script/train_pacnn_shanghaitechA_apex_amp.sh 7 0
File main_pacnn.py changed (mode: 100644) (index 24d3d1c..2abdbad)
@@ ... @@ from data_flow import ListDataset
 import pytorch_ssim
 from time import time
 from evaluator import MAECalculator
-
+import torch.backends.cudnn as cudnn
 from model_util import save_checkpoint
 
-# import apex
-# from apex import amp
+import apex
+from apex import amp
 
 if __name__ == "__main__":
     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
@@ ... @@ if __name__ == "__main__":
     TOTAL_EPOCH = args.epochs
     PACNN_PERSPECTIVE_AWARE_MODEL = args.PACNN_PERSPECTIVE_AWARE_MODEL
 
+    APEX_AMP = True
+    if APEX_AMP:
+        cudnn.benchmark = True
+        assert torch.backends.cudnn.enabled, "Amp requires cudnn backend to be enabled."
+
     experiment.set_name(args.task_id)
     experiment.log_parameter("DATA_PATH", DATA_PATH)
     experiment.log_parameter("PACNN_PERSPECTIVE_AWARE_MODEL", PACNN_PERSPECTIVE_AWARE_MODEL)
@@ ... @@ if __name__ == "__main__":
     optimizer = torch.optim.SGD(net.parameters(), args.lr,
                                 momentum=args.momentum,
                                 weight_decay=args.decay)
-    # Allow Amp to perform casts as required by the opt_level
-    # net, optimizer = amp.initialize(net, optimizer, opt_level="O1", enabled=False)
+
+    if APEX_AMP:
+        # Allow Amp to perform casts as required by the opt_level
+        net, optimizer = amp.initialize(net, optimizer, opt_level="O3", enabled=APEX_AMP,
+                                        keep_batchnorm_fp32=True)
+        # loss_scale="dynamic"
+
+
 
     current_save_model_name = ""
     current_epoch = 0
@@ ... @@ if __name__ == "__main__":
         net.load_state_dict(checkpoint['model'])
         optimizer.load_state_dict(checkpoint['optimizer'])
         current_epoch = checkpoint['e']
+        if APEX_AMP:
+            amp.load_state_dict(checkpoint['amp'])
         print("load ", load_model, " epoch ", str(current_epoch))
     else:
         print("new model")
@@ ... @@ if __name__ == "__main__":
                 pass
             loss_d = criterion_mse(d, d1_label) + criterion_ssim(d, d1_label)
             loss += loss_d
-            loss.backward()
-            # with amp.scale_loss(loss, optimizer) as scaled_loss:
-            #     scaled_loss.backward()
-            optimizer.step()
-            optimizer.zero_grad()
+
+            optimizer.zero_grad() # make optimizer grad = 0
+
+            if APEX_AMP:
+                with amp.scale_loss(loss, optimizer) as scaled_loss:
+                    scaled_loss.backward()
+            else:
+                loss.backward() # calculate grad for optimizer
+
+            optimizer.step() # optimize param
+
             loss_sum += loss.item()
             sample += 1
             counting += 1
@@ ... @@ if __name__ == "__main__":
         print("=================================================================")
 
         if current_epoch % MODEL_SAVE_INTERVAL == 0:
+            amp_state_dict = None
+            if APEX_AMP:
+                amp_state_dict = amp.state_dict()
+
             current_save_model_name = save_checkpoint({
                 'model': net.state_dict(),
                 'optimizer': optimizer.state_dict(),
                 'e': current_epoch,
-                'PACNN_PERSPECTIVE_AWARE_MODEL': PACNN_PERSPECTIVE_AWARE_MODEL
-                # 'amp': amp.state_dict()
+                'PACNN_PERSPECTIVE_AWARE_MODEL': PACNN_PERSPECTIVE_AWARE_MODEL,
+                'amp': amp_state_dict # amp.state_dict()
             }, False, MODEL_SAVE_NAME+"_"+str(current_epoch)+"_")
             experiment.log_asset(current_save_model_name)
             print("saved ", current_save_model_name)
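For context on the apex calls wired in above, here is a minimal, self-contained sketch of the same AMP flow: amp.initialize() once after the optimizer is built, then amp.scale_loss() around backward(). It is only an illustration, not the repository's code: the nn.Linear model, the random tensors, and the conservative opt_level="O1" are stand-ins (the commit itself uses opt_level="O3" with keep_batchnorm_fp32=True), and it assumes NVIDIA apex is installed on a machine with a CUDA device.

# Hedged sketch of the apex AMP pattern used in main_pacnn.py above.
# Assumptions: NVIDIA apex is installed, a CUDA device is available,
# and nn.Linear stands in for the real PACNN network.
import torch
import torch.nn as nn
import torch.backends.cudnn as cudnn
from apex import amp

cudnn.benchmark = True
assert torch.backends.cudnn.enabled, "Amp requires cudnn backend to be enabled."

device = torch.device("cuda")
net = nn.Linear(16, 1).to(device)        # placeholder model, not PACNN
optimizer = torch.optim.SGD(net.parameters(), lr=1e-8, momentum=0.9)

# Let Amp patch the model and optimizer. The commit uses opt_level="O3"
# with keep_batchnorm_fp32=True; "O1" is used here so this FP32 toy
# loss computation runs without any extra casting.
net, optimizer = amp.initialize(net, optimizer, opt_level="O1")

criterion = nn.MSELoss()
for _ in range(3):                       # stand-in for the epoch/batch loops
    x = torch.randn(8, 16, device=device)
    y = torch.randn(8, 1, device=device)
    optimizer.zero_grad()
    loss = criterion(net(x), y)
    with amp.scale_loss(loss, optimizer) as scaled_loss:
        scaled_loss.backward()           # backward on the scaled loss
    optimizer.step()

Note that recent PyTorch releases provide the same mixed-precision flow natively through torch.cuda.amp (autocast and GradScaler), which has since superseded apex.amp.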
File train_script/train_pacnn_shanghaitechA_apex_amp.sh added (mode: 100644) (index 0000000..41b2486)
+python main_pacnn.py \
+--input data/ShanghaiTech/part_A \
+--epochs 151 \
+--lr 1e-8 \
+--task_id train_apex_state1_attemp4
+
+# --load_model saved_model/train_state1_attemp3_30_checkpoint.pth.tar \
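The checkpoint handling added in this commit follows the order apex recommends: call amp.initialize() first, save amp.state_dict() next to the model and optimizer state, and restore it with amp.load_state_dict() when resuming. A hedged, self-contained sketch of that round trip is below; the file name and the nn.Linear model are illustrative only, and plain torch.save/torch.load stand in for the repository's save_checkpoint() helper.

# Hedged sketch of the checkpoint round trip added in this commit.
# Assumptions: apex is installed and CUDA is available; the path
# "example_checkpoint.pth.tar" and the toy model are hypothetical.
import torch
import torch.nn as nn
from apex import amp

device = torch.device("cuda")
net = nn.Linear(16, 1).to(device)
optimizer = torch.optim.SGD(net.parameters(), lr=1e-8)
net, optimizer = amp.initialize(net, optimizer, opt_level="O1")

# Save: store amp state alongside model/optimizer state
# (the commit stores None here when APEX_AMP is False).
torch.save({
    'model': net.state_dict(),
    'optimizer': optimizer.state_dict(),
    'e': 0,
    'amp': amp.state_dict(),
}, "example_checkpoint.pth.tar")

# Resume: amp.initialize() has already been called before
# amp.load_state_dict(), matching the order in main_pacnn.py.
checkpoint = torch.load("example_checkpoint.pth.tar")
net.load_state_dict(checkpoint['model'])
optimizer.load_state_dict(checkpoint['optimizer'])
current_epoch = checkpoint['e']
if checkpoint['amp'] is not None:
    amp.load_state_dict(checkpoint['amp'])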