List of commits:
Subject Hash Author Date (UTC)
best score checkpoint , timer bb26cec915aa04d68a0dd00911c273542f9b34b5 Thai Thien 2020-04-23 11:16:24
m4 t2 a02b9610e868f4ba5e64496dc0c861a269f4cb9f Thai Thien 2020-04-17 16:01:39
fix url 358f164d558dab393f65c0829d8d9c37b1437ff3 Thai Thien 2020-04-16 14:32:49
increase epoch 03be68a9e02df1ffa245394ea3096990e8f9d44b Thai Thien 2020-04-16 14:30:15
add load model 044a398d62add2e854b79b0b3c48c961a4a20bb0 Thai Thien 2020-04-16 14:27:43
M4 c960a8e3ddbfb7fc57f3f843fa4184c063cf8cdb Thai Thien 2020-04-16 14:22:37
typo again 3dbe3ce4634b8d4ca30b012851c5b9690b1d88d7 Thai Thien 2020-04-13 15:49:23
typo 9b9a84ed5bfffc6e8979fe8b9aa2d6411bfd70c2 Thai Thien 2020-04-13 15:47:51
typo 69cdd6f3037ef0357783ad4c3f8cdf8de2258c3b Thai Thien 2020-04-13 15:38:14
small tall on split branch 36c80eee740df7449c112f4dd4925e0ffbc7ac5a Thai Thien 2020-04-13 15:23:33
fix load model 68c03563a5aa3acda165eae104ff4d2df83201b2 Thai Thien 2020-04-09 17:30:14
get lr and weight_decay b43666da2cb8bb5710f30d0f3bfd2c3b1e7b6473 Thai Thien 2020-04-09 17:26:13
1201 epoch (because cur epoch continue when load) ee303fe3945fcc6cc3c26601039de77a58fa3d60 Thai Thien 2020-04-09 17:23:18
shb load model 062126f959c021577dbf08224aeb442ca308587c Thai Thien 2020-04-09 17:20:27
4 2f49bfa380e997c177af1de34c8d6882ed7099e9 Thai Thien 2020-04-09 17:06:28
11 331644c623b1bf5a34c4db432e1526f6bc34a398 Thai Thien 2020-04-09 17:04:49
batchsize 24 f6aeba845ee1915cb0aeb8fce298e56a6ba40b3a Thai Thien 2020-04-09 16:46:25
t10 91d9e83c80a6a533535fd91278b9175c839c9715 Thai Thien 2020-04-09 16:43:38
t9 927b97f2f285000ce7b407496c52da2c61539cb8 Thai Thien 2020-04-09 16:37:39
minor bugfix cbab75b39a2d3495c1d07a6f5c127ccc6e7cfbf5 Thai Thien 2020-04-09 16:15:24
Commit bb26cec915aa04d68a0dd00911c273542f9b34b5 - best score checkpoint , timer
Author: Thai Thien
Author date (UTC): 2020-04-23 11:16
Committer name: Thai Thien
Committer date (UTC): 2020-04-23 11:16
Parent(s): a02b9610e868f4ba5e64496dc0c861a269f4cb9f
Signing key:
Tree: 7aa4fca70178fd2903aaa64dfdbd4e6e75ed7b8d
File Lines added Lines deleted
experiment_meow_main.py 40 2
File experiment_meow_main.py changed (mode: 100644) (index 84b1083..4441b69)
... ... from args_util import meow_parse
4 4 from data_flow import get_dataloader, create_image_list from data_flow import get_dataloader, create_image_list
5 5 from ignite.engine import Events, create_supervised_trainer, create_supervised_evaluator from ignite.engine import Events, create_supervised_trainer, create_supervised_evaluator
6 6 from ignite.metrics import Loss from ignite.metrics import Loss
7 from ignite.handlers import Checkpoint, DiskSaver
7 from ignite.handlers import Checkpoint, DiskSaver, Timer
8 8 from crowd_counting_error_metrics import CrowdCountingMeanAbsoluteError, CrowdCountingMeanSquaredError from crowd_counting_error_metrics import CrowdCountingMeanAbsoluteError, CrowdCountingMeanSquaredError
9 9 from visualize_util import get_readable_time from visualize_util import get_readable_time
10 10
 
... ... if __name__ == "__main__":
102 102
103 103 print(args) print(args)
104 104
105
106 # timer
107 train_timer = Timer() # time to train whole epoch
108 batch_timer = Timer(average=True) # every batch
109 evaluate_timer = Timer()
110
111 batch_timer.attach(trainer,
112 start =Events.EPOCH_STARTED,
113 resume =Events.ITERATION_STARTED,
114 pause =Events.ITERATION_COMPLETED,
115 step =Events.ITERATION_COMPLETED)
116
117 train_timer.attach(trainer,
118 start =Events.EPOCH_STARTED,
119 resume =Events.EPOCH_STARTED,
120 pause =Events.EPOCH_COMPLETED,
121 step =Events.EPOCH_COMPLETED)
122
105 123 if len(args.load_model) > 0: if len(args.load_model) > 0:
106 124 load_model_path = args.load_model load_model_path = args.load_model
107 125 print("load mode " + load_model_path) print("load mode " + load_model_path)
 
... ... if __name__ == "__main__":
116 134 print("do not load, keep training") print("do not load, keep training")
117 135
118 136
119 @trainer.on(Events.ITERATION_COMPLETED(every=50))
137 @trainer.on(Events.ITERATION_COMPLETED(every=100))
120 138 def log_training_loss(trainer): def log_training_loss(trainer):
121 139 timestamp = get_readable_time() timestamp = get_readable_time()
122 140 print(timestamp + " Epoch[{}] Loss: {:.2f}".format(trainer.state.epoch, trainer.state.output)) print(timestamp + " Epoch[{}] Loss: {:.2f}".format(trainer.state.epoch, trainer.state.output))
 
... ... if __name__ == "__main__":
135 153 experiment.log_metric("train_loss", metrics['loss']) experiment.log_metric("train_loss", metrics['loss'])
136 154 experiment.log_metric("lr", get_lr(optimizer)) experiment.log_metric("lr", get_lr(optimizer))
137 155
156 experiment.log_metric("batch_timer", batch_timer.value())
157 experiment.log_metric("train_timer", train_timer.value())
158
138 159 @trainer.on(Events.EPOCH_COMPLETED) @trainer.on(Events.EPOCH_COMPLETED)
139 160 def log_validation_results(trainer): def log_validation_results(trainer):
161 evaluate_timer.resume()
140 162 evaluator.run(test_loader) evaluator.run(test_loader)
163 evaluate_timer.pause()
164 evaluate_timer.step()
165
141 166 metrics = evaluator.state.metrics metrics = evaluator.state.metrics
142 167 timestamp = get_readable_time() timestamp = get_readable_time()
143 168 print(timestamp + " Validation set Results - Epoch: {} Avg mae: {:.2f} Avg mse: {:.2f} Avg loss: {:.2f}" print(timestamp + " Validation set Results - Epoch: {} Avg mae: {:.2f} Avg mse: {:.2f} Avg loss: {:.2f}"
 
... ... if __name__ == "__main__":
146 171 experiment.log_metric("valid_mse", metrics['mse']) experiment.log_metric("valid_mse", metrics['mse'])
147 172 experiment.log_metric("valid_loss", metrics['loss']) experiment.log_metric("valid_loss", metrics['loss'])
148 173
174 # timer
175 experiment.log_metric("evaluate_timer", evaluate_timer.value())
176
177 def checkpoint_valid_mae_score_function(engine):
178 score = engine.state.metrics['valid_mae']
179 return score
180
149 181
150 182 # docs on save and load # docs on save and load
151 183 to_save = {'trainer': trainer, 'model': model, 'optimizer': optimizer} to_save = {'trainer': trainer, 'model': model, 'optimizer': optimizer}
 
... ... if __name__ == "__main__":
153 185 filename_prefix=args.task_id, filename_prefix=args.task_id,
154 186 n_saved=5) n_saved=5)
155 187
188 save_handler_best = Checkpoint(to_save, DiskSaver('saved_model_best/' + args.task_id, create_dir=True, atomic=True),
189 filename_prefix=args.task_id, score_name="valid_mae", score_function=checkpoint_valid_mae_score_function,
190 n_saved=5)
191
156 192 trainer.add_event_handler(Events.EPOCH_COMPLETED(every=5), save_handler) trainer.add_event_handler(Events.EPOCH_COMPLETED(every=5), save_handler)
193 trainer.add_event_handler(Events.EPOCH_COMPLETED(every=1), save_handler_best)
194
157 195
158 196 trainer.run(train_loader, max_epochs=args.epochs) trainer.run(train_loader, max_epochs=args.epochs)
Hints:
Before your first commit, do not forget to set up your git environment:
git config --global user.name "your_name_here"
git config --global user.email "your@email_here"

Clone this repository using HTTP(S):
git clone https://rocketgit.com/user/hahattpro/crowd_counting_framework

Clone this repository using SSH (do not forget to upload a key first):
git clone ssh://rocketgit@ssh.rocketgit.com/user/hahattpro/crowd_counting_framework

Clone this repository using the git protocol:
git clone git://git.rocketgit.com/user/hahattpro/crowd_counting_framework

You are allowed to anonymously push to this repository.
This means that your pushed commits will automatically be transformed into a merge request:
... clone the repository ...
... make some changes and some commits ...
git push origin main