/main_pacnn.py (cb90faba0bd4a45f2606a1e60975ed05bfacdb07) (15300 bytes) (mode 100644) (type blob)

from comet_ml import Experiment
from args_util import real_args_parse
from data_flow import get_train_val_list, get_dataloader, create_training_image_list
from crowd_counting_error_metrics import CrowdCountingMeanAbsoluteError, CrowdCountingMeanSquaredError
import torch
from torch import nn
import torch.nn.functional as F
from models import CSRNet, PACNN, PACNNWithPerspectiveMap
import os
import cv2
from torchvision import datasets, transforms
from data_flow import ListDataset
import pytorch_ssim
from time import time
from evaluator import MAECalculator

from model_util import save_checkpoint

# import apex
# from apex import amp

if __name__ == "__main__":
    # Run on GPU when available; every tensor/model below is moved to this device.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Comet.ml experiment tracker used throughout for metric/parameter logging.
    # SECURITY NOTE(review): the API key is hard-coded and committed to the repo;
    # prefer reading it from an environment variable.
    experiment = Experiment(api_key="S3mM1eMq6NumMxk2QJAXASkUM",
                            project_name="pacnn-dev2", workspace="ttpro1995")

    args = real_args_parse()
    print(device)
    print(args)



    # Run configuration pulled from the CLI arguments.
    MODEL_SAVE_NAME = args.task_id   # base name for checkpoint files
    TEST = args.test                 # evaluation-only mode flag
    MODEL_SAVE_INTERVAL = 5          # checkpoint every 5 epochs
    DATA_PATH = args.input
    TOTAL_EPOCH = args.epochs
    PACNN_PERSPECTIVE_AWARE_MODEL = args.PACNN_PERSPECTIVE_AWARE_MODEL
    PACNN_MUTILPLE_SCALE_LOSS = args.PACNN_MUTILPLE_SCALE_LOSS
    DATASET_NAME = "shanghaitech_pacnn"

    # The perspective-aware variant needs ground-truth perspective maps, which a
    # different dataset loader provides as extra label entries.
    if PACNN_PERSPECTIVE_AWARE_MODEL:
        DATASET_NAME = "shanghaitech_pacnn_with_perspective"

    # Log the run configuration to Comet for reproducibility.
    experiment.set_name(args.task_id)
    experiment.log_parameter("DATA_PATH", DATA_PATH)
    experiment.log_parameter("PACNN_PERSPECTIVE_AWARE_MODEL", PACNN_PERSPECTIVE_AWARE_MODEL)
    experiment.log_parameter("PACNN_MUTILPLE_SCALE_LOSS", PACNN_MUTILPLE_SCALE_LOSS)
    experiment.log_parameter("train", "train without p")
    experiment.log_parameter("momentum", args.momentum)
    experiment.log_parameter("lr", args.lr)

    # create list
    # Build the train/val/test image path lists. DATASET_NAME is hard-coded above
    # to a shanghaitech variant, so the ucf_cc_50 branch is currently unreachable;
    # NOTE(review): if neither substring matched, train_list/val_list would be
    # undefined and the loader construction below would raise NameError.
    if  "shanghaitech" in DATASET_NAME:
        TRAIN_PATH = os.path.join(DATA_PATH, "train_data")
        TEST_PATH = os.path.join(DATA_PATH, "test_data")
        train_list, val_list = get_train_val_list(TRAIN_PATH)
        test_list = create_training_image_list(TEST_PATH)
    elif "ucf_cc_50" in DATASET_NAME:
        # UCF-CC-50 has no separate test folder here: hold out 20% for validation
        # and leave the test list empty.
        train_list, val_list = get_train_val_list(DATA_PATH, test_size=0.2)
        test_list = None

    # create data loader
    # NOTE(review): this call hard-codes dataset_name="ucf_cc_50" even though
    # DATASET_NAME is a shanghaitech variant, and the three loaders it returns
    # are never used below (the *_pacnn loaders are built instead) -- this line
    # looks like dead code; confirm before removing.
    train_loader, val_loader, test_loader = get_dataloader(train_list, val_list, test_list, dataset_name="ucf_cc_50")
    # Training loader: images normalized with ImageNet mean/std. DATASET_NAME
    # decides whether each label also carries perspective maps.
    train_loader_pacnn = torch.utils.data.DataLoader(
        ListDataset(train_list,
                    shuffle=True,
                    transform=transforms.Compose([
                        transforms.ToTensor(), transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                                                    std=[0.229, 0.224, 0.225]),
                    ]),
                    train=True,
                    batch_size=1,
                    num_workers=4, dataset_name=DATASET_NAME),
        batch_size=1, num_workers=4)

    # Validation loader. NOTE(review): dataset_name is hard-coded to
    # "shanghaitech_pacnn" instead of DATASET_NAME, so validation labels never
    # include perspective maps even in perspective-aware mode; the evaluation
    # loops below unpack three labels accordingly -- confirm this is intentional.
    val_loader_pacnn = torch.utils.data.DataLoader(
        ListDataset(val_list,
                    shuffle=False,
                    transform=transforms.Compose([
                        transforms.ToTensor(), transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                                                    std=[0.229, 0.224, 0.225]),
                    ]),
                    train=False,
                    batch_size=1,
                    num_workers=4, dataset_name="shanghaitech_pacnn"),
        batch_size=1, num_workers=4)

    # Test loader; same hard-coded dataset_name caveat as the validation loader.
    test_loader_pacnn = torch.utils.data.DataLoader(
        ListDataset(test_list,
                    shuffle=False,
                    transform=transforms.Compose([
                        transforms.ToTensor(), transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                                                    std=[0.229, 0.224, 0.225]),
                    ]),
                    train=False,
                    batch_size=1,
                    num_workers=4, dataset_name="shanghaitech_pacnn"),
        batch_size=1, num_workers=4)

    # create model, losses and optimizer
    net = PACNNWithPerspectiveMap(perspective_aware_mode=PACNN_PERSPECTIVE_AWARE_MODEL).to(device)
    # Sum-reduced MSE density loss. reduction='sum' replaces the long-deprecated
    # size_average=False argument -- identical behavior, no deprecation warning.
    criterion_mse = nn.MSELoss(reduction='sum').to(device)
    # Structural-similarity loss over small 5x5 windows.
    criterion_ssim = pytorch_ssim.SSIM(window_size=5).to(device)

    optimizer = torch.optim.SGD(net.parameters(), args.lr,
                                momentum=args.momentum,
                                weight_decay=args.decay)

    current_save_model_name = ""  # path of the most recently saved checkpoint ("" = none yet)
    current_epoch = 0             # overwritten below when resuming from a checkpoint

    # load model
    # Optionally resume from a saved checkpoint: restores model weights,
    # optimizer state and the epoch counter so training continues where it left off.
    load_model = args.load_model
    if len(load_model) > 0:
        checkpoint = torch.load(load_model)
        net.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        current_epoch = checkpoint['e']
        print("load ", load_model, "  epoch ", str(current_epoch))
    else:
        print("new model")

    if TEST:
        # Evaluation-only mode: score the (loaded) model on the test split,
        # report per-scale MAE plus the fused-output MAE, then stop.
        print("test model")
        mae_calculator_d1 = MAECalculator()
        mae_calculator_d2 = MAECalculator()
        mae_calculator_d3 = MAECalculator()
        mae_calculator_final = MAECalculator()
        with torch.no_grad():
            for img, label in test_loader_pacnn:
                net.eval()
                gt_d1, gt_d2, gt_d3 = label

                # forward pass: per-scale densities, perspective maps, fused output
                d1, d2, d3, p_s, p, d = net(img.to(device))

                gt_d1 = gt_d1.to(device)
                gt_d2 = gt_d2.to(device)
                gt_d3 = gt_d3.to(device)

                # accumulate absolute count errors (fused map d is scored against
                # the scale-1 ground truth)
                mae_calculator_d1.eval(d1.cpu().detach().numpy(), gt_d1.cpu().detach().numpy())
                mae_calculator_d2.eval(d2.cpu().detach().numpy(), gt_d2.cpu().detach().numpy())
                mae_calculator_d3.eval(d3.cpu().detach().numpy(), gt_d3.cpu().detach().numpy())
                mae_calculator_final.eval(d.cpu().detach().numpy(), gt_d1.cpu().detach().numpy())
            print("count ", mae_calculator_d1.count)
            print("d1_val ", mae_calculator_d1.get_mae())
            print("d2_val ", mae_calculator_d2.get_mae())
            print("d3_val ", mae_calculator_d3.get_mae())
            print("dfinal_val ", mae_calculator_final.get_mae())
            experiment.log_metric("d1_val", mae_calculator_d1.get_mae())
            experiment.log_metric("d2_val", mae_calculator_d2.get_mae())
            experiment.log_metric("d3_val", mae_calculator_d3.get_mae())
            experiment.log_metric("dfinal_val", mae_calculator_final.get_mae())
        exit()

    # ---- training loop -----------------------------------------------------
    # Resumes at current_epoch (0 for a fresh run, checkpoint epoch otherwise).
    while current_epoch < TOTAL_EPOCH:
        experiment.log_current_epoch(current_epoch)
        current_epoch += 1
        print("start epoch ", current_epoch)
        loss_sum = 0
        sample = 0
        start_time = time()
        counting = 0
        for train_img, label in train_loader_pacnn:
            net.train()

            # load data: density maps at three scales, plus the two ground-truth
            # perspective maps in perspective-aware mode.
            if PACNN_PERSPECTIVE_AWARE_MODEL:
                d1_label, d2_label, d3_label, perspective_s, perspective_p = label
                perspective_s = perspective_s.to(device).unsqueeze(0)
                perspective_p = perspective_p.to(device).unsqueeze(0)
            else:
                d1_label, d2_label, d3_label = label
            d1_label = d1_label.to(device).unsqueeze(0)
            d2_label = d2_label.to(device).unsqueeze(0)
            d3_label = d3_label.to(device).unsqueeze(0)

            # forward pass: per-scale densities d1..d3, perspective maps p_s/p,
            # and the fused final density d.
            d1, d2, d3, p_s, p, d = net(train_img.to(device))
            # Final-density loss. NOTE(review): d is scored against d1_label,
            # the scale-1 ground truth -- confirm this is intentional.
            loss_d = criterion_mse(d, d1_label) + criterion_ssim(d, d1_label)
            loss = loss_d

            if PACNN_MUTILPLE_SCALE_LOSS:
                # Deep supervision: add one loss term per intermediate scale.
                loss_1 = criterion_mse(d1, d1_label) + criterion_ssim(d1, d1_label)
                loss_2 = criterion_mse(d2, d2_label) + criterion_ssim(d2, d2_label)
                loss_3 = criterion_mse(d3, d3_label) + criterion_ssim(d3, d3_label)
                loss += loss_1 + loss_2 + loss_3


            if PACNN_PERSPECTIVE_AWARE_MODEL:
                # The predicted map p may be a few pixels smaller than the ground
                # truth, so the ground truth is cropped to match. When a dimension
                # already matches, its pad is set to minus the full size so that
                # the slice 0:-pad keeps the whole dimension.
                pad_p_0 = perspective_p.size()[2] - p.size()[2]
                pad_p_1 = perspective_p.size()[3] - p.size()[3]
                if pad_p_0 == 0:
                    pad_p_0 = -perspective_p.size()[2]
                if pad_p_1 == 0:
                    pad_p_1 = -perspective_p.size()[3]

                perspective_p_pad = perspective_p[:,:, 0:-pad_p_0, 0:-pad_p_1]

                loss_p = criterion_mse(p, perspective_p_pad) + criterion_ssim(p, perspective_p_pad)

                loss += loss_p
                if PACNN_MUTILPLE_SCALE_LOSS:
                    # Same size-matching crop for the intermediate perspective map p_s.
                    pad_s_0 = perspective_s.size()[2] - p_s.size()[2]
                    pad_s_1 = perspective_s.size()[3] - p_s.size()[3]

                    if pad_s_0 == 0:
                        pad_s_0 = -perspective_s.size()[2]
                    if pad_s_1 == 0:
                        pad_s_1 = -perspective_s.size()[3]

                    perspective_s_pad = perspective_s[:,:, 0:-pad_s_0, 0:-pad_s_1]

                    loss_p_s = criterion_mse(p_s, perspective_s_pad) + criterion_ssim(p_s, perspective_s_pad)
                    loss += loss_p_s

            # Single zero_grad immediately before backward. (The original called
            # optimizer.zero_grad() twice per iteration -- once at the top of the
            # loop and once here; the second call made the first a no-op, so only
            # this one is kept.)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            loss_sum += loss.item()
            sample += 1
            counting += 1

            if counting % 100 == 0:
                # Running average loss over the epoch so far, logged every 100 steps.
                avg_loss_ministep = loss_sum/sample
                print("counting ", counting, " -- avg loss ", avg_loss_ministep)
                experiment.log_metric("avg_loss_ministep", avg_loss_ministep)
            # end dataloader loop

        end_time = time()
        avg_loss = loss_sum/sample
        epoch_time = end_time - start_time
        print("==END epoch ", current_epoch, " =============================================")
        print(epoch_time, avg_loss, sample)
        experiment.log_metric("epoch_time", epoch_time)
        experiment.log_metric("avg_loss_epoch", avg_loss)
        print("=================================================================")

        # Periodic checkpoint: model + optimizer state + epoch counter, also
        # uploaded to Comet as an asset.
        if current_epoch % MODEL_SAVE_INTERVAL == 0:
            current_save_model_name = save_checkpoint({
                    'model': net.state_dict(),
                    'optimizer': optimizer.state_dict(),
                    'e': current_epoch,
                    'PACNN_PERSPECTIVE_AWARE_MODEL': PACNN_PERSPECTIVE_AWARE_MODEL
            }, False, MODEL_SAVE_NAME+"_"+str(current_epoch)+"_")
            experiment.log_asset(current_save_model_name)
            print("saved ", current_save_model_name)

        # end 1 epoch

        # after each epoch: MAE evaluation on the validation split.
        mae_calculator_d1 = MAECalculator()
        mae_calculator_d2 = MAECalculator()
        mae_calculator_d3 = MAECalculator()
        mae_calculator_final = MAECalculator()
        with torch.no_grad():
            for val_img, label in val_loader_pacnn:
                net.eval()
                # validation labels carry only the three density maps
                d1_label, d2_label, d3_label = label

                # forward pass
                d1, d2, d3, p_s, p, d = net(val_img.to(device))

                d1_label = d1_label.to(device)
                d2_label = d2_label.to(device)
                d3_label = d3_label.to(device)

                # score each scale plus the fused output (fused map d is scored
                # against the scale-1 ground truth)
                mae_calculator_d1.eval(d1.cpu().detach().numpy(), d1_label.cpu().detach().numpy())
                mae_calculator_d2.eval(d2.cpu().detach().numpy(), d2_label.cpu().detach().numpy())
                mae_calculator_d3.eval(d3.cpu().detach().numpy(), d3_label.cpu().detach().numpy())
                mae_calculator_final.eval(d.cpu().detach().numpy(), d1_label.cpu().detach().numpy())
            print("count ", mae_calculator_d1.count)
            print("d1_val ", mae_calculator_d1.get_mae())
            print("d2_val ", mae_calculator_d2.get_mae())
            print("d3_val ", mae_calculator_d3.get_mae())
            print("dfinal_val ", mae_calculator_final.get_mae())
            experiment.log_metric("d1_val", mae_calculator_d1.get_mae())
            experiment.log_metric("d2_val", mae_calculator_d2.get_mae())
            experiment.log_metric("d3_val", mae_calculator_d3.get_mae())
            experiment.log_metric("dfinal_val", mae_calculator_final.get_mae())


    #############################################
    # done training: reload the last saved checkpoint and run a final MAE
    # evaluation on the validation split.
    if current_save_model_name:
        net = PACNNWithPerspectiveMap(PACNN_PERSPECTIVE_AWARE_MODEL).to(device)
        print(net)
        # NOTE: this is the most recent checkpoint, not necessarily the best one.
        best_checkpoint = torch.load(current_save_model_name)
        net.load_state_dict(best_checkpoint['model'])
    else:
        # No checkpoint was ever written (e.g. TOTAL_EPOCH < MODEL_SAVE_INTERVAL):
        # evaluate the in-memory model instead of crashing on torch.load("").
        print("no checkpoint saved; evaluating current in-memory model")

    # TODO d1_val  155.97279205322266
    # d2_val  35.46327234903971
    # d3_val  23.07176342010498
    # why d2 and d3 mse too low
    mae_calculator_d1 = MAECalculator()
    mae_calculator_d2 = MAECalculator()
    mae_calculator_d3 = MAECalculator()
    mae_calculator_final = MAECalculator()
    with torch.no_grad():
        for val_img, label in val_loader_pacnn:
            net.eval()
            # validation labels carry only the three density maps
            d1_label, d2_label, d3_label = label

            # forward pass
            d1, d2, d3, p_s, p, d = net(val_img.to(device))

            d1_label = d1_label.to(device)
            d2_label = d2_label.to(device)
            d3_label = d3_label.to(device)

            # score each scale plus the fused output (fused map d is scored
            # against the scale-1 ground truth)
            mae_calculator_d1.eval(d1.cpu().detach().numpy(), d1_label.cpu().detach().numpy())
            mae_calculator_d2.eval(d2.cpu().detach().numpy(), d2_label.cpu().detach().numpy())
            mae_calculator_d3.eval(d3.cpu().detach().numpy(), d3_label.cpu().detach().numpy())
            mae_calculator_final.eval(d.cpu().detach().numpy(), d1_label.cpu().detach().numpy())
        print("count ", mae_calculator_d1.count)
        print("d1_val ", mae_calculator_d1.get_mae())
        print("d2_val ", mae_calculator_d2.get_mae())
        print("d3_val ", mae_calculator_d3.get_mae())
        print("dfinal_val ", mae_calculator_final.get_mae())
        experiment.log_metric("d1_val", mae_calculator_d1.get_mae())
        experiment.log_metric("d2_val", mae_calculator_d2.get_mae())
        experiment.log_metric("d3_val", mae_calculator_d3.get_mae())
        experiment.log_metric("dfinal_val", mae_calculator_final.get_mae())




Mode Type Size Ref File
100644 blob 82 a70468201005e2eefb48e83a1056bd00c5c1e34a .gitignore
100644 blob 1342 f2eb3073ff4a8536cf4e8104ff942b525e3c7f34 .travis.yml
100644 blob 1255 1dfa426237bc174a2ba2186240191a6b7041bc86 README.md
100644 blob 9201 77806a96d77d0431140f344bfe59168acdbed2c1 args_util.py
040000 tree - 5e9d7f0e1fd3a9e4d5a37f3d6de0c3ecd3125af8 backup_notebook
040000 tree - 55d1d196f5b6ed4bfc1e8a715df1cfff1dd18117 bug
100644 blob 3591 7b4c18e8cf2c0417cd13d3f77ea0571c9e0e493f crowd_counting_error_metrics.py
100644 blob 58983 c4ebf965407cdd7badd0c5f2545dc61c93b10456 data_flow.py
040000 tree - 7b2560d2cb223bf0574eb278bafeda5a8577c7db data_util
040000 tree - f333f7adc5f35d8831851020a2775b5f5db2d829 dataset_script
040000 tree - d49265b4cd60b8c94b247123a3bd1376de58fdd3 debug
040000 tree - 9862b9cbc6e7a1d43565f12d85d9b17d1bf1814e env_file
100644 blob 4460 9b254c348a3453f4df2c3ccbf21fb175a16852de eval_context_aware_network.py
100644 blob 428 35cc7bfe48a4ed8dc56635fd3a6763612d8af771 evaluator.py
100644 blob 17317 917f86e294452e11c5a67edc9b622f9634f8c6ed experiment_main.py
100644 blob 8876 049432d6bde50245a4acba4e116d59605b5b6315 experiment_meow_main.py
100644 blob 1916 1d228fa4fa2887927db069f0c93c61a920279d1f explore_model_summary.py
100644 blob 2718 b09b84e8b761137654ba6904669799c4866554b3 hard_code_variable.py
040000 tree - b3aa858a157f5e1e22c00fdb6f9dd071f4c6c163 local_train_script
040000 tree - 927d159228536a86499de8a294700f8599b8a60b logs
100644 blob 15300 cb90faba0bd4a45f2606a1e60975ed05bfacdb07 main_pacnn.py
100644 blob 2760 3c2d5ba1c81ef2770ad216c566e268f4ece17262 main_shanghaitech.py
100644 blob 2683 29189260c1a2c03c8e59cd0b4bd61df19d5ce098 main_ucfcc50.py
100644 blob 2794 f37b3bb572c53dd942c51243bd5b0853228c6ddb model_util.py
040000 tree - 3e68f1cb103228fc5e5d22db43874f853152bb39 models
100644 blob 870 8f5ce4f7e0b168add5ff2a363faa973a5b56ca48 mse_l1_loss.py
100644 blob 1066 811554259182e63240d7aa9406f315377b3be1ac mse_ssim_loss.py
040000 tree - 287a5c66239c55dc71808ef91f6c442552394c4a playground
040000 tree - c7c295e9e418154ae7c754dc888a77df8f50aa61 pytorch_ssim
100644 blob 1727 1cd14cbff636cb6145c8bacf013e97eb3f7ed578 sanity_check_dataloader.py
040000 tree - a1e8ea43eba8a949288a00fff12974aec8692003 saved_model_best
100644 blob 3525 27067234ad3deddd743dcab0d7b3ba4812902656 train_attn_can_adcrowdnet.py
100644 blob 3488 e47bfc7e91c46ca3c61be0c5258302de4730b06d train_attn_can_adcrowdnet_freeze_vgg.py
100644 blob 5352 3ee3269d6fcc7408901af46bed52b1d86ee9818c train_attn_can_adcrowdnet_simple.py
100644 blob 5728 90b846b68f15bdc58e3fd60b41aa4b5d82864ec4 train_attn_can_adcrowdnet_simple_lrscheduler.py
100644 blob 9081 664051f8838434c386e34e6dd6e6bca862cb3ccd train_compact_cnn.py
100644 blob 5702 fdec7cd1ee062aa4a2182a91e2fb1bd0db3ab35f train_compact_cnn_lrscheduler.py
100644 blob 5611 2a241c876015db34681d73ce534221de482b0b90 train_compact_cnn_sgd.py
100644 blob 3525 eb52f7a4462687c9b2bf1c3a887014c4afefa26d train_context_aware_network.py
100644 blob 5651 48631e36a1fdc063a6d54d9206d2fd45521d8dc8 train_custom_compact_cnn.py
100644 blob 5594 07d6c9c056db36082545b5b60b1c00d9d9f6396d train_custom_compact_cnn_lrscheduler.py
100644 blob 5281 8a92eb87b54f71ad2a799a7e05020344a22e22d3 train_custom_compact_cnn_sgd.py
040000 tree - 7cfa2fcf8b2fa8bfc6d09a63fe2cfb28b7ab7b2a train_script
100644 blob 6595 5b8afd4fb322dd7cbffd1a589ff5276b0e3edeb5 visualize_data_loader.py
100644 blob 1146 1b0f845587f0f37166d44fa0c74b51f89cf8b349 visualize_util.py
Hints:
Before first commit, do not forget to setup your git environment:
git config --global user.name "your_name_here"
git config --global user.email "your@email_here"

Clone this repository using HTTP(S):
git clone https://rocketgit.com/user/hahattpro/crowd_counting_framework

Clone this repository using ssh (do not forget to upload a key first):
git clone ssh://rocketgit@ssh.rocketgit.com/user/hahattpro/crowd_counting_framework

Clone this repository using git:
git clone git://git.rocketgit.com/user/hahattpro/crowd_counting_framework

You are allowed to anonymously push to this repository.
This means that your pushed commits will automatically be transformed into a merge request:
... clone the repository ...
... make some changes and some commits ...
git push origin main