myPCL/main.py


								import builtins

								import math

								import os

								import random

								import shutil

								import time

								import warnings


								import torch

								import torch.nn as nn

								import torch.nn.parallel

								import torch.backends.cudnn as cudnn

								import torch.distributed as dist

								import torch.optim

								import torch.multiprocessing as mp

								import torch.utils.data

								import torch.utils.data.distributed

								import torchvision.transforms as transforms

								import torchvision.datasets as datasets

								import torchvision.models as models


								from scripts.parser import parser

								from scripts.meter import AverageMeter, ProgressMeter

								import scripts.augmentation as aug

								import scripts.momentum as momentum

								import scripts.clustering as clustering

								import pcl.builder

								import pcl.loader


								def main_loader():

								    args = parser().parse_args()


								    if args.seed is not None:

								        random.seed(args.seed)

								        torch.manual_seed(args.seed)

								        warnings.warn('You have chosen to seed training. '

								                      'This will turn on the CUDNN deterministic setting, '

								                      'which can slow down your training considerably! '

								                      'You may see unexpected behavior when restarting '

								                      'from checkpoints.')


								    if args.gpu is not None:

								        warnings.warn('You have chosen a specific GPU. This will completely '

								                      'disable data parallelism.')


								    if args.dist_url == "env://" and args.world_size == -1:

								        args.world_size = int(os.environ["WORLD_SIZE"])


								    args.distributed = args.world_size > 1 or args.multiprocessing_distributed


								    args.num_cluster = args.num_cluster.split(',')


								    if not os.path.exists(args.exp_dir):

								        os.mkdir(args.exp_dir)


								    ngpus_per_node = torch.cuda.device_count()

								    if args.multiprocessing_distributed:

								        # Since we have ngpus_per_node processes per node, the total world_size

								        # needs to be adjusted accordingly

								        args.world_size = ngpus_per_node * args.world_size

								        # Use torch.multiprocessing.spawn to launch distributed processes: the

								        # main_worker process function

								        mp.spawn(main_worker, nprocs=ngpus_per_node, args=(ngpus_per_node, args))

								    else:

								        # Simply call main_worker function

								        main_worker(args.gpu, ngpus_per_node, args)


								def main_worker(gpu, ngpus_per_node, args):

								    args.gpu = gpu


								    if args.gpu is not None:

								        print("Use GPU: {} for training".format(args.gpu))


								    # suppress printing if not master

								    if args.multiprocessing_distributed and args.gpu != 0:

								        def print_pass():

								            pass

								        builtins.print = print_pass


								    if args.distributed:

								        if args.dist_url == "env://" and args.rank == -1:

								            args.rank = int(os.environ["RANK"])

								        if args.multiprocessing_distributed:

								            # For multiprocessing distributed training, rank needs to be the

								            # global rank among all the processes

								            args.rank = args.rank * ngpus_per_node + gpu

								        dist.init_process_group(backend=args.dist_backend, init_method=args.dist_url,

								                                world_size=args.world_size, rank=args.rank)


								    # create model

								    print("=> create model '{}'".format(args.arch))

								    model = pcl.builder.MoCo(

								        models.__dict__[args.arch],

								        args.low_dim, args.pcl_r, args.moco_m, args.temperature, args.mlp)

								    # print(model)


								    if args.distributed:

								        # For multiprocessing distributed, DistributedDataParallel constructor

								        # should always set the single device scope, otherwise,

								        # DistributedDataParallel will use all available devices.

								        if args.gpu is not None:

								            torch.cuda.set_device(args.gpu)

								            model.cuda(args.gpu)

								            # When using a single GPU per process and per

								            # DistributedDataParallel, we need to divide the batch size

								            # ourselves based on the total number of GPUs we have

								            args.batch_size = int(args.batch_size / ngpus_per_node)

								            args.workers = int((args.workers + ngpus_per_node - 1) / ngpus_per_node)

								            model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu])

								        else:

								            model.cuda()

								            # DistributedDataParallel will divide and allocate batch_size to all

								            # available GPUs if device_ids are not set

								            model = torch.nn.parallel.DistributedDataParallel(model)

								    elif args.gpu is not None:

								        torch.cuda.set_device(args.gpu)

								        model.cuda(args.gpu)

								        # comment out the following line for debugging

								        raise NotImplementedError("Only DistributedDataParallel is supported.")

								    else:

								        # AllGather implementation (batch shuffle, queue update, etc.) in

								        # this code only supports DistributedDataParallel.

								        raise NotImplementedError("Only DistributedDataParallel is supported.")


								    # define loss function (criterion) and optimizer

								    criterion = nn.CrossEntropyLoss().cuda(args.gpu)


								    optimizer = torch.optim.SGD(model.parameters(), args.lr,

								                                momentum=args.momentum,

								                                weight_decay=args.weight_decay)


								    # optionally resume from a checkpoint

								    if args.resume:

								        if os.path.isfile(args.resume):

								            print("=> loading checkpoint '{}'".format(args.resume))

								            if args.gpu is None:

								                checkpoint = torch.load(args.resume)

								            else:

								                # Map model to be loaded to specified single gpu.

								                loc = 'cuda:{}'.format(args.gpu)

								                checkpoint = torch.load(args.resume, map_location=loc)

								            args.start_epoch = checkpoint['epoch']

								            model.load_state_dict(checkpoint['state_dict'])

								            optimizer.load_state_dict(checkpoint['optimizer'])

								            print("=> loaded checkpoint '{}' (epoch {})"

								                  .format(args.resume, checkpoint['epoch']))

								        else:

								            print("=> no checkpoint found at '{}'".format(args.resume))


								    cudnn.benchmark = True

								    cudnn.deterministic = True


								    # Data loading code

								    pre_train_dir = os.path.join(args.data, 'train')

								    train_dir = os.path.join(args.data, 'train')


								    if args.aug_plus:

								        # MoCo v2's aug: similar to SimCLR https://arxiv.org/abs/2002.05709

								        augmentation = aug.moco_v2()

								    else:

								        # MoCo v1's aug: same as InstDisc https://arxiv.org/abs/1805.01978

								        augmentation = aug.moco_v1()


								    # center-crop augmentation

								    eval_augmentation = aug.moco_eval()


								    pre_train_dataset = pcl.loader.PreImager(pre_train_dir, eval_augmentation)


								    train_dataset = pcl.loader.ImageFolderInstance(

								        train_dir,

								        pcl.loader.TwoCropsTransform(eval_augmentation))

								    eval_dataset = pcl.loader.ImageFolderInstance(

								        train_dir,

								        eval_augmentation)


								    if args.distributed:

								        pre_train_sampler = torch.utils.data.distributed.DistributedSampler(pre_train_dataset)

								        train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)

								        eval_sampler = torch.utils.data.distributed.DistributedSampler(eval_dataset, shuffle=False)

								    else:

								        pre_train_sampler = None

								        train_sampler = None

								        eval_sampler = None


								    if args.batch_size//pre_train_dataset.class_number < 2:

								        raise NotImplementedError("Batch size must above double number of classes.")


								    pre_train_loader = torch.utils.data.DataLoader(

								        pre_train_dataset,

								        batch_size=args.batch_size//pre_train_dataset.class_number,

								        shuffle=(pre_train_sampler is None),

								        num_workers=args.workers,

								        pin_memory=True,

								        sampler=pre_train_sampler,

								        drop_last=True)


								    train_loader = torch.utils.data.DataLoader(

								        train_dataset, batch_size=args.batch_size//2, shuffle=(train_sampler is None),

								        num_workers=args.workers, pin_memory=True, sampler=train_sampler, drop_last=True)


								    # dataloader for center-cropped images, use larger batch size to increase speed

								    eval_loader = torch.utils.data.DataLoader(

								        eval_dataset, batch_size=args.batch_size * 5, shuffle=False,

								        sampler=eval_sampler, num_workers=args.workers, pin_memory=True)


								    print("=> Pre-train")

								    # main loop

								    for epoch in range(args.start_epoch, args.epochs):


								        cluster_result = None

								        if epoch >= args.warmup_epoch:

								            # compute momentum features for center-cropped images

								            features = momentum.compute_features(eval_loader, model, args)


								            # placeholder for clustering result

								            cluster_result = {'im2cluster': [], 'centroids': [], 'density': []}

								            for num_cluster in args.num_cluster:

								                cluster_result['im2cluster'].append(torch.zeros(len(eval_dataset), dtype=torch.long).cuda())

								                cluster_result['centroids'].append(torch.zeros(int(num_cluster), args.low_dim).cuda())

								                cluster_result['density'].append(torch.zeros(int(num_cluster)).cuda())


								            if args.gpu == 0:

								                features[

								                    torch.norm(features, dim=1) > 1.5] /= 2  # account for the few samples that are computed twice

								                features = features.numpy()

								                cluster_result = clustering.run_kmeans(features, args)  # run kmeans clustering on master node

								                # save the clustering result

								                # torch.save(cluster_result,os.path.join(args.exp_dir, 'clusters_%d'%epoch))


								            dist.barrier()

								            # broadcast clustering result

								            for _k, data_list in cluster_result.items():

								                for data_tensor in data_list:

								                    dist.broadcast(data_tensor, 0, async_op=False)


								        if args.distributed:

								            train_sampler.set_epoch(epoch)

								        adjust_learning_rate(optimizer, epoch, args)


								        # train for one epoch

								        if epoch >= args.warmup_epoch:

								            train(train_loader, model, criterion, optimizer, epoch, args, cluster_result)

								        else:

								            train(pre_train_loader, model, criterion, optimizer, epoch, args, cluster_result)


								        if (epoch + 1) % 10 == 0 and (not args.multiprocessing_distributed or (args.multiprocessing_distributed

								                                                                               and args.rank % ngpus_per_node == 0)):

								            save_checkpoint({

								                'epoch': epoch + 1,

								                'arch': args.arch,

								                'state_dict': model.state_dict(),

								                'optimizer': optimizer.state_dict(),

								            }, is_best=False, filename='{}/checkpoint_{:04d}.pth.tar'.format(args.exp_dir, epoch))


								def train(train_loader, model, criterion, optimizer, epoch, args, cluster_result=None):

								    batch_time = AverageMeter('Time', ':6.3f')

								    data_time = AverageMeter('Data', ':6.3f')

								    losses = AverageMeter('Loss', ':.4e')

								    acc_inst = AverageMeter('Acc@Inst', ':6.2f')

								    acc_proto = AverageMeter('Acc@Proto', ':6.2f')


								    progress = ProgressMeter(

								        len(train_loader),

								        [batch_time, data_time, losses, acc_inst, acc_proto],

								        prefix="Epoch: [{}]".format(epoch))


								    # switch to train mode

								    model.train()


								    end = time.time()

								    for i, (images, index) in enumerate(train_loader):

								        # measure data loading time

								        data_time.update(time.time() - end)


								        im_q = []

								        im_k = []

								        if cluster_result is None:

								            class_number = len(images)

								            class_len = len(images[0])

								            for _i in range(0, class_len, 2):

								                for c in range(class_number):

								                    im_q.append(images[c][_i])

								                    im_k.append(images[c][_i+1])

								            im_q = torch.stack(im_q)

								            im_k = torch.stack(im_k)

								        else:

								            im_q = images[0]

								            im_k = images[1]


								        if args.gpu is not None:

								            im_q = im_q.cuda(args.gpu, non_blocking=True)

								            im_k = im_k.cuda(args.gpu, non_blocking=True)


								        # compute output

								        output, target, output_proto, target_proto = model(im_q=im_q, im_k=im_k,

								                                                           cluster_result=cluster_result, index=index)


								        # InfoNCE loss

								        loss = criterion(output, target)


								        # ProtoNCE loss

								        if output_proto is not None:

								            loss_proto = 0

								            for proto_out, proto_target in zip(output_proto, target_proto):

								                loss_proto += criterion(proto_out, proto_target)

								                accp = accuracy(proto_out, proto_target)[0]

								                acc_proto.update(accp[0], images[0].size(0))


								            # average loss across all sets of prototypes

								            loss_proto /= len(args.num_cluster)

								            loss += loss_proto


								        losses.update(loss.item(), images[0].size(0))

								        acc = accuracy(output, target)[0]

								        acc_inst.update(acc[0], images[0].size(0))


								        # compute gradient and do SGD step

								        optimizer.zero_grad()

								        loss.backward()

								        optimizer.step()


								        # measure elapsed time

								        batch_time.update(time.time() - end)

								        end = time.time()


								        if i % args.print_freq == 0:

								            progress.display(i)


								def adjust_learning_rate(optimizer, epoch, args):

								    """Decay the learning rate based on schedule"""

								    lr = args.lr

								    if args.cos:  # cosine lr schedule

								        lr *= 0.5 * (1. + math.cos(math.pi * epoch / args.epochs))

								    else:  # stepwise lr schedule

								        for milestone in args.schedule:

								            lr *= 0.1 if epoch >= milestone else 1.

								    for param_group in optimizer.param_groups:

								        param_group['lr'] = lr


								def accuracy(output, target, topk=(1,)):

								    """Computes the accuracy over the k top predictions for the specified values of k"""

								    with torch.no_grad():

								        maxk = max(topk)

								        batch_size = target.size(0)


								        _, pred = output.topk(maxk, 1, True, True)

								        pred = pred.t()

								        correct = pred.eq(target.view(1, -1).expand_as(pred))


								        res = []

								        for k in topk:

								            correct_k = correct[:k].view(-1).float().sum(0, keepdim=True)

								            res.append(correct_k.mul_(100.0 / batch_size))

								        return res


								def save_checkpoint(state, is_best, filename='checkpoint.pth.tar'):

								    torch.save(state, filename)

								    print("Model saved as:"+filename)

								    if is_best:

								        shutil.copyfile(filename, 'model_best.pth.tar')


								if __name__ == '__main__':

								    main_loader()