From 5451ff84b1e8fc4f0ce3c4a3b60fa48086e35dfb Mon Sep 17 00:00:00 2001 From: wangguo02 <939117440@qq.com> Date: Mon, 15 May 2023 06:53:08 +0000 Subject: [PATCH 01/18] add mocov2 --- passl/data/preprocess/basic_transforms.py | 19 + passl/engine/loops/loop.py | 4 + passl/models/__init__.py | 2 +- passl/models/mocov2.py | 325 ++++++++++++++++++ passl/scheduler/__init__.py | 2 +- passl/scheduler/lr_scheduler.py | 56 +++ tasks/ssl/mocov2/builder_moco.py | 159 +++++++++ .../configs/mocov2_resnet50_lp_in1k_1n8c.yaml | 114 ++++++ .../configs/mocov2_resnet50_pt_in1k_1n8c.yaml | 97 ++++++ tasks/ssl/mocov2/dataset | 1 + tasks/ssl/mocov2/extract_weight.py | 56 +++ tasks/ssl/mocov2/linearprobe.sh | 26 ++ tasks/ssl/mocov2/pretrain.sh | 26 ++ 13 files changed, 885 insertions(+), 2 deletions(-) create mode 100644 passl/models/mocov2.py create mode 100644 tasks/ssl/mocov2/builder_moco.py create mode 100644 tasks/ssl/mocov2/configs/mocov2_resnet50_lp_in1k_1n8c.yaml create mode 100644 tasks/ssl/mocov2/configs/mocov2_resnet50_pt_in1k_1n8c.yaml create mode 120000 tasks/ssl/mocov2/dataset create mode 100644 tasks/ssl/mocov2/extract_weight.py create mode 100644 tasks/ssl/mocov2/linearprobe.sh create mode 100644 tasks/ssl/mocov2/pretrain.sh diff --git a/passl/data/preprocess/basic_transforms.py b/passl/data/preprocess/basic_transforms.py index 7be2b26a..374b05b3 100644 --- a/passl/data/preprocess/basic_transforms.py +++ b/passl/data/preprocess/basic_transforms.py @@ -57,6 +57,7 @@ "SimCLRGaussianBlur", "BYOLSolarize", "MAERandCropImage", + "GaussianBlur", ] @@ -941,3 +942,21 @@ def __call__(self, img): else: img = ImageOps.solarize(img) return img + +class GaussianBlur(object): + """Gaussian blur augmentation in SimCLR https://arxiv.org/abs/2002.05709""" + + def __init__(self, sigma=[.1, 2.], p=1.0): + self.p = p + self.sigma = sigma + + def __call__(self, img): + if random.random() < self.p: + if not isinstance(img, Image.Image): + img = np.ascontiguousarray(img) + img = 
Image.fromarray(img) + sigma = random.uniform(self.sigma[0], self.sigma[1]) + img = img.filter(ImageFilter.GaussianBlur(radius=sigma)) + if isinstance(img, Image.Image): + img = np.asarray(img) + return img \ No newline at end of file diff --git a/passl/engine/loops/loop.py b/passl/engine/loops/loop.py index 35bdfa1d..8d398feb 100644 --- a/passl/engine/loops/loop.py +++ b/passl/engine/loops/loop.py @@ -285,6 +285,10 @@ def train_one_epoch(self): paddle.to_tensor(batch[0]['label']) ] + for idx, value in enumerate(batch): + if isinstance(value,paddle.Tensor): + batch[idx] = batch[idx].cuda() + self.global_step += 1 # do forward and backward diff --git a/passl/models/__init__.py b/passl/models/__init__.py index de3b9a8e..c78f3e65 100644 --- a/passl/models/__init__.py +++ b/passl/models/__init__.py @@ -28,7 +28,7 @@ from .convnext import * from .mocov3 import * from .simsiam import * - +from .mocov2 import * __all__ = ["build_model"] diff --git a/passl/models/mocov2.py b/passl/models/mocov2.py new file mode 100644 index 00000000..20ef3adf --- /dev/null +++ b/passl/models/mocov2.py @@ -0,0 +1,325 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from collections.abc import Callable + +import os +import copy +import numpy as np + +import paddle +import paddle.nn as nn +from passl.nn import init +import paddle.nn.functional as F +from passl.models.base_model import Model +from paddle.nn.initializer import Constant, Normal +from functools import partial, reduce +from passl.models.resnet import ResNet +from paddle.vision.models.resnet import resnet50 +import random +__all__ = [ + 'mocov2_resnet50_linearprobe', + 'mocov2_resnet50_pretrain', +] + +class MoCoV2Projector(nn.Layer): + def __init__(self, with_pool, in_dim, out_dim): + super().__init__() + + self.with_pool = with_pool + if with_pool: + self.avgpool = nn.Sequential( + nn.AdaptiveAvgPool2D((1, 1)), nn.Flatten(start_axis=1)) + + self.mlp = nn.Sequential(nn.Linear(in_dim, out_dim), nn.ReLU()) + + def forward(self, x): + + if self.with_pool: + x = self.avgpool(x) + + x = self.mlp(x) + return x + + +class MoCoClassifier(nn.Layer): + def __init__(self, with_pool, num_features, class_num): + super().__init__() + + self.with_pool = with_pool + if with_pool: + self.avgpool = nn.Sequential( + nn.AdaptiveAvgPool2D((1, 1)), nn.Flatten(start_axis=1)) + + self.fc = nn.Linear(num_features, class_num) + normal_ = Normal(std=0.01) + zeros_ = Constant(value=0.) 
+ + normal_(self.fc.weight) + zeros_(self.fc.bias) + + def save(self,path): + paddle.save(self.fc.state_dict(),path + ".pdparams") + def load(self,path): + self.fc.set_state_dict(paddle.load(path+".pdparams")) + + + def forward(self, x): + + if self.with_pool: + x = self.avgpool(x) + x = self.fc(x) + return x + + +class MoCoV2Pretain(Model): + """ MoCo v1, v2 + + ref: https://github.com/facebookresearch/moco/blob/main/moco/builder.py + ref: https://github.com/PaddlePaddle/PASSL/blob/main/passl/modeling/architectures/moco.py + """ + + def __init__(self, + base_encoder, + base_projector, + base_classifier, + momentum_encoder, + momentum_projector, + momentum_classifier, + dim=128, + K=65536, + m=0.999, + T=0.07, + **kwargs): + super(MoCoV2Pretain, self).__init__() + + self.m = m + self.T = T + self.K = K + + self.base_encoder = nn.Sequential(base_encoder(), base_projector(), + base_classifier()) + self.momentum_encoder = nn.Sequential( + momentum_encoder(), momentum_projector(), momentum_classifier()) + + for param_b, param_m in zip(self.base_encoder.parameters(), + self.momentum_encoder.parameters()): + param_m.copy_(param_b, False) # initialize + param_m.stop_gradient = True # not update by gradient + + # create the queue + self.register_buffer("queue", paddle.randn([dim, K])) + self.queue = F.normalize(self.queue, axis=0) + + self.register_buffer("queue_ptr", paddle.zeros([1], 'int64')) + + self.loss_fuc = nn.CrossEntropyLoss() + + def save(self, path, local_rank=0, rank=0): + paddle.save(self.state_dict(), path + ".pdparams") + + # rename moco pre-trained keys + state_dict = self.state_dict() + for k in list(state_dict.keys()): + # retain only base_encoder up to before the embedding layer + if k.startswith('base_encoder') and not k.startswith( + 'base_encoder.head'): + # remove prefix + state_dict[k[len("base_encoder."):]] = state_dict[k] + # delete renamed or unused k + del state_dict[k] + + paddle.save(state_dict, path + "_base_encoder.pdparams") + + 
@paddle.no_grad() + def _update_momentum_encoder(self): + """Momentum update of the momentum encoder""" + #Note(GuoxiaWang): disable auto cast when use mix_precision + with paddle.amp.auto_cast(False): + for param_b, param_m in zip(self.base_encoder.parameters(), + self.momentum_encoder.parameters()): + paddle.assign((param_m * self.m + param_b * (1. - self.m)), + param_m) + param_m.stop_gradient = True + + # utils + @paddle.no_grad() + def concat_all_gather(self, tensor): + """ + Performs all_gather operation on the provided tensors. + """ + if paddle.distributed.get_world_size() < 2: + return tensor + tensors_gather = [] + paddle.distributed.all_gather(tensors_gather, tensor) + + output = paddle.concat(tensors_gather, axis=0) + return output + + @paddle.no_grad() + def _dequeue_and_enqueue(self, keys): + keys = self.concat_all_gather(keys) + + batch_size = keys.shape[0] + + ptr = int(self.queue_ptr[0]) + assert self.K % batch_size == 0 # for simplicity + + # replace the keys at ptr (dequeue and enqueue) + self.queue[:, ptr:ptr + batch_size] = keys.transpose([1, 0]) + ptr = (ptr + batch_size) % self.K # move pointer + + self.queue_ptr[0] = ptr + + @paddle.no_grad() + def _batch_shuffle_ddp(self, x): + """ + Batch shuffle, for making use of BatchNorm. + *** Only support DistributedDataParallel (DDP) model. 
*** + """ + # gather from all gpus + batch_size_this = x.shape[0] + x_gather = self.concat_all_gather(x) + batch_size_all = x_gather.shape[0] + + num_gpus = batch_size_all // batch_size_this + + # random shuffle index + idx_shuffle = paddle.randperm(batch_size_all) + + # broadcast to all gpus + if paddle.distributed.get_world_size() > 1: + paddle.distributed.broadcast(idx_shuffle, src=0) + + # index for restoring + idx_unshuffle = paddle.argsort(idx_shuffle) + + # shuffled index for this gpu + gpu_idx = paddle.distributed.get_rank() + idx_this = idx_shuffle.reshape([num_gpus, -1])[gpu_idx] + return paddle.gather(x_gather, idx_this, axis=0), idx_unshuffle + + @paddle.no_grad() + def _batch_unshuffle_ddp(self, x, idx_unshuffle): + """ + Undo batch shuffle. + *** Only support DistributedDataParallel (DDP) model. *** + """ + # gather from all gpus + batch_size_this = x.shape[0] + x_gather = self.concat_all_gather(x) + batch_size_all = x_gather.shape[0] + + num_gpus = batch_size_all // batch_size_this + + # restored index for this gpu + gpu_idx = paddle.distributed.get_rank() + idx_this = idx_unshuffle.reshape([num_gpus, -1])[gpu_idx] + + return paddle.gather(x_gather, idx_this, axis=0) + + def forward(self, inputs): + assert isinstance(inputs, list) + x1 = inputs[0] + x2 = inputs[1] + # compute query features + q = self.base_encoder(x1) # queries: NxC + q = F.normalize(q, axis=1) + + # compute key features + with paddle.no_grad(): # no gradient + self._update_momentum_encoder() # update the momentum encoder + + # shuffle for making use of BN + k, idx_unshuffle = self._batch_shuffle_ddp(x2) + + k = self.momentum_encoder(k) # keys: NxC + k = F.normalize(k, axis=1) + + # undo shuffle + k = self._batch_unshuffle_ddp(k, idx_unshuffle) + + # compute logits + # Einstein sum is more intuitive + # positive logits: Nx1 + l_pos = paddle.sum(q * k, axis=1).unsqueeze(-1) + # negative logits: NxK + l_neg = paddle.matmul(q, self.queue.clone().detach()) + + # logits: Nx(1+K) + logits 
= paddle.concat((l_pos, l_neg), axis=1) + + # apply temperature + logits /= self.T + + # labels: positive key indicators + labels = paddle.zeros([logits.shape[0]], dtype=paddle.int64) + + # dequeue and enqueue + self._dequeue_and_enqueue(k) + + return self.loss_fuc(logits, labels) + +class MoCoV2LinearProbe(ResNet): + """ MoCo v1, v2 + + ref: https://github.com/facebookresearch/moco/blob/main/moco/builder.py + ref: https://github.com/PaddlePaddle/PASSL/blob/main/passl/modeling/architectures/moco.py + """ + + def __init__(self, + **kwargs): + super().__init__(**kwargs) # forward config kwargs (e.g. class_num, with_pool) to ResNet instead of dropping them + # freeze all layers but the last fc + for name, param in self.named_parameters(): + if name not in ['fc.weight', 'fc.bias']: + param.stop_gradient = True + + # optimize only the linear classifier + parameters = list( + filter(lambda p: not p.stop_gradient, self.parameters())) + assert len(parameters) == 2 # weight, bias + + init.normal_(self.fc.weight, mean=0.0, std=0.01) + init.zeros_(self.fc.bias) + self.apply(self._freeze_norm) + + def _freeze_norm(self, layer): + if isinstance(layer, (nn.layer.norm._BatchNormBase)): + layer._use_global_stats = True + + +def mocov2_resnet50_linearprobe(**kwargs): + # **kwargs specify numclass + resnet = MoCoV2LinearProbe(with_pool=True,**kwargs) + resnet.fc.load_dict(paddle.load("/wangguo/PASSL/pretrained/moco/class_fc.pdparams")) + return resnet +def mocov2_resnet50_pretrain(**kwargs): + # prepare all layer here + base_encoder = partial(resnet50, with_pool=False,num_classes=0) + base_projector = partial(MoCoV2Projector, with_pool=True, in_dim=2048,out_dim=2048) + base_classifier = partial(MoCoClassifier, with_pool=False, num_features=2048, class_num=128) + momentum_encoder = partial(resnet50, with_pool=False, num_classes=0) + momentum_projector = partial(MoCoV2Projector,with_pool=True,in_dim=2048,out_dim=2048) + momentum_classifier = partial(MoCoClassifier,with_pool=False,num_features=2048,class_num=128) + model = MoCoV2Pretain( + base_encoder=base_encoder, + 
base_projector=base_projector, + base_classifier=base_classifier, + momentum_encoder=momentum_encoder, + momentum_projector=momentum_projector, + momentum_classifier=momentum_classifier, + T=0.2, + **kwargs) + return model diff --git a/passl/scheduler/__init__.py b/passl/scheduler/__init__.py index 6bec1e45..8a194fd9 100644 --- a/passl/scheduler/__init__.py +++ b/passl/scheduler/__init__.py @@ -15,7 +15,7 @@ from passl.utils import logger -from .lr_scheduler import TimmCosine, ViTLRScheduler, Step, Poly +from .lr_scheduler import TimmCosine, ViTLRScheduler, Step, Poly, MultiStepDecay, CosineDecay from .lr_callable import LRCallable diff --git a/passl/scheduler/lr_scheduler.py b/passl/scheduler/lr_scheduler.py index 223ca349..eb9467d4 100644 --- a/passl/scheduler/lr_scheduler.py +++ b/passl/scheduler/lr_scheduler.py @@ -200,3 +200,59 @@ def get_lr(self): return self.base_lr * pow(1 - float(self.last_epoch - self.warmups) / float(self.T_max - self.warmups), 2) + +class MultiStepDecay(lr.LRScheduler): + def __init__(self, + learning_rate, + step_each_epoch, + epochs, + milestones, + gamma=0.1, + last_epoch=-1, + verbose=False, + decay_unit='epoch', + **kwargs): + assert decay_unit in ['step', 'epoch'] + if decay_unit=='step': + milestones = [mile*step_each_epoch for mile in milestones] + self.milestones = milestones # stored after the unit conversion so step-scaled milestones are the ones get_lr reads + self.gamma = gamma + super().__init__(learning_rate, last_epoch, verbose) + def get_lr(self): + for i in range(len(self.milestones)): + if self.last_epoch < self.milestones[i]: + return self.base_lr * (self.gamma**i) + return self.base_lr * (self.gamma ** len(self.milestones)) + +class CosineDecay(lr.LRScheduler): + def __init__(self, + learning_rate, + step_each_epoch, + epochs, + decay_unit='epoch', + warmups=0, + verbose=False, + last_epoch=-1, + **kwargs): + + assert decay_unit in ['step', 'epoch'] + self.T_max = epochs if decay_unit == 'epoch' else step_each_epoch * epochs + self.warmups = warmups if decay_unit == 'epoch' else step_each_epoch * 
warmups + + assert self.warmups < self.T_max + + self.last_epoch = last_epoch + super(CosineDecay, self).__init__(learning_rate, last_epoch, verbose) + + def get_lr(self): + + progress = ( + self.last_epoch - self.warmups) / float(self.T_max - self.warmups) + progress = min(1.0, max(0.0, progress)) + + # compute the cosine value first so the warmup factor below scales an already-assigned lr + lr = 0.5 * self.base_lr * (1.0 + math.cos(math.pi * progress)) + if self.warmups: + lr = lr * min(1.0, self.last_epoch / self.warmups) + + return lr \ No newline at end of file diff --git a/tasks/ssl/mocov2/builder_moco.py b/tasks/ssl/mocov2/builder_moco.py new file mode 100644 index 00000000..e0e52326 --- /dev/null +++ b/tasks/ssl/mocov2/builder_moco.py @@ -0,0 +1,159 @@ +# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import paddle +import paddle.nn as nn + + +class MoCo(nn.Layer): + """ + Build a MoCo model with a base encoder, a momentum encoder, and two MLPs + https://arxiv.org/abs/1911.05722 + """ + + def __init__(self, base_encoder, dim=256, mlp_dim=4096, T=1.0): + """ + dim: feature dimension (default: 256) + mlp_dim: hidden dimension in MLPs (default: 4096) + T: softmax temperature (default: 1.0) + """ + super(MoCo, self).__init__() + + self.T = T + + # build encoders + self.base_encoder = base_encoder(num_classes=mlp_dim) + self.momentum_encoder = base_encoder(num_classes=mlp_dim) + + self._build_projector_and_predictor_mlps(dim, mlp_dim) + + for param_b, param_m in zip(self.base_encoder.parameters(), + self.momentum_encoder.parameters()): + param_m.copy_(param_b, False) # initialize + param_m.stop_gradient = True # not update by gradient + + def _build_mlp(self, + num_layers, + input_dim, + mlp_dim, + output_dim, + last_bn=True): + mlp = [] + for l in range(num_layers): + dim1 = input_dim if l == 0 else mlp_dim + dim2 = output_dim if l == num_layers - 1 else mlp_dim + + mlp.append(nn.Linear(dim1, dim2, bias_attr=False)) + + if l < num_layers - 1: + mlp.append(nn.BatchNorm1D(dim2)) + mlp.append(nn.ReLU()) + elif last_bn: + # follow SimCLR's design: https://github.com/google-research/simclr/blob/master/model_util.py#L157 + # for simplicity, we further removed gamma in BN + mlp.append( + nn.BatchNorm1D( + dim2, weight_attr=False, bias_attr=False)) + + return nn.Sequential(*mlp) + + def _build_projector_and_predictor_mlps(self, dim, mlp_dim): + pass + + @paddle.no_grad() + def _update_momentum_encoder(self, m): + """Momentum update of the momentum encoder""" + with paddle.amp.auto_cast(False): + for param_b, param_m in zip(self.base_encoder.parameters(), + self.momentum_encoder.parameters()): + paddle.assign((param_m * m + param_b * (1. 
- m)), param_m) + + def contrastive_loss(self, q, k): + # normalize + q = nn.functional.normalize(q, axis=1) + k = nn.functional.normalize(k, axis=1) + # gather all targets + k = concat_all_gather(k) + # Einstein sum is more intuitive + logits = paddle.einsum('nc,mc->nm', q, k) / self.T + N = logits.shape[0] # batch size per GPU + labels = (paddle.arange( + N, dtype=paddle.int64) + N * paddle.distributed.get_rank()) + return nn.CrossEntropyLoss()(logits, labels) * (2 * self.T) + + def forward(self, x1, x2, m): + """ + Input: + x1: first views of images + x2: second views of images + m: moco momentum + Output: + loss + """ + + # compute features + q1 = self.predictor(self.base_encoder(x1)) + q2 = self.predictor(self.base_encoder(x2)) + + with paddle.no_grad(): # no gradient + self._update_momentum_encoder(m) # update the momentum encoder + + # compute momentum features as targets + k1 = self.momentum_encoder(x1) + k2 = self.momentum_encoder(x2) + + return self.contrastive_loss(q1, k2) + self.contrastive_loss(q2, k1) + + +class MoCo_ResNet(MoCo): + def _build_projector_and_predictor_mlps(self, dim, mlp_dim): + hidden_dim = self.base_encoder.fc.weight.shape[0] + del self.base_encoder.fc, self.momentum_encoder.fc # remove original fc layer + + # projectors + self.base_encoder.fc = self._build_mlp(2, hidden_dim, mlp_dim, dim) + self.momentum_encoder.fc = self._build_mlp(2, hidden_dim, mlp_dim, dim) + + # predictor + self.predictor = self._build_mlp(2, dim, mlp_dim, dim, False) + + +class MoCo_ViT(MoCo): + def _build_projector_and_predictor_mlps(self, dim, mlp_dim): + hidden_dim = self.base_encoder.head.weight.shape[0] + del self.base_encoder.head, self.momentum_encoder.head # remove original fc layer + + # projectors + self.base_encoder.head = self._build_mlp(3, hidden_dim, mlp_dim, dim) + self.momentum_encoder.head = self._build_mlp(3, hidden_dim, mlp_dim, + dim) + + # predictor + self.predictor = self._build_mlp(2, dim, mlp_dim, dim) + + +# utils +@paddle.no_grad() 
+def concat_all_gather(tensor): + """ + Performs all_gather operation on the provided tensors. + """ + if paddle.distributed.get_world_size() < 2: + return tensor + + tensors_gather = [] + paddle.distributed.all_gather(tensors_gather, tensor) + + output = paddle.concat(tensors_gather, axis=0) + return output diff --git a/tasks/ssl/mocov2/configs/mocov2_resnet50_lp_in1k_1n8c.yaml b/tasks/ssl/mocov2/configs/mocov2_resnet50_lp_in1k_1n8c.yaml new file mode 100644 index 00000000..b0fc5583 --- /dev/null +++ b/tasks/ssl/mocov2/configs/mocov2_resnet50_lp_in1k_1n8c.yaml @@ -0,0 +1,114 @@ +# global configs +Global: + task_type: Classification + train_loop: ClassificationTrainingEpochLoop + validate_loop: ClassificationEvaluationLoop + checkpoint: null + pretrained_model: /wangguo/PASSL/pretrained/moco/transformed_mocov2_pt_imagenet2012_resnet50 + output_dir: ./output/ + device: gpu + save_interval: 1 + max_num_latest_checkpoint: 0 + eval_during_train: True + eval_interval: 1 + eval_unit: "epoch" + accum_steps: 1 + epochs: 100 + print_batch_step: 10 + use_visualdl: False + seed: 2022 + +# FP16 setting +FP16: + level: O0 + +DistributedStrategy: + data_parallel: True + +# model architecture +Model: + name: mocov2_resnet50_linearprobe + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + Eval: + - CELoss: + weight: 1.0 + +LRScheduler: + name: MultiStepDecay + decay_unit: epoch + learning_rate: 30.0 + gamma: 0.1 + milestones: [60, 80] + +Optimizer: + name: Momentum + momentum: 0.9 + weight_decay: 0.0 + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageFolder + root: ./dataset/train + transform: + - RandomResizedCrop: + size: 224 + - RandFlipImage: + flip_code: 1 + - ToTensor: + - Normalize: + mean: [0.5, 0.5, 0.5] + std: [0.5, 0.5, 0.5] + sampler: + name: DistributedBatchSampler + batch_size: 32 + drop_last: True + shuffle: True + loader: + num_workers: 1 + use_shared_memory: False + + Eval: + 
dataset: + name: ImageFolder + root: ./dataset/val + transform: + - ResizeImage: + resize_short: 256 + interpolation: bilinear + backend: pil + - CenterCropImage: + size: 224 + - ToTensor: + - Normalize: + mean: [0.5, 0.5, 0.5] + std: [0.5, 0.5, 0.5] + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + + loader: + num_workers: 1 + use_shared_memory: False + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] + +Export: + export_type: paddle + input_shape: [None, 3, 224, 224] diff --git a/tasks/ssl/mocov2/configs/mocov2_resnet50_pt_in1k_1n8c.yaml b/tasks/ssl/mocov2/configs/mocov2_resnet50_pt_in1k_1n8c.yaml new file mode 100644 index 00000000..f97ce0c9 --- /dev/null +++ b/tasks/ssl/mocov2/configs/mocov2_resnet50_pt_in1k_1n8c.yaml @@ -0,0 +1,97 @@ +# global configs +Global: + task_type: ContrastiveLearning + train_loop: ContrastiveLearningTrainingEpochLoop + validate_loop: None + checkpoint: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + max_num_latest_checkpoint: 0 + eval_during_train: False + eval_interval: 1 + eval_unit: "epoch" + accum_steps: 1 + epochs: 200 + print_batch_step: 10 + use_visualdl: False + seed: 2023 + +DistributedStrategy: + data_parallel: True + +# model architecture +Model: + name: mocov2_resnet50_pretrain + +LRScheduler: + name: CosineDecay + decay_unit: epoch + learning_rate: 0.03 + +Optimizer: + name: Momentum + momentum: 0.9 + weight_decay: 0.0001 + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageFolder + root: ./dataset/train + transform: + - TwoViewsTransform: + base_transform1: + - RandomResizedCrop: + size: 224 + scale: [0.2, 1.0] + interpolation: bicubic + - ColorJitter: + brightness: 0.4 + contrast: 0.4 + saturation: 0.4 + hue: 0.1 + p: 0.8 + - RandomGrayscale: + p: 0.2 + - GaussianBlur: + sigma: [.1, 2.] 
+ p: 0.5 + - RandFlipImage: + flip_code: 1 + - ToTensor: + - Normalize: + mean: [0.5, 0.5, 0.5] + std: [0.5, 0.5, 0.5] + base_transform2: + - RandomResizedCrop: + size: 224 + scale: [0.2, 1.0] + interpolation: bicubic + - ColorJitter: + brightness: 0.4 + contrast: 0.4 + saturation: 0.4 + hue: 0.1 + p: 0.8 + - RandomGrayscale: + p: 0.2 + - GaussianBlur: + sigma: [.1, 2.] + p: 0.5 + - RandFlipImage: + flip_code: 1 + - ToTensor: + - Normalize: + mean: [0.5, 0.5, 0.5] + std: [0.5, 0.5, 0.5] + sampler: + name: DistributedBatchSampler + batch_size: 32 + drop_last: True + shuffle: True + loader: + num_workers: 8 + use_shared_memory: False diff --git a/tasks/ssl/mocov2/dataset b/tasks/ssl/mocov2/dataset new file mode 120000 index 00000000..93a401d4 --- /dev/null +++ b/tasks/ssl/mocov2/dataset @@ -0,0 +1 @@ +/wangguo/imagenet/ \ No newline at end of file diff --git a/tasks/ssl/mocov2/extract_weight.py b/tasks/ssl/mocov2/extract_weight.py new file mode 100644 index 00000000..5e7e1532 --- /dev/null +++ b/tasks/ssl/mocov2/extract_weight.py @@ -0,0 +1,56 @@ +# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import argparse +import os +import paddle + +if __name__ == '__main__': + parser = argparse.ArgumentParser( + description='Convert MoCo Pre-Traind Model to DEiT') + parser.add_argument( + '--input', + default='', + type=str, + metavar='PATH', + required=True, + help='path to moco pre-trained checkpoint') + parser.add_argument( + '--output', + default='', + type=str, + metavar='PATH', + required=True, + help='path to output checkpoint in DEiT format') + args = parser.parse_args() + print(args) + + # load input + checkpoint = paddle.load(args.input) + state_dict = checkpoint['state_dict'] + for k in list(state_dict.keys()): + # retain only base_encoder up to before the embedding layer + if k.startswith('base_encoder') and not k.startswith( + 'base_encoder.head'): + # remove prefix + state_dict[k[len("base_encoder."):]] = state_dict[k] + # delete renamed or unused k + del state_dict[k] + + # make output directory if necessary + output_dir = os.path.dirname(args.output) + if not os.path.isdir(output_dir): + os.makedirs(output_dir) + # save to output + paddle.save(state_dict, args.output) diff --git a/tasks/ssl/mocov2/linearprobe.sh b/tasks/ssl/mocov2/linearprobe.sh new file mode 100644 index 00000000..f0cb339e --- /dev/null +++ b/tasks/ssl/mocov2/linearprobe.sh @@ -0,0 +1,26 @@ +# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# unset PADDLE_TRAINER_ENDPOINTS +# export PADDLE_NNODES=1 +# export PADDLE_MASTER="xxx.xxx.xxx.xxx:12538" +# export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 +export FLAGS_stop_check_timeout=3600 + +python -m paddle.distributed.launch \ + --nnodes=$PADDLE_NNODES \ + --master=$PADDLE_MASTER \ + --devices=$CUDA_VISIBLE_DEVICES \ + passl-train \ + -c ./configs/mocov2_resnet50_lp_in1k_1n8c.yaml diff --git a/tasks/ssl/mocov2/pretrain.sh b/tasks/ssl/mocov2/pretrain.sh new file mode 100644 index 00000000..aeac93e3 --- /dev/null +++ b/tasks/ssl/mocov2/pretrain.sh @@ -0,0 +1,26 @@ +# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# unset PADDLE_TRAINER_ENDPOINTS +# export PADDLE_NNODES=1 +# #export PADDLE_MASTER="xxx.xxx.xxx.xxx:12538" +# export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 +export FLAGS_stop_check_timeout=3600 + +python -m paddle.distributed.launch \ + --nnodes=$PADDLE_NNODES \ + --master=$PADDLE_MASTER \ + --devices=$CUDA_VISIBLE_DEVICES \ + passl-train \ + -c ./configs/mocov2_resnet50_pt_in1k_1n8c.yaml \ No newline at end of file From 930ccabd594898de5dc26c5e7a66509bcc062591 Mon Sep 17 00:00:00 2001 From: wangguo02 <939117440@qq.com> Date: Mon, 15 May 2023 07:36:02 +0000 Subject: [PATCH 02/18] wrap weight --- passl/models/mocov2.py | 32 +++- passl/models/resnet.py | 9 +- tasks/ssl/mocov2/builder_moco.py | 159 ------------------ .../configs/mocov2_resnet50_lp_in1k_1n8c.yaml | 2 +- tasks/ssl/mocov2/extract_weight.py | 56 ------ 5 files changed, 38 insertions(+), 220 deletions(-) delete mode 100644 tasks/ssl/mocov2/builder_moco.py delete mode 100644 tasks/ssl/mocov2/extract_weight.py diff --git a/passl/models/mocov2.py b/passl/models/mocov2.py index 20ef3adf..af24d096 100644 --- a/passl/models/mocov2.py +++ b/passl/models/mocov2.py @@ -125,7 +125,7 @@ def __init__(self, self.register_buffer("queue_ptr", paddle.zeros([1], 'int64')) self.loss_fuc = nn.CrossEntropyLoss() - + def save(self, path, local_rank=0, rank=0): paddle.save(self.state_dict(), path + ".pdparams") @@ -299,11 +299,33 @@ def _freeze_norm(self, layer): if isinstance(layer, (nn.layer.norm._BatchNormBase)): layer._use_global_stats = True + def load_pretrained(self, path, rank=0, finetune=False): + if not os.path.exists(path + '.pdparams'): + raise ValueError("Model pretrain path {} does not " + "exists.".format(path)) + + path = path + ".pdparams" + base_encoder_dict = paddle.load(path) + for k in list(base_encoder_dict.keys()): + # retain only encoder_q up to before the embedding layer + if k.startswith('0.'): + # remove prefix + base_encoder_dict[k[len( + "0."):]] = base_encoder_dict[k] + # delete renamed + del 
base_encoder_dict[k] + + for name, param in self.state_dict().items(): + if name in base_encoder_dict and param.dtype != base_encoder_dict[ + name].dtype: + base_encoder_dict[name] = base_encoder_dict[name].cast( + param.dtype) + + self.set_state_dict(base_encoder_dict) def mocov2_resnet50_linearprobe(**kwargs): # **kwargs specify numclass resnet = MoCoV2LinearProbe(with_pool=True,**kwargs) - resnet.fc.load_dict(paddle.load("/wangguo/PASSL/pretrained/moco/class_fc.pdparams")) return resnet def mocov2_resnet50_pretrain(**kwargs): # prepare all layer here @@ -323,3 +345,9 @@ def mocov2_resnet50_pretrain(**kwargs): T=0.2, **kwargs) return model + +if __name__ == "__main__": + model = mocov2_resnet50_pretrain() + model.save("./mocov2") + model_lineprobe = mocov2_resnet50_linearprobe() + model_lineprobe.load_pretrained("./mocov2_base_encoder") diff --git a/passl/models/resnet.py b/passl/models/resnet.py index f15f3443..211c2878 100644 --- a/passl/models/resnet.py +++ b/passl/models/resnet.py @@ -52,14 +52,19 @@ class ResNet(PDResNet, Model): def __init__( self, - block, + block=None, depth=50, width=64, class_num=1000, with_pool=True, groups=1, zero_init_residual=True, - ): + ): + if block is None: # no block given: pick it from depth (identity test, not ==) + if depth <= 34: + block=BasicBlock + else: + block=BottleneckBlock super().__init__(block, depth=depth, width=width, num_classes=class_num, with_pool=with_pool, groups=groups) # Zero-initialize the last BN in each residual branch, diff --git a/tasks/ssl/mocov2/builder_moco.py b/tasks/ssl/mocov2/builder_moco.py deleted file mode 100644 index e0e52326..00000000 --- a/tasks/ssl/mocov2/builder_moco.py +++ /dev/null @@ -1,159 +0,0 @@ -# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import paddle -import paddle.nn as nn - - -class MoCo(nn.Layer): - """ - Build a MoCo model with a base encoder, a momentum encoder, and two MLPs - https://arxiv.org/abs/1911.05722 - """ - - def __init__(self, base_encoder, dim=256, mlp_dim=4096, T=1.0): - """ - dim: feature dimension (default: 256) - mlp_dim: hidden dimension in MLPs (default: 4096) - T: softmax temperature (default: 1.0) - """ - super(MoCo, self).__init__() - - self.T = T - - # build encoders - self.base_encoder = base_encoder(num_classes=mlp_dim) - self.momentum_encoder = base_encoder(num_classes=mlp_dim) - - self._build_projector_and_predictor_mlps(dim, mlp_dim) - - for param_b, param_m in zip(self.base_encoder.parameters(), - self.momentum_encoder.parameters()): - param_m.copy_(param_b, False) # initialize - param_m.stop_gradient = True # not update by gradient - - def _build_mlp(self, - num_layers, - input_dim, - mlp_dim, - output_dim, - last_bn=True): - mlp = [] - for l in range(num_layers): - dim1 = input_dim if l == 0 else mlp_dim - dim2 = output_dim if l == num_layers - 1 else mlp_dim - - mlp.append(nn.Linear(dim1, dim2, bias_attr=False)) - - if l < num_layers - 1: - mlp.append(nn.BatchNorm1D(dim2)) - mlp.append(nn.ReLU()) - elif last_bn: - # follow SimCLR's design: https://github.com/google-research/simclr/blob/master/model_util.py#L157 - # for simplicity, we further removed gamma in BN - mlp.append( - nn.BatchNorm1D( - dim2, weight_attr=False, bias_attr=False)) - - return nn.Sequential(*mlp) - - def _build_projector_and_predictor_mlps(self, dim, mlp_dim): - pass - - 
@paddle.no_grad() - def _update_momentum_encoder(self, m): - """Momentum update of the momentum encoder""" - with paddle.amp.auto_cast(False): - for param_b, param_m in zip(self.base_encoder.parameters(), - self.momentum_encoder.parameters()): - paddle.assign((param_m * m + param_b * (1. - m)), param_m) - - def contrastive_loss(self, q, k): - # normalize - q = nn.functional.normalize(q, axis=1) - k = nn.functional.normalize(k, axis=1) - # gather all targets - k = concat_all_gather(k) - # Einstein sum is more intuitive - logits = paddle.einsum('nc,mc->nm', q, k) / self.T - N = logits.shape[0] # batch size per GPU - labels = (paddle.arange( - N, dtype=paddle.int64) + N * paddle.distributed.get_rank()) - return nn.CrossEntropyLoss()(logits, labels) * (2 * self.T) - - def forward(self, x1, x2, m): - """ - Input: - x1: first views of images - x2: second views of images - m: moco momentum - Output: - loss - """ - - # compute features - q1 = self.predictor(self.base_encoder(x1)) - q2 = self.predictor(self.base_encoder(x2)) - - with paddle.no_grad(): # no gradient - self._update_momentum_encoder(m) # update the momentum encoder - - # compute momentum features as targets - k1 = self.momentum_encoder(x1) - k2 = self.momentum_encoder(x2) - - return self.contrastive_loss(q1, k2) + self.contrastive_loss(q2, k1) - - -class MoCo_ResNet(MoCo): - def _build_projector_and_predictor_mlps(self, dim, mlp_dim): - hidden_dim = self.base_encoder.fc.weight.shape[0] - del self.base_encoder.fc, self.momentum_encoder.fc # remove original fc layer - - # projectors - self.base_encoder.fc = self._build_mlp(2, hidden_dim, mlp_dim, dim) - self.momentum_encoder.fc = self._build_mlp(2, hidden_dim, mlp_dim, dim) - - # predictor - self.predictor = self._build_mlp(2, dim, mlp_dim, dim, False) - - -class MoCo_ViT(MoCo): - def _build_projector_and_predictor_mlps(self, dim, mlp_dim): - hidden_dim = self.base_encoder.head.weight.shape[0] - del self.base_encoder.head, self.momentum_encoder.head # remove 
original fc layer - - # projectors - self.base_encoder.head = self._build_mlp(3, hidden_dim, mlp_dim, dim) - self.momentum_encoder.head = self._build_mlp(3, hidden_dim, mlp_dim, - dim) - - # predictor - self.predictor = self._build_mlp(2, dim, mlp_dim, dim) - - -# utils -@paddle.no_grad() -def concat_all_gather(tensor): - """ - Performs all_gather operation on the provided tensors. - """ - if paddle.distributed.get_world_size() < 2: - return tensor - - tensors_gather = [] - paddle.distributed.all_gather(tensors_gather, tensor) - - output = paddle.concat(tensors_gather, axis=0) - return output diff --git a/tasks/ssl/mocov2/configs/mocov2_resnet50_lp_in1k_1n8c.yaml b/tasks/ssl/mocov2/configs/mocov2_resnet50_lp_in1k_1n8c.yaml index b0fc5583..33f759f3 100644 --- a/tasks/ssl/mocov2/configs/mocov2_resnet50_lp_in1k_1n8c.yaml +++ b/tasks/ssl/mocov2/configs/mocov2_resnet50_lp_in1k_1n8c.yaml @@ -4,7 +4,7 @@ Global: train_loop: ClassificationTrainingEpochLoop validate_loop: ClassificationEvaluationLoop checkpoint: null - pretrained_model: /wangguo/PASSL/pretrained/moco/transformed_mocov2_pt_imagenet2012_resnet50 + pretrained_model: ./output/mocov2_resnet50_pretrain/epoch_96_base_encoder output_dir: ./output/ device: gpu save_interval: 1 diff --git a/tasks/ssl/mocov2/extract_weight.py b/tasks/ssl/mocov2/extract_weight.py deleted file mode 100644 index 5e7e1532..00000000 --- a/tasks/ssl/mocov2/extract_weight.py +++ /dev/null @@ -1,56 +0,0 @@ -# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -import argparse -import os -import paddle - -if __name__ == '__main__': - parser = argparse.ArgumentParser( - description='Convert MoCo Pre-Traind Model to DEiT') - parser.add_argument( - '--input', - default='', - type=str, - metavar='PATH', - required=True, - help='path to moco pre-trained checkpoint') - parser.add_argument( - '--output', - default='', - type=str, - metavar='PATH', - required=True, - help='path to output checkpoint in DEiT format') - args = parser.parse_args() - print(args) - - # load input - checkpoint = paddle.load(args.input) - state_dict = checkpoint['state_dict'] - for k in list(state_dict.keys()): - # retain only base_encoder up to before the embedding layer - if k.startswith('base_encoder') and not k.startswith( - 'base_encoder.head'): - # remove prefix - state_dict[k[len("base_encoder."):]] = state_dict[k] - # delete renamed or unused k - del state_dict[k] - - # make output directory if necessary - output_dir = os.path.dirname(args.output) - if not os.path.isdir(output_dir): - os.makedirs(output_dir) - # save to output - paddle.save(state_dict, args.output) From e1416ceaa9d840e227fe099a1d4d5a1a1b48ba05 Mon Sep 17 00:00:00 2001 From: wangguo02 <939117440@qq.com> Date: Mon, 15 May 2023 08:37:47 +0000 Subject: [PATCH 03/18] modify contrastive learning --- passl/engine/loops/contrastive_learning_loop.py | 3 +++ passl/engine/loops/loop.py | 3 --- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/passl/engine/loops/contrastive_learning_loop.py b/passl/engine/loops/contrastive_learning_loop.py index a772a28d..1406aa30 100644 --- a/passl/engine/loops/contrastive_learning_loop.py +++ b/passl/engine/loops/contrastive_learning_loop.py @@ -74,6 +74,9 @@ def train_one_step(self, batch): # remove label batch = batch[0] + for idx, value in enumerate(batch): + if isinstance(value,paddle.Tensor): + batch[idx] = batch[idx].cuda() # do 
forward and backward loss_dict = self.forward_backward(batch) diff --git a/passl/engine/loops/loop.py b/passl/engine/loops/loop.py index 8d398feb..9d8c085f 100644 --- a/passl/engine/loops/loop.py +++ b/passl/engine/loops/loop.py @@ -285,9 +285,6 @@ def train_one_epoch(self): paddle.to_tensor(batch[0]['label']) ] - for idx, value in enumerate(batch): - if isinstance(value,paddle.Tensor): - batch[idx] = batch[idx].cuda() self.global_step += 1 From f5bdf6e277a3214b4283d55ffe70e299bfc03ad8 Mon Sep 17 00:00:00 2001 From: wangguo02 <939117440@qq.com> Date: Tue, 16 May 2023 02:24:20 +0000 Subject: [PATCH 04/18] add README.md with mocov2 --- tasks/ssl/mocov2/README.md | 102 ++++++++++++++++++ .../configs/mocov2_resnet50_lp_in1k_1n8c.yaml | 11 +- .../configs/mocov2_resnet50_pt_in1k_1n8c.yaml | 2 +- 3 files changed, 109 insertions(+), 6 deletions(-) create mode 100644 tasks/ssl/mocov2/README.md diff --git a/tasks/ssl/mocov2/README.md b/tasks/ssl/mocov2/README.md new file mode 100644 index 00000000..45614170 --- /dev/null +++ b/tasks/ssl/mocov2/README.md @@ -0,0 +1,102 @@ +# MoCo +![MoCo](https://user-images.githubusercontent.com/11435359/71603927-0ca98d00-2b14-11ea-9fd8-10d984a2de45.png) + +This is a PaddlePaddle implementation of the +[MoCov2](https://arxiv.org/abs/2003.04297). + + +## Install Preparation + +MoCo requires `PaddlePaddle >= 2.4`. +```shell +# git clone https://github.com/PaddlePaddle/PASSL.git +# cd /path/to/PASSL +``` + +All commands are executed in the `PASSL` root directory. + +```shell +# python setup.py install +``` + +## Data Preparation + +The imagenet 1k dataset needs to be prepared first and will be organized into the following directory structure. + +```shell +ILSVRC2012 +├── train/ +├── xxx +├── val/ +└── xxx +``` + +Then configure the path. 
+ +```shell +mkdir -p dataset +ln -s /path/to/ILSVRC2012 dataset/ILSVRC2012 +``` + +## Unsupervised Training + +To do unsupervised pre-training of a ResNet-50 model on ImageNet in an 8-gpu machine, you can run the script: + +### MoCo V2 (Single Node with 8 GPUs) +```shell +# sh pretrain.sh +``` + +The differences between MoCo v2 and MoCo v1 are as follows: +* MoCo v2 has a projector +* Data augmentation +* Softmax temperature +* Learning rate scheduler + +## Linear Classification + +When the unsupervised pre-training is complete, or directly download the provided pre-training checkpoint, you can use the following script to train a supervised linear classifier. + +#### Linear Classification Training (Single Node with 8 GPUs) + +```shell +# sh linearprobe.sh +``` + +### MoCo v2 + +#### [Optional] Download checkpoint & Modify yaml configure +```shell +mkdir -p pretrained/moco/ +wget -O ./pretrained/moco/mocov2_pt_imagenet2012_resnet50.pdparams https://paddlefleetx.bj.bcebos.com/model/vision/moco/mocov2_pt_imagenet2012_resnet50.pdparams +``` + +#### Linear Classification Training (Single Node with 8 GPUs) + +```shell +python -m paddle.distributed.launch \ + --nnodes=$PADDLE_NNODES \ + --master=$PADDLE_MASTER \ + --devices=$CUDA_VISIBLE_DEVICES \ + passl-train \ + -c ./configs/mocov2_resnet50_lp_in1k_1n8c.yaml +``` + +## Models + +| Model | Phase | Epochs | Top1 Acc | Checkpoint | Log | +| ------- | --------------------- | ------ | -------- | ------------------------------------------------------------ | ------------------------------------------------------------ | +| MoCo v2 | Unsupervised Training | 200 | - | [download](https://paddlefleetx.bj.bcebos.com/model/vision/moco/mocov2_pt_imagenet2012_resnet50.pdparams) | [log](https://paddlefleetx.bj.bcebos.com/model/vision/moco/mocov2_pt_imagenet2012_resnet50.log) | +| MoCo v2 | Linear Classification | 100 | 0.676595 | 
[download](https://paddlefleetx.bj.bcebos.com/model/vision/moco/mocov2_lincls_imagenet2012_resnet50.pdparams) | [log](https://paddlefleetx.bj.bcebos.com/model/vision/moco/mocov2_lincls_imagenet2012_resnet50.log) | + + +## Citations + +``` +@Article{chen2020mocov2, + author = {Xinlei Chen and Haoqi Fan and Ross Girshick and Kaiming He}, + title = {Improved Baselines with Momentum Contrastive Learning}, + journal = {arXiv preprint arXiv:2003.04297}, + year = {2020}, +} +``` diff --git a/tasks/ssl/mocov2/configs/mocov2_resnet50_lp_in1k_1n8c.yaml b/tasks/ssl/mocov2/configs/mocov2_resnet50_lp_in1k_1n8c.yaml index 33f759f3..b60e5853 100644 --- a/tasks/ssl/mocov2/configs/mocov2_resnet50_lp_in1k_1n8c.yaml +++ b/tasks/ssl/mocov2/configs/mocov2_resnet50_lp_in1k_1n8c.yaml @@ -4,7 +4,7 @@ Global: train_loop: ClassificationTrainingEpochLoop validate_loop: ClassificationEvaluationLoop checkpoint: null - pretrained_model: ./output/mocov2_resnet50_pretrain/epoch_96_base_encoder + pretrained_model: ./path/to/pretrain output_dir: ./output/ device: gpu save_interval: 1 @@ -66,14 +66,15 @@ DataLoader: - Normalize: mean: [0.5, 0.5, 0.5] std: [0.5, 0.5, 0.5] + sampler: name: DistributedBatchSampler batch_size: 32 drop_last: True shuffle: True loader: - num_workers: 1 - use_shared_memory: False + num_workers: 8 + use_shared_memory: True Eval: dataset: @@ -98,8 +99,8 @@ DataLoader: shuffle: False loader: - num_workers: 1 - use_shared_memory: False + num_workers: 8 + use_shared_memory: True Metric: Train: diff --git a/tasks/ssl/mocov2/configs/mocov2_resnet50_pt_in1k_1n8c.yaml b/tasks/ssl/mocov2/configs/mocov2_resnet50_pt_in1k_1n8c.yaml index f97ce0c9..3e599005 100644 --- a/tasks/ssl/mocov2/configs/mocov2_resnet50_pt_in1k_1n8c.yaml +++ b/tasks/ssl/mocov2/configs/mocov2_resnet50_pt_in1k_1n8c.yaml @@ -94,4 +94,4 @@ DataLoader: shuffle: True loader: num_workers: 8 - use_shared_memory: False + use_shared_memory: True From 2d303a222263de1d3349863b4276ba8707a8c6f9 Mon Sep 17 00:00:00 2001 From: 
wangguo02 <939117440@qq.com> Date: Tue, 16 May 2023 02:27:16 +0000 Subject: [PATCH 05/18] modify README --- tasks/ssl/mocov2/README.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tasks/ssl/mocov2/README.md b/tasks/ssl/mocov2/README.md index 45614170..42802d21 100644 --- a/tasks/ssl/mocov2/README.md +++ b/tasks/ssl/mocov2/README.md @@ -9,14 +9,14 @@ This is a PaddlePaddle implementation of the MoCo requires `PaddlePaddle >= 2.4`. ```shell -# git clone https://github.com/PaddlePaddle/PASSL.git -# cd /path/to/PASSL +git clone https://github.com/PaddlePaddle/PASSL.git +cd /path/to/PASSL ``` All commands are executed in the `PASSL` root directory. ```shell -# python setup.py install +python setup.py install ``` ## Data Preparation @@ -44,7 +44,7 @@ To do unsupervised pre-training of a ResNet-50 model on ImageNet in an 8-gpu mac ### MoCo V2 (Single Node with 8 GPUs) ```shell -# sh pretrain.sh +sh pretrain.sh ``` The differences between MoCo v2 and MoCo v1 are as follows: @@ -60,7 +60,7 @@ When the unsupervised pre-training is complete, or directly download the provide #### Linear Classification Training (Single Node with 8 GPUs) ```shell -# sh linearprobe.sh +sh linearprobe.sh ``` ### MoCo v2 From 3d5a1dd1546e40981d59f37770d590b875792069 Mon Sep 17 00:00:00 2001 From: wangguo02 <939117440@qq.com> Date: Tue, 16 May 2023 02:28:54 +0000 Subject: [PATCH 06/18] modify README --- tasks/ssl/mocov2/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tasks/ssl/mocov2/README.md b/tasks/ssl/mocov2/README.md index 42802d21..8b918a1c 100644 --- a/tasks/ssl/mocov2/README.md +++ b/tasks/ssl/mocov2/README.md @@ -56,6 +56,7 @@ The differences between MoCo v2 and MoCo v1 are as follows: ## Linear Classification When the unsupervised pre-training is complete, or directly download the provided pre-training checkpoint, you can use the following script to train a supervised linear classifier. 
+### MoCo v2 #### Linear Classification Training (Single Node with 8 GPUs) @@ -63,7 +64,6 @@ When the unsupervised pre-training is complete, or directly download the provide sh linearprobe.sh ``` -### MoCo v2 #### [Optional] Download checkpoint & Modify yaml configure ```shell From 179eb20ab2308870c3673e207235eabe3f1daa60 Mon Sep 17 00:00:00 2001 From: wangguo02 <939117440@qq.com> Date: Tue, 16 May 2023 02:33:45 +0000 Subject: [PATCH 07/18] modify README --- tasks/ssl/mocov2/README.md | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/tasks/ssl/mocov2/README.md b/tasks/ssl/mocov2/README.md index 8b918a1c..77c50e7f 100644 --- a/tasks/ssl/mocov2/README.md +++ b/tasks/ssl/mocov2/README.md @@ -11,13 +11,11 @@ MoCo requires `PaddlePaddle >= 2.4`. ```shell git clone https://github.com/PaddlePaddle/PASSL.git cd /path/to/PASSL +python setup.py install ``` -All commands are executed in the `PASSL` root directory. +All commands are executed in the subdirectory of `tasks` directory. -```shell -python setup.py install -``` ## Data Preparation @@ -47,12 +45,6 @@ To do unsupervised pre-training of a ResNet-50 model on ImageNet in an 8-gpu mac sh pretrain.sh ``` -The differences between MoCo v2 and MoCo v1 are as follows: -* MoCo v2 has a projector -* Data augmentation -* Softmax temperature -* Learning rate scheduler - ## Linear Classification When the unsupervised pre-training is complete, or directly download the provided pre-training checkpoint, you can use the following script to train a supervised linear classifier. 
From 39a14ef7195586ea81bb14e748ae32153d15ee57 Mon Sep 17 00:00:00 2001 From: wangguo02 <939117440@qq.com> Date: Tue, 16 May 2023 02:34:58 +0000 Subject: [PATCH 08/18] delete dataset link --- tasks/ssl/mocov2/dataset | 1 - 1 file changed, 1 deletion(-) delete mode 120000 tasks/ssl/mocov2/dataset diff --git a/tasks/ssl/mocov2/dataset b/tasks/ssl/mocov2/dataset deleted file mode 120000 index 93a401d4..00000000 --- a/tasks/ssl/mocov2/dataset +++ /dev/null @@ -1 +0,0 @@ -/wangguo/imagenet/ \ No newline at end of file From 3d819e884b0850a33aec4ced5901a3786fa2a365 Mon Sep 17 00:00:00 2001 From: wangguo02 <939117440@qq.com> Date: Tue, 16 May 2023 02:58:04 +0000 Subject: [PATCH 09/18] modify README,yaml --- tasks/ssl/mocov2/README.md | 29 ++++++++++++++----- .../configs/mocov2_resnet50_lp_in1k_1n8c.yaml | 8 ++--- 2 files changed, 25 insertions(+), 12 deletions(-) diff --git a/tasks/ssl/mocov2/README.md b/tasks/ssl/mocov2/README.md index 77c50e7f..af5bd4b5 100644 --- a/tasks/ssl/mocov2/README.md +++ b/tasks/ssl/mocov2/README.md @@ -1,4 +1,4 @@ -# MoCo +# MoCov2 ![MoCo](https://user-images.githubusercontent.com/11435359/71603927-0ca98d00-2b14-11ea-9fd8-10d984a2de45.png) This is a PaddlePaddle implementation of the @@ -7,14 +7,14 @@ This is a PaddlePaddle implementation of the ## Install Preparation -MoCo requires `PaddlePaddle >= 2.4`. +MoCoV2 requires `PaddlePaddle >= 2.4`. ```shell git clone https://github.com/PaddlePaddle/PASSL.git cd /path/to/PASSL python setup.py install ``` -All commands are executed in the subdirectory of `tasks` directory. +All commands are executed in the `tasks/ssl/mocov2/` directory. ## Data Preparation @@ -24,9 +24,7 @@ The imagenet 1k dataset needs to be prepared first and will be organized into th ```shell ILSVRC2012 ├── train/ -├── xxx -├── val/ -└── xxx +└── val/ ``` Then configure the path. 
@@ -42,7 +40,13 @@ To do unsupervised pre-training of a ResNet-50 model on ImageNet in an 8-gpu mac ### MoCo V2 (Single Node with 8 GPUs) ```shell -sh pretrain.sh +export FLAGS_stop_check_timeout=3600 +python -m paddle.distributed.launch \ + --nnodes=$PADDLE_NNODES \ + --master=$PADDLE_MASTER \ + --devices=$CUDA_VISIBLE_DEVICES \ + passl-train \ + -c ./configs/mocov2_resnet50_pt_in1k_1n8c.yaml ``` ## Linear Classification @@ -53,7 +57,12 @@ When the unsupervised pre-training is complete, or directly download the provide #### Linear Classification Training (Single Node with 8 GPUs) ```shell -sh linearprobe.sh +python -m paddle.distributed.launch \ + --nnodes=$PADDLE_NNODES \ + --master=$PADDLE_MASTER \ + --devices=$CUDA_VISIBLE_DEVICES \ + passl-train \ + -c ./configs/mocov2_resnet50_lp_in1k_1n8c.yaml ``` @@ -72,7 +81,11 @@ python -m paddle.distributed.launch \ --devices=$CUDA_VISIBLE_DEVICES \ passl-train \ -c ./configs/mocov2_resnet50_lp_in1k_1n8c.yaml + -o Global.pretrained_model=./pretrained/moco/mocov2_pt_imagenet2012_resnet50 + ``` +## Other Configurations +We provide more directly runnable configurations, see [MoCoV2 Configurations](./configs/).
## Models diff --git a/tasks/ssl/mocov2/configs/mocov2_resnet50_lp_in1k_1n8c.yaml b/tasks/ssl/mocov2/configs/mocov2_resnet50_lp_in1k_1n8c.yaml index b60e5853..c39f6c2a 100644 --- a/tasks/ssl/mocov2/configs/mocov2_resnet50_lp_in1k_1n8c.yaml +++ b/tasks/ssl/mocov2/configs/mocov2_resnet50_lp_in1k_1n8c.yaml @@ -4,7 +4,7 @@ Global: train_loop: ClassificationTrainingEpochLoop validate_loop: ClassificationEvaluationLoop checkpoint: null - pretrained_model: ./path/to/pretrain + pretrained_model: ./output/mocov2_resnet50_pretrain/latest_base_encoder output_dir: ./output/ device: gpu save_interval: 1 @@ -56,7 +56,7 @@ DataLoader: Train: dataset: name: ImageFolder - root: ./dataset/train + root: ./dataset/ILSVRC2012/train transform: - RandomResizedCrop: size: 224 @@ -66,7 +66,7 @@ DataLoader: - Normalize: mean: [0.5, 0.5, 0.5] std: [0.5, 0.5, 0.5] - + sampler: name: DistributedBatchSampler batch_size: 32 @@ -79,7 +79,7 @@ DataLoader: Eval: dataset: name: ImageFolder - root: ./dataset/val + root: ./dataset/ILSVRC2012/val transform: - ResizeImage: resize_short: 256 From ed74f55dad82c71ffb8dfe3f77692e4a63f0feed Mon Sep 17 00:00:00 2001 From: wangguo02 <939117440@qq.com> Date: Tue, 16 May 2023 02:59:33 +0000 Subject: [PATCH 10/18] modify --- tasks/ssl/mocov2/README.md | 1 - 1 file changed, 1 deletion(-) diff --git a/tasks/ssl/mocov2/README.md b/tasks/ssl/mocov2/README.md index af5bd4b5..a104ca53 100644 --- a/tasks/ssl/mocov2/README.md +++ b/tasks/ssl/mocov2/README.md @@ -40,7 +40,6 @@ To do unsupervised pre-training of a ResNet-50 model on ImageNet in an 8-gpu mac ### MoCo V2 (Single Node with 8 GPUs) ```shell -export FLAGS_stop_check_timeout=3600 python -m paddle.distributed.launch \ --nnodes=$PADDLE_NNODES \ --master=$PADDLE_MASTER \ From 177678cd6fc933e2dff16b025135356affc21525 Mon Sep 17 00:00:00 2001 From: wangguo02 <939117440@qq.com> Date: Thu, 25 May 2023 08:00:53 +0000 Subject: [PATCH 11/18] moco --- tests/CI/case.sh | 49 +++++++++++++++++++ 
.../mocov2/mocov2_resnet50_lp_in1k_1n8c_dp.sh | 31 ++++++++++++ .../mocov2/mocov2_resnet50_pt_in1k_1n8c_dp.sh | 31 ++++++++++++ 3 files changed, 111 insertions(+) create mode 100644 tests/CI/ssl/mocov2/mocov2_resnet50_lp_in1k_1n8c_dp.sh create mode 100644 tests/CI/ssl/mocov2/mocov2_resnet50_pt_in1k_1n8c_dp.sh diff --git a/tests/CI/case.sh b/tests/CI/case.sh index 7f428b97..dd9e67c9 100644 --- a/tests/CI/case.sh +++ b/tests/CI/case.sh @@ -40,6 +40,8 @@ function model_list(){ mocov3_vit_base_patch16_224_lp_in1k_1n8c_dp_fp16o1 simsiam_resnet50_pt_in1k_1n8c_dp_fp32 simsiam_resnet50_lp_in1k_1n8c_dp_fp32 + mocov2_resnet50_pt_in1k_1n8c_dp_fp32 + mocov2_resnet50_lp_in1k_1n8c_dp_fp32 } ############ case start ############ @@ -387,6 +389,53 @@ function simsiam_resnet50_lp_in1k_1n8c_dp_fp32() { echo "=========== $FUNCNAME run end ===========" } +function simsiam_resnet50_pt_in1k_1n8c_dp_fp32() { + echo "=========== $FUNCNAME run begin ===========" + rm -rf log + bash ./ssl/simsiam/simsiam_resnet50_pt_in1k_1n8c_dp_fp32.sh + + loss=`cat log/workerlog.0 | grep '50/2502' | awk -F 'loss: ' '{print $2}' | awk -F ',' '{print $1}'` + ips=`cat log/workerlog.0 | grep 'ips: ' | awk -F 'ips: ' '{print $2}' | awk -F ' images/sec,' '{print $1}'| awk 'NR>1 {print}' | awk '{a+=$1}END{print a/NR}'` + mem=`cat log/workerlog.0 | grep '50/2502' | awk -F 'max mem: ' '{print $2}' | awk -F ' GB,' '{print $1}'` + loss_base=-0.32798 + ips_base=1731.37 + mem_base=10.55 + check_result $FUNCNAME ${loss_base} ${loss} ${ips_base} ${ips} ${mem_base} ${mem} + echo "=========== $FUNCNAME run end ===========" +} + +###### MocoV2 ###### + +function mocov2_resnet50_lp_in1k_1n8c_dp_fp32() { + echo "=========== $FUNCNAME run begin ===========" + rm -rf log + bash ./ssl/mocov2/mocov2_resnet50_lp_in1k_1n8c.sh + + loss=`cat log/workerlog.0 | grep '50/313' | awk -F 'loss: ' '{print $2}' | awk -F ',' '{print $1}'` + ips=`cat log/workerlog.0 | grep 'ips: ' | awk -F 'ips: ' '{print $2}' | awk -F ' images/sec,' '{print 
$1}'| awk 'NR>1 {print}' | awk '{a+=$1}END{print a/NR}'` + mem=`cat log/workerlog.0 | grep '50/313' | awk -F 'max mem: ' '{print $2}' | awk -F ' GB,' '{print $1}'` + loss_base=6.89298 + ips_base=6285.21 + mem_base=5.38 + check_result $FUNCNAME ${loss_base} ${loss} ${ips_base} ${ips} ${mem_base} ${mem} + echo "=========== $FUNCNAME run end ===========" +} + +function mocov2_resnet50_pt_in1k_1n8c_dp_fp32() { + echo "=========== $FUNCNAME run begin ===========" + rm -rf log + bash ./ssl/mocov2/mocov2_resnet50_pt_in1k_1n8c.sh + + loss=`cat log/workerlog.0 | grep '50/2502' | awk -F 'loss: ' '{print $2}' | awk -F ',' '{print $1}'` + ips=`cat log/workerlog.0 | grep 'ips: ' | awk -F 'ips: ' '{print $2}' | awk -F ' images/sec,' '{print $1}'| awk 'NR>1 {print}' | awk '{a+=$1}END{print a/NR}'` + mem=`cat log/workerlog.0 | grep '50/2502' | awk -F 'max mem: ' '{print $2}' | awk -F ' GB,' '{print $1}'` + loss_base=-0.32798 + ips_base=1731.37 + mem_base=10.55 + check_result $FUNCNAME ${loss_base} ${loss} ${ips_base} ${ips} ${mem_base} ${mem} + echo "=========== $FUNCNAME run end ===========" +} + function check_result() { if [ $? -ne 0 ];then echo -e "\033 $1 model runs failed! \033" | tee -a $log_path/result.log diff --git a/tests/CI/ssl/mocov2/mocov2_resnet50_lp_in1k_1n8c_dp.sh b/tests/CI/ssl/mocov2/mocov2_resnet50_lp_in1k_1n8c_dp.sh new file mode 100644 index 00000000..04e3c83b --- /dev/null +++ b/tests/CI/ssl/mocov2/mocov2_resnet50_lp_in1k_1n8c_dp.sh @@ -0,0 +1,31 @@ +# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +# unset PADDLE_TRAINER_ENDPOINTS +# export PADDLE_NNODES=1 +# export PADDLE_MASTER="xxx.xxx.xxx.xxx:12538" +# export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 +export FLAGS_stop_check_timeout=3600 + +python -m paddle.distributed.launch \ + --nnodes=$PADDLE_NNODES \ + --master=$PADDLE_MASTER \ + --devices=$CUDA_VISIBLE_DEVICES \ + passl-train \ + -c ../../tasks/ssl/mocov2/configs/mocov2_resnet50_lp_in1k_1n8c.yaml + -o Global.print_batch_step=1 \ + -o Global.max_train_step=50 \ + -o Global.flags.FLAGS_cudnn_exhaustive_search=0 \ + -o Global.flags.FLAGS_cudnn_deterministic=1 \ + -o DataLoader.Train.sampler.batch_size=64 diff --git a/tests/CI/ssl/mocov2/mocov2_resnet50_pt_in1k_1n8c_dp.sh b/tests/CI/ssl/mocov2/mocov2_resnet50_pt_in1k_1n8c_dp.sh new file mode 100644 index 00000000..381adf0c --- /dev/null +++ b/tests/CI/ssl/mocov2/mocov2_resnet50_pt_in1k_1n8c_dp.sh @@ -0,0 +1,31 @@ +# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# unset PADDLE_TRAINER_ENDPOINTS +# export PADDLE_NNODES=1 +# #export PADDLE_MASTER="xxx.xxx.xxx.xxx:12538" +# export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 +export FLAGS_stop_check_timeout=3600 + +python -m paddle.distributed.launch \ + --nnodes=$PADDLE_NNODES \ + --master=$PADDLE_MASTER \ + --devices=$CUDA_VISIBLE_DEVICES \ + passl-train \ + -c ../../tasks/ssl/mocov2/configs/mocov2_resnet50_pt_in1k_1n8c.yaml + -o Global.print_batch_step=1 \ + -o Global.max_train_step=50 \ + -o Global.flags.FLAGS_cudnn_exhaustive_search=0 \ + -o Global.flags.FLAGS_cudnn_deterministic=1 \ + -o DataLoader.Train.sampler.batch_size=64 \ No newline at end of file From 130b4b92a898279df976c4d80024fe34677c24c0 Mon Sep 17 00:00:00 2001 From: wangguo02 <939117440@qq.com> Date: Fri, 26 May 2023 08:38:55 +0000 Subject: [PATCH 12/18] add CI --- tests/CI/case.sh | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/tests/CI/case.sh b/tests/CI/case.sh index dd9e67c9..e082ce8b 100644 --- a/tests/CI/case.sh +++ b/tests/CI/case.sh @@ -411,12 +411,12 @@ function mocov2_resnet50_lp_in1k_1n8c_dp_fp32() { rm -rf log bash ./ssl/mocov2/mocov2_resnet50_lp_in1k_1n8c.sh - loss=`cat log/workerlog.0 | grep '50/313' | awk -F 'loss: ' '{print $2}' | awk -F ',' '{print $1}'` + loss=`cat log/workerlog.0 | grep '50/5004' | awk -F 'loss: ' '{print $2}' | awk -F ',' '{print $1}'` ips=`cat log/workerlog.0 | grep 'ips: ' | awk -F 'ips: ' '{print $2}' | awk -F ' images/sec,' '{print $1}'| awk 'NR>1 {print}' | awk '{a+=$1}END{print a/NR}'` - mem=`cat log/workerlog.0 | grep '50/313' | awk -F 'max mem: ' '{print $2}' | awk -F ' GB,' '{print $1}'` - loss_base=6.89298 - ips_base=6285.21 - mem_base=5.38 + mem=`cat log/workerlog.0 | grep '50/5004' | awk -F 'max mem: ' '{print $2}' | awk -F ' GB,' '{print $1}'` + loss_base=4.69785 + ips_base=6670.45070 + mem_base=0.81 check_result $FUNCNAME ${loss_base} ${loss} ${ips_base} ${ips} ${mem_base} ${mem} echo "=========== $FUNCNAME run end 
===========" } @@ -426,12 +426,12 @@ function mocov2_resnet50_pt_in1k_1n8c_dp_fp32() { rm -rf log bash ./ssl/mocov2/mocov2_resnet50_pt_in1k_1n8c.sh - loss=`cat log/workerlog.0 | grep '50/2502' | awk -F 'loss: ' '{print $2}' | awk -F ',' '{print $1}'` + loss=`cat log/workerlog.0 | grep '50/5004' | awk -F 'loss: ' '{print $2}' | awk -F ',' '{print $1}'` ips=`cat log/workerlog.0 | grep 'ips: ' | awk -F 'ips: ' '{print $2}' | awk -F ' images/sec,' '{print $1}'| awk 'NR>1 {print}' | awk '{a+=$1}END{print a/NR}'` - mem=`cat log/workerlog.0 | grep '50/2502' | awk -F 'max mem: ' '{print $2}' | awk -F ' GB,' '{print $1}'` - loss_base=-0.32798 - ips_base=1731.37 - mem_base=10.55 + mem=`cat log/workerlog.0 | grep '50/5004' | awk -F 'max mem: ' '{print $2}' | awk -F ' GB,' '{print $1}'` + loss_base=9.30424 + ips_base=2369.80220 + mem_base=3.38 check_result $FUNCNAME ${loss_base} ${loss} ${ips_base} ${ips} ${mem_base} ${mem} echo "=========== $FUNCNAME run end ===========" } From c88f2891ce3a7b26072c2bf156d6e9695bab784e Mon Sep 17 00:00:00 2001 From: wangguo02 <939117440@qq.com> Date: Fri, 26 May 2023 09:32:36 +0000 Subject: [PATCH 13/18] add CI --- tasks/ssl/mocov2/configs/mocov2_resnet50_pt_in1k_1n8c.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tasks/ssl/mocov2/configs/mocov2_resnet50_pt_in1k_1n8c.yaml b/tasks/ssl/mocov2/configs/mocov2_resnet50_pt_in1k_1n8c.yaml index 3e599005..27befd50 100644 --- a/tasks/ssl/mocov2/configs/mocov2_resnet50_pt_in1k_1n8c.yaml +++ b/tasks/ssl/mocov2/configs/mocov2_resnet50_pt_in1k_1n8c.yaml @@ -40,7 +40,7 @@ DataLoader: Train: dataset: name: ImageFolder - root: ./dataset/train + root: ./dataset/ILSVRC2012/train transform: - TwoViewsTransform: base_transform1: From a158dc9042e5f064b0783248449611ed8536b89f Mon Sep 17 00:00:00 2001 From: MangoFF <939117440@qq.com> Date: Fri, 26 May 2023 17:44:11 +0800 Subject: [PATCH 14/18] Update case.sh --- tests/CI/case.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff 
--git a/tests/CI/case.sh b/tests/CI/case.sh index e082ce8b..d9eb52ec 100644 --- a/tests/CI/case.sh +++ b/tests/CI/case.sh @@ -429,8 +429,8 @@ function mocov2_resnet50_pt_in1k_1n8c_dp_fp32() { loss=`cat log/workerlog.0 | grep '50/5004' | awk -F 'loss: ' '{print $2}' | awk -F ',' '{print $1}'` ips=`cat log/workerlog.0 | grep 'ips: ' | awk -F 'ips: ' '{print $2}' | awk -F ' images/sec,' '{print $1}'| awk 'NR>1 {print}' | awk '{a+=$1}END{print a/NR}'` mem=`cat log/workerlog.0 | grep '50/5004' | awk -F 'max mem: ' '{print $2}' | awk -F ' GB,' '{print $1}'` - loss_base=9.30424 - ips_base=2369.80220 + loss_base=9.33314 + ips_base=2076.1308 mem_base=3.38 check_result $FUNCNAME ${loss_base} ${loss} ${ips_base} ${ips} ${mem_base} ${mem} echo "=========== $FUNCNAME run end ===========" From 9bf4c23eb1174fab401dde9179794e2a782ba136 Mon Sep 17 00:00:00 2001 From: wangguo02 <939117440@qq.com> Date: Fri, 26 May 2023 10:39:21 +0000 Subject: [PATCH 15/18] add model --- tests/CI/ssl/mocov2/mocov2_resnet50_lp_in1k_1n8c_dp.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/CI/ssl/mocov2/mocov2_resnet50_lp_in1k_1n8c_dp.sh b/tests/CI/ssl/mocov2/mocov2_resnet50_lp_in1k_1n8c_dp.sh index 04e3c83b..6fa11b76 100644 --- a/tests/CI/ssl/mocov2/mocov2_resnet50_lp_in1k_1n8c_dp.sh +++ b/tests/CI/ssl/mocov2/mocov2_resnet50_lp_in1k_1n8c_dp.sh @@ -29,3 +29,5 @@ python -m paddle.distributed.launch \ -o Global.flags.FLAGS_cudnn_exhaustive_search=0 \ -o Global.flags.FLAGS_cudnn_deterministic=1 \ -o DataLoader.Train.sampler.batch_size=64 + -o Global.pretrained_model=./pretrained/mocov2/mocov2_latest_base_encoder + From 9f88fa44de4f8f080e48e1fb8b072fa69bbde54a Mon Sep 17 00:00:00 2001 From: wangguo02 <939117440@qq.com> Date: Fri, 26 May 2023 10:51:12 +0000 Subject: [PATCH 16/18] add model --- tests/CI/ssl/mocov2/mocov2_resnet50_lp_in1k_1n8c_dp.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/CI/ssl/mocov2/mocov2_resnet50_lp_in1k_1n8c_dp.sh 
b/tests/CI/ssl/mocov2/mocov2_resnet50_lp_in1k_1n8c_dp.sh index 6fa11b76..564e98d5 100644 --- a/tests/CI/ssl/mocov2/mocov2_resnet50_lp_in1k_1n8c_dp.sh +++ b/tests/CI/ssl/mocov2/mocov2_resnet50_lp_in1k_1n8c_dp.sh @@ -28,6 +28,6 @@ python -m paddle.distributed.launch \ -o Global.max_train_step=50 \ -o Global.flags.FLAGS_cudnn_exhaustive_search=0 \ -o Global.flags.FLAGS_cudnn_deterministic=1 \ - -o DataLoader.Train.sampler.batch_size=64 + -o DataLoader.Train.sampler.batch_size=64 \ -o Global.pretrained_model=./pretrained/mocov2/mocov2_latest_base_encoder From b49debb7d51a732ee640440080e6e01307edcf0d Mon Sep 17 00:00:00 2001 From: wangguo02 <939117440@qq.com> Date: Fri, 26 May 2023 10:54:34 +0000 Subject: [PATCH 17/18] add model --- tests/CI/ssl/mocov2/mocov2_resnet50_lp_in1k_1n8c_dp.sh | 2 +- tests/CI/ssl/mocov2/mocov2_resnet50_pt_in1k_1n8c_dp.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/CI/ssl/mocov2/mocov2_resnet50_lp_in1k_1n8c_dp.sh b/tests/CI/ssl/mocov2/mocov2_resnet50_lp_in1k_1n8c_dp.sh index 564e98d5..f17b52e4 100644 --- a/tests/CI/ssl/mocov2/mocov2_resnet50_lp_in1k_1n8c_dp.sh +++ b/tests/CI/ssl/mocov2/mocov2_resnet50_lp_in1k_1n8c_dp.sh @@ -23,7 +23,7 @@ python -m paddle.distributed.launch \ --master=$PADDLE_MASTER \ --devices=$CUDA_VISIBLE_DEVICES \ passl-train \ - -c ../../tasks/ssl/mocov2/configs/mocov2_resnet50_lp_in1k_1n8c.yaml + -c ../../tasks/ssl/mocov2/configs/mocov2_resnet50_lp_in1k_1n8c.yaml \ -o Global.print_batch_step=1 \ -o Global.max_train_step=50 \ -o Global.flags.FLAGS_cudnn_exhaustive_search=0 \ diff --git a/tests/CI/ssl/mocov2/mocov2_resnet50_pt_in1k_1n8c_dp.sh b/tests/CI/ssl/mocov2/mocov2_resnet50_pt_in1k_1n8c_dp.sh index 381adf0c..1e7d0be0 100644 --- a/tests/CI/ssl/mocov2/mocov2_resnet50_pt_in1k_1n8c_dp.sh +++ b/tests/CI/ssl/mocov2/mocov2_resnet50_pt_in1k_1n8c_dp.sh @@ -23,7 +23,7 @@ python -m paddle.distributed.launch \ --master=$PADDLE_MASTER \ --devices=$CUDA_VISIBLE_DEVICES \ passl-train \ - -c 
../../tasks/ssl/mocov2/configs/mocov2_resnet50_pt_in1k_1n8c.yaml + -c ../../tasks/ssl/mocov2/configs/mocov2_resnet50_pt_in1k_1n8c.yaml \ -o Global.print_batch_step=1 \ -o Global.max_train_step=50 \ -o Global.flags.FLAGS_cudnn_exhaustive_search=0 \ From 594c2bff69433163e0f7ec937c9e45c8d0765ccd Mon Sep 17 00:00:00 2001 From: wangguo02 <939117440@qq.com> Date: Fri, 26 May 2023 11:02:10 +0000 Subject: [PATCH 18/18] modify ci --- tests/CI/case.sh | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/tests/CI/case.sh b/tests/CI/case.sh index d9eb52ec..fc2e27dd 100644 --- a/tests/CI/case.sh +++ b/tests/CI/case.sh @@ -411,12 +411,12 @@ function mocov2_resnet50_lp_in1k_1n8c_dp_fp32() { rm -rf log bash ./ssl/mocov2/mocov2_resnet50_lp_in1k_1n8c.sh - loss=`cat log/workerlog.0 | grep '50/5004' | awk -F 'loss: ' '{print $2}' | awk -F ',' '{print $1}'` + loss=`cat log/workerlog.0 | grep '49/2502' | awk -F 'loss: ' '{print $2}' | awk -F ',' '{print $1}'` ips=`cat log/workerlog.0 | grep 'ips: ' | awk -F 'ips: ' '{print $2}' | awk -F ' images/sec,' '{print $1}'| awk 'NR>1 {print}' | awk '{a+=$1}END{print a/NR}'` - mem=`cat log/workerlog.0 | grep '50/5004' | awk -F 'max mem: ' '{print $2}' | awk -F ' GB,' '{print $1}'` - loss_base=4.69785 - ips_base=6670.45070 - mem_base=0.81 + mem=`cat log/workerlog.0 | grep '49/2502' | awk -F 'max mem: ' '{print $2}' | awk -F ' GB,' '{print $1}'` + loss_base=4.12551 + ips_base=6449.01604 + mem_base=0.77 check_result $FUNCNAME ${loss_base} ${loss} ${ips_base} ${ips} ${mem_base} ${mem} echo "=========== $FUNCNAME run end ===========" } @@ -426,12 +426,12 @@ function mocov2_resnet50_pt_in1k_1n8c_dp_fp32() { rm -rf log bash ./ssl/mocov2/mocov2_resnet50_pt_in1k_1n8c.sh - loss=`cat log/workerlog.0 | grep '50/5004' | awk -F 'loss: ' '{print $2}' | awk -F ',' '{print $1}'` + loss=`cat log/workerlog.0 | grep '49/2502' | awk -F 'loss: ' '{print $2}' | awk -F ',' '{print $1}'` ips=`cat log/workerlog.0 | grep 'ips: ' | awk -F 
'ips: ' '{print $2}' | awk -F ' images/sec,' '{print $1}'| awk 'NR>1 {print}' | awk '{a+=$1}END{print a/NR}'` - mem=`cat log/workerlog.0 | grep '50/5004' | awk -F 'max mem: ' '{print $2}' | awk -F ' GB,' '{print $1}'` - loss_base=9.33314 - ips_base=2076.1308 - mem_base=3.38 + mem=`cat log/workerlog.0 | grep '49/2502' | awk -F 'max mem: ' '{print $2}' | awk -F ' GB,' '{print $1}'` + loss_base=10.05231 + ips_base=2045.23616 + mem_base=6.17 check_result $FUNCNAME ${loss_base} ${loss} ${ips_base} ${ips} ${mem_base} ${mem} echo "=========== $FUNCNAME run end ===========" }