这篇文章是yolov5_reid的扩展。针对reid网络定义代码的详解,有助于大家的理解,同时也方便网络方面的改进。
数据集的预处理代码可以参考我另一篇文章:Reid数据集处理
本篇文章Reid的网络将以Resnet50为例。因此需要大家对Resnet代码有一定的基础(不了解的可以参考我Resnet代码详解)。
在train.py代码中,模型的实例化是调用的build_model函数,该函数传入两个参数,args是相关配置参数,num_classes:
# prepare model: instantiate the ReID network. `args` carries the config,
# `num_classes` is the number of identities (e.g. 751 for Market1501 train).
model = build_model(args, num_classes)
因此我们来看一下该函数,该函数的位置在modeling/__init__.py下。
在Reid的训练过程中,以数据集Market1501为例,num_classes为751,检测的时候才为1501。
然后这里又调用了Baseline函数,传入参数:
num_classes:分类情况,
cfg.LAST_STRIDE:layer4的步长,
cfg.weights:预训练模型,
cfg.neck:bnneck or no。
cfg.test_neck:after or before,
model_name:这里支持Resnet系列,pretrain_choice:imagenet
def build_model(cfg, num_classes):
    """Build the ReID baseline network from the configuration.

    Args:
        cfg: config object exposing LAST_STRIDE, weights, neck, test_neck,
            model_name and pretrain_choice.
        num_classes: number of identities (e.g. 751 for Market1501 training).

    Returns:
        A ``Baseline`` model instance.
    """
    # Typical values: LAST_STRIDE=1, weights=<pretrained checkpoint path>,
    # neck='bnneck', test_neck='after',
    # model_name='resnet50' (se_resnet variants etc. also supported),
    # pretrain_choice='imagenet'.
    return Baseline(num_classes, cfg.LAST_STRIDE, cfg.weights, cfg.neck,
                    cfg.test_neck, cfg.model_name, cfg.pretrain_choice)
接下来看Baseline这个类。
我这里仅以Resnet50为例。
class Baseline(nn.Module):
    # Feature dimension after the backbone (2048 for ResNet-50's Bottleneck).
    in_planes = 2048
    def __init__(self, num_classes, last_stride, model_path, neck, neck_feat, model_name, pretrain_choice):
        # NOTE: excerpt — the `if`/earlier `elif` branches for other backbones
        # are elided here; see the complete class listing below.
        elif model_name == 'resnet50':
            self.base = ResNet(last_stride=last_stride,
                               block=Bottleneck,
                               layers=[3, 4, 6, 3])
ResNet代码如下,由于我们设置的last_stride为1,因此layer4的尺寸并不会减半。同时在这里与原来的Resnet相比没有池化和FC层。
class ResNet(nn.Module):
    """Headless ResNet backbone for ReID.

    Compared with the standard ResNet, the average pooling and the
    fully-connected classifier are removed, and the stride of layer4 is
    configurable: with ``last_stride=1`` the final stage keeps its spatial
    resolution instead of halving it.
    """

    def __init__(self, last_stride=2, block=Bottleneck, layers=[3, 4, 6, 3]):
        # `layers` holds the residual-block count of each of the 4 stages.
        self.inplanes = 64
        super().__init__()
        # Stem: 7x7 stride-2 conv + BN + 3x3 stride-2 max-pool.
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
                               bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        # self.relu = nn.ReLU(inplace=True)  # intentionally absent, mirroring upstream
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        # Four residual stages. With Bottleneck blocks the channel counts are
        # 256 / 512 / 1024 / 2048; stages 2-3 halve the spatial size, stage 4
        # uses `last_stride` (1 keeps the size).
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3],
                                       stride=last_stride)
        # No avgpool / FC here — the ReID head adds its own pooling.

    def _make_layer(self, block, planes, blocks, stride=1):
        """Build one stage of `blocks` residual blocks (first may downsample)."""
        out_planes = planes * block.expansion
        downsample = None
        # A 1x1 projection shortcut is needed whenever channels or stride change.
        if stride != 1 or self.inplanes != out_planes:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, out_planes, kernel_size=1,
                          stride=stride, bias=False),
                nn.BatchNorm2d(out_planes),
            )
        stage = [block(self.inplanes, planes, stride, downsample)]
        self.inplanes = out_planes
        stage += [block(self.inplanes, planes) for _ in range(1, blocks)]
        return nn.Sequential(*stage)

    def forward(self, x):
        """Return the backbone feature map (no pooling, no classifier)."""
        out = self.maxpool(self.bn1(self.conv1(x)))
        # out = self.relu(out)  # relu omitted, matching the constructor
        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            out = stage(out)
        return out
再返回到Baseline代码中看。当我们选择的预训练权重为imagenet时,加载预训练权重。
if pretrain_choice == 'imagenet':
    # Load ImageNet-pretrained weights into the backbone, keeping only the
    # checkpoint entries whose names exist in the backbone's own state dict
    # (e.g. the checkpoint's fc.* weights have no counterpart here).
    model = self.base
    model_dict = model.state_dict()            # the model's default weights
    pretrained_dict = torch.load(model_path)   # pretrained checkpoint
    # BUGFIX: the original filter was
    #   `k in model_dict.keys() == pretrained_dict.keys()`
    # which Python chains as (k in model_dict.keys()) and
    # (model_dict.keys() == pretrained_dict.keys()) — effectively always
    # False, so no pretrained weights were ever copied. Filter on key
    # membership only.
    pretrained_dict = {k: v for k, v in pretrained_dict.items()
                       if k in model_dict}
    model_dict.update(pretrained_dict)         # overwrite matching entries
    model.load_state_dict(model_dict)          # push merged weights back
    print('Loading pretrained ImageNet model......')
接下来是定义自适应平均池化和全连接层。这里以neck为’no’为例。此时的classifier输出通道为751【表示分类情况】。
# Head: global average pooling + linear classifier (the neck == 'no' case).
self.gap = nn.AdaptiveAvgPool2d(1)  # add avgpool layer -> (b, C, 1, 1)
self.num_classes = num_classes
self.neck = neck
self.neck_feat = neck_feat
if self.neck == 'no':
    # Pooled feature has in_planes (2048) channels; the FC outputs one
    # logit per identity class.
    self.classifier = nn.Linear(self.in_planes, self.num_classes)
接下来看forward部分。
还是以neck为’no’为例。
当为训练阶段的时候,也就是training=True的时候,此时会返回两个值:1.cls_score是分类情况【这里是没有经过softmax的,因此还是hard label】;2.第二个值是全局特征global_feat【这个是经过平均池化后的特征层】。
如果是测试阶段,当neck_feat='after'的时候,直接返回特征层(注意,没有分类hard label)。
def forward(self, x):
    """Training: return (cls_score, global_feat); eval: return one feature."""
    global_feat = self.gap(self.base(x))  # (b, 2048, 1, 1)
    global_feat = global_feat.view(global_feat.shape[0], -1)  # flatten to (bs, 2048)
    if self.neck == 'no':
        feat = global_feat
    elif self.neck == 'bnneck':
        feat = self.bottleneck(global_feat)  # normalize for angular softmax
    if self.training:
        cls_score = self.classifier(feat)
        # cls_score: raw logits (no softmax applied); global_feat: pooled feature
        return cls_score, global_feat  # global feature for triplet loss
    else:
        if self.neck_feat == 'after':
            # print("Test with feature after BN")
            return feat
        else:
            # print("Test with feature before BN")
            return global_feat
完整代码:
class Baseline(nn.Module):
    """ReID baseline network: CNN backbone + global average pooling + head.

    In training mode ``forward`` returns ``(cls_score, global_feat)`` — the
    classification logits for the ID loss and the pooled feature for the
    triplet loss. In eval mode it returns a single feature vector used for
    retrieval.
    """

    # Backbone output channels; overridden to 512 for BasicBlock backbones.
    in_planes = 2048

    def __init__(self, num_classes, last_stride, model_path, neck, neck_feat,
                 model_name, pretrain_choice):
        """
        Args:
            num_classes: number of identities (classifier output dim, e.g. 751).
            last_stride: stride of the backbone's last stage (1 keeps resolution).
            model_path: path of the pretrained checkpoint to load.
            neck: 'no' (plain FC head) or 'bnneck' (BN before the classifier).
            neck_feat: 'after' or 'before' — which feature to return at test time.
            model_name: backbone name (resnet / se_resnet / se_resnext family).
            pretrain_choice: 'imagenet' to load ImageNet weights into the backbone.
        """
        super(Baseline, self).__init__()
        # --- backbone selection ------------------------------------------
        if model_name == 'resnet18':
            self.in_planes = 512  # BasicBlock expansion is 1
            self.base = ResNet(last_stride=last_stride,
                               block=BasicBlock,
                               layers=[2, 2, 2, 2])
        elif model_name == 'resnet34':
            self.in_planes = 512
            self.base = ResNet(last_stride=last_stride,
                               block=BasicBlock,
                               layers=[3, 4, 6, 3])
        elif model_name == 'resnet50':
            self.base = ResNet(last_stride=last_stride,
                               block=Bottleneck,
                               layers=[3, 4, 6, 3])
        elif model_name == 'resnet101':
            self.base = ResNet(last_stride=last_stride,
                               block=Bottleneck,
                               layers=[3, 4, 23, 3])
        elif model_name == 'resnet152':
            self.base = ResNet(last_stride=last_stride,
                               block=Bottleneck,
                               layers=[3, 8, 36, 3])
        elif model_name == 'se_resnet50':
            self.base = SENet(block=SEResNetBottleneck,
                              layers=[3, 4, 6, 3],
                              groups=1,
                              reduction=16,
                              dropout_p=None,
                              inplanes=64,
                              input_3x3=False,
                              downsample_kernel_size=1,
                              downsample_padding=0,
                              last_stride=last_stride)
        elif model_name == 'se_resnet101':
            self.base = SENet(block=SEResNetBottleneck,
                              layers=[3, 4, 23, 3],
                              groups=1,
                              reduction=16,
                              dropout_p=None,
                              inplanes=64,
                              input_3x3=False,
                              downsample_kernel_size=1,
                              downsample_padding=0,
                              last_stride=last_stride)
        elif model_name == 'se_resnet152':
            self.base = SENet(block=SEResNetBottleneck,
                              layers=[3, 8, 36, 3],
                              groups=1,
                              reduction=16,
                              dropout_p=None,
                              inplanes=64,
                              input_3x3=False,
                              downsample_kernel_size=1,
                              downsample_padding=0,
                              last_stride=last_stride)
        elif model_name == 'se_resnext50':
            self.base = SENet(block=SEResNeXtBottleneck,
                              layers=[3, 4, 6, 3],
                              groups=32,
                              reduction=16,
                              dropout_p=None,
                              inplanes=64,
                              input_3x3=False,
                              downsample_kernel_size=1,
                              downsample_padding=0,
                              last_stride=last_stride)
        elif model_name == 'se_resnext101':
            self.base = SENet(block=SEResNeXtBottleneck,
                              layers=[3, 4, 23, 3],
                              groups=32,
                              reduction=16,
                              dropout_p=None,
                              inplanes=64,
                              input_3x3=False,
                              downsample_kernel_size=1,
                              downsample_padding=0,
                              last_stride=last_stride)
        elif model_name == 'senet154':
            self.base = SENet(block=SEBottleneck,
                              layers=[3, 8, 36, 3],
                              groups=64,
                              reduction=16,
                              dropout_p=0.2,
                              last_stride=last_stride)
        elif model_name == 'resnet50_ibn_a':
            self.base = resnet50_ibn_a(last_stride)

        # --- optional ImageNet pretraining --------------------------------
        if pretrain_choice == 'imagenet':
            self._load_imagenet_weights(model_path)

        # --- head ---------------------------------------------------------
        self.gap = nn.AdaptiveAvgPool2d(1)  # global average pooling -> (b, C, 1, 1)
        self.num_classes = num_classes
        self.neck = neck
        self.neck_feat = neck_feat
        if self.neck == 'no':
            # Plain linear classifier on the pooled in_planes-d feature.
            self.classifier = nn.Linear(self.in_planes, self.num_classes)
            # self.classifier = nn.Linear(self.in_planes, self.num_classes, bias=False)  # new add by luo
            # self.classifier.apply(weights_init_classifier)  # new add by luo
        elif self.neck == 'bnneck':
            # BNNeck: BatchNorm (learnable shift frozen) between the feature
            # and a bias-free classifier.
            self.bottleneck = nn.BatchNorm1d(self.in_planes)
            self.bottleneck.bias.requires_grad_(False)  # no shift
            self.classifier = nn.Linear(self.in_planes, self.num_classes,
                                        bias=False)
            self.bottleneck.apply(weights_init_kaiming)
            self.classifier.apply(weights_init_classifier)
        # NOTE(review): any other `neck` value leaves self.classifier
        # undefined and makes forward() raise — callers must pass 'no'/'bnneck'.

    def _load_imagenet_weights(self, model_path):
        """Copy the matching ImageNet-pretrained weights into the backbone."""
        model_dict = self.base.state_dict()        # the model's default weights
        pretrained_dict = torch.load(model_path)   # pretrained checkpoint
        # BUGFIX: the original condition
        #   `k in model_dict.keys() == pretrained_dict.keys()`
        # chains to (k in model_dict.keys()) and (model_dict.keys() ==
        # pretrained_dict.keys()), which is effectively always False, so no
        # pretrained weights were ever copied. Filter on key membership only.
        pretrained_dict = {k: v for k, v in pretrained_dict.items()
                           if k in model_dict}
        model_dict.update(pretrained_dict)
        self.base.load_state_dict(model_dict)
        print('Loading pretrained ImageNet model......')

    def forward(self, x):
        """Training: return (cls_score, global_feat); eval: one feature vector."""
        global_feat = self.gap(self.base(x))  # (b, in_planes, 1, 1)
        global_feat = global_feat.view(global_feat.shape[0], -1)  # (b, in_planes)
        if self.neck == 'no':
            feat = global_feat
        elif self.neck == 'bnneck':
            feat = self.bottleneck(global_feat)  # normalize for angular softmax
        if self.training:
            # Raw logits (no softmax); global_feat feeds the triplet loss.
            cls_score = self.classifier(feat)
            return cls_score, global_feat
        if self.neck_feat == 'after':
            return feat          # feature taken after the BNNeck
        return global_feat       # feature taken before the BNNeck

    def load_param(self, trained_path):
        """Load a trained checkpoint, skipping the classifier head.

        The classifier's shape depends on the training identity count, so it
        is not restored when the model is reused for inference/evaluation.
        """
        param_dict = torch.load(trained_path)
        for name in param_dict:
            if 'classifier' in name:
                continue
            self.state_dict()[name].copy_(param_dict[name])
此时我可以随便放一个输入经过Model,看一下输出结果。
torch.Size([1, 3, 256, 128]) # 输入shape
cls_score shape: torch.Size([1, 751]) # 分类输出shape
global_feat shape is: torch.Size([1, 2048]) # 特征层shape
服务器托管,北京服务器托管,服务器租用 http://www.fwqtg.net