常用的数据集WebVision介绍和pytorch下的简单使用

cnblogs 2024-08-03 15:13:00 阅读 81

WebVision数据集介绍

官方下载地址

WebVision数据集常用于开集/闭集噪声学习、长尾噪声学习方法在真实数据集上的评估。根据[2]的统计,干净样本占70%,OOD噪声占25%,ID噪声占5%。

由于数据集本身较大,论文中使用的都是其中很小的一部分,进入下载页面,选择《WebVision Dataset 1.0》《Resized Images (small version)》:

  • 一般需要数据集的训练集《Google Images Resized (16 GB) 》
  • 验证集《Validation Images Resized (834 MB)》
  • 这两个集合的标签《Metadata》下的《Training & Validation Labels (183 MB)》。

注意,由于测试集不提供标签,因此评估论文方法性能时不去使用。另外大部分实验仅使用了google子集,因此这里也只使用google子集。

整理下下载的数据集,放到目录 ~/data/webvision1.0 下,目录结构如下:

├─google

│ ├─q0001

│ ├─q0002

│ ├─...

│ ├─q1631

│ └─q1632

├─info

└─val_images_256

info 目录下重要的几个txt:

  1. queries_google.txt:1632行,与google目录下的文件夹相对应,每行是一个查询词。
  2. synsets.txt:1000行表示1000个类,标签 \(i(0\le i\le 999)\) 的具体含义在第 \(i+1\)行。
  3. train_filelist_google.txt:每行表示一个图片的路径和标签,路径是相对于数据集根目录的(路径本身以 google/ 开头,与下文代码的读取方式一致)。
  4. val_filelist.txt:同上,但是是相对于val_images_256目录的。

在Pytorch中使用WebVision数据集

大多数论文使用mini-Webvision,即仅使用前50个类。

数据集 & DataLoader

# webvision.py

from torch.utils.data import Dataset, DataLoader

import torchvision.transforms as transforms

from PIL import Image

import os

class Webvision(Dataset):
    """WebVision 1.0 samples restricted to the first ``num_classes`` labels.

    The file list under ``<root>/info`` is parsed once at construction time;
    images are only opened when an item is requested.

    Args:
        root: Dataset root directory; a leading ``~`` is expanded.
        train: If true, read ``info/train_filelist_google.txt`` and resolve
            image paths against ``root``. Otherwise read
            ``info/val_filelist.txt`` and resolve image paths against
            ``<root>/val_images_256``.
        transform: Optional callable applied to the RGB ``PIL.Image``.
            When ``None`` the image is returned untransformed.
        num_classes: Only samples whose integer label is strictly smaller
            than this value are kept.

    Attributes:
        data: Relative image paths, in file-list order.
        targets: Integer labels aligned with ``data``.
    """

    def __init__(self, root, train=True, transform=None, num_classes=50):
        root = os.path.expanduser(root)
        self.root = root
        self.transform = transform
        self.train = train

        list_name = 'train_filelist_google.txt' if train else 'val_filelist.txt'
        self.data, self.targets = self._read_filelist(
            os.path.join(root, 'info', list_name), num_classes)

    @staticmethod
    def _read_filelist(path, num_classes):
        """Parse a ``<path> <label>`` list, keeping labels below ``num_classes``.

        Blank lines are skipped. A non-blank line that does not consist of
        exactly two whitespace-separated fields, or whose label is not an
        integer, raises ``ValueError``.
        """
        data, targets = [], []
        with open(path, encoding='utf-8') as f:
            for line in f:
                fields = line.split()
                if not fields:
                    # Tolerate empty lines (e.g. a trailing newline at EOF).
                    continue
                img, target = fields
                target = int(target)
                if target < num_classes:
                    data.append(img)
                    targets.append(target)
        return data, targets

    def __len__(self):
        return len(self.targets)

    def __getitem__(self, index):
        """Return ``(image, target)`` for the sample at ``index``."""
        img_path = self.data[index]
        target = self.targets[index]

        if self.train:
            full_path = os.path.join(self.root, img_path)
        else:
            full_path = os.path.join(self.root, 'val_images_256', img_path)

        # ``convert`` yields an independent in-memory image, so the file
        # handle can be released as soon as the conversion is done.
        with Image.open(full_path) as img:
            image = img.convert('RGB')

        if self.transform is not None:
            image = self.transform(image)
        return image, target

class WebvisionDataloader:
    """Factory for the train and validation ``DataLoader`` objects.

    Args:
        batch_size: Batch size used by both loaders.
        num_classes: Forwarded to ``Webvision``; only labels below this
            value are kept.
        num_workers: Number of worker processes for each loader.
        root: Dataset root directory, forwarded to ``Webvision``.

    Attributes:
        transform_train: Resize(320) -> RandomResizedCrop(299) ->
            RandomHorizontalFlip -> ToTensor -> Normalize.
        transform_test: Resize(320) -> CenterCrop(299) -> ToTensor ->
            Normalize.
    """

    def __init__(self, batch_size=128, num_classes=50, num_workers=8, root='~/data/webvision1.0'):
        self.batch_size = batch_size
        self.num_classes = num_classes
        self.num_workers = num_workers
        self.root = root

        # Both pipelines normalise with the same per-channel mean / std.
        mean = (0.485, 0.456, 0.406)
        std = (0.229, 0.224, 0.225)

        self.transform_train = transforms.Compose([
            transforms.Resize(320),
            transforms.RandomResizedCrop(299),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize(mean, std),
        ])
        self.transform_test = transforms.Compose([
            transforms.Resize(320),
            transforms.CenterCrop(299),
            transforms.ToTensor(),
            transforms.Normalize(mean, std),
        ])

    def _build_loader(self, train, transform):
        """Create a pinned-memory loader; only the train loader is shuffled."""
        dataset = Webvision(root=self.root, train=train, transform=transform,
                            num_classes=self.num_classes)
        return DataLoader(
            dataset=dataset, batch_size=self.batch_size,
            shuffle=train, num_workers=self.num_workers, pin_memory=True)

    def train(self):
        """Return a shuffled loader over the training split."""
        return self._build_loader(True, self.transform_train)

    def test(self):
        """Return an unshuffled loader over the validation split."""
        return self._build_loader(False, self.transform_test)

网络结构

大部分论文使用InceptionResNetV2[3]作为该数据集的网络。

# InceptionResNetV2.py

import torch

from torch import nn

class BasicConv2d(nn.Module):

def __init__(self, in_planes, out_planes, kernel_size, stride, padding: int | tuple[int, int] = 0):

super(BasicConv2d, self).__init__()

self.conv = nn.Conv2d(in_planes, out_planes,

kernel_size=kernel_size, stride=stride,

padding=padding, bias=False) # verify bias false

self.bn = nn.BatchNorm2d(out_planes,

eps=0.001, # value found in tensorflow

momentum=0.1, # default pytorch value

affine=True)

self.relu = nn.ReLU(inplace=False)

def forward(self, x):

x = self.conv(x)

x = self.bn(x)

x = self.relu(x)

return x

class Mixed_5b(nn.Module):
    """Four parallel branches over a 192-channel input, concatenated on dim 1.

    The branches emit 96, 64, 96 and 64 channels, so the output has
    320 channels; every branch uses stride 1 with size-preserving padding.
    """

    def __init__(self):
        super().__init__()

        # 1x1 convolution.
        self.branch0 = BasicConv2d(192, 96, kernel_size=1, stride=1)

        # 1x1 convolution followed by a 5x5 convolution.
        self.branch1 = nn.Sequential(
            BasicConv2d(192, 48, kernel_size=1, stride=1),
            BasicConv2d(48, 64, kernel_size=5, stride=1, padding=2),
        )

        # 1x1 convolution followed by two 3x3 convolutions.
        self.branch2 = nn.Sequential(
            BasicConv2d(192, 64, kernel_size=1, stride=1),
            BasicConv2d(64, 96, kernel_size=3, stride=1, padding=1),
            BasicConv2d(96, 96, kernel_size=3, stride=1, padding=1),
        )

        # 3x3 average pooling followed by a 1x1 convolution.
        self.branch3 = nn.Sequential(
            nn.AvgPool2d(3, stride=1, padding=1, count_include_pad=False),
            BasicConv2d(192, 64, kernel_size=1, stride=1),
        )

    def forward(self, x):
        """Run every branch on ``x`` and concatenate the results channel-wise."""
        branches = (self.branch0, self.branch1, self.branch2, self.branch3)
        return torch.cat([branch(x) for branch in branches], 1)

class Block35(nn.Module):
    """Residual block over a 320-channel input.

    Three branches (32, 32 and 64 channels) are concatenated, projected back
    to 320 channels by a 1x1 convolution, multiplied by ``scale``, added to
    the input and passed through ReLU.

    Args:
        scale: Multiplier applied to the projected branch output before the
            residual addition.
    """

    def __init__(self, scale=1.0):
        super().__init__()
        self.scale = scale

        self.branch0 = BasicConv2d(320, 32, kernel_size=1, stride=1)

        self.branch1 = nn.Sequential(
            BasicConv2d(320, 32, kernel_size=1, stride=1),
            BasicConv2d(32, 32, kernel_size=3, stride=1, padding=1),
        )

        self.branch2 = nn.Sequential(
            BasicConv2d(320, 32, kernel_size=1, stride=1),
            BasicConv2d(32, 48, kernel_size=3, stride=1, padding=1),
            BasicConv2d(48, 64, kernel_size=3, stride=1, padding=1),
        )

        # 32 + 32 + 64 = 128 concatenated channels projected back to 320.
        self.conv2d = nn.Conv2d(128, 320, kernel_size=1, stride=1)
        self.relu = nn.ReLU(inplace=False)

    def forward(self, x):
        """Return ``relu(conv2d(cat(branches)) * scale + x)``."""
        branches = (self.branch0, self.branch1, self.branch2)
        merged = torch.cat([branch(x) for branch in branches], 1)
        residual = self.conv2d(merged)
        return self.relu(residual * self.scale + x)

class Mixed_6a(nn.Module):
    """Stride-2 block over a 320-channel input.

    Two convolutional branches (384 channels each) and a max-pooling branch
    (which keeps the 320 input channels) are concatenated on dim 1, giving
    1088 output channels.
    """

    def __init__(self):
        super().__init__()

        # Single stride-2 3x3 convolution.
        self.branch0 = BasicConv2d(320, 384, kernel_size=3, stride=2)

        # 1x1 -> 3x3 -> stride-2 3x3 convolutions.
        self.branch1 = nn.Sequential(
            BasicConv2d(320, 256, kernel_size=1, stride=1),
            BasicConv2d(256, 256, kernel_size=3, stride=1, padding=1),
            BasicConv2d(256, 384, kernel_size=3, stride=2),
        )

        # Stride-2 3x3 max pooling.
        self.branch2 = nn.MaxPool2d(3, stride=2)

    def forward(self, x):
        """Run every branch on ``x`` and concatenate the results channel-wise."""
        branches = (self.branch0, self.branch1, self.branch2)
        return torch.cat([branch(x) for branch in branches], 1)

class Block17(nn.Module):
    """Residual block over a 1088-channel input.

    Two branches (192 channels each) are concatenated, projected back to
    1088 channels by a 1x1 convolution, multiplied by ``scale``, added to the
    input and passed through ReLU.

    Args:
        scale: Multiplier applied to the projected branch output before the
            residual addition.
    """

    def __init__(self, scale=1.0):
        super().__init__()
        self.scale = scale

        self.branch0 = BasicConv2d(1088, 192, kernel_size=1, stride=1)

        # 1x1 convolution followed by a 1x7 and then a 7x1 convolution.
        self.branch1 = nn.Sequential(
            BasicConv2d(1088, 128, kernel_size=1, stride=1),
            BasicConv2d(128, 160, kernel_size=(1, 7), stride=1, padding=(0, 3)),
            BasicConv2d(160, 192, kernel_size=(7, 1), stride=1, padding=(3, 0)),
        )

        # 192 + 192 = 384 concatenated channels projected back to 1088.
        self.conv2d = nn.Conv2d(384, 1088, kernel_size=1, stride=1)
        self.relu = nn.ReLU(inplace=False)

    def forward(self, x):
        """Return ``relu(conv2d(cat(branches)) * scale + x)``."""
        merged = torch.cat((self.branch0(x), self.branch1(x)), 1)
        residual = self.conv2d(merged)
        return self.relu(residual * self.scale + x)

class Mixed_7a(nn.Module):
    """Stride-2 block over a 1088-channel input.

    Three convolutional branches (384, 288 and 320 channels) and a
    max-pooling branch (which keeps the 1088 input channels) are concatenated
    on dim 1, giving 2080 output channels.
    """

    def __init__(self):
        super().__init__()

        self.branch0 = nn.Sequential(
            BasicConv2d(1088, 256, kernel_size=1, stride=1),
            BasicConv2d(256, 384, kernel_size=3, stride=2),
        )

        self.branch1 = nn.Sequential(
            BasicConv2d(1088, 256, kernel_size=1, stride=1),
            BasicConv2d(256, 288, kernel_size=3, stride=2),
        )

        self.branch2 = nn.Sequential(
            BasicConv2d(1088, 256, kernel_size=1, stride=1),
            BasicConv2d(256, 288, kernel_size=3, stride=1, padding=1),
            BasicConv2d(288, 320, kernel_size=3, stride=2),
        )

        # Stride-2 3x3 max pooling.
        self.branch3 = nn.MaxPool2d(3, stride=2)

    def forward(self, x):
        """Run every branch on ``x`` and concatenate the results channel-wise."""
        branches = (self.branch0, self.branch1, self.branch2, self.branch3)
        return torch.cat([branch(x) for branch in branches], 1)

class Block8(nn.Module):
    """Residual block over a 2080-channel input with an optional final ReLU.

    Two branches (192 and 256 channels) are concatenated, projected back to
    2080 channels by a 1x1 convolution, multiplied by ``scale`` and added to
    the input.

    Args:
        scale: Multiplier applied to the projected branch output before the
            residual addition.
        noReLU: When true, no ``relu`` sub-module is created and the residual
            sum is returned without activation.
    """

    def __init__(self, scale=1.0, noReLU=False):
        super().__init__()
        self.scale = scale
        self.noReLU = noReLU

        self.branch0 = BasicConv2d(2080, 192, kernel_size=1, stride=1)

        # 1x1 convolution followed by a 1x3 and then a 3x1 convolution.
        self.branch1 = nn.Sequential(
            BasicConv2d(2080, 192, kernel_size=1, stride=1),
            BasicConv2d(192, 224, kernel_size=(1, 3), stride=1, padding=(0, 1)),
            BasicConv2d(224, 256, kernel_size=(3, 1), stride=1, padding=(1, 0)),
        )

        # 192 + 256 = 448 concatenated channels projected back to 2080.
        self.conv2d = nn.Conv2d(448, 2080, kernel_size=1, stride=1)

        if not self.noReLU:
            self.relu = nn.ReLU(inplace=False)

    def forward(self, x):
        """Return the scaled residual sum, activated unless ``noReLU`` is set."""
        merged = torch.cat((self.branch0(x), self.branch1(x)), 1)
        summed = self.conv2d(merged) * self.scale + x
        if self.noReLU:
            return summed
        return self.relu(summed)

def _make_layer(block, num_blocks, **kwargs):

layers = []

for _ in range(num_blocks):

layers.append(block(**kwargs))

return nn.Sequential(*layers)

class InceptionResNetV2(nn.Module):
    """Network assembled from the stem, mixed and residual blocks in this file.

    Args:
        num_classes: Output size of the final linear layer.
    """

    def __init__(self, num_classes=1001):
        super().__init__()

        # Special attributes
        self.input_space = None
        self.input_size = (299, 299, 3)
        self.mean = None
        self.std = None

        # Stem: plain convolutions and max pooling.
        self.conv2d_1a = BasicConv2d(3, 32, kernel_size=3, stride=2)
        self.conv2d_2a = BasicConv2d(32, 32, kernel_size=3, stride=1)
        self.conv2d_2b = BasicConv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.maxpool_3a = nn.MaxPool2d(3, stride=2)
        self.conv2d_3b = BasicConv2d(64, 80, kernel_size=1, stride=1)
        self.conv2d_4a = BasicConv2d(80, 192, kernel_size=3, stride=1)
        self.maxpool_5a = nn.MaxPool2d(3, stride=2)

        # Mixed blocks interleaved with stacks of residual blocks.
        self.mixed_5b = Mixed_5b()
        self.repeat = _make_layer(Block35, 10, scale=0.17)
        self.mixed_6a = Mixed_6a()
        self.repeat_1 = _make_layer(Block17, 20, scale=0.10)
        self.mixed_7a = Mixed_7a()
        self.repeat_2 = _make_layer(Block8, 9, scale=0.20)
        # Final residual block without activation (default scale of 1.0).
        self.block8 = Block8(noReLU=True)
        self.conv2d_7b = BasicConv2d(2080, 1536, kernel_size=1, stride=1)

        # Classifier head: 8x8 average pooling, then a linear layer.
        self.avgpool_1a = nn.AvgPool2d(8, count_include_pad=False)
        self.last_linear = nn.Linear(1536, num_classes)

    def features(self, input_):
        """Run ``input_`` through every stage up to and including ``conv2d_7b``."""
        stages = (
            self.conv2d_1a, self.conv2d_2a, self.conv2d_2b, self.maxpool_3a,
            self.conv2d_3b, self.conv2d_4a, self.maxpool_5a,
            self.mixed_5b, self.repeat,
            self.mixed_6a, self.repeat_1,
            self.mixed_7a, self.repeat_2,
            self.block8, self.conv2d_7b,
        )
        x = input_
        for stage in stages:
            x = stage(x)
        return x

    def logits(self, features):
        """Pool ``features``, flatten per sample and apply the linear layer."""
        pooled = self.avgpool_1a(features)
        flat = pooled.view(pooled.size(0), -1)
        return self.last_linear(flat)

    def forward(self, input_):
        """Return ``logits(features(input_))``."""
        return self.logits(self.features(input_))

训练

import torch

from torch import optim

from torch import nn

from torch.nn import functional as F

import time

from torch import Tensor

from InceptionResNetV2 import InceptionResNetV2

from webvision import WebvisionDataloader

def accuracy(output: Tensor, target: Tensor, topk=(1,)) -> list:
    """Compute top-k accuracy percentages for each ``k`` in ``topk``.

    Args:
        output: Scores with one row per sample; the top-k search runs over
            dim 1.
        target: Ground-truth class indices, one per sample.
        topk: Iterable of ``k`` values to evaluate.

    Returns:
        A list with one single-element tensor per entry of ``topk``, each
        holding the percentage of samples whose target is among the ``k``
        highest-scoring classes.
    """
    maxk = max(topk)
    batch_size = target.size(0)

    # Indices of the maxk best scores per sample, transposed to [maxk, batch].
    top_indices = output.topk(maxk, dim=1)[1].t()
    hits = top_indices.eq(target.view(1, -1).expand_as(top_indices))

    percent_per_hit = 100.0 / batch_size
    return [
        hits[:k].reshape(-1).float().sum(0, keepdim=True).mul_(percent_per_hit)
        for k in topk
    ]

# Best top-1 / top-5 accuracy seen so far across calls to ``evaluate``.
best_acc1, best_acc5 = 0, 0


@torch.no_grad()
def evaluate(model, dataloader):
    """Evaluate ``model`` on ``dataloader`` and print a one-line summary.

    Batches are moved to the GPU with ``.cuda()``. Loss and accuracies are
    averaged with per-batch sample-count weighting. The module-level
    ``best_acc1`` / ``best_acc5`` are raised whenever this run beats them.

    Args:
        model: Module to evaluate; it is switched to eval mode.
        dataloader: Iterable yielding ``(inputs, labels)`` batches.

    Returns:
        Tuple ``(avg_loss, avg_acc1, avg_acc5)``.
    """
    global best_acc1, best_acc5

    model.eval()

    loss_sum = 0.0
    acc1_sum = 0.0
    acc5_sum = 0.0
    seen = 0

    for x, labels in dataloader:
        x = x.cuda(non_blocking=True)
        labels = labels.cuda(non_blocking=True)

        logits = model(x)
        batch_loss = F.cross_entropy(logits, labels)
        acc1, acc5 = accuracy(logits.cpu(), labels.cpu(), topk=(1, 5))

        # Weight every batch statistic by the number of samples it covers.
        count = x.size(0)
        loss_sum += batch_loss.item() * count
        acc1_sum += acc1.item() * count
        acc5_sum += acc5.item() * count
        seen += count

    avg_loss = loss_sum / seen
    avg_acc1 = acc1_sum / seen
    avg_acc5 = acc5_sum / seen

    best_acc1 = max(best_acc1, avg_acc1)
    best_acc5 = max(best_acc5, avg_acc5)

    print(f'Average Loss: {avg_loss:.4f}\t'
          f'Acc@1 {avg_acc1:.2f}(Best {best_acc1:.2f})\t'
          f'Acc@5 {avg_acc5:.2f}({best_acc5:.2f})')

    return avg_loss, avg_acc1, avg_acc5

def train(dataloader, model, optimizer, criterion):
    """Run one pass over ``dataloader``, updating ``model`` after every batch.

    Batches are moved to the GPU with ``.cuda()``. The loss is printed once,
    for the batch with index 100, and the elapsed wall-clock time is printed
    at the end.

    Args:
        dataloader: Iterable yielding ``(inputs, labels)`` batches.
        model: Module to train; it is switched to train mode.
        optimizer: Optimizer whose ``zero_grad`` / ``step`` are called per batch.
        criterion: Callable mapping ``(logits, labels)`` to a scalar loss.
    """
    started_at = time.time()
    model.train()

    for step, (x, labels) in enumerate(dataloader):
        x = x.cuda(non_blocking=True)
        labels = labels.cuda(non_blocking=True)

        optimizer.zero_grad()
        loss = criterion(model(x), labels)
        loss.backward()
        optimizer.step()

        # Single progress report per pass, taken at batch index 100.
        if step == 100:
            print(f'Loss: {loss.item():.4f}')

    print(f'Training done in {time.time() - started_at:.2f}s')

def main_work():
    """Build the loaders, model and optimiser, then train for 80 epochs.

    Each epoch runs ``train`` followed by ``evaluate`` and then steps the
    learning-rate scheduler. The model is placed on the GPU with ``.cuda()``.
    """
    num_classes = 50
    epochs = 80
    lr = 0.1

    started_at = time.time()
    loaders = WebvisionDataloader(num_classes=num_classes)
    train_loader = loaders.train()
    test_loader = loaders.test()
    print(f'Dataloader made in {time.time() - started_at:.2f}s')

    model = InceptionResNetV2(num_classes).cuda()
    optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9, weight_decay=5e-4)
    criterion = nn.CrossEntropyLoss()
    # Learning rate is multiplied by 0.1 at epochs 30 and 60.
    scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[30, 60], gamma=0.1)

    for epoch in range(epochs):
        print(f'Epoch[{epoch}] starting')
        epoch_started_at = time.time()

        train(train_loader, model, optimizer, criterion)
        evaluate(model, test_loader)
        scheduler.step()

        print(f'Epoch[{epoch}] done in {time.time() - epoch_started_at:.2f}s')

def main():
    """Run ``main_work`` and print the total wall-clock time it took."""
    started_at = time.time()
    main_work()
    elapsed = time.time() - started_at
    print(f'Total time: {elapsed:.2f}s')


# Only start training when executed as a script, not when imported.
if __name__ == '__main__':
    main()

简单运行了2个epoch,设备为NVIDIA GeForce RTX 3090,还是非常耗时的。

Dataloader made in 0.40s

Epoch[0] starting

Loss: 3.4720

Training done in 4325.29s

Average Loss: 3.0656    Acc@1 19.28(Best 19.28)    Acc@5 51.40(51.40)

Epoch[0] done in 4374.54s

Epoch[1] starting

Loss: 3.0587

Training done in 4264.95s

Average Loss: 2.8655    Acc@1 25.56(Best 25.56)    Acc@5 54.72(54.72)

Epoch[1] done in 4310.30s

运行环境

# Name Version Build Channel

pytorch 2.3.1 py3.12_cuda12.1_cudnn8_0 pytorch

参考文献

  1. Li, Wen, et al. "Webvision database: Visual learning and understanding from web data." arXiv preprint arXiv:1708.02862 (2017).
  2. Albert, Paul, et al. "Addressing out-of-distribution label noise in webly-labelled data." Proceedings of the IEEE/CVF winter conference on applications of computer vision. 2022.
  3. Szegedy, Christian, et al. "Inception-v4, inception-resnet and the impact of residual connections on learning." Proceedings of the AAAI conference on artificial intelligence. Vol. 31. No. 1. 2017.


声明

本文内容仅代表作者观点,或转载于其他网站,本站不以此文作为商业用途
如有涉及侵权,请联系本站进行删除
转载本站原创文章,请注明来源及作者。