"george-chou/HEp2" 数据集卡片
用法
打印
from datasets import load_dataset
# Load every split of the HEp2 dataset by its Hugging Face repository id.
data = load_dataset("george-chou/HEp2")
trainset = data["train"]
validset = data["validation"]
testset = data["test"]

# Class names come from the training split's "label" feature; each item's
# integer label is used as an index into this list for all three splits.
labels = trainset.features["label"].names

# Print the image object and the readable label name of every sample,
# walking the splits in train -> validation -> test order.
for split in (trainset, validset, testset):
    for item in split:
        print("image: ", item["image"])
        print("label name: " + labels[item["label"]])
配合 PyTorch DataLoader 使用
import torch
from datasets import load_dataset
from torch.utils.data import DataLoader
from torchvision.transforms import *
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Training pipeline: resize, centre-crop to 300, apply a small random affine
# augmentation, convert to a tensor and normalise each of the three channels
# with mean 0.5 and std 0.5.
compose = Compose([
    Resize(300),
    CenterCrop(300),
    RandomAffine(5),
    ToTensor(),
    Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

# Evaluation pipeline: identical preprocessing but WITHOUT the random affine
# step, so validation/test inputs are the same on every pass.
eval_compose = Compose([
    Resize(300),
    CenterCrop(300),
    ToTensor(),
    Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])


def _apply_pipeline(pipeline, example_batch):
    """Replace the batch's "image" entries with ``pipeline`` outputs.

    Each image is converted to RGB first so the three-channel ``Normalize``
    step always receives three channels. The batch dict is updated in place
    and returned.
    """
    example_batch["image"] = [
        pipeline(image.convert("RGB")) for image in example_batch["image"]
    ]
    return example_batch


def transform(example_batch):
    """Apply the training pipeline (with augmentation) to a batch dict."""
    return _apply_pipeline(compose, example_batch)


def eval_transform(example_batch):
    """Apply the augmentation-free evaluation pipeline to a batch dict."""
    return _apply_pipeline(eval_compose, example_batch)


ds = load_dataset("george-chou/HEp2")

# Transforms are attached lazily and run when items are accessed. Only the
# training split gets the random augmentation.
trainset = ds["train"].with_transform(transform)
validset = ds["validation"].with_transform(eval_transform)
testset = ds["test"].with_transform(eval_transform)

traLoader = DataLoader(trainset, batch_size=4)
valLoader = DataLoader(validset, batch_size=4)
tesLoader = DataLoader(testset, batch_size=4)

# Iterate the loaders in train -> validation -> test order, moving each
# batch's images and labels to the selected device before printing them.
for loader in (traLoader, valLoader, tesLoader):
    for batch in loader:
        inputs, labels = batch["image"].to(device), batch["label"].to(device)
        print("inputs: ", inputs)
        print("labels: ", labels)
维护
git clone git@hf.co:datasets/george-chou/HEp2