数据集:

george-chou/HEp2

英文

"george-chou/HEp2" 数据集卡片

用法

打印

from datasets import load_dataset

# Load the dataset and pick out its three splits.
data = load_dataset("george-chou/HEp2")
trainset = data["train"]
validset = data["validation"]
testset = data["test"]

# Class names are read from the training split and reused for every split.
labels = trainset.features["label"].names

# Walk the splits in order (train, validation, test) and print each sample's
# image object followed by its human-readable label name.
for split in (trainset, validset, testset):
    for item in split:
        label_name = labels[item["label"]]
        print("image: ", item["image"])
        print("label name: " + label_name)

在 Torch DataLoader 上使用

import torch
from datasets import load_dataset
from torch.utils.data import DataLoader
from torchvision.transforms import *

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)

# Preprocessing stages, applied in the order listed.
# NOTE(review): this single pipeline, RandomAffine included, is also the one
# the validation and test splits are passed through further down -- confirm
# that random augmentation at evaluation time is intended.
pipeline_steps = [
    Resize(300),  # resize before cropping
    CenterCrop(300),  # 300x300 centre crop
    RandomAffine(5),  # random affine augmentation, degrees argument = 5
    ToTensor(),  # image -> tensor
    Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),  # per-channel mean/std of 0.5
]
compose = Compose(pipeline_steps)


def transform(example_batch):
    """Preprocess the images of a batch with the module-level ``compose``.

    Each entry of ``example_batch["image"]`` is converted to RGB and passed
    through ``compose``; the resulting list replaces the ``"image"`` entry.

    Args:
        example_batch: Mapping with an ``"image"`` entry holding an iterable
            of objects that provide ``convert("RGB")``.

    Returns:
        The same ``example_batch`` object, modified in place.
    """
    rgb_images = (picture.convert("RGB") for picture in example_batch["image"])
    example_batch["image"] = list(map(compose, rgb_images))
    return example_batch


# Load the dataset and attach the on-the-fly preprocessing to every split.
ds = load_dataset("george-chou/HEp2")
trainset = ds["train"].with_transform(transform)
validset = ds["validation"].with_transform(transform)
testset = ds["test"].with_transform(transform)

# One loader per split, four samples per batch.
traLoader = DataLoader(trainset, batch_size=4)
valLoader = DataLoader(validset, batch_size=4)
tesLoader = DataLoader(testset, batch_size=4)

# Iterate the loaders in order (train, validation, test), move each batch to
# the selected device and print its contents.
for loader in (traLoader, valLoader, tesLoader):
    for data in loader:
        inputs = data["image"].to(device)
        labels = data["label"].to(device)
        print("inputs: ", inputs)
        print("labels: ", labels)

维护

git clone git@hf.co:datasets/george-chou/HEp2