数据集:

vietgpt/xnli_en

语言:

en

大小:

100K<n<1M

其他:

LM
中文

XNLI

from datasets import load_dataset

# Use the repository id under which this dataset card is published.
load_dataset("vietgpt/xnli_en")
  • Format for NLI task
import random

# Prompt templates for the NLI task. Each entry pairs the prompt text (with
# ``{premise}`` / ``{hypothesis}`` placeholders) with the answer wording,
# indexed by the sample's integer label (0, 1, 2).
_NLI_TEMPLATES = (
    (
        'Take the following as truth: "{premise}"\n'
        'Then the following statement: "{hypothesis}" is True, False, or Inconclusive?',
        ("True", "Inconclusive", "False"),
    ),
    (
        '{premise}\n'
        'Question: Does this imply that "{hypothesis}"? Yes, no, or maybe?',
        ("Yes", "Maybe", "No"),
    ),
    (
        '{premise}\n'
        'Question: {hypothesis} True, False, or Neither?',
        ("True", "Neither", "False"),
    ),
    (
        'Given that {premise} Does it follow that {hypothesis} Yes, no, or maybe?',
        ("Yes", "Maybe", "No"),
    ),
    (
        'Given that {premise} Therefore, it must be true that "{hypothesis}" Yes, no, or maybe?',
        ("Yes", "Maybe", "No"),
    ),
)


def preprocess(
    sample,
    sep_key="<|endofprompt|>",
    end_key="<|endoftext|>",
):
    """Render one NLI sample as a single prompt/answer training string.

    One of the prompt templates is picked at random on every call, so the
    output is only reproducible if the caller seeds the ``random`` module.

    Args:
        sample: Mapping with the keys ``'premise'``, ``'hypothesis'`` and
            ``'label'``. ``'label'`` must be an integer in ``0..2``; it
            selects the answer wording of the chosen template
            (presumably 0 = entailment, 1 = neutral, 2 = contradiction --
            confirm against the dataset's label schema).
        sep_key: Marker written on its own line between prompt and answer.
        end_key: Marker written on its own line after the answer.

    Returns:
        A dict with a single key ``'text'`` holding the prompt, ``sep_key``,
        the answer and ``end_key``, separated by newlines.

    Raises:
        KeyError: If ``sample`` lacks one of the required keys.
        IndexError: If ``label`` is outside ``0..2``. Negative labels are
            rejected explicitly; plain list indexing would otherwise wrap
            around and silently emit a wrong answer.
    """
    premise = sample['premise']
    hypothesis = sample['hypothesis']
    label = sample['label']

    # A single randint call keeps the random stream identical to the
    # original five-branch implementation.
    template_idx = random.randint(0, len(_NLI_TEMPLATES) - 1)
    prompt, answer_choices = _NLI_TEMPLATES[template_idx]

    if not 0 <= label < len(answer_choices):
        raise IndexError(
            "label {!r} is out of range; expected an integer in 0..{}".format(
                label, len(answer_choices) - 1
            )
        )

    # Substitute everything through str.format, as the original did, so
    # argument values are inserted verbatim and never re-interpreted.
    return {'text': "{prompt}\n{sep_key}\n{label}\n{end_key}".format(
        prompt=prompt.format(premise=premise, hypothesis=hypothesis),
        sep_key=sep_key,
        label=answer_choices[label],
        end_key=end_key,
    )}

"""
The Pacific War actually began 70 minutes before the attack on Pearl Harbor , on Malaysia 's east coast , near Kota Bharu .
Question: Does this imply that "70 minutes prior to the Pearl Harbor attack , the Pacific War began ."? Yes, no, or maybe?
<|endofprompt|>
Yes
<|endoftext|>
"""