数据集:
bigbio/hallmarks_of_cancer
癌症标志性特征(Hallmarks of Cancer,HOC)语料库由1852篇PubMed文献摘要组成,这些摘要由专家根据一套分类体系手动标注。该分类体系包含按层次结构组织的37个类别。语料库中的每个句子可以被分配零个或多个类别标签。标签位于"labels"目录下,分词后的文本位于"text"目录下。文件名为对应的PubMed ID(PMID)。
@article{DBLP:journals/bioinformatics/BakerSGAHSK16,
  author    = {Baker, Simon and
               Silins, Ilona and
               Guo, Yufan and
               Ali, Imran and
               H{\"o}gberg, Johan and
               Stenius, Ulla and
               Korhonen, Anna},
  title     = {Automatic semantic classification of scientific literature according to the hallmarks of cancer},
  journal   = {Bioinformatics},
  volume    = {32},
  number    = {3},
  pages     = {432--440},
  year      = {2016},
  doi       = {10.1093/bioinformatics/btv585},
  url       = {https://doi.org/10.1093/bioinformatics/btv585},
  timestamp = {Thu, 14 Oct 2021 08:57:44 +0200},
  biburl    = {https://dblp.org/rec/journals/bioinformatics/BakerSGAHSK16.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}