数据集:
bigbio/genia_term_corpus
在生物分子文本挖掘领域中,识别与分子生物学中感兴趣的实体(如蛋白质、基因和细胞)相关的语言表达是一项基本任务。GENIA技术术语标注涵盖了物理生物实体以及其他重要术语的识别。语料库标注涵盖了主要GENIA语料库的全部1,999篇摘要。
% GENIA corpus paper from the HLT '02 proceedings.
@inproceedings{10.5555/1289189.1289260,
  author    = {Ohta, Tomoko and Tateisi, Yuka and Kim, Jin-Dong},
  title     = {The {GENIA} Corpus: An Annotated Research Abstract Corpus in Molecular Biology Domain},
  booktitle = {Proceedings of the Second International Conference on Human Language Technology Research},
  series    = {HLT '02},
  pages     = {82--86},
  numpages  = {5},
  year      = {2002},
  publisher = {Morgan Kaufmann Publishers Inc.},
  address   = {San Francisco, CA, USA},
  location  = {San Diego, California}
}

% GENIA corpus journal article (Bioinformatics, volume 19, supplement 1).
@article{Kim2003GENIAC,
  author  = {Kim, Jin-Dong and Ohta, Tomoko and Tateisi, Yuka and Tsujii, Junichi},
  title   = {{GENIA} corpus---a semantically annotated corpus for bio-textmining},
  journal = {Bioinformatics},
  volume  = {19},
  number  = {Suppl 1},
  pages   = {i180--i182},
  year    = {2003}
}

% Overview paper of the bio-entity recognition task from the JNLPBA '04 proceedings.
@inproceedings{10.5555/1567594.1567610,
  author    = {Kim, Jin-Dong and Ohta, Tomoko and Tsuruoka, Yoshimasa and Tateisi, Yuka and Collier, Nigel},
  title     = {Introduction to the Bio-Entity Recognition Task at {JNLPBA}},
  booktitle = {Proceedings of the International Joint Workshop on Natural Language Processing in Biomedicine and Its Applications},
  series    = {JNLPBA '04},
  pages     = {70--75},
  numpages  = {6},
  year      = {2004},
  publisher = {Association for Computational Linguistics},
  address   = {USA},
  location  = {Geneva, Switzerland}
}