GENIA Term Corpus 数据集卡片

在生物分子文本挖掘领域，识别与分子生物学中受关注的实体（如蛋白质、基因和细胞）相关的语言表达是一项基本任务。GENIA 技术术语标注涵盖了物理生物实体以及其他重要术语的识别。该语料库的标注覆盖了 GENIA 主语料库的全部 1,999 篇摘要。

引用信息

@inproceedings{10.5555/1289189.1289260,
  author    = {Ohta, Tomoko and Tateisi, Yuka and Kim, Jin-Dong},
  title     = {The {GENIA} Corpus: An Annotated Research Abstract Corpus in Molecular Biology Domain},
  booktitle = {Proceedings of the Second International Conference on Human Language Technology Research},
  series    = {HLT '02},
  year      = {2002},
  pages     = {82--86},
  numpages  = {5},
  publisher = {Morgan Kaufmann Publishers Inc.},
  address   = {San Francisco, CA, USA},
  location  = {San Diego, California}
}

@article{Kim2003GENIAC,
  author  = {Kim, Jin-Dong and Ohta, Tomoko and Tateisi, Yuka and Tsujii, Junichi},
  title   = {{GENIA} corpus---a semantically annotated corpus for bio-textmining},
  journal = {Bioinformatics},
  year    = {2003},
  volume  = {19},
  number  = {Suppl 1},
  pages   = {i180--i182}
}

@inproceedings{10.5555/1567594.1567610,
  author    = {Kim, Jin-Dong and Ohta, Tomoko and Tsuruoka, Yoshimasa and Tateisi, Yuka and Collier, Nigel},
  title     = {Introduction to the Bio-Entity Recognition Task at {JNLPBA}},
  booktitle = {Proceedings of the International Joint Workshop on Natural Language Processing in Biomedicine and Its Applications},
  series    = {JNLPBA '04},
  year      = {2004},
  pages     = {70--75},
  numpages  = {6},
  publisher = {Association for Computational Linguistics},
  address   = {USA},
  location  = {Geneva, Switzerland}
}

作者:

bigbio

数据集大小:

34.07 KB