I took a pretrained DeBERTa-v3-base model and fine-tuned it on Few-NERD, an NER dataset that contains over 180k examples and over 4.6 million tokens.
The token labels are Person, Organisation, Location, Building, Event, Product, Art, and Misc.
def _group_entities(tokens):
    """Merge adjacent tokens that share a label into ``(text, label)`` pairs.

    Two tokens belong to the same entity when they carry the same
    ``'entity'`` value and their ``'index'`` values are consecutive.
    """
    groups = []
    pieces = []
    label = None
    prev_index = None
    for token in tokens:
        continues_entity = (
            bool(pieces)
            and token['entity'] == label
            and token['index'] == prev_index + 1
        )
        if not continues_entity:
            if pieces:
                groups.append((''.join(pieces), label))
            pieces = []
            label = token['entity']
        pieces.append(token['word'])
        prev_index = token['index']
    if pieces:
        groups.append((''.join(pieces), label))
    return groups


def print_ner(sentences):
    """Clean up and print NER results, one ``<entity text> <label>`` per line.

    Args:
        sentences: An iterable of per-sentence results. Each result is a
            sequence of dicts with the keys ``'entity'`` (label),
            ``'index'`` (token position) and ``'word'`` (token text).
            A sentence with no detected entities (an empty sequence) is
            skipped instead of raising ``IndexError``.
    """
    for sentence in sentences:
        for raw_text, label in _group_entities(sentence):
            # '▁' marks a word boundary in the token text; turn it into a
            # space and trim the edges. Trimming (rather than blindly
            # dropping the first character) keeps the first letter of an
            # entity that starts on a sub-word piece without a '▁' prefix.
            word = raw_text.replace('▁', ' ').strip()
            print(f"{word} {label}")


def main():
    """Run the model on a sample sentence and print the entities found."""
    # Imported here so that print_ner can be reused without loading the
    # (heavy) transformers package or downloading the model.
    from transformers import pipeline

    pipe = pipeline(model='RashidNLP/NER-Deberta')
    results = pipe(["Elon Musk will be at SpaceX's Starbase facility in Boca Chica for the orbital launch of starship next month"])
    print_ner(results)


if __name__ == "__main__":
    main()