/**
 * <summary> Builds a named entity recognition corpus out of the tree bank. The parse trees are visited in
 * index order; every tree for which LayerAll(ViewLayerType.NER) returns true is converted with
 * GenerateAnnotatedSentence and the resulting sentence is appended to the corpus. Trees that fail the
 * check are skipped.</summary>
 *
 * <returns>The newly created corpus.</returns>
 */
public NERCorpus Generate()
{
    var result = new NERCorpus();
    for (var index = 0; index < _treeBank.Size(); index++)
    {
        var tree = _treeBank.Get(index);
        // Skip trees that do not pass the NER layer check.
        if (!tree.LayerAll(ViewLayerType.NER))
        {
            continue;
        }

        Sentence annotated = tree.GenerateAnnotatedSentence();
        result.AddSentence(annotated);
    }

    return result;
}
/**
 * <summary> Loads the NER corpus from "../../../nerdata.txt" and verifies its sentence count and word count.
 * Afterwards the named entity type of every word in every sentence is tallied, and the totals for the
 * NONE, PERSON, ORGANIZATION and LOCATION types are compared with their expected values.</summary>
 */
public void TestNERCorpus()
{
    var typeCounts = new CounterHashMap<NamedEntityType>();
    var corpus = new NERCorpus("../../../nerdata.txt");

    // Overall corpus size.
    Assert.AreEqual(27556, corpus.SentenceCount());
    Assert.AreEqual(492233, corpus.NumberOfWords());

    // Count one occurrence per word, keyed by the word's named entity type.
    for (var s = 0; s < corpus.SentenceCount(); s++)
    {
        var sentence = (NamedEntitySentence)corpus.GetSentence(s);
        for (var w = 0; w < sentence.WordCount(); w++)
        {
            typeCounts.Put(((NamedEntityWord)sentence.GetWord(w)).GetNamedEntityType());
        }
    }

    // Per-type totals.
    Assert.AreEqual(438976, typeCounts[NamedEntityType.NONE]);
    Assert.AreEqual(23878, typeCounts[NamedEntityType.PERSON]);
    Assert.AreEqual(16931, typeCounts[NamedEntityType.ORGANIZATION]);
    Assert.AreEqual(12448, typeCounts[NamedEntityType.LOCATION]);
}