/// <summary>
/// Loads the next ACE annotation file from <c>files</c>, runs the Stanford pipeline over it,
/// and assembles the resulting document together with its gold and predicted mentions.
/// </summary>
/// <returns>
/// The document built by <c>Arrange</c>, or <c>null</c> when no remaining entry of
/// <c>files</c> contains "apf.xml".
/// </returns>
/// <exception cref="System.Exception"/>
/// <exception cref="RuntimeIOException">Wraps any <c>IOException</c> raised while reading or processing the file.</exception>
public override Document NextDoc()
{
    IList<Tree> allTrees = new List<Tree>();
    IList<IList<CoreLabel>> allWords = new List<IList<CoreLabel>>();
    IList<IList<Mention>> allGoldMentions = new List<IList<Mention>>();
    IList<IList<Mention>> allPredictedMentions;
    Annotation anno;

    try
    {
        // Consume entries of the file list until one APF annotation file has been taken.
        string filename = string.Empty;
        while (fileIndex < files.Length)
        {
            string candidate = files[fileIndex];
            bool isApfFile = candidate.Contains("apf.xml");
            fileIndex++;
            if (isApfFile)
            {
                filename = candidate;
                break;
            }
        }

        // Nothing left to read: the list is exhausted and no APF file was picked up.
        if (filename.Length == 0 && fileIndex >= files.Length)
        {
            return null;
        }

        anno = aceReader.Parse(corpusPath + filename);
        stanfordProcessor.Annotate(anno);

        IList<ICoreMap> sentences = anno.Get(typeof(CoreAnnotations.SentencesAnnotation));
        foreach (ICoreMap sentence in sentences)
        {
            IList<CoreLabel> tokens = sentence.Get(typeof(CoreAnnotations.TokensAnnotation));

            // Token indices are 1-based within each sentence.
            int position = 0;
            foreach (CoreLabel token in tokens)
            {
                position++;
                token.Set(typeof(CoreAnnotations.IndexAnnotation), position);

                // Tokens without an utterance annotation get utterance 0.
                bool hasUtterance = token.ContainsKey(typeof(CoreAnnotations.UtteranceAnnotation));
                if (!hasUtterance)
                {
                    token.Set(typeof(CoreAnnotations.UtteranceAnnotation), 0);
                }
            }

            allTrees.Add(sentence.Get(typeof(TreeCoreAnnotations.TreeAnnotation)));
            allWords.Add(tokens);

            // A fresh comparator is created for every sentence.
            ExtractGoldMentions(sentence, allGoldMentions, new ACEMentionExtractor.EntityComparator());
        }

        // maxID has been raised by ExtractGoldMentions for every gold mention seen above.
        allPredictedMentions = mentionFinder.ExtractPredictedMentions(anno, maxID, dictionaries);

        PrintRawDoc(sentences, allGoldMentions, filename, true);
        PrintRawDoc(sentences, allPredictedMentions, filename, false);
    }
    catch (IOException ioFailure)
    {
        throw new RuntimeIOException(ioFailure);
    }

    return Arrange(anno, allWords, allTrees, allPredictedMentions, allGoldMentions, true);
}
/// <summary>
/// Converts the gold entity mentions annotated on one sentence into <c>Mention</c> objects
/// and appends them, as a new per-sentence list, to <paramref name="allGoldMentions"/>.
/// Also raises <c>maxID</c> to the largest mention ID encountered.
/// </summary>
/// <param name="s">Sentence whose <c>EntityMentionsAnnotation</c> supplies the gold mentions.</param>
/// <param name="allGoldMentions">
/// Gold mentions collected so far, one list per sentence. A new list for this sentence is
/// always appended, even when the sentence has no gold mentions.
/// </param>
/// <param name="comparator">Ordering used by the <c>TreeSet</c> that the gold mentions are processed in.</param>
/// <exception cref="System.FormatException">
/// The last segment of an object ID or coref ID is not a valid integer.
/// </exception>
private void ExtractGoldMentions(ICoreMap s, IList <IList <Mention> > allGoldMentions, ACEMentionExtractor.EntityComparator comparator)
{
    // Register this sentence's list first so the antecedent search below also sees
    // mentions already added for the current sentence.
    IList<Mention> goldMentions = new List<Mention>();
    allGoldMentions.Add(goldMentions);

    IList<EntityMention> goldMentionList = s.Get(typeof(MachineReadingAnnotations.EntityMentionsAnnotation));
    TreeSet<EntityMention> treeForSortGoldMentions = new TreeSet<EntityMention>(comparator);
    if (goldMentionList != null)
    {
        Sharpen.Collections.AddAll(treeForSortGoldMentions, goldMentionList);
    }

    // Iterating an empty set is a no-op, so no separate emptiness check is needed.
    foreach (EntityMention e in treeForSortGoldMentions)
    {
        Mention men = new Mention();

        // Use collapsed dependencies when present, otherwise the enhanced ones.
        men.dependency = s.Get(typeof(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation));
        if (men.dependency == null)
        {
            men.dependency = s.Get(typeof(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation));
        }

        men.startIndex = e.GetExtentTokenStart();
        men.endIndex = e.GetExtentTokenEnd();

        // The mention ID is the number after the last "-" of the object ID.
        string[] parseID = e.GetObjectId().Split("-");
        men.mentionID = System.Convert.ToInt32(parseID[parseID.Length - 1]);

        // The gold cluster ID is the number after the last "-E" of the coref ID.
        string[] parseCorefID = e.GetCorefID().Split("-E");
        men.goldCorefClusterID = System.Convert.ToInt32(parseCorefID[parseCorefID.Length - 1]);

        // originalRef is the ID of the first collected gold mention with the same cluster,
        // or -1 when this mention is the first of its cluster. Scanning forward and
        // stopping at the first hit gives the same answer as visiting every mention in
        // reverse and keeping the last assignment.
        men.originalRef = -1;
        bool antecedentFound = false;
        for (int j = 0; j < allGoldMentions.Count && !antecedentFound; j++)
        {
            IList<Mention> collected = allGoldMentions[j];
            for (int k = 0; k < collected.Count; k++)
            {
                Mention m = collected[k];
                if (men.goldCorefClusterID == m.goldCorefClusterID)
                {
                    men.originalRef = m.mentionID;
                    antecedentFound = true;
                    break;
                }
            }
        }

        goldMentions.Add(men);
        if (men.mentionID > maxID)
        {
            maxID = men.mentionID;
        }

        // NOTE(review): this method does not write NER tags onto the sentence tokens;
        // confirm whether gold NER types are expected to be applied elsewhere.
    }
}