/// <summary>
/// Reads a CoNLL-style column file and splits it into sentences, returning one
/// <see cref="DataInstance"/> per sentence keyed by a generated sentence id.
/// </summary>
/// <remarks>
/// A sentence ends at a token whose word equals
/// <c>CoNLLDocumentReaderAndWriter.Boundary</c> or <c>"-DOCSTART-"</c>, and also at the
/// end of each token list produced by the reader's iterator. Boundary tokens themselves
/// are never added to a sentence, and empty sentences are skipped.
/// Sentence ids have the form <c>sentIDprefix + "-" + n</c>, where <c>n</c> starts at 0
/// and keeps increasing across all token lists returned by the iterator.
/// </remarks>
/// <param name="reader">Source of the column-formatted text.</param>
/// <param name="categoriesAllowed">Not used by this method; kept so existing callers keep compiling.</param>
/// <param name="setClassForTheseLabels">
/// Optional map from an answer label to the annotation key that should additionally be set
/// (with the label as its value) on tokens carrying that label. May be null.
/// </param>
/// <param name="setGoldClass">When true, the answer label is also stored as the gold answer annotation.</param>
/// <param name="sentIDprefix">Prefix used when building sentence ids.</param>
/// <returns>A dictionary from generated sentence id to the sentence's <see cref="DataInstance"/>.</returns>
public static IDictionary<string, DataInstance> ParseColumnFile(BufferedReader reader, ICollection<string> categoriesAllowed, IDictionary<string, Type> setClassForTheseLabels, bool setGoldClass, string sentIDprefix)
{
    // Configure the CoNLL reader with the same two flags the original set explicitly.
    CoNLLDocumentReaderAndWriter conllreader = new CoNLLDocumentReaderAndWriter();
    Properties props = new Properties();
    SeqClassifierFlags flags = new SeqClassifierFlags(props);
    flags.entitySubclassification = "noprefix";
    flags.retainEntitySubclassification = false;
    conllreader.Init(flags);

    IEnumerator<IList<CoreLabel>> dociter = conllreader.GetIterator(reader);

    // Starts at -1 so that the first emitted sentence gets id suffix 0.
    int num = -1;
    IDictionary<string, DataInstance> sents = new Dictionary<string, DataInstance>();

    while (dociter.MoveNext())
    {
        IList<CoreLabel> doc = dociter.Current;
        IList<CoreLabel> sentcore = new List<CoreLabel>();
        int tokenindex = 0;

        foreach (CoreLabel l in doc)
        {
            if (l.Word().Equals(CoNLLDocumentReaderAndWriter.Boundary) || l.Word().Equals("-DOCSTART-"))
            {
                // Boundary token: flush the sentence collected so far (if any) and start a new one.
                if (sentcore.Count > 0)
                {
                    num++;
                    string docid = sentIDprefix + "-" + num.ToString();
                    DataInstance sentInst = DataInstance.GetNewSurfaceInstance(sentcore);
                    sents[docid] = sentInst;
                    sentcore = new List<CoreLabel>();
                    tokenindex = 0;
                }
                continue;
            }

            // Token indices are 1-based within a sentence.
            tokenindex++;
            l.Set(typeof(CoreAnnotations.IndexAnnotation), tokenindex);
            l.Set(typeof(CoreAnnotations.ValueAnnotation), l.Word());

            string label = l.Get(typeof(CoreAnnotations.AnswerAnnotation));
            System.Diagnostics.Debug.Assert(label != null, "label cannot be null");

            l.Set(typeof(CoreAnnotations.TextAnnotation), l.Word());
            l.Set(typeof(CoreAnnotations.OriginalTextAnnotation), l.Word());

            if (setGoldClass)
            {
                l.Set(typeof(CoreAnnotations.GoldAnswerAnnotation), label);
            }

            // Look the label up by key. IDictionary.Contains expects a KeyValuePair, so the
            // key lookup must go through TryGetValue/ContainsKey. The null check avoids an
            // ArgumentNullException from the dictionary when assertions are compiled out.
            Type annotationClass;
            if (setClassForTheseLabels != null && label != null && setClassForTheseLabels.TryGetValue(label, out annotationClass))
            {
                l.Set(annotationClass, label);
            }

            sentcore.Add(l);
        }

        // Flush the trailing sentence when the token list did not end with a boundary token.
        if (sentcore.Count > 0)
        {
            num++;
            string docid = sentIDprefix + "-" + num.ToString();
            DataInstance sentInst = DataInstance.GetNewSurfaceInstance(sentcore);
            sents[docid] = sentInst;
        }
    }

    return sents;
}