/// <summary>
/// Creates an iterator tied to the given enclosing reader/writer instance.
/// </summary>
/// <param name="_enclosing">The outer <c>CoNLLDocumentReaderAndWriter</c> this iterator belongs to.</param>
/// <param name="r">The reader handed to <c>CoNLLDocumentReaderAndWriter.SplitIntoDocs</c>.</param>
public CoNLLIterator(CoNLLDocumentReaderAndWriter _enclosing, Reader r)
{
    // The parameter shadows the field of the same name, so the explicit
    // "this." qualifier is required here.
    this._enclosing = _enclosing;

    // Keep whatever SplitIntoDocs produces for the reader; iteration is
    // driven from this value.
    stringIter = CoNLLDocumentReaderAndWriter.SplitIntoDocs(r);
}
/// <summary>Count some stats on what occurs in a file.</summary>
/// <remarks>
/// Reads the input named by <c>args[0]</c> document by document and reports
/// the number of documents, the number of tokens (tokens whose word equals
/// <c>Boundary</c> are skipped and not counted), and the number of entities.
/// A new entity is counted for every non-"O" token whose base label differs
/// from the previous counted token's base label, or whose prefix is "B".
/// The words of each MISC entity are joined with single spaces and tallied
/// in a counter that is printed at the end.
/// </remarks>
/// <param name="args">
/// Command-line arguments; <c>args[0]</c> is passed to
/// <c>IOUtils.ReaderFromString</c> to obtain the input.
/// </param>
/// <exception cref="System.ArgumentException">If no input argument is supplied.</exception>
/// <exception cref="System.IO.IOException"/>
/// <exception cref="System.TypeLoadException"/>
public static void Main(string[] args)
{
    // Fail with a clear message instead of an IndexOutOfRangeException on args[0].
    if (args == null || args.Length < 1)
    {
        throw new System.ArgumentException("Usage: CoNLLDocumentReaderAndWriter <file>", "args");
    }

    CoNLLDocumentReaderAndWriter rw = new CoNLLDocumentReaderAndWriter();
    rw.Init(new SeqClassifierFlags());

    int numDocs = 0;
    int numTokens = 0;
    int numEntities = 0;
    string lastAnsBase = string.Empty;
    ICounter<string> miscCounter = new ClassicCounter<string>();
    StringBuilder inProgressMisc = new StringBuilder();

    IEnumerator<IList<CoreLabel>> it = rw.GetIterator(IOUtils.ReaderFromString(args[0]));
    while (it.MoveNext())
    {
        IList<CoreLabel> doc = it.Current;
        numDocs++;

        foreach (CoreLabel fl in doc)
        {
            string word = fl.Word();
            if (word.Equals(Boundary))
            {
                // Boundary markers are not tokens; note that lastAnsBase is
                // deliberately left untouched across them.
                continue;
            }

            // Split a label such as "B-MISC" into prefix ("B") and base
            // ("MISC"); a label without '-' (e.g. "O") has an empty prefix.
            string ans = fl.Get(typeof(CoreAnnotations.AnswerAnnotation));
            string[] bits = ans.Split("-");
            string ansBase;
            string ansPrefix;
            if (bits.Length == 1)
            {
                ansBase = bits[0];
                ansPrefix = string.Empty;
            }
            else
            {
                ansBase = bits[1];
                ansPrefix = bits[0];
            }

            numTokens++;

            if (ansBase.Equals("O"))
            {
                // Outside any entity: close off a MISC entity in progress.
                inProgressMisc = MaybeIncrementCounter(inProgressMisc, miscCounter);
            }
            else
            {
                // An entity starts when the base label changes, or when an
                // explicit "B" prefix separates two adjacent same-type entities.
                bool startsEntity = !ansBase.Equals(lastAnsBase) || ansPrefix.Equals("B");
                if (startsEntity)
                {
                    numEntities++;
                    inProgressMisc = MaybeIncrementCounter(inProgressMisc, miscCounter);
                }

                if (ansBase.Equals("MISC"))
                {
                    if (inProgressMisc.Length > 0)
                    {
                        // already something there
                        inProgressMisc.Append(' ');
                    }
                    inProgressMisc.Append(word);
                }
            }

            lastAnsBase = ansBase;
        } // for tokens
    } // for documents

    // A MISC entity that ends on the very last token is never followed by a
    // token that would close it, so flush it here; otherwise it is counted in
    // numEntities but missing from miscCounter. This is the same helper call
    // the loop uses at entity boundaries (helper defined elsewhere in the
    // class; presumably a no-op when the buffer is empty -- confirm).
    MaybeIncrementCounter(inProgressMisc, miscCounter);

    System.Console.Out.WriteLine("File " + args[0] + " has " + numDocs + " documents, " + numTokens + " (non-blank line) tokens and " + numEntities + " entities.");
    System.Console.Out.Printf("Here are the %.0f MISC items with counts:%n", miscCounter.TotalCount());
    System.Console.Out.WriteLine(Counters.ToVerticalString(miscCounter, "%.0f\t%s"));
}