/// <summary>Makes sure a sentence ends with a sentence final punctuation mark.</summary>
        /// <remarks>
        /// Inspects at most the last three tokens of the sentence (so that a
        /// trailing close parenthesis or similar does not hide the final mark).
        /// A token that carries a non-empty tag is judged by that tag alone,
        /// using the language pack's sentence final punctuation tags; a token
        /// without a usable tag is judged by its word, using the language pack's
        /// punctuation words. If no inspected token matches, the first sentence
        /// final punctuation word of the language pack (when it defines one) is
        /// appended to the sentence.
        /// </remarks>
        /// <param name="sentence">The sentence to check; it may be extended in place</param>
        /// <param name="length">The length of the sentence (passed in to avoid recomputation)</param>
        /// <returns>
        /// <c>false</c> when punctuation was found among the inspected tokens;
        /// <c>true</c> otherwise, even if the language pack had no sentence final
        /// punctuation word to append.
        /// </returns>
        private bool AddSentenceFinalPunctIfNeeded(IList <IHasWord> sentence, int length)
        {
            // Only the last three tokens (or fewer, for short sentences) are examined.
            int windowStart = (length > 3) ? length - 3 : 0;
            ITreebankLanguagePack languagePack = op.tlpParams.TreebankLanguagePack();

            for (int index = length - 1; index >= windowStart; index--)
            {
                IHasWord token = sentence[index];

                // An object (e.g., CoreLabel) can implement IHasTag without storing a
                // tag, so the tag is trusted only when it is actually present.
                // When present it is used exclusively, since word tokens can be ambiguous.
                IHasTag tagged = token as IHasTag;
                string tag = (tagged != null) ? tagged.Tag() : null;

                bool punctuationFound = string.IsNullOrEmpty(tag)
                    ? languagePack.IsPunctuationWord(token.Word())
                    : languagePack.IsSentenceFinalPunctuationTag(tag);
                if (punctuationFound)
                {
                    return false;
                }
            }

            // Nothing found in the window, so append a mark.
            if (op.testOptions.verbose)
            {
                log.Info("Adding missing final punctuation to sentence.");
            }
            string[] finalPunctuationWords = languagePack.SentenceFinalPunctuationWords();
            if (finalPunctuationWords.Length != 0)
            {
                sentence.Add(new Word(finalPunctuationWords[0]));
            }
            return true;
        }
     /// <summary>Creates a file parser configured from the given options.</summary>
     /// <remarks>
     /// Stores the collaborators, obtains the treebank language pack and the
     /// output/error writers from <c>op.tlpParams</c>, and reads the
     /// "runningAverages", "summary", "pcfgLL", "depLL" and "factLL" evaluation
     /// properties from <c>op.testOptions.evals</c>. Each of the three score
     /// evaluators is created only when its property is "true"; otherwise the
     /// corresponding field is set to <c>null</c>.
     /// </remarks>
     /// <param name="op">Parser options; also the source of the language pack, writers and evaluation properties</param>
     /// <param name="treePrint">Printer used to write parse results</param>
     /// <param name="pqFactory">Parser from which parser queries are obtained</param>
     public ParseFiles(Options op, TreePrint treePrint, LexicalizedParser pqFactory)
 {
     this.op        = op;
     this.pqFactory = pqFactory;
     this.treePrint = treePrint;
     this.tlp       = op.tlpParams.TreebankLanguagePack();
     this.pwOut     = op.tlpParams.Pw();
     this.pwErr     = op.tlpParams.Pw(System.Console.Error);
     if (op.testOptions.verbose)
     {
         pwErr.Println("Sentence final words are: " + Arrays.AsList(tlp.SentenceFinalPunctuationWords()));
         pwErr.Println("File encoding is: " + op.tlpParams.GetInputEncoding());
     }
     // evaluation setup
     // runningAverages must be assigned before the evaluators are created,
     // because CreateScoreEvalIfEnabled reads it.
     this.runningAverages = IsEvalPropertyTrue("runningAverages");
     this.summary         = IsEvalPropertyTrue("summary");
     this.pcfgLL          = CreateScoreEvalIfEnabled("pcfgLL");
     this.depLL           = CreateScoreEvalIfEnabled("depLL");
     this.factLL          = CreateScoreEvalIfEnabled("factLL");
 }

 /// <summary>
 /// Creates the score evaluator called <paramref name="evalName"/> when the
 /// evaluation property of the same name is "true".
 /// </summary>
 /// <param name="evalName">Name of both the evaluation property and the evaluator</param>
 /// <returns>A new evaluator, or <c>null</c> when the property is not "true"</returns>
 private AbstractEval.ScoreEval CreateScoreEvalIfEnabled(string evalName)
 {
     return IsEvalPropertyTrue(evalName) ? new AbstractEval.ScoreEval(evalName, runningAverages) : null;
 }

 /// <summary>
 /// Reads an evaluation property as a boolean: the result is <c>true</c> only
 /// when the value equals "true" ignoring case; a missing (null) value or any
 /// other text gives <c>false</c>.
 /// </summary>
 /// <param name="propertyName">Key looked up in <c>op.testOptions.evals</c></param>
 /// <returns>Whether the property value is "true" (case-insensitive)</returns>
 private bool IsEvalPropertyTrue(string propertyName)
 {
     // System.Boolean has no ParseBoolean member, and bool.Parse throws on null
     // or unrecognised text, so the comparison is done explicitly.
     string value = op.testOptions.evals.GetProperty(propertyName);
     return string.Equals(value, "true", System.StringComparison.OrdinalIgnoreCase);
 }
 /// <summary>
 /// Parses every input named in <paramref name="args"/>, starting at
 /// <paramref name="argIndex"/>, prints the results and then reports totals.
 /// </summary>
 /// <remarks>
 /// An argument of "-" is read from standard input; any other argument is
 /// handed to the document preprocessor as a file name. When
 /// <c>op.testOptions.writeOutputFiles</c> is set, results for each input go to
 /// a file named after the input (extension "stp" unless
 /// <c>outputFilesExtension</c> is set, placed in <c>outputFilesDirectory</c>
 /// when that is non-empty); otherwise they go to the shared output writer.
 /// Sentences are parsed through a multicore wrapper unless
 /// <c>op.testOptions.testingThreads</c> is 1. After all inputs, the optional
 /// evaluation summaries, throughput and counts of fallback/unparsed sentences
 /// are written to the error writer.
 /// </remarks>
 /// <typeparam name="_T0">Token type produced by <paramref name="tokenizerFactory"/></typeparam>
 /// <param name="args">Argument array containing the inputs to parse</param>
 /// <param name="argIndex">Index of the first input in <paramref name="args"/></param>
 /// <param name="tokenized">
 /// Used only when <paramref name="tokenizerFactory"/> is null: if true no
 /// tokenizer factory is set, otherwise the language pack's tokenizer factory is used
 /// </param>
 /// <param name="tokenizerFactory">Tokenizer factory to use, or null to fall back as described for <paramref name="tokenized"/></param>
 /// <param name="elementDelimiter">Passed to the document preprocessor; null selects the plain document type, non-null the XML document type</param>
 /// <param name="sentenceDelimiter">Passed to the document preprocessor</param>
 /// <param name="escaper">Passed to the document preprocessor</param>
 /// <param name="tagDelimiter">Passed to the document preprocessor</param>
 public virtual void ParseFiles <_T0>(string[] args, int argIndex, bool tokenized, ITokenizerFactory <_T0> tokenizerFactory, string elementDelimiter, string sentenceDelimiter, IFunction <IList <IHasWord>, IList <IHasWord> > escaper, string tagDelimiter
                                      )
 where _T0 : IHasWord
 {
     DocumentPreprocessor.DocType docType = (elementDelimiter == null) ? DocumentPreprocessor.DocType.Plain : DocumentPreprocessor.DocType.Xml;
     if (op.testOptions.verbose && tokenizerFactory != null)
     {
         pwErr.Println("parseFiles: Tokenizer factory is: " + tokenizerFactory);
     }
     Timing timer = new Timing();
     // timer.start(); // constructor already starts it.
     //Loop over the files
     for (int i = argIndex; i < args.Length; i++)
     {
         string filename = args[i];
         DocumentPreprocessor documentPreprocessor;
         if (filename.Equals("-"))
         {
             try
             {
                 documentPreprocessor = new DocumentPreprocessor(IOUtils.ReaderFromStdin(op.tlpParams.GetInputEncoding()), docType);
             }
             catch (IOException e)
             {
                 throw new RuntimeIOException(e);
             }
         }
         else
         {
             documentPreprocessor = new DocumentPreprocessor(filename, docType, op.tlpParams.GetInputEncoding());
         }
         //Unused values are null per the main() method invocation below
         //null is the default for these properties
         documentPreprocessor.SetSentenceFinalPuncWords(tlp.SentenceFinalPunctuationWords());
         documentPreprocessor.SetEscaper(escaper);
         documentPreprocessor.SetSentenceDelimiter(sentenceDelimiter);
         documentPreprocessor.SetTagDelimiter(tagDelimiter);
         documentPreprocessor.SetElementDelimiter(elementDelimiter);
         if (tokenizerFactory == null)
         {
             documentPreprocessor.SetTokenizerFactory((tokenized) ? null : tlp.GetTokenizerFactory());
         }
         else
         {
             documentPreprocessor.SetTokenizerFactory(tokenizerFactory);
         }
         //Setup the output
         PrintWriter pwo = pwOut;
         // True only when this iteration opened its own writer, which it must then close.
         bool ownsOutputWriter = false;
         if (op.testOptions.writeOutputFiles)
         {
             string normalizedName = filename;
             try
             {
                 new URL(normalizedName);
                 // this will exception if not a URL
                 normalizedName = normalizedName.ReplaceAll("/", "_");
             }
             catch (MalformedURLException)
             {
                 //It isn't a URL, so silently ignore
             }
             string ext   = (op.testOptions.outputFilesExtension == null) ? "stp" : op.testOptions.outputFilesExtension;
             string fname = normalizedName + '.' + ext;
             if (op.testOptions.outputFilesDirectory != null && !op.testOptions.outputFilesDirectory.IsEmpty())
             {
                 string fseparator = Runtime.GetProperty("file.separator");
                 if (fseparator == null || fseparator.IsEmpty())
                 {
                     fseparator = "/";
                 }
                 // Keep only the last path component so the file lands directly in the output directory.
                 File fnameFile = new File(fname);
                 fname          = op.testOptions.outputFilesDirectory + fseparator + fnameFile.GetName();
             }
             try
             {
                 pwo = op.tlpParams.Pw(new FileOutputStream(fname));
             }
             catch (IOException ioe)
             {
                 throw new RuntimeIOException(ioe);
             }
             ownsOutputWriter = true;
         }
         int num = 0;
         try
         {
             treePrint.PrintHeader(pwo, op.tlpParams.GetOutputEncoding());
             pwErr.Println("Parsing file: " + filename);
             int numProcessed = 0;
             if (op.testOptions.testingThreads != 1)
             {
                 MulticoreWrapper <IList <IHasWord>, IParserQuery> wrapper = new MulticoreWrapper <IList <IHasWord>, IParserQuery>(op.testOptions.testingThreads, new ParsingThreadsafeProcessor(pqFactory, pwErr));
                 foreach (IList <IHasWord> sentence in documentPreprocessor)
                 {
                     num++;
                     numSents++;
                     int len   = sentence.Count;
                     numWords += len;
                     pwErr.Println("Parsing [sent. " + num + " len. " + len + "]: " + SentenceUtils.ListToString(sentence, true));
                     wrapper.Put(sentence);
                     // Drain whatever results are already available before queueing more work.
                     while (wrapper.Peek())
                     {
                         IParserQuery pq = wrapper.Poll();
                         ProcessResults(pq, numProcessed++, pwo);
                     }
                 }
                 wrapper.Join();
                 // Collect the results that became available after the last sentence was queued.
                 while (wrapper.Peek())
                 {
                     IParserQuery pq = wrapper.Poll();
                     ProcessResults(pq, numProcessed++, pwo);
                 }
             }
             else
             {
                 IParserQuery pq = pqFactory.ParserQuery();
                 foreach (IList <IHasWord> sentence in documentPreprocessor)
                 {
                     num++;
                     numSents++;
                     int len   = sentence.Count;
                     numWords += len;
                     pwErr.Println("Parsing [sent. " + num + " len. " + len + "]: " + SentenceUtils.ListToString(sentence, true));
                     pq.ParseAndReport(sentence, pwErr);
                     ProcessResults(pq, numProcessed++, pwo);
                 }
             }
             treePrint.PrintFooter(pwo);
         }
         finally
         {
             // Close the per-file writer even when parsing or printing throws,
             // so a failure does not leak the open output file.
             if (ownsOutputWriter)
             {
                 pwo.Close();
             }
         }
         pwErr.Println("Parsed file: " + filename + " [" + num + " sentences].");
     }
     long millis = timer.Stop();
     if (summary)
     {
         if (pcfgLL != null)
         {
             pcfgLL.Display(false, pwErr);
         }
         if (depLL != null)
         {
             depLL.Display(false, pwErr);
         }
         if (factLL != null)
         {
             factLL.Display(false, pwErr);
         }
     }
     if (saidMemMessage)
     {
         ParserUtils.PrintOutOfMemory(pwErr);
     }
     double wordspersec = numWords / (((double)millis) / 1000);
     double sentspersec = numSents / (((double)millis) / 1000);
     NumberFormat nf    = new DecimalFormat("0.00");
     // easier way!
     pwErr.Println("Parsed " + numWords + " words in " + numSents + " sentences (" + nf.Format(wordspersec) + " wds/sec; " + nf.Format(sentspersec) + " sents/sec).");
     if (numFallback > 0)
     {
         pwErr.Println("  " + numFallback + " sentences were parsed by fallback to PCFG.");
     }
     if (numUnparsable > 0 || numNoMemory > 0 || numSkipped > 0)
     {
         pwErr.Println("  " + (numUnparsable + numNoMemory + numSkipped) + " sentences were not parsed:");
         if (numUnparsable > 0)
         {
             pwErr.Println("    " + numUnparsable + " were not parsable with non-zero probability.");
         }
         if (numNoMemory > 0)
         {
             pwErr.Println("    " + numNoMemory + " were skipped because of insufficient memory.");
         }
         if (numSkipped > 0)
         {
             pwErr.Println("    " + numSkipped + " were skipped as length 0 or greater than " + op.testOptions.maxLength);
         }
     }
 }