Example #1
0
        /// <summary>Escapes the tokens of an input file or URL and prints them, one per line.</summary>
        /// <remarks>
        /// The input should already be tokenized, with tokens separated by whitespace. <br />
        /// An argument beginning with <c>http://</c> is loaded as a URL and passed through a
        /// <c>StripTagsProcessor</c> before escaping; any other argument is loaded as a local file. <br />
        /// Any exception raised while loading or processing is caught and its stack trace is printed. <br />
        /// Usage: java edu.stanford.nlp.process.PTBEscapingProcessor fileOrUrl
        /// </remarks>
        /// <param name="args">Command line argument: a file or URL</param>
        public static void Main(string[] args)
        {
            // Exactly one argument is required; otherwise print the usage line and stop.
            if (args.Length != 1)
            {
                System.Console.Out.WriteLine("usage: java edu.stanford.nlp.process.PTBEscapingProcessor fileOrUrl");
                return;
            }
            string filename = args[0];

            try
            {
                IDocument<string, Word, Word> d;
                // The scheme prefix is a machine identifier, so compare it ordinally: the
                // culture-sensitive overload may ignore zero-width/ignorable characters and
                // misclassify a local path as a URL.
                if (filename.StartsWith("http://", System.StringComparison.Ordinal))
                {
                    // Remote input: load from the URL, then run it through StripTagsProcessor.
                    IDocument<string, Word, Word> dpre = new BasicDocument<string>(WhitespaceTokenizer.Factory()).Init(new URL(filename));
                    IDocumentProcessor<Word, Word, string, Word> notags = new StripTagsProcessor<string, Word>();
                    d = notags.ProcessDocument(dpre);
                }
                else
                {
                    // Local input: load the whitespace-tokenized file directly.
                    d = new BasicDocument<string>(WhitespaceTokenizer.Factory()).Init(new File(filename));
                }
                IDocumentProcessor<Word, IHasWord, string, Word> proc = new Edu.Stanford.Nlp.Process.PTBEscapingProcessor<Word, string, Word>();
                IDocument<string, Word, IHasWord> newD = proc.ProcessDocument(d);
                foreach (IHasWord word in newD)
                {
                    System.Console.Out.WriteLine(word);
                }
            }
            catch (Exception e)
            {
                // Best-effort command-line tool: report the failure instead of crashing.
                Sharpen.Runtime.PrintStackTrace(e);
            }
        }
 /// <summary>
 /// Runs the shared <c>Test</c> input through two whitespace tokenizer factories:
 /// <c>Factory(false)</c> is checked against <c>ResultsNoEol</c> and
 /// <c>Factory(true)</c> against <c>ResultsEol</c>.
 /// </summary>
 /// <remarks>
 /// NOTE(review): judging by the expected-result names, the boolean presumably
 /// controls whether end-of-line tokens are produced; confirm against
 /// <c>WhitespaceTokenizer.Factory</c>.
 /// </remarks>
 public virtual void TestWordTokenizer()
 {
     // First pass: factory created with the flag off.
     var noEolFactory = WhitespaceTokenizer.Factory(false);
     RunTest(noEolFactory, Test, ResultsNoEol);

     // Second pass: factory created with the flag on.
     var eolFactory = WhitespaceTokenizer.Factory(true);
     RunTest(eolFactory, Test, ResultsEol);
 }