/// <summary>Escapes the tokens of an input file or URL and prints the result.</summary>
/// <remarks>
/// The input is read through a <c>WhitespaceTokenizer</c>, so it should already be
/// tokenized, with tokens separated by whitespace. An argument that starts with
/// <c>http://</c> is loaded as a URL and run through a <c>StripTagsProcessor</c>
/// before escaping; any other argument is opened as a file. Each processed word is
/// written to standard output on its own line. <br />
/// Usage: java edu.stanford.nlp.process.PTBEscapingProcessor fileOrUrl
/// </remarks>
/// <param name="args">Command line argument: a file or URL</param>
public static void Main(string[] args)
{
    if (args.Length != 1)
    {
        System.Console.Out.WriteLine("usage: java edu.stanford.nlp.process.PTBEscapingProcessor fileOrUrl");
        return;
    }

    string filename = args[0];
    try
    {
        IDocument<string, Word, Word> d;

        // Ordinal comparison: this is a protocol-prefix check, not linguistic text,
        // so the result must not depend on the current culture.
        if (filename.StartsWith("http://", System.StringComparison.Ordinal))
        {
            // Remote input: load the page, then strip markup tags before escaping.
            IDocument<string, Word, Word> dpre = new BasicDocument<string>(WhitespaceTokenizer.Factory()).Init(new URL(filename));
            IDocumentProcessor<Word, Word, string, Word> notags = new StripTagsProcessor<string, Word>();
            d = notags.ProcessDocument(dpre);
        }
        else
        {
            // Local input: read the file directly.
            d = new BasicDocument<string>(WhitespaceTokenizer.Factory()).Init(new File(filename));
        }

        IDocumentProcessor<Word, IHasWord, string, Word> proc = new Edu.Stanford.Nlp.Process.PTBEscapingProcessor<Word, string, Word>();
        IDocument<string, Word, IHasWord> newD = proc.ProcessDocument(d);
        foreach (IHasWord word in newD)
        {
            System.Console.Out.WriteLine(word);
        }
    }
    catch (Exception e)
    {
        // Best-effort command-line tool: report the failure instead of propagating it.
        Sharpen.Runtime.PrintStackTrace(e);
    }
}
/// <summary>
/// Runs <c>RunTest</c> over the shared <c>Test</c> input twice: first with the factory
/// from <c>WhitespaceTokenizer.Factory(false)</c> against <c>ResultsNoEol</c>, then with
/// the factory from <c>WhitespaceTokenizer.Factory(true)</c> against <c>ResultsEol</c>.
/// </summary>
/// <remarks>
/// NOTE(review): the boolean presumably controls whether end-of-line is treated as a
/// token, judging by the expected-result names; confirm against WhitespaceTokenizer.
/// </remarks>
public virtual void TestWordTokenizer()
{
    var factoryWithoutEol = WhitespaceTokenizer.Factory(false);
    RunTest(factoryWithoutEol, Test, ResultsNoEol);

    var factoryWithEol = WhitespaceTokenizer.Factory(true);
    RunTest(factoryWithEol, Test, ResultsEol);
}