Example #1
0
        /// <summary>Tokenizes a file (or the default input stream) on whitespace and prints one token per line.</summary>
        /// <remarks>
        /// Primarily a testing aid, though it is also handy on its own for turning
        /// a corpus into a file with one token per line.
        /// <para>
        /// If the first argument is <c>-cr</c>, the tokenizer is constructed with
        /// <c>eolIsSignificant</c> set to true, and every token whose value equals
        /// <c>WhitespaceLexer.Newline</c> is printed as <c>***CR***</c>.
        /// </para>
        /// <para>
        /// The last argument, unless it is <c>-cr</c>, is taken as the path of the
        /// file to read (decoded as UTF-8). With no arguments, or when the last
        /// argument is <c>-cr</c>, input is read from <c>Runtime.@in</c>
        /// (presumably standard input) instead.
        /// </para>
        /// Usage (as given for the Java original):
        /// <c>java edu.stanford.nlp.process.WhitespaceTokenizer [-cr] [filename]</c>
        /// </remarks>
        /// <param name="args">Command line arguments: an optional leading <c>-cr</c> followed by an optional file name.</param>
        /// <exception cref="System.IO.IOException">If can't open files, etc.</exception>
        public static void Main(string[] args)
        {
            // "-cr" is only recognised as a flag in the first position.
            bool keepLineBreaks = false;
            if (args.Length > 0)
            {
                keepLineBreaks = args[0].Equals("-cr");
            }

            // Pick the input: the trailing argument names a file unless it is the flag itself.
            Reader source;
            if (args.Length == 0 || args[args.Length - 1].Equals("-cr"))
            {
                source = new InputStreamReader(Runtime.@in, "UTF-8");
            }
            else
            {
                string path = args[args.Length - 1];
                source = new InputStreamReader(new FileInputStream(path), "UTF-8");
            }

            WhitespaceTokenizer<Word> tokens = new WhitespaceTokenizer<Word>(new WordTokenFactory(), source, keepLineBreaks);
            PrintWriter output = new PrintWriter(new OutputStreamWriter(System.Console.Out, "UTF-8"), true);

            while (tokens.MoveNext())
            {
                Word token = tokens.Current;
                bool isLineBreak = token.Value().Equals(WhitespaceLexer.Newline);

                if (!isLineBreak)
                {
                    output.Println(token);
                    continue;
                }

                // Make end-of-line tokens visible in the one-token-per-line output.
                output.Println("***CR***");
            }
        }