/// <summary>
            /// Converts one line of text into a list of per-character <see cref="CoreLabel"/>s.
            /// </summary>
            /// <remarks>
            /// The line is trimmed and then passed through <c>cdtos.Normalization</c>. Every character of
            /// the normalized line that is neither whitespace nor a control character yields one label
            /// carrying: the normalized character, the matching character of the trimmed original line,
            /// optional shape / Unicode-type / Unicode-block features (selected by <c>flags</c>), a
            /// "1"/"0" marker (stored as answer, space-before and gold answer) that is "1" when the
            /// character is the first of the line or follows whitespace/control, and its running
            /// position among the emitted characters. Dictionary features are added afterwards when a
            /// dictionary is configured in <c>flags</c>.
            /// </remarks>
            /// <param name="line">The input line; may be <c>null</c>.</param>
            /// <returns><c>null</c> when <paramref name="line"/> is <c>null</c>; otherwise the list of labels.</returns>
            public virtual IList <CoreLabel> Apply(string line)
            {
                if (line == null)
                {
                    return(null);
                }
                // logger.info("input: " + line);
                line = line.Trim();
                IList <CoreLabel> lwi      = new List <CoreLabel>();
                string            origLine = line;

                line = this._enclosing.cdtos.Normalization(origLine);
                int           origIndex      = 0;
                int           position       = 0;
                StringBuilder nonspaceLineSB = new StringBuilder();

                // The loop bound was previously an undeclared identifier; it is the length of the
                // normalized line.
                int len = line.Length;

                for (int index = 0; index < len; index++)
                {
                    char ch = line[index];
                    if (char.IsWhiteSpace(ch) || char.IsControl(ch))
                    {
                        // Separators produce no label; they only influence the boundary marker of
                        // the next emitted character.
                        continue;
                    }

                    CoreLabel wi         = new CoreLabel();
                    string    wordString = char.ToString(ch);
                    wi.Set(typeof(CoreAnnotations.CharAnnotation), Sighan2005DocumentReaderAndWriter.Intern(wordString));
                    nonspaceLineSB.Append(wordString);

                    // Advance in the original line to the next non-separator character
                    // (non-breaking space is skipped as well).
                    // NOTE(review): assumes Normalization keeps non-separator characters in one-to-one
                    // order with the original line; otherwise this indexing can run past the end.
                    while (char.IsWhiteSpace(origLine[origIndex]) || char.IsControl(origLine[origIndex]) || (origLine[origIndex] == '\u00A0'))
                    {
                        origIndex++;
                    }
                    wordString = char.ToString(origLine[origIndex]);
                    wi.Set(typeof(CoreAnnotations.OriginalCharAnnotation), Sighan2005DocumentReaderAndWriter.Intern(wordString));

                    // put in a word shape (computed from the original, un-normalized character)
                    if (this._enclosing.flags.useShapeStrings)
                    {
                        wi.Set(typeof(CoreAnnotations.ShapeAnnotation), this._enclosing.ShapeOf(wordString));
                    }
                    if (this._enclosing.flags.useUnicodeType || this._enclosing.flags.useUnicodeType4gram || this._enclosing.flags.useUnicodeType5gram)
                    {
                        // NOTE(review): char.GetType(ch) mirrors Java's Character.getType and is not a
                        // .NET API; left as-is because the value type expected by UTypeAnnotation is
                        // not visible here.
                        wi.Set(typeof(CoreAnnotations.UTypeAnnotation), char.GetType(ch));
                    }
                    if (this._enclosing.flags.useUnicodeBlock)
                    {
                        wi.Set(typeof(CoreAnnotations.UBlockAnnotation), Characters.UnicodeBlockStringOf(ch));
                    }
                    origIndex++;

                    // "1" for the first character of the line or a character preceded by a
                    // separator in the normalized line, "0" otherwise.
                    bool startsSegment = index == 0
                                         || char.IsWhiteSpace(line[index - 1])
                                         || char.IsControl(line[index - 1]);
                    string boundaryMark = startsSegment ? "1" : "0";
                    wi.Set(typeof(CoreAnnotations.AnswerAnnotation), boundaryMark);
                    wi.Set(typeof(CoreAnnotations.SpaceBeforeAnnotation), boundaryMark);
                    wi.Set(typeof(CoreAnnotations.GoldAnswerAnnotation), boundaryMark);

                    wi.Set(typeof(CoreAnnotations.PositionAnnotation), Sighan2005DocumentReaderAndWriter.Intern((position).ToString()));
                    position++;
                    lwi.Add(wi);
                }

                // Concatenation of all emitted (normalized) characters, used for dictionary lookup.
                string nonspaceLine = nonspaceLineSB.ToString();
                if (this._enclosing.flags.dictionary != null || this._enclosing.flags.serializedDictionary != null)
                {
                    Sighan2005DocumentReaderAndWriter.AddDictionaryFeatures(this._enclosing.cdict, typeof(CoreAnnotations.LBeginAnnotation), typeof(CoreAnnotations.LMiddleAnnotation), typeof(CoreAnnotations.LEndAnnotation), nonspaceLine, lwi);
                }
                if (this._enclosing.flags.dictionary2 != null)
                {
                    Sighan2005DocumentReaderAndWriter.AddDictionaryFeatures(this._enclosing.cdict2, typeof(CoreAnnotations.D2_LBeginAnnotation), typeof(CoreAnnotations.D2_LMiddleAnnotation), typeof(CoreAnnotations.D2_LEndAnnotation), nonspaceLine, lwi);
                }
                // logger.info("output: " + lwi.size());
                return(lwi);
            }
 /// <summary>
 /// Creates a parser that keeps a reference to the supplied
 /// <see cref="Sighan2005DocumentReaderAndWriter"/>.
 /// </summary>
 /// <param name="_enclosing">Instance stored in the <c>_enclosing</c> field.</param>
 internal CTBDocumentParser(Sighan2005DocumentReaderAndWriter _enclosing) => this._enclosing = _enclosing;
        /// <summary>
        /// Command-line entry point: reads lines from standard input, segments each one with a
        /// <see cref="MaxMatchSegmenter"/>, and prints the result through a
        /// <see cref="Sighan2005DocumentReaderAndWriter"/> to standard output.
        /// </summary>
        /// <remarks>
        /// Recognized properties (parsed from <paramref name="args"/>):
        /// <c>lexicon</c> (required; the process exits with code 1 when absent),
        /// <c>greedy</c> (when present, greedy segmentation is used), and
        /// <c>maxwords</c> (when present and <c>greedy</c> is absent, the Maxwords heuristic is used).
        /// With neither option, the max-match segmentation of the lattice is used.
        /// </remarks>
        /// <param name="args">Command-line arguments, converted to properties.</param>
        public static void Main(string[] args)
        {
            Properties props = StringUtils.ArgsToProperties(args);
            // logger.debug(props.toString());
            SeqClassifierFlags flags       = new SeqClassifierFlags(props);
            MaxMatchSegmenter  seg         = new MaxMatchSegmenter();
            string             lexiconFile = props.GetProperty("lexicon");

            if (lexiconFile != null)
            {
                seg.AddLexicon(lexiconFile);
            }
            else
            {
                logger.Error("Error: no lexicon file!");
                System.Environment.Exit(1);
            }
            Sighan2005DocumentReaderAndWriter sighanRW = new Sighan2005DocumentReaderAndWriter();

            sighanRW.Init(flags);
            BufferedReader br      = new BufferedReader(new InputStreamReader(Runtime.@in));
            PrintWriter    stdoutW = new PrintWriter(System.Console.Out);
            int            lineNb  = 0;

            // The segmentation mode does not change between lines, so look it up once.
            bool useGreedy   = props.GetProperty("greedy") != null;
            bool useMaxWords = props.GetProperty("maxwords") != null;

            try
            {
                for (; ;)
                {
                    ++lineNb;
                    logger.Info("line: " + lineNb);
                    try
                    {
                        string line = br.ReadLine();
                        if (line == null)
                        {
                            break;
                        }
                        string outputLine = null;
                        if (useGreedy)
                        {
                            List <Word> sentence = seg.GreedilySegmentWords(line);
                            outputLine = SentenceUtils.ListToString(sentence);
                        }
                        else
                        {
                            // Both lattice-based modes need the lattice built first.
                            seg.BuildSegmentationLattice(line);
                            if (useMaxWords)
                            {
                                outputLine = SentenceUtils.ListToString(seg.SegmentWords(MaxMatchSegmenter.MatchHeuristic.Maxwords));
                            }
                            else
                            {
                                outputLine = SentenceUtils.ListToString(seg.MaxMatchSegmentation());
                            }
                        }
                        StringReader strR = new StringReader(outputLine);
                        IEnumerator <IList <CoreLabel> > itr = sighanRW.GetIterator(strR);
                        while (itr.MoveNext())
                        {
                            sighanRW.PrintAnswers(itr.Current, stdoutW);
                        }
                    }
                    catch (IOException e)
                    {
                        // Stop processing on I/O failure, but report it instead of ending silently.
                        logger.Error("I/O error at line " + lineNb + ": " + e.Message);
                        break;
                    }
                }
            }
            finally
            {
                // Make sure already-produced output is written even if processing aborts.
                stdoutW.Flush();
            }
        }