示例#1
0
        /// <summary>
        /// Breaks <paramref name="text"/> into tokens using the word breaker that
        /// <c>LoadWordBreaker</c> returns for <paramref name="cultureInfo"/>.
        /// </summary>
        /// <param name="cultureInfo">Culture passed to <c>LoadWordBreaker</c> to select a word breaker.</param>
        /// <param name="text">Text to tokenize. Must not be null: its length is read on every path.</param>
        /// <returns>
        /// The sink's <c>Tokens</c> when a word breaker is available and <c>BreakText</c> returns 0.
        /// Otherwise (no breaker, non-zero result, or null <c>Tokens</c>) a list holding a single
        /// <c>Token(0, text.Length)</c> — presumably one token spanning the whole text.
        /// </returns>
        public List <Token> Tokenize(CultureInfo cultureInfo, string text)
        {
            List<Token> tokens = null;
            IWordBreaker breaker = this.LoadWordBreaker(cultureInfo);

            if (breaker != null)
            {
                // Keep the concrete sink type so its Tokens can be read without a downcast.
                WordSink sink = new WordSink();

                TEXT_SOURCE source = new TEXT_SOURCE
                {
                    FillTextBuffer = new FillTextBuffer(this.FillBuffer),
                    Buffer         = text,
                    Current        = 0
                };
                source.End = source.Buffer.Length;

                // A zero return value is the only outcome whose tokens are used.
                bool succeeded = breaker.BreakText(ref source, sink, null) == 0;
                if (succeeded)
                {
                    tokens = sink.Tokens;
                }
            }

            if (tokens != null)
            {
                return tokens;
            }

            // Fallback: no usable result from the breaker, so emit one token built from (0, text.Length).
            List<Token> fallback = new List<Token>();
            fallback.Add(new Token(0, text.Length));
            return fallback;
        }
示例#2
0
        //struct StemItem { public PutTypes type; public string word; }

        /// <summary>
        /// Runs <paramref name="breaker"/> over <paramref name="text"/>, handing
        /// <paramref name="onPutWord"/> to a <c>BreakSink</c> that receives the breaker's output.
        /// Does nothing when <paramref name="text"/> is null or empty.
        /// </summary>
        /// <param name="text">Text to break; null or empty input returns immediately.</param>
        /// <param name="breaker">Word breaker whose <c>BreakText</c> is invoked.</param>
        /// <param name="onPutWord">Callback given to the <c>BreakSink</c> constructor.</param>
        static void wordBreak(string text, IWordBreaker breaker, Action <PutTypes, int, int> onPutWord)
        {
            if (text == null || text.Length == 0)
            {
                return;
            }

            BreakSink   sink   = new BreakSink(onPutWord);
            TEXT_SOURCE source = new TEXT_SOURCE();

            // Describe the whole string as the range to process: [0, text.Length).
            source.iCur      = 0;
            source.iEnd      = text.Length;
            source.awcBuffer = text;
            source.pfnFillTextBuffer += fillTextBuffer;

            // The value returned by BreakText is intentionally not inspected here.
            breaker.BreakText(ref source, sink, /*cps*/ null);
        }