/// <summary>
/// Breaks <paramref name="text"/> into tokens using the word breaker that
/// <c>LoadWordBreaker</c> returns for <paramref name="cultureInfo"/>.
/// </summary>
/// <param name="cultureInfo">Culture passed to <c>LoadWordBreaker</c> to select a word breaker.</param>
/// <param name="text">Text handed to the word breaker as the source buffer.</param>
/// <returns>
/// The <c>Tokens</c> list of the word sink when a breaker was loaded and
/// <c>BreakText</c> returned 0 with a non-null list; otherwise a new list holding
/// one <c>Token</c> constructed with <c>(0, text.Length)</c>.
/// </returns>
public List<Token> Tokenize(CultureInfo cultureInfo, string text)
{
    List<Token> tokens = null;

    IWordBreaker breaker = this.LoadWordBreaker(cultureInfo);
    if (breaker != null)
    {
        WordSink sink = new WordSink();

        // Describe the whole input string as a single buffer for the breaker.
        TEXT_SOURCE source = default(TEXT_SOURCE);
        source.FillTextBuffer = new FillTextBuffer(this.FillBuffer);
        source.Buffer = text;
        source.Current = 0;
        source.End = source.Buffer.Length;

        // Only a zero result from BreakText is treated as success.
        bool succeeded = breaker.BreakText(ref source, sink, null) == 0;
        if (succeeded)
        {
            tokens = sink.Tokens;
        }
    }

    if (tokens != null)
    {
        return tokens;
    }

    // No breaker, a non-zero result, or no token list: fall back to one token
    // built from offset 0 and the full text length.
    List<Token> fallback = new List<Token>();
    fallback.Add(new Token(0, text.Length));
    return fallback;
}
//struct StemItem { public PutTypes type; public string word; }

/// <summary>
/// Runs <paramref name="breaker"/> over <paramref name="text"/>, passing it a
/// <c>BreakSink</c> constructed from <paramref name="onPutWord"/>.
/// Does nothing when <paramref name="text"/> is null or empty.
/// </summary>
/// <param name="text">Text used as the source buffer for the breaker.</param>
/// <param name="breaker">Word breaker whose <c>BreakText</c> is invoked.</param>
/// <param name="onPutWord">Callback handed to the <c>BreakSink</c> constructor.</param>
static void wordBreak(string text, IWordBreaker breaker, Action<PutTypes, int, int> onPutWord)
{
    if (!string.IsNullOrEmpty(text))
    {
        BreakSink sink = new BreakSink(onPutWord);

        // Expose the whole string as one buffer, from index 0 to its length.
        TEXT_SOURCE source = new TEXT_SOURCE();
        source.pfnFillTextBuffer += fillTextBuffer;
        source.awcBuffer = text;
        source.iCur = 0;
        source.iEnd = text.Length;

        // Whatever BreakText returns is not inspected here.
        breaker.BreakText(ref source, sink, /*cps*/ null);
    }
}