// Naive tokenizer: every non-empty text run in the segment is turned into a single token.
// Elements that are not Text, and Text elements whose Value is null or empty, are skipped.
private List<Token> Tokenize(Segment segment)
{
    var result = new List<Token>();

    // Counts the tokens emitted so far. It only advances when a text run produces a token,
    // so it differs from the element's index in segment.Elements once an element is skipped.
    var emitted = 0;

    foreach (var item in segment.Elements)
    {
        var textRun = item as Text;
        if (textRun != null && !string.IsNullOrEmpty(textRun.Value))
        {
            var simpleToken = new global::Sdl.LanguagePlatform.Core.Tokenization.SimpleToken(textRun.Value);

            // The range is built from the running token count, 0, and the index of the
            // last character of the run.
            // NOTE(review): presumably these are (position in segment, start offset, end offset);
            // confirm against the SegmentRange constructor.
            simpleToken.Span = new SegmentRange(emitted, 0, textRun.Value.Length - 1);

            result.Add(simpleToken);
            emitted++;
        }
    }

    return result;
}
// Visitor callback for simple tokens: appends the token's text to the accumulated _plainText.
public void VisitSimpleToken(global::Sdl.LanguagePlatform.Core.Tokenization.SimpleToken token)
{
    _plainText = _plainText + token.Text;
}