public Tokenize ( string Text, int LanguageID ) : string[]

| Name | Type |
| --- | --- |
| Text | string |
| LanguageID | int |
| return | string[] |
/// <summary>
/// Strips trailing punctuation and configured filler tokens from a phrase.
/// </summary>
/// <remarks>
/// A trailing ".." and then a trailing "!" are cut off first. The remainder is tokenized
/// with the Russian language id. If the lower-cased, "|"-joined token string starts with
/// an entry of <c>prefixes</c>, as many leading tokens as that entry has non-empty
/// "|"-separated parts are dropped (the first matching entry wins). After that, the first
/// occurrence of each entry of <c>infixes</c> is removed from the remaining tokens.
/// </remarks>
/// <param name="phrase0">Source phrase; must not be null (it is dereferenced immediately).</param>
/// <param name="gren">Grammar engine used to tokenize the phrase.</param>
/// <returns>
/// The remaining tokens joined with single spaces when a prefix or infix was removed;
/// otherwise the phrase with only the trailing punctuation stripped.
/// </returns>
public string Preprocess(string phrase0, SolarixGrammarEngineNET.GrammarEngine2 gren)
{
    string phrase = phrase0;

    // Literal punctuation is compared ordinally so the result does not depend on the
    // current culture's collation rules.
    if (phrase.EndsWith("..", StringComparison.Ordinal))
    {
        phrase = phrase.Substring(0, phrase.Length - 2);
    }

    if (phrase.EndsWith("!", StringComparison.Ordinal))
    {
        phrase = phrase.Substring(0, phrase.Length - 1);
    }

    string[] tokens = gren.Tokenize(phrase, SolarixGrammarEngineNET.GrammarEngineAPI.RUSSIAN_LANGUAGE);
    List<string> res_tokens = tokens.ToList();
    bool changed = false;

    // Invariant lower-casing keeps the prefix lookup independent of the machine locale.
    string s = string.Join("|", tokens).ToLowerInvariant();
    char[] separator = { '|' };

    foreach (string prefix in prefixes)
    {
        // NOTE(review): the match is done on the joined string, so a prefix that does not
        // end with "|" could also match the beginning of a longer token - confirm that the
        // entries of prefixes are written with that in mind.
        if (s.StartsWith(prefix, StringComparison.Ordinal))
        {
            // Author's example (translated from Russian): "Well, what a heat we are having today!"
            int prefixTokenCount = prefix.Split(separator, StringSplitOptions.RemoveEmptyEntries).Length;
            res_tokens = res_tokens.Skip(prefixTokenCount).ToList();
            changed = true;
            break;
        }
    }

    foreach (string infix in infixes)
    {
        // List.Remove reports whether it found the item, so no separate Contains scan is
        // needed. The comparison is case-sensitive and only the first occurrence is removed.
        if (res_tokens.Remove(infix))
        {
            changed = true;
        }
    }

    if (changed)
    {
        return string.Join(" ", res_tokens);
    }

    return phrase;
}