/// <summary>
/// Splits the text into words and returns them joined into a single string,
/// with <paramref name="separator"/> placed between consecutive words.
/// Language-specific rules are used to process dots, hyphens etc.
/// </summary>
/// <param name="text">Text to tokenize.</param>
/// <param name="language">Language whose rules are applied; passed to the engine as its integer value.</param>
/// <param name="separator">Character inserted between adjacent tokens.</param>
/// <returns>
/// The tokens joined by <paramref name="separator"/>, or an empty string when the
/// engine returns no token list or an empty one.
/// </returns>
public string TokenizeWithSeparator(string text, Languages language, char separator = '|')
{
    var hTokens = GrammarApi.sol_TokenizeW(_engine, text, (int)language);
    if (hTokens == IntPtr.Zero)
    {
        return string.Empty;
    }

    try
    {
        int tokenCount = GrammarApi.sol_CountStrings(hTokens);
        if (tokenCount <= 0)
        {
            // Nothing to join; trimming a trailing separator from an empty
            // builder would set a negative length and throw.
            return string.Empty;
        }

        int maxWordLen = GrammarApi.sol_MaxLexemLen(_engine) + 1;
        var buffer = new StringBuilder(maxWordLen);
        var result = new StringBuilder(text.Length);

        for (var i = 0; i < tokenCount; ++i)
        {
            if (i > 0)
            {
                result.Append(separator);
            }

            // The same buffer is reused for every token, so clear it first.
            buffer.Length = 0;
            GrammarApi.sol_GetStringW(hTokens, i, buffer);
            result.Append(buffer.ToString());
        }

        return result.ToString();
    }
    finally
    {
        // Release the native token list even if something above throws.
        GrammarApi.sol_DeleteStrings(hTokens);
    }
}
/// <summary>
/// Asks the engine to generate word forms for the given entry, constrained by
/// coordinate/state pairs.
/// </summary>
/// <param name="entryId">Identifier of the entry passed to the engine.</param>
/// <param name="coordId">Coordinate identifiers; element <c>i</c> is paired with <c>stateId[i]</c>.</param>
/// <param name="stateId">
/// State identifiers; must contain at least as many elements as
/// <paramref name="coordId"/>. Extra elements are ignored.
/// </param>
/// <returns>The generated strings; an empty list when the engine returns no result handle.</returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="coordId"/> or <paramref name="stateId"/> is null.
/// </exception>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="stateId"/> has fewer elements than <paramref name="coordId"/>.
/// </exception>
public List<string> GenerateWordforms(int entryId, List<int> coordId, List<int> stateId)
{
    if (coordId == null)
    {
        throw new ArgumentNullException("coordId");
    }

    if (stateId == null)
    {
        throw new ArgumentNullException("stateId");
    }

    var npairs = coordId.Count;
    if (stateId.Count < npairs)
    {
        // Same exception type the list indexer would raise mid-loop, but
        // reported up front with a message that names the real problem.
        throw new ArgumentOutOfRangeException(
            "stateId",
            "stateId must contain at least as many elements as coordId.");
    }

    // Flatten the pairs into [coord0, state0, coord1, state1, ...].
    var pairs = new int[npairs * 2];
    for (int i = 0, j = 0; i < npairs; ++i)
    {
        pairs[j++] = coordId[i];
        pairs[j++] = stateId[i];
    }

    var res = new List<string>();

    var hStr = GrammarApi.sol_GenerateWordforms(_engine, entryId, npairs, pairs);
    if (hStr == IntPtr.Zero)
    {
        return res;
    }

    try
    {
        var nstr = GrammarApi.sol_CountStrings(hStr);
        for (var k = 0; k < nstr; ++k)
        {
            res.Add(GrammarApi.sol_GetStringFX(hStr, k));
        }
    }
    finally
    {
        // Release the native string list even if reading a string throws.
        GrammarApi.sol_DeleteStrings(hStr);
    }

    return res;
}
/// <summary>
/// Splits the string into words and returns them as an array.
/// Language-specific rules are used to process dots, hyphens etc.
/// </summary>
/// <remarks>
/// Works only on pre-segmented sentences.
/// </remarks>
/// <param name="text">Sentence to tokenize.</param>
/// <param name="language">Language whose rules are applied; passed to the engine as its integer value.</param>
/// <returns>
/// The tokens in the order the engine reports them, or <c>null</c> when the
/// engine returns no token list.
/// </returns>
public string[] TokenizeSentence(string text, Languages language)
{
    var hTokens = GrammarApi.sol_TokenizeW(_engine, text, (int)language);
    if (hTokens == IntPtr.Zero)
    {
        // Kept as null (not an empty array) so existing callers that test
        // for null to detect failure keep working.
        return null;
    }

    try
    {
        var tokenCount = GrammarApi.sol_CountStrings(hTokens);
        var tokens = new string[tokenCount];

        var maxWordLen = GrammarApi.sol_MaxLexemLen(_engine) + 1;
        var buffer = new StringBuilder(maxWordLen);

        for (var i = 0; i < tokenCount; ++i)
        {
            // The same buffer is reused for every token, so clear it first.
            buffer.Length = 0;
            GrammarApi.sol_GetStringW(hTokens, i, buffer);
            tokens[i] = buffer.ToString();
        }

        return tokens;
    }
    finally
    {
        // Release the native token list even if something above throws.
        GrammarApi.sol_DeleteStrings(hTokens);
    }
}