예제 #1
0
        /// <summary>
        ///     Split the string into words and return the list of these words
        ///     in a single string separated with the specified character.
        ///     Language-specific rules are used to process dots, hyphens etc.
        /// </summary>
        /// <param name="text">Text handed to the native tokenizer.</param>
        /// <param name="language">Language id passed to the tokenizer (cast to <c>int</c>).</param>
        /// <param name="separator">Character inserted between adjacent tokens.</param>
        /// <returns>
        ///     The tokens joined with <paramref name="separator" /> (no trailing separator),
        ///     or an empty string when the tokenizer returns a null handle or zero tokens.
        /// </returns>
        public string TokenizeWithSeparator(string text, Languages language, char separator = '|')
        {
            var hTokens = GrammarApi.sol_TokenizeW(_engine, text, (int)language);

            if (hTokens == IntPtr.Zero)
            {
                return(string.Empty);
            }

            try
            {
                int maxWordLen = GrammarApi.sol_MaxLexemLen(_engine) + 1;
                int ntoken     = GrammarApi.sol_CountStrings(hTokens);

                // Nothing to join: avoid shrinking an empty builder below zero length.
                if (ntoken <= 0)
                {
                    return(string.Empty);
                }

                // The input length is only a capacity hint for the joined result.
                var result = new StringBuilder(text != null ? text.Length : 0);
                var buffer = new StringBuilder(maxWordLen);

                for (var i = 0; i < ntoken; ++i)
                {
                    // Put the separator between tokens instead of trimming a trailing one.
                    if (i > 0)
                    {
                        result.Append(separator);
                    }

                    buffer.Length = 0;
                    GrammarApi.sol_GetStringW(hTokens, i, buffer);
                    result.Append(buffer.ToString());
                }

                return(result.ToString());
            }
            finally
            {
                // Release the native string list on every path, including exceptions.
                GrammarApi.sol_DeleteStrings(hTokens);
            }
        }
예제 #2
0
        /// <summary>
        ///     Asks the grammar engine to generate word forms for the entry
        ///     <paramref name="entryId" />, passing the items of <paramref name="coordId" /> and
        ///     <paramref name="stateId" /> as interleaved (coordinate, state) pairs.
        /// </summary>
        /// <param name="entryId">Entry id forwarded to the engine.</param>
        /// <param name="coordId">First element of each pair; its count defines the number of pairs.</param>
        /// <param name="stateId">
        ///     Second element of each pair; must hold at least as many items as
        ///     <paramref name="coordId" />. Extra items are ignored.
        /// </param>
        /// <returns>
        ///     The strings returned by the engine; an empty list when the engine returns a null handle.
        /// </returns>
        /// <exception cref="ArgumentNullException">
        ///     <paramref name="coordId" /> or <paramref name="stateId" /> is <c>null</c>.
        /// </exception>
        /// <exception cref="ArgumentException">
        ///     <paramref name="stateId" /> has fewer items than <paramref name="coordId" />.
        /// </exception>
        public List <string> GenerateWordforms(int entryId, List <int> coordId, List <int> stateId)
        {
            if (coordId == null)
            {
                throw new ArgumentNullException("coordId");
            }

            if (stateId == null)
            {
                throw new ArgumentNullException("stateId");
            }

            if (stateId.Count < coordId.Count)
            {
                throw new ArgumentException("stateId must contain at least as many items as coordId.", "stateId");
            }

            var npairs = coordId.Count;
            var pairs  = new int[npairs * 2];

            // Flatten the two lists into [coord0, state0, coord1, state1, ...].
            for (var i = 0; i < npairs; ++i)
            {
                pairs[2 * i]     = coordId[i];
                pairs[2 * i + 1] = stateId[i];
            }

            var res  = new List <string>();
            var hStr = GrammarApi.sol_GenerateWordforms(_engine, entryId, npairs, pairs);

            if (hStr == IntPtr.Zero)
            {
                return(res);
            }

            try
            {
                var nstr = GrammarApi.sol_CountStrings(hStr);
                for (var k = 0; k < nstr; ++k)
                {
                    res.Add(GrammarApi.sol_GetStringFX(hStr, k));
                }
            }
            finally
            {
                // Release the native string list even if copying a string fails.
                GrammarApi.sol_DeleteStrings(hStr);
            }

            return(res);
        }
예제 #3
0
        /// <summary>
        ///     Split the string into words and return the list of these words.
        ///     Language-specific rules are used to process dots, hyphens etc.
        /// </summary>
        /// <remarks>
        /// Works only on pre-segmented sentences.
        /// </remarks>
        /// <param name="text">Text handed to the native tokenizer.</param>
        /// <param name="language">Language id passed to the tokenizer (cast to <c>int</c>).</param>
        /// <returns>
        ///     An array with one string per token, or <c>null</c> when the tokenizer
        ///     returns a null handle.
        /// </returns>
        public string[] TokenizeSentence(string text, Languages language)
        {
            var hTokens = GrammarApi.sol_TokenizeW(_engine, text, (int)language);

            if (hTokens == IntPtr.Zero)
            {
                // Kept as null (not an empty array) so existing callers that test for null still work.
                return(null);
            }

            try
            {
                var maxWordLen = GrammarApi.sol_MaxLexemLen(_engine) + 1;
                var ntoken     = GrammarApi.sol_CountStrings(hTokens);
                var tokens     = new string[ntoken];

                // One reusable buffer for all tokens, cleared before each native read.
                var buffer = new StringBuilder(maxWordLen);
                for (var i = 0; i < ntoken; ++i)
                {
                    buffer.Length = 0;
                    GrammarApi.sol_GetStringW(hTokens, i, buffer);
                    tokens[i] = buffer.ToString();
                }

                return(tokens);
            }
            finally
            {
                // Release the native string list on every path, including exceptions.
                GrammarApi.sol_DeleteStrings(hTokens);
            }
        }