/// <summary>
/// Returns the lemmas (dictionary forms) that the lemmatizer engine reports
/// for <paramref name="word"/>.
/// </summary>
/// <param name="word">The word form to look up.</param>
/// <returns>
/// The list produced by <c>AddLemmasToResult</c>, or <c>null</c> if any step
/// throws (engine loading, the native calls, or result collection).
/// </returns>
public List<string> GetWordLemmas(string word)
        {
            // A default-constructed StringBuilder has a capacity of only 16 characters,
            // and that capacity is what gets passed to the native call as the buffer
            // size. Allocate an explicit, larger buffer so long lemmas are not cut off.
            const int LemmaBufferCapacity = 256;

            try {
                LoadIfNeed();

                var result = new HashSet<string>();
                var buffer = new StringBuilder(LemmaBufferCapacity);

                // First ask for the single "normal" lemma of the word ...
                LemmatizatorEngine.sol_GetLemmaW(_hEngine, word, buffer, buffer.Capacity);
                AddNormalWordToList(buffer, result);

                // ... then for the full list of alternative lemmas.
                IntPtr hList = LemmatizatorEngine.sol_GetLemmasW(_hEngine, word);
                return AddLemmasToResult(hList, buffer, result);
            } catch (Exception) {
                // Best-effort lookup: callers receive null instead of an exception.
                return null;
            }
        }
Beispiel #2
0
        /// <summary>
        /// Demo entry point: loads the lemmatizer database, prints the lemma of one
        /// word and all lemmas of another, then releases the native handles.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // Size, in characters, of the buffer handed to the native lemma calls.
            const int LemmaBufferSize = 32;

            // http://www.solarix.ru/api/en/sol_LoadLemmatizator.shtml
            // The path is relative to the build output directory; to load a database
            // located in the working directory pass "lemmatizer.db" instead.
            IntPtr hEngine = LemmatizatorEngine.sol_LoadLemmatizatorW("..\\..\\..\\..\\..\\..\\..\\..\\bin-windows64\\lemmatizer.db", 0);

            if (hEngine == IntPtr.Zero)
            {
                Console.WriteLine("Error");
                return;
            }

            try
            {
                System.Text.StringBuilder lemma = new System.Text.StringBuilder(LemmaBufferSize);

                // http://www.solarix.ru/api/en/sol_GetLemma.shtml
                LemmatizatorEngine.sol_GetLemmaW(hEngine, "галактическими", lemma, LemmaBufferSize);

                String slemma = lemma.ToString();

                Console.WriteLine("lemma={0}\n", slemma);

                // http://www.solarix.ru/api/en/sol_GetLemmas.shtml
                IntPtr lemmas = LemmatizatorEngine.sol_GetLemmasW(hEngine, "роем");

                if (lemmas != IntPtr.Zero)
                {
                    try
                    {
                        // http://www.solarix.ru/api/en/sol_CountLemmas.shtml
                        int n = LemmatizatorEngine.sol_CountLemmas(lemmas);

                        for (int i = 0; i < n; ++i)
                        {
                            // Reset the shared buffer before each native call fills it.
                            lemma.Length = 0;
                            // http://www.solarix.ru/api/en/sol_GetLemmaString.shtml
                            LemmatizatorEngine.sol_GetLemmaStringW(lemmas, i, lemma, LemmaBufferSize);
                            Console.WriteLine("lemma[{0}]={1}", i, lemma.ToString());
                        }
                    }
                    finally
                    {
                        // Release the lemma list even if printing fails.
                        // http://www.solarix.ru/api/en/sol_DeleteLemmas.shtml
                        LemmatizatorEngine.sol_DeleteLemmas(lemmas);
                    }
                }
            }
            finally
            {
                // Release the engine even if a lookup above throws.
                // http://www.solarix.ru/api/en/sol_DeleteLemmatizator.shtml
                LemmatizatorEngine.sol_DeleteLemmatizator(hEngine);
            }
        }
Beispiel #3
0
    /// <summary>
    /// Splits <c>text</c> into words, optionally lemmatizes them, lower-cases them,
    /// collapses duplicates into per-word occurrence counts, and optionally removes
    /// numeric and blacklisted words.
    /// </summary>
    /// <remarks>
    /// Side effects visible in this method:
    /// <c>wordsList</c> ends up holding the unique words in first-occurrence order,
    /// one count per unique word is appended to <c>wordsCount</c>,
    /// <c>wordsTotal</c> is set to the number of tokens found and
    /// <c>wordsUnique</c> to the number of words left after filtering.
    /// <c>OnProgressUpdate</c>, when set, receives 17 after tokenizing and then
    /// values from 17 upwards while lemmatizing.
    /// </remarks>
    public void process()
    {
        // Size, in characters, of the buffer handed to the native lemma call.
        const int lemmaBufferSize = 32;

        // ---- tokenize -------------------------------------------------------
        // A word is a maximal run of characters accepted by isGood. The pending
        // word only ever receives accepted characters, so it needs no re-check
        // before being stored (this assumes isGood is deterministic).
        System.Text.StringBuilder pending = new System.Text.StringBuilder();

        for (int i = 0; i < text.Length; i++)
        {
            char ch = text[i];

            if (isGood(ch))
            {
                pending.Append(ch);
            }
            else if (pending.Length > 0)
            {
                // A separator right after a word closes that word.
                wordsList.Add(pending.ToString());
                pending.Length = 0;
            }
        }

        // The text may end in the middle of a word.
        if (pending.Length > 0)
        {
            wordsList.Add(pending.ToString());
        }

        if (OnProgressUpdate != null)
        {
            OnProgressUpdate(17);
        }

        wordsTotal = wordsList.Count;

        // ---- lemmatize ------------------------------------------------------
        if (lemmatize)
        {
            int tokenCount = wordsList.Count;

            for (int i = 0; i < tokenCount; i++)
            {
                System.Text.StringBuilder lemma = new System.Text.StringBuilder(lemmaBufferSize);

                LemmatizatorEngine.sol_GetLemmaW(hEngine, wordsList[i], lemma, lemmaBufferSize);
                wordsList[i] = lemma.ToString();

                if (OnProgressUpdate != null)
                {
                    // Lemmatizing covers the remaining 83% of the progress range.
                    OnProgressUpdate(17 + i * 83 / tokenCount);
                }
            }
        }

        // ---- lower-case -----------------------------------------------------
        for (int i = 0; i < wordsList.Count; i++)
        {
            wordsList[i] = wordsList[i].ToLower();
        }

        // ---- count words ----------------------------------------------------
        // Compact wordsList in place to its unique words (first-occurrence order)
        // while tallying occurrences; the dictionary maps a word to its slot.
        System.Collections.Generic.Dictionary<string, int> slotOfWord =
            new System.Collections.Generic.Dictionary<string, int>();
        System.Collections.Generic.List<int> occurrences =
            new System.Collections.Generic.List<int>();
        int uniqueCount = 0;

        for (int read = 0; read < wordsList.Count; read++)
        {
            string word = wordsList[read];
            int slot;

            if (slotOfWord.TryGetValue(word, out slot))
            {
                occurrences[slot]++;
            }
            else
            {
                slotOfWord.Add(word, uniqueCount);
                wordsList[uniqueCount] = word;
                occurrences.Add(1);
                uniqueCount++;
            }
        }

        // Drop the tail that now only holds leftovers of duplicate entries.
        while (wordsList.Count > uniqueCount)
        {
            wordsList.RemoveAt(wordsList.Count - 1);
        }

        for (int i = 0; i < occurrences.Count; i++)
        {
            wordsCount.Add(occurrences[i]);
        }

        // ---- delete numbers -------------------------------------------------
        // Walk backwards so removals do not shift the indices still to visit.
        if (deletenumbers)
        {
            for (int i = wordsList.Count - 1; i >= 0; i--)
            {
                int parsed;
                if (int.TryParse(wordsList[i], out parsed))
                {
                    wordsList.RemoveAt(i);
                    wordsCount.RemoveAt(i);
                }
            }
        }

        // ---- delete words from blacklist ------------------------------------
        if (blacklistEnabled)
        {
            for (int i = wordsList.Count - 1; i >= 0; i--)
            {
                bool blacklisted = false;

                for (int k = 0; k < blacklist.Count; k++)
                {
                    if (wordsList[i] == blacklist[k])
                    {
                        blacklisted = true;
                        break;
                    }
                }

                if (blacklisted)
                {
                    wordsList.RemoveAt(i);
                    wordsCount.RemoveAt(i);
                }
            }
        }

        // ---- form the final list --------------------------------------------
        wordsUnique = wordsList.Count;
    }