Пример #1
0
    /// <summary>
    /// Collects one check record (POS tag, word form, lemma) per usable token of a
    /// validation sentence into <c>check_data_list</c> and increments <c>n_test_samples</c>.
    /// </summary>
    /// <remarks>
    /// Word forms are lower-cased and runs of spaces are collapsed to a single space,
    /// mirroring the normalization done in <c>ProcessTrainingSample</c>, so that the
    /// recorded form is comparable with the forms stored during training.
    /// </remarks>
    /// <param name="sample">Sentence whose tokens are inspected.</param>
    public void ProcessValidationSample(SentenceData sample)
    {
        n_test_samples++;

        // The first and the last token are skipped
        // (presumably sentence boundary markers - TODO confirm).
        int lastIndex = sample.CountWords() - 1;

        for (int iword = 1; iword < lastIndex; ++iword)
        {
            WordData token = sample.GetWord(iword);

            // Tokens rejected by the lemma filter are not recorded at all,
            // so check the lemma before doing any string work.
            string lemma = gren.GetEntryName(token.GetEntryID());
            if (IsUnknownLexem(lemma) || IsNumword(lemma))
            {
                continue;
            }

            string wordform = token.GetWord().ToLower();
            if (wordform.Contains("  "))
            {
                // Collapse runs of spaces into a single space, exactly as the
                // training pass does before storing a word form.
                wordform = System.Text.RegularExpressions.Regex.Replace(wordform, "[ ]{2,}", " ");
            }

            CheckData d = new CheckData
            {
                POS_tag  = tags.MatchTags(token, gren),
                wordform = wordform,
                lemma    = lemma
            };

            check_data_list.Add(d);
        }
    }
Пример #2
0
    /// <summary>
    /// Feeds every usable token of a training sentence into <c>table</c> as a
    /// (POS tag, word form, lemma) triple. Increments <c>n_learn_samples</c> once per
    /// call and <c>n_learn_wordforms</c> once per stored token.
    /// </summary>
    /// <param name="sample">Sentence whose tokens are processed.</param>
    /// <returns>Always <c>true</c>.</returns>
    public bool ProcessTrainingSample(SentenceData sample)
    {
        n_learn_samples++;

        // The first and the last token are skipped
        // (presumably sentence boundary markers - TODO confirm).
        int lastIndex = sample.CountWords() - 1;

        for (int iword = 1; iword < lastIndex; ++iword)
        {
            WordData token = sample.GetWord(iword);

            // Tokens rejected by the lemma filter are never stored, so check the
            // lemma first and avoid normalizing a word form that would be discarded.
            string lemma = gren.GetEntryName(token.GetEntryID());
            if (IsUnknownLexem(lemma) || IsNumword(lemma))
            {
                continue;
            }

            string wordform = token.GetWord().ToLower();
            if (wordform.Contains("  "))
            {
                // Collapse runs of spaces into a single space. The static Regex.Replace
                // reuses the framework's cached pattern instead of building a new
                // Regex object for every token.
                wordform = System.Text.RegularExpressions.Regex.Replace(wordform, "[ ]{2,}", " ");
            }

            int POS_tag = tags.MatchTags(token, gren);

            table.Store(POS_tag, wordform, lemma);
            n_learn_wordforms++;
        }

        return true;
    }