/// <summary>
/// Collects check records from a single validation sentence.
/// </summary>
/// <remarks>
/// Increments <c>n_test_samples</c>, then for every token except the first and the last one
/// appends a <c>CheckData</c> (POS tag, normalized word form, lemma) to <c>check_data_list</c>.
/// Tokens whose lemma is rejected by <c>IsUnknownLexem</c> or <c>IsNumword</c> are skipped.
/// </remarks>
/// <param name="sample">Sentence whose tokens are examined.</param>
public void ProcessValidationSample(SentenceData sample)
{
    n_test_samples++;

    // Positions 0 and CountWords()-1 are never visited; presumably they hold
    // sentence boundary markers — TODO confirm against the corpus reader.
    for (int iword = 1; iword < sample.CountWords() - 1; ++iword)
    {
        WordData token = sample.GetWord(iword);

        string wordform = token.GetWord().ToLower();
        if (wordform.Contains("  "))
        {
            // Collapse runs of spaces to a single space, exactly as the training path does,
            // so that validation word forms are comparable with the stored ones.
            wordform = System.Text.RegularExpressions.Regex.Replace(wordform, "[ ]{2,}", " ");
        }

        string lemma = gren.GetEntryName(token.GetEntryID());
        if (IsUnknownLexem(lemma) || IsNumword(lemma))
        {
            continue;
        }

        CheckData d = new CheckData
        {
            POS_tag = tags.MatchTags(token, gren),
            wordform = wordform,
            lemma = lemma
        };
        check_data_list.Add(d);
    }
}
/// <summary>
/// Feeds the tokens of a single training sentence into the lookup table.
/// </summary>
/// <remarks>
/// Increments <c>n_learn_samples</c>, then for every token except the first and the last one
/// stores the triple (POS tag, normalized word form, lemma) via <c>table.Store</c> and
/// increments <c>n_learn_wordforms</c>. Tokens whose lemma is rejected by
/// <c>IsUnknownLexem</c> or <c>IsNumword</c> are skipped.
/// </remarks>
/// <param name="sample">Sentence whose tokens are learned.</param>
/// <returns>Always <c>true</c>.</returns>
public bool ProcessTrainingSample(SentenceData sample)
{
    n_learn_samples++;

    // Positions 0 and CountWords()-1 are never visited; presumably they hold
    // sentence boundary markers — TODO confirm against the corpus reader.
    for (int iword = 1; iword < sample.CountWords() - 1; ++iword)
    {
        WordData token = sample.GetWord(iword);

        string wordform = token.GetWord().ToLower();
        if (wordform.Contains("  "))
        {
            // Collapse runs of spaces to a single space. The static Regex.Replace relies on
            // the framework's regex cache, so the pattern is not re-parsed for every token.
            wordform = System.Text.RegularExpressions.Regex.Replace(wordform, "[ ]{2,}", " ");
        }

        string lemma = gren.GetEntryName(token.GetEntryID());
        if (IsUnknownLexem(lemma) || IsNumword(lemma))
        {
            continue;
        }

        int POS_tag = tags.MatchTags(token, gren);
        table.Store(POS_tag, wordform, lemma);
        n_learn_wordforms++;
    }

    return true;
}