/// <summary>
/// Collects one hold-out (validation) sentence: for every inner token it
/// records the matched POS tag, the normalized lowercase wordform and the
/// lemma into check_data_list for later accuracy measurement.
/// </summary>
/// <param name="sample">Sentence whose inner words are collected; the
/// boundary tokens at index 0 and CountWords()-1 are skipped.</param>
public void ProcessValidationSample(SentenceData sample)
{
    n_test_samples++;

    // Skip the artificial sentence-boundary tokens at both ends.
    for (int iword = 1; iword < sample.CountWords() - 1; ++iword)
    {
        WordData token = sample.GetWord(iword);

        string wordform = token.GetWord().ToLower();

        // Collapse runs of spaces to a single space so validation wordforms
        // use the same normalization as ProcessTrainingSample; otherwise a
        // multi-space token could never match its stored training form.
        if (wordform.Contains(" "))
        {
            wordform = System.Text.RegularExpressions.Regex.Replace(wordform, "[ ]{2,}", " ");
        }

        string lemma = gren.GetEntryName(token.GetEntryID());

        // Unknown lexemes and numeric tokens are excluded from evaluation.
        if (IsUnknownLexem(lemma) || IsNumword(lemma))
        {
            continue;
        }

        CheckData d = new CheckData();
        d.POS_tag = tags.MatchTags(token, gren);
        d.wordform = wordform;
        d.lemma = lemma;

        check_data_list.Add(d);
    }
}
/// <summary>
/// Learns from one training sentence: stores the (POS tag, wordform, lemma)
/// triple of each inner token into the lemmatization table.
/// </summary>
/// <param name="sample">Sentence to learn from; the boundary tokens at
/// index 0 and CountWords()-1 are skipped.</param>
/// <returns>Always true.</returns>
public bool ProcessTrainingSample(SentenceData sample)
{
    n_learn_samples++;

    for (int iword = 1; iword < sample.CountWords() - 1; ++iword)
    {
        WordData token = sample.GetWord(iword);

        string wordform = token.GetWord().ToLower();

        // Collapse runs of spaces to a single space (multi-word tokens).
        // The static Regex.Replace overload uses the framework's internal
        // regex cache, avoiding a fresh Regex allocation for every token
        // that the original per-iteration "new Regex(...)" incurred.
        if (wordform.Contains(" "))
        {
            wordform = System.Text.RegularExpressions.Regex.Replace(wordform, "[ ]{2,}", " ");
        }

        string lemma = gren.GetEntryName(token.GetEntryID());

        // Unknown lexemes and numeric tokens are not learned.
        if (IsUnknownLexem(lemma) || IsNumword(lemma))
        {
            continue;
        }

        int POS_tag = tags.MatchTags(token, gren);
        table.Store(POS_tag, wordform, lemma);
        n_learn_wordforms++;
    }

    return true;
}
/// <summary>
/// Scans the word's description (case-insensitively) for a position where the
/// word matches per CheckCurrentIndex; on the first hit, returns the
/// description with markers inserted around that position via addBeginEnd.
/// If no position matches, returns the description with the marked word
/// prepended on its own line.
/// </summary>
/// <param name="wd">Token supplying both the description and the word.</param>
/// <returns>The description text with the matching word marked up.</returns>
public string FindMatchingText(WordData wd)
{
    string description = wd.GetDesc();
    string word = wd.GetWord();

    // Lowercased copies drive the case-insensitive scan; the originals are
    // kept so the returned text preserves the source casing.
    char[] descLower = description.ToLower().ToCharArray();
    char[] wordLower = word.ToLower().ToCharArray();

    int pos = 0;
    while (pos < descLower.Length)
    {
        if (CheckCurrentIndex(pos, descLower, wordLower))
        {
            return addBeginEnd(pos, description, word);
        }
        pos++;
    }

    // No match anywhere in the description: prepend the marked-up word.
    return description.Insert(0, beginWord + word + endWord + "\n");
}