public List <Pullenti.Morph.MorphToken> Run(string text, bool onlyTokenizing, Pullenti.Morph.MorphLang dlang, bool goodText, ProgressChangedEventHandler progress) { if (string.IsNullOrEmpty(text)) { return(null); } TextWrapper twr = new TextWrapper(text, goodText); TextWrapper.CharsList twrch = twr.Chars; List <Pullenti.Morph.MorphToken> res = new List <Pullenti.Morph.MorphToken>(text.Length / 6); Dictionary <string, UniLexWrap> uniLex = new Dictionary <string, UniLexWrap>(); int i; int j; string term0 = null; int pureRusWords = 0; int pureUkrWords = 0; int pureByWords = 0; int pureKzWords = 0; int totRusWords = 0; int totUkrWords = 0; int totByWords = 0; int totKzWords = 0; for (i = 0; i < twr.Length; i++) { int ty = this.GetCharTyp(twrch[i]); if (ty == 0) { continue; } if (ty > 2) { j = i + 1; } else { for (j = i + 1; j < twr.Length; j++) { if (this.GetCharTyp(twrch[j]) != ty) { break; } } } string wstr = text.Substring(i, j - i); string term = null; if (goodText) { term = wstr; } else { string trstr = Pullenti.Morph.LanguageHelper.TransliteralCorrection(wstr, term0, false); term = Pullenti.Morph.LanguageHelper.CorrectWord(trstr); } if (string.IsNullOrEmpty(term)) { i = j - 1; continue; } Pullenti.Morph.MorphLang lang = Pullenti.Morph.LanguageHelper.GetWordLang(term); if (lang == Pullenti.Morph.MorphLang.UA) { pureUkrWords++; } else if (lang == Pullenti.Morph.MorphLang.RU) { pureRusWords++; } else if (lang == Pullenti.Morph.MorphLang.BY) { pureByWords++; } else if (lang == Pullenti.Morph.MorphLang.KZ) { pureKzWords++; } if (((lang & Pullenti.Morph.MorphLang.RU)) != Pullenti.Morph.MorphLang.Unknown) { totRusWords++; } if (((lang & Pullenti.Morph.MorphLang.UA)) != Pullenti.Morph.MorphLang.Unknown) { totUkrWords++; } if (((lang & Pullenti.Morph.MorphLang.BY)) != Pullenti.Morph.MorphLang.Unknown) { totByWords++; } if (((lang & Pullenti.Morph.MorphLang.KZ)) != Pullenti.Morph.MorphLang.Unknown) { totKzWords++; } if (ty == 1) { term0 = term; } UniLexWrap lemmas = null; if (ty == 1 && !onlyTokenizing) { if (!uniLex.TryGetValue(term, out lemmas)) { UniLexWrap nuni = new UniLexWrap(lang); uniLex.Add(term, nuni); lemmas = nuni; } } Pullenti.Morph.MorphToken tok = new Pullenti.Morph.MorphToken(); tok.Term = term; tok.BeginChar = i; if (i == 733860) { } tok.EndChar = j - 1; tok.Tag = lemmas; res.Add(tok); i = j - 1; } Pullenti.Morph.MorphLang defLang = new Pullenti.Morph.MorphLang(); if (dlang != null) { defLang.Value = dlang.Value; } if (pureRusWords > pureUkrWords && pureRusWords > pureByWords && pureRusWords > pureKzWords) { defLang = Pullenti.Morph.MorphLang.RU; } else if (totRusWords > totUkrWords && totRusWords > totByWords && totRusWords > totKzWords) { defLang = Pullenti.Morph.MorphLang.RU; } else if (pureUkrWords > pureRusWords && pureUkrWords > pureByWords && pureUkrWords > pureKzWords) { defLang = Pullenti.Morph.MorphLang.UA; } else if (totUkrWords > totRusWords && totUkrWords > totByWords && totUkrWords > totKzWords) { defLang = Pullenti.Morph.MorphLang.UA; } else if (pureKzWords > pureRusWords && pureKzWords > pureUkrWords && pureKzWords > pureByWords) { defLang = Pullenti.Morph.MorphLang.KZ; } else if (totKzWords > totRusWords && totKzWords > totUkrWords && totKzWords > totByWords) { defLang = Pullenti.Morph.MorphLang.KZ; } else if (pureByWords > pureRusWords && pureByWords > pureUkrWords && pureByWords > pureKzWords) { defLang = Pullenti.Morph.MorphLang.BY; } else if (totByWords > totRusWords && totByWords > totUkrWords && totByWords > totKzWords) { if (totRusWords > 10 && totByWords > (totRusWords + 20)) { defLang = Pullenti.Morph.MorphLang.BY; } else if (totRusWords == 0 || totByWords >= (totRusWords * 2)) { defLang = Pullenti.Morph.MorphLang.BY; } } if (((defLang.IsUndefined || defLang.IsUa)) && totRusWords > 0) { if (((totUkrWords > totRusWords && m_EngineUa.Language.IsUa)) || ((totByWords > totRusWords && m_EngineBy.Language.IsBy)) || ((totKzWords > totRusWords && m_EngineKz.Language.IsKz))) { int cou0 = 0; totRusWords = (totByWords = (totUkrWords = (totKzWords = 0))); foreach (KeyValuePair <string, UniLexWrap> kp in uniLex) { Pullenti.Morph.MorphLang lang = new Pullenti.Morph.MorphLang(); kp.Value.WordForms = this.ProcessOneWord(kp.Key, ref lang); if (kp.Value.WordForms != null) { foreach (Pullenti.Morph.MorphWordForm wf in kp.Value.WordForms) { lang |= wf.Language; } } kp.Value.Lang = lang; if (lang.IsRu) { totRusWords++; } if (lang.IsUa) { totUkrWords++; } if (lang.IsBy) { totByWords++; } if (lang.IsKz) { totKzWords++; } if (lang.IsCyrillic) { cou0++; } if (cou0 >= 100) { break; } } if (totRusWords > ((totByWords / 2)) && totRusWords > ((totUkrWords / 2))) { defLang = Pullenti.Morph.MorphLang.RU; } else if (totUkrWords > ((totRusWords / 2)) && totUkrWords > ((totByWords / 2))) { defLang = Pullenti.Morph.MorphLang.UA; } else if (totByWords > ((totRusWords / 2)) && totByWords > ((totUkrWords / 2))) { defLang = Pullenti.Morph.MorphLang.BY; } } else if (defLang.IsUndefined) { defLang = Pullenti.Morph.MorphLang.RU; } } int cou = 0; totRusWords = (totByWords = (totUkrWords = (totKzWords = 0))); foreach (KeyValuePair <string, UniLexWrap> kp in uniLex) { Pullenti.Morph.MorphLang lang = defLang; if (lang.IsUndefined) { if (totRusWords > totByWords && totRusWords > totUkrWords && totRusWords > totKzWords) { lang = Pullenti.Morph.MorphLang.RU; } else if (totUkrWords > totRusWords && totUkrWords > totByWords && totUkrWords > totKzWords) { lang = Pullenti.Morph.MorphLang.UA; } else if (totByWords > totRusWords && totByWords > totUkrWords && totByWords > totKzWords) { lang = Pullenti.Morph.MorphLang.BY; } else if (totKzWords > totRusWords && totKzWords > totUkrWords && totKzWords > totByWords) { lang = Pullenti.Morph.MorphLang.KZ; } } kp.Value.WordForms = this.ProcessOneWord(kp.Key, ref lang); kp.Value.Lang = lang; if (((lang & Pullenti.Morph.MorphLang.RU)) != Pullenti.Morph.MorphLang.Unknown) { totRusWords++; } if (((lang & Pullenti.Morph.MorphLang.UA)) != Pullenti.Morph.MorphLang.Unknown) { totUkrWords++; } if (((lang & Pullenti.Morph.MorphLang.BY)) != Pullenti.Morph.MorphLang.Unknown) { totByWords++; } if (((lang & Pullenti.Morph.MorphLang.KZ)) != Pullenti.Morph.MorphLang.Unknown) { totKzWords++; } if (progress != null) { this.OnProgress(cou, uniLex.Count, progress); } cou++; } List <Pullenti.Morph.MorphWordForm> emptyList = null; foreach (Pullenti.Morph.MorphToken r in res) { UniLexWrap uni = r.Tag as UniLexWrap; r.Tag = null; if (uni == null || uni.WordForms == null || uni.WordForms.Count == 0) { if (emptyList == null) { emptyList = new List <Pullenti.Morph.MorphWordForm>(); } r.WordForms = emptyList; if (uni != null) { r.Language = uni.Lang; } } else { r.WordForms = uni.WordForms; } } if (!goodText) { for (i = 0; i < (res.Count - 2); i++) { UnicodeInfo ui0 = twrch[res[i].BeginChar]; UnicodeInfo ui1 = twrch[res[i + 1].BeginChar]; UnicodeInfo ui2 = twrch[res[i + 2].BeginChar]; if (ui1.IsQuot) { int p = res[i + 1].BeginChar; if ((p >= 2 && "БбТт".IndexOf(text[p - 1]) >= 0 && ((p + 3) < text.Length)) && "ЕеЯяЁё".IndexOf(text[p + 1]) >= 0) { string wstr = Pullenti.Morph.LanguageHelper.TransliteralCorrection(Pullenti.Morph.LanguageHelper.CorrectWord(string.Format("{0}Ъ{1}", res[i].GetSourceText(text), res[i + 2].GetSourceText(text))), null, false); List <Pullenti.Morph.MorphWordForm> li = this.ProcessOneWord0(wstr); if (li != null && li.Count > 0 && li[0].IsInDictionary) { res[i].EndChar = res[i + 2].EndChar; res[i].Term = wstr; res[i].WordForms = li; res.RemoveRange(i + 1, 2); } } else if ((ui1.IsApos && p > 0 && char.IsLetter(text[p - 1])) && ((p + 1) < text.Length) && char.IsLetter(text[p + 1])) { if (defLang == Pullenti.Morph.MorphLang.UA || ((res[i].Language & Pullenti.Morph.MorphLang.UA)) != Pullenti.Morph.MorphLang.Unknown || ((res[i + 2].Language & Pullenti.Morph.MorphLang.UA)) != Pullenti.Morph.MorphLang.Unknown) { string wstr = Pullenti.Morph.LanguageHelper.TransliteralCorrection(Pullenti.Morph.LanguageHelper.CorrectWord(string.Format("{0}{1}", res[i].GetSourceText(text), res[i + 2].GetSourceText(text))), null, false); List <Pullenti.Morph.MorphWordForm> li = this.ProcessOneWord0(wstr); bool okk = true; if (okk) { res[i].EndChar = res[i + 2].EndChar; res[i].Term = wstr; if (li == null) { li = new List <Pullenti.Morph.MorphWordForm>(); } if (li != null && li.Count > 0) { res[i].Language = li[0].Language; } res[i].WordForms = li; res.RemoveRange(i + 1, 2); } } } } else if (((ui1.UniChar == '3' || ui1.UniChar == '4')) && res[i + 1].Length == 1) { string src = (ui1.UniChar == '3' ? "З" : "Ч"); int i0 = i + 1; if ((res[i].EndChar + 1) == res[i + 1].BeginChar && ui0.IsCyrillic) { i0--; src = res[i0].GetSourceText(text) + src; } int i1 = i + 1; if ((res[i + 1].EndChar + 1) == res[i + 2].BeginChar && ui2.IsCyrillic) { i1++; src += res[i1].GetSourceText(text); } if (src.Length > 2) { string wstr = Pullenti.Morph.LanguageHelper.TransliteralCorrection(Pullenti.Morph.LanguageHelper.CorrectWord(src), null, false); List <Pullenti.Morph.MorphWordForm> li = this.ProcessOneWord0(wstr); if (li != null && li.Count > 0 && li[0].IsInDictionary) { res[i0].EndChar = res[i1].EndChar; res[i0].Term = wstr; res[i0].WordForms = li; res.RemoveRange(i0 + 1, i1 - i0); } } } else if ((ui1.IsHiphen && ui0.IsLetter && ui2.IsLetter) && res[i].EndChar > res[i].BeginChar && res[i + 2].EndChar > res[i + 2].BeginChar) { bool newline = false; int sps = 0; for (j = res[i + 1].EndChar + 1; j < res[i + 2].BeginChar; j++) { if (text[j] == '\r' || text[j] == '\n') { newline = true; sps++; } else if (!char.IsWhiteSpace(text[j])) { break; } else { sps++; } } string fullWord = Pullenti.Morph.LanguageHelper.CorrectWord(res[i].GetSourceText(text) + res[i + 2].GetSourceText(text)); if (!newline) { if (uniLex.ContainsKey(fullWord) || fullWord == "ИЗЗА") { newline = true; } else if (text[res[i + 1].BeginChar] == ((char)0x00AD)) { newline = true; } else if (Pullenti.Morph.LanguageHelper.EndsWithEx(res[i].GetSourceText(text), "О", "о", null, null) && res[i + 2].WordForms.Count > 0 && res[i + 2].WordForms[0].IsInDictionary) { if (text[res[i + 1].BeginChar] == '¬') { List <Pullenti.Morph.MorphWordForm> li = this.ProcessOneWord0(fullWord); if (li != null && li.Count > 0 && li[0].IsInDictionary) { newline = true; } } } else if ((res[i].EndChar + 2) == res[i + 2].BeginChar) { if (!char.IsUpper(text[res[i + 2].BeginChar]) && (sps < 2) && fullWord.Length > 4) { newline = true; if ((i + 3) < res.Count) { UnicodeInfo ui3 = twrch[res[i + 3].BeginChar]; if (ui3.IsHiphen) { newline = false; } } } } else if (((res[i].EndChar + 1) == res[i + 1].BeginChar && sps > 0 && (sps < 3)) && fullWord.Length > 4) { newline = true; } } if (newline) { List <Pullenti.Morph.MorphWordForm> li = this.ProcessOneWord0(fullWord); if (li != null && li.Count > 0 && ((li[0].IsInDictionary || uniLex.ContainsKey(fullWord)))) { res[i].EndChar = res[i + 2].EndChar; res[i].Term = fullWord; res[i].WordForms = li; res.RemoveRange(i + 1, 2); } } else { } } else if ((ui1.IsLetter && ui0.IsLetter && res[i].Length > 2) && res[i + 1].Length > 1) { if (ui0.IsUpper != ui1.IsUpper) { continue; } if (!ui0.IsCyrillic || !ui1.IsCyrillic) { continue; } bool newline = false; for (j = res[i].EndChar + 1; j < res[i + 1].BeginChar; j++) { if (twrch[j].Code == 0xD || twrch[j].Code == 0xA) { newline = true; break; } } if (!newline) { continue; } string fullWord = Pullenti.Morph.LanguageHelper.CorrectWord(res[i].GetSourceText(text) + res[i + 1].GetSourceText(text)); if (!uniLex.ContainsKey(fullWord)) { continue; } List <Pullenti.Morph.MorphWordForm> li = this.ProcessOneWord0(fullWord); if (li != null && li.Count > 0 && li[0].IsInDictionary) { res[i].EndChar = res[i + 1].EndChar; res[i].Term = fullWord; res[i].WordForms = li; res.RemoveAt(i + 1); } } } } for (i = 0; i < res.Count; i++) { Pullenti.Morph.MorphToken mt = res[i]; mt.CharInfo = new Pullenti.Morph.CharsInfo(); UnicodeInfo ui0 = twrch[mt.BeginChar]; UnicodeInfo ui00 = UnicodeInfo.AllChars[(int)(mt.Term[0])]; for (j = mt.BeginChar + 1; j <= mt.EndChar; j++) { if (ui0.IsLetter) { break; } ui0 = twrch[j]; } if (ui0.IsLetter) { mt.CharInfo.IsLetter = true; if (ui00.IsLatin) { mt.CharInfo.IsLatinLetter = true; } else if (ui00.IsCyrillic) { mt.CharInfo.IsCyrillicLetter = true; } if (mt.Language == Pullenti.Morph.MorphLang.Unknown) { if (Pullenti.Morph.LanguageHelper.IsCyrillic(mt.Term)) { mt.Language = (defLang.IsUndefined ? Pullenti.Morph.MorphLang.RU : defLang); } } if (goodText) { continue; } bool allUp = true; bool allLo = true; for (j = mt.BeginChar; j <= mt.EndChar; j++) { if (twrch[j].IsUpper || twrch[j].IsDigit) { allLo = false; } else { allUp = false; } } if (allUp) { mt.CharInfo.IsAllUpper = true; } else if (allLo) { mt.CharInfo.IsAllLower = true; } else if (((ui0.IsUpper || twrch[mt.BeginChar].IsDigit)) && mt.EndChar > mt.BeginChar) { allLo = true; for (j = mt.BeginChar + 1; j <= mt.EndChar; j++) { if (twrch[j].IsUpper || twrch[j].IsDigit) { allLo = false; break; } } if (allLo) { mt.CharInfo.IsCapitalUpper = true; } else if (twrch[mt.EndChar].IsLower && (mt.EndChar - mt.BeginChar) > 1) { allUp = true; for (j = mt.BeginChar; j < mt.EndChar; j++) { if (twrch[j].IsLower) { allUp = false; break; } } if (allUp) { mt.CharInfo.IsLastLower = true; } } } } if (mt.CharInfo.IsLastLower && mt.Length > 2 && mt.CharInfo.IsCyrillicLetter) { string pref = text.Substring(mt.BeginChar, mt.EndChar - mt.BeginChar); bool ok = false; foreach (Pullenti.Morph.MorphWordForm wf in mt.WordForms) { if (wf.NormalCase == pref || wf.NormalFull == pref) { ok = true; break; } } if (!ok) { Pullenti.Morph.MorphWordForm wf0 = new Pullenti.Morph.MorphWordForm() { NormalCase = pref, Class = Pullenti.Morph.MorphClass.Noun, UndefCoef = 1 }; mt.WordForms = new List <Pullenti.Morph.MorphWordForm>(mt.WordForms); mt.WordForms.Insert(0, wf0); } } } if (goodText || onlyTokenizing) { return(res); } for (i = 0; i < res.Count; i++) { if (res[i].Length == 1 && res[i].CharInfo.IsLatinLetter) { char ch = res[i].Term[0]; if (ch == 'C' || ch == 'A' || ch == 'P') { } else { continue; } bool isRus = false; for (int ii = i - 1; ii >= 0; ii--) { if ((res[ii].EndChar + 1) != res[ii + 1].BeginChar) { break; } else if (res[ii].CharInfo.IsLetter) { isRus = res[ii].CharInfo.IsCyrillicLetter; break; } } if (!isRus) { for (int ii = i + 1; ii < res.Count; ii++) { if ((res[ii - 1].EndChar + 1) != res[ii].BeginChar) { break; } else if (res[ii].CharInfo.IsLetter) { isRus = res[ii].CharInfo.IsCyrillicLetter; break; } } } if (isRus) { res[i].Term = Pullenti.Morph.LanguageHelper.TransliteralCorrection(res[i].Term, null, true); res[i].CharInfo.IsCyrillicLetter = true; res[i].CharInfo.IsLatinLetter = true; } } } foreach (Pullenti.Morph.MorphToken r in res) { if (r.CharInfo.IsAllUpper || r.CharInfo.IsCapitalUpper) { if (r.Language.IsCyrillic) { bool ok = false; foreach (Pullenti.Morph.MorphWordForm wf in r.WordForms) { if (wf.Class.IsProperSurname) { ok = true; break; } } if (!ok) { r.WordForms = new List <Pullenti.Morph.MorphWordForm>(r.WordForms); m_EngineRu.ProcessSurnameVariants(r.Term, r.WordForms); } } } } foreach (Pullenti.Morph.MorphToken r in res) { foreach (Pullenti.Morph.MorphWordForm mv in r.WordForms) { if (mv.NormalCase == null) { mv.NormalCase = r.Term; } } } for (i = 0; i < (res.Count - 2); i++) { if (res[i].CharInfo.IsLatinLetter && res[i].CharInfo.IsAllUpper && res[i].Length == 1) { if (twrch[res[i + 1].BeginChar].IsQuot && res[i + 2].CharInfo.IsLatinLetter && res[i + 2].Length > 2) { if ((res[i].EndChar + 1) == res[i + 1].BeginChar && (res[i + 1].EndChar + 1) == res[i + 2].BeginChar) { string wstr = string.Format("{0}{1}", res[i].Term, res[i + 2].Term); List <Pullenti.Morph.MorphWordForm> li = this.ProcessOneWord0(wstr); if (li != null) { res[i].WordForms = li; } res[i].EndChar = res[i + 2].EndChar; res[i].Term = wstr; if (res[i + 2].CharInfo.IsAllLower) { res[i].CharInfo.IsAllUpper = false; res[i].CharInfo.IsCapitalUpper = true; } else if (!res[i + 2].CharInfo.IsAllUpper) { res[i].CharInfo.IsAllUpper = false; } res.RemoveRange(i + 1, 2); } } } } for (i = 0; i < (res.Count - 1); i++) { if (!res[i].CharInfo.IsLetter && !res[i + 1].CharInfo.IsLetter && (res[i].EndChar + 1) == res[i + 1].BeginChar) { if (twrch[res[i].BeginChar].IsHiphen && twrch[res[i + 1].BeginChar].IsHiphen) { if (i == 0 || !twrch[res[i - 1].BeginChar].IsHiphen) { } else { continue; } if ((i + 2) == res.Count || !twrch[res[i + 2].BeginChar].IsHiphen) { } else { continue; } res[i].EndChar = res[i + 1].EndChar; res.RemoveAt(i + 1); } } } return(res); }
public AnalysisKit(Pullenti.Ner.SourceOfAnalysis sofa = null, bool onlyTokenizing = false, Pullenti.Morph.MorphLang lang = null, ProgressChangedEventHandler progress = null) { if (sofa == null) { return; } m_Sofa = sofa; StartDate = DateTime.Now; List <Pullenti.Morph.MorphToken> tokens = Pullenti.Morph.MorphologyService.Process(sofa.Text, lang, progress); Pullenti.Ner.Token t0 = null; if (tokens != null) { for (int ii = 0; ii < tokens.Count; ii++) { Pullenti.Morph.MorphToken mt = tokens[ii]; if (mt.BeginChar == 733860) { } Pullenti.Ner.TextToken tt = new Pullenti.Ner.TextToken(mt, this); if (sofa.CorrectionDict != null) { string corw; if (sofa.CorrectionDict.TryGetValue(mt.Term, out corw)) { List <Pullenti.Morph.MorphToken> ccc = Pullenti.Morph.MorphologyService.Process(corw, lang, null); if (ccc != null && ccc.Count == 1) { Pullenti.Ner.TextToken tt1 = new Pullenti.Ner.TextToken(ccc[0], this, tt.BeginChar, tt.EndChar) { Term0 = tt.Term }; tt1.Chars = tt.Chars; tt = tt1; if (CorrectedTokens == null) { CorrectedTokens = new Dictionary <Pullenti.Ner.Token, string>(); } CorrectedTokens.Add(tt, tt.GetSourceText()); } } } if (t0 == null) { FirstToken = tt; } else { t0.Next = tt; } t0 = tt; } } if (sofa.ClearDust) { this.ClearDust(); } if (sofa.DoWordsMergingByMorph) { this.CorrectWordsByMerging(lang); } if (sofa.DoWordCorrectionByMorph) { this.CorrectWordsByMorph(lang); } this.MergeLetters(); this.DefineBaseLanguage(); if (sofa.CreateNumberTokens) { for (Pullenti.Ner.Token t = FirstToken; t != null; t = t.Next) { Pullenti.Ner.NumberToken nt = NumberHelper.TryParseNumber(t); if (nt == null) { continue; } this.EmbedToken(nt); t = nt; } } if (onlyTokenizing) { return; } for (Pullenti.Ner.Token t = FirstToken; t != null; t = t.Next) { if (t.Morph.Class.IsPreposition) { continue; } Pullenti.Morph.MorphClass mc = t.GetMorphClassInDictionary(); if (mc.IsUndefined && t.Chars.IsCyrillicLetter && t.LengthChar > 4) { string tail = sofa.Text.Substring(t.EndChar - 1, 2); Pullenti.Ner.Token tte = null; Pullenti.Ner.Token tt = t.Previous; if (tt != null && ((tt.IsCommaAnd || tt.Morph.Class.IsPreposition || tt.Morph.Class.IsConjunction))) { tt = tt.Previous; } if ((tt != null && !tt.GetMorphClassInDictionary().IsUndefined&& ((tt.Morph.Class.Value & t.Morph.Class.Value)) != 0) && tt.LengthChar > 4) { string tail2 = sofa.Text.Substring(tt.EndChar - 1, 2); if (tail2 == tail) { tte = tt; } } if (tte == null) { tt = t.Next; if (tt != null && ((tt.IsCommaAnd || tt.Morph.Class.IsPreposition || tt.Morph.Class.IsConjunction))) { tt = tt.Next; } if ((tt != null && !tt.GetMorphClassInDictionary().IsUndefined&& ((tt.Morph.Class.Value & t.Morph.Class.Value)) != 0) && tt.LengthChar > 4) { string tail2 = sofa.Text.Substring(tt.EndChar - 1, 2); if (tail2 == tail) { tte = tt; } } } if (tte != null) { t.Morph.RemoveItemsEx(tte.Morph, tte.GetMorphClassInDictionary()); } } continue; } this.CreateStatistics(); }
public TextToken(Pullenti.Morph.MorphToken source, Pullenti.Ner.Core.AnalysisKit kit, int bchar = -1, int echar = -1) : base(kit, (bchar >= 0 ? bchar : (source == null ? 0 : source.BeginChar)), (echar >= 0 ? echar : (source == null ? 0 : source.EndChar))) { if (source == null) { return; } Chars = source.CharInfo; Term = source.Term; Lemma = source.GetLemma() ?? Term; MaxLengthOfMorphVars = (short)Term.Length; Morph = new MorphCollection(); if (source.WordForms != null) { foreach (Pullenti.Morph.MorphWordForm wf in source.WordForms) { Morph.AddItem(wf); if (wf.NormalCase != null && (MaxLengthOfMorphVars < wf.NormalCase.Length)) { MaxLengthOfMorphVars = (short)wf.NormalCase.Length; } if (wf.NormalFull != null && (MaxLengthOfMorphVars < wf.NormalFull.Length)) { MaxLengthOfMorphVars = (short)wf.NormalFull.Length; } } } for (int i = 0; i < Term.Length; i++) { char ch = Term[i]; int j; for (j = 0; j < Morph.ItemsCount; j++) { Pullenti.Morph.MorphWordForm wf = Morph[j] as Pullenti.Morph.MorphWordForm; if (wf.NormalCase != null) { if (i >= wf.NormalCase.Length) { break; } if (wf.NormalCase[i] != ch) { break; } } if (wf.NormalFull != null) { if (i >= wf.NormalFull.Length) { break; } if (wf.NormalFull[i] != ch) { break; } } } if (j < Morph.ItemsCount) { break; } InvariantPrefixLengthOfMorphVars = (short)((i + 1)); } if (Morph.Language.IsUndefined && !source.Language.IsUndefined) { Morph.Language = source.Language; } }