/// <summary>
/// Tries to extract a preposition that starts at the given token.
/// </summary>
/// <param name="t">The starting token.</param>
/// <returns>
/// The preposition token, or null when <paramref name="t"/> is not a text token
/// or no preposition is recognised at it.
/// </returns>
public static PrepositionToken TryParse(Pullenti.Ner.Token t)
{
    if (!(t is Pullenti.Ner.TextToken))
    {
        return null;
    }

    // The ontology is consulted first; a match there returns immediately,
    // taking the normal form and the governed case from the matched termin.
    TerminToken tok = m_Ontology.TryParse(t, TerminParseAttr.No);
    if (tok != null)
    {
        return new PrepositionToken(t, tok.EndToken)
        {
            Normal = tok.Termin.CanonicText,
            NextCase = (Pullenti.Morph.MorphCase)tok.Termin.Tag
        };
    }

    // Otherwise fall back to the dictionary morphology of the single token.
    Pullenti.Morph.MorphClass mc = t.GetMorphClassInDictionary();
    if (!mc.IsPreposition)
    {
        return null;
    }

    PrepositionToken res = new PrepositionToken(t, t);
    res.Normal = t.GetNormalCaseText(Pullenti.Morph.MorphClass.Preposition, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false);
    res.NextCase = Pullenti.Morph.LanguageHelper.GetCaseAfterPreposition(res.Normal);

    // "<preposition>-<preposition>" written without whitespace after the first word:
    // extend the result over the hyphen and the second preposition.
    if ((t.Next != null && t.Next.IsHiphen && !t.IsWhitespaceAfter)
        && (t.Next.Next is Pullenti.Ner.TextToken)
        && t.Next.Next.GetMorphClassInDictionary().IsPreposition)
    {
        res.EndToken = t.Next.Next;
    }
    return res;
}
/// <summary>
/// Builds a ranked sentence token starting at <paramref name="t"/>.
/// The sentence runs until the next token that can start a sentence (or the end of the chain).
/// Keyword referents add their rank, pronouns subtract 1, other text tokens longer than
/// one character subtract 0.1; a sentence without a predicate keyword has its rank divided by 3.
/// </summary>
/// <param name="t">Candidate first token of the sentence.</param>
/// <returns>The sentence token, or null when <paramref name="t"/> is null or cannot start a sentence.</returns>
static AutoannoSentToken TryParse(Pullenti.Ner.Token t)
{
    if (t == null)
    {
        return null;
    }
    if (!Pullenti.Ner.Core.MiscHelper.CanBeStartOfSentence(t))
    {
        return null;
    }

    AutoannoSentToken sentence = new AutoannoSentToken(t, t);
    bool predicateSeen = false;
    Pullenti.Ner.Token cur = t;
    while (cur != null)
    {
        // A later token that can itself open a sentence terminates this one.
        if (Pullenti.Ner.Core.MiscHelper.CanBeStartOfSentence(cur) && cur != sentence.BeginToken)
        {
            break;
        }

        Pullenti.Ner.Keyword.KeywordReferent keyword = cur.GetReferent() as Pullenti.Ner.Keyword.KeywordReferent;
        if (keyword != null)
        {
            sentence.Rank += keyword.Rank;
            if (keyword.Typ == Pullenti.Ner.Keyword.KeywordType.Predicate)
            {
                predicateSeen = true;
            }
        }
        else if (cur is Pullenti.Ner.TextToken)
        {
            Pullenti.Morph.MorphClass wordClass = cur.GetMorphClassInDictionary();
            bool pronoun = wordClass.IsPronoun || wordClass.IsPersonalPronoun;
            if (pronoun)
            {
                sentence.Rank -= 1;
            }
            else if (cur.LengthChar > 1)
            {
                sentence.Rank -= 0.1;
            }
        }

        sentence.EndToken = cur;
        cur = cur.Next;
    }

    if (!predicateSeen)
    {
        sentence.Rank /= 3;
    }

    Pullenti.Ner.Core.GetTextAttr textAttrs = Pullenti.Ner.Core.GetTextAttr.KeepRegister | Pullenti.Ner.Core.GetTextAttr.KeepQuotes;
    sentence.Value = Pullenti.Ner.Core.MiscHelper.GetTextValueOfMetaToken(sentence, textAttrs);
    return sentence;
}
/// <summary>
/// Walks the token chain and replaces two neighbouring word tokens (optionally separated by a hyphen)
/// with a single merged token when the concatenated term is analysed by the morphology service
/// as exactly one token whose dictionary class is defined.
/// </summary>
/// <param name="lang">Language passed to the morphology service when the merged term is analysed.</param>
void CorrectWordsByMerging(Pullenti.Morph.MorphLang lang)
{
    for (Pullenti.Ner.Token t = FirstToken; t != null && t.Next != null; t = t.Next)
    {
        if (!t.Chars.IsLetter || (t.LengthChar < 2))
        {
            continue;
        }
        Pullenti.Morph.MorphClass leftClass = t.GetMorphClassInDictionary();
        if (t.Morph.ContainsAttr("прдктв.", null))
        {
            continue;
        }

        // Right-hand candidate: the next token, or the one behind a hyphen
        // (unless a line break follows the hyphen).
        Pullenti.Ner.Token right = t.Next;
        bool hyphenBetween = right.IsHiphen && right.Next != null && !right.IsNewlineAfter;
        if (hyphenBetween)
        {
            right = right.Next;
        }
        if (right.LengthChar == 1)
        {
            continue;
        }

        bool bothLetters = right.Chars.IsLetter && t.Chars.IsLetter;
        if (!bothLetters || right.Chars.IsLatinLetter != t.Chars.IsLatinLetter)
        {
            continue;
        }

        // Casing filter: the three rejections of the casing check folded into one expression.
        bool rejectedByCase = (right.Chars.IsAllUpper && !t.Chars.IsAllUpper)
            || !right.Chars.IsAllLower
            || t.Chars.IsAllUpper;
        if (rejectedByCase)
        {
            continue;
        }
        if (right.Morph.ContainsAttr("прдктв.", null))
        {
            continue;
        }

        // Merge only when at least one of the halves is not a dictionary word.
        Pullenti.Morph.MorphClass rightClass = right.GetMorphClassInDictionary();
        if (!rightClass.IsUndefined && !leftClass.IsUndefined)
        {
            continue;
        }

        string leftTerm = (t as Pullenti.Ner.TextToken).Term;
        string rightTerm = (right as Pullenti.Ner.TextToken).Term;
        if ((leftTerm.Length + rightTerm.Length) < 6)
        {
            continue;
        }

        string merged = leftTerm + rightTerm;
        List<Pullenti.Morph.MorphToken> forms = Pullenti.Morph.MorphologyService.Process(merged, lang, null);
        if (forms == null || forms.Count != 1)
        {
            continue;
        }
        // NOTE(review): merged is at least 6 characters long at this point, so these
        // 4-character comparisons cannot succeed - confirm the intended length threshold.
        if (merged == "ПОСТ" || merged == "ВРЕД")
        {
            continue;
        }

        Pullenti.Ner.TextToken mergedToken = new Pullenti.Ner.TextToken(forms[0], this, t.BeginChar, right.EndChar);
        if (mergedToken.GetMorphClassInDictionary().IsUndefined)
        {
            continue;
        }
        mergedToken.Chars = t.Chars;

        // Splice the merged token into the chain in place of [t .. right].
        if (t == FirstToken)
        {
            FirstToken = mergedToken;
        }
        else
        {
            t.Previous.Next = mergedToken;
        }
        if (right.Next != null)
        {
            mergedToken.Next = right.Next;
        }
        t = mergedToken;
    }
}
/// <summary>
/// Builds the token chain for a source text and runs the preprocessing steps enabled on the source.
/// </summary>
/// <param name="sofa">Source of analysis; when null the constructor returns immediately without building anything.</param>
/// <param name="onlyTokenizing">
/// When true, the constructor returns after tokenization, the optional corrections, letter merging,
/// base-language detection and number embedding, skipping the morphology pruning pass and statistics.
/// </param>
/// <param name="lang">Language passed to the morphology service.</param>
/// <param name="progress">Progress handler passed to the morphology service for the main text.</param>
public AnalysisKit(Pullenti.Ner.SourceOfAnalysis sofa = null, bool onlyTokenizing = false, Pullenti.Morph.MorphLang lang = null, ProgressChangedEventHandler progress = null)
{
    if (sofa == null)
    {
        return;
    }
    m_Sofa = sofa;
    StartDate = DateTime.Now;

    // 1. Morphological tokenization; every morph token becomes a TextToken linked into the chain.
    List<Pullenti.Morph.MorphToken> tokens = Pullenti.Morph.MorphologyService.Process(sofa.Text, lang, progress);
    Pullenti.Ner.Token t0 = null;
    if (tokens != null)
    {
        for (int ii = 0; ii < tokens.Count; ii++)
        {
            Pullenti.Morph.MorphToken mt = tokens[ii];
            Pullenti.Ner.TextToken tt = new Pullenti.Ner.TextToken(mt, this);

            // Optional user dictionary: replace the term by its correction when the
            // correction is analysed as exactly one morph token.
            if (sofa.CorrectionDict != null)
            {
                string corw;
                if (sofa.CorrectionDict.TryGetValue(mt.Term, out corw))
                {
                    List<Pullenti.Morph.MorphToken> ccc = Pullenti.Morph.MorphologyService.Process(corw, lang, null);
                    if (ccc != null && ccc.Count == 1)
                    {
                        Pullenti.Ner.TextToken tt1 = new Pullenti.Ner.TextToken(ccc[0], this, tt.BeginChar, tt.EndChar) { Term0 = tt.Term };
                        tt1.Chars = tt.Chars;
                        tt = tt1;
                        if (CorrectedTokens == null)
                        {
                            CorrectedTokens = new Dictionary<Pullenti.Ner.Token, string>();
                        }
                        CorrectedTokens.Add(tt, tt.GetSourceText());
                    }
                }
            }

            if (t0 == null)
            {
                FirstToken = tt;
            }
            else
            {
                t0.Next = tt;
            }
            t0 = tt;
        }
    }

    // 2. Optional clean-up and correction passes selected on the source.
    if (sofa.ClearDust)
    {
        this.ClearDust();
    }
    if (sofa.DoWordsMergingByMorph)
    {
        this.CorrectWordsByMerging(lang);
    }
    if (sofa.DoWordCorrectionByMorph)
    {
        this.CorrectWordsByMorph(lang);
    }
    this.MergeLetters();
    this.DefineBaseLanguage();

    // 3. Optionally embed number tokens; the loop continues from the embedded token.
    if (sofa.CreateNumberTokens)
    {
        for (Pullenti.Ner.Token t = FirstToken; t != null; t = t.Next)
        {
            Pullenti.Ner.NumberToken nt = NumberHelper.TryParseNumber(t);
            if (nt == null)
            {
                continue;
            }
            this.EmbedToken(nt);
            t = nt;
        }
    }
    if (onlyTokenizing)
    {
        return;
    }

    // 4. For Cyrillic words longer than 4 characters that are not in the dictionary, look for a
    //    neighbouring dictionary word with the same two-character ending and prune this word's
    //    morphology against it (previous neighbour first, then the next one).
    for (Pullenti.Ner.Token t = FirstToken; t != null; t = t.Next)
    {
        if (t.Morph.Class.IsPreposition)
        {
            continue;
        }
        Pullenti.Morph.MorphClass mc = t.GetMorphClassInDictionary();
        if (!mc.IsUndefined || !t.Chars.IsCyrillicLetter || t.LengthChar <= 4)
        {
            continue;
        }

        // Last two characters of the token in the source text.
        string tail = sofa.Text.Substring(t.EndChar - 1, 2);
        Pullenti.Ner.Token tte = FindNeighbourWithSameTail(t, sofa.Text, tail, true);
        if (tte == null)
        {
            tte = FindNeighbourWithSameTail(t, sofa.Text, tail, false);
        }
        if (tte != null)
        {
            t.Morph.RemoveItemsEx(tte.Morph, tte.GetMorphClassInDictionary());
        }
    }

    this.CreateStatistics();
}

// Returns the previous/next neighbour of t (stepping over one IsCommaAnd, preposition or conjunction token)
// when it is a dictionary word longer than 4 characters that shares a morph class bit with t and ends
// with the same two characters; otherwise null.
private static Pullenti.Ner.Token FindNeighbourWithSameTail(Pullenti.Ner.Token t, string text, string tail, bool backward)
{
    Pullenti.Ner.Token tt = backward ? t.Previous : t.Next;
    if (tt != null && (tt.IsCommaAnd || tt.Morph.Class.IsPreposition || tt.Morph.Class.IsConjunction))
    {
        tt = backward ? tt.Previous : tt.Next;
    }
    if (tt == null || tt.GetMorphClassInDictionary().IsUndefined)
    {
        return null;
    }
    if ((tt.Morph.Class.Value & t.Morph.Class.Value) == 0 || tt.LengthChar <= 4)
    {
        return null;
    }
    string tail2 = text.Substring(tt.EndChar - 1, 2);
    return tail2 == tail ? tt : null;
}
/// <summary>
/// Tries to recognise a parenthetical (introductory) expression starting at the given token.
/// </summary>
/// <param name="t">The starting token; null yields null.</param>
/// <returns>The recognised parenthesis token, or null when nothing is recognised.</returns>
public static ParenthesisToken TryAttach(Pullenti.Ner.Token t)
{
    if (t == null)
    {
        return null;
    }

    // 1. Direct hit in the termin collection.
    Pullenti.Ner.Core.TerminToken tok = m_Termins.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
    if (tok != null)
    {
        return new ParenthesisToken(t, tok.EndToken);
    }
    if (!(t is Pullenti.Ner.TextToken))
    {
        return null;
    }

    // 2. Adverb, or adjective carrying both the "сравн." and "кач.прил." attributes,
    //    followed by a comma or by a verb with the three attributes checked below.
    Pullenti.Morph.MorphClass mc = t.GetMorphClassInDictionary();
    bool ok = false;
    Pullenti.Ner.Token t1;
    if (mc.IsAdverb)
    {
        ok = true;
    }
    else if (mc.IsAdjective)
    {
        if (t.Morph.ContainsAttr("сравн.", null) && t.Morph.ContainsAttr("кач.прил.", null))
        {
            ok = true;
        }
    }
    if (ok && t.Next != null)
    {
        if (t.Next.IsChar(','))
        {
            return new ParenthesisToken(t, t);
        }
        t1 = t.Next;
        if (t1.GetMorphClassInDictionary() == Pullenti.Morph.MorphClass.Verb)
        {
            if (t1.Morph.ContainsAttr("н.вр.", null) && t1.Morph.ContainsAttr("нес.в.", null) && t1.Morph.ContainsAttr("дейст.з.", null))
            {
                return new ParenthesisToken(t, t1);
            }
        }
    }

    // 3. Fixed openers: "В СООТВЕТСТВИЕ <prep> ...", "СОГЛАСНО ...", "В СИЛА ...",
    //    or "В" + adjective/pronoun noun phrase ending in ВИД / СЛУЧАЙ / СФЕРА.
    t1 = null;
    if ((t.IsValue("В", null) && t.Next != null && t.Next.IsValue("СООТВЕТСТВИЕ", null)) && t.Next.Next != null && t.Next.Next.Morph.Class.IsPreposition)
    {
        t1 = t.Next.Next.Next;
    }
    else if (t.IsValue("СОГЛАСНО", null))
    {
        t1 = t.Next;
    }
    else if (t.IsValue("В", null) && t.Next != null)
    {
        if (t.Next.IsValue("СИЛА", null))
        {
            t1 = t.Next.Next;
        }
        else if (t.Next.Morph.Class.IsAdjective || t.Next.Morph.Class.IsPronoun)
        {
            Pullenti.Ner.Core.NounPhraseToken kindPhrase = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t.Next, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
            if (kindPhrase != null)
            {
                if (kindPhrase.Noun.IsValue("ВИД", null) || kindPhrase.Noun.IsValue("СЛУЧАЙ", null) || kindPhrase.Noun.IsValue("СФЕРА", null))
                {
                    return new ParenthesisToken(t, kindPhrase.EndToken);
                }
            }
        }
    }

    if (t1 != null)
    {
        // Step over an intermediate "НОРМА / ПОЛОЖЕНИЕ / УКАЗАНИЕ" noun phrase.
        if (t1.Next != null)
        {
            Pullenti.Ner.Core.NounPhraseToken normPhrase = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t1, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
            if (normPhrase != null)
            {
                if (normPhrase.Noun.IsValue("НОРМА", null) || normPhrase.Noun.IsValue("ПОЛОЖЕНИЕ", null) || normPhrase.Noun.IsValue("УКАЗАНИЕ", null))
                {
                    t1 = normPhrase.EndToken.Next;
                }
            }
        }

        // Fix: the step above may run off the end of the chain (EndToken.Next == null);
        // previously t1.GetReferent() was then called on null.
        if (t1 != null)
        {
            Pullenti.Ner.Referent r = t1.GetReferent();
            if (r != null)
            {
                ParenthesisToken res = new ParenthesisToken(t, t1) { Ref = r };
                if (t1.Next != null && t1.Next.IsComma)
                {
                    // Between this comma and the next one, extend the result when
                    // "СИЛА" or "ДЕЙСТВИЕ" occurs; stop at a possible bracket start.
                    bool sila = false;
                    for (Pullenti.Ner.Token ttt = t1.Next.Next; ttt != null; ttt = ttt.Next)
                    {
                        if (ttt.IsValue("СИЛА", null) || ttt.IsValue("ДЕЙСТВИЕ", null))
                        {
                            sila = true;
                            continue;
                        }
                        if (ttt.IsComma)
                        {
                            if (sila)
                            {
                                res.EndToken = ttt.Previous;
                            }
                            break;
                        }
                        if (Pullenti.Ner.Core.BracketHelper.CanBeStartOfSequence(ttt, false, false))
                        {
                            break;
                        }
                    }
                }
                return res;
            }

            Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t1, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
            if (npt != null)
            {
                return new ParenthesisToken(t, npt.EndToken);
            }
        }
    }

    // 4. [НЕ] + preposition + noun phrase, followed by a comma or with the noun "ОЧЕРЕДЬ".
    Pullenti.Ner.Token tt = t;
    if (tt.IsValue("НЕ", null))
    {
        tt = tt.Next;
    }
    // Fix: the null test must precede the dereference - tt is null when "НЕ" is the last token.
    if (tt != null && tt.Morph.Class.IsPreposition)
    {
        tt = tt.Next;
        Pullenti.Ner.Core.NounPhraseToken npt1 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(tt, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
        if (npt1 != null)
        {
            tt = npt1.EndToken;
            if (tt.Next != null && tt.Next.IsComma)
            {
                return new ParenthesisToken(t, tt.Next);
            }
            if (npt1.Noun.IsValue("ОЧЕРЕДЬ", null))
            {
                return new ParenthesisToken(t, tt);
            }
        }
    }

    if (t.IsValue("ВЕДЬ", null))
    {
        return new ParenthesisToken(t, t);
    }
    return null;
}
/// <summary>
/// Tries to extract a conjunction that starts at the given token.
/// </summary>
/// <param name="t">The starting token.</param>
/// <returns>
/// The conjunction token, or null when <paramref name="t"/> is not a text token
/// or no conjunction is recognised at it.
/// </returns>
public static ConjunctionToken TryParse(Pullenti.Ner.Token t)
{
    if (!(t is Pullenti.Ner.TextToken))
    {
        return null;
    }

    // A comma either prefixes a conjunction recognised right after it (the result then
    // starts at the comma and is marked non-simple) or is a simple conjunction on its own.
    if (t.IsComma)
    {
        ConjunctionToken ne = TryParse(t.Next);
        if (ne != null)
        {
            ne.BeginToken = t;
            ne.IsSimple = false;
            return ne;
        }
        return new ConjunctionToken(t, t) { Typ = ConjunctionType.Comma, IsSimple = true, Normal = "," };
    }

    // Ontology match.
    TerminToken tok = m_Ontology.TryParse(t, TerminParseAttr.No);
    if (tok != null)
    {
        if (t.IsValue("ТО", null))
        {
            // Reject when a noun phrase starting here reaches beyond the matched termin.
            NounPhraseToken npt = NounPhraseHelper.TryParse(t, NounPhraseParseAttr.ParseAdverbs, 0, null);
            if (npt != null && npt.EndChar > tok.EndToken.EndChar)
            {
                return null;
            }
        }
        if (tok.Termin.Tag2 != null)
        {
            // Termins carrying Tag2 must end with a text token; if that token is a verb
            // in the dictionary, its term has to end with "АЯ".
            if (!(tok.EndToken is Pullenti.Ner.TextToken))
            {
                return null;
            }
            if (tok.EndToken.GetMorphClassInDictionary().IsVerb)
            {
                if (!(tok.EndToken as Pullenti.Ner.TextToken).Term.EndsWith("АЯ"))
                {
                    return null;
                }
            }
        }
        return new ConjunctionToken(t, tok.EndToken) { Normal = tok.Termin.CanonicText, Typ = (ConjunctionType)tok.Termin.Tag };
    }

    if (!t.GetMorphClassInDictionary().IsConjunction)
    {
        return null;
    }

    if (t.IsAnd || t.IsOr)
    {
        ConjunctionToken res = new ConjunctionToken(t, t)
        {
            Normal = (t as Pullenti.Ner.TextToken).Term,
            IsSimple = true,
            Typ = (t.IsOr ? ConjunctionType.Or : ConjunctionType.And)
        };
        // Extend over a following "(<or>)" or "/<or>" (also "\<or>") group.
        if (((t.Next != null && t.Next.IsChar('(') && (t.Next.Next is Pullenti.Ner.TextToken)) && t.Next.Next.IsOr && t.Next.Next.Next != null) && t.Next.Next.Next.IsChar(')'))
        {
            res.EndToken = t.Next.Next.Next;
        }
        else if ((t.Next != null && t.Next.IsCharOf("\\/") && (t.Next.Next is Pullenti.Ner.TextToken)) && t.Next.Next.IsOr)
        {
            res.EndToken = t.Next.Next;
        }
        return res;
    }

    string term = (t as Pullenti.Ner.TextToken).Term;
    if (term == "НИ")
    {
        return new ConjunctionToken(t, t) { Normal = term, Typ = ConjunctionType.Not };
    }
    if ((term == "А" || term == "НО" || term == "ЗАТО") || term == "ОДНАКО")
    {
        return new ConjunctionToken(t, t) { Normal = term, Typ = ConjunctionType.But };
    }
    return null;
}
/// <summary>
/// Tries to recognise an eponym ("named after ...") part of an organization name starting at the given token.
/// </summary>
/// <param name="t">The starting token; null yields null.</param>
/// <param name="mustHasPrefix">
/// When true, only candidates introduced by "ИМЕНИ"/"ІМЕНІ" or "ИМ"/"ІМ" are accepted
/// (this applies to text-token starts only).
/// </param>
/// <returns>The eponym token with its collected eponym strings, or null when nothing is recognised.</returns>
public static OrgItemEponymToken TryAttach(Pullenti.Ner.Token t, bool mustHasPrefix = false)
{
    Pullenti.Ner.TextToken tt = t as Pullenti.Ner.TextToken;
    if (tt == null)
    {
        // Non-text start: a few fixed dates or an "age" number followed by a capitalized Cyrillic token.
        if (t == null)
        {
            return null;
        }
        Pullenti.Ner.Referent r1 = t.GetReferent();
        if (r1 != null && r1.TypeName == "DATE")
        {
            string str = r1.ToString().ToUpper();
            if ((str == "1 МАЯ" || str == "7 ОКТЯБРЯ" || str == "9 МАЯ") || str == "8 МАРТА")
            {
                OrgItemEponymToken dateToken = new OrgItemEponymToken(t, t) { Eponyms = new List<string>() };
                dateToken.Eponyms.Add(str);
                return dateToken;
            }
        }
        Pullenti.Ner.NumberToken age = Pullenti.Ner.Core.NumberHelper.TryParseAge(t);
        if ((age != null && (((age.EndToken.Next is Pullenti.Ner.TextToken) || (age.EndToken.Next is Pullenti.Ner.ReferentToken))) && (age.WhitespacesAfterCount < 3)) && !age.EndToken.Next.Chars.IsAllLower && age.EndToken.Next.Chars.IsCyrillicLetter)
        {
            OrgItemEponymToken ageToken = new OrgItemEponymToken(t, age.EndToken.Next) { Eponyms = new List<string>() };
            ageToken.Eponyms.Add(string.Format("{0} {1}", age.Value, ageToken.EndToken.GetSourceText().ToUpper()));
            return ageToken;
        }
        return null;
    }

    // Determine where the name itself starts (t1) and how it was introduced.
    Pullenti.Ner.Token t1 = null;
    bool full = false;
    bool hasName = false;
    if (tt.Term == "ИМЕНИ" || tt.Term == "ІМЕНІ")
    {
        t1 = t.Next;
        full = true;
        hasName = true;
    }
    else if (((tt.Term == "ИМ" || tt.Term == "ІМ")) && tt.Next != null)
    {
        if (tt.Next.IsChar('.'))
        {
            t1 = tt.Next.Next;
            full = true;
        }
        else if ((tt.Next is Pullenti.Ner.TextToken) && tt.Chars.IsAllLower && !tt.Next.Chars.IsAllLower)
        {
            t1 = tt.Next;
        }
        hasName = true;
    }
    else if (tt.Previous != null && ((tt.Previous.IsValue("ФОНД", null) || tt.Previous.IsValue("ХРАМ", null) || tt.Previous.IsValue("ЦЕРКОВЬ", "ЦЕРКВА"))))
    {
        // No prefix word, but the previous token is one of the nouns above: the name starts at tt itself.
        if ((!tt.Chars.IsCyrillicLetter || tt.Morph.Class.IsPreposition || tt.Morph.Class.IsConjunction) || !tt.Chars.IsLetter)
        {
            return null;
        }
        if (tt.WhitespacesBeforeCount != 1)
        {
            return null;
        }
        if (tt.Chars.IsAllLower)
        {
            return null;
        }
        if (tt.Morph.Class.IsAdjective)
        {
            Pullenti.Ner.Core.NounPhraseToken adjPhrase = Pullenti.Ner.Core.NounPhraseHelper.TryParse(tt, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
            if (adjPhrase != null && adjPhrase.BeginToken != adjPhrase.EndToken)
            {
                return null;
            }
        }
        OrgItemNameToken na = OrgItemNameToken.TryAttach(tt, null, false, true);
        if (na != null)
        {
            if (na.IsEmptyWord || na.IsStdName || na.IsStdTail)
            {
                return null;
            }
        }
        t1 = tt;
    }

    if (t1 == null || ((t1.IsNewlineBefore && !full)))
    {
        return null;
    }
    if (tt.Previous != null && tt.Previous.Morph.Class.IsPreposition)
    {
        return null;
    }
    if (mustHasPrefix && !hasName)
    {
        return null;
    }

    // A date with a day but without a year after a full prefix.
    Pullenti.Ner.Referent r = t1.GetReferent();
    if ((r != null && r.TypeName == "DATE" && full) && r.FindSlot("DAY", null, true) != null && r.FindSlot("YEAR", null, true) == null)
    {
        OrgItemEponymToken dayToken = new OrgItemEponymToken(t, t1) { Eponyms = new List<string>() };
        dayToken.Eponyms.Add(r.ToString().ToUpper());
        return dayToken;
    }

    // Optional "saint" marker, possibly abbreviated with a dot.
    bool holy = false;
    if ((t1.IsValue("СВЯТОЙ", null) || t1.IsValue("СВЯТИЙ", null) || t1.IsValue("СВ", null)) || t1.IsValue("СВЯТ", null))
    {
        t1 = t1.Next;
        holy = true;
        if (t1 != null && t1.IsChar('.'))
        {
            t1 = t1.Next;
        }
    }
    if (t1 == null)
    {
        return null;
    }

    // A multi-token PERSON referent whose last token is the last name.
    Pullenti.Morph.MorphClass cl = t1.GetMorphClassInDictionary();
    if (cl.IsNoun || cl.IsAdjective)
    {
        Pullenti.Ner.ReferentToken rt = t1.Kit.ProcessReferent("PERSON", t1);
        if (rt != null && rt.Referent.TypeName == "PERSON" && rt.BeginToken != rt.EndToken)
        {
            string e = rt.Referent.GetStringValue("LASTNAME");
            if (e != null)
            {
                if (rt.EndToken.IsValue(e, null))
                {
                    // NOTE(review): relies on OrgItemEponymToken providing a non-null Eponyms list by default - confirm.
                    OrgItemEponymToken personToken = new OrgItemEponymToken(t, rt.EndToken);
                    personToken.Eponyms.Add(rt.EndToken.GetSourceText());
                    return personToken;
                }
            }
        }
    }

    // Anniversary: "<N> years of <noun phrase>".
    Pullenti.Ner.NumberToken nt = Pullenti.Ner.Core.NumberHelper.TryParseAnniversary(t1);
    if (nt != null && nt.Typ == Pullenti.Ner.NumberSpellingType.Age)
    {
        Pullenti.Ner.Core.NounPhraseToken annPhrase = Pullenti.Ner.Core.NounPhraseHelper.TryParse(nt.EndToken.Next, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
        if (annPhrase != null)
        {
            string annText = string.Format("{0}-{1} {2}", nt.Value, (t.Kit.BaseLanguage.IsUa ? "РОКІВ" : "ЛЕТ"), Pullenti.Ner.Core.MiscHelper.GetTextValue(annPhrase.BeginToken, annPhrase.EndToken, Pullenti.Ner.Core.GetTextAttr.No));
            OrgItemEponymToken annToken = new OrgItemEponymToken(t, annPhrase.EndToken);
            annToken.Eponyms.Add(annText);
            return annToken;
        }
    }

    List<PersonItemToken> its = PersonItemToken.TryAttach(t1);
    if (its == null)
    {
        // No person items: accept a geo referent as the eponym.
        if ((t1 is Pullenti.Ner.ReferentToken) && (t1.GetReferent() is Pullenti.Ner.Geo.GeoReferent))
        {
            string geoText = Pullenti.Ner.Core.MiscHelper.GetTextValue(t1, t1, Pullenti.Ner.Core.GetTextAttr.No);
            OrgItemEponymToken geoToken = new OrgItemEponymToken(t, t1);
            geoToken.Eponyms.Add(geoText);
            return geoToken;
        }
        return null;
    }

    List<string> eponims = new List<string>();
    int i = 0;
    // Skip one leading lower-case word; the emptiness guard avoids indexing an empty list.
    if (its.Count > 0 && its[i].Typ == PersonItemType.LocaseWord)
    {
        i++;
    }
    if (i >= its.Count)
    {
        return null;
    }
    if (!full)
    {
        if (its[i].BeginToken.Morph.Class.IsAdjective && !its[i].BeginToken.Morph.Class.IsProperSurname)
        {
            return null;
        }
    }

    if (its[i].Typ == PersonItemType.Initial)
    {
        // "I. [I.] Surname [and I. [I.] Surname ...]"
        i++;
        while (true)
        {
            if ((i < its.Count) && its[i].Typ == PersonItemType.Initial)
            {
                i++;
            }
            if (i >= its.Count || ((its[i].Typ != PersonItemType.Surname && its[i].Typ != PersonItemType.Name)))
            {
                break;
            }
            eponims.Add(its[i].Value);
            t1 = its[i].EndToken;
            if ((i + 2) >= its.Count || its[i + 1].Typ != PersonItemType.And || its[i + 2].Typ != PersonItemType.Initial)
            {
                break;
            }
            i += 3;
        }
    }
    else if (((i + 1) < its.Count) && its[i].Typ == PersonItemType.Name && its[i + 1].Typ == PersonItemType.Surname)
    {
        // "Name Surname [and Name Surname]"
        eponims.Add(its[i + 1].Value);
        t1 = its[i + 1].EndToken;
        i += 2;
        if ((((i + 2) < its.Count) && its[i].Typ == PersonItemType.And && its[i + 1].Typ == PersonItemType.Name) && its[i + 2].Typ == PersonItemType.Surname)
        {
            eponims.Add(its[i + 2].Value);
            t1 = its[i + 2].EndToken;
        }
    }
    else if (its[i].Typ == PersonItemType.Surname)
    {
        // Two trailing items with identical character info are glued into one surname.
        if (its.Count == (i + 2) && its[i].Chars == its[i + 1].Chars)
        {
            its[i].Value += (" " + its[i + 1].Value);
            its[i].EndToken = its[i + 1].EndToken;
            its.RemoveAt(i + 1);
        }
        eponims.Add(its[i].Value);
        if (((i + 1) < its.Count) && its[i + 1].Typ == PersonItemType.Name)
        {
            if ((i + 2) == its.Count)
            {
                i++;
            }
            else if (its[i + 2].Typ != PersonItemType.Surname)
            {
                i++;
            }
        }
        else if (((i + 1) < its.Count) && its[i + 1].Typ == PersonItemType.Initial)
        {
            if ((i + 2) == its.Count)
            {
                i++;
            }
            else if (its[i + 2].Typ == PersonItemType.Initial && (i + 3) == its.Count)
            {
                i += 2;
            }
        }
        else if (((i + 2) < its.Count) && its[i + 1].Typ == PersonItemType.And && its[i + 2].Typ == PersonItemType.Surname)
        {
            // "Surname and Surname": the second one is rejected when its noun phrase
            // has a defined case that is not genitive.
            bool ok = true;
            Pullenti.Ner.Core.NounPhraseToken secondPhrase = Pullenti.Ner.Core.NounPhraseHelper.TryParse(its[i + 2].BeginToken, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
            if (secondPhrase != null && !secondPhrase.Morph.Case.IsGenitive && !secondPhrase.Morph.Case.IsUndefined)
            {
                ok = false;
            }
            if (ok)
            {
                eponims.Add(its[i + 2].Value);
                i += 2;
            }
        }
        t1 = its[i].EndToken;
    }
    else if (its[i].Typ == PersonItemType.Name && holy)
    {
        // Saint's name, optionally followed by a second item with identical character info.
        t1 = its[i].EndToken;
        bool sec = false;
        if (((i + 1) < its.Count) && its[i].Chars == its[i + 1].Chars && its[i + 1].Typ != PersonItemType.Initial)
        {
            sec = true;
            t1 = its[i + 1].EndToken;
        }
        if (sec)
        {
            eponims.Add(string.Format("СВЯТ.{0} {1}", its[i].Value, its[i + 1].Value));
        }
        else
        {
            eponims.Add(string.Format("СВЯТ.{0}", its[i].Value));
        }
    }
    else if (full && (i + 1) == its.Count && ((its[i].Typ == PersonItemType.Name || its[i].Typ == PersonItemType.Surname)))
    {
        t1 = its[i].EndToken;
        eponims.Add(its[i].Value);
    }
    else if ((its[i].Typ == PersonItemType.Name && its.Count == 3 && (i + 2) < its.Count && its[i + 1].Typ == PersonItemType.Name) && its[i + 2].Typ == PersonItemType.Surname)
    {
        // Fix: the (i + 2) bound is checked explicitly; with a skipped leading lower-case word (i == 1)
        // and three items, its[i + 2] used to be read past the end of the list.
        t1 = its[i + 2].EndToken;
        eponims.Add(string.Format("{0} {1} {2}", its[i].Value, its[i + 1].Value, its[i + 2].Value));
        i += 2;
    }

    if (eponims.Count == 0)
    {
        return null;
    }
    return new OrgItemEponymToken(t, t1) { Eponyms = eponims };
}
/// <summary>
/// Parses the sentence item(s) that start at token <paramref name="t"/>. Alternatives are tried in order:
/// referent token, delimiter, conjunction, number with unit/noun, noun phrase and/or verb phrase,
/// adverb, single adjective.
/// </summary>
/// <param name="t">Token to parse from; null yields null.</param>
/// <param name="t1">Token bounding the range: null is returned when <paramref name="t"/> begins after <paramref name="t1"/> ends.</param>
/// <param name="lev">Recursion depth; null is returned when it exceeds 100.</param>
/// <param name="prev">Previously parsed items; scanned backwards for an item with a OneOf attribute (may be null).</param>
/// <returns>A non-empty list of alternative/adjacent items, or null when nothing is recognised.</returns>
public static List<SentItem> ParseNearItems(Pullenti.Ner.Token t, Pullenti.Ner.Token t1, int lev, List<SentItem> prev)
{
    if (lev > 100)
    {
        return null;
    }
    if (t == null || t.BeginChar > t1.EndChar)
    {
        return null;
    }
    List<SentItem> res = new List<SentItem>();

    // --- single-token / simple alternatives ---
    if (t is Pullenti.Ner.ReferentToken)
    {
        res.Add(new SentItem(t as Pullenti.Ner.MetaToken));
        return res;
    }
    DelimToken delim = DelimToken.TryParse(t);
    if (delim != null)
    {
        res.Add(new SentItem(delim));
        return res;
    }
    Pullenti.Ner.Core.ConjunctionToken conj = Pullenti.Ner.Core.ConjunctionHelper.TryParse(t);
    if (conj != null)
    {
        res.Add(new SentItem(conj));
        return res;
    }

    // --- [preposition] number [unit | noun phrase] ---
    Pullenti.Ner.Core.PrepositionToken prep = Pullenti.Ner.Core.PrepositionHelper.TryParse(t);
    Pullenti.Ner.Token t111 = (prep == null ? t : prep.EndToken.Next);
    if ((t111 is Pullenti.Ner.NumberToken) && ((t111.Morph.Class.IsAdjective && !t111.Morph.Class.IsNoun)))
    {
        // A number token that is an adjective and not a noun is not treated as a quantity.
        t111 = null;
    }
    Pullenti.Ner.Measure.Internal.NumbersWithUnitToken num = (t111 == null ? null : Pullenti.Ner.Measure.Internal.NumbersWithUnitToken.TryParse(t111, null, false, false, false, false));
    if (num != null)
    {
        if (num.Units.Count == 0)
        {
            Pullenti.Ner.Core.NounPhraseToken npt1 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(num.EndToken.Next, m_NptAttrs, 0, null);
            if (npt1 == null && num.EndToken.Next != null && num.EndToken.Next.IsValue("РАЗ", null))
            {
                // "РАЗ" is wrapped into a one-token noun phrase by hand.
                npt1 = new Pullenti.Ner.Core.NounPhraseToken(num.EndToken.Next, num.EndToken.Next);
                npt1.Noun = new Pullenti.Ner.MetaToken(num.EndToken.Next, num.EndToken.Next);
            }
            if (npt1 != null && prep != null)
            {
                if (npt1.Noun.EndToken.IsValue("РАЗ", null))
                {
                    npt1.Morph.RemoveItems(prep.NextCase);
                }
                else if (((npt1.Morph.Case & prep.NextCase)).IsUndefined)
                {
                    // The noun phrase case does not agree with the preposition.
                    npt1 = null;
                }
                else
                {
                    npt1.Morph.RemoveItems(prep.NextCase);
                }
            }
            if ((npt1 != null && npt1.EndToken.IsValue("ОНИ", null) && npt1.Preposition != null) && npt1.Preposition.Normal == "ИЗ")
            {
                // "<number> ИЗ <ОНИ>": either a OneOf attribute (for "ОДИН") or a quantity.
                npt1.Morph = new Pullenti.Ner.MorphCollection(num.EndToken.Morph);
                npt1.Preposition = null;
                string nn = num.ToString();
                SentItem si1 = new SentItem(npt1);
                if (nn == "1" && (num.EndToken is Pullenti.Ner.NumberToken) && (num.EndToken as Pullenti.Ner.NumberToken).EndToken.IsValue("ОДИН", null))
                {
                    Pullenti.Semantic.SemAttribute a = new Pullenti.Semantic.SemAttribute() { Typ = Pullenti.Semantic.SemAttributeType.OneOf, Spelling = (num.EndToken as Pullenti.Ner.NumberToken).EndToken.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Singular, Pullenti.Morph.MorphGender.Undefined, false) };
                    SemAttributeEx aex = new SemAttributeEx(num) { Attr = a };
                    si1.Attrs = new List<SemAttributeEx>();
                    si1.Attrs.Add(aex);
                }
                else
                {
                    si1.Quant = new Pullenti.Semantic.SemQuantity(nn, num.BeginToken, num.EndToken);
                }
                if (prep != null)
                {
                    si1.Prep = prep.Normal;
                }
                res.Add(si1);
                return res;
            }
            if (npt1 != null)
            {
                SentItem si1 = new SentItem(npt1) { Quant = new Pullenti.Semantic.SemQuantity(num.ToString(), num.BeginToken, num.EndToken) };
                if (prep != null)
                {
                    si1.Prep = prep.Normal;
                }
                if (npt1.EndToken.IsValue("РАЗ", null))
                {
                    si1.Typ = SentItemType.Formula;
                }
                if (((npt1.Morph.Number & Pullenti.Morph.MorphNumber.Plural)) == Pullenti.Morph.MorphNumber.Undefined && si1.Quant.Spelling != "1")
                {
                    // The phrase has no plural reading although the quantity is not "1":
                    // force plural for ...1, formulas, and ...2/...3/...4 with a genitive phrase.
                    string spelling = si1.Quant.Spelling;
                    bool ok = false;
                    if (spelling.EndsWith("1"))
                    {
                        ok = true;
                    }
                    else if (si1.Typ == SentItemType.Formula)
                    {
                        ok = true;
                    }
                    else if ((spelling.EndsWith("2") || spelling.EndsWith("3") || spelling.EndsWith("4")) && npt1.Morph.Case.IsGenitive)
                    {
                        ok = true;
                    }
                    if (ok)
                    {
                        npt1.Morph = new Pullenti.Ner.MorphCollection();
                        npt1.Morph.Number = Pullenti.Morph.MorphNumber.Plural;
                    }
                }
                res.Add(si1);
                return res;
            }
        }

        // Plain number (with units, or without a usable noun phrase).
        num.BeginToken = t;
        num.Morph = new Pullenti.Ner.MorphCollection(num.EndToken.Morph);
        SentItem si = new SentItem(num);
        if (prep != null)
        {
            si.Prep = prep.Normal;
        }
        res.Add(si);
        if (si.Prep == "НА")
        {
            // "НА <number> <less/greater adverb>"
            AdverbToken aa = AdverbToken.TryParse(si.EndToken.Next);
            if (aa != null && ((aa.Typ == Pullenti.Semantic.SemAttributeType.Less || aa.Typ == Pullenti.Semantic.SemAttributeType.Great)))
            {
                si.AddAttr(aa);
                si.EndToken = aa.EndToken;
            }
        }
        return res;
    }

    // --- noun phrase / adverb / verb phrase ---
    Pullenti.Morph.MorphClass mc = t.GetMorphClassInDictionary();
    AdverbToken adv = AdverbToken.TryParse(t);
    Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t, m_NptAttrs, 0, null);
    if (npt != null && (npt.EndToken is Pullenti.Ner.TextToken) && (npt.EndToken as Pullenti.Ner.TextToken).Term == "БЫЛИ")
    {
        npt = null;
    }
    if (npt != null && adv != null)
    {
        if (adv.EndChar > npt.EndChar)
        {
            npt = null;
        }
        else if (adv.EndChar == npt.EndChar)
        {
            // Same span: return both readings.
            res.Add(new SentItem(npt));
            res.Add(new SentItem(adv));
            return res;
        }
    }
    if (npt != null && npt.Adjectives.Count == 0)
    {
        if (npt.EndToken.IsValue("КОТОРЫЙ", null) && t.Previous != null && t.Previous.IsCommaAnd)
        {
            List<SentItem> res1 = ParseSubsent(npt, t1, lev + 1, prev);
            if (res1 != null)
            {
                return res1;
            }
        }
        if (npt.EndToken.IsValue("СКОЛЬКО", null))
        {
            // "СКОЛЬКО [ВСЕГО] <noun phrase>": "СКОЛЬКО" becomes an adjective of the following phrase.
            Pullenti.Ner.Token tt1 = npt.EndToken.Next;
            if (tt1 != null && tt1.IsValue("ВСЕГО", null))
            {
                tt1 = tt1.Next;
            }
            Pullenti.Ner.Core.NounPhraseToken npt1 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(tt1, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
            if (npt1 != null && !npt1.Morph.Case.IsUndefined && prep != null)
            {
                if (((prep.NextCase & npt1.Morph.Case)).IsUndefined)
                {
                    npt1 = null;
                }
                else
                {
                    npt1.Morph.RemoveItems(prep.NextCase);
                }
            }
            if (npt1 != null)
            {
                npt1.BeginToken = npt.BeginToken;
                npt1.Preposition = npt.Preposition;
                npt1.Adjectives.Add(new Pullenti.Ner.MetaToken(npt.EndToken, npt.EndToken));
                npt = npt1;
            }
        }
        if (npt.EndToken.Morph.Class.IsAdjective)
        {
            if (Pullenti.Ner.Core.VerbPhraseHelper.TryParse(t, true, false, false) != null)
            {
                npt = null;
            }
        }
    }

    Pullenti.Ner.Core.VerbPhraseToken vrb = null;
    if (npt != null && npt.Adjectives.Count > 0)
    {
        vrb = Pullenti.Ner.Core.VerbPhraseHelper.TryParse(t, true, false, false);
        if (vrb != null && vrb.FirstVerb.IsParticiple)
        {
            npt = null;
        }
    }
    else if (adv == null || npt != null)
    {
        vrb = Pullenti.Ner.Core.VerbPhraseHelper.TryParse(t, true, false, false);
    }
    if (npt != null)
    {
        res.Add(new SentItem(npt));
    }

    if (vrb != null && !vrb.FirstVerb.IsParticiple && !vrb.FirstVerb.IsDeeParticiple)
    {
        // Finite verb: emit one item per dictionary verb word form.
        List<Pullenti.Morph.MorphWordForm> vars = new List<Pullenti.Morph.MorphWordForm>();
        foreach (Pullenti.Morph.MorphBaseInfo wf in vrb.FirstVerb.Morph.Items)
        {
            if (wf.Class.IsVerb && (wf is Pullenti.Morph.MorphWordForm) && (wf as Pullenti.Morph.MorphWordForm).IsInDictionary)
            {
                vars.Add(wf as Pullenti.Morph.MorphWordForm);
            }
        }
        if (vars.Count < 2)
        {
            res.Add(new SentItem(vrb));
        }
        else
        {
            vrb.FirstVerb.VerbMorph = vars[0];
            res.Add(new SentItem(vrb));
            for (int i = 1; i < vars.Count; i++)
            {
                vrb = Pullenti.Ner.Core.VerbPhraseHelper.TryParse(t, false, false, false);
                if (vrb == null)
                {
                    break;
                }
                vrb.FirstVerb.VerbMorph = vars[i];
                res.Add(new SentItem(vrb));
            }
            // Put a non-imperative reading before an imperative one. The Count guard protects
            // res[1] when the re-parse loop above stopped before adding a second item.
            if (res.Count > 1 && vars[0].Misc.Mood == Pullenti.Morph.MorphMood.Imperative && vars[1].Misc.Mood != Pullenti.Morph.MorphMood.Imperative)
            {
                SentItem rr = res[0];
                res[0] = res[1];
                res[1] = rr;
            }
        }
        return res;
    }
    if (vrb != null)
    {
        List<SentItem> res1 = ParseParticiples(vrb, t1, lev + 1);
        if (res1 != null)
        {
            res.AddRange(res1);
        }
    }
    if (res.Count > 0)
    {
        return res;
    }

    // --- adverb ---
    if (adv != null)
    {
        if (adv.Typ == Pullenti.Semantic.SemAttributeType.Other)
        {
            Pullenti.Ner.Core.NounPhraseToken npt1 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(adv.EndToken.Next, m_NptAttrs, 0, null);
            if (npt1 != null && npt1.EndToken.IsValue("ОНИ", null) && npt1.Preposition != null)
            {
                SentItem si1 = new SentItem(npt1);
                Pullenti.Semantic.SemAttribute a = new Pullenti.Semantic.SemAttribute() { Typ = Pullenti.Semantic.SemAttributeType.Other, Spelling = adv.EndToken.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false) };
                // Fix: the attribute is anchored on the adverb. The former argument `num`
                // is always null on this path (a non-null num returns earlier).
                SemAttributeEx aex = new SemAttributeEx(adv) { Attr = a };
                si1.Attrs = new List<SemAttributeEx>();
                si1.Attrs.Add(aex);
                if (prep != null)
                {
                    si1.Prep = prep.Normal;
                }
                res.Add(si1);
                return res;
            }

            // Otherwise attach the "other" adverb to the source of the nearest previous item
            // that carries a OneOf attribute. A null prev simply means there is nothing to scan.
            if (prev != null)
            {
                for (int i = prev.Count - 1; i >= 0; i--)
                {
                    if (prev[i].Attrs == null)
                    {
                        continue;
                    }
                    foreach (SemAttributeEx a in prev[i].Attrs)
                    {
                        if (a.Attr.Typ == Pullenti.Semantic.SemAttributeType.OneOf)
                        {
                            SentItem si1 = new SentItem(prev[i].Source);
                            Pullenti.Semantic.SemAttribute aa = new Pullenti.Semantic.SemAttribute() { Typ = Pullenti.Semantic.SemAttributeType.Other, Spelling = adv.EndToken.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false) };
                            SemAttributeEx aex = new SemAttributeEx(adv) { Attr = aa };
                            si1.Attrs = new List<SemAttributeEx>();
                            si1.Attrs.Add(aex);
                            if (prep != null)
                            {
                                si1.Prep = prep.Normal;
                            }
                            si1.BeginToken = adv.BeginToken;
                            si1.EndToken = adv.EndToken;
                            res.Add(si1);
                            return res;
                        }
                    }
                }
            }
        }
        res.Add(new SentItem(adv));
        return res;
    }

    // --- lone adjective: wrapped into a one-token noun phrase ---
    if (mc.IsAdjective)
    {
        npt = new Pullenti.Ner.Core.NounPhraseToken(t, t) { Morph = new Pullenti.Ner.MorphCollection(t.Morph) };
        npt.Noun = new Pullenti.Ner.MetaToken(t, t);
        res.Add(new SentItem(npt));
        return res;
    }
    return null;
}
/// <summary>
/// Tries to recognise an organization written as "&lt;name words&gt; &lt;type item&gt;", where the type item
/// is whatever <c>TryAttach</c> recognises (the code special-cases "company" and bank items),
/// optionally enclosed in round brackets.
/// </summary>
/// <param name="t">The starting token; null yields null.</param>
/// <param name="canBeCyr">When true, Cyrillic name words are accepted in addition to Latin ones.</param>
/// <returns>A referent token wrapping a new organization referent, or null when nothing is recognised.</returns>
public static Pullenti.Ner.ReferentToken TryAttachOrg(Pullenti.Ner.Token t, bool canBeCyr = false)
{
    if (t == null)
    {
        return null;
    }

    // An opening bracket is skipped; a closing one is then required at the end (see bottom).
    bool br = false;
    if (t.IsChar('(') && t.Next != null)
    {
        t = t.Next;
        br = true;
    }

    // --- first token filter ---
    if (t is Pullenti.Ner.NumberToken)
    {
        // Only a capitalized adjective number written in words may start a name.
        bool wordNumber = (t as Pullenti.Ner.NumberToken).Typ == Pullenti.Ner.NumberSpellingType.Words && t.Morph.Class.IsAdjective && t.Chars.IsCapitalUpper;
        if (!wordNumber)
        {
            return null;
        }
    }
    else
    {
        if (t.Chars.IsAllLower)
        {
            return null;
        }
        if ((t.LengthChar < 3) && !t.Chars.IsLetter)
        {
            return null;
        }
        if (!t.Chars.IsLatinLetter)
        {
            if (!canBeCyr || !t.Chars.IsCyrillicLetter)
            {
                return null;
            }
        }
    }

    // --- scan name words until a type item (tok) is found ---
    Pullenti.Ner.Token t0 = t;
    Pullenti.Ner.Token t1 = t0;
    int namWo = 0;
    OrgItemEngItem tok = null;
    Pullenti.Ner.Geo.GeoReferent geo = null;
    OrgItemTypeToken addTyp = null;
    for (; t != null; t = t.Next)
    {
        if (t != t0 && t.WhitespacesBeforeCount > 1)
        {
            break;
        }
        if (t.IsChar(')'))
        {
            break;
        }
        if (t.IsChar('(') && t.Next != null)
        {
            // Bracketed insertions inside the name: "(geo)", "(latin type)" or "(Word)".
            if ((t.Next.GetReferent() is Pullenti.Ner.Geo.GeoReferent) && t.Next.Next != null && t.Next.Next.IsChar(')'))
            {
                geo = t.Next.GetReferent() as Pullenti.Ner.Geo.GeoReferent;
                t = t.Next.Next;
                continue;
            }
            OrgItemTypeToken typ = OrgItemTypeToken.TryAttach(t.Next, true, null);
            if ((typ != null && typ.EndToken.Next != null && typ.EndToken.Next.IsChar(')')) && typ.Chars.IsLatinLetter)
            {
                addTyp = typ;
                t = typ.EndToken.Next;
                continue;
            }
            if (((t.Next is Pullenti.Ner.TextToken) && t.Next.Next != null && t.Next.Next.IsChar(')')) && t.Next.Chars.IsCapitalUpper)
            {
                t = t.Next.Next;
                t1 = t;
                continue;
            }
            break;
        }

        // Type item here, or after one or two '.'/',' characters.
        tok = TryAttach(t, canBeCyr);
        if (tok == null && t.IsCharOf(".,") && t.Next != null)
        {
            tok = TryAttach(t.Next, canBeCyr);
            if (tok == null && t.Next.IsCharOf(",."))
            {
                tok = TryAttach(t.Next.Next, canBeCyr);
            }
        }
        if (tok != null)
        {
            if (tok.LengthChar == 1 && t0.Chars.IsCyrillicLetter)
            {
                return null;
            }
            break;
        }

        if (t.IsHiphen && !t.IsWhitespaceAfter && !t.IsWhitespaceBefore)
        {
            continue;
        }
        if (t.IsCharOf("&+") || t.IsAnd)
        {
            continue;
        }
        if (t.IsChar('.'))
        {
            if (t.Previous != null && t.Previous.LengthChar == 1)
            {
                continue;
            }
            else if (Pullenti.Ner.Core.MiscHelper.CanBeStartOfSentence(t.Next))
            {
                break;
            }
        }
        if (!t.Chars.IsLatinLetter)
        {
            if (!canBeCyr || !t.Chars.IsCyrillicLetter)
            {
                break;
            }
        }
        if (t.Chars.IsAllLower)
        {
            // Lower-case words are tolerated only as prepositions/conjunctions or inside brackets.
            if (t.Morph.Class.IsPreposition || t.Morph.Class.IsConjunction)
            {
                continue;
            }
            if (br)
            {
                continue;
            }
            break;
        }
        Pullenti.Morph.MorphClass mc = t.GetMorphClassInDictionary();
        if (mc.IsVerb)
        {
            if (t.Next != null && t.Next.Morph.Class.IsPreposition)
            {
                break;
            }
        }
        if (t.Next != null && t.Next.IsValue("OF", null))
        {
            break;
        }
        if (t is Pullenti.Ner.TextToken)
        {
            namWo++;
        }
        t1 = t;
    }
    if (tok == null)
    {
        return null;
    }

    // --- type item first: the name must follow in a bracket/quote sequence ---
    if (t0 == tok.BeginToken)
    {
        Pullenti.Ner.Core.BracketSequenceToken br2 = Pullenti.Ner.Core.BracketHelper.TryParse(tok.EndToken.Next, Pullenti.Ner.Core.BracketParseAttr.No, 100);
        if (br2 != null)
        {
            Pullenti.Ner.Org.OrganizationReferent org1 = new Pullenti.Ner.Org.OrganizationReferent();
            if (tok.ShortValue != null)
            {
                org1.AddTypeStr(tok.ShortValue);
            }
            org1.AddTypeStr(tok.FullValue);
            string nam1 = Pullenti.Ner.Core.MiscHelper.GetTextValue(br2.BeginToken, br2.EndToken, Pullenti.Ner.Core.GetTextAttr.No);
            if (nam1 != null)
            {
                org1.AddName(nam1, true, null);
                return new Pullenti.Ner.ReferentToken(org1, t0, br2.EndToken);
            }
        }
        return null;
    }

    // --- name first, type item after it ---
    Pullenti.Ner.Org.OrganizationReferent org = new Pullenti.Ner.Org.OrganizationReferent();
    Pullenti.Ner.Token te = tok.EndToken;
    if (tok.IsBank)
    {
        t1 = tok.EndToken;
    }
    if (tok.FullValue == "company" && (tok.WhitespacesAfterCount < 3))
    {
        // "... Company Ltd": the word "company" joins the name, the next item becomes the type.
        OrgItemEngItem tok1 = TryAttach(tok.EndToken.Next, canBeCyr);
        if (tok1 != null)
        {
            t1 = tok.EndToken;
            tok = tok1;
            te = tok.EndToken;
        }
    }
    if (tok.FullValue == "company")
    {
        if (namWo == 0)
        {
            return null;
        }
    }
    string nam = Pullenti.Ner.Core.MiscHelper.GetTextValue(t0, t1, Pullenti.Ner.Core.GetTextAttr.IgnoreArticles);
    if (nam == "STOCK" && tok.FullValue == "company")
    {
        return null;
    }
    string altNam = null;
    if (string.IsNullOrEmpty(nam))
    {
        return null;
    }
    if (nam.IndexOf('(') > 0)
    {
        // Drop a bracketed insertion from the main name; the untouched text is kept as an alternative name.
        int i1 = nam.IndexOf('(');
        int i2 = nam.IndexOf(')');
        if (i1 < i2)
        {
            altNam = nam;
            string tai = null;
            if ((i2 + 1) < nam.Length)
            {
                // Fix: start after the closing bracket (was Substring(i2), which kept the ')' in the name).
                tai = nam.Substring(i2 + 1).Trim();
            }
            nam = nam.Substring(0, i1).Trim();
            if (!string.IsNullOrEmpty(tai))
            {
                nam = string.Format("{0} {1}", nam, tai);
            }
        }
    }

    if (tok.IsBank)
    {
        org.AddTypeStr((tok.Kit.BaseLanguage.IsEn ? "bank" : "банк"));
        org.AddProfile(Pullenti.Ner.Org.OrgProfile.Finance);
        if ((t1.Next != null && t1.Next.IsValue("OF", null) && t1.Next.Next != null) && t1.Next.Next.Chars.IsLatinLetter)
        {
            // "Bank of X": extend the name over the "of ..." part.
            OrgItemNameToken nam0 = OrgItemNameToken.TryAttach(t1.Next, null, false, false);
            if (nam0 != null)
            {
                te = nam0.EndToken;
            }
            else
            {
                te = t1.Next.Next;
            }
            nam = Pullenti.Ner.Core.MiscHelper.GetTextValue(t0, te, Pullenti.Ner.Core.GetTextAttr.No);
            if (te.GetReferent() is Pullenti.Ner.Geo.GeoReferent)
            {
                org.AddGeoObject(te.GetReferent() as Pullenti.Ner.Geo.GeoReferent);
            }
        }
        else if (t0 == t1)
        {
            return null;
        }
    }
    else
    {
        if (tok.ShortValue != null)
        {
            org.AddTypeStr(tok.ShortValue);
        }
        org.AddTypeStr(tok.FullValue);
    }
    if (string.IsNullOrEmpty(nam))
    {
        return null;
    }
    org.AddName(nam, true, null);
    if (altNam != null)
    {
        org.AddName(altNam, true, null);
    }

    Pullenti.Ner.ReferentToken res = new Pullenti.Ner.ReferentToken(org, t0, te);

    // A second type item may follow after '.'/',' characters.
    t = te;
    while (t.Next != null && t.Next.IsCharOf(",."))
    {
        t = t.Next;
    }
    if (t.WhitespacesAfterCount < 2)
    {
        tok = TryAttach(t.Next, canBeCyr);
        if (tok != null)
        {
            if (tok.ShortValue != null)
            {
                org.AddTypeStr(tok.ShortValue);
            }
            org.AddTypeStr(tok.FullValue);
            res.EndToken = tok.EndToken;
        }
    }
    if (geo != null)
    {
        org.AddGeoObject(geo);
    }
    if (addTyp != null)
    {
        org.AddType(addTyp, false);
    }
    if (!br)
    {
        return res;
    }

    // Bracketed candidate: the next token must be ')' unless the text ends here.
    t = res.EndToken;
    if (t.Next == null)
    {
        // Fix: keep the current end token; previously EndToken was overwritten with null.
        return res;
    }
    if (!t.Next.IsChar(')'))
    {
        return null;
    }
    res.EndToken = t.Next;
    return res;
}
/// <summary>
/// Tries to recognise one organization-name item starting at token <paramref name="t"/>.
/// A phrase introduced by the literal "ОРДЕНА" is returned as an ignorable part; otherwise the
/// core recognition is delegated to <c>_TryAttach</c> and the result is then widened
/// (ontology "pure names", a hyphen-joined tail, an "and"-joined sibling item, and a chain of
/// following standard organization nouns).
/// </summary>
/// <param name="t">Start token; <c>null</c> yields <c>null</c>.</param>
/// <param name="prev">Previously attached name item, or <c>null</c> when this is the first one.</param>
/// <param name="extOnto">When <c>true</c>, a token that the core parser rejects is still returned as a
/// plain item if it carries a GEO referent or is a text token other than ';'; it also switches off
/// the ontology extension and the register check against <paramref name="prev"/>.</param>
/// <param name="first">When <c>true</c>, a Cyrillic preposition other than "ПО" / "ПРИ" at
/// <paramref name="t"/> rejects the parse.</param>
/// <returns>The recognised item, or <c>null</c> when nothing could be attached.</returns>
public static OrgItemNameToken TryAttach(Pullenti.Ner.Token t, OrgItemNameToken prev, bool extOnto, bool first) {
    if (t == null) {
        return null;
    }
    // "ОРДЕНА <...>": everything recognised after this word is marked as an ignorable part of the name.
    if (t.IsValue("ОРДЕНА", null) && t.Next != null) {
        Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t.Next, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
        if (npt != null) {
            Pullenti.Ner.Token t1 = npt.EndToken;
            // After "ЗНАК" / "ДРУЖБА" one more noun phrase is absorbed when it follows closely.
            if (((t1.IsValue("ЗНАК", null) || t1.IsValue("ДРУЖБА", null))) && (t1.WhitespacesAfterCount < 2)) {
                npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t1.Next, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                if (npt != null) {
                    t1 = npt.EndToken;
                }
            }
            return new OrgItemNameToken(t, t1) { IsIgnoredPart = true };
        }
        if (t.Next.GetMorphClassInDictionary().IsProperSurname) {
            return new OrgItemNameToken(t, t.Next) { IsIgnoredPart = true };
        }
        Pullenti.Ner.ReferentToken ppp = t.Kit.ProcessReferent("PERSON", t.Next);
        if (ppp != null) {
            return new OrgItemNameToken(t, ppp.EndToken) { IsIgnoredPart = true };
        }
        if ((t.WhitespacesAfterCount < 2) && Pullenti.Ner.Core.BracketHelper.CanBeStartOfSequence(t.Next, true, false)) {
            Pullenti.Ner.Core.BracketSequenceToken br = Pullenti.Ner.Core.BracketHelper.TryParse(t.Next, Pullenti.Ner.Core.BracketParseAttr.NearCloseBracket, 10);
            if (br != null && (br.LengthChar < 40)) {
                return new OrgItemNameToken(t, br.EndToken) { IsIgnoredPart = true };
            }
        }
    }
    if (first && t.Chars.IsCyrillicLetter && t.Morph.Class.IsPreposition) {
        if (!t.IsValue("ПО", null) && !t.IsValue("ПРИ", null)) {
            return null;
        }
    }
    OrgItemNameToken res = _TryAttach(t, prev, extOnto);
    if (res == null) {
        if (extOnto) {
            // Fallback for the external-ontology mode: accept the raw token text.
            if ((t.GetReferent() is Pullenti.Ner.Geo.GeoReferent) || (((t is Pullenti.Ner.TextToken) && !t.IsChar(';')))) {
                return new OrgItemNameToken(t, t) { Value = t.GetSourceText() };
            }
        }
        return null;
    }
    if (prev == null && !extOnto) {
        if (t.Kit.Ontology != null) {
            Pullenti.Ner.Org.OrganizationAnalyzer.OrgAnalyzerData ad = t.Kit.Ontology._getAnalyzerData(Pullenti.Ner.Org.OrganizationAnalyzer.ANALYZER_NAME) as Pullenti.Ner.Org.OrganizationAnalyzer.OrgAnalyzerData;
            if (ad != null) {
                // A longer match from the ontology's OrgPureNames wins over the locally parsed span.
                Pullenti.Ner.Core.TerminToken tok = ad.OrgPureNames.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
                if (tok != null && tok.EndChar > res.EndChar) {
                    res.EndToken = tok.EndToken;
                }
            }
        }
    }
    if (prev != null && !extOnto) {
        // A non-lowercase item after an all-lowercase one is accepted only for Latin/Latin pairs
        // or when it starts with a standard noun.
        if ((prev.Chars.IsAllLower && !res.Chars.IsAllLower && !res.IsStdTail) && !res.IsStdName) {
            if (prev.Chars.IsLatinLetter && res.Chars.IsLatinLetter) {
            }
            else if (m_StdNouns.TryParse(res.BeginToken, Pullenti.Ner.Core.TerminParseAttr.No) != null) {
            }
            else {
                return null;
            }
        }
    }
    // "<item>-<word>" written without whitespace around the hyphen.
    if ((res.EndToken.Next != null && !res.EndToken.IsWhitespaceAfter && res.EndToken.Next.IsHiphen) && !res.EndToken.Next.IsWhitespaceAfter) {
        Pullenti.Ner.TextToken tt = res.EndToken.Next.Next as Pullenti.Ner.TextToken;
        if (tt != null) {
            if (tt.Chars == res.Chars || tt.Chars.IsAllUpper) {
                res.EndToken = tt;
                res.Value = string.Format("{0}-{1}", res.Value, tt.Term);
            }
        }
    }
    // "<item> and <item>" separated by exactly one whitespace on both sides of the conjunction.
    if ((res.EndToken.Next != null && res.EndToken.Next.IsAnd && res.EndToken.WhitespacesAfterCount == 1) && res.EndToken.Next.WhitespacesAfterCount == 1) {
        OrgItemNameToken res1 = _TryAttach(res.EndToken.Next.Next, prev, extOnto);
        if (res1 != null && res1.Chars == res.Chars && OrgItemTypeToken.TryAttach(res.EndToken.Next.Next, false, null) == null) {
            if (!((res1.Morph.Case & res.Morph.Case)).IsUndefined) {
                res.EndToken = res1.EndToken;
                res.Value = string.Format("{0} {1} {2}", res.Value, (res.Kit.BaseLanguage.IsUa ? "ТА" : "И"), res1.Value);
            }
        }
    }
    // Count standard organization nouns inside the recognised span.
    for (Pullenti.Ner.Token tt = res.BeginToken; tt != null && tt.EndChar <= res.EndChar; tt = tt.Next) {
        if (m_StdNouns.TryParse(tt, Pullenti.Ner.Core.TerminParseAttr.No) != null) {
            res.StdOrgNameNouns++;
        }
    }
    // When the item ends with a standard noun, try to absorb a following chain of standard nouns.
    if (m_StdNouns.TryParse(res.EndToken, Pullenti.Ner.Core.TerminParseAttr.No) != null) {
        int cou = 1;
        bool non = false;
        Pullenti.Ner.Token et = res.EndToken;
        if (!_isNotTermNoun(res.EndToken)) {
            non = true;
        }
        bool br = false;
        for (Pullenti.Ner.Token tt = res.EndToken.Next; tt != null; tt = tt.Next) {
            if (tt.IsTableControlChar) {
                break;
            }
            if (tt.IsChar('(')) {
                if (!non) {
                    break;
                }
                br = true;
                continue;
            }
            if (tt.IsChar(')')) {
                br = false;
                et = tt;
                break;
            }
            if (!(tt is Pullenti.Ner.TextToken)) {
                break;
            }
            if (tt.WhitespacesBeforeCount > 1) {
                if (tt.NewlinesBeforeCount > 1) {
                    break;
                }
                if (tt.Chars != res.EndToken.Chars) {
                    break;
                }
            }
            if (tt.Morph.Class.IsPreposition || tt.IsCommaAnd) {
                continue;
            }
            Pullenti.Morph.MorphClass dd = tt.GetMorphClassInDictionary();
            if (!dd.IsNoun && !dd.IsAdjective) {
                break;
            }
            Pullenti.Ner.Core.NounPhraseToken npt2 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(tt, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
            if (npt2 == null) {
                if (dd == Pullenti.Morph.MorphClass.Adjective) {
                    continue;
                }
                break;
            }
            if (m_StdNouns.TryParse(npt2.EndToken, Pullenti.Ner.Core.TerminParseAttr.No) == null) {
                break;
            }
            if (npt2.EndToken.Chars != res.EndToken.Chars) {
                break;
            }
            // For these head nouns (or after "ПРИ") stop if the tail is itself recognised as an organization.
            if ((npt2.EndToken.IsValue("УПРАВЛЕНИЕ", null) || npt2.EndToken.IsValue("ИНСТИТУТ", null) || npt2.EndToken.IsValue("УПРАВЛІННЯ", null)) || npt2.EndToken.IsValue("ІНСТИТУТ", null) || tt.Previous.IsValue("ПРИ", null)) {
                Pullenti.Ner.ReferentToken rt = tt.Kit.ProcessReferent(Pullenti.Ner.Org.OrganizationAnalyzer.ANALYZER_NAME, tt);
                if (rt != null) {
                    break;
                }
            }
            cou++;
            tt = npt2.EndToken;
            if (!_isNotTermNoun(tt)) {
                non = true;
                et = tt;
            }
        }
        // Commit the extension only when a qualifying noun was seen and no bracket was left open.
        if (non && !br) {
            res.StdOrgNameNouns += cou;
            res.EndToken = et;
        }
    }
    return res;
}
/// <summary>
/// Core recogniser for a single organization-name item starting at <paramref name="t"/>.
/// Tries, in order: referent tokens (DENOMINATION, Latin GEO prefix), standard tails and names,
/// English organization items, comma / ampersand continuations after <paramref name="prev"/>,
/// noun phrases, "and"-joined items, prepositional groups, the English "OF ..." group, and
/// finally a single bare word (optionally extended through '-', '/', '\' and '.').
/// </summary>
/// <param name="t">Start token; <c>null</c> yields <c>null</c>.</param>
/// <param name="prev">Previously attached item used for agreement checks, or <c>null</c>.</param>
/// <param name="extOnto">Relaxed mode flag; here it allows any noun phrase to be accepted and is
/// forwarded to recursive calls.</param>
/// <returns>The recognised item, or <c>null</c>.</returns>
static OrgItemNameToken _TryAttach(Pullenti.Ner.Token t, OrgItemNameToken prev, bool extOnto) {
    if (t == null) {
        return null;
    }
    Pullenti.Ner.Referent r = t.GetReferent();
    if (r != null) {
        if (r.TypeName == "DENOMINATION") {
            return new OrgItemNameToken(t, t) { Value = r.ToString(true, t.Kit.BaseLanguage, 0), IsDenomination = true };
        }
        // A Latin-script GEO referent may prefix a Latin name item: "<Geo> <Name>".
        if ((r is Pullenti.Ner.Geo.GeoReferent) && t.Chars.IsLatinLetter) {
            OrgItemNameToken res2 = _TryAttach(t.Next, prev, extOnto);
            if (res2 != null && res2.Chars.IsLatinLetter) {
                res2.BeginToken = t;
                res2.Value = string.Format("{0} {1}", Pullenti.Ner.Core.MiscHelper.GetTextValueOfMetaToken(t as Pullenti.Ner.MetaToken, Pullenti.Ner.Core.GetTextAttr.No), res2.Value);
                res2.IsInDictionary = false;
                return res2;
            }
        }
        return null;
    }
    Pullenti.Ner.TextToken tt = t as Pullenti.Ner.TextToken;
    if (tt == null) {
        return null;
    }
    OrgItemNameToken res = null;
    // Standard tails (optionally after a comma).
    Pullenti.Ner.Core.TerminToken tok = m_StdTails.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
    if (tok == null && t.IsChar(',')) {
        tok = m_StdTails.TryParse(t.Next, Pullenti.Ner.Core.TerminParseAttr.No);
    }
    if (tok != null) {
        // A termin without a Tag is a real tail; one with a Tag is treated as an empty word.
        return new OrgItemNameToken(t, tok.EndToken) { Value = tok.Termin.CanonicText, IsStdTail = tok.Termin.Tag == null, IsEmptyWord = tok.Termin.Tag != null, Morph = tok.Morph };
    }
    if ((((tok = m_StdNames.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No)))) != null) {
        return new OrgItemNameToken(t, tok.EndToken) { Value = tok.Termin.CanonicText, IsStdName = true };
    }
    // English organization items (optionally after a comma) are reported as standard tails.
    OrgItemEngItem eng = OrgItemEngItem.TryAttach(t, false);
    if (eng == null && t.IsChar(',')) {
        eng = OrgItemEngItem.TryAttach(t.Next, false);
    }
    if (eng != null) {
        return new OrgItemNameToken(t, eng.EndToken) { Value = eng.FullValue, IsStdTail = true };
    }
    if (tt.Chars.IsAllLower && prev != null) {
        if (!prev.Chars.IsAllLower && !prev.Chars.IsCapitalUpper) {
            return null;
        }
    }
    // ", <noun phrase> И <noun phrase>" continuing prev: only the first phrase is returned.
    if (tt.IsChar(',') && prev != null) {
        Pullenti.Ner.Core.NounPhraseToken npt1 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t.Next, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
        if (npt1 == null || npt1.Chars != prev.Chars || ((npt1.Morph.Case & prev.Morph.Case)).IsUndefined) {
            return null;
        }
        OrgItemTypeToken ty = OrgItemTypeToken.TryAttach(t.Next, false, null);
        if (ty != null) {
            return null;
        }
        if (npt1.EndToken.Next == null || !npt1.EndToken.Next.IsValue("И", null)) {
            return null;
        }
        Pullenti.Ner.Token t1 = npt1.EndToken.Next;
        Pullenti.Ner.Core.NounPhraseToken npt2 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t1.Next, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
        if (npt2 == null || npt2.Chars != prev.Chars || ((npt2.Morph.Case & npt1.Morph.Case & prev.Morph.Case)).IsUndefined) {
            return null;
        }
        ty = OrgItemTypeToken.TryAttach(t1.Next, false, null);
        if (ty != null) {
            return null;
        }
        res = new OrgItemNameToken(npt1.BeginToken, npt1.EndToken) { Morph = npt1.Morph, Value = npt1.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false) };
        res.IsNounPhrase = true;
        res.IsAfterConjunction = true;
        if (prev.Preposition != null) {
            res.Preposition = prev.Preposition;
        }
        return res;
    }
    // "&" / "AND" / "UND" continuing prev.
    if (((tt.IsChar('&') || tt.IsValue("AND", null) || tt.IsValue("UND", null))) && prev != null) {
        if ((tt.Next is Pullenti.Ner.TextToken) && tt.LengthChar == 1 && tt.Next.Chars.IsLatinLetter) {
            res = new OrgItemNameToken(tt, tt.Next) { Chars = tt.Next.Chars };
            res.IsAfterConjunction = true;
            res.Value = "& " + (tt.Next as Pullenti.Ner.TextToken).Term;
            return res;
        }
        res = OrgItemNameToken.TryAttach(tt.Next, null, extOnto, false);
        if (res == null || res.Chars != prev.Chars) {
            return null;
        }
        res.IsAfterConjunction = true;
        res.Value = "& " + res.Value;
        return res;
    }
    if (!tt.Chars.IsLetter) {
        return null;
    }
    // NOTE(review): expinf is assigned but never read in this method. The call is kept because
    // any side effects of FindDerivates are not visible from here - confirm and remove if pure.
    List<Pullenti.Semantic.Utils.DerivateGroup> expinf = null;
    if (prev != null && prev.EndToken.GetMorphClassInDictionary().IsNoun) {
        string wo = prev.EndToken.GetNormalCaseText(Pullenti.Morph.MorphClass.Noun, Pullenti.Morph.MorphNumber.Singular, Pullenti.Morph.MorphGender.Undefined, false);
        expinf = Pullenti.Semantic.Utils.DerivateService.FindDerivates(wo, true, prev.EndToken.Morph.Language);
    }
    Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
    if (npt != null && npt.InternalNoun != null) {
        npt = null;
    }
    // explOk: a semantic link could be created between prev's last noun phrase and this one.
    bool explOk = false;
    if (npt != null && prev != null && prev.EndToken.GetMorphClassInDictionary().IsNoun) {
        Pullenti.Ner.Core.NounPhraseToken npt0 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(prev.EndToken, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
        if (npt0 != null) {
            List<Pullenti.Semantic.Core.SemanticLink> links = Pullenti.Semantic.Core.SemanticHelper.TryCreateLinks(npt0, npt, null);
            if (links.Count > 0) {
                explOk = true;
            }
        }
    }
    if (npt != null && ((explOk || npt.Morph.Case.IsGenitive || ((prev != null && !((prev.Morph.Case & npt.Morph.Case)).IsUndefined))))) {
        Pullenti.Morph.MorphClass mc = npt.BeginToken.GetMorphClassInDictionary();
        if (mc.IsVerb || mc.IsPronoun) {
            return null;
        }
        if (mc.IsAdverb) {
            // An adverb is tolerated only as the first half of a hyphenated word.
            if (npt.BeginToken.Next != null && npt.BeginToken.Next.IsHiphen) {
            }
            else {
                return null;
            }
        }
        if (mc.IsPreposition) {
            return null;
        }
        if (mc.IsNoun && npt.Chars.IsAllLower) {
            Pullenti.Morph.MorphCase ca = npt.Morph.Case;
            if ((!ca.IsDative && !ca.IsGenitive && !ca.IsInstrumental) && !ca.IsPrepositional) {
                return null;
            }
        }
        res = new OrgItemNameToken(npt.BeginToken, npt.EndToken) { Morph = npt.Morph, Value = npt.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false) };
        res.IsNounPhrase = true;
        if ((npt.EndToken.WhitespacesAfterCount < 2) && (npt.EndToken.Next is Pullenti.Ner.TextToken)) {
            Pullenti.Ner.Core.NounPhraseToken npt2 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(npt.EndToken.Next, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
            if (npt2 != null && npt2.Morph.Case.IsGenitive && npt2.Chars.IsAllLower) {
                // Absorb a lowercase genitive phrase unless it is a type, an eponym or a (non-plural) person property.
                OrgItemTypeToken typ = OrgItemTypeToken.TryAttach(npt.EndToken.Next, true, null);
                OrgItemEponymToken epo = OrgItemEponymToken.TryAttach(npt.EndToken.Next, false);
                Pullenti.Ner.ReferentToken rtt = t.Kit.ProcessReferent("PERSONPROPERTY", npt.EndToken.Next);
                if (typ == null && epo == null && ((rtt == null || rtt.Morph.Number == Pullenti.Morph.MorphNumber.Plural))) {
                    res.EndToken = npt2.EndToken;
                    res.Value = string.Format("{0} {1}", res.Value, Pullenti.Ner.Core.MiscHelper.GetTextValueOfMetaToken(npt2, Pullenti.Ner.Core.GetTextAttr.No));
                }
            }
            else if (npt.EndToken.Next.IsComma && (npt.EndToken.Next.Next is Pullenti.Ner.TextToken)) {
                // ", <word that is both adjective and verb, agreeing with npt> <lowercase dative/genitive phrase>"
                Pullenti.Ner.Token tt2 = npt.EndToken.Next.Next;
                Pullenti.Morph.MorphClass mv2 = tt2.GetMorphClassInDictionary();
                if (mv2.IsAdjective && mv2.IsVerb) {
                    Pullenti.Morph.MorphBaseInfo bi = new Pullenti.Morph.MorphBaseInfo() { Case = npt.Morph.Case, Gender = npt.Morph.Gender, Number = npt.Morph.Number };
                    if (tt2.Morph.CheckAccord(bi, false, false)) {
                        npt2 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(tt2.Next, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                        if (npt2 != null && ((npt2.Morph.Case.IsDative || npt2.Morph.Case.IsGenitive)) && npt2.Chars.IsAllLower) {
                            res.EndToken = npt2.EndToken;
                            res.Value = string.Format("{0} {1}", res.Value, Pullenti.Ner.Core.MiscHelper.GetTextValue(npt.EndToken.Next, res.EndToken, Pullenti.Ner.Core.GetTextAttr.No));
                        }
                    }
                }
            }
        }
        if (explOk) {
            res.IsAfterConjunction = true;
        }
    }
    else if (npt != null && ((((prev != null && prev.IsNounPhrase && npt.Morph.Case.IsInstrumental)) || extOnto))) {
        res = new OrgItemNameToken(npt.BeginToken, npt.EndToken) { Morph = npt.Morph, Value = npt.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false) };
        res.IsNounPhrase = true;
    }
    else if (tt.IsAnd) {
        res = TryAttach(tt.Next, prev, extOnto, false);
        if (res == null || !res.IsNounPhrase || prev == null) {
            return null;
        }
        if (((prev.Morph.Case & res.Morph.Case)).IsUndefined) {
            return null;
        }
        if (prev.Morph.Number != Pullenti.Morph.MorphNumber.Undefined && res.Morph.Number != Pullenti.Morph.MorphNumber.Undefined) {
            if (((prev.Morph.Number & res.Morph.Number)) == Pullenti.Morph.MorphNumber.Undefined) {
                if (prev.Chars != res.Chars) {
                    return null;
                }
                OrgItemTypeToken ty = OrgItemTypeToken.TryAttach(res.EndToken.Next, false, null);
                if (ty != null) {
                    return null;
                }
            }
        }
        // Read Chars and write it back before returning (kept as in the original code).
        Pullenti.Morph.CharsInfo ci = res.Chars;
        res.Chars = ci;
        res.IsAfterConjunction = true;
        return res;
    }
    else if (((tt.Term == "ПО" || tt.Term == "ПРИ" || tt.Term == "ЗА") || tt.Term == "С" || tt.Term == "В") || tt.Term == "НА") {
        // Prepositional group: the following noun phrase must be in the case required below.
        npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t.Next, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
        if (npt != null) {
            if (m_VervotWords.TryParse(npt.EndToken, Pullenti.Ner.Core.TerminParseAttr.No) != null) {
                return null;
            }
            bool ok = false;
            if (tt.Term == "ПО") {
                ok = npt.Morph.Case.IsDative;
            }
            else if (tt.Term == "С") {
                ok = npt.Morph.Case.IsInstrumental;
            }
            else if (tt.Term == "ЗА") {
                ok = npt.Morph.Case.IsGenitive | npt.Morph.Case.IsInstrumental;
            }
            else if (tt.Term == "НА") {
                ok = npt.Morph.Case.IsPrepositional;
            }
            else if (tt.Term == "В") {
                ok = npt.Morph.Case.IsDative | npt.Morph.Case.IsPrepositional;
                if (ok) {
                    // "В" is accepted only before "СФЕРА" / "ОБЛАСТЬ".
                    ok = false;
                    if (t.Next.IsValue("СФЕРА", null) || t.Next.IsValue("ОБЛАСТЬ", null)) {
                        ok = true;
                    }
                }
            }
            else if (tt.Term == "ПРИ") {
                ok = npt.Morph.Case.IsPrepositional;
                if (ok) {
                    // Reject when what follows is an organization type or a whole organization.
                    if (OrgItemTypeToken.TryAttach(tt.Next, true, null) != null) {
                        ok = false;
                    }
                    else {
                        Pullenti.Ner.ReferentToken rt = tt.Kit.ProcessReferent(Pullenti.Ner.Org.OrganizationAnalyzer.ANALYZER_NAME, tt.Next);
                        if (rt != null) {
                            ok = false;
                        }
                    }
                }
                string s = npt.Noun.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false);
                if (s == "ПОДДЕРЖКА" || s == "УЧАСТИЕ") {
                    ok = false;
                }
            }
            else {
                ok = npt.Morph.Case.IsPrepositional;
            }
            if (ok) {
                res = new OrgItemNameToken(t, npt.EndToken) { Morph = npt.Morph, Value = npt.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Singular, Pullenti.Morph.MorphGender.Undefined, false), Chars = npt.Chars };
                res.IsNounPhrase = true;
                res.Preposition = tt.Term;
                // After "ДЕЛО" / "ВОПРОС" absorb a genitive item and further comma/and-joined items.
                if (((res.Value == "ДЕЛО" || res.Value == "ВОПРОС")) && !res.IsNewlineAfter) {
                    OrgItemNameToken res2 = _TryAttach(res.EndToken.Next, res, extOnto);
                    if (res2 != null && res2.Morph.Case.IsGenitive) {
                        res.Value = string.Format("{0} {1}", res.Value, res2.Value);
                        res.EndToken = res2.EndToken;
                        for (Pullenti.Ner.Token ttt = res2.EndToken.Next; ttt != null; ttt = ttt.Next) {
                            if (!ttt.IsCommaAnd) {
                                break;
                            }
                            OrgItemNameToken res3 = _TryAttach(ttt.Next, res2, extOnto);
                            if (res3 == null) {
                                break;
                            }
                            res.Value = string.Format("{0} {1}", res.Value, res3.Value);
                            res.EndToken = res3.EndToken;
                            if (ttt.IsAnd) {
                                break;
                            }
                            ttt = res.EndToken;
                        }
                    }
                }
            }
        }
        if (res == null) {
            return null;
        }
    }
    else if (tt.Term == "OF") {
        // "OF [article] <Latin non-lowercase word> ..." extended over following Latin words.
        Pullenti.Ner.Token t1 = tt.Next;
        if (t1 != null && Pullenti.Ner.Core.MiscHelper.IsEngArticle(t1)) {
            t1 = t1.Next;
        }
        if (t1 != null && t1.Chars.IsLatinLetter && !t1.Chars.IsAllLower) {
            res = new OrgItemNameToken(t, t1) { Chars = t1.Chars, Morph = t1.Morph };
            for (Pullenti.Ner.Token ttt = t1.Next; ttt != null; ttt = ttt.Next) {
                if (ttt.WhitespacesBeforeCount > 2) {
                    break;
                }
                if (Pullenti.Ner.Core.MiscHelper.IsEngAdjSuffix(ttt)) {
                    ttt = ttt.Next;
                    continue;
                }
                if (!ttt.Chars.IsLatinLetter) {
                    break;
                }
                if (ttt.Morph.Class.IsPreposition) {
                    break;
                }
                t1 = (res.EndToken = ttt);
            }
            res.Value = Pullenti.Ner.Core.MiscHelper.GetTextValue(t, t1, Pullenti.Ner.Core.GetTextAttr.IgnoreArticles);
            res.Preposition = tt.Term;
            return res;
        }
    }
    if (res == null) {
        // Last resort: a single bare word.
        if (tt.Chars.IsLatinLetter && tt.LengthChar == 1) {
        }
        else if (tt.Chars.IsAllLower || (tt.LengthChar < 2)) {
            if (!tt.Chars.IsLatinLetter || prev == null || !prev.Chars.IsLatinLetter) {
                return null;
            }
        }
        if (tt.Chars.IsCyrillicLetter) {
            Pullenti.Morph.MorphClass mc = tt.GetMorphClassInDictionary();
            if (mc.IsVerb || mc.IsAdverb) {
                return null;
            }
        }
        else if (tt.Chars.IsLatinLetter && !tt.IsWhitespaceAfter) {
            // A short Latin word glued to a number is not a name item.
            if (!tt.IsWhitespaceAfter && (tt.LengthChar < 5)) {
                if (tt.Next is Pullenti.Ner.NumberToken) {
                    return null;
                }
            }
        }
        res = new OrgItemNameToken(tt, tt) { Value = tt.Term, Morph = tt.Morph };
        for (t = tt.Next; t != null; t = t.Next) {
            if ((((t.IsHiphen || t.IsCharOf("\\/"))) && t.Next != null && (t.Next is Pullenti.Ner.TextToken)) && !t.IsWhitespaceBefore && !t.IsWhitespaceAfter) {
                t = t.Next;
                res.EndToken = t;
                res.Value = string.Format("{0}{1}{2}", res.Value, (t.Previous.IsChar('.') ? '.' : '-'), (t as Pullenti.Ner.TextToken).Term);
            }
            else if (t.IsChar('.')) {
                if (!t.IsWhitespaceAfter && !t.IsWhitespaceBefore && (t.Next is Pullenti.Ner.TextToken)) {
                    res.EndToken = t.Next;
                    t = t.Next;
                    res.Value = string.Format("{0}.{1}", res.Value, (t as Pullenti.Ner.TextToken).Term);
                }
                else if ((t.Next != null && !t.IsNewlineAfter && t.Next.Chars.IsLatinLetter) && tt.Chars.IsLatinLetter) {
                    res.EndToken = t;
                }
                else {
                    break;
                }
            }
            else {
                break;
            }
        }
    }
    // Mark the item as "in dictionary" when any content word in it has a dictionary word form.
    for (Pullenti.Ner.Token t0 = res.BeginToken; t0 != null; t0 = t0.Next) {
        if ((((tt = t0 as Pullenti.Ner.TextToken))) != null && tt.IsLetters) {
            if (!tt.Morph.Class.IsConjunction && !tt.Morph.Class.IsPreposition) {
                foreach (Pullenti.Morph.MorphBaseInfo mf in tt.Morph.Items) {
                    // Guard the cast: an item that is not a MorphWordForm must not cause a null dereference.
                    Pullenti.Morph.MorphWordForm wf = mf as Pullenti.Morph.MorphWordForm;
                    if (wf != null && wf.IsInDictionary) {
                        res.IsInDictionary = true;
                    }
                }
            }
        }
        if (t0 == res.EndToken) {
            break;
        }
    }
    // Single all-uppercase token glued to a number (optionally through a hyphen): append the number.
    if (res.BeginToken == res.EndToken && res.BeginToken.Chars.IsAllUpper) {
        if (res.EndToken.Next != null && !res.EndToken.IsWhitespaceAfter) {
            Pullenti.Ner.Token t1 = res.EndToken.Next;
            if (t1.Next != null && !t1.IsWhitespaceAfter && t1.IsHiphen) {
                t1 = t1.Next;
            }
            if (t1 is Pullenti.Ner.NumberToken) {
                res.Value += (t1 as Pullenti.Ner.NumberToken).Value;
                res.EndToken = t1;
            }
        }
    }
    // Single token flagged IsLastLower: keep the source text up to and including its last uppercase character.
    if (res.BeginToken == res.EndToken && res.BeginToken.Chars.IsLastLower) {
        string src = res.BeginToken.GetSourceText();
        for (int i = src.Length - 1; i >= 0; i--) {
            if (char.IsUpper(src[i])) {
                res.Value = src.Substring(0, i + 1);
                break;
            }
        }
    }
    return res;
}
/// <summary>
/// Tries to recognise an adverb-like token starting at <paramref name="t"/>: a negated form
/// ("НЕ ..."), the reciprocal / reflexive pairs "ДРУГ ... ДРУГ" and "САМ ... СЕБЯ", an entry of
/// the <c>m_Termins</c> dictionary, a dictionary adverb, or "НА ВСТРЕЧА" followed by a
/// reciprocal pair.
/// </summary>
/// <param name="t">Start token; <c>null</c> yields <c>null</c>.</param>
/// <returns>The recognised token, or <c>null</c>.</returns>
public static AdverbToken TryParse(Pullenti.Ner.Token t) {
    if (t == null) {
        return null;
    }
    // "НЕ <adverb>": parse the rest and mark the result as negated.
    if ((t is Pullenti.Ner.TextToken) && (t as Pullenti.Ner.TextToken).Term == "НЕ") {
        AdverbToken nn = TryParse(t.Next);
        if (nn != null) {
            nn.Not = true;
            nn.BeginToken = t;
            return nn;
        }
    }
    Pullenti.Ner.Token t0 = t;
    // Skip one leading preposition; t0 still marks where the result should begin.
    if (t.Next != null && t.Morph.Class.IsPreposition) {
        t = t.Next;
    }
    if (t.IsValue("ДРУГ", null) || t.IsValue("САМ", null)) {
        Pullenti.Ner.Token t1 = t.Next;
        // A preposition may stand between the two halves of the pair.
        if (t1 != null && t1.Morph.Class.IsPreposition) {
            t1 = t1.Next;
        }
        if (t1 != null) {
            if (t1.IsValue("ДРУГ", null) && t.IsValue("ДРУГ", null)) {
                return new AdverbToken(t0, t1) { Typ = Pullenti.Semantic.SemAttributeType.EachOther };
            }
            if (t1.IsValue("СЕБЯ", null) && t.IsValue("САМ", null)) {
                return new AdverbToken(t0, t1) { Typ = Pullenti.Semantic.SemAttributeType.Himelf };
            }
        }
    }
    Pullenti.Ner.Core.TerminToken tok = m_Termins.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
    if (tok != null) {
        AdverbToken res = new AdverbToken(t0, tok.EndToken) { Typ = (Pullenti.Semantic.SemAttributeType)tok.Termin.Tag };
        t = res.EndToken.Next;
        if (t != null && t.IsComma) {
            t = t.Next;
        }
        // For Less / Great, a following "ЧЕМ" (optionally after a comma) is absorbed.
        if (res.Typ == Pullenti.Semantic.SemAttributeType.Less || res.Typ == Pullenti.Semantic.SemAttributeType.Great) {
            if (t != null && t.IsValue("ЧЕМ", null)) {
                res.EndToken = t;
            }
        }
        return res;
    }
    Pullenti.Morph.MorphClass mc = t.GetMorphClassInDictionary();
    if (mc.IsAdverb) {
        return new AdverbToken(t, t);
    }
    // "НА ВСТРЕЧА" counts only when a reciprocal pair follows; the pair itself is not included in the span.
    if (t.IsValue("ВСТРЕЧА", null) && t.Previous != null && t.Previous.IsValue("НА", null)) {
        AdverbToken ne = TryParse(t.Next);
        if (ne != null && ne.Typ == Pullenti.Semantic.SemAttributeType.EachOther) {
            return new AdverbToken(t.Previous, t);
        }
    }
    return null;
}
/// <summary>
/// Tries to recover a sequence framed by quotes or brackets. Nesting and a possibly missing
/// closing element are supported.
/// </summary>
/// <param name="t">Start token (must be able to open a sequence).</param>
/// <param name="attrs">Parsing options.</param>
/// <param name="maxTokens">Maximum number of non-bracket tokens to scan (in case the closing
/// quote was forgotten).</param>
/// <returns>A <c>BracketSequenceToken</c> metatoken, or <c>null</c>.</returns>
public static BracketSequenceToken TryParse(Pullenti.Ner.Token t, BracketParseAttr attrs = BracketParseAttr.No, int maxTokens = 100) {
    Pullenti.Ner.Token t0 = t;
    int cou = 0;
    if (!CanBeStartOfSequence(t0, false, false)) {
        return null;
    }
    // brList collects every bracket/quote token met while scanning; brList[0] is the opener.
    List<Bracket> brList = new List<Bracket>();
    brList.Add(new Bracket(t0));
    cou = 0;
    int crlf = 0;
    Pullenti.Ner.Token last = null;
    int lev = 1;
    // Nesting depth (lev) is tracked only for openers listed in m_AssymOPenChars, except '«'.
    bool isAssim = brList[0].Char != '«' && m_AssymOPenChars.IndexOf(brList[0].Char) >= 0;
    bool genCase = false;
    for (t = t0.Next; t != null; t = t.Next) {
        if (t.IsTableControlChar) {
            break;
        }
        last = t;
        if (t.IsCharOf(m_OpenChars) || t.IsCharOf(m_CloseChars)) {
            if (t.IsNewlineBefore && ((attrs & BracketParseAttr.CanBeManyLines)) == BracketParseAttr.No) {
                if (t.WhitespacesBeforeCount > 10 || CanBeStartOfSequence(t, false, false)) {
                    if (t.IsChar('(') && !t0.IsChar('(')) {
                    }
                    else {
                        last = t.Previous;
                        break;
                    }
                }
            }
            Bracket bb = new Bracket(t);
            brList.Add(bb);
            if (brList.Count > 20) {
                break;
            }
            // open, open, close - where the close must close both openers: look ahead on the
            // same line to decide whether scanning should go on.
            if ((brList.Count == 3 && brList[1].CanBeOpen && bb.CanBeClose) && MustBeCloseChar(bb.Char, brList[1].Char) && MustBeCloseChar(bb.Char, brList[0].Char)) {
                bool ok = false;
                for (Pullenti.Ner.Token tt = t.Next; tt != null; tt = tt.Next) {
                    if (tt.IsNewlineBefore) {
                        break;
                    }
                    if (tt.IsChar(',')) {
                        break;
                    }
                    if (tt.IsChar('.')) {
                        // After a period only a bracket that can end the sequence and close brList[0] counts.
                        for (tt = tt.Next; tt != null; tt = tt.Next) {
                            if (tt.IsNewlineBefore) {
                                break;
                            }
                            else if (tt.IsCharOf(m_OpenChars) || tt.IsCharOf(m_CloseChars)) {
                                Bracket bb2 = new Bracket(tt);
                                if (BracketHelper.CanBeEndOfSequence(tt, false, null, false) && CanBeCloseChar(bb2.Char, brList[0].Char)) {
                                    ok = true;
                                }
                                break;
                            }
                        }
                        break;
                    }
                    // Fixed: test the look-ahead token tt. The original tested t, which is already
                    // known to be a bracket, so the condition was always true.
                    if (tt.IsCharOf(m_OpenChars) || tt.IsCharOf(m_CloseChars)) {
                        ok = true;
                        break;
                    }
                }
                if (!ok) {
                    break;
                }
            }
            if (isAssim) {
                if (bb.CanBeOpen && !bb.CanBeClose && bb.Char == brList[0].Char) {
                    lev++;
                }
                else if (bb.CanBeClose && !bb.CanBeOpen && m_OpenChars.IndexOf(brList[0].Char) == m_CloseChars.IndexOf(bb.Char)) {
                    lev--;
                    if (lev == 0) {
                        break;
                    }
                }
            }
        }
        else {
            if ((++cou) > maxTokens) {
                break;
            }
            if (((attrs & BracketParseAttr.CanContainsVerbs)) == BracketParseAttr.No) {
                if (t.Morph.Language.IsCyrillic) {
                    // A lowercase pure verb (not adjective-like, not marked "страд.з.", not ending in "СЯ")
                    // stops the scan unless the opener is '(' and no other bracket was seen yet.
                    if (t.GetMorphClassInDictionary() == Pullenti.Morph.MorphClass.Verb) {
                        if (!t.Morph.Class.IsAdjective && !t.Morph.ContainsAttr("страд.з.", null)) {
                            if (t.Chars.IsAllLower) {
                                string norm = t.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false);
                                if (!Pullenti.Morph.LanguageHelper.EndsWith(norm, "СЯ")) {
                                    if (brList.Count > 1) {
                                        break;
                                    }
                                    if (brList[0].Char != '(') {
                                        break;
                                    }
                                }
                            }
                        }
                    }
                }
                else if (t.Morph.Language.IsEn) {
                    if (t.Morph.Class == Pullenti.Morph.MorphClass.Verb && t.Chars.IsAllLower) {
                        break;
                    }
                }
                Pullenti.Ner.Referent r = t.GetReferent();
                if (r != null && r.TypeName == "ADDRESS") {
                    if (!t0.IsChar('(')) {
                        break;
                    }
                }
            }
        }
        if (((attrs & BracketParseAttr.CanBeManyLines)) != BracketParseAttr.No) {
            // Multi-line mode: only an empty line stops the scan.
            if (t.IsNewlineBefore) {
                if (t.NewlinesBeforeCount > 1) {
                    break;
                }
                crlf++;
            }
            continue;
        }
        if (t.IsNewlineBefore) {
            if (t.WhitespacesBeforeCount > 15) {
                last = t.Previous;
                break;
            }
            crlf++;
            if (!t.Chars.IsAllLower) {
                if (MiscHelper.CanBeStartOfSentence(t)) {
                    // A new sentence on a new line: go on only if a closing bracket follows on that line.
                    bool has = false;
                    for (Pullenti.Ner.Token tt = t.Next; tt != null; tt = tt.Next) {
                        if (tt.IsNewlineBefore) {
                            break;
                        }
                        else if (tt.LengthChar == 1 && tt.IsCharOf(m_OpenChars) && tt.IsWhitespaceBefore) {
                            break;
                        }
                        else if (tt.LengthChar == 1 && tt.IsCharOf(m_CloseChars) && !tt.IsWhitespaceBefore) {
                            has = true;
                            break;
                        }
                    }
                    if (!has) {
                        last = t.Previous;
                        break;
                    }
                }
            }
            if ((t.Previous is Pullenti.Ner.MetaToken) && CanBeEndOfSequence((t.Previous as Pullenti.Ner.MetaToken).EndToken, false, null, false)) {
                last = t.Previous;
                break;
            }
        }
        if (crlf > 1) {
            if (brList.Count > 1) {
                break;
            }
            if (crlf > 10) {
                break;
            }
        }
        if (t.IsChar(';') && t.IsNewlineAfter) {
            break;
        }
        // Jump over a whole noun phrase; remember whether a line-initial phrase is genitive.
        NounPhraseToken npt = NounPhraseHelper.TryParse(t, NounPhraseParseAttr.No, 0, null);
        if (npt != null) {
            if (t.IsNewlineBefore) {
                genCase = npt.Morph.Case.IsGenitive;
            }
            last = (t = npt.EndToken);
        }
    }
    // Only the opener was found: accept a line-terminated span in two special cases.
    if ((brList.Count == 1 && brList[0].CanBeOpen && (last is Pullenti.Ner.MetaToken)) && last.IsNewlineAfter) {
        if (BracketHelper.CanBeEndOfSequence((last as Pullenti.Ner.MetaToken).EndToken, false, null, false)) {
            return new BracketSequenceToken(t0, last);
        }
    }
    if ((brList.Count == 1 && brList[0].CanBeOpen && genCase) && last.IsNewlineAfter && crlf <= 2) {
        return new BracketSequenceToken(t0, last);
    }
    if (brList.Count < 1) {
        return null;
    }
    // An adjacent '<' '>' pair is treated as an open/close pair.
    for (int i = 1; i < (brList.Count - 1); i++) {
        if (brList[i].Char == '<' && brList[i + 1].Char == '>') {
            brList[i].CanBeOpen = true;
            brList[i + 1].CanBeClose = true;
        }
    }
    List<BracketSequenceToken> internals = null;
    // Drop trailing self-contained pairs that cannot close the opener.
    while (brList.Count > 3) {
        int i = brList.Count - 1;
        if ((brList[i].CanBeClose && brList[i - 1].CanBeOpen && !CanBeCloseChar(brList[i].Char, brList[0].Char)) && CanBeCloseChar(brList[i].Char, brList[i - 1].Char)) {
            brList.RemoveRange(brList.Count - 2, 2);
            continue;
        }
        break;
    }
    // Repeatedly cut inner open/close pairs out of the list, remembering them as internal sequences.
    while (brList.Count >= 4) {
        bool changed = false;
        for (int i = 1; i < (brList.Count - 2); i++) {
            if ((brList[i].CanBeOpen && !brList[i].CanBeClose && brList[i + 1].CanBeClose) && !brList[i + 1].CanBeOpen) {
                bool ok = false;
                if (MustBeCloseChar(brList[i + 1].Char, brList[i].Char) || brList[i].Char != brList[0].Char) {
                    ok = true;
                    if ((i == 1 && ((i + 2) < brList.Count) && brList[i + 2].Char == ')') && brList[i + 1].Char != ')' && CanBeCloseChar(brList[i + 1].Char, brList[i - 1].Char)) {
                        brList[i + 2] = brList[i + 1];
                    }
                }
                else if (i > 1 && ((i + 2) < brList.Count) && MustBeCloseChar(brList[i + 2].Char, brList[i - 1].Char)) {
                    ok = true;
                }
                if (ok) {
                    if (internals == null) {
                        internals = new List<BracketSequenceToken>();
                    }
                    internals.Add(new BracketSequenceToken(brList[i].Source, brList[i + 1].Source));
                    brList.RemoveRange(i, 2);
                    changed = true;
                    break;
                }
            }
        }
        if (!changed) {
            break;
        }
    }
    BracketSequenceToken res = null;
    if ((brList.Count >= 4 && brList[1].CanBeOpen && brList[2].CanBeClose) && brList[3].CanBeClose && !brList[3].CanBeOpen) {
        if (CanBeCloseChar(brList[3].Char, brList[0].Char)) {
            res = new BracketSequenceToken(brList[0].Source, brList[3].Source);
            if (brList[0].Source.Next != brList[1].Source || brList[2].Source.Next != brList[3].Source) {
                res.Internal.Add(new BracketSequenceToken(brList[1].Source, brList[2].Source));
            }
            if (internals != null) {
                res.Internal.AddRange(internals);
            }
        }
    }
    if ((res == null && brList.Count >= 3 && brList[2].CanBeClose) && !brList[2].CanBeOpen) {
        if (((attrs & BracketParseAttr.NearCloseBracket)) != BracketParseAttr.No) {
            if (CanBeCloseChar(brList[1].Char, brList[0].Char)) {
                return new BracketSequenceToken(brList[0].Source, brList[1].Source);
            }
        }
        bool ok = true;
        if (CanBeCloseChar(brList[2].Char, brList[0].Char) && CanBeCloseChar(brList[1].Char, brList[0].Char) && brList[1].CanBeClose) {
            // Both brList[1] and brList[2] could close the opener: inspect the text between them.
            for (t = brList[1].Source; t != brList[2].Source && t != null; t = t.Next) {
                if (t.IsNewlineBefore) {
                    ok = false;
                    break;
                }
                if (t.Chars.IsLetter && t.Chars.IsAllLower) {
                    ok = false;
                    break;
                }
                NounPhraseToken npt = NounPhraseHelper.TryParse(t, NounPhraseParseAttr.No, 0, null);
                if (npt != null) {
                    t = npt.EndToken;
                }
            }
            if (ok) {
                for (t = brList[0].Source.Next; t != brList[1].Source && t != null; t = t.Next) {
                    if (t.IsNewlineBefore) {
                        return new BracketSequenceToken(brList[0].Source, t.Previous);
                    }
                }
            }
            // Walk backwards on the same line, balancing single-character bracket tokens before the opener.
            int lev1 = 0;
            for (Pullenti.Ner.Token tt = brList[0].Source.Previous; tt != null; tt = tt.Previous) {
                if (tt.IsNewlineAfter || tt.IsTableControlChar) {
                    break;
                }
                if (!(tt is Pullenti.Ner.TextToken)) {
                    continue;
                }
                if (tt.Chars.IsLetter || tt.LengthChar > 1) {
                    continue;
                }
                char ch = (tt as Pullenti.Ner.TextToken).Term[0];
                if (CanBeCloseChar(ch, brList[0].Char)) {
                    lev1++;
                }
                else if (CanBeCloseChar(brList[1].Char, ch)) {
                    lev1--;
                    if (lev1 < 0) {
                        return new BracketSequenceToken(brList[0].Source, brList[1].Source);
                    }
                }
            }
        }
        if (ok && CanBeCloseChar(brList[2].Char, brList[0].Char)) {
            BracketSequenceToken intern = new BracketSequenceToken(brList[1].Source, brList[2].Source);
            res = new BracketSequenceToken(brList[0].Source, brList[2].Source);
            res.Internal.Add(intern);
        }
        else if (ok && CanBeCloseChar(brList[2].Char, brList[1].Char) && brList[0].CanBeOpen) {
            if (CanBeCloseChar(brList[2].Char, brList[0].Char)) {
                BracketSequenceToken intern = new BracketSequenceToken(brList[1].Source, brList[2].Source);
                res = new BracketSequenceToken(brList[0].Source, brList[2].Source);
                res.Internal.Add(intern);
            }
            else if (brList.Count == 3) {
                return null;
            }
        }
    }
    if (res == null && brList.Count > 1 && brList[1].CanBeClose) {
        res = new BracketSequenceToken(brList[0].Source, brList[1].Source);
    }
    if (res == null && brList.Count > 1 && CanBeCloseChar(brList[1].Char, brList[0].Char)) {
        res = new BracketSequenceToken(brList[0].Source, brList[1].Source);
    }
    if (res == null && brList.Count == 2 && brList[0].Char == brList[1].Char) {
        res = new BracketSequenceToken(brList[0].Source, brList[1].Source);
    }
    if (res != null && internals != null) {
        foreach (BracketSequenceToken i in internals) {
            // Fixed: the four-bracket branch above may already have added all internals via
            // AddRange; skip those so the same sequence is not listed twice.
            if (i.BeginChar < res.EndChar && !res.Internal.Contains(i)) {
                res.Internal.Add(i);
            }
        }
    }
    if (res == null) {
        // Nothing matched: look for a metatoken whose last inner token closes the opener.
        cou = 0;
        for (Pullenti.Ner.Token tt = t0.Next; tt != null; tt = tt.Next, cou++) {
            if (tt.IsTableControlChar) {
                break;
            }
            if (MiscHelper.CanBeStartOfSentence(tt)) {
                break;
            }
            if (maxTokens > 0 && cou > maxTokens) {
                break;
            }
            Pullenti.Ner.MetaToken mt = tt as Pullenti.Ner.MetaToken;
            if (mt == null) {
                continue;
            }
            if (mt.EndToken is Pullenti.Ner.TextToken) {
                if ((mt.EndToken as Pullenti.Ner.TextToken).IsCharOf(m_CloseChars)) {
                    Bracket bb = new Bracket(mt.EndToken as Pullenti.Ner.TextToken);
                    if (bb.CanBeClose && CanBeCloseChar(bb.Char, brList[0].Char)) {
                        return new BracketSequenceToken(t0, tt);
                    }
                }
            }
        }
    }
    return res;
}
/// <summary>
/// Produces the text name of the token span [begin, end]. An enclosing bracket pair is dropped first;
/// optionally the leading noun group (or leading Cyrillic word) is rendered in normal case.
/// </summary>
/// <param name="begin">first token of the span</param>
/// <param name="end">last token of the span</param>
/// <param name="normalizeFirstNounGroup">when true, the leading noun phrase / word is normalized via GetNormalCaseText</param>
/// <param name="normalFirstGroupSingle">when normalizing a noun phrase, request singular number instead of undefined</param>
/// <param name="ignoreGeoReferent">forwarded to GetNameEx for the remainder of the span</param>
/// <returns>
/// the assembled name with trailing '*' and whitespace characters removed;
/// null when the name consists only of such characters (or when GetNameEx itself produced null)
/// </returns>
static string GetNameWithoutBrackets(Pullenti.Ner.Token begin, Pullenti.Ner.Token end, bool normalizeFirstNounGroup = false, bool normalFirstGroupSingle = false, bool ignoreGeoReferent = false)
{
    // Step inside the brackets when the span is wrapped in a bracket pair.
    if (BracketHelper.CanBeStartOfSequence(begin, false, false) && BracketHelper.CanBeEndOfSequence(end, false, begin, false)) {
        begin = begin.Next;
        end = end.Previous;
    }

    string name = null;
    if (normalizeFirstNounGroup && !begin.Morph.Class.IsPreposition) {
        NounPhraseToken group = NounPhraseHelper.TryParse(begin, NounPhraseParseAttr.ReferentCanBeNoun, 0, null);
        // A lone noun unknown to the dictionary is not treated as a noun group.
        if (group != null && group.Noun.GetMorphClassInDictionary().IsUndefined && group.Adjectives.Count == 0) {
            group = null;
        }
        // The group must not run past the end of the requested span.
        if (group != null && group.EndToken.EndChar > end.EndChar) {
            group = null;
        }

        if (group != null) {
            Pullenti.Morph.MorphNumber wantedNumber;
            if (normalFirstGroupSingle) {
                wantedNumber = Pullenti.Morph.MorphNumber.Singular;
            }
            else {
                wantedNumber = Pullenti.Morph.MorphNumber.Undefined;
            }
            name = group.GetNormalCaseText(null, wantedNumber, Pullenti.Morph.MorphGender.Undefined, false);

            Pullenti.Ner.Token rest = group.EndToken.Next;
            bool commaThenWord = rest != null && rest.Next != null && rest.IsComma && (rest.Next is Pullenti.Ner.TextToken) && rest.Next.EndChar <= end.EndChar;
            // ", <word>" where the word is both verb and adjective (presumably a participle):
            // try to attach it, re-inflected to agree with the noun group.
            if (commaThenWord && rest.Next.Morph.Class.IsVerb && rest.Next.Morph.Class.IsAdjective) {
                foreach (Pullenti.Morph.MorphBaseInfo form in rest.Next.Morph.Items) {
                    if (form.Gender != group.Morph.Gender && (form.Gender & group.Morph.Gender) == Pullenti.Morph.MorphGender.Undefined) {
                        continue;
                    }
                    if ((form.Case & group.Morph.Case).IsUndefined) {
                        continue;
                    }
                    if (form.Number != group.Morph.Number && (form.Number & group.Morph.Number) == Pullenti.Morph.MorphNumber.Undefined) {
                        continue;
                    }

                    // First agreeing form decides; only this one is examined.
                    string attached = (rest.Next as Pullenti.Ner.TextToken).Term;
                    Pullenti.Morph.MorphWordForm wordForm = form as Pullenti.Morph.MorphWordForm;
                    if (wordForm != null) {
                        attached = wordForm.NormalCase;
                    }
                    Pullenti.Morph.MorphBaseInfo target = new Pullenti.Morph.MorphBaseInfo();
                    target.Class = Pullenti.Morph.MorphClass.Adjective;
                    target.Gender = group.Morph.Gender;
                    target.Number = group.Morph.Number;
                    target.Language = group.Morph.Language;
                    attached = Pullenti.Morph.MorphologyService.GetWordform(attached, target);
                    if (attached != null) {
                        name = string.Format("{0}, {1}", name, attached);
                        rest = rest.Next.Next;
                    }
                    break;
                }
            }

            // Append whatever follows the noun group inside the span.
            if (rest != null && rest.EndChar <= end.EndChar) {
                string tail = GetNameEx(rest, end, Pullenti.Morph.MorphClass.Undefined, Pullenti.Morph.MorphCase.Undefined, Pullenti.Morph.MorphGender.Undefined, true, ignoreGeoReferent);
                if (!string.IsNullOrEmpty(tail)) {
                    // No separating space when the tail starts with a non-alphanumeric character.
                    name = char.IsLetterOrDigit(tail[0])
                        ? string.Format("{0} {1}", name, tail)
                        : string.Format("{0}{1}", name, tail);
                }
            }
        }
        else if ((begin is Pullenti.Ner.TextToken) && begin.Chars.IsCyrillicLetter) {
            // No noun group: normalize just the first Cyrillic word if the dictionary knows it.
            Pullenti.Morph.MorphClass firstClass = begin.GetMorphClassInDictionary();
            if (!firstClass.IsUndefined) {
                name = begin.GetNormalCaseText(firstClass, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false);
                if (begin.EndChar < end.EndChar) {
                    string tail = GetNameEx(begin.Next, end, Pullenti.Morph.MorphClass.Undefined, Pullenti.Morph.MorphCase.Undefined, Pullenti.Morph.MorphGender.Undefined, true, false);
                    name = string.Format("{0} {1}", name, tail);
                }
            }
        }
    }

    if (name == null) {
        name = GetNameEx(begin, end, Pullenti.Morph.MorphClass.Undefined, Pullenti.Morph.MorphCase.Undefined, Pullenti.Morph.MorphGender.Undefined, true, ignoreGeoReferent);
    }

    if (!string.IsNullOrEmpty(name)) {
        // Cut trailing '*' and whitespace characters.
        int keep = name.Length;
        while (keep > 0 && (name[keep - 1] == '*' || char.IsWhiteSpace(name[keep - 1]))) {
            keep--;
        }
        if (keep == 0) {
            return null;
        }
        if (keep < name.Length) {
            name = name.Substring(0, keep);
        }
    }
    return name;
}
/// <summary>
/// Walks the token chain from FirstToken and, for every text token that the dictionary does not know,
/// asks MorphologyService for a corrected spelling. When the corrected word re-parses into exactly one
/// morph token, the original token is replaced in the chain by a new TextToken built from it, and the
/// new token is registered in CorrectedTokens together with its GetSourceText() value.
/// </summary>
/// <param name="lang">language used for correction when the token has no language of its own, and for re-processing the corrected word</param>
void CorrectWordsByMorph(Pullenti.Morph.MorphLang lang)
{
    for (Pullenti.Ner.Token cur = FirstToken; cur != null; cur = cur.Next) {
        Pullenti.Ner.TextToken word = cur as Pullenti.Ner.TextToken;
        if (word == null || cur.Morph.ContainsAttr("прдктв.", null)) {
            continue;
        }

        // Only words absent from the dictionary and at least 4 characters long are candidates.
        bool knownToDictionary = !cur.GetMorphClassInDictionary().IsUndefined;
        if (knownToDictionary || cur.LengthChar < 4) {
            continue;
        }
        // Skip capitalised possible surnames and all-upper-case words.
        if ((cur.Morph.Class.IsProperSurname && !cur.Chars.IsAllLower) || cur.Chars.IsAllUpper) {
            continue;
        }

        string corrected = Pullenti.Morph.MorphologyService.CorrectWord(word.Term, (cur.Morph.Language.IsUndefined ? lang : cur.Morph.Language));
        if (corrected == null) {
            continue;
        }
        List<Pullenti.Morph.MorphToken> parsed = Pullenti.Morph.MorphologyService.Process(corrected, lang, null);
        if (parsed == null || parsed.Count != 1) {
            continue;
        }

        // The replacement keeps the original position, character info and original term.
        Pullenti.Ner.TextToken replacement = new Pullenti.Ner.TextToken(parsed[0], this, cur.BeginChar, cur.EndChar);
        replacement.Chars = cur.Chars;
        replacement.Term0 = word.Term;
        if (replacement.GetMorphClassInDictionary().IsProperSurname) {
            continue;
        }

        // Splice the replacement into the chain in place of the current token.
        if (cur == FirstToken) {
            FirstToken = replacement;
        }
        else {
            cur.Previous.Next = replacement;
        }
        replacement.Next = cur.Next;
        cur = replacement;

        if (CorrectedTokens == null) {
            CorrectedTokens = new Dictionary<Pullenti.Ner.Token, string>();
        }
        CorrectedTokens.Add(cur, cur.GetSourceText());
    }
}
/// <summary>
/// Tries to recognise one weapon-description item (noun, brand, name, model, number, caliber or developer)
/// starting at token <paramref name="t"/>.
/// </summary>
/// <param name="t">token to start from; null yields null</param>
/// <param name="prev">previously recognised item, used to decide whether a capitalised word may be a name/brand; may be null</param>
/// <param name="afterConj">only passed through to the recursive calls in this method</param>
/// <param name="attachHigh">only passed through to the recursive call made for a bracketed item</param>
/// <returns>the recognised item, or null when nothing matches</returns>
static WeaponItemToken _TryParse(Pullenti.Ner.Token t, WeaponItemToken prev, bool afterConj, bool attachHigh = false)
{
    if (t == null) {
        return null;
    }

    // Item wrapped in brackets: parse the inside and widen the result over the brackets.
    if (Pullenti.Ner.Core.BracketHelper.IsBracket(t, true)) {
        WeaponItemToken wit = _TryParse(t.Next, prev, afterConj, attachHigh);
        if (wit != null) {
            if (wit.EndToken.Next == null) {
                wit.BeginToken = t;
                return wit;
            }
            if (Pullenti.Ner.Core.BracketHelper.IsBracket(wit.EndToken.Next, true)) {
                wit.BeginToken = t;
                wit.EndToken = wit.EndToken.Next;
                return wit;
            }
        }
    }

    // Ontology lookup: the termin's Tag carries the item type.
    Pullenti.Ner.Core.TerminToken tok = m_Ontology.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
    if (tok != null) {
        WeaponItemToken res = new WeaponItemToken(t, tok.EndToken);
        res.Typ = (Typs)tok.Termin.Tag;
        if (res.Typ == Typs.Noun) {
            res.Value = tok.Termin.CanonicText;
            if (tok.Termin.Tag2 != null) {
                res.IsDoubt = true;
            }
            // Absorb directly following brands (as inner tokens) and adjectives (into AltValue).
            for (Pullenti.Ner.Token tt = res.EndToken.Next; tt != null; tt = tt.Next) {
                if (tt.WhitespacesBeforeCount > 2) {
                    break;
                }
                WeaponItemToken wit = _TryParse(tt, null, false, false);
                if (wit != null) {
                    if (wit.Typ == Typs.Brand) {
                        res.InnerTokens.Add(wit);
                        res.EndToken = (tt = wit.EndToken);
                        continue;
                    }
                    break;
                }
                if (!(tt is Pullenti.Ner.TextToken)) {
                    break;
                }
                Pullenti.Morph.MorphClass mc = tt.GetMorphClassInDictionary();
                if (mc == Pullenti.Morph.MorphClass.Adjective) {
                    // AltValue is kept as "<adjectives...> <noun>": strip the noun, add the adjective, re-append the noun.
                    if (res.AltValue == null) {
                        res.AltValue = res.Value;
                    }
                    if (res.AltValue.EndsWith(res.Value)) {
                        res.AltValue = res.AltValue.Substring(0, res.AltValue.Length - res.Value.Length);
                    }
                    res.AltValue = string.Format("{0}{1} {2}", res.AltValue, (tt as Pullenti.Ner.TextToken).Term, res.Value);
                    res.EndToken = tt;
                    continue;
                }
                break;
            }
            return res;
        }
        if (res.Typ == Typs.Brand || res.Typ == Typs.Name) {
            res.Value = tok.Termin.CanonicText;
            return res;
        }
        if (res.Typ == Typs.Model) {
            res.Value = tok.Termin.CanonicText;
            // Tag2 may hold a list of related termins; each becomes an inner token of the model.
            if (tok.Termin.Tag2 is List<Pullenti.Ner.Core.Termin>) {
                List<Pullenti.Ner.Core.Termin> li = tok.Termin.Tag2 as List<Pullenti.Ner.Core.Termin>;
                foreach (Pullenti.Ner.Core.Termin to in li) {
                    WeaponItemToken wit = new WeaponItemToken(t, tok.EndToken) {
                        Typ = (Typs)to.Tag,
                        Value = to.CanonicText,
                        IsInternal = tok.BeginToken == tok.EndToken
                    };
                    res.InnerTokens.Add(wit);
                    if (to.AdditionalVars != null && to.AdditionalVars.Count > 0) {
                        wit.AltValue = to.AdditionalVars[0].CanonicText;
                    }
                }
            }
            res._correctModel();
            return res;
        }
        // Any other type falls through to the heuristics below.
    }

    // "№ ..." style number.
    Pullenti.Ner.Token nnn = Pullenti.Ner.Core.MiscHelper.CheckNumberPrefix(t);
    if (nnn != null) {
        Pullenti.Ner.Transport.Internal.TransItemToken tit = Pullenti.Ner.Transport.Internal.TransItemToken._attachNumber(nnn, true);
        if (tit != null) {
            WeaponItemToken res = new WeaponItemToken(t, tit.EndToken) { Typ = Typs.Number };
            res.Value = tit.Value;
            res.AltValue = tit.AltValue;
            return res;
        }
    }

    // Short (1-3 letters) all-upper-case word: possible model abbreviation.
    if (((t is Pullenti.Ner.TextToken) && t.Chars.IsLetter && t.Chars.IsAllUpper) && (t.LengthChar < 4)) {
        if ((t.Next != null && ((t.Next.IsHiphen || t.Next.IsChar('.'))) && (t.Next.WhitespacesAfterCount < 2)) && (t.Next.Next is Pullenti.Ner.NumberToken)) {
            // The separator is included in the token so that _correctModel starts at the number.
            WeaponItemToken res = new WeaponItemToken(t, t.Next) { Typ = Typs.Model, IsDoubt = true };
            res.Value = (t as Pullenti.Ner.TextToken).Term;
            res._correctModel();
            return res;
        }
        if ((t.Next is Pullenti.Ner.NumberToken) && !t.IsWhitespaceAfter) {
            WeaponItemToken res = new WeaponItemToken(t, t) { Typ = Typs.Model, IsDoubt = true };
            res.Value = (t as Pullenti.Ner.TextToken).Term;
            res._correctModel();
            return res;
        }
        // "СП" followed by a model or brand is read as the noun "СЛУЖЕБНЫЙ ПИСТОЛЕТ".
        if ((t as Pullenti.Ner.TextToken).Term == "СП" && (t.WhitespacesAfterCount < 3) && (t.Next is Pullenti.Ner.TextToken)) {
            WeaponItemToken pp = _TryParse(t.Next, null, false, false);
            if (pp != null && ((pp.Typ == Typs.Model || pp.Typ == Typs.Brand))) {
                WeaponItemToken res = new WeaponItemToken(t, t) { Typ = Typs.Noun };
                res.Value = "ПИСТОЛЕТ";
                res.AltValue = "СЛУЖЕБНЫЙ ПИСТОЛЕТ";
                return res;
            }
        }
    }

    // Capitalised word longer than 2 letters: doubtful name/brand/model, but only in a suitable context.
    if (((t is Pullenti.Ner.TextToken) && t.Chars.IsLetter && !t.Chars.IsAllLower) && t.LengthChar > 2) {
        bool ok = false;
        if (prev != null && ((prev.Typ == Typs.Noun || prev.Typ == Typs.Model || prev.Typ == Typs.Brand))) {
            ok = true;
        }
        else if (prev == null && t.Previous != null && t.Previous.IsCommaAnd) {
            ok = true;
        }
        if (ok) {
            WeaponItemToken res = new WeaponItemToken(t, t) { Typ = Typs.Name, IsDoubt = true };
            res.Value = (t as Pullenti.Ner.TextToken).Term;
            // Hyphenated pair with identical character info is taken as one name.
            if ((t.Next != null && t.Next.IsHiphen && (t.Next.Next is Pullenti.Ner.TextToken)) && t.Next.Next.Chars == t.Chars) {
                res.Value = string.Format("{0}-{1}", res.Value, (t.Next.Next as Pullenti.Ner.TextToken).Term);
                res.EndToken = t.Next.Next;
            }
            if (prev != null && prev.Typ == Typs.Noun) {
                res.Typ = Typs.Brand;
            }
            // A number attached by hyphen or without whitespace turns the word into a model.
            if (res.EndToken.Next != null && res.EndToken.Next.IsHiphen && (res.EndToken.Next.Next is Pullenti.Ner.NumberToken)) {
                res.Typ = Typs.Model;
                res._correctModel();
            }
            else if (!res.EndToken.IsWhitespaceAfter && (res.EndToken.Next is Pullenti.Ner.NumberToken)) {
                res.Typ = Typs.Model;
                res._correctModel();
            }
            return res;
        }
    }

    // Keyword "МАРКА" followed by the brand itself.
    if (t.IsValue("МАРКА", null)) {
        WeaponItemToken res = _TryParse(t.Next, prev, afterConj, false);
        if (res != null && res.Typ == Typs.Brand) {
            res.BeginToken = t;
            return res;
        }
        if (Pullenti.Ner.Core.BracketHelper.CanBeStartOfSequence(t.Next, true, false)) {
            Pullenti.Ner.Core.BracketSequenceToken br = Pullenti.Ner.Core.BracketHelper.TryParse(t.Next, Pullenti.Ner.Core.BracketParseAttr.No, 100);
            if (br != null) {
                return new WeaponItemToken(t, br.EndToken) {
                    Typ = Typs.Brand,
                    Value = Pullenti.Ner.Core.MiscHelper.GetTextValue(br.BeginToken, br.EndToken, Pullenti.Ner.Core.GetTextAttr.No)
                };
            }
        }
        if (((t is Pullenti.Ner.TextToken) && (t.Next is Pullenti.Ner.TextToken) && t.Next.LengthChar > 1) && !t.Next.Chars.IsAllLower) {
            // The brand is the capitalised word after the keyword, so the value comes from t.Next
            // (taking it from t would store the keyword itself as the brand).
            return new WeaponItemToken(t, t.Next) {
                Typ = Typs.Brand,
                Value = (t.Next as Pullenti.Ner.TextToken).Term
            };
        }
    }

    // Keyword "КАЛИБР" followed (optionally after '-' or ':') by a single numeric value.
    if (t.IsValue("КАЛИБР", "КАЛІБР")) {
        Pullenti.Ner.Token tt1 = t.Next;
        if (tt1 != null && ((tt1.IsHiphen || tt1.IsChar(':')))) {
            tt1 = tt1.Next;
        }
        Pullenti.Ner.Measure.Internal.NumbersWithUnitToken num = Pullenti.Ner.Measure.Internal.NumbersWithUnitToken.TryParse(tt1, null, false, false, false, false);
        if (num != null && num.SingleVal != null) {
            return new WeaponItemToken(t, num.EndToken) {
                Typ = Typs.Caliber,
                Value = Pullenti.Ner.Core.NumberHelper.DoubleToString(num.SingleVal.Value)
            };
        }
    }

    // Number measured in "мм", or number followed by the keyword "КАЛИБР".
    if (t is Pullenti.Ner.NumberToken) {
        Pullenti.Ner.Measure.Internal.NumbersWithUnitToken num = Pullenti.Ner.Measure.Internal.NumbersWithUnitToken.TryParse(t, null, false, false, false, false);
        if (num != null && num.SingleVal != null) {
            if (num.Units.Count == 1 && num.Units[0].Unit != null && num.Units[0].Unit.NameCyr == "мм") {
                return new WeaponItemToken(t, num.EndToken) {
                    Typ = Typs.Caliber,
                    Value = Pullenti.Ner.Core.NumberHelper.DoubleToString(num.SingleVal.Value)
                };
            }
            if (num.EndToken.Next != null && num.EndToken.Next.IsValue("КАЛИБР", "КАЛІБР")) {
                return new WeaponItemToken(t, num.EndToken.Next) {
                    Typ = Typs.Caliber,
                    Value = Pullenti.Ner.Core.NumberHelper.DoubleToString(num.SingleVal.Value)
                };
            }
        }
    }

    // Keyword "ПРОИЗВОДСТВО" followed (optionally after '-' or ':') by an organization or geo referent.
    if (t.IsValue("ПРОИЗВОДСТВО", "ВИРОБНИЦТВО")) {
        Pullenti.Ner.Token tt1 = t.Next;
        if (tt1 != null && ((tt1.IsHiphen || tt1.IsChar(':')))) {
            tt1 = tt1.Next;
        }
        if (tt1 is Pullenti.Ner.ReferentToken) {
            if ((tt1.GetReferent() is Pullenti.Ner.Org.OrganizationReferent) || (tt1.GetReferent() is Pullenti.Ner.Geo.GeoReferent)) {
                return new WeaponItemToken(t, tt1) {
                    Typ = Typs.Developer,
                    Ref = tt1.GetReferent()
                };
            }
        }
    }
    return null;
}

/// <summary>
/// Extends this model token over a following number (plus directly attached single letters) and rewrites
/// Value as "VALUE-NUMBER[letters]"; AltValue is then produced by MiscHelper.CreateCyrLatAlternative.
/// Afterwards one more "-NUMBER" / "/NUMBER" / "\NUMBER" group written without whitespace is appended.
/// </summary>
void _correctModel()
{
    Pullenti.Ner.Token tt = EndToken.Next;
    if (tt == null || tt.WhitespacesBeforeCount > 2) {
        return;
    }
    // NOTE(review): IsValue is given the whole string ":\/." here, while the check further below uses
    // IsCharOf("\\/") for single separator characters - possibly IsCharOf was intended. Left unchanged; TODO confirm.
    if (tt.IsValue(":\\/.", null) || tt.IsHiphen) {
        tt = tt.Next;
    }
    if (tt is Pullenti.Ner.NumberToken) {
        StringBuilder tmp = new StringBuilder();
        tmp.Append((tt as Pullenti.Ner.NumberToken).Value);
        // Script of the model name decides to which alphabet look-alike suffix letters are converted.
        bool isLat = Pullenti.Morph.LanguageHelper.IsLatinChar(Value[0]);
        EndToken = tt;
        for (tt = tt.Next; tt != null; tt = tt.Next) {
            if ((tt is Pullenti.Ner.TextToken) && tt.LengthChar == 1 && tt.Chars.IsLetter) {
                if (!tt.IsWhitespaceBefore || ((tt.Previous != null && tt.Previous.IsHiphen))) {
                    char ch = (tt as Pullenti.Ner.TextToken).Term[0];
                    EndToken = tt;
                    char ch2 = (char)0;
                    if (Pullenti.Morph.LanguageHelper.IsLatinChar(ch) && !isLat) {
                        ch2 = Pullenti.Morph.LanguageHelper.GetCyrForLat(ch);
                        if (ch2 != ((char)0)) {
                            ch = ch2;
                        }
                    }
                    else if (Pullenti.Morph.LanguageHelper.IsCyrillicChar(ch) && isLat) {
                        ch2 = Pullenti.Morph.LanguageHelper.GetLatForCyr(ch);
                        if (ch2 != ((char)0)) {
                            ch = ch2;
                        }
                    }
                    tmp.Append(ch);
                    continue;
                }
            }
            break;
        }
        Value = string.Format("{0}-{1}", Value, tmp.ToString());
        AltValue = Pullenti.Ner.Core.MiscHelper.CreateCyrLatAlternative(Value);
    }
    // One more numeric group glued on by '-', '/' or '\' with no whitespace on either side.
    if (!EndToken.IsWhitespaceAfter && EndToken.Next != null && ((EndToken.Next.IsHiphen || EndToken.Next.IsCharOf("\\/")))) {
        if (!EndToken.Next.IsWhitespaceAfter && (EndToken.Next.Next is Pullenti.Ner.NumberToken)) {
            EndToken = EndToken.Next.Next;
            Value = string.Format("{0}-{1}", Value, (EndToken as Pullenti.Ner.NumberToken).Value);
            if (AltValue != null) {
                AltValue = string.Format("{0}-{1}", AltValue, (EndToken as Pullenti.Ner.NumberToken).Value);
            }
        }
    }
}
/// <summary>
/// Tries to build one noun-phrase item (a candidate adjective and/or noun) starting at token
/// <paramref name="t"/>, checking each morphological variant against the already collected
/// <paramref name="items"/> through TryAccordVariant.
/// </summary>
/// <param name="t">token to start from; null yields null</param>
/// <param name="items">items already collected for the phrase; may be null or empty</param>
/// <param name="attrs">parse flags (IgnoreParticiples, ReferentCanBeNoun, ParseNumericAsAdjective, ParsePronouns, MultiNouns are consulted here)</param>
/// <returns>the item, or null when the token is rejected</returns>
public static NounPhraseItem TryParse(Pullenti.Ner.Token t, List<NounPhraseItem> items, Pullenti.Ner.Core.NounPhraseParseAttr attrs)
{
    if (t == null) {
        return null;
    }
    Pullenti.Ner.Token t0 = t;
    bool _canBeSurname = false;
    bool _isDoubtAdj = false;

    // Referent tokens: a referent over a single text token is first parsed as that text token;
    // otherwise the referent becomes a noun whose normal value is the referent's string form.
    Pullenti.Ner.ReferentToken rt = t as Pullenti.Ner.ReferentToken;
    if (rt != null && rt.BeginToken == rt.EndToken && (rt.BeginToken is Pullenti.Ner.TextToken)) {
        NounPhraseItem res = TryParse(rt.BeginToken, items, attrs);
        if (res != null) {
            res.BeginToken = (res.EndToken = t);
            res.CanBeNoun = true;
            return res;
        }
    }
    if (rt != null) {
        NounPhraseItem res = new NounPhraseItem(t, t);
        foreach (Pullenti.Morph.MorphBaseInfo m in t.Morph.Items) {
            NounPhraseItemTextVar v = new NounPhraseItemTextVar(m, null);
            v.NormalValue = t.GetReferent().ToString();
            res.NounMorph.Add(v);
        }
        res.CanBeNoun = true;
        return res;
    }

    bool hasLegalVerb = false;
    if (t is Pullenti.Ner.TextToken) {
        if (!t.Chars.IsLetter) {
            return null;
        }
        string str = (t as Pullenti.Ner.TextToken).Term;

        // Words ending in 'А'/'О': filter out dictionary verbs and adverbs, flag short-form adjectives.
        if (str[str.Length - 1] == 'А' || str[str.Length - 1] == 'О') {
            foreach (Pullenti.Morph.MorphBaseInfo wf in t.Morph.Items) {
                if ((wf is Pullenti.Morph.MorphWordForm) && (wf as Pullenti.Morph.MorphWordForm).IsInDictionary) {
                    if (wf.Class.IsVerb) {
                        Pullenti.Morph.MorphClass mc = t.GetMorphClassInDictionary();
                        if (!mc.IsNoun && ((attrs & Pullenti.Ner.Core.NounPhraseParseAttr.IgnoreParticiples)) == Pullenti.Ner.Core.NounPhraseParseAttr.No) {
                            if (!Pullenti.Morph.LanguageHelper.EndsWithEx(str, "ОГО", "ЕГО", null, null)) {
                                return null;
                            }
                        }
                        hasLegalVerb = true;
                    }
                    if (wf.Class.IsAdverb) {
                        if (t.Next == null || !t.Next.IsHiphen) {
                            // Only these adverb look-alikes are allowed to continue.
                            if ((str == "ВСЕГО" || str == "ДОМА" || str == "НЕСКОЛЬКО") || str == "МНОГО" || str == "ПОРЯДКА") {
                            }
                            else {
                                return null;
                            }
                        }
                    }
                    if (wf.Class.IsAdjective) {
                        if (wf.ContainsAttr("к.ф.", null)) {
                            if (t.GetMorphClassInDictionary() == Pullenti.Morph.MorphClass.Adjective) {
                            }
                            else {
                                _isDoubtAdj = true;
                            }
                        }
                    }
                }
            }
        }

        // Capitalised possible surname.
        Pullenti.Morph.MorphClass mc0 = t.Morph.Class;
        if (mc0.IsProperSurname && !t.Chars.IsAllLower) {
            foreach (Pullenti.Morph.MorphBaseInfo wf in t.Morph.Items) {
                if (wf.Class.IsProperSurname && wf.Number != Pullenti.Morph.MorphNumber.Plural) {
                    Pullenti.Morph.MorphWordForm wff = wf as Pullenti.Morph.MorphWordForm;
                    if (wff == null) {
                        continue;
                    }
                    string s = ((wff.NormalFull ?? wff.NormalCase)) ?? "";
                    if (Pullenti.Morph.LanguageHelper.EndsWithEx(s, "ИН", "ЕН", "ЫН", null)) {
                        if (!wff.IsInDictionary) {
                            _canBeSurname = true;
                        }
                        else {
                            return null;
                        }
                    }
                    if (wff.IsInDictionary && Pullenti.Morph.LanguageHelper.EndsWith(s, "ОВ")) {
                        _canBeSurname = true;
                    }
                }
            }
        }

        // Capitalised dictionary first name: rejected unless one of the exceptions below applies.
        if (mc0.IsProperName && !t.Chars.IsAllLower) {
            foreach (Pullenti.Morph.MorphBaseInfo wff in t.Morph.Items) {
                Pullenti.Morph.MorphWordForm wf = wff as Pullenti.Morph.MorphWordForm;
                if (wf == null) {
                    continue;
                }
                if (wf.NormalCase == "ГОР") {
                    continue;
                }
                if (wf.Class.IsProperName && wf.IsInDictionary) {
                    if (wf.NormalCase == null || !wf.NormalCase.StartsWith("ЛЮБ")) {
                        if (mc0.IsAdjective && t.Morph.ContainsAttr("неизм.", null)) {
                        }
                        else if (((attrs & Pullenti.Ner.Core.NounPhraseParseAttr.ReferentCanBeNoun)) == Pullenti.Ner.Core.NounPhraseParseAttr.ReferentCanBeNoun) {
                        }
                        else {
                            if (items == null || (items.Count < 1)) {
                                return null;
                            }
                            if (!items[0].IsStdAdjective) {
                                return null;
                            }
                        }
                    }
                }
            }
        }

        if (mc0.IsAdjective && t.Morph.ItemsCount == 1) {
            if (t.Morph[0].ContainsAttr("в.ср.ст.", null)) {
                return null;
            }
        }
        Pullenti.Morph.MorphClass mc1 = t.GetMorphClassInDictionary();
        if (mc1 == Pullenti.Morph.MorphClass.Verb && t.Morph.Case.IsUndefined) {
            return null;
        }

        // With IgnoreParticiples: a verb form marked "дейст.з." is rejected unless the term ends with "СЯ".
        if ((((attrs & Pullenti.Ner.Core.NounPhraseParseAttr.IgnoreParticiples)) == Pullenti.Ner.Core.NounPhraseParseAttr.IgnoreParticiples && t.Morph.Class.IsVerb && !t.Morph.Class.IsNoun) && !t.Morph.Class.IsProper) {
            foreach (Pullenti.Morph.MorphBaseInfo wf in t.Morph.Items) {
                if (wf.Class.IsVerb) {
                    if (wf.ContainsAttr("дейст.з.", null)) {
                        if (Pullenti.Morph.LanguageHelper.EndsWith((t as Pullenti.Ner.TextToken).Term, "СЯ")) {
                        }
                        else {
                            return null;
                        }
                    }
                }
            }
        }
    }

    // Up to two passes. Pass 0 may analyse the word after a hyphen (t) as the head of a compound that
    // starts at t0; pass 1 runs when that failed, or after "БИЗНЕС" set t1 to the following token.
    Pullenti.Ner.Token t1 = null;
    for (int k = 0; k < 2; k++) {
        t = t1 ?? t0;
        if (k == 0) {
            if (((t0 is Pullenti.Ner.TextToken) && t0.Next != null && t0.Next.IsHiphen) && t0.Next.Next != null) {
                if (!t0.IsWhitespaceAfter && !t0.Morph.Class.IsPronoun && !(t0.Next.Next is Pullenti.Ner.NumberToken)) {
                    if (!t0.Next.IsWhitespaceAfter) {
                        t = t0.Next.Next;
                    }
                    else if (t0.Next.Next.Chars.IsAllLower && Pullenti.Morph.LanguageHelper.EndsWith((t0 as Pullenti.Ner.TextToken).Term, "О")) {
                        t = t0.Next.Next;
                    }
                }
            }
        }

        NounPhraseItem it = new NounPhraseItem(t0, t) { CanBeSurname = _canBeSurname };
        if (t0 == t && (t0 is Pullenti.Ner.ReferentToken)) {
            it.CanBeNoun = true;
            it.Morph = new Pullenti.Ner.MorphCollection(t0.Morph);
        }

        bool canBePrepos = false;
        foreach (Pullenti.Morph.MorphBaseInfo v in t.Morph.Items) {
            Pullenti.Morph.MorphWordForm wf = v as Pullenti.Morph.MorphWordForm;

            // Verb form that has a case: taken as an adjective variant without further checks.
            if (v.Class.IsVerb && !v.Case.IsUndefined) {
                it.CanBeAdj = true;
                it.AdjMorph.Add(new NounPhraseItemTextVar(v, t));
                continue;
            }
            if (v.Class.IsPreposition) {
                canBePrepos = true;
            }

            // --- adjective candidate ---
            if (v.Class.IsAdjective || ((v.Class.IsPronoun && !v.Class.IsPersonalPronoun && !v.ContainsAttr("неизм.", null))) || ((v.Class.IsNoun && (t is Pullenti.Ner.NumberToken)))) {
                if (TryAccordVariant(items, (items == null ? 0 : items.Count), v, false)) {
                    if (v.ContainsAttr("к.ф.", null)) {
                        continue;
                    }
                    if (v.ContainsAttr("собир.", null) && !(t is Pullenti.Ner.NumberToken)) {
                        if (wf != null && wf.IsInDictionary) {
                            return null;
                        }
                        continue;
                    }
                    if (v.ContainsAttr("сравн.", null)) {
                        continue;
                    }
                    bool ok = true;
                    if (t is Pullenti.Ner.TextToken) {
                        string s = (t as Pullenti.Ner.TextToken).Term;
                        if (s == "ПРАВО" || s == "ПРАВА") {
                            ok = false;
                        }
                        else if (Pullenti.Morph.LanguageHelper.EndsWith(s, "ОВ") && t.GetMorphClassInDictionary().IsNoun) {
                            ok = false;
                        }
                    }
                    else if (t is Pullenti.Ner.NumberToken) {
                        if (v.Class.IsNoun && t.Morph.Class.IsAdjective) {
                            ok = false;
                        }
                        else if (t.Morph.Class.IsNoun && ((attrs & Pullenti.Ner.Core.NounPhraseParseAttr.ParseNumericAsAdjective)) == Pullenti.Ner.Core.NounPhraseParseAttr.No) {
                            ok = false;
                        }
                    }
                    if (ok) {
                        it.AdjMorph.Add(new NounPhraseItemTextVar(v, t));
                        it.CanBeAdj = true;
                        if (_isDoubtAdj && t0 == t) {
                            it.IsDoubtAdjective = true;
                        }
                        if (hasLegalVerb && wf != null && wf.IsInDictionary) {
                            it.CanBeNoun = true;
                        }
                        if (wf != null && wf.Class.IsPronoun) {
                            it.CanBeNoun = true;
                            it.NounMorph.Add(new NounPhraseItemTextVar(v, t));
                        }
                    }
                }
            }

            // --- noun candidate ---
            bool canBeNoun = false;
            if (t is Pullenti.Ner.NumberToken) {
            }
            else if (v.Class.IsNoun || ((wf != null && wf.NormalCase == "САМ"))) {
                canBeNoun = true;
            }
            else if (v.Class.IsPersonalPronoun) {
                if (items == null || items.Count == 0) {
                    canBeNoun = true;
                }
                else {
                    foreach (NounPhraseItem it1 in items) {
                        if (it1.IsVerb) {
                            if (items.Count == 1 && !v.Case.IsNominative) {
                                canBeNoun = true;
                            }
                            else {
                                return null;
                            }
                        }
                    }
                    if (items.Count == 1) {
                        if (items[0].CanBeAdjForPersonalPronoun) {
                            canBeNoun = true;
                        }
                    }
                }
            }
            else if ((v.Class.IsPronoun
                      && (items == null || items.Count == 0 || (items.Count == 1 && items[0].CanBeAdjForPersonalPronoun))
                      && wf != null)
                     && (wf.NormalCase == "ТОТ" || wf.NormalFull == "ТО" || wf.NormalCase == "ТО"
                         || wf.NormalCase == "ЭТО" || wf.NormalCase == "ВСЕ"
                         || wf.NormalCase == "ЧТО" || wf.NormalCase == "КТО"
                         || wf.NormalFull == "КОТОРЫЙ" || wf.NormalCase == "КОТОРЫЙ")) {
                if (wf.NormalCase == "ВСЕ") {
                    if (t.Next != null && t.Next.IsValue("РАВНО", null)) {
                        return null;
                    }
                }
                canBeNoun = true;
            }
            else if (wf != null && ((wf.NormalFull ?? wf.NormalCase)) == "КОТОРЫЙ" && ((attrs & Pullenti.Ner.Core.NounPhraseParseAttr.ParsePronouns)) == Pullenti.Ner.Core.NounPhraseParseAttr.No) {
                return null;
            }
            else if (v.Class.IsProper && (t is Pullenti.Ner.TextToken)) {
                if (t.LengthChar > 4 || v.Class.IsProperName) {
                    canBeNoun = true;
                }
            }

            if (canBeNoun) {
                bool added = false;
                // MultiNouns: every item after the first must be preceded by a conjunction.
                if (items != null && items.Count > 1 && ((attrs & Pullenti.Ner.Core.NounPhraseParseAttr.MultiNouns)) != Pullenti.Ner.Core.NounPhraseParseAttr.No) {
                    bool ok1 = true;
                    for (int ii = 1; ii < items.Count; ii++) {
                        if (!items[ii].ConjBefore) {
                            ok1 = false;
                            break;
                        }
                    }
                    if (ok1) {
                        if (TryAccordVariant(items, (items == null ? 0 : items.Count), v, true)) {
                            it.NounMorph.Add(new NounPhraseItemTextVar(v, t));
                            it.CanBeNoun = true;
                            it.MultiNouns = true;
                            added = true;
                        }
                    }
                }
                if (!added) {
                    if (TryAccordVariant(items, (items == null ? 0 : items.Count), v, false)) {
                        it.NounMorph.Add(new NounPhraseItemTextVar(v, t));
                        it.CanBeNoun = true;
                        // Indeclinable personal pronoun: also usable as an adjective in any case.
                        if (v.Class.IsPersonalPronoun && t.Morph.ContainsAttr("неизм.", null) && !it.CanBeAdj) {
                            NounPhraseItemTextVar itt = new NounPhraseItemTextVar(v, t);
                            itt.Case = Pullenti.Morph.MorphCase.AllCases;
                            itt.Number = Pullenti.Morph.MorphNumber.Undefined;
                            // No-op check retained as-is in case reading NormalValue matters - TODO confirm and remove.
                            if (itt.NormalValue == null) {
                            }
                            it.AdjMorph.Add(itt);
                            it.CanBeAdj = true;
                        }
                    }
                    // items is nullable throughout this method, so guard it before dereferencing.
                    else if (items != null
                             && (items.Count > 0 && items[0].AdjMorph.Count > 0 && items[0].AdjMorph[0].Number == Pullenti.Morph.MorphNumber.Plural)
                             && !((items[0].AdjMorph[0].Case & v.Case)).IsUndefined
                             && !items[0].AdjMorph[0].Class.IsVerb) {
                        // Plural first adjective and "<noun> , / and <noun phrase>": accept if the cases are compatible.
                        if (t.Next != null && t.Next.IsCommaAnd && (t.Next.Next is Pullenti.Ner.TextToken)) {
                            Pullenti.Ner.Core.NounPhraseToken npt2 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t.Next.Next, attrs, 0, null);
                            if (npt2 != null && npt2.Preposition == null && !((npt2.Morph.Case & v.Case & items[0].AdjMorph[0].Case)).IsUndefined) {
                                it.NounMorph.Add(new NounPhraseItemTextVar(v, t));
                                it.CanBeNoun = true;
                            }
                        }
                    }
                }
            }
        }

        // Hyphenated compound: let every variant account for the prefix part t0.
        if (t0 != t) {
            foreach (NounPhraseItemTextVar v in it.AdjMorph) {
                v.CorrectPrefix(t0 as Pullenti.Ner.TextToken, false);
            }
            foreach (NounPhraseItemTextVar v in it.NounMorph) {
                v.CorrectPrefix(t0 as Pullenti.Ner.TextToken, true);
            }
        }

        // Second pass produced a pure noun: extend the item and join the normal values with '-'.
        if (k == 1 && it.CanBeNoun && !it.CanBeAdj) {
            if (t1 != null) {
                it.EndToken = t1;
            }
            else {
                it.EndToken = t0.Next.Next;
            }
            foreach (NounPhraseItemTextVar v in it.NounMorph) {
                if (v.NormalValue != null && (v.NormalValue.IndexOf('-') < 0)) {
                    v.NormalValue = string.Format("{0}-{1}", v.NormalValue, it.EndToken.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false));
                }
            }
        }

        if (it.CanBeAdj) {
            if (m_StdAdjectives.TryParse(it.BeginToken, Pullenti.Ner.Core.TerminParseAttr.No) != null) {
                it.IsStdAdjective = true;
            }
        }

        // The word may also be a preposition: reject it as a noun when it works as a preposition here.
        if (canBePrepos && it.CanBeNoun) {
            if (items != null && items.Count > 0) {
                Pullenti.Ner.Core.NounPhraseToken npt1 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t, Pullenti.Ner.Core.NounPhraseParseAttr.ParsePreposition | Pullenti.Ner.Core.NounPhraseParseAttr.ParsePronouns | Pullenti.Ner.Core.NounPhraseParseAttr.ParseVerbs, 0, null);
                if (npt1 != null && npt1.EndChar > t.EndChar) {
                    return null;
                }
            }
            else {
                Pullenti.Ner.Core.NounPhraseToken npt1 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t.Next, Pullenti.Ner.Core.NounPhraseParseAttr.ParsePronouns | Pullenti.Ner.Core.NounPhraseParseAttr.ParseVerbs, 0, null);
                if (npt1 != null) {
                    Pullenti.Morph.MorphCase mc = Pullenti.Morph.LanguageHelper.GetCaseAfterPreposition((t as Pullenti.Ner.TextToken).Lemma);
                    if (!((mc & npt1.Morph.Case)).IsUndefined) {
                        return null;
                    }
                }
            }
        }

        if (it.CanBeNoun || it.CanBeAdj || k == 1) {
            // Pronoun followed by a particle ("ЖЕ", "БЫ", ...) or a "-НИБУДЬ"/"-ЛИБО"/"-ТО" suffix.
            if (it.BeginToken.Morph.Class.IsPronoun) {
                Pullenti.Ner.Token tt2 = it.EndToken.Next;
                if ((tt2 != null && tt2.IsHiphen && !tt2.IsWhitespaceAfter) && !tt2.IsWhitespaceBefore) {
                    tt2 = tt2.Next;
                }
                if (tt2 is Pullenti.Ner.TextToken) {
                    string ss = (tt2 as Pullenti.Ner.TextToken).Term;
                    if ((ss == "ЖЕ" || ss == "БЫ" || ss == "ЛИ") || ss == "Ж") {
                        it.EndToken = tt2;
                    }
                    else if (ss == "НИБУДЬ" || ss == "ЛИБО" || (((ss == "ТО" && tt2.Previous.IsHiphen)) && it.CanBeAdj)) {
                        it.EndToken = tt2;
                        foreach (NounPhraseItemTextVar m in it.AdjMorph) {
                            m.NormalValue = string.Format("{0}-{1}", m.NormalValue, ss);
                            if (m.SingleNumberValue != null) {
                                m.SingleNumberValue = string.Format("{0}-{1}", m.SingleNumberValue, ss);
                            }
                        }
                    }
                }
            }
            return it;
        }
        if (t0 == t) {
            // "БИЗНЕС" followed by a word with identical character info: retry with that next word.
            if (t0.IsValue("БИЗНЕС", null) && t0.Next != null && t0.Next.Chars == t0.Chars) {
                t1 = t0.Next;
                continue;
            }
            return it;
        }
    }
    return null;
}
public static MeasureToken TryParse(Pullenti.Ner.Token t, Pullenti.Ner.Core.TerminCollection addUnits, bool canBeSet = true, bool canUnitsAbsent = false, bool isResctriction = false, bool isSubval = false) { if (!(t is Pullenti.Ner.TextToken)) { return(null); } if (t.IsTableControlChar) { return(null); } Pullenti.Ner.Token t0 = t; Pullenti.Ner.MetaToken whd = null; int minmax = 0; Pullenti.Ner.Token tt = NumbersWithUnitToken._isMinOrMax(t0, ref minmax); if (tt != null) { t = tt.Next; } Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t, Pullenti.Ner.Core.NounPhraseParseAttr.ParsePreposition | Pullenti.Ner.Core.NounPhraseParseAttr.IgnoreBrackets, 0, null); if (npt == null) { whd = NumbersWithUnitToken._tryParseWHL(t); if (whd != null) { npt = new Pullenti.Ner.Core.NounPhraseToken(t0, whd.EndToken); } else if (t0.IsValue("КПД", null)) { npt = new Pullenti.Ner.Core.NounPhraseToken(t0, t0); } else if ((t0 is Pullenti.Ner.TextToken) && t0.LengthChar > 3 && t0.GetMorphClassInDictionary().IsUndefined) { npt = new Pullenti.Ner.Core.NounPhraseToken(t0, t0); } else if (t0.IsValue("T", null) && t0.Chars.IsAllLower) { npt = new Pullenti.Ner.Core.NounPhraseToken(t0, t0); t = t0; if (t.Next != null && t.Next.IsChar('=')) { npt.EndToken = t.Next; } } else if ((t0 is Pullenti.Ner.TextToken) && t0.Chars.IsLetter && isSubval) { if (NumbersWithUnitToken.TryParse(t, addUnits, false, false, false, false) != null) { return(null); } npt = new Pullenti.Ner.Core.NounPhraseToken(t0, t0); for (t = t0.Next; t != null; t = t.Next) { if (t.WhitespacesBeforeCount > 2) { break; } else if (!(t is Pullenti.Ner.TextToken)) { break; } else if (!t.Chars.IsLetter) { Pullenti.Ner.Core.BracketSequenceToken br = Pullenti.Ner.Core.BracketHelper.TryParse(t, Pullenti.Ner.Core.BracketParseAttr.No, 100); if (br != null) { npt.EndToken = (t = br.EndToken); } else { break; } } else if (NumbersWithUnitToken.TryParse(t, addUnits, false, false, false, false) != null) { break; } 
else { npt.EndToken = t; } } } else { return(null); } } else if (Pullenti.Ner.Core.NumberHelper.TryParseRealNumber(t, true, false) != null) { return(null); } else { Pullenti.Ner.Date.Internal.DateItemToken dtok = Pullenti.Ner.Date.Internal.DateItemToken.TryAttach(t, null, false); if (dtok != null) { return(null); } } Pullenti.Ner.Token t1 = npt.EndToken; t = npt.EndToken; Pullenti.Ner.MetaToken name = new Pullenti.Ner.MetaToken(npt.BeginToken, npt.EndToken) { Morph = npt.Morph }; List <UnitToken> units = null; List <UnitToken> units2 = null; List <MeasureToken> internals = new List <MeasureToken>(); bool not = false; for (tt = t1.Next; tt != null; tt = tt.Next) { if (tt.IsNewlineBefore) { break; } if (tt.IsTableControlChar) { break; } Pullenti.Ner.Token tt2 = NumbersWithUnitToken._isMinOrMax(tt, ref minmax); if (tt2 != null) { t1 = (t = (tt = tt2)); continue; } if ((tt.IsValue("БЫТЬ", null) || tt.IsValue("ДОЛЖЕН", null) || tt.IsValue("ДОЛЖНЫЙ", null)) || tt.IsValue("МОЖЕТ", null) || ((tt.IsValue("СОСТАВЛЯТЬ", null) && !tt.GetMorphClassInDictionary().IsAdjective))) { t1 = (t = tt); if (tt.Previous.IsValue("НЕ", null)) { not = true; } continue; } Pullenti.Ner.MetaToken www = NumbersWithUnitToken._tryParseWHL(tt); if (www != null) { whd = www; t1 = (t = (tt = www.EndToken)); continue; } if (tt.IsValue("ПРИ", null)) { MeasureToken mt1 = TryParse(tt.Next, addUnits, false, false, true, false); if (mt1 != null) { internals.Add(mt1); t1 = (t = (tt = mt1.EndToken)); continue; } NumbersWithUnitToken n1 = NumbersWithUnitToken.TryParse(tt.Next, addUnits, false, false, false, false); if (n1 != null && n1.Units.Count > 0) { mt1 = new MeasureToken(n1.BeginToken, n1.EndToken) { Nums = n1 }; internals.Add(mt1); t1 = (t = (tt = mt1.EndToken)); continue; } } if (tt.IsValue("ПО", null) && tt.Next != null && tt.Next.IsValue("U", null)) { t1 = (t = (tt = tt.Next)); continue; } if (internals.Count > 0) { if (tt.IsChar(':')) { break; } MeasureToken mt1 = TryParse(tt.Next, addUnits, false, 
false, true, false); if (mt1 != null && mt1.Reliable) { internals.Add(mt1); t1 = (t = (tt = mt1.EndToken)); continue; } } if ((tt is Pullenti.Ner.NumberToken) && (tt as Pullenti.Ner.NumberToken).Typ == Pullenti.Ner.NumberSpellingType.Words) { Pullenti.Ner.Core.NounPhraseToken npt3 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(tt, Pullenti.Ner.Core.NounPhraseParseAttr.ParseNumericAsAdjective, 0, null); if (npt3 != null) { t1 = (tt = npt3.EndToken); if (internals.Count == 0) { name.EndToken = t1; } continue; } } if (((tt.IsHiphen && !tt.IsWhitespaceBefore && !tt.IsWhitespaceAfter) && (tt.Next is Pullenti.Ner.NumberToken) && (tt.Previous is Pullenti.Ner.TextToken)) && tt.Previous.Chars.IsAllUpper) { t1 = (tt = (t = tt.Next)); if (internals.Count == 0) { name.EndToken = t1; } continue; } if (((tt is Pullenti.Ner.NumberToken) && !tt.IsWhitespaceBefore && (tt.Previous is Pullenti.Ner.TextToken)) && tt.Previous.Chars.IsAllUpper) { t1 = (t = tt); if (internals.Count == 0) { name.EndToken = t1; } continue; } if ((((tt is Pullenti.Ner.NumberToken) && !tt.IsWhitespaceAfter && tt.Next.IsHiphen) && !tt.Next.IsWhitespaceAfter && (tt.Next.Next is Pullenti.Ner.TextToken)) && tt.Next.Next.LengthChar > 2) { t1 = (t = (tt = tt.Next.Next)); Pullenti.Ner.Core.NounPhraseToken npt1 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(tt, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null); if (npt1 != null && npt1.EndChar > tt.EndChar) { t1 = (t = (tt = npt1.EndToken)); } if (internals.Count == 0) { name.EndToken = t1; } continue; } if ((tt is Pullenti.Ner.NumberToken) && tt.Previous != null) { if (tt.Previous.IsValue("USB", null)) { t1 = (t = tt); if (internals.Count == 0) { name.EndToken = t1; } for (Pullenti.Ner.Token ttt = tt.Next; ttt != null; ttt = ttt.Next) { if (ttt.IsWhitespaceBefore) { break; } if (ttt.IsCharOf(",:")) { break; } t1 = (t = (tt = ttt)); if (internals.Count == 0) { name.EndToken = t1; } } continue; } } NumbersWithUnitToken mt0 = NumbersWithUnitToken.TryParse(tt, addUnits, 
false, false, false, false); if (mt0 != null) { Pullenti.Ner.Core.NounPhraseToken npt1 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(tt, Pullenti.Ner.Core.NounPhraseParseAttr.ParseNumericAsAdjective | Pullenti.Ner.Core.NounPhraseParseAttr.ParsePreposition, 0, null); if (npt1 != null && npt1.EndChar > mt0.EndChar) { t1 = (t = (tt = npt1.EndToken)); if (internals.Count == 0) { name.EndToken = t1; } continue; } break; } if (((tt.IsComma || tt.IsChar('('))) && tt.Next != null) { www = NumbersWithUnitToken._tryParseWHL(tt.Next); if (www != null) { whd = www; t1 = (t = (tt = www.EndToken)); if (tt.Next != null && tt.Next.IsComma) { t1 = (tt = tt.Next); } if (tt.Next != null && tt.Next.IsChar(')')) { t1 = (tt = tt.Next); continue; } } List <UnitToken> uu = UnitToken.TryParseList(tt.Next, addUnits, false); if (uu != null) { t1 = (t = uu[uu.Count - 1].EndToken); units = uu; if (tt.IsChar('(') && t1.Next != null && t1.Next.IsChar(')')) { t1 = (t = (tt = t1.Next)); continue; } else if (t1.Next != null && t1.Next.IsChar('(')) { uu = UnitToken.TryParseList(t1.Next.Next, addUnits, false); if (uu != null && uu[uu.Count - 1].EndToken.Next != null && uu[uu.Count - 1].EndToken.Next.IsChar(')')) { units2 = uu; t1 = (t = (tt = uu[uu.Count - 1].EndToken.Next)); continue; } www = NumbersWithUnitToken._tryParseWHL(t1.Next); if (www != null) { whd = www; t1 = (t = (tt = www.EndToken)); continue; } } if (uu != null && uu.Count > 0 && !uu[0].IsDoubt) { break; } if (t1.Next != null) { if (t1.Next.IsTableControlChar || t1.IsNewlineAfter) { break; } } units = null; } } if (Pullenti.Ner.Core.BracketHelper.CanBeStartOfSequence(tt, false, false) && !(tt.Next is Pullenti.Ner.NumberToken)) { Pullenti.Ner.Core.BracketSequenceToken br = Pullenti.Ner.Core.BracketHelper.TryParse(tt, Pullenti.Ner.Core.BracketParseAttr.No, 100); if (br != null) { t1 = (t = (tt = br.EndToken)); continue; } } if (tt.IsValue("НЕ", null) && tt.Next != null) { Pullenti.Morph.MorphClass mc = 
tt.Next.GetMorphClassInDictionary(); if (mc.IsAdverb || mc.IsMisc) { break; } continue; } if (tt.IsValue("ЯМЗ", null)) { } Pullenti.Ner.Core.NounPhraseToken npt2 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(tt, Pullenti.Ner.Core.NounPhraseParseAttr.ParsePreposition | Pullenti.Ner.Core.NounPhraseParseAttr.IgnoreBrackets | Pullenti.Ner.Core.NounPhraseParseAttr.ParsePronouns, 0, null); if (npt2 == null) { if (tt.Morph.Class.IsPreposition || tt.Morph.Class.IsConjunction) { Pullenti.Ner.Core.TerminToken to = NumbersWithUnitToken.m_Termins.TryParse(tt, Pullenti.Ner.Core.TerminParseAttr.No); if (to != null) { if ((to.EndToken.Next is Pullenti.Ner.TextToken) && to.EndToken.Next.IsLetters) { } else { break; } } t1 = tt; continue; } Pullenti.Morph.MorphClass mc = tt.GetMorphClassInDictionary(); if (((tt is Pullenti.Ner.TextToken) && tt.Chars.IsLetter && tt.LengthChar > 1) && (((tt.Chars.IsAllUpper || mc.IsAdverb || mc.IsUndefined) || mc.IsAdjective))) { List <UnitToken> uu = UnitToken.TryParseList(tt, addUnits, false); if (uu != null) { if (uu[0].LengthChar > 1 || uu.Count > 1) { units = uu; t1 = (t = uu[uu.Count - 1].EndToken); break; } } t1 = (t = tt); if (internals.Count == 0) { name.EndToken = tt; } continue; } if (tt.IsComma) { continue; } if (tt.IsChar('.')) { if (!Pullenti.Ner.Core.MiscHelper.CanBeStartOfSentence(tt.Next)) { continue; } List <UnitToken> uu = UnitToken.TryParseList(tt.Next, addUnits, false); if (uu != null) { if (uu[0].LengthChar > 2 || uu.Count > 1) { units = uu; t1 = (t = uu[uu.Count - 1].EndToken); break; } } } break; } t1 = (t = (tt = npt2.EndToken)); if (internals.Count > 0) { } else if (t.IsValue("ПРЕДЕЛ", null) || t.IsValue("ГРАНИЦА", null) || t.IsValue("ДИАПАЗОН", null)) { } else if (t.Chars.IsLetter) { name.EndToken = t1; } } Pullenti.Ner.Token t11 = t1; for (t1 = t1.Next; t1 != null; t1 = t1.Next) { if (t1.IsTableControlChar) { } else if (t1.IsCharOf(":,_")) { if (isResctriction) { return(null); } Pullenti.Ner.MetaToken www = 
NumbersWithUnitToken._tryParseWHL(t1.Next); if (www != null) { whd = www; t1 = (t = www.EndToken); continue; } List <UnitToken> uu = UnitToken.TryParseList(t1.Next, addUnits, false); if (uu != null) { if (uu[0].LengthChar > 1 || uu.Count > 1) { units = uu; t1 = (t = uu[uu.Count - 1].EndToken); continue; } } if (t1.IsChar(':')) { List <MeasureToken> li = new List <MeasureToken>(); for (Pullenti.Ner.Token ttt = t1.Next; ttt != null; ttt = ttt.Next) { if (ttt.IsHiphen || ttt.IsTableControlChar) { continue; } if ((ttt is Pullenti.Ner.TextToken) && !ttt.Chars.IsLetter) { continue; } MeasureToken mt1 = TryParse(ttt, addUnits, true, true, false, true); if (mt1 == null) { break; } li.Add(mt1); ttt = mt1.EndToken; if (ttt.Next != null && ttt.Next.IsChar(';')) { ttt = ttt.Next; } if (ttt.IsChar(';')) { } else if (ttt.IsNewlineAfter && mt1.IsNewlineBefore) { } else { break; } } if (li.Count > 1) { MeasureToken res0 = new MeasureToken(t0, li[li.Count - 1].EndToken) { Internals = li, IsEmpty = true }; if (internals != null && internals.Count > 0) { res0.InternalEx = internals[0]; } string nam = Pullenti.Ner.Core.MiscHelper.GetTextValueOfMetaToken(name, Pullenti.Ner.Core.GetTextAttr.FirstNounGroupToNominative); li[0].BeginToken = t0; foreach (MeasureToken v in li) { v.Name = string.Format("{0} ({1})", nam, v.Name ?? 
"").Trim(); if (v.Nums != null && v.Nums.Units.Count == 0 && units != null) { v.Nums.Units = units; } } return(res0); } } } else if (t1.IsHiphen && t1.IsWhitespaceAfter && t1.IsWhitespaceBefore) { } else if (t1.IsHiphen && t1.Next != null && t1.Next.IsChar('(')) { } else { break; } } if (t1 == null) { return(null); } List <NumbersWithUnitToken> mts = NumbersWithUnitToken.TryParseMulti(t1, addUnits, false, not, true, isResctriction); if (mts == null) { if (units != null && units.Count > 0) { if (t1 == null || t1.Previous.IsChar(':')) { mts = new List <NumbersWithUnitToken>(); if (t1 == null) { for (t1 = t11; t1 != null && t1.Next != null; t1 = t1.Next) { } } else { t1 = t1.Previous; } mts.Add(new NumbersWithUnitToken(t0, t1) { SingleVal = double.NaN }); } } if (mts == null) { return(null); } } NumbersWithUnitToken mt = mts[0]; if (mt.BeginToken == mt.EndToken && !(mt.BeginToken is Pullenti.Ner.NumberToken)) { return(null); } if (!isSubval && name.BeginToken.Morph.Class.IsPreposition) { name.BeginToken = name.BeginToken.Next; } if (mt.WHL != null) { whd = mt.WHL; } for (int kk = 0; kk < 10; kk++) { if (whd != null && whd.EndToken == name.EndToken) { name.EndToken = whd.BeginToken.Previous; continue; } if (units != null) { if (units[units.Count - 1].EndToken == name.EndToken) { name.EndToken = units[0].BeginToken.Previous; continue; } } break; } if (mts.Count > 1 && internals.Count == 0) { if (mt.Units.Count == 0) { if (units != null) { foreach (NumbersWithUnitToken m in mts) { m.Units = units; } } } MeasureToken res1 = new MeasureToken(t0, mts[mts.Count - 1].EndToken) { Morph = name.Morph, Reliable = true }; res1.Name = Pullenti.Ner.Core.MiscHelper.GetTextValueOfMetaToken(name, Pullenti.Ner.Core.GetTextAttr.FirstNounGroupToNominative); for (int k = 0; k < mts.Count; k++) { MeasureToken ttt = new MeasureToken(mts[k].BeginToken, mts[k].EndToken) { Nums = mts[k] }; if (whd != null) { List <string> nams = whd.Tag as List <string>; if (k < nams.Count) { ttt.Name = 
nams[k]; } } res1.Internals.Add(ttt); } Pullenti.Ner.Token tt1 = res1.EndToken.Next; if (tt1 != null && tt1.IsChar('±')) { NumbersWithUnitToken nn = NumbersWithUnitToken._tryParse(tt1, addUnits, true, false, false); if (nn != null && nn.PlusMinusPercent) { res1.EndToken = nn.EndToken; res1.Nums = nn; if (nn.Units.Count > 0 && units == null && mt.Units.Count == 0) { foreach (NumbersWithUnitToken m in mts) { m.Units = nn.Units; } } } } return(res1); } if (!mt.IsWhitespaceBefore) { if (mt.BeginToken.Previous == null) { return(null); } if (mt.BeginToken.Previous.IsCharOf(":),") || mt.BeginToken.Previous.IsTableControlChar || mt.BeginToken.Previous.IsValue("IP", null)) { } else if (mt.BeginToken.IsHiphen && mt.Units.Count > 0 && !mt.Units[0].IsDoubt) { } else { return(null); } } if (mt.Units.Count == 0 && units != null) { mt.Units = units; if (mt.DivNum != null && units.Count > 1 && mt.DivNum.Units.Count == 0) { for (int i = 1; i < units.Count; i++) { if (units[i].Pow == -1) { for (int j = i; j < units.Count; j++) { mt.DivNum.Units.Add(units[j]); units[j].Pow = -units[j].Pow; } mt.Units.RemoveRange(i, units.Count - i); break; } } } } if ((minmax < 0) && mt.SingleVal != null) { mt.FromVal = mt.SingleVal; mt.FromInclude = true; mt.SingleVal = null; } if (minmax > 0 && mt.SingleVal != null) { mt.ToVal = mt.SingleVal; mt.ToInclude = true; mt.SingleVal = null; } if (mt.Units.Count == 0) { units = UnitToken.TryParseList(mt.EndToken.Next, addUnits, true); if (units == null) { if (canUnitsAbsent) { } else { return(null); } } else { mt.Units = units; } } MeasureToken res = new MeasureToken(t0, mt.EndToken) { Morph = name.Morph, Internals = internals }; if (((!t0.IsWhitespaceBefore && t0.Previous != null && t0 == name.BeginToken) && t0.Previous.IsHiphen && !t0.Previous.IsWhitespaceBefore) && (t0.Previous.Previous is Pullenti.Ner.TextToken)) { name.BeginToken = (res.BeginToken = name.BeginToken.Previous.Previous); } res.Name = 
Pullenti.Ner.Core.MiscHelper.GetTextValueOfMetaToken(name, (!isSubval ? Pullenti.Ner.Core.GetTextAttr.FirstNounGroupToNominative : Pullenti.Ner.Core.GetTextAttr.No)); res.Nums = mt; foreach (UnitToken u in res.Nums.Units) { if (u.Keyword != null) { if (u.Keyword.BeginChar >= res.BeginChar) { res.Reliable = true; } } } res._parseInternals(addUnits); if (res.Internals.Count > 0 || !canBeSet) { return(res); } t1 = res.EndToken.Next; if (t1 != null && t1.IsCommaAnd) { t1 = t1.Next; } List <NumbersWithUnitToken> mts1 = NumbersWithUnitToken.TryParseMulti(t1, addUnits, false, false, false, false); if ((mts1 != null && mts1.Count == 1 && (t1.WhitespacesBeforeCount < 3)) && mts1[0].Units.Count > 0 && !UnitToken.CanBeEquals(mts[0].Units, mts1[0].Units)) { res.IsSet = true; res.Nums = null; res.Internals.Add(new MeasureToken(mt.BeginToken, mt.EndToken) { Nums = mt }); res.Internals.Add(new MeasureToken(mts1[0].BeginToken, mts1[0].EndToken) { Nums = mts1[0] }); res.EndToken = mts1[0].EndToken; } return(res); }
/// <summary>
/// Parses a phone item at <paramref name="t0"/>. When the parsed item is a prefix,
/// keeps absorbing the tokens that follow it on the same line: further prefixes,
/// a closing colon, noun phrases, proper names and prepositions.
/// </summary>
/// <param name="t0">Token to start parsing from.</param>
/// <returns>
/// The parsed item (for a prefix, possibly with its end token moved forward);
/// null when nothing is recognized at <paramref name="t0"/> or when an absorbed
/// noun phrase ends with the word "ПОСЕЛЕНИЕ".
/// </returns>
public static PhoneItemToken TryAttach(Pullenti.Ner.Token t0)
{
    PhoneItemToken prefix = _TryAttach(t0);
    // Only prefixes get a tail; anything else (including null) is returned untouched.
    if (prefix == null || prefix.ItemType != PhoneItemType.Prefix)
    {
        return prefix;
    }

    for (Pullenti.Ner.Token cur = prefix.EndToken.Next; cur != null; cur = cur.Next)
    {
        // The tail never crosses a table cell boundary or a line break.
        if (cur.IsTableControlChar || cur.IsNewlineBefore)
        {
            break;
        }

        PhoneItemToken following = _TryAttach(cur);
        if (following != null)
        {
            // Any non-prefix phone item terminates the tail.
            if (following.ItemType != PhoneItemType.Prefix)
            {
                break;
            }
            // A second prefix is merged in; its kind fills in an undefined kind.
            if (prefix.Kind == Pullenti.Ner.Phone.PhoneKind.Undefined)
            {
                prefix.Kind = following.Kind;
            }
            cur = following.EndToken;
            prefix.EndToken = cur;
            continue;
        }

        // A colon closes the prefix and is included in it.
        if (cur.IsChar(':'))
        {
            prefix.EndToken = cur;
            break;
        }

        // Only text tokens may be absorbed, and never after a one-character start token.
        if (!(cur is Pullenti.Ner.TextToken) || t0.LengthChar == 1)
        {
            break;
        }

        Pullenti.Ner.Core.NounPhraseToken phrase = Pullenti.Ner.Core.NounPhraseHelper.TryParse(cur, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
        if (phrase != null)
        {
            cur = phrase.EndToken;
            if (cur.IsValue("ПОСЕЛЕНИЕ", null))
            {
                return null;
            }
            prefix.EndToken = cur;
            continue;
        }

        if (cur.GetMorphClassInDictionary().IsProper)
        {
            prefix.EndToken = cur;
            continue;
        }

        // Prepositions are stepped over without moving the end token; anything else stops the scan.
        if (!cur.Morph.Class.IsPreposition)
        {
            break;
        }
    }

    return prefix;
}
/// <summary>
/// Builds a <see cref="BlockLine"/> for the text line that starts at <paramref name="t"/>.
/// The line is stretched to the last token before the next line break, a leading
/// numbering (arabic or roman numbers with optional dots) is skipped and remembered in
/// <c>NumberEnd</c>, and the block type is derived from the <c>m_Ontology</c> termins,
/// from the '§' sign, from <paramref name="names"/> and finally from noun-phrase heuristics.
/// Word statistics (<c>Words</c>, <c>NotWords</c>, <c>IsAllUpper</c>, <c>HasVerb</c>) are
/// collected for the remainder of the line.
/// </summary>
/// <param name="t">First token of the line; null yields null.</param>
/// <param name="names">Optional collection of known block names; may be null.</param>
/// <returns>
/// The described line, or null when <paramref name="t"/> is null or when the line consists
/// solely of a list/index/statement/conclusion/result noun phrase.
/// </returns>
public static BlockLine Create(Pullenti.Ner.Token t, Pullenti.Ner.Core.TerminCollection names)
{
    if (t == null)
    {
        return null;
    }
    BlockLine res = new BlockLine(t, t);

    // Stretch the line up to the last token before the next line break.
    for (Pullenti.Ner.Token tt = t; tt != null; tt = tt.Next)
    {
        if (tt != t && tt.IsNewlineBefore)
        {
            break;
        }
        res.EndToken = tt;
    }

    // Skip the leading numbering: each item is a number (or a roman number) followed
    // either by a dot or by a text token that is not all-lowercase.
    while (t != null && t.Next != null && t.EndChar <= res.EndChar)
    {
        if (!(t is Pullenti.Ner.NumberToken))
        {
            Pullenti.Ner.NumberToken rom = Pullenti.Ner.Core.NumberHelper.TryParseRoman(t);
            if (rom != null && rom.EndToken.Next != null)
            {
                t = rom.EndToken;
            }
            else
            {
                break;
            }
        }
        if (!t.Next.IsChar('.') && !((t.Next is Pullenti.Ner.TextToken) && !t.Next.Chars.IsAllLower))
        {
            break;
        }
        res.NumberEnd = t;
        t = t.Next;
        if (t.IsChar('.') && t.Next != null)
        {
            res.NumberEnd = t;
            t = t.Next;
        }
        if (t.IsNewlineBefore)
        {
            return res;
        }
    }

    // Look for a block-type keyword, directly or as the noun of a multi-token noun phrase.
    Pullenti.Ner.Core.TerminToken tok = m_Ontology.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
    if (tok == null)
    {
        Pullenti.Ner.Core.NounPhraseToken npt1 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
        if (npt1 != null && npt1.EndToken != npt1.BeginToken)
        {
            tok = m_Ontology.TryParse(npt1.Noun.BeginToken, Pullenti.Ner.Core.TerminParseAttr.No);
        }
    }
    // A keyword right after a colon is not treated as a heading.
    if (tok != null && t.Previous != null && t.Previous.IsChar(':'))
    {
        tok = null;
    }
    if (tok != null)
    {
        BlkTyps typ = (BlkTyps)tok.Termin.Tag;
        if (typ == BlkTyps.Conslusion && !t.IsNewlineAfter)
        {
            // Accepted mid-line only in the form "<keyword> <preposition> <chapter keyword>".
            if (t.Next != null && t.Next.Morph.Class.IsPreposition && t.Next.Next != null)
            {
                Pullenti.Ner.Core.TerminToken tok2 = m_Ontology.TryParse(t.Next.Next, Pullenti.Ner.Core.TerminParseAttr.No);
                if (tok2 == null || ((BlkTyps)tok2.Termin.Tag) != BlkTyps.Chapter)
                {
                    tok = null;
                }
            }
            else
            {
                tok = null;
            }
        }
        if (t.Kit.BaseLanguage != t.Morph.Language)
        {
            tok = null;
        }
        if (typ == BlkTyps.Index && !t.IsValue("ОГЛАВЛЕНИЕ", null))
        {
            if (!t.IsNewlineAfter && t.Next != null)
            {
                Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t.Next, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                if (npt != null && npt.IsNewlineAfter && npt.Morph.Case.IsGenitive)
                {
                    tok = null;
                }
                else if (npt == null)
                {
                    tok = null;
                }
            }
        }
        if ((typ == BlkTyps.Intro && tok != null && !tok.IsNewlineAfter) && t.IsValue("ВВЕДЕНИЕ", null))
        {
            // "ВВЕДЕНИЕ" followed by a genitive noun phrase is rejected as a heading.
            Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t.Next, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
            if (npt != null && npt.Morph.Case.IsGenitive)
            {
                tok = null;
            }
        }
        if (tok != null)
        {
            if (res.NumberEnd == null)
            {
                res.NumberEnd = tok.EndToken;
                if (res.NumberEnd.EndChar > res.EndChar)
                {
                    res.EndToken = res.NumberEnd;
                }
            }
            res.Typ = typ;
            t = tok.EndToken;
            if (t.Next != null && t.Next.IsCharOf(":."))
            {
                t = t.Next;
                res.EndToken = t;
            }
            if (t.IsNewlineAfter || t.Next == null)
            {
                return res;
            }
            t = t.Next;
        }
    }

    if (t.IsChar('§') && (t.Next is Pullenti.Ner.NumberToken))
    {
        res.Typ = BlkTyps.Chapter;
        res.NumberEnd = t;
        t = t.Next;
    }

    if (names != null)
    {
        Pullenti.Ner.Core.TerminToken tok2 = names.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
        if (tok2 != null && tok2.EndToken.IsNewlineAfter)
        {
            res.EndToken = tok2.EndToken;
            res.IsExistName = true;
            if (res.Typ == BlkTyps.Undefined)
            {
                // Re-run without the names collection on the text after the numbering
                // to see whether the name itself denotes a special block.
                BlockLine li2 = Create((res.NumberEnd == null ? null : res.NumberEnd.Next), null);
                if (li2 != null && ((li2.Typ == BlkTyps.Literature || li2.Typ == BlkTyps.Intro || li2.Typ == BlkTyps.Conslusion)))
                {
                    res.Typ = li2.Typ;
                }
                else
                {
                    res.Typ = BlkTyps.Chapter;
                }
            }
            return res;
        }
    }

    // A line ending with a number or a dot that is preceded by a dot gets HasContentItemTail;
    // the run of dots is then excluded from the word statistics below.
    Pullenti.Ner.Token t1 = res.EndToken;
    if ((((t1 is Pullenti.Ner.NumberToken) || t1.IsChar('.'))) && t1.Previous != null)
    {
        t1 = t1.Previous;
        if (t1.IsChar('.'))
        {
            res.HasContentItemTail = true;
            for (; t1 != null && t1.BeginChar > res.BeginChar; t1 = t1.Previous)
            {
                if (!t1.IsChar('.'))
                {
                    break;
                }
            }
        }
    }

    res.IsAllUpper = true;
    for (; t != null && t.EndChar <= t1.EndChar; t = t.Next)
    {
        if (!(t is Pullenti.Ner.TextToken) || !t.Chars.IsLetter)
        {
            res.NotWords++;
        }
        else
        {
            Pullenti.Morph.MorphClass mc = t.GetMorphClassInDictionary();
            if (mc.IsUndefined)
            {
                res.NotWords++;
            }
            else if (t.LengthChar > 2)
            {
                res.Words++;
            }
            if (!t.Chars.IsAllUpper)
            {
                res.IsAllUpper = false;
            }
            if ((t as Pullenti.Ner.TextToken).IsPureVerb)
            {
                // Ordinal comparison: the suffix test is on a normalized term, not linguistic text.
                if (!(t as Pullenti.Ner.TextToken).Term.EndsWith("ING", System.StringComparison.Ordinal))
                {
                    res.HasVerb = true;
                }
            }
        }
    }

    if (res.Typ == BlkTyps.Undefined)
    {
        Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse((res.NumberEnd == null ? res.BeginToken : res.NumberEnd.Next), Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
        if (npt != null)
        {
            if (npt.Noun.IsValue("ХАРАКТЕРИСТИКА", null) || npt.Noun.IsValue("СОДЕРЖАНИЕ", "ЗМІСТ"))
            {
                // Intro: the rest of the line must consist of genitive noun phrases (dots are skipped).
                bool ok = true;
                for (Pullenti.Ner.Token tt = npt.EndToken.Next; tt != null && tt.EndChar <= res.EndChar; tt = tt.Next)
                {
                    if (tt.IsChar('.'))
                    {
                        continue;
                    }
                    Pullenti.Ner.Core.NounPhraseToken npt2 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(tt, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                    if (npt2 == null || !npt2.Morph.Case.IsGenitive)
                    {
                        ok = false;
                        break;
                    }
                    tt = npt2.EndToken;
                    _extendToLineEnd(res, tt);
                }
                if (ok)
                {
                    res.Typ = BlkTyps.Intro;
                    res.IsExistName = true;
                }
            }
            else if (npt.Noun.IsValue("ВЫВОД", "ВИСНОВОК") || npt.Noun.IsValue("РЕЗУЛЬТАТ", "ДОСЛІДЖЕННЯ"))
            {
                // Conclusion: only result/recommendation/research noun phrases may follow.
                bool ok = true;
                for (Pullenti.Ner.Token tt = npt.EndToken.Next; tt != null && tt.EndChar <= res.EndChar; tt = tt.Next)
                {
                    if (tt.IsCharOf(",.") || tt.IsAnd)
                    {
                        continue;
                    }
                    Pullenti.Ner.Core.NounPhraseToken npt1 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(tt, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                    if (npt1 != null)
                    {
                        if (npt1.Noun.IsValue("РЕЗУЛЬТАТ", "ДОСЛІДЖЕННЯ") || npt1.Noun.IsValue("РЕКОМЕНДАЦИЯ", "РЕКОМЕНДАЦІЯ") || npt1.Noun.IsValue("ИССЛЕДОВАНИЕ", "ДОСЛІДЖЕННЯ"))
                        {
                            tt = npt1.EndToken;
                            _extendToLineEnd(res, tt);
                            continue;
                        }
                    }
                    ok = false;
                    break;
                }
                if (ok)
                {
                    res.Typ = BlkTyps.Conslusion;
                    res.IsExistName = true;
                }
            }
            if (res.Typ == BlkTyps.Undefined && npt != null && npt.EndChar <= res.EndChar)
            {
                // Literature: a publication-like head noun, or a list/index/etc. noun
                // that does not occupy the whole line.
                bool ok = false;
                int publ = 0;
                if (_isPub(npt))
                {
                    ok = true;
                    publ = 1;
                }
                else if ((npt.Noun.IsValue("СПИСОК", null) || npt.Noun.IsValue("УКАЗАТЕЛЬ", "ПОКАЖЧИК") || npt.Noun.IsValue("ПОЛОЖЕНИЕ", "ПОЛОЖЕННЯ")) || npt.Noun.IsValue("ВЫВОД", "ВИСНОВОК") || npt.Noun.IsValue("РЕЗУЛЬТАТ", "ДОСЛІДЖЕННЯ"))
                {
                    if (npt.EndChar == res.EndChar)
                    {
                        return null;
                    }
                    ok = true;
                }
                if (ok)
                {
                    if (npt.BeginToken == npt.EndToken && npt.Noun.IsValue("СПИСОК", null) && npt.EndChar == res.EndChar)
                    {
                        ok = false;
                    }
                    for (Pullenti.Ner.Token tt = npt.EndToken.Next; tt != null && tt.EndChar <= res.EndChar; tt = tt.Next)
                    {
                        if (tt.IsCharOf(",.:") || tt.IsAnd || tt.Morph.Class.IsPreposition)
                        {
                            continue;
                        }
                        if (tt.IsValue("ОТРАЖЕНЫ", "ВІДОБРАЖЕНІ"))
                        {
                            continue;
                        }
                        npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(tt, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                        if (npt == null)
                        {
                            ok = false;
                            break;
                        }
                        if (((_isPub(npt) || npt.Noun.IsValue("РАБОТА", "РОБОТА") || npt.Noun.IsValue("ИССЛЕДОВАНИЕ", "ДОСЛІДЖЕННЯ")) || npt.Noun.IsValue("АВТОР", null) || npt.Noun.IsValue("ТРУД", "ПРАЦЯ")) || npt.Noun.IsValue("ТЕМА", null) || npt.Noun.IsValue("ДИССЕРТАЦИЯ", "ДИСЕРТАЦІЯ"))
                        {
                            tt = npt.EndToken;
                            if (_isPub(npt))
                            {
                                publ++;
                            }
                            _extendToLineEnd(res, tt);
                            continue;
                        }
                        ok = false;
                        break;
                    }
                    if (ok)
                    {
                        res.Typ = BlkTyps.Literature;
                        res.IsExistName = true;
                        // With no publication word and a position in the first two thirds of the
                        // text, the Literature verdict is downgraded.
                        if (publ == 0 && (res.EndChar < (((res.Kit.Sofa.Text.Length * 2) / 3))))
                        {
                            if (res.NumberEnd != null)
                            {
                                res.Typ = BlkTyps.Misc;
                            }
                            else
                            {
                                res.Typ = BlkTyps.Undefined;
                            }
                        }
                    }
                }
            }
        }
    }
    return res;
}

/// <summary>
/// When <paramref name="tt"/> ends beyond the current end of <paramref name="line"/>,
/// moves the line end to <paramref name="tt"/> and then onward to the first token that
/// has a line break after it (or to the last token of the text).
/// </summary>
private static void _extendToLineEnd(BlockLine line, Pullenti.Ner.Token tt)
{
    if (tt.EndChar <= line.EndChar)
    {
        return;
    }
    line.EndToken = tt;
    if (tt.IsNewlineAfter)
    {
        return;
    }
    for (; line.EndToken.Next != null; line.EndToken = line.EndToken.Next)
    {
        if (line.EndToken.IsNewlineAfter)
        {
            break;
        }
    }
}
/// <summary>
/// Main keyword extraction routine.
/// Pass 1 walks all tokens: existing referents are handed to <c>_addReferents</c>,
/// standalone verbs become <c>Predicate</c> keywords, and every significant word of a
/// noun phrase becomes an <c>Object</c> keyword (plus one combined keyword when the
/// phrase produced more than one).
/// Pass 2 unites adjacent object keywords linked by a genitive or by "OF".
/// Finally the referents are optionally sorted by rank and an annotation is created.
/// </summary>
/// <param name="kit">Analysis kit whose token chain is processed and modified in place.</param>
public override void Process(Pullenti.Ner.Core.AnalysisKit kit)
{
    Pullenti.Ner.Core.AnalyzerData ad = kit.GetAnalyzerData(this);

    // Run the denomination analyzer first unless an enabled one is already in the processor.
    bool hasDenoms = false;
    foreach (Pullenti.Ner.Analyzer a in kit.Processor.Analyzers)
    {
        if ((a is Pullenti.Ner.Denomination.DenominationAnalyzer) && !a.IgnoreThisAnalyzer)
        {
            hasDenoms = true;
        }
    }
    if (!hasDenoms)
    {
        Pullenti.Ner.Denomination.DenominationAnalyzer a = new Pullenti.Ner.Denomination.DenominationAnalyzer();
        a.Process(kit);
    }

    List<KeywordReferent> li = new List<KeywordReferent>();
    StringBuilder tmp = new StringBuilder();
    List<string> tmp2 = new List<string>();

    // max = number of tokens before processing; cur = iteration counter; both feed _setRank.
    int max = 0;
    for (Pullenti.Ner.Token t = kit.FirstToken; t != null; t = t.Next)
    {
        max++;
    }
    int cur = 0;
    for (Pullenti.Ner.Token t = kit.FirstToken; t != null; t = t.Next, cur++)
    {
        Pullenti.Ner.Referent r = t.GetReferent();
        if (r != null)
        {
            t = this._addReferents(ad, t, cur, max);
            continue;
        }
        if (!(t is Pullenti.Ner.TextToken))
        {
            continue;
        }
        if (!t.Chars.IsLetter || (t.LengthChar < 3))
        {
            continue;
        }
        string term = (t as Pullenti.Ner.TextToken).Term;
        // "ЕСТЬ" is kept only when the previous text token is a verb.
        if (term == "ЕСТЬ" && !((t.Previous is Pullenti.Ner.TextToken) && t.Previous.Morph.Class.IsVerb))
        {
            continue;
        }

        Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t, Pullenti.Ner.Core.NounPhraseParseAttr.AdjectiveCanBeLast | Pullenti.Ner.Core.NounPhraseParseAttr.ParsePreposition, 0, null);
        if (npt == null)
        {
            // Not a noun phrase: the only other thing turned into a keyword is a verb.
            Pullenti.Morph.MorphClass mc = t.GetMorphClassInDictionary();
            if (mc.IsVerb && !mc.IsPreposition)
            {
                if ((t as Pullenti.Ner.TextToken).IsVerbBe)
                {
                    continue;
                }
                if (t.IsValue("МОЧЬ", null) || t.IsValue("WOULD", null))
                {
                    continue;
                }
                KeywordReferent kref = new KeywordReferent() { Typ = KeywordType.Predicate };
                string norm = t.GetNormalCaseText(Pullenti.Morph.MorphClass.Verb, Pullenti.Morph.MorphNumber.Singular, Pullenti.Morph.MorphGender.Undefined, false);
                if (norm == null)
                {
                    norm = (t as Pullenti.Ner.TextToken).Lemma;
                }
                // Drop the trailing "СЯ" of "...ЬСЯ". The comparison must be ordinal because
                // the following Substring removes a fixed number of characters.
                if (norm.EndsWith("ЬСЯ", System.StringComparison.Ordinal))
                {
                    norm = norm.Substring(0, norm.Length - 2);
                }
                kref.AddSlot(KeywordReferent.ATTR_VALUE, norm, false, 0);
                List<Pullenti.Semantic.Utils.DerivateGroup> drv = Pullenti.Semantic.Utils.DerivateService.FindDerivates(norm, true, t.Morph.Language);
                _addNormals(kref, drv, norm);
                kref = ad.RegisterReferent(kref) as KeywordReferent;
                _setRank(kref, cur, max);
                // NOTE(review): kref was already registered on the line above; this second
                // RegisterReferent call looks redundant (the other branches pass kref directly) — confirm.
                Pullenti.Ner.ReferentToken rt1 = new Pullenti.Ner.ReferentToken(ad.RegisterReferent(kref), t, t) { Morph = t.Morph };
                kit.EmbedToken(rt1);
                t = rt1;
                continue;
            }
            continue;
        }
        if (npt.InternalNoun != null)
        {
            continue;
        }
        // Skip set expressions: "<preposition> ... ЦЕЛОМ / ЧАСТНОСТИ" and "С ... СТОРОНЫ".
        if (npt.EndToken.IsValue("ЦЕЛОМ", null) || npt.EndToken.IsValue("ЧАСТНОСТИ", null))
        {
            if (npt.Preposition != null)
            {
                t = npt.EndToken;
                continue;
            }
        }
        if (npt.EndToken.IsValue("СТОРОНЫ", null) && npt.Preposition != null && npt.Preposition.Normal == "С")
        {
            t = npt.EndToken;
            continue;
        }
        if (npt.BeginToken == npt.EndToken)
        {
            Pullenti.Morph.MorphClass mc = t.GetMorphClassInDictionary();
            if (mc.IsPreposition)
            {
                continue;
            }
            else if (mc.IsAdverb)
            {
                if (t.IsValue("ПОТОМ", null))
                {
                    continue;
                }
            }
        }

        // One Object keyword per significant word of the noun phrase.
        li.Clear();
        Pullenti.Ner.Token t0 = t;
        for (Pullenti.Ner.Token tt = t; tt != null && tt.EndChar <= npt.EndChar; tt = tt.Next)
        {
            if (!(tt is Pullenti.Ner.TextToken))
            {
                continue;
            }
            if ((tt.LengthChar < 3) || !tt.Chars.IsLetter)
            {
                continue;
            }
            Pullenti.Morph.MorphClass mc = tt.GetMorphClassInDictionary();
            if ((mc.IsPreposition || mc.IsPronoun || mc.IsPersonalPronoun) || mc.IsConjunction)
            {
                // "ОТНОШЕНИЕ" is kept even though the dictionary may class it as a function word.
                if (!tt.IsValue("ОТНОШЕНИЕ", null))
                {
                    continue;
                }
            }
            if (mc.IsMisc)
            {
                if (Pullenti.Ner.Core.MiscHelper.IsEngArticle(tt))
                {
                    continue;
                }
            }
            KeywordReferent kref = new KeywordReferent() { Typ = KeywordType.Object };
            string norm = (tt as Pullenti.Ner.TextToken).Lemma;
            kref.AddSlot(KeywordReferent.ATTR_VALUE, norm, false, 0);
            if (norm != "ЕСТЬ")
            {
                List<Pullenti.Semantic.Utils.DerivateGroup> drv = Pullenti.Semantic.Utils.DerivateService.FindDerivates(norm, true, tt.Morph.Language);
                _addNormals(kref, drv, norm);
            }
            kref = ad.RegisterReferent(kref) as KeywordReferent;
            _setRank(kref, cur, max);
            Pullenti.Ner.ReferentToken rt1 = new Pullenti.Ner.ReferentToken(kref, tt, tt) { Morph = tt.Morph };
            kit.EmbedToken(rt1);
            // Remember the first embedded token so the combined keyword can span the whole phrase.
            if (tt == t && li.Count == 0)
            {
                t0 = rt1;
            }
            t = rt1;
            li.Add(kref);
        }

        if (li.Count > 1)
        {
            // Combined keyword for the whole phrase, referencing its word keywords.
            KeywordReferent kref = new KeywordReferent() { Typ = KeywordType.Object };
            tmp2.Clear();
            foreach (KeywordReferent kw in li)
            {
                string s = kw.GetStringValue(KeywordReferent.ATTR_VALUE);
                string n = kw.GetStringValue(KeywordReferent.ATTR_NORMAL);
                // Prefer the normal form of each word, falling back to its value.
                tmp2.Add(n != null ? n : s);
                kref.AddSlot(KeywordReferent.ATTR_REF, kw, false, 0);
            }
            string val = npt.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Singular, Pullenti.Morph.MorphGender.Undefined, false);
            kref.AddSlot(KeywordReferent.ATTR_VALUE, val, false, 0);
            // The normal form is the sorted, space-joined list of word normals.
            // NOTE(review): List<string>.Sort() uses the default comparer — confirm that the
            // resulting word order is meant to depend on the current culture.
            tmp.Length = 0;
            tmp2.Sort();
            foreach (string s in tmp2)
            {
                if (tmp.Length > 0)
                {
                    tmp.Append(' ');
                }
                tmp.Append(s);
            }
            string norm = tmp.ToString();
            if (norm != val)
            {
                kref.AddSlot(KeywordReferent.ATTR_NORMAL, norm, false, 0);
            }
            kref = ad.RegisterReferent(kref) as KeywordReferent;
            _setRank(kref, cur, max);
            Pullenti.Ner.ReferentToken rt1 = new Pullenti.Ner.ReferentToken(kref, t0, t) { Morph = npt.Morph };
            kit.EmbedToken(rt1);
            t = rt1;
        }
    }

    // Pass 2: unite "<object> <genitive object>" and "<object> OF [article] <object>".
    cur = 0;
    for (Pullenti.Ner.Token t = kit.FirstToken; t != null; t = t.Next, cur++)
    {
        KeywordReferent kw = t.GetReferent() as KeywordReferent;
        if (kw == null || kw.Typ != KeywordType.Object)
        {
            continue;
        }
        if (t.Next == null || kw.ChildWords > 2)
        {
            continue;
        }
        Pullenti.Ner.Token t1 = t.Next;
        if (t1.IsValue("OF", null) && (t1.WhitespacesAfterCount < 3) && t1.Next != null)
        {
            t1 = t1.Next;
            if ((t1 is Pullenti.Ner.TextToken) && Pullenti.Ner.Core.MiscHelper.IsEngArticle(t1) && t1.Next != null)
            {
                t1 = t1.Next;
            }
        }
        else if (!t1.Morph.Case.IsGenitive || t.WhitespacesAfterCount > 1)
        {
            continue;
        }
        KeywordReferent kw2 = t1.GetReferent() as KeywordReferent;
        if (kw2 == null)
        {
            continue;
        }
        if (kw == kw2)
        {
            continue;
        }
        if (kw2.Typ != KeywordType.Object || (kw.ChildWords + kw2.ChildWords) > 3)
        {
            continue;
        }
        KeywordReferent kwUn = new KeywordReferent();
        kwUn.Union(kw, kw2, Pullenti.Ner.Core.MiscHelper.GetTextValue(t1, t1, Pullenti.Ner.Core.GetTextAttr.No));
        kwUn = ad.RegisterReferent(kwUn) as KeywordReferent;
        _setRank(kwUn, cur, max);
        Pullenti.Ner.ReferentToken rt1 = new Pullenti.Ner.ReferentToken(kwUn, t, t1) { Morph = t.Morph };
        kit.EmbedToken(rt1);
        t = rt1;
    }

    if (SortKeywordsByRank)
    {
        List<Pullenti.Ner.Referent> all = new List<Pullenti.Ner.Referent>(ad.Referents);
        all.Sort(new CompByRank());
        ad.Referents = all;
    }
    if (AnnotationMaxSentences > 0)
    {
        KeywordReferent ano = Pullenti.Ner.Keyword.Internal.AutoannoSentToken.CreateAnnotation(kit, AnnotationMaxSentences);
        if (ano != null)
        {
            ad.RegisterReferent(ano);
        }
    }
}
/// <summary>
/// Tries to parse a single measurement unit starting at <paramref name="t"/>.
/// Handles a leading divider ("/", "\", "НА", "OF", "PER", or "В" after a previous unit),
/// a leading multiplication sign, square/cubic prefixes, the "µ" prefix, units from
/// <c>UnitsHelper.Termins</c>, degree signs (optionally Celsius/Fahrenheit), percent
/// (optionally alcohol by volume), units from <paramref name="addUnits"/> and,
/// when allowed, short unknown words.
/// </summary>
/// <param name="t">Token to start from; null yields null.</param>
/// <param name="addUnits">Optional collection of additional unit termins; may be null.</param>
/// <param name="prev">Previously parsed unit of the same sequence, or null.</param>
/// <param name="parseUnknownUnits">When true, a short unknown word may be returned as a doubtful unit.</param>
/// <returns>The parsed unit token, or null when no unit is recognized.</returns>
public static UnitToken TryParse(Pullenti.Ner.Token t, Pullenti.Ner.Core.TerminCollection addUnits, UnitToken prev, bool parseUnknownUnits = false)
{
    if (t == null)
    {
        return null;
    }
    Pullenti.Ner.Token t0 = t;
    int pow = 1;
    bool isNeg = false;

    // A leading divider makes the power negative; a multiplication sign is just skipped.
    if ((t.IsCharOf("\\/") || t.IsValue("НА", null) || t.IsValue("OF", null)) || t.IsValue("PER", null))
    {
        isNeg = true;
        t = t.Next;
    }
    else if (t.IsValue("В", null) && prev != null)
    {
        isNeg = true;
        t = t.Next;
    }
    else if (MeasureHelper.IsMultChar(t))
    {
        t = t.Next;
    }
    Pullenti.Ner.TextToken tt = t as Pullenti.Ner.TextToken;
    if (tt == null)
    {
        return null;
    }

    if (tt.Term == "КВ" || tt.Term == "КВАДР" || tt.IsValue("КВАДРАТНЫЙ", null))
    {
        // Square prefix, optionally abbreviated with a dot.
        pow = 2;
        tt = tt.Next as Pullenti.Ner.TextToken;
        if (tt != null && tt.IsChar('.'))
        {
            tt = tt.Next as Pullenti.Ner.TextToken;
        }
        if (tt == null)
        {
            return null;
        }
    }
    else if (tt.Term == "КУБ" || tt.Term == "КУБИЧ" || tt.IsValue("КУБИЧЕСКИЙ", null))
    {
        // Cubic prefix, optionally abbreviated with a dot.
        pow = 3;
        tt = tt.Next as Pullenti.Ner.TextToken;
        if (tt != null && tt.IsChar('.'))
        {
            tt = tt.Next as Pullenti.Ner.TextToken;
        }
        if (tt == null)
        {
            return null;
        }
    }
    else if (tt.Term == "µ")
    {
        // "µ" + unit: look up the micro-variant of the unit parsed after the sign.
        UnitToken res = TryParse(tt.Next, addUnits, prev, false);
        // res.Unit is not set when the recursive call matched an addUnits termin
        // (only ExtOnto is assigned there), so it must be checked before use.
        if (res != null && res.Unit != null)
        {
            foreach (Unit u in UnitsHelper.Units)
            {
                if (u.Factor == UnitsFactors.Micro && string.Compare("мк" + u.NameCyr, res.Unit.NameCyr, System.StringComparison.OrdinalIgnoreCase) == 0)
                {
                    res.Unit = u;
                    res.BeginToken = tt;
                    res.Pow = pow;
                    if (isNeg)
                    {
                        res.Pow = -pow;
                    }
                    return res;
                }
            }
        }
    }

    List<Pullenti.Ner.Core.TerminToken> toks = UnitsHelper.Termins.TryParseAll(tt, Pullenti.Ner.Core.TerminParseAttr.No);
    if (toks != null)
    {
        if ((prev != null && tt == t0 && toks.Count == 1) && t.IsWhitespaceBefore)
        {
            return null;
        }
        // A one-token unit that is also a preposition is rejected when it starts a noun phrase,
        // precedes a non-digit number, or precedes another unit.
        if (toks[0].BeginToken == toks[0].EndToken && tt.Morph.Class.IsPreposition && (tt.WhitespacesAfterCount < 3))
        {
            if (Pullenti.Ner.Core.NounPhraseHelper.TryParse(tt, Pullenti.Ner.Core.NounPhraseParseAttr.ParsePreposition, 0, null) != null)
            {
                return null;
            }
            if (tt.Next is Pullenti.Ner.NumberToken)
            {
                if ((tt.Next as Pullenti.Ner.NumberToken).Typ != Pullenti.Ner.NumberSpellingType.Digit)
                {
                    return null;
                }
            }
            UnitToken nex = TryParse(tt.Next, addUnits, null, false);
            if (nex != null)
            {
                return null;
            }
        }
        // Lowercase "м"/"m" right after a length unit is returned as UnitsHelper.uMinute.
        if (toks[0].BeginToken == toks[0].EndToken && ((toks[0].BeginToken.IsValue("М", null) || toks[0].BeginToken.IsValue("M", null))) && toks[0].BeginToken.Chars.IsAllLower)
        {
            if (prev != null && prev.Unit != null && prev.Unit.Kind == Pullenti.Ner.Measure.MeasureKind.Length)
            {
                UnitToken res = new UnitToken(t0, toks[0].EndToken) { Unit = UnitsHelper.uMinute };
                res.Pow = pow;
                if (isNeg)
                {
                    res.Pow = -pow;
                }
                return res;
            }
        }

        List<UnitToken> uts = new List<UnitToken>();
        foreach (Pullenti.Ner.Core.TerminToken tok in toks)
        {
            UnitToken res = new UnitToken(t0, tok.EndToken) { Unit = tok.Termin.Tag as Unit };
            res.Pow = pow;
            if (isNeg)
            {
                res.Pow = -pow;
            }
            // A unit with multiplier 1000000 written with a lowercase first letter is replaced
            // by the milli-unit of the same Cyrillic name, when one exists.
            if (res.Unit.BaseMultiplier == 1000000 && (t0 is Pullenti.Ner.TextToken) && char.IsLower((t0 as Pullenti.Ner.TextToken).GetSourceText()[0]))
            {
                foreach (Unit u in UnitsHelper.Units)
                {
                    if (u.Factor == UnitsFactors.Milli && string.Compare(u.NameCyr, res.Unit.NameCyr, System.StringComparison.OrdinalIgnoreCase) == 0)
                    {
                        res.Unit = u;
                        break;
                    }
                }
            }
            res._correct();
            res._checkDoubt();
            uts.Add(res);
        }

        // Prefer the candidate whose keyword starts latest, then the first non-doubtful one.
        int max = 0;
        UnitToken best = null;
        foreach (UnitToken ut in uts)
        {
            if (ut.Keyword != null)
            {
                if (ut.Keyword.BeginChar >= max)
                {
                    max = ut.Keyword.BeginChar;
                    best = ut;
                }
            }
        }
        if (best != null)
        {
            return best;
        }
        foreach (UnitToken ut in uts)
        {
            if (!ut.IsDoubt)
            {
                return ut;
            }
        }
        return uts[0];
    }

    // Degree sign: "º", "°" or the markup-like "<о>", "<o>", "<0>".
    Pullenti.Ner.Token t1 = null;
    if (t.IsCharOf("º°"))
    {
        t1 = t;
    }
    else if ((t.IsChar('<') && t.Next != null && t.Next.Next != null) && t.Next.Next.IsChar('>') && ((t.Next.IsValue("О", null) || t.Next.IsValue("O", null) || (((t.Next is Pullenti.Ner.NumberToken) && (t.Next as Pullenti.Ner.NumberToken).Value == "0")))))
    {
        t1 = t.Next.Next;
    }
    if (t1 != null)
    {
        UnitToken res = new UnitToken(t0, t1) { Unit = UnitsHelper.uGradus };
        res._checkDoubt();
        t = t1.Next;
        if (t != null && t.IsComma)
        {
            t = t.Next;
        }
        if (t != null && t.IsValue("ПО", null))
        {
            t = t.Next;
        }
        if (t is Pullenti.Ner.TextToken)
        {
            string vv = (t as Pullenti.Ner.TextToken).Term;
            if (vv == "C" || vv == "С" || vv.StartsWith("ЦЕЛЬС", System.StringComparison.Ordinal))
            {
                res.Unit = UnitsHelper.uGradusC;
                res.IsDoubt = false;
                res.EndToken = t;
            }
            if (vv == "F" || vv.StartsWith("ФАР", System.StringComparison.Ordinal))
            {
                res.Unit = UnitsHelper.uGradusF;
                res.IsDoubt = false;
                res.EndToken = t;
            }
        }
        return res;
    }

    // "оС" / "oC" typed with letters instead of a degree sign.
    if ((t is Pullenti.Ner.TextToken) && ((t.IsValue("ОС", null) || t.IsValue("OC", null))))
    {
        string str = t.GetSourceText();
        if (str == "оС" || str == "oC")
        {
            UnitToken res = new UnitToken(t, t) { Unit = UnitsHelper.uGradusC, IsDoubt = false };
            return res;
        }
    }

    if (t.IsChar('%'))
    {
        // "% об", "% (об.)" → alcohol by volume; otherwise plain percent.
        Pullenti.Ner.Token tt1 = t.Next;
        if (tt1 != null && tt1.IsChar('('))
        {
            tt1 = tt1.Next;
        }
        if ((tt1 is Pullenti.Ner.TextToken) && (tt1 as Pullenti.Ner.TextToken).Term.StartsWith("ОБ", System.StringComparison.Ordinal))
        {
            UnitToken re = new UnitToken(t, tt1) { Unit = UnitsHelper.uAlco };
            if (re.EndToken.Next != null && re.EndToken.Next.IsChar('.'))
            {
                re.EndToken = re.EndToken.Next;
            }
            if (re.EndToken.Next != null && re.EndToken.Next.IsChar(')') && t.Next.IsChar('('))
            {
                re.EndToken = re.EndToken.Next;
            }
            return re;
        }
        return new UnitToken(t, t) { Unit = UnitsHelper.uPercent };
    }

    if (addUnits != null)
    {
        Pullenti.Ner.Core.TerminToken tok = addUnits.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
        if (tok != null)
        {
            // Include a trailing abbreviation dot. This must happen before the result is built:
            // previously the end token was moved after construction and the dot was never included.
            if (tok.EndToken.Next != null && tok.EndToken.Next.IsChar('.'))
            {
                tok.EndToken = tok.EndToken.Next;
            }
            UnitToken res = new UnitToken(t0, tok.EndToken) { ExtOnto = tok.Termin.Tag as Pullenti.Ner.Measure.UnitReferent };
            res.Pow = pow;
            if (isNeg)
            {
                res.Pow = -pow;
            }
            res._correct();
            return res;
        }
    }

    if (!parseUnknownUnits)
    {
        return null;
    }

    // Unknown unit: a short letter token close to the preceding text, not starting a sentence.
    if ((t.WhitespacesBeforeCount > 2 || !t.Chars.IsLetter || t.LengthChar > 5) || !(t is Pullenti.Ner.TextToken))
    {
        return null;
    }
    if (Pullenti.Ner.Core.MiscHelper.CanBeStartOfSentence(t))
    {
        return null;
    }
    t1 = t;
    // Include a trailing abbreviation dot (the original assigned t here, which was a no-op).
    if (t.Next != null && t.Next.IsChar('.'))
    {
        t1 = t.Next;
    }
    // The candidate must be followed by end of text, a wide gap, a comma, a slash,
    // a table control character or a multiplication sign.
    bool ok = false;
    if (t1.Next == null || t1.WhitespacesAfterCount > 2)
    {
        ok = true;
    }
    else if (t1.Next.IsComma || t1.Next.IsCharOf("\\/") || t1.Next.IsTableControlChar)
    {
        ok = true;
    }
    else if (MeasureHelper.IsMultChar(t1.Next))
    {
        ok = true;
    }
    if (!ok)
    {
        return null;
    }
    // NOTE(review): tokens longer than 5 characters were already rejected above, so the
    // "> 7" test below can never fire — confirm the intended length limits.
    Pullenti.Morph.MorphClass mc = t.GetMorphClassInDictionary();
    if (!mc.IsUndefined && t.LengthChar > 7)
    {
        return null;
    }
    UnitToken res1 = new UnitToken(t0, t1) { Pow = pow, IsDoubt = true };
    res1.UnknownName = (t as Pullenti.Ner.TextToken).GetSourceText();
    res1._correct();
    return res1;
}
/// <summary>
/// Try to recognise an assertion ("thesis") sentence starting at the given token.
/// The accepted shape is: [introductory phrase ","] noun-phrase verb(s) rest-of-sentence.
/// </summary>
/// <param name="t">first token of the candidate sentence</param>
/// <returns>
/// A ReferentToken spanning from the first token to the end of the sentence and wrapping a
/// DefinitionReferent of kind Assertation (ATTR_TERMIN = noun phrase text, ATTR_VALUE = text
/// from the verb to the sentence end, optionally followed by ", " and the introductory phrase),
/// or null when the pattern does not match.
/// </returns>
static Pullenti.Ner.ReferentToken TryParseThesis(Pullenti.Ner.Token t)
{
    if (t == null)
    {
        return null;
    }

    Pullenti.Ner.Token t0 = t;
    Pullenti.Ner.Token tt = t;
    Pullenti.Morph.MorphClass mc = tt.GetMorphClassInDictionary();
    Pullenti.Ner.MetaToken preamb = null;

    if (mc.IsConjunction)
    {
        return null;
    }
    if (t.IsValue("LET", null))
    {
        return null;
    }

    // Optional introductory phrase: starts with a preposition/misc/adverb token that is not
    // an English article and must be terminated by a comma followed by at least one token.
    if ((mc.IsPreposition || mc.IsMisc || mc.IsAdverb) && !Pullenti.Ner.Core.MiscHelper.IsEngArticle(tt))
    {
        for (tt = tt.Next; tt != null; tt = tt.Next)
        {
            if (tt.IsComma)
            {
                break;
            }
            if (tt.IsChar('('))
            {
                // Skip a whole bracketed fragment in one step.
                Pullenti.Ner.Core.BracketSequenceToken br = Pullenti.Ner.Core.BracketHelper.TryParse(tt, Pullenti.Ner.Core.BracketParseAttr.No, 100);
                if (br != null)
                {
                    tt = br.EndToken;
                    continue;
                }
            }
            if (Pullenti.Ner.Core.MiscHelper.CanBeStartOfSentence(tt))
            {
                break;
            }
            // Skip a whole noun phrase in one step.
            Pullenti.Ner.Core.NounPhraseToken npt0 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(tt, Pullenti.Ner.Core.NounPhraseParseAttr.ParseNumericAsAdjective | Pullenti.Ner.Core.NounPhraseParseAttr.ReferentCanBeNoun, 0, null);
            if (npt0 != null)
            {
                tt = npt0.EndToken;
                continue;
            }
            if (tt.GetMorphClassInDictionary().IsVerb)
            {
                break;
            }
        }
        if (tt == null || !tt.IsComma || tt.Next == null)
        {
            return null;
        }
        preamb = new Pullenti.Ner.MetaToken(t0, tt.Previous);
        tt = tt.Next;
    }

    // t1 is the first token of the subject (equals t0 when there is no introductory phrase).
    Pullenti.Ner.Token t1 = tt;
    mc = tt.GetMorphClassInDictionary();
    Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(tt, Pullenti.Ner.Core.NounPhraseParseAttr.ParseNumericAsAdjective | Pullenti.Ner.Core.NounPhraseParseAttr.ReferentCanBeNoun | Pullenti.Ner.Core.NounPhraseParseAttr.ParseAdverbs, 0, null);
    if (npt == null && (tt is Pullenti.Ner.TextToken))
    {
        // Fallback: accept a single text token as the subject when it is all upper-case,
        // or is not all lower-case and is either a proper name or follows an introductory phrase.
        if (tt.Chars.IsAllUpper)
        {
            npt = new Pullenti.Ner.Core.NounPhraseToken(tt, tt);
        }
        else if (!tt.Chars.IsAllLower)
        {
            if (mc.IsProper || preamb != null)
            {
                npt = new Pullenti.Ner.Core.NounPhraseToken(tt, tt);
            }
        }
    }
    if (npt == null)
    {
        return null;
    }
    if (mc.IsPersonalPronoun)
    {
        return null;
    }

    // t2 is the token right after the subject; it must be a verb text token of the same sentence.
    Pullenti.Ner.Token t2 = npt.EndToken.Next;
    if (t2 == null || Pullenti.Ner.Core.MiscHelper.CanBeStartOfSentence(t2) || !(t2 is Pullenti.Ner.TextToken))
    {
        return null;
    }
    if (!t2.GetMorphClassInDictionary().IsVerb)
    {
        return null;
    }

    // t3 is the last token of the sentence; it stays at t2 if no end is found below.
    Pullenti.Ner.Token t3 = t2;

    // Skip the run of verbs that directly follows t2.
    for (tt = t2.Next; tt != null; tt = tt.Next)
    {
        if (!tt.GetMorphClassInDictionary().IsVerb)
        {
            break;
        }
    }

    // Scan forward to the end of the sentence: the last token of the text, or a terminator
    // character followed by a token that can start a sentence.
    for (; tt != null; tt = tt.Next)
    {
        if (tt.Next == null)
        {
            t3 = tt;
            break;
        }
        if (tt.IsCharOf(".;!?"))
        {
            if (Pullenti.Ner.Core.MiscHelper.CanBeStartOfSentence(tt.Next))
            {
                t3 = tt;
                break;
            }
        }
        if (!(tt is Pullenti.Ner.TextToken))
        {
            continue;
        }
        if (Pullenti.Ner.Core.BracketHelper.CanBeStartOfSequence(tt, false, false))
        {
            Pullenti.Ner.Core.BracketSequenceToken br = Pullenti.Ner.Core.BracketHelper.TryParse(tt, Pullenti.Ner.Core.BracketParseAttr.No, 100);
            if (br != null)
            {
                tt = br.EndToken;
                continue;
            }
        }
    }

    // The value text excludes the terminating punctuation character.
    tt = t3;
    if (t3.IsCharOf(";.!?"))
    {
        tt = tt.Previous;
    }
    string txt = Pullenti.Ner.Core.MiscHelper.GetTextValue(t2, tt, Pullenti.Ner.Core.GetTextAttr.KeepRegister | Pullenti.Ner.Core.GetTextAttr.KeepQuotes);
    if (txt == null || (txt.Length < 15))
    {
        return null;
    }

    if (t0 != t1)
    {
        // An introductory phrase was consumed: append it (without its trailing comma)
        // after the value, lower-casing its first letter when it was capitalised.
        tt = t1.Previous;
        if (tt.IsComma)
        {
            tt = tt.Previous;
        }
        string txt0 = Pullenti.Ner.Core.MiscHelper.GetTextValue(t0, tt, Pullenti.Ner.Core.GetTextAttr.KeepRegister | Pullenti.Ner.Core.GetTextAttr.KeepQuotes);
        if (txt0 != null && txt0.Length > 10)
        {
            if (t0.Chars.IsCapitalUpper)
            {
                txt0 = char.ToLower(txt0[0]) + txt0.Substring(1);
            }
            txt = string.Format("{0}, {1}", txt, txt0);
        }
    }

    // The term name is the subject text without a leading English article.
    tt = t1;
    if (Pullenti.Ner.Core.MiscHelper.IsEngArticle(tt))
    {
        tt = tt.Next;
    }
    string nam = Pullenti.Ner.Core.MiscHelper.GetTextValue(tt, t2.Previous, Pullenti.Ner.Core.GetTextAttr.KeepQuotes);
    if (nam == null)
    {
        // GetTextValue results are null-checked for txt/txt0 above, so null is possible here too
        // (e.g. the subject consisted of the article only); without this guard
        // the StartsWith call below would throw NullReferenceException.
        return null;
    }

    const string soCalledPrefix = "SO-CALLED";
    // Ordinal comparison: this is a fixed marker, not culture-dependent text.
    if (nam.StartsWith(soCalledPrefix, System.StringComparison.Ordinal))
    {
        nam = nam.Substring(soCalledPrefix.Length).Trim();
    }

    Pullenti.Ner.Definition.DefinitionReferent dr = new Pullenti.Ner.Definition.DefinitionReferent();
    dr.Kind = Pullenti.Ner.Definition.DefinitionKind.Assertation;
    dr.AddSlot(Pullenti.Ner.Definition.DefinitionReferent.ATTR_TERMIN, nam, false, 0);
    dr.AddSlot(Pullenti.Ner.Definition.DefinitionReferent.ATTR_VALUE, txt, false, 0);
    return new Pullenti.Ner.ReferentToken(dr, t0, t3);
}