/// <summary>
/// Tries to build a single noun-phrase item (a candidate adjective and/or noun) starting at <paramref name="t"/>.
/// </summary>
/// <param name="t">First token of the candidate item; <c>null</c> yields <c>null</c>.</param>
/// <param name="items">Items already collected for the phrase (may be <c>null</c> or empty); new morphological
/// variants are checked against them through <c>TryAccordVariant</c>.</param>
/// <param name="attrs">Parsing flags; this method reads IgnoreParticiples, ReferentCanBeNoun,
/// ParseNumericAsAdjective, ParsePronouns and MultiNouns.</param>
/// <returns>The item with its <c>AdjMorph</c>/<c>NounMorph</c> variants filled in, or <c>null</c> when the token
/// is rejected by one of the filters below.</returns>
public static NounPhraseItem TryParse(Pullenti.Ner.Token t, List<NounPhraseItem> items, Pullenti.Ner.Core.NounPhraseParseAttr attrs) {
    if (t == null) {
        return null;
    }
    Pullenti.Ner.Token t0 = t;
    bool _canBeSurname = false;
    bool _isDoubtAdj = false;

    // A referent token that wraps exactly one text token is first parsed as that inner token;
    // on success the item is re-anchored onto the referent token itself.
    Pullenti.Ner.ReferentToken rt = t as Pullenti.Ner.ReferentToken;
    if (rt != null && rt.BeginToken == rt.EndToken && (rt.BeginToken is Pullenti.Ner.TextToken)) {
        NounPhraseItem res = TryParse(rt.BeginToken, items, attrs);
        if (res != null) {
            res.BeginToken = (res.EndToken = t);
            res.CanBeNoun = true;
            return res;
        }
    }
    // Any other referent token becomes a noun whose normal value is the referent's string form.
    if (rt != null) {
        NounPhraseItem res = new NounPhraseItem(t, t);
        foreach (Pullenti.Morph.MorphBaseInfo m in t.Morph.Items) {
            NounPhraseItemTextVar v = new NounPhraseItemTextVar(m, null);
            v.NormalValue = t.GetReferent().ToString();
            res.NounMorph.Add(v);
        }
        res.CanBeNoun = true;
        return res;
    }

    bool hasLegalVerb = false;
    if (t is Pullenti.Ner.TextToken) {
        if (!t.Chars.IsLetter) {
            return null;
        }
        string str = (t as Pullenti.Ner.TextToken).Term;
        // Words ending in 'А' / 'О' get extra screening of their dictionary word forms.
        if (str[str.Length - 1] == 'А' || str[str.Length - 1] == 'О') {
            foreach (Pullenti.Morph.MorphBaseInfo wf in t.Morph.Items) {
                if ((wf is Pullenti.Morph.MorphWordForm) && (wf as Pullenti.Morph.MorphWordForm).IsInDictionary) {
                    if (wf.Class.IsVerb) {
                        Pullenti.Morph.MorphClass mc = t.GetMorphClassInDictionary();
                        if (!mc.IsNoun && ((attrs & Pullenti.Ner.Core.NounPhraseParseAttr.IgnoreParticiples)) == Pullenti.Ner.Core.NounPhraseParseAttr.No) {
                            if (!Pullenti.Morph.LanguageHelper.EndsWithEx(str, "ОГО", "ЕГО", null, null)) {
                                return null;
                            }
                        }
                        hasLegalVerb = true;
                    }
                    if (wf.Class.IsAdverb) {
                        if (t.Next == null || !t.Next.IsHiphen) {
                            // Only this fixed list of adverb-like words is let through.
                            bool allowedAdverb = (str == "ВСЕГО" || str == "ДОМА" || str == "НЕСКОЛЬКО") || str == "МНОГО" || str == "ПОРЯДКА";
                            if (!allowedAdverb) {
                                return null;
                            }
                        }
                    }
                    if (wf.Class.IsAdjective) {
                        if (wf.ContainsAttr("к.ф.", null)) {
                            if (!(t.GetMorphClassInDictionary() == Pullenti.Morph.MorphClass.Adjective)) {
                                _isDoubtAdj = true;
                            }
                        }
                    }
                }
            }
        }

        // Capitalized tokens that can be surnames: decide between "may be a surname" and outright rejection.
        Pullenti.Morph.MorphClass mc0 = t.Morph.Class;
        if (mc0.IsProperSurname && !t.Chars.IsAllLower) {
            foreach (Pullenti.Morph.MorphBaseInfo wf in t.Morph.Items) {
                if (wf.Class.IsProperSurname && wf.Number != Pullenti.Morph.MorphNumber.Plural) {
                    Pullenti.Morph.MorphWordForm wff = wf as Pullenti.Morph.MorphWordForm;
                    if (wff == null) {
                        continue;
                    }
                    string s = ((wff.NormalFull ?? wff.NormalCase)) ?? "";
                    if (Pullenti.Morph.LanguageHelper.EndsWithEx(s, "ИН", "ЕН", "ЫН", null)) {
                        if (!wff.IsInDictionary) {
                            _canBeSurname = true;
                        }
                        else {
                            return null;
                        }
                    }
                    if (wff.IsInDictionary && Pullenti.Morph.LanguageHelper.EndsWith(s, "ОВ")) {
                        _canBeSurname = true;
                    }
                }
            }
        }
        // Capitalized dictionary first names are accepted only in a few situations.
        if (mc0.IsProperName && !t.Chars.IsAllLower) {
            foreach (Pullenti.Morph.MorphBaseInfo wff in t.Morph.Items) {
                Pullenti.Morph.MorphWordForm wf = wff as Pullenti.Morph.MorphWordForm;
                if (wf == null) {
                    continue;
                }
                if (wf.NormalCase == "ГОР") {
                    continue;
                }
                if (wf.Class.IsProperName && wf.IsInDictionary) {
                    if (wf.NormalCase == null || !wf.NormalCase.StartsWith("ЛЮБ")) {
                        bool invariableAdjective = mc0.IsAdjective && t.Morph.ContainsAttr("неизм.", null);
                        if (!invariableAdjective && ((attrs & Pullenti.Ner.Core.NounPhraseParseAttr.ReferentCanBeNoun)) != Pullenti.Ner.Core.NounPhraseParseAttr.ReferentCanBeNoun) {
                            if (items == null || (items.Count < 1)) {
                                return null;
                            }
                            if (!items[0].IsStdAdjective) {
                                return null;
                            }
                        }
                    }
                }
            }
        }
        if (mc0.IsAdjective && t.Morph.ItemsCount == 1) {
            if (t.Morph[0].ContainsAttr("в.ср.ст.", null)) {
                return null;
            }
        }
        Pullenti.Morph.MorphClass mc1 = t.GetMorphClassInDictionary();
        if (mc1 == Pullenti.Morph.MorphClass.Verb && t.Morph.Case.IsUndefined) {
            return null;
        }
        if ((((attrs & Pullenti.Ner.Core.NounPhraseParseAttr.IgnoreParticiples)) == Pullenti.Ner.Core.NounPhraseParseAttr.IgnoreParticiples && t.Morph.Class.IsVerb && !t.Morph.Class.IsNoun) && !t.Morph.Class.IsProper) {
            foreach (Pullenti.Morph.MorphBaseInfo wf in t.Morph.Items) {
                if (wf.Class.IsVerb) {
                    if (wf.ContainsAttr("дейст.з.", null)) {
                        if (!Pullenti.Morph.LanguageHelper.EndsWith((t as Pullenti.Ner.TextToken).Term, "СЯ")) {
                            return null;
                        }
                    }
                }
            }
        }
    }

    // Up to two passes. The second pass is reached when a hyphenated candidate produced neither an
    // adjective nor a noun on the first pass, or when "БИЗНЕС" is followed by a token with the same Chars.
    Pullenti.Ner.Token t1 = null;
    for (int k = 0; k < 2; k++) {
        t = t1 ?? t0;
        if (k == 0) {
            // First pass: for "X-Y" the morphology of the part after the hyphen is analysed.
            if (((t0 is Pullenti.Ner.TextToken) && t0.Next != null && t0.Next.IsHiphen) && t0.Next.Next != null) {
                if (!t0.IsWhitespaceAfter && !t0.Morph.Class.IsPronoun && !(t0.Next.Next is Pullenti.Ner.NumberToken)) {
                    if (!t0.Next.IsWhitespaceAfter) {
                        t = t0.Next.Next;
                    }
                    else if (t0.Next.Next.Chars.IsAllLower && Pullenti.Morph.LanguageHelper.EndsWith((t0 as Pullenti.Ner.TextToken).Term, "О")) {
                        t = t0.Next.Next;
                    }
                }
            }
        }
        NounPhraseItem it = new NounPhraseItem(t0, t) { CanBeSurname = _canBeSurname };
        if (t0 == t && (t0 is Pullenti.Ner.ReferentToken)) {
            it.CanBeNoun = true;
            it.Morph = new Pullenti.Ner.MorphCollection(t0.Morph);
        }
        bool canBePrepos = false;
        foreach (Pullenti.Morph.MorphBaseInfo v in t.Morph.Items) {
            Pullenti.Morph.MorphWordForm wf = v as Pullenti.Morph.MorphWordForm;
            // Verb forms that carry a case are taken as adjective variants.
            if (v.Class.IsVerb && !v.Case.IsUndefined) {
                it.CanBeAdj = true;
                it.AdjMorph.Add(new NounPhraseItemTextVar(v, t));
                continue;
            }
            if (v.Class.IsPreposition) {
                canBePrepos = true;
            }

            // --- adjective reading of this variant ---
            if (v.Class.IsAdjective || ((v.Class.IsPronoun && !v.Class.IsPersonalPronoun && !v.ContainsAttr("неизм.", null))) || ((v.Class.IsNoun && (t is Pullenti.Ner.NumberToken)))) {
                if (TryAccordVariant(items, (items == null ? 0 : items.Count), v, false)) {
                    if (v.ContainsAttr("к.ф.", null)) {
                        continue;
                    }
                    if (v.ContainsAttr("собир.", null) && !(t is Pullenti.Ner.NumberToken)) {
                        if (wf != null && wf.IsInDictionary) {
                            return null;
                        }
                        continue;
                    }
                    if (v.ContainsAttr("сравн.", null)) {
                        continue;
                    }
                    bool ok = true;
                    if (t is Pullenti.Ner.TextToken) {
                        string s = (t as Pullenti.Ner.TextToken).Term;
                        if (s == "ПРАВО" || s == "ПРАВА") {
                            ok = false;
                        }
                        else if (Pullenti.Morph.LanguageHelper.EndsWith(s, "ОВ") && t.GetMorphClassInDictionary().IsNoun) {
                            ok = false;
                        }
                    }
                    else if (t is Pullenti.Ner.NumberToken) {
                        if (v.Class.IsNoun && t.Morph.Class.IsAdjective) {
                            ok = false;
                        }
                        else if (t.Morph.Class.IsNoun && ((attrs & Pullenti.Ner.Core.NounPhraseParseAttr.ParseNumericAsAdjective)) == Pullenti.Ner.Core.NounPhraseParseAttr.No) {
                            ok = false;
                        }
                    }
                    if (ok) {
                        it.AdjMorph.Add(new NounPhraseItemTextVar(v, t));
                        it.CanBeAdj = true;
                        if (_isDoubtAdj && t0 == t) {
                            it.IsDoubtAdjective = true;
                        }
                        if (hasLegalVerb && wf != null && wf.IsInDictionary) {
                            it.CanBeNoun = true;
                        }
                        if (wf != null && wf.Class.IsPronoun) {
                            it.CanBeNoun = true;
                            it.NounMorph.Add(new NounPhraseItemTextVar(v, t));
                        }
                    }
                }
            }

            // --- noun reading of this variant ---
            bool canBeNoun = false;
            if (t is Pullenti.Ner.NumberToken) {
                // Number tokens are never taken as nouns here.
            }
            else if (v.Class.IsNoun || ((wf != null && wf.NormalCase == "САМ"))) {
                canBeNoun = true;
            }
            else if (v.Class.IsPersonalPronoun) {
                if (items == null || items.Count == 0) {
                    canBeNoun = true;
                }
                else {
                    foreach (NounPhraseItem it1 in items) {
                        if (it1.IsVerb) {
                            if (items.Count == 1 && !v.Case.IsNominative) {
                                canBeNoun = true;
                            }
                            else {
                                return null;
                            }
                        }
                    }
                    if (items.Count == 1) {
                        if (items[0].CanBeAdjForPersonalPronoun) {
                            canBeNoun = true;
                        }
                    }
                }
            }
            else if ((v.Class.IsPronoun
                      && ((items == null || items.Count == 0 || ((items.Count == 1 && items[0].CanBeAdjForPersonalPronoun))))
                      && wf != null)
                     && (((((wf.NormalCase == "ТОТ" || wf.NormalFull == "ТО" || wf.NormalCase == "ТО") || wf.NormalCase == "ЭТО" || wf.NormalCase == "ВСЕ") || wf.NormalCase == "ЧТО" || wf.NormalCase == "КТО") || wf.NormalFull == "КОТОРЫЙ" || wf.NormalCase == "КОТОРЫЙ"))) {
                if (wf.NormalCase == "ВСЕ") {
                    if (t.Next != null && t.Next.IsValue("РАВНО", null)) {
                        return null;
                    }
                }
                canBeNoun = true;
            }
            else if (wf != null && ((wf.NormalFull ?? wf.NormalCase)) == "КОТОРЫЙ" && ((attrs & Pullenti.Ner.Core.NounPhraseParseAttr.ParsePronouns)) == Pullenti.Ner.Core.NounPhraseParseAttr.No) {
                return null;
            }
            else if (v.Class.IsProper && (t is Pullenti.Ner.TextToken)) {
                if (t.LengthChar > 4 || v.Class.IsProperName) {
                    canBeNoun = true;
                }
            }

            if (canBeNoun) {
                bool added = false;
                // With MultiNouns set, a noun may attach to several preceding items
                // provided every item after the first has ConjBefore set.
                if (items != null && items.Count > 1 && ((attrs & Pullenti.Ner.Core.NounPhraseParseAttr.MultiNouns)) != Pullenti.Ner.Core.NounPhraseParseAttr.No) {
                    bool ok1 = true;
                    for (int ii = 1; ii < items.Count; ii++) {
                        if (!items[ii].ConjBefore) {
                            ok1 = false;
                            break;
                        }
                    }
                    if (ok1) {
                        if (TryAccordVariant(items, (items == null ? 0 : items.Count), v, true)) {
                            it.NounMorph.Add(new NounPhraseItemTextVar(v, t));
                            it.CanBeNoun = true;
                            it.MultiNouns = true;
                            added = true;
                        }
                    }
                }
                if (!added) {
                    if (TryAccordVariant(items, (items == null ? 0 : items.Count), v, false)) {
                        it.NounMorph.Add(new NounPhraseItemTextVar(v, t));
                        it.CanBeNoun = true;
                        if (v.Class.IsPersonalPronoun && t.Morph.ContainsAttr("неизм.", null) && !it.CanBeAdj) {
                            NounPhraseItemTextVar itt = new NounPhraseItemTextVar(v, t);
                            itt.Case = Pullenti.Morph.MorphCase.AllCases;
                            itt.Number = Pullenti.Morph.MorphNumber.Undefined;
                            // Empty branch kept from the original: the NormalValue getter is not visible from here.
                            if (itt.NormalValue == null) {
                            }
                            it.AdjMorph.Add(itt);
                            it.CanBeAdj = true;
                        }
                    }
                    // items is null-guarded here like everywhere else in this method (the original
                    // dereferenced items.Count directly on this path).
                    else if (items != null
                             && (items.Count > 0 && items[0].AdjMorph.Count > 0 && items[0].AdjMorph[0].Number == Pullenti.Morph.MorphNumber.Plural)
                             && !((items[0].AdjMorph[0].Case & v.Case)).IsUndefined
                             && !items[0].AdjMorph[0].Class.IsVerb) {
                        // Plural leading adjective: accept the noun if a second noun phrase in a
                        // compatible case follows after a comma/"and".
                        if (t.Next != null && t.Next.IsCommaAnd && (t.Next.Next is Pullenti.Ner.TextToken)) {
                            Pullenti.Ner.Core.NounPhraseToken npt2 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t.Next.Next, attrs, 0, null);
                            if (npt2 != null && npt2.Preposition == null && !((npt2.Morph.Case & v.Case & items[0].AdjMorph[0].Case)).IsUndefined) {
                                it.NounMorph.Add(new NounPhraseItemTextVar(v, t));
                                it.CanBeNoun = true;
                            }
                        }
                    }
                }
            }
        }

        // Analysed token differs from the first token: let each variant account for the prefix t0.
        if (t0 != t) {
            foreach (NounPhraseItemTextVar v in it.AdjMorph) {
                v.CorrectPrefix(t0 as Pullenti.Ner.TextToken, false);
            }
            foreach (NounPhraseItemTextVar v in it.NounMorph) {
                v.CorrectPrefix(t0 as Pullenti.Ner.TextToken, true);
            }
        }
        // Second pass, pure noun: extend the item over the second word and join normal values with '-'.
        if (k == 1 && it.CanBeNoun && !it.CanBeAdj) {
            if (t1 != null) {
                it.EndToken = t1;
            }
            else {
                it.EndToken = t0.Next.Next;
            }
            foreach (NounPhraseItemTextVar v in it.NounMorph) {
                if (v.NormalValue != null && (v.NormalValue.IndexOf('-') < 0)) {
                    v.NormalValue = string.Format("{0}-{1}", v.NormalValue, it.EndToken.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false));
                }
            }
        }
        if (it.CanBeAdj) {
            if (m_StdAdjectives.TryParse(it.BeginToken, Pullenti.Ner.Core.TerminParseAttr.No) != null) {
                it.IsStdAdjective = true;
            }
        }
        // The word may also be a preposition: reject the noun reading when a prepositional parse fits.
        if (canBePrepos && it.CanBeNoun) {
            if (items != null && items.Count > 0) {
                Pullenti.Ner.Core.NounPhraseToken npt1 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t, Pullenti.Ner.Core.NounPhraseParseAttr.ParsePreposition | Pullenti.Ner.Core.NounPhraseParseAttr.ParsePronouns | Pullenti.Ner.Core.NounPhraseParseAttr.ParseVerbs, 0, null);
                if (npt1 != null && npt1.EndChar > t.EndChar) {
                    return null;
                }
            }
            else {
                Pullenti.Ner.Core.NounPhraseToken npt1 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t.Next, Pullenti.Ner.Core.NounPhraseParseAttr.ParsePronouns | Pullenti.Ner.Core.NounPhraseParseAttr.ParseVerbs, 0, null);
                if (npt1 != null) {
                    Pullenti.Morph.MorphCase mc = Pullenti.Morph.LanguageHelper.GetCaseAfterPreposition((t as Pullenti.Ner.TextToken).Lemma);
                    if (!((mc & npt1.Morph.Case)).IsUndefined) {
                        return null;
                    }
                }
            }
        }
        if (it.CanBeNoun || it.CanBeAdj || k == 1) {
            // Pronouns absorb a trailing particle, optionally attached through a hyphen.
            if (it.BeginToken.Morph.Class.IsPronoun) {
                Pullenti.Ner.Token tt2 = it.EndToken.Next;
                if ((tt2 != null && tt2.IsHiphen && !tt2.IsWhitespaceAfter) && !tt2.IsWhitespaceBefore) {
                    tt2 = tt2.Next;
                }
                if (tt2 is Pullenti.Ner.TextToken) {
                    string ss = (tt2 as Pullenti.Ner.TextToken).Term;
                    if ((ss == "ЖЕ" || ss == "БЫ" || ss == "ЛИ") || ss == "Ж") {
                        it.EndToken = tt2;
                    }
                    else if (ss == "НИБУДЬ" || ss == "ЛИБО" || (((ss == "ТО" && tt2.Previous.IsHiphen)) && it.CanBeAdj)) {
                        it.EndToken = tt2;
                        foreach (NounPhraseItemTextVar m in it.AdjMorph) {
                            m.NormalValue = string.Format("{0}-{1}", m.NormalValue, ss);
                            if (m.SingleNumberValue != null) {
                                m.SingleNumberValue = string.Format("{0}-{1}", m.SingleNumberValue, ss);
                            }
                        }
                    }
                }
            }
            return it;
        }
        if (t0 == t) {
            // "БИЗНЕС" followed by a token with the same Chars: retry with both words as one item.
            if (t0.IsValue("БИЗНЕС", null) && t0.Next != null && t0.Next.Chars == t0.Chars) {
                t1 = t0.Next;
                continue;
            }
            return it;
        }
    }
    return null;
}
/// <summary>
/// Attaches a phone item at <paramref name="t0"/>. An item of type <c>Prefix</c> is then extended to the
/// right over further prefix items, a closing ':', noun phrases, proper words and prepositions.
/// </summary>
/// <param name="t0">Token to start from.</param>
/// <returns>The attached item (extended when it is a prefix); <c>null</c> when nothing attaches, or when the
/// extension reaches a noun phrase ending in "ПОСЕЛЕНИЕ".</returns>
public static PhoneItemToken TryAttach(Pullenti.Ner.Token t0) {
    PhoneItemToken prefix = _TryAttach(t0);
    if (prefix == null) {
        return null;
    }
    if (prefix.ItemType != PhoneItemType.Prefix) {
        return prefix;
    }

    for (Pullenti.Ner.Token cur = prefix.EndToken.Next; cur != null; cur = cur.Next) {
        // Never extend past a table control character or onto a new line.
        if (cur.IsTableControlChar || cur.IsNewlineBefore) {
            break;
        }

        PhoneItemToken following = _TryAttach(cur);
        if (following != null) {
            // Only another prefix may be merged; any other phone item ends the extension.
            if (following.ItemType != PhoneItemType.Prefix) {
                break;
            }
            if (prefix.Kind == Pullenti.Ner.Phone.PhoneKind.Undefined) {
                prefix.Kind = following.Kind;
            }
            Pullenti.Ner.Token mergedEnd = following.EndToken;
            prefix.EndToken = mergedEnd;
            cur = mergedEnd;
            continue;
        }

        // A colon closes the prefix and is included in it.
        if (cur.IsChar(':')) {
            prefix.EndToken = cur;
            break;
        }
        if (!(cur is Pullenti.Ner.TextToken)) {
            break;
        }
        // A one-character start token is not extended over plain words.
        if (t0.LengthChar == 1) {
            break;
        }

        Pullenti.Ner.Core.NounPhraseToken phrase = Pullenti.Ner.Core.NounPhraseHelper.TryParse(cur, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
        if (phrase != null) {
            cur = phrase.EndToken;
            if (cur.IsValue("ПОСЕЛЕНИЕ", null)) {
                return null;
            }
            prefix.EndToken = cur;
            continue;
        }

        if (cur.GetMorphClassInDictionary().IsProper) {
            prefix.EndToken = cur;
            continue;
        }
        // Prepositions are stepped over without moving the end; anything else stops the scan.
        if (!cur.Morph.Class.IsPreposition) {
            break;
        }
    }
    return prefix;
}
/// <summary>
/// Core attach routine: tries to read one organization-name item starting at <paramref name="t"/>.
/// </summary>
/// <param name="t">Token to start from; <c>null</c> yields <c>null</c>.</param>
/// <param name="prev">Previously attached name item, or <c>null</c>; used for case/Chars agreement checks
/// and to inherit the preposition after a comma.</param>
/// <param name="extOnto">Passed through to nested attach calls; when set, any noun phrase is accepted as
/// a name item.</param>
/// <returns>The recognized item, or <c>null</c> when the token cannot start a name item.</returns>
static OrgItemNameToken _TryAttach(Pullenti.Ner.Token t, OrgItemNameToken prev, bool extOnto) {
    if (t == null) {
        return null;
    }

    // Tokens that already carry a referent: only DENOMINATION and Latin-letter geo referents are usable.
    Pullenti.Ner.Referent r = t.GetReferent();
    if (r != null) {
        if (r.TypeName == "DENOMINATION") {
            return new OrgItemNameToken(t, t) { Value = r.ToString(true, t.Kit.BaseLanguage, 0), IsDenomination = true };
        }
        if ((r is Pullenti.Ner.Geo.GeoReferent) && t.Chars.IsLatinLetter) {
            OrgItemNameToken res2 = _TryAttach(t.Next, prev, extOnto);
            if (res2 != null && res2.Chars.IsLatinLetter) {
                res2.BeginToken = t;
                res2.Value = string.Format("{0} {1}", Pullenti.Ner.Core.MiscHelper.GetTextValueOfMetaToken(t as Pullenti.Ner.MetaToken, Pullenti.Ner.Core.GetTextAttr.No), res2.Value);
                res2.IsInDictionary = false;
                return res2;
            }
        }
        return null;
    }

    Pullenti.Ner.TextToken tt = t as Pullenti.Ner.TextToken;
    if (tt == null) {
        return null;
    }
    OrgItemNameToken res = null;

    // Standard tails, standard names and English items (each optionally preceded by a comma).
    Pullenti.Ner.Core.TerminToken tok = m_StdTails.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
    if (tok == null && t.IsChar(',')) {
        tok = m_StdTails.TryParse(t.Next, Pullenti.Ner.Core.TerminParseAttr.No);
    }
    if (tok != null) {
        return new OrgItemNameToken(t, tok.EndToken) { Value = tok.Termin.CanonicText, IsStdTail = tok.Termin.Tag == null, IsEmptyWord = tok.Termin.Tag != null, Morph = tok.Morph };
    }
    if ((((tok = m_StdNames.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No)))) != null) {
        return new OrgItemNameToken(t, tok.EndToken) { Value = tok.Termin.CanonicText, IsStdName = true };
    }
    OrgItemEngItem eng = OrgItemEngItem.TryAttach(t, false);
    if (eng == null && t.IsChar(',')) {
        eng = OrgItemEngItem.TryAttach(t.Next, false);
    }
    if (eng != null) {
        return new OrgItemNameToken(t, eng.EndToken) { Value = eng.FullValue, IsStdTail = true };
    }

    if (tt.Chars.IsAllLower && prev != null) {
        if (!prev.Chars.IsAllLower && !prev.Chars.IsCapitalUpper) {
            return null;
        }
    }

    // ", <noun phrase> И <noun phrase>" continuing the previous item: both phrases must match prev in
    // Chars and share a case with it, and neither may start an organization type.
    if (tt.IsChar(',') && prev != null) {
        Pullenti.Ner.Core.NounPhraseToken npt1 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t.Next, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
        if (npt1 == null || npt1.Chars != prev.Chars || ((npt1.Morph.Case & prev.Morph.Case)).IsUndefined) {
            return null;
        }
        OrgItemTypeToken ty = OrgItemTypeToken.TryAttach(t.Next, false, null);
        if (ty != null) {
            return null;
        }
        if (npt1.EndToken.Next == null || !npt1.EndToken.Next.IsValue("И", null)) {
            return null;
        }
        Pullenti.Ner.Token t1 = npt1.EndToken.Next;
        Pullenti.Ner.Core.NounPhraseToken npt2 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t1.Next, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
        if (npt2 == null || npt2.Chars != prev.Chars || ((npt2.Morph.Case & npt1.Morph.Case & prev.Morph.Case)).IsUndefined) {
            return null;
        }
        ty = OrgItemTypeToken.TryAttach(t1.Next, false, null);
        if (ty != null) {
            return null;
        }
        res = new OrgItemNameToken(npt1.BeginToken, npt1.EndToken) { Morph = npt1.Morph, Value = npt1.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false) };
        res.IsNounPhrase = true;
        res.IsAfterConjunction = true;
        if (prev.Preposition != null) {
            res.Preposition = prev.Preposition;
        }
        return res;
    }

    // "&" / "AND" / "UND" after a previous item.
    if (((tt.IsChar('&') || tt.IsValue("AND", null) || tt.IsValue("UND", null))) && prev != null) {
        if ((tt.Next is Pullenti.Ner.TextToken) && tt.LengthChar == 1 && tt.Next.Chars.IsLatinLetter) {
            res = new OrgItemNameToken(tt, tt.Next) { Chars = tt.Next.Chars };
            res.IsAfterConjunction = true;
            res.Value = "& " + (tt.Next as Pullenti.Ner.TextToken).Term;
            return res;
        }
        res = OrgItemNameToken.TryAttach(tt.Next, null, extOnto, false);
        if (res == null || res.Chars != prev.Chars) {
            return null;
        }
        res.IsAfterConjunction = true;
        res.Value = "& " + res.Value;
        return res;
    }

    if (!tt.Chars.IsLetter) {
        return null;
    }

    // NOTE(review): expinf is assigned but never read in this method; the lookup is kept unchanged
    // because FindDerivates is not visible from here — confirm it can be dropped.
    List<Pullenti.Semantic.Utils.DerivateGroup> expinf = null;
    if (prev != null && prev.EndToken.GetMorphClassInDictionary().IsNoun) {
        string wo = prev.EndToken.GetNormalCaseText(Pullenti.Morph.MorphClass.Noun, Pullenti.Morph.MorphNumber.Singular, Pullenti.Morph.MorphGender.Undefined, false);
        expinf = Pullenti.Semantic.Utils.DerivateService.FindDerivates(wo, true, prev.EndToken.Morph.Language);
    }

    Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
    if (npt != null && npt.InternalNoun != null) {
        npt = null;
    }
    // explOk: at least one semantic link can be created between the previous noun and this phrase.
    bool explOk = false;
    if (npt != null && prev != null && prev.EndToken.GetMorphClassInDictionary().IsNoun) {
        Pullenti.Ner.Core.NounPhraseToken npt0 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(prev.EndToken, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
        if (npt0 != null) {
            List<Pullenti.Semantic.Core.SemanticLink> links = Pullenti.Semantic.Core.SemanticHelper.TryCreateLinks(npt0, npt, null);
            if (links.Count > 0) {
                explOk = true;
            }
        }
    }

    if (npt != null && ((explOk || npt.Morph.Case.IsGenitive || ((prev != null && !((prev.Morph.Case & npt.Morph.Case)).IsUndefined))))) {
        // Noun phrase that is linked to prev, is genitive, or shares a case with prev.
        Pullenti.Morph.MorphClass mc = npt.BeginToken.GetMorphClassInDictionary();
        if (mc.IsVerb || mc.IsPronoun) {
            return null;
        }
        if (mc.IsAdverb) {
            // An adverb is tolerated only as the first part of a hyphenated word.
            if (!(npt.BeginToken.Next != null && npt.BeginToken.Next.IsHiphen)) {
                return null;
            }
        }
        if (mc.IsPreposition) {
            return null;
        }
        if (mc.IsNoun && npt.Chars.IsAllLower) {
            Pullenti.Morph.MorphCase ca = npt.Morph.Case;
            if ((!ca.IsDative && !ca.IsGenitive && !ca.IsInstrumental) && !ca.IsPrepositional) {
                return null;
            }
        }
        res = new OrgItemNameToken(npt.BeginToken, npt.EndToken) { Morph = npt.Morph, Value = npt.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false) };
        res.IsNounPhrase = true;
        if ((npt.EndToken.WhitespacesAfterCount < 2) && (npt.EndToken.Next is Pullenti.Ner.TextToken)) {
            Pullenti.Ner.Core.NounPhraseToken npt2 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(npt.EndToken.Next, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
            if (npt2 != null && npt2.Morph.Case.IsGenitive && npt2.Chars.IsAllLower) {
                // Lower-case genitive continuation, unless it starts a type, an eponym or a
                // non-plural PERSONPROPERTY referent.
                OrgItemTypeToken typ = OrgItemTypeToken.TryAttach(npt.EndToken.Next, true, null);
                OrgItemEponymToken epo = OrgItemEponymToken.TryAttach(npt.EndToken.Next, false);
                Pullenti.Ner.ReferentToken rtt = t.Kit.ProcessReferent("PERSONPROPERTY", npt.EndToken.Next);
                if (typ == null && epo == null && ((rtt == null || rtt.Morph.Number == Pullenti.Morph.MorphNumber.Plural))) {
                    res.EndToken = npt2.EndToken;
                    res.Value = string.Format("{0} {1}", res.Value, Pullenti.Ner.Core.MiscHelper.GetTextValueOfMetaToken(npt2, Pullenti.Ner.Core.GetTextAttr.No));
                }
            }
            else if (npt.EndToken.Next.IsComma && (npt.EndToken.Next.Next is Pullenti.Ner.TextToken)) {
                // ", <adjective/verb word agreeing with the phrase> <dative or genitive phrase>"
                Pullenti.Ner.Token tt2 = npt.EndToken.Next.Next;
                Pullenti.Morph.MorphClass mv2 = tt2.GetMorphClassInDictionary();
                if (mv2.IsAdjective && mv2.IsVerb) {
                    Pullenti.Morph.MorphBaseInfo bi = new Pullenti.Morph.MorphBaseInfo() { Case = npt.Morph.Case, Gender = npt.Morph.Gender, Number = npt.Morph.Number };
                    if (tt2.Morph.CheckAccord(bi, false, false)) {
                        npt2 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(tt2.Next, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                        if (npt2 != null && ((npt2.Morph.Case.IsDative || npt2.Morph.Case.IsGenitive)) && npt2.Chars.IsAllLower) {
                            res.EndToken = npt2.EndToken;
                            res.Value = string.Format("{0} {1}", res.Value, Pullenti.Ner.Core.MiscHelper.GetTextValue(npt.EndToken.Next, res.EndToken, Pullenti.Ner.Core.GetTextAttr.No));
                        }
                    }
                }
            }
        }
        if (explOk) {
            res.IsAfterConjunction = true;
        }
    }
    else if (npt != null && ((((prev != null && prev.IsNounPhrase && npt.Morph.Case.IsInstrumental)) || extOnto))) {
        res = new OrgItemNameToken(npt.BeginToken, npt.EndToken) { Morph = npt.Morph, Value = npt.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false) };
        res.IsNounPhrase = true;
    }
    else if (tt.IsAnd) {
        // Conjunction: the item after it must be a noun phrase sharing a case with prev.
        res = TryAttach(tt.Next, prev, extOnto, false);
        if (res == null || !res.IsNounPhrase || prev == null) {
            return null;
        }
        if (((prev.Morph.Case & res.Morph.Case)).IsUndefined) {
            return null;
        }
        if (prev.Morph.Number != Pullenti.Morph.MorphNumber.Undefined && res.Morph.Number != Pullenti.Morph.MorphNumber.Undefined) {
            if (((prev.Morph.Number & res.Morph.Number)) == Pullenti.Morph.MorphNumber.Undefined) {
                if (prev.Chars != res.Chars) {
                    return null;
                }
                OrgItemTypeToken ty = OrgItemTypeToken.TryAttach(res.EndToken.Next, false, null);
                if (ty != null) {
                    return null;
                }
            }
        }
        // Read-then-write of Chars kept exactly as in the original (accessor bodies are not visible here).
        Pullenti.Morph.CharsInfo ci = res.Chars;
        res.Chars = ci;
        res.IsAfterConjunction = true;
        return res;
    }
    else if (((tt.Term == "ПО" || tt.Term == "ПРИ" || tt.Term == "ЗА") || tt.Term == "С" || tt.Term == "В") || tt.Term == "НА") {
        // Preposition + noun phrase; the phrase's case must suit the preposition.
        npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t.Next, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
        if (npt != null) {
            if (m_VervotWords.TryParse(npt.EndToken, Pullenti.Ner.Core.TerminParseAttr.No) != null) {
                return null;
            }
            bool ok = false;
            if (tt.Term == "ПО") {
                ok = npt.Morph.Case.IsDative;
            }
            else if (tt.Term == "С") {
                ok = npt.Morph.Case.IsInstrumental;
            }
            else if (tt.Term == "ЗА") {
                ok = npt.Morph.Case.IsGenitive | npt.Morph.Case.IsInstrumental;
            }
            else if (tt.Term == "НА") {
                ok = npt.Morph.Case.IsPrepositional;
            }
            else if (tt.Term == "В") {
                ok = npt.Morph.Case.IsDative | npt.Morph.Case.IsPrepositional;
                if (ok) {
                    // "В" is accepted only before "СФЕРА" / "ОБЛАСТЬ".
                    ok = false;
                    if (t.Next.IsValue("СФЕРА", null) || t.Next.IsValue("ОБЛАСТЬ", null)) {
                        ok = true;
                    }
                }
            }
            else if (tt.Term == "ПРИ") {
                ok = npt.Morph.Case.IsPrepositional;
                if (ok) {
                    // Rejected when what follows is itself an organization type or organization.
                    if (OrgItemTypeToken.TryAttach(tt.Next, true, null) != null) {
                        ok = false;
                    }
                    else {
                        Pullenti.Ner.ReferentToken rt = tt.Kit.ProcessReferent(Pullenti.Ner.Org.OrganizationAnalyzer.ANALYZER_NAME, tt.Next);
                        if (rt != null) {
                            ok = false;
                        }
                    }
                }
                string s = npt.Noun.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false);
                if (s == "ПОДДЕРЖКА" || s == "УЧАСТИЕ") {
                    ok = false;
                }
            }
            else {
                ok = npt.Morph.Case.IsPrepositional;
            }
            if (ok) {
                res = new OrgItemNameToken(t, npt.EndToken) { Morph = npt.Morph, Value = npt.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Singular, Pullenti.Morph.MorphGender.Undefined, false), Chars = npt.Chars };
                res.IsNounPhrase = true;
                res.Preposition = tt.Term;
                // "... ДЕЛО/ВОПРОС" may be followed by a genitive item and a comma/and-separated list.
                if (((res.Value == "ДЕЛО" || res.Value == "ВОПРОС")) && !res.IsNewlineAfter) {
                    OrgItemNameToken res2 = _TryAttach(res.EndToken.Next, res, extOnto);
                    if (res2 != null && res2.Morph.Case.IsGenitive) {
                        res.Value = string.Format("{0} {1}", res.Value, res2.Value);
                        res.EndToken = res2.EndToken;
                        for (Pullenti.Ner.Token ttt = res2.EndToken.Next; ttt != null; ttt = ttt.Next) {
                            if (!ttt.IsCommaAnd) {
                                break;
                            }
                            OrgItemNameToken res3 = _TryAttach(ttt.Next, res2, extOnto);
                            if (res3 == null) {
                                break;
                            }
                            res.Value = string.Format("{0} {1}", res.Value, res3.Value);
                            res.EndToken = res3.EndToken;
                            if (ttt.IsAnd) {
                                break;
                            }
                            ttt = res.EndToken;
                        }
                    }
                }
            }
        }
        if (res == null) {
            return null;
        }
    }
    else if (tt.Term == "OF") {
        // "OF [article] Xxx ..." — collect the following Latin-letter words up to a preposition.
        Pullenti.Ner.Token t1 = tt.Next;
        if (t1 != null && Pullenti.Ner.Core.MiscHelper.IsEngArticle(t1)) {
            t1 = t1.Next;
        }
        if (t1 != null && t1.Chars.IsLatinLetter && !t1.Chars.IsAllLower) {
            res = new OrgItemNameToken(t, t1) { Chars = t1.Chars, Morph = t1.Morph };
            for (Pullenti.Ner.Token ttt = t1.Next; ttt != null; ttt = ttt.Next) {
                if (ttt.WhitespacesBeforeCount > 2) {
                    break;
                }
                if (Pullenti.Ner.Core.MiscHelper.IsEngAdjSuffix(ttt)) {
                    ttt = ttt.Next;
                    continue;
                }
                if (!ttt.Chars.IsLatinLetter) {
                    break;
                }
                if (ttt.Morph.Class.IsPreposition) {
                    break;
                }
                t1 = (res.EndToken = ttt);
            }
            res.Value = Pullenti.Ner.Core.MiscHelper.GetTextValue(t, t1, Pullenti.Ner.Core.GetTextAttr.IgnoreArticles);
            res.Preposition = tt.Term;
            return res;
        }
    }

    // Fallback: a single word, extended over "-", "\", "/" and "." joined parts.
    if (res == null) {
        if (tt.Chars.IsLatinLetter && tt.LengthChar == 1) {
            // A single Latin letter is allowed through.
        }
        else if (tt.Chars.IsAllLower || (tt.LengthChar < 2)) {
            if (!tt.Chars.IsLatinLetter || prev == null || !prev.Chars.IsLatinLetter) {
                return null;
            }
        }
        if (tt.Chars.IsCyrillicLetter) {
            Pullenti.Morph.MorphClass mc = tt.GetMorphClassInDictionary();
            if (mc.IsVerb || mc.IsAdverb) {
                return null;
            }
        }
        else if (tt.Chars.IsLatinLetter && !tt.IsWhitespaceAfter) {
            if (!tt.IsWhitespaceAfter && (tt.LengthChar < 5)) {
                if (tt.Next is Pullenti.Ner.NumberToken) {
                    return null;
                }
            }
        }
        res = new OrgItemNameToken(tt, tt) { Value = tt.Term, Morph = tt.Morph };
        for (t = tt.Next; t != null; t = t.Next) {
            if ((((t.IsHiphen || t.IsCharOf("\\/"))) && t.Next != null && (t.Next is Pullenti.Ner.TextToken)) && !t.IsWhitespaceBefore && !t.IsWhitespaceAfter) {
                t = t.Next;
                res.EndToken = t;
                res.Value = string.Format("{0}{1}{2}", res.Value, (t.Previous.IsChar('.') ? '.' : '-'), (t as Pullenti.Ner.TextToken).Term);
            }
            else if (t.IsChar('.')) {
                if (!t.IsWhitespaceAfter && !t.IsWhitespaceBefore && (t.Next is Pullenti.Ner.TextToken)) {
                    res.EndToken = t.Next;
                    t = t.Next;
                    res.Value = string.Format("{0}.{1}", res.Value, (t as Pullenti.Ner.TextToken).Term);
                }
                else if ((t.Next != null && !t.IsNewlineAfter && t.Next.Chars.IsLatinLetter) && tt.Chars.IsLatinLetter) {
                    res.EndToken = t;
                }
                else {
                    break;
                }
            }
            else {
                break;
            }
        }
    }

    // Flag the item when any non-conjunction, non-preposition word in it has a dictionary word form.
    for (Pullenti.Ner.Token t0 = res.BeginToken; t0 != null; t0 = t0.Next) {
        if ((((tt = t0 as Pullenti.Ner.TextToken))) != null && tt.IsLetters) {
            if (!tt.Morph.Class.IsConjunction && !tt.Morph.Class.IsPreposition) {
                foreach (Pullenti.Morph.MorphBaseInfo mf in tt.Morph.Items) {
                    // Null-guarded: the original dereferenced the result of the 'as' cast directly.
                    Pullenti.Morph.MorphWordForm form = mf as Pullenti.Morph.MorphWordForm;
                    if (form != null && form.IsInDictionary) {
                        res.IsInDictionary = true;
                        break;
                    }
                }
            }
        }
        if (t0 == res.EndToken) {
            break;
        }
    }

    // Single all-upper word directly followed by a number (optionally via hyphen): append the number.
    if (res.BeginToken == res.EndToken && res.BeginToken.Chars.IsAllUpper) {
        if (res.EndToken.Next != null && !res.EndToken.IsWhitespaceAfter) {
            Pullenti.Ner.Token t1 = res.EndToken.Next;
            if (t1.Next != null && !t1.IsWhitespaceAfter && t1.IsHiphen) {
                t1 = t1.Next;
            }
            if (t1 is Pullenti.Ner.NumberToken) {
                res.Value += (t1 as Pullenti.Ner.NumberToken).Value;
                res.EndToken = t1;
            }
        }
    }
    // Single word with IsLastLower: cut the value after the last upper-case character of the source text.
    if (res.BeginToken == res.EndToken && res.BeginToken.Chars.IsLastLower) {
        string src = res.BeginToken.GetSourceText();
        for (int i = src.Length - 1; i >= 0; i--) {
            if (char.IsUpper(src[i])) {
                res.Value = src.Substring(0, i + 1);
                break;
            }
        }
    }
    return res;
}
/// <summary>
/// Public entry point for attaching an organization-name item at <paramref name="t"/>. It wraps
/// <c>_TryAttach</c> with handling of "ОРДЕНА ..." fragments, ontology-based extension, hyphen and
/// conjunction joins, and counting of standard organization nouns.
/// </summary>
/// <param name="t">Token to start from; <c>null</c> yields <c>null</c>.</param>
/// <param name="prev">Previously attached name item, or <c>null</c>.</param>
/// <param name="extOnto">When set, a failed attach on a geo referent or on a text token other than ';'
/// still yields an item holding the token's source text.</param>
/// <param name="first">When set, a Cyrillic preposition other than "ПО" / "ПРИ" is rejected.</param>
/// <returns>The attached item, or <c>null</c>.</returns>
public static OrgItemNameToken TryAttach(Pullenti.Ner.Token t, OrgItemNameToken prev, bool extOnto, bool first) {
    if (t == null) {
        return null;
    }

    // "ОРДЕНА <...>" is returned as a part flagged IsIgnoredPart.
    if (t.IsValue("ОРДЕНА", null) && t.Next != null) {
        Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t.Next, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
        if (npt != null) {
            Pullenti.Ner.Token t1 = npt.EndToken;
            if (((t1.IsValue("ЗНАК", null) || t1.IsValue("ДРУЖБА", null))) && (t1.WhitespacesAfterCount < 2)) {
                npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t1.Next, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                if (npt != null) {
                    t1 = npt.EndToken;
                }
            }
            return new OrgItemNameToken(t, t1) { IsIgnoredPart = true };
        }
        if (t.Next.GetMorphClassInDictionary().IsProperSurname) {
            return new OrgItemNameToken(t, t.Next) { IsIgnoredPart = true };
        }
        Pullenti.Ner.ReferentToken ppp = t.Kit.ProcessReferent("PERSON", t.Next);
        if (ppp != null) {
            return new OrgItemNameToken(t, ppp.EndToken) { IsIgnoredPart = true };
        }
        if ((t.WhitespacesAfterCount < 2) && Pullenti.Ner.Core.BracketHelper.CanBeStartOfSequence(t.Next, true, false)) {
            Pullenti.Ner.Core.BracketSequenceToken br = Pullenti.Ner.Core.BracketHelper.TryParse(t.Next, Pullenti.Ner.Core.BracketParseAttr.NearCloseBracket, 10);
            if (br != null && (br.LengthChar < 40)) {
                return new OrgItemNameToken(t, br.EndToken) { IsIgnoredPart = true };
            }
        }
    }

    if (first && t.Chars.IsCyrillicLetter && t.Morph.Class.IsPreposition) {
        if (!t.IsValue("ПО", null) && !t.IsValue("ПРИ", null)) {
            return null;
        }
    }

    OrgItemNameToken res = _TryAttach(t, prev, extOnto);
    if (res == null) {
        if (extOnto) {
            if ((t.GetReferent() is Pullenti.Ner.Geo.GeoReferent) || (((t is Pullenti.Ner.TextToken) && !t.IsChar(';')))) {
                return new OrgItemNameToken(t, t) { Value = t.GetSourceText() };
            }
        }
        return null;
    }

    // First item, not in ext-ontology mode: a longer match from the ontology's pure names moves the end.
    if (prev == null && !extOnto) {
        if (t.Kit.Ontology != null) {
            Pullenti.Ner.Org.OrganizationAnalyzer.OrgAnalyzerData ad = t.Kit.Ontology._getAnalyzerData(Pullenti.Ner.Org.OrganizationAnalyzer.ANALYZER_NAME) as Pullenti.Ner.Org.OrganizationAnalyzer.OrgAnalyzerData;
            if (ad != null) {
                Pullenti.Ner.Core.TerminToken tok = ad.OrgPureNames.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
                if (tok != null && tok.EndChar > res.EndChar) {
                    res.EndToken = tok.EndToken;
                }
            }
        }
    }

    // After an all-lower previous item, a non-lower item is accepted only when both are Latin
    // or when it begins with a standard noun.
    if (prev != null && !extOnto) {
        if ((prev.Chars.IsAllLower && !res.Chars.IsAllLower && !res.IsStdTail) && !res.IsStdName) {
            bool bothLatin = prev.Chars.IsLatinLetter && res.Chars.IsLatinLetter;
            if (!bothLatin && m_StdNouns.TryParse(res.BeginToken, Pullenti.Ner.Core.TerminParseAttr.No) == null) {
                return null;
            }
        }
    }

    // "Xxx-Yyy" with no whitespace around the hyphen: absorb the second part.
    if ((res.EndToken.Next != null && !res.EndToken.IsWhitespaceAfter && res.EndToken.Next.IsHiphen) && !res.EndToken.Next.IsWhitespaceAfter) {
        Pullenti.Ner.TextToken tt = res.EndToken.Next.Next as Pullenti.Ner.TextToken;
        if (tt != null) {
            if (tt.Chars == res.Chars || tt.Chars.IsAllUpper) {
                res.EndToken = tt;
                res.Value = string.Format("{0}-{1}", res.Value, tt.Term);
            }
        }
    }

    // "<item> and <item>" with single whitespaces, same Chars and a shared case: join into one value.
    if ((res.EndToken.Next != null && res.EndToken.Next.IsAnd && res.EndToken.WhitespacesAfterCount == 1) && res.EndToken.Next.WhitespacesAfterCount == 1) {
        OrgItemNameToken res1 = _TryAttach(res.EndToken.Next.Next, prev, extOnto);
        if (res1 != null && res1.Chars == res.Chars && OrgItemTypeToken.TryAttach(res.EndToken.Next.Next, false, null) == null) {
            if (!((res1.Morph.Case & res.Morph.Case)).IsUndefined) {
                res.EndToken = res1.EndToken;
                res.Value = string.Format("{0} {1} {2}", res.Value, (res.Kit.BaseLanguage.IsUa ? "ТА" : "И"), res1.Value);
            }
        }
    }

    // Count standard organization nouns inside the item.
    for (Pullenti.Ner.Token tt = res.BeginToken; tt != null && tt.EndChar <= res.EndChar; tt = tt.Next) {
        if (m_StdNouns.TryParse(tt, Pullenti.Ner.Core.TerminParseAttr.No) != null) {
            res.StdOrgNameNouns++;
        }
    }

    // When the item ends with a standard noun, scan ahead over further noun phrases ending in
    // standard nouns; the extension is applied only if 'non' was set and no '(' is left open.
    if (m_StdNouns.TryParse(res.EndToken, Pullenti.Ner.Core.TerminParseAttr.No) != null) {
        int cou = 1;
        bool non = false;
        Pullenti.Ner.Token et = res.EndToken;
        if (!_isNotTermNoun(res.EndToken)) {
            non = true;
        }
        bool br = false;
        for (Pullenti.Ner.Token tt = res.EndToken.Next; tt != null; tt = tt.Next) {
            if (tt.IsTableControlChar) {
                break;
            }
            if (tt.IsChar('(')) {
                if (!non) {
                    break;
                }
                br = true;
                continue;
            }
            if (tt.IsChar(')')) {
                br = false;
                et = tt;
                break;
            }
            if (!(tt is Pullenti.Ner.TextToken)) {
                break;
            }
            if (tt.WhitespacesBeforeCount > 1) {
                if (tt.NewlinesBeforeCount > 1) {
                    break;
                }
                if (tt.Chars != res.EndToken.Chars) {
                    break;
                }
            }
            if (tt.Morph.Class.IsPreposition || tt.IsCommaAnd) {
                continue;
            }
            Pullenti.Morph.MorphClass dd = tt.GetMorphClassInDictionary();
            if (!dd.IsNoun && !dd.IsAdjective) {
                break;
            }
            Pullenti.Ner.Core.NounPhraseToken npt2 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(tt, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
            if (npt2 == null) {
                if (dd == Pullenti.Morph.MorphClass.Adjective) {
                    continue;
                }
                break;
            }
            if (m_StdNouns.TryParse(npt2.EndToken, Pullenti.Ner.Core.TerminParseAttr.No) == null) {
                break;
            }
            if (npt2.EndToken.Chars != res.EndToken.Chars) {
                break;
            }
            // These head nouns (or a preceding "ПРИ") may start a separate organization: stop if one is found.
            if ((npt2.EndToken.IsValue("УПРАВЛЕНИЕ", null) || npt2.EndToken.IsValue("ИНСТИТУТ", null) || npt2.EndToken.IsValue("УПРАВЛІННЯ", null)) || npt2.EndToken.IsValue("ІНСТИТУТ", null) || tt.Previous.IsValue("ПРИ", null)) {
                Pullenti.Ner.ReferentToken rt = tt.Kit.ProcessReferent(Pullenti.Ner.Org.OrganizationAnalyzer.ANALYZER_NAME, tt);
                if (rt != null) {
                    break;
                }
            }
            cou++;
            tt = npt2.EndToken;
            if (!_isNotTermNoun(tt)) {
                non = true;
                et = tt;
            }
        }
        if (non && !br) {
            res.StdOrgNameNouns += cou;
            res.EndToken = et;
        }
    }
    return res;
}
/// <summary>
/// Tries to turn the first city item of <paramref name="li"/> into a city referent token.
/// The name is accepted when the name itself, the ontology item, or the tokens around it
/// confirm it; otherwise the item is rejected unless <paramref name="always"/> is set.
/// </summary>
/// <param name="li">City items. The first one must have type <c>City</c> and begin with a text token;
/// the second one is inspected only for one check.</param>
/// <param name="oi">Receives the ontology item of the first list element. Stays null when the list is
/// null/empty or the first item is not a city.</param>
/// <param name="always">When true, a token is returned even without confirmation, unless the
/// "PERSON" analyzer recognises the fragment (see the check in the body).</param>
/// <returns>A referent token covering the first item, or null when the item is rejected.</returns>
static Pullenti.Ner.ReferentToken _tryNameExist(List<CityItemToken> li, out Pullenti.Ner.Core.IntOntologyItem oi, bool always)
{
    oi = null;
    // The empty list is rejected too: li[0] would throw ArgumentOutOfRangeException otherwise.
    if (li == null || li.Count == 0 || li[0].Typ != CityItemToken.ItemType.City)
    {
        return null;
    }
    oi = li[0].OntoItem;
    Pullenti.Ner.TextToken tt = li[0].BeginToken as Pullenti.Ner.TextToken;
    if (tt == null)
    {
        return null;
    }
    bool ok = false;
    string nam = (oi == null ? li[0].Value : oi.CanonicText);
    if (nam == null)
    {
        return null;
    }
    if (nam == "РИМ")
    {
        if (tt.Term == "РИМ")
        {
            // Not confirmed only when the following text token is flagged IsProperSecname in the dictionary.
            if (!((tt.Next is Pullenti.Ner.TextToken) && tt.Next.GetMorphClassInDictionary().IsProperSecname))
            {
                ok = true;
            }
        }
        else if (tt.Previous != null && tt.Previous.IsValue("В", null) && tt.Term == "РИМЕ")
        {
            ok = true;
        }
    }
    else if (oi != null && oi.Referent != null && oi.Owner.IsExtOntology)
    {
        ok = true;
    }
    else if (nam.EndsWith("ГРАД") || nam.EndsWith("СК"))
    {
        // These name endings are accepted without looking at the context.
        ok = true;
    }
    else if (nam.EndsWith("TOWN") || nam.StartsWith("SAN"))
    {
        ok = true;
    }
    else if (li[0].Chars.IsLatinLetter && li[0].BeginToken.Previous != null && (li[0].BeginToken.Previous.IsValue("IN", null) || li[0].BeginToken.Previous.IsValue("FROM", null)))
    {
        ok = true;
    }
    else
    {
        // Look to the right (same line): skip separators and function words, then require a geo
        // referent written in the same alphabet (Cyrillic or not) as the candidate.
        for (Pullenti.Ner.Token tt2 = li[0].EndToken.Next; tt2 != null; tt2 = tt2.Next)
        {
            if (tt2.IsNewlineBefore)
            {
                break;
            }
            if ((tt2.IsCharOf(",(") || tt2.Morph.Class.IsPreposition || tt2.Morph.Class.IsConjunction) || tt2.Morph.Class.IsMisc)
            {
                continue;
            }
            if ((tt2.GetReferent() is Pullenti.Ner.Geo.GeoReferent) && tt2.Chars.IsCyrillicLetter == li[0].Chars.IsCyrillicLetter)
            {
                ok = true;
            }
            break;
        }
        if (!ok)
        {
            // Same search to the left; only the first significant token is examined.
            for (Pullenti.Ner.Token tt2 = li[0].BeginToken.Previous; tt2 != null; tt2 = tt2.Previous)
            {
                if (tt2.IsNewlineAfter)
                {
                    break;
                }
                if ((tt2.IsCharOf(",)") || tt2.Morph.Class.IsPreposition || tt2.Morph.Class.IsConjunction) || tt2.Morph.Class.IsMisc)
                {
                    continue;
                }
                if ((tt2.GetReferent() is Pullenti.Ner.Geo.GeoReferent) && tt2.Chars.IsCyrillicLetter == li[0].Chars.IsCyrillicLetter)
                {
                    ok = true;
                }
                if (ok)
                {
                    // Withdraw the confirmation when the fragment parses as a street only
                    // together with its first item (i.e. the candidate is part of the street).
                    List<Pullenti.Ner.Address.Internal.StreetItemToken> sits = Pullenti.Ner.Address.Internal.StreetItemToken.TryParseList(li[0].BeginToken, null, 10);
                    if (sits != null && sits.Count > 1)
                    {
                        Pullenti.Ner.Address.Internal.AddressItemToken ss = Pullenti.Ner.Address.Internal.StreetDefineHelper.TryParseStreet(sits, false, false);
                        if (ss != null)
                        {
                            sits.RemoveAt(0);
                            if (Pullenti.Ner.Address.Internal.StreetDefineHelper.TryParseStreet(sits, false, false) == null)
                            {
                                ok = false;
                            }
                        }
                    }
                }
                if (ok)
                {
                    if (li.Count > 1 && li[1].Typ == CityItemToken.ItemType.ProperName && (li[1].WhitespacesBeforeCount < 3))
                    {
                        ok = false;
                    }
                    else
                    {
                        Pullenti.Morph.MorphClass mc = li[0].BeginToken.GetMorphClassInDictionary();
                        if (mc.IsProperName || mc.IsProperSurname || mc.IsAdjective)
                        {
                            ok = false;
                        }
                        else
                        {
                            // A noun phrase extending beyond the candidate also cancels the confirmation.
                            Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(li[0].BeginToken, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                            if (npt != null && npt.EndChar > li[0].EndChar)
                            {
                                ok = false;
                            }
                        }
                    }
                }
                if (Pullenti.Ner.Address.Internal.AddressItemToken.TryAttachOrg(li[0].BeginToken) != null)
                {
                    ok = false;
                }
                break;
            }
        }
    }
    if (always)
    {
        // A doubtful, widely separated, surname-like item that the PERSON analyzer accepts
        // cancels the forced attachment.
        if (li[0].WhitespacesBeforeCount > 3 && li[0].Doubtful && li[0].BeginToken.GetMorphClassInDictionary().IsProperSurname)
        {
            Pullenti.Ner.ReferentToken pp = li[0].Kit.ProcessReferent("PERSON", li[0].BeginToken);
            if (pp != null)
            {
                always = false;
            }
        }
    }
    if (li[0].BeginToken.Chars.IsLatinLetter && li[0].BeginToken == li[0].EndToken)
    {
        // Single Latin token followed (optionally after a comma) by a short (< 3 chars)
        // non-lowercase Latin token: confirmation is withdrawn.
        Pullenti.Ner.Token tt1 = li[0].EndToken.Next;
        if (tt1 != null && tt1.IsChar(','))
        {
            tt1 = tt1.Next;
        }
        if (((tt1 is Pullenti.Ner.TextToken) && tt1.Chars.IsLatinLetter && (tt1.LengthChar < 3)) && !tt1.Chars.IsAllLower)
        {
            ok = false;
        }
    }
    if (!ok && !always)
    {
        return null;
    }
    Pullenti.Ner.Geo.GeoReferent city = null;
    if (oi != null && (oi.Referent is Pullenti.Ner.Geo.GeoReferent) && !oi.Owner.IsExtOntology)
    {
        // Internal ontology: work on a clone with its occurrence list cleared.
        city = oi.Referent.Clone() as Pullenti.Ner.Geo.GeoReferent;
        city.Occurrence.Clear();
    }
    else
    {
        city = new Pullenti.Ner.Geo.GeoReferent();
        city.AddName(nam);
        if (oi != null && (oi.Referent is Pullenti.Ner.Geo.GeoReferent))
        {
            city.MergeSlots2(oi.Referent as Pullenti.Ner.Geo.GeoReferent, li[0].Kit.BaseLanguage);
        }
        if (!city.IsCity)
        {
            city.AddTypCity(li[0].Kit.BaseLanguage);
        }
    }
    return new Pullenti.Ner.ReferentToken(city, li[0].BeginToken, li[0].EndToken) { Morph = li[0].Morph };
}
/// <summary>
/// Tries to build a city referent from a "type noun + name" sequence of city items
/// (for example a "ГОРОД" item followed by a city or proper-name item).
/// </summary>
/// <param name="li">City items; at least two are required and the first must be a Noun or Misc item.
/// The list is modified on success: items after the name are removed and a leading Misc item is dropped.</param>
/// <param name="oi">Receives the ontology item of the name when the "ГОРОД"/"МІСТО"/Misc + City branch is taken;
/// null otherwise.</param>
/// <param name="always">When true, missing contextual confirmation does not reject the result.</param>
/// <returns>The referent token for the recognised city, or null when the sequence is rejected.</returns>
static Pullenti.Ner.ReferentToken _tryNounName(List<CityItemToken> li, out Pullenti.Ner.Core.IntOntologyItem oi, bool always)
{
    oi = null;
    if (li == null || (li.Count < 2) || (li[0].Typ != CityItemToken.ItemType.Noun && li[0].Typ != CityItemToken.ItemType.Misc))
    {
        return null;
    }
    bool ok = !li[0].Doubtful;
    if (ok && li[0].Typ == CityItemToken.ItemType.Misc)
    {
        ok = false;
    }
    string typ = (li[0].Typ == CityItemToken.ItemType.Misc ? null : li[0].Value);
    string typ2 = (li[0].Typ == CityItemToken.ItemType.Misc ? null : li[0].AltValue);
    string probAdj = null;
    int i1 = 1;
    // Two type nouns in a row (settlement/village word + second noun): the second noun becomes
    // the additional type and the name is taken from the following item.
    if ((typ != null && li[i1].Typ == CityItemToken.ItemType.Noun && ((i1 + 1) < li.Count)) && li[0].WhitespacesAfterCount <= 1 && (((Pullenti.Morph.LanguageHelper.EndsWith(typ, "ПОСЕЛОК") || Pullenti.Morph.LanguageHelper.EndsWith(typ, "СЕЛИЩЕ") || typ == "ДЕРЕВНЯ") || typ == "СЕЛО")))
    {
        if (li[i1].BeginToken == li[i1].EndToken)
        {
            Pullenti.Ner.Address.Internal.AddressItemToken ooo = Pullenti.Ner.Address.Internal.AddressItemToken.TryAttachOrg(li[i1].BeginToken);
            if (ooo != null && ooo.RefToken != null)
            {
                return null;
            }
        }
        typ2 = li[i1].Value;
        if (typ2 == "СТАНЦИЯ" && li[i1].BeginToken.IsValue("СТ", null) && ((i1 + 1) < li.Count))
        {
            // The "СТ" token may also stand for an adjective; prepare the form agreeing with the name.
            Pullenti.Ner.MorphCollection m = li[i1 + 1].Morph;
            if (m.Number == Pullenti.Morph.MorphNumber.Plural)
            {
                probAdj = "СТАРЫЕ";
            }
            else if (m.Gender == Pullenti.Morph.MorphGender.Feminie)
            {
                probAdj = "СТАРАЯ";
            }
            else if (m.Gender == Pullenti.Morph.MorphGender.Masculine)
            {
                probAdj = "СТАРЫЙ";
            }
            else
            {
                probAdj = "СТАРОЕ";
            }
        }
        i1++;
    }
    string name = li[i1].Value ?? ((li[i1].OntoItem == null ? null : li[i1].OntoItem.CanonicText));
    string altName = li[i1].AltValue;
    if (name == null)
    {
        return null;
    }
    Pullenti.Ner.MorphCollection mc = li[0].Morph;
    if (i1 == 1 && li[i1].Typ == CityItemToken.ItemType.City && (li[0].Value == "ГОРОД" || li[0].Value == "МІСТО" || li[0].Typ == CityItemToken.ItemType.Misc))
    {
        if (typ == null && ((i1 + 1) < li.Count) && li[i1 + 1].Typ == CityItemToken.ItemType.Noun)
        {
            return null;
        }
        oi = li[i1].OntoItem;
        if (oi != null)
        {
            name = oi.CanonicText;
        }
        // oi may be null here (city item without ontology entry); the original dereferenced it
        // unconditionally and threw NullReferenceException for names of one or two characters.
        if (name.Length > 2 || (oi != null && oi.MiscAttr != null))
        {
            if (!li[1].Doubtful || (oi != null && oi.MiscAttr != null))
            {
                ok = true;
            }
            else if (!ok && !li[1].IsNewlineBefore)
            {
                if (li[0].GeoObjectBefore || li[1].GeoObjectAfter)
                {
                    ok = true;
                }
                else if (Pullenti.Ner.Address.Internal.StreetDefineHelper.CheckStreetAfter(li[1].EndToken.Next))
                {
                    ok = true;
                }
                else if (li[1].EndToken.Next != null && (li[1].EndToken.Next.GetReferent() is Pullenti.Ner.Date.DateReferent))
                {
                    ok = true;
                }
                else if ((li[1].WhitespacesBeforeCount < 2) && li[1].OntoItem != null)
                {
                    // Both arms of the former IsNewlineAfter test assigned true.
                    ok = true;
                }
            }
            if (li[1].Doubtful && li[1].EndToken.Next != null && li[1].EndToken.Chars == li[1].EndToken.Next.Chars)
            {
                ok = false;
            }
            if (li[0].BeginToken.Previous != null && li[0].BeginToken.Previous.IsValue("В", null))
            {
                ok = true;
            }
        }
        if (!ok)
        {
            ok = CheckYearAfter(li[1].EndToken.Next);
        }
        if (!ok)
        {
            ok = CheckCityAfter(li[1].EndToken.Next);
        }
    }
    else if (li[i1].Typ == CityItemToken.ItemType.ProperName || li[i1].Typ == CityItemToken.ItemType.City)
    {
        if ((li[0].Value == "АДМИНИСТРАЦИЯ" || li[0].Value == "АДМІНІСТРАЦІЯ") && i1 == 1)
        {
            return null;
        }
        if (li[i1].IsNewlineBefore)
        {
            if (li.Count != 2)
            {
                return null;
            }
        }
        if (!li[0].Doubtful)
        {
            ok = true;
            if (name.Length < 2)
            {
                ok = false;
            }
            else if ((name.Length < 3) && li[0].Morph.Number != Pullenti.Morph.MorphNumber.Singular)
            {
                ok = false;
            }
            if (li[i1].Doubtful && !li[i1].GeoObjectAfter && !li[0].GeoObjectBefore)
            {
                if (li[i1].Morph.Case.IsGenitive)
                {
                    if (li[i1].EndToken.Next == null || MiscLocationHelper.CheckGeoObjectAfter(li[i1].EndToken.Next, false) || Pullenti.Ner.Address.Internal.AddressItemToken.CheckHouseAfter(li[i1].EndToken.Next, false, true))
                    {
                        // confirmed by what follows
                    }
                    else if (li[0].BeginToken.Previous == null || MiscLocationHelper.CheckGeoObjectBefore(li[0].BeginToken))
                    {
                        // confirmed by what precedes
                    }
                    else
                    {
                        ok = false;
                    }
                }
                if (ok)
                {
                    // "person property + person" reading of the same fragment wins over the city reading.
                    Pullenti.Ner.ReferentToken rt0 = li[i1].Kit.ProcessReferent("PERSONPROPERTY", li[0].BeginToken.Previous);
                    if (rt0 != null)
                    {
                        Pullenti.Ner.ReferentToken rt1 = li[i1].Kit.ProcessReferent("PERSON", li[i1].BeginToken);
                        if (rt1 != null)
                        {
                            ok = false;
                        }
                    }
                }
            }
            Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(li[i1].BeginToken, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
            if (npt != null)
            {
                if (npt.EndToken.EndChar > li[i1].EndChar && npt.Adjectives.Count > 0 && !npt.Adjectives[0].EndToken.Next.IsComma)
                {
                    ok = false;
                }
                else if (TerrItemToken.m_UnknownRegions.TryParse(npt.EndToken, Pullenti.Ner.Core.TerminParseAttr.FullwordsOnly) != null)
                {
                    // The phrase ends with an "unknown region" term: require a non-city geo referent
                    // directly before or after (an intervening comma is allowed).
                    bool ok1 = false;
                    if (li[0].BeginToken.Previous != null)
                    {
                        Pullenti.Ner.Token ttt = li[0].BeginToken.Previous;
                        if (ttt.IsComma && ttt.Previous != null)
                        {
                            ttt = ttt.Previous;
                        }
                        Pullenti.Ner.Geo.GeoReferent geo = ttt.GetReferent() as Pullenti.Ner.Geo.GeoReferent;
                        if (geo != null && !geo.IsCity)
                        {
                            ok1 = true;
                        }
                    }
                    if (npt.EndToken.Next != null)
                    {
                        Pullenti.Ner.Token ttt = npt.EndToken.Next;
                        if (ttt.IsComma && ttt.Next != null)
                        {
                            ttt = ttt.Next;
                        }
                        Pullenti.Ner.Geo.GeoReferent geo = ttt.GetReferent() as Pullenti.Ner.Geo.GeoReferent;
                        if (geo != null && !geo.IsCity)
                        {
                            ok1 = true;
                        }
                    }
                    if (!ok1)
                    {
                        return null;
                    }
                }
            }
            if (li[0].Value == "ПОРТ")
            {
                if (li[i1].Chars.IsAllUpper || li[i1].Chars.IsLatinLetter)
                {
                    return null;
                }
            }
        }
        else if (li[0].GeoObjectBefore)
        {
            ok = true;
        }
        else if (li[i1].GeoObjectAfter && !li[i1].IsNewlineAfter)
        {
            ok = true;
        }
        else
        {
            ok = CheckYearAfter(li[i1].EndToken.Next);
        }
        if (!ok)
        {
            ok = CheckStreetAfter(li[i1].EndToken.Next);
        }
        if (!ok && li[0].BeginToken.Previous != null && li[0].BeginToken.Previous.IsValue("В", null))
        {
            ok = true;
        }
    }
    else
    {
        return null;
    }
    if (!ok && !always)
    {
        if (MiscLocationHelper.CheckNearBefore(li[0].BeginToken.Previous) == null)
        {
            return null;
        }
    }
    // Keep only the items that form the result.
    if (li.Count > (i1 + 1))
    {
        li.RemoveRange(i1 + 1, li.Count - i1 - 1);
    }
    Pullenti.Ner.Geo.GeoReferent city = new Pullenti.Ner.Geo.GeoReferent();
    if (oi != null && oi.Referent != null)
    {
        city = oi.Referent.Clone() as Pullenti.Ner.Geo.GeoReferent;
        city.Occurrence.Clear();
    }
    if (!li[0].Morph.Case.IsUndefined && li[0].Morph.Gender != Pullenti.Morph.MorphGender.Undefined)
    {
        // Single adjective name: rebuild it in the case and gender of the type noun.
        if (li[i1].EndToken.Morph.Class.IsAdjective && li[i1].BeginToken == li[i1].EndToken)
        {
            string nam = Pullenti.Ner.Core.ProperNameHelper.GetNameEx(li[i1].BeginToken, li[i1].EndToken, Pullenti.Morph.MorphClass.Adjective, li[0].Morph.Case, li[0].Morph.Gender, false, false);
            if (nam != null && nam != name)
            {
                name = nam;
            }
        }
    }
    if (li[0].Morph.Case.IsNominative)
    {
        // In this case the alternative name is added before the main one.
        if (altName != null)
        {
            city.AddName(altName);
        }
        altName = null;
    }
    city.AddName(name);
    if (probAdj != null)
    {
        city.AddName(probAdj + " " + name);
    }
    if (altName != null)
    {
        city.AddName(altName);
        if (probAdj != null)
        {
            city.AddName(probAdj + " " + altName);
        }
    }
    if (typ != null)
    {
        city.AddTyp(typ);
    }
    else if (!city.IsCity)
    {
        city.AddTypCity(li[0].Kit.BaseLanguage);
    }
    if (typ2 != null)
    {
        city.AddTyp(typ2.ToLower());
    }
    if (li[0].HigherGeo != null && GeoOwnerHelper.CanBeHigher(li[0].HigherGeo, city))
    {
        city.Higher = li[0].HigherGeo;
    }
    // From here on li[0] refers to the next item when the leading Misc item is dropped.
    if (li[0].Typ == CityItemToken.ItemType.Misc)
    {
        li.RemoveAt(0);
    }
    Pullenti.Ner.ReferentToken res = new Pullenti.Ner.ReferentToken(city, li[0].BeginToken, li[li.Count - 1].EndToken) { Morph = mc };
    if (res.EndToken.Next != null && res.EndToken.Next.IsHiphen && (res.EndToken.Next.Next is Pullenti.Ner.NumberToken))
    {
        // "<name>-<N>" with a digit number below 50: the number becomes part of every name.
        Pullenti.Ner.NumberToken num = res.EndToken.Next.Next as Pullenti.Ner.NumberToken;
        if ((num.Typ == Pullenti.Ner.NumberSpellingType.Digit && !num.Morph.Class.IsAdjective && num.IntValue != null) && (num.IntValue.Value < 50))
        {
            foreach (Pullenti.Ner.Slot s in city.Slots)
            {
                if (s.TypeName == Pullenti.Ner.Geo.GeoReferent.ATTR_NAME)
                {
                    city.UploadSlot(s, string.Format("{0}-{1}", s.Value, num.Value));
                }
            }
            res.EndToken = num;
        }
    }
    if (li[0].BeginToken == li[0].EndToken && li[0].BeginToken.IsValue("ГОРОДОК", null))
    {
        if (Pullenti.Ner.Address.Internal.AddressItemToken.CheckHouseAfter(res.EndToken.Next, true, false))
        {
            return null;
        }
    }
    return res;
}
/// <summary>
/// Tries to build a territory referent token from a list of territory items.
/// The items are first classified into an existing ontology object, a new (unknown) name,
/// adjective items and a type noun; the combination found is then validated against the
/// surrounding context and converted into a <c>GeoReferent</c>.
/// </summary>
/// <param name="li">Territory items to analyse; null or empty yields null.</param>
/// <param name="ad">Analyzer data, passed through to the helper attach methods.</param>
/// <param name="attachAlways">When true, several contextual checks are relaxed.</param>
/// <param name="cits">Optional city items; only the first item's type (and the list length) is inspected.</param>
/// <param name="exists">Optional already known geo referents; a candidate adjective name found among
/// their names is accepted without further context.</param>
/// <returns>The referent token, or null when nothing acceptable was recognised.</returns>
public static Pullenti.Ner.ReferentToken TryAttachTerritory(List<TerrItemToken> li, Pullenti.Ner.Core.AnalyzerData ad, bool attachAlways = false, List<CityItemToken> cits = null, List<Pullenti.Ner.Geo.GeoReferent> exists = null)
{
    if (li == null || li.Count == 0)
    {
        return null;
    }
    TerrItemToken exObj = null;
    TerrItemToken newName = null;
    List<TerrItemToken> adjList = new List<TerrItemToken>();
    TerrItemToken noun = null;
    TerrItemToken addNoun = null;
    Pullenti.Ner.ReferentToken rt = _tryAttachMoscowAO(li, ad);
    if (rt != null)
    {
        return rt;
    }
    if (li[0].TerminItem != null && li[0].TerminItem.CanonicText == "ТЕРРИТОРИЯ")
    {
        Pullenti.Ner.ReferentToken res2 = _tryAttachPureTerr(li, ad);
        return res2;
    }
    if (li.Count == 2)
    {
        // Pair "Rzd item + RzdDir item" in either order: a territory named by RzdDir that refers to Rzd.
        if (li[0].Rzd != null && li[1].RzdDir != null)
        {
            Pullenti.Ner.Geo.GeoReferent rzd = new Pullenti.Ner.Geo.GeoReferent();
            rzd.AddName(li[1].RzdDir);
            rzd.AddTypTer(li[0].Kit.BaseLanguage);
            rzd.AddSlot(Pullenti.Ner.Geo.GeoReferent.ATTR_REF, li[0].Rzd.Referent, false, 0);
            rzd.AddExtReferent(li[0].Rzd);
            return new Pullenti.Ner.ReferentToken(rzd, li[0].BeginToken, li[1].EndToken);
        }
        if (li[1].Rzd != null && li[0].RzdDir != null)
        {
            Pullenti.Ner.Geo.GeoReferent rzd = new Pullenti.Ner.Geo.GeoReferent();
            rzd.AddName(li[0].RzdDir);
            rzd.AddTypTer(li[0].Kit.BaseLanguage);
            rzd.AddSlot(Pullenti.Ner.Geo.GeoReferent.ATTR_REF, li[1].Rzd.Referent, false, 0);
            rzd.AddExtReferent(li[1].Rzd);
            return new Pullenti.Ner.ReferentToken(rzd, li[0].BeginToken, li[1].EndToken);
        }
    }
    bool canBeCityBefore = false;
    bool adjTerrBefore = false;
    // An empty list is treated like null (cits[0] would throw otherwise).
    if (cits != null && cits.Count > 0)
    {
        if (cits[0].Typ == CityItemToken.ItemType.City)
        {
            canBeCityBefore = true;
        }
        else if (cits[0].Typ == CityItemToken.ItemType.Noun && cits.Count > 1)
        {
            canBeCityBefore = true;
        }
    }
    // Pass 1: classify the items. After the loop k is the number of items consumed (always >= 1).
    int k;
    for (k = 0; k < li.Count; k++)
    {
        if (li[k].OntoItem != null)
        {
            if (exObj != null || newName != null)
            {
                break;
            }
            if (noun != null)
            {
                if (k == 1)
                {
                    if (noun.TerminItem.CanonicText == "РАЙОН" || noun.TerminItem.CanonicText == "ОБЛАСТЬ" || noun.TerminItem.CanonicText == "СОЮЗ")
                    {
                        if (li[k].OntoItem.Referent is Pullenti.Ner.Geo.GeoReferent)
                        {
                            if ((li[k].OntoItem.Referent as Pullenti.Ner.Geo.GeoReferent).IsState)
                            {
                                break;
                            }
                        }
                        bool ok = false;
                        Pullenti.Ner.Token tt = li[k].EndToken.Next;
                        if (tt == null)
                        {
                            ok = true;
                        }
                        else if (tt.IsCharOf(",."))
                        {
                            ok = true;
                        }
                        if (!ok)
                        {
                            ok = MiscLocationHelper.CheckGeoObjectBefore(li[0].BeginToken);
                        }
                        if (!ok)
                        {
                            Pullenti.Ner.Address.Internal.AddressItemToken adr = Pullenti.Ner.Address.Internal.AddressItemToken.TryParse(tt, null, false, false, null);
                            if (adr != null)
                            {
                                if (adr.Typ == Pullenti.Ner.Address.Internal.AddressItemToken.ItemType.Street)
                                {
                                    ok = true;
                                }
                            }
                        }
                        if (!ok)
                        {
                            break;
                        }
                    }
                    // li[k].OntoItem is known to be non-null in this branch.
                    if (noun.BeginToken.IsValue("МО", null) || noun.BeginToken.IsValue("ЛО", null))
                    {
                        return null;
                    }
                }
            }
            exObj = li[k];
        }
        else if (li[k].TerminItem != null)
        {
            if (noun != null)
            {
                break;
            }
            if (li[k].TerminItem.IsAlwaysPrefix && k > 0)
            {
                break;
            }
            if (k > 0 && li[k].IsDoubt)
            {
                if (li[k].BeginToken == li[k].EndToken && li[k].BeginToken.IsValue("ЗАО", null))
                {
                    break;
                }
            }
            if (li[k].TerminItem.IsAdjective || li[k].IsGeoInDictionary)
            {
                adjList.Add(li[k]);
            }
            else
            {
                if (exObj != null)
                {
                    Pullenti.Ner.Geo.GeoReferent geo = exObj.OntoItem.Referent as Pullenti.Ner.Geo.GeoReferent;
                    if (geo == null)
                    {
                        break;
                    }
                    if (exObj.IsAdjective && (li[k].TerminItem.CanonicText == "СОЮЗ" || li[k].TerminItem.CanonicText == "ФЕДЕРАЦИЯ"))
                    {
                        string str = exObj.OntoItem.ToString();
                        if (!str.Contains(li[k].TerminItem.CanonicText))
                        {
                            return null;
                        }
                    }
                    if (li[k].TerminItem.CanonicText == "РАЙОН" || li[k].TerminItem.CanonicText == "ОКРУГ" || li[k].TerminItem.CanonicText == "КРАЙ")
                    {
                        // The known object has no such type: treat its text as a new adjective name
                        // instead (only for the "object + noun" pair at the very beginning).
                        StringBuilder tmp = new StringBuilder();
                        foreach (Pullenti.Ner.Slot s in geo.Slots)
                        {
                            if (s.TypeName == Pullenti.Ner.Geo.GeoReferent.ATTR_TYPE)
                            {
                                tmp.AppendFormat("{0};", s.Value);
                            }
                        }
                        if (!tmp.ToString().ToUpper().Contains(li[k].TerminItem.CanonicText))
                        {
                            if (k != 1 || newName != null)
                            {
                                break;
                            }
                            newName = li[0];
                            newName.IsAdjective = true;
                            newName.OntoItem = null;
                            exObj = null;
                        }
                    }
                }
                noun = li[k];
                if (k == 0)
                {
                    TerrItemToken tt = TerrItemToken.TryParse(li[k].BeginToken.Previous, null, true, false, null);
                    if (tt != null && tt.Morph.Class.IsAdjective)
                    {
                        adjTerrBefore = true;
                    }
                }
            }
        }
        else
        {
            if (exObj != null)
            {
                break;
            }
            if (newName != null)
            {
                break;
            }
            newName = li[k];
        }
    }
    string name = null;
    string altName = null;
    string fullName = null;
    Pullenti.Ner.MorphCollection morph = null;
    // Pass 2: validate the combination and compute the name(s).
    if (exObj != null)
    {
        if (exObj.IsAdjective && !exObj.Morph.Language.IsEn && noun == null)
        {
            if (attachAlways && exObj.EndToken.Next != null)
            {
                Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(exObj.BeginToken, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                if (exObj.EndToken.Next.IsCommaAnd)
                {
                }
                else if (npt == null)
                {
                }
                else
                {
                    // Adjective + street noun forming one noun phrase is a street, not a territory.
                    Pullenti.Ner.Address.Internal.StreetItemToken str = Pullenti.Ner.Address.Internal.StreetItemToken.TryParse(exObj.EndToken.Next, null, false, null, false);
                    if (str != null)
                    {
                        if (str.Typ == Pullenti.Ner.Address.Internal.StreetItemType.Noun && str.EndToken == npt.EndToken)
                        {
                            return null;
                        }
                    }
                }
            }
            else
            {
                CityItemToken cit = CityItemToken.TryParse(exObj.EndToken.Next, null, false, null);
                if (cit != null && (cit.Typ == CityItemToken.ItemType.Noun || cit.Typ == CityItemToken.ItemType.City))
                {
                    Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(exObj.BeginToken, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                    if (npt != null && npt.EndToken == cit.EndToken)
                    {
                    }
                    else
                    {
                        return null;
                    }
                }
                else if (exObj.BeginToken.IsValue("ПОДНЕБЕСНЫЙ", null))
                {
                }
                else
                {
                    return null;
                }
            }
        }
        if (noun == null && exObj.CanBeCity)
        {
            CityItemToken cit0 = CityItemToken.TryParseBack(exObj.BeginToken.Previous);
            if (cit0 != null && cit0.Typ != CityItemToken.ItemType.ProperName)
            {
                return null;
            }
        }
        if (exObj.IsDoubt && noun == null)
        {
            bool ok2 = false;
            if (_canBeGeoAfter(exObj.EndToken.Next))
            {
                ok2 = true;
            }
            else if (!exObj.CanBeSurname && !exObj.CanBeCity)
            {
                if ((exObj.EndToken.Next != null && exObj.EndToken.Next.IsChar(')') && exObj.BeginToken.Previous != null) && exObj.BeginToken.Previous.IsChar('('))
                {
                    ok2 = true;
                }
                else if (exObj.Chars.IsLatinLetter && exObj.BeginToken.Previous != null)
                {
                    if (exObj.BeginToken.Previous.IsValue("IN", null))
                    {
                        ok2 = true;
                    }
                    else if (exObj.BeginToken.Previous.IsValue("THE", null) && exObj.BeginToken.Previous.Previous != null && exObj.BeginToken.Previous.Previous.IsValue("IN", null))
                    {
                        ok2 = true;
                    }
                }
            }
            if (!ok2)
            {
                CityItemToken cit0 = CityItemToken.TryParseBack(exObj.BeginToken.Previous);
                if (cit0 != null && cit0.Typ != CityItemToken.ItemType.ProperName)
                {
                }
                else if (MiscLocationHelper.CheckGeoObjectBefore(exObj.BeginToken.Previous))
                {
                }
                else
                {
                    return null;
                }
            }
        }
        name = exObj.OntoItem.CanonicText;
        morph = exObj.Morph;
    }
    else if (newName != null)
    {
        if (noun == null)
        {
            return null;
        }
        for (int j = 1; j < k; j++)
        {
            if (li[j].IsNewlineBefore && !li[0].IsNewlineBefore)
            {
                if (Pullenti.Ner.Core.BracketHelper.CanBeStartOfSequence(li[j].BeginToken, false, false))
                {
                }
                else
                {
                    return null;
                }
            }
        }
        morph = noun.Morph;
        if (newName.IsAdjective)
        {
            if (noun.TerminItem.Acronym == "АО")
            {
                if (noun.BeginToken != noun.EndToken)
                {
                    return null;
                }
                if (newName.Morph.Gender != Pullenti.Morph.MorphGender.Feminie)
                {
                    return null;
                }
            }
            Pullenti.Ner.Geo.GeoReferent geoBefore = null;
            Pullenti.Ner.Token tt0 = li[0].BeginToken.Previous;
            if (tt0 != null && tt0.IsCommaAnd)
            {
                tt0 = tt0.Previous;
            }
            if (!li[0].IsNewlineBefore && tt0 != null)
            {
                geoBefore = tt0.GetReferent() as Pullenti.Ner.Geo.GeoReferent;
            }
            if (li.IndexOf(noun) < li.IndexOf(newName))
            {
                // Type noun first, adjective name after it.
                if (noun.TerminItem.IsState)
                {
                    return null;
                }
                if (newName.CanBeSurname && geoBefore == null)
                {
                    if (((noun.Morph.Case & newName.Morph.Case)).IsUndefined)
                    {
                        return null;
                    }
                }
                if (Pullenti.Ner.Core.MiscHelper.IsExistsInDictionary(newName.BeginToken, newName.EndToken, Pullenti.Morph.MorphClass.Adjective | Pullenti.Morph.MorphClass.Pronoun | Pullenti.Morph.MorphClass.Verb))
                {
                    if (noun.BeginToken != newName.BeginToken)
                    {
                        if (geoBefore == null)
                        {
                            if (li.Count == 2 && _canBeGeoAfter(li[1].EndToken.Next))
                            {
                            }
                            else if (li.Count == 3 && li[2].TerminItem != null && _canBeGeoAfter(li[2].EndToken.Next))
                            {
                            }
                            else if (newName.IsGeoInDictionary)
                            {
                            }
                            else if (newName.EndToken.IsNewlineAfter)
                            {
                            }
                            else
                            {
                                return null;
                            }
                        }
                    }
                }
                Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(newName.EndToken, Pullenti.Ner.Core.NounPhraseParseAttr.ParsePronouns, 0, null);
                if (npt != null && npt.EndToken != newName.EndToken)
                {
                    if (li.Count >= 3 && li[2].TerminItem != null && npt.EndToken == li[2].EndToken)
                    {
                        addNoun = li[2];
                    }
                    else
                    {
                        return null;
                    }
                }
                Pullenti.Ner.ReferentToken rtp = newName.Kit.ProcessReferent("PERSON", newName.BeginToken);
                if (rtp != null)
                {
                    return null;
                }
                name = Pullenti.Ner.Core.ProperNameHelper.GetNameEx(newName.BeginToken, newName.EndToken, Pullenti.Morph.MorphClass.Adjective, Pullenti.Morph.MorphCase.Undefined, noun.TerminItem.Gender, false, false);
            }
            else
            {
                // Adjective name first, type noun after it: look for any confirming context.
                bool ok = false;
                if (((k + 1) < li.Count) && li[k].TerminItem == null && li[k + 1].TerminItem != null)
                {
                    ok = true;
                }
                else if ((k < li.Count) && li[k].OntoItem != null)
                {
                    ok = true;
                }
                else if (k == li.Count && !newName.IsAdjInDictionary)
                {
                    ok = true;
                }
                else if (MiscLocationHelper.CheckGeoObjectBefore(li[0].BeginToken) || canBeCityBefore)
                {
                    ok = true;
                }
                else if (MiscLocationHelper.CheckGeoObjectAfter(li[k - 1].EndToken, false))
                {
                    ok = true;
                }
                else if (li.Count == 3 && k == 2)
                {
                    CityItemToken cit = CityItemToken.TryParse(li[2].BeginToken, null, false, null);
                    if (cit != null)
                    {
                        if (cit.Typ == CityItemToken.ItemType.City || cit.Typ == CityItemToken.ItemType.Noun)
                        {
                            ok = true;
                        }
                    }
                }
                else if (li.Count == 2)
                {
                    ok = _canBeGeoAfter(li[li.Count - 1].EndToken.Next);
                }
                if (!ok && !li[0].IsNewlineBefore && !li[0].Chars.IsAllLower)
                {
                    Pullenti.Ner.ReferentToken rt00 = li[0].Kit.ProcessReferent("PERSONPROPERTY", li[0].BeginToken.Previous);
                    if (rt00 != null)
                    {
                        ok = true;
                    }
                }
                if (noun.TerminItem != null && noun.TerminItem.IsStrong && newName.IsAdjective)
                {
                    ok = true;
                }
                if (noun.IsDoubt && adjList.Count == 0 && geoBefore == null)
                {
                    return null;
                }
                name = Pullenti.Ner.Core.ProperNameHelper.GetNameEx(newName.BeginToken, newName.EndToken, Pullenti.Morph.MorphClass.Adjective, Pullenti.Morph.MorphCase.Undefined, noun.TerminItem.Gender, false, false);
                if (!ok && !attachAlways)
                {
                    if (Pullenti.Ner.Core.MiscHelper.IsExistsInDictionary(newName.BeginToken, newName.EndToken, Pullenti.Morph.MorphClass.Adjective | Pullenti.Morph.MorphClass.Pronoun | Pullenti.Morph.MorphClass.Verb))
                    {
                        // A dictionary word is accepted only if an already known geo object bears this name.
                        if (exists != null)
                        {
                            foreach (Pullenti.Ner.Geo.GeoReferent e in exists)
                            {
                                if (e.FindSlot(Pullenti.Ner.Geo.GeoReferent.ATTR_NAME, name, true) != null)
                                {
                                    ok = true;
                                    break;
                                }
                            }
                        }
                        if (!ok)
                        {
                            return null;
                        }
                    }
                }
                fullName = string.Format("{0} {1}", Pullenti.Ner.Core.ProperNameHelper.GetNameEx(li[0].BeginToken, noun.BeginToken.Previous, Pullenti.Morph.MorphClass.Adjective, Pullenti.Morph.MorphCase.Undefined, noun.TerminItem.Gender, false, false), noun.TerminItem.CanonicText);
            }
        }
        else
        {
            if (!attachAlways || (noun.TerminItem != null && noun.TerminItem.CanonicText == "ФЕДЕРАЦИЯ"))
            {
                bool isLatin = noun.Chars.IsLatinLetter && newName.Chars.IsLatinLetter;
                if (li.IndexOf(noun) > li.IndexOf(newName))
                {
                    if (!isLatin)
                    {
                        return null;
                    }
                }
                if (!newName.IsDistrictName && !Pullenti.Ner.Core.BracketHelper.CanBeStartOfSequence(newName.BeginToken, false, false))
                {
                    if (adjList.Count == 0 && Pullenti.Ner.Core.MiscHelper.IsExistsInDictionary(newName.BeginToken, newName.EndToken, Pullenti.Morph.MorphClass.Noun | Pullenti.Morph.MorphClass.Pronoun))
                    {
                        if (li.Count == 2 && noun.IsCityRegion && (noun.WhitespacesAfterCount < 2))
                        {
                        }
                        else
                        {
                            return null;
                        }
                    }
                    if (!isLatin)
                    {
                        if ((noun.TerminItem.IsRegion && !attachAlways && (!adjTerrBefore || newName.IsDoubt)) && !noun.IsCityRegion && !noun.TerminItem.IsSpecificPrefix)
                        {
                            if (!MiscLocationHelper.CheckGeoObjectBefore(noun.BeginToken))
                            {
                                if (!noun.IsDoubt && noun.BeginToken != noun.EndToken)
                                {
                                }
                                else if ((noun.TerminItem.IsAlwaysPrefix && li.Count == 2 && li[0] == noun) && li[1] == newName)
                                {
                                }
                                else
                                {
                                    return null;
                                }
                            }
                        }
                        if (noun.IsDoubt && adjList.Count == 0)
                        {
                            if (noun.TerminItem.Acronym == "МО" || noun.TerminItem.Acronym == "ЛО")
                            {
                                if (k == (li.Count - 1) && li[k].TerminItem != null)
                                {
                                    // The trailing term item is consumed as an additional noun.
                                    addNoun = li[k];
                                    k++;
                                }
                                else if (li.Count == 2 && noun == li[0] && newName.ToString().EndsWith("совет"))
                                {
                                }
                                else
                                {
                                    return null;
                                }
                            }
                            else
                            {
                                return null;
                            }
                        }
                        Pullenti.Ner.ReferentToken pers = newName.Kit.ProcessReferent("PERSON", newName.BeginToken);
                        if (pers != null)
                        {
                            return null;
                        }
                    }
                }
            }
            name = Pullenti.Ner.Core.MiscHelper.GetTextValue(newName.BeginToken, newName.EndToken, Pullenti.Ner.Core.GetTextAttr.No);
            if (newName.BeginToken != newName.EndToken)
            {
                // Cut the name before an inner token that repeats (contains or is contained in) the type noun.
                for (Pullenti.Ner.Token ttt = newName.BeginToken.Next; ttt != null && ttt.EndChar <= newName.EndChar; ttt = ttt.Next)
                {
                    if (ttt.Chars.IsLetter)
                    {
                        TerrItemToken ty = TerrItemToken.TryParse(ttt, null, false, false, null);
                        if ((ty != null && ty.TerminItem != null && noun != null) && (ty.TerminItem.CanonicText.Contains(noun.TerminItem.CanonicText) || noun.TerminItem.CanonicText.Contains(ty.TerminItem.CanonicText)))
                        {
                            name = Pullenti.Ner.Core.MiscHelper.GetTextValue(newName.BeginToken, ttt.Previous, Pullenti.Ner.Core.GetTextAttr.No);
                            break;
                        }
                    }
                }
            }
            if (adjList.Count > 0)
            {
                Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(adjList[0].BeginToken, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                if (npt != null && npt.EndToken == noun.EndToken)
                {
                    altName = string.Format("{0} {1}", npt.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false), name);
                }
            }
        }
    }
    else
    {
        // Neither a known object nor a new name: a lone type noun may still refer to a geo object nearby.
        if ((li.Count == 1 && noun != null && noun.EndToken.Next != null) && (noun.EndToken.Next.GetReferent() is Pullenti.Ner.Geo.GeoReferent))
        {
            Pullenti.Ner.Geo.GeoReferent g = noun.EndToken.Next.GetReferent() as Pullenti.Ner.Geo.GeoReferent;
            if (noun.TerminItem != null)
            {
                string tyy = noun.TerminItem.CanonicText.ToLower();
                bool ooo = false;
                if (g.FindSlot(Pullenti.Ner.Geo.GeoReferent.ATTR_TYPE, tyy, true) != null)
                {
                    ooo = true;
                }
                else if (tyy.EndsWith("район") && g.FindSlot(Pullenti.Ner.Geo.GeoReferent.ATTR_TYPE, "район", true) != null)
                {
                    ooo = true;
                }
                if (ooo)
                {
                    return new Pullenti.Ner.ReferentToken(g, noun.BeginToken, noun.EndToken.Next) { Morph = noun.BeginToken.Morph };
                }
            }
        }
        if ((li.Count == 1 && noun == li[0] && li[0].TerminItem != null) && TerrItemToken.TryParse(li[0].EndToken.Next, null, true, false, null) == null && TerrItemToken.TryParse(li[0].BeginToken.Previous, null, true, false, null) == null)
        {
            if (li[0].Morph.Number == Pullenti.Morph.MorphNumber.Plural)
            {
                return null;
            }
            // Walk backwards to the nearest geo referent within a budget of 500
            // (a token costs 1, a line break 10).
            int cou = 0;
            string str = li[0].TerminItem.CanonicText.ToLower();
            for (Pullenti.Ner.Token tt = li[0].BeginToken.Previous; tt != null; tt = tt.Previous)
            {
                if (tt.IsNewlineAfter)
                {
                    cou += 10;
                }
                else
                {
                    cou++;
                }
                if (cou > 500)
                {
                    break;
                }
                Pullenti.Ner.Geo.GeoReferent g = tt.GetReferent() as Pullenti.Ner.Geo.GeoReferent;
                if (g == null)
                {
                    continue;
                }
                // The loop variable is reused for the forward scan; the outer loop ends right after it.
                // Any territory item found ahead within the same budget cancels the back reference.
                bool ok = true;
                cou = 0;
                for (tt = li[0].EndToken.Next; tt != null; tt = tt.Next)
                {
                    if (tt.IsNewlineBefore)
                    {
                        cou += 10;
                    }
                    else
                    {
                        cou++;
                    }
                    if (cou > 500)
                    {
                        break;
                    }
                    TerrItemToken tee = TerrItemToken.TryParse(tt, null, true, false, null);
                    if (tee == null)
                    {
                        continue;
                    }
                    ok = false;
                    break;
                }
                if (ok)
                {
                    // Check the found object and up to two of its owners for the required type.
                    for (int ii = 0; g != null && (ii < 3); g = g.Higher, ii++)
                    {
                        if (g.FindSlot(Pullenti.Ner.Geo.GeoReferent.ATTR_TYPE, str, true) != null)
                        {
                            return new Pullenti.Ner.ReferentToken(g, li[0].BeginToken, li[0].EndToken) { Morph = noun.BeginToken.Morph };
                        }
                    }
                }
                break;
            }
        }
        return null;
    }
    // Pass 3: build (or reuse) the referent.
    Pullenti.Ner.Geo.GeoReferent ter = null;
    if (exObj != null && (exObj.Tag is Pullenti.Ner.Geo.GeoReferent))
    {
        ter = exObj.Tag as Pullenti.Ner.Geo.GeoReferent;
    }
    else
    {
        ter = new Pullenti.Ner.Geo.GeoReferent();
        if (exObj != null)
        {
            Pullenti.Ner.Geo.GeoReferent geo = exObj.OntoItem.Referent as Pullenti.Ner.Geo.GeoReferent;
            if (geo != null && !geo.IsCity)
            {
                ter.MergeSlots2(geo, li[0].Kit.BaseLanguage);
            }
            else
            {
                ter.AddName(name);
            }
            if (noun == null && exObj.CanBeCity)
            {
                ter.AddTypCity(li[0].Kit.BaseLanguage);
            }
        }
        else if (newName != null)
        {
            ter.AddName(name);
            if (altName != null)
            {
                ter.AddName(altName);
            }
        }
        if (noun != null)
        {
            if (noun.TerminItem.CanonicText == "АО")
            {
                ter.AddTyp((li[0].Kit.BaseLanguage.IsUa ? "АВТОНОМНИЙ ОКРУГ" : "АВТОНОМНЫЙ ОКРУГ"));
            }
            else if (noun.TerminItem.CanonicText == "МУНИЦИПАЛЬНОЕ СОБРАНИЕ" || noun.TerminItem.CanonicText == "МУНІЦИПАЛЬНЕ ЗБОРИ")
            {
                ter.AddTyp((li[0].Kit.BaseLanguage.IsUa ? "МУНІЦИПАЛЬНЕ УТВОРЕННЯ" : "МУНИЦИПАЛЬНОЕ ОБРАЗОВАНИЕ"));
            }
            else if (noun.TerminItem.Acronym == "МО" && addNoun != null)
            {
                ter.AddTyp(addNoun.TerminItem.CanonicText);
            }
            else
            {
                if (noun.TerminItem.CanonicText == "СОЮЗ" && exObj != null && exObj.EndChar > noun.EndChar)
                {
                    return new Pullenti.Ner.ReferentToken(ter, exObj.BeginToken, exObj.EndToken) { Morph = exObj.Morph };
                }
                ter.AddTyp(noun.TerminItem.CanonicText);
                if (noun.TerminItem.IsRegion && ter.IsState)
                {
                    ter.AddTypReg(li[0].Kit.BaseLanguage);
                }
            }
        }
        if (ter.IsState && ter.IsRegion)
        {
            foreach (TerrItemToken a in adjList)
            {
                if (a.TerminItem.IsRegion)
                {
                    ter.AddTypReg(li[0].Kit.BaseLanguage);
                    break;
                }
            }
        }
        if (ter.IsState)
        {
            if (fullName != null)
            {
                ter.AddName(fullName);
            }
        }
    }
    Pullenti.Ner.ReferentToken res = new Pullenti.Ner.ReferentToken(ter, li[0].BeginToken, li[k - 1].EndToken);
    if (noun != null && noun.Morph.Class.IsNoun)
    {
        res.Morph = noun.Morph;
    }
    else
    {
        // Collect the morphology of all consumed items; with a type noun present, adjective
        // variants are re-labelled as nouns.
        res.Morph = new Pullenti.Ner.MorphCollection();
        for (int ii = 0; ii < k; ii++)
        {
            foreach (Pullenti.Morph.MorphBaseInfo v in li[ii].Morph.Items)
            {
                Pullenti.Morph.MorphBaseInfo bi = new Pullenti.Morph.MorphBaseInfo();
                bi.CopyFrom(v);
                if (noun != null)
                {
                    if (bi.Class.IsAdjective)
                    {
                        bi.Class = Pullenti.Morph.MorphClass.Noun;
                    }
                }
                res.Morph.AddItem(bi);
            }
        }
    }
    if (li[0].TerminItem != null && li[0].TerminItem.IsSpecificPrefix)
    {
        res.BeginToken = li[0].EndToken.Next;
    }
    if (addNoun != null && addNoun.EndChar > res.EndChar)
    {
        res.EndToken = addNoun.EndToken;
    }
    if ((res.BeginToken.Previous is Pullenti.Ner.TextToken) && (res.WhitespacesBeforeCount < 2))
    {
        // A preceding "АР" token is included when the territory has a republic type.
        Pullenti.Ner.TextToken tt = res.BeginToken.Previous as Pullenti.Ner.TextToken;
        if (tt.Term == "АР")
        {
            foreach (string ty in ter.Typs)
            {
                if (ty.Contains("республика") || ty.Contains("республіка"))
                {
                    res.BeginToken = tt;
                    break;
                }
            }
        }
    }
    return res;
}
/// <summary>
/// Builds the candidate semantic links between a noun phrase (master) and an attached element.
/// </summary>
/// <param name="npt1">Master noun phrase; null yields null.</param>
/// <param name="slave">Attached element. A verb phrase is delegated to <c>_tryCreateInf</c>;
/// anything that is not a <c>SemanticAbstractSlave</c> yields an empty list.</param>
/// <param name="gr">Derivate group of the master; may be null (TryCreateLinks passes null when
/// no group was found).</param>
/// <returns>List of link variants (possibly empty), or null when an argument is null.</returns>
static List<SemanticLink> _tryCreateNoun(Pullenti.Ner.Core.NounPhraseToken npt1, Pullenti.Ner.MetaToken slave, Pullenti.Semantic.Utils.DerivateGroup gr)
{
    if (npt1 == null || slave == null)
    {
        return null;
    }
    if (slave is Pullenti.Ner.Core.VerbPhraseToken)
    {
        return _tryCreateInf(npt1, slave as Pullenti.Ner.Core.VerbPhraseToken, gr);
    }
    SemanticAbstractSlave sla2 = slave as SemanticAbstractSlave;
    List<SemanticLink> res = new List<SemanticLink>();
    if (sla2 == null)
    {
        return res;
    }
    Pullenti.Semantic.Utils.ControlModelItem cit = FindControlItem(npt1, gr);
    _createRoles(cit, sla2.Preposition, sla2.Morph.Case, res, false, false);
    if (res.Count == 1 && res[0].Role == SemanticRole.Agent && res[0].Question == Pullenti.Semantic.Utils.ControlModelQuestion.BaseInstrumental)
    {
        // A single instrumental "agent" link: prefer the role the first (verb) model item of the
        // group assigns to the instrumental question. gr is checked first because it may be null.
        if (gr != null && gr.Model.Items.Count > 0 && gr.Model.Items[0].Typ == Pullenti.Semantic.Utils.ControlModelItemType.Verb && gr.Model.Items[0].Links.ContainsKey(Pullenti.Semantic.Utils.ControlModelQuestion.BaseInstrumental))
        {
            res[0].Role = gr.Model.Items[0].Links[Pullenti.Semantic.Utils.ControlModelQuestion.BaseInstrumental];
        }
    }
    // "ok" marks an idiomatic pair: the last token of the slave source matches one of the words
    // listed for the master word (NextWords) or for the group model (Pacients).
    bool ok = false;
    Pullenti.Semantic.Utils.DerivateWord w = FindWordInGroup(npt1, gr);
    if (w != null && w.NextWords != null && w.NextWords.Count > 0)
    {
        foreach (string n in w.NextWords)
        {
            if (sla2.Source != null)
            {
                if (sla2.Source.EndToken.IsValue(n, null))
                {
                    ok = true;
                    break;
                }
            }
        }
    }
    if (gr != null && gr.Model.Pacients.Count > 0)
    {
        foreach (string n in gr.Model.Pacients)
        {
            if (sla2.Source != null)
            {
                if (sla2.Source.EndToken.IsValue(n, null))
                {
                    ok = true;
                    break;
                }
            }
        }
    }
    if (ok)
    {
        if (res.Count == 0)
        {
            res.Add(new SemanticLink() { Question = Pullenti.Semantic.Utils.ControlModelQuestion.BaseGenetive, Role = SemanticRole.Pacient, Idiom = true });
        }
        foreach (SemanticLink r in res)
        {
            r.Rank += 4;
            if (r.Role == SemanticRole.Common)
            {
                r.Role = SemanticRole.Strong;
            }
            // Extra weight when the slave immediately follows the master.
            if (npt1.EndToken.Next == sla2.BeginToken)
            {
                r.Rank += 2;
            }
            r.Idiom = true;
        }
    }
    return res;
}
/// <summary>
/// Try to create semantic links between two elements.
/// An element may be a noun phrase (NounPhraseToken) or a verb phrase (VerbPhraseToken).
/// </summary>
/// <param name="master">the main element</param>
/// <param name="slave">the element being attached (may also be a SemanticAbstractSlave)</param>
/// <param name="onto">additional ontology dictionary</param>
/// <returns>list of link variants (possibly empty)</returns>
public static List <SemanticLink> TryCreateLinks(Pullenti.Ner.MetaToken master, Pullenti.Ner.MetaToken slave, ISemanticOnto onto = null)
{
    List <SemanticLink> links = new List <SemanticLink>();
    Pullenti.Ner.Core.VerbPhraseToken masterVerb = master as Pullenti.Ner.Core.VerbPhraseToken;
    Pullenti.Ner.Core.VerbPhraseToken slaveVerb = slave as Pullenti.Ner.Core.VerbPhraseToken;
    Pullenti.Ner.Core.NounPhraseToken masterNoun = master as Pullenti.Ner.Core.NounPhraseToken;

    // A noun-phrase slave is wrapped into an abstract slave before any matching.
    Pullenti.Ner.Core.NounPhraseToken slaveNoun = slave as Pullenti.Ner.Core.NounPhraseToken;
    if (slaveNoun != null)
    {
        slave = SemanticAbstractSlave.CreateFromNoun(slaveNoun);
    }
    SemanticAbstractSlave abstractSlave = slave as SemanticAbstractSlave;

    // A verb-phrase slave is accepted only when both its first and last verbs are infinitives.
    if (slaveVerb != null && !(slaveVerb.FirstVerb.IsVerbInfinitive && slaveVerb.LastVerb.IsVerbInfinitive))
    {
        return(links);
    }

    // Collect candidates: once per derivate group, or a single attempt with a null group.
    List <Pullenti.Semantic.Utils.DerivateGroup> groups = FindDerivates(master);
    if (groups != null && groups.Count > 0)
    {
        foreach (Pullenti.Semantic.Utils.DerivateGroup grp in groups)
        {
            List <SemanticLink> found = (masterVerb != null ? _tryCreateVerb(masterVerb, slave, grp) : _tryCreateNoun(masterNoun, slave, grp));
            if (found != null && found.Count > 0)
            {
                links.AddRange(found);
            }
        }
    }
    else
    {
        List <SemanticLink> found = (masterVerb != null ? _tryCreateVerb(masterVerb, slave, null) : _tryCreateNoun(masterNoun, slave, null));
        if (found != null)
        {
            links.AddRange(found);
        }
    }

    // Noun master + genitive slave without preposition: make sure a genitive link exists
    // (skipped when the master noun is a personal pronoun).
    if (masterNoun != null && abstractSlave != null && abstractSlave.Morph.Case.IsGenitive && abstractSlave.Preposition == null
        && !masterNoun.Noun.BeginToken.GetMorphClassInDictionary().IsPersonalPronoun)
    {
        bool genitiveMissing = true;
        for (int k = 0; k < links.Count; k++)
        {
            if (links[k].Question == Pullenti.Semantic.Utils.ControlModelQuestion.BaseGenetive)
            {
                genitiveMissing = false;
                break;
            }
        }
        if (genitiveMissing)
        {
            links.Add(new SemanticLink() { Modelled = true, Master = masterNoun, Slave = abstractSlave, Rank = 0.5, Question = Pullenti.Semantic.Utils.ControlModelQuestion.BaseGenetive });
        }
    }

    // A link confirmed by the ontology boosts the existing variants or adds a strong one.
    if (onto != null)
    {
        string masterKey = GetKeyword(master);
        string slaveKey = GetKeyword(slave);
        if (slaveKey != null && onto.CheckLink(masterKey, slaveKey))
        {
            if (links.Count == 0)
            {
                links.Add(new SemanticLink() { Role = SemanticRole.Strong, Master = master, Slave = slave, Rank = 3 });
            }
            else
            {
                foreach (SemanticLink link in links)
                {
                    link.Rank += 3;
                    if (link.Role == SemanticRole.Common)
                    {
                        link.Role = SemanticRole.Strong;
                    }
                }
            }
        }
    }

    // A master noun phrase whose first adjective is a pronoun, or that has an anaphor,
    // weakens its genitive links.
    if (masterNoun != null)
    {
        bool pronounFirst = masterNoun.Adjectives.Count > 0 && masterNoun.Adjectives[0].BeginToken.Morph.Class.IsPronoun;
        if (pronounFirst || masterNoun.Anafor != null)
        {
            foreach (SemanticLink link in links)
            {
                if (link.Question != Pullenti.Semantic.Utils.ControlModelQuestion.BaseGenetive)
                {
                    continue;
                }
                link.Rank -= 0.5;
                if (link.Role == SemanticRole.Strong)
                {
                    link.Role = SemanticRole.Common;
                }
            }
        }
    }

    // Every strong link halves the rank of each non-strong link.
    for (int s = 0; s < links.Count; s++)
    {
        SemanticLink strong = links[s];
        if (strong.Role != SemanticRole.Strong)
        {
            continue;
        }
        for (int k = 0; k < links.Count; k++)
        {
            SemanticLink other = links[k];
            if (other != strong && other.Role != SemanticRole.Strong)
            {
                other.Rank /= 2;
            }
        }
    }

    // Adjacent-swap ordering by CompareTo; elements that compare equal keep their relative order.
    for (int pass = links.Count; pass > 0; pass--)
    {
        for (int k = 0; k + 1 < links.Count; k++)
        {
            if (links[k].CompareTo(links[k + 1]) > 0)
            {
                SemanticLink tmp = links[k + 1];
                links[k + 1] = links[k];
                links[k] = tmp;
            }
        }
    }

    // All returned links refer to the (possibly wrapped) slave and the original master.
    for (int k = 0; k < links.Count; k++)
    {
        links[k].Master = master;
        links[k].Slave = slave;
    }
    return(links);
}