// Checks that all leveled items of this referent are at a lower level than those of the argument.
// Returns true immediately when the argument is a DecreeReferent.
// NOTE: the ignoreEquals parameter is not read anywhere in this method.
internal bool IsAllItemsLessLevel(Pullenti.Ner.Referent upperParts, bool ignoreEquals)
{
    if (upperParts is DecreeReferent)
    {
        return true;
    }

    foreach (Pullenti.Ner.Slot own in Slots)
    {
        int ownLevel = this._getLevel(own.TypeName);
        if (ownLevel >= 0)
        {
            bool upperHasSameType = upperParts.FindSlot(own.TypeName, null, true) != null;
            if (upperHasSameType)
            {
                // Same slot type on both sides: the values must coincide.
                if (upperParts.FindSlot(own.TypeName, own.Value, true) == null)
                {
                    return false;
                }
            }
            else
            {
                // Otherwise every slot of the argument must have a strictly smaller level.
                foreach (Pullenti.Ner.Slot upper in upperParts.Slots)
                {
                    int upperLevel = this._getLevel(upper.TypeName);
                    if (upperLevel >= ownLevel)
                    {
                        return false;
                    }
                }
            }
        }
    }

    return true;
}
// Checks whether the given referent may be attached to this one as ATTR_REF.
// Only geo referents and referents whose TypeName is "ORGANIZATION" can be accepted;
// the decision depends on this referent's Name.
public bool CanHasRef(Pullenti.Ner.Referent r)
{
    string title = Name;
    if (title == null || r == null)
    {
        return false;
    }

    Pullenti.Ner.Geo.GeoReferent geo = r as Pullenti.Ner.Geo.GeoReferent;
    if (geo != null)
    {
        if (Pullenti.Morph.LanguageHelper.EndsWithEx(title, "президент", "губернатор", null, null))
        {
            return geo.IsState || geo.IsRegion;
        }
        if (title == "мэр" || title == "градоначальник")
        {
            return geo.IsCity;
        }
        return title == "глава";
    }

    if (r.TypeName != "ORGANIZATION")
    {
        return false;
    }

    // These titles are never accepted for an organization.
    bool rejectedTitle = Pullenti.Morph.LanguageHelper.EndsWith(title, "губернатор")
        || title == "мэр"
        || title == "градоначальник"
        || title == "президент";
    if (rejectedTitle)
    {
        return false;
    }

    // A title containing "министр" requires the organization to have a "министерство" slot value.
    if (title.Contains("министр") && r.FindSlot(null, "министерство", true) == null)
    {
        return false;
    }

    // A title ending with "директор" is rejected when the organization has a "суд" slot value.
    if (title.EndsWith("директор") && r.FindSlot(null, "суд", true) != null)
    {
        return false;
    }

    return true;
}
/// <summary>
/// Tries to recognize an eponym fragment (for example after the words "ИМЕНИ" / "ИМ.") starting at the given token.
/// </summary>
/// <param name="t">first token of the candidate fragment</param>
/// <param name="mustHasPrefix">when true, a result is produced only if an explicit "ИМЕНИ"/"ИМ" prefix was seen</param>
/// <returns>the recognized token with its <c>Eponyms</c> list filled, or null when nothing was recognized</returns>
public static OrgItemEponymToken TryAttach(Pullenti.Ner.Token t, bool mustHasPrefix = false)
{
    Pullenti.Ner.TextToken tt = t as Pullenti.Ner.TextToken;
    if (tt == null)
    {
        if (t == null)
        {
            return null;
        }

        // A non-text token: accept a few fixed dates ...
        Pullenti.Ner.Referent r1 = t.GetReferent();
        if (r1 != null && r1.TypeName == "DATE")
        {
            string str = r1.ToString().ToUpper();
            if ((str == "1 МАЯ" || str == "7 ОКТЯБРЯ" || str == "9 МАЯ") || str == "8 МАРТА")
            {
                OrgItemEponymToken dt = new OrgItemEponymToken(t, t) { Eponyms = new List<string>() };
                dt.Eponyms.Add(str);
                return dt;
            }
        }

        // ... or an age expression followed closely by a non-lowercase Cyrillic token.
        Pullenti.Ner.NumberToken age = Pullenti.Ner.Core.NumberHelper.TryParseAge(t);
        if ((age != null && (((age.EndToken.Next is Pullenti.Ner.TextToken) || (age.EndToken.Next is Pullenti.Ner.ReferentToken))) && (age.WhitespacesAfterCount < 3)) && !age.EndToken.Next.Chars.IsAllLower && age.EndToken.Next.Chars.IsCyrillicLetter)
        {
            OrgItemEponymToken dt = new OrgItemEponymToken(t, age.EndToken.Next) { Eponyms = new List<string>() };
            dt.Eponyms.Add(string.Format("{0} {1}", age.Value, dt.EndToken.GetSourceText().ToUpper()));
            return dt;
        }
        return null;
    }

    Pullenti.Ner.Token t1 = null;
    bool full = false;
    bool hasName = false;
    if (tt.Term == "ИМЕНИ" || tt.Term == "ІМЕНІ")
    {
        t1 = t.Next;
        full = true;
        hasName = true;
    }
    else if (((tt.Term == "ИМ" || tt.Term == "ІМ")) && tt.Next != null)
    {
        if (tt.Next.IsChar('.'))
        {
            t1 = tt.Next.Next;
            full = true;
        }
        else if ((tt.Next is Pullenti.Ner.TextToken) && tt.Chars.IsAllLower && !tt.Next.Chars.IsAllLower)
        {
            t1 = tt.Next;
        }
        hasName = true;
    }
    else if (tt.Previous != null && ((tt.Previous.IsValue("ФОНД", null) || tt.Previous.IsValue("ХРАМ", null) || tt.Previous.IsValue("ЦЕРКОВЬ", "ЦЕРКВА"))))
    {
        // No explicit prefix: the name directly follows "ФОНД" / "ХРАМ" / "ЦЕРКОВЬ".
        if ((!tt.Chars.IsCyrillicLetter || tt.Morph.Class.IsPreposition || tt.Morph.Class.IsConjunction) || !tt.Chars.IsLetter)
        {
            return null;
        }
        if (tt.WhitespacesBeforeCount != 1)
        {
            return null;
        }
        if (tt.Chars.IsAllLower)
        {
            return null;
        }
        if (tt.Morph.Class.IsAdjective)
        {
            Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(tt, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
            if (npt != null && npt.BeginToken != npt.EndToken)
            {
                return null;
            }
        }
        OrgItemNameToken na = OrgItemNameToken.TryAttach(tt, null, false, true);
        if (na != null)
        {
            if (na.IsEmptyWord || na.IsStdName || na.IsStdTail)
            {
                return null;
            }
        }
        t1 = tt;
    }

    if (t1 == null || ((t1.IsNewlineBefore && !full)))
    {
        return null;
    }
    if (tt.Previous != null && tt.Previous.Morph.Class.IsPreposition)
    {
        return null;
    }
    if (mustHasPrefix && !hasName)
    {
        return null;
    }

    // A date with a day but without a year after the full prefix is taken as the eponym itself.
    Pullenti.Ner.Referent r = t1.GetReferent();
    if ((r != null && r.TypeName == "DATE" && full) && r.FindSlot("DAY", null, true) != null && r.FindSlot("YEAR", null, true) == null)
    {
        OrgItemEponymToken dt = new OrgItemEponymToken(t, t1) { Eponyms = new List<string>() };
        dt.Eponyms.Add(r.ToString().ToUpper());
        return dt;
    }

    bool holy = false;
    if ((t1.IsValue("СВЯТОЙ", null) || t1.IsValue("СВЯТИЙ", null) || t1.IsValue("СВ", null)) || t1.IsValue("СВЯТ", null))
    {
        t1 = t1.Next;
        holy = true;
        if (t1 != null && t1.IsChar('.'))
        {
            t1 = t1.Next;
        }
    }
    if (t1 == null)
    {
        return null;
    }

    Pullenti.Morph.MorphClass cl = t1.GetMorphClassInDictionary();
    if (cl.IsNoun || cl.IsAdjective)
    {
        Pullenti.Ner.ReferentToken rt = t1.Kit.ProcessReferent("PERSON", t1);
        if (rt != null && rt.Referent.TypeName == "PERSON" && rt.BeginToken != rt.EndToken)
        {
            string e = rt.Referent.GetStringValue("LASTNAME");
            if (e != null)
            {
                if (rt.EndToken.IsValue(e, null))
                {
                    OrgItemEponymToken re = new OrgItemEponymToken(t, rt.EndToken);
                    re.Eponyms.Add(rt.EndToken.GetSourceText());
                    return re;
                }
            }
        }
    }

    Pullenti.Ner.NumberToken nt = Pullenti.Ner.Core.NumberHelper.TryParseAnniversary(t1);
    if (nt != null && nt.Typ == Pullenti.Ner.NumberSpellingType.Age)
    {
        Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(nt.EndToken.Next, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
        if (npt != null)
        {
            string s = string.Format("{0}-{1} {2}", nt.Value, (t.Kit.BaseLanguage.IsUa ? "РОКІВ" : "ЛЕТ"), Pullenti.Ner.Core.MiscHelper.GetTextValue(npt.BeginToken, npt.EndToken, Pullenti.Ner.Core.GetTextAttr.No));
            OrgItemEponymToken res = new OrgItemEponymToken(t, npt.EndToken);
            res.Eponyms.Add(s);
            return res;
        }
    }

    List<PersonItemToken> its = PersonItemToken.TryAttach(t1);
    if (its == null)
    {
        // No person items: a geo referent token is still accepted as an eponym.
        if ((t1 is Pullenti.Ner.ReferentToken) && (t1.GetReferent() is Pullenti.Ner.Geo.GeoReferent))
        {
            string s = Pullenti.Ner.Core.MiscHelper.GetTextValue(t1, t1, Pullenti.Ner.Core.GetTextAttr.No);
            OrgItemEponymToken re = new OrgItemEponymToken(t, t1);
            re.Eponyms.Add(s);
            return re;
        }
        return null;
    }

    List<string> eponims = new List<string>();
    int i = 0;
    if (its[i].Typ == PersonItemType.LocaseWord)
    {
        i++;
    }
    if (i >= its.Count)
    {
        return null;
    }
    if (!full)
    {
        if (its[i].BeginToken.Morph.Class.IsAdjective && !its[i].BeginToken.Morph.Class.IsProperSurname)
        {
            return null;
        }
    }

    if (its[i].Typ == PersonItemType.Initial)
    {
        // Initial(s) followed by a surname/name, optionally repeated through "and".
        i++;
        while (true)
        {
            if ((i < its.Count) && its[i].Typ == PersonItemType.Initial)
            {
                i++;
            }
            if (i >= its.Count || ((its[i].Typ != PersonItemType.Surname && its[i].Typ != PersonItemType.Name)))
            {
                break;
            }
            eponims.Add(its[i].Value);
            t1 = its[i].EndToken;
            if ((i + 2) >= its.Count || its[i + 1].Typ != PersonItemType.And || its[i + 2].Typ != PersonItemType.Initial)
            {
                break;
            }
            i += 3;
        }
    }
    else if (((i + 1) < its.Count) && its[i].Typ == PersonItemType.Name && its[i + 1].Typ == PersonItemType.Surname)
    {
        eponims.Add(its[i + 1].Value);
        t1 = its[i + 1].EndToken;
        i += 2;
        if ((((i + 2) < its.Count) && its[i].Typ == PersonItemType.And && its[i + 1].Typ == PersonItemType.Name) && its[i + 2].Typ == PersonItemType.Surname)
        {
            eponims.Add(its[i + 2].Value);
            t1 = its[i + 2].EndToken;
        }
    }
    else if (its[i].Typ == PersonItemType.Surname)
    {
        // Exactly two items with equal Chars are glued into one compound surname.
        if (its.Count == (i + 2) && its[i].Chars == its[i + 1].Chars)
        {
            its[i].Value += (" " + its[i + 1].Value);
            its[i].EndToken = its[i + 1].EndToken;
            its.RemoveAt(i + 1);
        }
        eponims.Add(its[i].Value);
        if (((i + 1) < its.Count) && its[i + 1].Typ == PersonItemType.Name)
        {
            if ((i + 2) == its.Count)
            {
                i++;
            }
            else if (its[i + 2].Typ != PersonItemType.Surname)
            {
                i++;
            }
        }
        else if (((i + 1) < its.Count) && its[i + 1].Typ == PersonItemType.Initial)
        {
            if ((i + 2) == its.Count)
            {
                i++;
            }
            else if (its[i + 2].Typ == PersonItemType.Initial && (i + 3) == its.Count)
            {
                i += 2;
            }
        }
        else if (((i + 2) < its.Count) && its[i + 1].Typ == PersonItemType.And && its[i + 2].Typ == PersonItemType.Surname)
        {
            bool ok = true;
            Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(its[i + 2].BeginToken, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
            if (npt != null && !npt.Morph.Case.IsGenitive && !npt.Morph.Case.IsUndefined)
            {
                ok = false;
            }
            if (ok)
            {
                eponims.Add(its[i + 2].Value);
                i += 2;
            }
        }
        t1 = its[i].EndToken;
    }
    else if (its[i].Typ == PersonItemType.Name && holy)
    {
        t1 = its[i].EndToken;
        bool sec = false;
        if (((i + 1) < its.Count) && its[i].Chars == its[i + 1].Chars && its[i + 1].Typ != PersonItemType.Initial)
        {
            sec = true;
            t1 = its[i + 1].EndToken;
        }
        if (sec)
        {
            eponims.Add(string.Format("СВЯТ.{0} {1}", its[i].Value, its[i + 1].Value));
        }
        else
        {
            eponims.Add(string.Format("СВЯТ.{0}", its[i].Value));
        }
    }
    else if (full && (i + 1) == its.Count && ((its[i].Typ == PersonItemType.Name || its[i].Typ == PersonItemType.Surname)))
    {
        t1 = its[i].EndToken;
        eponims.Add(its[i].Value);
    }
    // The (i + 2) < its.Count guard prevents indexing past the end of the list when a leading
    // LocaseWord item has shifted i to 1 and the list still holds exactly three items.
    else if ((((i + 2) < its.Count) && its.Count == 3 && its[i].Typ == PersonItemType.Name && its[i + 1].Typ == PersonItemType.Name) && its[i + 2].Typ == PersonItemType.Surname)
    {
        t1 = its[i + 2].EndToken;
        eponims.Add(string.Format("{0} {1} {2}", its[i].Value, its[i + 1].Value, its[i + 2].Value));
        i += 2;
    }

    if (eponims.Count == 0)
    {
        return null;
    }
    return new OrgItemEponymToken(t, t1) { Eponyms = eponims };
}
// Merges the slots of obj into this referent, filtering name/type values by language,
// and then removes type slots that have become redundant.
internal void MergeSlots2(Pullenti.Ner.Referent obj, Pullenti.Morph.MorphLang lang)
{
    // Copy slots; for names and types keep only values whose script matches the language
    // (Latin first character <=> English) and skip values ending with " ССР".
    foreach (Pullenti.Ner.Slot src in obj.Slots)
    {
        bool isNameOrType = src.TypeName == ATTR_NAME || src.TypeName == ATTR_TYPE;
        if (isNameOrType)
        {
            string text = (string)src.Value;
            bool latin = Pullenti.Morph.LanguageHelper.IsLatinChar(text[0]);
            if (latin != lang.IsEn)
            {
                continue;
            }
            if (Pullenti.Morph.LanguageHelper.EndsWith(text, " ССР"))
            {
                continue;
            }
        }
        // Occurrence statistics are always merged.
        this.AddSlot(src.TypeName, src.Value, false, src.Count);
    }

    // If the filter left this referent without any name (then: without any type),
    // take all of them from obj unfiltered.
    foreach (string attrName in new string[] { ATTR_NAME, ATTR_TYPE })
    {
        if (this.FindSlot(attrName, null, true) != null || obj.FindSlot(attrName, null, true) == null)
        {
            continue;
        }
        foreach (Pullenti.Ner.Slot src in obj.Slots)
        {
            if (src.TypeName == attrName)
            {
                this.AddSlot(src.TypeName, src.Value, false, src.Count);
            }
        }
    }

    if (IsTerritory)
    {
        // Drop the "территория" type when there is an Alpha2 code or a state/empire type.
        bool hasStateMark = Alpha2 != null;
        if (!hasStateMark)
        {
            foreach (string stateType in new string[] { "государство", "держава", "империя", "імперія", "state" })
            {
                if (this.FindSlot(ATTR_TYPE, stateType, true) != null)
                {
                    hasStateMark = true;
                    break;
                }
            }
        }
        if (hasStateMark)
        {
            Pullenti.Ner.Slot territory = this.FindSlot(ATTR_TYPE, "территория", true);
            if (territory != null)
            {
                Slots.Remove(territory);
            }
        }
    }

    if (IsState)
    {
        // Remove the first "region" type slot, if any.
        Pullenti.Ner.Slot regionSlot = null;
        foreach (Pullenti.Ner.Slot candidate in Slots)
        {
            if (candidate.TypeName != ATTR_TYPE)
            {
                continue;
            }
            string typeText = candidate.Value.ToString();
            if (typeText == "регион" || typeText == "регіон" || typeText == "region")
            {
                regionSlot = candidate;
                break;
            }
        }
        if (regionSlot != null)
        {
            Slots.Remove(regionSlot);
        }
    }

    if (IsCity)
    {
        // Remove the generic city type when another type slot also passes _isCity.
        Pullenti.Ner.Slot generic = this.FindSlot(ATTR_TYPE, "город", true);
        if (generic == null)
        {
            generic = this.FindSlot(ATTR_TYPE, "місто", true);
        }
        if (generic == null)
        {
            generic = this.FindSlot(ATTR_TYPE, "city", true);
        }
        if (generic != null)
        {
            bool hasOtherCityType = false;
            foreach (Pullenti.Ner.Slot other in Slots)
            {
                if (other.TypeName == ATTR_TYPE && other != generic && _isCity((string)other.Value))
                {
                    hasOtherCityType = true;
                    break;
                }
            }
            if (hasOtherCityType)
            {
                Slots.Remove(generic);
            }
        }
    }

    // Keep only the first ATTR_HIGHER slot.
    bool higherSeen = false;
    int index = 0;
    while (index < Slots.Count)
    {
        if (Slots[index].TypeName != ATTR_HIGHER)
        {
            index++;
        }
        else if (higherSeen)
        {
            Slots.RemoveAt(index);
        }
        else
        {
            higherSeen = true;
            index++;
        }
    }

    this._mergeExtReferents(obj);
}
/// <summary>
/// Checks whether this geo referent and the given referent may denote the same object.
/// </summary>
/// <param name="obj">referent to compare with; anything that is not a GeoReferent is never equal</param>
/// <param name="typ">comparison mode, passed through to the comparison of the higher referents</param>
/// <returns>true when the two referents are considered compatible</returns>
public override bool CanBeEquals(Pullenti.Ner.Referent obj, Pullenti.Ner.Core.ReferentsEqualType typ)
{
    GeoReferent geo = obj as GeoReferent;
    if (geo == null)
    {
        return false;
    }

    // Equal Alpha2 codes decide the question immediately.
    if (geo.Alpha2 != null && geo.Alpha2 == Alpha2)
    {
        return true;
    }
    if (IsCity != geo.IsCity)
    {
        return false;
    }
    if (IsUnion != geo.IsUnion)
    {
        return false;
    }

    if (IsUnion)
    {
        // Unions are equal when their ATTR_REF values coincide in both directions.
        foreach (Pullenti.Ner.Slot s in Slots)
        {
            if (s.TypeName == ATTR_REF && obj.FindSlot(ATTR_REF, s.Value, true) == null)
            {
                return false;
            }
        }
        foreach (Pullenti.Ner.Slot s in obj.Slots)
        {
            if (s.TypeName == ATTR_REF && this.FindSlot(ATTR_REF, s.Value, true) == null)
            {
                return false;
            }
        }
        return true;
    }

    Pullenti.Ner.Referent ref1 = this.GetSlotValue(ATTR_REF) as Pullenti.Ner.Referent;
    Pullenti.Ner.Referent ref2 = geo.GetSlotValue(ATTR_REF) as Pullenti.Ner.Referent;
    if (ref1 != null && ref2 != null && ref1 != ref2)
    {
        return false;
    }

    bool r = IsRegion || IsState;
    bool r1 = geo.IsRegion || geo.IsState;
    if (r != r1)
    {
        // NOTE(review): the previous code additionally compared IsTerritory here, but returned
        // false on both paths, so the check had no effect. Confirm whether a special case for
        // territories was intended.
        return false;
    }

    // At least one name must be shared.
    bool eqNames = false;
    foreach (Pullenti.Ner.Slot s in Slots)
    {
        if (s.TypeName == ATTR_NAME && geo.FindSlot(s.TypeName, s.Value, true) != null)
        {
            eqNames = true;
            break;
        }
    }
    if (!eqNames)
    {
        return false;
    }

    if (IsRegion && geo.IsRegion)
    {
        // Two regions need a common type, or types where one ends with the other.
        List<string> typs1 = Typs;
        List<string> typs2 = geo.Typs;
        bool ok = false;
        foreach (string t in typs1)
        {
            if (typs2.Contains(t))
            {
                ok = true;
            }
            else
            {
                foreach (string tt in typs2)
                {
                    if (Pullenti.Morph.LanguageHelper.EndsWith(tt, t) || Pullenti.Morph.LanguageHelper.EndsWith(t, tt))
                    {
                        ok = true;
                        break;
                    }
                }
            }
            if (ok)
            {
                // One match is enough; further iterations could not change the result.
                break;
            }
        }
        if (!ok)
        {
            return false;
        }
    }

    if (Higher != null && geo.Higher != null)
    {
        if (_checkRoundDep(this) || _checkRoundDep(geo))
        {
            return false;
        }
        // The higher referents must match directly or with one level skipped on either side.
        bool higherMatches = Higher.CanBeEquals(geo.Higher, typ)
            || (geo.Higher.Higher != null && Higher.CanBeEquals(geo.Higher.Higher, typ))
            || (Higher.Higher != null && Higher.Higher.CanBeEquals(geo.Higher, typ));
        if (!higherMatches)
        {
            return false;
        }
    }

    return true;
}
/// <summary>
/// Registers a new entity or binds it to an already registered one. Entities obtained during analysis
/// must be saved through this function. This is where coreference is resolved, i.e. entities that
/// correspond to the same object of the text are merged.
/// </summary>
/// <param name="referent">entity to save</param>
/// <returns>the same referent instance, or another one if it was merged with a previously registered entity</returns>
public virtual Pullenti.Ner.Referent RegisterReferent(Pullenti.Ner.Referent referent)
{
    if (referent == null)
    {
        return null;
    }

    // External referents are registered first; m_RegRefLevel bounds the nesting of this step.
    if (referent.m_ExtReferents != null && m_RegRefLevel <= 2)
    {
        foreach (Pullenti.Ner.ReferentToken rt in referent.m_ExtReferents)
        {
            Pullenti.Ner.Referent oldRef = rt.Referent;
            m_RegRefLevel++;
            try
            {
                rt.SaveToLocalOntology();
            }
            finally
            {
                // Restore the depth counter even if saving throws, so later calls are not affected.
                m_RegRefLevel--;
            }
            if (oldRef == rt.Referent || rt.Referent == null)
            {
                continue;
            }

            // The token now points to another referent: redirect every slot that held the old one.
            foreach (Pullenti.Ner.Slot s in referent.Slots)
            {
                if (s.Value == oldRef)
                {
                    referent.UploadSlot(s, rt.Referent);
                }
            }
            if (referent.m_ExtReferents != null)
            {
                foreach (Pullenti.Ner.ReferentToken rtt in referent.m_ExtReferents)
                {
                    // A token's referent is treated as possibly null above, so guard here as well.
                    if (rtt.Referent == null)
                    {
                        continue;
                    }
                    foreach (Pullenti.Ner.Slot s in rtt.Referent.Slots)
                    {
                        if (s.Value == oldRef)
                        {
                            referent.UploadSlot(s, rt.Referent);
                        }
                    }
                }
            }
        }
        referent.m_ExtReferents = null;
    }

    if (m_Referents.Contains(referent))
    {
        return referent;
    }

    // Look for equal candidates among at most the last 999 registered referents.
    List<Pullenti.Ner.Referent> eq = null;
    for (int i = m_Referents.Count - 1; i >= 0 && ((m_Referents.Count - i) < 1000); i--)
    {
        Pullenti.Ner.Referent p = m_Referents[i];
        if (p.CanBeEquals(referent, ReferentsEqualType.WithinOneText))
        {
            if (!p.CanBeGeneralFor(referent) && !referent.CanBeGeneralFor(p))
            {
                if (eq == null)
                {
                    eq = new List<Pullenti.Ner.Referent>();
                }
                eq.Add(p);
            }
        }
    }

    if (eq != null)
    {
        if (eq.Count == 1)
        {
            // A single candidate: merge into it.
            eq[0].MergeSlots(referent, true);
            return eq[0];
        }
        if (eq.Count > 1)
        {
            // Several candidates: reuse one only when its slots coincide exactly with the new referent's.
            foreach (Pullenti.Ner.Referent e in eq)
            {
                if (e.Slots.Count != referent.Slots.Count)
                {
                    continue;
                }
                bool ok = true;
                foreach (Pullenti.Ner.Slot s in referent.Slots)
                {
                    if (e.FindSlot(s.TypeName, s.Value, true) == null)
                    {
                        ok = false;
                        break;
                    }
                }
                if (ok)
                {
                    foreach (Pullenti.Ner.Slot s in e.Slots)
                    {
                        if (referent.FindSlot(s.TypeName, s.Value, true) == null)
                        {
                            ok = false;
                            break;
                        }
                    }
                }
                if (ok)
                {
                    return e;
                }
            }
        }
    }

    m_Referents.Add(referent);
    return referent;
}