// Returns the image identifier matching the kind of the given referent.
// obj: referent to classify; null or a non-geo referent yields the region image.
// The checks go from the most specific kind to the least specific one, so a referent
// that is both a city and a state/region gets the combined country-city image.
public override string GetImageId(Pullenti.Ner.Referent obj = null)
{
    Pullenti.Ner.Geo.GeoReferent geo = obj as Pullenti.Ner.Geo.GeoReferent;
    if (geo == null)
    {
        return RegionImageId;
    }
    if (geo.IsUnion)
    {
        return UnionImageId;
    }
    if (geo.IsCity && (geo.IsState || geo.IsRegion))
    {
        return CountryCityImageId;
    }
    if (geo.IsState)
    {
        return CountryImageId;
    }
    if (geo.IsCity)
    {
        return CityImageId;
    }
    // A region whose direct parent is a city is shown as a district.
    if (geo.IsRegion && geo.Higher != null && geo.Higher.IsCity)
    {
        return DistrictImageId;
    }
    if (geo.IsTerritory)
    {
        return TerrImageId;
    }
    return RegionImageId;
}
// Tells whether a referent may be attached as a reference to a named entity of kind ki.
// Monument -> PERSON / PERSONPROPERTY, Location -> geo referent that is a region or a state,
// Building -> ORGANIZATION. Every other combination (and a null referent) is rejected.
static bool canBeRef(NamedEntityKind ki, Pullenti.Ner.Referent re)
{
    if (re == null)
    {
        return false;
    }
    if (ki == NamedEntityKind.Monument)
    {
        return re.TypeName == "PERSON" || re.TypeName == "PERSONPROPERTY";
    }
    if (ki == NamedEntityKind.Location)
    {
        Pullenti.Ner.Geo.GeoReferent geo = re as Pullenti.Ner.Geo.GeoReferent;
        return geo != null && (geo.IsRegion || geo.IsState);
    }
    if (ki == NamedEntityKind.Building)
    {
        return re.TypeName == "ORGANIZATION";
    }
    return false;
}
// Tries to build a geo referent from a leading type noun followed by an organization.
// The first item must be a Noun whose value is none of "ГОРОД" / "МІСТО" / "CITY" and which is
// either not doubtful or has a geo object before it. The organization comes from the second
// list item (OrgRef) or, failing that, from AddressItemToken.TryAttachOrg on the next token.
// Returns null when no organization can be attached.
static Pullenti.Ner.ReferentToken Try4(List<CityItemToken> li)
{
    if (li.Count == 0)
    {
        return null;
    }
    CityItemToken head = li[0];
    if (head.Typ != CityItemToken.ItemType.Noun)
    {
        return null;
    }
    if (head.Value == "ГОРОД" || head.Value == "МІСТО" || head.Value == "CITY")
    {
        return null;
    }
    if (head.Doubtful && !head.GeoObjectBefore)
    {
        return null;
    }

    if (li.Count > 1 && li[1].OrgRef != null)
    {
        // The organization was already recognized as the second city item.
        Pullenti.Ner.Geo.GeoReferent byItem = new Pullenti.Ner.Geo.GeoReferent();
        byItem.AddTyp(head.Value);
        byItem.AddOrgReferent(li[1].OrgRef.Referent);
        byItem.AddExtReferent(li[1].OrgRef);
        return new Pullenti.Ner.ReferentToken(byItem, head.BeginToken, li[1].EndToken);
    }

    // Otherwise look for an organization right after the noun.
    Pullenti.Ner.Address.Internal.AddressItemToken orgItem = Pullenti.Ner.Address.Internal.AddressItemToken.TryAttachOrg(head.EndToken.Next);
    if (orgItem == null)
    {
        return null;
    }
    Pullenti.Ner.Geo.GeoReferent byAddress = new Pullenti.Ner.Geo.GeoReferent();
    byAddress.AddTyp(head.Value);
    byAddress.AddOrgReferent(orgItem.Referent);
    byAddress.AddExtReferent(orgItem.RefToken);
    return new Pullenti.Ner.ReferentToken(byAddress, head.BeginToken, orgItem.EndToken);
}
// Walks up the Higher chain of gLo (gLo itself included, at most 10 steps) and reports
// whether any node can be equal to gHi within one text.
static bool _isHigher(Pullenti.Ner.Geo.GeoReferent gHi, Pullenti.Ner.Geo.GeoReferent gLo)
{
    Pullenti.Ner.Geo.GeoReferent current = gLo;
    int steps = 0;
    // The step limit bounds the walk regardless of how long the chain is.
    while (current != null && steps < 10)
    {
        if (current.CanBeEquals(gHi, Pullenti.Ner.Core.ReferentsEqualType.WithinOneText))
        {
            return true;
        }
        current = current.Higher;
        steps++;
    }
    return false;
}
// Concatenates the values of all ATTR_TYPE slots of g, each followed by ';'
// (for example "город;район;"). Returns an empty string when there are no type slots.
static string _getTypesString(Pullenti.Ner.Geo.GeoReferent g)
{
    StringBuilder types = new StringBuilder();
    foreach (Pullenti.Ner.Slot slot in g.Slots)
    {
        if (slot.TypeName != Pullenti.Ner.Geo.GeoReferent.ATTR_TYPE)
        {
            continue;
        }
        types.AppendFormat("{0};", slot.Value);
    }
    return types.ToString();
}
// Reports whether this referent is a more general variant of obj: both must pass the
// within-one-text equality check, and this one must have no ATTR_GEO geo referent
// while obj does have one.
public override bool CanBeGeneralFor(Pullenti.Ner.Referent obj)
{
    bool comparable = this._CanBeEquals(obj, Pullenti.Ner.Core.ReferentsEqualType.WithinOneText, true);
    if (!comparable)
    {
        return false;
    }
    Pullenti.Ner.Geo.GeoReferent ownGeo = this.GetSlotValue(ATTR_GEO) as Pullenti.Ner.Geo.GeoReferent;
    Pullenti.Ner.Geo.GeoReferent otherGeo = obj.GetSlotValue(ATTR_GEO) as Pullenti.Ner.Geo.GeoReferent;
    return ownGeo == null && otherGeo != null;
}
// Checks whether the given referent may be used as the ATTR_REF of this referent.
// The decision is driven by this referent's Name:
//  - geo referent: "...президент" / "...губернатор" need a state or region, "мэр" / "градоначальник"
//    need a city, "глава" accepts any geo, everything else is rejected;
//  - ORGANIZATION: rejected for governor/mayor/president names, for "министр" names when the
//    organization has no "министерство" slot, and for "...директор" names when it has a "суд" slot;
//  - any other referent type is rejected.
public bool CanHasRef(Pullenti.Ner.Referent r)
{
    string nam = Name;
    if (nam == null || r == null)
    {
        return false;
    }

    Pullenti.Ner.Geo.GeoReferent geo = r as Pullenti.Ner.Geo.GeoReferent;
    if (geo != null)
    {
        if (Pullenti.Morph.LanguageHelper.EndsWithEx(nam, "президент", "губернатор", null, null))
        {
            return geo.IsState || geo.IsRegion;
        }
        if (nam == "мэр" || nam == "градоначальник")
        {
            return geo.IsCity;
        }
        return nam == "глава";
    }

    if (r.TypeName != "ORGANIZATION")
    {
        return false;
    }
    if (Pullenti.Morph.LanguageHelper.EndsWith(nam, "губернатор") || nam == "мэр" || nam == "градоначальник" || nam == "президент")
    {
        return false;
    }
    if (nam.Contains("министр") && r.FindSlot(null, "министерство", true) == null)
    {
        return false;
    }
    if (nam.EndsWith("директор") && r.FindSlot(null, "суд", true) != null)
    {
        return false;
    }
    return true;
}
// Looks up a state referent by name, caching the outcome in m_GeoRefByName.
// name: the name to search for among the slots of TerrItemToken.m_AllStates.
// Returns the first state that has a slot with this value, or null when none does.
// Negative results are cached too, so a miss is not searched again.
// A null name now returns null instead of being passed to the cache lookup, which
// throws ArgumentNullException if the cache is a standard dictionary.
public static Pullenti.Ner.Geo.GeoReferent GetGeoReferentByName(string name)
{
    if (name == null)
    {
        return null;
    }
    Pullenti.Ner.Geo.GeoReferent res = null;
    if (m_GeoRefByName.TryGetValue(name, out res))
    {
        return res;
    }
    foreach (Pullenti.Ner.Referent r in TerrItemToken.m_AllStates)
    {
        if (r.FindSlot(null, name, true) != null)
        {
            res = r as Pullenti.Ner.Geo.GeoReferent;
            break;
        }
    }
    m_GeoRefByName.Add(name, res);
    return res;
}
// Tries to interpret the first territory item as a Moscow administrative district.
// li: parsed territory items; only li[0] is used. ad: not used by this method.
// Returns a token spanning li[0] with a new geo referent of type "АДМИНИСТРАТИВНЫЙ ОКРУГ",
// or null when li is null/empty, li[0] is not a Moscow-region termin, or a doubtful item
// is not confirmed by a following city or street.
static Pullenti.Ner.ReferentToken _tryAttachMoscowAO(List<TerrItemToken> li, Pullenti.Ner.Core.AnalyzerData ad)
{
    // Guard added: the original indexed li[0] unconditionally.
    if (li == null || li.Count == 0)
    {
        return null;
    }
    if (li[0].TerminItem == null || !li[0].TerminItem.IsMoscowRegion)
    {
        return null;
    }
    if (li[0].IsDoubt)
    {
        // A doubtful match needs supporting context right after it: a city or a street.
        bool ok = false;
        if (CityAttachHelper.CheckCityAfter(li[0].EndToken.Next))
        {
            ok = true;
        }
        else
        {
            List<Pullenti.Ner.Address.Internal.AddressItemToken> ali = Pullenti.Ner.Address.Internal.AddressItemToken.TryParseList(li[0].EndToken.Next, null, 2);
            if (ali != null && ali.Count > 0 && ali[0].Typ == Pullenti.Ner.Address.Internal.AddressItemToken.ItemType.Street)
            {
                ok = true;
            }
        }
        if (!ok)
        {
            return null;
        }
    }
    Pullenti.Ner.Geo.GeoReferent reg = new Pullenti.Ner.Geo.GeoReferent();
    string typ = "АДМИНИСТРАТИВНЫЙ ОКРУГ";
    reg.AddTyp(typ);
    string name = li[0].TerminItem.CanonicText;
    // Strip the trailing type words plus the separator before them. The length check prevents
    // Substring from receiving a negative length when the canonic text is exactly the type string.
    if (Pullenti.Morph.LanguageHelper.EndsWith(name, typ) && name.Length > typ.Length)
    {
        name = name.Substring(0, name.Length - typ.Length - 1).Trim();
    }
    reg.AddName(name);
    return new Pullenti.Ner.ReferentToken(reg, li[0].BeginToken, li[0].EndToken);
}
// Adds a referent to this object: geo referents go to ATTR_GEO, street referents and
// ORGANIZATION referents go to ATTR_STREET, anything else (and null) is ignored.
// For a geo referent the existing ATTR_GEO slots are checked first:
//  - if an existing geo can be higher than the new one and the new one is a city or already
//    points to it as Higher, the slot is overwritten with the new geo;
//  - if the new geo can be higher than an existing one, nothing is added.
public void AddReferent(Pullenti.Ner.Referent r)
{
    if (r == null)
    {
        return;
    }

    Pullenti.Ner.Geo.GeoReferent incoming = r as Pullenti.Ner.Geo.GeoReferent;
    if (incoming == null)
    {
        if ((r is StreetReferent) || r.TypeName == "ORGANIZATION")
        {
            this.AddSlot(ATTR_STREET, r, false, 0);
        }
        return;
    }

    foreach (Pullenti.Ner.Slot slot in Slots)
    {
        if (slot.TypeName != ATTR_GEO)
        {
            continue;
        }
        Pullenti.Ner.Geo.GeoReferent existing = slot.Value as Pullenti.Ner.Geo.GeoReferent;
        if (existing == null)
        {
            continue;
        }
        bool replacesExisting = Pullenti.Ner.Geo.Internal.GeoOwnerHelper.CanBeHigher(existing, incoming)
            && (incoming.Higher == existing || incoming.IsCity);
        if (replacesExisting)
        {
            this.UploadSlot(slot, incoming);
            return;
        }
        if (Pullenti.Ner.Geo.Internal.GeoOwnerHelper.CanBeHigher(incoming, existing))
        {
            return;
        }
    }
    this.AddSlot(ATTR_GEO, r, false, 0);
}
// Tries to recognize a geo abbreviation from TerrItemToken.m_GeoAbbrs at a Latin-letter token.
// Returns a token over a clone of the geo referent stored in the termin's Tag (with its
// occurrences cleared); a dot right after the abbreviation is included in the span.
// Returns null when t is null, not Latin, not a known abbreviation, or the Tag is not a geo referent.
public static Pullenti.Ner.ReferentToken TryAttachStateUSATerritory(Pullenti.Ner.Token t)
{
    if (t == null)
    {
        return null;
    }
    if (!t.Chars.IsLatinLetter)
    {
        return null;
    }
    Pullenti.Ner.Core.TerminToken abbr = TerrItemToken.m_GeoAbbrs.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
    if (abbr == null)
    {
        return null;
    }
    Pullenti.Ner.Geo.GeoReferent template = abbr.Termin.Tag as Pullenti.Ner.Geo.GeoReferent;
    if (template == null)
    {
        return null;
    }
    Pullenti.Ner.Token after = abbr.EndToken.Next;
    if (after != null && after.IsChar('.'))
    {
        abbr.EndToken = after;
    }
    // Work on a copy so the shared dictionary referent is not modified.
    Pullenti.Ner.Referent copy = template.Clone();
    copy.Occurrence.Clear();
    return new Pullenti.Ner.ReferentToken(copy, abbr.BeginToken, abbr.EndToken);
}
// Tries to build a city referent token from a parsed list of city items.
//
// li: city items; the list is MUTATED - items from index i onward are removed before the
//     referent is built, and li[0].Value may be rewritten for adjective names.
// oi: set to null first, then to li[0].OntoItem once the list shape is accepted.
// ad: analyzer data; only its LocalOntology is read, to compare with the ontology item's owner.
// Returns the referent token, or null when the items do not form a trustworthy city.
//
// Flow, as visible in the body:
//  1. The list must start with a City item, or be exactly [ProperName, Noun].
//  2. "ok" starts as "li[0] is not doubtful". When li[0] has an ontology item without MiscAttr
//     and ad is given, ok is cleared if the item's owner is neither ad.LocalOntology nor an
//     external ontology, unless the previous token is "В".
//  3. A single adjective item that the street parser reads as [something, Noun] is rejected.
//  4. If li[1] is a Noun it supplies typ/alttyp, unless the address parsers show that the noun
//     starts a street. An all-upper "СТ" noun rejects the whole match. A noun that is the last
//     item forces ok = true; otherwise a preceding "МЭР"/"МЕР"/"ГЛАВА"/"ГРАДОНАЧАЛЬНИК" can do so.
//  5. While ok is still false, fallbacks are tried in order: short canonic text and proper-name
//     rejections, a dictionary check combined with geo-object context (vetoed by person, street
//     and organization checks), CheckYearAfter, and CheckCityAfter.
//  6. The referent is created: with no ontology item, from li[0].Value (with HigherGeo when
//     present, otherwise only when typ or a hyphen-after-geo context justifies it); with an
//     ontology item whose referent is a geo referent, from a clone of it with cleared occurrences;
//     otherwise a new geo referent named by the canonic text, with typ falling back to oi.Typ.
//  7. For a single-item city, an adjacent "Г" / "Г." before or after is absorbed into the span.
//
// NOTE(review): the adjective branch reads li[1].Morph after li.RemoveRange(i, ...). If i is
// still 1 at that point (noun found but never confirmed, ok set by a later fallback) the list
// has one element - please confirm this path is unreachable or guard it.
static Pullenti.Ner.ReferentToken Try1(List <CityItemToken> li, out Pullenti.Ner.Core.IntOntologyItem oi, Pullenti.Ner.Core.AnalyzerDataWithOntology ad) { oi = null; if (li == null || (li.Count < 1)) { return(null); } else if (li[0].Typ != CityItemToken.ItemType.City) { if (li.Count != 2 || li[0].Typ != CityItemToken.ItemType.ProperName || li[1].Typ != CityItemToken.ItemType.Noun) { return(null); } } int i = 1; oi = li[0].OntoItem; bool ok = !li[0].Doubtful; if ((ok && li[0].OntoItem != null && li[0].OntoItem.MiscAttr == null) && ad != null) { if (li[0].OntoItem.Owner != ad.LocalOntology && !li[0].OntoItem.Owner.IsExtOntology) { if (li[0].BeginToken.Previous != null && li[0].BeginToken.Previous.IsValue("В", null)) { } else { ok = false; } } } if (li.Count == 1 && li[0].BeginToken.Morph.Class.IsAdjective) { List <Pullenti.Ner.Address.Internal.StreetItemToken> sits = Pullenti.Ner.Address.Internal.StreetItemToken.TryParseList(li[0].BeginToken, null, 3); if (sits != null && sits.Count == 2 && sits[1].Typ == Pullenti.Ner.Address.Internal.StreetItemType.Noun) { return(null); } } string typ = null; string alttyp = null; Pullenti.Ner.MorphCollection mc = li[0].Morph; if (i < li.Count) { if (li[i].Typ == CityItemToken.ItemType.Noun) { Pullenti.Ner.Address.Internal.AddressItemToken at = null; if (!li[i].Chars.IsAllLower && (li[i].WhitespacesAfterCount < 2)) { Pullenti.Ner.Address.Internal.StreetItemToken sit = Pullenti.Ner.Address.Internal.StreetItemToken.TryParse(li[i].EndToken.Next, null, false, null, false); if (sit != null && sit.Typ == Pullenti.Ner.Address.Internal.StreetItemType.Noun) { at = Pullenti.Ner.Address.Internal.AddressItemToken.TryParse(li[i].BeginToken, null, false, false, null); if (at != null) { Pullenti.Ner.Address.Internal.AddressItemToken at2 = Pullenti.Ner.Address.Internal.AddressItemToken.TryParse(li[i].EndToken.Next, null, false, false, null); if (at2 != null && at2.Typ == Pullenti.Ner.Address.Internal.AddressItemToken.ItemType.Street) { at = null; } 
} } } if (at == null) { typ = li[i].Value; alttyp = li[i].AltValue; if (li[i].BeginToken.IsValue("СТ", null) && li[i].BeginToken.Chars.IsAllUpper) { return(null); } if ((i + 1) == li.Count) { ok = true; if (!li[i].Morph.Case.IsUndefined) { mc = li[i].Morph; } i++; } else if (ok) { i++; } else { Pullenti.Ner.Token tt0 = li[0].BeginToken.Previous; if ((tt0 is Pullenti.Ner.TextToken) && (tt0.WhitespacesAfterCount < 3)) { if (tt0.IsValue("МЭР", "МЕР") || tt0.IsValue("ГЛАВА", null) || tt0.IsValue("ГРАДОНАЧАЛЬНИК", null)) { ok = true; i++; } } } } } } if (!ok && oi != null && (oi.CanonicText.Length < 4)) { return(null); } if (!ok && li[0].BeginToken.Morph.Class.IsProperName) { return(null); } if (!ok) { if (!Pullenti.Ner.Core.MiscHelper.IsExistsInDictionary(li[0].BeginToken, li[0].EndToken, Pullenti.Morph.MorphClass.Adjective | Pullenti.Morph.MorphClass.Noun | Pullenti.Morph.MorphClass.Pronoun)) { ok = li[0].GeoObjectBefore || li[i - 1].GeoObjectAfter; if (ok && li[0].BeginToken == li[0].EndToken) { Pullenti.Morph.MorphClass mcc = li[0].BeginToken.GetMorphClassInDictionary(); if (mcc.IsProperName || mcc.IsProperSurname) { ok = false; } else if (li[0].GeoObjectBefore && (li[0].WhitespacesAfterCount < 2)) { Pullenti.Ner.Address.Internal.AddressItemToken ad1 = Pullenti.Ner.Address.Internal.AddressItemToken.TryParse(li[0].BeginToken, null, false, false, null); if (ad1 != null && ad1.Typ == Pullenti.Ner.Address.Internal.AddressItemToken.ItemType.Street) { Pullenti.Ner.Address.Internal.AddressItemToken ad2 = Pullenti.Ner.Address.Internal.AddressItemToken.TryParse(li[0].EndToken.Next, null, false, false, null); if (ad2 == null || ad2.Typ != Pullenti.Ner.Address.Internal.AddressItemToken.ItemType.Street) { ok = false; } } else if (Pullenti.Ner.Address.Internal.AddressItemToken.TryAttachOrg(li[0].BeginToken) != null) { ok = false; } } } } if (ok) { if (li[0].Kit.ProcessReferent("PERSON", li[0].BeginToken) != null) { ok = false; } } } if (!ok) { ok = 
CheckYearAfter(li[0].EndToken.Next); } if (!ok && ((!li[0].BeginToken.Morph.Class.IsAdjective || li[0].BeginToken != li[0].EndToken))) { ok = CheckCityAfter(li[0].EndToken.Next); } if (!ok) { return(null); } if (i < li.Count) { li.RemoveRange(i, li.Count - i); } Pullenti.Ner.ReferentToken rt = null; if (oi == null) { if (li[0].Value != null && li[0].HigherGeo != null) { Pullenti.Ner.Geo.GeoReferent cap = new Pullenti.Ner.Geo.GeoReferent(); cap.AddName(li[0].Value); cap.AddTypCity(li[0].Kit.BaseLanguage); cap.Higher = li[0].HigherGeo; if (typ != null) { cap.AddTyp(typ); } if (alttyp != null) { cap.AddTyp(alttyp); } rt = new Pullenti.Ner.ReferentToken(cap, li[0].BeginToken, li[0].EndToken); } else { if (li[0].Value == null) { return(null); } if (typ == null) { if ((li.Count == 1 && li[0].BeginToken.Previous != null && li[0].BeginToken.Previous.IsHiphen) && (li[0].BeginToken.Previous.Previous is Pullenti.Ner.ReferentToken) && (li[0].BeginToken.Previous.Previous.GetReferent() is Pullenti.Ner.Geo.GeoReferent)) { } else { return(null); } } else { if (!Pullenti.Morph.LanguageHelper.EndsWithEx(typ, "ПУНКТ", "ПОСЕЛЕНИЕ", "ПОСЕЛЕННЯ", "ПОСЕЛОК")) { if (!Pullenti.Morph.LanguageHelper.EndsWith(typ, "CITY")) { if (typ == "СТАНЦИЯ" && (MiscLocationHelper.CheckGeoObjectBefore(li[0].BeginToken))) { } else if (li.Count > 1 && li[1].Typ == CityItemToken.ItemType.Noun && li[0].Typ == CityItemToken.ItemType.City) { } else if ((li.Count == 2 && li[1].Typ == CityItemToken.ItemType.Noun && li[0].Typ == CityItemToken.ItemType.ProperName) && ((li[0].GeoObjectBefore || li[1].GeoObjectAfter))) { } else { return(null); } } } if (li[0].BeginToken.Morph.Class.IsAdjective) { li[0].Value = Pullenti.Ner.Core.ProperNameHelper.GetNameEx(li[0].BeginToken, li[0].EndToken, Pullenti.Morph.MorphClass.Adjective, li[1].Morph.Case, li[1].Morph.Gender, false, false); } } } } else if (oi.Referent is Pullenti.Ner.Geo.GeoReferent) { Pullenti.Ner.Geo.GeoReferent city = oi.Referent.Clone() as 
Pullenti.Ner.Geo.GeoReferent; city.Occurrence.Clear(); rt = new Pullenti.Ner.ReferentToken(city, li[0].BeginToken, li[li.Count - 1].EndToken) { Morph = mc }; } else if (typ == null) { typ = oi.Typ; } if (rt == null) { Pullenti.Ner.Geo.GeoReferent city = new Pullenti.Ner.Geo.GeoReferent(); city.AddName((oi == null ? li[0].Value : oi.CanonicText)); if (typ != null) { city.AddTyp(typ); } else { city.AddTypCity(li[0].Kit.BaseLanguage); } if (alttyp != null) { city.AddTyp(alttyp); } rt = new Pullenti.Ner.ReferentToken(city, li[0].BeginToken, li[li.Count - 1].EndToken) { Morph = mc }; } if ((rt.Referent is Pullenti.Ner.Geo.GeoReferent) && li.Count == 1 && (rt.Referent as Pullenti.Ner.Geo.GeoReferent).IsCity) { if (rt.BeginToken.Previous != null && rt.BeginToken.Previous.IsValue("Г", null)) { rt.BeginToken = rt.BeginToken.Previous; } else if ((rt.BeginToken.Previous != null && rt.BeginToken.Previous.IsChar('.') && rt.BeginToken.Previous.Previous != null) && rt.BeginToken.Previous.Previous.IsValue("Г", null)) { rt.BeginToken = rt.BeginToken.Previous.Previous; } else if (rt.EndToken.Next != null && (rt.WhitespacesAfterCount < 2) && rt.EndToken.Next.IsValue("Г", null)) { rt.EndToken = rt.EndToken.Next; if (rt.EndToken.Next != null && rt.EndToken.Next.IsChar('.')) { rt.EndToken = rt.EndToken.Next; } } } return(rt); }
// Decides whether geo object hi may be the direct parent (Higher) of geo object lo.
//
// Rules are applied in this order and the first matching one wins:
//  1. Either argument null, or both the same instance -> false.
//  2. lo already has a Higher -> true only when that Higher is exactly hi.
//  3. lo is a state -> true only when lo is also a region and hi is a state that is not a region.
//  4. hi is a territory -> false; otherwise lo is a territory -> true.
//  5. The remaining rules compare the ';'-joined type strings of both objects (hit / lot):
//     - hi is a city: a city-typed hi may own districts / administrative or municipal okrugs and
//       smaller settlements; stations sit under non-stations; settlement types nest as coded below;
//       a hi named "МОСКВА" may own the cities "ЗЕЛЕНОГРАД" and "ТРОИЦК".
//     - otherwise lo is a city: a non-city-typed settlement fits under any region; a city-typed
//       lo fits under a state, never under an administrative/municipal unit, under anything
//       that is not a "район", or under a "район" whose own Higher is a region.
//     - otherwise lo is a region: false when both share a type other than "регион"/"регіон";
//       then post-office, state, volost, county/borough/parish-in-state and
//       район / область / округ / муницип nesting rules.
//  6. Nothing matched -> false.
//
// NOTE(review): inside the lot.Contains("округ") branch the same condition is tested again;
// the inner test is always true there. The (string)s.Value casts assume every ATTR_TYPE slot
// holds a string - confirm.
public static bool CanBeHigher(Pullenti.Ner.Geo.GeoReferent hi, Pullenti.Ner.Geo.GeoReferent lo) { if (hi == null || lo == null || hi == lo) { return(false); } if (lo.Higher != null) { return(lo.Higher == hi); } if (lo.IsState) { if (lo.IsRegion && hi.IsState && !hi.IsRegion) { return(true); } return(false); } if (hi.IsTerritory) { return(false); } if (lo.IsTerritory) { return(true); } string hit = _getTypesString(hi); string lot = _getTypesString(lo); if (hi.IsCity) { if (lo.IsRegion) { if (hit.Contains("город;") || hit.Contains("місто") || hit.Contains("city")) { if ((lot.Contains("район") || lot.Contains("административный округ") || lot.Contains("адміністративний округ")) || lot.Contains("муниципальн") || lot.Contains("муніципаль")) { return(true); } if (lo.FindSlot(Pullenti.Ner.Geo.GeoReferent.ATTR_TYPE, "округ", true) != null && !lot.Contains("автономн")) { return(true); } } } if (lo.IsCity) { if (!hit.Contains("станция") && lot.Contains("станция")) { return(true); } if (!hit.Contains("станція") && lot.Contains("станція")) { return(true); } if (hit.Contains("город;") || hit.Contains("місто") || hit.Contains("city")) { if ((lot.Contains("поселок") || lot.Contains("селище") || lot.Contains("село")) || lot.Contains("деревня") || lot.Contains("городок")) { return(true); } } if (hit.Contains("поселение") || hit.Contains("поселок")) { if (lot.Contains("село;") || lot.Contains("деревня") || lot.Contains("хутор")) { return(true); } } if (hit.Contains("поселение") && lot.Contains("поселок")) { return(true); } if (hit.Contains("село;")) { if (lot.Contains("поселение") || lot.Contains("поселок")) { return(true); } } if (hi.FindSlot(Pullenti.Ner.Geo.GeoReferent.ATTR_NAME, "МОСКВА", true) != null) { if (lot.Contains("город;") || lot.Contains("місто") || lot.Contains("city")) { if (lo.FindSlot(Pullenti.Ner.Geo.GeoReferent.ATTR_NAME, "ЗЕЛЕНОГРАД", true) != null || lo.FindSlot(Pullenti.Ner.Geo.GeoReferent.ATTR_NAME, "ТРОИЦК", true) != null) { return(true); } } } } } else if 
(lo.IsCity) { if (!lot.Contains("город") && !lot.Contains("місто") && !lot.Contains("city")) { if (hi.IsRegion) { return(true); } } else { if (hi.IsState) { return(true); } if ((hit.Contains("административный округ") || hit.Contains("адміністративний округ") || hit.Contains("муниципальн")) || hit.Contains("муніципаль")) { return(false); } if (!hit.Contains("район")) { return(true); } if (hi.Higher != null && hi.Higher.IsRegion) { return(true); } } } else if (lo.IsRegion) { foreach (Pullenti.Ner.Slot s in hi.Slots) { if (s.TypeName == Pullenti.Ner.Geo.GeoReferent.ATTR_TYPE) { if (((string)s.Value) != "регион" && ((string)s.Value) != "регіон") { if (lo.FindSlot(s.TypeName, s.Value, true) != null) { return(false); } } } } if (hit.Contains("почтовое отделение")) { return(false); } if (lot.Contains("почтовое отделение")) { return(true); } if (hi.IsState) { return(true); } if (lot.Contains("волость")) { return(true); } if (lot.Contains("county") || lot.Contains("borough") || lot.Contains("parish")) { if (hit.Contains("state")) { return(true); } } if (lot.Contains("район")) { if ((hit.Contains("область") || hit.Contains("регион") || hit.Contains("край")) || hit.Contains("регіон")) { return(true); } if (hit.Contains("округ") && !hit.Contains("сельский") && !hit.Contains("поселковый")) { return(true); } } if (lot.Contains("область")) { if (hit.Contains("край")) { return(true); } if (hit.Contains("округ") && !hit.Contains("сельский") && !hit.Contains("поселковый")) { return(true); } } if (lot.Contains("округ")) { if (lot.Contains("сельский") || lot.Contains("поселковый")) { return(true); } if (hit.Contains("край")) { return(true); } if (lot.Contains("округ")) { if (hit.Contains("область") || hit.Contains("республика")) { return(true); } } } if (lot.Contains("муницип")) { if (hit.Contains("область") || hit.Contains("район") || hit.Contains("округ")) { return(true); } } } return(false); }
// Normalizes the geo slots of this referent.
//  1. Collects geo referents from ATTR_GEO slots and from the slots of ATTR_STREET referents.
//  2. For every pair where one geo is reachable through the other's Higher chain, removes the
//     reachable (higher) one from the working list and, if present, its ATTR_GEO slot.
//  3. If exactly two geos remain, tries to link them: a city without a parent gets the region
//     as Higher, or the top of the non-state geo's chain gets the state as Higher; the geo that
//     became a parent loses its ATTR_GEO slot.
internal void Correct()
{
    List<Pullenti.Ner.Geo.GeoReferent> found = new List<Pullenti.Ner.Geo.GeoReferent>();
    foreach (Pullenti.Ner.Slot slot in Slots)
    {
        if (slot.TypeName == ATTR_GEO && (slot.Value is Pullenti.Ner.Geo.GeoReferent))
        {
            found.Add(slot.Value as Pullenti.Ner.Geo.GeoReferent);
        }
        else if (slot.TypeName == ATTR_STREET && (slot.Value is Pullenti.Ner.Referent))
        {
            foreach (Pullenti.Ner.Slot inner in (slot.Value as Pullenti.Ner.Referent).Slots)
            {
                if (inner.Value is Pullenti.Ner.Geo.GeoReferent)
                {
                    found.Add(inner.Value as Pullenti.Ner.Geo.GeoReferent);
                }
            }
        }
    }

    // Scan from the end; indices are adjusted by hand because items are removed while scanning.
    for (int later = found.Count - 1; later > 0; later--)
    {
        for (int earlier = later - 1; earlier >= 0; earlier--)
        {
            if (_isHigher(found[later], found[earlier]))
            {
                Pullenti.Ner.Slot redundant = this.FindSlot(ATTR_GEO, found[later], true);
                if (redundant != null)
                {
                    Slots.Remove(redundant);
                }
                found.RemoveAt(later);
                break;
            }
            else if (_isHigher(found[earlier], found[later]))
            {
                Pullenti.Ner.Slot redundant = this.FindSlot(ATTR_GEO, found[earlier], true);
                if (redundant != null)
                {
                    Slots.Remove(redundant);
                }
                found.RemoveAt(earlier);
                // The item at 'later' moved one position down.
                later--;
            }
        }
    }

    if (found.Count != 2)
    {
        return;
    }

    // A territory with a parent is represented by that parent for the pairing below.
    for (int k = 0; k < found.Count; k++)
    {
        if (found[k].IsTerritory && found[k].Higher != null)
        {
            found[k] = found[k].Higher;
        }
    }

    Pullenti.Ner.Geo.GeoReferent region = null;
    Pullenti.Ner.Geo.GeoReferent city = null;
    if (found[0].IsCity && found[1].IsRegion)
    {
        city = found[0];
        region = found[1];
    }
    else if (found[1].IsCity && found[0].IsRegion)
    {
        city = found[1];
        region = found[0];
    }

    if (city != null && city.Higher == null && Pullenti.Ner.Geo.Internal.GeoOwnerHelper.CanBeHigher(region, city))
    {
        city.Higher = region;
        Pullenti.Ner.Slot regionSlot = this.FindSlot(ATTR_GEO, region, true);
        if (regionSlot != null)
        {
            Slots.Remove(regionSlot);
        }
        found = Geos;
        return;
    }

    Pullenti.Ner.Geo.GeoReferent state = null;
    Pullenti.Ner.Geo.GeoReferent other = null;
    if (found[0].IsState && !found[1].IsState)
    {
        state = found[0];
        other = found[1];
    }
    else if (found[1].IsState && !found[0].IsState)
    {
        state = found[1];
        other = found[0];
    }
    if (state == null)
    {
        return;
    }
    other = other.TopHigher;
    if (other.Higher == null)
    {
        other.Higher = state;
        Pullenti.Ner.Slot stateSlot = this.FindSlot(ATTR_GEO, state, true);
        if (stateSlot != null)
        {
            Slots.Remove(stateSlot);
        }
    }
}
// Token-level variant of CanBeHigher: decides whether the geo referent of rhi may be the
// parent of the geo referent of rlo, taking the surrounding text into account.
// Besides the referent rules it looks at grammatical case, token order, line breaks between
// the tokens and the preposition in front of rlo. "cityInRegion" records textual evidence
// for a city/region pairing; it is the result when CanBeHigher itself says no.
internal static bool CanBeHigherToken(Pullenti.Ner.Token rhi, Pullenti.Ner.Token rlo)
{
    if (rhi == null || rlo == null)
    {
        return false;
    }
    // An instrumental-only form is not accepted for the higher object.
    if (rhi.Morph.Case.IsInstrumental && !rhi.Morph.Case.IsGenitive)
    {
        return false;
    }
    Pullenti.Ner.Geo.GeoReferent hi = rhi.GetReferent() as Pullenti.Ner.Geo.GeoReferent;
    Pullenti.Ner.Geo.GeoReferent lo = rlo.GetReferent() as Pullenti.Ner.Geo.GeoReferent;
    if (hi == null || lo == null)
    {
        return false;
    }

    bool cityInRegion = false;

    // Case 1: a city-typed hi immediately followed by a district-like lo in the genitive.
    if (hi.IsCity && lo.IsRegion)
    {
        bool hiIsCityTyped = hi.FindSlot(Pullenti.Ner.Geo.GeoReferent.ATTR_TYPE, "город", true) != null
            || hi.FindSlot(Pullenti.Ner.Geo.GeoReferent.ATTR_TYPE, "місто", true) != null
            || hi.FindSlot(Pullenti.Ner.Geo.GeoReferent.ATTR_TYPE, "city", true) != null;
        if (hiIsCityTyped)
        {
            string loTypes = _getTypesString(lo);
            bool districtLike = loTypes.Contains("район")
                || loTypes.Contains("административный округ")
                || loTypes.Contains("муниципальный округ")
                || loTypes.Contains("адміністративний округ")
                || loTypes.Contains("муніципальний округ")
                || lo.FindSlot(Pullenti.Ner.Geo.GeoReferent.ATTR_TYPE, "округ", true) != null;
            if (districtLike && rhi.Next == rlo && rlo.Morph.Case.IsGenitive)
            {
                cityInRegion = true;
            }
        }
    }

    // Case 2: a region hi with a city lo.
    if (hi.IsRegion && lo.IsCity)
    {
        bool loIsCityTyped = lo.FindSlot(Pullenti.Ner.Geo.GeoReferent.ATTR_TYPE, "город", true) != null
            || lo.FindSlot(Pullenti.Ner.Geo.GeoReferent.ATTR_TYPE, "місто", true) != null
            || lo.FindSlot(Pullenti.Ner.Geo.GeoReferent.ATTR_TYPE, "city", true) != null;
        if (!loIsCityTyped)
        {
            cityInRegion = true;
        }
        else if (_getTypesString(hi) == "район;")
        {
            if (hi.Higher != null && hi.Higher.IsRegion)
            {
                cityInRegion = true;
            }
            else if (rhi.EndChar <= rlo.BeginChar && rhi.Next.IsComma)
            {
                // The original tested this once with an extra "not genitive" condition and once
                // without it; the second test subsumes the first, so one test is enough.
                cityInRegion = true;
            }
        }
    }

    // hi stands before lo in the text.
    if (rhi.EndChar <= rlo.BeginChar)
    {
        if (!rhi.Morph.Class.IsAdjective && hi.IsState && !rhi.Chars.IsLatinLetter)
        {
            return false;
        }
        if ((rhi.IsNewlineAfter || rlo.IsNewlineBefore) && !cityInRegion)
        {
            return false;
        }
    }

    // The preposition in front of lo must agree with lo's case (Ukrainian and Russian pairs).
    Pullenti.Ner.Token prep = rlo.Previous;
    if (prep != null && prep.Morph.Class.IsPreposition)
    {
        bool ukrainian = prep.Morph.Language.IsUa;
        string inWord = (ukrainian ? "У" : "В");
        string fromWord = (ukrainian ? "З" : "ИЗ");
        if (prep.IsValue(inWord, null) && !rlo.Morph.Case.IsDative && !rlo.Morph.Case.IsPrepositional && !rlo.Morph.Case.IsUndefined)
        {
            return false;
        }
        if (prep.IsValue(fromWord, null) && !rlo.Morph.Case.IsGenitive && !rlo.Morph.Case.IsUndefined)
        {
            return false;
        }
    }

    if (CanBeHigher(hi, lo))
    {
        return true;
    }
    return cityInRegion;
}
// Tries to attach an organization written as "<Name words> <type suffix>".
//
// t: first token of the candidate; a leading '(' is skipped and then requires a closing ')'
//    (or the end of the token chain) after the result.
// canBeCyr: when true, Cyrillic name words are accepted in addition to Latin ones.
// Returns a token with a new OrganizationReferent, or null when nothing can be attached.
//
// Steps: validate the first token; scan forward collecting name words until TryAttach finds a
// type item (bracketed geo referents, bracketed types and bracketed capitalized words are
// consumed on the way); build the name (a bracketed part is cut out of the main name and the
// full text is kept as an alternative name); add type strings; absorb one more type item after
// trailing commas/dots.
//
// Fixes relative to the previous version:
//  - the tail after a bracketed part was taken with Substring(i2), so it started with ')' and
//    names became "ABC ) DEF"; it is now taken from i2 + 1 and an empty tail is ignored;
//  - with an opening bracket and no following token, the result's EndToken was set to null;
//    the end token is now left unchanged in that case.
public static Pullenti.Ner.ReferentToken TryAttachOrg(Pullenti.Ner.Token t, bool canBeCyr = false)
{
    if (t == null)
    {
        return null;
    }
    bool br = false;
    if (t.IsChar('(') && t.Next != null)
    {
        t = t.Next;
        br = true;
    }
    if (t is Pullenti.Ner.NumberToken)
    {
        // A number is accepted only as a capitalized adjective written in words.
        bool wordNumber = (t as Pullenti.Ner.NumberToken).Typ == Pullenti.Ner.NumberSpellingType.Words && t.Morph.Class.IsAdjective && t.Chars.IsCapitalUpper;
        if (!wordNumber)
        {
            return null;
        }
    }
    else
    {
        if (t.Chars.IsAllLower)
        {
            return null;
        }
        if ((t.LengthChar < 3) && !t.Chars.IsLetter)
        {
            return null;
        }
        if (!t.Chars.IsLatinLetter)
        {
            if (!canBeCyr || !t.Chars.IsCyrillicLetter)
            {
                return null;
            }
        }
    }

    Pullenti.Ner.Token t0 = t;
    Pullenti.Ner.Token t1 = t0;
    int namWo = 0;
    OrgItemEngItem tok = null;
    Pullenti.Ner.Geo.GeoReferent geo = null;
    OrgItemTypeToken addTyp = null;
    for (; t != null; t = t.Next)
    {
        if (t != t0 && t.WhitespacesBeforeCount > 1)
        {
            break;
        }
        if (t.IsChar(')'))
        {
            break;
        }
        if (t.IsChar('(') && t.Next != null)
        {
            // "(<geo>)" - remember the geo object and skip the brackets.
            if ((t.Next.GetReferent() is Pullenti.Ner.Geo.GeoReferent) && t.Next.Next != null && t.Next.Next.IsChar(')'))
            {
                geo = t.Next.GetReferent() as Pullenti.Ner.Geo.GeoReferent;
                t = t.Next.Next;
                continue;
            }
            // "(<latin type>)" - remember it as an additional type.
            OrgItemTypeToken typ = OrgItemTypeToken.TryAttach(t.Next, true, null);
            if ((typ != null && typ.EndToken.Next != null && typ.EndToken.Next.IsChar(')')) && typ.Chars.IsLatinLetter)
            {
                addTyp = typ;
                t = typ.EndToken.Next;
                continue;
            }
            // "(<Capitalized word>)" - becomes part of the name.
            if (((t.Next is Pullenti.Ner.TextToken) && t.Next.Next != null && t.Next.Next.IsChar(')')) && t.Next.Chars.IsCapitalUpper)
            {
                t1 = (t = t.Next.Next);
                continue;
            }
            break;
        }
        tok = TryAttach(t, canBeCyr);
        if (tok == null && t.IsCharOf(".,") && t.Next != null)
        {
            // Allow one or two punctuation marks between the name and the type item.
            tok = TryAttach(t.Next, canBeCyr);
            if (tok == null && t.Next.IsCharOf(",."))
            {
                tok = TryAttach(t.Next.Next, canBeCyr);
            }
        }
        if (tok != null)
        {
            if (tok.LengthChar == 1 && t0.Chars.IsCyrillicLetter)
            {
                return null;
            }
            break;
        }
        if (t.IsHiphen && !t.IsWhitespaceAfter && !t.IsWhitespaceBefore)
        {
            continue;
        }
        if (t.IsCharOf("&+") || t.IsAnd)
        {
            continue;
        }
        if (t.IsChar('.'))
        {
            if (t.Previous != null && t.Previous.LengthChar == 1)
            {
                continue;
            }
            else if (Pullenti.Ner.Core.MiscHelper.CanBeStartOfSentence(t.Next))
            {
                break;
            }
        }
        if (!t.Chars.IsLatinLetter)
        {
            if (!canBeCyr || !t.Chars.IsCyrillicLetter)
            {
                break;
            }
        }
        if (t.Chars.IsAllLower)
        {
            if (t.Morph.Class.IsPreposition || t.Morph.Class.IsConjunction)
            {
                continue;
            }
            if (br)
            {
                continue;
            }
            break;
        }
        Pullenti.Morph.MorphClass mc = t.GetMorphClassInDictionary();
        if (mc.IsVerb)
        {
            if (t.Next != null && t.Next.Morph.Class.IsPreposition)
            {
                break;
            }
        }
        if (t.Next != null && t.Next.IsValue("OF", null))
        {
            break;
        }
        if (t is Pullenti.Ner.TextToken)
        {
            namWo++;
        }
        t1 = t;
    }
    if (tok == null)
    {
        return null;
    }

    if (t0 == tok.BeginToken)
    {
        // The type item comes first: the name must follow in brackets/quotes.
        Pullenti.Ner.Core.BracketSequenceToken br2 = Pullenti.Ner.Core.BracketHelper.TryParse(tok.EndToken.Next, Pullenti.Ner.Core.BracketParseAttr.No, 100);
        if (br2 != null)
        {
            Pullenti.Ner.Org.OrganizationReferent org1 = new Pullenti.Ner.Org.OrganizationReferent();
            if (tok.ShortValue != null)
            {
                org1.AddTypeStr(tok.ShortValue);
            }
            org1.AddTypeStr(tok.FullValue);
            string nam1 = Pullenti.Ner.Core.MiscHelper.GetTextValue(br2.BeginToken, br2.EndToken, Pullenti.Ner.Core.GetTextAttr.No);
            if (nam1 != null)
            {
                org1.AddName(nam1, true, null);
                return new Pullenti.Ner.ReferentToken(org1, t0, br2.EndToken);
            }
        }
        return null;
    }

    Pullenti.Ner.Org.OrganizationReferent org = new Pullenti.Ner.Org.OrganizationReferent();
    Pullenti.Ner.Token te = tok.EndToken;
    if (tok.IsBank)
    {
        t1 = tok.EndToken;
    }
    if (tok.FullValue == "company" && (tok.WhitespacesAfterCount < 3))
    {
        // "... Company Ltd": the word "company" joins the name, the next item is the type.
        OrgItemEngItem tok1 = TryAttach(tok.EndToken.Next, canBeCyr);
        if (tok1 != null)
        {
            t1 = tok.EndToken;
            tok = tok1;
            te = tok.EndToken;
        }
    }
    if (tok.FullValue == "company")
    {
        if (namWo == 0)
        {
            return null;
        }
    }
    string nam = Pullenti.Ner.Core.MiscHelper.GetTextValue(t0, t1, Pullenti.Ner.Core.GetTextAttr.IgnoreArticles);
    if (nam == "STOCK" && tok.FullValue == "company")
    {
        return null;
    }
    string altNam = null;
    if (string.IsNullOrEmpty(nam))
    {
        return null;
    }
    if (nam.IndexOf('(') > 0)
    {
        int i1 = nam.IndexOf('(');
        int i2 = nam.IndexOf(')');
        if (i1 < i2)
        {
            // Keep the full text as an alternative name and cut the bracketed part out of the main one.
            altNam = nam;
            string tai = null;
            if ((i2 + 1) < nam.Length)
            {
                // Start after the closing bracket so that ')' does not leak into the name.
                tai = nam.Substring(i2 + 1).Trim();
                if (tai.Length == 0)
                {
                    tai = null;
                }
            }
            nam = nam.Substring(0, i1).Trim();
            if (tai != null)
            {
                nam = string.Format("{0} {1}", nam, tai);
            }
        }
    }
    if (tok.IsBank)
    {
        org.AddTypeStr((tok.Kit.BaseLanguage.IsEn ? "bank" : "банк"));
        org.AddProfile(Pullenti.Ner.Org.OrgProfile.Finance);
        if ((t1.Next != null && t1.Next.IsValue("OF", null) && t1.Next.Next != null) && t1.Next.Next.Chars.IsLatinLetter)
        {
            // "Bank of <Something>": extend the name over the "of" phrase.
            OrgItemNameToken nam0 = OrgItemNameToken.TryAttach(t1.Next, null, false, false);
            if (nam0 != null)
            {
                te = nam0.EndToken;
            }
            else
            {
                te = t1.Next.Next;
            }
            nam = Pullenti.Ner.Core.MiscHelper.GetTextValue(t0, te, Pullenti.Ner.Core.GetTextAttr.No);
            if (te.GetReferent() is Pullenti.Ner.Geo.GeoReferent)
            {
                org.AddGeoObject(te.GetReferent() as Pullenti.Ner.Geo.GeoReferent);
            }
        }
        else if (t0 == t1)
        {
            return null;
        }
    }
    else
    {
        if (tok.ShortValue != null)
        {
            org.AddTypeStr(tok.ShortValue);
        }
        org.AddTypeStr(tok.FullValue);
    }
    if (string.IsNullOrEmpty(nam))
    {
        return null;
    }
    org.AddName(nam, true, null);
    if (altNam != null)
    {
        org.AddName(altNam, true, null);
    }
    Pullenti.Ner.ReferentToken res = new Pullenti.Ner.ReferentToken(org, t0, te);

    // Skip trailing commas/dots and try to absorb one more type item.
    t = te;
    while (t.Next != null)
    {
        if (t.Next.IsCharOf(",."))
        {
            t = t.Next;
        }
        else
        {
            break;
        }
    }
    if (t.WhitespacesAfterCount < 2)
    {
        tok = TryAttach(t.Next, canBeCyr);
        if (tok != null)
        {
            if (tok.ShortValue != null)
            {
                org.AddTypeStr(tok.ShortValue);
            }
            org.AddTypeStr(tok.FullValue);
            res.EndToken = tok.EndToken;
        }
    }
    if (geo != null)
    {
        org.AddGeoObject(geo);
    }
    if (addTyp != null)
    {
        org.AddType(addTyp, false);
    }
    if (!br)
    {
        return res;
    }

    // The candidate started with '(': it must be closed right after the result.
    t = res.EndToken;
    if (t.Next == null)
    {
        // End of the token chain: accept, but never assign a null end token.
        return res;
    }
    if (t.Next.IsChar(')'))
    {
        res.EndToken = t.Next;
        return res;
    }
    return null;
}
/// <summary>
/// Tries to build a territory referent token from a sequence of territory item tokens.
/// </summary>
/// <remarks>
/// The items of <paramref name="li"/> are scanned left to right and classified as an
/// existing ontology object (<c>OntoItem != null</c>), a type noun or adjective
/// (<c>TerminItem != null</c>), or a candidate new proper name (neither). The scan stops
/// at the first item that does not fit; <c>k</c> is the number of consumed items.
/// After that a series of contextual checks decides whether a
/// <see cref="Pullenti.Ner.Geo.GeoReferent"/> is produced.
/// </remarks>
/// <param name="li">Territory items to analyse; <c>null</c> or empty yields <c>null</c>.</param>
/// <param name="ad">Analyzer data, forwarded to <c>_tryAttachMoscowAO</c> and <c>_tryAttachPureTerr</c>.</param>
/// <param name="attachAlways">When <c>true</c>, several of the contextual rejection checks are skipped.</param>
/// <param name="cits">Optional city items; only the type of the first item (and the list length) is inspected.</param>
/// <param name="exists">Optional already-known geo referents; a dictionary-word name is accepted if one of them carries it.</param>
/// <returns>The referent token covering the recognised territory, or <c>null</c> when nothing is attached.</returns>
public static Pullenti.Ner.ReferentToken TryAttachTerritory(List<TerrItemToken> li, Pullenti.Ner.Core.AnalyzerData ad, bool attachAlways = false, List<CityItemToken> cits = null, List<Pullenti.Ner.Geo.GeoReferent> exists = null)
{
    if (li == null || li.Count == 0) {
        return null;
    }
    TerrItemToken exObj = null;
    TerrItemToken newName = null;
    List<TerrItemToken> adjList = new List<TerrItemToken>();
    TerrItemToken noun = null;
    TerrItemToken addNoun = null;

    Pullenti.Ner.ReferentToken rt = _tryAttachMoscowAO(li, ad);
    if (rt != null) {
        return rt;
    }
    if (li[0].TerminItem != null && li[0].TerminItem.CanonicText == "ТЕРРИТОРИЯ") {
        return _tryAttachPureTerr(li, ad);
    }

    // Two-item case: an Rzd item paired with an RzdDir item, in either order.
    if (li.Count == 2) {
        if (li[0].Rzd != null && li[1].RzdDir != null) {
            Pullenti.Ner.Geo.GeoReferent rzd = new Pullenti.Ner.Geo.GeoReferent();
            rzd.AddName(li[1].RzdDir);
            rzd.AddTypTer(li[0].Kit.BaseLanguage);
            rzd.AddSlot(Pullenti.Ner.Geo.GeoReferent.ATTR_REF, li[0].Rzd.Referent, false, 0);
            rzd.AddExtReferent(li[0].Rzd);
            return new Pullenti.Ner.ReferentToken(rzd, li[0].BeginToken, li[1].EndToken);
        }
        if (li[1].Rzd != null && li[0].RzdDir != null) {
            Pullenti.Ner.Geo.GeoReferent rzd = new Pullenti.Ner.Geo.GeoReferent();
            rzd.AddName(li[0].RzdDir);
            rzd.AddTypTer(li[0].Kit.BaseLanguage);
            rzd.AddSlot(Pullenti.Ner.Geo.GeoReferent.ATTR_REF, li[1].Rzd.Referent, false, 0);
            rzd.AddExtReferent(li[1].Rzd);
            return new Pullenti.Ner.ReferentToken(rzd, li[0].BeginToken, li[1].EndToken);
        }
    }

    bool canBeCityBefore = false;
    bool adjTerrBefore = false;
    // Guard against an empty list: the original indexed cits[0] after only a null check.
    if (cits != null && cits.Count > 0) {
        if (cits[0].Typ == CityItemToken.ItemType.City) {
            canBeCityBefore = true;
        }
        else if (cits[0].Typ == CityItemToken.ItemType.Noun && cits.Count > 1) {
            canBeCityBefore = true;
        }
    }

    // Classification pass. Every "break" below leaves this loop; k ends as the count of consumed items.
    int k;
    for (k = 0; k < li.Count; k++) {
        if (li[k].OntoItem != null) {
            if (exObj != null || newName != null) {
                break;
            }
            if (noun != null && k == 1) {
                if (noun.TerminItem.CanonicText == "РАЙОН" || noun.TerminItem.CanonicText == "ОБЛАСТЬ" || noun.TerminItem.CanonicText == "СОЮЗ") {
                    if (li[k].OntoItem.Referent is Pullenti.Ner.Geo.GeoReferent) {
                        if ((li[k].OntoItem.Referent as Pullenti.Ner.Geo.GeoReferent).IsState) {
                            break;
                        }
                    }
                    bool ok = false;
                    Pullenti.Ner.Token tt = li[k].EndToken.Next;
                    if (tt == null) {
                        ok = true;
                    }
                    else if (tt.IsCharOf(",.")) {
                        ok = true;
                    }
                    if (!ok) {
                        ok = MiscLocationHelper.CheckGeoObjectBefore(li[0].BeginToken);
                    }
                    if (!ok) {
                        Pullenti.Ner.Address.Internal.AddressItemToken adr = Pullenti.Ner.Address.Internal.AddressItemToken.TryParse(tt, null, false, false, null);
                        if (adr != null && adr.Typ == Pullenti.Ner.Address.Internal.AddressItemToken.ItemType.Street) {
                            ok = true;
                        }
                    }
                    if (!ok) {
                        break;
                    }
                }
                // li[k].OntoItem is known to be non-null in this branch, so the original re-check was redundant.
                if (noun.BeginToken.IsValue("МО", null) || noun.BeginToken.IsValue("ЛО", null)) {
                    return null;
                }
            }
            exObj = li[k];
        }
        else if (li[k].TerminItem != null) {
            if (noun != null) {
                break;
            }
            if (li[k].TerminItem.IsAlwaysPrefix && k > 0) {
                break;
            }
            if (k > 0 && li[k].IsDoubt) {
                if (li[k].BeginToken == li[k].EndToken && li[k].BeginToken.IsValue("ЗАО", null)) {
                    break;
                }
            }
            if (li[k].TerminItem.IsAdjective || li[k].IsGeoInDictionary) {
                adjList.Add(li[k]);
            }
            else {
                if (exObj != null) {
                    Pullenti.Ner.Geo.GeoReferent geo = exObj.OntoItem.Referent as Pullenti.Ner.Geo.GeoReferent;
                    if (geo == null) {
                        break;
                    }
                    if (exObj.IsAdjective && (li[k].TerminItem.CanonicText == "СОЮЗ" || li[k].TerminItem.CanonicText == "ФЕДЕРАЦИЯ")) {
                        string str = exObj.OntoItem.ToString();
                        if (!str.Contains(li[k].TerminItem.CanonicText)) {
                            return null;
                        }
                    }
                    if (li[k].TerminItem.CanonicText == "РАЙОН" || li[k].TerminItem.CanonicText == "ОКРУГ" || li[k].TerminItem.CanonicText == "КРАЙ") {
                        // Collect the known type strings of the ontology object.
                        StringBuilder tmp = new StringBuilder();
                        foreach (Pullenti.Ner.Slot s in geo.Slots) {
                            if (s.TypeName == Pullenti.Ner.Geo.GeoReferent.ATTR_TYPE) {
                                tmp.AppendFormat("{0};", s.Value);
                            }
                        }
                        if (!tmp.ToString().ToUpper().Contains(li[k].TerminItem.CanonicText)) {
                            // The known object does not have this type: reinterpret li[0] as an adjective name.
                            if (k != 1 || newName != null) {
                                break;
                            }
                            newName = li[0];
                            newName.IsAdjective = true;
                            newName.OntoItem = null;
                            exObj = null;
                        }
                    }
                }
                noun = li[k];
                if (k == 0) {
                    TerrItemToken tt = TerrItemToken.TryParse(li[k].BeginToken.Previous, null, true, false, null);
                    if (tt != null && tt.Morph.Class.IsAdjective) {
                        adjTerrBefore = true;
                    }
                }
            }
        }
        else {
            if (exObj != null) {
                break;
            }
            if (newName != null) {
                break;
            }
            newName = li[k];
        }
    }

    string name = null;
    string altName = null;
    string fullName = null;
    // NOTE: the original also kept a local "morph" that was assigned but never read; it has been removed.

    if (exObj != null) {
        if (exObj.IsAdjective && !exObj.Morph.Language.IsEn && noun == null) {
            if (attachAlways && exObj.EndToken.Next != null) {
                Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(exObj.BeginToken, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                if (!exObj.EndToken.Next.IsCommaAnd && npt != null) {
                    Pullenti.Ner.Address.Internal.StreetItemToken str = Pullenti.Ner.Address.Internal.StreetItemToken.TryParse(exObj.EndToken.Next, null, false, null, false);
                    if (str != null) {
                        if (str.Typ == Pullenti.Ner.Address.Internal.StreetItemType.Noun && str.EndToken == npt.EndToken) {
                            return null;
                        }
                    }
                }
            }
            else {
                CityItemToken cit = CityItemToken.TryParse(exObj.EndToken.Next, null, false, null);
                if (cit != null && (cit.Typ == CityItemToken.ItemType.Noun || cit.Typ == CityItemToken.ItemType.City)) {
                    Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(exObj.BeginToken, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                    if (!(npt != null && npt.EndToken == cit.EndToken)) {
                        return null;
                    }
                }
                else if (!exObj.BeginToken.IsValue("ПОДНЕБЕСНЫЙ", null)) {
                    return null;
                }
            }
        }
        if (noun == null && exObj.CanBeCity) {
            CityItemToken cit0 = CityItemToken.TryParseBack(exObj.BeginToken.Previous);
            if (cit0 != null && cit0.Typ != CityItemToken.ItemType.ProperName) {
                return null;
            }
        }
        if (exObj.IsDoubt && noun == null) {
            bool ok2 = false;
            if (_canBeGeoAfter(exObj.EndToken.Next)) {
                ok2 = true;
            }
            else if (!exObj.CanBeSurname && !exObj.CanBeCity) {
                if ((exObj.EndToken.Next != null && exObj.EndToken.Next.IsChar(')') && exObj.BeginToken.Previous != null) && exObj.BeginToken.Previous.IsChar('(')) {
                    ok2 = true;
                }
                else if (exObj.Chars.IsLatinLetter && exObj.BeginToken.Previous != null) {
                    if (exObj.BeginToken.Previous.IsValue("IN", null)) {
                        ok2 = true;
                    }
                    else if (exObj.BeginToken.Previous.IsValue("THE", null) && exObj.BeginToken.Previous.Previous != null && exObj.BeginToken.Previous.Previous.IsValue("IN", null)) {
                        ok2 = true;
                    }
                }
            }
            if (!ok2) {
                CityItemToken cit0 = CityItemToken.TryParseBack(exObj.BeginToken.Previous);
                bool cityItemBefore = cit0 != null && cit0.Typ != CityItemToken.ItemType.ProperName;
                if (!cityItemBefore && !MiscLocationHelper.CheckGeoObjectBefore(exObj.BeginToken.Previous)) {
                    return null;
                }
            }
        }
        name = exObj.OntoItem.CanonicText;
    }
    else if (newName != null) {
        if (noun == null) {
            return null;
        }
        // An item starting on a new line (while the first one does not) is only tolerated at a bracket start.
        for (int j = 1; j < k; j++) {
            if (li[j].IsNewlineBefore && !li[0].IsNewlineBefore) {
                if (!Pullenti.Ner.Core.BracketHelper.CanBeStartOfSequence(li[j].BeginToken, false, false)) {
                    return null;
                }
            }
        }
        if (newName.IsAdjective) {
            if (noun.TerminItem.Acronym == "АО") {
                if (noun.BeginToken != noun.EndToken) {
                    return null;
                }
                if (newName.Morph.Gender != Pullenti.Morph.MorphGender.Feminie) {
                    return null;
                }
            }
            Pullenti.Ner.Geo.GeoReferent geoBefore = null;
            Pullenti.Ner.Token tt0 = li[0].BeginToken.Previous;
            if (tt0 != null && tt0.IsCommaAnd) {
                tt0 = tt0.Previous;
            }
            if (!li[0].IsNewlineBefore && tt0 != null) {
                geoBefore = tt0.GetReferent() as Pullenti.Ner.Geo.GeoReferent;
            }
            if (li.IndexOf(noun) < li.IndexOf(newName)) {
                // Noun precedes the adjective name.
                if (noun.TerminItem.IsState) {
                    return null;
                }
                if (newName.CanBeSurname && geoBefore == null) {
                    if ((noun.Morph.Case & newName.Morph.Case).IsUndefined) {
                        return null;
                    }
                }
                if (Pullenti.Ner.Core.MiscHelper.IsExistsInDictionary(newName.BeginToken, newName.EndToken, Pullenti.Morph.MorphClass.Adjective | Pullenti.Morph.MorphClass.Pronoun | Pullenti.Morph.MorphClass.Verb)) {
                    if (noun.BeginToken != newName.BeginToken && geoBefore == null) {
                        bool contextOk = (li.Count == 2 && _canBeGeoAfter(li[1].EndToken.Next))
                            || (li.Count == 3 && li[2].TerminItem != null && _canBeGeoAfter(li[2].EndToken.Next))
                            || newName.IsGeoInDictionary
                            || newName.EndToken.IsNewlineAfter;
                        if (!contextOk) {
                            return null;
                        }
                    }
                }
                Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(newName.EndToken, Pullenti.Ner.Core.NounPhraseParseAttr.ParsePronouns, 0, null);
                if (npt != null && npt.EndToken != newName.EndToken) {
                    if (li.Count >= 3 && li[2].TerminItem != null && npt.EndToken == li[2].EndToken) {
                        addNoun = li[2];
                    }
                    else {
                        return null;
                    }
                }
                Pullenti.Ner.ReferentToken rtp = newName.Kit.ProcessReferent("PERSON", newName.BeginToken);
                if (rtp != null) {
                    return null;
                }
                name = Pullenti.Ner.Core.ProperNameHelper.GetNameEx(newName.BeginToken, newName.EndToken, Pullenti.Morph.MorphClass.Adjective, Pullenti.Morph.MorphCase.Undefined, noun.TerminItem.Gender, false, false);
            }
            else {
                // Adjective name precedes the noun.
                bool ok = false;
                if (((k + 1) < li.Count) && li[k].TerminItem == null && li[k + 1].TerminItem != null) {
                    ok = true;
                }
                else if ((k < li.Count) && li[k].OntoItem != null) {
                    ok = true;
                }
                else if (k == li.Count && !newName.IsAdjInDictionary) {
                    ok = true;
                }
                else if (MiscLocationHelper.CheckGeoObjectBefore(li[0].BeginToken) || canBeCityBefore) {
                    ok = true;
                }
                else if (MiscLocationHelper.CheckGeoObjectAfter(li[k - 1].EndToken, false)) {
                    ok = true;
                }
                else if (li.Count == 3 && k == 2) {
                    CityItemToken cit = CityItemToken.TryParse(li[2].BeginToken, null, false, null);
                    if (cit != null) {
                        if (cit.Typ == CityItemToken.ItemType.City || cit.Typ == CityItemToken.ItemType.Noun) {
                            ok = true;
                        }
                    }
                }
                else if (li.Count == 2) {
                    ok = _canBeGeoAfter(li[li.Count - 1].EndToken.Next);
                }
                if (!ok && !li[0].IsNewlineBefore && !li[0].Chars.IsAllLower) {
                    Pullenti.Ner.ReferentToken rt00 = li[0].Kit.ProcessReferent("PERSONPROPERTY", li[0].BeginToken.Previous);
                    if (rt00 != null) {
                        ok = true;
                    }
                }
                if (noun.TerminItem != null && noun.TerminItem.IsStrong && newName.IsAdjective) {
                    ok = true;
                }
                if (noun.IsDoubt && adjList.Count == 0 && geoBefore == null) {
                    return null;
                }
                name = Pullenti.Ner.Core.ProperNameHelper.GetNameEx(newName.BeginToken, newName.EndToken, Pullenti.Morph.MorphClass.Adjective, Pullenti.Morph.MorphCase.Undefined, noun.TerminItem.Gender, false, false);
                if (!ok && !attachAlways) {
                    if (Pullenti.Ner.Core.MiscHelper.IsExistsInDictionary(newName.BeginToken, newName.EndToken, Pullenti.Morph.MorphClass.Adjective | Pullenti.Morph.MorphClass.Pronoun | Pullenti.Morph.MorphClass.Verb)) {
                        // A dictionary word is only accepted when an already-known geo object carries this name.
                        if (exists != null) {
                            foreach (Pullenti.Ner.Geo.GeoReferent e in exists) {
                                if (e.FindSlot(Pullenti.Ner.Geo.GeoReferent.ATTR_NAME, name, true) != null) {
                                    ok = true;
                                    break;
                                }
                            }
                        }
                        if (!ok) {
                            return null;
                        }
                    }
                }
                fullName = string.Format("{0} {1}", Pullenti.Ner.Core.ProperNameHelper.GetNameEx(li[0].BeginToken, noun.BeginToken.Previous, Pullenti.Morph.MorphClass.Adjective, Pullenti.Morph.MorphCase.Undefined, noun.TerminItem.Gender, false, false), noun.TerminItem.CanonicText);
            }
        }
        else {
            if (!attachAlways || (noun.TerminItem != null && noun.TerminItem.CanonicText == "ФЕДЕРАЦИЯ")) {
                bool isLatin = noun.Chars.IsLatinLetter && newName.Chars.IsLatinLetter;
                if (li.IndexOf(noun) > li.IndexOf(newName)) {
                    if (!isLatin) {
                        return null;
                    }
                }
                if (!newName.IsDistrictName && !Pullenti.Ner.Core.BracketHelper.CanBeStartOfSequence(newName.BeginToken, false, false)) {
                    if (adjList.Count == 0 && Pullenti.Ner.Core.MiscHelper.IsExistsInDictionary(newName.BeginToken, newName.EndToken, Pullenti.Morph.MorphClass.Noun | Pullenti.Morph.MorphClass.Pronoun)) {
                        if (!(li.Count == 2 && noun.IsCityRegion && (noun.WhitespacesAfterCount < 2))) {
                            return null;
                        }
                    }
                    if (!isLatin) {
                        if ((noun.TerminItem.IsRegion && !attachAlways && (!adjTerrBefore || newName.IsDoubt)) && !noun.IsCityRegion && !noun.TerminItem.IsSpecificPrefix) {
                            if (!MiscLocationHelper.CheckGeoObjectBefore(noun.BeginToken)) {
                                if (!(!noun.IsDoubt && noun.BeginToken != noun.EndToken)
                                    && !((noun.TerminItem.IsAlwaysPrefix && li.Count == 2 && li[0] == noun) && li[1] == newName)) {
                                    return null;
                                }
                            }
                        }
                        if (noun.IsDoubt && adjList.Count == 0) {
                            if (noun.TerminItem.Acronym == "МО" || noun.TerminItem.Acronym == "ЛО") {
                                if (k == (li.Count - 1) && li[k].TerminItem != null) {
                                    // Consume the trailing type item as an additional noun.
                                    addNoun = li[k];
                                    k++;
                                }
                                else if (!(li.Count == 2 && noun == li[0] && newName.ToString().EndsWith("совет"))) {
                                    return null;
                                }
                            }
                            else {
                                return null;
                            }
                        }
                        Pullenti.Ner.ReferentToken pers = newName.Kit.ProcessReferent("PERSON", newName.BeginToken);
                        if (pers != null) {
                            return null;
                        }
                    }
                }
            }
            name = Pullenti.Ner.Core.MiscHelper.GetTextValue(newName.BeginToken, newName.EndToken, Pullenti.Ner.Core.GetTextAttr.No);
            if (newName.BeginToken != newName.EndToken) {
                // Cut the name before an embedded type word that overlaps with the noun's canonic text.
                for (Pullenti.Ner.Token ttt = newName.BeginToken.Next; ttt != null && ttt.EndChar <= newName.EndChar; ttt = ttt.Next) {
                    if (ttt.Chars.IsLetter) {
                        TerrItemToken ty = TerrItemToken.TryParse(ttt, null, false, false, null);
                        if ((ty != null && ty.TerminItem != null && noun != null) && (ty.TerminItem.CanonicText.Contains(noun.TerminItem.CanonicText) || noun.TerminItem.CanonicText.Contains(ty.TerminItem.CanonicText))) {
                            name = Pullenti.Ner.Core.MiscHelper.GetTextValue(newName.BeginToken, ttt.Previous, Pullenti.Ner.Core.GetTextAttr.No);
                            break;
                        }
                    }
                }
            }
            if (adjList.Count > 0) {
                Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(adjList[0].BeginToken, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                if (npt != null && npt.EndToken == noun.EndToken) {
                    altName = string.Format("{0} {1}", npt.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false), name);
                }
            }
        }
    }
    else {
        // Only a type noun was found: try to bind it to a geo referent that is already in the text.
        if ((li.Count == 1 && noun != null && noun.EndToken.Next != null) && (noun.EndToken.Next.GetReferent() is Pullenti.Ner.Geo.GeoReferent)) {
            Pullenti.Ner.Geo.GeoReferent g = noun.EndToken.Next.GetReferent() as Pullenti.Ner.Geo.GeoReferent;
            if (noun.TerminItem != null) {
                string tyy = noun.TerminItem.CanonicText.ToLower();
                bool ooo = false;
                if (g.FindSlot(Pullenti.Ner.Geo.GeoReferent.ATTR_TYPE, tyy, true) != null) {
                    ooo = true;
                }
                else if (tyy.EndsWith("район") && g.FindSlot(Pullenti.Ner.Geo.GeoReferent.ATTR_TYPE, "район", true) != null) {
                    ooo = true;
                }
                if (ooo) {
                    // Fix: the return statement is now terminated inside the block (the ';' was after the closing brace).
                    return new Pullenti.Ner.ReferentToken(g, noun.BeginToken, noun.EndToken.Next) { Morph = noun.BeginToken.Morph };
                }
            }
        }
        if ((li.Count == 1 && noun == li[0] && li[0].TerminItem != null) && TerrItemToken.TryParse(li[0].EndToken.Next, null, true, false, null) == null && TerrItemToken.TryParse(li[0].BeginToken.Previous, null, true, false, null) == null) {
            if (li[0].Morph.Number == Pullenti.Morph.MorphNumber.Plural) {
                return null;
            }
            int cou = 0;
            string str = li[0].TerminItem.CanonicText.ToLower();
            // Walk backwards to the nearest geo referent; a newline costs 10, any other token 1, budget 500.
            for (Pullenti.Ner.Token tt = li[0].BeginToken.Previous; tt != null; tt = tt.Previous) {
                if (tt.IsNewlineAfter) {
                    cou += 10;
                }
                else {
                    cou++;
                }
                if (cou > 500) {
                    break;
                }
                Pullenti.Ner.Geo.GeoReferent g = tt.GetReferent() as Pullenti.Ner.Geo.GeoReferent;
                if (g == null) {
                    continue;
                }
                bool ok = true;
                cou = 0;
                // The loop variable is deliberately reused for the forward look-ahead: the outer loop
                // is left unconditionally right after it, so its position no longer matters.
                for (tt = li[0].EndToken.Next; tt != null; tt = tt.Next) {
                    if (tt.IsNewlineBefore) {
                        cou += 10;
                    }
                    else {
                        cou++;
                    }
                    if (cou > 500) {
                        break;
                    }
                    TerrItemToken tee = TerrItemToken.TryParse(tt, null, true, false, null);
                    if (tee == null) {
                        continue;
                    }
                    ok = false;
                    break;
                }
                if (ok) {
                    // Check the found object and up to two of its Higher ancestors for the same type.
                    for (int ii = 0; g != null && (ii < 3); g = g.Higher, ii++) {
                        if (g.FindSlot(Pullenti.Ner.Geo.GeoReferent.ATTR_TYPE, str, true) != null) {
                            return new Pullenti.Ner.ReferentToken(g, li[0].BeginToken, li[0].EndToken) { Morph = noun.BeginToken.Morph };
                        }
                    }
                }
                break;
            }
        }
        return null;
    }

    Pullenti.Ner.Geo.GeoReferent ter = null;
    if (exObj != null && (exObj.Tag is Pullenti.Ner.Geo.GeoReferent)) {
        ter = exObj.Tag as Pullenti.Ner.Geo.GeoReferent;
    }
    else {
        ter = new Pullenti.Ner.Geo.GeoReferent();
        if (exObj != null) {
            Pullenti.Ner.Geo.GeoReferent geo = exObj.OntoItem.Referent as Pullenti.Ner.Geo.GeoReferent;
            if (geo != null && !geo.IsCity) {
                ter.MergeSlots2(geo, li[0].Kit.BaseLanguage);
            }
            else {
                ter.AddName(name);
            }
            if (noun == null && exObj.CanBeCity) {
                ter.AddTypCity(li[0].Kit.BaseLanguage);
            }
        }
        else if (newName != null) {
            ter.AddName(name);
            if (altName != null) {
                ter.AddName(altName);
            }
        }
        if (noun != null) {
            if (noun.TerminItem.CanonicText == "АО") {
                ter.AddTyp((li[0].Kit.BaseLanguage.IsUa ? "АВТОНОМНИЙ ОКРУГ" : "АВТОНОМНЫЙ ОКРУГ"));
            }
            else if (noun.TerminItem.CanonicText == "МУНИЦИПАЛЬНОЕ СОБРАНИЕ" || noun.TerminItem.CanonicText == "МУНІЦИПАЛЬНЕ ЗБОРИ") {
                ter.AddTyp((li[0].Kit.BaseLanguage.IsUa ? "МУНІЦИПАЛЬНЕ УТВОРЕННЯ" : "МУНИЦИПАЛЬНОЕ ОБРАЗОВАНИЕ"));
            }
            else if (noun.TerminItem.Acronym == "МО" && addNoun != null) {
                ter.AddTyp(addNoun.TerminItem.CanonicText);
            }
            else {
                if (noun.TerminItem.CanonicText == "СОЮЗ" && exObj != null && exObj.EndChar > noun.EndChar) {
                    return new Pullenti.Ner.ReferentToken(ter, exObj.BeginToken, exObj.EndToken) { Morph = exObj.Morph };
                }
                ter.AddTyp(noun.TerminItem.CanonicText);
                if (noun.TerminItem.IsRegion && ter.IsState) {
                    ter.AddTypReg(li[0].Kit.BaseLanguage);
                }
            }
        }
        if (ter.IsState && ter.IsRegion) {
            foreach (TerrItemToken a in adjList) {
                if (a.TerminItem.IsRegion) {
                    ter.AddTypReg(li[0].Kit.BaseLanguage);
                    break;
                }
            }
        }
        if (ter.IsState && fullName != null) {
            ter.AddName(fullName);
        }
    }

    Pullenti.Ner.ReferentToken res = new Pullenti.Ner.ReferentToken(ter, li[0].BeginToken, li[k - 1].EndToken);
    if (noun != null && noun.Morph.Class.IsNoun) {
        res.Morph = noun.Morph;
    }
    else {
        // Merge the morphology of all consumed items; with a noun present, adjective variants are re-classed as nouns.
        res.Morph = new Pullenti.Ner.MorphCollection();
        for (int ii = 0; ii < k; ii++) {
            foreach (Pullenti.Morph.MorphBaseInfo v in li[ii].Morph.Items) {
                Pullenti.Morph.MorphBaseInfo bi = new Pullenti.Morph.MorphBaseInfo();
                bi.CopyFrom(v);
                if (noun != null && bi.Class.IsAdjective) {
                    bi.Class = Pullenti.Morph.MorphClass.Noun;
                }
                res.Morph.AddItem(bi);
            }
        }
    }
    if (li[0].TerminItem != null && li[0].TerminItem.IsSpecificPrefix) {
        res.BeginToken = li[0].EndToken.Next;
    }
    if (addNoun != null && addNoun.EndChar > res.EndChar) {
        res.EndToken = addNoun.EndToken;
    }
    if ((res.BeginToken.Previous is Pullenti.Ner.TextToken) && (res.WhitespacesBeforeCount < 2)) {
        Pullenti.Ner.TextToken tt = res.BeginToken.Previous as Pullenti.Ner.TextToken;
        if (tt.Term == "АР") {
            // Include a directly preceding "АР" token when the referent has a republic type.
            foreach (string ty in ter.Typs) {
                if (ty.Contains("республика") || ty.Contains("республіка")) {
                    res.BeginToken = tt;
                    break;
                }
            }
        }
    }
    return res;
}
/// <summary>
/// Builds a <see cref="TitlePageReferent"/> from the tokens starting at <paramref name="begin"/>.
/// </summary>
/// <remarks>
/// Steps, in order: parse lines and pick the best title-name candidate; embed the name
/// (and its repetitions in the text when <paramref name="kit"/> is given); walk the tokens
/// collecting type, speciality, persons with their roles, date, city and organization;
/// finally try to parse a date standing right after a geo referent if none was found.
/// </remarks>
/// <param name="begin">First token to analyse.</param>
/// <param name="maxCharPos">Upper character-position bound; values &lt;= 0 disable the bound.</param>
/// <param name="kit">Analysis kit used to embed created tokens; may be <c>null</c>, in which case
/// only an occurrence annotation is added for the name.</param>
/// <param name="endToken">Receives the last token attributed to the title page (initially <paramref name="begin"/>).</param>
/// <returns>The filled referent, or <c>null</c> when no lines were parsed or no slots were set.</returns>
internal static TitlePageReferent _process(Pullenti.Ner.Token begin, int maxCharPos, Pullenti.Ner.Core.AnalysisKit kit, out Pullenti.Ner.Token endToken)
{
    endToken = begin;
    TitlePageReferent res = new TitlePageReferent();
    Pullenti.Ner.Core.Termin term = null;
    List<Pullenti.Ner.Titlepage.Internal.Line> lines = Pullenti.Ner.Titlepage.Internal.Line.Parse(begin, 30, 1500, maxCharPos);
    if (lines == null || lines.Count < 1) {
        return null;
    }

    // cou = number of leading lines that belong to the title page (stops at text/content start).
    int cou = lines.Count;
    int minNewlinesCount = 10;
    Dictionary<int, int> linesCountStat = new Dictionary<int, int>();
    for (int i = 0; i < lines.Count; i++) {
        if (Pullenti.Ner.Titlepage.Internal.TitleNameToken.CanBeStartOfTextOrContent(lines[i].BeginToken, lines[i].EndToken)) {
            cou = i;
            break;
        }
        int j = lines[i].NewlinesBeforeCount;
        if (i > 0 && j > 0) {
            int seen;
            linesCountStat.TryGetValue(j, out seen);
            linesCountStat[j] = seen + 1;
        }
    }
    // The most frequent newline gap becomes the reference gap.
    int max = 0;
    foreach (KeyValuePair<int, int> kp in linesCountStat) {
        if (kp.Value > max) {
            max = kp.Value;
            minNewlinesCount = kp.Key;
        }
    }
    int endChar = (cou > 0 ? lines[cou - 1].EndChar : 0);
    if (maxCharPos > 0 && endChar > maxCharPos) {
        endChar = maxCharPos;
    }

    // Candidate names: every run of up to 5 consecutive lines starting at line i.
    List<Pullenti.Ner.Titlepage.Internal.TitleNameToken> names = new List<Pullenti.Ner.Titlepage.Internal.TitleNameToken>();
    for (int i = 0; i < cou; i++) {
        for (int j = i; (j < cou) && (j < (i + 5)); j++) {
            if (j > i) {
                if (lines[j - 1].IsPureEn && lines[j].IsPureRu) {
                    break;
                }
                if (lines[j - 1].IsPureRu && lines[j].IsPureEn) {
                    break;
                }
                if (lines[j].NewlinesBeforeCount >= (minNewlinesCount * 2)) {
                    break;
                }
            }
            Pullenti.Ner.Titlepage.Internal.TitleNameToken ttt = Pullenti.Ner.Titlepage.Internal.TitleNameToken.TryParse(lines[i].BeginToken, lines[j].EndToken, minNewlinesCount);
            if (ttt != null) {
                if (lines[i].IsPureEn) {
                    ttt.Morph.Language = Pullenti.Morph.MorphLang.EN;
                }
                else if (lines[i].IsPureRu) {
                    ttt.Morph.Language = Pullenti.Morph.MorphLang.RU;
                }
                names.Add(ttt);
            }
        }
    }
    Pullenti.Ner.Titlepage.Internal.TitleNameToken.Sort(names);

    Pullenti.Ner.ReferentToken nameRt = null;
    if (names.Count > 0) {
        int i0 = 0;
        // If the top candidate is English, prefer the first Russian one with a positive rank.
        if (names[i0].Morph.Language.IsEn) {
            for (int ii = 1; ii < names.Count; ii++) {
                if (names[ii].Morph.Language.IsRu && names[ii].Rank > 0) {
                    i0 = ii;
                    break;
                }
            }
        }
        term = res.AddName(names[i0].BeginNameToken, names[i0].EndNameToken);
        if (names[i0].TypeValue != null) {
            res.AddType(names[i0].TypeValue);
        }
        if (names[i0].Speciality != null) {
            res.Speciality = names[i0].Speciality;
        }
        Pullenti.Ner.ReferentToken rt = new Pullenti.Ner.ReferentToken(res, names[i0].BeginToken, names[i0].EndToken);
        if (kit != null) {
            kit.EmbedToken(rt);
        }
        else {
            res.AddOccurence(new Pullenti.Ner.TextAnnotation(rt.BeginToken, rt.EndToken));
        }
        endToken = rt.EndToken;
        nameRt = rt;
        if (begin.BeginChar == rt.BeginChar) {
            begin = rt;
        }
    }

    // Embed every further occurrence of the name term in the whole text.
    if (term != null && kit != null) {
        for (Pullenti.Ner.Token t = kit.FirstToken; t != null; t = t.Next) {
            Pullenti.Ner.Core.TerminToken tok = term.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
            if (tok == null) {
                continue;
            }
            Pullenti.Ner.Token t0 = t;
            Pullenti.Ner.Token t1 = tok.EndToken;
            if (t1.Next != null && t1.Next.IsChar('.')) {
                t1 = t1.Next;
            }
            if (Pullenti.Ner.Core.BracketHelper.CanBeStartOfSequence(t0.Previous, false, false) && Pullenti.Ner.Core.BracketHelper.CanBeEndOfSequence(t1.Next, false, null, false)) {
                t0 = t0.Previous;
                t1 = t1.Next;
            }
            Pullenti.Ner.ReferentToken rt = new Pullenti.Ner.ReferentToken(res, t0, t1);
            kit.EmbedToken(rt);
            t = rt;
        }
    }

    Pullenti.Ner.Titlepage.Internal.PersonRelations pr = new Pullenti.Ner.Titlepage.Internal.PersonRelations();
    Pullenti.Ner.Titlepage.Internal.TitleItemToken.Types persTyp = Pullenti.Ner.Titlepage.Internal.TitleItemToken.Types.Undefined;
    List<Pullenti.Ner.Titlepage.Internal.TitleItemToken.Types> persTypes = pr.RelTypes;
    for (Pullenti.Ner.Token t = begin; t != null; t = t.Next) {
        if (maxCharPos > 0 && t.BeginChar > maxCharPos) {
            break;
        }
        if (t == nameRt) {
            continue;
        }
        Pullenti.Ner.Titlepage.Internal.TitleItemToken tpt = Pullenti.Ner.Titlepage.Internal.TitleItemToken.TryAttach(t);
        if (tpt != null) {
            persTyp = Pullenti.Ner.Titlepage.Internal.TitleItemToken.Types.Undefined;
            if (tpt.Typ == Pullenti.Ner.Titlepage.Internal.TitleItemToken.Types.Typ) {
                if (res.Types.Count == 0) {
                    res.AddType(tpt.Value);
                }
                else if (res.Types.Count == 1) {
                    string ty = res.Types[0].ToUpper();
                    if (ty == "РЕФЕРАТ") {
                        res.AddType(tpt.Value);
                    }
                    else if (ty == "АВТОРЕФЕРАТ") {
                        // Combine "автореферат" with the dissertation kind into one type string.
                        string combined = null;
                        switch (tpt.Value) {
                            case "КАНДИДАТСКАЯ ДИССЕРТАЦИЯ":
                                combined = "автореферат кандидатской диссертации";
                                break;
                            case "ДОКТОРСКАЯ ДИССЕРТАЦИЯ":
                                combined = "автореферат докторской диссертации";
                                break;
                            case "МАГИСТЕРСКАЯ ДИССЕРТАЦИЯ":
                                combined = "автореферат магистерской диссертации";
                                break;
                            case "КАНДИДАТСЬКА ДИСЕРТАЦІЯ":
                                combined = "автореферат кандидатської дисертації";
                                break;
                            // Fix: this literal was split across a physical line break in the source.
                            case "ДОКТОРСЬКА ДИСЕРТАЦІЯ":
                                combined = "автореферат докторської дисертації";
                                break;
                            case "МАГІСТЕРСЬКА ДИСЕРТАЦІЯ":
                                combined = "автореферат магістерської дисертації";
                                break;
                        }
                        if (combined != null) {
                            res.AddSlot(TitlePageReferent.ATTR_TYPE, combined, true, 0);
                        }
                        else {
                            res.AddType(tpt.Value);
                        }
                    }
                    else if (tpt.Value == "РЕФЕРАТ" || tpt.Value == "АВТОРЕФЕРАТ") {
                        if (!ty.Contains(tpt.Value)) {
                            res.AddType(tpt.Value);
                        }
                    }
                }
            }
            else if (tpt.Typ == Pullenti.Ner.Titlepage.Internal.TitleItemToken.Types.Speciality) {
                if (res.Speciality == null) {
                    res.Speciality = tpt.Value;
                }
            }
            else if (persTypes.Contains(tpt.Typ)) {
                persTyp = tpt.Typ;
            }
            t = tpt.EndToken;
            if (t.EndChar > endToken.EndChar) {
                endToken = t;
            }
            if (t.Next != null && t.Next.IsCharOf(":-")) {
                t = t.Next;
            }
            continue;
        }
        if (t.EndChar > endChar) {
            break;
        }
        List<Pullenti.Ner.Referent> rli = t.GetReferents();
        if (rli == null) {
            continue;
        }
        // Skip referents introduced by "ИМЕНИ" / "ИМ" / "ИМ." on the same line.
        if (!t.IsNewlineBefore && (t.Previous is Pullenti.Ner.TextToken)) {
            string s = (t.Previous as Pullenti.Ner.TextToken).Term;
            if (s == "ИМЕНИ" || s == "ИМ") {
                continue;
            }
            if (s == "." && t.Previous.Previous != null && t.Previous.Previous.IsValue("ИМ", null)) {
                continue;
            }
        }
        foreach (Pullenti.Ner.Referent r in rli) {
            if (r is Pullenti.Ner.Person.PersonReferent) {
                if (r != rli[0]) {
                    continue;
                }
                Pullenti.Ner.Person.PersonReferent p = r as Pullenti.Ner.Person.PersonReferent;
                if (persTyp != Pullenti.Ner.Titlepage.Internal.TitleItemToken.Types.Undefined) {
                    if (t.Previous != null && t.Previous.IsChar('.')) {
                        persTyp = Pullenti.Ner.Titlepage.Internal.TitleItemToken.Types.Undefined;
                    }
                }
                Pullenti.Ner.Titlepage.Internal.TitleItemToken.Types typ = pr.CalcTypFromAttrs(p);
                if (typ != Pullenti.Ner.Titlepage.Internal.TitleItemToken.Types.Undefined) {
                    pr.Add(p, typ, 1);
                    persTyp = typ;
                }
                else if (persTyp != Pullenti.Ner.Titlepage.Internal.TitleItemToken.Types.Undefined) {
                    pr.Add(p, persTyp, 1);
                }
                else if (t.Previous != null && t.Previous.IsChar('©')) {
                    persTyp = Pullenti.Ner.Titlepage.Internal.TitleItemToken.Types.Worker;
                    pr.Add(p, persTyp, 1);
                }
                else {
                    // No role known yet: look forward for the title itself.
                    for (Pullenti.Ner.Token tt = t.Next; tt != null; tt = tt.Next) {
                        Pullenti.Ner.Referent rr = tt.GetReferent();
                        if (rr == res) {
                            persTyp = Pullenti.Ner.Titlepage.Internal.TitleItemToken.Types.Worker;
                            break;
                        }
                        if (rr is Pullenti.Ner.Person.PersonReferent) {
                            // Fix: test the scanned person rr. The original passed the current person r,
                            // whose role was already found to be Undefined on this path.
                            // NOTE(review): behaviour change — confirm against recognition regression tests.
                            if (pr.CalcTypFromAttrs(rr as Pullenti.Ner.Person.PersonReferent) != Pullenti.Ner.Titlepage.Internal.TitleItemToken.Types.Undefined) {
                                break;
                            }
                            continue;
                        }
                        if (rr != null) {
                            break;
                        }
                        tpt = Pullenti.Ner.Titlepage.Internal.TitleItemToken.TryAttach(tt);
                        if (tpt != null) {
                            if (tpt.Typ != Pullenti.Ner.Titlepage.Internal.TitleItemToken.Types.Typ && tpt.Typ != Pullenti.Ner.Titlepage.Internal.TitleItemToken.Types.TypAndTheme) {
                                break;
                            }
                            tt = tpt.EndToken;
                            if (tt.EndChar > endToken.EndChar) {
                                endToken = tt;
                            }
                        }
                    }
                    if (persTyp == Pullenti.Ner.Titlepage.Internal.TitleItemToken.Types.Undefined) {
                        // Still unknown: look backward for the title or a student/performer keyword.
                        for (Pullenti.Ner.Token tt = t.Previous; tt != null; tt = tt.Previous) {
                            Pullenti.Ner.Referent rr = tt.GetReferent();
                            if (rr == res) {
                                persTyp = Pullenti.Ner.Titlepage.Internal.TitleItemToken.Types.Worker;
                                break;
                            }
                            if (rr != null) {
                                break;
                            }
                            if ((tt.IsValue("СТУДЕНТ", null) || tt.IsValue("СТУДЕНТКА", null) || tt.IsValue("СЛУШАТЕЛЬ", null)) || tt.IsValue("ДИПЛОМНИК", null) || tt.IsValue("ИСПОЛНИТЕЛЬ", null)) {
                                persTyp = Pullenti.Ner.Titlepage.Internal.TitleItemToken.Types.Worker;
                                break;
                            }
                            tpt = Pullenti.Ner.Titlepage.Internal.TitleItemToken.TryAttach(tt);
                            if (tpt != null && tpt.Typ != Pullenti.Ner.Titlepage.Internal.TitleItemToken.Types.Typ) {
                                break;
                            }
                        }
                    }
                    if (persTyp != Pullenti.Ner.Titlepage.Internal.TitleItemToken.Types.Undefined) {
                        pr.Add(p, persTyp, 1);
                    }
                    else {
                        // Role could not be determined: register with a lower coefficient.
                        pr.Add(p, persTyp, 0.5F);
                    }
                    if (t.EndChar > endToken.EndChar) {
                        endToken = t;
                    }
                }
                continue;
            }
            if (r == rli[0]) {
                persTyp = Pullenti.Ner.Titlepage.Internal.TitleItemToken.Types.Undefined;
            }
            if (r is Pullenti.Ner.Date.DateReferent) {
                if (res.Date == null) {
                    res.Date = r as Pullenti.Ner.Date.DateReferent;
                    if (t.EndChar > endToken.EndChar) {
                        endToken = t;
                    }
                }
            }
            else if (r is Pullenti.Ner.Geo.GeoReferent) {
                if (res.City == null && (r as Pullenti.Ner.Geo.GeoReferent).IsCity) {
                    res.City = r as Pullenti.Ner.Geo.GeoReferent;
                    if (t.EndChar > endToken.EndChar) {
                        endToken = t;
                    }
                }
            }
            if (r is Pullenti.Ner.Org.OrganizationReferent) {
                Pullenti.Ner.Org.OrganizationReferent org = r as Pullenti.Ner.Org.OrganizationReferent;
                if (org.Types.Contains("курс") && org.Number != null) {
                    int i;
                    if (int.TryParse(org.Number, out i)) {
                        if (i > 0 && (i < 8)) {
                            res.StudentYear = i;
                        }
                    }
                }
                // Climb from departments to the first non-department owner.
                for (; org.Higher != null; org = org.Higher) {
                    if (org.Kind != Pullenti.Ner.Org.OrganizationKind.Department) {
                        break;
                    }
                }
                if (org.Kind != Pullenti.Ner.Org.OrganizationKind.Department) {
                    if (res.Org == null) {
                        res.Org = org;
                    }
                    else if (Pullenti.Ner.Org.OrganizationReferent.CanBeHigher(res.Org, org)) {
                        res.Org = org;
                    }
                }
                if (t.EndChar > endToken.EndChar) {
                    endToken = t;
                }
            }
            if ((r is Pullenti.Ner.Uri.UriReferent) || (r is Pullenti.Ner.Geo.GeoReferent)) {
                if (t.EndChar > endToken.EndChar) {
                    endToken = t;
                }
            }
        }
    }

    // Transfer the collected persons into role slots.
    foreach (Pullenti.Ner.Titlepage.Internal.TitleItemToken.Types ty in persTypes) {
        foreach (Pullenti.Ner.Person.PersonReferent p in pr.GetPersons(ty)) {
            string attrName = pr.GetAttrNameForType(ty);
            if (attrName != null) {
                res.AddSlot(attrName, p, false, 0);
            }
        }
    }
    if (res.GetSlotValue(TitlePageReferent.ATTR_AUTHOR) == null) {
        // No author found: take the first person without a determined role.
        foreach (Pullenti.Ner.Person.PersonReferent p in pr.GetPersons(Pullenti.Ner.Titlepage.Internal.TitleItemToken.Types.Undefined)) {
            res.AddSlot(TitlePageReferent.ATTR_AUTHOR, p, false, 0);
            break;
        }
    }
    if (res.City == null && res.Org != null) {
        Pullenti.Ner.Slot s = res.Org.FindSlot(Pullenti.Ner.Org.OrganizationReferent.ATTR_GEO, null, true);
        if (s != null && (s.Value is Pullenti.Ner.Geo.GeoReferent)) {
            if ((s.Value as Pullenti.Ner.Geo.GeoReferent).IsCity) {
                res.City = s.Value as Pullenti.Ner.Geo.GeoReferent;
            }
        }
    }
    if (res.Date == null) {
        // Try to parse a date right after a geo referent (optionally separated by ':', ',' or a hyphen).
        for (Pullenti.Ner.Token t = begin; t != null && t.EndChar <= endChar; t = t.Next) {
            Pullenti.Ner.Geo.GeoReferent city = t.GetReferent() as Pullenti.Ner.Geo.GeoReferent;
            if (city == null) {
                continue;
            }
            if (t.Next is Pullenti.Ner.TextToken) {
                if (t.Next.IsCharOf(":,") || t.Next.IsHiphen) {
                    t = t.Next;
                }
            }
            Pullenti.Ner.ReferentToken rt = t.Kit.ProcessReferent(Pullenti.Ner.Date.DateAnalyzer.ANALYZER_NAME, t.Next);
            if (rt != null) {
                rt.SaveToLocalOntology();
                res.Date = rt.Referent as Pullenti.Ner.Date.DateReferent;
                if (kit != null) {
                    kit.EmbedToken(rt);
                }
                break;
            }
        }
    }
    return res.Slots.Count == 0 ? null : res;
}
/// <summary>
/// Tries to build a city referent from the first list item alone, i.e. when that item is
/// typed <c>City</c> and no type noun precedes the name.
/// </summary>
/// <param name="li">Parsed city items; only <c>li[0]</c> (and, for one check, <c>li[1]</c>) is inspected. The list is not modified.</param>
/// <param name="oi">Receives <c>li[0].OntoItem</c> once the first item is confirmed to be of type <c>City</c>; otherwise null.</param>
/// <param name="always">When true, a result is produced even if no confirming context was found
/// (unless the token also parses as a PERSON, see below).</param>
/// <returns>
/// A referent token spanning <c>li[0]</c>, or null when the list is null/empty, the first item is
/// not a <c>City</c>, its first token is not a text token, no name is available, or the name is
/// neither confirmed by context nor forced by <paramref name="always"/>.
/// </returns>
static Pullenti.Ner.ReferentToken _tryNameExist(List<CityItemToken> li, out Pullenti.Ner.Core.IntOntologyItem oi, bool always)
{
    oi = null;
    // An empty list is rejected together with null: li[0] would otherwise throw.
    if (li == null || li.Count == 0 || li[0].Typ != CityItemToken.ItemType.City)
    {
        return null;
    }
    CityItemToken first = li[0];
    oi = first.OntoItem;
    Pullenti.Ner.TextToken tt = first.BeginToken as Pullenti.Ner.TextToken;
    if (tt == null)
    {
        return null;
    }
    bool ok = false;
    string nam = (oi == null ? first.Value : oi.CanonicText);
    if (nam == null)
    {
        return null;
    }

    if (nam == "РИМ")
    {
        if (tt.Term == "РИМ")
        {
            // Rejected only when the next text token is a dictionary patronymic (IsProperSecname).
            if (!((tt.Next is Pullenti.Ner.TextToken) && tt.Next.GetMorphClassInDictionary().IsProperSecname))
            {
                ok = true;
            }
        }
        else if (tt.Previous != null && tt.Previous.IsValue("В", null) && tt.Term == "РИМЕ")
        {
            ok = true;
        }
    }
    else if (oi != null && oi.Referent != null && oi.Owner.IsExtOntology)
    {
        ok = true;
    }
    else if (nam.EndsWith("ГРАД") || nam.EndsWith("СК"))
    {
        ok = true;
    }
    else if (nam.EndsWith("TOWN") || nam.StartsWith("SAN"))
    {
        ok = true;
    }
    else if (first.Chars.IsLatinLetter && first.BeginToken.Previous != null && (first.BeginToken.Previous.IsValue("IN", null) || first.BeginToken.Previous.IsValue("FROM", null)))
    {
        ok = true;
    }
    else
    {
        // Look right on the same line: skip punctuation/prepositions/conjunctions/misc and accept
        // if the first remaining token is a geo referent written in the same script.
        for (Pullenti.Ner.Token tt2 = first.EndToken.Next; tt2 != null; tt2 = tt2.Next)
        {
            if (tt2.IsNewlineBefore)
            {
                break;
            }
            if ((tt2.IsCharOf(",(") || tt2.Morph.Class.IsPreposition || tt2.Morph.Class.IsConjunction) || tt2.Morph.Class.IsMisc)
            {
                continue;
            }
            if ((tt2.GetReferent() is Pullenti.Ner.Geo.GeoReferent) && tt2.Chars.IsCyrillicLetter == first.Chars.IsCyrillicLetter)
            {
                ok = true;
            }
            break;
        }
        if (!ok)
        {
            // Same idea to the left; only the first non-skipped token is examined.
            for (Pullenti.Ner.Token tt2 = first.BeginToken.Previous; tt2 != null; tt2 = tt2.Previous)
            {
                if (tt2.IsNewlineAfter)
                {
                    break;
                }
                if ((tt2.IsCharOf(",)") || tt2.Morph.Class.IsPreposition || tt2.Morph.Class.IsConjunction) || tt2.Morph.Class.IsMisc)
                {
                    continue;
                }
                if ((tt2.GetReferent() is Pullenti.Ner.Geo.GeoReferent) && tt2.Chars.IsCyrillicLetter == first.Chars.IsCyrillicLetter)
                {
                    ok = true;
                }
                if (ok)
                {
                    // If the text parses as a street only when the candidate word is included,
                    // the word is treated as part of the street name rather than a city.
                    List<Pullenti.Ner.Address.Internal.StreetItemToken> sits = Pullenti.Ner.Address.Internal.StreetItemToken.TryParseList(first.BeginToken, null, 10);
                    if (sits != null && sits.Count > 1)
                    {
                        Pullenti.Ner.Address.Internal.AddressItemToken ss = Pullenti.Ner.Address.Internal.StreetDefineHelper.TryParseStreet(sits, false, false);
                        if (ss != null)
                        {
                            sits.RemoveAt(0);
                            if (Pullenti.Ner.Address.Internal.StreetDefineHelper.TryParseStreet(sits, false, false) == null)
                            {
                                ok = false;
                            }
                        }
                    }
                }
                if (ok)
                {
                    if (li.Count > 1 && li[1].Typ == CityItemToken.ItemType.ProperName && (li[1].WhitespacesBeforeCount < 3))
                    {
                        ok = false;
                    }
                    else
                    {
                        Pullenti.Morph.MorphClass mc = first.BeginToken.GetMorphClassInDictionary();
                        if (mc.IsProperName || mc.IsProperSurname || mc.IsAdjective)
                        {
                            ok = false;
                        }
                        else
                        {
                            // A noun phrase extending past the item means the word belongs to a longer phrase.
                            Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(first.BeginToken, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                            if (npt != null && npt.EndChar > first.EndChar)
                            {
                                ok = false;
                            }
                        }
                    }
                }
                if (Pullenti.Ner.Address.Internal.AddressItemToken.TryAttachOrg(first.BeginToken) != null)
                {
                    ok = false;
                }
                break;
            }
        }
    }

    if (always)
    {
        // A doubtful, surname-like token set apart by whitespace that also parses as a PERSON
        // cancels the "always" override.
        if (first.WhitespacesBeforeCount > 3 && first.Doubtful && first.BeginToken.GetMorphClassInDictionary().IsProperSurname)
        {
            Pullenti.Ner.ReferentToken pp = first.Kit.ProcessReferent("PERSON", first.BeginToken);
            if (pp != null)
            {
                always = false;
            }
        }
    }

    if (first.BeginToken.Chars.IsLatinLetter && first.BeginToken == first.EndToken)
    {
        // Single Latin word followed (optionally after a comma) by a short (< 3 chars),
        // not-all-lowercase Latin token: context confirmation is withdrawn.
        Pullenti.Ner.Token tt1 = first.EndToken.Next;
        if (tt1 != null && tt1.IsChar(','))
        {
            tt1 = tt1.Next;
        }
        if (((tt1 is Pullenti.Ner.TextToken) && tt1.Chars.IsLatinLetter && (tt1.LengthChar < 3)) && !tt1.Chars.IsAllLower)
        {
            ok = false;
        }
    }

    if (!ok && !always)
    {
        return null;
    }

    Pullenti.Ner.Geo.GeoReferent city = null;
    if (oi != null && (oi.Referent is Pullenti.Ner.Geo.GeoReferent) && !oi.Owner.IsExtOntology)
    {
        // Internal ontology entry: reuse a clone of its referent without its occurrences.
        city = oi.Referent.Clone() as Pullenti.Ner.Geo.GeoReferent;
        city.Occurrence.Clear();
    }
    else
    {
        city = new Pullenti.Ner.Geo.GeoReferent();
        city.AddName(nam);
        if (oi != null && (oi.Referent is Pullenti.Ner.Geo.GeoReferent))
        {
            city.MergeSlots2(oi.Referent as Pullenti.Ner.Geo.GeoReferent, first.Kit.BaseLanguage);
        }
        if (!city.IsCity)
        {
            city.AddTypCity(first.Kit.BaseLanguage);
        }
    }
    return new Pullenti.Ner.ReferentToken(city, first.BeginToken, first.EndToken) { Morph = first.Morph };
}
/// <summary>
/// Tries to build a city referent from a "type noun + name" sequence, e.g. a <c>Noun</c> or
/// <c>Misc</c> item followed by a <c>City</c> or <c>ProperName</c> item.
/// </summary>
/// <param name="li">Parsed city items (at least two). NOTE: the list is modified - items after the
/// name item are removed, and a leading <c>Misc</c> item is removed before the result is built.</param>
/// <param name="oi">Receives the ontology item of the name item when the "ГОРОД/МІСТО/Misc + City"
/// branch is taken; otherwise null.</param>
/// <param name="always">When true, the result is produced even if no confirming context was found.</param>
/// <returns>The referent token covering the remaining list items (possibly extended by a
/// trailing "-number"), or null when the sequence is rejected.</returns>
static Pullenti.Ner.ReferentToken _tryNounName(List<CityItemToken> li, out Pullenti.Ner.Core.IntOntologyItem oi, bool always)
{
    oi = null;
    if (li == null || (li.Count < 2) || (li[0].Typ != CityItemToken.ItemType.Noun && li[0].Typ != CityItemToken.ItemType.Misc))
    {
        return null;
    }
    bool ok = !li[0].Doubtful;
    if (ok && li[0].Typ == CityItemToken.ItemType.Misc)
    {
        ok = false;
    }
    string typ = (li[0].Typ == CityItemToken.ItemType.Misc ? null : li[0].Value);
    string typ2 = (li[0].Typ == CityItemToken.ItemType.Misc ? null : li[0].AltValue);
    string probAdj = null;
    int i1 = 1; // index of the item carrying the name

    // "<settlement type> <second type noun> <name>": the second noun becomes typ2 and the name shifts to li[2].
    if ((typ != null && li[i1].Typ == CityItemToken.ItemType.Noun && ((i1 + 1) < li.Count)) && li[0].WhitespacesAfterCount <= 1 && (((Pullenti.Morph.LanguageHelper.EndsWith(typ, "ПОСЕЛОК") || Pullenti.Morph.LanguageHelper.EndsWith(typ, "СЕЛИЩЕ") || typ == "ДЕРЕВНЯ") || typ == "СЕЛО")))
    {
        if (li[i1].BeginToken == li[i1].EndToken)
        {
            Pullenti.Ner.Address.Internal.AddressItemToken ooo = Pullenti.Ner.Address.Internal.AddressItemToken.TryAttachOrg(li[i1].BeginToken);
            if (ooo != null && ooo.RefToken != null)
            {
                return null;
            }
        }
        typ2 = li[i1].Value;
        if (typ2 == "СТАНЦИЯ" && li[i1].BeginToken.IsValue("СТ", null) && ((i1 + 1) < li.Count))
        {
            // "СТ" is ambiguous here, so an alternative name prefixed with the adjective "old"
            // (agreeing with the name's number/gender) is also registered later.
            Pullenti.Ner.MorphCollection m = li[i1 + 1].Morph;
            if (m.Number == Pullenti.Morph.MorphNumber.Plural)
            {
                probAdj = "СТАРЫЕ";
            }
            else if (m.Gender == Pullenti.Morph.MorphGender.Feminie)
            {
                probAdj = "СТАРАЯ";
            }
            else if (m.Gender == Pullenti.Morph.MorphGender.Masculine)
            {
                probAdj = "СТАРЫЙ";
            }
            else
            {
                probAdj = "СТАРОЕ";
            }
        }
        i1++;
    }

    string name = li[i1].Value ?? (li[i1].OntoItem == null ? null : li[i1].OntoItem.CanonicText);
    string altName = li[i1].AltValue;
    if (name == null)
    {
        return null;
    }
    Pullenti.Ner.MorphCollection mc = li[0].Morph;

    if (i1 == 1 && li[i1].Typ == CityItemToken.ItemType.City && (li[0].Value == "ГОРОД" || li[0].Value == "МІСТО" || li[0].Typ == CityItemToken.ItemType.Misc))
    {
        if (typ == null && ((i1 + 1) < li.Count) && li[i1 + 1].Typ == CityItemToken.ItemType.Noun)
        {
            return null;
        }
        oi = li[i1].OntoItem;
        if (oi != null)
        {
            name = oi.CanonicText;
        }
        // oi may be null here (name taken from Value), so MiscAttr must be read under a null check.
        bool hasMiscAttr = oi != null && oi.MiscAttr != null;
        if (name.Length > 2 || hasMiscAttr)
        {
            if (!li[1].Doubtful || hasMiscAttr)
            {
                ok = true;
            }
            else if (!ok && !li[1].IsNewlineBefore)
            {
                if (li[0].GeoObjectBefore || li[1].GeoObjectAfter)
                {
                    ok = true;
                }
                else if (Pullenti.Ner.Address.Internal.StreetDefineHelper.CheckStreetAfter(li[1].EndToken.Next))
                {
                    ok = true;
                }
                else if (li[1].EndToken.Next != null && (li[1].EndToken.Next.GetReferent() is Pullenti.Ner.Date.DateReferent))
                {
                    ok = true;
                }
                else if ((li[1].WhitespacesBeforeCount < 2) && li[1].OntoItem != null)
                {
                    // The original branched on IsNewlineAfter but set ok = true either way.
                    ok = true;
                }
            }
            if (li[1].Doubtful && li[1].EndToken.Next != null && li[1].EndToken.Chars == li[1].EndToken.Next.Chars)
            {
                ok = false;
            }
            if (li[0].BeginToken.Previous != null && li[0].BeginToken.Previous.IsValue("В", null))
            {
                ok = true;
            }
        }
        if (!ok)
        {
            ok = CheckYearAfter(li[1].EndToken.Next);
        }
        if (!ok)
        {
            ok = CheckCityAfter(li[1].EndToken.Next);
        }
    }
    else if (li[i1].Typ == CityItemToken.ItemType.ProperName || li[i1].Typ == CityItemToken.ItemType.City)
    {
        if ((li[0].Value == "АДМИНИСТРАЦИЯ" || li[0].Value == "АДМІНІСТРАЦІЯ") && i1 == 1)
        {
            return null;
        }
        if (li[i1].IsNewlineBefore)
        {
            if (li.Count != 2)
            {
                return null;
            }
        }
        if (!li[0].Doubtful)
        {
            ok = true;
            if (name.Length < 2)
            {
                ok = false;
            }
            else if ((name.Length < 3) && li[0].Morph.Number != Pullenti.Morph.MorphNumber.Singular)
            {
                ok = false;
            }
            if (li[i1].Doubtful && !li[i1].GeoObjectAfter && !li[0].GeoObjectBefore)
            {
                if (li[i1].Morph.Case.IsGenitive)
                {
                    if (li[i1].EndToken.Next == null || MiscLocationHelper.CheckGeoObjectAfter(li[i1].EndToken.Next, false) || Pullenti.Ner.Address.Internal.AddressItemToken.CheckHouseAfter(li[i1].EndToken.Next, false, true))
                    {
                        // confirmed by what follows - keep ok
                    }
                    else if (li[0].BeginToken.Previous == null || MiscLocationHelper.CheckGeoObjectBefore(li[0].BeginToken))
                    {
                        // confirmed by what precedes - keep ok
                    }
                    else
                    {
                        ok = false;
                    }
                }
                if (ok)
                {
                    // "<person property> <type noun> <person>" is rejected.
                    Pullenti.Ner.ReferentToken rt0 = li[i1].Kit.ProcessReferent("PERSONPROPERTY", li[0].BeginToken.Previous);
                    if (rt0 != null)
                    {
                        Pullenti.Ner.ReferentToken rt1 = li[i1].Kit.ProcessReferent("PERSON", li[i1].BeginToken);
                        if (rt1 != null)
                        {
                            ok = false;
                        }
                    }
                }
            }
            Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(li[i1].BeginToken, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
            if (npt != null)
            {
                if (npt.EndToken.EndChar > li[i1].EndChar && npt.Adjectives.Count > 0 && !npt.Adjectives[0].EndToken.Next.IsComma)
                {
                    ok = false;
                }
                else if (TerrItemToken.m_UnknownRegions.TryParse(npt.EndToken, Pullenti.Ner.Core.TerminParseAttr.FullwordsOnly) != null)
                {
                    // The phrase ends with an "unknown region" term: require a non-city geo
                    // referent immediately before or after (optionally across one comma).
                    bool ok1 = false;
                    if (li[0].BeginToken.Previous != null)
                    {
                        Pullenti.Ner.Token ttt = li[0].BeginToken.Previous;
                        if (ttt.IsComma && ttt.Previous != null)
                        {
                            ttt = ttt.Previous;
                        }
                        Pullenti.Ner.Geo.GeoReferent geo = ttt.GetReferent() as Pullenti.Ner.Geo.GeoReferent;
                        if (geo != null && !geo.IsCity)
                        {
                            ok1 = true;
                        }
                    }
                    if (npt.EndToken.Next != null)
                    {
                        Pullenti.Ner.Token ttt = npt.EndToken.Next;
                        if (ttt.IsComma && ttt.Next != null)
                        {
                            ttt = ttt.Next;
                        }
                        Pullenti.Ner.Geo.GeoReferent geo = ttt.GetReferent() as Pullenti.Ner.Geo.GeoReferent;
                        if (geo != null && !geo.IsCity)
                        {
                            ok1 = true;
                        }
                    }
                    if (!ok1)
                    {
                        return null;
                    }
                }
            }
            if (li[0].Value == "ПОРТ")
            {
                if (li[i1].Chars.IsAllUpper || li[i1].Chars.IsLatinLetter)
                {
                    return null;
                }
            }
        }
        else if (li[0].GeoObjectBefore)
        {
            ok = true;
        }
        else if (li[i1].GeoObjectAfter && !li[i1].IsNewlineAfter)
        {
            ok = true;
        }
        else
        {
            ok = CheckYearAfter(li[i1].EndToken.Next);
        }
        if (!ok)
        {
            ok = CheckStreetAfter(li[i1].EndToken.Next);
        }
        if (!ok && li[0].BeginToken.Previous != null && li[0].BeginToken.Previous.IsValue("В", null))
        {
            ok = true;
        }
    }
    else
    {
        return null;
    }

    if (!ok && !always)
    {
        if (MiscLocationHelper.CheckNearBefore(li[0].BeginToken.Previous) == null)
        {
            return null;
        }
    }

    // Drop everything after the name item; the caller's list is shortened.
    if (li.Count > (i1 + 1))
    {
        li.RemoveRange(i1 + 1, li.Count - i1 - 1);
    }

    Pullenti.Ner.Geo.GeoReferent city = new Pullenti.Ner.Geo.GeoReferent();
    if (oi != null && oi.Referent != null)
    {
        city = oi.Referent.Clone() as Pullenti.Ner.Geo.GeoReferent;
        city.Occurrence.Clear();
    }

    if (!li[0].Morph.Case.IsUndefined && li[0].Morph.Gender != Pullenti.Morph.MorphGender.Undefined)
    {
        // Single-token adjective name: re-derive it using the type noun's case and gender.
        if (li[i1].EndToken.Morph.Class.IsAdjective && li[i1].BeginToken == li[i1].EndToken)
        {
            string nam = Pullenti.Ner.Core.ProperNameHelper.GetNameEx(li[i1].BeginToken, li[i1].EndToken, Pullenti.Morph.MorphClass.Adjective, li[0].Morph.Case, li[0].Morph.Gender, false, false);
            if (nam != null && nam != name)
            {
                name = nam;
            }
        }
    }

    if (li[0].Morph.Case.IsNominative)
    {
        // With a nominative type noun the alternative name is added first, ahead of the main name.
        if (altName != null)
        {
            city.AddName(altName);
        }
        altName = null;
    }
    city.AddName(name);
    if (probAdj != null)
    {
        city.AddName(probAdj + " " + name);
    }
    if (altName != null)
    {
        city.AddName(altName);
        if (probAdj != null)
        {
            city.AddName(probAdj + " " + altName);
        }
    }

    if (typ != null)
    {
        city.AddTyp(typ);
    }
    else if (!city.IsCity)
    {
        city.AddTypCity(li[0].Kit.BaseLanguage);
    }
    if (typ2 != null)
    {
        city.AddTyp(typ2.ToLower());
    }
    if (li[0].HigherGeo != null && GeoOwnerHelper.CanBeHigher(li[0].HigherGeo, city))
    {
        city.Higher = li[0].HigherGeo;
    }

    // A leading Misc item is not part of the resulting span; after this li[0] is the name item.
    if (li[0].Typ == CityItemToken.ItemType.Misc)
    {
        li.RemoveAt(0);
    }

    Pullenti.Ner.ReferentToken res = new Pullenti.Ner.ReferentToken(city, li[0].BeginToken, li[li.Count - 1].EndToken) { Morph = mc };

    // "<name>-<digits below 50>": append the number to every name slot and extend the span.
    if (res.EndToken.Next != null && res.EndToken.Next.IsHiphen && (res.EndToken.Next.Next is Pullenti.Ner.NumberToken))
    {
        Pullenti.Ner.NumberToken num = res.EndToken.Next.Next as Pullenti.Ner.NumberToken;
        if ((num.Typ == Pullenti.Ner.NumberSpellingType.Digit && !num.Morph.Class.IsAdjective && num.IntValue != null) && (num.IntValue.Value < 50))
        {
            foreach (Pullenti.Ner.Slot s in city.Slots)
            {
                if (s.TypeName == Pullenti.Ner.Geo.GeoReferent.ATTR_NAME)
                {
                    city.UploadSlot(s, string.Format("{0}-{1}", s.Value, num.Value));
                }
            }
            res.EndToken = num;
        }
    }

    if (li[0].BeginToken == li[0].EndToken && li[0].BeginToken.IsValue("ГОРОДОК", null))
    {
        if (Pullenti.Ner.Address.Internal.AddressItemToken.CheckHouseAfter(res.EndToken.Next, true, false))
        {
            return null;
        }
    }
    return res;
}