示例#1
0
 public override string GetImageId(Pullenti.Ner.Referent obj = null)
 {
     Pullenti.Ner.Geo.GeoReferent ter = obj as Pullenti.Ner.Geo.GeoReferent;
     if (ter != null)
     {
         if (ter.IsUnion)
         {
             return(UnionImageId);
         }
         if (ter.IsCity && ((ter.IsState || ter.IsRegion)))
         {
             return(CountryCityImageId);
         }
         if (ter.IsState)
         {
             return(CountryImageId);
         }
         if (ter.IsCity)
         {
             return(CityImageId);
         }
         if (ter.IsRegion && ter.Higher != null && ter.Higher.IsCity)
         {
             return(DistrictImageId);
         }
         if (ter.IsTerritory)
         {
             return(TerrImageId);
         }
     }
     return(RegionImageId);
 }
示例#2
0
 static bool canBeRef(NamedEntityKind ki, Pullenti.Ner.Referent re)
 {
     if (re == null)
     {
         return(false);
     }
     if (ki == NamedEntityKind.Monument)
     {
         if (re.TypeName == "PERSON" || re.TypeName == "PERSONPROPERTY")
         {
             return(true);
         }
     }
     else if (ki == NamedEntityKind.Location)
     {
         if (re is Pullenti.Ner.Geo.GeoReferent)
         {
             Pullenti.Ner.Geo.GeoReferent geo = re as Pullenti.Ner.Geo.GeoReferent;
             if (geo.IsRegion || geo.IsState)
             {
                 return(true);
             }
         }
     }
     else if (ki == NamedEntityKind.Building)
     {
         if (re.TypeName == "ORGANIZATION")
         {
             return(true);
         }
     }
     return(false);
 }
示例#3
0
 static Pullenti.Ner.ReferentToken Try4(List <CityItemToken> li)
 {
     if ((li.Count > 0 && li[0].Typ == CityItemToken.ItemType.Noun && ((li[0].Value != "ГОРОД" && li[0].Value != "МІСТО" && li[0].Value != "CITY"))) && ((!li[0].Doubtful || li[0].GeoObjectBefore)))
     {
         if (li.Count > 1 && li[1].OrgRef != null)
         {
             Pullenti.Ner.Geo.GeoReferent geo = new Pullenti.Ner.Geo.GeoReferent();
             geo.AddTyp(li[0].Value);
             geo.AddOrgReferent(li[1].OrgRef.Referent);
             geo.AddExtReferent(li[1].OrgRef);
             return(new Pullenti.Ner.ReferentToken(geo, li[0].BeginToken, li[1].EndToken));
         }
         else
         {
             Pullenti.Ner.Address.Internal.AddressItemToken aid = Pullenti.Ner.Address.Internal.AddressItemToken.TryAttachOrg(li[0].EndToken.Next);
             if (aid != null)
             {
                 Pullenti.Ner.Geo.GeoReferent geo = new Pullenti.Ner.Geo.GeoReferent();
                 geo.AddTyp(li[0].Value);
                 geo.AddOrgReferent(aid.Referent);
                 geo.AddExtReferent(aid.RefToken);
                 return(new Pullenti.Ner.ReferentToken(geo, li[0].BeginToken, aid.EndToken));
             }
         }
     }
     return(null);
 }
示例#4
0
        static bool _isHigher(Pullenti.Ner.Geo.GeoReferent gHi, Pullenti.Ner.Geo.GeoReferent gLo)
        {
            int i = 0;

            for (; gLo != null && (i < 10); gLo = gLo.Higher, i++)
            {
                if (gLo.CanBeEquals(gHi, Pullenti.Ner.Core.ReferentsEqualType.WithinOneText))
                {
                    return(true);
                }
            }
            return(false);
        }
示例#5
0
        static string _getTypesString(Pullenti.Ner.Geo.GeoReferent g)
        {
            StringBuilder tmp = new StringBuilder();

            foreach (Pullenti.Ner.Slot s in g.Slots)
            {
                if (s.TypeName == Pullenti.Ner.Geo.GeoReferent.ATTR_TYPE)
                {
                    tmp.AppendFormat("{0};", s.Value);
                }
            }
            return(tmp.ToString());
        }
示例#6
0
 public override bool CanBeGeneralFor(Pullenti.Ner.Referent obj)
 {
     if (!this._CanBeEquals(obj, Pullenti.Ner.Core.ReferentsEqualType.WithinOneText, true))
     {
         return(false);
     }
     Pullenti.Ner.Geo.GeoReferent g1 = this.GetSlotValue(ATTR_GEO) as Pullenti.Ner.Geo.GeoReferent;
     Pullenti.Ner.Geo.GeoReferent g2 = obj.GetSlotValue(ATTR_GEO) as Pullenti.Ner.Geo.GeoReferent;
     if (g1 == null && g2 != null)
     {
         return(true);
     }
     return(false);
 }
示例#7
0
        // Проверка, что этот референт может выступать в качестве ATTR_REF
        public bool CanHasRef(Pullenti.Ner.Referent r)
        {
            string nam = Name;

            if (nam == null || r == null)
            {
                return(false);
            }
            if (r is Pullenti.Ner.Geo.GeoReferent)
            {
                Pullenti.Ner.Geo.GeoReferent g = r as Pullenti.Ner.Geo.GeoReferent;
                if (Pullenti.Morph.LanguageHelper.EndsWithEx(nam, "президент", "губернатор", null, null))
                {
                    return(g.IsState || g.IsRegion);
                }
                if (nam == "мэр" || nam == "градоначальник")
                {
                    return(g.IsCity);
                }
                if (nam == "глава")
                {
                    return(true);
                }
                return(false);
            }
            if (r.TypeName == "ORGANIZATION")
            {
                if ((Pullenti.Morph.LanguageHelper.EndsWith(nam, "губернатор") || nam == "мэр" || nam == "градоначальник") || nam == "президент")
                {
                    return(false);
                }
                if (nam.Contains("министр"))
                {
                    if (r.FindSlot(null, "министерство", true) == null)
                    {
                        return(false);
                    }
                }
                if (nam.EndsWith("директор"))
                {
                    if ((r.FindSlot(null, "суд", true)) != null)
                    {
                        return(false);
                    }
                }
                return(true);
            }
            return(false);
        }
示例#8
0
 public static Pullenti.Ner.Geo.GeoReferent GetGeoReferentByName(string name)
 {
     Pullenti.Ner.Geo.GeoReferent res = null;
     if (m_GeoRefByName.TryGetValue(name, out res))
     {
         return(res);
     }
     foreach (Pullenti.Ner.Referent r in TerrItemToken.m_AllStates)
     {
         if (r.FindSlot(null, name, true) != null)
         {
             res = r as Pullenti.Ner.Geo.GeoReferent;
             break;
         }
     }
     m_GeoRefByName.Add(name, res);
     return(res);
 }
示例#9
0
        static Pullenti.Ner.ReferentToken _tryAttachMoscowAO(List <TerrItemToken> li, Pullenti.Ner.Core.AnalyzerData ad)
        {
            if (li[0].TerminItem == null || !li[0].TerminItem.IsMoscowRegion)
            {
                return(null);
            }
            if (li[0].IsDoubt)
            {
                bool ok = false;
                if (CityAttachHelper.CheckCityAfter(li[0].EndToken.Next))
                {
                    ok = true;
                }
                else
                {
                    List <Pullenti.Ner.Address.Internal.AddressItemToken> ali = Pullenti.Ner.Address.Internal.AddressItemToken.TryParseList(li[0].EndToken.Next, null, 2);
                    if (ali != null && ali.Count > 0 && ali[0].Typ == Pullenti.Ner.Address.Internal.AddressItemToken.ItemType.Street)
                    {
                        ok = true;
                    }
                }
                if (!ok)
                {
                    return(null);
                }
            }
            Pullenti.Ner.Geo.GeoReferent reg = new Pullenti.Ner.Geo.GeoReferent();
            string typ = "АДМИНИСТРАТИВНЫЙ ОКРУГ";

            reg.AddTyp(typ);
            string name = li[0].TerminItem.CanonicText;

            if (Pullenti.Morph.LanguageHelper.EndsWith(name, typ))
            {
                name = name.Substring(0, name.Length - typ.Length - 1).Trim();
            }
            reg.AddName(name);
            return(new Pullenti.Ner.ReferentToken(reg, li[0].BeginToken, li[0].EndToken));
        }
示例#10
0
 public void AddReferent(Pullenti.Ner.Referent r)
 {
     if (r == null)
     {
         return;
     }
     Pullenti.Ner.Geo.GeoReferent geo = r as Pullenti.Ner.Geo.GeoReferent;
     if (geo != null)
     {
         foreach (Pullenti.Ner.Slot s in Slots)
         {
             if (s.TypeName == ATTR_GEO)
             {
                 Pullenti.Ner.Geo.GeoReferent geo0 = s.Value as Pullenti.Ner.Geo.GeoReferent;
                 if (geo0 == null)
                 {
                     continue;
                 }
                 if (Pullenti.Ner.Geo.Internal.GeoOwnerHelper.CanBeHigher(geo0, geo))
                 {
                     if (geo.Higher == geo0 || geo.IsCity)
                     {
                         this.UploadSlot(s, geo);
                         return;
                     }
                 }
                 if (Pullenti.Ner.Geo.Internal.GeoOwnerHelper.CanBeHigher(geo, geo0))
                 {
                     return;
                 }
             }
         }
         this.AddSlot(ATTR_GEO, r, false, 0);
     }
     else if ((r is StreetReferent) || r.TypeName == "ORGANIZATION")
     {
         this.AddSlot(ATTR_STREET, r, false, 0);
     }
 }
示例#11
0
 public static Pullenti.Ner.ReferentToken TryAttachStateUSATerritory(Pullenti.Ner.Token t)
 {
     if (t == null || !t.Chars.IsLatinLetter)
     {
         return(null);
     }
     Pullenti.Ner.Core.TerminToken tok = TerrItemToken.m_GeoAbbrs.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
     if (tok == null)
     {
         return(null);
     }
     Pullenti.Ner.Geo.GeoReferent g = tok.Termin.Tag as Pullenti.Ner.Geo.GeoReferent;
     if (g == null)
     {
         return(null);
     }
     if (tok.EndToken.Next != null && tok.EndToken.Next.IsChar('.'))
     {
         tok.EndToken = tok.EndToken.Next;
     }
     Pullenti.Ner.Referent gg = g.Clone();
     gg.Occurrence.Clear();
     return(new Pullenti.Ner.ReferentToken(gg, tok.BeginToken, tok.EndToken));
 }
示例#12
0
        static Pullenti.Ner.ReferentToken Try1(List <CityItemToken> li, out Pullenti.Ner.Core.IntOntologyItem oi, Pullenti.Ner.Core.AnalyzerDataWithOntology ad)
        {
            oi = null;
            if (li == null || (li.Count < 1))
            {
                return(null);
            }
            else if (li[0].Typ != CityItemToken.ItemType.City)
            {
                if (li.Count != 2 || li[0].Typ != CityItemToken.ItemType.ProperName || li[1].Typ != CityItemToken.ItemType.Noun)
                {
                    return(null);
                }
            }
            int i = 1;

            oi = li[0].OntoItem;
            bool ok = !li[0].Doubtful;

            if ((ok && li[0].OntoItem != null && li[0].OntoItem.MiscAttr == null) && ad != null)
            {
                if (li[0].OntoItem.Owner != ad.LocalOntology && !li[0].OntoItem.Owner.IsExtOntology)
                {
                    if (li[0].BeginToken.Previous != null && li[0].BeginToken.Previous.IsValue("В", null))
                    {
                    }
                    else
                    {
                        ok = false;
                    }
                }
            }
            if (li.Count == 1 && li[0].BeginToken.Morph.Class.IsAdjective)
            {
                List <Pullenti.Ner.Address.Internal.StreetItemToken> sits = Pullenti.Ner.Address.Internal.StreetItemToken.TryParseList(li[0].BeginToken, null, 3);
                if (sits != null && sits.Count == 2 && sits[1].Typ == Pullenti.Ner.Address.Internal.StreetItemType.Noun)
                {
                    return(null);
                }
            }
            string typ    = null;
            string alttyp = null;

            Pullenti.Ner.MorphCollection mc = li[0].Morph;
            if (i < li.Count)
            {
                if (li[i].Typ == CityItemToken.ItemType.Noun)
                {
                    Pullenti.Ner.Address.Internal.AddressItemToken at = null;
                    if (!li[i].Chars.IsAllLower && (li[i].WhitespacesAfterCount < 2))
                    {
                        Pullenti.Ner.Address.Internal.StreetItemToken sit = Pullenti.Ner.Address.Internal.StreetItemToken.TryParse(li[i].EndToken.Next, null, false, null, false);
                        if (sit != null && sit.Typ == Pullenti.Ner.Address.Internal.StreetItemType.Noun)
                        {
                            at = Pullenti.Ner.Address.Internal.AddressItemToken.TryParse(li[i].BeginToken, null, false, false, null);
                            if (at != null)
                            {
                                Pullenti.Ner.Address.Internal.AddressItemToken at2 = Pullenti.Ner.Address.Internal.AddressItemToken.TryParse(li[i].EndToken.Next, null, false, false, null);
                                if (at2 != null && at2.Typ == Pullenti.Ner.Address.Internal.AddressItemToken.ItemType.Street)
                                {
                                    at = null;
                                }
                            }
                        }
                    }
                    if (at == null)
                    {
                        typ    = li[i].Value;
                        alttyp = li[i].AltValue;
                        if (li[i].BeginToken.IsValue("СТ", null) && li[i].BeginToken.Chars.IsAllUpper)
                        {
                            return(null);
                        }
                        if ((i + 1) == li.Count)
                        {
                            ok = true;
                            if (!li[i].Morph.Case.IsUndefined)
                            {
                                mc = li[i].Morph;
                            }
                            i++;
                        }
                        else if (ok)
                        {
                            i++;
                        }
                        else
                        {
                            Pullenti.Ner.Token tt0 = li[0].BeginToken.Previous;
                            if ((tt0 is Pullenti.Ner.TextToken) && (tt0.WhitespacesAfterCount < 3))
                            {
                                if (tt0.IsValue("МЭР", "МЕР") || tt0.IsValue("ГЛАВА", null) || tt0.IsValue("ГРАДОНАЧАЛЬНИК", null))
                                {
                                    ok = true;
                                    i++;
                                }
                            }
                        }
                    }
                }
            }
            if (!ok && oi != null && (oi.CanonicText.Length < 4))
            {
                return(null);
            }
            if (!ok && li[0].BeginToken.Morph.Class.IsProperName)
            {
                return(null);
            }
            if (!ok)
            {
                if (!Pullenti.Ner.Core.MiscHelper.IsExistsInDictionary(li[0].BeginToken, li[0].EndToken, Pullenti.Morph.MorphClass.Adjective | Pullenti.Morph.MorphClass.Noun | Pullenti.Morph.MorphClass.Pronoun))
                {
                    ok = li[0].GeoObjectBefore || li[i - 1].GeoObjectAfter;
                    if (ok && li[0].BeginToken == li[0].EndToken)
                    {
                        Pullenti.Morph.MorphClass mcc = li[0].BeginToken.GetMorphClassInDictionary();
                        if (mcc.IsProperName || mcc.IsProperSurname)
                        {
                            ok = false;
                        }
                        else if (li[0].GeoObjectBefore && (li[0].WhitespacesAfterCount < 2))
                        {
                            Pullenti.Ner.Address.Internal.AddressItemToken ad1 = Pullenti.Ner.Address.Internal.AddressItemToken.TryParse(li[0].BeginToken, null, false, false, null);
                            if (ad1 != null && ad1.Typ == Pullenti.Ner.Address.Internal.AddressItemToken.ItemType.Street)
                            {
                                Pullenti.Ner.Address.Internal.AddressItemToken ad2 = Pullenti.Ner.Address.Internal.AddressItemToken.TryParse(li[0].EndToken.Next, null, false, false, null);
                                if (ad2 == null || ad2.Typ != Pullenti.Ner.Address.Internal.AddressItemToken.ItemType.Street)
                                {
                                    ok = false;
                                }
                            }
                            else if (Pullenti.Ner.Address.Internal.AddressItemToken.TryAttachOrg(li[0].BeginToken) != null)
                            {
                                ok = false;
                            }
                        }
                    }
                }
                if (ok)
                {
                    if (li[0].Kit.ProcessReferent("PERSON", li[0].BeginToken) != null)
                    {
                        ok = false;
                    }
                }
            }
            if (!ok)
            {
                ok = CheckYearAfter(li[0].EndToken.Next);
            }
            if (!ok && ((!li[0].BeginToken.Morph.Class.IsAdjective || li[0].BeginToken != li[0].EndToken)))
            {
                ok = CheckCityAfter(li[0].EndToken.Next);
            }
            if (!ok)
            {
                return(null);
            }
            if (i < li.Count)
            {
                li.RemoveRange(i, li.Count - i);
            }
            Pullenti.Ner.ReferentToken rt = null;
            if (oi == null)
            {
                if (li[0].Value != null && li[0].HigherGeo != null)
                {
                    Pullenti.Ner.Geo.GeoReferent cap = new Pullenti.Ner.Geo.GeoReferent();
                    cap.AddName(li[0].Value);
                    cap.AddTypCity(li[0].Kit.BaseLanguage);
                    cap.Higher = li[0].HigherGeo;
                    if (typ != null)
                    {
                        cap.AddTyp(typ);
                    }
                    if (alttyp != null)
                    {
                        cap.AddTyp(alttyp);
                    }
                    rt = new Pullenti.Ner.ReferentToken(cap, li[0].BeginToken, li[0].EndToken);
                }
                else
                {
                    if (li[0].Value == null)
                    {
                        return(null);
                    }
                    if (typ == null)
                    {
                        if ((li.Count == 1 && li[0].BeginToken.Previous != null && li[0].BeginToken.Previous.IsHiphen) && (li[0].BeginToken.Previous.Previous is Pullenti.Ner.ReferentToken) && (li[0].BeginToken.Previous.Previous.GetReferent() is Pullenti.Ner.Geo.GeoReferent))
                        {
                        }
                        else
                        {
                            return(null);
                        }
                    }
                    else
                    {
                        if (!Pullenti.Morph.LanguageHelper.EndsWithEx(typ, "ПУНКТ", "ПОСЕЛЕНИЕ", "ПОСЕЛЕННЯ", "ПОСЕЛОК"))
                        {
                            if (!Pullenti.Morph.LanguageHelper.EndsWith(typ, "CITY"))
                            {
                                if (typ == "СТАНЦИЯ" && (MiscLocationHelper.CheckGeoObjectBefore(li[0].BeginToken)))
                                {
                                }
                                else if (li.Count > 1 && li[1].Typ == CityItemToken.ItemType.Noun && li[0].Typ == CityItemToken.ItemType.City)
                                {
                                }
                                else if ((li.Count == 2 && li[1].Typ == CityItemToken.ItemType.Noun && li[0].Typ == CityItemToken.ItemType.ProperName) && ((li[0].GeoObjectBefore || li[1].GeoObjectAfter)))
                                {
                                }
                                else
                                {
                                    return(null);
                                }
                            }
                        }
                        if (li[0].BeginToken.Morph.Class.IsAdjective)
                        {
                            li[0].Value = Pullenti.Ner.Core.ProperNameHelper.GetNameEx(li[0].BeginToken, li[0].EndToken, Pullenti.Morph.MorphClass.Adjective, li[1].Morph.Case, li[1].Morph.Gender, false, false);
                        }
                    }
                }
            }
            else if (oi.Referent is Pullenti.Ner.Geo.GeoReferent)
            {
                Pullenti.Ner.Geo.GeoReferent city = oi.Referent.Clone() as Pullenti.Ner.Geo.GeoReferent;
                city.Occurrence.Clear();
                rt = new Pullenti.Ner.ReferentToken(city, li[0].BeginToken, li[li.Count - 1].EndToken)
                {
                    Morph = mc
                };
            }
            else if (typ == null)
            {
                typ = oi.Typ;
            }
            if (rt == null)
            {
                Pullenti.Ner.Geo.GeoReferent city = new Pullenti.Ner.Geo.GeoReferent();
                city.AddName((oi == null ? li[0].Value : oi.CanonicText));
                if (typ != null)
                {
                    city.AddTyp(typ);
                }
                else
                {
                    city.AddTypCity(li[0].Kit.BaseLanguage);
                }
                if (alttyp != null)
                {
                    city.AddTyp(alttyp);
                }
                rt = new Pullenti.Ner.ReferentToken(city, li[0].BeginToken, li[li.Count - 1].EndToken)
                {
                    Morph = mc
                };
            }
            if ((rt.Referent is Pullenti.Ner.Geo.GeoReferent) && li.Count == 1 && (rt.Referent as Pullenti.Ner.Geo.GeoReferent).IsCity)
            {
                if (rt.BeginToken.Previous != null && rt.BeginToken.Previous.IsValue("Г", null))
                {
                    rt.BeginToken = rt.BeginToken.Previous;
                }
                else if ((rt.BeginToken.Previous != null && rt.BeginToken.Previous.IsChar('.') && rt.BeginToken.Previous.Previous != null) && rt.BeginToken.Previous.Previous.IsValue("Г", null))
                {
                    rt.BeginToken = rt.BeginToken.Previous.Previous;
                }
                else if (rt.EndToken.Next != null && (rt.WhitespacesAfterCount < 2) && rt.EndToken.Next.IsValue("Г", null))
                {
                    rt.EndToken = rt.EndToken.Next;
                    if (rt.EndToken.Next != null && rt.EndToken.Next.IsChar('.'))
                    {
                        rt.EndToken = rt.EndToken.Next;
                    }
                }
            }
            return(rt);
        }
示例#13
0
        public static bool CanBeHigher(Pullenti.Ner.Geo.GeoReferent hi, Pullenti.Ner.Geo.GeoReferent lo)
        {
            if (hi == null || lo == null || hi == lo)
            {
                return(false);
            }
            if (lo.Higher != null)
            {
                return(lo.Higher == hi);
            }
            if (lo.IsState)
            {
                if (lo.IsRegion && hi.IsState && !hi.IsRegion)
                {
                    return(true);
                }
                return(false);
            }
            if (hi.IsTerritory)
            {
                return(false);
            }
            if (lo.IsTerritory)
            {
                return(true);
            }
            string hit = _getTypesString(hi);
            string lot = _getTypesString(lo);

            if (hi.IsCity)
            {
                if (lo.IsRegion)
                {
                    if (hit.Contains("город;") || hit.Contains("місто") || hit.Contains("city"))
                    {
                        if ((lot.Contains("район") || lot.Contains("административный округ") || lot.Contains("адміністративний округ")) || lot.Contains("муниципальн") || lot.Contains("муніципаль"))
                        {
                            return(true);
                        }
                        if (lo.FindSlot(Pullenti.Ner.Geo.GeoReferent.ATTR_TYPE, "округ", true) != null && !lot.Contains("автономн"))
                        {
                            return(true);
                        }
                    }
                }
                if (lo.IsCity)
                {
                    if (!hit.Contains("станция") && lot.Contains("станция"))
                    {
                        return(true);
                    }
                    if (!hit.Contains("станція") && lot.Contains("станція"))
                    {
                        return(true);
                    }
                    if (hit.Contains("город;") || hit.Contains("місто") || hit.Contains("city"))
                    {
                        if ((lot.Contains("поселок") || lot.Contains("селище") || lot.Contains("село")) || lot.Contains("деревня") || lot.Contains("городок"))
                        {
                            return(true);
                        }
                    }
                    if (hit.Contains("поселение") || hit.Contains("поселок"))
                    {
                        if (lot.Contains("село;") || lot.Contains("деревня") || lot.Contains("хутор"))
                        {
                            return(true);
                        }
                    }
                    if (hit.Contains("поселение") && lot.Contains("поселок"))
                    {
                        return(true);
                    }
                    if (hit.Contains("село;"))
                    {
                        if (lot.Contains("поселение") || lot.Contains("поселок"))
                        {
                            return(true);
                        }
                    }
                    if (hi.FindSlot(Pullenti.Ner.Geo.GeoReferent.ATTR_NAME, "МОСКВА", true) != null)
                    {
                        if (lot.Contains("город;") || lot.Contains("місто") || lot.Contains("city"))
                        {
                            if (lo.FindSlot(Pullenti.Ner.Geo.GeoReferent.ATTR_NAME, "ЗЕЛЕНОГРАД", true) != null || lo.FindSlot(Pullenti.Ner.Geo.GeoReferent.ATTR_NAME, "ТРОИЦК", true) != null)
                            {
                                return(true);
                            }
                        }
                    }
                }
            }
            else if (lo.IsCity)
            {
                if (!lot.Contains("город") && !lot.Contains("місто") && !lot.Contains("city"))
                {
                    if (hi.IsRegion)
                    {
                        return(true);
                    }
                }
                else
                {
                    if (hi.IsState)
                    {
                        return(true);
                    }
                    if ((hit.Contains("административный округ") || hit.Contains("адміністративний округ") || hit.Contains("муниципальн")) || hit.Contains("муніципаль"))
                    {
                        return(false);
                    }
                    if (!hit.Contains("район"))
                    {
                        return(true);
                    }
                    if (hi.Higher != null && hi.Higher.IsRegion)
                    {
                        return(true);
                    }
                }
            }
            else if (lo.IsRegion)
            {
                foreach (Pullenti.Ner.Slot s in hi.Slots)
                {
                    if (s.TypeName == Pullenti.Ner.Geo.GeoReferent.ATTR_TYPE)
                    {
                        if (((string)s.Value) != "регион" && ((string)s.Value) != "регіон")
                        {
                            if (lo.FindSlot(s.TypeName, s.Value, true) != null)
                            {
                                return(false);
                            }
                        }
                    }
                }
                if (hit.Contains("почтовое отделение"))
                {
                    return(false);
                }
                if (lot.Contains("почтовое отделение"))
                {
                    return(true);
                }
                if (hi.IsState)
                {
                    return(true);
                }
                if (lot.Contains("волость"))
                {
                    return(true);
                }
                if (lot.Contains("county") || lot.Contains("borough") || lot.Contains("parish"))
                {
                    if (hit.Contains("state"))
                    {
                        return(true);
                    }
                }
                if (lot.Contains("район"))
                {
                    if ((hit.Contains("область") || hit.Contains("регион") || hit.Contains("край")) || hit.Contains("регіон"))
                    {
                        return(true);
                    }
                    if (hit.Contains("округ") && !hit.Contains("сельский") && !hit.Contains("поселковый"))
                    {
                        return(true);
                    }
                }
                if (lot.Contains("область"))
                {
                    if (hit.Contains("край"))
                    {
                        return(true);
                    }
                    if (hit.Contains("округ") && !hit.Contains("сельский") && !hit.Contains("поселковый"))
                    {
                        return(true);
                    }
                }
                if (lot.Contains("округ"))
                {
                    if (lot.Contains("сельский") || lot.Contains("поселковый"))
                    {
                        return(true);
                    }
                    if (hit.Contains("край"))
                    {
                        return(true);
                    }
                    if (lot.Contains("округ"))
                    {
                        if (hit.Contains("область") || hit.Contains("республика"))
                        {
                            return(true);
                        }
                    }
                }
                if (lot.Contains("муницип"))
                {
                    if (hit.Contains("область") || hit.Contains("район") || hit.Contains("округ"))
                    {
                        return(true);
                    }
                }
            }
            return(false);
        }
示例#14
0
        internal void Correct()
        {
            List <Pullenti.Ner.Geo.GeoReferent> geos = new List <Pullenti.Ner.Geo.GeoReferent>();

            foreach (Pullenti.Ner.Slot a in Slots)
            {
                if (a.TypeName == ATTR_GEO && (a.Value is Pullenti.Ner.Geo.GeoReferent))
                {
                    geos.Add(a.Value as Pullenti.Ner.Geo.GeoReferent);
                }
                else if (a.TypeName == ATTR_STREET && (a.Value is Pullenti.Ner.Referent))
                {
                    foreach (Pullenti.Ner.Slot s in (a.Value as Pullenti.Ner.Referent).Slots)
                    {
                        if (s.Value is Pullenti.Ner.Geo.GeoReferent)
                        {
                            geos.Add(s.Value as Pullenti.Ner.Geo.GeoReferent);
                        }
                    }
                }
            }
            for (int i = geos.Count - 1; i > 0; i--)
            {
                for (int j = i - 1; j >= 0; j--)
                {
                    if (_isHigher(geos[i], geos[j]))
                    {
                        Pullenti.Ner.Slot s = this.FindSlot(ATTR_GEO, geos[i], true);
                        if (s != null)
                        {
                            Slots.Remove(s);
                        }
                        geos.RemoveAt(i);
                        break;
                    }
                    else if (_isHigher(geos[j], geos[i]))
                    {
                        Pullenti.Ner.Slot s = this.FindSlot(ATTR_GEO, geos[j], true);
                        if (s != null)
                        {
                            Slots.Remove(s);
                        }
                        geos.RemoveAt(j);
                        i--;
                    }
                }
            }
            if (geos.Count == 2)
            {
                Pullenti.Ner.Geo.GeoReferent reg = null;
                Pullenti.Ner.Geo.GeoReferent cit = null;
                for (int ii = 0; ii < geos.Count; ii++)
                {
                    if (geos[ii].IsTerritory && geos[ii].Higher != null)
                    {
                        geos[ii] = geos[ii].Higher;
                    }
                }
                if (geos[0].IsCity && geos[1].IsRegion)
                {
                    cit = geos[0];
                    reg = geos[1];
                }
                else if (geos[1].IsCity && geos[0].IsRegion)
                {
                    cit = geos[1];
                    reg = geos[0];
                }
                if (cit != null && cit.Higher == null && Pullenti.Ner.Geo.Internal.GeoOwnerHelper.CanBeHigher(reg, cit))
                {
                    cit.Higher = reg;
                    Pullenti.Ner.Slot ss = this.FindSlot(ATTR_GEO, reg, true);
                    if (ss != null)
                    {
                        Slots.Remove(ss);
                    }
                    geos = Geos;
                }
                else
                {
                    Pullenti.Ner.Geo.GeoReferent stat = null;
                    Pullenti.Ner.Geo.GeoReferent geo  = null;
                    if (geos[0].IsState && !geos[1].IsState)
                    {
                        stat = geos[0];
                        geo  = geos[1];
                    }
                    else if (geos[1].IsState && !geos[0].IsState)
                    {
                        stat = geos[1];
                        geo  = geos[0];
                    }
                    if (stat != null)
                    {
                        geo = geo.TopHigher;
                        if (geo.Higher == null)
                        {
                            geo.Higher = stat;
                            Pullenti.Ner.Slot s = this.FindSlot(ATTR_GEO, stat, true);
                            if (s != null)
                            {
                                Slots.Remove(s);
                            }
                        }
                    }
                }
            }
        }
示例#15
0
        internal static bool CanBeHigherToken(Pullenti.Ner.Token rhi, Pullenti.Ner.Token rlo)
        {
            if (rhi == null || rlo == null)
            {
                return(false);
            }
            if (rhi.Morph.Case.IsInstrumental && !rhi.Morph.Case.IsGenitive)
            {
                return(false);
            }
            Pullenti.Ner.Geo.GeoReferent hi = rhi.GetReferent() as Pullenti.Ner.Geo.GeoReferent;
            Pullenti.Ner.Geo.GeoReferent lo = rlo.GetReferent() as Pullenti.Ner.Geo.GeoReferent;
            if (hi == null || lo == null)
            {
                return(false);
            }
            bool citiInReg = false;

            if (hi.IsCity && lo.IsRegion)
            {
                if (hi.FindSlot(Pullenti.Ner.Geo.GeoReferent.ATTR_TYPE, "город", true) != null || hi.FindSlot(Pullenti.Ner.Geo.GeoReferent.ATTR_TYPE, "місто", true) != null || hi.FindSlot(Pullenti.Ner.Geo.GeoReferent.ATTR_TYPE, "city", true) != null)
                {
                    string s = _getTypesString(lo);
                    if (((s.Contains("район") || s.Contains("административный округ") || s.Contains("муниципальный округ")) || s.Contains("адміністративний округ") || s.Contains("муніципальний округ")) || lo.FindSlot(Pullenti.Ner.Geo.GeoReferent.ATTR_TYPE, "округ", true) != null)
                    {
                        if (rhi.Next == rlo && rlo.Morph.Case.IsGenitive)
                        {
                            citiInReg = true;
                        }
                    }
                }
            }
            if (hi.IsRegion && lo.IsCity)
            {
                if (lo.FindSlot(Pullenti.Ner.Geo.GeoReferent.ATTR_TYPE, "город", true) != null || lo.FindSlot(Pullenti.Ner.Geo.GeoReferent.ATTR_TYPE, "місто", true) != null || lo.FindSlot(Pullenti.Ner.Geo.GeoReferent.ATTR_TYPE, "city", true) != null)
                {
                    string s = _getTypesString(hi);
                    if (s == "район;")
                    {
                        if (hi.Higher != null && hi.Higher.IsRegion)
                        {
                            citiInReg = true;
                        }
                        else if (rhi.EndChar <= rlo.BeginChar && rhi.Next.IsComma && !rlo.Morph.Case.IsGenitive)
                        {
                            citiInReg = true;
                        }
                        else if (rhi.EndChar <= rlo.BeginChar && rhi.Next.IsComma)
                        {
                            citiInReg = true;
                        }
                    }
                }
                else
                {
                    citiInReg = true;
                }
            }
            if (rhi.EndChar <= rlo.BeginChar)
            {
                if (!rhi.Morph.Class.IsAdjective)
                {
                    if (hi.IsState && !rhi.Chars.IsLatinLetter)
                    {
                        return(false);
                    }
                }
                if (rhi.IsNewlineAfter || rlo.IsNewlineBefore)
                {
                    if (!citiInReg)
                    {
                        return(false);
                    }
                }
            }
            else
            {
            }
            if (rlo.Previous != null && rlo.Previous.Morph.Class.IsPreposition)
            {
                if (rlo.Previous.Morph.Language.IsUa)
                {
                    if ((rlo.Previous.IsValue("У", null) && !rlo.Morph.Case.IsDative && !rlo.Morph.Case.IsPrepositional) && !rlo.Morph.Case.IsUndefined)
                    {
                        return(false);
                    }
                    if (rlo.Previous.IsValue("З", null) && !rlo.Morph.Case.IsGenitive && !rlo.Morph.Case.IsUndefined)
                    {
                        return(false);
                    }
                }
                else
                {
                    if ((rlo.Previous.IsValue("В", null) && !rlo.Morph.Case.IsDative && !rlo.Morph.Case.IsPrepositional) && !rlo.Morph.Case.IsUndefined)
                    {
                        return(false);
                    }
                    if (rlo.Previous.IsValue("ИЗ", null) && !rlo.Morph.Case.IsGenitive && !rlo.Morph.Case.IsUndefined)
                    {
                        return(false);
                    }
                }
            }
            if (!CanBeHigher(hi, lo))
            {
                return(citiInReg);
            }
            return(true);
        }
示例#16
0
        public static Pullenti.Ner.ReferentToken TryAttachOrg(Pullenti.Ner.Token t, bool canBeCyr = false)
        {
            if (t == null)
            {
                return(null);
            }
            bool br = false;

            if (t.IsChar('(') && t.Next != null)
            {
                t  = t.Next;
                br = true;
            }
            if (t is Pullenti.Ner.NumberToken)
            {
                if ((t as Pullenti.Ner.NumberToken).Typ == Pullenti.Ner.NumberSpellingType.Words && t.Morph.Class.IsAdjective && t.Chars.IsCapitalUpper)
                {
                }
                else
                {
                    return(null);
                }
            }
            else
            {
                if (t.Chars.IsAllLower)
                {
                    return(null);
                }
                if ((t.LengthChar < 3) && !t.Chars.IsLetter)
                {
                    return(null);
                }
                if (!t.Chars.IsLatinLetter)
                {
                    if (!canBeCyr || !t.Chars.IsCyrillicLetter)
                    {
                        return(null);
                    }
                }
            }
            Pullenti.Ner.Token t0 = t;
            Pullenti.Ner.Token t1 = t0;
            int            namWo  = 0;
            OrgItemEngItem tok    = null;

            Pullenti.Ner.Geo.GeoReferent geo    = null;
            OrgItemTypeToken             addTyp = null;

            for (; t != null; t = t.Next)
            {
                if (t != t0 && t.WhitespacesBeforeCount > 1)
                {
                    break;
                }
                if (t.IsChar(')'))
                {
                    break;
                }
                if (t.IsChar('(') && t.Next != null)
                {
                    if ((t.Next.GetReferent() is Pullenti.Ner.Geo.GeoReferent) && t.Next.Next != null && t.Next.Next.IsChar(')'))
                    {
                        geo = t.Next.GetReferent() as Pullenti.Ner.Geo.GeoReferent;
                        t   = t.Next.Next;
                        continue;
                    }
                    OrgItemTypeToken typ = OrgItemTypeToken.TryAttach(t.Next, true, null);
                    if ((typ != null && typ.EndToken.Next != null && typ.EndToken.Next.IsChar(')')) && typ.Chars.IsLatinLetter)
                    {
                        addTyp = typ;
                        t      = typ.EndToken.Next;
                        continue;
                    }
                    if (((t.Next is Pullenti.Ner.TextToken) && t.Next.Next != null && t.Next.Next.IsChar(')')) && t.Next.Chars.IsCapitalUpper)
                    {
                        t1 = (t = t.Next.Next);
                        continue;
                    }
                    break;
                }
                tok = TryAttach(t, canBeCyr);
                if (tok == null && t.IsCharOf(".,") && t.Next != null)
                {
                    tok = TryAttach(t.Next, canBeCyr);
                    if (tok == null && t.Next.IsCharOf(",."))
                    {
                        tok = TryAttach(t.Next.Next, canBeCyr);
                    }
                }
                if (tok != null)
                {
                    if (tok.LengthChar == 1 && t0.Chars.IsCyrillicLetter)
                    {
                        return(null);
                    }
                    break;
                }
                if (t.IsHiphen && !t.IsWhitespaceAfter && !t.IsWhitespaceBefore)
                {
                    continue;
                }
                if (t.IsCharOf("&+") || t.IsAnd)
                {
                    continue;
                }
                if (t.IsChar('.'))
                {
                    if (t.Previous != null && t.Previous.LengthChar == 1)
                    {
                        continue;
                    }
                    else if (Pullenti.Ner.Core.MiscHelper.CanBeStartOfSentence(t.Next))
                    {
                        break;
                    }
                }
                if (!t.Chars.IsLatinLetter)
                {
                    if (!canBeCyr || !t.Chars.IsCyrillicLetter)
                    {
                        break;
                    }
                }
                if (t.Chars.IsAllLower)
                {
                    if (t.Morph.Class.IsPreposition || t.Morph.Class.IsConjunction)
                    {
                        continue;
                    }
                    if (br)
                    {
                        continue;
                    }
                    break;
                }
                Pullenti.Morph.MorphClass mc = t.GetMorphClassInDictionary();
                if (mc.IsVerb)
                {
                    if (t.Next != null && t.Next.Morph.Class.IsPreposition)
                    {
                        break;
                    }
                }
                if (t.Next != null && t.Next.IsValue("OF", null))
                {
                    break;
                }
                if (t is Pullenti.Ner.TextToken)
                {
                    namWo++;
                }
                t1 = t;
            }
            if (tok == null)
            {
                return(null);
            }
            if (t0 == tok.BeginToken)
            {
                Pullenti.Ner.Core.BracketSequenceToken br2 = Pullenti.Ner.Core.BracketHelper.TryParse(tok.EndToken.Next, Pullenti.Ner.Core.BracketParseAttr.No, 100);
                if (br2 != null)
                {
                    Pullenti.Ner.Org.OrganizationReferent org1 = new Pullenti.Ner.Org.OrganizationReferent();
                    if (tok.ShortValue != null)
                    {
                        org1.AddTypeStr(tok.ShortValue);
                    }
                    org1.AddTypeStr(tok.FullValue);
                    string nam1 = Pullenti.Ner.Core.MiscHelper.GetTextValue(br2.BeginToken, br2.EndToken, Pullenti.Ner.Core.GetTextAttr.No);
                    if (nam1 != null)
                    {
                        org1.AddName(nam1, true, null);
                        return(new Pullenti.Ner.ReferentToken(org1, t0, br2.EndToken));
                    }
                }
                return(null);
            }
            Pullenti.Ner.Org.OrganizationReferent org = new Pullenti.Ner.Org.OrganizationReferent();
            Pullenti.Ner.Token te = tok.EndToken;
            if (tok.IsBank)
            {
                t1 = tok.EndToken;
            }
            if (tok.FullValue == "company" && (tok.WhitespacesAfterCount < 3))
            {
                OrgItemEngItem tok1 = TryAttach(tok.EndToken.Next, canBeCyr);
                if (tok1 != null)
                {
                    t1  = tok.EndToken;
                    tok = tok1;
                    te  = tok.EndToken;
                }
            }
            if (tok.FullValue == "company")
            {
                if (namWo == 0)
                {
                    return(null);
                }
            }
            string nam = Pullenti.Ner.Core.MiscHelper.GetTextValue(t0, t1, Pullenti.Ner.Core.GetTextAttr.IgnoreArticles);

            if (nam == "STOCK" && tok.FullValue == "company")
            {
                return(null);
            }
            string altNam = null;

            if (string.IsNullOrEmpty(nam))
            {
                return(null);
            }
            if (nam.IndexOf('(') > 0)
            {
                int i1 = nam.IndexOf('(');
                int i2 = nam.IndexOf(')');
                if (i1 < i2)
                {
                    altNam = nam;
                    string tai = null;
                    if ((i2 + 1) < nam.Length)
                    {
                        tai = nam.Substring(i2).Trim();
                    }
                    nam = nam.Substring(0, i1).Trim();
                    if (tai != null)
                    {
                        nam = string.Format("{0} {1}", nam, tai);
                    }
                }
            }
            if (tok.IsBank)
            {
                org.AddTypeStr((tok.Kit.BaseLanguage.IsEn ? "bank" : "банк"));
                org.AddProfile(Pullenti.Ner.Org.OrgProfile.Finance);
                if ((t1.Next != null && t1.Next.IsValue("OF", null) && t1.Next.Next != null) && t1.Next.Next.Chars.IsLatinLetter)
                {
                    OrgItemNameToken nam0 = OrgItemNameToken.TryAttach(t1.Next, null, false, false);
                    if (nam0 != null)
                    {
                        te = nam0.EndToken;
                    }
                    else
                    {
                        te = t1.Next.Next;
                    }
                    nam = Pullenti.Ner.Core.MiscHelper.GetTextValue(t0, te, Pullenti.Ner.Core.GetTextAttr.No);
                    if (te.GetReferent() is Pullenti.Ner.Geo.GeoReferent)
                    {
                        org.AddGeoObject(te.GetReferent() as Pullenti.Ner.Geo.GeoReferent);
                    }
                }
                else if (t0 == t1)
                {
                    return(null);
                }
            }
            else
            {
                if (tok.ShortValue != null)
                {
                    org.AddTypeStr(tok.ShortValue);
                }
                org.AddTypeStr(tok.FullValue);
            }
            if (string.IsNullOrEmpty(nam))
            {
                return(null);
            }
            org.AddName(nam, true, null);
            if (altNam != null)
            {
                org.AddName(altNam, true, null);
            }
            Pullenti.Ner.ReferentToken res = new Pullenti.Ner.ReferentToken(org, t0, te);
            t = te;
            while (t.Next != null)
            {
                if (t.Next.IsCharOf(",."))
                {
                    t = t.Next;
                }
                else
                {
                    break;
                }
            }
            if (t.WhitespacesAfterCount < 2)
            {
                tok = TryAttach(t.Next, canBeCyr);
                if (tok != null)
                {
                    if (tok.ShortValue != null)
                    {
                        org.AddTypeStr(tok.ShortValue);
                    }
                    org.AddTypeStr(tok.FullValue);
                    res.EndToken = tok.EndToken;
                }
            }
            if (geo != null)
            {
                org.AddGeoObject(geo);
            }
            if (addTyp != null)
            {
                org.AddType(addTyp, false);
            }
            if (!br)
            {
                return(res);
            }
            t = res.EndToken;
            if (t.Next == null || t.Next.IsChar(')'))
            {
                res.EndToken = t.Next;
            }
            else
            {
                return(null);
            }
            return(res);
        }
示例#17
0
        public static Pullenti.Ner.ReferentToken TryAttachTerritory(List <TerrItemToken> li, Pullenti.Ner.Core.AnalyzerData ad, bool attachAlways = false, List <CityItemToken> cits = null, List <Pullenti.Ner.Geo.GeoReferent> exists = null)
        {
            if (li == null || li.Count == 0)
            {
                return(null);
            }
            TerrItemToken        exObj   = null;
            TerrItemToken        newName = null;
            List <TerrItemToken> adjList = new List <TerrItemToken>();
            TerrItemToken        noun    = null;
            TerrItemToken        addNoun = null;

            Pullenti.Ner.ReferentToken rt = _tryAttachMoscowAO(li, ad);
            if (rt != null)
            {
                return(rt);
            }
            if (li[0].TerminItem != null && li[0].TerminItem.CanonicText == "ТЕРРИТОРИЯ")
            {
                Pullenti.Ner.ReferentToken res2 = _tryAttachPureTerr(li, ad);
                return(res2);
            }
            if (li.Count == 2)
            {
                if (li[0].Rzd != null && li[1].RzdDir != null)
                {
                    Pullenti.Ner.Geo.GeoReferent rzd = new Pullenti.Ner.Geo.GeoReferent();
                    rzd.AddName(li[1].RzdDir);
                    rzd.AddTypTer(li[0].Kit.BaseLanguage);
                    rzd.AddSlot(Pullenti.Ner.Geo.GeoReferent.ATTR_REF, li[0].Rzd.Referent, false, 0);
                    rzd.AddExtReferent(li[0].Rzd);
                    return(new Pullenti.Ner.ReferentToken(rzd, li[0].BeginToken, li[1].EndToken));
                }
                if (li[1].Rzd != null && li[0].RzdDir != null)
                {
                    Pullenti.Ner.Geo.GeoReferent rzd = new Pullenti.Ner.Geo.GeoReferent();
                    rzd.AddName(li[0].RzdDir);
                    rzd.AddTypTer(li[0].Kit.BaseLanguage);
                    rzd.AddSlot(Pullenti.Ner.Geo.GeoReferent.ATTR_REF, li[1].Rzd.Referent, false, 0);
                    rzd.AddExtReferent(li[1].Rzd);
                    return(new Pullenti.Ner.ReferentToken(rzd, li[0].BeginToken, li[1].EndToken));
                }
            }
            bool canBeCityBefore = false;
            bool adjTerrBefore   = false;

            if (cits != null)
            {
                if (cits[0].Typ == CityItemToken.ItemType.City)
                {
                    canBeCityBefore = true;
                }
                else if (cits[0].Typ == CityItemToken.ItemType.Noun && cits.Count > 1)
                {
                    canBeCityBefore = true;
                }
            }
            int k;

            for (k = 0; k < li.Count; k++)
            {
                if (li[k].OntoItem != null)
                {
                    if (exObj != null || newName != null)
                    {
                        break;
                    }
                    if (noun != null)
                    {
                        if (k == 1)
                        {
                            if (noun.TerminItem.CanonicText == "РАЙОН" || noun.TerminItem.CanonicText == "ОБЛАСТЬ" || noun.TerminItem.CanonicText == "СОЮЗ")
                            {
                                if (li[k].OntoItem.Referent is Pullenti.Ner.Geo.GeoReferent)
                                {
                                    if ((li[k].OntoItem.Referent as Pullenti.Ner.Geo.GeoReferent).IsState)
                                    {
                                        break;
                                    }
                                }
                                bool ok = false;
                                Pullenti.Ner.Token tt = li[k].EndToken.Next;
                                if (tt == null)
                                {
                                    ok = true;
                                }
                                else if (tt.IsCharOf(",."))
                                {
                                    ok = true;
                                }
                                if (!ok)
                                {
                                    ok = MiscLocationHelper.CheckGeoObjectBefore(li[0].BeginToken);
                                }
                                if (!ok)
                                {
                                    Pullenti.Ner.Address.Internal.AddressItemToken adr = Pullenti.Ner.Address.Internal.AddressItemToken.TryParse(tt, null, false, false, null);
                                    if (adr != null)
                                    {
                                        if (adr.Typ == Pullenti.Ner.Address.Internal.AddressItemToken.ItemType.Street)
                                        {
                                            ok = true;
                                        }
                                    }
                                }
                                if (!ok)
                                {
                                    break;
                                }
                            }
                            if (li[k].OntoItem != null)
                            {
                                if (noun.BeginToken.IsValue("МО", null) || noun.BeginToken.IsValue("ЛО", null))
                                {
                                    return(null);
                                }
                            }
                        }
                    }
                    exObj = li[k];
                }
                else if (li[k].TerminItem != null)
                {
                    if (noun != null)
                    {
                        break;
                    }
                    if (li[k].TerminItem.IsAlwaysPrefix && k > 0)
                    {
                        break;
                    }
                    if (k > 0 && li[k].IsDoubt)
                    {
                        if (li[k].BeginToken == li[k].EndToken && li[k].BeginToken.IsValue("ЗАО", null))
                        {
                            break;
                        }
                    }
                    if (li[k].TerminItem.IsAdjective || li[k].IsGeoInDictionary)
                    {
                        adjList.Add(li[k]);
                    }
                    else
                    {
                        if (exObj != null)
                        {
                            Pullenti.Ner.Geo.GeoReferent geo = exObj.OntoItem.Referent as Pullenti.Ner.Geo.GeoReferent;
                            if (geo == null)
                            {
                                break;
                            }
                            if (exObj.IsAdjective && ((li[k].TerminItem.CanonicText == "СОЮЗ" || li[k].TerminItem.CanonicText == "ФЕДЕРАЦИЯ")))
                            {
                                string str = exObj.OntoItem.ToString();
                                if (!str.Contains(li[k].TerminItem.CanonicText))
                                {
                                    return(null);
                                }
                            }
                            if (li[k].TerminItem.CanonicText == "РАЙОН" || li[k].TerminItem.CanonicText == "ОКРУГ" || li[k].TerminItem.CanonicText == "КРАЙ")
                            {
                                StringBuilder tmp = new StringBuilder();
                                foreach (Pullenti.Ner.Slot s in geo.Slots)
                                {
                                    if (s.TypeName == Pullenti.Ner.Geo.GeoReferent.ATTR_TYPE)
                                    {
                                        tmp.AppendFormat("{0};", s.Value);
                                    }
                                }
                                if (!tmp.ToString().ToUpper().Contains(li[k].TerminItem.CanonicText))
                                {
                                    if (k != 1 || newName != null)
                                    {
                                        break;
                                    }
                                    newName             = li[0];
                                    newName.IsAdjective = true;
                                    newName.OntoItem    = null;
                                    exObj = null;
                                }
                            }
                        }
                        noun = li[k];
                        if (k == 0)
                        {
                            TerrItemToken tt = TerrItemToken.TryParse(li[k].BeginToken.Previous, null, true, false, null);
                            if (tt != null && tt.Morph.Class.IsAdjective)
                            {
                                adjTerrBefore = true;
                            }
                        }
                    }
                }
                else
                {
                    if (exObj != null)
                    {
                        break;
                    }
                    if (newName != null)
                    {
                        break;
                    }
                    newName = li[k];
                }
            }
            string name     = null;
            string altName  = null;
            string fullName = null;

            Pullenti.Ner.MorphCollection morph = null;
            if (exObj != null)
            {
                if (exObj.IsAdjective && !exObj.Morph.Language.IsEn && noun == null)
                {
                    if (attachAlways && exObj.EndToken.Next != null)
                    {
                        Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(exObj.BeginToken, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                        if (exObj.EndToken.Next.IsCommaAnd)
                        {
                        }
                        else if (npt == null)
                        {
                        }
                        else
                        {
                            Pullenti.Ner.Address.Internal.StreetItemToken str = Pullenti.Ner.Address.Internal.StreetItemToken.TryParse(exObj.EndToken.Next, null, false, null, false);
                            if (str != null)
                            {
                                if (str.Typ == Pullenti.Ner.Address.Internal.StreetItemType.Noun && str.EndToken == npt.EndToken)
                                {
                                    return(null);
                                }
                            }
                        }
                    }
                    else
                    {
                        CityItemToken cit = CityItemToken.TryParse(exObj.EndToken.Next, null, false, null);
                        if (cit != null && ((cit.Typ == CityItemToken.ItemType.Noun || cit.Typ == CityItemToken.ItemType.City)))
                        {
                            Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(exObj.BeginToken, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                            if (npt != null && npt.EndToken == cit.EndToken)
                            {
                            }
                            else
                            {
                                return(null);
                            }
                        }
                        else if (exObj.BeginToken.IsValue("ПОДНЕБЕСНЫЙ", null))
                        {
                        }
                        else
                        {
                            return(null);
                        }
                    }
                }
                if (noun == null && exObj.CanBeCity)
                {
                    CityItemToken cit0 = CityItemToken.TryParseBack(exObj.BeginToken.Previous);
                    if (cit0 != null && cit0.Typ != CityItemToken.ItemType.ProperName)
                    {
                        return(null);
                    }
                }
                if (exObj.IsDoubt && noun == null)
                {
                    bool ok2 = false;
                    if (_canBeGeoAfter(exObj.EndToken.Next))
                    {
                        ok2 = true;
                    }
                    else if (!exObj.CanBeSurname && !exObj.CanBeCity)
                    {
                        if ((exObj.EndToken.Next != null && exObj.EndToken.Next.IsChar(')') && exObj.BeginToken.Previous != null) && exObj.BeginToken.Previous.IsChar('('))
                        {
                            ok2 = true;
                        }
                        else if (exObj.Chars.IsLatinLetter && exObj.BeginToken.Previous != null)
                        {
                            if (exObj.BeginToken.Previous.IsValue("IN", null))
                            {
                                ok2 = true;
                            }
                            else if (exObj.BeginToken.Previous.IsValue("THE", null) && exObj.BeginToken.Previous.Previous != null && exObj.BeginToken.Previous.Previous.IsValue("IN", null))
                            {
                                ok2 = true;
                            }
                        }
                    }
                    if (!ok2)
                    {
                        CityItemToken cit0 = CityItemToken.TryParseBack(exObj.BeginToken.Previous);
                        if (cit0 != null && cit0.Typ != CityItemToken.ItemType.ProperName)
                        {
                        }
                        else if (MiscLocationHelper.CheckGeoObjectBefore(exObj.BeginToken.Previous))
                        {
                        }
                        else
                        {
                            return(null);
                        }
                    }
                }
                name  = exObj.OntoItem.CanonicText;
                morph = exObj.Morph;
            }
            else if (newName != null)
            {
                if (noun == null)
                {
                    return(null);
                }
                for (int j = 1; j < k; j++)
                {
                    if (li[j].IsNewlineBefore && !li[0].IsNewlineBefore)
                    {
                        if (Pullenti.Ner.Core.BracketHelper.CanBeStartOfSequence(li[j].BeginToken, false, false))
                        {
                        }
                        else
                        {
                            return(null);
                        }
                    }
                }
                morph = noun.Morph;
                if (newName.IsAdjective)
                {
                    if (noun.TerminItem.Acronym == "АО")
                    {
                        if (noun.BeginToken != noun.EndToken)
                        {
                            return(null);
                        }
                        if (newName.Morph.Gender != Pullenti.Morph.MorphGender.Feminie)
                        {
                            return(null);
                        }
                    }
                    Pullenti.Ner.Geo.GeoReferent geoBefore = null;
                    Pullenti.Ner.Token           tt0       = li[0].BeginToken.Previous;
                    if (tt0 != null && tt0.IsCommaAnd)
                    {
                        tt0 = tt0.Previous;
                    }
                    if (!li[0].IsNewlineBefore && tt0 != null)
                    {
                        geoBefore = tt0.GetReferent() as Pullenti.Ner.Geo.GeoReferent;
                    }
                    if (li.IndexOf(noun) < li.IndexOf(newName))
                    {
                        if (noun.TerminItem.IsState)
                        {
                            return(null);
                        }
                        if (newName.CanBeSurname && geoBefore == null)
                        {
                            if (((noun.Morph.Case & newName.Morph.Case)).IsUndefined)
                            {
                                return(null);
                            }
                        }
                        if (Pullenti.Ner.Core.MiscHelper.IsExistsInDictionary(newName.BeginToken, newName.EndToken, Pullenti.Morph.MorphClass.Adjective | Pullenti.Morph.MorphClass.Pronoun | Pullenti.Morph.MorphClass.Verb))
                        {
                            if (noun.BeginToken != newName.BeginToken)
                            {
                                if (geoBefore == null)
                                {
                                    if (li.Count == 2 && _canBeGeoAfter(li[1].EndToken.Next))
                                    {
                                    }
                                    else if (li.Count == 3 && li[2].TerminItem != null && _canBeGeoAfter(li[2].EndToken.Next))
                                    {
                                    }
                                    else if (newName.IsGeoInDictionary)
                                    {
                                    }
                                    else if (newName.EndToken.IsNewlineAfter)
                                    {
                                    }
                                    else
                                    {
                                        return(null);
                                    }
                                }
                            }
                        }
                        Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(newName.EndToken, Pullenti.Ner.Core.NounPhraseParseAttr.ParsePronouns, 0, null);
                        if (npt != null && npt.EndToken != newName.EndToken)
                        {
                            if (li.Count >= 3 && li[2].TerminItem != null && npt.EndToken == li[2].EndToken)
                            {
                                addNoun = li[2];
                            }
                            else
                            {
                                return(null);
                            }
                        }
                        Pullenti.Ner.ReferentToken rtp = newName.Kit.ProcessReferent("PERSON", newName.BeginToken);
                        if (rtp != null)
                        {
                            return(null);
                        }
                        name = Pullenti.Ner.Core.ProperNameHelper.GetNameEx(newName.BeginToken, newName.EndToken, Pullenti.Morph.MorphClass.Adjective, Pullenti.Morph.MorphCase.Undefined, noun.TerminItem.Gender, false, false);
                    }
                    else
                    {
                        bool ok = false;
                        if (((k + 1) < li.Count) && li[k].TerminItem == null && li[k + 1].TerminItem != null)
                        {
                            ok = true;
                        }
                        else if ((k < li.Count) && li[k].OntoItem != null)
                        {
                            ok = true;
                        }
                        else if (k == li.Count && !newName.IsAdjInDictionary)
                        {
                            ok = true;
                        }
                        else if (MiscLocationHelper.CheckGeoObjectBefore(li[0].BeginToken) || canBeCityBefore)
                        {
                            ok = true;
                        }
                        else if (MiscLocationHelper.CheckGeoObjectAfter(li[k - 1].EndToken, false))
                        {
                            ok = true;
                        }
                        else if (li.Count == 3 && k == 2)
                        {
                            CityItemToken cit = CityItemToken.TryParse(li[2].BeginToken, null, false, null);
                            if (cit != null)
                            {
                                if (cit.Typ == CityItemToken.ItemType.City || cit.Typ == CityItemToken.ItemType.Noun)
                                {
                                    ok = true;
                                }
                            }
                        }
                        else if (li.Count == 2)
                        {
                            ok = _canBeGeoAfter(li[li.Count - 1].EndToken.Next);
                        }
                        if (!ok && !li[0].IsNewlineBefore && !li[0].Chars.IsAllLower)
                        {
                            Pullenti.Ner.ReferentToken rt00 = li[0].Kit.ProcessReferent("PERSONPROPERTY", li[0].BeginToken.Previous);
                            if (rt00 != null)
                            {
                                ok = true;
                            }
                        }
                        if (noun.TerminItem != null && noun.TerminItem.IsStrong && newName.IsAdjective)
                        {
                            ok = true;
                        }
                        if (noun.IsDoubt && adjList.Count == 0 && geoBefore == null)
                        {
                            return(null);
                        }
                        name = Pullenti.Ner.Core.ProperNameHelper.GetNameEx(newName.BeginToken, newName.EndToken, Pullenti.Morph.MorphClass.Adjective, Pullenti.Morph.MorphCase.Undefined, noun.TerminItem.Gender, false, false);
                        if (!ok && !attachAlways)
                        {
                            if (Pullenti.Ner.Core.MiscHelper.IsExistsInDictionary(newName.BeginToken, newName.EndToken, Pullenti.Morph.MorphClass.Adjective | Pullenti.Morph.MorphClass.Pronoun | Pullenti.Morph.MorphClass.Verb))
                            {
                                if (exists != null)
                                {
                                    foreach (Pullenti.Ner.Geo.GeoReferent e in exists)
                                    {
                                        if (e.FindSlot(Pullenti.Ner.Geo.GeoReferent.ATTR_NAME, name, true) != null)
                                        {
                                            ok = true;
                                            break;
                                        }
                                    }
                                }
                                if (!ok)
                                {
                                    return(null);
                                }
                            }
                        }
                        fullName = string.Format("{0} {1}", Pullenti.Ner.Core.ProperNameHelper.GetNameEx(li[0].BeginToken, noun.BeginToken.Previous, Pullenti.Morph.MorphClass.Adjective, Pullenti.Morph.MorphCase.Undefined, noun.TerminItem.Gender, false, false), noun.TerminItem.CanonicText);
                    }
                }
                else
                {
                    if (!attachAlways || ((noun.TerminItem != null && noun.TerminItem.CanonicText == "ФЕДЕРАЦИЯ")))
                    {
                        bool isLatin = noun.Chars.IsLatinLetter && newName.Chars.IsLatinLetter;
                        if (li.IndexOf(noun) > li.IndexOf(newName))
                        {
                            if (!isLatin)
                            {
                                return(null);
                            }
                        }
                        if (!newName.IsDistrictName && !Pullenti.Ner.Core.BracketHelper.CanBeStartOfSequence(newName.BeginToken, false, false))
                        {
                            if (adjList.Count == 0 && Pullenti.Ner.Core.MiscHelper.IsExistsInDictionary(newName.BeginToken, newName.EndToken, Pullenti.Morph.MorphClass.Noun | Pullenti.Morph.MorphClass.Pronoun))
                            {
                                if (li.Count == 2 && noun.IsCityRegion && (noun.WhitespacesAfterCount < 2))
                                {
                                }
                                else
                                {
                                    return(null);
                                }
                            }
                            if (!isLatin)
                            {
                                if ((noun.TerminItem.IsRegion && !attachAlways && ((!adjTerrBefore || newName.IsDoubt))) && !noun.IsCityRegion && !noun.TerminItem.IsSpecificPrefix)
                                {
                                    if (!MiscLocationHelper.CheckGeoObjectBefore(noun.BeginToken))
                                    {
                                        if (!noun.IsDoubt && noun.BeginToken != noun.EndToken)
                                        {
                                        }
                                        else if ((noun.TerminItem.IsAlwaysPrefix && li.Count == 2 && li[0] == noun) && li[1] == newName)
                                        {
                                        }
                                        else
                                        {
                                            return(null);
                                        }
                                    }
                                }
                                if (noun.IsDoubt && adjList.Count == 0)
                                {
                                    if (noun.TerminItem.Acronym == "МО" || noun.TerminItem.Acronym == "ЛО")
                                    {
                                        if (k == (li.Count - 1) && li[k].TerminItem != null)
                                        {
                                            addNoun = li[k];
                                            k++;
                                        }
                                        else if (li.Count == 2 && noun == li[0] && newName.ToString().EndsWith("совет"))
                                        {
                                        }
                                        else
                                        {
                                            return(null);
                                        }
                                    }
                                    else
                                    {
                                        return(null);
                                    }
                                }
                                Pullenti.Ner.ReferentToken pers = newName.Kit.ProcessReferent("PERSON", newName.BeginToken);
                                if (pers != null)
                                {
                                    return(null);
                                }
                            }
                        }
                    }
                    name = Pullenti.Ner.Core.MiscHelper.GetTextValue(newName.BeginToken, newName.EndToken, Pullenti.Ner.Core.GetTextAttr.No);
                    if (newName.BeginToken != newName.EndToken)
                    {
                        for (Pullenti.Ner.Token ttt = newName.BeginToken.Next; ttt != null && ttt.EndChar <= newName.EndChar; ttt = ttt.Next)
                        {
                            if (ttt.Chars.IsLetter)
                            {
                                TerrItemToken ty = TerrItemToken.TryParse(ttt, null, false, false, null);
                                if ((ty != null && ty.TerminItem != null && noun != null) && ((ty.TerminItem.CanonicText.Contains(noun.TerminItem.CanonicText) || noun.TerminItem.CanonicText.Contains(ty.TerminItem.CanonicText))))
                                {
                                    name = Pullenti.Ner.Core.MiscHelper.GetTextValue(newName.BeginToken, ttt.Previous, Pullenti.Ner.Core.GetTextAttr.No);
                                    break;
                                }
                            }
                        }
                    }
                    if (adjList.Count > 0)
                    {
                        Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(adjList[0].BeginToken, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                        if (npt != null && npt.EndToken == noun.EndToken)
                        {
                            altName = string.Format("{0} {1}", npt.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false), name);
                        }
                    }
                }
            }
            else
            {
                if ((li.Count == 1 && noun != null && noun.EndToken.Next != null) && (noun.EndToken.Next.GetReferent() is Pullenti.Ner.Geo.GeoReferent))
                {
                    Pullenti.Ner.Geo.GeoReferent g = noun.EndToken.Next.GetReferent() as Pullenti.Ner.Geo.GeoReferent;
                    if (noun.TerminItem != null)
                    {
                        string tyy = noun.TerminItem.CanonicText.ToLower();
                        bool   ooo = false;
                        if (g.FindSlot(Pullenti.Ner.Geo.GeoReferent.ATTR_TYPE, tyy, true) != null)
                        {
                            ooo = true;
                        }
                        else if (tyy.EndsWith("район") && g.FindSlot(Pullenti.Ner.Geo.GeoReferent.ATTR_TYPE, "район", true) != null)
                        {
                            ooo = true;
                        }
                        if (ooo)
                        {
                            return new Pullenti.Ner.ReferentToken(g, noun.BeginToken, noun.EndToken.Next)
                                   {
                                       Morph = noun.BeginToken.Morph
                                   }
                        }
                        ;
                    }
                }
                if ((li.Count == 1 && noun == li[0] && li[0].TerminItem != null) && TerrItemToken.TryParse(li[0].EndToken.Next, null, true, false, null) == null && TerrItemToken.TryParse(li[0].BeginToken.Previous, null, true, false, null) == null)
                {
                    if (li[0].Morph.Number == Pullenti.Morph.MorphNumber.Plural)
                    {
                        return(null);
                    }
                    int    cou = 0;
                    string str = li[0].TerminItem.CanonicText.ToLower();
                    for (Pullenti.Ner.Token tt = li[0].BeginToken.Previous; tt != null; tt = tt.Previous)
                    {
                        if (tt.IsNewlineAfter)
                        {
                            cou += 10;
                        }
                        else
                        {
                            cou++;
                        }
                        if (cou > 500)
                        {
                            break;
                        }
                        Pullenti.Ner.Geo.GeoReferent g = tt.GetReferent() as Pullenti.Ner.Geo.GeoReferent;
                        if (g == null)
                        {
                            continue;
                        }
                        bool ok = true;
                        cou = 0;
                        for (tt = li[0].EndToken.Next; tt != null; tt = tt.Next)
                        {
                            if (tt.IsNewlineBefore)
                            {
                                cou += 10;
                            }
                            else
                            {
                                cou++;
                            }
                            if (cou > 500)
                            {
                                break;
                            }
                            TerrItemToken tee = TerrItemToken.TryParse(tt, null, true, false, null);
                            if (tee == null)
                            {
                                continue;
                            }
                            ok = false;
                            break;
                        }
                        if (ok)
                        {
                            for (int ii = 0; g != null && (ii < 3); g = g.Higher, ii++)
                            {
                                if (g.FindSlot(Pullenti.Ner.Geo.GeoReferent.ATTR_TYPE, str, true) != null)
                                {
                                    return new Pullenti.Ner.ReferentToken(g, li[0].BeginToken, li[0].EndToken)
                                           {
                                               Morph = noun.BeginToken.Morph
                                           }
                                }
                                ;
                            }
                        }
                        break;
                    }
                }
                return(null);
            }
            Pullenti.Ner.Geo.GeoReferent ter = null;
            if (exObj != null && (exObj.Tag is Pullenti.Ner.Geo.GeoReferent))
            {
                ter = exObj.Tag as Pullenti.Ner.Geo.GeoReferent;
            }
            else
            {
                ter = new Pullenti.Ner.Geo.GeoReferent();
                if (exObj != null)
                {
                    Pullenti.Ner.Geo.GeoReferent geo = exObj.OntoItem.Referent as Pullenti.Ner.Geo.GeoReferent;
                    if (geo != null && !geo.IsCity)
                    {
                        ter.MergeSlots2(geo, li[0].Kit.BaseLanguage);
                    }
                    else
                    {
                        ter.AddName(name);
                    }
                    if (noun == null && exObj.CanBeCity)
                    {
                        ter.AddTypCity(li[0].Kit.BaseLanguage);
                    }
                    else
                    {
                    }
                }
                else if (newName != null)
                {
                    ter.AddName(name);
                    if (altName != null)
                    {
                        ter.AddName(altName);
                    }
                }
                if (noun != null)
                {
                    if (noun.TerminItem.CanonicText == "АО")
                    {
                        ter.AddTyp((li[0].Kit.BaseLanguage.IsUa ? "АВТОНОМНИЙ ОКРУГ" : "АВТОНОМНЫЙ ОКРУГ"));
                    }
                    else if (noun.TerminItem.CanonicText == "МУНИЦИПАЛЬНОЕ СОБРАНИЕ" || noun.TerminItem.CanonicText == "МУНІЦИПАЛЬНЕ ЗБОРИ")
                    {
                        ter.AddTyp((li[0].Kit.BaseLanguage.IsUa ? "МУНІЦИПАЛЬНЕ УТВОРЕННЯ" : "МУНИЦИПАЛЬНОЕ ОБРАЗОВАНИЕ"));
                    }
                    else if (noun.TerminItem.Acronym == "МО" && addNoun != null)
                    {
                        ter.AddTyp(addNoun.TerminItem.CanonicText);
                    }
                    else
                    {
                        if (noun.TerminItem.CanonicText == "СОЮЗ" && exObj != null && exObj.EndChar > noun.EndChar)
                        {
                            return new Pullenti.Ner.ReferentToken(ter, exObj.BeginToken, exObj.EndToken)
                                   {
                                       Morph = exObj.Morph
                                   }
                        }
                        ;
                        ter.AddTyp(noun.TerminItem.CanonicText);
                        if (noun.TerminItem.IsRegion && ter.IsState)
                        {
                            ter.AddTypReg(li[0].Kit.BaseLanguage);
                        }
                    }
                }
                if (ter.IsState && ter.IsRegion)
                {
                    foreach (TerrItemToken a in adjList)
                    {
                        if (a.TerminItem.IsRegion)
                        {
                            ter.AddTypReg(li[0].Kit.BaseLanguage);
                            break;
                        }
                    }
                }
                if (ter.IsState)
                {
                    if (fullName != null)
                    {
                        ter.AddName(fullName);
                    }
                }
            }
            Pullenti.Ner.ReferentToken res = new Pullenti.Ner.ReferentToken(ter, li[0].BeginToken, li[k - 1].EndToken);
            if (noun != null && noun.Morph.Class.IsNoun)
            {
                res.Morph = noun.Morph;
            }
            else
            {
                res.Morph = new Pullenti.Ner.MorphCollection();
                for (int ii = 0; ii < k; ii++)
                {
                    foreach (Pullenti.Morph.MorphBaseInfo v in li[ii].Morph.Items)
                    {
                        Pullenti.Morph.MorphBaseInfo bi = new Pullenti.Morph.MorphBaseInfo();
                        bi.CopyFrom(v);
                        if (noun != null)
                        {
                            if (bi.Class.IsAdjective)
                            {
                                bi.Class = Pullenti.Morph.MorphClass.Noun;
                            }
                        }
                        res.Morph.AddItem(bi);
                    }
                }
            }
            if (li[0].TerminItem != null && li[0].TerminItem.IsSpecificPrefix)
            {
                res.BeginToken = li[0].EndToken.Next;
            }
            if (addNoun != null && addNoun.EndChar > res.EndChar)
            {
                res.EndToken = addNoun.EndToken;
            }
            if ((res.BeginToken.Previous is Pullenti.Ner.TextToken) && (res.WhitespacesBeforeCount < 2))
            {
                Pullenti.Ner.TextToken tt = res.BeginToken.Previous as Pullenti.Ner.TextToken;
                if (tt.Term == "АР")
                {
                    foreach (string ty in ter.Typs)
                    {
                        if (ty.Contains("республика") || ty.Contains("республіка"))
                        {
                            res.BeginToken = tt;
                            break;
                        }
                    }
                }
            }
            return(res);
        }
示例#18
0
        internal static TitlePageReferent _process(Pullenti.Ner.Token begin, int maxCharPos, Pullenti.Ner.Core.AnalysisKit kit, out Pullenti.Ner.Token endToken)
        {
            endToken = begin;
            TitlePageReferent res = new TitlePageReferent();

            Pullenti.Ner.Core.Termin term = null;
            List <Pullenti.Ner.Titlepage.Internal.Line> lines = Pullenti.Ner.Titlepage.Internal.Line.Parse(begin, 30, 1500, maxCharPos);

            if (lines.Count < 1)
            {
                return(null);
            }
            int cou = lines.Count;
            int minNewlinesCount = 10;
            Dictionary <int, int> linesCountStat = new Dictionary <int, int>();

            for (int i = 0; i < lines.Count; i++)
            {
                if (Pullenti.Ner.Titlepage.Internal.TitleNameToken.CanBeStartOfTextOrContent(lines[i].BeginToken, lines[i].EndToken))
                {
                    cou = i;
                    break;
                }
                int j = lines[i].NewlinesBeforeCount;
                if (i > 0 && j > 0)
                {
                    if (!linesCountStat.ContainsKey(j))
                    {
                        linesCountStat.Add(j, 1);
                    }
                    else
                    {
                        linesCountStat[j]++;
                    }
                }
            }
            int max = 0;

            foreach (KeyValuePair <int, int> kp in linesCountStat)
            {
                if (kp.Value > max)
                {
                    max = kp.Value;
                    minNewlinesCount = kp.Key;
                }
            }
            int endChar = (cou > 0 ? lines[cou - 1].EndChar : 0);

            if (maxCharPos > 0 && endChar > maxCharPos)
            {
                endChar = maxCharPos;
            }
            List <Pullenti.Ner.Titlepage.Internal.TitleNameToken> names = new List <Pullenti.Ner.Titlepage.Internal.TitleNameToken>();

            for (int i = 0; i < cou; i++)
            {
                if (i == 6)
                {
                }
                for (int j = i; (j < cou) && (j < (i + 5)); j++)
                {
                    if (i == 6 && j == 8)
                    {
                    }
                    if (j > i)
                    {
                        if (lines[j - 1].IsPureEn && lines[j].IsPureRu)
                        {
                            break;
                        }
                        if (lines[j - 1].IsPureRu && lines[j].IsPureEn)
                        {
                            break;
                        }
                        if (lines[j].NewlinesBeforeCount >= (minNewlinesCount * 2))
                        {
                            break;
                        }
                    }
                    Pullenti.Ner.Titlepage.Internal.TitleNameToken ttt = Pullenti.Ner.Titlepage.Internal.TitleNameToken.TryParse(lines[i].BeginToken, lines[j].EndToken, minNewlinesCount);
                    if (ttt != null)
                    {
                        if (lines[i].IsPureEn)
                        {
                            ttt.Morph.Language = Pullenti.Morph.MorphLang.EN;
                        }
                        else if (lines[i].IsPureRu)
                        {
                            ttt.Morph.Language = Pullenti.Morph.MorphLang.RU;
                        }
                        names.Add(ttt);
                    }
                }
            }
            Pullenti.Ner.Titlepage.Internal.TitleNameToken.Sort(names);
            Pullenti.Ner.ReferentToken nameRt = null;
            if (names.Count > 0)
            {
                int i0 = 0;
                if (names[i0].Morph.Language.IsEn)
                {
                    for (int ii = 1; ii < names.Count; ii++)
                    {
                        if (names[ii].Morph.Language.IsRu && names[ii].Rank > 0)
                        {
                            i0 = ii;
                            break;
                        }
                    }
                }
                term = res.AddName(names[i0].BeginNameToken, names[i0].EndNameToken);
                if (names[i0].TypeValue != null)
                {
                    res.AddType(names[i0].TypeValue);
                }
                if (names[i0].Speciality != null)
                {
                    res.Speciality = names[i0].Speciality;
                }
                Pullenti.Ner.ReferentToken rt = new Pullenti.Ner.ReferentToken(res, names[i0].BeginToken, names[i0].EndToken);
                if (kit != null)
                {
                    kit.EmbedToken(rt);
                }
                else
                {
                    res.AddOccurence(new Pullenti.Ner.TextAnnotation(rt.BeginToken, rt.EndToken));
                }
                endToken = rt.EndToken;
                nameRt   = rt;
                if (begin.BeginChar == rt.BeginChar)
                {
                    begin = rt;
                }
            }
            if (term != null && kit != null)
            {
                for (Pullenti.Ner.Token t = kit.FirstToken; t != null; t = t.Next)
                {
                    Pullenti.Ner.Core.TerminToken tok = term.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
                    if (tok == null)
                    {
                        continue;
                    }
                    Pullenti.Ner.Token t0 = t;
                    Pullenti.Ner.Token t1 = tok.EndToken;
                    if (t1.Next != null && t1.Next.IsChar('.'))
                    {
                        t1 = t1.Next;
                    }
                    if (Pullenti.Ner.Core.BracketHelper.CanBeStartOfSequence(t0.Previous, false, false) && Pullenti.Ner.Core.BracketHelper.CanBeEndOfSequence(t1.Next, false, null, false))
                    {
                        t0 = t0.Previous;
                        t1 = t1.Next;
                    }
                    Pullenti.Ner.ReferentToken rt = new Pullenti.Ner.ReferentToken(res, t0, t1);
                    kit.EmbedToken(rt);
                    t = rt;
                }
            }
            Pullenti.Ner.Titlepage.Internal.PersonRelations             pr        = new Pullenti.Ner.Titlepage.Internal.PersonRelations();
            Pullenti.Ner.Titlepage.Internal.TitleItemToken.Types        persTyp   = Pullenti.Ner.Titlepage.Internal.TitleItemToken.Types.Undefined;
            List <Pullenti.Ner.Titlepage.Internal.TitleItemToken.Types> persTypes = pr.RelTypes;

            for (Pullenti.Ner.Token t = begin; t != null; t = t.Next)
            {
                if (maxCharPos > 0 && t.BeginChar > maxCharPos)
                {
                    break;
                }
                if (t == nameRt)
                {
                    continue;
                }
                Pullenti.Ner.Titlepage.Internal.TitleItemToken tpt = Pullenti.Ner.Titlepage.Internal.TitleItemToken.TryAttach(t);
                if (tpt != null)
                {
                    persTyp = Pullenti.Ner.Titlepage.Internal.TitleItemToken.Types.Undefined;
                    if (tpt.Typ == Pullenti.Ner.Titlepage.Internal.TitleItemToken.Types.Typ)
                    {
                        if (res.Types.Count == 0)
                        {
                            res.AddType(tpt.Value);
                        }
                        else if (res.Types.Count == 1)
                        {
                            string ty = res.Types[0].ToUpper();
                            if (ty == "РЕФЕРАТ")
                            {
                                res.AddType(tpt.Value);
                            }
                            else if (ty == "АВТОРЕФЕРАТ")
                            {
                                if (tpt.Value == "КАНДИДАТСКАЯ ДИССЕРТАЦИЯ")
                                {
                                    res.AddSlot(TitlePageReferent.ATTR_TYPE, "автореферат кандидатской диссертации", true, 0);
                                }
                                else if (tpt.Value == "ДОКТОРСКАЯ ДИССЕРТАЦИЯ")
                                {
                                    res.AddSlot(TitlePageReferent.ATTR_TYPE, "автореферат докторской диссертации", true, 0);
                                }
                                else if (tpt.Value == "МАГИСТЕРСКАЯ ДИССЕРТАЦИЯ")
                                {
                                    res.AddSlot(TitlePageReferent.ATTR_TYPE, "автореферат магистерской диссертации", true, 0);
                                }
                                else if (tpt.Value == "КАНДИДАТСЬКА ДИСЕРТАЦІЯ")
                                {
                                    res.AddSlot(TitlePageReferent.ATTR_TYPE, "автореферат кандидатської дисертації", true, 0);
                                }
                                else if (tpt.Value == "ДОКТОРСЬКА ДИСЕРТАЦІЯ")
                                {
                                    res.AddSlot(TitlePageReferent.ATTR_TYPE, "автореферат докторської дисертації", true, 0);
                                }
                                else if (tpt.Value == "МАГІСТЕРСЬКА ДИСЕРТАЦІЯ")
                                {
                                    res.AddSlot(TitlePageReferent.ATTR_TYPE, "автореферат магістерської дисертації", true, 0);
                                }
                                else
                                {
                                    res.AddType(tpt.Value);
                                }
                            }
                            else if (tpt.Value == "РЕФЕРАТ" || tpt.Value == "АВТОРЕФЕРАТ")
                            {
                                if (!ty.Contains(tpt.Value))
                                {
                                    res.AddType(tpt.Value);
                                }
                            }
                        }
                    }
                    else if (tpt.Typ == Pullenti.Ner.Titlepage.Internal.TitleItemToken.Types.Speciality)
                    {
                        if (res.Speciality == null)
                        {
                            res.Speciality = tpt.Value;
                        }
                    }
                    else if (persTypes.Contains(tpt.Typ))
                    {
                        persTyp = tpt.Typ;
                    }
                    t = tpt.EndToken;
                    if (t.EndChar > endToken.EndChar)
                    {
                        endToken = t;
                    }
                    if (t.Next != null && t.Next.IsCharOf(":-"))
                    {
                        t = t.Next;
                    }
                    continue;
                }
                if (t.EndChar > endChar)
                {
                    break;
                }
                List <Pullenti.Ner.Referent> rli = t.GetReferents();
                if (rli == null)
                {
                    continue;
                }
                if (!t.IsNewlineBefore && (t.Previous is Pullenti.Ner.TextToken))
                {
                    string s = (t.Previous as Pullenti.Ner.TextToken).Term;
                    if (s == "ИМЕНИ" || s == "ИМ")
                    {
                        continue;
                    }
                    if (s == "." && t.Previous.Previous != null && t.Previous.Previous.IsValue("ИМ", null))
                    {
                        continue;
                    }
                }
                foreach (Pullenti.Ner.Referent r in rli)
                {
                    if (r is Pullenti.Ner.Person.PersonReferent)
                    {
                        if (r != rli[0])
                        {
                            continue;
                        }
                        Pullenti.Ner.Person.PersonReferent p = r as Pullenti.Ner.Person.PersonReferent;
                        if (persTyp != Pullenti.Ner.Titlepage.Internal.TitleItemToken.Types.Undefined)
                        {
                            if (t.Previous != null && t.Previous.IsChar('.'))
                            {
                                persTyp = Pullenti.Ner.Titlepage.Internal.TitleItemToken.Types.Undefined;
                            }
                        }
                        Pullenti.Ner.Titlepage.Internal.TitleItemToken.Types typ = pr.CalcTypFromAttrs(p);
                        if (typ != Pullenti.Ner.Titlepage.Internal.TitleItemToken.Types.Undefined)
                        {
                            pr.Add(p, typ, 1);
                            persTyp = typ;
                        }
                        else if (persTyp != Pullenti.Ner.Titlepage.Internal.TitleItemToken.Types.Undefined)
                        {
                            pr.Add(p, persTyp, 1);
                        }
                        else if (t.Previous != null && t.Previous.IsChar('©'))
                        {
                            persTyp = Pullenti.Ner.Titlepage.Internal.TitleItemToken.Types.Worker;
                            pr.Add(p, persTyp, 1);
                        }
                        else
                        {
                            for (Pullenti.Ner.Token tt = t.Next; tt != null; tt = tt.Next)
                            {
                                Pullenti.Ner.Referent rr = tt.GetReferent();
                                if (rr == res)
                                {
                                    persTyp = Pullenti.Ner.Titlepage.Internal.TitleItemToken.Types.Worker;
                                    break;
                                }
                                if (rr is Pullenti.Ner.Person.PersonReferent)
                                {
                                    if (pr.CalcTypFromAttrs(r as Pullenti.Ner.Person.PersonReferent) != Pullenti.Ner.Titlepage.Internal.TitleItemToken.Types.Undefined)
                                    {
                                        break;
                                    }
                                    else
                                    {
                                        continue;
                                    }
                                }
                                if (rr != null)
                                {
                                    break;
                                }
                                tpt = Pullenti.Ner.Titlepage.Internal.TitleItemToken.TryAttach(tt);
                                if (tpt != null)
                                {
                                    if (tpt.Typ != Pullenti.Ner.Titlepage.Internal.TitleItemToken.Types.Typ && tpt.Typ != Pullenti.Ner.Titlepage.Internal.TitleItemToken.Types.TypAndTheme)
                                    {
                                        break;
                                    }
                                    tt = tpt.EndToken;
                                    if (tt.EndChar > endToken.EndChar)
                                    {
                                        endToken = tt;
                                    }
                                    continue;
                                }
                            }
                            if (persTyp == Pullenti.Ner.Titlepage.Internal.TitleItemToken.Types.Undefined)
                            {
                                for (Pullenti.Ner.Token tt = t.Previous; tt != null; tt = tt.Previous)
                                {
                                    Pullenti.Ner.Referent rr = tt.GetReferent();
                                    if (rr == res)
                                    {
                                        persTyp = Pullenti.Ner.Titlepage.Internal.TitleItemToken.Types.Worker;
                                        break;
                                    }
                                    if (rr != null)
                                    {
                                        break;
                                    }
                                    if ((tt.IsValue("СТУДЕНТ", null) || tt.IsValue("СТУДЕНТКА", null) || tt.IsValue("СЛУШАТЕЛЬ", null)) || tt.IsValue("ДИПЛОМНИК", null) || tt.IsValue("ИСПОЛНИТЕЛЬ", null))
                                    {
                                        persTyp = Pullenti.Ner.Titlepage.Internal.TitleItemToken.Types.Worker;
                                        break;
                                    }
                                    tpt = Pullenti.Ner.Titlepage.Internal.TitleItemToken.TryAttach(tt);
                                    if (tpt != null && tpt.Typ != Pullenti.Ner.Titlepage.Internal.TitleItemToken.Types.Typ)
                                    {
                                        break;
                                    }
                                }
                            }
                            if (persTyp != Pullenti.Ner.Titlepage.Internal.TitleItemToken.Types.Undefined)
                            {
                                pr.Add(p, persTyp, 1);
                            }
                            else
                            {
                                pr.Add(p, persTyp, (float)0.5);
                            }
                            if (t.EndChar > endToken.EndChar)
                            {
                                endToken = t;
                            }
                        }
                        continue;
                    }
                    if (r == rli[0])
                    {
                        persTyp = Pullenti.Ner.Titlepage.Internal.TitleItemToken.Types.Undefined;
                    }
                    if (r is Pullenti.Ner.Date.DateReferent)
                    {
                        if (res.Date == null)
                        {
                            res.Date = r as Pullenti.Ner.Date.DateReferent;
                            if (t.EndChar > endToken.EndChar)
                            {
                                endToken = t;
                            }
                        }
                    }
                    else if (r is Pullenti.Ner.Geo.GeoReferent)
                    {
                        if (res.City == null && (r as Pullenti.Ner.Geo.GeoReferent).IsCity)
                        {
                            res.City = r as Pullenti.Ner.Geo.GeoReferent;
                            if (t.EndChar > endToken.EndChar)
                            {
                                endToken = t;
                            }
                        }
                    }
                    if (r is Pullenti.Ner.Org.OrganizationReferent)
                    {
                        Pullenti.Ner.Org.OrganizationReferent org = r as Pullenti.Ner.Org.OrganizationReferent;
                        if (org.Types.Contains("курс") && org.Number != null)
                        {
                            int i;
                            if (int.TryParse(org.Number, out i))
                            {
                                if (i > 0 && (i < 8))
                                {
                                    res.StudentYear = i;
                                }
                            }
                        }
                        for (; org.Higher != null; org = org.Higher)
                        {
                            if (org.Kind != Pullenti.Ner.Org.OrganizationKind.Department)
                            {
                                break;
                            }
                        }
                        if (org.Kind != Pullenti.Ner.Org.OrganizationKind.Department)
                        {
                            if (res.Org == null)
                            {
                                res.Org = org;
                            }
                            else if (Pullenti.Ner.Org.OrganizationReferent.CanBeHigher(res.Org, org))
                            {
                                res.Org = org;
                            }
                        }
                        if (t.EndChar > endToken.EndChar)
                        {
                            endToken = t;
                        }
                    }
                    if ((r is Pullenti.Ner.Uri.UriReferent) || (r is Pullenti.Ner.Geo.GeoReferent))
                    {
                        if (t.EndChar > endToken.EndChar)
                        {
                            endToken = t;
                        }
                    }
                }
            }
            foreach (Pullenti.Ner.Titlepage.Internal.TitleItemToken.Types ty in persTypes)
            {
                foreach (Pullenti.Ner.Person.PersonReferent p in pr.GetPersons(ty))
                {
                    if (pr.GetAttrNameForType(ty) != null)
                    {
                        res.AddSlot(pr.GetAttrNameForType(ty), p, false, 0);
                    }
                }
            }
            if (res.GetSlotValue(TitlePageReferent.ATTR_AUTHOR) == null)
            {
                foreach (Pullenti.Ner.Person.PersonReferent p in pr.GetPersons(Pullenti.Ner.Titlepage.Internal.TitleItemToken.Types.Undefined))
                {
                    res.AddSlot(TitlePageReferent.ATTR_AUTHOR, p, false, 0);
                    break;
                }
            }
            if (res.City == null && res.Org != null)
            {
                Pullenti.Ner.Slot s = res.Org.FindSlot(Pullenti.Ner.Org.OrganizationReferent.ATTR_GEO, null, true);
                if (s != null && (s.Value is Pullenti.Ner.Geo.GeoReferent))
                {
                    if ((s.Value as Pullenti.Ner.Geo.GeoReferent).IsCity)
                    {
                        res.City = s.Value as Pullenti.Ner.Geo.GeoReferent;
                    }
                }
            }
            if (res.Date == null)
            {
                for (Pullenti.Ner.Token t = begin; t != null && t.EndChar <= endChar; t = t.Next)
                {
                    Pullenti.Ner.Geo.GeoReferent city = t.GetReferent() as Pullenti.Ner.Geo.GeoReferent;
                    if (city == null)
                    {
                        continue;
                    }
                    if (t.Next is Pullenti.Ner.TextToken)
                    {
                        if (t.Next.IsCharOf(":,") || t.Next.IsHiphen)
                        {
                            t = t.Next;
                        }
                    }
                    Pullenti.Ner.ReferentToken rt = t.Kit.ProcessReferent(Pullenti.Ner.Date.DateAnalyzer.ANALYZER_NAME, t.Next);
                    if (rt != null)
                    {
                        rt.SaveToLocalOntology();
                        res.Date = rt.Referent as Pullenti.Ner.Date.DateReferent;
                        if (kit != null)
                        {
                            kit.EmbedToken(rt);
                        }
                        break;
                    }
                }
            }
            if (res.Slots.Count == 0)
            {
                return(null);
            }
            else
            {
                return(res);
            }
        }
示例#19
0
        static Pullenti.Ner.ReferentToken _tryNameExist(List <CityItemToken> li, out Pullenti.Ner.Core.IntOntologyItem oi, bool always)
        {
            oi = null;
            if (li == null || li[0].Typ != CityItemToken.ItemType.City)
            {
                return(null);
            }
            oi = li[0].OntoItem;
            Pullenti.Ner.TextToken tt = li[0].BeginToken as Pullenti.Ner.TextToken;
            if (tt == null)
            {
                return(null);
            }
            bool   ok  = false;
            string nam = (oi == null ? li[0].Value : oi.CanonicText);

            if (nam == null)
            {
                return(null);
            }
            if (nam == "РИМ")
            {
                if (tt.Term == "РИМ")
                {
                    if ((tt.Next is Pullenti.Ner.TextToken) && tt.Next.GetMorphClassInDictionary().IsProperSecname)
                    {
                    }
                    else
                    {
                        ok = true;
                    }
                }
                else if (tt.Previous != null && tt.Previous.IsValue("В", null) && tt.Term == "РИМЕ")
                {
                    ok = true;
                }
            }
            else if (oi != null && oi.Referent != null && oi.Owner.IsExtOntology)
            {
                ok = true;
            }
            else if (nam.EndsWith("ГРАД") || nam.EndsWith("СК"))
            {
                ok = true;
            }
            else if (nam.EndsWith("TOWN") || nam.StartsWith("SAN"))
            {
                ok = true;
            }
            else if (li[0].Chars.IsLatinLetter && li[0].BeginToken.Previous != null && ((li[0].BeginToken.Previous.IsValue("IN", null) || li[0].BeginToken.Previous.IsValue("FROM", null))))
            {
                ok = true;
            }
            else
            {
                for (Pullenti.Ner.Token tt2 = li[0].EndToken.Next; tt2 != null; tt2 = tt2.Next)
                {
                    if (tt2.IsNewlineBefore)
                    {
                        break;
                    }
                    if ((tt2.IsCharOf(",(") || tt2.Morph.Class.IsPreposition || tt2.Morph.Class.IsConjunction) || tt2.Morph.Class.IsMisc)
                    {
                        continue;
                    }
                    if ((tt2.GetReferent() is Pullenti.Ner.Geo.GeoReferent) && tt2.Chars.IsCyrillicLetter == li[0].Chars.IsCyrillicLetter)
                    {
                        ok = true;
                    }
                    break;
                }
                if (!ok)
                {
                    for (Pullenti.Ner.Token tt2 = li[0].BeginToken.Previous; tt2 != null; tt2 = tt2.Previous)
                    {
                        if (tt2.IsNewlineAfter)
                        {
                            break;
                        }
                        if ((tt2.IsCharOf(",)") || tt2.Morph.Class.IsPreposition || tt2.Morph.Class.IsConjunction) || tt2.Morph.Class.IsMisc)
                        {
                            continue;
                        }
                        if ((tt2.GetReferent() is Pullenti.Ner.Geo.GeoReferent) && tt2.Chars.IsCyrillicLetter == li[0].Chars.IsCyrillicLetter)
                        {
                            ok = true;
                        }
                        if (ok)
                        {
                            List <Pullenti.Ner.Address.Internal.StreetItemToken> sits = Pullenti.Ner.Address.Internal.StreetItemToken.TryParseList(li[0].BeginToken, null, 10);
                            if (sits != null && sits.Count > 1)
                            {
                                Pullenti.Ner.Address.Internal.AddressItemToken ss = Pullenti.Ner.Address.Internal.StreetDefineHelper.TryParseStreet(sits, false, false);
                                if (ss != null)
                                {
                                    sits.RemoveAt(0);
                                    if (Pullenti.Ner.Address.Internal.StreetDefineHelper.TryParseStreet(sits, false, false) == null)
                                    {
                                        ok = false;
                                    }
                                }
                            }
                        }
                        if (ok)
                        {
                            if (li.Count > 1 && li[1].Typ == CityItemToken.ItemType.ProperName && (li[1].WhitespacesBeforeCount < 3))
                            {
                                ok = false;
                            }
                            else
                            {
                                Pullenti.Morph.MorphClass mc = li[0].BeginToken.GetMorphClassInDictionary();
                                if (mc.IsProperName || mc.IsProperSurname || mc.IsAdjective)
                                {
                                    ok = false;
                                }
                                else
                                {
                                    Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(li[0].BeginToken, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                                    if (npt != null && npt.EndChar > li[0].EndChar)
                                    {
                                        ok = false;
                                    }
                                }
                            }
                        }
                        if (Pullenti.Ner.Address.Internal.AddressItemToken.TryAttachOrg(li[0].BeginToken) != null)
                        {
                            ok = false;
                            break;
                        }
                        break;
                    }
                }
            }
            if (always)
            {
                if (li[0].WhitespacesBeforeCount > 3 && li[0].Doubtful && li[0].BeginToken.GetMorphClassInDictionary().IsProperSurname)
                {
                    Pullenti.Ner.ReferentToken pp = li[0].Kit.ProcessReferent("PERSON", li[0].BeginToken);
                    if (pp != null)
                    {
                        always = false;
                    }
                }
            }
            if (li[0].BeginToken.Chars.IsLatinLetter && li[0].BeginToken == li[0].EndToken)
            {
                Pullenti.Ner.Token tt1 = li[0].EndToken.Next;
                if (tt1 != null && tt1.IsChar(','))
                {
                    tt1 = tt1.Next;
                }
                if (((tt1 is Pullenti.Ner.TextToken) && tt1.Chars.IsLatinLetter && (tt1.LengthChar < 3)) && !tt1.Chars.IsAllLower)
                {
                    ok = false;
                }
            }
            if (!ok && !always)
            {
                return(null);
            }
            Pullenti.Ner.Geo.GeoReferent city = null;
            if (oi != null && (oi.Referent is Pullenti.Ner.Geo.GeoReferent) && !oi.Owner.IsExtOntology)
            {
                city = oi.Referent.Clone() as Pullenti.Ner.Geo.GeoReferent;
                city.Occurrence.Clear();
            }
            else
            {
                city = new Pullenti.Ner.Geo.GeoReferent();
                city.AddName(nam);
                if (oi != null && (oi.Referent is Pullenti.Ner.Geo.GeoReferent))
                {
                    city.MergeSlots2(oi.Referent as Pullenti.Ner.Geo.GeoReferent, li[0].Kit.BaseLanguage);
                }
                if (!city.IsCity)
                {
                    city.AddTypCity(li[0].Kit.BaseLanguage);
                }
            }
            return(new Pullenti.Ner.ReferentToken(city, li[0].BeginToken, li[0].EndToken)
            {
                Morph = li[0].Morph
            });
        }
示例#20
0
        static Pullenti.Ner.ReferentToken _tryNounName(List <CityItemToken> li, out Pullenti.Ner.Core.IntOntologyItem oi, bool always)
        {
            oi = null;
            if (li == null || (li.Count < 2) || ((li[0].Typ != CityItemToken.ItemType.Noun && li[0].Typ != CityItemToken.ItemType.Misc)))
            {
                return(null);
            }
            bool ok = !li[0].Doubtful;

            if (ok && li[0].Typ == CityItemToken.ItemType.Misc)
            {
                ok = false;
            }
            string typ     = (li[0].Typ == CityItemToken.ItemType.Misc ? null : li[0].Value);
            string typ2    = (li[0].Typ == CityItemToken.ItemType.Misc ? null : li[0].AltValue);
            string probAdj = null;
            int    i1      = 1;

            Pullenti.Ner.Referent org = null;
            if ((typ != null && li[i1].Typ == CityItemToken.ItemType.Noun && ((i1 + 1) < li.Count)) && li[0].WhitespacesAfterCount <= 1 && (((Pullenti.Morph.LanguageHelper.EndsWith(typ, "ПОСЕЛОК") || Pullenti.Morph.LanguageHelper.EndsWith(typ, "СЕЛИЩЕ") || typ == "ДЕРЕВНЯ") || typ == "СЕЛО")))
            {
                if (li[i1].BeginToken == li[i1].EndToken)
                {
                    Pullenti.Ner.Address.Internal.AddressItemToken ooo = Pullenti.Ner.Address.Internal.AddressItemToken.TryAttachOrg(li[i1].BeginToken);
                    if (ooo != null && ooo.RefToken != null)
                    {
                        return(null);
                    }
                }
                typ2 = li[i1].Value;
                if (typ2 == "СТАНЦИЯ" && li[i1].BeginToken.IsValue("СТ", null) && ((i1 + 1) < li.Count))
                {
                    Pullenti.Ner.MorphCollection m = li[i1 + 1].Morph;
                    if (m.Number == Pullenti.Morph.MorphNumber.Plural)
                    {
                        probAdj = "СТАРЫЕ";
                    }
                    else if (m.Gender == Pullenti.Morph.MorphGender.Feminie)
                    {
                        probAdj = "СТАРАЯ";
                    }
                    else if (m.Gender == Pullenti.Morph.MorphGender.Masculine)
                    {
                        probAdj = "СТАРЫЙ";
                    }
                    else
                    {
                        probAdj = "СТАРОЕ";
                    }
                }
                i1++;
            }
            string name    = li[i1].Value ?? ((li[i1].OntoItem == null ? null : li[i1].OntoItem.CanonicText));
            string altName = li[i1].AltValue;

            if (name == null)
            {
                return(null);
            }
            Pullenti.Ner.MorphCollection mc = li[0].Morph;
            if (i1 == 1 && li[i1].Typ == CityItemToken.ItemType.City && ((li[0].Value == "ГОРОД" || li[0].Value == "МІСТО" || li[0].Typ == CityItemToken.ItemType.Misc)))
            {
                if (typ == null && ((i1 + 1) < li.Count) && li[i1 + 1].Typ == CityItemToken.ItemType.Noun)
                {
                    return(null);
                }
                oi = li[i1].OntoItem;
                if (oi != null)
                {
                    name = oi.CanonicText;
                }
                if (name.Length > 2 || oi.MiscAttr != null)
                {
                    if (!li[1].Doubtful || ((oi != null && oi.MiscAttr != null)))
                    {
                        ok = true;
                    }
                    else if (!ok && !li[1].IsNewlineBefore)
                    {
                        if (li[0].GeoObjectBefore || li[1].GeoObjectAfter)
                        {
                            ok = true;
                        }
                        else if (Pullenti.Ner.Address.Internal.StreetDefineHelper.CheckStreetAfter(li[1].EndToken.Next))
                        {
                            ok = true;
                        }
                        else if (li[1].EndToken.Next != null && (li[1].EndToken.Next.GetReferent() is Pullenti.Ner.Date.DateReferent))
                        {
                            ok = true;
                        }
                        else if ((li[1].WhitespacesBeforeCount < 2) && li[1].OntoItem != null)
                        {
                            if (li[1].IsNewlineAfter)
                            {
                                ok = true;
                            }
                            else
                            {
                                ok = true;
                            }
                        }
                    }
                    if (li[1].Doubtful && li[1].EndToken.Next != null && li[1].EndToken.Chars == li[1].EndToken.Next.Chars)
                    {
                        ok = false;
                    }
                    if (li[0].BeginToken.Previous != null && li[0].BeginToken.Previous.IsValue("В", null))
                    {
                        ok = true;
                    }
                }
                if (!ok)
                {
                    ok = CheckYearAfter(li[1].EndToken.Next);
                }
                if (!ok)
                {
                    ok = CheckCityAfter(li[1].EndToken.Next);
                }
            }
            else if ((li[i1].Typ == CityItemToken.ItemType.ProperName || li[i1].Typ == CityItemToken.ItemType.City))
            {
                if (((li[0].Value == "АДМИНИСТРАЦИЯ" || li[0].Value == "АДМІНІСТРАЦІЯ")) && i1 == 1)
                {
                    return(null);
                }
                if (li[i1].IsNewlineBefore)
                {
                    if (li.Count != 2)
                    {
                        return(null);
                    }
                }
                if (!li[0].Doubtful)
                {
                    ok = true;
                    if (name.Length < 2)
                    {
                        ok = false;
                    }
                    else if ((name.Length < 3) && li[0].Morph.Number != Pullenti.Morph.MorphNumber.Singular)
                    {
                        ok = false;
                    }
                    if (li[i1].Doubtful && !li[i1].GeoObjectAfter && !li[0].GeoObjectBefore)
                    {
                        if (li[i1].Morph.Case.IsGenitive)
                        {
                            if (li[i1].EndToken.Next == null || MiscLocationHelper.CheckGeoObjectAfter(li[i1].EndToken.Next, false) || Pullenti.Ner.Address.Internal.AddressItemToken.CheckHouseAfter(li[i1].EndToken.Next, false, true))
                            {
                            }
                            else if (li[0].BeginToken.Previous == null || MiscLocationHelper.CheckGeoObjectBefore(li[0].BeginToken))
                            {
                            }
                            else
                            {
                                ok = false;
                            }
                        }
                        if (ok)
                        {
                            Pullenti.Ner.ReferentToken rt0 = li[i1].Kit.ProcessReferent("PERSONPROPERTY", li[0].BeginToken.Previous);
                            if (rt0 != null)
                            {
                                Pullenti.Ner.ReferentToken rt1 = li[i1].Kit.ProcessReferent("PERSON", li[i1].BeginToken);
                                if (rt1 != null)
                                {
                                    ok = false;
                                }
                            }
                        }
                    }
                    Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(li[i1].BeginToken, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                    if (npt != null)
                    {
                        if (npt.EndToken.EndChar > li[i1].EndChar && npt.Adjectives.Count > 0 && !npt.Adjectives[0].EndToken.Next.IsComma)
                        {
                            ok = false;
                        }
                        else if (TerrItemToken.m_UnknownRegions.TryParse(npt.EndToken, Pullenti.Ner.Core.TerminParseAttr.FullwordsOnly) != null)
                        {
                            bool ok1 = false;
                            if (li[0].BeginToken.Previous != null)
                            {
                                Pullenti.Ner.Token ttt = li[0].BeginToken.Previous;
                                if (ttt.IsComma && ttt.Previous != null)
                                {
                                    ttt = ttt.Previous;
                                }
                                Pullenti.Ner.Geo.GeoReferent geo = ttt.GetReferent() as Pullenti.Ner.Geo.GeoReferent;
                                if (geo != null && !geo.IsCity)
                                {
                                    ok1 = true;
                                }
                            }
                            if (npt.EndToken.Next != null)
                            {
                                Pullenti.Ner.Token ttt = npt.EndToken.Next;
                                if (ttt.IsComma && ttt.Next != null)
                                {
                                    ttt = ttt.Next;
                                }
                                Pullenti.Ner.Geo.GeoReferent geo = ttt.GetReferent() as Pullenti.Ner.Geo.GeoReferent;
                                if (geo != null && !geo.IsCity)
                                {
                                    ok1 = true;
                                }
                            }
                            if (!ok1)
                            {
                                return(null);
                            }
                        }
                    }
                    if (li[0].Value == "ПОРТ")
                    {
                        if (li[i1].Chars.IsAllUpper || li[i1].Chars.IsLatinLetter)
                        {
                            return(null);
                        }
                    }
                }
                else if (li[0].GeoObjectBefore)
                {
                    ok = true;
                }
                else if (li[i1].GeoObjectAfter && !li[i1].IsNewlineAfter)
                {
                    ok = true;
                }
                else
                {
                    ok = CheckYearAfter(li[i1].EndToken.Next);
                }
                if (!ok)
                {
                    ok = CheckStreetAfter(li[i1].EndToken.Next);
                }
                if (!ok && li[0].BeginToken.Previous != null && li[0].BeginToken.Previous.IsValue("В", null))
                {
                    ok = true;
                }
            }
            else
            {
                return(null);
            }
            if (!ok && !always)
            {
                if (MiscLocationHelper.CheckNearBefore(li[0].BeginToken.Previous) == null)
                {
                    return(null);
                }
            }
            if (li.Count > (i1 + 1))
            {
                li.RemoveRange(i1 + 1, li.Count - i1 - 1);
            }
            Pullenti.Ner.Geo.GeoReferent city = new Pullenti.Ner.Geo.GeoReferent();
            if (oi != null && oi.Referent != null)
            {
                city = oi.Referent.Clone() as Pullenti.Ner.Geo.GeoReferent;
                city.Occurrence.Clear();
            }
            if (!li[0].Morph.Case.IsUndefined && li[0].Morph.Gender != Pullenti.Morph.MorphGender.Undefined)
            {
                if (li[i1].EndToken.Morph.Class.IsAdjective && li[i1].BeginToken == li[i1].EndToken)
                {
                    string nam = Pullenti.Ner.Core.ProperNameHelper.GetNameEx(li[i1].BeginToken, li[i1].EndToken, Pullenti.Morph.MorphClass.Adjective, li[0].Morph.Case, li[0].Morph.Gender, false, false);
                    if (nam != null && nam != name)
                    {
                        name = nam;
                    }
                }
            }
            if (li[0].Morph.Case.IsNominative)
            {
                if (altName != null)
                {
                    city.AddName(altName);
                }
                altName = null;
            }
            city.AddName(name);
            if (probAdj != null)
            {
                city.AddName(probAdj + " " + name);
            }
            if (altName != null)
            {
                city.AddName(altName);
                if (probAdj != null)
                {
                    city.AddName(probAdj + " " + altName);
                }
            }
            if (typ != null)
            {
                city.AddTyp(typ);
            }
            else if (!city.IsCity)
            {
                city.AddTypCity(li[0].Kit.BaseLanguage);
            }
            if (typ2 != null)
            {
                city.AddTyp(typ2.ToLower());
            }
            if (li[0].HigherGeo != null && GeoOwnerHelper.CanBeHigher(li[0].HigherGeo, city))
            {
                city.Higher = li[0].HigherGeo;
            }
            if (li[0].Typ == CityItemToken.ItemType.Misc)
            {
                li.RemoveAt(0);
            }
            Pullenti.Ner.ReferentToken res = new Pullenti.Ner.ReferentToken(city, li[0].BeginToken, li[li.Count - 1].EndToken)
            {
                Morph = mc
            };
            if (res.EndToken.Next != null && res.EndToken.Next.IsHiphen && (res.EndToken.Next.Next is Pullenti.Ner.NumberToken))
            {
                Pullenti.Ner.NumberToken num = res.EndToken.Next.Next as Pullenti.Ner.NumberToken;
                if ((num.Typ == Pullenti.Ner.NumberSpellingType.Digit && !num.Morph.Class.IsAdjective && num.IntValue != null) && (num.IntValue.Value < 50))
                {
                    foreach (Pullenti.Ner.Slot s in city.Slots)
                    {
                        if (s.TypeName == Pullenti.Ner.Geo.GeoReferent.ATTR_NAME)
                        {
                            city.UploadSlot(s, string.Format("{0}-{1}", s.Value, num.Value));
                        }
                    }
                    res.EndToken = num;
                }
            }
            if (li[0].BeginToken == li[0].EndToken && li[0].BeginToken.IsValue("ГОРОДОК", null))
            {
                if (Pullenti.Ner.Address.Internal.AddressItemToken.CheckHouseAfter(res.EndToken.Next, true, false))
                {
                    return(null);
                }
            }
            return(res);
        }