Exemple #1
0
        /// <summary>
        /// Builds the ontology item for this referent. Termins are created from the
        /// distinct ATTR_NAME slot values (in slot order) and, when Number is not null,
        /// from the strings returned by _allNumberDigits() that are not already present.
        /// </summary>
        /// <returns>The newly created ontology item bound to this referent.</returns>
        public override Pullenti.Ner.Core.IntOntologyItem CreateOntologyItem()
        {
            Pullenti.Ner.Core.IntOntologyItem item = new Pullenti.Ner.Core.IntOntologyItem(this);
            List<string> variants = new List<string>();

            // Distinct name values; the first occurrence fixes the position.
            foreach (Pullenti.Ner.Slot slot in Slots)
            {
                if (slot.TypeName != ATTR_NAME)
                {
                    continue;
                }
                string name = slot.Value.ToString();
                if (variants.Contains(name))
                {
                    continue;
                }
                variants.Add(name);
            }

            // Digit variants of the number are appended after the names.
            if (Number != null)
            {
                foreach (string digits in this._allNumberDigits())
                {
                    if (variants.Contains(digits))
                    {
                        continue;
                    }
                    variants.Add(digits);
                }
            }

            for (int k = 0; k < variants.Count; k++)
            {
                item.Termins.Add(new Pullenti.Ner.Core.Termin(variants[k]));
            }
            return item;
        }
Exemple #2
0
 /// <summary>
 /// Builds the ontology item for this referent with one termin per entry of NameVars.
 /// </summary>
 /// <returns>The newly created ontology item bound to this referent.</returns>
 public override Pullenti.Ner.Core.IntOntologyItem CreateOntologyItem()
 {
     Pullenti.Ner.Core.IntOntologyItem item = new Pullenti.Ner.Core.IntOntologyItem(this);

     foreach (string variant in NameVars)
     {
         Pullenti.Ner.Core.Termin termin = new Pullenti.Ner.Core.Termin(variant);
         item.Termins.Add(termin);
     }

     return item;
 }
Exemple #3
0
        /// <summary>
        /// Builds the ontology item for this referent with one termin per entry of Names.
        /// </summary>
        /// <returns>The newly created ontology item bound to this referent.</returns>
        public override Pullenti.Ner.Core.IntOntologyItem CreateOntologyItem()
        {
            Pullenti.Ner.Core.IntOntologyItem item = new Pullenti.Ner.Core.IntOntologyItem(this);

            // Names is read once; every entry becomes its own termin.
            foreach (string name in Names)
            {
                Pullenti.Ner.Core.Termin termin = new Pullenti.Ner.Core.Termin(name);
                item.Termins.Add(termin);
            }

            return item;
        }
Exemple #4
0
 /// <summary>
 /// Builds the ontology item for this referent with one termin per ATTR_ATTR slot,
 /// using the string form of the slot value as the termin text.
 /// </summary>
 /// <returns>The newly created ontology item bound to this referent.</returns>
 public override Pullenti.Ner.Core.IntOntologyItem CreateOntologyItem()
 {
     Pullenti.Ner.Core.IntOntologyItem item = new Pullenti.Ner.Core.IntOntologyItem(this);

     foreach (Pullenti.Ner.Slot slot in Slots)
     {
         // Slots of any other type do not contribute termins.
         if (slot.TypeName != ATTR_ATTR)
         {
             continue;
         }
         string text = slot.Value.ToString();
         item.Termins.Add(new Pullenti.Ner.Core.Termin(text));
     }

     return item;
 }
Exemple #5
0
 /// <summary>
 /// Builds the ontology item for this referent with one termin per ATTR_NAME slot,
 /// using the string form of the slot value as the termin text.
 /// </summary>
 /// <returns>The newly created ontology item bound to this referent.</returns>
 public override Pullenti.Ner.Core.IntOntologyItem CreateOntologyItem()
 {
     Pullenti.Ner.Core.IntOntologyItem item = new Pullenti.Ner.Core.IntOntologyItem(this);

     foreach (Pullenti.Ner.Slot slot in Slots)
     {
         // Only name slots contribute termins.
         if (slot.TypeName != ATTR_NAME)
         {
             continue;
         }
         string text = slot.Value.ToString();
         item.Termins.Add(new Pullenti.Ner.Core.Termin(text));
     }

     return item;
 }
Exemple #6
0
 /// <summary>
 /// Builds the ontology item for this referent with one termin per ATTR_NORMAL or
 /// ATTR_VALUE slot. The slot value is cast to string, so a non-string value in
 /// such a slot raises InvalidCastException.
 /// </summary>
 /// <returns>The newly created ontology item bound to this referent.</returns>
 public override Pullenti.Ner.Core.IntOntologyItem CreateOntologyItem()
 {
     Pullenti.Ner.Core.IntOntologyItem item = new Pullenti.Ner.Core.IntOntologyItem(this);

     foreach (Pullenti.Ner.Slot slot in Slots)
     {
         // Skip everything that is neither a normal nor a value slot.
         if (slot.TypeName != ATTR_NORMAL && slot.TypeName != ATTR_VALUE)
         {
             continue;
         }
         string text = (string)slot.Value;
         item.Termins.Add(new Pullenti.Ner.Core.Termin(text));
     }

     return item;
 }
Exemple #7
0
        /// <summary>
        /// Builds the ontology item for this referent from its identity, last-name and
        /// first-name slots.
        /// </summary>
        /// <remarks>
        /// ATTR_IDENTITY values become termins with IgnoreTermsOrder set.
        /// ATTR_LASTNAME values become termins as they are.
        /// ATTR_FIRSTNAME values are used only when _findShortestKingTitul(false) returned
        /// a non-null title; the termin text is then "title firstname".
        /// Last-name and first-name termins receive Masculine gender when IsMale is true,
        /// otherwise Feminie when IsFemale is true, otherwise their gender is left untouched.
        /// </remarks>
        /// <returns>The newly created ontology item bound to this referent.</returns>
        public override Pullenti.Ner.Core.IntOntologyItem CreateOntologyItem()
        {
            Pullenti.Ner.Core.IntOntologyItem oi = new Pullenti.Ner.Core.IntOntologyItem(this);
            string tit = this._findShortestKingTitul(false);

            foreach (Pullenti.Ner.Slot a in Slots)
            {
                if (a.TypeName == ATTR_IDENTITY)
                {
                    oi.Termins.Add(new Pullenti.Ner.Core.Termin(a.Value.ToString())
                    {
                        IgnoreTermsOrder = true
                    });
                    continue;
                }

                // Last names and titled first names share the same gender handling,
                // so only the termin text differs between the two cases.
                string text;
                if (a.TypeName == ATTR_LASTNAME)
                {
                    text = a.Value.ToString();
                }
                else if (a.TypeName == ATTR_FIRSTNAME && tit != null)
                {
                    text = string.Format("{0} {1}", tit, a.Value.ToString());
                }
                else
                {
                    continue;
                }

                Pullenti.Ner.Core.Termin t = new Pullenti.Ner.Core.Termin(text);
                if (IsMale)
                {
                    t.Gender = Pullenti.Morph.MorphGender.Masculine;
                }
                else if (IsFemale)
                {
                    t.Gender = Pullenti.Morph.MorphGender.Feminie;
                }
                oi.Termins.Add(t);
            }
            return oi;
        }
Exemple #8
0
        /// <summary>
        /// Builds the ontology item for this referent with one termin per ATTR_NAME slot.
        /// Each termin is initialised through InitByNormalText; when IsCity is true
        /// (read once, up front) AddStdAbridges() is additionally called on every termin.
        /// </summary>
        /// <returns>The newly created ontology item bound to this referent.</returns>
        public override Pullenti.Ner.Core.IntOntologyItem CreateOntologyItem()
        {
            bool withAbridges = IsCity;
            Pullenti.Ner.Core.IntOntologyItem item = new Pullenti.Ner.Core.IntOntologyItem(this);

            foreach (Pullenti.Ner.Slot slot in Slots)
            {
                if (slot.TypeName != ATTR_NAME)
                {
                    continue;
                }

                string text = slot.Value.ToString();
                Pullenti.Ner.Core.Termin termin = new Pullenti.Ner.Core.Termin();
                termin.InitByNormalText(text, null);
                if (withAbridges)
                {
                    termin.AddStdAbridges();
                }
                item.Termins.Add(termin);
            }

            return item;
        }
Exemple #9
0
        /// <summary>
        /// Builds an ontology item from the upper-cased ATTR_NAME values of this referent.
        /// </summary>
        /// <param name="minLen">Minimal length a variant must have to become a termin.</param>
        /// <param name="onlyNames">When false and no name variant was collected, the upper-cased ATTR_TYPE values are used instead.</param>
        /// <param name="pureNames">When false, a name containing exactly one space also yields a variant with that space removed.</param>
        /// <returns>The ontology item, or null when no termin was added. At most 20 termins are added.</returns>
        internal Pullenti.Ner.Core.IntOntologyItem _CreateOntologyItem(int minLen, bool onlyNames = false, bool pureNames = false)
        {
            Pullenti.Ner.Core.IntOntologyItem item = new Pullenti.Ner.Core.IntOntologyItem(this);
            List<string> variants = new List<string>();
            List<string> typeNames = this.GetStringValues(ATTR_TYPE);
            if (typeNames == null)
            {
                typeNames = new List<string>();
            }

            foreach (Pullenti.Ner.Slot slot in Slots)
            {
                if (slot.TypeName != ATTR_NAME)
                {
                    continue;
                }

                string upper = slot.Value.ToString().ToUpper();
                if (!variants.Contains(upper))
                {
                    variants.Add(upper);
                }
                if (pureNames)
                {
                    continue;
                }

                // Exactly one space: the first and the last space are the same character.
                int firstSpace = upper.IndexOf(' ');
                if (firstSpace < 0 || firstSpace != upper.LastIndexOf(' '))
                {
                    continue;
                }
                string glued = upper.Replace(" ", "");
                if (!variants.Contains(glued))
                {
                    variants.Add(glued);
                }
            }

            // Fall back to the type names only when no name produced a variant.
            if (!onlyNames && variants.Count == 0)
            {
                foreach (string typeName in typeNames)
                {
                    string upperType = typeName.ToUpper();
                    if (!variants.Contains(upperType))
                    {
                        variants.Add(upperType);
                    }
                }
            }

            const int maxTermins = 20;
            int added = 0;
            foreach (string variant in variants)
            {
                if (variant.Length < minLen)
                {
                    continue;
                }
                item.Termins.Add(new Pullenti.Ner.Core.Termin(variant));
                added++;
                if (added >= maxTermins)
                {
                    break;
                }
            }

            return item.Termins.Count == 0 ? null : item;
        }
Exemple #10
0
        /// <summary>
        /// Tries to build a city referent token from a list of city item tokens whose first
        /// item is a City, or which consists of exactly two items: ProperName followed by Noun.
        /// </summary>
        /// <param name="li">Parsed city items. The list may be truncated in place: items after the consumed ones are removed before the result is built.</param>
        /// <param name="oi">Set to null when the list is rejected by the initial shape checks; otherwise set to li[0].OntoItem (which may itself be null), also when the method later returns null.</param>
        /// <param name="ad">Analyzer data; when not null its LocalOntology is compared with the owner of the ontology item.</param>
        /// <returns>The referent token for the recognised city, or null when the items are not accepted.</returns>
        static Pullenti.Ner.ReferentToken Try1(List <CityItemToken> li, out Pullenti.Ner.Core.IntOntologyItem oi, Pullenti.Ner.Core.AnalyzerDataWithOntology ad)
        {
            oi = null;
            // Shape check: first item must be a City, or the list must be exactly ProperName + Noun.
            if (li == null || (li.Count < 1))
            {
                return(null);
            }
            else if (li[0].Typ != CityItemToken.ItemType.City)
            {
                if (li.Count != 2 || li[0].Typ != CityItemToken.ItemType.ProperName || li[1].Typ != CityItemToken.ItemType.Noun)
                {
                    return(null);
                }
            }
            // Index of the next item to examine; after the checks below it is the number of consumed items.
            int i = 1;

            oi = li[0].OntoItem;
            bool ok = !li[0].Doubtful;

            // A non-doubtful ontology match without MiscAttr whose owner is neither the local
            // ontology nor an external one is kept only when the previous token is "В".
            if ((ok && li[0].OntoItem != null && li[0].OntoItem.MiscAttr == null) && ad != null)
            {
                if (li[0].OntoItem.Owner != ad.LocalOntology && !li[0].OntoItem.Owner.IsExtOntology)
                {
                    if (li[0].BeginToken.Previous != null && li[0].BeginToken.Previous.IsValue("В", null))
                    {
                    }
                    else
                    {
                        ok = false;
                    }
                }
            }
            // A single adjective item that parses as a two-item street list ending in a Noun is rejected.
            if (li.Count == 1 && li[0].BeginToken.Morph.Class.IsAdjective)
            {
                List <Pullenti.Ner.Address.Internal.StreetItemToken> sits = Pullenti.Ner.Address.Internal.StreetItemToken.TryParseList(li[0].BeginToken, null, 3);
                if (sits != null && sits.Count == 2 && sits[1].Typ == Pullenti.Ner.Address.Internal.StreetItemType.Noun)
                {
                    return(null);
                }
            }
            string typ    = null;
            string alttyp = null;

            Pullenti.Ner.MorphCollection mc = li[0].Morph;
            // Optional type noun following the name.
            if (i < li.Count)
            {
                if (li[i].Typ == CityItemToken.ItemType.Noun)
                {
                    // 'at' stays non-null only when the noun is not all-lowercase, is closely followed by a
                    // street Noun item, parses as an address item itself, and the text after it does not
                    // parse as a Street address item; in that case the noun is not taken as the city type.
                    Pullenti.Ner.Address.Internal.AddressItemToken at = null;
                    if (!li[i].Chars.IsAllLower && (li[i].WhitespacesAfterCount < 2))
                    {
                        Pullenti.Ner.Address.Internal.StreetItemToken sit = Pullenti.Ner.Address.Internal.StreetItemToken.TryParse(li[i].EndToken.Next, null, false, null, false);
                        if (sit != null && sit.Typ == Pullenti.Ner.Address.Internal.StreetItemType.Noun)
                        {
                            at = Pullenti.Ner.Address.Internal.AddressItemToken.TryParse(li[i].BeginToken, null, false, false, null);
                            if (at != null)
                            {
                                Pullenti.Ner.Address.Internal.AddressItemToken at2 = Pullenti.Ner.Address.Internal.AddressItemToken.TryParse(li[i].EndToken.Next, null, false, false, null);
                                if (at2 != null && at2.Typ == Pullenti.Ner.Address.Internal.AddressItemToken.ItemType.Street)
                                {
                                    at = null;
                                }
                            }
                        }
                    }
                    if (at == null)
                    {
                        typ    = li[i].Value;
                        alttyp = li[i].AltValue;
                        // An all-uppercase "СТ" noun token rejects the whole match.
                        if (li[i].BeginToken.IsValue("СТ", null) && li[i].BeginToken.Chars.IsAllUpper)
                        {
                            return(null);
                        }
                        if ((i + 1) == li.Count)
                        {
                            // The noun is the last item: accept unconditionally and prefer its
                            // morphology when its case is defined.
                            ok = true;
                            if (!li[i].Morph.Case.IsUndefined)
                            {
                                mc = li[i].Morph;
                            }
                            i++;
                        }
                        else if (ok)
                        {
                            i++;
                        }
                        else
                        {
                            // Not yet accepted: the noun is consumed only when the name is closely
                            // preceded by "МЭР"/"МЕР", "ГЛАВА" or "ГРАДОНАЧАЛЬНИК".
                            // Note that typ/alttyp stay assigned even when the noun is not consumed.
                            Pullenti.Ner.Token tt0 = li[0].BeginToken.Previous;
                            if ((tt0 is Pullenti.Ner.TextToken) && (tt0.WhitespacesAfterCount < 3))
                            {
                                if (tt0.IsValue("МЭР", "МЕР") || tt0.IsValue("ГЛАВА", null) || tt0.IsValue("ГРАДОНАЧАЛЬНИК", null))
                                {
                                    ok = true;
                                    i++;
                                }
                            }
                        }
                    }
                }
            }
            // Unconfirmed matches with a canonic text shorter than 4 characters are rejected.
            if (!ok && oi != null && (oi.CanonicText.Length < 4))
            {
                return(null);
            }
            // Unconfirmed matches whose first token is morphologically a proper name are rejected.
            if (!ok && li[0].BeginToken.Morph.Class.IsProperName)
            {
                return(null);
            }
            if (!ok)
            {
                // When the text is not found in the dictionary as adjective/noun/pronoun, a neighbouring
                // geo object may confirm the match, unless the token looks like a person name, a street
                // or an organization.
                if (!Pullenti.Ner.Core.MiscHelper.IsExistsInDictionary(li[0].BeginToken, li[0].EndToken, Pullenti.Morph.MorphClass.Adjective | Pullenti.Morph.MorphClass.Noun | Pullenti.Morph.MorphClass.Pronoun))
                {
                    ok = li[0].GeoObjectBefore || li[i - 1].GeoObjectAfter;
                    if (ok && li[0].BeginToken == li[0].EndToken)
                    {
                        Pullenti.Morph.MorphClass mcc = li[0].BeginToken.GetMorphClassInDictionary();
                        if (mcc.IsProperName || mcc.IsProperSurname)
                        {
                            ok = false;
                        }
                        else if (li[0].GeoObjectBefore && (li[0].WhitespacesAfterCount < 2))
                        {
                            Pullenti.Ner.Address.Internal.AddressItemToken ad1 = Pullenti.Ner.Address.Internal.AddressItemToken.TryParse(li[0].BeginToken, null, false, false, null);
                            if (ad1 != null && ad1.Typ == Pullenti.Ner.Address.Internal.AddressItemToken.ItemType.Street)
                            {
                                Pullenti.Ner.Address.Internal.AddressItemToken ad2 = Pullenti.Ner.Address.Internal.AddressItemToken.TryParse(li[0].EndToken.Next, null, false, false, null);
                                if (ad2 == null || ad2.Typ != Pullenti.Ner.Address.Internal.AddressItemToken.ItemType.Street)
                                {
                                    ok = false;
                                }
                            }
                            else if (Pullenti.Ner.Address.Internal.AddressItemToken.TryAttachOrg(li[0].BeginToken) != null)
                            {
                                ok = false;
                            }
                        }
                    }
                }
                // A match confirmed above is dropped again when a PERSON referent can be parsed at the same place.
                if (ok)
                {
                    if (li[0].Kit.ProcessReferent("PERSON", li[0].BeginToken) != null)
                    {
                        ok = false;
                    }
                }
            }
            // Last-chance confirmations based on what follows the name.
            if (!ok)
            {
                ok = CheckYearAfter(li[0].EndToken.Next);
            }
            if (!ok && ((!li[0].BeginToken.Morph.Class.IsAdjective || li[0].BeginToken != li[0].EndToken)))
            {
                ok = CheckCityAfter(li[0].EndToken.Next);
            }
            if (!ok)
            {
                return(null);
            }
            // Drop the items that were not consumed.
            if (i < li.Count)
            {
                li.RemoveRange(i, li.Count - i);
            }
            Pullenti.Ner.ReferentToken rt = null;
            if (oi == null)
            {
                // No ontology item: either build the referent under the known higher geo object ...
                if (li[0].Value != null && li[0].HigherGeo != null)
                {
                    Pullenti.Ner.Geo.GeoReferent cap = new Pullenti.Ner.Geo.GeoReferent();
                    cap.AddName(li[0].Value);
                    cap.AddTypCity(li[0].Kit.BaseLanguage);
                    cap.Higher = li[0].HigherGeo;
                    if (typ != null)
                    {
                        cap.AddTyp(typ);
                    }
                    if (alttyp != null)
                    {
                        cap.AddTyp(alttyp);
                    }
                    rt = new Pullenti.Ner.ReferentToken(cap, li[0].BeginToken, li[0].EndToken);
                }
                else
                {
                    // ... or validate the name/type combination before the generic construction below.
                    if (li[0].Value == null)
                    {
                        return(null);
                    }
                    if (typ == null)
                    {
                        // Without a type, only a single item directly after "<geo referent> -" is accepted.
                        if ((li.Count == 1 && li[0].BeginToken.Previous != null && li[0].BeginToken.Previous.IsHiphen) && (li[0].BeginToken.Previous.Previous is Pullenti.Ner.ReferentToken) && (li[0].BeginToken.Previous.Previous.GetReferent() is Pullenti.Ner.Geo.GeoReferent))
                        {
                        }
                        else
                        {
                            return(null);
                        }
                    }
                    else
                    {
                        // Types not ending in one of the listed settlement words or "CITY" are accepted
                        // only in the three explicitly enumerated situations.
                        if (!Pullenti.Morph.LanguageHelper.EndsWithEx(typ, "ПУНКТ", "ПОСЕЛЕНИЕ", "ПОСЕЛЕННЯ", "ПОСЕЛОК"))
                        {
                            if (!Pullenti.Morph.LanguageHelper.EndsWith(typ, "CITY"))
                            {
                                if (typ == "СТАНЦИЯ" && (MiscLocationHelper.CheckGeoObjectBefore(li[0].BeginToken)))
                                {
                                }
                                else if (li.Count > 1 && li[1].Typ == CityItemToken.ItemType.Noun && li[0].Typ == CityItemToken.ItemType.City)
                                {
                                }
                                else if ((li.Count == 2 && li[1].Typ == CityItemToken.ItemType.Noun && li[0].Typ == CityItemToken.ItemType.ProperName) && ((li[0].GeoObjectBefore || li[1].GeoObjectAfter)))
                                {
                                }
                                else
                                {
                                    return(null);
                                }
                            }
                        }
                        // An adjective name is re-derived using the case and gender of the noun item.
                        // NOTE(review): li[1] is read here although li may have been truncated by the
                        // RemoveRange above (typ can be set without the noun being consumed) - verify that
                        // li.Count > 1 always holds on this path.
                        if (li[0].BeginToken.Morph.Class.IsAdjective)
                        {
                            li[0].Value = Pullenti.Ner.Core.ProperNameHelper.GetNameEx(li[0].BeginToken, li[0].EndToken, Pullenti.Morph.MorphClass.Adjective, li[1].Morph.Case, li[1].Morph.Gender, false, false);
                        }
                    }
                }
            }
            else if (oi.Referent is Pullenti.Ner.Geo.GeoReferent)
            {
                // The ontology item already carries a geo referent: use a clone without occurrences.
                Pullenti.Ner.Geo.GeoReferent city = oi.Referent.Clone() as Pullenti.Ner.Geo.GeoReferent;
                city.Occurrence.Clear();
                rt = new Pullenti.Ner.ReferentToken(city, li[0].BeginToken, li[li.Count - 1].EndToken)
                {
                    Morph = mc
                };
            }
            else if (typ == null)
            {
                typ = oi.Typ;
            }
            // Generic construction when none of the branches above produced a token.
            if (rt == null)
            {
                Pullenti.Ner.Geo.GeoReferent city = new Pullenti.Ner.Geo.GeoReferent();
                city.AddName((oi == null ? li[0].Value : oi.CanonicText));
                if (typ != null)
                {
                    city.AddTyp(typ);
                }
                else
                {
                    city.AddTypCity(li[0].Kit.BaseLanguage);
                }
                if (alttyp != null)
                {
                    city.AddTyp(alttyp);
                }
                rt = new Pullenti.Ner.ReferentToken(city, li[0].BeginToken, li[li.Count - 1].EndToken)
                {
                    Morph = mc
                };
            }
            // For a single-item city, extend the token over an adjacent "Г" / "Г." before the name,
            // or a closely following "Г" (optionally with a dot) after it.
            if ((rt.Referent is Pullenti.Ner.Geo.GeoReferent) && li.Count == 1 && (rt.Referent as Pullenti.Ner.Geo.GeoReferent).IsCity)
            {
                if (rt.BeginToken.Previous != null && rt.BeginToken.Previous.IsValue("Г", null))
                {
                    rt.BeginToken = rt.BeginToken.Previous;
                }
                else if ((rt.BeginToken.Previous != null && rt.BeginToken.Previous.IsChar('.') && rt.BeginToken.Previous.Previous != null) && rt.BeginToken.Previous.Previous.IsValue("Г", null))
                {
                    rt.BeginToken = rt.BeginToken.Previous.Previous;
                }
                else if (rt.EndToken.Next != null && (rt.WhitespacesAfterCount < 2) && rt.EndToken.Next.IsValue("Г", null))
                {
                    rt.EndToken = rt.EndToken.Next;
                    if (rt.EndToken.Next != null && rt.EndToken.Next.IsChar('.'))
                    {
                        rt.EndToken = rt.EndToken.Next;
                    }
                }
            }
            return(rt);
        }
Exemple #11
0
        /// <summary>
        /// Tries to accept the first item of the list as a city based on its name alone:
        /// special handling of "РИМ", external-ontology matches, characteristic name
        /// endings/prefixes, a preceding "IN"/"FROM" for Latin names, or a neighbouring
        /// geo referent on the same line.
        /// </summary>
        /// <param name="li">Parsed city items; only li[0] is turned into the result. A null or empty list yields null.</param>
        /// <param name="oi">Set to null when the list is rejected up front; otherwise set to li[0].OntoItem (which may itself be null).</param>
        /// <param name="always">
        /// When true a token is returned even if no check confirmed the name, unless li[0] is
        /// doubtful, preceded by more than 3 whitespaces, is a proper surname in the dictionary
        /// and a PERSON referent can be parsed at the same place.
        /// </param>
        /// <returns>The referent token spanning li[0], or null when the name is not accepted.</returns>
        static Pullenti.Ner.ReferentToken _tryNameExist(List<CityItemToken> li, out Pullenti.Ner.Core.IntOntologyItem oi, bool always)
        {
            oi = null;
            // li[0] is dereferenced right away, so an empty list must be rejected here too.
            if (li == null || li.Count < 1 || li[0].Typ != CityItemToken.ItemType.City)
            {
                return null;
            }
            oi = li[0].OntoItem;
            Pullenti.Ner.TextToken tt = li[0].BeginToken as Pullenti.Ner.TextToken;
            if (tt == null)
            {
                return null;
            }
            bool ok = false;
            string nam = (oi == null ? li[0].Value : oi.CanonicText);

            if (nam == null)
            {
                return null;
            }
            if (nam == "РИМ")
            {
                if (tt.Term == "РИМ")
                {
                    // Rejected when the next text token is a proper second name in the dictionary.
                    if ((tt.Next is Pullenti.Ner.TextToken) && tt.Next.GetMorphClassInDictionary().IsProperSecname)
                    {
                    }
                    else
                    {
                        ok = true;
                    }
                }
                else if (tt.Previous != null && tt.Previous.IsValue("В", null) && tt.Term == "РИМЕ")
                {
                    ok = true;
                }
            }
            else if (oi != null && oi.Referent != null && oi.Owner.IsExtOntology)
            {
                ok = true;
            }
            else if (nam.EndsWith("ГРАД") || nam.EndsWith("СК"))
            {
                ok = true;
            }
            else if (nam.EndsWith("TOWN") || nam.StartsWith("SAN"))
            {
                ok = true;
            }
            else if (li[0].Chars.IsLatinLetter && li[0].BeginToken.Previous != null && ((li[0].BeginToken.Previous.IsValue("IN", null) || li[0].BeginToken.Previous.IsValue("FROM", null))))
            {
                ok = true;
            }
            else
            {
                // Forward scan on the same line: skip ",", "(", prepositions, conjunctions and misc
                // tokens; the first other token confirms the name when it is a geo referent written
                // in the same alphabet (Cyrillic or not) as li[0].
                for (Pullenti.Ner.Token tt2 = li[0].EndToken.Next; tt2 != null; tt2 = tt2.Next)
                {
                    if (tt2.IsNewlineBefore)
                    {
                        break;
                    }
                    if ((tt2.IsCharOf(",(") || tt2.Morph.Class.IsPreposition || tt2.Morph.Class.IsConjunction) || tt2.Morph.Class.IsMisc)
                    {
                        continue;
                    }
                    if ((tt2.GetReferent() is Pullenti.Ner.Geo.GeoReferent) && tt2.Chars.IsCyrillicLetter == li[0].Chars.IsCyrillicLetter)
                    {
                        ok = true;
                    }
                    break;
                }
                if (!ok)
                {
                    // Backward scan with the same skipping rules; a confirmation found here is
                    // subject to additional street / proper-name / noun-phrase / organization checks.
                    for (Pullenti.Ner.Token tt2 = li[0].BeginToken.Previous; tt2 != null; tt2 = tt2.Previous)
                    {
                        if (tt2.IsNewlineAfter)
                        {
                            break;
                        }
                        if ((tt2.IsCharOf(",)") || tt2.Morph.Class.IsPreposition || tt2.Morph.Class.IsConjunction) || tt2.Morph.Class.IsMisc)
                        {
                            continue;
                        }
                        if ((tt2.GetReferent() is Pullenti.Ner.Geo.GeoReferent) && tt2.Chars.IsCyrillicLetter == li[0].Chars.IsCyrillicLetter)
                        {
                            ok = true;
                        }
                        if (ok)
                        {
                            // If the text parses as a street only together with its first item,
                            // the name belongs to the street rather than to a city.
                            List<Pullenti.Ner.Address.Internal.StreetItemToken> sits = Pullenti.Ner.Address.Internal.StreetItemToken.TryParseList(li[0].BeginToken, null, 10);
                            if (sits != null && sits.Count > 1)
                            {
                                Pullenti.Ner.Address.Internal.AddressItemToken ss = Pullenti.Ner.Address.Internal.StreetDefineHelper.TryParseStreet(sits, false, false);
                                if (ss != null)
                                {
                                    sits.RemoveAt(0);
                                    if (Pullenti.Ner.Address.Internal.StreetDefineHelper.TryParseStreet(sits, false, false) == null)
                                    {
                                        ok = false;
                                    }
                                }
                            }
                        }
                        if (ok)
                        {
                            if (li.Count > 1 && li[1].Typ == CityItemToken.ItemType.ProperName && (li[1].WhitespacesBeforeCount < 3))
                            {
                                ok = false;
                            }
                            else
                            {
                                Pullenti.Morph.MorphClass mc = li[0].BeginToken.GetMorphClassInDictionary();
                                if (mc.IsProperName || mc.IsProperSurname || mc.IsAdjective)
                                {
                                    ok = false;
                                }
                                else
                                {
                                    // A noun phrase extending beyond the item also cancels the match.
                                    Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(li[0].BeginToken, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                                    if (npt != null && npt.EndChar > li[0].EndChar)
                                    {
                                        ok = false;
                                    }
                                }
                            }
                        }
                        if (Pullenti.Ner.Address.Internal.AddressItemToken.TryAttachOrg(li[0].BeginToken) != null)
                        {
                            ok = false;
                        }
                        // Only the first non-skipped token is ever examined.
                        break;
                    }
                }
            }
            if (always)
            {
                if (li[0].WhitespacesBeforeCount > 3 && li[0].Doubtful && li[0].BeginToken.GetMorphClassInDictionary().IsProperSurname)
                {
                    Pullenti.Ner.ReferentToken pp = li[0].Kit.ProcessReferent("PERSON", li[0].BeginToken);
                    if (pp != null)
                    {
                        always = false;
                    }
                }
            }
            // A single Latin token followed (optionally after a comma) by a short, not all-lowercase
            // Latin text token is not confirmed by the checks above.
            if (li[0].BeginToken.Chars.IsLatinLetter && li[0].BeginToken == li[0].EndToken)
            {
                Pullenti.Ner.Token tt1 = li[0].EndToken.Next;
                if (tt1 != null && tt1.IsChar(','))
                {
                    tt1 = tt1.Next;
                }
                if (((tt1 is Pullenti.Ner.TextToken) && tt1.Chars.IsLatinLetter && (tt1.LengthChar < 3)) && !tt1.Chars.IsAllLower)
                {
                    ok = false;
                }
            }
            if (!ok && !always)
            {
                return null;
            }
            Pullenti.Ner.Geo.GeoReferent city = null;
            if (oi != null && (oi.Referent is Pullenti.Ner.Geo.GeoReferent) && !oi.Owner.IsExtOntology)
            {
                // Non-external ontology referent: reuse a clone without occurrences.
                city = oi.Referent.Clone() as Pullenti.Ner.Geo.GeoReferent;
                city.Occurrence.Clear();
            }
            else
            {
                city = new Pullenti.Ner.Geo.GeoReferent();
                city.AddName(nam);
                if (oi != null && (oi.Referent is Pullenti.Ner.Geo.GeoReferent))
                {
                    city.MergeSlots2(oi.Referent as Pullenti.Ner.Geo.GeoReferent, li[0].Kit.BaseLanguage);
                }
                if (!city.IsCity)
                {
                    city.AddTypCity(li[0].Kit.BaseLanguage);
                }
            }
            return new Pullenti.Ner.ReferentToken(city, li[0].BeginToken, li[0].EndToken)
            {
                Morph = li[0].Morph
            };
        }
Exemple #12
0
        static Pullenti.Ner.ReferentToken _tryNounName(List <CityItemToken> li, out Pullenti.Ner.Core.IntOntologyItem oi, bool always)
        {
            oi = null;
            if (li == null || (li.Count < 2) || ((li[0].Typ != CityItemToken.ItemType.Noun && li[0].Typ != CityItemToken.ItemType.Misc)))
            {
                return(null);
            }
            bool ok = !li[0].Doubtful;

            if (ok && li[0].Typ == CityItemToken.ItemType.Misc)
            {
                ok = false;
            }
            string typ     = (li[0].Typ == CityItemToken.ItemType.Misc ? null : li[0].Value);
            string typ2    = (li[0].Typ == CityItemToken.ItemType.Misc ? null : li[0].AltValue);
            string probAdj = null;
            int    i1      = 1;

            Pullenti.Ner.Referent org = null;
            if ((typ != null && li[i1].Typ == CityItemToken.ItemType.Noun && ((i1 + 1) < li.Count)) && li[0].WhitespacesAfterCount <= 1 && (((Pullenti.Morph.LanguageHelper.EndsWith(typ, "ПОСЕЛОК") || Pullenti.Morph.LanguageHelper.EndsWith(typ, "СЕЛИЩЕ") || typ == "ДЕРЕВНЯ") || typ == "СЕЛО")))
            {
                if (li[i1].BeginToken == li[i1].EndToken)
                {
                    Pullenti.Ner.Address.Internal.AddressItemToken ooo = Pullenti.Ner.Address.Internal.AddressItemToken.TryAttachOrg(li[i1].BeginToken);
                    if (ooo != null && ooo.RefToken != null)
                    {
                        return(null);
                    }
                }
                typ2 = li[i1].Value;
                if (typ2 == "СТАНЦИЯ" && li[i1].BeginToken.IsValue("СТ", null) && ((i1 + 1) < li.Count))
                {
                    Pullenti.Ner.MorphCollection m = li[i1 + 1].Morph;
                    if (m.Number == Pullenti.Morph.MorphNumber.Plural)
                    {
                        probAdj = "СТАРЫЕ";
                    }
                    else if (m.Gender == Pullenti.Morph.MorphGender.Feminie)
                    {
                        probAdj = "СТАРАЯ";
                    }
                    else if (m.Gender == Pullenti.Morph.MorphGender.Masculine)
                    {
                        probAdj = "СТАРЫЙ";
                    }
                    else
                    {
                        probAdj = "СТАРОЕ";
                    }
                }
                i1++;
            }
            string name    = li[i1].Value ?? ((li[i1].OntoItem == null ? null : li[i1].OntoItem.CanonicText));
            string altName = li[i1].AltValue;

            if (name == null)
            {
                return(null);
            }
            Pullenti.Ner.MorphCollection mc = li[0].Morph;
            if (i1 == 1 && li[i1].Typ == CityItemToken.ItemType.City && ((li[0].Value == "ГОРОД" || li[0].Value == "МІСТО" || li[0].Typ == CityItemToken.ItemType.Misc)))
            {
                if (typ == null && ((i1 + 1) < li.Count) && li[i1 + 1].Typ == CityItemToken.ItemType.Noun)
                {
                    return(null);
                }
                oi = li[i1].OntoItem;
                if (oi != null)
                {
                    name = oi.CanonicText;
                }
                if (name.Length > 2 || oi.MiscAttr != null)
                {
                    if (!li[1].Doubtful || ((oi != null && oi.MiscAttr != null)))
                    {
                        ok = true;
                    }
                    else if (!ok && !li[1].IsNewlineBefore)
                    {
                        if (li[0].GeoObjectBefore || li[1].GeoObjectAfter)
                        {
                            ok = true;
                        }
                        else if (Pullenti.Ner.Address.Internal.StreetDefineHelper.CheckStreetAfter(li[1].EndToken.Next))
                        {
                            ok = true;
                        }
                        else if (li[1].EndToken.Next != null && (li[1].EndToken.Next.GetReferent() is Pullenti.Ner.Date.DateReferent))
                        {
                            ok = true;
                        }
                        else if ((li[1].WhitespacesBeforeCount < 2) && li[1].OntoItem != null)
                        {
                            if (li[1].IsNewlineAfter)
                            {
                                ok = true;
                            }
                            else
                            {
                                ok = true;
                            }
                        }
                    }
                    if (li[1].Doubtful && li[1].EndToken.Next != null && li[1].EndToken.Chars == li[1].EndToken.Next.Chars)
                    {
                        ok = false;
                    }
                    if (li[0].BeginToken.Previous != null && li[0].BeginToken.Previous.IsValue("В", null))
                    {
                        ok = true;
                    }
                }
                if (!ok)
                {
                    ok = CheckYearAfter(li[1].EndToken.Next);
                }
                if (!ok)
                {
                    ok = CheckCityAfter(li[1].EndToken.Next);
                }
            }
            else if ((li[i1].Typ == CityItemToken.ItemType.ProperName || li[i1].Typ == CityItemToken.ItemType.City))
            {
                if (((li[0].Value == "АДМИНИСТРАЦИЯ" || li[0].Value == "АДМІНІСТРАЦІЯ")) && i1 == 1)
                {
                    return(null);
                }
                if (li[i1].IsNewlineBefore)
                {
                    if (li.Count != 2)
                    {
                        return(null);
                    }
                }
                if (!li[0].Doubtful)
                {
                    ok = true;
                    if (name.Length < 2)
                    {
                        ok = false;
                    }
                    else if ((name.Length < 3) && li[0].Morph.Number != Pullenti.Morph.MorphNumber.Singular)
                    {
                        ok = false;
                    }
                    if (li[i1].Doubtful && !li[i1].GeoObjectAfter && !li[0].GeoObjectBefore)
                    {
                        if (li[i1].Morph.Case.IsGenitive)
                        {
                            if (li[i1].EndToken.Next == null || MiscLocationHelper.CheckGeoObjectAfter(li[i1].EndToken.Next, false) || Pullenti.Ner.Address.Internal.AddressItemToken.CheckHouseAfter(li[i1].EndToken.Next, false, true))
                            {
                            }
                            else if (li[0].BeginToken.Previous == null || MiscLocationHelper.CheckGeoObjectBefore(li[0].BeginToken))
                            {
                            }
                            else
                            {
                                ok = false;
                            }
                        }
                        if (ok)
                        {
                            Pullenti.Ner.ReferentToken rt0 = li[i1].Kit.ProcessReferent("PERSONPROPERTY", li[0].BeginToken.Previous);
                            if (rt0 != null)
                            {
                                Pullenti.Ner.ReferentToken rt1 = li[i1].Kit.ProcessReferent("PERSON", li[i1].BeginToken);
                                if (rt1 != null)
                                {
                                    ok = false;
                                }
                            }
                        }
                    }
                    Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(li[i1].BeginToken, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                    if (npt != null)
                    {
                        if (npt.EndToken.EndChar > li[i1].EndChar && npt.Adjectives.Count > 0 && !npt.Adjectives[0].EndToken.Next.IsComma)
                        {
                            ok = false;
                        }
                        else if (TerrItemToken.m_UnknownRegions.TryParse(npt.EndToken, Pullenti.Ner.Core.TerminParseAttr.FullwordsOnly) != null)
                        {
                            bool ok1 = false;
                            if (li[0].BeginToken.Previous != null)
                            {
                                Pullenti.Ner.Token ttt = li[0].BeginToken.Previous;
                                if (ttt.IsComma && ttt.Previous != null)
                                {
                                    ttt = ttt.Previous;
                                }
                                Pullenti.Ner.Geo.GeoReferent geo = ttt.GetReferent() as Pullenti.Ner.Geo.GeoReferent;
                                if (geo != null && !geo.IsCity)
                                {
                                    ok1 = true;
                                }
                            }
                            if (npt.EndToken.Next != null)
                            {
                                Pullenti.Ner.Token ttt = npt.EndToken.Next;
                                if (ttt.IsComma && ttt.Next != null)
                                {
                                    ttt = ttt.Next;
                                }
                                Pullenti.Ner.Geo.GeoReferent geo = ttt.GetReferent() as Pullenti.Ner.Geo.GeoReferent;
                                if (geo != null && !geo.IsCity)
                                {
                                    ok1 = true;
                                }
                            }
                            if (!ok1)
                            {
                                return(null);
                            }
                        }
                    }
                    if (li[0].Value == "ПОРТ")
                    {
                        if (li[i1].Chars.IsAllUpper || li[i1].Chars.IsLatinLetter)
                        {
                            return(null);
                        }
                    }
                }
                else if (li[0].GeoObjectBefore)
                {
                    ok = true;
                }
                else if (li[i1].GeoObjectAfter && !li[i1].IsNewlineAfter)
                {
                    ok = true;
                }
                else
                {
                    ok = CheckYearAfter(li[i1].EndToken.Next);
                }
                if (!ok)
                {
                    ok = CheckStreetAfter(li[i1].EndToken.Next);
                }
                if (!ok && li[0].BeginToken.Previous != null && li[0].BeginToken.Previous.IsValue("В", null))
                {
                    ok = true;
                }
            }
            else
            {
                return(null);
            }
            if (!ok && !always)
            {
                if (MiscLocationHelper.CheckNearBefore(li[0].BeginToken.Previous) == null)
                {
                    return(null);
                }
            }
            if (li.Count > (i1 + 1))
            {
                li.RemoveRange(i1 + 1, li.Count - i1 - 1);
            }
            Pullenti.Ner.Geo.GeoReferent city = new Pullenti.Ner.Geo.GeoReferent();
            if (oi != null && oi.Referent != null)
            {
                city = oi.Referent.Clone() as Pullenti.Ner.Geo.GeoReferent;
                city.Occurrence.Clear();
            }
            if (!li[0].Morph.Case.IsUndefined && li[0].Morph.Gender != Pullenti.Morph.MorphGender.Undefined)
            {
                if (li[i1].EndToken.Morph.Class.IsAdjective && li[i1].BeginToken == li[i1].EndToken)
                {
                    string nam = Pullenti.Ner.Core.ProperNameHelper.GetNameEx(li[i1].BeginToken, li[i1].EndToken, Pullenti.Morph.MorphClass.Adjective, li[0].Morph.Case, li[0].Morph.Gender, false, false);
                    if (nam != null && nam != name)
                    {
                        name = nam;
                    }
                }
            }
            if (li[0].Morph.Case.IsNominative)
            {
                if (altName != null)
                {
                    city.AddName(altName);
                }
                altName = null;
            }
            city.AddName(name);
            if (probAdj != null)
            {
                city.AddName(probAdj + " " + name);
            }
            if (altName != null)
            {
                city.AddName(altName);
                if (probAdj != null)
                {
                    city.AddName(probAdj + " " + altName);
                }
            }
            if (typ != null)
            {
                city.AddTyp(typ);
            }
            else if (!city.IsCity)
            {
                city.AddTypCity(li[0].Kit.BaseLanguage);
            }
            if (typ2 != null)
            {
                city.AddTyp(typ2.ToLower());
            }
            if (li[0].HigherGeo != null && GeoOwnerHelper.CanBeHigher(li[0].HigherGeo, city))
            {
                city.Higher = li[0].HigherGeo;
            }
            if (li[0].Typ == CityItemToken.ItemType.Misc)
            {
                li.RemoveAt(0);
            }
            Pullenti.Ner.ReferentToken res = new Pullenti.Ner.ReferentToken(city, li[0].BeginToken, li[li.Count - 1].EndToken)
            {
                Morph = mc
            };
            if (res.EndToken.Next != null && res.EndToken.Next.IsHiphen && (res.EndToken.Next.Next is Pullenti.Ner.NumberToken))
            {
                Pullenti.Ner.NumberToken num = res.EndToken.Next.Next as Pullenti.Ner.NumberToken;
                if ((num.Typ == Pullenti.Ner.NumberSpellingType.Digit && !num.Morph.Class.IsAdjective && num.IntValue != null) && (num.IntValue.Value < 50))
                {
                    foreach (Pullenti.Ner.Slot s in city.Slots)
                    {
                        if (s.TypeName == Pullenti.Ner.Geo.GeoReferent.ATTR_NAME)
                        {
                            city.UploadSlot(s, string.Format("{0}-{1}", s.Value, num.Value));
                        }
                    }
                    res.EndToken = num;
                }
            }
            if (li[0].BeginToken == li[0].EndToken && li[0].BeginToken.IsValue("ГОРОДОК", null))
            {
                if (Pullenti.Ner.Address.Internal.AddressItemToken.CheckHouseAfter(res.EndToken.Next, true, false))
                {
                    return(null);
                }
            }
            return(res);
        }