Exemplo n.º 1
0
 /// <summary>
 /// Scans the token chain of <paramref name="kit"/> and, wherever a list of named items
 /// parsed at the current token can be attached, registers the resulting referent and
 /// embeds its token into the chain.
 /// </summary>
 /// <param name="kit">Analysis kit that supplies the first token and receives the embedded tokens.</param>
 public override void Process(Pullenti.Ner.Core.AnalysisKit kit)
 {
     Pullenti.Ner.Core.AnalyzerDataWithOntology data = kit.GetAnalyzerData(this) as Pullenti.Ner.Core.AnalyzerDataWithOntology;
     Pullenti.Ner.Token cursor = kit.FirstToken;
     while (cursor != null)
     {
         List <Pullenti.Ner.Named.Internal.NamedItemToken> items = Pullenti.Ner.Named.Internal.NamedItemToken.TryParseList(cursor, data.LocalOntology);
         Pullenti.Ner.ReferentToken attached = (items == null || items.Count == 0) ? null : _tryAttach(items);
         if (attached != null)
         {
             // Swap the referent for the registered instance before embedding the token.
             attached.Referent = data.RegisterReferent(attached.Referent);
             kit.EmbedToken(attached);
             // Resume scanning from the token that follows the embedded one.
             cursor = attached;
         }
         cursor = cursor.Next;
     }
 }
Exemplo n.º 2
0
 /// <summary>
 /// Main entity-extraction routine: scans the token chain of <paramref name="kit"/> up to two
 /// times and embeds a referent token wherever a denomination is recognised.
 /// </summary>
 /// <remarks>
 /// The second pass only re-checks the special attach and the local ontology; it is performed
 /// only when the first pass found new denominations through <c>TryAttach</c>. Each pass gives
 /// up once it has been running for more than one minute.
 /// </remarks>
 /// <param name="kit">Analysis kit that supplies the tokens and receives the embedded tokens.</param>
 public override void Process(Pullenti.Ner.Core.AnalysisKit kit)
 {
     Pullenti.Ner.Core.AnalyzerDataWithOntology ad = kit.GetAnalyzerData(this) as Pullenti.Ner.Core.AnalyzerDataWithOntology;
     for (int k = 0; k < 2; k++)
     {
         bool detectNewDenoms = false;
         // Monotonic timer for the per-pass budget: unlike DateTime.Now differences it is not
         // affected by daylight-saving transitions or system clock adjustments.
         System.Diagnostics.Stopwatch passTimer = System.Diagnostics.Stopwatch.StartNew();
         for (Pullenti.Ner.Token t = kit.FirstToken; t != null; t = t.Next)
         {
             // A candidate must follow whitespace, a comma, or a token that can open a bracket sequence.
             bool canStartHere = t.IsWhitespaceBefore
                 || (t.Previous != null && (t.Previous.IsCharOf(",") || Pullenti.Ner.Core.BracketHelper.CanBeStartOfSequence(t.Previous, false, false)));
             if (!canStartHere)
             {
                 continue;
             }
             Pullenti.Ner.ReferentToken rt0 = this.TryAttachSpec(t);
             if (rt0 != null)
             {
                 rt0.Referent = ad.RegisterReferent(rt0.Referent);
                 kit.EmbedToken(rt0);
                 t = rt0;
                 continue;
             }
             if (!t.Chars.IsLetter)
             {
                 continue;
             }
             if (!this.CanBeStartOfDenom(t))
             {
                 continue;
             }
             if (passTimer.Elapsed.TotalMinutes > 1)
             {
                 // Time budget of this pass is exhausted.
                 break;
             }
             // First choice: a denomination already present in the local ontology.
             List <Pullenti.Ner.Core.IntOntologyToken> ot = ad.LocalOntology.TryAttach(t, null, false);
             if (ot != null && (ot[0].Item.Referent is DenominationReferent))
             {
                 if (this.CheckAttach(ot[0].BeginToken, ot[0].EndToken))
                 {
                     // Embed a clone with its own (empty) occurrence list.
                     DenominationReferent cl = ot[0].Item.Referent.Clone() as DenominationReferent;
                     cl.Occurrence.Clear();
                     Pullenti.Ner.ReferentToken rt = new Pullenti.Ner.ReferentToken(cl, ot[0].BeginToken, ot[0].EndToken);
                     kit.EmbedToken(rt);
                     t = rt;
                     continue;
                 }
             }
             if (k > 0)
             {
                 // The second pass does not consult the kit ontology or detect new denominations.
                 continue;
             }
             if (t.Kit.Ontology != null)
             {
                 ot = t.Kit.Ontology.AttachToken(DenominationReferent.OBJ_TYPENAME, t);
                 if (ot != null)
                 {
                     if (this.CheckAttach(ot[0].BeginToken, ot[0].EndToken))
                     {
                         // Build a fresh referent from the slots of the ontology item and register it.
                         DenominationReferent dr = new DenominationReferent();
                         dr.MergeSlots(ot[0].Item.Referent, true);
                         Pullenti.Ner.ReferentToken rt = new Pullenti.Ner.ReferentToken(ad.RegisterReferent(dr), ot[0].BeginToken, ot[0].EndToken);
                         kit.EmbedToken(rt);
                         t = rt;
                         continue;
                     }
                 }
             }
             rt0 = this.TryAttach(t, false);
             if (rt0 != null)
             {
                 rt0.Referent = ad.RegisterReferent(rt0.Referent);
                 kit.EmbedToken(rt0);
                 detectNewDenoms = true;
                 t = rt0;
                 if (ad.LocalOntology.Items.Count > 1000)
                 {
                     // Stop this pass once the local ontology holds more than 1000 items.
                     break;
                 }
             }
         }
         if (!detectNewDenoms)
         {
             // Nothing new was detected, so a second pass would find nothing more.
             break;
         }
     }
 }
Exemplo n.º 3
0
        /// <summary>
        /// Tries to build a city referent token from a parsed item list that starts with a city
        /// name (optionally followed by a type noun) or consists of a proper name plus a noun.
        /// </summary>
        /// <param name="li">Parsed items. The list may be modified: items that are not used are
        /// removed from its tail, and <c>li[0].Value</c> can be rewritten for adjective names.</param>
        /// <param name="oi">Receives <c>li[0].OntoItem</c> once the initial shape checks pass;
        /// null when the list is rejected by those checks.</param>
        /// <param name="ad">Analyzer data; when not null, an ontology item whose owner is neither
        /// the local ontology nor an external ontology is not trusted on its own.</param>
        /// <returns>The referent token for the city, or null when the items are rejected.</returns>
        static Pullenti.Ner.ReferentToken Try1(List <CityItemToken> li, out Pullenti.Ner.Core.IntOntologyItem oi, Pullenti.Ner.Core.AnalyzerDataWithOntology ad)
        {
            oi = null;
            if (li == null || (li.Count < 1))
            {
                return(null);
            }
            else if (li[0].Typ != CityItemToken.ItemType.City)
            {
                // The only accepted alternative shape is exactly "proper name + noun".
                if (li.Count != 2 || li[0].Typ != CityItemToken.ItemType.ProperName || li[1].Typ != CityItemToken.ItemType.Noun)
                {
                    return(null);
                }
            }
            // Number of leading items that belong to the result.
            int i = 1;

            oi = li[0].OntoItem;
            bool ok = !li[0].Doubtful;

            if ((ok && li[0].OntoItem != null && li[0].OntoItem.MiscAttr == null) && ad != null)
            {
                if (li[0].OntoItem.Owner != ad.LocalOntology && !li[0].OntoItem.Owner.IsExtOntology)
                {
                    // Such an item is accepted right away only when preceded by the word "В".
                    if (li[0].BeginToken.Previous != null && li[0].BeginToken.Previous.IsValue("В", null))
                    {
                    }
                    else
                    {
                        ok = false;
                    }
                }
            }
            if (li.Count == 1 && li[0].BeginToken.Morph.Class.IsAdjective)
            {
                // Reject when the same tokens parse as two street items ending with a noun.
                List <Pullenti.Ner.Address.Internal.StreetItemToken> sits = Pullenti.Ner.Address.Internal.StreetItemToken.TryParseList(li[0].BeginToken, null, 3);
                if (sits != null && sits.Count == 2 && sits[1].Typ == Pullenti.Ner.Address.Internal.StreetItemType.Noun)
                {
                    return(null);
                }
            }
            string typ    = null;
            string alttyp = null;
            // The noun item that supplied typ. It is kept separately because the item can be
            // removed from li below while its morphology is still needed for name agreement.
            CityItemToken typNoun = null;

            Pullenti.Ner.MorphCollection mc = li[0].Morph;
            if (i < li.Count)
            {
                if (li[i].Typ == CityItemToken.ItemType.Noun)
                {
                    Pullenti.Ner.Address.Internal.AddressItemToken at = null;
                    if (!li[i].Chars.IsAllLower && (li[i].WhitespacesAfterCount < 2))
                    {
                        Pullenti.Ner.Address.Internal.StreetItemToken sit = Pullenti.Ner.Address.Internal.StreetItemToken.TryParse(li[i].EndToken.Next, null, false, null, false);
                        if (sit != null && sit.Typ == Pullenti.Ner.Address.Internal.StreetItemType.Noun)
                        {
                            at = Pullenti.Ner.Address.Internal.AddressItemToken.TryParse(li[i].BeginToken, null, false, false, null);
                            if (at != null)
                            {
                                Pullenti.Ner.Address.Internal.AddressItemToken at2 = Pullenti.Ner.Address.Internal.AddressItemToken.TryParse(li[i].EndToken.Next, null, false, false, null);
                                if (at2 != null && at2.Typ == Pullenti.Ner.Address.Internal.AddressItemToken.ItemType.Street)
                                {
                                    at = null;
                                }
                            }
                        }
                    }
                    // at stays non-null when the noun parses as an address item and the text after
                    // it is not itself a street; the noun is then not used as the city type.
                    if (at == null)
                    {
                        typ     = li[i].Value;
                        alttyp  = li[i].AltValue;
                        typNoun = li[i];
                        if (li[i].BeginToken.IsValue("СТ", null) && li[i].BeginToken.Chars.IsAllUpper)
                        {
                            return(null);
                        }
                        if ((i + 1) == li.Count)
                        {
                            // The noun is the last item: accept and prefer its morphology when it has a case.
                            ok = true;
                            if (!li[i].Morph.Case.IsUndefined)
                            {
                                mc = li[i].Morph;
                            }
                            i++;
                        }
                        else if (ok)
                        {
                            i++;
                        }
                        else
                        {
                            // A preceding word for mayor / head also confirms the city.
                            Pullenti.Ner.Token tt0 = li[0].BeginToken.Previous;
                            if ((tt0 is Pullenti.Ner.TextToken) && (tt0.WhitespacesAfterCount < 3))
                            {
                                if (tt0.IsValue("МЭР", "МЕР") || tt0.IsValue("ГЛАВА", null) || tt0.IsValue("ГРАДОНАЧАЛЬНИК", null))
                                {
                                    ok = true;
                                    i++;
                                }
                            }
                        }
                    }
                }
            }
            if (!ok && oi != null && (oi.CanonicText.Length < 4))
            {
                return(null);
            }
            if (!ok && li[0].BeginToken.Morph.Class.IsProperName)
            {
                return(null);
            }
            if (!ok)
            {
                if (!Pullenti.Ner.Core.MiscHelper.IsExistsInDictionary(li[0].BeginToken, li[0].EndToken, Pullenti.Morph.MorphClass.Adjective | Pullenti.Morph.MorphClass.Noun | Pullenti.Morph.MorphClass.Pronoun))
                {
                    ok = li[0].GeoObjectBefore || li[i - 1].GeoObjectAfter;
                    if (ok && li[0].BeginToken == li[0].EndToken)
                    {
                        Pullenti.Morph.MorphClass mcc = li[0].BeginToken.GetMorphClassInDictionary();
                        if (mcc.IsProperName || mcc.IsProperSurname)
                        {
                            ok = false;
                        }
                        else if (li[0].GeoObjectBefore && (li[0].WhitespacesAfterCount < 2))
                        {
                            Pullenti.Ner.Address.Internal.AddressItemToken ad1 = Pullenti.Ner.Address.Internal.AddressItemToken.TryParse(li[0].BeginToken, null, false, false, null);
                            if (ad1 != null && ad1.Typ == Pullenti.Ner.Address.Internal.AddressItemToken.ItemType.Street)
                            {
                                Pullenti.Ner.Address.Internal.AddressItemToken ad2 = Pullenti.Ner.Address.Internal.AddressItemToken.TryParse(li[0].EndToken.Next, null, false, false, null);
                                if (ad2 == null || ad2.Typ != Pullenti.Ner.Address.Internal.AddressItemToken.ItemType.Street)
                                {
                                    ok = false;
                                }
                            }
                            else if (Pullenti.Ner.Address.Internal.AddressItemToken.TryAttachOrg(li[0].BeginToken) != null)
                            {
                                ok = false;
                            }
                        }
                    }
                }
                if (ok)
                {
                    // Reject when a PERSON referent can be recognised at the same position.
                    if (li[0].Kit.ProcessReferent("PERSON", li[0].BeginToken) != null)
                    {
                        ok = false;
                    }
                }
            }
            if (!ok)
            {
                ok = CheckYearAfter(li[0].EndToken.Next);
            }
            if (!ok && ((!li[0].BeginToken.Morph.Class.IsAdjective || li[0].BeginToken != li[0].EndToken)))
            {
                ok = CheckCityAfter(li[0].EndToken.Next);
            }
            if (!ok)
            {
                return(null);
            }
            if (i < li.Count)
            {
                // Drop the items that were not consumed. When i is still 1 this also removes the
                // type noun, which is why typNoun is used instead of li[1] further down.
                li.RemoveRange(i, li.Count - i);
            }
            Pullenti.Ner.ReferentToken rt = null;
            if (oi == null)
            {
                if (li[0].Value != null && li[0].HigherGeo != null)
                {
                    Pullenti.Ner.Geo.GeoReferent cap = new Pullenti.Ner.Geo.GeoReferent();
                    cap.AddName(li[0].Value);
                    cap.AddTypCity(li[0].Kit.BaseLanguage);
                    cap.Higher = li[0].HigherGeo;
                    if (typ != null)
                    {
                        cap.AddTyp(typ);
                    }
                    if (alttyp != null)
                    {
                        cap.AddTyp(alttyp);
                    }
                    rt = new Pullenti.Ner.ReferentToken(cap, li[0].BeginToken, li[0].EndToken);
                }
                else
                {
                    if (li[0].Value == null)
                    {
                        return(null);
                    }
                    if (typ == null)
                    {
                        // Without a type the name is accepted only as "<geo referent>-<name>".
                        if ((li.Count == 1 && li[0].BeginToken.Previous != null && li[0].BeginToken.Previous.IsHiphen) && (li[0].BeginToken.Previous.Previous is Pullenti.Ner.ReferentToken) && (li[0].BeginToken.Previous.Previous.GetReferent() is Pullenti.Ner.Geo.GeoReferent))
                        {
                        }
                        else
                        {
                            return(null);
                        }
                    }
                    else
                    {
                        if (!Pullenti.Morph.LanguageHelper.EndsWithEx(typ, "ПУНКТ", "ПОСЕЛЕНИЕ", "ПОСЕЛЕННЯ", "ПОСЕЛОК"))
                        {
                            if (!Pullenti.Morph.LanguageHelper.EndsWith(typ, "CITY"))
                            {
                                if (typ == "СТАНЦИЯ" && (MiscLocationHelper.CheckGeoObjectBefore(li[0].BeginToken)))
                                {
                                }
                                else if (li.Count > 1 && li[1].Typ == CityItemToken.ItemType.Noun && li[0].Typ == CityItemToken.ItemType.City)
                                {
                                }
                                else if ((li.Count == 2 && li[1].Typ == CityItemToken.ItemType.Noun && li[0].Typ == CityItemToken.ItemType.ProperName) && ((li[0].GeoObjectBefore || li[1].GeoObjectAfter)))
                                {
                                }
                                else
                                {
                                    return(null);
                                }
                            }
                        }
                        if (li[0].BeginToken.Morph.Class.IsAdjective)
                        {
                            // typ != null here, so typNoun is set. It is the same object as li[1]
                            // whenever li[1] still exists, and stays valid when the noun was trimmed
                            // from li above (indexing li[1] in that case would be out of range).
                            li[0].Value = Pullenti.Ner.Core.ProperNameHelper.GetNameEx(li[0].BeginToken, li[0].EndToken, Pullenti.Morph.MorphClass.Adjective, typNoun.Morph.Case, typNoun.Morph.Gender, false, false);
                        }
                    }
                }
            }
            else if (oi.Referent is Pullenti.Ner.Geo.GeoReferent)
            {
                // Known city: return a clone with its own (empty) occurrence list.
                Pullenti.Ner.Geo.GeoReferent city = oi.Referent.Clone() as Pullenti.Ner.Geo.GeoReferent;
                city.Occurrence.Clear();
                rt = new Pullenti.Ner.ReferentToken(city, li[0].BeginToken, li[li.Count - 1].EndToken)
                {
                    Morph = mc
                };
            }
            else if (typ == null)
            {
                typ = oi.Typ;
            }
            if (rt == null)
            {
                Pullenti.Ner.Geo.GeoReferent city = new Pullenti.Ner.Geo.GeoReferent();
                city.AddName((oi == null ? li[0].Value : oi.CanonicText));
                if (typ != null)
                {
                    city.AddTyp(typ);
                }
                else
                {
                    city.AddTypCity(li[0].Kit.BaseLanguage);
                }
                if (alttyp != null)
                {
                    city.AddTyp(alttyp);
                }
                rt = new Pullenti.Ner.ReferentToken(city, li[0].BeginToken, li[li.Count - 1].EndToken)
                {
                    Morph = mc
                };
            }
            if ((rt.Referent is Pullenti.Ner.Geo.GeoReferent) && li.Count == 1 && (rt.Referent as Pullenti.Ner.Geo.GeoReferent).IsCity)
            {
                // Absorb an adjacent "Г" abbreviation (with an optional dot) into the token span.
                if (rt.BeginToken.Previous != null && rt.BeginToken.Previous.IsValue("Г", null))
                {
                    rt.BeginToken = rt.BeginToken.Previous;
                }
                else if ((rt.BeginToken.Previous != null && rt.BeginToken.Previous.IsChar('.') && rt.BeginToken.Previous.Previous != null) && rt.BeginToken.Previous.Previous.IsValue("Г", null))
                {
                    rt.BeginToken = rt.BeginToken.Previous.Previous;
                }
                else if (rt.EndToken.Next != null && (rt.WhitespacesAfterCount < 2) && rt.EndToken.Next.IsValue("Г", null))
                {
                    rt.EndToken = rt.EndToken.Next;
                    if (rt.EndToken.Next != null && rt.EndToken.Next.IsChar('.'))
                    {
                        rt.EndToken = rt.EndToken.Next;
                    }
                }
            }
            return(rt);
        }
Exemplo n.º 4
0
 /// <summary>
 /// Tries the available city-recognition strategies in order on the parsed items and returns
 /// the first referent token produced, widened to include a preceding "ТЕРРИТОРИЯ" word.
 /// </summary>
 /// <param name="li">Parsed city items. The list may be modified: a leading Misc item that is
 /// followed by a noun is removed. A null or empty list yields null.</param>
 /// <param name="ad">Analyzer data passed through to <c>Try1</c>.</param>
 /// <param name="always">When true, the noun/name strategies are retried with their last
 /// argument set to true after the regular attempts have failed.</param>
 /// <returns>The referent token, or null when no strategy succeeds.</returns>
 public static Pullenti.Ner.ReferentToken TryAttachCity(List <CityItemToken> li, Pullenti.Ner.Core.AnalyzerDataWithOntology ad, bool always = false)
 {
     // li[0] is dereferenced below, so an empty list is rejected together with null.
     if (li == null || li.Count == 0)
     {
         return(null);
     }
     Pullenti.Ner.Core.IntOntologyItem oi;
     if (li.Count > 2 && li[0].Typ == CityItemToken.ItemType.Misc && li[1].Typ == CityItemToken.ItemType.Noun)
     {
         li[1].Doubtful = false;
         li.RemoveAt(0);
     }
     Pullenti.Ner.ReferentToken res = null;
     if (li.Count > 1)
     {
         // An early Try4 result is kept only when it extends beyond the second item.
         res = Try4(li);
         if (res != null && res.EndChar <= li[1].EndChar)
         {
             res = null;
         }
     }
     if (res == null)
     {
         res = Try1(li, out oi, ad);
     }
     if (res == null)
     {
         res = _tryNounName(li, out oi, false);
     }
     if (res == null)
     {
         res = _tryNameExist(li, out oi, false);
     }
     if (res == null)
     {
         res = Try4(li);
     }
     if (res == null && always)
     {
         res = _tryNounName(li, out oi, true);
     }
     if (res == null && always)
     {
         // The name-only fallback is skipped when the text attaches as an organization.
         if (Pullenti.Ner.Address.Internal.AddressItemToken.TryAttachOrg(li[0].BeginToken) == null)
         {
             res = _tryNameExist(li, out oi, true);
         }
     }
     if (res == null)
     {
         return(null);
     }
     if (res.BeginToken.Previous != null)
     {
         if (res.BeginToken.Previous.IsValue("ТЕРРИТОРИЯ", null))
         {
             res.BeginToken = res.BeginToken.Previous;
             res.Morph      = res.BeginToken.Morph;
         }
         // Same widening when the result is enclosed in brackets: ТЕРРИТОРИЯ ( ... ).
         if ((Pullenti.Ner.Core.BracketHelper.CanBeStartOfSequence(res.BeginToken.Previous, false, false) && Pullenti.Ner.Core.BracketHelper.CanBeEndOfSequence(res.EndToken.Next, false, null, false) && res.BeginToken.Previous.Previous != null) && res.BeginToken.Previous.Previous.IsValue("ТЕРРИТОРИЯ", null))
         {
             res.BeginToken = res.BeginToken.Previous.Previous;
             res.Morph      = res.BeginToken.Morph;
             res.EndToken   = res.EndToken.Next;
         }
     }
     return(res);
 }