예제 #1
0
 /// <summary>
 /// Walks the kit's token chain and embeds every referent this analyzer manages to attach.
 /// </summary>
 /// <param name="kit">analysis kit supplying the token chain and this analyzer's data store</param>
 public override void Process(Pullenti.Ner.Core.AnalysisKit kit)
 {
     Pullenti.Ner.Core.AnalyzerData data = kit.GetAnalyzerData(this);
     Pullenti.Ner.Token cur = kit.FirstToken;
     while (cur != null)
     {
         Pullenti.Ner.ReferentToken found = null;

         // Only letter tokens are looked up in the ontology of key terms.
         Pullenti.Ner.Core.TerminToken keyword = cur.Chars.IsLetter
             ? m_Ontology.TryParse(cur, Pullenti.Ner.Core.TerminParseAttr.No)
             : null;
         if (keyword != null)
         {
             // The value starts right after the key term, optionally past a ':' separator.
             Pullenti.Ner.Token afterKeyword = keyword.EndToken.Next;
             if (afterKeyword != null && afterKeyword.IsChar(':'))
             {
                 afterKeyword = afterKeyword.Next;
             }
             found = this.TryAttach(afterKeyword, true);
             if (found != null)
             {
                 // Stretch the result so that it also covers the key term.
                 found.BeginToken = cur;
             }
         }

         // Second chance without a key term: only on referent tokens or at a line start.
         if (found == null)
         {
             if ((cur is Pullenti.Ner.ReferentToken) || cur.IsNewlineBefore)
             {
                 found = this.TryAttach(cur, false);
             }
         }

         if (found != null)
         {
             found.Referent = data.RegisterReferent(found.Referent);
             kit.EmbedToken(found);
             // Continue scanning after the freshly embedded metatoken.
             cur = found;
         }
         cur = cur.Next;
     }
 }
예제 #2
0
 /// <summary>
 /// Tries to extract an entity starting at the given token using the specified analyzer.
 /// Intended for "looking ahead" to check a hypothesis about whether an entity of a particular type is located here.
 /// </summary>
 /// <param name="analyzerName">name of the analyzer</param>
 /// <param name="t">token from which to attempt the extraction</param>
 /// <returns>
 /// A ReferentToken metatoken with the entity, or null. Note that the entity is not saved
 /// and the returned metatoken is not embedded anywhere.
 /// </returns>
 public Pullenti.Ner.ReferentToken ProcessReferent(string analyzerName, Pullenti.Ner.Token t)
 {
     if (Processor == null)
     {
         return(null);
     }
     // The same analyzer is never re-entered while it is already on the stack.
     if (m_AnalyzerStack.Contains(analyzerName))
     {
         return(null);
     }
     if (IsRecurceOverflow)
     {
         return(null);
     }
     Pullenti.Ner.Analyzer a = Processor.FindAnalyzer(analyzerName);
     if (a == null)
     {
         return(null);
     }
     RecurseLevel++;
     m_AnalyzerStack.Add(analyzerName);
     try
     {
         return(a.ProcessReferent(t, null));
     }
     finally
     {
         // Restore the bookkeeping even when the analyzer throws; otherwise the analyzer
         // name would stay on the stack and every later call for it would return null.
         m_AnalyzerStack.Remove(analyzerName);
         RecurseLevel--;
     }
 }
예제 #3
0
 /// <summary>
 /// Tries to build a territory referent token from what follows the first item of <paramref name="li"/>:
 /// first from the remaining items of the list, then from an organization attached right after the first item.
 /// </summary>
 /// <param name="li">parsed territory items; the first one supplies the begin token of the result</param>
 /// <param name="ad">analyzer data forwarded to TryAttachTerritory</param>
 /// <returns>the referent token starting at li[0].BeginToken, or null when nothing could be attached</returns>
 static Pullenti.Ner.ReferentToken _tryAttachPureTerr(List <TerrItemToken> li, Pullenti.Ner.Core.AnalyzerData ad)
 {
     Pullenti.Ner.Token afterFirst = li[0].EndToken.Next;
     if (afterFirst == null)
     {
         return null;
     }

     // Step over an opening bracket, if any, to reach the token where an organization may start.
     Pullenti.Ner.Token orgStart = Pullenti.Ner.Core.BracketHelper.CanBeStartOfSequence(afterFirst, true, false)
         ? afterFirst.Next
         : afterFirst;

     if (li.Count > 1)
     {
         // All items except the first one.
         List <TerrItemToken> rest = li.GetRange(1, li.Count - 1);
         Pullenti.Ner.ReferentToken terr = TryAttachTerritory(rest, ad, false, null, null);
         if (terr == null && rest.Count == 2)
         {
             // Retry with the third argument set when exactly one of the two items carries a TerminItem.
             bool firstHasTermin  = rest[0].TerminItem != null;
             bool secondHasTermin = rest[1].TerminItem != null;
             if (firstHasTermin != secondHasTermin)
             {
                 terr = TryAttachTerritory(rest, ad, true, null, null);
             }
         }
         if (terr != null)
         {
             // A state is rejected as a result.
             if ((terr.Referent as Pullenti.Ner.Geo.GeoReferent).IsState)
             {
                 return null;
             }
             terr.BeginToken = li[0].BeginToken;
             terr.Morph      = li[0].Morph;
             return terr;
         }
     }

     Pullenti.Ner.Address.Internal.AddressItemToken org = Pullenti.Ner.Address.Internal.AddressItemToken.TryAttachOrg(orgStart);
     if (org == null)
     {
         return null;
     }
     Pullenti.Ner.ReferentToken res = org.CreateGeoOrgTerr();
     if (res == null)
     {
         return null;
     }
     res.BeginToken = li[0].BeginToken;

     // If an opening bracket was skipped above, swallow the matching closing one as well.
     Pullenti.Ner.Token last = res.EndToken;
     if (orgStart != afterFirst && Pullenti.Ner.Core.BracketHelper.CanBeEndOfSequence(last.Next, false, null, false))
     {
         res.EndToken = last.Next;
     }
     return res;
 }
예제 #4
0
        /// <summary>
        /// Builds a GoodAttributeReferent from an ontology item token chain.
        /// An optional leading Latin keyword (KEYWORD, CHARACTER, PROPER, MODEL) selects the attribute type;
        /// the rest of the chain is split on ';' and every piece is stored as an ATTR_VALUE slot.
        /// </summary>
        /// <param name="begin">first token of the ontology item text</param>
        /// <returns>
        /// A referent token wrapping the attribute, or null when <paramref name="begin"/> is null
        /// or nothing follows the type keyword.
        /// </returns>
        public override Pullenti.Ner.ReferentToken ProcessOntologyItem(Pullenti.Ner.Token begin)
        {
            if (begin == null)
            {
                return(null);
            }
            GoodAttributeReferent ga = new GoodAttributeReferent();

            if (begin.Chars.IsLatinLetter)
            {
                if (begin.IsValue("KEYWORD", null))
                {
                    ga.Typ = GoodAttrType.Keyword;
                    begin  = begin.Next;
                }
                else if (begin.IsValue("CHARACTER", null))
                {
                    ga.Typ = GoodAttrType.Character;
                    begin  = begin.Next;
                }
                else if (begin.IsValue("PROPER", null))
                {
                    ga.Typ = GoodAttrType.Proper;
                    begin  = begin.Next;
                }
                else if (begin.IsValue("MODEL", null))
                {
                    ga.Typ = GoodAttrType.Model;
                    begin  = begin.Next;
                }
                if (begin == null)
                {
                    return(null);
                }
            }
            Pullenti.Ner.ReferentToken res = new Pullenti.Ner.ReferentToken(ga, begin, begin);

            // Start of the most recent segment that actually has a token. It equals `begin`
            // unless the chain ends with ';', in which case `begin` becomes null below.
            Pullenti.Ner.Token lastSegmentStart = begin;
            for (Pullenti.Ner.Token t = begin; t != null; t = t.Next)
            {
                if (t.IsChar(';'))
                {
                    // Close the current segment and start the next one after the separator.
                    ga.AddSlot(GoodAttributeReferent.ATTR_VALUE, Pullenti.Ner.Core.MiscHelper.GetTextValue(begin, t.Previous, Pullenti.Ner.Core.GetTextAttr.No), false, 0);
                    begin = t.Next;
                    if (begin != null)
                    {
                        lastSegmentStart = begin;
                    }
                    continue;
                }
                res.EndToken = t;
            }
            // `begin` is null when the text ends with ';' - there is no trailing segment to store then
            // (dereferencing it here used to throw NullReferenceException).
            if (begin != null && res.EndChar > begin.BeginChar)
            {
                ga.AddSlot(GoodAttributeReferent.ATTR_VALUE, Pullenti.Ner.Core.MiscHelper.GetTextValue(begin, res.EndToken, Pullenti.Ner.Core.GetTextAttr.No), false, 0);
            }
            if (ga.Typ == GoodAttrType.Undefined)
            {
                // No explicit type keyword: a last segment that is not all lower-case is treated as a proper name.
                if (!lastSegmentStart.Chars.IsAllLower)
                {
                    ga.Typ = GoodAttrType.Proper;
                }
            }
            return(res);
        }
예제 #5
0
        /// <summary>
        /// Processes the text line by line: at every token that starts a line it tries to parse a list of
        /// good attributes, registers and embeds them, and wraps the whole list into one GoodReferent token.
        /// Progress is reported once per 100000 characters of text.
        /// </summary>
        /// <param name="kit">analysis kit supplying the text, the token chain and the analyzer data</param>
        public override void Process(Pullenti.Ner.Core.AnalysisKit kit)
        {
            Pullenti.Ner.Core.AnalyzerData data = kit.GetAnalyzerData(this);
            int chunkSize  = 100000;
            int chunkCount = (kit.Sofa.Text.Length + chunkSize - 1) / chunkSize;
            if (chunkCount == 0)
            {
                // Empty text still counts as one part for progress reporting.
                chunkCount = 1;
            }
            int reported  = 0;
            int threshold = 0;
            List <GoodReferent> collected = new List <GoodReferent>();

            for (Pullenti.Ner.Token tok = kit.FirstToken; tok != null; tok = tok.Next)
            {
                // Goods are only looked for at the beginning of a line.
                if (tok.IsNewlineBefore)
                {
                    if (tok.BeginChar > threshold)
                    {
                        threshold += chunkSize;
                        reported++;
                        if (!this.OnProgress(reported, chunkCount, kit))
                        {
                            // The progress callback asked to stop.
                            break;
                        }
                    }

                    // Skip one leading non-letter token when something follows it.
                    if (!tok.Chars.IsLetter && tok.Next != null)
                    {
                        tok = tok.Next;
                    }

                    List <Pullenti.Ner.ReferentToken> attrs = Pullenti.Ner.Goods.Internal.GoodAttrToken.TryParseList(tok);
                    if (attrs != null && attrs.Count != 0)
                    {
                        GoodReferent good = new GoodReferent();
                        foreach (Pullenti.Ner.ReferentToken attr in attrs)
                        {
                            attr.Referent = data.RegisterReferent(attr.Referent);
                            // Each distinct attribute is attached to the good only once.
                            if (good.FindSlot(GoodReferent.ATTR_ATTR, attr.Referent, true) == null)
                            {
                                good.AddSlot(GoodReferent.ATTR_ATTR, attr.Referent, false, 0);
                            }
                            kit.EmbedToken(attr);
                        }
                        collected.Add(good);

                        // One metatoken spanning all attribute tokens represents the good itself.
                        Pullenti.Ner.ReferentToken whole = new Pullenti.Ner.ReferentToken(good, attrs[0], attrs[attrs.Count - 1]);
                        kit.EmbedToken(whole);
                        tok = whole;
                    }
                }
            }

            // Goods are added to the referent list directly, after the scan is complete.
            foreach (GoodReferent good in collected)
            {
                data.Referents.Add(good);
            }
        }
예제 #6
0
 /// <summary>
 /// Tries to attach the phone that follows the given token, skipping one leading comma.
 /// </summary>
 /// <param name="t">token to start from; may be null</param>
 /// <param name="lev">current nesting level; values above 3 give up immediately</param>
 /// <returns>the attached phone referent, or null when nothing was attached</returns>
 PhoneReferent GetNextPhone(Pullenti.Ner.Token t, int lev)
 {
     Pullenti.Ner.Token start = (t != null && t.IsChar(',')) ? t.Next : t;
     if (start == null || lev > 3)
     {
         return null;
     }

     List<Pullenti.Ner.Phone.Internal.PhoneItemToken> items = Pullenti.Ner.Phone.Internal.PhoneItemToken.TryAttachAll(start, 15);
     if (items == null)
     {
         return null;
     }

     // The level is increased for the nested attach call.
     Pullenti.Ner.ReferentToken attached = this._TryAttach_(items, 0, false, null, lev + 1);
     return attached == null ? null : attached.Referent as PhoneReferent;
 }
예제 #7
0
 /// <summary>
 /// Scans the token chain and embeds every referent token produced by TryParse.
 /// </summary>
 /// <param name="kit">analysis kit supplying the token chain and the analyzer data</param>
 public override void Process(Pullenti.Ner.Core.AnalysisKit kit)
 {
     Pullenti.Ner.Core.AnalyzerData data = kit.GetAnalyzerData(this);
     Pullenti.Ner.Token cur = kit.FirstToken;
     while (cur != null)
     {
         Pullenti.Ner.ReferentToken parsed = TryParse(cur);
         if (parsed != null)
         {
             parsed.Referent = data.RegisterReferent(parsed.Referent);
             kit.EmbedToken(parsed);
             // Resume after the embedded metatoken.
             cur = parsed;
         }
         cur = cur.Next;
     }
 }
예제 #8
0
 /// <summary>
 /// Recursively replaces the referent of a referent token (and of every token nested inside it)
 /// with the referent stored in its Tag, when the Tag holds one.
 /// </summary>
 /// <param name="t">token to process; anything that is not a ReferentToken is ignored</param>
 static void _correctReferents(Pullenti.Ner.Token t)
 {
     if (!(t is Pullenti.Ner.ReferentToken))
     {
         return;
     }
     Pullenti.Ner.ReferentToken holder = (Pullenti.Ner.ReferentToken)t;

     Pullenti.Ner.Referent replacement = holder.Referent == null
         ? null
         : (holder.Referent.Tag as Pullenti.Ner.Referent);
     if (replacement != null)
     {
         holder.Referent = replacement;
     }

     // Descend into the tokens covered by this metatoken.
     Pullenti.Ner.Token inner = holder.BeginToken;
     while (inner != null && inner.EndChar <= holder.EndChar)
     {
         _correctReferents(inner);
         inner = inner.Next;
     }
 }
예제 #9
0
        /// <summary>
        /// Tries to attach a named entity starting at <paramref name="begin"/> without embedding it.
        /// </summary>
        /// <param name="begin">token to start parsing from</param>
        /// <param name="end">not used by this implementation</param>
        /// <returns>the attached referent token with its Data set to this analyzer's data, or null</returns>
        public override Pullenti.Ner.ReferentToken ProcessReferent(Pullenti.Ner.Token begin, Pullenti.Ner.Token end)
        {
            List <Pullenti.Ner.Named.Internal.NamedItemToken> items = Pullenti.Ner.Named.Internal.NamedItemToken.TryParseList(begin, null);
            if (items != null && items.Count > 0)
            {
                Pullenti.Ner.ReferentToken attached = _tryAttach(items);
                if (attached != null)
                {
                    attached.Data = begin.Kit.GetAnalyzerData(this);
                    return attached;
                }
            }
            return null;
        }
예제 #10
0
 /// <summary>
 /// Scans the token chain and, at every token that can start a sentence, tries to parse a thesis;
 /// successful results are registered and embedded.
 /// </summary>
 /// <param name="kit">analysis kit supplying the token chain</param>
 /// <param name="ad">analyzer data used to register the referents</param>
 public static void Process(Pullenti.Ner.Core.AnalysisKit kit, Pullenti.Ner.Core.AnalyzerData ad)
 {
     Pullenti.Ner.Token cur = kit.FirstToken;
     while (cur != null)
     {
         Pullenti.Ner.ReferentToken thesis = Pullenti.Ner.Core.MiscHelper.CanBeStartOfSentence(cur)
             ? TryParseThesis(cur)
             : null;
         if (thesis != null)
         {
             thesis.Referent = ad.RegisterReferent(thesis.Referent);
             kit.EmbedToken(thesis);
             // Resume after the embedded metatoken.
             cur = thesis;
         }
         cur = cur.Next;
     }
 }
예제 #11
0
 /// <summary>
 /// Tries to attach a phone starting at <paramref name="begin"/> without embedding it.
 /// </summary>
 /// <param name="begin">token to start parsing from</param>
 /// <param name="end">not used by this implementation</param>
 /// <returns>the attached referent token, beginning at <paramref name="begin"/>, or null</returns>
 public override Pullenti.Ner.ReferentToken ProcessReferent(Pullenti.Ner.Token begin, Pullenti.Ner.Token end)
 {
     List<Pullenti.Ner.Phone.Internal.PhoneItemToken> items = Pullenti.Ner.Phone.Internal.PhoneItemToken.TryAttachAll(begin, 15);
     if (items == null || items.Count == 0)
     {
         return null;
     }

     // Index of the first item that is not a prefix.
     int start = 0;
     while (start < items.Count && items[start].ItemType == Pullenti.Ner.Phone.Internal.PhoneItemToken.PhoneItemType.Prefix)
     {
         start++;
     }

     Pullenti.Ner.ReferentToken attached = this._TryAttach_(items, start, true, null, 0);
     if (attached == null)
     {
         return null;
     }
     attached.BeginToken = begin;
     return attached;
 }
예제 #12
0
 /// <summary>
 /// Scans the token chain, parses named-item lists against the local ontology and embeds
 /// every referent that can be attached from them.
 /// </summary>
 /// <param name="kit">analysis kit supplying the token chain and the analyzer data</param>
 public override void Process(Pullenti.Ner.Core.AnalysisKit kit)
 {
     Pullenti.Ner.Core.AnalyzerDataWithOntology data = kit.GetAnalyzerData(this) as Pullenti.Ner.Core.AnalyzerDataWithOntology;
     Pullenti.Ner.Token cur = kit.FirstToken;
     while (cur != null)
     {
         List <Pullenti.Ner.Named.Internal.NamedItemToken> items = Pullenti.Ner.Named.Internal.NamedItemToken.TryParseList(cur, data.LocalOntology);
         Pullenti.Ner.ReferentToken attached = (items == null || items.Count == 0)
             ? null
             : _tryAttach(items);
         if (attached != null)
         {
             attached.Referent = data.RegisterReferent(attached.Referent);
             kit.EmbedToken(attached);
             // Resume after the embedded metatoken.
             cur = attached;
         }
         cur = cur.Next;
     }
 }
예제 #13
0
        /// <summary>
        /// Scans the token chain, parses good attributes and embeds a referent token for each one
        /// that yields an attribute referent. Progress is reported once per 100000 characters of text.
        /// </summary>
        /// <param name="kit">analysis kit supplying the text, the token chain and the analyzer data</param>
        public override void Process(Pullenti.Ner.Core.AnalysisKit kit)
        {
            Pullenti.Ner.Core.AnalyzerData data = kit.GetAnalyzerData(this);
            int chunkSize  = 100000;
            int chunkCount = (kit.Sofa.Text.Length + chunkSize - 1) / chunkSize;
            if (chunkCount == 0)
            {
                // Empty text still counts as one part for progress reporting.
                chunkCount = 1;
            }
            int reported  = 0;
            int threshold = 0;

            for (Pullenti.Ner.Token tok = kit.FirstToken; tok != null; tok = tok.Next)
            {
                if (tok.BeginChar > threshold)
                {
                    threshold += chunkSize;
                    reported++;
                    if (!this.OnProgress(reported, chunkCount, kit))
                    {
                        // The progress callback asked to stop.
                        break;
                    }
                }

                Pullenti.Ner.Goods.Internal.GoodAttrToken parsed = Pullenti.Ner.Goods.Internal.GoodAttrToken.TryParse(tok, null, true, true);
                if (parsed == null)
                {
                    continue;
                }

                GoodAttributeReferent attr = parsed._createAttr();
                if (attr != null)
                {
                    Pullenti.Ner.ReferentToken embedded = new Pullenti.Ner.ReferentToken(attr, parsed.BeginToken, parsed.EndToken);
                    embedded.Referent = data.RegisterReferent(attr);
                    kit.EmbedToken(embedded);
                    tok = embedded;
                }
                else
                {
                    // Nothing to embed: just jump past the parsed span.
                    tok = parsed.EndToken;
                }
            }
        }
예제 #14
0
 /// <summary>
 /// Attaches a phone and then up to five "alternate ending" phones that follow it; an additional
 /// number found after the last one is applied to every phone in the result.
 /// </summary>
 /// <param name="pli">parsed phone items</param>
 /// <param name="ind">index in <paramref name="pli"/> forwarded to _TryAttach_</param>
 /// <param name="isPhoneBefore">forwarded to _TryAttach_</param>
 /// <param name="prevPhone">forwarded to _TryAttach_</param>
 /// <returns>list of referent tokens (the main phone first), or null when the main phone was not attached</returns>
 List<Pullenti.Ner.ReferentToken> TryAttach(List<Pullenti.Ner.Phone.Internal.PhoneItemToken> pli, int ind, bool isPhoneBefore, PhoneReferent prevPhone)
 {
     Pullenti.Ner.ReferentToken last = this._TryAttach_(pli, ind, isPhoneBefore, prevPhone, 0);
     if (last == null)
     {
         return null;
     }
     List<Pullenti.Ner.ReferentToken> phones = new List<Pullenti.Ner.ReferentToken> { last };

     int attempt = 0;
     while (attempt < 5)
     {
         PhoneReferent basePhone = last.Referent as PhoneReferent;
         // A phone that already has an additional number ends the whole search.
         if (basePhone.AddNumber != null)
         {
             return phones;
         }
         Pullenti.Ner.Phone.Internal.PhoneItemToken alt = Pullenti.Ner.Phone.Internal.PhoneItemToken.TryAttachAlternate(last.EndToken.Next, basePhone, pli);
         if (alt == null)
         {
             break;
         }

         // Clone the previous phone slot by slot ...
         PhoneReferent variant = new PhoneReferent();
         foreach (Pullenti.Ner.Slot slot in last.Referent.Slots)
         {
             variant.AddSlot(slot.TypeName, slot.Value, false, 0);
         }
         string fullNumber = variant.Number;
         if (fullNumber == null || fullNumber.Length <= alt.Value.Length)
         {
             break;
         }
         // ... and replace the tail of its number with the alternate digits.
         int keptLength = fullNumber.Length - alt.Value.Length;
         variant.Number     = fullNumber.Substring(0, keptLength) + alt.Value;
         variant.m_Template = basePhone.m_Template;

         last = new Pullenti.Ner.ReferentToken(variant, alt.BeginToken, alt.EndToken);
         phones.Add(last);
         attempt++;
     }

     Pullenti.Ner.Phone.Internal.PhoneItemToken extension = Pullenti.Ner.Phone.Internal.PhoneItemToken.TryAttachAdditional(last.EndToken.Next);
     if (extension == null)
     {
         return phones;
     }
     foreach (Pullenti.Ner.ReferentToken entry in phones)
     {
         (entry.Referent as PhoneReferent).AddNumber = extension.Value;
     }
     // Only the last token is stretched to cover the additional number.
     phones[phones.Count - 1].EndToken = extension.EndToken;
     return phones;
 }
예제 #15
0
        /// <summary>
        /// Reads one token from the stream: a type code first, then the token body and,
        /// for metatokens, the chain of nested tokens.
        /// </summary>
        /// <param name="stream">stream to read from</param>
        /// <param name="kit">analysis kit the new token is created for</param>
        /// <param name="vers">version value forwarded to the Deserialize calls</param>
        /// <returns>the deserialized token, or null when the type code is 0</returns>
        static Pullenti.Ner.Token DeserializeToken(Stream stream, Pullenti.Ner.Core.AnalysisKit kit, int vers)
        {
            short kind = DeserializeShort(stream);

            Pullenti.Ner.Token result;
            switch (kind)
            {
                case 0:
                    return null;
                case 1:
                    result = new Pullenti.Ner.TextToken(null, kit);
                    break;
                case 2:
                    result = new Pullenti.Ner.NumberToken(null, null, null, Pullenti.Ner.NumberSpellingType.Digit, kit);
                    break;
                case 3:
                    result = new Pullenti.Ner.ReferentToken(null, null, null, kit);
                    break;
                default:
                    // Any other code is read as a plain metatoken.
                    result = new Pullenti.Ner.MetaToken(null, null, kit);
                    break;
            }
            result.Deserialize(stream, kit, vers);

            Pullenti.Ner.MetaToken meta = result as Pullenti.Ner.MetaToken;
            if (meta != null)
            {
                Pullenti.Ner.Token inner = DeserializeTokens(stream, kit, vers);
                if (inner != null)
                {
                    meta.m_BeginToken = inner;
                    // Walk the nested chain so that m_EndToken ends up on its last token.
                    while (inner != null)
                    {
                        meta.m_EndToken = inner;
                        inner = inner.Next;
                    }
                }
            }
            return result;
        }
예제 #16
0
        /// <summary>
        /// Collects the textual content of a URI starting at <paramref name="t0"/>: an optional domain name
        /// (via AttachDomainName) followed by adjacent tokens whose characters are letters, digits or
        /// belong to <paramref name="chars"/>.
        /// </summary>
        /// <param name="t0">first token of the candidate URI content</param>
        /// <param name="chars">non-letter characters that are allowed inside the content</param>
        /// <param name="canBeWhitespaces">
        /// when true (and a domain name was recognized), some whitespace gaps inside the content are tolerated
        /// </param>
        /// <returns>
        /// A new UriItemToken spanning the collected tokens; the domain token itself (possibly null) when no
        /// further text or no letter/digit was collected; null when the domain value is shorter than 3
        /// characters or the content is just "WWW".
        /// </returns>
        static UriItemToken _AttachUriContent(Pullenti.Ner.Token t0, string chars, bool canBeWhitespaces = false)
        {
            StringBuilder txt = new StringBuilder();

            // t1 tracks the last token accepted into the result.
            Pullenti.Ner.Token t1  = t0;
            UriItemToken       dom = AttachDomainName(t0, true, canBeWhitespaces);

            if (dom != null)
            {
                // Too short a domain value is rejected outright.
                if (dom.Value.Length < 3)
                {
                    return(null);
                }
            }
            // Currently open bracket ('(' or '['), or 0 when none is open.
            char openChar = (char)0;

            Pullenti.Ner.Token t = t0;
            if (dom != null)
            {
                // Content scanning continues right after the recognized domain name.
                t = dom.EndToken.Next;
            }
            for (; t != null; t = t.Next)
            {
                // Whitespace before a non-first token: decide whether the URI may continue across the gap.
                if (t != t0 && t.IsWhitespaceBefore)
                {
                    if (t.IsNewlineBefore || !canBeWhitespaces)
                    {
                        break;
                    }
                    if (dom == null)
                    {
                        break;
                    }
                    if (t.Previous.IsHiphen)
                    {
                        // A hyphen before the gap: keep going.
                    }
                    else if (t.Previous.IsCharOf(",;"))
                    {
                        break;
                    }
                    else if (t.Previous.IsChar('.') && t.Chars.IsLetter && t.LengthChar == 2)
                    {
                        // A dot followed by a two-letter token: keep going.
                    }
                    else
                    {
                        // Look ahead within the next whitespace-free run: continue only if it contains
                        // a slash/backslash or a known web page extension.
                        bool ok = false;
                        Pullenti.Ner.Token tt1 = t;
                        if (t.IsCharOf("\\/"))
                        {
                            tt1 = t.Next;
                        }
                        Pullenti.Ner.Token tt0 = tt1;
                        for (; tt1 != null; tt1 = tt1.Next)
                        {
                            if (tt1 != tt0 && tt1.IsWhitespaceBefore)
                            {
                                break;
                            }
                            if (tt1 is Pullenti.Ner.NumberToken)
                            {
                                continue;
                            }
                            if (!(tt1 is Pullenti.Ner.TextToken))
                            {
                                break;
                            }
                            string term1 = (tt1 as Pullenti.Ner.TextToken).Term;
                            if (((term1 == "HTM" || term1 == "HTML" || term1 == "SHTML") || term1 == "ASP" || term1 == "ASPX") || term1 == "JSP")
                            {
                                ok = true;
                                break;
                            }
                            if (!tt1.Chars.IsLetter)
                            {
                                if (tt1.IsCharOf("\\/"))
                                {
                                    ok = true;
                                    break;
                                }
                                if (!tt1.IsCharOf(chars))
                                {
                                    break;
                                }
                            }
                            else if (!tt1.Chars.IsLatinLetter)
                            {
                                // A non-Latin letter token ends the look-ahead.
                                break;
                            }
                        }
                        if (!ok)
                        {
                            break;
                        }
                    }
                }
                // Numbers are always accepted, using their source text.
                if (t is Pullenti.Ner.NumberToken)
                {
                    Pullenti.Ner.NumberToken nt = t as Pullenti.Ner.NumberToken;
                    txt.Append(nt.GetSourceText());
                    t1 = t;
                    continue;
                }
                Pullenti.Ner.TextToken tt = t as Pullenti.Ner.TextToken;
                if (tt == null)
                {
                    // Not a text token: only two kinds of referent tokens are accepted, anything else stops the scan.
                    Pullenti.Ner.ReferentToken rt = t as Pullenti.Ner.ReferentToken;
                    if (rt != null && rt.BeginToken.IsValue("РФ", null))
                    {
                        // A referent token starting with "РФ" is accepted right after a dot.
                        if (txt.Length > 0 && txt[txt.Length - 1] == '.')
                        {
                            txt.Append(rt.BeginToken.GetSourceText());
                            t1 = t;
                            continue;
                        }
                    }
                    // A single-token Latin referent token is accepted by its source text.
                    if (rt != null && rt.Chars.IsLatinLetter && rt.BeginToken == rt.EndToken)
                    {
                        txt.Append(rt.BeginToken.GetSourceText());
                        t1 = t;
                        continue;
                    }
                    break;
                }
                string src = tt.GetSourceText();
                char   ch  = src[0];
                if (!char.IsLetter(ch))
                {
                    // A non-letter token must be one of the allowed characters.
                    if (chars.IndexOf(ch) < 0)
                    {
                        break;
                    }
                    // Brackets must be balanced: a closing bracket without its matching opener ends the content.
                    if (ch == '(' || ch == '[')
                    {
                        openChar = ch;
                    }
                    else if (ch == ')')
                    {
                        if (openChar != '(')
                        {
                            break;
                        }
                        openChar = (char)0;
                    }
                    else if (ch == ']')
                    {
                        if (openChar != '[')
                        {
                            break;
                        }
                        openChar = (char)0;
                    }
                }
                txt.Append(src);
                t1 = t;
            }
            // Nothing collected beyond the domain: the domain token (possibly null) is the result.
            if (txt.Length == 0)
            {
                return(dom);
            }
            int i;

            // The collected text must contain at least one letter or digit.
            for (i = 0; i < txt.Length; i++)
            {
                if (char.IsLetterOrDigit(txt[i]))
                {
                    break;
                }
            }
            if (i >= txt.Length)
            {
                return(dom);
            }
            // Drop a single trailing '.' or '/' and step the end token back accordingly.
            if (txt[txt.Length - 1] == '.' || txt[txt.Length - 1] == '/')
            {
                txt.Length--;
                t1 = t1.Previous;
            }
            if (dom != null)
            {
                txt.Insert(0, dom.Value);
            }
            string tmp = txt.ToString();

            // Text starting with two backslashes: every double backslash in the buffer is turned into "//".
            if (tmp.StartsWith("\\\\"))
            {
                txt.Replace("\\\\", "//");
                tmp = txt.ToString();
            }
            // The leading "//" is stripped only from tmp, which is used just for the "WWW" check below;
            // the resulting Value keeps it.
            if (tmp.StartsWith("//"))
            {
                tmp = tmp.Substring(2);
            }
            // A bare "WWW" (case-insensitive) is not a URI.
            if (string.Compare(tmp, "WWW", true) == 0)
            {
                return(null);
            }
            UriItemToken res = new UriItemToken(t0, t1)
            {
                Value = txt.ToString()
            };

            return(res);
        }
예제 #17
0
        public static OrgItemEponymToken TryAttach(Pullenti.Ner.Token t, bool mustHasPrefix = false)
        {
            Pullenti.Ner.TextToken tt = t as Pullenti.Ner.TextToken;
            if (tt == null)
            {
                if (t == null)
                {
                    return(null);
                }
                Pullenti.Ner.Referent r1 = t.GetReferent();
                if (r1 != null && r1.TypeName == "DATE")
                {
                    string str = r1.ToString().ToUpper();
                    if ((str == "1 МАЯ" || str == "7 ОКТЯБРЯ" || str == "9 МАЯ") || str == "8 МАРТА")
                    {
                        OrgItemEponymToken dt = new OrgItemEponymToken(t, t)
                        {
                            Eponyms = new List <string>()
                        };
                        dt.Eponyms.Add(str);
                        return(dt);
                    }
                }
                Pullenti.Ner.NumberToken age = Pullenti.Ner.Core.NumberHelper.TryParseAge(t);
                if ((age != null && (((age.EndToken.Next is Pullenti.Ner.TextToken) || (age.EndToken.Next is Pullenti.Ner.ReferentToken))) && (age.WhitespacesAfterCount < 3)) && !age.EndToken.Next.Chars.IsAllLower && age.EndToken.Next.Chars.IsCyrillicLetter)
                {
                    OrgItemEponymToken dt = new OrgItemEponymToken(t, age.EndToken.Next)
                    {
                        Eponyms = new List <string>()
                    };
                    dt.Eponyms.Add(string.Format("{0} {1}", age.Value, dt.EndToken.GetSourceText().ToUpper()));
                    return(dt);
                }
                return(null);
            }
            Pullenti.Ner.Token t1 = null;
            bool full             = false;
            bool hasName          = false;

            if (tt.Term == "ИМЕНИ" || tt.Term == "ІМЕНІ")
            {
                t1      = t.Next;
                full    = true;
                hasName = true;
            }
            else if (((tt.Term == "ИМ" || tt.Term == "ІМ")) && tt.Next != null)
            {
                if (tt.Next.IsChar('.'))
                {
                    t1   = tt.Next.Next;
                    full = true;
                }
                else if ((tt.Next is Pullenti.Ner.TextToken) && tt.Chars.IsAllLower && !tt.Next.Chars.IsAllLower)
                {
                    t1 = tt.Next;
                }
                hasName = true;
            }
            else if (tt.Previous != null && ((tt.Previous.IsValue("ФОНД", null) || tt.Previous.IsValue("ХРАМ", null) || tt.Previous.IsValue("ЦЕРКОВЬ", "ЦЕРКВА"))))
            {
                if ((!tt.Chars.IsCyrillicLetter || tt.Morph.Class.IsPreposition || tt.Morph.Class.IsConjunction) || !tt.Chars.IsLetter)
                {
                    return(null);
                }
                if (tt.WhitespacesBeforeCount != 1)
                {
                    return(null);
                }
                if (tt.Chars.IsAllLower)
                {
                    return(null);
                }
                if (tt.Morph.Class.IsAdjective)
                {
                    Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(tt, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                    if (npt != null && npt.BeginToken != npt.EndToken)
                    {
                        return(null);
                    }
                }
                OrgItemNameToken na = OrgItemNameToken.TryAttach(tt, null, false, true);
                if (na != null)
                {
                    if (na.IsEmptyWord || na.IsStdName || na.IsStdTail)
                    {
                        return(null);
                    }
                }
                t1 = tt;
            }
            if (t1 == null || ((t1.IsNewlineBefore && !full)))
            {
                return(null);
            }
            if (tt.Previous != null && tt.Previous.Morph.Class.IsPreposition)
            {
                return(null);
            }
            if (mustHasPrefix && !hasName)
            {
                return(null);
            }
            Pullenti.Ner.Referent r = t1.GetReferent();
            if ((r != null && r.TypeName == "DATE" && full) && r.FindSlot("DAY", null, true) != null && r.FindSlot("YEAR", null, true) == null)
            {
                OrgItemEponymToken dt = new OrgItemEponymToken(t, t1)
                {
                    Eponyms = new List <string>()
                };
                dt.Eponyms.Add(r.ToString().ToUpper());
                return(dt);
            }
            bool holy = false;

            if ((t1.IsValue("СВЯТОЙ", null) || t1.IsValue("СВЯТИЙ", null) || t1.IsValue("СВ", null)) || t1.IsValue("СВЯТ", null))
            {
                t1   = t1.Next;
                holy = true;
                if (t1 != null && t1.IsChar('.'))
                {
                    t1 = t1.Next;
                }
            }
            if (t1 == null)
            {
                return(null);
            }
            Pullenti.Morph.MorphClass cl = t1.GetMorphClassInDictionary();
            if (cl.IsNoun || cl.IsAdjective)
            {
                Pullenti.Ner.ReferentToken rt = t1.Kit.ProcessReferent("PERSON", t1);
                if (rt != null && rt.Referent.TypeName == "PERSON" && rt.BeginToken != rt.EndToken)
                {
                    string e = rt.Referent.GetStringValue("LASTNAME");
                    if (e != null)
                    {
                        if (rt.EndToken.IsValue(e, null))
                        {
                            OrgItemEponymToken re = new OrgItemEponymToken(t, rt.EndToken);
                            re.Eponyms.Add(rt.EndToken.GetSourceText());
                            return(re);
                        }
                    }
                }
            }
            Pullenti.Ner.NumberToken nt = Pullenti.Ner.Core.NumberHelper.TryParseAnniversary(t1);
            if (nt != null && nt.Typ == Pullenti.Ner.NumberSpellingType.Age)
            {
                Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(nt.EndToken.Next, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                if (npt != null)
                {
                    string             s   = string.Format("{0}-{1} {2}", nt.Value, (t.Kit.BaseLanguage.IsUa ? "РОКІВ" : "ЛЕТ"), Pullenti.Ner.Core.MiscHelper.GetTextValue(npt.BeginToken, npt.EndToken, Pullenti.Ner.Core.GetTextAttr.No));
                    OrgItemEponymToken res = new OrgItemEponymToken(t, npt.EndToken);
                    res.Eponyms.Add(s);
                    return(res);
                }
            }
            List <PersonItemToken> its = PersonItemToken.TryAttach(t1);

            if (its == null)
            {
                if ((t1 is Pullenti.Ner.ReferentToken) && (t1.GetReferent() is Pullenti.Ner.Geo.GeoReferent))
                {
                    string             s  = Pullenti.Ner.Core.MiscHelper.GetTextValue(t1, t1, Pullenti.Ner.Core.GetTextAttr.No);
                    OrgItemEponymToken re = new OrgItemEponymToken(t, t1);
                    re.Eponyms.Add(s);
                    return(re);
                }
                return(null);
            }
            List <string> eponims = new List <string>();
            int           i       = 0;
            int           j;

            if (its[i].Typ == PersonItemType.LocaseWord)
            {
                i++;
            }
            if (i >= its.Count)
            {
                return(null);
            }
            if (!full)
            {
                if (its[i].BeginToken.Morph.Class.IsAdjective && !its[i].BeginToken.Morph.Class.IsProperSurname)
                {
                    return(null);
                }
            }
            if (its[i].Typ == PersonItemType.Initial)
            {
                i++;
                while (true)
                {
                    if ((i < its.Count) && its[i].Typ == PersonItemType.Initial)
                    {
                        i++;
                    }
                    if (i >= its.Count || ((its[i].Typ != PersonItemType.Surname && its[i].Typ != PersonItemType.Name)))
                    {
                        break;
                    }
                    eponims.Add(its[i].Value);
                    t1 = its[i].EndToken;
                    if ((i + 2) >= its.Count || its[i + 1].Typ != PersonItemType.And || its[i + 2].Typ != PersonItemType.Initial)
                    {
                        break;
                    }
                    i += 3;
                }
            }
            else if (((i + 1) < its.Count) && its[i].Typ == PersonItemType.Name && its[i + 1].Typ == PersonItemType.Surname)
            {
                eponims.Add(its[i + 1].Value);
                t1 = its[i + 1].EndToken;
                i += 2;
                if ((((i + 2) < its.Count) && its[i].Typ == PersonItemType.And && its[i + 1].Typ == PersonItemType.Name) && its[i + 2].Typ == PersonItemType.Surname)
                {
                    eponims.Add(its[i + 2].Value);
                    t1 = its[i + 2].EndToken;
                }
            }
            else if (its[i].Typ == PersonItemType.Surname)
            {
                if (its.Count == (i + 2) && its[i].Chars == its[i + 1].Chars)
                {
                    its[i].Value   += (" " + its[i + 1].Value);
                    its[i].EndToken = its[i + 1].EndToken;
                    its.RemoveAt(i + 1);
                }
                eponims.Add(its[i].Value);
                if (((i + 1) < its.Count) && its[i + 1].Typ == PersonItemType.Name)
                {
                    if ((i + 2) == its.Count)
                    {
                        i++;
                    }
                    else if (its[i + 2].Typ != PersonItemType.Surname)
                    {
                        i++;
                    }
                }
                else if (((i + 1) < its.Count) && its[i + 1].Typ == PersonItemType.Initial)
                {
                    if ((i + 2) == its.Count)
                    {
                        i++;
                    }
                    else if (its[i + 2].Typ == PersonItemType.Initial && (i + 3) == its.Count)
                    {
                        i += 2;
                    }
                }
                else if (((i + 2) < its.Count) && its[i + 1].Typ == PersonItemType.And && its[i + 2].Typ == PersonItemType.Surname)
                {
                    bool ok = true;
                    Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(its[i + 2].BeginToken, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                    if (npt != null && !npt.Morph.Case.IsGenitive && !npt.Morph.Case.IsUndefined)
                    {
                        ok = false;
                    }
                    if (ok)
                    {
                        eponims.Add(its[i + 2].Value);
                        i += 2;
                    }
                }
                t1 = its[i].EndToken;
            }
            else if (its[i].Typ == PersonItemType.Name && holy)
            {
                t1 = its[i].EndToken;
                bool sec = false;
                if (((i + 1) < its.Count) && its[i].Chars == its[i + 1].Chars && its[i + 1].Typ != PersonItemType.Initial)
                {
                    sec = true;
                    t1  = its[i + 1].EndToken;
                }
                if (sec)
                {
                    eponims.Add(string.Format("СВЯТ.{0} {1}", its[i].Value, its[i + 1].Value));
                }
                else
                {
                    eponims.Add(string.Format("СВЯТ.{0}", its[i].Value));
                }
            }
            else if (full && (i + 1) == its.Count && ((its[i].Typ == PersonItemType.Name || its[i].Typ == PersonItemType.Surname)))
            {
                t1 = its[i].EndToken;
                eponims.Add(its[i].Value);
            }
            else if ((its[i].Typ == PersonItemType.Name && its.Count == 3 && its[i + 1].Typ == PersonItemType.Name) && its[i + 2].Typ == PersonItemType.Surname)
            {
                t1 = its[i + 2].EndToken;
                eponims.Add(string.Format("{0} {1} {2}", its[i].Value, its[i + 1].Value, its[i + 2].Value));
                i += 2;
            }
            if (eponims.Count == 0)
            {
                return(null);
            }
            return(new OrgItemEponymToken(t, t1)
            {
                Eponyms = eponims
            });
        }
예제 #18
0
        /// <summary>
        /// Splits the token chain into mail lines, groups the lines into blocks and, for every block,
        /// registers <c>MailReferent</c> entities of kind Head, Hello, Body and Tail.
        /// Each referent gets its occurrence recorded through <c>AddOccurenceOfRefTok</c>;
        /// <c>kit.EmbedToken</c> is not called in this method.
        /// </summary>
        /// <param name="kit">analysis kit that supplies the first token and this analyzer's data storage</param>
        public override void Process(Pullenti.Ner.Core.AnalysisKit kit)
        {
            List <Pullenti.Ner.Mail.Internal.MailLine> lines = new List <Pullenti.Ner.Mail.Internal.MailLine>();

            // Step 1: parse the text into MailLine items; after a successful parse the scan
            // continues from the token following the parsed line.
            for (Pullenti.Ner.Token t = kit.FirstToken; t != null; t = t.Next)
            {
                Pullenti.Ner.Mail.Internal.MailLine ml = Pullenti.Ner.Mail.Internal.MailLine.Parse(t, 0, 0);
                if (ml == null)
                {
                    continue;
                }
                // NOTE(review): empty body, has no effect - looks like a leftover debugger breakpoint anchor.
                if (lines.Count == 91)
                {
                }
                lines.Add(ml);
                t = ml.EndToken;
            }
            if (lines.Count == 0)
            {
                return;
            }
            int i;
            List <List <Pullenti.Ner.Mail.Internal.MailLine> > blocks = new List <List <Pullenti.Ner.Mail.Internal.MailLine> >();
            List <Pullenti.Ner.Mail.Internal.MailLine>         blk    = null;

            // Step 2: group the lines into blocks. A "From" line may open a new block;
            // every other line is appended to the current block.
            for (i = 0; i < lines.Count; i++)
            {
                Pullenti.Ner.Mail.Internal.MailLine ml = lines[i];
                if (ml.Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.From)
                {
                    // Criterion 1: the line is flagged MustBeFirstLine or is the very first line.
                    bool isNew = ml.MustBeFirstLine || i == 0;
                    // Criterion 2: one of the next two lines is a From or a Hello line.
                    if (((i + 2) < lines.Count) && (((lines[i + 1].Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.From || lines[i + 2].Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.From || lines[i + 1].Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.Hello) || lines[i + 2].Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.Hello)))
                    {
                        isNew = true;
                    }
                    // Criterion 3: the nearest preceding line with a defined type is BestRegards.
                    if (!isNew)
                    {
                        for (int j = i - 1; j >= 0; j--)
                        {
                            if (lines[j].Typ != Pullenti.Ner.Mail.Internal.MailLine.Types.Undefined)
                            {
                                if (lines[j].Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.BestRegards)
                                {
                                    isNew = true;
                                }
                                break;
                            }
                        }
                    }
                    // Criterion 4: the line itself contains a DATE or URI referent.
                    if (!isNew)
                    {
                        for (Pullenti.Ner.Token tt = ml.BeginToken; tt != null && tt.EndChar <= ml.EndChar; tt = tt.Next)
                        {
                            if (tt.GetReferent() != null)
                            {
                                if (tt.GetReferent().TypeName == "DATE" || tt.GetReferent().TypeName == "URI")
                                {
                                    isNew = true;
                                }
                            }
                        }
                    }
                    if (isNew)
                    {
                        blk = new List <Pullenti.Ner.Mail.Internal.MailLine>();
                        blocks.Add(blk);
                        // Collect the header lines of the new block, advancing the outer index i.
                        for (; i < lines.Count; i++)
                        {
                            if (lines[i].Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.From)
                            {
                                // A MustBeFirstLine line after at least one collected line ends this header.
                                if (blk.Count > 0 && lines[i].MustBeFirstLine)
                                {
                                    break;
                                }
                                blk.Add(lines[i]);
                            }
                            else if (((i + 1) < lines.Count) && lines[i + 1].Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.From)
                            {
                                // A non-From line directly followed by a From line.
                                // Look for an already collected From line that is IsRealFrom, MustBeFirstLine or has a MailAddr.
                                int j;
                                for (j = 0; j < blk.Count; j++)
                                {
                                    if (blk[j].Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.From)
                                    {
                                        if (blk[j].IsRealFrom || blk[j].MustBeFirstLine || blk[j].MailAddr != null)
                                        {
                                            break;
                                        }
                                    }
                                }
                                // None found: keep the line in the header and go on.
                                if (j >= blk.Count)
                                {
                                    blk.Add(lines[i]);
                                    continue;
                                }
                                // Otherwise check whether the run of From lines that follows contains
                                // one that is IsRealFrom, MustBeFirstLine or has a MailAddr; if so, stop the header here.
                                bool ok = false;
                                for (j = i + 1; j < lines.Count; j++)
                                {
                                    if (lines[j].Typ != Pullenti.Ner.Mail.Internal.MailLine.Types.From)
                                    {
                                        break;
                                    }
                                    if (lines[j].IsRealFrom || lines[j].MustBeFirstLine)
                                    {
                                        ok = true;
                                        break;
                                    }
                                    if (lines[j].MailAddr != null)
                                    {
                                        ok = true;
                                        break;
                                    }
                                }
                                if (ok)
                                {
                                    break;
                                }
                                blk.Add(lines[i]);
                            }
                            else
                            {
                                break;
                            }
                        }
                        // Step back so that the outer loop's i++ lands on the first line not consumed above.
                        i--;
                        continue;
                    }
                }
                // Lines seen before any block was opened go into an implicitly created first block.
                if (blk == null)
                {
                    blocks.Add((blk = new List <Pullenti.Ner.Mail.Internal.MailLine>()));
                }
                blk.Add(lines[i]);
            }
            if (blocks.Count == 0)
            {
                return;
            }
            Pullenti.Ner.Core.AnalyzerData ad = kit.GetAnalyzerData(this);
            // Step 3: for every block create the Head / Hello / Body / Tail referents.
            // From here on "lines" refers to the lines of the current block.
            for (int j = 0; j < blocks.Count; j++)
            {
                lines = blocks[j];
                if (lines.Count == 0)
                {
                    continue;
                }
                i = 0;
                if (lines[0].Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.From)
                {
                    // Head: spans the leading From lines; a single non-From line is skipped over
                    // when the line after it is From again. t1 tracks the end of the last From line.
                    Pullenti.Ner.Token t1 = lines[0].EndToken;
                    for (; i < lines.Count; i++)
                    {
                        if (lines[i].Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.From)
                        {
                            t1 = lines[i].EndToken;
                        }
                        else if (((i + 1) < lines.Count) && lines[i + 1].Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.From)
                        {
                        }
                        else
                        {
                            break;
                        }
                    }
                    MailReferent mail = new MailReferent()
                    {
                        Kind = MailKind.Head
                    };
                    Pullenti.Ner.ReferentToken mt = new Pullenti.Ner.ReferentToken(mail, lines[0].BeginToken, t1);
                    mail.Text = Pullenti.Ner.Core.MiscHelper.GetTextValueOfMetaToken(mt, Pullenti.Ner.Core.GetTextAttr.KeepRegister);
                    ad.RegisterReferent(mail);
                    mail.AddOccurenceOfRefTok(mt);
                }
                // i0 is the index of the first line after the head; t2 will be the first token of the tail, if any.
                int i0 = i;
                Pullenti.Ner.Token t2 = null;
                int err = 0;
                // Scan from the bottom of the block upwards looking for the start of the tail.
                for (i = lines.Count - 1; i >= i0; i--)
                {
                    Pullenti.Ner.Mail.Internal.MailLine li = lines[i];
                    if (li.Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.BestRegards)
                    {
                        t2 = lines[i].BeginToken;
                        // Extend the tail upwards over further BestRegards lines with fewer than 2 words,
                        // or over a line with fewer than 3 words that is preceded by such a BestRegards line.
                        for (--i; i >= i0; i--)
                        {
                            if (lines[i].Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.BestRegards && (lines[i].Words < 2))
                            {
                                t2 = lines[i].BeginToken;
                            }
                            else if ((i > i0 && (lines[i].Words < 3) && lines[i - 1].Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.BestRegards) && (lines[i - 1].Words < 2))
                            {
                                i--;
                                t2 = lines[i].BeginToken;
                            }
                            else
                            {
                                break;
                            }
                        }
                        break;
                    }
                    // A line with referents and fewer than 3 words (not the first one) becomes the tail start candidate.
                    if (li.Refs.Count > 0 && (li.Words < 3) && i > i0)
                    {
                        err = 0;
                        t2  = li.BeginToken;
                        continue;
                    }
                    // A line with more than 10 words discards the current candidate.
                    if (li.Words > 10)
                    {
                        t2 = null;
                        continue;
                    }
                    // More than two lines with more than 2 words since the last candidate also discard it.
                    if (li.Words > 2)
                    {
                        if ((++err) > 2)
                        {
                            t2 = null;
                        }
                    }
                }
                // Fallback: take the lowest Undefined line (not the first one) that has no words
                // and whose first referent is a person.
                if (t2 == null)
                {
                    for (i = lines.Count - 1; i >= i0; i--)
                    {
                        Pullenti.Ner.Mail.Internal.MailLine li = lines[i];
                        if (li.Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.Undefined)
                        {
                            if (li.Refs.Count > 0 && (li.Refs[0] is Pullenti.Ner.Person.PersonReferent))
                            {
                                if (li.Words == 0 && i > i0)
                                {
                                    t2 = li.BeginToken;
                                    break;
                                }
                            }
                        }
                    }
                }
                // Hello: search from i0 for a Hello line; only Undefined lines without words
                // and without referents may precede it.
                for (int ii = i0; ii < lines.Count; ii++)
                {
                    if (lines[ii].Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.Hello)
                    {
                        MailReferent mail = new MailReferent()
                        {
                            Kind = MailKind.Hello
                        };
                        Pullenti.Ner.ReferentToken mt = new Pullenti.Ner.ReferentToken(mail, lines[i0].BeginToken, lines[ii].EndToken);
                        if (mt.LengthChar > 0)
                        {
                            mail.Text = Pullenti.Ner.Core.MiscHelper.GetTextValueOfMetaToken(mt, Pullenti.Ner.Core.GetTextAttr.KeepRegister);
                            ad.RegisterReferent(mail);
                            mail.AddOccurenceOfRefTok(mt);
                            // The body starts after the greeting.
                            i0 = ii + 1;
                        }
                        break;
                    }
                    else if (lines[ii].Typ != Pullenti.Ner.Mail.Internal.MailLine.Types.Undefined || lines[ii].Words > 0 || lines[ii].Refs.Count > 0)
                    {
                        break;
                    }
                }
                if (i0 < lines.Count)
                {
                    // Body: skipped when the tail starts at a token that has no previous token.
                    if (t2 != null && t2.Previous == null)
                    {
                    }
                    else
                    {
                        MailReferent mail = new MailReferent()
                        {
                            Kind = MailKind.Body
                        };
                        // The body ends just before the tail, or at the end of the block when there is no tail.
                        Pullenti.Ner.ReferentToken mt = new Pullenti.Ner.ReferentToken(mail, lines[i0].BeginToken, (t2 != null && t2.Previous != null ? t2.Previous : lines[lines.Count - 1].EndToken));
                        if (mt.LengthChar > 0)
                        {
                            mail.Text = Pullenti.Ner.Core.MiscHelper.GetTextValueOfMetaToken(mt, Pullenti.Ner.Core.GetTextAttr.KeepRegister);
                            ad.RegisterReferent(mail);
                            mail.AddOccurenceOfRefTok(mt);
                        }
                    }
                    // Tail: from t2 to the end of the block.
                    if (t2 != null)
                    {
                        MailReferent mail = new MailReferent()
                        {
                            Kind = MailKind.Tail
                        };
                        Pullenti.Ner.ReferentToken mt = new Pullenti.Ner.ReferentToken(mail, t2, lines[lines.Count - 1].EndToken);
                        if (mt.LengthChar > 0)
                        {
                            mail.Text = Pullenti.Ner.Core.MiscHelper.GetTextValueOfMetaToken(mt, Pullenti.Ner.Core.GetTextAttr.KeepRegister);
                            ad.RegisterReferent(mail);
                            mail.AddOccurenceOfRefTok(mt);
                        }
                        // Attach the referents of every line that starts at or after t2 to the tail referent.
                        // NOTE(review): this runs even when mt.LengthChar is 0 and the referent was not registered - confirm intended.
                        for (i = i0; i < lines.Count; i++)
                        {
                            if (lines[i].BeginChar >= t2.BeginChar)
                            {
                                foreach (Pullenti.Ner.Referent r in lines[i].Refs)
                                {
                                    mail.AddRef(r, 0);
                                }
                            }
                        }
                    }
                }
            }
        }
예제 #19
0
 /// <summary>
 /// Scans the token chain for measure expressions, embeds the resulting referent tokens
 /// into the chain and, when an external ontology is supplied, links the found
 /// <c>UnitReferent</c> entities to the ontology items that share a name with them.
 /// </summary>
 /// <param name="kit">analysis kit that supplies the tokens, the optional ontology and this analyzer's data</param>
 public override void Process(Pullenti.Ner.Core.AnalysisKit kit)
 {
     Pullenti.Ner.Core.AnalyzerData data = kit.GetAnalyzerData(this);

     // Additional unit terms built from the ontology: one term per NAME / FULLNAME slot
     // of every ontology unit whose m_Unit is not set.
     Pullenti.Ner.Core.TerminCollection extraUnits = null;
     if (kit.Ontology != null)
     {
         extraUnits = new Pullenti.Ner.Core.TerminCollection();
         foreach (Pullenti.Ner.ExtOntologyItem item in kit.Ontology.Items)
         {
             UnitReferent ontoUnit = item.Referent as UnitReferent;
             if (ontoUnit == null || ontoUnit.m_Unit != null)
             {
                 continue;
             }
             foreach (Pullenti.Ner.Slot slot in ontoUnit.Slots)
             {
                 bool isNameSlot = slot.TypeName == UnitReferent.ATTR_NAME || slot.TypeName == UnitReferent.ATTR_FULLNAME;
                 if (!isNameSlot)
                 {
                     continue;
                 }
                 Pullenti.Ner.Core.Termin termin = new Pullenti.Ner.Core.Termin(slot.Value as string);
                 termin.Tag = ontoUnit;
                 extraUnits.Add(termin);
             }
         }
     }

     for (Pullenti.Ner.Token cur = kit.FirstToken; cur != null; cur = cur.Next)
     {
         // Try the minimal form first and fall back to the general parser.
         Pullenti.Ner.Measure.Internal.MeasureToken measure =
             Pullenti.Ner.Measure.Internal.MeasureToken.TryParseMinimal(cur, extraUnits, false)
             ?? Pullenti.Ner.Measure.Internal.MeasureToken.TryParse(cur, extraUnits, true, false, false, false);
         if (measure == null)
         {
             continue;
         }
         List<Pullenti.Ner.ReferentToken> created = measure.CreateRefenetsTokensWithRegister(data, true);
         if (created == null)
         {
             continue;
         }
         for (int idx = 0; idx < created.Count; idx++)
         {
             Pullenti.Ner.ReferentToken embedded = created[idx];
             cur.Kit.EmbedToken(embedded);
             cur = embedded;
             // Tokens still to be embedded that shared a boundary with the one just embedded
             // must now use the embedded token itself as that boundary.
             for (int rest = idx + 1; rest < created.Count; rest++)
             {
                 Pullenti.Ner.ReferentToken pending = created[rest];
                 if (pending.BeginToken == embedded.BeginToken)
                 {
                     pending.BeginToken = cur;
                 }
                 if (pending.EndToken == embedded.EndToken)
                 {
                     pending.EndToken = cur;
                 }
             }
         }
     }

     if (kit.Ontology == null)
     {
         return;
     }

     // Link every registered unit to the first ontology unit having a NAME / FULLNAME value
     // that is also present among the slots of the registered unit.
     foreach (Pullenti.Ner.Referent found in data.Referents)
     {
         UnitReferent unit = found as UnitReferent;
         if (unit == null)
         {
             continue;
         }
         foreach (Pullenti.Ner.ExtOntologyItem item in kit.Ontology.Items)
         {
             UnitReferent ontoUnit = item.Referent as UnitReferent;
             if (ontoUnit == null)
             {
                 continue;
             }
             bool matched = false;
             foreach (Pullenti.Ner.Slot slot in ontoUnit.Slots)
             {
                 if ((slot.TypeName == UnitReferent.ATTR_NAME || slot.TypeName == UnitReferent.ATTR_FULLNAME) && unit.FindSlot(null, slot.Value, true) != null)
                 {
                     matched = true;
                     break;
                 }
             }
             if (!matched)
             {
                 continue;
             }
             unit.OntologyItems = new List<Pullenti.Ner.ExtOntologyItem>();
             unit.OntologyItems.Add(item);
             break;
         }
     }
 }
예제 #20
0
        /// <summary>
        /// Tries to recognise a title-page item that starts at the given token:
        /// a theme marker, a translation note, a section heading, a speciality,
        /// a work type taken from <c>m_Termins</c>, or such a term enclosed in brackets.
        /// </summary>
        /// <param name="t">token to start the recognition from</param>
        /// <returns>the recognised item, or null when nothing matches</returns>
        public static TitleItemToken TryAttach(Pullenti.Ner.Token t)
        {
            Pullenti.Ner.TextToken tt = t as Pullenti.Ner.TextToken;
            if (tt != null)
            {
                Pullenti.Ner.Token t1 = (Pullenti.Ner.Token)tt;
                // "ТЕМА" followed by a work type gives TypAndTheme; otherwise a plain Theme marker.
                if (tt.Term == "ТЕМА")
                {
                    TitleItemToken tit = TryAttach(tt.Next);
                    if (tit != null && tit.Typ == Types.Typ)
                    {
                        t1 = tit.EndToken;
                        if (t1.Next != null && t1.Next.IsChar(':'))
                        {
                            t1 = t1.Next;
                        }
                        return(new TitleItemToken(t, t1, Types.TypAndTheme)
                        {
                            Value = tit.Value
                        });
                    }
                    if (tt.Next != null && tt.Next.IsChar(':'))
                    {
                        t1 = tt.Next;
                    }
                    return(new TitleItemToken(tt, t1, Types.Theme));
                }
                // "ПО ТЕМА" / "НА ТЕМА" with an optional trailing colon.
                if (tt.Term == "ПО" || tt.Term == "НА")
                {
                    if (tt.Next != null && tt.Next.IsValue("ТЕМА", null))
                    {
                        t1 = tt.Next;
                        if (t1.Next != null && t1.Next.IsChar(':'))
                        {
                            t1 = t1.Next;
                        }
                        return(new TitleItemToken(tt, t1, Types.Theme));
                    }
                }
                // "ПЕРЕВОД" / "ПЕР" [.] + "C" (Latin) or "С" (Cyrillic) + a text token => Translate.
                if (tt.Term == "ПЕРЕВОД" || tt.Term == "ПЕР")
                {
                    Pullenti.Ner.Token tt2 = tt.Next;
                    if (tt2 != null && tt2.IsChar('.'))
                    {
                        tt2 = tt2.Next;
                    }
                    if (tt2 is Pullenti.Ner.TextToken)
                    {
                        if ((tt2 as Pullenti.Ner.TextToken).Term == "C" || (tt2 as Pullenti.Ner.TextToken).Term == "С")
                        {
                            tt2 = tt2.Next;
                            if (tt2 is Pullenti.Ner.TextToken)
                            {
                                return(new TitleItemToken(t, tt2, Types.Translate));
                            }
                        }
                    }
                }
                // Section heading: the section word, an optional colon, then either a bracketed
                // sequence or (only if a colon was consumed) everything up to the end of the line.
                if (tt.Term == "СЕКЦИЯ" || tt.Term == "SECTION" || tt.Term == "СЕКЦІЯ")
                {
                    t1 = tt.Next;
                    if (t1 != null && t1.IsChar(':'))
                    {
                        t1 = t1.Next;
                    }
                    Pullenti.Ner.Core.BracketSequenceToken br = Pullenti.Ner.Core.BracketHelper.TryParse(t1, Pullenti.Ner.Core.BracketParseAttr.No, 100);
                    if (br != null)
                    {
                        t1 = br.EndToken;
                    }
                    else if (t1 != tt.Next)
                    {
                        for (; t1 != null; t1 = t1.Next)
                        {
                            if (t1.IsNewlineAfter)
                            {
                                break;
                            }
                        }
                        if (t1 == null)
                        {
                            return(null);
                        }
                    }
                    if (t1 != tt.Next)
                    {
                        return(new TitleItemToken(tt, t1, Types.Dust));
                    }
                }
                // Speciality introduced by the keyword, by a preposition plus the keyword,
                // or by a '/' at the beginning of a line.
                t1 = null;
                if (tt.IsValue("СПЕЦИАЛЬНОСТЬ", "СПЕЦІАЛЬНІСТЬ"))
                {
                    t1 = tt.Next;
                }
                else if (tt.Morph.Class.IsPreposition && tt.Next != null && tt.Next.IsValue("СПЕЦИАЛЬНОСТЬ", "СПЕЦІАЛЬНІСТЬ"))
                {
                    t1 = tt.Next.Next;
                }
                else if (tt.IsChar('/') && tt.IsNewlineBefore)
                {
                    t1 = tt.Next;
                }
                if (t1 != null)
                {
                    if (t1.IsCharOf(":") || t1.IsHiphen)
                    {
                        t1 = t1.Next;
                    }
                    TitleItemToken spec = TryAttachSpeciality(t1, true);
                    if (spec != null)
                    {
                        spec.BeginToken = t;
                        return(spec);
                    }
                }
            }
            // A speciality without an introducing keyword.
            TitleItemToken sss = TryAttachSpeciality(t, false);

            if (sss != null)
            {
                return(sss);
            }
            if (t is Pullenti.Ner.ReferentToken)
            {
                return(null);
            }
            // A noun phrase whose last token is a term of kind Typ.
            Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
            if (npt != null)
            {
                string s = npt.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false);
                Pullenti.Ner.Core.TerminToken tok = m_Termins.TryParse(npt.EndToken, Pullenti.Ner.Core.TerminParseAttr.No);
                if (tok != null)
                {
                    Types ty = (Types)tok.Termin.Tag;
                    if (ty == Types.Typ)
                    {
                        // Work type immediately followed by a theme marker.
                        TitleItemToken tit = TryAttach(tok.EndToken.Next);
                        if (tit != null && tit.Typ == Types.Theme)
                        {
                            // Fixed: the statement terminator was misplaced after the closing brace of this block.
                            return(new TitleItemToken(npt.BeginToken, tit.EndToken, Types.TypAndTheme)
                            {
                                Value = s
                            });
                        }
                        // These bare words alone are not accepted as a work type.
                        if (s == "РАБОТА" || s == "РОБОТА" || s == "ПРОЕКТ")
                        {
                            return(null);
                        }
                        Pullenti.Ner.Token t1 = tok.EndToken;
                        if (s == "ДИССЕРТАЦИЯ" || s == "ДИСЕРТАЦІЯ")
                        {
                            // Look ahead for the degree (doctor / candidate / master) to refine the value;
                            // the scan stops after more than 3 tokens that matched nothing.
                            int err = 0;
                            for (Pullenti.Ner.Token ttt = t1.Next; ttt != null; ttt = ttt.Next)
                            {
                                if (ttt.Morph.Class.IsPreposition)
                                {
                                    continue;
                                }
                                if (ttt.IsValue("СОИСКАНИЕ", ""))
                                {
                                    continue;
                                }
                                Pullenti.Ner.Core.NounPhraseToken npt1 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(ttt, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                                if (npt1 != null && npt1.Noun.IsValue("СТЕПЕНЬ", "СТУПІНЬ"))
                                {
                                    t1 = (ttt = npt1.EndToken);
                                    continue;
                                }
                                // First ask the PERSON analyzer for a person property at this position.
                                Pullenti.Ner.ReferentToken rt = t1.Kit.ProcessReferent("PERSON", ttt);
                                if (rt != null && (rt.Referent is Pullenti.Ner.Person.PersonPropertyReferent))
                                {
                                    Pullenti.Ner.Person.PersonPropertyReferent ppr = rt.Referent as Pullenti.Ner.Person.PersonPropertyReferent;
                                    if (ppr.Name == "доктор наук")
                                    {
                                        t1 = rt.EndToken;
                                        s  = "ДОКТОРСКАЯ ДИССЕРТАЦИЯ";
                                        break;
                                    }
                                    else if (ppr.Name == "кандидат наук")
                                    {
                                        t1 = rt.EndToken;
                                        s  = "КАНДИДАТСКАЯ ДИССЕРТАЦИЯ";
                                        break;
                                    }
                                    else if (ppr.Name == "магистр")
                                    {
                                        t1 = rt.EndToken;
                                        s  = "МАГИСТЕРСКАЯ ДИССЕРТАЦИЯ";
                                        break;
                                    }
                                }
                                // Otherwise match the degree word directly, optionally extended to a noun phrase ending with "НАУК".
                                if (ttt.IsValue("ДОКТОР", null) || ttt.IsValue("КАНДИДАТ", null) || ttt.IsValue("МАГИСТР", "МАГІСТР"))
                                {
                                    t1   = ttt;
                                    npt1 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(ttt.Next, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                                    if (npt1 != null && npt1.EndToken.IsValue("НАУК", null))
                                    {
                                        t1 = npt1.EndToken;
                                    }
                                    s = (ttt.IsValue("МАГИСТР", "МАГІСТР") ? "МАГИСТЕРСКАЯ ДИССЕРТАЦИЯ" : (ttt.IsValue("ДОКТОР", null) ? "ДОКТОРСКАЯ ДИССЕРТАЦИЯ" : "КАНДИДАТСКАЯ ДИССЕРТАЦИЯ"));
                                    break;
                                }
                                if ((++err) > 3)
                                {
                                    break;
                                }
                            }
                        }
                        // Include a trailing period.
                        if (t1.Next != null && t1.Next.IsChar('.'))
                        {
                            t1 = t1.Next;
                        }
                        // A value ending with "ОТЧЕТ" followed by "О" + a noun phrase in the prepositional case: include that phrase.
                        if (s.EndsWith("ОТЧЕТ") && t1.Next != null && t1.Next.IsValue("О", null))
                        {
                            Pullenti.Ner.Core.NounPhraseToken npt1 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t1.Next, Pullenti.Ner.Core.NounPhraseParseAttr.ParsePreposition, 0, null);
                            if (npt1 != null && npt1.Morph.Case.IsPrepositional)
                            {
                                t1 = npt1.EndToken;
                            }
                        }
                        return(new TitleItemToken(npt.BeginToken, t1, ty)
                        {
                            Value = s
                        });
                    }
                }
            }
            // A term from m_Termins directly at the token.
            Pullenti.Ner.Core.TerminToken tok1 = m_Termins.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
            if (tok1 != null)
            {
                Pullenti.Ner.Token t1 = tok1.EndToken;
                TitleItemToken     re = new TitleItemToken(tok1.BeginToken, t1, (Types)tok1.Termin.Tag);
                return(re);
            }
            // A term enclosed in brackets: the result begins at the term and ends at the closing bracket.
            if (Pullenti.Ner.Core.BracketHelper.CanBeStartOfSequence(t, false, false))
            {
                tok1 = m_Termins.TryParse(t.Next, Pullenti.Ner.Core.TerminParseAttr.No);
                if (tok1 != null && Pullenti.Ner.Core.BracketHelper.CanBeEndOfSequence(tok1.EndToken.Next, false, null, false))
                {
                    Pullenti.Ner.Token t1 = tok1.EndToken.Next;
                    return(new TitleItemToken(tok1.BeginToken, t1, (Types)tok1.Termin.Tag));
                }
            }
            return(null);
        }
예제 #21
0
        /// <summary>
        /// Builds a <c>CanonicDecreeRefUri</c> for a referent token that wraps either a
        /// <c>DecreeReferent</c> or a <c>DecreePartReferent</c>, and narrows the
        /// <c>BeginChar</c>/<c>EndChar</c> span of the result to the part of the text that
        /// the heuristics below select as the reference itself.
        /// </summary>
        /// <param name="t">Token to inspect; anything that is not a <c>ReferentToken</c> yields null.</param>
        /// <returns>
        /// The populated result, or null when <paramref name="t"/> is not a referent token,
        /// wraps a decree of kind <c>Publisher</c>, wraps neither a decree nor a decree part,
        /// or is the right-hand side of a "part - part" range that can be merged with the
        /// preceding part referent.
        /// </returns>
        public static CanonicDecreeRefUri TryCreateCanonicDecreeRefUri(Pullenti.Ner.Token t)
        {
            if (!(t is Pullenti.Ner.ReferentToken))
            {
                return(null);
            }
            Pullenti.Ner.Decree.DecreeReferent dr = t.GetReferent() as Pullenti.Ner.Decree.DecreeReferent;
            CanonicDecreeRefUri res;

            // ---------- Case 1: the token wraps a whole decree ----------
            if (dr != null)
            {
                if (dr.Kind == Pullenti.Ner.Decree.DecreeKind.Publisher)
                {
                    return(null);
                }
                // Start from the full span of the referent token; the code below only ever narrows it.
                res = new CanonicDecreeRefUri(t.Kit.Sofa.Text)
                {
                    Ref = dr, BeginChar = t.BeginChar, EndChar = t.EndChar
                };
                // Token is directly enclosed in "( ... )" by its neighbours: keep the full span.
                if ((t.Previous != null && t.Previous.IsChar('(') && t.Next != null) && t.Next.IsChar(')'))
                {
                    return(res);
                }
                // Any non-zero MiscAttrs also short-circuits with the full span.
                // NOTE(review): the meaning of MiscAttrs is not visible here - confirm.
                if ((t as Pullenti.Ner.ReferentToken).MiscAttrs != 0)
                {
                    return(res);
                }
                Pullenti.Ner.ReferentToken rt = t as Pullenti.Ner.ReferentToken;
                // The referent token itself starts with '(' and ends with ')': report only the inside.
                if (rt.BeginToken.IsChar('(') && rt.EndToken.IsChar(')'))
                {
                    res = new CanonicDecreeRefUri(t.Kit.Sofa.Text)
                    {
                        Ref = dr, BeginChar = rt.BeginToken.Next.BeginChar, EndChar = rt.EndToken.Previous.EndChar
                    };
                    return(res);
                }
                // If this decree is followed by a comma/"and" and another decree referent, parse the
                // items of that following decree; they are compared with this decree's items below.
                List <DecreeToken> nextDecreeItems = null;
                if ((t.Next != null && t.Next.IsCommaAnd && (t.Next.Next is Pullenti.Ner.ReferentToken)) && (t.Next.Next.GetReferent() is Pullenti.Ner.Decree.DecreeReferent))
                {
                    nextDecreeItems = DecreeToken.TryAttachList((t.Next.Next as Pullenti.Ner.ReferentToken).BeginToken, null, 10, false);
                    if (nextDecreeItems != null && nextDecreeItems.Count > 1)
                    {
                        // Keep items only up to (and including) the first one that is followed by a newline.
                        for (int i = 0; i < (nextDecreeItems.Count - 1); i++)
                        {
                            if (nextDecreeItems[i].IsNewlineAfter)
                            {
                                nextDecreeItems.RemoveRange(i + 1, nextDecreeItems.Count - i - 1);
                                break;
                            }
                        }
                    }
                }
                // wasTyp: the first parsed item was a type (or an owner/org directly followed by a date/number).
                // wasNum: a number item has already been seen.
                bool wasTyp = false;
                bool wasNum = false;
                // Walk the inner tokens of the referent token, staying within its character span.
                for (Pullenti.Ner.Token tt = (t as Pullenti.Ner.MetaToken).BeginToken; tt != null && tt.EndChar <= t.EndChar; tt = tt.Next)
                {
                    // Leading '(' is excluded from the span.
                    if (tt.BeginChar == t.BeginChar && tt.IsChar('(') && tt.Next != null)
                    {
                        res.BeginChar = tt.Next.BeginChar;
                    }
                    // "(ДАЛЕЕ ..." ("hereinafter ...") clause: the span ends before the parenthesis.
                    if (tt.IsChar('(') && tt.Next != null && tt.Next.IsValue("ДАЛЕЕ", null))
                    {
                        if (res.EndChar >= tt.BeginChar)
                        {
                            // NOTE(review): tt.Previous is dereferenced without a null check - confirm it cannot be null here.
                            res.EndChar = tt.Previous.EndChar;
                        }
                        break;
                    }
                    // Trailing ')' is excluded; then scan backwards for a '(' inside the span and,
                    // when found after the span start, cut the span before that parenthesis.
                    if (tt.EndChar == t.EndChar && tt.IsChar(')'))
                    {
                        // NOTE(review): tt.Previous is dereferenced without a null check - confirm.
                        res.EndChar = tt.Previous.EndChar;
                        for (Pullenti.Ner.Token tt1 = tt.Previous; tt1 != null && tt1.BeginChar >= res.BeginChar; tt1 = tt1.Previous)
                        {
                            if (tt1.IsChar('(') && tt1.Previous != null)
                            {
                                if (res.BeginChar < tt1.Previous.BeginChar)
                                {
                                    res.EndChar = tt1.Previous.EndChar;
                                }
                            }
                        }
                    }
                    // Parse a run of decree items (type, number, date, name, ...) starting at tt.
                    List <DecreeToken> li = DecreeToken.TryAttachList(tt, null, 10, false);
                    if (li != null && li.Count > 0)
                    {
                        // A type item immediately followed by a territory item gives the "type with geo" text.
                        for (int ii = 0; ii < (li.Count - 1); ii++)
                        {
                            if (li[ii].Typ == DecreeToken.ItemType.Typ && li[ii + 1].Typ == DecreeToken.ItemType.Terr)
                            {
                                res.TypeWithGeo = Pullenti.Ner.Core.MiscHelper.GetTextValue(li[ii].BeginToken, li[ii + 1].EndToken, Pullenti.Ner.Core.GetTextAttr.FirstNounGroupToNominativeSingle);
                            }
                        }
                        // The following decree has fewer items and does not start with a type: if its item
                        // types match the tail of this decree's items one-to-one, drop this decree's leading
                        // items and start the span at the first matching one.
                        if ((nextDecreeItems != null && nextDecreeItems.Count > 1 && (nextDecreeItems.Count < li.Count)) && nextDecreeItems[0].Typ != DecreeToken.ItemType.Typ)
                        {
                            int d = li.Count - nextDecreeItems.Count;
                            int j;
                            for (j = 0; j < nextDecreeItems.Count; j++)
                            {
                                if (nextDecreeItems[j].Typ != li[d + j].Typ)
                                {
                                    break;
                                }
                            }
                            // j reached the end => every type matched.
                            if (j >= nextDecreeItems.Count)
                            {
                                li.RemoveRange(0, d);
                                res.BeginChar = li[0].BeginChar;
                            }
                        }
                        // The following decree is just a name and this one is "<item> <name>": the span is the name only.
                        else if ((nextDecreeItems != null && nextDecreeItems.Count == 1 && nextDecreeItems[0].Typ == DecreeToken.ItemType.Name) && li.Count == 2 && li[1].Typ == DecreeToken.ItemType.Name)
                        {
                            res.BeginChar = li[1].BeginChar;
                            res.EndChar   = li[1].EndChar;
                            break;
                        }
                        // The following decree is just a number and this one ends with a number: the span is that number.
                        else if ((nextDecreeItems != null && nextDecreeItems.Count == 1 && nextDecreeItems[0].Typ == DecreeToken.ItemType.Number) && li[li.Count - 1].Typ == DecreeToken.ItemType.Number)
                        {
                            res.BeginChar = li[li.Count - 1].BeginChar;
                            res.EndChar   = li[li.Count - 1].EndChar;
                        }
                        for (int i = 0; i < li.Count; i++)
                        {
                            DecreeToken l = li[i];
                            // Items that start beyond the referent token do not belong to it: discard them and stop.
                            if (l.BeginChar > t.EndChar)
                            {
                                li.RemoveRange(i, li.Count - i);
                                break;
                            }
                            if (l.Typ == DecreeToken.ItemType.Name)
                            {
                                // Before any number has been seen, a name is kept inside the span for
                                // contracts and when a date or number follows it.
                                if (!wasNum)
                                {
                                    if (dr.Kind == Pullenti.Ner.Decree.DecreeKind.Contract)
                                    {
                                        continue;
                                    }
                                    if (((i + 1) < li.Count) && ((li[i + 1].Typ == DecreeToken.ItemType.Date || li[i + 1].Typ == DecreeToken.ItemType.Number)))
                                    {
                                        continue;
                                    }
                                }
                                // Otherwise the span ends just before the name (only if that point lies strictly inside the span).
                                // NOTE(review): l.BeginToken.Previous is dereferenced without a null check - confirm.
                                int ee = l.BeginToken.Previous.EndChar;
                                if (ee > res.BeginChar && (ee < res.EndChar))
                                {
                                    res.EndChar = ee;
                                }
                                break;
                            }
                            if (l.Typ == DecreeToken.ItemType.Number)
                            {
                                wasNum = true;
                            }
                            if (i == 0)
                            {
                                if (l.Typ == DecreeToken.ItemType.Typ)
                                {
                                    wasTyp = true;
                                }
                                else if (l.Typ == DecreeToken.ItemType.Owner || l.Typ == DecreeToken.ItemType.Org)
                                {
                                    // An owner/organisation directly followed by a date or number is treated like a type.
                                    if (((i + 1) < li.Count) && ((li[1].Typ == DecreeToken.ItemType.Date || li[1].Typ == DecreeToken.ItemType.Number)))
                                    {
                                        wasTyp = true;
                                    }
                                }
                                if (wasTyp)
                                {
                                    // Look at the word before the item (skipping one '.', e.g. after an abbreviation):
                                    // forms of "УТВЕРЖДЕННЫЙ"/"УТВЕРДИТЬ"/"УТВ" ("approved") mark an adopting act, so the
                                    // span restarts at this item and the result is flagged as adopted.
                                    Pullenti.Ner.Token tt0 = l.BeginToken.Previous;
                                    if (tt0 != null && tt0.IsChar('.'))
                                    {
                                        tt0 = tt0.Previous;
                                    }
                                    if (tt0 != null && ((tt0.IsValue("УТВЕРЖДЕННЫЙ", null) || tt0.IsValue("УТВЕРДИТЬ", null) || tt0.IsValue("УТВ", null))))
                                    {
                                        if (l.BeginChar > res.BeginChar)
                                        {
                                            res.BeginChar = l.BeginChar;
                                            // An earlier step may have cut EndChar before the new start; restore it to the token end.
                                            if (res.EndChar < res.BeginChar)
                                            {
                                                res.EndChar = t.EndChar;
                                            }
                                            res.IsAdopted = true;
                                        }
                                    }
                                }
                            }
                        }
                        if (li.Count > 0)
                        {
                            // Resume the outer walk after the last parsed item. When that item ends with ')',
                            // step back one token so the loop increment lands on the ')' itself
                            // (presumably so the trailing-parenthesis handling above can see it - TODO confirm).
                            tt = li[li.Count - 1].EndToken;
                            if (tt.IsChar(')'))
                            {
                                tt = tt.Previous;
                            }
                            continue;
                        }
                    }
                    // After a type has been seen, try to recognise a decree name starting at tt.
                    if (wasTyp)
                    {
                        DecreeToken na = DecreeToken.TryAttachName(tt, dr.Typ0, true, false);
                        if (na != null && tt.BeginChar > t.BeginChar)
                        {
                            // If the name is followed (optionally after one of ",()") by an "approved" word
                            // that still lies inside the token, skip ahead to that word and keep walking.
                            Pullenti.Ner.Token tt1 = na.EndToken.Next;
                            if (tt1 != null && tt1.IsCharOf(",()"))
                            {
                                tt1 = tt1.Next;
                            }
                            if (tt1 != null && (tt1.EndChar < t.EndChar))
                            {
                                if (tt1.IsValue("УТВЕРЖДЕННЫЙ", null) || tt1.IsValue("УТВЕРДИТЬ", null) || tt1.IsValue("УТВ", null))
                                {
                                    tt = tt1;
                                    continue;
                                }
                            }
                            // Name introduced by ':' and fitting inside the current span: the span starts at the name.
                            if (tt.Previous != null && tt.Previous.IsChar(':') && na.EndChar <= res.EndChar)
                            {
                                res.BeginChar = tt.BeginChar;
                                break;
                            }
                            // Otherwise the span ends right before the name.
                            // NOTE(review): tt.Previous is not null-checked on this path - confirm.
                            if (tt.Previous.EndChar > res.BeginChar)
                            {
                                res.EndChar = tt.Previous.EndChar;
                                break;
                            }
                        }
                    }
                }
                return(res);
            }
            // ---------- Case 2: the token wraps a part of a decree (article, clause, ...) ----------
            Pullenti.Ner.Decree.DecreePartReferent dpr = t.GetReferent() as Pullenti.Ner.Decree.DecreePartReferent;
            if (dpr == null)
            {
                return(null);
            }
            // "<part> - <this part>" that can be merged into a range: nothing is produced for this token.
            // NOTE(review): presumably the range is reported when the left-hand token is processed (see below) - confirm.
            if ((t.Previous != null && t.Previous.IsHiphen && (t.Previous.Previous is Pullenti.Ner.ReferentToken)) && (t.Previous.Previous.GetReferent() is Pullenti.Ner.Decree.DecreePartReferent))
            {
                if (Pullenti.Ner.Decree.DecreePartReferent.CreateRangeReferent(t.Previous.Previous.GetReferent() as Pullenti.Ner.Decree.DecreePartReferent, dpr) != null)
                {
                    return(null);
                }
            }
            // t1 is the last token covered by the result; it moves to the right-hand part when a range is formed.
            Pullenti.Ner.Token t1 = t;
            bool hasDiap          = false;

            Pullenti.Ner.ReferentToken DiapRef = null;
            // "<this part> - <part>": try to merge both into a single range referent.
            if ((t.Next != null && t.Next.IsHiphen && (t.Next.Next is Pullenti.Ner.ReferentToken)) && (t.Next.Next.GetReferent() is Pullenti.Ner.Decree.DecreePartReferent))
            {
                Pullenti.Ner.Decree.DecreePartReferent diap = Pullenti.Ner.Decree.DecreePartReferent.CreateRangeReferent(dpr as Pullenti.Ner.Decree.DecreePartReferent, t.Next.Next.GetReferent() as Pullenti.Ner.Decree.DecreePartReferent);
                if (diap != null)
                {
                    dpr     = diap;
                    hasDiap = true;
                    t1      = t.Next.Next;
                    DiapRef = t1 as Pullenti.Ner.ReferentToken;
                }
            }
            res = new CanonicDecreeRefUri(t.Kit.Sofa.Text)
            {
                Ref = dpr, BeginChar = t.BeginChar, EndChar = t1.EndChar, IsDiap = hasDiap
            };
            // Reference (or range) directly enclosed in "( ... )": keep the full span.
            if ((t.Previous != null && t.Previous.IsChar('(') && t1.Next != null) && t1.Next.IsChar(')'))
            {
                return(res);
            }
            // Look for an embedded decree referent among the inner tokens and exclude it from the span.
            for (Pullenti.Ner.Token tt = (t as Pullenti.Ner.MetaToken).BeginToken; tt != null && tt.EndChar <= t.EndChar; tt = tt.Next)
            {
                if (tt.GetReferent() is Pullenti.Ner.Decree.DecreeReferent)
                {
                    if (tt.BeginChar > t.BeginChar)
                    {
                        // Decree follows the part: end the span before it, and before a preceding preposition if any.
                        res.EndChar = tt.Previous.EndChar;
                        if (tt.Previous.Morph.Class.IsPreposition && tt.Previous.Previous != null)
                        {
                            res.EndChar = tt.Previous.Previous.EndChar;
                        }
                    }
                    else if (tt.EndChar < t.EndChar)
                    {
                        // Decree is the first inner token and does not reach the end: BeginChar is set to its start.
                        res.BeginChar = tt.BeginChar;
                    }
                    break;
                }
            }
            // NOTE(review): _hasSameDecree is defined elsewhere; presumably it reports whether a neighbouring
            // part reference before (true) / after (false) this token belongs to the same decree - confirm.
            bool hasSameBefore = _hasSameDecree(t, dpr, true);
            bool hasSameAfter  = _hasSameDecree(t, dpr, false);

            // Find the slot type with the highest rank (ptmin/max) and the runner-up (ptmin2/max2).
            // Prefix doubles as the "nothing found" marker. ptmin2 and max2 are not read after this loop.
            PartToken.ItemType ptmin  = PartToken.ItemType.Prefix;
            PartToken.ItemType ptmin2 = PartToken.ItemType.Prefix;
            int max  = 0;
            int max2 = 00;

            foreach (Pullenti.Ner.Slot s in dpr.Slots)
            {
                PartToken.ItemType pt = PartToken._getTypeByAttrName(s.TypeName);
                if (pt == PartToken.ItemType.Prefix)
                {
                    continue;
                }
                int co = PartToken._getRank(pt);
                if (co < 1)
                {
                    // Unranked types are ignored, except Part when the referent also has a clause slot:
                    // it is then ranked like Paragraph.
                    if (pt == PartToken.ItemType.Part && dpr.FindSlot(Pullenti.Ner.Decree.DecreePartReferent.ATTR_CLAUSE, null, true) != null)
                    {
                        co = PartToken._getRank(PartToken.ItemType.Paragraph);
                    }
                    else
                    {
                        continue;
                    }
                }
                if (co > max)
                {
                    max2   = max;
                    ptmin2 = ptmin;
                    max    = co;
                    ptmin  = pt;
                }
                else if (co > max2)
                {
                    max2   = co;
                    ptmin2 = pt;
                }
            }
            // Pass 1: find the first part token of type ptmin inside the span and shrink the span to it.
            if (ptmin != PartToken.ItemType.Prefix)
            {
                for (Pullenti.Ner.Token tt = (t as Pullenti.Ner.MetaToken).BeginToken; tt != null && tt.EndChar <= res.EndChar; tt = tt.Next)
                {
                    if (tt.BeginChar >= res.BeginChar)
                    {
                        PartToken pt = PartToken.TryAttach(tt, null, false, false);
                        if (pt != null && pt.Typ == ptmin)
                        {
                            res.BeginChar = pt.BeginChar;
                            res.EndChar   = pt.EndChar;
                            // Appendix ending with the preposition "К" ("to"): leave the preposition out.
                            if (pt.Typ == PartToken.ItemType.Appendix && pt.EndToken.IsValue("К", null) && pt.BeginToken != pt.EndToken)
                            {
                                res.EndChar = pt.EndToken.Previous.EndChar;
                            }
                            if (pt.EndChar == t.EndChar)
                            {
                                // Part token fills the referent token to its end and is followed by comma/"and" plus
                                // another part referent that does not start with a letter (e.g. an enumeration of
                                // numbers): narrow the span to the first value of the part token.
                                if ((t.Next != null && t.Next.IsCommaAnd && (t.Next.Next is Pullenti.Ner.ReferentToken)) && (t.Next.Next.GetReferent() is Pullenti.Ner.Decree.DecreePartReferent))
                                {
                                    Pullenti.Ner.Token tt1 = (t.Next.Next as Pullenti.Ner.ReferentToken).BeginToken;
                                    bool ok = true;
                                    if (tt1.Chars.IsLetter)
                                    {
                                        ok = false;
                                    }
                                    if (ok)
                                    {
                                        // Only the first value is used (the loop breaks immediately).
                                        foreach (PartToken.PartValue v in pt.Values)
                                        {
                                            res.BeginChar = v.BeginChar;
                                            res.EndChar   = v.EndChar;
                                            break;
                                        }
                                    }
                                }
                            }
                            if (!hasDiap)
                            {
                                return(res);
                            }
                            break;
                        }
                    }
                }
                // For a range, extend the end of the span over the leading tokens of the right-hand
                // referent, stopping at a comma or at the first token preceded by whitespace.
                if (hasDiap && DiapRef != null)
                {
                    for (Pullenti.Ner.Token tt = DiapRef.BeginToken; tt != null && tt.EndChar <= DiapRef.EndChar; tt = tt.Next)
                    {
                        if (tt.IsChar(','))
                        {
                            break;
                        }
                        if (tt != DiapRef.BeginToken && tt.IsWhitespaceBefore)
                        {
                            break;
                        }
                        res.EndChar = tt.EndChar;
                    }
                    return(res);
                }
            }
            // Pass 2: only reached when pass 1 found no part token of type ptmin; used when a neighbouring
            // reference was reported by _hasSameDecree.
            if (((hasSameBefore || hasSameAfter)) && ptmin != PartToken.ItemType.Prefix)
            {
                for (Pullenti.Ner.Token tt = (t as Pullenti.Ner.MetaToken).BeginToken; tt != null && tt.EndChar <= res.EndChar; tt = tt.Next)
                {
                    if (tt.BeginChar >= res.BeginChar)
                    {
                        // Part tokens are only parsed here when there is no same-decree reference before this one.
                        PartToken pt = (!hasSameBefore ? PartToken.TryAttach(tt, null, false, false) : null);
                        if (pt != null)
                        {
                            if (pt.Typ == ptmin)
                            {
                                // Span becomes the first value of the part token (returns on the first iteration).
                                foreach (PartToken.PartValue v in pt.Values)
                                {
                                    res.BeginChar = v.BeginChar;
                                    res.EndChar   = v.EndChar;
                                    return(res);
                                }
                            }
                            tt = pt.EndToken;
                            continue;
                        }
                        // Bare number at the very start of the span: take a dotted number such as "1.2.3"
                        // (no whitespace around the dots), optionally followed by "-<dotted number>".
                        if ((tt is Pullenti.Ner.NumberToken) && tt.BeginChar == res.BeginChar)
                        {
                            res.EndChar = tt.EndChar;
                            for (; tt != null && tt.Next != null;)
                            {
                                if (!tt.Next.IsChar('.') || tt.IsWhitespaceAfter || tt.Next.IsWhitespaceAfter)
                                {
                                    break;
                                }
                                if (!(tt.Next.Next is Pullenti.Ner.NumberToken))
                                {
                                    break;
                                }
                                tt          = tt.Next.Next;
                                res.EndChar = tt.EndChar;
                            }
                            if (tt.Next != null && tt.Next.IsHiphen)
                            {
                                if (tt.Next.Next is Pullenti.Ner.NumberToken)
                                {
                                    tt          = tt.Next.Next;
                                    res.EndChar = tt.EndChar;
                                    // Same dotted-number extension for the right-hand side of the hyphen.
                                    for (; tt != null && tt.Next != null;)
                                    {
                                        if (!tt.Next.IsChar('.') || tt.IsWhitespaceAfter || tt.Next.IsWhitespaceAfter)
                                        {
                                            break;
                                        }
                                        if (!(tt.Next.Next is Pullenti.Ner.NumberToken))
                                        {
                                            break;
                                        }
                                        tt          = tt.Next.Next;
                                        res.EndChar = tt.EndChar;
                                    }
                                }
                                else if (tt.Next.Next != null && (tt.Next.Next.GetReferent() is Pullenti.Ner.Decree.DecreePartReferent) && hasDiap)
                                {
                                    // Right-hand side is a part referent of the range: include only its first inner token.
                                    res.EndChar = (tt.Next.Next as Pullenti.Ner.MetaToken).BeginToken.EndChar;
                                }
                            }
                            return(res);
                        }
                        // Bracketed value at the start of the span with exactly one token inside the brackets
                        // (only when a same-decree reference precedes): the span is the whole bracket sequence.
                        if (Pullenti.Ner.Core.BracketHelper.CanBeStartOfSequence(tt, true, false) && tt.BeginChar == res.BeginChar && hasSameBefore)
                        {
                            Pullenti.Ner.Core.BracketSequenceToken br = Pullenti.Ner.Core.BracketHelper.TryParse(tt, Pullenti.Ner.Core.BracketParseAttr.No, 100);
                            if (br != null && br.EndToken.Previous == tt.Next)
                            {
                                res.EndChar = br.EndChar;
                                return(res);
                            }
                        }
                    }
                }
                return(res);
            }
            // Pass 3: no same-decree neighbours. Parse a list of part tokens and use the first one of type ptmin.
            if (!hasSameBefore && !hasSameAfter && ptmin != PartToken.ItemType.Prefix)
            {
                for (Pullenti.Ner.Token tt = (t as Pullenti.Ner.MetaToken).BeginToken; tt != null && tt.EndChar <= res.EndChar; tt = tt.Next)
                {
                    if (tt.BeginChar >= res.BeginChar)
                    {
                        List <PartToken> pts = PartToken.TryAttachList(tt, false, 40);
                        if (pts == null || pts.Count == 0)
                        {
                            break;
                        }
                        for (int i = 0; i < pts.Count; i++)
                        {
                            if (pts[i].Typ == ptmin)
                            {
                                res.BeginChar = pts[i].BeginChar;
                                res.EndChar   = pts[i].EndChar;
                                tt            = pts[i].EndToken;
                                // "<part>-<number>" or, for a range, "<part>-<part referent>": extend the end accordingly.
                                if (tt.Next != null && tt.Next.IsHiphen)
                                {
                                    if (tt.Next.Next is Pullenti.Ner.NumberToken)
                                    {
                                        res.EndChar = tt.Next.Next.EndChar;
                                    }
                                    else if (tt.Next.Next != null && (tt.Next.Next.GetReferent() is Pullenti.Ner.Decree.DecreePartReferent) && hasDiap)
                                    {
                                        res.EndChar = (tt.Next.Next as Pullenti.Ner.MetaToken).BeginToken.EndChar;
                                    }
                                }
                                return(res);
                            }
                        }
                    }
                }
            }
            // Nothing more specific was found: return the span as narrowed so far.
            return(res);
        }
예제 #22
0
        /// <summary>
        /// Tries to recognise a money amount that starts at token <paramref name="t"/>.
        /// </summary>
        /// <remarks>
        /// Two shapes are handled:
        /// <list type="bullet">
        /// <item>A number with a money postfix, as reported by
        /// <c>NumberHelper.TryParseNumberWithPostfix</c>, optionally followed by an equal
        /// amount repeated in parentheses or right after the first one.</item>
        /// <item>A plain number followed by a hyphen or preposition and a second number that
        /// carries the money postfix ("from N to M &lt;currency&gt;"): the currency of the
        /// second number is applied to the first one.</item>
        /// </list>
        /// </remarks>
        /// <param name="t">First token of the candidate; must be a number token or a single-character token.</param>
        /// <returns>A referent token wrapping a new <c>MoneyReferent</c>, or null when no amount is recognised.</returns>
        public static Pullenti.Ner.ReferentToken TryParse(Pullenti.Ner.Token t)
        {
            if (t == null)
            {
                return(null);
            }
            // Only numbers and one-character tokens (e.g. a currency sign) can start an amount.
            if (!(t is Pullenti.Ner.NumberToken) && t.LengthChar != 1)
            {
                return(null);
            }
            Pullenti.Ner.Core.NumberExToken nex = Pullenti.Ner.Core.NumberHelper.TryParseNumberWithPostfix(t);
            if (nex == null || nex.ExTyp != Pullenti.Ner.Core.NumberExType.Money)
            {
                // Not a money amount by itself: check for "<number> <hyphen|preposition> <money amount>".
                if ((t is Pullenti.Ner.NumberToken) && (t.Next is Pullenti.Ner.TextToken) && (t.Next.Next is Pullenti.Ner.NumberToken))
                {
                    if (t.Next.IsHiphen || t.Next.Morph.Class.IsPreposition)
                    {
                        Pullenti.Ner.Core.NumberExToken res1 = Pullenti.Ner.Core.NumberHelper.TryParseNumberWithPostfix(t.Next.Next);
                        if (res1 != null && res1.ExTyp == Pullenti.Ner.Core.NumberExType.Money)
                        {
                            MoneyReferent res0 = new MoneyReferent();
                            // "<number> - <zero amount> (<amount>)": the parenthesised amount in the same currency
                            // may restate the number before the hyphen, possibly split into digit groups.
                            if ((t.Next.IsHiphen && res1.RealValue == 0 && res1.EndToken.Next != null) && res1.EndToken.Next.IsChar('('))
                            {
                                Pullenti.Ner.Core.NumberExToken nex2 = Pullenti.Ner.Core.NumberHelper.TryParseNumberWithPostfix(res1.EndToken.Next.Next);
                                if ((nex2 != null && nex2.ExTypParam == res1.ExTypParam && nex2.EndToken.Next != null) && nex2.EndToken.Next.IsChar(')'))
                                {
                                    // The parenthesised amount equals the leading number.
                                    if (nex2.Value == (t as Pullenti.Ner.NumberToken).Value)
                                    {
                                        res0.Currency = nex2.ExTypParam;
                                        res0.AddSlot(MoneyReferent.ATTR_VALUE, nex2.Value, true, 0);
                                        return(new Pullenti.Ner.ReferentToken(res0, t, nex2.EndToken.Next));
                                    }
                                    if (t.Previous is Pullenti.Ner.NumberToken)
                                    {
                                        // The leading number is written as two groups ("thousands units").
                                        // Compare numerically: Value is a string, so adding it to a double would
                                        // concatenate text instead of summing the groups.
                                        if (nex2.RealValue == ((((t.Previous as Pullenti.Ner.NumberToken).RealValue * 1000) + (t as Pullenti.Ner.NumberToken).RealValue)))
                                        {
                                            res0.Currency = nex2.ExTypParam;
                                            res0.AddSlot(MoneyReferent.ATTR_VALUE, nex2.Value, true, 0);
                                            return(new Pullenti.Ner.ReferentToken(res0, t.Previous, nex2.EndToken.Next));
                                        }
                                        else if (t.Previous.Previous is Pullenti.Ner.NumberToken)
                                        {
                                            // Three groups ("millions thousands units").
                                            if (nex2.RealValue == ((((t.Previous.Previous as Pullenti.Ner.NumberToken).RealValue * 1000000) + ((t.Previous as Pullenti.Ner.NumberToken).RealValue * 1000) + (t as Pullenti.Ner.NumberToken).RealValue)))
                                            {
                                                res0.Currency = nex2.ExTypParam;
                                                res0.AddSlot(MoneyReferent.ATTR_VALUE, nex2.Value, true, 0);
                                                return(new Pullenti.Ner.ReferentToken(res0, t.Previous.Previous, nex2.EndToken.Next));
                                            }
                                        }
                                    }
                                }
                            }
                            // Plain range: the first number takes the currency of the second one and
                            // the resulting token covers only the first number.
                            res0.Currency = res1.ExTypParam;
                            res0.AddSlot(MoneyReferent.ATTR_VALUE, (t as Pullenti.Ner.NumberToken).Value, false, 0);
                            return(new Pullenti.Ner.ReferentToken(res0, t, t));
                        }
                    }
                }
                return(null);
            }
            MoneyReferent res = new MoneyReferent();

            res.Currency = nex.ExTypParam;
            string val = nex.Value;

            // The value slot stores the integer part only; the fraction goes to the rest slot below.
            if (val.IndexOf('.') > 0)
            {
                val = val.Substring(0, val.IndexOf('.'));
            }
            res.AddSlot(MoneyReferent.ATTR_VALUE, val, true, 0);
            // Fractional part expressed in hundredths.
            int re = (int)Math.Round(((nex.RealValue - res.Value)) * 100, 6);

            if (re != 0)
            {
                res.AddSlot(MoneyReferent.ATTR_REST, re.ToString(), true, 0);
            }
            // An alternative reading of the number is stored in the ALT slots when it differs.
            if (nex.RealValue != nex.AltRealValue)
            {
                if (Math.Floor(res.Value) != Math.Floor(nex.AltRealValue))
                {
                    val = Pullenti.Ner.Core.NumberHelper.DoubleToString(nex.AltRealValue);
                    if (val.IndexOf('.') > 0)
                    {
                        val = val.Substring(0, val.IndexOf('.'));
                    }
                    res.AddSlot(MoneyReferent.ATTR_ALTVALUE, val, true, 0);
                }
                re = (int)Math.Round(((nex.AltRealValue - ((long)nex.AltRealValue))) * 100, 6);
                if (re != res.Rest && re != 0)
                {
                    res.AddSlot(MoneyReferent.ATTR_ALTREST, re.ToString(), true, 0);
                }
            }
            if (nex.AltRestMoney > 0)
            {
                res.AddSlot(MoneyReferent.ATTR_ALTREST, nex.AltRestMoney.ToString(), true, 0);
            }
            Pullenti.Ner.Token t1 = nex.EndToken;
            // An equal amount repeated after '(' is absorbed into the token: either "( amount )"
            // or an amount that itself starts at the '(' token.
            if (t1.Next != null && t1.Next.IsChar('('))
            {
                Pullenti.Ner.ReferentToken rt = TryParse(t1.Next.Next);
                if ((rt != null && rt.Referent.CanBeEquals(res, Pullenti.Ner.Core.ReferentsEqualType.WithinOneText) && rt.EndToken.Next != null) && rt.EndToken.Next.IsChar(')'))
                {
                    t1 = rt.EndToken.Next;
                }
                else
                {
                    rt = TryParse(t1.Next);
                    if (rt != null && rt.Referent.CanBeEquals(res, Pullenti.Ner.Core.ReferentsEqualType.WithinOneText))
                    {
                        t1 = rt.EndToken;
                    }
                }
            }
            // The alternative value is larger and the previous token is a number separated by a single
            // space: when the difference is a whole number of thousands (or millions), the previous
            // number is taken as the leading digit group, so the alternative value becomes the main one.
            if (res.AltValue != null && res.AltValue.Value > res.Value)
            {
                if (t.WhitespacesBeforeCount == 1 && (t.Previous is Pullenti.Ner.NumberToken))
                {
                    int delt = (int)((res.AltValue.Value - res.Value));
                    if ((((res.Value < 1000) && ((delt % 1000)) == 0)) || (((res.Value < 1000000) && ((delt % 1000000)) == 0)))
                    {
                        t = t.Previous;
                        res.AddSlot(MoneyReferent.ATTR_VALUE, res.GetStringValue(MoneyReferent.ATTR_ALTVALUE), true, 0);
                        res.AddSlot(MoneyReferent.ATTR_ALTVALUE, null, true, 0);
                    }
                }
            }
            return(new Pullenti.Ner.ReferentToken(res, t, t1));
        }
예제 #23
0
        /// <summary>
        /// Builds the final <c>ReferentToken</c> for the person <paramref name="p"/>.
        /// Starting from the span <paramref name="begin"/>..<paramref name="end"/> the method
        /// (1) absorbs the already-found leading attributes <paramref name="attrs"/>,
        /// (2) normalises the morphology (singular number, gender taken from the person),
        /// (3) scans the tokens after <paramref name="end"/> for trailing attributes, nicknames and
        ///     bracketed alternative names, and
        /// (4) scans further for contacts (PHONE / URI / ADDRESS), identity documents and organizations.
        /// Everything found is written into slots of <paramref name="p"/>, and the span is widened accordingly.
        /// </summary>
        /// <param name="p">Person referent being completed; when null the method returns null.</param>
        /// <param name="begin">First token of the person span; may be moved left to cover leading attributes.</param>
        /// <param name="end">Last token of the person span; may be moved right to cover trailing data.</param>
        /// <param name="morph">Source morphology; its items are copied, the original collection is not modified here.</param>
        /// <param name="attrs">Attribute tokens found before the person, or null.</param>
        /// <param name="ad">Analyzer data (local ontology and the OverflowLevel nesting counter), or null.</param>
        /// <param name="forAttribute">When true, the method returns right after morphology normalisation, without scanning the tail.</param>
        /// <param name="afterBePredicate">NOTE(review): not referenced anywhere in this method body.</param>
        /// <returns>A new token over begin..end with <c>Morph</c> and <c>MiscAttrs</c> set, or null when <paramref name="p"/> is null.</returns>
        internal static Pullenti.Ner.ReferentToken CreateReferentToken(Pullenti.Ner.Person.PersonReferent p, Pullenti.Ner.Token begin, Pullenti.Ner.Token end, Pullenti.Ner.MorphCollection morph, List <PersonAttrToken> attrs, Pullenti.Ner.Person.PersonAnalyzer.PersonAnalyzerData ad, bool forAttribute, bool afterBePredicate)
        {
            if (p == null)
            {
                return(null);
            }
            // Set when one of the leading attributes is a BestRegards marker; relaxes several checks below.
            bool hasPrefix = false;

            if (attrs != null)
            {
                // Leading attributes: widen the span to the left and copy their data into the person.
                foreach (PersonAttrToken a in attrs)
                {
                    if (a.Typ == PersonAttrTerminType.BestRegards)
                    {
                        hasPrefix = true;
                    }
                    else
                    {
                        if (a.BeginChar < begin.BeginChar)
                        {
                            begin = a.BeginToken;
                            // Attribute enclosed in parentheses: include the opening bracket in the span.
                            if ((a.EndToken.Next != null && a.EndToken.Next.IsChar(')') && begin.Previous != null) && begin.Previous.IsChar('('))
                            {
                                begin = begin.Previous;
                            }
                        }
                        if (a.Typ != PersonAttrTerminType.Prefix)
                        {
                            if (a.Age != null)
                            {
                                p.AddSlot(Pullenti.Ner.Person.PersonReferent.ATTR_AGE, a.Age, false, 0);
                            }
                            if (a.PropRef == null)
                            {
                                p.AddSlot(Pullenti.Ner.Person.PersonReferent.ATTR_ATTR, a.Value, false, 0);
                            }
                            else
                            {
                                p.AddSlot(Pullenti.Ner.Person.PersonReferent.ATTR_ATTR, a, false, 0);
                            }
                        }
                        // Prefix attributes only contribute their gender.
                        else if (a.Gender == Pullenti.Morph.MorphGender.Feminie && !p.IsFemale)
                        {
                            p.IsFemale = true;
                        }
                        else if (a.Gender == Pullenti.Morph.MorphGender.Masculine && !p.IsMale)
                        {
                            p.IsMale = true;
                        }
                    }
                }
            }
            else if ((begin.Previous is Pullenti.Ner.TextToken) && (begin.WhitespacesBeforeCount < 3))
            {
                // No attributes supplied: recognise the abbreviation "ИП" (individual entrepreneur)
                // directly before the name and turn it into a property attribute.
                if ((begin.Previous as Pullenti.Ner.TextToken).Term == "ИП")
                {
                    PersonAttrToken a = new PersonAttrToken(begin.Previous, begin.Previous);
                    a.PropRef      = new Pullenti.Ner.Person.PersonPropertyReferent();
                    a.PropRef.Name = "индивидуальный предприниматель";
                    p.AddSlot(Pullenti.Ner.Person.PersonReferent.ATTR_ATTR, a, false, 0);
                    begin = begin.Previous;
                }
            }
            // Copy the morphology item by item, forcing singular number and filling an
            // undefined gender from the person when the person's gender is unambiguous.
            Pullenti.Ner.MorphCollection m0 = new Pullenti.Ner.MorphCollection();
            foreach (Pullenti.Morph.MorphBaseInfo it in morph.Items)
            {
                Pullenti.Morph.MorphBaseInfo bi = new Pullenti.Morph.MorphBaseInfo();
                bi.CopyFrom(it);
                bi.Number = Pullenti.Morph.MorphNumber.Singular;
                if (bi.Gender == Pullenti.Morph.MorphGender.Undefined)
                {
                    if (p.IsMale && !p.IsFemale)
                    {
                        bi.Gender = Pullenti.Morph.MorphGender.Masculine;
                    }
                    if (!p.IsMale && p.IsFemale)
                    {
                        bi.Gender = Pullenti.Morph.MorphGender.Feminie;
                    }
                }
                m0.AddItem(bi);
            }
            morph = m0;
            // If the name itself has no case, borrow case (and singular number) from the first attribute.
            if ((attrs != null && attrs.Count > 0 && !attrs[0].Morph.Case.IsUndefined) && morph.Case.IsUndefined)
            {
                morph.Case = attrs[0].Morph.Case;
                if (attrs[0].Morph.Number == Pullenti.Morph.MorphNumber.Singular)
                {
                    morph.Number = Pullenti.Morph.MorphNumber.Singular;
                }
                if (p.IsMale && !p.IsFemale)
                {
                    morph.Gender = Pullenti.Morph.MorphGender.Masculine;
                }
                else if (p.IsFemale)
                {
                    morph.Gender = Pullenti.Morph.MorphGender.Feminie;
                }
            }
            if (begin.Previous != null)
            {
                // A preceding "ИМЕНИ" / "ИМ." ("named after") switches to attribute mode:
                // the person is returned as is, without scanning the tail.
                Pullenti.Ner.Token ttt = begin.Previous;
                if (ttt.IsValue("ИМЕНИ", "ІМЕНІ"))
                {
                    forAttribute = true;
                }
                else
                {
                    if (ttt.IsChar('.') && ttt.Previous != null)
                    {
                        ttt = ttt.Previous;
                    }
                    if (ttt.WhitespacesAfterCount < 3)
                    {
                        if (ttt.IsValue("ИМ", "ІМ"))
                        {
                            forAttribute = true;
                        }
                    }
                }
            }
            if (forAttribute)
            {
                // Fixed: the statement terminator belongs to the return statement, inside the block.
                return new Pullenti.Ner.ReferentToken(p, begin, end)
                {
                    Morph = morph, MiscAttrs = (int)p.m_PersonIdentityTyp
                };
            }
            // The person continues an enumeration "<person>, <person> and <person>": walk back to the
            // first person of the chain and, if it starts with a shared property, apply it here too.
            if ((begin.Previous != null && begin.Previous.IsCommaAnd && (begin.Previous.Previous is Pullenti.Ner.ReferentToken)) && (begin.Previous.Previous.GetReferent() is Pullenti.Ner.Person.PersonReferent))
            {
                Pullenti.Ner.ReferentToken rt00 = begin.Previous.Previous as Pullenti.Ner.ReferentToken;

                for (Pullenti.Ner.Token ttt = (Pullenti.Ner.Token)rt00; ttt != null;)
                {
                    if (ttt.Previous == null || !(ttt.Previous.Previous is Pullenti.Ner.ReferentToken))
                    {
                        break;
                    }
                    if (!ttt.Previous.IsCommaAnd || !(ttt.Previous.Previous.GetReferent() is Pullenti.Ner.Person.PersonReferent))
                    {
                        break;
                    }
                    rt00 = ttt.Previous.Previous as Pullenti.Ner.ReferentToken;
                    ttt  = rt00;
                }
                if (rt00.BeginToken.GetReferent() is Pullenti.Ner.Person.PersonPropertyReferent)
                {
                    // The property is shared only when it is followed by ':' or is in plural form.
                    bool ok = false;
                    if ((rt00.BeginToken as Pullenti.Ner.ReferentToken).EndToken.Next != null && (rt00.BeginToken as Pullenti.Ner.ReferentToken).EndToken.Next.IsChar(':'))
                    {
                        ok = true;
                    }
                    else if (rt00.BeginToken.Morph.Number == Pullenti.Morph.MorphNumber.Plural)
                    {
                        ok = true;
                    }
                    if (ok)
                    {
                        p.AddSlot(Pullenti.Ner.Person.PersonReferent.ATTR_ATTR, rt00.BeginToken.GetReferent(), false, 0);
                    }
                }
            }
            if (ad != null)
            {
                // Nesting guard: the tail scan below calls back into the person analyzer.
                if (ad.OverflowLevel > 10)
                {
                    // Fixed: the statement terminator belongs to the return statement, inside the block.
                    return new Pullenti.Ner.ReferentToken(p, begin, end)
                    {
                        Morph = morph, MiscAttrs = (int)p.m_PersonIdentityTyp
                    };
                }
                ad.OverflowLevel++;
            }
            List <PersonAttrToken> attrs1 = null;   // attributes found after the name
            bool hasPosition = false;               // a Position/King attribute was found in the tail
            bool openBr      = false;               // the tail starts with an opening bracket

            // Pass 1: collect trailing attributes, bracketed surnames, Latin spellings and nicknames.
            for (Pullenti.Ner.Token t = end.Next; t != null; t = t.Next)
            {
                if (t.IsTableControlChar)
                {
                    break;
                }
                if (t.IsNewlineBefore)
                {
                    if (t.NewlinesBeforeCount > 2)
                    {
                        break;
                    }
                    if (attrs1 != null && attrs1.Count > 0)
                    {
                        break;
                    }
                    // Do not run into the "From" line of a mail header.
                    Pullenti.Ner.Mail.Internal.MailLine ml = Pullenti.Ner.Mail.Internal.MailLine.Parse(t, 0, 0);
                    if (ml != null && ml.Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.From)
                    {
                        break;
                    }
                    if (t.Chars.IsCapitalUpper)
                    {
                        // A capitalised new line is accepted only if it is an attribute that either follows
                        // a prefix, ends its line / table cell, or consists of several whitespace-separated tokens.
                        PersonAttrToken attr1 = PersonAttrToken.TryAttach(t, (ad == null ? null : ad.LocalOntology), PersonAttrToken.PersonAttrAttachAttrs.No);
                        bool            ok1   = false;
                        if (attr1 != null)
                        {
                            if (hasPrefix || attr1.IsNewlineAfter || ((attr1.EndToken.Next != null && attr1.EndToken.Next.IsTableControlChar)))
                            {
                                ok1 = true;
                            }
                            else
                            {
                                for (Pullenti.Ner.Token tt2 = t.Next; tt2 != null && tt2.EndChar <= attr1.EndChar; tt2 = tt2.Next)
                                {
                                    if (tt2.IsWhitespaceBefore)
                                    {
                                        ok1 = true;
                                    }
                                }
                            }
                        }
                        else
                        {
                            Pullenti.Ner.Token ttt = CorrectTailAttributes(p, t);
                            if (ttt != null && ttt != t)
                            {
                                end = (t = ttt);
                                continue;
                            }
                        }
                        if (!ok1)
                        {
                            break;
                        }
                    }
                }
                if (t.IsHiphen || t.IsCharOf("_>|"))
                {
                    continue;
                }
                if (t.IsValue("МОДЕЛЬ", null))
                {
                    break;
                }
                Pullenti.Ner.Token tt = CorrectTailAttributes(p, t);
                if (tt != t && tt != null)
                {
                    end = (t = tt);
                    continue;
                }
                // NOTE(review): isBe is never assigned true in this method, so the "!isBe" checks below always pass.
                bool isBe = false;
                if (t.IsChar('(') && t == end.Next)
                {
                    openBr = true;
                    t      = t.Next;
                    if (t == null)
                    {
                        break;
                    }
                    // "(Surname)" right after the name: register it as an additional last name.
                    PersonItemToken pit1 = PersonItemToken.TryAttach(t, null, PersonItemToken.ParseAttr.No, null);
                    if ((pit1 != null && t.Chars.IsCapitalUpper && pit1.EndToken.Next != null) && (t is Pullenti.Ner.TextToken) && pit1.EndToken.Next.IsChar(')'))
                    {
                        if (pit1.Lastname != null)
                        {
                            Pullenti.Morph.MorphBaseInfo inf = new Pullenti.Morph.MorphBaseInfo()
                            {
                                Case = Pullenti.Morph.MorphCase.Nominative
                            };
                            if (p.IsMale)
                            {
                                inf.Gender |= Pullenti.Morph.MorphGender.Masculine;
                            }
                            if (p.IsFemale)
                            {
                                inf.Gender |= Pullenti.Morph.MorphGender.Feminie;
                            }
                            PersonMorphCollection sur = PersonIdentityToken.CreateLastname(pit1, inf);
                            if (sur != null)
                            {
                                p.AddFioIdentity(sur, null, null);
                                end = (t = pit1.EndToken.Next);
                                continue;
                            }
                        }
                    }
                    // "(Latin spelling)" of 2-3 items closed by ')': accepted only when every item
                    // matches one of the person's existing first/middle/last name slots.
                    if ((t is Pullenti.Ner.TextToken) && t.Chars.IsLatinLetter)
                    {
                        List <PersonItemToken> pits = PersonItemToken.TryAttachList(t, null, PersonItemToken.ParseAttr.CanBeLatin, 10);
                        if (((pits != null && pits.Count >= 2 && pits.Count <= 3) && pits[0].Chars.IsLatinLetter && pits[1].Chars.IsLatinLetter) && pits[pits.Count - 1].EndToken.Next != null && pits[pits.Count - 1].EndToken.Next.IsChar(')'))
                        {
                            Pullenti.Ner.Person.PersonReferent pr2 = new Pullenti.Ner.Person.PersonReferent();
                            int cou = 0;
                            foreach (PersonItemToken pi in pits)
                            {
                                foreach (Pullenti.Ner.Slot si in p.Slots)
                                {
                                    if (si.TypeName == Pullenti.Ner.Person.PersonReferent.ATTR_FIRSTNAME || si.TypeName == Pullenti.Ner.Person.PersonReferent.ATTR_MIDDLENAME || si.TypeName == Pullenti.Ner.Person.PersonReferent.ATTR_LASTNAME)
                                    {
                                        if (Pullenti.Ner.Core.MiscHelper.CanBeEqualCyrAndLatSS(si.Value.ToString(), pi.Value))
                                        {
                                            cou++;
                                            pr2.AddSlot(si.TypeName, pi.Value, false, 0);
                                            break;
                                        }
                                    }
                                }
                            }
                            if (cou == pits.Count)
                            {
                                foreach (Pullenti.Ner.Slot si in pr2.Slots)
                                {
                                    p.AddSlot(si.TypeName, si.Value, false, 0);
                                }
                                end = (t = pits[pits.Count - 1].EndToken.Next);
                                continue;
                            }
                        }
                    }
                }
                else if (t.IsComma)
                {
                    t = t.Next;
                    if ((t is Pullenti.Ner.TextToken) && (t as Pullenti.Ner.TextToken).IsValue("WHO", null))
                    {
                        continue;
                    }
                    // ", Latin spelling": same matching as the bracketed variant, but no closing bracket is required.
                    if ((t is Pullenti.Ner.TextToken) && t.Chars.IsLatinLetter)
                    {
                        List <PersonItemToken> pits = PersonItemToken.TryAttachList(t, null, PersonItemToken.ParseAttr.CanBeLatin, 10);
                        if ((pits != null && pits.Count >= 2 && pits.Count <= 3) && pits[0].Chars.IsLatinLetter && pits[1].Chars.IsLatinLetter)
                        {
                            Pullenti.Ner.Person.PersonReferent pr2 = new Pullenti.Ner.Person.PersonReferent();
                            int cou = 0;
                            foreach (PersonItemToken pi in pits)
                            {
                                foreach (Pullenti.Ner.Slot si in p.Slots)
                                {
                                    if (si.TypeName == Pullenti.Ner.Person.PersonReferent.ATTR_FIRSTNAME || si.TypeName == Pullenti.Ner.Person.PersonReferent.ATTR_MIDDLENAME || si.TypeName == Pullenti.Ner.Person.PersonReferent.ATTR_LASTNAME)
                                    {
                                        if (Pullenti.Ner.Core.MiscHelper.CanBeEqualCyrAndLatSS(si.Value.ToString(), pi.Value))
                                        {
                                            cou++;
                                            pr2.AddSlot(si.TypeName, pi.Value, false, 0);
                                            break;
                                        }
                                    }
                                }
                            }
                            if (cou == pits.Count)
                            {
                                foreach (Pullenti.Ner.Slot si in pr2.Slots)
                                {
                                    p.AddSlot(si.TypeName, si.Value, false, 0);
                                }
                                end = (t = pits[pits.Count - 1].EndToken);
                                continue;
                            }
                        }
                    }
                }
                else if ((t is Pullenti.Ner.TextToken) && (t as Pullenti.Ner.TextToken).IsVerbBe)
                {
                    t = t.Next;
                }
                else if (t.IsAnd && t.IsWhitespaceAfter && !t.IsNewlineAfter)
                {
                    if (t == end.Next)
                    {
                        break;
                    }
                    t = t.Next;
                }
                // NOTE(review): hyphens are already skipped by the "continue" near the top of the loop,
                // so this branch looks unreachable.
                else if (t.IsHiphen && t == end.Next)
                {
                    t = t.Next;
                }
                else if (t.IsChar('.') && t == end.Next && hasPrefix)
                {
                    t = t.Next;
                }
                Pullenti.Ner.Token ttt2 = CreateNickname(p, t);
                if (ttt2 != null)
                {
                    t = (end = ttt2);
                    continue;
                }
                // t may have been advanced past the last token by one of the branches above.
                if (t == null)
                {
                    break;
                }
                PersonAttrToken attr = null;
                attr = PersonAttrToken.TryAttach(t, (ad == null ? null : ad.LocalOntology), PersonAttrToken.PersonAttrAttachAttrs.No);
                if (attr == null)
                {
                    // Inside brackets a GEO referent after already collected attributes is skipped over.
                    if ((t != null && t.GetReferent() != null && t.GetReferent().TypeName == "GEO") && attrs1 != null && openBr)
                    {
                        continue;
                    }
                    // "(Word)" with a capitalised word: use it as the last name if the person has none yet.
                    if ((t.Chars.IsCapitalUpper && openBr && t.Next != null) && t.Next.IsChar(')'))
                    {
                        if (p.FindSlot(Pullenti.Ner.Person.PersonReferent.ATTR_LASTNAME, null, true) == null)
                        {
                            p.AddSlot(Pullenti.Ner.Person.PersonReferent.ATTR_LASTNAME, t.GetSourceText().ToUpper(), false, 0);
                            t   = t.Next;
                            end = t;
                        }
                    }
                    // A following singular relative pronoun "КОТОРЫЙ" reveals the person's gender.
                    if (t != null && t.IsValue("КОТОРЫЙ", null) && t.Morph.Number == Pullenti.Morph.MorphNumber.Singular)
                    {
                        if (!p.IsFemale && t.Morph.Gender == Pullenti.Morph.MorphGender.Feminie)
                        {
                            p.IsFemale = true;
                            p.CorrectData();
                        }
                        else if (!p.IsMale && t.Morph.Gender == Pullenti.Morph.MorphGender.Masculine)
                        {
                            p.IsMale = true;
                            p.CorrectData();
                        }
                    }
                    break;
                }
                if (attr.Morph.Number == Pullenti.Morph.MorphNumber.Plural)
                {
                    break;
                }
                if (attr.Typ == PersonAttrTerminType.BestRegards)
                {
                    break;
                }
                if (attr.IsDoubt)
                {
                    // A doubtful attribute is kept only after a prefix, on its own line,
                    // or directly after a hyphen / colon.
                    if (hasPrefix)
                    {
                    }
                    else if (t.IsNewlineBefore && attr.IsNewlineAfter)
                    {
                    }
                    else if (t.Previous != null && ((t.Previous.IsHiphen || t.Previous.IsChar(':'))))
                    {
                    }
                    else
                    {
                        break;
                    }
                }
                // The attribute must agree in case with the name when both cases are known.
                if (!morph.Case.IsUndefined && !attr.Morph.Case.IsUndefined)
                {
                    if (((morph.Case & attr.Morph.Case)).IsUndefined && !isBe)
                    {
                        break;
                    }
                }
                if (openBr)
                {
                    // Inside brackets: stop if another person can be attached from this token.
                    if (Pullenti.Ner.Person.PersonAnalyzer.TryAttachPerson(t, ad, false, 0, true) != null)
                    {
                        break;
                    }
                }
                if (attrs1 == null)
                {
                    // First attribute right after ", ": if a verb follows it, accept only when
                    // the person span can start a sentence.
                    if (t.Previous.IsComma && t.Previous == end.Next)
                    {
                        Pullenti.Ner.Token ttt = attr.EndToken.Next;
                        if (ttt != null)
                        {
                            if (ttt.Morph.Class.IsVerb)
                            {
                                if (Pullenti.Ner.Core.MiscHelper.CanBeStartOfSentence(begin))
                                {
                                }
                                else
                                {
                                    break;
                                }
                            }
                        }
                    }
                    attrs1 = new List <PersonAttrToken>();
                }
                attrs1.Add(attr);
                if (attr.Typ == PersonAttrTerminType.Position || attr.Typ == PersonAttrTerminType.King)
                {
                    if (!isBe)
                    {
                        hasPosition = true;
                    }
                }
                else if (attr.Typ != PersonAttrTerminType.Prefix)
                {
                    // Any other attribute type (except an "Other" attribute carrying an age)
                    // discards the whole collected tail.
                    if (attr.Typ == PersonAttrTerminType.Other && attr.Age != null)
                    {
                    }
                    else
                    {
                        attrs1 = null;
                        break;
                    }
                }
                t = attr.EndToken;
            }
            // Both a leading and a trailing position: decide whether the trailing attributes really belong
            // to this person. The Count guard avoids indexing an empty list (consistent with the check above).
            if (attrs1 != null && hasPosition && attrs != null && attrs.Count > 0)
            {
                Pullenti.Ner.Token te1 = attrs[attrs.Count - 1].EndToken.Next;
                Pullenti.Ner.Token te2 = attrs1[0].BeginToken;
                if (te1.WhitespacesAfterCount > te2.WhitespacesBeforeCount && (te2.WhitespacesBeforeCount < 2))
                {
                }
                else if (attrs1[0].Age != null)
                {
                }
                else if (((te1.IsHiphen || te1.IsChar(':'))) && !attrs1[0].IsNewlineBefore && ((te2.Previous.IsComma || te2.Previous == end)))
                {
                }
                else
                {
                    foreach (PersonAttrToken a in attrs)
                    {
                        if (a.Typ == PersonAttrTerminType.Position)
                        {
                            Pullenti.Ner.Token te = attrs1[attrs1.Count - 1].EndToken;
                            if (te.Next != null)
                            {
                                if (!te.Next.IsChar('.'))
                                {
                                    attrs1 = null;
                                    break;
                                }
                            }
                        }
                    }
                }
            }
            // Without a prefix, check whether the last trailing attribute rather belongs to a following person.
            if (attrs1 != null && !hasPrefix)
            {
                PersonAttrToken attr = attrs1[attrs1.Count - 1];
                bool            ok   = false;
                if (attr.EndToken.Next != null && attr.EndToken.Next.Chars.IsCapitalUpper)
                {
                    ok = true;
                }
                else
                {
                    Pullenti.Ner.ReferentToken rt = Pullenti.Ner.Person.PersonAnalyzer.TryAttachPerson(attr.BeginToken, ad, false, -1, false);
                    if (rt != null && (rt.Referent is Pullenti.Ner.Person.PersonReferent))
                    {
                        ok = true;
                    }
                }
                if (ok)
                {
                    // Whitespace decides: the attribute goes with whichever side it is closer to.
                    if (attr.BeginToken.WhitespacesBeforeCount > attr.EndToken.WhitespacesAfterCount)
                    {
                        attrs1 = null;
                    }
                    else if (attr.BeginToken.WhitespacesBeforeCount == attr.EndToken.WhitespacesAfterCount)
                    {
                        Pullenti.Ner.ReferentToken rt1 = Pullenti.Ner.Person.PersonAnalyzer.TryAttachPerson(attr.BeginToken, ad, false, -1, false);
                        if (rt1 != null)
                        {
                            attrs1 = null;
                        }
                    }
                }
            }
            if (attrs1 != null)
            {
                // Commit the surviving trailing attributes to the person and extend the span.
                foreach (PersonAttrToken a in attrs1)
                {
                    if (a.Typ != PersonAttrTerminType.Prefix)
                    {
                        if (a.Age != null)
                        {
                            p.AddSlot(Pullenti.Ner.Person.PersonReferent.ATTR_AGE, a.Age, true, 0);
                        }
                        else if (a.PropRef == null)
                        {
                            p.AddSlot(Pullenti.Ner.Person.PersonReferent.ATTR_ATTR, a.Value, false, 0);
                        }
                        else
                        {
                            p.AddSlot(Pullenti.Ner.Person.PersonReferent.ATTR_ATTR, a, false, 0);
                        }
                        end = a.EndToken;
                        if (a.Gender != Pullenti.Morph.MorphGender.Undefined && !p.IsFemale && !p.IsMale)
                        {
                            if (a.Gender == Pullenti.Morph.MorphGender.Masculine && !p.IsMale)
                            {
                                p.IsMale = true;
                                p.CorrectData();
                            }
                            else if (a.Gender == Pullenti.Morph.MorphGender.Feminie && !p.IsFemale)
                            {
                                p.IsFemale = true;
                                p.CorrectData();
                            }
                        }
                    }
                }
                if (openBr)
                {
                    if (end.Next != null && end.Next.IsChar(')'))
                    {
                        end = end.Next;
                    }
                }
            }
            // Number of line breaks seen since the last accepted item in pass 2.
            int crlfCou = 0;

            // Pass 2: attach contacts, identity documents and organizations that follow the person.
            for (Pullenti.Ner.Token t = end.Next; t != null; t = t.Next)
            {
                if (t.IsTableControlChar)
                {
                    break;
                }
                if (t.IsNewlineBefore)
                {
                    Pullenti.Ner.Mail.Internal.MailLine ml = Pullenti.Ner.Mail.Internal.MailLine.Parse(t, 0, 0);
                    if (ml != null && ml.Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.From)
                    {
                        break;
                    }
                    crlfCou++;
                }
                if (t.IsCharOf(":,(") || t.IsHiphen)
                {
                    continue;
                }
                if (t.IsChar('.') && t == end.Next)
                {
                    continue;
                }
                Pullenti.Ner.Referent r = t.GetReferent();
                if (r != null)
                {
                    if (r.TypeName == "PHONE" || r.TypeName == "URI" || r.TypeName == "ADDRESS")
                    {
                        // Only URIs with these schemes count as personal contacts.
                        string ty = r.GetStringValue("SCHEME");
                        if (r.TypeName == "URI")
                        {
                            if ((ty != "mailto" && ty != "skype" && ty != "ICQ") && ty != "http")
                            {
                                break;
                            }
                        }
                        p.AddContact(r);
                        end     = t;
                        crlfCou = 0;
                        continue;
                    }
                }
                if (r is Pullenti.Ner.Person.PersonIdentityReferent)
                {
                    p.AddSlot(Pullenti.Ner.Person.PersonReferent.ATTR_IDDOC, r, false, 0);
                    end     = t;
                    crlfCou = 0;
                    continue;
                }
                if (r != null && r.TypeName == "ORGANIZATION")
                {
                    if (t.Next != null && t.Next.Morph.Class.IsVerb)
                    {
                        break;
                    }
                    if (begin.Previous != null && begin.Previous.Morph.Class.IsVerb)
                    {
                        break;
                    }
                    if (t.WhitespacesAfterCount == 1)
                    {
                        break;
                    }
                    // Skip the organization if one of the person's properties already refers to it.
                    bool exist = false;
                    foreach (Pullenti.Ner.Slot s in p.Slots)
                    {
                        if (s.TypeName == Pullenti.Ner.Person.PersonReferent.ATTR_ATTR && (s.Value is Pullenti.Ner.Person.PersonPropertyReferent))
                        {
                            Pullenti.Ner.Person.PersonPropertyReferent pr = s.Value as Pullenti.Ner.Person.PersonPropertyReferent;
                            if (pr.FindSlot(Pullenti.Ner.Person.PersonPropertyReferent.ATTR_REF, r, true) != null)
                            {
                                exist = true;
                                break;
                            }
                        }
                        else if (s.TypeName == Pullenti.Ner.Person.PersonReferent.ATTR_ATTR && (s.Value is PersonAttrToken))
                        {
                            PersonAttrToken pr = s.Value as PersonAttrToken;
                            if (pr.Referent.FindSlot(Pullenti.Ner.Person.PersonPropertyReferent.ATTR_REF, r, true) != null)
                            {
                                exist = true;
                                break;
                            }
                        }
                    }
                    if (!exist)
                    {
                        // Otherwise add a generic "сотрудник" (employee) property referring to the organization.
                        PersonAttrToken pat = new PersonAttrToken(t, t);
                        pat.PropRef = new Pullenti.Ner.Person.PersonPropertyReferent()
                        {
                            Name = "сотрудник"
                        };
                        pat.PropRef.AddSlot(Pullenti.Ner.Person.PersonPropertyReferent.ATTR_REF, r, false, 0);
                        p.AddSlot(Pullenti.Ner.Person.PersonReferent.ATTR_ATTR, pat, false, 0);
                    }
                    // Note: the span end is not moved here; the organization token stays outside the person span.
                    continue;
                }
                if (r != null)
                {
                    break;
                }
                // Plain text is skipped only after a prefix and for fewer than two line breaks.
                if (!hasPrefix || crlfCou >= 2)
                {
                    break;
                }
                Pullenti.Ner.ReferentToken rt = t.Kit.ProcessReferent("PERSON", t);
                if (rt != null)
                {
                    break;
                }
            }
            if (ad != null)
            {
                ad.OverflowLevel--;
            }
            // Span starting with "НА ИМЯ" ("in the name of"): link an identity document standing just before it.
            if (begin.IsValue("НА", null) && begin.Next != null && begin.Next.IsValue("ИМЯ", null))
            {
                Pullenti.Ner.Token t0 = begin.Previous;
                if (t0 != null && t0.IsComma)
                {
                    t0 = t0.Previous;
                }
                if (t0 != null && (t0.GetReferent() is Pullenti.Ner.Person.PersonIdentityReferent))
                {
                    p.AddSlot(Pullenti.Ner.Person.PersonReferent.ATTR_IDDOC, t0.GetReferent(), false, 0);
                }
            }
            return(new Pullenti.Ner.ReferentToken(p, begin, end)
            {
                Morph = morph, MiscAttrs = (int)p.m_PersonIdentityTyp
            });
        }
예제 #24
0
 /// <summary>
 /// Tries to assemble one phone number from the parsed phone items, starting at index <paramref name="ind"/>.
 /// The items are consumed in order: optional country code, optional city code, then the number parts
 /// (with delimiters) and an optional additional number. A series of plausibility checks then
 /// decides whether the collected digits are accepted as a phone.
 /// </summary>
 /// <param name="pli">Parsed phone items. Note: the list may be truncated in place when its leading items
 /// reproduce the template of <paramref name="prevPhone"/>.</param>
 /// <param name="ind">Index in <paramref name="pli"/> to start from.</param>
 /// <param name="isPhoneBefore">Context flag that relaxes several plausibility checks; presumably set by the caller
 /// when a phone or a phone prefix precedes the items (confirm against callers).</param>
 /// <param name="prevPhone">Previously recognized phone or null; its number length, template and country code
 /// are used as hints for short or partial numbers.</param>
 /// <param name="lev">Recursion depth; attempts deeper than 4 are rejected.</param>
 /// <returns>A token wrapping a newly created PhoneReferent (this method neither registers nor embeds it),
 /// or null when the items do not form a plausible phone.</returns>
 Pullenti.Ner.ReferentToken _TryAttach_(List<Pullenti.Ner.Phone.Internal.PhoneItemToken> pli, int ind, bool isPhoneBefore, PhoneReferent prevPhone, int lev = 0)
 {
     if (ind >= pli.Count || lev > 4) 
         return null;
     string countryCode = null;
     string cityCode = null;
     int j = ind;
     // If the previous phone has a template, build the template of the current items step by step;
     // as soon as it equals the previous one, drop the trailing items (unless the only remainder is a final prefix).
     if (prevPhone != null && prevPhone.m_Template != null && pli[j].ItemType == Pullenti.Ner.Phone.Internal.PhoneItemToken.PhoneItemType.Number) 
     {
         StringBuilder tmp = new StringBuilder();
         for (int jj = j; jj < pli.Count; jj++) 
         {
             if (pli[jj].ItemType == Pullenti.Ner.Phone.Internal.PhoneItemToken.PhoneItemType.Number) 
                 tmp.Append(pli[jj].Value.Length);
             else if (pli[jj].ItemType == Pullenti.Ner.Phone.Internal.PhoneItemToken.PhoneItemType.Delim) 
             {
                 if (pli[jj].Value == " ") 
                     break;
                 tmp.Append(pli[jj].Value);
                 continue;
             }
             else 
                 break;
             string templ0 = tmp.ToString();
             if (templ0 == prevPhone.m_Template) 
             {
                 if ((jj + 1) < pli.Count) 
                 {
                     if (pli[jj + 1].ItemType == Pullenti.Ner.Phone.Internal.PhoneItemToken.PhoneItemType.Prefix && (jj + 2) == pli.Count) 
                     {
                     }
                     else 
                         pli.RemoveRange(jj + 1, pli.Count - jj - 1);
                 }
                 break;
             }
         }
     }
     // Country code: either an explicit CountryCode item (possibly glued with a city code),
     // or an item that can be a country prefix provided the rest parses as a phone on its own.
     if ((j < pli.Count) && pli[j].ItemType == Pullenti.Ner.Phone.Internal.PhoneItemToken.PhoneItemType.CountryCode) 
     {
         countryCode = pli[j].Value;
         if (countryCode != "8") 
         {
             string cc = Pullenti.Ner.Phone.Internal.PhoneHelper.GetCountryPrefix(countryCode);
             if (cc != null && (cc.Length < countryCode.Length)) 
             {
                 // The item is longer than the known prefix: the remainder is taken as the city code.
                 cityCode = countryCode.Substring(cc.Length);
                 countryCode = cc;
             }
         }
         j++;
     }
     else if ((j < pli.Count) && pli[j].CanBeCountryPrefix) 
     {
         int k = j + 1;
         if ((k < pli.Count) && pli[k].ItemType == Pullenti.Ner.Phone.Internal.PhoneItemToken.PhoneItemType.Delim) 
             k++;
         Pullenti.Ner.ReferentToken rrt = this._TryAttach_(pli, k, isPhoneBefore, null, lev + 1);
         if (rrt != null) 
         {
             // Exception: "NNN-NNN..." after a phone is kept as number parts rather than a country prefix.
             if ((((isPhoneBefore && pli[j + 1].ItemType == Pullenti.Ner.Phone.Internal.PhoneItemToken.PhoneItemType.Delim && pli[j + 1].BeginToken.IsHiphen) && pli[j].ItemType == Pullenti.Ner.Phone.Internal.PhoneItemToken.PhoneItemType.Number && pli[j].Value.Length == 3) && ((j + 2) < pli.Count) && pli[j + 2].ItemType == Pullenti.Ner.Phone.Internal.PhoneItemToken.PhoneItemType.Number) && pli[j + 2].Value.Length == 3) 
             {
             }
             else 
             {
                 countryCode = pli[j].Value;
                 j++;
             }
         }
     }
     // A number item starting with '8' or '7' while no country code is known yet.
     if (((j < pli.Count) && pli[j].ItemType == Pullenti.Ner.Phone.Internal.PhoneItemToken.PhoneItemType.Number && ((pli[j].Value[0] == '8' || pli[j].Value[0] == '7'))) && countryCode == null) 
     {
         if (pli[j].Value.Length == 1) 
         {
             countryCode = pli[j].Value;
             j++;
         }
         else if (pli[j].Value.Length == 4) 
         {
             // One digit of country code followed by three digits of city code.
             countryCode = pli[j].Value.Substring(0, 1);
             if (cityCode == null) 
                 cityCode = pli[j].Value.Substring(1);
             else 
                 cityCode += pli[j].Value.Substring(1);
             j++;
         }
         else if (pli[j].Value.Length == 11 && j == (pli.Count - 1) && isPhoneBefore) 
         {
             // A single final 11-digit item: everything after the first digit becomes the number;
             // a leading '8' is not stored as a country code.
             PhoneReferent ph0 = new PhoneReferent();
             if (pli[j].Value[0] != '8') 
                 ph0.CountryCode = pli[j].Value.Substring(0, 1);
             ph0.Number = pli[j].Value.Substring(1, 3) + pli[j].Value.Substring(4);
             return new Pullenti.Ner.ReferentToken(ph0, pli[0].BeginToken, pli[j].EndToken);
         }
         else if (cityCode == null && pli[j].Value.Length > 3 && ((j + 1) < pli.Count)) 
         {
             // If all number items together hold exactly 11 digits, the rest of this item is the city code.
             int sum = 0;
             foreach (Pullenti.Ner.Phone.Internal.PhoneItemToken it in pli) 
             {
                 if (it.ItemType == Pullenti.Ner.Phone.Internal.PhoneItemToken.PhoneItemType.Number) 
                     sum += it.Value.Length;
             }
             if (sum == 11) 
             {
                 cityCode = pli[j].Value.Substring(1);
                 j++;
             }
         }
     }
     // Explicit city code item (appended to a city code obtained above, if any).
     if ((j < pli.Count) && pli[j].ItemType == Pullenti.Ner.Phone.Internal.PhoneItemToken.PhoneItemType.CityCode) 
     {
         if (cityCode == null) 
             cityCode = pli[j].Value;
         else 
             cityCode += pli[j].Value;
         j++;
     }
     if ((j < pli.Count) && pli[j].ItemType == Pullenti.Ner.Phone.Internal.PhoneItemToken.PhoneItemType.Delim) 
         j++;
     // After country code "8" a 3- or 4-digit number item is taken as the city code when enough items follow.
     if ((countryCode == "8" && cityCode == null && ((j + 3) < pli.Count)) && pli[j].ItemType == Pullenti.Ner.Phone.Internal.PhoneItemToken.PhoneItemType.Number) 
     {
         if (pli[j].Value.Length == 3 || pli[j].Value.Length == 4) 
         {
             cityCode = pli[j].Value;
             j++;
             if ((j < pli.Count) && pli[j].ItemType == Pullenti.Ner.Phone.Internal.PhoneItemToken.PhoneItemType.Delim) 
                 j++;
         }
     }
     // Expected total number length for specific countries (only "421" is configured here).
     int normalNumLen = 0;
     if (countryCode == "421") 
         normalNumLen = 9;
     StringBuilder num = new StringBuilder();
     StringBuilder templ = new StringBuilder();
     List<int> partLength = new List<int>();
     string delim = null;
     bool ok = false;
     string additional = null;
     bool std = false;
     // Fast path: country code followed by exactly Number-Delim-Number-Delim-Number with accepted part lengths.
     if (countryCode != null && ((j + 4) < pli.Count) && j > 0) 
     {
         if (((((pli[j - 1].Value == "-" || pli[j - 1].ItemType == Pullenti.Ner.Phone.Internal.PhoneItemToken.PhoneItemType.CountryCode)) && pli[j].ItemType == Pullenti.Ner.Phone.Internal.PhoneItemToken.PhoneItemType.Number && pli[j + 1].ItemType == Pullenti.Ner.Phone.Internal.PhoneItemToken.PhoneItemType.Delim) && pli[j + 2].ItemType == Pullenti.Ner.Phone.Internal.PhoneItemToken.PhoneItemType.Number && pli[j + 3].ItemType == Pullenti.Ner.Phone.Internal.PhoneItemToken.PhoneItemType.Delim) && pli[j + 4].ItemType == Pullenti.Ner.Phone.Internal.PhoneItemToken.PhoneItemType.Number) 
         {
             if ((((pli[j].Value.Length + pli[j + 2].Value.Length) == 6 || ((pli[j].Value.Length == 4 && pli[j + 2].Value.Length == 5)))) && ((pli[j + 4].Value.Length == 4 || pli[j + 4].Value.Length == 1))) 
             {
                 num.Append(pli[j].Value);
                 num.Append(pli[j + 2].Value);
                 num.Append(pli[j + 4].Value);
                 templ.AppendFormat("{0}{1}{2}{3}{4}", pli[j].Value.Length, pli[j + 1].Value, pli[j + 2].Value.Length, pli[j + 3].Value, pli[j + 4].Value.Length);
                 std = true;
                 ok = true;
                 j += 5;
             }
         }
     }
     // General path: accumulate number parts into num, their lengths into partLength and the
     // "length + delimiter" pattern into templ. Skipped entirely when the fast path matched.
     for (; j < pli.Count; j++) 
     {
         if (std) 
             break;
         if (pli[j].ItemType == Pullenti.Ner.Phone.Internal.PhoneItemToken.PhoneItemType.Delim) 
         {
             if (pli[j].IsInBrackets) 
                 continue;
             if (j > 0 && pli[j - 1].IsInBrackets) 
                 continue;
             if (templ.Length > 0) 
                 templ.Append(pli[j].Value);
             if (delim == null) 
                 delim = pli[j].Value;
             else if (pli[j].Value != delim) 
             {
                 // The delimiter changed. If two parts (3 or 4 digits, then 3 digits) were read without
                 // a city code, reinterpret the first part as the city code and continue.
                 if ((partLength.Count == 2 && ((partLength[0] == 3 || partLength[0] == 4)) && cityCode == null) && partLength[1] == 3) 
                 {
                     cityCode = num.ToString().Substring(0, partLength[0]);
                     num.Remove(0, partLength[0]);
                     partLength.RemoveAt(0);
                     delim = pli[j].Value;
                     continue;
                 }
                 if (isPhoneBefore && ((j + 1) < pli.Count) && pli[j + 1].ItemType == Pullenti.Ner.Phone.Internal.PhoneItemToken.PhoneItemType.Number) 
                 {
                     if (num.Length < 6) 
                         continue;
                     if (normalNumLen > 0 && (num.Length + pli[j + 1].Value.Length) == normalNumLen) 
                         continue;
                 }
                 break;
             }
             else 
                 continue;
             // Reached only right after the first delimiter was remembered: a number must follow it.
             ok = false;
         }
         else if (pli[j].ItemType == Pullenti.Ner.Phone.Internal.PhoneItemToken.PhoneItemType.Number) 
         {
             // First number part right after a table control char: reject when the item list is
             // followed (optionally after ',' or '.') by a number token.
             if (num.Length == 0 && pli[j].BeginToken.Previous != null && pli[j].BeginToken.Previous.IsTableControlChar) 
             {
                 Pullenti.Ner.Token tt = pli[pli.Count - 1].EndToken.Next;
                 if (tt != null && tt.IsCharOf(",.")) 
                     tt = tt.Next;
                 if (tt is Pullenti.Ner.NumberToken) 
                     return null;
             }
             // Do not let the number grow beyond 13 digits; stop before this item (and its delimiter).
             if ((num.Length + pli[j].Value.Length) > 13) 
             {
                 if (j > 0 && pli[j - 1].ItemType == Pullenti.Ner.Phone.Internal.PhoneItemToken.PhoneItemType.Delim) 
                     j--;
                 ok = true;
                 break;
             }
             num.Append(pli[j].Value);
             partLength.Add(pli[j].Value.Length);
             templ.Append(pli[j].Value.Length);
             ok = true;
             if (num.Length > 10) 
             {
                 j++;
                 if ((j < pli.Count) && pli[j].ItemType == Pullenti.Ner.Phone.Internal.PhoneItemToken.PhoneItemType.AddNumber) 
                 {
                     additional = pli[j].Value;
                     j++;
                 }
                 break;
             }
         }
         else if (pli[j].ItemType == Pullenti.Ner.Phone.Internal.PhoneItemToken.PhoneItemType.AddNumber) 
         {
             additional = pli[j].Value;
             j++;
             break;
         }
         else 
             break;
     }
     // A final bracketed item of 3 or 4 characters is taken as the additional number.
     if ((j == (pli.Count - 1) && pli[j].IsInBrackets && ((pli[j].Value.Length == 3 || pli[j].Value.Length == 4))) && additional == null) 
     {
         additional = pli[j].Value;
         j++;
     }
     // A bracketed prefix after the number counts as phone context.
     if ((j < pli.Count) && pli[j].ItemType == Pullenti.Ner.Phone.Internal.PhoneItemToken.PhoneItemType.Prefix && pli[j].IsInBrackets) 
     {
         isPhoneBefore = true;
         j++;
     }
     // Try to split a long city code into country prefix + city code.
     if ((countryCode == null && cityCode != null && cityCode.Length > 3) && (num.Length < 8) && cityCode[0] != '8') 
     {
         if ((cityCode.Length + num.Length) == 10) 
         {
         }
         else 
         {
             string cc = Pullenti.Ner.Phone.Internal.PhoneHelper.GetCountryPrefix(cityCode);
             if (cc != null) 
             {
                 if (cc.Length > 1 && (cityCode.Length - cc.Length) > 1) 
                 {
                     countryCode = cc;
                     cityCode = cityCode.Substring(cc.Length);
                 }
             }
         }
     }
     // City code written with a leading "00" before the country prefix.
     if (countryCode == null && cityCode != null && cityCode.StartsWith("00")) 
     {
         string cc = Pullenti.Ner.Phone.Internal.PhoneHelper.GetCountryPrefix(cityCode.Substring(2));
         if (cc != null) 
         {
             if (cityCode.Length > (cc.Length + 3)) 
             {
                 countryCode = cc;
                 cityCode = cityCode.Substring(cc.Length + 2);
             }
         }
     }
     // No number parts were read but the "city code" is long enough to hold the whole number.
     if (num.Length == 0 && cityCode != null) 
     {
         if (cityCode.Length == 10) 
         {
             num.Append(cityCode.Substring(3));
             partLength.Add(num.Length);
             cityCode = cityCode.Substring(0, 3);
             ok = true;
         }
         else if (((cityCode.Length == 9 || cityCode.Length == 11 || cityCode.Length == 8)) && ((isPhoneBefore || countryCode != null))) 
         {
             num.Append(cityCode);
             partLength.Add(num.Length);
             cityCode = null;
             ok = true;
         }
     }
     // Plausibility checks for short numbers.
     if (num.Length < 4) 
         ok = false;
     if (num.Length < 7) 
     {
         if (cityCode != null && (cityCode.Length + num.Length) > 7) 
         {
             if (!isPhoneBefore && cityCode.Length == 3) 
             {
                 // Scan the part lengths; if the scan runs through all parts without hitting a break,
                 // the number is rejected (except for country code "61").
                 int ii;
                 for (ii = 0; ii < partLength.Count; ii++) 
                 {
                     if (partLength[ii] == 3) 
                     {
                     }
                     else if (partLength[ii] > 3) 
                         break;
                     else if ((ii < (partLength.Count - 1)) || (partLength[ii] < 2)) 
                         break;
                 }
                 if (ii >= partLength.Count) 
                 {
                     if (countryCode == "61") 
                     {
                     }
                     else 
                         ok = false;
                 }
             }
         }
         else if (((num.Length == 6 || num.Length == 5)) && ((partLength.Count >= 1 && partLength.Count <= 3)) && isPhoneBefore) 
         {
             if (pli[0].ItemType == Pullenti.Ner.Phone.Internal.PhoneItemToken.PhoneItemType.Prefix && pli[0].Kind == PhoneKind.Home) 
                 ok = false;
         }
         else if (prevPhone != null && prevPhone.Number != null && ((prevPhone.Number.Length == num.Length || prevPhone.Number.Length == (num.Length + 3) || prevPhone.Number.Length == (num.Length + 4)))) 
         {
         }
         else if (num.Length > 4 && prevPhone != null && templ.ToString() == prevPhone.m_Template) 
             ok = true;
         else 
             ok = false;
     }
     if (delim == "." && countryCode == null && cityCode == null) 
         ok = false;
     // A long number without any codes in phone context: try to cut a country prefix that leaves 9 digits.
     if ((isPhoneBefore && countryCode == null && cityCode == null) && num.Length > 10) 
     {
         string cc = Pullenti.Ner.Phone.Internal.PhoneHelper.GetCountryPrefix(num.ToString());
         if (cc != null) 
         {
             if ((num.Length - cc.Length) == 9) 
             {
                 countryCode = cc;
                 num.Remove(0, cc.Length);
                 ok = true;
             }
         }
     }
     // Check the part structure against the accepted groupings.
     if (ok) 
     {
         if (std) 
         {
         }
         else if (prevPhone != null && prevPhone.Number != null && (((prevPhone.Number.Length == num.Length || prevPhone.Number.Length == (num.Length + 3) || prevPhone.Number.Length == (num.Length + 4)) || prevPhone.m_Template == templ.ToString()))) 
         {
         }
         else if ((partLength.Count == 3 && partLength[0] == 3 && partLength[1] == 2) && partLength[2] == 2) 
         {
         }
         else if (partLength.Count == 3 && isPhoneBefore) 
         {
         }
         else if ((partLength.Count == 4 && ((partLength[0] + partLength[1]) == 3) && partLength[2] == 2) && partLength[3] == 2) 
         {
         }
         else if ((partLength.Count == 4 && partLength[0] == 3 && partLength[1] == 3) && partLength[2] == 2 && partLength[3] == 2) 
         {
         }
         else if (partLength.Count == 5 && (partLength[1] + partLength[2]) == 4 && (partLength[3] + partLength[4]) == 4) 
         {
         }
         else if (partLength.Count > 4) 
             ok = false;
         else if (partLength.Count > 3 && cityCode != null) 
             ok = false;
         else if ((isPhoneBefore || cityCode != null || countryCode != null) || additional != null) 
             ok = true;
         else 
         {
             // No supporting context at all: accept a 6- or 7-digit number only if the phone that
             // follows has a compatible length (same, or longer by 3 or 4 digits).
             ok = false;
             if (((num.Length == 6 || num.Length == 7)) && (partLength.Count < 4) && j > 0) 
             {
                 PhoneReferent nextPh = this.GetNextPhone(pli[j - 1].EndToken.Next, lev + 1);
                 // Guard against a referent without a number, as done for prevPhone elsewhere in this method.
                 if (nextPh != null && nextPh.Number != null) 
                 {
                     int d = nextPh.Number.Length - num.Length;
                     if (d == 0 || d == 3 || d == 4) 
                         ok = true;
                 }
             }
         }
     }
     Pullenti.Ner.Token end = (j > 0 ? pli[j - 1].EndToken : null);
     if (end == null) 
         ok = false;
     // A bare number without any context must not be glued to the following text.
     if ((ok && cityCode == null && countryCode == null) && prevPhone == null && !isPhoneBefore) 
     {
         if (!end.IsWhitespaceAfter && end.Next != null) 
         {
             Pullenti.Ner.Token tt = end.Next;
             if (tt.IsCharOf(".,)") && tt.Next != null) 
                 tt = tt.Next;
             if (!tt.IsWhitespaceBefore) 
                 ok = false;
         }
     }
     if (!ok) 
         return null;
     // Drop a trailing delimiter from the template.
     if (templ.Length > 0 && !char.IsDigit(templ[templ.Length - 1])) 
         templ.Length--;
     if ((countryCode == null && cityCode != null && cityCode.Length > 3) && num.Length > 6) 
     {
         string cc = Pullenti.Ner.Phone.Internal.PhoneHelper.GetCountryPrefix(cityCode);
         if (cc != null && ((cc.Length + 1) < cityCode.Length)) 
         {
             countryCode = cc;
             cityCode = cityCode.Substring(cc.Length);
         }
     }
     // Digits right after "ГОСТ" / "ТУ" are never accepted as a phone
     // (presumably these are standard designations — confirm).
     if (pli[0].BeginToken.Previous != null) 
     {
         if (pli[0].BeginToken.Previous.IsValue("ГОСТ", null) || pli[0].BeginToken.Previous.IsValue("ТУ", null)) 
             return null;
     }
     PhoneReferent ph = new PhoneReferent();
     if (countryCode != null) 
         ph.CountryCode = countryCode;
     string number = num.ToString();
     // Derive a city code from the number itself when none was found explicitly.
     if ((cityCode == null && num.Length > 7 && partLength.Count > 0) && (partLength[0] < 5)) 
     {
         cityCode = number.Substring(0, partLength[0]);
         number = number.Substring(partLength[0]);
     }
     if (cityCode == null && num.Length == 11 && num[0] == '8') 
     {
         cityCode = number.Substring(1, 3);
         number = number.Substring(4);
     }
     if (cityCode == null && num.Length == 10) 
     {
         cityCode = number.Substring(0, 3);
         number = number.Substring(3);
     }
     if (cityCode != null) 
         number = cityCode + number;
     else if (countryCode == null && prevPhone != null && prevPhone.Number != null) 
     {
         // Short number following another phone: borrow the missing leading digits from the previous number.
         // prevPhone.Number is null-checked here for consistency with the earlier checks in this method;
         // without the guard a referent lacking a number would raise NullReferenceException.
         string prevNumber = prevPhone.Number;
         bool ok1 = false;
         if (prevNumber.Length >= (number.Length + 2)) 
             ok1 = true;
         else if (templ.Length > 0 && prevPhone.m_Template != null && Pullenti.Morph.LanguageHelper.EndsWith(prevPhone.m_Template, templ.ToString())) 
             ok1 = true;
         if (ok1 && prevNumber.Length > number.Length) 
             number = prevNumber.Substring(0, prevNumber.Length - number.Length) + number;
     }
     // Inherit the country code from the previous phone when both numbers have the same length.
     if (ph.CountryCode == null && prevPhone != null && prevPhone.CountryCode != null) 
     {
         if (prevPhone.Number != null && prevPhone.Number.Length == number.Length) 
             ph.CountryCode = prevPhone.CountryCode;
     }
     // A number consisting only of zeros is rejected.
     ok = false;
     foreach (char d in number) 
     {
         if (d != '0') 
         {
             ok = true;
             break;
         }
     }
     if (!ok) 
         return null;
     if (countryCode != null) 
     {
         if (number.Length < 7) 
             return null;
     }
     else 
     {
         // No country code so far: the number may start with one, provided 10-11 digits remain.
         string s = Pullenti.Ner.Phone.Internal.PhoneHelper.GetCountryPrefix(number);
         if (s != null) 
         {
             string num2 = number.Substring(s.Length);
             if (num2.Length >= 10 && num2.Length <= 11) 
             {
                 number = num2;
                 if (s != "7") 
                     ph.CountryCode = s;
             }
         }
         if (number.Length == 8 && prevPhone == null) 
             return null;
     }
     // Numbers longer than 11 digits are allowed only (up to 13) for country codes "1" and "43".
     if (number.Length > 11) 
     {
         if ((number.Length < 14) && ((countryCode == "1" || countryCode == "43"))) 
         {
         }
         else 
             return null;
     }
     ph.Number = number;
     if (additional != null) 
         ph.AddSlot(PhoneReferent.ATTR_ADDNUMBER, additional, true, 0);
     // Without phone context, a following '+', '=' or hyphen on the same line disqualifies the number.
     if (!isPhoneBefore && end.Next != null && !end.IsNewlineAfter) 
     {
         if (end.Next.IsCharOf("+=") || end.Next.IsHiphen) 
             return null;
     }
     // With country code "7" the number must have exactly 10 digits.
     if (countryCode == "7") 
     {
         if (number.Length != 10) 
             return null;
     }
     ph.m_Template = templ.ToString();
     // A trailing prefix on the same line is included into the token and may define the phone kind.
     if (j == (pli.Count - 1) && pli[j].ItemType == Pullenti.Ner.Phone.Internal.PhoneItemToken.PhoneItemType.Prefix && !pli[j].IsNewlineBefore) 
     {
         end = pli[j].EndToken;
         if (pli[j].Kind != PhoneKind.Undefined) 
             ph.Kind = pli[j].Kind;
     }
     Pullenti.Ner.ReferentToken res = new Pullenti.Ner.ReferentToken(ph, pli[0].BeginToken, end);
     // A leading prefix separated by a table control char is left outside the resulting token.
     if (pli[0].ItemType == Pullenti.Ner.Phone.Internal.PhoneItemToken.PhoneItemType.Prefix && pli[0].EndToken.Next.IsTableControlChar) 
         res.BeginToken = pli[1].BeginToken;
     return res;
 }
예제 #25
0
        /// <summary>
        /// Runs weapon extraction over the whole token chain of <paramref name="kit"/> in two passes.
        /// The first pass parses weapon item lists, registers the resulting referents and embeds their tokens,
        /// while indexing each referent by its model (alone and prefixed with its brand) and by its name.
        /// The second pass re-scans the text and embeds additional tokens wherever an indexed model
        /// or name occurs again.
        /// </summary>
        /// <param name="kit">Analysis container whose token chain is scanned and modified in place.</param>
        public override void Process(Pullenti.Ner.Core.AnalysisKit kit)
        {
            Pullenti.Ner.Core.AnalyzerData data = kit.GetAnalyzerData(this);
            Pullenti.Ner.Core.TerminCollection modelTermins = new Pullenti.Ner.Core.TerminCollection();
            Dictionary<string, List<Pullenti.Ner.Referent>> referentsByModel = new Dictionary<string, List<Pullenti.Ner.Referent>>();
            Pullenti.Ner.Core.TerminCollection nameTermins = new Pullenti.Ner.Core.TerminCollection();

            // Pass 1: extract weapons and build the model / name indexes.
            for (Pullenti.Ner.Token cursor = kit.FirstToken; cursor != null; cursor = cursor.Next)
            {
                List<Pullenti.Ner.Weapon.Internal.WeaponItemToken> items = Pullenti.Ner.Weapon.Internal.WeaponItemToken.TryParseList(cursor, 10);
                if (items == null)
                {
                    continue;
                }
                List<Pullenti.Ner.ReferentToken> attached = this.TryAttach(items, false);
                if (attached == null)
                {
                    continue;
                }
                foreach (Pullenti.Ner.ReferentToken embedded in attached)
                {
                    embedded.Referent = data.RegisterReferent(embedded.Referent);
                    kit.EmbedToken(embedded);
                    // Scanning resumes after the embedded token.
                    cursor = embedded;
                    foreach (Pullenti.Ner.Slot slot in embedded.Referent.Slots)
                    {
                        if (slot.TypeName != WeaponReferent.ATTR_MODEL)
                        {
                            if (slot.TypeName == WeaponReferent.ATTR_NAME)
                            {
                                Pullenti.Ner.Core.Termin nameTermin = new Pullenti.Ner.Core.Termin(slot.Value.ToString());
                                nameTermin.Tag = embedded.Referent;
                                nameTermins.Add(nameTermin);
                            }
                            continue;
                        }

                        // Index the model twice at most: first as is, then as "<brand> <model>" when a brand is set.
                        // Keys that start with a digit are not indexed.
                        string modelKey = slot.Value.ToString();
                        bool brandPrefixed = false;
                        while (true)
                        {
                            if (!char.IsDigit(modelKey[0]))
                            {
                                List<Pullenti.Ner.Referent> bucket;
                                if (!referentsByModel.TryGetValue(modelKey, out bucket))
                                {
                                    bucket = new List<Pullenti.Ner.Referent>();
                                    referentsByModel.Add(modelKey, bucket);
                                }
                                if (!bucket.Contains(embedded.Referent))
                                {
                                    bucket.Add(embedded.Referent);
                                }
                                modelTermins.AddString(modelKey, bucket, null, false);
                            }
                            if (brandPrefixed)
                            {
                                break;
                            }
                            string brand = embedded.Referent.GetStringValue(WeaponReferent.ATTR_BRAND);
                            if (brand == null)
                            {
                                break;
                            }
                            modelKey = string.Format("{0} {1}", brand, modelKey);
                            brandPrefixed = true;
                        }
                    }
                }
            }

            bool nothingIndexed = referentsByModel.Count == 0 && nameTermins.Termins.Count == 0;
            if (nothingIndexed)
            {
                return;
            }

            // Pass 2: find further mentions of the indexed models and names.
            for (Pullenti.Ner.Token cursor = kit.FirstToken; cursor != null; cursor = cursor.Next)
            {
                // A bracketed sequence whose content is exactly a known name is embedded as a whole.
                Pullenti.Ner.Core.BracketSequenceToken bracket = Pullenti.Ner.Core.BracketHelper.TryParse(cursor, Pullenti.Ner.Core.BracketParseAttr.No, 10);
                if (bracket != null)
                {
                    Pullenti.Ner.Core.TerminToken inner = nameTermins.TryParse(cursor.Next, Pullenti.Ner.Core.TerminParseAttr.No);
                    if (inner != null && inner.EndToken.Next == bracket.EndToken)
                    {
                        Pullenti.Ner.ReferentToken bracketed = new Pullenti.Ner.ReferentToken(inner.Termin.Tag as Pullenti.Ner.Referent, bracket.BeginToken, bracket.EndToken);
                        kit.EmbedToken(bracketed);
                        cursor = bracketed;
                        continue;
                    }
                }

                if (!(cursor is Pullenti.Ner.TextToken) || !cursor.Chars.IsLetter)
                {
                    continue;
                }

                // Models are tried first; names only for tokens that are not all lower-case.
                Pullenti.Ner.Core.TerminToken match = modelTermins.TryParse(cursor, Pullenti.Ner.Core.TerminParseAttr.No);
                if (match == null && !cursor.Chars.IsAllLower)
                {
                    match = nameTermins.TryParse(cursor, Pullenti.Ner.Core.TerminParseAttr.No);
                }
                if (match == null)
                {
                    continue;
                }

                // Without whitespace after the match, the next token must be one of ",.)" or a bracket.
                if (!match.IsWhitespaceAfter)
                {
                    Pullenti.Ner.Token following = match.EndToken.Next;
                    bool closingPunctuation = following != null && following.IsCharOf(",.)");
                    if (!closingPunctuation && !Pullenti.Ner.Core.BracketHelper.IsBracket(following, false))
                    {
                        continue;
                    }
                }

                // Model termins carry a list of referents (used only when it holds exactly one entry);
                // name termins carry the referent itself.
                List<Pullenti.Ner.Referent> candidates = match.Termin.Tag as List<Pullenti.Ner.Referent>;
                Pullenti.Ner.Referent target = (candidates != null && candidates.Count == 1)
                    ? candidates[0]
                    : (match.Termin.Tag as Pullenti.Ner.Referent);
                if (target == null)
                {
                    continue;
                }

                // A brand item right before the match is attached to the referent and included in the token.
                Pullenti.Ner.Weapon.Internal.WeaponItemToken brandItem = Pullenti.Ner.Weapon.Internal.WeaponItemToken.TryParse(match.BeginToken.Previous, null, false, true);
                if (brandItem != null && brandItem.Typ == Pullenti.Ner.Weapon.Internal.WeaponItemToken.Typs.Brand)
                {
                    target.AddSlot(WeaponReferent.ATTR_BRAND, brandItem.Value, false, 0);
                    match.BeginToken = brandItem.BeginToken;
                }
                Pullenti.Ner.ReferentToken mention = new Pullenti.Ner.ReferentToken(target, match.BeginToken, match.EndToken);
                kit.EmbedToken(mention);
                cursor = mention;
            }
        }
예제 #26
0
        public static List <WeaponItemToken> TryParseList(Pullenti.Ner.Token t, int maxCount = 10)
        {
            WeaponItemToken tr = TryParse(t, null, false, false);

            if (tr == null)
            {
                return(null);
            }
            if (tr.Typ == Typs.Class || tr.Typ == Typs.Date)
            {
                return(null);
            }
            WeaponItemToken        tr0 = tr;
            List <WeaponItemToken> res = new List <WeaponItemToken>();

            if (tr.InnerTokens.Count > 0)
            {
                res.AddRange(tr.InnerTokens);
                if (res[0].BeginChar > tr.BeginChar)
                {
                    res[0].BeginToken = tr.BeginToken;
                }
            }
            res.Add(tr);
            t = tr.EndToken.Next;
            if (tr.Typ == Typs.Noun)
            {
                for (; t != null; t = t.Next)
                {
                    if (t.IsChar(':') || t.IsHiphen)
                    {
                    }
                    else
                    {
                        break;
                    }
                }
            }
            bool andConj = false;

            for (; t != null; t = t.Next)
            {
                if (maxCount > 0 && res.Count >= maxCount)
                {
                    break;
                }
                if (t.IsChar(':'))
                {
                    continue;
                }
                if (tr0.Typ == Typs.Noun)
                {
                    if (t.IsHiphen && t.Next != null)
                    {
                        t = t.Next;
                    }
                }
                tr = TryParse(t, tr0, false, false);
                if (tr == null)
                {
                    if (Pullenti.Ner.Core.BracketHelper.CanBeEndOfSequence(t, true, null, false) && t.Next != null)
                    {
                        if (tr0.Typ == Typs.Model || tr0.Typ == Typs.Brand)
                        {
                            Pullenti.Ner.Token tt1 = t.Next;
                            if (tt1 != null && tt1.IsComma)
                            {
                                tt1 = tt1.Next;
                            }
                            tr = TryParse(tt1, tr0, false, false);
                        }
                    }
                }
                if (tr == null && (t is Pullenti.Ner.ReferentToken))
                {
                    Pullenti.Ner.ReferentToken rt = t as Pullenti.Ner.ReferentToken;
                    if (rt.BeginToken == rt.EndToken && (rt.BeginToken is Pullenti.Ner.TextToken))
                    {
                        tr = TryParse(rt.BeginToken, tr0, false, false);
                        if (tr != null && tr.BeginToken == tr.EndToken)
                        {
                            tr.BeginToken = (tr.EndToken = t);
                        }
                    }
                }
                if (tr == null && t.IsChar('('))
                {
                    Pullenti.Ner.Core.BracketSequenceToken br = Pullenti.Ner.Core.BracketHelper.TryParse(t, Pullenti.Ner.Core.BracketParseAttr.No, 100);
                    if (br != null)
                    {
                        Pullenti.Ner.Token tt = br.EndToken.Next;
                        if (tt != null && tt.IsComma)
                        {
                            tt = tt.Next;
                        }
                        tr = TryParse(tt, tr0, false, false);
                        if (tr != null && tr.Typ == Typs.Number)
                        {
                        }
                        else
                        {
                            tr = null;
                        }
                    }
                }
                if (tr == null && t.IsHiphen)
                {
                    if (tr0.Typ == Typs.Brand || tr0.Typ == Typs.Model)
                    {
                        tr = TryParse(t.Next, tr0, false, false);
                    }
                }
                if (tr == null && t.IsComma)
                {
                    if ((tr0.Typ == Typs.Name || tr0.Typ == Typs.Brand || tr0.Typ == Typs.Model) || tr0.Typ == Typs.Class || tr0.Typ == Typs.Date)
                    {
                        tr = TryParse(t.Next, tr0, true, false);
                        if (tr != null)
                        {
                            if (tr.Typ == Typs.Number)
                            {
                            }
                            else
                            {
                                tr = null;
                            }
                        }
                    }
                }
                if (tr == null)
                {
                    break;
                }
                if (t.IsNewlineBefore)
                {
                    if (tr.Typ != Typs.Number)
                    {
                        break;
                    }
                }
                if (tr.InnerTokens.Count > 0)
                {
                    res.AddRange(tr.InnerTokens);
                }
                res.Add(tr);
                tr0 = tr;
                t   = tr.EndToken;
                if (andConj)
                {
                    break;
                }
            }
            for (int i = 0; i < (res.Count - 1); i++)
            {
                if (res[i].Typ == Typs.Model && res[i + 1].Typ == Typs.Model)
                {
                    res[i].EndToken = res[i + 1].EndToken;
                    res[i].Value    = string.Format("{0}{1}{2}", res[i].Value, (res[i].EndToken.Next != null && res[i].EndToken.Next.IsHiphen ? '-' : ' '), res[i + 1].Value);
                    res.RemoveAt(i + 1);
                    i--;
                }
            }
            return(res);
        }
예제 #27
0
 /// <summary>
 /// Tries to parse one author item of a bibliographic reference starting at the given token.
 /// </summary>
 /// <remarks>
 /// Three forms are recognised, in this order:
 /// a person returned by PersonItemToken.TryParsePerson;
 /// an author item enclosed in square brackets ("[...]");
 /// an "and others" marker ("И ДРУГИЕ", "И ДР", "ET AL") with an optional trailing period.
 /// </remarks>
 /// <param name="t">token to start parsing from; null yields null</param>
 /// <param name="prevPersTemplate">name template forwarded to the person parser</param>
 /// <returns>the parsed item, or null when nothing is recognised at this position</returns>
 public static BookLinkToken TryParseAuthor(Pullenti.Ner.Token t, Pullenti.Ner.Person.Internal.FioTemplateType prevPersTemplate = Pullenti.Ner.Person.Internal.FioTemplateType.Undefined)
 {
     if (t == null)
     {
         return(null);
     }
     Pullenti.Ner.ReferentToken rtp = Pullenti.Ner.Person.Internal.PersonItemToken.TryParsePerson(t, prevPersTemplate);
     if (rtp != null)
     {
         BookLinkToken re;
         if (rtp.Data == null)
         {
             // No attached data: keep a reference to the referent itself.
             re = new BookLinkToken(t, (rtp == t ? t : rtp.EndToken))
             {
                 Typ = BookLinkTyp.Person, Ref = rtp.Referent
             };
         }
         else
         {
             // Attached data present: keep the whole parsed token.
             re = new BookLinkToken(t, rtp.EndToken)
             {
                 Typ = BookLinkTyp.Person, Tok = rtp
             };
         }
         re.PersonTemplate = (Pullenti.Ner.Person.Internal.FioTemplateType)rtp.MiscAttrs;
         // Inspect person-property tokens inside the person span. Only the first one found decides:
         // a capitalised property that is not the first token is stored as the start of a name,
         // any other property rejects the whole item.
         for (Pullenti.Ner.Token tt = rtp.BeginToken; tt != null && tt.EndChar <= rtp.EndChar; tt = tt.Next)
         {
             if (!(tt.GetReferent() is Pullenti.Ner.Person.PersonPropertyReferent))
             {
                 continue;
             }
             Pullenti.Ner.ReferentToken rt = tt as Pullenti.Ner.ReferentToken;
             if (rt.BeginToken.Chars.IsCapitalUpper && tt != rtp.BeginToken)
             {
                 re.StartOfName = Pullenti.Ner.Core.MiscHelper.GetTextValueOfMetaToken(rt, Pullenti.Ner.Core.GetTextAttr.KeepRegister);
                 break;
             }
             return(null);
         }
         return(re);
     }
     if (t.IsChar('['))
     {
         // Bracketed author: accepted only when the closing ']' immediately follows the inner item.
         BookLinkToken re = TryParseAuthor(t.Next, Pullenti.Ner.Person.Internal.FioTemplateType.Undefined);
         if (re != null && re.EndToken.Next != null && re.EndToken.Next.IsChar(']'))
         {
             re.BeginToken = t;
             re.EndToken   = re.EndToken.Next;
             return(re);
         }
     }
     if (((t.IsValue("И", null) || t.IsValue("ET", null))) && t.Next != null)
     {
         if (t.Next.IsValue("ДРУГИЕ", null) || t.Next.IsValue("ДР", null) || t.Next.IsValue("AL", null))
         {
             BookLinkToken res = new BookLinkToken(t, t.Next)
             {
                 Typ = BookLinkTyp.AndOthers
             };
             // Swallow the abbreviation period, if any.
             if (t.Next.Next != null && t.Next.Next.IsChar('.'))
             {
                 res.EndToken = res.EndToken.Next;
             }
             return(res);
         }
     }
     return(null);
 }
예제 #28
0
        /// <summary>
        /// Tries to build a denomination that starts at the given token and continues over the
        /// tokens that follow it: digit numbers, short text tokens, separators and a few symbols.
        /// </summary>
        /// <param name="t">first token of the candidate; null yields null</param>
        /// <param name="forOntology">when true, the scan does not stop at whitespace and the
        /// plausibility checks applied after the scan are skipped</param>
        /// <returns>a token spanning from <paramref name="t"/> to the last accepted token and carrying
        /// a new DenominationReferent, or null when the candidate is rejected</returns>
        public Pullenti.Ner.ReferentToken TryAttach(Pullenti.Ner.Token t, bool forOntology = false)
        {
            if (t == null)
            {
                return null;
            }
            Pullenti.Ner.ReferentToken special = this.TryAttachSpec(t);
            if (special != null)
            {
                return special;
            }
            if (t.Chars.IsAllLower)
            {
                // A lower-case start is accepted only when a number is glued to it ...
                if (t.IsWhitespaceAfter || !(t.Next is Pullenti.Ner.NumberToken))
                {
                    return null;
                }
                // ... and the start itself begins the text, follows whitespace, or follows ',' / ':'.
                if (t.Previous != null && !t.IsWhitespaceBefore && !t.Previous.IsCharOf(",:"))
                {
                    return null;
                }
            }

            // Text collected from the tokens after the start token; separators are written as '-'.
            StringBuilder tail = new StringBuilder();
            Pullenti.Ner.Token lastAccepted = t;
            bool endsWithSeparator = false;
            bool wellFormed = true;
            int numberCount = 0;
            int symbolCount = 0;

            for (Pullenti.Ner.Token cur = lastAccepted.Next; cur != null; cur = cur.Next)
            {
                if (cur.IsWhitespaceBefore && !forOntology)
                {
                    break;
                }
                endsWithSeparator = cur.IsCharOf("/\\_") || cur.IsHiphen;
                if (endsWithSeparator)
                {
                    tail.Append('-');
                    continue;
                }
                if (cur is Pullenti.Ner.NumberToken)
                {
                    Pullenti.Ner.NumberToken number = (Pullenti.Ner.NumberToken)cur;
                    if (number.Typ != Pullenti.Ner.NumberSpellingType.Digit)
                    {
                        break;
                    }
                    lastAccepted = number;
                    tail.Append(number.GetSourceText());
                    numberCount++;
                    continue;
                }
                if (!(cur is Pullenti.Ner.TextToken))
                {
                    break;
                }
                Pullenti.Ner.TextToken text = (Pullenti.Ner.TextToken)cur;
                if (text.LengthChar > 3)
                {
                    wellFormed = false;
                    break;
                }
                if (!char.IsLetter(text.Term[0]))
                {
                    // ',' / ':' and closing brackets end the candidate without invalidating it.
                    if (text.IsCharOf(",:") || Pullenti.Ner.Core.BracketHelper.CanBeEndOfSequence(text, false, null, false))
                    {
                        break;
                    }
                    // Any other non-letter outside the allowed symbol set invalidates it.
                    if (!text.IsCharOf("+*&^#@!"))
                    {
                        wellFormed = false;
                        break;
                    }
                    symbolCount++;
                }
                lastAccepted = text;
                tail.Append(text.GetSourceText());
            }

            if (!forOntology)
            {
                // Checks run left to right; CheckAttach is consulted only when all cheaper checks pass.
                bool rejected = (tail.Length < 1)
                    || !wellFormed
                    || endsWithSeparator
                    || tail.Length > 12
                    || tail[tail.Length - 1] == '!'
                    || (numberCount + symbolCount) == 0
                    || !this.CheckAttach(t, lastAccepted);
                if (rejected)
                {
                    return null;
                }
            }

            DenominationReferent denomination = new DenominationReferent();
            denomination.AddValue(t, lastAccepted);
            return new Pullenti.Ner.ReferentToken(denomination, t, lastAccepted);
        }
예제 #29
0
 /// <summary>
 /// Main entity-extraction routine: walks the token chain and embeds denomination entities into it.
 /// </summary>
 /// <remarks>
 /// At most two passes are made over the text. The first pass may detect new denominations;
 /// the second pass only runs when the first one found some, and then only attaches special
 /// forms and entries of the local ontology. Each pass stops scanning after about one minute.
 /// </remarks>
 /// <param name="kit">analysis kit providing the token chain and the analyzer data</param>
 public override void Process(Pullenti.Ner.Core.AnalysisKit kit)
 {
     Pullenti.Ner.Core.AnalyzerDataWithOntology ad = kit.GetAnalyzerData(this) as Pullenti.Ner.Core.AnalyzerDataWithOntology;
     for (int k = 0; k < 2; k++)
     {
         bool detectNewDenoms = false;
         // UTC is used for the time budget: local time can jump on DST or time-zone changes.
         DateTime started = DateTime.UtcNow;
         for (Pullenti.Ner.Token t = kit.FirstToken; t != null; t = t.Next)
         {
             // A candidate must follow whitespace, a comma or an opening bracket.
             if (!t.IsWhitespaceBefore)
             {
                 if (t.Previous == null || !(t.Previous.IsCharOf(",") || Pullenti.Ner.Core.BracketHelper.CanBeStartOfSequence(t.Previous, false, false)))
                 {
                     continue;
                 }
             }
             Pullenti.Ner.ReferentToken rt0 = this.TryAttachSpec(t);
             if (rt0 != null)
             {
                 rt0.Referent = ad.RegisterReferent(rt0.Referent);
                 kit.EmbedToken(rt0);
                 t = rt0;
                 continue;
             }
             if (!t.Chars.IsLetter)
             {
                 continue;
             }
             if (!this.CanBeStartOfDenom(t))
             {
                 continue;
             }
             if ((DateTime.UtcNow - started).TotalMinutes > 1)
             {
                 break;
             }
             // First try the denominations already known for this text.
             List <Pullenti.Ner.Core.IntOntologyToken> ot = ad.LocalOntology.TryAttach(t, null, false);
             if (ot != null && (ot[0].Item.Referent is DenominationReferent))
             {
                 if (this.CheckAttach(ot[0].BeginToken, ot[0].EndToken))
                 {
                     DenominationReferent cl = ot[0].Item.Referent.Clone() as DenominationReferent;
                     cl.Occurrence.Clear();
                     Pullenti.Ner.ReferentToken rt = new Pullenti.Ner.ReferentToken(cl, ot[0].BeginToken, ot[0].EndToken);
                     kit.EmbedToken(rt);
                     t = rt;
                     continue;
                 }
             }
             // The second pass only re-applies what was learnt during the first one.
             if (k > 0)
             {
                 continue;
             }
             // t cannot be null here: the loop condition guarantees it and every reassignment above is followed by 'continue'.
             if (t.Kit.Ontology != null)
             {
                 ot = t.Kit.Ontology.AttachToken(DenominationReferent.OBJ_TYPENAME, t);
                 if (ot != null)
                 {
                     if (this.CheckAttach(ot[0].BeginToken, ot[0].EndToken))
                     {
                         DenominationReferent dr = new DenominationReferent();
                         dr.MergeSlots(ot[0].Item.Referent, true);
                         Pullenti.Ner.ReferentToken rt = new Pullenti.Ner.ReferentToken(ad.RegisterReferent(dr), ot[0].BeginToken, ot[0].EndToken);
                         kit.EmbedToken(rt);
                         t = rt;
                         continue;
                     }
                 }
             }
             rt0 = this.TryAttach(t, false);
             if (rt0 != null)
             {
                 rt0.Referent = ad.RegisterReferent(rt0.Referent);
                 kit.EmbedToken(rt0);
                 detectNewDenoms = true;
                 t = rt0;
                 // Stop the pass once the local ontology holds more than 1000 items.
                 if (ad.LocalOntology.Items.Count > 1000)
                 {
                     break;
                 }
             }
         }
         if (!detectNewDenoms)
         {
             break;
         }
     }
 }
예제 #30
0
        /// <summary>
        /// Tries to recognise an organization written as "NAME type-item" (the type item being
        /// whatever the sibling TryAttach recognises), optionally enclosed in round brackets.
        /// </summary>
        /// <remarks>
        /// The scan starts at <paramref name="t"/> (or right after a leading '(') and moves forward
        /// over name words until TryAttach recognises a type item. When the type item is the very
        /// first token, the name is taken from a bracket sequence that follows it instead.
        /// </remarks>
        /// <param name="t">token to start from; null yields null</param>
        /// <param name="canBeCyr">when true, Cyrillic name words are accepted in addition to Latin ones</param>
        /// <returns>a token carrying a new OrganizationReferent, or null when nothing is recognised</returns>
        public static Pullenti.Ner.ReferentToken TryAttachOrg(Pullenti.Ner.Token t, bool canBeCyr = false)
        {
            if (t == null)
            {
                return(null);
            }
            bool br = false;

            if (t.IsChar('(') && t.Next != null)
            {
                t  = t.Next;
                br = true;
            }
            if (t is Pullenti.Ner.NumberToken)
            {
                // A number may start the name only when spelled in words, adjectival and capitalised.
                if ((t as Pullenti.Ner.NumberToken).Typ == Pullenti.Ner.NumberSpellingType.Words && t.Morph.Class.IsAdjective && t.Chars.IsCapitalUpper)
                {
                }
                else
                {
                    return(null);
                }
            }
            else
            {
                if (t.Chars.IsAllLower)
                {
                    return(null);
                }
                if ((t.LengthChar < 3) && !t.Chars.IsLetter)
                {
                    return(null);
                }
                if (!t.Chars.IsLatinLetter)
                {
                    if (!canBeCyr || !t.Chars.IsCyrillicLetter)
                    {
                        return(null);
                    }
                }
            }
            // t0 - first token of the name, t1 - last token of the name, namWo - number of text tokens accepted into the name.
            Pullenti.Ner.Token t0 = t;
            Pullenti.Ner.Token t1 = t0;
            int            namWo  = 0;
            OrgItemEngItem tok    = null;

            Pullenti.Ner.Geo.GeoReferent geo    = null;
            OrgItemTypeToken             addTyp = null;

            for (; t != null; t = t.Next)
            {
                if (t != t0 && t.WhitespacesBeforeCount > 1)
                {
                    break;
                }
                if (t.IsChar(')'))
                {
                    break;
                }
                if (t.IsChar('(') && t.Next != null)
                {
                    // "(geo)" - remember the geo object and skip the brackets.
                    if ((t.Next.GetReferent() is Pullenti.Ner.Geo.GeoReferent) && t.Next.Next != null && t.Next.Next.IsChar(')'))
                    {
                        geo = t.Next.GetReferent() as Pullenti.Ner.Geo.GeoReferent;
                        t   = t.Next.Next;
                        continue;
                    }
                    // "(type)" in Latin letters - remember it as an additional type.
                    OrgItemTypeToken typ = OrgItemTypeToken.TryAttach(t.Next, true, null);
                    if ((typ != null && typ.EndToken.Next != null && typ.EndToken.Next.IsChar(')')) && typ.Chars.IsLatinLetter)
                    {
                        addTyp = typ;
                        t      = typ.EndToken.Next;
                        continue;
                    }
                    // "(Word)" with a capitalised text token - becomes part of the name.
                    if (((t.Next is Pullenti.Ner.TextToken) && t.Next.Next != null && t.Next.Next.IsChar(')')) && t.Next.Chars.IsCapitalUpper)
                    {
                        t1 = (t = t.Next.Next);
                        continue;
                    }
                    break;
                }
                tok = TryAttach(t, canBeCyr);
                if (tok == null && t.IsCharOf(".,") && t.Next != null)
                {
                    // Look past one or two '.' / ',' characters for the type item.
                    tok = TryAttach(t.Next, canBeCyr);
                    if (tok == null && t.Next.IsCharOf(",."))
                    {
                        tok = TryAttach(t.Next.Next, canBeCyr);
                    }
                }
                if (tok != null)
                {
                    if (tok.LengthChar == 1 && t0.Chars.IsCyrillicLetter)
                    {
                        return(null);
                    }
                    break;
                }
                if (t.IsHiphen && !t.IsWhitespaceAfter && !t.IsWhitespaceBefore)
                {
                    continue;
                }
                if (t.IsCharOf("&+") || t.IsAnd)
                {
                    continue;
                }
                if (t.IsChar('.'))
                {
                    if (t.Previous != null && t.Previous.LengthChar == 1)
                    {
                        continue;
                    }
                    else if (Pullenti.Ner.Core.MiscHelper.CanBeStartOfSentence(t.Next))
                    {
                        break;
                    }
                }
                if (!t.Chars.IsLatinLetter)
                {
                    if (!canBeCyr || !t.Chars.IsCyrillicLetter)
                    {
                        break;
                    }
                }
                if (t.Chars.IsAllLower)
                {
                    if (t.Morph.Class.IsPreposition || t.Morph.Class.IsConjunction)
                    {
                        continue;
                    }
                    if (br)
                    {
                        continue;
                    }
                    break;
                }
                Pullenti.Morph.MorphClass mc = t.GetMorphClassInDictionary();
                if (mc.IsVerb)
                {
                    if (t.Next != null && t.Next.Morph.Class.IsPreposition)
                    {
                        break;
                    }
                }
                if (t.Next != null && t.Next.IsValue("OF", null))
                {
                    break;
                }
                if (t is Pullenti.Ner.TextToken)
                {
                    namWo++;
                }
                t1 = t;
            }
            if (tok == null)
            {
                return(null);
            }
            if (t0 == tok.BeginToken)
            {
                // The type item comes first: the name must follow as a bracket sequence.
                Pullenti.Ner.Core.BracketSequenceToken br2 = Pullenti.Ner.Core.BracketHelper.TryParse(tok.EndToken.Next, Pullenti.Ner.Core.BracketParseAttr.No, 100);
                if (br2 != null)
                {
                    Pullenti.Ner.Org.OrganizationReferent org1 = new Pullenti.Ner.Org.OrganizationReferent();
                    if (tok.ShortValue != null)
                    {
                        org1.AddTypeStr(tok.ShortValue);
                    }
                    org1.AddTypeStr(tok.FullValue);
                    string nam1 = Pullenti.Ner.Core.MiscHelper.GetTextValue(br2.BeginToken, br2.EndToken, Pullenti.Ner.Core.GetTextAttr.No);
                    if (nam1 != null)
                    {
                        org1.AddName(nam1, true, null);
                        return(new Pullenti.Ner.ReferentToken(org1, t0, br2.EndToken));
                    }
                }
                return(null);
            }
            Pullenti.Ner.Org.OrganizationReferent org = new Pullenti.Ner.Org.OrganizationReferent();
            Pullenti.Ner.Token te = tok.EndToken;
            if (tok.IsBank)
            {
                // For banks the type item itself is part of the name.
                t1 = tok.EndToken;
            }
            if (tok.FullValue == "company" && (tok.WhitespacesAfterCount < 3))
            {
                // "... company <type>": the word "company" joins the name, the next item becomes the type.
                OrgItemEngItem tok1 = TryAttach(tok.EndToken.Next, canBeCyr);
                if (tok1 != null)
                {
                    t1  = tok.EndToken;
                    tok = tok1;
                    te  = tok.EndToken;
                }
            }
            if (tok.FullValue == "company")
            {
                if (namWo == 0)
                {
                    return(null);
                }
            }
            string nam = Pullenti.Ner.Core.MiscHelper.GetTextValue(t0, t1, Pullenti.Ner.Core.GetTextAttr.IgnoreArticles);

            if (nam == "STOCK" && tok.FullValue == "company")
            {
                return(null);
            }
            string altNam = null;

            if (string.IsNullOrEmpty(nam))
            {
                return(null);
            }
            if (nam.IndexOf('(') > 0)
            {
                int i1 = nam.IndexOf('(');
                int i2 = nam.IndexOf(')');
                if (i1 < i2)
                {
                    // Keep the full text as an alternative name and strip the bracketed part from the main one.
                    altNam = nam;
                    string tai = null;
                    if ((i2 + 1) < nam.Length)
                    {
                        // Start after the ')' so that the bracket itself does not leak into the name.
                        tai = nam.Substring(i2 + 1).Trim();
                    }
                    nam = nam.Substring(0, i1).Trim();
                    if (tai != null)
                    {
                        nam = string.Format("{0} {1}", nam, tai);
                    }
                }
            }
            if (tok.IsBank)
            {
                org.AddTypeStr((tok.Kit.BaseLanguage.IsEn ? "bank" : "банк"));
                org.AddProfile(Pullenti.Ner.Org.OrgProfile.Finance);
                if ((t1.Next != null && t1.Next.IsValue("OF", null) && t1.Next.Next != null) && t1.Next.Next.Chars.IsLatinLetter)
                {
                    // "Bank of X": extend the name over the part that follows "OF".
                    OrgItemNameToken nam0 = OrgItemNameToken.TryAttach(t1.Next, null, false, false);
                    if (nam0 != null)
                    {
                        te = nam0.EndToken;
                    }
                    else
                    {
                        te = t1.Next.Next;
                    }
                    nam = Pullenti.Ner.Core.MiscHelper.GetTextValue(t0, te, Pullenti.Ner.Core.GetTextAttr.No);
                    if (te.GetReferent() is Pullenti.Ner.Geo.GeoReferent)
                    {
                        org.AddGeoObject(te.GetReferent() as Pullenti.Ner.Geo.GeoReferent);
                    }
                }
                else if (t0 == t1)
                {
                    return(null);
                }
            }
            else
            {
                if (tok.ShortValue != null)
                {
                    org.AddTypeStr(tok.ShortValue);
                }
                org.AddTypeStr(tok.FullValue);
            }
            if (string.IsNullOrEmpty(nam))
            {
                return(null);
            }
            org.AddName(nam, true, null);
            if (altNam != null)
            {
                org.AddName(altNam, true, null);
            }
            Pullenti.Ner.ReferentToken res = new Pullenti.Ner.ReferentToken(org, t0, te);
            // Skip trailing '.' / ',' and look for one more type item right after them.
            t = te;
            while (t.Next != null)
            {
                if (t.Next.IsCharOf(",."))
                {
                    t = t.Next;
                }
                else
                {
                    break;
                }
            }
            if (t.WhitespacesAfterCount < 2)
            {
                tok = TryAttach(t.Next, canBeCyr);
                if (tok != null)
                {
                    if (tok.ShortValue != null)
                    {
                        org.AddTypeStr(tok.ShortValue);
                    }
                    org.AddTypeStr(tok.FullValue);
                    res.EndToken = tok.EndToken;
                }
            }
            if (geo != null)
            {
                org.AddGeoObject(geo);
            }
            if (addTyp != null)
            {
                org.AddType(addTyp, false);
            }
            if (!br)
            {
                return(res);
            }
            // The candidate started with '(': it must be followed by ')' or by the end of the text.
            t = res.EndToken;
            if (t.Next == null)
            {
                // End of text: keep the current end token; assigning t.Next here would make it null.
                return(res);
            }
            if (!t.Next.IsChar(')'))
            {
                return(null);
            }
            res.EndToken = t.Next;
            return(res);
        }