예제 #1
0
        /// <summary>
        /// Tries to recognise an organization written as a name followed by a type item that the
        /// sibling <c>TryAttach</c> method recognises (for example the "company" item or a bank item,
        /// both tested below). The whole construction may be wrapped in round brackets.
        /// </summary>
        /// <param name="t">First token to analyse; a leading '(' is skipped and remembered.</param>
        /// <param name="canBeCyr">When true, Cyrillic name tokens are accepted besides Latin ones.</param>
        /// <returns>
        /// A referent token holding a new <c>OrganizationReferent</c>, or null when no organization
        /// could be attached at this position.
        /// </returns>
        public static Pullenti.Ner.ReferentToken TryAttachOrg(Pullenti.Ner.Token t, bool canBeCyr = false)
        {
            if (t == null)
            {
                return(null);
            }
            // br is set when the candidate starts right after '(' - the end of the method then
            // expects the matching ')' (or the end of the text).
            bool br = false;

            if (t.IsChar('(') && t.Next != null)
            {
                t  = t.Next;
                br = true;
            }
            if (t is Pullenti.Ner.NumberToken)
            {
                // A number may start the name only when it is spelled in words, is an adjective
                // and begins with a capital letter.
                if (!((t as Pullenti.Ner.NumberToken).Typ == Pullenti.Ner.NumberSpellingType.Words && t.Morph.Class.IsAdjective && t.Chars.IsCapitalUpper))
                {
                    return(null);
                }
            }
            else
            {
                if (t.Chars.IsAllLower)
                {
                    return(null);
                }
                if ((t.LengthChar < 3) && !t.Chars.IsLetter)
                {
                    return(null);
                }
                if (!t.Chars.IsLatinLetter)
                {
                    if (!canBeCyr || !t.Chars.IsCyrillicLetter)
                    {
                        return(null);
                    }
                }
            }
            // t0 - first token of the name, t1 - last token accepted into the name so far.
            Pullenti.Ner.Token t0 = t;
            Pullenti.Ner.Token t1 = t0;
            // Number of plain text tokens accepted into the name.
            int            namWo  = 0;
            OrgItemEngItem tok    = null;

            Pullenti.Ner.Geo.GeoReferent geo    = null;
            OrgItemTypeToken             addTyp = null;

            // Walk forward collecting name tokens until a type item (tok) is found or the name breaks.
            for (; t != null; t = t.Next)
            {
                if (t != t0 && t.WhitespacesBeforeCount > 1)
                {
                    break;
                }
                if (t.IsChar(')'))
                {
                    break;
                }
                if (t.IsChar('(') && t.Next != null)
                {
                    // "(<geo>)" inside the name: remember the geo object and skip the brackets.
                    if ((t.Next.GetReferent() is Pullenti.Ner.Geo.GeoReferent) && t.Next.Next != null && t.Next.Next.IsChar(')'))
                    {
                        geo = t.Next.GetReferent() as Pullenti.Ner.Geo.GeoReferent;
                        t   = t.Next.Next;
                        continue;
                    }
                    // "(<type in Latin letters>)": remember it as an additional type.
                    OrgItemTypeToken typ = OrgItemTypeToken.TryAttach(t.Next, true, null);
                    if ((typ != null && typ.EndToken.Next != null && typ.EndToken.Next.IsChar(')')) && typ.Chars.IsLatinLetter)
                    {
                        addTyp = typ;
                        t      = typ.EndToken.Next;
                        continue;
                    }
                    // "(<single capitalised word>)": keep it as part of the name text.
                    if (((t.Next is Pullenti.Ner.TextToken) && t.Next.Next != null && t.Next.Next.IsChar(')')) && t.Next.Chars.IsCapitalUpper)
                    {
                        t1 = (t = t.Next.Next);
                        continue;
                    }
                    break;
                }
                tok = TryAttach(t, canBeCyr);
                if (tok == null && t.IsCharOf(".,") && t.Next != null)
                {
                    // The type item may be separated from the name by one or two '.' / ',' characters.
                    tok = TryAttach(t.Next, canBeCyr);
                    if (tok == null && t.Next.IsCharOf(",."))
                    {
                        tok = TryAttach(t.Next.Next, canBeCyr);
                    }
                }
                if (tok != null)
                {
                    if (tok.LengthChar == 1 && t0.Chars.IsCyrillicLetter)
                    {
                        return(null);
                    }
                    break;
                }
                if (t.IsHiphen && !t.IsWhitespaceAfter && !t.IsWhitespaceBefore)
                {
                    continue;
                }
                if (t.IsCharOf("&+") || t.IsAnd)
                {
                    continue;
                }
                if (t.IsChar('.'))
                {
                    // A dot after a one-character token is skipped; otherwise a dot that may end
                    // a sentence stops the name.
                    if (t.Previous != null && t.Previous.LengthChar == 1)
                    {
                        continue;
                    }
                    else if (Pullenti.Ner.Core.MiscHelper.CanBeStartOfSentence(t.Next))
                    {
                        break;
                    }
                }
                if (!t.Chars.IsLatinLetter)
                {
                    if (!canBeCyr || !t.Chars.IsCyrillicLetter)
                    {
                        break;
                    }
                }
                if (t.Chars.IsAllLower)
                {
                    // Lower-case words are skipped only when they are prepositions/conjunctions,
                    // or when the whole candidate is inside brackets.
                    if (t.Morph.Class.IsPreposition || t.Morph.Class.IsConjunction)
                    {
                        continue;
                    }
                    if (br)
                    {
                        continue;
                    }
                    break;
                }
                Pullenti.Morph.MorphClass mc = t.GetMorphClassInDictionary();
                if (mc.IsVerb)
                {
                    if (t.Next != null && t.Next.Morph.Class.IsPreposition)
                    {
                        break;
                    }
                }
                if (t.Next != null && t.Next.IsValue("OF", null))
                {
                    break;
                }
                if (t is Pullenti.Ner.TextToken)
                {
                    namWo++;
                }
                t1 = t;
            }
            if (tok == null)
            {
                return(null);
            }
            if (t0 == tok.BeginToken)
            {
                // The type item comes first: the name must then follow as a bracket sequence
                // accepted by BracketHelper.TryParse.
                Pullenti.Ner.Core.BracketSequenceToken br2 = Pullenti.Ner.Core.BracketHelper.TryParse(tok.EndToken.Next, Pullenti.Ner.Core.BracketParseAttr.No, 100);
                if (br2 != null)
                {
                    Pullenti.Ner.Org.OrganizationReferent org1 = new Pullenti.Ner.Org.OrganizationReferent();
                    if (tok.ShortValue != null)
                    {
                        org1.AddTypeStr(tok.ShortValue);
                    }
                    org1.AddTypeStr(tok.FullValue);
                    string nam1 = Pullenti.Ner.Core.MiscHelper.GetTextValue(br2.BeginToken, br2.EndToken, Pullenti.Ner.Core.GetTextAttr.No);
                    if (nam1 != null)
                    {
                        org1.AddName(nam1, true, null);
                        return(new Pullenti.Ner.ReferentToken(org1, t0, br2.EndToken));
                    }
                }
                return(null);
            }
            Pullenti.Ner.Org.OrganizationReferent org = new Pullenti.Ner.Org.OrganizationReferent();
            // te - last token of the resulting referent token.
            Pullenti.Ner.Token te = tok.EndToken;
            if (tok.IsBank)
            {
                // For a bank item the item itself belongs to the name text.
                t1 = tok.EndToken;
            }
            if (tok.FullValue == "company" && (tok.WhitespacesAfterCount < 3))
            {
                // "company" directly followed by another type item: "company" becomes part of
                // the name and the following item is used as the type.
                OrgItemEngItem tok1 = TryAttach(tok.EndToken.Next, canBeCyr);
                if (tok1 != null)
                {
                    t1  = tok.EndToken;
                    tok = tok1;
                    te  = tok.EndToken;
                }
            }
            if (tok.FullValue == "company")
            {
                if (namWo == 0)
                {
                    return(null);
                }
            }
            string nam = Pullenti.Ner.Core.MiscHelper.GetTextValue(t0, t1, Pullenti.Ner.Core.GetTextAttr.IgnoreArticles);

            if (nam == "STOCK" && tok.FullValue == "company")
            {
                return(null);
            }
            string altNam = null;

            if (string.IsNullOrEmpty(nam))
            {
                return(null);
            }
            if (nam.IndexOf('(') > 0)
            {
                // The name contains a bracketed part: keep the full text as an alternative name
                // and build the main name from the text before '(' plus the text after ')'.
                int i1 = nam.IndexOf('(');
                int i2 = nam.IndexOf(')');
                if (i1 < i2)
                {
                    altNam = nam;
                    string tai = null;
                    if ((i2 + 1) < nam.Length)
                    {
                        // Start AFTER the closing bracket so that ')' does not leak into the name.
                        tai = nam.Substring(i2 + 1).Trim();
                    }
                    nam = nam.Substring(0, i1).Trim();
                    if (!string.IsNullOrEmpty(tai))
                    {
                        nam = string.Format("{0} {1}", nam, tai);
                    }
                }
            }
            if (tok.IsBank)
            {
                org.AddTypeStr((tok.Kit.BaseLanguage.IsEn ? "bank" : "банк"));
                org.AddProfile(Pullenti.Ner.Org.OrgProfile.Finance);
                if ((t1.Next != null && t1.Next.IsValue("OF", null) && t1.Next.Next != null) && t1.Next.Next.Chars.IsLatinLetter)
                {
                    // "... Bank of X": extend the name over the "of X" part.
                    OrgItemNameToken nam0 = OrgItemNameToken.TryAttach(t1.Next, null, false, false);
                    if (nam0 != null)
                    {
                        te = nam0.EndToken;
                    }
                    else
                    {
                        te = t1.Next.Next;
                    }
                    nam = Pullenti.Ner.Core.MiscHelper.GetTextValue(t0, te, Pullenti.Ner.Core.GetTextAttr.No);
                    if (te.GetReferent() is Pullenti.Ner.Geo.GeoReferent)
                    {
                        org.AddGeoObject(te.GetReferent() as Pullenti.Ner.Geo.GeoReferent);
                    }
                }
                else if (t0 == t1)
                {
                    return(null);
                }
            }
            else
            {
                if (tok.ShortValue != null)
                {
                    org.AddTypeStr(tok.ShortValue);
                }
                org.AddTypeStr(tok.FullValue);
            }
            if (string.IsNullOrEmpty(nam))
            {
                return(null);
            }
            org.AddName(nam, true, null);
            if (altNam != null)
            {
                org.AddName(altNam, true, null);
            }
            Pullenti.Ner.ReferentToken res = new Pullenti.Ner.ReferentToken(org, t0, te);
            // Skip trailing '.' / ',' and look for one more type item right after the organization.
            t = te;
            while (t.Next != null)
            {
                if (t.Next.IsCharOf(",."))
                {
                    t = t.Next;
                }
                else
                {
                    break;
                }
            }
            if (t.WhitespacesAfterCount < 2)
            {
                tok = TryAttach(t.Next, canBeCyr);
                if (tok != null)
                {
                    if (tok.ShortValue != null)
                    {
                        org.AddTypeStr(tok.ShortValue);
                    }
                    org.AddTypeStr(tok.FullValue);
                    res.EndToken = tok.EndToken;
                }
            }
            if (geo != null)
            {
                org.AddGeoObject(geo);
            }
            if (addTyp != null)
            {
                org.AddType(addTyp, false);
            }
            if (!br)
            {
                return(res);
            }
            // The candidate was opened with '(' - it must be closed with ')' or reach the end of the text.
            t = res.EndToken;
            if (t.Next == null)
            {
                // End of the text: keep the current end token instead of overwriting it with null.
                return(res);
            }
            if (!t.Next.IsChar(')'))
            {
                return(null);
            }
            res.EndToken = t.Next;
            return(res);
        }
예제 #2
0
        /// <summary>
        /// Decides whether the geo referent under <paramref name="rhi"/> may be the higher-level
        /// territory for the geo referent under <paramref name="rlo"/>, using token order, morphology,
        /// a preposition before <paramref name="rlo"/> and finally <c>CanBeHigher</c>.
        /// </summary>
        /// <param name="rhi">Token of the candidate higher territory.</param>
        /// <param name="rlo">Token of the candidate lower territory.</param>
        /// <returns>
        /// True when the pair is acceptable; false when either token is null, does not carry a
        /// <c>GeoReferent</c>, or one of the checks below rejects the pair.
        /// </returns>
        internal static bool CanBeHigherToken(Pullenti.Ner.Token rhi, Pullenti.Ner.Token rlo)
        {
            if (rhi == null || rlo == null)
            {
                return(false);
            }
            if (rhi.Morph.Case.IsInstrumental && !rhi.Morph.Case.IsGenitive)
            {
                return(false);
            }
            Pullenti.Ner.Geo.GeoReferent hi = rhi.GetReferent() as Pullenti.Ner.Geo.GeoReferent;
            Pullenti.Ner.Geo.GeoReferent lo = rlo.GetReferent() as Pullenti.Ner.Geo.GeoReferent;
            if (hi == null || lo == null)
            {
                return(false);
            }
            // citiInReg marks the city/region combinations accepted below; it relaxes the newline
            // check and is returned as the verdict when CanBeHigher says no.
            bool citiInReg = false;

            if (hi.IsCity && lo.IsRegion)
            {
                // City written first, immediately followed by a district/okrug in the genitive case.
                if (hi.FindSlot(Pullenti.Ner.Geo.GeoReferent.ATTR_TYPE, "город", true) != null || hi.FindSlot(Pullenti.Ner.Geo.GeoReferent.ATTR_TYPE, "місто", true) != null || hi.FindSlot(Pullenti.Ner.Geo.GeoReferent.ATTR_TYPE, "city", true) != null)
                {
                    string s = _getTypesString(lo);
                    if (((s.Contains("район") || s.Contains("административный округ") || s.Contains("муниципальный округ")) || s.Contains("адміністративний округ") || s.Contains("муніципальний округ")) || lo.FindSlot(Pullenti.Ner.Geo.GeoReferent.ATTR_TYPE, "округ", true) != null)
                    {
                        if (rhi.Next == rlo && rlo.Morph.Case.IsGenitive)
                        {
                            citiInReg = true;
                        }
                    }
                }
            }
            if (hi.IsRegion && lo.IsCity)
            {
                if (lo.FindSlot(Pullenti.Ner.Geo.GeoReferent.ATTR_TYPE, "город", true) != null || lo.FindSlot(Pullenti.Ner.Geo.GeoReferent.ATTR_TYPE, "місто", true) != null || lo.FindSlot(Pullenti.Ner.Geo.GeoReferent.ATTR_TYPE, "city", true) != null)
                {
                    string s = _getTypesString(hi);
                    if (s == "район;")
                    {
                        if (hi.Higher != null && hi.Higher.IsRegion)
                        {
                            citiInReg = true;
                        }
                        else if (rhi.EndChar <= rlo.BeginChar && rhi.Next != null && rhi.Next.IsComma)
                        {
                            // The district precedes the city and is followed by a comma. The case
                            // of rlo does not matter here: the former two branches differed only
                            // in a genitive test and set the same value.
                            citiInReg = true;
                        }
                    }
                }
                else
                {
                    citiInReg = true;
                }
            }
            if (rhi.EndChar <= rlo.BeginChar)
            {
                // The higher object is written before the lower one.
                if (!rhi.Morph.Class.IsAdjective)
                {
                    if (hi.IsState && !rhi.Chars.IsLatinLetter)
                    {
                        return(false);
                    }
                }
                if (rhi.IsNewlineAfter || rlo.IsNewlineBefore)
                {
                    if (!citiInReg)
                    {
                        return(false);
                    }
                }
            }
            if (rlo.Previous != null && rlo.Previous.Morph.Class.IsPreposition)
            {
                // The case of rlo must agree with the preposition in front of it (an undefined
                // case is tolerated).
                if (rlo.Previous.Morph.Language.IsUa)
                {
                    if ((rlo.Previous.IsValue("У", null) && !rlo.Morph.Case.IsDative && !rlo.Morph.Case.IsPrepositional) && !rlo.Morph.Case.IsUndefined)
                    {
                        return(false);
                    }
                    if (rlo.Previous.IsValue("З", null) && !rlo.Morph.Case.IsGenitive && !rlo.Morph.Case.IsUndefined)
                    {
                        return(false);
                    }
                }
                else
                {
                    if ((rlo.Previous.IsValue("В", null) && !rlo.Morph.Case.IsDative && !rlo.Morph.Case.IsPrepositional) && !rlo.Morph.Case.IsUndefined)
                    {
                        return(false);
                    }
                    if (rlo.Previous.IsValue("ИЗ", null) && !rlo.Morph.Case.IsGenitive && !rlo.Morph.Case.IsUndefined)
                    {
                        return(false);
                    }
                }
            }
            if (!CanBeHigher(hi, lo))
            {
                return(citiInReg);
            }
            return(true);
        }
예제 #3
0
 /// <summary>
 /// Parses one element of an identity-document description starting at <paramref name="t"/>:
 /// a keyword, a series, a number, a code, an address, a date or an organization
 /// (see the <c>Typs</c> values assigned below).
 /// </summary>
 /// <param name="t">Token to parse from; null yields null.</param>
 /// <param name="prev">
 /// Previously parsed element, or null for the first element. Without a previous element only
 /// a keyword can be returned.
 /// </param>
 /// <returns>The parsed element, or null when nothing is recognised at this position.</returns>
 static PersonIdToken TryParse(Pullenti.Ner.Token t, PersonIdToken prev)
 {
     if (t == null)
     {
         // The recursive calls below pass EndToken.Next / t.Next, which may be null.
         return(null);
     }
     if (t.IsValue("СВИДЕТЕЛЬСТВО", null))
     {
         // Certificate keyword: scan the following words and accept it only when both a
         // registration word (reg) and an individual-entrepreneur word (ip) were seen.
         Pullenti.Ner.Token tt1 = t;
         bool ip  = false;
         bool reg = false;
         for (Pullenti.Ner.Token tt = t.Next; tt != null; tt = tt.Next)
         {
             if (tt.IsCommaAnd || tt.Morph.Class.IsPreposition)
             {
                 continue;
             }
             if (tt.IsValue("РЕГИСТРАЦИЯ", null) || tt.IsValue("РЕЕСТР", null) || tt.IsValue("ЗАРЕГИСТРИРОВАТЬ", null))
             {
                 reg = true;
                 tt1 = tt;
             }
             else if (tt.IsValue("ИНДИВИДУАЛЬНЫЙ", null) || tt.IsValue("ИП", null))
             {
                 ip  = true;
                 tt1 = tt;
             }
             else if ((tt.IsValue("ВНЕСЕНИЕ", null) || tt.IsValue("ГОСУДАРСТВЕННЫЙ", null) || tt.IsValue("ЕДИНЫЙ", null)) || tt.IsValue("ЗАПИСЬ", null) || tt.IsValue("ПРЕДПРИНИМАТЕЛЬ", null))
             {
                 tt1 = tt;
             }
             else if (tt.GetReferent() != null && tt.GetReferent().TypeName == "DATERANGE")
             {
                 tt1 = tt;
             }
             else
             {
                 break;
             }
         }
         if (reg && ip)
         {
             return new PersonIdToken(t, tt1)
             {
                 Typ = Typs.Keyword, Value = "СВИДЕТЕЛЬСТВО О ГОСУДАРСТВЕННОЙ РЕГИСТРАЦИИ ФИЗИЧЕСКОГО ЛИЦА В КАЧЕСТВЕ ИНДИВИДУАЛЬНОГО ПРЕДПРИНИМАТЕЛЯ"
             };
         }
     }
     Pullenti.Ner.Core.TerminToken tok = m_Ontology.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
     if (tok != null)
     {
         Typs          ty  = (Typs)tok.Termin.Tag;
         PersonIdToken res = new PersonIdToken(tok.BeginToken, tok.EndToken)
         {
             Typ = ty, Value = tok.Termin.CanonicText
         };
         if (prev == null)
         {
             // First element: only a keyword is accepted; it is then extended over a following
             // geo referent, a person attribute or the word "ДАННЫЙ".
             if (ty != Typs.Keyword)
             {
                 return(null);
             }
             for (t = tok.EndToken.Next; t != null; t = t.Next)
             {
                 Pullenti.Ner.Referent r = t.GetReferent();
                 if (r != null && (r is Pullenti.Ner.Geo.GeoReferent))
                 {
                     res.Referent = r;
                     res.EndToken = t;
                     continue;
                 }
                 if (t.IsValue("ГРАЖДАНИН", null) && t.Next != null && (t.Next.GetReferent() is Pullenti.Ner.Geo.GeoReferent))
                 {
                     res.Referent = t.Next.GetReferent();
                     t            = (res.EndToken = t.Next);
                     continue;
                 }
                 if (r != null)
                 {
                     break;
                 }
                 PersonAttrToken ait = PersonAttrToken.TryAttach(t, null, PersonAttrToken.PersonAttrAttachAttrs.No);
                 if (ait != null)
                 {
                     if (ait.Referent != null)
                     {
                         foreach (Pullenti.Ner.Slot s in ait.Referent.Slots)
                         {
                             if (s.TypeName == Pullenti.Ner.Person.PersonPropertyReferent.ATTR_REF && (s.Value is Pullenti.Ner.Geo.GeoReferent))
                             {
                                 res.Referent = s.Value as Pullenti.Ner.Referent;
                             }
                         }
                     }
                     res.EndToken = ait.EndToken;
                     break;
                 }
                 if (t.IsValue("ДАННЫЙ", null))
                 {
                     res.EndToken = t;
                     continue;
                 }
                 break;
             }
             // Only a state is kept as the referent of the keyword.
             if ((res.Referent is Pullenti.Ner.Geo.GeoReferent) && !(res.Referent as Pullenti.Ner.Geo.GeoReferent).IsState)
             {
                 res.Referent = null;
             }
             return(res);
         }
         if (ty == Typs.Number)
         {
             // Number prefix: concatenate the number tokens that follow on the same line.
             StringBuilder      tmp = new StringBuilder();
             Pullenti.Ner.Token tt  = tok.EndToken.Next;
             if (tt != null && tt.IsChar(':'))
             {
                 tt = tt.Next;
             }
             for (; tt != null; tt = tt.Next)
             {
                 if (tt.IsNewlineBefore)
                 {
                     break;
                 }
                 if (!(tt is Pullenti.Ner.NumberToken))
                 {
                     break;
                 }
                 tmp.Append(tt.GetSourceText());
                 res.EndToken = tt;
             }
             if (tmp.Length < 1)
             {
                 return(null);
             }
             res.Value     = tmp.ToString();
             res.HasPrefix = true;
             return(res);
         }
         if (ty == Typs.Seria)
         {
             // Series prefix: collect numbers, Roman numerals and two-letter upper-case tokens
             // until a number prefix or a line break is met.
             StringBuilder      tmp = new StringBuilder();
             Pullenti.Ner.Token tt  = tok.EndToken.Next;
             if (tt != null && tt.IsChar(':'))
             {
                 tt = tt.Next;
             }
             // NOTE(review): nextNum is assigned but never read in this method.
             bool nextNum = false;
             for (; tt != null; tt = tt.Next)
             {
                 if (tt.IsNewlineBefore)
                 {
                     break;
                 }
                 if (Pullenti.Ner.Core.MiscHelper.CheckNumberPrefix(tt) != null)
                 {
                     nextNum = true;
                     break;
                 }
                 if (!(tt is Pullenti.Ner.NumberToken))
                 {
                     if (!(tt is Pullenti.Ner.TextToken))
                     {
                         break;
                     }
                     if (!tt.Chars.IsAllUpper)
                     {
                         break;
                     }
                     Pullenti.Ner.NumberToken nu = Pullenti.Ner.Core.NumberHelper.TryParseRoman(tt);
                     if (nu != null)
                     {
                         tmp.Append(nu.GetSourceText());
                         tt = nu.EndToken;
                     }
                     else if (tt.LengthChar != 2)
                     {
                         break;
                     }
                     else
                     {
                         tmp.Append((tt as Pullenti.Ner.TextToken).Term);
                         res.EndToken = tt;
                     }
                     if (tt.Next != null && tt.Next.IsHiphen)
                     {
                         tt = tt.Next;
                     }
                     continue;
                 }
                 if (tmp.Length >= 4)
                 {
                     break;
                 }
                 tmp.Append(tt.GetSourceText());
                 res.EndToken = tt;
             }
             if (tmp.Length < 4)
             {
                 // A short series (2-3 characters) is accepted only when a number follows it.
                 if (tmp.Length < 2)
                 {
                     return(null);
                 }
                 Pullenti.Ner.Token tt1 = res.EndToken.Next;
                 if (tt1 != null && tt1.IsComma)
                 {
                     tt1 = tt1.Next;
                 }
                 PersonIdToken next = TryParse(tt1, res);
                 if (next == null || next.Typ != Typs.Number)
                 {
                     return(null);
                 }
             }
             res.Value     = tmp.ToString();
             res.HasPrefix = true;
             return(res);
         }
         if (ty == Typs.Code)
         {
             // Extend the code over following numbers, skipping ':' and hyphens.
             for (Pullenti.Ner.Token tt = res.EndToken.Next; tt != null; tt = tt.Next)
             {
                 if (tt.IsCharOf(":") || tt.IsHiphen)
                 {
                     continue;
                 }
                 if (tt is Pullenti.Ner.NumberToken)
                 {
                     res.EndToken = tt;
                     continue;
                 }
                 break;
             }
         }
         if (ty == Typs.Address)
         {
             if (t.GetReferent() is Pullenti.Ner.Address.AddressReferent)
             {
                 res.Referent = t.GetReferent();
                 res.EndToken = t;
                 return(res);
             }
             // Otherwise the address referent must follow after optional ':' / hyphen / prepositions.
             for (Pullenti.Ner.Token tt = res.EndToken.Next; tt != null; tt = tt.Next)
             {
                 if (tt.IsCharOf(":") || tt.IsHiphen || tt.Morph.Class.IsPreposition)
                 {
                     continue;
                 }
                 if (tt.GetReferent() is Pullenti.Ner.Address.AddressReferent)
                 {
                     res.Referent = tt.GetReferent();
                     res.EndToken = tt;
                 }
                 break;
             }
             if (res.Referent == null)
             {
                 return(null);
             }
         }
         return(res);
     }
     else if (prev == null)
     {
         return(null);
     }
     // No ontology term here: try a bare number (optionally after a number prefix).
     Pullenti.Ner.Token t0 = t;
     Pullenti.Ner.Token t1 = Pullenti.Ner.Core.MiscHelper.CheckNumberPrefix(t0);
     if (t1 != null)
     {
         t = t1;
     }
     if (t is Pullenti.Ner.NumberToken)
     {
         StringBuilder tmp = new StringBuilder();
         PersonIdToken res = new PersonIdToken(t0, t)
         {
             Typ = Typs.Number
         };
         for (Pullenti.Ner.Token tt = t; tt != null; tt = tt.Next)
         {
             if (tt.IsNewlineBefore || !(tt is Pullenti.Ner.NumberToken))
             {
                 break;
             }
             tmp.Append(tt.GetSourceText());
             res.EndToken = tt;
         }
         if (tmp.Length < 4)
         {
             // A short number right after a keyword is treated as a series when a number follows it.
             if (tmp.Length < 2)
             {
                 return(null);
             }
             if (prev == null || prev.Typ != Typs.Keyword)
             {
                 return(null);
             }
             PersonIdToken ne = TryParse(res.EndToken.Next, prev);
             if (ne != null && ne.Typ == Typs.Number)
             {
                 res.Typ = Typs.Seria;
             }
             else
             {
                 return(null);
             }
         }
         res.Value = tmp.ToString();
         if (t0 != t)
         {
             res.HasPrefix = true;
         }
         return(res);
     }
     if (t is Pullenti.Ner.ReferentToken)
     {
         // Already recognised entities are mapped by their type name.
         Pullenti.Ner.Referent r = t.GetReferent();
         if (r != null)
         {
             if (r.TypeName == "DATE")
             {
                 return new PersonIdToken(t, t)
                 {
                     Typ = Typs.Date, Referent = r
                 };
             }
             if (r.TypeName == "ORGANIZATION")
             {
                 return new PersonIdToken(t, t)
                 {
                     Typ = Typs.Org, Referent = r
                 };
             }
             if (r.TypeName == "ADDRESS")
             {
                 return new PersonIdToken(t, t)
                 {
                     Typ = Typs.Address, Referent = r
                 };
             }
         }
     }
     if ((prev != null && prev.Typ == Typs.Keyword && (t is Pullenti.Ner.TextToken)) && !t.Chars.IsAllLower && t.Chars.IsLetter)
     {
         // A non-lower-case word between a keyword and a number is taken as the series.
         PersonIdToken rr = TryParse(t.Next, prev);
         if (rr != null && rr.Typ == Typs.Number)
         {
             return new PersonIdToken(t, t)
             {
                 Typ = Typs.Seria, Value = (t as Pullenti.Ner.TextToken).Term
             };
         }
     }
     if ((t != null && t.IsValue("ОТ", "ВІД") && (t.Next is Pullenti.Ner.ReferentToken)) && t.Next.GetReferent().TypeName == "DATE")
     {
         return new PersonIdToken(t, t.Next)
         {
             Typ = Typs.Date, Referent = t.Next.GetReferent()
         };
     }
     return(null);
 }
예제 #4
0
 /// <summary>
 /// Walks every token of the analysis kit, tries to assemble phone numbers from phone items
 /// and embeds each recognised phone referent back into the token chain.
 /// </summary>
 /// <param name="kit">Analysis kit whose token chain is scanned and modified.</param>
 public override void Process(Pullenti.Ner.Core.AnalysisKit kit)
 {
     PhoneAnalizerData data = kit.GetAnalyzerData(this) as PhoneAnalizerData;
     for (Pullenti.Ner.Token t = kit.FirstToken; t != null; t = t.Next)
     {
         List<Pullenti.Ner.Phone.Internal.PhoneItemToken> items = Pullenti.Ner.Phone.Internal.PhoneItemToken.TryAttachAll(t, 15);
         if (items == null || items.Count == 0)
         {
             continue;
         }

         // Look backwards for a phone recognised shortly before this position.
         PhoneReferent prevPhone = null;
         int skippedTokens = 0;
         for (Pullenti.Ner.Token back = t.Previous; back != null; back = back.Previous)
         {
             PhoneReferent found = back.GetReferent() as PhoneReferent;
             if (found != null)
             {
                 prevPhone = found;
                 break;
             }
             if (back is Pullenti.Ner.ReferentToken)
             {
                 continue;
             }
             if (back.IsChar(')'))
             {
                 // Jump over a bracketed fragment, giving up after 100 tokens without '('.
                 Pullenti.Ner.Token opening = back.Previous;
                 int steps = 0;
                 while (opening != null && !opening.IsChar('('))
                 {
                     if (++steps > 100)
                     {
                         break;
                     }
                     opening = opening.Previous;
                 }
                 if (opening == null || !opening.IsChar('('))
                 {
                     break;
                 }
                 back = opening;
                 continue;
             }
             if (back.IsCharOf(",;/\\") || back.IsAnd)
             {
                 continue;
             }
             // Any other token counts against a limit of five and must not touch a line break.
             if (++skippedTokens > 5)
             {
                 break;
             }
             if (back.IsNewlineBefore || back.IsNewlineAfter)
             {
                 break;
             }
         }

         // Consume leading prefix items (and an item standing before a prefix at index 0).
         int idx = 0;
         bool isPhoneBefore = false;
         bool hasPrefix = false;
         PhoneKind kind = PhoneKind.Undefined;
         while (idx < items.Count)
         {
             if (items[idx].ItemType == Pullenti.Ner.Phone.Internal.PhoneItemToken.PhoneItemType.Prefix)
             {
                 if (kind == PhoneKind.Undefined)
                 {
                     kind = items[idx].Kind;
                 }
                 hasPrefix = true;
                 isPhoneBefore = true;
                 idx++;
                 if (idx < items.Count && items[idx].ItemType == Pullenti.Ner.Phone.Internal.PhoneItemToken.PhoneItemType.Delim)
                 {
                     idx++;
                 }
                 continue;
             }
             if (idx == 0 && items.Count > 1 && items[1].ItemType == Pullenti.Ner.Phone.Internal.PhoneItemToken.PhoneItemType.Prefix)
             {
                 if (kind == PhoneKind.Undefined)
                 {
                     kind = items[0].Kind;
                 }
                 hasPrefix = true;
                 items.RemoveAt(0);
                 continue;
             }
             break;
         }
         if (prevPhone != null)
         {
             isPhoneBefore = true;
         }

         // A lone number right after a URI scheme is not treated as a phone.
         if (items.Count == 1 && items[0].ItemType == Pullenti.Ner.Phone.Internal.PhoneItemToken.PhoneItemType.Number)
         {
             Pullenti.Ner.Token before = t.Previous;
             if ((before is Pullenti.Ner.TextToken) && !before.Chars.IsLetter)
             {
                 before = before.Previous;
             }
             if ((before is Pullenti.Ner.TextToken) && Pullenti.Ner.Uri.UriAnalyzer.m_Schemes.TryParse(before, Pullenti.Ner.Core.TerminParseAttr.No) != null)
             {
                 continue;
             }
         }

         List<Pullenti.Ner.ReferentToken> attached = this.TryAttach(items, idx, isPhoneBefore, prevPhone);
         if (attached == null)
         {
             // Retry once, starting from the first prefix item found further in the list.
             for (idx = 1; idx < items.Count; idx++)
             {
                 if (items[idx].ItemType != Pullenti.Ner.Phone.Internal.PhoneItemToken.PhoneItemType.Prefix)
                 {
                     continue;
                 }
                 items.RemoveRange(0, idx);
                 attached = this.TryAttach(items, 1, true, prevPhone);
                 break;
             }
         }
         if (attached == null)
         {
             // Nothing recognised: resume scanning after the last analysed item.
             t = items[items.Count - 1].EndToken;
             continue;
         }

         // Without an own prefix, inherit the kind of an adjacent previous non-mobile phone.
         if (kind == PhoneKind.Undefined && prevPhone != null && !hasPrefix && prevPhone.Kind != PhoneKind.Mobile && skippedTokens == 0)
         {
             kind = prevPhone.Kind;
         }
         foreach (Pullenti.Ner.ReferentToken rt in attached)
         {
             PhoneReferent phone = rt.Referent as PhoneReferent;
             if (kind != PhoneKind.Undefined)
             {
                 phone.Kind = kind;
             }
             else
             {
                 if (rt == attached[0] && (rt.WhitespacesBeforeCount < 3))
                 {
                     // A single-letter marker at the start of a line or table cell, in its
                     // Latin or Cyrillic look-alike form, is absorbed into the phone token.
                     Pullenti.Ner.Token marker = rt.BeginToken.Previous;
                     if (marker != null && marker.IsTableControlChar)
                     {
                         marker = marker.Previous;
                     }
                     bool markerStartsCell = (marker is Pullenti.Ner.TextToken) && (marker.IsNewlineBefore || (marker.Previous != null && marker.Previous.IsTableControlChar));
                     if (markerStartsCell)
                     {
                         string letter = (marker as Pullenti.Ner.TextToken).Term;
                         if (letter == "T" || letter == "Т")
                         {
                             rt.BeginToken = marker;
                         }
                         else if (letter == "Ф" || letter == "F")
                         {
                             kind = PhoneKind.Fax;
                             phone.Kind = kind;
                             rt.BeginToken = marker;
                         }
                         else if (letter == "M" || letter == "М")
                         {
                             kind = PhoneKind.Mobile;
                             phone.Kind = kind;
                             rt.BeginToken = marker;
                         }
                     }
                 }
                 phone.Correct();
             }
             rt.Referent = data.RegisterReferent(rt.Referent);
             kit.EmbedToken(rt);
             t = rt;
         }
     }
 }
예제 #5
0
        /// <summary>
        /// Tries to recognize a single element of a bibliographic reference starting at <paramref name="t"/>.
        /// </summary>
        /// <remarks>
        /// The checks are applied in a fixed order and the first match wins:
        /// bracketed content, referent tokens (person, geo, date, press organization, URI),
        /// dictionary terms from <c>m_Termins</c>, the "/" and "//" delimiters, digit numbers
        /// (number / year / pages), page and volume markers followed by numbers, abbreviated
        /// publication places, translation marks, "ibid"-like marks, "see"-like marks,
        /// number prefixes, "in N volumes/books" and a year in parentheses.
        /// Several single-letter literals appear twice in a comparison; presumably one is the
        /// Latin letter and the other its Cyrillic look-alike (verify before editing them).
        /// </remarks>
        /// <param name="t">First token of the candidate element; <c>null</c> yields <c>null</c>.</param>
        /// <param name="lev">Recursion depth; the method gives up once it is greater than 3.</param>
        /// <returns>The recognized <see cref="BookLinkToken"/>, or <c>null</c> when nothing matched.</returns>
        static BookLinkToken _tryParse(Pullenti.Ner.Token t, int lev)
        {
            if (t == null || lev > 3)
            {
                return(null);
            }
            if (t.IsChar('['))
            {
                // First try to read what follows the bracket as a nested element.
                BookLinkToken re = _tryParse(t.Next, lev + 1);
                if (re != null && re.EndToken.Next != null && re.EndToken.Next.IsChar(']'))
                {
                    // Widen the nested result so that it covers both brackets.
                    re.BeginToken = t;
                    re.EndToken   = re.EndToken.Next;
                    return(re);
                }
                if (re != null && re.EndToken.IsChar(']'))
                {
                    re.BeginToken = t;
                    return(re);
                }
                if (re != null)
                {
                    if (re.Typ == BookLinkTyp.Sostavitel || re.Typ == BookLinkTyp.Editors)
                    {
                        return(re);
                    }
                }
                // Otherwise accept a short bracket sequence whose last inner token is a number.
                Pullenti.Ner.Core.BracketSequenceToken br = Pullenti.Ner.Core.BracketHelper.TryParse(t, Pullenti.Ner.Core.BracketParseAttr.No, 100);
                if (br != null)
                {
                    if ((br.EndToken.Previous is Pullenti.Ner.NumberToken) && (br.LengthChar < 30))
                    {
                        return new BookLinkToken(t, br.EndToken)
                        {
                            Typ = BookLinkTyp.Number, Value = Pullenti.Ner.Core.MiscHelper.GetTextValue(br.BeginToken.Next, br.EndToken.Previous, Pullenti.Ner.Core.GetTextAttr.No)
                        };
                    }
                }
            }
            if (t is Pullenti.Ner.ReferentToken)
            {
                if (t.GetReferent() is Pullenti.Ner.Person.PersonReferent)
                {
                    return(TryParseAuthor(t, Pullenti.Ner.Person.Internal.FioTemplateType.Undefined));
                }
                if (t.GetReferent() is Pullenti.Ner.Geo.GeoReferent)
                {
                    return new BookLinkToken(t, t)
                    {
                        Typ = BookLinkTyp.Geo, Ref = t.GetReferent()
                    };
                }
                if (t.GetReferent() is Pullenti.Ner.Date.DateReferent)
                {
                    Pullenti.Ner.Date.DateReferent dr = t.GetReferent() as Pullenti.Ner.Date.DateReferent;

                    // A date that carries nothing but a year.
                    if (dr.Slots.Count == 1 && dr.Year > 0)
                    {
                        return new BookLinkToken(t, t)
                        {
                            Typ = BookLinkTyp.Year, Value = dr.Year.ToString()
                        };
                    }
                    // A fuller date is still taken as the year when it follows a comma.
                    if (dr.Year > 0 && t.Previous != null && t.Previous.IsComma)
                    {
                        return new BookLinkToken(t, t)
                        {
                            Typ = BookLinkTyp.Year, Value = dr.Year.ToString()
                        };
                    }
                }
                if (t.GetReferent() is Pullenti.Ner.Org.OrganizationReferent)
                {
                    Pullenti.Ner.Org.OrganizationReferent org = t.GetReferent() as Pullenti.Ner.Org.OrganizationReferent;
                    if (org.Kind == Pullenti.Ner.Org.OrganizationKind.Press)
                    {
                        return new BookLinkToken(t, t)
                        {
                            Typ = BookLinkTyp.Press, Ref = org
                        };
                    }
                }
                if (t.GetReferent() is Pullenti.Ner.Uri.UriReferent)
                {
                    Pullenti.Ner.Uri.UriReferent uri = t.GetReferent() as Pullenti.Ner.Uri.UriReferent;
                    if ((uri.Scheme == "http" || uri.Scheme == "https" || uri.Scheme == "ftp") || uri.Scheme == null)
                    {
                        return new BookLinkToken(t, t)
                        {
                            Typ = BookLinkTyp.Url, Ref = uri
                        };
                    }
                }
            }
            Pullenti.Ner.Core.TerminToken tok = m_Termins.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
            if (tok != null)
            {
                BookLinkTyp typ = (BookLinkTyp)tok.Termin.Tag;
                bool        ok  = true;
                if (typ == BookLinkTyp.Type || typ == BookLinkTyp.NameTail || typ == BookLinkTyp.ElectronRes)
                {
                    // These term kinds are accepted only right after '.', ':', '[' or a hyphen.
                    if (!(t.Previous != null && ((t.Previous.IsCharOf(".:[") || t.Previous.IsHiphen))))
                    {
                        ok = false;
                    }
                }
                if (ok)
                {
                    return new BookLinkToken(t, tok.EndToken)
                    {
                        Typ = typ, Value = tok.Termin.CanonicText
                    };
                }
                // Reached only when the term was rejected above: an electronic-resource term
                // is still accepted if, after skipping non-letter text tokens, a URI follows.
                if (typ == BookLinkTyp.ElectronRes)
                {
                    for (Pullenti.Ner.Token tt = tok.EndToken.Next; tt != null; tt = tt.Next)
                    {
                        if ((tt is Pullenti.Ner.TextToken) && !tt.Chars.IsLetter)
                        {
                            continue;
                        }
                        if (tt.GetReferent() is Pullenti.Ner.Uri.UriReferent)
                        {
                            return new BookLinkToken(t, tt)
                            {
                                Typ = BookLinkTyp.ElectronRes, Ref = tt.GetReferent()
                            };
                        }
                        break;
                    }
                }
            }
            if (t.IsChar('/'))
            {
                BookLinkToken res = new BookLinkToken(t, t)
                {
                    Typ = BookLinkTyp.Delimeter, Value = "/"
                };
                if (t.Next != null && t.Next.IsChar('/'))
                {
                    res.EndToken = t.Next;

                    res.Value = "//";
                }
                if (!t.IsWhitespaceBefore && !t.IsWhitespaceAfter)
                {
                    // A slash glued to its neighbours counts as a delimiter only if one of the
                    // next three tokens parses as something other than a plain number.
                    int  coo = 3;
                    bool no  = true;
                    for (Pullenti.Ner.Token tt = t.Next; tt != null && coo > 0; tt = tt.Next, coo--)
                    {
                        BookLinkToken vvv = TryParse(tt, lev + 1);
                        if (vvv != null && vvv.Typ != BookLinkTyp.Number)
                        {
                            no = false;

                            break;
                        }
                    }
                    if (no)
                    {
                        return(null);
                    }
                }
                return(res);
            }
            if ((t is Pullenti.Ner.NumberToken) && (t as Pullenti.Ner.NumberToken).IntValue != null && (t as Pullenti.Ner.NumberToken).Typ == Pullenti.Ner.NumberSpellingType.Digit)
            {
                BookLinkToken res = new BookLinkToken(t, t)
                {
                    Typ = BookLinkTyp.Number, Value = (t as Pullenti.Ner.NumberToken).Value.ToString()
                };
                int val = (t as Pullenti.Ner.NumberToken).IntValue.Value;
                // Values in [1930, 2030) are treated as a publication year.
                if (val >= 1930 && (val < 2030))
                {
                    res.Typ = BookLinkTyp.Year;
                }
                if (t.Next != null && t.Next.IsChar('.'))
                {
                    res.EndToken = t.Next;
                }
                else if ((t.Next != null && t.Next.LengthChar == 1 && !t.Next.Chars.IsLetter) && t.Next.IsWhitespaceAfter)
                {
                    res.EndToken = t.Next;
                }
                else if (t.Next is Pullenti.Ner.TextToken)
                {
                    // A number followed by a page marker is a page count.
                    string term = (t.Next as Pullenti.Ner.TextToken).Term;
                    if (((term == "СТР" || term == "C" || term == "С") || term == "P" || term == "S") || term == "PAGES")
                    {
                        res.EndToken = t.Next;
                        res.Typ      = BookLinkTyp.Pages;
                        res.Value    = (t as Pullenti.Ner.NumberToken).Value.ToString();
                    }
                }
                return(res);
            }
            if (t is Pullenti.Ner.TextToken)
            {
                string term = (t as Pullenti.Ner.TextToken).Term;
                if (((((((term == "СТР" || term == "C" || term == "С") || term == "ТОМ" || term == "T") || term == "Т" || term == "P") || term == "PP" || term == "V") || term == "VOL" || term == "S") || term == "СТОР" || t.IsValue("PAGE", null)) || t.IsValue("СТРАНИЦА", "СТОРІНКА"))
                {
                    // Skip punctuation between the marker and the number.
                    Pullenti.Ner.Token tt = t.Next;
                    while (tt != null)
                    {
                        if (tt.IsCharOf(".:~"))
                        {
                            tt = tt.Next;
                        }
                        else
                        {
                            break;
                        }
                    }
                    if (tt is Pullenti.Ner.NumberToken)
                    {
                        BookLinkToken res = new BookLinkToken(t, tt)
                        {
                            Typ = BookLinkTyp.PageRange
                        };
                        Pullenti.Ner.Token tt0 = tt;
                        Pullenti.Ner.Token tt1 = tt;
                        // Extend over further numbers joined by commas or hyphens.
                        for (tt = tt.Next; tt != null; tt = tt.Next)
                        {
                            if (tt.IsCharOf(",") || tt.IsHiphen)
                            {
                                if (tt.Next is Pullenti.Ner.NumberToken)
                                {
                                    tt           = tt.Next;
                                    res.EndToken = tt;
                                    tt1          = tt;
                                    continue;
                                }
                            }
                            break;
                        }
                        res.Value = Pullenti.Ner.Core.MiscHelper.GetTextValue(tt0, tt1, Pullenti.Ner.Core.GetTextAttr.No);
                        return(res);
                    }
                }
                if ((term == "M" || term == "М" || term == "СПБ") || term == "K" || term == "К")
                {
                    // Abbreviated place directly followed by ':' or ';'.
                    if (t.Next != null && t.Next.IsCharOf(":;"))
                    {
                        BookLinkToken re = new BookLinkToken(t, t.Next)
                        {
                            Typ = BookLinkTyp.Geo
                        };
                        return(re);
                    }
                    if (t.Next != null && t.Next.IsCharOf("."))
                    {
                        BookLinkToken res = new BookLinkToken(t, t.Next)
                        {
                            Typ = BookLinkTyp.Geo
                        };
                        if (t.Next.Next != null && t.Next.Next.IsCharOf(":;"))
                        {
                            res.EndToken = t.Next.Next;
                        }
                        else if (t.Next.Next != null && (t.Next.Next is Pullenti.Ner.NumberToken))
                        {
                            // "X. <number>": accepted, the span stays at the dot.
                        }
                        else if (t.Next.Next != null && t.Next.Next.IsComma && (t.Next.Next.Next is Pullenti.Ner.NumberToken))
                        {
                            // "X., <number>": accepted, the span stays at the dot.
                        }
                        else
                        {
                            return(null);
                        }
                        return(res);
                    }
                }
                if (term == "ПЕР" || term == "ПЕРЕВ" || term == "ПЕРЕВОД")
                {
                    Pullenti.Ner.Token tt = t;
                    if (tt.Next != null && tt.Next.IsChar('.'))
                    {
                        tt = tt.Next;
                    }
                    if (tt.Next != null && ((tt.Next.IsValue("C", null) || tt.Next.IsValue("С", null))))
                    {
                        tt = tt.Next;
                        if (tt.Next == null || tt.WhitespacesAfterCount > 2)
                        {
                            return(null);
                        }
                        // The span ends on the token that follows the preposition.
                        BookLinkToken re = new BookLinkToken(t, tt.Next)
                        {
                            Typ = BookLinkTyp.Translate
                        };
                        return(re);
                    }
                }
                if (term == "ТАМ" || term == "ТАМЖЕ")
                {
                    BookLinkToken res = new BookLinkToken(t, t)
                    {
                        Typ = BookLinkTyp.Tamze
                    };
                    if (t.Next != null && t.Next.IsValue("ЖЕ", null))
                    {
                        res.EndToken = t.Next;
                    }
                    return(res);
                }
                if (((term == "СМ" || term == "CM" || term == "НАПР") || term == "НАПРИМЕР" || term == "SEE") || term == "ПОДРОБНЕЕ" || term == "ПОДРОБНО")
                {
                    BookLinkToken res = new BookLinkToken(t, t)
                    {
                        Typ = BookLinkTyp.See
                    };
                    // Absorb trailing punctuation, "ALSO", "В"/"IN" and one nested "see" mark.
                    for (t = t.Next; t != null; t = t.Next)
                    {
                        if (t.IsCharOf(".:") || t.IsValue("ALSO", null))
                        {
                            res.EndToken = t;
                            continue;
                        }
                        if (t.IsValue("В", null) || t.IsValue("IN", null))
                        {
                            res.EndToken = t;
                            continue;
                        }
                        BookLinkToken vvv = _tryParse(t, lev + 1);
                        if (vvv != null && vvv.Typ == BookLinkTyp.See)
                        {
                            res.EndToken = vvv.EndToken;
                        }
                        break;
                    }
                    return(res);
                }
                if (term == "БОЛЕЕ")
                {
                    BookLinkToken vvv = _tryParse(t.Next, lev + 1);
                    if (vvv != null && vvv.Typ == BookLinkTyp.See)
                    {
                        vvv.BeginToken = t;
                        return(vvv);
                    }
                }
                Pullenti.Ner.Token no = Pullenti.Ner.Core.MiscHelper.CheckNumberPrefix(t);
                if (no is Pullenti.Ner.NumberToken)
                {
                    return new BookLinkToken(t, no)
                    {
                        Typ = BookLinkTyp.N
                    };
                }
                if (((term == "B" || term == "В")) && (t.Next is Pullenti.Ner.NumberToken) && (t.Next.Next is Pullenti.Ner.TextToken))
                {
                    string term2 = (t.Next.Next as Pullenti.Ner.TextToken).Term;

                    if (((term2 == "Т" || term2 == "T" || term2.StartsWith("ТОМ")) || term2 == "TT" || term2 == "ТТ") || term2 == "КН" || term2.StartsWith("КНИГ"))
                    {
                        return new BookLinkToken(t, t.Next.Next)
                        {
                            Typ = BookLinkTyp.Volume
                        };
                    }
                }
            }
            if (t.IsChar('('))
            {
                if (((t.Next is Pullenti.Ner.NumberToken) && (t.Next as Pullenti.Ner.NumberToken).IntValue != null && t.Next.Next != null) && t.Next.Next.IsChar(')'))
                {
                    int num = (t.Next as Pullenti.Ner.NumberToken).IntValue.Value;
                    if (num > 1900 && num <= 2040)
                    {
                        // Years later than the current calendar year are rejected.
                        if (num <= DateTime.Now.Year)
                        {
                            return new BookLinkToken(t, t.Next.Next)
                            {
                                Typ = BookLinkTyp.Year, Value = num.ToString()
                            };
                        }
                    }
                }
                if (((t.Next is Pullenti.Ner.ReferentToken) && (t.Next.GetReferent() is Pullenti.Ner.Date.DateReferent) && t.Next.Next != null) && t.Next.Next.IsChar(')'))
                {
                    int num = (t.Next.GetReferent() as Pullenti.Ner.Date.DateReferent).Year;
                    if (num > 0)
                    {
                        return new BookLinkToken(t, t.Next.Next)
                        {
                            Typ = BookLinkTyp.Year, Value = num.ToString()
                        };
                    }
                }
            }
            return(null);
        }
예제 #6
0
        /// <summary>
        /// Cuts the token chain that starts at <paramref name="t0"/> into consecutive <see cref="Line"/> objects.
        /// </summary>
        /// <param name="t0">Token at which splitting starts; a <c>null</c> value produces an empty list.</param>
        /// <param name="maxLines">Splitting stops as soon as this many lines have been collected.</param>
        /// <param name="maxChars">Splitting stops as soon as the summed <c>CharsCount</c> of the lines reaches this value.</param>
        /// <param name="maxEndChar">When positive, splitting stops at the first line whose start token begins after this position.</param>
        /// <returns>The collected lines in text order (never <c>null</c>).</returns>
        public static List <Line> Parse(Pullenti.Ner.Token t0, int maxLines, int maxChars, int maxEndChar)
        {
            List <Line> lines      = new List <Line>();
            int         charsSoFar = 0;

            Pullenti.Ner.Token lineStart = t0;
            while (lineStart != null)
            {
                if (maxEndChar > 0 && lineStart.BeginChar > maxEndChar)
                {
                    break;
                }

                // Walk forward until the token that closes the current line.
                Pullenti.Ner.Token lineEnd = lineStart;
                for (; lineEnd != null && lineEnd.Next != null; lineEnd = lineEnd.Next)
                {
                    // A line break ends the line only if the next token can open a sentence.
                    if (lineEnd.IsNewlineAfter && (lineEnd.Next == null || Pullenti.Ner.Core.MiscHelper.CanBeStartOfSentence(lineEnd.Next)))
                    {
                        break;
                    }
                    // Special case: the line opens with a person referent placed at a line start.
                    bool personOpensLine = lineEnd == lineStart && lineStart.IsNewlineBefore && (lineStart.GetReferent() is Pullenti.Ner.Person.PersonReferent);
                    if (!personOpensLine)
                    {
                        continue;
                    }
                    if (lineEnd.Next == null)
                    {
                        continue;
                    }
                    // The person stands alone when a non-lowercase word follows it.
                    if ((lineEnd.Next is Pullenti.Ner.TextToken) && lineEnd.Next.Chars.IsLetter && !lineEnd.Next.Chars.IsAllLower)
                    {
                        break;
                    }
                }
                lineEnd = lineEnd ?? lineStart;

                // A keywords title item terminates the whole split.
                TitleItemToken titleItem = TitleItemToken.TryAttach(lineStart);
                if (titleItem != null && titleItem.Typ == TitleItemToken.Types.Keywords)
                {
                    break;
                }
                // So does a block title of any defined type.
                Pullenti.Ner.Core.Internal.BlockTitleToken blockTitle = Pullenti.Ner.Core.Internal.BlockTitleToken.TryAttach(lineStart, false, null);
                if (blockTitle != null && blockTitle.Typ != Pullenti.Ner.Core.Internal.BlkTyps.Undefined)
                {
                    break;
                }

                Line line = new Line(lineStart, lineEnd);
                lines.Add(line);
                charsSoFar += line.CharsCount;
                if (lines.Count >= maxLines || charsSoFar >= maxChars)
                {
                    break;
                }

                // The next line starts right after the token that closed this one.
                lineStart = lineEnd.Next;
            }
            return(lines);
        }
예제 #7
0
        /// <summary>
        /// Decides whether the first item of <paramref name="li"/>, which must be of type City,
        /// can be accepted as a city on the strength of its name alone, and builds the referent token if so.
        /// </summary>
        /// <param name="li">Parsed city items; only the first item (and the type/spacing of the second) is examined.
        /// A <c>null</c> or empty list yields <c>null</c>.</param>
        /// <param name="oi">Receives the ontology item of the first list element (<c>null</c> if it has none or the list is rejected up front).</param>
        /// <param name="always">When <c>true</c>, a token is produced even if no contextual evidence was found,
        /// unless the item is doubtful, looks like a surname and the PERSON analyzer recognizes it.</param>
        /// <returns>A <see cref="Pullenti.Ner.ReferentToken"/> wrapping a new or cloned <see cref="Pullenti.Ner.Geo.GeoReferent"/>
        /// that spans the first item, or <c>null</c> when the name is not accepted.</returns>
        static Pullenti.Ner.ReferentToken _tryNameExist(List <CityItemToken> li, out Pullenti.Ner.Core.IntOntologyItem oi, bool always)
        {
            oi = null;
            // An empty list is rejected like a null one; indexing li[0] would throw otherwise.
            if (li == null || li.Count == 0 || li[0].Typ != CityItemToken.ItemType.City)
            {
                return(null);
            }
            CityItemToken first = li[0];
            oi = first.OntoItem;
            Pullenti.Ner.TextToken tt = first.BeginToken as Pullenti.Ner.TextToken;
            if (tt == null)
            {
                return(null);
            }
            bool   ok  = false;
            string nam = (oi == null ? first.Value : oi.CanonicText);

            if (nam == null)
            {
                return(null);
            }
            if (nam == "РИМ")
            {
                if (tt.Term == "РИМ")
                {
                    // Rejected when the next text token is a secondary proper name according to the dictionary.
                    if (!((tt.Next is Pullenti.Ner.TextToken) && tt.Next.GetMorphClassInDictionary().IsProperSecname))
                    {
                        ok = true;
                    }
                }
                else if (tt.Previous != null && tt.Previous.IsValue("В", null) && tt.Term == "РИМЕ")
                {
                    ok = true;
                }
            }
            else if (oi != null && oi.Referent != null && oi.Owner.IsExtOntology)
            {
                ok = true;
            }
            // Fixed literals are matched ordinally so that the result does not depend on the current culture.
            else if (nam.EndsWith("ГРАД", System.StringComparison.Ordinal) || nam.EndsWith("СК", System.StringComparison.Ordinal))
            {
                ok = true;
            }
            else if (nam.EndsWith("TOWN", System.StringComparison.Ordinal) || nam.StartsWith("SAN", System.StringComparison.Ordinal))
            {
                ok = true;
            }
            else if (first.Chars.IsLatinLetter && first.BeginToken.Previous != null && ((first.BeginToken.Previous.IsValue("IN", null) || first.BeginToken.Previous.IsValue("FROM", null))))
            {
                ok = true;
            }
            else
            {
                // Look to the right, on the same line, for a geo referent written in the same alphabet.
                for (Pullenti.Ner.Token tt2 = first.EndToken.Next; tt2 != null; tt2 = tt2.Next)
                {
                    if (tt2.IsNewlineBefore)
                    {
                        break;
                    }
                    if ((tt2.IsCharOf(",(") || tt2.Morph.Class.IsPreposition || tt2.Morph.Class.IsConjunction) || tt2.Morph.Class.IsMisc)
                    {
                        continue;
                    }
                    if ((tt2.GetReferent() is Pullenti.Ner.Geo.GeoReferent) && tt2.Chars.IsCyrillicLetter == first.Chars.IsCyrillicLetter)
                    {
                        ok = true;
                    }
                    break;
                }
                if (!ok)
                {
                    // Nothing on the right: look to the left, then run extra sanity checks
                    // on the first significant token found there.
                    for (Pullenti.Ner.Token tt2 = first.BeginToken.Previous; tt2 != null; tt2 = tt2.Previous)
                    {
                        if (tt2.IsNewlineAfter)
                        {
                            break;
                        }
                        if ((tt2.IsCharOf(",)") || tt2.Morph.Class.IsPreposition || tt2.Morph.Class.IsConjunction) || tt2.Morph.Class.IsMisc)
                        {
                            continue;
                        }
                        if ((tt2.GetReferent() is Pullenti.Ner.Geo.GeoReferent) && tt2.Chars.IsCyrillicLetter == first.Chars.IsCyrillicLetter)
                        {
                            ok = true;
                        }
                        if (ok)
                        {
                            // Reject when the text parses as a street only with the candidate word included.
                            List <Pullenti.Ner.Address.Internal.StreetItemToken> sits = Pullenti.Ner.Address.Internal.StreetItemToken.TryParseList(first.BeginToken, null, 10);
                            if (sits != null && sits.Count > 1)
                            {
                                Pullenti.Ner.Address.Internal.AddressItemToken ss = Pullenti.Ner.Address.Internal.StreetDefineHelper.TryParseStreet(sits, false, false);
                                if (ss != null)
                                {
                                    sits.RemoveAt(0);
                                    if (Pullenti.Ner.Address.Internal.StreetDefineHelper.TryParseStreet(sits, false, false) == null)
                                    {
                                        ok = false;
                                    }
                                }
                            }
                        }
                        if (ok)
                        {
                            if (li.Count > 1 && li[1].Typ == CityItemToken.ItemType.ProperName && (li[1].WhitespacesBeforeCount < 3))
                            {
                                ok = false;
                            }
                            else
                            {
                                Pullenti.Morph.MorphClass mc = first.BeginToken.GetMorphClassInDictionary();
                                if (mc.IsProperName || mc.IsProperSurname || mc.IsAdjective)
                                {
                                    ok = false;
                                }
                                else
                                {
                                    // Reject when the word starts a longer noun phrase.
                                    Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(first.BeginToken, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                                    if (npt != null && npt.EndChar > first.EndChar)
                                    {
                                        ok = false;
                                    }
                                }
                            }
                        }
                        if (Pullenti.Ner.Address.Internal.AddressItemToken.TryAttachOrg(first.BeginToken) != null)
                        {
                            ok = false;
                        }
                        // Only the first significant token on the left is ever examined.
                        break;
                    }
                }
            }
            if (always)
            {
                // Even in forced mode, yield to the PERSON analyzer for a doubtful surname-like word.
                if (first.WhitespacesBeforeCount > 3 && first.Doubtful && first.BeginToken.GetMorphClassInDictionary().IsProperSurname)
                {
                    Pullenti.Ner.ReferentToken pp = first.Kit.ProcessReferent("PERSON", first.BeginToken);
                    if (pp != null)
                    {
                        always = false;
                    }
                }
            }
            if (first.BeginToken.Chars.IsLatinLetter && first.BeginToken == first.EndToken)
            {
                // A single Latin word followed (optionally after a comma) by a short,
                // not all-lowercase Latin token is not accepted on its name alone.
                Pullenti.Ner.Token tt1 = first.EndToken.Next;
                if (tt1 != null && tt1.IsChar(','))
                {
                    tt1 = tt1.Next;
                }
                if (((tt1 is Pullenti.Ner.TextToken) && tt1.Chars.IsLatinLetter && (tt1.LengthChar < 3)) && !tt1.Chars.IsAllLower)
                {
                    ok = false;
                }
            }
            if (!ok && !always)
            {
                return(null);
            }
            Pullenti.Ner.Geo.GeoReferent city = null;
            if (oi != null && (oi.Referent is Pullenti.Ner.Geo.GeoReferent) && !oi.Owner.IsExtOntology)
            {
                // Internal ontology entry: work on a copy with its occurrence list cleared.
                city = oi.Referent.Clone() as Pullenti.Ner.Geo.GeoReferent;
                city.Occurrence.Clear();
            }
            else
            {
                city = new Pullenti.Ner.Geo.GeoReferent();
                city.AddName(nam);
                if (oi != null && (oi.Referent is Pullenti.Ner.Geo.GeoReferent))
                {
                    city.MergeSlots2(oi.Referent as Pullenti.Ner.Geo.GeoReferent, first.Kit.BaseLanguage);
                }
                if (!city.IsCity)
                {
                    city.AddTypCity(first.Kit.BaseLanguage);
                }
            }
            return(new Pullenti.Ner.ReferentToken(city, first.BeginToken, first.EndToken)
            {
                Morph = first.Morph
            });
        }
예제 #8
0
        Pullenti.Ner.ReferentToken TryAttach(Pullenti.Ner.Token t, bool keyWord)
        {
            if (t == null)
            {
                return(null);
            }
            Pullenti.Ner.Token t0       = t;
            Pullenti.Ner.Token t1       = t;
            List <string>      urisKeys = null;
            List <Pullenti.Ner.Uri.UriReferent> uris = null;

            Pullenti.Ner.Referent org    = null;
            Pullenti.Ner.Referent corOrg = null;
            bool orgIsBank = false;
            int  empty     = 0;

            Pullenti.Ner.Uri.UriReferent lastUri = null;
            for (; t != null; t = t.Next)
            {
                if (t.IsTableControlChar && t != t0)
                {
                    break;
                }
                if (t.IsComma || t.Morph.Class.IsPreposition || t.IsCharOf("/\\"))
                {
                    continue;
                }
                bool bankKeyword = false;
                if (t.IsValue("ПОЛНЫЙ", null) && t.Next != null && ((t.Next.IsValue("НАИМЕНОВАНИЕ", null) || t.Next.IsValue("НАЗВАНИЕ", null))))
                {
                    t = t.Next.Next;
                    if (t == null)
                    {
                        break;
                    }
                }
                if (t.IsValue("БАНК", null))
                {
                    if ((t is Pullenti.Ner.ReferentToken) && t.GetReferent().TypeName == "ORGANIZATION")
                    {
                        bankKeyword = true;
                    }
                    Pullenti.Ner.Token tt = t.Next;
                    Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(tt, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                    if (npt != null)
                    {
                        tt = npt.EndToken.Next;
                    }
                    if (tt != null && tt.IsChar(':'))
                    {
                        tt = tt.Next;
                    }
                    if (tt != null)
                    {
                        if (!bankKeyword)
                        {
                            t           = tt;
                            bankKeyword = true;
                        }
                        else if (tt.GetReferent() != null && tt.GetReferent().TypeName == "ORGANIZATION")
                        {
                            t = tt;
                        }
                    }
                }
                Pullenti.Ner.Referent r = t.GetReferent();
                if (r != null && r.TypeName == "ORGANIZATION")
                {
                    bool isBank = false;
                    int  kk     = 0;
                    for (Pullenti.Ner.Referent rr = r; rr != null && (kk < 4); rr = rr.ParentReferent, kk++)
                    {
                        isBank = string.Compare(rr.GetStringValue("KIND") ?? "", "Bank", true) == 0;
                        if (isBank)
                        {
                            break;
                        }
                    }
                    if (!isBank && bankKeyword)
                    {
                        isBank = true;
                    }
                    if (!isBank && uris != null && urisKeys.Contains("ИНН"))
                    {
                        return(null);
                    }
                    if ((lastUri != null && lastUri.Scheme == "К/С" && t.Previous != null) && t.Previous.IsValue("В", null))
                    {
                        corOrg = r;
                        t1     = t;
                    }
                    else if (org == null || ((!orgIsBank && isBank)))
                    {
                        org       = r;
                        t1        = t;
                        orgIsBank = isBank;
                        if (isBank)
                        {
                            continue;
                        }
                    }
                    if (uris == null && !keyWord)
                    {
                        return(null);
                    }
                    continue;
                }
                if (r is Pullenti.Ner.Uri.UriReferent)
                {
                    Pullenti.Ner.Uri.UriReferent u = r as Pullenti.Ner.Uri.UriReferent;
                    if (uris == null)
                    {
                        if (!_isBankReq(u.Scheme))
                        {
                            return(null);
                        }
                        if (u.Scheme == "ИНН" && t.IsNewlineAfter)
                        {
                            return(null);
                        }
                        uris     = new List <Pullenti.Ner.Uri.UriReferent>();
                        urisKeys = new List <string>();
                    }
                    else
                    {
                        if (!_isBankReq(u.Scheme))
                        {
                            break;
                        }
                        if (urisKeys.Contains(u.Scheme))
                        {
                            break;
                        }
                        if (u.Scheme == "ИНН")
                        {
                            if (empty > 0)
                            {
                                break;
                            }
                        }
                    }
                    urisKeys.Add(u.Scheme);
                    uris.Add(u);
                    lastUri = u;
                    t1      = t;
                    empty   = 0;
                    continue;
                }
                else if (uris == null && !keyWord && !orgIsBank)
                {
                    return(null);
                }
                if (r != null && ((r.TypeName == "GEO" || r.TypeName == "ADDRESS")))
                {
                    empty++;
                    continue;
                }
                if (t is Pullenti.Ner.TextToken)
                {
                    if (t.IsValue("ПОЛНЫЙ", null) || t.IsValue("НАИМЕНОВАНИЕ", null) || t.IsValue("НАЗВАНИЕ", null))
                    {
                    }
                    else if (t.Chars.IsLetter)
                    {
                        Pullenti.Ner.Core.TerminToken tok = m_Ontology.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
                        if (tok != null)
                        {
                            t     = tok.EndToken;
                            empty = 0;
                        }
                        else
                        {
                            empty++;
                            if (t.IsNewlineBefore)
                            {
                                Pullenti.Ner.Core.NounPhraseToken nnn = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                                if (nnn != null && nnn.EndToken.Next != null && nnn.EndToken.Next.IsChar(':'))
                                {
                                    break;
                                }
                            }
                        }
                        if (uris == null)
                        {
                            break;
                        }
                    }
                }
                if (empty > 2)
                {
                    break;
                }
                if (empty > 0 && t.IsChar(':') && t.IsNewlineAfter)
                {
                    break;
                }
                if (((t is Pullenti.Ner.NumberToken) && t.IsNewlineBefore && t.Next != null) && !t.Next.Chars.IsLetter)
                {
                    break;
                }
            }
            if (uris == null)
            {
                return(null);
            }
            if (!urisKeys.Contains("Р/С") && !urisKeys.Contains("Л/С"))
            {
                return(null);
            }
            bool ok = false;

            if ((uris.Count < 2) && org == null)
            {
                return(null);
            }
            BankDataReferent bdr = new BankDataReferent();

            foreach (Pullenti.Ner.Uri.UriReferent u in uris)
            {
                bdr.AddSlot(BankDataReferent.ATTR_ITEM, u, false, 0);
            }
            if (org != null)
            {
                bdr.AddSlot(BankDataReferent.ATTR_BANK, org, false, 0);
            }
            if (corOrg != null)
            {
                bdr.AddSlot(BankDataReferent.ATTR_CORBANK, corOrg, false, 0);
            }
            Pullenti.Ner.Referent org0 = (t0.Previous == null ? null : t0.Previous.GetReferent());
            if (org0 != null && org0.TypeName == "ORGANIZATION")
            {
                foreach (Pullenti.Ner.Slot s in org0.Slots)
                {
                    if (s.Value is Pullenti.Ner.Uri.UriReferent)
                    {
                        Pullenti.Ner.Uri.UriReferent u = s.Value as Pullenti.Ner.Uri.UriReferent;
                        if (_isBankReq(u.Scheme))
                        {
                            if (!urisKeys.Contains(u.Scheme))
                            {
                                bdr.AddSlot(BankDataReferent.ATTR_ITEM, u, false, 0);
                            }
                        }
                    }
                }
            }
            return(new Pullenti.Ner.ReferentToken(bdr, t0, t1));
        }
예제 #9
0
        public static Pullenti.Ner.ReferentToken TryAttachTerritory(List <TerrItemToken> li, Pullenti.Ner.Core.AnalyzerData ad, bool attachAlways = false, List <CityItemToken> cits = null, List <Pullenti.Ner.Geo.GeoReferent> exists = null)
        {
            if (li == null || li.Count == 0)
            {
                return(null);
            }
            TerrItemToken        exObj   = null;
            TerrItemToken        newName = null;
            List <TerrItemToken> adjList = new List <TerrItemToken>();
            TerrItemToken        noun    = null;
            TerrItemToken        addNoun = null;

            Pullenti.Ner.ReferentToken rt = _tryAttachMoscowAO(li, ad);
            if (rt != null)
            {
                return(rt);
            }
            if (li[0].TerminItem != null && li[0].TerminItem.CanonicText == "ТЕРРИТОРИЯ")
            {
                Pullenti.Ner.ReferentToken res2 = _tryAttachPureTerr(li, ad);
                return(res2);
            }
            if (li.Count == 2)
            {
                if (li[0].Rzd != null && li[1].RzdDir != null)
                {
                    Pullenti.Ner.Geo.GeoReferent rzd = new Pullenti.Ner.Geo.GeoReferent();
                    rzd.AddName(li[1].RzdDir);
                    rzd.AddTypTer(li[0].Kit.BaseLanguage);
                    rzd.AddSlot(Pullenti.Ner.Geo.GeoReferent.ATTR_REF, li[0].Rzd.Referent, false, 0);
                    rzd.AddExtReferent(li[0].Rzd);
                    return(new Pullenti.Ner.ReferentToken(rzd, li[0].BeginToken, li[1].EndToken));
                }
                if (li[1].Rzd != null && li[0].RzdDir != null)
                {
                    Pullenti.Ner.Geo.GeoReferent rzd = new Pullenti.Ner.Geo.GeoReferent();
                    rzd.AddName(li[0].RzdDir);
                    rzd.AddTypTer(li[0].Kit.BaseLanguage);
                    rzd.AddSlot(Pullenti.Ner.Geo.GeoReferent.ATTR_REF, li[1].Rzd.Referent, false, 0);
                    rzd.AddExtReferent(li[1].Rzd);
                    return(new Pullenti.Ner.ReferentToken(rzd, li[0].BeginToken, li[1].EndToken));
                }
            }
            bool canBeCityBefore = false;
            bool adjTerrBefore   = false;

            if (cits != null)
            {
                if (cits[0].Typ == CityItemToken.ItemType.City)
                {
                    canBeCityBefore = true;
                }
                else if (cits[0].Typ == CityItemToken.ItemType.Noun && cits.Count > 1)
                {
                    canBeCityBefore = true;
                }
            }
            int k;

            for (k = 0; k < li.Count; k++)
            {
                if (li[k].OntoItem != null)
                {
                    if (exObj != null || newName != null)
                    {
                        break;
                    }
                    if (noun != null)
                    {
                        if (k == 1)
                        {
                            if (noun.TerminItem.CanonicText == "РАЙОН" || noun.TerminItem.CanonicText == "ОБЛАСТЬ" || noun.TerminItem.CanonicText == "СОЮЗ")
                            {
                                if (li[k].OntoItem.Referent is Pullenti.Ner.Geo.GeoReferent)
                                {
                                    if ((li[k].OntoItem.Referent as Pullenti.Ner.Geo.GeoReferent).IsState)
                                    {
                                        break;
                                    }
                                }
                                bool ok = false;
                                Pullenti.Ner.Token tt = li[k].EndToken.Next;
                                if (tt == null)
                                {
                                    ok = true;
                                }
                                else if (tt.IsCharOf(",."))
                                {
                                    ok = true;
                                }
                                if (!ok)
                                {
                                    ok = MiscLocationHelper.CheckGeoObjectBefore(li[0].BeginToken);
                                }
                                if (!ok)
                                {
                                    Pullenti.Ner.Address.Internal.AddressItemToken adr = Pullenti.Ner.Address.Internal.AddressItemToken.TryParse(tt, null, false, false, null);
                                    if (adr != null)
                                    {
                                        if (adr.Typ == Pullenti.Ner.Address.Internal.AddressItemToken.ItemType.Street)
                                        {
                                            ok = true;
                                        }
                                    }
                                }
                                if (!ok)
                                {
                                    break;
                                }
                            }
                            if (li[k].OntoItem != null)
                            {
                                if (noun.BeginToken.IsValue("МО", null) || noun.BeginToken.IsValue("ЛО", null))
                                {
                                    return(null);
                                }
                            }
                        }
                    }
                    exObj = li[k];
                }
                else if (li[k].TerminItem != null)
                {
                    if (noun != null)
                    {
                        break;
                    }
                    if (li[k].TerminItem.IsAlwaysPrefix && k > 0)
                    {
                        break;
                    }
                    if (k > 0 && li[k].IsDoubt)
                    {
                        if (li[k].BeginToken == li[k].EndToken && li[k].BeginToken.IsValue("ЗАО", null))
                        {
                            break;
                        }
                    }
                    if (li[k].TerminItem.IsAdjective || li[k].IsGeoInDictionary)
                    {
                        adjList.Add(li[k]);
                    }
                    else
                    {
                        if (exObj != null)
                        {
                            Pullenti.Ner.Geo.GeoReferent geo = exObj.OntoItem.Referent as Pullenti.Ner.Geo.GeoReferent;
                            if (geo == null)
                            {
                                break;
                            }
                            if (exObj.IsAdjective && ((li[k].TerminItem.CanonicText == "СОЮЗ" || li[k].TerminItem.CanonicText == "ФЕДЕРАЦИЯ")))
                            {
                                string str = exObj.OntoItem.ToString();
                                if (!str.Contains(li[k].TerminItem.CanonicText))
                                {
                                    return(null);
                                }
                            }
                            if (li[k].TerminItem.CanonicText == "РАЙОН" || li[k].TerminItem.CanonicText == "ОКРУГ" || li[k].TerminItem.CanonicText == "КРАЙ")
                            {
                                StringBuilder tmp = new StringBuilder();
                                foreach (Pullenti.Ner.Slot s in geo.Slots)
                                {
                                    if (s.TypeName == Pullenti.Ner.Geo.GeoReferent.ATTR_TYPE)
                                    {
                                        tmp.AppendFormat("{0};", s.Value);
                                    }
                                }
                                if (!tmp.ToString().ToUpper().Contains(li[k].TerminItem.CanonicText))
                                {
                                    if (k != 1 || newName != null)
                                    {
                                        break;
                                    }
                                    newName             = li[0];
                                    newName.IsAdjective = true;
                                    newName.OntoItem    = null;
                                    exObj = null;
                                }
                            }
                        }
                        noun = li[k];
                        if (k == 0)
                        {
                            TerrItemToken tt = TerrItemToken.TryParse(li[k].BeginToken.Previous, null, true, false, null);
                            if (tt != null && tt.Morph.Class.IsAdjective)
                            {
                                adjTerrBefore = true;
                            }
                        }
                    }
                }
                else
                {
                    if (exObj != null)
                    {
                        break;
                    }
                    if (newName != null)
                    {
                        break;
                    }
                    newName = li[k];
                }
            }
            string name     = null;
            string altName  = null;
            string fullName = null;

            Pullenti.Ner.MorphCollection morph = null;
            if (exObj != null)
            {
                if (exObj.IsAdjective && !exObj.Morph.Language.IsEn && noun == null)
                {
                    if (attachAlways && exObj.EndToken.Next != null)
                    {
                        Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(exObj.BeginToken, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                        if (exObj.EndToken.Next.IsCommaAnd)
                        {
                        }
                        else if (npt == null)
                        {
                        }
                        else
                        {
                            Pullenti.Ner.Address.Internal.StreetItemToken str = Pullenti.Ner.Address.Internal.StreetItemToken.TryParse(exObj.EndToken.Next, null, false, null, false);
                            if (str != null)
                            {
                                if (str.Typ == Pullenti.Ner.Address.Internal.StreetItemType.Noun && str.EndToken == npt.EndToken)
                                {
                                    return(null);
                                }
                            }
                        }
                    }
                    else
                    {
                        CityItemToken cit = CityItemToken.TryParse(exObj.EndToken.Next, null, false, null);
                        if (cit != null && ((cit.Typ == CityItemToken.ItemType.Noun || cit.Typ == CityItemToken.ItemType.City)))
                        {
                            Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(exObj.BeginToken, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                            if (npt != null && npt.EndToken == cit.EndToken)
                            {
                            }
                            else
                            {
                                return(null);
                            }
                        }
                        else if (exObj.BeginToken.IsValue("ПОДНЕБЕСНЫЙ", null))
                        {
                        }
                        else
                        {
                            return(null);
                        }
                    }
                }
                if (noun == null && exObj.CanBeCity)
                {
                    CityItemToken cit0 = CityItemToken.TryParseBack(exObj.BeginToken.Previous);
                    if (cit0 != null && cit0.Typ != CityItemToken.ItemType.ProperName)
                    {
                        return(null);
                    }
                }
                if (exObj.IsDoubt && noun == null)
                {
                    bool ok2 = false;
                    if (_canBeGeoAfter(exObj.EndToken.Next))
                    {
                        ok2 = true;
                    }
                    else if (!exObj.CanBeSurname && !exObj.CanBeCity)
                    {
                        if ((exObj.EndToken.Next != null && exObj.EndToken.Next.IsChar(')') && exObj.BeginToken.Previous != null) && exObj.BeginToken.Previous.IsChar('('))
                        {
                            ok2 = true;
                        }
                        else if (exObj.Chars.IsLatinLetter && exObj.BeginToken.Previous != null)
                        {
                            if (exObj.BeginToken.Previous.IsValue("IN", null))
                            {
                                ok2 = true;
                            }
                            else if (exObj.BeginToken.Previous.IsValue("THE", null) && exObj.BeginToken.Previous.Previous != null && exObj.BeginToken.Previous.Previous.IsValue("IN", null))
                            {
                                ok2 = true;
                            }
                        }
                    }
                    if (!ok2)
                    {
                        CityItemToken cit0 = CityItemToken.TryParseBack(exObj.BeginToken.Previous);
                        if (cit0 != null && cit0.Typ != CityItemToken.ItemType.ProperName)
                        {
                        }
                        else if (MiscLocationHelper.CheckGeoObjectBefore(exObj.BeginToken.Previous))
                        {
                        }
                        else
                        {
                            return(null);
                        }
                    }
                }
                name  = exObj.OntoItem.CanonicText;
                morph = exObj.Morph;
            }
            else if (newName != null)
            {
                if (noun == null)
                {
                    return(null);
                }
                for (int j = 1; j < k; j++)
                {
                    if (li[j].IsNewlineBefore && !li[0].IsNewlineBefore)
                    {
                        if (Pullenti.Ner.Core.BracketHelper.CanBeStartOfSequence(li[j].BeginToken, false, false))
                        {
                        }
                        else
                        {
                            return(null);
                        }
                    }
                }
                morph = noun.Morph;
                if (newName.IsAdjective)
                {
                    if (noun.TerminItem.Acronym == "АО")
                    {
                        if (noun.BeginToken != noun.EndToken)
                        {
                            return(null);
                        }
                        if (newName.Morph.Gender != Pullenti.Morph.MorphGender.Feminie)
                        {
                            return(null);
                        }
                    }
                    Pullenti.Ner.Geo.GeoReferent geoBefore = null;
                    Pullenti.Ner.Token           tt0       = li[0].BeginToken.Previous;
                    if (tt0 != null && tt0.IsCommaAnd)
                    {
                        tt0 = tt0.Previous;
                    }
                    if (!li[0].IsNewlineBefore && tt0 != null)
                    {
                        geoBefore = tt0.GetReferent() as Pullenti.Ner.Geo.GeoReferent;
                    }
                    if (li.IndexOf(noun) < li.IndexOf(newName))
                    {
                        if (noun.TerminItem.IsState)
                        {
                            return(null);
                        }
                        if (newName.CanBeSurname && geoBefore == null)
                        {
                            if (((noun.Morph.Case & newName.Morph.Case)).IsUndefined)
                            {
                                return(null);
                            }
                        }
                        if (Pullenti.Ner.Core.MiscHelper.IsExistsInDictionary(newName.BeginToken, newName.EndToken, Pullenti.Morph.MorphClass.Adjective | Pullenti.Morph.MorphClass.Pronoun | Pullenti.Morph.MorphClass.Verb))
                        {
                            if (noun.BeginToken != newName.BeginToken)
                            {
                                if (geoBefore == null)
                                {
                                    if (li.Count == 2 && _canBeGeoAfter(li[1].EndToken.Next))
                                    {
                                    }
                                    else if (li.Count == 3 && li[2].TerminItem != null && _canBeGeoAfter(li[2].EndToken.Next))
                                    {
                                    }
                                    else if (newName.IsGeoInDictionary)
                                    {
                                    }
                                    else if (newName.EndToken.IsNewlineAfter)
                                    {
                                    }
                                    else
                                    {
                                        return(null);
                                    }
                                }
                            }
                        }
                        Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(newName.EndToken, Pullenti.Ner.Core.NounPhraseParseAttr.ParsePronouns, 0, null);
                        if (npt != null && npt.EndToken != newName.EndToken)
                        {
                            if (li.Count >= 3 && li[2].TerminItem != null && npt.EndToken == li[2].EndToken)
                            {
                                addNoun = li[2];
                            }
                            else
                            {
                                return(null);
                            }
                        }
                        Pullenti.Ner.ReferentToken rtp = newName.Kit.ProcessReferent("PERSON", newName.BeginToken);
                        if (rtp != null)
                        {
                            return(null);
                        }
                        name = Pullenti.Ner.Core.ProperNameHelper.GetNameEx(newName.BeginToken, newName.EndToken, Pullenti.Morph.MorphClass.Adjective, Pullenti.Morph.MorphCase.Undefined, noun.TerminItem.Gender, false, false);
                    }
                    else
                    {
                        bool ok = false;
                        if (((k + 1) < li.Count) && li[k].TerminItem == null && li[k + 1].TerminItem != null)
                        {
                            ok = true;
                        }
                        else if ((k < li.Count) && li[k].OntoItem != null)
                        {
                            ok = true;
                        }
                        else if (k == li.Count && !newName.IsAdjInDictionary)
                        {
                            ok = true;
                        }
                        else if (MiscLocationHelper.CheckGeoObjectBefore(li[0].BeginToken) || canBeCityBefore)
                        {
                            ok = true;
                        }
                        else if (MiscLocationHelper.CheckGeoObjectAfter(li[k - 1].EndToken, false))
                        {
                            ok = true;
                        }
                        else if (li.Count == 3 && k == 2)
                        {
                            CityItemToken cit = CityItemToken.TryParse(li[2].BeginToken, null, false, null);
                            if (cit != null)
                            {
                                if (cit.Typ == CityItemToken.ItemType.City || cit.Typ == CityItemToken.ItemType.Noun)
                                {
                                    ok = true;
                                }
                            }
                        }
                        else if (li.Count == 2)
                        {
                            ok = _canBeGeoAfter(li[li.Count - 1].EndToken.Next);
                        }
                        if (!ok && !li[0].IsNewlineBefore && !li[0].Chars.IsAllLower)
                        {
                            Pullenti.Ner.ReferentToken rt00 = li[0].Kit.ProcessReferent("PERSONPROPERTY", li[0].BeginToken.Previous);
                            if (rt00 != null)
                            {
                                ok = true;
                            }
                        }
                        if (noun.TerminItem != null && noun.TerminItem.IsStrong && newName.IsAdjective)
                        {
                            ok = true;
                        }
                        if (noun.IsDoubt && adjList.Count == 0 && geoBefore == null)
                        {
                            return(null);
                        }
                        name = Pullenti.Ner.Core.ProperNameHelper.GetNameEx(newName.BeginToken, newName.EndToken, Pullenti.Morph.MorphClass.Adjective, Pullenti.Morph.MorphCase.Undefined, noun.TerminItem.Gender, false, false);
                        if (!ok && !attachAlways)
                        {
                            if (Pullenti.Ner.Core.MiscHelper.IsExistsInDictionary(newName.BeginToken, newName.EndToken, Pullenti.Morph.MorphClass.Adjective | Pullenti.Morph.MorphClass.Pronoun | Pullenti.Morph.MorphClass.Verb))
                            {
                                if (exists != null)
                                {
                                    foreach (Pullenti.Ner.Geo.GeoReferent e in exists)
                                    {
                                        if (e.FindSlot(Pullenti.Ner.Geo.GeoReferent.ATTR_NAME, name, true) != null)
                                        {
                                            ok = true;
                                            break;
                                        }
                                    }
                                }
                                if (!ok)
                                {
                                    return(null);
                                }
                            }
                        }
                        fullName = string.Format("{0} {1}", Pullenti.Ner.Core.ProperNameHelper.GetNameEx(li[0].BeginToken, noun.BeginToken.Previous, Pullenti.Morph.MorphClass.Adjective, Pullenti.Morph.MorphCase.Undefined, noun.TerminItem.Gender, false, false), noun.TerminItem.CanonicText);
                    }
                }
                else
                {
                    if (!attachAlways || ((noun.TerminItem != null && noun.TerminItem.CanonicText == "ФЕДЕРАЦИЯ")))
                    {
                        bool isLatin = noun.Chars.IsLatinLetter && newName.Chars.IsLatinLetter;
                        if (li.IndexOf(noun) > li.IndexOf(newName))
                        {
                            if (!isLatin)
                            {
                                return(null);
                            }
                        }
                        if (!newName.IsDistrictName && !Pullenti.Ner.Core.BracketHelper.CanBeStartOfSequence(newName.BeginToken, false, false))
                        {
                            if (adjList.Count == 0 && Pullenti.Ner.Core.MiscHelper.IsExistsInDictionary(newName.BeginToken, newName.EndToken, Pullenti.Morph.MorphClass.Noun | Pullenti.Morph.MorphClass.Pronoun))
                            {
                                if (li.Count == 2 && noun.IsCityRegion && (noun.WhitespacesAfterCount < 2))
                                {
                                }
                                else
                                {
                                    return(null);
                                }
                            }
                            if (!isLatin)
                            {
                                if ((noun.TerminItem.IsRegion && !attachAlways && ((!adjTerrBefore || newName.IsDoubt))) && !noun.IsCityRegion && !noun.TerminItem.IsSpecificPrefix)
                                {
                                    if (!MiscLocationHelper.CheckGeoObjectBefore(noun.BeginToken))
                                    {
                                        if (!noun.IsDoubt && noun.BeginToken != noun.EndToken)
                                        {
                                        }
                                        else if ((noun.TerminItem.IsAlwaysPrefix && li.Count == 2 && li[0] == noun) && li[1] == newName)
                                        {
                                        }
                                        else
                                        {
                                            return(null);
                                        }
                                    }
                                }
                                if (noun.IsDoubt && adjList.Count == 0)
                                {
                                    if (noun.TerminItem.Acronym == "МО" || noun.TerminItem.Acronym == "ЛО")
                                    {
                                        if (k == (li.Count - 1) && li[k].TerminItem != null)
                                        {
                                            addNoun = li[k];
                                            k++;
                                        }
                                        else if (li.Count == 2 && noun == li[0] && newName.ToString().EndsWith("совет"))
                                        {
                                        }
                                        else
                                        {
                                            return(null);
                                        }
                                    }
                                    else
                                    {
                                        return(null);
                                    }
                                }
                                Pullenti.Ner.ReferentToken pers = newName.Kit.ProcessReferent("PERSON", newName.BeginToken);
                                if (pers != null)
                                {
                                    return(null);
                                }
                            }
                        }
                    }
                    name = Pullenti.Ner.Core.MiscHelper.GetTextValue(newName.BeginToken, newName.EndToken, Pullenti.Ner.Core.GetTextAttr.No);
                    if (newName.BeginToken != newName.EndToken)
                    {
                        for (Pullenti.Ner.Token ttt = newName.BeginToken.Next; ttt != null && ttt.EndChar <= newName.EndChar; ttt = ttt.Next)
                        {
                            if (ttt.Chars.IsLetter)
                            {
                                TerrItemToken ty = TerrItemToken.TryParse(ttt, null, false, false, null);
                                if ((ty != null && ty.TerminItem != null && noun != null) && ((ty.TerminItem.CanonicText.Contains(noun.TerminItem.CanonicText) || noun.TerminItem.CanonicText.Contains(ty.TerminItem.CanonicText))))
                                {
                                    name = Pullenti.Ner.Core.MiscHelper.GetTextValue(newName.BeginToken, ttt.Previous, Pullenti.Ner.Core.GetTextAttr.No);
                                    break;
                                }
                            }
                        }
                    }
                    if (adjList.Count > 0)
                    {
                        Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(adjList[0].BeginToken, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                        if (npt != null && npt.EndToken == noun.EndToken)
                        {
                            altName = string.Format("{0} {1}", npt.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false), name);
                        }
                    }
                }
            }
            else
            {
                if ((li.Count == 1 && noun != null && noun.EndToken.Next != null) && (noun.EndToken.Next.GetReferent() is Pullenti.Ner.Geo.GeoReferent))
                {
                    Pullenti.Ner.Geo.GeoReferent g = noun.EndToken.Next.GetReferent() as Pullenti.Ner.Geo.GeoReferent;
                    if (noun.TerminItem != null)
                    {
                        string tyy = noun.TerminItem.CanonicText.ToLower();
                        bool   ooo = false;
                        if (g.FindSlot(Pullenti.Ner.Geo.GeoReferent.ATTR_TYPE, tyy, true) != null)
                        {
                            ooo = true;
                        }
                        else if (tyy.EndsWith("район") && g.FindSlot(Pullenti.Ner.Geo.GeoReferent.ATTR_TYPE, "район", true) != null)
                        {
                            ooo = true;
                        }
                        if (ooo)
                        {
                            return new Pullenti.Ner.ReferentToken(g, noun.BeginToken, noun.EndToken.Next)
                                   {
                                       Morph = noun.BeginToken.Morph
                                   }
                        }
                        ;
                    }
                }
                if ((li.Count == 1 && noun == li[0] && li[0].TerminItem != null) && TerrItemToken.TryParse(li[0].EndToken.Next, null, true, false, null) == null && TerrItemToken.TryParse(li[0].BeginToken.Previous, null, true, false, null) == null)
                {
                    if (li[0].Morph.Number == Pullenti.Morph.MorphNumber.Plural)
                    {
                        return(null);
                    }
                    int    cou = 0;
                    string str = li[0].TerminItem.CanonicText.ToLower();
                    for (Pullenti.Ner.Token tt = li[0].BeginToken.Previous; tt != null; tt = tt.Previous)
                    {
                        if (tt.IsNewlineAfter)
                        {
                            cou += 10;
                        }
                        else
                        {
                            cou++;
                        }
                        if (cou > 500)
                        {
                            break;
                        }
                        Pullenti.Ner.Geo.GeoReferent g = tt.GetReferent() as Pullenti.Ner.Geo.GeoReferent;
                        if (g == null)
                        {
                            continue;
                        }
                        bool ok = true;
                        cou = 0;
                        for (tt = li[0].EndToken.Next; tt != null; tt = tt.Next)
                        {
                            if (tt.IsNewlineBefore)
                            {
                                cou += 10;
                            }
                            else
                            {
                                cou++;
                            }
                            if (cou > 500)
                            {
                                break;
                            }
                            TerrItemToken tee = TerrItemToken.TryParse(tt, null, true, false, null);
                            if (tee == null)
                            {
                                continue;
                            }
                            ok = false;
                            break;
                        }
                        if (ok)
                        {
                            for (int ii = 0; g != null && (ii < 3); g = g.Higher, ii++)
                            {
                                if (g.FindSlot(Pullenti.Ner.Geo.GeoReferent.ATTR_TYPE, str, true) != null)
                                {
                                    return new Pullenti.Ner.ReferentToken(g, li[0].BeginToken, li[0].EndToken)
                                           {
                                               Morph = noun.BeginToken.Morph
                                           }
                                }
                                ;
                            }
                        }
                        break;
                    }
                }
                return(null);
            }
            Pullenti.Ner.Geo.GeoReferent ter = null;
            if (exObj != null && (exObj.Tag is Pullenti.Ner.Geo.GeoReferent))
            {
                ter = exObj.Tag as Pullenti.Ner.Geo.GeoReferent;
            }
            else
            {
                ter = new Pullenti.Ner.Geo.GeoReferent();
                if (exObj != null)
                {
                    Pullenti.Ner.Geo.GeoReferent geo = exObj.OntoItem.Referent as Pullenti.Ner.Geo.GeoReferent;
                    if (geo != null && !geo.IsCity)
                    {
                        ter.MergeSlots2(geo, li[0].Kit.BaseLanguage);
                    }
                    else
                    {
                        ter.AddName(name);
                    }
                    if (noun == null && exObj.CanBeCity)
                    {
                        ter.AddTypCity(li[0].Kit.BaseLanguage);
                    }
                    else
                    {
                    }
                }
                else if (newName != null)
                {
                    ter.AddName(name);
                    if (altName != null)
                    {
                        ter.AddName(altName);
                    }
                }
                if (noun != null)
                {
                    if (noun.TerminItem.CanonicText == "АО")
                    {
                        ter.AddTyp((li[0].Kit.BaseLanguage.IsUa ? "АВТОНОМНИЙ ОКРУГ" : "АВТОНОМНЫЙ ОКРУГ"));
                    }
                    else if (noun.TerminItem.CanonicText == "МУНИЦИПАЛЬНОЕ СОБРАНИЕ" || noun.TerminItem.CanonicText == "МУНІЦИПАЛЬНЕ ЗБОРИ")
                    {
                        ter.AddTyp((li[0].Kit.BaseLanguage.IsUa ? "МУНІЦИПАЛЬНЕ УТВОРЕННЯ" : "МУНИЦИПАЛЬНОЕ ОБРАЗОВАНИЕ"));
                    }
                    else if (noun.TerminItem.Acronym == "МО" && addNoun != null)
                    {
                        ter.AddTyp(addNoun.TerminItem.CanonicText);
                    }
                    else
                    {
                        if (noun.TerminItem.CanonicText == "СОЮЗ" && exObj != null && exObj.EndChar > noun.EndChar)
                        {
                            return new Pullenti.Ner.ReferentToken(ter, exObj.BeginToken, exObj.EndToken)
                                   {
                                       Morph = exObj.Morph
                                   }
                        }
                        ;
                        ter.AddTyp(noun.TerminItem.CanonicText);
                        if (noun.TerminItem.IsRegion && ter.IsState)
                        {
                            ter.AddTypReg(li[0].Kit.BaseLanguage);
                        }
                    }
                }
                if (ter.IsState && ter.IsRegion)
                {
                    foreach (TerrItemToken a in adjList)
                    {
                        if (a.TerminItem.IsRegion)
                        {
                            ter.AddTypReg(li[0].Kit.BaseLanguage);
                            break;
                        }
                    }
                }
                if (ter.IsState)
                {
                    if (fullName != null)
                    {
                        ter.AddName(fullName);
                    }
                }
            }
            Pullenti.Ner.ReferentToken res = new Pullenti.Ner.ReferentToken(ter, li[0].BeginToken, li[k - 1].EndToken);
            if (noun != null && noun.Morph.Class.IsNoun)
            {
                res.Morph = noun.Morph;
            }
            else
            {
                res.Morph = new Pullenti.Ner.MorphCollection();
                for (int ii = 0; ii < k; ii++)
                {
                    foreach (Pullenti.Morph.MorphBaseInfo v in li[ii].Morph.Items)
                    {
                        Pullenti.Morph.MorphBaseInfo bi = new Pullenti.Morph.MorphBaseInfo();
                        bi.CopyFrom(v);
                        if (noun != null)
                        {
                            if (bi.Class.IsAdjective)
                            {
                                bi.Class = Pullenti.Morph.MorphClass.Noun;
                            }
                        }
                        res.Morph.AddItem(bi);
                    }
                }
            }
            if (li[0].TerminItem != null && li[0].TerminItem.IsSpecificPrefix)
            {
                res.BeginToken = li[0].EndToken.Next;
            }
            if (addNoun != null && addNoun.EndChar > res.EndChar)
            {
                res.EndToken = addNoun.EndToken;
            }
            if ((res.BeginToken.Previous is Pullenti.Ner.TextToken) && (res.WhitespacesBeforeCount < 2))
            {
                Pullenti.Ner.TextToken tt = res.BeginToken.Previous as Pullenti.Ner.TextToken;
                if (tt.Term == "АР")
                {
                    foreach (string ty in ter.Typs)
                    {
                        if (ty.Contains("республика") || ty.Contains("республіка"))
                        {
                            res.BeginToken = tt;
                            break;
                        }
                    }
                }
            }
            return(res);
        }
예제 #10
0
        /// <summary>
        /// Tries to build a city referent from an item list shaped like "type noun (or Misc item) + name",
        /// for example a leading "ГОРОД"/"МІСТО" item followed by a City or ProperName item.
        /// </summary>
        /// <param name="li">City items to analyse (at least two are required). NOTE: the list is modified in place -
        /// items after the name item are removed, and a leading Misc item is removed before the result is built.</param>
        /// <param name="oi">Set to null on entry; receives <c>li[i1].OntoItem</c> (which may itself be null) when the
        /// "city noun / Misc item + City item" branch is reached.</param>
        /// <param name="always">When true, the final "not confirmed by context" rejection is skipped.</param>
        /// <returns>A referent token wrapping a GeoReferent, or null when the sequence is rejected.</returns>
        static Pullenti.Ner.ReferentToken _tryNounName(List <CityItemToken> li, out Pullenti.Ner.Core.IntOntologyItem oi, bool always)
        {
            oi = null;
            // The pattern needs a head item (Noun or Misc) plus at least one more item
            if (li == null || (li.Count < 2) || ((li[0].Typ != CityItemToken.ItemType.Noun && li[0].Typ != CityItemToken.ItemType.Misc)))
            {
                return(null);
            }
            // "ok" accumulates whether the sequence is trusted; a doubtful or Misc head starts as untrusted
            bool ok = !li[0].Doubtful;

            if (ok && li[0].Typ == CityItemToken.ItemType.Misc)
            {
                ok = false;
            }
            // A Misc head contributes no type strings
            string typ     = (li[0].Typ == CityItemToken.ItemType.Misc ? null : li[0].Value);
            string typ2    = (li[0].Typ == CityItemToken.ItemType.Misc ? null : li[0].AltValue);
            string probAdj = null;
            // Index of the item that carries the name
            int    i1      = 1;

            Pullenti.Ner.Referent org = null;
            // Two type nouns in a row (settlement/village noun + another noun): the second noun becomes the
            // secondary type and the name is expected one position further
            if ((typ != null && li[i1].Typ == CityItemToken.ItemType.Noun && ((i1 + 1) < li.Count)) && li[0].WhitespacesAfterCount <= 1 && (((Pullenti.Morph.LanguageHelper.EndsWith(typ, "ПОСЕЛОК") || Pullenti.Morph.LanguageHelper.EndsWith(typ, "СЕЛИЩЕ") || typ == "ДЕРЕВНЯ") || typ == "СЕЛО")))
            {
                if (li[i1].BeginToken == li[i1].EndToken)
                {
                    // Reject when the single-token second noun is recognised as an organization in an address
                    Pullenti.Ner.Address.Internal.AddressItemToken ooo = Pullenti.Ner.Address.Internal.AddressItemToken.TryAttachOrg(li[i1].BeginToken);
                    if (ooo != null && ooo.RefToken != null)
                    {
                        return(null);
                    }
                }
                typ2 = li[i1].Value;
                // The token "СТ" was read as "СТАНЦИЯ"; additionally prepare the form of "СТАРЫЙ" that agrees
                // with the next item's number/gender, so that a name variant prefixed with it is registered below
                if (typ2 == "СТАНЦИЯ" && li[i1].BeginToken.IsValue("СТ", null) && ((i1 + 1) < li.Count))
                {
                    Pullenti.Ner.MorphCollection m = li[i1 + 1].Morph;
                    if (m.Number == Pullenti.Morph.MorphNumber.Plural)
                    {
                        probAdj = "СТАРЫЕ";
                    }
                    else if (m.Gender == Pullenti.Morph.MorphGender.Feminie)
                    {
                        probAdj = "СТАРАЯ";
                    }
                    else if (m.Gender == Pullenti.Morph.MorphGender.Masculine)
                    {
                        probAdj = "СТАРЫЙ";
                    }
                    else
                    {
                        probAdj = "СТАРОЕ";
                    }
                }
                i1++;
            }
            // Name: explicit value first, otherwise the canonic text of the ontology item (if any)
            string name    = li[i1].Value ?? ((li[i1].OntoItem == null ? null : li[i1].OntoItem.CanonicText));
            string altName = li[i1].AltValue;

            if (name == null)
            {
                return(null);
            }
            // Remember the head's morphology now: li[0] may be removed from the list further down
            Pullenti.Ner.MorphCollection mc = li[0].Morph;
            if (i1 == 1 && li[i1].Typ == CityItemToken.ItemType.City && ((li[0].Value == "ГОРОД" || li[0].Value == "МІСТО" || li[0].Typ == CityItemToken.ItemType.Misc)))
            {
                // Branch 1: city noun (or Misc item) directly followed by a City item
                if (typ == null && ((i1 + 1) < li.Count) && li[i1 + 1].Typ == CityItemToken.ItemType.Noun)
                {
                    return(null);
                }
                oi = li[i1].OntoItem;
                if (oi != null)
                {
                    name = oi.CanonicText;
                }
                // FIX: oi can be null here (the name may come from li[i1].Value), so the MiscAttr access is
                // guarded; previously a short name without an ontology item raised NullReferenceException
                if (name.Length > 2 || ((oi != null && oi.MiscAttr != null)))
                {
                    if (!li[1].Doubtful || ((oi != null && oi.MiscAttr != null)))
                    {
                        ok = true;
                    }
                    else if (!ok && !li[1].IsNewlineBefore)
                    {
                        // Doubtful name: accept only with supporting context around it
                        if (li[0].GeoObjectBefore || li[1].GeoObjectAfter)
                        {
                            ok = true;
                        }
                        else if (Pullenti.Ner.Address.Internal.StreetDefineHelper.CheckStreetAfter(li[1].EndToken.Next))
                        {
                            ok = true;
                        }
                        else if (li[1].EndToken.Next != null && (li[1].EndToken.Next.GetReferent() is Pullenti.Ner.Date.DateReferent))
                        {
                            ok = true;
                        }
                        else if ((li[1].WhitespacesBeforeCount < 2) && li[1].OntoItem != null)
                        {
                            // The original distinguished "newline after" from "same line" but accepted both
                            ok = true;
                        }
                    }
                    // A doubtful name followed by a token with the same character class is not trusted
                    if (li[1].Doubtful && li[1].EndToken.Next != null && li[1].EndToken.Chars == li[1].EndToken.Next.Chars)
                    {
                        ok = false;
                    }
                    // Preceded by the word "В": always accept
                    if (li[0].BeginToken.Previous != null && li[0].BeginToken.Previous.IsValue("В", null))
                    {
                        ok = true;
                    }
                }
                if (!ok)
                {
                    ok = CheckYearAfter(li[1].EndToken.Next);
                }
                if (!ok)
                {
                    ok = CheckCityAfter(li[1].EndToken.Next);
                }
            }
            else if ((li[i1].Typ == CityItemToken.ItemType.ProperName || li[i1].Typ == CityItemToken.ItemType.City))
            {
                // Branch 2: any other type noun followed by a proper name or a City item
                if (((li[0].Value == "АДМИНИСТРАЦИЯ" || li[0].Value == "АДМІНІСТРАЦІЯ")) && i1 == 1)
                {
                    return(null);
                }
                // A name starting on a new line is accepted only for an exact two-item list
                if (li[i1].IsNewlineBefore)
                {
                    if (li.Count != 2)
                    {
                        return(null);
                    }
                }
                if (!li[0].Doubtful)
                {
                    ok = true;
                    // Very short names are not trusted
                    if (name.Length < 2)
                    {
                        ok = false;
                    }
                    else if ((name.Length < 3) && li[0].Morph.Number != Pullenti.Morph.MorphNumber.Singular)
                    {
                        ok = false;
                    }
                    if (li[i1].Doubtful && !li[i1].GeoObjectAfter && !li[0].GeoObjectBefore)
                    {
                        if (li[i1].Morph.Case.IsGenitive)
                        {
                            // A doubtful genitive name needs a geo object / house after it, or a geo object
                            // before the head (or must sit at a text boundary)
                            if (li[i1].EndToken.Next == null || MiscLocationHelper.CheckGeoObjectAfter(li[i1].EndToken.Next, false) || Pullenti.Ner.Address.Internal.AddressItemToken.CheckHouseAfter(li[i1].EndToken.Next, false, true))
                            {
                            }
                            else if (li[0].BeginToken.Previous == null || MiscLocationHelper.CheckGeoObjectBefore(li[0].BeginToken))
                            {
                            }
                            else
                            {
                                ok = false;
                            }
                        }
                        if (ok)
                        {
                            // Reject when a PERSONPROPERTY is found at the token before the head and the name
                            // itself parses as a PERSON
                            Pullenti.Ner.ReferentToken rt0 = li[i1].Kit.ProcessReferent("PERSONPROPERTY", li[0].BeginToken.Previous);
                            if (rt0 != null)
                            {
                                Pullenti.Ner.ReferentToken rt1 = li[i1].Kit.ProcessReferent("PERSON", li[i1].BeginToken);
                                if (rt1 != null)
                                {
                                    ok = false;
                                }
                            }
                        }
                    }
                    Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(li[i1].BeginToken, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                    if (npt != null)
                    {
                        // The name opens a longer noun phrase (adjective not followed by a comma): not trusted
                        if (npt.EndToken.EndChar > li[i1].EndChar && npt.Adjectives.Count > 0 && !npt.Adjectives[0].EndToken.Next.IsComma)
                        {
                            ok = false;
                        }
                        else if (TerrItemToken.m_UnknownRegions.TryParse(npt.EndToken, Pullenti.Ner.Core.TerminParseAttr.FullwordsOnly) != null)
                        {
                            // The phrase ends with an "unknown region" term: require a non-city geo referent
                            // right before the head or right after the phrase (an adjacent comma is skipped)
                            bool ok1 = false;
                            if (li[0].BeginToken.Previous != null)
                            {
                                Pullenti.Ner.Token ttt = li[0].BeginToken.Previous;
                                if (ttt.IsComma && ttt.Previous != null)
                                {
                                    ttt = ttt.Previous;
                                }
                                Pullenti.Ner.Geo.GeoReferent geo = ttt.GetReferent() as Pullenti.Ner.Geo.GeoReferent;
                                if (geo != null && !geo.IsCity)
                                {
                                    ok1 = true;
                                }
                            }
                            if (npt.EndToken.Next != null)
                            {
                                Pullenti.Ner.Token ttt = npt.EndToken.Next;
                                if (ttt.IsComma && ttt.Next != null)
                                {
                                    ttt = ttt.Next;
                                }
                                Pullenti.Ner.Geo.GeoReferent geo = ttt.GetReferent() as Pullenti.Ner.Geo.GeoReferent;
                                if (geo != null && !geo.IsCity)
                                {
                                    ok1 = true;
                                }
                            }
                            if (!ok1)
                            {
                                return(null);
                            }
                        }
                    }
                    // "ПОРТ" followed by an all-upper-case or Latin name is rejected
                    if (li[0].Value == "ПОРТ")
                    {
                        if (li[i1].Chars.IsAllUpper || li[i1].Chars.IsLatinLetter)
                        {
                            return(null);
                        }
                    }
                }
                else if (li[0].GeoObjectBefore)
                {
                    ok = true;
                }
                else if (li[i1].GeoObjectAfter && !li[i1].IsNewlineAfter)
                {
                    ok = true;
                }
                else
                {
                    ok = CheckYearAfter(li[i1].EndToken.Next);
                }
                if (!ok)
                {
                    ok = CheckStreetAfter(li[i1].EndToken.Next);
                }
                if (!ok && li[0].BeginToken.Previous != null && li[0].BeginToken.Previous.IsValue("В", null))
                {
                    ok = true;
                }
            }
            else
            {
                return(null);
            }
            // Last chance for an unconfirmed sequence: a "near" marker before the head, unless forced by "always"
            if (!ok && !always)
            {
                if (MiscLocationHelper.CheckNearBefore(li[0].BeginToken.Previous) == null)
                {
                    return(null);
                }
            }
            // Drop everything after the name item; the caller's list is changed
            if (li.Count > (i1 + 1))
            {
                li.RemoveRange(i1 + 1, li.Count - i1 - 1);
            }
            Pullenti.Ner.Geo.GeoReferent city = new Pullenti.Ner.Geo.GeoReferent();
            if (oi != null && oi.Referent != null)
            {
                // Start from a copy of the ontology referent, without its occurrences
                city = oi.Referent.Clone() as Pullenti.Ner.Geo.GeoReferent;
                city.Occurrence.Clear();
            }
            // A single-token adjective name is re-generated using the head noun's case and gender
            if (!li[0].Morph.Case.IsUndefined && li[0].Morph.Gender != Pullenti.Morph.MorphGender.Undefined)
            {
                if (li[i1].EndToken.Morph.Class.IsAdjective && li[i1].BeginToken == li[i1].EndToken)
                {
                    string nam = Pullenti.Ner.Core.ProperNameHelper.GetNameEx(li[i1].BeginToken, li[i1].EndToken, Pullenti.Morph.MorphClass.Adjective, li[0].Morph.Case, li[0].Morph.Gender, false, false);
                    if (nam != null && nam != name)
                    {
                        name = nam;
                    }
                }
            }
            // With a nominative head the alternative name is added before the main one and then cleared,
            // so that it is not added a second time (nor combined with probAdj) below
            if (li[0].Morph.Case.IsNominative)
            {
                if (altName != null)
                {
                    city.AddName(altName);
                }
                altName = null;
            }
            city.AddName(name);
            if (probAdj != null)
            {
                city.AddName(probAdj + " " + name);
            }
            if (altName != null)
            {
                city.AddName(altName);
                if (probAdj != null)
                {
                    city.AddName(probAdj + " " + altName);
                }
            }
            if (typ != null)
            {
                city.AddTyp(typ);
            }
            else if (!city.IsCity)
            {
                city.AddTypCity(li[0].Kit.BaseLanguage);
            }
            if (typ2 != null)
            {
                city.AddTyp(typ2.ToLower());
            }
            if (li[0].HigherGeo != null && GeoOwnerHelper.CanBeHigher(li[0].HigherGeo, city))
            {
                city.Higher = li[0].HigherGeo;
            }
            // A Misc head is not part of the resulting token span
            if (li[0].Typ == CityItemToken.ItemType.Misc)
            {
                li.RemoveAt(0);
            }
            Pullenti.Ner.ReferentToken res = new Pullenti.Ner.ReferentToken(city, li[0].BeginToken, li[li.Count - 1].EndToken)
            {
                Morph = mc
            };
            // "<name>-<number>": a digit number below 50 after a hyphen is appended to every name slot
            if (res.EndToken.Next != null && res.EndToken.Next.IsHiphen && (res.EndToken.Next.Next is Pullenti.Ner.NumberToken))
            {
                Pullenti.Ner.NumberToken num = res.EndToken.Next.Next as Pullenti.Ner.NumberToken;
                if ((num.Typ == Pullenti.Ner.NumberSpellingType.Digit && !num.Morph.Class.IsAdjective && num.IntValue != null) && (num.IntValue.Value < 50))
                {
                    foreach (Pullenti.Ner.Slot s in city.Slots)
                    {
                        if (s.TypeName == Pullenti.Ner.Geo.GeoReferent.ATTR_NAME)
                        {
                            city.UploadSlot(s, string.Format("{0}-{1}", s.Value, num.Value));
                        }
                    }
                    res.EndToken = num;
                }
            }
            // A single-token head "ГОРОДОК" followed by a house is rejected
            if (li[0].BeginToken == li[0].EndToken && li[0].BeginToken.IsValue("ГОРОДОК", null))
            {
                if (Pullenti.Ner.Address.Internal.AddressItemToken.CheckHouseAfter(res.EndToken.Next, true, false))
                {
                    return(null);
                }
            }
            return(res);
        }
예제 #11
0
        /// <summary>
        /// Попробовать восстановить последовательность, обрамляемую кавычками или скобками. Поддерживается
        /// вложенность, возможность отсутствия закрывающего элемента и др.
        /// </summary>
        /// <param name="t">начальный токен</param>
        /// <param name="attrs">параметры выделения</param>
        /// <param name="maxTokens">максимально токенов (вдруг забыли закрывающую кавычку)</param>
        /// <return>метатокен BracketSequenceToken</return>
        public static BracketSequenceToken TryParse(Pullenti.Ner.Token t, BracketParseAttr attrs = BracketParseAttr.No, int maxTokens = 100)
        {
            Pullenti.Ner.Token t0 = t;
            int cou = 0;

            if (!CanBeStartOfSequence(t0, false, false))
            {
                return(null);
            }
            List <Bracket> brList = new List <Bracket>();

            brList.Add(new Bracket(t0));
            cou = 0;
            int crlf = 0;

            Pullenti.Ner.Token last = null;
            int  lev     = 1;
            bool isAssim = brList[0].Char != '«' && m_AssymOPenChars.IndexOf(brList[0].Char) >= 0;
            bool genCase = false;

            for (t = t0.Next; t != null; t = t.Next)
            {
                if (t.IsTableControlChar)
                {
                    break;
                }
                last = t;
                if (t.IsCharOf(m_OpenChars) || t.IsCharOf(m_CloseChars))
                {
                    if (t.IsNewlineBefore && ((attrs & BracketParseAttr.CanBeManyLines)) == BracketParseAttr.No)
                    {
                        if (t.WhitespacesBeforeCount > 10 || CanBeStartOfSequence(t, false, false))
                        {
                            if (t.IsChar('(') && !t0.IsChar('('))
                            {
                            }
                            else
                            {
                                last = t.Previous;
                                break;
                            }
                        }
                    }
                    Bracket bb = new Bracket(t);
                    brList.Add(bb);
                    if (brList.Count > 20)
                    {
                        break;
                    }
                    if ((brList.Count == 3 && brList[1].CanBeOpen && bb.CanBeClose) && MustBeCloseChar(bb.Char, brList[1].Char) && MustBeCloseChar(bb.Char, brList[0].Char))
                    {
                        bool ok = false;
                        for (Pullenti.Ner.Token tt = t.Next; tt != null; tt = tt.Next)
                        {
                            if (tt.IsNewlineBefore)
                            {
                                break;
                            }
                            if (tt.IsChar(','))
                            {
                                break;
                            }
                            if (tt.IsChar('.'))
                            {
                                for (tt = tt.Next; tt != null; tt = tt.Next)
                                {
                                    if (tt.IsNewlineBefore)
                                    {
                                        break;
                                    }
                                    else if (tt.IsCharOf(m_OpenChars) || tt.IsCharOf(m_CloseChars))
                                    {
                                        Bracket bb2 = new Bracket(tt);
                                        if (BracketHelper.CanBeEndOfSequence(tt, false, null, false) && CanBeCloseChar(bb2.Char, brList[0].Char))
                                        {
                                            ok = true;
                                        }
                                        break;
                                    }
                                }
                                break;
                            }
                            if (t.IsCharOf(m_OpenChars) || t.IsCharOf(m_CloseChars))
                            {
                                ok = true;
                                break;
                            }
                        }
                        if (!ok)
                        {
                            break;
                        }
                    }
                    if (isAssim)
                    {
                        if (bb.CanBeOpen && !bb.CanBeClose && bb.Char == brList[0].Char)
                        {
                            lev++;
                        }
                        else if (bb.CanBeClose && !bb.CanBeOpen && m_OpenChars.IndexOf(brList[0].Char) == m_CloseChars.IndexOf(bb.Char))
                        {
                            lev--;
                            if (lev == 0)
                            {
                                break;
                            }
                        }
                    }
                }
                else
                {
                    if ((++cou) > maxTokens)
                    {
                        break;
                    }
                    if (((attrs & BracketParseAttr.CanContainsVerbs)) == BracketParseAttr.No)
                    {
                        if (t.Morph.Language.IsCyrillic)
                        {
                            if (t.GetMorphClassInDictionary() == Pullenti.Morph.MorphClass.Verb)
                            {
                                if (!t.Morph.Class.IsAdjective && !t.Morph.ContainsAttr("страд.з.", null))
                                {
                                    if (t.Chars.IsAllLower)
                                    {
                                        string norm = t.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false);
                                        if (!Pullenti.Morph.LanguageHelper.EndsWith(norm, "СЯ"))
                                        {
                                            if (brList.Count > 1)
                                            {
                                                break;
                                            }
                                            if (brList[0].Char != '(')
                                            {
                                                break;
                                            }
                                        }
                                    }
                                }
                            }
                        }
                        else if (t.Morph.Language.IsEn)
                        {
                            if (t.Morph.Class == Pullenti.Morph.MorphClass.Verb && t.Chars.IsAllLower)
                            {
                                break;
                            }
                        }
                        Pullenti.Ner.Referent r = t.GetReferent();
                        if (r != null && r.TypeName == "ADDRESS")
                        {
                            if (!t0.IsChar('('))
                            {
                                break;
                            }
                        }
                    }
                }
                if (((attrs & BracketParseAttr.CanBeManyLines)) != BracketParseAttr.No)
                {
                    if (t.IsNewlineBefore)
                    {
                        if (t.NewlinesBeforeCount > 1)
                        {
                            break;
                        }
                        crlf++;
                    }
                    continue;
                }
                if (t.IsNewlineBefore)
                {
                    if (t.WhitespacesBeforeCount > 15)
                    {
                        last = t.Previous;
                        break;
                    }
                    crlf++;
                    if (!t.Chars.IsAllLower)
                    {
                        if (MiscHelper.CanBeStartOfSentence(t))
                        {
                            bool has = false;
                            for (Pullenti.Ner.Token tt = t.Next; tt != null; tt = tt.Next)
                            {
                                if (tt.IsNewlineBefore)
                                {
                                    break;
                                }
                                else if (tt.LengthChar == 1 && tt.IsCharOf(m_OpenChars) && tt.IsWhitespaceBefore)
                                {
                                    break;
                                }
                                else if (tt.LengthChar == 1 && tt.IsCharOf(m_CloseChars) && !tt.IsWhitespaceBefore)
                                {
                                    has = true;
                                    break;
                                }
                            }
                            if (!has)
                            {
                                last = t.Previous;
                                break;
                            }
                        }
                    }
                    if ((t.Previous is Pullenti.Ner.MetaToken) && CanBeEndOfSequence((t.Previous as Pullenti.Ner.MetaToken).EndToken, false, null, false))
                    {
                        last = t.Previous;
                        break;
                    }
                }
                if (crlf > 1)
                {
                    if (brList.Count > 1)
                    {
                        break;
                    }
                    if (crlf > 10)
                    {
                        break;
                    }
                }
                if (t.IsChar(';') && t.IsNewlineAfter)
                {
                    break;
                }
                NounPhraseToken npt = NounPhraseHelper.TryParse(t, NounPhraseParseAttr.No, 0, null);
                if (npt != null)
                {
                    if (t.IsNewlineBefore)
                    {
                        genCase = npt.Morph.Case.IsGenitive;
                    }
                    last = (t = npt.EndToken);
                }
            }
            if ((brList.Count == 1 && brList[0].CanBeOpen && (last is Pullenti.Ner.MetaToken)) && last.IsNewlineAfter)
            {
                if (BracketHelper.CanBeEndOfSequence((last as Pullenti.Ner.MetaToken).EndToken, false, null, false))
                {
                    return(new BracketSequenceToken(t0, last));
                }
            }
            if ((brList.Count == 1 && brList[0].CanBeOpen && genCase) && last.IsNewlineAfter && crlf <= 2)
            {
                return(new BracketSequenceToken(t0, last));
            }
            if (brList.Count < 1)
            {
                return(null);
            }
            for (int i = 1; i < (brList.Count - 1); i++)
            {
                if (brList[i].Char == '<' && brList[i + 1].Char == '>')
                {
                    brList[i].CanBeOpen      = true;
                    brList[i + 1].CanBeClose = true;
                }
            }
            List <BracketSequenceToken> internals = null;

            while (brList.Count > 3)
            {
                int i = brList.Count - 1;
                if ((brList[i].CanBeClose && brList[i - 1].CanBeOpen && !CanBeCloseChar(brList[i].Char, brList[0].Char)) && CanBeCloseChar(brList[i].Char, brList[i - 1].Char))
                {
                    brList.RemoveRange(brList.Count - 2, 2);
                    continue;
                }
                break;
            }
            while (brList.Count >= 4)
            {
                bool changed = false;
                for (int i = 1; i < (brList.Count - 2); i++)
                {
                    if ((brList[i].CanBeOpen && !brList[i].CanBeClose && brList[i + 1].CanBeClose) && !brList[i + 1].CanBeOpen)
                    {
                        bool ok = false;
                        if (MustBeCloseChar(brList[i + 1].Char, brList[i].Char) || brList[i].Char != brList[0].Char)
                        {
                            ok = true;
                            if ((i == 1 && ((i + 2) < brList.Count) && brList[i + 2].Char == ')') && brList[i + 1].Char != ')' && CanBeCloseChar(brList[i + 1].Char, brList[i - 1].Char))
                            {
                                brList[i + 2] = brList[i + 1];
                            }
                        }
                        else if (i > 1 && ((i + 2) < brList.Count) && MustBeCloseChar(brList[i + 2].Char, brList[i - 1].Char))
                        {
                            ok = true;
                        }
                        if (ok)
                        {
                            if (internals == null)
                            {
                                internals = new List <BracketSequenceToken>();
                            }
                            internals.Add(new BracketSequenceToken(brList[i].Source, brList[i + 1].Source));
                            brList.RemoveRange(i, 2);
                            changed = true;
                            break;
                        }
                    }
                }
                if (!changed)
                {
                    break;
                }
            }
            BracketSequenceToken res = null;

            if ((brList.Count >= 4 && brList[1].CanBeOpen && brList[2].CanBeClose) && brList[3].CanBeClose && !brList[3].CanBeOpen)
            {
                if (CanBeCloseChar(brList[3].Char, brList[0].Char))
                {
                    res = new BracketSequenceToken(brList[0].Source, brList[3].Source);
                    if (brList[0].Source.Next != brList[1].Source || brList[2].Source.Next != brList[3].Source)
                    {
                        res.Internal.Add(new BracketSequenceToken(brList[1].Source, brList[2].Source));
                    }
                    if (internals != null)
                    {
                        res.Internal.AddRange(internals);
                    }
                }
            }
            if ((res == null && brList.Count >= 3 && brList[2].CanBeClose) && !brList[2].CanBeOpen)
            {
                if (((attrs & BracketParseAttr.NearCloseBracket)) != BracketParseAttr.No)
                {
                    if (CanBeCloseChar(brList[1].Char, brList[0].Char))
                    {
                        return(new BracketSequenceToken(brList[0].Source, brList[1].Source));
                    }
                }
                bool ok = true;
                if (CanBeCloseChar(brList[2].Char, brList[0].Char) && CanBeCloseChar(brList[1].Char, brList[0].Char) && brList[1].CanBeClose)
                {
                    for (t = brList[1].Source; t != brList[2].Source && t != null; t = t.Next)
                    {
                        if (t.IsNewlineBefore)
                        {
                            ok = false;
                            break;
                        }
                        if (t.Chars.IsLetter && t.Chars.IsAllLower)
                        {
                            ok = false;
                            break;
                        }
                        NounPhraseToken npt = NounPhraseHelper.TryParse(t, NounPhraseParseAttr.No, 0, null);
                        if (npt != null)
                        {
                            t = npt.EndToken;
                        }
                    }
                    if (ok)
                    {
                        for (t = brList[0].Source.Next; t != brList[1].Source && t != null; t = t.Next)
                        {
                            if (t.IsNewlineBefore)
                            {
                                return(new BracketSequenceToken(brList[0].Source, t.Previous));
                            }
                        }
                    }
                    int lev1 = 0;
                    for (Pullenti.Ner.Token tt = brList[0].Source.Previous; tt != null; tt = tt.Previous)
                    {
                        if (tt.IsNewlineAfter || tt.IsTableControlChar)
                        {
                            break;
                        }
                        if (!(tt is Pullenti.Ner.TextToken))
                        {
                            continue;
                        }
                        if (tt.Chars.IsLetter || tt.LengthChar > 1)
                        {
                            continue;
                        }
                        char ch = (tt as Pullenti.Ner.TextToken).Term[0];
                        if (CanBeCloseChar(ch, brList[0].Char))
                        {
                            lev1++;
                        }
                        else if (CanBeCloseChar(brList[1].Char, ch))
                        {
                            lev1--;
                            if (lev1 < 0)
                            {
                                return(new BracketSequenceToken(brList[0].Source, brList[1].Source));
                            }
                        }
                    }
                }
                if (ok && CanBeCloseChar(brList[2].Char, brList[0].Char))
                {
                    BracketSequenceToken intern = new BracketSequenceToken(brList[1].Source, brList[2].Source);
                    res = new BracketSequenceToken(brList[0].Source, brList[2].Source);
                    res.Internal.Add(intern);
                }
                else if (ok && CanBeCloseChar(brList[2].Char, brList[1].Char) && brList[0].CanBeOpen)
                {
                    if (CanBeCloseChar(brList[2].Char, brList[0].Char))
                    {
                        BracketSequenceToken intern = new BracketSequenceToken(brList[1].Source, brList[2].Source);
                        res = new BracketSequenceToken(brList[0].Source, brList[2].Source);
                        res.Internal.Add(intern);
                    }
                    else if (brList.Count == 3)
                    {
                        return(null);
                    }
                }
            }
            if (res == null && brList.Count > 1 && brList[1].CanBeClose)
            {
                res = new BracketSequenceToken(brList[0].Source, brList[1].Source);
            }
            if (res == null && brList.Count > 1 && CanBeCloseChar(brList[1].Char, brList[0].Char))
            {
                res = new BracketSequenceToken(brList[0].Source, brList[1].Source);
            }
            if (res == null && brList.Count == 2 && brList[0].Char == brList[1].Char)
            {
                res = new BracketSequenceToken(brList[0].Source, brList[1].Source);
            }
            if (res != null && internals != null)
            {
                foreach (BracketSequenceToken i in internals)
                {
                    if (i.BeginChar < res.EndChar)
                    {
                        res.Internal.Add(i);
                    }
                }
            }
            if (res == null)
            {
                cou = 0;
                for (Pullenti.Ner.Token tt = t0.Next; tt != null; tt = tt.Next, cou++)
                {
                    if (tt.IsTableControlChar)
                    {
                        break;
                    }
                    if (MiscHelper.CanBeStartOfSentence(tt))
                    {
                        break;
                    }
                    if (maxTokens > 0 && cou > maxTokens)
                    {
                        break;
                    }
                    Pullenti.Ner.MetaToken mt = tt as Pullenti.Ner.MetaToken;
                    if (mt == null)
                    {
                        continue;
                    }
                    if (mt.EndToken is Pullenti.Ner.TextToken)
                    {
                        if ((mt.EndToken as Pullenti.Ner.TextToken).IsCharOf(m_CloseChars))
                        {
                            Bracket bb = new Bracket(mt.EndToken as Pullenti.Ner.TextToken);
                            if (bb.CanBeClose && CanBeCloseChar(bb.Char, brList[0].Char))
                            {
                                return(new BracketSequenceToken(t0, tt));
                            }
                        }
                    }
                }
            }
            return(res);
        }
예제 #12
0
        /// <summary>
        /// Builds a textual name from the tokens between <paramref name="begin"/> and
        /// <paramref name="end"/> (both inclusive), optionally normalising text tokens
        /// to a morphological form that matches the requested class / case / gender.
        /// </summary>
        /// <param name="begin">First token of the range.</param>
        /// <param name="end">Last token of the range.</param>
        /// <param name="cla">Morphological class mask. A word form qualifies when its
        /// <c>Class.Value</c> shares at least one bit with <c>cla.Value</c>; a value of 0 disables this filter.</param>
        /// <param name="mc">Morphological case. A word form qualifies when <c>wf.Case &amp; mc</c>
        /// is not undefined; an undefined case disables this filter.</param>
        /// <param name="gender">Gender filter. When it is set and no word form satisfies all filters,
        /// the selection is retried without the gender restriction.</param>
        /// <param name="ignoreBracketsAndHiphens">When true, bracket tokens are skipped and hyphens that have
        /// whitespace on at least one side are skipped. A sequence opened by one of <c>( &lt; [</c> that
        /// <c>BracketHelper.TryParse</c> recognises and that ends inside the range is rendered recursively and
        /// appended together with its brackets, unless it closes the range and holds a single
        /// non-letter, non-referent token.</param>
        /// <param name="ignoreGeoReferent">When true, meta tokens other than <paramref name="begin"/> whose
        /// referent has type name "GEO" are skipped.</param>
        /// <returns>The assembled string, or null when an argument is null, when <paramref name="begin"/>
        /// ends after <paramref name="end"/> starts (and they are different tokens), or when nothing was collected.</returns>
        public static string GetNameEx(Pullenti.Ner.Token begin, Pullenti.Ner.Token end, Pullenti.Morph.MorphClass cla, Pullenti.Morph.MorphCase mc, Pullenti.Morph.MorphGender gender = Pullenti.Morph.MorphGender.Undefined, bool ignoreBracketsAndHiphens = false, bool ignoreGeoReferent = false)
        {
            if (end == null || begin == null)
            {
                return null;
            }
            if (begin != end && begin.EndChar > end.BeginChar)
            {
                return null;
            }

            StringBuilder name = new StringBuilder();
            // Words collected from a "word-word-..." chain that still wait for the final part.
            string pendingPrefix = null;

            for (Pullenti.Ner.Token t = begin; t != null && t.EndChar <= end.EndChar; t = t.Next)
            {
                // Hard cap on the size of the produced name.
                if (name.Length > 1000)
                {
                    break;
                }
                if (t.IsTableControlChar)
                {
                    continue;
                }

                if (ignoreBracketsAndHiphens)
                {
                    if (BracketHelper.IsBracket(t, false))
                    {
                        if (t == end)
                        {
                            break;
                        }
                        if (t.IsCharOf("(<["))
                        {
                            BracketSequenceToken seq = BracketHelper.TryParse(t, BracketParseAttr.No, 100);
                            if (seq != null && seq.EndChar <= end.EndChar)
                            {
                                Pullenti.Ner.Token innerFirst = seq.BeginToken.Next;
                                Pullenti.Ner.Token innerLast = seq.EndToken.Previous;
                                string inner = GetNameEx(innerFirst, innerLast, Pullenti.Morph.MorphClass.Undefined, Pullenti.Morph.MorphCase.Undefined, Pullenti.Morph.MorphGender.Undefined, ignoreBracketsAndHiphens, false);
                                if (inner != null)
                                {
                                    // Dropped only when the sequence closes the range and wraps
                                    // exactly one token that is neither a letter token nor a referent.
                                    bool keep = seq.EndChar != end.EndChar
                                        || innerFirst != innerLast
                                        || innerFirst.Chars.IsLetter
                                        || (innerFirst is Pullenti.Ner.ReferentToken);
                                    if (keep)
                                    {
                                        name.AppendFormat(" {0}{1}{2}", t.GetSourceText(), inner, seq.EndToken.GetSourceText());
                                    }
                                }
                                // Resume after the closing bracket of the parsed sequence.
                                t = seq.EndToken;
                            }
                        }
                        continue;
                    }
                    if (t.IsHiphen)
                    {
                        if (t == end)
                        {
                            break;
                        }
                        if (t.IsWhitespaceBefore || t.IsWhitespaceAfter)
                        {
                            continue;
                        }
                    }
                }

                Pullenti.Ner.TextToken word = t as Pullenti.Ner.TextToken;
                if (word != null)
                {
                    // "word-" immediately followed by another text token: remember the word and
                    // jump to the hyphen, so the loop increment lands on the following part.
                    if (!ignoreBracketsAndHiphens
                        && word.Next != null && word.Next.IsHiphen && (word.Next.Next is Pullenti.Ner.TextToken)
                        && word != end && word.Next != end)
                    {
                        pendingPrefix = (pendingPrefix == null) ? word.Term : string.Format("{0}-{1}", pendingPrefix, word.Term);
                        t = word.Next;
                        if (t == end)
                        {
                            break;
                        }
                        continue;
                    }

                    string piece = null;
                    if (cla.Value != 0 || !mc.IsUndefined || gender != Pullenti.Morph.MorphGender.Undefined)
                    {
                        // Pass 0 applies every requested filter; pass 1 (only reached when a gender
                        // was requested and pass 0 found nothing) repeats the scan without the gender filter.
                        for (int pass = 0; pass < 2; pass++)
                        {
                            bool filterByGender = pass == 0 && gender != Pullenti.Morph.MorphGender.Undefined;
                            foreach (Pullenti.Morph.MorphBaseInfo info in word.Morph.Items)
                            {
                                Pullenti.Morph.MorphWordForm form = info as Pullenti.Morph.MorphWordForm;
                                if (form == null)
                                {
                                    continue;
                                }
                                if (cla.Value != 0 && ((form.Class.Value & cla.Value)) == 0)
                                {
                                    continue;
                                }
                                if (!mc.IsUndefined && ((form.Case & mc)).IsUndefined)
                                {
                                    continue;
                                }
                                if (filterByGender && ((form.Gender & gender)) == Pullenti.Morph.MorphGender.Undefined)
                                {
                                    continue;
                                }
                                // First match wins unless a later form's normal case equals the token term.
                                if (piece == null || form.NormalCase == word.Term)
                                {
                                    piece = form.NormalCase;
                                }
                            }
                            if (piece != null || gender == Pullenti.Morph.MorphGender.Undefined)
                            {
                                break;
                            }
                        }
                    }

                    if (piece == null)
                    {
                        piece = word.Term;
                        if (word.Chars.IsLastLower && word.LengthChar > 2)
                        {
                            // Take the source text and cut it right after its last upper-case character.
                            piece = word.GetSourceText();
                            int lastUpper = piece.Length - 1;
                            while (lastUpper >= 0 && !char.IsUpper(piece[lastUpper]))
                            {
                                lastUpper--;
                            }
                            if (lastUpper >= 0)
                            {
                                piece = piece.Substring(0, lastUpper + 1);
                            }
                        }
                    }

                    if (pendingPrefix != null)
                    {
                        string delim = ignoreBracketsAndHiphens ? " " : "-";
                        piece = string.Format("{0}{1}{2}", pendingPrefix, delim, piece);
                        pendingPrefix = null;
                    }

                    if (name.Length > 0 && piece.Length > 0)
                    {
                        bool needSpace;
                        if (char.IsLetterOrDigit(piece[0]))
                        {
                            // Glue directly to a trailing hyphen, otherwise separate with a space.
                            needSpace = name[name.Length - 1] != '-';
                        }
                        else
                        {
                            needSpace = !ignoreBracketsAndHiphens && BracketHelper.CanBeStartOfSequence(word, false, false);
                        }
                        if (needSpace)
                        {
                            name.Append(' ');
                        }
                    }
                    name.Append(piece);
                }
                else if (t is Pullenti.Ner.NumberToken)
                {
                    if (name.Length > 0 && (t.IsWhitespaceBefore || name[name.Length - 1] != '-'))
                    {
                        name.Append(' ');
                    }
                    Pullenti.Ner.NumberToken num = t as Pullenti.Ner.NumberToken;
                    // A one-token number written in words and tagged as adjective keeps its term;
                    // every other number is emitted through its Value.
                    bool spelledAdjective = t.Morph.Class.IsAdjective
                        && num.Typ == Pullenti.Ner.NumberSpellingType.Words
                        && num.BeginToken == num.EndToken
                        && (num.BeginToken is Pullenti.Ner.TextToken);
                    if (spelledAdjective)
                    {
                        name.Append((num.BeginToken as Pullenti.Ner.TextToken).Term);
                    }
                    else
                    {
                        name.Append(num.Value);
                    }
                }
                else if (t is Pullenti.Ner.MetaToken)
                {
                    if (ignoreGeoReferent && t != begin)
                    {
                        Pullenti.Ner.Referent referent = t.GetReferent();
                        if (referent != null && referent.TypeName == "GEO")
                        {
                            continue;
                        }
                    }
                    Pullenti.Ner.MetaToken meta = t as Pullenti.Ner.MetaToken;
                    // Render the tokens wrapped by the meta token with the same settings.
                    string nested = GetNameEx(meta.BeginToken, meta.EndToken, cla, mc, gender, ignoreBracketsAndHiphens, ignoreGeoReferent);
                    if (!string.IsNullOrEmpty(nested))
                    {
                        if (name.Length > 0 && (t.IsWhitespaceBefore || name[name.Length - 1] != '-'))
                        {
                            name.Append(' ');
                        }
                        name.Append(nested);
                    }
                }

                if (t == end)
                {
                    break;
                }
            }

            return name.Length == 0 ? null : name.ToString();
        }
예제 #13
0
        public override void Process(Pullenti.Ner.Core.AnalysisKit kit)
        {
            Pullenti.Ner.Core.AnalyzerData     ad     = kit.GetAnalyzerData(this);
            Pullenti.Ner.Core.TerminCollection models = new Pullenti.Ner.Core.TerminCollection();
            Dictionary <string, List <Pullenti.Ner.Referent> > objsByModel = new Dictionary <string, List <Pullenti.Ner.Referent> >();

            Pullenti.Ner.Core.TerminCollection objByNames = new Pullenti.Ner.Core.TerminCollection();
            for (Pullenti.Ner.Token t = kit.FirstToken; t != null; t = t.Next)
            {
                List <Pullenti.Ner.Transport.Internal.TransItemToken> its = Pullenti.Ner.Transport.Internal.TransItemToken.TryParseList(t, 10);
                if (its == null)
                {
                    continue;
                }
                List <Pullenti.Ner.ReferentToken> rts = this.TryAttach(its, false);
                if (rts != null)
                {
                    foreach (Pullenti.Ner.ReferentToken rt in rts)
                    {
                        int cou = 0;
                        for (Pullenti.Ner.Token tt = t.Previous; tt != null && (cou < 1000); tt = tt.Previous, cou++)
                        {
                            TransportReferent tr = tt.GetReferent() as TransportReferent;
                            if (tr == null)
                            {
                                continue;
                            }
                            bool ok = true;
                            foreach (Pullenti.Ner.Slot s in rt.Referent.Slots)
                            {
                                if (tr.FindSlot(s.TypeName, s.Value, true) == null)
                                {
                                    ok = false;
                                    break;
                                }
                            }
                            if (ok)
                            {
                                rt.Referent = tr;
                                break;
                            }
                        }
                        rt.Referent = ad.RegisterReferent(rt.Referent);
                        kit.EmbedToken(rt);
                        t = rt;
                        foreach (Pullenti.Ner.Slot s in rt.Referent.Slots)
                        {
                            if (s.TypeName == TransportReferent.ATTR_MODEL)
                            {
                                string mod = s.Value.ToString();
                                for (int k = 0; k < 2; k++)
                                {
                                    if (!char.IsDigit(mod[0]))
                                    {
                                        List <Pullenti.Ner.Referent> li;
                                        if (!objsByModel.TryGetValue(mod, out li))
                                        {
                                            objsByModel.Add(mod, (li = new List <Pullenti.Ner.Referent>()));
                                        }
                                        if (!li.Contains(rt.Referent))
                                        {
                                            li.Add(rt.Referent);
                                        }
                                        models.AddString(mod, li, null, false);
                                    }
                                    if (k > 0)
                                    {
                                        break;
                                    }
                                    string brand = rt.Referent.GetStringValue(TransportReferent.ATTR_BRAND);
                                    if (brand == null)
                                    {
                                        break;
                                    }
                                    mod = string.Format("{0} {1}", brand, mod);
                                }
                            }
                            else if (s.TypeName == TransportReferent.ATTR_NAME)
                            {
                                objByNames.Add(new Pullenti.Ner.Core.Termin(s.Value.ToString())
                                {
                                    Tag = rt.Referent
                                });
                            }
                        }
                    }
                }
            }
            if (objsByModel.Count == 0 && objByNames.Termins.Count == 0)
            {
                return;
            }
            for (Pullenti.Ner.Token t = kit.FirstToken; t != null; t = t.Next)
            {
                Pullenti.Ner.Core.BracketSequenceToken br = Pullenti.Ner.Core.BracketHelper.TryParse(t, Pullenti.Ner.Core.BracketParseAttr.No, 10);
                if (br != null)
                {
                    Pullenti.Ner.Core.TerminToken toks = objByNames.TryParse(t.Next, Pullenti.Ner.Core.TerminParseAttr.No);
                    if (toks != null && toks.EndToken.Next == br.EndToken)
                    {
                        Pullenti.Ner.ReferentToken rt0 = new Pullenti.Ner.ReferentToken(toks.Termin.Tag as Pullenti.Ner.Referent, br.BeginToken, br.EndToken);
                        kit.EmbedToken(rt0);
                        t = rt0;
                        continue;
                    }
                }
                if (!(t is Pullenti.Ner.TextToken))
                {
                    continue;
                }
                if (!t.Chars.IsLetter)
                {
                    continue;
                }
                Pullenti.Ner.Core.TerminToken tok = models.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
                if (tok == null)
                {
                    if (!t.Chars.IsAllLower)
                    {
                        tok = objByNames.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
                    }
                    if (tok == null)
                    {
                        continue;
                    }
                }
                if (!tok.IsWhitespaceAfter)
                {
                    if (tok.EndToken.Next == null || !tok.EndToken.Next.IsCharOf(",.)"))
                    {
                        if (!Pullenti.Ner.Core.BracketHelper.IsBracket(tok.EndToken.Next, false))
                        {
                            continue;
                        }
                    }
                }
                Pullenti.Ner.Referent        tr = null;
                List <Pullenti.Ner.Referent> li = tok.Termin.Tag as List <Pullenti.Ner.Referent>;
                if (li != null && li.Count == 1)
                {
                    tr = li[0];
                }
                else
                {
                    tr = tok.Termin.Tag as Pullenti.Ner.Referent;
                }
                if (tr != null)
                {
                    Pullenti.Ner.Transport.Internal.TransItemToken tit = Pullenti.Ner.Transport.Internal.TransItemToken.TryParse(tok.BeginToken.Previous, null, false, true);
                    if (tit != null && tit.Typ == Pullenti.Ner.Transport.Internal.TransItemToken.Typs.Brand)
                    {
                        tr.AddSlot(TransportReferent.ATTR_BRAND, tit.Value, false, 0);
                        tok.BeginToken = tit.BeginToken;
                    }
                    Pullenti.Ner.ReferentToken rt0 = new Pullenti.Ner.ReferentToken(tr, tok.BeginToken, tok.EndToken);
                    kit.EmbedToken(rt0);
                    t = rt0;
                    continue;
                }
            }
        }
예제 #14
0
        /// <summary>
        /// Tries to recognize a number together with a following unit or currency postfix,
        /// starting at the given token. An optional single-character money sign in front of the
        /// number (as detected by NumberHelper.IsMoneyChar) is also accepted, in which case the
        /// result is a Money token whose ExTypParam is the value returned by IsMoneyChar.
        /// </summary>
        /// <param name="t">Token to start parsing from; null yields null.</param>
        /// <returns>
        /// A NumberExToken describing the number and its postfix, or null when nothing
        /// suitable is recognized at this position.
        /// </returns>
        public static Pullenti.Ner.Core.NumberExToken TryParseNumberWithPostfix(Pullenti.Ner.Token t)
        {
            if (t == null)
            {
                return(null);
            }
            Pullenti.Ner.Token t0       = t;
            string             isDollar = null;

            // A one-character token may be a money sign standing before the number.
            if (t.LengthChar == 1 && t.Next != null)
            {
                if ((((isDollar = Pullenti.Ner.Core.NumberHelper.IsMoneyChar(t)))) != null)
                {
                    t = t.Next;
                }
            }
            Pullenti.Ner.NumberToken nt = t as Pullenti.Ner.NumberToken;
            if (nt == null)
            {
                // Fallback 1: a bracketed number "( number )" that is not preceded by another
                // number and is directly followed by a money postfix.
                if ((!(t.Previous is Pullenti.Ner.NumberToken) && t.IsChar('(') && (t.Next is Pullenti.Ner.NumberToken)) && t.Next.Next != null && t.Next.Next.IsChar(')'))
                {
                    Pullenti.Ner.Core.TerminToken toks1 = m_Postfixes.TryParse(t.Next.Next.Next, Pullenti.Ner.Core.TerminParseAttr.No);
                    if (toks1 != null && ((Pullenti.Ner.Core.NumberExType)toks1.Termin.Tag) == Pullenti.Ner.Core.NumberExType.Money)
                    {
                        Pullenti.Ner.NumberToken        nt0 = t.Next as Pullenti.Ner.NumberToken;
                        Pullenti.Ner.Core.NumberExToken res = new Pullenti.Ner.Core.NumberExToken(t, toks1.EndToken, nt0.Value, nt0.Typ, Pullenti.Ner.Core.NumberExType.Money)
                        {
                            AltRealValue = nt0.RealValue, Morph = toks1.BeginToken.Morph
                        };
                        return(_correctMoney(res, toks1.BeginToken));
                    }
                }
                // Fallback 2: a single adjective whose term is a number word glued to a postfix.
                Pullenti.Ner.TextToken tt = t as Pullenti.Ner.TextToken;
                if (tt == null || !tt.Morph.Class.IsAdjective)
                {
                    return(null);
                }
                string val = tt.Term;
                for (int i = 4; i < (val.Length - 5); i++)
                {
                    string v = val.Substring(0, i);
                    List <Pullenti.Ner.Core.Termin> li = Pullenti.Ner.Core.NumberHelper.m_Nums.FindTerminsByString(v, tt.Morph.Language);
                    if (li == null)
                    {
                        continue;
                    }
                    string vv = val.Substring(i);
                    List <Pullenti.Ner.Core.Termin> lii = m_Postfixes.FindTerminsByString(vv, tt.Morph.Language);
                    // li is indexed below, so it is checked for emptiness the same way lii is.
                    if (li.Count > 0 && lii != null && lii.Count > 0)
                    {
                        Pullenti.Ner.Core.NumberExToken re = new Pullenti.Ner.Core.NumberExToken(t, t, ((int)li[0].Tag).ToString(), Pullenti.Ner.NumberSpellingType.Words, (Pullenti.Ner.Core.NumberExType)lii[0].Tag)
                        {
                            Morph = t.Morph
                        };
                        _correctExtTypes(re);
                        return(re);
                    }
                    // Only the first prefix found among the numerals is tried.
                    break;
                }
                return(null);
            }
            // A bare number at the very end of the text is only acceptable with a money sign.
            if (t.Next == null && isDollar == null)
            {
                return(null);
            }
            double f = nt.RealValue;

            if (double.IsNaN(f))
            {
                return(null);
            }
            // t1 walks forward over everything that still belongs to the numeric value.
            Pullenti.Ner.Token t1 = nt.Next;
            if (((t1 != null && t1.IsCharOf(",."))) || (((t1 is Pullenti.Ner.NumberToken) && (t1.WhitespacesBeforeCount < 3))))
            {
                // The number may continue as a real number (separator or a close-by number token).
                Pullenti.Ner.NumberToken tt11 = Pullenti.Ner.Core.NumberHelper.TryParseRealNumber(nt, false, false);
                if (tt11 != null)
                {
                    t1 = tt11.EndToken.Next;
                    f  = tt11.RealValue;
                }
            }
            if (t1 == null)
            {
                if (isDollar == null)
                {
                    return(null);
                }
            }
            else if ((t1.Next != null && t1.Next.IsValue("С", "З") && t1.Next.Next != null) && t1.Next.Next.IsValue("ПОЛОВИНА", null))
            {
                // The two tokens after t1 read "С ПОЛОVINA"-style "and a half": add 0.5.
                // NOTE(review): the second IsValue argument is presumably the Ukrainian variant - confirm.
                f += 0.5;
                t1 = t1.Next.Next;
            }
            if (t1 != null && t1.IsHiphen && t1.Next != null)
            {
                t1 = t1.Next;
            }
            // det is set once a bracketed repetition of the number has been matched.
            bool   det  = false;
            double altf = f;

            // Skip a two-character zero number that follows a hyphen.
            if (((t1 is Pullenti.Ner.NumberToken) && t1.Previous != null && t1.Previous.IsHiphen) && (t1 as Pullenti.Ner.NumberToken).IntValue == 0 && t1.LengthChar == 2)
            {
                t1 = t1.Next;
            }
            // The number may be repeated in brackets, as a number token or the word "НОЛЬ".
            if ((t1 != null && t1.Next != null && t1.IsChar('(')) && (((t1.Next is Pullenti.Ner.NumberToken) || t1.Next.IsValue("НОЛЬ", null))) && t1.Next.Next != null)
            {
                Pullenti.Ner.NumberToken nt1 = t1.Next as Pullenti.Ner.NumberToken;
                double val = (double)0;
                if (nt1 != null)
                {
                    val = nt1.RealValue;
                }
                if (Math.Floor(f) == Math.Floor(val))
                {
                    // The whole parts agree; look at what follows the bracketed number.
                    Pullenti.Ner.Token ttt = t1.Next.Next;
                    if (ttt.IsChar(')'))
                    {
                        t1  = ttt.Next;
                        det = true;
                        // A zero number right after the closing bracket is skipped as well.
                        if ((t1 is Pullenti.Ner.NumberToken) && (t1 as Pullenti.Ner.NumberToken).IntValue != null && (t1 as Pullenti.Ner.NumberToken).IntValue.Value == 0)
                        {
                            t1 = t1.Next;
                        }
                    }
                    else if (((((ttt is Pullenti.Ner.NumberToken) && ((ttt as Pullenti.Ner.NumberToken).RealValue < 100) && ttt.Next != null) && ttt.Next.IsChar('/') && ttt.Next.Next != null) && ttt.Next.Next.GetSourceText() == "100" && ttt.Next.Next.Next != null) && ttt.Next.Next.Next.IsChar(')'))
                    {
                        // Form "NN/100)": accepted only when NN equals GetDecimalRest100(f).
                        int rest = GetDecimalRest100(f);
                        if ((ttt as Pullenti.Ner.NumberToken).IntValue != null && rest == (ttt as Pullenti.Ner.NumberToken).IntValue.Value)
                        {
                            t1  = ttt.Next.Next.Next.Next;
                            det = true;
                        }
                    }
                    else if ((ttt.IsValue("ЦЕЛЫХ", null) && (ttt.Next is Pullenti.Ner.NumberToken) && ttt.Next.Next != null) && ttt.Next.Next.Next != null && ttt.Next.Next.Next.IsChar(')'))
                    {
                        // Form "ЦЕЛЫХ number fraction-word)": the fraction word selects the divisor.
                        Pullenti.Ner.NumberToken num2 = ttt.Next as Pullenti.Ner.NumberToken;
                        altf = num2.RealValue;
                        if (ttt.Next.Next.IsValue("ДЕСЯТЫЙ", null))
                        {
                            altf /= 10;
                        }
                        else if (ttt.Next.Next.IsValue("СОТЫЙ", null))
                        {
                            altf /= 100;
                        }
                        else if (ttt.Next.Next.IsValue("ТЫСЯЧНЫЙ", null))
                        {
                            altf /= 1000;
                        }
                        else if (ttt.Next.Next.IsValue("ДЕСЯТИТЫСЯЧНЫЙ", null))
                        {
                            altf /= 10000;
                        }
                        else if (ttt.Next.Next.IsValue("СТОТЫСЯЧНЫЙ", null))
                        {
                            altf /= 100000;
                        }
                        else if (ttt.Next.Next.IsValue("МИЛЛИОННЫЙ", null))
                        {
                            altf /= 1000000;
                        }
                        // Only a proper fraction is accepted; it is added to the bracketed whole part.
                        if (altf < 1)
                        {
                            altf += val;
                            t1    = ttt.Next.Next.Next.Next;
                            det   = true;
                        }
                    }
                    else
                    {
                        // A money postfix inside the brackets, closed by ')', finishes the parse.
                        Pullenti.Ner.Core.TerminToken toks1 = m_Postfixes.TryParse(ttt, Pullenti.Ner.Core.TerminParseAttr.No);
                        if (toks1 != null)
                        {
                            if (((Pullenti.Ner.Core.NumberExType)toks1.Termin.Tag) == Pullenti.Ner.Core.NumberExType.Money)
                            {
                                if (toks1.EndToken.Next != null && toks1.EndToken.Next.IsChar(')'))
                                {
                                    Pullenti.Ner.Core.NumberExToken res = new Pullenti.Ner.Core.NumberExToken(t, toks1.EndToken.Next, nt.Value, nt.Typ, Pullenti.Ner.Core.NumberExType.Money)
                                    {
                                        RealValue = f, AltRealValue = altf, Morph = toks1.BeginToken.Morph
                                    };
                                    return(_correctMoney(res, toks1.BeginToken));
                                }
                            }
                        }
                        // Otherwise parse the bracket content recursively; it must end right before ')'.
                        Pullenti.Ner.Core.NumberExToken res2 = TryParseNumberWithPostfix(t1.Next);
                        if (res2 != null && res2.EndToken.Next != null && res2.EndToken.Next.IsChar(')'))
                        {
                            res2.BeginToken   = t;
                            res2.EndToken     = res2.EndToken.Next;
                            res2.AltRealValue = res2.RealValue;
                            res2.RealValue    = f;
                            _correctExtTypes(res2);
                            if (res2.WhitespacesAfterCount < 2)
                            {
                                // A money postfix may also follow the closing bracket.
                                Pullenti.Ner.Core.TerminToken toks2 = m_Postfixes.TryParse(res2.EndToken.Next, Pullenti.Ner.Core.TerminParseAttr.No);
                                if (toks2 != null)
                                {
                                    if (((Pullenti.Ner.Core.NumberExType)toks2.Termin.Tag) == Pullenti.Ner.Core.NumberExType.Money)
                                    {
                                        res2.EndToken = toks2.EndToken;
                                    }
                                }
                            }
                            return(res2);
                        }
                    }
                }
                else if (nt1 != null && nt1.Typ == Pullenti.Ner.NumberSpellingType.Words && nt.Typ == Pullenti.Ner.NumberSpellingType.Digit)
                {
                    // Whole parts differ, but digits are followed by a number in words:
                    // keep the worded value as the alternative only if ')' closes it immediately.
                    altf = nt1.RealValue;
                    Pullenti.Ner.Token ttt = t1.Next.Next;
                    if (ttt.IsChar(')'))
                    {
                        t1  = ttt.Next;
                        det = true;
                    }
                    if (!det)
                    {
                        altf = f;
                    }
                }
            }
            // Skip a whole bracket sequence that starts with the word "СУММА".
            if ((t1 != null && t1.IsChar('(') && t1.Next != null) && t1.Next.IsValue("СУММА", null))
            {
                Pullenti.Ner.Core.BracketSequenceToken br = Pullenti.Ner.Core.BracketHelper.TryParse(t1, Pullenti.Ner.Core.BracketParseAttr.No, 100);
                if (br != null)
                {
                    t1 = br.EndToken.Next;
                }
            }
            if (isDollar != null)
            {
                // A money sign was seen in front: the result is Money regardless of any postfix.
                // te becomes the last token of the result.
                Pullenti.Ner.Token te = null;
                if (t1 != null)
                {
                    te = t1.Previous;
                }
                else
                {
                    // The text ended: walk from t0 to the last token of the chain.
                    for (t1 = t0; t1 != null; t1 = t1.Next)
                    {
                        if (t1.Next == null)
                        {
                            te = t1;
                        }
                    }
                }
                if (te == null)
                {
                    return(null);
                }
                // Multiplier word after a hyphen.
                if (te.IsHiphen && te.Next != null)
                {
                    if (te.Next.IsValue("МИЛЛИОННЫЙ", null))
                    {
                        f    *= 1000000;
                        altf *= 1000000;
                        te    = te.Next;
                    }
                    else if (te.Next.IsValue("МИЛЛИАРДНЫЙ", null))
                    {
                        f    *= 1000000000;
                        altf *= 1000000000;
                        te    = te.Next;
                    }
                }
                // Multiplier abbreviation written without whitespace after the number.
                if (!te.IsWhitespaceAfter && (te.Next is Pullenti.Ner.TextToken))
                {
                    if (te.Next.IsValue("M", null))
                    {
                        f    *= 1000000;
                        altf *= 1000000;
                        te    = te.Next;
                    }
                    else if (te.Next.IsValue("BN", null))
                    {
                        f    *= 1000000000;
                        altf *= 1000000000;
                        te    = te.Next;
                    }
                }
                return(new Pullenti.Ner.Core.NumberExToken(t0, te, "", nt.Typ, Pullenti.Ner.Core.NumberExType.Money)
                {
                    RealValue = f, AltRealValue = altf, ExTypParam = isDollar
                });
            }
            // Without a bracketed repetition the postfix must stay on the same line.
            if (t1 == null || ((t1.IsNewlineBefore && !det)))
            {
                return(null);
            }
            Pullenti.Ner.Core.TerminToken toks = m_Postfixes.TryParse(t1, Pullenti.Ner.Core.TerminParseAttr.No);
            if ((toks == null && det && (t1 is Pullenti.Ner.NumberToken)) && (t1 as Pullenti.Ner.NumberToken).Value == "0")
            {
                // After a bracketed repetition a stray "0" may precede the postfix.
                toks = m_Postfixes.TryParse(t1.Next, Pullenti.Ner.Core.TerminParseAttr.No);
            }
            if (toks == null && t1.IsChar('р'))
            {
                // A lone 'р' is taken as "RUB" only when one of the (at most 10) preceding tokens
                // is a money-related word or a MONEY referent.
                int cou = 10;
                for (Pullenti.Ner.Token ttt = t0.Previous; ttt != null && cou > 0; ttt = ttt.Previous, cou--)
                {
                    if (ttt.IsValue("СУММА", null) || ttt.IsValue("НАЛИЧНЫЙ", null) || ttt.IsValue("БАЛАНС", null))
                    {
                    }
                    else if (ttt.GetReferent() != null && ttt.GetReferent().TypeName == "MONEY")
                    {
                    }
                    else
                    {
                        continue;
                    }
                    toks = new Pullenti.Ner.Core.TerminToken(t1, t1)
                    {
                        Termin = m_Postfixes.FindTerminsByCanonicText("RUB")[0]
                    };
                    if (t1.Next != null && t1.Next.IsChar('.'))
                    {
                        toks.EndToken = t1.Next;
                    }
                    Pullenti.Ner.Core.NumberExType ty = (Pullenti.Ner.Core.NumberExType)toks.Termin.Tag;
                    return(new Pullenti.Ner.Core.NumberExToken(t, toks.EndToken, nt.Value, nt.Typ, ty)
                    {
                        RealValue = f, AltRealValue = altf, Morph = toks.BeginToken.Morph, ExTypParam = "RUB"
                    });
                }
            }
            if (toks != null)
            {
                t1 = toks.EndToken;
                // Absorb a following '.' unless the postfix token equals the canonical text of
                // the termin's first term or is not a letter token.
                if (!t1.IsChar('.') && t1.Next != null && t1.Next.IsChar('.'))
                {
                    if ((t1 is Pullenti.Ner.TextToken) && t1.IsValue(toks.Termin.Terms[0].CanonicalText, null))
                    {
                    }
                    else if (!t1.Chars.IsLetter)
                    {
                    }
                    else
                    {
                        t1 = t1.Next;
                    }
                }
                // The "LTL" postfix is explicitly rejected here.
                if (toks.Termin.CanonicText == "LTL")
                {
                    return(null);
                }
                if (toks.BeginToken == t1)
                {
                    // A one-token postfix that is a preposition or conjunction surrounded by
                    // whitespace is rejected.
                    if (t1.Morph.Class.IsPreposition || t1.Morph.Class.IsConjunction)
                    {
                        if (t1.IsWhitespaceBefore && t1.IsWhitespaceAfter)
                        {
                            return(null);
                        }
                    }
                }
                Pullenti.Ner.Core.NumberExType  ty  = (Pullenti.Ner.Core.NumberExType)toks.Termin.Tag;
                Pullenti.Ner.Core.NumberExToken res = new Pullenti.Ner.Core.NumberExToken(t, t1, nt.Value, nt.Typ, ty)
                {
                    RealValue = f, AltRealValue = altf, Morph = toks.BeginToken.Morph
                };
                if (ty != Pullenti.Ner.Core.NumberExType.Money)
                {
                    _correctExtTypes(res);
                    return(res);
                }
                return(_correctMoney(res, toks.BeginToken));
            }
            // No dictionary postfix: try the special-postfix helper.
            Pullenti.Ner.Core.NumberExToken pfx = _attachSpecPostfix(t1);
            if (pfx != null)
            {
                pfx.BeginToken   = t;
                pfx.Value        = nt.Value;
                pfx.Typ          = nt.Typ;
                pfx.RealValue    = f;
                pfx.AltRealValue = altf;
                return(pfx);
            }
            if (t1.Next != null && ((t1.Morph.Class.IsPreposition || t1.Morph.Class.IsConjunction)))
            {
                if (t1.IsValue("НА", null))
                {
                }
                else
                {
                    // The number is followed by a preposition/conjunction and another
                    // number-with-postfix: borrow that expression's unit. The result covers
                    // only the number token itself.
                    Pullenti.Ner.Core.NumberExToken nn = TryParseNumberWithPostfix(t1.Next);
                    if (nn != null)
                    {
                        return(new Pullenti.Ner.Core.NumberExToken(t, t, nt.Value, nt.Typ, nn.ExTyp)
                        {
                            RealValue = f, AltRealValue = altf, ExTyp2 = nn.ExTyp2, ExTypParam = nn.ExTypParam
                        });
                    }
                }
            }
            if (!t1.IsWhitespaceAfter && (t1.Next is Pullenti.Ner.NumberToken) && (t1 is Pullenti.Ner.TextToken))
            {
                // A length unit glued to a trailing X and immediately followed by another
                // number; the result is flagged with MultAfter.
                // NOTE(review): each unit is compared against two literals, presumably the
                // Cyrillic and Latin look-alike spellings - confirm.
                string term = (t1 as Pullenti.Ner.TextToken).Term;
                Pullenti.Ner.Core.NumberExType ty = Pullenti.Ner.Core.NumberExType.Undefined;
                if (term == "СМХ" || term == "CMX")
                {
                    ty = Pullenti.Ner.Core.NumberExType.Santimeter;
                }
                else if (term == "MX" || term == "МХ")
                {
                    ty = Pullenti.Ner.Core.NumberExType.Meter;
                }
                else if (term == "MMX" || term == "ММХ")
                {
                    ty = Pullenti.Ner.Core.NumberExType.Millimeter;
                }
                if (ty != Pullenti.Ner.Core.NumberExType.Undefined)
                {
                    return(new Pullenti.Ner.Core.NumberExToken(t, t1, nt.Value, nt.Typ, ty)
                    {
                        RealValue = f, AltRealValue = altf, MultAfter = true
                    });
                }
            }
            return(null);
        }
예제 #15
0
        static OrgItemNameToken _TryAttach(Pullenti.Ner.Token t, OrgItemNameToken prev, bool extOnto)
        {
            if (t == null)
            {
                return(null);
            }
            Pullenti.Ner.Referent r = t.GetReferent();
            if (r != null)
            {
                if (r.TypeName == "DENOMINATION")
                {
                    return new OrgItemNameToken(t, t)
                           {
                               Value = r.ToString(true, t.Kit.BaseLanguage, 0), IsDenomination = true
                           }
                }
                ;
                if ((r is Pullenti.Ner.Geo.GeoReferent) && t.Chars.IsLatinLetter)
                {
                    OrgItemNameToken res2 = _TryAttach(t.Next, prev, extOnto);

                    if (res2 != null && res2.Chars.IsLatinLetter)
                    {
                        res2.BeginToken     = t;
                        res2.Value          = string.Format("{0} {1}", Pullenti.Ner.Core.MiscHelper.GetTextValueOfMetaToken(t as Pullenti.Ner.MetaToken, Pullenti.Ner.Core.GetTextAttr.No), res2.Value);
                        res2.IsInDictionary = false;
                        return(res2);
                    }
                }
                return(null);
            }
            Pullenti.Ner.TextToken tt = t as Pullenti.Ner.TextToken;
            if (tt == null)
            {
                return(null);
            }
            OrgItemNameToken res = null;

            Pullenti.Ner.Core.TerminToken tok = m_StdTails.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
            if (tok == null && t.IsChar(','))
            {
                tok = m_StdTails.TryParse(t.Next, Pullenti.Ner.Core.TerminParseAttr.No);
            }
            if (tok != null)
            {
                return new OrgItemNameToken(t, tok.EndToken)
                       {
                           Value = tok.Termin.CanonicText, IsStdTail = tok.Termin.Tag == null, IsEmptyWord = tok.Termin.Tag != null, Morph = tok.Morph
                       }
            }
            ;
            if ((((tok = m_StdNames.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No)))) != null)
            {
                return new OrgItemNameToken(t, tok.EndToken)
                       {
                           Value = tok.Termin.CanonicText, IsStdName = true
                       }
            }
            ;
            OrgItemEngItem eng = OrgItemEngItem.TryAttach(t, false);

            if (eng == null && t.IsChar(','))
            {
                eng = OrgItemEngItem.TryAttach(t.Next, false);
            }
            if (eng != null)
            {
                return new OrgItemNameToken(t, eng.EndToken)
                       {
                           Value = eng.FullValue, IsStdTail = true
                       }
            }
            ;
            if (tt.Chars.IsAllLower && prev != null)
            {
                if (!prev.Chars.IsAllLower && !prev.Chars.IsCapitalUpper)
                {
                    return(null);
                }
            }
            if (tt.IsChar(',') && prev != null)
            {
                Pullenti.Ner.Core.NounPhraseToken npt1 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t.Next, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);

                if (npt1 == null || npt1.Chars != prev.Chars || ((npt1.Morph.Case & prev.Morph.Case)).IsUndefined)
                {
                    return(null);
                }
                OrgItemTypeToken ty = OrgItemTypeToken.TryAttach(t.Next, false, null);
                if (ty != null)
                {
                    return(null);
                }
                if (npt1.EndToken.Next == null || !npt1.EndToken.Next.IsValue("И", null))
                {
                    return(null);
                }
                Pullenti.Ner.Token t1 = npt1.EndToken.Next;
                Pullenti.Ner.Core.NounPhraseToken npt2 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t1.Next, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                if (npt2 == null || npt2.Chars != prev.Chars || ((npt2.Morph.Case & npt1.Morph.Case & prev.Morph.Case)).IsUndefined)
                {
                    return(null);
                }
                ty = OrgItemTypeToken.TryAttach(t1.Next, false, null);
                if (ty != null)
                {
                    return(null);
                }
                res = new OrgItemNameToken(npt1.BeginToken, npt1.EndToken)
                {
                    Morph = npt1.Morph, Value = npt1.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false)
                };
                res.IsNounPhrase       = true;
                res.IsAfterConjunction = true;
                if (prev.Preposition != null)
                {
                    res.Preposition = prev.Preposition;
                }
                return(res);
            }
            if (((tt.IsChar('&') || tt.IsValue("AND", null) || tt.IsValue("UND", null))) && prev != null)
            {
                if ((tt.Next is Pullenti.Ner.TextToken) && tt.LengthChar == 1 && tt.Next.Chars.IsLatinLetter)
                {
                    res = new OrgItemNameToken(tt, tt.Next)
                    {
                        Chars = tt.Next.Chars
                    };
                    res.IsAfterConjunction = true;
                    res.Value = "& " + (tt.Next as Pullenti.Ner.TextToken).Term;
                    return(res);
                }
                res = OrgItemNameToken.TryAttach(tt.Next, null, extOnto, false);
                if (res == null || res.Chars != prev.Chars)
                {
                    return(null);
                }
                res.IsAfterConjunction = true;
                res.Value = "& " + res.Value;
                return(res);
            }
            if (!tt.Chars.IsLetter)
            {
                return(null);
            }
            List <Pullenti.Semantic.Utils.DerivateGroup> expinf = null;

            if (prev != null && prev.EndToken.GetMorphClassInDictionary().IsNoun)
            {
                string wo = prev.EndToken.GetNormalCaseText(Pullenti.Morph.MorphClass.Noun, Pullenti.Morph.MorphNumber.Singular, Pullenti.Morph.MorphGender.Undefined, false);
                expinf = Pullenti.Semantic.Utils.DerivateService.FindDerivates(wo, true, prev.EndToken.Morph.Language);
            }
            Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
            if (npt != null && npt.InternalNoun != null)
            {
                npt = null;
            }
            bool explOk = false;

            if (npt != null && prev != null && prev.EndToken.GetMorphClassInDictionary().IsNoun)
            {
                Pullenti.Ner.Core.NounPhraseToken npt0 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(prev.EndToken, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                if (npt0 != null)
                {
                    List <Pullenti.Semantic.Core.SemanticLink> links = Pullenti.Semantic.Core.SemanticHelper.TryCreateLinks(npt0, npt, null);
                    if (links.Count > 0)
                    {
                        explOk = true;
                    }
                }
            }
            if (npt != null && ((explOk || npt.Morph.Case.IsGenitive || ((prev != null && !((prev.Morph.Case & npt.Morph.Case)).IsUndefined)))))
            {
                Pullenti.Morph.MorphClass mc = npt.BeginToken.GetMorphClassInDictionary();
                if (mc.IsVerb || mc.IsPronoun)
                {
                    return(null);
                }
                if (mc.IsAdverb)
                {
                    if (npt.BeginToken.Next != null && npt.BeginToken.Next.IsHiphen)
                    {
                    }
                    else
                    {
                        return(null);
                    }
                }
                if (mc.IsPreposition)
                {
                    return(null);
                }
                if (mc.IsNoun && npt.Chars.IsAllLower)
                {
                    Pullenti.Morph.MorphCase ca = npt.Morph.Case;
                    if ((!ca.IsDative && !ca.IsGenitive && !ca.IsInstrumental) && !ca.IsPrepositional)
                    {
                        return(null);
                    }
                }
                res = new OrgItemNameToken(npt.BeginToken, npt.EndToken)
                {
                    Morph = npt.Morph, Value = npt.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false)
                };
                res.IsNounPhrase = true;
                if ((npt.EndToken.WhitespacesAfterCount < 2) && (npt.EndToken.Next is Pullenti.Ner.TextToken))
                {
                    Pullenti.Ner.Core.NounPhraseToken npt2 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(npt.EndToken.Next, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                    if (npt2 != null && npt2.Morph.Case.IsGenitive && npt2.Chars.IsAllLower)
                    {
                        OrgItemTypeToken           typ = OrgItemTypeToken.TryAttach(npt.EndToken.Next, true, null);
                        OrgItemEponymToken         epo = OrgItemEponymToken.TryAttach(npt.EndToken.Next, false);
                        Pullenti.Ner.ReferentToken rtt = t.Kit.ProcessReferent("PERSONPROPERTY", npt.EndToken.Next);
                        if (typ == null && epo == null && ((rtt == null || rtt.Morph.Number == Pullenti.Morph.MorphNumber.Plural)))
                        {
                            res.EndToken = npt2.EndToken;
                            res.Value    = string.Format("{0} {1}", res.Value, Pullenti.Ner.Core.MiscHelper.GetTextValueOfMetaToken(npt2, Pullenti.Ner.Core.GetTextAttr.No));
                        }
                    }
                    else if (npt.EndToken.Next.IsComma && (npt.EndToken.Next.Next is Pullenti.Ner.TextToken))
                    {
                        Pullenti.Ner.Token        tt2 = npt.EndToken.Next.Next;
                        Pullenti.Morph.MorphClass mv2 = tt2.GetMorphClassInDictionary();
                        if (mv2.IsAdjective && mv2.IsVerb)
                        {
                            Pullenti.Morph.MorphBaseInfo bi = new Pullenti.Morph.MorphBaseInfo()
                            {
                                Case = npt.Morph.Case, Gender = npt.Morph.Gender, Number = npt.Morph.Number
                            };
                            if (tt2.Morph.CheckAccord(bi, false, false))
                            {
                                npt2 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(tt2.Next, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                                if (npt2 != null && ((npt2.Morph.Case.IsDative || npt2.Morph.Case.IsGenitive)) && npt2.Chars.IsAllLower)
                                {
                                    res.EndToken = npt2.EndToken;
                                    res.Value    = string.Format("{0} {1}", res.Value, Pullenti.Ner.Core.MiscHelper.GetTextValue(npt.EndToken.Next, res.EndToken, Pullenti.Ner.Core.GetTextAttr.No));
                                }
                            }
                        }
                    }
                }
                if (explOk)
                {
                    res.IsAfterConjunction = true;
                }
            }
            else if (npt != null && ((((prev != null && prev.IsNounPhrase && npt.Morph.Case.IsInstrumental)) || extOnto)))
            {
                res = new OrgItemNameToken(npt.BeginToken, npt.EndToken)
                {
                    Morph = npt.Morph, Value = npt.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false)
                };
                res.IsNounPhrase = true;
            }
            else if (tt.IsAnd)
            {
                res = TryAttach(tt.Next, prev, extOnto, false);
                if (res == null || !res.IsNounPhrase || prev == null)
                {
                    return(null);
                }
                if (((prev.Morph.Case & res.Morph.Case)).IsUndefined)
                {
                    return(null);
                }
                if (prev.Morph.Number != Pullenti.Morph.MorphNumber.Undefined && res.Morph.Number != Pullenti.Morph.MorphNumber.Undefined)
                {
                    if (((prev.Morph.Number & res.Morph.Number)) == Pullenti.Morph.MorphNumber.Undefined)
                    {
                        if (prev.Chars != res.Chars)
                        {
                            return(null);
                        }
                        OrgItemTypeToken ty = OrgItemTypeToken.TryAttach(res.EndToken.Next, false, null);
                        if (ty != null)
                        {
                            return(null);
                        }
                    }
                }
                Pullenti.Morph.CharsInfo ci = res.Chars;
                res.Chars = ci;
                res.IsAfterConjunction = true;
                return(res);
            }
            else if (((tt.Term == "ПО" || tt.Term == "ПРИ" || tt.Term == "ЗА") || tt.Term == "С" || tt.Term == "В") || tt.Term == "НА")
            {
                npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t.Next, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                if (npt != null)
                {
                    if (m_VervotWords.TryParse(npt.EndToken, Pullenti.Ner.Core.TerminParseAttr.No) != null)
                    {
                        return(null);
                    }
                    bool ok = false;
                    if (tt.Term == "ПО")
                    {
                        ok = npt.Morph.Case.IsDative;
                    }
                    else if (tt.Term == "С")
                    {
                        ok = npt.Morph.Case.IsInstrumental;
                    }
                    else if (tt.Term == "ЗА")
                    {
                        ok = npt.Morph.Case.IsGenitive | npt.Morph.Case.IsInstrumental;
                    }
                    else if (tt.Term == "НА")
                    {
                        ok = npt.Morph.Case.IsPrepositional;
                    }
                    else if (tt.Term == "В")
                    {
                        ok = npt.Morph.Case.IsDative | npt.Morph.Case.IsPrepositional;
                        if (ok)
                        {
                            ok = false;
                            if (t.Next.IsValue("СФЕРА", null) || t.Next.IsValue("ОБЛАСТЬ", null))
                            {
                                ok = true;
                            }
                        }
                    }
                    else if (tt.Term == "ПРИ")
                    {
                        ok = npt.Morph.Case.IsPrepositional;
                        if (ok)
                        {
                            if (OrgItemTypeToken.TryAttach(tt.Next, true, null) != null)
                            {
                                ok = false;
                            }
                            else
                            {
                                Pullenti.Ner.ReferentToken rt = tt.Kit.ProcessReferent(Pullenti.Ner.Org.OrganizationAnalyzer.ANALYZER_NAME, tt.Next);
                                if (rt != null)
                                {
                                    ok = false;
                                }
                            }
                        }
                        string s = npt.Noun.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false);
                        if (s == "ПОДДЕРЖКА" || s == "УЧАСТИЕ")
                        {
                            ok = false;
                        }
                    }
                    else
                    {
                        ok = npt.Morph.Case.IsPrepositional;
                    }
                    if (ok)
                    {
                        res = new OrgItemNameToken(t, npt.EndToken)
                        {
                            Morph = npt.Morph, Value = npt.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Singular, Pullenti.Morph.MorphGender.Undefined, false), Chars = npt.Chars
                        };
                        res.IsNounPhrase = true;
                        res.Preposition  = tt.Term;
                        if (((res.Value == "ДЕЛО" || res.Value == "ВОПРОС")) && !res.IsNewlineAfter)
                        {
                            OrgItemNameToken res2 = _TryAttach(res.EndToken.Next, res, extOnto);
                            if (res2 != null && res2.Morph.Case.IsGenitive)
                            {
                                res.Value    = string.Format("{0} {1}", res.Value, res2.Value);
                                res.EndToken = res2.EndToken;
                                for (Pullenti.Ner.Token ttt = res2.EndToken.Next; ttt != null; ttt = ttt.Next)
                                {
                                    if (!ttt.IsCommaAnd)
                                    {
                                        break;
                                    }
                                    OrgItemNameToken res3 = _TryAttach(ttt.Next, res2, extOnto);
                                    if (res3 == null)
                                    {
                                        break;
                                    }
                                    res.Value    = string.Format("{0} {1}", res.Value, res3.Value);
                                    res.EndToken = res3.EndToken;
                                    if (ttt.IsAnd)
                                    {
                                        break;
                                    }
                                    ttt = res.EndToken;
                                }
                            }
                        }
                    }
                }
                if (res == null)
                {
                    return(null);
                }
            }
            else if (tt.Term == "OF")
            {
                Pullenti.Ner.Token t1 = tt.Next;
                if (t1 != null && Pullenti.Ner.Core.MiscHelper.IsEngArticle(t1))
                {
                    t1 = t1.Next;
                }
                if (t1 != null && t1.Chars.IsLatinLetter && !t1.Chars.IsAllLower)
                {
                    res = new OrgItemNameToken(t, t1)
                    {
                        Chars = t1.Chars, Morph = t1.Morph
                    };
                    for (Pullenti.Ner.Token ttt = t1.Next; ttt != null; ttt = ttt.Next)
                    {
                        if (ttt.WhitespacesBeforeCount > 2)
                        {
                            break;
                        }
                        if (Pullenti.Ner.Core.MiscHelper.IsEngAdjSuffix(ttt))
                        {
                            ttt = ttt.Next;
                            continue;
                        }
                        if (!ttt.Chars.IsLatinLetter)
                        {
                            break;
                        }
                        if (ttt.Morph.Class.IsPreposition)
                        {
                            break;
                        }
                        t1 = (res.EndToken = ttt);
                    }
                    res.Value       = Pullenti.Ner.Core.MiscHelper.GetTextValue(t, t1, Pullenti.Ner.Core.GetTextAttr.IgnoreArticles);
                    res.Preposition = tt.Term;
                    return(res);
                }
            }
            if (res == null)
            {
                if (tt.Chars.IsLatinLetter && tt.LengthChar == 1)
                {
                }
                else if (tt.Chars.IsAllLower || (tt.LengthChar < 2))
                {
                    if (!tt.Chars.IsLatinLetter || prev == null || !prev.Chars.IsLatinLetter)
                    {
                        return(null);
                    }
                }
                if (tt.Chars.IsCyrillicLetter)
                {
                    Pullenti.Morph.MorphClass mc = tt.GetMorphClassInDictionary();
                    if (mc.IsVerb || mc.IsAdverb)
                    {
                        return(null);
                    }
                }
                else if (tt.Chars.IsLatinLetter && !tt.IsWhitespaceAfter)
                {
                    if (!tt.IsWhitespaceAfter && (tt.LengthChar < 5))
                    {
                        if (tt.Next is Pullenti.Ner.NumberToken)
                        {
                            return(null);
                        }
                    }
                }
                res = new OrgItemNameToken(tt, tt)
                {
                    Value = tt.Term, Morph = tt.Morph
                };
                for (t = tt.Next; t != null; t = t.Next)
                {
                    if ((((t.IsHiphen || t.IsCharOf("\\/"))) && t.Next != null && (t.Next is Pullenti.Ner.TextToken)) && !t.IsWhitespaceBefore && !t.IsWhitespaceAfter)
                    {
                        t            = t.Next;
                        res.EndToken = t;
                        res.Value    = string.Format("{0}{1}{2}", res.Value, (t.Previous.IsChar('.') ? '.' : '-'), (t as Pullenti.Ner.TextToken).Term);
                    }
                    else if (t.IsChar('.'))
                    {
                        if (!t.IsWhitespaceAfter && !t.IsWhitespaceBefore && (t.Next is Pullenti.Ner.TextToken))
                        {
                            res.EndToken = t.Next;
                            t            = t.Next;
                            res.Value    = string.Format("{0}.{1}", res.Value, (t as Pullenti.Ner.TextToken).Term);
                        }
                        else if ((t.Next != null && !t.IsNewlineAfter && t.Next.Chars.IsLatinLetter) && tt.Chars.IsLatinLetter)
                        {
                            res.EndToken = t;
                        }
                        else
                        {
                            break;
                        }
                    }
                    else
                    {
                        break;
                    }
                }
            }
            for (Pullenti.Ner.Token t0 = res.BeginToken; t0 != null; t0 = t0.Next)
            {
                if ((((tt = t0 as Pullenti.Ner.TextToken))) != null && tt.IsLetters)
                {
                    if (!tt.Morph.Class.IsConjunction && !tt.Morph.Class.IsPreposition)
                    {
                        foreach (Pullenti.Morph.MorphBaseInfo mf in tt.Morph.Items)
                        {
                            if ((mf as Pullenti.Morph.MorphWordForm).IsInDictionary)
                            {
                                res.IsInDictionary = true;
                            }
                        }
                    }
                }
                if (t0 == res.EndToken)
                {
                    break;
                }
            }
            if (res.BeginToken == res.EndToken && res.BeginToken.Chars.IsAllUpper)
            {
                if (res.EndToken.Next != null && !res.EndToken.IsWhitespaceAfter)
                {
                    Pullenti.Ner.Token t1 = res.EndToken.Next;
                    if (t1.Next != null && !t1.IsWhitespaceAfter && t1.IsHiphen)
                    {
                        t1 = t1.Next;
                    }
                    if (t1 is Pullenti.Ner.NumberToken)
                    {
                        res.Value   += (t1 as Pullenti.Ner.NumberToken).Value;
                        res.EndToken = t1;
                    }
                }
            }
            if (res.BeginToken == res.EndToken && res.BeginToken.Chars.IsLastLower)
            {
                string src = res.BeginToken.GetSourceText();
                for (int i = src.Length - 1; i >= 0; i--)
                {
                    if (char.IsUpper(src[i]))
                    {
                        res.Value = src.Substring(0, i + 1);
                        break;
                    }
                }
            }
            return(res);
        }
예제 #16
0
        /// <summary>
        /// Resolves the money postfix found at <paramref name="t1"/> for an already parsed
        /// number and, when a small-money amount follows, folds it into the result.
        /// </summary>
        /// <param name="res">Number token being completed; it is modified in place
        /// (ExTypParam, EndToken, RealValue, AltRealValue, AltRestMoney).</param>
        /// <param name="t1">Token at which the postfix is looked up in m_Postfixes.</param>
        /// <returns><paramref name="res"/> on success; null when <paramref name="t1"/> is null
        /// or no postfix candidate remains.</returns>
        static Pullenti.Ner.Core.NumberExToken _correctMoney(Pullenti.Ner.Core.NumberExToken res, Pullenti.Ner.Token t1)
        {
            if (t1 == null)
            {
                return null;
            }
            List<Pullenti.Ner.Core.TerminToken> toks = m_Postfixes.TryParseAll(t1, Pullenti.Ner.Core.TerminParseAttr.No);
            if (toks == null || toks.Count == 0)
            {
                return null;
            }

            // A GEO referent right after the first candidate supplies an ALPHA2 code
            // that is used to narrow the candidate list.
            Pullenti.Ner.Token    tt = toks[0].EndToken.Next;
            Pullenti.Ner.Referent r  = (tt == null ? null : tt.GetReferent());
            string alpha2            = null;
            if (r != null && r.TypeName == "GEO")
            {
                alpha2 = r.GetStringValue("ALPHA2");
            }
            if (alpha2 != null)
            {
                for (int i = toks.Count - 1; i >= 0; i--)
                {
                    // Codes are identifiers, not linguistic text: compare ordinally.
                    if (!toks[i].Termin.CanonicText.StartsWith(alpha2, System.StringComparison.Ordinal))
                    {
                        toks.RemoveAt(i);
                    }
                }
                if (toks.Count == 0)
                {
                    // Nothing matched the country code: fall back to the unfiltered candidates.
                    toks = m_Postfixes.TryParseAll(t1, Pullenti.Ner.Core.TerminParseAttr.No);
                    if (toks == null)
                    {
                        return null;
                    }
                }
            }

            if (toks.Count > 1)
            {
                // Still ambiguous: pick a default country prefix from the first candidate's name.
                alpha2 = null;
                string str = toks[0].Termin.Terms[0].CanonicalText;
                switch (str)
                {
                    case "РУБЛЬ":
                    case "RUBLE":
                        alpha2 = "RU";
                        break;
                    case "ДОЛЛАР":
                    case "ДОЛАР":
                    case "DOLLAR":
                        alpha2 = "US";
                        break;
                    case "ФУНТ":
                    case "POUND":
                        alpha2 = "UK";
                        break;
                }
                if (alpha2 != null)
                {
                    for (int i = toks.Count - 1; i >= 0; i--)
                    {
                        string canonic = toks[i].Termin.CanonicText;
                        // "GBP" is kept explicitly because it does not start with "UK".
                        if (!canonic.StartsWith(alpha2, System.StringComparison.Ordinal) && canonic != "GBP")
                        {
                            toks.RemoveAt(i);
                        }
                    }
                }
                // Reset so that the token after the postfix is not absorbed below on this path.
                alpha2 = null;
            }
            if (toks.Count < 1)
            {
                return null;
            }

            res.ExTypParam = toks[0].Termin.CanonicText;
            if (alpha2 != null && tt != null)
            {
                // The GEO token that produced alpha2 becomes part of the result.
                res.EndToken = tt;
            }

            // Look for a trailing number + small-money term, optionally after a comma/"and".
            tt = res.EndToken.Next;
            if (tt != null && tt.IsCommaAnd)
            {
                tt = tt.Next;
            }
            if ((tt is Pullenti.Ner.NumberToken) && tt.Next != null && (tt.WhitespacesAfterCount < 4))
            {
                Pullenti.Ner.Token tt1 = tt.Next;
                // Skip a parenthesised repetition of the same number, e.g. "50 (50) ...".
                if ((tt1.IsChar('(') && (tt1.Next is Pullenti.Ner.NumberToken)) && tt1.Next.Next != null && tt1.Next.Next.IsChar(')'))
                {
                    if ((tt as Pullenti.Ner.NumberToken).Value == (tt1.Next as Pullenti.Ner.NumberToken).Value)
                    {
                        tt1 = tt1.Next.Next.Next;
                    }
                }
                Pullenti.Ner.Core.TerminToken tok = m_SmallMoney.TryParse(tt1, Pullenti.Ner.Core.TerminParseAttr.No);
                if (tok == null && tt1 != null && tt1.IsChar(')'))
                {
                    tok = m_SmallMoney.TryParse(tt1.Next, Pullenti.Ner.Core.TerminParseAttr.No);
                }
                if (tok != null && (tt as Pullenti.Ner.NumberToken).IntValue != null)
                {
                    // Tag is unboxed as int and used as exclusive upper bound and divisor
                    // (presumably minor units per major unit - confirm where m_SmallMoney is filled).
                    int max = (int)tok.Termin.Tag;
                    int val = (tt as Pullenti.Ner.NumberToken).IntValue.Value;
                    if (val < max)
                    {
                        double f = (double)val / max;

                        // Main value: if it already has a fractional part that disagrees with val,
                        // record val separately; if it has none, add the fraction.
                        double f0  = res.RealValue - ((long)res.RealValue);
                        int    re0 = (int)(((f0 * 100) + 0.0001));
                        if (re0 > 0 && val != re0)
                        {
                            res.AltRestMoney = val;
                        }
                        else if (f0 == 0)
                        {
                            res.RealValue += f;
                        }

                        // Same reconciliation for the alternative value.
                        f0  = res.AltRealValue - ((long)res.AltRealValue);
                        re0 = (int)(((f0 * 100) + 0.0001));
                        if (re0 > 0 && val != re0)
                        {
                            res.AltRestMoney = val;
                        }
                        else if (f0 == 0)
                        {
                            res.AltRealValue += f;
                        }
                        res.EndToken = tok.EndToken;
                    }
                }
            }
            else if ((tt is Pullenti.Ner.TextToken) && tt.IsValue("НОЛЬ", null))
            {
                // "НОЛЬ" + small-money term: only the span is extended, the value is unchanged.
                Pullenti.Ner.Core.TerminToken tok = m_SmallMoney.TryParse(tt.Next, Pullenti.Ner.Core.TerminParseAttr.No);
                if (tok != null)
                {
                    res.EndToken = tok.EndToken;
                }
            }
            return res;
        }
예제 #17
0
        /// <summary>
        /// Tries to build an organization name item starting at token <paramref name="t"/>.
        /// Wraps <c>_TryAttach</c> with special handling of "ОРДЕНА ..." phrases (returned as
        /// ignored parts), prefix/ontology checks, and several post-extensions of the result
        /// (hyphenated tail, "and"-joined item, trailing run of standard nouns).
        /// </summary>
        /// <param name="t">First token to analyse; null yields null.</param>
        /// <param name="prev">Previously attached name item, or null.</param>
        /// <param name="extOnto">When true, a plain text or GEO token is accepted as a name item
        /// even if <c>_TryAttach</c> fails, and the prev/ontology consistency checks are skipped.</param>
        /// <param name="first">When true, a leading Cyrillic preposition other than "ПО"/"ПРИ" is rejected.</param>
        /// <returns>The attached name item, or null when nothing suitable starts at <paramref name="t"/>.</returns>
        public static OrgItemNameToken TryAttach(Pullenti.Ner.Token t, OrgItemNameToken prev, bool extOnto, bool first)
        {
            if (t == null)
            {
                return null;
            }

            // "ОРДЕНА ..." (Russian: "of the Order ..."): the whole phrase is an ignored part of the name.
            if (t.IsValue("ОРДЕНА", null) && t.Next != null)
            {
                Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t.Next, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                if (npt != null)
                {
                    Pullenti.Ner.Token t1 = npt.EndToken;
                    // After "ЗНАК" / "ДРУЖБА" one more noun phrase may continue the title.
                    if ((t1.IsValue("ЗНАК", null) || t1.IsValue("ДРУЖБА", null)) && (t1.WhitespacesAfterCount < 2))
                    {
                        npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t1.Next, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                        if (npt != null)
                        {
                            t1 = npt.EndToken;
                        }
                    }
                    return new OrgItemNameToken(t, t1)
                    {
                        IsIgnoredPart = true
                    };
                }
                if (t.Next.GetMorphClassInDictionary().IsProperSurname)
                {
                    return new OrgItemNameToken(t, t.Next)
                    {
                        IsIgnoredPart = true
                    };
                }
                Pullenti.Ner.ReferentToken ppp = t.Kit.ProcessReferent("PERSON", t.Next);
                if (ppp != null)
                {
                    return new OrgItemNameToken(t, ppp.EndToken)
                    {
                        IsIgnoredPart = true
                    };
                }
                if ((t.WhitespacesAfterCount < 2) && Pullenti.Ner.Core.BracketHelper.CanBeStartOfSequence(t.Next, true, false))
                {
                    Pullenti.Ner.Core.BracketSequenceToken br = Pullenti.Ner.Core.BracketHelper.TryParse(t.Next, Pullenti.Ner.Core.BracketParseAttr.NearCloseBracket, 10);
                    // Only short bracketed sequences (under 40 characters) are accepted here.
                    if (br != null && (br.LengthChar < 40))
                    {
                        return new OrgItemNameToken(t, br.EndToken)
                        {
                            IsIgnoredPart = true
                        };
                    }
                }
            }

            // The first item may start with a Cyrillic preposition only if it is "ПО" or "ПРИ".
            if (first && t.Chars.IsCyrillicLetter && t.Morph.Class.IsPreposition)
            {
                if (!t.IsValue("ПО", null) && !t.IsValue("ПРИ", null))
                {
                    return null;
                }
            }

            OrgItemNameToken res = _TryAttach(t, prev, extOnto);
            if (res == null)
            {
                if (extOnto)
                {
                    // Fallback: take a GEO referent or any text token except ';' verbatim.
                    if ((t.GetReferent() is Pullenti.Ner.Geo.GeoReferent) || ((t is Pullenti.Ner.TextToken) && !t.IsChar(';')))
                    {
                        return new OrgItemNameToken(t, t)
                        {
                            Value = t.GetSourceText()
                        };
                    }
                }
                return null;
            }

            if (prev == null && !extOnto)
            {
                // If the ontology's pure org names match a longer span from t, extend to it.
                if (t.Kit.Ontology != null)
                {
                    Pullenti.Ner.Org.OrganizationAnalyzer.OrgAnalyzerData ad = t.Kit.Ontology._getAnalyzerData(Pullenti.Ner.Org.OrganizationAnalyzer.ANALYZER_NAME) as Pullenti.Ner.Org.OrganizationAnalyzer.OrgAnalyzerData;
                    if (ad != null)
                    {
                        Pullenti.Ner.Core.TerminToken tok = ad.OrgPureNames.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
                        if (tok != null && tok.EndChar > res.EndChar)
                        {
                            res.EndToken = tok.EndToken;
                        }
                    }
                }
            }
            if (prev != null && !extOnto)
            {
                // A lower-case item followed by a non-lower-case one is allowed only when both
                // are Latin or the new item starts with a standard noun.
                if ((prev.Chars.IsAllLower && !res.Chars.IsAllLower && !res.IsStdTail) && !res.IsStdName)
                {
                    if (prev.Chars.IsLatinLetter && res.Chars.IsLatinLetter)
                    {
                    }
                    else if (m_StdNouns.TryParse(res.BeginToken, Pullenti.Ner.Core.TerminParseAttr.No) != null)
                    {
                    }
                    else
                    {
                        return null;
                    }
                }
            }

            // Glue "<res>-<word>" written without spaces when the casing matches or the tail is upper case.
            if ((res.EndToken.Next != null && !res.EndToken.IsWhitespaceAfter && res.EndToken.Next.IsHiphen) && !res.EndToken.Next.IsWhitespaceAfter)
            {
                Pullenti.Ner.TextToken tt = res.EndToken.Next.Next as Pullenti.Ner.TextToken;
                if (tt != null)
                {
                    if (tt.Chars == res.Chars || tt.Chars.IsAllUpper)
                    {
                        res.EndToken = tt;
                        res.Value    = string.Format("{0}-{1}", res.Value, tt.Term);
                    }
                }
            }

            // Join "<res> and <res1>" (single spaces around the conjunction) when both items share
            // casing, have a common grammatical case, and the second one is not an organization type.
            if ((res.EndToken.Next != null && res.EndToken.Next.IsAnd && res.EndToken.WhitespacesAfterCount == 1) && res.EndToken.Next.WhitespacesAfterCount == 1)
            {
                OrgItemNameToken res1 = _TryAttach(res.EndToken.Next.Next, prev, extOnto);
                if (res1 != null && res1.Chars == res.Chars && OrgItemTypeToken.TryAttach(res.EndToken.Next.Next, false, null) == null)
                {
                    if (!((res1.Morph.Case & res.Morph.Case)).IsUndefined)
                    {
                        res.EndToken = res1.EndToken;
                        res.Value    = string.Format("{0} {1} {2}", res.Value, (res.Kit.BaseLanguage.IsUa ? "ТА" : "И"), res1.Value);
                    }
                }
            }

            // Count standard nouns inside the item.
            for (Pullenti.Ner.Token tt = res.BeginToken; tt != null && tt.EndChar <= res.EndChar; tt = tt.Next)
            {
                if (m_StdNouns.TryParse(tt, Pullenti.Ner.Core.TerminParseAttr.No) != null)
                {
                    res.StdOrgNameNouns++;
                }
            }

            // If the item ends with a standard noun, try to absorb a following run of noun phrases
            // that also end with standard nouns.
            if (m_StdNouns.TryParse(res.EndToken, Pullenti.Ner.Core.TerminParseAttr.No) != null)
            {
                int  cou = 1;
                bool non = false;                       // set once a token failing _isNotTermNoun is seen
                Pullenti.Ner.Token et = res.EndToken;   // candidate new end of the item
                if (!_isNotTermNoun(res.EndToken))
                {
                    non = true;
                }
                bool br = false;                        // true while inside an unclosed '('
                for (Pullenti.Ner.Token tt = res.EndToken.Next; tt != null; tt = tt.Next)
                {
                    if (tt.IsTableControlChar)
                    {
                        break;
                    }
                    if (tt.IsChar('('))
                    {
                        if (!non)
                        {
                            break;
                        }
                        br = true;
                        continue;
                    }
                    if (tt.IsChar(')'))
                    {
                        br = false;
                        et = tt;
                        break;
                    }
                    if (!(tt is Pullenti.Ner.TextToken))
                    {
                        break;
                    }
                    if (tt.WhitespacesBeforeCount > 1)
                    {
                        if (tt.NewlinesBeforeCount > 1)
                        {
                            break;
                        }
                        if (tt.Chars != res.EndToken.Chars)
                        {
                            break;
                        }
                    }
                    if (tt.Morph.Class.IsPreposition || tt.IsCommaAnd)
                    {
                        continue;
                    }
                    Pullenti.Morph.MorphClass dd = tt.GetMorphClassInDictionary();
                    if (!dd.IsNoun && !dd.IsAdjective)
                    {
                        break;
                    }
                    Pullenti.Ner.Core.NounPhraseToken npt2 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(tt, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                    if (npt2 == null)
                    {
                        if (dd == Pullenti.Morph.MorphClass.Adjective)
                        {
                            continue;
                        }
                        break;
                    }
                    if (m_StdNouns.TryParse(npt2.EndToken, Pullenti.Ner.Core.TerminParseAttr.No) == null)
                    {
                        break;
                    }
                    if (npt2.EndToken.Chars != res.EndToken.Chars)
                    {
                        break;
                    }
                    // For these head nouns, or after "ПРИ", stop if an organization referent starts here.
                    if ((npt2.EndToken.IsValue("УПРАВЛЕНИЕ", null) || npt2.EndToken.IsValue("ИНСТИТУТ", null) || npt2.EndToken.IsValue("УПРАВЛІННЯ", null)) || npt2.EndToken.IsValue("ІНСТИТУТ", null) || tt.Previous.IsValue("ПРИ", null))
                    {
                        Pullenti.Ner.ReferentToken rt = tt.Kit.ProcessReferent(Pullenti.Ner.Org.OrganizationAnalyzer.ANALYZER_NAME, tt);
                        if (rt != null)
                        {
                            break;
                        }
                    }
                    cou++;
                    tt = npt2.EndToken;
                    if (!_isNotTermNoun(tt))
                    {
                        non = true;
                        et  = tt;
                    }
                }
                // Commit the extension only if such a noun was seen and no bracket is left open.
                if (non && !br)
                {
                    res.StdOrgNameNouns += cou;
                    res.EndToken         = et;
                }
            }
            return res;
        }
예제 #18
0
        /// <summary>
        /// Tries to assemble one weapon referent from a run of already parsed weapon item tokens.
        /// </summary>
        /// <param name="its">Parsed weapon items (noun, brand, model, name, number, caliber, developer, date).</param>
        /// <param name="attach">Not used by this method; kept so existing callers keep compiling.</param>
        /// <returns>
        /// A list holding a single referent token that spans from the first item's begin token to the
        /// end token of the last consumed item, or null when the items do not add up to a weapon
        /// (also null for a null or empty <paramref name="its"/>).
        /// </returns>
        List <Pullenti.Ner.ReferentToken> TryAttach(List <Pullenti.Ner.Weapon.Internal.WeaponItemToken> its, bool attach)
        {
            // its[0] is dereferenced several times below; an empty list cannot produce a referent.
            if (its == null || its.Count == 0)
            {
                return null;
            }

            WeaponReferent tr = new WeaponReferent();
            Pullenti.Ner.Token t1 = null;
            Pullenti.Ner.Weapon.Internal.WeaponItemToken noun  = null;
            Pullenti.Ner.Weapon.Internal.WeaponItemToken brand = null;
            Pullenti.Ner.Weapon.Internal.WeaponItemToken model = null;

            // Phase 1: consume items left to right, stopping at the first one that contradicts
            // what has already been collected.
            for (int i = 0; i < its.Count; i++)
            {
                Pullenti.Ner.Weapon.Internal.WeaponItemToken it = its[i];
                Pullenti.Ner.Weapon.Internal.WeaponItemToken.Typs typ = it.Typ;

                if (typ == Pullenti.Ner.Weapon.Internal.WeaponItemToken.Typs.Noun)
                {
                    // A lone noun is never enough.
                    if (its.Count == 1)
                    {
                        return null;
                    }
                    if (_hasDifferentValue(tr, WeaponReferent.ATTR_TYPE, it))
                    {
                        break;
                    }
                    if (!it.IsInternal)
                    {
                        noun = it;
                    }
                    _addValueWithAlt(tr, WeaponReferent.ATTR_TYPE, it);
                    t1 = it.EndToken;
                }
                else if (typ == Pullenti.Ner.Weapon.Internal.WeaponItemToken.Typs.Brand)
                {
                    if (_hasDifferentValue(tr, WeaponReferent.ATTR_BRAND, it))
                    {
                        break;
                    }
                    // A non-internal brand clears the doubt flag of the noun seen so far.
                    if (!it.IsInternal && noun != null && noun.IsDoubt)
                    {
                        noun.IsDoubt = false;
                    }
                    brand = it;
                    tr.AddSlot(WeaponReferent.ATTR_BRAND, it.Value, false, 0);
                    t1 = it.EndToken;
                }
                else if (typ == Pullenti.Ner.Weapon.Internal.WeaponItemToken.Typs.Model)
                {
                    if (_hasDifferentValue(tr, WeaponReferent.ATTR_MODEL, it))
                    {
                        break;
                    }
                    model = it;
                    _addValueWithAlt(tr, WeaponReferent.ATTR_MODEL, it);
                    t1 = it.EndToken;
                }
                else if (typ == Pullenti.Ner.Weapon.Internal.WeaponItemToken.Typs.Name)
                {
                    // Name, number, caliber and date are accepted only once.
                    if (tr.FindSlot(WeaponReferent.ATTR_NAME, null, true) != null)
                    {
                        break;
                    }
                    _addValueWithAlt(tr, WeaponReferent.ATTR_NAME, it);
                    t1 = it.EndToken;
                }
                else if (typ == Pullenti.Ner.Weapon.Internal.WeaponItemToken.Typs.Number)
                {
                    if (tr.FindSlot(WeaponReferent.ATTR_NUMBER, null, true) != null)
                    {
                        break;
                    }
                    _addValueWithAlt(tr, WeaponReferent.ATTR_NUMBER, it);
                    t1 = it.EndToken;
                }
                else if (typ == Pullenti.Ner.Weapon.Internal.WeaponItemToken.Typs.Caliber)
                {
                    if (tr.FindSlot(WeaponReferent.ATTR_CALIBER, null, true) != null)
                    {
                        break;
                    }
                    _addValueWithAlt(tr, WeaponReferent.ATTR_CALIBER, it);
                    t1 = it.EndToken;
                }
                else if (typ == Pullenti.Ner.Weapon.Internal.WeaponItemToken.Typs.Developer)
                {
                    tr.AddSlot(WeaponReferent.ATTR_REF, it.Ref, false, 0);
                    t1 = it.EndToken;
                }
                else if (typ == Pullenti.Ner.Weapon.Internal.WeaponItemToken.Typs.Date)
                {
                    if (tr.FindSlot(WeaponReferent.ATTR_DATE, null, true) != null)
                    {
                        break;
                    }
                    tr.AddSlot(WeaponReferent.ATTR_DATE, it.Ref, true, 0);
                    t1 = it.EndToken;
                }
                // Items of any other kind are skipped without moving t1.
            }

            bool hasGoodNoun = noun != null && !noun.IsDoubt;

            // Phase 2a: no noun of our own - look at the tokens immediately before the first item.
            // Only non-letter text tokens and conjunctions may sit between us and a previous weapon.
            if (noun == null)
            {
                for (Pullenti.Ner.Token tt = its[0].BeginToken.Previous; tt != null; tt = tt.Previous)
                {
                    WeaponReferent prev = tt.GetReferent() as WeaponReferent;
                    if (prev != null)
                    {
                        _inheritSlots(tr, prev, true);
                        hasGoodNoun = true;
                        break;
                    }
                    bool skippable = (tt is Pullenti.Ner.TextToken) && (!tt.Chars.IsLetter || tt.Morph.Class.IsConjunction);
                    if (!skippable)
                    {
                        break;
                    }
                }
            }

            // Phase 2b: no noun but a model - search up to 100 tokens back for a weapon referent
            // that carries the same model and borrow its type/brand.
            if (noun == null && model != null)
            {
                int cou = 0;
                for (Pullenti.Ner.Token tt = its[0].BeginToken.Previous; tt != null && (cou < 100); tt = tt.Previous, cou++)
                {
                    WeaponReferent prev = tt.GetReferent() as WeaponReferent;
                    if (prev == null)
                    {
                        continue;
                    }
                    if (prev.FindSlot(WeaponReferent.ATTR_MODEL, model.Value, true) == null)
                    {
                        continue;
                    }
                    _inheritSlots(tr, prev, false);
                    hasGoodNoun = true;
                    break;
                }
            }

            // Phase 3: decide whether the collected evidence is sufficient.
            if (!hasGoodNoun)
            {
                if (noun != null)
                {
                    // A doubtful noun needs a model or a non-doubtful brand to back it up.
                    if (model == null && (brand == null || brand.IsDoubt))
                    {
                        return null;
                    }
                }
                else
                {
                    if (model == null)
                    {
                        return null;
                    }
                    // A bare model is accepted only if one of the keywords below occurs
                    // within 20 tokens before the end token (t1 is set because a model was consumed).
                    bool ok  = false;
                    int  cou = 0;
                    for (Pullenti.Ner.Token tt = t1.Previous; tt != null && (cou < 20); tt = tt.Previous, cou++)
                    {
                        if (tt.IsValue("ОРУЖИЕ", null) || tt.IsValue("ВООРУЖЕНИЕ", null) || tt.IsValue("ВЫСТРЕЛ", null) || tt.IsValue("ВЫСТРЕЛИТЬ", null))
                        {
                            ok = true;
                            break;
                        }
                    }
                    if (!ok)
                    {
                        return null;
                    }
                }
            }

            List <Pullenti.Ner.ReferentToken> res = new List <Pullenti.Ner.ReferentToken>();
            res.Add(new Pullenti.Ner.ReferentToken(tr, its[0].BeginToken, t1));
            return res;
        }

        // True when tr already has a slot of this attribute but none holding the item's value.
        private static bool _hasDifferentValue(WeaponReferent tr, string attrName, Pullenti.Ner.Weapon.Internal.WeaponItemToken it)
        {
            return tr.FindSlot(attrName, null, true) != null && tr.FindSlot(attrName, it.Value, true) == null;
        }

        // Adds the item's Value and, when present, its AltValue as slots of the given attribute.
        private static void _addValueWithAlt(WeaponReferent tr, string attrName, Pullenti.Ner.Weapon.Internal.WeaponItemToken it)
        {
            tr.AddSlot(attrName, it.Value, false, 0);
            if (it.AltValue != null)
            {
                tr.AddSlot(attrName, it.AltValue, false, 0);
            }
        }

        // Copies every TYPE slot of prev into tr, plus BRAND (and MODEL when withModel is set)
        // for attributes tr did not have before the copy started.
        private static void _inheritSlots(WeaponReferent tr, WeaponReferent prev, bool withModel)
        {
            // Brand/model slots are collected first and added afterwards, so the "tr has none yet"
            // test is evaluated against tr's state before any of them is copied.
            List <Pullenti.Ner.Slot> addSlots = new List <Pullenti.Ner.Slot>();
            foreach (Pullenti.Ner.Slot s in prev.Slots)
            {
                if (s.TypeName == WeaponReferent.ATTR_TYPE)
                {
                    tr.AddSlot(s.TypeName, s.Value, false, 0);
                }
                else if (s.TypeName == WeaponReferent.ATTR_BRAND || (withModel && s.TypeName == WeaponReferent.ATTR_MODEL))
                {
                    if (tr.FindSlot(s.TypeName, null, true) == null)
                    {
                        addSlots.Add(s);
                    }
                }
            }
            foreach (Pullenti.Ner.Slot s in addSlots)
            {
                tr.AddSlot(s.TypeName, s.Value, false, 0);
            }
        }
예제 #19
0
        public static NounPhraseItem TryParse(Pullenti.Ner.Token t, List <NounPhraseItem> items, Pullenti.Ner.Core.NounPhraseParseAttr attrs)
        {
            if (t == null)
            {
                return(null);
            }
            Pullenti.Ner.Token t0 = t;
            bool _canBeSurname    = false;
            bool _isDoubtAdj      = false;

            Pullenti.Ner.ReferentToken rt = t as Pullenti.Ner.ReferentToken;
            if (rt != null && rt.BeginToken == rt.EndToken && (rt.BeginToken is Pullenti.Ner.TextToken))
            {
                NounPhraseItem res = TryParse(rt.BeginToken, items, attrs);
                if (res != null)
                {
                    res.BeginToken = (res.EndToken = t);
                    res.CanBeNoun  = true;
                    return(res);
                }
            }
            if (rt != null)
            {
                NounPhraseItem res = new NounPhraseItem(t, t);
                foreach (Pullenti.Morph.MorphBaseInfo m in t.Morph.Items)
                {
                    NounPhraseItemTextVar v = new NounPhraseItemTextVar(m, null);
                    v.NormalValue = t.GetReferent().ToString();
                    res.NounMorph.Add(v);
                }
                res.CanBeNoun = true;
                return(res);
            }
            if (t is Pullenti.Ner.NumberToken)
            {
            }
            bool hasLegalVerb = false;

            if (t is Pullenti.Ner.TextToken)
            {
                if (!t.Chars.IsLetter)
                {
                    return(null);
                }
                string str = (t as Pullenti.Ner.TextToken).Term;
                if (str[str.Length - 1] == 'А' || str[str.Length - 1] == 'О')
                {
                    foreach (Pullenti.Morph.MorphBaseInfo wf in t.Morph.Items)
                    {
                        if ((wf is Pullenti.Morph.MorphWordForm) && (wf as Pullenti.Morph.MorphWordForm).IsInDictionary)
                        {
                            if (wf.Class.IsVerb)
                            {
                                Pullenti.Morph.MorphClass mc = t.GetMorphClassInDictionary();
                                if (!mc.IsNoun && ((attrs & Pullenti.Ner.Core.NounPhraseParseAttr.IgnoreParticiples)) == Pullenti.Ner.Core.NounPhraseParseAttr.No)
                                {
                                    if (!Pullenti.Morph.LanguageHelper.EndsWithEx(str, "ОГО", "ЕГО", null, null))
                                    {
                                        return(null);
                                    }
                                }
                                hasLegalVerb = true;
                            }
                            if (wf.Class.IsAdverb)
                            {
                                if (t.Next == null || !t.Next.IsHiphen)
                                {
                                    if ((str == "ВСЕГО" || str == "ДОМА" || str == "НЕСКОЛЬКО") || str == "МНОГО" || str == "ПОРЯДКА")
                                    {
                                    }
                                    else
                                    {
                                        return(null);
                                    }
                                }
                            }
                            if (wf.Class.IsAdjective)
                            {
                                if (wf.ContainsAttr("к.ф.", null))
                                {
                                    if (t.GetMorphClassInDictionary() == Pullenti.Morph.MorphClass.Adjective)
                                    {
                                    }
                                    else
                                    {
                                        _isDoubtAdj = true;
                                    }
                                }
                            }
                        }
                    }
                }
                Pullenti.Morph.MorphClass mc0 = t.Morph.Class;
                if (mc0.IsProperSurname && !t.Chars.IsAllLower)
                {
                    foreach (Pullenti.Morph.MorphBaseInfo wf in t.Morph.Items)
                    {
                        if (wf.Class.IsProperSurname && wf.Number != Pullenti.Morph.MorphNumber.Plural)
                        {
                            Pullenti.Morph.MorphWordForm wff = wf as Pullenti.Morph.MorphWordForm;
                            if (wff == null)
                            {
                                continue;
                            }
                            string s = ((wff.NormalFull ?? wff.NormalCase)) ?? "";
                            if (Pullenti.Morph.LanguageHelper.EndsWithEx(s, "ИН", "ЕН", "ЫН", null))
                            {
                                if (!wff.IsInDictionary)
                                {
                                    _canBeSurname = true;
                                }
                                else
                                {
                                    return(null);
                                }
                            }
                            if (wff.IsInDictionary && Pullenti.Morph.LanguageHelper.EndsWith(s, "ОВ"))
                            {
                                _canBeSurname = true;
                            }
                        }
                    }
                }
                if (mc0.IsProperName && !t.Chars.IsAllLower)
                {
                    foreach (Pullenti.Morph.MorphBaseInfo wff in t.Morph.Items)
                    {
                        Pullenti.Morph.MorphWordForm wf = wff as Pullenti.Morph.MorphWordForm;
                        if (wf == null)
                        {
                            continue;
                        }
                        if (wf.NormalCase == "ГОР")
                        {
                            continue;
                        }
                        if (wf.Class.IsProperName && wf.IsInDictionary)
                        {
                            if (wf.NormalCase == null || !wf.NormalCase.StartsWith("ЛЮБ"))
                            {
                                if (mc0.IsAdjective && t.Morph.ContainsAttr("неизм.", null))
                                {
                                }
                                else if (((attrs & Pullenti.Ner.Core.NounPhraseParseAttr.ReferentCanBeNoun)) == Pullenti.Ner.Core.NounPhraseParseAttr.ReferentCanBeNoun)
                                {
                                }
                                else
                                {
                                    if (items == null || (items.Count < 1))
                                    {
                                        return(null);
                                    }
                                    if (!items[0].IsStdAdjective)
                                    {
                                        return(null);
                                    }
                                }
                            }
                        }
                    }
                }
                if (mc0.IsAdjective && t.Morph.ItemsCount == 1)
                {
                    if (t.Morph[0].ContainsAttr("в.ср.ст.", null))
                    {
                        return(null);
                    }
                }
                Pullenti.Morph.MorphClass mc1 = t.GetMorphClassInDictionary();
                if (mc1 == Pullenti.Morph.MorphClass.Verb && t.Morph.Case.IsUndefined)
                {
                    return(null);
                }
                if ((((attrs & Pullenti.Ner.Core.NounPhraseParseAttr.IgnoreParticiples)) == Pullenti.Ner.Core.NounPhraseParseAttr.IgnoreParticiples && t.Morph.Class.IsVerb && !t.Morph.Class.IsNoun) && !t.Morph.Class.IsProper)
                {
                    foreach (Pullenti.Morph.MorphBaseInfo wf in t.Morph.Items)
                    {
                        if (wf.Class.IsVerb)
                        {
                            if (wf.ContainsAttr("дейст.з.", null))
                            {
                                if (Pullenti.Morph.LanguageHelper.EndsWith((t as Pullenti.Ner.TextToken).Term, "СЯ"))
                                {
                                }
                                else
                                {
                                    return(null);
                                }
                            }
                        }
                    }
                }
            }
            Pullenti.Ner.Token t1 = null;
            for (int k = 0; k < 2; k++)
            {
                t = t1 ?? t0;
                if (k == 0)
                {
                    if (((t0 is Pullenti.Ner.TextToken) && t0.Next != null && t0.Next.IsHiphen) && t0.Next.Next != null)
                    {
                        if (!t0.IsWhitespaceAfter && !t0.Morph.Class.IsPronoun && !(t0.Next.Next is Pullenti.Ner.NumberToken))
                        {
                            if (!t0.Next.IsWhitespaceAfter)
                            {
                                t = t0.Next.Next;
                            }
                            else if (t0.Next.Next.Chars.IsAllLower && Pullenti.Morph.LanguageHelper.EndsWith((t0 as Pullenti.Ner.TextToken).Term, "О"))
                            {
                                t = t0.Next.Next;
                            }
                        }
                    }
                }
                NounPhraseItem it = new NounPhraseItem(t0, t)
                {
                    CanBeSurname = _canBeSurname
                };
                if (t0 == t && (t0 is Pullenti.Ner.ReferentToken))
                {
                    it.CanBeNoun = true;
                    it.Morph     = new Pullenti.Ner.MorphCollection(t0.Morph);
                }
                bool canBePrepos = false;
                foreach (Pullenti.Morph.MorphBaseInfo v in t.Morph.Items)
                {
                    Pullenti.Morph.MorphWordForm wf = v as Pullenti.Morph.MorphWordForm;
                    if (v.Class.IsVerb && !v.Case.IsUndefined)
                    {
                        it.CanBeAdj = true;
                        it.AdjMorph.Add(new NounPhraseItemTextVar(v, t));
                        continue;
                    }
                    if (v.Class.IsPreposition)
                    {
                        canBePrepos = true;
                    }
                    if (v.Class.IsAdjective || ((v.Class.IsPronoun && !v.Class.IsPersonalPronoun && !v.ContainsAttr("неизм.", null))) || ((v.Class.IsNoun && (t is Pullenti.Ner.NumberToken))))
                    {
                        if (TryAccordVariant(items, (items == null ? 0 : items.Count), v, false))
                        {
                            bool isDoub = false;
                            if (v.ContainsAttr("к.ф.", null))
                            {
                                continue;
                            }
                            if (v.ContainsAttr("собир.", null) && !(t is Pullenti.Ner.NumberToken))
                            {
                                if (wf != null && wf.IsInDictionary)
                                {
                                    return(null);
                                }
                                continue;
                            }
                            if (v.ContainsAttr("сравн.", null))
                            {
                                continue;
                            }
                            bool ok = true;
                            if (t is Pullenti.Ner.TextToken)
                            {
                                string s = (t as Pullenti.Ner.TextToken).Term;
                                if (s == "ПРАВО" || s == "ПРАВА")
                                {
                                    ok = false;
                                }
                                else if (Pullenti.Morph.LanguageHelper.EndsWith(s, "ОВ") && t.GetMorphClassInDictionary().IsNoun)
                                {
                                    ok = false;
                                }
                            }
                            else if (t is Pullenti.Ner.NumberToken)
                            {
                                if (v.Class.IsNoun && t.Morph.Class.IsAdjective)
                                {
                                    ok = false;
                                }
                                else if (t.Morph.Class.IsNoun && ((attrs & Pullenti.Ner.Core.NounPhraseParseAttr.ParseNumericAsAdjective)) == Pullenti.Ner.Core.NounPhraseParseAttr.No)
                                {
                                    ok = false;
                                }
                            }
                            if (ok)
                            {
                                it.AdjMorph.Add(new NounPhraseItemTextVar(v, t));
                                it.CanBeAdj = true;
                                if (_isDoubtAdj && t0 == t)
                                {
                                    it.IsDoubtAdjective = true;
                                }
                                if (hasLegalVerb && wf != null && wf.IsInDictionary)
                                {
                                    it.CanBeNoun = true;
                                }
                                if (wf != null && wf.Class.IsPronoun)
                                {
                                    it.CanBeNoun = true;
                                    it.NounMorph.Add(new NounPhraseItemTextVar(v, t));
                                }
                            }
                        }
                    }
                    bool canBeNoun = false;
                    if (t is Pullenti.Ner.NumberToken)
                    {
                    }
                    else if (v.Class.IsNoun || ((wf != null && wf.NormalCase == "САМ")))
                    {
                        canBeNoun = true;
                    }
                    else if (v.Class.IsPersonalPronoun)
                    {
                        if (items == null || items.Count == 0)
                        {
                            canBeNoun = true;
                        }
                        else
                        {
                            foreach (NounPhraseItem it1 in items)
                            {
                                if (it1.IsVerb)
                                {
                                    if (items.Count == 1 && !v.Case.IsNominative)
                                    {
                                        canBeNoun = true;
                                    }
                                    else
                                    {
                                        return(null);
                                    }
                                }
                            }
                            if (items.Count == 1)
                            {
                                if (items[0].CanBeAdjForPersonalPronoun)
                                {
                                    canBeNoun = true;
                                }
                            }
                        }
                    }
                    else if ((v.Class.IsPronoun && ((items == null || items.Count == 0 || ((items.Count == 1 && items[0].CanBeAdjForPersonalPronoun)))) && wf != null) && (((((wf.NormalCase == "ТОТ" || wf.NormalFull == "ТО" || wf.NormalCase == "ТО") || wf.NormalCase == "ЭТО" || wf.NormalCase == "ВСЕ") || wf.NormalCase == "ЧТО" || wf.NormalCase == "КТО") || wf.NormalFull == "КОТОРЫЙ" || wf.NormalCase == "КОТОРЫЙ")))
                    {
                        if (wf.NormalCase == "ВСЕ")
                        {
                            if (t.Next != null && t.Next.IsValue("РАВНО", null))
                            {
                                return(null);
                            }
                        }
                        canBeNoun = true;
                    }
                    else if (wf != null && ((wf.NormalFull ?? wf.NormalCase)) == "КОТОРЫЙ" && ((attrs & Pullenti.Ner.Core.NounPhraseParseAttr.ParsePronouns)) == Pullenti.Ner.Core.NounPhraseParseAttr.No)
                    {
                        return(null);
                    }
                    else if (v.Class.IsProper && (t is Pullenti.Ner.TextToken))
                    {
                        if (t.LengthChar > 4 || v.Class.IsProperName)
                        {
                            canBeNoun = true;
                        }
                    }
                    if (canBeNoun)
                    {
                        bool added = false;
                        if (items != null && items.Count > 1 && ((attrs & Pullenti.Ner.Core.NounPhraseParseAttr.MultiNouns)) != Pullenti.Ner.Core.NounPhraseParseAttr.No)
                        {
                            bool ok1 = true;
                            for (int ii = 1; ii < items.Count; ii++)
                            {
                                if (!items[ii].ConjBefore)
                                {
                                    ok1 = false;
                                    break;
                                }
                            }
                            if (ok1)
                            {
                                if (TryAccordVariant(items, (items == null ? 0 : items.Count), v, true))
                                {
                                    it.NounMorph.Add(new NounPhraseItemTextVar(v, t));
                                    it.CanBeNoun  = true;
                                    it.MultiNouns = true;
                                    added         = true;
                                }
                            }
                        }
                        if (!added)
                        {
                            if (TryAccordVariant(items, (items == null ? 0 : items.Count), v, false))
                            {
                                it.NounMorph.Add(new NounPhraseItemTextVar(v, t));
                                it.CanBeNoun = true;
                                if (v.Class.IsPersonalPronoun && t.Morph.ContainsAttr("неизм.", null) && !it.CanBeAdj)
                                {
                                    NounPhraseItemTextVar itt = new NounPhraseItemTextVar(v, t);
                                    itt.Case   = Pullenti.Morph.MorphCase.AllCases;
                                    itt.Number = Pullenti.Morph.MorphNumber.Undefined;
                                    if (itt.NormalValue == null)
                                    {
                                    }
                                    it.AdjMorph.Add(itt);
                                    it.CanBeAdj = true;
                                }
                            }
                            else if ((items.Count > 0 && items[0].AdjMorph.Count > 0 && items[0].AdjMorph[0].Number == Pullenti.Morph.MorphNumber.Plural) && !((items[0].AdjMorph[0].Case & v.Case)).IsUndefined && !items[0].AdjMorph[0].Class.IsVerb)
                            {
                                if (t.Next != null && t.Next.IsCommaAnd && (t.Next.Next is Pullenti.Ner.TextToken))
                                {
                                    Pullenti.Ner.Core.NounPhraseToken npt2 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t.Next.Next, attrs, 0, null);
                                    if (npt2 != null && npt2.Preposition == null && !((npt2.Morph.Case & v.Case & items[0].AdjMorph[0].Case)).IsUndefined)
                                    {
                                        it.NounMorph.Add(new NounPhraseItemTextVar(v, t));
                                        it.CanBeNoun = true;
                                    }
                                }
                            }
                        }
                    }
                }
                if (t0 != t)
                {
                    foreach (NounPhraseItemTextVar v in it.AdjMorph)
                    {
                        v.CorrectPrefix(t0 as Pullenti.Ner.TextToken, false);
                    }
                    foreach (NounPhraseItemTextVar v in it.NounMorph)
                    {
                        v.CorrectPrefix(t0 as Pullenti.Ner.TextToken, true);
                    }
                }
                if (k == 1 && it.CanBeNoun && !it.CanBeAdj)
                {
                    if (t1 != null)
                    {
                        it.EndToken = t1;
                    }
                    else
                    {
                        it.EndToken = t0.Next.Next;
                    }
                    foreach (NounPhraseItemTextVar v in it.NounMorph)
                    {
                        if (v.NormalValue != null && (v.NormalValue.IndexOf('-') < 0))
                        {
                            v.NormalValue = string.Format("{0}-{1}", v.NormalValue, it.EndToken.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false));
                        }
                    }
                }
                if (it.CanBeAdj)
                {
                    if (m_StdAdjectives.TryParse(it.BeginToken, Pullenti.Ner.Core.TerminParseAttr.No) != null)
                    {
                        it.IsStdAdjective = true;
                    }
                }
                if (canBePrepos && it.CanBeNoun)
                {
                    if (items != null && items.Count > 0)
                    {
                        Pullenti.Ner.Core.NounPhraseToken npt1 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t, Pullenti.Ner.Core.NounPhraseParseAttr.ParsePreposition | Pullenti.Ner.Core.NounPhraseParseAttr.ParsePronouns | Pullenti.Ner.Core.NounPhraseParseAttr.ParseVerbs, 0, null);
                        if (npt1 != null && npt1.EndChar > t.EndChar)
                        {
                            return(null);
                        }
                    }
                    else
                    {
                        Pullenti.Ner.Core.NounPhraseToken npt1 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t.Next, Pullenti.Ner.Core.NounPhraseParseAttr.ParsePronouns | Pullenti.Ner.Core.NounPhraseParseAttr.ParseVerbs, 0, null);
                        if (npt1 != null)
                        {
                            Pullenti.Morph.MorphCase mc = Pullenti.Morph.LanguageHelper.GetCaseAfterPreposition((t as Pullenti.Ner.TextToken).Lemma);
                            if (!((mc & npt1.Morph.Case)).IsUndefined)
                            {
                                return(null);
                            }
                        }
                    }
                }
                if (it.CanBeNoun || it.CanBeAdj || k == 1)
                {
                    if (it.BeginToken.Morph.Class.IsPronoun)
                    {
                        Pullenti.Ner.Token tt2 = it.EndToken.Next;
                        if ((tt2 != null && tt2.IsHiphen && !tt2.IsWhitespaceAfter) && !tt2.IsWhitespaceBefore)
                        {
                            tt2 = tt2.Next;
                        }
                        if (tt2 is Pullenti.Ner.TextToken)
                        {
                            string ss = (tt2 as Pullenti.Ner.TextToken).Term;
                            if ((ss == "ЖЕ" || ss == "БЫ" || ss == "ЛИ") || ss == "Ж")
                            {
                                it.EndToken = tt2;
                            }
                            else if (ss == "НИБУДЬ" || ss == "ЛИБО" || (((ss == "ТО" && tt2.Previous.IsHiphen)) && it.CanBeAdj))
                            {
                                it.EndToken = tt2;
                                foreach (NounPhraseItemTextVar m in it.AdjMorph)
                                {
                                    m.NormalValue = string.Format("{0}-{1}", m.NormalValue, ss);
                                    if (m.SingleNumberValue != null)
                                    {
                                        m.SingleNumberValue = string.Format("{0}-{1}", m.SingleNumberValue, ss);
                                    }
                                }
                            }
                        }
                    }
                    return(it);
                }
                if (t0 == t)
                {
                    if (t0.IsValue("БИЗНЕС", null) && t0.Next != null && t0.Next.Chars == t0.Chars)
                    {
                        t1 = t0.Next;
                        continue;
                    }
                    return(it);
                }
            }
            return(null);
        }
예제 #20
0
 /// <summary>
 /// Tries to recognise an author item of a bibliographic reference starting at <paramref name="t"/>.
 /// Three forms are handled, in this order:
 /// a person recognised by PersonItemToken.TryParsePerson; an author enclosed in square brackets;
 /// and an "and others" marker ("И ДРУГИЕ", "И ДР", "ET AL", optionally followed by a dot).
 /// </summary>
 /// <param name="t">First token to analyse; null yields null.</param>
 /// <param name="prevPersTemplate">Template of the previously parsed person, forwarded to TryParsePerson.</param>
 /// <returns>The parsed item, or null when nothing author-like starts at <paramref name="t"/>.</returns>
 public static BookLinkToken TryParseAuthor(Pullenti.Ner.Token t, Pullenti.Ner.Person.Internal.FioTemplateType prevPersTemplate = Pullenti.Ner.Person.Internal.FioTemplateType.Undefined)
 {
     if (t == null)
     {
         return(null);
     }
     Pullenti.Ner.ReferentToken rtp = Pullenti.Ner.Person.Internal.PersonItemToken.TryParsePerson(t, prevPersTemplate);
     if (rtp != null)
     {
         BookLinkToken re;
         if (rtp.Data == null)
         {
             // No analyzer data attached: keep only the referent.
             re = new BookLinkToken(t, (rtp == t ? t : rtp.EndToken))
             {
                 Typ = BookLinkTyp.Person, Ref = rtp.Referent
             };
         }
         else
         {
             // Analyzer data is present: keep the whole referent token.
             re = new BookLinkToken(t, rtp.EndToken)
             {
                 Typ = BookLinkTyp.Person, Tok = rtp
             };
         }
         re.PersonTemplate = (Pullenti.Ner.Person.Internal.FioTemplateType)rtp.MiscAttrs;
         // Look for a person-property referent inside the person span.
         for (Pullenti.Ner.Token tt = rtp.BeginToken; tt != null && tt.EndChar <= rtp.EndChar; tt = tt.Next)
         {
             if (!(tt.GetReferent() is Pullenti.Ner.Person.PersonPropertyReferent))
             {
                 continue;
             }
             Pullenti.Ner.ReferentToken rt = tt as Pullenti.Ner.ReferentToken;
             if (rt.BeginToken.Chars.IsCapitalUpper && tt != rtp.BeginToken)
             {
                 // A capitalised property that is not the first token is stored as the start of the name.
                 re.StartOfName = Pullenti.Ner.Core.MiscHelper.GetTextValueOfMetaToken(rt, Pullenti.Ner.Core.GetTextAttr.KeepRegister);
                 break;
             }
             // Any other property referent inside the span rejects the whole candidate.
             return(null);
         }
         return(re);
     }
     if (t.IsChar('['))
     {
         // Author in square brackets: parse the inside and extend the span over both brackets.
         BookLinkToken re = TryParseAuthor(t.Next, Pullenti.Ner.Person.Internal.FioTemplateType.Undefined);
         if (re != null && re.EndToken.Next != null && re.EndToken.Next.IsChar(']'))
         {
             re.BeginToken = t;
             re.EndToken   = re.EndToken.Next;
             return(re);
         }
     }
     if (((t.IsValue("И", null) || t.IsValue("ET", null))) && t.Next != null)
     {
         if (t.Next.IsValue("ДРУГИЕ", null) || t.Next.IsValue("ДР", null) || t.Next.IsValue("AL", null))
         {
             BookLinkToken res = new BookLinkToken(t, t.Next)
             {
                 Typ = BookLinkTyp.AndOthers
             };
             // Swallow the abbreviation dot ("и др.", "et al.") when present.
             if (t.Next.Next != null && t.Next.Next.IsChar('.'))
             {
                 res.EndToken = res.EndToken.Next;
             }
             return(res);
         }
     }
     return(null);
 }
예제 #21
0
        /// <summary>
        /// Main processing entry point of this analyzer.
        /// NOTE(review): the original comment here read "main function of phone extraction",
        /// but the body below creates KeywordReferent items, so that comment looks copy-pasted.
        /// The method works in several stages: (1) makes sure a DenominationAnalyzer has been run,
        /// (2) walks the token chain creating Predicate keywords for verbs and Object keywords for
        /// the words of noun phrases (plus a composite keyword per multi-word phrase),
        /// (3) walks the chain again joining adjacent Object keywords ("X of Y" / X + genitive Y),
        /// (4) optionally sorts the referents by rank and (5) optionally registers an annotation.
        /// </summary>
        /// <param name="kit">Analysis container providing the token chain, the processor and the analyzer data.</param>
        public override void Process(Pullenti.Ner.Core.AnalysisKit kit)
        {
            Pullenti.Ner.Core.AnalyzerData ad = kit.GetAnalyzerData(this);
            bool hasDenoms = false;

            // Check whether the processor already contains an enabled DenominationAnalyzer.
            foreach (Pullenti.Ner.Analyzer a in kit.Processor.Analyzers)
            {
                if ((a is Pullenti.Ner.Denomination.DenominationAnalyzer) && !a.IgnoreThisAnalyzer)
                {
                    hasDenoms = true;
                }
            }
            // If not, run a temporary one on this kit before extracting keywords.
            if (!hasDenoms)
            {
                Pullenti.Ner.Denomination.DenominationAnalyzer a = new Pullenti.Ner.Denomination.DenominationAnalyzer();
                a.Process(kit);
            }
            // Scratch containers reused for every noun phrase.
            List <KeywordReferent> li   = new List <KeywordReferent>();
            StringBuilder          tmp  = new StringBuilder();
            List <string>          tmp2 = new List <string>();
            int max = 0;

            // max = number of tokens in the top-level chain; passed to _setRank together with cur.
            for (Pullenti.Ner.Token t = kit.FirstToken; t != null; t = t.Next)
            {
                max++;
            }
            int cur = 0;

            // First pass: create keywords. cur counts loop iterations (t may be replaced by embedded tokens).
            for (Pullenti.Ner.Token t = kit.FirstToken; t != null; t = t.Next, cur++)
            {
                Pullenti.Ner.Referent r = t.GetReferent();
                if (r != null)
                {
                    // Tokens that already carry a referent are handled by _addReferents.
                    t = this._addReferents(ad, t, cur, max);
                    continue;
                }
                if (!(t is Pullenti.Ner.TextToken))
                {
                    continue;
                }
                // Only letter tokens of at least 3 characters are considered.
                if (!t.Chars.IsLetter || (t.LengthChar < 3))
                {
                    continue;
                }
                string term = (t as Pullenti.Ner.TextToken).Term;
                // "ЕСТЬ" is kept only when the previous token is a text token with verb morphology.
                if (term == "ЕСТЬ")
                {
                    if ((t.Previous is Pullenti.Ner.TextToken) && t.Previous.Morph.Class.IsVerb)
                    {
                    }
                    else
                    {
                        continue;
                    }
                }
                Pullenti.Ner.Core.NounPhraseToken npt = null;
                npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t, Pullenti.Ner.Core.NounPhraseParseAttr.AdjectiveCanBeLast | Pullenti.Ner.Core.NounPhraseParseAttr.ParsePreposition, 0, null);
                if (npt == null)
                {
                    // No noun phrase here: a dictionary verb (that is not also a preposition) becomes a Predicate keyword.
                    Pullenti.Morph.MorphClass mc = t.GetMorphClassInDictionary();
                    if (mc.IsVerb && !mc.IsPreposition)
                    {
                        if ((t as Pullenti.Ner.TextToken).IsVerbBe)
                        {
                            continue;
                        }
                        if (t.IsValue("МОЧЬ", null) || t.IsValue("WOULD", null))
                        {
                            continue;
                        }
                        KeywordReferent kref = new KeywordReferent()
                        {
                            Typ = KeywordType.Predicate
                        };
                        string norm = t.GetNormalCaseText(Pullenti.Morph.MorphClass.Verb, Pullenti.Morph.MorphNumber.Singular, Pullenti.Morph.MorphGender.Undefined, false);
                        if (norm == null)
                        {
                            norm = (t as Pullenti.Ner.TextToken).Lemma;
                        }
                        // Drop the trailing "СЯ" of a "...ЬСЯ" form (the "Ь" is kept).
                        if (norm.EndsWith("ЬСЯ"))
                        {
                            norm = norm.Substring(0, norm.Length - 2);
                        }
                        kref.AddSlot(KeywordReferent.ATTR_VALUE, norm, false, 0);
                        List <Pullenti.Semantic.Utils.DerivateGroup> drv = Pullenti.Semantic.Utils.DerivateService.FindDerivates(norm, true, t.Morph.Language);
                        _addNormals(kref, drv, norm);
                        kref = ad.RegisterReferent(kref) as KeywordReferent;
                        _setRank(kref, cur, max);
                        // NOTE(review): kref was already registered two lines above; this second RegisterReferent call looks redundant - confirm.
                        Pullenti.Ner.ReferentToken rt1 = new Pullenti.Ner.ReferentToken(ad.RegisterReferent(kref), t, t)
                        {
                            Morph = t.Morph
                        };
                        kit.EmbedToken(rt1);
                        t = rt1;
                        continue;
                    }
                    continue;
                }
                if (npt.InternalNoun != null)
                {
                    continue;
                }
                // Skip prepositional phrases ending with "ЦЕЛОМ" / "ЧАСТНОСТИ".
                if (npt.EndToken.IsValue("ЦЕЛОМ", null) || npt.EndToken.IsValue("ЧАСТНОСТИ", null))
                {
                    if (npt.Preposition != null)
                    {
                        t = npt.EndToken;
                        continue;
                    }
                }
                // Skip "С ... СТОРОНЫ" phrases.
                if (npt.EndToken.IsValue("СТОРОНЫ", null) && npt.Preposition != null && npt.Preposition.Normal == "С")
                {
                    t = npt.EndToken;
                    continue;
                }
                // Single-token phrase: ignore prepositions and the adverb "ПОТОМ".
                if (npt.BeginToken == npt.EndToken)
                {
                    Pullenti.Morph.MorphClass mc = t.GetMorphClassInDictionary();
                    if (mc.IsPreposition)
                    {
                        continue;
                    }
                    else if (mc.IsAdverb)
                    {
                        if (t.IsValue("ПОТОМ", null))
                        {
                            continue;
                        }
                    }
                }
                else
                {
                }
                li.Clear();
                // t0 will hold the first token of the composite span (replaced by the first embedded keyword token, if any).
                Pullenti.Ner.Token t0 = t;
                // Create one Object keyword per significant word of the noun phrase.
                for (Pullenti.Ner.Token tt = t; tt != null && tt.EndChar <= npt.EndChar; tt = tt.Next)
                {
                    if (!(tt is Pullenti.Ner.TextToken))
                    {
                        continue;
                    }
                    // Empty branch: has no effect in the visible code.
                    if (tt.IsValue("NATURAL", null))
                    {
                    }
                    if ((tt.LengthChar < 3) || !tt.Chars.IsLetter)
                    {
                        continue;
                    }
                    Pullenti.Morph.MorphClass mc = tt.GetMorphClassInDictionary();
                    // Function words are skipped, except for the word "ОТНОШЕНИЕ".
                    if ((mc.IsPreposition || mc.IsPronoun || mc.IsPersonalPronoun) || mc.IsConjunction)
                    {
                        if (tt.IsValue("ОТНОШЕНИЕ", null))
                        {
                        }
                        else
                        {
                            continue;
                        }
                    }
                    if (mc.IsMisc)
                    {
                        if (Pullenti.Ner.Core.MiscHelper.IsEngArticle(tt))
                        {
                            continue;
                        }
                    }
                    KeywordReferent kref = new KeywordReferent()
                    {
                        Typ = KeywordType.Object
                    };
                    string norm = (tt as Pullenti.Ner.TextToken).Lemma;
                    kref.AddSlot(KeywordReferent.ATTR_VALUE, norm, false, 0);
                    // Derivates are not looked up for the lemma "ЕСТЬ".
                    if (norm != "ЕСТЬ")
                    {
                        List <Pullenti.Semantic.Utils.DerivateGroup> drv = Pullenti.Semantic.Utils.DerivateService.FindDerivates(norm, true, tt.Morph.Language);
                        _addNormals(kref, drv, norm);
                    }
                    kref = ad.RegisterReferent(kref) as KeywordReferent;
                    _setRank(kref, cur, max);
                    Pullenti.Ner.ReferentToken rt1 = new Pullenti.Ner.ReferentToken(kref, tt, tt)
                    {
                        Morph = tt.Morph
                    };
                    kit.EmbedToken(rt1);
                    if (tt == t && li.Count == 0)
                    {
                        t0 = rt1;
                    }
                    // The outer loop variable is advanced to the last embedded keyword token.
                    t = rt1;
                    li.Add(kref);
                }
                // Several word keywords in one phrase: add a composite keyword covering t0..t.
                if (li.Count > 1)
                {
                    KeywordReferent kref = new KeywordReferent()
                    {
                        Typ = KeywordType.Object
                    };
                    tmp.Length = 0;
                    tmp2.Clear();
                    // hasNorm is assigned below but never read within this method.
                    bool hasNorm = false;
                    foreach (KeywordReferent kw in li)
                    {
                        string s = kw.GetStringValue(KeywordReferent.ATTR_VALUE);
                        if (tmp.Length > 0)
                        {
                            tmp.Append(' ');
                        }
                        tmp.Append(s);
                        // Collect the normal form of each part, falling back to its value.
                        string n = kw.GetStringValue(KeywordReferent.ATTR_NORMAL);
                        if (n != null)
                        {
                            hasNorm = true;
                            tmp2.Add(n);
                        }
                        else
                        {
                            tmp2.Add(s);
                        }
                        kref.AddSlot(KeywordReferent.ATTR_REF, kw, false, 0);
                    }
                    string val = npt.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Singular, Pullenti.Morph.MorphGender.Undefined, false);
                    kref.AddSlot(KeywordReferent.ATTR_VALUE, val, false, 0);
                    // The composite normal form is the sorted part normals joined by spaces
                    // (the text accumulated in tmp above is discarded here).
                    tmp.Length = 0;
                    tmp2.Sort();
                    foreach (string s in tmp2)
                    {
                        if (tmp.Length > 0)
                        {
                            tmp.Append(' ');
                        }
                        tmp.Append(s);
                    }
                    string norm = tmp.ToString();
                    if (norm != val)
                    {
                        kref.AddSlot(KeywordReferent.ATTR_NORMAL, norm, false, 0);
                    }
                    kref = ad.RegisterReferent(kref) as KeywordReferent;
                    _setRank(kref, cur, max);
                    Pullenti.Ner.ReferentToken rt1 = new Pullenti.Ner.ReferentToken(kref, t0, t)
                    {
                        Morph = npt.Morph
                    };
                    kit.EmbedToken(rt1);
                    t = rt1;
                }
            }
            // Second pass: join an Object keyword with the Object keyword that follows it.
            cur = 0;
            for (Pullenti.Ner.Token t = kit.FirstToken; t != null; t = t.Next, cur++)
            {
                KeywordReferent kw = t.GetReferent() as KeywordReferent;
                if (kw == null || kw.Typ != KeywordType.Object)
                {
                    continue;
                }
                if (t.Next == null || kw.ChildWords > 2)
                {
                    continue;
                }
                Pullenti.Ner.Token t1 = t.Next;
                // English pattern "X of [article] Y".
                if (t1.IsValue("OF", null) && (t1.WhitespacesAfterCount < 3) && t1.Next != null)
                {
                    t1 = t1.Next;
                    if ((t1 is Pullenti.Ner.TextToken) && Pullenti.Ner.Core.MiscHelper.IsEngArticle(t1) && t1.Next != null)
                    {
                        t1 = t1.Next;
                    }
                }
                // Otherwise the next token must be in the genitive case and separated by at most one whitespace.
                else if (!t1.Morph.Case.IsGenitive || t.WhitespacesAfterCount > 1)
                {
                    continue;
                }
                KeywordReferent kw2 = t1.GetReferent() as KeywordReferent;
                if (kw2 == null)
                {
                    continue;
                }
                if (kw == kw2)
                {
                    continue;
                }
                // The union is limited to 3 child words in total.
                if (kw2.Typ != KeywordType.Object || (kw.ChildWords + kw2.ChildWords) > 3)
                {
                    continue;
                }
                KeywordReferent kwUn = new KeywordReferent();
                kwUn.Union(kw, kw2, Pullenti.Ner.Core.MiscHelper.GetTextValue(t1, t1, Pullenti.Ner.Core.GetTextAttr.No));
                kwUn = ad.RegisterReferent(kwUn) as KeywordReferent;
                _setRank(kwUn, cur, max);
                Pullenti.Ner.ReferentToken rt1 = new Pullenti.Ner.ReferentToken(kwUn, t, t1)
                {
                    Morph = t.Morph
                };
                kit.EmbedToken(rt1);
                t = rt1;
            }
            // Optional post-processing: order the registered referents with the CompByRank comparer.
            if (SortKeywordsByRank)
            {
                List <Pullenti.Ner.Referent> all = new List <Pullenti.Ner.Referent>(ad.Referents);
                all.Sort(new CompByRank());
                ad.Referents = all;
            }
            // Optional annotation built from at most AnnotationMaxSentences sentences.
            if (AnnotationMaxSentences > 0)
            {
                KeywordReferent ano = Pullenti.Ner.Keyword.Internal.AutoannoSentToken.CreateAnnotation(kit, AnnotationMaxSentences);
                if (ano != null)
                {
                    ad.RegisterReferent(ano);
                }
            }
        }
예제 #22
0
 /// <summary>
 /// Tries to build a named-entity item starting at token <paramref name="t"/>.
 /// The following alternatives are tried in order, the first success is returned:
 /// 1) an already recognised PERSON / PERSONPROPERTY / GEO / ORGANIZATION referent token;
 /// 2) a type word found in m_Types (optionally also a well-known name from m_Names);
 /// 3) a name found in m_Names;
 /// 4) a compass-direction prefix (TryAttachNordWest) followed by a geo referent or a location item;
 /// 5) a capitalised adjective + type noun phrase that does not start a sentence;
 /// 6) a bracketed (quoted) sequence starting with a non-lowercase letter token;
 /// 7) any non-lowercase letter token longer than 2 characters, optionally hyphenated with the next one.
 /// </summary>
 /// <param name="t">First token to analyse; null yields null.</param>
 /// <param name="locOnto">Local ontology, only forwarded to the recursive calls.</param>
 /// <returns>The parsed item, or null when nothing matches.</returns>
 public static NamedItemToken TryParse(Pullenti.Ner.Token t, Pullenti.Ner.Core.IntOntologyCollection locOnto)
 {
     if (t == null)
     {
         return(null);
     }
     if (t is Pullenti.Ner.ReferentToken)
     {
         // Only a fixed set of referent kinds may take part in a named entity.
         Pullenti.Ner.Referent r = t.GetReferent();
         if ((r.TypeName == "PERSON" || r.TypeName == "PERSONPROPERTY" || (r is Pullenti.Ner.Geo.GeoReferent)) || r.TypeName == "ORGANIZATION")
         {
             return new NamedItemToken(t, t)
                    {
                        Ref = r, Morph = t.Morph
                    };
         }
         return(null);
     }
     Pullenti.Ner.Core.TerminToken typ = m_Types.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
     Pullenti.Ner.Core.TerminToken nam = m_Names.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
     if (typ != null)
     {
         if (!(t is Pullenti.Ner.TextToken))
         {
             return(null);
         }
         NamedItemToken res = new NamedItemToken(typ.BeginToken, typ.EndToken)
         {
             Morph = typ.Morph, Chars = typ.Chars
         };
         res.Kind      = (Pullenti.Ner.Named.NamedEntityKind)typ.Termin.Tag;
         res.TypeValue = typ.Termin.CanonicText;
         // The same span is also a known name of the same kind (and is not written in lowercase).
         if ((nam != null && nam.EndToken == typ.EndToken && !t.Chars.IsAllLower) && ((Pullenti.Ner.Named.NamedEntityKind)nam.Termin.Tag) == res.Kind)
         {
             res.NameValue   = nam.Termin.CanonicText;
             res.IsWellknown = true;
         }
         return(res);
     }
     if (nam != null)
     {
         if (nam.BeginToken.Chars.IsAllLower)
         {
             return(null);
         }
         NamedItemToken res = new NamedItemToken(nam.BeginToken, nam.EndToken)
         {
             Morph = nam.Morph, Chars = nam.Chars
         };
         res.Kind      = (Pullenti.Ner.Named.NamedEntityKind)nam.Termin.Tag;
         res.NameValue = nam.Termin.CanonicText;
         // The name counts as well-known only when it is not glued to neighbouring tokens
         // (punctuation followed by whitespace is tolerated on the right).
         bool ok = true;
         if (!t.IsWhitespaceBefore && t.Previous != null)
         {
             ok = false;
         }
         else if (!t.IsWhitespaceAfter && t.Next != null)
         {
             if (t.Next.IsCharOf(",.;!?") && t.Next.IsWhitespaceAfter)
             {
             }
             else
             {
                 ok = false;
             }
         }
         if (ok)
         {
             res.IsWellknown = true;
             res.TypeValue   = nam.Termin.Tag2 as string;
         }
         return(res);
     }
     Pullenti.Ner.MetaToken adj = Pullenti.Ner.Geo.Internal.MiscLocationHelper.TryAttachNordWest(t);
     if (adj != null)
     {
         if (adj.Morph.Class.IsNoun)
         {
             // Noun form: only a multi-token phrase ending with "ВОСТОК" is accepted as a location.
             if (adj.EndToken.IsValue("ВОСТОК", null))
             {
                 if (adj.BeginToken == adj.EndToken)
                 {
                     return(null);
                 }
                 NamedItemToken re = new NamedItemToken(t, adj.EndToken)
                 {
                     Morph = adj.Morph
                 };
                 re.Kind        = Pullenti.Ner.Named.NamedEntityKind.Location;
                 re.NameValue   = Pullenti.Ner.Core.MiscHelper.GetTextValue(t, adj.EndToken, Pullenti.Ner.Core.GetTextAttr.FirstNounGroupToNominative);
                 re.IsWellknown = true;
                 return(re);
             }
             return(null);
         }
         if (adj.WhitespacesAfterCount > 2)
         {
             return(null);
         }
         // Direction adjective directly followed by a geo referent.
         if ((adj.EndToken.Next is Pullenti.Ner.ReferentToken) && (adj.EndToken.Next.GetReferent() is Pullenti.Ner.Geo.GeoReferent))
         {
             NamedItemToken re = new NamedItemToken(t, adj.EndToken.Next)
             {
                 Morph = adj.EndToken.Next.Morph
             };
             re.Kind        = Pullenti.Ner.Named.NamedEntityKind.Location;
             re.NameValue   = Pullenti.Ner.Core.MiscHelper.GetTextValue(t, adj.EndToken.Next, Pullenti.Ner.Core.GetTextAttr.FirstNounGroupToNominative);
             re.IsWellknown = true;
             re.Ref         = adj.EndToken.Next.GetReferent();
             return(re);
         }
         // Direction adjective followed by a location item: prepend the adjective to its name.
         NamedItemToken res = TryParse(adj.EndToken.Next, locOnto);
         if (res != null && res.Kind == Pullenti.Ner.Named.NamedEntityKind.Location)
         {
             string s = adj.GetNormalCaseText(Pullenti.Morph.MorphClass.Adjective, Pullenti.Morph.MorphNumber.Singular, res.Morph.Gender, false);
             if (s != null)
             {
                 if (res.NameValue == null)
                 {
                     res.NameValue = s.ToUpper();
                 }
                 else
                 {
                     res.NameValue = string.Format("{0} {1}", s.ToUpper(), res.NameValue);
                     res.TypeValue = null;
                 }
                 res.BeginToken  = t;
                 res.Chars       = t.Chars;
                 res.IsWellknown = true;
                 return(res);
             }
         }
     }
     if (t.Chars.IsCapitalUpper && !Pullenti.Ner.Core.MiscHelper.CanBeStartOfSentence(t))
     {
         // Capitalised "adjective(s) + type noun" inside a sentence: the adjectives become the name.
         Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
         if (npt != null && npt.Adjectives.Count > 0)
         {
             NamedItemToken test = TryParse(npt.Noun.BeginToken, locOnto);
             if (test != null && test.EndToken == npt.EndToken && test.TypeValue != null)
             {
                 test.BeginToken = t;
                 StringBuilder tmp = new StringBuilder();
                 foreach (Pullenti.Ner.MetaToken a in npt.Adjectives)
                 {
                     string s = a.GetNormalCaseText(Pullenti.Morph.MorphClass.Adjective, Pullenti.Morph.MorphNumber.Singular, test.Morph.Gender, false);
                     if (tmp.Length > 0)
                     {
                         tmp.Append(' ');
                     }
                     tmp.Append(s);
                 }
                 test.NameValue = tmp.ToString();
                 test.Chars     = t.Chars;
                 if (test.Kind == Pullenti.Ner.Named.NamedEntityKind.Location)
                 {
                     test.IsWellknown = true;
                 }
                 return(test);
             }
         }
     }
     if ((Pullenti.Ner.Core.BracketHelper.IsBracket(t, true) && t.Next != null && t.Next.Chars.IsLetter) && !t.Next.Chars.IsAllLower)
     {
         Pullenti.Ner.Core.BracketSequenceToken br = Pullenti.Ner.Core.BracketHelper.TryParse(t, Pullenti.Ner.Core.BracketParseAttr.No, 100);
         if (br != null)
         {
             NamedItemToken res = new NamedItemToken(t, br.EndToken);
             res.IsInBracket = true;
             res.NameValue   = Pullenti.Ner.Core.MiscHelper.GetTextValue(t, br.EndToken, Pullenti.Ner.Core.GetTextAttr.No);
             // If the whole bracket content is a known name, use its canonic text and kind.
             nam             = m_Names.TryParse(t.Next, Pullenti.Ner.Core.TerminParseAttr.No);
             if (nam != null && nam.EndToken == br.EndToken.Previous)
             {
                 res.Kind        = (Pullenti.Ner.Named.NamedEntityKind)nam.Termin.Tag;
                 res.IsWellknown = true;
                 res.NameValue   = nam.Termin.CanonicText;
             }
             return(res);
         }
     }
     if (((t is Pullenti.Ner.TextToken) && t.Chars.IsLetter && !t.Chars.IsAllLower) && t.LengthChar > 2)
     {
         NamedItemToken res = new NamedItemToken(t, t)
         {
             Morph = t.Morph
         };
         string str = (t as Pullenti.Ner.TextToken).Term;
         // Words ending with "О", "И" or "Ы" are taken as written instead of being normalised.
         if (str.EndsWith("О") || str.EndsWith("И") || str.EndsWith("Ы"))
         {
             res.NameValue = str;
         }
         else
         {
             res.NameValue = t.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false);
         }
         res.Chars = t.Chars;
         // Hyphenated compound whose halves are both Cyrillic or both non-Cyrillic: extend over the second half.
         if (((!t.IsWhitespaceAfter && t.Next != null && t.Next.IsHiphen) && (t.Next.Next is Pullenti.Ner.TextToken) && !t.Next.Next.IsWhitespaceAfter) && t.Chars.IsCyrillicLetter == t.Next.Next.Chars.IsCyrillicLetter)
         {
             t             = (res.EndToken = t.Next.Next);
             res.NameValue = string.Format("{0}-{1}", res.NameValue, t.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false));
         }
         return(res);
     }
     return(null);
 }
예제 #23
0
        /// <summary>
        /// Wraps a referent token into a keyword token.
        /// Denomination referents get a Referent-type keyword with their values as normals,
        /// money referents get an Object-type keyword with the currency as normal; phone, URI,
        /// bank-data, DATE, DATERANGE and BOOKLINKREF referents are left untouched. For any other
        /// referent the nested referent tokens are processed first, then a Referent-type keyword is created.
        /// </summary>
        /// <param name="ad">Analyzer data used to register the created keywords.</param>
        /// <param name="t">Token to wrap.</param>
        /// <param name="cur">Current position, forwarded to _setRank.</param>
        /// <param name="max">Total count, forwarded to _setRank.</param>
        /// <returns>The newly embedded keyword token, or <paramref name="t"/> itself when nothing was created.</returns>
        Pullenti.Ner.Token _addReferents(Pullenti.Ner.Core.AnalyzerData ad, Pullenti.Ner.Token t, int cur, int max)
        {
            // Only referent tokens that actually carry a referent are of interest.
            Pullenti.Ner.Referent referent = (t is Pullenti.Ner.ReferentToken) ? t.GetReferent() : null;
            if (referent == null)
            {
                return t;
            }

            Pullenti.Ner.Denomination.DenominationReferent denomination = referent as Pullenti.Ner.Denomination.DenominationReferent;
            if (denomination != null)
            {
                KeywordReferent denominationKeyword = new KeywordReferent();
                denominationKeyword.Typ = KeywordType.Referent;
                // Every value of the denomination becomes a normal form of the keyword.
                foreach (Pullenti.Ner.Slot slot in denomination.Slots)
                {
                    if (slot.TypeName == Pullenti.Ner.Denomination.DenominationReferent.ATTR_VALUE)
                    {
                        denominationKeyword.AddSlot(KeywordReferent.ATTR_NORMAL, slot.Value, false, 0);
                    }
                }
                denominationKeyword.AddSlot(KeywordReferent.ATTR_REF, denomination, false, 0);
                Pullenti.Ner.ReferentToken denominationToken = new Pullenti.Ner.ReferentToken(ad.RegisterReferent(denominationKeyword), t, t);
                t.Kit.EmbedToken(denominationToken);
                return denominationToken;
            }

            // These referent kinds never produce keywords.
            if (referent is Pullenti.Ner.Phone.PhoneReferent)
            {
                return t;
            }
            if (referent is Pullenti.Ner.Uri.UriReferent)
            {
                return t;
            }
            if (referent is Pullenti.Ner.Bank.BankDataReferent)
            {
                return t;
            }

            Pullenti.Ner.Money.MoneyReferent money = referent as Pullenti.Ner.Money.MoneyReferent;
            if (money != null)
            {
                KeywordReferent moneyKeyword = new KeywordReferent();
                moneyKeyword.Typ = KeywordType.Object;
                moneyKeyword.AddSlot(KeywordReferent.ATTR_NORMAL, money.Currency, false, 0);
                Pullenti.Ner.ReferentToken moneyToken = new Pullenti.Ner.ReferentToken(ad.RegisterReferent(moneyKeyword), t, t);
                t.Kit.EmbedToken(moneyToken);
                return moneyToken;
            }

            if (referent.TypeName == "DATE" || referent.TypeName == "DATERANGE" || referent.TypeName == "BOOKLINKREF")
            {
                return t;
            }

            // Process the referent tokens nested inside this one before wrapping it.
            Pullenti.Ner.Token inner = (t as Pullenti.Ner.MetaToken).BeginToken;
            while (inner != null && inner.EndChar <= t.EndChar)
            {
                if (inner is Pullenti.Ner.ReferentToken)
                {
                    _addReferents(ad, inner, cur, max);
                }
                inner = inner.Next;
            }

            KeywordReferent keyword = new KeywordReferent();
            keyword.Typ = KeywordType.Referent;

            // For GEO referents the "ALPHA2" value is preferred; otherwise the referent's string form is used.
            string normal = (referent.TypeName == "GEO" ? referent.GetStringValue("ALPHA2") : null) ?? referent.ToString(true, null, 0);
            if (normal != null)
            {
                keyword.AddSlot(KeywordReferent.ATTR_NORMAL, normal.ToUpper(), false, 0);
            }
            keyword.AddSlot(KeywordReferent.ATTR_REF, t.GetReferent(), false, 0);
            _setRank(keyword, cur, max);

            Pullenti.Ner.ReferentToken keywordToken = new Pullenti.Ner.ReferentToken(ad.RegisterReferent(keyword), t, t);
            t.Kit.EmbedToken(keywordToken);
            return keywordToken;
        }
예제 #24
0
        /// <summary>
        /// Tries to recognize a single weapon-description item (noun, brand, name, model,
        /// number, caliber or developer) starting at token <paramref name="t"/>.
        /// </summary>
        /// <param name="t">First token to analyze; null yields null.</param>
        /// <param name="prev">Previously recognized item, or null. It is used to decide whether an
        /// unknown capitalized word may be taken as a name/brand/model.</param>
        /// <param name="afterConj">Not read directly here; only forwarded to the recursive calls
        /// made for bracketed text and after the word "МАРКА".</param>
        /// <param name="attachHigh">Not read directly here; only forwarded to the recursive call
        /// made for bracketed text.</param>
        /// <returns>The recognized item, or null when nothing matches at <paramref name="t"/>.</returns>
        static WeaponItemToken _TryParse(Pullenti.Ner.Token t, WeaponItemToken prev, bool afterConj, bool attachHigh = false)
        {
            if (t == null)
            {
                return(null);
            }
            // An opening bracket: parse what follows and, if possible, swallow the brackets too.
            if (Pullenti.Ner.Core.BracketHelper.IsBracket(t, true))
            {
                WeaponItemToken wit = _TryParse(t.Next, prev, afterConj, attachHigh);
                if (wit != null)
                {
                    if (wit.EndToken.Next == null)
                    {
                        // Text ends right after the item: accept it without a closing bracket.
                        wit.BeginToken = t;
                        return(wit);
                    }
                    if (Pullenti.Ner.Core.BracketHelper.IsBracket(wit.EndToken.Next, true))
                    {
                        wit.BeginToken = t;
                        wit.EndToken   = wit.EndToken.Next;
                        return(wit);
                    }
                }
                // Otherwise fall through and try the remaining rules on the bracket token itself.
            }
            // Dictionary (ontology) lookup: the termin's Tag carries the item type.
            Pullenti.Ner.Core.TerminToken tok = m_Ontology.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
            if (tok != null)
            {
                WeaponItemToken res = new WeaponItemToken(t, tok.EndToken);
                res.Typ = (Typs)tok.Termin.Tag;
                if (res.Typ == Typs.Noun)
                {
                    res.Value = tok.Termin.CanonicText;
                    if (tok.Termin.Tag2 != null)
                    {
                        res.IsDoubt = true;
                    }
                    // Extend the noun to the right with brands and dictionary adjectives.
                    for (Pullenti.Ner.Token tt = res.EndToken.Next; tt != null; tt = tt.Next)
                    {
                        if (tt.WhitespacesBeforeCount > 2)
                        {
                            break;
                        }
                        WeaponItemToken wit = _TryParse(tt, null, false, false);
                        if (wit != null)
                        {
                            if (wit.Typ == Typs.Brand)
                            {
                                res.InnerTokens.Add(wit);
                                res.EndToken = (tt = wit.EndToken);
                                continue;
                            }
                            // Any other recognized item ends the noun group.
                            break;
                        }
                        if (!(tt is Pullenti.Ner.TextToken))
                        {
                            break;
                        }
                        Pullenti.Morph.MorphClass mc = tt.GetMorphClassInDictionary();
                        if (mc == Pullenti.Morph.MorphClass.Adjective)
                        {
                            // Build AltValue as "<adjectives...> <noun>": strip the trailing noun,
                            // append the new adjective, then put the noun back at the end.
                            if (res.AltValue == null)
                            {
                                res.AltValue = res.Value;
                            }
                            if (res.AltValue.EndsWith(res.Value))
                            {
                                res.AltValue = res.AltValue.Substring(0, res.AltValue.Length - res.Value.Length);
                            }
                            res.AltValue = string.Format("{0}{1} {2}", res.AltValue, (tt as Pullenti.Ner.TextToken).Term, res.Value);
                            res.EndToken = tt;
                            continue;
                        }
                        break;
                    }
                    return(res);
                }
                if (res.Typ == Typs.Brand || res.Typ == Typs.Name)
                {
                    res.Value = tok.Termin.CanonicText;
                    return(res);
                }
                if (res.Typ == Typs.Model)
                {
                    res.Value = tok.Termin.CanonicText;
                    // Tag2 may hold a list of related termins; each becomes an inner item.
                    if (tok.Termin.Tag2 is List <Pullenti.Ner.Core.Termin> )
                    {
                        List <Pullenti.Ner.Core.Termin> li = tok.Termin.Tag2 as List <Pullenti.Ner.Core.Termin>;
                        foreach (Pullenti.Ner.Core.Termin to in li)
                        {
                            WeaponItemToken wit = new WeaponItemToken(t, tok.EndToken)
                            {
                                Typ = (Typs)to.Tag, Value = to.CanonicText, IsInternal = tok.BeginToken == tok.EndToken
                            };
                            res.InnerTokens.Add(wit);
                            if (to.AdditionalVars != null && to.AdditionalVars.Count > 0)
                            {
                                wit.AltValue = to.AdditionalVars[0].CanonicText;
                            }
                        }
                    }
                    res._correctModel();
                    return(res);
                }
                // Any other termin type: fall through to the heuristic rules below.
            }
            // A number prefix followed by something the transport number parser accepts.
            Pullenti.Ner.Token nnn = Pullenti.Ner.Core.MiscHelper.CheckNumberPrefix(t);
            if (nnn != null)
            {
                Pullenti.Ner.Transport.Internal.TransItemToken tit = Pullenti.Ner.Transport.Internal.TransItemToken._attachNumber(nnn, true);
                if (tit != null)
                {
                    WeaponItemToken res = new WeaponItemToken(t, tit.EndToken)
                    {
                        Typ = Typs.Number
                    };
                    res.Value    = tit.Value;
                    res.AltValue = tit.AltValue;
                    return(res);
                }
            }
            // Short (fewer than 4 chars) all-uppercase word: possible model abbreviation.
            if (((t is Pullenti.Ner.TextToken) && t.Chars.IsLetter && t.Chars.IsAllUpper) && (t.LengthChar < 4))
            {
                // "XX-123" / "XX.123": the separator is included, _correctModel picks up the number.
                if ((t.Next != null && ((t.Next.IsHiphen || t.Next.IsChar('.'))) && (t.Next.WhitespacesAfterCount < 2)) && (t.Next.Next is Pullenti.Ner.NumberToken))
                {
                    WeaponItemToken res = new WeaponItemToken(t, t.Next)
                    {
                        Typ = Typs.Model, IsDoubt = true
                    };
                    res.Value = (t as Pullenti.Ner.TextToken).Term;
                    res._correctModel();
                    return(res);
                }
                // "XX123": number glued directly to the letters.
                if ((t.Next is Pullenti.Ner.NumberToken) && !t.IsWhitespaceAfter)
                {
                    WeaponItemToken res = new WeaponItemToken(t, t)
                    {
                        Typ = Typs.Model, IsDoubt = true
                    };
                    res.Value = (t as Pullenti.Ner.TextToken).Term;
                    res._correctModel();
                    return(res);
                }
                // "СП" directly before a model or brand is returned as a noun with fixed values.
                if ((t as Pullenti.Ner.TextToken).Term == "СП" && (t.WhitespacesAfterCount < 3) && (t.Next is Pullenti.Ner.TextToken))
                {
                    WeaponItemToken pp = _TryParse(t.Next, null, false, false);
                    if (pp != null && ((pp.Typ == Typs.Model || pp.Typ == Typs.Brand)))
                    {
                        WeaponItemToken res = new WeaponItemToken(t, t)
                        {
                            Typ = Typs.Noun
                        };
                        res.Value    = "ПИСТОЛЕТ";
                        res.AltValue = "СЛУЖЕБНЫЙ ПИСТОЛЕТ";
                        return(res);
                    }
                }
            }
            // Unknown word that is not all-lowercase and longer than 2 chars:
            // accepted only in a suitable context (after a noun/model/brand, or after a comma/"and").
            if (((t is Pullenti.Ner.TextToken) && t.Chars.IsLetter && !t.Chars.IsAllLower) && t.LengthChar > 2)
            {
                bool ok = false;
                if (prev != null && ((prev.Typ == Typs.Noun || prev.Typ == Typs.Model || prev.Typ == Typs.Brand)))
                {
                    ok = true;
                }
                else if (prev == null && t.Previous != null && t.Previous.IsCommaAnd)
                {
                    ok = true;
                }
                if (ok)
                {
                    WeaponItemToken res = new WeaponItemToken(t, t)
                    {
                        Typ = Typs.Name, IsDoubt = true
                    };
                    res.Value = (t as Pullenti.Ner.TextToken).Term;
                    // Hyphenated pair with the same character class is joined into one value.
                    if ((t.Next != null && t.Next.IsHiphen && (t.Next.Next is Pullenti.Ner.TextToken)) && t.Next.Next.Chars == t.Chars)
                    {
                        res.Value    = string.Format("{0}-{1}", res.Value, (t.Next.Next as Pullenti.Ner.TextToken).Term);
                        res.EndToken = t.Next.Next;
                    }
                    if (prev != null && prev.Typ == Typs.Noun)
                    {
                        res.Typ = Typs.Brand;
                    }
                    // A following number (after a hyphen or glued) turns the word into a model.
                    if (res.EndToken.Next != null && res.EndToken.Next.IsHiphen && (res.EndToken.Next.Next is Pullenti.Ner.NumberToken))
                    {
                        res.Typ = Typs.Model;
                        res._correctModel();
                    }
                    else if (!res.EndToken.IsWhitespaceAfter && (res.EndToken.Next is Pullenti.Ner.NumberToken))
                    {
                        res.Typ = Typs.Model;
                        res._correctModel();
                    }
                    return(res);
                }
            }
            // The word "МАРКА" introduces a brand.
            if (t.IsValue("МАРКА", null))
            {
                WeaponItemToken res = _TryParse(t.Next, prev, afterConj, false);
                if (res != null && res.Typ == Typs.Brand)
                {
                    res.BeginToken = t;
                    return(res);
                }
                // Brand given as a bracketed/quoted sequence: its text becomes the value.
                if (Pullenti.Ner.Core.BracketHelper.CanBeStartOfSequence(t.Next, true, false))
                {
                    Pullenti.Ner.Core.BracketSequenceToken br = Pullenti.Ner.Core.BracketHelper.TryParse(t.Next, Pullenti.Ner.Core.BracketParseAttr.No, 100);
                    if (br != null)
                    {
                        return new WeaponItemToken(t, br.EndToken)
                        {
                            Typ = Typs.Brand, Value = Pullenti.Ner.Core.MiscHelper.GetTextValue(br.BeginToken, br.EndToken, Pullenti.Ner.Core.GetTextAttr.No)
                        };
                    }
                }
                if (((t is Pullenti.Ner.TextToken) && (t.Next is Pullenti.Ner.TextToken) && t.Next.LengthChar > 1) && !t.Next.Chars.IsAllLower)
                {
                    // NOTE(review): Value is taken from t (the "МАРКА" token), not from t.Next,
                    // although the token spans both - presumably t.Next was intended; confirm.
                    return new WeaponItemToken(t, t.Next)
                    {
                        Typ = Typs.Brand, Value = (t as Pullenti.Ner.TextToken).Term
                    };
                }
            }
            // "КАЛИБР" [- or :] <number with optional unit>.
            if (t.IsValue("КАЛИБР", "КАЛІБР"))
            {
                Pullenti.Ner.Token tt1 = t.Next;
                if (tt1 != null && ((tt1.IsHiphen || tt1.IsChar(':'))))
                {
                    tt1 = tt1.Next;
                }
                Pullenti.Ner.Measure.Internal.NumbersWithUnitToken num = Pullenti.Ner.Measure.Internal.NumbersWithUnitToken.TryParse(tt1, null, false, false, false, false);
                if (num != null && num.SingleVal != null)
                {
                    return new WeaponItemToken(t, num.EndToken)
                    {
                        Typ = Typs.Caliber, Value = Pullenti.Ner.Core.NumberHelper.DoubleToString(num.SingleVal.Value)
                    };
                }
            }
            // A number is a caliber when measured in "мм" or followed by the word "КАЛИБР".
            if (t is Pullenti.Ner.NumberToken)
            {
                Pullenti.Ner.Measure.Internal.NumbersWithUnitToken num = Pullenti.Ner.Measure.Internal.NumbersWithUnitToken.TryParse(t, null, false, false, false, false);
                if (num != null && num.SingleVal != null)
                {
                    if (num.Units.Count == 1 && num.Units[0].Unit != null && num.Units[0].Unit.NameCyr == "мм")
                    {
                        return new WeaponItemToken(t, num.EndToken)
                        {
                            Typ = Typs.Caliber, Value = Pullenti.Ner.Core.NumberHelper.DoubleToString(num.SingleVal.Value)
                        };
                    }

                    if (num.EndToken.Next != null && num.EndToken.Next.IsValue("КАЛИБР", "КАЛІБР"))
                    {
                        return new WeaponItemToken(t, num.EndToken.Next)
                        {
                            Typ = Typs.Caliber, Value = Pullenti.Ner.Core.NumberHelper.DoubleToString(num.SingleVal.Value)
                        };
                    }
                }
            }
            // "ПРОИЗВОДСТВО" [- or :] <organization or geo referent> gives the developer.
            if (t.IsValue("ПРОИЗВОДСТВО", "ВИРОБНИЦТВО"))
            {
                Pullenti.Ner.Token tt1 = t.Next;
                if (tt1 != null && ((tt1.IsHiphen || tt1.IsChar(':'))))
                {
                    tt1 = tt1.Next;
                }
                if (tt1 is Pullenti.Ner.ReferentToken)
                {
                    if ((tt1.GetReferent() is Pullenti.Ner.Org.OrganizationReferent) || (tt1.GetReferent() is Pullenti.Ner.Geo.GeoReferent))
                    {
                        return new WeaponItemToken(t, tt1)
                        {
                            Typ = Typs.Developer, Ref = tt1.GetReferent()
                        };
                    }
                }
            }
            return(null);
        }

        /// <summary>
        /// Extends this model item to the right with a numeric designation.
        /// An optional separator and a number (plus single letters attached to it) are
        /// appended to Value as "-number+letters" and AltValue is recomputed from the new Value;
        /// afterwards one more "-number" / "/number" / "\number" group written without
        /// whitespace is appended as well.
        /// </summary>
        void _correctModel()
        {
            Pullenti.Ner.Token cur = EndToken.Next;
            if (cur == null || cur.WhitespacesBeforeCount > 2)
            {
                return;
            }

            // Skip one separator token in front of the number.
            // NOTE(review): IsValue is given the multi-character string ":\/." here, while the
            // check further below uses IsCharOf("\\/") - presumably IsCharOf was intended; confirm.
            if (cur.IsValue(":\\/.", null) || cur.IsHiphen)
            {
                cur = cur.Next;
            }

            if (cur is Pullenti.Ner.NumberToken)
            {
                StringBuilder suffix = new StringBuilder();
                suffix.Append(((Pullenti.Ner.NumberToken)cur).Value);
                // Alphabet of the model name decides which way attached letters are converted.
                bool nameIsLatin = Pullenti.Morph.LanguageHelper.IsLatinChar(Value[0]);
                EndToken = cur;

                cur = cur.Next;
                while (cur != null)
                {
                    bool singleLetter = (cur is Pullenti.Ner.TextToken) && cur.LengthChar == 1 && cur.Chars.IsLetter;
                    if (!singleLetter)
                    {
                        break;
                    }
                    bool attached = !cur.IsWhitespaceBefore || ((cur.Previous != null && cur.Previous.IsHiphen));
                    if (!attached)
                    {
                        break;
                    }

                    char letter = ((Pullenti.Ner.TextToken)cur).Term[0];
                    EndToken = cur;

                    // Convert the letter into the alphabet of the model name when a counterpart exists.
                    char converted = (char)0;
                    if (Pullenti.Morph.LanguageHelper.IsLatinChar(letter) && !nameIsLatin)
                    {
                        converted = Pullenti.Morph.LanguageHelper.GetCyrForLat(letter);
                    }
                    else if (Pullenti.Morph.LanguageHelper.IsCyrillicChar(letter) && nameIsLatin)
                    {
                        converted = Pullenti.Morph.LanguageHelper.GetLatForCyr(letter);
                    }
                    if (converted != ((char)0))
                    {
                        letter = converted;
                    }

                    suffix.Append(letter);
                    cur = cur.Next;
                }

                Value    = string.Format("{0}-{1}", Value, suffix.ToString());
                AltValue = Pullenti.Ner.Core.MiscHelper.CreateCyrLatAlternative(Value);
            }

            // Optional trailing "-N", "/N" or "\N" written without any whitespace.
            if (EndToken.IsWhitespaceAfter || EndToken.Next == null)
            {
                return;
            }
            if (!(EndToken.Next.IsHiphen || EndToken.Next.IsCharOf("\\/")))
            {
                return;
            }
            if (EndToken.Next.IsWhitespaceAfter || !(EndToken.Next.Next is Pullenti.Ner.NumberToken))
            {
                return;
            }

            EndToken = EndToken.Next.Next;
            Value    = string.Format("{0}-{1}", Value, ((Pullenti.Ner.NumberToken)EndToken).Value);
            if (AltValue != null)
            {
                AltValue = string.Format("{0}-{1}", AltValue, ((Pullenti.Ner.NumberToken)EndToken).Value);
            }
        }
예제 #25
0
        internal static Pullenti.Ner.ReferentToken CreateReferentToken(Pullenti.Ner.Person.PersonReferent p, Pullenti.Ner.Token begin, Pullenti.Ner.Token end, Pullenti.Ner.MorphCollection morph, List <PersonAttrToken> attrs, Pullenti.Ner.Person.PersonAnalyzer.PersonAnalyzerData ad, bool forAttribute, bool afterBePredicate)
        {
            if (p == null)
            {
                return(null);
            }
            bool hasPrefix = false;

            if (attrs != null)
            {
                foreach (PersonAttrToken a in attrs)
                {
                    if (a.Typ == PersonAttrTerminType.BestRegards)
                    {
                        hasPrefix = true;
                    }
                    else
                    {
                        if (a.BeginChar < begin.BeginChar)
                        {
                            begin = a.BeginToken;
                            if ((a.EndToken.Next != null && a.EndToken.Next.IsChar(')') && begin.Previous != null) && begin.Previous.IsChar('('))
                            {
                                begin = begin.Previous;
                            }
                        }
                        if (a.Typ != PersonAttrTerminType.Prefix)
                        {
                            if (a.Age != null)
                            {
                                p.AddSlot(Pullenti.Ner.Person.PersonReferent.ATTR_AGE, a.Age, false, 0);
                            }
                            if (a.PropRef == null)
                            {
                                p.AddSlot(Pullenti.Ner.Person.PersonReferent.ATTR_ATTR, a.Value, false, 0);
                            }
                            else
                            {
                                p.AddSlot(Pullenti.Ner.Person.PersonReferent.ATTR_ATTR, a, false, 0);
                            }
                        }
                        else if (a.Gender == Pullenti.Morph.MorphGender.Feminie && !p.IsFemale)
                        {
                            p.IsFemale = true;
                        }
                        else if (a.Gender == Pullenti.Morph.MorphGender.Masculine && !p.IsMale)
                        {
                            p.IsMale = true;
                        }
                    }
                }
            }
            else if ((begin.Previous is Pullenti.Ner.TextToken) && (begin.WhitespacesBeforeCount < 3))
            {
                if ((begin.Previous as Pullenti.Ner.TextToken).Term == "ИП")
                {
                    PersonAttrToken a = new PersonAttrToken(begin.Previous, begin.Previous);
                    a.PropRef      = new Pullenti.Ner.Person.PersonPropertyReferent();
                    a.PropRef.Name = "индивидуальный предприниматель";
                    p.AddSlot(Pullenti.Ner.Person.PersonReferent.ATTR_ATTR, a, false, 0);
                    begin = begin.Previous;
                }
            }
            Pullenti.Ner.MorphCollection m0 = new Pullenti.Ner.MorphCollection();
            foreach (Pullenti.Morph.MorphBaseInfo it in morph.Items)
            {
                Pullenti.Morph.MorphBaseInfo bi = new Pullenti.Morph.MorphBaseInfo();
                bi.CopyFrom(it);
                bi.Number = Pullenti.Morph.MorphNumber.Singular;
                if (bi.Gender == Pullenti.Morph.MorphGender.Undefined)
                {
                    if (p.IsMale && !p.IsFemale)
                    {
                        bi.Gender = Pullenti.Morph.MorphGender.Masculine;
                    }
                    if (!p.IsMale && p.IsFemale)
                    {
                        bi.Gender = Pullenti.Morph.MorphGender.Feminie;
                    }
                }
                m0.AddItem(bi);
            }
            morph = m0;
            if ((attrs != null && attrs.Count > 0 && !attrs[0].Morph.Case.IsUndefined) && morph.Case.IsUndefined)
            {
                morph.Case = attrs[0].Morph.Case;
                if (attrs[0].Morph.Number == Pullenti.Morph.MorphNumber.Singular)
                {
                    morph.Number = Pullenti.Morph.MorphNumber.Singular;
                }
                if (p.IsMale && !p.IsFemale)
                {
                    morph.Gender = Pullenti.Morph.MorphGender.Masculine;
                }
                else if (p.IsFemale)
                {
                    morph.Gender = Pullenti.Morph.MorphGender.Feminie;
                }
            }
            if (begin.Previous != null)
            {
                Pullenti.Ner.Token ttt = begin.Previous;
                if (ttt.IsValue("ИМЕНИ", "ІМЕНІ"))
                {
                    forAttribute = true;
                }
                else
                {
                    if (ttt.IsChar('.') && ttt.Previous != null)
                    {
                        ttt = ttt.Previous;
                    }
                    if (ttt.WhitespacesAfterCount < 3)
                    {
                        if (ttt.IsValue("ИМ", "ІМ"))
                        {
                            forAttribute = true;
                        }
                    }
                }
            }
            if (forAttribute)
            {
                return new Pullenti.Ner.ReferentToken(p, begin, end)
                       {
                           Morph = morph, MiscAttrs = (int)p.m_PersonIdentityTyp
                       }
            }
            ;
            if ((begin.Previous != null && begin.Previous.IsCommaAnd && (begin.Previous.Previous is Pullenti.Ner.ReferentToken)) && (begin.Previous.Previous.GetReferent() is Pullenti.Ner.Person.PersonReferent))
            {
                Pullenti.Ner.ReferentToken rt00 = begin.Previous.Previous as Pullenti.Ner.ReferentToken;

                for (Pullenti.Ner.Token ttt = (Pullenti.Ner.Token)rt00; ttt != null;)
                {
                    if (ttt.Previous == null || !(ttt.Previous.Previous is Pullenti.Ner.ReferentToken))
                    {
                        break;
                    }
                    if (!ttt.Previous.IsCommaAnd || !(ttt.Previous.Previous.GetReferent() is Pullenti.Ner.Person.PersonReferent))
                    {
                        break;
                    }
                    rt00 = ttt.Previous.Previous as Pullenti.Ner.ReferentToken;
                    ttt  = rt00;
                }
                if (rt00.BeginToken.GetReferent() is Pullenti.Ner.Person.PersonPropertyReferent)
                {
                    bool ok = false;
                    if ((rt00.BeginToken as Pullenti.Ner.ReferentToken).EndToken.Next != null && (rt00.BeginToken as Pullenti.Ner.ReferentToken).EndToken.Next.IsChar(':'))
                    {
                        ok = true;
                    }
                    else if (rt00.BeginToken.Morph.Number == Pullenti.Morph.MorphNumber.Plural)
                    {
                        ok = true;
                    }
                    if (ok)
                    {
                        p.AddSlot(Pullenti.Ner.Person.PersonReferent.ATTR_ATTR, rt00.BeginToken.GetReferent(), false, 0);
                    }
                }
            }
            if (ad != null)
            {
                if (ad.OverflowLevel > 10)
                {
                    return new Pullenti.Ner.ReferentToken(p, begin, end)
                           {
                               Morph = morph, MiscAttrs = (int)p.m_PersonIdentityTyp
                           }
                }
                ;
                ad.OverflowLevel++;
            }
            List <PersonAttrToken> attrs1 = null;
            bool hasPosition = false;
            bool openBr      = false;

            for (Pullenti.Ner.Token t = end.Next; t != null; t = t.Next)
            {
                if (t.IsTableControlChar)
                {
                    break;
                }
                if (t.IsNewlineBefore)
                {
                    if (t.NewlinesBeforeCount > 2)
                    {
                        break;
                    }
                    if (attrs1 != null && attrs1.Count > 0)
                    {
                        break;
                    }
                    Pullenti.Ner.Mail.Internal.MailLine ml = Pullenti.Ner.Mail.Internal.MailLine.Parse(t, 0, 0);
                    if (ml != null && ml.Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.From)
                    {
                        break;
                    }
                    if (t.Chars.IsCapitalUpper)
                    {
                        PersonAttrToken attr1 = PersonAttrToken.TryAttach(t, (ad == null ? null : ad.LocalOntology), PersonAttrToken.PersonAttrAttachAttrs.No);
                        bool            ok1   = false;
                        if (attr1 != null)
                        {
                            if (hasPrefix || attr1.IsNewlineAfter || ((attr1.EndToken.Next != null && attr1.EndToken.Next.IsTableControlChar)))
                            {
                                ok1 = true;
                            }
                            else
                            {
                                for (Pullenti.Ner.Token tt2 = t.Next; tt2 != null && tt2.EndChar <= attr1.EndChar; tt2 = tt2.Next)
                                {
                                    if (tt2.IsWhitespaceBefore)
                                    {
                                        ok1 = true;
                                    }
                                }
                            }
                        }
                        else
                        {
                            Pullenti.Ner.Token ttt = CorrectTailAttributes(p, t);
                            if (ttt != null && ttt != t)
                            {
                                end = (t = ttt);
                                continue;
                            }
                        }
                        if (!ok1)
                        {
                            break;
                        }
                    }
                }
                if (t.IsHiphen || t.IsCharOf("_>|"))
                {
                    continue;
                }
                if (t.IsValue("МОДЕЛЬ", null))
                {
                    break;
                }
                Pullenti.Ner.Token tt = CorrectTailAttributes(p, t);
                if (tt != t && tt != null)
                {
                    end = (t = tt);
                    continue;
                }
                bool isBe = false;
                if (t.IsChar('(') && t == end.Next)
                {
                    openBr = true;
                    t      = t.Next;
                    if (t == null)
                    {
                        break;
                    }
                    PersonItemToken pit1 = PersonItemToken.TryAttach(t, null, PersonItemToken.ParseAttr.No, null);
                    if ((pit1 != null && t.Chars.IsCapitalUpper && pit1.EndToken.Next != null) && (t is Pullenti.Ner.TextToken) && pit1.EndToken.Next.IsChar(')'))
                    {
                        if (pit1.Lastname != null)
                        {
                            Pullenti.Morph.MorphBaseInfo inf = new Pullenti.Morph.MorphBaseInfo()
                            {
                                Case = Pullenti.Morph.MorphCase.Nominative
                            };
                            if (p.IsMale)
                            {
                                inf.Gender |= Pullenti.Morph.MorphGender.Masculine;
                            }
                            if (p.IsFemale)
                            {
                                inf.Gender |= Pullenti.Morph.MorphGender.Feminie;
                            }
                            PersonMorphCollection sur = PersonIdentityToken.CreateLastname(pit1, inf);
                            if (sur != null)
                            {
                                p.AddFioIdentity(sur, null, null);
                                end = (t = pit1.EndToken.Next);
                                continue;
                            }
                        }
                    }
                    if ((t is Pullenti.Ner.TextToken) && t.Chars.IsLatinLetter)
                    {
                        List <PersonItemToken> pits = PersonItemToken.TryAttachList(t, null, PersonItemToken.ParseAttr.CanBeLatin, 10);
                        if (((pits != null && pits.Count >= 2 && pits.Count <= 3) && pits[0].Chars.IsLatinLetter && pits[1].Chars.IsLatinLetter) && pits[pits.Count - 1].EndToken.Next != null && pits[pits.Count - 1].EndToken.Next.IsChar(')'))
                        {
                            Pullenti.Ner.Person.PersonReferent pr2 = new Pullenti.Ner.Person.PersonReferent();
                            int cou = 0;
                            foreach (PersonItemToken pi in pits)
                            {
                                foreach (Pullenti.Ner.Slot si in p.Slots)
                                {
                                    if (si.TypeName == Pullenti.Ner.Person.PersonReferent.ATTR_FIRSTNAME || si.TypeName == Pullenti.Ner.Person.PersonReferent.ATTR_MIDDLENAME || si.TypeName == Pullenti.Ner.Person.PersonReferent.ATTR_LASTNAME)
                                    {
                                        if (Pullenti.Ner.Core.MiscHelper.CanBeEqualCyrAndLatSS(si.Value.ToString(), pi.Value))
                                        {
                                            cou++;
                                            pr2.AddSlot(si.TypeName, pi.Value, false, 0);
                                            break;
                                        }
                                    }
                                }
                            }
                            if (cou == pits.Count)
                            {
                                foreach (Pullenti.Ner.Slot si in pr2.Slots)
                                {
                                    p.AddSlot(si.TypeName, si.Value, false, 0);
                                }
                                end = (t = pits[pits.Count - 1].EndToken.Next);
                                continue;
                            }
                        }
                    }
                }
                else if (t.IsComma)
                {
                    t = t.Next;
                    if ((t is Pullenti.Ner.TextToken) && (t as Pullenti.Ner.TextToken).IsValue("WHO", null))
                    {
                        continue;
                    }
                    if ((t is Pullenti.Ner.TextToken) && t.Chars.IsLatinLetter)
                    {
                        List <PersonItemToken> pits = PersonItemToken.TryAttachList(t, null, PersonItemToken.ParseAttr.CanBeLatin, 10);
                        if ((pits != null && pits.Count >= 2 && pits.Count <= 3) && pits[0].Chars.IsLatinLetter && pits[1].Chars.IsLatinLetter)
                        {
                            Pullenti.Ner.Person.PersonReferent pr2 = new Pullenti.Ner.Person.PersonReferent();
                            int cou = 0;
                            foreach (PersonItemToken pi in pits)
                            {
                                foreach (Pullenti.Ner.Slot si in p.Slots)
                                {
                                    if (si.TypeName == Pullenti.Ner.Person.PersonReferent.ATTR_FIRSTNAME || si.TypeName == Pullenti.Ner.Person.PersonReferent.ATTR_MIDDLENAME || si.TypeName == Pullenti.Ner.Person.PersonReferent.ATTR_LASTNAME)
                                    {
                                        if (Pullenti.Ner.Core.MiscHelper.CanBeEqualCyrAndLatSS(si.Value.ToString(), pi.Value))
                                        {
                                            cou++;
                                            pr2.AddSlot(si.TypeName, pi.Value, false, 0);
                                            break;
                                        }
                                    }
                                }
                            }
                            if (cou == pits.Count)
                            {
                                foreach (Pullenti.Ner.Slot si in pr2.Slots)
                                {
                                    p.AddSlot(si.TypeName, si.Value, false, 0);
                                }
                                end = (t = pits[pits.Count - 1].EndToken);
                                continue;
                            }
                        }
                    }
                }
                else if ((t is Pullenti.Ner.TextToken) && (t as Pullenti.Ner.TextToken).IsVerbBe)
                {
                    t = t.Next;
                }
                else if (t.IsAnd && t.IsWhitespaceAfter && !t.IsNewlineAfter)
                {
                    if (t == end.Next)
                    {
                        break;
                    }
                    t = t.Next;
                }
                else if (t.IsHiphen && t == end.Next)
                {
                    t = t.Next;
                }
                else if (t.IsChar('.') && t == end.Next && hasPrefix)
                {
                    t = t.Next;
                }
                Pullenti.Ner.Token ttt2 = CreateNickname(p, t);
                if (ttt2 != null)
                {
                    t = (end = ttt2);
                    continue;
                }
                if (t == null)
                {
                    break;
                }
                PersonAttrToken attr = null;
                attr = PersonAttrToken.TryAttach(t, (ad == null ? null : ad.LocalOntology), PersonAttrToken.PersonAttrAttachAttrs.No);
                if (attr == null)
                {
                    if ((t != null && t.GetReferent() != null && t.GetReferent().TypeName == "GEO") && attrs1 != null && openBr)
                    {
                        continue;
                    }
                    if ((t.Chars.IsCapitalUpper && openBr && t.Next != null) && t.Next.IsChar(')'))
                    {
                        if (p.FindSlot(Pullenti.Ner.Person.PersonReferent.ATTR_LASTNAME, null, true) == null)
                        {
                            p.AddSlot(Pullenti.Ner.Person.PersonReferent.ATTR_LASTNAME, t.GetSourceText().ToUpper(), false, 0);
                            t   = t.Next;
                            end = t;
                        }
                    }
                    if (t != null && t.IsValue("КОТОРЫЙ", null) && t.Morph.Number == Pullenti.Morph.MorphNumber.Singular)
                    {
                        if (!p.IsFemale && t.Morph.Gender == Pullenti.Morph.MorphGender.Feminie)
                        {
                            p.IsFemale = true;
                            p.CorrectData();
                        }
                        else if (!p.IsMale && t.Morph.Gender == Pullenti.Morph.MorphGender.Masculine)
                        {
                            p.IsMale = true;
                            p.CorrectData();
                        }
                    }
                    break;
                }
                if (attr.Morph.Number == Pullenti.Morph.MorphNumber.Plural)
                {
                    break;
                }
                if (attr.Typ == PersonAttrTerminType.BestRegards)
                {
                    break;
                }
                if (attr.IsDoubt)
                {
                    if (hasPrefix)
                    {
                    }
                    else if (t.IsNewlineBefore && attr.IsNewlineAfter)
                    {
                    }
                    else if (t.Previous != null && ((t.Previous.IsHiphen || t.Previous.IsChar(':'))))
                    {
                    }
                    else
                    {
                        break;
                    }
                }
                if (!morph.Case.IsUndefined && !attr.Morph.Case.IsUndefined)
                {
                    if (((morph.Case & attr.Morph.Case)).IsUndefined && !isBe)
                    {
                        break;
                    }
                }
                if (openBr)
                {
                    if (Pullenti.Ner.Person.PersonAnalyzer.TryAttachPerson(t, ad, false, 0, true) != null)
                    {
                        break;
                    }
                }
                if (attrs1 == null)
                {
                    if (t.Previous.IsComma && t.Previous == end.Next)
                    {
                        Pullenti.Ner.Token ttt = attr.EndToken.Next;
                        if (ttt != null)
                        {
                            if (ttt.Morph.Class.IsVerb)
                            {
                                if (Pullenti.Ner.Core.MiscHelper.CanBeStartOfSentence(begin))
                                {
                                }
                                else
                                {
                                    break;
                                }
                            }
                        }
                    }
                    attrs1 = new List <PersonAttrToken>();
                }
                attrs1.Add(attr);
                if (attr.Typ == PersonAttrTerminType.Position || attr.Typ == PersonAttrTerminType.King)
                {
                    if (!isBe)
                    {
                        hasPosition = true;
                    }
                }
                else if (attr.Typ != PersonAttrTerminType.Prefix)
                {
                    if (attr.Typ == PersonAttrTerminType.Other && attr.Age != null)
                    {
                    }
                    else
                    {
                        attrs1 = null;
                        break;
                    }
                }
                t = attr.EndToken;
            }
            if (attrs1 != null && hasPosition && attrs != null)
            {
                Pullenti.Ner.Token te1 = attrs[attrs.Count - 1].EndToken.Next;
                Pullenti.Ner.Token te2 = attrs1[0].BeginToken;
                if (te1.WhitespacesAfterCount > te2.WhitespacesBeforeCount && (te2.WhitespacesBeforeCount < 2))
                {
                }
                else if (attrs1[0].Age != null)
                {
                }
                else if (((te1.IsHiphen || te1.IsChar(':'))) && !attrs1[0].IsNewlineBefore && ((te2.Previous.IsComma || te2.Previous == end)))
                {
                }
                else
                {
                    foreach (PersonAttrToken a in attrs)
                    {
                        if (a.Typ == PersonAttrTerminType.Position)
                        {
                            Pullenti.Ner.Token te = attrs1[attrs1.Count - 1].EndToken;
                            if (te.Next != null)
                            {
                                if (!te.Next.IsChar('.'))
                                {
                                    attrs1 = null;
                                    break;
                                }
                            }
                        }
                    }
                }
            }
            if (attrs1 != null && !hasPrefix)
            {
                PersonAttrToken attr = attrs1[attrs1.Count - 1];
                bool            ok   = false;
                if (attr.EndToken.Next != null && attr.EndToken.Next.Chars.IsCapitalUpper)
                {
                    ok = true;
                }
                else
                {
                    Pullenti.Ner.ReferentToken rt = Pullenti.Ner.Person.PersonAnalyzer.TryAttachPerson(attr.BeginToken, ad, false, -1, false);
                    if (rt != null && (rt.Referent is Pullenti.Ner.Person.PersonReferent))
                    {
                        ok = true;
                    }
                }
                if (ok)
                {
                    if (attr.BeginToken.WhitespacesBeforeCount > attr.EndToken.WhitespacesAfterCount)
                    {
                        attrs1 = null;
                    }
                    else if (attr.BeginToken.WhitespacesBeforeCount == attr.EndToken.WhitespacesAfterCount)
                    {
                        Pullenti.Ner.ReferentToken rt1 = Pullenti.Ner.Person.PersonAnalyzer.TryAttachPerson(attr.BeginToken, ad, false, -1, false);
                        if (rt1 != null)
                        {
                            attrs1 = null;
                        }
                    }
                }
            }
            if (attrs1 != null)
            {
                foreach (PersonAttrToken a in attrs1)
                {
                    if (a.Typ != PersonAttrTerminType.Prefix)
                    {
                        if (a.Age != null)
                        {
                            p.AddSlot(Pullenti.Ner.Person.PersonReferent.ATTR_AGE, a.Age, true, 0);
                        }
                        else if (a.PropRef == null)
                        {
                            p.AddSlot(Pullenti.Ner.Person.PersonReferent.ATTR_ATTR, a.Value, false, 0);
                        }
                        else
                        {
                            p.AddSlot(Pullenti.Ner.Person.PersonReferent.ATTR_ATTR, a, false, 0);
                        }
                        end = a.EndToken;
                        if (a.Gender != Pullenti.Morph.MorphGender.Undefined && !p.IsFemale && !p.IsMale)
                        {
                            if (a.Gender == Pullenti.Morph.MorphGender.Masculine && !p.IsMale)
                            {
                                p.IsMale = true;
                                p.CorrectData();
                            }
                            else if (a.Gender == Pullenti.Morph.MorphGender.Feminie && !p.IsFemale)
                            {
                                p.IsFemale = true;
                                p.CorrectData();
                            }
                        }
                    }
                }
                if (openBr)
                {
                    if (end.Next != null && end.Next.IsChar(')'))
                    {
                        end = end.Next;
                    }
                }
            }
            int crlfCou = 0;

            for (Pullenti.Ner.Token t = end.Next; t != null; t = t.Next)
            {
                if (t.IsTableControlChar)
                {
                    break;
                }
                if (t.IsNewlineBefore)
                {
                    Pullenti.Ner.Mail.Internal.MailLine ml = Pullenti.Ner.Mail.Internal.MailLine.Parse(t, 0, 0);
                    if (ml != null && ml.Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.From)
                    {
                        break;
                    }
                    crlfCou++;
                }
                if (t.IsCharOf(":,(") || t.IsHiphen)
                {
                    continue;
                }
                if (t.IsChar('.') && t == end.Next)
                {
                    continue;
                }
                Pullenti.Ner.Referent r = t.GetReferent();
                if (r != null)
                {
                    if (r.TypeName == "PHONE" || r.TypeName == "URI" || r.TypeName == "ADDRESS")
                    {
                        string ty = r.GetStringValue("SCHEME");
                        if (r.TypeName == "URI")
                        {
                            if ((ty != "mailto" && ty != "skype" && ty != "ICQ") && ty != "http")
                            {
                                break;
                            }
                        }
                        p.AddContact(r);
                        end     = t;
                        crlfCou = 0;
                        continue;
                    }
                }
                if (r is Pullenti.Ner.Person.PersonIdentityReferent)
                {
                    p.AddSlot(Pullenti.Ner.Person.PersonReferent.ATTR_IDDOC, r, false, 0);
                    end     = t;
                    crlfCou = 0;
                    continue;
                }
                if (r != null && r.TypeName == "ORGANIZATION")
                {
                    if (t.Next != null && t.Next.Morph.Class.IsVerb)
                    {
                        break;
                    }
                    if (begin.Previous != null && begin.Previous.Morph.Class.IsVerb)
                    {
                        break;
                    }
                    if (t.WhitespacesAfterCount == 1)
                    {
                        break;
                    }
                    bool exist = false;
                    foreach (Pullenti.Ner.Slot s in p.Slots)
                    {
                        if (s.TypeName == Pullenti.Ner.Person.PersonReferent.ATTR_ATTR && (s.Value is Pullenti.Ner.Person.PersonPropertyReferent))
                        {
                            Pullenti.Ner.Person.PersonPropertyReferent pr = s.Value as Pullenti.Ner.Person.PersonPropertyReferent;
                            if (pr.FindSlot(Pullenti.Ner.Person.PersonPropertyReferent.ATTR_REF, r, true) != null)
                            {
                                exist = true;
                                break;
                            }
                        }
                        else if (s.TypeName == Pullenti.Ner.Person.PersonReferent.ATTR_ATTR && (s.Value is PersonAttrToken))
                        {
                            PersonAttrToken pr = s.Value as PersonAttrToken;
                            if (pr.Referent.FindSlot(Pullenti.Ner.Person.PersonPropertyReferent.ATTR_REF, r, true) != null)
                            {
                                exist = true;
                                break;
                            }
                        }
                    }
                    if (!exist)
                    {
                        PersonAttrToken pat = new PersonAttrToken(t, t);
                        pat.PropRef = new Pullenti.Ner.Person.PersonPropertyReferent()
                        {
                            Name = "сотрудник"
                        };
                        pat.PropRef.AddSlot(Pullenti.Ner.Person.PersonPropertyReferent.ATTR_REF, r, false, 0);
                        p.AddSlot(Pullenti.Ner.Person.PersonReferent.ATTR_ATTR, pat, false, 0);
                    }
                    continue;
                }
                if (r != null)
                {
                    break;
                }
                if (!hasPrefix || crlfCou >= 2)
                {
                    break;
                }
                Pullenti.Ner.ReferentToken rt = t.Kit.ProcessReferent("PERSON", t);
                if (rt != null)
                {
                    break;
                }
            }
            if (ad != null)
            {
                ad.OverflowLevel--;
            }
            if (begin.IsValue("НА", null) && begin.Next != null && begin.Next.IsValue("ИМЯ", null))
            {
                Pullenti.Ner.Token t0 = begin.Previous;
                if (t0 != null && t0.IsComma)
                {
                    t0 = t0.Previous;
                }
                if (t0 != null && (t0.GetReferent() is Pullenti.Ner.Person.PersonIdentityReferent))
                {
                    p.AddSlot(Pullenti.Ner.Person.PersonReferent.ATTR_IDDOC, t0.GetReferent(), false, 0);
                }
            }
            return(new Pullenti.Ner.ReferentToken(p, begin, end)
            {
                Morph = morph, MiscAttrs = (int)p.m_PersonIdentityTyp
            });
        }
예제 #26
0
        /// <summary>
        /// Builds a <see cref="MailLine"/> for the single text line that starts at <paramref name="t0"/>
        /// and tries to classify it as <c>Types.From</c>, <c>Types.Hello</c> or <c>Types.BestRegards</c>;
        /// the type is left as <c>Types.Undefined</c> when none of the checks matches.
        /// </summary>
        /// <param name="t0">First token of the line. When null, the method returns null.</param>
        /// <param name="lev">Nesting level of the look-ahead calls this method makes on following lines.</param>
        /// <param name="maxCount">When positive, scanning of the line stops once the loop counter exceeds it; 0 means no limit.</param>
        /// <returns>The parsed line (never null for a non-null <paramref name="t0"/>).</returns>
        public static MailLine Parse(Pullenti.Ner.Token t0, int lev, int maxCount = 0)
        {
            if (t0 == null)
            {
                return(null);
            }
            MailLine res = new MailLine(t0, t0);
            bool     pr  = true; // still inside the leading prefix of the line
            int      cou = 0;

            // Pass 1: find the end of the line, count leading '>' / '|' markers, detect an explicit
            // "<from-word>:" header and collect the referents found on the line.
            for (Pullenti.Ner.Token t = t0; t != null; t = t.Next, cou++)
            {
                if (t.IsNewlineBefore && t0 != t)
                {
                    break;
                }
                if (maxCount > 0 && cou > maxCount)
                {
                    break;
                }
                res.EndToken = t;
                if (t.IsTableControlChar || t.IsHiphen)
                {
                    continue;
                }
                if (pr)
                {
                    if ((t is Pullenti.Ner.TextToken) && t.IsCharOf(">|"))
                    {
                        res.Lev++;
                    }
                    else
                    {
                        pr = false;
                        Pullenti.Ner.Core.TerminToken tok = m_FromWords.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
                        if (tok != null && tok.EndToken.Next != null && tok.EndToken.Next.IsChar(':'))
                        {
                            res.Typ = Types.From;
                            t       = tok.EndToken.Next;
                            continue;
                        }
                    }
                }
                if (t is Pullenti.Ner.ReferentToken)
                {
                    Pullenti.Ner.Referent r = t.GetReferent();
                    if (r != null)
                    {
                        if ((((r is Pullenti.Ner.Person.PersonReferent) || (r is Pullenti.Ner.Geo.GeoReferent) || (r is Pullenti.Ner.Address.AddressReferent)) || r.TypeName == "PHONE" || r.TypeName == "URI") || (r is Pullenti.Ner.Person.PersonPropertyReferent) || r.TypeName == "ORGANIZATION")
                        {
                            res.Refs.Add(r);
                        }
                    }
                }
            }
            // Pass 2: greeting line - at least one hello-word and fewer than three unrelated tokens.
            if (res.Typ == Types.Undefined)
            {
                Pullenti.Ner.Token t = t0;
                // Skip leading non-letter tokens.
                for (; t != null && (t.EndChar < res.EndChar); t = t.Next)
                {
                    if (!t.IsHiphen && t.Chars.IsLetter)
                    {
                        break;
                    }
                }
                int ok   = 0; // hello-words found
                int nams = 0; // person names / collective addressees found
                int oth  = 0; // any other tokens
                Pullenti.Ner.Token lastComma = null; // last non-letter text token seen
                for (; t != null && (t.EndChar < res.EndChar); t = t.Next)
                {
                    if (t.GetReferent() is Pullenti.Ner.Person.PersonReferent)
                    {
                        nams++;
                        continue;
                    }
                    if (t is Pullenti.Ner.TextToken)
                    {
                        if (!t.Chars.IsLetter)
                        {
                            lastComma = t;
                            continue;
                        }
                        Pullenti.Ner.Core.TerminToken tok = m_HelloWords.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
                        if (tok != null)
                        {
                            ok++;
                            t = tok.EndToken;
                            continue;
                        }
                        if (t.IsValue("ВСЕ", null) || t.IsValue("ALL", null) || t.IsValue("TEAM", null))
                        {
                            nams++;
                            continue;
                        }
                        Pullenti.Ner.Person.Internal.PersonItemToken pit = Pullenti.Ner.Person.Internal.PersonItemToken.TryAttach(t, null, Pullenti.Ner.Person.Internal.PersonItemToken.ParseAttr.No, null);
                        if (pit != null)
                        {
                            nams++;
                            t = pit.EndToken;
                            continue;
                        }
                    }
                    if ((++oth) > 3)
                    {
                        // Too much foreign text: keep only the part up to the last punctuation mark
                        // if a hello-word was already seen.
                        if (ok > 0 && lastComma != null)
                        {
                            res.EndToken = lastComma;
                            oth          = 0;
                        }
                        break;
                    }
                }
                if ((oth < 3) && ok > 0)
                {
                    res.Typ = Types.Hello;
                }
            }
            // Pass 3: sign-off line built from regard-words; matched tokens are marked through Tag.
            if (res.Typ == Types.Undefined)
            {
                int okWords = 0;
                for (Pullenti.Ner.Token t = t0; t != null && t.EndChar <= res.EndChar; t = t.Next)
                {
                    if (!(t is Pullenti.Ner.TextToken))
                    {
                        continue;
                    }
                    if (t.IsChar('<'))
                    {
                        // Skip a whole <...> bracket sequence.
                        Pullenti.Ner.Core.BracketSequenceToken br = Pullenti.Ner.Core.BracketHelper.TryParse(t, Pullenti.Ner.Core.BracketParseAttr.No, 100);
                        if (br != null)
                        {
                            t = br.EndToken;
                            continue;
                        }
                    }
                    if (!t.IsLetters || t.IsTableControlChar)
                    {
                        continue;
                    }
                    Pullenti.Ner.Core.TerminToken tok = m_RegardWords.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
                    if (tok != null)
                    {
                        okWords++;
                        for (; t != null && t.EndChar <= tok.EndChar; t = t.Next)
                        {
                            t.Tag = tok.Termin;
                        }
                        t = tok.EndToken;
                        if ((t.Next is Pullenti.Ner.TextToken) && t.Next.Morph.Case.IsGenitive)
                        {
                            // Absorb following genitive noun phrases. The null check is required:
                            // the phrase may end on the very last token of the text.
                            for (t = t.Next; t != null && t.EndChar <= res.EndChar; t = t.Next)
                            {
                                if (t.Morph.Class.IsConjunction)
                                {
                                    continue;
                                }
                                Pullenti.Ner.Core.NounPhraseToken npt1 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                                if (npt1 == null)
                                {
                                    break;
                                }
                                if (!npt1.Morph.Case.IsGenitive)
                                {
                                    break;
                                }
                                for (; t.EndChar < npt1.EndChar; t = t.Next)
                                {
                                    t.Tag = t;
                                }
                                t.Tag = t;
                            }
                            if (t == null)
                            {
                                // End of text reached: leave the outer loop instead of letting its
                                // increment dereference null.
                                break;
                            }
                        }
                        continue;
                    }
                    if ((t.Morph.Class.IsPreposition || t.Morph.Class.IsConjunction || t.Morph.Class.IsMisc) || t.IsValue("C", null))
                    {
                        continue;
                    }
                    if ((okWords > 0 && t.Previous != null && t.Previous.IsComma) && t.Previous.BeginChar > t0.BeginChar && !t.Chars.IsAllLower)
                    {
                        // "<regards>, Name ..." - the line logically ends at the comma.
                        res.EndToken = t.Previous;
                        break;
                    }
                    Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                    if (npt == null)
                    {
                        if ((res.EndChar - t.EndChar) > 10)
                        {
                            okWords = 0;
                        }
                        break;
                    }
                    tok = m_RegardWords.TryParse(npt.EndToken, Pullenti.Ner.Core.TerminParseAttr.No);
                    if (tok != null && (npt.EndToken is Pullenti.Ner.TextToken))
                    {
                        string term = (npt.EndToken as Pullenti.Ner.TextToken).Term;
                        if (term == "ДЕЛ")
                        {
                            tok = null;
                        }
                    }
                    if (tok == null)
                    {
                        if (npt.Noun.IsValue("НАДЕЖДА", null))
                        {
                            t.Tag = t;
                        }
                        else if (okWords > 0 && t.IsValue("NICE", null) && ((res.EndChar - npt.EndChar) < 13))
                        {
                            t.Tag = t;
                        }
                        else
                        {
                            okWords = 0;
                        }
                        break;
                    }
                    okWords++;
                    for (; t != null && t.EndChar <= tok.EndChar; t = t.Next)
                    {
                        t.Tag = tok.Termin;
                    }
                    t = tok.EndToken;
                }
                if (okWords > 0)
                {
                    res.Typ = Types.BestRegards;
                }
            }
            // Pass 4: implicit "From" headers - forwarded-message banners and
            // "<date> ... <sender> wrote" style quote introductions.
            if (res.Typ == Types.Undefined)
            {
                Pullenti.Ner.Token t = t0;
                // Stop on the first non-text token or the first letter token.
                for (; t != null && (t.EndChar < res.EndChar); t = t.Next)
                {
                    if (!(t is Pullenti.Ner.TextToken))
                    {
                        break;
                    }
                    else if (!t.IsHiphen && t.Chars.IsLetter)
                    {
                        break;
                    }
                }
                if (t != null)
                {
                    if (((t.IsValue("ПЕРЕСЫЛАЕМОЕ", null) || t.IsValue("ПЕРЕАДРЕСОВАННОЕ", null))) && t.Next != null && t.Next.IsValue("СООБЩЕНИЕ", null))
                    {
                        res.Typ             = Types.From;
                        res.MustBeFirstLine = true;
                    }
                    else if ((t.IsValue("НАЧАЛО", null) && t.Next != null && ((t.Next.IsValue("ПЕРЕСЫЛАЕМОЕ", null) || t.Next.IsValue("ПЕРЕАДРЕСОВАННОЕ", null)))) && t.Next.Next != null && t.Next.Next.IsValue("СООБЩЕНИЕ", null))
                    {
                        res.Typ             = Types.From;
                        res.MustBeFirstLine = true;
                    }
                    else if (t.IsValue("ORIGINAL", null) && t.Next != null && ((t.Next.IsValue("MESSAGE", null) || t.Next.IsValue("APPOINTMENT", null))))
                    {
                        res.Typ             = Types.From;
                        res.MustBeFirstLine = true;
                    }
                    else if (t.IsValue("ПЕРЕСЛАНО", null) && t.Next != null && t.Next.IsValue("ПОЛЬЗОВАТЕЛЕМ", null))
                    {
                        res.Typ             = Types.From;
                        res.MustBeFirstLine = true;
                    }
                    else if (((t.GetReferent() != null && t.GetReferent().TypeName == "DATE")) || ((t.IsValue("IL", null) && t.Next != null && t.Next.IsValue("GIORNO", null))) || ((t.IsValue("ON", null) && (t.Next is Pullenti.Ner.ReferentToken) && t.Next.GetReferent().TypeName == "DATE")))
                    {
                        bool hasFrom = false;
                        bool hasDate = t.GetReferent() != null && t.GetReferent().TypeName == "DATE";
                        if (t.IsNewlineAfter && (lev < 5))
                        {
                            // A lone date followed by a greeting line is treated as a header.
                            MailLine res1 = Parse(t.Next, lev + 1, 0);
                            if (res1 != null && res1.Typ == Types.Hello)
                            {
                                res.Typ = Types.From;
                            }
                        }
                        // NOTE(review): unlike the call above, this look-ahead is not limited by lev - confirm this is intended.
                        MailLine next = Parse(res.EndToken.Next, lev + 1, 0);
                        if (next != null)
                        {
                            // Only an unclassified following line may be merged into this one.
                            if (next.Typ != Types.Undefined)
                            {
                                next = null;
                            }
                        }
                        int tmax = res.EndChar;
                        if (next != null)
                        {
                            tmax = next.EndChar;
                        }
                        Pullenti.Ner.Core.BracketSequenceToken br1 = null;
                        for (; t != null && t.EndChar <= tmax; t = t.Next)
                        {
                            if (t.IsValue("ОТ", null) || t.IsValue("FROM", null))
                            {
                                hasFrom = true;
                            }
                            else if (t.GetReferent() != null && ((t.GetReferent().TypeName == "URI" || (t.GetReferent() is Pullenti.Ner.Person.PersonReferent))))
                            {
                                if (t.GetReferent().TypeName == "URI" && hasDate)
                                {
                                    if (br1 != null)
                                    {
                                        hasFrom = true;
                                        next    = null;
                                    }
                                    if (t.Previous.IsChar('<') && t.Next != null && t.Next.IsChar('>'))
                                    {
                                        t = t.Next;
                                        if (t.Next != null && t.Next.IsChar(':'))
                                        {
                                            t = t.Next;
                                        }
                                        if (t.IsNewlineAfter)
                                        {
                                            hasFrom = true;
                                            next    = null;
                                        }
                                    }
                                }
                                // Look for a "wrote" verb after the sender on the current line.
                                for (t = t.Next; t != null && t.EndChar <= res.EndChar; t = t.Next)
                                {
                                    if (t.IsValue("HA", null) && t.Next != null && t.Next.IsValue("SCRITTO", null))
                                    {
                                        hasFrom = true;
                                        break;
                                    }
                                    else if (((t.IsValue("НАПИСАТЬ", null) || t.IsValue("WROTE", null))) && ((res.EndChar - t.EndChar) < 10))
                                    {
                                        hasFrom = true;
                                        break;
                                    }
                                }
                                if (hasFrom)
                                {
                                    res.Typ = Types.From;
                                    // t is null when the scan above ran off the end of the text; with a
                                    // non-null next line that position lies at or after that line, so it
                                    // is handled like any token that is not before next.BeginChar.
                                    if (next != null && (t == null || t.EndChar >= next.BeginChar))
                                    {
                                        res.EndToken = next.EndToken;
                                    }
                                }
                                break;
                            }
                            else if (br1 == null && !t.IsChar('<') && Pullenti.Ner.Core.BracketHelper.CanBeStartOfSequence(t, true, false))
                            {
                                br1 = Pullenti.Ner.Core.BracketHelper.TryParse(t, Pullenti.Ner.Core.BracketParseAttr.No, 100);
                                if (br1 != null)
                                {
                                    t = br1.EndToken;
                                }
                            }
                        }
                    }
                    else
                    {
                        // "<uri or person> ... ПИСАТЬ (" pattern.
                        bool hasUri = false;
                        for (; t != null && (t.EndChar < res.EndChar); t = t.Next)
                        {
                            if (t.GetReferent() != null && ((t.GetReferent().TypeName == "URI" || (t.GetReferent() is Pullenti.Ner.Person.PersonReferent))))
                            {
                                hasUri = true;
                            }
                            else if (t.IsValue("ПИСАТЬ", null) && hasUri)
                            {
                                if (t.Next != null && t.Next.IsChar('('))
                                {
                                    res.Typ = Types.From;
                                    break;
                                }
                            }
                        }
                    }
                }
            }
            return(res);
        }
예제 #27
0
        /// <summary>
        /// Inspects the tokens that follow a person mention for birth/death details
        /// (a keyword plus a date, an age after a death date, or dates in parentheses)
        /// and, when <paramref name="p"/> is supplied, stores what it finds as slots.
        /// </summary>
        /// <param name="p">Person that receives the BORN / DIE / AGE slots; may be null, in which case only the tail extent is computed.</param>
        /// <param name="t0">First token of the candidate tail; a leading comma is tolerated.</param>
        /// <returns>The last token recognised as part of the tail, or <paramref name="t0"/> when nothing was recognised.</returns>
        static Pullenti.Ner.Token CorrectTailAttributes(Pullenti.Ner.Person.PersonReferent p, Pullenti.Ner.Token t0)
        {
            Pullenti.Ner.Token last = t0;
            Pullenti.Ner.Token cur  = t0;
            bool isBirth = false;
            bool isDeath = false;

            if (cur != null && cur.IsChar(','))
            {
                cur = cur.Next;
            }

            // Optional leading keyword: "born", "died" or "date of birth"
            // (the second IsValue argument appears to be the Ukrainian form).
            if (cur != null)
            {
                if (cur.IsValue("РОДИТЬСЯ", "НАРОДИТИСЯ") || cur.IsValue("BORN", null))
                {
                    isBirth = true;
                    cur     = cur.Next;
                }
                else if (cur.IsValue("УМЕРЕТЬ", "ПОМЕРТИ") || cur.IsValue("СКОНЧАТЬСЯ", null) || cur.IsValue("DIED", null))
                {
                    isDeath = true;
                    cur     = cur.Next;
                }
                else if (cur.IsValue("ДАТА", null) && cur.Next != null && cur.Next.IsValue("РОЖДЕНИЕ", "НАРОДЖЕННЯ"))
                {
                    isBirth = true;
                    cur     = cur.Next.Next;
                }
            }

            // Step over prepositions, hyphens and colons standing between the keyword and the date.
            for (; cur != null; cur = cur.Next)
            {
                if (!cur.Morph.Class.IsPreposition && !cur.IsHiphen && !cur.IsChar(':'))
                {
                    break;
                }
            }

            Pullenti.Ner.Referent dateRef = (cur != null ? cur.GetReferent() : null);
            if (dateRef != null && dateRef.TypeName == "DATE")
            {
                Pullenti.Ner.Token tail  = cur;
                Pullenti.Ner.Token after = cur.Next;

                // A trailing birth marker after the date also counts, with an optional dot.
                if (after != null && (after.IsValue("Р", null) || after.IsValue("РОЖДЕНИЕ", "НАРОДЖЕННЯ")))
                {
                    isBirth = true;
                    tail    = after;
                    if (tail.Next != null && tail.Next.IsChar('.'))
                    {
                        tail = tail.Next;
                    }
                }

                // Birth takes precedence over death when both flags are set.
                if (isBirth || isDeath)
                {
                    if (p != null)
                    {
                        if (isBirth)
                        {
                            p.AddSlot(Pullenti.Ner.Person.PersonReferent.ATTR_BORN, dateRef, false, 0);
                        }
                        else
                        {
                            p.AddSlot(Pullenti.Ner.Person.PersonReferent.ATTR_DIE, dateRef, false, 0);
                        }
                    }
                    last = tail;
                    cur  = tail;
                }
            }

            // After a death keyword an age may follow the current token.
            if (isDeath && cur != null)
            {
                Pullenti.Ner.NumberToken age = Pullenti.Ner.Core.NumberHelper.TryParseAge(cur.Next);
                if (age != null)
                {
                    if (p != null)
                    {
                        p.AddSlot(Pullenti.Ner.Person.PersonReferent.ATTR_AGE, age.Value.ToString(), false, 0);
                    }
                    last = age.EndToken;
                    cur  = last.Next;
                }
            }

            // What remains is the parenthesised form: "(DATE)", "(DATERANGE)", optionally prefixed by "РОД" / "РОД.".
            if (cur == null || !cur.IsChar('('))
            {
                return last;
            }
            Pullenti.Ner.Core.BracketSequenceToken brackets = Pullenti.Ner.Core.BracketHelper.TryParse(cur, Pullenti.Ner.Core.BracketParseAttr.No, 100);
            if (brackets == null)
            {
                return last;
            }

            Pullenti.Ner.Token inner = cur.Next;
            if (inner.IsValue("РОД", null))
            {
                inner = inner.Next;
                if (inner != null && inner.IsChar('.'))
                {
                    inner = inner.Next;
                }
            }
            if (!(inner is Pullenti.Ner.ReferentToken))
            {
                return last;
            }

            Pullenti.Ner.Referent bracketRef = inner.GetReferent();
            string refKind = bracketRef.TypeName;
            // The referent must be immediately followed by the closing bracket.
            bool closesRightAfter = inner.Next == brackets.EndToken;

            if (refKind == "DATERANGE" && closesRightAfter)
            {
                Pullenti.Ner.Referent bornRef = bracketRef.GetSlotValue("FROM") as Pullenti.Ner.Referent;
                Pullenti.Ner.Referent diedRef = bracketRef.GetSlotValue("TO") as Pullenti.Ner.Referent;
                if (bornRef != null && diedRef != null)
                {
                    if (p != null)
                    {
                        p.AddSlot(Pullenti.Ner.Person.PersonReferent.ATTR_BORN, bornRef, false, 0);
                        p.AddSlot(Pullenti.Ner.Person.PersonReferent.ATTR_DIE, diedRef, false, 0);
                    }
                    last = brackets.EndToken;
                }
            }
            else if (refKind == "DATE" && closesRightAfter)
            {
                // A single bracketed date is stored as the birth date.
                if (p != null)
                {
                    p.AddSlot(Pullenti.Ner.Person.PersonReferent.ATTR_BORN, bracketRef, false, 0);
                }
                last = brackets.EndToken;
            }
            return last;
        }
예제 #28
0
        /// <summary>
        /// Builds a <see cref="CanonicDecreeRefUri"/> for a referent token that wraps either a
        /// decree (<c>DecreeReferent</c>) or a decree part (<c>DecreePartReferent</c>) and narrows
        /// the result's BeginChar/EndChar span to the text fragment chosen by the heuristics below.
        /// </summary>
        /// <param name="t">Token to inspect; anything that is not a <c>ReferentToken</c> yields <c>null</c>.</param>
        /// <returns>
        /// The descriptor, or <c>null</c> when: the token is not a referent token; the decree has
        /// kind <c>Publisher</c>; the referent is neither a decree nor a decree part; or the token
        /// is the right-hand side of a "part - part" pair for which a range referent can be created.
        /// </returns>
        public static CanonicDecreeRefUri TryCreateCanonicDecreeRefUri(Pullenti.Ner.Token t)
        {
            // From here on t is known to be a ReferentToken; rt is used wherever its inner tokens are needed.
            Pullenti.Ner.ReferentToken rt = t as Pullenti.Ner.ReferentToken;
            if (rt == null)
            {
                return(null);
            }
            Pullenti.Ner.Decree.DecreeReferent dr = t.GetReferent() as Pullenti.Ner.Decree.DecreeReferent;
            CanonicDecreeRefUri res;

            if (dr != null)
            {
                // ---------- The token refers to a whole decree ----------
                if (dr.Kind == Pullenti.Ner.Decree.DecreeKind.Publisher)
                {
                    return(null);
                }
                res = new CanonicDecreeRefUri(t.Kit.Sofa.Text)
                {
                    Ref = dr, BeginChar = t.BeginChar, EndChar = t.EndChar
                };
                // Token enclosed in parentheses from outside: keep the full span.
                if ((t.Previous != null && t.Previous.IsChar('(') && t.Next != null) && t.Next.IsChar(')'))
                {
                    return(res);
                }
                if (rt.MiscAttrs != 0)
                {
                    return(res);
                }
                // Parentheses are part of the token itself: the span is what lies between them.
                if (rt.BeginToken.IsChar('(') && rt.EndToken.IsChar(')'))
                {
                    res = new CanonicDecreeRefUri(t.Kit.Sofa.Text)
                    {
                        Ref = dr, BeginChar = rt.BeginToken.Next.BeginChar, EndChar = rt.EndToken.Previous.EndChar
                    };
                    return(res);
                }
                // If another decree follows after a comma/"and", parse its items (cut at the first
                // line break) so that the shared prefix of this reference can be dropped below.
                List <DecreeToken> nextDecreeItems = null;
                if ((t.Next != null && t.Next.IsCommaAnd && (t.Next.Next is Pullenti.Ner.ReferentToken)) && (t.Next.Next.GetReferent() is Pullenti.Ner.Decree.DecreeReferent))
                {
                    nextDecreeItems = DecreeToken.TryAttachList((t.Next.Next as Pullenti.Ner.ReferentToken).BeginToken, null, 10, false);
                    if (nextDecreeItems != null && nextDecreeItems.Count > 1)
                    {
                        for (int i = 0; i < (nextDecreeItems.Count - 1); i++)
                        {
                            if (nextDecreeItems[i].IsNewlineAfter)
                            {
                                nextDecreeItems.RemoveRange(i + 1, nextDecreeItems.Count - i - 1);
                                break;
                            }
                        }
                    }
                }
                bool wasTyp = false;
                bool wasNum = false;
                for (Pullenti.Ner.Token tt = rt.BeginToken; tt != null && tt.EndChar <= t.EndChar; tt = tt.Next)
                {
                    // Leading '(' is excluded from the span.
                    if (tt.BeginChar == t.BeginChar && tt.IsChar('(') && tt.Next != null)
                    {
                        res.BeginChar = tt.Next.BeginChar;
                    }
                    // "(ДАЛЕЕ ..." ("hereinafter ...") ends the reference: cut before the bracket.
                    if (tt.IsChar('(') && tt.Next != null && tt.Next.IsValue("ДАЛЕЕ", null))
                    {
                        if (res.EndChar >= tt.BeginChar && tt.Previous != null)
                        {
                            res.EndChar = tt.Previous.EndChar;
                        }
                        break;
                    }
                    // Trailing ')' is excluded; every '(' met while walking left (with part of the
                    // reference still before it) moves the end further left, dropping the group.
                    if (tt.EndChar == t.EndChar && tt.IsChar(')') && tt.Previous != null)
                    {
                        res.EndChar = tt.Previous.EndChar;
                        for (Pullenti.Ner.Token tt1 = tt.Previous; tt1 != null && tt1.BeginChar >= res.BeginChar; tt1 = tt1.Previous)
                        {
                            if (tt1.IsChar('(') && tt1.Previous != null)
                            {
                                if (res.BeginChar < tt1.Previous.BeginChar)
                                {
                                    res.EndChar = tt1.Previous.EndChar;
                                }
                            }
                        }
                    }
                    List <DecreeToken> li = DecreeToken.TryAttachList(tt, null, 10, false);
                    if (li != null && li.Count > 0)
                    {
                        // A type item directly followed by a territory item gives the "type with geo" text.
                        for (int ii = 0; ii < (li.Count - 1); ii++)
                        {
                            if (li[ii].Typ == DecreeToken.ItemType.Typ && li[ii + 1].Typ == DecreeToken.ItemType.Terr)
                            {
                                res.TypeWithGeo = Pullenti.Ner.Core.MiscHelper.GetTextValue(li[ii].BeginToken, li[ii + 1].EndToken, Pullenti.Ner.Core.GetTextAttr.FirstNounGroupToNominativeSingle);
                            }
                        }
                        if ((nextDecreeItems != null && nextDecreeItems.Count > 1 && (nextDecreeItems.Count < li.Count)) && nextDecreeItems[0].Typ != DecreeToken.ItemType.Typ)
                        {
                            // The following decree is shorter and has no type of its own: if its item
                            // types match the tail of this list, keep only that matching tail.
                            int d = li.Count - nextDecreeItems.Count;
                            int j;
                            for (j = 0; j < nextDecreeItems.Count; j++)
                            {
                                if (nextDecreeItems[j].Typ != li[d + j].Typ)
                                {
                                    break;
                                }
                            }
                            if (j >= nextDecreeItems.Count)
                            {
                                li.RemoveRange(0, d);
                                res.BeginChar = li[0].BeginChar;
                            }
                        }
                        else if ((nextDecreeItems != null && nextDecreeItems.Count == 1 && nextDecreeItems[0].Typ == DecreeToken.ItemType.Name) && li.Count == 2 && li[1].Typ == DecreeToken.ItemType.Name)
                        {
                            // The following decree is a bare name: this reference is reduced to its name too.
                            res.BeginChar = li[1].BeginChar;
                            res.EndChar   = li[1].EndChar;
                            break;
                        }
                        else if ((nextDecreeItems != null && nextDecreeItems.Count == 1 && nextDecreeItems[0].Typ == DecreeToken.ItemType.Number) && li[li.Count - 1].Typ == DecreeToken.ItemType.Number)
                        {
                            // The following decree is a bare number: this reference is reduced to its number.
                            res.BeginChar = li[li.Count - 1].BeginChar;
                            res.EndChar   = li[li.Count - 1].EndChar;
                        }
                        for (int i = 0; i < li.Count; i++)
                        {
                            DecreeToken l = li[i];
                            // Items lying beyond the referent token are not considered.
                            if (l.BeginChar > t.EndChar)
                            {
                                li.RemoveRange(i, li.Count - i);
                                break;
                            }
                            if (l.Typ == DecreeToken.ItemType.Name)
                            {
                                if (!wasNum)
                                {
                                    if (dr.Kind == Pullenti.Ner.Decree.DecreeKind.Contract)
                                    {
                                        continue;
                                    }
                                    if (((i + 1) < li.Count) && ((li[i + 1].Typ == DecreeToken.ItemType.Date || li[i + 1].Typ == DecreeToken.ItemType.Number)))
                                    {
                                        continue;
                                    }
                                }
                                // The span ends right before the name. When the name starts at the very
                                // first token of the text there is nothing before it, so nothing to trim.
                                Pullenti.Ner.Token beforeName = l.BeginToken.Previous;
                                if (beforeName != null)
                                {
                                    int ee = beforeName.EndChar;
                                    if (ee > res.BeginChar && (ee < res.EndChar))
                                    {
                                        res.EndChar = ee;
                                    }
                                }
                                break;
                            }
                            if (l.Typ == DecreeToken.ItemType.Number)
                            {
                                wasNum = true;
                            }
                            if (i == 0)
                            {
                                // The list opens with a type, or with an owner/organization directly
                                // followed by a date or number, which is treated like a type.
                                if (l.Typ == DecreeToken.ItemType.Typ)
                                {
                                    wasTyp = true;
                                }
                                else if (l.Typ == DecreeToken.ItemType.Owner || l.Typ == DecreeToken.ItemType.Org)
                                {
                                    if (((i + 1) < li.Count) && ((li[1].Typ == DecreeToken.ItemType.Date || li[1].Typ == DecreeToken.ItemType.Number)))
                                    {
                                        wasTyp = true;
                                    }
                                }
                                if (wasTyp)
                                {
                                    // "approved [by] <decree>": the span restarts at the approving decree.
                                    Pullenti.Ner.Token tt0 = l.BeginToken.Previous;
                                    if (tt0 != null && tt0.IsChar('.'))
                                    {
                                        tt0 = tt0.Previous;
                                    }
                                    if (tt0 != null && ((tt0.IsValue("УТВЕРЖДЕННЫЙ", null) || tt0.IsValue("УТВЕРДИТЬ", null) || tt0.IsValue("УТВ", null))))
                                    {
                                        if (l.BeginChar > res.BeginChar)
                                        {
                                            res.BeginChar = l.BeginChar;
                                            if (res.EndChar < res.BeginChar)
                                            {
                                                res.EndChar = t.EndChar;
                                            }
                                            res.IsAdopted = true;
                                        }
                                    }
                                }
                            }
                        }
                        // Resume scanning after the last parsed item (a closing ')' is revisited).
                        if (li.Count > 0)
                        {
                            tt = li[li.Count - 1].EndToken;
                            if (tt.IsChar(')'))
                            {
                                tt = tt.Previous;
                            }
                            continue;
                        }
                    }
                    if (wasTyp)
                    {
                        DecreeToken na = DecreeToken.TryAttachName(tt, dr.Typ0, true, false);
                        if (na != null && tt.BeginChar > t.BeginChar)
                        {
                            Pullenti.Ner.Token tt1 = na.EndToken.Next;
                            if (tt1 != null && tt1.IsCharOf(",()"))
                            {
                                tt1 = tt1.Next;
                            }
                            // Name followed by "approved ..." inside the token: keep scanning from there.
                            if (tt1 != null && (tt1.EndChar < t.EndChar))
                            {
                                if (tt1.IsValue("УТВЕРЖДЕННЫЙ", null) || tt1.IsValue("УТВЕРДИТЬ", null) || tt1.IsValue("УТВ", null))
                                {
                                    tt = tt1;
                                    continue;
                                }
                            }
                            // Name introduced by ':' : the span starts at the name.
                            if (tt.Previous != null && tt.Previous.IsChar(':') && na.EndChar <= res.EndChar)
                            {
                                res.BeginChar = tt.BeginChar;
                                break;
                            }
                            // Otherwise the span ends right before the name.
                            if (tt.Previous != null && tt.Previous.EndChar > res.BeginChar)
                            {
                                res.EndChar = tt.Previous.EndChar;
                                break;
                            }
                        }
                    }
                }
                return(res);
            }

            // ---------- The token refers to a part of a decree ----------
            Pullenti.Ner.Decree.DecreePartReferent dpr = t.GetReferent() as Pullenti.Ner.Decree.DecreePartReferent;
            if (dpr == null)
            {
                return(null);
            }
            // Right-hand side of "part - part" that forms a range: no descriptor is produced for it.
            if ((t.Previous != null && t.Previous.IsHiphen && (t.Previous.Previous is Pullenti.Ner.ReferentToken)) && (t.Previous.Previous.GetReferent() is Pullenti.Ner.Decree.DecreePartReferent))
            {
                if (Pullenti.Ner.Decree.DecreePartReferent.CreateRangeReferent(t.Previous.Previous.GetReferent() as Pullenti.Ner.Decree.DecreePartReferent, dpr) != null)
                {
                    return(null);
                }
            }
            Pullenti.Ner.Token t1 = t;
            bool hasDiap          = false;

            // Left-hand side of "part - part" that forms a range: the descriptor covers both tokens
            // and refers to the range referent.
            Pullenti.Ner.ReferentToken DiapRef = null;
            if ((t.Next != null && t.Next.IsHiphen && (t.Next.Next is Pullenti.Ner.ReferentToken)) && (t.Next.Next.GetReferent() is Pullenti.Ner.Decree.DecreePartReferent))
            {
                Pullenti.Ner.Decree.DecreePartReferent diap = Pullenti.Ner.Decree.DecreePartReferent.CreateRangeReferent(dpr, t.Next.Next.GetReferent() as Pullenti.Ner.Decree.DecreePartReferent);
                if (diap != null)
                {
                    dpr     = diap;
                    hasDiap = true;
                    t1      = t.Next.Next;
                    DiapRef = t1 as Pullenti.Ner.ReferentToken;
                }
            }
            res = new CanonicDecreeRefUri(t.Kit.Sofa.Text)
            {
                Ref = dpr, BeginChar = t.BeginChar, EndChar = t1.EndChar, IsDiap = hasDiap
            };
            // Enclosed in parentheses from outside: keep the full span.
            if ((t.Previous != null && t.Previous.IsChar('(') && t1.Next != null) && t1.Next.IsChar(')'))
            {
                return(res);
            }
            // If the part token embeds a decree reference, exclude the decree from the span:
            // cut before it (and before a preceding preposition) or, when the decree comes
            // first, start the span at the decree's beginning.
            for (Pullenti.Ner.Token tt = rt.BeginToken; tt != null && tt.EndChar <= t.EndChar; tt = tt.Next)
            {
                if (tt.GetReferent() is Pullenti.Ner.Decree.DecreeReferent)
                {
                    if (tt.BeginChar > t.BeginChar)
                    {
                        res.EndChar = tt.Previous.EndChar;
                        if (tt.Previous.Morph.Class.IsPreposition && tt.Previous.Previous != null)
                        {
                            res.EndChar = tt.Previous.Previous.EndChar;
                        }
                    }
                    else if (tt.EndChar < t.EndChar)
                    {
                        res.BeginChar = tt.BeginChar;
                    }
                    break;
                }
            }
            bool hasSameBefore = _hasSameDecree(t, dpr, true);
            bool hasSameAfter  = _hasSameDecree(t, dpr, false);

            // Find the slot whose part type has the highest rank (ptmin / max). The runner-up
            // (ptmin2 / max2) is tracked as well but is not read later in this method.
            PartToken.ItemType ptmin  = PartToken.ItemType.Prefix;
            PartToken.ItemType ptmin2 = PartToken.ItemType.Prefix;
            int max  = 0;
            int max2 = 0;

            foreach (Pullenti.Ner.Slot s in dpr.Slots)
            {
                PartToken.ItemType pt = PartToken._getTypeByAttrName(s.TypeName);
                if (pt == PartToken.ItemType.Prefix)
                {
                    continue;
                }
                int co = PartToken._getRank(pt);
                if (co < 1)
                {
                    // An unranked type is only kept for a Part slot on a referent that also has a
                    // CLAUSE slot; it then takes the rank of Paragraph.
                    if (pt == PartToken.ItemType.Part && dpr.FindSlot(Pullenti.Ner.Decree.DecreePartReferent.ATTR_CLAUSE, null, true) != null)
                    {
                        co = PartToken._getRank(PartToken.ItemType.Paragraph);
                    }
                    else
                    {
                        continue;
                    }
                }
                if (co > max)
                {
                    max2   = max;
                    ptmin2 = ptmin;
                    max    = co;
                    ptmin  = pt;
                }
                else if (co > max2)
                {
                    max2   = co;
                    ptmin2 = pt;
                }
            }
            if (ptmin != PartToken.ItemType.Prefix)
            {
                // First attempt: find a part token of the selected type and shrink the span to it.
                for (Pullenti.Ner.Token tt = rt.BeginToken; tt != null && tt.EndChar <= res.EndChar; tt = tt.Next)
                {
                    if (tt.BeginChar >= res.BeginChar)
                    {
                        PartToken pt = PartToken.TryAttach(tt, null, false, false);
                        if (pt != null && pt.Typ == ptmin)
                        {
                            res.BeginChar = pt.BeginChar;
                            res.EndChar   = pt.EndChar;
                            // An appendix token ending with the preposition "К" ("to"): drop the preposition.
                            if (pt.Typ == PartToken.ItemType.Appendix && pt.EndToken.IsValue("К", null) && pt.BeginToken != pt.EndToken)
                            {
                                res.EndChar = pt.EndToken.Previous.EndChar;
                            }
                            if (pt.EndChar == t.EndChar)
                            {
                                // Followed by ", <part>" whose text does not start with a letter:
                                // narrow the span to the first value of the part token.
                                if ((t.Next != null && t.Next.IsCommaAnd && (t.Next.Next is Pullenti.Ner.ReferentToken)) && (t.Next.Next.GetReferent() is Pullenti.Ner.Decree.DecreePartReferent))
                                {
                                    Pullenti.Ner.Token tt1 = (t.Next.Next as Pullenti.Ner.ReferentToken).BeginToken;
                                    bool ok = true;
                                    if (tt1.Chars.IsLetter)
                                    {
                                        ok = false;
                                    }
                                    if (ok)
                                    {
                                        foreach (PartToken.PartValue v in pt.Values)
                                        {
                                            res.BeginChar = v.BeginChar;
                                            res.EndChar   = v.EndChar;
                                            break;
                                        }
                                    }
                                }
                            }
                            if (!hasDiap)
                            {
                                return(res);
                            }
                            break;
                        }
                    }
                }
                // For a range, extend the end over the leading run of tokens of the second
                // referent token, stopping at a comma or at the first whitespace gap.
                if (hasDiap && DiapRef != null)
                {
                    for (Pullenti.Ner.Token tt = DiapRef.BeginToken; tt != null && tt.EndChar <= DiapRef.EndChar; tt = tt.Next)
                    {
                        if (tt.IsChar(','))
                        {
                            break;
                        }
                        if (tt != DiapRef.BeginToken && tt.IsWhitespaceBefore)
                        {
                            break;
                        }
                        res.EndChar = tt.EndChar;
                    }
                    return(res);
                }
            }
            if (((hasSameBefore || hasSameAfter)) && ptmin != PartToken.ItemType.Prefix)
            {
                for (Pullenti.Ner.Token tt = rt.BeginToken; tt != null && tt.EndChar <= res.EndChar; tt = tt.Next)
                {
                    if (tt.BeginChar >= res.BeginChar)
                    {
                        // Part tokens are only parsed here when _hasSameDecree reported nothing before.
                        PartToken pt = (!hasSameBefore ? PartToken.TryAttach(tt, null, false, false) : null);
                        if (pt != null)
                        {
                            if (pt.Typ == ptmin)
                            {
                                // Span becomes the first value of the part token (if it has any).
                                foreach (PartToken.PartValue v in pt.Values)
                                {
                                    res.BeginChar = v.BeginChar;
                                    res.EndChar   = v.EndChar;
                                    return(res);
                                }
                            }
                            tt = pt.EndToken;
                            continue;
                        }
                        if ((tt is Pullenti.Ner.NumberToken) && tt.BeginChar == res.BeginChar)
                        {
                            // Span starts with a number: take the dotted number "N.N.N" written
                            // without spaces, optionally followed by "-" and a second such number.
                            res.EndChar = tt.EndChar;
                            for (; tt != null && tt.Next != null;)
                            {
                                if (!tt.Next.IsChar('.') || tt.IsWhitespaceAfter || tt.Next.IsWhitespaceAfter)
                                {
                                    break;
                                }
                                if (!(tt.Next.Next is Pullenti.Ner.NumberToken))
                                {
                                    break;
                                }
                                tt          = tt.Next.Next;
                                res.EndChar = tt.EndChar;
                            }
                            if (tt.Next != null && tt.Next.IsHiphen)
                            {
                                if (tt.Next.Next is Pullenti.Ner.NumberToken)
                                {
                                    tt          = tt.Next.Next;
                                    res.EndChar = tt.EndChar;
                                    for (; tt != null && tt.Next != null;)
                                    {
                                        if (!tt.Next.IsChar('.') || tt.IsWhitespaceAfter || tt.Next.IsWhitespaceAfter)
                                        {
                                            break;
                                        }
                                        if (!(tt.Next.Next is Pullenti.Ner.NumberToken))
                                        {
                                            break;
                                        }
                                        tt          = tt.Next.Next;
                                        res.EndChar = tt.EndChar;
                                    }
                                }
                                else if (tt.Next.Next != null && (tt.Next.Next.GetReferent() is Pullenti.Ner.Decree.DecreePartReferent) && hasDiap)
                                {
                                    // Range whose right side is a part referent: include its first inner token.
                                    res.EndChar = (tt.Next.Next as Pullenti.Ner.MetaToken).BeginToken.EndChar;
                                }
                            }
                            return(res);
                        }
                        // Span starts with a bracketed/quoted single token: take the whole bracket sequence.
                        if (Pullenti.Ner.Core.BracketHelper.CanBeStartOfSequence(tt, true, false) && tt.BeginChar == res.BeginChar && hasSameBefore)
                        {
                            Pullenti.Ner.Core.BracketSequenceToken br = Pullenti.Ner.Core.BracketHelper.TryParse(tt, Pullenti.Ner.Core.BracketParseAttr.No, 100);
                            if (br != null && br.EndToken.Previous == tt.Next)
                            {
                                res.EndChar = br.EndChar;
                                return(res);
                            }
                        }
                    }
                }
                return(res);
            }
            if (!hasSameBefore && !hasSameAfter && ptmin != PartToken.ItemType.Prefix)
            {
                // Fallback: parse a list of part tokens and take the first one of the selected
                // type, extended over "-<number>" or, for a range, "-<first token of the part>".
                for (Pullenti.Ner.Token tt = rt.BeginToken; tt != null && tt.EndChar <= res.EndChar; tt = tt.Next)
                {
                    if (tt.BeginChar >= res.BeginChar)
                    {
                        List <PartToken> pts = PartToken.TryAttachList(tt, false, 40);
                        if (pts == null || pts.Count == 0)
                        {
                            break;
                        }
                        for (int i = 0; i < pts.Count; i++)
                        {
                            if (pts[i].Typ == ptmin)
                            {
                                res.BeginChar = pts[i].BeginChar;
                                res.EndChar   = pts[i].EndChar;
                                tt            = pts[i].EndToken;
                                if (tt.Next != null && tt.Next.IsHiphen)
                                {
                                    if (tt.Next.Next is Pullenti.Ner.NumberToken)
                                    {
                                        res.EndChar = tt.Next.Next.EndChar;
                                    }
                                    else if (tt.Next.Next != null && (tt.Next.Next.GetReferent() is Pullenti.Ner.Decree.DecreePartReferent) && hasDiap)
                                    {
                                        res.EndChar = (tt.Next.Next as Pullenti.Ner.MetaToken).BeginToken.EndChar;
                                    }
                                }
                                return(res);
                            }
                        }
                    }
                }
            }
            return(res);
        }
예제 #29
0
        /// <summary>
        /// Builds a <see cref="TitlePageReferent"/> from the tokens starting at <paramref name="begin"/>:
        /// picks the best title-name candidate among the leading lines, then scans the tokens for
        /// type/speciality markers, persons (with their roles), date, city and organization.
        /// </summary>
        /// <param name="begin">First token of the region to analyse.</param>
        /// <param name="maxCharPos">When greater than zero, character position after which scanning stops.</param>
        /// <param name="kit">
        /// Analysis kit used to embed the created referent tokens. When null, the title occurrence is
        /// recorded directly on the referent and later title occurrences are not searched for.
        /// </param>
        /// <param name="endToken">
        /// Set to <paramref name="begin"/> on entry and moved forward to the furthest token that
        /// contributed to the result.
        /// </param>
        /// <returns>The filled referent, or null when no lines were parsed or no slot was filled.</returns>
        internal static TitlePageReferent _process(Pullenti.Ner.Token begin, int maxCharPos, Pullenti.Ner.Core.AnalysisKit kit, out Pullenti.Ner.Token endToken)
        {
            endToken = begin;
            TitlePageReferent res = new TitlePageReferent();

            Pullenti.Ner.Core.Termin term = null;
            // NOTE(review): 30 and 1500 are presumably limits on line count / characters - confirm against Line.Parse.
            List<Pullenti.Ner.Titlepage.Internal.Line> lines = Pullenti.Ner.Titlepage.Internal.Line.Parse(begin, 30, 1500, maxCharPos);

            if (lines == null || lines.Count < 1)
            {
                return null;
            }

            // cou = number of leading lines considered part of the title page: everything before
            // the first line that can start the text body or the table of contents.
            int cou = lines.Count;
            int minNewlinesCount = 10;
            // Histogram: NewlinesBeforeCount value -> number of lines having it (first line excluded).
            Dictionary<int, int> linesCountStat = new Dictionary<int, int>();

            for (int i = 0; i < lines.Count; i++)
            {
                if (Pullenti.Ner.Titlepage.Internal.TitleNameToken.CanBeStartOfTextOrContent(lines[i].BeginToken, lines[i].EndToken))
                {
                    cou = i;
                    break;
                }
                int newlines = lines[i].NewlinesBeforeCount;
                if (i > 0 && newlines > 0)
                {
                    int seen;
                    if (linesCountStat.TryGetValue(newlines, out seen))
                    {
                        linesCountStat[newlines] = seen + 1;
                    }
                    else
                    {
                        linesCountStat.Add(newlines, 1);
                    }
                }
            }

            // The most frequent newline gap becomes the reference line spacing (default 10 when no statistics).
            int max = 0;
            foreach (KeyValuePair<int, int> kp in linesCountStat)
            {
                if (kp.Value > max)
                {
                    max = kp.Value;
                    minNewlinesCount = kp.Key;
                }
            }

            // Character position where the title-page region ends.
            int endChar = (cou > 0 ? lines[cou - 1].EndChar : 0);
            if (maxCharPos > 0 && endChar > maxCharPos)
            {
                endChar = maxCharPos;
            }

            // Collect title-name candidates from every window of up to 5 consecutive lines.
            List<Pullenti.Ner.Titlepage.Internal.TitleNameToken> names = new List<Pullenti.Ner.Titlepage.Internal.TitleNameToken>();
            for (int i = 0; i < cou; i++)
            {
                for (int j = i; (j < cou) && (j < (i + 5)); j++)
                {
                    if (j > i)
                    {
                        // Do not let a window span a language switch between adjacent lines...
                        if (lines[j - 1].IsPureEn && lines[j].IsPureRu)
                        {
                            break;
                        }
                        if (lines[j - 1].IsPureRu && lines[j].IsPureEn)
                        {
                            break;
                        }
                        // ...or a gap at least twice the reference line spacing.
                        if (lines[j].NewlinesBeforeCount >= (minNewlinesCount * 2))
                        {
                            break;
                        }
                    }
                    Pullenti.Ner.Titlepage.Internal.TitleNameToken ttt = Pullenti.Ner.Titlepage.Internal.TitleNameToken.TryParse(lines[i].BeginToken, lines[j].EndToken, minNewlinesCount);
                    if (ttt != null)
                    {
                        if (lines[i].IsPureEn)
                        {
                            ttt.Morph.Language = Pullenti.Morph.MorphLang.EN;
                        }
                        else if (lines[i].IsPureRu)
                        {
                            ttt.Morph.Language = Pullenti.Morph.MorphLang.RU;
                        }
                        names.Add(ttt);
                    }
                }
            }
            Pullenti.Ner.Titlepage.Internal.TitleNameToken.Sort(names);

            Pullenti.Ner.ReferentToken nameRt = null;
            if (names.Count > 0)
            {
                // Take the first candidate after sorting; if it is English, prefer the first
                // Russian candidate with a positive rank when there is one.
                int i0 = 0;
                if (names[i0].Morph.Language.IsEn)
                {
                    for (int ii = 1; ii < names.Count; ii++)
                    {
                        if (names[ii].Morph.Language.IsRu && names[ii].Rank > 0)
                        {
                            i0 = ii;
                            break;
                        }
                    }
                }
                term = res.AddName(names[i0].BeginNameToken, names[i0].EndNameToken);
                if (names[i0].TypeValue != null)
                {
                    res.AddType(names[i0].TypeValue);
                }
                if (names[i0].Speciality != null)
                {
                    res.Speciality = names[i0].Speciality;
                }
                Pullenti.Ner.ReferentToken rt = new Pullenti.Ner.ReferentToken(res, names[i0].BeginToken, names[i0].EndToken);
                if (kit != null)
                {
                    kit.EmbedToken(rt);
                }
                else
                {
                    res.AddOccurence(new Pullenti.Ner.TextAnnotation(rt.BeginToken, rt.EndToken));
                }
                endToken = rt.EndToken;
                nameRt = rt;
                // The title starts exactly at 'begin': continue from the new referent token instead.
                if (begin.BeginChar == rt.BeginChar)
                {
                    begin = rt;
                }
            }

            if (term != null && kit != null)
            {
                // Wrap every other match of the title termin in the kit's token chain into a referent token.
                for (Pullenti.Ner.Token t = kit.FirstToken; t != null; t = t.Next)
                {
                    Pullenti.Ner.Core.TerminToken tok = term.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
                    if (tok == null)
                    {
                        continue;
                    }
                    Pullenti.Ner.Token t0 = t;
                    Pullenti.Ner.Token t1 = tok.EndToken;
                    // Include a trailing period.
                    if (t1.Next != null && t1.Next.IsChar('.'))
                    {
                        t1 = t1.Next;
                    }
                    // Include surrounding brackets/quotes when both sides are present.
                    if (Pullenti.Ner.Core.BracketHelper.CanBeStartOfSequence(t0.Previous, false, false) && Pullenti.Ner.Core.BracketHelper.CanBeEndOfSequence(t1.Next, false, null, false))
                    {
                        t0 = t0.Previous;
                        t1 = t1.Next;
                    }
                    Pullenti.Ner.ReferentToken rt = new Pullenti.Ner.ReferentToken(res, t0, t1);
                    kit.EmbedToken(rt);
                    // Continue the scan after the embedded token.
                    t = rt;
                }
            }

            Pullenti.Ner.Titlepage.Internal.PersonRelations pr = new Pullenti.Ner.Titlepage.Internal.PersonRelations();
            // Role established by the most recent marker or person; applied to following persons.
            Pullenti.Ner.Titlepage.Internal.TitleItemToken.Types persTyp = Pullenti.Ner.Titlepage.Internal.TitleItemToken.Types.Undefined;
            List<Pullenti.Ner.Titlepage.Internal.TitleItemToken.Types> persTypes = pr.RelTypes;

            for (Pullenti.Ner.Token t = begin; t != null; t = t.Next)
            {
                if (maxCharPos > 0 && t.BeginChar > maxCharPos)
                {
                    break;
                }
                // Skip the title token itself.
                if (t == nameRt)
                {
                    continue;
                }
                Pullenti.Ner.Titlepage.Internal.TitleItemToken tpt = Pullenti.Ner.Titlepage.Internal.TitleItemToken.TryAttach(t);
                if (tpt != null)
                {
                    // Any recognized title item resets the pending person role.
                    persTyp = Pullenti.Ner.Titlepage.Internal.TitleItemToken.Types.Undefined;
                    if (tpt.Typ == Pullenti.Ner.Titlepage.Internal.TitleItemToken.Types.Typ)
                    {
                        if (res.Types.Count == 0)
                        {
                            res.AddType(tpt.Value);
                        }
                        else if (res.Types.Count == 1)
                        {
                            string ty = res.Types[0].ToUpper();
                            if (ty == "РЕФЕРАТ")
                            {
                                res.AddType(tpt.Value);
                            }
                            else if (ty == "АВТОРЕФЕРАТ")
                            {
                                // Merge the existing type with a following dissertation kind into
                                // a single combined type value (Russian and Ukrainian variants).
                                if (tpt.Value == "КАНДИДАТСКАЯ ДИССЕРТАЦИЯ")
                                {
                                    res.AddSlot(TitlePageReferent.ATTR_TYPE, "автореферат кандидатской диссертации", true, 0);
                                }
                                else if (tpt.Value == "ДОКТОРСКАЯ ДИССЕРТАЦИЯ")
                                {
                                    res.AddSlot(TitlePageReferent.ATTR_TYPE, "автореферат докторской диссертации", true, 0);
                                }
                                else if (tpt.Value == "МАГИСТЕРСКАЯ ДИССЕРТАЦИЯ")
                                {
                                    res.AddSlot(TitlePageReferent.ATTR_TYPE, "автореферат магистерской диссертации", true, 0);
                                }
                                else if (tpt.Value == "КАНДИДАТСЬКА ДИСЕРТАЦІЯ")
                                {
                                    res.AddSlot(TitlePageReferent.ATTR_TYPE, "автореферат кандидатської дисертації", true, 0);
                                }
                                else if (tpt.Value == "ДОКТОРСЬКА ДИСЕРТАЦІЯ")
                                {
                                    res.AddSlot(TitlePageReferent.ATTR_TYPE, "автореферат докторської дисертації", true, 0);
                                }
                                else if (tpt.Value == "МАГІСТЕРСЬКА ДИСЕРТАЦІЯ")
                                {
                                    res.AddSlot(TitlePageReferent.ATTR_TYPE, "автореферат магістерської дисертації", true, 0);
                                }
                                else
                                {
                                    res.AddType(tpt.Value);
                                }
                            }
                            else if (tpt.Value == "РЕФЕРАТ" || tpt.Value == "АВТОРЕФЕРАТ")
                            {
                                // Add only when the existing type does not already contain this word.
                                if (!ty.Contains(tpt.Value))
                                {
                                    res.AddType(tpt.Value);
                                }
                            }
                        }
                    }
                    else if (tpt.Typ == Pullenti.Ner.Titlepage.Internal.TitleItemToken.Types.Speciality)
                    {
                        if (res.Speciality == null)
                        {
                            res.Speciality = tpt.Value;
                        }
                    }
                    else if (persTypes.Contains(tpt.Typ))
                    {
                        // A person-role marker: remember it for the persons that follow.
                        persTyp = tpt.Typ;
                    }
                    t = tpt.EndToken;
                    if (t.EndChar > endToken.EndChar)
                    {
                        endToken = t;
                    }
                    // Swallow a ':' or '-' separator after the marker.
                    if (t.Next != null && t.Next.IsCharOf(":-"))
                    {
                        t = t.Next;
                    }
                    continue;
                }
                // Referents are collected only inside the title-page region.
                if (t.EndChar > endChar)
                {
                    break;
                }
                List<Pullenti.Ner.Referent> rli = t.GetReferents();
                if (rli == null)
                {
                    continue;
                }
                if (!t.IsNewlineBefore && (t.Previous is Pullenti.Ner.TextToken))
                {
                    // Skip referents preceded on the same line by "ИМЕНИ" / "ИМ" / "ИМ."
                    // (presumably the "named after ..." part of an organization name).
                    string s = (t.Previous as Pullenti.Ner.TextToken).Term;
                    if (s == "ИМЕНИ" || s == "ИМ")
                    {
                        continue;
                    }
                    if (s == "." && t.Previous.Previous != null && t.Previous.Previous.IsValue("ИМ", null))
                    {
                        continue;
                    }
                }
                foreach (Pullenti.Ner.Referent r in rli)
                {
                    if (r is Pullenti.Ner.Person.PersonReferent)
                    {
                        // A person counts only when it is the first referent of the token.
                        if (r != rli[0])
                        {
                            continue;
                        }
                        Pullenti.Ner.Person.PersonReferent p = r as Pullenti.Ner.Person.PersonReferent;
                        if (persTyp != Pullenti.Ner.Titlepage.Internal.TitleItemToken.Types.Undefined)
                        {
                            // A period right before the person ends the scope of the pending role.
                            if (t.Previous != null && t.Previous.IsChar('.'))
                            {
                                persTyp = Pullenti.Ner.Titlepage.Internal.TitleItemToken.Types.Undefined;
                            }
                        }
                        Pullenti.Ner.Titlepage.Internal.TitleItemToken.Types typ = pr.CalcTypFromAttrs(p);
                        if (typ != Pullenti.Ner.Titlepage.Internal.TitleItemToken.Types.Undefined)
                        {
                            // The person's own attributes determine the role.
                            pr.Add(p, typ, 1);
                            persTyp = typ;
                        }
                        else if (persTyp != Pullenti.Ner.Titlepage.Internal.TitleItemToken.Types.Undefined)
                        {
                            // Inherit the pending role.
                            pr.Add(p, persTyp, 1);
                        }
                        else if (t.Previous != null && t.Previous.IsChar('©'))
                        {
                            // A copyright sign right before the person.
                            persTyp = Pullenti.Ner.Titlepage.Internal.TitleItemToken.Types.Worker;
                            pr.Add(p, persTyp, 1);
                        }
                        else
                        {
                            // Look ahead: the person is a Worker when the title referent follows
                            // before any other non-person referent or non-type title item.
                            for (Pullenti.Ner.Token tt = t.Next; tt != null; tt = tt.Next)
                            {
                                Pullenti.Ner.Referent rr = tt.GetReferent();
                                if (rr == res)
                                {
                                    persTyp = Pullenti.Ner.Titlepage.Internal.TitleItemToken.Types.Worker;
                                    break;
                                }
                                if (rr is Pullenti.Ner.Person.PersonReferent)
                                {
                                    // Fix: test the FOLLOWING person (rr). The original passed the current
                                    // person (r), which had already yielded Undefined just above.
                                    // Stop at a person that carries its own role; skip role-less persons.
                                    if (pr.CalcTypFromAttrs(rr as Pullenti.Ner.Person.PersonReferent) != Pullenti.Ner.Titlepage.Internal.TitleItemToken.Types.Undefined)
                                    {
                                        break;
                                    }
                                    else
                                    {
                                        continue;
                                    }
                                }
                                if (rr != null)
                                {
                                    break;
                                }
                                tpt = Pullenti.Ner.Titlepage.Internal.TitleItemToken.TryAttach(tt);
                                if (tpt != null)
                                {
                                    if (tpt.Typ != Pullenti.Ner.Titlepage.Internal.TitleItemToken.Types.Typ && tpt.Typ != Pullenti.Ner.Titlepage.Internal.TitleItemToken.Types.TypAndTheme)
                                    {
                                        break;
                                    }
                                    tt = tpt.EndToken;
                                    if (tt.EndChar > endToken.EndChar)
                                    {
                                        endToken = tt;
                                    }
                                    continue;
                                }
                            }
                            if (persTyp == Pullenti.Ner.Titlepage.Internal.TitleItemToken.Types.Undefined)
                            {
                                // Look back: the title referent or one of the listed role words
                                // directly before the person also makes it a Worker.
                                for (Pullenti.Ner.Token tt = t.Previous; tt != null; tt = tt.Previous)
                                {
                                    Pullenti.Ner.Referent rr = tt.GetReferent();
                                    if (rr == res)
                                    {
                                        persTyp = Pullenti.Ner.Titlepage.Internal.TitleItemToken.Types.Worker;
                                        break;
                                    }
                                    if (rr != null)
                                    {
                                        break;
                                    }
                                    if ((tt.IsValue("СТУДЕНТ", null) || tt.IsValue("СТУДЕНТКА", null) || tt.IsValue("СЛУШАТЕЛЬ", null)) || tt.IsValue("ДИПЛОМНИК", null) || tt.IsValue("ИСПОЛНИТЕЛЬ", null))
                                    {
                                        persTyp = Pullenti.Ner.Titlepage.Internal.TitleItemToken.Types.Worker;
                                        break;
                                    }
                                    tpt = Pullenti.Ner.Titlepage.Internal.TitleItemToken.TryAttach(tt);
                                    if (tpt != null && tpt.Typ != Pullenti.Ner.Titlepage.Internal.TitleItemToken.Types.Typ)
                                    {
                                        break;
                                    }
                                }
                            }
                            if (persTyp != Pullenti.Ner.Titlepage.Internal.TitleItemToken.Types.Undefined)
                            {
                                pr.Add(p, persTyp, 1);
                            }
                            else
                            {
                                // Role still unknown: register with the lower value 0.5 instead of 1.
                                pr.Add(p, persTyp, (float)0.5);
                            }
                            if (t.EndChar > endToken.EndChar)
                            {
                                endToken = t;
                            }
                        }
                        continue;
                    }
                    // A non-person referent in first position cancels the pending role.
                    if (r == rli[0])
                    {
                        persTyp = Pullenti.Ner.Titlepage.Internal.TitleItemToken.Types.Undefined;
                    }
                    if (r is Pullenti.Ner.Date.DateReferent)
                    {
                        // Keep the first date only.
                        if (res.Date == null)
                        {
                            res.Date = r as Pullenti.Ner.Date.DateReferent;
                            if (t.EndChar > endToken.EndChar)
                            {
                                endToken = t;
                            }
                        }
                    }
                    else if (r is Pullenti.Ner.Geo.GeoReferent)
                    {
                        // Keep the first geo referent that is a city.
                        if (res.City == null && (r as Pullenti.Ner.Geo.GeoReferent).IsCity)
                        {
                            res.City = r as Pullenti.Ner.Geo.GeoReferent;
                            if (t.EndChar > endToken.EndChar)
                            {
                                endToken = t;
                            }
                        }
                    }
                    if (r is Pullenti.Ner.Org.OrganizationReferent)
                    {
                        Pullenti.Ner.Org.OrganizationReferent org = r as Pullenti.Ner.Org.OrganizationReferent;
                        // An organization of type "курс" with a number from 1 to 7 sets the student year.
                        if (org.Types.Contains("курс") && org.Number != null)
                        {
                            int i;
                            if (int.TryParse(org.Number, out i))
                            {
                                if (i > 0 && (i < 8))
                                {
                                    res.StudentYear = i;
                                }
                            }
                        }
                        // Climb from departments to the nearest non-department ancestor.
                        for (; org.Higher != null; org = org.Higher)
                        {
                            if (org.Kind != Pullenti.Ner.Org.OrganizationKind.Department)
                            {
                                break;
                            }
                        }
                        if (org.Kind != Pullenti.Ner.Org.OrganizationKind.Department)
                        {
                            if (res.Org == null)
                            {
                                res.Org = org;
                            }
                            else if (Pullenti.Ner.Org.OrganizationReferent.CanBeHigher(res.Org, org))
                            {
                                res.Org = org;
                            }
                        }
                        if (t.EndChar > endToken.EndChar)
                        {
                            endToken = t;
                        }
                    }
                    // URIs and any geo referents extend the covered range even when not stored.
                    if ((r is Pullenti.Ner.Uri.UriReferent) || (r is Pullenti.Ner.Geo.GeoReferent))
                    {
                        if (t.EndChar > endToken.EndChar)
                        {
                            endToken = t;
                        }
                    }
                }
            }

            // Store every person under the attribute that corresponds to its role.
            foreach (Pullenti.Ner.Titlepage.Internal.TitleItemToken.Types ty in persTypes)
            {
                foreach (Pullenti.Ner.Person.PersonReferent p in pr.GetPersons(ty))
                {
                    if (pr.GetAttrNameForType(ty) != null)
                    {
                        res.AddSlot(pr.GetAttrNameForType(ty), p, false, 0);
                    }
                }
            }
            // No author found: fall back to the first person whose role is Undefined.
            if (res.GetSlotValue(TitlePageReferent.ATTR_AUTHOR) == null)
            {
                foreach (Pullenti.Ner.Person.PersonReferent p in pr.GetPersons(Pullenti.Ner.Titlepage.Internal.TitleItemToken.Types.Undefined))
                {
                    res.AddSlot(TitlePageReferent.ATTR_AUTHOR, p, false, 0);
                    break;
                }
            }
            // No city found: take it from the organization's geo slot when that is a city.
            if (res.City == null && res.Org != null)
            {
                Pullenti.Ner.Slot s = res.Org.FindSlot(Pullenti.Ner.Org.OrganizationReferent.ATTR_GEO, null, true);
                if (s != null && (s.Value is Pullenti.Ner.Geo.GeoReferent))
                {
                    if ((s.Value as Pullenti.Ner.Geo.GeoReferent).IsCity)
                    {
                        res.City = s.Value as Pullenti.Ner.Geo.GeoReferent;
                    }
                }
            }
            // No date found: try to parse one right after a geo referent (optionally past ':', ',' or a hyphen).
            if (res.Date == null)
            {
                for (Pullenti.Ner.Token t = begin; t != null && t.EndChar <= endChar; t = t.Next)
                {
                    Pullenti.Ner.Geo.GeoReferent city = t.GetReferent() as Pullenti.Ner.Geo.GeoReferent;
                    if (city == null)
                    {
                        continue;
                    }
                    if (t.Next is Pullenti.Ner.TextToken)
                    {
                        if (t.Next.IsCharOf(":,") || t.Next.IsHiphen)
                        {
                            t = t.Next;
                        }
                    }
                    // Uses the token's own kit, since the 'kit' argument may be null.
                    Pullenti.Ner.ReferentToken rt = t.Kit.ProcessReferent(Pullenti.Ner.Date.DateAnalyzer.ANALYZER_NAME, t.Next);
                    if (rt != null)
                    {
                        rt.SaveToLocalOntology();
                        res.Date = rt.Referent as Pullenti.Ner.Date.DateReferent;
                        if (kit != null)
                        {
                            kit.EmbedToken(rt);
                        }
                        break;
                    }
                }
            }

            if (res.Slots.Count == 0)
            {
                return null;
            }
            return res;
        }