Exemple #1
0
 // Handles a few special cases: the "1C" name (Latin or Cyrillic letter after the digit 1,
 // optionally separated by a hyphen) and a digit number glued to a following word.
 Pullenti.Ner.ReferentToken TryAttachSpec(Pullenti.Ner.Token t)
 {
     if (t == null)
     {
         return null;
     }
     Pullenti.Ner.Token first = t;
     Pullenti.Ner.NumberToken number = t as Pullenti.Ner.NumberToken;
     bool isDigitNumber = number != null && number.Typ == Pullenti.Ner.NumberSpellingType.Digit;

     if (isDigitNumber && number.Value == "1")
     {
         // An optional hyphen may stand between the digit and the letter.
         if (t.Next != null && t.Next.IsHiphen)
         {
             t = t.Next;
         }
         Pullenti.Ner.Token letter = t.Next;
         bool glued = (letter is Pullenti.Ner.TextToken) && !letter.IsWhitespaceBefore;
         if (glued && (letter.IsValue("C", null) || letter.IsValue("С", null)))
         {
             // Both spellings of the name are stored as values of the same referent.
             DenominationReferent brand = new DenominationReferent();
             brand.AddSlot(DenominationReferent.ATTR_VALUE, "1С", false, 0);
             brand.AddSlot(DenominationReferent.ATTR_VALUE, "1C", false, 0);
             return new Pullenti.Ner.ReferentToken(brand, first, letter);
         }
     }

     // Generic case: a digit number immediately followed by a letter word that is not all lower-case.
     // Note that t may have been advanced onto the hyphen by the branch above.
     if (!isDigitNumber)
     {
         return null;
     }
     Pullenti.Ner.TextToken word = t.Next as Pullenti.Ner.TextToken;
     if (word == null || t.IsWhitespaceAfter || word.Chars.IsAllLower || !word.Chars.IsLetter)
     {
         return null;
     }
     DenominationReferent denomination = new DenominationReferent();
     denomination.AddSlot(DenominationReferent.ATTR_VALUE, string.Format("{0}{1}", number.GetSourceText(), word.Term), false, 0);
     return new Pullenti.Ner.ReferentToken(denomination, first, word);
 }
Exemple #2
0
        /// <summary>
        /// Reads one token from <paramref name="stream"/>: a short type code followed by the token body.
        /// </summary>
        /// <param name="stream">Stream positioned at the token's type code.</param>
        /// <param name="kit">Analysis kit handed to the created token and to its Deserialize call.</param>
        /// <param name="vers">Format version forwarded to the token's Deserialize call.</param>
        /// <returns>The restored token, or null when the type code is 0.</returns>
        static Pullenti.Ner.Token DeserializeToken(Stream stream, Pullenti.Ner.Core.AnalysisKit kit, int vers)
        {
            short kind = DeserializeShort(stream);
            Pullenti.Ner.Token result;

            switch (kind)
            {
                case 0:
                    return null;
                case 1:
                    result = new Pullenti.Ner.TextToken(null, kit);
                    break;
                case 2:
                    result = new Pullenti.Ner.NumberToken(null, null, null, Pullenti.Ner.NumberSpellingType.Digit, kit);
                    break;
                case 3:
                    result = new Pullenti.Ner.ReferentToken(null, null, null, kit);
                    break;
                default:
                    // Every other code is restored as a plain meta token.
                    result = new Pullenti.Ner.MetaToken(null, null, kit);
                    break;
            }
            result.Deserialize(stream, kit, vers);

            Pullenti.Ner.MetaToken meta = result as Pullenti.Ner.MetaToken;
            if (meta == null)
            {
                return result;
            }

            // A meta token is followed by its inner token chain; bind the first and the last of it.
            Pullenti.Ner.Token inner = DeserializeTokens(stream, kit, vers);
            if (inner != null)
            {
                meta.m_BeginToken = inner;
                Pullenti.Ner.Token last = inner;
                while (last.Next != null)
                {
                    last = last.Next;
                }
                meta.m_EndToken = last;
            }
            return result;
        }
Exemple #3
0
        /// <summary>
        /// Tries to read a dotted domain name (or a dotted numeric address) starting at <paramref name="t0"/>.
        /// </summary>
        /// <param name="t0">First token of the candidate name.</param>
        /// <param name="check">
        /// When true, the result is accepted only if it is a four-number dotted address, or if its last
        /// token follows a dot and is recognised by <c>m_StdGroups</c>.
        /// </param>
        /// <param name="canBeWhitspaces">
        /// When true, a whitespace gap (not a line break) inside the name is tolerated, provided a
        /// look-ahead finds a token recognised by <c>m_StdGroups</c>.
        /// </param>
        /// <returns>
        /// A token spanning the name with its lower-cased text in <c>Value</c>, or null when nothing
        /// suitable was found (empty text, no inner dot, leading or doubled dots, failed check).
        /// </returns>
        public static UriItemToken AttachDomainName(Pullenti.Ner.Token t0, bool check, bool canBeWhitspaces)
        {
            StringBuilder txt = new StringBuilder();

            Pullenti.Ner.Token t1 = t0;
            int ipCount = 0;
            bool isIp = true;

            for (Pullenti.Ner.Token t = t0; t != null; t = t.Next)
            {
                if (t.IsWhitespaceBefore && t != t0)
                {
                    // A gap ends the name unless gaps are allowed and the look-ahead below
                    // reaches a token known to m_StdGroups.
                    bool ok = false;
                    if (!t.IsNewlineBefore && canBeWhitspaces)
                    {
                        for (Pullenti.Ner.Token tt1 = t; tt1 != null; tt1 = tt1.Next)
                        {
                            if (tt1.IsChar('.') || tt1.IsHiphen)
                            {
                                continue;
                            }
                            if (tt1.IsWhitespaceBefore)
                            {
                                if (tt1.IsNewlineBefore)
                                {
                                    break;
                                }
                                // Whitespace is acceptable only directly after a dot or a hyphen.
                                bool afterSeparator = tt1.Previous != null && (tt1.Previous.IsChar('.') || tt1.Previous.IsHiphen);
                                if (!afterSeparator)
                                {
                                    break;
                                }
                            }
                            if (!(tt1 is Pullenti.Ner.TextToken))
                            {
                                break;
                            }
                            if (m_StdGroups.TryParse(tt1, Pullenti.Ner.Core.TerminParseAttr.No) != null)
                            {
                                ok = true;
                                break;
                            }
                            if (!tt1.Chars.IsLatinLetter)
                            {
                                break;
                            }
                        }
                    }
                    if (!ok)
                    {
                        break;
                    }
                }
                if (t is Pullenti.Ner.NumberToken)
                {
                    Pullenti.Ner.NumberToken nt = t as Pullenti.Ner.NumberToken;
                    if (nt.IntValue == null)
                    {
                        break;
                    }
                    txt.Append(nt.GetSourceText());
                    t1 = t;
                    // Only digit-spelled numbers in 0..255 keep the "numeric address" hypothesis alive.
                    if (nt.Typ == Pullenti.Ner.NumberSpellingType.Digit && nt.IntValue.Value >= 0 && (nt.IntValue.Value < 256))
                    {
                        ipCount++;
                    }
                    else
                    {
                        isIp = false;
                    }
                    continue;
                }
                Pullenti.Ner.TextToken tt = t as Pullenti.Ner.TextToken;
                if (tt == null)
                {
                    break;
                }
                string src = tt.Term;
                char ch = src[0];
                if (!char.IsLetter(ch))
                {
                    // Of the non-letter tokens only '.', '-' and '_' may occur inside a name.
                    if (".-_".IndexOf(ch) < 0)
                    {
                        break;
                    }
                    if (ch != '.')
                    {
                        isIp = false;
                    }
                    if (ch == '-')
                    {
                        // Special case: a hyphen right after "vk.com" ends the name at that point.
                        if (string.Compare(txt.ToString(), "vk.com", true) == 0)
                        {
                            // Fix: the statement terminator belongs to the return statement itself;
                            // previously it was placed after the closing brace of this if-block.
                            return new UriItemToken(t0, t1)
                            {
                                Value = txt.ToString().ToLower()
                            };
                        }
                    }
                }
                else
                {
                    isIp = false;
                }
                txt.Append(src.ToLower());
                t1 = t;
            }
            if (txt.Length == 0)
            {
                return null;
            }
            if (ipCount != 4)
            {
                isIp = false;
            }

            // Validate the dots: none at the start, none doubled; a trailing dot is cut off.
            int points = 0;
            for (int i = 0; i < txt.Length; i++)
            {
                if (txt[i] != '.')
                {
                    continue;
                }
                if (i == 0)
                {
                    return null;
                }
                if (i >= (txt.Length - 1))
                {
                    txt.Length--;
                    t1 = t1.Previous;
                    break;
                }
                if (txt[i - 1] == '.' || txt[i + 1] == '.')
                {
                    return null;
                }
                points++;
            }
            if (points == 0)
            {
                return null;
            }

            if (check)
            {
                bool ok = isIp;
                if (!isIp)
                {
                    // NOTE(review): this looks unreachable - a text without any dot has already been
                    // rejected by the points == 0 test above. Kept as is to preserve behaviour.
                    if (txt.ToString() == "localhost")
                    {
                        ok = true;
                    }
                }
                if (!ok && t1.Previous != null && t1.Previous.IsChar('.'))
                {
                    if (m_StdGroups.TryParse(t1, Pullenti.Ner.Core.TerminParseAttr.No) != null)
                    {
                        ok = true;
                    }
                }
                if (!ok)
                {
                    return null;
                }
            }
            return new UriItemToken(t0, t1)
            {
                Value = txt.ToString().ToLower()
            };
        }
Exemple #4
0
        /// <summary>
        /// Reads the user part of a mail address backwards, ending at <paramref name="t1"/>.
        /// A group in curly braces ("{a, b; c}") yields one item per user.
        /// </summary>
        /// <param name="t1">Last token of the user part (the token walk goes through Previous).</param>
        /// <returns>
        /// The list of recognised user items (earliest first), or null when nothing could be read
        /// or a brace group is malformed or not opened by '{'.
        /// </returns>
        public static List <UriItemToken> AttachMailUsers(Pullenti.Ner.Token t1)
        {
            if (t1 == null)
            {
                return null;
            }

            if (t1.IsChar('}'))
            {
                // Brace group: read the last user, then keep reading users leftwards until '{'.
                List <UriItemToken> group = AttachMailUsers(t1.Previous);
                if (group == null)
                {
                    return null;
                }
                Pullenti.Ner.Token cur = group[0].BeginToken.Previous;
                while (cur != null)
                {
                    if (cur.IsChar('{'))
                    {
                        // The first item is stretched to cover the opening brace.
                        group[0].BeginToken = cur;
                        return group;
                    }
                    if (!cur.IsCharOf(";,"))
                    {
                        List <UriItemToken> single = AttachMailUsers(cur);
                        if (single == null)
                        {
                            return null;
                        }
                        group.Insert(0, single[0]);
                        cur = single[0].BeginToken;
                    }
                    cur = cur.Previous;
                }
                return null;
            }

            StringBuilder name = new StringBuilder();
            Pullenti.Ner.Token first = t1;

            for (Pullenti.Ner.Token cur = t1; cur != null; cur = cur.Previous)
            {
                if (cur.IsWhitespaceAfter)
                {
                    break;
                }
                Pullenti.Ner.NumberToken number = cur as Pullenti.Ner.NumberToken;
                if (number != null)
                {
                    name.Insert(0, number.GetSourceText());
                    first = cur;
                    continue;
                }
                Pullenti.Ner.TextToken word = cur as Pullenti.Ner.TextToken;
                if (word == null)
                {
                    break;
                }
                string source = word.GetSourceText();
                char   head   = source[0];
                // Besides letters only '.', '-' and '_' are allowed in the user part.
                if (!char.IsLetter(head) && ".-_".IndexOf(head) < 0)
                {
                    break;
                }
                name.Insert(0, source);
                first = cur;
            }

            if (name.Length == 0)
            {
                return null;
            }
            UriItemToken user = new UriItemToken(first, t1)
            {
                Value = name.ToString().ToLower()
            };
            List <UriItemToken> result = new List <UriItemToken>();
            result.Add(user);
            return result;
        }
Exemple #5
0
        /// <summary>
        /// Tries to recognise an eponym fragment (a "named after ..." part of an organization name)
        /// starting at <paramref name="t"/>.
        /// </summary>
        /// <param name="t">Token to start from; null yields null.</param>
        /// <param name="mustHasPrefix">
        /// When true and <paramref name="t"/> is a text token, the fragment must be introduced by one of the
        /// words "ИМЕНИ"/"ІМЕНІ"/"ИМ"/"ІМ"; the check is not applied when <paramref name="t"/> is not a text token.
        /// </param>
        /// <returns>A token whose <c>Eponyms</c> list holds the recognised names, or null.</returns>
        public static OrgItemEponymToken TryAttach(Pullenti.Ner.Token t, bool mustHasPrefix = false)
        {
            Pullenti.Ner.TextToken tt = t as Pullenti.Ner.TextToken;
            if (tt == null)
            {
                if (t == null)
                {
                    return null;
                }
                // A DATE referent whose text is one of a fixed set of dates is an eponym by itself.
                Pullenti.Ner.Referent r1 = t.GetReferent();
                if (r1 != null && r1.TypeName == "DATE")
                {
                    string str = r1.ToString().ToUpper();
                    if ((str == "1 МАЯ" || str == "7 ОКТЯБРЯ" || str == "9 МАЯ") || str == "8 МАРТА")
                    {
                        OrgItemEponymToken dt = new OrgItemEponymToken(t, t)
                        {
                            Eponyms = new List <string>()
                        };
                        dt.Eponyms.Add(str);
                        return dt;
                    }
                }
                // An age/anniversary number closely followed by a capitalised Cyrillic token.
                Pullenti.Ner.NumberToken age = Pullenti.Ner.Core.NumberHelper.TryParseAge(t);
                if ((age != null && (((age.EndToken.Next is Pullenti.Ner.TextToken) || (age.EndToken.Next is Pullenti.Ner.ReferentToken))) && (age.WhitespacesAfterCount < 3)) && !age.EndToken.Next.Chars.IsAllLower && age.EndToken.Next.Chars.IsCyrillicLetter)
                {
                    OrgItemEponymToken dt = new OrgItemEponymToken(t, age.EndToken.Next)
                    {
                        Eponyms = new List <string>()
                    };
                    dt.Eponyms.Add(string.Format("{0} {1}", age.Value, dt.EndToken.GetSourceText().ToUpper()));
                    return dt;
                }
                return null;
            }

            // t1 - first token of the name itself; full - an explicit prefix was seen;
            // hasName - one of the prefix words was seen.
            Pullenti.Ner.Token t1 = null;
            bool full             = false;
            bool hasName          = false;

            if (tt.Term == "ИМЕНИ" || tt.Term == "ІМЕНІ")
            {
                t1      = t.Next;
                full    = true;
                hasName = true;
            }
            else if (((tt.Term == "ИМ" || tt.Term == "ІМ")) && tt.Next != null)
            {
                if (tt.Next.IsChar('.'))
                {
                    t1   = tt.Next.Next;
                    full = true;
                }
                else if ((tt.Next is Pullenti.Ner.TextToken) && tt.Chars.IsAllLower && !tt.Next.Chars.IsAllLower)
                {
                    t1 = tt.Next;
                }
                hasName = true;
            }
            else if (tt.Previous != null && ((tt.Previous.IsValue("ФОНД", null) || tt.Previous.IsValue("ХРАМ", null) || tt.Previous.IsValue("ЦЕРКОВЬ", "ЦЕРКВА"))))
            {
                // No prefix word, but the previous token is one of a few nouns after which a name may follow directly.
                if ((!tt.Chars.IsCyrillicLetter || tt.Morph.Class.IsPreposition || tt.Morph.Class.IsConjunction) || !tt.Chars.IsLetter)
                {
                    return null;
                }
                if (tt.WhitespacesBeforeCount != 1)
                {
                    return null;
                }
                if (tt.Chars.IsAllLower)
                {
                    return null;
                }
                if (tt.Morph.Class.IsAdjective)
                {
                    // An adjective that opens a multi-token noun phrase is rejected.
                    Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(tt, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                    if (npt != null && npt.BeginToken != npt.EndToken)
                    {
                        return null;
                    }
                }
                OrgItemNameToken na = OrgItemNameToken.TryAttach(tt, null, false, true);
                if (na != null)
                {
                    if (na.IsEmptyWord || na.IsStdName || na.IsStdTail)
                    {
                        return null;
                    }
                }
                t1 = tt;
            }
            if (t1 == null || ((t1.IsNewlineBefore && !full)))
            {
                return null;
            }
            if (tt.Previous != null && tt.Previous.Morph.Class.IsPreposition)
            {
                return null;
            }
            if (mustHasPrefix && !hasName)
            {
                return null;
            }

            // After an explicit prefix, a DATE referent with a day but without a year is taken as the eponym.
            Pullenti.Ner.Referent r = t1.GetReferent();
            if ((r != null && r.TypeName == "DATE" && full) && r.FindSlot("DAY", null, true) != null && r.FindSlot("YEAR", null, true) == null)
            {
                OrgItemEponymToken dt = new OrgItemEponymToken(t, t1)
                {
                    Eponyms = new List <string>()
                };
                dt.Eponyms.Add(r.ToString().ToUpper());
                return dt;
            }

            // Optional "saint" word (possibly abbreviated with a dot) before the name.
            bool holy = false;

            if ((t1.IsValue("СВЯТОЙ", null) || t1.IsValue("СВЯТИЙ", null) || t1.IsValue("СВ", null)) || t1.IsValue("СВЯТ", null))
            {
                t1   = t1.Next;
                holy = true;
                if (t1 != null && t1.IsChar('.'))
                {
                    t1 = t1.Next;
                }
            }
            if (t1 == null)
            {
                return null;
            }

            // A dictionary noun/adjective may still be a person: ask the PERSON analyzer and accept
            // a multi-token person whose last token is the last name.
            Pullenti.Morph.MorphClass cl = t1.GetMorphClassInDictionary();
            if (cl.IsNoun || cl.IsAdjective)
            {
                Pullenti.Ner.ReferentToken rt = t1.Kit.ProcessReferent("PERSON", t1);
                if (rt != null && rt.Referent.TypeName == "PERSON" && rt.BeginToken != rt.EndToken)
                {
                    string e = rt.Referent.GetStringValue("LASTNAME");
                    if (e != null)
                    {
                        if (rt.EndToken.IsValue(e, null))
                        {
                            OrgItemEponymToken re = new OrgItemEponymToken(t, rt.EndToken);
                            re.Eponyms.Add(rt.EndToken.GetSourceText());
                            return re;
                        }
                    }
                }
            }

            // Anniversary form: an age number followed by a noun phrase.
            Pullenti.Ner.NumberToken nt = Pullenti.Ner.Core.NumberHelper.TryParseAnniversary(t1);
            if (nt != null && nt.Typ == Pullenti.Ner.NumberSpellingType.Age)
            {
                Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(nt.EndToken.Next, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                if (npt != null)
                {
                    string             s   = string.Format("{0}-{1} {2}", nt.Value, (t.Kit.BaseLanguage.IsUa ? "РОКІВ" : "ЛЕТ"), Pullenti.Ner.Core.MiscHelper.GetTextValue(npt.BeginToken, npt.EndToken, Pullenti.Ner.Core.GetTextAttr.No));
                    OrgItemEponymToken res = new OrgItemEponymToken(t, npt.EndToken);
                    res.Eponyms.Add(s);
                    return res;
                }
            }

            List <PersonItemToken> its = PersonItemToken.TryAttach(t1);

            if (its == null)
            {
                // No person items: a geographic referent token is accepted as the eponym.
                if ((t1 is Pullenti.Ner.ReferentToken) && (t1.GetReferent() is Pullenti.Ner.Geo.GeoReferent))
                {
                    string             s  = Pullenti.Ner.Core.MiscHelper.GetTextValue(t1, t1, Pullenti.Ner.Core.GetTextAttr.No);
                    OrgItemEponymToken re = new OrgItemEponymToken(t, t1);
                    re.Eponyms.Add(s);
                    return re;
                }
                return null;
            }
            List <string> eponims = new List <string>();
            int           i       = 0;

            // A leading lower-case word item is skipped.
            if (its[i].Typ == PersonItemType.LocaseWord)
            {
                i++;
            }
            if (i >= its.Count)
            {
                return null;
            }
            if (!full)
            {
                if (its[i].BeginToken.Morph.Class.IsAdjective && !its[i].BeginToken.Morph.Class.IsProperSurname)
                {
                    return null;
                }
            }
            if (its[i].Typ == PersonItemType.Initial)
            {
                // Initials followed by a surname/name, optionally repeated through "and": "I. I. X and P. Y".
                i++;
                while (true)
                {
                    if ((i < its.Count) && its[i].Typ == PersonItemType.Initial)
                    {
                        i++;
                    }
                    if (i >= its.Count || ((its[i].Typ != PersonItemType.Surname && its[i].Typ != PersonItemType.Name)))
                    {
                        break;
                    }
                    eponims.Add(its[i].Value);
                    t1 = its[i].EndToken;
                    if ((i + 2) >= its.Count || its[i + 1].Typ != PersonItemType.And || its[i + 2].Typ != PersonItemType.Initial)
                    {
                        break;
                    }
                    i += 3;
                }
            }
            else if (((i + 1) < its.Count) && its[i].Typ == PersonItemType.Name && its[i + 1].Typ == PersonItemType.Surname)
            {
                // "Name Surname", optionally followed by "and Name Surname"; only the surnames are stored.
                eponims.Add(its[i + 1].Value);
                t1 = its[i + 1].EndToken;
                i += 2;
                if ((((i + 2) < its.Count) && its[i].Typ == PersonItemType.And && its[i + 1].Typ == PersonItemType.Name) && its[i + 2].Typ == PersonItemType.Surname)
                {
                    eponims.Add(its[i + 2].Value);
                    t1 = its[i + 2].EndToken;
                }
            }
            else if (its[i].Typ == PersonItemType.Surname)
            {
                // Two trailing items with the same character class are merged into one surname.
                if (its.Count == (i + 2) && its[i].Chars == its[i + 1].Chars)
                {
                    its[i].Value   += (" " + its[i + 1].Value);
                    its[i].EndToken = its[i + 1].EndToken;
                    its.RemoveAt(i + 1);
                }
                eponims.Add(its[i].Value);
                if (((i + 1) < its.Count) && its[i + 1].Typ == PersonItemType.Name)
                {
                    if ((i + 2) == its.Count)
                    {
                        i++;
                    }
                    else if (its[i + 2].Typ != PersonItemType.Surname)
                    {
                        i++;
                    }
                }
                else if (((i + 1) < its.Count) && its[i + 1].Typ == PersonItemType.Initial)
                {
                    if ((i + 2) == its.Count)
                    {
                        i++;
                    }
                    else if (its[i + 2].Typ == PersonItemType.Initial && (i + 3) == its.Count)
                    {
                        i += 2;
                    }
                }
                else if (((i + 2) < its.Count) && its[i + 1].Typ == PersonItemType.And && its[i + 2].Typ == PersonItemType.Surname)
                {
                    // "X and Y": the second surname is added unless it parses as a noun phrase
                    // in a case that is neither genitive nor undefined.
                    bool ok = true;
                    Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(its[i + 2].BeginToken, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                    if (npt != null && !npt.Morph.Case.IsGenitive && !npt.Morph.Case.IsUndefined)
                    {
                        ok = false;
                    }
                    if (ok)
                    {
                        eponims.Add(its[i + 2].Value);
                        i += 2;
                    }
                }
                t1 = its[i].EndToken;
            }
            else if (its[i].Typ == PersonItemType.Name && holy)
            {
                // Saint's name, optionally with a second item of the same character class.
                t1 = its[i].EndToken;
                bool sec = false;
                if (((i + 1) < its.Count) && its[i].Chars == its[i + 1].Chars && its[i + 1].Typ != PersonItemType.Initial)
                {
                    sec = true;
                    t1  = its[i + 1].EndToken;
                }
                if (sec)
                {
                    eponims.Add(string.Format("СВЯТ.{0} {1}", its[i].Value, its[i + 1].Value));
                }
                else
                {
                    eponims.Add(string.Format("СВЯТ.{0}", its[i].Value));
                }
            }
            else if (full && (i + 1) == its.Count && ((its[i].Typ == PersonItemType.Name || its[i].Typ == PersonItemType.Surname)))
            {
                t1 = its[i].EndToken;
                eponims.Add(its[i].Value);
            }
            // Fix: the (i + 2) bound is checked first. Previously only its.Count == 3 was tested, so
            // when a leading lower-case word item had advanced i to 1, its[i + 2] was read out of range.
            else if (((i + 2) < its.Count) && (its[i].Typ == PersonItemType.Name && its.Count == 3 && its[i + 1].Typ == PersonItemType.Name) && its[i + 2].Typ == PersonItemType.Surname)
            {
                t1 = its[i + 2].EndToken;
                eponims.Add(string.Format("{0} {1} {2}", its[i].Value, its[i + 1].Value, its[i + 2].Value));
                i += 2;
            }
            if (eponims.Count == 0)
            {
                return null;
            }
            return new OrgItemEponymToken(t, t1)
            {
                Eponyms = eponims
            };
        }
Exemple #6
0
        /// <summary>
        /// Reads URI content starting at <paramref name="t0"/>: an optional domain name (via AttachDomainName)
        /// followed by numbers, letter tokens and the punctuation listed in <paramref name="chars"/>.
        /// </summary>
        /// <param name="t0">First token of the candidate content.</param>
        /// <param name="chars">Non-letter characters that are allowed inside the content.</param>
        /// <param name="canBeWhitespaces">
        /// Passed on to AttachDomainName; when true and a domain was found, whitespace gaps (not line breaks)
        /// may be tolerated under the conditions checked in the loop below.
        /// </param>
        /// <returns>
        /// A token spanning the content with the collected text in Value; the domain token alone when nothing
        /// with a letter or digit follows it (may be null); null when the domain value is shorter than 3
        /// characters or the text, without a leading "//", equals "WWW" ignoring case.
        /// </returns>
        static UriItemToken _AttachUriContent(Pullenti.Ner.Token t0, string chars, bool canBeWhitespaces = false)
        {
            StringBuilder txt = new StringBuilder();

            Pullenti.Ner.Token t1  = t0;
            UriItemToken       dom = AttachDomainName(t0, true, canBeWhitespaces);

            if (dom != null)
            {
                if (dom.Value.Length < 3)
                {
                    return(null);
                }
            }
            // Remembers the currently open '(' or '[' so that a closing bracket can be matched against it.
            char openChar = (char)0;

            // Continue right after the domain name if one was recognised.
            Pullenti.Ner.Token t = t0;
            if (dom != null)
            {
                t = dom.EndToken.Next;
            }
            for (; t != null; t = t.Next)
            {
                if (t != t0 && t.IsWhitespaceBefore)
                {
                    // A gap ends the content unless gaps are allowed, a domain was found,
                    // and one of the continuation conditions below holds.
                    if (t.IsNewlineBefore || !canBeWhitespaces)
                    {
                        break;
                    }
                    if (dom == null)
                    {
                        break;
                    }
                    if (t.Previous.IsHiphen)
                    {
                        // gap right after a hyphen - keep going
                    }
                    else if (t.Previous.IsCharOf(",;"))
                    {
                        break;
                    }
                    else if (t.Previous.IsChar('.') && t.Chars.IsLetter && t.LengthChar == 2)
                    {
                        // gap after a dot followed by a two-letter token - keep going
                    }
                    else
                    {
                        // Look ahead (up to the next whitespace): continue only if a known page
                        // extension or a slash/backslash is met before anything unsuitable.
                        bool ok = false;
                        Pullenti.Ner.Token tt1 = t;
                        if (t.IsCharOf("\\/"))
                        {
                            tt1 = t.Next;
                        }
                        Pullenti.Ner.Token tt0 = tt1;
                        for (; tt1 != null; tt1 = tt1.Next)
                        {
                            if (tt1 != tt0 && tt1.IsWhitespaceBefore)
                            {
                                break;
                            }
                            if (tt1 is Pullenti.Ner.NumberToken)
                            {
                                continue;
                            }
                            if (!(tt1 is Pullenti.Ner.TextToken))
                            {
                                break;
                            }
                            string term1 = (tt1 as Pullenti.Ner.TextToken).Term;
                            if (((term1 == "HTM" || term1 == "HTML" || term1 == "SHTML") || term1 == "ASP" || term1 == "ASPX") || term1 == "JSP")
                            {
                                ok = true;
                                break;
                            }
                            if (!tt1.Chars.IsLetter)
                            {
                                if (tt1.IsCharOf("\\/"))
                                {
                                    ok = true;
                                    break;
                                }
                                if (!tt1.IsCharOf(chars))
                                {
                                    break;
                                }
                            }
                            else if (!tt1.Chars.IsLatinLetter)
                            {
                                break;
                            }
                        }
                        if (!ok)
                        {
                            break;
                        }
                    }
                }
                if (t is Pullenti.Ner.NumberToken)
                {
                    Pullenti.Ner.NumberToken nt = t as Pullenti.Ner.NumberToken;
                    txt.Append(nt.GetSourceText());
                    t1 = t;
                    continue;
                }
                Pullenti.Ner.TextToken tt = t as Pullenti.Ner.TextToken;
                if (tt == null)
                {
                    // Not a text token: only two kinds of referent tokens may be absorbed.
                    Pullenti.Ner.ReferentToken rt = t as Pullenti.Ner.ReferentToken;
                    if (rt != null && rt.BeginToken.IsValue("РФ", null))
                    {
                        // "РФ" is taken only directly after a dot in the collected text.
                        if (txt.Length > 0 && txt[txt.Length - 1] == '.')
                        {
                            txt.Append(rt.BeginToken.GetSourceText());
                            t1 = t;
                            continue;
                        }
                    }
                    // A single-token referent written in Latin letters is appended as plain text.
                    if (rt != null && rt.Chars.IsLatinLetter && rt.BeginToken == rt.EndToken)
                    {
                        txt.Append(rt.BeginToken.GetSourceText());
                        t1 = t;
                        continue;
                    }
                    break;
                }
                string src = tt.GetSourceText();
                char   ch  = src[0];
                if (!char.IsLetter(ch))
                {
                    // Non-letter tokens must start with one of the allowed characters.
                    if (chars.IndexOf(ch) < 0)
                    {
                        break;
                    }
                    // Bracket tracking: a closing bracket without the matching opening one ends the content.
                    if (ch == '(' || ch == '[')
                    {
                        openChar = ch;
                    }
                    else if (ch == ')')
                    {
                        if (openChar != '(')
                        {
                            break;
                        }
                        openChar = (char)0;
                    }
                    else if (ch == ']')
                    {
                        if (openChar != '[')
                        {
                            break;
                        }
                        openChar = (char)0;
                    }
                }
                txt.Append(src);
                t1 = t;
            }
            // Nothing collected after the domain: the domain token (possibly null) is the result.
            if (txt.Length == 0)
            {
                return(dom);
            }
            int i;

            // The collected text must contain at least one letter or digit.
            for (i = 0; i < txt.Length; i++)
            {
                if (char.IsLetterOrDigit(txt[i]))
                {
                    break;
                }
            }
            if (i >= txt.Length)
            {
                return(dom);
            }
            // A trailing '.' or '/' is dropped from the text and from the token span.
            if (txt[txt.Length - 1] == '.' || txt[txt.Length - 1] == '/')
            {
                txt.Length--;
                t1 = t1.Previous;
            }
            if (dom != null)
            {
                txt.Insert(0, dom.Value);
            }
            string tmp = txt.ToString();

            // When the text starts with a double backslash, every double backslash is replaced with "//".
            if (tmp.StartsWith("\\\\"))
            {
                txt.Replace("\\\\", "//");
                tmp = txt.ToString();
            }
            // The "WWW" test below ignores a leading "//"; the returned Value keeps it.
            if (tmp.StartsWith("//"))
            {
                tmp = tmp.Substring(2);
            }
            if (string.Compare(tmp, "WWW", true) == 0)
            {
                return(null);
            }
            UriItemToken res = new UriItemToken(t0, t1)
            {
                Value = txt.ToString()
            };

            return(res);
        }
Exemple #7
0
        /// <summary>
        /// Builds a <see cref="BlockLine"/> for the text line that begins at <paramref name="t"/>.
        /// The method (1) finds the end of the line, (2) consumes a leading numbering prefix,
        /// (3) tries to recognise a block keyword through m_Ontology, a '§' sign or the
        /// <paramref name="names"/> collection, (4) gathers word statistics for the rest of the line and
        /// (5) if the type is still undefined, applies noun-phrase heuristics for
        /// Intro / Conslusion / Literature headings.
        /// </summary>
        /// <param name="t">First token of the line. A null token yields null.</param>
        /// <param name="names">Optional term collection tried against the text after the numbering/keyword; may be null.</param>
        /// <returns>
        /// The populated <see cref="BlockLine"/>, or null when <paramref name="t"/> is null or when an
        /// undefined-type line consists of nothing but a list-like noun phrase (see the check inside the
        /// Literature heuristic).
        /// </returns>
        public static BlockLine Create(Pullenti.Ner.Token t, Pullenti.Ner.Core.TerminCollection names)
        {
            if (t == null)
            {
                return(null);
            }
            BlockLine res = new BlockLine(t, t);

            // Extend the line up to the last token before the next token that starts a new line.
            for (Pullenti.Ner.Token tt = t; tt != null; tt = tt.Next)
            {
                if (tt != t && tt.IsNewlineBefore)
                {
                    break;
                }
                else
                {
                    res.EndToken = tt;
                }
            }
            // NOTE(review): nums is incremented below but never read in this method.
            int nums = 0;

            // Consume the leading numbering: a sequence of number tokens (or Roman numerals), each of
            // which must be followed by '.' or by a text token that is not all-lowercase.
            while (t != null && t.Next != null && t.EndChar <= res.EndChar)
            {
                if (t is Pullenti.Ner.NumberToken)
                {
                }
                else
                {
                    // Not a number token: accept it only if it parses as a Roman numeral that is not
                    // the very last token of the text.
                    Pullenti.Ner.NumberToken rom = Pullenti.Ner.Core.NumberHelper.TryParseRoman(t);
                    if (rom != null && rom.EndToken.Next != null)
                    {
                        t = rom.EndToken;
                    }
                    else
                    {
                        break;
                    }
                }
                if (t.Next.IsChar('.'))
                {
                }
                else if ((t.Next is Pullenti.Ner.TextToken) && !t.Next.Chars.IsAllLower)
                {
                }
                else
                {
                    break;
                }
                res.NumberEnd = t;
                t             = t.Next;
                // A dot after the number belongs to the numbering prefix as well.
                if (t.IsChar('.') && t.Next != null)
                {
                    res.NumberEnd = t;
                    t             = t.Next;
                }
                // The numbering is immediately followed by a line break: nothing more to analyse.
                if (t.IsNewlineBefore)
                {
                    return(res);
                }
                nums++;
            }
            // Try to match a block keyword at the current token; if that fails and a multi-token noun
            // phrase starts here, retry at the phrase's noun.
            Pullenti.Ner.Core.TerminToken tok = m_Ontology.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
            if (tok == null)
            {
                Pullenti.Ner.Core.NounPhraseToken npt1 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                if (npt1 != null && npt1.EndToken != npt1.BeginToken)
                {
                    tok = m_Ontology.TryParse(npt1.Noun.BeginToken, Pullenti.Ner.Core.TerminParseAttr.No);
                }
            }
            // A keyword directly preceded by ':' is rejected.
            if (tok != null)
            {
                if (t.Previous != null && t.Previous.IsChar(':'))
                {
                    tok = null;
                }
            }
            if (tok != null)
            {
                BlkTyps typ = (BlkTyps)tok.Termin.Tag;
                // Conslusion keyword: keep it only when t ends the line, or when it is followed by a
                // preposition and then a term tagged as Chapter.
                if (typ == BlkTyps.Conslusion)
                {
                    if (t.IsNewlineAfter)
                    {
                    }
                    else if (t.Next != null && t.Next.Morph.Class.IsPreposition && t.Next.Next != null)
                    {
                        Pullenti.Ner.Core.TerminToken tok2 = m_Ontology.TryParse(t.Next.Next, Pullenti.Ner.Core.TerminParseAttr.No);
                        if (tok2 != null && ((BlkTyps)tok2.Termin.Tag) == BlkTyps.Chapter)
                        {
                        }
                        else
                        {
                            tok = null;
                        }
                    }
                    else
                    {
                        tok = null;
                    }
                }
                // Reject the keyword when the token's morphology language differs from the kit's base language.
                if (t.Kit.BaseLanguage != t.Morph.Language)
                {
                    tok = null;
                }
                // Index keyword other than "ОГЛАВЛЕНИЕ" that does not end the line: what follows must
                // parse as a noun phrase, and that phrase must not be a genitive one ending the line.
                if (typ == BlkTyps.Index && !t.IsValue("ОГЛАВЛЕНИЕ", null))
                {
                    if (!t.IsNewlineAfter && t.Next != null)
                    {
                        Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t.Next, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                        if (npt != null && npt.IsNewlineAfter && npt.Morph.Case.IsGenitive)
                        {
                            tok = null;
                        }
                        else if (npt == null)
                        {
                            tok = null;
                        }
                    }
                }
                // "ВВЕДЕНИЕ" followed on the same line by a genitive noun phrase is rejected as an Intro keyword.
                if ((typ == BlkTyps.Intro && tok != null && !tok.IsNewlineAfter) && t.IsValue("ВВЕДЕНИЕ", null))
                {
                    Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t.Next, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                    if (npt != null && npt.Morph.Case.IsGenitive)
                    {
                        tok = null;
                    }
                }
                // The keyword survived all filters: record the type and move past it.
                if (tok != null)
                {
                    // Without explicit numbering the keyword itself plays the role of the number part;
                    // the line is widened if the keyword ends beyond the current line end.
                    if (res.NumberEnd == null)
                    {
                        res.NumberEnd = tok.EndToken;
                        if (res.NumberEnd.EndChar > res.EndChar)
                        {
                            res.EndToken = res.NumberEnd;
                        }
                    }
                    res.Typ = typ;
                    t       = tok.EndToken;
                    // Swallow a ':' or '.' right after the keyword.
                    if (t.Next != null && t.Next.IsCharOf(":."))
                    {
                        t            = t.Next;
                        res.EndToken = t;
                    }
                    if (t.IsNewlineAfter || t.Next == null)
                    {
                        return(res);
                    }
                    t = t.Next;
                }
            }
            // A '§' sign followed by a number marks a chapter.
            if (t.IsChar('§') && (t.Next is Pullenti.Ner.NumberToken))
            {
                res.Typ       = BlkTyps.Chapter;
                res.NumberEnd = t;
                t             = t.Next;
            }
            // Known names: a match that ends at a line break finishes the analysis.
            if (names != null)
            {
                Pullenti.Ner.Core.TerminToken tok2 = names.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
                if (tok2 != null && tok2.EndToken.IsNewlineAfter)
                {
                    res.EndToken    = tok2.EndToken;
                    res.IsExistName = true;
                    if (res.Typ == BlkTyps.Undefined)
                    {
                        // Re-analyse the text after the numbering (without names) to see whether it is
                        // a Literature / Intro / Conslusion heading; otherwise treat the line as a Chapter.
                        BlockLine li2 = Create((res.NumberEnd == null ? null : res.NumberEnd.Next), null);
                        if (li2 != null && ((li2.Typ == BlkTyps.Literature || li2.Typ == BlkTyps.Intro || li2.Typ == BlkTyps.Conslusion)))
                        {
                            res.Typ = li2.Typ;
                        }
                        else
                        {
                            res.Typ = BlkTyps.Chapter;
                        }
                    }
                    return(res);
                }
            }
            // Determine t1, the last token included in the statistics below. A trailing number or '.'
            // is excluded; if the token before it is a '.', the run of dots is skipped backwards too.
            // NOTE(review): presumably this detects a dotted leader plus page number as in a table of
            // contents - confirm against the users of HasContentItemTail.
            Pullenti.Ner.Token t1 = res.EndToken;
            if ((((t1 is Pullenti.Ner.NumberToken) || t1.IsChar('.'))) && t1.Previous != null)
            {
                t1 = t1.Previous;
                if (t1.IsChar('.'))
                {
                    res.HasContentItemTail = true;
                    for (; t1 != null && t1.BeginChar > res.BeginChar; t1 = t1.Previous)
                    {
                        if (!t1.IsChar('.'))
                        {
                            break;
                        }
                    }
                }
            }
            // Word statistics over the tokens from the current position (after numbering/keyword) up to t1.
            res.IsAllUpper = true;
            for (; t != null && t.EndChar <= t1.EndChar; t = t.Next)
            {
                // Anything that is not a letter text token counts as a non-word.
                if (!(t is Pullenti.Ner.TextToken) || !t.Chars.IsLetter)
                {
                    res.NotWords++;
                }
                else
                {
                    // Words with an undefined dictionary class are non-words; dictionary words longer
                    // than two characters are counted as words (shorter ones are counted as neither).
                    Pullenti.Morph.MorphClass mc = t.GetMorphClassInDictionary();
                    if (mc.IsUndefined)
                    {
                        res.NotWords++;
                    }
                    else if (t.LengthChar > 2)
                    {
                        res.Words++;
                    }
                    if (!t.Chars.IsAllUpper)
                    {
                        res.IsAllUpper = false;
                    }
                    // A pure verb marks the line as containing a verb, unless its term ends with "ING".
                    if ((t as Pullenti.Ner.TextToken).IsPureVerb)
                    {
                        if (!(t as Pullenti.Ner.TextToken).Term.EndsWith("ING"))
                        {
                            res.HasVerb = true;
                        }
                    }
                }
            }
            // No type so far: apply heuristics to the noun phrase that opens the line (after the numbering).
            // NOTE(review): the second argument of IsValue looks like an alternative-language
            // (presumably Ukrainian) form of the word - confirm against Token.IsValue.
            if (res.Typ == BlkTyps.Undefined)
            {
                Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse((res.NumberEnd == null ? res.BeginToken : res.NumberEnd.Next), Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                if (npt != null)
                {
                    // Intro heuristic: "ХАРАКТЕРИСТИКА" / "СОДЕРЖАНИЕ" followed only by genitive noun
                    // phrases (dots are skipped).
                    if (npt.Noun.IsValue("ХАРАКТЕРИСТИКА", null) || npt.Noun.IsValue("СОДЕРЖАНИЕ", "ЗМІСТ"))
                    {
                        bool ok = true;
                        for (Pullenti.Ner.Token tt = npt.EndToken.Next; tt != null && tt.EndChar <= res.EndChar; tt = tt.Next)
                        {
                            if (tt.IsChar('.'))
                            {
                                continue;
                            }
                            Pullenti.Ner.Core.NounPhraseToken npt2 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(tt, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                            if (npt2 == null || !npt2.Morph.Case.IsGenitive)
                            {
                                ok = false;
                                break;
                            }
                            tt = npt2.EndToken;
                            // The phrase runs past the line end: widen the line to the phrase and then
                            // on to the end of the line on which the phrase finishes.
                            if (tt.EndChar > res.EndChar)
                            {
                                res.EndToken = tt;
                                if (!tt.IsNewlineAfter)
                                {
                                    for (; res.EndToken.Next != null; res.EndToken = res.EndToken.Next)
                                    {
                                        if (res.EndToken.IsNewlineAfter)
                                        {
                                            break;
                                        }
                                    }
                                }
                            }
                        }
                        if (ok)
                        {
                            res.Typ         = BlkTyps.Intro;
                            res.IsExistName = true;
                        }
                    }
                    // Conslusion heuristic: "ВЫВОД" / "РЕЗУЛЬТАТ" followed only by noun phrases whose
                    // noun is РЕЗУЛЬТАТ, РЕКОМЕНДАЦИЯ or ИССЛЕДОВАНИЕ (commas, dots and "and" are skipped).
                    else if (npt.Noun.IsValue("ВЫВОД", "ВИСНОВОК") || npt.Noun.IsValue("РЕЗУЛЬТАТ", "ДОСЛІДЖЕННЯ"))
                    {
                        bool ok = true;
                        for (Pullenti.Ner.Token tt = npt.EndToken.Next; tt != null && tt.EndChar <= res.EndChar; tt = tt.Next)
                        {
                            if (tt.IsCharOf(",.") || tt.IsAnd)
                            {
                                continue;
                            }
                            Pullenti.Ner.Core.NounPhraseToken npt1 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(tt, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                            if (npt1 != null)
                            {
                                if (npt1.Noun.IsValue("РЕЗУЛЬТАТ", "ДОСЛІДЖЕННЯ") || npt1.Noun.IsValue("РЕКОМЕНДАЦИЯ", "РЕКОМЕНДАЦІЯ") || npt1.Noun.IsValue("ИССЛЕДОВАНИЕ", "ДОСЛІДЖЕННЯ"))
                                {
                                    tt = npt1.EndToken;
                                    // Same line-widening as in the Intro heuristic above.
                                    if (tt.EndChar > res.EndChar)
                                    {
                                        res.EndToken = tt;
                                        if (!tt.IsNewlineAfter)
                                        {
                                            for (; res.EndToken.Next != null; res.EndToken = res.EndToken.Next)
                                            {
                                                if (res.EndToken.IsNewlineAfter)
                                                {
                                                    break;
                                                }
                                            }
                                        }
                                    }
                                    continue;
                                }
                            }
                            ok = false;
                            break;
                        }
                        if (ok)
                        {
                            res.Typ         = BlkTyps.Conslusion;
                            res.IsExistName = true;
                        }
                    }
                    // Literature heuristic, tried when neither branch above assigned a type and the
                    // opening noun phrase lies within the line. (npt != null is always true here.)
                    if (res.Typ == BlkTyps.Undefined && npt != null && npt.EndChar <= res.EndChar)
                    {
                        bool ok   = false;
                        // publ counts the noun phrases accepted by _isPub.
                        int  publ = 0;
                        if (_isPub(npt))
                        {
                            ok   = true;
                            publ = 1;
                        }
                        else if ((npt.Noun.IsValue("СПИСОК", null) || npt.Noun.IsValue("УКАЗАТЕЛЬ", "ПОКАЖЧИК") || npt.Noun.IsValue("ПОЛОЖЕНИЕ", "ПОЛОЖЕННЯ")) || npt.Noun.IsValue("ВЫВОД", "ВИСНОВОК") || npt.Noun.IsValue("РЕЗУЛЬТАТ", "ДОСЛІДЖЕННЯ"))
                        {
                            // The noun phrase is all there is on the line: the whole method gives up
                            // and returns null instead of a BlockLine.
                            if (npt.EndChar == res.EndChar)
                            {
                                return(null);
                            }
                            ok = true;
                        }
                        if (ok)
                        {
                            // A lone "СПИСОК" that fills the line is not enough.
                            if (npt.BeginToken == npt.EndToken && npt.Noun.IsValue("СПИСОК", null) && npt.EndChar == res.EndChar)
                            {
                                ok = false;
                            }
                            // The rest of the line may contain only punctuation, "and", prepositions,
                            // the word "ОТРАЖЕНЫ" and noun phrases from the accepted list below.
                            // Note that npt is reassigned inside this loop.
                            for (Pullenti.Ner.Token tt = npt.EndToken.Next; tt != null && tt.EndChar <= res.EndChar; tt = tt.Next)
                            {
                                if (tt.IsCharOf(",.:") || tt.IsAnd || tt.Morph.Class.IsPreposition)
                                {
                                    continue;
                                }
                                if (tt.IsValue("ОТРАЖЕНЫ", "ВІДОБРАЖЕНІ"))
                                {
                                    continue;
                                }
                                npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(tt, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                                if (npt == null)
                                {
                                    ok = false;
                                    break;
                                }
                                if (((_isPub(npt) || npt.Noun.IsValue("РАБОТА", "РОБОТА") || npt.Noun.IsValue("ИССЛЕДОВАНИЕ", "ДОСЛІДЖЕННЯ")) || npt.Noun.IsValue("АВТОР", null) || npt.Noun.IsValue("ТРУД", "ПРАЦЯ")) || npt.Noun.IsValue("ТЕМА", null) || npt.Noun.IsValue("ДИССЕРТАЦИЯ", "ДИСЕРТАЦІЯ"))
                                {
                                    tt = npt.EndToken;
                                    if (_isPub(npt))
                                    {
                                        publ++;
                                    }
                                    // Same line-widening as in the heuristics above.
                                    if (tt.EndChar > res.EndChar)
                                    {
                                        res.EndToken = tt;
                                        if (!tt.IsNewlineAfter)
                                        {
                                            for (; res.EndToken.Next != null; res.EndToken = res.EndToken.Next)
                                            {
                                                if (res.EndToken.IsNewlineAfter)
                                                {
                                                    break;
                                                }
                                            }
                                        }
                                    }
                                    continue;
                                }
                                ok = false;
                                break;
                            }
                            if (ok)
                            {
                                res.Typ         = BlkTyps.Literature;
                                res.IsExistName = true;
                                // No _isPub phrase was seen and the line ends within the first two thirds
                                // of the text: downgrade to Misc (numbered line) or back to Undefined.
                                if (publ == 0 && (res.EndChar < (((res.Kit.Sofa.Text.Length * 2) / 3))))
                                {
                                    if (res.NumberEnd != null)
                                    {
                                        res.Typ = BlkTyps.Misc;
                                    }
                                    else
                                    {
                                        res.Typ = BlkTyps.Undefined;
                                    }
                                }
                            }
                        }
                    }
                }
            }
            return(res);
        }
Exemple #8
0
        /// <summary>
        /// Tries to read a numeric speciality code made of up to three digit groups
        /// starting at <paramref name="t"/> and wraps it, together with the rest of the line, into a
        /// <see cref="TitleItemToken"/> of type <c>Types.Speciality</c>.
        /// </summary>
        /// <param name="t">Token at which the code is expected; null yields null.</param>
        /// <param name="keyWordBefore">
        /// True when a keyword precedes the code. Without it only six-digit codes are accepted, and a
        /// code that does not start a line must be written strictly as two-digit groups joined by
        /// dots without surrounding whitespace.
        /// </param>
        /// <returns>The speciality item, or null when no acceptable code starts at <paramref name="t"/>.</returns>
        static TitleItemToken TryAttachSpeciality(Pullenti.Ner.Token t, bool keyWordBefore)
        {
            if (t == null)
            {
                return(null);
            }
            // No keyword and not at the start of a line: apply the strict format checks.
            bool suspicious = !keyWordBefore && !t.IsNewlineBefore;

            Pullenti.Ner.Token first = t;
            StringBuilder code       = null;
            int digitTotal           = 0;
            int group                = 0;

            // Read at most three digit groups, stepping over one separator after each.
            while (group < 3)
            {
                Pullenti.Ner.NumberToken number = t as Pullenti.Ner.NumberToken;
                if (number == null || number.Typ != Pullenti.Ner.NumberSpellingType.Digit || number.Morph.Class.IsAdjective)
                {
                    break;
                }
                if (suspicious && t.LengthChar != 2)
                {
                    return(null);
                }
                if (code == null)
                {
                    code = new StringBuilder();
                }
                string digits = number.GetSourceText();
                code.Append(digits);
                digitTotal += digits.Length;

                Pullenti.Ner.Token following = t.Next;
                if (following == null)
                {
                    break;
                }
                t = following;
                if (t.IsCharOf(".,") || t.IsHiphen)
                {
                    // In strict mode the first two separators must be a dot with no whitespace around it.
                    if (suspicious && (group < 2) && (!t.IsChar('.') || t.IsWhitespaceAfter || t.IsWhitespaceBefore))
                    {
                        return(null);
                    }
                    if (t.Next != null)
                    {
                        t = t.Next;
                    }
                }
                group++;
            }
            if (code == null || (digitTotal < 5))
            {
                return(null);
            }
            if (digitTotal == 6)
            {
                // Normalise six digits to the XX.XX.XX form (insert the right-hand dot first so the
                // left-hand index stays valid).
                code.Insert(4, '.');
                code.Insert(2, '.');
            }
            else if (!keyWordBefore)
            {
                return(null);
            }
            // Stretch the item to the end of the current line, jumping over bracketed sequences as a whole.
            Pullenti.Ner.Token cursor = t.Next;
            while (cursor != null && !cursor.IsNewlineBefore)
            {
                Pullenti.Ner.Core.BracketSequenceToken bracket = Pullenti.Ner.Core.BracketHelper.TryParse(cursor, Pullenti.Ner.Core.BracketParseAttr.No, 100);
                if (bracket != null)
                {
                    cursor = bracket.EndToken;
                }
                t      = cursor;
                cursor = cursor.Next;
            }
            TitleItemToken item = new TitleItemToken(first, t, Types.Speciality);
            item.Value = code.ToString();
            return(item);
        }
Exemple #9
0
        /// <summary>
        /// Tries to recognise a denomination that starts at <paramref name="t"/> and continues
        /// through directly attached digits, short text tokens, joiners and a few special symbols.
        /// Special cases are delegated to <c>TryAttachSpec</c> first.
        /// </summary>
        /// <param name="t">Candidate first token; null yields null.</param>
        /// <param name="forOntology">
        /// When true, whitespace between tokens does not stop the scan and the final plausibility
        /// checks (length, trailing joiner, <c>CheckAttach</c>, ...) are skipped.
        /// </param>
        /// <returns>A referent token wrapping a new <see cref="DenominationReferent"/>, or null.</returns>
        public Pullenti.Ner.ReferentToken TryAttach(Pullenti.Ner.Token t, bool forOntology = false)
        {
            if (t == null)
            {
                return(null);
            }
            Pullenti.Ner.ReferentToken special = this.TryAttachSpec(t);
            if (special != null)
            {
                return(special);
            }
            // An all-lowercase start is allowed only when a number is glued to it on the right and
            // it is free on the left (text start, whitespace, or ',' / ':').
            if (t.Chars.IsAllLower)
            {
                bool gluedToNumber = !t.IsWhitespaceAfter && (t.Next is Pullenti.Ner.NumberToken);
                if (!gluedToNumber)
                {
                    return(null);
                }
                bool freeOnLeft = t.Previous == null || t.IsWhitespaceBefore || t.Previous.IsCharOf(",:");
                if (!freeOnLeft)
                {
                    return(null);
                }
            }
            // Text collected after the first token; it is used only for the checks at the end.
            StringBuilder collected = new StringBuilder();

            Pullenti.Ner.Token last = t;
            bool endsWithJoiner     = false;
            bool valid              = true;
            int  digitRuns          = 0;
            int  symbols            = 0;

            for (Pullenti.Ner.Token cur = t.Next; cur != null; cur = cur.Next)
            {
                if (!forOntology && cur.IsWhitespaceBefore)
                {
                    break;
                }
                // Slash, backslash, underscore and hyphen are all recorded as '-'.
                if (cur.IsCharOf("/\\_") || cur.IsHiphen)
                {
                    endsWithJoiner = true;
                    collected.Append('-');
                    continue;
                }
                endsWithJoiner = false;

                Pullenti.Ner.NumberToken number = cur as Pullenti.Ner.NumberToken;
                if (number != null)
                {
                    // Only numbers written with digits may continue the denomination.
                    if (number.Typ != Pullenti.Ner.NumberSpellingType.Digit)
                    {
                        break;
                    }
                    collected.Append(number.GetSourceText());
                    last = number;
                    digitRuns++;
                    continue;
                }
                Pullenti.Ner.TextToken word = cur as Pullenti.Ner.TextToken;
                if (word == null)
                {
                    break;
                }
                // Text pieces longer than three characters invalidate the candidate.
                if (word.LengthChar > 3)
                {
                    valid = false;
                    break;
                }
                if (!char.IsLetter(word.Term[0]))
                {
                    // ',' / ':' or a possible closing bracket simply end the denomination.
                    if (word.IsCharOf(",:") || Pullenti.Ner.Core.BracketHelper.CanBeEndOfSequence(word, false, null, false))
                    {
                        break;
                    }
                    // Any other symbol outside the allowed set invalidates the candidate.
                    if (!word.IsCharOf("+*&^#@!"))
                    {
                        valid = false;
                        break;
                    }
                    symbols++;
                }
                collected.Append(word.GetSourceText());
                last = word;
            }
            if (!forOntology)
            {
                // Checks are short-circuited in this order so that the indexer is reached only for
                // non-empty text and CheckAttach is called only when everything else has passed.
                bool rejected = !valid
                    || endsWithJoiner
                    || (collected.Length < 1)
                    || collected.Length > 12
                    || collected[collected.Length - 1] == '!'
                    || (digitRuns + symbols) == 0
                    || !this.CheckAttach(t, last);
                if (rejected)
                {
                    return(null);
                }
            }
            DenominationReferent denomination = new DenominationReferent();

            denomination.AddValue(t, last);
            return(new Pullenti.Ner.ReferentToken(denomination, t, last));
        }
Exemple #10
0
        static Pullenti.Ner.ReferentToken _tryNounName(List <CityItemToken> li, out Pullenti.Ner.Core.IntOntologyItem oi, bool always)
        {
            oi = null;
            if (li == null || (li.Count < 2) || ((li[0].Typ != CityItemToken.ItemType.Noun && li[0].Typ != CityItemToken.ItemType.Misc)))
            {
                return(null);
            }
            bool ok = !li[0].Doubtful;

            if (ok && li[0].Typ == CityItemToken.ItemType.Misc)
            {
                ok = false;
            }
            string typ     = (li[0].Typ == CityItemToken.ItemType.Misc ? null : li[0].Value);
            string typ2    = (li[0].Typ == CityItemToken.ItemType.Misc ? null : li[0].AltValue);
            string probAdj = null;
            int    i1      = 1;

            Pullenti.Ner.Referent org = null;
            if ((typ != null && li[i1].Typ == CityItemToken.ItemType.Noun && ((i1 + 1) < li.Count)) && li[0].WhitespacesAfterCount <= 1 && (((Pullenti.Morph.LanguageHelper.EndsWith(typ, "ПОСЕЛОК") || Pullenti.Morph.LanguageHelper.EndsWith(typ, "СЕЛИЩЕ") || typ == "ДЕРЕВНЯ") || typ == "СЕЛО")))
            {
                if (li[i1].BeginToken == li[i1].EndToken)
                {
                    Pullenti.Ner.Address.Internal.AddressItemToken ooo = Pullenti.Ner.Address.Internal.AddressItemToken.TryAttachOrg(li[i1].BeginToken);
                    if (ooo != null && ooo.RefToken != null)
                    {
                        return(null);
                    }
                }
                typ2 = li[i1].Value;
                if (typ2 == "СТАНЦИЯ" && li[i1].BeginToken.IsValue("СТ", null) && ((i1 + 1) < li.Count))
                {
                    Pullenti.Ner.MorphCollection m = li[i1 + 1].Morph;
                    if (m.Number == Pullenti.Morph.MorphNumber.Plural)
                    {
                        probAdj = "СТАРЫЕ";
                    }
                    else if (m.Gender == Pullenti.Morph.MorphGender.Feminie)
                    {
                        probAdj = "СТАРАЯ";
                    }
                    else if (m.Gender == Pullenti.Morph.MorphGender.Masculine)
                    {
                        probAdj = "СТАРЫЙ";
                    }
                    else
                    {
                        probAdj = "СТАРОЕ";
                    }
                }
                i1++;
            }
            string name    = li[i1].Value ?? ((li[i1].OntoItem == null ? null : li[i1].OntoItem.CanonicText));
            string altName = li[i1].AltValue;

            if (name == null)
            {
                return(null);
            }
            Pullenti.Ner.MorphCollection mc = li[0].Morph;
            if (i1 == 1 && li[i1].Typ == CityItemToken.ItemType.City && ((li[0].Value == "ГОРОД" || li[0].Value == "МІСТО" || li[0].Typ == CityItemToken.ItemType.Misc)))
            {
                if (typ == null && ((i1 + 1) < li.Count) && li[i1 + 1].Typ == CityItemToken.ItemType.Noun)
                {
                    return(null);
                }
                oi = li[i1].OntoItem;
                if (oi != null)
                {
                    name = oi.CanonicText;
                }
                if (name.Length > 2 || oi.MiscAttr != null)
                {
                    if (!li[1].Doubtful || ((oi != null && oi.MiscAttr != null)))
                    {
                        ok = true;
                    }
                    else if (!ok && !li[1].IsNewlineBefore)
                    {
                        if (li[0].GeoObjectBefore || li[1].GeoObjectAfter)
                        {
                            ok = true;
                        }
                        else if (Pullenti.Ner.Address.Internal.StreetDefineHelper.CheckStreetAfter(li[1].EndToken.Next))
                        {
                            ok = true;
                        }
                        else if (li[1].EndToken.Next != null && (li[1].EndToken.Next.GetReferent() is Pullenti.Ner.Date.DateReferent))
                        {
                            ok = true;
                        }
                        else if ((li[1].WhitespacesBeforeCount < 2) && li[1].OntoItem != null)
                        {
                            if (li[1].IsNewlineAfter)
                            {
                                ok = true;
                            }
                            else
                            {
                                ok = true;
                            }
                        }
                    }
                    if (li[1].Doubtful && li[1].EndToken.Next != null && li[1].EndToken.Chars == li[1].EndToken.Next.Chars)
                    {
                        ok = false;
                    }
                    if (li[0].BeginToken.Previous != null && li[0].BeginToken.Previous.IsValue("В", null))
                    {
                        ok = true;
                    }
                }
                if (!ok)
                {
                    ok = CheckYearAfter(li[1].EndToken.Next);
                }
                if (!ok)
                {
                    ok = CheckCityAfter(li[1].EndToken.Next);
                }
            }
            else if ((li[i1].Typ == CityItemToken.ItemType.ProperName || li[i1].Typ == CityItemToken.ItemType.City))
            {
                if (((li[0].Value == "АДМИНИСТРАЦИЯ" || li[0].Value == "АДМІНІСТРАЦІЯ")) && i1 == 1)
                {
                    return(null);
                }
                if (li[i1].IsNewlineBefore)
                {
                    if (li.Count != 2)
                    {
                        return(null);
                    }
                }
                if (!li[0].Doubtful)
                {
                    ok = true;
                    if (name.Length < 2)
                    {
                        ok = false;
                    }
                    else if ((name.Length < 3) && li[0].Morph.Number != Pullenti.Morph.MorphNumber.Singular)
                    {
                        ok = false;
                    }
                    if (li[i1].Doubtful && !li[i1].GeoObjectAfter && !li[0].GeoObjectBefore)
                    {
                        if (li[i1].Morph.Case.IsGenitive)
                        {
                            if (li[i1].EndToken.Next == null || MiscLocationHelper.CheckGeoObjectAfter(li[i1].EndToken.Next, false) || Pullenti.Ner.Address.Internal.AddressItemToken.CheckHouseAfter(li[i1].EndToken.Next, false, true))
                            {
                            }
                            else if (li[0].BeginToken.Previous == null || MiscLocationHelper.CheckGeoObjectBefore(li[0].BeginToken))
                            {
                            }
                            else
                            {
                                ok = false;
                            }
                        }
                        if (ok)
                        {
                            Pullenti.Ner.ReferentToken rt0 = li[i1].Kit.ProcessReferent("PERSONPROPERTY", li[0].BeginToken.Previous);
                            if (rt0 != null)
                            {
                                Pullenti.Ner.ReferentToken rt1 = li[i1].Kit.ProcessReferent("PERSON", li[i1].BeginToken);
                                if (rt1 != null)
                                {
                                    ok = false;
                                }
                            }
                        }
                    }
                    Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(li[i1].BeginToken, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                    if (npt != null)
                    {
                        if (npt.EndToken.EndChar > li[i1].EndChar && npt.Adjectives.Count > 0 && !npt.Adjectives[0].EndToken.Next.IsComma)
                        {
                            ok = false;
                        }
                        else if (TerrItemToken.m_UnknownRegions.TryParse(npt.EndToken, Pullenti.Ner.Core.TerminParseAttr.FullwordsOnly) != null)
                        {
                            bool ok1 = false;
                            if (li[0].BeginToken.Previous != null)
                            {
                                Pullenti.Ner.Token ttt = li[0].BeginToken.Previous;
                                if (ttt.IsComma && ttt.Previous != null)
                                {
                                    ttt = ttt.Previous;
                                }
                                Pullenti.Ner.Geo.GeoReferent geo = ttt.GetReferent() as Pullenti.Ner.Geo.GeoReferent;
                                if (geo != null && !geo.IsCity)
                                {
                                    ok1 = true;
                                }
                            }
                            if (npt.EndToken.Next != null)
                            {
                                Pullenti.Ner.Token ttt = npt.EndToken.Next;
                                if (ttt.IsComma && ttt.Next != null)
                                {
                                    ttt = ttt.Next;
                                }
                                Pullenti.Ner.Geo.GeoReferent geo = ttt.GetReferent() as Pullenti.Ner.Geo.GeoReferent;
                                if (geo != null && !geo.IsCity)
                                {
                                    ok1 = true;
                                }
                            }
                            if (!ok1)
                            {
                                return(null);
                            }
                        }
                    }
                    if (li[0].Value == "ПОРТ")
                    {
                        if (li[i1].Chars.IsAllUpper || li[i1].Chars.IsLatinLetter)
                        {
                            return(null);
                        }
                    }
                }
                else if (li[0].GeoObjectBefore)
                {
                    ok = true;
                }
                else if (li[i1].GeoObjectAfter && !li[i1].IsNewlineAfter)
                {
                    ok = true;
                }
                else
                {
                    ok = CheckYearAfter(li[i1].EndToken.Next);
                }
                if (!ok)
                {
                    ok = CheckStreetAfter(li[i1].EndToken.Next);
                }
                if (!ok && li[0].BeginToken.Previous != null && li[0].BeginToken.Previous.IsValue("В", null))
                {
                    ok = true;
                }
            }
            else
            {
                return(null);
            }
            if (!ok && !always)
            {
                if (MiscLocationHelper.CheckNearBefore(li[0].BeginToken.Previous) == null)
                {
                    return(null);
                }
            }
            if (li.Count > (i1 + 1))
            {
                li.RemoveRange(i1 + 1, li.Count - i1 - 1);
            }
            Pullenti.Ner.Geo.GeoReferent city = new Pullenti.Ner.Geo.GeoReferent();
            if (oi != null && oi.Referent != null)
            {
                city = oi.Referent.Clone() as Pullenti.Ner.Geo.GeoReferent;
                city.Occurrence.Clear();
            }
            if (!li[0].Morph.Case.IsUndefined && li[0].Morph.Gender != Pullenti.Morph.MorphGender.Undefined)
            {
                if (li[i1].EndToken.Morph.Class.IsAdjective && li[i1].BeginToken == li[i1].EndToken)
                {
                    string nam = Pullenti.Ner.Core.ProperNameHelper.GetNameEx(li[i1].BeginToken, li[i1].EndToken, Pullenti.Morph.MorphClass.Adjective, li[0].Morph.Case, li[0].Morph.Gender, false, false);
                    if (nam != null && nam != name)
                    {
                        name = nam;
                    }
                }
            }
            if (li[0].Morph.Case.IsNominative)
            {
                if (altName != null)
                {
                    city.AddName(altName);
                }
                altName = null;
            }
            city.AddName(name);
            if (probAdj != null)
            {
                city.AddName(probAdj + " " + name);
            }
            if (altName != null)
            {
                city.AddName(altName);
                if (probAdj != null)
                {
                    city.AddName(probAdj + " " + altName);
                }
            }
            if (typ != null)
            {
                city.AddTyp(typ);
            }
            else if (!city.IsCity)
            {
                city.AddTypCity(li[0].Kit.BaseLanguage);
            }
            if (typ2 != null)
            {
                city.AddTyp(typ2.ToLower());
            }
            if (li[0].HigherGeo != null && GeoOwnerHelper.CanBeHigher(li[0].HigherGeo, city))
            {
                city.Higher = li[0].HigherGeo;
            }
            if (li[0].Typ == CityItemToken.ItemType.Misc)
            {
                li.RemoveAt(0);
            }
            Pullenti.Ner.ReferentToken res = new Pullenti.Ner.ReferentToken(city, li[0].BeginToken, li[li.Count - 1].EndToken)
            {
                Morph = mc
            };
            if (res.EndToken.Next != null && res.EndToken.Next.IsHiphen && (res.EndToken.Next.Next is Pullenti.Ner.NumberToken))
            {
                Pullenti.Ner.NumberToken num = res.EndToken.Next.Next as Pullenti.Ner.NumberToken;
                if ((num.Typ == Pullenti.Ner.NumberSpellingType.Digit && !num.Morph.Class.IsAdjective && num.IntValue != null) && (num.IntValue.Value < 50))
                {
                    foreach (Pullenti.Ner.Slot s in city.Slots)
                    {
                        if (s.TypeName == Pullenti.Ner.Geo.GeoReferent.ATTR_NAME)
                        {
                            city.UploadSlot(s, string.Format("{0}-{1}", s.Value, num.Value));
                        }
                    }
                    res.EndToken = num;
                }
            }
            if (li[0].BeginToken == li[0].EndToken && li[0].BeginToken.IsValue("ГОРОДОК", null))
            {
                if (Pullenti.Ner.Address.Internal.AddressItemToken.CheckHouseAfter(res.EndToken.Next, true, false))
                {
                    return(null);
                }
            }
            return(res);
        }
Exemple #11
0
 /// <summary>
 /// Builds the semantic object for the head noun of a noun phrase, registers it in the graph and
 /// links to it the objects created for the phrase's adjectives and its internal noun phrase.
 /// </summary>
 /// <param name="gr">Graph that receives the new object and the "Detail" links.</param>
 /// <param name="npt">Noun phrase whose head noun is converted.</param>
 /// <returns>
 /// The created object, or null when the head is a referent token that carries no referent
 /// (in that case nothing is added to the graph).
 /// </returns>
 public static Pullenti.Semantic.SemObject CreateNounGroup(Pullenti.Semantic.SemGraph gr, Pullenti.Ner.Core.NounPhraseToken npt)
 {
     Pullenti.Ner.Token          headToken = npt.Noun.BeginToken;
     Pullenti.Semantic.SemObject result    = new Pullenti.Semantic.SemObject(gr);
     result.Tokens.Add(npt.Noun);

     // Object kind: personal pronoun wins over plain pronoun, everything else is a noun.
     if (npt.Noun.Morph.Class.IsPersonalPronoun)
     {
         result.Typ = Pullenti.Semantic.SemObjectType.PersonalPronoun;
     }
     else if (npt.Noun.Morph.Class.IsPronoun)
     {
         result.Typ = Pullenti.Semantic.SemObjectType.Pronoun;
     }
     else
     {
         result.Typ = Pullenti.Semantic.SemObjectType.Noun;
     }

     Pullenti.Ner.ReferentToken referentHead = headToken as Pullenti.Ner.ReferentToken;
     Pullenti.Ner.NumberToken   numberHead   = headToken as Pullenti.Ner.NumberToken;
     if (npt.Noun.BeginToken != npt.Noun.EndToken)
     {
         // Multi-token head: morphology is taken from the phrase as a whole.
         result.Morph.NormalCase = npt.Noun.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false);
         result.Morph.NormalFull = npt.Noun.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Singular, Pullenti.Morph.MorphGender.Undefined, false);
         result.Morph.Class      = Pullenti.Morph.MorphClass.Noun;
         result.Morph.Number     = npt.Morph.Number;
         result.Morph.Gender     = npt.Morph.Gender;
         result.Morph.Case       = npt.Morph.Case;
     }
     else if (headToken is Pullenti.Ner.TextToken)
     {
         // Use the first word form of the token that agrees with the phrase morphology.
         foreach (Pullenti.Morph.MorphBaseInfo variant in headToken.Morph.Items)
         {
             Pullenti.Morph.MorphWordForm form = variant as Pullenti.Morph.MorphWordForm;
             if (!variant.CheckAccord(npt.Morph, false, false) || form == null)
             {
                 continue;
             }
             _setMorph(result, form);
             break;
         }
         // No normal form was set by the loop above: fall back to the token's own normal-case text.
         if (result.Morph.NormalCase == null)
         {
             result.Morph.NormalCase = headToken.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false);
             result.Morph.NormalFull = headToken.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Singular, Pullenti.Morph.MorphGender.Undefined, false);
         }
         List<Pullenti.Semantic.Utils.DerivateGroup> groups = Pullenti.Semantic.Utils.DerivateService.FindDerivates(result.Morph.NormalFull, true, null);
         if (groups != null && groups.Count > 0)
         {
             result.Concept = groups[0];
         }
     }
     else if (referentHead != null)
     {
         Pullenti.Ner.Referent referent = referentHead.Referent;
         if (referent == null)
         {
             return null;
         }
         // The referent's string form serves as both normal forms; the referent itself is the concept.
         string referentText = referent.ToString();
         result.Morph.NormalCase = referentText;
         result.Morph.NormalFull = referentText;
         result.Concept          = referent;
     }
     else if (numberHead != null)
     {
         result.Morph.Gender = headToken.Morph.Gender;
         result.Morph.Number = headToken.Morph.Number;
         if (numberHead.IntValue == null)
         {
             // No integer value available: keep the upper-cased source text.
             string upperSource = headToken.GetSourceText().ToUpper();
             result.Morph.NormalCase = upperSource;
             result.Morph.NormalFull = upperSource;
         }
         else
         {
             result.Morph.NormalCase = Pullenti.Ner.Core.NumberHelper.GetNumberAdjective(numberHead.IntValue.Value, headToken.Morph.Gender, headToken.Morph.Number);
             result.Morph.NormalFull = Pullenti.Ner.Core.NumberHelper.GetNumberAdjective(numberHead.IntValue.Value, Pullenti.Morph.MorphGender.Masculine, Pullenti.Morph.MorphNumber.Singular);
         }
     }
     headToken.Tag = result;

     // Adjectives become "Detail" links; with MultiNouns set, only the first adjective is processed.
     foreach (Pullenti.Ner.MetaToken adjective in npt.Adjectives)
     {
         if (npt.MultiNouns && adjective != npt.Adjectives[0])
         {
             break;
         }
         Pullenti.Semantic.SemObject adjectiveObject = CreateNptAdj(gr, npt, adjective);
         if (adjectiveObject == null)
         {
             continue;
         }
         gr.AddLink(Pullenti.Semantic.SemLinkType.Detail, result, adjectiveObject, "какой", false, null);
     }

     // The internal noun phrase is converted recursively and attached without a question word.
     if (npt.InternalNoun != null)
     {
         Pullenti.Semantic.SemObject innerObject = CreateNounGroup(gr, npt.InternalNoun);
         if (innerObject != null)
         {
             gr.AddLink(Pullenti.Semantic.SemLinkType.Detail, result, innerObject, null, false, null);
         }
     }
     gr.Objects.Add(result);
     return result;
 }
Exemple #12
0
        /// <summary>
        /// Tokenizes the text of <paramref name="sofa"/> into a chain of tokens and runs the
        /// preliminary token-level passes (optional corrections, letter merging, base-language
        /// detection, optional number tokens, unknown-word morphology adjustment, statistics).
        /// </summary>
        /// <param name="sofa">Source to analyse. When null the constructor returns immediately and nothing is initialised.</param>
        /// <param name="onlyTokenizing">When true, the constructor stops after the number-token pass: the unknown-word pass and <c>CreateStatistics</c> are skipped.</param>
        /// <param name="lang">Language passed to the morphology service and to the word-correction passes.</param>
        /// <param name="progress">Progress handler passed to the morphology service for the main text.</param>
        public AnalysisKit(Pullenti.Ner.SourceOfAnalysis sofa = null, bool onlyTokenizing = false, Pullenti.Morph.MorphLang lang = null, ProgressChangedEventHandler progress = null)
        {
            if (sofa == null)
            {
                return;
            }
            m_Sofa    = sofa;
            StartDate = DateTime.Now;
            List<Pullenti.Morph.MorphToken> tokens = Pullenti.Morph.MorphologyService.Process(sofa.Text, lang, progress);

            // Build the token chain: one TextToken per morph token, linked through Next.
            Pullenti.Ner.Token t0 = null;
            if (tokens != null)
            {
                for (int ii = 0; ii < tokens.Count; ii++)
                {
                    Pullenti.Morph.MorphToken mt = tokens[ii];
                    Pullenti.Ner.TextToken    tt = new Pullenti.Ner.TextToken(mt, this);
                    if (sofa.CorrectionDict != null)
                    {
                        string corw;
                        if (sofa.CorrectionDict.TryGetValue(mt.Term, out corw))
                        {
                            // The replacement is applied only when it tokenizes to exactly one token.
                            List<Pullenti.Morph.MorphToken> ccc = Pullenti.Morph.MorphologyService.Process(corw, lang, null);
                            if (ccc != null && ccc.Count == 1)
                            {
                                // The corrected token keeps the original span and character info;
                                // the original term is preserved in Term0.
                                Pullenti.Ner.TextToken tt1 = new Pullenti.Ner.TextToken(ccc[0], this, tt.BeginChar, tt.EndChar)
                                {
                                    Term0 = tt.Term
                                };
                                tt1.Chars = tt.Chars;
                                tt        = tt1;
                                if (CorrectedTokens == null)
                                {
                                    CorrectedTokens = new Dictionary<Pullenti.Ner.Token, string>();
                                }
                                CorrectedTokens.Add(tt, tt.GetSourceText());
                            }
                        }
                    }
                    if (t0 == null)
                    {
                        FirstToken = tt;
                    }
                    else
                    {
                        t0.Next = tt;
                    }
                    t0 = tt;
                }
            }

            // Optional clean-up passes requested by the source, in their original order.
            if (sofa.ClearDust)
            {
                this.ClearDust();
            }
            if (sofa.DoWordsMergingByMorph)
            {
                this.CorrectWordsByMerging(lang);
            }
            if (sofa.DoWordCorrectionByMorph)
            {
                this.CorrectWordsByMorph(lang);
            }
            this.MergeLetters();
            this.DefineBaseLanguage();

            // Replace recognised numbers with embedded NumberToken instances.
            if (sofa.CreateNumberTokens)
            {
                for (Pullenti.Ner.Token t = FirstToken; t != null; t = t.Next)
                {
                    Pullenti.Ner.NumberToken nt = NumberHelper.TryParseNumber(t);
                    if (nt == null)
                    {
                        continue;
                    }
                    this.EmbedToken(nt);
                    t = nt;
                }
            }
            if (onlyTokenizing)
            {
                return;
            }

            // Unknown-word pass: for a Cyrillic token longer than 4 characters whose dictionary class is
            // undefined, look for a known neighbour with the same two-character ending and hand its
            // morphology to RemoveItemsEx (presumably to drop variants that disagree with the
            // neighbour - NOTE(review): confirm against MorphCollection.RemoveItemsEx).
            for (Pullenti.Ner.Token t = FirstToken; t != null; t = t.Next)
            {
                if (t.Morph.Class.IsPreposition)
                {
                    continue;
                }
                Pullenti.Morph.MorphClass mc = t.GetMorphClassInDictionary();
                if (mc.IsUndefined && t.Chars.IsCyrillicLetter && t.LengthChar > 4)
                {
                    string tail = sofa.Text.Substring(t.EndChar - 1, 2);
                    // The preceding neighbour is preferred; the following one is tried only as a fallback.
                    Pullenti.Ner.Token tte = _findKnownNeighbourWithSameTail(t, false, sofa.Text, tail);
                    if (tte == null)
                    {
                        tte = _findKnownNeighbourWithSameTail(t, true, sofa.Text, tail);
                    }
                    if (tte != null)
                    {
                        t.Morph.RemoveItemsEx(tte.Morph, tte.GetMorphClassInDictionary());
                    }
                }
            }
            this.CreateStatistics();
        }

        /// <summary>
        /// Returns the neighbour of <paramref name="unknown"/> (skipping at most one comma/"and",
        /// preposition or conjunction) when that neighbour is known to the dictionary, shares at least
        /// one morphological class bit with <paramref name="unknown"/>, is longer than 4 characters
        /// and ends with the same two source characters as <paramref name="tail"/>; otherwise null.
        /// </summary>
        /// <param name="unknown">Token whose neighbour is examined.</param>
        /// <param name="forward">True to look at the following token, false to look at the preceding one.</param>
        /// <param name="text">Source text from which the neighbour's two-character ending is cut.</param>
        /// <param name="tail">Two-character ending of <paramref name="unknown"/>.</param>
        private static Pullenti.Ner.Token _findKnownNeighbourWithSameTail(Pullenti.Ner.Token unknown, bool forward, string text, string tail)
        {
            Pullenti.Ner.Token tt = forward ? unknown.Next : unknown.Previous;
            if (tt != null && (tt.IsCommaAnd || tt.Morph.Class.IsPreposition || tt.Morph.Class.IsConjunction))
            {
                tt = forward ? tt.Next : tt.Previous;
            }
            if (tt == null || tt.GetMorphClassInDictionary().IsUndefined)
            {
                return null;
            }
            if ((tt.Morph.Class.Value & unknown.Morph.Class.Value) == 0 || tt.LengthChar <= 4)
            {
                return null;
            }
            string neighbourTail = text.Substring(tt.EndChar - 1, 2);
            if (neighbourTail == tail)
            {
                return tt;
            }
            return null;
        }
Exemple #13
0
        /// <summary>
        /// Scans the sentence that starts at <paramref name="t"/> for the first number token (other than
        /// <paramref name="t"/> itself, not an adjective, followed by at most 2 whitespaces) that is
        /// directly followed by a noun phrase, and builds a token describing that number and noun.
        /// </summary>
        /// <param name="t">Token that must be able to start a sentence.</param>
        /// <returns>
        /// The parsed token extended to the end of the sentence, or null when <paramref name="t"/> cannot
        /// start a sentence, another sentence start is reached first, or no number with an integer value is found.
        /// </returns>
        public static DefinitionWithNumericToken TryParse(Pullenti.Ner.Token t)
        {
            if (!Pullenti.Ner.Core.MiscHelper.CanBeStartOfSentence(t))
            {
                return null;
            }
            Pullenti.Ner.NumberToken          num  = null;
            Pullenti.Ner.Core.NounPhraseToken noun = null;
            for (Pullenti.Ner.Token cur = t; cur != null; cur = cur.Next)
            {
                // Reaching the next sentence without a match means there is nothing to parse.
                if (cur != t && Pullenti.Ner.Core.MiscHelper.CanBeStartOfSentence(cur))
                {
                    return null;
                }
                Pullenti.Ner.NumberToken candidate = cur as Pullenti.Ner.NumberToken;
                if (candidate != null && cur.WhitespacesAfterCount <= 2 && cur != t && !cur.Morph.Class.IsAdjective)
                {
                    Pullenti.Ner.Core.NounPhraseToken phrase = Pullenti.Ner.Core.NounPhraseHelper.TryParse(cur.Next, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                    if (phrase != null)
                    {
                        num  = candidate;
                        noun = phrase;
                        break;
                    }
                }
            }
            if (num == null || num.IntValue == null)
            {
                return null;
            }

            Pullenti.Ner.Core.GetTextAttr textAttrs = Pullenti.Ner.Core.GetTextAttr.KeepQuotes | Pullenti.Ner.Core.GetTextAttr.KeepRegister;
            DefinitionWithNumericToken    res       = new DefinitionWithNumericToken(t, noun.EndToken);
            res.Number          = num.IntValue.Value;
            res.NumberBeginChar = num.BeginChar;
            res.NumberEndChar   = num.EndChar;
            string singularNoun = noun.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Singular, Pullenti.Morph.MorphGender.Undefined, false);
            res.Noun          = singularNoun;
            res.NounsGenetive = noun.GetMorphVariant(Pullenti.Morph.MorphCase.Genitive, true) ?? singularNoun;

            // Text = part before the number + number with its noun phrase (+ rest of the sentence below).
            string text = Pullenti.Ner.Core.MiscHelper.GetTextValue(t, num.Previous, textAttrs);
            if (num.IsWhitespaceBefore)
            {
                text += " ";
            }
            string numberPart = Pullenti.Ner.Core.MiscHelper.GetTextValue(num, noun.EndToken, textAttrs);
            res.NumberSubstring = numberPart;
            text += numberPart;

            // Extend the result up to the last token before the next sentence start.
            Pullenti.Ner.Token scan = noun.EndToken;
            while (scan != null && !Pullenti.Ner.Core.MiscHelper.CanBeStartOfSentence(scan))
            {
                res.EndToken = scan;
                scan         = scan.Next;
            }
            if (res.EndToken != noun.EndToken)
            {
                if (noun.IsWhitespaceAfter)
                {
                    text += " ";
                }
                text += Pullenti.Ner.Core.MiscHelper.GetTextValue(noun.EndToken.Next, res.EndToken, textAttrs);
            }
            res.Text = text;
            return res;
        }
Exemple #14
0
        public static Pullenti.Ner.Core.NumberExToken TryParseNumberWithPostfix(Pullenti.Ner.Token t)
        {
            if (t == null)
            {
                return(null);
            }
            Pullenti.Ner.Token t0       = t;
            string             isDollar = null;

            if (t.LengthChar == 1 && t.Next != null)
            {
                if ((((isDollar = Pullenti.Ner.Core.NumberHelper.IsMoneyChar(t)))) != null)
                {
                    t = t.Next;
                }
            }
            Pullenti.Ner.NumberToken nt = t as Pullenti.Ner.NumberToken;
            if (nt == null)
            {
                if ((!(t.Previous is Pullenti.Ner.NumberToken) && t.IsChar('(') && (t.Next is Pullenti.Ner.NumberToken)) && t.Next.Next != null && t.Next.Next.IsChar(')'))
                {
                    Pullenti.Ner.Core.TerminToken toks1 = m_Postfixes.TryParse(t.Next.Next.Next, Pullenti.Ner.Core.TerminParseAttr.No);
                    if (toks1 != null && ((Pullenti.Ner.Core.NumberExType)toks1.Termin.Tag) == Pullenti.Ner.Core.NumberExType.Money)
                    {
                        Pullenti.Ner.NumberToken        nt0 = t.Next as Pullenti.Ner.NumberToken;
                        Pullenti.Ner.Core.NumberExToken res = new Pullenti.Ner.Core.NumberExToken(t, toks1.EndToken, nt0.Value, nt0.Typ, Pullenti.Ner.Core.NumberExType.Money)
                        {
                            AltRealValue = nt0.RealValue, Morph = toks1.BeginToken.Morph
                        };
                        return(_correctMoney(res, toks1.BeginToken));
                    }
                }
                Pullenti.Ner.TextToken tt = t as Pullenti.Ner.TextToken;
                if (tt == null || !tt.Morph.Class.IsAdjective)
                {
                    return(null);
                }
                string val = tt.Term;
                for (int i = 4; i < (val.Length - 5); i++)
                {
                    string v = val.Substring(0, i);
                    List <Pullenti.Ner.Core.Termin> li = Pullenti.Ner.Core.NumberHelper.m_Nums.FindTerminsByString(v, tt.Morph.Language);
                    if (li == null)
                    {
                        continue;
                    }
                    string vv = val.Substring(i);
                    List <Pullenti.Ner.Core.Termin> lii = m_Postfixes.FindTerminsByString(vv, tt.Morph.Language);
                    if (lii != null && lii.Count > 0)
                    {
                        Pullenti.Ner.Core.NumberExToken re = new Pullenti.Ner.Core.NumberExToken(t, t, ((int)li[0].Tag).ToString(), Pullenti.Ner.NumberSpellingType.Words, (Pullenti.Ner.Core.NumberExType)lii[0].Tag)
                        {
                            Morph = t.Morph
                        };
                        _correctExtTypes(re);
                        return(re);
                    }
                    break;
                }
                return(null);
            }
            if (t.Next == null && isDollar == null)
            {
                return(null);
            }
            double f = nt.RealValue;

            if (double.IsNaN(f))
            {
                return(null);
            }
            Pullenti.Ner.Token t1 = nt.Next;
            if (((t1 != null && t1.IsCharOf(",."))) || (((t1 is Pullenti.Ner.NumberToken) && (t1.WhitespacesBeforeCount < 3))))
            {
                double d;
                Pullenti.Ner.NumberToken tt11 = Pullenti.Ner.Core.NumberHelper.TryParseRealNumber(nt, false, false);
                if (tt11 != null)
                {
                    t1 = tt11.EndToken.Next;
                    f  = tt11.RealValue;
                }
            }
            if (t1 == null)
            {
                if (isDollar == null)
                {
                    return(null);
                }
            }
            else if ((t1.Next != null && t1.Next.IsValue("С", "З") && t1.Next.Next != null) && t1.Next.Next.IsValue("ПОЛОВИНА", null))
            {
                f += 0.5;
                t1 = t1.Next.Next;
            }
            if (t1 != null && t1.IsHiphen && t1.Next != null)
            {
                t1 = t1.Next;
            }
            bool   det  = false;
            double altf = f;

            if (((t1 is Pullenti.Ner.NumberToken) && t1.Previous != null && t1.Previous.IsHiphen) && (t1 as Pullenti.Ner.NumberToken).IntValue == 0 && t1.LengthChar == 2)
            {
                t1 = t1.Next;
            }
            if ((t1 != null && t1.Next != null && t1.IsChar('(')) && (((t1.Next is Pullenti.Ner.NumberToken) || t1.Next.IsValue("НОЛЬ", null))) && t1.Next.Next != null)
            {
                Pullenti.Ner.NumberToken nt1 = t1.Next as Pullenti.Ner.NumberToken;
                double val = (double)0;
                if (nt1 != null)
                {
                    val = nt1.RealValue;
                }
                if (Math.Floor(f) == Math.Floor(val))
                {
                    Pullenti.Ner.Token ttt = t1.Next.Next;
                    if (ttt.IsChar(')'))
                    {
                        t1  = ttt.Next;
                        det = true;
                        if ((t1 is Pullenti.Ner.NumberToken) && (t1 as Pullenti.Ner.NumberToken).IntValue != null && (t1 as Pullenti.Ner.NumberToken).IntValue.Value == 0)
                        {
                            t1 = t1.Next;
                        }
                    }
                    else if (((((ttt is Pullenti.Ner.NumberToken) && ((ttt as Pullenti.Ner.NumberToken).RealValue < 100) && ttt.Next != null) && ttt.Next.IsChar('/') && ttt.Next.Next != null) && ttt.Next.Next.GetSourceText() == "100" && ttt.Next.Next.Next != null) && ttt.Next.Next.Next.IsChar(')'))
                    {
                        int rest = GetDecimalRest100(f);
                        if ((ttt as Pullenti.Ner.NumberToken).IntValue != null && rest == (ttt as Pullenti.Ner.NumberToken).IntValue.Value)
                        {
                            t1  = ttt.Next.Next.Next.Next;
                            det = true;
                        }
                    }
                    else if ((ttt.IsValue("ЦЕЛЫХ", null) && (ttt.Next is Pullenti.Ner.NumberToken) && ttt.Next.Next != null) && ttt.Next.Next.Next != null && ttt.Next.Next.Next.IsChar(')'))
                    {
                        Pullenti.Ner.NumberToken num2 = ttt.Next as Pullenti.Ner.NumberToken;
                        altf = num2.RealValue;
                        if (ttt.Next.Next.IsValue("ДЕСЯТЫЙ", null))
                        {
                            altf /= 10;
                        }
                        else if (ttt.Next.Next.IsValue("СОТЫЙ", null))
                        {
                            altf /= 100;
                        }
                        else if (ttt.Next.Next.IsValue("ТЫСЯЧНЫЙ", null))
                        {
                            altf /= 1000;
                        }
                        else if (ttt.Next.Next.IsValue("ДЕСЯТИТЫСЯЧНЫЙ", null))
                        {
                            altf /= 10000;
                        }
                        else if (ttt.Next.Next.IsValue("СТОТЫСЯЧНЫЙ", null))
                        {
                            altf /= 100000;
                        }
                        else if (ttt.Next.Next.IsValue("МИЛЛИОННЫЙ", null))
                        {
                            altf /= 1000000;
                        }
                        if (altf < 1)
                        {
                            altf += val;
                            t1    = ttt.Next.Next.Next.Next;
                            det   = true;
                        }
                    }
                    else
                    {
                        Pullenti.Ner.Core.TerminToken toks1 = m_Postfixes.TryParse(ttt, Pullenti.Ner.Core.TerminParseAttr.No);
                        if (toks1 != null)
                        {
                            if (((Pullenti.Ner.Core.NumberExType)toks1.Termin.Tag) == Pullenti.Ner.Core.NumberExType.Money)
                            {
                                if (toks1.EndToken.Next != null && toks1.EndToken.Next.IsChar(')'))
                                {
                                    Pullenti.Ner.Core.NumberExToken res = new Pullenti.Ner.Core.NumberExToken(t, toks1.EndToken.Next, nt.Value, nt.Typ, Pullenti.Ner.Core.NumberExType.Money)
                                    {
                                        RealValue = f, AltRealValue = altf, Morph = toks1.BeginToken.Morph
                                    };
                                    return(_correctMoney(res, toks1.BeginToken));
                                }
                            }
                        }
                        Pullenti.Ner.Core.NumberExToken res2 = TryParseNumberWithPostfix(t1.Next);
                        if (res2 != null && res2.EndToken.Next != null && res2.EndToken.Next.IsChar(')'))
                        {
                            res2.BeginToken   = t;
                            res2.EndToken     = res2.EndToken.Next;
                            res2.AltRealValue = res2.RealValue;
                            res2.RealValue    = f;
                            _correctExtTypes(res2);
                            if (res2.WhitespacesAfterCount < 2)
                            {
                                Pullenti.Ner.Core.TerminToken toks2 = m_Postfixes.TryParse(res2.EndToken.Next, Pullenti.Ner.Core.TerminParseAttr.No);
                                if (toks2 != null)
                                {
                                    if (((Pullenti.Ner.Core.NumberExType)toks2.Termin.Tag) == Pullenti.Ner.Core.NumberExType.Money)
                                    {
                                        res2.EndToken = toks2.EndToken;
                                    }
                                }
                            }
                            return(res2);
                        }
                    }
                }
                else if (nt1 != null && nt1.Typ == Pullenti.Ner.NumberSpellingType.Words && nt.Typ == Pullenti.Ner.NumberSpellingType.Digit)
                {
                    altf = nt1.RealValue;
                    Pullenti.Ner.Token ttt = t1.Next.Next;
                    if (ttt.IsChar(')'))
                    {
                        t1  = ttt.Next;
                        det = true;
                    }
                    if (!det)
                    {
                        altf = f;
                    }
                }
            }
            if ((t1 != null && t1.IsChar('(') && t1.Next != null) && t1.Next.IsValue("СУММА", null))
            {
                Pullenti.Ner.Core.BracketSequenceToken br = Pullenti.Ner.Core.BracketHelper.TryParse(t1, Pullenti.Ner.Core.BracketParseAttr.No, 100);
                if (br != null)
                {
                    t1 = br.EndToken.Next;
                }
            }
            if (isDollar != null)
            {
                Pullenti.Ner.Token te = null;
                if (t1 != null)
                {
                    te = t1.Previous;
                }
                else
                {
                    for (t1 = t0; t1 != null; t1 = t1.Next)
                    {
                        if (t1.Next == null)
                        {
                            te = t1;
                        }
                    }
                }
                if (te == null)
                {
                    return(null);
                }
                if (te.IsHiphen && te.Next != null)
                {
                    if (te.Next.IsValue("МИЛЛИОННЫЙ", null))
                    {
                        f    *= 1000000;
                        altf *= 1000000;
                        te    = te.Next;
                    }
                    else if (te.Next.IsValue("МИЛЛИАРДНЫЙ", null))
                    {
                        f    *= 1000000000;
                        altf *= 1000000000;
                        te    = te.Next;
                    }
                }
                if (!te.IsWhitespaceAfter && (te.Next is Pullenti.Ner.TextToken))
                {
                    if (te.Next.IsValue("M", null))
                    {
                        f    *= 1000000;
                        altf *= 1000000;
                        te    = te.Next;
                    }
                    else if (te.Next.IsValue("BN", null))
                    {
                        f    *= 1000000000;
                        altf *= 1000000000;
                        te    = te.Next;
                    }
                }
                return(new Pullenti.Ner.Core.NumberExToken(t0, te, "", nt.Typ, Pullenti.Ner.Core.NumberExType.Money)
                {
                    RealValue = f, AltRealValue = altf, ExTypParam = isDollar
                });
            }
            if (t1 == null || ((t1.IsNewlineBefore && !det)))
            {
                return(null);
            }
            Pullenti.Ner.Core.TerminToken toks = m_Postfixes.TryParse(t1, Pullenti.Ner.Core.TerminParseAttr.No);
            if ((toks == null && det && (t1 is Pullenti.Ner.NumberToken)) && (t1 as Pullenti.Ner.NumberToken).Value == "0")
            {
                toks = m_Postfixes.TryParse(t1.Next, Pullenti.Ner.Core.TerminParseAttr.No);
            }
            if (toks == null && t1.IsChar('р'))
            {
                int cou = 10;
                for (Pullenti.Ner.Token ttt = t0.Previous; ttt != null && cou > 0; ttt = ttt.Previous, cou--)
                {
                    if (ttt.IsValue("СУММА", null) || ttt.IsValue("НАЛИЧНЫЙ", null) || ttt.IsValue("БАЛАНС", null))
                    {
                    }
                    else if (ttt.GetReferent() != null && ttt.GetReferent().TypeName == "MONEY")
                    {
                    }
                    else
                    {
                        continue;
                    }
                    toks = new Pullenti.Ner.Core.TerminToken(t1, t1)
                    {
                        Termin = m_Postfixes.FindTerminsByCanonicText("RUB")[0]
                    };
                    if (t1.Next != null && t1.Next.IsChar('.'))
                    {
                        toks.EndToken = t1.Next;
                    }
                    Pullenti.Ner.Core.NumberExType ty = (Pullenti.Ner.Core.NumberExType)toks.Termin.Tag;
                    return(new Pullenti.Ner.Core.NumberExToken(t, toks.EndToken, nt.Value, nt.Typ, ty)
                    {
                        RealValue = f, AltRealValue = altf, Morph = toks.BeginToken.Morph, ExTypParam = "RUB"
                    });
                }
            }
            if (toks != null)
            {
                t1 = toks.EndToken;
                if (!t1.IsChar('.') && t1.Next != null && t1.Next.IsChar('.'))
                {
                    if ((t1 is Pullenti.Ner.TextToken) && t1.IsValue(toks.Termin.Terms[0].CanonicalText, null))
                    {
                    }
                    else if (!t1.Chars.IsLetter)
                    {
                    }
                    else
                    {
                        t1 = t1.Next;
                    }
                }
                if (toks.Termin.CanonicText == "LTL")
                {
                    return(null);
                }
                if (toks.BeginToken == t1)
                {
                    if (t1.Morph.Class.IsPreposition || t1.Morph.Class.IsConjunction)
                    {
                        if (t1.IsWhitespaceBefore && t1.IsWhitespaceAfter)
                        {
                            return(null);
                        }
                    }
                }
                Pullenti.Ner.Core.NumberExType  ty  = (Pullenti.Ner.Core.NumberExType)toks.Termin.Tag;
                Pullenti.Ner.Core.NumberExToken res = new Pullenti.Ner.Core.NumberExToken(t, t1, nt.Value, nt.Typ, ty)
                {
                    RealValue = f, AltRealValue = altf, Morph = toks.BeginToken.Morph
                };
                if (ty != Pullenti.Ner.Core.NumberExType.Money)
                {
                    _correctExtTypes(res);
                    return(res);
                }
                return(_correctMoney(res, toks.BeginToken));
            }
            Pullenti.Ner.Core.NumberExToken pfx = _attachSpecPostfix(t1);
            if (pfx != null)
            {
                pfx.BeginToken   = t;
                pfx.Value        = nt.Value;
                pfx.Typ          = nt.Typ;
                pfx.RealValue    = f;
                pfx.AltRealValue = altf;
                return(pfx);
            }
            if (t1.Next != null && ((t1.Morph.Class.IsPreposition || t1.Morph.Class.IsConjunction)))
            {
                if (t1.IsValue("НА", null))
                {
                }
                else
                {
                    Pullenti.Ner.Core.NumberExToken nn = TryParseNumberWithPostfix(t1.Next);
                    if (nn != null)
                    {
                        return new Pullenti.Ner.Core.NumberExToken(t, t, nt.Value, nt.Typ, nn.ExTyp)
                               {
                                   RealValue = f, AltRealValue = altf, ExTyp2 = nn.ExTyp2, ExTypParam = nn.ExTypParam
                               }
                    }
                    ;
                }
            }
            if (!t1.IsWhitespaceAfter && (t1.Next is Pullenti.Ner.NumberToken) && (t1 is Pullenti.Ner.TextToken))
            {
                string term = (t1 as Pullenti.Ner.TextToken).Term;
                Pullenti.Ner.Core.NumberExType ty = Pullenti.Ner.Core.NumberExType.Undefined;
                if (term == "СМХ" || term == "CMX")
                {
                    ty = Pullenti.Ner.Core.NumberExType.Santimeter;
                }
                else if (term == "MX" || term == "МХ")
                {
                    ty = Pullenti.Ner.Core.NumberExType.Meter;
                }
                else if (term == "MMX" || term == "ММХ")
                {
                    ty = Pullenti.Ner.Core.NumberExType.Millimeter;
                }
                if (ty != Pullenti.Ner.Core.NumberExType.Undefined)
                {
                    return new Pullenti.Ner.Core.NumberExToken(t, t1, nt.Value, nt.Typ, ty)
                           {
                               RealValue = f, AltRealValue = altf, MultAfter = true
                           }
                }
                ;
            }
            return(null);
        }
Exemple #15
0
        /// <summary>
        /// Starting at <paramref name="t0"/>, recognises birth/death information (an introductory
        /// keyword followed by a date, an age after a death date, or a bracketed date / date range)
        /// and stores what it finds on <paramref name="p"/>.
        /// </summary>
        /// <param name="p">Referent that receives the BORN / DIE / AGE slots; when null nothing is stored but tokens are still consumed.</param>
        /// <param name="t0">Token at which scanning starts; may be null.</param>
        /// <returns>The last token recognised as part of this information, or <paramref name="t0"/> when nothing was recognised.</returns>
        static Pullenti.Ner.Token CorrectTailAttributes(Pullenti.Ner.Person.PersonReferent p, Pullenti.Ner.Token t0)
        {
            Pullenti.Ner.Token result = t0;
            Pullenti.Ner.Token cur    = t0;
            bool isBirth = false;
            bool isDeath = false;

            // A leading comma is skipped.
            if (cur != null && cur.IsChar(','))
            {
                cur = cur.Next;
            }

            // Optional introductory keyword: "born", "died" or "date of birth".
            if (cur != null)
            {
                if (cur.IsValue("РОДИТЬСЯ", "НАРОДИТИСЯ") || cur.IsValue("BORN", null))
                {
                    isBirth = true;
                    cur     = cur.Next;
                }
                else if (cur.IsValue("УМЕРЕТЬ", "ПОМЕРТИ") || cur.IsValue("СКОНЧАТЬСЯ", null) || cur.IsValue("DIED", null))
                {
                    isDeath = true;
                    cur     = cur.Next;
                }
                else if (cur.IsValue("ДАТА", null) && cur.Next != null && cur.Next.IsValue("РОЖДЕНИЕ", "НАРОДЖЕННЯ"))
                {
                    isBirth = true;
                    cur     = cur.Next.Next;
                }
            }

            // Prepositions, hyphens and colons between the keyword and the date are ignored.
            while (cur != null && (cur.Morph.Class.IsPreposition || cur.IsHiphen || cur.IsChar(':')))
            {
                cur = cur.Next;
            }

            // A DATE referent, optionally followed by a birth marker ("Р", "Р." or "РОЖДЕНИЕ").
            Pullenti.Ner.Referent dateRef = (cur == null ? null : cur.GetReferent());
            if (dateRef != null && dateRef.TypeName == "DATE")
            {
                Pullenti.Ner.Token dateEnd = cur;
                Pullenti.Ner.Token marker  = cur.Next;
                if (marker != null && (marker.IsValue("Р", null) || marker.IsValue("РОЖДЕНИЕ", "НАРОДЖЕННЯ")))
                {
                    isBirth = true;
                    dateEnd = marker;
                    if (dateEnd.Next != null && dateEnd.Next.IsChar('.'))
                    {
                        dateEnd = dateEnd.Next;
                    }
                }
                // Birth takes precedence over death; a date with neither flag is left untouched.
                if (isBirth)
                {
                    if (p != null)
                    {
                        p.AddSlot(Pullenti.Ner.Person.PersonReferent.ATTR_BORN, dateRef, false, 0);
                    }
                    result = dateEnd;
                    cur    = dateEnd;
                }
                else if (isDeath)
                {
                    if (p != null)
                    {
                        p.AddSlot(Pullenti.Ner.Person.PersonReferent.ATTR_DIE, dateRef, false, 0);
                    }
                    result = dateEnd;
                    cur    = dateEnd;
                }
            }

            // After a death keyword, an age may follow the current token.
            if (isDeath && cur != null)
            {
                Pullenti.Ner.NumberToken age = Pullenti.Ner.Core.NumberHelper.TryParseAge(cur.Next);
                if (age != null)
                {
                    if (p != null)
                    {
                        p.AddSlot(Pullenti.Ner.Person.PersonReferent.ATTR_AGE, age.Value.ToString(), false, 0);
                    }
                    result = age.EndToken;
                    cur    = age.EndToken.Next;
                }
            }

            // Only a bracketed expression can extend the result from here on.
            if (cur == null || !cur.IsChar('('))
            {
                return(result);
            }
            Pullenti.Ner.Core.BracketSequenceToken brackets = Pullenti.Ner.Core.BracketHelper.TryParse(cur, Pullenti.Ner.Core.BracketParseAttr.No, 100);
            if (brackets == null)
            {
                return(result);
            }

            // Inside the brackets an optional "РОД" / "РОД." prefix is skipped.
            Pullenti.Ner.Token inner = cur.Next;
            if (inner.IsValue("РОД", null))
            {
                inner = inner.Next;
                if (inner != null && inner.IsChar('.'))
                {
                    inner = inner.Next;
                }
            }
            if (!(inner is Pullenti.Ner.ReferentToken))
            {
                return(result);
            }

            // The referent must be immediately followed by the closing bracket.
            Pullenti.Ner.Referent innerRef = inner.GetReferent();
            if (innerRef.TypeName == "DATERANGE" && inner.Next == brackets.EndToken)
            {
                Pullenti.Ner.Referent rangeFrom = innerRef.GetSlotValue("FROM") as Pullenti.Ner.Referent;
                Pullenti.Ner.Referent rangeTo   = innerRef.GetSlotValue("TO") as Pullenti.Ner.Referent;
                if (rangeFrom != null && rangeTo != null)
                {
                    if (p != null)
                    {
                        p.AddSlot(Pullenti.Ner.Person.PersonReferent.ATTR_BORN, rangeFrom, false, 0);
                        p.AddSlot(Pullenti.Ner.Person.PersonReferent.ATTR_DIE, rangeTo, false, 0);
                    }
                    result = brackets.EndToken;
                }
            }
            else if (innerRef.TypeName == "DATE" && inner.Next == brackets.EndToken)
            {
                // A single bracketed date is stored as the birth date.
                if (p != null)
                {
                    p.AddSlot(Pullenti.Ner.Person.PersonReferent.ATTR_BORN, innerRef, false, 0);
                }
                result = brackets.EndToken;
            }
            return(result);
        }
Exemple #16
0
        /// <summary>
        /// Collects an ISBN-like sequence starting at <paramref name="t0"/>: digit tokens joined by
        /// "-" tokens and optionally terminated by an "X" (Cyrillic "Х" is normalised to Latin "X").
        /// </summary>
        /// <param name="t0">Token at which the candidate starts.</param>
        /// <returns>
        /// A <c>UriItemToken</c> from <paramref name="t0"/> to the last consumed token whose
        /// <c>Value</c> is the collected text, or null when the text holds fewer than seven digits.
        /// </returns>
        public static UriItemToken AttachISBN(Pullenti.Ner.Token t0)
        {
            StringBuilder      code        = new StringBuilder();
            Pullenti.Ner.Token last        = t0;
            int                sourceChars = 0;

            for (Pullenti.Ner.Token cur = t0; cur != null; cur = cur.Next)
            {
                if (cur.IsTableControlChar)
                {
                    break;
                }
                // A line break ends the sequence unless the previous token is a hyphen.
                if (cur.IsNewlineBefore && cur != t0 && !(cur.Previous != null && cur.Previous.IsHiphen))
                {
                    break;
                }

                Pullenti.Ner.NumberToken number = cur as Pullenti.Ner.NumberToken;
                if (number != null)
                {
                    // Only digit-spelled numbers with an undefined morphological class are accepted.
                    if (!(number.Typ == Pullenti.Ner.NumberSpellingType.Digit && number.Morph.Class.IsUndefined))
                    {
                        break;
                    }
                    string digits = number.GetSourceText();
                    code.Append(digits);
                    sourceChars += digits.Length;
                    last         = cur;
                    // Stop once more than 13 source characters have been taken from number tokens.
                    if (sourceChars > 13)
                    {
                        break;
                    }
                    continue;
                }

                Pullenti.Ner.TextToken text = cur as Pullenti.Ner.TextToken;
                if (text == null)
                {
                    break;
                }
                string piece  = text.Term;
                bool   isDash = piece == "-";
                if (!isDash)
                {
                    // Anything other than a dash must be X (Cyrillic or Latin); it is stored as Latin X.
                    if (piece != "Х" && piece != "X")
                    {
                        break;
                    }
                    piece = "X";
                }
                code.Append(piece);
                last = cur;
                if (!isDash)
                {
                    // X is always the final element of the sequence.
                    break;
                }
            }

            int digitCount = 0;
            foreach (char ch in code.ToString())
            {
                if (char.IsDigit(ch))
                {
                    digitCount++;
                }
            }
            if (digitCount < 7)
            {
                return(null);
            }

            UriItemToken item = new UriItemToken(t0, last);
            item.Value = code.ToString();
            return(item);
        }
Exemple #17
0
 // Tries to read a numbering prefix that starts at token t and records it in res
 // (Numbers, NumTyp, NumBeginToken, NumEndToken, EndToken, NumSuffix, MinNumber).
 // Four kinds of prefix are tried in order: a digit number (with optional sub-numbers),
 // a number spelled in words, a Roman numeral, and a single letter.
 // t    - token at which the candidate number starts
 // res  - item being filled in; it is mutated in place and nothing is returned
 // prev - preceding item, consulted only when a letter followed by '.' is checked; may be null
 static void _parseNumber(Pullenti.Ner.Token t, InstrToken1 res, InstrToken1 prev)
 {
     // Case 1: a number written with digits whose integer value is below 3000.
     if (((t is Pullenti.Ner.NumberToken) && (t as Pullenti.Ner.NumberToken).IntValue != null && (t as Pullenti.Ner.NumberToken).Typ == Pullenti.Ner.NumberSpellingType.Digit) && ((t as Pullenti.Ner.NumberToken).IntValue.Value < 3000))
     {
         // NOTE(review): empty branch - it has no effect as written.
         if (res.Numbers.Count >= 4)
         {
         }
         // A number with adjective morphology is ignored when res has container rank 0.
         if (t.Morph.Class.IsAdjective && res.TypContainerRank == 0)
         {
             return;
         }
         // A number with a postfix is rejected when the postfix construct's end token
         // has no whitespace before it.
         Pullenti.Ner.Core.NumberExToken nwp = Pullenti.Ner.Core.NumberHelper.TryParseNumberWithPostfix(t);
         if (nwp != null)
         {
             if (nwp.EndToken.IsWhitespaceBefore)
             {
             }
             else
             {
                 return;
             }
         }
         // The number is followed (fewer than 3 whitespaces away) by an all-lowercase letter token.
         if ((t.Next != null && (t.WhitespacesAfterCount < 3) && t.Next.Chars.IsLetter) && t.Next.Chars.IsAllLower)
         {
             // A single character glued to the number falls through to the general handling below.
             if (!t.IsWhitespaceAfter && t.Next.LengthChar == 1)
             {
             }
             else if (res.Numbers.Count == 0)
             {
                 // First number: record just this token and stop.
                 res.NumTyp = NumberTypes.Digit;
                 res.Numbers.Add((t as Pullenti.Ner.NumberToken).Value.ToString());
                 res.NumBeginToken = (res.NumEndToken = (res.EndToken = t));
                 return;
             }
             else
             {
                 return;
             }
         }
         // The first number makes the type Digit; any later one turns it into Combo.
         // Note that NumTyp is updated before the early return just below.
         if (res.NumTyp == NumberTypes.Undefined)
         {
             res.NumTyp = NumberTypes.Digit;
         }
         else
         {
             res.NumTyp = NumberTypes.Combo;
         }
         // A continuation number must not be separated by whitespace from what precedes it.
         if (res.Numbers.Count > 0 && t.IsWhitespaceBefore)
         {
             return;
         }
         if (res.Numbers.Count == 0)
         {
             res.NumBeginToken = t;
         }
         // Range "N-M" with M > N: N is kept as MinNumber and parsing continues from M.
         if ((t.Next != null && t.Next.IsHiphen && (t.Next.Next is Pullenti.Ner.NumberToken)) && (t.Next.Next as Pullenti.Ner.NumberToken).IntValue != null && (t.Next.Next as Pullenti.Ner.NumberToken).IntValue.Value > (t as Pullenti.Ner.NumberToken).IntValue.Value)
         {
             res.MinNumber = (t as Pullenti.Ner.NumberToken).Value.ToString();
             t             = t.Next.Next;
         }
         // The same range with a closing bracket after the first number: "N)-M".
         else if (((t.Next != null && t.Next.IsCharOf(")") && t.Next.Next != null) && t.Next.Next.IsHiphen && (t.Next.Next.Next is Pullenti.Ner.NumberToken)) && (t.Next.Next.Next as Pullenti.Ner.NumberToken).IntValue != null && (t.Next.Next.Next as Pullenti.Ner.NumberToken).IntValue.Value > (t as Pullenti.Ner.NumberToken).IntValue.Value)
         {
             res.MinNumber = (t as Pullenti.Ner.NumberToken).Value.ToString();
             t             = t.Next.Next.Next;
         }
         res.Numbers.Add((t as Pullenti.Ner.NumberToken).Value.ToString());
         res.EndToken  = (res.NumEndToken = t);
         res.NumSuffix = null;
         // Collect further components while fewer than four have been gathered.
         for (Pullenti.Ner.Token ttt = t.Next; ttt != null && (res.Numbers.Count < 4); ttt = ttt.Next)
         {
             bool ok1 = false;
             bool ok2 = false;
             // ok1: '.' or '_' immediately followed by a digit number
             // (or by a number in words written with Latin letters).
             if ((ttt.IsCharOf("._") && !ttt.IsWhitespaceAfter && (ttt.Next is Pullenti.Ner.NumberToken)) && (((ttt.Next as Pullenti.Ner.NumberToken).Typ == Pullenti.Ner.NumberSpellingType.Digit || (((ttt.Next as Pullenti.Ner.NumberToken).Typ == Pullenti.Ner.NumberSpellingType.Words) && ttt.Next.Chars.IsLatinLetter && !ttt.IsWhitespaceAfter))))
             {
                 ok1 = true;
             }
             // ok2: a number enclosed in "(...)" or "<...>".
             else if ((ttt.IsCharOf("(<") && (ttt.Next is Pullenti.Ner.NumberToken) && ttt.Next.Next != null) && ttt.Next.Next.IsCharOf(")>"))
             {
                 ok2 = true;
             }
             if (ok1 || ok2)
             {
                 ttt = ttt.Next;
                 res.Numbers.Add((ttt as Pullenti.Ner.NumberToken).Value.ToString());
                 // The type follows the number of collected components (2, 3, otherwise 4).
                 res.NumTyp = (res.Numbers.Count == 2 ? NumberTypes.TwoDigits : (res.Numbers.Count == 3 ? NumberTypes.ThreeDigits : NumberTypes.FourDigits));
                 // Consume the closing bracket: always for ok2, and for ok1 only when a '.' follows it.
                 if ((ttt.Next != null && ttt.Next.IsCharOf(")>") && ttt.Next.Next != null) && ttt.Next.Next.IsChar('.'))
                 {
                     ttt = ttt.Next;
                 }
                 else if (ok2)
                 {
                     ttt = ttt.Next;
                 }
                 t = (res.EndToken = (res.NumEndToken = ttt));
                 continue;
             }
             // A single letter glued to the first number (e.g. "5a") gives a Combo number.
             if (((ttt is Pullenti.Ner.TextToken) && ttt.LengthChar == 1 && ttt.Chars.IsLetter) && !ttt.IsWhitespaceBefore && res.Numbers.Count == 1)
             {
                 res.Numbers.Add((ttt as Pullenti.Ner.TextToken).Term);
                 res.NumTyp = NumberTypes.Combo;
                 t          = (res.EndToken = (res.NumEndToken = ttt));
                 continue;
             }
             break;
         }
         // A trailing ')' or '.' becomes the suffix and is included in the span.
         if (t.Next != null && t.Next.IsCharOf(")."))
         {
             res.NumSuffix = t.Next.GetSourceText();
             t             = (res.EndToken = (res.NumEndToken = t.Next));
         }
         return;
     }
     // Case 2: a number spelled in words, accepted only when res has a positive
     // container rank and no numbers have been collected yet.
     if (((t is Pullenti.Ner.NumberToken) && (t as Pullenti.Ner.NumberToken).Typ == Pullenti.Ner.NumberSpellingType.Words && res.TypContainerRank > 0) && res.Numbers.Count == 0)
     {
         res.Numbers.Add((t as Pullenti.Ner.NumberToken).Value.ToString());
         res.NumTyp        = NumberTypes.Digit;
         res.NumBeginToken = t;
         if (t.Next != null && t.Next.IsChar('.'))
         {
             t             = t.Next;
             res.NumSuffix = ".";
         }
         res.EndToken = (res.NumEndToken = t);
         return;
     }
     // Case 3: a Roman numeral.
     Pullenti.Ner.NumberToken nt = Pullenti.Ner.Core.NumberHelper.TryParseRoman(t);
     // Value 10 directly before ')' is discarded - presumably because a lone X is more
     // likely a letter marker than a numeral (TODO confirm).
     if ((nt != null && nt.Value == "10" && t.Next != null) && t.Next.IsChar(')'))
     {
         nt = null;
     }
     // Value 100 is always discarded - presumably to avoid reading a lone C as a numeral (TODO confirm).
     if (nt != null && nt.Value == "100")
     {
         nt = null;
     }
     if (nt != null && nt.Typ == Pullenti.Ner.NumberSpellingType.Roman)
     {
         // As in the digit case, NumTyp is updated before the early return below.
         if (res.NumTyp == NumberTypes.Undefined)
         {
             res.NumTyp = NumberTypes.Roman;
         }
         else
         {
             res.NumTyp = NumberTypes.Combo;
         }
         if (res.Numbers.Count > 0 && t.IsWhitespaceBefore)
         {
             return;
         }
         if (res.Numbers.Count == 0)
         {
             res.NumBeginToken = t;
         }
         res.Numbers.Add(nt.Value.ToString());
         t = (res.EndToken = (res.NumEndToken = nt.EndToken));
         // For chapters, sections and lines a Roman numeral may be followed by up to two
         // digit sub-numbers introduced by '.', '_' or '<' (a closing '>' is consumed too).
         if (res.NumTyp == NumberTypes.Roman && ((res.Typ == InstrToken1.Types.Chapter || res.Typ == InstrToken1.Types.Section || res.Typ == InstrToken1.Types.Line)))
         {
             if ((t.Next != null && t.Next.IsCharOf("._<") && (t.Next.Next is Pullenti.Ner.NumberToken)) && (t.Next.Next as Pullenti.Ner.NumberToken).Typ == Pullenti.Ner.NumberSpellingType.Digit)
             {
                 t = t.Next.Next;
                 res.Numbers.Add((t as Pullenti.Ner.NumberToken).Value.ToString());
                 res.NumTyp = NumberTypes.TwoDigits;
                 if (t.Next != null && t.Next.IsChar('>'))
                 {
                     t = t.Next;
                 }
                 res.EndToken = (res.NumEndToken = t);
                 if ((t.Next != null && t.Next.IsCharOf("._<") && (t.Next.Next is Pullenti.Ner.NumberToken)) && (t.Next.Next as Pullenti.Ner.NumberToken).Typ == Pullenti.Ner.NumberSpellingType.Digit)
                 {
                     t = t.Next.Next;
                     res.Numbers.Add((t as Pullenti.Ner.NumberToken).Value.ToString());
                     res.NumTyp = NumberTypes.ThreeDigits;
                     if (t.Next != null && t.Next.IsChar('>'))
                     {
                         t = t.Next;
                     }
                     res.EndToken = (res.NumEndToken = t);
                 }
             }
         }
         // A trailing ')' or '.' becomes the suffix and is included in the span.
         if (t.Next != null && t.Next.IsCharOf(")."))
         {
             res.NumSuffix = t.Next.GetSourceText();
             t             = (res.EndToken = (res.NumEndToken = t.Next));
         }
         return;
     }
     // Case 4: a single letter that is the very first token of res.
     if (((t is Pullenti.Ner.TextToken) && t.LengthChar == 1 && t.Chars.IsLetter) && t == res.BeginToken)
     {
         // Letter glued to a number and followed by '.': the number is stored,
         // and the letter plus '.' becomes the suffix.
         if ((!t.IsWhitespaceAfter && (t.Next is Pullenti.Ner.NumberToken) && t.Next.Next != null) && t.Next.Next.IsChar('.'))
         {
             res.NumBeginToken = t;
             res.NumTyp        = NumberTypes.Digit;
             res.Numbers.Add((t.Next as Pullenti.Ner.NumberToken).Value.ToString());
             res.NumSuffix = (t as Pullenti.Ner.TextToken).Term + ".";
             t             = (res.EndToken = (res.NumEndToken = t.Next.Next));
             return;
         }
         if (t.Next != null && t.Next.IsCharOf(".)"))
         {
             // "a.1)" written without spaces: two components (letter, number) with suffix ')'.
             if (((t.Next.IsChar('.') && (t.Next.Next is Pullenti.Ner.NumberToken) && t.Next.Next.Next != null) && t.Next.Next.Next.IsChar(')') && !t.Next.IsWhitespaceAfter) && !t.Next.Next.IsWhitespaceAfter)
             {
                 res.NumTyp = NumberTypes.TwoDigits;
                 res.Numbers.Add((t as Pullenti.Ner.TextToken).Term);
                 res.Numbers.Add((t.Next.Next as Pullenti.Ner.NumberToken).Value.ToString());
                 res.NumSuffix     = ")";
                 res.NumBeginToken = t;
                 t = (res.EndToken = (res.NumEndToken = t.Next.Next.Next));
                 return;
             }
             // A letter followed by '.' is not taken as a number when the letter is
             // upper-case or a number comes right after the dot.
             if (t.Next.IsChar('.') && ((t.Chars.IsAllUpper || (t.Next.Next is Pullenti.Ner.NumberToken))))
             {
             }
             else
             {
                 // A temporary item is built only to obtain LastNumber for this letter
                 // (presumably the letter's ordinal position - TODO confirm against InstrToken1).
                 InstrToken1 tmp1 = new InstrToken1(t, t.Next);
                 tmp1.Numbers.Add((t as Pullenti.Ner.TextToken).Term);
                 // "x." with LastNumber > 1 is rejected unless it directly continues prev's sequence.
                 if (tmp1.LastNumber > 1 && t.Next.IsCharOf(".") && ((prev == null || (prev.LastNumber + 1) != tmp1.LastNumber)))
                 {
                 }
                 else
                 {
                     if (res.Numbers.Count == 0)
                     {
                         res.NumBeginToken = t;
                     }
                     res.NumTyp = NumberTypes.Letter;
                     res.Numbers.Add((t as Pullenti.Ner.TextToken).Term);
                     // NOTE(review): NumBeginToken is assigned unconditionally here,
                     // which makes the conditional assignment above redundant.
                     res.NumBeginToken = t;
                     t             = (res.EndToken = (res.NumEndToken = t.Next));
                     res.NumSuffix = t.GetSourceText();
                     return;
                 }
             }
         }
     }
 }
Exemple #18
0
        /// <summary>
        /// Collects a classification-code-like sequence starting at <paramref name="t0"/>: digit
        /// tokens mixed with text tokens, up to the end of the line.
        /// </summary>
        /// <param name="t0">Token at which the candidate starts.</param>
        /// <returns>
        /// A <c>UriItemToken</c> from <paramref name="t0"/> to the last consumed token whose
        /// <c>Value</c> is the collected text (without one trailing '.'), or null when the text is
        /// shorter than three characters or fewer than two characters came from number tokens.
        /// </returns>
        public static UriItemToken AttachBBK(Pullenti.Ner.Token t0)
        {
            StringBuilder      code        = new StringBuilder();
            Pullenti.Ner.Token last        = t0;
            int                numberChars = 0;

            for (Pullenti.Ner.Token cur = t0; cur != null; cur = cur.Next)
            {
                // The code never crosses a line break or a table control character.
                if ((cur.IsNewlineBefore && cur != t0) || cur.IsTableControlChar)
                {
                    break;
                }

                Pullenti.Ner.NumberToken number = cur as Pullenti.Ner.NumberToken;
                if (number != null)
                {
                    // Only digit-spelled numbers with an undefined morphological class are accepted.
                    if (!(number.Typ == Pullenti.Ner.NumberSpellingType.Digit && number.Morph.Class.IsUndefined))
                    {
                        break;
                    }
                    string digits = number.GetSourceText();
                    code.Append(digits);
                    numberChars += digits.Length;
                    last         = cur;
                    continue;
                }

                Pullenti.Ner.TextToken text = cur as Pullenti.Ner.TextToken;
                if (text == null)
                {
                    break;
                }
                // A comma ends the code, and so does '(' unless a number follows it.
                if (text.IsChar(','))
                {
                    break;
                }
                if (text.IsChar('(') && !(text.Next is Pullenti.Ner.NumberToken))
                {
                    break;
                }
                // Text starting with a letter must be glued to the preceding token.
                string piece = text.GetSourceText();
                if (char.IsLetter(piece[0]) && text.IsWhitespaceBefore)
                {
                    break;
                }
                code.Append(piece);
                last = cur;
            }

            if (code.Length < 3 || numberChars < 2)
            {
                return(null);
            }
            // A trailing '.' is dropped, and the span ends one token earlier.
            if (code[code.Length - 1] == '.')
            {
                code.Length = code.Length - 1;
                last        = last.Previous;
            }

            UriItemToken item = new UriItemToken(t0, last);
            item.Value = code.ToString();
            return(item);
        }
Exemple #19
0
        internal static NumbersWithUnitToken _tryParse(Pullenti.Ner.Token t, Pullenti.Ner.Core.TerminCollection addUnits, bool second, bool canOmitNumber, bool canBeNan)
        {
            if (t == null)
            {
                return(null);
            }
            while (t != null)
            {
                if (t.IsCommaAnd || t.IsValue("НО", null))
                {
                    t = t.Next;
                }
                else
                {
                    break;
                }
            }
            Pullenti.Ner.Token t0 = t;
            bool about            = false;
            bool hasKeyw          = false;
            bool isDiapKeyw       = false;
            int  minMax           = 0;

            Pullenti.Ner.Token ttt = _isMinOrMax(t, ref minMax);
            if (ttt != null)
            {
                t = ttt.Next;
                if (t == null)
                {
                    return(null);
                }
            }
            if (t == null)
            {
                return(null);
            }
            if (t.IsChar('~') || t.IsValue("ОКОЛО", null) || t.IsValue("ПРИМЕРНО", null))
            {
                t       = t.Next;
                about   = true;
                hasKeyw = true;
                if (t == null)
                {
                    return(null);
                }
            }
            if (t.IsValue("В", null) && t.Next != null)
            {
                if (t.Next.IsValue("ПРЕДЕЛ", null) || t.IsValue("ДИАПАЗОН", null))
                {
                    t = t.Next.Next;
                    if (t == null)
                    {
                        return(null);
                    }
                    isDiapKeyw = true;
                }
            }
            if (t0.IsChar('('))
            {
                NumbersWithUnitToken mt0 = _tryParse(t.Next, addUnits, false, false, false);
                if (mt0 != null && mt0.EndToken.Next != null && mt0.EndToken.Next.IsChar(')'))
                {
                    if (second)
                    {
                        if (mt0.FromVal != null && mt0.ToVal != null && mt0.FromVal.Value == (-mt0.ToVal.Value))
                        {
                        }
                        else
                        {
                            return(null);
                        }
                    }
                    mt0.BeginToken = t0;
                    mt0.EndToken   = mt0.EndToken.Next;
                    List <UnitToken> uu = UnitToken.TryParseList(mt0.EndToken.Next, addUnits, false);
                    if (uu != null && mt0.Units.Count == 0)
                    {
                        mt0.Units    = uu;
                        mt0.EndToken = uu[uu.Count - 1].EndToken;
                    }
                    return(mt0);
                }
            }
            bool    plusminus  = false;
            bool    unitBefore = false;
            bool    isAge      = false;
            DiapTyp dty        = DiapTyp.Undefined;

            Pullenti.Ner.MetaToken whd = null;
            List <UnitToken>       uni = null;

            Pullenti.Ner.Core.TerminToken tok = (m_Termins == null ? null : m_Termins.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No));
            if (tok != null)
            {
                if (tok.EndToken.IsValue("СТАРШЕ", null) || tok.EndToken.IsValue("МЛАДШЕ", null))
                {
                    isAge = true;
                }
                t       = tok.EndToken.Next;
                dty     = (DiapTyp)tok.Termin.Tag;
                hasKeyw = true;
                if (!tok.IsWhitespaceAfter)
                {
                    if (t == null)
                    {
                        return(null);
                    }
                    if (t is Pullenti.Ner.NumberToken)
                    {
                        if (tok.BeginToken == tok.EndToken && !tok.Chars.IsAllLower)
                        {
                            return(null);
                        }
                    }
                    else if (t.IsComma && t.Next != null && t.Next.IsValue("ЧЕМ", null))
                    {
                        t = t.Next.Next;
                        if (t != null && t.Morph.Class.IsPreposition)
                        {
                            t = t.Next;
                        }
                    }
                    else if (t.IsCharOf(":,(") || t.IsTableControlChar)
                    {
                    }
                    else
                    {
                        return(null);
                    }
                }
                if (t != null && t.IsChar('('))
                {
                    uni = UnitToken.TryParseList(t.Next, addUnits, false);
                    if (uni != null)
                    {
                        t = uni[uni.Count - 1].EndToken.Next;
                        while (t != null)
                        {
                            if (t.IsCharOf("):"))
                            {
                                t = t.Next;
                            }
                            else
                            {
                                break;
                            }
                        }
                        NumbersWithUnitToken mt0 = _tryParse(t, addUnits, false, canOmitNumber, false);
                        if (mt0 != null && mt0.Units.Count == 0)
                        {
                            mt0.BeginToken = t0;
                            mt0.Units      = uni;
                            return(mt0);
                        }
                    }
                    whd = _tryParseWHL(t);
                    if (whd != null)
                    {
                        t = whd.EndToken.Next;
                    }
                }
                else if (t != null && t.IsValue("IP", null))
                {
                    uni = UnitToken.TryParseList(t, addUnits, false);
                    if (uni != null)
                    {
                        t = uni[uni.Count - 1].EndToken.Next;
                    }
                }
                if ((t != null && t.IsHiphen && t.IsWhitespaceBefore) && t.IsWhitespaceAfter)
                {
                    t = t.Next;
                }
            }
            else if (t.IsChar('<'))
            {
                dty     = DiapTyp.Ls;
                t       = t.Next;
                hasKeyw = true;
                if (t != null && t.IsChar('='))
                {
                    t   = t.Next;
                    dty = DiapTyp.Le;
                }
            }
            else if (t.IsChar('>'))
            {
                dty     = DiapTyp.Gt;
                t       = t.Next;
                hasKeyw = true;
                if (t != null && t.IsChar('='))
                {
                    t   = t.Next;
                    dty = DiapTyp.Ge;
                }
            }
            else if (t.IsChar('≤'))
            {
                dty     = DiapTyp.Le;
                hasKeyw = true;
                t       = t.Next;
            }
            else if (t.IsChar('≥'))
            {
                dty     = DiapTyp.Ge;
                hasKeyw = true;
                t       = t.Next;
            }
            else if (t.IsValue("IP", null))
            {
                uni = UnitToken.TryParseList(t, addUnits, false);
                if (uni != null)
                {
                    t = uni[uni.Count - 1].EndToken.Next;
                }
            }
            else if (t.IsValue("ЗА", null) && (t.Next is Pullenti.Ner.NumberToken))
            {
                dty = DiapTyp.Ge;
                t   = t.Next;
            }
            while (t != null && ((t.IsCharOf(":,") || t.IsValue("ЧЕМ", null) || t.IsTableControlChar)))
            {
                t = t.Next;
            }
            if (t != null)
            {
                if (t.IsChar('+') || t.IsValue("ПЛЮС", null))
                {
                    t = t.Next;
                    if (t != null && !t.IsWhitespaceBefore)
                    {
                        if (t.IsHiphen)
                        {
                            t         = t.Next;
                            plusminus = true;
                        }
                        else if ((t.IsCharOf("\\/") && t.Next != null && !t.IsNewlineAfter) && t.Next.IsHiphen)
                        {
                            t         = t.Next.Next;
                            plusminus = true;
                        }
                    }
                }
                else if (second && (t.IsCharOf("\\/÷…~")))
                {
                    t = t.Next;
                }
                else if ((t.IsHiphen && t == t0 && !second) && m_Termins.TryParse(t.Next, Pullenti.Ner.Core.TerminParseAttr.No) != null)
                {
                    tok = m_Termins.TryParse(t.Next, Pullenti.Ner.Core.TerminParseAttr.No);
                    t   = tok.EndToken.Next;
                    dty = (DiapTyp)tok.Termin.Tag;
                }
                else if (t.IsHiphen && t == t0 && ((t.IsWhitespaceAfter || second)))
                {
                    t = t.Next;
                }
                else if (t.IsChar('±'))
                {
                    t         = t.Next;
                    plusminus = true;
                    hasKeyw   = true;
                }
                else if ((second && t.IsChar('.') && t.Next != null) && t.Next.IsChar('.'))
                {
                    t = t.Next.Next;
                    if (t != null && t.IsChar('.'))
                    {
                        t = t.Next;
                    }
                }
            }
            Pullenti.Ner.NumberToken num = Pullenti.Ner.Core.NumberHelper.TryParseRealNumber(t, true, false);
            if (num == null)
            {
                uni = UnitToken.TryParseList(t, addUnits, false);
                if (uni != null)
                {
                    unitBefore = true;
                    t          = uni[uni.Count - 1].EndToken.Next;
                    bool delim = false;
                    while (t != null)
                    {
                        if (t.IsCharOf(":,"))
                        {
                            delim = true;
                            t     = t.Next;
                        }
                        else if (t.IsHiphen && t.IsWhitespaceAfter)
                        {
                            delim = true;
                            t     = t.Next;
                        }
                        else
                        {
                            break;
                        }
                    }
                    if (!delim)
                    {
                        if (t == null)
                        {
                            if (hasKeyw && canBeNan)
                            {
                            }
                            else
                            {
                                return(null);
                            }
                        }
                        else if (!t.IsWhitespaceBefore)
                        {
                            return(null);
                        }
                        if (t.Next != null && t.IsHiphen && t.IsWhitespaceAfter)
                        {
                            delim = true;
                            t     = t.Next;
                        }
                    }
                    num = Pullenti.Ner.Core.NumberHelper.TryParseRealNumber(t, true, false);
                }
            }
            NumbersWithUnitToken res = null;
            double rval = (double)0;

            if (num == null)
            {
                Pullenti.Ner.Core.TerminToken tt = m_Spec.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
                if (tt != null)
                {
                    rval = (double)tt.Termin.Tag;
                    string unam = (string)tt.Termin.Tag2;
                    foreach (Unit u in UnitsHelper.Units)
                    {
                        if (u.FullnameCyr == unam)
                        {
                            uni = new List <UnitToken>();
                            uni.Add(new UnitToken(t, t)
                            {
                                Unit = u
                            });
                            break;
                        }
                    }
                    if (uni == null)
                    {
                        return(null);
                    }
                    res = new NumbersWithUnitToken(t0, tt.EndToken)
                    {
                        About = about
                    };
                    t = tt.EndToken.Next;
                }
                else
                {
                    if (!canOmitNumber && !hasKeyw && !canBeNan)
                    {
                        return(null);
                    }
                    if ((uni != null && uni.Count == 1 && uni[0].BeginToken == uni[0].EndToken) && uni[0].LengthChar > 3)
                    {
                        rval = 1;
                        res  = new NumbersWithUnitToken(t0, uni[uni.Count - 1].EndToken)
                        {
                            About = about
                        };
                        t = res.EndToken.Next;
                    }
                    else if (hasKeyw && canBeNan)
                    {
                        rval = double.NaN;
                        res  = new NumbersWithUnitToken(t0, t0)
                        {
                            About = about
                        };
                        if (t != null)
                        {
                            res.EndToken = t.Previous;
                        }
                        else
                        {
                            for (t = t0; t != null; t = t.Next)
                            {
                                res.EndToken = t;
                            }
                        }
                    }
                    else
                    {
                        return(null);
                    }
                }
            }
            else
            {
                if ((t == t0 && t0.IsHiphen && !t.IsWhitespaceBefore) && !t.IsWhitespaceAfter && (num.RealValue < 0))
                {
                    num = Pullenti.Ner.Core.NumberHelper.TryParseRealNumber(t.Next, true, false);
                    if (num == null)
                    {
                        return(null);
                    }
                }
                if (t == t0 && (t is Pullenti.Ner.NumberToken) && t.Morph.Class.IsAdjective)
                {
                    Pullenti.Ner.TextToken nn = (t as Pullenti.Ner.NumberToken).EndToken as Pullenti.Ner.TextToken;
                    if (nn == null)
                    {
                        return(null);
                    }
                    string norm = nn.GetNormalCaseText(Pullenti.Morph.MorphClass.Adjective, Pullenti.Morph.MorphNumber.Singular, Pullenti.Morph.MorphGender.Undefined, false);
                    if ((norm.EndsWith("Ь") || norm == "ЧЕТЫРЕ" || norm == "ТРИ") || norm == "ДВА")
                    {
                    }
                    else
                    {
                        Pullenti.Morph.MorphWordForm mi = Pullenti.Morph.MorphologyService.GetWordBaseInfo("КОКО" + nn.Term, null, false, false);
                        if (mi.Class.IsAdjective)
                        {
                            return(null);
                        }
                    }
                }
                t   = num.EndToken.Next;
                res = new NumbersWithUnitToken(t0, num.EndToken)
                {
                    About = about
                };
                rval = num.RealValue;
            }
            if (uni == null)
            {
                uni = UnitToken.TryParseList(t, addUnits, false);
                if (uni != null)
                {
                    if ((plusminus && second && uni.Count >= 1) && uni[0].Unit == UnitsHelper.uPercent)
                    {
                        res.EndToken         = uni[0].EndToken;
                        res.PlusMinusPercent = true;
                        Pullenti.Ner.Token tt1 = uni[0].EndToken.Next;
                        uni = UnitToken.TryParseList(tt1, addUnits, false);
                        if (uni != null)
                        {
                            res.Units    = uni;
                            res.EndToken = uni[uni.Count - 1].EndToken;
                        }
                    }
                    else
                    {
                        res.Units    = uni;
                        res.EndToken = uni[uni.Count - 1].EndToken;
                    }
                    t = res.EndToken.Next;
                }
            }
            else
            {
                res.Units = uni;
                if (uni.Count > 1)
                {
                    List <UnitToken> uni1 = UnitToken.TryParseList(t, addUnits, false);
                    if (((uni1 != null && uni1[0].Unit == uni[0].Unit && (uni1.Count < uni.Count)) && uni[uni1.Count].Pow == -1 && uni1[uni1.Count - 1].EndToken.Next != null) && uni1[uni1.Count - 1].EndToken.Next.IsCharOf("/\\"))
                    {
                        NumbersWithUnitToken num2 = _tryParse(uni1[uni1.Count - 1].EndToken.Next.Next, addUnits, false, false, false);
                        if (num2 != null && num2.Units != null && num2.Units[0].Unit == uni[uni1.Count].Unit)
                        {
                            res.Units    = uni1;
                            res.DivNum   = num2;
                            res.EndToken = num2.EndToken;
                        }
                    }
                }
            }
            res.WHL = whd;
            if (dty != DiapTyp.Undefined)
            {
                if (dty == DiapTyp.Ge || dty == DiapTyp.From)
                {
                    res.FromInclude = true;
                    res.FromVal     = rval;
                }
                else if (dty == DiapTyp.Gt)
                {
                    res.FromInclude = false;
                    res.FromVal     = rval;
                }
                else if (dty == DiapTyp.Le || dty == DiapTyp.To)
                {
                    res.ToInclude = true;
                    res.ToVal     = rval;
                }
                else if (dty == DiapTyp.Ls)
                {
                    res.ToInclude = false;
                    res.ToVal     = rval;
                }
            }
            bool isSecondMax = false;

            if (!second)
            {
                int iii = 0;
                ttt = _isMinOrMax(t, ref iii);
                if (ttt != null && iii > 0)
                {
                    isSecondMax = true;
                    t           = ttt.Next;
                }
            }
            NumbersWithUnitToken next = (second || plusminus || ((t != null && ((t.IsTableControlChar || t.IsNewlineBefore)))) ? null : _tryParse(t, addUnits, true, false, canBeNan));

            if (next != null && (t.Previous is Pullenti.Ner.NumberToken))
            {
                if (MeasureHelper.IsMultChar((t.Previous as Pullenti.Ner.NumberToken).EndToken))
                {
                    next = null;
                }
            }
            if (next != null && ((next.ToVal != null || next.SingleVal != null)) && next.FromVal == null)
            {
                if ((((next.BeginToken.IsChar('+') && next.SingleVal != null && !double.IsNaN(next.SingleVal.Value)) && next.EndToken.Next != null && next.EndToken.Next.IsCharOf("\\/")) && next.EndToken.Next.Next != null && next.EndToken.Next.Next.IsHiphen) && !hasKeyw && !double.IsNaN(rval))
                {
                    NumbersWithUnitToken next2 = _tryParse(next.EndToken.Next.Next.Next, addUnits, true, false, false);
                    if (next2 != null && next2.SingleVal != null && !double.IsNaN(next2.SingleVal.Value))
                    {
                        res.FromVal     = rval - next2.SingleVal.Value;
                        res.FromInclude = true;
                        res.ToVal       = rval + next.SingleVal.Value;
                        res.ToInclude   = true;
                        if (next2.Units != null && res.Units.Count == 0)
                        {
                            res.Units = next2.Units;
                        }
                        res.EndToken = next2.EndToken;
                        return(res);
                    }
                }
                if (next.Units.Count > 0)
                {
                    if (res.Units.Count == 0)
                    {
                        res.Units = next.Units;
                    }
                    else if (!UnitToken.CanBeEquals(res.Units, next.Units))
                    {
                        next = null;
                    }
                }
                else if (res.Units.Count > 0 && !unitBefore && !next.PlusMinusPercent)
                {
                    next = null;
                }
                if (next != null)
                {
                    res.EndToken = next.EndToken;
                }
                if (next != null && next.ToVal != null)
                {
                    res.ToVal     = next.ToVal;
                    res.ToInclude = next.ToInclude;
                }
                else if (next != null && next.SingleVal != null)
                {
                    if (next.BeginToken.IsCharOf("/\\"))
                    {
                        res.DivNum    = next;
                        res.SingleVal = rval;
                        return(res);
                    }
                    else if (next.PlusMinusPercent)
                    {
                        res.SingleVal        = rval;
                        res.PlusMinus        = next.SingleVal;
                        res.PlusMinusPercent = true;
                        res.ToInclude        = true;
                    }
                    else
                    {
                        res.ToVal     = next.SingleVal;
                        res.ToInclude = true;
                    }
                }
                if (next != null)
                {
                    if (res.FromVal == null)
                    {
                        res.FromVal     = rval;
                        res.FromInclude = true;
                    }
                    return(res);
                }
            }
            else if ((next != null && next.FromVal != null && next.ToVal != null) && next.ToVal.Value == (-next.FromVal.Value))
            {
                if (next.Units.Count == 1 && next.Units[0].Unit == UnitsHelper.uPercent && res.Units.Count > 0)
                {
                    res.SingleVal        = rval;
                    res.PlusMinus        = next.ToVal.Value;
                    res.PlusMinusPercent = true;
                    res.EndToken         = next.EndToken;
                    return(res);
                }
                if (next.Units.Count == 0)
                {
                    res.SingleVal = rval;
                    res.PlusMinus = next.ToVal.Value;
                    res.EndToken  = next.EndToken;
                    return(res);
                }
                res.FromVal     = next.FromVal + rval;
                res.FromInclude = true;
                res.ToVal       = next.ToVal + rval;
                res.ToInclude   = true;
                res.EndToken    = next.EndToken;
                if (next.Units.Count > 0)
                {
                    res.Units = next.Units;
                }
                return(res);
            }
            if (dty == DiapTyp.Undefined)
            {
                if (plusminus && ((!res.PlusMinusPercent || !second)))
                {
                    res.FromInclude = true;
                    res.FromVal     = -rval;
                    res.ToInclude   = true;
                    res.ToVal       = rval;
                }
                else
                {
                    res.SingleVal        = rval;
                    res.PlusMinusPercent = plusminus;
                }
            }
            if (isAge)
            {
                res.IsAge = true;
            }
            return(res);
        }
Exemple #20
0
 /// <summary>
 /// Tries to recognize a single PersonIdToken element starting at token <paramref name="t"/>:
 /// a keyword, number, series, code, date, organization or address.
 /// </summary>
 /// <param name="t">First token to examine. A null token yields null.</param>
 /// <param name="prev">
 /// The element parsed just before this one, or null when parsing starts. Without a previous
 /// element only an ontology term of type <c>Typs.Keyword</c> (or the special "СВИДЕТЕЛЬСТВО"
 /// phrase) is accepted.
 /// </param>
 /// <returns>The recognized element, or null when nothing suitable starts at <paramref name="t"/>.</returns>
 static PersonIdToken TryParse(Pullenti.Ner.Token t, PersonIdToken prev)
 {
     // The recursive calls below may pass the token following the last one in the text,
     // i.e. null; treat that as "nothing to parse" instead of dereferencing it.
     if (t == null)
     {
         return(null);
     }
     if (t.IsValue("СВИДЕТЕЛЬСТВО", null))
     {
         // Scan the words following "СВИДЕТЕЛЬСТВО"; the phrase is accepted only when it
         // mentions both registration and the individual-entrepreneur marker.
         Pullenti.Ner.Token tt1 = t;
         bool ip  = false;
         bool reg = false;
         for (Pullenti.Ner.Token tt = t.Next; tt != null; tt = tt.Next)
         {
             if (tt.IsCommaAnd || tt.Morph.Class.IsPreposition)
             {
                 continue;
             }
             if (tt.IsValue("РЕГИСТРАЦИЯ", null) || tt.IsValue("РЕЕСТР", null) || tt.IsValue("ЗАРЕГИСТРИРОВАТЬ", null))
             {
                 reg = true;
                 tt1 = tt;
             }
             else if (tt.IsValue("ИНДИВИДУАЛЬНЫЙ", null) || tt.IsValue("ИП", null))
             {
                 ip  = true;
                 tt1 = tt;
             }
             else if ((tt.IsValue("ВНЕСЕНИЕ", null) || tt.IsValue("ГОСУДАРСТВЕННЫЙ", null) || tt.IsValue("ЕДИНЫЙ", null)) || tt.IsValue("ЗАПИСЬ", null) || tt.IsValue("ПРЕДПРИНИМАТЕЛЬ", null))
             {
                 tt1 = tt;
             }
             else if (tt.GetReferent() != null && tt.GetReferent().TypeName == "DATERANGE")
             {
                 tt1 = tt;
             }
             else
             {
                 break;
             }
         }
         if (reg && ip)
         {
             return new PersonIdToken(t, tt1)
             {
                 Typ = Typs.Keyword, Value = "СВИДЕТЕЛЬСТВО О ГОСУДАРСТВЕННОЙ РЕГИСТРАЦИИ ФИЗИЧЕСКОГО ЛИЦА В КАЧЕСТВЕ ИНДИВИДУАЛЬНОГО ПРЕДПРИНИМАТЕЛЯ"
             };
         }
     }
     Pullenti.Ner.Core.TerminToken tok = m_Ontology.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
     if (tok != null)
     {
         Typs          ty  = (Typs)tok.Termin.Tag;
         PersonIdToken res = new PersonIdToken(tok.BeginToken, tok.EndToken)
         {
             Typ = ty, Value = tok.Termin.CanonicText
         };
         if (prev == null)
         {
             // The very first element must be a keyword.
             if (ty != Typs.Keyword)
             {
                 return(null);
             }
             // Extend the keyword over a following geo referent, "ГРАЖДАНИН <geo>",
             // a person attribute or the word "ДАННЫЙ".
             for (t = tok.EndToken.Next; t != null; t = t.Next)
             {
                 Pullenti.Ner.Referent r = t.GetReferent();
                 if (r != null && (r is Pullenti.Ner.Geo.GeoReferent))
                 {
                     res.Referent = r;
                     res.EndToken = t;
                     continue;
                 }
                 if (t.IsValue("ГРАЖДАНИН", null) && t.Next != null && (t.Next.GetReferent() is Pullenti.Ner.Geo.GeoReferent))
                 {
                     res.Referent = t.Next.GetReferent();
                     t            = (res.EndToken = t.Next);
                     continue;
                 }
                 if (r != null)
                 {
                     // Any other referent ends the keyword.
                     break;
                 }
                 PersonAttrToken ait = PersonAttrToken.TryAttach(t, null, PersonAttrToken.PersonAttrAttachAttrs.No);
                 if (ait != null)
                 {
                     if (ait.Referent != null)
                     {
                         // Take a geo referent stored in the attribute's ATTR_REF slot, if any.
                         foreach (Pullenti.Ner.Slot s in ait.Referent.Slots)
                         {
                             if (s.TypeName == Pullenti.Ner.Person.PersonPropertyReferent.ATTR_REF && (s.Value is Pullenti.Ner.Geo.GeoReferent))
                             {
                                 res.Referent = s.Value as Pullenti.Ner.Referent;
                             }
                         }
                     }
                     res.EndToken = ait.EndToken;
                     break;
                 }
                 if (t.IsValue("ДАННЫЙ", null))
                 {
                     res.EndToken = t;
                     continue;
                 }
                 break;
             }
             // Only geo referents flagged as a state are kept.
             if ((res.Referent is Pullenti.Ner.Geo.GeoReferent) && !(res.Referent as Pullenti.Ner.Geo.GeoReferent).IsState)
             {
                 res.Referent = null;
             }
             return(res);
         }
         if (ty == Typs.Number)
         {
             // Concatenate the run of number tokens on the same line after an optional ':'.
             StringBuilder      tmp = new StringBuilder();
             Pullenti.Ner.Token tt  = tok.EndToken.Next;
             if (tt != null && tt.IsChar(':'))
             {
                 tt = tt.Next;
             }
             for (; tt != null; tt = tt.Next)
             {
                 if (tt.IsNewlineBefore)
                 {
                     break;
                 }
                 if (!(tt is Pullenti.Ner.NumberToken))
                 {
                     break;
                 }
                 tmp.Append(tt.GetSourceText());
                 res.EndToken = tt;
             }
             if (tmp.Length < 1)
             {
                 return(null);
             }
             res.Value     = tmp.ToString();
             res.HasPrefix = true;
             return(res);
         }
         if (ty == Typs.Seria)
         {
             StringBuilder      tmp = new StringBuilder();
             Pullenti.Ner.Token tt  = tok.EndToken.Next;
             if (tt != null && tt.IsChar(':'))
             {
                 tt = tt.Next;
             }
             // NOTE(review): nextNum is assigned but never read within this method.
             bool nextNum = false;
             for (; tt != null; tt = tt.Next)
             {
                 if (tt.IsNewlineBefore)
                 {
                     break;
                 }
                 if (Pullenti.Ner.Core.MiscHelper.CheckNumberPrefix(tt) != null)
                 {
                     nextNum = true;
                     break;
                 }
                 if (!(tt is Pullenti.Ner.NumberToken))
                 {
                     // Non-numeric parts must be all-upper text: a Roman numeral or a two-character token.
                     if (!(tt is Pullenti.Ner.TextToken))
                     {
                         break;
                     }
                     if (!tt.Chars.IsAllUpper)
                     {
                         break;
                     }
                     Pullenti.Ner.NumberToken nu = Pullenti.Ner.Core.NumberHelper.TryParseRoman(tt);
                     if (nu != null)
                     {
                         // NOTE(review): res.EndToken is not advanced in this branch - confirm this is intended.
                         tmp.Append(nu.GetSourceText());
                         tt = nu.EndToken;
                     }
                     else if (tt.LengthChar != 2)
                     {
                         break;
                     }
                     else
                     {
                         tmp.Append((tt as Pullenti.Ner.TextToken).Term);
                         res.EndToken = tt;
                     }
                     // Skip a hyphen separating the parts of the series.
                     if (tt.Next != null && tt.Next.IsHiphen)
                     {
                         tt = tt.Next;
                     }
                     continue;
                 }
                 // Stop taking digits once four characters have been collected.
                 if (tmp.Length >= 4)
                 {
                     break;
                 }
                 tmp.Append(tt.GetSourceText());
                 res.EndToken = tt;
             }
             if (tmp.Length < 4)
             {
                 if (tmp.Length < 2)
                 {
                     return(null);
                 }
                 // A short series is accepted only when a number element follows it.
                 Pullenti.Ner.Token tt1 = res.EndToken.Next;
                 if (tt1 != null && tt1.IsComma)
                 {
                     tt1 = tt1.Next;
                 }
                 PersonIdToken next = TryParse(tt1, res);
                 if (next != null && next.Typ == Typs.Number)
                 {
                 }
                 else
                 {
                     return(null);
                 }
             }
             res.Value     = tmp.ToString();
             res.HasPrefix = true;
             return(res);
         }
         if (ty == Typs.Code)
         {
             // Extend the code over following ':' / hyphen separators and number tokens.
             for (Pullenti.Ner.Token tt = res.EndToken.Next; tt != null; tt = tt.Next)
             {
                 if (tt.IsCharOf(":") || tt.IsHiphen)
                 {
                     continue;
                 }
                 if (tt is Pullenti.Ner.NumberToken)
                 {
                     res.EndToken = tt;
                     continue;
                 }
                 break;
             }
         }
         if (ty == Typs.Address)
         {
             if (t.GetReferent() is Pullenti.Ner.Address.AddressReferent)
             {
                 res.Referent = t.GetReferent();
                 res.EndToken = t;
                 return(res);
             }
             // Otherwise look for an address referent right after separators and prepositions.
             for (Pullenti.Ner.Token tt = res.EndToken.Next; tt != null; tt = tt.Next)
             {
                 if (tt.IsCharOf(":") || tt.IsHiphen || tt.Morph.Class.IsPreposition)
                 {
                     continue;
                 }
                 if (tt.GetReferent() is Pullenti.Ner.Address.AddressReferent)
                 {
                     res.Referent = tt.GetReferent();
                     res.EndToken = tt;
                 }
                 break;
             }
             if (res.Referent == null)
             {
                 return(null);
             }
         }
         return(res);
     }
     else if (prev == null)
     {
         // No ontology term here and nothing parsed before: not the start of an element sequence.
         return(null);
     }
     Pullenti.Ner.Token t0 = t;
     Pullenti.Ner.Token t1 = Pullenti.Ner.Core.MiscHelper.CheckNumberPrefix(t0);
     if (t1 != null)
     {
         // Continue from the token returned by CheckNumberPrefix.
         t = t1;
     }
     if (t is Pullenti.Ner.NumberToken)
     {
         StringBuilder tmp = new StringBuilder();
         PersonIdToken res = new PersonIdToken(t0, t)
         {
             Typ = Typs.Number
         };
         for (Pullenti.Ner.Token tt = t; tt != null; tt = tt.Next)
         {
             if (tt.IsNewlineBefore || !(tt is Pullenti.Ner.NumberToken))
             {
                 break;
             }
             tmp.Append(tt.GetSourceText());
             res.EndToken = tt;
         }
         if (tmp.Length < 4)
         {
             if (tmp.Length < 2)
             {
                 return(null);
             }
             if (prev == null || prev.Typ != Typs.Keyword)
             {
                 return(null);
             }
             // A short digit group right after a keyword is a series if a number follows it.
             PersonIdToken ne = TryParse(res.EndToken.Next, prev);
             if (ne != null && ne.Typ == Typs.Number)
             {
                 res.Typ = Typs.Seria;
             }
             else
             {
                 return(null);
             }
         }
         res.Value = tmp.ToString();
         if (t0 != t)
         {
             res.HasPrefix = true;
         }
         return(res);
     }
     if (t is Pullenti.Ner.ReferentToken)
     {
         Pullenti.Ner.Referent r = t.GetReferent();
         if (r != null)
         {
             if (r.TypeName == "DATE")
             {
                 return new PersonIdToken(t, t)
                 {
                     Typ = Typs.Date, Referent = r
                 };
             }
             if (r.TypeName == "ORGANIZATION")
             {
                 return new PersonIdToken(t, t)
                 {
                     Typ = Typs.Org, Referent = r
                 };
             }
             if (r.TypeName == "ADDRESS")
             {
                 return new PersonIdToken(t, t)
                 {
                     Typ = Typs.Address, Referent = r
                 };
             }
         }
     }
     if ((prev != null && prev.Typ == Typs.Keyword && (t is Pullenti.Ner.TextToken)) && !t.Chars.IsAllLower && t.Chars.IsLetter)
     {
         // A non-lowercase word after a keyword is taken as a series when a number follows it.
         PersonIdToken rr = TryParse(t.Next, prev);
         if (rr != null && rr.Typ == Typs.Number)
         {
             return new PersonIdToken(t, t)
             {
                 Typ = Typs.Seria, Value = (t as Pullenti.Ner.TextToken).Term
             };
         }
     }
     if ((t != null && t.IsValue("ОТ", "ВІД") && (t.Next is Pullenti.Ner.ReferentToken)) && t.Next.GetReferent().TypeName == "DATE")
     {
         // "ОТ <date>" / "ВІД <date>".
         return new PersonIdToken(t, t.Next)
         {
             Typ = Typs.Date, Referent = t.Next.GetReferent()
         };
     }
     return(null);
 }
Exemple #21
0
        /// <summary>
        /// Assembles a textual name from the tokens lying between <paramref name="begin"/> and
        /// <paramref name="end"/> (inclusive). Text tokens may be replaced by the normal-case
        /// string of a morphological word form that matches the requested class, case and gender.
        /// </summary>
        /// <param name="begin">First token of the range.</param>
        /// <param name="end">Last token of the range.</param>
        /// <param name="cla">Word-class filter for word forms; a zero <c>Value</c> switches it off.</param>
        /// <param name="mc">Case filter for word forms; an undefined case switches it off.</param>
        /// <param name="gender">Gender filter for word forms; <c>Undefined</c> switches it off.</param>
        /// <param name="ignoreBracketsAndHiphens">When true, bracket tokens are not copied as-is
        /// (a successfully parsed sequence opened by one of "(&lt;[" is re-emitted around its inner
        /// text) and hyphens that have whitespace on either side are skipped. When false,
        /// "word-word" chains of text tokens are joined with '-'.</param>
        /// <param name="ignoreGeoReferent">When true, meta tokens other than <paramref name="begin"/>
        /// whose referent has the type name "GEO" are skipped.</param>
        /// <returns>The collected text, or null when either boundary is null, when
        /// <paramref name="begin"/> ends after <paramref name="end"/> starts (and they are
        /// different tokens), or when nothing was collected.</returns>
        public static string GetNameEx(Pullenti.Ner.Token begin, Pullenti.Ner.Token end, Pullenti.Morph.MorphClass cla, Pullenti.Morph.MorphCase mc, Pullenti.Morph.MorphGender gender = Pullenti.Morph.MorphGender.Undefined, bool ignoreBracketsAndHiphens = false, bool ignoreGeoReferent = false)
        {
            if (begin == null || end == null)
            {
                return null;
            }
            if (begin != end && begin.EndChar > end.BeginChar)
            {
                return null;
            }

            StringBuilder text = new StringBuilder();
            // Accumulated left-hand parts of a "word-word-..." chain awaiting the final word.
            string pendingPrefix = null;

            for (Pullenti.Ner.Token cur = begin; cur != null && cur.EndChar <= end.EndChar; cur = cur.Next)
            {
                // Stop collecting once the result has grown past 1000 characters.
                if (text.Length > 1000)
                {
                    break;
                }
                if (cur.IsTableControlChar)
                {
                    continue;
                }

                if (ignoreBracketsAndHiphens)
                {
                    if (BracketHelper.IsBracket(cur, false))
                    {
                        if (cur == end)
                        {
                            break;
                        }
                        BracketSequenceToken seq = null;
                        if (cur.IsCharOf("(<["))
                        {
                            seq = BracketHelper.TryParse(cur, BracketParseAttr.No, 100);
                        }
                        if (seq != null && seq.EndChar <= end.EndChar)
                        {
                            Pullenti.Ner.Token firstInner = seq.BeginToken.Next;
                            Pullenti.Ner.Token lastInner = seq.EndToken.Previous;
                            string inner = GetNameEx(firstInner, lastInner, Pullenti.Morph.MorphClass.Undefined, Pullenti.Morph.MorphCase.Undefined, Pullenti.Morph.MorphGender.Undefined, ignoreBracketsAndHiphens, false);
                            if (inner != null)
                            {
                                // A bracket group that closes the range and wraps a single
                                // non-letter, non-referent token is dropped entirely.
                                bool loneNonLetterTail = (seq.EndChar == end.EndChar && firstInner == lastInner && !firstInner.Chars.IsLetter) && !(firstInner is Pullenti.Ner.ReferentToken);
                                if (!loneNonLetterTail)
                                {
                                    string opening = cur.GetSourceText();
                                    string closing = seq.EndToken.GetSourceText();
                                    text.Append(' ').Append(opening).Append(inner).Append(closing);
                                }
                            }
                            // Jump to the closing bracket; the loop step moves past it.
                            cur = seq.EndToken;
                        }
                        continue;
                    }
                    if (cur.IsHiphen)
                    {
                        if (cur == end)
                        {
                            break;
                        }
                        if (cur.IsWhitespaceBefore || cur.IsWhitespaceAfter)
                        {
                            continue;
                        }
                    }
                }

                Pullenti.Ner.TextToken word = cur as Pullenti.Ner.TextToken;
                if (word != null)
                {
                    // "word - word" inside the range: remember this word as a prefix and
                    // step onto the hyphen so the next iteration lands on the following word.
                    bool opensHyphenChain = !ignoreBracketsAndHiphens && word.Next != null && word.Next.IsHiphen && (word.Next.Next is Pullenti.Ner.TextToken) && word != end && word.Next != end;
                    if (opensHyphenChain)
                    {
                        pendingPrefix = (pendingPrefix == null) ? word.Term : pendingPrefix + "-" + word.Term;
                        cur = word.Next;
                        if (cur == end)
                        {
                            break;
                        }
                        continue;
                    }

                    string piece = null;
                    if (cla.Value != 0 || !mc.IsUndefined || gender != Pullenti.Morph.MorphGender.Undefined)
                    {
                        piece = PickNormalCaseForm(word, cla, mc, gender);
                        if (piece == null && gender != Pullenti.Morph.MorphGender.Undefined)
                        {
                            // Nothing matched with the gender constraint: retry without it.
                            piece = PickNormalCaseForm(word, cla, mc, Pullenti.Morph.MorphGender.Undefined);
                        }
                    }
                    if (piece == null)
                    {
                        piece = word.Term;
                        if (word.Chars.IsLastLower && word.LengthChar > 2)
                        {
                            // Take the source spelling and cut everything after its last
                            // upper-case character (left untouched if there is none).
                            piece = word.GetSourceText();
                            int lastUpper = piece.Length - 1;
                            while (lastUpper >= 0 && !char.IsUpper(piece[lastUpper]))
                            {
                                lastUpper--;
                            }
                            if (lastUpper >= 0)
                            {
                                piece = piece.Substring(0, lastUpper + 1);
                            }
                        }
                    }
                    if (pendingPrefix != null)
                    {
                        string joiner = ignoreBracketsAndHiphens ? " " : "-";
                        piece = pendingPrefix + joiner + piece;
                        pendingPrefix = null;
                    }

                    if (text.Length > 0 && piece.Length > 0)
                    {
                        bool needGap;
                        if (char.IsLetterOrDigit(piece[0]))
                        {
                            // No space right after a hyphen already in the buffer.
                            needGap = text[text.Length - 1] != '-';
                        }
                        else
                        {
                            needGap = !ignoreBracketsAndHiphens && BracketHelper.CanBeStartOfSequence(word, false, false);
                        }
                        if (needGap)
                        {
                            text.Append(' ');
                        }
                    }
                    text.Append(piece);
                }
                else if (cur is Pullenti.Ner.NumberToken)
                {
                    AppendGapBeforeToken(text, cur);
                    Pullenti.Ner.NumberToken num = cur as Pullenti.Ner.NumberToken;
                    // A single-word adjective numeral keeps its word; anything else is
                    // written as the number's value.
                    bool singleWordAdjective = (cur.Morph.Class.IsAdjective && num.Typ == Pullenti.Ner.NumberSpellingType.Words && num.BeginToken == num.EndToken) && (num.BeginToken is Pullenti.Ner.TextToken);
                    if (singleWordAdjective)
                    {
                        text.Append((num.BeginToken as Pullenti.Ner.TextToken).Term);
                    }
                    else
                    {
                        text.Append(num.Value);
                    }
                }
                else if (cur is Pullenti.Ner.MetaToken)
                {
                    if ((ignoreGeoReferent && cur != begin && cur.GetReferent() != null) && cur.GetReferent().TypeName == "GEO")
                    {
                        continue;
                    }
                    Pullenti.Ner.MetaToken meta = cur as Pullenti.Ner.MetaToken;
                    // Recurse over the tokens wrapped by the meta token.
                    string nested = GetNameEx(meta.BeginToken, meta.EndToken, cla, mc, gender, ignoreBracketsAndHiphens, ignoreGeoReferent);
                    if (!string.IsNullOrEmpty(nested))
                    {
                        AppendGapBeforeToken(text, cur);
                        text.Append(nested);
                    }
                }

                if (cur == end)
                {
                    break;
                }
            }

            return (text.Length == 0) ? null : text.ToString();
        }

        /// <summary>
        /// Appends a single space to a non-empty buffer before a number or meta token is
        /// written, unless the token has no whitespace before it and the buffer ends with '-'.
        /// </summary>
        private static void AppendGapBeforeToken(StringBuilder text, Pullenti.Ner.Token token)
        {
            if (text.Length == 0)
            {
                return;
            }
            if (token.IsWhitespaceBefore || text[text.Length - 1] != '-')
            {
                text.Append(' ');
            }
        }

        /// <summary>
        /// Scans the word forms of a text token and returns the <c>NormalCase</c> of the first
        /// form passing the active filters, switching to a later passing form whose
        /// <c>NormalCase</c> equals the token's <c>Term</c>. A filter is active only when its
        /// argument is set (non-zero class value, defined case, gender other than Undefined).
        /// Returns null when no form yields a value.
        /// </summary>
        private static string PickNormalCaseForm(Pullenti.Ner.TextToken word, Pullenti.Morph.MorphClass cla, Pullenti.Morph.MorphCase mc, Pullenti.Morph.MorphGender gender)
        {
            string chosen = null;
            foreach (Pullenti.Morph.MorphBaseInfo item in word.Morph.Items)
            {
                Pullenti.Morph.MorphWordForm form = item as Pullenti.Morph.MorphWordForm;
                if (form == null)
                {
                    continue;
                }
                if (cla.Value != 0 && ((form.Class.Value & cla.Value)) == 0)
                {
                    continue;
                }
                if (!mc.IsUndefined && ((form.Case & mc)).IsUndefined)
                {
                    continue;
                }
                if (gender != Pullenti.Morph.MorphGender.Undefined && ((form.Gender & gender)) == Pullenti.Morph.MorphGender.Undefined)
                {
                    continue;
                }
                if (chosen == null || form.NormalCase == word.Term)
                {
                    chosen = form.NormalCase;
                }
            }
            return chosen;
        }