예제 #1
0
        /// <summary>
        /// Collects a run of number tokens (optionally joined by separator characters)
        /// that follows an optional prefix of ':', '/', '\', hyphens or the word "IEC".
        /// </summary>
        /// <param name="t0">Token the resulting item starts at.</param>
        /// <param name="specChars">Characters accepted as separators between numbers.</param>
        /// <returns>
        /// An item spanning from <paramref name="t0"/> to the last number token consumed,
        /// with Value set to the collected text; null when no number follows the prefix
        /// or nothing was collected.
        /// </returns>
        public static UriItemToken AttachISOContent(Pullenti.Ner.Token t0, string specChars)
        {
            // Skip the leading punctuation / "IEC" prefix.
            Pullenti.Ner.Token cur = t0;
            while (cur != null && (cur.IsCharOf(":/\\") || cur.IsHiphen || cur.IsValue("IEC", null)))
            {
                cur = cur.Next;
            }
            // A null token also fails this type test.
            if (!(cur is Pullenti.Ner.NumberToken))
            {
                return(null);
            }

            Pullenti.Ner.Token lastNumber = cur;
            char          separator  = '\0';
            StringBuilder collected  = new StringBuilder();

            while (cur != null)
            {
                // Whitespace ends the run, except in front of the token recorded as last number.
                if (cur.IsWhitespaceBefore && cur != lastNumber)
                {
                    break;
                }
                if (cur is Pullenti.Ner.NumberToken)
                {
                    // A separator is emitted only once it is known that a number follows it.
                    if (separator != '\0')
                    {
                        collected.Append(separator);
                    }
                    separator  = '\0';
                    lastNumber = cur;
                    collected.Append(cur.GetSourceText());
                }
                else if ((cur is Pullenti.Ner.TextToken) && cur.IsCharOf(specChars))
                {
                    // Remember the separator; a later separator overwrites an earlier one.
                    separator = cur.GetSourceText()[0];
                }
                else
                {
                    break;
                }
                cur = cur.Next;
            }

            if (collected.Length == 0)
            {
                return(null);
            }
            UriItemToken item = new UriItemToken(t0, lastNumber);
            item.Value = collected.ToString();
            return(item);
        }
예제 #2
0
        /// <summary>
        /// Builds a referent token for an ontology item that starts at <paramref name="begin"/>.
        /// </summary>
        /// <param name="begin">First token of the item; only text tokens are handled.</param>
        /// <returns>
        /// A referent token for the unit parsed by UnitToken.TryParse, or, when that parse
        /// fails, a new UnitReferent named after the token's source text and spanning that
        /// single token. Null when <paramref name="begin"/> is not a text token.
        /// </returns>
        public override Pullenti.Ner.ReferentToken ProcessOntologyItem(Pullenti.Ner.Token begin)
        {
            if (begin is Pullenti.Ner.TextToken)
            {
                Pullenti.Ner.Measure.Internal.UnitToken parsed = Pullenti.Ner.Measure.Internal.UnitToken.TryParse(begin, null, null, false);
                if (parsed == null)
                {
                    // Unit parse failed: register the raw text as the unit name.
                    UnitReferent fallback = new UnitReferent();
                    fallback.AddSlot(UnitReferent.ATTR_NAME, begin.GetSourceText(), false, 0);
                    return(new Pullenti.Ner.ReferentToken(fallback, begin, begin));
                }
                return(new Pullenti.Ner.ReferentToken(parsed.CreateReferentWithRegister(null), parsed.BeginToken, parsed.EndToken));
            }
            return(null);
        }
예제 #3
0
        /// <summary>
        /// Builds a value string from the tokens in [begin, end] and stores it in the
        /// ATTR_VALUE slot, then resets the cached names.
        /// </summary>
        /// <param name="begin">First token of the value.</param>
        /// <param name="end">Last token of the value (inclusive).</param>
        internal void AddValue(Pullenti.Ner.Token begin, Pullenti.Ner.Token end)
        {
            StringBuilder buffer = new StringBuilder();

            // Concatenate the tokens: numbers by source text, text tokens by Term,
            // with '-', '\' and '/' all written as '-'. Other token kinds are ignored.
            Pullenti.Ner.Token cur = begin;
            while (cur != null && cur.Previous != end)
            {
                if (cur is Pullenti.Ner.NumberToken)
                {
                    buffer.Append(cur.GetSourceText());
                }
                else
                {
                    Pullenti.Ner.TextToken word = cur as Pullenti.Ner.TextToken;
                    if (word != null)
                    {
                        buffer.Append(cur.IsCharOf("-\\/") ? "-" : word.Term);
                    }
                }
                cur = cur.Next;
            }

            // Drop an inner '-' that sits between a digit and a non-digit letter
            // (in either order); hyphens between two digits or two letters are kept.
            for (int pos = 1; (pos + 1) < buffer.Length; pos++)
            {
                if (buffer[pos] != '-')
                {
                    continue;
                }
                char left  = buffer[pos - 1];
                char right = buffer[pos + 1];
                if (!char.IsLetterOrDigit(left) || !char.IsLetterOrDigit(right))
                {
                    continue;
                }
                if (char.IsDigit(left) != char.IsDigit(right))
                {
                    buffer.Remove(pos, 1);
                }
            }

            this.AddSlot(ATTR_VALUE, buffer.ToString(), false, 0);
            m_Names = null;
        }
예제 #4
0
        /// <summary>
        /// Tries to recognise a measurement unit starting at token <paramref name="t"/>.
        /// </summary>
        /// <param name="t">Token to start from; null yields null.</param>
        /// <param name="addUnits">Optional extra termin collection, consulted only after the
        /// built-in units, degree signs and '%' did not match.</param>
        /// <param name="prev">Previously parsed unit, if any; enables the "В" prefix and
        /// influences the single-letter heuristics below.</param>
        /// <param name="parseUnknownUnits">When true, a short unknown word may be returned
        /// as a doubtful unit carrying UnknownName.</param>
        /// <returns>The recognised unit token, or null when nothing was recognised.</returns>
        public static UnitToken TryParse(Pullenti.Ner.Token t, Pullenti.Ner.Core.TerminCollection addUnits, UnitToken prev, bool parseUnknownUnits = false)
        {
            if (t == null)
            {
                return(null);
            }
            Pullenti.Ner.Token t0 = t;
            int  pow   = 1;
            bool isNeg = false;

            // A leading divider ("/", "\", "НА", "OF", "PER", or "В" after a previous unit)
            // makes the power negative; a multiplication character is simply skipped.
            if ((t.IsCharOf("\\/") || t.IsValue("НА", null) || t.IsValue("OF", null)) || t.IsValue("PER", null))
            {
                isNeg = true;
                t     = t.Next;
            }
            else if (t.IsValue("В", null) && prev != null)
            {
                isNeg = true;
                t     = t.Next;
            }
            else if (MeasureHelper.IsMultChar(t))
            {
                t = t.Next;
            }
            Pullenti.Ner.TextToken tt = t as Pullenti.Ner.TextToken;
            if (tt == null)
            {
                return(null);
            }
            // "square" / "cubic" words set the power to 2 / 3 and move tt (not t) past the
            // word and an optional trailing '.'.
            if (tt.Term == "КВ" || tt.Term == "КВАДР" || tt.IsValue("КВАДРАТНЫЙ", null))
            {
                pow = 2;
                tt  = tt.Next as Pullenti.Ner.TextToken;
                if (tt != null && tt.IsChar('.'))
                {
                    tt = tt.Next as Pullenti.Ner.TextToken;
                }
                if (tt == null)
                {
                    return(null);
                }
            }
            else if (tt.Term == "КУБ" || tt.Term == "КУБИЧ" || tt.IsValue("КУБИЧЕСКИЙ", null))
            {
                pow = 3;
                tt  = tt.Next as Pullenti.Ner.TextToken;
                if (tt != null && tt.IsChar('.'))
                {
                    tt = tt.Next as Pullenti.Ner.TextToken;
                }
                if (tt == null)
                {
                    return(null);
                }
            }
            else if (tt.Term == "µ")
            {
                // Micro sign: parse what follows, then look for a Micro-factor unit whose
                // "мк"-prefixed Cyrillic name equals the parsed unit's Cyrillic name.
                // NOTE(review): res.Unit is dereferenced without a null check; the addUnits
                // branch below builds tokens with only ExtOnto set — confirm Unit cannot be null here.
                UnitToken res = TryParse(tt.Next, addUnits, prev, false);
                if (res != null)
                {
                    foreach (Unit u in UnitsHelper.Units)
                    {
                        if (u.Factor == UnitsFactors.Micro && string.Compare("мк" + u.NameCyr, res.Unit.NameCyr, true) == 0)
                        {
                            res.Unit       = u;
                            res.BeginToken = tt;
                            res.Pow        = pow;
                            if (isNeg)
                            {
                                res.Pow = -pow;
                            }
                            return(res);
                        }
                    }
                }
            }
            // Look the (possibly advanced) token up among the built-in unit termins.
            List <Pullenti.Ner.Core.TerminToken> toks = UnitsHelper.Termins.TryParseAll(tt, Pullenti.Ner.Core.TerminParseAttr.No);

            if (toks != null)
            {
                // After a previous unit, a single unambiguous match preceded by whitespace is rejected.
                if ((prev != null && tt == t0 && toks.Count == 1) && t.IsWhitespaceBefore)
                {
                    return(null);
                }
                // A one-token match that is also a preposition is rejected when it starts a
                // noun phrase, is followed by a non-digit number, or is followed by another unit.
                if (toks[0].BeginToken == toks[0].EndToken && tt.Morph.Class.IsPreposition && (tt.WhitespacesAfterCount < 3))
                {
                    if (Pullenti.Ner.Core.NounPhraseHelper.TryParse(tt, Pullenti.Ner.Core.NounPhraseParseAttr.ParsePreposition, 0, null) != null)
                    {
                        return(null);
                    }
                    if (tt.Next is Pullenti.Ner.NumberToken)
                    {
                        if ((tt.Next as Pullenti.Ner.NumberToken).Typ != Pullenti.Ner.NumberSpellingType.Digit)
                        {
                            return(null);
                        }
                    }
                    UnitToken nex = TryParse(tt.Next, addUnits, null, false);
                    if (nex != null)
                    {
                        return(null);
                    }
                }
                // A lone all-lowercase "м"/"m" directly after a unit of kind Length is
                // returned as UnitsHelper.uMinute.
                if (toks[0].BeginToken == toks[0].EndToken && ((toks[0].BeginToken.IsValue("М", null) || toks[0].BeginToken.IsValue("M", null))) && toks[0].BeginToken.Chars.IsAllLower)
                {
                    if (prev != null && prev.Unit != null && prev.Unit.Kind == Pullenti.Ner.Measure.MeasureKind.Length)
                    {
                        UnitToken res = new UnitToken(t0, toks[0].EndToken)
                        {
                            Unit = UnitsHelper.uMinute
                        };
                        res.Pow = pow;
                        if (isNeg)
                        {
                            res.Pow = -pow;
                        }
                        return(res);
                    }
                }
                // Build one candidate per termin match.
                List <UnitToken> uts = new List <UnitToken>();
                foreach (Pullenti.Ner.Core.TerminToken tok in toks)
                {
                    UnitToken res = new UnitToken(t0, tok.EndToken)
                    {
                        Unit = tok.Termin.Tag as Unit
                    };
                    res.Pow = pow;
                    if (isNeg)
                    {
                        res.Pow = -pow;
                    }
                    // A unit with BaseMultiplier 1000000 written with a lowercase first letter is
                    // replaced by the Milli-factor unit that has the same Cyrillic name
                    // (presumably to tell mega "М" from milli "м" — TODO confirm).
                    if (res.Unit.BaseMultiplier == 1000000 && (t0 is Pullenti.Ner.TextToken) && char.IsLower((t0 as Pullenti.Ner.TextToken).GetSourceText()[0]))
                    {
                        foreach (Unit u in UnitsHelper.Units)
                        {
                            if (u.Factor == UnitsFactors.Milli && string.Compare(u.NameCyr, res.Unit.NameCyr, true) == 0)
                            {
                                res.Unit = u;
                                break;
                            }
                        }
                    }
                    res._correct();
                    res._checkDoubt();
                    uts.Add(res);
                }
                // Candidate choice: the one whose Keyword begins furthest right (ties go to the
                // later candidate), else the first non-doubtful one, else the first one.
                int       max  = 0;
                UnitToken best = null;
                foreach (UnitToken ut in uts)
                {
                    if (ut.Keyword != null)
                    {
                        if (ut.Keyword.BeginChar >= max)
                        {
                            max  = ut.Keyword.BeginChar;
                            best = ut;
                        }
                    }
                }
                if (best != null)
                {
                    return(best);
                }
                foreach (UnitToken ut in uts)
                {
                    if (!ut.IsDoubt)
                    {
                        return(ut);
                    }
                }
                return(uts[0]);
            }
            // No built-in termin matched. From here on the checks use t (the token after the
            // optional divider), not tt.
            // Degree sign, either as a character or spelled "<o>" / "<O>" / "<0>".
            Pullenti.Ner.Token t1 = null;
            if (t.IsCharOf("º°"))
            {
                t1 = t;
            }
            else if ((t.IsChar('<') && t.Next != null && t.Next.Next != null) && t.Next.Next.IsChar('>') && ((t.Next.IsValue("О", null) || t.Next.IsValue("O", null) || (((t.Next is Pullenti.Ner.NumberToken) && (t.Next as Pullenti.Ner.NumberToken).Value == "0")))))
            {
                t1 = t.Next.Next;
            }
            if (t1 != null)
            {
                UnitToken res = new UnitToken(t0, t1)
                {
                    Unit = UnitsHelper.uGradus
                };
                res._checkDoubt();
                // Optional ", " and "ПО" before a scale designation.
                t = t1.Next;
                if (t != null && t.IsComma)
                {
                    t = t.Next;
                }
                if (t != null && t.IsValue("ПО", null))
                {
                    t = t.Next;
                }
                if (t is Pullenti.Ner.TextToken)
                {
                    string vv = (t as Pullenti.Ner.TextToken).Term;
                    // Latin or Cyrillic "C", or a word starting with "ЦЕЛЬС" -> uGradusC.
                    if (vv == "C" || vv == "С" || vv.StartsWith("ЦЕЛЬС"))
                    {
                        res.Unit     = UnitsHelper.uGradusC;
                        res.IsDoubt  = false;
                        res.EndToken = t;
                    }
                    // "F" or a word starting with "ФАР" -> uGradusF.
                    if (vv == "F" || vv.StartsWith("ФАР"))
                    {
                        res.Unit     = UnitsHelper.uGradusF;
                        res.IsDoubt  = false;
                        res.EndToken = t;
                    }
                }
                return(res);
            }
            // "оС" / "oC" (lowercase o + uppercase C, Cyrillic or Latin) is accepted as uGradusC;
            // the source text is compared case-sensitively.
            if ((t is Pullenti.Ner.TextToken) && ((t.IsValue("ОС", null) || t.IsValue("OC", null))))
            {
                string str = t.GetSourceText();
                if (str == "оС" || str == "oC")
                {
                    UnitToken res = new UnitToken(t, t)
                    {
                        Unit = UnitsHelper.uGradusC, IsDoubt = false
                    };
                    return(res);
                }
            }
            // '%': followed (optionally in parentheses) by a word starting with "ОБ" it becomes
            // uAlco, otherwise uPercent.
            if (t.IsChar('%'))
            {
                Pullenti.Ner.Token tt1 = t.Next;
                if (tt1 != null && tt1.IsChar('('))
                {
                    tt1 = tt1.Next;
                }
                if ((tt1 is Pullenti.Ner.TextToken) && (tt1 as Pullenti.Ner.TextToken).Term.StartsWith("ОБ"))
                {
                    UnitToken re = new UnitToken(t, tt1)
                    {
                        Unit = UnitsHelper.uAlco
                    };
                    if (re.EndToken.Next != null && re.EndToken.Next.IsChar('.'))
                    {
                        re.EndToken = re.EndToken.Next;
                    }
                    // The closing ')' is absorbed only when the '(' directly followed the '%'.
                    if (re.EndToken.Next != null && re.EndToken.Next.IsChar(')') && t.Next.IsChar('('))
                    {
                        re.EndToken = re.EndToken.Next;
                    }
                    return(re);
                }
                return(new UnitToken(t, t)
                {
                    Unit = UnitsHelper.uPercent
                });
            }
            // Caller-supplied additional units.
            if (addUnits != null)
            {
                Pullenti.Ner.Core.TerminToken tok = addUnits.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
                if (tok != null)
                {
                    UnitToken res = new UnitToken(t0, tok.EndToken)
                    {
                        ExtOnto = tok.Termin.Tag as Pullenti.Ner.Measure.UnitReferent
                    };
                    // NOTE(review): this extends tok.EndToken after res was already constructed
                    // from it, and tok is not read again in this method; res.EndToken is not
                    // updated here — confirm whether res.EndToken was intended.
                    if (tok.EndToken.Next != null && tok.EndToken.Next.IsChar('.'))
                    {
                        tok.EndToken = tok.EndToken.Next;
                    }
                    res.Pow = pow;
                    if (isNeg)
                    {
                        res.Pow = -pow;
                    }
                    res._correct();
                    return(res);
                }
            }
            if (!parseUnknownUnits)
            {
                return(null);
            }
            // Unknown-unit fallback: only a short (at most 5 chars) letter token that is close
            // to the preceding text and cannot start a sentence is considered.
            if ((t.WhitespacesBeforeCount > 2 || !t.Chars.IsLetter || t.LengthChar > 5) || !(t is Pullenti.Ner.TextToken))
            {
                return(null);
            }
            if (Pullenti.Ner.Core.MiscHelper.CanBeStartOfSentence(t))
            {
                return(null);
            }
            t1 = t;
            // NOTE(review): this assignment repeats the one above and has no effect; possibly
            // t.Next was intended so that a trailing '.' is included — confirm.
            if (t.Next != null && t.Next.IsChar('.'))
            {
                t1 = t;
            }
            bool ok = false;

            // The candidate must be followed by end of text, a wide gap, a comma, a slash,
            // a table control character or a multiplication character.
            if (t1.Next == null || t1.WhitespacesAfterCount > 2)
            {
                ok = true;
            }
            else if (t1.Next.IsComma || t1.Next.IsCharOf("\\/") || t1.Next.IsTableControlChar)
            {
                ok = true;
            }
            else if (MeasureHelper.IsMultChar(t1.Next))
            {
                ok = true;
            }
            if (!ok)
            {
                return(null);
            }
            Pullenti.Morph.MorphClass mc = t.GetMorphClassInDictionary();
            if (mc.IsUndefined)
            {
                // Word is not in the dictionary: accepted as is.
            }
            // NOTE(review): LengthChar > 5 was already rejected above, so this branch
            // cannot return with the current thresholds.
            else if (t.LengthChar > 7)
            {
                return(null);
            }
            // Unknown units are always marked doubtful; isNeg is not applied to Pow here.
            UnitToken res1 = new UnitToken(t0, t1)
            {
                Pow = pow, IsDoubt = true
            };

            res1.UnknownName = (t as Pullenti.Ner.TextToken).GetSourceText();
            res1._correct();
            return(res1);
        }
예제 #5
0
 /// <summary>
 /// Tries to recognise one element of a personal identity document description
 /// (keyword, series, number, code, date, organization or address) starting at
 /// <paramref name="t"/>.
 /// </summary>
 /// <param name="t">Token to start from. Null yields null; the recursive calls in
 /// this method pass <c>Next</c> tokens, which may be null.</param>
 /// <param name="prev">Previously recognised element, or null when parsing the first
 /// element; with null only a keyword element can be returned.</param>
 /// <returns>The recognised element, or null when nothing was recognised.</returns>
 static PersonIdToken TryParse(Pullenti.Ner.Token t, PersonIdToken prev)
 {
     // Guard against a null start token: t is dereferenced immediately below.
     if (t == null)
     {
         return(null);
     }
     // Special case: certificate of state registration of an individual entrepreneur.
     if (t.IsValue("СВИДЕТЕЛЬСТВО", null))
     {
         Pullenti.Ner.Token tt1 = t;
         bool ip  = false;
         bool reg = false;
         for (Pullenti.Ner.Token tt = t.Next; tt != null; tt = tt.Next)
         {
             if (tt.IsCommaAnd || tt.Morph.Class.IsPreposition)
             {
                 continue;
             }
             if (tt.IsValue("РЕГИСТРАЦИЯ", null) || tt.IsValue("РЕЕСТР", null) || tt.IsValue("ЗАРЕГИСТРИРОВАТЬ", null))
             {
                 reg = true;
                 tt1 = tt;
             }
             else if (tt.IsValue("ИНДИВИДУАЛЬНЫЙ", null) || tt.IsValue("ИП", null))
             {
                 ip  = true;
                 tt1 = tt;
             }
             else if ((tt.IsValue("ВНЕСЕНИЕ", null) || tt.IsValue("ГОСУДАРСТВЕННЫЙ", null) || tt.IsValue("ЕДИНЫЙ", null)) || tt.IsValue("ЗАПИСЬ", null) || tt.IsValue("ПРЕДПРИНИМАТЕЛЬ", null))
             {
                 tt1 = tt;
             }
             else if (tt.GetReferent() != null && tt.GetReferent().TypeName == "DATERANGE")
             {
                 tt1 = tt;
             }
             else
             {
                 break;
             }
         }
         // Both a registration word and an "individual"/"ИП" word must have been seen.
         if (reg && ip)
         {
             return(new PersonIdToken(t, tt1)
             {
                 Typ = Typs.Keyword, Value = "СВИДЕТЕЛЬСТВО О ГОСУДАРСТВЕННОЙ РЕГИСТРАЦИИ ФИЗИЧЕСКОГО ЛИЦА В КАЧЕСТВЕ ИНДИВИДУАЛЬНОГО ПРЕДПРИНИМАТЕЛЯ"
             });
         }
     }
     Pullenti.Ner.Core.TerminToken tok = m_Ontology.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
     if (tok != null)
     {
         Typs          ty  = (Typs)tok.Termin.Tag;
         PersonIdToken res = new PersonIdToken(tok.BeginToken, tok.EndToken)
         {
             Typ = ty, Value = tok.Termin.CanonicText
         };
         if (prev == null)
         {
             // The first element must be a keyword; it is extended over a following
             // country reference, a "ГРАЖДАНИН <country>" pair, a person attribute
             // or the word "ДАННЫЙ".
             if (ty != Typs.Keyword)
             {
                 return(null);
             }
             for (t = tok.EndToken.Next; t != null; t = t.Next)
             {
                 Pullenti.Ner.Referent r = t.GetReferent();
                 if (r != null && (r is Pullenti.Ner.Geo.GeoReferent))
                 {
                     res.Referent = r;
                     res.EndToken = t;
                     continue;
                 }
                 if (t.IsValue("ГРАЖДАНИН", null) && t.Next != null && (t.Next.GetReferent() is Pullenti.Ner.Geo.GeoReferent))
                 {
                     res.Referent = t.Next.GetReferent();
                     t            = (res.EndToken = t.Next);
                     continue;
                 }
                 if (r != null)
                 {
                     break;
                 }
                 PersonAttrToken ait = PersonAttrToken.TryAttach(t, null, PersonAttrToken.PersonAttrAttachAttrs.No);
                 if (ait != null)
                 {
                     if (ait.Referent != null)
                     {
                         foreach (Pullenti.Ner.Slot s in ait.Referent.Slots)
                         {
                             if (s.TypeName == Pullenti.Ner.Person.PersonPropertyReferent.ATTR_REF && (s.Value is Pullenti.Ner.Geo.GeoReferent))
                             {
                                 res.Referent = s.Value as Pullenti.Ner.Referent;
                             }
                         }
                     }
                     res.EndToken = ait.EndToken;
                     break;
                 }
                 if (t.IsValue("ДАННЫЙ", null))
                 {
                     res.EndToken = t;
                     continue;
                 }
                 break;
             }
             // Only a state-level geo referent is kept.
             if ((res.Referent is Pullenti.Ner.Geo.GeoReferent) && !(res.Referent as Pullenti.Ner.Geo.GeoReferent).IsState)
             {
                 res.Referent = null;
             }
             return(res);
         }
         if (ty == Typs.Number)
         {
             // "number" keyword: collect the number tokens that follow on the same line.
             StringBuilder      tmp = new StringBuilder();
             Pullenti.Ner.Token tt  = tok.EndToken.Next;
             if (tt != null && tt.IsChar(':'))
             {
                 tt = tt.Next;
             }
             for (; tt != null; tt = tt.Next)
             {
                 if (tt.IsNewlineBefore)
                 {
                     break;
                 }
                 if (!(tt is Pullenti.Ner.NumberToken))
                 {
                     break;
                 }
                 tmp.Append(tt.GetSourceText());
                 res.EndToken = tt;
             }
             if (tmp.Length < 1)
             {
                 return(null);
             }
             res.Value     = tmp.ToString();
             res.HasPrefix = true;
             return(res);
         }
         if (ty == Typs.Seria)
         {
             // "series" keyword: collect digits, Roman numerals and two-letter upper-case
             // groups until a number prefix, a new line or an unsuitable token.
             StringBuilder      tmp = new StringBuilder();
             Pullenti.Ner.Token tt  = tok.EndToken.Next;
             if (tt != null && tt.IsChar(':'))
             {
                 tt = tt.Next;
             }
             bool nextNum = false;
             for (; tt != null; tt = tt.Next)
             {
                 if (tt.IsNewlineBefore)
                 {
                     break;
                 }
                 if (Pullenti.Ner.Core.MiscHelper.CheckNumberPrefix(tt) != null)
                 {
                     nextNum = true;
                     break;
                 }
                 if (!(tt is Pullenti.Ner.NumberToken))
                 {
                     if (!(tt is Pullenti.Ner.TextToken))
                     {
                         break;
                     }
                     if (!tt.Chars.IsAllUpper)
                     {
                         break;
                     }
                     Pullenti.Ner.NumberToken nu = Pullenti.Ner.Core.NumberHelper.TryParseRoman(tt);
                     if (nu != null)
                     {
                         tmp.Append(nu.GetSourceText());
                         tt = nu.EndToken;
                     }
                     else if (tt.LengthChar != 2)
                     {
                         break;
                     }
                     else
                     {
                         tmp.Append((tt as Pullenti.Ner.TextToken).Term);
                         res.EndToken = tt;
                     }
                     if (tt.Next != null && tt.Next.IsHiphen)
                     {
                         tt = tt.Next;
                     }
                     continue;
                 }
                 if (tmp.Length >= 4)
                 {
                     break;
                 }
                 tmp.Append(tt.GetSourceText());
                 res.EndToken = tt;
             }
             if (tmp.Length < 4)
             {
                 // A short series is accepted only when a number element follows it.
                 if (tmp.Length < 2)
                 {
                     return(null);
                 }
                 Pullenti.Ner.Token tt1 = res.EndToken.Next;
                 if (tt1 != null && tt1.IsComma)
                 {
                     tt1 = tt1.Next;
                 }
                 PersonIdToken next = TryParse(tt1, res);
                 if (next == null || next.Typ != Typs.Number)
                 {
                     return(null);
                 }
             }
             res.Value     = tmp.ToString();
             res.HasPrefix = true;
             return(res);
         }
         if (ty == Typs.Code)
         {
             // Extend over the following numbers, skipping ':' and hyphens.
             for (Pullenti.Ner.Token tt = res.EndToken.Next; tt != null; tt = tt.Next)
             {
                 if (tt.IsCharOf(":") || tt.IsHiphen)
                 {
                     continue;
                 }
                 if (tt is Pullenti.Ner.NumberToken)
                 {
                     res.EndToken = tt;
                     continue;
                 }
                 break;
             }
         }
         if (ty == Typs.Address)
         {
             if (t.GetReferent() is Pullenti.Ner.Address.AddressReferent)
             {
                 res.Referent = t.GetReferent();
                 res.EndToken = t;
                 return(res);
             }
             // Otherwise the address referent must be the first token after any
             // ':' / hyphen / preposition tokens.
             for (Pullenti.Ner.Token tt = res.EndToken.Next; tt != null; tt = tt.Next)
             {
                 if (tt.IsCharOf(":") || tt.IsHiphen || tt.Morph.Class.IsPreposition)
                 {
                     continue;
                 }
                 if (tt.GetReferent() is Pullenti.Ner.Address.AddressReferent)
                 {
                     res.Referent = tt.GetReferent();
                     res.EndToken = tt;
                 }
                 break;
             }
             if (res.Referent == null)
             {
                 return(null);
             }
         }
         return(res);
     }
     else if (prev == null)
     {
         return(null);
     }
     // No ontology term matched and a previous element exists.
     Pullenti.Ner.Token t0 = t;
     Pullenti.Ner.Token t1 = Pullenti.Ner.Core.MiscHelper.CheckNumberPrefix(t0);
     if (t1 != null)
     {
         t = t1;
     }
     if (t is Pullenti.Ner.NumberToken)
     {
         StringBuilder tmp = new StringBuilder();
         PersonIdToken res = new PersonIdToken(t0, t)
         {
             Typ = Typs.Number
         };
         for (Pullenti.Ner.Token tt = t; tt != null; tt = tt.Next)
         {
             if (tt.IsNewlineBefore || !(tt is Pullenti.Ner.NumberToken))
             {
                 break;
             }
             tmp.Append(tt.GetSourceText());
             res.EndToken = tt;
         }
         if (tmp.Length < 4)
         {
             // A short digit run directly after a keyword is treated as a series,
             // provided a number element follows it.
             if (tmp.Length < 2)
             {
                 return(null);
             }
             if (prev == null || prev.Typ != Typs.Keyword)
             {
                 return(null);
             }
             PersonIdToken ne = TryParse(res.EndToken.Next, prev);
             if (ne != null && ne.Typ == Typs.Number)
             {
                 res.Typ = Typs.Seria;
             }
             else
             {
                 return(null);
             }
         }
         res.Value = tmp.ToString();
         if (t0 != t)
         {
             res.HasPrefix = true;
         }
         return(res);
     }
     if (t is Pullenti.Ner.ReferentToken)
     {
         Pullenti.Ner.Referent r = t.GetReferent();
         if (r != null)
         {
             if (r.TypeName == "DATE")
             {
                 return(new PersonIdToken(t, t)
                 {
                     Typ = Typs.Date, Referent = r
                 });
             }
             if (r.TypeName == "ORGANIZATION")
             {
                 return(new PersonIdToken(t, t)
                 {
                     Typ = Typs.Org, Referent = r
                 });
             }
             if (r.TypeName == "ADDRESS")
             {
                 return(new PersonIdToken(t, t)
                 {
                     Typ = Typs.Address, Referent = r
                 });
             }
         }
     }
     // A non-lowercase letter token right after a keyword is a series when a number follows.
     if ((prev != null && prev.Typ == Typs.Keyword && (t is Pullenti.Ner.TextToken)) && !t.Chars.IsAllLower && t.Chars.IsLetter)
     {
         PersonIdToken rr = TryParse(t.Next, prev);
         if (rr != null && rr.Typ == Typs.Number)
         {
             return(new PersonIdToken(t, t)
             {
                 Typ = Typs.Seria, Value = (t as Pullenti.Ner.TextToken).Term
             });
         }
     }
     // "ОТ" / "ВІД" followed by a date referent.
     if ((t.IsValue("ОТ", "ВІД") && (t.Next is Pullenti.Ner.ReferentToken)) && t.Next.GetReferent().TypeName == "DATE")
     {
         return(new PersonIdToken(t, t.Next)
         {
             Typ = Typs.Date, Referent = t.Next.GetReferent()
         });
     }
     return(null);
 }
예제 #6
0
 /// <summary>
 /// Creates a semantic object for the noun of a noun phrase, links its adjectives and
 /// internal noun to it as details, and adds it to the graph.
 /// </summary>
 /// <param name="gr">Graph that receives the new object and its links.</param>
 /// <param name="npt">Noun phrase to convert.</param>
 /// <returns>
 /// The created object, or null when the noun is a referent token without a referent
 /// (in that case nothing is added to the graph).
 /// </returns>
 public static Pullenti.Semantic.SemObject CreateNounGroup(Pullenti.Semantic.SemGraph gr, Pullenti.Ner.Core.NounPhraseToken npt)
 {
     Pullenti.Ner.Token          head = npt.Noun.BeginToken;
     Pullenti.Semantic.SemObject node = new Pullenti.Semantic.SemObject(gr);
     node.Tokens.Add(npt.Noun);

     // Object type: personal pronoun, pronoun, or plain noun.
     if (npt.Noun.Morph.Class.IsPersonalPronoun)
     {
         node.Typ = Pullenti.Semantic.SemObjectType.PersonalPronoun;
     }
     else if (npt.Noun.Morph.Class.IsPronoun)
     {
         node.Typ = Pullenti.Semantic.SemObjectType.Pronoun;
     }
     else
     {
         node.Typ = Pullenti.Semantic.SemObjectType.Noun;
     }

     Pullenti.Ner.ReferentToken refHead = head as Pullenti.Ner.ReferentToken;
     Pullenti.Ner.NumberToken   numHead = head as Pullenti.Ner.NumberToken;

     if (npt.Noun.BeginToken != npt.Noun.EndToken)
     {
         // Multi-token noun: normal forms come from the whole span, grammar from the phrase.
         node.Morph.NormalCase = npt.Noun.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false);
         node.Morph.NormalFull = npt.Noun.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Singular, Pullenti.Morph.MorphGender.Undefined, false);
         node.Morph.Class      = Pullenti.Morph.MorphClass.Noun;
         node.Morph.Number     = npt.Morph.Number;
         node.Morph.Gender     = npt.Morph.Gender;
         node.Morph.Case       = npt.Morph.Case;
     }
     else if (head is Pullenti.Ner.TextToken)
     {
         // Single word: take the first word form that agrees with the phrase morphology.
         foreach (Pullenti.Morph.MorphBaseInfo candidate in head.Morph.Items)
         {
             if (!candidate.CheckAccord(npt.Morph, false, false))
             {
                 continue;
             }
             Pullenti.Morph.MorphWordForm form = candidate as Pullenti.Morph.MorphWordForm;
             if (form == null)
             {
                 continue;
             }
             _setMorph(node, form);
             break;
         }
         if (node.Morph.NormalCase == null)
         {
             node.Morph.NormalCase = head.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false);
             node.Morph.NormalFull = head.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Singular, Pullenti.Morph.MorphGender.Undefined, false);
         }
         // The first derivate group found for the full normal form becomes the concept.
         List <Pullenti.Semantic.Utils.DerivateGroup> groups = Pullenti.Semantic.Utils.DerivateService.FindDerivates(node.Morph.NormalFull, true, null);
         if (groups != null && groups.Count > 0)
         {
             node.Concept = groups[0];
         }
     }
     else if (refHead != null)
     {
         Pullenti.Ner.Referent entity = refHead.Referent;
         if (entity == null)
         {
             return(null);
         }
         string entityText = entity.ToString();
         node.Morph.NormalCase = entityText;
         node.Morph.NormalFull = entityText;
         node.Concept          = entity;
     }
     else if (numHead != null)
     {
         node.Morph.Gender = head.Morph.Gender;
         node.Morph.Number = head.Morph.Number;
         if (numHead.IntValue == null)
         {
             // No integer value: fall back to the upper-cased source text.
             string raw = head.GetSourceText().ToUpper();
             node.Morph.NormalCase = raw;
             node.Morph.NormalFull = raw;
         }
         else
         {
             node.Morph.NormalCase = Pullenti.Ner.Core.NumberHelper.GetNumberAdjective(numHead.IntValue.Value, head.Morph.Gender, head.Morph.Number);
             node.Morph.NormalFull = Pullenti.Ner.Core.NumberHelper.GetNumberAdjective(numHead.IntValue.Value, Pullenti.Morph.MorphGender.Masculine, Pullenti.Morph.MorphNumber.Singular);
         }
     }
     head.Tag = node;

     // Adjectives become "какой" detail links; with MultiNouns only the first one is used.
     foreach (Pullenti.Ner.MetaToken adjective in npt.Adjectives)
     {
         if (npt.MultiNouns && adjective != npt.Adjectives[0])
         {
             break;
         }
         Pullenti.Semantic.SemObject adjNode = CreateNptAdj(gr, npt, adjective);
         if (adjNode != null)
         {
             gr.AddLink(Pullenti.Semantic.SemLinkType.Detail, node, adjNode, "какой", false, null);
         }
     }

     // A nested noun phrase is converted recursively and attached as an unnamed detail.
     if (npt.InternalNoun != null)
     {
         Pullenti.Semantic.SemObject innerNode = CreateNounGroup(gr, npt.InternalNoun);
         if (innerNode != null)
         {
             gr.AddLink(Pullenti.Semantic.SemLinkType.Detail, node, innerNode, null, false, null);
         }
     }
     gr.Objects.Add(node);
     return(node);
 }
예제 #7
0
 /// <summary>
 /// Walks the token chain starting at FirstToken and replaces text tokens whose
 /// dictionary morph class is undefined with a token built from the spelling
 /// correction proposed by MorphologyService.CorrectWord. Each replacement is
 /// registered in CorrectedTokens together with its source text.
 /// </summary>
 /// <param name="lang">
 /// Language passed to CorrectWord when the token's own language is undefined,
 /// and always passed to MorphologyService.Process for the corrected word.
 /// </param>
 void CorrectWordsByMorph(Pullenti.Morph.MorphLang lang)
 {
     for (Pullenti.Ner.Token cur = FirstToken; cur != null; cur = cur.Next)
     {
         // Only text tokens without the "прдктв." morph attribute are candidates.
         if (!(cur is Pullenti.Ner.TextToken) || cur.Morph.ContainsAttr("прдктв.", null))
         {
             continue;
         }

         // Skip words that already have a dictionary class and words shorter than 4 characters.
         Pullenti.Morph.MorphClass dictClass = cur.GetMorphClassInDictionary();
         bool knownOrShort = !dictClass.IsUndefined || (cur.LengthChar < 4);
         if (knownOrShort)
         {
             continue;
         }

         // Skip probable surnames (unless written all in lower case) and all-upper-case words.
         if ((cur.Morph.Class.IsProperSurname && !cur.Chars.IsAllLower) || cur.Chars.IsAllUpper)
         {
             continue;
         }

         Pullenti.Ner.TextToken word = (Pullenti.Ner.TextToken)cur;

         // Prefer the token's own language; fall back to the caller-supplied one.
         Pullenti.Morph.MorphLang wordLang = lang;
         if (!cur.Morph.Language.IsUndefined)
         {
             wordLang = cur.Morph.Language;
         }

         string fixedWord = Pullenti.Morph.MorphologyService.CorrectWord(word.Term, wordLang);
         if (fixedWord == null)
         {
             continue;
         }

         // The correction is accepted only when it analyzes to exactly one morph token.
         List<Pullenti.Morph.MorphToken> analyzed = Pullenti.Morph.MorphologyService.Process(fixedWord, lang, null);
         if (analyzed == null || analyzed.Count != 1)
         {
             continue;
         }

         // The replacement covers the same character span and keeps the original term in Term0.
         Pullenti.Ner.TextToken corrected = new Pullenti.Ner.TextToken(analyzed[0], this, cur.BeginChar, cur.EndChar);
         corrected.Chars = cur.Chars;
         corrected.Term0 = word.Term;

         // Do not turn an unknown word into a surname.
         if (corrected.GetMorphClassInDictionary().IsProperSurname)
         {
             continue;
         }

         // Splice the replacement into the chain in place of the current token.
         if (cur == FirstToken)
         {
             FirstToken = corrected;
         }
         else
         {
             cur.Previous.Next = corrected;
         }
         corrected.Next = cur.Next;

         if (CorrectedTokens == null)
         {
             CorrectedTokens = new Dictionary<Pullenti.Ner.Token, string>();
         }
         CorrectedTokens.Add(corrected, corrected.GetSourceText());

         // Continue the scan from the replacement so the loop step moves to its successor.
         cur = corrected;
     }
 }
예제 #8
0
        /// <summary>
        /// Tries to recognize an additional phone number item
        /// (<c>PhoneItemType.AddNumber</c>) starting at <paramref name="t0"/>.
        /// </summary>
        /// <remarks>
        /// Two shapes are accepted:
        /// <list type="number">
        /// <item><description>
        /// A '*' or '#' token directly followed by a number, optionally continued by a
        /// hyphen and a second number with no whitespace in between. The joined source
        /// text must be 3 to 6 characters long.
        /// </description></item>
        /// <item><description>
        /// An optional leading comma, an optional opening bracket, a term matched by
        /// m_PhoneTermins and/or a number marker ("НОМЕР", "N", "#", "№", "NUMBER",
        /// "НОМ", "ТЕЛ", or '+' inside brackets), an optional ':' or ',', and then a
        /// number (optionally hyphen-joined with a second number) whose source text is
        /// 2 to 7 characters long. If a bracket was opened, a closing ')' must follow.
        /// </description></item>
        /// </list>
        /// </remarks>
        /// <param name="t0">Token at which parsing starts; may be null.</param>
        /// <returns>
        /// The recognized item spanning from <paramref name="t0"/> to the last consumed
        /// token, or null when nothing matches.
        /// </returns>
        public static PhoneItemToken TryAttachAdditional(Pullenti.Ner.Token t0)
        {
            Pullenti.Ner.Token t = t0;
            if (t == null)
            {
                return(null);
            }
            if (t.IsChar(','))
            {
                t = t.Next;
            }
            else if (t.IsCharOf("*#") && (t.Next is Pullenti.Ner.NumberToken))
            {
                // Short form: "*123", "#12-34" and similar.
                string             val0 = (t.Next as Pullenti.Ner.NumberToken).GetSourceText();
                Pullenti.Ner.Token t1   = t.Next;
                if ((t1.Next != null && t1.Next.IsHiphen && !t1.IsWhitespaceAfter) && (t1.Next.Next is Pullenti.Ner.NumberToken) && !t1.Next.IsWhitespaceAfter)
                {
                    t1    = t1.Next.Next;
                    val0 += t1.GetSourceText();
                }
                if (val0.Length >= 3 && (val0.Length < 7))
                {
                    return new PhoneItemToken(t, t1)
                    {
                        ItemType = PhoneItemType.AddNumber, Value = val0
                    };
                }
                // Otherwise fall through and let the general form below decide.
            }
            bool br = false;

            if (t != null && t.IsChar('('))
            {
                // A bracket right after a comma is not treated as an additional number.
                if (t.Previous != null && t.Previous.IsComma)
                {
                    return(null);
                }
                br = true;
                t  = t.Next;
            }
            Pullenti.Ner.Core.TerminToken to = m_PhoneTermins.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
            if (to == null)
            {
                // Without a recognized term the number is accepted only inside brackets
                // that are not separated from the preceding text by more than one whitespace.
                if (!br)
                {
                    return(null);
                }
                if (t0.WhitespacesBeforeCount > 1)
                {
                    return(null);
                }
            }
            else if (to.Termin.Tag == null)
            {
                return(null);
            }
            else
            {
                t = to.EndToken.Next;
            }
            if (t == null)
            {
                return(null);
            }
            if (((t.IsValue("НОМЕР", null) || t.IsValue("N", null) || t.IsValue("#", null)) || t.IsValue("№", null) || t.IsValue("NUMBER", null)) || ((t.IsChar('+') && br)))
            {
                t = t.Next;
            }
            else if (t.IsValue("НОМ", null) || t.IsValue("ТЕЛ", null))
            {
                // Abbreviated marker, optionally followed by a dot.
                t = t.Next;
                if (t != null && t.IsChar('.'))
                {
                    t = t.Next;
                }
            }
            if (t != null && t.IsCharOf(":,") && !t.IsNewlineAfter)
            {
                t = t.Next;
            }
            if (!(t is Pullenti.Ner.NumberToken))
            {
                return(null);
            }
            string val = (t as Pullenti.Ner.NumberToken).GetSourceText();

            if ((t.Next != null && t.Next.IsHiphen && !t.IsWhitespaceAfter) && (t.Next.Next is Pullenti.Ner.NumberToken))
            {
                val += t.Next.Next.GetSourceText();
                t    = t.Next.Next;
            }
            if ((val.Length < 2) || val.Length > 7)
            {
                return(null);
            }
            if (br)
            {
                // An opened bracket must be closed right after the number.
                if (t.Next == null || !t.Next.IsChar(')'))
                {
                    return(null);
                }
                t = t.Next;
            }
            PhoneItemToken res = new PhoneItemToken(t0, t)
            {
                ItemType = PhoneItemType.AddNumber, Value = val
            };

            return(res);
        }
예제 #9
0
        internal static Pullenti.Ner.ReferentToken CreateReferentToken(Pullenti.Ner.Person.PersonReferent p, Pullenti.Ner.Token begin, Pullenti.Ner.Token end, Pullenti.Ner.MorphCollection morph, List <PersonAttrToken> attrs, Pullenti.Ner.Person.PersonAnalyzer.PersonAnalyzerData ad, bool forAttribute, bool afterBePredicate)
        {
            if (p == null)
            {
                return(null);
            }
            bool hasPrefix = false;

            if (attrs != null)
            {
                foreach (PersonAttrToken a in attrs)
                {
                    if (a.Typ == PersonAttrTerminType.BestRegards)
                    {
                        hasPrefix = true;
                    }
                    else
                    {
                        if (a.BeginChar < begin.BeginChar)
                        {
                            begin = a.BeginToken;
                            if ((a.EndToken.Next != null && a.EndToken.Next.IsChar(')') && begin.Previous != null) && begin.Previous.IsChar('('))
                            {
                                begin = begin.Previous;
                            }
                        }
                        if (a.Typ != PersonAttrTerminType.Prefix)
                        {
                            if (a.Age != null)
                            {
                                p.AddSlot(Pullenti.Ner.Person.PersonReferent.ATTR_AGE, a.Age, false, 0);
                            }
                            if (a.PropRef == null)
                            {
                                p.AddSlot(Pullenti.Ner.Person.PersonReferent.ATTR_ATTR, a.Value, false, 0);
                            }
                            else
                            {
                                p.AddSlot(Pullenti.Ner.Person.PersonReferent.ATTR_ATTR, a, false, 0);
                            }
                        }
                        else if (a.Gender == Pullenti.Morph.MorphGender.Feminie && !p.IsFemale)
                        {
                            p.IsFemale = true;
                        }
                        else if (a.Gender == Pullenti.Morph.MorphGender.Masculine && !p.IsMale)
                        {
                            p.IsMale = true;
                        }
                    }
                }
            }
            else if ((begin.Previous is Pullenti.Ner.TextToken) && (begin.WhitespacesBeforeCount < 3))
            {
                if ((begin.Previous as Pullenti.Ner.TextToken).Term == "ИП")
                {
                    PersonAttrToken a = new PersonAttrToken(begin.Previous, begin.Previous);
                    a.PropRef      = new Pullenti.Ner.Person.PersonPropertyReferent();
                    a.PropRef.Name = "индивидуальный предприниматель";
                    p.AddSlot(Pullenti.Ner.Person.PersonReferent.ATTR_ATTR, a, false, 0);
                    begin = begin.Previous;
                }
            }
            Pullenti.Ner.MorphCollection m0 = new Pullenti.Ner.MorphCollection();
            foreach (Pullenti.Morph.MorphBaseInfo it in morph.Items)
            {
                Pullenti.Morph.MorphBaseInfo bi = new Pullenti.Morph.MorphBaseInfo();
                bi.CopyFrom(it);
                bi.Number = Pullenti.Morph.MorphNumber.Singular;
                if (bi.Gender == Pullenti.Morph.MorphGender.Undefined)
                {
                    if (p.IsMale && !p.IsFemale)
                    {
                        bi.Gender = Pullenti.Morph.MorphGender.Masculine;
                    }
                    if (!p.IsMale && p.IsFemale)
                    {
                        bi.Gender = Pullenti.Morph.MorphGender.Feminie;
                    }
                }
                m0.AddItem(bi);
            }
            morph = m0;
            if ((attrs != null && attrs.Count > 0 && !attrs[0].Morph.Case.IsUndefined) && morph.Case.IsUndefined)
            {
                morph.Case = attrs[0].Morph.Case;
                if (attrs[0].Morph.Number == Pullenti.Morph.MorphNumber.Singular)
                {
                    morph.Number = Pullenti.Morph.MorphNumber.Singular;
                }
                if (p.IsMale && !p.IsFemale)
                {
                    morph.Gender = Pullenti.Morph.MorphGender.Masculine;
                }
                else if (p.IsFemale)
                {
                    morph.Gender = Pullenti.Morph.MorphGender.Feminie;
                }
            }
            if (begin.Previous != null)
            {
                Pullenti.Ner.Token ttt = begin.Previous;
                if (ttt.IsValue("ИМЕНИ", "ІМЕНІ"))
                {
                    forAttribute = true;
                }
                else
                {
                    if (ttt.IsChar('.') && ttt.Previous != null)
                    {
                        ttt = ttt.Previous;
                    }
                    if (ttt.WhitespacesAfterCount < 3)
                    {
                        if (ttt.IsValue("ИМ", "ІМ"))
                        {
                            forAttribute = true;
                        }
                    }
                }
            }
            if (forAttribute)
            {
                return new Pullenti.Ner.ReferentToken(p, begin, end)
                       {
                           Morph = morph, MiscAttrs = (int)p.m_PersonIdentityTyp
                       }
            }
            ;
            if ((begin.Previous != null && begin.Previous.IsCommaAnd && (begin.Previous.Previous is Pullenti.Ner.ReferentToken)) && (begin.Previous.Previous.GetReferent() is Pullenti.Ner.Person.PersonReferent))
            {
                Pullenti.Ner.ReferentToken rt00 = begin.Previous.Previous as Pullenti.Ner.ReferentToken;

                for (Pullenti.Ner.Token ttt = (Pullenti.Ner.Token)rt00; ttt != null;)
                {
                    if (ttt.Previous == null || !(ttt.Previous.Previous is Pullenti.Ner.ReferentToken))
                    {
                        break;
                    }
                    if (!ttt.Previous.IsCommaAnd || !(ttt.Previous.Previous.GetReferent() is Pullenti.Ner.Person.PersonReferent))
                    {
                        break;
                    }
                    rt00 = ttt.Previous.Previous as Pullenti.Ner.ReferentToken;
                    ttt  = rt00;
                }
                if (rt00.BeginToken.GetReferent() is Pullenti.Ner.Person.PersonPropertyReferent)
                {
                    bool ok = false;
                    if ((rt00.BeginToken as Pullenti.Ner.ReferentToken).EndToken.Next != null && (rt00.BeginToken as Pullenti.Ner.ReferentToken).EndToken.Next.IsChar(':'))
                    {
                        ok = true;
                    }
                    else if (rt00.BeginToken.Morph.Number == Pullenti.Morph.MorphNumber.Plural)
                    {
                        ok = true;
                    }
                    if (ok)
                    {
                        p.AddSlot(Pullenti.Ner.Person.PersonReferent.ATTR_ATTR, rt00.BeginToken.GetReferent(), false, 0);
                    }
                }
            }
            if (ad != null)
            {
                if (ad.OverflowLevel > 10)
                {
                    return new Pullenti.Ner.ReferentToken(p, begin, end)
                           {
                               Morph = morph, MiscAttrs = (int)p.m_PersonIdentityTyp
                           }
                }
                ;
                ad.OverflowLevel++;
            }
            List <PersonAttrToken> attrs1 = null;
            bool hasPosition = false;
            bool openBr      = false;

            for (Pullenti.Ner.Token t = end.Next; t != null; t = t.Next)
            {
                if (t.IsTableControlChar)
                {
                    break;
                }
                if (t.IsNewlineBefore)
                {
                    if (t.NewlinesBeforeCount > 2)
                    {
                        break;
                    }
                    if (attrs1 != null && attrs1.Count > 0)
                    {
                        break;
                    }
                    Pullenti.Ner.Mail.Internal.MailLine ml = Pullenti.Ner.Mail.Internal.MailLine.Parse(t, 0, 0);
                    if (ml != null && ml.Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.From)
                    {
                        break;
                    }
                    if (t.Chars.IsCapitalUpper)
                    {
                        PersonAttrToken attr1 = PersonAttrToken.TryAttach(t, (ad == null ? null : ad.LocalOntology), PersonAttrToken.PersonAttrAttachAttrs.No);
                        bool            ok1   = false;
                        if (attr1 != null)
                        {
                            if (hasPrefix || attr1.IsNewlineAfter || ((attr1.EndToken.Next != null && attr1.EndToken.Next.IsTableControlChar)))
                            {
                                ok1 = true;
                            }
                            else
                            {
                                for (Pullenti.Ner.Token tt2 = t.Next; tt2 != null && tt2.EndChar <= attr1.EndChar; tt2 = tt2.Next)
                                {
                                    if (tt2.IsWhitespaceBefore)
                                    {
                                        ok1 = true;
                                    }
                                }
                            }
                        }
                        else
                        {
                            Pullenti.Ner.Token ttt = CorrectTailAttributes(p, t);
                            if (ttt != null && ttt != t)
                            {
                                end = (t = ttt);
                                continue;
                            }
                        }
                        if (!ok1)
                        {
                            break;
                        }
                    }
                }
                if (t.IsHiphen || t.IsCharOf("_>|"))
                {
                    continue;
                }
                if (t.IsValue("МОДЕЛЬ", null))
                {
                    break;
                }
                Pullenti.Ner.Token tt = CorrectTailAttributes(p, t);
                if (tt != t && tt != null)
                {
                    end = (t = tt);
                    continue;
                }
                bool isBe = false;
                if (t.IsChar('(') && t == end.Next)
                {
                    openBr = true;
                    t      = t.Next;
                    if (t == null)
                    {
                        break;
                    }
                    PersonItemToken pit1 = PersonItemToken.TryAttach(t, null, PersonItemToken.ParseAttr.No, null);
                    if ((pit1 != null && t.Chars.IsCapitalUpper && pit1.EndToken.Next != null) && (t is Pullenti.Ner.TextToken) && pit1.EndToken.Next.IsChar(')'))
                    {
                        if (pit1.Lastname != null)
                        {
                            Pullenti.Morph.MorphBaseInfo inf = new Pullenti.Morph.MorphBaseInfo()
                            {
                                Case = Pullenti.Morph.MorphCase.Nominative
                            };
                            if (p.IsMale)
                            {
                                inf.Gender |= Pullenti.Morph.MorphGender.Masculine;
                            }
                            if (p.IsFemale)
                            {
                                inf.Gender |= Pullenti.Morph.MorphGender.Feminie;
                            }
                            PersonMorphCollection sur = PersonIdentityToken.CreateLastname(pit1, inf);
                            if (sur != null)
                            {
                                p.AddFioIdentity(sur, null, null);
                                end = (t = pit1.EndToken.Next);
                                continue;
                            }
                        }
                    }
                    if ((t is Pullenti.Ner.TextToken) && t.Chars.IsLatinLetter)
                    {
                        List <PersonItemToken> pits = PersonItemToken.TryAttachList(t, null, PersonItemToken.ParseAttr.CanBeLatin, 10);
                        if (((pits != null && pits.Count >= 2 && pits.Count <= 3) && pits[0].Chars.IsLatinLetter && pits[1].Chars.IsLatinLetter) && pits[pits.Count - 1].EndToken.Next != null && pits[pits.Count - 1].EndToken.Next.IsChar(')'))
                        {
                            Pullenti.Ner.Person.PersonReferent pr2 = new Pullenti.Ner.Person.PersonReferent();
                            int cou = 0;
                            foreach (PersonItemToken pi in pits)
                            {
                                foreach (Pullenti.Ner.Slot si in p.Slots)
                                {
                                    if (si.TypeName == Pullenti.Ner.Person.PersonReferent.ATTR_FIRSTNAME || si.TypeName == Pullenti.Ner.Person.PersonReferent.ATTR_MIDDLENAME || si.TypeName == Pullenti.Ner.Person.PersonReferent.ATTR_LASTNAME)
                                    {
                                        if (Pullenti.Ner.Core.MiscHelper.CanBeEqualCyrAndLatSS(si.Value.ToString(), pi.Value))
                                        {
                                            cou++;
                                            pr2.AddSlot(si.TypeName, pi.Value, false, 0);
                                            break;
                                        }
                                    }
                                }
                            }
                            if (cou == pits.Count)
                            {
                                foreach (Pullenti.Ner.Slot si in pr2.Slots)
                                {
                                    p.AddSlot(si.TypeName, si.Value, false, 0);
                                }
                                end = (t = pits[pits.Count - 1].EndToken.Next);
                                continue;
                            }
                        }
                    }
                }
                else if (t.IsComma)
                {
                    t = t.Next;
                    if ((t is Pullenti.Ner.TextToken) && (t as Pullenti.Ner.TextToken).IsValue("WHO", null))
                    {
                        continue;
                    }
                    if ((t is Pullenti.Ner.TextToken) && t.Chars.IsLatinLetter)
                    {
                        List <PersonItemToken> pits = PersonItemToken.TryAttachList(t, null, PersonItemToken.ParseAttr.CanBeLatin, 10);
                        if ((pits != null && pits.Count >= 2 && pits.Count <= 3) && pits[0].Chars.IsLatinLetter && pits[1].Chars.IsLatinLetter)
                        {
                            Pullenti.Ner.Person.PersonReferent pr2 = new Pullenti.Ner.Person.PersonReferent();
                            int cou = 0;
                            foreach (PersonItemToken pi in pits)
                            {
                                foreach (Pullenti.Ner.Slot si in p.Slots)
                                {
                                    if (si.TypeName == Pullenti.Ner.Person.PersonReferent.ATTR_FIRSTNAME || si.TypeName == Pullenti.Ner.Person.PersonReferent.ATTR_MIDDLENAME || si.TypeName == Pullenti.Ner.Person.PersonReferent.ATTR_LASTNAME)
                                    {
                                        if (Pullenti.Ner.Core.MiscHelper.CanBeEqualCyrAndLatSS(si.Value.ToString(), pi.Value))
                                        {
                                            cou++;
                                            pr2.AddSlot(si.TypeName, pi.Value, false, 0);
                                            break;
                                        }
                                    }
                                }
                            }
                            if (cou == pits.Count)
                            {
                                foreach (Pullenti.Ner.Slot si in pr2.Slots)
                                {
                                    p.AddSlot(si.TypeName, si.Value, false, 0);
                                }
                                end = (t = pits[pits.Count - 1].EndToken);
                                continue;
                            }
                        }
                    }
                }
                else if ((t is Pullenti.Ner.TextToken) && (t as Pullenti.Ner.TextToken).IsVerbBe)
                {
                    t = t.Next;
                }
                else if (t.IsAnd && t.IsWhitespaceAfter && !t.IsNewlineAfter)
                {
                    if (t == end.Next)
                    {
                        break;
                    }
                    t = t.Next;
                }
                else if (t.IsHiphen && t == end.Next)
                {
                    t = t.Next;
                }
                else if (t.IsChar('.') && t == end.Next && hasPrefix)
                {
                    t = t.Next;
                }
                Pullenti.Ner.Token ttt2 = CreateNickname(p, t);
                if (ttt2 != null)
                {
                    t = (end = ttt2);
                    continue;
                }
                if (t == null)
                {
                    break;
                }
                PersonAttrToken attr = null;
                attr = PersonAttrToken.TryAttach(t, (ad == null ? null : ad.LocalOntology), PersonAttrToken.PersonAttrAttachAttrs.No);
                if (attr == null)
                {
                    if ((t != null && t.GetReferent() != null && t.GetReferent().TypeName == "GEO") && attrs1 != null && openBr)
                    {
                        continue;
                    }
                    if ((t.Chars.IsCapitalUpper && openBr && t.Next != null) && t.Next.IsChar(')'))
                    {
                        if (p.FindSlot(Pullenti.Ner.Person.PersonReferent.ATTR_LASTNAME, null, true) == null)
                        {
                            p.AddSlot(Pullenti.Ner.Person.PersonReferent.ATTR_LASTNAME, t.GetSourceText().ToUpper(), false, 0);
                            t   = t.Next;
                            end = t;
                        }
                    }
                    if (t != null && t.IsValue("КОТОРЫЙ", null) && t.Morph.Number == Pullenti.Morph.MorphNumber.Singular)
                    {
                        if (!p.IsFemale && t.Morph.Gender == Pullenti.Morph.MorphGender.Feminie)
                        {
                            p.IsFemale = true;
                            p.CorrectData();
                        }
                        else if (!p.IsMale && t.Morph.Gender == Pullenti.Morph.MorphGender.Masculine)
                        {
                            p.IsMale = true;
                            p.CorrectData();
                        }
                    }
                    break;
                }
                if (attr.Morph.Number == Pullenti.Morph.MorphNumber.Plural)
                {
                    break;
                }
                if (attr.Typ == PersonAttrTerminType.BestRegards)
                {
                    break;
                }
                if (attr.IsDoubt)
                {
                    if (hasPrefix)
                    {
                    }
                    else if (t.IsNewlineBefore && attr.IsNewlineAfter)
                    {
                    }
                    else if (t.Previous != null && ((t.Previous.IsHiphen || t.Previous.IsChar(':'))))
                    {
                    }
                    else
                    {
                        break;
                    }
                }
                if (!morph.Case.IsUndefined && !attr.Morph.Case.IsUndefined)
                {
                    if (((morph.Case & attr.Morph.Case)).IsUndefined && !isBe)
                    {
                        break;
                    }
                }
                if (openBr)
                {
                    if (Pullenti.Ner.Person.PersonAnalyzer.TryAttachPerson(t, ad, false, 0, true) != null)
                    {
                        break;
                    }
                }
                if (attrs1 == null)
                {
                    if (t.Previous.IsComma && t.Previous == end.Next)
                    {
                        Pullenti.Ner.Token ttt = attr.EndToken.Next;
                        if (ttt != null)
                        {
                            if (ttt.Morph.Class.IsVerb)
                            {
                                if (Pullenti.Ner.Core.MiscHelper.CanBeStartOfSentence(begin))
                                {
                                }
                                else
                                {
                                    break;
                                }
                            }
                        }
                    }
                    attrs1 = new List <PersonAttrToken>();
                }
                attrs1.Add(attr);
                if (attr.Typ == PersonAttrTerminType.Position || attr.Typ == PersonAttrTerminType.King)
                {
                    if (!isBe)
                    {
                        hasPosition = true;
                    }
                }
                else if (attr.Typ != PersonAttrTerminType.Prefix)
                {
                    if (attr.Typ == PersonAttrTerminType.Other && attr.Age != null)
                    {
                    }
                    else
                    {
                        attrs1 = null;
                        break;
                    }
                }
                t = attr.EndToken;
            }
            if (attrs1 != null && hasPosition && attrs != null)
            {
                Pullenti.Ner.Token te1 = attrs[attrs.Count - 1].EndToken.Next;
                Pullenti.Ner.Token te2 = attrs1[0].BeginToken;
                if (te1.WhitespacesAfterCount > te2.WhitespacesBeforeCount && (te2.WhitespacesBeforeCount < 2))
                {
                }
                else if (attrs1[0].Age != null)
                {
                }
                else if (((te1.IsHiphen || te1.IsChar(':'))) && !attrs1[0].IsNewlineBefore && ((te2.Previous.IsComma || te2.Previous == end)))
                {
                }
                else
                {
                    foreach (PersonAttrToken a in attrs)
                    {
                        if (a.Typ == PersonAttrTerminType.Position)
                        {
                            Pullenti.Ner.Token te = attrs1[attrs1.Count - 1].EndToken;
                            if (te.Next != null)
                            {
                                if (!te.Next.IsChar('.'))
                                {
                                    attrs1 = null;
                                    break;
                                }
                            }
                        }
                    }
                }
            }
            if (attrs1 != null && !hasPrefix)
            {
                PersonAttrToken attr = attrs1[attrs1.Count - 1];
                bool            ok   = false;
                if (attr.EndToken.Next != null && attr.EndToken.Next.Chars.IsCapitalUpper)
                {
                    ok = true;
                }
                else
                {
                    Pullenti.Ner.ReferentToken rt = Pullenti.Ner.Person.PersonAnalyzer.TryAttachPerson(attr.BeginToken, ad, false, -1, false);
                    if (rt != null && (rt.Referent is Pullenti.Ner.Person.PersonReferent))
                    {
                        ok = true;
                    }
                }
                if (ok)
                {
                    if (attr.BeginToken.WhitespacesBeforeCount > attr.EndToken.WhitespacesAfterCount)
                    {
                        attrs1 = null;
                    }
                    else if (attr.BeginToken.WhitespacesBeforeCount == attr.EndToken.WhitespacesAfterCount)
                    {
                        Pullenti.Ner.ReferentToken rt1 = Pullenti.Ner.Person.PersonAnalyzer.TryAttachPerson(attr.BeginToken, ad, false, -1, false);
                        if (rt1 != null)
                        {
                            attrs1 = null;
                        }
                    }
                }
            }
            if (attrs1 != null)
            {
                foreach (PersonAttrToken a in attrs1)
                {
                    if (a.Typ != PersonAttrTerminType.Prefix)
                    {
                        if (a.Age != null)
                        {
                            p.AddSlot(Pullenti.Ner.Person.PersonReferent.ATTR_AGE, a.Age, true, 0);
                        }
                        else if (a.PropRef == null)
                        {
                            p.AddSlot(Pullenti.Ner.Person.PersonReferent.ATTR_ATTR, a.Value, false, 0);
                        }
                        else
                        {
                            p.AddSlot(Pullenti.Ner.Person.PersonReferent.ATTR_ATTR, a, false, 0);
                        }
                        end = a.EndToken;
                        if (a.Gender != Pullenti.Morph.MorphGender.Undefined && !p.IsFemale && !p.IsMale)
                        {
                            if (a.Gender == Pullenti.Morph.MorphGender.Masculine && !p.IsMale)
                            {
                                p.IsMale = true;
                                p.CorrectData();
                            }
                            else if (a.Gender == Pullenti.Morph.MorphGender.Feminie && !p.IsFemale)
                            {
                                p.IsFemale = true;
                                p.CorrectData();
                            }
                        }
                    }
                }
                if (openBr)
                {
                    if (end.Next != null && end.Next.IsChar(')'))
                    {
                        end = end.Next;
                    }
                }
            }
            int crlfCou = 0;

            for (Pullenti.Ner.Token t = end.Next; t != null; t = t.Next)
            {
                if (t.IsTableControlChar)
                {
                    break;
                }
                if (t.IsNewlineBefore)
                {
                    Pullenti.Ner.Mail.Internal.MailLine ml = Pullenti.Ner.Mail.Internal.MailLine.Parse(t, 0, 0);
                    if (ml != null && ml.Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.From)
                    {
                        break;
                    }
                    crlfCou++;
                }
                if (t.IsCharOf(":,(") || t.IsHiphen)
                {
                    continue;
                }
                if (t.IsChar('.') && t == end.Next)
                {
                    continue;
                }
                Pullenti.Ner.Referent r = t.GetReferent();
                if (r != null)
                {
                    if (r.TypeName == "PHONE" || r.TypeName == "URI" || r.TypeName == "ADDRESS")
                    {
                        string ty = r.GetStringValue("SCHEME");
                        if (r.TypeName == "URI")
                        {
                            if ((ty != "mailto" && ty != "skype" && ty != "ICQ") && ty != "http")
                            {
                                break;
                            }
                        }
                        p.AddContact(r);
                        end     = t;
                        crlfCou = 0;
                        continue;
                    }
                }
                if (r is Pullenti.Ner.Person.PersonIdentityReferent)
                {
                    p.AddSlot(Pullenti.Ner.Person.PersonReferent.ATTR_IDDOC, r, false, 0);
                    end     = t;
                    crlfCou = 0;
                    continue;
                }
                if (r != null && r.TypeName == "ORGANIZATION")
                {
                    if (t.Next != null && t.Next.Morph.Class.IsVerb)
                    {
                        break;
                    }
                    if (begin.Previous != null && begin.Previous.Morph.Class.IsVerb)
                    {
                        break;
                    }
                    if (t.WhitespacesAfterCount == 1)
                    {
                        break;
                    }
                    bool exist = false;
                    foreach (Pullenti.Ner.Slot s in p.Slots)
                    {
                        if (s.TypeName == Pullenti.Ner.Person.PersonReferent.ATTR_ATTR && (s.Value is Pullenti.Ner.Person.PersonPropertyReferent))
                        {
                            Pullenti.Ner.Person.PersonPropertyReferent pr = s.Value as Pullenti.Ner.Person.PersonPropertyReferent;
                            if (pr.FindSlot(Pullenti.Ner.Person.PersonPropertyReferent.ATTR_REF, r, true) != null)
                            {
                                exist = true;
                                break;
                            }
                        }
                        else if (s.TypeName == Pullenti.Ner.Person.PersonReferent.ATTR_ATTR && (s.Value is PersonAttrToken))
                        {
                            PersonAttrToken pr = s.Value as PersonAttrToken;
                            if (pr.Referent.FindSlot(Pullenti.Ner.Person.PersonPropertyReferent.ATTR_REF, r, true) != null)
                            {
                                exist = true;
                                break;
                            }
                        }
                    }
                    if (!exist)
                    {
                        PersonAttrToken pat = new PersonAttrToken(t, t);
                        pat.PropRef = new Pullenti.Ner.Person.PersonPropertyReferent()
                        {
                            Name = "сотрудник"
                        };
                        pat.PropRef.AddSlot(Pullenti.Ner.Person.PersonPropertyReferent.ATTR_REF, r, false, 0);
                        p.AddSlot(Pullenti.Ner.Person.PersonReferent.ATTR_ATTR, pat, false, 0);
                    }
                    continue;
                }
                if (r != null)
                {
                    break;
                }
                if (!hasPrefix || crlfCou >= 2)
                {
                    break;
                }
                Pullenti.Ner.ReferentToken rt = t.Kit.ProcessReferent("PERSON", t);
                if (rt != null)
                {
                    break;
                }
            }
            if (ad != null)
            {
                ad.OverflowLevel--;
            }
            if (begin.IsValue("НА", null) && begin.Next != null && begin.Next.IsValue("ИМЯ", null))
            {
                Pullenti.Ner.Token t0 = begin.Previous;
                if (t0 != null && t0.IsComma)
                {
                    t0 = t0.Previous;
                }
                if (t0 != null && (t0.GetReferent() is Pullenti.Ner.Person.PersonIdentityReferent))
                {
                    p.AddSlot(Pullenti.Ner.Person.PersonReferent.ATTR_IDDOC, t0.GetReferent(), false, 0);
                }
            }
            return(new Pullenti.Ner.ReferentToken(p, begin, end)
            {
                Morph = morph, MiscAttrs = (int)p.m_PersonIdentityTyp
            });
        }
예제 #10
0
        /// <summary>
        /// Tries to recognize one element of a phone number (a digit group, a delimiter,
        /// a country code, a bracketed city code or a textual prefix) starting at the given token.
        /// </summary>
        /// <param name="t0">Token at which the element is expected to start; may be null.</param>
        /// <returns>
        /// A <see cref="PhoneItemToken"/> describing the recognized element, or null when
        /// nothing recognizable starts at <paramref name="t0"/>.
        /// </returns>
        static PhoneItemToken _TryAttach(Pullenti.Ner.Token t0)
        {
            if (t0 == null)
            {
                return null;
            }

            if (t0 is Pullenti.Ner.NumberToken)
            {
                // When the number parses as a "number with postfix" and the next token is glued to it,
                // go on only if a PHONE referent can be processed starting from the next token.
                if (Pullenti.Ner.Core.NumberHelper.TryParseNumberWithPostfix(t0) != null && !t0.IsWhitespaceAfter)
                {
                    Pullenti.Ner.ReferentToken rt = t0.Kit.ProcessReferent("PHONE", t0.Next);
                    if (rt == null)
                    {
                        return null;
                    }
                }
                // Only numbers written with digits (and not classified as adjectives) are phone digit groups.
                if ((t0 as Pullenti.Ner.NumberToken).Typ == Pullenti.Ner.NumberSpellingType.Digit && !t0.Morph.Class.IsAdjective)
                {
                    return new PhoneItemToken(t0, t0)
                    {
                        ItemType = PhoneItemType.Number, Value = t0.GetSourceText()
                    };
                }
                return null;
            }

            if (t0.IsChar('.'))
            {
                return new PhoneItemToken(t0, t0)
                {
                    ItemType = PhoneItemType.Delim, Value = "."
                };
            }

            if (t0.IsHiphen)
            {
                return new PhoneItemToken(t0, t0)
                {
                    ItemType = PhoneItemType.Delim, Value = "-"
                };
            }

            if (t0.IsChar('+'))
            {
                // '+' must be followed by a digit-written number: the country code.
                if (!(t0.Next is Pullenti.Ner.NumberToken) || (t0.Next as Pullenti.Ner.NumberToken).Typ != Pullenti.Ner.NumberSpellingType.Digit)
                {
                    return null;
                }
                // Leading zeros are dropped; a code made only of zeros is rejected.
                string code = t0.Next.GetSourceText().TrimStart('0');
                if (code.Length == 0)
                {
                    return null;
                }
                return new PhoneItemToken(t0, t0.Next)
                {
                    ItemType = PhoneItemType.CountryCode, Value = code
                };
            }

            // U+2011 (non-breaking hyphen) before a two-character number is treated as a '-' delimiter.
            if (t0.IsChar((char)0x2011) && (t0.Next is Pullenti.Ner.NumberToken) && t0.Next.LengthChar == 2)
            {
                return new PhoneItemToken(t0, t0)
                {
                    ItemType = PhoneItemType.Delim, Value = "-"
                };
            }

            if (t0.IsCharOf("("))
            {
                if (t0.Next is Pullenti.Ner.NumberToken)
                {
                    // "(digits)": concatenate every digit group up to ')', allowing hyphens and dots in between.
                    Pullenti.Ner.Token et = t0.Next;
                    StringBuilder digits = new StringBuilder();
                    for (; et != null; et = et.Next)
                    {
                        if (et.IsChar(')'))
                        {
                            break;
                        }
                        if ((et is Pullenti.Ner.NumberToken) && (et as Pullenti.Ner.NumberToken).Typ == Pullenti.Ner.NumberSpellingType.Digit)
                        {
                            digits.Append(et.GetSourceText());
                        }
                        else if (!et.IsHiphen && !et.IsChar('.'))
                        {
                            return null;
                        }
                    }
                    // et == null means the closing bracket was never found.
                    if (et == null || digits.Length == 0)
                    {
                        return null;
                    }
                    return new PhoneItemToken(t0, et)
                    {
                        ItemType = PhoneItemType.CityCode, Value = digits.ToString(), IsInBrackets = true
                    };
                }

                // "(term)": a phone term without a Tag, immediately followed by ')', is a bracketed prefix.
                Pullenti.Ner.Core.TerminToken tt1 = m_PhoneTermins.TryParse(t0.Next, Pullenti.Ner.Core.TerminParseAttr.No);
                if (tt1 != null && tt1.Termin.Tag == null && tt1.EndToken.Next != null && tt1.EndToken.Next.IsChar(')'))
                {
                    return new PhoneItemToken(t0, tt1.EndToken.Next)
                    {
                        ItemType = PhoneItemType.Prefix, IsInBrackets = true, Value = string.Empty
                    };
                }
                return null;
            }

            // "/NNN/": a three-character number between slashes is handled like a bracketed city code.
            if ((t0.IsChar('/') && (t0.Next is Pullenti.Ner.NumberToken) && t0.Next.Next != null) && t0.Next.Next.IsChar('/') && t0.Next.LengthChar == 3)
            {
                return new PhoneItemToken(t0, t0.Next.Next)
                {
                    ItemType = PhoneItemType.CityCode, Value = (t0.Next as Pullenti.Ner.NumberToken).Value.ToString(), IsInBrackets = true
                };
            }

            Pullenti.Ner.Token t1 = null;
            Pullenti.Ner.Phone.PhoneKind ki = Pullenti.Ner.Phone.PhoneKind.Undefined;
            if ((t0.IsValue("Т", null) && t0.Next != null && t0.Next.IsCharOf("\\/")) && t0.Next.Next != null && ((t0.Next.Next.IsValue("Р", null) || t0.Next.Next.IsValue("М", null))))
            {
                // "Т/Р" maps to a work phone, "Т/М" to a mobile phone.
                t1 = t0.Next.Next;
                ki = (t1.IsValue("Р", null) ? Pullenti.Ner.Phone.PhoneKind.Work : Pullenti.Ner.Phone.PhoneKind.Mobile);
            }
            else
            {
                Pullenti.Ner.Core.TerminToken tt = m_PhoneTermins.TryParse(t0, Pullenti.Ner.Core.TerminParseAttr.No);
                if (tt == null || tt.Termin.Tag != null)
                {
                    // "НОМЕР <prefix>": let the prefix that follows absorb the word "НОМЕР".
                    if (t0.IsValue("НОМЕР", null))
                    {
                        PhoneItemToken rr = _TryAttach(t0.Next);
                        if (rr != null && rr.ItemType == PhoneItemType.Prefix)
                        {
                            rr.BeginToken = t0;
                            return rr;
                        }
                    }
                    return null;
                }
                if (tt.Termin.Tag2 is Pullenti.Ner.Phone.PhoneKind)
                {
                    ki = (Pullenti.Ner.Phone.PhoneKind)tt.Termin.Tag2;
                }
                t1 = tt.EndToken;
            }

            PhoneItemToken res = new PhoneItemToken(t0, t1)
            {
                ItemType = PhoneItemType.Prefix, Value = string.Empty, Kind = ki
            };

            // Absorb trailing '.' / ':' into the prefix; table control characters are stepped over
            // without moving the prefix end.
            while (true)
            {
                if (t1.Next != null && t1.Next.IsCharOf(".:"))
                {
                    res.EndToken = (t1 = t1.Next);
                }
                else if (t1.Next != null && t1.Next.IsTableControlChar)
                {
                    t1 = t1.Next;
                }
                else
                {
                    break;
                }
            }

            // A bare single-token prefix that is one character long or all upper-case
            // must be followed by whitespace to be accepted.
            if (t0 == t1 && ((t0.BeginChar == t0.EndChar || t0.Chars.IsAllUpper)))
            {
                if (!t0.IsWhitespaceAfter)
                {
                    return null;
                }
            }
            return res;
        }
예제 #11
0
        /// <summary>
        /// Tries to recognize one fragment of an organization name starting at the given token.
        /// </summary>
        /// <param name="t">Token at which the fragment is expected to start; may be null.</param>
        /// <param name="prev">Previously recognized name fragment, or null when there is none.</param>
        /// <param name="extOnto">
        /// When true, a geo referent or any text token other than ';' is accepted as a
        /// one-token fragment if regular parsing fails, and the ontology / previous-fragment
        /// checks are skipped.
        /// </param>
        /// <param name="first">
        /// When true, a Cyrillic preposition other than "ПО" / "ПРИ" at <paramref name="t"/> is rejected.
        /// </param>
        /// <returns>The recognized fragment, or null when nothing suitable starts at <paramref name="t"/>.</returns>
        public static OrgItemNameToken TryAttach(Pullenti.Ner.Token t, OrgItemNameToken prev, bool extOnto, bool first)
        {
            if (t == null)
            {
                return null;
            }

            // "ОРДЕНА ...": whatever follows (noun phrase, surname, person, short bracketed text)
            // is returned as a fragment flagged IsIgnoredPart.
            if (t.IsValue("ОРДЕНА", null) && t.Next != null)
            {
                Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t.Next, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                if (npt != null)
                {
                    Pullenti.Ner.Token t1 = npt.EndToken;
                    // After "ЗНАК" / "ДРУЖБА" one more adjacent noun phrase is absorbed, if present.
                    if (((t1.IsValue("ЗНАК", null) || t1.IsValue("ДРУЖБА", null))) && (t1.WhitespacesAfterCount < 2))
                    {
                        npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t1.Next, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                        if (npt != null)
                        {
                            t1 = npt.EndToken;
                        }
                    }
                    return new OrgItemNameToken(t, t1)
                    {
                        IsIgnoredPart = true
                    };
                }
                if (t.Next.GetMorphClassInDictionary().IsProperSurname)
                {
                    return new OrgItemNameToken(t, t.Next)
                    {
                        IsIgnoredPart = true
                    };
                }
                Pullenti.Ner.ReferentToken ppp = t.Kit.ProcessReferent("PERSON", t.Next);
                if (ppp != null)
                {
                    return new OrgItemNameToken(t, ppp.EndToken)
                    {
                        IsIgnoredPart = true
                    };
                }
                if ((t.WhitespacesAfterCount < 2) && Pullenti.Ner.Core.BracketHelper.CanBeStartOfSequence(t.Next, true, false))
                {
                    Pullenti.Ner.Core.BracketSequenceToken br = Pullenti.Ner.Core.BracketHelper.TryParse(t.Next, Pullenti.Ner.Core.BracketParseAttr.NearCloseBracket, 10);
                    if (br != null && (br.LengthChar < 40))
                    {
                        return new OrgItemNameToken(t, br.EndToken)
                        {
                            IsIgnoredPart = true
                        };
                    }
                }
            }

            // The first fragment may start with a Cyrillic preposition only if it is "ПО" or "ПРИ".
            if (first && t.Chars.IsCyrillicLetter && t.Morph.Class.IsPreposition)
            {
                if (!t.IsValue("ПО", null) && !t.IsValue("ПРИ", null))
                {
                    return null;
                }
            }

            OrgItemNameToken res = _TryAttach(t, prev, extOnto);
            if (res == null)
            {
                // Fallback for extOnto mode: take the single token verbatim.
                if (extOnto)
                {
                    if ((t.GetReferent() is Pullenti.Ner.Geo.GeoReferent) || (((t is Pullenti.Ner.TextToken) && !t.IsChar(';'))))
                    {
                        return new OrgItemNameToken(t, t)
                        {
                            Value = t.GetSourceText()
                        };
                    }
                }
                return null;
            }

            // No previous fragment: let a longer match from the ontology's OrgPureNames extend the end.
            if (prev == null && !extOnto)
            {
                if (t.Kit.Ontology != null)
                {
                    Pullenti.Ner.Org.OrganizationAnalyzer.OrgAnalyzerData ad = t.Kit.Ontology._getAnalyzerData(Pullenti.Ner.Org.OrganizationAnalyzer.ANALYZER_NAME) as Pullenti.Ner.Org.OrganizationAnalyzer.OrgAnalyzerData;
                    if (ad != null)
                    {
                        Pullenti.Ner.Core.TerminToken tok = ad.OrgPureNames.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
                        if (tok != null && tok.EndChar > res.EndChar)
                        {
                            res.EndToken = tok.EndToken;
                        }
                    }
                }
            }

            // An all-lowercase previous fragment followed by a non-lowercase one is rejected,
            // unless both are Latin or the new fragment starts with a standard noun.
            if (prev != null && !extOnto)
            {
                if ((prev.Chars.IsAllLower && !res.Chars.IsAllLower && !res.IsStdTail) && !res.IsStdName)
                {
                    bool bothLatin = prev.Chars.IsLatinLetter && res.Chars.IsLatinLetter;
                    if (!bothLatin && m_StdNouns.TryParse(res.BeginToken, Pullenti.Ner.Core.TerminParseAttr.No) == null)
                    {
                        return null;
                    }
                }
            }

            // "Xxx-Yyy" written without spaces: attach the text token after the hyphen.
            if ((res.EndToken.Next != null && !res.EndToken.IsWhitespaceAfter && res.EndToken.Next.IsHiphen) && !res.EndToken.Next.IsWhitespaceAfter)
            {
                Pullenti.Ner.TextToken tt = res.EndToken.Next.Next as Pullenti.Ner.TextToken;
                if (tt != null)
                {
                    if (tt.Chars == res.Chars || tt.Chars.IsAllUpper)
                    {
                        res.EndToken = tt;
                        res.Value    = string.Format("{0}-{1}", res.Value, tt.Term);
                    }
                }
            }

            // "Xxx and Yyy" with single spaces: merge a following fragment of the same character class
            // and a compatible case, provided it is not an organization type.
            if ((res.EndToken.Next != null && res.EndToken.Next.IsAnd && res.EndToken.WhitespacesAfterCount == 1) && res.EndToken.Next.WhitespacesAfterCount == 1)
            {
                OrgItemNameToken res1 = _TryAttach(res.EndToken.Next.Next, prev, extOnto);
                if (res1 != null && res1.Chars == res.Chars && OrgItemTypeToken.TryAttach(res.EndToken.Next.Next, false, null) == null)
                {
                    if (!((res1.Morph.Case & res.Morph.Case)).IsUndefined)
                    {
                        res.EndToken = res1.EndToken;
                        res.Value    = string.Format("{0} {1} {2}", res.Value, (res.Kit.BaseLanguage.IsUa ? "ТА" : "И"), res1.Value);
                    }
                }
            }

            // Count standard nouns inside the fragment.
            for (Pullenti.Ner.Token tt = res.BeginToken; tt != null && tt.EndChar <= res.EndChar; tt = tt.Next)
            {
                if (m_StdNouns.TryParse(tt, Pullenti.Ner.Core.TerminParseAttr.No) != null)
                {
                    res.StdOrgNameNouns++;
                }
            }

            // When the fragment ends with a standard noun, try to extend it over a following
            // chain of noun phrases that also end with standard nouns.
            if (m_StdNouns.TryParse(res.EndToken, Pullenti.Ner.Core.TerminParseAttr.No) != null)
            {
                int cou = 1;
                // non: a noun for which _isNotTermNoun returns false has been seen; et: candidate new end.
                bool non = !_isNotTermNoun(res.EndToken);
                Pullenti.Ner.Token et = res.EndToken;
                // br: an opening bracket has been seen and not yet closed.
                bool br = false;
                for (Pullenti.Ner.Token tt = res.EndToken.Next; tt != null; tt = tt.Next)
                {
                    if (tt.IsTableControlChar)
                    {
                        break;
                    }
                    if (tt.IsChar('('))
                    {
                        if (!non)
                        {
                            break;
                        }
                        br = true;
                        continue;
                    }
                    if (tt.IsChar(')'))
                    {
                        br = false;
                        et = tt;
                        break;
                    }
                    if (!(tt is Pullenti.Ner.TextToken))
                    {
                        break;
                    }
                    // Wider gaps are tolerated only within one paragraph and with the same character class.
                    if (tt.WhitespacesBeforeCount > 1)
                    {
                        if (tt.NewlinesBeforeCount > 1)
                        {
                            break;
                        }
                        if (tt.Chars != res.EndToken.Chars)
                        {
                            break;
                        }
                    }
                    if (tt.Morph.Class.IsPreposition || tt.IsCommaAnd)
                    {
                        continue;
                    }
                    Pullenti.Morph.MorphClass dd = tt.GetMorphClassInDictionary();
                    if (!dd.IsNoun && !dd.IsAdjective)
                    {
                        break;
                    }
                    Pullenti.Ner.Core.NounPhraseToken npt2 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(tt, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                    if (npt2 == null)
                    {
                        if (dd == Pullenti.Morph.MorphClass.Adjective)
                        {
                            continue;
                        }
                        break;
                    }
                    if (m_StdNouns.TryParse(npt2.EndToken, Pullenti.Ner.Core.TerminParseAttr.No) == null)
                    {
                        break;
                    }
                    if (npt2.EndToken.Chars != res.EndToken.Chars)
                    {
                        break;
                    }
                    // These head nouns (or a preceding "ПРИ") may start a separate organization: stop if one is found.
                    if ((npt2.EndToken.IsValue("УПРАВЛЕНИЕ", null) || npt2.EndToken.IsValue("ИНСТИТУТ", null) || npt2.EndToken.IsValue("УПРАВЛІННЯ", null)) || npt2.EndToken.IsValue("ІНСТИТУТ", null) || tt.Previous.IsValue("ПРИ", null))
                    {
                        Pullenti.Ner.ReferentToken rt = tt.Kit.ProcessReferent(Pullenti.Ner.Org.OrganizationAnalyzer.ANALYZER_NAME, tt);
                        if (rt != null)
                        {
                            break;
                        }
                    }
                    cou++;
                    tt = npt2.EndToken;
                    if (!_isNotTermNoun(tt))
                    {
                        non = true;
                        et  = tt;
                    }
                }
                // Commit the extension only if a qualifying noun was found and no bracket is left open.
                if (non && !br)
                {
                    res.StdOrgNameNouns += cou;
                    res.EndToken         = et;
                }
            }
            return res;
        }
예제 #12
0
        static int _analizeListItems(List <FragToken> chi, int ind)
        {
            if (ind >= chi.Count)
            {
                return(-1);
            }
            FragToken res = chi[ind];

            Pullenti.Ner.Instrument.InstrumentKind ki = res.Kind;
            if (((ki == Pullenti.Ner.Instrument.InstrumentKind.Chapter || ki == Pullenti.Ner.Instrument.InstrumentKind.Clause || ki == Pullenti.Ner.Instrument.InstrumentKind.Content) || ki == Pullenti.Ner.Instrument.InstrumentKind.Item || ki == Pullenti.Ner.Instrument.InstrumentKind.Subitem) || ki == Pullenti.Ner.Instrument.InstrumentKind.ClausePart || ki == Pullenti.Ner.Instrument.InstrumentKind.Indention)
            {
            }
            else
            {
                return(-1);
            }
            if (res.HasChanges && res.MultilineChangesValue != null)
            {
                Pullenti.Ner.MetaToken ci = res.MultilineChangesValue;
                FragToken cit             = new FragToken(ci.BeginToken, ci.EndToken)
                {
                    Kind = Pullenti.Ner.Instrument.InstrumentKind.Citation
                };
                res.Children.Add(cit);
                if (Pullenti.Ner.Core.BracketHelper.IsBracket(cit.BeginToken.Previous, true))
                {
                    cit.BeginToken = cit.BeginToken.Previous;
                }
                if (Pullenti.Ner.Core.BracketHelper.IsBracket(cit.EndToken.Next, true))
                {
                    cit.EndToken = cit.EndToken.Next;
                    if (cit.EndToken.Next != null && cit.EndToken.Next.IsCharOf(";."))
                    {
                        cit.EndToken = cit.EndToken.Next;
                    }
                }
                res.FillByContentChildren();
                if (res.Children[0].HasChanges)
                {
                }
                Pullenti.Ner.Instrument.InstrumentKind citKind = Pullenti.Ner.Instrument.InstrumentKind.Undefined;
                if (ci.Tag is Pullenti.Ner.Decree.DecreeChangeReferent)
                {
                    Pullenti.Ner.Decree.DecreeChangeReferent dcr = ci.Tag as Pullenti.Ner.Decree.DecreeChangeReferent;
                    if (dcr.Value != null && dcr.Value.NewItems.Count > 0)
                    {
                        string mnem = dcr.Value.NewItems[0];
                        int    i;
                        if ((((i = mnem.IndexOf(' ')))) > 0)
                        {
                            mnem = mnem.Substring(0, i);
                        }
                        citKind = Pullenti.Ner.Decree.Internal.PartToken._getInstrKindByTyp(Pullenti.Ner.Decree.Internal.PartToken._getTypeByAttrName(mnem));
                    }
                    else if (dcr.Owners.Count > 0 && (dcr.Owners[0] is Pullenti.Ner.Decree.DecreePartReferent) && dcr.Kind == Pullenti.Ner.Decree.DecreeChangeKind.New)
                    {
                        Pullenti.Ner.Decree.DecreePartReferent pat = dcr.Owners[0] as Pullenti.Ner.Decree.DecreePartReferent;
                        int min = 0;
                        foreach (Pullenti.Ner.Slot s in pat.Slots)
                        {
                            Pullenti.Ner.Decree.Internal.PartToken.ItemType ty = Pullenti.Ner.Decree.Internal.PartToken._getTypeByAttrName(s.TypeName);
                            if (ty == Pullenti.Ner.Decree.Internal.PartToken.ItemType.Undefined)
                            {
                                continue;
                            }
                            int l = Pullenti.Ner.Decree.Internal.PartToken._getRank(ty);
                            if (l == 0)
                            {
                                continue;
                            }
                            if (l > min || min == 0)
                            {
                                min     = l;
                                citKind = Pullenti.Ner.Decree.Internal.PartToken._getInstrKindByTyp(ty);
                            }
                        }
                    }
                }
                FragToken sub = null;
                if (citKind != Pullenti.Ner.Instrument.InstrumentKind.Undefined && citKind != Pullenti.Ner.Instrument.InstrumentKind.Appendix)
                {
                    sub = new FragToken(ci.BeginToken, ci.EndToken);
                    ContentAnalyzeWhapper wr = new ContentAnalyzeWhapper();
                    wr.Analyze(sub, null, true, citKind);
                    sub.Kind = Pullenti.Ner.Instrument.InstrumentKind.Content;
                }
                else
                {
                    sub = FragToken.CreateDocument(ci.BeginToken, ci.EndChar, citKind);
                }
                if (sub == null || sub.Children.Count == 0)
                {
                }
                else if ((sub.Kind == Pullenti.Ner.Instrument.InstrumentKind.Content && sub.Children.Count > 0 && sub.Children[0].BeginToken == sub.BeginToken) && sub.Children[sub.Children.Count - 1].EndToken == sub.EndToken)
                {
                    cit.Children.AddRange(sub.Children);
                }
                else
                {
                    cit.Children.Add(sub);
                }
                return(1);
            }
            int endChar = res.EndChar;

            if (res.Itok == null)
            {
                res.Itok = InstrToken1.Parse(res.BeginToken, true, null, 0, null, false, res.EndChar, false, false);
            }
            List <LineToken> lines = LineToken.ParseList(res.BeginToken, endChar, null);

            if (lines == null || (lines.Count < 1))
            {
                return(-1);
            }
            int ret = 1;

            if (res.Kind == Pullenti.Ner.Instrument.InstrumentKind.Content)
            {
                for (int j = ind + 1; j < chi.Count; j++)
                {
                    if (chi[j].Kind == Pullenti.Ner.Instrument.InstrumentKind.Content)
                    {
                        List <LineToken> lines2 = LineToken.ParseList(chi[j].BeginToken, chi[j].EndChar, lines[lines.Count - 1]);
                        if (lines2 == null || (lines2.Count < 1))
                        {
                            break;
                        }
                        if (!lines2[0].IsListItem)
                        {
                            if ((lines2.Count > 1 && lines2[1].IsListItem && lines2[0].EndToken.IsCharOf(":")) && !lines2[0].BeginToken.Chars.IsCapitalUpper)
                            {
                                lines2[0].IsListItem = true;
                            }
                            else
                            {
                                break;
                            }
                        }
                        lines.AddRange(lines2);
                        ret = (j - ind) + 1;
                    }
                    else if (chi[j].Kind != Pullenti.Ner.Instrument.InstrumentKind.Editions && chi[j].Kind != Pullenti.Ner.Instrument.InstrumentKind.Comment)
                    {
                        break;
                    }
                }
            }
            if (lines.Count < 2)
            {
                return(-1);
            }
            if ((lines.Count > 1 && lines[0].IsListItem && lines[1].IsListItem) && lines[0].Number != 1)
            {
                if (lines.Count == 2 || !lines[2].IsListItem)
                {
                    lines[0].IsListItem = (lines[1].IsListItem = false);
                }
            }
            for (int i = 0; i < lines.Count; i++)
            {
                if (lines[i].IsListItem)
                {
                    if (i > 0 && lines[i - 1].IsListItem)
                    {
                        continue;
                    }
                    if (((i + 1) < lines.Count) && lines[i + 1].IsListItem)
                    {
                    }
                    else
                    {
                        lines[i].IsListItem = false;
                        continue;
                    }
                    int  j;
                    bool newLine = false;
                    for (j = i + 1; j < lines.Count; j++)
                    {
                        if (!lines[j].IsListItem)
                        {
                            break;
                        }
                        else if (lines[j].IsNewlineBefore)
                        {
                            newLine = true;
                        }
                    }
                    if (newLine)
                    {
                        continue;
                    }
                    if (i > 0 && lines[i - 1].EndToken.IsChar(':'))
                    {
                        continue;
                    }
                    for (j = i; j < lines.Count; j++)
                    {
                        if (!lines[j].IsListItem)
                        {
                            break;
                        }
                        else
                        {
                            lines[j].IsListItem = false;
                        }
                    }
                }
            }
            if (lines.Count > 2)
            {
                LineToken last  = lines[lines.Count - 1];
                LineToken last2 = lines[lines.Count - 2];
                if ((!last.IsListItem && last.EndToken.IsChar('.') && last2.IsListItem) && last2.EndToken.IsChar(';'))
                {
                    if ((last.LengthChar < (last2.LengthChar * 2)) || last.BeginToken.Chars.IsAllLower)
                    {
                        last.IsListItem = true;
                    }
                }
            }
            for (int i = 0; i < (lines.Count - 1); i++)
            {
                if (!lines[i].IsListItem && !lines[i + 1].IsListItem)
                {
                    if (((i + 2) < lines.Count) && lines[i + 2].IsListItem && lines[i + 1].EndToken.IsChar(':'))
                    {
                    }
                    else
                    {
                        lines[i].EndToken = lines[i + 1].EndToken;
                        lines.RemoveAt(i + 1);
                        i--;
                    }
                }
            }
            for (int i = 0; i < (lines.Count - 1); i++)
            {
                if (lines[i].IsListItem)
                {
                    if (lines[i].Number == 1)
                    {
                        bool ok    = true;
                        int  num   = 1;
                        int  nonum = 0;
                        for (int j = i + 1; j < lines.Count; j++)
                        {
                            if (!lines[j].IsListItem)
                            {
                                ok = false;
                                break;
                            }
                            else if (lines[j].Number > 0)
                            {
                                num++;
                                if (lines[j].Number != num)
                                {
                                    ok = false;
                                    break;
                                }
                            }
                            else
                            {
                                nonum++;
                            }
                        }
                        if (!ok || nonum == 0 || (num < 2))
                        {
                            break;
                        }
                        LineToken lt = lines[i];
                        for (int j = i + 1; j < lines.Count; j++)
                        {
                            if (lines[j].Number > 0)
                            {
                                lt = lines[j];
                            }
                            else
                            {
                                List <LineToken> chli = lt.Tag as List <LineToken>;
                                if (chli == null)
                                {
                                    lt.Tag = (chli = new List <LineToken>());
                                }
                                lt.EndToken = lines[j].EndToken;
                                chli.Add(lines[j]);
                                lines.RemoveAt(j);
                                j--;
                            }
                        }
                    }
                }
            }
            int cou = 0;

            foreach (LineToken li in lines)
            {
                if (li.IsListItem)
                {
                    cou++;
                }
            }
            if (cou < 2)
            {
                return(-1);
            }
            for (int i = 0; i < lines.Count; i++)
            {
                if (lines[i].IsListItem)
                {
                    int  i0 = i;
                    bool ok = true;
                    cou = 1;
                    for (; i < lines.Count; i++, cou++)
                    {
                        if (!lines[i].IsListItem)
                        {
                            break;
                        }
                        else if (lines[i].Number != cou)
                        {
                            ok = false;
                        }
                    }
                    if (!ok)
                    {
                        for (i = i0; i < lines.Count; i++)
                        {
                            if (!lines[i].IsListItem)
                            {
                                break;
                            }
                            else
                            {
                                lines[i].Number = 0;
                            }
                        }
                    }
                    if (cou > 3 && lines[i0].BeginToken.GetSourceText() != lines[i0 + 1].BeginToken.GetSourceText() && lines[i0 + 1].BeginToken.GetSourceText() == lines[i0 + 2].BeginToken.GetSourceText())
                    {
                        string pref = lines[i0 + 1].BeginToken.GetSourceText();
                        ok = true;
                        for (int j = i0 + 2; j < i; j++)
                        {
                            if (pref != lines[j].BeginToken.GetSourceText())
                            {
                                ok = false;
                                break;
                            }
                        }
                        if (!ok)
                        {
                            continue;
                        }
                        Pullenti.Ner.Token tt = null;
                        ok = false;
                        for (tt = lines[i0].EndToken.Previous; tt != null && tt != lines[i0].BeginToken; tt = tt.Previous)
                        {
                            if (tt.GetSourceText() == pref)
                            {
                                ok = true;
                                break;
                            }
                        }
                        if (ok)
                        {
                            LineToken li0 = new LineToken(lines[i0].BeginToken, tt.Previous);
                            lines[i0].BeginToken = tt;
                            lines.Insert(i0, li0);
                            i++;
                        }
                    }
                }
            }
            foreach (LineToken li in lines)
            {
                li.CorrectBeginToken();
                FragToken ch = new FragToken(li.BeginToken, li.EndToken)
                {
                    Kind = (li.IsListItem ? Pullenti.Ner.Instrument.InstrumentKind.ListItem : Pullenti.Ner.Instrument.InstrumentKind.Content), Number = li.Number
                };
                if (ch.Kind == Pullenti.Ner.Instrument.InstrumentKind.Content && ch.EndToken.IsChar(':'))
                {
                    ch.Kind = Pullenti.Ner.Instrument.InstrumentKind.ListHead;
                }
                res.Children.Add(ch);
                List <LineToken> chli = li.Tag as List <LineToken>;
                if (chli != null)
                {
                    foreach (LineToken lt in chli)
                    {
                        ch.Children.Add(new FragToken(lt.BeginToken, lt.EndToken)
                        {
                            Kind = Pullenti.Ner.Instrument.InstrumentKind.ListItem
                        });
                    }
                    if (ch.BeginChar < ch.Children[0].BeginChar)
                    {
                        ch.Children.Insert(0, new FragToken(ch.BeginToken, ch.Children[0].BeginToken.Previous)
                        {
                            Kind = Pullenti.Ner.Instrument.InstrumentKind.Content
                        });
                    }
                }
            }
            return(ret);
        }
예제 #13
0
 /// <summary>
 /// Tries to read a heading/list-item number starting at <paramref name="t"/> and stores what it finds in
 /// <paramref name="res"/> (Numbers, NumTyp, NumBeginToken, NumEndToken, EndToken, NumSuffix, MinNumber).
 /// Four shapes are tried in order: a digit number (optionally a range and up to four dotted/bracketed
 /// components), a number written in words, a Roman number, and a single letter.
 /// When nothing matches the method returns without adding anything to res.Numbers.
 /// </summary>
 /// <param name="t">Token at which the number is expected to start.</param>
 /// <param name="res">Result item being filled; it is mutated in place.</param>
 /// <param name="prev">Previous item, may be null; only consulted in the single-letter case to check sequence continuity.</param>
 static void _parseNumber(Pullenti.Ner.Token t, InstrToken1 res, InstrToken1 prev)
 {
     // Case 1: a number written with digits whose integer value is below 3000.
     if (((t is Pullenti.Ner.NumberToken) && (t as Pullenti.Ner.NumberToken).IntValue != null && (t as Pullenti.Ner.NumberToken).Typ == Pullenti.Ner.NumberSpellingType.Digit) && ((t as Pullenti.Ner.NumberToken).IntValue.Value < 3000))
     {
         if (res.Numbers.Count >= 4)
         {
             // Intentionally empty in the source: this check has no effect.
         }
         // A number with adjective morphology is rejected unless res has a container rank.
         if (t.Morph.Class.IsAdjective && res.TypContainerRank == 0)
         {
             return;
         }
         // NOTE(review): presumably detects a number followed by a measurement-like postfix - confirm in NumberHelper.
         Pullenti.Ner.Core.NumberExToken nwp = Pullenti.Ner.Core.NumberHelper.TryParseNumberWithPostfix(t);
         if (nwp != null)
         {
             if (nwp.EndToken.IsWhitespaceBefore)
             {
                 // The parsed construct ends with a token separated by whitespace: keep going.
             }
             else
             {
                 return;
             }
         }
         // The number is followed (within fewer than 3 whitespace characters) by an all-lowercase letter token.
         if ((t.Next != null && (t.WhitespacesAfterCount < 3) && t.Next.Chars.IsLetter) && t.Next.Chars.IsAllLower)
         {
             if (!t.IsWhitespaceAfter && t.Next.LengthChar == 1)
             {
                 // A single letter glued to the number: fall through, the component loop below may attach it.
             }
             else if (res.Numbers.Count == 0)
             {
                 // First number: record it alone, without looking for further components or a suffix.
                 res.NumTyp = NumberTypes.Digit;
                 res.Numbers.Add((t as Pullenti.Ner.NumberToken).Value.ToString());
                 res.NumBeginToken = (res.NumEndToken = (res.EndToken = t));
                 return;
             }
             else
             {
                 return;
             }
         }
         // Note: NumTyp is updated before the whitespace check below, so it changes even when that check returns.
         if (res.NumTyp == NumberTypes.Undefined)
         {
             res.NumTyp = NumberTypes.Digit;
         }
         else
         {
             res.NumTyp = NumberTypes.Combo;
         }
         // A continuation number must not be separated from the previous part by whitespace.
         if (res.Numbers.Count > 0 && t.IsWhitespaceBefore)
         {
             return;
         }
         if (res.Numbers.Count == 0)
         {
             res.NumBeginToken = t;
         }
         // Range "N-M" with M greater than N: N goes to MinNumber and parsing continues from M.
         if ((t.Next != null && t.Next.IsHiphen && (t.Next.Next is Pullenti.Ner.NumberToken)) && (t.Next.Next as Pullenti.Ner.NumberToken).IntValue != null && (t.Next.Next as Pullenti.Ner.NumberToken).IntValue.Value > (t as Pullenti.Ner.NumberToken).IntValue.Value)
         {
             res.MinNumber = (t as Pullenti.Ner.NumberToken).Value.ToString();
             t             = t.Next.Next;
         }
         // Same range, but written as "N)-M".
         else if (((t.Next != null && t.Next.IsCharOf(")") && t.Next.Next != null) && t.Next.Next.IsHiphen && (t.Next.Next.Next is Pullenti.Ner.NumberToken)) && (t.Next.Next.Next as Pullenti.Ner.NumberToken).IntValue != null && (t.Next.Next.Next as Pullenti.Ner.NumberToken).IntValue.Value > (t as Pullenti.Ner.NumberToken).IntValue.Value)
         {
             res.MinNumber = (t as Pullenti.Ner.NumberToken).Value.ToString();
             t             = t.Next.Next.Next;
         }
         res.Numbers.Add((t as Pullenti.Ner.NumberToken).Value.ToString());
         res.EndToken  = (res.NumEndToken = t);
         res.NumSuffix = null;
         // Collect further components while fewer than four have been stored.
         for (Pullenti.Ner.Token ttt = t.Next; ttt != null && (res.Numbers.Count < 4); ttt = ttt.Next)
         {
             bool ok1 = false;
             bool ok2 = false;
             // ok1: '.' or '_' immediately followed by a digit number (or a Latin-letter number written as a word).
             if ((ttt.IsCharOf("._") && !ttt.IsWhitespaceAfter && (ttt.Next is Pullenti.Ner.NumberToken)) && (((ttt.Next as Pullenti.Ner.NumberToken).Typ == Pullenti.Ner.NumberSpellingType.Digit || (((ttt.Next as Pullenti.Ner.NumberToken).Typ == Pullenti.Ner.NumberSpellingType.Words) && ttt.Next.Chars.IsLatinLetter && !ttt.IsWhitespaceAfter))))
             {
                 ok1 = true;
             }
             // ok2: a number enclosed in "(...)" or "<...>".
             else if ((ttt.IsCharOf("(<") && (ttt.Next is Pullenti.Ner.NumberToken) && ttt.Next.Next != null) && ttt.Next.Next.IsCharOf(")>"))
             {
                 ok2 = true;
             }
             if (ok1 || ok2)
             {
                 ttt = ttt.Next;
                 res.Numbers.Add((ttt as Pullenti.Ner.NumberToken).Value.ToString());
                 res.NumTyp = (res.Numbers.Count == 2 ? NumberTypes.TwoDigits : (res.Numbers.Count == 3 ? NumberTypes.ThreeDigits : NumberTypes.FourDigits));
                 // Consume the closing bracket: always for the bracketed form, otherwise only when a '.' follows it.
                 if ((ttt.Next != null && ttt.Next.IsCharOf(")>") && ttt.Next.Next != null) && ttt.Next.Next.IsChar('.'))
                 {
                     ttt = ttt.Next;
                 }
                 else if (ok2)
                 {
                     ttt = ttt.Next;
                 }
                 t = (res.EndToken = (res.NumEndToken = ttt));
                 continue;
             }
             // A single letter glued directly to the first number becomes a second component (type Combo).
             if (((ttt is Pullenti.Ner.TextToken) && ttt.LengthChar == 1 && ttt.Chars.IsLetter) && !ttt.IsWhitespaceBefore && res.Numbers.Count == 1)
             {
                 res.Numbers.Add((ttt as Pullenti.Ner.TextToken).Term);
                 res.NumTyp = NumberTypes.Combo;
                 t          = (res.EndToken = (res.NumEndToken = ttt));
                 continue;
             }
             break;
         }
         // A trailing ')' or '.' is stored as the number suffix and included in the result span.
         if (t.Next != null && t.Next.IsCharOf(")."))
         {
             res.NumSuffix = t.Next.GetSourceText();
             t             = (res.EndToken = (res.NumEndToken = t.Next));
         }
         return;
     }
     // Case 2: a number written in words; accepted only as the first number of an item that has a container rank.
     if (((t is Pullenti.Ner.NumberToken) && (t as Pullenti.Ner.NumberToken).Typ == Pullenti.Ner.NumberSpellingType.Words && res.TypContainerRank > 0) && res.Numbers.Count == 0)
     {
         res.Numbers.Add((t as Pullenti.Ner.NumberToken).Value.ToString());
         res.NumTyp        = NumberTypes.Digit;
         res.NumBeginToken = t;
         if (t.Next != null && t.Next.IsChar('.'))
         {
             t             = t.Next;
             res.NumSuffix = ".";
         }
         res.EndToken = (res.NumEndToken = t);
         return;
     }
     // Case 3: Roman number.
     Pullenti.Ner.NumberToken nt = Pullenti.Ner.Core.NumberHelper.TryParseRoman(t);
     // A Roman value of 10 directly followed by ')' is discarded.
     // NOTE(review): presumably because "X)" is more likely a letter list marker - confirm.
     if ((nt != null && nt.Value == "10" && t.Next != null) && t.Next.IsChar(')'))
     {
         nt = null;
     }
     // A Roman value of 100 is always discarded.
     // NOTE(review): presumably to avoid reading a lone letter "C" as a number - confirm.
     if (nt != null && nt.Value == "100")
     {
         nt = null;
     }
     if (nt != null && nt.Typ == Pullenti.Ner.NumberSpellingType.Roman)
     {
         // As in the digit case, NumTyp is updated before the whitespace check that may return.
         if (res.NumTyp == NumberTypes.Undefined)
         {
             res.NumTyp = NumberTypes.Roman;
         }
         else
         {
             res.NumTyp = NumberTypes.Combo;
         }
         if (res.Numbers.Count > 0 && t.IsWhitespaceBefore)
         {
             return;
         }
         if (res.Numbers.Count == 0)
         {
             res.NumBeginToken = t;
         }
         res.Numbers.Add(nt.Value.ToString());
         t = (res.EndToken = (res.NumEndToken = nt.EndToken));
         // For chapters, sections and lines a pure Roman number may be followed by one or two digit
         // sub-numbers separated by '.', '_' or '<' (an optional closing '>' is consumed).
         if (res.NumTyp == NumberTypes.Roman && ((res.Typ == InstrToken1.Types.Chapter || res.Typ == InstrToken1.Types.Section || res.Typ == InstrToken1.Types.Line)))
         {
             if ((t.Next != null && t.Next.IsCharOf("._<") && (t.Next.Next is Pullenti.Ner.NumberToken)) && (t.Next.Next as Pullenti.Ner.NumberToken).Typ == Pullenti.Ner.NumberSpellingType.Digit)
             {
                 t = t.Next.Next;
                 res.Numbers.Add((t as Pullenti.Ner.NumberToken).Value.ToString());
                 res.NumTyp = NumberTypes.TwoDigits;
                 if (t.Next != null && t.Next.IsChar('>'))
                 {
                     t = t.Next;
                 }
                 res.EndToken = (res.NumEndToken = t);
                 if ((t.Next != null && t.Next.IsCharOf("._<") && (t.Next.Next is Pullenti.Ner.NumberToken)) && (t.Next.Next as Pullenti.Ner.NumberToken).Typ == Pullenti.Ner.NumberSpellingType.Digit)
                 {
                     t = t.Next.Next;
                     res.Numbers.Add((t as Pullenti.Ner.NumberToken).Value.ToString());
                     res.NumTyp = NumberTypes.ThreeDigits;
                     if (t.Next != null && t.Next.IsChar('>'))
                     {
                         t = t.Next;
                     }
                     res.EndToken = (res.NumEndToken = t);
                 }
             }
         }
         // A trailing ')' or '.' is stored as the number suffix and included in the result span.
         if (t.Next != null && t.Next.IsCharOf(")."))
         {
             res.NumSuffix = t.Next.GetSourceText();
             t             = (res.EndToken = (res.NumEndToken = t.Next));
         }
         return;
     }
     // Case 4: a single letter that is the very first token of the item.
     if (((t is Pullenti.Ner.TextToken) && t.LengthChar == 1 && t.Chars.IsLetter) && t == res.BeginToken)
     {
         // Letter glued to a number that is followed by '.': the number is stored, the letter goes into the suffix.
         if ((!t.IsWhitespaceAfter && (t.Next is Pullenti.Ner.NumberToken) && t.Next.Next != null) && t.Next.Next.IsChar('.'))
         {
             res.NumBeginToken = t;
             res.NumTyp        = NumberTypes.Digit;
             res.Numbers.Add((t.Next as Pullenti.Ner.NumberToken).Value.ToString());
             res.NumSuffix = (t as Pullenti.Ner.TextToken).Term + ".";
             t             = (res.EndToken = (res.NumEndToken = t.Next.Next));
             return;
         }
         if (t.Next != null && t.Next.IsCharOf(".)"))
         {
             // "letter.number)" written without whitespace: two components, suffix ")".
             if (((t.Next.IsChar('.') && (t.Next.Next is Pullenti.Ner.NumberToken) && t.Next.Next.Next != null) && t.Next.Next.Next.IsChar(')') && !t.Next.IsWhitespaceAfter) && !t.Next.Next.IsWhitespaceAfter)
             {
                 res.NumTyp = NumberTypes.TwoDigits;
                 res.Numbers.Add((t as Pullenti.Ner.TextToken).Term);
                 res.Numbers.Add((t.Next.Next as Pullenti.Ner.NumberToken).Value.ToString());
                 res.NumSuffix     = ")";
                 res.NumBeginToken = t;
                 t = (res.EndToken = (res.NumEndToken = t.Next.Next.Next));
                 return;
             }
             if (t.Next.IsChar('.') && ((t.Chars.IsAllUpper || (t.Next.Next is Pullenti.Ner.NumberToken))))
             {
                 // An uppercase letter with '.', or "letter." followed by a number, is not taken as a letter number.
             }
             else
             {
                 // Temporary item used only to evaluate LastNumber for this letter.
                 // NOTE(review): LastNumber presumably maps the letter to its ordinal position - confirm in InstrToken1.
                 InstrToken1 tmp1 = new InstrToken1(t, t.Next);
                 tmp1.Numbers.Add((t as Pullenti.Ner.TextToken).Term);
                 if (tmp1.LastNumber > 1 && t.Next.IsCharOf(".") && ((prev == null || (prev.LastNumber + 1) != tmp1.LastNumber)))
                 {
                     // "letter." with LastNumber above 1 that does not directly continue prev: rejected.
                 }
                 else
                 {
                     if (res.Numbers.Count == 0)
                     {
                         res.NumBeginToken = t;
                     }
                     res.NumTyp = NumberTypes.Letter;
                     res.Numbers.Add((t as Pullenti.Ner.TextToken).Term);
                     // NumBeginToken is assigned unconditionally here, which makes the conditional assignment above redundant.
                     res.NumBeginToken = t;
                     t             = (res.EndToken = (res.NumEndToken = t.Next));
                     res.NumSuffix = t.GetSourceText();
                     return;
                 }
             }
         }
     }
 }
예제 #14
0
        /// <summary>
        /// Concatenates the tokens from <paramref name="begin"/> through <paramref name="end"/> into one string.
        /// Text tokens contribute their Term, or the NormalCase of a matching morphological word form when
        /// <paramref name="cla"/>, <paramref name="mc"/> or <paramref name="gender"/> is specified.
        /// Number tokens contribute their Value (or the source word's Term for a single-word adjective numeral),
        /// and other meta tokens are expanded recursively.
        /// </summary>
        /// <param name="begin">First token of the span.</param>
        /// <param name="end">Last token of the span.</param>
        /// <param name="cla">Part-of-speech filter for word forms; ignored when its Value is 0.</param>
        /// <param name="mc">Case filter for word forms; ignored when undefined.</param>
        /// <param name="gender">Gender filter for word forms; dropped for a second pass if no form matches it.</param>
        /// <param name="ignoreBracketsAndHiphens">When true, brackets and whitespace-separated hyphens are skipped; bracketed content is rendered recursively.</param>
        /// <param name="ignoreGeoReferent">When true, meta tokens (other than <paramref name="begin"/>) whose referent type name is "GEO" are skipped.</param>
        /// <returns>
        /// The assembled text, or null when either boundary is null, when <paramref name="begin"/> ends after
        /// <paramref name="end"/> starts (and they are different tokens), or when nothing was collected.
        /// </returns>
        public static string GetNameEx(Pullenti.Ner.Token begin, Pullenti.Ner.Token end, Pullenti.Morph.MorphClass cla, Pullenti.Morph.MorphCase mc, Pullenti.Morph.MorphGender gender = Pullenti.Morph.MorphGender.Undefined, bool ignoreBracketsAndHiphens = false, bool ignoreGeoReferent = false)
        {
            if (begin == null || end == null)
            {
                return null;
            }
            if (begin != end && begin.EndChar > end.BeginChar)
            {
                return null;
            }

            StringBuilder text          = new StringBuilder();
            string        pendingPrefix = null;

            for (Pullenti.Ner.Token cur = begin; cur != null && cur.EndChar <= end.EndChar; cur = cur.Next)
            {
                // Hard cap on the size of the produced text.
                if (text.Length > 1000)
                {
                    break;
                }
                if (cur.IsTableControlChar)
                {
                    continue;
                }

                // Bracket handling: the bracket itself is never emitted as a plain token.
                if (ignoreBracketsAndHiphens && BracketHelper.IsBracket(cur, false))
                {
                    if (cur == end)
                    {
                        break;
                    }
                    if (cur.IsCharOf("(<["))
                    {
                        BracketSequenceToken seq = BracketHelper.TryParse(cur, BracketParseAttr.No, 100);
                        if (seq != null && seq.EndChar <= end.EndChar)
                        {
                            string inner = GetNameEx(seq.BeginToken.Next, seq.EndToken.Previous, Pullenti.Morph.MorphClass.Undefined, Pullenti.Morph.MorphCase.Undefined, Pullenti.Morph.MorphGender.Undefined, ignoreBracketsAndHiphens, false);
                            if (inner != null)
                            {
                                // A closing bracket group holding a single non-letter, non-referent token is dropped.
                                bool loneTrailingSymbol = seq.EndChar == end.EndChar
                                                          && seq.BeginToken.Next == seq.EndToken.Previous
                                                          && !seq.BeginToken.Next.Chars.IsLetter
                                                          && !(seq.BeginToken.Next is Pullenti.Ner.ReferentToken);
                                if (!loneTrailingSymbol)
                                {
                                    text.AppendFormat(" {0}{1}{2}", cur.GetSourceText(), inner, seq.EndToken.GetSourceText());
                                }
                            }
                            // Jump past the whole bracket sequence.
                            cur = seq.EndToken;
                        }
                    }
                    continue;
                }

                // Hyphen handling: a hyphen surrounded by whitespace on either side is skipped.
                if (ignoreBracketsAndHiphens && cur.IsHiphen)
                {
                    if (cur == end)
                    {
                        break;
                    }
                    if (cur.IsWhitespaceBefore || cur.IsWhitespaceAfter)
                    {
                        continue;
                    }
                }

                Pullenti.Ner.TextToken word = cur as Pullenti.Ner.TextToken;
                if (word != null)
                {
                    // "word-word": remember the left part as a prefix and resume from the hyphen.
                    bool hyphenJoined = !ignoreBracketsAndHiphens
                                        && word.Next != null
                                        && word.Next.IsHiphen
                                        && (word.Next.Next is Pullenti.Ner.TextToken)
                                        && word != end
                                        && word.Next != end;
                    if (hyphenJoined)
                    {
                        pendingPrefix = (pendingPrefix == null ? word.Term : string.Format("{0}-{1}", pendingPrefix, word.Term));
                        cur = word.Next;
                        if (cur == end)
                        {
                            break;
                        }
                        continue;
                    }

                    string piece = null;
                    if (cla.Value != 0 || !mc.IsUndefined || gender != Pullenti.Morph.MorphGender.Undefined)
                    {
                        piece = _pickNormalCaseForName(word, cla, mc, gender);
                        if (piece == null && gender != Pullenti.Morph.MorphGender.Undefined)
                        {
                            // Nothing matched with the gender restriction: retry without it.
                            piece = _pickNormalCaseForName(word, cla, mc, Pullenti.Morph.MorphGender.Undefined);
                        }
                    }
                    if (piece == null)
                    {
                        piece = word.Term;
                        if (word.Chars.IsLastLower && word.LengthChar > 2)
                        {
                            // Use the source spelling, cut right after its last uppercase character (if there is one).
                            piece = word.GetSourceText();
                            int cut = piece.Length - 1;
                            while (cut >= 0 && !char.IsUpper(piece[cut]))
                            {
                                cut--;
                            }
                            if (cut >= 0)
                            {
                                piece = piece.Substring(0, cut + 1);
                            }
                        }
                    }
                    if (pendingPrefix != null)
                    {
                        string joiner = (ignoreBracketsAndHiphens ? " " : "-");
                        piece = string.Format("{0}{1}{2}", pendingPrefix, joiner, piece);
                    }
                    pendingPrefix = null;

                    if (text.Length > 0 && piece.Length > 0)
                    {
                        if (char.IsLetterOrDigit(piece[0]))
                        {
                            // No space right after a hyphen already present in the output.
                            if (text[text.Length - 1] != '-')
                            {
                                text.Append(' ');
                            }
                        }
                        else if (!ignoreBracketsAndHiphens && BracketHelper.CanBeStartOfSequence(word, false, false))
                        {
                            text.Append(' ');
                        }
                    }
                    text.Append(piece);
                }
                else if (cur is Pullenti.Ner.NumberToken)
                {
                    if (text.Length > 0 && (cur.IsWhitespaceBefore || text[text.Length - 1] != '-'))
                    {
                        text.Append(' ');
                    }
                    Pullenti.Ner.NumberToken number = cur as Pullenti.Ner.NumberToken;
                    bool singleAdjectiveWord = cur.Morph.Class.IsAdjective
                                               && number.Typ == Pullenti.Ner.NumberSpellingType.Words
                                               && number.BeginToken == number.EndToken
                                               && (number.BeginToken is Pullenti.Ner.TextToken);
                    if (singleAdjectiveWord)
                    {
                        text.Append((number.BeginToken as Pullenti.Ner.TextToken).Term);
                    }
                    else
                    {
                        text.Append(number.Value);
                    }
                }
                else if (cur is Pullenti.Ner.MetaToken)
                {
                    if (ignoreGeoReferent && cur != begin && cur.GetReferent() != null && cur.GetReferent().TypeName == "GEO")
                    {
                        continue;
                    }
                    Pullenti.Ner.MetaToken meta   = cur as Pullenti.Ner.MetaToken;
                    string                 nested = GetNameEx(meta.BeginToken, meta.EndToken, cla, mc, gender, ignoreBracketsAndHiphens, ignoreGeoReferent);
                    if (!string.IsNullOrEmpty(nested))
                    {
                        if (text.Length > 0 && (cur.IsWhitespaceBefore || text[text.Length - 1] != '-'))
                        {
                            text.Append(' ');
                        }
                        text.Append(nested);
                    }
                }

                if (cur == end)
                {
                    break;
                }
            }

            return (text.Length == 0 ? null : text.ToString());
        }

        /// <summary>
        /// Scans the word forms of <paramref name="word"/> and returns the NormalCase of the first one that passes
        /// the class, case and gender filters, preferring any later passing form whose NormalCase equals the
        /// token's Term. A filter is skipped when its argument is zero/undefined. Returns null when no form passes.
        /// </summary>
        private static string _pickNormalCaseForName(Pullenti.Ner.TextToken word, Pullenti.Morph.MorphClass cla, Pullenti.Morph.MorphCase mc, Pullenti.Morph.MorphGender gender)
        {
            string chosen = null;
            foreach (Pullenti.Morph.MorphBaseInfo item in word.Morph.Items)
            {
                Pullenti.Morph.MorphWordForm form = item as Pullenti.Morph.MorphWordForm;
                if (form == null)
                {
                    continue;
                }
                if (cla.Value != 0 && ((form.Class.Value & cla.Value)) == 0)
                {
                    continue;
                }
                if (!mc.IsUndefined && ((form.Case & mc)).IsUndefined)
                {
                    continue;
                }
                if (gender != Pullenti.Morph.MorphGender.Undefined && ((form.Gender & gender)) == Pullenti.Morph.MorphGender.Undefined)
                {
                    continue;
                }
                if (chosen == null || form.NormalCase == word.Term)
                {
                    chosen = form.NormalCase;
                }
            }
            return chosen;
        }