Beispiel #1
0
 /// <summary>
 /// Computes <c>Coef</c> for the case where <c>From</c> and <c>To</c> are treated as
 /// members of one list. Every rejecting path returns before <c>Coef</c> is assigned.
 /// </summary>
 void _calcList()
 {
     Pullenti.Morph.MorphCase fromCase = FromMorph.Case;
     // Without a target item there is nothing to evaluate.
     if (To == null)
     {
         return;
     }
     bool typesDiffer = From.Source.Typ != To.Source.Typ;
     if (typesDiffer)
     {
         // Items of different types are accepted only when both are a noun or a
         // before/after participle and they share the same preposition.
         bool compatible = From.Source.Prep == To.Source.Prep
             && (From.Source.Typ == SentItemType.Noun || From.Source.Typ == SentItemType.PartBefore || From.Source.Typ == SentItemType.PartAfter)
             && (To.Source.Typ == SentItemType.Noun || To.Source.Typ == SentItemType.PartBefore || To.Source.Typ == SentItemType.PartAfter);
         if (!compatible)
         {
             return;
         }
     }
     Pullenti.Morph.MorphCase toCase = ToMorph.Case;
     bool fromHasPrep = !string.IsNullOrEmpty(FromPrep);
     bool toHasPrep = !string.IsNullOrEmpty(To.Source.Prep);
     if (((fromCase & toCase)).IsUndefined)
     {
         // No common case: tolerated only when at least one side has no case at all
         // and the preposition is not present on the From side alone.
         if (!fromCase.IsUndefined && !toCase.IsUndefined)
         {
             return;
         }
         if (fromHasPrep && !toHasPrep)
         {
             return;
         }
         Coef = Pullenti.Semantic.SemanticService.Params.List;
     }
     else
     {
         // Common case found: start from the base weight and penalise a preposition
         // that appears on one side only.
         Coef = Pullenti.Semantic.SemanticService.Params.List;
         if (!fromHasPrep && toHasPrep)
         {
             Coef /= 2;
         }
         else if (fromHasPrep && !toHasPrep)
         {
             Coef /= 4;
         }
     }
     // Boost when the last words of both items are the same word.
     Pullenti.Ner.TextToken fromEnd = From.Source.EndToken as Pullenti.Ner.TextToken;
     Pullenti.Ner.TextToken toEnd = To.Source.EndToken as Pullenti.Ner.TextToken;
     if (fromEnd != null && toEnd != null && fromEnd.IsValue(toEnd.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Singular, Pullenti.Morph.MorphGender.Undefined, false), null))
     {
         Coef *= 10;
     }
     // Items of different types get half the weight.
     if (typesDiffer)
     {
         Coef /= 2;
     }
 }
Beispiel #2
0
        /// <summary>
        /// Checks that a sequence may start at this token, the token itself being an
        /// opening bracket or an opening quote.
        /// </summary>
        /// <param name="t">token to check</param>
        /// <param name="quotesOnly">require a quote character, not a bracket</param>
        /// <param name="ignoreWhitespaces">when true, the whitespace checks around the token are skipped</param>
        /// <returns>true when the token can open a sequence</returns>
        public static bool CanBeStartOfSequence(Pullenti.Ner.Token t, bool quotesOnly = false, bool ignoreWhitespaces = false)
        {
            Pullenti.Ner.TextToken textTok = t as Pullenti.Ner.TextToken;
            // Only a text token that is followed by something can open a sequence.
            if (textTok == null || textTok.Next == null)
            {
                return false;
            }
            char first = textTok.Term[0];
            if (char.IsLetterOrDigit(first))
            {
                return false;
            }
            if (quotesOnly && (m_Quotes.IndexOf(first) < 0))
            {
                return false;
            }
            if (m_OpenChars.IndexOf(first) < 0)
            {
                return false;
            }
            if (ignoreWhitespaces)
            {
                return true;
            }
            if (t.IsWhitespaceAfter)
            {
                // Whitespace after the token: there must also be whitespace before it,
                // unless the previous token is a table control character.
                if (!t.IsWhitespaceBefore)
                {
                    bool afterTableChar = t.Previous != null && t.Previous.IsTableControlChar;
                    if (!afterTableChar)
                    {
                        return false;
                    }
                }
                return !t.IsNewlineAfter;
            }
            if (t.IsWhitespaceBefore)
            {
                return true;
            }
            // Glued on both sides: directly after a letter or digit and before a
            // lower-case or non-letter token, only '(' is accepted.
            if (!char.IsLetterOrDigit(t.Kit.GetTextCharacter(t.BeginChar - 1)))
            {
                return true;
            }
            if (t.Next != null && (t.Next.Chars.IsAllLower || !t.Next.Chars.IsLetter))
            {
                if (first != '(')
                {
                    return false;
                }
            }
            return true;
        }
Beispiel #3
0
 /// <summary>
 /// Gets the information about the word form of a token.
 /// </summary>
 /// <param name="t">token</param>
 /// <returns>statistical information for the text; null when the token is not a text token</returns>
 public StatisticWordInfo GetWordInfo(Pullenti.Ner.Token t)
 {
     Pullenti.Ner.TextToken textTok = t as Pullenti.Ner.TextToken;
     // Lookup is delegated to FindItem; only text tokens are looked up.
     return textTok == null ? null : this.FindItem(textTok, true);
 }
Beispiel #4
0
        /// <summary>
        /// Reads a sex designation at the given position and stores it in the person referent.
        /// </summary>
        /// <param name="pr">person referent whose IsMale / IsFemale flag is set</param>
        /// <param name="t">token to start from; null yields null</param>
        /// <returns>the last token consumed, or null when no sex designation was recognised</returns>
        public static Pullenti.Ner.Token CreateSex(Pullenti.Ner.Person.PersonReferent pr, Pullenti.Ner.Token t)
        {
            if (t == null)
            {
                return null;
            }
            // Skip a leading "ПОЛ" label, hyphens and colons (never past the last token).
            for (; t.Next != null; t = t.Next)
            {
                bool isLabelPart = t.IsValue("ПОЛ", null) || t.IsHiphen || t.IsChar(':');
                if (!isLabelPart)
                {
                    break;
                }
            }
            Pullenti.Ner.TextToken word = t as Pullenti.Ner.TextToken;
            if (word == null)
            {
                return null;
            }

            if ((word.Term == "МУЖ" || word.Term == "МУЖС" || word.Term == "МУЖСК") || word.IsValue("МУЖСКОЙ", null))
            {
                pr.IsMale = true;
            }
            else if ((word.Term == "ЖЕН" || word.Term == "ЖЕНС" || word.Term == "ЖЕНСК") || word.IsValue("ЖЕНСКИЙ", null))
            {
                pr.IsFemale = true;
            }
            else
            {
                return null;
            }
            // Absorb a trailing "ПОЛ" and any dots.
            while (t.Next != null && (t.Next.IsValue("ПОЛ", null) || t.Next.IsChar('.')))
            {
                t = t.Next;
            }
            return t;
        }
Beispiel #5
0
        /// <summary>
        /// Checks whether the term of a token ends with the letter used as a multiplication
        /// sign: Latin "X" or Cyrillic "Х".
        /// </summary>
        /// <param name="t">token to test; anything other than a text token yields false</param>
        /// <returns>true when the last character of the term is Latin X or Cyrillic Х</returns>
        public static bool IsMultCharEnd(Pullenti.Ner.Token t)
        {
            Pullenti.Ner.TextToken tt = t as Pullenti.Ner.TextToken;
            if (tt == null)
            {
                return(false);
            }
            string term = tt.Term;

            // A missing or empty term cannot end with anything.
            if (string.IsNullOrEmpty(term))
            {
                return(false);
            }
            // Compare the last character ordinally: string.EndsWith(string) is
            // culture-sensitive and its result may depend on the current culture.
            char last = term[term.Length - 1];
            // 'X' is Latin capital X (U+0058), '\u0425' is Cyrillic capital Х.
            return(last == 'X' || last == '\u0425');
        }
Beispiel #6
0
 /// <summary>
 /// Checks whether a token is a single-character multiplication sign.
 /// </summary>
 /// <param name="t">token to test; anything other than a text token yields false</param>
 /// <returns>true when the token is one character long and is one of the accepted sign characters</returns>
 public static bool IsMultChar(Pullenti.Ner.Token t)
 {
     Pullenti.Ner.TextToken textTok = t as Pullenti.Ner.TextToken;
     return textTok != null && textTok.LengthChar == 1 && textTok.IsCharOf("*xXхХ·×◦∙•");
 }
Beispiel #7
0
        // Matching up to a given similarity.
        // simD - the "similarity" parameter (0.05..1); values outside that range
        // fall back to the plain TryParseAll.
        // Returns the matches with the greatest TokensCount (all ties are kept),
        // or null when nothing matched.
        public List <TerminToken> TryParseAllSim(Pullenti.Ner.Token token, double simD)
        {
            if (simD >= 1 || (simD < 0.05))
            {
                return this.TryParseAll(token, TerminParseAttr.No);
            }
            if (Termins.Count == 0 || token == null)
            {
                return null;
            }
            // Start from the token itself or, for a referent token, from its first inner token.
            Pullenti.Ner.TextToken start = token as Pullenti.Ner.TextToken;
            if (start == null)
            {
                Pullenti.Ner.ReferentToken refTok = token as Pullenti.Ner.ReferentToken;
                if (refTok != null)
                {
                    start = refTok.BeginToken as Pullenti.Ner.TextToken;
                }
            }
            List <TerminToken> best = null;

            foreach (Termin termin in Termins)
            {
                // Skip termins whose language is known and shares nothing with the token's known language.
                bool langMismatch = !termin.Lang.IsUndefined
                    && !token.Morph.Language.IsUndefined
                    && ((token.Morph.Language & termin.Lang)).IsUndefined;
                if (langMismatch)
                {
                    continue;
                }
                TerminToken hit = termin.TryParseSim(start, simD, TerminParseAttr.No);
                if (hit == null)
                {
                    continue;
                }
                hit.Termin = termin;
                if (best == null || hit.TokensCount > best[0].TokensCount)
                {
                    // A strictly longer match replaces everything collected so far.
                    best = new List <TerminToken> { hit };
                }
                else if (hit.TokensCount == best[0].TokensCount)
                {
                    best.Add(hit);
                }
            }
            return best;
        }
Beispiel #8
0
        /// <summary>
        /// Compares the last tokens of two list items and reports whether they look alike.
        /// </summary>
        /// <param name="mt1">first list item</param>
        /// <param name="mt2">second list item</param>
        /// <returns>
        /// true when either end token is not a text token, when the terms share a tail of at
        /// least two characters, when they are the same word, or when their morphology is compatible
        /// </returns>
        static bool _compareListItemTails(Pullenti.Ner.MetaToken mt1, Pullenti.Ner.MetaToken mt2)
        {
            Pullenti.Ner.TextToken tail1 = mt1.EndToken as Pullenti.Ner.TextToken;
            Pullenti.Ner.TextToken tail2 = mt2.EndToken as Pullenti.Ner.TextToken;
            if (tail1 == null || tail2 == null)
            {
                return true;
            }
            // Count equal characters from the end of both terms (index 0 is never compared).
            int common = 0;
            int pos1 = tail1.Term.Length - 1;
            int pos2 = tail2.Term.Length - 1;
            while (pos1 > 0 && pos2 > 0 && tail1.Term[pos1] == tail2.Term[pos2])
            {
                pos1--;
                pos2--;
                common++;
            }
            if (common >= 2)
            {
                return true;
            }
            string normal2 = tail2.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Singular, Pullenti.Morph.MorphGender.Undefined, false);
            if (tail1.IsValue(normal2, null))
            {
                return true;
            }
            // Otherwise number and case must overlap.
            if (((tail1.Morph.Number & tail2.Morph.Number)) == Pullenti.Morph.MorphNumber.Undefined)
            {
                return false;
            }
            if (((tail1.Morph.Case & tail2.Morph.Case)).IsUndefined)
            {
                return false;
            }
            // Rejected only when both the verb flag and the adjective flag differ.
            return tail1.Morph.Class.IsVerb == tail2.Morph.Class.IsVerb || tail1.Morph.Class.IsAdjective == tail2.Morph.Class.IsAdjective;
        }
Beispiel #9
0
        /// <summary>
        /// Reads one token from the stream. A leading short selects the token class:
        /// 0 - no token, 1 - text, 2 - number, 3 - referent, anything else - meta token.
        /// </summary>
        /// <param name="stream">stream to read from</param>
        /// <param name="kit">analysis kit passed to the created token</param>
        /// <param name="vers">format version passed on to the token's Deserialize</param>
        /// <returns>the token read, or null when the type marker is 0</returns>
        static Pullenti.Ner.Token DeserializeToken(Stream stream, Pullenti.Ner.Core.AnalysisKit kit, int vers)
        {
            short kind = DeserializeShort(stream);
            Pullenti.Ner.Token token;

            switch (kind)
            {
                case 0:
                    return null;
                case 1:
                    token = new Pullenti.Ner.TextToken(null, kit);
                    break;
                case 2:
                    token = new Pullenti.Ner.NumberToken(null, null, null, Pullenti.Ner.NumberSpellingType.Digit, kit);
                    break;
                case 3:
                    token = new Pullenti.Ner.ReferentToken(null, null, null, kit);
                    break;
                default:
                    token = new Pullenti.Ner.MetaToken(null, null, kit);
                    break;
            }
            token.Deserialize(stream, kit, vers);

            Pullenti.Ner.MetaToken meta = token as Pullenti.Ner.MetaToken;
            if (meta == null)
            {
                return token;
            }
            // A meta token is followed by its inner token chain: attach its first and last element.
            Pullenti.Ner.Token inner = DeserializeTokens(stream, kit, vers);
            if (inner != null)
            {
                meta.m_BeginToken = inner;
                while (inner.Next != null)
                {
                    inner = inner.Next;
                }
                meta.m_EndToken = inner;
            }
            return token;
        }
Beispiel #10
0
        /// <summary>
        /// Sets <c>BaseLanguage.Value</c> from the token chain: counts text tokens per
        /// language value and combines every value seen on more than half of the counted tokens.
        /// </summary>
        void DefineBaseLanguage()
        {
            Dictionary <short, int> counts = new Dictionary <short, int>();
            int counted = 0;

            for (Pullenti.Ner.Token cur = FirstToken; cur != null; cur = cur.Next)
            {
                Pullenti.Ner.TextToken word = cur as Pullenti.Ner.TextToken;
                // Only text tokens with a defined language take part in the statistics.
                if (word == null || word.Morph.Language.IsUndefined)
                {
                    continue;
                }
                short lang = word.Morph.Language.Value;
                int seen;
                counts.TryGetValue(lang, out seen);
                counts[lang] = seen + 1;
                counted++;
            }
            int half = counted / 2;
            short mask = (short)0;

            foreach (KeyValuePair <short, int> entry in counts)
            {
                if (entry.Value > half)
                {
                    mask |= entry.Key;
                }
            }
            BaseLanguage.Value = mask;
        }
Beispiel #11
0
 /// <summary>
 /// Prepends the given token, joined with a hyphen, to <c>NormalValue</c> and, when it
 /// is set, to <c>SingleNumberValue</c>.
 /// </summary>
 /// <param name="t">prefix token; null leaves the values unchanged</param>
 /// <param name="ignoreGender">passed through to CheckAccord when looking for a matching word form</param>
 public void CorrectPrefix(Pullenti.Ner.TextToken t, bool ignoreGender)
 {
     if (t == null)
     {
         return;
     }
     // Default prefix is the raw term; the first word form with the same class that
     // passes CheckAccord supplies its normal forms instead.
     string normalPrefix = t.Term;
     string singlePrefix = t.Term;
     foreach (Pullenti.Morph.MorphBaseInfo form in t.Morph.Items)
     {
         if (form.Class == Class && this.CheckAccord(form, ignoreGender, false))
         {
             Pullenti.Morph.MorphWordForm wordForm = form as Pullenti.Morph.MorphWordForm;
             normalPrefix = wordForm.NormalCase;
             singlePrefix = wordForm.NormalFull ?? wordForm.NormalCase;
             break;
         }
     }
     NormalValue = string.Format("{0}-{1}", normalPrefix, NormalValue);
     if (SingleNumberValue != null)
     {
         SingleNumberValue = string.Format("{0}-{1}", singlePrefix, SingleNumberValue);
     }
 }
Beispiel #12
0
 /// <summary>
 /// Rates how abnormal a token looks.
 /// </summary>
 /// <param name="t">token to rate</param>
 /// <returns>
 /// 0 for numbers, non-text tokens, non-letter tokens and words shorter than 4 characters;
 /// -1 for a word with a dictionary word form; 2 for letters that are neither Latin nor
 /// Cyrillic and for unknown words longer than 15 characters; 1 for other unknown words
 /// </returns>
 static int CalcAbnormalCoef(Pullenti.Ner.Token t)
 {
     Pullenti.Ner.TextToken word = t as Pullenti.Ner.TextToken;
     if ((t is Pullenti.Ner.NumberToken) || word == null || !word.Chars.IsLetter)
     {
         return 0;
     }
     if (!word.Chars.IsLatinLetter && !word.Chars.IsCyrillicLetter)
     {
         return 2;
     }
     if (word.LengthChar < 4)
     {
         return 0;
     }
     // Any word form known to the dictionary makes the word normal.
     foreach (Pullenti.Morph.MorphBaseInfo form in word.Morph.Items)
     {
         if ((form as Pullenti.Morph.MorphWordForm).IsInDictionary)
         {
             return -1;
         }
     }
     return word.LengthChar > 15 ? 2 : 1;
 }
Beispiel #13
0
        /// <summary>
        /// Tries to recognise a measurement unit starting at the given token.
        /// </summary>
        /// <param name="t">first token to analyse; null yields null</param>
        /// <param name="addUnits">optional extra termin collection, consulted after the built-in checks fail (may be null)</param>
        /// <param name="prev">previously parsed unit token, if any; affects the handling of "В", of a single whitespace-preceded match and of a lone lower-case "м"/"m"</param>
        /// <param name="parseUnknownUnits">when true, a short unrecognised word may be returned as a doubtful unit with UnknownName set</param>
        /// <returns>the parsed unit token, or null when nothing was recognised</returns>
        public static UnitToken TryParse(Pullenti.Ner.Token t, Pullenti.Ner.Core.TerminCollection addUnits, UnitToken prev, bool parseUnknownUnits = false)
        {
            if (t == null)
            {
                return(null);
            }
            // t0 remembers the start; pow is the exponent, stored negated when isNeg is set.
            Pullenti.Ner.Token t0 = t;
            int  pow   = 1;
            bool isNeg = false;

            // A leading slash/backslash or "НА"/"OF"/"PER" sets isNeg.
            if ((t.IsCharOf("\\/") || t.IsValue("НА", null) || t.IsValue("OF", null)) || t.IsValue("PER", null))
            {
                isNeg = true;
                t     = t.Next;
            }
            // "В" is handled the same way, but only when a previous unit exists.
            else if (t.IsValue("В", null) && prev != null)
            {
                isNeg = true;
                t     = t.Next;
            }
            // A multiplication sign is simply skipped.
            else if (MeasureHelper.IsMultChar(t))
            {
                t = t.Next;
            }
            Pullenti.Ner.TextToken tt = t as Pullenti.Ner.TextToken;
            if (tt == null)
            {
                return(null);
            }
            // "square" prefix: power 2; an optional '.' after it is skipped.
            // Only tt advances here - t keeps pointing at the prefix word.
            if (tt.Term == "КВ" || tt.Term == "КВАДР" || tt.IsValue("КВАДРАТНЫЙ", null))
            {
                pow = 2;
                tt  = tt.Next as Pullenti.Ner.TextToken;
                if (tt != null && tt.IsChar('.'))
                {
                    tt = tt.Next as Pullenti.Ner.TextToken;
                }
                if (tt == null)
                {
                    return(null);
                }
            }
            // "cubic" prefix: power 3, same handling as above.
            else if (tt.Term == "КУБ" || tt.Term == "КУБИЧ" || tt.IsValue("КУБИЧЕСКИЙ", null))
            {
                pow = 3;
                tt  = tt.Next as Pullenti.Ner.TextToken;
                if (tt != null && tt.IsChar('.'))
                {
                    tt = tt.Next as Pullenti.Ner.TextToken;
                }
                if (tt == null)
                {
                    return(null);
                }
            }
            // "µ": parse the rest recursively, then look for a Micro-factor unit to substitute.
            // When nothing is substituted, execution falls through with tt still on "µ".
            else if (tt.Term == "µ")
            {
                UnitToken res = TryParse(tt.Next, addUnits, prev, false);
                if (res != null)
                {
                    foreach (Unit u in UnitsHelper.Units)
                    {
                        // NOTE(review): "мк" is prepended to the candidate's name, not to the parsed
                        // unit's name - confirm that this is the intended direction of the comparison.
                        // NOTE(review): res.Unit is dereferenced without a null check, although some
                        // return paths below assign only ExtOnto or UnknownName (unless _correct()
                        // fills Unit - confirm).
                        if (u.Factor == UnitsFactors.Micro && string.Compare("мк" + u.NameCyr, res.Unit.NameCyr, true) == 0)
                        {
                            res.Unit       = u;
                            res.BeginToken = tt;
                            res.Pow        = pow;
                            if (isNeg)
                            {
                                res.Pow = -pow;
                            }
                            return(res);
                        }
                    }
                }
            }
            // Look the token up among the built-in unit termins.
            List <Pullenti.Ner.Core.TerminToken> toks = UnitsHelper.Termins.TryParseAll(tt, Pullenti.Ner.Core.TerminParseAttr.No);

            if (toks != null)
            {
                // With a previous unit, a single match that starts at t0 and has whitespace before it is rejected.
                if ((prev != null && tt == t0 && toks.Count == 1) && t.IsWhitespaceBefore)
                {
                    return(null);
                }
                // A one-token match that is also a preposition with fewer than 3 whitespaces after it
                // is rejected when it starts a noun phrase, is followed by a non-digit number,
                // or is followed by something that itself parses as a unit.
                if (toks[0].BeginToken == toks[0].EndToken && tt.Morph.Class.IsPreposition && (tt.WhitespacesAfterCount < 3))
                {
                    if (Pullenti.Ner.Core.NounPhraseHelper.TryParse(tt, Pullenti.Ner.Core.NounPhraseParseAttr.ParsePreposition, 0, null) != null)
                    {
                        return(null);
                    }
                    if (tt.Next is Pullenti.Ner.NumberToken)
                    {
                        if ((tt.Next as Pullenti.Ner.NumberToken).Typ != Pullenti.Ner.NumberSpellingType.Digit)
                        {
                            return(null);
                        }
                    }
                    UnitToken nex = TryParse(tt.Next, addUnits, null, false);
                    if (nex != null)
                    {
                        return(null);
                    }
                }
                // A lone lower-case "М"/"M" after a unit of kind Length is mapped to UnitsHelper.uMinute.
                if (toks[0].BeginToken == toks[0].EndToken && ((toks[0].BeginToken.IsValue("М", null) || toks[0].BeginToken.IsValue("M", null))) && toks[0].BeginToken.Chars.IsAllLower)
                {
                    if (prev != null && prev.Unit != null && prev.Unit.Kind == Pullenti.Ner.Measure.MeasureKind.Length)
                    {
                        UnitToken res = new UnitToken(t0, toks[0].EndToken)
                        {
                            Unit = UnitsHelper.uMinute
                        };
                        res.Pow = pow;
                        if (isNeg)
                        {
                            res.Pow = -pow;
                        }
                        return(res);
                    }
                }
                // Build one candidate per match.
                List <UnitToken> uts = new List <UnitToken>();
                foreach (Pullenti.Ner.Core.TerminToken tok in toks)
                {
                    UnitToken res = new UnitToken(t0, tok.EndToken)
                    {
                        Unit = tok.Termin.Tag as Unit
                    };
                    res.Pow = pow;
                    if (isNeg)
                    {
                        res.Pow = -pow;
                    }
                    // A unit with BaseMultiplier 1000000 whose source text starts with a lower-case letter
                    // is replaced by the Milli-factor unit with the same Cyrillic name
                    // (presumably to tell a milli prefix from a mega prefix - TODO confirm).
                    if (res.Unit.BaseMultiplier == 1000000 && (t0 is Pullenti.Ner.TextToken) && char.IsLower((t0 as Pullenti.Ner.TextToken).GetSourceText()[0]))
                    {
                        foreach (Unit u in UnitsHelper.Units)
                        {
                            if (u.Factor == UnitsFactors.Milli && string.Compare(u.NameCyr, res.Unit.NameCyr, true) == 0)
                            {
                                res.Unit = u;
                                break;
                            }
                        }
                    }
                    res._correct();
                    res._checkDoubt();
                    uts.Add(res);
                }
                // Selection order: the candidate whose Keyword starts furthest to the right
                // (>= lets the last of equal candidates win), then the first non-doubtful
                // candidate, then simply the first one.
                int       max  = 0;
                UnitToken best = null;
                foreach (UnitToken ut in uts)
                {
                    if (ut.Keyword != null)
                    {
                        if (ut.Keyword.BeginChar >= max)
                        {
                            max  = ut.Keyword.BeginChar;
                            best = ut;
                        }
                    }
                }
                if (best != null)
                {
                    return(best);
                }
                foreach (UnitToken ut in uts)
                {
                    if (!ut.IsDoubt)
                    {
                        return(ut);
                    }
                }
                return(uts[0]);
            }
            // Degree sign: either a "º"/"°" character or the sequence '<', О/O/0, '>'.
            Pullenti.Ner.Token t1 = null;
            if (t.IsCharOf("º°"))
            {
                t1 = t;
            }
            else if ((t.IsChar('<') && t.Next != null && t.Next.Next != null) && t.Next.Next.IsChar('>') && ((t.Next.IsValue("О", null) || t.Next.IsValue("O", null) || (((t.Next is Pullenti.Ner.NumberToken) && (t.Next as Pullenti.Ner.NumberToken).Value == "0")))))
            {
                t1 = t.Next.Next;
            }
            if (t1 != null)
            {
                UnitToken res = new UnitToken(t0, t1)
                {
                    Unit = UnitsHelper.uGradus
                };
                res._checkDoubt();
                // Optional continuation: a comma, then "ПО", then a scale name.
                t = t1.Next;
                if (t != null && t.IsComma)
                {
                    t = t.Next;
                }
                if (t != null && t.IsValue("ПО", null))
                {
                    t = t.Next;
                }
                if (t is Pullenti.Ner.TextToken)
                {
                    string vv = (t as Pullenti.Ner.TextToken).Term;
                    // A scale name extends the token and clears the doubt flag.
                    if (vv == "C" || vv == "С" || vv.StartsWith("ЦЕЛЬС"))
                    {
                        res.Unit     = UnitsHelper.uGradusC;
                        res.IsDoubt  = false;
                        res.EndToken = t;
                    }
                    if (vv == "F" || vv.StartsWith("ФАР"))
                    {
                        res.Unit     = UnitsHelper.uGradusF;
                        res.IsDoubt  = false;
                        res.EndToken = t;
                    }
                }
                return(res);
            }
            // "оС"/"oC" written in exactly this letter case is mapped to UnitsHelper.uGradusC.
            if ((t is Pullenti.Ner.TextToken) && ((t.IsValue("ОС", null) || t.IsValue("OC", null))))
            {
                string str = t.GetSourceText();
                if (str == "оС" || str == "oC")
                {
                    UnitToken res = new UnitToken(t, t)
                    {
                        Unit = UnitsHelper.uGradusC, IsDoubt = false
                    };
                    return(res);
                }
            }
            // '%' followed (optionally inside parentheses) by a word starting with "ОБ" is
            // UnitsHelper.uAlco; a plain '%' is UnitsHelper.uPercent.
            if (t.IsChar('%'))
            {
                Pullenti.Ner.Token tt1 = t.Next;
                if (tt1 != null && tt1.IsChar('('))
                {
                    tt1 = tt1.Next;
                }
                if ((tt1 is Pullenti.Ner.TextToken) && (tt1 as Pullenti.Ner.TextToken).Term.StartsWith("ОБ"))
                {
                    UnitToken re = new UnitToken(t, tt1)
                    {
                        Unit = UnitsHelper.uAlco
                    };
                    if (re.EndToken.Next != null && re.EndToken.Next.IsChar('.'))
                    {
                        re.EndToken = re.EndToken.Next;
                    }
                    // The closing ')' is absorbed only when a '(' directly followed the '%'.
                    if (re.EndToken.Next != null && re.EndToken.Next.IsChar(')') && t.Next.IsChar('('))
                    {
                        re.EndToken = re.EndToken.Next;
                    }
                    return(re);
                }
                return(new UnitToken(t, t)
                {
                    Unit = UnitsHelper.uPercent
                });
            }
            // Caller-supplied units.
            if (addUnits != null)
            {
                Pullenti.Ner.Core.TerminToken tok = addUnits.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
                if (tok != null)
                {
                    UnitToken res = new UnitToken(t0, tok.EndToken)
                    {
                        ExtOnto = tok.Termin.Tag as Pullenti.Ner.Measure.UnitReferent
                    };
                    // NOTE(review): this extends tok.EndToken after res was already built from it;
                    // res.EndToken is not updated here - confirm whether res.EndToken was meant.
                    if (tok.EndToken.Next != null && tok.EndToken.Next.IsChar('.'))
                    {
                        tok.EndToken = tok.EndToken.Next;
                    }
                    res.Pow = pow;
                    if (isNeg)
                    {
                        res.Pow = -pow;
                    }
                    res._correct();
                    return(res);
                }
            }
            // Unknown units are considered only on request.
            if (!parseUnknownUnits)
            {
                return(null);
            }
            // The candidate must be a text token with Chars.IsLetter, at most 5 characters long
            // and with at most 2 whitespaces before it.
            if ((t.WhitespacesBeforeCount > 2 || !t.Chars.IsLetter || t.LengthChar > 5) || !(t is Pullenti.Ner.TextToken))
            {
                return(null);
            }
            if (Pullenti.Ner.Core.MiscHelper.CanBeStartOfSentence(t))
            {
                return(null);
            }
            // NOTE(review): both assignments set t1 = t, so a following '.' is never included in the
            // result - possibly t.Next was intended in the second one.
            t1 = t;
            if (t.Next != null && t.Next.IsChar('.'))
            {
                t1 = t;
            }
            // The word must be followed by the end of text, a gap of more than 2 whitespaces, a comma,
            // a slash/backslash, a table control character or a multiplication sign.
            bool ok = false;

            if (t1.Next == null || t1.WhitespacesAfterCount > 2)
            {
                ok = true;
            }
            else if (t1.Next.IsComma || t1.Next.IsCharOf("\\/") || t1.Next.IsTableControlChar)
            {
                ok = true;
            }
            else if (MeasureHelper.IsMultChar(t1.Next))
            {
                ok = true;
            }
            if (!ok)
            {
                return(null);
            }
            // NOTE(review): the LengthChar > 7 branch cannot trigger, because tokens longer than
            // 5 characters were already rejected above; mc therefore has no effect on the result.
            Pullenti.Morph.MorphClass mc = t.GetMorphClassInDictionary();
            if (mc.IsUndefined)
            {
            }
            else if (t.LengthChar > 7)
            {
                return(null);
            }
            // Unknown unit: always doubtful, named by the source text of the word.
            UnitToken res1 = new UnitToken(t0, t1)
            {
                Pow = pow, IsDoubt = true
            };

            res1.UnknownName = (t as Pullenti.Ner.TextToken).GetSourceText();
            res1._correct();
            return(res1);
        }
Beispiel #14
0
            public static List <PersonItemToken> TryAttach(Pullenti.Ner.Token t)
            {
                List <PersonItemToken> res = new List <PersonItemToken>();

                for (; t != null; t = t.Next)
                {
                    if (t.IsNewlineBefore && res.Count > 0)
                    {
                        break;
                    }
                    Pullenti.Ner.TextToken tt = t as Pullenti.Ner.TextToken;
                    if (tt == null)
                    {
                        break;
                    }
                    string s = tt.Term;
                    if (!char.IsLetter(s[0]))
                    {
                        break;
                    }
                    if (((s.Length == 1 || s == "ДЖ")) && !tt.Chars.IsAllLower)
                    {
                        Pullenti.Ner.Token t1 = t;
                        if (t1.Next != null && t1.Next.IsChar('.'))
                        {
                            t1 = t1.Next;
                        }
                        res.Add(new PersonItemToken(t, t1)
                        {
                            Typ = Pullenti.Ner.Org.Internal.OrgItemEponymToken.PersonItemType.Initial, Value = s
                        });
                        t = t1;
                        continue;
                    }
                    if (tt.IsAnd)
                    {
                        res.Add(new PersonItemToken(t, t)
                        {
                            Typ = Pullenti.Ner.Org.Internal.OrgItemEponymToken.PersonItemType.And
                        });
                        continue;
                    }
                    if (tt.Morph.Class.IsPronoun || tt.Morph.Class.IsPersonalPronoun)
                    {
                        break;
                    }
                    if (tt.Chars.IsAllLower)
                    {
                        Pullenti.Morph.MorphClass mc = tt.GetMorphClassInDictionary();
                        if (mc.IsPreposition || mc.IsVerb || mc.IsAdverb)
                        {
                            break;
                        }
                        Pullenti.Ner.Token t1 = t;
                        if (t1.Next != null && !t1.IsWhitespaceAfter && t1.Next.IsChar('.'))
                        {
                            t1 = t1.Next;
                        }
                        res.Add(new PersonItemToken(t, t1)
                        {
                            Typ = Pullenti.Ner.Org.Internal.OrgItemEponymToken.PersonItemType.LocaseWord, Value = s
                        });
                        t = t1;
                        continue;
                    }
                    if (tt.Morph.Class.IsProperName)
                    {
                        res.Add(new PersonItemToken(t, t)
                        {
                            Typ = Pullenti.Ner.Org.Internal.OrgItemEponymToken.PersonItemType.Name, Value = s
                        });
                    }
                    else if ((t.Next != null && t.Next.IsHiphen && (t.Next.Next is Pullenti.Ner.TextToken)) && !t.Next.IsWhitespaceAfter)
                    {
                        res.Add(new PersonItemToken(t, t.Next.Next)
                        {
                            Typ = Pullenti.Ner.Org.Internal.OrgItemEponymToken.PersonItemType.Surname, Value = string.Format("{0}-{1}", s, (t.Next.Next as Pullenti.Ner.TextToken).Term)
                        });
                        t = t.Next.Next;
                    }
                    else
                    {
                        res.Add(new PersonItemToken(t, t)
                        {
                            Typ = Pullenti.Ner.Org.Internal.OrgItemEponymToken.PersonItemType.Surname, Value = s
                        });
                    }
                }
                return(res.Count > 0 ? res : null);
            }
Beispiel #15
0
        /// <summary>
        /// Tries to recognise an eponym ("named after ...") fragment that starts at token <paramref name="t"/>.
        /// </summary>
        /// <param name="t">First token of the candidate fragment; may be null.</param>
        /// <param name="mustHasPrefix">
        /// For text tokens: when true, null is returned unless the fragment starts with an explicit
        /// "ИМЕНИ"/"ІМЕНІ" or "ИМ"/"ІМ" marker. Not consulted when <paramref name="t"/> is not a text token.
        /// </param>
        /// <returns>
        /// A token spanning the recognised fragment together with its eponym strings,
        /// or null when nothing is recognised.
        /// </returns>
        public static OrgItemEponymToken TryAttach(Pullenti.Ner.Token t, bool mustHasPrefix = false)
        {
            Pullenti.Ner.TextToken tt = t as Pullenti.Ner.TextToken;
            if (tt == null)
            {
                // Non-text start token: only a few well-known dates or an "age + capitalised word" pair qualify.
                if (t == null)
                {
                    return(null);
                }
                Pullenti.Ner.Referent r1 = t.GetReferent();
                if (r1 != null && r1.TypeName == "DATE")
                {
                    string str = r1.ToString().ToUpper();
                    if ((str == "1 МАЯ" || str == "7 ОКТЯБРЯ" || str == "9 МАЯ") || str == "8 МАРТА")
                    {
                        OrgItemEponymToken dt = new OrgItemEponymToken(t, t)
                        {
                            Eponyms = new List <string>()
                        };
                        dt.Eponyms.Add(str);
                        return(dt);
                    }
                }
                Pullenti.Ner.NumberToken age = Pullenti.Ner.Core.NumberHelper.TryParseAge(t);
                if ((age != null && (((age.EndToken.Next is Pullenti.Ner.TextToken) || (age.EndToken.Next is Pullenti.Ner.ReferentToken))) && (age.WhitespacesAfterCount < 3)) && !age.EndToken.Next.Chars.IsAllLower && age.EndToken.Next.Chars.IsCyrillicLetter)
                {
                    OrgItemEponymToken dt = new OrgItemEponymToken(t, age.EndToken.Next)
                    {
                        Eponyms = new List <string>()
                    };
                    dt.Eponyms.Add(string.Format("{0} {1}", age.Value, dt.EndToken.GetSourceText().ToUpper()));
                    return(dt);
                }
                return(null);
            }
            Pullenti.Ner.Token t1 = null;    // first token of the name itself (after any prefix)
            bool full             = false;   // an unambiguous prefix ("ИМЕНИ" or "ИМ.") was seen
            bool hasName          = false;   // any "ИМЕНИ"/"ИМ" prefix was seen

            if (tt.Term == "ИМЕНИ" || tt.Term == "ІМЕНІ")
            {
                t1      = t.Next;
                full    = true;
                hasName = true;
            }
            else if (((tt.Term == "ИМ" || tt.Term == "ІМ")) && tt.Next != null)
            {
                if (tt.Next.IsChar('.'))
                {
                    t1   = tt.Next.Next;
                    full = true;
                }
                else if ((tt.Next is Pullenti.Ner.TextToken) && tt.Chars.IsAllLower && !tt.Next.Chars.IsAllLower)
                {
                    t1 = tt.Next;
                }
                hasName = true;
            }
            else if (tt.Previous != null && ((tt.Previous.IsValue("ФОНД", null) || tt.Previous.IsValue("ХРАМ", null) || tt.Previous.IsValue("ЦЕРКОВЬ", "ЦЕРКВА"))))
            {
                // No prefix, but the preceding word is ФОНД / ХРАМ / ЦЕРКОВЬ: accept only a capitalised
                // Cyrillic word separated by exactly one whitespace that is not a generic organisation word.
                if ((!tt.Chars.IsCyrillicLetter || tt.Morph.Class.IsPreposition || tt.Morph.Class.IsConjunction) || !tt.Chars.IsLetter)
                {
                    return(null);
                }
                if (tt.WhitespacesBeforeCount != 1)
                {
                    return(null);
                }
                if (tt.Chars.IsAllLower)
                {
                    return(null);
                }
                if (tt.Morph.Class.IsAdjective)
                {
                    Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(tt, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                    if (npt != null && npt.BeginToken != npt.EndToken)
                    {
                        return(null);
                    }
                }
                OrgItemNameToken na = OrgItemNameToken.TryAttach(tt, null, false, true);
                if (na != null)
                {
                    if (na.IsEmptyWord || na.IsStdName || na.IsStdTail)
                    {
                        return(null);
                    }
                }
                t1 = tt;
            }
            if (t1 == null || ((t1.IsNewlineBefore && !full)))
            {
                return(null);
            }
            if (tt.Previous != null && tt.Previous.Morph.Class.IsPreposition)
            {
                return(null);
            }
            if (mustHasPrefix && !hasName)
            {
                return(null);
            }
            // A DATE referent with a DAY slot but no YEAR slot is accepted as the eponym after a full prefix.
            Pullenti.Ner.Referent r = t1.GetReferent();
            if ((r != null && r.TypeName == "DATE" && full) && r.FindSlot("DAY", null, true) != null && r.FindSlot("YEAR", null, true) == null)
            {
                OrgItemEponymToken dt = new OrgItemEponymToken(t, t1)
                {
                    Eponyms = new List <string>()
                };
                dt.Eponyms.Add(r.ToString().ToUpper());
                return(dt);
            }
            bool holy = false;

            // Skip a "saint" marker (optionally followed by a dot) and remember that it was present.
            if ((t1.IsValue("СВЯТОЙ", null) || t1.IsValue("СВЯТИЙ", null) || t1.IsValue("СВ", null)) || t1.IsValue("СВЯТ", null))
            {
                t1   = t1.Next;
                holy = true;
                if (t1 != null && t1.IsChar('.'))
                {
                    t1 = t1.Next;
                }
            }
            if (t1 == null)
            {
                return(null);
            }
            // A dictionary noun/adjective may still be a person: ask the kit for a PERSON referent and
            // accept it when it spans several tokens and its last token matches the LASTNAME value.
            Pullenti.Morph.MorphClass cl = t1.GetMorphClassInDictionary();
            if (cl.IsNoun || cl.IsAdjective)
            {
                Pullenti.Ner.ReferentToken rt = t1.Kit.ProcessReferent("PERSON", t1);
                if (rt != null && rt.Referent.TypeName == "PERSON" && rt.BeginToken != rt.EndToken)
                {
                    string e = rt.Referent.GetStringValue("LASTNAME");
                    if (e != null)
                    {
                        if (rt.EndToken.IsValue(e, null))
                        {
                            OrgItemEponymToken re = new OrgItemEponymToken(t, rt.EndToken);
                            re.Eponyms.Add(rt.EndToken.GetSourceText());
                            return(re);
                        }
                    }
                }
            }
            // Anniversary form: "<number>-ЛЕТ <noun phrase>" (РОКІВ when the kit's base language is Ukrainian).
            Pullenti.Ner.NumberToken nt = Pullenti.Ner.Core.NumberHelper.TryParseAnniversary(t1);
            if (nt != null && nt.Typ == Pullenti.Ner.NumberSpellingType.Age)
            {
                Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(nt.EndToken.Next, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                if (npt != null)
                {
                    string             s   = string.Format("{0}-{1} {2}", nt.Value, (t.Kit.BaseLanguage.IsUa ? "РОКІВ" : "ЛЕТ"), Pullenti.Ner.Core.MiscHelper.GetTextValue(npt.BeginToken, npt.EndToken, Pullenti.Ner.Core.GetTextAttr.No));
                    OrgItemEponymToken res = new OrgItemEponymToken(t, npt.EndToken);
                    res.Eponyms.Add(s);
                    return(res);
                }
            }
            List <PersonItemToken> its = PersonItemToken.TryAttach(t1);

            if (its == null)
            {
                // No person items: a geo referent token is still accepted as the eponym.
                if ((t1 is Pullenti.Ner.ReferentToken) && (t1.GetReferent() is Pullenti.Ner.Geo.GeoReferent))
                {
                    string             s  = Pullenti.Ner.Core.MiscHelper.GetTextValue(t1, t1, Pullenti.Ner.Core.GetTextAttr.No);
                    OrgItemEponymToken re = new OrgItemEponymToken(t, t1);
                    re.Eponyms.Add(s);
                    return(re);
                }
                return(null);
            }
            List <string> eponims = new List <string>();
            int           i       = 0;

            // A leading lower-case word item is skipped, so from here on i is either 0 or 1.
            if (its[i].Typ == PersonItemType.LocaseWord)
            {
                i++;
            }
            if (i >= its.Count)
            {
                return(null);
            }
            if (!full)
            {
                if (its[i].BeginToken.Morph.Class.IsAdjective && !its[i].BeginToken.Morph.Class.IsProperSurname)
                {
                    return(null);
                }
            }
            if (its[i].Typ == PersonItemType.Initial)
            {
                // "I. [I.] Surname [and I. [I.] Surname ...]"
                i++;
                while (true)
                {
                    if ((i < its.Count) && its[i].Typ == PersonItemType.Initial)
                    {
                        i++;
                    }
                    if (i >= its.Count || ((its[i].Typ != PersonItemType.Surname && its[i].Typ != PersonItemType.Name)))
                    {
                        break;
                    }
                    eponims.Add(its[i].Value);
                    t1 = its[i].EndToken;
                    if ((i + 2) >= its.Count || its[i + 1].Typ != PersonItemType.And || its[i + 2].Typ != PersonItemType.Initial)
                    {
                        break;
                    }
                    i += 3;
                }
            }
            else if (((i + 1) < its.Count) && its[i].Typ == PersonItemType.Name && its[i + 1].Typ == PersonItemType.Surname)
            {
                // "Name Surname [and Name Surname]" - only the surnames are recorded.
                eponims.Add(its[i + 1].Value);
                t1 = its[i + 1].EndToken;
                i += 2;
                if ((((i + 2) < its.Count) && its[i].Typ == PersonItemType.And && its[i + 1].Typ == PersonItemType.Name) && its[i + 2].Typ == PersonItemType.Surname)
                {
                    eponims.Add(its[i + 2].Value);
                    t1 = its[i + 2].EndToken;
                }
            }
            else if (its[i].Typ == PersonItemType.Surname)
            {
                // Exactly two remaining items with identical character class are merged into one surname.
                if (its.Count == (i + 2) && its[i].Chars == its[i + 1].Chars)
                {
                    its[i].Value   += (" " + its[i + 1].Value);
                    its[i].EndToken = its[i + 1].EndToken;
                    its.RemoveAt(i + 1);
                }
                eponims.Add(its[i].Value);
                if (((i + 1) < its.Count) && its[i + 1].Typ == PersonItemType.Name)
                {
                    if ((i + 2) == its.Count)
                    {
                        i++;
                    }
                    else if (its[i + 2].Typ != PersonItemType.Surname)
                    {
                        i++;
                    }
                }
                else if (((i + 1) < its.Count) && its[i + 1].Typ == PersonItemType.Initial)
                {
                    if ((i + 2) == its.Count)
                    {
                        i++;
                    }
                    else if (its[i + 2].Typ == PersonItemType.Initial && (i + 3) == its.Count)
                    {
                        i += 2;
                    }
                }
                else if (((i + 2) < its.Count) && its[i + 1].Typ == PersonItemType.And && its[i + 2].Typ == PersonItemType.Surname)
                {
                    // "Surname and Surname": the second one is rejected when its noun phrase has a
                    // defined case that is not genitive.
                    bool ok = true;
                    Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(its[i + 2].BeginToken, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                    if (npt != null && !npt.Morph.Case.IsGenitive && !npt.Morph.Case.IsUndefined)
                    {
                        ok = false;
                    }
                    if (ok)
                    {
                        eponims.Add(its[i + 2].Value);
                        i += 2;
                    }
                }
                t1 = its[i].EndToken;
            }
            else if (its[i].Typ == PersonItemType.Name && holy)
            {
                // Saint's name, optionally followed by a second non-initial item of the same character class.
                t1 = its[i].EndToken;
                bool sec = false;
                if (((i + 1) < its.Count) && its[i].Chars == its[i + 1].Chars && its[i + 1].Typ != PersonItemType.Initial)
                {
                    sec = true;
                    t1  = its[i + 1].EndToken;
                }
                if (sec)
                {
                    eponims.Add(string.Format("СВЯТ.{0} {1}", its[i].Value, its[i + 1].Value));
                }
                else
                {
                    eponims.Add(string.Format("СВЯТ.{0}", its[i].Value));
                }
            }
            else if (full && (i + 1) == its.Count && ((its[i].Typ == PersonItemType.Name || its[i].Typ == PersonItemType.Surname)))
            {
                t1 = its[i].EndToken;
                eponims.Add(its[i].Value);
            }
            // The (i + 2) < its.Count guard matters when a leading lower-case item was skipped (i == 1):
            // with its.Count == 3 the unguarded its[i + 2] would index past the end of the list.
            else if ((((i + 2) < its.Count) && its[i].Typ == PersonItemType.Name && its.Count == 3 && its[i + 1].Typ == PersonItemType.Name) && its[i + 2].Typ == PersonItemType.Surname)
            {
                t1 = its[i + 2].EndToken;
                eponims.Add(string.Format("{0} {1} {2}", its[i].Value, its[i + 1].Value, its[i + 2].Value));
                i += 2;
            }
            if (eponims.Count == 0)
            {
                return(null);
            }
            return(new OrgItemEponymToken(t, t1)
            {
                Eponyms = eponims
            });
        }
Beispiel #16
0
 /// <summary>
 /// Walks the token chain and glues together two adjacent letter tokens (optionally separated by a hyphen)
 /// when at least one of them is unknown to the dictionary and their concatenation is a single dictionary word.
 /// </summary>
 /// <param name="lang">Language passed to the morphology service when analysing the merged word.</param>
 void CorrectWordsByMerging(Pullenti.Morph.MorphLang lang)
 {
     for (Pullenti.Ner.Token head = FirstToken; head != null && head.Next != null; head = head.Next)
     {
         // The left part must be a letter token of at least two characters.
         if ((head.LengthChar < 2) || !head.Chars.IsLetter)
         {
             continue;
         }
         Pullenti.Morph.MorphClass headClass = head.GetMorphClassInDictionary();
         if (head.Morph.ContainsAttr("прдктв.", null))
         {
             continue;
         }

         // The right part is the next token, or the one after a hyphen that does not end its line.
         Pullenti.Ner.Token tail = head.Next;
         if (tail.IsHiphen && tail.Next != null && !tail.IsNewlineAfter)
         {
             tail = tail.Next;
         }
         if (tail.LengthChar == 1)
         {
             continue;
         }
         if (!tail.Chars.IsLetter || !head.Chars.IsLetter || tail.Chars.IsLatinLetter != head.Chars.IsLatinLetter)
         {
             continue;
         }

         // Case filter: the right part has to be all lower-case and the left part must not be all upper-case.
         bool caseMismatch = (tail.Chars.IsAllUpper && !head.Chars.IsAllUpper)
                             || !tail.Chars.IsAllLower
                             || head.Chars.IsAllUpper;
         if (caseMismatch)
         {
             continue;
         }
         if (tail.Morph.ContainsAttr("прдктв.", null))
         {
             continue;
         }

         // Both halves already being dictionary words means there is nothing to repair.
         Pullenti.Morph.MorphClass tailClass = tail.GetMorphClassInDictionary();
         if (!tailClass.IsUndefined && !headClass.IsUndefined)
         {
             continue;
         }

         Pullenti.Ner.TextToken headText = head as Pullenti.Ner.TextToken;
         Pullenti.Ner.TextToken tailText = tail as Pullenti.Ner.TextToken;
         if ((headText.Term.Length + tailText.Term.Length) < 6)
         {
             continue;
         }
         string merged = headText.Term + tailText.Term;
         List <Pullenti.Morph.MorphToken> analysed = Pullenti.Morph.MorphologyService.Process(merged, lang, null);
         if (analysed == null || analysed.Count != 1)
         {
             continue;
         }
         // NOTE(review): merged has at least 6 characters here, so these 4-letter exclusions look unreachable.
         if (merged == "ПОСТ" || merged == "ВРЕД")
         {
             continue;
         }
         Pullenti.Ner.TextToken glued = new Pullenti.Ner.TextToken(analysed[0], this, head.BeginChar, tail.EndChar);
         if (glued.GetMorphClassInDictionary().IsUndefined)
         {
             continue;
         }
         glued.Chars = head.Chars;

         // Splice the merged token into the chain in place of head..tail.
         if (head != FirstToken)
         {
             head.Previous.Next = glued;
         }
         else
         {
             FirstToken = glued;
         }
         if (tail.Next != null)
         {
             glued.Next = tail.Next;
         }
         head = glued;
     }
 }
Beispiel #17
0
        /// <summary>
        /// Collects a code value from consecutive tokens starting at <paramref name="t0"/>:
        /// digit number tokens and text tokens are concatenated until a stop condition is met.
        /// </summary>
        /// <param name="t0">Token the value starts at.</param>
        /// <returns>
        /// An item spanning the consumed tokens whose Value is the collected text (without a trailing dot),
        /// or null when fewer than 3 characters or fewer than 2 digits were collected.
        /// </returns>
        public static UriItemToken AttachBBK(Pullenti.Ner.Token t0)
        {
            StringBuilder buffer = new StringBuilder();
            Pullenti.Ner.Token last = t0;
            int digitCount = 0;

            for (Pullenti.Ner.Token cur = t0; cur != null; cur = cur.Next)
            {
                // Stop at a line break (other than before the very first token) or at a table control character.
                if ((cur != t0 && cur.IsNewlineBefore) || cur.IsTableControlChar)
                {
                    break;
                }

                Pullenti.Ner.NumberToken number = cur as Pullenti.Ner.NumberToken;
                if (number != null)
                {
                    // Only plain digit numbers without a morphological class are part of the code.
                    if (number.Typ != Pullenti.Ner.NumberSpellingType.Digit || !number.Morph.Class.IsUndefined)
                    {
                        break;
                    }
                    string digits = number.GetSourceText();
                    buffer.Append(digits);
                    digitCount += digits.Length;
                }
                else
                {
                    Pullenti.Ner.TextToken word = cur as Pullenti.Ner.TextToken;
                    if (word == null || word.IsChar(','))
                    {
                        break;
                    }
                    // An opening bracket is kept only when a number follows it.
                    if (word.IsChar('(') && !(word.Next is Pullenti.Ner.NumberToken))
                    {
                        break;
                    }
                    string piece = word.GetSourceText();
                    // A letter fragment preceded by whitespace ends the code.
                    if (char.IsLetter(piece[0]) && word.IsWhitespaceBefore)
                    {
                        break;
                    }
                    buffer.Append(piece);
                }
                last = cur;
            }

            if ((buffer.Length < 3) || (digitCount < 2))
            {
                return(null);
            }

            // Drop a trailing dot from both the text and the token span.
            if (buffer[buffer.Length - 1] == '.')
            {
                buffer.Length--;
                last = last.Previous;
            }

            UriItemToken result = new UriItemToken(t0, last);
            result.Value = buffer.ToString();
            return(result);
        }
Beispiel #18
0
        /// <summary>
        /// Reads one or more user-name items backwards, ending at token <paramref name="t1"/>.
        /// A closing brace starts group mode: names separated by ';' or ',' are collected
        /// right-to-left until the matching opening brace.
        /// </summary>
        /// <param name="t1">Last token of the user name, or the closing brace of a group; may be null.</param>
        /// <returns>
        /// The list of recognised items in text order (values lower-cased), or null when nothing is
        /// recognised or a group has no opening brace.
        /// </returns>
        public static List <UriItemToken> AttachMailUsers(Pullenti.Ner.Token t1)
        {
            if (t1 == null)
            {
                return(null);
            }

            if (t1.IsChar('}'))
            {
                // Group mode: the item just before '}' comes first, then earlier items are prepended.
                List <UriItemToken> group = AttachMailUsers(t1.Previous);
                if (group == null)
                {
                    return(null);
                }
                Pullenti.Ner.Token cursor = group[0].BeginToken.Previous;
                while (cursor != null)
                {
                    if (cursor.IsChar('{'))
                    {
                        // The first item is stretched to start at the opening brace.
                        group[0].BeginToken = cursor;
                        return(group);
                    }
                    if (!cursor.IsCharOf(";,"))
                    {
                        List <UriItemToken> earlier = AttachMailUsers(cursor);
                        if (earlier == null)
                        {
                            return(null);
                        }
                        group.Insert(0, earlier[0]);
                        cursor = earlier[0].BeginToken;
                    }
                    cursor = cursor.Previous;
                }
                return(null);
            }

            StringBuilder collected = new StringBuilder();
            Pullenti.Ner.Token first = t1;

            for (Pullenti.Ner.Token cur = t1; cur != null; cur = cur.Previous)
            {
                // Every consumed token must be directly followed by the next one, without whitespace.
                if (cur.IsWhitespaceAfter)
                {
                    break;
                }

                string piece;
                Pullenti.Ner.NumberToken number = cur as Pullenti.Ner.NumberToken;
                if (number != null)
                {
                    piece = number.GetSourceText();
                }
                else
                {
                    Pullenti.Ner.TextToken word = cur as Pullenti.Ner.TextToken;
                    if (word == null)
                    {
                        break;
                    }
                    piece = word.GetSourceText();
                    // Besides letters, only '.', '-' and '_' may start a fragment.
                    char lead = piece[0];
                    if (!char.IsLetter(lead) && ".-_".IndexOf(lead) < 0)
                    {
                        break;
                    }
                }
                collected.Insert(0, piece);
                first = cur;
            }

            if (collected.Length == 0)
            {
                return(null);
            }

            UriItemToken user = new UriItemToken(first, t1);
            user.Value = collected.ToString().ToLower();

            List <UriItemToken> result = new List <UriItemToken>();
            result.Add(user);
            return(result);
        }
Beispiel #19
0
        public static Pullenti.Ner.Core.NumberExToken TryParseNumberWithPostfix(Pullenti.Ner.Token t)
        {
            if (t == null)
            {
                return(null);
            }
            Pullenti.Ner.Token t0       = t;
            string             isDollar = null;

            if (t.LengthChar == 1 && t.Next != null)
            {
                if ((((isDollar = Pullenti.Ner.Core.NumberHelper.IsMoneyChar(t)))) != null)
                {
                    t = t.Next;
                }
            }
            Pullenti.Ner.NumberToken nt = t as Pullenti.Ner.NumberToken;
            if (nt == null)
            {
                if ((!(t.Previous is Pullenti.Ner.NumberToken) && t.IsChar('(') && (t.Next is Pullenti.Ner.NumberToken)) && t.Next.Next != null && t.Next.Next.IsChar(')'))
                {
                    Pullenti.Ner.Core.TerminToken toks1 = m_Postfixes.TryParse(t.Next.Next.Next, Pullenti.Ner.Core.TerminParseAttr.No);
                    if (toks1 != null && ((Pullenti.Ner.Core.NumberExType)toks1.Termin.Tag) == Pullenti.Ner.Core.NumberExType.Money)
                    {
                        Pullenti.Ner.NumberToken        nt0 = t.Next as Pullenti.Ner.NumberToken;
                        Pullenti.Ner.Core.NumberExToken res = new Pullenti.Ner.Core.NumberExToken(t, toks1.EndToken, nt0.Value, nt0.Typ, Pullenti.Ner.Core.NumberExType.Money)
                        {
                            AltRealValue = nt0.RealValue, Morph = toks1.BeginToken.Morph
                        };
                        return(_correctMoney(res, toks1.BeginToken));
                    }
                }
                Pullenti.Ner.TextToken tt = t as Pullenti.Ner.TextToken;
                if (tt == null || !tt.Morph.Class.IsAdjective)
                {
                    return(null);
                }
                string val = tt.Term;
                for (int i = 4; i < (val.Length - 5); i++)
                {
                    string v = val.Substring(0, i);
                    List <Pullenti.Ner.Core.Termin> li = Pullenti.Ner.Core.NumberHelper.m_Nums.FindTerminsByString(v, tt.Morph.Language);
                    if (li == null)
                    {
                        continue;
                    }
                    string vv = val.Substring(i);
                    List <Pullenti.Ner.Core.Termin> lii = m_Postfixes.FindTerminsByString(vv, tt.Morph.Language);
                    if (lii != null && lii.Count > 0)
                    {
                        Pullenti.Ner.Core.NumberExToken re = new Pullenti.Ner.Core.NumberExToken(t, t, ((int)li[0].Tag).ToString(), Pullenti.Ner.NumberSpellingType.Words, (Pullenti.Ner.Core.NumberExType)lii[0].Tag)
                        {
                            Morph = t.Morph
                        };
                        _correctExtTypes(re);
                        return(re);
                    }
                    break;
                }
                return(null);
            }
            if (t.Next == null && isDollar == null)
            {
                return(null);
            }
            double f = nt.RealValue;

            if (double.IsNaN(f))
            {
                return(null);
            }
            Pullenti.Ner.Token t1 = nt.Next;
            if (((t1 != null && t1.IsCharOf(",."))) || (((t1 is Pullenti.Ner.NumberToken) && (t1.WhitespacesBeforeCount < 3))))
            {
                double d;
                Pullenti.Ner.NumberToken tt11 = Pullenti.Ner.Core.NumberHelper.TryParseRealNumber(nt, false, false);
                if (tt11 != null)
                {
                    t1 = tt11.EndToken.Next;
                    f  = tt11.RealValue;
                }
            }
            if (t1 == null)
            {
                if (isDollar == null)
                {
                    return(null);
                }
            }
            else if ((t1.Next != null && t1.Next.IsValue("С", "З") && t1.Next.Next != null) && t1.Next.Next.IsValue("ПОЛОВИНА", null))
            {
                f += 0.5;
                t1 = t1.Next.Next;
            }
            if (t1 != null && t1.IsHiphen && t1.Next != null)
            {
                t1 = t1.Next;
            }
            bool   det  = false;
            double altf = f;

            if (((t1 is Pullenti.Ner.NumberToken) && t1.Previous != null && t1.Previous.IsHiphen) && (t1 as Pullenti.Ner.NumberToken).IntValue == 0 && t1.LengthChar == 2)
            {
                t1 = t1.Next;
            }
            if ((t1 != null && t1.Next != null && t1.IsChar('(')) && (((t1.Next is Pullenti.Ner.NumberToken) || t1.Next.IsValue("НОЛЬ", null))) && t1.Next.Next != null)
            {
                Pullenti.Ner.NumberToken nt1 = t1.Next as Pullenti.Ner.NumberToken;
                double val = (double)0;
                if (nt1 != null)
                {
                    val = nt1.RealValue;
                }
                if (Math.Floor(f) == Math.Floor(val))
                {
                    Pullenti.Ner.Token ttt = t1.Next.Next;
                    if (ttt.IsChar(')'))
                    {
                        t1  = ttt.Next;
                        det = true;
                        if ((t1 is Pullenti.Ner.NumberToken) && (t1 as Pullenti.Ner.NumberToken).IntValue != null && (t1 as Pullenti.Ner.NumberToken).IntValue.Value == 0)
                        {
                            t1 = t1.Next;
                        }
                    }
                    else if (((((ttt is Pullenti.Ner.NumberToken) && ((ttt as Pullenti.Ner.NumberToken).RealValue < 100) && ttt.Next != null) && ttt.Next.IsChar('/') && ttt.Next.Next != null) && ttt.Next.Next.GetSourceText() == "100" && ttt.Next.Next.Next != null) && ttt.Next.Next.Next.IsChar(')'))
                    {
                        int rest = GetDecimalRest100(f);
                        if ((ttt as Pullenti.Ner.NumberToken).IntValue != null && rest == (ttt as Pullenti.Ner.NumberToken).IntValue.Value)
                        {
                            t1  = ttt.Next.Next.Next.Next;
                            det = true;
                        }
                    }
                    else if ((ttt.IsValue("ЦЕЛЫХ", null) && (ttt.Next is Pullenti.Ner.NumberToken) && ttt.Next.Next != null) && ttt.Next.Next.Next != null && ttt.Next.Next.Next.IsChar(')'))
                    {
                        Pullenti.Ner.NumberToken num2 = ttt.Next as Pullenti.Ner.NumberToken;
                        altf = num2.RealValue;
                        if (ttt.Next.Next.IsValue("ДЕСЯТЫЙ", null))
                        {
                            altf /= 10;
                        }
                        else if (ttt.Next.Next.IsValue("СОТЫЙ", null))
                        {
                            altf /= 100;
                        }
                        else if (ttt.Next.Next.IsValue("ТЫСЯЧНЫЙ", null))
                        {
                            altf /= 1000;
                        }
                        else if (ttt.Next.Next.IsValue("ДЕСЯТИТЫСЯЧНЫЙ", null))
                        {
                            altf /= 10000;
                        }
                        else if (ttt.Next.Next.IsValue("СТОТЫСЯЧНЫЙ", null))
                        {
                            altf /= 100000;
                        }
                        else if (ttt.Next.Next.IsValue("МИЛЛИОННЫЙ", null))
                        {
                            altf /= 1000000;
                        }
                        if (altf < 1)
                        {
                            altf += val;
                            t1    = ttt.Next.Next.Next.Next;
                            det   = true;
                        }
                    }
                    else
                    {
                        Pullenti.Ner.Core.TerminToken toks1 = m_Postfixes.TryParse(ttt, Pullenti.Ner.Core.TerminParseAttr.No);
                        if (toks1 != null)
                        {
                            if (((Pullenti.Ner.Core.NumberExType)toks1.Termin.Tag) == Pullenti.Ner.Core.NumberExType.Money)
                            {
                                if (toks1.EndToken.Next != null && toks1.EndToken.Next.IsChar(')'))
                                {
                                    Pullenti.Ner.Core.NumberExToken res = new Pullenti.Ner.Core.NumberExToken(t, toks1.EndToken.Next, nt.Value, nt.Typ, Pullenti.Ner.Core.NumberExType.Money)
                                    {
                                        RealValue = f, AltRealValue = altf, Morph = toks1.BeginToken.Morph
                                    };
                                    return(_correctMoney(res, toks1.BeginToken));
                                }
                            }
                        }
                        Pullenti.Ner.Core.NumberExToken res2 = TryParseNumberWithPostfix(t1.Next);
                        if (res2 != null && res2.EndToken.Next != null && res2.EndToken.Next.IsChar(')'))
                        {
                            res2.BeginToken   = t;
                            res2.EndToken     = res2.EndToken.Next;
                            res2.AltRealValue = res2.RealValue;
                            res2.RealValue    = f;
                            _correctExtTypes(res2);
                            if (res2.WhitespacesAfterCount < 2)
                            {
                                Pullenti.Ner.Core.TerminToken toks2 = m_Postfixes.TryParse(res2.EndToken.Next, Pullenti.Ner.Core.TerminParseAttr.No);
                                if (toks2 != null)
                                {
                                    if (((Pullenti.Ner.Core.NumberExType)toks2.Termin.Tag) == Pullenti.Ner.Core.NumberExType.Money)
                                    {
                                        res2.EndToken = toks2.EndToken;
                                    }
                                }
                            }
                            return(res2);
                        }
                    }
                }
                else if (nt1 != null && nt1.Typ == Pullenti.Ner.NumberSpellingType.Words && nt.Typ == Pullenti.Ner.NumberSpellingType.Digit)
                {
                    altf = nt1.RealValue;
                    Pullenti.Ner.Token ttt = t1.Next.Next;
                    if (ttt.IsChar(')'))
                    {
                        t1  = ttt.Next;
                        det = true;
                    }
                    if (!det)
                    {
                        altf = f;
                    }
                }
            }
            if ((t1 != null && t1.IsChar('(') && t1.Next != null) && t1.Next.IsValue("СУММА", null))
            {
                Pullenti.Ner.Core.BracketSequenceToken br = Pullenti.Ner.Core.BracketHelper.TryParse(t1, Pullenti.Ner.Core.BracketParseAttr.No, 100);
                if (br != null)
                {
                    t1 = br.EndToken.Next;
                }
            }
            if (isDollar != null)
            {
                Pullenti.Ner.Token te = null;
                if (t1 != null)
                {
                    te = t1.Previous;
                }
                else
                {
                    for (t1 = t0; t1 != null; t1 = t1.Next)
                    {
                        if (t1.Next == null)
                        {
                            te = t1;
                        }
                    }
                }
                if (te == null)
                {
                    return(null);
                }
                if (te.IsHiphen && te.Next != null)
                {
                    if (te.Next.IsValue("МИЛЛИОННЫЙ", null))
                    {
                        f    *= 1000000;
                        altf *= 1000000;
                        te    = te.Next;
                    }
                    else if (te.Next.IsValue("МИЛЛИАРДНЫЙ", null))
                    {
                        f    *= 1000000000;
                        altf *= 1000000000;
                        te    = te.Next;
                    }
                }
                if (!te.IsWhitespaceAfter && (te.Next is Pullenti.Ner.TextToken))
                {
                    if (te.Next.IsValue("M", null))
                    {
                        f    *= 1000000;
                        altf *= 1000000;
                        te    = te.Next;
                    }
                    else if (te.Next.IsValue("BN", null))
                    {
                        f    *= 1000000000;
                        altf *= 1000000000;
                        te    = te.Next;
                    }
                }
                return(new Pullenti.Ner.Core.NumberExToken(t0, te, "", nt.Typ, Pullenti.Ner.Core.NumberExType.Money)
                {
                    RealValue = f, AltRealValue = altf, ExTypParam = isDollar
                });
            }
            if (t1 == null || ((t1.IsNewlineBefore && !det)))
            {
                return(null);
            }
            Pullenti.Ner.Core.TerminToken toks = m_Postfixes.TryParse(t1, Pullenti.Ner.Core.TerminParseAttr.No);
            if ((toks == null && det && (t1 is Pullenti.Ner.NumberToken)) && (t1 as Pullenti.Ner.NumberToken).Value == "0")
            {
                toks = m_Postfixes.TryParse(t1.Next, Pullenti.Ner.Core.TerminParseAttr.No);
            }
            if (toks == null && t1.IsChar('р'))
            {
                int cou = 10;
                for (Pullenti.Ner.Token ttt = t0.Previous; ttt != null && cou > 0; ttt = ttt.Previous, cou--)
                {
                    if (ttt.IsValue("СУММА", null) || ttt.IsValue("НАЛИЧНЫЙ", null) || ttt.IsValue("БАЛАНС", null))
                    {
                    }
                    else if (ttt.GetReferent() != null && ttt.GetReferent().TypeName == "MONEY")
                    {
                    }
                    else
                    {
                        continue;
                    }
                    toks = new Pullenti.Ner.Core.TerminToken(t1, t1)
                    {
                        Termin = m_Postfixes.FindTerminsByCanonicText("RUB")[0]
                    };
                    if (t1.Next != null && t1.Next.IsChar('.'))
                    {
                        toks.EndToken = t1.Next;
                    }
                    Pullenti.Ner.Core.NumberExType ty = (Pullenti.Ner.Core.NumberExType)toks.Termin.Tag;
                    return(new Pullenti.Ner.Core.NumberExToken(t, toks.EndToken, nt.Value, nt.Typ, ty)
                    {
                        RealValue = f, AltRealValue = altf, Morph = toks.BeginToken.Morph, ExTypParam = "RUB"
                    });
                }
            }
            if (toks != null)
            {
                t1 = toks.EndToken;
                if (!t1.IsChar('.') && t1.Next != null && t1.Next.IsChar('.'))
                {
                    if ((t1 is Pullenti.Ner.TextToken) && t1.IsValue(toks.Termin.Terms[0].CanonicalText, null))
                    {
                    }
                    else if (!t1.Chars.IsLetter)
                    {
                    }
                    else
                    {
                        t1 = t1.Next;
                    }
                }
                if (toks.Termin.CanonicText == "LTL")
                {
                    return(null);
                }
                if (toks.BeginToken == t1)
                {
                    if (t1.Morph.Class.IsPreposition || t1.Morph.Class.IsConjunction)
                    {
                        if (t1.IsWhitespaceBefore && t1.IsWhitespaceAfter)
                        {
                            return(null);
                        }
                    }
                }
                Pullenti.Ner.Core.NumberExType  ty  = (Pullenti.Ner.Core.NumberExType)toks.Termin.Tag;
                Pullenti.Ner.Core.NumberExToken res = new Pullenti.Ner.Core.NumberExToken(t, t1, nt.Value, nt.Typ, ty)
                {
                    RealValue = f, AltRealValue = altf, Morph = toks.BeginToken.Morph
                };
                if (ty != Pullenti.Ner.Core.NumberExType.Money)
                {
                    _correctExtTypes(res);
                    return(res);
                }
                return(_correctMoney(res, toks.BeginToken));
            }
            Pullenti.Ner.Core.NumberExToken pfx = _attachSpecPostfix(t1);
            if (pfx != null)
            {
                pfx.BeginToken   = t;
                pfx.Value        = nt.Value;
                pfx.Typ          = nt.Typ;
                pfx.RealValue    = f;
                pfx.AltRealValue = altf;
                return(pfx);
            }
            if (t1.Next != null && ((t1.Morph.Class.IsPreposition || t1.Morph.Class.IsConjunction)))
            {
                if (t1.IsValue("НА", null))
                {
                }
                else
                {
                    Pullenti.Ner.Core.NumberExToken nn = TryParseNumberWithPostfix(t1.Next);
                    if (nn != null)
                    {
                        return new Pullenti.Ner.Core.NumberExToken(t, t, nt.Value, nt.Typ, nn.ExTyp)
                               {
                                   RealValue = f, AltRealValue = altf, ExTyp2 = nn.ExTyp2, ExTypParam = nn.ExTypParam
                               }
                    }
                    ;
                }
            }
            if (!t1.IsWhitespaceAfter && (t1.Next is Pullenti.Ner.NumberToken) && (t1 is Pullenti.Ner.TextToken))
            {
                string term = (t1 as Pullenti.Ner.TextToken).Term;
                Pullenti.Ner.Core.NumberExType ty = Pullenti.Ner.Core.NumberExType.Undefined;
                if (term == "СМХ" || term == "CMX")
                {
                    ty = Pullenti.Ner.Core.NumberExType.Santimeter;
                }
                else if (term == "MX" || term == "МХ")
                {
                    ty = Pullenti.Ner.Core.NumberExType.Meter;
                }
                else if (term == "MMX" || term == "ММХ")
                {
                    ty = Pullenti.Ner.Core.NumberExType.Millimeter;
                }
                if (ty != Pullenti.Ner.Core.NumberExType.Undefined)
                {
                    return new Pullenti.Ner.Core.NumberExToken(t, t1, nt.Value, nt.Typ, ty)
                           {
                               RealValue = f, AltRealValue = altf, MultAfter = true
                           }
                }
                ;
            }
            return(null);
        }
Beispiel #20
0
        /// <summary>
        /// Tokenizes the text of <paramref name="sofa"/> into a linked chain of tokens starting at
        /// <c>FirstToken</c> and applies the token-level corrections requested by the source.
        /// </summary>
        /// <param name="sofa">Source of analysis; when null the constructor returns at once and nothing is initialised.</param>
        /// <param name="onlyTokenizing">When true, the constructor stops after number tokens are embedded:
        /// the unknown-word morphology pass and <c>CreateStatistics</c> are skipped.</param>
        /// <param name="lang">Language handed to the morphology service and to the word-correction passes.</param>
        /// <param name="progress">Progress handler handed to the morphology service when processing the main text.</param>
        public AnalysisKit(Pullenti.Ner.SourceOfAnalysis sofa = null, bool onlyTokenizing = false, Pullenti.Morph.MorphLang lang = null, ProgressChangedEventHandler progress = null)
        {
            if (sofa == null)
            {
                return;
            }
            m_Sofa    = sofa;
            StartDate = DateTime.Now;
            List <Pullenti.Morph.MorphToken> tokens = Pullenti.Morph.MorphologyService.Process(sofa.Text, lang, progress);

            // Build the token chain: t0 is always the most recently appended token.
            Pullenti.Ner.Token t0 = null;
            if (tokens != null)
            {
                foreach (Pullenti.Morph.MorphToken mt in tokens)
                {
                    Pullenti.Ner.TextToken tt = new Pullenti.Ner.TextToken(mt, this);
                    if (sofa.CorrectionDict != null)
                    {
                        string corw;
                        if (sofa.CorrectionDict.TryGetValue(mt.Term, out corw))
                        {
                            // A user-supplied correction is applied only when it morphs into exactly one token;
                            // the replacement keeps the original character span and remembers the original term.
                            List <Pullenti.Morph.MorphToken> ccc = Pullenti.Morph.MorphologyService.Process(corw, lang, null);
                            if (ccc != null && ccc.Count == 1)
                            {
                                Pullenti.Ner.TextToken tt1 = new Pullenti.Ner.TextToken(ccc[0], this, tt.BeginChar, tt.EndChar)
                                {
                                    Term0 = tt.Term
                                };
                                tt1.Chars = tt.Chars;
                                tt        = tt1;
                                if (CorrectedTokens == null)
                                {
                                    CorrectedTokens = new Dictionary <Pullenti.Ner.Token, string>();
                                }
                                CorrectedTokens.Add(tt, tt.GetSourceText());
                            }
                        }
                    }
                    if (t0 == null)
                    {
                        FirstToken = tt;
                    }
                    else
                    {
                        t0.Next = tt;
                    }
                    t0 = tt;
                }
            }
            if (sofa.ClearDust)
            {
                this.ClearDust();
            }
            if (sofa.DoWordsMergingByMorph)
            {
                this.CorrectWordsByMerging(lang);
            }
            if (sofa.DoWordCorrectionByMorph)
            {
                this.CorrectWordsByMorph(lang);
            }
            this.MergeLetters();
            this.DefineBaseLanguage();
            if (sofa.CreateNumberTokens)
            {
                for (Pullenti.Ner.Token t = FirstToken; t != null; t = t.Next)
                {
                    Pullenti.Ner.NumberToken nt = NumberHelper.TryParseNumber(t);
                    if (nt == null)
                    {
                        continue;
                    }
                    this.EmbedToken(nt);
                    // Continue after the embedded number token.
                    t = nt;
                }
            }
            if (onlyTokenizing)
            {
                return;
            }
            // For long Cyrillic words that are not in the dictionary, borrow morphology constraints from an
            // adjacent dictionary word that ends with the same two source characters.
            for (Pullenti.Ner.Token t = FirstToken; t != null; t = t.Next)
            {
                if (t.Morph.Class.IsPreposition)
                {
                    continue;
                }
                Pullenti.Morph.MorphClass mc = t.GetMorphClassInDictionary();
                if (!mc.IsUndefined || !t.Chars.IsCyrillicLetter || t.LengthChar <= 4)
                {
                    continue;
                }
                string tail = sofa.Text.Substring(t.EndChar - 1, 2);
                // The preceding neighbour has priority; the following one is tried only if it does not match.
                Pullenti.Ner.Token tte = _findSameTailNeighbour(t, false, sofa.Text, tail);
                if (tte == null)
                {
                    tte = _findSameTailNeighbour(t, true, sofa.Text, tail);
                }
                if (tte != null)
                {
                    t.Morph.RemoveItemsEx(tte.Morph, tte.GetMorphClassInDictionary());
                }
            }
            this.CreateStatistics();
        }

        /// <summary>
        /// Looks for the word next to <paramref name="t"/> (optionally skipping one comma/"and", preposition
        /// or conjunction) that is in the dictionary, shares at least one morph-class bit with
        /// <paramref name="t"/>, is longer than 4 characters and whose two source characters ending at its
        /// EndChar equal <paramref name="tail"/>.
        /// </summary>
        /// <param name="t">Token whose neighbour is examined.</param>
        /// <param name="forward">true to look at the following token, false to look at the preceding one.</param>
        /// <param name="text">Full source text used to read the neighbour's ending.</param>
        /// <param name="tail">Two-character ending the neighbour must match.</param>
        /// <returns>The matching neighbour, or null when there is none.</returns>
        static Pullenti.Ner.Token _findSameTailNeighbour(Pullenti.Ner.Token t, bool forward, string text, string tail)
        {
            Pullenti.Ner.Token tt = forward ? t.Next : t.Previous;
            if (tt != null && ((tt.IsCommaAnd || tt.Morph.Class.IsPreposition || tt.Morph.Class.IsConjunction)))
            {
                tt = forward ? tt.Next : tt.Previous;
            }
            if ((tt != null && !tt.GetMorphClassInDictionary().IsUndefined && ((tt.Morph.Class.Value & t.Morph.Class.Value)) != 0) && tt.LengthChar > 4)
            {
                if (text.Substring(tt.EndChar - 1, 2) == tail)
                {
                    return(tt);
                }
            }
            return(null);
        }
Beispiel #21
0
        /// <summary>
        /// Scores the token span BeginToken..EndToken by accumulating bonuses and penalties into
        /// <c>Rank</c> and decides the name sub-span stored in <c>BeginNameToken</c>/<c>EndNameToken</c>.
        /// May also set <c>TypeValue</c> and <c>Speciality</c> from recognised title items.
        /// </summary>
        /// <param name="minNewlinesCount">Tokens (other than the first) preceded by more newlines than this
        /// value are penalised by the excess.</param>
        /// <returns>false when the span is rejected by one of the early exits; true when Rank and the name
        /// span have been computed.</returns>
        bool CalcRankAndValue(int minNewlinesCount)
        {
            Rank = 0;
            // A span that starts in lower case gets a heavy initial penalty.
            if (BeginToken.Chars.IsAllLower)
            {
                Rank -= 30;
            }
            // words: counted word-like tokens; upWords: those in all upper case;
            // notwords: digit numbers and non-letter text tokens (commas excluded);
            // lineNumber: number of line breaks seen inside the span.
            int words      = 0;
            int upWords    = 0;
            int notwords   = 0;
            int lineNumber = 0;

            // tstart/tend delimit the resulting name span; tstart may be moved forward by title items or quotes.
            Pullenti.Ner.Token tstart = BeginToken;
            Pullenti.Ner.Token tend   = EndToken;
            for (Pullenti.Ner.Token t = BeginToken; t != EndToken.Next && t != null && t.EndChar <= EndToken.EndChar; t = t.Next)
            {
                // NOTE(review): empty branch, has no effect.
                if (t.IsNewlineBefore)
                {
                }
                // 1. Title item keywords recognised at this position.
                TitleItemToken tit = TitleItemToken.TryAttach(t);
                if (tit != null)
                {
                    if (tit.Typ == TitleItemToken.Types.Theme || tit.Typ == TitleItemToken.Types.TypAndTheme)
                    {
                        if (t != BeginToken)
                        {
                            // A theme marker after a line break inside the span rejects the span.
                            if (lineNumber > 0)
                            {
                                return(false);
                            }
                            // Everything counted before the marker is discarded.
                            words  = (upWords = (notwords = 0));
                            // NOTE(review): this value is overwritten below by tstart = t.Next (same token).
                            tstart = tit.EndToken.Next;
                        }
                        t = tit.EndToken;
                        if (t.Next == null)
                        {
                            return(false);
                        }
                        // Smaller bonus when the text after the marker starts with a lower-case letter.
                        if (t.Next.Chars.IsLetter && t.Next.Chars.IsAllLower)
                        {
                            Rank += 20;
                        }
                        else
                        {
                            Rank += 100;
                        }
                        tstart = t.Next;
                        if (tit.Typ == TitleItemToken.Types.TypAndTheme)
                        {
                            TypeValue = tit.Value;
                        }
                        continue;
                    }
                    if (tit.Typ == TitleItemToken.Types.Typ)
                    {
                        if (t == BeginToken)
                        {
                            // A type item alone on the first line becomes TypeValue and the name starts after it.
                            if (tit.EndToken.IsNewlineAfter)
                            {
                                TypeValue = tit.Value;
                                Rank     += 5;
                                tstart    = tit.EndToken.Next;
                            }
                        }
                        t = tit.EndToken;
                        // Counted as one word, or two when the item spans more than one token.
                        words++;
                        if (tit.BeginToken != tit.EndToken)
                        {
                            words++;
                        }
                        if (tit.Chars.IsAllUpper)
                        {
                            upWords++;
                        }
                        continue;
                    }
                    if (tit.Typ == TitleItemToken.Types.Dust || tit.Typ == TitleItemToken.Types.Speciality)
                    {
                        // Such an item at the very start rejects the span; elsewhere it is only penalised.
                        if (t == BeginToken)
                        {
                            return(false);
                        }
                        Rank -= 20;
                        if (tit.Typ == TitleItemToken.Types.Speciality)
                        {
                            Speciality = tit.Value;
                        }
                        t = tit.EndToken;
                        continue;
                    }
                    if (tit.Typ == TitleItemToken.Types.Consultant || tit.Typ == TitleItemToken.Types.Boss || tit.Typ == TitleItemToken.Types.Editor)
                    {
                        t = tit.EndToken;
                        // Larger penalty when the item is followed by ':', a hyphen or a wide whitespace gap.
                        if (t.Next != null && ((t.Next.IsCharOf(":") || t.Next.IsHiphen || t.WhitespacesAfterCount > 4)))
                        {
                            Rank -= 10;
                        }
                        else
                        {
                            Rank -= 2;
                        }
                        continue;
                    }
                    // Any other title item type rejects the span.
                    return(false);
                }
                // 2. Book-link elements are penalised; scanning of this token continues afterwards.
                Pullenti.Ner.Booklink.Internal.BookLinkToken blt = Pullenti.Ner.Booklink.Internal.BookLinkToken.TryParse(t, 0);
                if (blt != null)
                {
                    if (blt.Typ == Pullenti.Ner.Booklink.Internal.BookLinkTyp.Misc || blt.Typ == Pullenti.Ner.Booklink.Internal.BookLinkTyp.N || blt.Typ == Pullenti.Ner.Booklink.Internal.BookLinkTyp.Pages)
                    {
                        Rank -= 10;
                    }
                    // NOTE(review): BookLinkTyp.N is already matched by the branch above, so only PageRange
                    // can reach this branch - confirm which type was intended here.
                    else if (blt.Typ == Pullenti.Ner.Booklink.Internal.BookLinkTyp.N || blt.Typ == Pullenti.Ner.Booklink.Internal.BookLinkTyp.PageRange)
                    {
                        Rank -= 20;
                    }
                }
                // A span that starts with something parsed as a book author is penalised.
                if (t == BeginToken && Pullenti.Ner.Booklink.Internal.BookLinkToken.TryParseAuthor(t, Pullenti.Ner.Person.Internal.FioTemplateType.Undefined) != null)
                {
                    Rank -= 20;
                }
                // 3. Line breaks inside the span: at most 4 are tolerated, and each one is judged by how
                // the previous line ends and the new one begins.
                if (t.IsNewlineBefore && t != BeginToken)
                {
                    lineNumber++;
                    if (lineNumber > 4)
                    {
                        return(false);
                    }
                    if (t.Chars.IsAllLower)
                    {
                        Rank += 10;
                    }
                    else if (t.Previous.IsChar('.'))
                    {
                        Rank -= 10;
                    }
                    else if (t.Previous.IsCharOf(",-"))
                    {
                        Rank += 10;
                    }
                    else
                    {
                        // Bonus when a noun phrase starting at the previous token extends over this token.
                        Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t.Previous, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                        if (npt != null && npt.EndChar >= t.EndChar)
                        {
                            Rank += 10;
                        }
                    }
                }
                // Penalise vertical gaps larger than the allowed number of newlines.
                if (t != BeginToken && t.NewlinesBeforeCount > minNewlinesCount)
                {
                    Rank -= (t.NewlinesBeforeCount - minNewlinesCount);
                }
                // 4. A quoted sequence that fits in the span and appears before any counted word
                // becomes the start of the name.
                Pullenti.Ner.Core.BracketSequenceToken bst = Pullenti.Ner.Core.BracketHelper.TryParse(t, Pullenti.Ner.Core.BracketParseAttr.No, 100);
                if (bst != null && bst.IsQuoteType && bst.EndToken.EndChar <= EndToken.EndChar)
                {
                    if (words == 0)
                    {
                        tstart = bst.BeginToken;
                        Rank  += 10;
                        if (bst.EndToken == EndToken)
                        {
                            // NOTE(review): tend is only ever assigned EndToken, so this assignment changes nothing.
                            tend  = EndToken;
                            Rank += 10;
                        }
                    }
                }
                // 5. Tokens carrying referents (already recognised entities).
                List <Pullenti.Ner.Referent> rli = t.GetReferents();
                if (rli != null)
                {
                    foreach (Pullenti.Ner.Referent r in rli)
                    {
                        if (r is Pullenti.Ner.Org.OrganizationReferent)
                        {
                            if (t.IsNewlineBefore)
                            {
                                Rank -= 10;
                            }
                            else
                            {
                                Rank -= 4;
                            }
                            continue;
                        }
                        if ((r is Pullenti.Ner.Geo.GeoReferent) || (r is Pullenti.Ner.Person.PersonReferent))
                        {
                            // Only penalised when the entity starts a line.
                            if (t.IsNewlineBefore)
                            {
                                Rank -= 5;
                                if (t.IsNewlineAfter || t.Next == null)
                                {
                                    Rank -= 20;
                                }
                                else if (t.Next.IsHiphen || (t.Next is Pullenti.Ner.NumberToken) || (t.Next.GetReferent() is Pullenti.Ner.Date.DateReferent))
                                {
                                    Rank -= 20;
                                }
                                else if (t != BeginToken)
                                {
                                    Rank -= 20;
                                }
                            }
                            continue;
                        }
                        // NOTE(review): GeoReferent is already consumed by the branch above, so only
                        // DenominationReferent can match here.
                        if ((r is Pullenti.Ner.Geo.GeoReferent) || (r is Pullenti.Ner.Denomination.DenominationReferent))
                        {
                            continue;
                        }
                        // URIs and phone numbers reject the span outright.
                        if ((r is Pullenti.Ner.Uri.UriReferent) || (r is Pullenti.Ner.Phone.PhoneReferent))
                        {
                            return(false);
                        }
                        // Any other referent type: small penalty, larger at the start of a line.
                        if (t.IsNewlineBefore)
                        {
                            Rank -= 4;
                        }
                        else
                        {
                            Rank -= 2;
                        }
                        if (t == BeginToken && (EndToken.GetReferent() is Pullenti.Ner.Person.PersonReferent))
                        {
                            Rank -= 10;
                        }
                    }
                    // A referent token counts as a single word.
                    words++;
                    if (t.Chars.IsAllUpper)
                    {
                        upWords++;
                    }
                    // A first token that is a referent standing alone on its line (optionally followed by '.') is penalised.
                    if (t == BeginToken)
                    {
                        if (t.IsNewlineAfter)
                        {
                            Rank -= 10;
                        }
                        else if (t.Next != null && t.Next.IsChar('.') && t.Next.IsNewlineAfter)
                        {
                            Rank -= 10;
                        }
                    }
                    continue;
                }
                // 6. Numbers: spelled-out numbers count as words, all other numbers as non-words.
                if (t is Pullenti.Ner.NumberToken)
                {
                    if ((t as Pullenti.Ner.NumberToken).Typ == Pullenti.Ner.NumberSpellingType.Words)
                    {
                        words++;
                        if (t.Chars.IsAllUpper)
                        {
                            upWords++;
                        }
                    }
                    else
                    {
                        notwords++;
                    }
                    continue;
                }
                // 7. Person attributes recognised at this position.
                Pullenti.Ner.Person.Internal.PersonAttrToken pat = Pullenti.Ner.Person.Internal.PersonAttrToken.TryAttach(t, null, Pullenti.Ner.Person.Internal.PersonAttrToken.PersonAttrAttachAttrs.No);
                if (pat != null)
                {
                    if (t.IsNewlineBefore)
                    {
                        // No penalty for an attribute in a non-nominative case or written in all upper case.
                        if (!pat.Morph.Case.IsUndefined && !pat.Morph.Case.IsNominative)
                        {
                        }
                        else if (pat.Chars.IsAllUpper)
                        {
                        }
                        else
                        {
                            Rank -= 20;
                        }
                    }
                    else if (t.Chars.IsAllLower)
                    {
                        Rank--;
                    }
                    // Count every token of the attribute as a word and leave t on its last token.
                    for (; t != null; t = t.Next)
                    {
                        words++;
                        if (t.Chars.IsAllUpper)
                        {
                            upWords++;
                        }
                        if (t == pat.EndToken)
                        {
                            break;
                        }
                    }
                    continue;
                }
                // 8. Organization type words recognised at this position.
                Pullenti.Ner.Org.Internal.OrgItemTypeToken oitt = Pullenti.Ner.Org.Internal.OrgItemTypeToken.TryAttach(t, true, null);
                if (oitt != null)
                {
                    if (oitt.Morph.Number != Pullenti.Morph.MorphNumber.Plural && !oitt.IsDoubtRootWord)
                    {
                        // Non-plural, non-doubtful type: counted as a word only in a non-nominative case,
                        // otherwise penalised (more so at the very start).
                        if (!oitt.Morph.Case.IsUndefined && !oitt.Morph.Case.IsNominative)
                        {
                            words++;
                            if (t.Chars.IsAllUpper)
                            {
                                upWords++;
                            }
                        }
                        else
                        {
                            Rank -= 4;
                            if (t == BeginToken)
                            {
                                Rank -= 5;
                            }
                        }
                    }
                    else
                    {
                        words += 1;
                        if (t.Chars.IsAllUpper)
                        {
                            upWords++;
                        }
                    }
                    t = oitt.EndToken;
                    continue;
                }
                // 9. Plain text tokens.
                Pullenti.Ner.TextToken tt = t as Pullenti.Ner.TextToken;
                if (tt != null)
                {
                    if (tt.IsChar('©'))
                    {
                        Rank -= 10;
                    }
                    if (tt.IsChar('_'))
                    {
                        Rank--;
                    }
                    if (tt.Chars.IsLetter)
                    {
                        // Only letter tokens longer than two characters are counted as words.
                        if (tt.LengthChar > 2)
                        {
                            words++;
                            if (t.Chars.IsAllUpper)
                            {
                                upWords++;
                            }
                        }
                    }
                    else if (!tt.IsChar(','))
                    {
                        notwords++;
                    }
                    // A pure verb is heavily penalised, uncounted as a word, and stops the scan.
                    if (tt.IsPureVerb)
                    {
                        {
                            Rank -= 30;
                            words--;
                        }
                        break;
                    }
                    if (tt == EndToken)
                    {
                        // Ending on a preposition/conjunction is penalised; ending on '.' is rewarded.
                        if (tt.Morph.Class.IsPreposition || tt.Morph.Class.IsConjunction)
                        {
                            Rank -= 10;
                        }
                        else if (tt.IsChar('.'))
                        {
                            Rank += 5;
                        }
                    }
                    else if (tt.IsCharOf("._"))
                    {
                        // '.' or '_' in the middle of the span is penalised.
                        Rank -= 5;
                    }
                }
            }
            // Final adjustment by the word / non-word balance.
            Rank += words;
            Rank -= notwords;
            if ((words < 1) && (Rank < 50))
            {
                return(false);
            }
            if (tstart == null || tend == null)
            {
                return(false);
            }
            // The name start must not lie after the name end.
            if (tstart.EndChar > tend.EndChar)
            {
                return(false);
            }
            // A type or speciality item immediately after the span adds a bonus.
            // NOTE(review): EndToken.Next may be null here - presumably TryAttach tolerates null; confirm.
            TitleItemToken tit1 = TitleItemToken.TryAttach(EndToken.Next);

            if (tit1 != null && ((tit1.Typ == TitleItemToken.Types.Typ || tit1.Typ == TitleItemToken.Types.Speciality)))
            {
                if (tit1.EndToken.IsNewlineAfter)
                {
                    Rank += 15;
                }
                else
                {
                    Rank += 10;
                }
                if (tit1.Typ == TitleItemToken.Types.Speciality)
                {
                    Speciality = tit1.Value;
                }
            }
            // Mostly upper-case span (more than 4 upper-case words and more than 80% of all words)
            // directly preceded by a person referent gets an extra bonus.
            if (upWords > 4 && upWords > ((int)((0.8 * words))))
            {
                if (tstart.Previous != null && (tstart.Previous.GetReferent() is Pullenti.Ner.Person.PersonReferent))
                {
                    Rank += (5 + upWords);
                }
            }
            BeginNameToken = tstart;
            EndNameToken   = tend;
            return(true);
        }
Beispiel #22
0
        /// <summary>
        /// Merges a run of single-character tokens separated by exactly one whitespace each (letter-spaced
        /// text) into one token, provided the glued text is morphologically a single token that has a
        /// dictionary word form. The merged token replaces the run in the chain starting at FirstToken.
        /// </summary>
        void MergeLetters()
        {
            // beforeWord is true right after a successful merge; it relaxes the thresholds for the next run.
            bool          beforeWord = false;
            StringBuilder tmp        = new StringBuilder();

            for (Pullenti.Ner.Token t = FirstToken; t != null; t = t.Next)
            {
                Pullenti.Ner.TextToken tt = t as Pullenti.Ner.TextToken;
                // A run can only start at a one-letter text token. The null check guards the "as" cast,
                // which the original code dereferenced unconditionally.
                if (tt == null || !tt.Chars.IsLetter || tt.LengthChar != 1)
                {
                    beforeWord = false;
                    continue;
                }
                // The run must be separated from the preceding text by a wider gap than its inner spacing.
                int gapBefore = t.WhitespacesBeforeCount;
                if (!(gapBefore > 2 || ((gapBefore == 2 && beforeWord))))
                {
                    beforeWord = false;
                    continue;
                }
                // Collect the following one-character tokens that are each preceded by exactly one whitespace.
                int appended = 0;
                Pullenti.Ner.Token t1;
                tmp.Length = 0;
                tmp.Append(tt.GetSourceText());
                for (t1 = t; t1.Next != null; t1 = t1.Next)
                {
                    tt = t1.Next as Pullenti.Ner.TextToken;
                    // A non-text token ends the run (null-guarded for the same reason as above).
                    if (tt == null || tt.LengthChar != 1 || tt.WhitespacesBeforeCount != 1)
                    {
                        break;
                    }
                    appended++;
                    tmp.Append(tt.GetSourceText());
                }
                // Require at least 5 characters in total, or at least 3 right after a merged word.
                if (!(appended > 3 || ((appended > 1 && beforeWord))))
                {
                    beforeWord = false;
                    continue;
                }
                beforeWord = false;
                List <Pullenti.Morph.MorphToken> mt = Pullenti.Morph.MorphologyService.Process(tmp.ToString(), null, null);
                if (mt == null || mt.Count != 1)
                {
                    // Not a single word: skip the whole run.
                    t = t1;
                    continue;
                }
                foreach (Pullenti.Morph.MorphWordForm wf in mt[0].WordForms)
                {
                    if (wf.IsInDictionary)
                    {
                        beforeWord = true;
                        break;
                    }
                }
                if (!beforeWord)
                {
                    // Glued text is not a dictionary word: leave the letters as they are.
                    t = t1;
                    continue;
                }
                // Replace tokens t..t1 with one token covering the same character span.
                tt = new Pullenti.Ner.TextToken(mt[0], this, t.BeginChar, t1.EndChar);
                if (t == FirstToken)
                {
                    FirstToken = tt;
                }
                else
                {
                    tt.Previous = t.Previous;
                }
                tt.Next = t1.Next;
                t       = tt;
            }
        }
Beispiel #23
0
 /// <summary>
 /// Walks the token chain and replaces text tokens that are not in the dictionary with the variant
 /// suggested by <c>MorphologyService.CorrectWord</c>, recording each replacement in CorrectedTokens.
 /// </summary>
 /// <param name="lang">Language used for correction when the token's own language is undefined, and for
 /// the morphological analysis of the corrected word.</param>
 void CorrectWordsByMorph(Pullenti.Morph.MorphLang lang)
 {
     for (Pullenti.Ner.Token cur = FirstToken; cur != null; cur = cur.Next)
     {
         Pullenti.Ner.TextToken word = cur as Pullenti.Ner.TextToken;
         if (word == null || cur.Morph.ContainsAttr("прдктв.", null))
         {
             continue;
         }
         // Leave alone: dictionary words, words shorter than 4 characters, capitalised probable
         // surnames and all-upper-case words.
         Pullenti.Morph.MorphClass dictClass = cur.GetMorphClassInDictionary();
         if (!dictClass.IsUndefined || (cur.LengthChar < 4) || (cur.Morph.Class.IsProperSurname && !cur.Chars.IsAllLower) || cur.Chars.IsAllUpper)
         {
             continue;
         }
         string suggestion = Pullenti.Morph.MorphologyService.CorrectWord(word.Term, (cur.Morph.Language.IsUndefined ? lang : cur.Morph.Language));
         if (suggestion == null)
         {
             continue;
         }
         // The suggestion is accepted only when it is analysed as exactly one morph token.
         List <Pullenti.Morph.MorphToken> analysed = Pullenti.Morph.MorphologyService.Process(suggestion, lang, null);
         if (analysed == null || analysed.Count != 1)
         {
             continue;
         }
         // The replacement keeps the original character span and character info, and remembers the original term.
         Pullenti.Ner.TextToken replacement = new Pullenti.Ner.TextToken(analysed[0], this, cur.BeginChar, cur.EndChar);
         replacement.Chars = cur.Chars;
         replacement.Term0 = word.Term;
         if (replacement.GetMorphClassInDictionary().IsProperSurname)
         {
             continue;
         }
         // Splice the replacement into the chain in place of the current token.
         if (cur != FirstToken)
         {
             cur.Previous.Next = replacement;
         }
         else
         {
             FirstToken = replacement;
         }
         replacement.Next = cur.Next;
         cur              = replacement;
         if (CorrectedTokens == null)
         {
             CorrectedTokens = new Dictionary <Pullenti.Ner.Token, string>();
         }
         CorrectedTokens.Add(cur, cur.GetSourceText());
     }
 }
Beispiel #24
0
        /// <summary>
        /// Collects the text of a URI-like token run that starts at <paramref name="t0"/>.
        /// A domain name is tried first (via <c>AttachDomainName</c>); scanning then continues
        /// token by token, appending number tokens, letter tokens and the punctuation
        /// characters listed in <paramref name="chars"/>.
        /// </summary>
        /// <param name="t0">First token of the candidate sequence.</param>
        /// <param name="chars">Non-letter characters that are allowed inside the content.</param>
        /// <param name="canBeWhitespaces">
        /// When true, a whitespace gap on the same line may be bridged, provided a domain was
        /// recognized and the tokens after the gap still look like part of the URI.
        /// </param>
        /// <returns>
        /// The collected item spanning <paramref name="t0"/> to the last accepted token;
        /// the bare domain item (possibly null) when no letter or digit was collected after it;
        /// or null when the domain value is shorter than 3 characters or the result is just "www".
        /// </returns>
        static UriItemToken _AttachUriContent(Pullenti.Ner.Token t0, string chars, bool canBeWhitespaces = false)
        {
            StringBuilder txt = new StringBuilder();

            Pullenti.Ner.Token t1  = t0;
            UriItemToken       dom = AttachDomainName(t0, true, canBeWhitespaces);

            // A recognized domain shorter than three characters is rejected outright.
            if (dom != null && dom.Value.Length < 3)
            {
                return(null);
            }
            // Holds the currently open '(' or '[' so that a closing bracket is accepted only when it matches.
            char openChar = (char)0;

            // Content scanning starts right after the domain, or at t0 when no domain was found.
            Pullenti.Ner.Token t = (dom != null ? dom.EndToken.Next : t0);
            for (; t != null; t = t.Next)
            {
                if (t != t0 && t.IsWhitespaceBefore)
                {
                    // A gap is only bridged on the same line, when allowed, and when a domain precedes it.
                    if (t.IsNewlineBefore || !canBeWhitespaces)
                    {
                        break;
                    }
                    if (dom == null)
                    {
                        break;
                    }
                    if (t.Previous.IsHiphen)
                    {
                        // Hyphen before the gap: keep going.
                    }
                    else if (t.Previous.IsCharOf(",;"))
                    {
                        break;
                    }
                    else if (t.Previous.IsChar('.') && t.Chars.IsLetter && t.LengthChar == 2)
                    {
                        // A dot followed by a two-letter token: keep going.
                    }
                    else
                    {
                        // Look ahead over the whitespace-free run after the gap: it must reach
                        // a known page extension or a slash to be treated as a continuation.
                        bool ok = false;
                        Pullenti.Ner.Token tt1 = t;
                        if (t.IsCharOf("\\/"))
                        {
                            tt1 = t.Next;
                        }
                        Pullenti.Ner.Token tt0 = tt1;
                        for (; tt1 != null; tt1 = tt1.Next)
                        {
                            if (tt1 != tt0 && tt1.IsWhitespaceBefore)
                            {
                                break;
                            }
                            if (tt1 is Pullenti.Ner.NumberToken)
                            {
                                continue;
                            }
                            if (!(tt1 is Pullenti.Ner.TextToken))
                            {
                                break;
                            }
                            string term1 = (tt1 as Pullenti.Ner.TextToken).Term;
                            if (((term1 == "HTM" || term1 == "HTML" || term1 == "SHTML") || term1 == "ASP" || term1 == "ASPX") || term1 == "JSP")
                            {
                                ok = true;
                                break;
                            }
                            if (!tt1.Chars.IsLetter)
                            {
                                if (tt1.IsCharOf("\\/"))
                                {
                                    ok = true;
                                    break;
                                }
                                if (!tt1.IsCharOf(chars))
                                {
                                    break;
                                }
                            }
                            else if (!tt1.Chars.IsLatinLetter)
                            {
                                break;
                            }
                        }
                        if (!ok)
                        {
                            break;
                        }
                    }
                }
                if (t is Pullenti.Ner.NumberToken)
                {
                    Pullenti.Ner.NumberToken nt = t as Pullenti.Ner.NumberToken;
                    txt.Append(nt.GetSourceText());
                    t1 = t;
                    continue;
                }
                Pullenti.Ner.TextToken tt = t as Pullenti.Ner.TextToken;
                if (tt == null)
                {
                    // Not a plain text token: only two kinds of referent tokens are accepted.
                    Pullenti.Ner.ReferentToken rt = t as Pullenti.Ner.ReferentToken;
                    if (rt != null && rt.BeginToken.IsValue("РФ", null))
                    {
                        // A referent starting with "РФ" is taken only directly after a dot.
                        if (txt.Length > 0 && txt[txt.Length - 1] == '.')
                        {
                            txt.Append(rt.BeginToken.GetSourceText());
                            t1 = t;
                            continue;
                        }
                    }
                    if (rt != null && rt.Chars.IsLatinLetter && rt.BeginToken == rt.EndToken)
                    {
                        // A single-token Latin referent is appended as its source text.
                        txt.Append(rt.BeginToken.GetSourceText());
                        t1 = t;
                        continue;
                    }
                    break;
                }
                string src = tt.GetSourceText();
                char   ch  = src[0];
                if (!char.IsLetter(ch))
                {
                    if (chars.IndexOf(ch) < 0)
                    {
                        break;
                    }
                    if (ch == '(' || ch == '[')
                    {
                        openChar = ch;
                    }
                    else if (ch == ')')
                    {
                        if (openChar != '(')
                        {
                            break;
                        }
                        openChar = (char)0;
                    }
                    else if (ch == ']')
                    {
                        if (openChar != '[')
                        {
                            break;
                        }
                        openChar = (char)0;
                    }
                }
                txt.Append(src);
                t1 = t;
            }
            if (txt.Length == 0)
            {
                return(dom);
            }
            // The collected tail must contain at least one letter or digit; otherwise only the domain counts.
            int i;

            for (i = 0; i < txt.Length; i++)
            {
                if (char.IsLetterOrDigit(txt[i]))
                {
                    break;
                }
            }
            if (i >= txt.Length)
            {
                return(dom);
            }
            // A single trailing '.' or '/' is dropped from the text and the token range is moved back one token.
            if (txt[txt.Length - 1] == '.' || txt[txt.Length - 1] == '/')
            {
                txt.Length--;
                t1 = t1.Previous;
            }
            if (dom != null)
            {
                txt.Insert(0, dom.Value);
            }
            string tmp = txt.ToString();

            // URI text is not linguistic, so prefix checks are ordinal rather than culture-sensitive.
            if (tmp.StartsWith("\\\\", System.StringComparison.Ordinal))
            {
                txt.Replace("\\\\", "//");
                tmp = txt.ToString();
            }
            if (tmp.StartsWith("//", System.StringComparison.Ordinal))
            {
                tmp = tmp.Substring(2);
            }
            // A bare "www" (any letter case) is not a URI.
            if (string.Equals(tmp, "WWW", System.StringComparison.OrdinalIgnoreCase))
            {
                return(null);
            }
            UriItemToken res = new UriItemToken(t0, t1)
            {
                Value = txt.ToString()
            };

            return(res);
        }
Beispiel #25
0
        /// <summary>
        /// Tries to read a domain name or a dotted numeric address starting at <paramref name="t0"/>.
        /// Number tokens, letter tokens and the characters '.', '-' and '_' are concatenated
        /// (letters lower-cased) until a token that does not fit is met.
        /// </summary>
        /// <param name="t0">First token of the candidate sequence.</param>
        /// <param name="check">
        /// When true, the result is accepted only if it is a four-number dotted address with each
        /// number in 0..255, or its last token follows a dot and is recognized by <c>m_StdGroups</c>.
        /// </param>
        /// <param name="canBeWhitspaces">
        /// When true, a whitespace gap on the same line may be bridged if the tokens after it
        /// lead to an entry recognized by <c>m_StdGroups</c>.
        /// </param>
        /// <returns>
        /// An item spanning the consumed tokens with the lower-cased text as Value, or null when
        /// nothing was collected, the text starts with a dot, contains two adjacent dots,
        /// contains no inner dot, or fails the <paramref name="check"/> validation.
        /// </returns>
        public static UriItemToken AttachDomainName(Pullenti.Ner.Token t0, bool check, bool canBeWhitspaces)
        {
            StringBuilder txt = new StringBuilder();

            Pullenti.Ner.Token t1 = t0;
            int  ipCount          = 0;
            bool isIp             = true;

            for (Pullenti.Ner.Token t = t0; t != null; t = t.Next)
            {
                if (t.IsWhitespaceBefore && t != t0)
                {
                    // A gap ends the name unless look-ahead on the same line reaches an m_StdGroups entry.
                    bool ok = false;
                    if (!t.IsNewlineBefore && canBeWhitspaces)
                    {
                        for (Pullenti.Ner.Token tt1 = t; tt1 != null; tt1 = tt1.Next)
                        {
                            if (tt1.IsChar('.') || tt1.IsHiphen)
                            {
                                continue;
                            }
                            if (tt1.IsWhitespaceBefore)
                            {
                                if (tt1.IsNewlineBefore)
                                {
                                    break;
                                }
                                // Further gaps are tolerated only directly after a dot or hyphen.
                                if (tt1.Previous == null || !(tt1.Previous.IsChar('.') || tt1.Previous.IsHiphen))
                                {
                                    break;
                                }
                            }
                            if (!(tt1 is Pullenti.Ner.TextToken))
                            {
                                break;
                            }
                            if (m_StdGroups.TryParse(tt1, Pullenti.Ner.Core.TerminParseAttr.No) != null)
                            {
                                ok = true;
                                break;
                            }
                            if (!tt1.Chars.IsLatinLetter)
                            {
                                break;
                            }
                        }
                    }
                    if (!ok)
                    {
                        break;
                    }
                }
                if (t is Pullenti.Ner.NumberToken)
                {
                    Pullenti.Ner.NumberToken nt = t as Pullenti.Ner.NumberToken;
                    if (nt.IntValue == null)
                    {
                        break;
                    }
                    txt.Append(nt.GetSourceText());
                    t1 = t;
                    // Only digit-spelled numbers in 0..255 keep the "dotted address" hypothesis alive.
                    if (nt.Typ == Pullenti.Ner.NumberSpellingType.Digit && nt.IntValue.Value >= 0 && (nt.IntValue.Value < 256))
                    {
                        ipCount++;
                    }
                    else
                    {
                        isIp = false;
                    }
                    continue;
                }
                Pullenti.Ner.TextToken tt = t as Pullenti.Ner.TextToken;
                if (tt == null)
                {
                    break;
                }
                string src = tt.Term;
                char   ch  = src[0];
                if (!char.IsLetter(ch))
                {
                    if (".-_".IndexOf(ch) < 0)
                    {
                        break;
                    }
                    if (ch != '.')
                    {
                        isIp = false;
                    }
                    if (ch == '-')
                    {
                        // Special case: "vk.com" immediately followed by '-' is returned as is,
                        // without consuming the hyphen.
                        if (string.Equals(txt.ToString(), "vk.com", System.StringComparison.OrdinalIgnoreCase))
                        {
                            return new UriItemToken(t0, t1)
                            {
                                Value = txt.ToString().ToLowerInvariant()
                            };
                        }
                    }
                }
                else
                {
                    isIp = false;
                }
                // Domain text is not linguistic: lower-case it independently of the current culture.
                txt.Append(src.ToLowerInvariant());
                t1 = t;
            }
            if (txt.Length == 0)
            {
                return(null);
            }
            if (ipCount != 4)
            {
                isIp = false;
            }
            // Validate dot placement and count the inner dots.
            int i;
            int points = 0;

            for (i = 0; i < txt.Length; i++)
            {
                if (txt[i] == '.')
                {
                    if (i == 0)
                    {
                        return(null);
                    }
                    if (i >= (txt.Length - 1))
                    {
                        // A trailing dot is dropped and the token range is moved back one token.
                        txt.Length--;
                        t1 = t1.Previous;
                        break;
                    }
                    if (txt[i - 1] == '.' || txt[i + 1] == '.')
                    {
                        return(null);
                    }
                    points++;
                }
            }
            if (points == 0)
            {
                return(null);
            }

            if (check)
            {
                bool ok = isIp;
                if (!isIp)
                {
                    // NOTE(review): a value without a dot never gets here because of the
                    // points == 0 return above, so this comparison cannot currently succeed.
                    if (txt.ToString() == "localhost")
                    {
                        ok = true;
                    }
                }
                if (!ok && t1.Previous != null && t1.Previous.IsChar('.'))
                {
                    // The last label must be known to m_StdGroups (presumably the list of
                    // standard top-level domains — confirm against its initialization).
                    if (m_StdGroups.TryParse(t1, Pullenti.Ner.Core.TerminParseAttr.No) != null)
                    {
                        ok = true;
                    }
                }
                if (!ok)
                {
                    return(null);
                }
            }
            return(new UriItemToken(t0, t1)
            {
                Value = txt.ToString().ToLowerInvariant()
            });
        }
Beispiel #26
0
        /// <summary>
        /// Tries to build a verb phrase from the run of consecutive text tokens starting at
        /// <paramref name="t"/>. Each accepted token becomes a <c>VerbPhraseItemToken</c> with a
        /// normalized form; trailing adverb items are removed from the result.
        /// </summary>
        /// <param name="t">Token at which the phrase is expected to start.</param>
        /// <param name="canBePartition">
        /// When false, a dictionary verb that is also an adjective or carries a grammatical case
        /// ends the scan. When true, a leading preposition may be consumed at the first token
        /// and adjective tokens are examined as well.
        /// </param>
        /// <param name="canBeAdjPartition">
        /// Enables the adjective branch on its own and accepts an adjective as soon as any
        /// derivate groups are found for its normal form.
        /// </param>
        /// <param name="forceParse">Accepts a verb/noun-ambiguous word as a verb when no other rule did.</param>
        /// <returns>
        /// The phrase, or null when no verb-like item (kind 1 or 3 below) was collected, or when a
        /// leading preposition's <c>NextCase</c> shares no case with the first item.
        /// </returns>
        static VerbPhraseToken TryParseRu(Pullenti.Ner.Token t, bool canBePartition, bool canBeAdjPartition, bool forceParse)
        {
            VerbPhraseToken res = null;

            Pullenti.Ner.Token t0         = t;
            // Pending "НЕ" token; attached to the next accepted item and marks it with Not = true.
            Pullenti.Ner.Token not        = null;
            // True once an item of kind 1 or 3 has been added.
            bool             hasVerb      = false;
            // True when the most recent kind 1/3 item was a form of "to be" (tt.IsVerbBe).
            bool             verbBeBefore = false;
            PrepositionToken prep         = null;

            for (; t != null; t = t.Next)
            {
                if (!(t is Pullenti.Ner.TextToken))
                {
                    break;
                }
                Pullenti.Ner.TextToken tt = t as Pullenti.Ner.TextToken;
                bool isParticiple         = false;
                if (tt.Term == "НЕ")
                {
                    not = t;
                    continue;
                }
                // Kind of the current token: 0 = not part of the phrase, 1 = normalized as a verb,
                // 2 = adverb (sets IsAdverb), 3 = normalized as an adjective.
                int    ty   = 0;
                string norm = null;
                Pullenti.Morph.MorphClass mc = tt.GetMorphClassInDictionary();
                if (tt.Term == "НЕТ")
                {
                    if (hasVerb)
                    {
                        break;
                    }
                    ty = 1;
                }
                else if (tt.Term == "ДОПУСТИМО")
                {
                    ty = 3;
                }
                else if (mc.IsAdverb && !mc.IsVerb)
                {
                    ty = 2;
                }
                else if (tt.IsPureVerb || tt.IsVerbBe)
                {
                    ty = 1;
                    // A second verb is accepted only if it has the "инф." attribute (presumably
                    // the infinitive marker — confirm) or directly follows a "to be" form.
                    if (hasVerb)
                    {
                        if (!tt.Morph.ContainsAttr("инф.", null))
                        {
                            if (verbBeBefore)
                            {
                            }
                            else
                            {
                                break;
                            }
                        }
                    }
                }
                else if (mc.IsVerb)
                {
                    // Dictionary says "verb", but the word is ambiguous with other classes.
                    if (mc.IsPreposition || mc.IsMisc || mc.IsPronoun)
                    {
                    }
                    else if (mc.IsNoun)
                    {
                        if (tt.Term == "СТАЛИ" || tt.Term == "СТЕКЛО" || tt.Term == "БЫЛИ")
                        {
                            ty = 1;
                        }
                        else if (!tt.Chars.IsAllLower && !MiscHelper.CanBeStartOfSentence(tt))
                        {
                            ty = 1;
                        }
                        else if (mc.IsAdjective && canBePartition)
                        {
                            ty = 1;
                        }
                        else if (forceParse)
                        {
                            ty = 1;
                        }
                    }
                    else if (mc.IsProper)
                    {
                        if (tt.Chars.IsAllLower)
                        {
                            ty = 1;
                        }
                    }
                    else
                    {
                        ty = 1;
                    }
                    // A verb that is also an adjective, or that carries a case, is treated as a participle.
                    if (mc.IsAdjective)
                    {
                        isParticiple = true;
                    }
                    if (!tt.Morph.Case.IsUndefined)
                    {
                        isParticiple = true;
                    }
                    if (!canBePartition && isParticiple)
                    {
                        break;
                    }
                    // After a verb, a participle without the "инф." attribute ends the phrase.
                    if (hasVerb)
                    {
                        if (tt.Morph.ContainsAttr("инф.", null))
                        {
                        }
                        else if (!isParticiple)
                        {
                        }
                        else
                        {
                            break;
                        }
                    }
                }
                else if ((mc.IsAdjective && tt.Morph.ContainsAttr("к.ф.", null) && tt.Term.EndsWith("О")) && NounPhraseHelper.TryParse(tt, NounPhraseParseAttr.No, 0, null) == null)
                {
                    // Adjective with the "к.ф." attribute (presumably "short form" — confirm) ending
                    // in "О" that does not start a noun phrase: handled as an adverb.
                    ty = 2;
                }
                else if (mc.IsAdjective && ((canBePartition || canBeAdjPartition)))
                {
                    if (tt.Morph.ContainsAttr("к.ф.", null) && !canBeAdjPartition)
                    {
                        break;
                    }
                    norm = tt.GetNormalCaseText(Pullenti.Morph.MorphClass.Adjective, Pullenti.Morph.MorphNumber.Singular, Pullenti.Morph.MorphGender.Masculine, false);
                    if (norm.EndsWith("ЙШИЙ"))
                    {
                    }
                    else
                    {
                        // Look the normal form up in the derivate dictionary: the adjective is accepted
                        // when its groups contain both this exact spelling as an adjective+verb word
                        // and some pure verb word.
                        List <Pullenti.Semantic.Utils.DerivateGroup> grs = Pullenti.Semantic.Utils.DerivateService.FindDerivates(norm, true, null);
                        if (grs != null && grs.Count > 0)
                        {
                            bool hVerb = false;
                            bool hPart = false;
                            foreach (Pullenti.Semantic.Utils.DerivateGroup gr in grs)
                            {
                                foreach (Pullenti.Semantic.Utils.DerivateWord w in gr.Words)
                                {
                                    if (w.Class.IsAdjective && w.Class.IsVerb)
                                    {
                                        if (w.Spelling == norm)
                                        {
                                            hPart = true;
                                        }
                                    }
                                    else if (w.Class.IsVerb)
                                    {
                                        hVerb = true;
                                    }
                                }
                            }
                            if (hPart && hVerb)
                            {
                                ty = 3;
                            }
                            else if (canBeAdjPartition)
                            {
                                ty = 3;
                            }
                            // Second attempt: strip the first group's prefix from the normal form
                            // and repeat the same lookup on the remainder.
                            if (ty != 3 && !string.IsNullOrEmpty(grs[0].Prefix) && norm.StartsWith(grs[0].Prefix))
                            {
                                hVerb = false;
                                hPart = false;
                                string norm1 = norm.Substring(grs[0].Prefix.Length);
                                grs = Pullenti.Semantic.Utils.DerivateService.FindDerivates(norm1, true, null);
                                if (grs != null && grs.Count > 0)
                                {
                                    foreach (Pullenti.Semantic.Utils.DerivateGroup gr in grs)
                                    {
                                        foreach (Pullenti.Semantic.Utils.DerivateWord w in gr.Words)
                                        {
                                            if (w.Class.IsAdjective && w.Class.IsVerb)
                                            {
                                                if (w.Spelling == norm1)
                                                {
                                                    hPart = true;
                                                }
                                            }
                                            else if (w.Class.IsVerb)
                                            {
                                                hVerb = true;
                                            }
                                        }
                                    }
                                }
                                if (hPart && hVerb)
                                {
                                    ty = 3;
                                }
                            }
                        }
                    }
                }
                // Only at the very first token: an unrecognized word may be a preposition that
                // introduces the phrase; skip past it and keep scanning.
                if (ty == 0 && t == t0 && canBePartition)
                {
                    prep = PrepositionHelper.TryParse(t);
                    if (prep != null)
                    {
                        t = prep.EndToken;
                        continue;
                    }
                }
                if (ty == 0)
                {
                    break;
                }
                if (res == null)
                {
                    res = new VerbPhraseToken(t0, t);
                }
                res.EndToken = t;
                VerbPhraseItemToken it = new VerbPhraseItemToken(t, t)
                {
                    Morph = new Pullenti.Ner.MorphCollection(t.Morph)
                };
                // A pending "НЕ" is folded into this item and then cleared.
                if (not != null)
                {
                    it.BeginToken = not;
                    it.Not        = true;
                    not           = null;
                }
                it.IsAdverb = ty == 2;
                // For the first item after a leading preposition, its case must be compatible
                // with the case the preposition governs; otherwise the whole parse fails.
                if (prep != null && !t.Morph.Case.IsUndefined && res.Items.Count == 0)
                {
                    if (((prep.NextCase & t.Morph.Case)).IsUndefined)
                    {
                        return(null);
                    }
                    it.Morph.RemoveItems(prep.NextCase);
                    res.Preposition = prep;
                }
                if (norm == null)
                {
                    norm = t.GetNormalCaseText((ty == 3 ? Pullenti.Morph.MorphClass.Adjective : (ty == 2 ? Pullenti.Morph.MorphClass.Adverb : Pullenti.Morph.MorphClass.Verb)), Pullenti.Morph.MorphNumber.Singular, Pullenti.Morph.MorphGender.Masculine, false);
                    if (ty == 1 && !tt.Morph.Case.IsUndefined)
                    {
                        // Verb-kind token that carries a case: ask the morphology service for the
                        // nominative singular masculine form, reusing Misc from the first word form.
                        Pullenti.Morph.MorphWordForm mi = new Pullenti.Morph.MorphWordForm()
                        {
                            Case = Pullenti.Morph.MorphCase.Nominative, Number = Pullenti.Morph.MorphNumber.Singular, Gender = Pullenti.Morph.MorphGender.Masculine
                        };
                        foreach (Pullenti.Morph.MorphBaseInfo mit in tt.Morph.Items)
                        {
                            if (mit is Pullenti.Morph.MorphWordForm)
                            {
                                mi.Misc = (mit as Pullenti.Morph.MorphWordForm).Misc;
                                break;
                            }
                        }
                        // The term is prefixed with "КК" for the call and the first two characters of
                        // the answer are dropped again (presumably to avoid a dictionary hit on the
                        // bare word — TODO confirm).
                        string nnn = Pullenti.Morph.MorphologyService.GetWordform("КК" + (t as Pullenti.Ner.TextToken).Term, mi);
                        if (nnn != null)
                        {
                            norm = nnn.Substring(2);
                        }
                    }
                }
                it.Normal = norm;
                res.Items.Add(it);
                // The first verb-like item supplies the morphology of the whole phrase.
                if (!hasVerb && ((ty == 1 || ty == 3)))
                {
                    res.Morph = it.Morph;
                    hasVerb   = true;
                }
                if (ty == 1 || ty == 3)
                {
                    if (ty == 1 && tt.IsVerbBe)
                    {
                        verbBeBefore = true;
                    }
                    else
                    {
                        verbBeBefore = false;
                    }
                }
            }
            if (!hasVerb)
            {
                return(null);
            }
            // Drop adverbs from the end of the phrase; the first item is never removed.
            for (int i = res.Items.Count - 1; i > 0; i--)
            {
                if (res.Items[i].IsAdverb)
                {
                    res.Items.RemoveAt(i);
                    res.EndToken = res.Items[i - 1].EndToken;
                }
                else
                {
                    break;
                }
            }
            return(res);
        }
Beispiel #27
0
        /// <summary>
        /// Reads an ISBN-like run of digit groups joined by hyphens, optionally ending in the
        /// letter X, starting at <paramref name="t0"/>.
        /// </summary>
        /// <param name="t0">Token at which the number is expected to begin.</param>
        /// <returns>
        /// An item spanning the consumed tokens whose Value is the concatenated text,
        /// or null when fewer than 7 digits were collected.
        /// </returns>
        public static UriItemToken AttachISBN(Pullenti.Ner.Token t0)
        {
            StringBuilder buffer    = new StringBuilder();
            Pullenti.Ner.Token last = t0;
            int digitChars          = 0;

            Pullenti.Ner.Token cur = t0;
            while (cur != null && !cur.IsTableControlChar)
            {
                // A line break inside the number is tolerated only right after a hyphen.
                if (cur.IsNewlineBefore && cur != t0)
                {
                    if (cur.Previous == null || !cur.Previous.IsHiphen)
                    {
                        break;
                    }
                }

                Pullenti.Ner.NumberToken number = cur as Pullenti.Ner.NumberToken;
                if (number != null)
                {
                    // Only digit-spelled numbers without a morphological class are accepted.
                    bool plainDigits = number.Typ == Pullenti.Ner.NumberSpellingType.Digit && number.Morph.Class.IsUndefined;
                    if (!plainDigits)
                    {
                        break;
                    }
                    string piece = number.GetSourceText();
                    buffer.Append(piece);
                    digitChars += piece.Length;
                    last        = cur;
                    // Stop once more than 13 characters of digits have been gathered.
                    if (digitChars > 13)
                    {
                        break;
                    }
                    cur = cur.Next;
                    continue;
                }

                Pullenti.Ner.TextToken word = cur as Pullenti.Ner.TextToken;
                if (word == null)
                {
                    break;
                }
                string term = word.Term;
                bool isDash = term == "-";
                // Besides the hyphen, only Cyrillic "Х" or Latin "X" may appear.
                if (!isDash && term != "Х" && term != "X")
                {
                    break;
                }
                // The Cyrillic letter is stored as Latin "X".
                buffer.Append(isDash ? "-" : "X");
                last = cur;
                // An X is always the last accepted token.
                if (!isDash)
                {
                    break;
                }
                cur = cur.Next;
            }

            // Count the digits actually present in the collected text.
            int digitTotal = 0;
            foreach (char c in buffer.ToString())
            {
                if (char.IsDigit(c))
                {
                    digitTotal++;
                }
            }
            if (digitTotal < 7)
            {
                return(null);
            }

            UriItemToken result = new UriItemToken(t0, last);
            result.Value = buffer.ToString();
            return(result);
        }
Beispiel #28
0
        public static TitleItemToken TryAttach(Pullenti.Ner.Token t)
        {
            Pullenti.Ner.TextToken tt = t as Pullenti.Ner.TextToken;
            if (tt != null)
            {
                Pullenti.Ner.Token t1 = (Pullenti.Ner.Token)tt;
                if (tt.Term == "ТЕМА")
                {
                    TitleItemToken tit = TryAttach(tt.Next);
                    if (tit != null && tit.Typ == Types.Typ)
                    {
                        t1 = tit.EndToken;
                        if (t1.Next != null && t1.Next.IsChar(':'))
                        {
                            t1 = t1.Next;
                        }
                        return(new TitleItemToken(t, t1, Types.TypAndTheme)
                        {
                            Value = tit.Value
                        });
                    }
                    if (tt.Next != null && tt.Next.IsChar(':'))
                    {
                        t1 = tt.Next;
                    }
                    return(new TitleItemToken(tt, t1, Types.Theme));
                }
                if (tt.Term == "ПО" || tt.Term == "НА")
                {
                    if (tt.Next != null && tt.Next.IsValue("ТЕМА", null))
                    {
                        t1 = tt.Next;
                        if (t1.Next != null && t1.Next.IsChar(':'))
                        {
                            t1 = t1.Next;
                        }
                        return(new TitleItemToken(tt, t1, Types.Theme));
                    }
                }
                if (tt.Term == "ПЕРЕВОД" || tt.Term == "ПЕР")
                {
                    Pullenti.Ner.Token tt2 = tt.Next;
                    if (tt2 != null && tt2.IsChar('.'))
                    {
                        tt2 = tt2.Next;
                    }
                    if (tt2 is Pullenti.Ner.TextToken)
                    {
                        if ((tt2 as Pullenti.Ner.TextToken).Term == "C" || (tt2 as Pullenti.Ner.TextToken).Term == "С")
                        {
                            tt2 = tt2.Next;
                            if (tt2 is Pullenti.Ner.TextToken)
                            {
                                return(new TitleItemToken(t, tt2, Types.Translate));
                            }
                        }
                    }
                }
                if (tt.Term == "СЕКЦИЯ" || tt.Term == "SECTION" || tt.Term == "СЕКЦІЯ")
                {
                    t1 = tt.Next;
                    if (t1 != null && t1.IsChar(':'))
                    {
                        t1 = t1.Next;
                    }
                    Pullenti.Ner.Core.BracketSequenceToken br = Pullenti.Ner.Core.BracketHelper.TryParse(t1, Pullenti.Ner.Core.BracketParseAttr.No, 100);
                    if (br != null)
                    {
                        t1 = br.EndToken;
                    }
                    else if (t1 != tt.Next)
                    {
                        for (; t1 != null; t1 = t1.Next)
                        {
                            if (t1.IsNewlineAfter)
                            {
                                break;
                            }
                        }
                        if (t1 == null)
                        {
                            return(null);
                        }
                    }
                    if (t1 != tt.Next)
                    {
                        return(new TitleItemToken(tt, t1, Types.Dust));
                    }
                }
                t1 = null;
                if (tt.IsValue("СПЕЦИАЛЬНОСТЬ", "СПЕЦІАЛЬНІСТЬ"))
                {
                    t1 = tt.Next;
                }
                else if (tt.Morph.Class.IsPreposition && tt.Next != null && tt.Next.IsValue("СПЕЦИАЛЬНОСТЬ", "СПЕЦІАЛЬНІСТЬ"))
                {
                    t1 = tt.Next.Next;
                }
                else if (tt.IsChar('/') && tt.IsNewlineBefore)
                {
                    t1 = tt.Next;
                }
                if (t1 != null)
                {
                    if (t1.IsCharOf(":") || t1.IsHiphen)
                    {
                        t1 = t1.Next;
                    }
                    TitleItemToken spec = TryAttachSpeciality(t1, true);
                    if (spec != null)
                    {
                        spec.BeginToken = t;
                        return(spec);
                    }
                }
            }
            TitleItemToken sss = TryAttachSpeciality(t, false);

            if (sss != null)
            {
                return(sss);
            }
            if (t is Pullenti.Ner.ReferentToken)
            {
                return(null);
            }
            Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
            if (npt != null)
            {
                string s = npt.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false);
                Pullenti.Ner.Core.TerminToken tok = m_Termins.TryParse(npt.EndToken, Pullenti.Ner.Core.TerminParseAttr.No);
                if (tok != null)
                {
                    Types ty = (Types)tok.Termin.Tag;
                    if (ty == Types.Typ)
                    {
                        TitleItemToken tit = TryAttach(tok.EndToken.Next);
                        if (tit != null && tit.Typ == Types.Theme)
                        {
                            return new TitleItemToken(npt.BeginToken, tit.EndToken, Types.TypAndTheme)
                                   {
                                       Value = s
                                   }
                        }
                        ;
                        if (s == "РАБОТА" || s == "РОБОТА" || s == "ПРОЕКТ")
                        {
                            return(null);
                        }
                        Pullenti.Ner.Token t1 = tok.EndToken;
                        if (s == "ДИССЕРТАЦИЯ" || s == "ДИСЕРТАЦІЯ")
                        {
                            int err = 0;
                            for (Pullenti.Ner.Token ttt = t1.Next; ttt != null; ttt = ttt.Next)
                            {
                                if (ttt.Morph.Class.IsPreposition)
                                {
                                    continue;
                                }
                                if (ttt.IsValue("СОИСКАНИЕ", ""))
                                {
                                    continue;
                                }
                                Pullenti.Ner.Core.NounPhraseToken npt1 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(ttt, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                                if (npt1 != null && npt1.Noun.IsValue("СТЕПЕНЬ", "СТУПІНЬ"))
                                {
                                    t1 = (ttt = npt1.EndToken);
                                    continue;
                                }
                                Pullenti.Ner.ReferentToken rt = t1.Kit.ProcessReferent("PERSON", ttt);
                                if (rt != null && (rt.Referent is Pullenti.Ner.Person.PersonPropertyReferent))
                                {
                                    Pullenti.Ner.Person.PersonPropertyReferent ppr = rt.Referent as Pullenti.Ner.Person.PersonPropertyReferent;
                                    if (ppr.Name == "доктор наук")
                                    {
                                        t1 = rt.EndToken;
                                        s  = "ДОКТОРСКАЯ ДИССЕРТАЦИЯ";
                                        break;
                                    }
                                    else if (ppr.Name == "кандидат наук")
                                    {
                                        t1 = rt.EndToken;
                                        s  = "КАНДИДАТСКАЯ ДИССЕРТАЦИЯ";
                                        break;
                                    }
                                    else if (ppr.Name == "магистр")
                                    {
                                        t1 = rt.EndToken;
                                        s  = "МАГИСТЕРСКАЯ ДИССЕРТАЦИЯ";
                                        break;
                                    }
                                }
                                if (ttt.IsValue("ДОКТОР", null) || ttt.IsValue("КАНДИДАТ", null) || ttt.IsValue("МАГИСТР", "МАГІСТР"))
                                {
                                    t1   = ttt;
                                    npt1 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(ttt.Next, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                                    if (npt1 != null && npt1.EndToken.IsValue("НАУК", null))
                                    {
                                        t1 = npt1.EndToken;
                                    }
                                    s = (ttt.IsValue("МАГИСТР", "МАГІСТР") ? "МАГИСТЕРСКАЯ ДИССЕРТАЦИЯ" : (ttt.IsValue("ДОКТОР", null) ? "ДОКТОРСКАЯ ДИССЕРТАЦИЯ" : "КАНДИДАТСКАЯ ДИССЕРТАЦИЯ"));
                                    break;
                                }
                                if ((++err) > 3)
                                {
                                    break;
                                }
                            }
                        }
                        if (t1.Next != null && t1.Next.IsChar('.'))
                        {
                            t1 = t1.Next;
                        }
                        if (s.EndsWith("ОТЧЕТ") && t1.Next != null && t1.Next.IsValue("О", null))
                        {
                            Pullenti.Ner.Core.NounPhraseToken npt1 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t1.Next, Pullenti.Ner.Core.NounPhraseParseAttr.ParsePreposition, 0, null);
                            if (npt1 != null && npt1.Morph.Case.IsPrepositional)
                            {
                                t1 = npt1.EndToken;
                            }
                        }
                        return(new TitleItemToken(npt.BeginToken, t1, ty)
                        {
                            Value = s
                        });
                    }
                }
            }
            Pullenti.Ner.Core.TerminToken tok1 = m_Termins.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
            if (tok1 != null)
            {
                Pullenti.Ner.Token t1 = tok1.EndToken;
                TitleItemToken     re = new TitleItemToken(tok1.BeginToken, t1, (Types)tok1.Termin.Tag);
                return(re);
            }
            if (Pullenti.Ner.Core.BracketHelper.CanBeStartOfSequence(t, false, false))
            {
                tok1 = m_Termins.TryParse(t.Next, Pullenti.Ner.Core.TerminParseAttr.No);
                if (tok1 != null && Pullenti.Ner.Core.BracketHelper.CanBeEndOfSequence(tok1.EndToken.Next, false, null, false))
                {
                    Pullenti.Ner.Token t1 = tok1.EndToken.Next;
                    return(new TitleItemToken(tok1.BeginToken, t1, (Types)tok1.Termin.Tag));
                }
            }
            return(null);
        }
Beispiel #29
0
        /// <summary>
        /// Tries to build a denomination referent that starts at <paramref name="t"/> and is
        /// extended over the tokens that follow it: digit numbers, text tokens of at most
        /// three characters, and separator characters.
        /// </summary>
        /// <param name="t">First token of the candidate; <c>null</c> yields <c>null</c>.</param>
        /// <param name="forOntology">
        /// When <c>true</c>, whitespace between tokens does not stop the scan and the
        /// final plausibility checks are skipped.
        /// </param>
        /// <returns>
        /// The result of <c>TryAttachSpec</c> when it succeeds; otherwise a referent token
        /// spanning from <paramref name="t"/> to the last accepted token, or <c>null</c>
        /// when the candidate is rejected.
        /// </returns>
        public Pullenti.Ner.ReferentToken TryAttach(Pullenti.Ner.Token t, bool forOntology = false)
        {
            if (t == null)
            {
                return null;
            }
            Pullenti.Ner.ReferentToken special = this.TryAttachSpec(t);
            if (special != null)
            {
                return special;
            }
            if (t.Chars.IsAllLower)
            {
                // An all-lowercase start is accepted only when a number token follows it
                // without whitespace ...
                if (t.IsWhitespaceAfter || !(t.Next is Pullenti.Ner.NumberToken))
                {
                    return null;
                }
                // ... and the start is at the very beginning, after whitespace, or after ',' / ':'.
                if (t.Previous != null && !t.IsWhitespaceBefore && !t.Previous.IsCharOf(",:"))
                {
                    return null;
                }
            }

            // Text collected from the tokens AFTER t (t itself is not appended).
            StringBuilder tail = new StringBuilder();
            Pullenti.Ner.Token end = t;
            bool endsWithSeparator = false;
            bool valid = true;
            int numberCount = 0;
            int symbolCount = 0;

            for (Pullenti.Ner.Token cur = t.Next; cur != null; cur = cur.Next)
            {
                if (cur.IsWhitespaceBefore && !forOntology)
                {
                    break;
                }

                // Slash, backslash, underscore and hyphen are all recorded as '-'.
                endsWithSeparator = cur.IsCharOf("/\\_") || cur.IsHiphen;
                if (endsWithSeparator)
                {
                    tail.Append('-');
                    continue;
                }

                Pullenti.Ner.NumberToken number = cur as Pullenti.Ner.NumberToken;
                if (number != null)
                {
                    if (number.Typ == Pullenti.Ner.NumberSpellingType.Digit)
                    {
                        end = number;
                        tail.Append(number.GetSourceText());
                        numberCount++;
                        continue;
                    }
                    // Numbers that are not written with digits end the scan.
                    break;
                }

                Pullenti.Ner.TextToken word = cur as Pullenti.Ner.TextToken;
                if (word == null)
                {
                    break;
                }
                if (word.LengthChar > 3)
                {
                    // A text token longer than three characters invalidates the candidate.
                    valid = false;
                    break;
                }
                if (!char.IsLetter(word.Term[0]))
                {
                    // ',' / ':' or a possible closing bracket end the scan without invalidating it.
                    if (word.IsCharOf(",:") || Pullenti.Ner.Core.BracketHelper.CanBeEndOfSequence(word, false, null, false))
                    {
                        break;
                    }
                    // Only the listed special characters may be part of the denomination.
                    if (!word.IsCharOf("+*&^#@!"))
                    {
                        valid = false;
                        break;
                    }
                    symbolCount++;
                }
                end = word;
                tail.Append(word.GetSourceText());
            }

            if (!forOntology)
            {
                // Checks run left to right with short-circuiting, so CheckAttach is
                // consulted only when every cheaper check has passed.
                bool rejected = (tail.Length < 1)
                                || !valid
                                || endsWithSeparator
                                || tail.Length > 12
                                || tail[tail.Length - 1] == '!'
                                || (numberCount + symbolCount) == 0
                                || !this.CheckAttach(t, end);
                if (rejected)
                {
                    return null;
                }
            }

            DenominationReferent denomination = new DenominationReferent();
            denomination.AddValue(t, end);
            return new Pullenti.Ner.ReferentToken(denomination, t, end);
        }
Beispiel #30
0
        /// <summary>
        /// Tries to read an organization number starting at <paramref name="t"/>.
        /// </summary>
        /// <remarks>
        /// The following shapes are tried in order:
        /// a text token for which <c>MiscHelper.CheckNumberPrefix</c> returns a number token;
        /// a hyphen glued to a number on both sides; a number directly preceded by a hyphen;
        /// a bare number (optionally followed by "-number", "/number" or a single letter)
        /// for certain organization types; and a single letter followed by a number for
        /// certain organization types.
        /// </remarks>
        /// <param name="t">Token to start from; <c>null</c> yields <c>null</c>.</param>
        /// <param name="canBePureNumber">Not read by this implementation.</param>
        /// <param name="typ">
        /// Optional organization type; its <c>Typ</c> string enables the type-specific branches.
        /// </param>
        /// <returns>The number token, or <c>null</c> when nothing matched.</returns>
        public static OrgItemNumberToken TryAttach(Pullenti.Ner.Token t, bool canBePureNumber = false, OrgItemTypeToken typ = null)
        {
            if (t == null)
            {
                return(null);
            }
            Pullenti.Ner.TextToken tt = t as Pullenti.Ner.TextToken;
            if (tt != null)
            {
                Pullenti.Ner.Token t1 = Pullenti.Ner.Core.MiscHelper.CheckNumberPrefix(tt);
                if ((t1 is Pullenti.Ner.NumberToken) && !t1.IsNewlineBefore)
                {
                    OrgItemNumberToken res = new OrgItemNumberToken(tt, t1)
                    {
                        Number = (t1 as Pullenti.Ner.NumberToken).Value.ToString()
                    };
                    // "N/M" is consumed as one number only for the listed organization types.
                    if (t1.Next != null && t1.Next.IsCharOf("\\/") && (t1.Next.Next is Pullenti.Ner.NumberToken))
                    {
                        if (typ != null && ((typ.Typ == "офис" || typ.Typ == "банк" || typ.Typ == "отделение")))
                        {
                            res.EndToken = res.EndToken.Next.Next;
                            res.Number   = string.Format("{0}/{1}", res.Number, (res.EndToken as Pullenti.Ner.NumberToken).Value);
                        }
                    }
                    return(res);
                }
            }
            // Hyphen glued on both sides, followed by a number ("-5").
            if ((t.IsHiphen && (t.Next is Pullenti.Ner.NumberToken) && !t.IsWhitespaceBefore) && !t.IsWhitespaceAfter)
            {
                // Skipped when NumberHelper.TryParseAge recognises something at the number.
                if (Pullenti.Ner.Core.NumberHelper.TryParseAge(t.Next) == null)
                {
                    return new OrgItemNumberToken(t, t.Next)
                    {
                        Number = (t.Next as Pullenti.Ner.NumberToken).Value.ToString()
                    };
                }
            }
            if (t is Pullenti.Ner.NumberToken)
            {
                // Number that directly follows a hyphen.
                if ((!t.IsWhitespaceBefore && t.Previous != null && t.Previous.IsHiphen))
                {
                    return new OrgItemNumberToken(t, t)
                    {
                        Number = (t as Pullenti.Ner.NumberToken).Value.ToString()
                    };
                }
                if (typ != null && typ.Typ != null && (((typ.Typ == "войсковая часть" || typ.Typ == "військова частина" || typ.Typ.Contains("колония")) || typ.Typ.Contains("колонія") || typ.Typ.Contains("школа"))))
                {
                    // NOTE(review): this condition is always true (every length is >= 4 or <= 6).
                    // A 4..6 range check ('&&') may have been intended, but tightening it would
                    // reject shorter numbers, so the existing behaviour is deliberately kept.
                    if (t.LengthChar >= 4 || t.LengthChar <= 6)
                    {
                        OrgItemNumberToken res = new OrgItemNumberToken(t, t)
                        {
                            Number = (t as Pullenti.Ner.NumberToken).Value.ToString()
                        };
                        if (t.Next != null && ((t.Next.IsHiphen || t.Next.IsCharOf("\\/"))) && !t.Next.IsWhitespaceAfter)
                        {
                            // "N-M" / "N/M": both parts together must be shorter than 9 characters.
                            if ((t.Next.Next is Pullenti.Ner.NumberToken) && ((t.LengthChar + t.Next.Next.LengthChar) < 9))
                            {
                                res.EndToken = t.Next.Next;
                                res.Number   = string.Format("{0}-{1}", res.Number, (res.EndToken as Pullenti.Ner.NumberToken).Value);
                            }
                            // "N-x" / "N/x": a single letter after the separator is appended without it.
                            else if ((t.Next.Next is Pullenti.Ner.TextToken) && t.Next.Next.LengthChar == 1 && t.Next.Next.Chars.IsLetter)
                            {
                                res.EndToken = t.Next.Next;
                                res.Number   = string.Format("{0}{1}", res.Number, (res.EndToken as Pullenti.Ner.TextToken).Term);
                            }
                        }
                        // "Nx": a single letter glued directly to the number.
                        else if (((t.Next is Pullenti.Ner.TextToken) && t.Next.LengthChar == 1 && t.Next.Chars.IsLetter) && !t.IsWhitespaceAfter)
                        {
                            res.EndToken = t.Next;
                            res.Number   = string.Format("{0}{1}", res.Number, (res.EndToken as Pullenti.Ner.TextToken).Term);
                        }
                        return(res);
                    }
                }
            }
            // Single letter, either glued to what follows or upper-case with fewer than two whitespaces after it.
            if (((t is Pullenti.Ner.TextToken) && t.LengthChar == 1 && t.Chars.IsLetter) && ((!t.IsWhitespaceAfter || (((t.WhitespacesAfterCount < 2) && t.Chars.IsAllUpper)))))
            {
                if (typ != null && typ.Typ != null && (((typ.Typ == "войсковая часть" || typ.Typ == "військова частина" || typ.Typ.Contains("колония")) || typ.Typ.Contains("колонія"))))
                {
                    Pullenti.Ner.Token tt1 = t.Next;

                    // An optional hyphen between the letter and the number is skipped.
                    if (tt1 != null && tt1.IsHiphen)
                    {
                        tt1 = tt1.Next;
                    }
                    if (tt1 is Pullenti.Ner.NumberToken)
                    {
                        OrgItemNumberToken res = new OrgItemNumberToken(t, tt1);
                        res.Number = string.Format("{0}{1}", (t as Pullenti.Ner.TextToken).Term, (tt1 as Pullenti.Ner.NumberToken).Value);
                        return(res);
                    }
                }
            }
            return(null);
        }
    }
}