예제 #1
0
        /// <summary>
        /// Tries to extract a preposition starting at the given token.
        /// </summary>
        /// <param name="t">starting token</param>
        /// <returns>the preposition token, or null when no preposition starts at <paramref name="t"/></returns>
        public static PrepositionToken TryParse(Pullenti.Ner.Token t)
        {
            // Only text tokens are considered; this check also rejects null.
            if (!(t is Pullenti.Ner.TextToken))
            {
                return(null);
            }
            // Try the m_Ontology termin collection first: a match supplies the
            // canonical text and the case stored in the termin's Tag.
            TerminToken tok = m_Ontology.TryParse(t, TerminParseAttr.No);

            if (tok != null)
            {
                return new PrepositionToken(t, tok.EndToken)
                {
                    Normal = tok.Termin.CanonicText, NextCase = (Pullenti.Morph.MorphCase)tok.Termin.Tag
                };
            }
            // Otherwise fall back to the morphological dictionary.
            Pullenti.Morph.MorphClass mc = t.GetMorphClassInDictionary();
            if (!mc.IsPreposition)
            {
                return(null);
            }
            PrepositionToken res = new PrepositionToken(t, t);

            res.Normal   = t.GetNormalCaseText(Pullenti.Morph.MorphClass.Preposition, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false);
            res.NextCase = Pullenti.Morph.LanguageHelper.GetCaseAfterPreposition(res.Normal);
            // "preposition-preposition" written without whitespace after the first part
            // (presumably compound forms such as "из-за"): extend the result over the second part.
            if ((t.Next != null && t.Next.IsHiphen && !t.IsWhitespaceAfter) && (t.Next.Next is Pullenti.Ner.TextToken) && t.Next.Next.GetMorphClassInDictionary().IsPreposition)
            {
                res.EndToken = t.Next.Next;
            }
            return(res);
        }
예제 #2
0
        /// <summary>
        /// Builds a ranked sentence token starting at <paramref name="t"/>.
        /// The sentence runs until the next token that can start a sentence (or the end of the chain).
        /// </summary>
        /// <param name="t">candidate first token of the sentence</param>
        /// <returns>the sentence token with Rank and Value filled in, or null when
        /// <paramref name="t"/> is null or cannot start a sentence</returns>
        static AutoannoSentToken TryParse(Pullenti.Ner.Token t)
        {
            if (t == null || !Pullenti.Ner.Core.MiscHelper.CanBeStartOfSentence(t))
            {
                return(null);
            }

            AutoannoSentToken sentence = new AutoannoSentToken(t, t);
            bool sawPredicate = false;
            Pullenti.Ner.Token cur = t;

            while (cur != null)
            {
                // Stop at the beginning of the following sentence.
                if (Pullenti.Ner.Core.MiscHelper.CanBeStartOfSentence(cur) && cur != sentence.BeginToken)
                {
                    break;
                }

                Pullenti.Ner.Keyword.KeywordReferent keyword = cur.GetReferent() as Pullenti.Ner.Keyword.KeywordReferent;
                if (keyword != null)
                {
                    // Keywords raise the rank by their own rank; a predicate keyword marks the sentence as having a verb.
                    sentence.Rank += keyword.Rank;
                    if (keyword.Typ == Pullenti.Ner.Keyword.KeywordType.Predicate)
                    {
                        sawPredicate = true;
                    }
                }
                else if (cur is Pullenti.Ner.TextToken)
                {
                    // Plain words lower the rank: pronouns by 1, other words longer than one character by 0.1.
                    Pullenti.Morph.MorphClass wordClass = cur.GetMorphClassInDictionary();
                    if (wordClass.IsPronoun || wordClass.IsPersonalPronoun)
                    {
                        sentence.Rank -= 1;
                    }
                    else if (cur.LengthChar > 1)
                    {
                        sentence.Rank -= 0.1;
                    }
                }

                sentence.EndToken = cur;
                cur = cur.Next;
            }

            // Sentences without a predicate keyword keep only a third of their rank.
            if (!sawPredicate)
            {
                sentence.Rank /= 3;
            }

            Pullenti.Ner.Core.GetTextAttr textAttrs = Pullenti.Ner.Core.GetTextAttr.KeepRegister | Pullenti.Ner.Core.GetTextAttr.KeepQuotes;
            sentence.Value = Pullenti.Ner.Core.MiscHelper.GetTextValueOfMetaToken(sentence, textAttrs);
            return(sentence);
        }
예제 #3
0
 /// <summary>
 /// Walks the token chain and glues two adjacent word fragments (optionally separated by a hyphen)
 /// into a single text token when at least one fragment is not a dictionary word but the
 /// concatenation is.
 /// </summary>
 /// <param name="lang">language passed to the morphology service when analysing the merged word</param>
 void CorrectWordsByMerging(Pullenti.Morph.MorphLang lang)
 {
     for (Pullenti.Ner.Token t = FirstToken; t != null && t.Next != null; t = t.Next)
     {
         // The first fragment must be a letter token of at least two characters.
         if (!(t.Chars.IsLetter && t.LengthChar >= 2))
         {
             continue;
         }
         Pullenti.Morph.MorphClass headClass = t.GetMorphClassInDictionary();
         if (t.Morph.ContainsAttr("прдктв.", null))
         {
             continue;
         }

         // The second fragment is the next token; a hyphen in between is stepped over
         // unless a line break follows it or nothing follows it.
         Pullenti.Ner.Token tail = t.Next;
         bool stepOverHyphen = tail.IsHiphen && tail.Next != null && !tail.IsNewlineAfter;
         if (stepOverHyphen)
         {
             tail = tail.Next;
         }
         if (tail.LengthChar == 1)
         {
             continue;
         }

         // Both fragments must be letters of the same alphabet kind (Latin vs. non-Latin).
         bool compatibleLetters = tail.Chars.IsLetter && t.Chars.IsLetter && tail.Chars.IsLatinLetter == t.Chars.IsLatinLetter;
         if (!compatibleLetters)
         {
             continue;
         }

         // Letter-case filter, evaluated in the same order as three chained checks.
         if ((tail.Chars.IsAllUpper && !t.Chars.IsAllUpper) || !tail.Chars.IsAllLower || t.Chars.IsAllUpper)
         {
             continue;
         }
         if (tail.Morph.ContainsAttr("прдктв.", null))
         {
             continue;
         }

         // Nothing to repair when both fragments are already dictionary words.
         Pullenti.Morph.MorphClass tailClass = tail.GetMorphClassInDictionary();
         if (!(tailClass.IsUndefined || headClass.IsUndefined))
         {
             continue;
         }

         string headTerm = (t as Pullenti.Ner.TextToken).Term;
         string tailTerm = (tail as Pullenti.Ner.TextToken).Term;
         if ((headTerm.Length + tailTerm.Length) < 6)
         {
             continue;
         }
         string merged = headTerm + tailTerm;

         // The merged text must be analysed as exactly one morphological token.
         List <Pullenti.Morph.MorphToken> variants = Pullenti.Morph.MorphologyService.Process(merged, lang, null);
         if (variants == null || variants.Count != 1)
         {
             continue;
         }
         if (merged == "ПОСТ" || merged == "ВРЕД")
         {
             continue;
         }

         Pullenti.Ner.TextToken joined = new Pullenti.Ner.TextToken(variants[0], this, t.BeginChar, tail.EndChar);
         if (joined.GetMorphClassInDictionary().IsUndefined)
         {
             continue;
         }
         joined.Chars = t.Chars;

         // Splice the merged token into the chain in place of the fragments.
         if (t != FirstToken)
         {
             t.Previous.Next = joined;
         }
         else
         {
             FirstToken = joined;
         }
         if (tail.Next != null)
         {
             joined.Next = tail.Next;
         }
         t = joined;
     }
 }
예제 #4
0
        /// <summary>
        /// Builds the analysis container for a source: tokenizes the text morphologically,
        /// applies the optional corrections requested by the source settings, optionally embeds
        /// number tokens and, unless only tokenizing was requested, refines the morphology of
        /// unknown Cyrillic words and gathers statistics.
        /// </summary>
        /// <param name="sofa">source of analysis; when null the constructor returns immediately without building anything</param>
        /// <param name="onlyTokenizing">when true, stop after tokenization, corrections and number embedding</param>
        /// <param name="lang">language passed to the morphology service (may be null)</param>
        /// <param name="progress">progress callback passed to the morphology service for the main text (may be null)</param>
        public AnalysisKit(Pullenti.Ner.SourceOfAnalysis sofa = null, bool onlyTokenizing = false, Pullenti.Morph.MorphLang lang = null, ProgressChangedEventHandler progress = null)
        {
            if (sofa == null)
            {
                return;
            }
            m_Sofa    = sofa;
            StartDate = DateTime.Now;
            List <Pullenti.Morph.MorphToken> tokens = Pullenti.Morph.MorphologyService.Process(sofa.Text, lang, progress);

            // Build the linked chain of text tokens; t0 is the previously appended token.
            Pullenti.Ner.Token t0 = null;
            if (tokens != null)
            {
                for (int ii = 0; ii < tokens.Count; ii++)
                {
                    Pullenti.Morph.MorphToken mt = tokens[ii];
                    Pullenti.Ner.TextToken tt = new Pullenti.Ner.TextToken(mt, this);
                    if (sofa.CorrectionDict != null)
                    {
                        string corw;
                        if (sofa.CorrectionDict.TryGetValue(mt.Term, out corw))
                        {
                            // A user-supplied correction is applied only when the replacement
                            // is analysed as exactly one morphological token.
                            List <Pullenti.Morph.MorphToken> ccc = Pullenti.Morph.MorphologyService.Process(corw, lang, null);
                            if (ccc != null && ccc.Count == 1)
                            {
                                // The corrected token keeps the original positions and remembers the original term in Term0.
                                Pullenti.Ner.TextToken tt1 = new Pullenti.Ner.TextToken(ccc[0], this, tt.BeginChar, tt.EndChar)
                                {
                                    Term0 = tt.Term
                                };
                                tt1.Chars = tt.Chars;
                                tt        = tt1;
                                if (CorrectedTokens == null)
                                {
                                    CorrectedTokens = new Dictionary <Pullenti.Ner.Token, string>();
                                }
                                CorrectedTokens.Add(tt, tt.GetSourceText());
                            }
                        }
                    }
                    if (t0 == null)
                    {
                        FirstToken = tt;
                    }
                    else
                    {
                        t0.Next = tt;
                    }
                    t0 = tt;
                }
            }
            // Optional clean-up passes selected by the source settings.
            if (sofa.ClearDust)
            {
                this.ClearDust();
            }
            if (sofa.DoWordsMergingByMorph)
            {
                this.CorrectWordsByMerging(lang);
            }
            if (sofa.DoWordCorrectionByMorph)
            {
                this.CorrectWordsByMorph(lang);
            }
            this.MergeLetters();
            this.DefineBaseLanguage();
            if (sofa.CreateNumberTokens)
            {
                for (Pullenti.Ner.Token t = FirstToken; t != null; t = t.Next)
                {
                    Pullenti.Ner.NumberToken nt = NumberHelper.TryParseNumber(t);
                    if (nt == null)
                    {
                        continue;
                    }
                    this.EmbedToken(nt);
                    // Continue after the embedded number token.
                    t = nt;
                }
            }
            if (onlyTokenizing)
            {
                return;
            }
            // For unknown Cyrillic words longer than four characters, look for a neighbouring
            // dictionary word with the same two-character ending and use its morphology to
            // filter the unknown word's variants.
            for (Pullenti.Ner.Token t = FirstToken; t != null; t = t.Next)
            {
                if (t.Morph.Class.IsPreposition)
                {
                    continue;
                }
                Pullenti.Morph.MorphClass mc = t.GetMorphClassInDictionary();
                if (mc.IsUndefined && t.Chars.IsCyrillicLetter && t.LengthChar > 4)
                {
                    // NOTE(review): assumes EndChar is the inclusive index of the token's last
                    // character, so this is the token's last two characters - confirm.
                    string             tail = sofa.Text.Substring(t.EndChar - 1, 2);
                    Pullenti.Ner.Token tte  = null;
                    // Look backwards first, stepping over one comma/"and", preposition or conjunction.
                    Pullenti.Ner.Token tt   = t.Previous;
                    if (tt != null && ((tt.IsCommaAnd || tt.Morph.Class.IsPreposition || tt.Morph.Class.IsConjunction)))
                    {
                        tt = tt.Previous;
                    }
                    if ((tt != null && !tt.GetMorphClassInDictionary().IsUndefined&& ((tt.Morph.Class.Value & t.Morph.Class.Value)) != 0) && tt.LengthChar > 4)
                    {
                        string tail2 = sofa.Text.Substring(tt.EndChar - 1, 2);
                        if (tail2 == tail)
                        {
                            tte = tt;
                        }
                    }
                    if (tte == null)
                    {
                        // Nothing suitable before the word: apply the same test to the following neighbour.
                        tt = t.Next;
                        if (tt != null && ((tt.IsCommaAnd || tt.Morph.Class.IsPreposition || tt.Morph.Class.IsConjunction)))
                        {
                            tt = tt.Next;
                        }
                        if ((tt != null && !tt.GetMorphClassInDictionary().IsUndefined&& ((tt.Morph.Class.Value & t.Morph.Class.Value)) != 0) && tt.LengthChar > 4)
                        {
                            string tail2 = sofa.Text.Substring(tt.EndChar - 1, 2);
                            if (tail2 == tail)
                            {
                                tte = tt;
                            }
                        }
                    }
                    if (tte != null)
                    {
                        // Presumably drops the variants of t that do not agree with the neighbour - verify against RemoveItemsEx.
                        t.Morph.RemoveItemsEx(tte.Morph, tte.GetMorphClassInDictionary());
                    }
                }
            }
            this.CreateStatistics();
        }
예제 #5
0
        /// <summary>
        /// Tries to recognise a parenthetical (introductory) expression starting at the given token.
        /// </summary>
        /// <param name="t">starting token (may be null)</param>
        /// <returns>the recognised token, or null when nothing matches or <paramref name="t"/> is null</returns>
        public static ParenthesisToken TryAttach(Pullenti.Ner.Token t)
        {
            if (t == null)
            {
                return(null);
            }
            // 1. Expressions listed in the m_Termins collection.
            Pullenti.Ner.Core.TerminToken tok = m_Termins.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
            if (tok != null)
            {
                ParenthesisToken res = new ParenthesisToken(t, tok.EndToken);
                return(res);
            }
            if (!(t is Pullenti.Ner.TextToken))
            {
                return(null);
            }
            // 2. An adverb, or an adjective carrying both the "сравн." and "кач.прил." attributes
            //    (presumably a comparative qualitative adjective), followed by a comma or by a verb.
            Pullenti.Morph.MorphClass mc = t.GetMorphClassInDictionary();
            bool ok = false;

            Pullenti.Ner.Token t1;
            if (mc.IsAdverb)
            {
                ok = true;
            }
            else if (mc.IsAdjective)
            {
                if (t.Morph.ContainsAttr("сравн.", null) && t.Morph.ContainsAttr("кач.прил.", null))
                {
                    ok = true;
                }
            }
            if (ok && t.Next != null)
            {
                if (t.Next.IsChar(','))
                {
                    return(new ParenthesisToken(t, t));
                }
                t1 = t.Next;
                if (t1.GetMorphClassInDictionary() == Pullenti.Morph.MorphClass.Verb)
                {
                    // The verb must carry all three attributes (presumably present tense,
                    // imperfective aspect, active voice).
                    if (t1.Morph.ContainsAttr("н.вр.", null) && t1.Morph.ContainsAttr("нес.в.", null) && t1.Morph.ContainsAttr("дейст.з.", null))
                    {
                        return(new ParenthesisToken(t, t1));
                    }
                }
            }
            // 3. Reference-like introductions: "В СООТВЕТСТВИЕ <preposition> ...", "СОГЛАСНО ...",
            //    "В СИЛА ...", or "В <adjective/pronoun> ВИД/СЛУЧАЙ/СФЕРА".
            //    t1 is set to the token where the referenced object is expected.
            t1 = null;
            if ((t.IsValue("В", null) && t.Next != null && t.Next.IsValue("СООТВЕТСТВИЕ", null)) && t.Next.Next != null && t.Next.Next.Morph.Class.IsPreposition)
            {
                t1 = t.Next.Next.Next;
            }
            else if (t.IsValue("СОГЛАСНО", null))
            {
                t1 = t.Next;
            }
            else if (t.IsValue("В", null) && t.Next != null)
            {
                if (t.Next.IsValue("СИЛА", null))
                {
                    t1 = t.Next.Next;
                }
                else if (t.Next.Morph.Class.IsAdjective || t.Next.Morph.Class.IsPronoun)
                {
                    Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t.Next, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                    if (npt != null)
                    {
                        if (npt.Noun.IsValue("ВИД", null) || npt.Noun.IsValue("СЛУЧАЙ", null) || npt.Noun.IsValue("СФЕРА", null))
                        {
                            return(new ParenthesisToken(t, npt.EndToken));
                        }
                    }
                }
            }
            if (t1 != null && t1.Next != null)
            {
                // Step over an intermediate noun phrase headed by НОРМА / ПОЛОЖЕНИЕ / УКАЗАНИЕ.
                Pullenti.Ner.Core.NounPhraseToken npt1 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t1, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                if (npt1 != null)
                {
                    if (npt1.Noun.IsValue("НОРМА", null) || npt1.Noun.IsValue("ПОЛОЖЕНИЕ", null) || npt1.Noun.IsValue("УКАЗАНИЕ", null))
                    {
                        // May become null when that phrase ends the text; the check below
                        // then skips this branch instead of dereferencing null.
                        t1 = npt1.EndToken.Next;
                    }
                }
            }
            if (t1 != null)
            {
                Pullenti.Ner.Referent r = t1.GetReferent();
                if (r != null)
                {
                    ParenthesisToken res = new ParenthesisToken(t, t1)
                    {
                        Ref = r
                    };
                    if (t1.Next != null && t1.Next.IsComma)
                    {
                        // After "<referent>," look for a clause mentioning СИЛА / ДЕЙСТВИЕ that is closed
                        // by another comma; if found, the result is extended up to that clause's last token.
                        bool sila = false;
                        for (Pullenti.Ner.Token ttt = t1.Next.Next; ttt != null; ttt = ttt.Next)
                        {
                            if (ttt.IsValue("СИЛА", null) || ttt.IsValue("ДЕЙСТВИЕ", null))
                            {
                                sila = true;
                                continue;
                            }
                            if (ttt.IsComma)
                            {
                                if (sila)
                                {
                                    res.EndToken = ttt.Previous;
                                }
                                break;
                            }
                            if (Pullenti.Ner.Core.BracketHelper.CanBeStartOfSequence(ttt, false, false))
                            {
                                break;
                            }
                        }
                    }
                    return(res);
                }
                Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t1, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                if (npt != null)
                {
                    return(new ParenthesisToken(t, npt.EndToken));
                }
            }
            // 4. [НЕ] <preposition> <noun phrase>, accepted when followed by a comma
            //    or when the noun is ОЧЕРЕДЬ.
            Pullenti.Ner.Token tt = t;
            if (tt.IsValue("НЕ", null))
            {
                tt = tt.Next;
            }
            // tt is null when "НЕ" was the last token, so test for null before touching it.
            if (tt != null && tt.Morph.Class.IsPreposition)
            {
                tt = tt.Next;
                Pullenti.Ner.Core.NounPhraseToken npt1 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(tt, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                if (npt1 != null)
                {
                    tt = npt1.EndToken;
                    if (tt.Next != null && tt.Next.IsComma)
                    {
                        return(new ParenthesisToken(t, tt.Next));
                    }
                    if (npt1.Noun.IsValue("ОЧЕРЕДЬ", null))
                    {
                        return(new ParenthesisToken(t, tt));
                    }
                }
            }
            // 5. The single word "ВЕДЬ".
            if (t.IsValue("ВЕДЬ", null))
            {
                return(new ParenthesisToken(t, t));
            }
            return(null);
        }
예제 #6
0
        /// <summary>
        /// Tries to extract a conjunction starting at the given token.
        /// </summary>
        /// <param name="t">starting token</param>
        /// <returns>the conjunction token, or null when no conjunction starts at <paramref name="t"/></returns>
        public static ConjunctionToken TryParse(Pullenti.Ner.Token t)
        {
            // Only text tokens are considered; this check also rejects null.
            if (!(t is Pullenti.Ner.TextToken))
            {
                return(null);
            }
            if (t.IsComma)
            {
                // A comma followed by a conjunction forms one non-simple conjunction starting at the comma;
                // a lone comma is reported as a simple conjunction of type Comma.
                ConjunctionToken ne = TryParse(t.Next);
                if (ne != null)
                {
                    ne.BeginToken = t;
                    ne.IsSimple   = false;
                    return(ne);
                }
                return(new ConjunctionToken(t, t)
                {
                    Typ = ConjunctionType.Comma, IsSimple = true, Normal = ","
                });
            }
            // Conjunctions listed in the m_Ontology termin collection.
            TerminToken tok = m_Ontology.TryParse(t, TerminParseAttr.No);

            if (tok != null)
            {
                if (t.IsValue("ТО", null))
                {
                    // Reject "ТО" when it starts a noun phrase that extends beyond the matched termin.
                    NounPhraseToken npt = NounPhraseHelper.TryParse(t, NounPhraseParseAttr.ParseAdverbs, 0, null);
                    if (npt != null && npt.EndChar > tok.EndToken.EndChar)
                    {
                        return(null);
                    }
                }
                if (tok.Termin.Tag2 != null)
                {
                    // Termins marked with Tag2 must end in a text token, and if that token is a
                    // dictionary verb its term has to end with "АЯ".
                    if (!(tok.EndToken is Pullenti.Ner.TextToken))
                    {
                        return(null);
                    }
                    if (tok.EndToken.GetMorphClassInDictionary().IsVerb)
                    {
                        if (!(tok.EndToken as Pullenti.Ner.TextToken).Term.EndsWith("АЯ"))
                        {
                            return(null);
                        }
                    }
                }
                return(new ConjunctionToken(t, tok.EndToken)
                {
                    Normal = tok.Termin.CanonicText, Typ = (ConjunctionType)tok.Termin.Tag
                });
            }
            // Fall back to the morphological dictionary.
            if (!t.GetMorphClassInDictionary().IsConjunction)
            {
                return(null);
            }
            if (t.IsAnd || t.IsOr)
            {
                ConjunctionToken res = new ConjunctionToken(t, t)
                {
                    Normal = (t as Pullenti.Ner.TextToken).Term, IsSimple = true, Typ = (t.IsOr ? ConjunctionType.Or : ConjunctionType.And)
                };
                // Extend over "<and/or> ( <or> )" and "<and/or> / <or>" (slash or backslash).
                if (((t.Next != null && t.Next.IsChar('(') && (t.Next.Next is Pullenti.Ner.TextToken)) && t.Next.Next.IsOr && t.Next.Next.Next != null) && t.Next.Next.Next.IsChar(')'))
                {
                    res.EndToken = t.Next.Next.Next;
                }
                else if ((t.Next != null && t.Next.IsCharOf("\\/") && (t.Next.Next is Pullenti.Ner.TextToken)) && t.Next.Next.IsOr)
                {
                    res.EndToken = t.Next.Next;
                }
                return(res);
            }
            string term = (t as Pullenti.Ner.TextToken).Term;

            if (term == "НИ")
            {
                return new ConjunctionToken(t, t)
                {
                    Normal = term, Typ = ConjunctionType.Not
                };
            }
            if ((term == "А" || term == "НО" || term == "ЗАТО") || term == "ОДНАКО")
            {
                return new ConjunctionToken(t, t)
                {
                    Normal = term, Typ = ConjunctionType.But
                };
            }
            return(null);
        }
예제 #7
0
        /// <summary>
        /// Tries to recognise an eponym part of an organization name (a "named after ..." fragment)
        /// starting at the given token.
        /// </summary>
        /// <param name="t">starting token (may be null)</param>
        /// <param name="mustHasPrefix">when true, only fragments introduced by "ИМЕНИ"/"ІМЕНІ" or "ИМ"/"ІМ" are accepted</param>
        /// <returns>a token whose Eponyms list holds the recognised names, or null when nothing matches</returns>
        public static OrgItemEponymToken TryAttach(Pullenti.Ner.Token t, bool mustHasPrefix = false)
        {
            Pullenti.Ner.TextToken tt = t as Pullenti.Ner.TextToken;
            if (tt == null)
            {
                // Non-text start: either one of a few fixed dates or an age followed by a capitalised Cyrillic word.
                if (t == null)
                {
                    return(null);
                }
                Pullenti.Ner.Referent r1 = t.GetReferent();
                if (r1 != null && r1.TypeName == "DATE")
                {
                    string str = r1.ToString().ToUpper();
                    if ((str == "1 МАЯ" || str == "7 ОКТЯБРЯ" || str == "9 МАЯ") || str == "8 МАРТА")
                    {
                        OrgItemEponymToken dt = new OrgItemEponymToken(t, t)
                        {
                            Eponyms = new List <string>()
                        };
                        dt.Eponyms.Add(str);
                        return(dt);
                    }
                }
                Pullenti.Ner.NumberToken age = Pullenti.Ner.Core.NumberHelper.TryParseAge(t);
                if ((age != null && (((age.EndToken.Next is Pullenti.Ner.TextToken) || (age.EndToken.Next is Pullenti.Ner.ReferentToken))) && (age.WhitespacesAfterCount < 3)) && !age.EndToken.Next.Chars.IsAllLower && age.EndToken.Next.Chars.IsCyrillicLetter)
                {
                    OrgItemEponymToken dt = new OrgItemEponymToken(t, age.EndToken.Next)
                    {
                        Eponyms = new List <string>()
                    };
                    dt.Eponyms.Add(string.Format("{0} {1}", age.Value, dt.EndToken.GetSourceText().ToUpper()));
                    return(dt);
                }
                return(null);
            }
            // t1    - token where the name itself is expected to start;
            // full  - the prefix was unambiguous ("ИМЕНИ", or "ИМ" followed by a dot);
            // hasName - some form of the prefix was present.
            Pullenti.Ner.Token t1 = null;
            bool full             = false;
            bool hasName          = false;

            if (tt.Term == "ИМЕНИ" || tt.Term == "ІМЕНІ")
            {
                t1      = t.Next;
                full    = true;
                hasName = true;
            }
            else if (((tt.Term == "ИМ" || tt.Term == "ІМ")) && tt.Next != null)
            {
                if (tt.Next.IsChar('.'))
                {
                    t1   = tt.Next.Next;
                    full = true;
                }
                else if ((tt.Next is Pullenti.Ner.TextToken) && tt.Chars.IsAllLower && !tt.Next.Chars.IsAllLower)
                {
                    t1 = tt.Next;
                }
                hasName = true;
            }
            else if (tt.Previous != null && ((tt.Previous.IsValue("ФОНД", null) || tt.Previous.IsValue("ХРАМ", null) || tt.Previous.IsValue("ЦЕРКОВЬ", "ЦЕРКВА"))))
            {
                // No prefix, but the preceding word is ФОНД / ХРАМ / ЦЕРКОВЬ: accept a capitalised
                // Cyrillic word separated by exactly one whitespace, with several sanity filters.
                if ((!tt.Chars.IsCyrillicLetter || tt.Morph.Class.IsPreposition || tt.Morph.Class.IsConjunction) || !tt.Chars.IsLetter)
                {
                    return(null);
                }
                if (tt.WhitespacesBeforeCount != 1)
                {
                    return(null);
                }
                if (tt.Chars.IsAllLower)
                {
                    return(null);
                }
                if (tt.Morph.Class.IsAdjective)
                {
                    Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(tt, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                    if (npt != null && npt.BeginToken != npt.EndToken)
                    {
                        return(null);
                    }
                }
                OrgItemNameToken na = OrgItemNameToken.TryAttach(tt, null, false, true);
                if (na != null)
                {
                    if (na.IsEmptyWord || na.IsStdName || na.IsStdTail)
                    {
                        return(null);
                    }
                }
                t1 = tt;
            }
            if (t1 == null || ((t1.IsNewlineBefore && !full)))
            {
                return(null);
            }
            if (tt.Previous != null && tt.Previous.Morph.Class.IsPreposition)
            {
                return(null);
            }
            if (mustHasPrefix && !hasName)
            {
                return(null);
            }
            // "ИМЕНИ <date with a day but without a year>".
            Pullenti.Ner.Referent r = t1.GetReferent();
            if ((r != null && r.TypeName == "DATE" && full) && r.FindSlot("DAY", null, true) != null && r.FindSlot("YEAR", null, true) == null)
            {
                OrgItemEponymToken dt = new OrgItemEponymToken(t, t1)
                {
                    Eponyms = new List <string>()
                };
                dt.Eponyms.Add(r.ToString().ToUpper());
                return(dt);
            }
            bool holy = false;

            // Optional "saint" marker (possibly abbreviated with a dot) before the name.
            if ((t1.IsValue("СВЯТОЙ", null) || t1.IsValue("СВЯТИЙ", null) || t1.IsValue("СВ", null)) || t1.IsValue("СВЯТ", null))
            {
                t1   = t1.Next;
                holy = true;
                if (t1 != null && t1.IsChar('.'))
                {
                    t1 = t1.Next;
                }
            }
            if (t1 == null)
            {
                return(null);
            }
            Pullenti.Morph.MorphClass cl = t1.GetMorphClassInDictionary();
            if (cl.IsNoun || cl.IsAdjective)
            {
                // The name starts with a common word: ask the PERSON analyzer and accept a
                // multi-token person whose last token is the surname.
                Pullenti.Ner.ReferentToken rt = t1.Kit.ProcessReferent("PERSON", t1);
                if (rt != null && rt.Referent.TypeName == "PERSON" && rt.BeginToken != rt.EndToken)
                {
                    string e = rt.Referent.GetStringValue("LASTNAME");
                    if (e != null)
                    {
                        if (rt.EndToken.IsValue(e, null))
                        {
                            // NOTE(review): relies on OrgItemEponymToken initialising Eponyms itself - confirm.
                            OrgItemEponymToken re = new OrgItemEponymToken(t, rt.EndToken);
                            re.Eponyms.Add(rt.EndToken.GetSourceText());
                            return(re);
                        }
                    }
                }
            }
            // Anniversary form: "<N>-ЛЕТ/РОКІВ <noun phrase>".
            Pullenti.Ner.NumberToken nt = Pullenti.Ner.Core.NumberHelper.TryParseAnniversary(t1);
            if (nt != null && nt.Typ == Pullenti.Ner.NumberSpellingType.Age)
            {
                Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(nt.EndToken.Next, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                if (npt != null)
                {
                    string             s   = string.Format("{0}-{1} {2}", nt.Value, (t.Kit.BaseLanguage.IsUa ? "РОКІВ" : "ЛЕТ"), Pullenti.Ner.Core.MiscHelper.GetTextValue(npt.BeginToken, npt.EndToken, Pullenti.Ner.Core.GetTextAttr.No));
                    OrgItemEponymToken res = new OrgItemEponymToken(t, npt.EndToken);
                    res.Eponyms.Add(s);
                    return(res);
                }
            }
            List <PersonItemToken> its = PersonItemToken.TryAttach(t1);

            if (its == null)
            {
                // Not a person: a geographic referent is still accepted as an eponym.
                if ((t1 is Pullenti.Ner.ReferentToken) && (t1.GetReferent() is Pullenti.Ner.Geo.GeoReferent))
                {
                    string             s  = Pullenti.Ner.Core.MiscHelper.GetTextValue(t1, t1, Pullenti.Ner.Core.GetTextAttr.No);
                    OrgItemEponymToken re = new OrgItemEponymToken(t, t1);
                    re.Eponyms.Add(s);
                    return(re);
                }
                return(null);
            }
            // Interpret the person items; i is the index of the current item.
            List <string> eponims = new List <string>();
            int           i       = 0;

            // Skip one leading lower-case word.
            if (its[i].Typ == PersonItemType.LocaseWord)
            {
                i++;
            }
            if (i >= its.Count)
            {
                return(null);
            }
            if (!full)
            {
                if (its[i].BeginToken.Morph.Class.IsAdjective && !its[i].BeginToken.Morph.Class.IsProperSurname)
                {
                    return(null);
                }
            }
            if (its[i].Typ == PersonItemType.Initial)
            {
                // "I. [I.] Surname [and I. [I.] Surname ...]"
                i++;
                while (true)
                {
                    if ((i < its.Count) && its[i].Typ == PersonItemType.Initial)
                    {
                        i++;
                    }
                    if (i >= its.Count || ((its[i].Typ != PersonItemType.Surname && its[i].Typ != PersonItemType.Name)))
                    {
                        break;
                    }
                    eponims.Add(its[i].Value);
                    t1 = its[i].EndToken;
                    if ((i + 2) >= its.Count || its[i + 1].Typ != PersonItemType.And || its[i + 2].Typ != PersonItemType.Initial)
                    {
                        break;
                    }
                    i += 3;
                }
            }
            else if (((i + 1) < its.Count) && its[i].Typ == PersonItemType.Name && its[i + 1].Typ == PersonItemType.Surname)
            {
                // "Name Surname [and Name Surname]" - only the surnames are stored.
                eponims.Add(its[i + 1].Value);
                t1 = its[i + 1].EndToken;
                i += 2;
                if ((((i + 2) < its.Count) && its[i].Typ == PersonItemType.And && its[i + 1].Typ == PersonItemType.Name) && its[i + 2].Typ == PersonItemType.Surname)
                {
                    eponims.Add(its[i + 2].Value);
                    t1 = its[i + 2].EndToken;
                }
            }
            else if (its[i].Typ == PersonItemType.Surname)
            {
                // Exactly two remaining items with identical character class are merged into one surname.
                if (its.Count == (i + 2) && its[i].Chars == its[i + 1].Chars)
                {
                    its[i].Value   += (" " + its[i + 1].Value);
                    its[i].EndToken = its[i + 1].EndToken;
                    its.RemoveAt(i + 1);
                }
                eponims.Add(its[i].Value);
                if (((i + 1) < its.Count) && its[i + 1].Typ == PersonItemType.Name)
                {
                    // A trailing name is absorbed unless a further surname follows it.
                    if ((i + 2) == its.Count)
                    {
                        i++;
                    }
                    else if (its[i + 2].Typ != PersonItemType.Surname)
                    {
                        i++;
                    }
                }
                else if (((i + 1) < its.Count) && its[i + 1].Typ == PersonItemType.Initial)
                {
                    // Trailing one or two initials are absorbed when they end the item list.
                    if ((i + 2) == its.Count)
                    {
                        i++;
                    }
                    else if (its[i + 2].Typ == PersonItemType.Initial && (i + 3) == its.Count)
                    {
                        i += 2;
                    }
                }
                else if (((i + 2) < its.Count) && its[i + 1].Typ == PersonItemType.And && its[i + 2].Typ == PersonItemType.Surname)
                {
                    // "Surname and Surname": the second one is rejected when it parses as a noun phrase
                    // in a case that is neither genitive nor undefined.
                    bool ok = true;
                    Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(its[i + 2].BeginToken, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                    if (npt != null && !npt.Morph.Case.IsGenitive && !npt.Morph.Case.IsUndefined)
                    {
                        ok = false;
                    }
                    if (ok)
                    {
                        eponims.Add(its[i + 2].Value);
                        i += 2;
                    }
                }
                t1 = its[i].EndToken;
            }
            else if (its[i].Typ == PersonItemType.Name && holy)
            {
                // Saint's name, optionally followed by a second word of the same character class.
                t1 = its[i].EndToken;
                bool sec = false;
                if (((i + 1) < its.Count) && its[i].Chars == its[i + 1].Chars && its[i + 1].Typ != PersonItemType.Initial)
                {
                    sec = true;
                    t1  = its[i + 1].EndToken;
                }
                if (sec)
                {
                    eponims.Add(string.Format("СВЯТ.{0} {1}", its[i].Value, its[i + 1].Value));
                }
                else
                {
                    eponims.Add(string.Format("СВЯТ.{0}", its[i].Value));
                }
            }
            else if (full && (i + 1) == its.Count && ((its[i].Typ == PersonItemType.Name || its[i].Typ == PersonItemType.Surname)))
            {
                // With an unambiguous prefix a single name or surname is enough.
                t1 = its[i].EndToken;
                eponims.Add(its[i].Value);
            }
            else if ((its[i].Typ == PersonItemType.Name && its.Count == 3 && (i + 2) < its.Count && its[i + 1].Typ == PersonItemType.Name) && its[i + 2].Typ == PersonItemType.Surname)
            {
                // "Name Name Surname". The (i + 2) bound check keeps its[i + 2] inside the list
                // when a leading lower-case word was skipped (i == 1 with three items).
                t1 = its[i + 2].EndToken;
                eponims.Add(string.Format("{0} {1} {2}", its[i].Value, its[i + 1].Value, its[i + 2].Value));
                i += 2;
            }
            if (eponims.Count == 0)
            {
                return(null);
            }
            return(new OrgItemEponymToken(t, t1)
            {
                Eponyms = eponims
            });
        }
예제 #8
0
        /// <summary>
        /// Parse the sentence item(s) that start at token <paramref name="t"/>.
        /// Candidates are tried in this order: referent token, delimiter, conjunction,
        /// (preposition +) number with optional noun phrase, then noun phrase / verb phrase /
        /// adverb combinations, and finally a lone adjective.
        /// </summary>
        /// <param name="t">first token of the item; null yields null</param>
        /// <param name="t1">bounding token: nothing is parsed when t begins after t1.EndChar (must not be null)</param>
        /// <param name="lev">nesting level; the method gives up when it exceeds 100</param>
        /// <param name="prev">previously parsed items, scanned backwards for a "one of" attribute; may be null</param>
        /// <returns>list of alternative/adjacent items starting at t, or null when nothing was recognized</returns>
        public static List<SentItem> ParseNearItems(Pullenti.Ner.Token t, Pullenti.Ner.Token t1, int lev, List<SentItem> prev)
        {
            // lev is passed on as lev + 1 to ParseSubsent / ParseParticiples; stop at depth 100.
            if (lev > 100)
            {
                return(null);
            }
            if (t == null || t.BeginChar > t1.EndChar)
            {
                return(null);
            }
            List<SentItem> res = new List<SentItem>();

            // An already recognized entity is a single item by itself.
            if (t is Pullenti.Ner.ReferentToken)
            {
                res.Add(new SentItem(t as Pullenti.Ner.MetaToken));
                return(res);
            }
            DelimToken delim = DelimToken.TryParse(t);

            if (delim != null)
            {
                res.Add(new SentItem(delim));
                return(res);
            }
            Pullenti.Ner.Core.ConjunctionToken conj = Pullenti.Ner.Core.ConjunctionHelper.TryParse(t);
            if (conj != null)
            {
                res.Add(new SentItem(conj));
                return(res);
            }

            // Optional preposition; the number (if any) is looked for right after it.
            Pullenti.Ner.Core.PrepositionToken prep = Pullenti.Ner.Core.PrepositionHelper.TryParse(t);
            Pullenti.Ner.Token t111 = (prep == null ? t : prep.EndToken.Next);
            // A number token that is an adjective but not a noun is not treated as a quantity.
            if ((t111 is Pullenti.Ner.NumberToken) && ((t111.Morph.Class.IsAdjective && !t111.Morph.Class.IsNoun)))
            {
                t111 = null;
            }
            Pullenti.Ner.Measure.Internal.NumbersWithUnitToken num = (t111 == null ? null : Pullenti.Ner.Measure.Internal.NumbersWithUnitToken.TryParse(t111, null, false, false, false, false));
            if (num != null)
            {
                if (num.Units.Count == 0)
                {
                    // Number without units: try to attach the following noun phrase as the counted object.
                    Pullenti.Ner.Core.NounPhraseToken npt1 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(num.EndToken.Next, m_NptAttrs, 0, null);
                    if (npt1 == null && num.EndToken.Next != null && num.EndToken.Next.IsValue("РАЗ", null))
                    {
                        npt1      = new Pullenti.Ner.Core.NounPhraseToken(num.EndToken.Next, num.EndToken.Next);
                        npt1.Noun = new Pullenti.Ner.MetaToken(num.EndToken.Next, num.EndToken.Next);
                    }
                    if (npt1 != null && prep != null)
                    {
                        if (npt1.Noun.EndToken.IsValue("РАЗ", null))
                        {
                            npt1.Morph.RemoveItems(prep.NextCase);
                        }
                        else if (((npt1.Morph.Case & prep.NextCase)).IsUndefined)
                        {
                            // The noun phrase case is incompatible with the preposition.
                            npt1 = null;
                        }
                        else
                        {
                            npt1.Morph.RemoveItems(prep.NextCase);
                        }
                    }
                    if ((npt1 != null && npt1.EndToken.IsValue("ОНИ", null) && npt1.Preposition != null) && npt1.Preposition.Normal == "ИЗ")
                    {
                        // "<number> ИЗ <pronoun>": the item takes the number's morphology and drops the inner preposition.
                        npt1.Morph       = new Pullenti.Ner.MorphCollection(num.EndToken.Morph);
                        npt1.Preposition = null;
                        string   nn  = num.ToString();
                        SentItem si1 = new SentItem(npt1);
                        if (nn == "1" && (num.EndToken is Pullenti.Ner.NumberToken) && (num.EndToken as Pullenti.Ner.NumberToken).EndToken.IsValue("ОДИН", null))
                        {
                            // The word "ОДИН" becomes a OneOf attribute instead of a quantity.
                            Pullenti.Semantic.SemAttribute a = new Pullenti.Semantic.SemAttribute()
                            {
                                Typ = Pullenti.Semantic.SemAttributeType.OneOf, Spelling = (num.EndToken as Pullenti.Ner.NumberToken).EndToken.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Singular, Pullenti.Morph.MorphGender.Undefined, false)
                            };
                            SemAttributeEx aex = new SemAttributeEx(num)
                            {
                                Attr = a
                            };
                            si1.Attrs = new List<SemAttributeEx>();
                            si1.Attrs.Add(aex);
                        }
                        else
                        {
                            si1.Quant = new Pullenti.Semantic.SemQuantity(nn, num.BeginToken, num.EndToken);
                        }
                        if (prep != null)
                        {
                            si1.Prep = prep.Normal;
                        }
                        res.Add(si1);
                        return(res);
                    }
                    if (npt1 != null)
                    {
                        SentItem si1 = new SentItem(npt1)
                        {
                            Quant = new Pullenti.Semantic.SemQuantity(num.ToString(), num.BeginToken, num.EndToken)
                        };
                        if (prep != null)
                        {
                            si1.Prep = prep.Normal;
                        }
                        if (npt1.EndToken.IsValue("РАЗ", null))
                        {
                            si1.Typ = SentItemType.Formula;
                        }
                        // The noun is not plural although the quantity is not "1":
                        // in the cases below the morphology is replaced by a bare plural.
                        if (((npt1.Morph.Number & Pullenti.Morph.MorphNumber.Plural)) == Pullenti.Morph.MorphNumber.Undefined && si1.Quant.Spelling != "1")
                        {
                            bool ok = false;
                            if (si1.Quant.Spelling.EndsWith("1"))
                            {
                                ok = true;
                            }
                            else if (si1.Typ == SentItemType.Formula)
                            {
                                ok = true;
                            }
                            else if (si1.Quant.Spelling.EndsWith("2") && npt1.Morph.Case.IsGenitive)
                            {
                                ok = true;
                            }
                            else if (si1.Quant.Spelling.EndsWith("3") && npt1.Morph.Case.IsGenitive)
                            {
                                ok = true;
                            }
                            else if (si1.Quant.Spelling.EndsWith("4") && npt1.Morph.Case.IsGenitive)
                            {
                                ok = true;
                            }
                            if (ok)
                            {
                                npt1.Morph        = new Pullenti.Ner.MorphCollection();
                                npt1.Morph.Number = Pullenti.Morph.MorphNumber.Plural;
                            }
                        }
                        res.Add(si1);
                        return(res);
                    }
                }
                // Number (with units, or with no usable noun phrase): the item is the number itself,
                // extended back to t so that it covers the preposition.
                num.BeginToken = t;
                num.Morph      = new Pullenti.Ner.MorphCollection(num.EndToken.Morph);
                SentItem si = new SentItem(num);
                if (prep != null)
                {
                    si.Prep = prep.Normal;
                }
                res.Add(si);
                if (si.Prep == "НА")
                {
                    // "НА <number> <less/greater adverb>": absorb the comparative adverb into the item.
                    AdverbToken aa = AdverbToken.TryParse(si.EndToken.Next);
                    if (aa != null && ((aa.Typ == Pullenti.Semantic.SemAttributeType.Less || aa.Typ == Pullenti.Semantic.SemAttributeType.Great)))
                    {
                        si.AddAttr(aa);
                        si.EndToken = aa.EndToken;
                    }
                }
                return(res);
            }

            // From here on num is always null.
            Pullenti.Morph.MorphClass mc = t.GetMorphClassInDictionary();
            AdverbToken adv = AdverbToken.TryParse(t);

            Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t, m_NptAttrs, 0, null);
            if (npt != null && (npt.EndToken is Pullenti.Ner.TextToken) && (npt.EndToken as Pullenti.Ner.TextToken).Term == "БЫЛИ")
            {
                npt = null;
            }
            if (npt != null && adv != null)
            {
                if (adv.EndChar > npt.EndChar)
                {
                    // The adverb reading is longer: prefer it.
                    npt = null;
                }
                else if (adv.EndChar == npt.EndChar)
                {
                    // Same span: return both readings.
                    res.Add(new SentItem(npt));
                    res.Add(new SentItem(adv));
                    return(res);
                }
            }
            if (npt != null && npt.Adjectives.Count == 0)
            {
                if (npt.EndToken.IsValue("КОТОРЫЙ", null) && t.Previous != null && t.Previous.IsCommaAnd)
                {
                    List<SentItem> res1 = ParseSubsent(npt, t1, lev + 1, prev);
                    if (res1 != null)
                    {
                        return(res1);
                    }
                }
                if (npt.EndToken.IsValue("СКОЛЬКО", null))
                {
                    // "СКОЛЬКО [ВСЕГО] <noun phrase>": merge into one phrase with the first word as an adjective.
                    Pullenti.Ner.Token tt1 = npt.EndToken.Next;
                    if (tt1 != null && tt1.IsValue("ВСЕГО", null))
                    {
                        tt1 = tt1.Next;
                    }
                    Pullenti.Ner.Core.NounPhraseToken npt1 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(tt1, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                    if (npt1 != null && !npt1.Morph.Case.IsUndefined && prep != null)
                    {
                        if (((prep.NextCase & npt1.Morph.Case)).IsUndefined)
                        {
                            npt1 = null;
                        }
                        else
                        {
                            npt1.Morph.RemoveItems(prep.NextCase);
                        }
                    }
                    if (npt1 != null)
                    {
                        npt1.BeginToken  = npt.BeginToken;
                        npt1.Preposition = npt.Preposition;
                        npt1.Adjectives.Add(new Pullenti.Ner.MetaToken(npt.EndToken, npt.EndToken));
                        npt = npt1;
                    }
                }
                if (npt.EndToken.Morph.Class.IsAdjective)
                {
                    // A phrase ending in an adjective that also parses as a verb phrase is dropped.
                    if (Pullenti.Ner.Core.VerbPhraseHelper.TryParse(t, true, false, false) != null)
                    {
                        npt = null;
                    }
                }
            }
            Pullenti.Ner.Core.VerbPhraseToken vrb = null;
            if (npt != null && npt.Adjectives.Count > 0)
            {
                vrb = Pullenti.Ner.Core.VerbPhraseHelper.TryParse(t, true, false, false);
                if (vrb != null && vrb.FirstVerb.IsParticiple)
                {
                    npt = null;
                }
            }
            else if (adv == null || npt != null)
            {
                vrb = Pullenti.Ner.Core.VerbPhraseHelper.TryParse(t, true, false, false);
            }
            if (npt != null)
            {
                res.Add(new SentItem(npt));
            }
            if (vrb != null && !vrb.FirstVerb.IsParticiple && !vrb.FirstVerb.IsDeeParticiple)
            {
                // Collect the dictionary verb forms; each one yields a separate alternative item.
                List<Pullenti.Morph.MorphWordForm> vars = new List<Pullenti.Morph.MorphWordForm>();
                foreach (Pullenti.Morph.MorphBaseInfo wf in vrb.FirstVerb.Morph.Items)
                {
                    if (wf.Class.IsVerb && (wf is Pullenti.Morph.MorphWordForm) && (wf as Pullenti.Morph.MorphWordForm).IsInDictionary)
                    {
                        vars.Add(wf as Pullenti.Morph.MorphWordForm);
                    }
                }
                if (vars.Count < 2)
                {
                    res.Add(new SentItem(vrb));
                }
                else
                {
                    vrb.FirstVerb.VerbMorph = vars[0];
                    res.Add(new SentItem(vrb));
                    for (int i = 1; i < vars.Count; i++)
                    {
                        // A fresh verb phrase object is needed for every variant.
                        vrb = Pullenti.Ner.Core.VerbPhraseHelper.TryParse(t, false, false, false);
                        if (vrb == null)
                        {
                            break;
                        }
                        vrb.FirstVerb.VerbMorph = vars[i];
                        res.Add(new SentItem(vrb));
                    }
                    // Move a leading imperative reading behind the next one.
                    // The Count guard covers the case when the re-parse above failed and only one item exists.
                    // NOTE(review): when a noun phrase was added before the verbs, res[0] is that noun phrase,
                    // not the first verb variant - confirm that swapping it is intended.
                    if (res.Count > 1 && vars[0].Misc.Mood == Pullenti.Morph.MorphMood.Imperative && vars[1].Misc.Mood != Pullenti.Morph.MorphMood.Imperative)
                    {
                        SentItem rr = res[0];
                        res[0] = res[1];
                        res[1] = rr;
                    }
                }
                return(res);
            }
            if (vrb != null)
            {
                // Participle / adverbial participle phrase.
                List<SentItem> res1 = ParseParticiples(vrb, t1, lev + 1);
                if (res1 != null)
                {
                    res.AddRange(res1);
                }
            }
            if (res.Count > 0)
            {
                return(res);
            }
            if (adv != null)
            {
                if (adv.Typ == Pullenti.Semantic.SemAttributeType.Other)
                {
                    Pullenti.Ner.Core.NounPhraseToken npt1 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(adv.EndToken.Next, m_NptAttrs, 0, null);
                    if (npt1 != null && npt1.EndToken.IsValue("ОНИ", null) && npt1.Preposition != null)
                    {
                        SentItem si1 = new SentItem(npt1);
                        Pullenti.Semantic.SemAttribute a = new Pullenti.Semantic.SemAttribute()
                        {
                            Typ = Pullenti.Semantic.SemAttributeType.Other, Spelling = adv.EndToken.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false)
                        };
                        // num is always null at this point (every num != null path has returned above),
                        // so the attribute is anchored to the adverb, as in the loop below.
                        SemAttributeEx aex = new SemAttributeEx(adv)
                        {
                            Attr = a
                        };
                        si1.Attrs = new List<SemAttributeEx>();
                        si1.Attrs.Add(aex);
                        if (prep != null)
                        {
                            si1.Prep = prep.Normal;
                        }
                        res.Add(si1);
                        return(res);
                    }
                    // Look back for an item carrying a OneOf attribute and create its "other" counterpart
                    // spanning the adverb. A null prev simply means there is nothing to look back at.
                    for (int i = (prev == null ? -1 : prev.Count - 1); i >= 0; i--)
                    {
                        if (prev[i].Attrs != null)
                        {
                            foreach (SemAttributeEx a in prev[i].Attrs)
                            {
                                if (a.Attr.Typ == Pullenti.Semantic.SemAttributeType.OneOf)
                                {
                                    SentItem si1 = new SentItem(prev[i].Source);
                                    Pullenti.Semantic.SemAttribute aa = new Pullenti.Semantic.SemAttribute()
                                    {
                                        Typ = Pullenti.Semantic.SemAttributeType.Other, Spelling = adv.EndToken.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false)
                                    };
                                    SemAttributeEx aex = new SemAttributeEx(adv)
                                    {
                                        Attr = aa
                                    };
                                    si1.Attrs = new List<SemAttributeEx>();
                                    si1.Attrs.Add(aex);
                                    if (prep != null)
                                    {
                                        si1.Prep = prep.Normal;
                                    }
                                    si1.BeginToken = adv.BeginToken;
                                    si1.EndToken   = adv.EndToken;
                                    res.Add(si1);
                                    return(res);
                                }
                            }
                        }
                    }
                }
                res.Add(new SentItem(adv));
                return(res);
            }
            if (mc.IsAdjective)
            {
                // A lone adjective is wrapped into a one-token noun phrase.
                npt = new Pullenti.Ner.Core.NounPhraseToken(t, t)
                {
                    Morph = new Pullenti.Ner.MorphCollection(t.Morph)
                };
                npt.Noun = new Pullenti.Ner.MetaToken(t, t);
                res.Add(new SentItem(npt));
                return(res);
            }
            return(null);
        }
예제 #9
0
        /// <summary>
        /// Try to attach an organization whose name starts at <paramref name="t"/> and is followed by
        /// a type item recognized by TryAttach (the name words come first, the type item ends the scan).
        /// When the type item itself is the first token, the name is taken from a bracket sequence after it.
        /// </summary>
        /// <param name="t">first token (an opening '(' is skipped and then a closing ')' is expected after the result)</param>
        /// <param name="canBeCyr">also accept Cyrillic-letter tokens as name words (otherwise only Latin ones)</param>
        /// <returns>referent token with a new OrganizationReferent, or null when nothing was attached</returns>
        public static Pullenti.Ner.ReferentToken TryAttachOrg(Pullenti.Ner.Token t, bool canBeCyr = false)
        {
            if (t == null)
            {
                return(null);
            }
            bool br = false;

            if (t.IsChar('(') && t.Next != null)
            {
                t  = t.Next;
                br = true;
            }
            if (t is Pullenti.Ner.NumberToken)
            {
                // Only a capitalized number written in words and being an adjective may start a name.
                if ((t as Pullenti.Ner.NumberToken).Typ == Pullenti.Ner.NumberSpellingType.Words && t.Morph.Class.IsAdjective && t.Chars.IsCapitalUpper)
                {
                }
                else
                {
                    return(null);
                }
            }
            else
            {
                if (t.Chars.IsAllLower)
                {
                    return(null);
                }
                if ((t.LengthChar < 3) && !t.Chars.IsLetter)
                {
                    return(null);
                }
                if (!t.Chars.IsLatinLetter)
                {
                    if (!canBeCyr || !t.Chars.IsCyrillicLetter)
                    {
                        return(null);
                    }
                }
            }
            Pullenti.Ner.Token t0 = t;      // first token of the name
            Pullenti.Ner.Token t1 = t0;     // last token of the name
            int            namWo  = 0;      // number of text tokens accepted into the name
            OrgItemEngItem tok    = null;   // the type item that terminates the name

            Pullenti.Ner.Geo.GeoReferent geo    = null;
            OrgItemTypeToken             addTyp = null;

            for (; t != null; t = t.Next)
            {
                if (t != t0 && t.WhitespacesBeforeCount > 1)
                {
                    break;
                }
                if (t.IsChar(')'))
                {
                    break;
                }
                if (t.IsChar('(') && t.Next != null)
                {
                    // "(<geo>)" - remember the geo object and skip the brackets.
                    if ((t.Next.GetReferent() is Pullenti.Ner.Geo.GeoReferent) && t.Next.Next != null && t.Next.Next.IsChar(')'))
                    {
                        geo = t.Next.GetReferent() as Pullenti.Ner.Geo.GeoReferent;
                        t   = t.Next.Next;
                        continue;
                    }
                    // "(<latin type>)" - remember the additional type and skip the brackets.
                    OrgItemTypeToken typ = OrgItemTypeToken.TryAttach(t.Next, true, null);
                    if ((typ != null && typ.EndToken.Next != null && typ.EndToken.Next.IsChar(')')) && typ.Chars.IsLatinLetter)
                    {
                        addTyp = typ;
                        t      = typ.EndToken.Next;
                        continue;
                    }
                    // "(<Capitalized word>)" - becomes a part of the name text.
                    if (((t.Next is Pullenti.Ner.TextToken) && t.Next.Next != null && t.Next.Next.IsChar(')')) && t.Next.Chars.IsCapitalUpper)
                    {
                        t1 = (t = t.Next.Next);
                        continue;
                    }
                    break;
                }
                tok = TryAttach(t, canBeCyr);
                if (tok == null && t.IsCharOf(".,") && t.Next != null)
                {
                    // The type item may be separated from the name by one or two '.' / ',' characters.
                    tok = TryAttach(t.Next, canBeCyr);
                    if (tok == null && t.Next.IsCharOf(",."))
                    {
                        tok = TryAttach(t.Next.Next, canBeCyr);
                    }
                }
                if (tok != null)
                {
                    if (tok.LengthChar == 1 && t0.Chars.IsCyrillicLetter)
                    {
                        return(null);
                    }
                    break;
                }
                if (t.IsHiphen && !t.IsWhitespaceAfter && !t.IsWhitespaceBefore)
                {
                    continue;
                }
                if (t.IsCharOf("&+") || t.IsAnd)
                {
                    continue;
                }
                if (t.IsChar('.'))
                {
                    if (t.Previous != null && t.Previous.LengthChar == 1)
                    {
                        continue;
                    }
                    else if (Pullenti.Ner.Core.MiscHelper.CanBeStartOfSentence(t.Next))
                    {
                        break;
                    }
                }
                if (!t.Chars.IsLatinLetter)
                {
                    if (!canBeCyr || !t.Chars.IsCyrillicLetter)
                    {
                        break;
                    }
                }
                if (t.Chars.IsAllLower)
                {
                    // Lower-case words are skipped only if they are prepositions/conjunctions or we are inside brackets.
                    if (t.Morph.Class.IsPreposition || t.Morph.Class.IsConjunction)
                    {
                        continue;
                    }
                    if (br)
                    {
                        continue;
                    }
                    break;
                }
                Pullenti.Morph.MorphClass mc = t.GetMorphClassInDictionary();
                if (mc.IsVerb)
                {
                    if (t.Next != null && t.Next.Morph.Class.IsPreposition)
                    {
                        break;
                    }
                }
                if (t.Next != null && t.Next.IsValue("OF", null))
                {
                    break;
                }
                if (t is Pullenti.Ner.TextToken)
                {
                    namWo++;
                }
                t1 = t;
            }
            if (tok == null)
            {
                return(null);
            }
            if (t0 == tok.BeginToken)
            {
                // The type item comes first: the name must follow in a bracket sequence.
                Pullenti.Ner.Core.BracketSequenceToken br2 = Pullenti.Ner.Core.BracketHelper.TryParse(tok.EndToken.Next, Pullenti.Ner.Core.BracketParseAttr.No, 100);
                if (br2 != null)
                {
                    Pullenti.Ner.Org.OrganizationReferent org1 = new Pullenti.Ner.Org.OrganizationReferent();
                    if (tok.ShortValue != null)
                    {
                        org1.AddTypeStr(tok.ShortValue);
                    }
                    org1.AddTypeStr(tok.FullValue);
                    string nam1 = Pullenti.Ner.Core.MiscHelper.GetTextValue(br2.BeginToken, br2.EndToken, Pullenti.Ner.Core.GetTextAttr.No);
                    if (nam1 != null)
                    {
                        org1.AddName(nam1, true, null);
                        return(new Pullenti.Ner.ReferentToken(org1, t0, br2.EndToken));
                    }
                }
                return(null);
            }
            Pullenti.Ner.Org.OrganizationReferent org = new Pullenti.Ner.Org.OrganizationReferent();
            Pullenti.Ner.Token te = tok.EndToken;   // last token of the resulting referent token
            if (tok.IsBank)
            {
                // For banks the type word itself is a part of the name.
                t1 = tok.EndToken;
            }
            if (tok.FullValue == "company" && (tok.WhitespacesAfterCount < 3))
            {
                // "company" directly followed by another type item: "company" goes to the name,
                // the following item becomes the type.
                OrgItemEngItem tok1 = TryAttach(tok.EndToken.Next, canBeCyr);
                if (tok1 != null)
                {
                    t1  = tok.EndToken;
                    tok = tok1;
                    te  = tok.EndToken;
                }
            }
            if (tok.FullValue == "company")
            {
                if (namWo == 0)
                {
                    return(null);
                }
            }
            string nam = Pullenti.Ner.Core.MiscHelper.GetTextValue(t0, t1, Pullenti.Ner.Core.GetTextAttr.IgnoreArticles);

            if (nam == "STOCK" && tok.FullValue == "company")
            {
                return(null);
            }
            string altNam = null;

            if (string.IsNullOrEmpty(nam))
            {
                return(null);
            }
            int i1 = nam.IndexOf('(');
            if (i1 > 0)
            {
                int i2 = nam.IndexOf(')');
                if (i1 < i2)
                {
                    // Keep the full text as an alternative name and cut the bracketed part out of the main one.
                    altNam = nam;
                    string tai = null;
                    if ((i2 + 1) < nam.Length)
                    {
                        // Take the text AFTER the closing bracket (the bracket itself must not leak into the name).
                        tai = nam.Substring(i2 + 1).Trim();
                    }
                    nam = nam.Substring(0, i1).Trim();
                    if (!string.IsNullOrEmpty(tai))
                    {
                        nam = string.Format("{0} {1}", nam, tai);
                    }
                }
            }
            if (tok.IsBank)
            {
                org.AddTypeStr((tok.Kit.BaseLanguage.IsEn ? "bank" : "банк"));
                org.AddProfile(Pullenti.Ner.Org.OrgProfile.Finance);
                if ((t1.Next != null && t1.Next.IsValue("OF", null) && t1.Next.Next != null) && t1.Next.Next.Chars.IsLatinLetter)
                {
                    // "Bank OF <name>": extend the name over the "OF ..." tail.
                    OrgItemNameToken nam0 = OrgItemNameToken.TryAttach(t1.Next, null, false, false);
                    if (nam0 != null)
                    {
                        te = nam0.EndToken;
                    }
                    else
                    {
                        te = t1.Next.Next;
                    }
                    nam = Pullenti.Ner.Core.MiscHelper.GetTextValue(t0, te, Pullenti.Ner.Core.GetTextAttr.No);
                    if (te.GetReferent() is Pullenti.Ner.Geo.GeoReferent)
                    {
                        org.AddGeoObject(te.GetReferent() as Pullenti.Ner.Geo.GeoReferent);
                    }
                }
                else if (t0 == t1)
                {
                    return(null);
                }
            }
            else
            {
                if (tok.ShortValue != null)
                {
                    org.AddTypeStr(tok.ShortValue);
                }
                org.AddTypeStr(tok.FullValue);
            }
            if (string.IsNullOrEmpty(nam))
            {
                return(null);
            }
            org.AddName(nam, true, null);
            if (altNam != null)
            {
                org.AddName(altNam, true, null);
            }
            Pullenti.Ner.ReferentToken res = new Pullenti.Ner.ReferentToken(org, t0, te);

            // Skip trailing '.' / ',' and try to attach one more type item right after them.
            t = te;
            while (t.Next != null)
            {
                if (t.Next.IsCharOf(",."))
                {
                    t = t.Next;
                }
                else
                {
                    break;
                }
            }
            if (t.WhitespacesAfterCount < 2)
            {
                tok = TryAttach(t.Next, canBeCyr);
                if (tok != null)
                {
                    if (tok.ShortValue != null)
                    {
                        org.AddTypeStr(tok.ShortValue);
                    }
                    org.AddTypeStr(tok.FullValue);
                    res.EndToken = tok.EndToken;
                }
            }
            if (geo != null)
            {
                org.AddGeoObject(geo);
            }
            if (addTyp != null)
            {
                org.AddType(addTyp, false);
            }
            if (!br)
            {
                return(res);
            }

            // The result was opened with '(': it must be closed by ')' or reach the end of the text.
            t = res.EndToken;
            if (t.Next == null)
            {
                // End of text without a closing bracket: keep the current end token
                // (assigning t.Next here would leave the result with a null EndToken).
                return(res);
            }
            if (!t.Next.IsChar(')'))
            {
                return(null);
            }
            res.EndToken = t.Next;
            return(res);
        }
예제 #10
0
        public static OrgItemNameToken TryAttach(Pullenti.Ner.Token t, OrgItemNameToken prev, bool extOnto, bool first)
        {
            if (t == null)
            {
                return(null);
            }
            if (t.IsValue("ОРДЕНА", null) && t.Next != null)
            {
                Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t.Next, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                if (npt != null)
                {
                    Pullenti.Ner.Token t1 = npt.EndToken;
                    if (((t1.IsValue("ЗНАК", null) || t1.IsValue("ДРУЖБА", null))) && (t1.WhitespacesAfterCount < 2))
                    {
                        npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t1.Next, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                        if (npt != null)
                        {
                            t1 = npt.EndToken;
                        }
                    }
                    return(new OrgItemNameToken(t, t1)
                    {
                        IsIgnoredPart = true
                    });
                }
                if (t.Next.GetMorphClassInDictionary().IsProperSurname)
                {
                    return new OrgItemNameToken(t, t.Next)
                           {
                               IsIgnoredPart = true
                           }
                }
                ;
                Pullenti.Ner.ReferentToken ppp = t.Kit.ProcessReferent("PERSON", t.Next);
                if (ppp != null)
                {
                    return new OrgItemNameToken(t, ppp.EndToken)
                           {
                               IsIgnoredPart = true
                           }
                }
                ;
                if ((t.WhitespacesAfterCount < 2) && Pullenti.Ner.Core.BracketHelper.CanBeStartOfSequence(t.Next, true, false))
                {
                    Pullenti.Ner.Core.BracketSequenceToken br = Pullenti.Ner.Core.BracketHelper.TryParse(t.Next, Pullenti.Ner.Core.BracketParseAttr.NearCloseBracket, 10);

                    if (br != null && (br.LengthChar < 40))
                    {
                        return new OrgItemNameToken(t, br.EndToken)
                               {
                                   IsIgnoredPart = true
                               }
                    }
                    ;
                }
            }
            if (first && t.Chars.IsCyrillicLetter && t.Morph.Class.IsPreposition)
            {
                if (!t.IsValue("ПО", null) && !t.IsValue("ПРИ", null))
                {
                    return(null);
                }
            }
            OrgItemNameToken res = _TryAttach(t, prev, extOnto);

            if (res == null)
            {
                if (extOnto)
                {
                    if ((t.GetReferent() is Pullenti.Ner.Geo.GeoReferent) || (((t is Pullenti.Ner.TextToken) && !t.IsChar(';'))))
                    {
                        return new OrgItemNameToken(t, t)
                               {
                                   Value = t.GetSourceText()
                               }
                    }
                    ;
                }
                return(null);
            }
            if (prev == null && !extOnto)
            {
                if (t.Kit.Ontology != null)
                {
                    Pullenti.Ner.Org.OrganizationAnalyzer.OrgAnalyzerData ad = t.Kit.Ontology._getAnalyzerData(Pullenti.Ner.Org.OrganizationAnalyzer.ANALYZER_NAME) as Pullenti.Ner.Org.OrganizationAnalyzer.OrgAnalyzerData;
                    if (ad != null)
                    {
                        Pullenti.Ner.Core.TerminToken tok = ad.OrgPureNames.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);

                        if (tok != null && tok.EndChar > res.EndChar)
                        {
                            res.EndToken = tok.EndToken;
                        }
                    }
                }
            }
            if (prev != null && !extOnto)
            {
                if ((prev.Chars.IsAllLower && !res.Chars.IsAllLower && !res.IsStdTail) && !res.IsStdName)
                {
                    if (prev.Chars.IsLatinLetter && res.Chars.IsLatinLetter)
                    {
                    }
                    else if (m_StdNouns.TryParse(res.BeginToken, Pullenti.Ner.Core.TerminParseAttr.No) != null)
                    {
                    }
                    else
                    {
                        return(null);
                    }
                }
            }
            if ((res.EndToken.Next != null && !res.EndToken.IsWhitespaceAfter && res.EndToken.Next.IsHiphen) && !res.EndToken.Next.IsWhitespaceAfter)
            {
                Pullenti.Ner.TextToken tt = res.EndToken.Next.Next as Pullenti.Ner.TextToken;
                if (tt != null)
                {
                    if (tt.Chars == res.Chars || tt.Chars.IsAllUpper)
                    {
                        res.EndToken = tt;
                        res.Value    = string.Format("{0}-{1}", res.Value, tt.Term);
                    }
                }
            }
            if ((res.EndToken.Next != null && res.EndToken.Next.IsAnd && res.EndToken.WhitespacesAfterCount == 1) && res.EndToken.Next.WhitespacesAfterCount == 1)
            {
                OrgItemNameToken res1 = _TryAttach(res.EndToken.Next.Next, prev, extOnto);
                if (res1 != null && res1.Chars == res.Chars && OrgItemTypeToken.TryAttach(res.EndToken.Next.Next, false, null) == null)
                {
                    if (!((res1.Morph.Case & res.Morph.Case)).IsUndefined)
                    {
                        res.EndToken = res1.EndToken;
                        res.Value    = string.Format("{0} {1} {2}", res.Value, (res.Kit.BaseLanguage.IsUa ? "ТА" : "И"), res1.Value);
                    }
                }
            }
            for (Pullenti.Ner.Token tt = res.BeginToken; tt != null && tt.EndChar <= res.EndChar; tt = tt.Next)
            {
                if (m_StdNouns.TryParse(tt, Pullenti.Ner.Core.TerminParseAttr.No) != null)
                {
                    res.StdOrgNameNouns++;
                }
            }
            if (m_StdNouns.TryParse(res.EndToken, Pullenti.Ner.Core.TerminParseAttr.No) != null)
            {
                int  cou = 1;
                bool non = false;
                Pullenti.Ner.Token et = res.EndToken;
                if (!_isNotTermNoun(res.EndToken))
                {
                    non = true;
                }
                bool br = false;
                for (Pullenti.Ner.Token tt = res.EndToken.Next; tt != null; tt = tt.Next)
                {
                    if (tt.IsTableControlChar)
                    {
                        break;
                    }
                    if (tt.IsChar('('))
                    {
                        if (!non)
                        {
                            break;
                        }
                        br = true;
                        continue;
                    }
                    if (tt.IsChar(')'))
                    {
                        br = false;
                        et = tt;
                        break;
                    }
                    if (!(tt is Pullenti.Ner.TextToken))
                    {
                        break;
                    }
                    if (tt.WhitespacesBeforeCount > 1)
                    {
                        if (tt.NewlinesBeforeCount > 1)
                        {
                            break;
                        }
                        if (tt.Chars != res.EndToken.Chars)
                        {
                            break;
                        }
                    }
                    if (tt.Morph.Class.IsPreposition || tt.IsCommaAnd)
                    {
                        continue;
                    }
                    Pullenti.Morph.MorphClass dd = tt.GetMorphClassInDictionary();
                    if (!dd.IsNoun && !dd.IsAdjective)
                    {
                        break;
                    }
                    Pullenti.Ner.Core.NounPhraseToken npt2 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(tt, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                    if (npt2 == null)
                    {
                        if (dd == Pullenti.Morph.MorphClass.Adjective)
                        {
                            continue;
                        }
                        break;
                    }
                    if (m_StdNouns.TryParse(npt2.EndToken, Pullenti.Ner.Core.TerminParseAttr.No) == null)
                    {
                        break;
                    }
                    if (npt2.EndToken.Chars != res.EndToken.Chars)
                    {
                        break;
                    }
                    if ((npt2.EndToken.IsValue("УПРАВЛЕНИЕ", null) || npt2.EndToken.IsValue("ИНСТИТУТ", null) || npt2.EndToken.IsValue("УПРАВЛІННЯ", null)) || npt2.EndToken.IsValue("ІНСТИТУТ", null) || tt.Previous.IsValue("ПРИ", null))
                    {
                        Pullenti.Ner.ReferentToken rt = tt.Kit.ProcessReferent(Pullenti.Ner.Org.OrganizationAnalyzer.ANALYZER_NAME, tt);
                        if (rt != null)
                        {
                            break;
                        }
                    }
                    cou++;
                    tt = npt2.EndToken;
                    if (!_isNotTermNoun(tt))
                    {
                        non = true;
                        et  = tt;
                    }
                }
                if (non && !br)
                {
                    res.StdOrgNameNouns += cou;
                    res.EndToken         = et;
                }
            }
            return(res);
        }
예제 #11
0
        /// <summary>
        /// Tries to build one organization-name item starting at token <paramref name="t"/>.
        /// </summary>
        /// <remarks>
        /// The alternatives are tried in a fixed order and the first one that applies decides the result:
        /// a referent token (DENOMINATION, or a Latin-script geo referent followed by a Latin item),
        /// a standard tail, a standard name, an English-style item, a ", X и Y" enumeration,
        /// an "&amp;" / AND / UND conjunction, a noun phrase, a conjunction followed by a noun phrase,
        /// a Russian preposition followed by a noun phrase, an English "OF ..." group and finally a single word
        /// that may be extended through hyphens, slashes and dots.
        /// </remarks>
        /// <param name="t">First token to analyse; <c>null</c> yields <c>null</c>.</param>
        /// <param name="prev">Previously attached name item, or <c>null</c> when there is none.</param>
        /// <param name="extOnto">
        /// When <c>true</c>, a noun phrase is accepted even if it fails the case checks against
        /// <paramref name="prev"/>; the flag is also forwarded to nested attach calls.
        /// </param>
        /// <returns>The recognised item, or <c>null</c> when nothing can be attached at <paramref name="t"/>.</returns>
        static OrgItemNameToken _TryAttach(Pullenti.Ner.Token t, OrgItemNameToken prev, bool extOnto)
        {
            if (t == null)
            {
                return null;
            }

            // Tokens that already carry a referent: only two shapes are accepted, everything else is rejected.
            Pullenti.Ner.Referent r = t.GetReferent();
            if (r != null)
            {
                if (r.TypeName == "DENOMINATION")
                {
                    return new OrgItemNameToken(t, t)
                    {
                        Value = r.ToString(true, t.Kit.BaseLanguage, 0), IsDenomination = true
                    };
                }
                if ((r is Pullenti.Ner.Geo.GeoReferent) && t.Chars.IsLatinLetter)
                {
                    // A Latin-script geo referent is glued to the Latin item that follows it.
                    OrgItemNameToken res2 = _TryAttach(t.Next, prev, extOnto);
                    if (res2 != null && res2.Chars.IsLatinLetter)
                    {
                        res2.BeginToken     = t;
                        res2.Value          = string.Format("{0} {1}", Pullenti.Ner.Core.MiscHelper.GetTextValueOfMetaToken(t as Pullenti.Ner.MetaToken, Pullenti.Ner.Core.GetTextAttr.No), res2.Value);
                        res2.IsInDictionary = false;
                        return res2;
                    }
                }
                return null;
            }

            // From here on only plain text tokens are handled.
            Pullenti.Ner.TextToken tt = t as Pullenti.Ner.TextToken;
            if (tt == null)
            {
                return null;
            }
            OrgItemNameToken res = null;

            // Standard tail, optionally preceded by a comma (the comma is included in the item).
            Pullenti.Ner.Core.TerminToken tok = m_StdTails.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
            if (tok == null && t.IsChar(','))
            {
                tok = m_StdTails.TryParse(t.Next, Pullenti.Ner.Core.TerminParseAttr.No);
            }
            if (tok != null)
            {
                // A termin without a tag is a standard tail, a tagged one is an empty word.
                return new OrgItemNameToken(t, tok.EndToken)
                {
                    Value = tok.Termin.CanonicText, IsStdTail = tok.Termin.Tag == null, IsEmptyWord = tok.Termin.Tag != null, Morph = tok.Morph
                };
            }

            // Standard name.
            tok = m_StdNames.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
            if (tok != null)
            {
                return new OrgItemNameToken(t, tok.EndToken)
                {
                    Value = tok.Termin.CanonicText, IsStdName = true
                };
            }

            // English-style item, optionally preceded by a comma; reported as a standard tail.
            OrgItemEngItem eng = OrgItemEngItem.TryAttach(t, false);
            if (eng == null && t.IsChar(','))
            {
                eng = OrgItemEngItem.TryAttach(t.Next, false);
            }
            if (eng != null)
            {
                return new OrgItemNameToken(t, eng.EndToken)
                {
                    Value = eng.FullValue, IsStdTail = true
                };
            }

            // A lower-case word may only follow an item that is lower-case or capitalised.
            if (tt.Chars.IsAllLower && prev != null)
            {
                if (!prev.Chars.IsAllLower && !prev.Chars.IsCapitalUpper)
                {
                    return null;
                }
            }

            // ", <phrase1> И <phrase2>": both phrases must match prev in character class and share a case with it,
            // and neither may start an organization type. Only <phrase1> is returned (the comma is not included).
            if (tt.IsChar(',') && prev != null)
            {
                Pullenti.Ner.Core.NounPhraseToken npt1 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t.Next, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                if (npt1 == null || npt1.Chars != prev.Chars || ((npt1.Morph.Case & prev.Morph.Case)).IsUndefined)
                {
                    return null;
                }
                if (OrgItemTypeToken.TryAttach(t.Next, false, null) != null)
                {
                    return null;
                }
                if (npt1.EndToken.Next == null || !npt1.EndToken.Next.IsValue("И", null))
                {
                    return null;
                }
                Pullenti.Ner.Token t1 = npt1.EndToken.Next;
                Pullenti.Ner.Core.NounPhraseToken npt2 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t1.Next, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                if (npt2 == null || npt2.Chars != prev.Chars || ((npt2.Morph.Case & npt1.Morph.Case & prev.Morph.Case)).IsUndefined)
                {
                    return null;
                }
                if (OrgItemTypeToken.TryAttach(t1.Next, false, null) != null)
                {
                    return null;
                }
                res = new OrgItemNameToken(npt1.BeginToken, npt1.EndToken)
                {
                    Morph = npt1.Morph, Value = npt1.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false)
                };
                res.IsNounPhrase       = true;
                res.IsAfterConjunction = true;
                if (prev.Preposition != null)
                {
                    res.Preposition = prev.Preposition;
                }
                return res;
            }

            // "&", "AND", "UND" after a previous item.
            if (((tt.IsChar('&') || tt.IsValue("AND", null) || tt.IsValue("UND", null))) && prev != null)
            {
                // One-character conjunction directly followed by a Latin word: take both tokens as they are.
                if ((tt.Next is Pullenti.Ner.TextToken) && tt.LengthChar == 1 && tt.Next.Chars.IsLatinLetter)
                {
                    res = new OrgItemNameToken(tt, tt.Next)
                    {
                        Chars = tt.Next.Chars
                    };
                    res.IsAfterConjunction = true;
                    res.Value = "& " + (tt.Next as Pullenti.Ner.TextToken).Term;
                    return res;
                }
                // Otherwise the right-hand side must be a full item with the same character class as prev.
                res = OrgItemNameToken.TryAttach(tt.Next, null, extOnto, false);
                if (res == null || res.Chars != prev.Chars)
                {
                    return null;
                }
                res.IsAfterConjunction = true;
                res.Value = "& " + res.Value;
                return res;
            }

            if (!tt.Chars.IsLetter)
            {
                return null;
            }

            // Noun phrase at t; phrases that contain an internal noun are not used.
            Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
            if (npt != null && npt.InternalNoun != null)
            {
                npt = null;
            }

            // explOk: at least one semantic link could be created between the phrase
            // that starts at the previous item's last token and this phrase.
            bool explOk = false;
            if (npt != null && prev != null && prev.EndToken.GetMorphClassInDictionary().IsNoun)
            {
                Pullenti.Ner.Core.NounPhraseToken npt0 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(prev.EndToken, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                if (npt0 != null)
                {
                    List <Pullenti.Semantic.Core.SemanticLink> links = Pullenti.Semantic.Core.SemanticHelper.TryCreateLinks(npt0, npt, null);
                    if (links != null && links.Count > 0)
                    {
                        explOk = true;
                    }
                }
            }

            if (npt != null && ((explOk || npt.Morph.Case.IsGenitive || ((prev != null && !((prev.Morph.Case & npt.Morph.Case)).IsUndefined)))))
            {
                // The phrase must not start with a verb, pronoun or preposition;
                // an adverb is tolerated only when it is followed by a hyphen.
                Pullenti.Morph.MorphClass mc = npt.BeginToken.GetMorphClassInDictionary();
                if (mc.IsVerb || mc.IsPronoun)
                {
                    return null;
                }
                if (mc.IsAdverb && !(npt.BeginToken.Next != null && npt.BeginToken.Next.IsHiphen))
                {
                    return null;
                }
                if (mc.IsPreposition)
                {
                    return null;
                }
                if (mc.IsNoun && npt.Chars.IsAllLower)
                {
                    // A lower-case noun is accepted only in dative, genitive, instrumental or prepositional case.
                    Pullenti.Morph.MorphCase ca = npt.Morph.Case;
                    if ((!ca.IsDative && !ca.IsGenitive && !ca.IsInstrumental) && !ca.IsPrepositional)
                    {
                        return null;
                    }
                }
                res = new OrgItemNameToken(npt.BeginToken, npt.EndToken)
                {
                    Morph = npt.Morph, Value = npt.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false)
                };
                res.IsNounPhrase = true;
                if ((npt.EndToken.WhitespacesAfterCount < 2) && (npt.EndToken.Next is Pullenti.Ner.TextToken))
                {
                    Pullenti.Ner.Core.NounPhraseToken npt2 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(npt.EndToken.Next, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                    if (npt2 != null && npt2.Morph.Case.IsGenitive && npt2.Chars.IsAllLower)
                    {
                        // Absorb a following lower-case genitive phrase unless it starts an organization type,
                        // an eponym or a (non-plural) PERSONPROPERTY referent.
                        OrgItemTypeToken           typ = OrgItemTypeToken.TryAttach(npt.EndToken.Next, true, null);
                        OrgItemEponymToken         epo = OrgItemEponymToken.TryAttach(npt.EndToken.Next, false);
                        Pullenti.Ner.ReferentToken rtt = t.Kit.ProcessReferent("PERSONPROPERTY", npt.EndToken.Next);
                        if (typ == null && epo == null && ((rtt == null || rtt.Morph.Number == Pullenti.Morph.MorphNumber.Plural)))
                        {
                            res.EndToken = npt2.EndToken;
                            res.Value    = string.Format("{0} {1}", res.Value, Pullenti.Ner.Core.MiscHelper.GetTextValueOfMetaToken(npt2, Pullenti.Ner.Core.GetTextAttr.No));
                        }
                    }
                    else if (npt.EndToken.Next.IsComma && (npt.EndToken.Next.Next is Pullenti.Ner.TextToken))
                    {
                        // ", <word that is both adjective and verb in the dictionary> <lower-case dative/genitive phrase>":
                        // absorbed when the word agrees with this phrase in case, gender and number.
                        Pullenti.Ner.Token        tt2 = npt.EndToken.Next.Next;
                        Pullenti.Morph.MorphClass mv2 = tt2.GetMorphClassInDictionary();
                        if (mv2.IsAdjective && mv2.IsVerb)
                        {
                            Pullenti.Morph.MorphBaseInfo bi = new Pullenti.Morph.MorphBaseInfo()
                            {
                                Case = npt.Morph.Case, Gender = npt.Morph.Gender, Number = npt.Morph.Number
                            };
                            if (tt2.Morph.CheckAccord(bi, false, false))
                            {
                                npt2 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(tt2.Next, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                                if (npt2 != null && ((npt2.Morph.Case.IsDative || npt2.Morph.Case.IsGenitive)) && npt2.Chars.IsAllLower)
                                {
                                    res.EndToken = npt2.EndToken;
                                    res.Value    = string.Format("{0} {1}", res.Value, Pullenti.Ner.Core.MiscHelper.GetTextValue(npt.EndToken.Next, res.EndToken, Pullenti.Ner.Core.GetTextAttr.No));
                                }
                            }
                        }
                    }
                }
                if (explOk)
                {
                    res.IsAfterConjunction = true;
                }
            }
            else if (npt != null && ((((prev != null && prev.IsNounPhrase && npt.Morph.Case.IsInstrumental)) || extOnto)))
            {
                // Instrumental phrase after a noun-phrase item, or any phrase when extOnto is set.
                res = new OrgItemNameToken(npt.BeginToken, npt.EndToken)
                {
                    Morph = npt.Morph, Value = npt.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false)
                };
                res.IsNounPhrase = true;
            }
            else if (tt.IsAnd)
            {
                // Conjunction: the right-hand side must be a noun phrase sharing a case with prev.
                res = TryAttach(tt.Next, prev, extOnto, false);
                if (res == null || !res.IsNounPhrase || prev == null)
                {
                    return null;
                }
                if (((prev.Morph.Case & res.Morph.Case)).IsUndefined)
                {
                    return null;
                }
                if (prev.Morph.Number != Pullenti.Morph.MorphNumber.Undefined && res.Morph.Number != Pullenti.Morph.MorphNumber.Undefined)
                {
                    if (((prev.Morph.Number & res.Morph.Number)) == Pullenti.Morph.MorphNumber.Undefined)
                    {
                        // Numbers disagree: require the same character class and no organization type right after.
                        if (prev.Chars != res.Chars)
                        {
                            return null;
                        }
                        if (OrgItemTypeToken.TryAttach(res.EndToken.Next, false, null) != null)
                        {
                            return null;
                        }
                    }
                }
                // Re-assigns the item's own Chars value; kept exactly as in the original.
                // NOTE(review): looks like a no-op unless the Chars setter has side effects - confirm.
                Pullenti.Morph.CharsInfo ci = res.Chars;
                res.Chars = ci;
                res.IsAfterConjunction = true;
                return res;
            }
            else if (tt.Term == "ПО" || tt.Term == "ПРИ" || tt.Term == "ЗА" || tt.Term == "С" || tt.Term == "В" || tt.Term == "НА")
            {
                // Preposition + noun phrase; every preposition requires specific cases of the phrase.
                npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t.Next, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                if (npt != null)
                {
                    if (m_VervotWords.TryParse(npt.EndToken, Pullenti.Ner.Core.TerminParseAttr.No) != null)
                    {
                        return null;
                    }
                    bool ok = false;
                    if (tt.Term == "ПО")
                    {
                        ok = npt.Morph.Case.IsDative;
                    }
                    else if (tt.Term == "С")
                    {
                        ok = npt.Morph.Case.IsInstrumental;
                    }
                    else if (tt.Term == "ЗА")
                    {
                        ok = npt.Morph.Case.IsGenitive | npt.Morph.Case.IsInstrumental;
                    }
                    else if (tt.Term == "НА")
                    {
                        ok = npt.Morph.Case.IsPrepositional;
                    }
                    else if (tt.Term == "В")
                    {
                        // Accepted only when the case fits and the next word is "СФЕРА" or "ОБЛАСТЬ".
                        ok = npt.Morph.Case.IsDative | npt.Morph.Case.IsPrepositional;
                        if (ok)
                        {
                            ok = false;
                            if (t.Next.IsValue("СФЕРА", null) || t.Next.IsValue("ОБЛАСТЬ", null))
                            {
                                ok = true;
                            }
                        }
                    }
                    else if (tt.Term == "ПРИ")
                    {
                        // Rejected when what follows is an organization type or a whole organization,
                        // or when the noun is "ПОДДЕРЖКА" / "УЧАСТИЕ".
                        ok = npt.Morph.Case.IsPrepositional;
                        if (ok)
                        {
                            if (OrgItemTypeToken.TryAttach(tt.Next, true, null) != null)
                            {
                                ok = false;
                            }
                            else
                            {
                                Pullenti.Ner.ReferentToken rt = tt.Kit.ProcessReferent(Pullenti.Ner.Org.OrganizationAnalyzer.ANALYZER_NAME, tt.Next);
                                if (rt != null)
                                {
                                    ok = false;
                                }
                            }
                        }
                        string s = npt.Noun.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false);
                        if (s == "ПОДДЕРЖКА" || s == "УЧАСТИЕ")
                        {
                            ok = false;
                        }
                    }
                    else
                    {
                        // All six prepositions are handled above; this is only a defensive fallback.
                        ok = npt.Morph.Case.IsPrepositional;
                    }
                    if (ok)
                    {
                        res = new OrgItemNameToken(t, npt.EndToken)
                        {
                            Morph = npt.Morph, Value = npt.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Singular, Pullenti.Morph.MorphGender.Undefined, false), Chars = npt.Chars
                        };
                        res.IsNounPhrase = true;
                        res.Preposition  = tt.Term;
                        if (((res.Value == "ДЕЛО" || res.Value == "ВОПРОС")) && !res.IsNewlineAfter)
                        {
                            // "ДЕЛО" / "ВОПРОС" pull in a following genitive item and then further items
                            // joined by commas or a conjunction (the conjunction ends the list).
                            OrgItemNameToken res2 = _TryAttach(res.EndToken.Next, res, extOnto);
                            if (res2 != null && res2.Morph.Case.IsGenitive)
                            {
                                res.Value    = string.Format("{0} {1}", res.Value, res2.Value);
                                res.EndToken = res2.EndToken;
                                for (Pullenti.Ner.Token ttt = res2.EndToken.Next; ttt != null; ttt = ttt.Next)
                                {
                                    if (!ttt.IsCommaAnd)
                                    {
                                        break;
                                    }
                                    OrgItemNameToken res3 = _TryAttach(ttt.Next, res2, extOnto);
                                    if (res3 == null)
                                    {
                                        break;
                                    }
                                    res.Value    = string.Format("{0} {1}", res.Value, res3.Value);
                                    res.EndToken = res3.EndToken;
                                    if (ttt.IsAnd)
                                    {
                                        break;
                                    }
                                    // Continue right after the absorbed item (the loop header steps to its Next).
                                    ttt = res.EndToken;
                                }
                            }
                        }
                    }
                }
                if (res == null)
                {
                    return null;
                }
            }
            else if (tt.Term == "OF")
            {
                // "OF [article] Word ...": collect the following Latin words up to a wide gap,
                // a non-Latin token or a preposition.
                Pullenti.Ner.Token t1 = tt.Next;
                if (t1 != null && Pullenti.Ner.Core.MiscHelper.IsEngArticle(t1))
                {
                    t1 = t1.Next;
                }
                if (t1 != null && t1.Chars.IsLatinLetter && !t1.Chars.IsAllLower)
                {
                    res = new OrgItemNameToken(t, t1)
                    {
                        Chars = t1.Chars, Morph = t1.Morph
                    };
                    for (Pullenti.Ner.Token ttt = t1.Next; ttt != null; ttt = ttt.Next)
                    {
                        if (ttt.WhitespacesBeforeCount > 2)
                        {
                            break;
                        }
                        if (Pullenti.Ner.Core.MiscHelper.IsEngAdjSuffix(ttt))
                        {
                            // Skip the suffix: one token here plus one more by the loop header.
                            ttt = ttt.Next;
                            continue;
                        }
                        if (!ttt.Chars.IsLatinLetter)
                        {
                            break;
                        }
                        if (ttt.Morph.Class.IsPreposition)
                        {
                            break;
                        }
                        t1 = (res.EndToken = ttt);
                    }
                    res.Value       = Pullenti.Ner.Core.MiscHelper.GetTextValue(t, t1, Pullenti.Ner.Core.GetTextAttr.IgnoreArticles);
                    res.Preposition = tt.Term;
                    return res;
                }
            }

            if (res == null)
            {
                // Fallback: a single word, later extended through "-", "/", "\" and ".".
                bool singleLatinLetter = tt.Chars.IsLatinLetter && tt.LengthChar == 1;
                if (!singleLatinLetter && (tt.Chars.IsAllLower || (tt.LengthChar < 2)))
                {
                    // Lower-case or one-character words are allowed only inside a Latin-script sequence.
                    if (!tt.Chars.IsLatinLetter || prev == null || !prev.Chars.IsLatinLetter)
                    {
                        return null;
                    }
                }
                if (tt.Chars.IsCyrillicLetter)
                {
                    Pullenti.Morph.MorphClass mc = tt.GetMorphClassInDictionary();
                    if (mc.IsVerb || mc.IsAdverb)
                    {
                        return null;
                    }
                }
                else if (tt.Chars.IsLatinLetter && !tt.IsWhitespaceAfter)
                {
                    // A short Latin word glued to a number is rejected.
                    if ((tt.LengthChar < 5) && (tt.Next is Pullenti.Ner.NumberToken))
                    {
                        return null;
                    }
                }
                res = new OrgItemNameToken(tt, tt)
                {
                    Value = tt.Term, Morph = tt.Morph
                };
                for (t = tt.Next; t != null; t = t.Next)
                {
                    if ((((t.IsHiphen || t.IsCharOf("\\/"))) && t.Next != null && (t.Next is Pullenti.Ner.TextToken)) && !t.IsWhitespaceBefore && !t.IsWhitespaceAfter)
                    {
                        // word-word, word/word, word\word written without spaces.
                        // NOTE(review): t.Previous is the separator matched above, so the '.' alternative
                        // of the ternary looks unreachable here - confirm.
                        t            = t.Next;
                        res.EndToken = t;
                        res.Value    = string.Format("{0}{1}{2}", res.Value, (t.Previous.IsChar('.') ? '.' : '-'), (t as Pullenti.Ner.TextToken).Term);
                    }
                    else if (t.IsChar('.'))
                    {
                        if (!t.IsWhitespaceAfter && !t.IsWhitespaceBefore && (t.Next is Pullenti.Ner.TextToken))
                        {
                            // word.word written without spaces.
                            res.EndToken = t.Next;
                            t            = t.Next;
                            res.Value    = string.Format("{0}.{1}", res.Value, (t as Pullenti.Ner.TextToken).Term);
                        }
                        else if ((t.Next != null && !t.IsNewlineAfter && t.Next.Chars.IsLatinLetter) && tt.Chars.IsLatinLetter)
                        {
                            // Latin word with a trailing dot followed by more Latin text: keep the dot in the item.
                            res.EndToken = t;
                        }
                        else
                        {
                            break;
                        }
                    }
                    else
                    {
                        break;
                    }
                }
            }

            // Mark the item as "in dictionary" when any letter token inside it (other than a conjunction
            // or preposition) has at least one dictionary word form.
            for (Pullenti.Ner.Token t0 = res.BeginToken; t0 != null; t0 = t0.Next)
            {
                tt = t0 as Pullenti.Ner.TextToken;
                if (tt != null && tt.IsLetters)
                {
                    if (!tt.Morph.Class.IsConjunction && !tt.Morph.Class.IsPreposition)
                    {
                        foreach (Pullenti.Morph.MorphBaseInfo mf in tt.Morph.Items)
                        {
                            Pullenti.Morph.MorphWordForm wf = mf as Pullenti.Morph.MorphWordForm;
                            if (wf != null && wf.IsInDictionary)
                            {
                                res.IsInDictionary = true;
                                break;
                            }
                        }
                    }
                }
                if (t0 == res.EndToken)
                {
                    break;
                }
            }

            // A single all-upper token directly followed by a number (optionally through a hyphen): append the number.
            if (res.BeginToken == res.EndToken && res.BeginToken.Chars.IsAllUpper)
            {
                if (res.EndToken.Next != null && !res.EndToken.IsWhitespaceAfter)
                {
                    Pullenti.Ner.Token t1 = res.EndToken.Next;
                    if (t1.Next != null && !t1.IsWhitespaceAfter && t1.IsHiphen)
                    {
                        t1 = t1.Next;
                    }
                    if (t1 is Pullenti.Ner.NumberToken)
                    {
                        res.Value   += (t1 as Pullenti.Ner.NumberToken).Value;
                        res.EndToken = t1;
                    }
                }
            }

            // A single token flagged IsLastLower: cut the value after the last upper-case character of the source text.
            if (res.BeginToken == res.EndToken && res.BeginToken.Chars.IsLastLower)
            {
                string src = res.BeginToken.GetSourceText();
                for (int i = src.Length - 1; i >= 0; i--)
                {
                    if (char.IsUpper(src[i]))
                    {
                        res.Value = src.Substring(0, i + 1);
                        break;
                    }
                }
            }
            return res;
        }
예제 #12
0
 /// <summary>
 /// Tries to parse an adverb-like construction that starts at the given token.
 /// </summary>
 /// <param name="t">start token; null is accepted and yields null</param>
 /// <returns>the recognised <c>AdverbToken</c>, or null when nothing matches</returns>
 public static AdverbToken TryParse(Pullenti.Ner.Token t)
 {
     if (t == null)
     {
         return(null);
     }
     // Leading "НЕ": parse what follows, mark the result as negated and
     // extend it backwards so that it starts at the particle.
     if ((t is Pullenti.Ner.TextToken) && (t as Pullenti.Ner.TextToken).Term == "НЕ")
     {
         AdverbToken nn = TryParse(t.Next);
         if (nn != null)
         {
             nn.Not        = true;
             nn.BeginToken = t;
             return(nn);
         }
     }
     // t0 remembers the original start; t may step over one leading preposition.
     Pullenti.Ner.Token t0 = t;
     Pullenti.Ner.Token t1;
     if (t.Next != null && t.Morph.Class.IsPreposition)
     {
         t = t.Next;
     }
     // Two-word patterns "ДРУГ [preposition] ДРУГ" and "САМ [preposition] СЕБЯ".
     if (t.IsValue("ДРУГ", null) || t.IsValue("САМ", null))
     {
         t1 = t.Next;
         if (t1 != null && t1.Morph.Class.IsPreposition)
         {
             t1 = t1.Next;
         }
         if (t1 != null)
         {
             if (t1.IsValue("ДРУГ", null) && t.IsValue("ДРУГ", null))
             {
                 return new AdverbToken(t0, t1)
                 {
                     Typ = Pullenti.Semantic.SemAttributeType.EachOther
                 };
             }
             if (t1.IsValue("СЕБЯ", null) && t.IsValue("САМ", null))
             {
                 return new AdverbToken(t0, t1)
                 {
                     Typ = Pullenti.Semantic.SemAttributeType.Himelf
                 };
             }
         }
     }
     // Lookup in m_Termins; the matched termin's Tag carries the attribute type.
     Pullenti.Ner.Core.TerminToken tok = m_Termins.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
     if (tok != null)
     {
         AdverbToken res = new AdverbToken(t0, tok.EndToken)
         {
             Typ = (Pullenti.Semantic.SemAttributeType)tok.Termin.Tag
         };
         // For Less/Great, absorb a following "ЧЕМ" (optionally preceded by a comma).
         t = res.EndToken.Next;
         if (t != null && t.IsComma)
         {
             t = t.Next;
         }
         if (res.Typ == Pullenti.Semantic.SemAttributeType.Less || res.Typ == Pullenti.Semantic.SemAttributeType.Great)
         {
             if (t != null && t.IsValue("ЧЕМ", null))
             {
                 res.EndToken = t;
             }
         }
         return(res);
     }
     // A dictionary adverb becomes a single-token result (a skipped preposition is not included).
     Pullenti.Morph.MorphClass mc = t.GetMorphClassInDictionary();
     if (mc.IsAdverb)
     {
         return(new AdverbToken(t, t));
     }
     // "НА ВСТРЕЧА" followed by an each-other construction: the result covers only these two tokens.
     if (t.IsValue("ВСТРЕЧА", null) && t.Previous != null && t.Previous.IsValue("НА", null))
     {
         AdverbToken ne = TryParse(t.Next);
         if (ne != null && ne.Typ == Pullenti.Semantic.SemAttributeType.EachOther)
         {
             return(new AdverbToken(t.Previous, t));
         }
     }
     return(null);
 }
예제 #13
0
        /// <summary>
        /// Tries to recover a sequence enclosed in quotes or brackets. Nesting, a possibly
        /// missing closing element, etc. are supported.
        /// </summary>
        /// <param name="t">start token</param>
        /// <param name="attrs">parsing options</param>
        /// <param name="maxTokens">maximum number of tokens (in case the closing quote was forgotten)</param>
        /// <returns>a BracketSequenceToken metatoken, or null when no sequence is recognised</returns>
        public static BracketSequenceToken TryParse(Pullenti.Ner.Token t, BracketParseAttr attrs = BracketParseAttr.No, int maxTokens = 100)
        {
            Pullenti.Ner.Token t0 = t;
            int cou = 0;

            // Nothing to do unless the start token is accepted as a sequence opener.
            if (!CanBeStartOfSequence(t0, false, false))
            {
                return(null);
            }
            // brList collects every bracket-like token met during the scan; entry 0 is the opener.
            List <Bracket> brList = new List <Bracket>();

            brList.Add(new Bracket(t0));
            cou = 0;
            // crlf counts the line breaks crossed during the scan.
            int crlf = 0;

            // last is the most recent token accepted into the candidate span.
            Pullenti.Ner.Token last = null;
            // lev is the nesting depth; it is only maintained when isAssim is set.
            int  lev     = 1;
            bool isAssim = brList[0].Char != '«' && m_AssymOPenChars.IndexOf(brList[0].Char) >= 0;
            // genCase is set when a noun phrase that starts a new line is in the genitive case.
            bool genCase = false;

            // Forward scan: gather brackets and stop on any condition that ends the candidate span.
            for (t = t0.Next; t != null; t = t.Next)
            {
                if (t.IsTableControlChar)
                {
                    break;
                }
                last = t;
                if (t.IsCharOf(m_OpenChars) || t.IsCharOf(m_CloseChars))
                {
                    // Without CanBeManyLines, a bracket at the start of a new line ends the scan when it
                    // is far indented or could itself open a sequence...
                    if (t.IsNewlineBefore && ((attrs & BracketParseAttr.CanBeManyLines)) == BracketParseAttr.No)
                    {
                        if (t.WhitespacesBeforeCount > 10 || CanBeStartOfSequence(t, false, false))
                        {
                            // ...except for a '(' when the sequence itself did not start with '('.
                            if (t.IsChar('(') && !t0.IsChar('('))
                            {
                            }
                            else
                            {
                                last = t.Previous;
                                break;
                            }
                        }
                    }
                    Bracket bb = new Bracket(t);
                    brList.Add(bb);
                    // Hard cap on the number of collected brackets.
                    if (brList.Count > 20)
                    {
                        break;
                    }
                    // Third bracket that must close both the second and the first one: look ahead
                    // to decide whether scanning should continue past it.
                    if ((brList.Count == 3 && brList[1].CanBeOpen && bb.CanBeClose) && MustBeCloseChar(bb.Char, brList[1].Char) && MustBeCloseChar(bb.Char, brList[0].Char))
                    {
                        bool ok = false;
                        for (Pullenti.Ner.Token tt = t.Next; tt != null; tt = tt.Next)
                        {
                            if (tt.IsNewlineBefore)
                            {
                                break;
                            }
                            if (tt.IsChar(','))
                            {
                                break;
                            }
                            if (tt.IsChar('.'))
                            {
                                // After a period, the first bracket on the same line decides the outcome.
                                for (tt = tt.Next; tt != null; tt = tt.Next)
                                {
                                    if (tt.IsNewlineBefore)
                                    {
                                        break;
                                    }
                                    else if (tt.IsCharOf(m_OpenChars) || tt.IsCharOf(m_CloseChars))
                                    {
                                        Bracket bb2 = new Bracket(tt);
                                        if (BracketHelper.CanBeEndOfSequence(tt, false, null, false) && CanBeCloseChar(bb2.Char, brList[0].Char))
                                        {
                                            ok = true;
                                        }
                                        break;
                                    }
                                }
                                break;
                            }
                            // NOTE(review): this tests the outer token `t`, not the look-ahead token `tt`.
                            // `t` already passed the same test at the top of this branch, so the condition
                            // is always true here - confirm whether `tt` was intended.
                            if (t.IsCharOf(m_OpenChars) || t.IsCharOf(m_CloseChars))
                            {
                                ok = true;
                                break;
                            }
                        }
                        if (!ok)
                        {
                            break;
                        }
                    }
                    // isAssim case: track nesting of the opener character and stop when the
                    // corresponding closer brings the level back to zero.
                    if (isAssim)
                    {
                        if (bb.CanBeOpen && !bb.CanBeClose && bb.Char == brList[0].Char)
                        {
                            lev++;
                        }
                        else if (bb.CanBeClose && !bb.CanBeOpen && m_OpenChars.IndexOf(brList[0].Char) == m_CloseChars.IndexOf(bb.Char))
                        {
                            lev--;
                            if (lev == 0)
                            {
                                break;
                            }
                        }
                    }
                }
                else
                {
                    // Non-bracket token: count it and give up once maxTokens is exceeded.
                    if ((++cou) > maxTokens)
                    {
                        break;
                    }
                    // Unless CanContainsVerbs is set, some verbs and ADDRESS referents end the sequence.
                    if (((attrs & BracketParseAttr.CanContainsVerbs)) == BracketParseAttr.No)
                    {
                        if (t.Morph.Language.IsCyrillic)
                        {
                            if (t.GetMorphClassInDictionary() == Pullenti.Morph.MorphClass.Verb)
                            {
                                if (!t.Morph.Class.IsAdjective && !t.Morph.ContainsAttr("страд.з.", null))
                                {
                                    if (t.Chars.IsAllLower)
                                    {
                                        string norm = t.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false);
                                        if (!Pullenti.Morph.LanguageHelper.EndsWith(norm, "СЯ"))
                                        {
                                            // The verb is tolerated only when the opener is '(' and no other bracket has been seen.
                                            if (brList.Count > 1)
                                            {
                                                break;
                                            }
                                            if (brList[0].Char != '(')
                                            {
                                                break;
                                            }
                                        }
                                    }
                                }
                            }
                        }
                        else if (t.Morph.Language.IsEn)
                        {
                            if (t.Morph.Class == Pullenti.Morph.MorphClass.Verb && t.Chars.IsAllLower)
                            {
                                break;
                            }
                        }
                        Pullenti.Ner.Referent r = t.GetReferent();
                        if (r != null && r.TypeName == "ADDRESS")
                        {
                            if (!t0.IsChar('('))
                            {
                                break;
                            }
                        }
                    }
                }
                // Multi-line mode: only more than one newline before a token stops the scan;
                // the single-line heuristics below are skipped.
                if (((attrs & BracketParseAttr.CanBeManyLines)) != BracketParseAttr.No)
                {
                    if (t.IsNewlineBefore)
                    {
                        if (t.NewlinesBeforeCount > 1)
                        {
                            break;
                        }
                        crlf++;
                    }
                    continue;
                }
                // Single-line mode: decide whether a line break ends the sequence.
                if (t.IsNewlineBefore)
                {
                    if (t.WhitespacesBeforeCount > 15)
                    {
                        last = t.Previous;
                        break;
                    }
                    crlf++;
                    if (!t.Chars.IsAllLower)
                    {
                        if (MiscHelper.CanBeStartOfSentence(t))
                        {
                            // A new sentence is accepted only if the same line contains a one-character
                            // closer with no whitespace before it, met before any whitespace-preceded opener.
                            bool has = false;
                            for (Pullenti.Ner.Token tt = t.Next; tt != null; tt = tt.Next)
                            {
                                if (tt.IsNewlineBefore)
                                {
                                    break;
                                }
                                else if (tt.LengthChar == 1 && tt.IsCharOf(m_OpenChars) && tt.IsWhitespaceBefore)
                                {
                                    break;
                                }
                                else if (tt.LengthChar == 1 && tt.IsCharOf(m_CloseChars) && !tt.IsWhitespaceBefore)
                                {
                                    has = true;
                                    break;
                                }
                            }
                            if (!has)
                            {
                                last = t.Previous;
                                break;
                            }
                        }
                    }
                    // The previous line ended with a metatoken whose last token can end a sequence.
                    if ((t.Previous is Pullenti.Ner.MetaToken) && CanBeEndOfSequence((t.Previous as Pullenti.Ner.MetaToken).EndToken, false, null, false))
                    {
                        last = t.Previous;
                        break;
                    }
                }
                // More than one line break: stop if any further bracket was collected, or after 10 breaks.
                if (crlf > 1)
                {
                    if (brList.Count > 1)
                    {
                        break;
                    }
                    if (crlf > 10)
                    {
                        break;
                    }
                }
                if (t.IsChar(';') && t.IsNewlineAfter)
                {
                    break;
                }
                // Step over a whole noun phrase at once.
                NounPhraseToken npt = NounPhraseHelper.TryParse(t, NounPhraseParseAttr.No, 0, null);
                if (npt != null)
                {
                    if (t.IsNewlineBefore)
                    {
                        genCase = npt.Morph.Case.IsGenitive;
                    }
                    last = (t = npt.EndToken);
                }
            }
            // Only the opener was collected, but the span ends at a line break with a metatoken
            // whose last token can end a sequence.
            if ((brList.Count == 1 && brList[0].CanBeOpen && (last is Pullenti.Ner.MetaToken)) && last.IsNewlineAfter)
            {
                if (BracketHelper.CanBeEndOfSequence((last as Pullenti.Ner.MetaToken).EndToken, false, null, false))
                {
                    return(new BracketSequenceToken(t0, last));
                }
            }
            // Only the opener was collected, a genitive noun phrase started a line, and the span
            // ends at a line break after at most two line breaks.
            if ((brList.Count == 1 && brList[0].CanBeOpen && genCase) && last.IsNewlineAfter && crlf <= 2)
            {
                return(new BracketSequenceToken(t0, last));
            }
            if (brList.Count < 1)
            {
                return(null);
            }
            // Treat an adjacent '<' '>' pair as an open/close pair.
            for (int i = 1; i < (brList.Count - 1); i++)
            {
                if (brList[i].Char == '<' && brList[i + 1].Char == '>')
                {
                    brList[i].CanBeOpen      = true;
                    brList[i + 1].CanBeClose = true;
                }
            }
            List <BracketSequenceToken> internals = null;

            // Drop trailing pairs that close each other but whose closer cannot close the opener.
            while (brList.Count > 3)
            {
                int i = brList.Count - 1;
                if ((brList[i].CanBeClose && brList[i - 1].CanBeOpen && !CanBeCloseChar(brList[i].Char, brList[0].Char)) && CanBeCloseChar(brList[i].Char, brList[i - 1].Char))
                {
                    brList.RemoveRange(brList.Count - 2, 2);
                    continue;
                }
                break;
            }
            // Repeatedly cut out inner open/close pairs and remember them as internal sequences.
            while (brList.Count >= 4)
            {
                bool changed = false;
                for (int i = 1; i < (brList.Count - 2); i++)
                {
                    if ((brList[i].CanBeOpen && !brList[i].CanBeClose && brList[i + 1].CanBeClose) && !brList[i + 1].CanBeOpen)
                    {
                        bool ok = false;
                        if (MustBeCloseChar(brList[i + 1].Char, brList[i].Char) || brList[i].Char != brList[0].Char)
                        {
                            ok = true;
                            if ((i == 1 && ((i + 2) < brList.Count) && brList[i + 2].Char == ')') && brList[i + 1].Char != ')' && CanBeCloseChar(brList[i + 1].Char, brList[i - 1].Char))
                            {
                                brList[i + 2] = brList[i + 1];
                            }
                        }
                        else if (i > 1 && ((i + 2) < brList.Count) && MustBeCloseChar(brList[i + 2].Char, brList[i - 1].Char))
                        {
                            ok = true;
                        }
                        if (ok)
                        {
                            if (internals == null)
                            {
                                internals = new List <BracketSequenceToken>();
                            }
                            internals.Add(new BracketSequenceToken(brList[i].Source, brList[i + 1].Source));
                            brList.RemoveRange(i, 2);
                            changed = true;
                            break;
                        }
                    }
                }
                if (!changed)
                {
                    break;
                }
            }
            BracketSequenceToken res = null;

            // Four brackets: opener, inner open, inner close, outer close.
            if ((brList.Count >= 4 && brList[1].CanBeOpen && brList[2].CanBeClose) && brList[3].CanBeClose && !brList[3].CanBeOpen)
            {
                if (CanBeCloseChar(brList[3].Char, brList[0].Char))
                {
                    res = new BracketSequenceToken(brList[0].Source, brList[3].Source);
                    // Record the inner pair unless both inner brackets sit directly next to the outer ones.
                    if (brList[0].Source.Next != brList[1].Source || brList[2].Source.Next != brList[3].Source)
                    {
                        res.Internal.Add(new BracketSequenceToken(brList[1].Source, brList[2].Source));
                    }
                    if (internals != null)
                    {
                        res.Internal.AddRange(internals);
                    }
                }
            }
            // Three brackets with a pure closer in third position.
            if ((res == null && brList.Count >= 3 && brList[2].CanBeClose) && !brList[2].CanBeOpen)
            {
                // NearCloseBracket: take the second bracket as the closer when it can close the opener.
                if (((attrs & BracketParseAttr.NearCloseBracket)) != BracketParseAttr.No)
                {
                    if (CanBeCloseChar(brList[1].Char, brList[0].Char))
                    {
                        return(new BracketSequenceToken(brList[0].Source, brList[1].Source));
                    }
                }
                bool ok = true;
                // Both the second and the third bracket could close the opener: examine the text
                // between them to choose.
                if (CanBeCloseChar(brList[2].Char, brList[0].Char) && CanBeCloseChar(brList[1].Char, brList[0].Char) && brList[1].CanBeClose)
                {
                    for (t = brList[1].Source; t != brList[2].Source && t != null; t = t.Next)
                    {
                        if (t.IsNewlineBefore)
                        {
                            ok = false;
                            break;
                        }
                        if (t.Chars.IsLetter && t.Chars.IsAllLower)
                        {
                            ok = false;
                            break;
                        }
                        NounPhraseToken npt = NounPhraseHelper.TryParse(t, NounPhraseParseAttr.No, 0, null);
                        if (npt != null)
                        {
                            t = npt.EndToken;
                        }
                    }
                    if (ok)
                    {
                        // A line break between the opener and the second bracket cuts the sequence there.
                        for (t = brList[0].Source.Next; t != brList[1].Source && t != null; t = t.Next)
                        {
                            if (t.IsNewlineBefore)
                            {
                                return(new BracketSequenceToken(brList[0].Source, t.Previous));
                            }
                        }
                    }
                    // Walk backwards on the same line over single non-letter characters and balance
                    // them against the opener and the second bracket.
                    int lev1 = 0;
                    for (Pullenti.Ner.Token tt = brList[0].Source.Previous; tt != null; tt = tt.Previous)
                    {
                        if (tt.IsNewlineAfter || tt.IsTableControlChar)
                        {
                            break;
                        }
                        if (!(tt is Pullenti.Ner.TextToken))
                        {
                            continue;
                        }
                        if (tt.Chars.IsLetter || tt.LengthChar > 1)
                        {
                            continue;
                        }
                        char ch = (tt as Pullenti.Ner.TextToken).Term[0];
                        if (CanBeCloseChar(ch, brList[0].Char))
                        {
                            lev1++;
                        }
                        else if (CanBeCloseChar(brList[1].Char, ch))
                        {
                            lev1--;
                            if (lev1 < 0)
                            {
                                return(new BracketSequenceToken(brList[0].Source, brList[1].Source));
                            }
                        }
                    }
                }
                if (ok && CanBeCloseChar(brList[2].Char, brList[0].Char))
                {
                    BracketSequenceToken intern = new BracketSequenceToken(brList[1].Source, brList[2].Source);
                    res = new BracketSequenceToken(brList[0].Source, brList[2].Source);
                    res.Internal.Add(intern);
                }
                else if (ok && CanBeCloseChar(brList[2].Char, brList[1].Char) && brList[0].CanBeOpen)
                {
                    if (CanBeCloseChar(brList[2].Char, brList[0].Char))
                    {
                        BracketSequenceToken intern = new BracketSequenceToken(brList[1].Source, brList[2].Source);
                        res = new BracketSequenceToken(brList[0].Source, brList[2].Source);
                        res.Internal.Add(intern);
                    }
                    else if (brList.Count == 3)
                    {
                        return(null);
                    }
                }
            }
            // Simple fallbacks: the second bracket closes the sequence.
            if (res == null && brList.Count > 1 && brList[1].CanBeClose)
            {
                res = new BracketSequenceToken(brList[0].Source, brList[1].Source);
            }
            if (res == null && brList.Count > 1 && CanBeCloseChar(brList[1].Char, brList[0].Char))
            {
                res = new BracketSequenceToken(brList[0].Source, brList[1].Source);
            }
            if (res == null && brList.Count == 2 && brList[0].Char == brList[1].Char)
            {
                res = new BracketSequenceToken(brList[0].Source, brList[1].Source);
            }
            // Attach the previously cut-out inner pairs that begin before the end of the result.
            if (res != null && internals != null)
            {
                foreach (BracketSequenceToken i in internals)
                {
                    if (i.BeginChar < res.EndChar)
                    {
                        res.Internal.Add(i);
                    }
                }
            }
            // Still nothing: look for a metatoken whose last token is a closer matching the opener.
            if (res == null)
            {
                cou = 0;
                for (Pullenti.Ner.Token tt = t0.Next; tt != null; tt = tt.Next, cou++)
                {
                    if (tt.IsTableControlChar)
                    {
                        break;
                    }
                    if (MiscHelper.CanBeStartOfSentence(tt))
                    {
                        break;
                    }
                    if (maxTokens > 0 && cou > maxTokens)
                    {
                        break;
                    }
                    Pullenti.Ner.MetaToken mt = tt as Pullenti.Ner.MetaToken;
                    if (mt == null)
                    {
                        continue;
                    }
                    if (mt.EndToken is Pullenti.Ner.TextToken)
                    {
                        if ((mt.EndToken as Pullenti.Ner.TextToken).IsCharOf(m_CloseChars))
                        {
                            Bracket bb = new Bracket(mt.EndToken as Pullenti.Ner.TextToken);
                            if (bb.CanBeClose && CanBeCloseChar(bb.Char, brList[0].Char))
                            {
                                return(new BracketSequenceToken(t0, tt));
                            }
                        }
                    }
                }
            }
            return(res);
        }
예제 #14
0
        /// <summary>
        /// Builds the name text for the token span from <paramref name="begin"/> to <paramref name="end"/>.
        /// When the two ends are accepted as a bracket pair, the span is first narrowed to its interior.
        /// </summary>
        /// <param name="begin">first token of the span</param>
        /// <param name="end">last token of the span</param>
        /// <param name="normalizeFirstNounGroup">when true, the leading noun phrase (or a leading Cyrillic dictionary word) is put into normal form</param>
        /// <param name="normalFirstGroupSingle">when true, the leading noun phrase is normalised with singular number</param>
        /// <param name="ignoreGeoReferent">forwarded to GetNameEx</param>
        /// <returns>the name with trailing '*' and whitespace removed; null when nothing else remains or no name was produced</returns>
        static string GetNameWithoutBrackets(Pullenti.Ner.Token begin, Pullenti.Ner.Token end, bool normalizeFirstNounGroup = false, bool normalFirstGroupSingle = false, bool ignoreGeoReferent = false)
        {
            string name = null;

            // Step inside the enclosing pair when both ends are accepted as brackets.
            if (BracketHelper.CanBeStartOfSequence(begin, false, false) && BracketHelper.CanBeEndOfSequence(end, false, begin, false))
            {
                begin = begin.Next;
                end = end.Previous;
            }

            if (normalizeFirstNounGroup && !begin.Morph.Class.IsPreposition)
            {
                NounPhraseToken phrase = NounPhraseHelper.TryParse(begin, NounPhraseParseAttr.ReferentCanBeNoun, 0, null);
                // Discard a phrase that is a lone noun unknown to the dictionary.
                if (phrase != null && phrase.Noun.GetMorphClassInDictionary().IsUndefined && phrase.Adjectives.Count == 0)
                {
                    phrase = null;
                }
                // Discard a phrase that runs past the end of the span.
                if (phrase != null && phrase.EndToken.EndChar > end.EndChar)
                {
                    phrase = null;
                }

                if (phrase != null)
                {
                    Pullenti.Morph.MorphNumber number = normalFirstGroupSingle ? Pullenti.Morph.MorphNumber.Singular : Pullenti.Morph.MorphNumber.Undefined;
                    name = phrase.GetNormalCaseText(null, number, Pullenti.Morph.MorphGender.Undefined, false);

                    Pullenti.Ner.Token rest = phrase.EndToken.Next;
                    // Pattern: a comma followed by a text token inside the span whose morphology is both verb and adjective.
                    bool commaThenVerbAdjective = rest != null && rest.Next != null && rest.IsComma
                        && (rest.Next is Pullenti.Ner.TextToken) && rest.Next.EndChar <= end.EndChar
                        && rest.Next.Morph.Class.IsVerb && rest.Next.Morph.Class.IsAdjective;
                    if (commaThenVerbAdjective)
                    {
                        foreach (Pullenti.Morph.MorphBaseInfo item in rest.Next.Morph.Items)
                        {
                            // Only the first variant agreeing with the phrase in gender, case and number is used.
                            bool genderAgrees = item.Gender == phrase.Morph.Gender || ((item.Gender & phrase.Morph.Gender)) != Pullenti.Morph.MorphGender.Undefined;
                            if (!genderAgrees)
                            {
                                continue;
                            }
                            if (((item.Case & phrase.Morph.Case)).IsUndefined)
                            {
                                continue;
                            }
                            bool numberAgrees = item.Number == phrase.Morph.Number || ((item.Number & phrase.Morph.Number)) != Pullenti.Morph.MorphNumber.Undefined;
                            if (!numberAgrees)
                            {
                                continue;
                            }

                            string form = (rest.Next as Pullenti.Ner.TextToken).Term;
                            Pullenti.Morph.MorphWordForm wordForm = item as Pullenti.Morph.MorphWordForm;
                            if (wordForm != null)
                            {
                                form = wordForm.NormalCase;
                            }
                            Pullenti.Morph.MorphBaseInfo target = new Pullenti.Morph.MorphBaseInfo()
                            {
                                Class = Pullenti.Morph.MorphClass.Adjective,
                                Gender = phrase.Morph.Gender,
                                Number = phrase.Morph.Number,
                                Language = phrase.Morph.Language
                            };
                            form = Pullenti.Morph.MorphologyService.GetWordform(form, target);
                            if (form != null)
                            {
                                name = string.Format("{0}, {1}", name, form);
                                rest = rest.Next.Next;
                            }
                            break;
                        }
                    }

                    // Append whatever is left of the span after the normalised head.
                    if (rest != null && rest.EndChar <= end.EndChar)
                    {
                        string tail = GetNameEx(rest, end, Pullenti.Morph.MorphClass.Undefined, Pullenti.Morph.MorphCase.Undefined, Pullenti.Morph.MorphGender.Undefined, true, ignoreGeoReferent);
                        if (!string.IsNullOrEmpty(tail))
                        {
                            // No separating space when the tail starts with a non-alphanumeric character.
                            if (char.IsLetterOrDigit(tail[0]))
                            {
                                name = string.Format("{0} {1}", name, tail);
                            }
                            else
                            {
                                name = string.Format("{0}{1}", name, tail);
                            }
                        }
                    }
                }
                else if ((begin is Pullenti.Ner.TextToken) && begin.Chars.IsCyrillicLetter)
                {
                    // No usable noun phrase: normalise just the first word when the dictionary knows it.
                    Pullenti.Morph.MorphClass dictClass = begin.GetMorphClassInDictionary();
                    if (!dictClass.IsUndefined)
                    {
                        name = begin.GetNormalCaseText(dictClass, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false);
                        if (begin.EndChar < end.EndChar)
                        {
                            string remainder = GetNameEx(begin.Next, end, Pullenti.Morph.MorphClass.Undefined, Pullenti.Morph.MorphCase.Undefined, Pullenti.Morph.MorphGender.Undefined, true, false);
                            name = string.Format("{0} {1}", name, remainder);
                        }
                    }
                }
            }

            // Fallback: plain name of the whole span.
            if (name == null)
            {
                name = GetNameEx(begin, end, Pullenti.Morph.MorphClass.Undefined, Pullenti.Morph.MorphCase.Undefined, Pullenti.Morph.MorphGender.Undefined, true, ignoreGeoReferent);
            }

            // Strip trailing '*' and whitespace; a name consisting only of them becomes null.
            if (!string.IsNullOrEmpty(name))
            {
                int keep = name.Length;
                while (keep > 0 && (name[keep - 1] == '*' || char.IsWhiteSpace(name[keep - 1])))
                {
                    keep--;
                }
                if (keep == 0)
                {
                    return(null);
                }
                if (keep < name.Length)
                {
                    name = name.Substring(0, keep);
                }
            }
            return(name);
        }
예제 #15
0
 /// <summary>
 /// Walks the token chain from FirstToken and replaces text tokens that are unknown to the
 /// dictionary with a token built from the corrected word, recording each replacement in
 /// CorrectedTokens together with the replacement token's source text.
 /// </summary>
 /// <param name="lang">language used when the token's own language is undefined, and for analysing the corrected word</param>
 void CorrectWordsByMorph(Pullenti.Morph.MorphLang lang)
 {
     for (Pullenti.Ner.Token cur = FirstToken; cur != null; cur = cur.Next)
     {
         Pullenti.Ner.TextToken word = cur as Pullenti.Ner.TextToken;
         if (word == null)
         {
             continue;
         }
         // Tokens carrying this morph attribute are left alone.
         if (cur.Morph.ContainsAttr("прдктв.", null))
         {
             continue;
         }
         // Only words of at least 4 characters with no dictionary class are candidates.
         if (!cur.GetMorphClassInDictionary().IsUndefined || (cur.LengthChar < 4))
         {
             continue;
         }
         // Skip non-lowercase surnames and all-uppercase words.
         if ((cur.Morph.Class.IsProperSurname && !cur.Chars.IsAllLower) || cur.Chars.IsAllUpper)
         {
             continue;
         }

         string term = word.Term;
         Pullenti.Morph.MorphLang wordLang = cur.Morph.Language.IsUndefined ? lang : cur.Morph.Language;
         string corrected = Pullenti.Morph.MorphologyService.CorrectWord(term, wordLang);
         if (corrected == null)
         {
             continue;
         }

         // The corrected text must analyse into exactly one morph token.
         List <Pullenti.Morph.MorphToken> analysed = Pullenti.Morph.MorphologyService.Process(corrected, lang, null);
         if (analysed == null || analysed.Count != 1)
         {
             continue;
         }

         // The replacement keeps the original position and character info; Term0 holds the original term.
         Pullenti.Ner.TextToken replacement = new Pullenti.Ner.TextToken(analysed[0], this, cur.BeginChar, cur.EndChar)
         {
             Chars = cur.Chars,
             Term0 = word.Term
         };
         if (replacement.GetMorphClassInDictionary().IsProperSurname)
         {
             continue;
         }

         // Splice the replacement into the chain in place of the current token.
         if (cur == FirstToken)
         {
             FirstToken = replacement;
         }
         else
         {
             cur.Previous.Next = replacement;
         }
         replacement.Next = cur.Next;
         cur = replacement;

         if (CorrectedTokens == null)
         {
             CorrectedTokens = new Dictionary <Pullenti.Ner.Token, string>();
         }
         CorrectedTokens.Add(replacement, replacement.GetSourceText());
     }
 }
예제 #16
0
        /// <summary>
        /// Tries to recognise one weapon description item (noun, brand, name, model, number,
        /// caliber or developer) that starts at the given token.
        /// </summary>
        /// <param name="t">first token of the candidate item; null yields null</param>
        /// <param name="prev">previously recognised item or null; used to accept an otherwise unknown capitalised word as a name/brand</param>
        /// <param name="afterConj">forwarded unchanged to the recursive calls made for bracketed items and after the "МАРКА" keyword</param>
        /// <param name="attachHigh">forwarded unchanged to the recursive call made for bracketed items</param>
        /// <returns>the recognised item, or null when nothing matches</returns>
        static WeaponItemToken _TryParse(Pullenti.Ner.Token t, WeaponItemToken prev, bool afterConj, bool attachHigh = false)
        {
            if (t == null)
            {
                return null;
            }

            // Item wrapped in brackets: parse the content and widen the span over the brackets.
            if (Pullenti.Ner.Core.BracketHelper.IsBracket(t, true))
            {
                WeaponItemToken wit = _TryParse(t.Next, prev, afterConj, attachHigh);
                if (wit != null)
                {
                    if (wit.EndToken.Next == null)
                    {
                        wit.BeginToken = t;
                        return wit;
                    }
                    if (Pullenti.Ner.Core.BracketHelper.IsBracket(wit.EndToken.Next, true))
                    {
                        wit.BeginToken = t;
                        wit.EndToken   = wit.EndToken.Next;
                        return wit;
                    }
                }
            }

            // Ontology lookup: the termin tag carries the item type.
            Pullenti.Ner.Core.TerminToken tok = m_Ontology.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
            if (tok != null)
            {
                WeaponItemToken res = new WeaponItemToken(t, tok.EndToken);
                res.Typ = (Typs)tok.Termin.Tag;
                if (res.Typ == Typs.Noun)
                {
                    res.Value = tok.Termin.CanonicText;
                    if (tok.Termin.Tag2 != null)
                    {
                        res.IsDoubt = true;
                    }
                    // Absorb following brands (as inner tokens) and adjectives (into AltValue).
                    for (Pullenti.Ner.Token tt = res.EndToken.Next; tt != null; tt = tt.Next)
                    {
                        if (tt.WhitespacesBeforeCount > 2)
                        {
                            break;
                        }
                        WeaponItemToken wit = _TryParse(tt, null, false, false);
                        if (wit != null)
                        {
                            if (wit.Typ == Typs.Brand)
                            {
                                res.InnerTokens.Add(wit);
                                res.EndToken = (tt = wit.EndToken);
                                continue;
                            }
                            break;
                        }
                        if (!(tt is Pullenti.Ner.TextToken))
                        {
                            break;
                        }
                        Pullenti.Morph.MorphClass mc = tt.GetMorphClassInDictionary();
                        if (mc == Pullenti.Morph.MorphClass.Adjective)
                        {
                            if (res.AltValue == null)
                            {
                                res.AltValue = res.Value;
                            }
                            // Strip the trailing noun so the adjective is inserted before it.
                            if (res.AltValue.EndsWith(res.Value))
                            {
                                res.AltValue = res.AltValue.Substring(0, res.AltValue.Length - res.Value.Length);
                            }
                            res.AltValue = string.Format("{0}{1} {2}", res.AltValue, (tt as Pullenti.Ner.TextToken).Term, res.Value);
                            res.EndToken = tt;
                            continue;
                        }
                        break;
                    }
                    return res;
                }
                if (res.Typ == Typs.Brand || res.Typ == Typs.Name)
                {
                    res.Value = tok.Termin.CanonicText;
                    return res;
                }
                if (res.Typ == Typs.Model)
                {
                    res.Value = tok.Termin.CanonicText;
                    if (tok.Termin.Tag2 is List <Pullenti.Ner.Core.Termin> )
                    {
                        // Tag2 lists related termins; each becomes an inner token over the same span.
                        List <Pullenti.Ner.Core.Termin> li = tok.Termin.Tag2 as List <Pullenti.Ner.Core.Termin>;
                        foreach (Pullenti.Ner.Core.Termin to in li)
                        {
                            WeaponItemToken wit = new WeaponItemToken(t, tok.EndToken)
                            {
                                Typ = (Typs)to.Tag, Value = to.CanonicText, IsInternal = tok.BeginToken == tok.EndToken
                            };
                            res.InnerTokens.Add(wit);
                            if (to.AdditionalVars != null && to.AdditionalVars.Count > 0)
                            {
                                wit.AltValue = to.AdditionalVars[0].CanonicText;
                            }
                        }
                    }
                    res._correctModel();
                    return res;
                }
                // Any other tag type falls through to the heuristics below.
            }

            // Number introduced by a number prefix.
            Pullenti.Ner.Token nnn = Pullenti.Ner.Core.MiscHelper.CheckNumberPrefix(t);
            if (nnn != null)
            {
                Pullenti.Ner.Transport.Internal.TransItemToken tit = Pullenti.Ner.Transport.Internal.TransItemToken._attachNumber(nnn, true);
                if (tit != null)
                {
                    WeaponItemToken res = new WeaponItemToken(t, tit.EndToken)
                    {
                        Typ = Typs.Number
                    };
                    res.Value    = tit.Value;
                    res.AltValue = tit.AltValue;
                    return res;
                }
            }

            // Short all-uppercase word (fewer than 4 chars): doubtful model designation or the "СП" abbreviation.
            if (((t is Pullenti.Ner.TextToken) && t.Chars.IsLetter && t.Chars.IsAllUpper) && (t.LengthChar < 4))
            {
                if ((t.Next != null && ((t.Next.IsHiphen || t.Next.IsChar('.'))) && (t.Next.WhitespacesAfterCount < 2)) && (t.Next.Next is Pullenti.Ner.NumberToken))
                {
                    WeaponItemToken res = new WeaponItemToken(t, t.Next)
                    {
                        Typ = Typs.Model, IsDoubt = true
                    };
                    res.Value = (t as Pullenti.Ner.TextToken).Term;
                    res._correctModel();
                    return res;
                }
                if ((t.Next is Pullenti.Ner.NumberToken) && !t.IsWhitespaceAfter)
                {
                    WeaponItemToken res = new WeaponItemToken(t, t)
                    {
                        Typ = Typs.Model, IsDoubt = true
                    };
                    res.Value = (t as Pullenti.Ner.TextToken).Term;
                    res._correctModel();
                    return res;
                }
                if ((t as Pullenti.Ner.TextToken).Term == "СП" && (t.WhitespacesAfterCount < 3) && (t.Next is Pullenti.Ner.TextToken))
                {
                    // "СП" is taken as a noun only when a model or brand follows it.
                    WeaponItemToken pp = _TryParse(t.Next, null, false, false);
                    if (pp != null && ((pp.Typ == Typs.Model || pp.Typ == Typs.Brand)))
                    {
                        WeaponItemToken res = new WeaponItemToken(t, t)
                        {
                            Typ = Typs.Noun
                        };
                        res.Value    = "ПИСТОЛЕТ";
                        res.AltValue = "СЛУЖЕБНЫЙ ПИСТОЛЕТ";
                        return res;
                    }
                }
            }

            // Unknown word that is not all-lowercase: accepted as a doubtful name only in a suitable context.
            if (((t is Pullenti.Ner.TextToken) && t.Chars.IsLetter && !t.Chars.IsAllLower) && t.LengthChar > 2)
            {
                bool ok = false;
                if (prev != null && ((prev.Typ == Typs.Noun || prev.Typ == Typs.Model || prev.Typ == Typs.Brand)))
                {
                    ok = true;
                }
                else if (prev == null && t.Previous != null && t.Previous.IsCommaAnd)
                {
                    ok = true;
                }
                if (ok)
                {
                    WeaponItemToken res = new WeaponItemToken(t, t)
                    {
                        Typ = Typs.Name, IsDoubt = true
                    };
                    res.Value = (t as Pullenti.Ner.TextToken).Term;
                    // Hyphenated second part with the same character class joins the name.
                    if ((t.Next != null && t.Next.IsHiphen && (t.Next.Next is Pullenti.Ner.TextToken)) && t.Next.Next.Chars == t.Chars)
                    {
                        res.Value    = string.Format("{0}-{1}", res.Value, (t.Next.Next as Pullenti.Ner.TextToken).Term);
                        res.EndToken = t.Next.Next;
                    }
                    if (prev != null && prev.Typ == Typs.Noun)
                    {
                        res.Typ = Typs.Brand;
                    }
                    // A number attached by hyphen or written without a gap turns the word into a model.
                    if (res.EndToken.Next != null && res.EndToken.Next.IsHiphen && (res.EndToken.Next.Next is Pullenti.Ner.NumberToken))
                    {
                        res.Typ = Typs.Model;
                        res._correctModel();
                    }
                    else if (!res.EndToken.IsWhitespaceAfter && (res.EndToken.Next is Pullenti.Ner.NumberToken))
                    {
                        res.Typ = Typs.Model;
                        res._correctModel();
                    }
                    return res;
                }
            }

            // Keyword "МАРКА" followed by a brand.
            if (t.IsValue("МАРКА", null))
            {
                WeaponItemToken res = _TryParse(t.Next, prev, afterConj, false);
                if (res != null && res.Typ == Typs.Brand)
                {
                    res.BeginToken = t;
                    return res;
                }
                if (Pullenti.Ner.Core.BracketHelper.CanBeStartOfSequence(t.Next, true, false))
                {
                    Pullenti.Ner.Core.BracketSequenceToken br = Pullenti.Ner.Core.BracketHelper.TryParse(t.Next, Pullenti.Ner.Core.BracketParseAttr.No, 100);
                    if (br != null)
                    {
                        return new WeaponItemToken(t, br.EndToken)
                        {
                            Typ = Typs.Brand, Value = Pullenti.Ner.Core.MiscHelper.GetTextValue(br.BeginToken, br.EndToken, Pullenti.Ner.Core.GetTextAttr.No)
                        };
                    }
                }
                if (((t is Pullenti.Ner.TextToken) && (t.Next is Pullenti.Ner.TextToken) && t.Next.LengthChar > 1) && !t.Next.Chars.IsAllLower)
                {
                    // The brand is the word after the keyword, so its term (not the keyword's) is the value.
                    return new WeaponItemToken(t, t.Next)
                    {
                        Typ = Typs.Brand, Value = (t.Next as Pullenti.Ner.TextToken).Term
                    };
                }
            }

            // Keyword "КАЛИБР" followed (optionally after a hyphen or colon) by a single numeric value.
            if (t.IsValue("КАЛИБР", "КАЛІБР"))
            {
                Pullenti.Ner.Token tt1 = t.Next;
                if (tt1 != null && ((tt1.IsHiphen || tt1.IsChar(':'))))
                {
                    tt1 = tt1.Next;
                }
                Pullenti.Ner.Measure.Internal.NumbersWithUnitToken num = Pullenti.Ner.Measure.Internal.NumbersWithUnitToken.TryParse(tt1, null, false, false, false, false);
                if (num != null && num.SingleVal != null)
                {
                    return new WeaponItemToken(t, num.EndToken)
                    {
                        Typ = Typs.Caliber, Value = Pullenti.Ner.Core.NumberHelper.DoubleToString(num.SingleVal.Value)
                    };
                }
            }

            // Bare number: a caliber when measured in "мм" or followed by the word "КАЛИБР".
            if (t is Pullenti.Ner.NumberToken)
            {
                Pullenti.Ner.Measure.Internal.NumbersWithUnitToken num = Pullenti.Ner.Measure.Internal.NumbersWithUnitToken.TryParse(t, null, false, false, false, false);
                if (num != null && num.SingleVal != null)
                {
                    if (num.Units.Count == 1 && num.Units[0].Unit != null && num.Units[0].Unit.NameCyr == "мм")
                    {
                        return new WeaponItemToken(t, num.EndToken)
                        {
                            Typ = Typs.Caliber, Value = Pullenti.Ner.Core.NumberHelper.DoubleToString(num.SingleVal.Value)
                        };
                    }

                    if (num.EndToken.Next != null && num.EndToken.Next.IsValue("КАЛИБР", "КАЛІБР"))
                    {
                        return new WeaponItemToken(t, num.EndToken.Next)
                        {
                            Typ = Typs.Caliber, Value = Pullenti.Ner.Core.NumberHelper.DoubleToString(num.SingleVal.Value)
                        };
                    }
                }
            }

            // Keyword "ПРОИЗВОДСТВО" followed by an organization or geo referent: the developer.
            if (t.IsValue("ПРОИЗВОДСТВО", "ВИРОБНИЦТВО"))
            {
                Pullenti.Ner.Token tt1 = t.Next;
                if (tt1 != null && ((tt1.IsHiphen || tt1.IsChar(':'))))
                {
                    tt1 = tt1.Next;
                }
                if (tt1 is Pullenti.Ner.ReferentToken)
                {
                    if ((tt1.GetReferent() is Pullenti.Ner.Org.OrganizationReferent) || (tt1.GetReferent() is Pullenti.Ner.Geo.GeoReferent))
                    {
                        return new WeaponItemToken(t, tt1)
                        {
                            Typ = Typs.Developer, Ref = tt1.GetReferent()
                        };
                    }
                }
            }
            return null;
        }

        /// <summary>
        /// Extends this token over a numeric designation that follows it and rewrites
        /// <c>Value</c> / <c>AltValue</c> accordingly: a number (optionally after one separator
        /// token) plus directly attached single letters is appended as "-NUMBER[LETTERS]", and
        /// one further "-NUMBER" group joined by a hyphen or one of the characters in "\/" may
        /// follow. Does nothing when there is no next token or it is more than two whitespaces away.
        /// </summary>
        void _correctModel()
        {
            Pullenti.Ner.Token cur = EndToken.Next;
            if (cur == null || cur.WhitespacesBeforeCount > 2)
            {
                return;
            }

            // Step over a single separator token, if any.
            // NOTE(review): IsValue is given the multi-character string ":\/." here, while the
            // separator test further down uses IsCharOf - confirm whether IsCharOf was intended.
            if (cur.IsValue(":\\/.", null) || cur.IsHiphen)
            {
                cur = cur.Next;
            }

            Pullenti.Ner.NumberToken number = cur as Pullenti.Ner.NumberToken;
            if (number != null)
            {
                StringBuilder suffix = new StringBuilder();
                suffix.Append(number.Value);
                // Letters after the number are converted to the alphabet of the first char of Value.
                bool valueIsLatin = Pullenti.Morph.LanguageHelper.IsLatinChar(Value[0]);
                EndToken = cur;

                cur = cur.Next;
                while (cur != null)
                {
                    bool singleLetter = (cur is Pullenti.Ner.TextToken) && cur.LengthChar == 1 && cur.Chars.IsLetter;
                    if (!singleLetter)
                    {
                        break;
                    }
                    // The letter must have no whitespace before it, or come right after a hyphen.
                    bool attached = !cur.IsWhitespaceBefore || ((cur.Previous != null && cur.Previous.IsHiphen));
                    if (!attached)
                    {
                        break;
                    }

                    char letter = (cur as Pullenti.Ner.TextToken).Term[0];
                    EndToken = cur;

                    char converted = (char)0;
                    if (Pullenti.Morph.LanguageHelper.IsLatinChar(letter) && !valueIsLatin)
                    {
                        converted = Pullenti.Morph.LanguageHelper.GetCyrForLat(letter);
                    }
                    else if (Pullenti.Morph.LanguageHelper.IsCyrillicChar(letter) && valueIsLatin)
                    {
                        converted = Pullenti.Morph.LanguageHelper.GetLatForCyr(letter);
                    }
                    // A zero char means no counterpart was returned; keep the letter as typed.
                    if (converted != ((char)0))
                    {
                        letter = converted;
                    }

                    suffix.Append(letter);
                    cur = cur.Next;
                }

                Value    = string.Format("{0}-{1}", Value, suffix.ToString());
                AltValue = Pullenti.Ner.Core.MiscHelper.CreateCyrLatAlternative(Value);
            }

            // Optional second numeric group glued on without whitespace, e.g. "...-12" or ".../12".
            Pullenti.Ner.Token separator = EndToken.Next;
            if (EndToken.IsWhitespaceAfter || separator == null)
            {
                return;
            }
            if (!separator.IsHiphen && !separator.IsCharOf("\\/"))
            {
                return;
            }
            if (separator.IsWhitespaceAfter)
            {
                return;
            }
            Pullenti.Ner.NumberToken tail = separator.Next as Pullenti.Ner.NumberToken;
            if (tail == null)
            {
                return;
            }

            EndToken = tail;
            Value    = string.Format("{0}-{1}", Value, tail.Value);
            if (AltValue != null)
            {
                AltValue = string.Format("{0}-{1}", AltValue, tail.Value);
            }
        }
예제 #17
0
        public static NounPhraseItem TryParse(Pullenti.Ner.Token t, List <NounPhraseItem> items, Pullenti.Ner.Core.NounPhraseParseAttr attrs)
        {
            if (t == null)
            {
                return(null);
            }
            Pullenti.Ner.Token t0 = t;
            bool _canBeSurname    = false;
            bool _isDoubtAdj      = false;

            Pullenti.Ner.ReferentToken rt = t as Pullenti.Ner.ReferentToken;
            if (rt != null && rt.BeginToken == rt.EndToken && (rt.BeginToken is Pullenti.Ner.TextToken))
            {
                NounPhraseItem res = TryParse(rt.BeginToken, items, attrs);
                if (res != null)
                {
                    res.BeginToken = (res.EndToken = t);
                    res.CanBeNoun  = true;
                    return(res);
                }
            }
            if (rt != null)
            {
                NounPhraseItem res = new NounPhraseItem(t, t);
                foreach (Pullenti.Morph.MorphBaseInfo m in t.Morph.Items)
                {
                    NounPhraseItemTextVar v = new NounPhraseItemTextVar(m, null);
                    v.NormalValue = t.GetReferent().ToString();
                    res.NounMorph.Add(v);
                }
                res.CanBeNoun = true;
                return(res);
            }
            if (t is Pullenti.Ner.NumberToken)
            {
            }
            bool hasLegalVerb = false;

            if (t is Pullenti.Ner.TextToken)
            {
                if (!t.Chars.IsLetter)
                {
                    return(null);
                }
                string str = (t as Pullenti.Ner.TextToken).Term;
                if (str[str.Length - 1] == 'А' || str[str.Length - 1] == 'О')
                {
                    foreach (Pullenti.Morph.MorphBaseInfo wf in t.Morph.Items)
                    {
                        if ((wf is Pullenti.Morph.MorphWordForm) && (wf as Pullenti.Morph.MorphWordForm).IsInDictionary)
                        {
                            if (wf.Class.IsVerb)
                            {
                                Pullenti.Morph.MorphClass mc = t.GetMorphClassInDictionary();
                                if (!mc.IsNoun && ((attrs & Pullenti.Ner.Core.NounPhraseParseAttr.IgnoreParticiples)) == Pullenti.Ner.Core.NounPhraseParseAttr.No)
                                {
                                    if (!Pullenti.Morph.LanguageHelper.EndsWithEx(str, "ОГО", "ЕГО", null, null))
                                    {
                                        return(null);
                                    }
                                }
                                hasLegalVerb = true;
                            }
                            if (wf.Class.IsAdverb)
                            {
                                if (t.Next == null || !t.Next.IsHiphen)
                                {
                                    if ((str == "ВСЕГО" || str == "ДОМА" || str == "НЕСКОЛЬКО") || str == "МНОГО" || str == "ПОРЯДКА")
                                    {
                                    }
                                    else
                                    {
                                        return(null);
                                    }
                                }
                            }
                            if (wf.Class.IsAdjective)
                            {
                                if (wf.ContainsAttr("к.ф.", null))
                                {
                                    if (t.GetMorphClassInDictionary() == Pullenti.Morph.MorphClass.Adjective)
                                    {
                                    }
                                    else
                                    {
                                        _isDoubtAdj = true;
                                    }
                                }
                            }
                        }
                    }
                }
                Pullenti.Morph.MorphClass mc0 = t.Morph.Class;
                if (mc0.IsProperSurname && !t.Chars.IsAllLower)
                {
                    foreach (Pullenti.Morph.MorphBaseInfo wf in t.Morph.Items)
                    {
                        if (wf.Class.IsProperSurname && wf.Number != Pullenti.Morph.MorphNumber.Plural)
                        {
                            Pullenti.Morph.MorphWordForm wff = wf as Pullenti.Morph.MorphWordForm;
                            if (wff == null)
                            {
                                continue;
                            }
                            string s = ((wff.NormalFull ?? wff.NormalCase)) ?? "";
                            if (Pullenti.Morph.LanguageHelper.EndsWithEx(s, "ИН", "ЕН", "ЫН", null))
                            {
                                if (!wff.IsInDictionary)
                                {
                                    _canBeSurname = true;
                                }
                                else
                                {
                                    return(null);
                                }
                            }
                            if (wff.IsInDictionary && Pullenti.Morph.LanguageHelper.EndsWith(s, "ОВ"))
                            {
                                _canBeSurname = true;
                            }
                        }
                    }
                }
                if (mc0.IsProperName && !t.Chars.IsAllLower)
                {
                    foreach (Pullenti.Morph.MorphBaseInfo wff in t.Morph.Items)
                    {
                        Pullenti.Morph.MorphWordForm wf = wff as Pullenti.Morph.MorphWordForm;
                        if (wf == null)
                        {
                            continue;
                        }
                        if (wf.NormalCase == "ГОР")
                        {
                            continue;
                        }
                        if (wf.Class.IsProperName && wf.IsInDictionary)
                        {
                            if (wf.NormalCase == null || !wf.NormalCase.StartsWith("ЛЮБ"))
                            {
                                if (mc0.IsAdjective && t.Morph.ContainsAttr("неизм.", null))
                                {
                                }
                                else if (((attrs & Pullenti.Ner.Core.NounPhraseParseAttr.ReferentCanBeNoun)) == Pullenti.Ner.Core.NounPhraseParseAttr.ReferentCanBeNoun)
                                {
                                }
                                else
                                {
                                    if (items == null || (items.Count < 1))
                                    {
                                        return(null);
                                    }
                                    if (!items[0].IsStdAdjective)
                                    {
                                        return(null);
                                    }
                                }
                            }
                        }
                    }
                }
                if (mc0.IsAdjective && t.Morph.ItemsCount == 1)
                {
                    if (t.Morph[0].ContainsAttr("в.ср.ст.", null))
                    {
                        return(null);
                    }
                }
                Pullenti.Morph.MorphClass mc1 = t.GetMorphClassInDictionary();
                if (mc1 == Pullenti.Morph.MorphClass.Verb && t.Morph.Case.IsUndefined)
                {
                    return(null);
                }
                if ((((attrs & Pullenti.Ner.Core.NounPhraseParseAttr.IgnoreParticiples)) == Pullenti.Ner.Core.NounPhraseParseAttr.IgnoreParticiples && t.Morph.Class.IsVerb && !t.Morph.Class.IsNoun) && !t.Morph.Class.IsProper)
                {
                    foreach (Pullenti.Morph.MorphBaseInfo wf in t.Morph.Items)
                    {
                        if (wf.Class.IsVerb)
                        {
                            if (wf.ContainsAttr("дейст.з.", null))
                            {
                                if (Pullenti.Morph.LanguageHelper.EndsWith((t as Pullenti.Ner.TextToken).Term, "СЯ"))
                                {
                                }
                                else
                                {
                                    return(null);
                                }
                            }
                        }
                    }
                }
            }
            Pullenti.Ner.Token t1 = null;
            for (int k = 0; k < 2; k++)
            {
                t = t1 ?? t0;
                if (k == 0)
                {
                    if (((t0 is Pullenti.Ner.TextToken) && t0.Next != null && t0.Next.IsHiphen) && t0.Next.Next != null)
                    {
                        if (!t0.IsWhitespaceAfter && !t0.Morph.Class.IsPronoun && !(t0.Next.Next is Pullenti.Ner.NumberToken))
                        {
                            if (!t0.Next.IsWhitespaceAfter)
                            {
                                t = t0.Next.Next;
                            }
                            else if (t0.Next.Next.Chars.IsAllLower && Pullenti.Morph.LanguageHelper.EndsWith((t0 as Pullenti.Ner.TextToken).Term, "О"))
                            {
                                t = t0.Next.Next;
                            }
                        }
                    }
                }
                NounPhraseItem it = new NounPhraseItem(t0, t)
                {
                    CanBeSurname = _canBeSurname
                };
                if (t0 == t && (t0 is Pullenti.Ner.ReferentToken))
                {
                    it.CanBeNoun = true;
                    it.Morph     = new Pullenti.Ner.MorphCollection(t0.Morph);
                }
                bool canBePrepos = false;
                foreach (Pullenti.Morph.MorphBaseInfo v in t.Morph.Items)
                {
                    Pullenti.Morph.MorphWordForm wf = v as Pullenti.Morph.MorphWordForm;
                    if (v.Class.IsVerb && !v.Case.IsUndefined)
                    {
                        it.CanBeAdj = true;
                        it.AdjMorph.Add(new NounPhraseItemTextVar(v, t));
                        continue;
                    }
                    if (v.Class.IsPreposition)
                    {
                        canBePrepos = true;
                    }
                    if (v.Class.IsAdjective || ((v.Class.IsPronoun && !v.Class.IsPersonalPronoun && !v.ContainsAttr("неизм.", null))) || ((v.Class.IsNoun && (t is Pullenti.Ner.NumberToken))))
                    {
                        if (TryAccordVariant(items, (items == null ? 0 : items.Count), v, false))
                        {
                            bool isDoub = false;
                            if (v.ContainsAttr("к.ф.", null))
                            {
                                continue;
                            }
                            if (v.ContainsAttr("собир.", null) && !(t is Pullenti.Ner.NumberToken))
                            {
                                if (wf != null && wf.IsInDictionary)
                                {
                                    return(null);
                                }
                                continue;
                            }
                            if (v.ContainsAttr("сравн.", null))
                            {
                                continue;
                            }
                            bool ok = true;
                            if (t is Pullenti.Ner.TextToken)
                            {
                                string s = (t as Pullenti.Ner.TextToken).Term;
                                if (s == "ПРАВО" || s == "ПРАВА")
                                {
                                    ok = false;
                                }
                                else if (Pullenti.Morph.LanguageHelper.EndsWith(s, "ОВ") && t.GetMorphClassInDictionary().IsNoun)
                                {
                                    ok = false;
                                }
                            }
                            else if (t is Pullenti.Ner.NumberToken)
                            {
                                if (v.Class.IsNoun && t.Morph.Class.IsAdjective)
                                {
                                    ok = false;
                                }
                                else if (t.Morph.Class.IsNoun && ((attrs & Pullenti.Ner.Core.NounPhraseParseAttr.ParseNumericAsAdjective)) == Pullenti.Ner.Core.NounPhraseParseAttr.No)
                                {
                                    ok = false;
                                }
                            }
                            if (ok)
                            {
                                it.AdjMorph.Add(new NounPhraseItemTextVar(v, t));
                                it.CanBeAdj = true;
                                if (_isDoubtAdj && t0 == t)
                                {
                                    it.IsDoubtAdjective = true;
                                }
                                if (hasLegalVerb && wf != null && wf.IsInDictionary)
                                {
                                    it.CanBeNoun = true;
                                }
                                if (wf != null && wf.Class.IsPronoun)
                                {
                                    it.CanBeNoun = true;
                                    it.NounMorph.Add(new NounPhraseItemTextVar(v, t));
                                }
                            }
                        }
                    }
                    bool canBeNoun = false;
                    if (t is Pullenti.Ner.NumberToken)
                    {
                    }
                    else if (v.Class.IsNoun || ((wf != null && wf.NormalCase == "САМ")))
                    {
                        canBeNoun = true;
                    }
                    else if (v.Class.IsPersonalPronoun)
                    {
                        if (items == null || items.Count == 0)
                        {
                            canBeNoun = true;
                        }
                        else
                        {
                            foreach (NounPhraseItem it1 in items)
                            {
                                if (it1.IsVerb)
                                {
                                    if (items.Count == 1 && !v.Case.IsNominative)
                                    {
                                        canBeNoun = true;
                                    }
                                    else
                                    {
                                        return(null);
                                    }
                                }
                            }
                            if (items.Count == 1)
                            {
                                if (items[0].CanBeAdjForPersonalPronoun)
                                {
                                    canBeNoun = true;
                                }
                            }
                        }
                    }
                    else if ((v.Class.IsPronoun && ((items == null || items.Count == 0 || ((items.Count == 1 && items[0].CanBeAdjForPersonalPronoun)))) && wf != null) && (((((wf.NormalCase == "ТОТ" || wf.NormalFull == "ТО" || wf.NormalCase == "ТО") || wf.NormalCase == "ЭТО" || wf.NormalCase == "ВСЕ") || wf.NormalCase == "ЧТО" || wf.NormalCase == "КТО") || wf.NormalFull == "КОТОРЫЙ" || wf.NormalCase == "КОТОРЫЙ")))
                    {
                        if (wf.NormalCase == "ВСЕ")
                        {
                            if (t.Next != null && t.Next.IsValue("РАВНО", null))
                            {
                                return(null);
                            }
                        }
                        canBeNoun = true;
                    }
                    else if (wf != null && ((wf.NormalFull ?? wf.NormalCase)) == "КОТОРЫЙ" && ((attrs & Pullenti.Ner.Core.NounPhraseParseAttr.ParsePronouns)) == Pullenti.Ner.Core.NounPhraseParseAttr.No)
                    {
                        return(null);
                    }
                    else if (v.Class.IsProper && (t is Pullenti.Ner.TextToken))
                    {
                        if (t.LengthChar > 4 || v.Class.IsProperName)
                        {
                            canBeNoun = true;
                        }
                    }
                    if (canBeNoun)
                    {
                        bool added = false;
                        if (items != null && items.Count > 1 && ((attrs & Pullenti.Ner.Core.NounPhraseParseAttr.MultiNouns)) != Pullenti.Ner.Core.NounPhraseParseAttr.No)
                        {
                            bool ok1 = true;
                            for (int ii = 1; ii < items.Count; ii++)
                            {
                                if (!items[ii].ConjBefore)
                                {
                                    ok1 = false;
                                    break;
                                }
                            }
                            if (ok1)
                            {
                                if (TryAccordVariant(items, (items == null ? 0 : items.Count), v, true))
                                {
                                    it.NounMorph.Add(new NounPhraseItemTextVar(v, t));
                                    it.CanBeNoun  = true;
                                    it.MultiNouns = true;
                                    added         = true;
                                }
                            }
                        }
                        if (!added)
                        {
                            if (TryAccordVariant(items, (items == null ? 0 : items.Count), v, false))
                            {
                                it.NounMorph.Add(new NounPhraseItemTextVar(v, t));
                                it.CanBeNoun = true;
                                if (v.Class.IsPersonalPronoun && t.Morph.ContainsAttr("неизм.", null) && !it.CanBeAdj)
                                {
                                    NounPhraseItemTextVar itt = new NounPhraseItemTextVar(v, t);
                                    itt.Case   = Pullenti.Morph.MorphCase.AllCases;
                                    itt.Number = Pullenti.Morph.MorphNumber.Undefined;
                                    if (itt.NormalValue == null)
                                    {
                                    }
                                    it.AdjMorph.Add(itt);
                                    it.CanBeAdj = true;
                                }
                            }
                            else if ((items.Count > 0 && items[0].AdjMorph.Count > 0 && items[0].AdjMorph[0].Number == Pullenti.Morph.MorphNumber.Plural) && !((items[0].AdjMorph[0].Case & v.Case)).IsUndefined && !items[0].AdjMorph[0].Class.IsVerb)
                            {
                                if (t.Next != null && t.Next.IsCommaAnd && (t.Next.Next is Pullenti.Ner.TextToken))
                                {
                                    Pullenti.Ner.Core.NounPhraseToken npt2 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t.Next.Next, attrs, 0, null);
                                    if (npt2 != null && npt2.Preposition == null && !((npt2.Morph.Case & v.Case & items[0].AdjMorph[0].Case)).IsUndefined)
                                    {
                                        it.NounMorph.Add(new NounPhraseItemTextVar(v, t));
                                        it.CanBeNoun = true;
                                    }
                                }
                            }
                        }
                    }
                }
                if (t0 != t)
                {
                    foreach (NounPhraseItemTextVar v in it.AdjMorph)
                    {
                        v.CorrectPrefix(t0 as Pullenti.Ner.TextToken, false);
                    }
                    foreach (NounPhraseItemTextVar v in it.NounMorph)
                    {
                        v.CorrectPrefix(t0 as Pullenti.Ner.TextToken, true);
                    }
                }
                if (k == 1 && it.CanBeNoun && !it.CanBeAdj)
                {
                    if (t1 != null)
                    {
                        it.EndToken = t1;
                    }
                    else
                    {
                        it.EndToken = t0.Next.Next;
                    }
                    foreach (NounPhraseItemTextVar v in it.NounMorph)
                    {
                        if (v.NormalValue != null && (v.NormalValue.IndexOf('-') < 0))
                        {
                            v.NormalValue = string.Format("{0}-{1}", v.NormalValue, it.EndToken.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false));
                        }
                    }
                }
                if (it.CanBeAdj)
                {
                    if (m_StdAdjectives.TryParse(it.BeginToken, Pullenti.Ner.Core.TerminParseAttr.No) != null)
                    {
                        it.IsStdAdjective = true;
                    }
                }
                if (canBePrepos && it.CanBeNoun)
                {
                    if (items != null && items.Count > 0)
                    {
                        Pullenti.Ner.Core.NounPhraseToken npt1 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t, Pullenti.Ner.Core.NounPhraseParseAttr.ParsePreposition | Pullenti.Ner.Core.NounPhraseParseAttr.ParsePronouns | Pullenti.Ner.Core.NounPhraseParseAttr.ParseVerbs, 0, null);
                        if (npt1 != null && npt1.EndChar > t.EndChar)
                        {
                            return(null);
                        }
                    }
                    else
                    {
                        Pullenti.Ner.Core.NounPhraseToken npt1 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t.Next, Pullenti.Ner.Core.NounPhraseParseAttr.ParsePronouns | Pullenti.Ner.Core.NounPhraseParseAttr.ParseVerbs, 0, null);
                        if (npt1 != null)
                        {
                            Pullenti.Morph.MorphCase mc = Pullenti.Morph.LanguageHelper.GetCaseAfterPreposition((t as Pullenti.Ner.TextToken).Lemma);
                            if (!((mc & npt1.Morph.Case)).IsUndefined)
                            {
                                return(null);
                            }
                        }
                    }
                }
                if (it.CanBeNoun || it.CanBeAdj || k == 1)
                {
                    if (it.BeginToken.Morph.Class.IsPronoun)
                    {
                        Pullenti.Ner.Token tt2 = it.EndToken.Next;
                        if ((tt2 != null && tt2.IsHiphen && !tt2.IsWhitespaceAfter) && !tt2.IsWhitespaceBefore)
                        {
                            tt2 = tt2.Next;
                        }
                        if (tt2 is Pullenti.Ner.TextToken)
                        {
                            string ss = (tt2 as Pullenti.Ner.TextToken).Term;
                            if ((ss == "ЖЕ" || ss == "БЫ" || ss == "ЛИ") || ss == "Ж")
                            {
                                it.EndToken = tt2;
                            }
                            else if (ss == "НИБУДЬ" || ss == "ЛИБО" || (((ss == "ТО" && tt2.Previous.IsHiphen)) && it.CanBeAdj))
                            {
                                it.EndToken = tt2;
                                foreach (NounPhraseItemTextVar m in it.AdjMorph)
                                {
                                    m.NormalValue = string.Format("{0}-{1}", m.NormalValue, ss);
                                    if (m.SingleNumberValue != null)
                                    {
                                        m.SingleNumberValue = string.Format("{0}-{1}", m.SingleNumberValue, ss);
                                    }
                                }
                            }
                        }
                    }
                    return(it);
                }
                if (t0 == t)
                {
                    if (t0.IsValue("БИЗНЕС", null) && t0.Next != null && t0.Next.Chars == t0.Chars)
                    {
                        t1 = t0.Next;
                        continue;
                    }
                    return(it);
                }
            }
            return(null);
        }
예제 #18
0
        public static MeasureToken TryParse(Pullenti.Ner.Token t, Pullenti.Ner.Core.TerminCollection addUnits, bool canBeSet = true, bool canUnitsAbsent = false, bool isResctriction = false, bool isSubval = false)
        {
            if (!(t is Pullenti.Ner.TextToken))
            {
                return(null);
            }
            if (t.IsTableControlChar)
            {
                return(null);
            }
            Pullenti.Ner.Token     t0  = t;
            Pullenti.Ner.MetaToken whd = null;
            int minmax = 0;

            Pullenti.Ner.Token tt = NumbersWithUnitToken._isMinOrMax(t0, ref minmax);
            if (tt != null)
            {
                t = tt.Next;
            }
            Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t, Pullenti.Ner.Core.NounPhraseParseAttr.ParsePreposition | Pullenti.Ner.Core.NounPhraseParseAttr.IgnoreBrackets, 0, null);
            if (npt == null)
            {
                whd = NumbersWithUnitToken._tryParseWHL(t);
                if (whd != null)
                {
                    npt = new Pullenti.Ner.Core.NounPhraseToken(t0, whd.EndToken);
                }
                else if (t0.IsValue("КПД", null))
                {
                    npt = new Pullenti.Ner.Core.NounPhraseToken(t0, t0);
                }
                else if ((t0 is Pullenti.Ner.TextToken) && t0.LengthChar > 3 && t0.GetMorphClassInDictionary().IsUndefined)
                {
                    npt = new Pullenti.Ner.Core.NounPhraseToken(t0, t0);
                }
                else if (t0.IsValue("T", null) && t0.Chars.IsAllLower)
                {
                    npt = new Pullenti.Ner.Core.NounPhraseToken(t0, t0);
                    t   = t0;
                    if (t.Next != null && t.Next.IsChar('='))
                    {
                        npt.EndToken = t.Next;
                    }
                }
                else if ((t0 is Pullenti.Ner.TextToken) && t0.Chars.IsLetter && isSubval)
                {
                    if (NumbersWithUnitToken.TryParse(t, addUnits, false, false, false, false) != null)
                    {
                        return(null);
                    }
                    npt = new Pullenti.Ner.Core.NounPhraseToken(t0, t0);
                    for (t = t0.Next; t != null; t = t.Next)
                    {
                        if (t.WhitespacesBeforeCount > 2)
                        {
                            break;
                        }
                        else if (!(t is Pullenti.Ner.TextToken))
                        {
                            break;
                        }
                        else if (!t.Chars.IsLetter)
                        {
                            Pullenti.Ner.Core.BracketSequenceToken br = Pullenti.Ner.Core.BracketHelper.TryParse(t, Pullenti.Ner.Core.BracketParseAttr.No, 100);
                            if (br != null)
                            {
                                npt.EndToken = (t = br.EndToken);
                            }
                            else
                            {
                                break;
                            }
                        }
                        else if (NumbersWithUnitToken.TryParse(t, addUnits, false, false, false, false) != null)
                        {
                            break;
                        }
                        else
                        {
                            npt.EndToken = t;
                        }
                    }
                }
                else
                {
                    return(null);
                }
            }
            else if (Pullenti.Ner.Core.NumberHelper.TryParseRealNumber(t, true, false) != null)
            {
                return(null);
            }
            else
            {
                Pullenti.Ner.Date.Internal.DateItemToken dtok = Pullenti.Ner.Date.Internal.DateItemToken.TryAttach(t, null, false);
                if (dtok != null)
                {
                    return(null);
                }
            }
            Pullenti.Ner.Token t1 = npt.EndToken;
            t = npt.EndToken;
            Pullenti.Ner.MetaToken name = new Pullenti.Ner.MetaToken(npt.BeginToken, npt.EndToken)
            {
                Morph = npt.Morph
            };
            List <UnitToken>    units     = null;
            List <UnitToken>    units2    = null;
            List <MeasureToken> internals = new List <MeasureToken>();
            bool not = false;

            for (tt = t1.Next; tt != null; tt = tt.Next)
            {
                if (tt.IsNewlineBefore)
                {
                    break;
                }
                if (tt.IsTableControlChar)
                {
                    break;
                }
                Pullenti.Ner.Token tt2 = NumbersWithUnitToken._isMinOrMax(tt, ref minmax);
                if (tt2 != null)
                {
                    t1 = (t = (tt = tt2));
                    continue;
                }
                if ((tt.IsValue("БЫТЬ", null) || tt.IsValue("ДОЛЖЕН", null) || tt.IsValue("ДОЛЖНЫЙ", null)) || tt.IsValue("МОЖЕТ", null) || ((tt.IsValue("СОСТАВЛЯТЬ", null) && !tt.GetMorphClassInDictionary().IsAdjective)))
                {
                    t1 = (t = tt);
                    if (tt.Previous.IsValue("НЕ", null))
                    {
                        not = true;
                    }
                    continue;
                }
                Pullenti.Ner.MetaToken www = NumbersWithUnitToken._tryParseWHL(tt);
                if (www != null)
                {
                    whd = www;
                    t1  = (t = (tt = www.EndToken));
                    continue;
                }
                if (tt.IsValue("ПРИ", null))
                {
                    MeasureToken mt1 = TryParse(tt.Next, addUnits, false, false, true, false);
                    if (mt1 != null)
                    {
                        internals.Add(mt1);
                        t1 = (t = (tt = mt1.EndToken));
                        continue;
                    }
                    NumbersWithUnitToken n1 = NumbersWithUnitToken.TryParse(tt.Next, addUnits, false, false, false, false);
                    if (n1 != null && n1.Units.Count > 0)
                    {
                        mt1 = new MeasureToken(n1.BeginToken, n1.EndToken)
                        {
                            Nums = n1
                        };
                        internals.Add(mt1);
                        t1 = (t = (tt = mt1.EndToken));
                        continue;
                    }
                }
                if (tt.IsValue("ПО", null) && tt.Next != null && tt.Next.IsValue("U", null))
                {
                    t1 = (t = (tt = tt.Next));
                    continue;
                }
                if (internals.Count > 0)
                {
                    if (tt.IsChar(':'))
                    {
                        break;
                    }
                    MeasureToken mt1 = TryParse(tt.Next, addUnits, false, false, true, false);
                    if (mt1 != null && mt1.Reliable)
                    {
                        internals.Add(mt1);
                        t1 = (t = (tt = mt1.EndToken));
                        continue;
                    }
                }
                if ((tt is Pullenti.Ner.NumberToken) && (tt as Pullenti.Ner.NumberToken).Typ == Pullenti.Ner.NumberSpellingType.Words)
                {
                    Pullenti.Ner.Core.NounPhraseToken npt3 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(tt, Pullenti.Ner.Core.NounPhraseParseAttr.ParseNumericAsAdjective, 0, null);
                    if (npt3 != null)
                    {
                        t1 = (tt = npt3.EndToken);
                        if (internals.Count == 0)
                        {
                            name.EndToken = t1;
                        }
                        continue;
                    }
                }
                if (((tt.IsHiphen && !tt.IsWhitespaceBefore && !tt.IsWhitespaceAfter) && (tt.Next is Pullenti.Ner.NumberToken) && (tt.Previous is Pullenti.Ner.TextToken)) && tt.Previous.Chars.IsAllUpper)
                {
                    t1 = (tt = (t = tt.Next));
                    if (internals.Count == 0)
                    {
                        name.EndToken = t1;
                    }
                    continue;
                }
                if (((tt is Pullenti.Ner.NumberToken) && !tt.IsWhitespaceBefore && (tt.Previous is Pullenti.Ner.TextToken)) && tt.Previous.Chars.IsAllUpper)
                {
                    t1 = (t = tt);
                    if (internals.Count == 0)
                    {
                        name.EndToken = t1;
                    }
                    continue;
                }
                if ((((tt is Pullenti.Ner.NumberToken) && !tt.IsWhitespaceAfter && tt.Next.IsHiphen) && !tt.Next.IsWhitespaceAfter && (tt.Next.Next is Pullenti.Ner.TextToken)) && tt.Next.Next.LengthChar > 2)
                {
                    t1 = (t = (tt = tt.Next.Next));
                    Pullenti.Ner.Core.NounPhraseToken npt1 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(tt, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                    if (npt1 != null && npt1.EndChar > tt.EndChar)
                    {
                        t1 = (t = (tt = npt1.EndToken));
                    }
                    if (internals.Count == 0)
                    {
                        name.EndToken = t1;
                    }
                    continue;
                }
                if ((tt is Pullenti.Ner.NumberToken) && tt.Previous != null)
                {
                    if (tt.Previous.IsValue("USB", null))
                    {
                        t1 = (t = tt);
                        if (internals.Count == 0)
                        {
                            name.EndToken = t1;
                        }
                        for (Pullenti.Ner.Token ttt = tt.Next; ttt != null; ttt = ttt.Next)
                        {
                            if (ttt.IsWhitespaceBefore)
                            {
                                break;
                            }
                            if (ttt.IsCharOf(",:"))
                            {
                                break;
                            }
                            t1 = (t = (tt = ttt));
                            if (internals.Count == 0)
                            {
                                name.EndToken = t1;
                            }
                        }
                        continue;
                    }
                }
                NumbersWithUnitToken mt0 = NumbersWithUnitToken.TryParse(tt, addUnits, false, false, false, false);
                if (mt0 != null)
                {
                    Pullenti.Ner.Core.NounPhraseToken npt1 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(tt, Pullenti.Ner.Core.NounPhraseParseAttr.ParseNumericAsAdjective | Pullenti.Ner.Core.NounPhraseParseAttr.ParsePreposition, 0, null);
                    if (npt1 != null && npt1.EndChar > mt0.EndChar)
                    {
                        t1 = (t = (tt = npt1.EndToken));
                        if (internals.Count == 0)
                        {
                            name.EndToken = t1;
                        }
                        continue;
                    }
                    break;
                }
                if (((tt.IsComma || tt.IsChar('('))) && tt.Next != null)
                {
                    www = NumbersWithUnitToken._tryParseWHL(tt.Next);
                    if (www != null)
                    {
                        whd = www;
                        t1  = (t = (tt = www.EndToken));
                        if (tt.Next != null && tt.Next.IsComma)
                        {
                            t1 = (tt = tt.Next);
                        }
                        if (tt.Next != null && tt.Next.IsChar(')'))
                        {
                            t1 = (tt = tt.Next);
                            continue;
                        }
                    }
                    List <UnitToken> uu = UnitToken.TryParseList(tt.Next, addUnits, false);
                    if (uu != null)
                    {
                        t1    = (t = uu[uu.Count - 1].EndToken);
                        units = uu;
                        if (tt.IsChar('(') && t1.Next != null && t1.Next.IsChar(')'))
                        {
                            t1 = (t = (tt = t1.Next));
                            continue;
                        }
                        else if (t1.Next != null && t1.Next.IsChar('('))
                        {
                            uu = UnitToken.TryParseList(t1.Next.Next, addUnits, false);
                            if (uu != null && uu[uu.Count - 1].EndToken.Next != null && uu[uu.Count - 1].EndToken.Next.IsChar(')'))
                            {
                                units2 = uu;
                                t1     = (t = (tt = uu[uu.Count - 1].EndToken.Next));
                                continue;
                            }
                            www = NumbersWithUnitToken._tryParseWHL(t1.Next);
                            if (www != null)
                            {
                                whd = www;
                                t1  = (t = (tt = www.EndToken));
                                continue;
                            }
                        }
                        if (uu != null && uu.Count > 0 && !uu[0].IsDoubt)
                        {
                            break;
                        }
                        if (t1.Next != null)
                        {
                            if (t1.Next.IsTableControlChar || t1.IsNewlineAfter)
                            {
                                break;
                            }
                        }
                        units = null;
                    }
                }
                if (Pullenti.Ner.Core.BracketHelper.CanBeStartOfSequence(tt, false, false) && !(tt.Next is Pullenti.Ner.NumberToken))
                {
                    Pullenti.Ner.Core.BracketSequenceToken br = Pullenti.Ner.Core.BracketHelper.TryParse(tt, Pullenti.Ner.Core.BracketParseAttr.No, 100);
                    if (br != null)
                    {
                        t1 = (t = (tt = br.EndToken));
                        continue;
                    }
                }
                if (tt.IsValue("НЕ", null) && tt.Next != null)
                {
                    Pullenti.Morph.MorphClass mc = tt.Next.GetMorphClassInDictionary();
                    if (mc.IsAdverb || mc.IsMisc)
                    {
                        break;
                    }
                    continue;
                }
                if (tt.IsValue("ЯМЗ", null))
                {
                }
                Pullenti.Ner.Core.NounPhraseToken npt2 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(tt, Pullenti.Ner.Core.NounPhraseParseAttr.ParsePreposition | Pullenti.Ner.Core.NounPhraseParseAttr.IgnoreBrackets | Pullenti.Ner.Core.NounPhraseParseAttr.ParsePronouns, 0, null);
                if (npt2 == null)
                {
                    if (tt.Morph.Class.IsPreposition || tt.Morph.Class.IsConjunction)
                    {
                        Pullenti.Ner.Core.TerminToken to = NumbersWithUnitToken.m_Termins.TryParse(tt, Pullenti.Ner.Core.TerminParseAttr.No);
                        if (to != null)
                        {
                            if ((to.EndToken.Next is Pullenti.Ner.TextToken) && to.EndToken.Next.IsLetters)
                            {
                            }
                            else
                            {
                                break;
                            }
                        }
                        t1 = tt;
                        continue;
                    }
                    Pullenti.Morph.MorphClass mc = tt.GetMorphClassInDictionary();
                    if (((tt is Pullenti.Ner.TextToken) && tt.Chars.IsLetter && tt.LengthChar > 1) && (((tt.Chars.IsAllUpper || mc.IsAdverb || mc.IsUndefined) || mc.IsAdjective)))
                    {
                        List <UnitToken> uu = UnitToken.TryParseList(tt, addUnits, false);
                        if (uu != null)
                        {
                            if (uu[0].LengthChar > 1 || uu.Count > 1)
                            {
                                units = uu;
                                t1    = (t = uu[uu.Count - 1].EndToken);
                                break;
                            }
                        }
                        t1 = (t = tt);
                        if (internals.Count == 0)
                        {
                            name.EndToken = tt;
                        }
                        continue;
                    }
                    if (tt.IsComma)
                    {
                        continue;
                    }
                    if (tt.IsChar('.'))
                    {
                        if (!Pullenti.Ner.Core.MiscHelper.CanBeStartOfSentence(tt.Next))
                        {
                            continue;
                        }
                        List <UnitToken> uu = UnitToken.TryParseList(tt.Next, addUnits, false);
                        if (uu != null)
                        {
                            if (uu[0].LengthChar > 2 || uu.Count > 1)
                            {
                                units = uu;
                                t1    = (t = uu[uu.Count - 1].EndToken);
                                break;
                            }
                        }
                    }
                    break;
                }
                t1 = (t = (tt = npt2.EndToken));
                if (internals.Count > 0)
                {
                }
                else if (t.IsValue("ПРЕДЕЛ", null) || t.IsValue("ГРАНИЦА", null) || t.IsValue("ДИАПАЗОН", null))
                {
                }
                else if (t.Chars.IsLetter)
                {
                    name.EndToken = t1;
                }
            }
            Pullenti.Ner.Token t11 = t1;
            for (t1 = t1.Next; t1 != null; t1 = t1.Next)
            {
                if (t1.IsTableControlChar)
                {
                }
                else if (t1.IsCharOf(":,_"))
                {
                    if (isResctriction)
                    {
                        return(null);
                    }
                    Pullenti.Ner.MetaToken www = NumbersWithUnitToken._tryParseWHL(t1.Next);
                    if (www != null)
                    {
                        whd = www;
                        t1  = (t = www.EndToken);
                        continue;
                    }
                    List <UnitToken> uu = UnitToken.TryParseList(t1.Next, addUnits, false);
                    if (uu != null)
                    {
                        if (uu[0].LengthChar > 1 || uu.Count > 1)
                        {
                            units = uu;
                            t1    = (t = uu[uu.Count - 1].EndToken);
                            continue;
                        }
                    }
                    if (t1.IsChar(':'))
                    {
                        List <MeasureToken> li = new List <MeasureToken>();
                        for (Pullenti.Ner.Token ttt = t1.Next; ttt != null; ttt = ttt.Next)
                        {
                            if (ttt.IsHiphen || ttt.IsTableControlChar)
                            {
                                continue;
                            }
                            if ((ttt is Pullenti.Ner.TextToken) && !ttt.Chars.IsLetter)
                            {
                                continue;
                            }
                            MeasureToken mt1 = TryParse(ttt, addUnits, true, true, false, true);
                            if (mt1 == null)
                            {
                                break;
                            }
                            li.Add(mt1);
                            ttt = mt1.EndToken;
                            if (ttt.Next != null && ttt.Next.IsChar(';'))
                            {
                                ttt = ttt.Next;
                            }
                            if (ttt.IsChar(';'))
                            {
                            }
                            else if (ttt.IsNewlineAfter && mt1.IsNewlineBefore)
                            {
                            }
                            else
                            {
                                break;
                            }
                        }
                        if (li.Count > 1)
                        {
                            MeasureToken res0 = new MeasureToken(t0, li[li.Count - 1].EndToken)
                            {
                                Internals = li, IsEmpty = true
                            };
                            if (internals != null && internals.Count > 0)
                            {
                                res0.InternalEx = internals[0];
                            }
                            string nam = Pullenti.Ner.Core.MiscHelper.GetTextValueOfMetaToken(name, Pullenti.Ner.Core.GetTextAttr.FirstNounGroupToNominative);
                            li[0].BeginToken = t0;
                            foreach (MeasureToken v in li)
                            {
                                v.Name = string.Format("{0} ({1})", nam, v.Name ?? "").Trim();
                                if (v.Nums != null && v.Nums.Units.Count == 0 && units != null)
                                {
                                    v.Nums.Units = units;
                                }
                            }
                            return(res0);
                        }
                    }
                }
                else if (t1.IsHiphen && t1.IsWhitespaceAfter && t1.IsWhitespaceBefore)
                {
                }
                else if (t1.IsHiphen && t1.Next != null && t1.Next.IsChar('('))
                {
                }
                else
                {
                    break;
                }
            }
            if (t1 == null)
            {
                return(null);
            }
            List <NumbersWithUnitToken> mts = NumbersWithUnitToken.TryParseMulti(t1, addUnits, false, not, true, isResctriction);

            if (mts == null)
            {
                if (units != null && units.Count > 0)
                {
                    if (t1 == null || t1.Previous.IsChar(':'))
                    {
                        mts = new List <NumbersWithUnitToken>();
                        if (t1 == null)
                        {
                            for (t1 = t11; t1 != null && t1.Next != null; t1 = t1.Next)
                            {
                            }
                        }
                        else
                        {
                            t1 = t1.Previous;
                        }
                        mts.Add(new NumbersWithUnitToken(t0, t1)
                        {
                            SingleVal = double.NaN
                        });
                    }
                }
                if (mts == null)
                {
                    return(null);
                }
            }
            NumbersWithUnitToken mt = mts[0];

            if (mt.BeginToken == mt.EndToken && !(mt.BeginToken is Pullenti.Ner.NumberToken))
            {
                return(null);
            }
            if (!isSubval && name.BeginToken.Morph.Class.IsPreposition)
            {
                name.BeginToken = name.BeginToken.Next;
            }
            if (mt.WHL != null)
            {
                whd = mt.WHL;
            }
            for (int kk = 0; kk < 10; kk++)
            {
                if (whd != null && whd.EndToken == name.EndToken)
                {
                    name.EndToken = whd.BeginToken.Previous;
                    continue;
                }
                if (units != null)
                {
                    if (units[units.Count - 1].EndToken == name.EndToken)
                    {
                        name.EndToken = units[0].BeginToken.Previous;
                        continue;
                    }
                }
                break;
            }
            if (mts.Count > 1 && internals.Count == 0)
            {
                if (mt.Units.Count == 0)
                {
                    if (units != null)
                    {
                        foreach (NumbersWithUnitToken m in mts)
                        {
                            m.Units = units;
                        }
                    }
                }
                MeasureToken res1 = new MeasureToken(t0, mts[mts.Count - 1].EndToken)
                {
                    Morph = name.Morph, Reliable = true
                };
                res1.Name = Pullenti.Ner.Core.MiscHelper.GetTextValueOfMetaToken(name, Pullenti.Ner.Core.GetTextAttr.FirstNounGroupToNominative);
                for (int k = 0; k < mts.Count; k++)
                {
                    MeasureToken ttt = new MeasureToken(mts[k].BeginToken, mts[k].EndToken)
                    {
                        Nums = mts[k]
                    };
                    if (whd != null)
                    {
                        List <string> nams = whd.Tag as List <string>;
                        if (k < nams.Count)
                        {
                            ttt.Name = nams[k];
                        }
                    }
                    res1.Internals.Add(ttt);
                }
                Pullenti.Ner.Token tt1 = res1.EndToken.Next;
                if (tt1 != null && tt1.IsChar('±'))
                {
                    NumbersWithUnitToken nn = NumbersWithUnitToken._tryParse(tt1, addUnits, true, false, false);
                    if (nn != null && nn.PlusMinusPercent)
                    {
                        res1.EndToken = nn.EndToken;
                        res1.Nums     = nn;
                        if (nn.Units.Count > 0 && units == null && mt.Units.Count == 0)
                        {
                            foreach (NumbersWithUnitToken m in mts)
                            {
                                m.Units = nn.Units;
                            }
                        }
                    }
                }
                return(res1);
            }
            if (!mt.IsWhitespaceBefore)
            {
                if (mt.BeginToken.Previous == null)
                {
                    return(null);
                }
                if (mt.BeginToken.Previous.IsCharOf(":),") || mt.BeginToken.Previous.IsTableControlChar || mt.BeginToken.Previous.IsValue("IP", null))
                {
                }
                else if (mt.BeginToken.IsHiphen && mt.Units.Count > 0 && !mt.Units[0].IsDoubt)
                {
                }
                else
                {
                    return(null);
                }
            }
            if (mt.Units.Count == 0 && units != null)
            {
                mt.Units = units;
                if (mt.DivNum != null && units.Count > 1 && mt.DivNum.Units.Count == 0)
                {
                    for (int i = 1; i < units.Count; i++)
                    {
                        if (units[i].Pow == -1)
                        {
                            for (int j = i; j < units.Count; j++)
                            {
                                mt.DivNum.Units.Add(units[j]);
                                units[j].Pow = -units[j].Pow;
                            }
                            mt.Units.RemoveRange(i, units.Count - i);
                            break;
                        }
                    }
                }
            }
            if ((minmax < 0) && mt.SingleVal != null)
            {
                mt.FromVal     = mt.SingleVal;
                mt.FromInclude = true;
                mt.SingleVal   = null;
            }
            if (minmax > 0 && mt.SingleVal != null)
            {
                mt.ToVal     = mt.SingleVal;
                mt.ToInclude = true;
                mt.SingleVal = null;
            }
            if (mt.Units.Count == 0)
            {
                units = UnitToken.TryParseList(mt.EndToken.Next, addUnits, true);
                if (units == null)
                {
                    if (canUnitsAbsent)
                    {
                    }
                    else
                    {
                        return(null);
                    }
                }
                else
                {
                    mt.Units = units;
                }
            }
            MeasureToken res = new MeasureToken(t0, mt.EndToken)
            {
                Morph = name.Morph, Internals = internals
            };

            if (((!t0.IsWhitespaceBefore && t0.Previous != null && t0 == name.BeginToken) && t0.Previous.IsHiphen && !t0.Previous.IsWhitespaceBefore) && (t0.Previous.Previous is Pullenti.Ner.TextToken))
            {
                name.BeginToken = (res.BeginToken = name.BeginToken.Previous.Previous);
            }
            res.Name = Pullenti.Ner.Core.MiscHelper.GetTextValueOfMetaToken(name, (!isSubval ? Pullenti.Ner.Core.GetTextAttr.FirstNounGroupToNominative : Pullenti.Ner.Core.GetTextAttr.No));
            res.Nums = mt;
            foreach (UnitToken u in res.Nums.Units)
            {
                if (u.Keyword != null)
                {
                    if (u.Keyword.BeginChar >= res.BeginChar)
                    {
                        res.Reliable = true;
                    }
                }
            }
            res._parseInternals(addUnits);
            if (res.Internals.Count > 0 || !canBeSet)
            {
                return(res);
            }
            t1 = res.EndToken.Next;
            if (t1 != null && t1.IsCommaAnd)
            {
                t1 = t1.Next;
            }
            List <NumbersWithUnitToken> mts1 = NumbersWithUnitToken.TryParseMulti(t1, addUnits, false, false, false, false);

            if ((mts1 != null && mts1.Count == 1 && (t1.WhitespacesBeforeCount < 3)) && mts1[0].Units.Count > 0 && !UnitToken.CanBeEquals(mts[0].Units, mts1[0].Units))
            {
                res.IsSet = true;
                res.Nums  = null;
                res.Internals.Add(new MeasureToken(mt.BeginToken, mt.EndToken)
                {
                    Nums = mt
                });
                res.Internals.Add(new MeasureToken(mts1[0].BeginToken, mts1[0].EndToken)
                {
                    Nums = mts1[0]
                });
                res.EndToken = mts1[0].EndToken;
            }
            return(res);
        }
예제 #19
0
        /// <summary>
        /// Parses a phone item at the given token. When the item is a prefix, the result is
        /// extended over the tokens that follow it on the same line: further prefixes, noun
        /// phrases, proper names and prepositions, up to and including a closing ':'.
        /// </summary>
        /// <param name="t0">token to start parsing from</param>
        /// <returns>
        /// the parsed item (extended if it is a prefix); null when nothing is recognised at
        /// <paramref name="t0"/> or when a following noun phrase ends with "ПОСЕЛЕНИЕ"
        /// </returns>
        public static PhoneItemToken TryAttach(Pullenti.Ner.Token t0)
        {
            PhoneItemToken prefix = _TryAttach(t0);

            // Only prefix items are extended; everything else (including null) is returned as is.
            if (prefix == null || prefix.ItemType != PhoneItemType.Prefix)
            {
                return prefix;
            }

            Pullenti.Ner.Token cur = prefix.EndToken.Next;
            while (cur != null)
            {
                // Never cross a table control character or a line break.
                if (cur.IsTableControlChar || cur.IsNewlineBefore)
                {
                    break;
                }

                PhoneItemToken following = _TryAttach(cur);
                if (following != null)
                {
                    // Any recognised item that is not another prefix ends the extension.
                    if (following.ItemType != PhoneItemType.Prefix)
                    {
                        break;
                    }
                    // Absorb the next prefix, taking over its kind if ours is still unknown.
                    if (prefix.Kind == Pullenti.Ner.Phone.PhoneKind.Undefined)
                    {
                        prefix.Kind = following.Kind;
                    }
                    Pullenti.Ner.Token last = following.EndToken;
                    prefix.EndToken = last;
                    cur             = last;
                }
                else if (cur.IsChar(':'))
                {
                    // The colon closes the prefix and is included in it.
                    prefix.EndToken = cur;
                    break;
                }
                else if (!(cur is Pullenti.Ner.TextToken) || t0.LengthChar == 1)
                {
                    // Non-text token, or the starting token is a single character: stop here.
                    break;
                }
                else
                {
                    Pullenti.Ner.Core.NounPhraseToken phrase = Pullenti.Ner.Core.NounPhraseHelper.TryParse(cur, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                    if (phrase != null)
                    {
                        cur = phrase.EndToken;
                        // A noun phrase ending with this word cancels the whole result.
                        if (cur.IsValue("ПОСЕЛЕНИЕ", null))
                        {
                            return null;
                        }
                        prefix.EndToken = cur;
                    }
                    else if (cur.GetMorphClassInDictionary().IsProper)
                    {
                        prefix.EndToken = cur;
                    }
                    else if (!cur.Morph.Class.IsPreposition)
                    {
                        // Prepositions are stepped over without moving the end; anything else stops the scan.
                        break;
                    }
                }

                cur = cur.Next;
            }
            return prefix;
        }
예제 #20
0
        /// <summary>
        /// Builds a <c>BlockLine</c> for the text line that starts at token <paramref name="t"/>.
        /// The method finds the end of the line, consumes a leading numbering prefix, tries to
        /// classify the line by a keyword from <c>m_Ontology</c> or by a name from
        /// <paramref name="names"/>, gathers word statistics for the line and, if the type is
        /// still undefined, applies noun-phrase heuristics to guess it.
        /// </summary>
        /// <param name="t">first token of the line; null yields null</param>
        /// <param name="names">optional collection of known names; may be null</param>
        /// <returns>
        /// the filled <c>BlockLine</c>; null when <paramref name="t"/> is null or when the line
        /// is entirely occupied by a single list-like noun phrase (see the comment at that return)
        /// </returns>
        public static BlockLine Create(Pullenti.Ner.Token t, Pullenti.Ner.Core.TerminCollection names)
        {
            if (t == null)
            {
                return(null);
            }
            BlockLine res = new BlockLine(t, t);

            // Stretch the result up to the last token before the next line break.
            for (Pullenti.Ner.Token tt = t; tt != null; tt = tt.Next)
            {
                if (tt != t && tt.IsNewlineBefore)
                {
                    break;
                }
                else
                {
                    res.EndToken = tt;
                }
            }
            // NOTE(review): nums is only incremented below and never read inside this method.
            int nums = 0;

            // Consume a leading numbering prefix (numbers or roman numerals, each optionally followed by a dot).
            while (t != null && t.Next != null && t.EndChar <= res.EndChar)
            {
                if (t is Pullenti.Ner.NumberToken)
                {
                }
                else
                {
                    // Not a number token: accept a roman numeral provided something follows it.
                    Pullenti.Ner.NumberToken rom = Pullenti.Ner.Core.NumberHelper.TryParseRoman(t);
                    if (rom != null && rom.EndToken.Next != null)
                    {
                        t = rom.EndToken;
                    }
                    else
                    {
                        break;
                    }
                }
                // The number must be followed by '.' or by a text token that is not all lower-case.
                if (t.Next.IsChar('.'))
                {
                }
                else if ((t.Next is Pullenti.Ner.TextToken) && !t.Next.Chars.IsAllLower)
                {
                }
                else
                {
                    break;
                }
                res.NumberEnd = t;
                t             = t.Next;
                if (t.IsChar('.') && t.Next != null)
                {
                    res.NumberEnd = t;
                    t             = t.Next;
                }
                // The token after the numbering already starts a new line: nothing more to analyse.
                if (t.IsNewlineBefore)
                {
                    return(res);
                }
                nums++;
            }
            // Look for a block-type keyword at the current position; failing that, retry from the
            // noun of a multi-token noun phrase starting here.
            Pullenti.Ner.Core.TerminToken tok = m_Ontology.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
            if (tok == null)
            {
                Pullenti.Ner.Core.NounPhraseToken npt1 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                if (npt1 != null && npt1.EndToken != npt1.BeginToken)
                {
                    tok = m_Ontology.TryParse(npt1.Noun.BeginToken, Pullenti.Ner.Core.TerminParseAttr.No);
                }
            }
            if (tok != null)
            {
                // A keyword directly preceded by ':' is discarded.
                if (t.Previous != null && t.Previous.IsChar(':'))
                {
                    tok = null;
                }
            }
            if (tok != null)
            {
                // The matched termin's Tag holds the block type.
                BlkTyps typ = (BlkTyps)tok.Termin.Tag;
                if (typ == BlkTyps.Conslusion)
                {
                    // A conclusion keyword is kept only when it ends the line, or when it is followed
                    // by a preposition and then a keyword of type Chapter.
                    if (t.IsNewlineAfter)
                    {
                    }
                    else if (t.Next != null && t.Next.Morph.Class.IsPreposition && t.Next.Next != null)
                    {
                        Pullenti.Ner.Core.TerminToken tok2 = m_Ontology.TryParse(t.Next.Next, Pullenti.Ner.Core.TerminParseAttr.No);
                        if (tok2 != null && ((BlkTyps)tok2.Termin.Tag) == BlkTyps.Chapter)
                        {
                        }
                        else
                        {
                            tok = null;
                        }
                    }
                    else
                    {
                        tok = null;
                    }
                }
                // Discard the keyword when the token's language differs from the kit's base language.
                if (t.Kit.BaseLanguage != t.Morph.Language)
                {
                    tok = null;
                }
                // Index keyword other than "ОГЛАВЛЕНИЕ" that does not end the line: discard it when no
                // noun phrase follows, or when the following noun phrase is genitive and ends the line.
                if (typ == BlkTyps.Index && !t.IsValue("ОГЛАВЛЕНИЕ", null))
                {
                    if (!t.IsNewlineAfter && t.Next != null)
                    {
                        Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t.Next, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                        if (npt != null && npt.IsNewlineAfter && npt.Morph.Case.IsGenitive)
                        {
                            tok = null;
                        }
                        else if (npt == null)
                        {
                            tok = null;
                        }
                    }
                }
                // "ВВЕДЕНИЕ" that does not end the line and is followed by a genitive noun phrase is discarded.
                if ((typ == BlkTyps.Intro && tok != null && !tok.IsNewlineAfter) && t.IsValue("ВВЕДЕНИЕ", null))
                {
                    Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t.Next, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                    if (npt != null && npt.Morph.Case.IsGenitive)
                    {
                        tok = null;
                    }
                }
                if (tok != null)
                {
                    // Keyword accepted. With no numbering, the keyword itself marks NumberEnd
                    // (and the line is extended if the keyword runs past its current end).
                    if (res.NumberEnd == null)
                    {
                        res.NumberEnd = tok.EndToken;
                        if (res.NumberEnd.EndChar > res.EndChar)
                        {
                            res.EndToken = res.NumberEnd;
                        }
                    }
                    res.Typ = typ;
                    t       = tok.EndToken;
                    // Include a ':' or '.' that immediately follows the keyword.
                    if (t.Next != null && t.Next.IsCharOf(":."))
                    {
                        t            = t.Next;
                        res.EndToken = t;
                    }
                    if (t.IsNewlineAfter || t.Next == null)
                    {
                        return(res);
                    }
                    t = t.Next;
                }
            }
            // '§' followed by a number marks a chapter.
            if (t.IsChar('§') && (t.Next is Pullenti.Ner.NumberToken))
            {
                res.Typ       = BlkTyps.Chapter;
                res.NumberEnd = t;
                t             = t.Next;
            }
            if (names != null)
            {
                // A known name that runs to the end of a line settles the result immediately.
                Pullenti.Ner.Core.TerminToken tok2 = names.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
                if (tok2 != null && tok2.EndToken.IsNewlineAfter)
                {
                    res.EndToken    = tok2.EndToken;
                    res.IsExistName = true;
                    if (res.Typ == BlkTyps.Undefined)
                    {
                        // Type still unknown: classify the text after the numbering on its own (without
                        // names). Only Literature/Intro/Conslusion are taken over; otherwise it is a chapter.
                        BlockLine li2 = Create((res.NumberEnd == null ? null : res.NumberEnd.Next), null);
                        if (li2 != null && ((li2.Typ == BlkTyps.Literature || li2.Typ == BlkTyps.Intro || li2.Typ == BlkTyps.Conslusion)))
                        {
                            res.Typ = li2.Typ;
                        }
                        else
                        {
                            res.Typ = BlkTyps.Chapter;
                        }
                    }
                    return(res);
                }
            }
            // If the line ends with a number or '.', and the token before that is '.', set
            // HasContentItemTail and move t1 back across the run of dots.
            // NOTE(review): presumably the dot leader of a table-of-contents entry - confirm.
            Pullenti.Ner.Token t1 = res.EndToken;
            if ((((t1 is Pullenti.Ner.NumberToken) || t1.IsChar('.'))) && t1.Previous != null)
            {
                t1 = t1.Previous;
                if (t1.IsChar('.'))
                {
                    res.HasContentItemTail = true;
                    for (; t1 != null && t1.BeginChar > res.BeginChar; t1 = t1.Previous)
                    {
                        if (!t1.IsChar('.'))
                        {
                            break;
                        }
                    }
                }
            }
            // Gather statistics over the tokens from the current position up to t1.
            res.IsAllUpper = true;
            for (; t != null && t.EndChar <= t1.EndChar; t = t.Next)
            {
                if (!(t is Pullenti.Ner.TextToken) || !t.Chars.IsLetter)
                {
                    res.NotWords++;
                }
                else
                {
                    // Letters not found in the dictionary count as non-words; known words are
                    // counted only when longer than two characters.
                    Pullenti.Morph.MorphClass mc = t.GetMorphClassInDictionary();
                    if (mc.IsUndefined)
                    {
                        res.NotWords++;
                    }
                    else if (t.LengthChar > 2)
                    {
                        res.Words++;
                    }
                    if (!t.Chars.IsAllUpper)
                    {
                        res.IsAllUpper = false;
                    }
                    // Pure verbs set HasVerb, except those whose term ends with "ING".
                    if ((t as Pullenti.Ner.TextToken).IsPureVerb)
                    {
                        if (!(t as Pullenti.Ner.TextToken).Term.EndsWith("ING"))
                        {
                            res.HasVerb = true;
                        }
                    }
                }
            }
            if (res.Typ == BlkTyps.Undefined)
            {
                // No type yet: examine the noun phrase that starts the line (after the numbering, if any).
                Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse((res.NumberEnd == null ? res.BeginToken : res.NumberEnd.Next), Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                if (npt != null)
                {
                    if (npt.Noun.IsValue("ХАРАКТЕРИСТИКА", null) || npt.Noun.IsValue("СОДЕРЖАНИЕ", "ЗМІСТ"))
                    {
                        // Intro candidate: everything after the phrase (dots skipped) must be genitive noun phrases.
                        bool ok = true;
                        for (Pullenti.Ner.Token tt = npt.EndToken.Next; tt != null && tt.EndChar <= res.EndChar; tt = tt.Next)
                        {
                            if (tt.IsChar('.'))
                            {
                                continue;
                            }
                            Pullenti.Ner.Core.NounPhraseToken npt2 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(tt, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                            if (npt2 == null || !npt2.Morph.Case.IsGenitive)
                            {
                                ok = false;
                                break;
                            }
                            tt = npt2.EndToken;
                            // The phrase runs past the current end: extend the result to it and then
                            // forward to the end of that line.
                            if (tt.EndChar > res.EndChar)
                            {
                                res.EndToken = tt;
                                if (!tt.IsNewlineAfter)
                                {
                                    for (; res.EndToken.Next != null; res.EndToken = res.EndToken.Next)
                                    {
                                        if (res.EndToken.IsNewlineAfter)
                                        {
                                            break;
                                        }
                                    }
                                }
                            }
                        }
                        if (ok)
                        {
                            res.Typ         = BlkTyps.Intro;
                            res.IsExistName = true;
                        }
                    }
                    else if (npt.Noun.IsValue("ВЫВОД", "ВИСНОВОК") || npt.Noun.IsValue("РЕЗУЛЬТАТ", "ДОСЛІДЖЕННЯ"))
                    {
                        // Conclusion candidate: the rest of the line may contain only commas, dots, "and"
                        // and noun phrases whose noun is one of the words listed below.
                        bool ok = true;
                        for (Pullenti.Ner.Token tt = npt.EndToken.Next; tt != null && tt.EndChar <= res.EndChar; tt = tt.Next)
                        {
                            if (tt.IsCharOf(",.") || tt.IsAnd)
                            {
                                continue;
                            }
                            Pullenti.Ner.Core.NounPhraseToken npt1 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(tt, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                            if (npt1 != null)
                            {
                                if (npt1.Noun.IsValue("РЕЗУЛЬТАТ", "ДОСЛІДЖЕННЯ") || npt1.Noun.IsValue("РЕКОМЕНДАЦИЯ", "РЕКОМЕНДАЦІЯ") || npt1.Noun.IsValue("ИССЛЕДОВАНИЕ", "ДОСЛІДЖЕННЯ"))
                                {
                                    tt = npt1.EndToken;
                                    // Same line-extension step as in the Intro branch above.
                                    if (tt.EndChar > res.EndChar)
                                    {
                                        res.EndToken = tt;
                                        if (!tt.IsNewlineAfter)
                                        {
                                            for (; res.EndToken.Next != null; res.EndToken = res.EndToken.Next)
                                            {
                                                if (res.EndToken.IsNewlineAfter)
                                                {
                                                    break;
                                                }
                                            }
                                        }
                                    }
                                    continue;
                                }
                            }
                            ok = false;
                            break;
                        }
                        if (ok)
                        {
                            res.Typ         = BlkTyps.Conslusion;
                            res.IsExistName = true;
                        }
                    }
                    if (res.Typ == BlkTyps.Undefined && npt != null && npt.EndChar <= res.EndChar)
                    {
                        // Literature candidate. publ counts the noun phrases accepted by _isPub.
                        bool ok   = false;
                        int  publ = 0;
                        if (_isPub(npt))
                        {
                            ok   = true;
                            publ = 1;
                        }
                        else if ((npt.Noun.IsValue("СПИСОК", null) || npt.Noun.IsValue("УКАЗАТЕЛЬ", "ПОКАЖЧИК") || npt.Noun.IsValue("ПОЛОЖЕНИЕ", "ПОЛОЖЕННЯ")) || npt.Noun.IsValue("ВЫВОД", "ВИСНОВОК") || npt.Noun.IsValue("РЕЗУЛЬТАТ", "ДОСЛІДЖЕННЯ"))
                        {
                            // One of these noun phrases filling the whole line produces no result at all.
                            if (npt.EndChar == res.EndChar)
                            {
                                return(null);
                            }
                            ok = true;
                        }
                        if (ok)
                        {
                            if (npt.BeginToken == npt.EndToken && npt.Noun.IsValue("СПИСОК", null) && npt.EndChar == res.EndChar)
                            {
                                ok = false;
                            }
                            // The rest of the line may contain only punctuation, "and", prepositions,
                            // the word "ОТРАЖЕНЫ" and noun phrases from the accepted set below.
                            // Note that npt is reassigned inside this loop.
                            for (Pullenti.Ner.Token tt = npt.EndToken.Next; tt != null && tt.EndChar <= res.EndChar; tt = tt.Next)
                            {
                                if (tt.IsCharOf(",.:") || tt.IsAnd || tt.Morph.Class.IsPreposition)
                                {
                                    continue;
                                }
                                if (tt.IsValue("ОТРАЖЕНЫ", "ВІДОБРАЖЕНІ"))
                                {
                                    continue;
                                }
                                npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(tt, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                                if (npt == null)
                                {
                                    ok = false;
                                    break;
                                }
                                if (((_isPub(npt) || npt.Noun.IsValue("РАБОТА", "РОБОТА") || npt.Noun.IsValue("ИССЛЕДОВАНИЕ", "ДОСЛІДЖЕННЯ")) || npt.Noun.IsValue("АВТОР", null) || npt.Noun.IsValue("ТРУД", "ПРАЦЯ")) || npt.Noun.IsValue("ТЕМА", null) || npt.Noun.IsValue("ДИССЕРТАЦИЯ", "ДИСЕРТАЦІЯ"))
                                {
                                    tt = npt.EndToken;
                                    if (_isPub(npt))
                                    {
                                        publ++;
                                    }
                                    // Same line-extension step as in the branches above.
                                    if (tt.EndChar > res.EndChar)
                                    {
                                        res.EndToken = tt;
                                        if (!tt.IsNewlineAfter)
                                        {
                                            for (; res.EndToken.Next != null; res.EndToken = res.EndToken.Next)
                                            {
                                                if (res.EndToken.IsNewlineAfter)
                                                {
                                                    break;
                                                }
                                            }
                                        }
                                    }
                                    continue;
                                }
                                ok = false;
                                break;
                            }
                            if (ok)
                            {
                                res.Typ         = BlkTyps.Literature;
                                res.IsExistName = true;
                                // With no _isPub phrase at all, a line ending within the first two thirds of
                                // the text is downgraded: Misc when NumberEnd is set, Undefined otherwise.
                                if (publ == 0 && (res.EndChar < (((res.Kit.Sofa.Text.Length * 2) / 3))))
                                {
                                    if (res.NumberEnd != null)
                                    {
                                        res.Typ = BlkTyps.Misc;
                                    }
                                    else
                                    {
                                        res.Typ = BlkTyps.Undefined;
                                    }
                                }
                            }
                        }
                    }
                }
            }
            return(res);
        }
예제 #21
0
        // Основная функция выделения телефонов
        public override void Process(Pullenti.Ner.Core.AnalysisKit kit)
        {
            Pullenti.Ner.Core.AnalyzerData ad = kit.GetAnalyzerData(this);
            bool hasDenoms = false;

            foreach (Pullenti.Ner.Analyzer a in kit.Processor.Analyzers)
            {
                if ((a is Pullenti.Ner.Denomination.DenominationAnalyzer) && !a.IgnoreThisAnalyzer)
                {
                    hasDenoms = true;
                }
            }
            if (!hasDenoms)
            {
                Pullenti.Ner.Denomination.DenominationAnalyzer a = new Pullenti.Ner.Denomination.DenominationAnalyzer();
                a.Process(kit);
            }
            List <KeywordReferent> li   = new List <KeywordReferent>();
            StringBuilder          tmp  = new StringBuilder();
            List <string>          tmp2 = new List <string>();
            int max = 0;

            for (Pullenti.Ner.Token t = kit.FirstToken; t != null; t = t.Next)
            {
                max++;
            }
            int cur = 0;

            for (Pullenti.Ner.Token t = kit.FirstToken; t != null; t = t.Next, cur++)
            {
                Pullenti.Ner.Referent r = t.GetReferent();
                if (r != null)
                {
                    t = this._addReferents(ad, t, cur, max);
                    continue;
                }
                if (!(t is Pullenti.Ner.TextToken))
                {
                    continue;
                }
                if (!t.Chars.IsLetter || (t.LengthChar < 3))
                {
                    continue;
                }
                string term = (t as Pullenti.Ner.TextToken).Term;
                if (term == "ЕСТЬ")
                {
                    if ((t.Previous is Pullenti.Ner.TextToken) && t.Previous.Morph.Class.IsVerb)
                    {
                    }
                    else
                    {
                        continue;
                    }
                }
                Pullenti.Ner.Core.NounPhraseToken npt = null;
                npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t, Pullenti.Ner.Core.NounPhraseParseAttr.AdjectiveCanBeLast | Pullenti.Ner.Core.NounPhraseParseAttr.ParsePreposition, 0, null);
                if (npt == null)
                {
                    Pullenti.Morph.MorphClass mc = t.GetMorphClassInDictionary();
                    if (mc.IsVerb && !mc.IsPreposition)
                    {
                        if ((t as Pullenti.Ner.TextToken).IsVerbBe)
                        {
                            continue;
                        }
                        if (t.IsValue("МОЧЬ", null) || t.IsValue("WOULD", null))
                        {
                            continue;
                        }
                        KeywordReferent kref = new KeywordReferent()
                        {
                            Typ = KeywordType.Predicate
                        };
                        string norm = t.GetNormalCaseText(Pullenti.Morph.MorphClass.Verb, Pullenti.Morph.MorphNumber.Singular, Pullenti.Morph.MorphGender.Undefined, false);
                        if (norm == null)
                        {
                            norm = (t as Pullenti.Ner.TextToken).Lemma;
                        }
                        if (norm.EndsWith("ЬСЯ"))
                        {
                            norm = norm.Substring(0, norm.Length - 2);
                        }
                        kref.AddSlot(KeywordReferent.ATTR_VALUE, norm, false, 0);
                        List <Pullenti.Semantic.Utils.DerivateGroup> drv = Pullenti.Semantic.Utils.DerivateService.FindDerivates(norm, true, t.Morph.Language);
                        _addNormals(kref, drv, norm);
                        kref = ad.RegisterReferent(kref) as KeywordReferent;
                        _setRank(kref, cur, max);
                        Pullenti.Ner.ReferentToken rt1 = new Pullenti.Ner.ReferentToken(ad.RegisterReferent(kref), t, t)
                        {
                            Morph = t.Morph
                        };
                        kit.EmbedToken(rt1);
                        t = rt1;
                        continue;
                    }
                    continue;
                }
                if (npt.InternalNoun != null)
                {
                    continue;
                }
                if (npt.EndToken.IsValue("ЦЕЛОМ", null) || npt.EndToken.IsValue("ЧАСТНОСТИ", null))
                {
                    if (npt.Preposition != null)
                    {
                        t = npt.EndToken;
                        continue;
                    }
                }
                if (npt.EndToken.IsValue("СТОРОНЫ", null) && npt.Preposition != null && npt.Preposition.Normal == "С")
                {
                    t = npt.EndToken;
                    continue;
                }
                if (npt.BeginToken == npt.EndToken)
                {
                    Pullenti.Morph.MorphClass mc = t.GetMorphClassInDictionary();
                    if (mc.IsPreposition)
                    {
                        continue;
                    }
                    else if (mc.IsAdverb)
                    {
                        if (t.IsValue("ПОТОМ", null))
                        {
                            continue;
                        }
                    }
                }
                else
                {
                }
                li.Clear();
                Pullenti.Ner.Token t0 = t;
                for (Pullenti.Ner.Token tt = t; tt != null && tt.EndChar <= npt.EndChar; tt = tt.Next)
                {
                    if (!(tt is Pullenti.Ner.TextToken))
                    {
                        continue;
                    }
                    if (tt.IsValue("NATURAL", null))
                    {
                    }
                    if ((tt.LengthChar < 3) || !tt.Chars.IsLetter)
                    {
                        continue;
                    }
                    Pullenti.Morph.MorphClass mc = tt.GetMorphClassInDictionary();
                    if ((mc.IsPreposition || mc.IsPronoun || mc.IsPersonalPronoun) || mc.IsConjunction)
                    {
                        if (tt.IsValue("ОТНОШЕНИЕ", null))
                        {
                        }
                        else
                        {
                            continue;
                        }
                    }
                    if (mc.IsMisc)
                    {
                        if (Pullenti.Ner.Core.MiscHelper.IsEngArticle(tt))
                        {
                            continue;
                        }
                    }
                    KeywordReferent kref = new KeywordReferent()
                    {
                        Typ = KeywordType.Object
                    };
                    string norm = (tt as Pullenti.Ner.TextToken).Lemma;
                    kref.AddSlot(KeywordReferent.ATTR_VALUE, norm, false, 0);
                    if (norm != "ЕСТЬ")
                    {
                        List <Pullenti.Semantic.Utils.DerivateGroup> drv = Pullenti.Semantic.Utils.DerivateService.FindDerivates(norm, true, tt.Morph.Language);
                        _addNormals(kref, drv, norm);
                    }
                    kref = ad.RegisterReferent(kref) as KeywordReferent;
                    _setRank(kref, cur, max);
                    Pullenti.Ner.ReferentToken rt1 = new Pullenti.Ner.ReferentToken(kref, tt, tt)
                    {
                        Morph = tt.Morph
                    };
                    kit.EmbedToken(rt1);
                    if (tt == t && li.Count == 0)
                    {
                        t0 = rt1;
                    }
                    t = rt1;
                    li.Add(kref);
                }
                if (li.Count > 1)
                {
                    KeywordReferent kref = new KeywordReferent()
                    {
                        Typ = KeywordType.Object
                    };
                    tmp.Length = 0;
                    tmp2.Clear();
                    bool hasNorm = false;
                    foreach (KeywordReferent kw in li)
                    {
                        string s = kw.GetStringValue(KeywordReferent.ATTR_VALUE);
                        if (tmp.Length > 0)
                        {
                            tmp.Append(' ');
                        }
                        tmp.Append(s);
                        string n = kw.GetStringValue(KeywordReferent.ATTR_NORMAL);
                        if (n != null)
                        {
                            hasNorm = true;
                            tmp2.Add(n);
                        }
                        else
                        {
                            tmp2.Add(s);
                        }
                        kref.AddSlot(KeywordReferent.ATTR_REF, kw, false, 0);
                    }
                    string val = npt.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Singular, Pullenti.Morph.MorphGender.Undefined, false);
                    kref.AddSlot(KeywordReferent.ATTR_VALUE, val, false, 0);
                    tmp.Length = 0;
                    tmp2.Sort();
                    foreach (string s in tmp2)
                    {
                        if (tmp.Length > 0)
                        {
                            tmp.Append(' ');
                        }
                        tmp.Append(s);
                    }
                    string norm = tmp.ToString();
                    if (norm != val)
                    {
                        kref.AddSlot(KeywordReferent.ATTR_NORMAL, norm, false, 0);
                    }
                    kref = ad.RegisterReferent(kref) as KeywordReferent;
                    _setRank(kref, cur, max);
                    Pullenti.Ner.ReferentToken rt1 = new Pullenti.Ner.ReferentToken(kref, t0, t)
                    {
                        Morph = npt.Morph
                    };
                    kit.EmbedToken(rt1);
                    t = rt1;
                }
            }
            cur = 0;
            for (Pullenti.Ner.Token t = kit.FirstToken; t != null; t = t.Next, cur++)
            {
                KeywordReferent kw = t.GetReferent() as KeywordReferent;
                if (kw == null || kw.Typ != KeywordType.Object)
                {
                    continue;
                }
                if (t.Next == null || kw.ChildWords > 2)
                {
                    continue;
                }
                Pullenti.Ner.Token t1 = t.Next;
                if (t1.IsValue("OF", null) && (t1.WhitespacesAfterCount < 3) && t1.Next != null)
                {
                    t1 = t1.Next;
                    if ((t1 is Pullenti.Ner.TextToken) && Pullenti.Ner.Core.MiscHelper.IsEngArticle(t1) && t1.Next != null)
                    {
                        t1 = t1.Next;
                    }
                }
                else if (!t1.Morph.Case.IsGenitive || t.WhitespacesAfterCount > 1)
                {
                    continue;
                }
                KeywordReferent kw2 = t1.GetReferent() as KeywordReferent;
                if (kw2 == null)
                {
                    continue;
                }
                if (kw == kw2)
                {
                    continue;
                }
                if (kw2.Typ != KeywordType.Object || (kw.ChildWords + kw2.ChildWords) > 3)
                {
                    continue;
                }
                KeywordReferent kwUn = new KeywordReferent();
                kwUn.Union(kw, kw2, Pullenti.Ner.Core.MiscHelper.GetTextValue(t1, t1, Pullenti.Ner.Core.GetTextAttr.No));
                kwUn = ad.RegisterReferent(kwUn) as KeywordReferent;
                _setRank(kwUn, cur, max);
                Pullenti.Ner.ReferentToken rt1 = new Pullenti.Ner.ReferentToken(kwUn, t, t1)
                {
                    Morph = t.Morph
                };
                kit.EmbedToken(rt1);
                t = rt1;
            }
            if (SortKeywordsByRank)
            {
                List <Pullenti.Ner.Referent> all = new List <Pullenti.Ner.Referent>(ad.Referents);
                all.Sort(new CompByRank());
                ad.Referents = all;
            }
            if (AnnotationMaxSentences > 0)
            {
                KeywordReferent ano = Pullenti.Ner.Keyword.Internal.AutoannoSentToken.CreateAnnotation(kit, AnnotationMaxSentences);
                if (ano != null)
                {
                    ad.RegisterReferent(ano);
                }
            }
        }
예제 #22
0
        /// <summary>
        /// Try to recognise a measurement unit starting at the given token.
        /// </summary>
        /// <param name="t">first token to examine; null yields null</param>
        /// <param name="addUnits">optional extra termin collection, consulted when UnitsHelper.Termins does not match</param>
        /// <param name="prev">unit parsed immediately before this one in the same sequence, or null</param>
        /// <param name="parseUnknownUnits">when true, a short letter token that matches no known unit may be returned as a doubtful unit with UnknownName set</param>
        /// <returns>the recognised unit token, or null when nothing is recognised</returns>
        public static UnitToken TryParse(Pullenti.Ner.Token t, Pullenti.Ner.Core.TerminCollection addUnits, UnitToken prev, bool parseUnknownUnits = false)
        {
            if (t == null)
            {
                return(null);
            }
            Pullenti.Ner.Token t0 = t;
            int  pow   = 1;
            bool isNeg = false;

            // A leading '/', '\', "НА", "OF", "PER" (or "В" after a previous unit) makes the resulting power negative;
            // a leading multiplication character is simply skipped.
            if ((t.IsCharOf("\\/") || t.IsValue("НА", null) || t.IsValue("OF", null)) || t.IsValue("PER", null))
            {
                isNeg = true;
                t     = t.Next;
            }
            else if (t.IsValue("В", null) && prev != null)
            {
                isNeg = true;
                t     = t.Next;
            }
            else if (MeasureHelper.IsMultChar(t))
            {
                t = t.Next;
            }
            Pullenti.Ner.TextToken tt = t as Pullenti.Ner.TextToken;
            if (tt == null)
            {
                return(null);
            }
            // "square" / "cubic" prefixes (optionally followed by '.') set the power to 2 / 3 and move tt to the next text token.
            if (tt.Term == "КВ" || tt.Term == "КВАДР" || tt.IsValue("КВАДРАТНЫЙ", null))
            {
                pow = 2;
                tt  = tt.Next as Pullenti.Ner.TextToken;
                if (tt != null && tt.IsChar('.'))
                {
                    tt = tt.Next as Pullenti.Ner.TextToken;
                }
                if (tt == null)
                {
                    return(null);
                }
            }
            else if (tt.Term == "КУБ" || tt.Term == "КУБИЧ" || tt.IsValue("КУБИЧЕСКИЙ", null))
            {
                pow = 3;
                tt  = tt.Next as Pullenti.Ner.TextToken;
                if (tt != null && tt.IsChar('.'))
                {
                    tt = tt.Next as Pullenti.Ner.TextToken;
                }
                if (tt == null)
                {
                    return(null);
                }
            }
            else if (tt.Term == "µ")
            {
                // Parse what follows the micro sign and try to replace it with a unit whose Factor is Micro.
                UnitToken res = TryParse(tt.Next, addUnits, prev, false);
                // The recursive call may return a token built from addUnits, which assigns only ExtOnto;
                // guard against a missing Unit instead of dereferencing it.
                if (res != null && res.Unit != null)
                {
                    foreach (Unit u in UnitsHelper.Units)
                    {
                        if (u.Factor == UnitsFactors.Micro && string.Compare("мк" + u.NameCyr, res.Unit.NameCyr, true) == 0)
                        {
                            res.Unit       = u;
                            res.BeginToken = tt;
                            res.Pow        = pow;
                            if (isNeg)
                            {
                                res.Pow = -pow;
                            }
                            return(res);
                        }
                    }
                }
                // No micro unit matched: fall through and try the regular termin lookup on tt.
            }
            List <Pullenti.Ner.Core.TerminToken> toks = UnitsHelper.Termins.TryParseAll(tt, Pullenti.Ner.Core.TerminParseAttr.No);

            if (toks != null)
            {
                // A single candidate separated by whitespace from the previous unit, with no prefix consumed, is rejected.
                if ((prev != null && tt == t0 && toks.Count == 1) && t.IsWhitespaceBefore)
                {
                    return(null);
                }
                // A one-token candidate that is also a preposition is rejected when it starts a noun phrase,
                // precedes a non-digit number, or is itself followed by a unit.
                if (toks[0].BeginToken == toks[0].EndToken && tt.Morph.Class.IsPreposition && (tt.WhitespacesAfterCount < 3))
                {
                    if (Pullenti.Ner.Core.NounPhraseHelper.TryParse(tt, Pullenti.Ner.Core.NounPhraseParseAttr.ParsePreposition, 0, null) != null)
                    {
                        return(null);
                    }
                    if (tt.Next is Pullenti.Ner.NumberToken)
                    {
                        if ((tt.Next as Pullenti.Ner.NumberToken).Typ != Pullenti.Ner.NumberSpellingType.Digit)
                        {
                            return(null);
                        }
                    }
                    UnitToken nex = TryParse(tt.Next, addUnits, null, false);
                    if (nex != null)
                    {
                        return(null);
                    }
                }
                // Lower-case single "м"/"m" after a previous unit of kind Length is returned as UnitsHelper.uMinute.
                if (toks[0].BeginToken == toks[0].EndToken && ((toks[0].BeginToken.IsValue("М", null) || toks[0].BeginToken.IsValue("M", null))) && toks[0].BeginToken.Chars.IsAllLower)
                {
                    if (prev != null && prev.Unit != null && prev.Unit.Kind == Pullenti.Ner.Measure.MeasureKind.Length)
                    {
                        UnitToken res = new UnitToken(t0, toks[0].EndToken)
                        {
                            Unit = UnitsHelper.uMinute
                        };
                        res.Pow = pow;
                        if (isNeg)
                        {
                            res.Pow = -pow;
                        }
                        return(res);
                    }
                }
                // Build one candidate per matched termin.
                List <UnitToken> uts = new List <UnitToken>();
                foreach (Pullenti.Ner.Core.TerminToken tok in toks)
                {
                    UnitToken res = new UnitToken(t0, tok.EndToken)
                    {
                        Unit = tok.Termin.Tag as Unit
                    };
                    res.Pow = pow;
                    if (isNeg)
                    {
                        res.Pow = -pow;
                    }
                    // A unit with BaseMultiplier 1000000 written with a lower-case first letter is swapped
                    // for the Milli-factor unit whose NameCyr matches case-insensitively.
                    if (res.Unit.BaseMultiplier == 1000000 && (t0 is Pullenti.Ner.TextToken) && char.IsLower((t0 as Pullenti.Ner.TextToken).GetSourceText()[0]))
                    {
                        foreach (Unit u in UnitsHelper.Units)
                        {
                            if (u.Factor == UnitsFactors.Milli && string.Compare(u.NameCyr, res.Unit.NameCyr, true) == 0)
                            {
                                res.Unit = u;
                                break;
                            }
                        }
                    }
                    res._correct();
                    res._checkDoubt();
                    uts.Add(res);
                }
                // Prefer the candidate whose Keyword begins furthest to the right (ties go to the later candidate).
                int       max  = 0;
                UnitToken best = null;
                foreach (UnitToken ut in uts)
                {
                    if (ut.Keyword != null)
                    {
                        if (ut.Keyword.BeginChar >= max)
                        {
                            max  = ut.Keyword.BeginChar;
                            best = ut;
                        }
                    }
                }
                if (best != null)
                {
                    return(best);
                }
                // Otherwise the first candidate not marked doubtful, else simply the first one.
                foreach (UnitToken ut in uts)
                {
                    if (!ut.IsDoubt)
                    {
                        return(ut);
                    }
                }
                return(uts[0]);
            }
            // Degree sign: "º", "°", or the sequences "<О>", "<O>", "<0>".
            Pullenti.Ner.Token t1 = null;
            if (t.IsCharOf("º°"))
            {
                t1 = t;
            }
            else if ((t.IsChar('<') && t.Next != null && t.Next.Next != null) && t.Next.Next.IsChar('>') && ((t.Next.IsValue("О", null) || t.Next.IsValue("O", null) || (((t.Next is Pullenti.Ner.NumberToken) && (t.Next as Pullenti.Ner.NumberToken).Value == "0")))))
            {
                t1 = t.Next.Next;
            }
            if (t1 != null)
            {
                UnitToken res = new UnitToken(t0, t1)
                {
                    Unit = UnitsHelper.uGradus
                };
                res._checkDoubt();
                // Skip an optional comma and "ПО", then look for a Celsius / Fahrenheit marker.
                t = t1.Next;
                if (t != null && t.IsComma)
                {
                    t = t.Next;
                }
                if (t != null && t.IsValue("ПО", null))
                {
                    t = t.Next;
                }
                if (t is Pullenti.Ner.TextToken)
                {
                    string vv = (t as Pullenti.Ner.TextToken).Term;
                    if (vv == "C" || vv == "С" || vv.StartsWith("ЦЕЛЬС"))
                    {
                        res.Unit     = UnitsHelper.uGradusC;
                        res.IsDoubt  = false;
                        res.EndToken = t;
                    }
                    if (vv == "F" || vv.StartsWith("ФАР"))
                    {
                        res.Unit     = UnitsHelper.uGradusF;
                        res.IsDoubt  = false;
                        res.EndToken = t;
                    }
                }
                return(res);
            }
            // "оС" / "oC" written exactly this way (lower-case o, upper-case C) is returned as uGradusC.
            if ((t is Pullenti.Ner.TextToken) && ((t.IsValue("ОС", null) || t.IsValue("OC", null))))
            {
                string str = t.GetSourceText();
                if (str == "оС" || str == "oC")
                {
                    UnitToken res = new UnitToken(t, t)
                    {
                        Unit = UnitsHelper.uGradusC, IsDoubt = false
                    };
                    return(res);
                }
            }
            // '%' optionally followed by "об" / "(об.)" gives uAlco; a bare '%' gives uPercent.
            if (t.IsChar('%'))
            {
                Pullenti.Ner.Token tt1 = t.Next;
                if (tt1 != null && tt1.IsChar('('))
                {
                    tt1 = tt1.Next;
                }
                if ((tt1 is Pullenti.Ner.TextToken) && (tt1 as Pullenti.Ner.TextToken).Term.StartsWith("ОБ"))
                {
                    UnitToken re = new UnitToken(t, tt1)
                    {
                        Unit = UnitsHelper.uAlco
                    };
                    if (re.EndToken.Next != null && re.EndToken.Next.IsChar('.'))
                    {
                        re.EndToken = re.EndToken.Next;
                    }
                    if (re.EndToken.Next != null && re.EndToken.Next.IsChar(')') && t.Next.IsChar('('))
                    {
                        re.EndToken = re.EndToken.Next;
                    }
                    return(re);
                }
                return(new UnitToken(t, t)
                {
                    Unit = UnitsHelper.uPercent
                });
            }
            // Caller-supplied extra units.
            if (addUnits != null)
            {
                Pullenti.Ner.Core.TerminToken tok = addUnits.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
                if (tok != null)
                {
                    UnitToken res = new UnitToken(t0, tok.EndToken)
                    {
                        ExtOnto = tok.Termin.Tag as Pullenti.Ner.Measure.UnitReferent
                    };
                    // NOTE(review): this moves tok.EndToken after res was already constructed from it,
                    // so it looks like it has no effect on res; res.EndToken may have been intended. Confirm before changing.
                    if (tok.EndToken.Next != null && tok.EndToken.Next.IsChar('.'))
                    {
                        tok.EndToken = tok.EndToken.Next;
                    }
                    res.Pow = pow;
                    if (isNeg)
                    {
                        res.Pow = -pow;
                    }
                    res._correct();
                    return(res);
                }
            }
            if (!parseUnknownUnits)
            {
                return(null);
            }
            // Unknown unit: a letter-only text token of at most 5 characters, close to the preceding token
            // and not at a possible sentence start.
            if ((t.WhitespacesBeforeCount > 2 || !t.Chars.IsLetter || t.LengthChar > 5) || !(t is Pullenti.Ner.TextToken))
            {
                return(null);
            }
            if (Pullenti.Ner.Core.MiscHelper.CanBeStartOfSentence(t))
            {
                return(null);
            }
            t1 = t;
            // NOTE(review): this assignment repeats the one above, so a trailing '.' is not included; confirm whether t.Next was intended.
            if (t.Next != null && t.Next.IsChar('.'))
            {
                t1 = t;
            }
            bool ok = false;

            // The candidate must be followed by end of text, a wide gap, a comma, a slash, a table control char or a multiplication char.
            if (t1.Next == null || t1.WhitespacesAfterCount > 2)
            {
                ok = true;
            }
            else if (t1.Next.IsComma || t1.Next.IsCharOf("\\/") || t1.Next.IsTableControlChar)
            {
                ok = true;
            }
            else if (MeasureHelper.IsMultChar(t1.Next))
            {
                ok = true;
            }
            if (!ok)
            {
                return(null);
            }
            Pullenti.Morph.MorphClass mc = t.GetMorphClassInDictionary();
            if (mc.IsUndefined)
            {
            }
            else if (t.LengthChar > 7)
            {
                // NOTE(review): unreachable as written, since tokens longer than 5 characters were already rejected above.
                return(null);
            }
            UnitToken res1 = new UnitToken(t0, t1)
            {
                Pow = pow, IsDoubt = true
            };

            res1.UnknownName = (t as Pullenti.Ner.TextToken).GetSourceText();
            res1._correct();
            return(res1);
        }
예제 #23
0
        /// <summary>
        /// Try to extract an assertion-kind definition of the form
        /// "[preamble,] noun-phrase verb ... end-of-sentence" starting at the given token.
        /// </summary>
        /// <param name="t">first token of the candidate sentence; null yields null</param>
        /// <returns>
        /// a referent token wrapping a DefinitionReferent of kind Assertation (the noun phrase text goes to
        /// ATTR_TERMIN, the text from the verb onward goes to ATTR_VALUE), or null when the pattern does not match
        /// </returns>
        static Pullenti.Ner.ReferentToken TryParseThesis(Pullenti.Ner.Token t)
        {
            if (t == null)
            {
                return(null);
            }
            Pullenti.Ner.Token        t0     = t;
            Pullenti.Ner.Token        tt     = t;
            Pullenti.Morph.MorphClass mc     = tt.GetMorphClassInDictionary();
            Pullenti.Ner.MetaToken    preamb = null;
            if (mc.IsConjunction)
            {
                return(null);
            }
            if (t.IsValue("LET", null))
            {
                return(null);
            }
            // A sentence opening with a preposition / misc word / adverb (but not an English article) is treated
            // as having a preamble that must end with a comma; the subject is searched after that comma.
            if (mc.IsPreposition || mc.IsMisc || mc.IsAdverb)
            {
                if (!Pullenti.Ner.Core.MiscHelper.IsEngArticle(tt))
                {
                    for (tt = tt.Next; tt != null; tt = tt.Next)
                    {
                        if (tt.IsComma)
                        {
                            break;
                        }
                        // Skip a parenthesised sequence as a whole.
                        if (tt.IsChar('('))
                        {
                            Pullenti.Ner.Core.BracketSequenceToken br = Pullenti.Ner.Core.BracketHelper.TryParse(tt, Pullenti.Ner.Core.BracketParseAttr.No, 100);
                            if (br != null)
                            {
                                tt = br.EndToken;
                                continue;
                            }
                        }
                        if (Pullenti.Ner.Core.MiscHelper.CanBeStartOfSentence(tt))
                        {
                            break;
                        }
                        // Skip noun phrases as a whole.
                        Pullenti.Ner.Core.NounPhraseToken npt0 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(tt, Pullenti.Ner.Core.NounPhraseParseAttr.ParseNumericAsAdjective | Pullenti.Ner.Core.NounPhraseParseAttr.ReferentCanBeNoun, 0, null);
                        if (npt0 != null)
                        {
                            tt = npt0.EndToken;
                            continue;
                        }
                        if (tt.GetMorphClassInDictionary().IsVerb)
                        {
                            break;
                        }
                    }
                    // The scan must have stopped on a comma that is followed by something.
                    if (tt == null || !tt.IsComma || tt.Next == null)
                    {
                        return(null);
                    }
                    preamb = new Pullenti.Ner.MetaToken(t0, tt.Previous);
                    tt     = tt.Next;
                }
            }
            // t1 is where the subject noun phrase is expected to start.
            Pullenti.Ner.Token t1 = tt;
            mc = tt.GetMorphClassInDictionary();
            Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(tt, Pullenti.Ner.Core.NounPhraseParseAttr.ParseNumericAsAdjective | Pullenti.Ner.Core.NounPhraseParseAttr.ReferentCanBeNoun | Pullenti.Ner.Core.NounPhraseParseAttr.ParseAdverbs, 0, null);
            // Fallback: accept a single all-upper-case token, or a not-all-lower-case token that is a proper name
            // or follows a preamble, as a one-token noun phrase.
            if (npt == null && (tt is Pullenti.Ner.TextToken))
            {
                if (tt.Chars.IsAllUpper)
                {
                    npt = new Pullenti.Ner.Core.NounPhraseToken(tt, tt);
                }
                else if (!tt.Chars.IsAllLower)
                {
                    if (mc.IsProper || preamb != null)
                    {
                        npt = new Pullenti.Ner.Core.NounPhraseToken(tt, tt);
                    }
                }
            }
            if (npt == null)
            {
                return(null);
            }
            if (mc.IsPersonalPronoun)
            {
                return(null);
            }
            // t2 is the token right after the subject; it must be a verb inside the same sentence.
            Pullenti.Ner.Token t2 = npt.EndToken.Next;
            if (t2 == null || Pullenti.Ner.Core.MiscHelper.CanBeStartOfSentence(t2) || !(t2 is Pullenti.Ner.TextToken))
            {
                return(null);
            }
            if (!t2.GetMorphClassInDictionary().IsVerb)
            {
                return(null);
            }
            // t3 becomes the last token of the sentence (defaults to t2 when no end is found).
            Pullenti.Ner.Token t3 = t2;
            // Skip the run of verbs that follows t2.
            for (tt = t2.Next; tt != null; tt = tt.Next)
            {
                if (!tt.GetMorphClassInDictionary().IsVerb)
                {
                    break;
                }
            }
            // Scan to the end of text or to a terminator followed by a possible sentence start.
            for (; tt != null; tt = tt.Next)
            {
                if (tt.Next == null)
                {
                    t3 = tt;
                    break;
                }
                if (tt.IsCharOf(".;!?"))
                {
                    if (Pullenti.Ner.Core.MiscHelper.CanBeStartOfSentence(tt.Next))
                    {
                        t3 = tt;
                        break;
                    }
                }
                if (!(tt is Pullenti.Ner.TextToken))
                {
                    continue;
                }
                // Jump over bracketed sequences so terminators inside them are ignored.
                if (Pullenti.Ner.Core.BracketHelper.CanBeStartOfSequence(tt, false, false))
                {
                    Pullenti.Ner.Core.BracketSequenceToken br = Pullenti.Ner.Core.BracketHelper.TryParse(tt, Pullenti.Ner.Core.BracketParseAttr.No, 100);
                    if (br != null)
                    {
                        tt = br.EndToken;
                        continue;
                    }
                }
            }
            // The value text runs from the verb to the sentence end, excluding a closing terminator.
            tt = t3;
            if (t3.IsCharOf(";.!?"))
            {
                tt = tt.Previous;
            }
            string txt = Pullenti.Ner.Core.MiscHelper.GetTextValue(t2, tt, Pullenti.Ner.Core.GetTextAttr.KeepRegister | Pullenti.Ner.Core.GetTextAttr.KeepQuotes);

            if (txt == null || (txt.Length < 15))
            {
                return(null);
            }
            // When a preamble was skipped, append its text (first letter lower-cased) after the value.
            if (t0 != t1)
            {
                tt = t1.Previous;
                if (tt.IsComma)
                {
                    tt = tt.Previous;
                }
                string txt0 = Pullenti.Ner.Core.MiscHelper.GetTextValue(t0, tt, Pullenti.Ner.Core.GetTextAttr.KeepRegister | Pullenti.Ner.Core.GetTextAttr.KeepQuotes);
                if (txt0 != null && txt0.Length > 10)
                {
                    if (t0.Chars.IsCapitalUpper)
                    {
                        txt0 = char.ToLower(txt0[0]) + txt0.Substring(1);
                    }
                    txt = string.Format("{0}, {1}", txt, txt0);
                }
            }
            // The term name is the subject text without a leading English article.
            tt = t1;
            if (Pullenti.Ner.Core.MiscHelper.IsEngArticle(tt))
            {
                tt = tt.Next;
            }
            string nam = Pullenti.Ner.Core.MiscHelper.GetTextValue(tt, t2.Previous, Pullenti.Ner.Core.GetTextAttr.KeepQuotes);

            // GetTextValue is treated as nullable elsewhere in this method; without a term name there is no thesis to report.
            if (nam == null)
            {
                return(null);
            }
            // Ordinal comparison guarantees that exactly the 9 characters of "SO-CALLED" were matched before they are cut off.
            if (nam.StartsWith("SO-CALLED", System.StringComparison.Ordinal))
            {
                nam = nam.Substring(9).Trim();
            }
            Pullenti.Ner.Definition.DefinitionReferent dr = new Pullenti.Ner.Definition.DefinitionReferent();
            dr.Kind = Pullenti.Ner.Definition.DefinitionKind.Assertation;
            dr.AddSlot(Pullenti.Ner.Definition.DefinitionReferent.ATTR_TERMIN, nam, false, 0);
            dr.AddSlot(Pullenti.Ner.Definition.DefinitionReferent.ATTR_VALUE, txt, false, 0);
            return(new Pullenti.Ner.ReferentToken(dr, t0, t3));
        }