Beispiel #1
0
 /// <summary>
 /// Checks whether any adjective variant of <paramref name="adj"/> accords with
 /// any noun variant of <paramref name="noun"/>.
 /// </summary>
 /// <param name="adj">Item whose <c>AdjMorph</c> variants are tested.</param>
 /// <param name="noun">Item whose <c>NounMorph</c> variants are tested.</param>
 /// <returns>true as soon as one pair passes <c>CheckAccord</c>; otherwise false.</returns>
 public static bool TryAccordAdjAndNoun(NounPhraseItem adj, NounPhraseItem noun)
 {
     foreach (NounPhraseItemTextVar adjVariant in adj.AdjMorph)
     {
         foreach (NounPhraseItemTextVar nounVariant in noun.NounMorph)
         {
             bool accords = adjVariant.CheckAccord(nounVariant, false, false);
             if (!accords)
             {
                 continue;
             }
             // The first according pair is enough.
             return true;
         }
     }
     return false;
 }
Beispiel #2
0
        /// <summary>
        /// Tries to build a single noun-phrase item (adjective-like and/or noun-like) that starts at
        /// token <paramref name="t"/>.
        /// </summary>
        /// <param name="t">First token of the candidate item; a null token yields null.</param>
        /// <param name="items">Items already collected for the phrase. May be null or empty; it is passed to
        /// <c>TryAccordVariant</c> together with each morphological variant of the new item.</param>
        /// <param name="attrs">Parsing flags; the method inspects IgnoreParticiples, ReferentCanBeNoun,
        /// ParseNumericAsAdjective, ParsePronouns and MultiNouns.</param>
        /// <returns>The parsed item, or null when the token is rejected by one of the checks below.</returns>
        public static NounPhraseItem TryParse(Pullenti.Ner.Token t, List<NounPhraseItem> items, Pullenti.Ner.Core.NounPhraseParseAttr attrs)
        {
            if (t == null)
            {
                return(null);
            }
            Pullenti.Ner.Token t0 = t;
            bool _canBeSurname    = false;
            bool _isDoubtAdj      = false;

            Pullenti.Ner.ReferentToken rt = t as Pullenti.Ner.ReferentToken;
            // A referent token that wraps exactly one text token: parse the inner token and
            // re-anchor the result on the referent token itself.
            if (rt != null && rt.BeginToken == rt.EndToken && (rt.BeginToken is Pullenti.Ner.TextToken))
            {
                NounPhraseItem res = TryParse(rt.BeginToken, items, attrs);
                if (res != null)
                {
                    res.BeginToken = (res.EndToken = t);
                    res.CanBeNoun  = true;
                    return(res);
                }
            }
            // Any other referent token becomes a noun whose variants all carry the referent's text.
            if (rt != null)
            {
                NounPhraseItem res = new NounPhraseItem(t, t);
                foreach (Pullenti.Morph.MorphBaseInfo m in t.Morph.Items)
                {
                    NounPhraseItemTextVar v = new NounPhraseItemTextVar(m, null);
                    v.NormalValue = t.GetReferent().ToString();
                    res.NounMorph.Add(v);
                }
                res.CanBeNoun = true;
                return(res);
            }
            bool hasLegalVerb = false;

            // Pre-filters that apply to plain text tokens only.
            if (t is Pullenti.Ner.TextToken)
            {
                if (!t.Chars.IsLetter)
                {
                    return(null);
                }
                string str = (t as Pullenti.Ner.TextToken).Term;
                // Words ending in 'А' / 'О' get extra verb / adverb / adjective screening.
                if (str[str.Length - 1] == 'А' || str[str.Length - 1] == 'О')
                {
                    foreach (Pullenti.Morph.MorphBaseInfo wf in t.Morph.Items)
                    {
                        if ((wf is Pullenti.Morph.MorphWordForm) && (wf as Pullenti.Morph.MorphWordForm).IsInDictionary)
                        {
                            if (wf.Class.IsVerb)
                            {
                                Pullenti.Morph.MorphClass mc = t.GetMorphClassInDictionary();
                                if (!mc.IsNoun && ((attrs & Pullenti.Ner.Core.NounPhraseParseAttr.IgnoreParticiples)) == Pullenti.Ner.Core.NounPhraseParseAttr.No)
                                {
                                    // A dictionary verb that is not also a noun is rejected unless it ends in "ОГО"/"ЕГО".
                                    if (!Pullenti.Morph.LanguageHelper.EndsWithEx(str, "ОГО", "ЕГО", null, null))
                                    {
                                        return(null);
                                    }
                                }
                                hasLegalVerb = true;
                            }
                            if (wf.Class.IsAdverb)
                            {
                                if (t.Next == null || !t.Next.IsHiphen)
                                {
                                    // Only this fixed list of adverb-homonyms is allowed to continue.
                                    if ((str == "ВСЕГО" || str == "ДОМА" || str == "НЕСКОЛЬКО") || str == "МНОГО" || str == "ПОРЯДКА")
                                    {
                                    }
                                    else
                                    {
                                        return(null);
                                    }
                                }
                            }
                            if (wf.Class.IsAdjective)
                            {
                                // "к.ф." is a morphology tag (presumably "short form" - confirm against the dictionary).
                                if (wf.ContainsAttr("к.ф.", null))
                                {
                                    if (t.GetMorphClassInDictionary() == Pullenti.Morph.MorphClass.Adjective)
                                    {
                                        // Unambiguously an adjective in the dictionary: not doubtful.
                                    }
                                    else
                                    {
                                        _isDoubtAdj = true;
                                    }
                                }
                            }
                        }
                    }
                }
                Pullenti.Morph.MorphClass mc0 = t.Morph.Class;
                // Possible surname written with at least one upper-case letter.
                if (mc0.IsProperSurname && !t.Chars.IsAllLower)
                {
                    foreach (Pullenti.Morph.MorphBaseInfo wf in t.Morph.Items)
                    {
                        if (wf.Class.IsProperSurname && wf.Number != Pullenti.Morph.MorphNumber.Plural)
                        {
                            Pullenti.Morph.MorphWordForm wff = wf as Pullenti.Morph.MorphWordForm;
                            if (wff == null)
                            {
                                continue;
                            }
                            string s = ((wff.NormalFull ?? wff.NormalCase)) ?? "";
                            if (Pullenti.Morph.LanguageHelper.EndsWithEx(s, "ИН", "ЕН", "ЫН", null))
                            {
                                if (!wff.IsInDictionary)
                                {
                                    _canBeSurname = true;
                                }
                                else
                                {
                                    // A dictionary surname with one of these endings is never a phrase item.
                                    return(null);
                                }
                            }
                            if (wff.IsInDictionary && Pullenti.Morph.LanguageHelper.EndsWith(s, "ОВ"))
                            {
                                _canBeSurname = true;
                            }
                        }
                    }
                }
                // Possible first name written with at least one upper-case letter.
                if (mc0.IsProperName && !t.Chars.IsAllLower)
                {
                    foreach (Pullenti.Morph.MorphBaseInfo wff in t.Morph.Items)
                    {
                        Pullenti.Morph.MorphWordForm wf = wff as Pullenti.Morph.MorphWordForm;
                        if (wf == null)
                        {
                            continue;
                        }
                        if (wf.NormalCase == "ГОР")
                        {
                            continue;
                        }
                        if (wf.Class.IsProperName && wf.IsInDictionary)
                        {
                            if (wf.NormalCase == null || !wf.NormalCase.StartsWith("ЛЮБ"))
                            {
                                if (mc0.IsAdjective && t.Morph.ContainsAttr("неизм.", null))
                                {
                                    // Adjective carrying the "неизм." tag: allowed.
                                }
                                else if (((attrs & Pullenti.Ner.Core.NounPhraseParseAttr.ReferentCanBeNoun)) == Pullenti.Ner.Core.NounPhraseParseAttr.ReferentCanBeNoun)
                                {
                                    // Caller explicitly allows it via ReferentCanBeNoun.
                                }
                                else
                                {
                                    // Otherwise a dictionary first name is accepted only after a standard adjective.
                                    if (items == null || (items.Count < 1))
                                    {
                                        return(null);
                                    }
                                    if (!items[0].IsStdAdjective)
                                    {
                                        return(null);
                                    }
                                }
                            }
                        }
                    }
                }
                if (mc0.IsAdjective && t.Morph.ItemsCount == 1)
                {
                    // Single-variant adjective tagged "в.ср.ст." is rejected
                    // (presumably a comparative-degree tag - confirm against the dictionary).
                    if (t.Morph[0].ContainsAttr("в.ср.ст.", null))
                    {
                        return(null);
                    }
                }
                Pullenti.Morph.MorphClass mc1 = t.GetMorphClassInDictionary();
                // A pure dictionary verb without any grammatical case cannot be a phrase item.
                if (mc1 == Pullenti.Morph.MorphClass.Verb && t.Morph.Case.IsUndefined)
                {
                    return(null);
                }
                if ((((attrs & Pullenti.Ner.Core.NounPhraseParseAttr.IgnoreParticiples)) == Pullenti.Ner.Core.NounPhraseParseAttr.IgnoreParticiples && t.Morph.Class.IsVerb && !t.Morph.Class.IsNoun) && !t.Morph.Class.IsProper)
                {
                    foreach (Pullenti.Morph.MorphBaseInfo wf in t.Morph.Items)
                    {
                        if (wf.Class.IsVerb)
                        {
                            // Verb forms tagged "дейст.з." are rejected unless the word ends in "СЯ".
                            if (wf.ContainsAttr("дейст.з.", null))
                            {
                                if (Pullenti.Morph.LanguageHelper.EndsWith((t as Pullenti.Ner.TextToken).Term, "СЯ"))
                                {
                                }
                                else
                                {
                                    return(null);
                                }
                            }
                        }
                    }
                }
            }
            Pullenti.Ner.Token t1 = null;
            // At most two attempts: k == 0 may look past a hyphen ("X-Y"); k == 1 retries either on t0 alone
            // or on the continuation token t1 chosen at the end of the first attempt.
            for (int k = 0; k < 2; k++)
            {
                t = t1 ?? t0;
                if (k == 0)
                {
                    if (((t0 is Pullenti.Ner.TextToken) && t0.Next != null && t0.Next.IsHiphen) && t0.Next.Next != null)
                    {
                        if (!t0.IsWhitespaceAfter && !t0.Morph.Class.IsPronoun && !(t0.Next.Next is Pullenti.Ner.NumberToken))
                        {
                            // Hyphenated compound: take morphology from the part after the hyphen.
                            if (!t0.Next.IsWhitespaceAfter)
                            {
                                t = t0.Next.Next;
                            }
                            else if (t0.Next.Next.Chars.IsAllLower && Pullenti.Morph.LanguageHelper.EndsWith((t0 as Pullenti.Ner.TextToken).Term, "О"))
                            {
                                t = t0.Next.Next;
                            }
                        }
                    }
                }
                NounPhraseItem it = new NounPhraseItem(t0, t)
                {
                    CanBeSurname = _canBeSurname
                };
                // NOTE(review): every ReferentToken already returned above and t0 is never reassigned,
                // so this branch looks unreachable - kept as in the original.
                if (t0 == t && (t0 is Pullenti.Ner.ReferentToken))
                {
                    it.CanBeNoun = true;
                    it.Morph     = new Pullenti.Ner.MorphCollection(t0.Morph);
                }
                bool canBePrepos = false;
                // Classify every morphological variant of t as adjective-like and/or noun-like.
                foreach (Pullenti.Morph.MorphBaseInfo v in t.Morph.Items)
                {
                    Pullenti.Morph.MorphWordForm wf = v as Pullenti.Morph.MorphWordForm;
                    // A verb form that has a case is taken as an adjective variant unconditionally.
                    if (v.Class.IsVerb && !v.Case.IsUndefined)
                    {
                        it.CanBeAdj = true;
                        it.AdjMorph.Add(new NounPhraseItemTextVar(v, t));
                        continue;
                    }
                    if (v.Class.IsPreposition)
                    {
                        canBePrepos = true;
                    }
                    if (v.Class.IsAdjective || ((v.Class.IsPronoun && !v.Class.IsPersonalPronoun && !v.ContainsAttr("неизм.", null))) || ((v.Class.IsNoun && (t is Pullenti.Ner.NumberToken))))
                    {
                        if (TryAccordVariant(items, (items == null ? 0 : items.Count), v, false))
                        {
                            // Variants carrying these tags are never used as adjectives.
                            if (v.ContainsAttr("к.ф.", null))
                            {
                                continue;
                            }
                            if (v.ContainsAttr("собир.", null) && !(t is Pullenti.Ner.NumberToken))
                            {
                                if (wf != null && wf.IsInDictionary)
                                {
                                    return(null);
                                }
                                continue;
                            }
                            if (v.ContainsAttr("сравн.", null))
                            {
                                continue;
                            }
                            bool ok = true;
                            if (t is Pullenti.Ner.TextToken)
                            {
                                string s = (t as Pullenti.Ner.TextToken).Term;
                                if (s == "ПРАВО" || s == "ПРАВА")
                                {
                                    ok = false;
                                }
                                else if (Pullenti.Morph.LanguageHelper.EndsWith(s, "ОВ") && t.GetMorphClassInDictionary().IsNoun)
                                {
                                    ok = false;
                                }
                            }
                            else if (t is Pullenti.Ner.NumberToken)
                            {
                                if (v.Class.IsNoun && t.Morph.Class.IsAdjective)
                                {
                                    ok = false;
                                }
                                else if (t.Morph.Class.IsNoun && ((attrs & Pullenti.Ner.Core.NounPhraseParseAttr.ParseNumericAsAdjective)) == Pullenti.Ner.Core.NounPhraseParseAttr.No)
                                {
                                    ok = false;
                                }
                            }
                            if (ok)
                            {
                                it.AdjMorph.Add(new NounPhraseItemTextVar(v, t));
                                it.CanBeAdj = true;
                                if (_isDoubtAdj && t0 == t)
                                {
                                    it.IsDoubtAdjective = true;
                                }
                                if (hasLegalVerb && wf != null && wf.IsInDictionary)
                                {
                                    it.CanBeNoun = true;
                                }
                                // An adjective-like pronoun may also stand as the noun of the phrase.
                                if (wf != null && wf.Class.IsPronoun)
                                {
                                    it.CanBeNoun = true;
                                    it.NounMorph.Add(new NounPhraseItemTextVar(v, t));
                                }
                            }
                        }
                    }
                    bool canBeNoun = false;
                    if (t is Pullenti.Ner.NumberToken)
                    {
                        // Numbers are never nouns here.
                    }
                    else if (v.Class.IsNoun || ((wf != null && wf.NormalCase == "САМ")))
                    {
                        canBeNoun = true;
                    }
                    else if (v.Class.IsPersonalPronoun)
                    {
                        if (items == null || items.Count == 0)
                        {
                            canBeNoun = true;
                        }
                        else
                        {
                            foreach (NounPhraseItem it1 in items)
                            {
                                if (it1.IsVerb)
                                {
                                    // After a verb item, a personal pronoun is allowed only as the sole
                                    // follower and not in the nominative case.
                                    if (items.Count == 1 && !v.Case.IsNominative)
                                    {
                                        canBeNoun = true;
                                    }
                                    else
                                    {
                                        return(null);
                                    }
                                }
                            }
                            if (items.Count == 1)
                            {
                                if (items[0].CanBeAdjForPersonalPronoun)
                                {
                                    canBeNoun = true;
                                }
                            }
                        }
                    }
                    else if ((v.Class.IsPronoun && ((items == null || items.Count == 0 || ((items.Count == 1 && items[0].CanBeAdjForPersonalPronoun)))) && wf != null) && (((((wf.NormalCase == "ТОТ" || wf.NormalFull == "ТО" || wf.NormalCase == "ТО") || wf.NormalCase == "ЭТО" || wf.NormalCase == "ВСЕ") || wf.NormalCase == "ЧТО" || wf.NormalCase == "КТО") || wf.NormalFull == "КОТОРЫЙ" || wf.NormalCase == "КОТОРЫЙ")))
                    {
                        if (wf.NormalCase == "ВСЕ")
                        {
                            // "ВСЕ" followed by "РАВНО" is rejected as a whole.
                            if (t.Next != null && t.Next.IsValue("РАВНО", null))
                            {
                                return(null);
                            }
                        }
                        canBeNoun = true;
                    }
                    else if (wf != null && ((wf.NormalFull ?? wf.NormalCase)) == "КОТОРЫЙ" && ((attrs & Pullenti.Ner.Core.NounPhraseParseAttr.ParsePronouns)) == Pullenti.Ner.Core.NounPhraseParseAttr.No)
                    {
                        return(null);
                    }
                    else if (v.Class.IsProper && (t is Pullenti.Ner.TextToken))
                    {
                        if (t.LengthChar > 4 || v.Class.IsProperName)
                        {
                            canBeNoun = true;
                        }
                    }
                    if (canBeNoun)
                    {
                        bool added = false;
                        // Multi-noun mode: every item after the first must be preceded by a conjunction.
                        if (items != null && items.Count > 1 && ((attrs & Pullenti.Ner.Core.NounPhraseParseAttr.MultiNouns)) != Pullenti.Ner.Core.NounPhraseParseAttr.No)
                        {
                            bool ok1 = true;
                            for (int ii = 1; ii < items.Count; ii++)
                            {
                                if (!items[ii].ConjBefore)
                                {
                                    ok1 = false;
                                    break;
                                }
                            }
                            if (ok1)
                            {
                                if (TryAccordVariant(items, items.Count, v, true))
                                {
                                    it.NounMorph.Add(new NounPhraseItemTextVar(v, t));
                                    it.CanBeNoun  = true;
                                    it.MultiNouns = true;
                                    added         = true;
                                }
                            }
                        }
                        if (!added)
                        {
                            if (TryAccordVariant(items, (items == null ? 0 : items.Count), v, false))
                            {
                                it.NounMorph.Add(new NounPhraseItemTextVar(v, t));
                                it.CanBeNoun = true;
                                if (v.Class.IsPersonalPronoun && t.Morph.ContainsAttr("неизм.", null) && !it.CanBeAdj)
                                {
                                    // A personal pronoun tagged "неизм." is additionally registered as an
                                    // adjective variant valid for all cases and any number.
                                    NounPhraseItemTextVar itt = new NounPhraseItemTextVar(v, t);
                                    itt.Case   = Pullenti.Morph.MorphCase.AllCases;
                                    itt.Number = Pullenti.Morph.MorphNumber.Undefined;
                                    // NOTE(review): no-op check kept from the original in case NormalValue
                                    // is a property with side effects - TODO confirm and remove.
                                    if (itt.NormalValue == null)
                                    {
                                    }
                                    it.AdjMorph.Add(itt);
                                    it.CanBeAdj = true;
                                }
                            }
                            // Fallback when the noun does not accord: the first collected item is a plural non-verb
                            // adjective sharing a case with this variant, and the text continues with a
                            // comma/conjunction plus another noun phrase in a compatible case.
                            // The items != null guard prevents a NullReferenceException when TryParse is called
                            // without previously collected items.
                            else if ((items != null && items.Count > 0 && items[0].AdjMorph.Count > 0 && items[0].AdjMorph[0].Number == Pullenti.Morph.MorphNumber.Plural) && !((items[0].AdjMorph[0].Case & v.Case)).IsUndefined && !items[0].AdjMorph[0].Class.IsVerb)
                            {
                                if (t.Next != null && t.Next.IsCommaAnd && (t.Next.Next is Pullenti.Ner.TextToken))
                                {
                                    Pullenti.Ner.Core.NounPhraseToken npt2 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t.Next.Next, attrs, 0, null);
                                    if (npt2 != null && npt2.Preposition == null && !((npt2.Morph.Case & v.Case & items[0].AdjMorph[0].Case)).IsUndefined)
                                    {
                                        it.NounMorph.Add(new NounPhraseItemTextVar(v, t));
                                        it.CanBeNoun = true;
                                    }
                                }
                            }
                        }
                    }
                }
                // Morphology came from a later token: let every variant account for the leading token t0.
                if (t0 != t)
                {
                    foreach (NounPhraseItemTextVar v in it.AdjMorph)
                    {
                        v.CorrectPrefix(t0 as Pullenti.Ner.TextToken, false);
                    }
                    foreach (NounPhraseItemTextVar v in it.NounMorph)
                    {
                        v.CorrectPrefix(t0 as Pullenti.Ner.TextToken, true);
                    }
                }
                if (k == 1 && it.CanBeNoun && !it.CanBeAdj)
                {
                    if (t1 != null)
                    {
                        it.EndToken = t1;
                    }
                    else
                    {
                        // k == 1 with no t1 is reached only after the hyphen look-ahead of the first attempt
                        // failed, so t0.Next.Next was already verified to be non-null there.
                        it.EndToken = t0.Next.Next;
                    }
                    foreach (NounPhraseItemTextVar v in it.NounMorph)
                    {
                        // Append the normalized text of the end token unless the value is already hyphenated.
                        if (v.NormalValue != null && (v.NormalValue.IndexOf('-') < 0))
                        {
                            v.NormalValue = string.Format("{0}-{1}", v.NormalValue, it.EndToken.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false));
                        }
                    }
                }
                if (it.CanBeAdj)
                {
                    if (m_StdAdjectives.TryParse(it.BeginToken, Pullenti.Ner.Core.TerminParseAttr.No) != null)
                    {
                        it.IsStdAdjective = true;
                    }
                }
                // The word is a preposition/noun homonym: reject it when reading it as a preposition
                // yields a valid continuation.
                if (canBePrepos && it.CanBeNoun)
                {
                    if (items != null && items.Count > 0)
                    {
                        Pullenti.Ner.Core.NounPhraseToken npt1 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t, Pullenti.Ner.Core.NounPhraseParseAttr.ParsePreposition | Pullenti.Ner.Core.NounPhraseParseAttr.ParsePronouns | Pullenti.Ner.Core.NounPhraseParseAttr.ParseVerbs, 0, null);
                        if (npt1 != null && npt1.EndChar > t.EndChar)
                        {
                            return(null);
                        }
                    }
                    else
                    {
                        Pullenti.Ner.Core.NounPhraseToken npt1 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t.Next, Pullenti.Ner.Core.NounPhraseParseAttr.ParsePronouns | Pullenti.Ner.Core.NounPhraseParseAttr.ParseVerbs, 0, null);
                        // The lemma is only available on text tokens; skip the case check otherwise
                        // instead of dereferencing a failed cast.
                        Pullenti.Ner.TextToken prepToken = t as Pullenti.Ner.TextToken;
                        if (npt1 != null && prepToken != null)
                        {
                            Pullenti.Morph.MorphCase mc = Pullenti.Morph.LanguageHelper.GetCaseAfterPreposition(prepToken.Lemma);
                            if (!((mc & npt1.Morph.Case)).IsUndefined)
                            {
                                return(null);
                            }
                        }
                    }
                }
                // The second attempt always returns whatever was built.
                if (it.CanBeNoun || it.CanBeAdj || k == 1)
                {
                    if (it.BeginToken.Morph.Class.IsPronoun)
                    {
                        // Attach a trailing particle to a pronoun, optionally across a tight hyphen.
                        Pullenti.Ner.Token tt2 = it.EndToken.Next;
                        if ((tt2 != null && tt2.IsHiphen && !tt2.IsWhitespaceAfter) && !tt2.IsWhitespaceBefore)
                        {
                            tt2 = tt2.Next;
                        }
                        if (tt2 is Pullenti.Ner.TextToken)
                        {
                            string ss = (tt2 as Pullenti.Ner.TextToken).Term;
                            if ((ss == "ЖЕ" || ss == "БЫ" || ss == "ЛИ") || ss == "Ж")
                            {
                                it.EndToken = tt2;
                            }
                            else if (ss == "НИБУДЬ" || ss == "ЛИБО" || (((ss == "ТО" && tt2.Previous.IsHiphen)) && it.CanBeAdj))
                            {
                                it.EndToken = tt2;
                                // These suffixes become part of the normalized adjective values.
                                foreach (NounPhraseItemTextVar m in it.AdjMorph)
                                {
                                    m.NormalValue = string.Format("{0}-{1}", m.NormalValue, ss);
                                    if (m.SingleNumberValue != null)
                                    {
                                        m.SingleNumberValue = string.Format("{0}-{1}", m.SingleNumberValue, ss);
                                    }
                                }
                            }
                        }
                    }
                    return(it);
                }
                if (t0 == t)
                {
                    // Special case: "БИЗНЕС" followed by a token with the same character class is retried
                    // with that next token as the continuation.
                    if (t0.IsValue("БИЗНЕС", null) && t0.Next != null && t0.Next.Chars == t0.Chars)
                    {
                        t1 = t0.Next;
                        continue;
                    }
                    return(it);
                }
            }
            return(null);
        }