Exemplo n.º 1
0
        /// <summary>
        /// Tries to parse a Russian verb phrase starting at <paramref name="t"/>.
        /// Consecutive text tokens are classified as verb (type 1), adverb (type 2) or
        /// adjective/participle-like item (type 3) and collected into a
        /// <see cref="VerbPhraseToken"/>; scanning stops at the first token that fits none
        /// of these types. Adverbs left at the tail of the phrase are dropped.
        /// </summary>
        /// <param name="t">First token of the candidate phrase; a null token yields null.</param>
        /// <param name="canBePartition">
        /// When false, a dictionary verb that is also an adjective or carries a grammatical case
        /// ends the scan. When true, a leading preposition is accepted and adjectives are examined.
        /// </param>
        /// <param name="canBeAdjPartition">
        /// Allows adjectives (including short forms) to be accepted as type-3 items whenever
        /// derivate groups are found for their normal form.
        /// </param>
        /// <param name="forceParse">Accept a noun/verb homonym as a verb when no other rule decides.</param>
        /// <returns>The parsed phrase, or null when no verb-like item (type 1 or 3) was found.</returns>
        static VerbPhraseToken TryParseRu(Pullenti.Ner.Token t, bool canBePartition, bool canBeAdjPartition, bool forceParse)
        {
            VerbPhraseToken res = null;

            Pullenti.Ner.Token t0         = t;
            Pullenti.Ner.Token not        = null;   // pending "НЕ" token, attached to the next accepted item
            bool             hasVerb      = false;  // a type-1 or type-3 item has already been accepted
            bool             verbBeBefore = false;  // the last accepted verb-like item satisfied IsVerbBe
            PrepositionToken prep         = null;

            for (; t != null; t = t.Next)
            {
                if (!(t is Pullenti.Ner.TextToken))
                {
                    break;
                }
                Pullenti.Ner.TextToken tt = t as Pullenti.Ner.TextToken;
                if (tt.Term == "НЕ")
                {
                    // Remember the negation; it becomes the BeginToken of the next item.
                    not = t;
                    continue;
                }
                // ty: 0 - not part of the phrase, 1 - verb, 2 - adverb, 3 - adjective/participle-like.
                int    ty   = 0;
                string norm = null;
                Pullenti.Morph.MorphClass mc = tt.GetMorphClassInDictionary();
                if (tt.Term == "НЕТ")
                {
                    if (hasVerb)
                    {
                        break;
                    }
                    ty = 1;
                }
                else if (tt.Term == "ДОПУСТИМО")
                {
                    ty = 3;
                }
                else if (mc.IsAdverb && !mc.IsVerb)
                {
                    ty = 2;
                }
                else if (tt.IsPureVerb || tt.IsVerbBe)
                {
                    ty = 1;
                    // A second finite verb ends the phrase unless the previous verb was a "be" verb;
                    // forms carrying the "инф." attribute (presumably the infinitive marker) may always follow.
                    if (hasVerb && !tt.Morph.ContainsAttr("инф.", null) && !verbBeBefore)
                    {
                        break;
                    }
                }
                else if (mc.IsVerb)
                {
                    if (mc.IsPreposition || mc.IsMisc || mc.IsPronoun)
                    {
                        // Homonym with a preposition/misc/pronoun reading: not accepted as a verb (ty stays 0).
                    }
                    else if (mc.IsNoun)
                    {
                        // Noun/verb homonym: accept only for listed words or under specific conditions.
                        if (tt.Term == "СТАЛИ" || tt.Term == "СТЕКЛО" || tt.Term == "БЫЛИ")
                        {
                            ty = 1;
                        }
                        else if (!tt.Chars.IsAllLower && !MiscHelper.CanBeStartOfSentence(tt))
                        {
                            ty = 1;
                        }
                        else if (mc.IsAdjective && canBePartition)
                        {
                            ty = 1;
                        }
                        else if (forceParse)
                        {
                            ty = 1;
                        }
                    }
                    else if (mc.IsProper)
                    {
                        if (tt.Chars.IsAllLower)
                        {
                            ty = 1;
                        }
                    }
                    else
                    {
                        ty = 1;
                    }
                    // An adjective reading or a defined grammatical case marks the token as a participle.
                    bool isParticiple = mc.IsAdjective || !tt.Morph.Case.IsUndefined;
                    if (!canBePartition && isParticiple)
                    {
                        break;
                    }
                    // After a verb, only infinitives and non-participles may continue the phrase.
                    if (hasVerb && !tt.Morph.ContainsAttr("инф.", null) && isParticiple)
                    {
                        break;
                    }
                }
                else if ((mc.IsAdjective && tt.Morph.ContainsAttr("к.ф.", null) && tt.Term.EndsWith("О", System.StringComparison.Ordinal)) && NounPhraseHelper.TryParse(tt, NounPhraseParseAttr.No, 0, null) == null)
                {
                    // Adjective with the "к.ф." attribute (presumably short form) ending in "О"
                    // that does not start a noun phrase: treated as an adverb.
                    ty = 2;
                }
                else if (mc.IsAdjective && ((canBePartition || canBeAdjPartition)))
                {
                    if (tt.Morph.ContainsAttr("к.ф.", null) && !canBeAdjPartition)
                    {
                        break;
                    }
                    norm = tt.GetNormalCaseText(Pullenti.Morph.MorphClass.Adjective, Pullenti.Morph.MorphNumber.Singular, Pullenti.Morph.MorphGender.Masculine, false);
                    // Normal forms ending in "ЙШИЙ" are never accepted here.
                    if (!norm.EndsWith("ЙШИЙ", System.StringComparison.Ordinal))
                    {
                        List <Pullenti.Semantic.Utils.DerivateGroup> grs = Pullenti.Semantic.Utils.DerivateService.FindDerivates(norm, true, null);
                        if (grs != null && grs.Count > 0)
                        {
                            if (HasParticipleAndVerbDerivates(grs, norm) || canBeAdjPartition)
                            {
                                ty = 3;
                            }
                            else
                            {
                                // Retry with the first group's prefix stripped from the normal form.
                                // Ordinal matching guarantees that Prefix.Length is the exact cut position.
                                string prefix = grs[0].Prefix;
                                if (!string.IsNullOrEmpty(prefix) && norm.StartsWith(prefix, System.StringComparison.Ordinal))
                                {
                                    string norm1 = norm.Substring(prefix.Length);
                                    if (HasParticipleAndVerbDerivates(Pullenti.Semantic.Utils.DerivateService.FindDerivates(norm1, true, null), norm1))
                                    {
                                        ty = 3;
                                    }
                                }
                            }
                        }
                    }
                }
                if (ty == 0 && t == t0 && canBePartition)
                {
                    // The very first token may be a preposition preceding the phrase.
                    prep = PrepositionHelper.TryParse(t);
                    if (prep != null)
                    {
                        t = prep.EndToken;
                        continue;
                    }
                }
                if (ty == 0)
                {
                    break;
                }
                if (res == null)
                {
                    res = new VerbPhraseToken(t0, t);
                }
                res.EndToken = t;
                VerbPhraseItemToken it = new VerbPhraseItemToken(t, t)
                {
                    Morph = new Pullenti.Ner.MorphCollection(t.Morph)
                };
                if (not != null)
                {
                    it.BeginToken = not;
                    it.Not        = true;
                    not           = null;
                }
                it.IsAdverb = ty == 2;
                if (prep != null && !t.Morph.Case.IsUndefined && res.Items.Count == 0)
                {
                    // The first item must share at least one case with what the preposition governs.
                    if (((prep.NextCase & t.Morph.Case)).IsUndefined)
                    {
                        return null;
                    }
                    it.Morph.RemoveItems(prep.NextCase);
                    res.Preposition = prep;
                }
                if (norm == null)
                {
                    norm = t.GetNormalCaseText((ty == 3 ? Pullenti.Morph.MorphClass.Adjective : (ty == 2 ? Pullenti.Morph.MorphClass.Adverb : Pullenti.Morph.MorphClass.Verb)), Pullenti.Morph.MorphNumber.Singular, Pullenti.Morph.MorphGender.Masculine, false);
                    if (ty == 1 && !tt.Morph.Case.IsUndefined)
                    {
                        // Verb with a grammatical case: rebuild the nominative singular masculine form,
                        // reusing Misc from the first word form found on the token.
                        Pullenti.Morph.MorphWordForm mi = new Pullenti.Morph.MorphWordForm()
                        {
                            Case = Pullenti.Morph.MorphCase.Nominative, Number = Pullenti.Morph.MorphNumber.Singular, Gender = Pullenti.Morph.MorphGender.Masculine
                        };
                        foreach (Pullenti.Morph.MorphBaseInfo mit in tt.Morph.Items)
                        {
                            if (mit is Pullenti.Morph.MorphWordForm)
                            {
                                mi.Misc = (mit as Pullenti.Morph.MorphWordForm).Misc;
                                break;
                            }
                        }
                        // The two-character "КК" prefix is prepended for the call and stripped from the result.
                        // NOTE(review): presumably it keeps the service from matching a dictionary word - confirm.
                        string nnn = Pullenti.Morph.MorphologyService.GetWordform("КК" + tt.Term, mi);
                        if (nnn != null)
                        {
                            norm = nnn.Substring(2);
                        }
                    }
                }
                it.Normal = norm;
                res.Items.Add(it);
                if (ty == 1 || ty == 3)
                {
                    if (!hasVerb)
                    {
                        // The first verb-like item supplies the morphology of the whole phrase.
                        res.Morph = it.Morph;
                        hasVerb   = true;
                    }
                    verbBeBefore = ty == 1 && tt.IsVerbBe;
                }
            }
            if (!hasVerb)
            {
                return null;
            }
            // Drop adverbs left at the end of the phrase; the first item is never removed.
            for (int i = res.Items.Count - 1; i > 0 && res.Items[i].IsAdverb; i--)
            {
                res.Items.RemoveAt(i);
                res.EndToken = res.Items[i - 1].EndToken;
            }
            return res;
        }

        /// <summary>
        /// Scans every word of the given derivate groups and reports whether they contain both
        /// a word that is adjective and verb at once with spelling equal to
        /// <paramref name="spelling"/>, and a word that is a verb but not of that combined class.
        /// </summary>
        /// <param name="groups">Derivate groups to scan; null or an empty list yields false.</param>
        /// <param name="spelling">Normal form compared against each candidate's spelling.</param>
        /// <returns>True when both kinds of word were found.</returns>
        static bool HasParticipleAndVerbDerivates(List <Pullenti.Semantic.Utils.DerivateGroup> groups, string spelling)
        {
            if (groups == null)
            {
                return false;
            }
            bool hasVerb       = false;
            bool hasParticiple = false;
            foreach (Pullenti.Semantic.Utils.DerivateGroup gr in groups)
            {
                foreach (Pullenti.Semantic.Utils.DerivateWord w in gr.Words)
                {
                    if (w.Class.IsAdjective && w.Class.IsVerb)
                    {
                        if (w.Spelling == spelling)
                        {
                            hasParticiple = true;
                        }
                    }
                    else if (w.Class.IsVerb)
                    {
                        hasVerb = true;
                    }
                }
            }
            return hasParticiple && hasVerb;
        }
Exemplo n.º 2
0
        /// <summary>
        /// Builds the textual name for the token range <paramref name="begin"/>..<paramref name="end"/>.
        /// If the range is enclosed in a bracket/quote pair, the enclosing tokens are skipped first.
        /// Trailing '*' characters and whitespace are removed from the result.
        /// </summary>
        /// <param name="begin">First token of the range.</param>
        /// <param name="end">Last token of the range.</param>
        /// <param name="normalizeFirstNounGroup">
        /// When true and the first token is not a preposition, the leading noun phrase (or, failing that,
        /// a leading Cyrillic dictionary word) is put into its normal form before the rest is appended.
        /// </param>
        /// <param name="normalFirstGroupSingle">Request the singular number when normalizing the leading noun phrase.</param>
        /// <param name="ignoreGeoReferent">Forwarded to every <c>GetNameEx</c> call that renders part of the range.</param>
        /// <returns>The resulting name, or null when nothing but '*' and whitespace remains.</returns>
        static string GetNameWithoutBrackets(Pullenti.Ner.Token begin, Pullenti.Ner.Token end, bool normalizeFirstNounGroup = false, bool normalFirstGroupSingle = false, bool ignoreGeoReferent = false)
        {
            string res = null;

            if (BracketHelper.CanBeStartOfSequence(begin, false, false) && BracketHelper.CanBeEndOfSequence(end, false, begin, false))
            {
                begin = begin.Next;
                end   = end.Previous;
            }
            if (normalizeFirstNounGroup && !begin.Morph.Class.IsPreposition)
            {
                NounPhraseToken npt = NounPhraseHelper.TryParse(begin, NounPhraseParseAttr.ReferentCanBeNoun, 0, null);
                if (npt != null)
                {
                    // A lone noun with no dictionary class is not worth normalizing.
                    if (npt.Noun.GetMorphClassInDictionary().IsUndefined && npt.Adjectives.Count == 0)
                    {
                        npt = null;
                    }
                }
                // The noun phrase must not run past the end of the requested range.
                if (npt != null && npt.EndToken.EndChar > end.EndChar)
                {
                    npt = null;
                }
                if (npt != null)
                {
                    res = npt.GetNormalCaseText(null, (normalFirstGroupSingle ? Pullenti.Morph.MorphNumber.Singular : Pullenti.Morph.MorphNumber.Undefined), Pullenti.Morph.MorphGender.Undefined, false);
                    Pullenti.Ner.Token te = npt.EndToken.Next;
                    // ", <word that is both verb and adjective>" right after the noun phrase:
                    // try to append that word in a form agreeing with the noun phrase.
                    if (((te != null && te.Next != null && te.IsComma) && (te.Next is Pullenti.Ner.TextToken) && te.Next.EndChar <= end.EndChar) && te.Next.Morph.Class.IsVerb && te.Next.Morph.Class.IsAdjective)
                    {
                        foreach (Pullenti.Morph.MorphBaseInfo it in te.Next.Morph.Items)
                        {
                            bool genderOk = it.Gender == npt.Morph.Gender || ((it.Gender & npt.Morph.Gender)) != Pullenti.Morph.MorphGender.Undefined;
                            if (!genderOk)
                            {
                                continue;
                            }
                            if (((it.Case & npt.Morph.Case)).IsUndefined)
                            {
                                continue;
                            }
                            bool numberOk = it.Number == npt.Morph.Number || ((it.Number & npt.Morph.Number)) != Pullenti.Morph.MorphNumber.Undefined;
                            if (!numberOk)
                            {
                                continue;
                            }
                            // First variant agreeing in gender, case and number decides; later variants are not tried.
                            string form = (te.Next as Pullenti.Ner.TextToken).Term;
                            if (it is Pullenti.Morph.MorphWordForm)
                            {
                                form = (it as Pullenti.Morph.MorphWordForm).NormalCase;
                            }
                            Pullenti.Morph.MorphBaseInfo bi = new Pullenti.Morph.MorphBaseInfo()
                            {
                                Class = Pullenti.Morph.MorphClass.Adjective, Gender = npt.Morph.Gender, Number = npt.Morph.Number, Language = npt.Morph.Language
                            };
                            form = Pullenti.Morph.MorphologyService.GetWordform(form, bi);
                            if (form != null)
                            {
                                res = string.Format("{0}, {1}", res, form);
                                te  = te.Next.Next;
                            }
                            break;
                        }
                    }
                    if (te != null && te.EndChar <= end.EndChar)
                    {
                        string s = GetNameEx(te, end, Pullenti.Morph.MorphClass.Undefined, Pullenti.Morph.MorphCase.Undefined, Pullenti.Morph.MorphGender.Undefined, true, ignoreGeoReferent);
                        if (!string.IsNullOrEmpty(s))
                        {
                            // No separating space when the remainder starts with punctuation.
                            if (!char.IsLetterOrDigit(s[0]))
                            {
                                res = string.Format("{0}{1}", res, s);
                            }
                            else
                            {
                                res = string.Format("{0} {1}", res, s);
                            }
                        }
                    }
                }
                else if ((begin is Pullenti.Ner.TextToken) && begin.Chars.IsCyrillicLetter)
                {
                    Pullenti.Morph.MorphClass mm = begin.GetMorphClassInDictionary();
                    if (!mm.IsUndefined)
                    {
                        res = begin.GetNormalCaseText(mm, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false);
                        if (begin.EndChar < end.EndChar)
                        {
                            // Forward the caller's ignoreGeoReferent, as the other GetNameEx calls in this
                            // method do; previously this branch always passed false.
                            res = string.Format("{0} {1}", res, GetNameEx(begin.Next, end, Pullenti.Morph.MorphClass.Undefined, Pullenti.Morph.MorphCase.Undefined, Pullenti.Morph.MorphGender.Undefined, true, ignoreGeoReferent));
                        }
                    }
                }
            }
            if (res == null)
            {
                res = GetNameEx(begin, end, Pullenti.Morph.MorphClass.Undefined, Pullenti.Morph.MorphCase.Undefined, Pullenti.Morph.MorphGender.Undefined, true, ignoreGeoReferent);
            }
            if (!string.IsNullOrEmpty(res))
            {
                // Strip trailing '*' and whitespace; a string made only of those yields null.
                int keep = res.Length;
                while (keep > 0 && (res[keep - 1] == '*' || char.IsWhiteSpace(res[keep - 1])))
                {
                    keep--;
                }
                if (keep == 0)
                {
                    return null;
                }
                if (keep < res.Length)
                {
                    res = res.Substring(0, keep);
                }
            }
            return res;
        }
Exemplo n.º 3
0
        /// <summary>
        /// Попробовать восстановить последовательность, обрамляемую кавычками или скобками. Поддерживается
        /// вложенность, возможность отсутствия закрывающего элемента и др.
        /// </summary>
        /// <param name="t">начальный токен</param>
        /// <param name="attrs">параметры выделения</param>
        /// <param name="maxTokens">максимально токенов (вдруг забыли закрывающую кавычку)</param>
        /// <returns>метатокен BracketSequenceToken</returns>
        public static BracketSequenceToken TryParse(Pullenti.Ner.Token t, BracketParseAttr attrs = BracketParseAttr.No, int maxTokens = 100)
        {
            Pullenti.Ner.Token t0 = t;
            int cou = 0;

            if (!CanBeStartOfSequence(t0, false, false))
            {
                return(null);
            }
            List <Bracket> brList = new List <Bracket>();

            brList.Add(new Bracket(t0));
            cou = 0;
            int crlf = 0;

            Pullenti.Ner.Token last = null;
            int  lev     = 1;
            bool isAssim = brList[0].Char != '«' && m_AssymOPenChars.IndexOf(brList[0].Char) >= 0;
            bool genCase = false;

            for (t = t0.Next; t != null; t = t.Next)
            {
                if (t.IsTableControlChar)
                {
                    break;
                }
                last = t;
                if (t.IsCharOf(m_OpenChars) || t.IsCharOf(m_CloseChars))
                {
                    if (t.IsNewlineBefore && ((attrs & BracketParseAttr.CanBeManyLines)) == BracketParseAttr.No)
                    {
                        if (t.WhitespacesBeforeCount > 10 || CanBeStartOfSequence(t, false, false))
                        {
                            if (t.IsChar('(') && !t0.IsChar('('))
                            {
                            }
                            else
                            {
                                last = t.Previous;
                                break;
                            }
                        }
                    }
                    Bracket bb = new Bracket(t);
                    brList.Add(bb);
                    if (brList.Count > 20)
                    {
                        break;
                    }
                    if ((brList.Count == 3 && brList[1].CanBeOpen && bb.CanBeClose) && MustBeCloseChar(bb.Char, brList[1].Char) && MustBeCloseChar(bb.Char, brList[0].Char))
                    {
                        bool ok = false;
                        for (Pullenti.Ner.Token tt = t.Next; tt != null; tt = tt.Next)
                        {
                            if (tt.IsNewlineBefore)
                            {
                                break;
                            }
                            if (tt.IsChar(','))
                            {
                                break;
                            }
                            if (tt.IsChar('.'))
                            {
                                for (tt = tt.Next; tt != null; tt = tt.Next)
                                {
                                    if (tt.IsNewlineBefore)
                                    {
                                        break;
                                    }
                                    else if (tt.IsCharOf(m_OpenChars) || tt.IsCharOf(m_CloseChars))
                                    {
                                        Bracket bb2 = new Bracket(tt);
                                        if (BracketHelper.CanBeEndOfSequence(tt, false, null, false) && CanBeCloseChar(bb2.Char, brList[0].Char))
                                        {
                                            ok = true;
                                        }
                                        break;
                                    }
                                }
                                break;
                            }
                            if (t.IsCharOf(m_OpenChars) || t.IsCharOf(m_CloseChars))
                            {
                                ok = true;
                                break;
                            }
                        }
                        if (!ok)
                        {
                            break;
                        }
                    }
                    if (isAssim)
                    {
                        if (bb.CanBeOpen && !bb.CanBeClose && bb.Char == brList[0].Char)
                        {
                            lev++;
                        }
                        else if (bb.CanBeClose && !bb.CanBeOpen && m_OpenChars.IndexOf(brList[0].Char) == m_CloseChars.IndexOf(bb.Char))
                        {
                            lev--;
                            if (lev == 0)
                            {
                                break;
                            }
                        }
                    }
                }
                else
                {
                    if ((++cou) > maxTokens)
                    {
                        break;
                    }
                    if (((attrs & BracketParseAttr.CanContainsVerbs)) == BracketParseAttr.No)
                    {
                        if (t.Morph.Language.IsCyrillic)
                        {
                            if (t.GetMorphClassInDictionary() == Pullenti.Morph.MorphClass.Verb)
                            {
                                if (!t.Morph.Class.IsAdjective && !t.Morph.ContainsAttr("страд.з.", null))
                                {
                                    if (t.Chars.IsAllLower)
                                    {
                                        string norm = t.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false);
                                        if (!Pullenti.Morph.LanguageHelper.EndsWith(norm, "СЯ"))
                                        {
                                            if (brList.Count > 1)
                                            {
                                                break;
                                            }
                                            if (brList[0].Char != '(')
                                            {
                                                break;
                                            }
                                        }
                                    }
                                }
                            }
                        }
                        else if (t.Morph.Language.IsEn)
                        {
                            if (t.Morph.Class == Pullenti.Morph.MorphClass.Verb && t.Chars.IsAllLower)
                            {
                                break;
                            }
                        }
                        Pullenti.Ner.Referent r = t.GetReferent();
                        if (r != null && r.TypeName == "ADDRESS")
                        {
                            if (!t0.IsChar('('))
                            {
                                break;
                            }
                        }
                    }
                }
                if (((attrs & BracketParseAttr.CanBeManyLines)) != BracketParseAttr.No)
                {
                    if (t.IsNewlineBefore)
                    {
                        if (t.NewlinesBeforeCount > 1)
                        {
                            break;
                        }
                        crlf++;
                    }
                    continue;
                }
                if (t.IsNewlineBefore)
                {
                    if (t.WhitespacesBeforeCount > 15)
                    {
                        last = t.Previous;
                        break;
                    }
                    crlf++;
                    if (!t.Chars.IsAllLower)
                    {
                        if (MiscHelper.CanBeStartOfSentence(t))
                        {
                            bool has = false;
                            for (Pullenti.Ner.Token tt = t.Next; tt != null; tt = tt.Next)
                            {
                                if (tt.IsNewlineBefore)
                                {
                                    break;
                                }
                                else if (tt.LengthChar == 1 && tt.IsCharOf(m_OpenChars) && tt.IsWhitespaceBefore)
                                {
                                    break;
                                }
                                else if (tt.LengthChar == 1 && tt.IsCharOf(m_CloseChars) && !tt.IsWhitespaceBefore)
                                {
                                    has = true;
                                    break;
                                }
                            }
                            if (!has)
                            {
                                last = t.Previous;
                                break;
                            }
                        }
                    }
                    if ((t.Previous is Pullenti.Ner.MetaToken) && CanBeEndOfSequence((t.Previous as Pullenti.Ner.MetaToken).EndToken, false, null, false))
                    {
                        last = t.Previous;
                        break;
                    }
                }
                if (crlf > 1)
                {
                    if (brList.Count > 1)
                    {
                        break;
                    }
                    if (crlf > 10)
                    {
                        break;
                    }
                }
                if (t.IsChar(';') && t.IsNewlineAfter)
                {
                    break;
                }
                NounPhraseToken npt = NounPhraseHelper.TryParse(t, NounPhraseParseAttr.No, 0, null);
                if (npt != null)
                {
                    if (t.IsNewlineBefore)
                    {
                        genCase = npt.Morph.Case.IsGenitive;
                    }
                    last = (t = npt.EndToken);
                }
            }
            if ((brList.Count == 1 && brList[0].CanBeOpen && (last is Pullenti.Ner.MetaToken)) && last.IsNewlineAfter)
            {
                if (BracketHelper.CanBeEndOfSequence((last as Pullenti.Ner.MetaToken).EndToken, false, null, false))
                {
                    return(new BracketSequenceToken(t0, last));
                }
            }
            if ((brList.Count == 1 && brList[0].CanBeOpen && genCase) && last.IsNewlineAfter && crlf <= 2)
            {
                return(new BracketSequenceToken(t0, last));
            }
            if (brList.Count < 1)
            {
                return(null);
            }
            for (int i = 1; i < (brList.Count - 1); i++)
            {
                if (brList[i].Char == '<' && brList[i + 1].Char == '>')
                {
                    brList[i].CanBeOpen      = true;
                    brList[i + 1].CanBeClose = true;
                }
            }
            List <BracketSequenceToken> internals = null;

            while (brList.Count > 3)
            {
                int i = brList.Count - 1;
                if ((brList[i].CanBeClose && brList[i - 1].CanBeOpen && !CanBeCloseChar(brList[i].Char, brList[0].Char)) && CanBeCloseChar(brList[i].Char, brList[i - 1].Char))
                {
                    brList.RemoveRange(brList.Count - 2, 2);
                    continue;
                }
                break;
            }
            while (brList.Count >= 4)
            {
                bool changed = false;
                for (int i = 1; i < (brList.Count - 2); i++)
                {
                    if ((brList[i].CanBeOpen && !brList[i].CanBeClose && brList[i + 1].CanBeClose) && !brList[i + 1].CanBeOpen)
                    {
                        bool ok = false;
                        if (MustBeCloseChar(brList[i + 1].Char, brList[i].Char) || brList[i].Char != brList[0].Char)
                        {
                            ok = true;
                            if ((i == 1 && ((i + 2) < brList.Count) && brList[i + 2].Char == ')') && brList[i + 1].Char != ')' && CanBeCloseChar(brList[i + 1].Char, brList[i - 1].Char))
                            {
                                brList[i + 2] = brList[i + 1];
                            }
                        }
                        else if (i > 1 && ((i + 2) < brList.Count) && MustBeCloseChar(brList[i + 2].Char, brList[i - 1].Char))
                        {
                            ok = true;
                        }
                        if (ok)
                        {
                            if (internals == null)
                            {
                                internals = new List <BracketSequenceToken>();
                            }
                            internals.Add(new BracketSequenceToken(brList[i].Source, brList[i + 1].Source));
                            brList.RemoveRange(i, 2);
                            changed = true;
                            break;
                        }
                    }
                }
                if (!changed)
                {
                    break;
                }
            }
            BracketSequenceToken res = null;

            if ((brList.Count >= 4 && brList[1].CanBeOpen && brList[2].CanBeClose) && brList[3].CanBeClose && !brList[3].CanBeOpen)
            {
                if (CanBeCloseChar(brList[3].Char, brList[0].Char))
                {
                    res = new BracketSequenceToken(brList[0].Source, brList[3].Source);
                    if (brList[0].Source.Next != brList[1].Source || brList[2].Source.Next != brList[3].Source)
                    {
                        res.Internal.Add(new BracketSequenceToken(brList[1].Source, brList[2].Source));
                    }
                    if (internals != null)
                    {
                        res.Internal.AddRange(internals);
                    }
                }
            }
            if ((res == null && brList.Count >= 3 && brList[2].CanBeClose) && !brList[2].CanBeOpen)
            {
                if (((attrs & BracketParseAttr.NearCloseBracket)) != BracketParseAttr.No)
                {
                    if (CanBeCloseChar(brList[1].Char, brList[0].Char))
                    {
                        return(new BracketSequenceToken(brList[0].Source, brList[1].Source));
                    }
                }
                bool ok = true;
                if (CanBeCloseChar(brList[2].Char, brList[0].Char) && CanBeCloseChar(brList[1].Char, brList[0].Char) && brList[1].CanBeClose)
                {
                    for (t = brList[1].Source; t != brList[2].Source && t != null; t = t.Next)
                    {
                        if (t.IsNewlineBefore)
                        {
                            ok = false;
                            break;
                        }
                        if (t.Chars.IsLetter && t.Chars.IsAllLower)
                        {
                            ok = false;
                            break;
                        }
                        NounPhraseToken npt = NounPhraseHelper.TryParse(t, NounPhraseParseAttr.No, 0, null);
                        if (npt != null)
                        {
                            t = npt.EndToken;
                        }
                    }
                    if (ok)
                    {
                        for (t = brList[0].Source.Next; t != brList[1].Source && t != null; t = t.Next)
                        {
                            if (t.IsNewlineBefore)
                            {
                                return(new BracketSequenceToken(brList[0].Source, t.Previous));
                            }
                        }
                    }
                    int lev1 = 0;
                    for (Pullenti.Ner.Token tt = brList[0].Source.Previous; tt != null; tt = tt.Previous)
                    {
                        if (tt.IsNewlineAfter || tt.IsTableControlChar)
                        {
                            break;
                        }
                        if (!(tt is Pullenti.Ner.TextToken))
                        {
                            continue;
                        }
                        if (tt.Chars.IsLetter || tt.LengthChar > 1)
                        {
                            continue;
                        }
                        char ch = (tt as Pullenti.Ner.TextToken).Term[0];
                        if (CanBeCloseChar(ch, brList[0].Char))
                        {
                            lev1++;
                        }
                        else if (CanBeCloseChar(brList[1].Char, ch))
                        {
                            lev1--;
                            if (lev1 < 0)
                            {
                                return(new BracketSequenceToken(brList[0].Source, brList[1].Source));
                            }
                        }
                    }
                }
                if (ok && CanBeCloseChar(brList[2].Char, brList[0].Char))
                {
                    BracketSequenceToken intern = new BracketSequenceToken(brList[1].Source, brList[2].Source);
                    res = new BracketSequenceToken(brList[0].Source, brList[2].Source);
                    res.Internal.Add(intern);
                }
                else if (ok && CanBeCloseChar(brList[2].Char, brList[1].Char) && brList[0].CanBeOpen)
                {
                    if (CanBeCloseChar(brList[2].Char, brList[0].Char))
                    {
                        BracketSequenceToken intern = new BracketSequenceToken(brList[1].Source, brList[2].Source);
                        res = new BracketSequenceToken(brList[0].Source, brList[2].Source);
                        res.Internal.Add(intern);
                    }
                    else if (brList.Count == 3)
                    {
                        return(null);
                    }
                }
            }
            if (res == null && brList.Count > 1 && brList[1].CanBeClose)
            {
                res = new BracketSequenceToken(brList[0].Source, brList[1].Source);
            }
            if (res == null && brList.Count > 1 && CanBeCloseChar(brList[1].Char, brList[0].Char))
            {
                res = new BracketSequenceToken(brList[0].Source, brList[1].Source);
            }
            if (res == null && brList.Count == 2 && brList[0].Char == brList[1].Char)
            {
                res = new BracketSequenceToken(brList[0].Source, brList[1].Source);
            }
            if (res != null && internals != null)
            {
                foreach (BracketSequenceToken i in internals)
                {
                    if (i.BeginChar < res.EndChar)
                    {
                        res.Internal.Add(i);
                    }
                }
            }
            if (res == null)
            {
                cou = 0;
                for (Pullenti.Ner.Token tt = t0.Next; tt != null; tt = tt.Next, cou++)
                {
                    if (tt.IsTableControlChar)
                    {
                        break;
                    }
                    if (MiscHelper.CanBeStartOfSentence(tt))
                    {
                        break;
                    }
                    if (maxTokens > 0 && cou > maxTokens)
                    {
                        break;
                    }
                    Pullenti.Ner.MetaToken mt = tt as Pullenti.Ner.MetaToken;
                    if (mt == null)
                    {
                        continue;
                    }
                    if (mt.EndToken is Pullenti.Ner.TextToken)
                    {
                        if ((mt.EndToken as Pullenti.Ner.TextToken).IsCharOf(m_CloseChars))
                        {
                            Bracket bb = new Bracket(mt.EndToken as Pullenti.Ner.TextToken);
                            if (bb.CanBeClose && CanBeCloseChar(bb.Char, brList[0].Char))
                            {
                                return(new BracketSequenceToken(t0, tt));
                            }
                        }
                    }
                }
            }
            return(res);
        }
Exemplo n.º 4
0
        /// <summary>
        /// Tries to extract a conjunction starting at the given token.
        /// </summary>
        /// <param name="t">the start token</param>
        /// <returns>the recognized conjunction token, or null when no conjunction starts at <paramref name="t"/></returns>
        public static ConjunctionToken TryParse(Pullenti.Ner.Token t)
        {
            // Only plain text tokens can start a conjunction (this also rejects null).
            Pullenti.Ner.TextToken textToken = t as Pullenti.Ner.TextToken;
            if (textToken == null)
            {
                return null;
            }

            if (t.IsComma)
            {
                // A comma directly followed by a conjunction is merged into that conjunction;
                // the merged result is no longer marked as simple.
                ConjunctionToken ne = TryParse(t.Next);
                if (ne != null)
                {
                    ne.BeginToken = t;
                    ne.IsSimple   = false;
                    return ne;
                }
                // A lone comma is reported as a simple conjunction of type Comma.
                return new ConjunctionToken(t, t)
                {
                    Typ = ConjunctionType.Comma, IsSimple = true, Normal = ","
                };
            }

            // Look the token up in the conjunction ontology.
            TerminToken tok = m_Ontology.TryParse(t, TerminParseAttr.No);
            if (tok != null)
            {
                if (t.IsValue("ТО", null))
                {
                    // Reject "ТО" when a noun phrase starting here extends past the ontology match.
                    NounPhraseToken npt = NounPhraseHelper.TryParse(t, NounPhraseParseAttr.ParseAdverbs, 0, null);
                    if (npt != null && npt.EndChar > tok.EndToken.EndChar)
                    {
                        return null;
                    }
                }
                if (tok.Termin.Tag2 != null)
                {
                    // Termins carrying Tag2 are accepted only when the match ends on a text token
                    // and, if the dictionary allows that token to be a verb, its term ends in "АЯ".
                    Pullenti.Ner.TextToken endText = tok.EndToken as Pullenti.Ner.TextToken;
                    if (endText == null)
                    {
                        return null;
                    }
                    if (tok.EndToken.GetMorphClassInDictionary().IsVerb
                        && !endText.Term.EndsWith("АЯ", System.StringComparison.Ordinal))
                    {
                        return null;
                    }
                }
                return new ConjunctionToken(t, tok.EndToken)
                {
                    Normal = tok.Termin.CanonicText, Typ = (ConjunctionType)tok.Termin.Tag
                };
            }

            if (!t.GetMorphClassInDictionary().IsConjunction)
            {
                return null;
            }

            if (t.IsAnd || t.IsOr)
            {
                ConjunctionToken res = new ConjunctionToken(t, t)
                {
                    Normal = textToken.Term, IsSimple = true, Typ = (t.IsOr ? ConjunctionType.Or : ConjunctionType.And)
                };
                Pullenti.Ner.Token next = t.Next;
                if (next != null && next.IsChar('(')
                    && (next.Next is Pullenti.Ner.TextToken) && next.Next.IsOr
                    && next.Next.Next != null && next.Next.Next.IsChar(')'))
                {
                    // Pattern "<and/or> ( <or> )": extend the result up to the closing parenthesis.
                    res.EndToken = next.Next.Next;
                }
                else if (next != null && next.IsCharOf("\\/")
                         && (next.Next is Pullenti.Ner.TextToken) && next.Next.IsOr)
                {
                    // Pattern "<and/or> / <or>" (slash or backslash): extend the result over the second word.
                    res.EndToken = next.Next;
                }
                return res;
            }

            string term = textToken.Term;

            if (term == "НИ")
            {
                return new ConjunctionToken(t, t)
                {
                    Normal = term, Typ = ConjunctionType.Not
                };
            }
            if (term == "А" || term == "НО" || term == "ЗАТО" || term == "ОДНАКО")
            {
                return new ConjunctionToken(t, t)
                {
                    Normal = term, Typ = ConjunctionType.But
                };
            }
            return null;
        }