Example #1
        /// <summary>
        /// Try to extract a preposition starting at the given token
        /// </summary>
        /// <param name="t">the initial token</param>
        /// <returns>the result, or null</returns>
        public static PrepositionToken TryParse(Pullenti.Ner.Token t)
        {
            if (!(t is Pullenti.Ner.TextToken))
            {
                return(null);
            }
            TerminToken tok = m_Ontology.TryParse(t, TerminParseAttr.No);

            if (tok != null)
            {
                return new PrepositionToken(t, tok.EndToken)
                       {
                           Normal = tok.Termin.CanonicText, NextCase = (Pullenti.Morph.MorphCase)tok.Termin.Tag
                       };
            }
            Pullenti.Morph.MorphClass mc = t.GetMorphClassInDictionary();
            if (!mc.IsPreposition)
            {
                return(null);
            }
            PrepositionToken res = new PrepositionToken(t, t);

            res.Normal   = t.GetNormalCaseText(Pullenti.Morph.MorphClass.Preposition, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false);
            res.NextCase = Pullenti.Morph.LanguageHelper.GetCaseAfterPreposition(res.Normal);
            if ((t.Next != null && t.Next.IsHiphen && !t.IsWhitespaceAfter) && (t.Next.Next is Pullenti.Ner.TextToken) && t.Next.Next.GetMorphClassInDictionary().IsPreposition)
            {
                res.EndToken = t.Next.Next;
            }
            return(res);
        }
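For orientation, a minimal usage sketch: walk an already-analyzed token chain and report every preposition the method recognizes. The ScanPrepositions helper and the way the chain is obtained are illustrative assumptions; only members visible in the code above (TryParse, Normal, NextCase, EndToken, Token.Next) are used.

        // Illustrative helper (not part of Pullenti): report every preposition in a token chain.
        static void ScanPrepositions(Pullenti.Ner.Token first)
        {
            for (Pullenti.Ner.Token t = first; t != null; t = t.Next)
            {
                PrepositionToken p = PrepositionToken.TryParse(t);
                if (p == null)
                {
                    continue;
                }
                // Normal is the canonical form, NextCase the grammatical case it governs
                System.Console.WriteLine("{0} -> {1}", p.Normal, p.NextCase);
                t = p.EndToken; // step over hyphenated multi-token prepositions (e.g. "ИЗ-ЗА")
            }
        }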
Example #2
        // Matching with a similarity tolerance
        // simD - the "similarity" parameter (0.05..1)
        public List <TerminToken> TryParseAllSim(Pullenti.Ner.Token token, double simD)
        {
            if (simD >= 1 || (simD < 0.05))
            {
                return(this.TryParseAll(token, TerminParseAttr.No));
            }
            if (Termins.Count == 0 || token == null)
            {
                return(null);
            }
            Pullenti.Ner.TextToken tt = token as Pullenti.Ner.TextToken;
            if (tt == null && (token is Pullenti.Ner.ReferentToken))
            {
                tt = (token as Pullenti.Ner.ReferentToken).BeginToken as Pullenti.Ner.TextToken;
            }
            List <TerminToken> res = null;

            foreach (Termin t in Termins)
            {
                if (!t.Lang.IsUndefined)
                {
                    if (!token.Morph.Language.IsUndefined)
                    {
                        if (((token.Morph.Language & t.Lang)).IsUndefined)
                        {
                            continue;
                        }
                    }
                }
                TerminToken ar = t.TryParseSim(tt, simD, TerminParseAttr.No);
                if (ar == null)
                {
                    continue;
                }
                ar.Termin = t;
                if (res == null || ar.TokensCount > res[0].TokensCount)
                {
                    res = new List <TerminToken>();
                    res.Add(ar);
                }
                else if (ar.TokensCount == res[0].TokensCount)
                {
                    res.Add(ar);
                }
            }
            return(res);
        }
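A brief usage sketch of the fuzzy lookup above, assuming these methods live on Pullenti's TerminCollection (the collection type and the chosen threshold are assumptions; TryParseAllSim, Termin.CanonicText and TokensCount are taken from the code). Note that simD values below 0.05, or at 1 and above, simply fall back to exact matching via TryParseAll.

        // Illustrative only: fuzzy lookup against an already-populated term collection.
        static void LookupFuzzy(TerminCollection dict, Pullenti.Ner.Token token)
        {
            List <TerminToken> hits = dict.TryParseAllSim(token, 0.8);
            if (hits == null)
            {
                return;
            }
            foreach (TerminToken h in hits)
            {
                // all returned bindings span the same number of tokens
                System.Console.WriteLine("{0} ({1} tokens)", h.Termin.CanonicText, h.TokensCount);
            }
        }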
Example #3
        /// <summary>
        /// Try to match all possible termins
        /// </summary>
        /// <param name="token">the initial token</param>
        /// <param name="attrs">extraction attributes</param>
        /// <returns>a list of all matching TerminToken bindings, or null</returns>
        public List <TerminToken> TryParseAll(Pullenti.Ner.Token token, TerminParseAttr attrs = TerminParseAttr.No)
        {
            if (token == null)
            {
                return(null);
            }
            List <TerminToken> re = this._TryAttachAll_(token, attrs, false);

            if (re == null && token.Morph.Language.IsUa)
            {
                re = this._TryAttachAll_(token, attrs, true);
            }
            if (re == null && Synonyms != null)
            {
                TerminToken re0 = Synonyms.TryParse(token, TerminParseAttr.No);
                if (re0 != null && (re0.Termin.Tag is List <string>))
                {
                    Termin term = this.Find(re0.Termin.CanonicText);
                    foreach (string syn in re0.Termin.Tag as List <string> )
                    {
                        if (term != null)
                        {
                            break;
                        }
                        term = this.Find(syn);
                    }
                    if (term != null)
                    {
                        re0.Termin = term;
                        List <TerminToken> res1 = new List <TerminToken>();
                        res1.Add(re0);
                        return(res1);
                    }
                }
            }
            return(re);
        }
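A calling sketch for TryParseAll. Building the collection with new Termin(...) and Add(...) follows the usual Pullenti ontology pattern but is an assumption here; after the reordering done in _TryAttachAll_ (Example #5), the longest binding sits at index 0.

        // Illustrative only: build a tiny term collection and take the best match at a token.
        static Termin MatchBest(Pullenti.Ner.Token token)
        {
            TerminCollection dict = new TerminCollection(); // assumed parameterless constructor
            dict.Add(new Termin("НАЛОГОВАЯ ИНСПЕКЦИЯ"));
            dict.Add(new Termin("НАЛОГОВАЯ СЛУЖБА"));
            List <TerminToken> all = dict.TryParseAll(token, TerminParseAttr.No);
            return all == null ? null : all[0].Termin; // all[0] is the longest binding
        }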
Example #4
        bool _manageVar(Pullenti.Ner.Token token, TerminParseAttr pars, string v, CharNode nod, int i0, ref List <TerminToken> res)
        {
            for (int i = i0; i < v.Length; i++)
            {
                short ch = (short)v[i];
                if (nod.Children == null)
                {
                    return(false);
                }
                CharNode nn;
                if (!nod.Children.TryGetValue(ch, out nn))
                {
                    return(false);
                }
                nod = nn;
            }
            List <Termin> vars = nod.Termins;

            if (vars == null || vars.Count == 0)
            {
                return(false);
            }
            foreach (Termin t in vars)
            {
                TerminToken ar = t.TryParse(token, pars);
                if (ar != null)
                {
                    ar.Termin = t;
                    if (res == null)
                    {
                        res = new List <TerminToken>();
                        res.Add(ar);
                    }
                    else if (ar.TokensCount > res[0].TokensCount)
                    {
                        res.Clear();
                        res.Add(ar);
                    }
                    else if (ar.TokensCount == res[0].TokensCount)
                    {
                        int j;
                        for (j = 0; j < res.Count; j++)
                        {
                            if (res[j].Termin == ar.Termin)
                            {
                                break;
                            }
                        }
                        if (j >= res.Count)
                        {
                            res.Add(ar);
                        }
                    }
                }
                if (t.AdditionalVars != null)
                {
                    foreach (Termin av in t.AdditionalVars)
                    {
                        ar = av.TryParse(token, pars);
                        if (ar == null)
                        {
                            continue;
                        }
                        ar.Termin = t;
                        if (res == null)
                        {
                            res = new List <TerminToken>();
                            res.Add(ar);
                        }
                        else if (ar.TokensCount > res[0].TokensCount)
                        {
                            res.Clear();
                            res.Add(ar);
                        }
                        else if (ar.TokensCount == res[0].TokensCount)
                        {
                            int j;
                            for (j = 0; j < res.Count; j++)
                            {
                                if (res[j].Termin == ar.Termin)
                                {
                                    break;
                                }
                            }
                            if (j >= res.Count)
                            {
                                res.Add(ar);
                            }
                        }
                    }
                }
            }
            return(v.Length > 1);
        }
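_manageVar descends a character trie from the node reached so far and then tries every Termin stored at the final node, keeping only the longest bindings. The sketch below is a stripped-down illustration of that lookup structure; it mirrors the CharNode members used above (Children keyed by UTF-16 code unit, Termins at terminal nodes) and is not the library's actual class.

        // Illustrative trie node mirroring what _manageVar relies on.
        class TrieNodeSketch
        {
            public Dictionary <short, TrieNodeSketch> Children;
            public List <Termin> Termins;

            // Descend from 'start' along word[i0..]; null means no term continues with this prefix.
            public static TrieNodeSketch Find(TrieNodeSketch start, string word, int i0)
            {
                TrieNodeSketch nod = start;
                for (int i = i0; i < word.Length; i++)
                {
                    TrieNodeSketch next;
                    if (nod.Children == null || !nod.Children.TryGetValue((short)word[i], out next))
                    {
                        return null;
                    }
                    nod = next;
                }
                return nod;
            }
        }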
Example #5
        List <TerminToken> _TryAttachAll_(Pullenti.Ner.Token token, TerminParseAttr pars = TerminParseAttr.No, bool mainRoot = false)
        {
            if (Termins.Count == 0 || token == null)
            {
                return(null);
            }
            string s = null;

            Pullenti.Ner.TextToken tt = token as Pullenti.Ner.TextToken;
            if (tt == null && (token is Pullenti.Ner.ReferentToken))
            {
                tt = (token as Pullenti.Ner.ReferentToken).BeginToken as Pullenti.Ner.TextToken;
            }
            List <TerminToken> res = null;
            bool     wasVars       = false;
            CharNode root          = (mainRoot ? m_Root : this._getRoot(token.Morph.Language, token.Chars.IsLatinLetter));

            if (tt != null)
            {
                s = tt.Term;
                CharNode nod    = root;
                bool     noVars = false;
                int      len0   = 0;
                if (((pars & TerminParseAttr.TermOnly)) != TerminParseAttr.No)
                {
                }
                else if (tt.InvariantPrefixLengthOfMorphVars <= s.Length)
                {
                    len0 = tt.InvariantPrefixLengthOfMorphVars;
                    for (int i = 0; i < tt.InvariantPrefixLengthOfMorphVars; i++)
                    {
                        short ch = (short)s[i];
                        if (nod.Children == null)
                        {
                            noVars = true;
                            break;
                        }
                        CharNode nn;
                        if (!nod.Children.TryGetValue(ch, out nn))
                        {
                            noVars = true;
                            break;
                        }
                        nod = nn;
                    }
                }
                if (!noVars)
                {
                    if (this._manageVar(token, pars, s, nod, len0, ref res))
                    {
                        wasVars = true;
                    }
                    for (int i = 0; i < tt.Morph.ItemsCount; i++)
                    {
                        if (((pars & TerminParseAttr.TermOnly)) != TerminParseAttr.No)
                        {
                            continue;
                        }
                        Pullenti.Morph.MorphWordForm wf = tt.Morph[i] as Pullenti.Morph.MorphWordForm;
                        if (wf == null)
                        {
                            continue;
                        }
                        if (((pars & TerminParseAttr.InDictionaryOnly)) != TerminParseAttr.No)
                        {
                            if (!wf.IsInDictionary)
                            {
                                continue;
                            }
                        }
                        int  j;
                        bool ok = true;
                        if (wf.NormalCase == null || wf.NormalCase == s)
                        {
                            ok = false;
                        }
                        else
                        {
                            for (j = 0; j < i; j++)
                            {
                                Pullenti.Morph.MorphWordForm wf2 = tt.Morph[j] as Pullenti.Morph.MorphWordForm;
                                if (wf2 != null)
                                {
                                    if (wf2.NormalCase == wf.NormalCase || wf2.NormalFull == wf.NormalCase)
                                    {
                                        break;
                                    }
                                }
                            }
                            if (j < i)
                            {
                                ok = false;
                            }
                        }
                        if (ok)
                        {
                            if (this._manageVar(token, pars, wf.NormalCase, nod, tt.InvariantPrefixLengthOfMorphVars, ref res))
                            {
                                wasVars = true;
                            }
                        }
                        if (wf.NormalFull == null || wf.NormalFull == wf.NormalCase || wf.NormalFull == s)
                        {
                            continue;
                        }
                        for (j = 0; j < i; j++)
                        {
                            Pullenti.Morph.MorphWordForm wf2 = tt.Morph[j] as Pullenti.Morph.MorphWordForm;
                            if (wf2 != null && wf2.NormalFull == wf.NormalFull)
                            {
                                break;
                            }
                        }
                        if (j < i)
                        {
                            continue;
                        }
                        if (this._manageVar(token, pars, wf.NormalFull, nod, tt.InvariantPrefixLengthOfMorphVars, ref res))
                        {
                            wasVars = true;
                        }
                    }
                }
            }
            else if (token is Pullenti.Ner.NumberToken)
            {
                if (this._manageVar(token, pars, (token as Pullenti.Ner.NumberToken).Value.ToString(), root, 0, ref res))
                {
                    wasVars = true;
                }
            }
            else
            {
                return(null);
            }
            if (!wasVars && s != null && s.Length == 1)
            {
                List <Termin> vars;
                if (m_Hash1.TryGetValue((short)s[0], out vars))
                {
                    foreach (Termin t in vars)
                    {
                        if (!t.Lang.IsUndefined)
                        {
                            if (!token.Morph.Language.IsUndefined)
                            {
                                if (((token.Morph.Language & t.Lang)).IsUndefined)
                                {
                                    continue;
                                }
                            }
                        }
                        TerminToken ar = t.TryParse(tt, TerminParseAttr.No);
                        if (ar == null)
                        {
                            continue;
                        }
                        ar.Termin = t;
                        if (res == null)
                        {
                            res = new List <TerminToken>();
                            res.Add(ar);
                        }
                        else if (ar.TokensCount > res[0].TokensCount)
                        {
                            res.Clear();
                            res.Add(ar);
                        }
                        else if (ar.TokensCount == res[0].TokensCount)
                        {
                            res.Add(ar);
                        }
                    }
                }
            }
            if (res != null)
            {
                int ii  = 0;
                int max = 0;
                for (int i = 0; i < res.Count; i++)
                {
                    if (res[i].LengthChar > max)
                    {
                        max = res[i].LengthChar;
                        ii  = i;
                    }
                }
                if (ii > 0)
                {
                    TerminToken v = res[ii];
                    res.RemoveAt(ii);
                    res.Insert(0, v);
                }
            }
            return(res);
        }
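The pars flags thread through this method: TermOnly suppresses the per-word-form morphological variants (only the surface term is tried), and InDictionaryOnly drops word forms not confirmed by the morphology dictionary. A hedged sketch of passing the latter through the public TryParseAll (the TerminCollection receiver is the same assumption as above):

        // Illustrative only: accept matches from the surface form and
        // dictionary-confirmed normal forms, skipping heuristic word forms.
        static List <TerminToken> MatchStrict(TerminCollection dict, Pullenti.Ner.Token token)
        {
            return dict.TryParseAll(token, TerminParseAttr.InDictionaryOnly);
        }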
Example #6
        /// <summary>
        /// Try to extract a conjunction starting at the given token.
        /// </summary>
        /// <param name="t">the initial token</param>
        /// <returns>the result, or null</returns>
        public static ConjunctionToken TryParse(Pullenti.Ner.Token t)
        {
            if (!(t is Pullenti.Ner.TextToken))
            {
                return(null);
            }
            if (t.IsComma)
            {
                ConjunctionToken ne = TryParse(t.Next);
                if (ne != null)
                {
                    ne.BeginToken = t;
                    ne.IsSimple   = false;
                    return(ne);
                }
                return(new ConjunctionToken(t, t)
                {
                    Typ = ConjunctionType.Comma, IsSimple = true, Normal = ","
                });
            }
            TerminToken tok = m_Ontology.TryParse(t, TerminParseAttr.No);

            if (tok != null)
            {
                if (t.IsValue("ТО", null))
                {
                    NounPhraseToken npt = NounPhraseHelper.TryParse(t, NounPhraseParseAttr.ParseAdverbs, 0, null);
                    if (npt != null && npt.EndChar > tok.EndToken.EndChar)
                    {
                        return(null);
                    }
                }
                if (tok.Termin.Tag2 != null)
                {
                    if (!(tok.EndToken is Pullenti.Ner.TextToken))
                    {
                        return(null);
                    }
                    if (tok.EndToken.GetMorphClassInDictionary().IsVerb)
                    {
                        if (!(tok.EndToken as Pullenti.Ner.TextToken).Term.EndsWith("АЯ"))
                        {
                            return(null);
                        }
                    }
                }
                return(new ConjunctionToken(t, tok.EndToken)
                {
                    Normal = tok.Termin.CanonicText, Typ = (ConjunctionType)tok.Termin.Tag
                });
            }
            if (!t.GetMorphClassInDictionary().IsConjunction)
            {
                return(null);
            }
            if (t.IsAnd || t.IsOr)
            {
                ConjunctionToken res = new ConjunctionToken(t, t)
                {
                    Normal = (t as Pullenti.Ner.TextToken).Term, IsSimple = true, Typ = (t.IsOr ? ConjunctionType.Or : ConjunctionType.And)
                };
                if (((t.Next != null && t.Next.IsChar('(') && (t.Next.Next is Pullenti.Ner.TextToken)) && t.Next.Next.IsOr && t.Next.Next.Next != null) && t.Next.Next.Next.IsChar(')'))
                {
                    res.EndToken = t.Next.Next.Next;
                }
                else if ((t.Next != null && t.Next.IsCharOf("\\/") && (t.Next.Next is Pullenti.Ner.TextToken)) && t.Next.Next.IsOr)
                {
                    res.EndToken = t.Next.Next;
                }
                return(res);
            }
            string term = (t as Pullenti.Ner.TextToken).Term;

            if (term == "НИ")
            {
                return new ConjunctionToken(t, t)
                       {
                           Normal = term, Typ = ConjunctionType.Not
                       };
            }
            if ((term == "А" || term == "НО" || term == "ЗАТО") || term == "ОДНАКО")
            {
                return new ConjunctionToken(t, t)
                       {
                           Normal = term, Typ = ConjunctionType.But
                       };
            }
            return(null);
        }
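A usage sketch analogous to the preposition example above: walk a token chain and list the conjunctions found. Only members visible in the code above are used; the helper itself is illustrative.

        // Illustrative helper (not part of Pullenti): report every conjunction in a token chain.
        static void ScanConjunctions(Pullenti.Ner.Token first)
        {
            for (Pullenti.Ner.Token t = first; t != null; t = t.Next)
            {
                ConjunctionToken c = ConjunctionToken.TryParse(t);
                if (c == null)
                {
                    continue;
                }
                // Typ distinguishes And / Or / But / Not / Comma; IsSimple marks single-word forms
                System.Console.WriteLine("{0}: {1} (simple={2})", c.Normal, c.Typ, c.IsSimple);
                t = c.EndToken; // step over multi-token results such as "И/ИЛИ" or ", ЕСЛИ"
            }
        }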