Ejemplo n.º 1
0
        /// <summary>
        /// Tries to find a termin of this collection starting at the given token.
        /// </summary>
        /// <param name="token">starting token</param>
        /// <param name="attrs">parsing attributes</param>
        /// <returns>the resulting token; when several termins were attached, the first one; when none, null</returns>
        public TerminToken TryParse(Pullenti.Ner.Token token, TerminParseAttr attrs = TerminParseAttr.No)
        {
            // Nothing to match against in an empty collection.
            if (Termins.Count == 0)
            {
                return null;
            }

            List<TerminToken> found = this.TryParseAll(token, attrs);
            return (found == null ? null : found[0]);
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Tries to attach every possible termin starting at the given token.
        /// </summary>
        /// <param name="token">starting token</param>
        /// <param name="attrs">parsing attributes</param>
        /// <returns>list of all matching TerminToken attachments, or null</returns>
        public List<TerminToken> TryParseAll(Pullenti.Ner.Token token, TerminParseAttr attrs = TerminParseAttr.No)
        {
            if (token == null)
            {
                return null;
            }

            // First attempt uses the root selected for the token; for Ukrainian tokens
            // a second attempt is made with the mainRoot flag set.
            List<TerminToken> attached = this._TryAttachAll_(token, attrs, false);
            if (attached == null && token.Morph.Language.IsUa)
            {
                attached = this._TryAttachAll_(token, attrs, true);
            }
            if (attached != null || Synonyms == null)
            {
                return attached;
            }

            // Fallback: match through the synonyms collection and map the hit back
            // to a termin of this collection.
            TerminToken viaSynonym = Synonyms.TryParse(token, TerminParseAttr.No);
            if (viaSynonym == null)
            {
                return null;
            }
            List<string> alternatives = viaSynonym.Termin.Tag as List<string>;
            if (alternatives == null)
            {
                return null;
            }

            // Look up the canonic text first, then each string from the tag list,
            // stopping at the first one that Find resolves.
            Termin own = this.Find(viaSynonym.Termin.CanonicText);
            for (int k = 0; own == null && k < alternatives.Count; k++)
            {
                own = this.Find(alternatives[k]);
            }
            if (own == null)
            {
                return null;
            }

            viaSynonym.Termin = own;
            List<TerminToken> single = new List<TerminToken>();
            single.Add(viaSynonym);
            return single;
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Follows the characters of <paramref name="v"/> (from index <paramref name="i0"/>) down the
        /// character tree starting at <paramref name="nod"/>, then tries to parse every termin stored
        /// at the reached node, and each of its additional variants, at <paramref name="token"/>.
        /// Successful parses are merged into <paramref name="res"/>.
        /// </summary>
        /// <param name="token">token at which the termins are parsed</param>
        /// <param name="pars">parsing attributes passed through to the termins</param>
        /// <param name="v">string variant used as the path in the character tree</param>
        /// <param name="nod">node from which the walk starts</param>
        /// <param name="i0">index in <paramref name="v"/> of the first character to follow</param>
        /// <param name="res">accumulated result list; created on the first successful parse</param>
        /// <returns>
        /// false when the path does not exist or the reached node holds no termins;
        /// otherwise true exactly when <paramref name="v"/> is longer than one character
        /// </returns>
        bool _manageVar(Pullenti.Ner.Token token, TerminParseAttr pars, string v, CharNode nod, int i0, ref List<TerminToken> res)
        {
            CharNode current = nod;
            for (int pos = i0; pos < v.Length; pos++)
            {
                short key = (short)v[pos];
                CharNode next;
                if (current.Children == null || !current.Children.TryGetValue(key, out next))
                {
                    return false;
                }
                current = next;
            }

            List<Termin> candidates = current.Termins;
            if (candidates == null || candidates.Count == 0)
            {
                return false;
            }

            foreach (Termin termin in candidates)
            {
                _collectVarMatch(termin, termin, token, pars, ref res);
                if (termin.AdditionalVars == null)
                {
                    continue;
                }
                // Additional variants do the parsing, but the hit is attributed to the owning termin.
                foreach (Termin variant in termin.AdditionalVars)
                {
                    _collectVarMatch(variant, termin, token, pars, ref res);
                }
            }
            return v.Length > 1;
        }

        /// <summary>
        /// Parses <paramref name="parser"/> at <paramref name="token"/> and, on success, records the hit
        /// under <paramref name="owner"/>. The list only keeps hits with the largest TokensCount seen so
        /// far (compared against its first element), and at most one hit per termin among equals.
        /// </summary>
        static void _collectVarMatch(Termin parser, Termin owner, Pullenti.Ner.Token token, TerminParseAttr pars, ref List<TerminToken> res)
        {
            TerminToken hit = parser.TryParse(token, pars);
            if (hit == null)
            {
                return;
            }
            hit.Termin = owner;

            if (res == null)
            {
                res = new List<TerminToken>();
                res.Add(hit);
                return;
            }
            if (hit.TokensCount > res[0].TokensCount)
            {
                // A longer match supersedes everything collected so far.
                res.Clear();
                res.Add(hit);
                return;
            }
            if (hit.TokensCount != res[0].TokensCount)
            {
                return;
            }
            foreach (TerminToken kept in res)
            {
                if (kept.Termin == hit.Termin)
                {
                    return;
                }
            }
            res.Add(hit);
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Collects every termin attachment that starts at <paramref name="token"/>.
        /// For a text token (or a referent token beginning with one) the term itself and the
        /// distinct normal forms of its morphological variants are looked up in the character
        /// tree; for a number token its value string is looked up. When none of those lookups
        /// reported a multi-character variant and the term is a single character, the
        /// single-character table m_Hash1 is consulted as well.
        /// </summary>
        /// <param name="token">starting token</param>
        /// <param name="pars">parsing attributes (TermOnly disables the morphological variants,
        /// InDictionaryOnly skips word forms that are not in the dictionary)</param>
        /// <param name="mainRoot">when true, m_Root is used instead of the root chosen by _getRoot</param>
        /// <returns>
        /// the list of attachments with the element of greatest LengthChar moved to the front,
        /// or null when nothing was attached or the token kind is not supported
        /// </returns>
        List<TerminToken> _TryAttachAll_(Pullenti.Ner.Token token, TerminParseAttr pars = TerminParseAttr.No, bool mainRoot = false)
        {
            if (Termins.Count == 0 || token == null)
            {
                return null;
            }
            string s = null;

            // A referent token is looked up through the text token it begins with.
            Pullenti.Ner.TextToken tt = token as Pullenti.Ner.TextToken;
            if (tt == null && (token is Pullenti.Ner.ReferentToken))
            {
                tt = (token as Pullenti.Ner.ReferentToken).BeginToken as Pullenti.Ner.TextToken;
            }
            List<TerminToken> res = null;
            bool wasVars = false;
            CharNode root = (mainRoot ? m_Root : this._getRoot(token.Morph.Language, token.Chars.IsLatinLetter));
            bool termOnly = (pars & TerminParseAttr.TermOnly) != TerminParseAttr.No;
            bool inDictionaryOnly = (pars & TerminParseAttr.InDictionaryOnly) != TerminParseAttr.No;

            if (tt != null)
            {
                s = tt.Term;
                CharNode nod = root;
                bool noVars = false;
                // Number of leading characters of s already consumed to reach nod.
                int len0 = 0;
                if (!termOnly && tt.InvariantPrefixLengthOfMorphVars <= s.Length)
                {
                    // Walk the prefix shared by all morphological variants once,
                    // so each variant lookup can resume from the same node.
                    len0 = tt.InvariantPrefixLengthOfMorphVars;
                    for (int i = 0; i < len0; i++)
                    {
                        short ch = (short)s[i];
                        CharNode nn;
                        if (nod.Children == null || !nod.Children.TryGetValue(ch, out nn))
                        {
                            noVars = true;
                            break;
                        }
                        nod = nn;
                    }
                }
                if (!noVars)
                {
                    if (this._manageVar(token, pars, s, nod, len0, ref res))
                    {
                        wasVars = true;
                    }
                    for (int i = 0; i < tt.Morph.ItemsCount; i++)
                    {
                        if (termOnly)
                        {
                            continue;
                        }
                        Pullenti.Morph.MorphWordForm wf = tt.Morph[i] as Pullenti.Morph.MorphWordForm;
                        if (wf == null)
                        {
                            continue;
                        }
                        if (inDictionaryOnly && !wf.IsInDictionary)
                        {
                            continue;
                        }
                        int j;
                        bool ok = true;
                        if (wf.NormalCase == null || wf.NormalCase == s)
                        {
                            ok = false;
                        }
                        else
                        {
                            // Skip a NormalCase already tried as the NormalCase or NormalFull of an earlier word form.
                            for (j = 0; j < i; j++)
                            {
                                Pullenti.Morph.MorphWordForm wf2 = tt.Morph[j] as Pullenti.Morph.MorphWordForm;
                                if (wf2 != null)
                                {
                                    if (wf2.NormalCase == wf.NormalCase || wf2.NormalFull == wf.NormalCase)
                                    {
                                        break;
                                    }
                                }
                            }
                            if (j < i)
                            {
                                ok = false;
                            }
                        }
                        if (ok)
                        {
                            // nod was reached by consuming exactly len0 characters, so the lookup
                            // must resume at len0; the two agree even when the prefix walk was skipped.
                            if (this._manageVar(token, pars, wf.NormalCase, nod, len0, ref res))
                            {
                                wasVars = true;
                            }
                        }
                        if (wf.NormalFull == null || wf.NormalFull == wf.NormalCase || wf.NormalFull == s)
                        {
                            continue;
                        }
                        // Skip a NormalFull already tried for an earlier word form.
                        for (j = 0; j < i; j++)
                        {
                            Pullenti.Morph.MorphWordForm wf2 = tt.Morph[j] as Pullenti.Morph.MorphWordForm;
                            if (wf2 != null && wf2.NormalFull == wf.NormalFull)
                            {
                                break;
                            }
                        }
                        if (j < i)
                        {
                            continue;
                        }
                        if (this._manageVar(token, pars, wf.NormalFull, nod, len0, ref res))
                        {
                            wasVars = true;
                        }
                    }
                }
            }
            else if (token is Pullenti.Ner.NumberToken)
            {
                if (this._manageVar(token, pars, (token as Pullenti.Ner.NumberToken).Value.ToString(), root, 0, ref res))
                {
                    wasVars = true;
                }
            }
            else
            {
                return null;
            }

            // Single-character terms: _manageVar never reports success for a one-character
            // variant, so consult the table keyed by that character.
            if (!wasVars && s != null && s.Length == 1)
            {
                List<Termin> vars;
                if (m_Hash1.TryGetValue((short)s[0], out vars))
                {
                    foreach (Termin t in vars)
                    {
                        // When both languages are defined they must intersect.
                        if (!t.Lang.IsUndefined)
                        {
                            if (!token.Morph.Language.IsUndefined)
                            {
                                if (((token.Morph.Language & t.Lang)).IsUndefined)
                                {
                                    continue;
                                }
                            }
                        }
                        TerminToken ar = t.TryParse(tt, TerminParseAttr.No);
                        if (ar == null)
                        {
                            continue;
                        }
                        ar.Termin = t;
                        if (res == null)
                        {
                            res = new List<TerminToken>();
                            res.Add(ar);
                        }
                        else if (ar.TokensCount > res[0].TokensCount)
                        {
                            res.Clear();
                            res.Add(ar);
                        }
                        else if (ar.TokensCount == res[0].TokensCount)
                        {
                            res.Add(ar);
                        }
                    }
                }
            }

            // Move the first attachment with the greatest LengthChar to the front.
            if (res != null)
            {
                int ii = 0;
                int max = 0;
                for (int i = 0; i < res.Count; i++)
                {
                    if (res[i].LengthChar > max)
                    {
                        max = res[i].LengthChar;
                        ii = i;
                    }
                }
                if (ii > 0)
                {
                    TerminToken v = res[ii];
                    res.RemoveAt(ii);
                    res.Insert(0, v);
                }
            }
            return res;
        }