/// <summary>
/// Tries to find a dictionary termin that starts at the given token.
/// </summary>
/// <param name="token">start token</param>
/// <param name="attrs">parsing attributes</param>
/// <returns>
/// the resulting token; when several termins were attached, the first one of the list
/// returned by TryParseAll; null when the collection is empty or nothing was attached
/// </returns>
public TerminToken TryParse(Pullenti.Ner.Token token, TerminParseAttr attrs = TerminParseAttr.No)
{
    // Nothing can match against an empty termin list.
    if (Termins.Count == 0)
    {
        return null;
    }

    List<TerminToken> matches = this.TryParseAll(token, attrs);
    return matches == null ? null : matches[0];
}
/// <summary>
/// Tries to attach every termin that can start at the given token.
/// </summary>
/// <param name="token">start token</param>
/// <param name="attrs">parsing attributes</param>
/// <returns>list of all suitable TerminToken bindings, or null</returns>
public List<TerminToken> TryParseAll(Pullenti.Ner.Token token, TerminParseAttr attrs = TerminParseAttr.No)
{
    if (token == null)
    {
        return null;
    }

    List<TerminToken> found = this._TryAttachAll_(token, attrs, false);
    if (found == null && token.Morph.Language.IsUa)
    {
        // Nothing found for a Ukrainian token: retry, this time forcing the main root.
        found = this._TryAttachAll_(token, attrs, true);
    }

    // The synonym fallback below is used only when the direct lookup gave nothing.
    if (found != null || Synonyms == null)
    {
        return found;
    }

    TerminToken viaSynonym = Synonyms.TryParse(token, TerminParseAttr.No);
    if (viaSynonym == null)
    {
        return null;
    }

    // The synonym termin is expected to carry its alternative spellings in Tag.
    List<string> alternatives = viaSynonym.Termin.Tag as List<string>;
    if (alternatives == null)
    {
        return null;
    }

    // Resolve the synonym back to a termin of this collection: first by its canonic
    // text, then by each alternative in order, stopping at the first hit.
    Termin resolved = this.Find(viaSynonym.Termin.CanonicText);
    for (int k = 0; resolved == null && k < alternatives.Count; k++)
    {
        resolved = this.Find(alternatives[k]);
    }

    if (resolved == null)
    {
        return null;
    }

    viaSynonym.Termin = resolved;
    List<TerminToken> single = new List<TerminToken>();
    single.Add(viaSynonym);
    return single;
}
/// <summary>
/// Walks the character tree from <paramref name="nod"/> along the characters of
/// <paramref name="v"/>, beginning at index <paramref name="i0"/>, and tries to attach
/// every termin (and each of its additional variants) stored in the node that is reached.
/// </summary>
/// <param name="token">token at which the termins are tried</param>
/// <param name="pars">parsing attributes forwarded to Termin.TryParse</param>
/// <param name="v">word variant whose characters are used as tree keys</param>
/// <param name="nod">node from which the walk starts</param>
/// <param name="i0">index of the first character of <paramref name="v"/> to consume</param>
/// <param name="res">
/// accumulated bindings; created on the first match and afterwards holding only the
/// bindings with the greatest TokensCount, at most one per termin
/// </param>
/// <returns>
/// false when the tree has no path for the variant or the final node holds no termins;
/// otherwise true only if the variant is longer than one character
/// </returns>
bool _manageVar(Pullenti.Ner.Token token, TerminParseAttr pars, string v, CharNode nod, int i0, ref List<TerminToken> res)
{
    CharNode node = nod;
    for (int pos = i0; pos < v.Length; pos++)
    {
        if (node.Children == null)
        {
            return false;
        }

        CharNode next;
        if (!node.Children.TryGetValue((short)v[pos], out next))
        {
            return false;
        }

        node = next;
    }

    List<Termin> candidates = node.Termins;
    if (candidates == null || candidates.Count == 0)
    {
        return false;
    }

    foreach (Termin t in candidates)
    {
        _mergeMatch(t.TryParse(token, pars), t, ref res);

        if (t.AdditionalVars == null)
        {
            continue;
        }

        // A binding made through an additional variant is reported under the owning termin.
        foreach (Termin av in t.AdditionalVars)
        {
            _mergeMatch(av.TryParse(token, pars), t, ref res);
        }
    }

    // Note: the result does not depend on whether anything was attached above.
    return v.Length > 1;
}

/// <summary>
/// Merges one binding into the result list, keeping only the bindings that span the most
/// tokens and never storing two bindings for the same termin.
/// </summary>
/// <param name="match">binding returned by Termin.TryParse; null is ignored</param>
/// <param name="owner">termin to record in the binding</param>
/// <param name="res">result list, created on demand</param>
private static void _mergeMatch(TerminToken match, Termin owner, ref List<TerminToken> res)
{
    if (match == null)
    {
        return;
    }

    match.Termin = owner;

    if (res == null)
    {
        res = new List<TerminToken>();
        res.Add(match);
        return;
    }

    int best = res[0].TokensCount;
    if (match.TokensCount > best)
    {
        // A strictly longer binding supersedes everything collected so far.
        res.Clear();
        res.Add(match);
        return;
    }

    if (match.TokensCount < best)
    {
        return;
    }

    // Same length: add only if this termin is not in the list yet.
    for (int j = 0; j < res.Count; j++)
    {
        if (res[j].Termin == match.Termin)
        {
            return;
        }
    }

    res.Add(match);
}
/// <summary>
/// Collects every termin of this collection that can be attached at the given token.
/// The token term and the normal forms of its word forms are looked up in the character
/// tree; a one-character term additionally falls back to the m_Hash1 table when the tree
/// lookups reported no variants.
/// </summary>
/// <param name="token">
/// start token: a text token, a referent token (its first token, if textual, supplies the
/// lookup key) or a number token; any other token yields null
/// </param>
/// <param name="pars">
/// parsing attributes; TermOnly disables the normal-form lookups, InDictionaryOnly skips
/// word forms that are not marked as being in the dictionary
/// </param>
/// <param name="mainRoot">true to search from m_Root instead of the root returned by _getRoot</param>
/// <returns>
/// the collected bindings with the one having the greatest LengthChar moved to the front,
/// or null when nothing was attached
/// </returns>
List<TerminToken> _TryAttachAll_(Pullenti.Ner.Token token, TerminParseAttr pars = TerminParseAttr.No, bool mainRoot = false)
{
    if (Termins.Count == 0 || token == null)
    {
        return null;
    }

    Pullenti.Ner.TextToken tt = token as Pullenti.Ner.TextToken;
    if (tt == null && (token is Pullenti.Ner.ReferentToken))
    {
        tt = (token as Pullenti.Ner.ReferentToken).BeginToken as Pullenti.Ner.TextToken;
    }

    string s = null;
    List<TerminToken> res = null;
    bool wasVars = false;
    CharNode root = (mainRoot ? m_Root : this._getRoot(token.Morph.Language, token.Chars.IsLatinLetter));

    if (tt != null)
    {
        s = tt.Term;

        // These flags do not change during the call, so evaluate them once.
        bool termOnly = (pars & TerminParseAttr.TermOnly) != TerminParseAttr.No;
        bool dictionaryOnly = (pars & TerminParseAttr.InDictionaryOnly) != TerminParseAttr.No;

        // Descend once along the prefix reported by InvariantPrefixLengthOfMorphVars so
        // that each later lookup can start from the node reached here.
        // len0 is the number of characters actually consumed by this descent.
        CharNode nod = root;
        bool noVars = false;
        int len0 = 0;
        if (!termOnly && tt.InvariantPrefixLengthOfMorphVars <= s.Length)
        {
            len0 = tt.InvariantPrefixLengthOfMorphVars;
            for (int i = 0; i < len0; i++)
            {
                if (nod.Children == null)
                {
                    noVars = true;
                    break;
                }

                CharNode nn;
                if (!nod.Children.TryGetValue((short)s[i], out nn))
                {
                    noVars = true;
                    break;
                }

                nod = nn;
            }
        }

        if (!noVars)
        {
            if (this._manageVar(token, pars, s, nod, len0, ref res))
            {
                wasVars = true;
            }

            if (!termOnly)
            {
                for (int i = 0; i < tt.Morph.ItemsCount; i++)
                {
                    Pullenti.Morph.MorphWordForm wf = tt.Morph[i] as Pullenti.Morph.MorphWordForm;
                    if (wf == null)
                    {
                        continue;
                    }

                    if (dictionaryOnly && !wf.IsInDictionary)
                    {
                        continue;
                    }

                    // Try NormalCase unless it equals the term itself or an earlier word
                    // form already supplied the same string as NormalCase or NormalFull.
                    string normalCase = wf.NormalCase;
                    if (normalCase != null && normalCase != s)
                    {
                        bool seenBefore = false;
                        for (int j = 0; j < i; j++)
                        {
                            Pullenti.Morph.MorphWordForm prev = tt.Morph[j] as Pullenti.Morph.MorphWordForm;
                            if (prev != null && (prev.NormalCase == normalCase || prev.NormalFull == normalCase))
                            {
                                seenBefore = true;
                                break;
                            }
                        }

                        // The start index must match the depth of nod, hence len0
                        // (equal to the invariant prefix length whenever the descent ran).
                        if (!seenBefore && this._manageVar(token, pars, normalCase, nod, len0, ref res))
                        {
                            wasVars = true;
                        }
                    }

                    // Try NormalFull under the analogous conditions; here only the
                    // NormalFull of earlier word forms is checked for duplicates.
                    string normalFull = wf.NormalFull;
                    if (normalFull == null || normalFull == normalCase || normalFull == s)
                    {
                        continue;
                    }

                    bool fullSeenBefore = false;
                    for (int j = 0; j < i; j++)
                    {
                        Pullenti.Morph.MorphWordForm prev = tt.Morph[j] as Pullenti.Morph.MorphWordForm;
                        if (prev != null && prev.NormalFull == normalFull)
                        {
                            fullSeenBefore = true;
                            break;
                        }
                    }

                    if (fullSeenBefore)
                    {
                        continue;
                    }

                    if (this._manageVar(token, pars, normalFull, nod, len0, ref res))
                    {
                        wasVars = true;
                    }
                }
            }
        }
    }
    else if (token is Pullenti.Ner.NumberToken)
    {
        // Numbers are looked up by the string form of their value, from the root.
        if (this._manageVar(token, pars, (token as Pullenti.Ner.NumberToken).Value.ToString(), root, 0, ref res))
        {
            wasVars = true;
        }
    }
    else
    {
        return null;
    }

    // One-character terms: consult the m_Hash1 table when the tree reported no variants.
    if (!wasVars && s != null && s.Length == 1)
    {
        List<Termin> vars;
        if (m_Hash1.TryGetValue((short)s[0], out vars))
        {
            foreach (Termin t in vars)
            {
                // Skip the termin only when both languages are defined and do not intersect.
                if (!t.Lang.IsUndefined
                    && !token.Morph.Language.IsUndefined
                    && (token.Morph.Language & t.Lang).IsUndefined)
                {
                    continue;
                }

                // NOTE(review): this parses from tt with TerminParseAttr.No rather than
                // from token with pars - kept as in the original; confirm it is intended.
                TerminToken ar = t.TryParse(tt, TerminParseAttr.No);
                if (ar == null)
                {
                    continue;
                }

                ar.Termin = t;
                if (res == null)
                {
                    res = new List<TerminToken>();
                    res.Add(ar);
                }
                else if (ar.TokensCount > res[0].TokensCount)
                {
                    res.Clear();
                    res.Add(ar);
                }
                else if (ar.TokensCount == res[0].TokensCount)
                {
                    // NOTE(review): unlike _manageVar, no per-termin de-duplication here.
                    res.Add(ar);
                }
            }
        }
    }

    // Move the binding covering the most characters (the first such one) to the front.
    if (res != null)
    {
        int longestIndex = 0;
        int longestChars = 0;
        for (int i = 0; i < res.Count; i++)
        {
            if (res[i].LengthChar > longestChars)
            {
                longestChars = res[i].LengthChar;
                longestIndex = i;
            }
        }

        if (longestIndex > 0)
        {
            TerminToken longest = res[longestIndex];
            res.RemoveAt(longestIndex);
            res.Insert(0, longest);
        }
    }

    return res;
}