/// <summary>
/// Try to extract a preposition starting at the given token.
/// </summary>
/// <param name="t">the initial token</param>
/// <return>the resulting token or null</return>
public static PrepositionToken TryParse(Pullenti.Ner.Token t)
{
    if (!(t is Pullenti.Ner.TextToken))
        return null;
    // First try the internal ontology of known prepositions.
    TerminToken tok = m_Ontology.TryParse(t, TerminParseAttr.No);
    if (tok != null)
    {
        PrepositionToken known = new PrepositionToken(t, tok.EndToken);
        known.Normal = tok.Termin.CanonicText;
        // The termin's Tag carries the grammatical case governed by this preposition.
        known.NextCase = (Pullenti.Morph.MorphCase)tok.Termin.Tag;
        return known;
    }
    // Fall back to the morphological dictionary.
    if (!t.GetMorphClassInDictionary().IsPreposition)
        return null;
    PrepositionToken res = new PrepositionToken(t, t);
    res.Normal = t.GetNormalCaseText(Pullenti.Morph.MorphClass.Preposition, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false);
    res.NextCase = Pullenti.Morph.LanguageHelper.GetCaseAfterPreposition(res.Normal);
    // Glue hyphenated compounds: preposition '-' preposition with no space before the hyphen.
    Pullenti.Ner.Token hy = t.Next;
    if (hy != null && hy.IsHiphen && !t.IsWhitespaceAfter && (hy.Next is Pullenti.Ner.TextToken) && hy.Next.GetMorphClassInDictionary().IsPreposition)
        res.EndToken = hy.Next;
    return res;
}
// Attach termins with a similarity tolerance.
// simD - the "similarity" parameter (0.05..1); out-of-range values
// degenerate to exact matching via TryParseAll.
public List <TerminToken> TryParseAllSim(Pullenti.Ner.Token token, double simD)
{
    if (simD >= 1 || (simD < 0.05))
        return this.TryParseAll(token, TerminParseAttr.No);
    if (Termins.Count == 0 || token == null)
        return null;
    Pullenti.Ner.TextToken tt = token as Pullenti.Ner.TextToken;
    // For a referent token, descend to its first text token.
    if (tt == null && (token is Pullenti.Ner.ReferentToken))
        tt = (token as Pullenti.Ner.ReferentToken).BeginToken as Pullenti.Ner.TextToken;
    List <TerminToken> best = null;
    foreach (Termin t in Termins)
    {
        // Skip termins whose language is incompatible with the token's.
        if (!t.Lang.IsUndefined && !token.Morph.Language.IsUndefined && ((token.Morph.Language & t.Lang)).IsUndefined)
            continue;
        TerminToken ar = t.TryParseSim(tt, simD, TerminParseAttr.No);
        if (ar == null)
            continue;
        ar.Termin = t;
        // Keep only the bindings with the maximal token count.
        if (best == null || ar.TokensCount > best[0].TokensCount)
        {
            best = new List <TerminToken>();
            best.Add(ar);
        }
        else if (ar.TokensCount == best[0].TokensCount)
            best.Add(ar);
    }
    return best;
}
/// <summary>
/// Try to attach all possible termins at the given token.
/// </summary>
/// <param name="token">the initial token</param>
/// <param name="attrs">parsing attributes</param>
/// <return>list of all matching TerminToken bindings, or null</return>
public List <TerminToken> TryParseAll(Pullenti.Ner.Token token, TerminParseAttr attrs = TerminParseAttr.No)
{
    if (token == null)
        return null;
    List <TerminToken> re = this._TryAttachAll_(token, attrs, false);
    // For Ukrainian text, retry against the main root.
    if (re == null && token.Morph.Language.IsUa)
        re = this._TryAttachAll_(token, attrs, true);
    if (re != null || Synonyms == null)
        return re;
    // Nothing matched directly - try the synonyms dictionary.
    TerminToken re0 = Synonyms.TryParse(token, TerminParseAttr.No);
    if (re0 == null || !(re0.Termin.Tag is List <string>))
        return re;
    // Resolve the synonym back to a termin of this terminology:
    // first by canonic text, then by each synonym string in turn.
    Termin term = this.Find(re0.Termin.CanonicText);
    if (term == null)
    {
        foreach (string syn in re0.Termin.Tag as List <string>)
        {
            term = this.Find(syn);
            if (term != null)
                break;
        }
    }
    if (term == null)
        return re;
    re0.Termin = term;
    List <TerminToken> res1 = new List <TerminToken>();
    res1.Add(re0);
    return res1;
}
// Walk the char-trie from node 'nod' along v[i0..], then try to bind every
// termin stored at the final node (and each termin's additional variants),
// merging successful bindings into 'res'.
// token - the token the termins are matched against
// pars  - parsing attributes forwarded to Termin.TryParse
// v     - the string variant being looked up in the trie
// nod   - trie node to start the descent from (the invariant prefix is
//         assumed to be already consumed by the caller up to index i0)
// i0    - index in v at which the descent starts
// res   - accumulated bindings; only the longest matches are kept
// Returns true when v has more than one character (a real variant was tried).
bool _manageVar(Pullenti.Ner.Token token, TerminParseAttr pars, string v, CharNode nod, int i0, ref List <TerminToken> res)
{
    // Descend the trie; bail out as soon as a character has no child node.
    for (int i = i0; i < v.Length; i++)
    {
        short ch = (short)v[i];
        if (nod.Children == null)
            return false;
        CharNode nn;
        if (!nod.Children.TryGetValue(ch, out nn))
            return false;
        nod = nn;
    }
    List <Termin> vars = nod.Termins;
    if (vars == null || vars.Count == 0)
        return false;
    foreach (Termin t in vars)
    {
        TerminToken ar = t.TryParse(token, pars);
        if (ar != null)
        {
            ar.Termin = t;
            _mergeVar(ar, ref res);
        }
        if (t.AdditionalVars == null)
            continue;
        foreach (Termin av in t.AdditionalVars)
        {
            ar = av.TryParse(token, pars);
            if (ar == null)
                continue;
            // NOTE: the binding is attributed to the base termin, not the variant.
            ar.Termin = t;
            _mergeVar(ar, ref res);
        }
    }
    return v.Length > 1;
}

// Merge a successful binding into the result list: keep only bindings with
// the maximal token count, and avoid duplicates of the same termin.
// (Extracted from the two identical copies that used to live in _manageVar.)
static void _mergeVar(TerminToken ar, ref List <TerminToken> res)
{
    if (res == null)
    {
        res = new List <TerminToken>();
        res.Add(ar);
        return;
    }
    if (ar.TokensCount > res[0].TokensCount)
    {
        res.Clear();
        res.Add(ar);
        return;
    }
    if (ar.TokensCount == res[0].TokensCount)
    {
        int j;
        for (j = 0; j < res.Count; j++)
        {
            if (res[j].Termin == ar.Termin)
                break;
        }
        if (j >= res.Count)
            res.Add(ar);
    }
}
// Core attach routine: collect all termins matching at 'token', trying the raw
// surface term and all of its morphological normal forms against the char-trie.
// mainRoot = true forces the common root m_Root instead of the per-language one.
List <TerminToken> _TryAttachAll_(Pullenti.Ner.Token token, TerminParseAttr pars = TerminParseAttr.No, bool mainRoot = false)
{
    if (Termins.Count == 0 || token == null) {
        return(null);
    }
    string s = null;
    Pullenti.Ner.TextToken tt = token as Pullenti.Ner.TextToken;
    // For a referent token, descend to its first text token.
    if (tt == null && (token is Pullenti.Ner.ReferentToken)) {
        tt = (token as Pullenti.Ner.ReferentToken).BeginToken as Pullenti.Ner.TextToken;
    }
    List <TerminToken> res = null;
    bool wasVars = false;
    // Pick the trie root by language/charset unless the main root is forced.
    CharNode root = (mainRoot ? m_Root : this._getRoot(token.Morph.Language, token.Chars.IsLatinLetter));
    if (tt != null) {
        s = tt.Term;
        CharNode nod = root;
        bool noVars = false;
        int len0 = 0;
        if (((pars & TerminParseAttr.TermOnly)) != TerminParseAttr.No) {
            // TermOnly: skip the invariant-prefix pre-walk entirely.
        }
        else if (tt.InvariantPrefixLengthOfMorphVars <= s.Length) {
            // Pre-walk the trie over the prefix shared by all morph variants,
            // so each variant later descends from 'nod' with only its suffix.
            len0 = tt.InvariantPrefixLengthOfMorphVars;
            for (int i = 0; i < tt.InvariantPrefixLengthOfMorphVars; i++) {
                short ch = (short)s[i];
                if (nod.Children == null) {
                    noVars = true;
                    break;
                }
                CharNode nn;
                if (!nod.Children.TryGetValue(ch, out nn)) {
                    noVars = true;
                    break;
                }
                nod = nn;
            }
        }
        if (!noVars) {
            // Try the surface form itself.
            if (this._manageVar(token, pars, s, nod, len0, ref res)) {
                wasVars = true;
            }
            // Then try each distinct morphological normal form.
            for (int i = 0; i < tt.Morph.ItemsCount; i++) {
                if (((pars & TerminParseAttr.TermOnly)) != TerminParseAttr.No) {
                    continue;
                }
                Pullenti.Morph.MorphWordForm wf = tt.Morph[i] as Pullenti.Morph.MorphWordForm;
                if (wf == null) {
                    continue;
                }
                if (((pars & TerminParseAttr.InDictionaryOnly)) != TerminParseAttr.No) {
                    if (!wf.IsInDictionary) {
                        continue;
                    }
                }
                int j;
                bool ok = true;
                // Skip NormalCase when absent, equal to the surface form,
                // or already tried through an earlier word form.
                if (wf.NormalCase == null || wf.NormalCase == s) {
                    ok = false;
                }
                else {
                    for (j = 0; j < i; j++) {
                        Pullenti.Morph.MorphWordForm wf2 = tt.Morph[j] as Pullenti.Morph.MorphWordForm;
                        if (wf2 != null) {
                            if (wf2.NormalCase == wf.NormalCase || wf2.NormalFull == wf.NormalCase) {
                                break;
                            }
                        }
                    }
                    if (j < i) {
                        ok = false;
                    }
                }
                if (ok) {
                    if (this._manageVar(token, pars, wf.NormalCase, nod, tt.InvariantPrefixLengthOfMorphVars, ref res)) {
                        wasVars = true;
                    }
                }
                // Same deduplication for NormalFull.
                if (wf.NormalFull == null || wf.NormalFull == wf.NormalCase || wf.NormalFull == s) {
                    continue;
                }
                for (j = 0; j < i; j++) {
                    Pullenti.Morph.MorphWordForm wf2 = tt.Morph[j] as Pullenti.Morph.MorphWordForm;
                    if (wf2 != null && wf2.NormalFull == wf.NormalFull) {
                        break;
                    }
                }
                if (j < i) {
                    continue;
                }
                if (this._manageVar(token, pars, wf.NormalFull, nod, tt.InvariantPrefixLengthOfMorphVars, ref res)) {
                    wasVars = true;
                }
            }
        }
    }
    else if (token is Pullenti.Ner.NumberToken) {
        // Numbers are matched by the string form of their value.
        if (this._manageVar(token, pars, (token as Pullenti.Ner.NumberToken).Value.ToString(), root, 0, ref res)) {
            wasVars = true;
        }
    }
    else {
        return(null);
    }
    // Single-character term with no variants tried: fall back to the 1-char hash.
    if (!wasVars && s != null && s.Length == 1) {
        List <Termin> vars;
        if (m_Hash1.TryGetValue((short)s[0], out vars)) {
            foreach (Termin t in vars) {
                // Skip termins whose language is incompatible with the token's.
                if (!t.Lang.IsUndefined) {
                    if (!token.Morph.Language.IsUndefined) {
                        if (((token.Morph.Language & t.Lang)).IsUndefined) {
                            continue;
                        }
                    }
                }
                TerminToken ar = t.TryParse(tt, TerminParseAttr.No);
                if (ar == null) {
                    continue;
                }
                ar.Termin = t;
                // Keep only the bindings with the maximal token count.
                if (res == null) {
                    res = new List <TerminToken>();
                    res.Add(ar);
                }
                else if (ar.TokensCount > res[0].TokensCount) {
                    res.Clear();
                    res.Add(ar);
                }
                else if (ar.TokensCount == res[0].TokensCount) {
                    res.Add(ar);
                }
            }
        }
    }
    // Move the binding with the longest character span to the front of the list.
    if (res != null) {
        int ii = 0;
        int max = 0;
        for (int i = 0; i < res.Count; i++) {
            if (res[i].LengthChar > max) {
                max = res[i].LengthChar;
                ii = i;
            }
        }
        if (ii > 0) {
            TerminToken v = res[ii];
            res.RemoveAt(ii);
            res.Insert(0, v);
        }
    }
    return(res);
}
/// <summary>
/// Try to extract a conjunction starting at the given token.
/// </summary>
/// <param name="t">the initial token</param>
/// <return>the resulting token or null</return>
public static ConjunctionToken TryParse(Pullenti.Ner.Token t)
{
    if (!(t is Pullenti.Ner.TextToken))
        return null;
    if (t.IsComma)
    {
        // A comma may open a compound conjunction; otherwise it is a simple one itself.
        ConjunctionToken sub = TryParse(t.Next);
        if (sub != null)
        {
            sub.BeginToken = t;
            sub.IsSimple = false;
            return sub;
        }
        ConjunctionToken comma = new ConjunctionToken(t, t);
        comma.Typ = ConjunctionType.Comma;
        comma.IsSimple = true;
        comma.Normal = ",";
        return comma;
    }
    TerminToken hit = m_Ontology.TryParse(t, TerminParseAttr.No);
    if (hit != null)
    {
        if (t.IsValue("ТО", null))
        {
            // "ТО" that opens a noun phrase running past the ontology match is not a conjunction.
            NounPhraseToken npt = NounPhraseHelper.TryParse(t, NounPhraseParseAttr.ParseAdverbs, 0, null);
            if (npt != null && npt.EndChar > hit.EndToken.EndChar)
                return null;
        }
        if (hit.Termin.Tag2 != null)
        {
            if (!(hit.EndToken is Pullenti.Ner.TextToken))
                return null;
            if (hit.EndToken.GetMorphClassInDictionary().IsVerb)
            {
                if (!(hit.EndToken as Pullenti.Ner.TextToken).Term.EndsWith("АЯ"))
                    return null;
            }
        }
        ConjunctionToken known = new ConjunctionToken(t, hit.EndToken);
        known.Normal = hit.Termin.CanonicText;
        known.Typ = (ConjunctionType)hit.Termin.Tag;
        return known;
    }
    if (!t.GetMorphClassInDictionary().IsConjunction)
        return null;
    if (t.IsAnd || t.IsOr)
    {
        ConjunctionToken res = new ConjunctionToken(t, t);
        res.Normal = (t as Pullenti.Ner.TextToken).Term;
        res.IsSimple = true;
        res.Typ = (t.IsOr ? ConjunctionType.Or : ConjunctionType.And);
        // Absorb a trailing parenthesized or slashed "or" right after and/or.
        Pullenti.Ner.Token t1 = t.Next;
        if (t1 != null && t1.IsChar('(') && (t1.Next is Pullenti.Ner.TextToken) && t1.Next.IsOr && t1.Next.Next != null && t1.Next.Next.IsChar(')'))
            res.EndToken = t1.Next.Next;
        else if (t1 != null && t1.IsCharOf("\\/") && (t1.Next is Pullenti.Ner.TextToken) && t1.Next.IsOr)
            res.EndToken = t1.Next;
        return res;
    }
    string w = (t as Pullenti.Ner.TextToken).Term;
    if (w == "НИ")
    {
        ConjunctionToken not0 = new ConjunctionToken(t, t);
        not0.Normal = w;
        not0.Typ = ConjunctionType.Not;
        return not0;
    }
    if (w == "А" || w == "НО" || w == "ЗАТО" || w == "ОДНАКО")
    {
        ConjunctionToken but = new ConjunctionToken(t, t);
        but.Normal = w;
        but.Typ = ConjunctionType.But;
        return but;
    }
    return null;
}