/// <summary>
/// Writes a single morphology item to <paramref name="stream"/>.
/// Layout: one type byte (1 when the item is a word form, 0 otherwise), followed by the
/// class, case, gender, number and language values as shorts. A word form additionally
/// stores both normal forms, its undefined coefficient, the number of misc attributes
/// and then every attribute string.
/// </summary>
/// <param name="stream">Destination stream.</param>
/// <param name="bi">Item to write.</param>
void SerializeItem(Stream stream, Pullenti.Morph.MorphBaseInfo bi)
{
    Pullenti.Morph.MorphWordForm wordForm = bi as Pullenti.Morph.MorphWordForm;
    byte typeMarker = (byte)(wordForm == null ? 0 : 1);
    stream.WriteByte(typeMarker);

    // Fields shared by every kind of item.
    Pullenti.Ner.Core.Internal.SerializerHelper.SerializeShort(stream, bi.Class.Value);
    Pullenti.Ner.Core.Internal.SerializerHelper.SerializeShort(stream, bi.Case.Value);
    Pullenti.Ner.Core.Internal.SerializerHelper.SerializeShort(stream, (short)bi.Gender);
    Pullenti.Ner.Core.Internal.SerializerHelper.SerializeShort(stream, (short)bi.Number);
    Pullenti.Ner.Core.Internal.SerializerHelper.SerializeShort(stream, bi.Language.Value);

    if (wordForm == null)
    {
        return;
    }

    // Word-form specific tail.
    Pullenti.Ner.Core.Internal.SerializerHelper.SerializeString(stream, wordForm.NormalCase);
    Pullenti.Ner.Core.Internal.SerializerHelper.SerializeString(stream, wordForm.NormalFull);
    Pullenti.Ner.Core.Internal.SerializerHelper.SerializeShort(stream, wordForm.UndefCoef);

    Pullenti.Morph.MorphMiscInfo misc = wordForm.Misc;
    if (misc == null)
    {
        // No misc info: an attribute count of zero and nothing else.
        Pullenti.Ner.Core.Internal.SerializerHelper.SerializeInt(stream, 0);
        return;
    }

    Pullenti.Ner.Core.Internal.SerializerHelper.SerializeInt(stream, misc.Attrs.Count);
    foreach (string attr in misc.Attrs)
    {
        Pullenti.Ner.Core.Internal.SerializerHelper.SerializeString(stream, attr);
    }
}
/// <summary>
/// Creates a text variant, optionally initialised from a morphology item and/or a token.
/// </summary>
/// <param name="src">Morphology to copy from; when it is a word form, its normal forms and
/// undefined coefficient are taken over as well.</param>
/// <param name="t">Token used to compute the normal value when <paramref name="src"/> is not a word form.</param>
public NounPhraseItemTextVar(Pullenti.Morph.MorphBaseInfo src = null, Pullenti.Ner.Token t = null) : base()
{
    Pullenti.Morph.MorphWordForm wordForm = null;
    if (src != null)
    {
        this.CopyFrom(src);
        wordForm = src as Pullenti.Morph.MorphWordForm;
    }

    if (wordForm == null)
    {
        if (t != null)
        {
            NormalValue = t.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false);
        }
    }
    else
    {
        NormalValue = wordForm.NormalCase;
        // For plural forms the full normal form is kept as the singular value.
        if (wordForm.Number == Pullenti.Morph.MorphNumber.Plural && wordForm.NormalFull != null)
        {
            SingleNumberValue = wordForm.NormalFull;
        }
        UndefCoef = wordForm.UndefCoef;
    }

    // A source carrying the "неизм." attribute with no case information gets all cases.
    if (Case.IsUndefined && src != null && src.ContainsAttr("неизм.", null))
    {
        Case = Pullenti.Morph.MorphCase.AllCases;
    }
}
/// <summary>
/// Copies the normal forms, number, gender and misc info of <paramref name="wf"/> into
/// <c>obj.Morph</c>. Does nothing when <paramref name="wf"/> is null.
/// </summary>
/// <param name="obj">Semantic object whose morphology is updated.</param>
/// <param name="wf">Source word form (may be null).</param>
public static void _setMorph(Pullenti.Semantic.SemObject obj, Pullenti.Morph.MorphWordForm wf)
{
    if (wf != null)
    {
        obj.Morph.NormalCase = wf.NormalCase;
        // Fall back to the case-normal form when no full normal form is available.
        obj.Morph.NormalFull = (wf.NormalFull != null ? wf.NormalFull : wf.NormalCase);
        obj.Morph.Number = wf.Number;
        obj.Morph.Gender = wf.Gender;
        obj.Morph.Misc = wf.Misc;
    }
}
/// <summary>
/// Tells whether a verb form is reflexive: either its misc info carries the "возвр."
/// attribute or its normal form ends with "СЯ" / "СЬ".
/// </summary>
/// <param name="vf">Verb word form to inspect.</param>
/// <returns>true when the form is recognised as reflexive; otherwise false.</returns>
static bool _isRevVerb(Pullenti.Morph.MorphWordForm vf)
{
    // Misc is optional on a word form (other code in this file checks it for null),
    // so guard it instead of dereferencing unconditionally.
    if (vf.Misc != null && vf.Misc.Attrs.Contains("возвр."))
    {
        return true;
    }

    string normal = vf.NormalCase;
    if (normal == null)
    {
        return false;
    }

    // Ordinal comparison: the suffix test is on dictionary keys, not on culture-dependent text.
    return normal.EndsWith("СЯ", System.StringComparison.Ordinal)
        || normal.EndsWith("СЬ", System.StringComparison.Ordinal);
}
/// <summary>
/// Computes the coefficient for linking the source to the verb as an actant.
/// Except for the early exit on a missing verb form (which returns -1 without touching
/// <c>Coef</c>), the result is also stored in <c>Coef</c>.
/// </summary>
/// <returns>
/// -1 for a possible participle or a missing verb form; 0 when there is no preposition or the
/// source case is undefined; the NextModel parameter (halved for an empty preposition, halved
/// again for nominative) when a derivate group governs a matching case; 0.1 otherwise.
/// </returns>
double _calcActant()
{
    if (CanBeParticiple)
    {
        return Coef = -1;
    }

    Pullenti.Morph.MorphWordForm verbForm = ToVerb.LastVerb.VerbMorph;
    if (verbForm == null)
    {
        return -1;
    }
    if (FromPrep == null)
    {
        return Coef = 0;
    }

    Pullenti.Ner.MorphCollection fromMorph = From.Source.Source.Morph;
    string lemma = verbForm.NormalFull ?? verbForm.NormalCase;
    List<Pullenti.Semantic.Utils.DerivateGroup> groups = Pullenti.Semantic.Utils.DerivateService.FindDerivates(lemma, true, null);
    if (groups == null)
    {
        return Coef = 0.1;
    }

    foreach (Pullenti.Semantic.Utils.DerivateGroup group in groups)
    {
        // Only groups whose control model knows this preposition are relevant.
        if (group.Cm.Nexts == null || !group.Cm.Nexts.ContainsKey(FromPrep))
        {
            continue;
        }

        Pullenti.Morph.MorphCase governed = group.Cm.Nexts[FromPrep];
        if (((governed & fromMorph.Case)).IsUndefined)
        {
            // No common case: give up immediately if the source has no case at all.
            if (From.Source.Source.Morph.Case.IsUndefined)
            {
                return Coef = 0;
            }
            continue;
        }

        Coef = Pullenti.Semantic.SemanticService.Params.NextModel;
        if (string.IsNullOrEmpty(FromPrep))
        {
            if (fromMorph.Case.IsNominative)
            {
                Coef /= 2;
            }
            Coef /= 2;
        }
        return Coef;
    }

    return Coef = 0.1;
}
/// <summary>
/// Finds an item compatible with the requested case, number and gender.
/// The first matching item that is not a word form with a positive <c>UndefCoef</c> is
/// returned at once; otherwise the matching word form with the largest <c>UndefCoef</c> wins.
/// </summary>
/// <param name="cas">Required case; an undefined case disables the case filter.</param>
/// <param name="num">Required number; <c>Undefined</c> disables the filter.</param>
/// <param name="gen">Required gender; <c>Undefined</c> disables the filter.</param>
/// <returns>The selected item, or null when nothing matches or there are no items.</returns>
public Pullenti.Morph.MorphBaseInfo FindItem(Pullenti.Morph.MorphCase cas, Pullenti.Morph.MorphNumber num = Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender gen = Pullenti.Morph.MorphGender.Undefined)
{
    if (m_Items == null)
    {
        return null;
    }

    Pullenti.Morph.MorphBaseInfo bestUncertain = null;
    int bestCoef = 0;
    foreach (Pullenti.Morph.MorphBaseInfo candidate in m_Items)
    {
        if (!cas.IsUndefined && ((candidate.Case & cas)).IsUndefined)
        {
            continue;
        }
        if (num != Pullenti.Morph.MorphNumber.Undefined && ((num & candidate.Number)) == Pullenti.Morph.MorphNumber.Undefined)
        {
            continue;
        }
        if (gen != Pullenti.Morph.MorphGender.Undefined && ((gen & candidate.Gender)) == Pullenti.Morph.MorphGender.Undefined)
        {
            continue;
        }

        Pullenti.Morph.MorphWordForm wordForm = candidate as Pullenti.Morph.MorphWordForm;
        if (wordForm == null || wordForm.UndefCoef <= 0)
        {
            // A certain match ends the search.
            return candidate;
        }

        // Uncertain word form: remember only the one with the highest coefficient.
        if (wordForm.UndefCoef > bestCoef)
        {
            bestCoef = wordForm.UndefCoef;
            bestUncertain = candidate;
        }
    }

    return bestUncertain;
}
/// <summary>
/// Creates a collection, optionally as a copy of <paramref name="source"/>.
/// Every source item is cloned (word forms via <c>CopyFromWordForm</c>, other items via
/// <c>CopyFrom</c>) and the aggregated class, gender, case, number, language and voice
/// are copied, after which the recalculation flag is cleared.
/// </summary>
/// <param name="source">Collection to copy; null produces an empty collection.</param>
public MorphCollection(MorphCollection source = null)
{
    if (source == null)
    {
        return;
    }

    foreach (Pullenti.Morph.MorphBaseInfo original in source.Items)
    {
        Pullenti.Morph.MorphBaseInfo copy;
        Pullenti.Morph.MorphWordForm originalForm = original as Pullenti.Morph.MorphWordForm;
        if (originalForm != null)
        {
            Pullenti.Morph.MorphWordForm formCopy = new Pullenti.Morph.MorphWordForm();
            formCopy.CopyFromWordForm(originalForm);
            copy = formCopy;
        }
        else
        {
            copy = new Pullenti.Morph.MorphBaseInfo();
            copy.CopyFrom(original);
        }

        // The list is allocated lazily, only when there is something to store.
        if (m_Items == null)
        {
            m_Items = new List<Pullenti.Morph.MorphBaseInfo>();
        }
        m_Items.Add(copy);
    }

    m_Class = new Pullenti.Morph.MorphClass { Value = source.m_Class.Value };
    m_Gender = source.m_Gender;
    m_Case = new Pullenti.Morph.MorphCase { Value = source.m_Case.Value };
    m_Number = source.m_Number;
    m_Language = new Pullenti.Morph.MorphLang { Value = source.m_Language.Value };
    m_Voice = source.m_Voice;
    m_NeedRecalc = false;
}
/// <summary>
/// Reads one morphology item from <paramref name="stream"/>: a type byte (0 = base info,
/// anything else = word form), then class, case, gender, number and language as shorts.
/// For a word form the two normal forms, the undefined coefficient, the attribute count and
/// the attribute strings follow.
/// </summary>
/// <param name="stream">Stream positioned at the start of an item.</param>
/// <returns>The item that was read.</returns>
Pullenti.Morph.MorphBaseInfo DeserializeItem(Stream stream)
{
    int typeMarker = stream.ReadByte();

    Pullenti.Morph.MorphWordForm wordForm = null;
    Pullenti.Morph.MorphBaseInfo item;
    if (typeMarker == 0)
    {
        item = new Pullenti.Morph.MorphBaseInfo();
    }
    else
    {
        wordForm = new Pullenti.Morph.MorphWordForm();
        item = wordForm;
    }

    // Common part; the read order must mirror the write order.
    item.Class = new Pullenti.Morph.MorphClass { Value = Pullenti.Ner.Core.Internal.SerializerHelper.DeserializeShort(stream) };
    item.Case = new Pullenti.Morph.MorphCase { Value = Pullenti.Ner.Core.Internal.SerializerHelper.DeserializeShort(stream) };
    item.Gender = (Pullenti.Morph.MorphGender)Pullenti.Ner.Core.Internal.SerializerHelper.DeserializeShort(stream);
    item.Number = (Pullenti.Morph.MorphNumber)Pullenti.Ner.Core.Internal.SerializerHelper.DeserializeShort(stream);
    item.Language = new Pullenti.Morph.MorphLang { Value = Pullenti.Ner.Core.Internal.SerializerHelper.DeserializeShort(stream) };

    if (wordForm == null)
    {
        return item;
    }

    wordForm.NormalCase = Pullenti.Ner.Core.Internal.SerializerHelper.DeserializeString(stream);
    wordForm.NormalFull = Pullenti.Ner.Core.Internal.SerializerHelper.DeserializeString(stream);
    wordForm.UndefCoef = Pullenti.Ner.Core.Internal.SerializerHelper.DeserializeShort(stream);

    int attrCount = Pullenti.Ner.Core.Internal.SerializerHelper.DeserializeInt(stream);
    // Misc info is created only when there is at least one attribute to store.
    if (attrCount > 0 && wordForm.Misc == null)
    {
        wordForm.Misc = new Pullenti.Morph.MorphMiscInfo();
    }
    for (int read = 0; read < attrCount; read++)
    {
        wordForm.Misc.Attrs.Add(Pullenti.Ner.Core.Internal.SerializerHelper.DeserializeString(stream));
    }

    return item;
}
/// <summary>
/// Removes the items that do not match the given case. Items that match only partially are
/// replaced by copies whose case is narrowed to the intersection with <paramref name="cas"/>.
/// When the item list exists but is empty, the aggregated case itself is narrowed.
/// The collection is always marked for recalculation afterwards.
/// </summary>
/// <param name="cas">Case (or set of cases) to keep.</param>
public void RemoveItems(Pullenti.Morph.MorphCase cas)
{
    if (m_Items == null)
    {
        return;
    }
    if (m_Items.Count == 0)
    {
        m_Case = m_Case & cas;
    }

    // Walk backwards so that removals do not shift the indexes still to be visited.
    int index = m_Items.Count;
    while (--index >= 0)
    {
        Pullenti.Morph.MorphBaseInfo current = m_Items[index];
        if (((current.Case & cas)).IsUndefined)
        {
            m_Items.RemoveAt(index);
            m_NeedRecalc = true;
            continue;
        }

        if (((current.Case & cas)) != current.Case)
        {
            // Narrow a copy rather than the original instance.
            Pullenti.Morph.MorphWordForm currentForm = current as Pullenti.Morph.MorphWordForm;
            if (currentForm != null)
            {
                Pullenti.Morph.MorphWordForm narrowedForm = new Pullenti.Morph.MorphWordForm();
                narrowedForm.CopyFromWordForm(currentForm);
                narrowedForm.Case &= cas;
                m_Items[index] = narrowedForm;
            }
            else
            {
                Pullenti.Morph.MorphBaseInfo narrowedInfo = new Pullenti.Morph.MorphBaseInfo();
                narrowedInfo.CopyFrom(current);
                narrowedInfo.Case &= cas;
                m_Items[index] = narrowedInfo;
            }
            m_NeedRecalc = true;
        }
    }

    m_NeedRecalc = true;
}
/// <summary>
/// Tries to look this token up in a dictionary: first by <c>Term</c>, then by the
/// <c>NormalCase</c> and <c>NormalFull</c> of every word form in <c>Morph</c>.
/// </summary>
/// <param name="dict">Dictionary to search; null yields null.</param>
/// <returns>The value of the first key found, or null when nothing matches.</returns>
public object CheckValue(Dictionary<string, object> dict)
{
    if (dict == null)
    {
        return null;
    }

    object found;
    if (dict.TryGetValue(Term, out found))
    {
        return found;
    }
    if (Morph == null)
    {
        return null;
    }

    foreach (Pullenti.Morph.MorphBaseInfo item in Morph.Items)
    {
        Pullenti.Morph.MorphWordForm form = item as Pullenti.Morph.MorphWordForm;
        if (form == null)
        {
            continue;
        }

        string normalCase = form.NormalCase;
        string normalFull = form.NormalFull;
        if (normalCase != null && dict.TryGetValue(normalCase, out found))
        {
            return found;
        }
        // The full form is tried only when it differs from the case-normal form.
        if (normalFull != null && normalCase != normalFull && dict.TryGetValue(normalFull, out found))
        {
            return found;
        }
    }

    return null;
}
/// <summary>
/// Registers a text token in the statistics. All spelling variants of the token (its term,
/// the absolute normal value and the normal forms of its word forms) are mapped to a single
/// <c>StatisticWordInfo</c>, which is created when none of the variants is known yet.
/// Also updates the total count, the counters of feminine/masculine verbs that follow the
/// token, and the counter of occurrences not preceded by a capitalised word.
/// </summary>
/// <param name="tt">Token to register.</param>
/// <returns>The statistics record shared by all variants of the token.</returns>
StatisticWordInfo AddToken(Pullenti.Ner.TextToken tt)
{
    List<string> vars = new List<string>();
    vars.Add(tt.Term);
    string absolute = MiscHelper.GetAbsoluteNormalValue(tt.Term, false);
    if (absolute != null && !vars.Contains(absolute))
    {
        vars.Add(absolute);
    }
    foreach (Pullenti.Morph.MorphBaseInfo item in tt.Morph.Items)
    {
        Pullenti.Morph.MorphWordForm wf = item as Pullenti.Morph.MorphWordForm;
        if (wf == null)
        {
            continue;
        }
        if (wf.NormalCase != null && !vars.Contains(wf.NormalCase))
        {
            vars.Add(wf.NormalCase);
        }
        if (wf.NormalFull != null && !vars.Contains(wf.NormalFull))
        {
            vars.Add(wf.NormalFull);
        }
    }

    // Reuse the record of the first variant that is already known.
    StatisticWordInfo res = null;
    foreach (string v in vars)
    {
        if (m_Items.TryGetValue(v, out res))
        {
            break;
        }
    }
    if (res == null)
    {
        // Fixed: the statement terminator used to sit outside the closing brace.
        res = new StatisticWordInfo() { Normal = tt.Lemma };
    }
    foreach (string v in vars)
    {
        if (!m_Items.ContainsKey(v))
        {
            m_Items.Add(v, res);
        }
    }
    res.TotalCount++;

    // Gender of a lower-case Cyrillic verb immediately following the token.
    if ((tt.Next is Pullenti.Ner.TextToken) && tt.Next.Chars.IsAllLower)
    {
        if (tt.Next.Chars.IsCyrillicLetter && tt.Next.GetMorphClassInDictionary().IsVerb)
        {
            Pullenti.Morph.MorphGender g = tt.Next.Morph.Gender;
            if (g == Pullenti.Morph.MorphGender.Feminie)
            {
                res.FemaleVerbsAfterCount++;
            }
            else if (((g & Pullenti.Morph.MorphGender.Masculine)) != Pullenti.Morph.MorphGender.Undefined)
            {
                res.MaleVerbsAfterCount++;
            }
        }
    }

    if (tt.Previous != null)
    {
        bool afterCapitalisedWord = (tt.Previous is Pullenti.Ner.TextToken) && tt.Previous.Chars.IsLetter && !tt.Previous.Chars.IsAllLower;
        if (!afterCapitalisedWord)
        {
            res.NotCapitalBeforeCount++;
        }
    }
    return res;
}

// Statistics records keyed by every known spelling variant of a word.
Dictionary<string, StatisticWordInfo> m_Items = new Dictionary<string, StatisticWordInfo>();

/// <summary>
/// Looks up the statistics record of a token by its term, optionally by its absolute normal
/// value, and then by the normal forms of its word forms.
/// </summary>
/// <param name="tt">Token to look up; null yields null.</param>
/// <param name="doAbsolute">Whether the absolute normal value is tried as a key as well.</param>
/// <returns>The record found, or null.</returns>
StatisticWordInfo FindItem(Pullenti.Ner.TextToken tt, bool doAbsolute = true)
{
    if (tt == null)
    {
        return null;
    }
    StatisticWordInfo res;
    if (m_Items.TryGetValue(tt.Term, out res))
    {
        return res;
    }
    if (doAbsolute)
    {
        string absolute = MiscHelper.GetAbsoluteNormalValue(tt.Term, false);
        if (absolute != null && m_Items.TryGetValue(absolute, out res))
        {
            return res;
        }
    }
    foreach (Pullenti.Morph.MorphBaseInfo item in tt.Morph.Items)
    {
        Pullenti.Morph.MorphWordForm wf = item as Pullenti.Morph.MorphWordForm;
        if (wf == null)
        {
            continue;
        }
        // A missing NormalCase is looked up as the empty string (a null key would throw).
        if (m_Items.TryGetValue(wf.NormalCase ?? "", out res))
        {
            return res;
        }
        if (wf.NormalFull != null && m_Items.TryGetValue(wf.NormalFull, out res))
        {
            return res;
        }
    }
    return null;
}

/// <summary>
/// Counts one occurrence of the pair (<paramref name="b1"/>, <paramref name="b2"/>) in both
/// the forward (first word to second word) and the reverse (second word to first word) tables.
/// </summary>
/// <param name="b1">First word of the pair.</param>
/// <param name="b2">Second word of the pair.</param>
void AddBigramm(StatisticWordInfo b1, StatisticWordInfo b2)
{
    Dictionary<string, int> counts;
    int seen;

    if (!m_Bigramms.TryGetValue(b1.Normal, out counts))
    {
        m_Bigramms.Add(b1.Normal, (counts = new Dictionary<string, int>()));
    }
    // TryGetValue leaves 'seen' at 0 for a new key, so a single lookup covers both cases.
    counts.TryGetValue(b2.Normal, out seen);
    counts[b2.Normal] = seen + 1;

    if (!m_BigrammsRev.TryGetValue(b2.Normal, out counts))
    {
        m_BigrammsRev.Add(b2.Normal, (counts = new Dictionary<string, int>()));
    }
    counts.TryGetValue(b1.Normal, out seen);
    counts[b1.Normal] = seen + 1;
}

// Pair counters: first word -> (second word -> count) and the reverse direction.
Dictionary<string, Dictionary<string, int>> m_Bigramms = new Dictionary<string, Dictionary<string, int>>();
Dictionary<string, Dictionary<string, int>> m_BigrammsRev = new Dictionary<string, Dictionary<string, int>>();
// Same two-level shape as the bigram tables; not populated by the members above.
Dictionary<string, Dictionary<string, int>> m_Initials = new Dictionary<string, Dictionary<string, int>>();
Dictionary<string, Dictionary<string, int>> m_InitialsRev = new Dictionary<string, Dictionary<string, int>>();
/// <summary>
/// Tries to parse a Russian verb phrase starting at <paramref name="t"/>.
/// Tokens are scanned forward while they are text tokens. Each token is classified by the
/// local <c>ty</c>: 1 = verb, 2 = adverb, 3 = participle/adjective-like item (normalised as
/// an adjective), 0 = not part of the phrase, which ends the scan.
/// </summary>
/// <remarks>
/// - The particle "НЕ" is remembered and attached to the next accepted item
///   (its BeginToken is moved to the particle and Not is set).
/// - A preposition is accepted only at the very first token and only when
///   <paramref name="canBePartition"/> is set; the first item must then agree in case with
///   the preposition's NextCase, otherwise the whole parse returns null.
/// - A second finite verb normally ends the phrase, unless it is an infinitive ("инф.")
///   or the previous verb was a form of "to be" (verbBeBefore).
/// - For verbs that carry case information the normal form is obtained through
///   MorphologyService.GetWordform on the term prefixed with "КК", and the two prefix
///   characters are stripped again. NOTE(review): the reason for the prefix is not
///   visible here — confirm.
/// - Trailing adverbs are removed from the result, and the phrase must contain at least
///   one item of kind 1 or 3.
/// NOTE(review): the result of GetNormalCaseText is used with EndsWith without a null
/// check — confirm it cannot return null on this path.
/// </remarks>
/// <param name="t">First token of the candidate phrase.</param>
/// <param name="canBePartition">Allows participles and a leading preposition.</param>
/// <param name="canBeAdjPartition">Allows adjectives (including short forms) to be taken as items of kind 3.</param>
/// <param name="forceParse">Accepts a token that is both verb and noun as a verb when no other rule decides.</param>
/// <returns>The parsed phrase, or null when no verb was found or the preposition case does not match.</returns>
static VerbPhraseToken TryParseRu(Pullenti.Ner.Token t, bool canBePartition, bool canBeAdjPartition, bool forceParse) { VerbPhraseToken res = null; Pullenti.Ner.Token t0 = t; Pullenti.Ner.Token not = null; bool hasVerb = false; bool verbBeBefore = false; PrepositionToken prep = null; for (; t != null; t = t.Next) { if (!(t is Pullenti.Ner.TextToken)) { break; } Pullenti.Ner.TextToken tt = t as Pullenti.Ner.TextToken; bool isParticiple = false; if (tt.Term == "НЕ") { not = t; continue; } int ty = 0; string norm = null; Pullenti.Morph.MorphClass mc = tt.GetMorphClassInDictionary(); if (tt.Term == "НЕТ") { if (hasVerb) { break; } ty = 1; } else if (tt.Term == "ДОПУСТИМО") { ty = 3; } else if (mc.IsAdverb && !mc.IsVerb) { ty = 2; } else if (tt.IsPureVerb || tt.IsVerbBe) { ty = 1; if (hasVerb) { if (!tt.Morph.ContainsAttr("инф.", null)) { if (verbBeBefore) { } else { break; } } } } else if (mc.IsVerb) { if (mc.IsPreposition || mc.IsMisc || mc.IsPronoun) { } else if (mc.IsNoun) { if (tt.Term == "СТАЛИ" || tt.Term == "СТЕКЛО" || tt.Term == "БЫЛИ") { ty = 1; } else if (!tt.Chars.IsAllLower && !MiscHelper.CanBeStartOfSentence(tt)) { ty = 1; } else if (mc.IsAdjective && canBePartition) { ty = 1; } else if (forceParse) { ty = 1; } } else if (mc.IsProper) { if (tt.Chars.IsAllLower) { ty = 1; } } else { ty = 1; } if (mc.IsAdjective) { isParticiple = true; } if (!tt.Morph.Case.IsUndefined) { isParticiple = true; } if (!canBePartition && isParticiple) { break; } if (hasVerb) { if (tt.Morph.ContainsAttr("инф.", null)) { } else if (!isParticiple) { } else { break; } } } else if ((mc.IsAdjective && tt.Morph.ContainsAttr("к.ф.", null) && tt.Term.EndsWith("О")) && NounPhraseHelper.TryParse(tt, NounPhraseParseAttr.No, 0, null) == null) { ty = 2; } else if (mc.IsAdjective && ((canBePartition || canBeAdjPartition))) { if (tt.Morph.ContainsAttr("к.ф.", null) && !canBeAdjPartition) { break; } norm = tt.GetNormalCaseText(Pullenti.Morph.MorphClass.Adjective, 
Pullenti.Morph.MorphNumber.Singular, Pullenti.Morph.MorphGender.Masculine, false); if (norm.EndsWith("ЙШИЙ")) { } else { List <Pullenti.Semantic.Utils.DerivateGroup> grs = Pullenti.Semantic.Utils.DerivateService.FindDerivates(norm, true, null); if (grs != null && grs.Count > 0) { bool hVerb = false; bool hPart = false; foreach (Pullenti.Semantic.Utils.DerivateGroup gr in grs) { foreach (Pullenti.Semantic.Utils.DerivateWord w in gr.Words) { if (w.Class.IsAdjective && w.Class.IsVerb) { if (w.Spelling == norm) { hPart = true; } } else if (w.Class.IsVerb) { hVerb = true; } } } if (hPart && hVerb) { ty = 3; } else if (canBeAdjPartition) { ty = 3; } if (ty != 3 && !string.IsNullOrEmpty(grs[0].Prefix) && norm.StartsWith(grs[0].Prefix)) { hVerb = false; hPart = false; string norm1 = norm.Substring(grs[0].Prefix.Length); grs = Pullenti.Semantic.Utils.DerivateService.FindDerivates(norm1, true, null); if (grs != null && grs.Count > 0) { foreach (Pullenti.Semantic.Utils.DerivateGroup gr in grs) { foreach (Pullenti.Semantic.Utils.DerivateWord w in gr.Words) { if (w.Class.IsAdjective && w.Class.IsVerb) { if (w.Spelling == norm1) { hPart = true; } } else if (w.Class.IsVerb) { hVerb = true; } } } } if (hPart && hVerb) { ty = 3; } } } } } if (ty == 0 && t == t0 && canBePartition) { prep = PrepositionHelper.TryParse(t); if (prep != null) { t = prep.EndToken; continue; } } if (ty == 0) { break; } if (res == null) { res = new VerbPhraseToken(t0, t); } res.EndToken = t; VerbPhraseItemToken it = new VerbPhraseItemToken(t, t) { Morph = new Pullenti.Ner.MorphCollection(t.Morph) }; if (not != null) { it.BeginToken = not; it.Not = true; not = null; } it.IsAdverb = ty == 2; if (prep != null && !t.Morph.Case.IsUndefined && res.Items.Count == 0) { if (((prep.NextCase & t.Morph.Case)).IsUndefined) { return(null); } it.Morph.RemoveItems(prep.NextCase); res.Preposition = prep; } if (norm == null) { norm = t.GetNormalCaseText((ty == 3 ? Pullenti.Morph.MorphClass.Adjective : (ty == 2 ? 
Pullenti.Morph.MorphClass.Adverb : Pullenti.Morph.MorphClass.Verb)), Pullenti.Morph.MorphNumber.Singular, Pullenti.Morph.MorphGender.Masculine, false); if (ty == 1 && !tt.Morph.Case.IsUndefined) { Pullenti.Morph.MorphWordForm mi = new Pullenti.Morph.MorphWordForm() { Case = Pullenti.Morph.MorphCase.Nominative, Number = Pullenti.Morph.MorphNumber.Singular, Gender = Pullenti.Morph.MorphGender.Masculine }; foreach (Pullenti.Morph.MorphBaseInfo mit in tt.Morph.Items) { if (mit is Pullenti.Morph.MorphWordForm) { mi.Misc = (mit as Pullenti.Morph.MorphWordForm).Misc; break; } } string nnn = Pullenti.Morph.MorphologyService.GetWordform("КК" + (t as Pullenti.Ner.TextToken).Term, mi); if (nnn != null) { norm = nnn.Substring(2); } } } it.Normal = norm; res.Items.Add(it); if (!hasVerb && ((ty == 1 || ty == 3))) { res.Morph = it.Morph; hasVerb = true; } if (ty == 1 || ty == 3) { if (ty == 1 && tt.IsVerbBe) { verbBeBefore = true; } else { verbBeBefore = false; } } } if (!hasVerb) { return(null); } for (int i = res.Items.Count - 1; i > 0; i--) { if (res.Items[i].IsAdverb) { res.Items.RemoveAt(i); res.EndToken = res.Items[i - 1].EndToken; } else { break; } } return(res); }
/// <summary>
/// Collects every termin that can be attached starting at <paramref name="token"/>.
/// </summary>
/// <remarks>
/// - Returns null when there are no termins, the token is null, or the token is neither a
///   text token (directly, or as the first token of a referent token) nor a number token.
/// - The search starts from m_Root when <paramref name="mainRoot"/> is set, otherwise from
///   the root chosen by _getRoot for the token's language and Latin/non-Latin script.
/// - For a text token the character tree is first descended along the invariant prefix of
///   the term (skipped when TermOnly is requested). If the tree has no such path, no
///   variant is tried at all.
/// - _manageVar is then called for the term itself and, unless TermOnly is requested, for
///   the NormalCase and NormalFull of each word form. A value is skipped when it equals the
///   term or was already produced by an earlier word form (the inner j-loops detect this
///   by stopping before i). With InDictionaryOnly, non-dictionary forms are skipped.
/// - For a number token the string form of its Value is looked up from the root.
/// - When nothing was found and the term is a single character, the termins registered
///   in m_Hash1 for that character are tried; only results with the greatest TokensCount
///   are kept.
/// - Finally the result with the greatest LengthChar (the first one on ties) is moved to
///   the front of the list.
/// </remarks>
/// <param name="token">Token at which to start attaching.</param>
/// <param name="pars">Parse attributes (TermOnly and InDictionaryOnly are checked here; the value is also passed on to _manageVar).</param>
/// <param name="mainRoot">Use m_Root instead of the language-specific root.</param>
/// <returns>The list of matches, or null when there is none.</returns>
List <TerminToken> _TryAttachAll_(Pullenti.Ner.Token token, TerminParseAttr pars = TerminParseAttr.No, bool mainRoot = false) { if (Termins.Count == 0 || token == null) { return(null); } string s = null; Pullenti.Ner.TextToken tt = token as Pullenti.Ner.TextToken; if (tt == null && (token is Pullenti.Ner.ReferentToken)) { tt = (token as Pullenti.Ner.ReferentToken).BeginToken as Pullenti.Ner.TextToken; } List <TerminToken> res = null; bool wasVars = false; CharNode root = (mainRoot ? m_Root : this._getRoot(token.Morph.Language, token.Chars.IsLatinLetter)); if (tt != null) { s = tt.Term; CharNode nod = root; bool noVars = false; int len0 = 0; if (((pars & TerminParseAttr.TermOnly)) != TerminParseAttr.No) { } else if (tt.InvariantPrefixLengthOfMorphVars <= s.Length) { len0 = tt.InvariantPrefixLengthOfMorphVars; for (int i = 0; i < tt.InvariantPrefixLengthOfMorphVars; i++) { short ch = (short)s[i]; if (nod.Children == null) { noVars = true; break; } CharNode nn; if (!nod.Children.TryGetValue(ch, out nn)) { noVars = true; break; } nod = nn; } } if (!noVars) { if (this._manageVar(token, pars, s, nod, len0, ref res)) { wasVars = true; } for (int i = 0; i < tt.Morph.ItemsCount; i++) { if (((pars & TerminParseAttr.TermOnly)) != TerminParseAttr.No) { continue; } Pullenti.Morph.MorphWordForm wf = tt.Morph[i] as Pullenti.Morph.MorphWordForm; if (wf == null) { continue; } if (((pars & TerminParseAttr.InDictionaryOnly)) != TerminParseAttr.No) { if (!wf.IsInDictionary) { continue; } } int j; bool ok = true; if (wf.NormalCase == null || wf.NormalCase == s) { ok = false; } else { for (j = 0; j < i; j++) { Pullenti.Morph.MorphWordForm wf2 = tt.Morph[j] as Pullenti.Morph.MorphWordForm; if (wf2 != null) { if (wf2.NormalCase == wf.NormalCase || wf2.NormalFull == wf.NormalCase) { break; } } } if (j < i) { ok = false; } } if (ok) { if (this._manageVar(token, pars, wf.NormalCase, nod, tt.InvariantPrefixLengthOfMorphVars, ref res)) { wasVars = true; } } if (wf.NormalFull == null || 
wf.NormalFull == wf.NormalCase || wf.NormalFull == s) { continue; } for (j = 0; j < i; j++) { Pullenti.Morph.MorphWordForm wf2 = tt.Morph[j] as Pullenti.Morph.MorphWordForm; if (wf2 != null && wf2.NormalFull == wf.NormalFull) { break; } } if (j < i) { continue; } if (this._manageVar(token, pars, wf.NormalFull, nod, tt.InvariantPrefixLengthOfMorphVars, ref res)) { wasVars = true; } } } } else if (token is Pullenti.Ner.NumberToken) { if (this._manageVar(token, pars, (token as Pullenti.Ner.NumberToken).Value.ToString(), root, 0, ref res)) { wasVars = true; } } else { return(null); } if (!wasVars && s != null && s.Length == 1) { List <Termin> vars; if (m_Hash1.TryGetValue((short)s[0], out vars)) { foreach (Termin t in vars) { if (!t.Lang.IsUndefined) { if (!token.Morph.Language.IsUndefined) { if (((token.Morph.Language & t.Lang)).IsUndefined) { continue; } } } TerminToken ar = t.TryParse(tt, TerminParseAttr.No); if (ar == null) { continue; } ar.Termin = t; if (res == null) { res = new List <TerminToken>(); res.Add(ar); } else if (ar.TokensCount > res[0].TokensCount) { res.Clear(); res.Add(ar); } else if (ar.TokensCount == res[0].TokensCount) { res.Add(ar); } } } } if (res != null) { int ii = 0; int max = 0; for (int i = 0; i < res.Count; i++) { if (res[i].LengthChar > max) { max = res[i].LengthChar; ii = i; } } if (ii > 0) { TerminToken v = res[ii]; res.RemoveAt(ii); res.Insert(0, v); } } return(res); }
/// <summary>
/// Builds the requested word form by delegating to the engine that matches the word.
/// Words whose first character is not Cyrillic go to the English engine. Cyrillic words go
/// to the first of the Russian, Ukrainian, Belarusian and Kazakh engines that is loaded for
/// its language and requested by <paramref name="lang"/>, with the Russian engine as fallback.
/// </summary>
/// <param name="word">Source word; its first character selects the script.</param>
/// <param name="cla">Required part of speech.</param>
/// <param name="gender">Required gender.</param>
/// <param name="cas">Required case.</param>
/// <param name="num">Required number.</param>
/// <param name="lang">Language(s) to consider for Cyrillic words.</param>
/// <param name="addInfo">Additional morphology passed through to the engine.</param>
/// <returns>Whatever the selected engine returns.</returns>
public string GetWordform(string word, Pullenti.Morph.MorphClass cla, Pullenti.Morph.MorphGender gender, Pullenti.Morph.MorphCase cas, Pullenti.Morph.MorphNumber num, Pullenti.Morph.MorphLang lang, Pullenti.Morph.MorphWordForm addInfo)
{
    if (!Pullenti.Morph.LanguageHelper.IsCyrillicChar(word[0]))
    {
        return m_EngineEn.GetWordform(word, cla, gender, cas, num, addInfo);
    }

    // The order of these checks defines the priority between Cyrillic languages.
    if (m_EngineRu.Language.IsRu && lang.IsRu)
    {
        return m_EngineRu.GetWordform(word, cla, gender, cas, num, addInfo);
    }
    if (m_EngineUa.Language.IsUa && lang.IsUa)
    {
        return m_EngineUa.GetWordform(word, cla, gender, cas, num, addInfo);
    }
    if (m_EngineBy.Language.IsBy && lang.IsBy)
    {
        return m_EngineBy.GetWordform(word, cla, gender, cas, num, addInfo);
    }
    if (m_EngineKz.Language.IsKz && lang.IsKz)
    {
        return m_EngineKz.GetWordform(word, cla, gender, cas, num, addInfo);
    }

    return m_EngineRu.GetWordform(word, cla, gender, cas, num, addInfo);
}
public List <Pullenti.Morph.MorphToken> Run(string text, bool onlyTokenizing, Pullenti.Morph.MorphLang dlang, bool goodText, ProgressChangedEventHandler progress) { if (string.IsNullOrEmpty(text)) { return(null); } TextWrapper twr = new TextWrapper(text, goodText); TextWrapper.CharsList twrch = twr.Chars; List <Pullenti.Morph.MorphToken> res = new List <Pullenti.Morph.MorphToken>(text.Length / 6); Dictionary <string, UniLexWrap> uniLex = new Dictionary <string, UniLexWrap>(); int i; int j; string term0 = null; int pureRusWords = 0; int pureUkrWords = 0; int pureByWords = 0; int pureKzWords = 0; int totRusWords = 0; int totUkrWords = 0; int totByWords = 0; int totKzWords = 0; for (i = 0; i < twr.Length; i++) { int ty = this.GetCharTyp(twrch[i]); if (ty == 0) { continue; } if (ty > 2) { j = i + 1; } else { for (j = i + 1; j < twr.Length; j++) { if (this.GetCharTyp(twrch[j]) != ty) { break; } } } string wstr = text.Substring(i, j - i); string term = null; if (goodText) { term = wstr; } else { string trstr = Pullenti.Morph.LanguageHelper.TransliteralCorrection(wstr, term0, false); term = Pullenti.Morph.LanguageHelper.CorrectWord(trstr); } if (string.IsNullOrEmpty(term)) { i = j - 1; continue; } Pullenti.Morph.MorphLang lang = Pullenti.Morph.LanguageHelper.GetWordLang(term); if (lang == Pullenti.Morph.MorphLang.UA) { pureUkrWords++; } else if (lang == Pullenti.Morph.MorphLang.RU) { pureRusWords++; } else if (lang == Pullenti.Morph.MorphLang.BY) { pureByWords++; } else if (lang == Pullenti.Morph.MorphLang.KZ) { pureKzWords++; } if (((lang & Pullenti.Morph.MorphLang.RU)) != Pullenti.Morph.MorphLang.Unknown) { totRusWords++; } if (((lang & Pullenti.Morph.MorphLang.UA)) != Pullenti.Morph.MorphLang.Unknown) { totUkrWords++; } if (((lang & Pullenti.Morph.MorphLang.BY)) != Pullenti.Morph.MorphLang.Unknown) { totByWords++; } if (((lang & Pullenti.Morph.MorphLang.KZ)) != Pullenti.Morph.MorphLang.Unknown) { totKzWords++; } if (ty == 1) { term0 = term; } UniLexWrap lemmas = null; 
if (ty == 1 && !onlyTokenizing) { if (!uniLex.TryGetValue(term, out lemmas)) { UniLexWrap nuni = new UniLexWrap(lang); uniLex.Add(term, nuni); lemmas = nuni; } } Pullenti.Morph.MorphToken tok = new Pullenti.Morph.MorphToken(); tok.Term = term; tok.BeginChar = i; if (i == 733860) { } tok.EndChar = j - 1; tok.Tag = lemmas; res.Add(tok); i = j - 1; } Pullenti.Morph.MorphLang defLang = new Pullenti.Morph.MorphLang(); if (dlang != null) { defLang.Value = dlang.Value; } if (pureRusWords > pureUkrWords && pureRusWords > pureByWords && pureRusWords > pureKzWords) { defLang = Pullenti.Morph.MorphLang.RU; } else if (totRusWords > totUkrWords && totRusWords > totByWords && totRusWords > totKzWords) { defLang = Pullenti.Morph.MorphLang.RU; } else if (pureUkrWords > pureRusWords && pureUkrWords > pureByWords && pureUkrWords > pureKzWords) { defLang = Pullenti.Morph.MorphLang.UA; } else if (totUkrWords > totRusWords && totUkrWords > totByWords && totUkrWords > totKzWords) { defLang = Pullenti.Morph.MorphLang.UA; } else if (pureKzWords > pureRusWords && pureKzWords > pureUkrWords && pureKzWords > pureByWords) { defLang = Pullenti.Morph.MorphLang.KZ; } else if (totKzWords > totRusWords && totKzWords > totUkrWords && totKzWords > totByWords) { defLang = Pullenti.Morph.MorphLang.KZ; } else if (pureByWords > pureRusWords && pureByWords > pureUkrWords && pureByWords > pureKzWords) { defLang = Pullenti.Morph.MorphLang.BY; } else if (totByWords > totRusWords && totByWords > totUkrWords && totByWords > totKzWords) { if (totRusWords > 10 && totByWords > (totRusWords + 20)) { defLang = Pullenti.Morph.MorphLang.BY; } else if (totRusWords == 0 || totByWords >= (totRusWords * 2)) { defLang = Pullenti.Morph.MorphLang.BY; } } if (((defLang.IsUndefined || defLang.IsUa)) && totRusWords > 0) { if (((totUkrWords > totRusWords && m_EngineUa.Language.IsUa)) || ((totByWords > totRusWords && m_EngineBy.Language.IsBy)) || ((totKzWords > totRusWords && m_EngineKz.Language.IsKz))) { int cou0 = 0; 
totRusWords = (totByWords = (totUkrWords = (totKzWords = 0))); foreach (KeyValuePair <string, UniLexWrap> kp in uniLex) { Pullenti.Morph.MorphLang lang = new Pullenti.Morph.MorphLang(); kp.Value.WordForms = this.ProcessOneWord(kp.Key, ref lang); if (kp.Value.WordForms != null) { foreach (Pullenti.Morph.MorphWordForm wf in kp.Value.WordForms) { lang |= wf.Language; } } kp.Value.Lang = lang; if (lang.IsRu) { totRusWords++; } if (lang.IsUa) { totUkrWords++; } if (lang.IsBy) { totByWords++; } if (lang.IsKz) { totKzWords++; } if (lang.IsCyrillic) { cou0++; } if (cou0 >= 100) { break; } } if (totRusWords > ((totByWords / 2)) && totRusWords > ((totUkrWords / 2))) { defLang = Pullenti.Morph.MorphLang.RU; } else if (totUkrWords > ((totRusWords / 2)) && totUkrWords > ((totByWords / 2))) { defLang = Pullenti.Morph.MorphLang.UA; } else if (totByWords > ((totRusWords / 2)) && totByWords > ((totUkrWords / 2))) { defLang = Pullenti.Morph.MorphLang.BY; } } else if (defLang.IsUndefined) { defLang = Pullenti.Morph.MorphLang.RU; } } int cou = 0; totRusWords = (totByWords = (totUkrWords = (totKzWords = 0))); foreach (KeyValuePair <string, UniLexWrap> kp in uniLex) { Pullenti.Morph.MorphLang lang = defLang; if (lang.IsUndefined) { if (totRusWords > totByWords && totRusWords > totUkrWords && totRusWords > totKzWords) { lang = Pullenti.Morph.MorphLang.RU; } else if (totUkrWords > totRusWords && totUkrWords > totByWords && totUkrWords > totKzWords) { lang = Pullenti.Morph.MorphLang.UA; } else if (totByWords > totRusWords && totByWords > totUkrWords && totByWords > totKzWords) { lang = Pullenti.Morph.MorphLang.BY; } else if (totKzWords > totRusWords && totKzWords > totUkrWords && totKzWords > totByWords) { lang = Pullenti.Morph.MorphLang.KZ; } } kp.Value.WordForms = this.ProcessOneWord(kp.Key, ref lang); kp.Value.Lang = lang; if (((lang & Pullenti.Morph.MorphLang.RU)) != Pullenti.Morph.MorphLang.Unknown) { totRusWords++; } if (((lang & Pullenti.Morph.MorphLang.UA)) != 
Pullenti.Morph.MorphLang.Unknown) { totUkrWords++; } if (((lang & Pullenti.Morph.MorphLang.BY)) != Pullenti.Morph.MorphLang.Unknown) { totByWords++; } if (((lang & Pullenti.Morph.MorphLang.KZ)) != Pullenti.Morph.MorphLang.Unknown) { totKzWords++; } if (progress != null) { this.OnProgress(cou, uniLex.Count, progress); } cou++; } List <Pullenti.Morph.MorphWordForm> emptyList = null; foreach (Pullenti.Morph.MorphToken r in res) { UniLexWrap uni = r.Tag as UniLexWrap; r.Tag = null; if (uni == null || uni.WordForms == null || uni.WordForms.Count == 0) { if (emptyList == null) { emptyList = new List <Pullenti.Morph.MorphWordForm>(); } r.WordForms = emptyList; if (uni != null) { r.Language = uni.Lang; } } else { r.WordForms = uni.WordForms; } } if (!goodText) { for (i = 0; i < (res.Count - 2); i++) { UnicodeInfo ui0 = twrch[res[i].BeginChar]; UnicodeInfo ui1 = twrch[res[i + 1].BeginChar]; UnicodeInfo ui2 = twrch[res[i + 2].BeginChar]; if (ui1.IsQuot) { int p = res[i + 1].BeginChar; if ((p >= 2 && "БбТт".IndexOf(text[p - 1]) >= 0 && ((p + 3) < text.Length)) && "ЕеЯяЁё".IndexOf(text[p + 1]) >= 0) { string wstr = Pullenti.Morph.LanguageHelper.TransliteralCorrection(Pullenti.Morph.LanguageHelper.CorrectWord(string.Format("{0}Ъ{1}", res[i].GetSourceText(text), res[i + 2].GetSourceText(text))), null, false); List <Pullenti.Morph.MorphWordForm> li = this.ProcessOneWord0(wstr); if (li != null && li.Count > 0 && li[0].IsInDictionary) { res[i].EndChar = res[i + 2].EndChar; res[i].Term = wstr; res[i].WordForms = li; res.RemoveRange(i + 1, 2); } } else if ((ui1.IsApos && p > 0 && char.IsLetter(text[p - 1])) && ((p + 1) < text.Length) && char.IsLetter(text[p + 1])) { if (defLang == Pullenti.Morph.MorphLang.UA || ((res[i].Language & Pullenti.Morph.MorphLang.UA)) != Pullenti.Morph.MorphLang.Unknown || ((res[i + 2].Language & Pullenti.Morph.MorphLang.UA)) != Pullenti.Morph.MorphLang.Unknown) { string wstr = 
Pullenti.Morph.LanguageHelper.TransliteralCorrection(Pullenti.Morph.LanguageHelper.CorrectWord(string.Format("{0}{1}", res[i].GetSourceText(text), res[i + 2].GetSourceText(text))), null, false); List <Pullenti.Morph.MorphWordForm> li = this.ProcessOneWord0(wstr); bool okk = true; if (okk) { res[i].EndChar = res[i + 2].EndChar; res[i].Term = wstr; if (li == null) { li = new List <Pullenti.Morph.MorphWordForm>(); } if (li != null && li.Count > 0) { res[i].Language = li[0].Language; } res[i].WordForms = li; res.RemoveRange(i + 1, 2); } } } } else if (((ui1.UniChar == '3' || ui1.UniChar == '4')) && res[i + 1].Length == 1) { string src = (ui1.UniChar == '3' ? "З" : "Ч"); int i0 = i + 1; if ((res[i].EndChar + 1) == res[i + 1].BeginChar && ui0.IsCyrillic) { i0--; src = res[i0].GetSourceText(text) + src; } int i1 = i + 1; if ((res[i + 1].EndChar + 1) == res[i + 2].BeginChar && ui2.IsCyrillic) { i1++; src += res[i1].GetSourceText(text); } if (src.Length > 2) { string wstr = Pullenti.Morph.LanguageHelper.TransliteralCorrection(Pullenti.Morph.LanguageHelper.CorrectWord(src), null, false); List <Pullenti.Morph.MorphWordForm> li = this.ProcessOneWord0(wstr); if (li != null && li.Count > 0 && li[0].IsInDictionary) { res[i0].EndChar = res[i1].EndChar; res[i0].Term = wstr; res[i0].WordForms = li; res.RemoveRange(i0 + 1, i1 - i0); } } } else if ((ui1.IsHiphen && ui0.IsLetter && ui2.IsLetter) && res[i].EndChar > res[i].BeginChar && res[i + 2].EndChar > res[i + 2].BeginChar) { bool newline = false; int sps = 0; for (j = res[i + 1].EndChar + 1; j < res[i + 2].BeginChar; j++) { if (text[j] == '\r' || text[j] == '\n') { newline = true; sps++; } else if (!char.IsWhiteSpace(text[j])) { break; } else { sps++; } } string fullWord = Pullenti.Morph.LanguageHelper.CorrectWord(res[i].GetSourceText(text) + res[i + 2].GetSourceText(text)); if (!newline) { if (uniLex.ContainsKey(fullWord) || fullWord == "ИЗЗА") { newline = true; } else if (text[res[i + 1].BeginChar] == ((char)0x00AD)) { newline = 
true; } else if (Pullenti.Morph.LanguageHelper.EndsWithEx(res[i].GetSourceText(text), "О", "о", null, null) && res[i + 2].WordForms.Count > 0 && res[i + 2].WordForms[0].IsInDictionary) { if (text[res[i + 1].BeginChar] == '¬') { List <Pullenti.Morph.MorphWordForm> li = this.ProcessOneWord0(fullWord); if (li != null && li.Count > 0 && li[0].IsInDictionary) { newline = true; } } } else if ((res[i].EndChar + 2) == res[i + 2].BeginChar) { if (!char.IsUpper(text[res[i + 2].BeginChar]) && (sps < 2) && fullWord.Length > 4) { newline = true; if ((i + 3) < res.Count) { UnicodeInfo ui3 = twrch[res[i + 3].BeginChar]; if (ui3.IsHiphen) { newline = false; } } } } else if (((res[i].EndChar + 1) == res[i + 1].BeginChar && sps > 0 && (sps < 3)) && fullWord.Length > 4) { newline = true; } } if (newline) { List <Pullenti.Morph.MorphWordForm> li = this.ProcessOneWord0(fullWord); if (li != null && li.Count > 0 && ((li[0].IsInDictionary || uniLex.ContainsKey(fullWord)))) { res[i].EndChar = res[i + 2].EndChar; res[i].Term = fullWord; res[i].WordForms = li; res.RemoveRange(i + 1, 2); } } else { } } else if ((ui1.IsLetter && ui0.IsLetter && res[i].Length > 2) && res[i + 1].Length > 1) { if (ui0.IsUpper != ui1.IsUpper) { continue; } if (!ui0.IsCyrillic || !ui1.IsCyrillic) { continue; } bool newline = false; for (j = res[i].EndChar + 1; j < res[i + 1].BeginChar; j++) { if (twrch[j].Code == 0xD || twrch[j].Code == 0xA) { newline = true; break; } } if (!newline) { continue; } string fullWord = Pullenti.Morph.LanguageHelper.CorrectWord(res[i].GetSourceText(text) + res[i + 1].GetSourceText(text)); if (!uniLex.ContainsKey(fullWord)) { continue; } List <Pullenti.Morph.MorphWordForm> li = this.ProcessOneWord0(fullWord); if (li != null && li.Count > 0 && li[0].IsInDictionary) { res[i].EndChar = res[i + 1].EndChar; res[i].Term = fullWord; res[i].WordForms = li; res.RemoveAt(i + 1); } } } } for (i = 0; i < res.Count; i++) { Pullenti.Morph.MorphToken mt = res[i]; mt.CharInfo = new 
Pullenti.Morph.CharsInfo(); UnicodeInfo ui0 = twrch[mt.BeginChar]; UnicodeInfo ui00 = UnicodeInfo.AllChars[(int)(mt.Term[0])]; for (j = mt.BeginChar + 1; j <= mt.EndChar; j++) { if (ui0.IsLetter) { break; } ui0 = twrch[j]; } if (ui0.IsLetter) { mt.CharInfo.IsLetter = true; if (ui00.IsLatin) { mt.CharInfo.IsLatinLetter = true; } else if (ui00.IsCyrillic) { mt.CharInfo.IsCyrillicLetter = true; } if (mt.Language == Pullenti.Morph.MorphLang.Unknown) { if (Pullenti.Morph.LanguageHelper.IsCyrillic(mt.Term)) { mt.Language = (defLang.IsUndefined ? Pullenti.Morph.MorphLang.RU : defLang); } } if (goodText) { continue; } bool allUp = true; bool allLo = true; for (j = mt.BeginChar; j <= mt.EndChar; j++) { if (twrch[j].IsUpper || twrch[j].IsDigit) { allLo = false; } else { allUp = false; } } if (allUp) { mt.CharInfo.IsAllUpper = true; } else if (allLo) { mt.CharInfo.IsAllLower = true; } else if (((ui0.IsUpper || twrch[mt.BeginChar].IsDigit)) && mt.EndChar > mt.BeginChar) { allLo = true; for (j = mt.BeginChar + 1; j <= mt.EndChar; j++) { if (twrch[j].IsUpper || twrch[j].IsDigit) { allLo = false; break; } } if (allLo) { mt.CharInfo.IsCapitalUpper = true; } else if (twrch[mt.EndChar].IsLower && (mt.EndChar - mt.BeginChar) > 1) { allUp = true; for (j = mt.BeginChar; j < mt.EndChar; j++) { if (twrch[j].IsLower) { allUp = false; break; } } if (allUp) { mt.CharInfo.IsLastLower = true; } } } } if (mt.CharInfo.IsLastLower && mt.Length > 2 && mt.CharInfo.IsCyrillicLetter) { string pref = text.Substring(mt.BeginChar, mt.EndChar - mt.BeginChar); bool ok = false; foreach (Pullenti.Morph.MorphWordForm wf in mt.WordForms) { if (wf.NormalCase == pref || wf.NormalFull == pref) { ok = true; break; } } if (!ok) { Pullenti.Morph.MorphWordForm wf0 = new Pullenti.Morph.MorphWordForm() { NormalCase = pref, Class = Pullenti.Morph.MorphClass.Noun, UndefCoef = 1 }; mt.WordForms = new List <Pullenti.Morph.MorphWordForm>(mt.WordForms); mt.WordForms.Insert(0, wf0); } } } if (goodText || onlyTokenizing) { 
return(res); } for (i = 0; i < res.Count; i++) { if (res[i].Length == 1 && res[i].CharInfo.IsLatinLetter) { char ch = res[i].Term[0]; if (ch == 'C' || ch == 'A' || ch == 'P') { } else { continue; } bool isRus = false; for (int ii = i - 1; ii >= 0; ii--) { if ((res[ii].EndChar + 1) != res[ii + 1].BeginChar) { break; } else if (res[ii].CharInfo.IsLetter) { isRus = res[ii].CharInfo.IsCyrillicLetter; break; } } if (!isRus) { for (int ii = i + 1; ii < res.Count; ii++) { if ((res[ii - 1].EndChar + 1) != res[ii].BeginChar) { break; } else if (res[ii].CharInfo.IsLetter) { isRus = res[ii].CharInfo.IsCyrillicLetter; break; } } } if (isRus) { res[i].Term = Pullenti.Morph.LanguageHelper.TransliteralCorrection(res[i].Term, null, true); res[i].CharInfo.IsCyrillicLetter = true; res[i].CharInfo.IsLatinLetter = true; } } } foreach (Pullenti.Morph.MorphToken r in res) { if (r.CharInfo.IsAllUpper || r.CharInfo.IsCapitalUpper) { if (r.Language.IsCyrillic) { bool ok = false; foreach (Pullenti.Morph.MorphWordForm wf in r.WordForms) { if (wf.Class.IsProperSurname) { ok = true; break; } } if (!ok) { r.WordForms = new List <Pullenti.Morph.MorphWordForm>(r.WordForms); m_EngineRu.ProcessSurnameVariants(r.Term, r.WordForms); } } } } foreach (Pullenti.Morph.MorphToken r in res) { foreach (Pullenti.Morph.MorphWordForm mv in r.WordForms) { if (mv.NormalCase == null) { mv.NormalCase = r.Term; } } } for (i = 0; i < (res.Count - 2); i++) { if (res[i].CharInfo.IsLatinLetter && res[i].CharInfo.IsAllUpper && res[i].Length == 1) { if (twrch[res[i + 1].BeginChar].IsQuot && res[i + 2].CharInfo.IsLatinLetter && res[i + 2].Length > 2) { if ((res[i].EndChar + 1) == res[i + 1].BeginChar && (res[i + 1].EndChar + 1) == res[i + 2].BeginChar) { string wstr = string.Format("{0}{1}", res[i].Term, res[i + 2].Term); List <Pullenti.Morph.MorphWordForm> li = this.ProcessOneWord0(wstr); if (li != null) { res[i].WordForms = li; } res[i].EndChar = res[i + 2].EndChar; res[i].Term = wstr; if (res[i + 
2].CharInfo.IsAllLower) { res[i].CharInfo.IsAllUpper = false; res[i].CharInfo.IsCapitalUpper = true; } else if (!res[i + 2].CharInfo.IsAllUpper) { res[i].CharInfo.IsAllUpper = false; } res.RemoveRange(i + 1, 2); } } } } for (i = 0; i < (res.Count - 1); i++) { if (!res[i].CharInfo.IsLetter && !res[i + 1].CharInfo.IsLetter && (res[i].EndChar + 1) == res[i + 1].BeginChar) { if (twrch[res[i].BeginChar].IsHiphen && twrch[res[i + 1].BeginChar].IsHiphen) { if (i == 0 || !twrch[res[i - 1].BeginChar].IsHiphen) { } else { continue; } if ((i + 2) == res.Count || !twrch[res[i + 2].BeginChar].IsHiphen) { } else { continue; } res[i].EndChar = res[i + 1].EndChar; res.RemoveAt(i + 1); } } } return(res); }
/// <summary>
/// Tries to build one noun-phrase item (an adjective and/or noun candidate) starting at <paramref name="t"/>.
/// </summary>
/// <param name="t">First token of the candidate; <c>null</c> yields <c>null</c>.</param>
/// <param name="items">Items already collected for the phrase (may be <c>null</c> or empty); used for agreement checks.</param>
/// <param name="attrs">Parsing flags; this method consults IgnoreParticiples, ReferentCanBeNoun,
/// ParseNumericAsAdjective, ParsePronouns and MultiNouns.</param>
/// <returns>The parsed item, or <c>null</c> when the token cannot take part in a noun phrase.</returns>
public static NounPhraseItem TryParse(Pullenti.Ner.Token t, List<NounPhraseItem> items, Pullenti.Ner.Core.NounPhraseParseAttr attrs) {
    if (t == null) {
        return null;
    }
    Pullenti.Ner.Token t0 = t;
    bool _canBeSurname = false;
    bool _isDoubtAdj = false;
    Pullenti.Ner.ReferentToken rt = t as Pullenti.Ner.ReferentToken;
    if (rt != null && rt.BeginToken == rt.EndToken && (rt.BeginToken is Pullenti.Ner.TextToken)) {
        // Referent wrapping exactly one text token: parse the inner token and stretch the result over the referent.
        NounPhraseItem res = TryParse(rt.BeginToken, items, attrs);
        if (res != null) {
            res.BeginToken = (res.EndToken = t);
            res.CanBeNoun = true;
            return res;
        }
    }
    if (rt != null) {
        // Any other referent is taken as a noun whose normal value is the referent's string form.
        NounPhraseItem res = new NounPhraseItem(t, t);
        foreach (Pullenti.Morph.MorphBaseInfo m in t.Morph.Items) {
            NounPhraseItemTextVar v = new NounPhraseItemTextVar(m, null);
            v.NormalValue = t.GetReferent().ToString();
            res.NounMorph.Add(v);
        }
        res.CanBeNoun = true;
        return res;
    }
    bool hasLegalVerb = false;
    if (t is Pullenti.Ner.TextToken) {
        if (!t.Chars.IsLetter) {
            return null;
        }
        string str = (t as Pullenti.Ner.TextToken).Term;
        if (str[str.Length - 1] == 'А' || str[str.Length - 1] == 'О') {
            foreach (Pullenti.Morph.MorphBaseInfo wf in t.Morph.Items) {
                if ((wf is Pullenti.Morph.MorphWordForm) && (wf as Pullenti.Morph.MorphWordForm).IsInDictionary) {
                    if (wf.Class.IsVerb) {
                        Pullenti.Morph.MorphClass mc = t.GetMorphClassInDictionary();
                        if (!mc.IsNoun && ((attrs & Pullenti.Ner.Core.NounPhraseParseAttr.IgnoreParticiples)) == Pullenti.Ner.Core.NounPhraseParseAttr.No) {
                            if (!Pullenti.Morph.LanguageHelper.EndsWithEx(str, "ОГО", "ЕГО", null, null)) {
                                return null;
                            }
                        }
                        hasLegalVerb = true;
                    }
                    if (wf.Class.IsAdverb) {
                        if (t.Next == null || !t.Next.IsHiphen) {
                            // Only these adverb-like terms are allowed to continue.
                            if (!(str == "ВСЕГО" || str == "ДОМА" || str == "НЕСКОЛЬКО" || str == "МНОГО" || str == "ПОРЯДКА")) {
                                return null;
                            }
                        }
                    }
                    if (wf.Class.IsAdjective) {
                        if (wf.ContainsAttr("к.ф.", null)) {
                            if (!(t.GetMorphClassInDictionary() == Pullenti.Morph.MorphClass.Adjective)) {
                                _isDoubtAdj = true;
                            }
                        }
                    }
                }
            }
        }
        Pullenti.Morph.MorphClass mc0 = t.Morph.Class;
        if (mc0.IsProperSurname && !t.Chars.IsAllLower) {
            foreach (Pullenti.Morph.MorphBaseInfo wf in t.Morph.Items) {
                if (wf.Class.IsProperSurname && wf.Number != Pullenti.Morph.MorphNumber.Plural) {
                    Pullenti.Morph.MorphWordForm wff = wf as Pullenti.Morph.MorphWordForm;
                    if (wff == null) {
                        continue;
                    }
                    string s = ((wff.NormalFull ?? wff.NormalCase)) ?? "";
                    if (Pullenti.Morph.LanguageHelper.EndsWithEx(s, "ИН", "ЕН", "ЫН", null)) {
                        if (!wff.IsInDictionary) {
                            _canBeSurname = true;
                        }
                        else {
                            return null;
                        }
                    }
                    if (wff.IsInDictionary && Pullenti.Morph.LanguageHelper.EndsWith(s, "ОВ")) {
                        _canBeSurname = true;
                    }
                }
            }
        }
        if (mc0.IsProperName && !t.Chars.IsAllLower) {
            foreach (Pullenti.Morph.MorphBaseInfo wff in t.Morph.Items) {
                Pullenti.Morph.MorphWordForm wf = wff as Pullenti.Morph.MorphWordForm;
                if (wf == null) {
                    continue;
                }
                if (wf.NormalCase == "ГОР") {
                    continue;
                }
                if (wf.Class.IsProperName && wf.IsInDictionary) {
                    if (wf.NormalCase == null || !wf.NormalCase.StartsWith("ЛЮБ")) {
                        if (mc0.IsAdjective && t.Morph.ContainsAttr("неизм.", null)) {
                        }
                        else if (((attrs & Pullenti.Ner.Core.NounPhraseParseAttr.ReferentCanBeNoun)) == Pullenti.Ner.Core.NounPhraseParseAttr.ReferentCanBeNoun) {
                        }
                        else {
                            if (items == null || (items.Count < 1)) {
                                return null;
                            }
                            if (!items[0].IsStdAdjective) {
                                return null;
                            }
                        }
                    }
                }
            }
        }
        if (mc0.IsAdjective && t.Morph.ItemsCount == 1) {
            if (t.Morph[0].ContainsAttr("в.ср.ст.", null)) {
                return null;
            }
        }
        Pullenti.Morph.MorphClass mc1 = t.GetMorphClassInDictionary();
        if (mc1 == Pullenti.Morph.MorphClass.Verb && t.Morph.Case.IsUndefined) {
            return null;
        }
        if ((((attrs & Pullenti.Ner.Core.NounPhraseParseAttr.IgnoreParticiples)) == Pullenti.Ner.Core.NounPhraseParseAttr.IgnoreParticiples && t.Morph.Class.IsVerb && !t.Morph.Class.IsNoun) && !t.Morph.Class.IsProper) {
            foreach (Pullenti.Morph.MorphBaseInfo wf in t.Morph.Items) {
                if (wf.Class.IsVerb) {
                    if (wf.ContainsAttr("дейст.з.", null)) {
                        if (!Pullenti.Morph.LanguageHelper.EndsWith((t as Pullenti.Ner.TextToken).Term, "СЯ")) {
                            return null;
                        }
                    }
                }
            }
        }
    }
    Pullenti.Ner.Token t1 = null;
    // Up to two passes: the second pass runs when the first one neither returned nor rejected the token.
    for (int k = 0; k < 2; k++) {
        t = t1 ?? t0;
        if (k == 0) {
            // On the first pass a hyphenated continuation ("X-Y") may move the analysed token to Y.
            if (((t0 is Pullenti.Ner.TextToken) && t0.Next != null && t0.Next.IsHiphen) && t0.Next.Next != null) {
                if (!t0.IsWhitespaceAfter && !t0.Morph.Class.IsPronoun && !(t0.Next.Next is Pullenti.Ner.NumberToken)) {
                    if (!t0.Next.IsWhitespaceAfter) {
                        t = t0.Next.Next;
                    }
                    else if (t0.Next.Next.Chars.IsAllLower && Pullenti.Morph.LanguageHelper.EndsWith((t0 as Pullenti.Ner.TextToken).Term, "О")) {
                        t = t0.Next.Next;
                    }
                }
            }
        }
        NounPhraseItem it = new NounPhraseItem(t0, t) { CanBeSurname = _canBeSurname };
        if (t0 == t && (t0 is Pullenti.Ner.ReferentToken)) {
            it.CanBeNoun = true;
            it.Morph = new Pullenti.Ner.MorphCollection(t0.Morph);
        }
        bool canBePrepos = false;
        foreach (Pullenti.Morph.MorphBaseInfo v in t.Morph.Items) {
            Pullenti.Morph.MorphWordForm wf = v as Pullenti.Morph.MorphWordForm;
            if (v.Class.IsVerb && !v.Case.IsUndefined) {
                // A verb form that carries a case is registered as an adjective variant.
                it.CanBeAdj = true;
                it.AdjMorph.Add(new NounPhraseItemTextVar(v, t));
                continue;
            }
            if (v.Class.IsPreposition) {
                canBePrepos = true;
            }
            if (v.Class.IsAdjective || ((v.Class.IsPronoun && !v.Class.IsPersonalPronoun && !v.ContainsAttr("неизм.", null))) || ((v.Class.IsNoun && (t is Pullenti.Ner.NumberToken)))) {
                if (TryAccordVariant(items, (items == null ? 0 : items.Count), v, false)) {
                    if (v.ContainsAttr("к.ф.", null)) {
                        continue;
                    }
                    if (v.ContainsAttr("собир.", null) && !(t is Pullenti.Ner.NumberToken)) {
                        if (wf != null && wf.IsInDictionary) {
                            return null;
                        }
                        continue;
                    }
                    if (v.ContainsAttr("сравн.", null)) {
                        continue;
                    }
                    bool ok = true;
                    if (t is Pullenti.Ner.TextToken) {
                        string s = (t as Pullenti.Ner.TextToken).Term;
                        if (s == "ПРАВО" || s == "ПРАВА") {
                            ok = false;
                        }
                        else if (Pullenti.Morph.LanguageHelper.EndsWith(s, "ОВ") && t.GetMorphClassInDictionary().IsNoun) {
                            ok = false;
                        }
                    }
                    else if (t is Pullenti.Ner.NumberToken) {
                        if (v.Class.IsNoun && t.Morph.Class.IsAdjective) {
                            ok = false;
                        }
                        else if (t.Morph.Class.IsNoun && ((attrs & Pullenti.Ner.Core.NounPhraseParseAttr.ParseNumericAsAdjective)) == Pullenti.Ner.Core.NounPhraseParseAttr.No) {
                            ok = false;
                        }
                    }
                    if (ok) {
                        it.AdjMorph.Add(new NounPhraseItemTextVar(v, t));
                        it.CanBeAdj = true;
                        if (_isDoubtAdj && t0 == t) {
                            it.IsDoubtAdjective = true;
                        }
                        if (hasLegalVerb && wf != null && wf.IsInDictionary) {
                            it.CanBeNoun = true;
                        }
                        if (wf != null && wf.Class.IsPronoun) {
                            it.CanBeNoun = true;
                            it.NounMorph.Add(new NounPhraseItemTextVar(v, t));
                        }
                    }
                }
            }
            bool canBeNoun = false;
            if (t is Pullenti.Ner.NumberToken) {
                // Number tokens are never nouns here.
            }
            else if (v.Class.IsNoun || ((wf != null && wf.NormalCase == "САМ"))) {
                canBeNoun = true;
            }
            else if (v.Class.IsPersonalPronoun) {
                if (items == null || items.Count == 0) {
                    canBeNoun = true;
                }
                else {
                    foreach (NounPhraseItem it1 in items) {
                        if (it1.IsVerb) {
                            if (items.Count == 1 && !v.Case.IsNominative) {
                                canBeNoun = true;
                            }
                            else {
                                return null;
                            }
                        }
                    }
                    if (items.Count == 1) {
                        if (items[0].CanBeAdjForPersonalPronoun) {
                            canBeNoun = true;
                        }
                    }
                }
            }
            else if (v.Class.IsPronoun
                && (items == null || items.Count == 0 || (items.Count == 1 && items[0].CanBeAdjForPersonalPronoun))
                && wf != null
                && (wf.NormalCase == "ТОТ" || wf.NormalFull == "ТО" || wf.NormalCase == "ТО" || wf.NormalCase == "ЭТО" || wf.NormalCase == "ВСЕ" || wf.NormalCase == "ЧТО" || wf.NormalCase == "КТО" || wf.NormalFull == "КОТОРЫЙ" || wf.NormalCase == "КОТОРЫЙ")) {
                if (wf.NormalCase == "ВСЕ") {
                    if (t.Next != null && t.Next.IsValue("РАВНО", null)) {
                        return null;
                    }
                }
                canBeNoun = true;
            }
            else if (wf != null && ((wf.NormalFull ?? wf.NormalCase)) == "КОТОРЫЙ" && ((attrs & Pullenti.Ner.Core.NounPhraseParseAttr.ParsePronouns)) == Pullenti.Ner.Core.NounPhraseParseAttr.No) {
                return null;
            }
            else if (v.Class.IsProper && (t is Pullenti.Ner.TextToken)) {
                if (t.LengthChar > 4 || v.Class.IsProperName) {
                    canBeNoun = true;
                }
            }
            if (canBeNoun) {
                bool added = false;
                if (items != null && items.Count > 1 && ((attrs & Pullenti.Ner.Core.NounPhraseParseAttr.MultiNouns)) != Pullenti.Ner.Core.NounPhraseParseAttr.No) {
                    // The multi-noun variant requires every item after the first to have ConjBefore set.
                    bool ok1 = true;
                    for (int ii = 1; ii < items.Count; ii++) {
                        if (!items[ii].ConjBefore) {
                            ok1 = false;
                            break;
                        }
                    }
                    if (ok1) {
                        if (TryAccordVariant(items, (items == null ? 0 : items.Count), v, true)) {
                            it.NounMorph.Add(new NounPhraseItemTextVar(v, t));
                            it.CanBeNoun = true;
                            it.MultiNouns = true;
                            added = true;
                        }
                    }
                }
                if (!added) {
                    if (TryAccordVariant(items, (items == null ? 0 : items.Count), v, false)) {
                        it.NounMorph.Add(new NounPhraseItemTextVar(v, t));
                        it.CanBeNoun = true;
                        if (v.Class.IsPersonalPronoun && t.Morph.ContainsAttr("неизм.", null) && !it.CanBeAdj) {
                            NounPhraseItemTextVar itt = new NounPhraseItemTextVar(v, t);
                            itt.Case = Pullenti.Morph.MorphCase.AllCases;
                            itt.Number = Pullenti.Morph.MorphNumber.Undefined;
                            it.AdjMorph.Add(itt);
                            it.CanBeAdj = true;
                        }
                    }
                    // Fix: items may be null here (it is treated as nullable everywhere else in this method),
                    // so it must be checked before it is dereferenced.
                    else if (items != null && (items.Count > 0 && items[0].AdjMorph.Count > 0 && items[0].AdjMorph[0].Number == Pullenti.Morph.MorphNumber.Plural) && !((items[0].AdjMorph[0].Case & v.Case)).IsUndefined && !items[0].AdjMorph[0].Class.IsVerb) {
                        if (t.Next != null && t.Next.IsCommaAnd && (t.Next.Next is Pullenti.Ner.TextToken)) {
                            Pullenti.Ner.Core.NounPhraseToken npt2 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t.Next.Next, attrs, 0, null);
                            if (npt2 != null && npt2.Preposition == null && !((npt2.Morph.Case & v.Case & items[0].AdjMorph[0].Case)).IsUndefined) {
                                it.NounMorph.Add(new NounPhraseItemTextVar(v, t));
                                it.CanBeNoun = true;
                            }
                        }
                    }
                }
            }
        }
        if (t0 != t) {
            foreach (NounPhraseItemTextVar v in it.AdjMorph) {
                v.CorrectPrefix(t0 as Pullenti.Ner.TextToken, false);
            }
            foreach (NounPhraseItemTextVar v in it.NounMorph) {
                v.CorrectPrefix(t0 as Pullenti.Ner.TextToken, true);
            }
        }
        if (k == 1 && it.CanBeNoun && !it.CanBeAdj) {
            if (t1 != null) {
                it.EndToken = t1;
            }
            else {
                it.EndToken = t0.Next.Next;
            }
            foreach (NounPhraseItemTextVar v in it.NounMorph) {
                if (v.NormalValue != null && (v.NormalValue.IndexOf('-') < 0)) {
                    v.NormalValue = string.Format("{0}-{1}", v.NormalValue, it.EndToken.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false));
                }
            }
        }
        if (it.CanBeAdj) {
            if (m_StdAdjectives.TryParse(it.BeginToken, Pullenti.Ner.Core.TerminParseAttr.No) != null) {
                it.IsStdAdjective = true;
            }
        }
        if (canBePrepos && it.CanBeNoun) {
            // The token may also be a preposition: reject it when a noun phrase parses after it that way.
            if (items != null && items.Count > 0) {
                Pullenti.Ner.Core.NounPhraseToken npt1 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t, Pullenti.Ner.Core.NounPhraseParseAttr.ParsePreposition | Pullenti.Ner.Core.NounPhraseParseAttr.ParsePronouns | Pullenti.Ner.Core.NounPhraseParseAttr.ParseVerbs, 0, null);
                if (npt1 != null && npt1.EndChar > t.EndChar) {
                    return null;
                }
            }
            else {
                Pullenti.Ner.Core.NounPhraseToken npt1 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t.Next, Pullenti.Ner.Core.NounPhraseParseAttr.ParsePronouns | Pullenti.Ner.Core.NounPhraseParseAttr.ParseVerbs, 0, null);
                if (npt1 != null) {
                    Pullenti.Morph.MorphCase prepCase = Pullenti.Morph.LanguageHelper.GetCaseAfterPreposition((t as Pullenti.Ner.TextToken).Lemma);
                    if (!((prepCase & npt1.Morph.Case)).IsUndefined) {
                        return null;
                    }
                }
            }
        }
        if (it.CanBeNoun || it.CanBeAdj || k == 1) {
            if (it.BeginToken.Morph.Class.IsPronoun) {
                // Attach a trailing particle (optionally after a tight hyphen) to a pronoun item.
                Pullenti.Ner.Token tt2 = it.EndToken.Next;
                if ((tt2 != null && tt2.IsHiphen && !tt2.IsWhitespaceAfter) && !tt2.IsWhitespaceBefore) {
                    tt2 = tt2.Next;
                }
                if (tt2 is Pullenti.Ner.TextToken) {
                    string ss = (tt2 as Pullenti.Ner.TextToken).Term;
                    if ((ss == "ЖЕ" || ss == "БЫ" || ss == "ЛИ") || ss == "Ж") {
                        it.EndToken = tt2;
                    }
                    else if (ss == "НИБУДЬ" || ss == "ЛИБО" || (((ss == "ТО" && tt2.Previous.IsHiphen)) && it.CanBeAdj)) {
                        it.EndToken = tt2;
                        foreach (NounPhraseItemTextVar m in it.AdjMorph) {
                            m.NormalValue = string.Format("{0}-{1}", m.NormalValue, ss);
                            if (m.SingleNumberValue != null) {
                                m.SingleNumberValue = string.Format("{0}-{1}", m.SingleNumberValue, ss);
                            }
                        }
                    }
                }
            }
            return it;
        }
        if (t0 == t) {
            if (t0.IsValue("БИЗНЕС", null) && t0.Next != null && t0.Next.Chars == t0.Chars) {
                // Retry with the following token as the analysed one.
                t1 = t0.Next;
                continue;
            }
            return it;
        }
    }
    return null;
}
/// <summary>
/// Returns a normalized text for this token, taken from the first morphological variant that matches the requested filters.
/// </summary>
/// <param name="mc">Optional class filter. When it is a preposition, the normalized preposition of <c>Term</c> is returned at once.</param>
/// <param name="num">Requested number; <c>Singular</c> makes plural variants prefer their <c>NormalFull</c> value.</param>
/// <param name="gender">Requested gender; variants whose gender does not intersect it are skipped, with the exceptions coded below.</param>
/// <param name="keepChars">When true, the result is re-cased according to <c>Chars</c> (all-lower or capitalized).</param>
/// <returns>
/// The normalized text; <c>null</c> when no word form was returned but some variant had a defined case;
/// otherwise a value derived from <c>Term</c>.
/// </returns>
public override string GetNormalCaseText(Pullenti.Morph.MorphClass mc = null, Pullenti.Morph.MorphNumber num = Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender gender = Pullenti.Morph.MorphGender.Undefined, bool keepChars = false) {
    bool empty = true;
    if (mc != null && mc.IsPreposition) {
        return Pullenti.Morph.LanguageHelper.NormalizePreposition(Term);
    }
    foreach (Pullenti.Morph.MorphBaseInfo it in Morph.Items) {
        if (mc != null && !mc.IsUndefined) {
            Pullenti.Morph.MorphClass cc = it.Class & mc;
            if (cc.IsUndefined) {
                continue;
            }
            if (cc.IsMisc && !cc.IsProper && mc != it.Class) {
                continue;
            }
        }
        Pullenti.Morph.MorphWordForm wf = it as Pullenti.Morph.MorphWordForm;
        bool normalFull = false;
        if (gender != Pullenti.Morph.MorphGender.Undefined) {
            if (((it.Gender & gender)) == Pullenti.Morph.MorphGender.Undefined) {
                if ((gender == Pullenti.Morph.MorphGender.Masculine && ((it.Gender != Pullenti.Morph.MorphGender.Undefined || it.Number == Pullenti.Morph.MorphNumber.Plural)) && wf != null) && wf.NormalFull != null) {
                    // Gender mismatch, but a full normal form exists: use it for the masculine request.
                    normalFull = true;
                }
                else if (gender == Pullenti.Morph.MorphGender.Masculine && it.Class.IsPersonalPronoun) {
                    // Personal pronouns are accepted for a masculine request despite the mismatch.
                }
                else {
                    continue;
                }
            }
        }
        if (!it.Case.IsUndefined) {
            empty = false;
        }
        if (wf != null) {
            string res;
            if (num == Pullenti.Morph.MorphNumber.Singular && it.Number == Pullenti.Morph.MorphNumber.Plural && wf.NormalFull != null) {
                // Fix: NormalCase may be null (the other branch already falls back to Term for that case),
                // so its length must not be read unguarded.
                string nc = wf.NormalCase;
                int le = (nc == null ? 0 : nc.Length);
                if ((le == (wf.NormalFull.Length + 2) && le > 4 && nc[le - 2] == 'С') && nc[le - 1] == 'Я') {
                    // NormalCase is exactly two characters longer than NormalFull and ends with "СЯ": keep it.
                    res = nc;
                }
                else {
                    res = wf.NormalFull;
                }
            }
            else {
                res = (normalFull ? wf.NormalFull : (wf.NormalCase ?? Term));
            }
            if (num == Pullenti.Morph.MorphNumber.Singular && mc != null && mc == Pullenti.Morph.MorphClass.Noun) {
                if (res == "ДЕТИ") {
                    res = "РЕБЕНОК";
                }
            }
            if (keepChars) {
                if (Chars.IsAllLower) {
                    res = res.ToLower();
                }
                else if (Chars.IsCapitalUpper) {
                    res = Pullenti.Ner.Core.MiscHelper.ConvertFirstCharUpperAndOtherLower(res);
                }
            }
            return res;
        }
    }
    if (!empty) {
        return null;
    }
    string te = null;
    if (num == Pullenti.Morph.MorphNumber.Singular && mc != null) {
        // No word form matched: ask the morphology service for a singular form of Term.
        Pullenti.Morph.MorphBaseInfo bi = new Pullenti.Morph.MorphBaseInfo() { Class = new Pullenti.Morph.MorphClass() { Value = mc.Value }, Gender = gender, Number = Pullenti.Morph.MorphNumber.Singular, Language = Morph.Language };
        string vars = Pullenti.Morph.MorphologyService.GetWordform(Term, bi);
        if (vars != null) {
            te = vars;
        }
    }
    if (te == null) {
        te = Term;
    }
    if (keepChars) {
        if (Chars.IsAllLower) {
            return te.ToLower();
        }
        else if (Chars.IsCapitalUpper) {
            return Pullenti.Ner.Core.MiscHelper.ConvertFirstCharUpperAndOtherLower(te);
        }
    }
    return te;
}
/// <summary>
/// Creates a text token from a morphological token.
/// </summary>
/// <param name="source">Morphological token to copy from; when <c>null</c> only the base constructor runs.</param>
/// <param name="kit">Analysis kit passed to the base constructor.</param>
/// <param name="bchar">Begin position; a negative value means "take it from <paramref name="source"/>" (0 when source is null).</param>
/// <param name="echar">End position; a negative value means "take it from <paramref name="source"/>" (0 when source is null).</param>
public TextToken(Pullenti.Morph.MorphToken source, Pullenti.Ner.Core.AnalysisKit kit, int bchar = -1, int echar = -1)
    : base(kit, (bchar >= 0 ? bchar : (source == null ? 0 : source.BeginChar)), (echar >= 0 ? echar : (source == null ? 0 : source.EndChar))) {
    if (source == null) {
        return;
    }
    Chars = source.CharInfo;
    Term = source.Term;
    Lemma = source.GetLemma() ?? Term;
    MaxLengthOfMorphVars = (short)Term.Length;
    Morph = new MorphCollection();
    if (source.WordForms != null) {
        foreach (Pullenti.Morph.MorphWordForm wf in source.WordForms) {
            Morph.AddItem(wf);
            // Track the longest of Term / NormalCase / NormalFull over all variants.
            if (wf.NormalCase != null && (MaxLengthOfMorphVars < wf.NormalCase.Length)) {
                MaxLengthOfMorphVars = (short)wf.NormalCase.Length;
            }
            if (wf.NormalFull != null && (MaxLengthOfMorphVars < wf.NormalFull.Length)) {
                MaxLengthOfMorphVars = (short)wf.NormalFull.Length;
            }
        }
    }
    // Length of the leading part of Term that every variant's NormalCase and NormalFull also start with.
    for (int i = 0; i < Term.Length; i++) {
        char ch = Term[i];
        int j;
        for (j = 0; j < Morph.ItemsCount; j++) {
            Pullenti.Morph.MorphWordForm wf = Morph[j] as Pullenti.Morph.MorphWordForm;
            if (wf == null) {
                // Fix: an item that is not a word form has no normal forms to compare; skip it
                // instead of dereferencing the null result of the "as" cast.
                continue;
            }
            if (wf.NormalCase != null) {
                if (i >= wf.NormalCase.Length) {
                    break;
                }
                if (wf.NormalCase[i] != ch) {
                    break;
                }
            }
            if (wf.NormalFull != null) {
                if (i >= wf.NormalFull.Length) {
                    break;
                }
                if (wf.NormalFull[i] != ch) {
                    break;
                }
            }
        }
        if (j < Morph.ItemsCount) {
            break;
        }
        InvariantPrefixLengthOfMorphVars = (short)((i + 1));
    }
    if (Morph.Language.IsUndefined && !source.Language.IsUndefined) {
        Morph.Language = source.Language;
    }
}
/// <summary>
/// Builds a normalized text for the token range from <paramref name="begin"/> to <paramref name="end"/>.
/// </summary>
/// <param name="begin">First token of the range.</param>
/// <param name="end">Last token of the range.</param>
/// <param name="cla">Class filter for choosing a word form (a zero <c>Value</c> means no filter).</param>
/// <param name="mc">Case filter for choosing a word form (undefined means no filter).</param>
/// <param name="gender">Gender filter for choosing a word form; it is dropped in a second attempt if nothing matched.</param>
/// <param name="ignoreBracketsAndHiphens">When true, brackets are processed or skipped, spaced hyphens are dropped,
/// and hyphenated prefixes are joined with a space instead of a hyphen.</param>
/// <param name="ignoreGeoReferent">When true, meta tokens (other than the first) whose referent type is "GEO" are skipped.</param>
/// <returns>The assembled text, or <c>null</c> when the range is invalid or nothing was appended.</returns>
public static string GetNameEx(Pullenti.Ner.Token begin, Pullenti.Ner.Token end, Pullenti.Morph.MorphClass cla, Pullenti.Morph.MorphCase mc, Pullenti.Morph.MorphGender gender = Pullenti.Morph.MorphGender.Undefined, bool ignoreBracketsAndHiphens = false, bool ignoreGeoReferent = false) {
    if (end == null || begin == null) {
        return null;
    }
    if (begin.EndChar > end.BeginChar && begin != end) {
        return null;
    }
    StringBuilder sb = new StringBuilder();
    // Text of the words seen so far in an "a-b-c" chain; it is glued onto the next emitted word.
    string pending = null;
    for (Pullenti.Ner.Token t = begin; t != null && t.EndChar <= end.EndChar; t = t.Next) {
        if (sb.Length > 1000) {
            break;
        }
        if (t.IsTableControlChar) {
            continue;
        }
        if (ignoreBracketsAndHiphens) {
            if (BracketHelper.IsBracket(t, false)) {
                if (t == end) {
                    break;
                }
                if (t.IsCharOf("(<[")) {
                    BracketSequenceToken seq = BracketHelper.TryParse(t, BracketParseAttr.No, 100);
                    if (seq != null && seq.EndChar <= end.EndChar) {
                        string inner = GetNameEx(seq.BeginToken.Next, seq.EndToken.Previous, Pullenti.Morph.MorphClass.Undefined, Pullenti.Morph.MorphCase.Undefined, Pullenti.Morph.MorphGender.Undefined, ignoreBracketsAndHiphens, false);
                        if (inner != null) {
                            // A trailing bracket holding a single non-letter, non-referent token is dropped.
                            bool dropTail = (seq.EndChar == end.EndChar && seq.BeginToken.Next == seq.EndToken.Previous && !seq.BeginToken.Next.Chars.IsLetter) && !(seq.BeginToken.Next is Pullenti.Ner.ReferentToken);
                            if (!dropTail) {
                                sb.AppendFormat(" {0}{1}{2}", t.GetSourceText(), inner, seq.EndToken.GetSourceText());
                            }
                        }
                        t = seq.EndToken;
                    }
                }
                continue;
            }
            if (t.IsHiphen) {
                if (t == end) {
                    break;
                }
                if (t.IsWhitespaceBefore || t.IsWhitespaceAfter) {
                    continue;
                }
            }
        }
        Pullenti.Ner.TextToken word = t as Pullenti.Ner.TextToken;
        if (word != null) {
            if (!ignoreBracketsAndHiphens) {
                if ((word.Next != null && word.Next.IsHiphen && (word.Next.Next is Pullenti.Ner.TextToken)) && word != end && word.Next != end) {
                    pending = (pending == null ? word.Term : string.Format("{0}-{1}", pending, word.Term));
                    t = word.Next;
                    if (t == end) {
                        break;
                    }
                    continue;
                }
            }
            string piece = null;
            if (cla.Value != 0 || !mc.IsUndefined || gender != Pullenti.Morph.MorphGender.Undefined) {
                // First attempt: honour the class, case and gender filters.
                foreach (Pullenti.Morph.MorphBaseInfo item in word.Morph.Items) {
                    Pullenti.Morph.MorphWordForm form = item as Pullenti.Morph.MorphWordForm;
                    if (form == null) {
                        continue;
                    }
                    if (cla.Value != 0 && ((form.Class.Value & cla.Value)) == 0) {
                        continue;
                    }
                    if (!mc.IsUndefined && ((form.Case & mc)).IsUndefined) {
                        continue;
                    }
                    if (gender != Pullenti.Morph.MorphGender.Undefined && ((form.Gender & gender)) == Pullenti.Morph.MorphGender.Undefined) {
                        continue;
                    }
                    if (piece == null || form.NormalCase == word.Term) {
                        piece = form.NormalCase;
                    }
                }
                if (piece == null && gender != Pullenti.Morph.MorphGender.Undefined) {
                    // Second attempt: same filters without the gender restriction.
                    foreach (Pullenti.Morph.MorphBaseInfo item in word.Morph.Items) {
                        Pullenti.Morph.MorphWordForm form = item as Pullenti.Morph.MorphWordForm;
                        if (form == null) {
                            continue;
                        }
                        if (cla.Value != 0 && ((form.Class.Value & cla.Value)) == 0) {
                            continue;
                        }
                        if (!mc.IsUndefined && ((form.Case & mc)).IsUndefined) {
                            continue;
                        }
                        if (piece == null || form.NormalCase == word.Term) {
                            piece = form.NormalCase;
                        }
                    }
                }
            }
            if (piece == null) {
                piece = word.Term;
                if (word.Chars.IsLastLower && word.LengthChar > 2) {
                    // Use the source text cut after its last upper-case character.
                    piece = word.GetSourceText();
                    int cut = piece.Length - 1;
                    while (cut >= 0 && !char.IsUpper(piece[cut])) {
                        cut--;
                    }
                    if (cut >= 0) {
                        piece = piece.Substring(0, cut + 1);
                    }
                }
            }
            if (pending != null) {
                string glue = (ignoreBracketsAndHiphens ? " " : "-");
                piece = string.Format("{0}{1}{2}", pending, glue, piece);
            }
            pending = null;
            if (sb.Length > 0 && piece.Length > 0) {
                if (char.IsLetterOrDigit(piece[0])) {
                    if (sb[sb.Length - 1] != '-') {
                        sb.Append(' ');
                    }
                }
                else if (!ignoreBracketsAndHiphens && BracketHelper.CanBeStartOfSequence(word, false, false)) {
                    sb.Append(' ');
                }
            }
            sb.Append(piece);
        }
        else if (t is Pullenti.Ner.NumberToken) {
            if (sb.Length > 0) {
                if (t.IsWhitespaceBefore || sb[sb.Length - 1] != '-') {
                    sb.Append(' ');
                }
            }
            Pullenti.Ner.NumberToken number = t as Pullenti.Ner.NumberToken;
            bool spelledAdjective = (t.Morph.Class.IsAdjective && number.Typ == Pullenti.Ner.NumberSpellingType.Words && number.BeginToken == number.EndToken) && (number.BeginToken is Pullenti.Ner.TextToken);
            if (spelledAdjective) {
                sb.Append((number.BeginToken as Pullenti.Ner.TextToken).Term);
            }
            else {
                sb.Append(number.Value);
            }
        }
        else if (t is Pullenti.Ner.MetaToken) {
            if ((ignoreGeoReferent && t != begin && t.GetReferent() != null) && t.GetReferent().TypeName == "GEO") {
                continue;
            }
            string nested = GetNameEx((t as Pullenti.Ner.MetaToken).BeginToken, (t as Pullenti.Ner.MetaToken).EndToken, cla, mc, gender, ignoreBracketsAndHiphens, ignoreGeoReferent);
            if (!string.IsNullOrEmpty(nested)) {
                if (sb.Length > 0) {
                    if (t.IsWhitespaceBefore || sb[sb.Length - 1] != '-') {
                        sb.Append(' ');
                    }
                }
                sb.Append(nested);
            }
        }
        if (t == end) {
            break;
        }
    }
    return (sb.Length == 0 ? null : sb.ToString());
}
/// <summary>
/// Computes the coefficient for treating the source as the agent of the verb, stores it in <c>Coef</c>
/// and returns it. A value of -1 means the link is rejected.
/// </summary>
/// <param name="noplural">When true, additional plural-agreement checks are applied for a plural verb.</param>
/// <returns>The value assigned to <c>Coef</c>.</returns>
double _calcAgent(bool noplural) {
    if (!string.IsNullOrEmpty(FromPrep)) {
        return Coef = -1;
    }
    Pullenti.Morph.MorphWordForm headForm = ToVerb.FirstVerb.VerbMorph;
    if (headForm == null) {
        return Coef = -1;
    }
    Pullenti.Morph.MorphWordForm tailForm = ToVerb.LastVerb.VerbMorph;
    if (tailForm == null) {
        return Coef = -1;
    }
    if (headForm.Misc.Mood == Pullenti.Morph.MorphMood.Imperative) {
        return Coef = -1;
    }
    Pullenti.Ner.MorphCollection srcMorph = FromMorph;

    // Passive voice: only an instrumental source can be the agent.
    if (tailForm.Misc.Voice == Pullenti.Morph.MorphVoice.Passive || ToVerb.LastVerb.Morph.ContainsAttr("страд.з.", null)) {
        if (srcMorph.Case.IsUndefined) {
            return Coef = 0;
        }
        if (!srcMorph.Case.IsInstrumental) {
            return Coef = -1;
        }
        Coef = Pullenti.Semantic.SemanticService.Params.TransitiveCoef;
        if (tailForm.Case.IsInstrumental) {
            Coef /= 2;
        }
        return Coef;
    }
    if (headForm.Misc.Attrs.Contains("инф.")) {
        return Coef = -1;
    }

    if (_isRevVerb(tailForm)) {
        // Agent case of the first derivate group that declares one.
        Pullenti.Morph.MorphCase agentCase = Pullenti.Morph.MorphCase.Undefined;
        List<Pullenti.Semantic.Utils.DerivateGroup> groups = Pullenti.Semantic.Utils.DerivateService.FindDerivates(tailForm.NormalFull ?? tailForm.NormalCase, true, null);
        if (groups != null) {
            foreach (Pullenti.Semantic.Utils.DerivateGroup g in groups) {
                if (g.CmRev.Agent != null) {
                    agentCase = g.CmRev.Agent.Case;
                    break;
                }
            }
        }
        if (srcMorph.Case.IsUndefined) {
            return Coef = 0;
        }
        if (agentCase.IsDative) {
            if (!srcMorph.Case.IsDative) {
                return Coef = -1;
            }
            Coef = Pullenti.Semantic.SemanticService.Params.TransitiveCoef;
            if (srcMorph.Case.IsGenitive) {
                Coef /= 2;
            }
            return Coef;
        }
        if (agentCase.IsInstrumental) {
            if (!srcMorph.Case.IsInstrumental) {
                return Coef = -1;
            }
            if (srcMorph.Case.IsNominative) {
                return Coef = 0;
            }
            return Coef = Pullenti.Semantic.SemanticService.Params.TransitiveCoef;
        }
        if (!srcMorph.Case.IsNominative) {
            return Coef = -1;
        }
    }

    if (headForm.Number == Pullenti.Morph.MorphNumber.Plural) {
        if (!srcMorph.Case.IsUndefined) {
            if (headForm.Case.IsUndefined) {
                if (!srcMorph.Case.IsNominative) {
                    return Coef = -1;
                }
            }
            else if (((headForm.Case & srcMorph.Case)).IsUndefined) {
                return Coef = -1;
            }
        }
        if (noplural && !FromIsPlural) {
            if (((srcMorph.Number & Pullenti.Morph.MorphNumber.Plural)) == Pullenti.Morph.MorphNumber.Undefined) {
                return Coef = -1;
            }
            if (!_checkMorphAccord(srcMorph, false, headForm)) {
                return Coef = -1;
            }
            if (srcMorph.Items.Count > 0 && !headForm.Case.IsUndefined) {
                // At least one plural variant must be case-compatible with the verb form.
                bool found = false;
                foreach (Pullenti.Morph.MorphBaseInfo variant in srcMorph.Items) {
                    if (((variant.Number & Pullenti.Morph.MorphNumber.Plural)) != Pullenti.Morph.MorphNumber.Plural) {
                        continue;
                    }
                    if (!variant.Case.IsUndefined && ((variant.Case & headForm.Case)).IsUndefined) {
                        continue;
                    }
                    found = true;
                    break;
                }
                if (!found) {
                    return Coef = -1;
                }
            }
        }
        Plural = 1;
        Coef = Pullenti.Semantic.SemanticService.Params.VerbPlural;
        if (tailForm.NormalCase == "БЫТЬ") {
            if (srcMorph.Case.IsUndefined && From.Source.BeginToken.BeginChar > ToVerb.EndChar) {
                Coef /= 2;
            }
        }
        return Coef;
    }

    if (headForm.Number == Pullenti.Morph.MorphNumber.Singular) {
        Plural = 0;
        if (FromIsPlural) {
            return Coef = -1;
        }
    }
    if (!_checkMorphAccord(srcMorph, false, headForm)) {
        return Coef = -1;
    }
    if (!srcMorph.Case.IsUndefined) {
        if (!srcMorph.Case.IsNominative) {
            // A non-nominative source is tolerated only when the first verb is a participle.
            if (!ToVerb.FirstVerb.IsParticiple) {
                return Coef = -1;
            }
        }
    }
    if (headForm.Misc.Person != Pullenti.Morph.MorphPerson.Undefined) {
        if (((headForm.Misc.Person & Pullenti.Morph.MorphPerson.Third)) == Pullenti.Morph.MorphPerson.Undefined) {
            if (((headForm.Misc.Person & Pullenti.Morph.MorphPerson.First)) == Pullenti.Morph.MorphPerson.First) {
                if (!srcMorph.ContainsAttr("1 л.", null)) {
                    return Coef = -1;
                }
            }
            if (((headForm.Misc.Person & Pullenti.Morph.MorphPerson.Second)) == Pullenti.Morph.MorphPerson.Second) {
                if (!srcMorph.ContainsAttr("2 л.", null)) {
                    return Coef = -1;
                }
            }
        }
    }
    Coef = Pullenti.Semantic.SemanticService.Params.MorphAccord;
    if (srcMorph.Case.IsUndefined) {
        Coef /= 4;
    }
    return Coef;
}
/// <summary>
/// Creates a semantic object for a modifier token <paramref name="a"/> of the noun phrase <paramref name="npt"/>
/// and adds it to <paramref name="gr"/>.
/// </summary>
/// <param name="gr">Graph that receives the new object.</param>
/// <param name="npt">Noun phrase whose morphology is used to pick the matching word form.</param>
/// <param name="a">Modifier token (a pronoun or a non-verb modifier).</param>
/// <returns>The created object, or <c>null</c> when the token is a non-pronoun verb form.</returns>
public static Pullenti.Semantic.SemObject CreateNptAdj(Pullenti.Semantic.SemGraph gr, Pullenti.Ner.Core.NounPhraseToken npt, Pullenti.Ner.MetaToken a) {
    if (a.Morph.Class.IsPronoun) {
        Pullenti.Semantic.SemObject pron = new Pullenti.Semantic.SemObject(gr);
        gr.Objects.Add(pron);
        pron.Tokens.Add(a);
        if (a.BeginToken.Morph.Class.IsPersonalPronoun) {
            pron.Typ = Pullenti.Semantic.SemObjectType.PersonalPronoun;
        }
        else {
            pron.Typ = Pullenti.Semantic.SemObjectType.Pronoun;
        }
        // Take the first word form whose case is compatible with the noun phrase.
        foreach (Pullenti.Morph.MorphBaseInfo item in a.BeginToken.Morph.Items) {
            Pullenti.Morph.MorphWordForm form = item as Pullenti.Morph.MorphWordForm;
            if (form == null) {
                continue;
            }
            if (!npt.Morph.Case.IsUndefined && ((npt.Morph.Case & form.Case)).IsUndefined) {
                continue;
            }
            _setMorph(pron, form);
            if (pron.Morph.NormalFull == "КАКОВ") {
                pron.Morph.NormalFull = "КАКОЙ";
            }
            break;
        }
        if (pron.Morph.NormalFull == null) {
            string text = a.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false);
            pron.Morph.NormalCase = text;
            pron.Morph.NormalFull = text;
        }
        return pron;
    }
    if (a.Morph.Class.IsVerb) {
        return null;
    }

    Pullenti.Semantic.SemObject adj = new Pullenti.Semantic.SemObject(gr);
    gr.Objects.Add(adj);
    adj.Tokens.Add(a);
    adj.Typ = Pullenti.Semantic.SemObjectType.Adjective;
    // Take the first adjective word form that agrees with the noun phrase.
    foreach (Pullenti.Morph.MorphBaseInfo item in a.BeginToken.Morph.Items) {
        if (item.CheckAccord(npt.Morph, false, false) && item.Class.IsAdjective && (item is Pullenti.Morph.MorphWordForm)) {
            _setMorph(adj, item as Pullenti.Morph.MorphWordForm);
            break;
        }
    }
    if (adj.Morph.NormalCase == null) {
        // No agreeing word form was found: fall back to the token's own normalization.
        adj.Morph.NormalCase = a.GetNormalCaseText(Pullenti.Morph.MorphClass.Adjective, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false);
        adj.Morph.NormalFull = a.GetNormalCaseText(Pullenti.Morph.MorphClass.Adjective, Pullenti.Morph.MorphNumber.Singular, Pullenti.Morph.MorphGender.Masculine, false);
        _setMorph0(adj, a.BeginToken.Morph);
    }
    List<Pullenti.Semantic.Utils.DerivateGroup> groups = Pullenti.Semantic.Utils.DerivateService.FindDerivates(adj.Morph.NormalFull, true, null);
    if (groups != null && groups.Count > 0) {
        adj.Concept = groups[0];
    }
    return adj;
}
/// <summary>
/// Computes the coefficient for treating the source as the patient of the verb group.
/// </summary>
/// <param name="noplural">
/// When true and the first verb is plural, the source must additionally agree with the verb
/// (unless <c>FromIsPlural</c> is set).
/// </param>
/// <returns>
/// The computed coefficient, or -1 when the link is rejected. Most accepting paths also store the
/// value in <c>Coef</c>; some rejecting paths return -1 without touching <c>Coef</c>.
/// </returns>
double _calcPacient(bool noplural) {
    if (!string.IsNullOrEmpty(FromPrep)) {
        return (Coef = -1);
    }

    Pullenti.Morph.MorphWordForm firstForm = ToVerb.FirstVerb.VerbMorph;
    if (firstForm == null) {
        return -1;
    }
    Pullenti.Morph.MorphWordForm lastForm = ToVerb.LastVerb.VerbMorph;
    if (lastForm == null) {
        return -1;
    }
    Pullenti.Ner.MorphCollection source = FromMorph;

    // ---- Passive voice: the patient must agree with the verb like a grammatical subject. ----
    if (lastForm.Misc.Voice == Pullenti.Morph.MorphVoice.Passive || ToVerb.LastVerb.Morph.ContainsAttr("страд.з.", null)) {
        if (firstForm.Number != Pullenti.Morph.MorphNumber.Plural) {
            if (firstForm.Number == Pullenti.Morph.MorphNumber.Singular) {
                Plural = 0;
                if (FromIsPlural) {
                    return -1;
                }
            }
            if (!_checkMorphAccord(source, false, firstForm)) {
                return -1;
            }
            Coef = Pullenti.Semantic.SemanticService.Params.MorphAccord;
            return Coef;
        }

        if (noplural && !FromIsPlural) {
            if (!_checkMorphAccord(source, false, firstForm)) {
                return -1;
            }
            if (source.Items.Count > 0 && !firstForm.Case.IsUndefined) {
                // Require at least one plural variant whose case does not contradict the verb's case.
                bool pluralFits = false;
                foreach (Pullenti.Morph.MorphBaseInfo variant in source.Items) {
                    if ((variant.Number & Pullenti.Morph.MorphNumber.Plural) != Pullenti.Morph.MorphNumber.Plural) {
                        continue;
                    }
                    if (!variant.Case.IsUndefined && (variant.Case & firstForm.Case).IsUndefined) {
                        continue;
                    }
                    pluralFits = true;
                    break;
                }
                if (!pluralFits) {
                    return (Coef = -1);
                }
            }
        }
        Coef = Pullenti.Semantic.SemanticService.Params.VerbPlural;
        Plural = 1;
        return Coef;
    }

    // ---- Collect control-model facts about the last verb. ----
    bool isTrans = false;
    bool isRefDative = false;
    List<Pullenti.Semantic.Utils.DerivateGroup> groups = Pullenti.Semantic.Utils.DerivateService.FindDerivates(lastForm.NormalFull ?? lastForm.NormalCase, true, null);
    if (groups != null) {
        foreach (Pullenti.Semantic.Utils.DerivateGroup grp in groups) {
            if (grp.Cm.Transitive) {
                isTrans = true;
            }
            if (grp.CmRev.Agent != null && !grp.CmRev.Agent.Case.IsNominative) {
                isRefDative = true;
            }
        }
    }

    // ---- Reflexive verb. ----
    if (_isRevVerb(lastForm)) {
        if (!string.IsNullOrEmpty(FromPrep)) {
            return -1;
        }
        if (source.Case.IsUndefined) {
            return (Coef = 0);
        }
        if (isRefDative) {
            if (source.Case.IsNominative) {
                return (Coef = Pullenti.Semantic.SemanticService.Params.TransitiveCoef);
            }
        } else if (source.Case.IsInstrumental) {
            return (Coef = Pullenti.Semantic.SemanticService.Params.TransitiveCoef);
        }
        return -1;
    }

    // If the last verb is not transitive, the first verb of the group may still be.
    if (lastForm != firstForm && !isTrans) {
        groups = Pullenti.Semantic.Utils.DerivateService.FindDerivates(firstForm.NormalFull ?? firstForm.NormalCase, true, null);
        if (groups != null) {
            foreach (Pullenti.Semantic.Utils.DerivateGroup grp in groups) {
                if (grp.Cm.Transitive) {
                    isTrans = true;
                }
            }
        }
    }

    // ---- Transitive verb: accusative is required when the case is known. ----
    if (isTrans) {
        if (!string.IsNullOrEmpty(FromPrep)) {
            return -1;
        }
        if (!source.Case.IsUndefined) {
            if (!source.Case.IsAccusative) {
                return -1;
            }
            Coef = Pullenti.Semantic.SemanticService.Params.TransitiveCoef;
            // Every additional possible case halves the coefficient.
            if (source.Case.IsDative) {
                Coef /= 2;
            }
            if (source.Case.IsGenitive) {
                Coef /= 2;
            }
            if (source.Case.IsInstrumental) {
                Coef /= 2;
            }
            return Coef;
        }
    }

    // ---- Special handling of the verb "БЫТЬ". ----
    if (lastForm.NormalCase == "БЫТЬ") {
        if (!string.IsNullOrEmpty(FromPrep)) {
            return -1;
        }
        if (source.Case.IsInstrumental) {
            return (Coef = Pullenti.Semantic.SemanticService.Params.TransitiveCoef);
        }
        if (source.Case.IsNominative) {
            // A nominative placed after the verb gets the full coefficient, otherwise half.
            if (From.Source.BeginToken.BeginChar > ToVerb.EndChar) {
                return (Coef = Pullenti.Semantic.SemanticService.Params.TransitiveCoef);
            }
            return (Coef = Pullenti.Semantic.SemanticService.Params.TransitiveCoef / 2);
        }
        if (source.Case.IsUndefined) {
            return (Coef = Pullenti.Semantic.SemanticService.Params.TransitiveCoef / 2);
        }
    }

    return -1;
}
/// <summary>
/// Converts a verb phrase into a chain of semantic objects inside the graph.
/// </summary>
/// <remarks>
/// Adverb items either become attributes (when the parsed adverb has a defined attribute type) or
/// separate adverb objects; both are attached to the next verb object created, or to the last verb
/// object if they trail the phrase. Consecutive verb objects are linked with "что делать" links.
/// </remarks>
/// <param name="gr">Graph that receives the created objects and links.</param>
/// <param name="vpt">Verb phrase to convert.</param>
/// <returns>The first verb object of the chain, or <c>null</c> if no verb object was created.</returns>
public static Pullenti.Semantic.SemObject CreateVerbGroup(Pullenti.Semantic.SemGraph gr, Pullenti.Ner.Core.VerbPhraseToken vpt) {
    List<Pullenti.Semantic.SemObject> verbs = new List<Pullenti.Semantic.SemObject>();
    List<Pullenti.Semantic.SemAttribute> pendingAttrs = new List<Pullenti.Semantic.SemAttribute>();
    List<Pullenti.Semantic.SemObject> pendingAdverbs = new List<Pullenti.Semantic.SemObject>();

    for (int idx = 0; idx < vpt.Items.Count; idx++) {
        Pullenti.Ner.Core.VerbPhraseItemToken item = vpt.Items[idx];

        // ---- Adverbs: collected until the next verb object appears. ----
        if (item.IsAdverb) {
            AdverbToken parsed = AdverbToken.TryParse(item.BeginToken);
            if (parsed == null) {
                continue;
            }
            if (parsed.Typ != Pullenti.Semantic.SemAttributeType.Undefined) {
                Pullenti.Semantic.SemAttribute attr = new Pullenti.Semantic.SemAttribute();
                attr.Not = parsed.Not;
                attr.Typ = parsed.Typ;
                attr.Spelling = parsed.Spelling;
                pendingAttrs.Add(attr);
                continue;
            }
            Pullenti.Semantic.SemObject adverbObj = CreateAdverb(gr, parsed);
            if (pendingAttrs.Count > 0) {
                adverbObj.Attrs.AddRange(pendingAttrs);
                pendingAttrs.Clear();
            }
            pendingAdverbs.Add(adverbObj);
            continue;
        }

        // "БЫТЬ" is skipped when any non-adverb item follows it in the phrase.
        if (item.Normal == "БЫТЬ") {
            int probe = idx + 1;
            while (probe < vpt.Items.Count && vpt.Items[probe].IsAdverb) {
                probe++;
            }
            if (probe < vpt.Items.Count) {
                continue;
            }
        }

        Pullenti.Semantic.SemObject verbObj = new Pullenti.Semantic.SemObject(gr);
        gr.Objects.Add(verbObj);
        verbObj.Tokens.Add(item);
        item.Tag = verbObj;
        _setMorph(verbObj, item.VerbMorph);
        string itemNormal = item.Normal;
        verbObj.Morph.NormalFull = itemNormal;
        verbObj.Morph.NormalCase = itemNormal;

        if (item.IsParticiple || item.IsDeeParticiple) {
            verbObj.Typ = Pullenti.Semantic.SemObjectType.Participle;
            verbObj.Morph.NormalFull = item.EndToken.GetNormalCaseText(Pullenti.Morph.MorphClass.Verb, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false) ?? verbObj.Morph.NormalCase;
            verbObj.Morph.NormalCase = item.EndToken.GetNormalCaseText(Pullenti.Morph.MorphClass.Adjective, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false);

            if (verbObj.Morph.NormalCase == verbObj.Morph.NormalFull && item.Normal.EndsWith("Й")) {
                // Both forms look adjectival: search the derivates for a pure verb spelling.
                // Every group is scanned; within a group the first match is taken.
                List<Pullenti.Semantic.Utils.DerivateGroup> derivs = Pullenti.Semantic.Utils.DerivateService.FindDerivates(item.Normal, true, null);
                if (derivs != null) {
                    foreach (Pullenti.Semantic.Utils.DerivateGroup grp in derivs) {
                        foreach (Pullenti.Semantic.Utils.DerivateWord word in grp.Words) {
                            if (word.Lang == item.EndToken.Morph.Language && word.Class.IsVerb && !word.Class.IsAdjective) {
                                verbObj.Morph.NormalFull = word.Spelling;
                                break;
                            }
                        }
                    }
                }
            } else if (verbObj.Morph.NormalCase == verbObj.Morph.NormalFull && item.IsParticiple && verbObj.Morph.NormalFull.EndsWith("Ь")) {
                // Both forms look like an infinitive: find an adjectival (participle) spelling.
                foreach (Pullenti.Morph.MorphBaseInfo variant in item.EndToken.Morph.Items) {
                    Pullenti.Morph.MorphWordForm form = variant as Pullenti.Morph.MorphWordForm;
                    if (form == null) {
                        continue;
                    }
                    if (form.NormalCase.EndsWith("Й") || (form.NormalFull != null && form.NormalFull.EndsWith("Й"))) {
                        verbObj.Morph.NormalCase = form.NormalFull ?? form.NormalCase;
                        break;
                    }
                }
                if (verbObj.Morph.NormalCase == verbObj.Morph.NormalFull) {
                    List<Pullenti.Semantic.Utils.DerivateGroup> derivs = Pullenti.Semantic.Utils.DerivateService.FindDerivates(verbObj.Morph.NormalCase, true, null);
                    if (derivs != null) {
                        // Only the first derivate group is inspected here.
                        foreach (Pullenti.Semantic.Utils.DerivateGroup grp in derivs) {
                            foreach (Pullenti.Semantic.Utils.DerivateWord word in grp.Words) {
                                if (word.Lang == item.EndToken.Morph.Language && word.Class.IsVerb && word.Class.IsAdjective) {
                                    verbObj.Morph.NormalCase = word.Spelling;
                                    break;
                                }
                            }
                            break;
                        }
                    }
                }
            }
        } else {
            verbObj.Typ = Pullenti.Semantic.SemObjectType.Verb;
        }

        // Strip the reflexive ending from the full normal form of reflexive verbs.
        if (item.VerbMorph != null && item.VerbMorph.ContainsAttr("возвр.", null)
            && (verbObj.Morph.NormalFull.EndsWith("СЯ") || verbObj.Morph.NormalFull.EndsWith("СЬ"))) {
            verbObj.Morph.NormalFull = verbObj.Morph.NormalFull.Substring(0, verbObj.Morph.NormalFull.Length - 2);
        }

        List<Pullenti.Semantic.Utils.DerivateGroup> concepts = Pullenti.Semantic.Utils.DerivateService.FindDerivates(verbObj.Morph.NormalFull, true, null);
        if (concepts != null && concepts.Count > 0) {
            verbObj.Concept = concepts[0];
            if (item.VerbMorph != null && item.VerbMorph.Misc.Aspect == Pullenti.Morph.MorphAspect.Imperfective) {
                // For imperfective verbs, use the spelling of the first perfective pure verb in the group.
                foreach (Pullenti.Semantic.Utils.DerivateWord word in concepts[0].Words) {
                    if (word.Class.IsVerb && !word.Class.IsAdjective && word.Aspect == Pullenti.Morph.MorphAspect.Perfective) {
                        verbObj.Morph.NormalFull = word.Spelling;
                        break;
                    }
                }
            }
        }

        verbObj.Not = item.Not;
        verbs.Add(verbObj);

        // Flush everything collected before this verb onto it.
        if (pendingAttrs.Count > 0) {
            verbObj.Attrs.AddRange(pendingAttrs);
            pendingAttrs.Clear();
        }
        foreach (Pullenti.Semantic.SemObject adv in pendingAdverbs) {
            gr.AddLink(Pullenti.Semantic.SemLinkType.Detail, verbObj, adv, "как", false, null);
        }
        pendingAdverbs.Clear();
    }

    if (verbs.Count == 0) {
        return null;
    }

    // Trailing attributes / adverbs go to the last verb object.
    if (pendingAttrs.Count > 0) {
        verbs[verbs.Count - 1].Attrs.AddRange(pendingAttrs);
    }
    if (pendingAdverbs.Count > 0) {
        Pullenti.Semantic.SemObject lastVerb = verbs[verbs.Count - 1];
        foreach (Pullenti.Semantic.SemObject adv in pendingAdverbs) {
            gr.AddLink(Pullenti.Semantic.SemLinkType.Detail, lastVerb, adv, "как", false, null);
        }
    }

    // Chain the verbs from the end backwards: each verb details the one before it.
    int pos = verbs.Count - 1;
    while (pos > 0) {
        gr.AddLink(Pullenti.Semantic.SemLinkType.Detail, verbs[pos - 1], verbs[pos], "что делать", false, null);
        pos--;
    }
    return verbs[0];
}
/// <summary>
/// Builds the list of possible semantic links between a verb phrase and a dependent item.
/// </summary>
/// <remarks>
/// Verb-phrase dependents are delegated to <c>_tryCreateInf</c>. For other dependents the method
/// considers, in order: a nominative subject link, an instrumental link for reflexive/passive
/// verbs, a dative link not covered by the control model, the roles produced by
/// <c>_createRoles</c>, and finally the idiomatic patients listed in the derivate group model.
/// </remarks>
/// <param name="vpt1">The governing verb phrase.</param>
/// <param name="slave">The dependent item.</param>
/// <param name="gr">Derivate group of the verb; may be <c>null</c>.</param>
/// <returns>
/// The candidate links; empty when <paramref name="slave"/> is neither a verb phrase nor a
/// <c>SemanticAbstractSlave</c>, or when a nominative dependent meets a first verb without morphology.
/// </returns>
static List<SemanticLink> _tryCreateVerb(Pullenti.Ner.Core.VerbPhraseToken vpt1, Pullenti.Ner.MetaToken slave, Pullenti.Semantic.Utils.DerivateGroup gr) {
    if (slave is Pullenti.Ner.Core.VerbPhraseToken) {
        return _tryCreateInf(vpt1, slave as Pullenti.Ner.Core.VerbPhraseToken, gr);
    }

    SemanticAbstractSlave sla2 = slave as SemanticAbstractSlave;
    List<SemanticLink> res = new List<SemanticLink>();
    if (sla2 == null) {
        return res;
    }

    Pullenti.Semantic.Utils.ControlModelItem cit = FindControlItem(vpt1.LastVerb, gr);
    string prep = sla2.Preposition;
    Pullenti.Morph.MorphBaseInfo morph = (Pullenti.Morph.MorphBaseInfo)sla2.Morph;
    bool isRev1 = vpt1.LastVerb.IsVerbReversive || vpt1.LastVerb.IsVerbPassive;
    bool noNomin = false;
    bool noInstr = false;

    // ---- Nominative dependent without preposition: subject-like link. ----
    if (prep == null && morph.Case.IsNominative && !vpt1.FirstVerb.IsParticiple) {
        bool ok = true;
        Pullenti.Morph.MorphWordForm vm = vpt1.FirstVerb.VerbMorph;
        if (vm == null) {
            return res;
        }

        // A singular finite verb cannot take a plural subject.
        if (vm.Number == Pullenti.Morph.MorphNumber.Singular
            && morph.Number == Pullenti.Morph.MorphNumber.Plural
            && !vpt1.FirstVerb.IsVerbInfinitive) {
            ok = false;
        }

        if (!CheckMorphAccord(morph, false, vm, false)) {
            if (!vpt1.FirstVerb.IsVerbInfinitive) {
                ok = false;
            }
        } else if (vm.Misc.Person != Pullenti.Morph.MorphPerson.Undefined) {
            // A verb that cannot be 3rd person needs a dependent marked with the matching person.
            if ((vm.Misc.Person & Pullenti.Morph.MorphPerson.Third) == Pullenti.Morph.MorphPerson.Undefined) {
                if ((vm.Misc.Person & Pullenti.Morph.MorphPerson.First) == Pullenti.Morph.MorphPerson.First) {
                    if (!morph.ContainsAttr("1 л.", null)) {
                        ok = false;
                    }
                }
                if ((vm.Misc.Person & Pullenti.Morph.MorphPerson.Second) == Pullenti.Morph.MorphPerson.Second) {
                    if (!morph.ContainsAttr("2 л.", null)) {
                        ok = false;
                    }
                }
            }
        }

        noNomin = true;
        if (ok) {
            Pullenti.Semantic.Utils.ControlModelItem cit00 = cit;
            bool isRev0 = isRev1;
            if (vpt1.FirstVerb != vpt1.LastVerb
                && (vpt1.FirstVerb.IsVerbReversive || vpt1.FirstVerb.IsVerbPassive || vpt1.FirstVerb.Normal == "ИМЕТЬ")) {
                // The first verb governs the subject: look up its own control item instead.
                cit00 = null;
                isRev0 = true;
                List<Pullenti.Semantic.Utils.DerivateGroup> grs = FindDerivates(vpt1.FirstVerb);
                if (grs != null) {
                    foreach (Pullenti.Semantic.Utils.DerivateGroup gg in grs) {
                        cit00 = FindControlItem(vpt1.FirstVerb, gg);
                        if (cit00 != null) {
                            break;
                        }
                    }
                }
            }

            SemanticLink sl = null;
            bool addagent = false;
            if (cit00 == null) {
                // No control model: synthesize a modelled link.
                sl = new SemanticLink() {
                    Modelled = true,
                    Role = (isRev0 ? SemanticRole.Pacient : SemanticRole.Agent),
                    Rank = 1,
                    Question = Pullenti.Semantic.Utils.ControlModelQuestion.BaseNominative,
                    IsPassive = isRev0
                };
            } else {
                foreach (KeyValuePair<Pullenti.Semantic.Utils.ControlModelQuestion, SemanticRole> kp in cit00.Links) {
                    Pullenti.Semantic.Utils.ControlModelQuestion q = kp.Key;
                    if (q.Check(null, Pullenti.Morph.MorphCase.Nominative)) {
                        sl = new SemanticLink() { Role = kp.Value, Rank = 2, Question = q, IsPassive = isRev0 };
                        if (sl.Role == SemanticRole.Agent) {
                            sl.IsPassive = false;
                        } else if (sl.Role == SemanticRole.Pacient && cit00.NominativeCanBeAgentAndPacient && vpt1.LastVerb.IsVerbReversive) {
                            addagent = true;
                        }
                        break;
                    }
                }
            }

            if (sl != null) {
                // Penalize ambiguity and a dependent that starts after the verb phrase start.
                if (cit00 == null && morph.Case.IsInstrumental && isRev0) {
                    sl.Rank -= 0.5;
                }
                if (morph.Case.IsAccusative) {
                    sl.Rank -= 0.5;
                }
                if (sla2.BeginChar > vpt1.BeginChar) {
                    sl.Rank -= 0.5;
                }
                res.Add(sl);
                if (addagent) {
                    res.Add(new SemanticLink() { Role = SemanticRole.Agent, Rank = sl.Rank, Question = sl.Question });
                }
            }
        }
    }

    // ---- Instrumental dependent of a reflexive/passive verb: agent-like link. ----
    if (prep == null && isRev1 && morph.Case.IsInstrumental) {
        noInstr = true;
        Pullenti.Semantic.Utils.ControlModelItem cit00 = cit;
        SemanticLink sl = null;
        if (cit00 == null) {
            sl = new SemanticLink() {
                Modelled = true,
                Role = SemanticRole.Agent,
                Rank = 1,
                Question = Pullenti.Semantic.Utils.ControlModelQuestion.BaseInstrumental,
                IsPassive = true
            };
        } else {
            foreach (KeyValuePair<Pullenti.Semantic.Utils.ControlModelQuestion, SemanticRole> kp in cit00.Links) {
                Pullenti.Semantic.Utils.ControlModelQuestion q = kp.Key;
                if (q.Check(null, Pullenti.Morph.MorphCase.Instrumental)) {
                    sl = new SemanticLink() { Role = kp.Value, Rank = 2, Question = q };
                    if (sl.Role == SemanticRole.Agent) {
                        sl.IsPassive = true;
                    }
                    break;
                }
            }
        }

        if (sl != null) {
            if (cit00 == null && morph.Case.IsNominative) {
                sl.Rank -= 0.5;
            }
            if (morph.Case.IsAccusative) {
                sl.Rank -= 0.5;
            }
            if (sla2.BeginChar < vpt1.BeginChar) {
                sl.Rank -= 0.5;
            }
            res.Add(sl);
            if ((gr != null && gr.Model.Items.Count > 0 && gr.Model.Items[0].Typ == Pullenti.Semantic.Utils.ControlModelItemType.Verb)
                && gr.Model.Items[0].Links.ContainsKey(Pullenti.Semantic.Utils.ControlModelQuestion.BaseInstrumental)) {
                // The verb model has its own instrumental role: prefer it and zero the generic link.
                sl.Rank = 0;
                SemanticLink sl0 = new SemanticLink() {
                    Question = sl.Question,
                    Rank = 1,
                    Role = gr.Model.Items[0].Links[Pullenti.Semantic.Utils.ControlModelQuestion.BaseInstrumental]
                };
                res.Insert(0, sl0);
            }
        }
    }

    // ---- Dative dependent not described by the control model. ----
    if (prep == null && morph.Case.IsDative
        && (cit == null || !cit.Links.ContainsKey(Pullenti.Semantic.Utils.ControlModelQuestion.BaseDative))) {
        SemanticLink sl = new SemanticLink() {
            Modelled = cit == null,
            Role = SemanticRole.Strong,
            Rank = 1,
            Question = Pullenti.Semantic.Utils.ControlModelQuestion.BaseDative
        };
        if (morph.Case.IsAccusative || morph.Case.IsNominative) {
            sl.Rank -= 0.5;
        }
        if (vpt1.EndToken.Next != sla2.BeginToken) {
            sl.Rank -= 0.5;
        }
        if (cit != null) {
            sl.Rank -= 0.5;
        }
        res.Add(sl);
    }

    _createRoles(cit, prep, morph.Case, res, noNomin, noInstr);

    // ---- Idiomatic patients listed in the group model. ----
    if (gr != null && gr.Model.Pacients.Count > 0) {
        bool ok = false;
        foreach (string n in gr.Model.Pacients) {
            if (sla2.Source != null) {
                if (sla2.Source.EndToken.IsValue(n, null)) {
                    ok = true;
                    break;
                }
            } else if (sla2.EndToken.IsValue(n, null)) {
                ok = true;
                break;
            }
        }

        if (ok) {
            if (res.Count == 0) {
                ok = false;
                if (prep == null && isRev1 && morph.Case.IsNominative) {
                    ok = true;
                } else if (prep == null && !isRev1 && morph.Case.IsAccusative) {
                    ok = true;
                }
                if (ok) {
                    res.Add(new SemanticLink() {
                        Role = SemanticRole.Pacient,
                        Question = (isRev1 ? Pullenti.Semantic.Utils.ControlModelQuestion.BaseNominative : Pullenti.Semantic.Utils.ControlModelQuestion.BaseAccusative),
                        Idiom = true
                    });
                }
            } else {
                // Existing links are boosted and marked as idiomatic.
                foreach (SemanticLink r in res) {
                    r.Rank += 4;
                    if (r.Role == SemanticRole.Common) {
                        r.Role = SemanticRole.Strong;
                    }
                    if (vpt1.EndToken.Next == sla2.BeginToken) {
                        r.Rank += 2;
                    }
                    r.Idiom = true;
                }
            }
        }
    }

    return res;
}
/// <summary>
/// Tries to parse a numeric value, range or tolerance with optional units, starting at <paramref name="t"/>.
/// </summary>
/// <remarks>
/// Parsing stages: leading separators and min/max/approximation keywords, a parenthesized value,
/// range keywords and comparison signs, plus/minus markers, the number itself (or a special term,
/// or a lone unit), the units, and finally an optional second value parsed recursively with
/// <paramref name="second"/> set to true.
/// </remarks>
/// <param name="t">Token to start from.</param>
/// <param name="addUnits">Additional unit termins passed to the unit parser.</param>
/// <param name="second">True when parsing the second value of a range / tolerance.</param>
/// <param name="canOmitNumber">Allows a result without an explicit number.</param>
/// <param name="canBeNan">Allows a NaN value when a keyword was seen but no number follows.</param>
/// <returns>The parsed token, or <c>null</c> when nothing suitable starts at <paramref name="t"/>.</returns>
internal static NumbersWithUnitToken _tryParse(Pullenti.Ner.Token t, Pullenti.Ner.Core.TerminCollection addUnits, bool second, bool canOmitNumber, bool canBeNan) {
    if (t == null) {
        return null;
    }

    // Skip leading comma / "and" / "НО".
    while (t != null) {
        if (t.IsCommaAnd || t.IsValue("НО", null)) {
            t = t.Next;
        } else {
            break;
        }
    }

    Pullenti.Ner.Token t0 = t;
    bool about = false;
    bool hasKeyw = false;
    int minMax = 0;
    Pullenti.Ner.Token ttt = _isMinOrMax(t, ref minMax);
    if (ttt != null) {
        t = ttt.Next;
        if (t == null) {
            return null;
        }
    }
    if (t == null) {
        return null;
    }

    if (t.IsChar('~') || t.IsValue("ОКОЛО", null) || t.IsValue("ПРИМЕРНО", null)) {
        t = t.Next;
        about = true;
        hasKeyw = true;
        if (t == null) {
            return null;
        }
    }

    if (t.IsValue("В", null) && t.Next != null) {
        // FIX: the second alternative used to test `t` (already known to be "В"),
        // so "В ДИАПАЗОНЕ" could never match; the following token must be tested.
        if (t.Next.IsValue("ПРЕДЕЛ", null) || t.Next.IsValue("ДИАПАЗОН", null)) {
            t = t.Next.Next;
            if (t == null) {
                return null;
            }
        }
    }

    // ---- Value enclosed in parentheses. ----
    if (t0.IsChar('(')) {
        NumbersWithUnitToken mt0 = _tryParse(t.Next, addUnits, false, false, false);
        if (mt0 != null && mt0.EndToken.Next != null && mt0.EndToken.Next.IsChar(')')) {
            if (second) {
                // As a second value, only a symmetric (-x .. +x) interval is accepted.
                if (!(mt0.FromVal != null && mt0.ToVal != null && mt0.FromVal.Value == (-mt0.ToVal.Value))) {
                    return null;
                }
            }
            mt0.BeginToken = t0;
            mt0.EndToken = mt0.EndToken.Next;
            List<UnitToken> uu = UnitToken.TryParseList(mt0.EndToken.Next, addUnits, false);
            if (uu != null && mt0.Units.Count == 0) {
                mt0.Units = uu;
                mt0.EndToken = uu[uu.Count - 1].EndToken;
            }
            return mt0;
        }
    }

    bool plusminus = false;
    bool unitBefore = false;
    bool isAge = false;
    DiapTyp dty = DiapTyp.Undefined;
    Pullenti.Ner.MetaToken whd = null;
    List<UnitToken> uni = null;

    // ---- Range keyword or comparison sign. ----
    Pullenti.Ner.Core.TerminToken tok = (m_Termins == null ? null : m_Termins.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No));
    if (tok != null) {
        if (tok.EndToken.IsValue("СТАРШЕ", null) || tok.EndToken.IsValue("МЛАДШЕ", null)) {
            isAge = true;
        }
        t = tok.EndToken.Next;
        dty = (DiapTyp)tok.Termin.Tag;
        hasKeyw = true;

        if (!tok.IsWhitespaceAfter) {
            if (t == null) {
                return null;
            }
            if (t is Pullenti.Ner.NumberToken) {
                if (tok.BeginToken == tok.EndToken && !tok.Chars.IsAllLower) {
                    return null;
                }
            } else if (t.IsComma && t.Next != null && t.Next.IsValue("ЧЕМ", null)) {
                t = t.Next.Next;
                if (t != null && t.Morph.Class.IsPreposition) {
                    t = t.Next;
                }
            } else if (t.IsCharOf(":,(") || t.IsTableControlChar) {
                // Allowed glued punctuation after the keyword.
            } else {
                return null;
            }
        }

        if (t != null && t.IsChar('(')) {
            // Units in parentheses right after the keyword, followed by the value.
            uni = UnitToken.TryParseList(t.Next, addUnits, false);
            if (uni != null) {
                t = uni[uni.Count - 1].EndToken.Next;
                while (t != null) {
                    if (t.IsCharOf("):")) {
                        t = t.Next;
                    } else {
                        break;
                    }
                }
                NumbersWithUnitToken mt0 = _tryParse(t, addUnits, false, canOmitNumber, false);
                if (mt0 != null && mt0.Units.Count == 0) {
                    mt0.BeginToken = t0;
                    mt0.Units = uni;
                    return mt0;
                }
            }
            whd = _tryParseWHL(t);
            if (whd != null) {
                t = whd.EndToken.Next;
            }
        } else if (t != null && t.IsValue("IP", null)) {
            uni = UnitToken.TryParseList(t, addUnits, false);
            if (uni != null) {
                t = uni[uni.Count - 1].EndToken.Next;
            }
        }

        if ((t != null && t.IsHiphen && t.IsWhitespaceBefore) && t.IsWhitespaceAfter) {
            t = t.Next;
        }
    } else if (t.IsChar('<')) {
        dty = DiapTyp.Ls;
        t = t.Next;
        hasKeyw = true;
        if (t != null && t.IsChar('=')) {
            t = t.Next;
            dty = DiapTyp.Le;
        }
    } else if (t.IsChar('>')) {
        dty = DiapTyp.Gt;
        t = t.Next;
        hasKeyw = true;
        if (t != null && t.IsChar('=')) {
            t = t.Next;
            dty = DiapTyp.Ge;
        }
    } else if (t.IsChar('≤')) {
        dty = DiapTyp.Le;
        hasKeyw = true;
        t = t.Next;
    } else if (t.IsChar('≥')) {
        dty = DiapTyp.Ge;
        hasKeyw = true;
        t = t.Next;
    } else if (t.IsValue("IP", null)) {
        uni = UnitToken.TryParseList(t, addUnits, false);
        if (uni != null) {
            t = uni[uni.Count - 1].EndToken.Next;
        }
    } else if (t.IsValue("ЗА", null) && (t.Next is Pullenti.Ner.NumberToken)) {
        dty = DiapTyp.Ge;
        t = t.Next;
    }

    while (t != null && (t.IsCharOf(":,") || t.IsValue("ЧЕМ", null) || t.IsTableControlChar)) {
        t = t.Next;
    }

    // ---- Sign / plus-minus / range separators before the number. ----
    if (t != null) {
        if (t.IsChar('+') || t.IsValue("ПЛЮС", null)) {
            t = t.Next;
            if (t != null && !t.IsWhitespaceBefore) {
                if (t.IsHiphen) {
                    t = t.Next;
                    plusminus = true;
                } else if ((t.IsCharOf("\\/") && t.Next != null && !t.IsNewlineAfter) && t.Next.IsHiphen) {
                    t = t.Next.Next;
                    plusminus = true;
                }
            }
        } else if (second && (t.IsCharOf("\\/÷…~"))) {
            t = t.Next;
        } else if ((t.IsHiphen && t == t0 && !second) && m_Termins != null && m_Termins.TryParse(t.Next, Pullenti.Ner.Core.TerminParseAttr.No) != null) {
            // FIX: m_Termins is null-checked above, so it must be guarded here as well.
            tok = m_Termins.TryParse(t.Next, Pullenti.Ner.Core.TerminParseAttr.No);
            t = tok.EndToken.Next;
            dty = (DiapTyp)tok.Termin.Tag;
        } else if (t.IsHiphen && t == t0 && (t.IsWhitespaceAfter || second)) {
            t = t.Next;
        } else if (t.IsChar('±')) {
            t = t.Next;
            plusminus = true;
            hasKeyw = true;
        } else if ((second && t.IsChar('.') && t.Next != null) && t.Next.IsChar('.')) {
            t = t.Next.Next;
            if (t != null && t.IsChar('.')) {
                t = t.Next;
            }
        }
    }

    // ---- The number (possibly preceded by the unit). ----
    Pullenti.Ner.NumberToken num = Pullenti.Ner.Core.NumberHelper.TryParseRealNumber(t, true, false);
    if (num == null) {
        uni = UnitToken.TryParseList(t, addUnits, false);
        if (uni != null) {
            unitBefore = true;
            t = uni[uni.Count - 1].EndToken.Next;
            bool delim = false;
            while (t != null) {
                if (t.IsCharOf(":,")) {
                    delim = true;
                    t = t.Next;
                } else if (t.IsHiphen && t.IsWhitespaceAfter) {
                    delim = true;
                    t = t.Next;
                } else {
                    break;
                }
            }
            if (!delim) {
                if (t == null) {
                    if (!(hasKeyw && canBeNan)) {
                        return null;
                    }
                } else if (!t.IsWhitespaceBefore) {
                    return null;
                }
                // FIX: t may legitimately be null here (keyword + NaN allowed); do not dereference it.
                if (t != null && t.Next != null && t.IsHiphen && t.IsWhitespaceAfter) {
                    delim = true;
                    t = t.Next;
                }
            }
            num = Pullenti.Ner.Core.NumberHelper.TryParseRealNumber(t, true, false);
        }
    }

    NumbersWithUnitToken res = null;
    double rval = (double)0;
    if (num == null) {
        Pullenti.Ner.Core.TerminToken tt = m_Spec.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
        if (tt != null) {
            // Special term carrying both the value (Tag) and the unit name (Tag2).
            rval = (double)tt.Termin.Tag;
            string unam = (string)tt.Termin.Tag2;
            foreach (Unit u in UnitsHelper.Units) {
                if (u.FullnameCyr == unam) {
                    uni = new List<UnitToken>();
                    uni.Add(new UnitToken(t, t) { Unit = u });
                    break;
                }
            }
            if (uni == null) {
                return null;
            }
            res = new NumbersWithUnitToken(t0, tt.EndToken) { About = about };
            t = tt.EndToken.Next;
        } else {
            if (!canOmitNumber && !hasKeyw && !canBeNan) {
                return null;
            }
            if ((uni != null && uni.Count == 1 && uni[0].BeginToken == uni[0].EndToken) && uni[0].LengthChar > 3) {
                // A lone single-token unit longer than 3 characters stands for the value 1.
                rval = 1;
                res = new NumbersWithUnitToken(t0, uni[uni.Count - 1].EndToken) { About = about };
                t = res.EndToken.Next;
            } else if (hasKeyw && canBeNan) {
                rval = double.NaN;
                res = new NumbersWithUnitToken(t0, t0) { About = about };
                if (t != null) {
                    res.EndToken = t.Previous;
                } else {
                    // End of text reached: extend the result to the last token.
                    for (t = t0; t != null; t = t.Next) {
                        res.EndToken = t;
                    }
                }
            } else {
                return null;
            }
        }
    } else {
        if ((t == t0 && t0.IsHiphen && !t.IsWhitespaceBefore) && !t.IsWhitespaceAfter && (num.RealValue < 0)) {
            // Leading glued hyphen: reparse the number without it.
            num = Pullenti.Ner.Core.NumberHelper.TryParseRealNumber(t.Next, true, false);
            if (num == null) {
                return null;
            }
        }
        if (t == t0 && (t is Pullenti.Ner.NumberToken) && t.Morph.Class.IsAdjective) {
            Pullenti.Ner.TextToken nn = (t as Pullenti.Ner.NumberToken).EndToken as Pullenti.Ner.TextToken;
            if (nn == null) {
                return null;
            }
            string norm = nn.GetNormalCaseText(Pullenti.Morph.MorphClass.Adjective, Pullenti.Morph.MorphNumber.Singular, Pullenti.Morph.MorphGender.Undefined, false);
            if (!((norm.EndsWith("Ь") || norm == "ЧЕТЫРЕ" || norm == "ТРИ") || norm == "ДВА")) {
                // Reject the word when it still analyses as an adjective behind an artificial prefix.
                Pullenti.Morph.MorphWordForm mi = Pullenti.Morph.MorphologyService.GetWordBaseInfo("КОКО" + nn.Term, null, false, false);
                if (mi.Class.IsAdjective) {
                    return null;
                }
            }
        }
        t = num.EndToken.Next;
        res = new NumbersWithUnitToken(t0, num.EndToken) { About = about };
        rval = num.RealValue;
    }

    // ---- Units after the number. ----
    if (uni == null) {
        uni = UnitToken.TryParseList(t, addUnits, false);
        if (uni != null) {
            if ((plusminus && second && uni.Count >= 1) && uni[0].Unit == UnitsHelper.uPercent) {
                res.EndToken = uni[0].EndToken;
                res.PlusMinusPercent = true;
                Pullenti.Ner.Token tt1 = uni[0].EndToken.Next;
                uni = UnitToken.TryParseList(tt1, addUnits, false);
                if (uni != null) {
                    res.Units = uni;
                    res.EndToken = uni[uni.Count - 1].EndToken;
                }
            } else {
                res.Units = uni;
                res.EndToken = uni[uni.Count - 1].EndToken;
            }
            t = res.EndToken.Next;
        }
    } else {
        res.Units = uni;
        if (uni.Count > 1) {
            List<UnitToken> uni1 = UnitToken.TryParseList(t, addUnits, false);
            if (((uni1 != null && uni1[0].Unit == uni[0].Unit && (uni1.Count < uni.Count)) && uni[uni1.Count].Pow == -1 && uni1[uni1.Count - 1].EndToken.Next != null)
                && uni1[uni1.Count - 1].EndToken.Next.IsCharOf("/\\")) {
                NumbersWithUnitToken num2 = _tryParse(uni1[uni1.Count - 1].EndToken.Next.Next, addUnits, false, false, false);
                // FIX: Units can be an empty list, so check Count before indexing.
                if (num2 != null && num2.Units != null && num2.Units.Count > 0 && num2.Units[0].Unit == uni[uni1.Count].Unit) {
                    res.Units = uni1;
                    res.DivNum = num2;
                    res.EndToken = num2.EndToken;
                }
            }
        }
    }

    res.WHL = whd;
    if (dty != DiapTyp.Undefined) {
        if (dty == DiapTyp.Ge || dty == DiapTyp.From) {
            res.FromInclude = true;
            res.FromVal = rval;
        } else if (dty == DiapTyp.Gt) {
            res.FromInclude = false;
            res.FromVal = rval;
        } else if (dty == DiapTyp.Le || dty == DiapTyp.To) {
            res.ToInclude = true;
            res.ToVal = rval;
        } else if (dty == DiapTyp.Ls) {
            res.ToInclude = false;
            res.ToVal = rval;
        }
    }

    if (!second) {
        int iii = 0;
        ttt = _isMinOrMax(t, ref iii);
        if (ttt != null && iii > 0) {
            t = ttt.Next;
        }
    }

    // ---- Optional second value (range end / tolerance). ----
    NumbersWithUnitToken next = (second || plusminus || ((t != null && ((t.IsTableControlChar || t.IsNewlineBefore)))) ? null : _tryParse(t, addUnits, true, false, canBeNan));
    if (next != null && (t.Previous is Pullenti.Ner.NumberToken)) {
        if (MeasureHelper.IsMultChar((t.Previous as Pullenti.Ner.NumberToken).EndToken)) {
            next = null;
        }
    }

    if (next != null && ((next.ToVal != null || next.SingleVal != null)) && next.FromVal == null) {
        if ((((next.BeginToken.IsChar('+') && next.SingleVal != null && !double.IsNaN(next.SingleVal.Value)) && next.EndToken.Next != null && next.EndToken.Next.IsCharOf("\\/")) && next.EndToken.Next.Next != null && next.EndToken.Next.Next.IsHiphen)
            && !hasKeyw && !double.IsNaN(rval)) {
            // Form "value +a / -b": asymmetric tolerance.
            NumbersWithUnitToken next2 = _tryParse(next.EndToken.Next.Next.Next, addUnits, true, false, false);
            if (next2 != null && next2.SingleVal != null && !double.IsNaN(next2.SingleVal.Value)) {
                res.FromVal = rval - next2.SingleVal.Value;
                res.FromInclude = true;
                res.ToVal = rval + next.SingleVal.Value;
                res.ToInclude = true;
                if (next2.Units != null && res.Units.Count == 0) {
                    res.Units = next2.Units;
                }
                res.EndToken = next2.EndToken;
                return res;
            }
        }

        if (next.Units.Count > 0) {
            if (res.Units.Count == 0) {
                res.Units = next.Units;
            } else if (!UnitToken.CanBeEquals(res.Units, next.Units)) {
                next = null;
            }
        } else if (res.Units.Count > 0 && !unitBefore && !next.PlusMinusPercent) {
            next = null;
        }

        if (next != null) {
            res.EndToken = next.EndToken;
        }
        if (next != null && next.ToVal != null) {
            res.ToVal = next.ToVal;
            res.ToInclude = next.ToInclude;
        } else if (next != null && next.SingleVal != null) {
            if (next.BeginToken.IsCharOf("/\\")) {
                res.DivNum = next;
                res.SingleVal = rval;
                return res;
            } else if (next.PlusMinusPercent) {
                res.SingleVal = rval;
                res.PlusMinus = next.SingleVal;
                res.PlusMinusPercent = true;
                res.ToInclude = true;
            } else {
                res.ToVal = next.SingleVal;
                res.ToInclude = true;
            }
        }
        if (next != null) {
            if (res.FromVal == null) {
                res.FromVal = rval;
                res.FromInclude = true;
            }
            return res;
        }
    } else if ((next != null && next.FromVal != null && next.ToVal != null) && next.ToVal.Value == (-next.FromVal.Value)) {
        // The second value is a symmetric interval, i.e. a plus/minus tolerance.
        if (next.Units.Count == 1 && next.Units[0].Unit == UnitsHelper.uPercent && res.Units.Count > 0) {
            res.SingleVal = rval;
            res.PlusMinus = next.ToVal.Value;
            res.PlusMinusPercent = true;
            res.EndToken = next.EndToken;
            return res;
        }
        if (next.Units.Count == 0) {
            res.SingleVal = rval;
            res.PlusMinus = next.ToVal.Value;
            res.EndToken = next.EndToken;
            return res;
        }
        res.FromVal = next.FromVal + rval;
        res.FromInclude = true;
        res.ToVal = next.ToVal + rval;
        res.ToInclude = true;
        res.EndToken = next.EndToken;
        if (next.Units.Count > 0) {
            res.Units = next.Units;
        }
        return res;
    }

    if (dty == DiapTyp.Undefined) {
        if (plusminus && ((!res.PlusMinusPercent || !second))) {
            res.FromInclude = true;
            res.FromVal = -rval;
            res.ToInclude = true;
            res.ToVal = rval;
        } else {
            res.SingleVal = rval;
            res.PlusMinusPercent = plusminus;
        }
    }
    if (isAge) {
        res.IsAge = true;
    }
    return res;
}