/// <summary>
/// Builds a diagnostic description of this item from the members that are set.
/// </summary>
/// <returns>
/// A concatenation of space-prefixed fragments: the kind in square brackets (unless it is
/// Undefined), "(!)" when well-known, "[br]" when in brackets, the type value, the quoted
/// name value and the referenced object after "->".
/// </returns>
public override string ToString()
{
    StringBuilder text = new StringBuilder();

    if (Kind != Pullenti.Ner.Named.NamedEntityKind.Undefined)
    {
        text.AppendFormat(" [{0}]", Kind);
    }

    // Fixed markers carry no placeholders, so a plain Append is enough.
    if (IsWellknown)
    {
        text.Append(" (!)");
    }
    if (IsInBracket)
    {
        text.Append(" [br]");
    }

    if (TypeValue != null)
    {
        text.AppendFormat(" {0}", TypeValue);
    }
    if (NameValue != null)
    {
        text.AppendFormat(" \"{0}\"", NameValue);
    }
    if (Ref != null)
    {
        text.AppendFormat(" -> {0}", Ref.ToString());
    }

    return text.ToString();
}
/// <summary>
/// Renders the referent as "type Name; referenced-object", omitting the parts that are absent.
/// </summary>
/// <param name="shortVariant">Passed through to the referenced object's ToString.</param>
/// <param name="lang">Passed through to the referenced object's ToString.</param>
/// <param name="lev">Current nesting level; the referenced object is rendered with lev + 1.</param>
/// <returns>The composed text (empty when no part is present).</returns>
public override string ToString(bool shortVariant, Pullenti.Morph.MorphLang lang, int lev = 0)
{
    StringBuilder text = new StringBuilder();

    string typeText = this.GetStringValue(ATTR_TYPE);
    if (typeText != null)
    {
        text.Append(typeText);
    }

    string nameText = this.GetStringValue(ATTR_NAME);
    if (nameText != null)
    {
        // Separate the name from a preceding type with a single space.
        if (text.Length > 0)
        {
            text.Append(' ');
        }
        text.Append(Pullenti.Ner.Core.MiscHelper.ConvertFirstCharUpperAndOtherLower(nameText));
    }

    Pullenti.Ner.Referent linked = this.GetSlotValue(ATTR_REF) as Pullenti.Ner.Referent;
    if (linked != null)
    {
        if (text.Length > 0)
        {
            text.Append("; ");
        }
        text.Append(linked.ToString(shortVariant, lang, lev + 1));
    }

    return text.ToString();
}
/// <summary>
/// Renders the attribute as an optional "type (name): " prefix, followed by its value and
/// the text of the referenced object, if any.
/// </summary>
/// <param name="shortVariant">When true the type/name prefix is omitted; also passed to the referenced object.</param>
/// <param name="lang">Language used to convert the type name and passed to the referenced object.</param>
/// <param name="lev">Not read by this method.</param>
/// <returns>The composed text.</returns>
public override string ToString(bool shortVariant, Pullenti.Morph.MorphLang lang = null, int lev = 0)
{
    StringBuilder text = new StringBuilder();
    GoodAttrType attrType = Typ;
    string attrName = this.GetStringValue(ATTR_NAME);

    if (!shortVariant && attrType != GoodAttrType.Undefined)
    {
        object typeCaption = Pullenti.Ner.Goods.Internal.AttrMeta.GlobalMeta.TypAttr.ConvertInnerValueToOuterValue(attrType.ToString(), lang);
        string nameSuffix = "";
        if (attrName != null)
        {
            nameSuffix = string.Format(" ({0})", attrName.ToLower());
        }
        text.AppendFormat("{0}{1}: ", typeCaption, nameSuffix);
    }

    string rawValue = this.GetStringValue(ATTR_VALUE);
    if (rawValue != null)
    {
        if (attrType == GoodAttrType.Keyword || attrType == GoodAttrType.Character)
        {
            text.Append(rawValue.ToLower());
        }
        else if (attrType == GoodAttrType.Numeric)
        {
            // Numeric values are printed as "v1u1 x v2u2 ..."; when the unit list does not
            // line up with the value list, the first unit (if any) is used for every value.
            List<string> numbers = Values;
            List<string> unitNames = Units;
            for (int k = 0; k < numbers.Count; k++)
            {
                if (k > 0)
                {
                    text.Append(" x ");
                }
                text.Append(numbers[k]);

                string unit = null;
                if (numbers.Count == unitNames.Count)
                {
                    unit = unitNames[k].ToLower();
                }
                else if (unitNames.Count > 0)
                {
                    unit = unitNames[0].ToLower();
                }
                if (unit != null)
                {
                    text.Append(unit);
                }
            }
        }
        else
        {
            text.Append(rawValue);
        }
    }

    Pullenti.Ner.Referent linked = Ref;
    if (linked != null)
    {
        text.Append(linked.ToString(shortVariant, lang, 0));
    }

    return text.ToString();
}
/// <summary>
/// Renders the referent as its capitalised type ("?" when the type is missing), followed by
/// the referenced object and/or the delegate.
/// </summary>
/// <param name="shortVariant">Passed to the referenced object; when true the delegate is not appended after it.</param>
/// <param name="lang">Passed through to nested ToString calls.</param>
/// <param name="lev">Current nesting level; the delegate is rendered with lev + 1.</param>
/// <returns>The composed text.</returns>
public override string ToString(bool shortVariant, Pullenti.Morph.MorphLang lang, int lev = 0)
{
    StringBuilder text = new StringBuilder();
    text.Append(Pullenti.Ner.Core.MiscHelper.ConvertFirstCharUpperAndOtherLower(Typ ?? "?"));

    Pullenti.Ner.Referent principal = this.GetSlotValue(ATTR_REF) as Pullenti.Ner.Referent;
    Pullenti.Ner.Referent representative = this.GetSlotValue(ATTR_DELEGATE) as Pullenti.Ner.Referent;

    if (principal == null)
    {
        // Only the delegate is known.
        if (representative != null)
        {
            text.AppendFormat(": в лице {0}", representative.ToString(shortVariant, lang, lev + 1));
        }
        return text.ToString();
    }

    text.AppendFormat(": {0}", principal.ToString(shortVariant, lang, 0));
    if (representative != null && !shortVariant)
    {
        // In the long form the delegate itself is always printed in its short variant.
        text.AppendFormat(" (в лице {0})", representative.ToString(true, lang, lev + 1));
    }
    return text.ToString();
}
/// <summary>
/// Renders the referent as "Type: value (referenced-object)".
/// </summary>
/// <param name="shortVariant">When true the referenced object is not appended.</param>
/// <param name="lang">Passed through to the referenced object's ToString.</param>
/// <param name="lev">Current nesting level; the referenced object is appended only while lev is below 30.</param>
/// <returns>The composed text; the type is replaced by "?" when missing.</returns>
public override string ToString(bool shortVariant, Pullenti.Morph.MorphLang lang, int lev = 0)
{
    StringBuilder text = new StringBuilder();
    text.Append(Pullenti.Ner.Core.MiscHelper.ConvertFirstCharUpperAndOtherLower(Typ ?? "?"));

    object current = Value;
    if (current != null)
    {
        text.AppendFormat(": {0}", current);
    }

    // The short form, or a nesting level of 30 and above, stops here.
    if (shortVariant || lev >= 30)
    {
        return text.ToString();
    }

    Pullenti.Ner.Referent linked = this.GetSlotValue(ATTR_REF) as Pullenti.Ner.Referent;
    if (linked != null)
    {
        text.AppendFormat(" ({0})", linked.ToString(shortVariant, lang, lev + 1));
    }
    return text.ToString();
}
/// <summary>
/// Renders the reference as "[number] pages-label pages; book".
/// </summary>
/// <param name="shortVariant">Passed through to the book's ToString.</param>
/// <param name="lang">Selects the pages label ("pages" when lang.IsEn, otherwise "стр.") and is passed to the book.</param>
/// <param name="lev">Passed unchanged to the book's ToString.</param>
/// <returns>The composed text; "?" stands in for a missing book.</returns>
public override string ToString(bool shortVariant, Pullenti.Morph.MorphLang lang = null, int lev = 0)
{
    StringBuilder text = new StringBuilder();

    if (Number != null)
    {
        text.AppendFormat("[{0}] ", Number);
    }

    if (Pages != null)
    {
        string pagesLabel = "стр.";
        if (lang != null && lang.IsEn)
        {
            pagesLabel = "pages";
        }
        text.AppendFormat("{0} {1}; ", pagesLabel, Pages);
    }

    Pullenti.Ner.Referent source = Book;
    if (source != null)
    {
        text.Append(source.ToString(shortVariant, lang, lev));
    }
    else
    {
        text.Append("?");
    }

    return text.ToString();
}
/// <summary>
/// Renders the keyword as its value, optionally followed by the normalised form in brackets.
/// </summary>
/// <param name="shortVariant">When true only the value is returned.</param>
/// <param name="lang">Passed through to the referenced object's ToString.</param>
/// <param name="lev">Current nesting level; above 10 the method returns "?" without reading any slot.</param>
/// <returns>
/// The value (or "?" when none can be obtained); in the long form, "value [normal]" when the
/// normalised slot is present.
/// </returns>
public override string ToString(bool shortVariant, Pullenti.Morph.MorphLang lang = null, int lev = 0)
{
    // Depth guard for chains of referents that render each other.
    if (lev > 10)
    {
        return "?";
    }

    // The value falls back to the referenced object's short text, then to the normalised form.
    string val = this.GetStringValue(ATTR_VALUE);
    if (val == null)
    {
        Pullenti.Ner.Referent r = this.GetSlotValue(ATTR_REF) as Pullenti.Ner.Referent;
        if (r != null)
        {
            val = r.ToString(true, lang, lev + 1);
        }
        else
        {
            val = this.GetStringValue(ATTR_NORMAL);
        }
    }

    if (shortVariant)
    {
        return val ?? "?";
    }

    string norm = this.GetStringValue(ATTR_NORMAL);
    if (norm == null)
    {
        return val ?? "?";
    }
    return string.Format("{0} [{1}]", val ?? "?", norm);
}
/// <summary>
/// Tries to recognise an eponym fragment starting at the given token and returns a token
/// covering it together with the extracted eponym strings.
/// </summary>
/// <param name="t">First token to analyse; null yields null.</param>
/// <param name="mustHasPrefix">
/// When true, only fragments introduced by one of the prefix words
/// ("ИМЕНИ", "ІМЕНІ", "ИМ", "ІМ") are accepted.
/// </param>
/// <returns>The recognised eponym token, or null when nothing was recognised.</returns>
public static OrgItemEponymToken TryAttach(Pullenti.Ner.Token t, bool mustHasPrefix = false)
{
    Pullenti.Ner.TextToken tt = t as Pullenti.Ner.TextToken;
    if (tt == null)
    {
        if (t == null)
        {
            return null;
        }

        // A non-text token: accept a handful of fixed dates, or an age expression followed by a word.
        Pullenti.Ner.Referent r1 = t.GetReferent();
        if (r1 != null && r1.TypeName == "DATE")
        {
            string str = r1.ToString().ToUpper();
            if ((str == "1 МАЯ" || str == "7 ОКТЯБРЯ" || str == "9 МАЯ") || str == "8 МАРТА")
            {
                OrgItemEponymToken dt = new OrgItemEponymToken(t, t) { Eponyms = new List<string>() };
                dt.Eponyms.Add(str);
                return dt;
            }
        }

        Pullenti.Ner.NumberToken age = Pullenti.Ner.Core.NumberHelper.TryParseAge(t);
        if ((age != null && (((age.EndToken.Next is Pullenti.Ner.TextToken) || (age.EndToken.Next is Pullenti.Ner.ReferentToken))) && (age.WhitespacesAfterCount < 3)) && !age.EndToken.Next.Chars.IsAllLower && age.EndToken.Next.Chars.IsCyrillicLetter)
        {
            OrgItemEponymToken dt = new OrgItemEponymToken(t, age.EndToken.Next) { Eponyms = new List<string>() };
            dt.Eponyms.Add(string.Format("{0} {1}", age.Value, dt.EndToken.GetSourceText().ToUpper()));
            return dt;
        }
        return null;
    }

    // t1 is the token at which the name itself starts; full marks an unabbreviated
    // (or dot-terminated) prefix, hasName marks that any prefix word was seen.
    Pullenti.Ner.Token t1 = null;
    bool full = false;
    bool hasName = false;
    if (tt.Term == "ИМЕНИ" || tt.Term == "ІМЕНІ")
    {
        t1 = t.Next;
        full = true;
        hasName = true;
    }
    else if (((tt.Term == "ИМ" || tt.Term == "ІМ")) && tt.Next != null)
    {
        if (tt.Next.IsChar('.'))
        {
            t1 = tt.Next.Next;
            full = true;
        }
        else if ((tt.Next is Pullenti.Ner.TextToken) && tt.Chars.IsAllLower && !tt.Next.Chars.IsAllLower)
        {
            t1 = tt.Next;
        }
        hasName = true;
    }
    else if (tt.Previous != null && ((tt.Previous.IsValue("ФОНД", null) || tt.Previous.IsValue("ХРАМ", null) || tt.Previous.IsValue("ЦЕРКОВЬ", "ЦЕРКВА"))))
    {
        // No prefix word, but the previous token is one of the listed nouns:
        // the current token itself may start the name if it passes the filters below.
        if ((!tt.Chars.IsCyrillicLetter || tt.Morph.Class.IsPreposition || tt.Morph.Class.IsConjunction) || !tt.Chars.IsLetter)
        {
            return null;
        }
        if (tt.WhitespacesBeforeCount != 1)
        {
            return null;
        }
        if (tt.Chars.IsAllLower)
        {
            return null;
        }
        if (tt.Morph.Class.IsAdjective)
        {
            Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(tt, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
            if (npt != null && npt.BeginToken != npt.EndToken)
            {
                return null;
            }
        }
        OrgItemNameToken na = OrgItemNameToken.TryAttach(tt, null, false, true);
        if (na != null)
        {
            if (na.IsEmptyWord || na.IsStdName || na.IsStdTail)
            {
                return null;
            }
        }
        t1 = tt;
    }

    if (t1 == null || ((t1.IsNewlineBefore && !full)))
    {
        return null;
    }
    if (tt.Previous != null && tt.Previous.Morph.Class.IsPreposition)
    {
        return null;
    }
    if (mustHasPrefix && !hasName)
    {
        return null;
    }

    // A date with a day but without a year after a full prefix.
    Pullenti.Ner.Referent r = t1.GetReferent();
    if ((r != null && r.TypeName == "DATE" && full) && r.FindSlot("DAY", null, true) != null && r.FindSlot("YEAR", null, true) == null)
    {
        OrgItemEponymToken dt = new OrgItemEponymToken(t, t1) { Eponyms = new List<string>() };
        dt.Eponyms.Add(r.ToString().ToUpper());
        return dt;
    }

    // Optional "СВЯТОЙ"/"СВЯТИЙ"/"СВ"/"СВЯТ" word (with an optional dot) before the name.
    bool holy = false;
    if ((t1.IsValue("СВЯТОЙ", null) || t1.IsValue("СВЯТИЙ", null) || t1.IsValue("СВ", null)) || t1.IsValue("СВЯТ", null))
    {
        t1 = t1.Next;
        holy = true;
        if (t1 != null && t1.IsChar('.'))
        {
            t1 = t1.Next;
        }
    }
    if (t1 == null)
    {
        return null;
    }

    // A dictionary noun/adjective may still be a person: ask the PERSON analyzer and
    // accept a multi-token result whose last token matches the LASTNAME value.
    Pullenti.Morph.MorphClass cl = t1.GetMorphClassInDictionary();
    if (cl.IsNoun || cl.IsAdjective)
    {
        Pullenti.Ner.ReferentToken rt = t1.Kit.ProcessReferent("PERSON", t1);
        if (rt != null && rt.Referent.TypeName == "PERSON" && rt.BeginToken != rt.EndToken)
        {
            string e = rt.Referent.GetStringValue("LASTNAME");
            if (e != null)
            {
                if (rt.EndToken.IsValue(e, null))
                {
                    OrgItemEponymToken re = new OrgItemEponymToken(t, rt.EndToken);
                    re.Eponyms.Add(rt.EndToken.GetSourceText());
                    return re;
                }
            }
        }
    }

    // Anniversary expression followed by a noun phrase.
    Pullenti.Ner.NumberToken nt = Pullenti.Ner.Core.NumberHelper.TryParseAnniversary(t1);
    if (nt != null && nt.Typ == Pullenti.Ner.NumberSpellingType.Age)
    {
        Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(nt.EndToken.Next, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
        if (npt != null)
        {
            string s = string.Format("{0}-{1} {2}", nt.Value, (t.Kit.BaseLanguage.IsUa ? "РОКІВ" : "ЛЕТ"), Pullenti.Ner.Core.MiscHelper.GetTextValue(npt.BeginToken, npt.EndToken, Pullenti.Ner.Core.GetTextAttr.No));
            OrgItemEponymToken res = new OrgItemEponymToken(t, npt.EndToken);
            res.Eponyms.Add(s);
            return res;
        }
    }

    List<PersonItemToken> its = PersonItemToken.TryAttach(t1);
    if (its == null)
    {
        // No person items: a geo referent token is still accepted as an eponym.
        if ((t1 is Pullenti.Ner.ReferentToken) && (t1.GetReferent() is Pullenti.Ner.Geo.GeoReferent))
        {
            string s = Pullenti.Ner.Core.MiscHelper.GetTextValue(t1, t1, Pullenti.Ner.Core.GetTextAttr.No);
            OrgItemEponymToken re = new OrgItemEponymToken(t, t1);
            re.Eponyms.Add(s);
            return re;
        }
        return null;
    }

    List<string> eponims = new List<string>();
    int i = 0;

    // Skip a leading lower-case word; the bound check comes first so that an empty
    // item list yields null instead of an index exception.
    if (i < its.Count && its[i].Typ == PersonItemType.LocaseWord)
    {
        i++;
    }
    if (i >= its.Count)
    {
        return null;
    }
    if (!full)
    {
        if (its[i].BeginToken.Morph.Class.IsAdjective && !its[i].BeginToken.Morph.Class.IsProperSurname)
        {
            return null;
        }
    }

    if (its[i].Typ == PersonItemType.Initial)
    {
        // Initial(s) + surname/name, optionally repeated through "and".
        i++;
        while (true)
        {
            if ((i < its.Count) && its[i].Typ == PersonItemType.Initial)
            {
                i++;
            }
            if (i >= its.Count || ((its[i].Typ != PersonItemType.Surname && its[i].Typ != PersonItemType.Name)))
            {
                break;
            }
            eponims.Add(its[i].Value);
            t1 = its[i].EndToken;
            if ((i + 2) >= its.Count || its[i + 1].Typ != PersonItemType.And || its[i + 2].Typ != PersonItemType.Initial)
            {
                break;
            }
            i += 3;
        }
    }
    else if (((i + 1) < its.Count) && its[i].Typ == PersonItemType.Name && its[i + 1].Typ == PersonItemType.Surname)
    {
        // Name + surname, optionally followed by "and" + name + surname.
        eponims.Add(its[i + 1].Value);
        t1 = its[i + 1].EndToken;
        i += 2;
        if ((((i + 2) < its.Count) && its[i].Typ == PersonItemType.And && its[i + 1].Typ == PersonItemType.Name) && its[i + 2].Typ == PersonItemType.Surname)
        {
            eponims.Add(its[i + 2].Value);
            t1 = its[i + 2].EndToken;
        }
    }
    else if (its[i].Typ == PersonItemType.Surname)
    {
        // Exactly one more item with the same character class is merged into the surname.
        if (its.Count == (i + 2) && its[i].Chars == its[i + 1].Chars)
        {
            its[i].Value += (" " + its[i + 1].Value);
            its[i].EndToken = its[i + 1].EndToken;
            its.RemoveAt(i + 1);
        }
        eponims.Add(its[i].Value);
        if (((i + 1) < its.Count) && its[i + 1].Typ == PersonItemType.Name)
        {
            if ((i + 2) == its.Count)
            {
                i++;
            }
            else if (its[i + 2].Typ != PersonItemType.Surname)
            {
                i++;
            }
        }
        else if (((i + 1) < its.Count) && its[i + 1].Typ == PersonItemType.Initial)
        {
            if ((i + 2) == its.Count)
            {
                i++;
            }
            else if (its[i + 2].Typ == PersonItemType.Initial && (i + 3) == its.Count)
            {
                i += 2;
            }
        }
        else if (((i + 2) < its.Count) && its[i + 1].Typ == PersonItemType.And && its[i + 2].Typ == PersonItemType.Surname)
        {
            // The second surname is taken unless it parses as a noun phrase in a defined, non-genitive case.
            bool ok = true;
            Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(its[i + 2].BeginToken, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
            if (npt != null && !npt.Morph.Case.IsGenitive && !npt.Morph.Case.IsUndefined)
            {
                ok = false;
            }
            if (ok)
            {
                eponims.Add(its[i + 2].Value);
                i += 2;
            }
        }
        t1 = its[i].EndToken;
    }
    else if (its[i].Typ == PersonItemType.Name && holy)
    {
        // Name after the "holy" word; a second non-initial item of the same character class is appended.
        t1 = its[i].EndToken;
        bool sec = false;
        if (((i + 1) < its.Count) && its[i].Chars == its[i + 1].Chars && its[i + 1].Typ != PersonItemType.Initial)
        {
            sec = true;
            t1 = its[i + 1].EndToken;
        }
        if (sec)
        {
            eponims.Add(string.Format("СВЯТ.{0} {1}", its[i].Value, its[i + 1].Value));
        }
        else
        {
            eponims.Add(string.Format("СВЯТ.{0}", its[i].Value));
        }
    }
    else if (full && (i + 1) == its.Count && ((its[i].Typ == PersonItemType.Name || its[i].Typ == PersonItemType.Surname)))
    {
        t1 = its[i].EndToken;
        eponims.Add(its[i].Value);
    }
    else if (((i + 2) < its.Count) && (its[i].Typ == PersonItemType.Name && its.Count == 3 && its[i + 1].Typ == PersonItemType.Name) && its[i + 2].Typ == PersonItemType.Surname)
    {
        // Name + name + surname. The (i + 2) bound check prevents reading past the list
        // when a leading lower-case word was skipped (i == 1 with three items).
        t1 = its[i + 2].EndToken;
        eponims.Add(string.Format("{0} {1} {2}", its[i].Value, its[i + 1].Value, its[i + 2].Value));
        i += 2;
    }

    if (eponims.Count == 0)
    {
        return null;
    }
    return new OrgItemEponymToken(t, t1) { Eponyms = eponims };
}
// Decides whether this decree referent may denote the same entity as obj.
//   obj       - candidate referent; anything that is not a DecreeReferent yields false.
//   typ       - comparison mode; this body only tests it against ReferentsEqualType.DifferentTexts.
//   ignoreGeo - not read anywhere in this body.
// Returns true when the two referents are considered compatible, false otherwise.
bool _CanBeEquals(Pullenti.Ner.Referent obj, Pullenti.Ner.Core.ReferentsEqualType typ, bool ignoreGeo) {
    DecreeReferent dr = obj as DecreeReferent;
    if (dr == null) {
        return(false);
    }
    // Both Typ0 values present and different -> never equal.
    if (dr.Typ0 != null && Typ0 != null) {
        if (dr.Typ0 != Typ0) {
            return(false);
        }
    }
    // numEq: 0 - no number match established, 1 - the _allNumberDigits() lists share an entry,
    // 2 - dr has a NUMBER slot with exactly the same value as one of ours.
    int numEq = 0;
    if (Number != null || dr.Number != null) {
        if (Number != null && dr.Number != null) {
            List <string> di1 = this._allNumberDigits();
            List <string> di2 = dr._allNumberDigits();
            foreach (string d1 in di1) {
                if (di2.Contains(d1)) {
                    numEq = 1;
                    break;
                }
            }
            // Without a shared entry only laws (IsLaw) get the exact-slot check below.
            if (numEq == 0 && !IsLaw) {
                return(false);
            }
            foreach (Pullenti.Ner.Slot s in Slots) {
                if (s.TypeName == ATTR_NUMBER) {
                    if (dr.FindSlot(s.TypeName, s.Value, true) != null) {
                        numEq = 2;
                        break;
                    }
                }
            }
            if (numEq == 0) {
                return(false);
            }
        }
    }
    // Case numbers, when both present, must match.
    if (CaseNumber != null && dr.CaseNumber != null) {
        if (CaseNumber != dr.CaseNumber) {
            return(false);
        }
    }
    // GEO slots, when both present, are compared through their string values.
    if (this.FindSlot(ATTR_GEO, null, true) != null && dr.FindSlot(ATTR_GEO, null, true) != null) {
        if (this.GetStringValue(ATTR_GEO) != dr.GetStringValue(ATTR_GEO)) {
            return(false);
        }
    }
    // Source comparison looks only at the first SOURCE slot found on this referent.
    bool srcEq = false;
    bool srcNotEq = false;
    Pullenti.Ner.Slot src = this.FindSlot(ATTR_SOURCE, null, true);
    if (src != null && dr.FindSlot(ATTR_SOURCE, null, true) != null) {
        if (dr.FindSlot(src.TypeName, src.Value, true) == null) {
            srcNotEq = true;
        } else {
            srcEq = true;
        }
    }
    bool dateNotEq = false;
    bool dateIsEqu = false;
    bool yearsIsEqu = false;
    string date1 = this.GetStringValue(ATTR_DATE);
    string date2 = dr.GetStringValue(ATTR_DATE);
    if (date1 != null || date2 != null) {
        if (IsLaw) {
            // Laws: a common year is required before exact dates are compared.
            List <int> ys1 = this._allYears();
            List <int> ys2 = dr._allYears();
            foreach (int y1 in ys1) {
                if (ys2.Contains(y1)) {
                    yearsIsEqu = true;
                    break;
                }
            }
            if (yearsIsEqu) {
                List <DateTime> dts1 = this._allDates();
                List <DateTime> dts2 = dr._allDates();
                foreach (DateTime d1 in dts1) {
                    if (dts2.Contains(d1)) {
                        dateIsEqu = true;
                        break;
                    }
                }
            }
            if (!dateIsEqu) {
                if (Typ == "КОНСТИТУЦИЯ") {
                    return(false);
                }
                if (Date != null && dr.Date != null) {
                    dateNotEq = true;
                }
            }
        } else if (date1 == date2 || ((Date != null && dr.Date != null && Date == dr.Date))) {
            // Same date plus an exact number slot match is conclusive.
            if (numEq > 1) {
                return(true);
            }
            dateIsEqu = true;
        } else if (Date != null && dr.Date != null) {
            // Different dates: the year must coincide and some number match must exist;
            // the source comparison may then settle the answer immediately.
            if (Date.Value.Year != dr.Date.Value.Year) {
                return(false);
            }
            if (numEq >= 1) {
                if (srcEq) {
                    return(true);
                }
                if (srcNotEq) {
                    return(false);
                }
            } else {
                return(false);
            }
        } else if (typ == Pullenti.Ner.Core.ReferentsEqualType.DifferentTexts || Kind == DecreeKind.Publisher) {
            dateNotEq = true;
        }
    }
    if (this.FindSlot(ATTR_NAME, null, true) != null && dr.FindSlot(ATTR_NAME, null, true) != null) {
        // Names: an identical NAME slot, or one name being a prefix of the other, is conclusive.
        foreach (Pullenti.Ner.Slot s in Slots) {
            if (s.TypeName == ATTR_NAME) {
                if (dr.FindSlot(s.TypeName, s.Value, true) != null) {
                    return(true);
                }
                foreach (Pullenti.Ner.Slot ss in dr.Slots) {
                    if (ss.TypeName == s.TypeName) {
                        string n0 = s.Value.ToString();
                        string n1 = ss.Value.ToString();
                        if (n0.StartsWith(n1) || n1.StartsWith(n0)) {
                            return(true);
                        }
                    }
                }
            }
        }
        // Both have names but none matched: only number/source evidence can still succeed.
        if (dateNotEq) {
            return(false);
        }
        if (IsLaw && !dateIsEqu) {
            return(false);
        }
        if (numEq > 0) {
            if (srcEq) {
                return(true);
            }
            if (srcNotEq && typ == Pullenti.Ner.Core.ReferentsEqualType.DifferentTexts) {
                return(false);
            } else if ((!srcNotEq && numEq > 1 && Date == null) && dr.Date == null) {
                return(true);
            }
            return(false);
        }
    } else if (IsLaw && dateNotEq) {
        return(false);
    }
    if (dateNotEq) {
        return(false);
    }
    string ty = Typ;
    // No type: equality rests on the number match alone.
    if (ty == null) {
        return(numEq > 0);
    }
    DecreeKind t = Pullenti.Ner.Decree.Internal.DecreeToken.GetKind(ty);
    if (t == DecreeKind.Ustav || ty == "КОНСТИТУЦИЯ") {
        return(true);
    }
    if (numEq > 0) {
        return(true);
    }
    // Last resort: identical string representations.
    if (this.ToString() == obj.ToString()) {
        return(true);
    }
    return(false);
}
/// <summary>
/// Adds a reference to the organization and also adds names derived from it.
/// </summary>
/// <remarks>
/// Names are taken from the first source that yields any, in this order: the organization's
/// NAME slots, its EPONYM slots, its TYPE slots (only when it has a NUMBER), the names of its
/// GEO referent, and finally the organization's own short string form. When a NUMBER is
/// present it is appended to NAME/EPONYM/TYPE values as "value-number".
/// </remarks>
/// <param name="org">Organization referent to link; null is ignored.</param>
internal void AddOrgReferent(Pullenti.Ner.Referent org)
{
    if (org == null)
    {
        return;
    }

    bool nam = false;
    this.AddSlot(ATTR_REF, org, false, 0);
    GeoReferent geo = null;
    string specTyp = null;
    string num = org.GetStringValue("NUMBER");

    foreach (Pullenti.Ner.Slot s in org.Slots)
    {
        if (s.TypeName == "NAME")
        {
            if (num == null)
            {
                this.AddName(s.Value as string);
            }
            else
            {
                this.AddName(string.Format("{0}-{1}", s.Value, num));
            }
            nam = true;
        }
        else if (s.TypeName == "TYPE")
        {
            // Some organization types are mapped to a type of this referent.
            string v = s.Value as string;
            if (v == "СЕЛЬСКИЙ СОВЕТ")
            {
                this.AddTyp("сельский округ");
            }
            else if (v == "ГОРОДСКОЙ СОВЕТ")
            {
                this.AddTyp("городской округ");
            }
            else if (v == "ПОСЕЛКОВЫЙ СОВЕТ")
            {
                this.AddTyp("поселковый округ");
            }
            else if (v == "аэропорт")
            {
                specTyp = v.ToUpper();
            }
        }
        else if (s.TypeName == "GEO" && (s.Value is GeoReferent))
        {
            geo = s.Value as GeoReferent;
        }
    }

    if (!nam)
    {
        foreach (Pullenti.Ner.Slot s in org.Slots)
        {
            if (s.TypeName == "EPONYM")
            {
                // Skip non-string values instead of dereferencing a failed cast.
                string eponym = s.Value as string;
                if (eponym == null)
                {
                    continue;
                }
                if (num == null)
                {
                    this.AddName(eponym.ToUpper());
                }
                else
                {
                    this.AddName(string.Format("{0}-{1}", eponym.ToUpper(), num));
                }
                nam = true;
            }
        }
    }

    if (!nam && num != null)
    {
        foreach (Pullenti.Ner.Slot s in org.Slots)
        {
            if (s.TypeName == "TYPE")
            {
                string typeValue = s.Value as string;
                if (typeValue == null)
                {
                    continue;
                }
                this.AddName(string.Format("{0}-{1}", typeValue.ToUpper(), num));
                nam = true;
            }
        }
    }

    if (geo != null && !nam)
    {
        foreach (string n in geo.GetStringValues(ATTR_NAME))
        {
            this.AddName(n);
            if (specTyp != null)
            {
                // Register both word orders of "name + special type".
                this.AddName(string.Format("{0} {1}", n, specTyp));
                this.AddName(string.Format("{0} {1}", specTyp, n));
            }
            nam = true;
        }
    }

    if (!nam)
    {
        this.AddName(org.ToString(true, Pullenti.Morph.MorphLang.Unknown, 0).ToUpper());
    }
}
/// <summary>
/// Renders the item as "Typ: value alt[int]".
/// </summary>
/// <returns>
/// The type, then Value (or the text of Ref when Value is null, or nothing when both are
/// null), then AltValue if present, then "[int]" when IsInternal is set.
/// </returns>
public override string ToString()
{
    object mainPart = Value;
    if (mainPart == null)
    {
        mainPart = (Ref == null ? "" : Ref.ToString());
    }

    object altPart = AltValue;
    if (altPart == null)
    {
        altPart = "";
    }

    string internalMark = "";
    if (IsInternal)
    {
        internalMark = "[int]";
    }

    return string.Format("{0}: {1} {2}{3}", Typ.ToString(), mainPart, altPart, internalMark);
}
/// <summary>
/// Creates a semantic object for the noun of a noun phrase, links its adjectives and its
/// internal noun phrase to it, and adds it to the graph.
/// </summary>
/// <param name="gr">Graph that receives the new object and its links.</param>
/// <param name="npt">Noun phrase to convert.</param>
/// <returns>
/// The created object, or null when the noun is a referent token without a referent
/// (in that case nothing is added to the graph).
/// </returns>
public static Pullenti.Semantic.SemObject CreateNounGroup(Pullenti.Semantic.SemGraph gr, Pullenti.Ner.Core.NounPhraseToken npt)
{
    Pullenti.Ner.Token head = npt.Noun.BeginToken;
    Pullenti.Semantic.SemObject obj = new Pullenti.Semantic.SemObject(gr);
    obj.Tokens.Add(npt.Noun);

    // Object kind: plain noun unless the morphology says it is a pronoun.
    obj.Typ = Pullenti.Semantic.SemObjectType.Noun;
    if (npt.Noun.Morph.Class.IsPersonalPronoun)
    {
        obj.Typ = Pullenti.Semantic.SemObjectType.PersonalPronoun;
    }
    else if (npt.Noun.Morph.Class.IsPronoun)
    {
        obj.Typ = Pullenti.Semantic.SemObjectType.Pronoun;
    }

    if (npt.Noun.BeginToken != npt.Noun.EndToken)
    {
        // Multi-token noun: morphology is copied from the phrase as a whole.
        obj.Morph.NormalCase = npt.Noun.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false);
        obj.Morph.NormalFull = npt.Noun.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Singular, Pullenti.Morph.MorphGender.Undefined, false);
        obj.Morph.Class = Pullenti.Morph.MorphClass.Noun;
        obj.Morph.Number = npt.Morph.Number;
        obj.Morph.Gender = npt.Morph.Gender;
        obj.Morph.Case = npt.Morph.Case;
    }
    else if (head is Pullenti.Ner.TextToken)
    {
        // Single word: take the first word form that agrees with the phrase morphology.
        foreach (Pullenti.Morph.MorphBaseInfo form in head.Morph.Items)
        {
            if (form.CheckAccord(npt.Morph, false, false) && (form is Pullenti.Morph.MorphWordForm))
            {
                _setMorph(obj, form as Pullenti.Morph.MorphWordForm);
                break;
            }
        }
        if (obj.Morph.NormalCase == null)
        {
            obj.Morph.NormalCase = head.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false);
            obj.Morph.NormalFull = head.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Singular, Pullenti.Morph.MorphGender.Undefined, false);
        }
        List<Pullenti.Semantic.Utils.DerivateGroup> groups = Pullenti.Semantic.Utils.DerivateService.FindDerivates(obj.Morph.NormalFull, true, null);
        if (groups != null && groups.Count > 0)
        {
            obj.Concept = groups[0];
        }
    }
    else if (head is Pullenti.Ner.ReferentToken)
    {
        Pullenti.Ner.Referent entity = ((Pullenti.Ner.ReferentToken)head).Referent;
        if (entity == null)
        {
            return null;
        }
        string entityText = entity.ToString();
        obj.Morph.NormalCase = entityText;
        obj.Morph.NormalFull = entityText;
        obj.Concept = entity;
    }
    else if (head is Pullenti.Ner.NumberToken)
    {
        Pullenti.Ner.NumberToken number = (Pullenti.Ner.NumberToken)head;
        obj.Morph.Gender = head.Morph.Gender;
        obj.Morph.Number = head.Morph.Number;
        if (number.IntValue != null)
        {
            obj.Morph.NormalCase = Pullenti.Ner.Core.NumberHelper.GetNumberAdjective(number.IntValue.Value, head.Morph.Gender, head.Morph.Number);
            obj.Morph.NormalFull = Pullenti.Ner.Core.NumberHelper.GetNumberAdjective(number.IntValue.Value, Pullenti.Morph.MorphGender.Masculine, Pullenti.Morph.MorphNumber.Singular);
        }
        else
        {
            string upperText = head.GetSourceText().ToUpper();
            obj.Morph.NormalCase = upperText;
            obj.Morph.NormalFull = upperText;
        }
    }

    head.Tag = obj;

    // Adjectives become "Detail" links; with MultiNouns only the first adjective is used.
    foreach (Pullenti.Ner.MetaToken adjective in npt.Adjectives)
    {
        if (npt.MultiNouns && adjective != npt.Adjectives[0])
        {
            break;
        }
        Pullenti.Semantic.SemObject adjObj = CreateNptAdj(gr, npt, adjective);
        if (adjObj != null)
        {
            gr.AddLink(Pullenti.Semantic.SemLinkType.Detail, obj, adjObj, "какой", false, null);
        }
    }

    if (npt.InternalNoun != null)
    {
        Pullenti.Semantic.SemObject innerObj = CreateNounGroup(gr, npt.InternalNoun);
        if (innerObj != null)
        {
            gr.AddLink(Pullenti.Semantic.SemLinkType.Detail, obj, innerObj, null, false, null);
        }
    }

    gr.Objects.Add(obj);
    return obj;
}
/// <summary>
/// Returns the string form of the referenced object.
/// </summary>
/// <returns>The text of Ref, or "?" when Ref is not set.</returns>
public override string ToString()
{
    // ToString must not throw: a missing reference is rendered as a placeholder.
    if (Ref == null)
    {
        return "?";
    }
    return Ref.ToString();
}
/// <summary>
/// Wraps a referent token into a keyword referent token and embeds it into the token chain.
/// </summary>
/// <param name="ad">Analyzer data used to register the created keyword referents.</param>
/// <param name="t">Token to process.</param>
/// <param name="cur">Passed to _setRank together with max.</param>
/// <param name="max">Passed to _setRank together with cur.</param>
/// <returns>
/// The newly embedded keyword token, or t itself when it is not a referent token, has no
/// referent, or its referent is a phone, URI, bank data, DATE, DATERANGE or BOOKLINKREF.
/// </returns>
Pullenti.Ner.Token _addReferents(Pullenti.Ner.Core.AnalyzerData ad, Pullenti.Ner.Token t, int cur, int max)
{
    if (!(t is Pullenti.Ner.ReferentToken))
    {
        return t;
    }
    Pullenti.Ner.Referent entity = t.GetReferent();
    if (entity == null)
    {
        return t;
    }

    // Denominations: every VALUE slot becomes a NORMAL slot of the keyword.
    Pullenti.Ner.Denomination.DenominationReferent denomination = entity as Pullenti.Ner.Denomination.DenominationReferent;
    if (denomination != null)
    {
        KeywordReferent denomKeyword = new KeywordReferent();
        denomKeyword.Typ = KeywordType.Referent;
        foreach (Pullenti.Ner.Slot slot in denomination.Slots)
        {
            if (slot.TypeName == Pullenti.Ner.Denomination.DenominationReferent.ATTR_VALUE)
            {
                denomKeyword.AddSlot(KeywordReferent.ATTR_NORMAL, slot.Value, false, 0);
            }
        }
        denomKeyword.AddSlot(KeywordReferent.ATTR_REF, denomination, false, 0);
        Pullenti.Ner.ReferentToken denomToken = new Pullenti.Ner.ReferentToken(ad.RegisterReferent(denomKeyword), t, t);
        t.Kit.EmbedToken(denomToken);
        return denomToken;
    }

    if ((entity is Pullenti.Ner.Phone.PhoneReferent) || (entity is Pullenti.Ner.Uri.UriReferent) || (entity is Pullenti.Ner.Bank.BankDataReferent))
    {
        return t;
    }

    // Money: the keyword is an Object whose NORMAL slot is the currency.
    Pullenti.Ner.Money.MoneyReferent money = entity as Pullenti.Ner.Money.MoneyReferent;
    if (money != null)
    {
        KeywordReferent moneyKeyword = new KeywordReferent();
        moneyKeyword.Typ = KeywordType.Object;
        moneyKeyword.AddSlot(KeywordReferent.ATTR_NORMAL, money.Currency, false, 0);
        Pullenti.Ner.ReferentToken moneyToken = new Pullenti.Ner.ReferentToken(ad.RegisterReferent(moneyKeyword), t, t);
        t.Kit.EmbedToken(moneyToken);
        return moneyToken;
    }

    if (entity.TypeName == "DATE" || entity.TypeName == "DATERANGE" || entity.TypeName == "BOOKLINKREF")
    {
        return t;
    }

    // Process referent tokens nested inside this one first.
    Pullenti.Ner.Token inner = (t as Pullenti.Ner.MetaToken).BeginToken;
    while (inner != null && inner.EndChar <= t.EndChar)
    {
        if (inner is Pullenti.Ner.ReferentToken)
        {
            this._addReferents(ad, inner, cur, max);
        }
        inner = inner.Next;
    }

    KeywordReferent keyword = new KeywordReferent();
    keyword.Typ = KeywordType.Referent;

    // Normal form: the ALPHA2 value for GEO referents when available, otherwise the short string form.
    string normal = null;
    if (entity.TypeName == "GEO")
    {
        normal = entity.GetStringValue("ALPHA2");
    }
    if (normal == null)
    {
        normal = entity.ToString(true, null, 0);
    }
    if (normal != null)
    {
        keyword.AddSlot(KeywordReferent.ATTR_NORMAL, normal.ToUpper(), false, 0);
    }

    keyword.AddSlot(KeywordReferent.ATTR_REF, t.GetReferent(), false, 0);
    _setRank(keyword, cur, max);

    Pullenti.Ner.ReferentToken keywordToken = new Pullenti.Ner.ReferentToken(ad.RegisterReferent(keyword), t, t);
    t.Kit.EmbedToken(keywordToken);
    return keywordToken;
}
static OrgItemNameToken _TryAttach(Pullenti.Ner.Token t, OrgItemNameToken prev, bool extOnto) { if (t == null) { return(null); } Pullenti.Ner.Referent r = t.GetReferent(); if (r != null) { if (r.TypeName == "DENOMINATION") { return new OrgItemNameToken(t, t) { Value = r.ToString(true, t.Kit.BaseLanguage, 0), IsDenomination = true } } ; if ((r is Pullenti.Ner.Geo.GeoReferent) && t.Chars.IsLatinLetter) { OrgItemNameToken res2 = _TryAttach(t.Next, prev, extOnto); if (res2 != null && res2.Chars.IsLatinLetter) { res2.BeginToken = t; res2.Value = string.Format("{0} {1}", Pullenti.Ner.Core.MiscHelper.GetTextValueOfMetaToken(t as Pullenti.Ner.MetaToken, Pullenti.Ner.Core.GetTextAttr.No), res2.Value); res2.IsInDictionary = false; return(res2); } } return(null); } Pullenti.Ner.TextToken tt = t as Pullenti.Ner.TextToken; if (tt == null) { return(null); } OrgItemNameToken res = null; Pullenti.Ner.Core.TerminToken tok = m_StdTails.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No); if (tok == null && t.IsChar(',')) { tok = m_StdTails.TryParse(t.Next, Pullenti.Ner.Core.TerminParseAttr.No); } if (tok != null) { return new OrgItemNameToken(t, tok.EndToken) { Value = tok.Termin.CanonicText, IsStdTail = tok.Termin.Tag == null, IsEmptyWord = tok.Termin.Tag != null, Morph = tok.Morph } } ; if ((((tok = m_StdNames.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No)))) != null) { return new OrgItemNameToken(t, tok.EndToken) { Value = tok.Termin.CanonicText, IsStdName = true } } ; OrgItemEngItem eng = OrgItemEngItem.TryAttach(t, false); if (eng == null && t.IsChar(',')) { eng = OrgItemEngItem.TryAttach(t.Next, false); } if (eng != null) { return new OrgItemNameToken(t, eng.EndToken) { Value = eng.FullValue, IsStdTail = true } } ; if (tt.Chars.IsAllLower && prev != null) { if (!prev.Chars.IsAllLower && !prev.Chars.IsCapitalUpper) { return(null); } } if (tt.IsChar(',') && prev != null) { Pullenti.Ner.Core.NounPhraseToken npt1 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t.Next, 
Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null); if (npt1 == null || npt1.Chars != prev.Chars || ((npt1.Morph.Case & prev.Morph.Case)).IsUndefined) { return(null); } OrgItemTypeToken ty = OrgItemTypeToken.TryAttach(t.Next, false, null); if (ty != null) { return(null); } if (npt1.EndToken.Next == null || !npt1.EndToken.Next.IsValue("И", null)) { return(null); } Pullenti.Ner.Token t1 = npt1.EndToken.Next; Pullenti.Ner.Core.NounPhraseToken npt2 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t1.Next, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null); if (npt2 == null || npt2.Chars != prev.Chars || ((npt2.Morph.Case & npt1.Morph.Case & prev.Morph.Case)).IsUndefined) { return(null); } ty = OrgItemTypeToken.TryAttach(t1.Next, false, null); if (ty != null) { return(null); } res = new OrgItemNameToken(npt1.BeginToken, npt1.EndToken) { Morph = npt1.Morph, Value = npt1.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false) }; res.IsNounPhrase = true; res.IsAfterConjunction = true; if (prev.Preposition != null) { res.Preposition = prev.Preposition; } return(res); } if (((tt.IsChar('&') || tt.IsValue("AND", null) || tt.IsValue("UND", null))) && prev != null) { if ((tt.Next is Pullenti.Ner.TextToken) && tt.LengthChar == 1 && tt.Next.Chars.IsLatinLetter) { res = new OrgItemNameToken(tt, tt.Next) { Chars = tt.Next.Chars }; res.IsAfterConjunction = true; res.Value = "& " + (tt.Next as Pullenti.Ner.TextToken).Term; return(res); } res = OrgItemNameToken.TryAttach(tt.Next, null, extOnto, false); if (res == null || res.Chars != prev.Chars) { return(null); } res.IsAfterConjunction = true; res.Value = "& " + res.Value; return(res); } if (!tt.Chars.IsLetter) { return(null); } List <Pullenti.Semantic.Utils.DerivateGroup> expinf = null; if (prev != null && prev.EndToken.GetMorphClassInDictionary().IsNoun) { string wo = prev.EndToken.GetNormalCaseText(Pullenti.Morph.MorphClass.Noun, Pullenti.Morph.MorphNumber.Singular, 
Pullenti.Morph.MorphGender.Undefined, false); expinf = Pullenti.Semantic.Utils.DerivateService.FindDerivates(wo, true, prev.EndToken.Morph.Language); } Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null); if (npt != null && npt.InternalNoun != null) { npt = null; } bool explOk = false; if (npt != null && prev != null && prev.EndToken.GetMorphClassInDictionary().IsNoun) { Pullenti.Ner.Core.NounPhraseToken npt0 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(prev.EndToken, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null); if (npt0 != null) { List <Pullenti.Semantic.Core.SemanticLink> links = Pullenti.Semantic.Core.SemanticHelper.TryCreateLinks(npt0, npt, null); if (links.Count > 0) { explOk = true; } } } if (npt != null && ((explOk || npt.Morph.Case.IsGenitive || ((prev != null && !((prev.Morph.Case & npt.Morph.Case)).IsUndefined))))) { Pullenti.Morph.MorphClass mc = npt.BeginToken.GetMorphClassInDictionary(); if (mc.IsVerb || mc.IsPronoun) { return(null); } if (mc.IsAdverb) { if (npt.BeginToken.Next != null && npt.BeginToken.Next.IsHiphen) { } else { return(null); } } if (mc.IsPreposition) { return(null); } if (mc.IsNoun && npt.Chars.IsAllLower) { Pullenti.Morph.MorphCase ca = npt.Morph.Case; if ((!ca.IsDative && !ca.IsGenitive && !ca.IsInstrumental) && !ca.IsPrepositional) { return(null); } } res = new OrgItemNameToken(npt.BeginToken, npt.EndToken) { Morph = npt.Morph, Value = npt.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false) }; res.IsNounPhrase = true; if ((npt.EndToken.WhitespacesAfterCount < 2) && (npt.EndToken.Next is Pullenti.Ner.TextToken)) { Pullenti.Ner.Core.NounPhraseToken npt2 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(npt.EndToken.Next, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null); if (npt2 != null && npt2.Morph.Case.IsGenitive && npt2.Chars.IsAllLower) { OrgItemTypeToken typ = 
OrgItemTypeToken.TryAttach(npt.EndToken.Next, true, null); OrgItemEponymToken epo = OrgItemEponymToken.TryAttach(npt.EndToken.Next, false); Pullenti.Ner.ReferentToken rtt = t.Kit.ProcessReferent("PERSONPROPERTY", npt.EndToken.Next); if (typ == null && epo == null && ((rtt == null || rtt.Morph.Number == Pullenti.Morph.MorphNumber.Plural))) { res.EndToken = npt2.EndToken; res.Value = string.Format("{0} {1}", res.Value, Pullenti.Ner.Core.MiscHelper.GetTextValueOfMetaToken(npt2, Pullenti.Ner.Core.GetTextAttr.No)); } } else if (npt.EndToken.Next.IsComma && (npt.EndToken.Next.Next is Pullenti.Ner.TextToken)) { Pullenti.Ner.Token tt2 = npt.EndToken.Next.Next; Pullenti.Morph.MorphClass mv2 = tt2.GetMorphClassInDictionary(); if (mv2.IsAdjective && mv2.IsVerb) { Pullenti.Morph.MorphBaseInfo bi = new Pullenti.Morph.MorphBaseInfo() { Case = npt.Morph.Case, Gender = npt.Morph.Gender, Number = npt.Morph.Number }; if (tt2.Morph.CheckAccord(bi, false, false)) { npt2 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(tt2.Next, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null); if (npt2 != null && ((npt2.Morph.Case.IsDative || npt2.Morph.Case.IsGenitive)) && npt2.Chars.IsAllLower) { res.EndToken = npt2.EndToken; res.Value = string.Format("{0} {1}", res.Value, Pullenti.Ner.Core.MiscHelper.GetTextValue(npt.EndToken.Next, res.EndToken, Pullenti.Ner.Core.GetTextAttr.No)); } } } } } if (explOk) { res.IsAfterConjunction = true; } } else if (npt != null && ((((prev != null && prev.IsNounPhrase && npt.Morph.Case.IsInstrumental)) || extOnto))) { res = new OrgItemNameToken(npt.BeginToken, npt.EndToken) { Morph = npt.Morph, Value = npt.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false) }; res.IsNounPhrase = true; } else if (tt.IsAnd) { res = TryAttach(tt.Next, prev, extOnto, false); if (res == null || !res.IsNounPhrase || prev == null) { return(null); } if (((prev.Morph.Case & res.Morph.Case)).IsUndefined) { return(null); } if 
(prev.Morph.Number != Pullenti.Morph.MorphNumber.Undefined && res.Morph.Number != Pullenti.Morph.MorphNumber.Undefined) { if (((prev.Morph.Number & res.Morph.Number)) == Pullenti.Morph.MorphNumber.Undefined) { if (prev.Chars != res.Chars) { return(null); } OrgItemTypeToken ty = OrgItemTypeToken.TryAttach(res.EndToken.Next, false, null); if (ty != null) { return(null); } } } Pullenti.Morph.CharsInfo ci = res.Chars; res.Chars = ci; res.IsAfterConjunction = true; return(res); } else if (((tt.Term == "ПО" || tt.Term == "ПРИ" || tt.Term == "ЗА") || tt.Term == "С" || tt.Term == "В") || tt.Term == "НА") { npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t.Next, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null); if (npt != null) { if (m_VervotWords.TryParse(npt.EndToken, Pullenti.Ner.Core.TerminParseAttr.No) != null) { return(null); } bool ok = false; if (tt.Term == "ПО") { ok = npt.Morph.Case.IsDative; } else if (tt.Term == "С") { ok = npt.Morph.Case.IsInstrumental; } else if (tt.Term == "ЗА") { ok = npt.Morph.Case.IsGenitive | npt.Morph.Case.IsInstrumental; } else if (tt.Term == "НА") { ok = npt.Morph.Case.IsPrepositional; } else if (tt.Term == "В") { ok = npt.Morph.Case.IsDative | npt.Morph.Case.IsPrepositional; if (ok) { ok = false; if (t.Next.IsValue("СФЕРА", null) || t.Next.IsValue("ОБЛАСТЬ", null)) { ok = true; } } } else if (tt.Term == "ПРИ") { ok = npt.Morph.Case.IsPrepositional; if (ok) { if (OrgItemTypeToken.TryAttach(tt.Next, true, null) != null) { ok = false; } else { Pullenti.Ner.ReferentToken rt = tt.Kit.ProcessReferent(Pullenti.Ner.Org.OrganizationAnalyzer.ANALYZER_NAME, tt.Next); if (rt != null) { ok = false; } } } string s = npt.Noun.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false); if (s == "ПОДДЕРЖКА" || s == "УЧАСТИЕ") { ok = false; } } else { ok = npt.Morph.Case.IsPrepositional; } if (ok) { res = new OrgItemNameToken(t, npt.EndToken) { Morph = npt.Morph, Value = npt.GetNormalCaseText(null, 
Pullenti.Morph.MorphNumber.Singular, Pullenti.Morph.MorphGender.Undefined, false), Chars = npt.Chars }; res.IsNounPhrase = true; res.Preposition = tt.Term; if (((res.Value == "ДЕЛО" || res.Value == "ВОПРОС")) && !res.IsNewlineAfter) { OrgItemNameToken res2 = _TryAttach(res.EndToken.Next, res, extOnto); if (res2 != null && res2.Morph.Case.IsGenitive) { res.Value = string.Format("{0} {1}", res.Value, res2.Value); res.EndToken = res2.EndToken; for (Pullenti.Ner.Token ttt = res2.EndToken.Next; ttt != null; ttt = ttt.Next) { if (!ttt.IsCommaAnd) { break; } OrgItemNameToken res3 = _TryAttach(ttt.Next, res2, extOnto); if (res3 == null) { break; } res.Value = string.Format("{0} {1}", res.Value, res3.Value); res.EndToken = res3.EndToken; if (ttt.IsAnd) { break; } ttt = res.EndToken; } } } } } if (res == null) { return(null); } } else if (tt.Term == "OF") { Pullenti.Ner.Token t1 = tt.Next; if (t1 != null && Pullenti.Ner.Core.MiscHelper.IsEngArticle(t1)) { t1 = t1.Next; } if (t1 != null && t1.Chars.IsLatinLetter && !t1.Chars.IsAllLower) { res = new OrgItemNameToken(t, t1) { Chars = t1.Chars, Morph = t1.Morph }; for (Pullenti.Ner.Token ttt = t1.Next; ttt != null; ttt = ttt.Next) { if (ttt.WhitespacesBeforeCount > 2) { break; } if (Pullenti.Ner.Core.MiscHelper.IsEngAdjSuffix(ttt)) { ttt = ttt.Next; continue; } if (!ttt.Chars.IsLatinLetter) { break; } if (ttt.Morph.Class.IsPreposition) { break; } t1 = (res.EndToken = ttt); } res.Value = Pullenti.Ner.Core.MiscHelper.GetTextValue(t, t1, Pullenti.Ner.Core.GetTextAttr.IgnoreArticles); res.Preposition = tt.Term; return(res); } } if (res == null) { if (tt.Chars.IsLatinLetter && tt.LengthChar == 1) { } else if (tt.Chars.IsAllLower || (tt.LengthChar < 2)) { if (!tt.Chars.IsLatinLetter || prev == null || !prev.Chars.IsLatinLetter) { return(null); } } if (tt.Chars.IsCyrillicLetter) { Pullenti.Morph.MorphClass mc = tt.GetMorphClassInDictionary(); if (mc.IsVerb || mc.IsAdverb) { return(null); } } else if (tt.Chars.IsLatinLetter && 
!tt.IsWhitespaceAfter) { if (!tt.IsWhitespaceAfter && (tt.LengthChar < 5)) { if (tt.Next is Pullenti.Ner.NumberToken) { return(null); } } } res = new OrgItemNameToken(tt, tt) { Value = tt.Term, Morph = tt.Morph }; for (t = tt.Next; t != null; t = t.Next) { if ((((t.IsHiphen || t.IsCharOf("\\/"))) && t.Next != null && (t.Next is Pullenti.Ner.TextToken)) && !t.IsWhitespaceBefore && !t.IsWhitespaceAfter) { t = t.Next; res.EndToken = t; res.Value = string.Format("{0}{1}{2}", res.Value, (t.Previous.IsChar('.') ? '.' : '-'), (t as Pullenti.Ner.TextToken).Term); } else if (t.IsChar('.')) { if (!t.IsWhitespaceAfter && !t.IsWhitespaceBefore && (t.Next is Pullenti.Ner.TextToken)) { res.EndToken = t.Next; t = t.Next; res.Value = string.Format("{0}.{1}", res.Value, (t as Pullenti.Ner.TextToken).Term); } else if ((t.Next != null && !t.IsNewlineAfter && t.Next.Chars.IsLatinLetter) && tt.Chars.IsLatinLetter) { res.EndToken = t; } else { break; } } else { break; } } } for (Pullenti.Ner.Token t0 = res.BeginToken; t0 != null; t0 = t0.Next) { if ((((tt = t0 as Pullenti.Ner.TextToken))) != null && tt.IsLetters) { if (!tt.Morph.Class.IsConjunction && !tt.Morph.Class.IsPreposition) { foreach (Pullenti.Morph.MorphBaseInfo mf in tt.Morph.Items) { if ((mf as Pullenti.Morph.MorphWordForm).IsInDictionary) { res.IsInDictionary = true; } } } } if (t0 == res.EndToken) { break; } } if (res.BeginToken == res.EndToken && res.BeginToken.Chars.IsAllUpper) { if (res.EndToken.Next != null && !res.EndToken.IsWhitespaceAfter) { Pullenti.Ner.Token t1 = res.EndToken.Next; if (t1.Next != null && !t1.IsWhitespaceAfter && t1.IsHiphen) { t1 = t1.Next; } if (t1 is Pullenti.Ner.NumberToken) { res.Value += (t1 as Pullenti.Ner.NumberToken).Value; res.EndToken = t1; } } } if (res.BeginToken == res.EndToken && res.BeginToken.Chars.IsLastLower) { string src = res.BeginToken.GetSourceText(); for (int i = src.Length - 1; i >= 0; i--) { if (char.IsUpper(src[i])) { res.Value = src.Substring(0, i + 1); break; } } } 
return(res); }