/// <summary>
/// Returns the difference of the numbers r2 - r1, or null when the numbers are not available.
/// </summary>
/// <param name="r1">first referent</param>
/// <param name="r2">second referent</param>
/// <returns>
/// The value of ATTR_NUMBER of r2 minus that of r1, or null when either referent is null,
/// either ATTR_NUMBER value is missing, or either value is not a valid 32-bit integer.
/// </returns>
public static int? GetNumberDiff(Pullenti.Ner.Referent r1, Pullenti.Ner.Referent r2)
{
    // A missing referent has no number: report "no difference" instead of throwing.
    if (r1 == null || r2 == null)
    {
        return null;
    }

    string num1 = r1.GetStringValue(ATTR_NUMBER);
    string num2 = r2.GetStringValue(ATTR_NUMBER);
    if (num1 == null || num2 == null)
    {
        return null;
    }

    // The attribute is machine data, so parse it independently of the current thread culture.
    int n1;
    int n2;
    if (!int.TryParse(num1, System.Globalization.NumberStyles.Integer, System.Globalization.CultureInfo.InvariantCulture, out n1)
        || !int.TryParse(num2, System.Globalization.NumberStyles.Integer, System.Globalization.CultureInfo.InvariantCulture, out n2))
    {
        return null;
    }

    return n2 - n1;
}
// Adds a reference to the organization and also adds names derived from it.
// Name sources are tried in this order: the organization's NAME slots, its EPONYM slots,
// its TYPE slots combined with the number, the names of its GEO referent, and finally
// the organization's own string representation.
internal void AddOrgReferent(Pullenti.Ner.Referent org)
{
    if (org == null)
    {
        return;
    }

    this.AddSlot(ATTR_REF, org, false, 0);

    bool hasName = false;
    GeoReferent geoRef = null;
    string specialType = null;
    string number = org.GetStringValue("NUMBER");

    // First pass: take explicit names, map council types to district types, remember the GEO.
    foreach (Pullenti.Ner.Slot slot in org.Slots)
    {
        switch (slot.TypeName)
        {
            case "NAME":
            {
                if (number == null)
                {
                    this.AddName(slot.Value as string);
                }
                else
                {
                    this.AddName(string.Format("{0}-{1}", slot.Value, number));
                }
                hasName = true;
                break;
            }
            case "TYPE":
            {
                string typeValue = slot.Value as string;
                switch (typeValue)
                {
                    case "СЕЛЬСКИЙ СОВЕТ":
                        this.AddTyp("сельский округ");
                        break;
                    case "ГОРОДСКОЙ СОВЕТ":
                        this.AddTyp("городской округ");
                        break;
                    case "ПОСЕЛКОВЫЙ СОВЕТ":
                        this.AddTyp("поселковый округ");
                        break;
                    case "аэропорт":
                        specialType = typeValue.ToUpper();
                        break;
                }
                break;
            }
            case "GEO":
            {
                GeoReferent candidate = slot.Value as GeoReferent;
                if (candidate != null)
                {
                    geoRef = candidate;
                }
                break;
            }
        }
    }

    // Second source: eponyms (upper-cased), optionally suffixed with the number.
    if (!hasName)
    {
        foreach (Pullenti.Ner.Slot slot in org.Slots)
        {
            if (slot.TypeName != "EPONYM")
            {
                continue;
            }
            string eponym = (slot.Value as string).ToUpper();
            this.AddName(number == null ? eponym : string.Format("{0}-{1}", eponym, number));
            hasName = true;
        }
    }

    // Third source: "<TYPE>-<number>", only possible when a number exists.
    if (!hasName && number != null)
    {
        foreach (Pullenti.Ner.Slot slot in org.Slots)
        {
            if (slot.TypeName != "TYPE")
            {
                continue;
            }
            string upperType = (slot.Value as string).ToUpper();
            this.AddName(string.Format("{0}-{1}", upperType, number));
            hasName = true;
        }
    }

    // Fourth source: names of the GEO referent, plus both orderings with the special type.
    if (geoRef != null && !hasName)
    {
        foreach (string geoName in geoRef.GetStringValues(ATTR_NAME))
        {
            this.AddName(geoName);
            if (specialType != null)
            {
                this.AddName(string.Format("{0} {1}", geoName, specialType));
                this.AddName(string.Format("{0} {1}", specialType, geoName));
            }
            hasName = true;
        }
    }

    // Last resort: the upper-cased string form of the organization itself.
    if (!hasName)
    {
        this.AddName(org.ToString(true, Pullenti.Morph.MorphLang.Unknown, 0).ToUpper());
    }
}
/// <summary>
/// Checks whether this person referent and <paramref name="obj"/> may be the same person.
/// </summary>
/// <param name="obj">referent to compare with; anything other than a PersonReferent yields false</param>
/// <param name="typ">comparison mode; DifferentTexts applies stricter rules when names are incomplete</param>
/// <returns>true when the two referents are considered compatible</returns>
public override bool CanBeEquals(Pullenti.Ner.Referent obj, Pullenti.Ner.Core.ReferentsEqualType typ)
{
    PersonReferent p = obj as PersonReferent;
    if (p == null)
    {
        return false;
    }

    // A shared identity value (compared after _DelSurnameEnd) is sufficient on its own.
    foreach (Pullenti.Ner.Slot own in Slots)
    {
        if (own.TypeName != ATTR_IDENTITY)
        {
            continue;
        }
        foreach (Pullenti.Ner.Slot other in p.Slots)
        {
            if (other.TypeName == own.TypeName
                && _DelSurnameEnd(own.Value as string) == _DelSurnameEnd(other.Value as string))
            {
                return true;
            }
        }
    }

    // Two different nicknames rule the match out.
    string nick1 = this.GetStringValue(ATTR_NICKNAME);
    string nick2 = obj.GetStringValue(ATTR_NICKNAME);
    if (nick1 != null && nick2 != null && nick1 != nick2)
    {
        return false;
    }

    bool bothHaveLastname = this.FindSlot(ATTR_LASTNAME, null, true) != null
        && p.FindSlot(ATTR_LASTNAME, null, true) != null;
    if (bothHaveLastname)
    {
        if (!this.CompareSurnamesPers(p))
        {
            return false;
        }

        bool bothHaveFirstname = this.FindSlot(ATTR_FIRSTNAME, null, true) != null
            && p.FindSlot(ATTR_FIRSTNAME, null, true) != null;
        if (!bothHaveFirstname)
        {
            // Across different texts a first name present on only one side is not accepted.
            if (typ == Pullenti.Ner.Core.ReferentsEqualType.DifferentTexts
                && (this.FindSlot(ATTR_FIRSTNAME, null, true) != null || p.FindSlot(ATTR_FIRSTNAME, null, true) != null))
            {
                return false;
            }
            return true;
        }

        if (!this.CheckNames(ATTR_FIRSTNAME, p))
        {
            return false;
        }

        bool bothHaveMiddlename = this.FindSlot(ATTR_MIDDLENAME, null, true) != null
            && p.FindSlot(ATTR_MIDDLENAME, null, true) != null;
        if (bothHaveMiddlename)
        {
            return this.CheckNames(ATTR_MIDDLENAME, p);
        }
        if (typ != Pullenti.Ner.Core.ReferentsEqualType.DifferentTexts)
        {
            return true;
        }

        // Different texts, middle name on exactly one side: fall back to the string forms.
        if (this.FindSlot(ATTR_MIDDLENAME, null, true) != null || p.FindSlot(ATTR_MIDDLENAME, null, true) != null)
        {
            return this.ToString() == p.ToString();
        }

        // No middle names at all: require a common non-initial first name,
        // or that neither side has any non-initial first name.
        List<string> ownFullNames = new List<string>();
        foreach (Pullenti.Ner.Slot s in Slots)
        {
            if (s.TypeName != ATTR_FIRSTNAME)
            {
                continue;
            }
            string name = s.Value.ToString();
            if (!IsInitial(name))
            {
                ownFullNames.Add(name);
            }
        }

        bool otherHasFullName = false;
        foreach (Pullenti.Ner.Slot s in p.Slots)
        {
            if (s.TypeName != ATTR_FIRSTNAME)
            {
                continue;
            }
            string name = s.Value.ToString();
            if (IsInitial(name))
            {
                continue;
            }
            if (ownFullNames.Contains(name))
            {
                return true;
            }
            otherHasFullName = true;
        }
        return ownFullNames.Count == 0 && !otherHasFullName;
    }

    // At least one side has no last name: compare by title and/or common nickname.
    string tit1 = this._findShortestKingTitul(false);
    string tit2 = p._findShortestKingTitul(false);
    bool sameNick = nick1 != null && nick1 == nick2;

    if (tit1 == null || tit2 == null)
    {
        // A missing title is tolerated only when the nicknames coincide.
        if (!sameNick)
        {
            return false;
        }
    }
    else if (tit1 != tit2 && !tit1.Contains(tit2) && !tit2.Contains(tit1))
    {
        return false;
    }

    if (this.FindSlot(ATTR_FIRSTNAME, null, true) != null && p.FindSlot(ATTR_FIRSTNAME, null, true) != null)
    {
        return this.CheckNames(ATTR_FIRSTNAME, p);
    }
    return false;
}
// Tries to recognize a block of bank requisites starting at token t.
// Walks the token chain collecting URI referents whose scheme passes _isBankReq, plus an
// ORGANIZATION referent used as the bank and optionally a correspondent bank.
//   t       - first token to analyze (null yields null)
//   keyWord - when true, scanning may continue even though no requisite URI has been seen yet
// Returns a ReferentToken wrapping a new BankDataReferent that spans from the start token to
// the last accepted token, or null when no acceptable requisites were found. A result requires
// a "Р/С" or "Л/С" item, and either at least two items or an organization.
Pullenti.Ner.ReferentToken TryAttach(Pullenti.Ner.Token t, bool keyWord)
{
    if (t == null)
    {
        return null;
    }
    Pullenti.Ner.Token t0 = t;
    Pullenti.Ner.Token t1 = t;
    List<string> urisKeys = null;
    List<Pullenti.Ner.Uri.UriReferent> uris = null;
    Pullenti.Ner.Referent org = null;
    Pullenti.Ner.Referent corOrg = null;
    bool orgIsBank = false;
    // Number of consecutive tokens that did not contribute anything (reset on each accepted item).
    int empty = 0;
    Pullenti.Ner.Uri.UriReferent lastUri = null;
    for (; t != null; t = t.Next)
    {
        if (t.IsTableControlChar && t != t0)
        {
            break;
        }
        if (t.IsComma || t.Morph.Class.IsPreposition || t.IsCharOf("/\\"))
        {
            continue;
        }
        bool bankKeyword = false;
        // Skip the two-word label "ПОЛНЫЙ НАИМЕНОВАНИЕ" / "ПОЛНЫЙ НАЗВАНИЕ".
        if (t.IsValue("ПОЛНЫЙ", null) && t.Next != null && (t.Next.IsValue("НАИМЕНОВАНИЕ", null) || t.Next.IsValue("НАЗВАНИЕ", null)))
        {
            t = t.Next.Next;
            if (t == null)
            {
                break;
            }
        }
        if (t.IsValue("БАНК", null))
        {
            if ((t is Pullenti.Ner.ReferentToken) && t.GetReferent().TypeName == "ORGANIZATION")
            {
                bankKeyword = true;
            }
            // Step over an optional noun phrase and a colon that follow the keyword.
            Pullenti.Ner.Token tt = t.Next;
            Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(tt, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
            if (npt != null)
            {
                tt = npt.EndToken.Next;
            }
            if (tt != null && tt.IsChar(':'))
            {
                tt = tt.Next;
            }
            if (tt != null)
            {
                if (!bankKeyword)
                {
                    t = tt;
                    bankKeyword = true;
                }
                else if (tt.GetReferent() != null && tt.GetReferent().TypeName == "ORGANIZATION")
                {
                    t = tt;
                }
            }
        }
        Pullenti.Ner.Referent r = t.GetReferent();
        if (r != null && r.TypeName == "ORGANIZATION")
        {
            // The organization counts as a bank when it or one of its parents (at most 4 levels)
            // has KIND "Bank", or when it was introduced by the bank keyword.
            bool isBank = false;
            int kk = 0;
            for (Pullenti.Ner.Referent rr = r; rr != null && (kk < 4); rr = rr.ParentReferent, kk++)
            {
                // KIND is an identifier, so compare ordinally rather than with culture rules.
                isBank = string.Equals(rr.GetStringValue("KIND") ?? "", "Bank", System.StringComparison.OrdinalIgnoreCase);
                if (isBank)
                {
                    break;
                }
            }
            if (!isBank && bankKeyword)
            {
                isBank = true;
            }
            if (!isBank && uris != null && urisKeys.Contains("ИНН"))
            {
                return null;
            }
            if ((lastUri != null && lastUri.Scheme == "К/С" && t.Previous != null) && t.Previous.IsValue("В", null))
            {
                // An organization after a "К/С" item and the word "В" is the correspondent bank.
                corOrg = r;
                t1 = t;
            }
            else if (org == null || (!orgIsBank && isBank))
            {
                org = r;
                t1 = t;
                orgIsBank = isBank;
                if (isBank)
                {
                    continue;
                }
            }
            if (uris == null && !keyWord)
            {
                return null;
            }
            continue;
        }
        if (r is Pullenti.Ner.Uri.UriReferent)
        {
            Pullenti.Ner.Uri.UriReferent u = r as Pullenti.Ner.Uri.UriReferent;
            if (uris == null)
            {
                // The very first URI must be a bank requisite, and not an "ИНН" ending its line.
                if (!_isBankReq(u.Scheme))
                {
                    return null;
                }
                if (u.Scheme == "ИНН" && t.IsNewlineAfter)
                {
                    return null;
                }
                uris = new List<Pullenti.Ner.Uri.UriReferent>();
                urisKeys = new List<string>();
            }
            else
            {
                // Stop on a non-requisite URI, a repeated scheme, or an "ИНН" after unrelated tokens.
                if (!_isBankReq(u.Scheme))
                {
                    break;
                }
                if (urisKeys.Contains(u.Scheme))
                {
                    break;
                }
                if (u.Scheme == "ИНН")
                {
                    if (empty > 0)
                    {
                        break;
                    }
                }
            }
            urisKeys.Add(u.Scheme);
            uris.Add(u);
            lastUri = u;
            t1 = t;
            empty = 0;
            continue;
        }
        else if (uris == null && !keyWord && !orgIsBank)
        {
            return null;
        }
        if (r != null && (r.TypeName == "GEO" || r.TypeName == "ADDRESS"))
        {
            empty++;
            continue;
        }
        if (t is Pullenti.Ner.TextToken)
        {
            if (t.IsValue("ПОЛНЫЙ", null) || t.IsValue("НАИМЕНОВАНИЕ", null) || t.IsValue("НАЗВАНИЕ", null))
            {
            }
            else if (t.Chars.IsLetter)
            {
                Pullenti.Ner.Core.TerminToken tok = m_Ontology.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
                if (tok != null)
                {
                    t = tok.EndToken;
                    empty = 0;
                }
                else
                {
                    empty++;
                    if (t.IsNewlineBefore)
                    {
                        // A new line that starts with "<noun phrase>:" ends the requisites.
                        Pullenti.Ner.Core.NounPhraseToken nnn = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                        if (nnn != null && nnn.EndToken.Next != null && nnn.EndToken.Next.IsChar(':'))
                        {
                            break;
                        }
                    }
                }
                if (uris == null)
                {
                    break;
                }
            }
        }
        if (empty > 2)
        {
            break;
        }
        if (empty > 0 && t.IsChar(':') && t.IsNewlineAfter)
        {
            break;
        }
        if (((t is Pullenti.Ner.NumberToken) && t.IsNewlineBefore && t.Next != null) && !t.Next.Chars.IsLetter)
        {
            break;
        }
    }
    if (uris == null)
    {
        return null;
    }
    if (!urisKeys.Contains("Р/С") && !urisKeys.Contains("Л/С"))
    {
        return null;
    }
    if ((uris.Count < 2) && org == null)
    {
        return null;
    }
    BankDataReferent bdr = new BankDataReferent();
    foreach (Pullenti.Ner.Uri.UriReferent u in uris)
    {
        bdr.AddSlot(BankDataReferent.ATTR_ITEM, u, false, 0);
    }
    if (org != null)
    {
        bdr.AddSlot(BankDataReferent.ATTR_BANK, org, false, 0);
    }
    if (corOrg != null)
    {
        bdr.AddSlot(BankDataReferent.ATTR_CORBANK, corOrg, false, 0);
    }
    // If an organization directly precedes the block, borrow its requisite URIs
    // for the schemes that were not found in the block itself.
    Pullenti.Ner.Referent org0 = (t0.Previous == null ? null : t0.Previous.GetReferent());
    if (org0 != null && org0.TypeName == "ORGANIZATION")
    {
        foreach (Pullenti.Ner.Slot s in org0.Slots)
        {
            if (s.Value is Pullenti.Ner.Uri.UriReferent)
            {
                Pullenti.Ner.Uri.UriReferent u = s.Value as Pullenti.Ner.Uri.UriReferent;
                if (_isBankReq(u.Scheme))
                {
                    if (!urisKeys.Contains(u.Scheme))
                    {
                        bdr.AddSlot(BankDataReferent.ATTR_ITEM, u, false, 0);
                    }
                }
            }
        }
    }
    return new Pullenti.Ner.ReferentToken(bdr, t0, t1);
}
// Completes a parsed number with its currency.
//   res - the number token being completed; it is modified in place (ExTypParam, EndToken,
//         RealValue / AltRealValue, AltRestMoney)
//   t1  - the token where the currency postfix is expected
// Returns res when a currency postfix was recognized at t1, otherwise null.
// After the currency, an optional "small money" part (a number followed by an m_SmallMoney
// term, or the word "НОЛЬ" followed by such a term) is consumed as well.
static Pullenti.Ner.Core.NumberExToken _correctMoney(Pullenti.Ner.Core.NumberExToken res, Pullenti.Ner.Token t1)
{
    if (t1 == null)
    {
        return null;
    }
    List<Pullenti.Ner.Core.TerminToken> toks = m_Postfixes.TryParseAll(t1, Pullenti.Ner.Core.TerminParseAttr.No);
    if (toks == null || toks.Count == 0)
    {
        return null;
    }

    // A GEO referent right after the postfix may disambiguate the currency by its ALPHA2 value.
    Pullenti.Ner.Token tt = toks[0].EndToken.Next;
    Pullenti.Ner.Referent r = (tt == null ? null : tt.GetReferent());
    string alpha2 = null;
    if (r != null && r.TypeName == "GEO")
    {
        alpha2 = r.GetStringValue("ALPHA2");
    }
    if (alpha2 != null)
    {
        for (int i = toks.Count - 1; i >= 0; i--)
        {
            // Codes are identifiers: compare ordinally, not with culture-sensitive rules.
            if (!toks[i].Termin.CanonicText.StartsWith(alpha2, System.StringComparison.Ordinal))
            {
                toks.RemoveAt(i);
            }
        }
        if (toks.Count == 0)
        {
            // Nothing matched the GEO prefix: fall back to the unfiltered candidates.
            toks = m_Postfixes.TryParseAll(t1, Pullenti.Ner.Core.TerminParseAttr.No);
        }
    }
    if (toks.Count > 1)
    {
        // Still ambiguous: pick a default prefix from the canonical word of the first candidate.
        alpha2 = null;
        string str = toks[0].Termin.Terms[0].CanonicalText;
        if (str == "РУБЛЬ" || str == "RUBLE")
        {
            alpha2 = "RU";
        }
        else if (str == "ДОЛЛАР" || str == "ДОЛАР" || str == "DOLLAR")
        {
            alpha2 = "US";
        }
        else if (str == "ФУНТ" || str == "POUND")
        {
            alpha2 = "UK";
        }
        if (alpha2 != null)
        {
            for (int i = toks.Count - 1; i >= 0; i--)
            {
                string code = toks[i].Termin.CanonicText;
                if (!code.StartsWith(alpha2, System.StringComparison.Ordinal) && code != "GBP")
                {
                    toks.RemoveAt(i);
                }
            }
        }
        // The default prefix must not extend the token over the following GEO (see below).
        alpha2 = null;
    }
    if (toks.Count < 1)
    {
        return null;
    }
    res.ExTypParam = toks[0].Termin.CanonicText;
    if (alpha2 != null && tt != null)
    {
        // The GEO referent was used for disambiguation, so it belongs to the money token.
        res.EndToken = tt;
    }

    tt = res.EndToken.Next;
    if (tt != null && tt.IsCommaAnd)
    {
        tt = tt.Next;
    }
    Pullenti.Ner.NumberToken smallNum = tt as Pullenti.Ner.NumberToken;
    if (smallNum != null && tt.Next != null && (tt.WhitespacesAfterCount < 4))
    {
        Pullenti.Ner.Token tt1 = tt.Next;
        // Skip a bracketed repetition of the same number, e.g. "50 (50)".
        if ((tt1 != null && tt1.IsChar('(') && (tt1.Next is Pullenti.Ner.NumberToken)) && tt1.Next.Next != null && tt1.Next.Next.IsChar(')'))
        {
            if (smallNum.Value == (tt1.Next as Pullenti.Ner.NumberToken).Value)
            {
                tt1 = tt1.Next.Next.Next;
            }
        }
        Pullenti.Ner.Core.TerminToken tok = m_SmallMoney.TryParse(tt1, Pullenti.Ner.Core.TerminParseAttr.No);
        if (tok == null && tt1 != null && tt1.IsChar(')'))
        {
            tok = m_SmallMoney.TryParse(tt1.Next, Pullenti.Ner.Core.TerminParseAttr.No);
        }
        if (tok != null && smallNum.IntValue != null)
        {
            // NOTE(review): the termin Tag is used as the exclusive upper bound and divisor of
            // the small unit (presumably 100 for kopecks/cents) - confirm where m_SmallMoney is built.
            int max = (int)tok.Termin.Tag;
            int val = smallNum.IntValue.Value;
            if (val < max)
            {
                double f = (double)val;
                f /= max;
                // If the value already has a fractional part that disagrees with val, keep val
                // as the alternative rest; if it has no fractional part, add the fraction.
                double f0 = res.RealValue - ((long)res.RealValue);
                int re0 = (int)((f0 * 100) + 0.0001);
                if (re0 > 0 && val != re0)
                {
                    res.AltRestMoney = val;
                }
                else if (f0 == 0)
                {
                    res.RealValue += f;
                }
                f0 = res.AltRealValue - ((long)res.AltRealValue);
                re0 = (int)((f0 * 100) + 0.0001);
                if (re0 > 0 && val != re0)
                {
                    res.AltRestMoney = val;
                }
                else if (f0 == 0)
                {
                    res.AltRealValue += f;
                }
                res.EndToken = tok.EndToken;
            }
        }
    }
    else if ((tt is Pullenti.Ner.TextToken) && tt.IsValue("НОЛЬ", null))
    {
        // "НОЛЬ <small unit>" only extends the token; the value is unchanged.
        Pullenti.Ner.Core.TerminToken tok = m_SmallMoney.TryParse(tt.Next, Pullenti.Ner.Core.TerminParseAttr.No);
        if (tok != null)
        {
            res.EndToken = tok.EndToken;
        }
    }
    return res;
}
// Builds the ReferentToken for a recognized person and enriches the referent with
// attributes found around the name.
//   p              - the person referent (null yields null)
//   begin, end     - first and last token of the person's name; both may be moved outwards here
//   morph          - morphology of the name; a singular copy of its items is stored in the result
//   attrs          - attribute tokens found before the name (may be null)
//   ad             - analyzer data (may be null); supplies the local ontology and OverflowLevel
//   forAttribute   - when true, the token is returned right after processing the leading attributes
//   afterBePredicate - not used in this method body
// Returns a ReferentToken for p spanning [begin, end], with Morph and MiscAttrs set.
internal static Pullenti.Ner.ReferentToken CreateReferentToken(Pullenti.Ner.Person.PersonReferent p, Pullenti.Ner.Token begin, Pullenti.Ner.Token end, Pullenti.Ner.MorphCollection morph, List<PersonAttrToken> attrs, Pullenti.Ner.Person.PersonAnalyzer.PersonAnalyzerData ad, bool forAttribute, bool afterBePredicate)
{
    if (p == null)
    {
        return null;
    }

    // --- 1. Attributes standing before the name ---
    bool hasPrefix = false;
    if (attrs != null)
    {
        foreach (PersonAttrToken a in attrs)
        {
            if (a.Typ == PersonAttrTerminType.BestRegards)
            {
                hasPrefix = true;
            }
            else
            {
                if (a.BeginChar < begin.BeginChar)
                {
                    begin = a.BeginToken;
                    // Include the brackets when the attribute is written as "(attribute)".
                    if ((a.EndToken.Next != null && a.EndToken.Next.IsChar(')') && begin.Previous != null) && begin.Previous.IsChar('('))
                    {
                        begin = begin.Previous;
                    }
                }
                if (a.Typ != PersonAttrTerminType.Prefix)
                {
                    if (a.Age != null)
                    {
                        p.AddSlot(Pullenti.Ner.Person.PersonReferent.ATTR_AGE, a.Age, false, 0);
                    }
                    if (a.PropRef == null)
                    {
                        p.AddSlot(Pullenti.Ner.Person.PersonReferent.ATTR_ATTR, a.Value, false, 0);
                    }
                    else
                    {
                        p.AddSlot(Pullenti.Ner.Person.PersonReferent.ATTR_ATTR, a, false, 0);
                    }
                }
                else if (a.Gender == Pullenti.Morph.MorphGender.Feminie && !p.IsFemale)
                {
                    p.IsFemale = true;
                }
                else if (a.Gender == Pullenti.Morph.MorphGender.Masculine && !p.IsMale)
                {
                    p.IsMale = true;
                }
            }
        }
    }
    else if ((begin.Previous is Pullenti.Ner.TextToken) && (begin.WhitespacesBeforeCount < 3))
    {
        // No attributes: a preceding "ИП" is turned into an "индивидуальный предприниматель" property.
        if ((begin.Previous as Pullenti.Ner.TextToken).Term == "ИП")
        {
            PersonAttrToken a = new PersonAttrToken(begin.Previous, begin.Previous);
            a.PropRef = new Pullenti.Ner.Person.PersonPropertyReferent();
            a.PropRef.Name = "индивидуальный предприниматель";
            p.AddSlot(Pullenti.Ner.Person.PersonReferent.ATTR_ATTR, a, false, 0);
            begin = begin.Previous;
        }
    }

    // --- 2. Morphology: singular copy of the items, gender filled in from the person ---
    Pullenti.Ner.MorphCollection m0 = new Pullenti.Ner.MorphCollection();
    foreach (Pullenti.Morph.MorphBaseInfo it in morph.Items)
    {
        Pullenti.Morph.MorphBaseInfo bi = new Pullenti.Morph.MorphBaseInfo();
        bi.CopyFrom(it);
        bi.Number = Pullenti.Morph.MorphNumber.Singular;
        if (bi.Gender == Pullenti.Morph.MorphGender.Undefined)
        {
            if (p.IsMale && !p.IsFemale)
            {
                bi.Gender = Pullenti.Morph.MorphGender.Masculine;
            }
            if (!p.IsMale && p.IsFemale)
            {
                bi.Gender = Pullenti.Morph.MorphGender.Feminie;
            }
        }
        m0.AddItem(bi);
    }
    morph = m0;
    if ((attrs != null && attrs.Count > 0 && !attrs[0].Morph.Case.IsUndefined) && morph.Case.IsUndefined)
    {
        // The name itself has no case: take it from the first attribute.
        morph.Case = attrs[0].Morph.Case;
        if (attrs[0].Morph.Number == Pullenti.Morph.MorphNumber.Singular)
        {
            morph.Number = Pullenti.Morph.MorphNumber.Singular;
        }
        if (p.IsMale && !p.IsFemale)
        {
            morph.Gender = Pullenti.Morph.MorphGender.Masculine;
        }
        else if (p.IsFemale)
        {
            morph.Gender = Pullenti.Morph.MorphGender.Feminie;
        }
    }

    // --- 3. "ИМЕНИ <person>" / "ИМ. <person>" is handled like the forAttribute case ---
    if (begin.Previous != null)
    {
        Pullenti.Ner.Token ttt = begin.Previous;
        if (ttt.IsValue("ИМЕНИ", "ІМЕНІ"))
        {
            forAttribute = true;
        }
        else
        {
            if (ttt.IsChar('.') && ttt.Previous != null)
            {
                ttt = ttt.Previous;
            }
            if (ttt.WhitespacesAfterCount < 3)
            {
                if (ttt.IsValue("ИМ", "ІМ"))
                {
                    forAttribute = true;
                }
            }
        }
    }
    if (forAttribute)
    {
        return new Pullenti.Ner.ReferentToken(p, begin, end) { Morph = morph, MiscAttrs = (int)p.m_PersonIdentityTyp };
    }

    // --- 4. Enumeration "<property>: person, person and person": share the leading property ---
    if ((begin.Previous != null && begin.Previous.IsCommaAnd && (begin.Previous.Previous is Pullenti.Ner.ReferentToken)) && (begin.Previous.Previous.GetReferent() is Pullenti.Ner.Person.PersonReferent))
    {
        Pullenti.Ner.ReferentToken rt00 = begin.Previous.Previous as Pullenti.Ner.ReferentToken;
        // Walk back to the first person of the comma/and-separated chain.
        for (Pullenti.Ner.Token ttt = (Pullenti.Ner.Token)rt00; ttt != null;)
        {
            if (ttt.Previous == null || !(ttt.Previous.Previous is Pullenti.Ner.ReferentToken))
            {
                break;
            }
            if (!ttt.Previous.IsCommaAnd || !(ttt.Previous.Previous.GetReferent() is Pullenti.Ner.Person.PersonReferent))
            {
                break;
            }
            rt00 = ttt.Previous.Previous as Pullenti.Ner.ReferentToken;
            ttt = rt00;
        }
        if (rt00.BeginToken.GetReferent() is Pullenti.Ner.Person.PersonPropertyReferent)
        {
            bool ok = false;
            if ((rt00.BeginToken as Pullenti.Ner.ReferentToken).EndToken.Next != null && (rt00.BeginToken as Pullenti.Ner.ReferentToken).EndToken.Next.IsChar(':'))
            {
                ok = true;
            }
            else if (rt00.BeginToken.Morph.Number == Pullenti.Morph.MorphNumber.Plural)
            {
                ok = true;
            }
            if (ok)
            {
                p.AddSlot(Pullenti.Ner.Person.PersonReferent.ATTR_ATTR, rt00.BeginToken.GetReferent(), false, 0);
            }
        }
    }

    // The code below may re-enter person analysis (TryAttachPerson / ProcessReferent);
    // OverflowLevel bounds that nesting and is decremented again near the end of the method.
    if (ad != null)
    {
        if (ad.OverflowLevel > 10)
        {
            return new Pullenti.Ner.ReferentToken(p, begin, end) { Morph = morph, MiscAttrs = (int)p.m_PersonIdentityTyp };
        }
        ad.OverflowLevel++;
    }

    // --- 5. Scan the tokens after the name for trailing attributes ---
    List<PersonAttrToken> attrs1 = null;
    bool hasPosition = false;
    bool openBr = false;
    for (Pullenti.Ner.Token t = end.Next; t != null; t = t.Next)
    {
        if (t.IsTableControlChar)
        {
            break;
        }
        if (t.IsNewlineBefore)
        {
            if (t.NewlinesBeforeCount > 2)
            {
                break;
            }
            if (attrs1 != null && attrs1.Count > 0)
            {
                break;
            }
            Pullenti.Ner.Mail.Internal.MailLine ml = Pullenti.Ner.Mail.Internal.MailLine.Parse(t, 0, 0);
            if (ml != null && ml.Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.From)
            {
                break;
            }
            if (t.Chars.IsCapitalUpper)
            {
                // A capitalized new line is accepted only when it clearly is an attribute line.
                PersonAttrToken attr1 = PersonAttrToken.TryAttach(t, (ad == null ? null : ad.LocalOntology), PersonAttrToken.PersonAttrAttachAttrs.No);
                bool ok1 = false;
                if (attr1 != null)
                {
                    if (hasPrefix || attr1.IsNewlineAfter || (attr1.EndToken.Next != null && attr1.EndToken.Next.IsTableControlChar))
                    {
                        ok1 = true;
                    }
                    else
                    {
                        for (Pullenti.Ner.Token tt2 = t.Next; tt2 != null && tt2.EndChar <= attr1.EndChar; tt2 = tt2.Next)
                        {
                            if (tt2.IsWhitespaceBefore)
                            {
                                ok1 = true;
                            }
                        }
                    }
                }
                else
                {
                    Pullenti.Ner.Token ttt = CorrectTailAttributes(p, t);
                    if (ttt != null && ttt != t)
                    {
                        end = (t = ttt);
                        continue;
                    }
                }
                if (!ok1)
                {
                    break;
                }
            }
        }
        if (t.IsHiphen || t.IsCharOf("_>|"))
        {
            continue;
        }
        if (t.IsValue("МОДЕЛЬ", null))
        {
            break;
        }
        Pullenti.Ner.Token tt = CorrectTailAttributes(p, t);
        if (tt != t && tt != null)
        {
            end = (t = tt);
            continue;
        }
        // NOTE: isBe is never set to true in this method, so the "!isBe" checks below always hold.
        bool isBe = false;
        if (t.IsChar('(') && t == end.Next)
        {
            openBr = true;
            t = t.Next;
            if (t == null)
            {
                break;
            }
            // "(Lastname)" right after the name: an additional last name of the person.
            PersonItemToken pit1 = PersonItemToken.TryAttach(t, null, PersonItemToken.ParseAttr.No, null);
            if ((pit1 != null && t.Chars.IsCapitalUpper && pit1.EndToken.Next != null) && (t is Pullenti.Ner.TextToken) && pit1.EndToken.Next.IsChar(')'))
            {
                if (pit1.Lastname != null)
                {
                    Pullenti.Morph.MorphBaseInfo inf = new Pullenti.Morph.MorphBaseInfo() { Case = Pullenti.Morph.MorphCase.Nominative };
                    if (p.IsMale)
                    {
                        inf.Gender |= Pullenti.Morph.MorphGender.Masculine;
                    }
                    if (p.IsFemale)
                    {
                        inf.Gender |= Pullenti.Morph.MorphGender.Feminie;
                    }
                    PersonMorphCollection sur = PersonIdentityToken.CreateLastname(pit1, inf);
                    if (sur != null)
                    {
                        p.AddFioIdentity(sur, null, null);
                        end = (t = pit1.EndToken.Next);
                        continue;
                    }
                }
            }
            // "(Latin Name)" right after the name: Latin spelling of the already known name parts.
            if ((t is Pullenti.Ner.TextToken) && t.Chars.IsLatinLetter)
            {
                List<PersonItemToken> pits = PersonItemToken.TryAttachList(t, null, PersonItemToken.ParseAttr.CanBeLatin, 10);
                if (((pits != null && pits.Count >= 2 && pits.Count <= 3) && pits[0].Chars.IsLatinLetter && pits[1].Chars.IsLatinLetter) && pits[pits.Count - 1].EndToken.Next != null && pits[pits.Count - 1].EndToken.Next.IsChar(')'))
                {
                    Pullenti.Ner.Person.PersonReferent pr2 = new Pullenti.Ner.Person.PersonReferent();
                    int cou = 0;
                    foreach (PersonItemToken pi in pits)
                    {
                        foreach (Pullenti.Ner.Slot si in p.Slots)
                        {
                            if (si.TypeName == Pullenti.Ner.Person.PersonReferent.ATTR_FIRSTNAME || si.TypeName == Pullenti.Ner.Person.PersonReferent.ATTR_MIDDLENAME || si.TypeName == Pullenti.Ner.Person.PersonReferent.ATTR_LASTNAME)
                            {
                                if (Pullenti.Ner.Core.MiscHelper.CanBeEqualCyrAndLatSS(si.Value.ToString(), pi.Value))
                                {
                                    cou++;
                                    pr2.AddSlot(si.TypeName, pi.Value, false, 0);
                                    break;
                                }
                            }
                        }
                    }
                    // Accept only when every Latin item matched some existing name slot.
                    if (cou == pits.Count)
                    {
                        foreach (Pullenti.Ner.Slot si in pr2.Slots)
                        {
                            p.AddSlot(si.TypeName, si.Value, false, 0);
                        }
                        end = (t = pits[pits.Count - 1].EndToken.Next);
                        continue;
                    }
                }
            }
        }
        else if (t.IsComma)
        {
            t = t.Next;
            if ((t is Pullenti.Ner.TextToken) && (t as Pullenti.Ner.TextToken).IsValue("WHO", null))
            {
                continue;
            }
            // ", Latin Name": same Latin-spelling logic as in the bracketed case above.
            if ((t is Pullenti.Ner.TextToken) && t.Chars.IsLatinLetter)
            {
                List<PersonItemToken> pits = PersonItemToken.TryAttachList(t, null, PersonItemToken.ParseAttr.CanBeLatin, 10);
                if ((pits != null && pits.Count >= 2 && pits.Count <= 3) && pits[0].Chars.IsLatinLetter && pits[1].Chars.IsLatinLetter)
                {
                    Pullenti.Ner.Person.PersonReferent pr2 = new Pullenti.Ner.Person.PersonReferent();
                    int cou = 0;
                    foreach (PersonItemToken pi in pits)
                    {
                        foreach (Pullenti.Ner.Slot si in p.Slots)
                        {
                            if (si.TypeName == Pullenti.Ner.Person.PersonReferent.ATTR_FIRSTNAME || si.TypeName == Pullenti.Ner.Person.PersonReferent.ATTR_MIDDLENAME || si.TypeName == Pullenti.Ner.Person.PersonReferent.ATTR_LASTNAME)
                            {
                                if (Pullenti.Ner.Core.MiscHelper.CanBeEqualCyrAndLatSS(si.Value.ToString(), pi.Value))
                                {
                                    cou++;
                                    pr2.AddSlot(si.TypeName, pi.Value, false, 0);
                                    break;
                                }
                            }
                        }
                    }
                    if (cou == pits.Count)
                    {
                        foreach (Pullenti.Ner.Slot si in pr2.Slots)
                        {
                            p.AddSlot(si.TypeName, si.Value, false, 0);
                        }
                        end = (t = pits[pits.Count - 1].EndToken);
                        continue;
                    }
                }
            }
        }
        else if ((t is Pullenti.Ner.TextToken) && (t as Pullenti.Ner.TextToken).IsVerbBe)
        {
            t = t.Next;
        }
        else if (t.IsAnd && t.IsWhitespaceAfter && !t.IsNewlineAfter)
        {
            if (t == end.Next)
            {
                break;
            }
            t = t.Next;
        }
        else if (t.IsHiphen && t == end.Next)
        {
            t = t.Next;
        }
        else if (t.IsChar('.') && t == end.Next && hasPrefix)
        {
            t = t.Next;
        }
        Pullenti.Ner.Token ttt2 = CreateNickname(p, t);
        if (ttt2 != null)
        {
            t = (end = ttt2);
            continue;
        }
        if (t == null)
        {
            break;
        }
        PersonAttrToken attr = null;
        attr = PersonAttrToken.TryAttach(t, (ad == null ? null : ad.LocalOntology), PersonAttrToken.PersonAttrAttachAttrs.No);
        if (attr == null)
        {
            // Not an attribute: handle a few special continuations, then stop scanning.
            if ((t != null && t.GetReferent() != null && t.GetReferent().TypeName == "GEO") && attrs1 != null && openBr)
            {
                continue;
            }
            if ((t.Chars.IsCapitalUpper && openBr && t.Next != null) && t.Next.IsChar(')'))
            {
                if (p.FindSlot(Pullenti.Ner.Person.PersonReferent.ATTR_LASTNAME, null, true) == null)
                {
                    p.AddSlot(Pullenti.Ner.Person.PersonReferent.ATTR_LASTNAME, t.GetSourceText().ToUpper(), false, 0);
                    t = t.Next;
                    end = t;
                }
            }
            // A following singular "КОТОРЫЙ" reveals the gender of the person.
            if (t != null && t.IsValue("КОТОРЫЙ", null) && t.Morph.Number == Pullenti.Morph.MorphNumber.Singular)
            {
                if (!p.IsFemale && t.Morph.Gender == Pullenti.Morph.MorphGender.Feminie)
                {
                    p.IsFemale = true;
                    p.CorrectData();
                }
                else if (!p.IsMale && t.Morph.Gender == Pullenti.Morph.MorphGender.Masculine)
                {
                    p.IsMale = true;
                    p.CorrectData();
                }
            }
            break;
        }
        if (attr.Morph.Number == Pullenti.Morph.MorphNumber.Plural)
        {
            break;
        }
        if (attr.Typ == PersonAttrTerminType.BestRegards)
        {
            break;
        }
        if (attr.IsDoubt)
        {
            // A doubtful attribute is kept only in one of these supporting contexts.
            if (hasPrefix)
            {
            }
            else if (t.IsNewlineBefore && attr.IsNewlineAfter)
            {
            }
            else if (t.Previous != null && (t.Previous.IsHiphen || t.Previous.IsChar(':')))
            {
            }
            else
            {
                break;
            }
        }
        if (!morph.Case.IsUndefined && !attr.Morph.Case.IsUndefined)
        {
            // The attribute must agree in case with the name.
            if ((morph.Case & attr.Morph.Case).IsUndefined && !isBe)
            {
                break;
            }
        }
        if (openBr)
        {
            if (Pullenti.Ner.Person.PersonAnalyzer.TryAttachPerson(t, ad, false, 0, true) != null)
            {
                break;
            }
        }
        if (attrs1 == null)
        {
            if (t.Previous.IsComma && t.Previous == end.Next)
            {
                Pullenti.Ner.Token ttt = attr.EndToken.Next;
                if (ttt != null)
                {
                    if (ttt.Morph.Class.IsVerb)
                    {
                        if (Pullenti.Ner.Core.MiscHelper.CanBeStartOfSentence(begin))
                        {
                        }
                        else
                        {
                            break;
                        }
                    }
                }
            }
            attrs1 = new List<PersonAttrToken>();
        }
        attrs1.Add(attr);
        if (attr.Typ == PersonAttrTerminType.Position || attr.Typ == PersonAttrTerminType.King)
        {
            if (!isBe)
            {
                hasPosition = true;
            }
        }
        else if (attr.Typ != PersonAttrTerminType.Prefix)
        {
            if (attr.Typ == PersonAttrTerminType.Other && attr.Age != null)
            {
            }
            else
            {
                // Any other attribute type invalidates all trailing attributes.
                attrs1 = null;
                break;
            }
        }
        t = attr.EndToken;
    }

    // --- 6. Decide whether the trailing attributes really belong to this person ---
    if (attrs1 != null && hasPosition && attrs != null)
    {
        Pullenti.Ner.Token te1 = attrs[attrs.Count - 1].EndToken.Next;
        Pullenti.Ner.Token te2 = attrs1[0].BeginToken;
        if (te1.WhitespacesAfterCount > te2.WhitespacesBeforeCount && (te2.WhitespacesBeforeCount < 2))
        {
        }
        else if (attrs1[0].Age != null)
        {
        }
        else if ((te1.IsHiphen || te1.IsChar(':')) && !attrs1[0].IsNewlineBefore && (te2.Previous.IsComma || te2.Previous == end))
        {
        }
        else
        {
            // A position already stands before the name: a trailing one is dropped
            // unless it is at the end of the text or followed by '.'.
            foreach (PersonAttrToken a in attrs)
            {
                if (a.Typ == PersonAttrTerminType.Position)
                {
                    Pullenti.Ner.Token te = attrs1[attrs1.Count - 1].EndToken;
                    if (te.Next != null)
                    {
                        if (!te.Next.IsChar('.'))
                        {
                            attrs1 = null;
                            break;
                        }
                    }
                }
            }
        }
    }
    if (attrs1 != null && !hasPrefix)
    {
        // The last trailing attribute may actually open the next person; compare the spacing.
        PersonAttrToken attr = attrs1[attrs1.Count - 1];
        bool ok = false;
        if (attr.EndToken.Next != null && attr.EndToken.Next.Chars.IsCapitalUpper)
        {
            ok = true;
        }
        else
        {
            Pullenti.Ner.ReferentToken rt = Pullenti.Ner.Person.PersonAnalyzer.TryAttachPerson(attr.BeginToken, ad, false, -1, false);
            if (rt != null && (rt.Referent is Pullenti.Ner.Person.PersonReferent))
            {
                ok = true;
            }
        }
        if (ok)
        {
            if (attr.BeginToken.WhitespacesBeforeCount > attr.EndToken.WhitespacesAfterCount)
            {
                attrs1 = null;
            }
            else if (attr.BeginToken.WhitespacesBeforeCount == attr.EndToken.WhitespacesAfterCount)
            {
                Pullenti.Ner.ReferentToken rt1 = Pullenti.Ner.Person.PersonAnalyzer.TryAttachPerson(attr.BeginToken, ad, false, -1, false);
                if (rt1 != null)
                {
                    attrs1 = null;
                }
            }
        }
    }
    if (attrs1 != null)
    {
        foreach (PersonAttrToken a in attrs1)
        {
            if (a.Typ != PersonAttrTerminType.Prefix)
            {
                if (a.Age != null)
                {
                    p.AddSlot(Pullenti.Ner.Person.PersonReferent.ATTR_AGE, a.Age, true, 0);
                }
                else if (a.PropRef == null)
                {
                    p.AddSlot(Pullenti.Ner.Person.PersonReferent.ATTR_ATTR, a.Value, false, 0);
                }
                else
                {
                    p.AddSlot(Pullenti.Ner.Person.PersonReferent.ATTR_ATTR, a, false, 0);
                }
                end = a.EndToken;
                if (a.Gender != Pullenti.Morph.MorphGender.Undefined && !p.IsFemale && !p.IsMale)
                {
                    if (a.Gender == Pullenti.Morph.MorphGender.Masculine && !p.IsMale)
                    {
                        p.IsMale = true;
                        p.CorrectData();
                    }
                    else if (a.Gender == Pullenti.Morph.MorphGender.Feminie && !p.IsFemale)
                    {
                        p.IsFemale = true;
                        p.CorrectData();
                    }
                }
            }
        }
        if (openBr)
        {
            if (end.Next != null && end.Next.IsChar(')'))
            {
                end = end.Next;
            }
        }
    }

    // --- 7. Contacts, identity documents and employer organizations after the person ---
    int crlfCou = 0;
    for (Pullenti.Ner.Token t = end.Next; t != null; t = t.Next)
    {
        if (t.IsTableControlChar)
        {
            break;
        }
        if (t.IsNewlineBefore)
        {
            Pullenti.Ner.Mail.Internal.MailLine ml = Pullenti.Ner.Mail.Internal.MailLine.Parse(t, 0, 0);
            if (ml != null && ml.Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.From)
            {
                break;
            }
            crlfCou++;
        }
        if (t.IsCharOf(":,(") || t.IsHiphen)
        {
            continue;
        }
        if (t.IsChar('.') && t == end.Next)
        {
            continue;
        }
        Pullenti.Ner.Referent r = t.GetReferent();
        if (r != null)
        {
            if (r.TypeName == "PHONE" || r.TypeName == "URI" || r.TypeName == "ADDRESS")
            {
                string ty = r.GetStringValue("SCHEME");
                if (r.TypeName == "URI")
                {
                    // Only these URI schemes are treated as personal contacts.
                    if ((ty != "mailto" && ty != "skype" && ty != "ICQ") && ty != "http")
                    {
                        break;
                    }
                }
                p.AddContact(r);
                end = t;
                crlfCou = 0;
                continue;
            }
        }
        if (r is Pullenti.Ner.Person.PersonIdentityReferent)
        {
            p.AddSlot(Pullenti.Ner.Person.PersonReferent.ATTR_IDDOC, r, false, 0);
            end = t;
            crlfCou = 0;
            continue;
        }
        if (r != null && r.TypeName == "ORGANIZATION")
        {
            if (t.Next != null && t.Next.Morph.Class.IsVerb)
            {
                break;
            }
            if (begin.Previous != null && begin.Previous.Morph.Class.IsVerb)
            {
                break;
            }
            if (t.WhitespacesAfterCount == 1)
            {
                break;
            }
            // Add a "сотрудник" property unless an existing attribute already refers to the organization.
            bool exist = false;
            foreach (Pullenti.Ner.Slot s in p.Slots)
            {
                if (s.TypeName == Pullenti.Ner.Person.PersonReferent.ATTR_ATTR && (s.Value is Pullenti.Ner.Person.PersonPropertyReferent))
                {
                    Pullenti.Ner.Person.PersonPropertyReferent pr = s.Value as Pullenti.Ner.Person.PersonPropertyReferent;
                    if (pr.FindSlot(Pullenti.Ner.Person.PersonPropertyReferent.ATTR_REF, r, true) != null)
                    {
                        exist = true;
                        break;
                    }
                }
                else if (s.TypeName == Pullenti.Ner.Person.PersonReferent.ATTR_ATTR && (s.Value is PersonAttrToken))
                {
                    PersonAttrToken pr = s.Value as PersonAttrToken;
                    if (pr.Referent.FindSlot(Pullenti.Ner.Person.PersonPropertyReferent.ATTR_REF, r, true) != null)
                    {
                        exist = true;
                        break;
                    }
                }
            }
            if (!exist)
            {
                PersonAttrToken pat = new PersonAttrToken(t, t);
                pat.PropRef = new Pullenti.Ner.Person.PersonPropertyReferent() { Name = "сотрудник" };
                pat.PropRef.AddSlot(Pullenti.Ner.Person.PersonPropertyReferent.ATTR_REF, r, false, 0);
                p.AddSlot(Pullenti.Ner.Person.PersonReferent.ATTR_ATTR, pat, false, 0);
            }
            continue;
        }
        if (r != null)
        {
            break;
        }
        // Plain text is skipped only inside a signature (hasPrefix) and for fewer than two new lines.
        if (!hasPrefix || crlfCou >= 2)
        {
            break;
        }
        Pullenti.Ner.ReferentToken rt = t.Kit.ProcessReferent("PERSON", t);
        if (rt != null)
        {
            break;
        }
    }
    if (ad != null)
    {
        ad.OverflowLevel--;
    }

    // "<identity document>[,] НА ИМЯ ...": attach the preceding document to the person.
    if (begin.IsValue("НА", null) && begin.Next != null && begin.Next.IsValue("ИМЯ", null))
    {
        Pullenti.Ner.Token t0 = begin.Previous;
        if (t0 != null && t0.IsComma)
        {
            t0 = t0.Previous;
        }
        if (t0 != null && (t0.GetReferent() is Pullenti.Ner.Person.PersonIdentityReferent))
        {
            p.AddSlot(Pullenti.Ner.Person.PersonReferent.ATTR_IDDOC, t0.GetReferent(), false, 0);
        }
    }
    return new Pullenti.Ner.ReferentToken(p, begin, end) { Morph = morph, MiscAttrs = (int)p.m_PersonIdentityTyp };
}
// Wraps a referent token into a keyword token.
// Tokens that are not ReferentToken, have no referent, or carry a phone, URI, bank data,
// DATE, DATERANGE or BOOKLINKREF referent are returned unchanged. Otherwise a KeywordReferent
// is registered in ad, embedded over t, and the new token is returned.
// cur and max are passed through to nested calls and to _setRank.
Pullenti.Ner.Token _addReferents(Pullenti.Ner.Core.AnalyzerData ad, Pullenti.Ner.Token t, int cur, int max)
{
    if (!(t is Pullenti.Ner.ReferentToken))
    {
        return t;
    }
    Pullenti.Ner.Referent referent = t.GetReferent();
    if (referent == null)
    {
        return t;
    }

    // Denomination: a keyword whose normal forms are the denomination values.
    Pullenti.Ner.Denomination.DenominationReferent denomination = referent as Pullenti.Ner.Denomination.DenominationReferent;
    if (denomination != null)
    {
        KeywordReferent denomKeyword = new KeywordReferent();
        denomKeyword.Typ = KeywordType.Referent;
        foreach (Pullenti.Ner.Slot slot in denomination.Slots)
        {
            if (slot.TypeName == Pullenti.Ner.Denomination.DenominationReferent.ATTR_VALUE)
            {
                denomKeyword.AddSlot(KeywordReferent.ATTR_NORMAL, slot.Value, false, 0);
            }
        }
        denomKeyword.AddSlot(KeywordReferent.ATTR_REF, denomination, false, 0);
        Pullenti.Ner.ReferentToken denomToken = new Pullenti.Ner.ReferentToken(ad.RegisterReferent(denomKeyword), t, t);
        t.Kit.EmbedToken(denomToken);
        return denomToken;
    }

    bool isContactOrBank = (referent is Pullenti.Ner.Phone.PhoneReferent)
        || (referent is Pullenti.Ner.Uri.UriReferent)
        || (referent is Pullenti.Ner.Bank.BankDataReferent);
    if (isContactOrBank)
    {
        return t;
    }

    // Money: an Object keyword whose normal form is the currency.
    Pullenti.Ner.Money.MoneyReferent money = referent as Pullenti.Ner.Money.MoneyReferent;
    if (money != null)
    {
        KeywordReferent moneyKeyword = new KeywordReferent();
        moneyKeyword.Typ = KeywordType.Object;
        moneyKeyword.AddSlot(KeywordReferent.ATTR_NORMAL, money.Currency, false, 0);
        Pullenti.Ner.ReferentToken moneyToken = new Pullenti.Ner.ReferentToken(ad.RegisterReferent(moneyKeyword), t, t);
        t.Kit.EmbedToken(moneyToken);
        return moneyToken;
    }

    if (referent.TypeName == "DATE" || referent.TypeName == "DATERANGE" || referent.TypeName == "BOOKLINKREF")
    {
        return t;
    }

    // Process referent tokens nested inside this token first.
    Pullenti.Ner.Token inner = (t as Pullenti.Ner.MetaToken).BeginToken;
    while (inner != null && inner.EndChar <= t.EndChar)
    {
        if (inner is Pullenti.Ner.ReferentToken)
        {
            this._addReferents(ad, inner, cur, max);
        }
        inner = inner.Next;
    }

    // Generic referent: the normal form is ALPHA2 for GEO, otherwise the referent's string form.
    KeywordReferent keyword = new KeywordReferent();
    keyword.Typ = KeywordType.Referent;
    string normal = null;
    if (referent.TypeName == "GEO")
    {
        normal = referent.GetStringValue("ALPHA2");
    }
    if (normal == null)
    {
        normal = referent.ToString(true, null, 0);
    }
    if (normal != null)
    {
        keyword.AddSlot(KeywordReferent.ATTR_NORMAL, normal.ToUpper(), false, 0);
    }
    keyword.AddSlot(KeywordReferent.ATTR_REF, t.GetReferent(), false, 0);
    _setRank(keyword, cur, max);

    Pullenti.Ner.ReferentToken keywordToken = new Pullenti.Ner.ReferentToken(ad.RegisterReferent(keyword), t, t);
    t.Kit.EmbedToken(keywordToken);
    return keywordToken;
}