/// <summary>
/// Tries to recognise an organization written as "name words + type item" starting at <paramref name="t"/>.
/// The type item is whatever the sibling <c>TryAttach</c> recognises
/// (NOTE(review): presumably a company/legal-form designator - confirm against TryAttach).
/// </summary>
/// <param name="t">First token of the candidate. A leading '(' is skipped; in that case the result
/// must be followed by ')' (or by the end of the text).</param>
/// <param name="canBeCyr">When true, Cyrillic-letter tokens are accepted in the name in addition to Latin ones.</param>
/// <returns>A referent token wrapping a new OrganizationReferent, or null when nothing is recognised.</returns>
public static Pullenti.Ner.ReferentToken TryAttachOrg(Pullenti.Ner.Token t, bool canBeCyr = false) {
    if (t == null) {
        return null;
    }
    // Remember that the candidate was opened by a bracket: it must be closed at the end.
    bool br = false;
    if (t.IsChar('(') && t.Next != null) {
        t = t.Next;
        br = true;
    }
    if (t is Pullenti.Ner.NumberToken) {
        // A number may start the name only when spelled in words as a capitalised adjective.
        if ((t as Pullenti.Ner.NumberToken).Typ != Pullenti.Ner.NumberSpellingType.Words || !t.Morph.Class.IsAdjective || !t.Chars.IsCapitalUpper) {
            return null;
        }
    } else {
        if (t.Chars.IsAllLower) {
            return null;
        }
        if ((t.LengthChar < 3) && !t.Chars.IsLetter) {
            return null;
        }
        if (!t.Chars.IsLatinLetter) {
            if (!canBeCyr || !t.Chars.IsCyrillicLetter) {
                return null;
            }
        }
    }
    Pullenti.Ner.Token t0 = t;      // first token of the name
    Pullenti.Ner.Token t1 = t0;     // last token accepted into the name
    int namWo = 0;                  // number of text tokens accepted into the name
    OrgItemEngItem tok = null;      // the type item that terminates the name
    Pullenti.Ner.Geo.GeoReferent geo = null;
    OrgItemTypeToken addTyp = null;
    for (; t != null; t = t.Next) {
        if (t != t0 && t.WhitespacesBeforeCount > 1) {
            break;
        }
        if (t.IsChar(')')) {
            break;
        }
        if (t.IsChar('(') && t.Next != null) {
            // "(<geo>)" - remember the geo object and skip the brackets.
            if ((t.Next.GetReferent() is Pullenti.Ner.Geo.GeoReferent) && t.Next.Next != null && t.Next.Next.IsChar(')')) {
                geo = t.Next.GetReferent() as Pullenti.Ner.Geo.GeoReferent;
                t = t.Next.Next;
                continue;
            }
            // "(<latin org type>)" - remember it as an additional type.
            OrgItemTypeToken typ = OrgItemTypeToken.TryAttach(t.Next, true, null);
            if ((typ != null && typ.EndToken.Next != null && typ.EndToken.Next.IsChar(')')) && typ.Chars.IsLatinLetter) {
                addTyp = typ;
                t = typ.EndToken.Next;
                continue;
            }
            // "(<Capitalised word>)" - becomes part of the name.
            if (((t.Next is Pullenti.Ner.TextToken) && t.Next.Next != null && t.Next.Next.IsChar(')')) && t.Next.Chars.IsCapitalUpper) {
                t1 = (t = t.Next.Next);
                continue;
            }
            break;
        }
        tok = TryAttach(t, canBeCyr);
        if (tok == null && t.IsCharOf(".,") && t.Next != null) {
            // Allow one or two separators between the name and the type item.
            tok = TryAttach(t.Next, canBeCyr);
            if (tok == null && t.Next.IsCharOf(",.")) {
                tok = TryAttach(t.Next.Next, canBeCyr);
            }
        }
        if (tok != null) {
            if (tok.LengthChar == 1 && t0.Chars.IsCyrillicLetter) {
                return null;
            }
            break;
        }
        if (t.IsHiphen && !t.IsWhitespaceAfter && !t.IsWhitespaceBefore) {
            continue;
        }
        if (t.IsCharOf("&+") || t.IsAnd) {
            continue;
        }
        if (t.IsChar('.')) {
            if (t.Previous != null && t.Previous.LengthChar == 1) {
                continue;
            } else if (Pullenti.Ner.Core.MiscHelper.CanBeStartOfSentence(t.Next)) {
                break;
            }
        }
        if (!t.Chars.IsLatinLetter) {
            if (!canBeCyr || !t.Chars.IsCyrillicLetter) {
                break;
            }
        }
        if (t.Chars.IsAllLower) {
            if (t.Morph.Class.IsPreposition || t.Morph.Class.IsConjunction) {
                continue;
            }
            if (br) {
                continue;
            }
            break;
        }
        Pullenti.Morph.MorphClass mc = t.GetMorphClassInDictionary();
        if (mc.IsVerb) {
            if (t.Next != null && t.Next.Morph.Class.IsPreposition) {
                break;
            }
        }
        if (t.Next != null && t.Next.IsValue("OF", null)) {
            break;
        }
        if (t is Pullenti.Ner.TextToken) {
            namWo++;
        }
        t1 = t;
    }
    if (tok == null) {
        return null;
    }
    if (t0 == tok.BeginToken) {
        // The type item comes first: the name must then follow as a bracket sequence.
        Pullenti.Ner.Core.BracketSequenceToken br2 = Pullenti.Ner.Core.BracketHelper.TryParse(tok.EndToken.Next, Pullenti.Ner.Core.BracketParseAttr.No, 100);
        if (br2 != null) {
            Pullenti.Ner.Org.OrganizationReferent org1 = new Pullenti.Ner.Org.OrganizationReferent();
            if (tok.ShortValue != null) {
                org1.AddTypeStr(tok.ShortValue);
            }
            org1.AddTypeStr(tok.FullValue);
            string nam1 = Pullenti.Ner.Core.MiscHelper.GetTextValue(br2.BeginToken, br2.EndToken, Pullenti.Ner.Core.GetTextAttr.No);
            if (nam1 != null) {
                org1.AddName(nam1, true, null);
                return new Pullenti.Ner.ReferentToken(org1, t0, br2.EndToken);
            }
        }
        return null;
    }
    Pullenti.Ner.Org.OrganizationReferent org = new Pullenti.Ner.Org.OrganizationReferent();
    Pullenti.Ner.Token te = tok.EndToken;
    if (tok.IsBank) {
        // For banks the type word itself belongs to the name.
        t1 = tok.EndToken;
    }
    if (tok.FullValue == "company" && (tok.WhitespacesAfterCount < 3)) {
        // "... Company <type item>": the word "company" joins the name, the next item is the type.
        OrgItemEngItem tok1 = TryAttach(tok.EndToken.Next, canBeCyr);
        if (tok1 != null) {
            t1 = tok.EndToken;
            tok = tok1;
            te = tok.EndToken;
        }
    }
    if (tok.FullValue == "company" && namWo == 0) {
        return null;
    }
    string nam = Pullenti.Ner.Core.MiscHelper.GetTextValue(t0, t1, Pullenti.Ner.Core.GetTextAttr.IgnoreArticles);
    if (nam == "STOCK" && tok.FullValue == "company") {
        return null;
    }
    string altNam = null;
    if (string.IsNullOrEmpty(nam)) {
        return null;
    }
    if (nam.IndexOf('(') > 0) {
        int i1 = nam.IndexOf('(');
        int i2 = nam.IndexOf(')');
        if (i1 < i2) {
            // Keep the full text as an alternative name and cut the bracketed part out of the main one.
            altNam = nam;
            string tai = null;
            if ((i2 + 1) < nam.Length) {
                // Fix: start after the ')' - Substring(i2) left the bracket inside the name.
                tai = nam.Substring(i2 + 1).Trim();
            }
            nam = nam.Substring(0, i1).Trim();
            if (!string.IsNullOrEmpty(tai)) {
                nam = string.Format("{0} {1}", nam, tai);
            }
        }
    }
    if (tok.IsBank) {
        org.AddTypeStr((tok.Kit.BaseLanguage.IsEn ? "bank" : "банк"));
        org.AddProfile(Pullenti.Ner.Org.OrgProfile.Finance);
        if ((t1.Next != null && t1.Next.IsValue("OF", null) && t1.Next.Next != null) && t1.Next.Next.Chars.IsLatinLetter) {
            // "Bank of X": extend the name over the "of ..." tail.
            OrgItemNameToken nam0 = OrgItemNameToken.TryAttach(t1.Next, null, false, false);
            if (nam0 != null) {
                te = nam0.EndToken;
            } else {
                te = t1.Next.Next;
            }
            nam = Pullenti.Ner.Core.MiscHelper.GetTextValue(t0, te, Pullenti.Ner.Core.GetTextAttr.No);
            if (te.GetReferent() is Pullenti.Ner.Geo.GeoReferent) {
                org.AddGeoObject(te.GetReferent() as Pullenti.Ner.Geo.GeoReferent);
            }
        } else if (t0 == t1) {
            return null;
        }
    } else {
        if (tok.ShortValue != null) {
            org.AddTypeStr(tok.ShortValue);
        }
        org.AddTypeStr(tok.FullValue);
    }
    if (string.IsNullOrEmpty(nam)) {
        return null;
    }
    org.AddName(nam, true, null);
    if (altNam != null) {
        org.AddName(altNam, true, null);
    }
    Pullenti.Ner.ReferentToken res = new Pullenti.Ner.ReferentToken(org, t0, te);
    // Skip trailing separators and try to attach one more type item after them.
    t = te;
    while (t.Next != null) {
        if (t.Next.IsCharOf(",.")) {
            t = t.Next;
        } else {
            break;
        }
    }
    if (t.WhitespacesAfterCount < 2) {
        tok = TryAttach(t.Next, canBeCyr);
        if (tok != null) {
            if (tok.ShortValue != null) {
                org.AddTypeStr(tok.ShortValue);
            }
            org.AddTypeStr(tok.FullValue);
            res.EndToken = tok.EndToken;
        }
    }
    if (geo != null) {
        org.AddGeoObject(geo);
    }
    if (addTyp != null) {
        org.AddType(addTyp, false);
    }
    if (!br) {
        return res;
    }
    Pullenti.Ner.Token closing = res.EndToken.Next;
    if (closing == null) {
        // Fix: the text ended before the bracket was closed. Keep the current end token
        // instead of assigning null to EndToken.
        return res;
    }
    if (!closing.IsChar(')')) {
        return null;
    }
    res.EndToken = closing;
    return res;
}
/// <summary>
/// Decides whether the geo object referenced by <paramref name="rhi"/> may serve as the higher
/// (containing) object for the geo object referenced by <paramref name="rlo"/>.
/// Token context (case, newlines, prepositions) is checked first; the final verdict comes from
/// the sibling <c>CanBeHigher</c>, with a "city inside a district" exception computed here.
/// </summary>
/// <param name="rhi">Token whose referent is the candidate higher geo object.</param>
/// <param name="rlo">Token whose referent is the candidate lower geo object.</param>
/// <returns>True when the pair is acceptable; false when either token is null, is not a geo
/// referent, or the context rules it out.</returns>
internal static bool CanBeHigherToken(Pullenti.Ner.Token rhi, Pullenti.Ner.Token rlo) {
    if (rhi == null || rlo == null) {
        return false;
    }
    if (rhi.Morph.Case.IsInstrumental && !rhi.Morph.Case.IsGenitive) {
        return false;
    }
    Pullenti.Ner.Geo.GeoReferent hi = rhi.GetReferent() as Pullenti.Ner.Geo.GeoReferent;
    Pullenti.Ner.Geo.GeoReferent lo = rlo.GetReferent() as Pullenti.Ner.Geo.GeoReferent;
    if (hi == null || lo == null) {
        return false;
    }
    // Set when a city/district pair is accepted even if CanBeHigher rejects it.
    bool citiInReg = false;
    if (hi.IsCity && lo.IsRegion) {
        if (hi.FindSlot(Pullenti.Ner.Geo.GeoReferent.ATTR_TYPE, "город", true) != null || hi.FindSlot(Pullenti.Ner.Geo.GeoReferent.ATTR_TYPE, "місто", true) != null || hi.FindSlot(Pullenti.Ner.Geo.GeoReferent.ATTR_TYPE, "city", true) != null) {
            string s = _getTypesString(lo);
            bool districtLike = ((s.Contains("район") || s.Contains("административный округ") || s.Contains("муниципальный округ")) || s.Contains("адміністративний округ") || s.Contains("муніципальний округ")) || lo.FindSlot(Pullenti.Ner.Geo.GeoReferent.ATTR_TYPE, "округ", true) != null;
            // The city token must be immediately followed by the district in the genitive case.
            if (districtLike && rhi.Next == rlo && rlo.Morph.Case.IsGenitive) {
                citiInReg = true;
            }
        }
    }
    if (hi.IsRegion && lo.IsCity) {
        if (lo.FindSlot(Pullenti.Ner.Geo.GeoReferent.ATTR_TYPE, "город", true) != null || lo.FindSlot(Pullenti.Ner.Geo.GeoReferent.ATTR_TYPE, "місто", true) != null || lo.FindSlot(Pullenti.Ner.Geo.GeoReferent.ATTR_TYPE, "city", true) != null) {
            string s = _getTypesString(hi);
            if (s == "район;") {
                if (hi.Higher != null && hi.Higher.IsRegion) {
                    citiInReg = true;
                } else if (rhi.EndChar <= rlo.BeginChar && rhi.Next != null && rhi.Next.IsComma) {
                    // The original had two consecutive branches here that differed only by an
                    // extra "not genitive" test and both set the flag, so the case of rlo never
                    // mattered. They are merged, and rhi.Next is now null-checked.
                    citiInReg = true;
                }
            }
        } else {
            citiInReg = true;
        }
    }
    if (rhi.EndChar <= rlo.BeginChar) {
        if (!rhi.Morph.Class.IsAdjective) {
            if (hi.IsState && !rhi.Chars.IsLatinLetter) {
                return false;
            }
        }
        // A line break between the two tokens is tolerated only for the city/district case.
        if (rhi.IsNewlineAfter || rlo.IsNewlineBefore) {
            if (!citiInReg) {
                return false;
            }
        }
    }
    if (rlo.Previous != null && rlo.Previous.Morph.Class.IsPreposition) {
        // The preposition before the lower object must agree with its grammatical case.
        if (rlo.Previous.Morph.Language.IsUa) {
            if ((rlo.Previous.IsValue("У", null) && !rlo.Morph.Case.IsDative && !rlo.Morph.Case.IsPrepositional) && !rlo.Morph.Case.IsUndefined) {
                return false;
            }
            if (rlo.Previous.IsValue("З", null) && !rlo.Morph.Case.IsGenitive && !rlo.Morph.Case.IsUndefined) {
                return false;
            }
        } else {
            if ((rlo.Previous.IsValue("В", null) && !rlo.Morph.Case.IsDative && !rlo.Morph.Case.IsPrepositional) && !rlo.Morph.Case.IsUndefined) {
                return false;
            }
            if (rlo.Previous.IsValue("ИЗ", null) && !rlo.Morph.Case.IsGenitive && !rlo.Morph.Case.IsUndefined) {
                return false;
            }
        }
    }
    if (!CanBeHigher(hi, lo)) {
        return citiInReg;
    }
    return true;
}
/// <summary>
/// Tries to parse one item of an identity-document description starting at <paramref name="t"/>.
/// Depending on what is found the result has type Keyword, Number, Seria, Code, Address, Date or Org.
/// </summary>
/// <param name="t">Token to start from. May be null (the method calls itself with Next tokens);
/// null yields null.</param>
/// <param name="prev">The previously parsed item, or null when parsing the first item. With no
/// previous item only a Keyword item is accepted.</param>
/// <returns>The parsed item, or null when nothing suitable starts at <paramref name="t"/>.</returns>
static PersonIdToken TryParse(Pullenti.Ner.Token t, PersonIdToken prev) {
    // Fix: recursive calls below pass EndToken.Next / t.Next, which is null at the end of the
    // text; previously the first statement dereferenced it.
    if (t == null) {
        return null;
    }
    if (t.IsValue("СВИДЕТЕЛЬСТВО", null)) {
        // Certificate of registration as an individual entrepreneur: both a registration word
        // and an "individual" word must occur in the following run of tokens.
        Pullenti.Ner.Token tt1 = t;
        bool ip = false;
        bool reg = false;
        for (Pullenti.Ner.Token tt = t.Next; tt != null; tt = tt.Next) {
            if (tt.IsCommaAnd || tt.Morph.Class.IsPreposition) {
                continue;
            }
            if (tt.IsValue("РЕГИСТРАЦИЯ", null) || tt.IsValue("РЕЕСТР", null) || tt.IsValue("ЗАРЕГИСТРИРОВАТЬ", null)) {
                reg = true;
                tt1 = tt;
            } else if (tt.IsValue("ИНДИВИДУАЛЬНЫЙ", null) || tt.IsValue("ИП", null)) {
                ip = true;
                tt1 = tt;
            } else if ((tt.IsValue("ВНЕСЕНИЕ", null) || tt.IsValue("ГОСУДАРСТВЕННЫЙ", null) || tt.IsValue("ЕДИНЫЙ", null)) || tt.IsValue("ЗАПИСЬ", null) || tt.IsValue("ПРЕДПРИНИМАТЕЛЬ", null)) {
                tt1 = tt;
            } else if (tt.GetReferent() != null && tt.GetReferent().TypeName == "DATERANGE") {
                tt1 = tt;
            } else {
                break;
            }
        }
        if (reg && ip) {
            return new PersonIdToken(t, tt1) { Typ = Typs.Keyword, Value = "СВИДЕТЕЛЬСТВО О ГОСУДАРСТВЕННОЙ РЕГИСТРАЦИИ ФИЗИЧЕСКОГО ЛИЦА В КАЧЕСТВЕ ИНДИВИДУАЛЬНОГО ПРЕДПРИНИМАТЕЛЯ" };
        }
    }
    Pullenti.Ner.Core.TerminToken tok = m_Ontology.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
    if (tok != null) {
        Typs ty = (Typs)tok.Termin.Tag;
        PersonIdToken res = new PersonIdToken(tok.BeginToken, tok.EndToken) { Typ = ty, Value = tok.Termin.CanonicText };
        if (prev == null) {
            // First item: only a document keyword is accepted; then absorb an optional state
            // reference that follows it.
            if (ty != Typs.Keyword) {
                return null;
            }
            for (t = tok.EndToken.Next; t != null; t = t.Next) {
                Pullenti.Ner.Referent r = t.GetReferent();
                if (r != null && (r is Pullenti.Ner.Geo.GeoReferent)) {
                    res.Referent = r;
                    res.EndToken = t;
                    continue;
                }
                if (t.IsValue("ГРАЖДАНИН", null) && t.Next != null && (t.Next.GetReferent() is Pullenti.Ner.Geo.GeoReferent)) {
                    res.Referent = t.Next.GetReferent();
                    t = (res.EndToken = t.Next);
                    continue;
                }
                if (r != null) {
                    break;
                }
                PersonAttrToken ait = PersonAttrToken.TryAttach(t, null, PersonAttrToken.PersonAttrAttachAttrs.No);
                if (ait != null) {
                    if (ait.Referent != null) {
                        foreach (Pullenti.Ner.Slot s in ait.Referent.Slots) {
                            if (s.TypeName == Pullenti.Ner.Person.PersonPropertyReferent.ATTR_REF && (s.Value is Pullenti.Ner.Geo.GeoReferent)) {
                                res.Referent = s.Value as Pullenti.Ner.Referent;
                            }
                        }
                    }
                    res.EndToken = ait.EndToken;
                    break;
                }
                if (t.IsValue("ДАННЫЙ", null)) {
                    res.EndToken = t;
                    continue;
                }
                break;
            }
            // Only a state is kept as the referent of a keyword.
            if ((res.Referent is Pullenti.Ner.Geo.GeoReferent) && !(res.Referent as Pullenti.Ner.Geo.GeoReferent).IsState) {
                res.Referent = null;
            }
            return res;
        }
        if (ty == Typs.Number) {
            // "<number prefix>[:] <digits ...>" - concatenate the number tokens on the same line.
            StringBuilder tmp = new StringBuilder();
            Pullenti.Ner.Token tt = tok.EndToken.Next;
            if (tt != null && tt.IsChar(':')) {
                tt = tt.Next;
            }
            for (; tt != null; tt = tt.Next) {
                if (tt.IsNewlineBefore) {
                    break;
                }
                if (!(tt is Pullenti.Ner.NumberToken)) {
                    break;
                }
                tmp.Append(tt.GetSourceText());
                res.EndToken = tt;
            }
            if (tmp.Length < 1) {
                return null;
            }
            res.Value = tmp.ToString();
            res.HasPrefix = true;
            return res;
        }
        if (ty == Typs.Seria) {
            StringBuilder tmp = new StringBuilder();
            Pullenti.Ner.Token tt = tok.EndToken.Next;
            if (tt != null && tt.IsChar(':')) {
                tt = tt.Next;
            }
            for (; tt != null; tt = tt.Next) {
                if (tt.IsNewlineBefore) {
                    break;
                }
                // A number prefix means the series is over and the number follows.
                if (Pullenti.Ner.Core.MiscHelper.CheckNumberPrefix(tt) != null) {
                    break;
                }
                if (!(tt is Pullenti.Ner.NumberToken)) {
                    // Letter part of the series: a roman number or exactly two capital letters.
                    if (!(tt is Pullenti.Ner.TextToken)) {
                        break;
                    }
                    if (!tt.Chars.IsAllUpper) {
                        break;
                    }
                    Pullenti.Ner.NumberToken nu = Pullenti.Ner.Core.NumberHelper.TryParseRoman(tt);
                    if (nu != null) {
                        tmp.Append(nu.GetSourceText());
                        tt = nu.EndToken;
                    } else if (tt.LengthChar != 2) {
                        break;
                    } else {
                        tmp.Append((tt as Pullenti.Ner.TextToken).Term);
                        res.EndToken = tt;
                    }
                    if (tt.Next != null && tt.Next.IsHiphen) {
                        tt = tt.Next;
                    }
                    continue;
                }
                if (tmp.Length >= 4) {
                    break;
                }
                tmp.Append(tt.GetSourceText());
                res.EndToken = tt;
            }
            if (tmp.Length < 4) {
                // A short series is accepted only when a number item follows it.
                if (tmp.Length < 2) {
                    return null;
                }
                Pullenti.Ner.Token tt1 = res.EndToken.Next;
                if (tt1 != null && tt1.IsComma) {
                    tt1 = tt1.Next;
                }
                PersonIdToken next = TryParse(tt1, res);
                if (next == null || next.Typ != Typs.Number) {
                    return null;
                }
            }
            res.Value = tmp.ToString();
            res.HasPrefix = true;
            return res;
        }
        if (ty == Typs.Code) {
            for (Pullenti.Ner.Token tt = res.EndToken.Next; tt != null; tt = tt.Next) {
                if (tt.IsCharOf(":") || tt.IsHiphen) {
                    continue;
                }
                if (tt is Pullenti.Ner.NumberToken) {
                    res.EndToken = tt;
                    continue;
                }
                break;
            }
        }
        if (ty == Typs.Address) {
            if (t.GetReferent() is Pullenti.Ner.Address.AddressReferent) {
                res.Referent = t.GetReferent();
                res.EndToken = t;
                return res;
            }
            for (Pullenti.Ner.Token tt = res.EndToken.Next; tt != null; tt = tt.Next) {
                if (tt.IsCharOf(":") || tt.IsHiphen || tt.Morph.Class.IsPreposition) {
                    continue;
                }
                if (tt.GetReferent() is Pullenti.Ner.Address.AddressReferent) {
                    res.Referent = tt.GetReferent();
                    res.EndToken = tt;
                }
                break;
            }
            if (res.Referent == null) {
                return null;
            }
        }
        return res;
    } else if (prev == null) {
        return null;
    }
    // No ontology term here: try a bare number, a referent or a letter series.
    Pullenti.Ner.Token t0 = t;
    Pullenti.Ner.Token t1 = Pullenti.Ner.Core.MiscHelper.CheckNumberPrefix(t0);
    if (t1 != null) {
        t = t1;
    }
    if (t is Pullenti.Ner.NumberToken) {
        StringBuilder tmp = new StringBuilder();
        PersonIdToken res = new PersonIdToken(t0, t) { Typ = Typs.Number };
        for (Pullenti.Ner.Token tt = t; tt != null; tt = tt.Next) {
            if (tt.IsNewlineBefore || !(tt is Pullenti.Ner.NumberToken)) {
                break;
            }
            tmp.Append(tt.GetSourceText());
            res.EndToken = tt;
        }
        if (tmp.Length < 4) {
            // A short number right after a keyword is treated as a series when a number follows.
            if (tmp.Length < 2) {
                return null;
            }
            if (prev == null || prev.Typ != Typs.Keyword) {
                return null;
            }
            PersonIdToken ne = TryParse(res.EndToken.Next, prev);
            if (ne != null && ne.Typ == Typs.Number) {
                res.Typ = Typs.Seria;
            } else {
                return null;
            }
        }
        res.Value = tmp.ToString();
        if (t0 != t) {
            res.HasPrefix = true;
        }
        return res;
    }
    if (t is Pullenti.Ner.ReferentToken) {
        Pullenti.Ner.Referent r = t.GetReferent();
        if (r != null) {
            if (r.TypeName == "DATE") {
                return new PersonIdToken(t, t) { Typ = Typs.Date, Referent = r };
            }
            if (r.TypeName == "ORGANIZATION") {
                return new PersonIdToken(t, t) { Typ = Typs.Org, Referent = r };
            }
            if (r.TypeName == "ADDRESS") {
                return new PersonIdToken(t, t) { Typ = Typs.Address, Referent = r };
            }
        }
    }
    if ((prev != null && prev.Typ == Typs.Keyword && (t is Pullenti.Ner.TextToken)) && !t.Chars.IsAllLower && t.Chars.IsLetter) {
        // A letter word right after the keyword is a series if a number follows it.
        PersonIdToken rr = TryParse(t.Next, prev);
        if (rr != null && rr.Typ == Typs.Number) {
            return new PersonIdToken(t, t) { Typ = Typs.Seria, Value = (t as Pullenti.Ner.TextToken).Term };
        }
    }
    if ((t != null && t.IsValue("ОТ", "ВІД") && (t.Next is Pullenti.Ner.ReferentToken)) && t.Next.GetReferent().TypeName == "DATE") {
        return new PersonIdToken(t, t.Next) { Typ = Typs.Date, Referent = t.Next.GetReferent() };
    }
    return null;
}
/// <summary>
/// Walks the token chain of <paramref name="kit"/>, recognises phone numbers and embeds each of
/// them into the chain as a referent token registered in this analyzer's data.
/// </summary>
/// <param name="kit">Analysis kit providing the token chain and the analyzer data.</param>
public override void Process(Pullenti.Ner.Core.AnalysisKit kit) {
    PhoneAnalizerData data = kit.GetAnalyzerData(this) as PhoneAnalizerData;
    for (Pullenti.Ner.Token t = kit.FirstToken; t != null; t = t.Next) {
        List<Pullenti.Ner.Phone.Internal.PhoneItemToken> items = Pullenti.Ner.Phone.Internal.PhoneItemToken.TryAttachAll(t, 15);
        if (items == null || items.Count == 0) {
            continue;
        }

        // Look backwards for a phone already found nearby; wordsBack counts the ordinary
        // tokens passed on the way.
        PhoneReferent prevPhone = null;
        int wordsBack = 0;
        for (Pullenti.Ner.Token back = t.Previous; back != null; back = back.Previous) {
            PhoneReferent seen = back.GetReferent() as PhoneReferent;
            if (seen != null) {
                prevPhone = seen;
                break;
            }
            if (back is Pullenti.Ner.ReferentToken) {
                continue;
            }
            if (back.IsChar(')')) {
                // Jump over a bracketed fragment, giving up after 100 tokens without '('.
                Pullenti.Ner.Token opener = back.Previous;
                int steps = 0;
                while (opener != null && !opener.IsChar('(')) {
                    if ((++steps) > 100) {
                        break;
                    }
                    opener = opener.Previous;
                }
                if (opener == null || !opener.IsChar('(')) {
                    break;
                }
                back = opener;
                continue;
            }
            if (back.IsCharOf(",;/\\") || back.IsAnd) {
                continue;
            }
            if ((++wordsBack) > 5) {
                break;
            }
            if (back.IsNewlineBefore || back.IsNewlineAfter) {
                break;
            }
        }

        // Consume leading prefix items (and a prefix standing second, after one leading item).
        int pos = 0;
        bool isPhoneBefore = false;
        bool hasPrefix = false;
        PhoneKind kind = PhoneKind.Undefined;
        while (pos < items.Count) {
            if (items[pos].ItemType == Pullenti.Ner.Phone.Internal.PhoneItemToken.PhoneItemType.Prefix) {
                if (kind == PhoneKind.Undefined) {
                    kind = items[pos].Kind;
                }
                hasPrefix = true;
                isPhoneBefore = true;
                pos++;
                if ((pos < items.Count) && items[pos].ItemType == Pullenti.Ner.Phone.Internal.PhoneItemToken.PhoneItemType.Delim) {
                    pos++;
                }
                continue;
            }
            if (((pos + 1) < items.Count) && items[pos + 1].ItemType == Pullenti.Ner.Phone.Internal.PhoneItemToken.PhoneItemType.Prefix && pos == 0) {
                if (kind == PhoneKind.Undefined) {
                    kind = items[0].Kind;
                }
                hasPrefix = true;
                items.RemoveAt(0);
                continue;
            }
            break;
        }
        if (prevPhone != null) {
            isPhoneBefore = true;
        }

        // A lone number right after a URI scheme is not a phone.
        if (items.Count == 1 && items[0].ItemType == Pullenti.Ner.Phone.Internal.PhoneItemToken.PhoneItemType.Number) {
            Pullenti.Ner.Token before = t.Previous;
            if ((before is Pullenti.Ner.TextToken) && !before.Chars.IsLetter) {
                before = before.Previous;
            }
            if ((before is Pullenti.Ner.TextToken) && Pullenti.Ner.Uri.UriAnalyzer.m_Schemes.TryParse(before, Pullenti.Ner.Core.TerminParseAttr.No) != null) {
                continue;
            }
        }

        List<Pullenti.Ner.ReferentToken> found = this.TryAttach(items, pos, isPhoneBefore, prevPhone);
        if (found == null) {
            // Retry from the first prefix item found further in the list.
            for (pos = 1; pos < items.Count; pos++) {
                if (items[pos].ItemType == Pullenti.Ner.Phone.Internal.PhoneItemToken.PhoneItemType.Prefix) {
                    items.RemoveRange(0, pos);
                    found = this.TryAttach(items, 1, true, prevPhone);
                    break;
                }
            }
        }
        if (found == null) {
            // Nothing recognised: continue scanning after the examined items.
            t = items[items.Count - 1].EndToken;
            continue;
        }

        // Without an own prefix, inherit the kind of the adjacent previous phone (except mobile).
        if ((kind == PhoneKind.Undefined && prevPhone != null && !hasPrefix) && prevPhone.Kind != PhoneKind.Mobile && wordsBack == 0) {
            kind = prevPhone.Kind;
        }
        foreach (Pullenti.Ner.ReferentToken phoneTok in found) {
            PhoneReferent phone = phoneTok.Referent as PhoneReferent;
            if (kind != PhoneKind.Undefined) {
                phone.Kind = kind;
            } else {
                if (phoneTok == found[0] && (phoneTok.WhitespacesBeforeCount < 3)) {
                    // A single-letter marker at the start of a line or table cell.
                    Pullenti.Ner.Token marker = phoneTok.BeginToken.Previous;
                    if (marker != null && marker.IsTableControlChar) {
                        marker = marker.Previous;
                    }
                    if ((marker is Pullenti.Ner.TextToken) && (marker.IsNewlineBefore || (marker.Previous != null && marker.Previous.IsTableControlChar))) {
                        string letter = (marker as Pullenti.Ner.TextToken).Term;
                        if (letter == "T" || letter == "Т") {
                            phoneTok.BeginToken = marker;
                        } else if (letter == "Ф" || letter == "F") {
                            kind = PhoneKind.Fax;
                            phone.Kind = kind;
                            phoneTok.BeginToken = marker;
                        } else if (letter == "M" || letter == "М") {
                            kind = PhoneKind.Mobile;
                            phone.Kind = kind;
                            phoneTok.BeginToken = marker;
                        }
                    }
                }
                phone.Correct();
            }
            phoneTok.Referent = data.RegisterReferent(phoneTok.Referent);
            kit.EmbedToken(phoneTok);
            t = phoneTok;
        }
    }
}
/// <summary>
/// Tries to recognise one element of a bibliographic reference at <paramref name="t"/>:
/// a bracketed number, author, place, year, publisher, URL, ontology term, delimiter,
/// page or volume marker, translation note, "ibid"/"see" marker and so on.
/// </summary>
/// <param name="t">Token to start from; null yields null.</param>
/// <param name="lev">Current recursion depth; the method gives up (returns null) above 3.</param>
/// <returns>The recognised element or null.</returns>
static BookLinkToken _tryParse(Pullenti.Ner.Token t, int lev) {
    if (t == null || lev > 3) {
        return null;
    }
    if (t.IsChar('[')) {
        // "[<element>]" - reuse the inner element and widen it to cover the brackets.
        BookLinkToken re = _tryParse(t.Next, lev + 1);
        if (re != null && re.EndToken.Next != null && re.EndToken.Next.IsChar(']')) {
            re.BeginToken = t;
            re.EndToken = re.EndToken.Next;
            return re;
        }
        if (re != null && re.EndToken.IsChar(']')) {
            re.BeginToken = t;
            return re;
        }
        if (re != null) {
            if (re.Typ == BookLinkTyp.Sostavitel || re.Typ == BookLinkTyp.Editors) {
                return re;
            }
        }
        // "[... 12]" - a short bracket sequence ending with a number is a reference number.
        Pullenti.Ner.Core.BracketSequenceToken br = Pullenti.Ner.Core.BracketHelper.TryParse(t, Pullenti.Ner.Core.BracketParseAttr.No, 100);
        if (br != null) {
            if ((br.EndToken.Previous is Pullenti.Ner.NumberToken) && (br.LengthChar < 30)) {
                return new BookLinkToken(t, br.EndToken) { Typ = BookLinkTyp.Number, Value = Pullenti.Ner.Core.MiscHelper.GetTextValue(br.BeginToken.Next, br.EndToken.Previous, Pullenti.Ner.Core.GetTextAttr.No) };
            }
        }
    }
    if (t is Pullenti.Ner.ReferentToken) {
        if (t.GetReferent() is Pullenti.Ner.Person.PersonReferent) {
            return TryParseAuthor(t, Pullenti.Ner.Person.Internal.FioTemplateType.Undefined);
        }
        if (t.GetReferent() is Pullenti.Ner.Geo.GeoReferent) {
            return new BookLinkToken(t, t) { Typ = BookLinkTyp.Geo, Ref = t.GetReferent() };
        }
        if (t.GetReferent() is Pullenti.Ner.Date.DateReferent) {
            Pullenti.Ner.Date.DateReferent dr = t.GetReferent() as Pullenti.Ner.Date.DateReferent;
            if (dr.Slots.Count == 1 && dr.Year > 0) {
                return new BookLinkToken(t, t) { Typ = BookLinkTyp.Year, Value = dr.Year.ToString() };
            }
            if (dr.Year > 0 && t.Previous != null && t.Previous.IsComma) {
                return new BookLinkToken(t, t) { Typ = BookLinkTyp.Year, Value = dr.Year.ToString() };
            }
        }
        if (t.GetReferent() is Pullenti.Ner.Org.OrganizationReferent) {
            Pullenti.Ner.Org.OrganizationReferent org = t.GetReferent() as Pullenti.Ner.Org.OrganizationReferent;
            if (org.Kind == Pullenti.Ner.Org.OrganizationKind.Press) {
                return new BookLinkToken(t, t) { Typ = BookLinkTyp.Press, Ref = org };
            }
        }
        if (t.GetReferent() is Pullenti.Ner.Uri.UriReferent) {
            Pullenti.Ner.Uri.UriReferent uri = t.GetReferent() as Pullenti.Ner.Uri.UriReferent;
            if ((uri.Scheme == "http" || uri.Scheme == "https" || uri.Scheme == "ftp") || uri.Scheme == null) {
                return new BookLinkToken(t, t) { Typ = BookLinkTyp.Url, Ref = uri };
            }
        }
    }
    Pullenti.Ner.Core.TerminToken tok = m_Termins.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
    if (tok != null) {
        BookLinkTyp typ = (BookLinkTyp)tok.Termin.Tag;
        bool ok = true;
        if (typ == BookLinkTyp.Type || typ == BookLinkTyp.NameTail || typ == BookLinkTyp.ElectronRes) {
            // These terms count only right after one of ". : [" or a hyphen.
            if (t.Previous == null || !(t.Previous.IsCharOf(".:[") || t.Previous.IsHiphen)) {
                ok = false;
            }
        }
        if (ok) {
            return new BookLinkToken(t, tok.EndToken) { Typ = typ, Value = tok.Termin.CanonicText };
        }
        if (typ == BookLinkTyp.ElectronRes) {
            // Otherwise accept an electronic-resource term when a URI follows after punctuation.
            for (Pullenti.Ner.Token tt = tok.EndToken.Next; tt != null; tt = tt.Next) {
                if ((tt is Pullenti.Ner.TextToken) && !tt.Chars.IsLetter) {
                    continue;
                }
                if (tt.GetReferent() is Pullenti.Ner.Uri.UriReferent) {
                    return new BookLinkToken(t, tt) { Typ = BookLinkTyp.ElectronRes, Ref = tt.GetReferent() };
                }
                break;
            }
        }
    }
    if (t.IsChar('/')) {
        BookLinkToken res = new BookLinkToken(t, t) { Typ = BookLinkTyp.Delimeter, Value = "/" };
        if (t.Next != null && t.Next.IsChar('/')) {
            res.EndToken = t.Next;
            res.Value = "//";
        }
        if (!t.IsWhitespaceBefore && !t.IsWhitespaceAfter) {
            // A slash glued to its neighbours is a delimiter only if one of the next three
            // tokens starts a non-number element.
            int coo = 3;
            bool nothingFound = true;
            for (Pullenti.Ner.Token tt = t.Next; tt != null && coo > 0; tt = tt.Next, coo--) {
                BookLinkToken vvv = TryParse(tt, lev + 1);
                if (vvv != null && vvv.Typ != BookLinkTyp.Number) {
                    nothingFound = false;
                    break;
                }
            }
            if (nothingFound) {
                return null;
            }
        }
        return res;
    }
    if ((t is Pullenti.Ner.NumberToken) && (t as Pullenti.Ner.NumberToken).IntValue != null && (t as Pullenti.Ner.NumberToken).Typ == Pullenti.Ner.NumberSpellingType.Digit) {
        BookLinkToken res = new BookLinkToken(t, t) { Typ = BookLinkTyp.Number, Value = (t as Pullenti.Ner.NumberToken).Value.ToString() };
        int val = (t as Pullenti.Ner.NumberToken).IntValue.Value;
        if (val >= 1930 && (val < 2030)) {
            res.Typ = BookLinkTyp.Year;
        }
        if (t.Next != null && t.Next.IsChar('.')) {
            res.EndToken = t.Next;
        } else if ((t.Next != null && t.Next.LengthChar == 1 && !t.Next.Chars.IsLetter) && t.Next.IsWhitespaceAfter) {
            res.EndToken = t.Next;
        } else if (t.Next is Pullenti.Ner.TextToken) {
            // "<number> <page marker>" - total page count.
            string term = (t.Next as Pullenti.Ner.TextToken).Term;
            if (((term == "СТР" || term == "C" || term == "С") || term == "P" || term == "S") || term == "PAGES") {
                res.EndToken = t.Next;
                res.Typ = BookLinkTyp.Pages;
                res.Value = (t as Pullenti.Ner.NumberToken).Value.ToString();
            }
        }
        return res;
    }
    if (t is Pullenti.Ner.TextToken) {
        string term = (t as Pullenti.Ner.TextToken).Term;
        if (((((((term == "СТР" || term == "C" || term == "С") || term == "ТОМ" || term == "T") || term == "Т" || term == "P") || term == "PP" || term == "V") || term == "VOL" || term == "S") || term == "СТОР" || t.IsValue("PAGE", null)) || t.IsValue("СТРАНИЦА", "СТОРІНКА")) {
            // "<page/volume marker>[.:~] <number>[, - <number> ...]"
            Pullenti.Ner.Token tt = t.Next;
            while (tt != null && tt.IsCharOf(".:~")) {
                tt = tt.Next;
            }
            if (tt is Pullenti.Ner.NumberToken) {
                BookLinkToken res = new BookLinkToken(t, tt) { Typ = BookLinkTyp.PageRange };
                Pullenti.Ner.Token tt0 = tt;
                Pullenti.Ner.Token tt1 = tt;
                for (tt = tt.Next; tt != null; tt = tt.Next) {
                    if (tt.IsCharOf(",") || tt.IsHiphen) {
                        if (tt.Next is Pullenti.Ner.NumberToken) {
                            tt = tt.Next;
                            res.EndToken = tt;
                            tt1 = tt;
                            continue;
                        }
                    }
                    break;
                }
                res.Value = Pullenti.Ner.Core.MiscHelper.GetTextValue(tt0, tt1, Pullenti.Ner.Core.GetTextAttr.No);
                return res;
            }
        }
        if ((term == "M" || term == "М" || term == "СПБ") || term == "K" || term == "К") {
            // Abbreviated place of publication, e.g. "M.:" or "M., <year>".
            if (t.Next != null && t.Next.IsCharOf(":;")) {
                return new BookLinkToken(t, t.Next) { Typ = BookLinkTyp.Geo };
            }
            if (t.Next != null && t.Next.IsCharOf(".")) {
                BookLinkToken res = new BookLinkToken(t, t.Next) { Typ = BookLinkTyp.Geo };
                if (t.Next.Next != null && t.Next.Next.IsCharOf(":;")) {
                    res.EndToken = t.Next.Next;
                } else if (t.Next.Next != null && (t.Next.Next is Pullenti.Ner.NumberToken)) {
                    // a number right after the dot - accepted as is
                } else if (t.Next.Next != null && t.Next.Next.IsComma && (t.Next.Next.Next is Pullenti.Ner.NumberToken)) {
                    // ", <number>" after the dot - accepted as is
                } else {
                    return null;
                }
                return res;
            }
        }
        if (term == "ПЕР" || term == "ПЕРЕВ" || term == "ПЕРЕВОД") {
            Pullenti.Ner.Token tt = t;
            if (tt.Next != null && tt.Next.IsChar('.')) {
                tt = tt.Next;
            }
            if (tt.Next != null && (tt.Next.IsValue("C", null) || tt.Next.IsValue("С", null))) {
                tt = tt.Next;
                if (tt.Next == null || tt.WhitespacesAfterCount > 2) {
                    return null;
                }
                return new BookLinkToken(t, tt.Next) { Typ = BookLinkTyp.Translate };
            }
        }
        if (term == "ТАМ" || term == "ТАМЖЕ") {
            BookLinkToken res = new BookLinkToken(t, t) { Typ = BookLinkTyp.Tamze };
            if (t.Next != null && t.Next.IsValue("ЖЕ", null)) {
                res.EndToken = t.Next;
            }
            return res;
        }
        if (((term == "СМ" || term == "CM" || term == "НАПР") || term == "НАПРИМЕР" || term == "SEE") || term == "ПОДРОБНЕЕ" || term == "ПОДРОБНО") {
            BookLinkToken res = new BookLinkToken(t, t) { Typ = BookLinkTyp.See };
            for (t = t.Next; t != null; t = t.Next) {
                if (t.IsCharOf(".:") || t.IsValue("ALSO", null)) {
                    res.EndToken = t;
                    continue;
                }
                if (t.IsValue("В", null) || t.IsValue("IN", null)) {
                    res.EndToken = t;
                    continue;
                }
                // Merge an immediately following "see" marker into this one.
                BookLinkToken vvv = _tryParse(t, lev + 1);
                if (vvv != null && vvv.Typ == BookLinkTyp.See) {
                    res.EndToken = vvv.EndToken;
                }
                break;
            }
            return res;
        }
        if (term == "БОЛЕЕ") {
            BookLinkToken vvv = _tryParse(t.Next, lev + 1);
            if (vvv != null && vvv.Typ == BookLinkTyp.See) {
                vvv.BeginToken = t;
                return vvv;
            }
        }
        Pullenti.Ner.Token numTok = Pullenti.Ner.Core.MiscHelper.CheckNumberPrefix(t);
        if (numTok is Pullenti.Ner.NumberToken) {
            return new BookLinkToken(t, numTok) { Typ = BookLinkTyp.N };
        }
        if ((term == "B" || term == "В") && (t.Next is Pullenti.Ner.NumberToken) && (t.Next.Next is Pullenti.Ner.TextToken)) {
            // "<in> <number> <volume/book word>"
            string term2 = (t.Next.Next as Pullenti.Ner.TextToken).Term;
            if (((term2 == "Т" || term2 == "T" || term2.StartsWith("ТОМ")) || term2 == "TT" || term2 == "ТТ") || term2 == "КН" || term2.StartsWith("КНИГ")) {
                return new BookLinkToken(t, t.Next.Next) { Typ = BookLinkTyp.Volume };
            }
        }
    }
    if (t.IsChar('(')) {
        // "(<year>)" given either as a plain number or as a date referent.
        if (((t.Next is Pullenti.Ner.NumberToken) && (t.Next as Pullenti.Ner.NumberToken).IntValue != null && t.Next.Next != null) && t.Next.Next.IsChar(')')) {
            int num = (t.Next as Pullenti.Ner.NumberToken).IntValue.Value;
            if (num > 1900 && num <= 2040) {
                if (num <= DateTime.Now.Year) {
                    return new BookLinkToken(t, t.Next.Next) { Typ = BookLinkTyp.Year, Value = num.ToString() };
                }
            }
        }
        if (((t.Next is Pullenti.Ner.ReferentToken) && (t.Next.GetReferent() is Pullenti.Ner.Date.DateReferent) && t.Next.Next != null) && t.Next.Next.IsChar(')')) {
            int num = (t.Next.GetReferent() as Pullenti.Ner.Date.DateReferent).Year;
            if (num > 0) {
                return new BookLinkToken(t, t.Next.Next) { Typ = BookLinkTyp.Year, Value = num.ToString() };
            }
        }
    }
    return null;
}
/// <summary>
/// Splits the token chain starting at <paramref name="t0"/> into consecutive <c>Line</c> objects.
/// Collection stops at a keywords title item, at a block title of a defined type, or when a
/// limit is reached.
/// </summary>
/// <param name="t0">First token to examine; null produces an empty list.</param>
/// <param name="maxLines">Stop once this many lines have been collected (checked after adding a line).</param>
/// <param name="maxChars">Stop once the collected lines' total CharsCount reaches this value
/// (checked after adding a line).</param>
/// <param name="maxEndChar">When positive, stop at the first line whose start token begins
/// after this position.</param>
/// <returns>The collected lines, possibly empty.</returns>
public static List<Line> Parse(Pullenti.Ner.Token t0, int maxLines, int maxChars, int maxEndChar) {
    List<Line> lines = new List<Line>();
    int collectedChars = 0;
    Pullenti.Ner.Token start = t0;
    while (start != null) {
        if (maxEndChar > 0 && start.BeginChar > maxEndChar) {
            break;
        }

        // Find the last token of the line that begins at 'start'.
        Pullenti.Ner.Token lineEnd = start;
        while (lineEnd.Next != null) {
            // A line break ends the line only if the next token can start a sentence.
            if (lineEnd.IsNewlineAfter && Pullenti.Ner.Core.MiscHelper.CanBeStartOfSentence(lineEnd.Next)) {
                break;
            }
            // A person referent at the start of a line stands alone when followed by a
            // letter word that is not all lower case.
            if (lineEnd == start && start.IsNewlineBefore && (start.GetReferent() is Pullenti.Ner.Person.PersonReferent)) {
                Pullenti.Ner.Token following = lineEnd.Next;
                if ((following is Pullenti.Ner.TextToken) && following.Chars.IsLetter && !following.Chars.IsAllLower) {
                    break;
                }
            }
            lineEnd = lineEnd.Next;
        }

        TitleItemToken titleItem = TitleItemToken.TryAttach(start);
        if (titleItem != null && titleItem.Typ == TitleItemToken.Types.Keywords) {
            break;
        }
        Pullenti.Ner.Core.Internal.BlockTitleToken blockTitle = Pullenti.Ner.Core.Internal.BlockTitleToken.TryAttach(start, false, null);
        if (blockTitle != null && blockTitle.Typ != Pullenti.Ner.Core.Internal.BlkTyps.Undefined) {
            break;
        }

        Line line = new Line(start, lineEnd);
        lines.Add(line);
        collectedChars += line.CharsCount;
        if (lines.Count >= maxLines || collectedChars >= maxChars) {
            break;
        }
        start = lineEnd.Next;
    }
    return lines;
}
/// <summary>
/// Tries to build a city referent from the first item of <paramref name="li"/> when that item is
/// already typed as a city (ItemType.City), using the surrounding context to confirm it.
/// </summary>
/// <param name="li">city items; only the first one (and the type and spacing of the second) is examined</param>
/// <param name="oi">receives the ontology item of the first list element (may be null)</param>
/// <param name="always">when true, a referent is produced even if the context checks fail,
/// unless the token is a doubtful surname that the PERSON analyzer recognises</param>
/// <returns>a token wrapping the city referent, or null when nothing is attached</returns>
static Pullenti.Ner.ReferentToken _tryNameExist(List <CityItemToken> li, out Pullenti.Ner.Core.IntOntologyItem oi, bool always) {
    oi = null;
    // An empty list is rejected up front: indexing li[0] would otherwise throw.
    if (li == null || li.Count == 0 || li[0].Typ != CityItemToken.ItemType.City) {
        return(null);
    }
    oi = li[0].OntoItem;
    Pullenti.Ner.TextToken tt = li[0].BeginToken as Pullenti.Ner.TextToken;
    if (tt == null) {
        return(null);
    }
    bool ok = false;
    string nam = (oi == null ? li[0].Value : oi.CanonicText);
    if (nam == null) {
        return(null);
    }
    if (nam == "РИМ") {
        // Special case for this name: accept the exact term unless a dictionary
        // "proper secname" follows, or the form "РИМЕ" right after "В".
        if (tt.Term == "РИМ") {
            if ((tt.Next is Pullenti.Ner.TextToken) && tt.Next.GetMorphClassInDictionary().IsProperSecname) {
            } else {
                ok = true;
            }
        } else if (tt.Previous != null && tt.Previous.IsValue("В", null) && tt.Term == "РИМЕ") {
            ok = true;
        }
    } else if (oi != null && oi.Referent != null && oi.Owner.IsExtOntology) {
        ok = true;
    } else if (nam.EndsWith("ГРАД") || nam.EndsWith("СК")) {
        ok = true;
    } else if (nam.EndsWith("TOWN") || nam.StartsWith("SAN")) {
        ok = true;
    } else if (li[0].Chars.IsLatinLetter && li[0].BeginToken.Previous != null && ((li[0].BeginToken.Previous.IsValue("IN", null) || li[0].BeginToken.Previous.IsValue("FROM", null)))) {
        ok = true;
    } else {
        // Look to the right (same line) for a geo referent written in the same script.
        for (Pullenti.Ner.Token tt2 = li[0].EndToken.Next; tt2 != null; tt2 = tt2.Next) {
            if (tt2.IsNewlineBefore) {
                break;
            }
            if ((tt2.IsCharOf(",(") || tt2.Morph.Class.IsPreposition || tt2.Morph.Class.IsConjunction) || tt2.Morph.Class.IsMisc) {
                continue;
            }
            if ((tt2.GetReferent() is Pullenti.Ner.Geo.GeoReferent) && tt2.Chars.IsCyrillicLetter == li[0].Chars.IsCyrillicLetter) {
                ok = true;
            }
            break;
        }
        if (!ok) {
            // Otherwise look to the left (same line) and apply additional filters.
            for (Pullenti.Ner.Token tt2 = li[0].BeginToken.Previous; tt2 != null; tt2 = tt2.Previous) {
                if (tt2.IsNewlineAfter) {
                    break;
                }
                if ((tt2.IsCharOf(",)") || tt2.Morph.Class.IsPreposition || tt2.Morph.Class.IsConjunction) || tt2.Morph.Class.IsMisc) {
                    continue;
                }
                if ((tt2.GetReferent() is Pullenti.Ner.Geo.GeoReferent) && tt2.Chars.IsCyrillicLetter == li[0].Chars.IsCyrillicLetter) {
                    ok = true;
                }
                if (ok) {
                    // Reject when the text parses as a street only together with its first item.
                    List <Pullenti.Ner.Address.Internal.StreetItemToken> sits = Pullenti.Ner.Address.Internal.StreetItemToken.TryParseList(li[0].BeginToken, null, 10);
                    if (sits != null && sits.Count > 1) {
                        Pullenti.Ner.Address.Internal.AddressItemToken ss = Pullenti.Ner.Address.Internal.StreetDefineHelper.TryParseStreet(sits, false, false);
                        if (ss != null) {
                            sits.RemoveAt(0);
                            if (Pullenti.Ner.Address.Internal.StreetDefineHelper.TryParseStreet(sits, false, false) == null) {
                                ok = false;
                            }
                        }
                    }
                }
                if (ok) {
                    if (li.Count > 1 && li[1].Typ == CityItemToken.ItemType.ProperName && (li[1].WhitespacesBeforeCount < 3)) {
                        ok = false;
                    } else {
                        Pullenti.Morph.MorphClass mc = li[0].BeginToken.GetMorphClassInDictionary();
                        if (mc.IsProperName || mc.IsProperSurname || mc.IsAdjective) {
                            ok = false;
                        } else {
                            Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(li[0].BeginToken, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                            if (npt != null && npt.EndChar > li[0].EndChar) {
                                ok = false;
                            }
                        }
                    }
                }
                if (Pullenti.Ner.Address.Internal.AddressItemToken.TryAttachOrg(li[0].BeginToken) != null) {
                    ok = false;
                    break;
                }
                break;
            }
        }
    }
    if (always) {
        // A doubtful, well-separated surname that the PERSON analyzer accepts cancels the forced mode.
        if (li[0].WhitespacesBeforeCount > 3 && li[0].Doubtful && li[0].BeginToken.GetMorphClassInDictionary().IsProperSurname) {
            Pullenti.Ner.ReferentToken pp = li[0].Kit.ProcessReferent("PERSON", li[0].BeginToken);
            if (pp != null) {
                always = false;
            }
        }
    }
    if (li[0].BeginToken.Chars.IsLatinLetter && li[0].BeginToken == li[0].EndToken) {
        // A single Latin word followed (optionally after a comma) by a short non-lowercase Latin token is rejected.
        Pullenti.Ner.Token tt1 = li[0].EndToken.Next;
        if (tt1 != null && tt1.IsChar(',')) {
            tt1 = tt1.Next;
        }
        if (((tt1 is Pullenti.Ner.TextToken) && tt1.Chars.IsLatinLetter && (tt1.LengthChar < 3)) && !tt1.Chars.IsAllLower) {
            ok = false;
        }
    }
    if (!ok && !always) {
        return(null);
    }
    Pullenti.Ner.Geo.GeoReferent city = null;
    if (oi != null && (oi.Referent is Pullenti.Ner.Geo.GeoReferent) && !oi.Owner.IsExtOntology) {
        // Internal ontology: work on a clone with the occurrences cleared.
        city = oi.Referent.Clone() as Pullenti.Ner.Geo.GeoReferent;
        city.Occurrence.Clear();
    } else {
        city = new Pullenti.Ner.Geo.GeoReferent();
        city.AddName(nam);
        if (oi != null && (oi.Referent is Pullenti.Ner.Geo.GeoReferent)) {
            city.MergeSlots2(oi.Referent as Pullenti.Ner.Geo.GeoReferent, li[0].Kit.BaseLanguage);
        }
        if (!city.IsCity) {
            city.AddTypCity(li[0].Kit.BaseLanguage);
        }
    }
    return(new Pullenti.Ner.ReferentToken(city, li[0].BeginToken, li[0].EndToken) { Morph = li[0].Morph });
}
/// <summary>
/// Tries to collect a bank-requisites referent (BankDataReferent) starting at token <paramref name="t"/>:
/// a run of URI referents with bank-requisite schemes, optionally with the bank organization and a
/// correspondent bank.
/// </summary>
/// <param name="t">first token of the candidate fragment</param>
/// <param name="keyWord">when false, the scan gives up as soon as a token other than a bank-requisite
/// URI (or a bank organization) is met before any URI has been collected</param>
/// <returns>a token covering the requisites, or null when the fragment does not qualify
/// (an "Р/С" or "Л/С" item is required, plus either a second item or an organization)</returns>
Pullenti.Ner.ReferentToken TryAttach(Pullenti.Ner.Token t, bool keyWord) {
    if (t == null) {
        return(null);
    }
    Pullenti.Ner.Token t0 = t;
    Pullenti.Ner.Token t1 = t;
    List <string> urisKeys = null;
    List <Pullenti.Ner.Uri.UriReferent> uris = null;
    Pullenti.Ner.Referent org = null;
    Pullenti.Ner.Referent corOrg = null;
    bool orgIsBank = false;
    // Counts consecutive tokens that carried no requisite information.
    int empty = 0;
    Pullenti.Ner.Uri.UriReferent lastUri = null;
    for (; t != null; t = t.Next) {
        if (t.IsTableControlChar && t != t0) {
            break;
        }
        if (t.IsComma || t.Morph.Class.IsPreposition || t.IsCharOf("/\\")) {
            continue;
        }
        bool bankKeyword = false;
        // Skip the two-word label meaning "full name".
        if (t.IsValue("ПОЛНЫЙ", null) && t.Next != null && ((t.Next.IsValue("НАИМЕНОВАНИЕ", null) || t.Next.IsValue("НАЗВАНИЕ", null)))) {
            t = t.Next.Next;
            if (t == null) {
                break;
            }
        }
        if (t.IsValue("БАНК", null)) {
            if ((t is Pullenti.Ner.ReferentToken) && t.GetReferent().TypeName == "ORGANIZATION") {
                bankKeyword = true;
            }
            // Step over an optional noun phrase and colon that follow the keyword.
            Pullenti.Ner.Token tt = t.Next;
            Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(tt, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
            if (npt != null) {
                tt = npt.EndToken.Next;
            }
            if (tt != null && tt.IsChar(':')) {
                tt = tt.Next;
            }
            if (tt != null) {
                if (!bankKeyword) {
                    t = tt;
                    bankKeyword = true;
                } else if (tt.GetReferent() != null && tt.GetReferent().TypeName == "ORGANIZATION") {
                    t = tt;
                }
            }
        }
        Pullenti.Ner.Referent r = t.GetReferent();
        if (r != null && r.TypeName == "ORGANIZATION") {
            bool isBank = false;
            int kk = 0;
            // Check the organization and up to three of its parents for KIND == "Bank".
            // "Bank" is a machine identifier, so it is compared ordinally (case-insensitive), not by culture.
            for (Pullenti.Ner.Referent rr = r; rr != null && (kk < 4); rr = rr.ParentReferent, kk++) {
                isBank = string.Equals(rr.GetStringValue("KIND") ?? "", "Bank", System.StringComparison.OrdinalIgnoreCase);
                if (isBank) {
                    break;
                }
            }
            if (!isBank && bankKeyword) {
                isBank = true;
            }
            if (!isBank && uris != null && urisKeys.Contains("ИНН")) {
                return(null);
            }
            if ((lastUri != null && lastUri.Scheme == "К/С" && t.Previous != null) && t.Previous.IsValue("В", null)) {
                // An organization right after an "К/С" item and the word "В" is taken as the correspondent bank.
                corOrg = r;
                t1 = t;
            } else if (org == null || ((!orgIsBank && isBank))) {
                org = r;
                t1 = t;
                orgIsBank = isBank;
                if (isBank) {
                    continue;
                }
            }
            if (uris == null && !keyWord) {
                return(null);
            }
            continue;
        }
        if (r is Pullenti.Ner.Uri.UriReferent) {
            Pullenti.Ner.Uri.UriReferent u = r as Pullenti.Ner.Uri.UriReferent;
            if (uris == null) {
                if (!_isBankReq(u.Scheme)) {
                    return(null);
                }
                if (u.Scheme == "ИНН" && t.IsNewlineAfter) {
                    return(null);
                }
                uris = new List <Pullenti.Ner.Uri.UriReferent>();
                urisKeys = new List <string>();
            } else {
                if (!_isBankReq(u.Scheme)) {
                    break;
                }
                // A repeated scheme ends the scan.
                if (urisKeys.Contains(u.Scheme)) {
                    break;
                }
                if (u.Scheme == "ИНН") {
                    if (empty > 0) {
                        break;
                    }
                }
            }
            urisKeys.Add(u.Scheme);
            uris.Add(u);
            lastUri = u;
            t1 = t;
            empty = 0;
            continue;
        } else if (uris == null && !keyWord && !orgIsBank) {
            return(null);
        }
        if (r != null && ((r.TypeName == "GEO" || r.TypeName == "ADDRESS"))) {
            empty++;
            continue;
        }
        if (t is Pullenti.Ner.TextToken) {
            if (t.IsValue("ПОЛНЫЙ", null) || t.IsValue("НАИМЕНОВАНИЕ", null) || t.IsValue("НАЗВАНИЕ", null)) {
            } else if (t.Chars.IsLetter) {
                Pullenti.Ner.Core.TerminToken tok = m_Ontology.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
                if (tok != null) {
                    t = tok.EndToken;
                    empty = 0;
                } else {
                    empty++;
                    if (t.IsNewlineBefore) {
                        // A new line opening with "<noun phrase>:" ends the scan.
                        Pullenti.Ner.Core.NounPhraseToken nnn = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                        if (nnn != null && nnn.EndToken.Next != null && nnn.EndToken.Next.IsChar(':')) {
                            break;
                        }
                    }
                }
                if (uris == null) {
                    break;
                }
            }
        }
        if (empty > 2) {
            break;
        }
        if (empty > 0 && t.IsChar(':') && t.IsNewlineAfter) {
            break;
        }
        if (((t is Pullenti.Ner.NumberToken) && t.IsNewlineBefore && t.Next != null) && !t.Next.Chars.IsLetter) {
            break;
        }
    }
    if (uris == null) {
        return(null);
    }
    if (!urisKeys.Contains("Р/С") && !urisKeys.Contains("Л/С")) {
        return(null);
    }
    if ((uris.Count < 2) && org == null) {
        return(null);
    }
    BankDataReferent bdr = new BankDataReferent();
    foreach (Pullenti.Ner.Uri.UriReferent u in uris) {
        bdr.AddSlot(BankDataReferent.ATTR_ITEM, u, false, 0);
    }
    if (org != null) {
        bdr.AddSlot(BankDataReferent.ATTR_BANK, org, false, 0);
    }
    if (corOrg != null) {
        bdr.AddSlot(BankDataReferent.ATTR_CORBANK, corOrg, false, 0);
    }
    // Add bank-requisite URIs of an organization standing right before the fragment, for schemes not yet collected.
    Pullenti.Ner.Referent org0 = (t0.Previous == null ? null : t0.Previous.GetReferent());
    if (org0 != null && org0.TypeName == "ORGANIZATION") {
        foreach (Pullenti.Ner.Slot s in org0.Slots) {
            if (s.Value is Pullenti.Ner.Uri.UriReferent) {
                Pullenti.Ner.Uri.UriReferent u = s.Value as Pullenti.Ner.Uri.UriReferent;
                if (_isBankReq(u.Scheme)) {
                    if (!urisKeys.Contains(u.Scheme)) {
                        bdr.AddSlot(BankDataReferent.ATTR_ITEM, u, false, 0);
                    }
                }
            }
        }
    }
    return(new Pullenti.Ner.ReferentToken(bdr, t0, t1));
}
/// <summary>
/// Tries to build a territory referent (GeoReferent) from a list of territory items.
/// The items are classified into an ontology object, a new name, adjectives and a type noun,
/// and the combination is then validated against the surrounding context.
/// </summary>
/// <param name="li">territory items to analyse</param>
/// <param name="ad">analyzer data, passed on to the helper attach methods</param>
/// <param name="attachAlways">relaxes several of the rejection checks</param>
/// <param name="cits">optional city items; used only to decide whether a city may stand before the territory</param>
/// <param name="exists">optional known geo referents; a name found among them validates a dictionary-word name</param>
/// <returns>a token wrapping the territory referent, or null when nothing is attached</returns>
public static Pullenti.Ner.ReferentToken TryAttachTerritory(List <TerrItemToken> li, Pullenti.Ner.Core.AnalyzerData ad, bool attachAlways = false, List <CityItemToken> cits = null, List <Pullenti.Ner.Geo.GeoReferent> exists = null) {
    if (li == null || li.Count == 0) {
        return(null);
    }
    TerrItemToken exObj = null;
    TerrItemToken newName = null;
    List <TerrItemToken> adjList = new List <TerrItemToken>();
    TerrItemToken noun = null;
    TerrItemToken addNoun = null;
    Pullenti.Ner.ReferentToken rt = _tryAttachMoscowAO(li, ad);
    if (rt != null) {
        return(rt);
    }
    if (li[0].TerminItem != null && li[0].TerminItem.CanonicText == "ТЕРРИТОРИЯ") {
        Pullenti.Ner.ReferentToken res2 = _tryAttachPureTerr(li, ad);
        return(res2);
    }
    if (li.Count == 2) {
        // A pair of an Rzd item and an RzdDir item (in either order) forms a territory directly.
        if (li[0].Rzd != null && li[1].RzdDir != null) {
            Pullenti.Ner.Geo.GeoReferent rzd = new Pullenti.Ner.Geo.GeoReferent();
            rzd.AddName(li[1].RzdDir);
            rzd.AddTypTer(li[0].Kit.BaseLanguage);
            rzd.AddSlot(Pullenti.Ner.Geo.GeoReferent.ATTR_REF, li[0].Rzd.Referent, false, 0);
            rzd.AddExtReferent(li[0].Rzd);
            return(new Pullenti.Ner.ReferentToken(rzd, li[0].BeginToken, li[1].EndToken));
        }
        if (li[1].Rzd != null && li[0].RzdDir != null) {
            Pullenti.Ner.Geo.GeoReferent rzd = new Pullenti.Ner.Geo.GeoReferent();
            rzd.AddName(li[0].RzdDir);
            rzd.AddTypTer(li[0].Kit.BaseLanguage);
            rzd.AddSlot(Pullenti.Ner.Geo.GeoReferent.ATTR_REF, li[1].Rzd.Referent, false, 0);
            rzd.AddExtReferent(li[1].Rzd);
            return(new Pullenti.Ner.ReferentToken(rzd, li[0].BeginToken, li[1].EndToken));
        }
    }
    bool canBeCityBefore = false;
    bool adjTerrBefore = false;
    // The Count check keeps an empty list from throwing on cits[0].
    if (cits != null && cits.Count > 0) {
        if (cits[0].Typ == CityItemToken.ItemType.City) {
            canBeCityBefore = true;
        } else if (cits[0].Typ == CityItemToken.ItemType.Noun && cits.Count > 1) {
            canBeCityBefore = true;
        }
    }
    // Classification pass: k ends up as the number of list items consumed.
    int k;
    for (k = 0; k < li.Count; k++) {
        if (li[k].OntoItem != null) {
            if (exObj != null || newName != null) {
                break;
            }
            if (noun != null) {
                if (k == 1) {
                    if (noun.TerminItem.CanonicText == "РАЙОН" || noun.TerminItem.CanonicText == "ОБЛАСТЬ" || noun.TerminItem.CanonicText == "СОЮЗ") {
                        if (li[k].OntoItem.Referent is Pullenti.Ner.Geo.GeoReferent) {
                            if ((li[k].OntoItem.Referent as Pullenti.Ner.Geo.GeoReferent).IsState) {
                                break;
                            }
                        }
                        bool ok = false;
                        Pullenti.Ner.Token tt = li[k].EndToken.Next;
                        if (tt == null) {
                            ok = true;
                        } else if (tt.IsCharOf(",.")) {
                            ok = true;
                        }
                        if (!ok) {
                            ok = MiscLocationHelper.CheckGeoObjectBefore(li[0].BeginToken);
                        }
                        if (!ok) {
                            Pullenti.Ner.Address.Internal.AddressItemToken adr = Pullenti.Ner.Address.Internal.AddressItemToken.TryParse(tt, null, false, false, null);
                            if (adr != null) {
                                if (adr.Typ == Pullenti.Ner.Address.Internal.AddressItemToken.ItemType.Street) {
                                    ok = true;
                                }
                            }
                        }
                        if (!ok) {
                            break;
                        }
                    }
                    if (li[k].OntoItem != null) {
                        if (noun.BeginToken.IsValue("МО", null) || noun.BeginToken.IsValue("ЛО", null)) {
                            return(null);
                        }
                    }
                }
            }
            exObj = li[k];
        } else if (li[k].TerminItem != null) {
            if (noun != null) {
                break;
            }
            if (li[k].TerminItem.IsAlwaysPrefix && k > 0) {
                break;
            }
            if (k > 0 && li[k].IsDoubt) {
                if (li[k].BeginToken == li[k].EndToken && li[k].BeginToken.IsValue("ЗАО", null)) {
                    break;
                }
            }
            if (li[k].TerminItem.IsAdjective || li[k].IsGeoInDictionary) {
                adjList.Add(li[k]);
            } else {
                if (exObj != null) {
                    Pullenti.Ner.Geo.GeoReferent geo = exObj.OntoItem.Referent as Pullenti.Ner.Geo.GeoReferent;
                    if (geo == null) {
                        break;
                    }
                    if (exObj.IsAdjective && ((li[k].TerminItem.CanonicText == "СОЮЗ" || li[k].TerminItem.CanonicText == "ФЕДЕРАЦИЯ"))) {
                        string str = exObj.OntoItem.ToString();
                        if (!str.Contains(li[k].TerminItem.CanonicText)) {
                            return(null);
                        }
                    }
                    if (li[k].TerminItem.CanonicText == "РАЙОН" || li[k].TerminItem.CanonicText == "ОКРУГ" || li[k].TerminItem.CanonicText == "КРАЙ") {
                        // If the known object does not have this type, treat the first item as a new adjective name.
                        StringBuilder tmp = new StringBuilder();
                        foreach (Pullenti.Ner.Slot s in geo.Slots) {
                            if (s.TypeName == Pullenti.Ner.Geo.GeoReferent.ATTR_TYPE) {
                                tmp.AppendFormat("{0};", s.Value);
                            }
                        }
                        if (!tmp.ToString().ToUpper().Contains(li[k].TerminItem.CanonicText)) {
                            if (k != 1 || newName != null) {
                                break;
                            }
                            newName = li[0];
                            newName.IsAdjective = true;
                            newName.OntoItem = null;
                            exObj = null;
                        }
                    }
                }
                noun = li[k];
                if (k == 0) {
                    TerrItemToken tt = TerrItemToken.TryParse(li[k].BeginToken.Previous, null, true, false, null);
                    if (tt != null && tt.Morph.Class.IsAdjective) {
                        adjTerrBefore = true;
                    }
                }
            }
        } else {
            if (exObj != null) {
                break;
            }
            if (newName != null) {
                break;
            }
            newName = li[k];
        }
    }
    string name = null;
    string altName = null;
    string fullName = null;
    Pullenti.Ner.MorphCollection morph = null;
    if (exObj != null) {
        // Case 1: an object known to the ontology.
        if (exObj.IsAdjective && !exObj.Morph.Language.IsEn && noun == null) {
            if (attachAlways && exObj.EndToken.Next != null) {
                Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(exObj.BeginToken, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                if (exObj.EndToken.Next.IsCommaAnd) {
                } else if (npt == null) {
                } else {
                    Pullenti.Ner.Address.Internal.StreetItemToken str = Pullenti.Ner.Address.Internal.StreetItemToken.TryParse(exObj.EndToken.Next, null, false, null, false);
                    if (str != null) {
                        if (str.Typ == Pullenti.Ner.Address.Internal.StreetItemType.Noun && str.EndToken == npt.EndToken) {
                            return(null);
                        }
                    }
                }
            } else {
                CityItemToken cit = CityItemToken.TryParse(exObj.EndToken.Next, null, false, null);
                if (cit != null && ((cit.Typ == CityItemToken.ItemType.Noun || cit.Typ == CityItemToken.ItemType.City))) {
                    Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(exObj.BeginToken, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                    if (npt != null && npt.EndToken == cit.EndToken) {
                    } else {
                        return(null);
                    }
                } else if (exObj.BeginToken.IsValue("ПОДНЕБЕСНЫЙ", null)) {
                } else {
                    return(null);
                }
            }
        }
        if (noun == null && exObj.CanBeCity) {
            CityItemToken cit0 = CityItemToken.TryParseBack(exObj.BeginToken.Previous);
            if (cit0 != null && cit0.Typ != CityItemToken.ItemType.ProperName) {
                return(null);
            }
        }
        if (exObj.IsDoubt && noun == null) {
            // A doubtful object without a type noun needs confirmation from its context.
            bool ok2 = false;
            if (_canBeGeoAfter(exObj.EndToken.Next)) {
                ok2 = true;
            } else if (!exObj.CanBeSurname && !exObj.CanBeCity) {
                if ((exObj.EndToken.Next != null && exObj.EndToken.Next.IsChar(')') && exObj.BeginToken.Previous != null) && exObj.BeginToken.Previous.IsChar('(')) {
                    ok2 = true;
                } else if (exObj.Chars.IsLatinLetter && exObj.BeginToken.Previous != null) {
                    if (exObj.BeginToken.Previous.IsValue("IN", null)) {
                        ok2 = true;
                    } else if (exObj.BeginToken.Previous.IsValue("THE", null) && exObj.BeginToken.Previous.Previous != null && exObj.BeginToken.Previous.Previous.IsValue("IN", null)) {
                        ok2 = true;
                    }
                }
            }
            if (!ok2) {
                CityItemToken cit0 = CityItemToken.TryParseBack(exObj.BeginToken.Previous);
                if (cit0 != null && cit0.Typ != CityItemToken.ItemType.ProperName) {
                } else if (MiscLocationHelper.CheckGeoObjectBefore(exObj.BeginToken.Previous)) {
                } else {
                    return(null);
                }
            }
        }
        name = exObj.OntoItem.CanonicText;
        morph = exObj.Morph;
    } else if (newName != null) {
        // Case 2: a name that is not in the ontology; a type noun is mandatory.
        if (noun == null) {
            return(null);
        }
        for (int j = 1; j < k; j++) {
            if (li[j].IsNewlineBefore && !li[0].IsNewlineBefore) {
                if (Pullenti.Ner.Core.BracketHelper.CanBeStartOfSequence(li[j].BeginToken, false, false)) {
                } else {
                    return(null);
                }
            }
        }
        morph = noun.Morph;
        if (newName.IsAdjective) {
            if (noun.TerminItem.Acronym == "АО") {
                if (noun.BeginToken != noun.EndToken) {
                    return(null);
                }
                if (newName.Morph.Gender != Pullenti.Morph.MorphGender.Feminie) {
                    return(null);
                }
            }
            // Geo referent standing just before the list (optionally separated by a comma or "and").
            Pullenti.Ner.Geo.GeoReferent geoBefore = null;
            Pullenti.Ner.Token tt0 = li[0].BeginToken.Previous;
            if (tt0 != null && tt0.IsCommaAnd) {
                tt0 = tt0.Previous;
            }
            if (!li[0].IsNewlineBefore && tt0 != null) {
                geoBefore = tt0.GetReferent() as Pullenti.Ner.Geo.GeoReferent;
            }
            if (li.IndexOf(noun) < li.IndexOf(newName)) {
                // The noun precedes the adjective name.
                if (noun.TerminItem.IsState) {
                    return(null);
                }
                if (newName.CanBeSurname && geoBefore == null) {
                    if (((noun.Morph.Case & newName.Morph.Case)).IsUndefined) {
                        return(null);
                    }
                }
                if (Pullenti.Ner.Core.MiscHelper.IsExistsInDictionary(newName.BeginToken, newName.EndToken, Pullenti.Morph.MorphClass.Adjective | Pullenti.Morph.MorphClass.Pronoun | Pullenti.Morph.MorphClass.Verb)) {
                    if (noun.BeginToken != newName.BeginToken) {
                        if (geoBefore == null) {
                            if (li.Count == 2 && _canBeGeoAfter(li[1].EndToken.Next)) {
                            } else if (li.Count == 3 && li[2].TerminItem != null && _canBeGeoAfter(li[2].EndToken.Next)) {
                            } else if (newName.IsGeoInDictionary) {
                            } else if (newName.EndToken.IsNewlineAfter) {
                            } else {
                                return(null);
                            }
                        }
                    }
                }
                Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(newName.EndToken, Pullenti.Ner.Core.NounPhraseParseAttr.ParsePronouns, 0, null);
                if (npt != null && npt.EndToken != newName.EndToken) {
                    if (li.Count >= 3 && li[2].TerminItem != null && npt.EndToken == li[2].EndToken) {
                        addNoun = li[2];
                    } else {
                        return(null);
                    }
                }
                Pullenti.Ner.ReferentToken rtp = newName.Kit.ProcessReferent("PERSON", newName.BeginToken);
                if (rtp != null) {
                    return(null);
                }
                name = Pullenti.Ner.Core.ProperNameHelper.GetNameEx(newName.BeginToken, newName.EndToken, Pullenti.Morph.MorphClass.Adjective, Pullenti.Morph.MorphCase.Undefined, noun.TerminItem.Gender, false, false);
            } else {
                // The adjective name precedes the noun.
                bool ok = false;
                if (((k + 1) < li.Count) && li[k].TerminItem == null && li[k + 1].TerminItem != null) {
                    ok = true;
                } else if ((k < li.Count) && li[k].OntoItem != null) {
                    ok = true;
                } else if (k == li.Count && !newName.IsAdjInDictionary) {
                    ok = true;
                } else if (MiscLocationHelper.CheckGeoObjectBefore(li[0].BeginToken) || canBeCityBefore) {
                    ok = true;
                } else if (MiscLocationHelper.CheckGeoObjectAfter(li[k - 1].EndToken, false)) {
                    ok = true;
                } else if (li.Count == 3 && k == 2) {
                    CityItemToken cit = CityItemToken.TryParse(li[2].BeginToken, null, false, null);
                    if (cit != null) {
                        if (cit.Typ == CityItemToken.ItemType.City || cit.Typ == CityItemToken.ItemType.Noun) {
                            ok = true;
                        }
                    }
                } else if (li.Count == 2) {
                    ok = _canBeGeoAfter(li[li.Count - 1].EndToken.Next);
                }
                if (!ok && !li[0].IsNewlineBefore && !li[0].Chars.IsAllLower) {
                    Pullenti.Ner.ReferentToken rt00 = li[0].Kit.ProcessReferent("PERSONPROPERTY", li[0].BeginToken.Previous);
                    if (rt00 != null) {
                        ok = true;
                    }
                }
                if (noun.TerminItem != null && noun.TerminItem.IsStrong && newName.IsAdjective) {
                    ok = true;
                }
                if (noun.IsDoubt && adjList.Count == 0 && geoBefore == null) {
                    return(null);
                }
                name = Pullenti.Ner.Core.ProperNameHelper.GetNameEx(newName.BeginToken, newName.EndToken, Pullenti.Morph.MorphClass.Adjective, Pullenti.Morph.MorphCase.Undefined, noun.TerminItem.Gender, false, false);
                if (!ok && !attachAlways) {
                    if (Pullenti.Ner.Core.MiscHelper.IsExistsInDictionary(newName.BeginToken, newName.EndToken, Pullenti.Morph.MorphClass.Adjective | Pullenti.Morph.MorphClass.Pronoun | Pullenti.Morph.MorphClass.Verb)) {
                        // A dictionary word is accepted only if a known referent already carries this name.
                        if (exists != null) {
                            foreach (Pullenti.Ner.Geo.GeoReferent e in exists) {
                                if (e.FindSlot(Pullenti.Ner.Geo.GeoReferent.ATTR_NAME, name, true) != null) {
                                    ok = true;
                                    break;
                                }
                            }
                        }
                        if (!ok) {
                            return(null);
                        }
                    }
                }
                fullName = string.Format("{0} {1}", Pullenti.Ner.Core.ProperNameHelper.GetNameEx(li[0].BeginToken, noun.BeginToken.Previous, Pullenti.Morph.MorphClass.Adjective, Pullenti.Morph.MorphCase.Undefined, noun.TerminItem.Gender, false, false), noun.TerminItem.CanonicText);
            }
        } else {
            // The new name is not an adjective.
            if (!attachAlways || ((noun.TerminItem != null && noun.TerminItem.CanonicText == "ФЕДЕРАЦИЯ"))) {
                bool isLatin = noun.Chars.IsLatinLetter && newName.Chars.IsLatinLetter;
                if (li.IndexOf(noun) > li.IndexOf(newName)) {
                    if (!isLatin) {
                        return(null);
                    }
                }
                if (!newName.IsDistrictName && !Pullenti.Ner.Core.BracketHelper.CanBeStartOfSequence(newName.BeginToken, false, false)) {
                    if (adjList.Count == 0 && Pullenti.Ner.Core.MiscHelper.IsExistsInDictionary(newName.BeginToken, newName.EndToken, Pullenti.Morph.MorphClass.Noun | Pullenti.Morph.MorphClass.Pronoun)) {
                        if (li.Count == 2 && noun.IsCityRegion && (noun.WhitespacesAfterCount < 2)) {
                        } else {
                            return(null);
                        }
                    }
                    if (!isLatin) {
                        if ((noun.TerminItem.IsRegion && !attachAlways && ((!adjTerrBefore || newName.IsDoubt))) && !noun.IsCityRegion && !noun.TerminItem.IsSpecificPrefix) {
                            if (!MiscLocationHelper.CheckGeoObjectBefore(noun.BeginToken)) {
                                if (!noun.IsDoubt && noun.BeginToken != noun.EndToken) {
                                } else if ((noun.TerminItem.IsAlwaysPrefix && li.Count == 2 && li[0] == noun) && li[1] == newName) {
                                } else {
                                    return(null);
                                }
                            }
                        }
                        if (noun.IsDoubt && adjList.Count == 0) {
                            if (noun.TerminItem.Acronym == "МО" || noun.TerminItem.Acronym == "ЛО") {
                                if (k == (li.Count - 1) && li[k].TerminItem != null) {
                                    // The trailing type item is consumed as an additional noun.
                                    addNoun = li[k];
                                    k++;
                                } else if (li.Count == 2 && noun == li[0] && newName.ToString().EndsWith("совет")) {
                                } else {
                                    return(null);
                                }
                            } else {
                                return(null);
                            }
                        }
                        Pullenti.Ner.ReferentToken pers = newName.Kit.ProcessReferent("PERSON", newName.BeginToken);
                        if (pers != null) {
                            return(null);
                        }
                    }
                }
            }
            name = Pullenti.Ner.Core.MiscHelper.GetTextValue(newName.BeginToken, newName.EndToken, Pullenti.Ner.Core.GetTextAttr.No);
            if (newName.BeginToken != newName.EndToken) {
                // Cut the name before an embedded type word that overlaps with the noun's canonical text.
                for (Pullenti.Ner.Token ttt = newName.BeginToken.Next; ttt != null && ttt.EndChar <= newName.EndChar; ttt = ttt.Next) {
                    if (ttt.Chars.IsLetter) {
                        TerrItemToken ty = TerrItemToken.TryParse(ttt, null, false, false, null);
                        if ((ty != null && ty.TerminItem != null && noun != null) && ((ty.TerminItem.CanonicText.Contains(noun.TerminItem.CanonicText) || noun.TerminItem.CanonicText.Contains(ty.TerminItem.CanonicText)))) {
                            name = Pullenti.Ner.Core.MiscHelper.GetTextValue(newName.BeginToken, ttt.Previous, Pullenti.Ner.Core.GetTextAttr.No);
                            break;
                        }
                    }
                }
            }
            if (adjList.Count > 0) {
                Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(adjList[0].BeginToken, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                if (npt != null && npt.EndToken == noun.EndToken) {
                    altName = string.Format("{0} {1}", npt.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false), name);
                }
            }
        }
    } else {
        // Case 3: neither a known object nor a new name - only a bare type noun may resolve to a nearby referent.
        if ((li.Count == 1 && noun != null && noun.EndToken.Next != null) && (noun.EndToken.Next.GetReferent() is Pullenti.Ner.Geo.GeoReferent)) {
            Pullenti.Ner.Geo.GeoReferent g = noun.EndToken.Next.GetReferent() as Pullenti.Ner.Geo.GeoReferent;
            if (noun.TerminItem != null) {
                string tyy = noun.TerminItem.CanonicText.ToLower();
                bool ooo = false;
                if (g.FindSlot(Pullenti.Ner.Geo.GeoReferent.ATTR_TYPE, tyy, true) != null) {
                    ooo = true;
                } else if (tyy.EndsWith("район") && g.FindSlot(Pullenti.Ner.Geo.GeoReferent.ATTR_TYPE, "район", true) != null) {
                    ooo = true;
                }
                if (ooo) {
                    return new Pullenti.Ner.ReferentToken(g, noun.BeginToken, noun.EndToken.Next) { Morph = noun.BeginToken.Morph };
                }
            }
        }
        if ((li.Count == 1 && noun == li[0] && li[0].TerminItem != null) && TerrItemToken.TryParse(li[0].EndToken.Next, null, true, false, null) == null && TerrItemToken.TryParse(li[0].BeginToken.Previous, null, true, false, null) == null) {
            if (li[0].Morph.Number == Pullenti.Morph.MorphNumber.Plural) {
                return(null);
            }
            // Search backwards (newlines weigh 10, other tokens 1, budget 500) for the nearest geo referent.
            int cou = 0;
            string str = li[0].TerminItem.CanonicText.ToLower();
            for (Pullenti.Ner.Token tt = li[0].BeginToken.Previous; tt != null; tt = tt.Previous) {
                if (tt.IsNewlineAfter) {
                    cou += 10;
                } else {
                    cou++;
                }
                if (cou > 500) {
                    break;
                }
                Pullenti.Ner.Geo.GeoReferent g = tt.GetReferent() as Pullenti.Ner.Geo.GeoReferent;
                if (g == null) {
                    continue;
                }
                bool ok = true;
                cou = 0;
                // The loop variable is reused for a forward scan; the outer loop always exits right after it.
                for (tt = li[0].EndToken.Next; tt != null; tt = tt.Next) {
                    if (tt.IsNewlineBefore) {
                        cou += 10;
                    } else {
                        cou++;
                    }
                    if (cou > 500) {
                        break;
                    }
                    TerrItemToken tee = TerrItemToken.TryParse(tt, null, true, false, null);
                    if (tee == null) {
                        continue;
                    }
                    ok = false;
                    break;
                }
                if (ok) {
                    // Check the found referent and up to two of its Higher ancestors for the same type.
                    for (int ii = 0; g != null && (ii < 3); g = g.Higher, ii++) {
                        if (g.FindSlot(Pullenti.Ner.Geo.GeoReferent.ATTR_TYPE, str, true) != null) {
                            return new Pullenti.Ner.ReferentToken(g, li[0].BeginToken, li[0].EndToken) { Morph = noun.BeginToken.Morph };
                        }
                    }
                }
                break;
            }
        }
        return(null);
    }
    Pullenti.Ner.Geo.GeoReferent ter = null;
    if (exObj != null && (exObj.Tag is Pullenti.Ner.Geo.GeoReferent)) {
        ter = exObj.Tag as Pullenti.Ner.Geo.GeoReferent;
    } else {
        ter = new Pullenti.Ner.Geo.GeoReferent();
        if (exObj != null) {
            Pullenti.Ner.Geo.GeoReferent geo = exObj.OntoItem.Referent as Pullenti.Ner.Geo.GeoReferent;
            if (geo != null && !geo.IsCity) {
                ter.MergeSlots2(geo, li[0].Kit.BaseLanguage);
            } else {
                ter.AddName(name);
            }
            if (noun == null && exObj.CanBeCity) {
                ter.AddTypCity(li[0].Kit.BaseLanguage);
            } else {
            }
        } else if (newName != null) {
            ter.AddName(name);
            if (altName != null) {
                ter.AddName(altName);
            }
        }
        if (noun != null) {
            if (noun.TerminItem.CanonicText == "АО") {
                ter.AddTyp((li[0].Kit.BaseLanguage.IsUa ? "АВТОНОМНИЙ ОКРУГ" : "АВТОНОМНЫЙ ОКРУГ"));
            } else if (noun.TerminItem.CanonicText == "МУНИЦИПАЛЬНОЕ СОБРАНИЕ" || noun.TerminItem.CanonicText == "МУНІЦИПАЛЬНЕ ЗБОРИ") {
                ter.AddTyp((li[0].Kit.BaseLanguage.IsUa ? "МУНІЦИПАЛЬНЕ УТВОРЕННЯ" : "МУНИЦИПАЛЬНОЕ ОБРАЗОВАНИЕ"));
            } else if (noun.TerminItem.Acronym == "МО" && addNoun != null) {
                ter.AddTyp(addNoun.TerminItem.CanonicText);
            } else {
                if (noun.TerminItem.CanonicText == "СОЮЗ" && exObj != null && exObj.EndChar > noun.EndChar) {
                    return new Pullenti.Ner.ReferentToken(ter, exObj.BeginToken, exObj.EndToken) { Morph = exObj.Morph };
                }
                ter.AddTyp(noun.TerminItem.CanonicText);
                if (noun.TerminItem.IsRegion && ter.IsState) {
                    ter.AddTypReg(li[0].Kit.BaseLanguage);
                }
            }
        }
        if (ter.IsState && ter.IsRegion) {
            foreach (TerrItemToken a in adjList) {
                if (a.TerminItem.IsRegion) {
                    ter.AddTypReg(li[0].Kit.BaseLanguage);
                    break;
                }
            }
        }
        if (ter.IsState) {
            if (fullName != null) {
                ter.AddName(fullName);
            }
        }
    }
    Pullenti.Ner.ReferentToken res = new Pullenti.Ner.ReferentToken(ter, li[0].BeginToken, li[k - 1].EndToken);
    if (noun != null && noun.Morph.Class.IsNoun) {
        res.Morph = noun.Morph;
    } else {
        // Merge the morphology of all consumed items; with a noun present, adjective variants are re-marked as nouns.
        res.Morph = new Pullenti.Ner.MorphCollection();
        for (int ii = 0; ii < k; ii++) {
            foreach (Pullenti.Morph.MorphBaseInfo v in li[ii].Morph.Items) {
                Pullenti.Morph.MorphBaseInfo bi = new Pullenti.Morph.MorphBaseInfo();
                bi.CopyFrom(v);
                if (noun != null) {
                    if (bi.Class.IsAdjective) {
                        bi.Class = Pullenti.Morph.MorphClass.Noun;
                    }
                }
                res.Morph.AddItem(bi);
            }
        }
    }
    if (li[0].TerminItem != null && li[0].TerminItem.IsSpecificPrefix) {
        // A specific prefix is left out of the resulting span.
        res.BeginToken = li[0].EndToken.Next;
    }
    if (addNoun != null && addNoun.EndChar > res.EndChar) {
        res.EndToken = addNoun.EndToken;
    }
    if ((res.BeginToken.Previous is Pullenti.Ner.TextToken) && (res.WhitespacesBeforeCount < 2)) {
        // A preceding "АР" token is pulled into the span when the territory has a republic type.
        Pullenti.Ner.TextToken tt = res.BeginToken.Previous as Pullenti.Ner.TextToken;
        if (tt.Term == "АР") {
            foreach (string ty in ter.Typs) {
                if (ty.Contains("республика") || ty.Contains("республіка")) {
                    res.BeginToken = tt;
                    break;
                }
            }
        }
    }
    return(res);
}
/// <summary>
/// Tries to build a city referent from a "type noun + name" sequence: the first item must be a
/// Noun or Misc item and it must be followed by a name (optionally with a second type noun between them).
/// </summary>
/// <param name="li">city items; the list is modified: items after the name are removed, as is a leading Misc item</param>
/// <param name="oi">receives the ontology item of the name when the name is a known city after a city keyword</param>
/// <param name="always">when true, a referent is produced even if the context checks fail</param>
/// <returns>a token wrapping the city referent, or null when nothing is attached</returns>
static Pullenti.Ner.ReferentToken _tryNounName(List <CityItemToken> li, out Pullenti.Ner.Core.IntOntologyItem oi, bool always) {
    oi = null;
    if (li == null || (li.Count < 2) || ((li[0].Typ != CityItemToken.ItemType.Noun && li[0].Typ != CityItemToken.ItemType.Misc))) {
        return(null);
    }
    bool ok = !li[0].Doubtful;
    if (ok && li[0].Typ == CityItemToken.ItemType.Misc) {
        ok = false;
    }
    string typ = (li[0].Typ == CityItemToken.ItemType.Misc ? null : li[0].Value);
    string typ2 = (li[0].Typ == CityItemToken.ItemType.Misc ? null : li[0].AltValue);
    string probAdj = null;
    // Index of the item holding the name.
    int i1 = 1;
    if ((typ != null && li[i1].Typ == CityItemToken.ItemType.Noun && ((i1 + 1) < li.Count)) && li[0].WhitespacesAfterCount <= 1 && (((Pullenti.Morph.LanguageHelper.EndsWith(typ, "ПОСЕЛОК") || Pullenti.Morph.LanguageHelper.EndsWith(typ, "СЕЛИЩЕ") || typ == "ДЕРЕВНЯ") || typ == "СЕЛО"))) {
        // Two type nouns in a row: the second becomes the additional type and the name moves one item right.
        if (li[i1].BeginToken == li[i1].EndToken) {
            Pullenti.Ner.Address.Internal.AddressItemToken ooo = Pullenti.Ner.Address.Internal.AddressItemToken.TryAttachOrg(li[i1].BeginToken);
            if (ooo != null && ooo.RefToken != null) {
                return(null);
            }
        }
        typ2 = li[i1].Value;
        if (typ2 == "СТАНЦИЯ" && li[i1].BeginToken.IsValue("СТ", null) && ((i1 + 1) < li.Count)) {
            // This abbreviation may instead stand for an adjective; keep that adjective as an
            // alternative name prefix, agreed with the number and gender of the name.
            Pullenti.Ner.MorphCollection m = li[i1 + 1].Morph;
            if (m.Number == Pullenti.Morph.MorphNumber.Plural) {
                probAdj = "СТАРЫЕ";
            } else if (m.Gender == Pullenti.Morph.MorphGender.Feminie) {
                probAdj = "СТАРАЯ";
            } else if (m.Gender == Pullenti.Morph.MorphGender.Masculine) {
                probAdj = "СТАРЫЙ";
            } else {
                probAdj = "СТАРОЕ";
            }
        }
        i1++;
    }
    string name = li[i1].Value ?? ((li[i1].OntoItem == null ? null : li[i1].OntoItem.CanonicText));
    string altName = li[i1].AltValue;
    if (name == null) {
        return(null);
    }
    Pullenti.Ner.MorphCollection mc = li[0].Morph;
    if (i1 == 1 && li[i1].Typ == CityItemToken.ItemType.City && ((li[0].Value == "ГОРОД" || li[0].Value == "МІСТО" || li[0].Typ == CityItemToken.ItemType.Misc))) {
        // Branch 1: city keyword (or Misc item) followed by an item already typed as a city.
        if (typ == null && ((i1 + 1) < li.Count) && li[i1 + 1].Typ == CityItemToken.ItemType.Noun) {
            return(null);
        }
        oi = li[i1].OntoItem;
        if (oi != null) {
            name = oi.CanonicText;
        }
        // oi may be null here (a city item without an ontology entry), so it is null-checked before MiscAttr is read.
        if (name.Length > 2 || (oi != null && oi.MiscAttr != null)) {
            if (!li[1].Doubtful || ((oi != null && oi.MiscAttr != null))) {
                ok = true;
            } else if (!ok && !li[1].IsNewlineBefore) {
                if (li[0].GeoObjectBefore || li[1].GeoObjectAfter) {
                    ok = true;
                } else if (Pullenti.Ner.Address.Internal.StreetDefineHelper.CheckStreetAfter(li[1].EndToken.Next)) {
                    ok = true;
                } else if (li[1].EndToken.Next != null && (li[1].EndToken.Next.GetReferent() is Pullenti.Ner.Date.DateReferent)) {
                    ok = true;
                } else if ((li[1].WhitespacesBeforeCount < 2) && li[1].OntoItem != null) {
                    // The original tested IsNewlineAfter here, but both outcomes set ok to true.
                    ok = true;
                }
            }
            if (li[1].Doubtful && li[1].EndToken.Next != null && li[1].EndToken.Chars == li[1].EndToken.Next.Chars) {
                ok = false;
            }
            if (li[0].BeginToken.Previous != null && li[0].BeginToken.Previous.IsValue("В", null)) {
                ok = true;
            }
        }
        if (!ok) {
            ok = CheckYearAfter(li[1].EndToken.Next);
        }
        if (!ok) {
            ok = CheckCityAfter(li[1].EndToken.Next);
        }
    } else if ((li[i1].Typ == CityItemToken.ItemType.ProperName || li[i1].Typ == CityItemToken.ItemType.City)) {
        // Branch 2: any type noun followed by a proper name or a city.
        if (((li[0].Value == "АДМИНИСТРАЦИЯ" || li[0].Value == "АДМІНІСТРАЦІЯ")) && i1 == 1) {
            return(null);
        }
        if (li[i1].IsNewlineBefore) {
            if (li.Count != 2) {
                return(null);
            }
        }
        if (!li[0].Doubtful) {
            ok = true;
            if (name.Length < 2) {
                ok = false;
            } else if ((name.Length < 3) && li[0].Morph.Number != Pullenti.Morph.MorphNumber.Singular) {
                ok = false;
            }
            if (li[i1].Doubtful && !li[i1].GeoObjectAfter && !li[0].GeoObjectBefore) {
                if (li[i1].Morph.Case.IsGenitive) {
                    if (li[i1].EndToken.Next == null || MiscLocationHelper.CheckGeoObjectAfter(li[i1].EndToken.Next, false) || Pullenti.Ner.Address.Internal.AddressItemToken.CheckHouseAfter(li[i1].EndToken.Next, false, true)) {
                    } else if (li[0].BeginToken.Previous == null || MiscLocationHelper.CheckGeoObjectBefore(li[0].BeginToken)) {
                    } else {
                        ok = false;
                    }
                }
                if (ok) {
                    // Reject when a PERSONPROPERTY referent is found before the noun and the name parses as a PERSON.
                    Pullenti.Ner.ReferentToken rt0 = li[i1].Kit.ProcessReferent("PERSONPROPERTY", li[0].BeginToken.Previous);
                    if (rt0 != null) {
                        Pullenti.Ner.ReferentToken rt1 = li[i1].Kit.ProcessReferent("PERSON", li[i1].BeginToken);
                        if (rt1 != null) {
                            ok = false;
                        }
                    }
                }
            }
            Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(li[i1].BeginToken, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
            if (npt != null) {
                if (npt.EndToken.EndChar > li[i1].EndChar && npt.Adjectives.Count > 0 && !npt.Adjectives[0].EndToken.Next.IsComma) {
                    ok = false;
                } else if (TerrItemToken.m_UnknownRegions.TryParse(npt.EndToken, Pullenti.Ner.Core.TerminParseAttr.FullwordsOnly) != null) {
                    // The phrase ends with an "unknown region" term: require a non-city geo referent next to it.
                    bool ok1 = false;
                    if (li[0].BeginToken.Previous != null) {
                        Pullenti.Ner.Token ttt = li[0].BeginToken.Previous;
                        if (ttt.IsComma && ttt.Previous != null) {
                            ttt = ttt.Previous;
                        }
                        Pullenti.Ner.Geo.GeoReferent geo = ttt.GetReferent() as Pullenti.Ner.Geo.GeoReferent;
                        if (geo != null && !geo.IsCity) {
                            ok1 = true;
                        }
                    }
                    if (npt.EndToken.Next != null) {
                        Pullenti.Ner.Token ttt = npt.EndToken.Next;
                        if (ttt.IsComma && ttt.Next != null) {
                            ttt = ttt.Next;
                        }
                        Pullenti.Ner.Geo.GeoReferent geo = ttt.GetReferent() as Pullenti.Ner.Geo.GeoReferent;
                        if (geo != null && !geo.IsCity) {
                            ok1 = true;
                        }
                    }
                    if (!ok1) {
                        return(null);
                    }
                }
            }
            if (li[0].Value == "ПОРТ") {
                if (li[i1].Chars.IsAllUpper || li[i1].Chars.IsLatinLetter) {
                    return(null);
                }
            }
        } else if (li[0].GeoObjectBefore) {
            ok = true;
        } else if (li[i1].GeoObjectAfter && !li[i1].IsNewlineAfter) {
            ok = true;
        } else {
            ok = CheckYearAfter(li[i1].EndToken.Next);
        }
        if (!ok) {
            ok = CheckStreetAfter(li[i1].EndToken.Next);
        }
        if (!ok && li[0].BeginToken.Previous != null && li[0].BeginToken.Previous.IsValue("В", null)) {
            ok = true;
        }
    } else {
        return(null);
    }
    if (!ok && !always) {
        if (MiscLocationHelper.CheckNearBefore(li[0].BeginToken.Previous) == null) {
            return(null);
        }
    }
    // Drop everything after the name item.
    if (li.Count > (i1 + 1)) {
        li.RemoveRange(i1 + 1, li.Count - i1 - 1);
    }
    Pullenti.Ner.Geo.GeoReferent city = new Pullenti.Ner.Geo.GeoReferent();
    if (oi != null && oi.Referent != null) {
        city = oi.Referent.Clone() as Pullenti.Ner.Geo.GeoReferent;
        city.Occurrence.Clear();
    }
    if (!li[0].Morph.Case.IsUndefined && li[0].Morph.Gender != Pullenti.Morph.MorphGender.Undefined) {
        // A single-token adjective name is re-derived via GetNameEx using the noun's case and gender.
        if (li[i1].EndToken.Morph.Class.IsAdjective && li[i1].BeginToken == li[i1].EndToken) {
            string nam = Pullenti.Ner.Core.ProperNameHelper.GetNameEx(li[i1].BeginToken, li[i1].EndToken, Pullenti.Morph.MorphClass.Adjective, li[0].Morph.Case, li[0].Morph.Gender, false, false);
            if (nam != null && nam != name) {
                name = nam;
            }
        }
    }
    if (li[0].Morph.Case.IsNominative) {
        // With a nominative noun the alternative name is added first and not combined with probAdj below.
        if (altName != null) {
            city.AddName(altName);
        }
        altName = null;
    }
    city.AddName(name);
    if (probAdj != null) {
        city.AddName(probAdj + " " + name);
    }
    if (altName != null) {
        city.AddName(altName);
        if (probAdj != null) {
            city.AddName(probAdj + " " + altName);
        }
    }
    if (typ != null) {
        city.AddTyp(typ);
    } else if (!city.IsCity) {
        city.AddTypCity(li[0].Kit.BaseLanguage);
    }
    if (typ2 != null) {
        city.AddTyp(typ2.ToLower());
    }
    if (li[0].HigherGeo != null && GeoOwnerHelper.CanBeHigher(li[0].HigherGeo, city)) {
        city.Higher = li[0].HigherGeo;
    }
    if (li[0].Typ == CityItemToken.ItemType.Misc) {
        // A Misc item is not part of the resulting span.
        li.RemoveAt(0);
    }
    Pullenti.Ner.ReferentToken res = new Pullenti.Ner.ReferentToken(city, li[0].BeginToken, li[li.Count - 1].EndToken) { Morph = mc };
    if (res.EndToken.Next != null && res.EndToken.Next.IsHiphen && (res.EndToken.Next.Next is Pullenti.Ner.NumberToken)) {
        // "Name-NN" (digits, value below 50): the number becomes part of every name.
        Pullenti.Ner.NumberToken num = res.EndToken.Next.Next as Pullenti.Ner.NumberToken;
        if ((num.Typ == Pullenti.Ner.NumberSpellingType.Digit && !num.Morph.Class.IsAdjective && num.IntValue != null) && (num.IntValue.Value < 50)) {
            foreach (Pullenti.Ner.Slot s in city.Slots) {
                if (s.TypeName == Pullenti.Ner.Geo.GeoReferent.ATTR_NAME) {
                    city.UploadSlot(s, string.Format("{0}-{1}", s.Value, num.Value));
                }
            }
            res.EndToken = num;
        }
    }
    if (li[0].BeginToken == li[0].EndToken && li[0].BeginToken.IsValue("ГОРОДОК", null)) {
        if (Pullenti.Ner.Address.Internal.AddressItemToken.CheckHouseAfter(res.EndToken.Next, true, false)) {
            return(null);
        }
    }
    return(res);
}
/// <summary>
/// Tries to recover a sequence enclosed in quotes or brackets. Nesting, a possibly
/// missing closing element and similar irregularities are supported.
/// </summary>
/// <param name="t">start token; the method returns null unless <c>CanBeStartOfSequence</c> accepts it</param>
/// <param name="attrs">extraction options</param>
/// <param name="maxTokens">limit on scanned non-bracket tokens (in case the closing quote was forgotten)</param>
/// <returns>a <c>BracketSequenceToken</c> metatoken, or null when no sequence is recognised</returns>
public static BracketSequenceToken TryParse(Pullenti.Ner.Token t, BracketParseAttr attrs = BracketParseAttr.No, int maxTokens = 100)
{
    Pullenti.Ner.Token t0 = t;
    if (!CanBeStartOfSequence(t0, false, false))
    {
        return null;
    }

    // brList collects every bracket/quote character met during the scan; element 0 is the opener.
    List<Bracket> brList = new List<Bracket>();
    brList.Add(new Bracket(t0));
    int cou = 0;
    int crlf = 0;
    Pullenti.Ner.Token last = null;
    int lev = 1;
    bool isAssim = brList[0].Char != '«' && m_AssymOPenChars.IndexOf(brList[0].Char) >= 0;
    bool genCase = false;
    bool manyLines = (attrs & BracketParseAttr.CanBeManyLines) != BracketParseAttr.No;

    // Phase 1: walk forward, collecting bracket characters and deciding where to stop.
    for (t = t0.Next; t != null; t = t.Next)
    {
        if (t.IsTableControlChar)
        {
            break;
        }
        last = t;
        if (t.IsCharOf(m_OpenChars) || t.IsCharOf(m_CloseChars))
        {
            if (t.IsNewlineBefore && !manyLines)
            {
                if (t.WhitespacesBeforeCount > 10 || CanBeStartOfSequence(t, false, false))
                {
                    // A '(' at the start of a line is tolerated unless the sequence itself began with '('.
                    if (!(t.IsChar('(') && !t0.IsChar('(')))
                    {
                        last = t.Previous;
                        break;
                    }
                }
            }
            Bracket bb = new Bracket(t);
            brList.Add(bb);
            if (brList.Count > 20)
            {
                break;
            }
            if ((brList.Count == 3 && brList[1].CanBeOpen && bb.CanBeClose) && MustBeCloseChar(bb.Char, brList[1].Char) && MustBeCloseChar(bb.Char, brList[0].Char))
            {
                // The third bracket must close both earlier ones: look ahead on the same line
                // to decide whether scanning should go on.
                bool ok = false;
                for (Pullenti.Ner.Token tt = t.Next; tt != null; tt = tt.Next)
                {
                    if (tt.IsNewlineBefore)
                    {
                        break;
                    }
                    if (tt.IsChar(','))
                    {
                        break;
                    }
                    if (tt.IsChar('.'))
                    {
                        for (tt = tt.Next; tt != null; tt = tt.Next)
                        {
                            if (tt.IsNewlineBefore)
                            {
                                break;
                            }
                            if (tt.IsCharOf(m_OpenChars) || tt.IsCharOf(m_CloseChars))
                            {
                                Bracket bb2 = new Bracket(tt);
                                if (BracketHelper.CanBeEndOfSequence(tt, false, null, false) && CanBeCloseChar(bb2.Char, brList[0].Char))
                                {
                                    ok = true;
                                }
                                break;
                            }
                        }
                        break;
                    }
                    // NOTE(review): this tests the outer token `t`, which is already known to be a
                    // bracket character in this branch, so the condition is always true here.
                    // `tt` may have been intended - confirm before changing, because that would
                    // alter which sequences are accepted.
                    if (t.IsCharOf(m_OpenChars) || t.IsCharOf(m_CloseChars))
                    {
                        ok = true;
                        break;
                    }
                }
                if (!ok)
                {
                    break;
                }
            }
            if (isAssim)
            {
                // For asymmetric openers keep a nesting level and stop when it returns to zero.
                if (bb.CanBeOpen && !bb.CanBeClose && bb.Char == brList[0].Char)
                {
                    lev++;
                }
                else if (bb.CanBeClose && !bb.CanBeOpen && m_OpenChars.IndexOf(brList[0].Char) == m_CloseChars.IndexOf(bb.Char))
                {
                    lev--;
                    if (lev == 0)
                    {
                        break;
                    }
                }
            }
        }
        else
        {
            if ((++cou) > maxTokens)
            {
                break;
            }
            if ((attrs & BracketParseAttr.CanContainsVerbs) == BracketParseAttr.No)
            {
                if (t.Morph.Language.IsCyrillic)
                {
                    if (t.GetMorphClassInDictionary() == Pullenti.Morph.MorphClass.Verb
                        && !t.Morph.Class.IsAdjective
                        && !t.Morph.ContainsAttr("страд.з.", null)
                        && t.Chars.IsAllLower)
                    {
                        string norm = t.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false);
                        if (!Pullenti.Morph.LanguageHelper.EndsWith(norm, "СЯ"))
                        {
                            if (brList.Count > 1 || brList[0].Char != '(')
                            {
                                break;
                            }
                        }
                    }
                }
                else if (t.Morph.Language.IsEn)
                {
                    if (t.Morph.Class == Pullenti.Morph.MorphClass.Verb && t.Chars.IsAllLower)
                    {
                        break;
                    }
                }
                Pullenti.Ner.Referent r = t.GetReferent();
                if (r != null && r.TypeName == "ADDRESS")
                {
                    if (!t0.IsChar('('))
                    {
                        break;
                    }
                }
            }
        }
        if (manyLines)
        {
            if (t.IsNewlineBefore)
            {
                if (t.NewlinesBeforeCount > 1)
                {
                    break;
                }
                crlf++;
            }
            continue;
        }
        if (t.IsNewlineBefore)
        {
            if (t.WhitespacesBeforeCount > 15)
            {
                last = t.Previous;
                break;
            }
            crlf++;
            if (!t.Chars.IsAllLower)
            {
                if (MiscHelper.CanBeStartOfSentence(t))
                {
                    // A new sentence on a new line: continue only if this line holds a closing
                    // character glued to the preceding text before any detached opener.
                    bool has = false;
                    for (Pullenti.Ner.Token tt = t.Next; tt != null; tt = tt.Next)
                    {
                        if (tt.IsNewlineBefore)
                        {
                            break;
                        }
                        if (tt.LengthChar == 1 && tt.IsCharOf(m_OpenChars) && tt.IsWhitespaceBefore)
                        {
                            break;
                        }
                        if (tt.LengthChar == 1 && tt.IsCharOf(m_CloseChars) && !tt.IsWhitespaceBefore)
                        {
                            has = true;
                            break;
                        }
                    }
                    if (!has)
                    {
                        last = t.Previous;
                        break;
                    }
                }
            }
            if ((t.Previous is Pullenti.Ner.MetaToken) && CanBeEndOfSequence((t.Previous as Pullenti.Ner.MetaToken).EndToken, false, null, false))
            {
                last = t.Previous;
                break;
            }
        }
        if (crlf > 1)
        {
            if (brList.Count > 1)
            {
                break;
            }
            if (crlf > 10)
            {
                break;
            }
        }
        if (t.IsChar(';') && t.IsNewlineAfter)
        {
            break;
        }
        // Jump over a whole noun phrase in one step.
        NounPhraseToken npt = NounPhraseHelper.TryParse(t, NounPhraseParseAttr.No, 0, null);
        if (npt != null)
        {
            if (t.IsNewlineBefore)
            {
                genCase = npt.Morph.Case.IsGenitive;
            }
            last = (t = npt.EndToken);
        }
    }

    // Only the opener was found: accept two special shapes that end at a line break.
    if ((brList.Count == 1 && brList[0].CanBeOpen && (last is Pullenti.Ner.MetaToken)) && last.IsNewlineAfter)
    {
        if (BracketHelper.CanBeEndOfSequence((last as Pullenti.Ner.MetaToken).EndToken, false, null, false))
        {
            return new BracketSequenceToken(t0, last);
        }
    }
    if ((brList.Count == 1 && brList[0].CanBeOpen && genCase) && last.IsNewlineAfter && crlf <= 2)
    {
        return new BracketSequenceToken(t0, last);
    }

    // Phase 2: normalise the collected bracket list.
    for (int i = 1; i < (brList.Count - 1); i++)
    {
        if (brList[i].Char == '<' && brList[i + 1].Char == '>')
        {
            brList[i].CanBeOpen = true;
            brList[i + 1].CanBeClose = true;
        }
    }
    List<BracketSequenceToken> internals = null;
    // Drop trailing open/close pairs that belong together but cannot close the opener.
    while (brList.Count > 3)
    {
        int i = brList.Count - 1;
        if ((brList[i].CanBeClose && brList[i - 1].CanBeOpen && !CanBeCloseChar(brList[i].Char, brList[0].Char)) && CanBeCloseChar(brList[i].Char, brList[i - 1].Char))
        {
            brList.RemoveRange(brList.Count - 2, 2);
            continue;
        }
        break;
    }
    // Pull adjacent inner open/close pairs out of the list and remember them as nested sequences.
    while (brList.Count >= 4)
    {
        bool changed = false;
        for (int i = 1; i < (brList.Count - 2); i++)
        {
            if ((brList[i].CanBeOpen && !brList[i].CanBeClose && brList[i + 1].CanBeClose) && !brList[i + 1].CanBeOpen)
            {
                bool ok = false;
                if (MustBeCloseChar(brList[i + 1].Char, brList[i].Char) || brList[i].Char != brList[0].Char)
                {
                    ok = true;
                    if ((i == 1 && ((i + 2) < brList.Count) && brList[i + 2].Char == ')') && brList[i + 1].Char != ')' && CanBeCloseChar(brList[i + 1].Char, brList[i - 1].Char))
                    {
                        brList[i + 2] = brList[i + 1];
                    }
                }
                else if (i > 1 && ((i + 2) < brList.Count) && MustBeCloseChar(brList[i + 2].Char, brList[i - 1].Char))
                {
                    ok = true;
                }
                if (ok)
                {
                    if (internals == null)
                    {
                        internals = new List<BracketSequenceToken>();
                    }
                    internals.Add(new BracketSequenceToken(brList[i].Source, brList[i + 1].Source));
                    brList.RemoveRange(i, 2);
                    changed = true;
                    break;
                }
            }
        }
        if (!changed)
        {
            break;
        }
    }

    // Phase 3: build the result from the first two to four brackets that remain.
    BracketSequenceToken res = null;
    if ((brList.Count >= 4 && brList[1].CanBeOpen && brList[2].CanBeClose) && brList[3].CanBeClose && !brList[3].CanBeOpen)
    {
        if (CanBeCloseChar(brList[3].Char, brList[0].Char))
        {
            res = new BracketSequenceToken(brList[0].Source, brList[3].Source);
            if (brList[0].Source.Next != brList[1].Source || brList[2].Source.Next != brList[3].Source)
            {
                res.Internal.Add(new BracketSequenceToken(brList[1].Source, brList[2].Source));
            }
            if (internals != null)
            {
                res.Internal.AddRange(internals);
            }
        }
    }
    if ((res == null && brList.Count >= 3 && brList[2].CanBeClose) && !brList[2].CanBeOpen)
    {
        if ((attrs & BracketParseAttr.NearCloseBracket) != BracketParseAttr.No)
        {
            if (CanBeCloseChar(brList[1].Char, brList[0].Char))
            {
                return new BracketSequenceToken(brList[0].Source, brList[1].Source);
            }
        }
        bool ok = true;
        if (CanBeCloseChar(brList[2].Char, brList[0].Char) && CanBeCloseChar(brList[1].Char, brList[0].Char) && brList[1].CanBeClose)
        {
            // Both the second and the third bracket could close the opener: inspect the text
            // between them to choose.
            for (t = brList[1].Source; t != brList[2].Source && t != null; t = t.Next)
            {
                if (t.IsNewlineBefore)
                {
                    ok = false;
                    break;
                }
                if (t.Chars.IsLetter && t.Chars.IsAllLower)
                {
                    ok = false;
                    break;
                }
                NounPhraseToken npt = NounPhraseHelper.TryParse(t, NounPhraseParseAttr.No, 0, null);
                if (npt != null)
                {
                    t = npt.EndToken;
                }
            }
            if (ok)
            {
                for (t = brList[0].Source.Next; t != brList[1].Source && t != null; t = t.Next)
                {
                    if (t.IsNewlineBefore)
                    {
                        return new BracketSequenceToken(brList[0].Source, t.Previous);
                    }
                }
            }
            // Scan backwards on the same line, balancing single-character tokens against the opener.
            int lev1 = 0;
            for (Pullenti.Ner.Token tt = brList[0].Source.Previous; tt != null; tt = tt.Previous)
            {
                if (tt.IsNewlineAfter || tt.IsTableControlChar)
                {
                    break;
                }
                if (!(tt is Pullenti.Ner.TextToken))
                {
                    continue;
                }
                if (tt.Chars.IsLetter || tt.LengthChar > 1)
                {
                    continue;
                }
                char ch = (tt as Pullenti.Ner.TextToken).Term[0];
                if (CanBeCloseChar(ch, brList[0].Char))
                {
                    lev1++;
                }
                else if (CanBeCloseChar(brList[1].Char, ch))
                {
                    lev1--;
                    if (lev1 < 0)
                    {
                        return new BracketSequenceToken(brList[0].Source, brList[1].Source);
                    }
                }
            }
        }
        if (ok && CanBeCloseChar(brList[2].Char, brList[0].Char))
        {
            BracketSequenceToken intern = new BracketSequenceToken(brList[1].Source, brList[2].Source);
            res = new BracketSequenceToken(brList[0].Source, brList[2].Source);
            res.Internal.Add(intern);
        }
        else if (ok && CanBeCloseChar(brList[2].Char, brList[1].Char) && brList[0].CanBeOpen)
        {
            if (CanBeCloseChar(brList[2].Char, brList[0].Char))
            {
                BracketSequenceToken intern = new BracketSequenceToken(brList[1].Source, brList[2].Source);
                res = new BracketSequenceToken(brList[0].Source, brList[2].Source);
                res.Internal.Add(intern);
            }
            else if (brList.Count == 3)
            {
                return null;
            }
        }
    }
    if (res == null && brList.Count > 1 && brList[1].CanBeClose)
    {
        res = new BracketSequenceToken(brList[0].Source, brList[1].Source);
    }
    if (res == null && brList.Count > 1 && CanBeCloseChar(brList[1].Char, brList[0].Char))
    {
        res = new BracketSequenceToken(brList[0].Source, brList[1].Source);
    }
    if (res == null && brList.Count == 2 && brList[0].Char == brList[1].Char)
    {
        res = new BracketSequenceToken(brList[0].Source, brList[1].Source);
    }
    if (res != null && internals != null)
    {
        foreach (BracketSequenceToken i in internals)
        {
            // The four-bracket path above has already added every item of `internals`;
            // the Contains check keeps the same nested sequence from being listed twice.
            if (i.BeginChar < res.EndChar && !res.Internal.Contains(i))
            {
                res.Internal.Add(i);
            }
        }
    }

    // Last resort: a following metatoken whose final token is a closing character matching the opener.
    if (res == null)
    {
        cou = 0;
        for (Pullenti.Ner.Token tt = t0.Next; tt != null; tt = tt.Next, cou++)
        {
            if (tt.IsTableControlChar)
            {
                break;
            }
            if (MiscHelper.CanBeStartOfSentence(tt))
            {
                break;
            }
            if (maxTokens > 0 && cou > maxTokens)
            {
                break;
            }
            Pullenti.Ner.MetaToken mt = tt as Pullenti.Ner.MetaToken;
            if (mt == null)
            {
                continue;
            }
            if (mt.EndToken is Pullenti.Ner.TextToken)
            {
                if ((mt.EndToken as Pullenti.Ner.TextToken).IsCharOf(m_CloseChars))
                {
                    Bracket bb = new Bracket(mt.EndToken as Pullenti.Ner.TextToken);
                    if (bb.CanBeClose && CanBeCloseChar(bb.Char, brList[0].Char))
                    {
                        return new BracketSequenceToken(t0, tt);
                    }
                }
            }
        }
    }
    return res;
}
/// <summary>
/// Builds a name string from the tokens between <paramref name="begin"/> and
/// <paramref name="end"/>. For text tokens, a word form matching the requested class, case
/// and gender supplies the text when one is found; otherwise the token's own term is used.
/// </summary>
/// <param name="begin">first token of the span</param>
/// <param name="end">last token of the span</param>
/// <param name="cla">part-of-speech filter for word forms; a zero <c>Value</c> means no filter</param>
/// <param name="mc">case filter for word forms; undefined means no filter</param>
/// <param name="gender">gender filter; if no word form passes with it, the search is repeated without it</param>
/// <param name="ignoreBracketsAndHiphens">
/// when true, bracket tokens and hyphens surrounded by whitespace are skipped and sequences opened by
/// one of "(&lt;[" are appended with their source brackets; when false, hyphen-joined words are merged
/// </param>
/// <param name="ignoreGeoReferent">when true, metatokens (other than <paramref name="begin"/>) whose referent type is "GEO" are skipped</param>
/// <returns>the assembled text, or null when the arguments are unusable or nothing was collected</returns>
public static string GetNameEx(Pullenti.Ner.Token begin, Pullenti.Ner.Token end, Pullenti.Morph.MorphClass cla, Pullenti.Morph.MorphCase mc, Pullenti.Morph.MorphGender gender = Pullenti.Morph.MorphGender.Undefined, bool ignoreBracketsAndHiphens = false, bool ignoreGeoReferent = false)
{
    if (end == null || begin == null)
    {
        return null;
    }
    if (begin.EndChar > end.BeginChar && begin != end)
    {
        return null;
    }

    var buf = new StringBuilder();
    string pendingPrefix = null;
    bool genderGiven = gender != Pullenti.Morph.MorphGender.Undefined;

    for (Pullenti.Ner.Token cur = begin; cur != null && cur.EndChar <= end.EndChar; cur = cur.Next)
    {
        if (buf.Length > 1000)
        {
            break;
        }
        if (cur.IsTableControlChar)
        {
            continue;
        }

        if (ignoreBracketsAndHiphens)
        {
            if (BracketHelper.IsBracket(cur, false))
            {
                if (cur == end)
                {
                    break;
                }
                if (cur.IsCharOf("(<["))
                {
                    BracketSequenceToken seq = BracketHelper.TryParse(cur, BracketParseAttr.No, 100);
                    if (seq != null && seq.EndChar <= end.EndChar)
                    {
                        string inner = GetNameEx(seq.BeginToken.Next, seq.EndToken.Previous, Pullenti.Morph.MorphClass.Undefined, Pullenti.Morph.MorphCase.Undefined, Pullenti.Morph.MorphGender.Undefined, ignoreBracketsAndHiphens, false);
                        if (inner != null)
                        {
                            // A trailing bracket pair around one non-letter, non-referent token is dropped.
                            bool dropTrailing = seq.EndChar == end.EndChar
                                && seq.BeginToken.Next == seq.EndToken.Previous
                                && !seq.BeginToken.Next.Chars.IsLetter
                                && !(seq.BeginToken.Next is Pullenti.Ner.ReferentToken);
                            if (!dropTrailing)
                            {
                                buf.AppendFormat(" {0}{1}{2}", cur.GetSourceText(), inner, seq.EndToken.GetSourceText());
                            }
                        }
                        cur = seq.EndToken;
                    }
                }
                continue;
            }
            if (cur.IsHiphen)
            {
                if (cur == end)
                {
                    break;
                }
                if (cur.IsWhitespaceBefore || cur.IsWhitespaceAfter)
                {
                    continue;
                }
            }
        }

        var word = cur as Pullenti.Ner.TextToken;
        if (word != null)
        {
            // "word-word": keep the left part as a prefix and continue from the hyphen.
            if (!ignoreBracketsAndHiphens
                && (word.Next != null && word.Next.IsHiphen && (word.Next.Next is Pullenti.Ner.TextToken))
                && word != end && word.Next != end)
            {
                pendingPrefix = pendingPrefix == null
                    ? word.Term
                    : string.Format("{0}-{1}", pendingPrefix, word.Term);
                cur = word.Next;
                if (cur == end)
                {
                    break;
                }
                continue;
            }

            string text = null;
            if (cla.Value != 0 || !mc.IsUndefined || genderGiven)
            {
                // Pass 0 applies the gender filter when one was given; pass 1 runs only if
                // pass 0 found nothing and repeats the search without the gender filter.
                for (int pass = 0; pass < 2; pass++)
                {
                    bool useGender = genderGiven && pass == 0;
                    foreach (Pullenti.Morph.MorphBaseInfo item in word.Morph.Items)
                    {
                        var form = item as Pullenti.Morph.MorphWordForm;
                        if (form == null)
                        {
                            continue;
                        }
                        if (cla.Value != 0 && (form.Class.Value & cla.Value) == 0)
                        {
                            continue;
                        }
                        if (!mc.IsUndefined && (form.Case & mc).IsUndefined)
                        {
                            continue;
                        }
                        if (useGender && (form.Gender & gender) == Pullenti.Morph.MorphGender.Undefined)
                        {
                            continue;
                        }
                        if (text == null || form.NormalCase == word.Term)
                        {
                            text = form.NormalCase;
                        }
                    }
                    if (text != null || !genderGiven)
                    {
                        break;
                    }
                }
            }

            if (text == null)
            {
                text = word.Term;
                if (word.Chars.IsLastLower && word.LengthChar > 2)
                {
                    // Cut the source text right after its last upper-case character.
                    text = word.GetSourceText();
                    int lastUpper = text.Length - 1;
                    while (lastUpper >= 0 && !char.IsUpper(text[lastUpper]))
                    {
                        lastUpper--;
                    }
                    if (lastUpper >= 0)
                    {
                        text = text.Substring(0, lastUpper + 1);
                    }
                }
            }

            if (pendingPrefix != null)
            {
                text = string.Format("{0}{1}{2}", pendingPrefix, ignoreBracketsAndHiphens ? " " : "-", text);
                pendingPrefix = null;
            }

            if (buf.Length > 0 && text.Length > 0)
            {
                bool needSpace;
                if (char.IsLetterOrDigit(text[0]))
                {
                    needSpace = buf[buf.Length - 1] != '-';
                }
                else
                {
                    needSpace = !ignoreBracketsAndHiphens && BracketHelper.CanBeStartOfSequence(word, false, false);
                }
                if (needSpace)
                {
                    buf.Append(' ');
                }
            }
            buf.Append(text);
        }
        else if (cur is Pullenti.Ner.NumberToken)
        {
            if (buf.Length > 0 && (cur.IsWhitespaceBefore || buf[buf.Length - 1] != '-'))
            {
                buf.Append(' ');
            }
            var num = cur as Pullenti.Ner.NumberToken;
            bool singleWordAdjective = (cur.Morph.Class.IsAdjective
                    && num.Typ == Pullenti.Ner.NumberSpellingType.Words
                    && num.BeginToken == num.EndToken)
                && (num.BeginToken is Pullenti.Ner.TextToken);
            if (singleWordAdjective)
            {
                buf.Append((num.BeginToken as Pullenti.Ner.TextToken).Term);
            }
            else
            {
                buf.Append(num.Value);
            }
        }
        else if (cur is Pullenti.Ner.MetaToken)
        {
            if ((ignoreGeoReferent && cur != begin && cur.GetReferent() != null) && cur.GetReferent().TypeName == "GEO")
            {
                continue;
            }
            var meta = cur as Pullenti.Ner.MetaToken;
            string nested = GetNameEx(meta.BeginToken, meta.EndToken, cla, mc, gender, ignoreBracketsAndHiphens, ignoreGeoReferent);
            if (!string.IsNullOrEmpty(nested))
            {
                if (buf.Length > 0 && (cur.IsWhitespaceBefore || buf[buf.Length - 1] != '-'))
                {
                    buf.Append(' ');
                }
                buf.Append(nested);
            }
        }

        if (cur == end)
        {
            break;
        }
    }

    return buf.Length == 0 ? null : buf.ToString();
}
/// <summary>
/// Scans the token chain of <paramref name="kit"/> twice.
/// The first pass parses transport item lists, turns them into referent tokens via
/// <c>TryAttach</c>, registers and embeds them, and collects their model strings and names.
/// The second pass embeds referent tokens for later mentions of those models and names.
/// </summary>
/// <param name="kit">analysis kit whose token chain is read and modified through <c>EmbedToken</c></param>
public override void Process(Pullenti.Ner.Core.AnalysisKit kit)
{
    Pullenti.Ner.Core.AnalyzerData ad = kit.GetAnalyzerData(this);
    Pullenti.Ner.Core.TerminCollection models = new Pullenti.Ner.Core.TerminCollection();
    Dictionary<string, List<Pullenti.Ner.Referent>> objsByModel = new Dictionary<string, List<Pullenti.Ner.Referent>>();
    Pullenti.Ner.Core.TerminCollection objByNames = new Pullenti.Ner.Core.TerminCollection();

    // Pass 1: explicit transport descriptions.
    for (Pullenti.Ner.Token t = kit.FirstToken; t != null; t = t.Next)
    {
        List<Pullenti.Ner.Transport.Internal.TransItemToken> its = Pullenti.Ner.Transport.Internal.TransItemToken.TryParseList(t, 10);
        if (its == null)
        {
            continue;
        }
        List<Pullenti.Ner.ReferentToken> rts = this.TryAttach(its, false);
        if (rts == null)
        {
            continue;
        }
        foreach (Pullenti.Ner.ReferentToken rt in rts)
        {
            // Look back over at most 1000 tokens for an earlier transport referent that already
            // contains every slot of the new one, and reuse it if found.
            int cou = 0;
            for (Pullenti.Ner.Token tt = t.Previous; tt != null && (cou < 1000); tt = tt.Previous, cou++)
            {
                TransportReferent tr = tt.GetReferent() as TransportReferent;
                if (tr == null)
                {
                    continue;
                }
                bool ok = true;
                foreach (Pullenti.Ner.Slot s in rt.Referent.Slots)
                {
                    if (tr.FindSlot(s.TypeName, s.Value, true) == null)
                    {
                        ok = false;
                        break;
                    }
                }
                if (ok)
                {
                    rt.Referent = tr;
                    break;
                }
            }
            rt.Referent = ad.RegisterReferent(rt.Referent);
            kit.EmbedToken(rt);
            t = rt;

            foreach (Pullenti.Ner.Slot s in rt.Referent.Slots)
            {
                if (s.TypeName == TransportReferent.ATTR_MODEL)
                {
                    // Skip a missing or empty model value: mod[0] below would otherwise throw.
                    string mod = s.Value == null ? null : s.Value.ToString();
                    if (string.IsNullOrEmpty(mod))
                    {
                        continue;
                    }
                    // k == 0 handles the bare model, k == 1 the "brand model" combination.
                    for (int k = 0; k < 2; k++)
                    {
                        if (!char.IsDigit(mod[0]))
                        {
                            List<Pullenti.Ner.Referent> li;
                            if (!objsByModel.TryGetValue(mod, out li))
                            {
                                li = new List<Pullenti.Ner.Referent>();
                                objsByModel.Add(mod, li);
                            }
                            if (!li.Contains(rt.Referent))
                            {
                                li.Add(rt.Referent);
                            }
                            models.AddString(mod, li, null, false);
                        }
                        if (k > 0)
                        {
                            break;
                        }
                        string brand = rt.Referent.GetStringValue(TransportReferent.ATTR_BRAND);
                        if (brand == null)
                        {
                            break;
                        }
                        mod = string.Format("{0} {1}", brand, mod);
                    }
                }
                else if (s.TypeName == TransportReferent.ATTR_NAME && s.Value != null)
                {
                    objByNames.Add(new Pullenti.Ner.Core.Termin(s.Value.ToString()) { Tag = rt.Referent });
                }
            }
        }
    }

    if (objsByModel.Count == 0 && objByNames.Termins.Count == 0)
    {
        return;
    }

    // Pass 2: later mentions of the collected models and names.
    for (Pullenti.Ner.Token t = kit.FirstToken; t != null; t = t.Next)
    {
        // A known name that fills a bracket/quote sequence exactly.
        Pullenti.Ner.Core.BracketSequenceToken br = Pullenti.Ner.Core.BracketHelper.TryParse(t, Pullenti.Ner.Core.BracketParseAttr.No, 10);
        if (br != null)
        {
            Pullenti.Ner.Core.TerminToken toks = objByNames.TryParse(t.Next, Pullenti.Ner.Core.TerminParseAttr.No);
            if (toks != null && toks.EndToken.Next == br.EndToken)
            {
                Pullenti.Ner.ReferentToken quoted = new Pullenti.Ner.ReferentToken(toks.Termin.Tag as Pullenti.Ner.Referent, br.BeginToken, br.EndToken);
                kit.EmbedToken(quoted);
                t = quoted;
                continue;
            }
        }
        if (!(t is Pullenti.Ner.TextToken))
        {
            continue;
        }
        if (!t.Chars.IsLetter)
        {
            continue;
        }

        Pullenti.Ner.Core.TerminToken tok = models.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
        if (tok == null)
        {
            // Names are tried only for tokens that are not all lower-case.
            if (!t.Chars.IsAllLower)
            {
                tok = objByNames.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
            }
            if (tok == null)
            {
                continue;
            }
        }

        // With no whitespace after the match, it must be followed by one of ",.)" or a bracket.
        if (!tok.IsWhitespaceAfter)
        {
            if (tok.EndToken.Next == null || !tok.EndToken.Next.IsCharOf(",.)"))
            {
                if (!Pullenti.Ner.Core.BracketHelper.IsBracket(tok.EndToken.Next, false))
                {
                    continue;
                }
            }
        }

        // A model tag is a list of referents and is used only when it holds exactly one;
        // a name tag is the referent itself.
        Pullenti.Ner.Referent tr;
        List<Pullenti.Ner.Referent> li = tok.Termin.Tag as List<Pullenti.Ner.Referent>;
        if (li != null && li.Count == 1)
        {
            tr = li[0];
        }
        else
        {
            tr = tok.Termin.Tag as Pullenti.Ner.Referent;
        }
        if (tr == null)
        {
            continue;
        }

        // A brand item directly before the mention is absorbed into the token and added to the referent.
        Pullenti.Ner.Transport.Internal.TransItemToken tit = Pullenti.Ner.Transport.Internal.TransItemToken.TryParse(tok.BeginToken.Previous, null, false, true);
        if (tit != null && tit.Typ == Pullenti.Ner.Transport.Internal.TransItemToken.Typs.Brand)
        {
            tr.AddSlot(TransportReferent.ATTR_BRAND, tit.Value, false, 0);
            tok.BeginToken = tit.BeginToken;
        }
        Pullenti.Ner.ReferentToken mention = new Pullenti.Ner.ReferentToken(tr, tok.BeginToken, tok.EndToken);
        kit.EmbedToken(mention);
        t = mention;
    }
}
/// <summary>
/// Tries to parse a number followed by a postfix found in <c>m_Postfixes</c> (whose termin tags are
/// <c>NumberExType</c> values), optionally preceded by a one-character money sign and optionally
/// followed by a bracketed repetition of the value.
/// </summary>
/// <param name="t">token to start from (a money sign, a number token, '(' or an adjective text token)</param>
/// <returns>the parsed <c>NumberExToken</c>, or null when nothing is recognised</returns>
public static Pullenti.Ner.Core.NumberExToken TryParseNumberWithPostfix(Pullenti.Ner.Token t)
{
    if (t == null)
    {
        return null;
    }
    Pullenti.Ner.Token t0 = t;

    // A leading one-character money sign: remember what IsMoneyChar returned and step to the next token.
    string isDollar = null;
    if (t.LengthChar == 1 && t.Next != null)
    {
        isDollar = Pullenti.Ner.Core.NumberHelper.IsMoneyChar(t);
        if (isDollar != null)
        {
            t = t.Next;
        }
    }

    Pullenti.Ner.NumberToken nt = t as Pullenti.Ner.NumberToken;
    if (nt == null)
    {
        // "(number) <money postfix>" that is not itself preceded by a number.
        if ((!(t.Previous is Pullenti.Ner.NumberToken) && t.IsChar('(') && (t.Next is Pullenti.Ner.NumberToken)) && t.Next.Next != null && t.Next.Next.IsChar(')'))
        {
            Pullenti.Ner.Core.TerminToken toks1 = m_Postfixes.TryParse(t.Next.Next.Next, Pullenti.Ner.Core.TerminParseAttr.No);
            if (toks1 != null && ((Pullenti.Ner.Core.NumberExType)toks1.Termin.Tag) == Pullenti.Ner.Core.NumberExType.Money)
            {
                Pullenti.Ner.NumberToken nt0 = t.Next as Pullenti.Ner.NumberToken;
                Pullenti.Ner.Core.NumberExToken res = new Pullenti.Ner.Core.NumberExToken(t, toks1.EndToken, nt0.Value, nt0.Typ, Pullenti.Ner.Core.NumberExType.Money) { AltRealValue = nt0.RealValue, Morph = toks1.BeginToken.Morph };
                return _correctMoney(res, toks1.BeginToken);
            }
        }

        // A single adjective whose term splits into a number word plus a postfix.
        Pullenti.Ner.TextToken tt = t as Pullenti.Ner.TextToken;
        if (tt == null || !tt.Morph.Class.IsAdjective)
        {
            return null;
        }
        string val = tt.Term;
        for (int i = 4; i < (val.Length - 5); i++)
        {
            string v = val.Substring(0, i);
            List<Pullenti.Ner.Core.Termin> li = Pullenti.Ner.Core.NumberHelper.m_Nums.FindTerminsByString(v, tt.Morph.Language);
            if (li == null)
            {
                continue;
            }
            string vv = val.Substring(i);
            List<Pullenti.Ner.Core.Termin> lii = m_Postfixes.FindTerminsByString(vv, tt.Morph.Language);
            if (lii != null && lii.Count > 0)
            {
                Pullenti.Ner.Core.NumberExToken re = new Pullenti.Ner.Core.NumberExToken(t, t, ((int)li[0].Tag).ToString(), Pullenti.Ner.NumberSpellingType.Words, (Pullenti.Ner.Core.NumberExType)lii[0].Tag) { Morph = t.Morph };
                _correctExtTypes(re);
                return re;
            }
            // Only the first prefix that is a known number word is tried.
            break;
        }
        return null;
    }

    if (t.Next == null && isDollar == null)
    {
        return null;
    }
    double f = nt.RealValue;
    if (double.IsNaN(f))
    {
        return null;
    }

    // Extend over a fractional part or a following number group.
    Pullenti.Ner.Token t1 = nt.Next;
    if ((t1 != null && t1.IsCharOf(",.")) || ((t1 is Pullenti.Ner.NumberToken) && (t1.WhitespacesBeforeCount < 3)))
    {
        Pullenti.Ner.NumberToken tt11 = Pullenti.Ner.Core.NumberHelper.TryParseRealNumber(nt, false, false);
        if (tt11 != null)
        {
            t1 = tt11.EndToken.Next;
            f = tt11.RealValue;
        }
    }
    if (t1 == null)
    {
        if (isDollar == null)
        {
            return null;
        }
    }
    else if ((t1.Next != null && t1.Next.IsValue("С", "З") && t1.Next.Next != null) && t1.Next.Next.IsValue("ПОЛОВИНА", null))
    {
        f += 0.5;
        t1 = t1.Next.Next;
    }
    if (t1 != null && t1.IsHiphen && t1.Next != null)
    {
        t1 = t1.Next;
    }

    bool det = false;
    double altf = f;
    // Skip a two-character zero that follows a hyphen.
    if (((t1 is Pullenti.Ner.NumberToken) && t1.Previous != null && t1.Previous.IsHiphen) && (t1 as Pullenti.Ner.NumberToken).IntValue == 0 && t1.LengthChar == 2)
    {
        t1 = t1.Next;
    }

    // Bracketed detail after the number: "(" + number or "НОЛЬ" + ...
    if ((t1 != null && t1.Next != null && t1.IsChar('(')) && ((t1.Next is Pullenti.Ner.NumberToken) || t1.Next.IsValue("НОЛЬ", null)) && t1.Next.Next != null)
    {
        Pullenti.Ner.NumberToken nt1 = t1.Next as Pullenti.Ner.NumberToken;
        double val = (double)0;
        if (nt1 != null)
        {
            val = nt1.RealValue;
        }
        if (Math.Floor(f) == Math.Floor(val))
        {
            Pullenti.Ner.Token ttt = t1.Next.Next;
            if (ttt.IsChar(')'))
            {
                t1 = ttt.Next;
                det = true;
                if ((t1 is Pullenti.Ner.NumberToken) && (t1 as Pullenti.Ner.NumberToken).IntValue != null && (t1 as Pullenti.Ner.NumberToken).IntValue.Value == 0)
                {
                    t1 = t1.Next;
                }
            }
            else if ((ttt is Pullenti.Ner.NumberToken) && ((ttt as Pullenti.Ner.NumberToken).RealValue < 100) && ttt.Next != null
                && ttt.Next.IsChar('/') && ttt.Next.Next != null
                && ttt.Next.Next.GetSourceText() == "100" && ttt.Next.Next.Next != null
                && ttt.Next.Next.Next.IsChar(')'))
            {
                // "<whole> <nn>/100)": accepted only when nn equals GetDecimalRest100 of the value.
                int rest = GetDecimalRest100(f);
                if ((ttt as Pullenti.Ner.NumberToken).IntValue != null && rest == (ttt as Pullenti.Ner.NumberToken).IntValue.Value)
                {
                    t1 = ttt.Next.Next.Next.Next;
                    det = true;
                }
            }
            else if ((ttt.IsValue("ЦЕЛЫХ", null) && (ttt.Next is Pullenti.Ner.NumberToken) && ttt.Next.Next != null) && ttt.Next.Next.Next != null && ttt.Next.Next.Next.IsChar(')'))
            {
                // "<whole> ЦЕЛЫХ <n> <fraction word>)": scale n by the fraction word.
                Pullenti.Ner.NumberToken num2 = ttt.Next as Pullenti.Ner.NumberToken;
                altf = num2.RealValue;
                if (ttt.Next.Next.IsValue("ДЕСЯТЫЙ", null))
                {
                    altf /= 10;
                }
                else if (ttt.Next.Next.IsValue("СОТЫЙ", null))
                {
                    altf /= 100;
                }
                else if (ttt.Next.Next.IsValue("ТЫСЯЧНЫЙ", null))
                {
                    altf /= 1000;
                }
                else if (ttt.Next.Next.IsValue("ДЕСЯТИТЫСЯЧНЫЙ", null))
                {
                    altf /= 10000;
                }
                else if (ttt.Next.Next.IsValue("СТОТЫСЯЧНЫЙ", null))
                {
                    altf /= 100000;
                }
                else if (ttt.Next.Next.IsValue("МИЛЛИОННЫЙ", null))
                {
                    altf /= 1000000;
                }
                if (altf < 1)
                {
                    altf += val;
                    t1 = ttt.Next.Next.Next.Next;
                    det = true;
                }
            }
            else
            {
                // "<number> <money postfix>)"
                Pullenti.Ner.Core.TerminToken toks1 = m_Postfixes.TryParse(ttt, Pullenti.Ner.Core.TerminParseAttr.No);
                if (toks1 != null
                    && ((Pullenti.Ner.Core.NumberExType)toks1.Termin.Tag) == Pullenti.Ner.Core.NumberExType.Money
                    && toks1.EndToken.Next != null && toks1.EndToken.Next.IsChar(')'))
                {
                    Pullenti.Ner.Core.NumberExToken res = new Pullenti.Ner.Core.NumberExToken(t, toks1.EndToken.Next, nt.Value, nt.Typ, Pullenti.Ner.Core.NumberExType.Money) { RealValue = f, AltRealValue = altf, Morph = toks1.BeginToken.Morph };
                    return _correctMoney(res, toks1.BeginToken);
                }
                // Otherwise parse the bracket content recursively; it must end right before ')'.
                Pullenti.Ner.Core.NumberExToken res2 = TryParseNumberWithPostfix(t1.Next);
                if (res2 != null && res2.EndToken.Next != null && res2.EndToken.Next.IsChar(')'))
                {
                    res2.BeginToken = t;
                    res2.EndToken = res2.EndToken.Next;
                    res2.AltRealValue = res2.RealValue;
                    res2.RealValue = f;
                    _correctExtTypes(res2);
                    if (res2.WhitespacesAfterCount < 2)
                    {
                        Pullenti.Ner.Core.TerminToken toks2 = m_Postfixes.TryParse(res2.EndToken.Next, Pullenti.Ner.Core.TerminParseAttr.No);
                        if (toks2 != null && ((Pullenti.Ner.Core.NumberExType)toks2.Termin.Tag) == Pullenti.Ner.Core.NumberExType.Money)
                        {
                            res2.EndToken = toks2.EndToken;
                        }
                    }
                    return res2;
                }
            }
        }
        else if (nt1 != null && nt1.Typ == Pullenti.Ner.NumberSpellingType.Words && nt.Typ == Pullenti.Ner.NumberSpellingType.Digit)
        {
            // Digits followed by a differing value in words: keep the words value as the alternative.
            altf = nt1.RealValue;
            Pullenti.Ner.Token ttt = t1.Next.Next;
            if (ttt.IsChar(')'))
            {
                t1 = ttt.Next;
                det = true;
            }
            if (!det)
            {
                altf = f;
            }
        }
    }

    // Skip a bracket sequence that starts with "(СУММА".
    if ((t1 != null && t1.IsChar('(') && t1.Next != null) && t1.Next.IsValue("СУММА", null))
    {
        Pullenti.Ner.Core.BracketSequenceToken br = Pullenti.Ner.Core.BracketHelper.TryParse(t1, Pullenti.Ner.Core.BracketParseAttr.No, 100);
        if (br != null)
        {
            t1 = br.EndToken.Next;
        }
    }

    // Money sign in front: the result is money; only a multiplier suffix is still checked.
    if (isDollar != null)
    {
        Pullenti.Ner.Token te = null;
        if (t1 != null)
        {
            te = t1.Previous;
        }
        else
        {
            // Nothing follows the number: take the last token of the chain.
            for (t1 = t0; t1 != null; t1 = t1.Next)
            {
                if (t1.Next == null)
                {
                    te = t1;
                }
            }
        }
        if (te == null)
        {
            return null;
        }
        if (te.IsHiphen && te.Next != null)
        {
            if (te.Next.IsValue("МИЛЛИОННЫЙ", null))
            {
                f *= 1000000;
                altf *= 1000000;
                te = te.Next;
            }
            else if (te.Next.IsValue("МИЛЛИАРДНЫЙ", null))
            {
                f *= 1000000000;
                altf *= 1000000000;
                te = te.Next;
            }
        }
        if (!te.IsWhitespaceAfter && (te.Next is Pullenti.Ner.TextToken))
        {
            if (te.Next.IsValue("M", null))
            {
                f *= 1000000;
                altf *= 1000000;
                te = te.Next;
            }
            else if (te.Next.IsValue("BN", null))
            {
                f *= 1000000000;
                altf *= 1000000000;
                te = te.Next;
            }
        }
        return new Pullenti.Ner.Core.NumberExToken(t0, te, "", nt.Typ, Pullenti.Ner.Core.NumberExType.Money) { RealValue = f, AltRealValue = altf, ExTypParam = isDollar };
    }

    if (t1 == null || (t1.IsNewlineBefore && !det))
    {
        return null;
    }

    Pullenti.Ner.Core.TerminToken toks = m_Postfixes.TryParse(t1, Pullenti.Ner.Core.TerminParseAttr.No);
    if ((toks == null && det && (t1 is Pullenti.Ner.NumberToken)) && (t1 as Pullenti.Ner.NumberToken).Value == "0")
    {
        toks = m_Postfixes.TryParse(t1.Next, Pullenti.Ner.Core.TerminParseAttr.No);
    }
    if (toks == null && t1.IsChar('р'))
    {
        // A lone 'р' is read as "RUB" only when one of the 10 tokens before the start
        // is one of the listed words or carries a "MONEY" referent.
        int cou = 10;
        for (Pullenti.Ner.Token ttt = t0.Previous; ttt != null && cou > 0; ttt = ttt.Previous, cou--)
        {
            bool moneyContext = ttt.IsValue("СУММА", null) || ttt.IsValue("НАЛИЧНЫЙ", null) || ttt.IsValue("БАЛАНС", null)
                || (ttt.GetReferent() != null && ttt.GetReferent().TypeName == "MONEY");
            if (!moneyContext)
            {
                continue;
            }
            toks = new Pullenti.Ner.Core.TerminToken(t1, t1) { Termin = m_Postfixes.FindTerminsByCanonicText("RUB")[0] };
            if (t1.Next != null && t1.Next.IsChar('.'))
            {
                toks.EndToken = t1.Next;
            }
            Pullenti.Ner.Core.NumberExType ty = (Pullenti.Ner.Core.NumberExType)toks.Termin.Tag;
            return new Pullenti.Ner.Core.NumberExToken(t, toks.EndToken, nt.Value, nt.Typ, ty) { RealValue = f, AltRealValue = altf, Morph = toks.BeginToken.Morph, ExTypParam = "RUB" };
        }
    }

    if (toks != null)
    {
        t1 = toks.EndToken;
        if (!t1.IsChar('.') && t1.Next != null && t1.Next.IsChar('.'))
        {
            // Absorb the following period unless the last token equals the termin's canonical
            // text or is not a letter token.
            bool isCanonical = (t1 is Pullenti.Ner.TextToken) && t1.IsValue(toks.Termin.Terms[0].CanonicalText, null);
            if (!isCanonical && t1.Chars.IsLetter)
            {
                t1 = t1.Next;
            }
        }
        if (toks.Termin.CanonicText == "LTL")
        {
            return null;
        }
        if (toks.BeginToken == t1)
        {
            // A one-token postfix that is a preposition or conjunction standing alone between
            // whitespace is rejected.
            if (t1.Morph.Class.IsPreposition || t1.Morph.Class.IsConjunction)
            {
                if (t1.IsWhitespaceBefore && t1.IsWhitespaceAfter)
                {
                    return null;
                }
            }
        }
        Pullenti.Ner.Core.NumberExType ty = (Pullenti.Ner.Core.NumberExType)toks.Termin.Tag;
        Pullenti.Ner.Core.NumberExToken res = new Pullenti.Ner.Core.NumberExToken(t, t1, nt.Value, nt.Typ, ty) { RealValue = f, AltRealValue = altf, Morph = toks.BeginToken.Morph };
        if (ty != Pullenti.Ner.Core.NumberExType.Money)
        {
            _correctExtTypes(res);
            return res;
        }
        return _correctMoney(res, toks.BeginToken);
    }

    Pullenti.Ner.Core.NumberExToken pfx = _attachSpecPostfix(t1);
    if (pfx != null)
    {
        pfx.BeginToken = t;
        pfx.Value = nt.Value;
        pfx.Typ = nt.Typ;
        pfx.RealValue = f;
        pfx.AltRealValue = altf;
        return pfx;
    }

    // "<number> <preposition/conjunction> <number with postfix>": take the type from the second
    // number; the returned token covers only the first number.
    if (t1.Next != null && (t1.Morph.Class.IsPreposition || t1.Morph.Class.IsConjunction))
    {
        if (!t1.IsValue("НА", null))
        {
            Pullenti.Ner.Core.NumberExToken nn = TryParseNumberWithPostfix(t1.Next);
            if (nn != null)
            {
                return new Pullenti.Ner.Core.NumberExToken(t, t, nt.Value, nt.Typ, nn.ExTyp) { RealValue = f, AltRealValue = altf, ExTyp2 = nn.ExTyp2, ExTypParam = nn.ExTypParam };
            }
        }
    }

    // Unit written together with an 'x' and followed directly by the next number.
    if (!t1.IsWhitespaceAfter && (t1.Next is Pullenti.Ner.NumberToken) && (t1 is Pullenti.Ner.TextToken))
    {
        string term = (t1 as Pullenti.Ner.TextToken).Term;
        Pullenti.Ner.Core.NumberExType ty = Pullenti.Ner.Core.NumberExType.Undefined;
        if (term == "СМХ" || term == "CMX")
        {
            ty = Pullenti.Ner.Core.NumberExType.Santimeter;
        }
        else if (term == "MX" || term == "МХ")
        {
            ty = Pullenti.Ner.Core.NumberExType.Meter;
        }
        else if (term == "MMX" || term == "ММХ")
        {
            ty = Pullenti.Ner.Core.NumberExType.Millimeter;
        }
        if (ty != Pullenti.Ner.Core.NumberExType.Undefined)
        {
            return new Pullenti.Ner.Core.NumberExToken(t, t1, nt.Value, nt.Typ, ty) { RealValue = f, AltRealValue = altf, MultAfter = true };
        }
    }
    return null;
}
/// <summary>
/// Tries to recognise a single part of an organization name that starts at token <paramref name="t"/>.
/// </summary>
/// <remarks>
/// The checks below are tried in order; the first one that applies decides the result.
/// 1. Token carrying a referent: a referent whose TypeName is "DENOMINATION" yields a denomination item;
///    a GeoReferent written in Latin letters is prepended to the item parsed (recursively) from the next
///    token when that item is Latin too; every other referent yields null.
/// 2. A token that is not a TextToken yields null.
/// 3. Dictionary hits, each also retried after a leading comma where the code does so: m_StdTails
///    (IsStdTail when the termin has no Tag, IsEmptyWord otherwise), m_StdNames (IsStdName),
///    OrgItemEngItem (reported as IsStdTail with the item's FullValue).
/// 4. An all-lowercase token after a <paramref name="prev"/> that is neither all-lower nor capital-upper yields null.
/// 5. ", X и Y" enumeration after prev: both noun phrases must have the same Chars as prev and share a case,
///    neither may start an OrgItemTypeToken; the result covers only the first noun phrase and inherits
///    prev.Preposition.
/// 6. "&amp;" / "AND" / "UND" after prev: the value is prefixed with "&amp; ".
/// 7. Tokens that are not letters yield null. Otherwise a noun phrase is tried (accepted when it links
///    semantically to prev, is genitive, or shares a case with prev), optionally extended by a following
///    lowercase genitive phrase or by a comma + agreeing participle + dative/genitive phrase. Further branches
///    cover: a noun phrase accepted because of <paramref name="extOnto"/> or an instrumental phrase after a
///    noun-phrase prev; a conjunction followed by a noun phrase; the prepositions ПО, ПРИ, ЗА, С, В, НА with
///    case checks; and English "OF" followed by a non-lowercase Latin word.
/// 8. Fallback: the single text token itself, extended over parts glued by hyphen, slash, backslash or dot
///    without surrounding whitespace.
/// Post-processing of the result: IsInDictionary is set when any letters token in the range (other than a
/// conjunction or preposition) has a dictionary word form; a number token directly following a single all-upper
/// token (optionally through a hyphen) is appended to Value; for a single token whose Chars.IsLastLower the Value
/// is the source text truncated after its last upper-case character.
/// NOTE(review): the local <c>expinf</c> is assigned from DerivateService.FindDerivates but is not read anywhere
/// else in this method.
/// </remarks>
/// <param name="t">First token of the candidate name part; null yields null.</param>
/// <param name="prev">Previously parsed name part, or null; used for agreement (Chars, case) checks.</param>
/// <param name="extOnto">Passed through to recursive calls; when true, any parsed noun phrase is accepted in the second noun-phrase branch.</param>
/// <returns>The recognised name part, or null when nothing suitable starts at <paramref name="t"/>.</returns>
static OrgItemNameToken _TryAttach(Pullenti.Ner.Token t, OrgItemNameToken prev, bool extOnto) { if (t == null) { return(null); } Pullenti.Ner.Referent r = t.GetReferent(); if (r != null) { if (r.TypeName == "DENOMINATION") { return new OrgItemNameToken(t, t) { Value = r.ToString(true, t.Kit.BaseLanguage, 0), IsDenomination = true } } ; if ((r is Pullenti.Ner.Geo.GeoReferent) && t.Chars.IsLatinLetter) { OrgItemNameToken res2 = _TryAttach(t.Next, prev, extOnto); if (res2 != null && res2.Chars.IsLatinLetter) { res2.BeginToken = t; res2.Value = string.Format("{0} {1}", Pullenti.Ner.Core.MiscHelper.GetTextValueOfMetaToken(t as Pullenti.Ner.MetaToken, Pullenti.Ner.Core.GetTextAttr.No), res2.Value); res2.IsInDictionary = false; return(res2); } } return(null); } Pullenti.Ner.TextToken tt = t as Pullenti.Ner.TextToken; if (tt == null) { return(null); } OrgItemNameToken res = null; Pullenti.Ner.Core.TerminToken tok = m_StdTails.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No); if (tok == null && t.IsChar(',')) { tok = m_StdTails.TryParse(t.Next, Pullenti.Ner.Core.TerminParseAttr.No); } if (tok != null) { return new OrgItemNameToken(t, tok.EndToken) { Value = tok.Termin.CanonicText, IsStdTail = tok.Termin.Tag == null, IsEmptyWord = tok.Termin.Tag != null, Morph = tok.Morph } } ; if ((((tok = m_StdNames.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No)))) != null) { return new OrgItemNameToken(t, tok.EndToken) { Value = tok.Termin.CanonicText, IsStdName = true } } ; OrgItemEngItem eng = OrgItemEngItem.TryAttach(t, false); if (eng == null && t.IsChar(',')) { eng = OrgItemEngItem.TryAttach(t.Next, false); } if (eng != null) { return new OrgItemNameToken(t, eng.EndToken) { Value = eng.FullValue, IsStdTail = true } } ; if (tt.Chars.IsAllLower && prev != null) { if (!prev.Chars.IsAllLower && !prev.Chars.IsCapitalUpper) { return(null); } } if (tt.IsChar(',') && prev != null) { Pullenti.Ner.Core.NounPhraseToken npt1 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t.Next, 
Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null); if (npt1 == null || npt1.Chars != prev.Chars || ((npt1.Morph.Case & prev.Morph.Case)).IsUndefined) { return(null); } OrgItemTypeToken ty = OrgItemTypeToken.TryAttach(t.Next, false, null); if (ty != null) { return(null); } if (npt1.EndToken.Next == null || !npt1.EndToken.Next.IsValue("И", null)) { return(null); } Pullenti.Ner.Token t1 = npt1.EndToken.Next; Pullenti.Ner.Core.NounPhraseToken npt2 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t1.Next, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null); if (npt2 == null || npt2.Chars != prev.Chars || ((npt2.Morph.Case & npt1.Morph.Case & prev.Morph.Case)).IsUndefined) { return(null); } ty = OrgItemTypeToken.TryAttach(t1.Next, false, null); if (ty != null) { return(null); } res = new OrgItemNameToken(npt1.BeginToken, npt1.EndToken) { Morph = npt1.Morph, Value = npt1.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false) }; res.IsNounPhrase = true; res.IsAfterConjunction = true; if (prev.Preposition != null) { res.Preposition = prev.Preposition; } return(res); } if (((tt.IsChar('&') || tt.IsValue("AND", null) || tt.IsValue("UND", null))) && prev != null) { if ((tt.Next is Pullenti.Ner.TextToken) && tt.LengthChar == 1 && tt.Next.Chars.IsLatinLetter) { res = new OrgItemNameToken(tt, tt.Next) { Chars = tt.Next.Chars }; res.IsAfterConjunction = true; res.Value = "& " + (tt.Next as Pullenti.Ner.TextToken).Term; return(res); } res = OrgItemNameToken.TryAttach(tt.Next, null, extOnto, false); if (res == null || res.Chars != prev.Chars) { return(null); } res.IsAfterConjunction = true; res.Value = "& " + res.Value; return(res); } if (!tt.Chars.IsLetter) { return(null); } List <Pullenti.Semantic.Utils.DerivateGroup> expinf = null; if (prev != null && prev.EndToken.GetMorphClassInDictionary().IsNoun) { string wo = prev.EndToken.GetNormalCaseText(Pullenti.Morph.MorphClass.Noun, Pullenti.Morph.MorphNumber.Singular, 
Pullenti.Morph.MorphGender.Undefined, false); expinf = Pullenti.Semantic.Utils.DerivateService.FindDerivates(wo, true, prev.EndToken.Morph.Language); } Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null); if (npt != null && npt.InternalNoun != null) { npt = null; } bool explOk = false; if (npt != null && prev != null && prev.EndToken.GetMorphClassInDictionary().IsNoun) { Pullenti.Ner.Core.NounPhraseToken npt0 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(prev.EndToken, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null); if (npt0 != null) { List <Pullenti.Semantic.Core.SemanticLink> links = Pullenti.Semantic.Core.SemanticHelper.TryCreateLinks(npt0, npt, null); if (links.Count > 0) { explOk = true; } } } if (npt != null && ((explOk || npt.Morph.Case.IsGenitive || ((prev != null && !((prev.Morph.Case & npt.Morph.Case)).IsUndefined))))) { Pullenti.Morph.MorphClass mc = npt.BeginToken.GetMorphClassInDictionary(); if (mc.IsVerb || mc.IsPronoun) { return(null); } if (mc.IsAdverb) { if (npt.BeginToken.Next != null && npt.BeginToken.Next.IsHiphen) { } else { return(null); } } if (mc.IsPreposition) { return(null); } if (mc.IsNoun && npt.Chars.IsAllLower) { Pullenti.Morph.MorphCase ca = npt.Morph.Case; if ((!ca.IsDative && !ca.IsGenitive && !ca.IsInstrumental) && !ca.IsPrepositional) { return(null); } } res = new OrgItemNameToken(npt.BeginToken, npt.EndToken) { Morph = npt.Morph, Value = npt.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false) }; res.IsNounPhrase = true; if ((npt.EndToken.WhitespacesAfterCount < 2) && (npt.EndToken.Next is Pullenti.Ner.TextToken)) { Pullenti.Ner.Core.NounPhraseToken npt2 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(npt.EndToken.Next, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null); if (npt2 != null && npt2.Morph.Case.IsGenitive && npt2.Chars.IsAllLower) { OrgItemTypeToken typ = 
OrgItemTypeToken.TryAttach(npt.EndToken.Next, true, null); OrgItemEponymToken epo = OrgItemEponymToken.TryAttach(npt.EndToken.Next, false); Pullenti.Ner.ReferentToken rtt = t.Kit.ProcessReferent("PERSONPROPERTY", npt.EndToken.Next); if (typ == null && epo == null && ((rtt == null || rtt.Morph.Number == Pullenti.Morph.MorphNumber.Plural))) { res.EndToken = npt2.EndToken; res.Value = string.Format("{0} {1}", res.Value, Pullenti.Ner.Core.MiscHelper.GetTextValueOfMetaToken(npt2, Pullenti.Ner.Core.GetTextAttr.No)); } } else if (npt.EndToken.Next.IsComma && (npt.EndToken.Next.Next is Pullenti.Ner.TextToken)) { Pullenti.Ner.Token tt2 = npt.EndToken.Next.Next; Pullenti.Morph.MorphClass mv2 = tt2.GetMorphClassInDictionary(); if (mv2.IsAdjective && mv2.IsVerb) { Pullenti.Morph.MorphBaseInfo bi = new Pullenti.Morph.MorphBaseInfo() { Case = npt.Morph.Case, Gender = npt.Morph.Gender, Number = npt.Morph.Number }; if (tt2.Morph.CheckAccord(bi, false, false)) { npt2 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(tt2.Next, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null); if (npt2 != null && ((npt2.Morph.Case.IsDative || npt2.Morph.Case.IsGenitive)) && npt2.Chars.IsAllLower) { res.EndToken = npt2.EndToken; res.Value = string.Format("{0} {1}", res.Value, Pullenti.Ner.Core.MiscHelper.GetTextValue(npt.EndToken.Next, res.EndToken, Pullenti.Ner.Core.GetTextAttr.No)); } } } } } if (explOk) { res.IsAfterConjunction = true; } } else if (npt != null && ((((prev != null && prev.IsNounPhrase && npt.Morph.Case.IsInstrumental)) || extOnto))) { res = new OrgItemNameToken(npt.BeginToken, npt.EndToken) { Morph = npt.Morph, Value = npt.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false) }; res.IsNounPhrase = true; } else if (tt.IsAnd) { res = TryAttach(tt.Next, prev, extOnto, false); if (res == null || !res.IsNounPhrase || prev == null) { return(null); } if (((prev.Morph.Case & res.Morph.Case)).IsUndefined) { return(null); } if 
(prev.Morph.Number != Pullenti.Morph.MorphNumber.Undefined && res.Morph.Number != Pullenti.Morph.MorphNumber.Undefined) { if (((prev.Morph.Number & res.Morph.Number)) == Pullenti.Morph.MorphNumber.Undefined) { if (prev.Chars != res.Chars) { return(null); } OrgItemTypeToken ty = OrgItemTypeToken.TryAttach(res.EndToken.Next, false, null); if (ty != null) { return(null); } } } Pullenti.Morph.CharsInfo ci = res.Chars; res.Chars = ci; res.IsAfterConjunction = true; return(res); } else if (((tt.Term == "ПО" || tt.Term == "ПРИ" || tt.Term == "ЗА") || tt.Term == "С" || tt.Term == "В") || tt.Term == "НА") { npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t.Next, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null); if (npt != null) { if (m_VervotWords.TryParse(npt.EndToken, Pullenti.Ner.Core.TerminParseAttr.No) != null) { return(null); } bool ok = false; if (tt.Term == "ПО") { ok = npt.Morph.Case.IsDative; } else if (tt.Term == "С") { ok = npt.Morph.Case.IsInstrumental; } else if (tt.Term == "ЗА") { ok = npt.Morph.Case.IsGenitive | npt.Morph.Case.IsInstrumental; } else if (tt.Term == "НА") { ok = npt.Morph.Case.IsPrepositional; } else if (tt.Term == "В") { ok = npt.Morph.Case.IsDative | npt.Morph.Case.IsPrepositional; if (ok) { ok = false; if (t.Next.IsValue("СФЕРА", null) || t.Next.IsValue("ОБЛАСТЬ", null)) { ok = true; } } } else if (tt.Term == "ПРИ") { ok = npt.Morph.Case.IsPrepositional; if (ok) { if (OrgItemTypeToken.TryAttach(tt.Next, true, null) != null) { ok = false; } else { Pullenti.Ner.ReferentToken rt = tt.Kit.ProcessReferent(Pullenti.Ner.Org.OrganizationAnalyzer.ANALYZER_NAME, tt.Next); if (rt != null) { ok = false; } } } string s = npt.Noun.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false); if (s == "ПОДДЕРЖКА" || s == "УЧАСТИЕ") { ok = false; } } else { ok = npt.Morph.Case.IsPrepositional; } if (ok) { res = new OrgItemNameToken(t, npt.EndToken) { Morph = npt.Morph, Value = npt.GetNormalCaseText(null, 
Pullenti.Morph.MorphNumber.Singular, Pullenti.Morph.MorphGender.Undefined, false), Chars = npt.Chars }; res.IsNounPhrase = true; res.Preposition = tt.Term; if (((res.Value == "ДЕЛО" || res.Value == "ВОПРОС")) && !res.IsNewlineAfter) { OrgItemNameToken res2 = _TryAttach(res.EndToken.Next, res, extOnto); if (res2 != null && res2.Morph.Case.IsGenitive) { res.Value = string.Format("{0} {1}", res.Value, res2.Value); res.EndToken = res2.EndToken; for (Pullenti.Ner.Token ttt = res2.EndToken.Next; ttt != null; ttt = ttt.Next) { if (!ttt.IsCommaAnd) { break; } OrgItemNameToken res3 = _TryAttach(ttt.Next, res2, extOnto); if (res3 == null) { break; } res.Value = string.Format("{0} {1}", res.Value, res3.Value); res.EndToken = res3.EndToken; if (ttt.IsAnd) { break; } ttt = res.EndToken; } } } } } if (res == null) { return(null); } } else if (tt.Term == "OF") { Pullenti.Ner.Token t1 = tt.Next; if (t1 != null && Pullenti.Ner.Core.MiscHelper.IsEngArticle(t1)) { t1 = t1.Next; } if (t1 != null && t1.Chars.IsLatinLetter && !t1.Chars.IsAllLower) { res = new OrgItemNameToken(t, t1) { Chars = t1.Chars, Morph = t1.Morph }; for (Pullenti.Ner.Token ttt = t1.Next; ttt != null; ttt = ttt.Next) { if (ttt.WhitespacesBeforeCount > 2) { break; } if (Pullenti.Ner.Core.MiscHelper.IsEngAdjSuffix(ttt)) { ttt = ttt.Next; continue; } if (!ttt.Chars.IsLatinLetter) { break; } if (ttt.Morph.Class.IsPreposition) { break; } t1 = (res.EndToken = ttt); } res.Value = Pullenti.Ner.Core.MiscHelper.GetTextValue(t, t1, Pullenti.Ner.Core.GetTextAttr.IgnoreArticles); res.Preposition = tt.Term; return(res); } } if (res == null) { if (tt.Chars.IsLatinLetter && tt.LengthChar == 1) { } else if (tt.Chars.IsAllLower || (tt.LengthChar < 2)) { if (!tt.Chars.IsLatinLetter || prev == null || !prev.Chars.IsLatinLetter) { return(null); } } if (tt.Chars.IsCyrillicLetter) { Pullenti.Morph.MorphClass mc = tt.GetMorphClassInDictionary(); if (mc.IsVerb || mc.IsAdverb) { return(null); } } else if (tt.Chars.IsLatinLetter && 
!tt.IsWhitespaceAfter) { if (!tt.IsWhitespaceAfter && (tt.LengthChar < 5)) { if (tt.Next is Pullenti.Ner.NumberToken) { return(null); } } } res = new OrgItemNameToken(tt, tt) { Value = tt.Term, Morph = tt.Morph }; for (t = tt.Next; t != null; t = t.Next) { if ((((t.IsHiphen || t.IsCharOf("\\/"))) && t.Next != null && (t.Next is Pullenti.Ner.TextToken)) && !t.IsWhitespaceBefore && !t.IsWhitespaceAfter) { t = t.Next; res.EndToken = t; res.Value = string.Format("{0}{1}{2}", res.Value, (t.Previous.IsChar('.') ? '.' : '-'), (t as Pullenti.Ner.TextToken).Term); } else if (t.IsChar('.')) { if (!t.IsWhitespaceAfter && !t.IsWhitespaceBefore && (t.Next is Pullenti.Ner.TextToken)) { res.EndToken = t.Next; t = t.Next; res.Value = string.Format("{0}.{1}", res.Value, (t as Pullenti.Ner.TextToken).Term); } else if ((t.Next != null && !t.IsNewlineAfter && t.Next.Chars.IsLatinLetter) && tt.Chars.IsLatinLetter) { res.EndToken = t; } else { break; } } else { break; } } } for (Pullenti.Ner.Token t0 = res.BeginToken; t0 != null; t0 = t0.Next) { if ((((tt = t0 as Pullenti.Ner.TextToken))) != null && tt.IsLetters) { if (!tt.Morph.Class.IsConjunction && !tt.Morph.Class.IsPreposition) { foreach (Pullenti.Morph.MorphBaseInfo mf in tt.Morph.Items) { if ((mf as Pullenti.Morph.MorphWordForm).IsInDictionary) { res.IsInDictionary = true; } } } } if (t0 == res.EndToken) { break; } } if (res.BeginToken == res.EndToken && res.BeginToken.Chars.IsAllUpper) { if (res.EndToken.Next != null && !res.EndToken.IsWhitespaceAfter) { Pullenti.Ner.Token t1 = res.EndToken.Next; if (t1.Next != null && !t1.IsWhitespaceAfter && t1.IsHiphen) { t1 = t1.Next; } if (t1 is Pullenti.Ner.NumberToken) { res.Value += (t1 as Pullenti.Ner.NumberToken).Value; res.EndToken = t1; } } } if (res.BeginToken == res.EndToken && res.BeginToken.Chars.IsLastLower) { string src = res.BeginToken.GetSourceText(); for (int i = src.Length - 1; i >= 0; i--) { if (char.IsUpper(src[i])) { res.Value = src.Substring(0, i + 1); break; } } } 
return(res); }
/// <summary>
/// Finishes a money-typed number token: selects the currency code stored in <c>ExTypParam</c> and, when the
/// amount is followed by a number plus a term from <c>m_SmallMoney</c>, folds that fractional part into the value.
/// </summary>
/// <param name="res">Token to complete; it is modified in place.</param>
/// <param name="t1">Token at which the currency postfix is re-parsed with <c>m_Postfixes</c>.</param>
/// <returns><paramref name="res"/>, or null when <paramref name="t1"/> is null or no postfix variant remains.</returns>
static Pullenti.Ner.Core.NumberExToken _correctMoney(Pullenti.Ner.Core.NumberExToken res, Pullenti.Ner.Token t1)
{
    if (t1 == null)
    {
        return null;
    }

    List<Pullenti.Ner.Core.TerminToken> variants = m_Postfixes.TryParseAll(t1, Pullenti.Ner.Core.TerminParseAttr.No);
    if (variants == null || variants.Count == 0)
    {
        return null;
    }

    // A GEO referent right after the currency word may narrow the variants by its ALPHA2 value.
    Pullenti.Ner.Token follower = variants[0].EndToken.Next;
    Pullenti.Ner.Referent geo = null;
    if (follower != null)
    {
        geo = follower.GetReferent();
    }
    string countryCode = null;
    if (geo != null && geo.TypeName == "GEO")
    {
        countryCode = geo.GetStringValue("ALPHA2");
    }

    if (countryCode != null)
    {
        variants.RemoveAll(v => !v.Termin.CanonicText.StartsWith(countryCode));
        if (variants.Count == 0)
        {
            // Nothing matched the country: go back to the unfiltered list.
            variants = m_Postfixes.TryParseAll(t1, Pullenti.Ner.Core.TerminParseAttr.No);
        }
    }

    if (variants.Count > 1)
    {
        // Still ambiguous: prefer the "default" country for the bare currency word.
        string defaultPrefix;
        switch (variants[0].Termin.Terms[0].CanonicalText)
        {
            case "РУБЛЬ":
            case "RUBLE":
                defaultPrefix = "RU";
                break;
            case "ДОЛЛАР":
            case "ДОЛАР":
            case "DOLLAR":
                defaultPrefix = "US";
                break;
            case "ФУНТ":
            case "POUND":
                defaultPrefix = "UK";
                break;
            default:
                defaultPrefix = null;
                break;
        }
        if (defaultPrefix != null)
        {
            variants.RemoveAll(v => !v.Termin.CanonicText.StartsWith(defaultPrefix) && v.Termin.CanonicText != "GBP");
        }
        // In this case the geo token is never absorbed into the result.
        countryCode = null;
    }

    if (variants.Count < 1)
    {
        return null;
    }
    res.ExTypParam = variants[0].Termin.CanonicText;
    if (countryCode != null && follower != null)
    {
        res.EndToken = follower;
    }

    Pullenti.Ner.Token cur = res.EndToken.Next;
    if (cur != null && cur.IsCommaAnd)
    {
        cur = cur.Next;
    }

    if ((cur is Pullenti.Ner.NumberToken) && cur.Next != null && (cur.WhitespacesAfterCount < 4))
    {
        Pullenti.Ner.NumberToken minor = cur as Pullenti.Ner.NumberToken;
        Pullenti.Ner.Token probe = cur.Next;

        // Skip a bracketed repetition of the same number, e.g. "50 (50) ...".
        bool bracketedNumber = probe != null && probe.IsChar('(')
            && (probe.Next is Pullenti.Ner.NumberToken)
            && probe.Next.Next != null && probe.Next.Next.IsChar(')');
        if (bracketedNumber && minor.Value == (probe.Next as Pullenti.Ner.NumberToken).Value)
        {
            probe = probe.Next.Next.Next;
        }

        Pullenti.Ner.Core.TerminToken small = m_SmallMoney.TryParse(probe, Pullenti.Ner.Core.TerminParseAttr.No);
        if (small == null && probe != null && probe.IsChar(')'))
        {
            small = m_SmallMoney.TryParse(probe.Next, Pullenti.Ner.Core.TerminParseAttr.No);
        }

        if (small != null && minor.IntValue != null)
        {
            int limit = (int)small.Termin.Tag;
            int amount = minor.IntValue.Value;
            if (amount < limit)
            {
                double share = (double)amount;
                share /= limit;

                // Main value: add the share only when there is no fractional part yet.
                double rest = res.RealValue - ((long)res.RealValue);
                int restHundredths = (int)(((rest * 100) + 0.0001));
                if (restHundredths > 0 && amount != restHundredths)
                {
                    res.AltRestMoney = amount;
                }
                else if (rest == 0)
                {
                    res.RealValue += share;
                }

                // Alternative value: same rule.
                rest = res.AltRealValue - ((long)res.AltRealValue);
                restHundredths = (int)(((rest * 100) + 0.0001));
                if (restHundredths > 0 && amount != restHundredths)
                {
                    res.AltRestMoney = amount;
                }
                else if (rest == 0)
                {
                    res.AltRealValue += share;
                }

                res.EndToken = small.EndToken;
            }
        }
    }
    else if ((cur is Pullenti.Ner.TextToken) && cur.IsValue("НОЛЬ", null))
    {
        Pullenti.Ner.Core.TerminToken small = m_SmallMoney.TryParse(cur.Next, Pullenti.Ner.Core.TerminParseAttr.No);
        if (small != null)
        {
            res.EndToken = small.EndToken;
        }
    }

    return res;
}
/// <summary>
/// Parses one part of an organization name at <paramref name="t"/>. Wraps <c>_TryAttach</c> and adds handling of
/// "ОРДЕНА ..." fragments, names from the external ontology, hyphen- and conjunction-joined continuations, and
/// counting of standard organization nouns (<c>StdOrgNameNouns</c>).
/// </summary>
/// <param name="t">First token of the candidate; null yields null.</param>
/// <param name="prev">Previously parsed name part, or null.</param>
/// <param name="extOnto">When true and nothing is recognised, a geo referent or any text token except ';' is returned as is.</param>
/// <param name="first">When true, a leading Cyrillic preposition other than ПО / ПРИ is rejected.</param>
/// <returns>The parsed name part, or null.</returns>
public static OrgItemNameToken TryAttach(Pullenti.Ner.Token t, OrgItemNameToken prev, bool extOnto, bool first)
{
    if (t == null)
    {
        return null;
    }

    // "ОРДЕНА <award>": the award description is returned as an ignored part.
    if (t.IsValue("ОРДЕНА", null) && t.Next != null)
    {
        Pullenti.Ner.Core.NounPhraseToken award = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t.Next, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
        if (award != null)
        {
            Pullenti.Ner.Token last = award.EndToken;
            bool hasContinuation = (last.IsValue("ЗНАК", null) || last.IsValue("ДРУЖБА", null)) && (last.WhitespacesAfterCount < 2);
            if (hasContinuation)
            {
                award = Pullenti.Ner.Core.NounPhraseHelper.TryParse(last.Next, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                if (award != null)
                {
                    last = award.EndToken;
                }
            }
            return new OrgItemNameToken(t, last) { IsIgnoredPart = true };
        }

        if (t.Next.GetMorphClassInDictionary().IsProperSurname)
        {
            return new OrgItemNameToken(t, t.Next) { IsIgnoredPart = true };
        }

        Pullenti.Ner.ReferentToken person = t.Kit.ProcessReferent("PERSON", t.Next);
        if (person != null)
        {
            return new OrgItemNameToken(t, person.EndToken) { IsIgnoredPart = true };
        }

        if ((t.WhitespacesAfterCount < 2) && Pullenti.Ner.Core.BracketHelper.CanBeStartOfSequence(t.Next, true, false))
        {
            Pullenti.Ner.Core.BracketSequenceToken quoted = Pullenti.Ner.Core.BracketHelper.TryParse(t.Next, Pullenti.Ner.Core.BracketParseAttr.NearCloseBracket, 10);
            if (quoted != null && (quoted.LengthChar < 40))
            {
                return new OrgItemNameToken(t, quoted.EndToken) { IsIgnoredPart = true };
            }
        }
    }

    // The first part may start with a Cyrillic preposition only if it is ПО or ПРИ.
    if (first && t.Chars.IsCyrillicLetter && t.Morph.Class.IsPreposition
        && !t.IsValue("ПО", null) && !t.IsValue("ПРИ", null))
    {
        return null;
    }

    OrgItemNameToken res = _TryAttach(t, prev, extOnto);
    if (res == null)
    {
        if (!extOnto)
        {
            return null;
        }
        bool takeAsIs = (t.GetReferent() is Pullenti.Ner.Geo.GeoReferent) || ((t is Pullenti.Ner.TextToken) && !t.IsChar(';'));
        if (takeAsIs)
        {
            return new OrgItemNameToken(t, t) { Value = t.GetSourceText() };
        }
        return null;
    }

    // A longer match among the ontology's pure names extends the first part.
    if (prev == null && !extOnto && t.Kit.Ontology != null)
    {
        Pullenti.Ner.Org.OrganizationAnalyzer.OrgAnalyzerData data = t.Kit.Ontology._getAnalyzerData(Pullenti.Ner.Org.OrganizationAnalyzer.ANALYZER_NAME) as Pullenti.Ner.Org.OrganizationAnalyzer.OrgAnalyzerData;
        if (data != null)
        {
            Pullenti.Ner.Core.TerminToken pure = data.OrgPureNames.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
            if (pure != null && pure.EndChar > res.EndChar)
            {
                res.EndToken = pure.EndToken;
            }
        }
    }

    // After a lowercase part, a non-lowercase part is accepted only for Latin text or standard nouns.
    if (prev != null && !extOnto
        && prev.Chars.IsAllLower && !res.Chars.IsAllLower && !res.IsStdTail && !res.IsStdName)
    {
        bool bothLatin = prev.Chars.IsLatinLetter && res.Chars.IsLatinLetter;
        if (!bothLatin && m_StdNouns.TryParse(res.BeginToken, Pullenti.Ner.Core.TerminParseAttr.No) == null)
        {
            return null;
        }
    }

    // "<part>-<word>" written without spaces.
    if (res.EndToken.Next != null && !res.EndToken.IsWhitespaceAfter
        && res.EndToken.Next.IsHiphen && !res.EndToken.Next.IsWhitespaceAfter)
    {
        Pullenti.Ner.TextToken glued = res.EndToken.Next.Next as Pullenti.Ner.TextToken;
        if (glued != null && (glued.Chars == res.Chars || glued.Chars.IsAllUpper))
        {
            res.EndToken = glued;
            res.Value = res.Value + "-" + glued.Term;
        }
    }

    // "<part> and <part>" separated by exactly one whitespace on each side.
    if (res.EndToken.Next != null && res.EndToken.Next.IsAnd
        && res.EndToken.WhitespacesAfterCount == 1 && res.EndToken.Next.WhitespacesAfterCount == 1)
    {
        OrgItemNameToken second = _TryAttach(res.EndToken.Next.Next, prev, extOnto);
        if (second != null && second.Chars == res.Chars
            && OrgItemTypeToken.TryAttach(res.EndToken.Next.Next, false, null) == null
            && !((second.Morph.Case & res.Morph.Case)).IsUndefined)
        {
            res.EndToken = second.EndToken;
            string conjunction = res.Kit.BaseLanguage.IsUa ? "ТА" : "И";
            res.Value = res.Value + " " + conjunction + " " + second.Value;
        }
    }

    // Count standard organization nouns inside the part.
    for (Pullenti.Ner.Token cur = res.BeginToken; cur != null && cur.EndChar <= res.EndChar; cur = cur.Next)
    {
        if (m_StdNouns.TryParse(cur, Pullenti.Ner.Core.TerminParseAttr.No) != null)
        {
            res.StdOrgNameNouns++;
        }
    }

    if (m_StdNouns.TryParse(res.EndToken, Pullenti.Ner.Core.TerminParseAttr.No) == null)
    {
        return res;
    }

    // The part ends with a standard noun: try to absorb a following chain of standard nouns.
    int chainLength = 1;
    Pullenti.Ner.Token newEnd = res.EndToken;
    bool hasTermNoun = !_isNotTermNoun(res.EndToken);
    bool bracketOpen = false;
    for (Pullenti.Ner.Token cur = res.EndToken.Next; cur != null; cur = cur.Next)
    {
        if (cur.IsTableControlChar)
        {
            break;
        }
        if (cur.IsChar('('))
        {
            if (!hasTermNoun)
            {
                break;
            }
            bracketOpen = true;
            continue;
        }
        if (cur.IsChar(')'))
        {
            bracketOpen = false;
            newEnd = cur;
            break;
        }
        if (!(cur is Pullenti.Ner.TextToken))
        {
            break;
        }
        if (cur.WhitespacesBeforeCount > 1
            && (cur.NewlinesBeforeCount > 1 || cur.Chars != res.EndToken.Chars))
        {
            break;
        }
        if (cur.Morph.Class.IsPreposition || cur.IsCommaAnd)
        {
            continue;
        }

        Pullenti.Morph.MorphClass dictClass = cur.GetMorphClassInDictionary();
        if (!dictClass.IsNoun && !dictClass.IsAdjective)
        {
            break;
        }

        Pullenti.Ner.Core.NounPhraseToken phrase = Pullenti.Ner.Core.NounPhraseHelper.TryParse(cur, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
        if (phrase == null)
        {
            if (dictClass == Pullenti.Morph.MorphClass.Adjective)
            {
                continue;
            }
            break;
        }
        if (m_StdNouns.TryParse(phrase.EndToken, Pullenti.Ner.Core.TerminParseAttr.No) == null)
        {
            break;
        }
        if (phrase.EndToken.Chars != res.EndToken.Chars)
        {
            break;
        }

        // These nouns (or a preceding ПРИ) may start a separate organization; stop if one is found.
        bool mayStartOrg = phrase.EndToken.IsValue("УПРАВЛЕНИЕ", null)
            || phrase.EndToken.IsValue("ИНСТИТУТ", null)
            || phrase.EndToken.IsValue("УПРАВЛІННЯ", null)
            || phrase.EndToken.IsValue("ІНСТИТУТ", null)
            || cur.Previous.IsValue("ПРИ", null);
        if (mayStartOrg)
        {
            Pullenti.Ner.ReferentToken org = cur.Kit.ProcessReferent(Pullenti.Ner.Org.OrganizationAnalyzer.ANALYZER_NAME, cur);
            if (org != null)
            {
                break;
            }
        }

        chainLength++;
        cur = phrase.EndToken;
        if (!_isNotTermNoun(cur))
        {
            hasTermNoun = true;
            newEnd = cur;
        }
    }

    if (hasTermNoun && !bracketOpen)
    {
        res.StdOrgNameNouns += chainLength;
        res.EndToken = newEnd;
    }
    return res;
}
/// <summary>
/// Builds a <c>WeaponReferent</c> from a parsed sequence of weapon items and returns it wrapped in a one-element list.
/// </summary>
/// <remarks>
/// Items are consumed left to right and stored as slots; consumption stops at the first item that conflicts with
/// a slot already filled. When no external noun was found, type/brand/model slots may be inherited from a weapon
/// referent met earlier in the text. The result is rejected (null) when there is not enough evidence that the
/// sequence denotes a weapon.
/// </remarks>
/// <param name="its">Parsed weapon items; null or empty yields null.</param>
/// <param name="attach">Not used by this method.</param>
/// <returns>A list with a single referent token spanning the consumed items, or null.</returns>
List<Pullenti.Ner.ReferentToken> TryAttach(List<Pullenti.Ner.Weapon.Internal.WeaponItemToken> its, bool attach)
{
    // Guard: the code below indexes its[0] unconditionally.
    if (its == null || its.Count == 0)
    {
        return null;
    }

    WeaponReferent tr = new WeaponReferent();
    Pullenti.Ner.Token t1 = null;
    Pullenti.Ner.Weapon.Internal.WeaponItemToken noun = null;
    Pullenti.Ner.Weapon.Internal.WeaponItemToken brand = null;
    Pullenti.Ner.Weapon.Internal.WeaponItemToken model = null;

    foreach (Pullenti.Ner.Weapon.Internal.WeaponItemToken it in its)
    {
        if (it.Typ == Pullenti.Ner.Weapon.Internal.WeaponItemToken.Typs.Noun)
        {
            if (its.Count == 1)
            {
                return null;
            }
            // A second, different type terminates the sequence.
            if (tr.FindSlot(WeaponReferent.ATTR_TYPE, null, true) != null
                && tr.FindSlot(WeaponReferent.ATTR_TYPE, it.Value, true) == null)
            {
                break;
            }
            if (!it.IsInternal)
            {
                noun = it;
            }
            tr.AddSlot(WeaponReferent.ATTR_TYPE, it.Value, false, 0);
            if (it.AltValue != null)
            {
                tr.AddSlot(WeaponReferent.ATTR_TYPE, it.AltValue, false, 0);
            }
            t1 = it.EndToken;
            continue;
        }

        if (it.Typ == Pullenti.Ner.Weapon.Internal.WeaponItemToken.Typs.Brand)
        {
            if (tr.FindSlot(WeaponReferent.ATTR_BRAND, null, true) != null
                && tr.FindSlot(WeaponReferent.ATTR_BRAND, it.Value, true) == null)
            {
                break;
            }
            // An explicit brand confirms a doubtful noun.
            if (!it.IsInternal && noun != null && noun.IsDoubt)
            {
                noun.IsDoubt = false;
            }
            brand = it;
            tr.AddSlot(WeaponReferent.ATTR_BRAND, it.Value, false, 0);
            t1 = it.EndToken;
            continue;
        }

        if (it.Typ == Pullenti.Ner.Weapon.Internal.WeaponItemToken.Typs.Model)
        {
            if (tr.FindSlot(WeaponReferent.ATTR_MODEL, null, true) != null
                && tr.FindSlot(WeaponReferent.ATTR_MODEL, it.Value, true) == null)
            {
                break;
            }
            model = it;
            tr.AddSlot(WeaponReferent.ATTR_MODEL, it.Value, false, 0);
            if (it.AltValue != null)
            {
                tr.AddSlot(WeaponReferent.ATTR_MODEL, it.AltValue, false, 0);
            }
            t1 = it.EndToken;
            continue;
        }

        if (it.Typ == Pullenti.Ner.Weapon.Internal.WeaponItemToken.Typs.Name)
        {
            if (tr.FindSlot(WeaponReferent.ATTR_NAME, null, true) != null)
            {
                break;
            }
            tr.AddSlot(WeaponReferent.ATTR_NAME, it.Value, false, 0);
            if (it.AltValue != null)
            {
                tr.AddSlot(WeaponReferent.ATTR_NAME, it.AltValue, false, 0);
            }
            t1 = it.EndToken;
            continue;
        }

        if (it.Typ == Pullenti.Ner.Weapon.Internal.WeaponItemToken.Typs.Number)
        {
            if (tr.FindSlot(WeaponReferent.ATTR_NUMBER, null, true) != null)
            {
                break;
            }
            tr.AddSlot(WeaponReferent.ATTR_NUMBER, it.Value, false, 0);
            if (it.AltValue != null)
            {
                tr.AddSlot(WeaponReferent.ATTR_NUMBER, it.AltValue, false, 0);
            }
            t1 = it.EndToken;
            continue;
        }

        if (it.Typ == Pullenti.Ner.Weapon.Internal.WeaponItemToken.Typs.Caliber)
        {
            if (tr.FindSlot(WeaponReferent.ATTR_CALIBER, null, true) != null)
            {
                break;
            }
            tr.AddSlot(WeaponReferent.ATTR_CALIBER, it.Value, false, 0);
            if (it.AltValue != null)
            {
                tr.AddSlot(WeaponReferent.ATTR_CALIBER, it.AltValue, false, 0);
            }
            t1 = it.EndToken;
            continue;
        }

        if (it.Typ == Pullenti.Ner.Weapon.Internal.WeaponItemToken.Typs.Developer)
        {
            tr.AddSlot(WeaponReferent.ATTR_REF, it.Ref, false, 0);
            t1 = it.EndToken;
            continue;
        }

        if (it.Typ == Pullenti.Ner.Weapon.Internal.WeaponItemToken.Typs.Date)
        {
            if (tr.FindSlot(WeaponReferent.ATTR_DATE, null, true) != null)
            {
                break;
            }
            tr.AddSlot(WeaponReferent.ATTR_DATE, it.Ref, true, 0);
            t1 = it.EndToken;
            continue;
        }
        // Items of any other kind are skipped without extending the span.
    }

    bool hasGoodNoun = (noun == null ? false : !noun.IsDoubt);
    WeaponReferent prev = null;

    if (noun == null)
    {
        // Look for a weapon referent immediately before, skipping only punctuation and conjunctions.
        for (Pullenti.Ner.Token tt = its[0].BeginToken.Previous; tt != null; tt = tt.Previous)
        {
            if ((prev = tt.GetReferent() as WeaponReferent) != null)
            {
                _inheritSlotsFromPrevious(tr, prev, true);
                hasGoodNoun = true;
                break;
            }
            bool skippable = (tt is Pullenti.Ner.TextToken) && (!tt.Chars.IsLetter || tt.Morph.Class.IsConjunction);
            if (!skippable)
            {
                break;
            }
        }
    }

    if (noun == null && model != null)
    {
        // Look further back (up to 100 tokens) for a weapon with the same model.
        int cou = 0;
        for (Pullenti.Ner.Token tt = its[0].BeginToken.Previous; tt != null && (cou < 100); tt = tt.Previous, cou++)
        {
            if ((prev = tt.GetReferent() as WeaponReferent) == null)
            {
                continue;
            }
            if (prev.FindSlot(WeaponReferent.ATTR_MODEL, model.Value, true) == null)
            {
                continue;
            }
            _inheritSlotsFromPrevious(tr, prev, false);
            hasGoodNoun = true;
            break;
        }
    }

    if (!hasGoodNoun)
    {
        if (noun != null)
        {
            // A doubtful noun needs a model or a confident brand.
            bool supported = model != null || (brand != null && !brand.IsDoubt);
            if (!supported)
            {
                return null;
            }
        }
        else
        {
            if (model == null)
            {
                return null;
            }
            // A bare model needs a weapon-related word within the 20 preceding tokens.
            int cou = 0;
            bool ok = false;
            for (Pullenti.Ner.Token tt = t1.Previous; tt != null && (cou < 20); tt = tt.Previous, cou++)
            {
                if (tt.IsValue("ОРУЖИЕ", null) || tt.IsValue("ВООРУЖЕНИЕ", null)
                    || tt.IsValue("ВЫСТРЕЛ", null) || tt.IsValue("ВЫСТРЕЛИТЬ", null))
                {
                    ok = true;
                    break;
                }
            }
            if (!ok)
            {
                return null;
            }
        }
    }

    // No item of a handled kind was consumed: there is no end token to build the span from.
    if (t1 == null)
    {
        return null;
    }

    List<Pullenti.Ner.ReferentToken> res = new List<Pullenti.Ner.ReferentToken>();
    res.Add(new Pullenti.Ner.ReferentToken(tr, its[0].BeginToken, t1));
    return res;
}

/// <summary>
/// Copies all type slots from <paramref name="source"/> to <paramref name="target"/>, plus brand slots (and model
/// slots when <paramref name="withModel"/> is true) for kinds the target had no slot of before the copy started.
/// </summary>
static void _inheritSlotsFromPrevious(WeaponReferent target, WeaponReferent source, bool withModel)
{
    // Optional slots are deferred so that the "target has none yet" check sees the state before copying.
    List<Pullenti.Ner.Slot> deferred = new List<Pullenti.Ner.Slot>();
    foreach (Pullenti.Ner.Slot s in source.Slots)
    {
        if (s.TypeName == WeaponReferent.ATTR_TYPE)
        {
            target.AddSlot(s.TypeName, s.Value, false, 0);
        }
        else if (s.TypeName == WeaponReferent.ATTR_BRAND || (withModel && s.TypeName == WeaponReferent.ATTR_MODEL))
        {
            if (target.FindSlot(s.TypeName, null, true) == null)
            {
                deferred.Add(s);
            }
        }
    }
    foreach (Pullenti.Ner.Slot s in deferred)
    {
        target.AddSlot(s.TypeName, s.Value, false, 0);
    }
}
/// <summary>
/// Tries to build one noun-phrase item (a candidate adjective and/or noun) starting at token <paramref name="t"/>.
/// </summary>
/// <remarks>
/// Visible behaviour:
/// - A null token yields null.
/// - A ReferentToken wrapping exactly one TextToken is parsed through that inner token; the result is then
///   stretched to cover the referent token and marked CanBeNoun.
/// - Any other ReferentToken becomes a noun item whose NounMorph variants are built from t.Morph.Items, with
///   NormalValue set to the referent's ToString().
/// - For a TextToken a chain of morphological filters may reject the token (return null): non-letters, some
///   verb/adverb forms ending in 'А'/'О', dictionary surnames/first names, comparative adjectives, pure verbs
///   without a case, and (with IgnoreParticiples) active participles not ending in "СЯ".
/// - The main loop runs at most twice (k = 0, 1). Pass 0 may glue a hyphenated second part to t0; the
///   БИЗНЕС special case schedules a second pass over t0.Next.
/// - Each morphological variant is checked with TryAccordVariant against <paramref name="items"/> before it is
///   added to AdjMorph / NounMorph (presumably an agreement check with the already collected items — confirm
///   against TryAccordVariant).
/// </remarks>
/// <param name="t">First token of the candidate item.</param>
/// <param name="items">Items already collected for the current noun phrase; may be null.</param>
/// <param name="attrs">Parse flags (IgnoreParticiples, ReferentCanBeNoun, ParseNumericAsAdjective, ParsePronouns, MultiNouns are consulted here).</param>
/// <returns>The parsed item, or null when the token cannot take part in a noun phrase.</returns>
public static NounPhraseItem TryParse(Pullenti.Ner.Token t, List <NounPhraseItem> items, Pullenti.Ner.Core.NounPhraseParseAttr attrs) { if (t == null) { return(null); } Pullenti.Ner.Token t0 = t; bool _canBeSurname = false; bool _isDoubtAdj = false; Pullenti.Ner.ReferentToken rt = t as Pullenti.Ner.ReferentToken; if (rt != null && rt.BeginToken == rt.EndToken && (rt.BeginToken is Pullenti.Ner.TextToken)) { NounPhraseItem res = TryParse(rt.BeginToken, items, attrs); if (res != null) { res.BeginToken = (res.EndToken = t); res.CanBeNoun = true; return(res); } } if (rt != null) { NounPhraseItem res = new NounPhraseItem(t, t); foreach (Pullenti.Morph.MorphBaseInfo m in t.Morph.Items) { NounPhraseItemTextVar v = new NounPhraseItemTextVar(m, null); v.NormalValue = t.GetReferent().ToString(); res.NounMorph.Add(v); } res.CanBeNoun = true; return(res); } if (t is Pullenti.Ner.NumberToken) { } bool hasLegalVerb = false; if (t is Pullenti.Ner.TextToken) { if (!t.Chars.IsLetter) { return(null); } string str = (t as Pullenti.Ner.TextToken).Term; if (str[str.Length - 1] == 'А' || str[str.Length - 1] == 'О') { foreach (Pullenti.Morph.MorphBaseInfo wf in t.Morph.Items) { if ((wf is Pullenti.Morph.MorphWordForm) && (wf as Pullenti.Morph.MorphWordForm).IsInDictionary) { if (wf.Class.IsVerb) { Pullenti.Morph.MorphClass mc = t.GetMorphClassInDictionary(); if (!mc.IsNoun && ((attrs & Pullenti.Ner.Core.NounPhraseParseAttr.IgnoreParticiples)) == Pullenti.Ner.Core.NounPhraseParseAttr.No) { if (!Pullenti.Morph.LanguageHelper.EndsWithEx(str, "ОГО", "ЕГО", null, null)) { return(null); } } hasLegalVerb = true; } if (wf.Class.IsAdverb) { if (t.Next == null || !t.Next.IsHiphen) { if ((str == "ВСЕГО" || str == "ДОМА" || str == "НЕСКОЛЬКО") || str == "МНОГО" || str == "ПОРЯДКА") { } else { return(null); } } } if (wf.Class.IsAdjective) { if (wf.ContainsAttr("к.ф.", null)) { if (t.GetMorphClassInDictionary() == Pullenti.Morph.MorphClass.Adjective) { } else { _isDoubtAdj = true; } } } } } } 
/* Proper-name filters (still inside the TextToken branch): singular surname forms ending in ИН/ЕН/ЫН are rejected when they are dictionary words and only flagged as possible surnames otherwise; dictionary surnames ending in ОВ are flagged too. Dictionary first names are rejected unless one of the exemptions below applies. */
Pullenti.Morph.MorphClass mc0 = t.Morph.Class; if (mc0.IsProperSurname && !t.Chars.IsAllLower) { foreach (Pullenti.Morph.MorphBaseInfo wf in t.Morph.Items) { if (wf.Class.IsProperSurname && wf.Number != Pullenti.Morph.MorphNumber.Plural) { Pullenti.Morph.MorphWordForm wff = wf as Pullenti.Morph.MorphWordForm; if (wff == null) { continue; } string s = ((wff.NormalFull ?? wff.NormalCase)) ?? ""; if (Pullenti.Morph.LanguageHelper.EndsWithEx(s, "ИН", "ЕН", "ЫН", null)) { if (!wff.IsInDictionary) { _canBeSurname = true; } else { return(null); } } if (wff.IsInDictionary && Pullenti.Morph.LanguageHelper.EndsWith(s, "ОВ")) { _canBeSurname = true; } } } } if (mc0.IsProperName && !t.Chars.IsAllLower) { foreach (Pullenti.Morph.MorphBaseInfo wff in t.Morph.Items) { Pullenti.Morph.MorphWordForm wf = wff as Pullenti.Morph.MorphWordForm; if (wf == null) { continue; } if (wf.NormalCase == "ГОР") { continue; } if (wf.Class.IsProperName && wf.IsInDictionary) { if (wf.NormalCase == null || !wf.NormalCase.StartsWith("ЛЮБ")) { if (mc0.IsAdjective && t.Morph.ContainsAttr("неизм.", null)) { } else if (((attrs & Pullenti.Ner.Core.NounPhraseParseAttr.ReferentCanBeNoun)) == Pullenti.Ner.Core.NounPhraseParseAttr.ReferentCanBeNoun) { } else { if (items == null || (items.Count < 1)) { return(null); } if (!items[0].IsStdAdjective) { return(null); } } } } } } if (mc0.IsAdjective && t.Morph.ItemsCount == 1) { if (t.Morph[0].ContainsAttr("в.ср.ст.", null)) { return(null); } } Pullenti.Morph.MorphClass mc1 = t.GetMorphClassInDictionary(); if (mc1 == Pullenti.Morph.MorphClass.Verb && t.Morph.Case.IsUndefined) { return(null); } if ((((attrs & Pullenti.Ner.Core.NounPhraseParseAttr.IgnoreParticiples)) == Pullenti.Ner.Core.NounPhraseParseAttr.IgnoreParticiples && t.Morph.Class.IsVerb && !t.Morph.Class.IsNoun) && !t.Morph.Class.IsProper) { foreach (Pullenti.Morph.MorphBaseInfo wf in t.Morph.Items) { if (wf.Class.IsVerb) { if (wf.ContainsAttr("дейст.з.", null)) { if 
(Pullenti.Morph.LanguageHelper.EndsWith((t as Pullenti.Ner.TextToken).Term, "СЯ")) { } else { return(null); } } } } } } Pullenti.Ner.Token t1 = null; /* Two-pass loop: pass 0 starts at t0 and may jump over a hyphen to the second part of a compound; pass 1 restarts from t1 (set by the БИЗНЕС case) or from t0 alone. */ for (int k = 0; k < 2; k++) { t = t1 ?? t0; if (k == 0) { if (((t0 is Pullenti.Ner.TextToken) && t0.Next != null && t0.Next.IsHiphen) && t0.Next.Next != null) { if (!t0.IsWhitespaceAfter && !t0.Morph.Class.IsPronoun && !(t0.Next.Next is Pullenti.Ner.NumberToken)) { if (!t0.Next.IsWhitespaceAfter) { t = t0.Next.Next; } else if (t0.Next.Next.Chars.IsAllLower && Pullenti.Morph.LanguageHelper.EndsWith((t0 as Pullenti.Ner.TextToken).Term, "О")) { t = t0.Next.Next; } } } } NounPhraseItem it = new NounPhraseItem(t0, t) { CanBeSurname = _canBeSurname }; if (t0 == t && (t0 is Pullenti.Ner.ReferentToken)) { it.CanBeNoun = true; it.Morph = new Pullenti.Ner.MorphCollection(t0.Morph); } bool canBePrepos = false; foreach (Pullenti.Morph.MorphBaseInfo v in t.Morph.Items) { Pullenti.Morph.MorphWordForm wf = v as Pullenti.Morph.MorphWordForm; if (v.Class.IsVerb && !v.Case.IsUndefined) { it.CanBeAdj = true; it.AdjMorph.Add(new NounPhraseItemTextVar(v, t)); continue; } if (v.Class.IsPreposition) { canBePrepos = true; } if (v.Class.IsAdjective || ((v.Class.IsPronoun && !v.Class.IsPersonalPronoun && !v.ContainsAttr("неизм.", null))) || ((v.Class.IsNoun && (t is Pullenti.Ner.NumberToken)))) { if (TryAccordVariant(items, (items == null ? 
0 : items.Count), v, false)) { bool isDoub = false; if (v.ContainsAttr("к.ф.", null)) { continue; } if (v.ContainsAttr("собир.", null) && !(t is Pullenti.Ner.NumberToken)) { if (wf != null && wf.IsInDictionary) { return(null); } continue; } if (v.ContainsAttr("сравн.", null)) { continue; } bool ok = true; if (t is Pullenti.Ner.TextToken) { string s = (t as Pullenti.Ner.TextToken).Term; if (s == "ПРАВО" || s == "ПРАВА") { ok = false; } else if (Pullenti.Morph.LanguageHelper.EndsWith(s, "ОВ") && t.GetMorphClassInDictionary().IsNoun) { ok = false; } } else if (t is Pullenti.Ner.NumberToken) { if (v.Class.IsNoun && t.Morph.Class.IsAdjective) { ok = false; } else if (t.Morph.Class.IsNoun && ((attrs & Pullenti.Ner.Core.NounPhraseParseAttr.ParseNumericAsAdjective)) == Pullenti.Ner.Core.NounPhraseParseAttr.No) { ok = false; } } if (ok) { it.AdjMorph.Add(new NounPhraseItemTextVar(v, t)); it.CanBeAdj = true; if (_isDoubtAdj && t0 == t) { it.IsDoubtAdjective = true; } if (hasLegalVerb && wf != null && wf.IsInDictionary) { it.CanBeNoun = true; } if (wf != null && wf.Class.IsPronoun) { it.CanBeNoun = true; it.NounMorph.Add(new NounPhraseItemTextVar(v, t)); } } } } bool canBeNoun = false; if (t is Pullenti.Ner.NumberToken) { } else if (v.Class.IsNoun || ((wf != null && wf.NormalCase == "САМ"))) { canBeNoun = true; } else if (v.Class.IsPersonalPronoun) { if (items == null || items.Count == 0) { canBeNoun = true; } else { foreach (NounPhraseItem it1 in items) { if (it1.IsVerb) { if (items.Count == 1 && !v.Case.IsNominative) { canBeNoun = true; } else { return(null); } } } if (items.Count == 1) { if (items[0].CanBeAdjForPersonalPronoun) { canBeNoun = true; } } } } else if ((v.Class.IsPronoun && ((items == null || items.Count == 0 || ((items.Count == 1 && items[0].CanBeAdjForPersonalPronoun)))) && wf != null) && (((((wf.NormalCase == "ТОТ" || wf.NormalFull == "ТО" || wf.NormalCase == "ТО") || wf.NormalCase == "ЭТО" || wf.NormalCase == "ВСЕ") || wf.NormalCase == "ЧТО" || 
wf.NormalCase == "КТО") || wf.NormalFull == "КОТОРЫЙ" || wf.NormalCase == "КОТОРЫЙ"))) { if (wf.NormalCase == "ВСЕ") { if (t.Next != null && t.Next.IsValue("РАВНО", null)) { return(null); } } canBeNoun = true; } else if (wf != null && ((wf.NormalFull ?? wf.NormalCase)) == "КОТОРЫЙ" && ((attrs & Pullenti.Ner.Core.NounPhraseParseAttr.ParsePronouns)) == Pullenti.Ner.Core.NounPhraseParseAttr.No) { return(null); } else if (v.Class.IsProper && (t is Pullenti.Ner.TextToken)) { if (t.LengthChar > 4 || v.Class.IsProperName) { canBeNoun = true; } } if (canBeNoun) { bool added = false; if (items != null && items.Count > 1 && ((attrs & Pullenti.Ner.Core.NounPhraseParseAttr.MultiNouns)) != Pullenti.Ner.Core.NounPhraseParseAttr.No) { bool ok1 = true; for (int ii = 1; ii < items.Count; ii++) { if (!items[ii].ConjBefore) { ok1 = false; break; } } if (ok1) { if (TryAccordVariant(items, (items == null ? 0 : items.Count), v, true)) { it.NounMorph.Add(new NounPhraseItemTextVar(v, t)); it.CanBeNoun = true; it.MultiNouns = true; added = true; } } } if (!added) { if (TryAccordVariant(items, (items == null ? 
0 : items.Count), v, false)) { it.NounMorph.Add(new NounPhraseItemTextVar(v, t)); it.CanBeNoun = true; if (v.Class.IsPersonalPronoun && t.Morph.ContainsAttr("неизм.", null) && !it.CanBeAdj) { NounPhraseItemTextVar itt = new NounPhraseItemTextVar(v, t); itt.Case = Pullenti.Morph.MorphCase.AllCases; itt.Number = Pullenti.Morph.MorphNumber.Undefined; if (itt.NormalValue == null) { } it.AdjMorph.Add(itt); it.CanBeAdj = true; } } else if ((items.Count > 0 && items[0].AdjMorph.Count > 0 && items[0].AdjMorph[0].Number == Pullenti.Morph.MorphNumber.Plural) && !((items[0].AdjMorph[0].Case & v.Case)).IsUndefined && !items[0].AdjMorph[0].Class.IsVerb) { if (t.Next != null && t.Next.IsCommaAnd && (t.Next.Next is Pullenti.Ner.TextToken)) { Pullenti.Ner.Core.NounPhraseToken npt2 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t.Next.Next, attrs, 0, null); if (npt2 != null && npt2.Preposition == null && !((npt2.Morph.Case & v.Case & items[0].AdjMorph[0].Case)).IsUndefined) { it.NounMorph.Add(new NounPhraseItemTextVar(v, t)); it.CanBeNoun = true; } } } } } } if (t0 != t) { foreach (NounPhraseItemTextVar v in it.AdjMorph) { v.CorrectPrefix(t0 as Pullenti.Ner.TextToken, false); } foreach (NounPhraseItemTextVar v in it.NounMorph) { v.CorrectPrefix(t0 as Pullenti.Ner.TextToken, true); } } if (k == 1 && it.CanBeNoun && !it.CanBeAdj) { if (t1 != null) { it.EndToken = t1; } else { it.EndToken = t0.Next.Next; } foreach (NounPhraseItemTextVar v in it.NounMorph) { if (v.NormalValue != null && (v.NormalValue.IndexOf('-') < 0)) { v.NormalValue = string.Format("{0}-{1}", v.NormalValue, it.EndToken.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false)); } } } if (it.CanBeAdj) { if (m_StdAdjectives.TryParse(it.BeginToken, Pullenti.Ner.Core.TerminParseAttr.No) != null) { it.IsStdAdjective = true; } } if (canBePrepos && it.CanBeNoun) { if (items != null && items.Count > 0) { Pullenti.Ner.Core.NounPhraseToken npt1 = 
Pullenti.Ner.Core.NounPhraseHelper.TryParse(t, Pullenti.Ner.Core.NounPhraseParseAttr.ParsePreposition | Pullenti.Ner.Core.NounPhraseParseAttr.ParsePronouns | Pullenti.Ner.Core.NounPhraseParseAttr.ParseVerbs, 0, null); if (npt1 != null && npt1.EndChar > t.EndChar) { return(null); } } else { Pullenti.Ner.Core.NounPhraseToken npt1 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t.Next, Pullenti.Ner.Core.NounPhraseParseAttr.ParsePronouns | Pullenti.Ner.Core.NounPhraseParseAttr.ParseVerbs, 0, null); if (npt1 != null) { Pullenti.Morph.MorphCase mc = Pullenti.Morph.LanguageHelper.GetCaseAfterPreposition((t as Pullenti.Ner.TextToken).Lemma); if (!((mc & npt1.Morph.Case)).IsUndefined) { return(null); } } } } /* Accept the item; for pronouns also absorb a trailing particle (ЖЕ/БЫ/ЛИ/Ж) or a suffix (НИБУДЬ/ЛИБО/-ТО), appending the suffix to the adjective normal forms. */ if (it.CanBeNoun || it.CanBeAdj || k == 1) { if (it.BeginToken.Morph.Class.IsPronoun) { Pullenti.Ner.Token tt2 = it.EndToken.Next; if ((tt2 != null && tt2.IsHiphen && !tt2.IsWhitespaceAfter) && !tt2.IsWhitespaceBefore) { tt2 = tt2.Next; } if (tt2 is Pullenti.Ner.TextToken) { string ss = (tt2 as Pullenti.Ner.TextToken).Term; if ((ss == "ЖЕ" || ss == "БЫ" || ss == "ЛИ") || ss == "Ж") { it.EndToken = tt2; } else if (ss == "НИБУДЬ" || ss == "ЛИБО" || (((ss == "ТО" && tt2.Previous.IsHiphen)) && it.CanBeAdj)) { it.EndToken = tt2; foreach (NounPhraseItemTextVar m in it.AdjMorph) { m.NormalValue = string.Format("{0}-{1}", m.NormalValue, ss); if (m.SingleNumberValue != null) { m.SingleNumberValue = string.Format("{0}-{1}", m.SingleNumberValue, ss); } } } } } return(it); } if (t0 == t) { if (t0.IsValue("БИЗНЕС", null) && t0.Next != null && t0.Next.Chars == t0.Chars) { t1 = t0.Next; continue; } return(it); } } return(null); }
/// <summary>
/// Tries to recognize an author reference starting at <paramref name="t"/>: a person,
/// a person enclosed in square brackets, or an "and others" marker
/// ("И ДРУГИЕ", "И ДР", "ET AL", optionally followed by a dot).
/// </summary>
/// <param name="t">First token to analyze; null yields null.</param>
/// <param name="prevPersTemplate">Template forwarded to PersonItemToken.TryParsePerson.</param>
/// <returns>The recognized token, or null when nothing matched.</returns>
public static BookLinkToken TryParseAuthor(Pullenti.Ner.Token t, Pullenti.Ner.Person.Internal.FioTemplateType prevPersTemplate = Pullenti.Ner.Person.Internal.FioTemplateType.Undefined) {
    if (t == null) {
        return null;
    }

    Pullenti.Ner.ReferentToken rtp = Pullenti.Ner.Person.Internal.PersonItemToken.TryParsePerson(t, prevPersTemplate);
    if (rtp != null) {
        BookLinkToken re;
        if (rtp.Data == null) {
            // No analyzer data attached: keep only the referent.
            re = new BookLinkToken(t, (rtp == t ? t : rtp.EndToken)) { Typ = BookLinkTyp.Person, Ref = rtp.Referent };
        } else {
            // Otherwise keep the whole referent token.
            re = new BookLinkToken(t, rtp.EndToken) { Typ = BookLinkTyp.Person, Tok = rtp };
        }
        re.PersonTemplate = (Pullenti.Ner.Person.Internal.FioTemplateType)rtp.MiscAttrs;

        // Look at the first person-property referent inside the person span: a capitalized one
        // that is not the very first token becomes the start of the name; any other one
        // invalidates the whole match.
        for (Pullenti.Ner.Token tt = rtp.BeginToken; tt != null && tt.EndChar <= rtp.EndChar; tt = tt.Next) {
            if (!(tt.GetReferent() is Pullenti.Ner.Person.PersonPropertyReferent)) {
                continue;
            }
            Pullenti.Ner.ReferentToken rt = tt as Pullenti.Ner.ReferentToken;
            if (rt.BeginToken.Chars.IsCapitalUpper && tt != rtp.BeginToken) {
                re.StartOfName = Pullenti.Ner.Core.MiscHelper.GetTextValueOfMetaToken(rt, Pullenti.Ner.Core.GetTextAttr.KeepRegister);
                break;
            }
            return null;
        }
        return re;
    }

    if (t.IsChar('[')) {
        // "[Author]": parse the inside and widen the result to include both brackets.
        BookLinkToken re = TryParseAuthor(t.Next, Pullenti.Ner.Person.Internal.FioTemplateType.Undefined);
        if (re != null && re.EndToken.Next != null && re.EndToken.Next.IsChar(']')) {
            re.BeginToken = t;
            re.EndToken = re.EndToken.Next;
            return re;
        }
    }

    if ((t.IsValue("И", null) || t.IsValue("ET", null)) && t.Next != null) {
        if (t.Next.IsValue("ДРУГИЕ", null) || t.Next.IsValue("ДР", null) || t.Next.IsValue("AL", null)) {
            BookLinkToken res = new BookLinkToken(t, t.Next) { Typ = BookLinkTyp.AndOthers };
            if (t.Next.Next != null && t.Next.Next.IsChar('.')) {
                // Absorb the abbreviation dot.
                res.EndToken = res.EndToken.Next;
            }
            return res;
        }
    }
    return null;
}
/// <summary>
/// Main keyword-extraction pass over the analysis kit.
/// </summary>
/// <remarks>
/// Steps, as visible in this method:
/// 1. If no enabled DenominationAnalyzer is registered in the processor, one is created and run on the kit.
/// 2. First pass over the tokens: tokens that already carry a referent are handed to _addReferents;
///    a verb that does not start a noun phrase becomes a Predicate keyword; every significant word of a
///    noun phrase becomes an Object keyword, and a phrase with more than one such word also gets a
///    combined Object keyword spanning the whole phrase.
/// 3. Second pass: an Object keyword followed by "OF" (plus optional English article) or by a genitive
///    token that is also an Object keyword is merged with it into a union keyword.
/// 4. Optionally sorts the referents by rank and registers an auto-annotation referent.
/// </remarks>
/// <param name="kit">Analysis kit whose token chain is scanned and modified (tokens are embedded in place).</param>
public override void Process(Pullenti.Ner.Core.AnalysisKit kit) {
    Pullenti.Ner.Core.AnalyzerData ad = kit.GetAnalyzerData(this);

    bool hasDenoms = false;
    foreach (Pullenti.Ner.Analyzer a in kit.Processor.Analyzers) {
        if ((a is Pullenti.Ner.Denomination.DenominationAnalyzer) && !a.IgnoreThisAnalyzer) {
            hasDenoms = true;
        }
    }
    if (!hasDenoms) {
        Pullenti.Ner.Denomination.DenominationAnalyzer a = new Pullenti.Ner.Denomination.DenominationAnalyzer();
        a.Process(kit);
    }

    List<KeywordReferent> li = new List<KeywordReferent>();
    StringBuilder tmp = new StringBuilder();
    List<string> tmp2 = new List<string>();

    // Total token count; passed to _setRank together with the current position.
    int max = 0;
    for (Pullenti.Ner.Token t = kit.FirstToken; t != null; t = t.Next) {
        max++;
    }

    int cur = 0;
    for (Pullenti.Ner.Token t = kit.FirstToken; t != null; t = t.Next, cur++) {
        Pullenti.Ner.Referent r = t.GetReferent();
        if (r != null) {
            t = this._addReferents(ad, t, cur, max);
            continue;
        }
        if (!(t is Pullenti.Ner.TextToken)) {
            continue;
        }
        if (!t.Chars.IsLetter || (t.LengthChar < 3)) {
            continue;
        }
        string term = (t as Pullenti.Ner.TextToken).Term;
        if (term == "ЕСТЬ") {
            // "ЕСТЬ" is kept only when the previous text token can be a verb.
            if (!((t.Previous is Pullenti.Ner.TextToken) && t.Previous.Morph.Class.IsVerb)) {
                continue;
            }
        }

        Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t, Pullenti.Ner.Core.NounPhraseParseAttr.AdjectiveCanBeLast | Pullenti.Ner.Core.NounPhraseParseAttr.ParsePreposition, 0, null);
        if (npt == null) {
            // Not a noun phrase: only verbs produce a (Predicate) keyword.
            Pullenti.Morph.MorphClass mc = t.GetMorphClassInDictionary();
            if (mc.IsVerb && !mc.IsPreposition) {
                if ((t as Pullenti.Ner.TextToken).IsVerbBe) {
                    continue;
                }
                if (t.IsValue("МОЧЬ", null) || t.IsValue("WOULD", null)) {
                    continue;
                }
                KeywordReferent kref = new KeywordReferent() { Typ = KeywordType.Predicate };
                string norm = t.GetNormalCaseText(Pullenti.Morph.MorphClass.Verb, Pullenti.Morph.MorphNumber.Singular, Pullenti.Morph.MorphGender.Undefined, false);
                if (norm == null) {
                    norm = (t as Pullenti.Ner.TextToken).Lemma;
                }
                if (norm.EndsWith("ЬСЯ")) {
                    // Strip the reflexive "СЯ", keeping the soft sign.
                    norm = norm.Substring(0, norm.Length - 2);
                }
                kref.AddSlot(KeywordReferent.ATTR_VALUE, norm, false, 0);
                List<Pullenti.Semantic.Utils.DerivateGroup> drv = Pullenti.Semantic.Utils.DerivateService.FindDerivates(norm, true, t.Morph.Language);
                _addNormals(kref, drv, norm);
                kref = ad.RegisterReferent(kref) as KeywordReferent;
                _setRank(kref, cur, max);
                // NOTE(review): kref is registered a second time here, unlike the Object branch below;
                // kept as in the original — confirm whether the second call is needed.
                Pullenti.Ner.ReferentToken rt1 = new Pullenti.Ner.ReferentToken(ad.RegisterReferent(kref), t, t) { Morph = t.Morph };
                kit.EmbedToken(rt1);
                t = rt1;
                continue;
            }
            continue;
        }

        if (npt.InternalNoun != null) {
            continue;
        }
        // Skip some fixed prepositional phrases ending in "ЦЕЛОМ" / "ЧАСТНОСТИ" / "С ... СТОРОНЫ".
        if (npt.EndToken.IsValue("ЦЕЛОМ", null) || npt.EndToken.IsValue("ЧАСТНОСТИ", null)) {
            if (npt.Preposition != null) {
                t = npt.EndToken;
                continue;
            }
        }
        if (npt.EndToken.IsValue("СТОРОНЫ", null) && npt.Preposition != null && npt.Preposition.Normal == "С") {
            t = npt.EndToken;
            continue;
        }
        if (npt.BeginToken == npt.EndToken) {
            Pullenti.Morph.MorphClass mc = t.GetMorphClassInDictionary();
            if (mc.IsPreposition) {
                continue;
            } else if (mc.IsAdverb) {
                if (t.IsValue("ПОТОМ", null)) {
                    continue;
                }
            }
        }

        // One Object keyword per significant word of the noun phrase.
        li.Clear();
        Pullenti.Ner.Token t0 = t;
        for (Pullenti.Ner.Token tt = t; tt != null && tt.EndChar <= npt.EndChar; tt = tt.Next) {
            if (!(tt is Pullenti.Ner.TextToken)) {
                continue;
            }
            if ((tt.LengthChar < 3) || !tt.Chars.IsLetter) {
                continue;
            }
            Pullenti.Morph.MorphClass mc = tt.GetMorphClassInDictionary();
            if ((mc.IsPreposition || mc.IsPronoun || mc.IsPersonalPronoun) || mc.IsConjunction) {
                // "ОТНОШЕНИЕ" is kept even though the dictionary class matched one of the above.
                if (!tt.IsValue("ОТНОШЕНИЕ", null)) {
                    continue;
                }
            }
            if (mc.IsMisc) {
                if (Pullenti.Ner.Core.MiscHelper.IsEngArticle(tt)) {
                    continue;
                }
            }
            KeywordReferent kref = new KeywordReferent() { Typ = KeywordType.Object };
            string norm = (tt as Pullenti.Ner.TextToken).Lemma;
            kref.AddSlot(KeywordReferent.ATTR_VALUE, norm, false, 0);
            if (norm != "ЕСТЬ") {
                List<Pullenti.Semantic.Utils.DerivateGroup> drv = Pullenti.Semantic.Utils.DerivateService.FindDerivates(norm, true, tt.Morph.Language);
                _addNormals(kref, drv, norm);
            }
            kref = ad.RegisterReferent(kref) as KeywordReferent;
            _setRank(kref, cur, max);
            Pullenti.Ner.ReferentToken rt1 = new Pullenti.Ner.ReferentToken(kref, tt, tt) { Morph = tt.Morph };
            kit.EmbedToken(rt1);
            if (tt == t && li.Count == 0) {
                // The phrase start is now wrapped; remember the wrapper as the begin token.
                t0 = rt1;
            }
            t = rt1;
            li.Add(kref);
        }

        if (li.Count > 1) {
            // Combined keyword for the whole phrase, referencing each word keyword.
            KeywordReferent kref = new KeywordReferent() { Typ = KeywordType.Object };
            tmp2.Clear();
            foreach (KeywordReferent kw in li) {
                string s = kw.GetStringValue(KeywordReferent.ATTR_VALUE);
                string n = kw.GetStringValue(KeywordReferent.ATTR_NORMAL);
                tmp2.Add(n ?? s);
                kref.AddSlot(KeywordReferent.ATTR_REF, kw, false, 0);
            }
            string val = npt.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Singular, Pullenti.Morph.MorphGender.Undefined, false);
            kref.AddSlot(KeywordReferent.ATTR_VALUE, val, false, 0);

            // Normal form of the phrase: sorted, space-joined normal forms of its words.
            tmp.Length = 0;
            tmp2.Sort();
            foreach (string s in tmp2) {
                if (tmp.Length > 0) {
                    tmp.Append(' ');
                }
                tmp.Append(s);
            }
            string norm = tmp.ToString();
            if (norm != val) {
                kref.AddSlot(KeywordReferent.ATTR_NORMAL, norm, false, 0);
            }
            kref = ad.RegisterReferent(kref) as KeywordReferent;
            _setRank(kref, cur, max);
            Pullenti.Ner.ReferentToken rt1 = new Pullenti.Ner.ReferentToken(kref, t0, t) { Morph = npt.Morph };
            kit.EmbedToken(rt1);
            t = rt1;
        }
    }

    // Second pass: merge "X OF [article] Y" and "X Y(genitive)" pairs of Object keywords.
    cur = 0;
    for (Pullenti.Ner.Token t = kit.FirstToken; t != null; t = t.Next, cur++) {
        KeywordReferent kw = t.GetReferent() as KeywordReferent;
        if (kw == null || kw.Typ != KeywordType.Object) {
            continue;
        }
        if (t.Next == null || kw.ChildWords > 2) {
            continue;
        }
        Pullenti.Ner.Token t1 = t.Next;
        if (t1.IsValue("OF", null) && (t1.WhitespacesAfterCount < 3) && t1.Next != null) {
            t1 = t1.Next;
            if ((t1 is Pullenti.Ner.TextToken) && Pullenti.Ner.Core.MiscHelper.IsEngArticle(t1) && t1.Next != null) {
                t1 = t1.Next;
            }
        } else if (!t1.Morph.Case.IsGenitive || t.WhitespacesAfterCount > 1) {
            continue;
        }
        KeywordReferent kw2 = t1.GetReferent() as KeywordReferent;
        if (kw2 == null) {
            continue;
        }
        if (kw == kw2) {
            continue;
        }
        if (kw2.Typ != KeywordType.Object || (kw.ChildWords + kw2.ChildWords) > 3) {
            continue;
        }
        KeywordReferent kwUn = new KeywordReferent();
        kwUn.Union(kw, kw2, Pullenti.Ner.Core.MiscHelper.GetTextValue(t1, t1, Pullenti.Ner.Core.GetTextAttr.No));
        kwUn = ad.RegisterReferent(kwUn) as KeywordReferent;
        _setRank(kwUn, cur, max);
        Pullenti.Ner.ReferentToken rt1 = new Pullenti.Ner.ReferentToken(kwUn, t, t1) { Morph = t.Morph };
        kit.EmbedToken(rt1);
        t = rt1;
    }

    if (SortKeywordsByRank) {
        List<Pullenti.Ner.Referent> all = new List<Pullenti.Ner.Referent>(ad.Referents);
        all.Sort(new CompByRank());
        ad.Referents = all;
    }
    if (AnnotationMaxSentences > 0) {
        KeywordReferent ano = Pullenti.Ner.Keyword.Internal.AutoannoSentToken.CreateAnnotation(kit, AnnotationMaxSentences);
        if (ano != null) {
            ad.RegisterReferent(ano);
        }
    }
}
/// <summary>
/// Tries to parse one named-entity item (type word, well-known name, compass-direction phrase,
/// bracketed name or a plain capitalized word) starting at <paramref name="t"/>.
/// </summary>
/// <remarks>
/// Branches are tried in this order and the first match wins:
/// 1. A ReferentToken: accepted only for PERSON, PERSONPROPERTY, ORGANIZATION or a GeoReferent.
/// 2. A term from m_Types (optionally combined with a matching m_Names term of the same kind).
/// 3. A term from m_Names (marked well-known only when delimited by whitespace/punctuation).
/// 4. A phrase recognized by MiscLocationHelper.TryAttachNordWest, alone or followed by a location.
/// 5. A capitalized noun phrase "adjective(s) + type word".
/// 6. A bracketed sequence starting with a non-lowercase letter token.
/// 7. Any non-lowercase letter token longer than two characters, optionally hyphen-joined with the next one.
/// </remarks>
/// <param name="t">First token to analyze; null yields null.</param>
/// <param name="locOnto">Local ontology; only forwarded to recursive calls here.</param>
/// <returns>The parsed item, or null when nothing matched.</returns>
public static NamedItemToken TryParse(Pullenti.Ner.Token t, Pullenti.Ner.Core.IntOntologyCollection locOnto) {
    if (t == null) {
        return null;
    }

    if (t is Pullenti.Ner.ReferentToken) {
        Pullenti.Ner.Referent r = t.GetReferent();
        if ((r.TypeName == "PERSON" || r.TypeName == "PERSONPROPERTY" || (r is Pullenti.Ner.Geo.GeoReferent)) || r.TypeName == "ORGANIZATION") {
            return new NamedItemToken(t, t) { Ref = r, Morph = t.Morph };
        }
        return null;
    }

    Pullenti.Ner.Core.TerminToken typ = m_Types.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
    Pullenti.Ner.Core.TerminToken nam = m_Names.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);

    if (typ != null) {
        if (!(t is Pullenti.Ner.TextToken)) {
            return null;
        }
        NamedItemToken res = new NamedItemToken(typ.BeginToken, typ.EndToken) { Morph = typ.Morph, Chars = typ.Chars };
        res.Kind = (Pullenti.Ner.Named.NamedEntityKind)typ.Termin.Tag;
        res.TypeValue = typ.Termin.CanonicText;
        // The same span is also a known name of the same kind: record it as a well-known name.
        if ((nam != null && nam.EndToken == typ.EndToken && !t.Chars.IsAllLower) && ((Pullenti.Ner.Named.NamedEntityKind)nam.Termin.Tag) == res.Kind) {
            res.NameValue = nam.Termin.CanonicText;
            res.IsWellknown = true;
        }
        return res;
    }

    if (nam != null) {
        if (nam.BeginToken.Chars.IsAllLower) {
            return null;
        }
        NamedItemToken res = new NamedItemToken(nam.BeginToken, nam.EndToken) { Morph = nam.Morph, Chars = nam.Chars };
        res.Kind = (Pullenti.Ner.Named.NamedEntityKind)nam.Termin.Tag;
        res.NameValue = nam.Termin.CanonicText;
        // Well-known only when the token is not glued to its neighbours
        // (trailing punctuation followed by whitespace is tolerated).
        bool ok = true;
        if (!t.IsWhitespaceBefore && t.Previous != null) {
            ok = false;
        } else if (!t.IsWhitespaceAfter && t.Next != null) {
            if (!(t.Next.IsCharOf(",.;!?") && t.Next.IsWhitespaceAfter)) {
                ok = false;
            }
        }
        if (ok) {
            res.IsWellknown = true;
            res.TypeValue = nam.Termin.Tag2 as string;
        }
        return res;
    }

    Pullenti.Ner.MetaToken adj = Pullenti.Ner.Geo.Internal.MiscLocationHelper.TryAttachNordWest(t);
    if (adj != null) {
        if (adj.Morph.Class.IsNoun) {
            // Noun form: only a multi-token phrase ending in "ВОСТОК" is accepted as a location.
            if (adj.EndToken.IsValue("ВОСТОК", null)) {
                if (adj.BeginToken == adj.EndToken) {
                    return null;
                }
                NamedItemToken re = new NamedItemToken(t, adj.EndToken) { Morph = adj.Morph };
                re.Kind = Pullenti.Ner.Named.NamedEntityKind.Location;
                re.NameValue = Pullenti.Ner.Core.MiscHelper.GetTextValue(t, adj.EndToken, Pullenti.Ner.Core.GetTextAttr.FirstNounGroupToNominative);
                re.IsWellknown = true;
                return re;
            }
            return null;
        }
        if (adj.WhitespacesAfterCount > 2) {
            return null;
        }
        if ((adj.EndToken.Next is Pullenti.Ner.ReferentToken) && (adj.EndToken.Next.GetReferent() is Pullenti.Ner.Geo.GeoReferent)) {
            // Direction adjective + geo referent.
            NamedItemToken re = new NamedItemToken(t, adj.EndToken.Next) { Morph = adj.EndToken.Next.Morph };
            re.Kind = Pullenti.Ner.Named.NamedEntityKind.Location;
            re.NameValue = Pullenti.Ner.Core.MiscHelper.GetTextValue(t, adj.EndToken.Next, Pullenti.Ner.Core.GetTextAttr.FirstNounGroupToNominative);
            re.IsWellknown = true;
            re.Ref = adj.EndToken.Next.GetReferent();
            return re;
        }
        // Direction adjective + location item: prepend the normalized adjective to the name.
        NamedItemToken res = TryParse(adj.EndToken.Next, locOnto);
        if (res != null && res.Kind == Pullenti.Ner.Named.NamedEntityKind.Location) {
            string s = adj.GetNormalCaseText(Pullenti.Morph.MorphClass.Adjective, Pullenti.Morph.MorphNumber.Singular, res.Morph.Gender, false);
            if (s != null) {
                if (res.NameValue == null) {
                    res.NameValue = s.ToUpper();
                } else {
                    res.NameValue = string.Format("{0} {1}", s.ToUpper(), res.NameValue);
                    res.TypeValue = null;
                }
                res.BeginToken = t;
                res.Chars = t.Chars;
                res.IsWellknown = true;
                return res;
            }
        }
    }

    if (t.Chars.IsCapitalUpper && !Pullenti.Ner.Core.MiscHelper.CanBeStartOfSentence(t)) {
        Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
        if (npt != null && npt.Adjectives.Count > 0) {
            NamedItemToken test = TryParse(npt.Noun.BeginToken, locOnto);
            if (test != null && test.EndToken == npt.EndToken && test.TypeValue != null) {
                // The noun is a type word: the adjectives (normalized to the noun's gender) form the name.
                test.BeginToken = t;
                StringBuilder tmp = new StringBuilder();
                foreach (Pullenti.Ner.MetaToken a in npt.Adjectives) {
                    string s = a.GetNormalCaseText(Pullenti.Morph.MorphClass.Adjective, Pullenti.Morph.MorphNumber.Singular, test.Morph.Gender, false);
                    if (tmp.Length > 0) {
                        tmp.Append(' ');
                    }
                    tmp.Append(s);
                }
                test.NameValue = tmp.ToString();
                test.Chars = t.Chars;
                if (test.Kind == Pullenti.Ner.Named.NamedEntityKind.Location) {
                    test.IsWellknown = true;
                }
                return test;
            }
        }
    }

    if ((Pullenti.Ner.Core.BracketHelper.IsBracket(t, true) && t.Next != null && t.Next.Chars.IsLetter) && !t.Next.Chars.IsAllLower) {
        Pullenti.Ner.Core.BracketSequenceToken br = Pullenti.Ner.Core.BracketHelper.TryParse(t, Pullenti.Ner.Core.BracketParseAttr.No, 100);
        if (br != null) {
            NamedItemToken res = new NamedItemToken(t, br.EndToken);
            res.IsInBracket = true;
            res.NameValue = Pullenti.Ner.Core.MiscHelper.GetTextValue(t, br.EndToken, Pullenti.Ner.Core.GetTextAttr.No);
            // If the bracket content is exactly a known name, use its canonical form and kind.
            nam = m_Names.TryParse(t.Next, Pullenti.Ner.Core.TerminParseAttr.No);
            if (nam != null && nam.EndToken == br.EndToken.Previous) {
                res.Kind = (Pullenti.Ner.Named.NamedEntityKind)nam.Termin.Tag;
                res.IsWellknown = true;
                res.NameValue = nam.Termin.CanonicText;
            }
            return res;
        }
    }

    if (((t is Pullenti.Ner.TextToken) && t.Chars.IsLetter && !t.Chars.IsAllLower) && t.LengthChar > 2) {
        NamedItemToken res = new NamedItemToken(t, t) { Morph = t.Morph };
        string str = (t as Pullenti.Ner.TextToken).Term;
        // Words ending in О / И / Ы are kept as written; others are normalized.
        if (str.EndsWith("О") || str.EndsWith("И") || str.EndsWith("Ы")) {
            res.NameValue = str;
        } else {
            res.NameValue = t.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false);
        }
        res.Chars = t.Chars;
        // Hyphenated continuation in the same script: "Xxx-Yyy".
        if (((!t.IsWhitespaceAfter && t.Next != null && t.Next.IsHiphen) && (t.Next.Next is Pullenti.Ner.TextToken) && !t.Next.Next.IsWhitespaceAfter) && t.Chars.IsCyrillicLetter == t.Next.Next.Chars.IsCyrillicLetter) {
            t = (res.EndToken = t.Next.Next);
            res.NameValue = string.Format("{0}-{1}", res.NameValue, t.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false));
        }
        return res;
    }
    return null;
}
/// <summary>
/// Wraps a referent token produced by another analyzer into a keyword referent token.
/// </summary>
/// <remarks>
/// Denomination referents become a Referent keyword carrying their values as normal forms;
/// money referents become an Object keyword carrying the currency; phone, URI, bank-data,
/// DATE, DATERANGE and BOOKLINKREF referents are left untouched. For every other referent the
/// nested referent tokens are processed first, then the token itself is wrapped.
/// </remarks>
/// <param name="ad">Analyzer data used to register the created keyword referents.</param>
/// <param name="t">Token to wrap; returned unchanged when it is not a referent token.</param>
/// <param name="cur">Current token position, forwarded to _setRank.</param>
/// <param name="max">Total token count, forwarded to _setRank.</param>
/// <returns>The newly embedded keyword token, or <paramref name="t"/> when nothing was created.</returns>
Pullenti.Ner.Token _addReferents(Pullenti.Ner.Core.AnalyzerData ad, Pullenti.Ner.Token t, int cur, int max) {
    if (!(t is Pullenti.Ner.ReferentToken)) {
        return t;
    }
    Pullenti.Ner.Referent referent = t.GetReferent();
    if (referent == null) {
        return t;
    }

    Pullenti.Ner.Denomination.DenominationReferent denomination = referent as Pullenti.Ner.Denomination.DenominationReferent;
    if (denomination != null) {
        KeywordReferent denomKeyword = new KeywordReferent();
        denomKeyword.Typ = KeywordType.Referent;
        foreach (Pullenti.Ner.Slot slot in denomination.Slots) {
            if (slot.TypeName != Pullenti.Ner.Denomination.DenominationReferent.ATTR_VALUE) {
                continue;
            }
            denomKeyword.AddSlot(KeywordReferent.ATTR_NORMAL, slot.Value, false, 0);
        }
        denomKeyword.AddSlot(KeywordReferent.ATTR_REF, denomination, false, 0);
        Pullenti.Ner.ReferentToken denomToken = new Pullenti.Ner.ReferentToken(ad.RegisterReferent(denomKeyword), t, t);
        t.Kit.EmbedToken(denomToken);
        return denomToken;
    }

    // These referent kinds never produce keywords.
    bool isContactLike = (referent is Pullenti.Ner.Phone.PhoneReferent)
        || (referent is Pullenti.Ner.Uri.UriReferent)
        || (referent is Pullenti.Ner.Bank.BankDataReferent);
    if (isContactLike) {
        return t;
    }

    Pullenti.Ner.Money.MoneyReferent money = referent as Pullenti.Ner.Money.MoneyReferent;
    if (money != null) {
        KeywordReferent moneyKeyword = new KeywordReferent();
        moneyKeyword.Typ = KeywordType.Object;
        moneyKeyword.AddSlot(KeywordReferent.ATTR_NORMAL, money.Currency, false, 0);
        Pullenti.Ner.ReferentToken moneyToken = new Pullenti.Ner.ReferentToken(ad.RegisterReferent(moneyKeyword), t, t);
        t.Kit.EmbedToken(moneyToken);
        return moneyToken;
    }

    if (referent.TypeName == "DATE" || referent.TypeName == "DATERANGE" || referent.TypeName == "BOOKLINKREF") {
        return t;
    }

    // Wrap nested referent tokens first.
    Pullenti.Ner.Token inner = (t as Pullenti.Ner.MetaToken).BeginToken;
    while (inner != null && inner.EndChar <= t.EndChar) {
        if (inner is Pullenti.Ner.ReferentToken) {
            this._addReferents(ad, inner, cur, max);
        }
        inner = inner.Next;
    }

    KeywordReferent keyword = new KeywordReferent();
    keyword.Typ = KeywordType.Referent;

    // Normal form: the ALPHA2 value for GEO referents when present, otherwise the short string form.
    string normal = null;
    if (referent.TypeName == "GEO") {
        normal = referent.GetStringValue("ALPHA2");
    }
    if (normal == null) {
        normal = referent.ToString(true, null, 0);
    }
    if (normal != null) {
        keyword.AddSlot(KeywordReferent.ATTR_NORMAL, normal.ToUpper(), false, 0);
    }
    keyword.AddSlot(KeywordReferent.ATTR_REF, t.GetReferent(), false, 0);
    _setRank(keyword, cur, max);

    Pullenti.Ner.ReferentToken wrapped = new Pullenti.Ner.ReferentToken(ad.RegisterReferent(keyword), t, t);
    t.Kit.EmbedToken(wrapped);
    return wrapped;
}
/// <summary>
/// Tries to recognise one weapon description item (noun, brand, name, model, number,
/// caliber or developer) starting at <paramref name="t"/>.
/// </summary>
/// <param name="t">First token of the candidate item; <c>null</c> yields <c>null</c>.</param>
/// <param name="prev">Previously parsed item, used to decide whether a capitalised word may be a name/brand.</param>
/// <param name="afterConj">Only forwarded to recursive calls in this method.</param>
/// <param name="attachHigh">Only forwarded to the recursive call made for bracketed items.</param>
/// <returns>The recognised item, or <c>null</c> when nothing matches.</returns>
static WeaponItemToken _TryParse(Pullenti.Ner.Token t, WeaponItemToken prev, bool afterConj, bool attachHigh = false) {
    if (t == null) {
        return null;
    }

    // Item wrapped in brackets: parse the inside and widen the span to include the brackets.
    if (Pullenti.Ner.Core.BracketHelper.IsBracket(t, true)) {
        WeaponItemToken wit = _TryParse(t.Next, prev, afterConj, attachHigh);
        if (wit != null) {
            if (wit.EndToken.Next == null) {
                wit.BeginToken = t;
                return wit;
            }
            if (Pullenti.Ner.Core.BracketHelper.IsBracket(wit.EndToken.Next, true)) {
                wit.BeginToken = t;
                wit.EndToken = wit.EndToken.Next;
                return wit;
            }
        }
    }

    // Dictionary (ontology) match.
    Pullenti.Ner.Core.TerminToken tok = m_Ontology.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
    if (tok != null) {
        WeaponItemToken res = new WeaponItemToken(t, tok.EndToken);
        res.Typ = (Typs)tok.Termin.Tag;
        if (res.Typ == Typs.Noun) {
            res.Value = tok.Termin.CanonicText;
            if (tok.Termin.Tag2 != null) {
                res.IsDoubt = true;
            }
            // Absorb following brands and adjectives that stay close to the noun.
            for (Pullenti.Ner.Token tt = res.EndToken.Next; tt != null; tt = tt.Next) {
                if (tt.WhitespacesBeforeCount > 2) {
                    break;
                }
                WeaponItemToken wit = _TryParse(tt, null, false, false);
                if (wit != null) {
                    if (wit.Typ == Typs.Brand) {
                        res.InnerTokens.Add(wit);
                        res.EndToken = (tt = wit.EndToken);
                        continue;
                    }
                    break;
                }
                if (!(tt is Pullenti.Ner.TextToken)) {
                    break;
                }
                Pullenti.Morph.MorphClass mc = tt.GetMorphClassInDictionary();
                if (mc == Pullenti.Morph.MorphClass.Adjective) {
                    if (res.AltValue == null) {
                        res.AltValue = res.Value;
                    }
                    // Strip the trailing noun so the adjective is inserted before it.
                    if (res.AltValue.EndsWith(res.Value)) {
                        res.AltValue = res.AltValue.Substring(0, res.AltValue.Length - res.Value.Length);
                    }
                    res.AltValue = string.Format("{0}{1} {2}", res.AltValue, (tt as Pullenti.Ner.TextToken).Term, res.Value);
                    res.EndToken = tt;
                    continue;
                }
                break;
            }
            return res;
        }
        if (res.Typ == Typs.Brand || res.Typ == Typs.Name) {
            res.Value = tok.Termin.CanonicText;
            return res;
        }
        if (res.Typ == Typs.Model) {
            res.Value = tok.Termin.CanonicText;
            if (tok.Termin.Tag2 is List<Pullenti.Ner.Core.Termin>) {
                List<Pullenti.Ner.Core.Termin> li = tok.Termin.Tag2 as List<Pullenti.Ner.Core.Termin>;
                foreach (Pullenti.Ner.Core.Termin to in li) {
                    WeaponItemToken wit = new WeaponItemToken(t, tok.EndToken) {
                        Typ = (Typs)to.Tag,
                        Value = to.CanonicText,
                        IsInternal = tok.BeginToken == tok.EndToken
                    };
                    res.InnerTokens.Add(wit);
                    if (to.AdditionalVars != null && to.AdditionalVars.Count > 0) {
                        wit.AltValue = to.AdditionalVars[0].CanonicText;
                    }
                }
            }
            res._correctModel();
            return res;
        }
    }

    // Explicit number prefix followed by a number.
    Pullenti.Ner.Token nnn = Pullenti.Ner.Core.MiscHelper.CheckNumberPrefix(t);
    if (nnn != null) {
        Pullenti.Ner.Transport.Internal.TransItemToken tit = Pullenti.Ner.Transport.Internal.TransItemToken._attachNumber(nnn, true);
        if (tit != null) {
            WeaponItemToken res = new WeaponItemToken(t, tit.EndToken) { Typ = Typs.Number };
            res.Value = tit.Value;
            res.AltValue = tit.AltValue;
            return res;
        }
    }

    // Short all-caps word (1-3 letters) followed by a number: a doubtful model designation.
    if (((t is Pullenti.Ner.TextToken) && t.Chars.IsLetter && t.Chars.IsAllUpper) && (t.LengthChar < 4)) {
        if ((t.Next != null && ((t.Next.IsHiphen || t.Next.IsChar('.'))) && (t.Next.WhitespacesAfterCount < 2)) && (t.Next.Next is Pullenti.Ner.NumberToken)) {
            WeaponItemToken res = new WeaponItemToken(t, t.Next) { Typ = Typs.Model, IsDoubt = true };
            res.Value = (t as Pullenti.Ner.TextToken).Term;
            res._correctModel();
            return res;
        }
        if ((t.Next is Pullenti.Ner.NumberToken) && !t.IsWhitespaceAfter) {
            WeaponItemToken res = new WeaponItemToken(t, t) { Typ = Typs.Model, IsDoubt = true };
            res.Value = (t as Pullenti.Ner.TextToken).Term;
            res._correctModel();
            return res;
        }
        // "СП" directly before a model/brand is treated as the noun "ПИСТОЛЕТ".
        if ((t as Pullenti.Ner.TextToken).Term == "СП" && (t.WhitespacesAfterCount < 3) && (t.Next is Pullenti.Ner.TextToken)) {
            WeaponItemToken pp = _TryParse(t.Next, null, false, false);
            if (pp != null && ((pp.Typ == Typs.Model || pp.Typ == Typs.Brand))) {
                WeaponItemToken res = new WeaponItemToken(t, t) { Typ = Typs.Noun };
                res.Value = "ПИСТОЛЕТ";
                res.AltValue = "СЛУЖЕБНЫЙ ПИСТОЛЕТ";
                return res;
            }
        }
    }

    // Non-lowercase word after a noun/model/brand (or after a comma/"and"): doubtful name.
    if (((t is Pullenti.Ner.TextToken) && t.Chars.IsLetter && !t.Chars.IsAllLower) && t.LengthChar > 2) {
        bool ok = false;
        if (prev != null && ((prev.Typ == Typs.Noun || prev.Typ == Typs.Model || prev.Typ == Typs.Brand))) {
            ok = true;
        } else if (prev == null && t.Previous != null && t.Previous.IsCommaAnd) {
            ok = true;
        }
        if (ok) {
            WeaponItemToken res = new WeaponItemToken(t, t) { Typ = Typs.Name, IsDoubt = true };
            res.Value = (t as Pullenti.Ner.TextToken).Term;
            if ((t.Next != null && t.Next.IsHiphen && (t.Next.Next is Pullenti.Ner.TextToken)) && t.Next.Next.Chars == t.Chars) {
                res.Value = string.Format("{0}-{1}", res.Value, (t.Next.Next as Pullenti.Ner.TextToken).Term);
                res.EndToken = t.Next.Next;
            }
            if (prev != null && prev.Typ == Typs.Noun) {
                res.Typ = Typs.Brand;
            }
            if (res.EndToken.Next != null && res.EndToken.Next.IsHiphen && (res.EndToken.Next.Next is Pullenti.Ner.NumberToken)) {
                res.Typ = Typs.Model;
                res._correctModel();
            } else if (!res.EndToken.IsWhitespaceAfter && (res.EndToken.Next is Pullenti.Ner.NumberToken)) {
                res.Typ = Typs.Model;
                res._correctModel();
            }
            return res;
        }
    }

    // "МАРКА <brand>".
    if (t.IsValue("МАРКА", null)) {
        WeaponItemToken res = _TryParse(t.Next, prev, afterConj, false);
        if (res != null && res.Typ == Typs.Brand) {
            res.BeginToken = t;
            return res;
        }
        if (Pullenti.Ner.Core.BracketHelper.CanBeStartOfSequence(t.Next, true, false)) {
            Pullenti.Ner.Core.BracketSequenceToken br = Pullenti.Ner.Core.BracketHelper.TryParse(t.Next, Pullenti.Ner.Core.BracketParseAttr.No, 100);
            if (br != null) {
                return new WeaponItemToken(t, br.EndToken) {
                    Typ = Typs.Brand,
                    Value = Pullenti.Ner.Core.MiscHelper.GetTextValue(br.BeginToken, br.EndToken, Pullenti.Ner.Core.GetTextAttr.No)
                };
            }
        }
        if (((t is Pullenti.Ner.TextToken) && (t.Next is Pullenti.Ner.TextToken) && t.Next.LengthChar > 1) && !t.Next.Chars.IsAllLower) {
            // Fix: the brand is the word that follows "МАРКА", not the keyword itself.
            return new WeaponItemToken(t, t.Next) {
                Typ = Typs.Brand,
                Value = (t.Next as Pullenti.Ner.TextToken).Term
            };
        }
    }

    // "КАЛИБР <number>".
    if (t.IsValue("КАЛИБР", "КАЛІБР")) {
        Pullenti.Ner.Token tt1 = t.Next;
        if (tt1 != null && ((tt1.IsHiphen || tt1.IsChar(':')))) {
            tt1 = tt1.Next;
        }
        Pullenti.Ner.Measure.Internal.NumbersWithUnitToken num = Pullenti.Ner.Measure.Internal.NumbersWithUnitToken.TryParse(tt1, null, false, false, false, false);
        if (num != null && num.SingleVal != null) {
            return new WeaponItemToken(t, num.EndToken) {
                Typ = Typs.Caliber,
                Value = Pullenti.Ner.Core.NumberHelper.DoubleToString(num.SingleVal.Value)
            };
        }
    }

    // "<number> мм" or "<number> КАЛИБР".
    if (t is Pullenti.Ner.NumberToken) {
        Pullenti.Ner.Measure.Internal.NumbersWithUnitToken num = Pullenti.Ner.Measure.Internal.NumbersWithUnitToken.TryParse(t, null, false, false, false, false);
        if (num != null && num.SingleVal != null) {
            if (num.Units.Count == 1 && num.Units[0].Unit != null && num.Units[0].Unit.NameCyr == "мм") {
                return new WeaponItemToken(t, num.EndToken) {
                    Typ = Typs.Caliber,
                    Value = Pullenti.Ner.Core.NumberHelper.DoubleToString(num.SingleVal.Value)
                };
            }
            if (num.EndToken.Next != null && num.EndToken.Next.IsValue("КАЛИБР", "КАЛІБР")) {
                return new WeaponItemToken(t, num.EndToken.Next) {
                    Typ = Typs.Caliber,
                    Value = Pullenti.Ner.Core.NumberHelper.DoubleToString(num.SingleVal.Value)
                };
            }
        }
    }

    // "ПРОИЗВОДСТВО <organization | geo>".
    if (t.IsValue("ПРОИЗВОДСТВО", "ВИРОБНИЦТВО")) {
        Pullenti.Ner.Token tt1 = t.Next;
        if (tt1 != null && ((tt1.IsHiphen || tt1.IsChar(':')))) {
            tt1 = tt1.Next;
        }
        if (tt1 is Pullenti.Ner.ReferentToken) {
            if ((tt1.GetReferent() is Pullenti.Ner.Org.OrganizationReferent) || (tt1.GetReferent() is Pullenti.Ner.Geo.GeoReferent)) {
                return new WeaponItemToken(t, tt1) { Typ = Typs.Developer, Ref = tt1.GetReferent() };
            }
        }
    }
    return null;
}

/// <summary>
/// Extends a model item with the number (and single-letter suffixes) that follows it and
/// rewrites <c>Value</c>/<c>AltValue</c> as "NAME-NUMBER[LETTERS][-NUMBER]".
/// Requires <c>Value</c> to be non-empty when a number follows (its first character is inspected).
/// </summary>
void _correctModel() {
    Pullenti.Ner.Token tt = EndToken.Next;
    if (tt == null || tt.WhitespacesBeforeCount > 2) {
        return;
    }
    // NOTE(review): IsValue is given the whole string ":\\/." here, while the check further down
    // uses IsCharOf("\\/"); this was presumably meant to be IsCharOf as well - confirm before changing.
    if (tt.IsValue(":\\/.", null) || tt.IsHiphen) {
        tt = tt.Next;
    }
    if (tt is Pullenti.Ner.NumberToken) {
        StringBuilder tmp = new StringBuilder();
        tmp.Append((tt as Pullenti.Ner.NumberToken).Value);
        bool isLat = Pullenti.Morph.LanguageHelper.IsLatinChar(Value[0]);
        EndToken = tt;
        // Append single letters glued to the number, converted to the alphabet of the model name.
        for (tt = tt.Next; tt != null; tt = tt.Next) {
            if ((tt is Pullenti.Ner.TextToken) && tt.LengthChar == 1 && tt.Chars.IsLetter) {
                if (!tt.IsWhitespaceBefore || ((tt.Previous != null && tt.Previous.IsHiphen))) {
                    char ch = (tt as Pullenti.Ner.TextToken).Term[0];
                    EndToken = tt;
                    char ch2 = (char)0;
                    if (Pullenti.Morph.LanguageHelper.IsLatinChar(ch) && !isLat) {
                        ch2 = Pullenti.Morph.LanguageHelper.GetCyrForLat(ch);
                        if (ch2 != ((char)0)) {
                            ch = ch2;
                        }
                    } else if (Pullenti.Morph.LanguageHelper.IsCyrillicChar(ch) && isLat) {
                        ch2 = Pullenti.Morph.LanguageHelper.GetLatForCyr(ch);
                        if (ch2 != ((char)0)) {
                            ch = ch2;
                        }
                    }
                    tmp.Append(ch);
                    continue;
                }
            }
            break;
        }
        Value = string.Format("{0}-{1}", Value, tmp.ToString());
        AltValue = Pullenti.Ner.Core.MiscHelper.CreateCyrLatAlternative(Value);
    }
    // Optional second number attached without whitespace through '-', '\' or '/'.
    if (!EndToken.IsWhitespaceAfter && EndToken.Next != null && ((EndToken.Next.IsHiphen || EndToken.Next.IsCharOf("\\/")))) {
        if (!EndToken.Next.IsWhitespaceAfter && (EndToken.Next.Next is Pullenti.Ner.NumberToken)) {
            EndToken = EndToken.Next.Next;
            Value = string.Format("{0}-{1}", Value, (EndToken as Pullenti.Ner.NumberToken).Value);
            if (AltValue != null) {
                AltValue = string.Format("{0}-{1}", AltValue, (EndToken as Pullenti.Ner.NumberToken).Value);
            }
        }
    }
}
/// <summary>
/// Builds the referent token for person <paramref name="p"/>, widening the span with
/// leading attributes and with trailing attributes, nicknames, contacts, identity documents
/// and organizations found after <paramref name="end"/>.
/// </summary>
/// <param name="p">Person referent to fill; <c>null</c> yields <c>null</c>.</param>
/// <param name="begin">First token of the person name; may be moved left over leading attributes.</param>
/// <param name="end">Last token of the person name; may be moved right over the recognised tail.</param>
/// <param name="morph">Morphology of the name; a singular copy of its items is stored in the result.</param>
/// <param name="attrs">Attributes recognised before the name, or <c>null</c>.</param>
/// <param name="ad">Analyzer data (local ontology, overflow counter), or <c>null</c>.</param>
/// <param name="forAttribute">When true (or when preceded by "ИМЕНИ"/"ИМ."), the tail is not analysed.</param>
/// <param name="afterBePredicate">Not read by this method.</param>
/// <returns>The referent token spanning the person and every absorbed attribute.</returns>
internal static Pullenti.Ner.ReferentToken CreateReferentToken(Pullenti.Ner.Person.PersonReferent p, Pullenti.Ner.Token begin, Pullenti.Ner.Token end, Pullenti.Ner.MorphCollection morph, List<PersonAttrToken> attrs, Pullenti.Ner.Person.PersonAnalyzer.PersonAnalyzerData ad, bool forAttribute, bool afterBePredicate) {
    if (p == null) {
        return null;
    }

    // 1. Leading attributes (or a bare "ИП" prefix when no attributes were supplied).
    bool hasPrefix = false;
    if (attrs != null) {
        foreach (PersonAttrToken a in attrs) {
            if (a.Typ == PersonAttrTerminType.BestRegards) {
                hasPrefix = true;
            } else {
                if (a.BeginChar < begin.BeginChar) {
                    begin = a.BeginToken;
                    if ((a.EndToken.Next != null && a.EndToken.Next.IsChar(')') && begin.Previous != null) && begin.Previous.IsChar('(')) {
                        begin = begin.Previous;
                    }
                }
                if (a.Typ != PersonAttrTerminType.Prefix) {
                    if (a.Age != null) {
                        p.AddSlot(Pullenti.Ner.Person.PersonReferent.ATTR_AGE, a.Age, false, 0);
                    }
                    if (a.PropRef == null) {
                        p.AddSlot(Pullenti.Ner.Person.PersonReferent.ATTR_ATTR, a.Value, false, 0);
                    } else {
                        p.AddSlot(Pullenti.Ner.Person.PersonReferent.ATTR_ATTR, a, false, 0);
                    }
                } else if (a.Gender == Pullenti.Morph.MorphGender.Feminie && !p.IsFemale) {
                    p.IsFemale = true;
                } else if (a.Gender == Pullenti.Morph.MorphGender.Masculine && !p.IsMale) {
                    p.IsMale = true;
                }
            }
        }
    } else if ((begin.Previous is Pullenti.Ner.TextToken) && (begin.WhitespacesBeforeCount < 3)) {
        if ((begin.Previous as Pullenti.Ner.TextToken).Term == "ИП") {
            PersonAttrToken a = new PersonAttrToken(begin.Previous, begin.Previous);
            a.PropRef = new Pullenti.Ner.Person.PersonPropertyReferent();
            a.PropRef.Name = "индивидуальный предприниматель";
            p.AddSlot(Pullenti.Ner.Person.PersonReferent.ATTR_ATTR, a, false, 0);
            begin = begin.Previous;
        }
    }

    // 2. Singular copy of the morphology, with gender taken from the person when undefined.
    Pullenti.Ner.MorphCollection m0 = new Pullenti.Ner.MorphCollection();
    foreach (Pullenti.Morph.MorphBaseInfo it in morph.Items) {
        Pullenti.Morph.MorphBaseInfo bi = new Pullenti.Morph.MorphBaseInfo();
        bi.CopyFrom(it);
        bi.Number = Pullenti.Morph.MorphNumber.Singular;
        if (bi.Gender == Pullenti.Morph.MorphGender.Undefined) {
            if (p.IsMale && !p.IsFemale) {
                bi.Gender = Pullenti.Morph.MorphGender.Masculine;
            }
            if (!p.IsMale && p.IsFemale) {
                bi.Gender = Pullenti.Morph.MorphGender.Feminie;
            }
        }
        m0.AddItem(bi);
    }
    morph = m0;
    if ((attrs != null && attrs.Count > 0 && !attrs[0].Morph.Case.IsUndefined) && morph.Case.IsUndefined) {
        morph.Case = attrs[0].Morph.Case;
        if (attrs[0].Morph.Number == Pullenti.Morph.MorphNumber.Singular) {
            morph.Number = Pullenti.Morph.MorphNumber.Singular;
        }
        if (p.IsMale && !p.IsFemale) {
            morph.Gender = Pullenti.Morph.MorphGender.Masculine;
        } else if (p.IsFemale) {
            morph.Gender = Pullenti.Morph.MorphGender.Feminie;
        }
    }

    // 3. "ИМЕНИ <person>" / "ИМ. <person>" forces attribute mode.
    if (begin.Previous != null) {
        Pullenti.Ner.Token ttt = begin.Previous;
        if (ttt.IsValue("ИМЕНИ", "ІМЕНІ")) {
            forAttribute = true;
        } else {
            if (ttt.IsChar('.') && ttt.Previous != null) {
                ttt = ttt.Previous;
            }
            if (ttt.WhitespacesAfterCount < 3) {
                if (ttt.IsValue("ИМ", "ІМ")) {
                    forAttribute = true;
                }
            }
        }
    }
    if (forAttribute) {
        // Fix: the return statement was previously left unterminated ("{ return new ... { } } ;").
        return new Pullenti.Ner.ReferentToken(p, begin, end) { Morph = morph, MiscAttrs = (int)p.m_PersonIdentityTyp };
    }

    // 4. In a list "X, Y и Z" inherit a shared property attached to the first person of the list.
    if ((begin.Previous != null && begin.Previous.IsCommaAnd && (begin.Previous.Previous is Pullenti.Ner.ReferentToken)) && (begin.Previous.Previous.GetReferent() is Pullenti.Ner.Person.PersonReferent)) {
        Pullenti.Ner.ReferentToken rt00 = begin.Previous.Previous as Pullenti.Ner.ReferentToken;
        for (Pullenti.Ner.Token ttt = (Pullenti.Ner.Token)rt00; ttt != null;) {
            if (ttt.Previous == null || !(ttt.Previous.Previous is Pullenti.Ner.ReferentToken)) {
                break;
            }
            if (!ttt.Previous.IsCommaAnd || !(ttt.Previous.Previous.GetReferent() is Pullenti.Ner.Person.PersonReferent)) {
                break;
            }
            rt00 = ttt.Previous.Previous as Pullenti.Ner.ReferentToken;
            ttt = rt00;
        }
        if (rt00.BeginToken.GetReferent() is Pullenti.Ner.Person.PersonPropertyReferent) {
            bool ok = false;
            if ((rt00.BeginToken as Pullenti.Ner.ReferentToken).EndToken.Next != null && (rt00.BeginToken as Pullenti.Ner.ReferentToken).EndToken.Next.IsChar(':')) {
                ok = true;
            } else if (rt00.BeginToken.Morph.Number == Pullenti.Morph.MorphNumber.Plural) {
                ok = true;
            }
            if (ok) {
                p.AddSlot(Pullenti.Ner.Person.PersonReferent.ATTR_ATTR, rt00.BeginToken.GetReferent(), false, 0);
            }
        }
    }

    // 5. Recursion guard: the tail analysis below can call back into the person analyzer.
    if (ad != null) {
        if (ad.OverflowLevel > 10) {
            // Fix: same unterminated return statement as above.
            return new Pullenti.Ner.ReferentToken(p, begin, end) { Morph = morph, MiscAttrs = (int)p.m_PersonIdentityTyp };
        }
        ad.OverflowLevel++;
    }

    // 6. Trailing attributes, bracketed surname/latin name variants and nicknames.
    List<PersonAttrToken> attrs1 = null;
    bool hasPosition = false;
    bool openBr = false;
    for (Pullenti.Ner.Token t = end.Next; t != null; t = t.Next) {
        if (t.IsTableControlChar) {
            break;
        }
        if (t.IsNewlineBefore) {
            if (t.NewlinesBeforeCount > 2) {
                break;
            }
            if (attrs1 != null && attrs1.Count > 0) {
                break;
            }
            Pullenti.Ner.Mail.Internal.MailLine ml = Pullenti.Ner.Mail.Internal.MailLine.Parse(t, 0, 0);
            if (ml != null && ml.Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.From) {
                break;
            }
            if (t.Chars.IsCapitalUpper) {
                PersonAttrToken attr1 = PersonAttrToken.TryAttach(t, (ad == null ? null : ad.LocalOntology), PersonAttrToken.PersonAttrAttachAttrs.No);
                bool ok1 = false;
                if (attr1 != null) {
                    if (hasPrefix || attr1.IsNewlineAfter || ((attr1.EndToken.Next != null && attr1.EndToken.Next.IsTableControlChar))) {
                        ok1 = true;
                    } else {
                        for (Pullenti.Ner.Token tt2 = t.Next; tt2 != null && tt2.EndChar <= attr1.EndChar; tt2 = tt2.Next) {
                            if (tt2.IsWhitespaceBefore) {
                                ok1 = true;
                            }
                        }
                    }
                } else {
                    Pullenti.Ner.Token ttt = CorrectTailAttributes(p, t);
                    if (ttt != null && ttt != t) {
                        end = (t = ttt);
                        continue;
                    }
                }
                if (!ok1) {
                    break;
                }
            }
        }
        if (t.IsHiphen || t.IsCharOf("_>|")) {
            continue;
        }
        if (t.IsValue("МОДЕЛЬ", null)) {
            break;
        }
        Pullenti.Ner.Token tt = CorrectTailAttributes(p, t);
        if (tt != t && tt != null) {
            end = (t = tt);
            continue;
        }
        bool isBe = false;
        if (t.IsChar('(') && t == end.Next) {
            openBr = true;
            t = t.Next;
            if (t == null) {
                break;
            }
            // "(Surname)" right after the name: an alternative last name.
            PersonItemToken pit1 = PersonItemToken.TryAttach(t, null, PersonItemToken.ParseAttr.No, null);
            if ((pit1 != null && t.Chars.IsCapitalUpper && pit1.EndToken.Next != null) && (t is Pullenti.Ner.TextToken) && pit1.EndToken.Next.IsChar(')')) {
                if (pit1.Lastname != null) {
                    Pullenti.Morph.MorphBaseInfo inf = new Pullenti.Morph.MorphBaseInfo() { Case = Pullenti.Morph.MorphCase.Nominative };
                    if (p.IsMale) {
                        inf.Gender |= Pullenti.Morph.MorphGender.Masculine;
                    }
                    if (p.IsFemale) {
                        inf.Gender |= Pullenti.Morph.MorphGender.Feminie;
                    }
                    PersonMorphCollection sur = PersonIdentityToken.CreateLastname(pit1, inf);
                    if (sur != null) {
                        p.AddFioIdentity(sur, null, null);
                        end = (t = pit1.EndToken.Next);
                        continue;
                    }
                }
            }
            // "(Latin Name)" whose every item matches a Cyrillic name slot of the person.
            if ((t is Pullenti.Ner.TextToken) && t.Chars.IsLatinLetter) {
                List<PersonItemToken> pits = PersonItemToken.TryAttachList(t, null, PersonItemToken.ParseAttr.CanBeLatin, 10);
                if (((pits != null && pits.Count >= 2 && pits.Count <= 3) && pits[0].Chars.IsLatinLetter && pits[1].Chars.IsLatinLetter) && pits[pits.Count - 1].EndToken.Next != null && pits[pits.Count - 1].EndToken.Next.IsChar(')')) {
                    Pullenti.Ner.Person.PersonReferent pr2 = new Pullenti.Ner.Person.PersonReferent();
                    int cou = 0;
                    foreach (PersonItemToken pi in pits) {
                        foreach (Pullenti.Ner.Slot si in p.Slots) {
                            if (si.TypeName == Pullenti.Ner.Person.PersonReferent.ATTR_FIRSTNAME || si.TypeName == Pullenti.Ner.Person.PersonReferent.ATTR_MIDDLENAME || si.TypeName == Pullenti.Ner.Person.PersonReferent.ATTR_LASTNAME) {
                                if (Pullenti.Ner.Core.MiscHelper.CanBeEqualCyrAndLatSS(si.Value.ToString(), pi.Value)) {
                                    cou++;
                                    pr2.AddSlot(si.TypeName, pi.Value, false, 0);
                                    break;
                                }
                            }
                        }
                    }
                    if (cou == pits.Count) {
                        foreach (Pullenti.Ner.Slot si in pr2.Slots) {
                            p.AddSlot(si.TypeName, si.Value, false, 0);
                        }
                        end = (t = pits[pits.Count - 1].EndToken.Next);
                        continue;
                    }
                }
            }
        } else if (t.IsComma) {
            t = t.Next;
            if ((t is Pullenti.Ner.TextToken) && (t as Pullenti.Ner.TextToken).IsValue("WHO", null)) {
                continue;
            }
            // ", Latin Name" - same matching as the bracketed variant, without the closing bracket.
            if ((t is Pullenti.Ner.TextToken) && t.Chars.IsLatinLetter) {
                List<PersonItemToken> pits = PersonItemToken.TryAttachList(t, null, PersonItemToken.ParseAttr.CanBeLatin, 10);
                if ((pits != null && pits.Count >= 2 && pits.Count <= 3) && pits[0].Chars.IsLatinLetter && pits[1].Chars.IsLatinLetter) {
                    Pullenti.Ner.Person.PersonReferent pr2 = new Pullenti.Ner.Person.PersonReferent();
                    int cou = 0;
                    foreach (PersonItemToken pi in pits) {
                        foreach (Pullenti.Ner.Slot si in p.Slots) {
                            if (si.TypeName == Pullenti.Ner.Person.PersonReferent.ATTR_FIRSTNAME || si.TypeName == Pullenti.Ner.Person.PersonReferent.ATTR_MIDDLENAME || si.TypeName == Pullenti.Ner.Person.PersonReferent.ATTR_LASTNAME) {
                                if (Pullenti.Ner.Core.MiscHelper.CanBeEqualCyrAndLatSS(si.Value.ToString(), pi.Value)) {
                                    cou++;
                                    pr2.AddSlot(si.TypeName, pi.Value, false, 0);
                                    break;
                                }
                            }
                        }
                    }
                    if (cou == pits.Count) {
                        foreach (Pullenti.Ner.Slot si in pr2.Slots) {
                            p.AddSlot(si.TypeName, si.Value, false, 0);
                        }
                        end = (t = pits[pits.Count - 1].EndToken);
                        continue;
                    }
                }
            }
        } else if ((t is Pullenti.Ner.TextToken) && (t as Pullenti.Ner.TextToken).IsVerbBe) {
            t = t.Next;
        } else if (t.IsAnd && t.IsWhitespaceAfter && !t.IsNewlineAfter) {
            if (t == end.Next) {
                break;
            }
            t = t.Next;
        } else if (t.IsHiphen && t == end.Next) {
            t = t.Next;
        } else if (t.IsChar('.') && t == end.Next && hasPrefix) {
            t = t.Next;
        }

        Pullenti.Ner.Token ttt2 = CreateNickname(p, t);
        if (ttt2 != null) {
            t = (end = ttt2);
            continue;
        }
        if (t == null) {
            break;
        }
        PersonAttrToken attr = null;
        attr = PersonAttrToken.TryAttach(t, (ad == null ? null : ad.LocalOntology), PersonAttrToken.PersonAttrAttachAttrs.No);
        if (attr == null) {
            if ((t != null && t.GetReferent() != null && t.GetReferent().TypeName == "GEO") && attrs1 != null && openBr) {
                continue;
            }
            if ((t.Chars.IsCapitalUpper && openBr && t.Next != null) && t.Next.IsChar(')')) {
                if (p.FindSlot(Pullenti.Ner.Person.PersonReferent.ATTR_LASTNAME, null, true) == null) {
                    p.AddSlot(Pullenti.Ner.Person.PersonReferent.ATTR_LASTNAME, t.GetSourceText().ToUpper(), false, 0);
                    t = t.Next;
                    end = t;
                }
            }
            // A following "КОТОРЫЙ" in singular reveals the gender.
            if (t != null && t.IsValue("КОТОРЫЙ", null) && t.Morph.Number == Pullenti.Morph.MorphNumber.Singular) {
                if (!p.IsFemale && t.Morph.Gender == Pullenti.Morph.MorphGender.Feminie) {
                    p.IsFemale = true;
                    p.CorrectData();
                } else if (!p.IsMale && t.Morph.Gender == Pullenti.Morph.MorphGender.Masculine) {
                    p.IsMale = true;
                    p.CorrectData();
                }
            }
            break;
        }
        if (attr.Morph.Number == Pullenti.Morph.MorphNumber.Plural) {
            break;
        }
        if (attr.Typ == PersonAttrTerminType.BestRegards) {
            break;
        }
        if (attr.IsDoubt) {
            if (hasPrefix) {
                // accepted: signature context
            } else if (t.IsNewlineBefore && attr.IsNewlineAfter) {
                // accepted: attribute occupies its own line
            } else if (t.Previous != null && ((t.Previous.IsHiphen || t.Previous.IsChar(':')))) {
                // accepted: explicitly introduced by '-' or ':'
            } else {
                break;
            }
        }
        if (!morph.Case.IsUndefined && !attr.Morph.Case.IsUndefined) {
            if (((morph.Case & attr.Morph.Case)).IsUndefined && !isBe) {
                break;
            }
        }
        if (openBr) {
            if (Pullenti.Ner.Person.PersonAnalyzer.TryAttachPerson(t, ad, false, 0, true) != null) {
                break;
            }
        }
        if (attrs1 == null) {
            if (t.Previous.IsComma && t.Previous == end.Next) {
                Pullenti.Ner.Token ttt = attr.EndToken.Next;
                if (ttt != null) {
                    if (ttt.Morph.Class.IsVerb) {
                        if (Pullenti.Ner.Core.MiscHelper.CanBeStartOfSentence(begin)) {
                            // accepted
                        } else {
                            break;
                        }
                    }
                }
            }
            attrs1 = new List<PersonAttrToken>();
        }
        attrs1.Add(attr);
        if (attr.Typ == PersonAttrTerminType.Position || attr.Typ == PersonAttrTerminType.King) {
            if (!isBe) {
                hasPosition = true;
            }
        } else if (attr.Typ != PersonAttrTerminType.Prefix) {
            if (attr.Typ == PersonAttrTerminType.Other && attr.Age != null) {
                // an age attribute is allowed in the tail
            } else {
                attrs1 = null;
                break;
            }
        }
        t = attr.EndToken;
    }

    // 7. A trailing position is dropped when a leading position already exists, unless layout says otherwise.
    if (attrs1 != null && hasPosition && attrs != null) {
        Pullenti.Ner.Token te1 = attrs[attrs.Count - 1].EndToken.Next;
        Pullenti.Ner.Token te2 = attrs1[0].BeginToken;
        if (te1.WhitespacesAfterCount > te2.WhitespacesBeforeCount && (te2.WhitespacesBeforeCount < 2)) {
            // keep
        } else if (attrs1[0].Age != null) {
            // keep
        } else if (((te1.IsHiphen || te1.IsChar(':'))) && !attrs1[0].IsNewlineBefore && ((te2.Previous.IsComma || te2.Previous == end))) {
            // keep
        } else {
            foreach (PersonAttrToken a in attrs) {
                if (a.Typ == PersonAttrTerminType.Position) {
                    Pullenti.Ner.Token te = attrs1[attrs1.Count - 1].EndToken;
                    if (te.Next != null) {
                        if (!te.Next.IsChar('.')) {
                            attrs1 = null;
                            break;
                        }
                    }
                }
            }
        }
    }

    // 8. The trailing attribute may actually belong to the next person.
    if (attrs1 != null && !hasPrefix) {
        PersonAttrToken attr = attrs1[attrs1.Count - 1];
        bool ok = false;
        if (attr.EndToken.Next != null && attr.EndToken.Next.Chars.IsCapitalUpper) {
            ok = true;
        } else {
            Pullenti.Ner.ReferentToken rt = Pullenti.Ner.Person.PersonAnalyzer.TryAttachPerson(attr.BeginToken, ad, false, -1, false);
            if (rt != null && (rt.Referent is Pullenti.Ner.Person.PersonReferent)) {
                ok = true;
            }
        }
        if (ok) {
            if (attr.BeginToken.WhitespacesBeforeCount > attr.EndToken.WhitespacesAfterCount) {
                attrs1 = null;
            } else if (attr.BeginToken.WhitespacesBeforeCount == attr.EndToken.WhitespacesAfterCount) {
                Pullenti.Ner.ReferentToken rt1 = Pullenti.Ner.Person.PersonAnalyzer.TryAttachPerson(attr.BeginToken, ad, false, -1, false);
                if (rt1 != null) {
                    attrs1 = null;
                }
            }
        }
    }

    // 9. Commit the accepted trailing attributes.
    if (attrs1 != null) {
        foreach (PersonAttrToken a in attrs1) {
            if (a.Typ != PersonAttrTerminType.Prefix) {
                if (a.Age != null) {
                    p.AddSlot(Pullenti.Ner.Person.PersonReferent.ATTR_AGE, a.Age, true, 0);
                } else if (a.PropRef == null) {
                    p.AddSlot(Pullenti.Ner.Person.PersonReferent.ATTR_ATTR, a.Value, false, 0);
                } else {
                    p.AddSlot(Pullenti.Ner.Person.PersonReferent.ATTR_ATTR, a, false, 0);
                }
                end = a.EndToken;
                if (a.Gender != Pullenti.Morph.MorphGender.Undefined && !p.IsFemale && !p.IsMale) {
                    if (a.Gender == Pullenti.Morph.MorphGender.Masculine && !p.IsMale) {
                        p.IsMale = true;
                        p.CorrectData();
                    } else if (a.Gender == Pullenti.Morph.MorphGender.Feminie && !p.IsFemale) {
                        p.IsFemale = true;
                        p.CorrectData();
                    }
                }
            }
        }
        if (openBr) {
            if (end.Next != null && end.Next.IsChar(')')) {
                end = end.Next;
            }
        }
    }

    // 10. Contacts, identity documents and employer organizations that follow the person.
    int crlfCou = 0;
    for (Pullenti.Ner.Token t = end.Next; t != null; t = t.Next) {
        if (t.IsTableControlChar) {
            break;
        }
        if (t.IsNewlineBefore) {
            Pullenti.Ner.Mail.Internal.MailLine ml = Pullenti.Ner.Mail.Internal.MailLine.Parse(t, 0, 0);
            if (ml != null && ml.Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.From) {
                break;
            }
            crlfCou++;
        }
        if (t.IsCharOf(":,(") || t.IsHiphen) {
            continue;
        }
        if (t.IsChar('.') && t == end.Next) {
            continue;
        }
        Pullenti.Ner.Referent r = t.GetReferent();
        if (r != null) {
            if (r.TypeName == "PHONE" || r.TypeName == "URI" || r.TypeName == "ADDRESS") {
                string ty = r.GetStringValue("SCHEME");
                if (r.TypeName == "URI") {
                    if ((ty != "mailto" && ty != "skype" && ty != "ICQ") && ty != "http") {
                        break;
                    }
                }
                p.AddContact(r);
                end = t;
                crlfCou = 0;
                continue;
            }
        }
        if (r is Pullenti.Ner.Person.PersonIdentityReferent) {
            p.AddSlot(Pullenti.Ner.Person.PersonReferent.ATTR_IDDOC, r, false, 0);
            end = t;
            crlfCou = 0;
            continue;
        }
        if (r != null && r.TypeName == "ORGANIZATION") {
            if (t.Next != null && t.Next.Morph.Class.IsVerb) {
                break;
            }
            if (begin.Previous != null && begin.Previous.Morph.Class.IsVerb) {
                break;
            }
            if (t.WhitespacesAfterCount == 1) {
                break;
            }
            // Skip the organization when an existing attribute already references it.
            bool exist = false;
            foreach (Pullenti.Ner.Slot s in p.Slots) {
                if (s.TypeName == Pullenti.Ner.Person.PersonReferent.ATTR_ATTR && (s.Value is Pullenti.Ner.Person.PersonPropertyReferent)) {
                    Pullenti.Ner.Person.PersonPropertyReferent pr = s.Value as Pullenti.Ner.Person.PersonPropertyReferent;
                    if (pr.FindSlot(Pullenti.Ner.Person.PersonPropertyReferent.ATTR_REF, r, true) != null) {
                        exist = true;
                        break;
                    }
                } else if (s.TypeName == Pullenti.Ner.Person.PersonReferent.ATTR_ATTR && (s.Value is PersonAttrToken)) {
                    PersonAttrToken pr = s.Value as PersonAttrToken;
                    if (pr.Referent.FindSlot(Pullenti.Ner.Person.PersonPropertyReferent.ATTR_REF, r, true) != null) {
                        exist = true;
                        break;
                    }
                }
            }
            if (!exist) {
                PersonAttrToken pat = new PersonAttrToken(t, t);
                pat.PropRef = new Pullenti.Ner.Person.PersonPropertyReferent() { Name = "сотрудник" };
                pat.PropRef.AddSlot(Pullenti.Ner.Person.PersonPropertyReferent.ATTR_REF, r, false, 0);
                p.AddSlot(Pullenti.Ner.Person.PersonReferent.ATTR_ATTR, pat, false, 0);
            }
            continue;
        }
        if (r != null) {
            break;
        }
        if (!hasPrefix || crlfCou >= 2) {
            break;
        }
        Pullenti.Ner.ReferentToken rt = t.Kit.ProcessReferent("PERSON", t);
        if (rt != null) {
            break;
        }
    }
    if (ad != null) {
        ad.OverflowLevel--;
    }

    // 11. "<identity document>, НА ИМЯ <person>": link the preceding document to the person.
    if (begin.IsValue("НА", null) && begin.Next != null && begin.Next.IsValue("ИМЯ", null)) {
        Pullenti.Ner.Token t0 = begin.Previous;
        if (t0 != null && t0.IsComma) {
            t0 = t0.Previous;
        }
        if (t0 != null && (t0.GetReferent() is Pullenti.Ner.Person.PersonIdentityReferent)) {
            p.AddSlot(Pullenti.Ner.Person.PersonReferent.ATTR_IDDOC, t0.GetReferent(), false, 0);
        }
    }
    return new Pullenti.Ner.ReferentToken(p, begin, end) { Morph = morph, MiscAttrs = (int)p.m_PersonIdentityTyp };
}
/// <summary>
/// Reads one line of a mail text starting at <paramref name="t0"/> and classifies it as a
/// "from" header, a greeting, a closing ("best regards") or leaves it undefined.
/// </summary>
/// <param name="t0">First token of the line; <c>null</c> yields <c>null</c>.</param>
/// <param name="lev">Recursion depth; look-ahead into the line after a date is only done while it is below 5.</param>
/// <param name="maxCount">When positive, scanning stops once more than this many tokens were visited.</param>
/// <returns>The parsed line with its span, quote level, collected referents and type.</returns>
public static MailLine Parse(Pullenti.Ner.Token t0, int lev, int maxCount = 0) {
    if (t0 == null) {
        return null;
    }
    MailLine res = new MailLine(t0, t0);

    // Pass 1: find the end of the line, count leading quote marks, detect "<from-word>:" and collect referents.
    bool pr = true;
    int cou = 0;
    for (Pullenti.Ner.Token t = t0; t != null; t = t.Next, cou++) {
        if (t.IsNewlineBefore && t0 != t) {
            break;
        }
        if (maxCount > 0 && cou > maxCount) {
            break;
        }
        res.EndToken = t;
        if (t.IsTableControlChar || t.IsHiphen) {
            continue;
        }
        if (pr) {
            if ((t is Pullenti.Ner.TextToken) && t.IsCharOf(">|")) {
                res.Lev++;
            } else {
                pr = false;
                Pullenti.Ner.Core.TerminToken tok = m_FromWords.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
                if (tok != null && tok.EndToken.Next != null && tok.EndToken.Next.IsChar(':')) {
                    res.Typ = Types.From;
                    t = tok.EndToken.Next;
                    continue;
                }
            }
        }
        if (t is Pullenti.Ner.ReferentToken) {
            Pullenti.Ner.Referent r = t.GetReferent();
            if (r != null) {
                if ((((r is Pullenti.Ner.Person.PersonReferent) || (r is Pullenti.Ner.Geo.GeoReferent) || (r is Pullenti.Ner.Address.AddressReferent)) || r.TypeName == "PHONE" || r.TypeName == "URI") || (r is Pullenti.Ner.Person.PersonPropertyReferent) || r.TypeName == "ORGANIZATION") {
                    res.Refs.Add(r);
                }
            }
        }
    }

    // Pass 2: greeting line - greeting words plus names, with fewer than three other tokens.
    if (res.Typ == Types.Undefined) {
        Pullenti.Ner.Token t = t0;
        for (; t != null && (t.EndChar < res.EndChar); t = t.Next) {
            if (!t.IsHiphen && t.Chars.IsLetter) {
                break;
            }
        }
        int ok = 0;
        int nams = 0;
        int oth = 0;
        Pullenti.Ner.Token lastComma = null;
        for (; t != null && (t.EndChar < res.EndChar); t = t.Next) {
            if (t.GetReferent() is Pullenti.Ner.Person.PersonReferent) {
                nams++;
                continue;
            }
            if (t is Pullenti.Ner.TextToken) {
                if (!t.Chars.IsLetter) {
                    lastComma = t;
                    continue;
                }
                Pullenti.Ner.Core.TerminToken tok = m_HelloWords.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
                if (tok != null) {
                    ok++;
                    t = tok.EndToken;
                    continue;
                }
                if (t.IsValue("ВСЕ", null) || t.IsValue("ALL", null) || t.IsValue("TEAM", null)) {
                    nams++;
                    continue;
                }
                Pullenti.Ner.Person.Internal.PersonItemToken pit = Pullenti.Ner.Person.Internal.PersonItemToken.TryAttach(t, null, Pullenti.Ner.Person.Internal.PersonItemToken.ParseAttr.No, null);
                if (pit != null) {
                    nams++;
                    t = pit.EndToken;
                    continue;
                }
            }
            if ((++oth) > 3) {
                // Too much foreign text: cut the line at the last punctuation if a greeting was seen.
                if (ok > 0 && lastComma != null) {
                    res.EndToken = lastComma;
                    oth = 0;
                }
                break;
            }
        }
        if ((oth < 3) && ok > 0) {
            res.Typ = Types.Hello;
        }
    }

    // Pass 3: closing line - regards words, optionally followed by genitive noun phrases.
    if (res.Typ == Types.Undefined) {
        int okWords = 0;
        for (Pullenti.Ner.Token t = t0; t != null && t.EndChar <= res.EndChar; t = t.Next) {
            if (!(t is Pullenti.Ner.TextToken)) {
                continue;
            }
            if (t.IsChar('<')) {
                Pullenti.Ner.Core.BracketSequenceToken br = Pullenti.Ner.Core.BracketHelper.TryParse(t, Pullenti.Ner.Core.BracketParseAttr.No, 100);
                if (br != null) {
                    t = br.EndToken;
                    continue;
                }
            }
            if (!t.IsLetters || t.IsTableControlChar) {
                continue;
            }
            Pullenti.Ner.Core.TerminToken tok = m_RegardWords.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
            if (tok != null) {
                okWords++;
                for (; t != null && t.EndChar <= tok.EndChar; t = t.Next) {
                    t.Tag = tok.Termin;
                }
                t = tok.EndToken;
                if ((t.Next is Pullenti.Ner.TextToken) && t.Next.Morph.Case.IsGenitive) {
                    // Fix: stop at the end of the token chain instead of dereferencing null
                    // when the regards phrase is the last thing in the text.
                    for (t = t.Next; t != null && t.EndChar <= res.EndChar; t = t.Next) {
                        if (t.Morph.Class.IsConjunction) {
                            continue;
                        }
                        Pullenti.Ner.Core.NounPhraseToken npt1 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                        if (npt1 == null) {
                            break;
                        }
                        if (!npt1.Morph.Case.IsGenitive) {
                            break;
                        }
                        for (; t.EndChar < npt1.EndChar; t = t.Next) {
                            t.Tag = t;
                        }
                        t.Tag = t;
                    }
                    if (t == null) {
                        // Nothing left to scan; the outer loop increment must not touch a null token.
                        break;
                    }
                }
                continue;
            }
            if ((t.Morph.Class.IsPreposition || t.Morph.Class.IsConjunction || t.Morph.Class.IsMisc) || t.IsValue("C", null)) {
                continue;
            }
            if ((okWords > 0 && t.Previous != null && t.Previous.IsComma) && t.Previous.BeginChar > t0.BeginChar && !t.Chars.IsAllLower) {
                res.EndToken = t.Previous;
                break;
            }
            Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
            if (npt == null) {
                if ((res.EndChar - t.EndChar) > 10) {
                    okWords = 0;
                }
                break;
            }
            tok = m_RegardWords.TryParse(npt.EndToken, Pullenti.Ner.Core.TerminParseAttr.No);
            if (tok != null && (npt.EndToken is Pullenti.Ner.TextToken)) {
                string term = (npt.EndToken as Pullenti.Ner.TextToken).Term;
                if (term == "ДЕЛ") {
                    tok = null;
                }
            }
            if (tok == null) {
                if (npt.Noun.IsValue("НАДЕЖДА", null)) {
                    t.Tag = t;
                } else if (okWords > 0 && t.IsValue("NICE", null) && ((res.EndChar - npt.EndChar) < 13)) {
                    t.Tag = t;
                } else {
                    okWords = 0;
                }
                break;
            }
            okWords++;
            for (; t != null && t.EndChar <= tok.EndChar; t = t.Next) {
                t.Tag = tok.Termin;
            }
            t = tok.EndToken;
        }
        if (okWords > 0) {
            res.Typ = Types.BestRegards;
        }
    }

    // Pass 4: forwarded/quoted message headers ("Original Message", "On <date> ... wrote", ...).
    if (res.Typ == Types.Undefined) {
        Pullenti.Ner.Token t = t0;
        for (; t != null && (t.EndChar < res.EndChar); t = t.Next) {
            if (!(t is Pullenti.Ner.TextToken)) {
                break;
            } else if (!t.IsHiphen && t.Chars.IsLetter) {
                break;
            }
        }
        if (t != null) {
            if (((t.IsValue("ПЕРЕСЫЛАЕМОЕ", null) || t.IsValue("ПЕРЕАДРЕСОВАННОЕ", null))) && t.Next != null && t.Next.IsValue("СООБЩЕНИЕ", null)) {
                res.Typ = Types.From;
                res.MustBeFirstLine = true;
            } else if ((t.IsValue("НАЧАЛО", null) && t.Next != null && ((t.Next.IsValue("ПЕРЕСЫЛАЕМОЕ", null) || t.Next.IsValue("ПЕРЕАДРЕСОВАННОЕ", null)))) && t.Next.Next != null && t.Next.Next.IsValue("СООБЩЕНИЕ", null)) {
                res.Typ = Types.From;
                res.MustBeFirstLine = true;
            } else if (t.IsValue("ORIGINAL", null) && t.Next != null && ((t.Next.IsValue("MESSAGE", null) || t.Next.IsValue("APPOINTMENT", null)))) {
                res.Typ = Types.From;
                res.MustBeFirstLine = true;
            } else if (t.IsValue("ПЕРЕСЛАНО", null) && t.Next != null && t.Next.IsValue("ПОЛЬЗОВАТЕЛЕМ", null)) {
                res.Typ = Types.From;
                res.MustBeFirstLine = true;
            } else if (((t.GetReferent() != null && t.GetReferent().TypeName == "DATE")) || ((t.IsValue("IL", null) && t.Next != null && t.Next.IsValue("GIORNO", null))) || ((t.IsValue("ON", null) && (t.Next is Pullenti.Ner.ReferentToken) && t.Next.GetReferent().TypeName == "DATE"))) {
                bool hasFrom = false;
                bool hasDate = t.GetReferent() != null && t.GetReferent().TypeName == "DATE";
                if (t.IsNewlineAfter && (lev < 5)) {
                    MailLine res1 = Parse(t.Next, lev + 1, 0);
                    if (res1 != null && res1.Typ == Types.Hello) {
                        res.Typ = Types.From;
                    }
                }
                // The header may wrap onto the next line, but only if that line is itself unclassified.
                MailLine next = Parse(res.EndToken.Next, lev + 1, 0);
                if (next != null) {
                    if (next.Typ != Types.Undefined) {
                        next = null;
                    }
                }
                int tmax = res.EndChar;
                if (next != null) {
                    tmax = next.EndChar;
                }
                Pullenti.Ner.Core.BracketSequenceToken br1 = null;
                for (; t != null && t.EndChar <= tmax; t = t.Next) {
                    if (t.IsValue("ОТ", null) || t.IsValue("FROM", null)) {
                        hasFrom = true;
                    } else if (t.GetReferent() != null && ((t.GetReferent().TypeName == "URI" || (t.GetReferent() is Pullenti.Ner.Person.PersonReferent)))) {
                        if (t.GetReferent().TypeName == "URI" && hasDate) {
                            if (br1 != null) {
                                hasFrom = true;
                                next = null;
                            }
                            if (t.Previous.IsChar('<') && t.Next != null && t.Next.IsChar('>')) {
                                t = t.Next;
                                if (t.Next != null && t.Next.IsChar(':')) {
                                    t = t.Next;
                                }
                                if (t.IsNewlineAfter) {
                                    hasFrom = true;
                                    next = null;
                                }
                            }
                        }
                        for (t = t.Next; t != null && t.EndChar <= res.EndChar; t = t.Next) {
                            if (t.IsValue("HA", null) && t.Next != null && t.Next.IsValue("SCRITTO", null)) {
                                hasFrom = true;
                                break;
                            } else if (((t.IsValue("НАПИСАТЬ", null) || t.IsValue("WROTE", null))) && ((res.EndChar - t.EndChar) < 10)) {
                                hasFrom = true;
                                break;
                            }
                        }
                        if (hasFrom) {
                            res.Typ = Types.From;
                            // Fix: the scan above may run off the end of the token chain; a null token
                            // then means the whole remaining text (i.e. the wrapped line) was consumed.
                            if (next != null && (t == null || t.EndChar >= next.BeginChar)) {
                                res.EndToken = next.EndToken;
                            }
                        }
                        break;
                    } else if (br1 == null && !t.IsChar('<') && Pullenti.Ner.Core.BracketHelper.CanBeStartOfSequence(t, true, false)) {
                        br1 = Pullenti.Ner.Core.BracketHelper.TryParse(t, Pullenti.Ner.Core.BracketParseAttr.No, 100);
                        if (br1 != null) {
                            t = br1.EndToken;
                        }
                    }
                }
            } else {
                // "<uri or person> ... ПИСАТЬ (" pattern.
                bool hasUri = false;
                for (; t != null && (t.EndChar < res.EndChar); t = t.Next) {
                    if (t.GetReferent() != null && ((t.GetReferent().TypeName == "URI" || (t.GetReferent() is Pullenti.Ner.Person.PersonReferent)))) {
                        hasUri = true;
                    } else if (t.IsValue("ПИСАТЬ", null) && hasUri) {
                        if (t.Next != null && t.Next.IsChar('(')) {
                            if (hasUri) {
                                res.Typ = Types.From;
                            }
                            break;
                        }
                    }
                }
            }
        }
    }
    return res;
}
/// <summary>
/// Looks at the tokens that follow a person mention and picks up trailing
/// birth / death information: a leading keyword ("born", "died", "date of birth"
/// in Russian, Ukrainian or English), a DATE referent, an age after a death
/// keyword, or a bracketed "(date)" / "(date range)" group.
/// </summary>
/// <param name="p">Person that receives the BORN / DIE / AGE slots; when null nothing is
/// stored but the consumed span is still computed.</param>
/// <param name="t0">First token after the person mention (may be null).</param>
/// <returns>The last token recognised as part of the tail, or <paramref name="t0"/> itself
/// when nothing was recognised.</returns>
static Pullenti.Ner.Token CorrectTailAttributes(Pullenti.Ner.Person.PersonReferent p, Pullenti.Ner.Token t0)
{
    Pullenti.Ner.Token last = t0;
    Pullenti.Ner.Token cur = t0;
    bool isBirth = false;
    bool isDeath = false;

    // An optional comma may separate the person from the tail.
    if (cur != null && cur.IsChar(','))
    {
        cur = cur.Next;
    }

    // Leading keyword that tells us how to interpret the following date.
    if (cur != null)
    {
        if (cur.IsValue("РОДИТЬСЯ", "НАРОДИТИСЯ") || cur.IsValue("BORN", null))
        {
            isBirth = true;
            cur = cur.Next;
        }
        else if (cur.IsValue("УМЕРЕТЬ", "ПОМЕРТИ") || cur.IsValue("СКОНЧАТЬСЯ", null) || cur.IsValue("DIED", null))
        {
            isDeath = true;
            cur = cur.Next;
        }
        else if (cur.IsValue("ДАТА", null) && cur.Next != null && cur.Next.IsValue("РОЖДЕНИЕ", "НАРОДЖЕННЯ"))
        {
            isBirth = true;
            cur = cur.Next.Next;
        }
    }

    // Skip connective tokens (prepositions, hyphens, colons) before the date.
    while (cur != null && (cur.Morph.Class.IsPreposition || cur.IsHiphen || cur.IsChar(':')))
    {
        cur = cur.Next;
    }

    Pullenti.Ner.Referent dateRef = (cur == null ? null : cur.GetReferent());
    if (dateRef != null && dateRef.TypeName == "DATE")
    {
        Pullenti.Ner.Token dateEnd = cur;
        Pullenti.Ner.Token marker = cur.Next;

        // A birth marker may also follow the date, optionally with a trailing dot.
        if (marker != null && (marker.IsValue("Р", null) || marker.IsValue("РОЖДЕНИЕ", "НАРОДЖЕННЯ")))
        {
            isBirth = true;
            dateEnd = marker;
            if (dateEnd.Next != null && dateEnd.Next.IsChar('.'))
            {
                dateEnd = dateEnd.Next;
            }
        }

        // Birth wins over death when both flags ended up set.
        if (isBirth || isDeath)
        {
            if (p != null)
            {
                string slotName = (isBirth ? Pullenti.Ner.Person.PersonReferent.ATTR_BORN : Pullenti.Ner.Person.PersonReferent.ATTR_DIE);
                p.AddSlot(slotName, dateRef, false, 0);
            }
            last = dateEnd;
            cur = dateEnd;
        }
    }

    // After a death keyword an age may follow the current token.
    if (isDeath && cur != null)
    {
        Pullenti.Ner.NumberToken age = Pullenti.Ner.Core.NumberHelper.TryParseAge(cur.Next);
        if (age != null)
        {
            if (p != null)
            {
                p.AddSlot(Pullenti.Ner.Person.PersonReferent.ATTR_AGE, age.Value.ToString(), false, 0);
            }
            last = age.EndToken;
            cur = age.EndToken.Next;
        }
    }

    // Remaining case: a bracketed group holding a single date or date range.
    if (cur == null || !cur.IsChar('('))
    {
        return(last);
    }
    Pullenti.Ner.Core.BracketSequenceToken brackets = Pullenti.Ner.Core.BracketHelper.TryParse(cur, Pullenti.Ner.Core.BracketParseAttr.No, 100);
    if (brackets == null)
    {
        return(last);
    }

    Pullenti.Ner.Token inner = cur.Next;
    if (inner.IsValue("РОД", null))
    {
        // Skip the "РОД" abbreviation and its optional dot.
        inner = inner.Next;
        if (inner != null && inner.IsChar('.'))
        {
            inner = inner.Next;
        }
    }
    if (!(inner is Pullenti.Ner.ReferentToken))
    {
        return(last);
    }

    Pullenti.Ner.Referent innerRef = inner.GetReferent();
    string innerKind = innerRef.TypeName;
    // The referent must be the only remaining content before the closing bracket.
    bool fillsBrackets = inner.Next == brackets.EndToken;

    if (innerKind == "DATERANGE" && fillsBrackets)
    {
        Pullenti.Ner.Referent rangeStart = innerRef.GetSlotValue("FROM") as Pullenti.Ner.Referent;
        Pullenti.Ner.Referent rangeEnd = innerRef.GetSlotValue("TO") as Pullenti.Ner.Referent;
        if (rangeStart != null && rangeEnd != null)
        {
            if (p != null)
            {
                p.AddSlot(Pullenti.Ner.Person.PersonReferent.ATTR_BORN, rangeStart, false, 0);
                p.AddSlot(Pullenti.Ner.Person.PersonReferent.ATTR_DIE, rangeEnd, false, 0);
            }
            last = brackets.EndToken;
        }
    }
    else if (innerKind == "DATE" && fillsBrackets)
    {
        if (p != null)
        {
            p.AddSlot(Pullenti.Ner.Person.PersonReferent.ATTR_BORN, innerRef, false, 0);
        }
        last = brackets.EndToken;
    }

    return(last);
}
/// <summary>
/// Builds a CanonicDecreeRefUri for a token that refers to a decree
/// (DecreeReferent) or to a part of a decree (DecreePartReferent). The result
/// starts out covering the whole token and its BeginChar / EndChar are then
/// narrowed by a series of heuristics over the token's inner tokens.
/// </summary>
/// <param name="t">Candidate token; must be a ReferentToken to produce a result.</param>
/// <returns>
/// The computed descriptor, or null when <paramref name="t"/> is not a ReferentToken,
/// when its referent is a decree of kind Publisher, when its referent is neither a
/// decree nor a decree part, or when the token is the right-hand side of a
/// hyphenated range that CreateRangeReferent accepts.
/// </returns>
public static CanonicDecreeRefUri TryCreateCanonicDecreeRefUri(Pullenti.Ner.Token t) {
    if (!(t is Pullenti.Ner.ReferentToken)) { return(null); }
    Pullenti.Ner.Decree.DecreeReferent dr = t.GetReferent() as Pullenti.Ner.Decree.DecreeReferent;
    CanonicDecreeRefUri res;
    // ---------- Case 1: the token refers to a whole decree ----------
    if (dr != null) {
        if (dr.Kind == Pullenti.Ner.Decree.DecreeKind.Publisher) { return(null); }
        // Start with the full span of the token.
        res = new CanonicDecreeRefUri(t.Kit.Sofa.Text) { Ref = dr, BeginChar = t.BeginChar, EndChar = t.EndChar };
        // Token is directly wrapped in parentheses: keep the full span.
        if ((t.Previous != null && t.Previous.IsChar('(') && t.Next != null) && t.Next.IsChar(')')) { return(res); }
        // Any non-zero MiscAttrs also short-circuits the narrowing (meaning of the flag is not visible here).
        if ((t as Pullenti.Ner.ReferentToken).MiscAttrs != 0) { return(res); }
        Pullenti.Ner.ReferentToken rt = t as Pullenti.Ner.ReferentToken;
        // The token itself begins with '(' and ends with ')': use the span strictly inside the brackets.
        if (rt.BeginToken.IsChar('(') && rt.EndToken.IsChar(')')) {
            res = new CanonicDecreeRefUri(t.Kit.Sofa.Text) { Ref = dr, BeginChar = rt.BeginToken.Next.BeginChar, EndChar = rt.EndToken.Previous.EndChar };
            return(res);
        }
        // If another decree reference follows after a comma / "and", parse its items
        // so a shared prefix can be detected below.
        List <DecreeToken> nextDecreeItems = null;
        if ((t.Next != null && t.Next.IsCommaAnd && (t.Next.Next is Pullenti.Ner.ReferentToken)) && (t.Next.Next.GetReferent() is Pullenti.Ner.Decree.DecreeReferent)) {
            nextDecreeItems = DecreeToken.TryAttachList((t.Next.Next as Pullenti.Ner.ReferentToken).BeginToken, null, 10, false);
            if (nextDecreeItems != null && nextDecreeItems.Count > 1) {
                // Keep only the items up to (and including) the first one followed by a line break.
                for (int i = 0; i < (nextDecreeItems.Count - 1); i++) {
                    if (nextDecreeItems[i].IsNewlineAfter) {
                        nextDecreeItems.RemoveRange(i + 1, nextDecreeItems.Count - i - 1);
                        break;
                    }
                }
            }
        }
        bool wasTyp = false;
        bool wasNum = false;
        // Walk the inner tokens of the referent token and trim the span.
        for (Pullenti.Ner.Token tt = (t as Pullenti.Ner.MetaToken).BeginToken; tt != null && tt.EndChar <= t.EndChar; tt = tt.Next) {
            // Leading '(' is excluded from the span.
            if (tt.BeginChar == t.BeginChar && tt.IsChar('(') && tt.Next != null) { res.BeginChar = tt.Next.BeginChar; }
            // "(ДАЛЕЕ ..." ("hereinafter ...") clause: cut the span before it and stop.
            if (tt.IsChar('(') && tt.Next != null && tt.Next.IsValue("ДАЛЕЕ", null)) {
                // NOTE(review): tt.Previous is dereferenced without a null check.
                if (res.EndChar >= tt.BeginChar) { res.EndChar = tt.Previous.EndChar; }
                break;
            }
            // Trailing ')' is excluded; if a matching '(' is found further left inside the
            // span, the span is cut before that '('.
            if (tt.EndChar == t.EndChar && tt.IsChar(')')) {
                res.EndChar = tt.Previous.EndChar;
                for (Pullenti.Ner.Token tt1 = tt.Previous; tt1 != null && tt1.BeginChar >= res.BeginChar; tt1 = tt1.Previous) {
                    if (tt1.IsChar('(') && tt1.Previous != null) {
                        if (res.BeginChar < tt1.Previous.BeginChar) { res.EndChar = tt1.Previous.EndChar; }
                    }
                }
            }
            List <DecreeToken> li = DecreeToken.TryAttachList(tt, null, 10, false);
            if (li != null && li.Count > 0) {
                // A Typ item immediately followed by a Terr item yields the "type with geo" text.
                for (int ii = 0; ii < (li.Count - 1); ii++) {
                    if (li[ii].Typ == DecreeToken.ItemType.Typ && li[ii + 1].Typ == DecreeToken.ItemType.Terr) {
                        res.TypeWithGeo = Pullenti.Ner.Core.MiscHelper.GetTextValue(li[ii].BeginToken, li[ii + 1].EndToken, Pullenti.Ner.Core.GetTextAttr.FirstNounGroupToNominativeSingle);
                    }
                }
                // Align with the following decree reference: when the next reference is a
                // shorter list whose item types match the tail of this one, drop the shared head.
                if ((nextDecreeItems != null && nextDecreeItems.Count > 1 && (nextDecreeItems.Count < li.Count)) && nextDecreeItems[0].Typ != DecreeToken.ItemType.Typ) {
                    int d = li.Count - nextDecreeItems.Count;
                    int j;
                    for (j = 0; j < nextDecreeItems.Count; j++) {
                        if (nextDecreeItems[j].Typ != li[d + j].Typ) { break; }
                    }
                    // j reached the end only if every item type matched.
                    if (j >= nextDecreeItems.Count) { li.RemoveRange(0, d); res.BeginChar = li[0].BeginChar; }
                } else if ((nextDecreeItems != null && nextDecreeItems.Count == 1 && nextDecreeItems[0].Typ == DecreeToken.ItemType.Name) && li.Count == 2 && li[1].Typ == DecreeToken.ItemType.Name) {
                    // Next reference is a lone name: this span becomes just the name item.
                    res.BeginChar = li[1].BeginChar; res.EndChar = li[1].EndChar; break;
                } else if ((nextDecreeItems != null && nextDecreeItems.Count == 1 && nextDecreeItems[0].Typ == DecreeToken.ItemType.Number) && li[li.Count - 1].Typ == DecreeToken.ItemType.Number) {
                    // Next reference is a lone number: this span becomes just the last number item.
                    res.BeginChar = li[li.Count - 1].BeginChar; res.EndChar = li[li.Count - 1].EndChar;
                }
                for (int i = 0; i < li.Count; i++) {
                    DecreeToken l = li[i];
                    // Items that start beyond the token are discarded.
                    if (l.BeginChar > t.EndChar) { li.RemoveRange(i, li.Count - i); break; }
                    if (l.Typ == DecreeToken.ItemType.Name) {
                        if (!wasNum) {
                            // Before any number was seen, a name is kept for contracts and when
                            // a date or number follows it.
                            if (dr.Kind == Pullenti.Ner.Decree.DecreeKind.Contract) { continue; }
                            if (((i + 1) < li.Count) && ((li[i + 1].Typ == DecreeToken.ItemType.Date || li[i + 1].Typ == DecreeToken.ItemType.Number))) { continue; }
                        }
                        // Otherwise the span ends just before the name.
                        // NOTE(review): l.BeginToken.Previous is dereferenced without a null check.
                        int ee = l.BeginToken.Previous.EndChar;
                        if (ee > res.BeginChar && (ee < res.EndChar)) { res.EndChar = ee; }
                        break;
                    }
                    if (l.Typ == DecreeToken.ItemType.Number) { wasNum = true; }
                    if (i == 0) {
                        // The first item decides whether the reference starts with a type
                        // (or an owner/org immediately followed by a date or number).
                        if (l.Typ == DecreeToken.ItemType.Typ) { wasTyp = true; }
                        else if (l.Typ == DecreeToken.ItemType.Owner || l.Typ == DecreeToken.ItemType.Org) {
                            if (((i + 1) < li.Count) && ((li[1].Typ == DecreeToken.ItemType.Date || li[1].Typ == DecreeToken.ItemType.Number))) { wasTyp = true; }
                        }
                        if (wasTyp) {
                            // Preceded by "approved (by)" wording, optionally abbreviated with a dot:
                            // the span starts at this item and the result is marked as adopted.
                            Pullenti.Ner.Token tt0 = l.BeginToken.Previous;
                            if (tt0 != null && tt0.IsChar('.')) { tt0 = tt0.Previous; }
                            if (tt0 != null && ((tt0.IsValue("УТВЕРЖДЕННЫЙ", null) || tt0.IsValue("УТВЕРДИТЬ", null) || tt0.IsValue("УТВ", null)))) {
                                if (l.BeginChar > res.BeginChar) {
                                    res.BeginChar = l.BeginChar;
                                    // Keep the span well-formed if the start moved past the end.
                                    if (res.EndChar < res.BeginChar) { res.EndChar = t.EndChar; }
                                    res.IsAdopted = true;
                                }
                            }
                        }
                    }
                }
                if (li.Count > 0) {
                    // Resume scanning after the parsed items; step back from a closing ')'
                    // so the loop increment lands on it.
                    tt = li[li.Count - 1].EndToken;
                    if (tt.IsChar(')')) { tt = tt.Previous; }
                    continue;
                }
            }
            if (wasTyp) {
                // No item list here: try to read a decree name at this position.
                DecreeToken na = DecreeToken.TryAttachName(tt, dr.Typ0, true, false);
                if (na != null && tt.BeginChar > t.BeginChar) {
                    Pullenti.Ner.Token tt1 = na.EndToken.Next;
                    if (tt1 != null && tt1.IsCharOf(",()")) { tt1 = tt1.Next; }
                    if (tt1 != null && (tt1.EndChar < t.EndChar)) {
                        // Name followed by "approved ..." inside the token: continue scanning from there.
                        if (tt1.IsValue("УТВЕРЖДЕННЫЙ", null) || tt1.IsValue("УТВЕРДИТЬ", null) || tt1.IsValue("УТВ", null)) { tt = tt1; continue; }
                    }
                    // Name introduced by ':' — the span starts at the name.
                    if (tt.Previous != null && tt.Previous.IsChar(':') && na.EndChar <= res.EndChar) { res.BeginChar = tt.BeginChar; break; }
                    // Otherwise the span ends right before the name.
                    if (tt.Previous.EndChar > res.BeginChar) { res.EndChar = tt.Previous.EndChar; break; }
                }
            }
        }
        return(res);
    }
    // ---------- Case 2: the token refers to a part of a decree ----------
    Pullenti.Ner.Decree.DecreePartReferent dpr = t.GetReferent() as Pullenti.Ner.Decree.DecreePartReferent;
    if (dpr == null) { return(null); }
    // This token is the right end of "part - part" and the pair forms a range:
    // nothing is returned here (the left token produces the range result below).
    if ((t.Previous != null && t.Previous.IsHiphen && (t.Previous.Previous is Pullenti.Ner.ReferentToken)) && (t.Previous.Previous.GetReferent() is Pullenti.Ner.Decree.DecreePartReferent)) {
        if (Pullenti.Ner.Decree.DecreePartReferent.CreateRangeReferent(t.Previous.Previous.GetReferent() as Pullenti.Ner.Decree.DecreePartReferent, dpr) != null) { return(null); }
    }
    Pullenti.Ner.Token t1 = t;
    bool hasDiap = false;
    Pullenti.Ner.ReferentToken DiapRef = null;
    // This token is the left end of "part - part": switch to the range referent and
    // extend the covered tokens up to the right-hand token.
    if ((t.Next != null && t.Next.IsHiphen && (t.Next.Next is Pullenti.Ner.ReferentToken)) && (t.Next.Next.GetReferent() is Pullenti.Ner.Decree.DecreePartReferent)) {
        Pullenti.Ner.Decree.DecreePartReferent diap = Pullenti.Ner.Decree.DecreePartReferent.CreateRangeReferent(dpr as Pullenti.Ner.Decree.DecreePartReferent, t.Next.Next.GetReferent() as Pullenti.Ner.Decree.DecreePartReferent);
        if (diap != null) { dpr = diap; hasDiap = true; t1 = t.Next.Next; DiapRef = t1 as Pullenti.Ner.ReferentToken; }
    }
    res = new CanonicDecreeRefUri(t.Kit.Sofa.Text) { Ref = dpr, BeginChar = t.BeginChar, EndChar = t1.EndChar, IsDiap = hasDiap };
    // Directly wrapped in parentheses: keep the full span.
    if ((t.Previous != null && t.Previous.IsChar('(') && t1.Next != null) && t1.Next.IsChar(')')) { return(res); }
    // Exclude an embedded decree reference from the part span: cut before it (and before a
    // preceding preposition) when it comes later, or start at it when it is the first inner token.
    for (Pullenti.Ner.Token tt = (t as Pullenti.Ner.MetaToken).BeginToken; tt != null && tt.EndChar <= t.EndChar; tt = tt.Next) {
        if (tt.GetReferent() is Pullenti.Ner.Decree.DecreeReferent) {
            if (tt.BeginChar > t.BeginChar) {
                res.EndChar = tt.Previous.EndChar;
                if (tt.Previous.Morph.Class.IsPreposition && tt.Previous.Previous != null) { res.EndChar = tt.Previous.Previous.EndChar; }
            } else if (tt.EndChar < t.EndChar) { res.BeginChar = tt.BeginChar; }
            break;
        }
    }
    bool hasSameBefore = _hasSameDecree(t, dpr, true);
    bool hasSameAfter = _hasSameDecree(t, dpr, false);
    // Find the part type with the highest rank among the referent's slots (ptmin) and the
    // runner-up (ptmin2). Prefix doubles as the "nothing found" value.
    // NOTE(review): ptmin2 is assigned but not read anywhere else in this method.
    PartToken.ItemType ptmin = PartToken.ItemType.Prefix;
    PartToken.ItemType ptmin2 = PartToken.ItemType.Prefix;
    int max = 0;
    int max2 = 00;
    foreach (Pullenti.Ner.Slot s in dpr.Slots) {
        PartToken.ItemType pt = PartToken._getTypeByAttrName(s.TypeName);
        if (pt == PartToken.ItemType.Prefix) { continue; }
        int co = PartToken._getRank(pt);
        if (co < 1) {
            // An unranked Part is ranked like a Paragraph when the referent also has a clause slot.
            if (pt == PartToken.ItemType.Part && dpr.FindSlot(Pullenti.Ner.Decree.DecreePartReferent.ATTR_CLAUSE, null, true) != null) { co = PartToken._getRank(PartToken.ItemType.Paragraph); }
            else { continue; }
        }
        if (co > max) { max2 = max; ptmin2 = ptmin; max = co; ptmin = pt; }
        else if (co > max2) { max2 = co; ptmin2 = pt; }
    }
    if (ptmin != PartToken.ItemType.Prefix) {
        // First pass: narrow the span to the first part token of the selected type.
        for (Pullenti.Ner.Token tt = (t as Pullenti.Ner.MetaToken).BeginToken; tt != null && tt.EndChar <= res.EndChar; tt = tt.Next) {
            if (tt.BeginChar >= res.BeginChar) {
                PartToken pt = PartToken.TryAttach(tt, null, false, false);
                if (pt != null && pt.Typ == ptmin) {
                    res.BeginChar = pt.BeginChar;
                    res.EndChar = pt.EndChar;
                    // An appendix token ending in the preposition "К" ("to"): drop the preposition.
                    if (pt.Typ == PartToken.ItemType.Appendix && pt.EndToken.IsValue("К", null) && pt.BeginToken != pt.EndToken) { res.EndChar = pt.EndToken.Previous.EndChar; }
                    if (pt.EndChar == t.EndChar) {
                        // Followed by ", <part>" / "and <part>" whose first inner token is not a
                        // letter: narrow to the first value only.
                        if ((t.Next != null && t.Next.IsCommaAnd && (t.Next.Next is Pullenti.Ner.ReferentToken)) && (t.Next.Next.GetReferent() is Pullenti.Ner.Decree.DecreePartReferent)) {
                            Pullenti.Ner.Token tt1 = (t.Next.Next as Pullenti.Ner.ReferentToken).BeginToken;
                            bool ok = true;
                            if (tt1.Chars.IsLetter) { ok = false; }
                            if (ok) {
                                // Only the first value is used (loop exits after one iteration).
                                foreach (PartToken.PartValue v in pt.Values) { res.BeginChar = v.BeginChar; res.EndChar = v.EndChar; break; }
                            }
                        }
                    }
                    if (!hasDiap) { return(res); }
                    break;
                }
            }
        }
        if (hasDiap && DiapRef != null) {
            // Range: extend the end through the right-hand token, stopping at a comma or at
            // the first inner token preceded by whitespace.
            for (Pullenti.Ner.Token tt = DiapRef.BeginToken; tt != null && tt.EndChar <= DiapRef.EndChar; tt = tt.Next) {
                if (tt.IsChar(',')) { break; }
                if (tt != DiapRef.BeginToken && tt.IsWhitespaceBefore) { break; }
                res.EndChar = tt.EndChar;
            }
            return(res);
        }
    }
    // The first pass did not return. Handle the case where _hasSameDecree reported a match
    // before and/or after this token.
    if (((hasSameBefore || hasSameAfter)) && ptmin != PartToken.ItemType.Prefix) {
        for (Pullenti.Ner.Token tt = (t as Pullenti.Ner.MetaToken).BeginToken; tt != null && tt.EndChar <= res.EndChar; tt = tt.Next) {
            if (tt.BeginChar >= res.BeginChar) {
                // Part tokens are only parsed when there is no matching reference before this one.
                PartToken pt = (!hasSameBefore ? PartToken.TryAttach(tt, null, false, false) : null);
                if (pt != null) {
                    if (pt.Typ == ptmin) {
                        // Return on the first value, if any.
                        foreach (PartToken.PartValue v in pt.Values) { res.BeginChar = v.BeginChar; res.EndChar = v.EndChar; return(res); }
                    }
                    tt = pt.EndToken;
                    continue;
                }
                if ((tt is Pullenti.Ner.NumberToken) && tt.BeginChar == res.BeginChar) {
                    // Bare number at the span start: absorb a dotted sequence "1.2.3"
                    // (no whitespace around the dots).
                    res.EndChar = tt.EndChar;
                    for (; tt != null && tt.Next != null;) {
                        if (!tt.Next.IsChar('.') || tt.IsWhitespaceAfter || tt.Next.IsWhitespaceAfter) { break; }
                        if (!(tt.Next.Next is Pullenti.Ner.NumberToken)) { break; }
                        tt = tt.Next.Next;
                        res.EndChar = tt.EndChar;
                    }
                    if (tt.Next != null && tt.Next.IsHiphen) {
                        if (tt.Next.Next is Pullenti.Ner.NumberToken) {
                            // "N-M": absorb the right-hand number and its dotted continuation.
                            tt = tt.Next.Next;
                            res.EndChar = tt.EndChar;
                            for (; tt != null && tt.Next != null;) {
                                if (!tt.Next.IsChar('.') || tt.IsWhitespaceAfter || tt.Next.IsWhitespaceAfter) { break; }
                                if (!(tt.Next.Next is Pullenti.Ner.NumberToken)) { break; }
                                tt = tt.Next.Next;
                                res.EndChar = tt.EndChar;
                            }
                        } else if (tt.Next.Next != null && (tt.Next.Next.GetReferent() is Pullenti.Ner.Decree.DecreePartReferent) && hasDiap) {
                            // Right-hand side is itself a part referent token: end at its first inner token.
                            res.EndChar = (tt.Next.Next as Pullenti.Ner.MetaToken).BeginToken.EndChar;
                        }
                    }
                    return(res);
                }
                // A bracket sequence at the span start with exactly one token between the
                // brackets (only when a matching reference precedes): end at the closing bracket.
                if (Pullenti.Ner.Core.BracketHelper.CanBeStartOfSequence(tt, true, false) && tt.BeginChar == res.BeginChar && hasSameBefore) {
                    Pullenti.Ner.Core.BracketSequenceToken br = Pullenti.Ner.Core.BracketHelper.TryParse(tt, Pullenti.Ner.Core.BracketParseAttr.No, 100);
                    if (br != null && br.EndToken.Previous == tt.Next) { res.EndChar = br.EndChar; return(res); }
                }
            }
        }
        return(res);
    }
    // No matching reference on either side: look for the selected part type in a parsed part list.
    if (!hasSameBefore && !hasSameAfter && ptmin != PartToken.ItemType.Prefix) {
        for (Pullenti.Ner.Token tt = (t as Pullenti.Ner.MetaToken).BeginToken; tt != null && tt.EndChar <= res.EndChar; tt = tt.Next) {
            if (tt.BeginChar >= res.BeginChar) {
                List <PartToken> pts = PartToken.TryAttachList(tt, false, 40);
                if (pts == null || pts.Count == 0) { break; }
                for (int i = 0; i < pts.Count; i++) {
                    if (pts[i].Typ == ptmin) {
                        res.BeginChar = pts[i].BeginChar;
                        res.EndChar = pts[i].EndChar;
                        tt = pts[i].EndToken;
                        // Extend over "-N", or over the first inner token of a right-hand part referent.
                        if (tt.Next != null && tt.Next.IsHiphen) {
                            if (tt.Next.Next is Pullenti.Ner.NumberToken) { res.EndChar = tt.Next.Next.EndChar; }
                            else if (tt.Next.Next != null && (tt.Next.Next.GetReferent() is Pullenti.Ner.Decree.DecreePartReferent) && hasDiap) { res.EndChar = (tt.Next.Next as Pullenti.Ner.MetaToken).BeginToken.EndChar; }
                        }
                        return(res);
                    }
                }
            }
        }
    }
    return(res);
}
/// <summary>
/// Analyses the beginning of a document as a title page and builds a
/// TitlePageReferent from it: the title name, document type, speciality,
/// related persons, date, city, organization and student year.
/// </summary>
/// <param name="begin">First token to analyse.</param>
/// <param name="maxCharPos">Upper character bound for the analysis; only applied when greater than 0.</param>
/// <param name="kit">Analysis kit used to embed created referent tokens; may be null, in which
/// case the name occurrence is recorded on the referent as a text annotation instead.</param>
/// <param name="endToken">Receives the furthest token attributed to the title page
/// (initialised to <paramref name="begin"/>).</param>
/// <returns>The filled referent, or null when no lines could be parsed or no slot was filled.</returns>
internal static TitlePageReferent _process(Pullenti.Ner.Token begin, int maxCharPos, Pullenti.Ner.Core.AnalysisKit kit, out Pullenti.Ner.Token endToken) {
    endToken = begin;
    TitlePageReferent res = new TitlePageReferent();
    Pullenti.Ner.Core.Termin term = null;
    // NOTE(review): 30 and 1500 look like line-count / character limits for the parser — confirm against Line.Parse.
    List <Pullenti.Ner.Titlepage.Internal.Line> lines = Pullenti.Ner.Titlepage.Internal.Line.Parse(begin, 30, 1500, maxCharPos);
    if (lines.Count < 1) { return(null); }
    // cou = number of lines that belong to the title page (cut at the first line that can
    // start the main text or a table of contents).
    int cou = lines.Count;
    int minNewlinesCount = 10;
    // Histogram: "newlines before a line" -> how many lines have that value.
    Dictionary <int, int> linesCountStat = new Dictionary <int, int>();
    for (int i = 0; i < lines.Count; i++) {
        if (Pullenti.Ner.Titlepage.Internal.TitleNameToken.CanBeStartOfTextOrContent(lines[i].BeginToken, lines[i].EndToken)) { cou = i; break; }
        int j = lines[i].NewlinesBeforeCount;
        if (i > 0 && j > 0) {
            if (!linesCountStat.ContainsKey(j)) { linesCountStat.Add(j, 1); }
            else { linesCountStat[j]++; }
        }
    }
    // The most frequent newline count becomes minNewlinesCount (stays 10 when the histogram is empty).
    int max = 0;
    foreach (KeyValuePair <int, int> kp in linesCountStat) {
        if (kp.Value > max) { max = kp.Value; minNewlinesCount = kp.Key; }
    }
    int endChar = (cou > 0 ? lines[cou - 1].EndChar : 0);
    if (maxCharPos > 0 && endChar > maxCharPos) { endChar = maxCharPos; }
    // Collect name candidates from every run of up to 5 consecutive lines.
    List <Pullenti.Ner.Titlepage.Internal.TitleNameToken> names = new List <Pullenti.Ner.Titlepage.Internal.TitleNameToken>();
    for (int i = 0; i < cou; i++) {
        // NOTE(review): empty body — looks like a leftover debugger breakpoint anchor.
        if (i == 6) { }
        for (int j = i; (j < cou) && (j < (i + 5)); j++) {
            // NOTE(review): empty body — looks like a leftover debugger breakpoint anchor.
            if (i == 6 && j == 8) { }
            if (j > i) {
                // Do not join lines across an English/Russian switch or across a gap of at
                // least twice the typical newline count.
                if (lines[j - 1].IsPureEn && lines[j].IsPureRu) { break; }
                if (lines[j - 1].IsPureRu && lines[j].IsPureEn) { break; }
                if (lines[j].NewlinesBeforeCount >= (minNewlinesCount * 2)) { break; }
            }
            Pullenti.Ner.Titlepage.Internal.TitleNameToken ttt = Pullenti.Ner.Titlepage.Internal.TitleNameToken.TryParse(lines[i].BeginToken, lines[j].EndToken, minNewlinesCount);
            if (ttt != null) {
                // The language of the first line is recorded on the candidate.
                if (lines[i].IsPureEn) { ttt.Morph.Language = Pullenti.Morph.MorphLang.EN; }
                else if (lines[i].IsPureRu) { ttt.Morph.Language = Pullenti.Morph.MorphLang.RU; }
                names.Add(ttt);
            }
        }
    }
    Pullenti.Ner.Titlepage.Internal.TitleNameToken.Sort(names);
    Pullenti.Ner.ReferentToken nameRt = null;
    if (names.Count > 0) {
        // Take the first candidate after sorting, but prefer the first Russian candidate with
        // a positive rank when the top one is English.
        int i0 = 0;
        if (names[i0].Morph.Language.IsEn) {
            for (int ii = 1; ii < names.Count; ii++) {
                if (names[ii].Morph.Language.IsRu && names[ii].Rank > 0) { i0 = ii; break; }
            }
        }
        term = res.AddName(names[i0].BeginNameToken, names[i0].EndNameToken);
        if (names[i0].TypeValue != null) { res.AddType(names[i0].TypeValue); }
        if (names[i0].Speciality != null) { res.Speciality = names[i0].Speciality; }
        Pullenti.Ner.ReferentToken rt = new Pullenti.Ner.ReferentToken(res, names[i0].BeginToken, names[i0].EndToken);
        if (kit != null) { kit.EmbedToken(rt); }
        else { res.AddOccurence(new Pullenti.Ner.TextAnnotation(rt.BeginToken, rt.EndToken)); }
        endToken = rt.EndToken;
        nameRt = rt;
        // If the name token starts where the scan starts, begin the later scans at the new token.
        if (begin.BeginChar == rt.BeginChar) { begin = rt; }
    }
    if (term != null && kit != null) {
        // Find further occurrences of the title across the kit's tokens and embed them as
        // references to the same referent.
        for (Pullenti.Ner.Token t = kit.FirstToken; t != null; t = t.Next) {
            Pullenti.Ner.Core.TerminToken tok = term.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
            if (tok == null) { continue; }
            Pullenti.Ner.Token t0 = t;
            Pullenti.Ner.Token t1 = tok.EndToken;
            // Include a trailing dot and surrounding brackets/quotes in the occurrence.
            if (t1.Next != null && t1.Next.IsChar('.')) { t1 = t1.Next; }
            if (Pullenti.Ner.Core.BracketHelper.CanBeStartOfSequence(t0.Previous, false, false) && Pullenti.Ner.Core.BracketHelper.CanBeEndOfSequence(t1.Next, false, null, false)) { t0 = t0.Previous; t1 = t1.Next; }
            Pullenti.Ner.ReferentToken rt = new Pullenti.Ner.ReferentToken(res, t0, t1);
            kit.EmbedToken(rt);
            t = rt;
        }
    }
    Pullenti.Ner.Titlepage.Internal.PersonRelations pr = new Pullenti.Ner.Titlepage.Internal.PersonRelations();
    // persTyp carries the role announced by the most recent title item, to be applied to
    // the persons that follow it.
    Pullenti.Ner.Titlepage.Internal.TitleItemToken.Types persTyp = Pullenti.Ner.Titlepage.Internal.TitleItemToken.Types.Undefined;
    List <Pullenti.Ner.Titlepage.Internal.TitleItemToken.Types> persTypes = pr.RelTypes;
    // Main scan: title items (type, speciality, person roles) and referents on the page.
    for (Pullenti.Ner.Token t = begin; t != null; t = t.Next) {
        if (maxCharPos > 0 && t.BeginChar > maxCharPos) { break; }
        if (t == nameRt) { continue; }
        Pullenti.Ner.Titlepage.Internal.TitleItemToken tpt = Pullenti.Ner.Titlepage.Internal.TitleItemToken.TryAttach(t);
        if (tpt != null) {
            persTyp = Pullenti.Ner.Titlepage.Internal.TitleItemToken.Types.Undefined;
            if (tpt.Typ == Pullenti.Ner.Titlepage.Internal.TitleItemToken.Types.Typ) {
                if (res.Types.Count == 0) { res.AddType(tpt.Value); }
                else if (res.Types.Count == 1) {
                    // One type already known: combine it with the new one where that makes sense.
                    string ty = res.Types[0].ToUpper();
                    if (ty == "РЕФЕРАТ") { res.AddType(tpt.Value); }
                    else if (ty == "АВТОРЕФЕРАТ") {
                        // "Author's abstract" + a dissertation kind gives a combined type
                        // (Russian and Ukrainian variants).
                        if (tpt.Value == "КАНДИДАТСКАЯ ДИССЕРТАЦИЯ") { res.AddSlot(TitlePageReferent.ATTR_TYPE, "автореферат кандидатской диссертации", true, 0); }
                        else if (tpt.Value == "ДОКТОРСКАЯ ДИССЕРТАЦИЯ") { res.AddSlot(TitlePageReferent.ATTR_TYPE, "автореферат докторской диссертации", true, 0); }
                        else if (tpt.Value == "МАГИСТЕРСКАЯ ДИССЕРТАЦИЯ") { res.AddSlot(TitlePageReferent.ATTR_TYPE, "автореферат магистерской диссертации", true, 0); }
                        else if (tpt.Value == "КАНДИДАТСЬКА ДИСЕРТАЦІЯ") { res.AddSlot(TitlePageReferent.ATTR_TYPE, "автореферат кандидатської дисертації", true, 0); }
                        else if (tpt.Value == "ДОКТОРСЬКА ДИСЕРТАЦІЯ") { res.AddSlot(TitlePageReferent.ATTR_TYPE, "автореферат докторської дисертації", true, 0); }
                        else if (tpt.Value == "МАГІСТЕРСЬКА ДИСЕРТАЦІЯ") { res.AddSlot(TitlePageReferent.ATTR_TYPE, "автореферат магістерської дисертації", true, 0); }
                        else { res.AddType(tpt.Value); }
                    } else if (tpt.Value == "РЕФЕРАТ" || tpt.Value == "АВТОРЕФЕРАТ") {
                        // Add "abstract" / "author's abstract" unless the known type already contains it.
                        if (!ty.Contains(tpt.Value)) { res.AddType(tpt.Value); }
                    }
                }
            } else if (tpt.Typ == Pullenti.Ner.Titlepage.Internal.TitleItemToken.Types.Speciality) {
                if (res.Speciality == null) { res.Speciality = tpt.Value; }
            } else if (persTypes.Contains(tpt.Typ)) { persTyp = tpt.Typ; }
            // Jump past the item and an optional ':' or '-' after it.
            t = tpt.EndToken;
            if (t.EndChar > endToken.EndChar) { endToken = t; }
            if (t.Next != null && t.Next.IsCharOf(":-")) { t = t.Next; }
            continue;
        }
        // Referents are only considered within the title-page lines.
        if (t.EndChar > endChar) { break; }
        List <Pullenti.Ner.Referent> rli = t.GetReferents();
        if (rli == null) { continue; }
        if (!t.IsNewlineBefore && (t.Previous is Pullenti.Ner.TextToken)) {
            // Skip entities introduced by "ИМЕНИ" / "ИМ" / "ИМ." ("named after") on the same line.
            string s = (t.Previous as Pullenti.Ner.TextToken).Term;
            if (s == "ИМЕНИ" || s == "ИМ") { continue; }
            if (s == "." && t.Previous.Previous != null && t.Previous.Previous.IsValue("ИМ", null)) { continue; }
        }
        foreach (Pullenti.Ner.Referent r in rli) {
            if (r is Pullenti.Ner.Person.PersonReferent) {
                // Only the first referent of the token is treated as a person.
                if (r != rli[0]) { continue; }
                Pullenti.Ner.Person.PersonReferent p = r as Pullenti.Ner.Person.PersonReferent;
                if (persTyp != Pullenti.Ner.Titlepage.Internal.TitleItemToken.Types.Undefined) {
                    // A dot right before the person cancels the pending role.
                    if (t.Previous != null && t.Previous.IsChar('.')) { persTyp = Pullenti.Ner.Titlepage.Internal.TitleItemToken.Types.Undefined; }
                }
                // Role priority: the person's own attributes, then the pending role, then a
                // copyright sign, then the context look-around below.
                Pullenti.Ner.Titlepage.Internal.TitleItemToken.Types typ = pr.CalcTypFromAttrs(p);
                if (typ != Pullenti.Ner.Titlepage.Internal.TitleItemToken.Types.Undefined) { pr.Add(p, typ, 1); persTyp = typ; }
                else if (persTyp != Pullenti.Ner.Titlepage.Internal.TitleItemToken.Types.Undefined) { pr.Add(p, persTyp, 1); }
                else if (t.Previous != null && t.Previous.IsChar('©')) { persTyp = Pullenti.Ner.Titlepage.Internal.TitleItemToken.Types.Worker; pr.Add(p, persTyp, 1); }
                else {
                    // Look forward: reaching the title referent itself makes this person a Worker.
                    for (Pullenti.Ner.Token tt = t.Next; tt != null; tt = tt.Next) {
                        Pullenti.Ner.Referent rr = tt.GetReferent();
                        if (rr == res) { persTyp = Pullenti.Ner.Titlepage.Internal.TitleItemToken.Types.Worker; break; }
                        if (rr is Pullenti.Ner.Person.PersonReferent) {
                            // NOTE(review): this passes r (the current person, already found Undefined
                            // above) rather than rr (the person just encountered) — possibly a typo; confirm.
                            if (pr.CalcTypFromAttrs(r as Pullenti.Ner.Person.PersonReferent) != Pullenti.Ner.Titlepage.Internal.TitleItemToken.Types.Undefined) { break; }
                            else { continue; }
                        }
                        if (rr != null) { break; }
                        tpt = Pullenti.Ner.Titlepage.Internal.TitleItemToken.TryAttach(tt);
                        if (tpt != null) {
                            // Only type / type-and-theme items may be skipped over.
                            if (tpt.Typ != Pullenti.Ner.Titlepage.Internal.TitleItemToken.Types.Typ && tpt.Typ != Pullenti.Ner.Titlepage.Internal.TitleItemToken.Types.TypAndTheme) { break; }
                            tt = tpt.EndToken;
                            if (tt.EndChar > endToken.EndChar) { endToken = tt; }
                            continue;
                        }
                    }
                    if (persTyp == Pullenti.Ner.Titlepage.Internal.TitleItemToken.Types.Undefined) {
                        // Look backward: the title referent, or a word such as "student" /
                        // "listener" / "graduate" / "performer", also marks a Worker.
                        for (Pullenti.Ner.Token tt = t.Previous; tt != null; tt = tt.Previous) {
                            Pullenti.Ner.Referent rr = tt.GetReferent();
                            if (rr == res) { persTyp = Pullenti.Ner.Titlepage.Internal.TitleItemToken.Types.Worker; break; }
                            if (rr != null) { break; }
                            if ((tt.IsValue("СТУДЕНТ", null) || tt.IsValue("СТУДЕНТКА", null) || tt.IsValue("СЛУШАТЕЛЬ", null)) || tt.IsValue("ДИПЛОМНИК", null) || tt.IsValue("ИСПОЛНИТЕЛЬ", null)) { persTyp = Pullenti.Ner.Titlepage.Internal.TitleItemToken.Types.Worker; break; }
                            tpt = Pullenti.Ner.Titlepage.Internal.TitleItemToken.TryAttach(tt);
                            if (tpt != null && tpt.Typ != Pullenti.Ner.Titlepage.Internal.TitleItemToken.Types.Typ) { break; }
                        }
                    }
                    // A person whose role is still unknown is recorded with a lower weight (0.5).
                    if (persTyp != Pullenti.Ner.Titlepage.Internal.TitleItemToken.Types.Undefined) { pr.Add(p, persTyp, 1); }
                    else { pr.Add(p, persTyp, (float)0.5); }
                    if (t.EndChar > endToken.EndChar) { endToken = t; }
                }
                continue;
            }
            // Any non-person first referent resets the pending role.
            if (r == rli[0]) { persTyp = Pullenti.Ner.Titlepage.Internal.TitleItemToken.Types.Undefined; }
            if (r is Pullenti.Ner.Date.DateReferent) {
                // The first date found wins.
                if (res.Date == null) {
                    res.Date = r as Pullenti.Ner.Date.DateReferent;
                    if (t.EndChar > endToken.EndChar) { endToken = t; }
                }
            } else if (r is Pullenti.Ner.Geo.GeoReferent) {
                // The first city found wins.
                if (res.City == null && (r as Pullenti.Ner.Geo.GeoReferent).IsCity) {
                    res.City = r as Pullenti.Ner.Geo.GeoReferent;
                    if (t.EndChar > endToken.EndChar) { endToken = t; }
                }
            }
            if (r is Pullenti.Ner.Org.OrganizationReferent) {
                Pullenti.Ner.Org.OrganizationReferent org = r as Pullenti.Ner.Org.OrganizationReferent;
                // An organization typed "курс" ("course/year") with a number 1..7 gives the student year.
                if (org.Types.Contains("курс") && org.Number != null) {
                    int i;
                    if (int.TryParse(org.Number, out i)) {
                        if (i > 0 && (i < 8)) { res.StudentYear = i; }
                    }
                }
                // Climb from departments to the first non-department ancestor.
                for (; org.Higher != null; org = org.Higher) {
                    if (org.Kind != Pullenti.Ner.Org.OrganizationKind.Department) { break; }
                }
                if (org.Kind != Pullenti.Ner.Org.OrganizationKind.Department) {
                    // Keep the first organization, replacing it only when CanBeHigher(res.Org, org) holds.
                    if (res.Org == null) { res.Org = org; }
                    else if (Pullenti.Ner.Org.OrganizationReferent.CanBeHigher(res.Org, org)) { res.Org = org; }
                }
                if (t.EndChar > endToken.EndChar) { endToken = t; }
            }
            // URIs and any geo referent extend the covered span even when not stored.
            if ((r is Pullenti.Ner.Uri.UriReferent) || (r is Pullenti.Ner.Geo.GeoReferent)) {
                if (t.EndChar > endToken.EndChar) { endToken = t; }
            }
        }
    }
    // Write the collected persons into the slot that corresponds to each role.
    foreach (Pullenti.Ner.Titlepage.Internal.TitleItemToken.Types ty in persTypes) {
        foreach (Pullenti.Ner.Person.PersonReferent p in pr.GetPersons(ty)) {
            if (pr.GetAttrNameForType(ty) != null) { res.AddSlot(pr.GetAttrNameForType(ty), p, false, 0); }
        }
    }
    // No author yet: use the first person recorded without a role.
    if (res.GetSlotValue(TitlePageReferent.ATTR_AUTHOR) == null) {
        foreach (Pullenti.Ner.Person.PersonReferent p in pr.GetPersons(Pullenti.Ner.Titlepage.Internal.TitleItemToken.Types.Undefined)) {
            res.AddSlot(TitlePageReferent.ATTR_AUTHOR, p, false, 0);
            break;
        }
    }
    // No city yet: fall back to the organization's geo slot when it is a city.
    if (res.City == null && res.Org != null) {
        Pullenti.Ner.Slot s = res.Org.FindSlot(Pullenti.Ner.Org.OrganizationReferent.ATTR_GEO, null, true);
        if (s != null && (s.Value is Pullenti.Ner.Geo.GeoReferent)) {
            if ((s.Value as Pullenti.Ner.Geo.GeoReferent).IsCity) { res.City = s.Value as Pullenti.Ner.Geo.GeoReferent; }
        }
    }
    // No date yet: try to parse one right after a geo referent (e.g. "City, 2010"),
    // skipping one ':' / ',' / '-' separator.
    if (res.Date == null) {
        for (Pullenti.Ner.Token t = begin; t != null && t.EndChar <= endChar; t = t.Next) {
            Pullenti.Ner.Geo.GeoReferent city = t.GetReferent() as Pullenti.Ner.Geo.GeoReferent;
            if (city == null) { continue; }
            if (t.Next is Pullenti.Ner.TextToken) {
                if (t.Next.IsCharOf(":,") || t.Next.IsHiphen) { t = t.Next; }
            }
            Pullenti.Ner.ReferentToken rt = t.Kit.ProcessReferent(Pullenti.Ner.Date.DateAnalyzer.ANALYZER_NAME, t.Next);
            if (rt != null) {
                rt.SaveToLocalOntology();
                res.Date = rt.Referent as Pullenti.Ner.Date.DateReferent;
                if (kit != null) { kit.EmbedToken(rt); }
                break;
            }
        }
    }
    // A referent without any slot carries no information.
    if (res.Slots.Count == 0) { return(null); }
    else { return(res); }
}