/// <summary>
/// Walks the token chain of <paramref name="kit"/> looking for entities.
/// A letter token that matches a term of <c>m_Ontology</c> triggers <c>TryAttach</c> on the token
/// following the term (an optional ':' is skipped) with the flag set to true; a successful result is
/// stretched back so that it starts at the keyword. If nothing was found that way, tokens that are
/// referent tokens or that start a line are tried with <c>TryAttach(t, false)</c>.
/// Every found entity is registered in the analyzer data and embedded into the token chain.
/// </summary>
/// <param name="kit">analysis kit holding the token chain to process</param>
public override void Process(Pullenti.Ner.Core.AnalysisKit kit)
{
    Pullenti.Ner.Core.AnalyzerData data = kit.GetAnalyzerData(this);
    Pullenti.Ner.Token cur = kit.FirstToken;
    while (cur != null)
    {
        Pullenti.Ner.ReferentToken found = null;
        if (cur.Chars.IsLetter)
        {
            Pullenti.Ner.Core.TerminToken keyword = m_Ontology.TryParse(cur, Pullenti.Ner.Core.TerminParseAttr.No);
            if (keyword != null)
            {
                Pullenti.Ner.Token afterKeyword = keyword.EndToken.Next;
                if (afterKeyword != null && afterKeyword.IsChar(':'))
                {
                    afterKeyword = afterKeyword.Next;
                }
                found = this.TryAttach(afterKeyword, true);
                if (found != null)
                {
                    // The keyword itself becomes part of the entity occurrence.
                    found.BeginToken = cur;
                }
            }
        }
        if (found == null)
        {
            bool isCandidate = (cur is Pullenti.Ner.ReferentToken) || cur.IsNewlineBefore;
            if (isCandidate)
            {
                found = this.TryAttach(cur, false);
            }
        }
        if (found != null)
        {
            found.Referent = data.RegisterReferent(found.Referent);
            kit.EmbedToken(found);
            // Continue after the embedded metatoken.
            cur = found;
        }
        cur = cur.Next;
    }
}
/// <summary>
/// Tries to extract an entity starting at the given token using the specified analyzer.
/// Used when the caller needs to "look ahead" and check the hypothesis that an entity of a
/// particular type starts here.
/// </summary>
/// <param name="analyzerName">name of the analyzer to use</param>
/// <param name="t">token from which the extraction is attempted</param>
/// <returns>
/// A ReferentToken metatoken with the entity, or null. Null is also returned when there is no
/// processor, when the analyzer is already on the analyzer stack, when the recursion limit flag
/// is set, or when the analyzer is not found. The entity is not saved and the returned
/// metatoken is not embedded anywhere.
/// </returns>
public Pullenti.Ner.ReferentToken ProcessReferent(string analyzerName, Pullenti.Ner.Token t)
{
    if (Processor == null)
    {
        return null;
    }
    // Prevent the same analyzer from re-entering itself through nested look-ahead calls.
    if (m_AnalyzerStack.Contains(analyzerName))
    {
        return null;
    }
    if (IsRecurceOverflow)
    {
        return null;
    }
    Pullenti.Ner.Analyzer a = Processor.FindAnalyzer(analyzerName);
    if (a == null)
    {
        return null;
    }
    RecurseLevel++;
    m_AnalyzerStack.Add(analyzerName);
    try
    {
        return a.ProcessReferent(t, null);
    }
    finally
    {
        // Restore the bookkeeping even if the analyzer throws; otherwise the analyzer name
        // would stay on the stack and every later look-ahead for it would return null.
        m_AnalyzerStack.Remove(analyzerName);
        RecurseLevel--;
    }
}
/// <summary>
/// Tries to build a territory whose first item is <c>li[0]</c>.
/// First the remaining items (without <c>li[0]</c>) are passed to <c>TryAttachTerritory</c>; when that
/// fails for exactly two items of which exactly one has a <c>TerminItem</c>, the call is repeated with
/// the third argument set to true. A result that is a state (<c>IsState</c>) is rejected.
/// If no territory was produced, an organization is looked for after <c>li[0]</c> (an opening bracket
/// is skipped) and converted with <c>CreateGeoOrgTerr</c>; a closing bracket is included in the result
/// when an opening one was skipped.
/// </summary>
/// <param name="li">territory items; the first one is used as the start of the result</param>
/// <param name="ad">analyzer data passed through to <c>TryAttachTerritory</c></param>
/// <returns>the referent token starting at <c>li[0].BeginToken</c>, or null</returns>
static Pullenti.Ner.ReferentToken _tryAttachPureTerr(List <TerrItemToken> li, Pullenti.Ner.Core.AnalyzerData ad)
{
    Pullenti.Ner.Token afterFirst = li[0].EndToken.Next;
    if (afterFirst == null)
    {
        return null;
    }
    Pullenti.Ner.Token orgStart = afterFirst;
    if (Pullenti.Ner.Core.BracketHelper.CanBeStartOfSequence(orgStart, true, false))
    {
        orgStart = orgStart.Next;
    }

    if (li.Count > 1)
    {
        List <TerrItemToken> rest = new List <TerrItemToken>(li);
        rest.RemoveAt(0);
        Pullenti.Ner.ReferentToken terr = TryAttachTerritory(rest, ad, false, null, null);
        if (terr == null && rest.Count == 2)
        {
            bool firstHasTermin = rest[0].TerminItem != null;
            bool secondHasTermin = rest[1].TerminItem != null;
            // Exactly one of the two items carries a termin.
            if (firstHasTermin != secondHasTermin)
            {
                terr = TryAttachTerritory(rest, ad, true, null, null);
            }
        }
        if (terr != null)
        {
            if ((terr.Referent as Pullenti.Ner.Geo.GeoReferent).IsState)
            {
                return null;
            }
            terr.BeginToken = li[0].BeginToken;
            terr.Morph = li[0].Morph;
            return terr;
        }
    }

    Pullenti.Ner.Address.Internal.AddressItemToken orgItem = Pullenti.Ner.Address.Internal.AddressItemToken.TryAttachOrg(orgStart);
    if (orgItem == null)
    {
        return null;
    }
    Pullenti.Ner.ReferentToken result = orgItem.CreateGeoOrgTerr();
    if (result == null)
    {
        return null;
    }
    result.BeginToken = li[0].BeginToken;
    Pullenti.Ner.Token last = result.EndToken;
    bool bracketWasSkipped = orgStart != afterFirst;
    if (bracketWasSkipped && Pullenti.Ner.Core.BracketHelper.CanBeEndOfSequence(last.Next, false, null, false))
    {
        result.EndToken = last.Next;
    }
    return result;
}
/// <summary>
/// Builds a <c>GoodAttributeReferent</c> from an ontology item text.
/// An optional leading Latin keyword (KEYWORD, CHARACTER, PROPER, MODEL) selects the attribute type.
/// The rest of the tokens is split at ';' and every part is added as an <c>ATTR_VALUE</c> slot.
/// If no type keyword was given and the last value does not start with an all-lowercase token,
/// the type is set to <c>Proper</c>.
/// </summary>
/// <param name="begin">first token of the ontology item text</param>
/// <returns>
/// a referent token wrapping the attribute, or null when <paramref name="begin"/> is null or
/// nothing follows the type keyword
/// </returns>
public override Pullenti.Ner.ReferentToken ProcessOntologyItem(Pullenti.Ner.Token begin)
{
    if (begin == null)
    {
        return null;
    }
    GoodAttributeReferent ga = new GoodAttributeReferent();
    if (begin.Chars.IsLatinLetter)
    {
        if (begin.IsValue("KEYWORD", null))
        {
            ga.Typ = GoodAttrType.Keyword;
            begin = begin.Next;
        }
        else if (begin.IsValue("CHARACTER", null))
        {
            ga.Typ = GoodAttrType.Character;
            begin = begin.Next;
        }
        else if (begin.IsValue("PROPER", null))
        {
            ga.Typ = GoodAttrType.Proper;
            begin = begin.Next;
        }
        else if (begin.IsValue("MODEL", null))
        {
            ga.Typ = GoodAttrType.Model;
            begin = begin.Next;
        }
        if (begin == null)
        {
            return null;
        }
    }
    Pullenti.Ner.ReferentToken res = new Pullenti.Ner.ReferentToken(ga, begin, begin);
    for (Pullenti.Ner.Token t = begin; t != null; t = t.Next)
    {
        if (t.IsChar(';'))
        {
            // A ';' closes the current value; the next value starts right after it.
            ga.AddSlot(GoodAttributeReferent.ATTR_VALUE, Pullenti.Ner.Core.MiscHelper.GetTextValue(begin, t.Previous, Pullenti.Ner.Core.GetTextAttr.No), false, 0);
            begin = t.Next;
            continue;
        }
        res.EndToken = t;
    }
    // begin is null when the text ends with ';' - there is no trailing value then,
    // and dereferencing it (as the code did before) would throw NullReferenceException.
    if (begin != null)
    {
        if (res.EndChar > begin.BeginChar)
        {
            ga.AddSlot(GoodAttributeReferent.ATTR_VALUE, Pullenti.Ner.Core.MiscHelper.GetTextValue(begin, res.EndToken, Pullenti.Ner.Core.GetTextAttr.No), false, 0);
        }
        if (ga.Typ == GoodAttrType.Undefined && !begin.Chars.IsAllLower)
        {
            ga.Typ = GoodAttrType.Proper;
        }
    }
    return res;
}
/// <summary>
/// Extracts goods from the text, one per line.
/// For every token that starts a line (a leading non-letter token is skipped when something follows it)
/// <c>GoodAttrToken.TryParseList</c> is called; the returned attribute tokens are registered, embedded
/// and collected into a new <c>GoodReferent</c>, which is then embedded as a token spanning all of them.
/// Progress is reported through <c>OnProgress</c> in steps of 100000 characters; a false result stops
/// the scan. The collected goods are added to the analyzer data referents at the end.
/// </summary>
/// <param name="kit">analysis kit holding the token chain to process</param>
public override void Process(Pullenti.Ner.Core.AnalysisKit kit)
{
    Pullenti.Ner.Core.AnalyzerData data = kit.GetAnalyzerData(this);
    int chunkSize = 100000;
    int totalChunks = (kit.Sofa.Text.Length + chunkSize - 1) / chunkSize;
    if (totalChunks == 0)
    {
        totalChunks = 1;
    }
    int chunkNo = 0;
    int progressMark = 0;
    List <GoodReferent> collected = new List <GoodReferent>();

    for (Pullenti.Ner.Token tok = kit.FirstToken; tok != null; tok = tok.Next)
    {
        if (!tok.IsNewlineBefore)
        {
            continue;
        }
        if (tok.BeginChar > progressMark)
        {
            progressMark += chunkSize;
            chunkNo++;
            if (!this.OnProgress(chunkNo, totalChunks, kit))
            {
                break;
            }
        }
        if (!tok.Chars.IsLetter && tok.Next != null)
        {
            tok = tok.Next;
        }
        List <Pullenti.Ner.ReferentToken> attrTokens = Pullenti.Ner.Goods.Internal.GoodAttrToken.TryParseList(tok);
        if (attrTokens == null || attrTokens.Count == 0)
        {
            continue;
        }

        GoodReferent item = new GoodReferent();
        for (int k = 0; k < attrTokens.Count; k++)
        {
            Pullenti.Ner.ReferentToken attrToken = attrTokens[k];
            attrToken.Referent = data.RegisterReferent(attrToken.Referent);
            // Do not add the same registered attribute twice.
            if (item.FindSlot(GoodReferent.ATTR_ATTR, attrToken.Referent, true) == null)
            {
                item.AddSlot(GoodReferent.ATTR_ATTR, attrToken.Referent, false, 0);
            }
            kit.EmbedToken(attrToken);
        }
        collected.Add(item);

        Pullenti.Ner.ReferentToken whole = new Pullenti.Ner.ReferentToken(item, attrTokens[0], attrTokens[attrTokens.Count - 1]);
        kit.EmbedToken(whole);
        tok = whole;
    }

    for (int k = 0; k < collected.Count; k++)
    {
        data.Referents.Add(collected[k]);
    }
}
/// <summary>
/// Tries to read one more phone starting at <paramref name="t"/> (a leading ',' is skipped).
/// </summary>
/// <param name="t">token to start from; may be null</param>
/// <param name="lev">current nesting level; values above 3 stop the attempt</param>
/// <returns>the phone referent that was attached, or null</returns>
PhoneReferent GetNextPhone(Pullenti.Ner.Token t, int lev)
{
    Pullenti.Ner.Token start = t;
    if (start != null && start.IsChar(','))
    {
        start = start.Next;
    }
    if (start == null || lev > 3)
    {
        return null;
    }

    List<Pullenti.Ner.Phone.Internal.PhoneItemToken> items = Pullenti.Ner.Phone.Internal.PhoneItemToken.TryAttachAll(start, 15);
    if (items == null)
    {
        return null;
    }

    Pullenti.Ner.ReferentToken attached = this._TryAttach_(items, 0, false, null, lev + 1);
    if (attached != null)
    {
        return attached.Referent as PhoneReferent;
    }
    return null;
}
/// <summary>
/// Calls <c>TryParse</c> on every token of the chain; each successful result is registered in the
/// analyzer data, embedded into the chain, and the scan continues after it.
/// </summary>
/// <param name="kit">analysis kit holding the token chain to process</param>
public override void Process(Pullenti.Ner.Core.AnalysisKit kit)
{
    Pullenti.Ner.Core.AnalyzerData data = kit.GetAnalyzerData(this);
    Pullenti.Ner.Token cur = kit.FirstToken;
    while (cur != null)
    {
        Pullenti.Ner.ReferentToken parsed = TryParse(cur);
        if (parsed != null)
        {
            parsed.Referent = data.RegisterReferent(parsed.Referent);
            kit.EmbedToken(parsed);
            cur = parsed;
        }
        cur = cur.Next;
    }
}
/// <summary>
/// For a referent token, replaces its referent with the referent stored in <c>Referent.Tag</c>
/// (when the tag holds one), then does the same recursively for all tokens inside it.
/// Tokens that are not referent tokens are left untouched.
/// </summary>
/// <param name="t">token to correct; may be null</param>
static void _correctReferents(Pullenti.Ner.Token t)
{
    Pullenti.Ner.ReferentToken holder = t as Pullenti.Ner.ReferentToken;
    if (holder == null)
    {
        return;
    }

    if (holder.Referent != null)
    {
        Pullenti.Ner.Referent replacement = holder.Referent.Tag as Pullenti.Ner.Referent;
        if (replacement != null)
        {
            holder.Referent = replacement;
        }
    }

    Pullenti.Ner.Token inner = holder.BeginToken;
    while (inner != null && inner.EndChar <= holder.EndChar)
    {
        _correctReferents(inner);
        inner = inner.Next;
    }
}
/// <summary>
/// Tries to extract an entity starting at <paramref name="begin"/>: parses a list of named items
/// and passes it to <c>_tryAttach</c>. On success the analyzer data of this analyzer is stored
/// in the <c>Data</c> field of the result.
/// </summary>
/// <param name="begin">token to start from</param>
/// <param name="end">not used by this implementation</param>
/// <returns>the referent token, or null when nothing was parsed or attached</returns>
public override Pullenti.Ner.ReferentToken ProcessReferent(Pullenti.Ner.Token begin, Pullenti.Ner.Token end)
{
    List <Pullenti.Ner.Named.Internal.NamedItemToken> items = Pullenti.Ner.Named.Internal.NamedItemToken.TryParseList(begin, null);
    if (items != null && items.Count > 0)
    {
        Pullenti.Ner.ReferentToken attached = _tryAttach(items);
        if (attached != null)
        {
            attached.Data = begin.Kit.GetAnalyzerData(this);
            return attached;
        }
    }
    return null;
}
/// <summary>
/// Calls <c>TryParseThesis</c> on every token that can start a sentence; each result is registered
/// in <paramref name="ad"/>, embedded into the chain, and the scan continues after it.
/// </summary>
/// <param name="kit">analysis kit holding the token chain to process</param>
/// <param name="ad">analyzer data used to register the found referents</param>
public static void Process(Pullenti.Ner.Core.AnalysisKit kit, Pullenti.Ner.Core.AnalyzerData ad)
{
    Pullenti.Ner.Token cur = kit.FirstToken;
    while (cur != null)
    {
        if (Pullenti.Ner.Core.MiscHelper.CanBeStartOfSentence(cur))
        {
            Pullenti.Ner.ReferentToken thesis = TryParseThesis(cur);
            if (thesis != null)
            {
                thesis.Referent = ad.RegisterReferent(thesis.Referent);
                kit.EmbedToken(thesis);
                cur = thesis;
            }
        }
        cur = cur.Next;
    }
}
/// <summary>
/// Tries to extract a phone starting at <paramref name="begin"/>.
/// Leading items of type <c>Prefix</c> are skipped when choosing the index passed to
/// <c>_TryAttach_</c>; the result is stretched back to start at <paramref name="begin"/>.
/// </summary>
/// <param name="begin">token to start from</param>
/// <param name="end">not used by this implementation</param>
/// <returns>the referent token, or null when no phone items or no phone were found</returns>
public override Pullenti.Ner.ReferentToken ProcessReferent(Pullenti.Ner.Token begin, Pullenti.Ner.Token end)
{
    List<Pullenti.Ner.Phone.Internal.PhoneItemToken> items = Pullenti.Ner.Phone.Internal.PhoneItemToken.TryAttachAll(begin, 15);
    if (items == null || items.Count == 0)
    {
        return null;
    }

    int firstNonPrefix = 0;
    while (firstNonPrefix < items.Count
        && items[firstNonPrefix].ItemType == Pullenti.Ner.Phone.Internal.PhoneItemToken.PhoneItemType.Prefix)
    {
        firstNonPrefix++;
    }

    Pullenti.Ner.ReferentToken attached = this._TryAttach_(items, firstNonPrefix, true, null, 0);
    if (attached == null)
    {
        return null;
    }
    attached.BeginToken = begin;
    return attached;
}
/// <summary>
/// For every token parses a list of named items (using the local ontology of the analyzer data)
/// and passes it to <c>_tryAttach</c>; each result is registered, embedded into the chain, and
/// the scan continues after it.
/// </summary>
/// <param name="kit">analysis kit holding the token chain to process</param>
public override void Process(Pullenti.Ner.Core.AnalysisKit kit)
{
    Pullenti.Ner.Core.AnalyzerDataWithOntology data = kit.GetAnalyzerData(this) as Pullenti.Ner.Core.AnalyzerDataWithOntology;
    Pullenti.Ner.Token cur = kit.FirstToken;
    while (cur != null)
    {
        List <Pullenti.Ner.Named.Internal.NamedItemToken> items = Pullenti.Ner.Named.Internal.NamedItemToken.TryParseList(cur, data.LocalOntology);
        if (items != null && items.Count > 0)
        {
            Pullenti.Ner.ReferentToken attached = _tryAttach(items);
            if (attached != null)
            {
                attached.Referent = data.RegisterReferent(attached.Referent);
                kit.EmbedToken(attached);
                cur = attached;
            }
        }
        cur = cur.Next;
    }
}
/// <summary>
/// Extracts good attributes: for every token <c>GoodAttrToken.TryParse</c> is called and, when the
/// parsed token yields an attribute via <c>_createAttr</c>, the attribute is registered and embedded
/// as a referent token. A parsed token without an attribute is skipped as a whole.
/// Progress is reported through <c>OnProgress</c> in steps of 100000 characters; a false result stops
/// the scan.
/// </summary>
/// <param name="kit">analysis kit holding the token chain to process</param>
public override void Process(Pullenti.Ner.Core.AnalysisKit kit)
{
    Pullenti.Ner.Core.AnalyzerData data = kit.GetAnalyzerData(this);
    int chunkSize = 100000;
    int totalChunks = (kit.Sofa.Text.Length + chunkSize - 1) / chunkSize;
    if (totalChunks == 0)
    {
        totalChunks = 1;
    }
    int chunkNo = 0;
    int progressMark = 0;

    for (Pullenti.Ner.Token tok = kit.FirstToken; tok != null; tok = tok.Next)
    {
        if (tok.BeginChar > progressMark)
        {
            progressMark += chunkSize;
            chunkNo++;
            if (!this.OnProgress(chunkNo, totalChunks, kit))
            {
                break;
            }
        }

        Pullenti.Ner.Goods.Internal.GoodAttrToken parsed = Pullenti.Ner.Goods.Internal.GoodAttrToken.TryParse(tok, null, true, true);
        if (parsed == null)
        {
            continue;
        }

        GoodAttributeReferent attribute = parsed._createAttr();
        if (attribute != null)
        {
            Pullenti.Ner.ReferentToken wrapper = new Pullenti.Ner.ReferentToken(attribute, parsed.BeginToken, parsed.EndToken);
            wrapper.Referent = data.RegisterReferent(attribute);
            kit.EmbedToken(wrapper);
            tok = wrapper;
        }
        else
        {
            // Nothing to register: jump over the parsed span.
            tok = parsed.EndToken;
        }
    }
}
/// <summary>
/// Attaches a phone and up to five "alternate" phones that follow it.
/// An alternate phone is a copy of the previous phone's slots whose number has its tail replaced by
/// the alternate value (same length as the value). The chain stops when the current phone already has
/// an additional number (the list is returned immediately), when no alternate is found, or when the
/// copied number is missing or not longer than the alternate value.
/// Finally an additional number found after the last phone is written to every phone in the list and
/// the last token is extended to cover it.
/// </summary>
/// <param name="pli">phone items</param>
/// <param name="ind">index passed to <c>_TryAttach_</c></param>
/// <param name="isPhoneBefore">passed through to <c>_TryAttach_</c></param>
/// <param name="prevPhone">passed through to <c>_TryAttach_</c></param>
/// <returns>list of referent tokens (first phone plus alternates), or null when no phone was attached</returns>
List<Pullenti.Ner.ReferentToken> TryAttach(List<Pullenti.Ner.Phone.Internal.PhoneItemToken> pli, int ind, bool isPhoneBefore, PhoneReferent prevPhone)
{
    Pullenti.Ner.ReferentToken current = this._TryAttach_(pli, ind, isPhoneBefore, prevPhone, 0);
    if (current == null)
    {
        return null;
    }

    List<Pullenti.Ner.ReferentToken> found = new List<Pullenti.Ner.ReferentToken>();
    found.Add(current);

    int attempts = 0;
    while (attempts < 5)
    {
        attempts++;
        PhoneReferent basePhone = current.Referent as PhoneReferent;
        if (basePhone.AddNumber != null)
        {
            return found;
        }
        Pullenti.Ner.Phone.Internal.PhoneItemToken alternate = Pullenti.Ner.Phone.Internal.PhoneItemToken.TryAttachAlternate(current.EndToken.Next, basePhone, pli);
        if (alternate == null)
        {
            break;
        }

        PhoneReferent copy = new PhoneReferent();
        foreach (Pullenti.Ner.Slot slot in current.Referent.Slots)
        {
            copy.AddSlot(slot.TypeName, slot.Value, false, 0);
        }

        string number = copy.Number;
        if (number == null || number.Length <= alternate.Value.Length)
        {
            break;
        }
        // Keep the head of the number and substitute its tail with the alternate digits.
        string head = number.Substring(0, number.Length - alternate.Value.Length);
        copy.Number = head + alternate.Value;
        copy.m_Template = basePhone.m_Template;

        Pullenti.Ner.ReferentToken alternateToken = new Pullenti.Ner.ReferentToken(copy, alternate.BeginToken, alternate.EndToken);
        found.Add(alternateToken);
        current = alternateToken;
    }

    Pullenti.Ner.Phone.Internal.PhoneItemToken additional = Pullenti.Ner.Phone.Internal.PhoneItemToken.TryAttachAdditional(current.EndToken.Next);
    if (additional != null)
    {
        for (int k = 0; k < found.Count; k++)
        {
            (found[k].Referent as PhoneReferent).AddNumber = additional.Value;
        }
        found[found.Count - 1].EndToken = additional.EndToken;
    }
    return found;
}
/// <summary>
/// Reads one token from the stream. A leading short selects the token class:
/// 0 - no token (null is returned), 1 - TextToken, 2 - NumberToken, 3 - ReferentToken,
/// anything else - MetaToken. After the token's own data, a metatoken is followed by its inner
/// token chain, whose first and last tokens become the begin and end tokens.
/// </summary>
/// <param name="stream">source stream</param>
/// <param name="kit">analysis kit the token belongs to</param>
/// <param name="vers">serialization format version, passed through to the token deserializers</param>
/// <returns>the deserialized token, or null when the type code is 0</returns>
static Pullenti.Ner.Token DeserializeToken(Stream stream, Pullenti.Ner.Core.AnalysisKit kit, int vers)
{
    short kind = DeserializeShort(stream);
    Pullenti.Ner.Token result;
    switch (kind)
    {
        case 0:
            return null;
        case 1:
            result = new Pullenti.Ner.TextToken(null, kit);
            break;
        case 2:
            result = new Pullenti.Ner.NumberToken(null, null, null, Pullenti.Ner.NumberSpellingType.Digit, kit);
            break;
        case 3:
            result = new Pullenti.Ner.ReferentToken(null, null, null, kit);
            break;
        default:
            result = new Pullenti.Ner.MetaToken(null, null, kit);
            break;
    }

    result.Deserialize(stream, kit, vers);

    Pullenti.Ner.MetaToken meta = result as Pullenti.Ner.MetaToken;
    if (meta != null)
    {
        Pullenti.Ner.Token inner = DeserializeTokens(stream, kit, vers);
        if (inner != null)
        {
            meta.m_BeginToken = inner;
            // Walk to the end of the inner chain; the last token becomes the end token.
            Pullenti.Ner.Token last = inner;
            while (last.Next != null)
            {
                last = last.Next;
            }
            meta.m_EndToken = last;
        }
    }
    return result;
}
/// <summary>
/// Collects the content part of a URI starting at <paramref name="t0"/> into one UriItemToken.
/// The steps below follow the order of the code.
/// 1. AttachDomainName is tried on t0. A domain whose Value is shorter than 3 characters rejects the
///    whole parse (null). When a domain is found, scanning starts after it.
/// 2. Tokens are scanned and their source text is appended to a buffer. A token preceded by a line
///    break always ends the scan. A token preceded by other whitespace ends the scan unless
///    canBeWhitespaces is true, a domain was found, and one of the following holds: the previous token
///    is a hyphen; the previous token is '.' and the current one is a two-character letter token; or
///    a look-ahead over the following whitespace-free run (an initial slash or backslash is skipped,
///    numbers are skipped) reaches HTM, HTML, SHTML, ASP, ASPX or JSP, or a slash/backslash, before
///    meeting a non-Latin letter token or a character outside <paramref name="chars"/>.
///    A ',' or ';' right before the whitespace ends the scan.
/// 3. Number tokens are always appended. A referent token is appended only when it begins with the
///    value "РФ" and the buffer ends with '.', or when it is a single-token Latin-letter referent
///    token; any other non-text token ends the scan.
/// 4. A text token whose first source character is not a letter must have that character in
///    <paramref name="chars"/>. The last opening '(' or '[' is remembered (no nesting), and a closing
///    ')' or ']' is accepted only when it matches the remembered opener.
/// 5. If the buffer is empty or contains no letter or digit, the domain token (possibly null) is
///    returned. Otherwise one trailing '.' or '/' is removed, the domain value is put in front, a
///    buffer starting with two backslashes gets every double backslash replaced by "//", and a value
///    that equals "WWW" (case-insensitive, after dropping a leading "//") is rejected with null.
/// NOTE(review): when the trailing '.' or '/' is removed, the end token is moved to the previous token,
/// which presumably relies on that character being a token of its own - confirm.
/// </summary>
/// <param name="t0">first token of the content</param>
/// <param name="chars">non-letter characters allowed inside the content</param>
/// <param name="canBeWhitespaces">whether whitespace inside the content may be tolerated (see step 2)</param>
/// <returns>token spanning from t0 to the last accepted token with the collected Value, the domain token, or null</returns>
static UriItemToken _AttachUriContent(Pullenti.Ner.Token t0, string chars, bool canBeWhitespaces = false) { StringBuilder txt = new StringBuilder(); Pullenti.Ner.Token t1 = t0; UriItemToken dom = AttachDomainName(t0, true, canBeWhitespaces); if (dom != null) { if (dom.Value.Length < 3) { return(null); } } char openChar = (char)0; Pullenti.Ner.Token t = t0; if (dom != null) { t = dom.EndToken.Next; } for (; t != null; t = t.Next) { if (t != t0 && t.IsWhitespaceBefore) { if (t.IsNewlineBefore || !canBeWhitespaces) { break; } if (dom == null) { break; } if (t.Previous.IsHiphen) { } else if (t.Previous.IsCharOf(",;")) { break; } else if (t.Previous.IsChar('.') && t.Chars.IsLetter && t.LengthChar == 2) { } else { bool ok = false; Pullenti.Ner.Token tt1 = t; if (t.IsCharOf("\\/")) { tt1 = t.Next; } Pullenti.Ner.Token tt0 = tt1; for (; tt1 != null; tt1 = tt1.Next) { if (tt1 != tt0 && tt1.IsWhitespaceBefore) { break; } if (tt1 is Pullenti.Ner.NumberToken) { continue; } if (!(tt1 is Pullenti.Ner.TextToken)) { break; } string term1 = (tt1 as Pullenti.Ner.TextToken).Term; if (((term1 == "HTM" || term1 == "HTML" || term1 == "SHTML") || term1 == "ASP" || term1 == "ASPX") || term1 == "JSP") { ok = true; break; } if (!tt1.Chars.IsLetter) { if (tt1.IsCharOf("\\/")) { ok = true; break; } if (!tt1.IsCharOf(chars)) { break; } } else if (!tt1.Chars.IsLatinLetter) { break; } } if (!ok) { break; } } } if (t is Pullenti.Ner.NumberToken) { Pullenti.Ner.NumberToken nt = t as Pullenti.Ner.NumberToken; txt.Append(nt.GetSourceText()); t1 = t; continue; } Pullenti.Ner.TextToken tt = t as Pullenti.Ner.TextToken; if (tt == null) { Pullenti.Ner.ReferentToken rt = t as Pullenti.Ner.ReferentToken; if (rt != null && rt.BeginToken.IsValue("РФ", null)) { if (txt.Length > 0 && txt[txt.Length - 1] == '.') { txt.Append(rt.BeginToken.GetSourceText()); t1 = t; continue; } } if (rt != null && rt.Chars.IsLatinLetter && rt.BeginToken == rt.EndToken) { txt.Append(rt.BeginToken.GetSourceText()); t1 = t; 
continue; } break; } string src = tt.GetSourceText(); char ch = src[0]; if (!char.IsLetter(ch)) { if (chars.IndexOf(ch) < 0) { break; } if (ch == '(' || ch == '[') { openChar = ch; } else if (ch == ')') { if (openChar != '(') { break; } openChar = (char)0; } else if (ch == ']') { if (openChar != '[') { break; } openChar = (char)0; } } txt.Append(src); t1 = t; } if (txt.Length == 0) { return(dom); } int i; for (i = 0; i < txt.Length; i++) { if (char.IsLetterOrDigit(txt[i])) { break; } } if (i >= txt.Length) { return(dom); } if (txt[txt.Length - 1] == '.' || txt[txt.Length - 1] == '/') { txt.Length--; t1 = t1.Previous; } if (dom != null) { txt.Insert(0, dom.Value); } string tmp = txt.ToString(); if (tmp.StartsWith("\\\\")) { txt.Replace("\\\\", "//"); tmp = txt.ToString(); } if (tmp.StartsWith("//")) { tmp = tmp.Substring(2); } if (string.Compare(tmp, "WWW", true) == 0) { return(null); } UriItemToken res = new UriItemToken(t0, t1) { Value = txt.ToString() }; return(res); }
/// <summary>
/// Tries to recognize an eponym part of an organization name ("named after ...") starting at
/// <paramref name="t"/>. The checks run in the order listed.
/// Non-text token:
///  - a referent of type "DATE" whose upper-cased text is one of "1 МАЯ", "7 ОКТЯБРЯ", "9 МАЯ",
///    "8 МАРТА" gives a one-token eponym;
///  - an age number (NumberHelper.TryParseAge) followed within fewer than 3 whitespaces by a Cyrillic
///    text or referent token that is not all-lowercase gives "{age} {NEXT TOKEN IN UPPER CASE}";
///  - otherwise null.
/// Text token - the start of the name (t1) is chosen by one of:
///  - the word "ИМЕНИ" / "ІМЕНІ" (full prefix);
///  - "ИМ" / "ІМ" followed by '.' (full prefix), or written in lowercase and followed by a text token
///    that is not all-lowercase;
///  - the previous token being ФОНД, ХРАМ or ЦЕРКОВЬ, with the current word Cyrillic, not a
///    preposition or conjunction, separated by exactly one whitespace, not all-lowercase, not an
///    adjective that starts a multi-token noun phrase, and not an empty/standard organization name word.
/// The parse is rejected when no start was chosen, when the start is on a new line without a full
/// prefix, when the token before <paramref name="t"/> is a preposition, or when
/// <paramref name="mustHasPrefix"/> is set and no "ИМЕНИ"/"ИМ" prefix was seen.
/// Then, from t1:
///  - after a full prefix, a "DATE" referent with a DAY slot and no YEAR slot gives its upper-cased text;
///  - the words СВЯТОЙ, СВЯТИЙ, СВ, СВЯТ (with an optional '.') are skipped and remembered;
///  - for a dictionary noun or adjective, a multi-token "PERSON" found by look-ahead whose last token
///    matches its LASTNAME gives the source text of that last token;
///  - an anniversary number of type Age followed by a noun phrase gives "{n}-ЛЕТ {phrase}"
///    ("РОКІВ" when the base language is Ukrainian);
///  - otherwise person items are parsed; when there are none, a geo referent token gives its text;
///  - the person items are matched against the patterns: initials + surname/name (repeated through
///    "and"), name + surname (optionally "and" name + surname), surname (optionally followed by a
///    name, initials, or "and" surname in genitive/undefined case), a name after a "holy" word
///    (giving "СВЯТ.{name}" or "СВЯТ.{name} {second}"), a single name or surname after a full prefix,
///    and name + name + surname.
/// NOTE(review): two branches call Eponyms.Add on a token created without an Eponyms initializer,
/// which presumably relies on the class creating the list itself - confirm.
/// NOTE(review): the last pattern checks its.Count == 3 but indexes its[i + 2]; after a skipped leading
/// lowercase word (i == 1) this looks like it could go out of range - confirm against PersonItemToken output.
/// </summary>
/// <param name="t">token to start from; may be null</param>
/// <param name="mustHasPrefix">when true, only variants introduced by "ИМЕНИ"/"ИМ" are accepted for text tokens</param>
/// <returns>the eponym token with the list of eponyms, or null</returns>
public static OrgItemEponymToken TryAttach(Pullenti.Ner.Token t, bool mustHasPrefix = false) { Pullenti.Ner.TextToken tt = t as Pullenti.Ner.TextToken; if (tt == null) { if (t == null) { return(null); } Pullenti.Ner.Referent r1 = t.GetReferent(); if (r1 != null && r1.TypeName == "DATE") { string str = r1.ToString().ToUpper(); if ((str == "1 МАЯ" || str == "7 ОКТЯБРЯ" || str == "9 МАЯ") || str == "8 МАРТА") { OrgItemEponymToken dt = new OrgItemEponymToken(t, t) { Eponyms = new List <string>() }; dt.Eponyms.Add(str); return(dt); } } Pullenti.Ner.NumberToken age = Pullenti.Ner.Core.NumberHelper.TryParseAge(t); if ((age != null && (((age.EndToken.Next is Pullenti.Ner.TextToken) || (age.EndToken.Next is Pullenti.Ner.ReferentToken))) && (age.WhitespacesAfterCount < 3)) && !age.EndToken.Next.Chars.IsAllLower && age.EndToken.Next.Chars.IsCyrillicLetter) { OrgItemEponymToken dt = new OrgItemEponymToken(t, age.EndToken.Next) { Eponyms = new List <string>() }; dt.Eponyms.Add(string.Format("{0} {1}", age.Value, dt.EndToken.GetSourceText().ToUpper())); return(dt); } return(null); } Pullenti.Ner.Token t1 = null; bool full = false; bool hasName = false; if (tt.Term == "ИМЕНИ" || tt.Term == "ІМЕНІ") { t1 = t.Next; full = true; hasName = true; } else if (((tt.Term == "ИМ" || tt.Term == "ІМ")) && tt.Next != null) { if (tt.Next.IsChar('.')) { t1 = tt.Next.Next; full = true; } else if ((tt.Next is Pullenti.Ner.TextToken) && tt.Chars.IsAllLower && !tt.Next.Chars.IsAllLower) { t1 = tt.Next; } hasName = true; } else if (tt.Previous != null && ((tt.Previous.IsValue("ФОНД", null) || tt.Previous.IsValue("ХРАМ", null) || tt.Previous.IsValue("ЦЕРКОВЬ", "ЦЕРКВА")))) { if ((!tt.Chars.IsCyrillicLetter || tt.Morph.Class.IsPreposition || tt.Morph.Class.IsConjunction) || !tt.Chars.IsLetter) { return(null); } if (tt.WhitespacesBeforeCount != 1) { return(null); } if (tt.Chars.IsAllLower) { return(null); } if (tt.Morph.Class.IsAdjective) { Pullenti.Ner.Core.NounPhraseToken npt = 
Pullenti.Ner.Core.NounPhraseHelper.TryParse(tt, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null); if (npt != null && npt.BeginToken != npt.EndToken) { return(null); } } OrgItemNameToken na = OrgItemNameToken.TryAttach(tt, null, false, true); if (na != null) { if (na.IsEmptyWord || na.IsStdName || na.IsStdTail) { return(null); } } t1 = tt; } if (t1 == null || ((t1.IsNewlineBefore && !full))) { return(null); } if (tt.Previous != null && tt.Previous.Morph.Class.IsPreposition) { return(null); } if (mustHasPrefix && !hasName) { return(null); } Pullenti.Ner.Referent r = t1.GetReferent(); if ((r != null && r.TypeName == "DATE" && full) && r.FindSlot("DAY", null, true) != null && r.FindSlot("YEAR", null, true) == null) { OrgItemEponymToken dt = new OrgItemEponymToken(t, t1) { Eponyms = new List <string>() }; dt.Eponyms.Add(r.ToString().ToUpper()); return(dt); } bool holy = false; if ((t1.IsValue("СВЯТОЙ", null) || t1.IsValue("СВЯТИЙ", null) || t1.IsValue("СВ", null)) || t1.IsValue("СВЯТ", null)) { t1 = t1.Next; holy = true; if (t1 != null && t1.IsChar('.')) { t1 = t1.Next; } } if (t1 == null) { return(null); } Pullenti.Morph.MorphClass cl = t1.GetMorphClassInDictionary(); if (cl.IsNoun || cl.IsAdjective) { Pullenti.Ner.ReferentToken rt = t1.Kit.ProcessReferent("PERSON", t1); if (rt != null && rt.Referent.TypeName == "PERSON" && rt.BeginToken != rt.EndToken) { string e = rt.Referent.GetStringValue("LASTNAME"); if (e != null) { if (rt.EndToken.IsValue(e, null)) { OrgItemEponymToken re = new OrgItemEponymToken(t, rt.EndToken); re.Eponyms.Add(rt.EndToken.GetSourceText()); return(re); } } } } Pullenti.Ner.NumberToken nt = Pullenti.Ner.Core.NumberHelper.TryParseAnniversary(t1); if (nt != null && nt.Typ == Pullenti.Ner.NumberSpellingType.Age) { Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(nt.EndToken.Next, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null); if (npt != null) { string s = string.Format("{0}-{1} {2}", nt.Value, 
(t.Kit.BaseLanguage.IsUa ? "РОКІВ" : "ЛЕТ"), Pullenti.Ner.Core.MiscHelper.GetTextValue(npt.BeginToken, npt.EndToken, Pullenti.Ner.Core.GetTextAttr.No)); OrgItemEponymToken res = new OrgItemEponymToken(t, npt.EndToken); res.Eponyms.Add(s); return(res); } } List <PersonItemToken> its = PersonItemToken.TryAttach(t1); if (its == null) { if ((t1 is Pullenti.Ner.ReferentToken) && (t1.GetReferent() is Pullenti.Ner.Geo.GeoReferent)) { string s = Pullenti.Ner.Core.MiscHelper.GetTextValue(t1, t1, Pullenti.Ner.Core.GetTextAttr.No); OrgItemEponymToken re = new OrgItemEponymToken(t, t1); re.Eponyms.Add(s); return(re); } return(null); } List <string> eponims = new List <string>(); int i = 0; int j; if (its[i].Typ == PersonItemType.LocaseWord) { i++; } if (i >= its.Count) { return(null); } if (!full) { if (its[i].BeginToken.Morph.Class.IsAdjective && !its[i].BeginToken.Morph.Class.IsProperSurname) { return(null); } } if (its[i].Typ == PersonItemType.Initial) { i++; while (true) { if ((i < its.Count) && its[i].Typ == PersonItemType.Initial) { i++; } if (i >= its.Count || ((its[i].Typ != PersonItemType.Surname && its[i].Typ != PersonItemType.Name))) { break; } eponims.Add(its[i].Value); t1 = its[i].EndToken; if ((i + 2) >= its.Count || its[i + 1].Typ != PersonItemType.And || its[i + 2].Typ != PersonItemType.Initial) { break; } i += 3; } } else if (((i + 1) < its.Count) && its[i].Typ == PersonItemType.Name && its[i + 1].Typ == PersonItemType.Surname) { eponims.Add(its[i + 1].Value); t1 = its[i + 1].EndToken; i += 2; if ((((i + 2) < its.Count) && its[i].Typ == PersonItemType.And && its[i + 1].Typ == PersonItemType.Name) && its[i + 2].Typ == PersonItemType.Surname) { eponims.Add(its[i + 2].Value); t1 = its[i + 2].EndToken; } } else if (its[i].Typ == PersonItemType.Surname) { if (its.Count == (i + 2) && its[i].Chars == its[i + 1].Chars) { its[i].Value += (" " + its[i + 1].Value); its[i].EndToken = its[i + 1].EndToken; its.RemoveAt(i + 1); } eponims.Add(its[i].Value); if (((i + 1) < 
its.Count) && its[i + 1].Typ == PersonItemType.Name) { if ((i + 2) == its.Count) { i++; } else if (its[i + 2].Typ != PersonItemType.Surname) { i++; } } else if (((i + 1) < its.Count) && its[i + 1].Typ == PersonItemType.Initial) { if ((i + 2) == its.Count) { i++; } else if (its[i + 2].Typ == PersonItemType.Initial && (i + 3) == its.Count) { i += 2; } } else if (((i + 2) < its.Count) && its[i + 1].Typ == PersonItemType.And && its[i + 2].Typ == PersonItemType.Surname) { bool ok = true; Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(its[i + 2].BeginToken, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null); if (npt != null && !npt.Morph.Case.IsGenitive && !npt.Morph.Case.IsUndefined) { ok = false; } if (ok) { eponims.Add(its[i + 2].Value); i += 2; } } t1 = its[i].EndToken; } else if (its[i].Typ == PersonItemType.Name && holy) { t1 = its[i].EndToken; bool sec = false; if (((i + 1) < its.Count) && its[i].Chars == its[i + 1].Chars && its[i + 1].Typ != PersonItemType.Initial) { sec = true; t1 = its[i + 1].EndToken; } if (sec) { eponims.Add(string.Format("СВЯТ.{0} {1}", its[i].Value, its[i + 1].Value)); } else { eponims.Add(string.Format("СВЯТ.{0}", its[i].Value)); } } else if (full && (i + 1) == its.Count && ((its[i].Typ == PersonItemType.Name || its[i].Typ == PersonItemType.Surname))) { t1 = its[i].EndToken; eponims.Add(its[i].Value); } else if ((its[i].Typ == PersonItemType.Name && its.Count == 3 && its[i + 1].Typ == PersonItemType.Name) && its[i + 2].Typ == PersonItemType.Surname) { t1 = its[i + 2].EndToken; eponims.Add(string.Format("{0} {1} {2}", its[i].Value, its[i + 1].Value, its[i + 2].Value)); i += 2; } if (eponims.Count == 0) { return(null); } return(new OrgItemEponymToken(t, t1) { Eponyms = eponims }); }
/// <summary>
/// Splits the text into mail messages and registers their parts as MailReferent entities.
/// Stage 1: MailLine.Parse is called along the token chain and the resulting lines are collected
/// (the scan jumps to the end token of every parsed line). Nothing is done when no line was parsed.
/// Stage 2: the lines are grouped into blocks. A line of type From opens a new block when it must be
/// the first line or is the very first line, when one of the next two lines is of type From or Hello
/// (checked only if at least two lines follow), when the nearest preceding line with a defined type
/// is BestRegards, or when the line contains a DATE or URI referent. The new block first takes the
/// run of header lines: From lines, plus other lines directly followed by a From line, subject to the
/// IsRealFrom / MustBeFirstLine / MailAddr checks in the code. All other lines are appended to the
/// current block (one is created if none exists yet).
/// Stage 3: for every block
///  - leading From lines produce a referent of kind Head;
///  - the block is scanned backwards for the start of the tail (t2): a BestRegards line (extended
///    upwards over adjacent short lines), or short lines with referents; long lines reset the guess;
///    as a fallback, an Undefined line with no words whose first referent is a person;
///  - a Hello line found before any other meaningful line produces a referent of kind Hello;
///  - the remaining lines up to t2 produce a referent of kind Body, and the lines from t2 to the end
///    a referent of kind Tail, which also receives the referents of the lines starting at or after t2.
/// Each referent gets its Text from the covered tokens, is registered in the analyzer data and gets
/// the covering metatoken as an occurrence; this method itself does not call kit.EmbedToken.
/// NOTE(review): the statement "if (lines.Count == 91) { }" has an empty body and no effect - it looks
/// like a debugging leftover.
/// NOTE(review): for the Tail, AddRef is called even when the metatoken has zero length and the
/// referent was therefore not registered - confirm this is intended.
/// </summary>
/// <param name="kit">analysis kit holding the token chain to process</param>
public override void Process(Pullenti.Ner.Core.AnalysisKit kit) { List <Pullenti.Ner.Mail.Internal.MailLine> lines = new List <Pullenti.Ner.Mail.Internal.MailLine>(); for (Pullenti.Ner.Token t = kit.FirstToken; t != null; t = t.Next) { Pullenti.Ner.Mail.Internal.MailLine ml = Pullenti.Ner.Mail.Internal.MailLine.Parse(t, 0, 0); if (ml == null) { continue; } if (lines.Count == 91) { } lines.Add(ml); t = ml.EndToken; } if (lines.Count == 0) { return; } int i; List <List <Pullenti.Ner.Mail.Internal.MailLine> > blocks = new List <List <Pullenti.Ner.Mail.Internal.MailLine> >(); List <Pullenti.Ner.Mail.Internal.MailLine> blk = null; for (i = 0; i < lines.Count; i++) { Pullenti.Ner.Mail.Internal.MailLine ml = lines[i]; if (ml.Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.From) { bool isNew = ml.MustBeFirstLine || i == 0; if (((i + 2) < lines.Count) && (((lines[i + 1].Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.From || lines[i + 2].Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.From || lines[i + 1].Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.Hello) || lines[i + 2].Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.Hello))) { isNew = true; } if (!isNew) { for (int j = i - 1; j >= 0; j--) { if (lines[j].Typ != Pullenti.Ner.Mail.Internal.MailLine.Types.Undefined) { if (lines[j].Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.BestRegards) { isNew = true; } break; } } } if (!isNew) { for (Pullenti.Ner.Token tt = ml.BeginToken; tt != null && tt.EndChar <= ml.EndChar; tt = tt.Next) { if (tt.GetReferent() != null) { if (tt.GetReferent().TypeName == "DATE" || tt.GetReferent().TypeName == "URI") { isNew = true; } } } } if (isNew) { blk = new List <Pullenti.Ner.Mail.Internal.MailLine>(); blocks.Add(blk); for (; i < lines.Count; i++) { if (lines[i].Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.From) { if (blk.Count > 0 && lines[i].MustBeFirstLine) { break; } blk.Add(lines[i]); } else if (((i + 1) < lines.Count) && lines[i + 1].Typ == 
Pullenti.Ner.Mail.Internal.MailLine.Types.From) { int j; for (j = 0; j < blk.Count; j++) { if (blk[j].Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.From) { if (blk[j].IsRealFrom || blk[j].MustBeFirstLine || blk[j].MailAddr != null) { break; } } } if (j >= blk.Count) { blk.Add(lines[i]); continue; } bool ok = false; for (j = i + 1; j < lines.Count; j++) { if (lines[j].Typ != Pullenti.Ner.Mail.Internal.MailLine.Types.From) { break; } if (lines[j].IsRealFrom || lines[j].MustBeFirstLine) { ok = true; break; } if (lines[j].MailAddr != null) { ok = true; break; } } if (ok) { break; } blk.Add(lines[i]); } else { break; } } i--; continue; } } if (blk == null) { blocks.Add((blk = new List <Pullenti.Ner.Mail.Internal.MailLine>())); } blk.Add(lines[i]); } if (blocks.Count == 0) { return; } Pullenti.Ner.Core.AnalyzerData ad = kit.GetAnalyzerData(this); for (int j = 0; j < blocks.Count; j++) { lines = blocks[j]; if (lines.Count == 0) { continue; } i = 0; if (lines[0].Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.From) { Pullenti.Ner.Token t1 = lines[0].EndToken; for (; i < lines.Count; i++) { if (lines[i].Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.From) { t1 = lines[i].EndToken; } else if (((i + 1) < lines.Count) && lines[i + 1].Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.From) { } else { break; } } MailReferent mail = new MailReferent() { Kind = MailKind.Head }; Pullenti.Ner.ReferentToken mt = new Pullenti.Ner.ReferentToken(mail, lines[0].BeginToken, t1); mail.Text = Pullenti.Ner.Core.MiscHelper.GetTextValueOfMetaToken(mt, Pullenti.Ner.Core.GetTextAttr.KeepRegister); ad.RegisterReferent(mail); mail.AddOccurenceOfRefTok(mt); } int i0 = i; Pullenti.Ner.Token t2 = null; int err = 0; for (i = lines.Count - 1; i >= i0; i--) { Pullenti.Ner.Mail.Internal.MailLine li = lines[i]; if (li.Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.BestRegards) { t2 = lines[i].BeginToken; for (--i; i >= i0; i--) { if (lines[i].Typ == 
Pullenti.Ner.Mail.Internal.MailLine.Types.BestRegards && (lines[i].Words < 2)) { t2 = lines[i].BeginToken; } else if ((i > i0 && (lines[i].Words < 3) && lines[i - 1].Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.BestRegards) && (lines[i - 1].Words < 2)) { i--; t2 = lines[i].BeginToken; } else { break; } } break; } if (li.Refs.Count > 0 && (li.Words < 3) && i > i0) { err = 0; t2 = li.BeginToken; continue; } if (li.Words > 10) { t2 = null; continue; } if (li.Words > 2) { if ((++err) > 2) { t2 = null; } } } if (t2 == null) { for (i = lines.Count - 1; i >= i0; i--) { Pullenti.Ner.Mail.Internal.MailLine li = lines[i]; if (li.Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.Undefined) { if (li.Refs.Count > 0 && (li.Refs[0] is Pullenti.Ner.Person.PersonReferent)) { if (li.Words == 0 && i > i0) { t2 = li.BeginToken; break; } } } } } for (int ii = i0; ii < lines.Count; ii++) { if (lines[ii].Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.Hello) { MailReferent mail = new MailReferent() { Kind = MailKind.Hello }; Pullenti.Ner.ReferentToken mt = new Pullenti.Ner.ReferentToken(mail, lines[i0].BeginToken, lines[ii].EndToken); if (mt.LengthChar > 0) { mail.Text = Pullenti.Ner.Core.MiscHelper.GetTextValueOfMetaToken(mt, Pullenti.Ner.Core.GetTextAttr.KeepRegister); ad.RegisterReferent(mail); mail.AddOccurenceOfRefTok(mt); i0 = ii + 1; } break; } else if (lines[ii].Typ != Pullenti.Ner.Mail.Internal.MailLine.Types.Undefined || lines[ii].Words > 0 || lines[ii].Refs.Count > 0) { break; } } if (i0 < lines.Count) { if (t2 != null && t2.Previous == null) { } else { MailReferent mail = new MailReferent() { Kind = MailKind.Body }; Pullenti.Ner.ReferentToken mt = new Pullenti.Ner.ReferentToken(mail, lines[i0].BeginToken, (t2 != null && t2.Previous != null ? 
t2.Previous : lines[lines.Count - 1].EndToken)); if (mt.LengthChar > 0) { mail.Text = Pullenti.Ner.Core.MiscHelper.GetTextValueOfMetaToken(mt, Pullenti.Ner.Core.GetTextAttr.KeepRegister); ad.RegisterReferent(mail); mail.AddOccurenceOfRefTok(mt); } } if (t2 != null) { MailReferent mail = new MailReferent() { Kind = MailKind.Tail }; Pullenti.Ner.ReferentToken mt = new Pullenti.Ner.ReferentToken(mail, t2, lines[lines.Count - 1].EndToken); if (mt.LengthChar > 0) { mail.Text = Pullenti.Ner.Core.MiscHelper.GetTextValueOfMetaToken(mt, Pullenti.Ner.Core.GetTextAttr.KeepRegister); ad.RegisterReferent(mail); mail.AddOccurenceOfRefTok(mt); } for (i = i0; i < lines.Count; i++) { if (lines[i].BeginChar >= t2.BeginChar) { foreach (Pullenti.Ner.Referent r in lines[i].Refs) { mail.AddRef(r, 0); } } } } } } }
/// <summary>
/// Extracts measures from the text.
/// When the kit has an external ontology, the names and full names of its unit referents that have no
/// <c>m_Unit</c> are first collected into an additional termin collection (tagged with the unit).
/// Then every token is tried with <c>MeasureToken.TryParseMinimal</c> and, if that fails, with
/// <c>MeasureToken.TryParse</c>; the referent tokens created (and registered) for a parsed measure are
/// embedded one by one, re-pointing the boundaries of the not yet embedded ones to the token that has
/// just been embedded.
/// Finally, with an external ontology present, every registered unit referent is linked to the first
/// ontology unit that shares a name or full name value with it.
/// </summary>
/// <param name="kit">analysis kit holding the token chain to process</param>
public override void Process(Pullenti.Ner.Core.AnalysisKit kit)
{
    Pullenti.Ner.Core.AnalyzerData data = kit.GetAnalyzerData(this);

    // Extra unit names coming from the external ontology (null when there is no ontology).
    Pullenti.Ner.Core.TerminCollection extraUnits = null;
    if (kit.Ontology != null)
    {
        extraUnits = new Pullenti.Ner.Core.TerminCollection();
        foreach (Pullenti.Ner.ExtOntologyItem item in kit.Ontology.Items)
        {
            UnitReferent ontoUnit = item.Referent as UnitReferent;
            if (ontoUnit == null || ontoUnit.m_Unit != null)
            {
                continue;
            }
            foreach (Pullenti.Ner.Slot slot in ontoUnit.Slots)
            {
                bool isNameSlot = slot.TypeName == UnitReferent.ATTR_NAME || slot.TypeName == UnitReferent.ATTR_FULLNAME;
                if (!isNameSlot)
                {
                    continue;
                }
                Pullenti.Ner.Core.Termin termin = new Pullenti.Ner.Core.Termin(slot.Value as string);
                termin.Tag = ontoUnit;
                extraUnits.Add(termin);
            }
        }
    }

    for (Pullenti.Ner.Token tok = kit.FirstToken; tok != null; tok = tok.Next)
    {
        Pullenti.Ner.Measure.Internal.MeasureToken measure = Pullenti.Ner.Measure.Internal.MeasureToken.TryParseMinimal(tok, extraUnits, false);
        if (measure == null)
        {
            measure = Pullenti.Ner.Measure.Internal.MeasureToken.TryParse(tok, extraUnits, true, false, false, false);
            if (measure == null)
            {
                continue;
            }
        }
        List <Pullenti.Ner.ReferentToken> created = measure.CreateRefenetsTokensWithRegister(data, true);
        if (created == null)
        {
            continue;
        }
        for (int cur = 0; cur < created.Count; cur++)
        {
            Pullenti.Ner.ReferentToken embedded = created[cur];
            tok.Kit.EmbedToken(embedded);
            tok = embedded;
            // Later tokens that shared a boundary with the embedded one must now point at it.
            for (int later = cur + 1; later < created.Count; later++)
            {
                Pullenti.Ner.ReferentToken pending = created[later];
                if (pending.BeginToken == embedded.BeginToken)
                {
                    pending.BeginToken = tok;
                }
                if (pending.EndToken == embedded.EndToken)
                {
                    pending.EndToken = tok;
                }
            }
        }
    }

    if (kit.Ontology == null)
    {
        return;
    }
    foreach (Pullenti.Ner.Referent registered in data.Referents)
    {
        UnitReferent unit = registered as UnitReferent;
        if (unit == null)
        {
            continue;
        }
        foreach (Pullenti.Ner.ExtOntologyItem item in kit.Ontology.Items)
        {
            UnitReferent ontoUnit = item.Referent as UnitReferent;
            if (ontoUnit == null)
            {
                continue;
            }
            bool sharesName = false;
            foreach (Pullenti.Ner.Slot slot in ontoUnit.Slots)
            {
                if (slot.TypeName != UnitReferent.ATTR_NAME && slot.TypeName != UnitReferent.ATTR_FULLNAME)
                {
                    continue;
                }
                if (unit.FindSlot(null, slot.Value, true) != null)
                {
                    sharesName = true;
                    break;
                }
            }
            if (!sharesName)
            {
                continue;
            }
            unit.OntologyItems = new List <Pullenti.Ner.ExtOntologyItem>();
            unit.OntologyItems.Add(item);
            break;
        }
    }
}
/// <summary>
/// Tries to recognise a title-page item starting at the given token: a theme marker ("ТЕМА",
/// "ПО/НА ТЕМА"), a translation note ("ПЕРЕВОД С ..."), a section header, a speciality, or a
/// keyword registered in m_Termins (document type etc.).
/// </summary>
/// <param name="t">token to start from; null yields null</param>
/// <returns>the recognised item or null when nothing matches</returns>
public static TitleItemToken TryAttach(Pullenti.Ner.Token t)
{
    // Recursive calls below pass tt.Next / tok.EndToken.Next, which may be null.
    if (t == null)
    {
        return null;
    }
    Pullenti.Ner.TextToken tt = t as Pullenti.Ner.TextToken;
    if (tt != null)
    {
        Pullenti.Ner.Token t1 = (Pullenti.Ner.Token)tt;
        if (tt.Term == "ТЕМА")
        {
            // "ТЕМА <type>[:]" gives a combined type-and-theme item, otherwise a plain theme marker.
            TitleItemToken tit = TryAttach(tt.Next);
            if (tit != null && tit.Typ == Types.Typ)
            {
                t1 = tit.EndToken;
                if (t1.Next != null && t1.Next.IsChar(':'))
                {
                    t1 = t1.Next;
                }
                return new TitleItemToken(t, t1, Types.TypAndTheme) { Value = tit.Value };
            }
            if (tt.Next != null && tt.Next.IsChar(':'))
            {
                t1 = tt.Next;
            }
            return new TitleItemToken(tt, t1, Types.Theme);
        }
        if (tt.Term == "ПО" || tt.Term == "НА")
        {
            if (tt.Next != null && tt.Next.IsValue("ТЕМА", null))
            {
                t1 = tt.Next;
                if (t1.Next != null && t1.Next.IsChar(':'))
                {
                    t1 = t1.Next;
                }
                return new TitleItemToken(tt, t1, Types.Theme);
            }
        }
        if (tt.Term == "ПЕРЕВОД" || tt.Term == "ПЕР")
        {
            Pullenti.Ner.Token tt2 = tt.Next;
            if (tt2 != null && tt2.IsChar('.'))
            {
                tt2 = tt2.Next;
            }
            if (tt2 is Pullenti.Ner.TextToken)
            {
                // The two literals differ: the first is Latin 'C', the second is Cyrillic 'С'.
                if ((tt2 as Pullenti.Ner.TextToken).Term == "C" || (tt2 as Pullenti.Ner.TextToken).Term == "С")
                {
                    tt2 = tt2.Next;
                    if (tt2 is Pullenti.Ner.TextToken)
                    {
                        return new TitleItemToken(t, tt2, Types.Translate);
                    }
                }
            }
        }
        if (tt.Term == "СЕКЦИЯ" || tt.Term == "SECTION" || tt.Term == "СЕКЦІЯ")
        {
            t1 = tt.Next;
            if (t1 != null && t1.IsChar(':'))
            {
                t1 = t1.Next;
            }
            Pullenti.Ner.Core.BracketSequenceToken br = Pullenti.Ner.Core.BracketHelper.TryParse(t1, Pullenti.Ner.Core.BracketParseAttr.No, 100);
            if (br != null)
            {
                t1 = br.EndToken;
            }
            else if (t1 != tt.Next)
            {
                // A colon was skipped: the section title runs to the end of the line.
                for (; t1 != null; t1 = t1.Next)
                {
                    if (t1.IsNewlineAfter)
                    {
                        break;
                    }
                }
                if (t1 == null)
                {
                    return null;
                }
            }
            if (t1 != tt.Next)
            {
                return new TitleItemToken(tt, t1, Types.Dust);
            }
        }
        t1 = null;
        if (tt.IsValue("СПЕЦИАЛЬНОСТЬ", "СПЕЦІАЛЬНІСТЬ"))
        {
            t1 = tt.Next;
        }
        else if (tt.Morph.Class.IsPreposition && tt.Next != null && tt.Next.IsValue("СПЕЦИАЛЬНОСТЬ", "СПЕЦІАЛЬНІСТЬ"))
        {
            t1 = tt.Next.Next;
        }
        else if (tt.IsChar('/') && tt.IsNewlineBefore)
        {
            t1 = tt.Next;
        }
        if (t1 != null)
        {
            if (t1.IsCharOf(":") || t1.IsHiphen)
            {
                t1 = t1.Next;
            }
            TitleItemToken spec = TryAttachSpeciality(t1, true);
            if (spec != null)
            {
                spec.BeginToken = t;
                return spec;
            }
        }
    }
    TitleItemToken sss = TryAttachSpeciality(t, false);
    if (sss != null)
    {
        return sss;
    }
    if (t is Pullenti.Ner.ReferentToken)
    {
        return null;
    }
    Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
    if (npt != null)
    {
        string s = npt.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false);
        Pullenti.Ner.Core.TerminToken tok = m_Termins.TryParse(npt.EndToken, Pullenti.Ner.Core.TerminParseAttr.No);
        if (tok != null)
        {
            Types ty = (Types)tok.Termin.Tag;
            if (ty == Types.Typ)
            {
                TitleItemToken tit = TryAttach(tok.EndToken.Next);
                if (tit != null && tit.Typ == Types.Theme)
                {
                    // Fixed: the statement terminator used to sit outside the closing brace,
                    // leaving this return statement unterminated.
                    return new TitleItemToken(npt.BeginToken, tit.EndToken, Types.TypAndTheme) { Value = s };
                }
                if (s == "РАБОТА" || s == "РОБОТА" || s == "ПРОЕКТ")
                {
                    return null;
                }
                Pullenti.Ner.Token t1 = tok.EndToken;
                if (s == "ДИССЕРТАЦИЯ" || s == "ДИСЕРТАЦІЯ")
                {
                    // Look a few tokens ahead for the degree the dissertation is written for and
                    // refine the type name accordingly; give up after more than 3 unrecognised tokens.
                    int err = 0;
                    for (Pullenti.Ner.Token ttt = t1.Next; ttt != null; ttt = ttt.Next)
                    {
                        if (ttt.Morph.Class.IsPreposition)
                        {
                            continue;
                        }
                        if (ttt.IsValue("СОИСКАНИЕ", ""))
                        {
                            continue;
                        }
                        Pullenti.Ner.Core.NounPhraseToken npt1 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(ttt, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                        if (npt1 != null && npt1.Noun.IsValue("СТЕПЕНЬ", "СТУПІНЬ"))
                        {
                            t1 = (ttt = npt1.EndToken);
                            continue;
                        }
                        Pullenti.Ner.ReferentToken rt = t1.Kit.ProcessReferent("PERSON", ttt);
                        if (rt != null && (rt.Referent is Pullenti.Ner.Person.PersonPropertyReferent))
                        {
                            Pullenti.Ner.Person.PersonPropertyReferent ppr = rt.Referent as Pullenti.Ner.Person.PersonPropertyReferent;
                            if (ppr.Name == "доктор наук")
                            {
                                t1 = rt.EndToken;
                                s = "ДОКТОРСКАЯ ДИССЕРТАЦИЯ";
                                break;
                            }
                            else if (ppr.Name == "кандидат наук")
                            {
                                t1 = rt.EndToken;
                                s = "КАНДИДАТСКАЯ ДИССЕРТАЦИЯ";
                                break;
                            }
                            else if (ppr.Name == "магистр")
                            {
                                t1 = rt.EndToken;
                                s = "МАГИСТЕРСКАЯ ДИССЕРТАЦИЯ";
                                break;
                            }
                        }
                        if (ttt.IsValue("ДОКТОР", null) || ttt.IsValue("КАНДИДАТ", null) || ttt.IsValue("МАГИСТР", "МАГІСТР"))
                        {
                            t1 = ttt;
                            npt1 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(ttt.Next, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                            if (npt1 != null && npt1.EndToken.IsValue("НАУК", null))
                            {
                                t1 = npt1.EndToken;
                            }
                            s = (ttt.IsValue("МАГИСТР", "МАГІСТР") ? "МАГИСТЕРСКАЯ ДИССЕРТАЦИЯ" : (ttt.IsValue("ДОКТОР", null) ? "ДОКТОРСКАЯ ДИССЕРТАЦИЯ" : "КАНДИДАТСКАЯ ДИССЕРТАЦИЯ"));
                            break;
                        }
                        if ((++err) > 3)
                        {
                            break;
                        }
                    }
                }
                if (t1.Next != null && t1.Next.IsChar('.'))
                {
                    t1 = t1.Next;
                }
                if (s.EndsWith("ОТЧЕТ") && t1.Next != null && t1.Next.IsValue("О", null))
                {
                    // "ОТЧЕТ О <noun phrase in prepositional case>": the phrase belongs to the type.
                    Pullenti.Ner.Core.NounPhraseToken npt1 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t1.Next, Pullenti.Ner.Core.NounPhraseParseAttr.ParsePreposition, 0, null);
                    if (npt1 != null && npt1.Morph.Case.IsPrepositional)
                    {
                        t1 = npt1.EndToken;
                    }
                }
                return new TitleItemToken(npt.BeginToken, t1, ty) { Value = s };
            }
        }
    }
    Pullenti.Ner.Core.TerminToken tok1 = m_Termins.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
    if (tok1 != null)
    {
        Pullenti.Ner.Token t1 = tok1.EndToken;
        TitleItemToken re = new TitleItemToken(tok1.BeginToken, t1, (Types)tok1.Termin.Tag);
        return re;
    }
    if (Pullenti.Ner.Core.BracketHelper.CanBeStartOfSequence(t, false, false))
    {
        // A keyword wrapped in brackets/quotes: the item starts at the keyword and ends on the closing bracket.
        tok1 = m_Termins.TryParse(t.Next, Pullenti.Ner.Core.TerminParseAttr.No);
        if (tok1 != null && Pullenti.Ner.Core.BracketHelper.CanBeEndOfSequence(tok1.EndToken.Next, false, null, false))
        {
            Pullenti.Ner.Token t1 = tok1.EndToken.Next;
            return new TitleItemToken(tok1.BeginToken, t1, (Types)tok1.Termin.Tag);
        }
    }
    return null;
}
/// <summary>
/// Builds a CanonicDecreeRefUri for a referent token: the referent it points to (Ref) plus the
/// character range [BeginChar, EndChar] of the source text that represents this reference.
/// </summary>
/// <remarks>
/// Returns null when t is not a ReferentToken, or when its referent is neither a DecreeReferent
/// nor a DecreePartReferent.
///
/// DecreeReferent branch:
///  - decrees of kind Publisher yield null;
///  - the range starts as the whole token and is returned unchanged when the token stands between
///    '(' and ')' or when its MiscAttrs is non-zero;
///  - when the token's own first and last inner tokens are '(' and ')', the range is the text between them;
///  - otherwise the inner tokens are walked and the range is narrowed: a leading '(' is skipped, the
///    text is cut before "(ДАЛЕЕ ...", a trailing ')' is dropped, items shared with a following decree
///    after a comma/"and" are trimmed using that decree's item list, the range is cut before a Name item,
///    TypeWithGeo is filled when a Typ item is followed by a Terr item, and IsAdopted is set (with the
///    range starting at the first item) when that item is preceded by УТВЕРЖДЕННЫЙ / УТВЕРДИТЬ / УТВ.
///
/// DecreePartReferent branch:
///  - yields null when t is the right-hand side of "part - part" and CreateRangeReferent succeeds with
///    the previous token (the range is presumably reported from the left-hand token);
///  - when t is the left-hand side of such a pair, Ref becomes the range referent, IsDiap is true and
///    the range extends to the end of the right-hand token;
///  - a nested DecreeReferent token is excluded from the range;
///  - the slot types of the part are ranked with PartToken._getRank and the range is narrowed to the
///    PartToken of the top-ranked type; how exactly depends on whether _hasSameDecree reports a
///    neighbouring part before and/or after t.
///
/// NOTE(review): several Previous links are dereferenced without a null check
/// (l.BeginToken.Previous, tt.Previous); confirm they cannot be null for inner tokens.
/// </remarks>
/// <param name="t">token to analyse; expected to be a ReferentToken carrying a decree or decree part</param>
/// <returns>the canonical reference description, or null when none can be built</returns>
public static CanonicDecreeRefUri TryCreateCanonicDecreeRefUri(Pullenti.Ner.Token t) { if (!(t is Pullenti.Ner.ReferentToken)) { return(null); } Pullenti.Ner.Decree.DecreeReferent dr = t.GetReferent() as Pullenti.Ner.Decree.DecreeReferent; CanonicDecreeRefUri res; if (dr != null) { if (dr.Kind == Pullenti.Ner.Decree.DecreeKind.Publisher) { return(null); } res = new CanonicDecreeRefUri(t.Kit.Sofa.Text) { Ref = dr, BeginChar = t.BeginChar, EndChar = t.EndChar }; if ((t.Previous != null && t.Previous.IsChar('(') && t.Next != null) && t.Next.IsChar(')')) { return(res); } if ((t as Pullenti.Ner.ReferentToken).MiscAttrs != 0) { return(res); } Pullenti.Ner.ReferentToken rt = t as Pullenti.Ner.ReferentToken; if (rt.BeginToken.IsChar('(') && rt.EndToken.IsChar(')')) { res = new CanonicDecreeRefUri(t.Kit.Sofa.Text) { Ref = dr, BeginChar = rt.BeginToken.Next.BeginChar, EndChar = rt.EndToken.Previous.EndChar }; return(res); } List <DecreeToken> nextDecreeItems = null; if ((t.Next != null && t.Next.IsCommaAnd && (t.Next.Next is Pullenti.Ner.ReferentToken)) && (t.Next.Next.GetReferent() is Pullenti.Ner.Decree.DecreeReferent)) { nextDecreeItems = DecreeToken.TryAttachList((t.Next.Next as Pullenti.Ner.ReferentToken).BeginToken, null, 10, false); if (nextDecreeItems != null && nextDecreeItems.Count > 1) { for (int i = 0; i < (nextDecreeItems.Count - 1); i++) { if (nextDecreeItems[i].IsNewlineAfter) { nextDecreeItems.RemoveRange(i + 1, nextDecreeItems.Count - i - 1); break; } } } } bool wasTyp = false; bool wasNum = false; for (Pullenti.Ner.Token tt = (t as Pullenti.Ner.MetaToken).BeginToken; tt != null && tt.EndChar <= t.EndChar; tt = tt.Next) { if (tt.BeginChar == t.BeginChar && tt.IsChar('(') && tt.Next != null) { res.BeginChar = tt.Next.BeginChar; } if (tt.IsChar('(') && tt.Next != null && tt.Next.IsValue("ДАЛЕЕ", null)) { if (res.EndChar >= tt.BeginChar) { res.EndChar = tt.Previous.EndChar; } break; } if (tt.EndChar == t.EndChar && tt.IsChar(')')) { res.EndChar = 
tt.Previous.EndChar; for (Pullenti.Ner.Token tt1 = tt.Previous; tt1 != null && tt1.BeginChar >= res.BeginChar; tt1 = tt1.Previous) { if (tt1.IsChar('(') && tt1.Previous != null) { if (res.BeginChar < tt1.Previous.BeginChar) { res.EndChar = tt1.Previous.EndChar; } } } } List <DecreeToken> li = DecreeToken.TryAttachList(tt, null, 10, false); if (li != null && li.Count > 0) { for (int ii = 0; ii < (li.Count - 1); ii++) { if (li[ii].Typ == DecreeToken.ItemType.Typ && li[ii + 1].Typ == DecreeToken.ItemType.Terr) { res.TypeWithGeo = Pullenti.Ner.Core.MiscHelper.GetTextValue(li[ii].BeginToken, li[ii + 1].EndToken, Pullenti.Ner.Core.GetTextAttr.FirstNounGroupToNominativeSingle); } } if ((nextDecreeItems != null && nextDecreeItems.Count > 1 && (nextDecreeItems.Count < li.Count)) && nextDecreeItems[0].Typ != DecreeToken.ItemType.Typ) { int d = li.Count - nextDecreeItems.Count; int j; for (j = 0; j < nextDecreeItems.Count; j++) { if (nextDecreeItems[j].Typ != li[d + j].Typ) { break; } } if (j >= nextDecreeItems.Count) { li.RemoveRange(0, d); res.BeginChar = li[0].BeginChar; } } else if ((nextDecreeItems != null && nextDecreeItems.Count == 1 && nextDecreeItems[0].Typ == DecreeToken.ItemType.Name) && li.Count == 2 && li[1].Typ == DecreeToken.ItemType.Name) { res.BeginChar = li[1].BeginChar; res.EndChar = li[1].EndChar; break; } else if ((nextDecreeItems != null && nextDecreeItems.Count == 1 && nextDecreeItems[0].Typ == DecreeToken.ItemType.Number) && li[li.Count - 1].Typ == DecreeToken.ItemType.Number) { res.BeginChar = li[li.Count - 1].BeginChar; res.EndChar = li[li.Count - 1].EndChar; } for (int i = 0; i < li.Count; i++) { DecreeToken l = li[i]; if (l.BeginChar > t.EndChar) { li.RemoveRange(i, li.Count - i); break; } if (l.Typ == DecreeToken.ItemType.Name) { if (!wasNum) { if (dr.Kind == Pullenti.Ner.Decree.DecreeKind.Contract) { continue; } if (((i + 1) < li.Count) && ((li[i + 1].Typ == DecreeToken.ItemType.Date || li[i + 1].Typ == DecreeToken.ItemType.Number))) { continue; 
} } int ee = l.BeginToken.Previous.EndChar; if (ee > res.BeginChar && (ee < res.EndChar)) { res.EndChar = ee; } break; } if (l.Typ == DecreeToken.ItemType.Number) { wasNum = true; } if (i == 0) { if (l.Typ == DecreeToken.ItemType.Typ) { wasTyp = true; } else if (l.Typ == DecreeToken.ItemType.Owner || l.Typ == DecreeToken.ItemType.Org) { if (((i + 1) < li.Count) && ((li[1].Typ == DecreeToken.ItemType.Date || li[1].Typ == DecreeToken.ItemType.Number))) { wasTyp = true; } } if (wasTyp) { Pullenti.Ner.Token tt0 = l.BeginToken.Previous; if (tt0 != null && tt0.IsChar('.')) { tt0 = tt0.Previous; } if (tt0 != null && ((tt0.IsValue("УТВЕРЖДЕННЫЙ", null) || tt0.IsValue("УТВЕРДИТЬ", null) || tt0.IsValue("УТВ", null)))) { if (l.BeginChar > res.BeginChar) { res.BeginChar = l.BeginChar; if (res.EndChar < res.BeginChar) { res.EndChar = t.EndChar; } res.IsAdopted = true; } } } } } if (li.Count > 0) { tt = li[li.Count - 1].EndToken; if (tt.IsChar(')')) { tt = tt.Previous; } continue; } } if (wasTyp) { DecreeToken na = DecreeToken.TryAttachName(tt, dr.Typ0, true, false); if (na != null && tt.BeginChar > t.BeginChar) { Pullenti.Ner.Token tt1 = na.EndToken.Next; if (tt1 != null && tt1.IsCharOf(",()")) { tt1 = tt1.Next; } if (tt1 != null && (tt1.EndChar < t.EndChar)) { if (tt1.IsValue("УТВЕРЖДЕННЫЙ", null) || tt1.IsValue("УТВЕРДИТЬ", null) || tt1.IsValue("УТВ", null)) { tt = tt1; continue; } } if (tt.Previous != null && tt.Previous.IsChar(':') && na.EndChar <= res.EndChar) { res.BeginChar = tt.BeginChar; break; } if (tt.Previous.EndChar > res.BeginChar) { res.EndChar = tt.Previous.EndChar; break; } } } } return(res); } Pullenti.Ner.Decree.DecreePartReferent dpr = t.GetReferent() as Pullenti.Ner.Decree.DecreePartReferent; if (dpr == null) { return(null); } if ((t.Previous != null && t.Previous.IsHiphen && (t.Previous.Previous is Pullenti.Ner.ReferentToken)) && (t.Previous.Previous.GetReferent() is Pullenti.Ner.Decree.DecreePartReferent)) { if 
(Pullenti.Ner.Decree.DecreePartReferent.CreateRangeReferent(t.Previous.Previous.GetReferent() as Pullenti.Ner.Decree.DecreePartReferent, dpr) != null) { return(null); } } Pullenti.Ner.Token t1 = t; bool hasDiap = false; Pullenti.Ner.ReferentToken DiapRef = null; if ((t.Next != null && t.Next.IsHiphen && (t.Next.Next is Pullenti.Ner.ReferentToken)) && (t.Next.Next.GetReferent() is Pullenti.Ner.Decree.DecreePartReferent)) { Pullenti.Ner.Decree.DecreePartReferent diap = Pullenti.Ner.Decree.DecreePartReferent.CreateRangeReferent(dpr as Pullenti.Ner.Decree.DecreePartReferent, t.Next.Next.GetReferent() as Pullenti.Ner.Decree.DecreePartReferent); if (diap != null) { dpr = diap; hasDiap = true; t1 = t.Next.Next; DiapRef = t1 as Pullenti.Ner.ReferentToken; } } res = new CanonicDecreeRefUri(t.Kit.Sofa.Text) { Ref = dpr, BeginChar = t.BeginChar, EndChar = t1.EndChar, IsDiap = hasDiap }; if ((t.Previous != null && t.Previous.IsChar('(') && t1.Next != null) && t1.Next.IsChar(')')) { return(res); } for (Pullenti.Ner.Token tt = (t as Pullenti.Ner.MetaToken).BeginToken; tt != null && tt.EndChar <= t.EndChar; tt = tt.Next) { if (tt.GetReferent() is Pullenti.Ner.Decree.DecreeReferent) { if (tt.BeginChar > t.BeginChar) { res.EndChar = tt.Previous.EndChar; if (tt.Previous.Morph.Class.IsPreposition && tt.Previous.Previous != null) { res.EndChar = tt.Previous.Previous.EndChar; } } else if (tt.EndChar < t.EndChar) { res.BeginChar = tt.BeginChar; } break; } } bool hasSameBefore = _hasSameDecree(t, dpr, true); bool hasSameAfter = _hasSameDecree(t, dpr, false); PartToken.ItemType ptmin = PartToken.ItemType.Prefix; PartToken.ItemType ptmin2 = PartToken.ItemType.Prefix; int max = 0; int max2 = 00; foreach (Pullenti.Ner.Slot s in dpr.Slots) { PartToken.ItemType pt = PartToken._getTypeByAttrName(s.TypeName); if (pt == PartToken.ItemType.Prefix) { continue; } int co = PartToken._getRank(pt); if (co < 1) { if (pt == PartToken.ItemType.Part && 
dpr.FindSlot(Pullenti.Ner.Decree.DecreePartReferent.ATTR_CLAUSE, null, true) != null) { co = PartToken._getRank(PartToken.ItemType.Paragraph); } else { continue; } } if (co > max) { max2 = max; ptmin2 = ptmin; max = co; ptmin = pt; } else if (co > max2) { max2 = co; ptmin2 = pt; } } if (ptmin != PartToken.ItemType.Prefix) { for (Pullenti.Ner.Token tt = (t as Pullenti.Ner.MetaToken).BeginToken; tt != null && tt.EndChar <= res.EndChar; tt = tt.Next) { if (tt.BeginChar >= res.BeginChar) { PartToken pt = PartToken.TryAttach(tt, null, false, false); if (pt != null && pt.Typ == ptmin) { res.BeginChar = pt.BeginChar; res.EndChar = pt.EndChar; if (pt.Typ == PartToken.ItemType.Appendix && pt.EndToken.IsValue("К", null) && pt.BeginToken != pt.EndToken) { res.EndChar = pt.EndToken.Previous.EndChar; } if (pt.EndChar == t.EndChar) { if ((t.Next != null && t.Next.IsCommaAnd && (t.Next.Next is Pullenti.Ner.ReferentToken)) && (t.Next.Next.GetReferent() is Pullenti.Ner.Decree.DecreePartReferent)) { Pullenti.Ner.Token tt1 = (t.Next.Next as Pullenti.Ner.ReferentToken).BeginToken; bool ok = true; if (tt1.Chars.IsLetter) { ok = false; } if (ok) { foreach (PartToken.PartValue v in pt.Values) { res.BeginChar = v.BeginChar; res.EndChar = v.EndChar; break; } } } } if (!hasDiap) { return(res); } break; } } } if (hasDiap && DiapRef != null) { for (Pullenti.Ner.Token tt = DiapRef.BeginToken; tt != null && tt.EndChar <= DiapRef.EndChar; tt = tt.Next) { if (tt.IsChar(',')) { break; } if (tt != DiapRef.BeginToken && tt.IsWhitespaceBefore) { break; } res.EndChar = tt.EndChar; } return(res); } } if (((hasSameBefore || hasSameAfter)) && ptmin != PartToken.ItemType.Prefix) { for (Pullenti.Ner.Token tt = (t as Pullenti.Ner.MetaToken).BeginToken; tt != null && tt.EndChar <= res.EndChar; tt = tt.Next) { if (tt.BeginChar >= res.BeginChar) { PartToken pt = (!hasSameBefore ? 
PartToken.TryAttach(tt, null, false, false) : null); if (pt != null) { if (pt.Typ == ptmin) { foreach (PartToken.PartValue v in pt.Values) { res.BeginChar = v.BeginChar; res.EndChar = v.EndChar; return(res); } } tt = pt.EndToken; continue; } if ((tt is Pullenti.Ner.NumberToken) && tt.BeginChar == res.BeginChar) { res.EndChar = tt.EndChar; for (; tt != null && tt.Next != null;) { if (!tt.Next.IsChar('.') || tt.IsWhitespaceAfter || tt.Next.IsWhitespaceAfter) { break; } if (!(tt.Next.Next is Pullenti.Ner.NumberToken)) { break; } tt = tt.Next.Next; res.EndChar = tt.EndChar; } if (tt.Next != null && tt.Next.IsHiphen) { if (tt.Next.Next is Pullenti.Ner.NumberToken) { tt = tt.Next.Next; res.EndChar = tt.EndChar; for (; tt != null && tt.Next != null;) { if (!tt.Next.IsChar('.') || tt.IsWhitespaceAfter || tt.Next.IsWhitespaceAfter) { break; } if (!(tt.Next.Next is Pullenti.Ner.NumberToken)) { break; } tt = tt.Next.Next; res.EndChar = tt.EndChar; } } else if (tt.Next.Next != null && (tt.Next.Next.GetReferent() is Pullenti.Ner.Decree.DecreePartReferent) && hasDiap) { res.EndChar = (tt.Next.Next as Pullenti.Ner.MetaToken).BeginToken.EndChar; } } return(res); } if (Pullenti.Ner.Core.BracketHelper.CanBeStartOfSequence(tt, true, false) && tt.BeginChar == res.BeginChar && hasSameBefore) { Pullenti.Ner.Core.BracketSequenceToken br = Pullenti.Ner.Core.BracketHelper.TryParse(tt, Pullenti.Ner.Core.BracketParseAttr.No, 100); if (br != null && br.EndToken.Previous == tt.Next) { res.EndChar = br.EndChar; return(res); } } } } return(res); } if (!hasSameBefore && !hasSameAfter && ptmin != PartToken.ItemType.Prefix) { for (Pullenti.Ner.Token tt = (t as Pullenti.Ner.MetaToken).BeginToken; tt != null && tt.EndChar <= res.EndChar; tt = tt.Next) { if (tt.BeginChar >= res.BeginChar) { List <PartToken> pts = PartToken.TryAttachList(tt, false, 40); if (pts == null || pts.Count == 0) { break; } for (int i = 0; i < pts.Count; i++) { if (pts[i].Typ == ptmin) { res.BeginChar = pts[i].BeginChar; 
res.EndChar = pts[i].EndChar; tt = pts[i].EndToken; if (tt.Next != null && tt.Next.IsHiphen) { if (tt.Next.Next is Pullenti.Ner.NumberToken) { res.EndChar = tt.Next.Next.EndChar; } else if (tt.Next.Next != null && (tt.Next.Next.GetReferent() is Pullenti.Ner.Decree.DecreePartReferent) && hasDiap) { res.EndChar = (tt.Next.Next as Pullenti.Ner.MetaToken).BeginToken.EndChar; } } return(res); } } } } } return(res); }
/// <summary>
/// Tries to recognise a sum of money starting at the given token.
/// </summary>
/// <remarks>
/// Two shapes are handled:
///  1. a number with a money postfix (NumberExType.Money), optionally followed by the same sum
///     repeated in parentheses, which is swallowed into the resulting token;
///  2. "N - M currency" / "N preposition M currency", where only the second number carries the
///     currency: the first number becomes a money value in that currency. If the construction is
///     "N-00 (sum currency)", the sum in parentheses is used, possibly joined with one or two
///     preceding thousands groups.
/// </remarks>
/// <param name="t">token to start from</param>
/// <returns>a token wrapping a new MoneyReferent, or null when no money value starts at t</returns>
public static Pullenti.Ner.ReferentToken TryParse(Pullenti.Ner.Token t)
{
    if (t == null)
    {
        return null;
    }
    if (!(t is Pullenti.Ner.NumberToken) && t.LengthChar != 1)
    {
        return null;
    }
    Pullenti.Ner.Core.NumberExToken nex = Pullenti.Ner.Core.NumberHelper.TryParseNumberWithPostfix(t);
    if (nex == null || nex.ExTyp != Pullenti.Ner.Core.NumberExType.Money)
    {
        if ((t is Pullenti.Ner.NumberToken) && (t.Next is Pullenti.Ner.TextToken) && (t.Next.Next is Pullenti.Ner.NumberToken))
        {
            if (t.Next.IsHiphen || t.Next.Morph.Class.IsPreposition)
            {
                Pullenti.Ner.Core.NumberExToken res1 = Pullenti.Ner.Core.NumberHelper.TryParseNumberWithPostfix(t.Next.Next);
                if (res1 != null && res1.ExTyp == Pullenti.Ner.Core.NumberExType.Money)
                {
                    MoneyReferent res0 = new MoneyReferent();
                    if ((t.Next.IsHiphen && res1.RealValue == 0 && res1.EndToken.Next != null) && res1.EndToken.Next.IsChar('('))
                    {
                        // "N-00 currency (sum currency)": the sum in parentheses is the reliable value.
                        Pullenti.Ner.Core.NumberExToken nex2 = Pullenti.Ner.Core.NumberHelper.TryParseNumberWithPostfix(res1.EndToken.Next.Next);
                        if ((nex2 != null && nex2.ExTypParam == res1.ExTypParam && nex2.EndToken.Next != null) && nex2.EndToken.Next.IsChar(')'))
                        {
                            if (nex2.Value == (t as Pullenti.Ner.NumberToken).Value)
                            {
                                res0.Currency = nex2.ExTypParam;
                                res0.AddSlot(MoneyReferent.ATTR_VALUE, nex2.Value, true, 0);
                                return new Pullenti.Ner.ReferentToken(res0, t, nex2.EndToken.Next);
                            }
                            if (t.Previous is Pullenti.Ner.NumberToken)
                            {
                                // Fixed: this branch used to add RealValue * 1000 (double) to Value (string),
                                // i.e. it concatenated strings ("5000" + "300"), so the two-group sum was
                                // never recognised. Compare numerically, like the three-group branch below.
                                if (nex2.RealValue == ((((t.Previous as Pullenti.Ner.NumberToken).RealValue * 1000) + (t as Pullenti.Ner.NumberToken).RealValue)))
                                {
                                    res0.Currency = nex2.ExTypParam;
                                    res0.AddSlot(MoneyReferent.ATTR_VALUE, nex2.Value, true, 0);
                                    return new Pullenti.Ner.ReferentToken(res0, t.Previous, nex2.EndToken.Next);
                                }
                                else if (t.Previous.Previous is Pullenti.Ner.NumberToken)
                                {
                                    if (nex2.RealValue == ((((t.Previous.Previous as Pullenti.Ner.NumberToken).RealValue * 1000000) + ((t.Previous as Pullenti.Ner.NumberToken).RealValue * 1000) + (t as Pullenti.Ner.NumberToken).RealValue)))
                                    {
                                        res0.Currency = nex2.ExTypParam;
                                        res0.AddSlot(MoneyReferent.ATTR_VALUE, nex2.Value, true, 0);
                                        return new Pullenti.Ner.ReferentToken(res0, t.Previous.Previous, nex2.EndToken.Next);
                                    }
                                }
                            }
                        }
                    }
                    // Plain "N - M currency": only the first number is wrapped here.
                    res0.Currency = res1.ExTypParam;
                    res0.AddSlot(MoneyReferent.ATTR_VALUE, (t as Pullenti.Ner.NumberToken).Value, false, 0);
                    return new Pullenti.Ner.ReferentToken(res0, t, t);
                }
            }
        }
        return null;
    }

    MoneyReferent res = new MoneyReferent();
    res.Currency = nex.ExTypParam;
    // The integer part goes to VALUE; the fraction (in hundredths) goes to REST.
    string val = nex.Value;
    int dot = val.IndexOf('.');
    if (dot > 0)
    {
        val = val.Substring(0, dot);
    }
    res.AddSlot(MoneyReferent.ATTR_VALUE, val, true, 0);
    int re = (int)Math.Round(((nex.RealValue - res.Value)) * 100, 6);
    if (re != 0)
    {
        res.AddSlot(MoneyReferent.ATTR_REST, re.ToString(), true, 0);
    }
    if (nex.RealValue != nex.AltRealValue)
    {
        if (Math.Floor(res.Value) != Math.Floor(nex.AltRealValue))
        {
            val = Pullenti.Ner.Core.NumberHelper.DoubleToString(nex.AltRealValue);
            dot = val.IndexOf('.');
            if (dot > 0)
            {
                val = val.Substring(0, dot);
            }
            res.AddSlot(MoneyReferent.ATTR_ALTVALUE, val, true, 0);
        }
        re = (int)Math.Round(((nex.AltRealValue - ((long)nex.AltRealValue))) * 100, 6);
        if (re != res.Rest && re != 0)
        {
            res.AddSlot(MoneyReferent.ATTR_ALTREST, re.ToString(), true, 0);
        }
    }
    if (nex.AltRestMoney > 0)
    {
        res.AddSlot(MoneyReferent.ATTR_ALTREST, nex.AltRestMoney.ToString(), true, 0);
    }

    Pullenti.Ner.Token t1 = nex.EndToken;
    if (t1.Next != null && t1.Next.IsChar('('))
    {
        // Swallow an equal sum repeated in parentheses: "(sum)" first, then a sum starting at '(' itself.
        Pullenti.Ner.ReferentToken rt = TryParse(t1.Next.Next);
        if ((rt != null && rt.Referent.CanBeEquals(res, Pullenti.Ner.Core.ReferentsEqualType.WithinOneText) && rt.EndToken.Next != null) && rt.EndToken.Next.IsChar(')'))
        {
            t1 = rt.EndToken.Next;
        }
        else
        {
            rt = TryParse(t1.Next);
            if (rt != null && rt.Referent.CanBeEquals(res, Pullenti.Ner.Core.ReferentsEqualType.WithinOneText))
            {
                t1 = rt.EndToken;
            }
        }
    }
    if (res.AltValue != null && res.AltValue.Value > res.Value)
    {
        if (t.WhitespacesBeforeCount == 1 && (t.Previous is Pullenti.Ner.NumberToken))
        {
            // The alternative value differs by whole thousands/millions and a number stands right
            // before t: treat that number as a leading digit group and promote the alternative value.
            int delt = (int)((res.AltValue.Value - res.Value));
            if ((((res.Value < 1000) && ((delt % 1000)) == 0)) || (((res.Value < 1000000) && ((delt % 1000000)) == 0)))
            {
                t = t.Previous;
                res.AddSlot(MoneyReferent.ATTR_VALUE, res.GetStringValue(MoneyReferent.ATTR_ALTVALUE), true, 0);
                res.AddSlot(MoneyReferent.ATTR_ALTVALUE, null, true, 0);
            }
        }
    }
    return new Pullenti.Ner.ReferentToken(res, t, t1);
}
internal static Pullenti.Ner.ReferentToken CreateReferentToken(Pullenti.Ner.Person.PersonReferent p, Pullenti.Ner.Token begin, Pullenti.Ner.Token end, Pullenti.Ner.MorphCollection morph, List <PersonAttrToken> attrs, Pullenti.Ner.Person.PersonAnalyzer.PersonAnalyzerData ad, bool forAttribute, bool afterBePredicate) { if (p == null) { return(null); } bool hasPrefix = false; if (attrs != null) { foreach (PersonAttrToken a in attrs) { if (a.Typ == PersonAttrTerminType.BestRegards) { hasPrefix = true; } else { if (a.BeginChar < begin.BeginChar) { begin = a.BeginToken; if ((a.EndToken.Next != null && a.EndToken.Next.IsChar(')') && begin.Previous != null) && begin.Previous.IsChar('(')) { begin = begin.Previous; } } if (a.Typ != PersonAttrTerminType.Prefix) { if (a.Age != null) { p.AddSlot(Pullenti.Ner.Person.PersonReferent.ATTR_AGE, a.Age, false, 0); } if (a.PropRef == null) { p.AddSlot(Pullenti.Ner.Person.PersonReferent.ATTR_ATTR, a.Value, false, 0); } else { p.AddSlot(Pullenti.Ner.Person.PersonReferent.ATTR_ATTR, a, false, 0); } } else if (a.Gender == Pullenti.Morph.MorphGender.Feminie && !p.IsFemale) { p.IsFemale = true; } else if (a.Gender == Pullenti.Morph.MorphGender.Masculine && !p.IsMale) { p.IsMale = true; } } } } else if ((begin.Previous is Pullenti.Ner.TextToken) && (begin.WhitespacesBeforeCount < 3)) { if ((begin.Previous as Pullenti.Ner.TextToken).Term == "ИП") { PersonAttrToken a = new PersonAttrToken(begin.Previous, begin.Previous); a.PropRef = new Pullenti.Ner.Person.PersonPropertyReferent(); a.PropRef.Name = "индивидуальный предприниматель"; p.AddSlot(Pullenti.Ner.Person.PersonReferent.ATTR_ATTR, a, false, 0); begin = begin.Previous; } } Pullenti.Ner.MorphCollection m0 = new Pullenti.Ner.MorphCollection(); foreach (Pullenti.Morph.MorphBaseInfo it in morph.Items) { Pullenti.Morph.MorphBaseInfo bi = new Pullenti.Morph.MorphBaseInfo(); bi.CopyFrom(it); bi.Number = Pullenti.Morph.MorphNumber.Singular; if (bi.Gender == Pullenti.Morph.MorphGender.Undefined) { 
if (p.IsMale && !p.IsFemale) { bi.Gender = Pullenti.Morph.MorphGender.Masculine; } if (!p.IsMale && p.IsFemale) { bi.Gender = Pullenti.Morph.MorphGender.Feminie; } } m0.AddItem(bi); } morph = m0; if ((attrs != null && attrs.Count > 0 && !attrs[0].Morph.Case.IsUndefined) && morph.Case.IsUndefined) { morph.Case = attrs[0].Morph.Case; if (attrs[0].Morph.Number == Pullenti.Morph.MorphNumber.Singular) { morph.Number = Pullenti.Morph.MorphNumber.Singular; } if (p.IsMale && !p.IsFemale) { morph.Gender = Pullenti.Morph.MorphGender.Masculine; } else if (p.IsFemale) { morph.Gender = Pullenti.Morph.MorphGender.Feminie; } } if (begin.Previous != null) { Pullenti.Ner.Token ttt = begin.Previous; if (ttt.IsValue("ИМЕНИ", "ІМЕНІ")) { forAttribute = true; } else { if (ttt.IsChar('.') && ttt.Previous != null) { ttt = ttt.Previous; } if (ttt.WhitespacesAfterCount < 3) { if (ttt.IsValue("ИМ", "ІМ")) { forAttribute = true; } } } } if (forAttribute) { return new Pullenti.Ner.ReferentToken(p, begin, end) { Morph = morph, MiscAttrs = (int)p.m_PersonIdentityTyp } } ; if ((begin.Previous != null && begin.Previous.IsCommaAnd && (begin.Previous.Previous is Pullenti.Ner.ReferentToken)) && (begin.Previous.Previous.GetReferent() is Pullenti.Ner.Person.PersonReferent)) { Pullenti.Ner.ReferentToken rt00 = begin.Previous.Previous as Pullenti.Ner.ReferentToken; for (Pullenti.Ner.Token ttt = (Pullenti.Ner.Token)rt00; ttt != null;) { if (ttt.Previous == null || !(ttt.Previous.Previous is Pullenti.Ner.ReferentToken)) { break; } if (!ttt.Previous.IsCommaAnd || !(ttt.Previous.Previous.GetReferent() is Pullenti.Ner.Person.PersonReferent)) { break; } rt00 = ttt.Previous.Previous as Pullenti.Ner.ReferentToken; ttt = rt00; } if (rt00.BeginToken.GetReferent() is Pullenti.Ner.Person.PersonPropertyReferent) { bool ok = false; if ((rt00.BeginToken as Pullenti.Ner.ReferentToken).EndToken.Next != null && (rt00.BeginToken as Pullenti.Ner.ReferentToken).EndToken.Next.IsChar(':')) { ok = true; } else if 
(rt00.BeginToken.Morph.Number == Pullenti.Morph.MorphNumber.Plural) { ok = true; } if (ok) { p.AddSlot(Pullenti.Ner.Person.PersonReferent.ATTR_ATTR, rt00.BeginToken.GetReferent(), false, 0); } } } if (ad != null) { if (ad.OverflowLevel > 10) { return new Pullenti.Ner.ReferentToken(p, begin, end) { Morph = morph, MiscAttrs = (int)p.m_PersonIdentityTyp } } ; ad.OverflowLevel++; } List <PersonAttrToken> attrs1 = null; bool hasPosition = false; bool openBr = false; for (Pullenti.Ner.Token t = end.Next; t != null; t = t.Next) { if (t.IsTableControlChar) { break; } if (t.IsNewlineBefore) { if (t.NewlinesBeforeCount > 2) { break; } if (attrs1 != null && attrs1.Count > 0) { break; } Pullenti.Ner.Mail.Internal.MailLine ml = Pullenti.Ner.Mail.Internal.MailLine.Parse(t, 0, 0); if (ml != null && ml.Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.From) { break; } if (t.Chars.IsCapitalUpper) { PersonAttrToken attr1 = PersonAttrToken.TryAttach(t, (ad == null ? null : ad.LocalOntology), PersonAttrToken.PersonAttrAttachAttrs.No); bool ok1 = false; if (attr1 != null) { if (hasPrefix || attr1.IsNewlineAfter || ((attr1.EndToken.Next != null && attr1.EndToken.Next.IsTableControlChar))) { ok1 = true; } else { for (Pullenti.Ner.Token tt2 = t.Next; tt2 != null && tt2.EndChar <= attr1.EndChar; tt2 = tt2.Next) { if (tt2.IsWhitespaceBefore) { ok1 = true; } } } } else { Pullenti.Ner.Token ttt = CorrectTailAttributes(p, t); if (ttt != null && ttt != t) { end = (t = ttt); continue; } } if (!ok1) { break; } } } if (t.IsHiphen || t.IsCharOf("_>|")) { continue; } if (t.IsValue("МОДЕЛЬ", null)) { break; } Pullenti.Ner.Token tt = CorrectTailAttributes(p, t); if (tt != t && tt != null) { end = (t = tt); continue; } bool isBe = false; if (t.IsChar('(') && t == end.Next) { openBr = true; t = t.Next; if (t == null) { break; } PersonItemToken pit1 = PersonItemToken.TryAttach(t, null, PersonItemToken.ParseAttr.No, null); if ((pit1 != null && t.Chars.IsCapitalUpper && pit1.EndToken.Next != null) && (t is 
Pullenti.Ner.TextToken) && pit1.EndToken.Next.IsChar(')')) { if (pit1.Lastname != null) { Pullenti.Morph.MorphBaseInfo inf = new Pullenti.Morph.MorphBaseInfo() { Case = Pullenti.Morph.MorphCase.Nominative }; if (p.IsMale) { inf.Gender |= Pullenti.Morph.MorphGender.Masculine; } if (p.IsFemale) { inf.Gender |= Pullenti.Morph.MorphGender.Feminie; } PersonMorphCollection sur = PersonIdentityToken.CreateLastname(pit1, inf); if (sur != null) { p.AddFioIdentity(sur, null, null); end = (t = pit1.EndToken.Next); continue; } } } if ((t is Pullenti.Ner.TextToken) && t.Chars.IsLatinLetter) { List <PersonItemToken> pits = PersonItemToken.TryAttachList(t, null, PersonItemToken.ParseAttr.CanBeLatin, 10); if (((pits != null && pits.Count >= 2 && pits.Count <= 3) && pits[0].Chars.IsLatinLetter && pits[1].Chars.IsLatinLetter) && pits[pits.Count - 1].EndToken.Next != null && pits[pits.Count - 1].EndToken.Next.IsChar(')')) { Pullenti.Ner.Person.PersonReferent pr2 = new Pullenti.Ner.Person.PersonReferent(); int cou = 0; foreach (PersonItemToken pi in pits) { foreach (Pullenti.Ner.Slot si in p.Slots) { if (si.TypeName == Pullenti.Ner.Person.PersonReferent.ATTR_FIRSTNAME || si.TypeName == Pullenti.Ner.Person.PersonReferent.ATTR_MIDDLENAME || si.TypeName == Pullenti.Ner.Person.PersonReferent.ATTR_LASTNAME) { if (Pullenti.Ner.Core.MiscHelper.CanBeEqualCyrAndLatSS(si.Value.ToString(), pi.Value)) { cou++; pr2.AddSlot(si.TypeName, pi.Value, false, 0); break; } } } } if (cou == pits.Count) { foreach (Pullenti.Ner.Slot si in pr2.Slots) { p.AddSlot(si.TypeName, si.Value, false, 0); } end = (t = pits[pits.Count - 1].EndToken.Next); continue; } } } } else if (t.IsComma) { t = t.Next; if ((t is Pullenti.Ner.TextToken) && (t as Pullenti.Ner.TextToken).IsValue("WHO", null)) { continue; } if ((t is Pullenti.Ner.TextToken) && t.Chars.IsLatinLetter) { List <PersonItemToken> pits = PersonItemToken.TryAttachList(t, null, PersonItemToken.ParseAttr.CanBeLatin, 10); if ((pits != null && pits.Count >= 2 && 
pits.Count <= 3) && pits[0].Chars.IsLatinLetter && pits[1].Chars.IsLatinLetter) { Pullenti.Ner.Person.PersonReferent pr2 = new Pullenti.Ner.Person.PersonReferent(); int cou = 0; foreach (PersonItemToken pi in pits) { foreach (Pullenti.Ner.Slot si in p.Slots) { if (si.TypeName == Pullenti.Ner.Person.PersonReferent.ATTR_FIRSTNAME || si.TypeName == Pullenti.Ner.Person.PersonReferent.ATTR_MIDDLENAME || si.TypeName == Pullenti.Ner.Person.PersonReferent.ATTR_LASTNAME) { if (Pullenti.Ner.Core.MiscHelper.CanBeEqualCyrAndLatSS(si.Value.ToString(), pi.Value)) { cou++; pr2.AddSlot(si.TypeName, pi.Value, false, 0); break; } } } } if (cou == pits.Count) { foreach (Pullenti.Ner.Slot si in pr2.Slots) { p.AddSlot(si.TypeName, si.Value, false, 0); } end = (t = pits[pits.Count - 1].EndToken); continue; } } } } else if ((t is Pullenti.Ner.TextToken) && (t as Pullenti.Ner.TextToken).IsVerbBe) { t = t.Next; } else if (t.IsAnd && t.IsWhitespaceAfter && !t.IsNewlineAfter) { if (t == end.Next) { break; } t = t.Next; } else if (t.IsHiphen && t == end.Next) { t = t.Next; } else if (t.IsChar('.') && t == end.Next && hasPrefix) { t = t.Next; } Pullenti.Ner.Token ttt2 = CreateNickname(p, t); if (ttt2 != null) { t = (end = ttt2); continue; } if (t == null) { break; } PersonAttrToken attr = null; attr = PersonAttrToken.TryAttach(t, (ad == null ? 
null : ad.LocalOntology), PersonAttrToken.PersonAttrAttachAttrs.No); if (attr == null) { if ((t != null && t.GetReferent() != null && t.GetReferent().TypeName == "GEO") && attrs1 != null && openBr) { continue; } if ((t.Chars.IsCapitalUpper && openBr && t.Next != null) && t.Next.IsChar(')')) { if (p.FindSlot(Pullenti.Ner.Person.PersonReferent.ATTR_LASTNAME, null, true) == null) { p.AddSlot(Pullenti.Ner.Person.PersonReferent.ATTR_LASTNAME, t.GetSourceText().ToUpper(), false, 0); t = t.Next; end = t; } } if (t != null && t.IsValue("КОТОРЫЙ", null) && t.Morph.Number == Pullenti.Morph.MorphNumber.Singular) { if (!p.IsFemale && t.Morph.Gender == Pullenti.Morph.MorphGender.Feminie) { p.IsFemale = true; p.CorrectData(); } else if (!p.IsMale && t.Morph.Gender == Pullenti.Morph.MorphGender.Masculine) { p.IsMale = true; p.CorrectData(); } } break; } if (attr.Morph.Number == Pullenti.Morph.MorphNumber.Plural) { break; } if (attr.Typ == PersonAttrTerminType.BestRegards) { break; } if (attr.IsDoubt) { if (hasPrefix) { } else if (t.IsNewlineBefore && attr.IsNewlineAfter) { } else if (t.Previous != null && ((t.Previous.IsHiphen || t.Previous.IsChar(':')))) { } else { break; } } if (!morph.Case.IsUndefined && !attr.Morph.Case.IsUndefined) { if (((morph.Case & attr.Morph.Case)).IsUndefined && !isBe) { break; } } if (openBr) { if (Pullenti.Ner.Person.PersonAnalyzer.TryAttachPerson(t, ad, false, 0, true) != null) { break; } } if (attrs1 == null) { if (t.Previous.IsComma && t.Previous == end.Next) { Pullenti.Ner.Token ttt = attr.EndToken.Next; if (ttt != null) { if (ttt.Morph.Class.IsVerb) { if (Pullenti.Ner.Core.MiscHelper.CanBeStartOfSentence(begin)) { } else { break; } } } } attrs1 = new List <PersonAttrToken>(); } attrs1.Add(attr); if (attr.Typ == PersonAttrTerminType.Position || attr.Typ == PersonAttrTerminType.King) { if (!isBe) { hasPosition = true; } } else if (attr.Typ != PersonAttrTerminType.Prefix) { if (attr.Typ == PersonAttrTerminType.Other && attr.Age != null) { } else { 
attrs1 = null; break; } } t = attr.EndToken; } if (attrs1 != null && hasPosition && attrs != null) { Pullenti.Ner.Token te1 = attrs[attrs.Count - 1].EndToken.Next; Pullenti.Ner.Token te2 = attrs1[0].BeginToken; if (te1.WhitespacesAfterCount > te2.WhitespacesBeforeCount && (te2.WhitespacesBeforeCount < 2)) { } else if (attrs1[0].Age != null) { } else if (((te1.IsHiphen || te1.IsChar(':'))) && !attrs1[0].IsNewlineBefore && ((te2.Previous.IsComma || te2.Previous == end))) { } else { foreach (PersonAttrToken a in attrs) { if (a.Typ == PersonAttrTerminType.Position) { Pullenti.Ner.Token te = attrs1[attrs1.Count - 1].EndToken; if (te.Next != null) { if (!te.Next.IsChar('.')) { attrs1 = null; break; } } } } } } if (attrs1 != null && !hasPrefix) { PersonAttrToken attr = attrs1[attrs1.Count - 1]; bool ok = false; if (attr.EndToken.Next != null && attr.EndToken.Next.Chars.IsCapitalUpper) { ok = true; } else { Pullenti.Ner.ReferentToken rt = Pullenti.Ner.Person.PersonAnalyzer.TryAttachPerson(attr.BeginToken, ad, false, -1, false); if (rt != null && (rt.Referent is Pullenti.Ner.Person.PersonReferent)) { ok = true; } } if (ok) { if (attr.BeginToken.WhitespacesBeforeCount > attr.EndToken.WhitespacesAfterCount) { attrs1 = null; } else if (attr.BeginToken.WhitespacesBeforeCount == attr.EndToken.WhitespacesAfterCount) { Pullenti.Ner.ReferentToken rt1 = Pullenti.Ner.Person.PersonAnalyzer.TryAttachPerson(attr.BeginToken, ad, false, -1, false); if (rt1 != null) { attrs1 = null; } } } } if (attrs1 != null) { foreach (PersonAttrToken a in attrs1) { if (a.Typ != PersonAttrTerminType.Prefix) { if (a.Age != null) { p.AddSlot(Pullenti.Ner.Person.PersonReferent.ATTR_AGE, a.Age, true, 0); } else if (a.PropRef == null) { p.AddSlot(Pullenti.Ner.Person.PersonReferent.ATTR_ATTR, a.Value, false, 0); } else { p.AddSlot(Pullenti.Ner.Person.PersonReferent.ATTR_ATTR, a, false, 0); } end = a.EndToken; if (a.Gender != Pullenti.Morph.MorphGender.Undefined && !p.IsFemale && !p.IsMale) { if (a.Gender == 
Pullenti.Morph.MorphGender.Masculine && !p.IsMale) { p.IsMale = true; p.CorrectData(); } else if (a.Gender == Pullenti.Morph.MorphGender.Feminie && !p.IsFemale) { p.IsFemale = true; p.CorrectData(); } } } } if (openBr) { if (end.Next != null && end.Next.IsChar(')')) { end = end.Next; } } } int crlfCou = 0; for (Pullenti.Ner.Token t = end.Next; t != null; t = t.Next) { if (t.IsTableControlChar) { break; } if (t.IsNewlineBefore) { Pullenti.Ner.Mail.Internal.MailLine ml = Pullenti.Ner.Mail.Internal.MailLine.Parse(t, 0, 0); if (ml != null && ml.Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.From) { break; } crlfCou++; } if (t.IsCharOf(":,(") || t.IsHiphen) { continue; } if (t.IsChar('.') && t == end.Next) { continue; } Pullenti.Ner.Referent r = t.GetReferent(); if (r != null) { if (r.TypeName == "PHONE" || r.TypeName == "URI" || r.TypeName == "ADDRESS") { string ty = r.GetStringValue("SCHEME"); if (r.TypeName == "URI") { if ((ty != "mailto" && ty != "skype" && ty != "ICQ") && ty != "http") { break; } } p.AddContact(r); end = t; crlfCou = 0; continue; } } if (r is Pullenti.Ner.Person.PersonIdentityReferent) { p.AddSlot(Pullenti.Ner.Person.PersonReferent.ATTR_IDDOC, r, false, 0); end = t; crlfCou = 0; continue; } if (r != null && r.TypeName == "ORGANIZATION") { if (t.Next != null && t.Next.Morph.Class.IsVerb) { break; } if (begin.Previous != null && begin.Previous.Morph.Class.IsVerb) { break; } if (t.WhitespacesAfterCount == 1) { break; } bool exist = false; foreach (Pullenti.Ner.Slot s in p.Slots) { if (s.TypeName == Pullenti.Ner.Person.PersonReferent.ATTR_ATTR && (s.Value is Pullenti.Ner.Person.PersonPropertyReferent)) { Pullenti.Ner.Person.PersonPropertyReferent pr = s.Value as Pullenti.Ner.Person.PersonPropertyReferent; if (pr.FindSlot(Pullenti.Ner.Person.PersonPropertyReferent.ATTR_REF, r, true) != null) { exist = true; break; } } else if (s.TypeName == Pullenti.Ner.Person.PersonReferent.ATTR_ATTR && (s.Value is PersonAttrToken)) { PersonAttrToken pr = s.Value as 
PersonAttrToken; if (pr.Referent.FindSlot(Pullenti.Ner.Person.PersonPropertyReferent.ATTR_REF, r, true) != null) { exist = true; break; } } } if (!exist) { PersonAttrToken pat = new PersonAttrToken(t, t); pat.PropRef = new Pullenti.Ner.Person.PersonPropertyReferent() { Name = "сотрудник" }; pat.PropRef.AddSlot(Pullenti.Ner.Person.PersonPropertyReferent.ATTR_REF, r, false, 0); p.AddSlot(Pullenti.Ner.Person.PersonReferent.ATTR_ATTR, pat, false, 0); } continue; } if (r != null) { break; } if (!hasPrefix || crlfCou >= 2) { break; } Pullenti.Ner.ReferentToken rt = t.Kit.ProcessReferent("PERSON", t); if (rt != null) { break; } } if (ad != null) { ad.OverflowLevel--; } if (begin.IsValue("НА", null) && begin.Next != null && begin.Next.IsValue("ИМЯ", null)) { Pullenti.Ner.Token t0 = begin.Previous; if (t0 != null && t0.IsComma) { t0 = t0.Previous; } if (t0 != null && (t0.GetReferent() is Pullenti.Ner.Person.PersonIdentityReferent)) { p.AddSlot(Pullenti.Ner.Person.PersonReferent.ATTR_IDDOC, t0.GetReferent(), false, 0); } } return(new Pullenti.Ner.ReferentToken(p, begin, end) { Morph = morph, MiscAttrs = (int)p.m_PersonIdentityTyp }); }
/// <summary>
/// Tries to assemble one phone number from the items of <paramref name="pli"/>, starting at index <paramref name="ind"/>.
/// The method is a long chain of order-dependent heuristics; the stages, in the order they run, are:
/// 1. Template cut: if <paramref name="prevPhone"/> has a template and the first item is a Number, a template string is rebuilt
///    from digit-group lengths and delimiter values; once it equals prevPhone.m_Template, all items after that point are removed
///    from <paramref name="pli"/> (unless the only remaining item is a final Prefix). NOTE: this mutates the caller's list.
/// 2. Country code: taken from a CountryCode item (split into country part and city part through PhoneHelper.GetCountryPrefix
///    unless the value is "8"), or from an item flagged CanBeCountryPrefix when the rest of the list parses recursively (lev + 1).
/// 3. Leading '8' / '7' Number item when no country code was found: a 1-digit item becomes the country code, a 4-digit item is
///    split into country digit + city code, an 11-digit last item after a phone prefix is returned immediately as a complete phone,
///    and otherwise the item (minus its first digit) becomes the city code when all Number items together hold 11 digits.
/// 4. City code: appended from a CityCode item; for country code "8" a 3- or 4-digit Number may be taken as the city code.
/// 5. Digit collection: either the fixed Number-Delim-Number-Delim-Number pattern ("std") or the general loop, which records
///    group lengths, the delimiter, the template, stops once more than 10 digits are collected, refuses to exceed 13 digits,
///    and stores an AddNumber item as the additional (extension) number.
/// 6. Post-processing and validation: bracketed trailing group as additional number, bracketed Prefix sets isPhoneBefore,
///    country prefix extraction from the city code (including the "00" form), and a series of length / group-shape checks
///    that may clear the ok flag; for 6-7 digit numbers without any context GetNextPhone is consulted.
/// 7. Result building: rejects a match preceded by "ГОСТ" or "ТУ", splits a city code off long numbers, completes a short
///    number from <paramref name="prevPhone"/>, rejects all-zero numbers and implausible lengths (country code "7" requires
///    exactly 10 digits), stores the template, and lets a trailing Prefix extend the span and set the phone kind.
/// </summary>
/// <param name="pli">parsed phone items; may be truncated by stage 1</param>
/// <param name="ind">index of the first item to use; the method returns null when it is not inside the list</param>
/// <param name="isPhoneBefore">true when the caller already has evidence that a phone is expected here (relaxes several checks)</param>
/// <param name="prevPhone">previously recognised phone used as a pattern / donor of leading digits, or null</param>
/// <param name="lev">recursion depth; the method returns null when it is greater than 4</param>
/// <returns>a ReferentToken wrapping a new, not yet registered PhoneReferent, or null when the items do not form a phone</returns>
Pullenti.Ner.ReferentToken _TryAttach_(List<Pullenti.Ner.Phone.Internal.PhoneItemToken> pli, int ind, bool isPhoneBefore, PhoneReferent prevPhone, int lev = 0) { if (ind >= pli.Count || lev > 4) return null; string countryCode = null; string cityCode = null; int j = ind; if (prevPhone != null && prevPhone.m_Template != null && pli[j].ItemType == Pullenti.Ner.Phone.Internal.PhoneItemToken.PhoneItemType.Number) { StringBuilder tmp = new StringBuilder(); for (int jj = j; jj < pli.Count; jj++) { if (pli[jj].ItemType == Pullenti.Ner.Phone.Internal.PhoneItemToken.PhoneItemType.Number) tmp.Append(pli[jj].Value.Length); else if (pli[jj].ItemType == Pullenti.Ner.Phone.Internal.PhoneItemToken.PhoneItemType.Delim) { if (pli[jj].Value == " ") break; tmp.Append(pli[jj].Value); continue; } else break; string templ0 = tmp.ToString(); if (templ0 == prevPhone.m_Template) { if ((jj + 1) < pli.Count) { if (pli[jj + 1].ItemType == Pullenti.Ner.Phone.Internal.PhoneItemToken.PhoneItemType.Prefix && (jj + 2) == pli.Count) { } else pli.RemoveRange(jj + 1, pli.Count - jj - 1); } break; } } } if ((j < pli.Count) && pli[j].ItemType == Pullenti.Ner.Phone.Internal.PhoneItemToken.PhoneItemType.CountryCode) { countryCode = pli[j].Value; if (countryCode != "8") { string cc = Pullenti.Ner.Phone.Internal.PhoneHelper.GetCountryPrefix(countryCode); if (cc != null && (cc.Length < countryCode.Length)) { cityCode = countryCode.Substring(cc.Length); countryCode = cc; } } j++; } else if ((j < pli.Count) && pli[j].CanBeCountryPrefix) { int k = j + 1; if ((k < pli.Count) && pli[k].ItemType == Pullenti.Ner.Phone.Internal.PhoneItemToken.PhoneItemType.Delim) k++; Pullenti.Ner.ReferentToken rrt = this._TryAttach_(pli, k, isPhoneBefore, null, lev + 1); if (rrt != null) { if ((((isPhoneBefore && pli[j + 1].ItemType == Pullenti.Ner.Phone.Internal.PhoneItemToken.PhoneItemType.Delim && pli[j + 1].BeginToken.IsHiphen) && pli[j].ItemType == Pullenti.Ner.Phone.Internal.PhoneItemToken.PhoneItemType.Number && 
pli[j].Value.Length == 3) && ((j + 2) < pli.Count) && pli[j + 2].ItemType == Pullenti.Ner.Phone.Internal.PhoneItemToken.PhoneItemType.Number) && pli[j + 2].Value.Length == 3) { } else { countryCode = pli[j].Value; j++; } } } if (((j < pli.Count) && pli[j].ItemType == Pullenti.Ner.Phone.Internal.PhoneItemToken.PhoneItemType.Number && ((pli[j].Value[0] == '8' || pli[j].Value[0] == '7'))) && countryCode == null) { if (pli[j].Value.Length == 1) { countryCode = pli[j].Value; j++; } else if (pli[j].Value.Length == 4) { countryCode = pli[j].Value.Substring(0, 1); if (cityCode == null) cityCode = pli[j].Value.Substring(1); else cityCode += pli[j].Value.Substring(1); j++; } else if (pli[j].Value.Length == 11 && j == (pli.Count - 1) && isPhoneBefore) { PhoneReferent ph0 = new PhoneReferent(); if (pli[j].Value[0] != '8') ph0.CountryCode = pli[j].Value.Substring(0, 1); ph0.Number = pli[j].Value.Substring(1, 3) + pli[j].Value.Substring(4); return new Pullenti.Ner.ReferentToken(ph0, pli[0].BeginToken, pli[j].EndToken); } else if (cityCode == null && pli[j].Value.Length > 3 && ((j + 1) < pli.Count)) { int sum = 0; foreach (Pullenti.Ner.Phone.Internal.PhoneItemToken it in pli) { if (it.ItemType == Pullenti.Ner.Phone.Internal.PhoneItemToken.PhoneItemType.Number) sum += it.Value.Length; } if (sum == 11) { cityCode = pli[j].Value.Substring(1); j++; } } } if ((j < pli.Count) && pli[j].ItemType == Pullenti.Ner.Phone.Internal.PhoneItemToken.PhoneItemType.CityCode) { if (cityCode == null) cityCode = pli[j].Value; else cityCode += pli[j].Value; j++; } if ((j < pli.Count) && pli[j].ItemType == Pullenti.Ner.Phone.Internal.PhoneItemToken.PhoneItemType.Delim) j++; if ((countryCode == "8" && cityCode == null && ((j + 3) < pli.Count)) && pli[j].ItemType == Pullenti.Ner.Phone.Internal.PhoneItemToken.PhoneItemType.Number) { if (pli[j].Value.Length == 3 || pli[j].Value.Length == 4) { cityCode = pli[j].Value; j++; if ((j < pli.Count) && pli[j].ItemType == 
Pullenti.Ner.Phone.Internal.PhoneItemToken.PhoneItemType.Delim) j++; } } int normalNumLen = 0; if (countryCode == "421") normalNumLen = 9; StringBuilder num = new StringBuilder(); StringBuilder templ = new StringBuilder(); List<int> partLength = new List<int>(); string delim = null; bool ok = false; string additional = null; bool std = false; if (countryCode != null && ((j + 4) < pli.Count) && j > 0) { if (((((pli[j - 1].Value == "-" || pli[j - 1].ItemType == Pullenti.Ner.Phone.Internal.PhoneItemToken.PhoneItemType.CountryCode)) && pli[j].ItemType == Pullenti.Ner.Phone.Internal.PhoneItemToken.PhoneItemType.Number && pli[j + 1].ItemType == Pullenti.Ner.Phone.Internal.PhoneItemToken.PhoneItemType.Delim) && pli[j + 2].ItemType == Pullenti.Ner.Phone.Internal.PhoneItemToken.PhoneItemType.Number && pli[j + 3].ItemType == Pullenti.Ner.Phone.Internal.PhoneItemToken.PhoneItemType.Delim) && pli[j + 4].ItemType == Pullenti.Ner.Phone.Internal.PhoneItemToken.PhoneItemType.Number) { if ((((pli[j].Value.Length + pli[j + 2].Value.Length) == 6 || ((pli[j].Value.Length == 4 && pli[j + 2].Value.Length == 5)))) && ((pli[j + 4].Value.Length == 4 || pli[j + 4].Value.Length == 1))) { num.Append(pli[j].Value); num.Append(pli[j + 2].Value); num.Append(pli[j + 4].Value); templ.AppendFormat("{0}{1}{2}{3}{4}", pli[j].Value.Length, pli[j + 1].Value, pli[j + 2].Value.Length, pli[j + 3].Value, pli[j + 4].Value.Length); std = true; ok = true; j += 5; } } } for (; j < pli.Count; j++) { if (std) break; if (pli[j].ItemType == Pullenti.Ner.Phone.Internal.PhoneItemToken.PhoneItemType.Delim) { if (pli[j].IsInBrackets) continue; if (j > 0 && pli[j - 1].IsInBrackets) continue; if (templ.Length > 0) templ.Append(pli[j].Value); if (delim == null) delim = pli[j].Value; else if (pli[j].Value != delim) { if ((partLength.Count == 2 && ((partLength[0] == 3 || partLength[0] == 4)) && cityCode == null) && partLength[1] == 3) { cityCode = num.ToString().Substring(0, partLength[0]); num.Remove(0, partLength[0]); 
partLength.RemoveAt(0); delim = pli[j].Value; continue; } if (isPhoneBefore && ((j + 1) < pli.Count) && pli[j + 1].ItemType == Pullenti.Ner.Phone.Internal.PhoneItemToken.PhoneItemType.Number) { if (num.Length < 6) continue; if (normalNumLen > 0 && (num.Length + pli[j + 1].Value.Length) == normalNumLen) continue; } break; } else continue; ok = false; } else if (pli[j].ItemType == Pullenti.Ner.Phone.Internal.PhoneItemToken.PhoneItemType.Number) { if (num.Length == 0 && pli[j].BeginToken.Previous != null && pli[j].BeginToken.Previous.IsTableControlChar) { Pullenti.Ner.Token tt = pli[pli.Count - 1].EndToken.Next; if (tt != null && tt.IsCharOf(",.")) tt = tt.Next; if (tt is Pullenti.Ner.NumberToken) return null; } if ((num.Length + pli[j].Value.Length) > 13) { if (j > 0 && pli[j - 1].ItemType == Pullenti.Ner.Phone.Internal.PhoneItemToken.PhoneItemType.Delim) j--; ok = true; break; } num.Append(pli[j].Value); partLength.Add(pli[j].Value.Length); templ.Append(pli[j].Value.Length); ok = true; if (num.Length > 10) { j++; if ((j < pli.Count) && pli[j].ItemType == Pullenti.Ner.Phone.Internal.PhoneItemToken.PhoneItemType.AddNumber) { additional = pli[j].Value; j++; } break; } } else if (pli[j].ItemType == Pullenti.Ner.Phone.Internal.PhoneItemToken.PhoneItemType.AddNumber) { additional = pli[j].Value; j++; break; } else break; } if ((j == (pli.Count - 1) && pli[j].IsInBrackets && ((pli[j].Value.Length == 3 || pli[j].Value.Length == 4))) && additional == null) { additional = pli[j].Value; j++; } if ((j < pli.Count) && pli[j].ItemType == Pullenti.Ner.Phone.Internal.PhoneItemToken.PhoneItemType.Prefix && pli[j].IsInBrackets) { isPhoneBefore = true; j++; } if ((countryCode == null && cityCode != null && cityCode.Length > 3) && (num.Length < 8) && cityCode[0] != '8') { if ((cityCode.Length + num.Length) == 10) { } else { string cc = Pullenti.Ner.Phone.Internal.PhoneHelper.GetCountryPrefix(cityCode); if (cc != null) { if (cc.Length > 1 && (cityCode.Length - cc.Length) > 1) { 
countryCode = cc; cityCode = cityCode.Substring(cc.Length); } } } } if (countryCode == null && cityCode != null && cityCode.StartsWith("00")) { string cc = Pullenti.Ner.Phone.Internal.PhoneHelper.GetCountryPrefix(cityCode.Substring(2)); if (cc != null) { if (cityCode.Length > (cc.Length + 3)) { countryCode = cc; cityCode = cityCode.Substring(cc.Length + 2); } } } if (num.Length == 0 && cityCode != null) { if (cityCode.Length == 10) { num.Append(cityCode.Substring(3)); partLength.Add(num.Length); cityCode = cityCode.Substring(0, 3); ok = true; } else if (((cityCode.Length == 9 || cityCode.Length == 11 || cityCode.Length == 8)) && ((isPhoneBefore || countryCode != null))) { num.Append(cityCode); partLength.Add(num.Length); cityCode = null; ok = true; } } if (num.Length < 4) ok = false; if (num.Length < 7) { if (cityCode != null && (cityCode.Length + num.Length) > 7) { if (!isPhoneBefore && cityCode.Length == 3) { int ii; for (ii = 0; ii < partLength.Count; ii++) { if (partLength[ii] == 3) { } else if (partLength[ii] > 3) break; else if ((ii < (partLength.Count - 1)) || (partLength[ii] < 2)) break; } if (ii >= partLength.Count) { if (countryCode == "61") { } else ok = false; } } } else if (((num.Length == 6 || num.Length == 5)) && ((partLength.Count >= 1 && partLength.Count <= 3)) && isPhoneBefore) { if (pli[0].ItemType == Pullenti.Ner.Phone.Internal.PhoneItemToken.PhoneItemType.Prefix && pli[0].Kind == PhoneKind.Home) ok = false; } else if (prevPhone != null && prevPhone.Number != null && ((prevPhone.Number.Length == num.Length || prevPhone.Number.Length == (num.Length + 3) || prevPhone.Number.Length == (num.Length + 4)))) { } else if (num.Length > 4 && prevPhone != null && templ.ToString() == prevPhone.m_Template) ok = true; else ok = false; } if (delim == "." 
&& countryCode == null && cityCode == null) ok = false; if ((isPhoneBefore && countryCode == null && cityCode == null) && num.Length > 10) { string cc = Pullenti.Ner.Phone.Internal.PhoneHelper.GetCountryPrefix(num.ToString()); if (cc != null) { if ((num.Length - cc.Length) == 9) { countryCode = cc; num.Remove(0, cc.Length); ok = true; } } } if (ok) { if (std) { } else if (prevPhone != null && prevPhone.Number != null && (((prevPhone.Number.Length == num.Length || prevPhone.Number.Length == (num.Length + 3) || prevPhone.Number.Length == (num.Length + 4)) || prevPhone.m_Template == templ.ToString()))) { } else if ((partLength.Count == 3 && partLength[0] == 3 && partLength[1] == 2) && partLength[2] == 2) { } else if (partLength.Count == 3 && isPhoneBefore) { } else if ((partLength.Count == 4 && ((partLength[0] + partLength[1]) == 3) && partLength[2] == 2) && partLength[3] == 2) { } else if ((partLength.Count == 4 && partLength[0] == 3 && partLength[1] == 3) && partLength[2] == 2 && partLength[3] == 2) { } else if (partLength.Count == 5 && (partLength[1] + partLength[2]) == 4 && (partLength[3] + partLength[4]) == 4) { } else if (partLength.Count > 4) ok = false; else if (partLength.Count > 3 && cityCode != null) ok = false; else if ((isPhoneBefore || cityCode != null || countryCode != null) || additional != null) ok = true; else { ok = false; if (((num.Length == 6 || num.Length == 7)) && (partLength.Count < 4) && j > 0) { PhoneReferent nextPh = this.GetNextPhone(pli[j - 1].EndToken.Next, lev + 1); if (nextPh != null) { int d = nextPh.Number.Length - num.Length; if (d == 0 || d == 3 || d == 4) ok = true; } } } } Pullenti.Ner.Token end = (j > 0 ? 
pli[j - 1].EndToken : null); if (end == null) ok = false; if ((ok && cityCode == null && countryCode == null) && prevPhone == null && !isPhoneBefore) { if (!end.IsWhitespaceAfter && end.Next != null) { Pullenti.Ner.Token tt = end.Next; if (tt.IsCharOf(".,)") && tt.Next != null) tt = tt.Next; if (!tt.IsWhitespaceBefore) ok = false; } } if (!ok) return null; if (templ.Length > 0 && !char.IsDigit(templ[templ.Length - 1])) templ.Length--; if ((countryCode == null && cityCode != null && cityCode.Length > 3) && num.Length > 6) { string cc = Pullenti.Ner.Phone.Internal.PhoneHelper.GetCountryPrefix(cityCode); if (cc != null && ((cc.Length + 1) < cityCode.Length)) { countryCode = cc; cityCode = cityCode.Substring(cc.Length); } } if (pli[0].BeginToken.Previous != null) { if (pli[0].BeginToken.Previous.IsValue("ГОСТ", null) || pli[0].BeginToken.Previous.IsValue("ТУ", null)) return null; } PhoneReferent ph = new PhoneReferent(); if (countryCode != null) ph.CountryCode = countryCode; string number = num.ToString(); if ((cityCode == null && num.Length > 7 && partLength.Count > 0) && (partLength[0] < 5)) { cityCode = number.Substring(0, partLength[0]); number = number.Substring(partLength[0]); } if (cityCode == null && num.Length == 11 && num[0] == '8') { cityCode = number.Substring(1, 3); number = number.Substring(4); } if (cityCode == null && num.Length == 10) { cityCode = number.Substring(0, 3); number = number.Substring(3); } if (cityCode != null) number = cityCode + number; else if (countryCode == null && prevPhone != null) { bool ok1 = false; if (prevPhone.Number.Length >= (number.Length + 2)) ok1 = true; else if (templ.Length > 0 && prevPhone.m_Template != null && Pullenti.Morph.LanguageHelper.EndsWith(prevPhone.m_Template, templ.ToString())) ok1 = true; if (ok1 && prevPhone.Number.Length > number.Length) number = prevPhone.Number.Substring(0, prevPhone.Number.Length - number.Length) + number; } if (ph.CountryCode == null && prevPhone != null && prevPhone.CountryCode != 
null) { if (prevPhone.Number.Length == number.Length) ph.CountryCode = prevPhone.CountryCode; } ok = false; foreach (char d in number) { if (d != '0') { ok = true; break; } } if (!ok) return null; if (countryCode != null) { if (number.Length < 7) return null; } else { string s = Pullenti.Ner.Phone.Internal.PhoneHelper.GetCountryPrefix(number); if (s != null) { string num2 = number.Substring(s.Length); if (num2.Length >= 10 && num2.Length <= 11) { number = num2; if (s != "7") ph.CountryCode = s; } } if (number.Length == 8 && prevPhone == null) return null; } if (number.Length > 11) { if ((number.Length < 14) && ((countryCode == "1" || countryCode == "43"))) { } else return null; } ph.Number = number; if (additional != null) ph.AddSlot(PhoneReferent.ATTR_ADDNUMBER, additional, true, 0); if (!isPhoneBefore && end.Next != null && !end.IsNewlineAfter) { if (end.Next.IsCharOf("+=") || end.Next.IsHiphen) return null; } if (countryCode != null && countryCode == "7") { if (number.Length != 10) return null; } ph.m_Template = templ.ToString(); if (j == (pli.Count - 1) && pli[j].ItemType == Pullenti.Ner.Phone.Internal.PhoneItemToken.PhoneItemType.Prefix && !pli[j].IsNewlineBefore) { end = pli[j].EndToken; if (pli[j].Kind != PhoneKind.Undefined) ph.Kind = pli[j].Kind; } Pullenti.Ner.ReferentToken res = new Pullenti.Ner.ReferentToken(ph, pli[0].BeginToken, end); if (pli[0].ItemType == Pullenti.Ner.Phone.Internal.PhoneItemToken.PhoneItemType.Prefix && pli[0].EndToken.Next.IsTableControlChar) res.BeginToken = pli[1].BeginToken; return res; }
/// <summary>
/// Extracts weapon entities from the token chain of <paramref name="kit"/> in two passes.
/// Pass 1 parses weapon item lists, registers and embeds the resulting referents, and remembers every
/// model string (alone and prefixed with the brand) and every name so they can be looked up later.
/// Pass 2 walks the chain again and embeds tokens for bare mentions of those remembered models / names.
/// </summary>
/// <param name="kit">analysis container whose token chain is scanned and modified in place</param>
public override void Process(Pullenti.Ner.Core.AnalysisKit kit)
{
    Pullenti.Ner.Core.AnalyzerData data = kit.GetAnalyzerData(this);
    Pullenti.Ner.Core.TerminCollection modelTermins = new Pullenti.Ner.Core.TerminCollection();
    Dictionary<string, List<Pullenti.Ner.Referent>> referentsByModel = new Dictionary<string, List<Pullenti.Ner.Referent>>();
    Pullenti.Ner.Core.TerminCollection nameTermins = new Pullenti.Ner.Core.TerminCollection();

    // Pass 1: full weapon descriptions.
    for (Pullenti.Ner.Token cur = kit.FirstToken; cur != null; cur = cur.Next)
    {
        List<Pullenti.Ner.Weapon.Internal.WeaponItemToken> items = Pullenti.Ner.Weapon.Internal.WeaponItemToken.TryParseList(cur, 10);
        if (items == null)
        {
            continue;
        }
        List<Pullenti.Ner.ReferentToken> attached = this.TryAttach(items, false);
        if (attached == null)
        {
            continue;
        }
        foreach (Pullenti.Ner.ReferentToken weaponTok in attached)
        {
            weaponTok.Referent = data.RegisterReferent(weaponTok.Referent);
            kit.EmbedToken(weaponTok);
            cur = weaponTok;

            foreach (Pullenti.Ner.Slot slot in weaponTok.Referent.Slots)
            {
                if (slot.TypeName != WeaponReferent.ATTR_MODEL)
                {
                    if (slot.TypeName == WeaponReferent.ATTR_NAME)
                    {
                        Pullenti.Ner.Core.Termin nameTermin = new Pullenti.Ner.Core.Termin(slot.Value.ToString());
                        nameTermin.Tag = weaponTok.Referent;
                        nameTermins.Add(nameTermin);
                    }
                    continue;
                }

                // The model is indexed twice: as is, and (when a brand is known) as "brand model".
                string modelKey = slot.Value.ToString();
                bool brandPrefixed = false;
                while (true)
                {
                    // Keys starting with a digit are never indexed.
                    if (!char.IsDigit(modelKey[0]))
                    {
                        List<Pullenti.Ner.Referent> owners;
                        if (!referentsByModel.TryGetValue(modelKey, out owners))
                        {
                            owners = new List<Pullenti.Ner.Referent>();
                            referentsByModel.Add(modelKey, owners);
                        }
                        if (!owners.Contains(weaponTok.Referent))
                        {
                            owners.Add(weaponTok.Referent);
                        }
                        modelTermins.AddString(modelKey, owners, null, false);
                    }
                    if (brandPrefixed)
                    {
                        break;
                    }
                    string brand = weaponTok.Referent.GetStringValue(WeaponReferent.ATTR_BRAND);
                    if (brand == null)
                    {
                        break;
                    }
                    modelKey = string.Concat(brand, " ", modelKey);
                    brandPrefixed = true;
                }
            }
        }
    }

    if (referentsByModel.Count == 0 && nameTermins.Termins.Count == 0)
    {
        return;
    }

    // Pass 2: short mentions of the models / names collected above.
    for (Pullenti.Ner.Token cur = kit.FirstToken; cur != null; cur = cur.Next)
    {
        // A known name that fills a whole bracket sequence is embedded together with the brackets.
        Pullenti.Ner.Core.BracketSequenceToken brackets = Pullenti.Ner.Core.BracketHelper.TryParse(cur, Pullenti.Ner.Core.BracketParseAttr.No, 10);
        if (brackets != null)
        {
            Pullenti.Ner.Core.TerminToken inner = nameTermins.TryParse(cur.Next, Pullenti.Ner.Core.TerminParseAttr.No);
            if (inner != null && inner.EndToken.Next == brackets.EndToken)
            {
                Pullenti.Ner.ReferentToken bracketed = new Pullenti.Ner.ReferentToken(inner.Termin.Tag as Pullenti.Ner.Referent, brackets.BeginToken, brackets.EndToken);
                kit.EmbedToken(bracketed);
                cur = bracketed;
                continue;
            }
        }

        if (!(cur is Pullenti.Ner.TextToken) || !cur.Chars.IsLetter)
        {
            continue;
        }

        Pullenti.Ner.Core.TerminToken hit = modelTermins.TryParse(cur, Pullenti.Ner.Core.TerminParseAttr.No);
        if (hit == null && !cur.Chars.IsAllLower)
        {
            hit = nameTermins.TryParse(cur, Pullenti.Ner.Core.TerminParseAttr.No);
        }
        if (hit == null)
        {
            continue;
        }

        // Without whitespace after the match, only ",", ".", ")" or a bracket may follow it.
        if (!hit.IsWhitespaceAfter)
        {
            Pullenti.Ner.Token following = hit.EndToken.Next;
            bool punctuationFollows = following != null && following.IsCharOf(",.)");
            if (!punctuationFollows && !Pullenti.Ner.Core.BracketHelper.IsBracket(following, false))
            {
                continue;
            }
        }

        // A model termin carries a list of referents (used only when unambiguous), a name termin a single referent.
        List<Pullenti.Ner.Referent> candidates = hit.Termin.Tag as List<Pullenti.Ner.Referent>;
        Pullenti.Ner.Referent target = (candidates != null && candidates.Count == 1)
            ? candidates[0]
            : hit.Termin.Tag as Pullenti.Ner.Referent;
        if (target == null)
        {
            continue;
        }

        // A brand standing right before the mention is added to the referent and included in the span.
        Pullenti.Ner.Weapon.Internal.WeaponItemToken before = Pullenti.Ner.Weapon.Internal.WeaponItemToken.TryParse(hit.BeginToken.Previous, null, false, true);
        if (before != null && before.Typ == Pullenti.Ner.Weapon.Internal.WeaponItemToken.Typs.Brand)
        {
            target.AddSlot(WeaponReferent.ATTR_BRAND, before.Value, false, 0);
            hit.BeginToken = before.BeginToken;
        }
        Pullenti.Ner.ReferentToken mention = new Pullenti.Ner.ReferentToken(target, hit.BeginToken, hit.EndToken);
        kit.EmbedToken(mention);
        cur = mention;
    }
}
/// <summary>
/// Parses a sequence of weapon items (noun, brand, model, name, number, ...) starting at <paramref name="t"/>.
/// The first item must parse directly and must not be of type Class or Date. Following items are read one
/// by one, with several fallbacks for closing brackets, single-token referents, parenthesised insertions,
/// hyphens and commas. Finally, adjacent Model items are merged into one.
/// </summary>
/// <param name="t">token to start from</param>
/// <param name="maxCount">maximum number of items to collect; a value of 0 or less disables the limit</param>
/// <returns>the collected items (inner tokens of an item precede the item itself), or null when nothing starts at <paramref name="t"/></returns>
public static List<WeaponItemToken> TryParseList(Pullenti.Ner.Token t, int maxCount = 10)
{
    WeaponItemToken tr = TryParse(t, null, false, false);
    if (tr == null)
    {
        return null;
    }
    if (tr.Typ == Typs.Class || tr.Typ == Typs.Date)
    {
        return null;
    }

    WeaponItemToken tr0 = tr;
    List<WeaponItemToken> res = new List<WeaponItemToken>();
    if (tr.InnerTokens.Count > 0)
    {
        res.AddRange(tr.InnerTokens);
        // Make the first inner item start where the outer item starts.
        if (res[0].BeginChar > tr.BeginChar)
        {
            res[0].BeginToken = tr.BeginToken;
        }
    }
    res.Add(tr);

    t = tr.EndToken.Next;
    if (tr.Typ == Typs.Noun)
    {
        // Skip ':' and hyphens right after the noun.
        while (t != null && (t.IsChar(':') || t.IsHiphen))
        {
            t = t.Next;
        }
    }

    for (; t != null; t = t.Next)
    {
        if (maxCount > 0 && res.Count >= maxCount)
        {
            break;
        }
        if (t.IsChar(':'))
        {
            continue;
        }
        if (tr0.Typ == Typs.Noun && t.IsHiphen && t.Next != null)
        {
            t = t.Next;
        }

        tr = TryParse(t, tr0, false, false);

        // Fallback 1: after a model / brand, step over a closing bracket (and an optional comma).
        if (tr == null
            && Pullenti.Ner.Core.BracketHelper.CanBeEndOfSequence(t, true, null, false)
            && t.Next != null
            && (tr0.Typ == Typs.Model || tr0.Typ == Typs.Brand))
        {
            Pullenti.Ner.Token tt1 = t.Next;
            if (tt1 != null && tt1.IsComma)
            {
                tt1 = tt1.Next;
            }
            tr = TryParse(tt1, tr0, false, false);
        }

        // Fallback 2: a referent token wrapping exactly one text token - parse the wrapped token.
        if (tr == null && (t is Pullenti.Ner.ReferentToken))
        {
            Pullenti.Ner.ReferentToken rt = t as Pullenti.Ner.ReferentToken;
            if (rt.BeginToken == rt.EndToken && (rt.BeginToken is Pullenti.Ner.TextToken))
            {
                tr = TryParse(rt.BeginToken, tr0, false, false);
                if (tr != null && tr.BeginToken == tr.EndToken)
                {
                    // Re-anchor the item on the outer referent token.
                    tr.BeginToken = (tr.EndToken = t);
                }
            }
        }

        // Fallback 3: skip a "( ... )" insertion, but only when a Number item follows it.
        if (tr == null && t.IsChar('('))
        {
            Pullenti.Ner.Core.BracketSequenceToken br = Pullenti.Ner.Core.BracketHelper.TryParse(t, Pullenti.Ner.Core.BracketParseAttr.No, 100);
            if (br != null)
            {
                Pullenti.Ner.Token tt = br.EndToken.Next;
                if (tt != null && tt.IsComma)
                {
                    tt = tt.Next;
                }
                tr = TryParse(tt, tr0, false, false);
                if (tr != null && tr.Typ != Typs.Number)
                {
                    tr = null;
                }
            }
        }

        // Fallback 4: a hyphen between brand / model parts.
        if (tr == null && t.IsHiphen && (tr0.Typ == Typs.Brand || tr0.Typ == Typs.Model))
        {
            tr = TryParse(t.Next, tr0, false, false);
        }

        // Fallback 5: a comma may only be followed by a Number item.
        if (tr == null && t.IsComma)
        {
            if (tr0.Typ == Typs.Name || tr0.Typ == Typs.Brand || tr0.Typ == Typs.Model || tr0.Typ == Typs.Class || tr0.Typ == Typs.Date)
            {
                tr = TryParse(t.Next, tr0, true, false);
                if (tr != null && tr.Typ != Typs.Number)
                {
                    tr = null;
                }
            }
        }

        if (tr == null)
        {
            break;
        }
        // Across a line break only a Number item may continue the sequence.
        if (t.IsNewlineBefore && tr.Typ != Typs.Number)
        {
            break;
        }

        if (tr.InnerTokens.Count > 0)
        {
            res.AddRange(tr.InnerTokens);
        }
        res.Add(tr);
        tr0 = tr;
        t = tr.EndToken;
    }

    // Merge neighbouring Model items into one.
    for (int i = 0; i < (res.Count - 1); i++)
    {
        if (res[i].Typ == Typs.Model && res[i + 1].Typ == Typs.Model)
        {
            // The separator is decided by the token that follows the FIRST model, so it has to be
            // inspected before EndToken is moved to the end of the second model (previously the
            // check ran after the move and looked at the token following the merged span).
            Pullenti.Ner.Token afterFirst = res[i].EndToken.Next;
            char separator = (afterFirst != null && afterFirst.IsHiphen) ? '-' : ' ';
            res[i].EndToken = res[i + 1].EndToken;
            res[i].Value = string.Format("{0}{1}{2}", res[i].Value, separator, res[i + 1].Value);
            res.RemoveAt(i + 1);
            i--; // re-check the merged item against its new neighbour
        }
    }
    return res;
}
/// <summary>
/// Tries to read one author entry of a bibliographic reference at token <paramref name="t"/>.
/// Recognised forms: a person (via PersonItemToken.TryParsePerson), an author enclosed in square
/// brackets, and the "and others" markers ("И" / "ET" followed by "ДРУГИЕ", "ДР" or "AL").
/// </summary>
/// <param name="t">token to start from; null yields null</param>
/// <param name="prevPersTemplate">name template of the previous author, passed through to the person parser</param>
/// <returns>the parsed token, or null when no author entry starts at <paramref name="t"/></returns>
public static BookLinkToken TryParseAuthor(Pullenti.Ner.Token t, Pullenti.Ner.Person.Internal.FioTemplateType prevPersTemplate = Pullenti.Ner.Person.Internal.FioTemplateType.Undefined)
{
    if (t == null)
    {
        return null;
    }

    Pullenti.Ner.ReferentToken rtp = Pullenti.Ner.Person.Internal.PersonItemToken.TryParsePerson(t, prevPersTemplate);
    if (rtp != null)
    {
        BookLinkToken re;
        // The original if/else here was syntactically invalid (statement terminators ended up
        // outside the braces and the 'else' was detached from its 'if').
        if (rtp.Data == null)
        {
            re = new BookLinkToken(t, (rtp == t ? t : rtp.EndToken)) { Typ = BookLinkTyp.Person, Ref = rtp.Referent };
        }
        else
        {
            re = new BookLinkToken(t, rtp.EndToken) { Typ = BookLinkTyp.Person, Tok = rtp };
        }
        re.PersonTemplate = (Pullenti.Ner.Person.Internal.FioTemplateType)rtp.MiscAttrs;

        // Look for a person-property referent inside the person span: a capitalised one that is not
        // the first token is kept as the start of the name, any other one rejects the whole parse.
        for (Pullenti.Ner.Token tt = rtp.BeginToken; tt != null && tt.EndChar <= rtp.EndChar; tt = tt.Next)
        {
            if (!(tt.GetReferent() is Pullenti.Ner.Person.PersonPropertyReferent))
            {
                continue;
            }
            Pullenti.Ner.ReferentToken rt = tt as Pullenti.Ner.ReferentToken;
            if (rt.BeginToken.Chars.IsCapitalUpper && tt != rtp.BeginToken)
            {
                re.StartOfName = Pullenti.Ner.Core.MiscHelper.GetTextValueOfMetaToken(rt, Pullenti.Ner.Core.GetTextAttr.KeepRegister);
                break;
            }
            return null;
        }
        return re;
    }

    // "[Author]" - parse the content and widen the span to include both brackets.
    if (t.IsChar('['))
    {
        BookLinkToken re = TryParseAuthor(t.Next, Pullenti.Ner.Person.Internal.FioTemplateType.Undefined);
        if (re != null && re.EndToken.Next != null && re.EndToken.Next.IsChar(']'))
        {
            re.BeginToken = t;
            re.EndToken = re.EndToken.Next;
            return re;
        }
    }

    // "и другие" / "и др." / "et al." - the trailing dot, when present, is included in the span.
    if ((t.IsValue("И", null) || t.IsValue("ET", null)) && t.Next != null)
    {
        if (t.Next.IsValue("ДРУГИЕ", null) || t.Next.IsValue("ДР", null) || t.Next.IsValue("AL", null))
        {
            BookLinkToken res = new BookLinkToken(t, t.Next) { Typ = BookLinkTyp.AndOthers };
            if (t.Next.Next != null && t.Next.Next.IsChar('.'))
            {
                res.EndToken = res.EndToken.Next;
            }
            return res;
        }
    }
    return null;
}
/// <summary>
/// Tries to build a denomination starting at <paramref name="t"/>: the start token followed by a tail of
/// digit groups, short text fragments (at most 3 characters), a few special characters and
/// hyphen-like separators.
/// </summary>
/// <param name="t">first token of the candidate; null yields null</param>
/// <param name="forOntology">when true, whitespace does not stop the scan and the final plausibility checks are skipped</param>
/// <returns>a token with a new, unregistered DenominationReferent (or the result of TryAttachSpec), or null</returns>
public Pullenti.Ner.ReferentToken TryAttach(Pullenti.Ner.Token t, bool forOntology = false)
{
    if (t == null)
    {
        return null;
    }

    Pullenti.Ner.ReferentToken special = this.TryAttachSpec(t);
    if (special != null)
    {
        return special;
    }

    // An all-lowercase start is accepted only when glued to a following number
    // and standing at a clean left boundary.
    if (t.Chars.IsAllLower)
    {
        bool gluedToNumber = !t.IsWhitespaceAfter && (t.Next is Pullenti.Ner.NumberToken);
        if (!gluedToNumber)
        {
            return null;
        }
        bool cleanLeftBoundary = t.Previous == null || t.IsWhitespaceBefore || t.Previous.IsCharOf(",:");
        if (!cleanLeftBoundary)
        {
            return null;
        }
    }

    StringBuilder tail = new StringBuilder();
    Pullenti.Ner.Token lastAccepted = t;
    bool endsWithSeparator = false;
    bool wellFormed = true;
    int digitGroups = 0;
    int specialChars = 0;

    for (Pullenti.Ner.Token cur = t.Next; cur != null; cur = cur.Next)
    {
        if (!forOntology && cur.IsWhitespaceBefore)
        {
            break;
        }

        // Slash, backslash, underscore and hyphen are all normalised to '-'.
        endsWithSeparator = cur.IsCharOf("/\\_") || cur.IsHiphen;
        if (endsWithSeparator)
        {
            tail.Append('-');
            continue;
        }

        Pullenti.Ner.NumberToken number = cur as Pullenti.Ner.NumberToken;
        if (number != null)
        {
            if (number.Typ != Pullenti.Ner.NumberSpellingType.Digit)
            {
                break;
            }
            lastAccepted = number;
            tail.Append(number.GetSourceText());
            digitGroups++;
            continue;
        }

        Pullenti.Ner.TextToken text = cur as Pullenti.Ner.TextToken;
        if (text == null)
        {
            break;
        }
        if (text.LengthChar > 3)
        {
            wellFormed = false;
            break;
        }
        if (!char.IsLetter(text.Term[0]))
        {
            // Comma, colon or a closing bracket simply end the candidate.
            if (text.IsCharOf(",:") || Pullenti.Ner.Core.BracketHelper.CanBeEndOfSequence(text, false, null, false))
            {
                break;
            }
            // Any other non-letter must be one of the allowed special characters.
            wellFormed = text.IsCharOf("+*&^#@!");
            if (!wellFormed)
            {
                break;
            }
            specialChars++;
        }
        lastAccepted = text;
        tail.Append(text.GetSourceText());
    }

    if (!forOntology)
    {
        bool rejected = tail.Length < 1
            || !wellFormed
            || endsWithSeparator
            || tail.Length > 12
            || tail[tail.Length - 1] == '!'
            || (digitGroups + specialChars) == 0
            || !this.CheckAttach(t, lastAccepted);
        if (rejected)
        {
            return null;
        }
    }

    DenominationReferent denomination = new DenominationReferent();
    denomination.AddValue(t, lastAccepted);
    return new Pullenti.Ner.ReferentToken(denomination, t, lastAccepted);
}
/// <summary>
/// Main object-extraction routine. Scans the token chain of the kit in up to two
/// passes and embeds a ReferentToken for every denomination found.
/// The first pass tries the special parser, the local ontology, the external
/// ontology and the generic TryAttach; the second pass (run only when the first
/// one detected new denominations) re-applies just the special parser and the
/// local ontology.
/// </summary>
/// <param name="kit">analysis kit whose token chain is scanned and modified</param>
public override void Process(Pullenti.Ner.Core.AnalysisKit kit)
{
    Pullenti.Ner.Core.AnalyzerDataWithOntology ad = kit.GetAnalyzerData(this) as Pullenti.Ner.Core.AnalyzerDataWithOntology;
    for (int k = 0; k < 2; k++)
    {
        bool detectNewDenoms = false;
        // Monotonic timer for the per-pass time budget: unlike DateTime.Now it is
        // not affected by DST switches or system clock adjustments.
        System.Diagnostics.Stopwatch passTimer = System.Diagnostics.Stopwatch.StartNew();
        for (Pullenti.Ner.Token t = kit.FirstToken; t != null; t = t.Next)
        {
            // A candidate must follow whitespace, a comma or an opening bracket.
            bool atBoundary = t.IsWhitespaceBefore
                || (t.Previous != null
                    && (t.Previous.IsCharOf(",") || Pullenti.Ner.Core.BracketHelper.CanBeStartOfSequence(t.Previous, false, false)));
            if (!atBoundary)
            {
                continue;
            }

            Pullenti.Ner.ReferentToken rt0 = this.TryAttachSpec(t);
            if (rt0 != null)
            {
                rt0.Referent = ad.RegisterReferent(rt0.Referent);
                kit.EmbedToken(rt0);
                t = rt0;
                continue;
            }
            if (!t.Chars.IsLetter)
            {
                continue;
            }
            if (!this.CanBeStartOfDenom(t))
            {
                continue;
            }
            // Give up on this pass once it has been running for more than a minute.
            if (passTimer.Elapsed.TotalMinutes > 1)
            {
                break;
            }

            // 1. Denominations already known to the local ontology.
            List<Pullenti.Ner.Core.IntOntologyToken> ot = ad.LocalOntology.TryAttach(t, null, false);
            if (ot != null && (ot[0].Item.Referent is DenominationReferent))
            {
                if (this.CheckAttach(ot[0].BeginToken, ot[0].EndToken))
                {
                    DenominationReferent cl = ot[0].Item.Referent.Clone() as DenominationReferent;
                    cl.Occurrence.Clear();
                    Pullenti.Ner.ReferentToken rt = new Pullenti.Ner.ReferentToken(cl, ot[0].BeginToken, ot[0].EndToken);
                    kit.EmbedToken(rt);
                    t = rt;
                    continue;
                }
            }

            // The second pass only re-applies what the first pass has learned.
            if (k > 0)
            {
                continue;
            }

            // 2. Denominations from the external ontology of the kit, if any.
            if (t.Kit.Ontology != null)
            {
                ot = t.Kit.Ontology.AttachToken(DenominationReferent.OBJ_TYPENAME, t);
                if (ot != null)
                {
                    if (this.CheckAttach(ot[0].BeginToken, ot[0].EndToken))
                    {
                        DenominationReferent dr = new DenominationReferent();
                        dr.MergeSlots(ot[0].Item.Referent, true);
                        Pullenti.Ner.ReferentToken rt = new Pullenti.Ner.ReferentToken(ad.RegisterReferent(dr), ot[0].BeginToken, ot[0].EndToken);
                        kit.EmbedToken(rt);
                        t = rt;
                        continue;
                    }
                }
            }

            // 3. Generic pattern-based detection of a new denomination.
            rt0 = this.TryAttach(t, false);
            if (rt0 != null)
            {
                rt0.Referent = ad.RegisterReferent(rt0.Referent);
                kit.EmbedToken(rt0);
                detectNewDenoms = true;
                t = rt0;
                // Stop scanning once the local ontology holds more than 1000 items.
                if (ad.LocalOntology.Items.Count > 1000)
                {
                    break;
                }
            }
        }
        if (!detectNewDenoms)
        {
            break;
        }
    }
}
/// <summary>
/// Tries to attach an organization whose name tokens are followed by a type item
/// recognized by <c>TryAttach</c> (an OrgItemEngItem), or whose type item comes
/// first and is followed by a bracketed name.
/// </summary>
/// <param name="t">token to start from (may be an opening round bracket); null yields null</param>
/// <param name="canBeCyr">when true, Cyrillic-letter tokens are accepted in addition to Latin ones</param>
/// <returns>a ReferentToken with a new OrganizationReferent, or null when nothing was recognized</returns>
public static Pullenti.Ner.ReferentToken TryAttachOrg(Pullenti.Ner.Token t, bool canBeCyr = false)
{
    if (t == null)
    {
        return null;
    }

    // Remember whether parsing started right after an opening bracket.
    bool br = false;
    if (t.IsChar('(') && t.Next != null)
    {
        t = t.Next;
        br = true;
    }

    if (t is Pullenti.Ner.NumberToken)
    {
        // A number may start a name only when spelled in words as a capitalized adjective.
        bool capitalizedWordAdjective = (t as Pullenti.Ner.NumberToken).Typ == Pullenti.Ner.NumberSpellingType.Words
            && t.Morph.Class.IsAdjective
            && t.Chars.IsCapitalUpper;
        if (!capitalizedWordAdjective)
        {
            return null;
        }
    }
    else
    {
        if (t.Chars.IsAllLower)
        {
            return null;
        }
        if ((t.LengthChar < 3) && !t.Chars.IsLetter)
        {
            return null;
        }
        if (!t.Chars.IsLatinLetter)
        {
            if (!canBeCyr || !t.Chars.IsCyrillicLetter)
            {
                return null;
            }
        }
    }

    Pullenti.Ner.Token t0 = t;      // first token of the name
    Pullenti.Ner.Token t1 = t0;     // last token of the name seen so far
    int namWo = 0;                  // number of text tokens counted into the name
    OrgItemEngItem tok = null;      // the type item that terminates the name
    Pullenti.Ner.Geo.GeoReferent geo = null;
    OrgItemTypeToken addTyp = null;

    for (; t != null; t = t.Next)
    {
        if (t != t0 && t.WhitespacesBeforeCount > 1)
        {
            break;
        }
        if (t.IsChar(')'))
        {
            break;
        }
        if (t.IsChar('(') && t.Next != null)
        {
            // "(geo)" - remember the geo object and skip the brackets.
            if ((t.Next.GetReferent() is Pullenti.Ner.Geo.GeoReferent) && t.Next.Next != null && t.Next.Next.IsChar(')'))
            {
                geo = t.Next.GetReferent() as Pullenti.Ner.Geo.GeoReferent;
                t = t.Next.Next;
                continue;
            }
            // "(type)" in Latin letters - remember as an additional type.
            OrgItemTypeToken typ = OrgItemTypeToken.TryAttach(t.Next, true, null);
            if ((typ != null && typ.EndToken.Next != null && typ.EndToken.Next.IsChar(')')) && typ.Chars.IsLatinLetter)
            {
                addTyp = typ;
                t = typ.EndToken.Next;
                continue;
            }
            // "(Word)" with a capitalized single text token - becomes part of the name.
            if (((t.Next is Pullenti.Ner.TextToken) && t.Next.Next != null && t.Next.Next.IsChar(')')) && t.Next.Chars.IsCapitalUpper)
            {
                t1 = (t = t.Next.Next);
                continue;
            }
            break;
        }

        // Look for the type item here, or after one or two '.'/',' characters.
        tok = TryAttach(t, canBeCyr);
        if (tok == null && t.IsCharOf(".,") && t.Next != null)
        {
            tok = TryAttach(t.Next, canBeCyr);
            if (tok == null && t.Next.IsCharOf(",."))
            {
                tok = TryAttach(t.Next.Next, canBeCyr);
            }
        }
        if (tok != null)
        {
            if (tok.LengthChar == 1 && t0.Chars.IsCyrillicLetter)
            {
                return null;
            }
            break;
        }

        if (t.IsHiphen && !t.IsWhitespaceAfter && !t.IsWhitespaceBefore)
        {
            continue;
        }
        if (t.IsCharOf("&+") || t.IsAnd)
        {
            continue;
        }
        if (t.IsChar('.'))
        {
            if (t.Previous != null && t.Previous.LengthChar == 1)
            {
                // Dot after a single-character token is skipped.
                continue;
            }
            else if (Pullenti.Ner.Core.MiscHelper.CanBeStartOfSentence(t.Next))
            {
                break;
            }
        }
        if (!t.Chars.IsLatinLetter)
        {
            if (!canBeCyr || !t.Chars.IsCyrillicLetter)
            {
                break;
            }
        }
        if (t.Chars.IsAllLower)
        {
            if (t.Morph.Class.IsPreposition || t.Morph.Class.IsConjunction)
            {
                continue;
            }
            if (br)
            {
                continue;
            }
            break;
        }
        Pullenti.Morph.MorphClass mc = t.GetMorphClassInDictionary();
        if (mc.IsVerb)
        {
            if (t.Next != null && t.Next.Morph.Class.IsPreposition)
            {
                break;
            }
        }
        if (t.Next != null && t.Next.IsValue("OF", null))
        {
            break;
        }
        if (t is Pullenti.Ner.TextToken)
        {
            namWo++;
        }
        t1 = t;
    }

    if (tok == null)
    {
        return null;
    }

    if (t0 == tok.BeginToken)
    {
        // The type item comes first: the name must follow as a bracket sequence.
        Pullenti.Ner.Core.BracketSequenceToken br2 = Pullenti.Ner.Core.BracketHelper.TryParse(tok.EndToken.Next, Pullenti.Ner.Core.BracketParseAttr.No, 100);
        if (br2 != null)
        {
            Pullenti.Ner.Org.OrganizationReferent org1 = new Pullenti.Ner.Org.OrganizationReferent();
            if (tok.ShortValue != null)
            {
                org1.AddTypeStr(tok.ShortValue);
            }
            org1.AddTypeStr(tok.FullValue);
            string nam1 = Pullenti.Ner.Core.MiscHelper.GetTextValue(br2.BeginToken, br2.EndToken, Pullenti.Ner.Core.GetTextAttr.No);
            if (nam1 != null)
            {
                org1.AddName(nam1, true, null);
                return new Pullenti.Ner.ReferentToken(org1, t0, br2.EndToken);
            }
        }
        return null;
    }

    Pullenti.Ner.Org.OrganizationReferent org = new Pullenti.Ner.Org.OrganizationReferent();
    Pullenti.Ner.Token te = tok.EndToken;
    if (tok.IsBank)
    {
        // For banks the type item itself belongs to the name.
        t1 = tok.EndToken;
    }
    if (tok.FullValue == "company" && (tok.WhitespacesAfterCount < 3))
    {
        // "company" directly followed by another type item: "company" joins the name.
        OrgItemEngItem tok1 = TryAttach(tok.EndToken.Next, canBeCyr);
        if (tok1 != null)
        {
            t1 = tok.EndToken;
            tok = tok1;
            te = tok.EndToken;
        }
    }
    if (tok.FullValue == "company")
    {
        if (namWo == 0)
        {
            return null;
        }
    }

    string nam = Pullenti.Ner.Core.MiscHelper.GetTextValue(t0, t1, Pullenti.Ner.Core.GetTextAttr.IgnoreArticles);
    if (nam == "STOCK" && tok.FullValue == "company")
    {
        return null;
    }
    string altNam = null;
    if (string.IsNullOrEmpty(nam))
    {
        return null;
    }

    int i1 = nam.IndexOf('(');
    if (i1 > 0)
    {
        int i2 = nam.IndexOf(')');
        if (i1 < i2)
        {
            // Keep the full text as an alternative name and cut the bracketed part
            // out of the main one.
            altNam = nam;
            string tai = null;
            if ((i2 + 1) < nam.Length)
            {
                // Tail is the text AFTER the closing bracket (Substring(i2) would
                // leave the ')' itself inside the name).
                tai = nam.Substring(i2 + 1).Trim();
            }
            nam = nam.Substring(0, i1).Trim();
            if (!string.IsNullOrEmpty(tai))
            {
                nam = string.Format("{0} {1}", nam, tai);
            }
        }
    }

    if (tok.IsBank)
    {
        org.AddTypeStr((tok.Kit.BaseLanguage.IsEn ? "bank" : "банк"));
        org.AddProfile(Pullenti.Ner.Org.OrgProfile.Finance);
        if ((t1.Next != null && t1.Next.IsValue("OF", null) && t1.Next.Next != null) && t1.Next.Next.Chars.IsLatinLetter)
        {
            // "... Bank of X": extend the name over the "of ..." part.
            OrgItemNameToken nam0 = OrgItemNameToken.TryAttach(t1.Next, null, false, false);
            if (nam0 != null)
            {
                te = nam0.EndToken;
            }
            else
            {
                te = t1.Next.Next;
            }
            nam = Pullenti.Ner.Core.MiscHelper.GetTextValue(t0, te, Pullenti.Ner.Core.GetTextAttr.No);
            if (te.GetReferent() is Pullenti.Ner.Geo.GeoReferent)
            {
                org.AddGeoObject(te.GetReferent() as Pullenti.Ner.Geo.GeoReferent);
            }
        }
        else if (t0 == t1)
        {
            // The bank item alone is not an organization name.
            return null;
        }
    }
    else
    {
        if (tok.ShortValue != null)
        {
            org.AddTypeStr(tok.ShortValue);
        }
        org.AddTypeStr(tok.FullValue);
    }

    if (string.IsNullOrEmpty(nam))
    {
        return null;
    }
    org.AddName(nam, true, null);
    if (altNam != null)
    {
        org.AddName(altNam, true, null);
    }

    Pullenti.Ner.ReferentToken res = new Pullenti.Ner.ReferentToken(org, t0, te);

    // Skip trailing '.'/',' and try to pick up one more type item right after them.
    t = te;
    while (t.Next != null)
    {
        if (t.Next.IsCharOf(",."))
        {
            t = t.Next;
        }
        else
        {
            break;
        }
    }
    if (t.WhitespacesAfterCount < 2)
    {
        tok = TryAttach(t.Next, canBeCyr);
        if (tok != null)
        {
            if (tok.ShortValue != null)
            {
                org.AddTypeStr(tok.ShortValue);
            }
            org.AddTypeStr(tok.FullValue);
            res.EndToken = tok.EndToken;
        }
    }

    if (geo != null)
    {
        org.AddGeoObject(geo);
    }
    if (addTyp != null)
    {
        org.AddType(addTyp, false);
    }
    if (!br)
    {
        return res;
    }

    // Parsing started after '(': the result must be closed by ')' or reach the end of text.
    t = res.EndToken;
    if (t.Next == null)
    {
        // End of text: keep the current end token instead of overwriting it with null.
        return res;
    }
    if (!t.Next.IsChar(')'))
    {
        return null;
    }
    res.EndToken = t.Next;
    return res;
}