// Walks the token chain and embeds every referent that TryAttach recognizes.
// A letter token that opens a term of m_Ontology triggers TryAttach(..., true) on the
// token after that term (one optional ':' is skipped); a result found this way is
// widened to begin at the term itself. When nothing was found, referent tokens and
// tokens that start a new line are passed to TryAttach(..., false).
public override void Process(Pullenti.Ner.Core.AnalysisKit kit)
{
    Pullenti.Ner.Core.AnalyzerData data = kit.GetAnalyzerData(this);
    Pullenti.Ner.Token cur = kit.FirstToken;
    while (cur != null)
    {
        Pullenti.Ner.ReferentToken found = null;
        Pullenti.Ner.Core.TerminToken keyword = null;
        if (cur.Chars.IsLetter)
        {
            keyword = m_Ontology.TryParse(cur, Pullenti.Ner.Core.TerminParseAttr.No);
        }

        if (keyword != null)
        {
            Pullenti.Ner.Token afterKeyword = keyword.EndToken.Next;
            if (afterKeyword != null && afterKeyword.IsChar(':'))
            {
                afterKeyword = afterKeyword.Next;
            }
            found = this.TryAttach(afterKeyword, true);
            if (found != null)
            {
                found.BeginToken = cur;
            }
        }

        if (found == null)
        {
            if ((cur is Pullenti.Ner.ReferentToken) || cur.IsNewlineBefore)
            {
                found = this.TryAttach(cur, false);
            }
        }

        if (found != null)
        {
            found.Referent = data.RegisterReferent(found.Referent);
            kit.EmbedToken(found);
            // Continue scanning after the embedded token.
            cur = found;
        }

        cur = cur.Next;
    }
}
// Builds one GoodReferent per text line that yields attribute tokens.
// Only tokens that start a line are considered; a non-letter first token is skipped
// when another token follows. Every attribute referent is registered, linked to the
// good through ATTR_ATTR (once per distinct referent) and embedded, then the whole
// span is embedded as the good itself. Progress is reported once per 100000
// characters and processing stops when OnProgress returns false. The goods are
// appended to the analyzer data's Referents list only after the scan.
public override void Process(Pullenti.Ner.Core.AnalysisKit kit)
{
    Pullenti.Ner.Core.AnalyzerData data = kit.GetAnalyzerData(this);

    int chunkSize = 100000;
    int totalParts = ((kit.Sofa.Text.Length + chunkSize) - 1) / chunkSize;
    if (totalParts == 0)
    {
        totalParts = 1;
    }
    int reportedParts = 0;
    int reportAt = 0;

    List<GoodReferent> collected = new List<GoodReferent>();
    for (Pullenti.Ner.Token t = kit.FirstToken; t != null; t = t.Next)
    {
        if (!t.IsNewlineBefore)
        {
            continue;
        }

        if (t.BeginChar > reportAt)
        {
            reportAt += chunkSize;
            reportedParts++;
            if (!this.OnProgress(reportedParts, totalParts, kit))
            {
                break;
            }
        }

        if (t.Next != null && !t.Chars.IsLetter)
        {
            t = t.Next;
        }

        List<Pullenti.Ner.ReferentToken> attrs = Pullenti.Ner.Goods.Internal.GoodAttrToken.TryParseList(t);
        if (attrs == null || attrs.Count == 0)
        {
            continue;
        }

        GoodReferent good = new GoodReferent();
        foreach (Pullenti.Ner.ReferentToken attrToken in attrs)
        {
            attrToken.Referent = data.RegisterReferent(attrToken.Referent);
            bool alreadyLinked = good.FindSlot(GoodReferent.ATTR_ATTR, attrToken.Referent, true) != null;
            if (!alreadyLinked)
            {
                good.AddSlot(GoodReferent.ATTR_ATTR, attrToken.Referent, false, 0);
            }
            kit.EmbedToken(attrToken);
        }
        collected.Add(good);

        Pullenti.Ner.ReferentToken first = attrs[0];
        Pullenti.Ner.ReferentToken last = attrs[attrs.Count - 1];
        Pullenti.Ner.ReferentToken goodToken = new Pullenti.Ner.ReferentToken(good, first, last);
        kit.EmbedToken(goodToken);
        t = goodToken;
    }

    for (int i = 0; i < collected.Count; i++)
    {
        data.Referents.Add(collected[i]);
    }
}
// Runs _process from the first token of the text and registers the resulting
// TitlePageReferent, if any, in this analyzer's data. The end token reported by
// _process is not used.
public override void Process(Pullenti.Ner.Core.AnalysisKit kit)
{
    Pullenti.Ner.Core.AnalyzerData data = kit.GetAnalyzerData(this);
    Pullenti.Ner.Token endToken;
    TitlePageReferent titlePage = _process(kit.FirstToken, 0, kit, out endToken);
    if (titlePage == null)
    {
        return;
    }
    data.RegisterReferent(titlePage);
}
// Offers every token to TryParse; each recognized referent token is registered,
// embedded into the chain, and scanning resumes after it.
public override void Process(Pullenti.Ner.Core.AnalysisKit kit)
{
    Pullenti.Ner.Core.AnalyzerData data = kit.GetAnalyzerData(this);
    Pullenti.Ner.Token cur = kit.FirstToken;
    while (cur != null)
    {
        Pullenti.Ner.ReferentToken parsed = TryParse(cur);
        if (parsed != null)
        {
            parsed.Referent = data.RegisterReferent(parsed.Referent);
            kit.EmbedToken(parsed);
            cur = parsed;
        }
        cur = cur.Next;
    }
}
// Copies the referents of all analyzers into ar.Entities and, when an external
// ontology is supplied, attaches ontology items to referents that have none yet.
// Analyzers with IsSpecific set are handled in the first pass, the others in the
// second, so their entities come first in the result.
//   kit         - analysis container holding the per-analyzer data
//   ar          - result that receives the entities and the log line
//   extOntology - optional external ontology (may be null)
//   noLog       - suppresses the summary message
internal void _createRes(Pullenti.Ner.Core.AnalysisKit kit, AnalysisResult ar, ExtOntology extOntology, bool noLog)
{
    Stopwatch sw = Stopwatch.StartNew();
    int ontoAttached = 0;
    for (int k = 0; k < 2; k++)
    {
        bool wantSpecific = k == 0;
        foreach (Analyzer c in Analyzers)
        {
            if (c.IsSpecific != wantSpecific)
            {
                continue;
            }
            Pullenti.Ner.Core.AnalyzerData dat = kit.GetAnalyzerData(c);
            if (dat == null || dat.Referents.Count == 0)
            {
                continue;
            }
            if (extOntology != null)
            {
                foreach (Referent r in dat.Referents)
                {
                    if (r.OntologyItems != null)
                    {
                        continue;
                    }
                    r.OntologyItems = extOntology.AttachReferent(r);
                    if (r.OntologyItems != null)
                    {
                        ontoAttached++;
                    }
                }
            }
            ar.Entities.AddRange(dat.Referents);
        }
    }
    sw.Stop();
    if (extOntology != null && !noLog)
    {
        // Fixed the misspelled word "отнологии" in the message.
        string msg = string.Format("Привязано {0} объектов к внешней онтологии ({1} элементов) за {2}", ontoAttached, extOntology.Items.Count, OutSecs(sw.ElapsedMilliseconds));
        this.OnMessage(msg);
        ar.Log.Add(msg);
    }
}
// Builds a document fragment tree from the first token and lets it create its
// referents in this analyzer's data. Does nothing for an empty text or when
// FragToken.CreateDocument returns null.
public override void Process(Pullenti.Ner.Core.AnalysisKit kit)
{
    Pullenti.Ner.Token t = kit.FirstToken;
    if (t == null)
    {
        return;
    }
    Pullenti.Ner.Instrument.Internal.FragToken dfr = Pullenti.Ner.Instrument.Internal.FragToken.CreateDocument(t, 0, InstrumentKind.Undefined);
    if (dfr == null)
    {
        return;
    }
    Pullenti.Ner.Core.AnalyzerData ad = kit.GetAnalyzerData(this);
    // The returned referent is not needed here; the call is made for its effect on ad.
    dfr.CreateReferent(ad);
}
// Parses a list of NamedItemToken at every position (using the analyzer's local
// ontology) and passes non-empty lists to _tryAttach. Each resulting referent
// token is registered, embedded, and scanning resumes after it.
public override void Process(Pullenti.Ner.Core.AnalysisKit kit)
{
    Pullenti.Ner.Core.AnalyzerDataWithOntology data = kit.GetAnalyzerData(this) as Pullenti.Ner.Core.AnalyzerDataWithOntology;
    Pullenti.Ner.Token cur = kit.FirstToken;
    while (cur != null)
    {
        List<Pullenti.Ner.Named.Internal.NamedItemToken> items = Pullenti.Ner.Named.Internal.NamedItemToken.TryParseList(cur, data.LocalOntology);
        if (items != null && items.Count != 0)
        {
            Pullenti.Ner.ReferentToken attached = _tryAttach(items);
            if (attached != null)
            {
                attached.Referent = data.RegisterReferent(attached.Referent);
                kit.EmbedToken(attached);
                cur = attached;
            }
        }
        cur = cur.Next;
    }
}
// Extracts good attributes: every token is offered to GoodAttrToken.TryParse and a
// successfully created GoodAttributeReferent is registered and embedded over the
// parsed span. When a token is parsed but yields no attribute, the span is skipped.
// Progress is reported once per 100000 characters; processing stops when
// OnProgress returns false.
public override void Process(Pullenti.Ner.Core.AnalysisKit kit)
{
    Pullenti.Ner.Core.AnalyzerData data = kit.GetAnalyzerData(this);

    int chunkSize = 100000;
    int totalParts = ((kit.Sofa.Text.Length + chunkSize) - 1) / chunkSize;
    if (totalParts == 0)
    {
        totalParts = 1;
    }
    int reportedParts = 0;
    int reportAt = 0;

    for (Pullenti.Ner.Token t = kit.FirstToken; t != null; t = t.Next)
    {
        if (t.BeginChar > reportAt)
        {
            reportAt += chunkSize;
            reportedParts++;
            if (!this.OnProgress(reportedParts, totalParts, kit))
            {
                break;
            }
        }

        Pullenti.Ner.Goods.Internal.GoodAttrToken parsed = Pullenti.Ner.Goods.Internal.GoodAttrToken.TryParse(t, null, true, true);
        if (parsed == null)
        {
            continue;
        }

        GoodAttributeReferent attr = parsed._createAttr();
        if (attr != null)
        {
            Pullenti.Ner.ReferentToken embedded = new Pullenti.Ner.ReferentToken(attr, parsed.BeginToken, parsed.EndToken);
            embedded.Referent = data.RegisterReferent(attr);
            kit.EmbedToken(embedded);
            t = embedded;
        }
        else
        {
            t = parsed.EndToken;
        }
    }
}
// Main object extraction function.
// Makes up to two passes over the token chain; the second pass runs only if the
// first one attached new denominations through TryAttach. At each candidate
// position (after whitespace, a comma, or a token that can open a bracket
// sequence) the following is tried in order:
//   1. TryAttachSpec;
//   2. the analyzer's local ontology (a clone of the known referent is embedded
//      without registering it again);
//   3. first pass only: the external ontology of the kit, then TryAttach.
// A pass is abandoned after more than one minute, or once the local ontology holds
// more than 1000 items.
public override void Process(Pullenti.Ner.Core.AnalysisKit kit)
{
    Pullenti.Ner.Core.AnalyzerDataWithOntology ad = kit.GetAnalyzerData(this) as Pullenti.Ner.Core.AnalyzerDataWithOntology;
    for (int k = 0; k < 2; k++)
    {
        bool detectNewDenoms = false;
        // UTC is used for the time budget so that a DST or local clock shift cannot
        // shorten or extend it.
        DateTime started = DateTime.UtcNow;
        for (Pullenti.Ner.Token t = kit.FirstToken; t != null; t = t.Next)
        {
            if (t.IsWhitespaceBefore)
            {
            }
            else if (t.Previous != null && ((t.Previous.IsCharOf(",") || Pullenti.Ner.Core.BracketHelper.CanBeStartOfSequence(t.Previous, false, false))))
            {
            }
            else
            {
                continue;
            }

            Pullenti.Ner.ReferentToken rt0 = this.TryAttachSpec(t);
            if (rt0 != null)
            {
                rt0.Referent = ad.RegisterReferent(rt0.Referent);
                kit.EmbedToken(rt0);
                t = rt0;
                continue;
            }
            if (!t.Chars.IsLetter)
            {
                continue;
            }
            if (!this.CanBeStartOfDenom(t))
            {
                continue;
            }
            if ((DateTime.UtcNow - started).TotalMinutes > 1)
            {
                break;
            }

            List<Pullenti.Ner.Core.IntOntologyToken> ot = ad.LocalOntology.TryAttach(t, null, false);
            if (ot != null && (ot[0].Item.Referent is DenominationReferent))
            {
                if (this.CheckAttach(ot[0].BeginToken, ot[0].EndToken))
                {
                    DenominationReferent cl = ot[0].Item.Referent.Clone() as DenominationReferent;
                    cl.Occurrence.Clear();
                    Pullenti.Ner.ReferentToken rt = new Pullenti.Ner.ReferentToken(cl, ot[0].BeginToken, ot[0].EndToken);
                    kit.EmbedToken(rt);
                    t = rt;
                    continue;
                }
            }

            // The remaining strategies are used on the first pass only.
            if (k > 0)
            {
                continue;
            }

            if (t.Kit.Ontology != null)
            {
                if ((ot = t.Kit.Ontology.AttachToken(DenominationReferent.OBJ_TYPENAME, t)) != null)
                {
                    if (this.CheckAttach(ot[0].BeginToken, ot[0].EndToken))
                    {
                        DenominationReferent dr = new DenominationReferent();
                        dr.MergeSlots(ot[0].Item.Referent, true);
                        Pullenti.Ner.ReferentToken rt = new Pullenti.Ner.ReferentToken(ad.RegisterReferent(dr), ot[0].BeginToken, ot[0].EndToken);
                        kit.EmbedToken(rt);
                        t = rt;
                        continue;
                    }
                }
            }

            rt0 = this.TryAttach(t, false);
            if (rt0 != null)
            {
                rt0.Referent = ad.RegisterReferent(rt0.Referent);
                kit.EmbedToken(rt0);
                detectNewDenoms = true;
                t = rt0;
                if (ad.LocalOntology.Items.Count > 1000)
                {
                    break;
                }
            }
        }
        if (!detectNewDenoms)
        {
            break;
        }
    }
}
// Splits the text into mail blocks and registers MailReferent parts for each block.
// Steps, as implemented below:
//  1. Every token position is offered to MailLine.Parse; parsed lines are collected and
//     scanning resumes at the line's EndToken.
//  2. Lines are grouped into blocks. A From line opens a new block when it is the first
//     line, has MustBeFirstLine set, is followed by a From or Hello line among the next two
//     lines (both must exist), the closest preceding line with a defined type is
//     BestRegards, or the line contains a referent whose TypeName is "DATE" or "URI".
//     Any other line is appended to the current block (a first block is created on demand).
//  3. Per block: a leading run of From lines becomes a Head referent; the start of the
//     closing part (t2) is searched from the end of the block; a Hello line reached before
//     any other non-empty line yields a Hello referent; the span up to the token before t2
//     (or to the end of the block when t2 is null) becomes Body, and the span from t2 to
//     the end becomes Tail, which also receives the referents of the lines it covers.
// Referents are registered in the analyzer data together with their occurrence;
// kit.EmbedToken is not called in this method.
// NOTE(review): "if (lines.Count == 91) { }" has an empty body - it looks like a debugging
// leftover.
public override void Process(Pullenti.Ner.Core.AnalysisKit kit) { List <Pullenti.Ner.Mail.Internal.MailLine> lines = new List <Pullenti.Ner.Mail.Internal.MailLine>(); for (Pullenti.Ner.Token t = kit.FirstToken; t != null; t = t.Next) { Pullenti.Ner.Mail.Internal.MailLine ml = Pullenti.Ner.Mail.Internal.MailLine.Parse(t, 0, 0); if (ml == null) { continue; } if (lines.Count == 91) { } lines.Add(ml); t = ml.EndToken; } if (lines.Count == 0) { return; } int i; List <List <Pullenti.Ner.Mail.Internal.MailLine> > blocks = new List <List <Pullenti.Ner.Mail.Internal.MailLine> >(); List <Pullenti.Ner.Mail.Internal.MailLine> blk = null; for (i = 0; i < lines.Count; i++) { Pullenti.Ner.Mail.Internal.MailLine ml = lines[i]; if (ml.Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.From) { bool isNew = ml.MustBeFirstLine || i == 0; if (((i + 2) < lines.Count) && (((lines[i + 1].Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.From || lines[i + 2].Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.From || lines[i + 1].Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.Hello) || lines[i + 2].Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.Hello))) { isNew = true; } if (!isNew) { for (int j = i - 1; j >= 0; j--) { if (lines[j].Typ != Pullenti.Ner.Mail.Internal.MailLine.Types.Undefined) { if (lines[j].Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.BestRegards) { isNew = true; } break; } } } if (!isNew) { for (Pullenti.Ner.Token tt = ml.BeginToken; tt != null && tt.EndChar <= ml.EndChar; tt = tt.Next) { if (tt.GetReferent() != null) { if (tt.GetReferent().TypeName == "DATE" || tt.GetReferent().TypeName == "URI") { isNew = true; } } } } if (isNew) { blk = new List <Pullenti.Ner.Mail.Internal.MailLine>(); blocks.Add(blk); for (; i < lines.Count; i++) { if (lines[i].Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.From) { if (blk.Count > 0 && lines[i].MustBeFirstLine) { break; } blk.Add(lines[i]); } else if (((i + 1) < lines.Count) && lines[i + 1].Typ == 
Pullenti.Ner.Mail.Internal.MailLine.Types.From) { int j; for (j = 0; j < blk.Count; j++) { if (blk[j].Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.From) { if (blk[j].IsRealFrom || blk[j].MustBeFirstLine || blk[j].MailAddr != null) { break; } } } if (j >= blk.Count) { blk.Add(lines[i]); continue; } bool ok = false; for (j = i + 1; j < lines.Count; j++) { if (lines[j].Typ != Pullenti.Ner.Mail.Internal.MailLine.Types.From) { break; } if (lines[j].IsRealFrom || lines[j].MustBeFirstLine) { ok = true; break; } if (lines[j].MailAddr != null) { ok = true; break; } } if (ok) { break; } blk.Add(lines[i]); } else { break; } } i--; continue; } } if (blk == null) { blocks.Add((blk = new List <Pullenti.Ner.Mail.Internal.MailLine>())); } blk.Add(lines[i]); } if (blocks.Count == 0) { return; } Pullenti.Ner.Core.AnalyzerData ad = kit.GetAnalyzerData(this); for (int j = 0; j < blocks.Count; j++) { lines = blocks[j]; if (lines.Count == 0) { continue; } i = 0; if (lines[0].Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.From) { Pullenti.Ner.Token t1 = lines[0].EndToken; for (; i < lines.Count; i++) { if (lines[i].Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.From) { t1 = lines[i].EndToken; } else if (((i + 1) < lines.Count) && lines[i + 1].Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.From) { } else { break; } } MailReferent mail = new MailReferent() { Kind = MailKind.Head }; Pullenti.Ner.ReferentToken mt = new Pullenti.Ner.ReferentToken(mail, lines[0].BeginToken, t1); mail.Text = Pullenti.Ner.Core.MiscHelper.GetTextValueOfMetaToken(mt, Pullenti.Ner.Core.GetTextAttr.KeepRegister); ad.RegisterReferent(mail); mail.AddOccurenceOfRefTok(mt); } int i0 = i; Pullenti.Ner.Token t2 = null; int err = 0; for (i = lines.Count - 1; i >= i0; i--) { Pullenti.Ner.Mail.Internal.MailLine li = lines[i]; if (li.Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.BestRegards) { t2 = lines[i].BeginToken; for (--i; i >= i0; i--) { if (lines[i].Typ == 
Pullenti.Ner.Mail.Internal.MailLine.Types.BestRegards && (lines[i].Words < 2)) { t2 = lines[i].BeginToken; } else if ((i > i0 && (lines[i].Words < 3) && lines[i - 1].Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.BestRegards) && (lines[i - 1].Words < 2)) { i--; t2 = lines[i].BeginToken; } else { break; } } break; } if (li.Refs.Count > 0 && (li.Words < 3) && i > i0) { err = 0; t2 = li.BeginToken; continue; } if (li.Words > 10) { t2 = null; continue; } if (li.Words > 2) { if ((++err) > 2) { t2 = null; } } } if (t2 == null) { for (i = lines.Count - 1; i >= i0; i--) { Pullenti.Ner.Mail.Internal.MailLine li = lines[i]; if (li.Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.Undefined) { if (li.Refs.Count > 0 && (li.Refs[0] is Pullenti.Ner.Person.PersonReferent)) { if (li.Words == 0 && i > i0) { t2 = li.BeginToken; break; } } } } } for (int ii = i0; ii < lines.Count; ii++) { if (lines[ii].Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.Hello) { MailReferent mail = new MailReferent() { Kind = MailKind.Hello }; Pullenti.Ner.ReferentToken mt = new Pullenti.Ner.ReferentToken(mail, lines[i0].BeginToken, lines[ii].EndToken); if (mt.LengthChar > 0) { mail.Text = Pullenti.Ner.Core.MiscHelper.GetTextValueOfMetaToken(mt, Pullenti.Ner.Core.GetTextAttr.KeepRegister); ad.RegisterReferent(mail); mail.AddOccurenceOfRefTok(mt); i0 = ii + 1; } break; } else if (lines[ii].Typ != Pullenti.Ner.Mail.Internal.MailLine.Types.Undefined || lines[ii].Words > 0 || lines[ii].Refs.Count > 0) { break; } } if (i0 < lines.Count) { if (t2 != null && t2.Previous == null) { } else { MailReferent mail = new MailReferent() { Kind = MailKind.Body }; Pullenti.Ner.ReferentToken mt = new Pullenti.Ner.ReferentToken(mail, lines[i0].BeginToken, (t2 != null && t2.Previous != null ? 
t2.Previous : lines[lines.Count - 1].EndToken)); if (mt.LengthChar > 0) { mail.Text = Pullenti.Ner.Core.MiscHelper.GetTextValueOfMetaToken(mt, Pullenti.Ner.Core.GetTextAttr.KeepRegister); ad.RegisterReferent(mail); mail.AddOccurenceOfRefTok(mt); } } if (t2 != null) { MailReferent mail = new MailReferent() { Kind = MailKind.Tail }; Pullenti.Ner.ReferentToken mt = new Pullenti.Ner.ReferentToken(mail, t2, lines[lines.Count - 1].EndToken); if (mt.LengthChar > 0) { mail.Text = Pullenti.Ner.Core.MiscHelper.GetTextValueOfMetaToken(mt, Pullenti.Ner.Core.GetTextAttr.KeepRegister); ad.RegisterReferent(mail); mail.AddOccurenceOfRefTok(mt); } for (i = i0; i < lines.Count; i++) { if (lines[i].BeginChar >= t2.BeginChar) { foreach (Pullenti.Ner.Referent r in lines[i].Refs) { mail.AddRef(r, 0); } } } } } } }
// Extracts measures and units.
//  1. When the kit has an external ontology, the NAME/FULLNAME values of its unit
//     referents that have no m_Unit are collected as additional termins.
//  2. Every token is offered to MeasureToken.TryParseMinimal and, failing that, to
//     MeasureToken.TryParse; the referent tokens produced by
//     CreateRefenetsTokensWithRegister are embedded one by one, re-pointing later
//     tokens of the same list that shared a boundary with the token just embedded.
//  3. With an external ontology, each registered UnitReferent is linked to the first
//     ontology item having a NAME/FULLNAME value that the unit also holds in a slot.
public override void Process(Pullenti.Ner.Core.AnalysisKit kit)
{
    Pullenti.Ner.Core.AnalyzerData data = kit.GetAnalyzerData(this);

    Pullenti.Ner.Core.TerminCollection extraUnits = null;
    if (kit.Ontology != null)
    {
        extraUnits = new Pullenti.Ner.Core.TerminCollection();
        foreach (Pullenti.Ner.ExtOntologyItem item in kit.Ontology.Items)
        {
            UnitReferent ontoUnit = item.Referent as UnitReferent;
            if (ontoUnit == null || ontoUnit.m_Unit != null)
            {
                continue;
            }
            foreach (Pullenti.Ner.Slot slot in ontoUnit.Slots)
            {
                bool isNameSlot = slot.TypeName == UnitReferent.ATTR_NAME || slot.TypeName == UnitReferent.ATTR_FULLNAME;
                if (!isNameSlot)
                {
                    continue;
                }
                Pullenti.Ner.Core.Termin termin = new Pullenti.Ner.Core.Termin(slot.Value as string) { Tag = ontoUnit };
                extraUnits.Add(termin);
            }
        }
    }

    for (Pullenti.Ner.Token t = kit.FirstToken; t != null; t = t.Next)
    {
        Pullenti.Ner.Measure.Internal.MeasureToken measure =
            Pullenti.Ner.Measure.Internal.MeasureToken.TryParseMinimal(t, extraUnits, false)
            ?? Pullenti.Ner.Measure.Internal.MeasureToken.TryParse(t, extraUnits, true, false, false, false);
        if (measure == null)
        {
            continue;
        }

        List<Pullenti.Ner.ReferentToken> produced = measure.CreateRefenetsTokensWithRegister(data, true);
        if (produced == null)
        {
            continue;
        }

        for (int i = 0; i < produced.Count; i++)
        {
            Pullenti.Ner.ReferentToken current = produced[i];
            t.Kit.EmbedToken(current);
            t = current;
            // Later tokens that started or ended where this one did must now refer
            // to the embedded token instead.
            for (int j = i + 1; j < produced.Count; j++)
            {
                Pullenti.Ner.ReferentToken later = produced[j];
                if (later.BeginToken == current.BeginToken)
                {
                    later.BeginToken = t;
                }
                if (later.EndToken == current.EndToken)
                {
                    later.EndToken = t;
                }
            }
        }
    }

    if (kit.Ontology == null)
    {
        return;
    }

    foreach (Pullenti.Ner.Referent entity in data.Referents)
    {
        UnitReferent unit = entity as UnitReferent;
        if (unit == null)
        {
            continue;
        }
        foreach (Pullenti.Ner.ExtOntologyItem item in kit.Ontology.Items)
        {
            UnitReferent ontoUnit = item.Referent as UnitReferent;
            if (ontoUnit == null)
            {
                continue;
            }

            bool matched = false;
            foreach (Pullenti.Ner.Slot slot in ontoUnit.Slots)
            {
                if (slot.TypeName != UnitReferent.ATTR_NAME && slot.TypeName != UnitReferent.ATTR_FULLNAME)
                {
                    continue;
                }
                if (unit.FindSlot(null, slot.Value, true) != null)
                {
                    matched = true;
                    break;
                }
            }

            if (matched)
            {
                unit.OntologyItems = new List<Pullenti.Ner.ExtOntologyItem>();
                unit.OntologyItems.Add(item);
                break;
            }
        }
    }
}
// Finds phone numbers in the token chain and embeds them as PhoneReferent tokens.
// For each position where PhoneItemToken.TryAttachAll yields items:
//  - looks backwards for a preceding PhoneReferent (prevPhone), stepping over other
//    referent tokens, bracketed "(...)" groups (at most 100 tokens long), the characters
//    , ; / \ and "and" tokens; the search stops after more than five other tokens or at a
//    token adjacent to a line break;
//  - consumes leading Prefix items (plus a Delim that follows a prefix), taking the phone
//    kind from the first prefix; a first item that is directly followed by a Prefix is
//    removed from the list after its Kind has been taken;
//  - skips a lone Number item that follows a token recognized by UriAnalyzer.m_Schemes;
//  - calls TryAttach; if that fails, drops everything before the first later Prefix item
//    and tries again;
//  - on failure continues after the last parsed item; on success sets the kind of every
//    result: the prefix kind if known, else prevPhone's kind when no prefix was seen, that
//    kind is not Mobile and kkk == 0, else a one-letter marker before the first result
//    ("T"/"Т" only extends the span, "Ф"/"F" means fax, "M"/"М" means mobile) followed by
//    ph.Correct(); the referent is then registered and the token embedded.
public override void Process(Pullenti.Ner.Core.AnalysisKit kit) { PhoneAnalizerData ad = kit.GetAnalyzerData(this) as PhoneAnalizerData; for (Pullenti.Ner.Token t = kit.FirstToken; t != null; t = t.Next) { List<Pullenti.Ner.Phone.Internal.PhoneItemToken> pli = Pullenti.Ner.Phone.Internal.PhoneItemToken.TryAttachAll(t, 15); if (pli == null || pli.Count == 0) continue; PhoneReferent prevPhone = null; int kkk = 0; for (Pullenti.Ner.Token tt = t.Previous; tt != null; tt = tt.Previous) { if (tt.GetReferent() is PhoneReferent) { prevPhone = tt.GetReferent() as PhoneReferent; break; } else if (tt is Pullenti.Ner.ReferentToken) { } else if (tt.IsChar(')')) { Pullenti.Ner.Token ttt = tt.Previous; int cou = 0; for (; ttt != null; ttt = ttt.Previous) { if (ttt.IsChar('(')) break; else if ((++cou) > 100) break; } if (ttt == null || !ttt.IsChar('(')) break; tt = ttt; } else if (!tt.IsCharOf(",;/\\") && !tt.IsAnd) { if ((++kkk) > 5) break; if (tt.IsNewlineBefore || tt.IsNewlineAfter) break; } } int j = 0; bool isPhoneBefore = false; bool isPref = false; PhoneKind ki = PhoneKind.Undefined; while (j < pli.Count) { if (pli[j].ItemType == Pullenti.Ner.Phone.Internal.PhoneItemToken.PhoneItemType.Prefix) { if (ki == PhoneKind.Undefined) ki = pli[j].Kind; isPref = true; isPhoneBefore = true; j++; if ((j < pli.Count) && pli[j].ItemType == Pullenti.Ner.Phone.Internal.PhoneItemToken.PhoneItemType.Delim) j++; } else if (((j + 1) < pli.Count) && pli[j + 1].ItemType == Pullenti.Ner.Phone.Internal.PhoneItemToken.PhoneItemType.Prefix && j == 0) { if (ki == PhoneKind.Undefined) ki = pli[0].Kind; isPref = true; pli.RemoveAt(0); } else break; } if (prevPhone != null) isPhoneBefore = true; if (pli.Count == 1 && pli[0].ItemType == Pullenti.Ner.Phone.Internal.PhoneItemToken.PhoneItemType.Number) { Pullenti.Ner.Token tt = t.Previous; if ((tt is Pullenti.Ner.TextToken) && !tt.Chars.IsLetter) tt = tt.Previous; if (tt is Pullenti.Ner.TextToken) { if (Pullenti.Ner.Uri.UriAnalyzer.m_Schemes.TryParse(tt, 
Pullenti.Ner.Core.TerminParseAttr.No) != null) continue; } } List<Pullenti.Ner.ReferentToken> rts = this.TryAttach(pli, j, isPhoneBefore, prevPhone); if (rts == null) { for (j = 1; j < pli.Count; j++) { if (pli[j].ItemType == Pullenti.Ner.Phone.Internal.PhoneItemToken.PhoneItemType.Prefix) { pli.RemoveRange(0, j); rts = this.TryAttach(pli, 1, true, prevPhone); break; } } } if (rts == null) t = pli[pli.Count - 1].EndToken; else { if ((ki == PhoneKind.Undefined && prevPhone != null && !isPref) && prevPhone.Kind != PhoneKind.Mobile && kkk == 0) ki = prevPhone.Kind; foreach (Pullenti.Ner.ReferentToken rt in rts) { PhoneReferent ph = rt.Referent as PhoneReferent; if (ki != PhoneKind.Undefined) ph.Kind = ki; else { if (rt == rts[0] && (rt.WhitespacesBeforeCount < 3)) { Pullenti.Ner.Token tt1 = rt.BeginToken.Previous; if (tt1 != null && tt1.IsTableControlChar) tt1 = tt1.Previous; if ((tt1 is Pullenti.Ner.TextToken) && ((tt1.IsNewlineBefore || ((tt1.Previous != null && tt1.Previous.IsTableControlChar))))) { string term = (tt1 as Pullenti.Ner.TextToken).Term; if (term == "T" || term == "Т") rt.BeginToken = tt1; else if (term == "Ф" || term == "F") { ph.Kind = (ki = PhoneKind.Fax); rt.BeginToken = tt1; } else if (term == "M" || term == "М") { ph.Kind = (ki = PhoneKind.Mobile); rt.BeginToken = tt1; } } } ph.Correct(); } rt.Referent = ad.RegisterReferent(rt.Referent); kit.EmbedToken(rt); t = rt; } } } }
// Two-pass extraction of weapon referents.
// Pass 1 parses WeaponItemToken lists, attaches referents with TryAttach, registers
// and embeds them, and indexes them: by model (and by "brand model" when a brand is
// present; keys starting with a digit are not indexed) and by name.
// Pass 2 (skipped when both indexes are empty) looks for further mentions: a known
// name that fills a bracket sequence, or a known model / name in plain text that is
// followed by whitespace, one of ",.)" or a bracket. A model key is used only when
// exactly one referent is indexed under it. A brand item found just before the
// mention is added to the referent and included in the span.
public override void Process(Pullenti.Ner.Core.AnalysisKit kit)
{
    Pullenti.Ner.Core.AnalyzerData data = kit.GetAnalyzerData(this);
    Pullenti.Ner.Core.TerminCollection modelTermins = new Pullenti.Ner.Core.TerminCollection();
    Dictionary<string, List<Pullenti.Ner.Referent>> referentsByModel = new Dictionary<string, List<Pullenti.Ner.Referent>>();
    Pullenti.Ner.Core.TerminCollection nameTermins = new Pullenti.Ner.Core.TerminCollection();

    for (Pullenti.Ner.Token t = kit.FirstToken; t != null; t = t.Next)
    {
        List<Pullenti.Ner.Weapon.Internal.WeaponItemToken> items = Pullenti.Ner.Weapon.Internal.WeaponItemToken.TryParseList(t, 10);
        if (items == null)
        {
            continue;
        }
        List<Pullenti.Ner.ReferentToken> attached = this.TryAttach(items, false);
        if (attached == null)
        {
            continue;
        }

        foreach (Pullenti.Ner.ReferentToken rt in attached)
        {
            rt.Referent = data.RegisterReferent(rt.Referent);
            kit.EmbedToken(rt);
            t = rt;

            foreach (Pullenti.Ner.Slot slot in rt.Referent.Slots)
            {
                if (slot.TypeName == WeaponReferent.ATTR_MODEL)
                {
                    // First the bare model, then (once) the model prefixed by the brand.
                    string key = slot.Value.ToString();
                    bool brandAdded = false;
                    while (true)
                    {
                        if (!char.IsDigit(key[0]))
                        {
                            List<Pullenti.Ner.Referent> sameModel;
                            if (!referentsByModel.TryGetValue(key, out sameModel))
                            {
                                sameModel = new List<Pullenti.Ner.Referent>();
                                referentsByModel.Add(key, sameModel);
                            }
                            if (!sameModel.Contains(rt.Referent))
                            {
                                sameModel.Add(rt.Referent);
                            }
                            modelTermins.AddString(key, sameModel, null, false);
                        }
                        if (brandAdded)
                        {
                            break;
                        }
                        string brand = rt.Referent.GetStringValue(WeaponReferent.ATTR_BRAND);
                        if (brand == null)
                        {
                            break;
                        }
                        key = string.Format("{0} {1}", brand, key);
                        brandAdded = true;
                    }
                }
                else if (slot.TypeName == WeaponReferent.ATTR_NAME)
                {
                    Pullenti.Ner.Core.Termin nameTermin = new Pullenti.Ner.Core.Termin(slot.Value.ToString()) { Tag = rt.Referent };
                    nameTermins.Add(nameTermin);
                }
            }
        }
    }

    if (referentsByModel.Count == 0 && nameTermins.Termins.Count == 0)
    {
        return;
    }

    for (Pullenti.Ner.Token t = kit.FirstToken; t != null; t = t.Next)
    {
        Pullenti.Ner.Core.BracketSequenceToken brackets = Pullenti.Ner.Core.BracketHelper.TryParse(t, Pullenti.Ner.Core.BracketParseAttr.No, 10);
        if (brackets != null)
        {
            Pullenti.Ner.Core.TerminToken inside = nameTermins.TryParse(t.Next, Pullenti.Ner.Core.TerminParseAttr.No);
            if (inside != null && inside.EndToken.Next == brackets.EndToken)
            {
                Pullenti.Ner.ReferentToken bracketed = new Pullenti.Ner.ReferentToken(inside.Termin.Tag as Pullenti.Ner.Referent, brackets.BeginToken, brackets.EndToken);
                kit.EmbedToken(bracketed);
                t = bracketed;
                continue;
            }
        }

        if (!(t is Pullenti.Ner.TextToken) || !t.Chars.IsLetter)
        {
            continue;
        }

        Pullenti.Ner.Core.TerminToken mention = modelTermins.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
        if (mention == null && !t.Chars.IsAllLower)
        {
            mention = nameTermins.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
        }
        if (mention == null)
        {
            continue;
        }

        if (!mention.IsWhitespaceAfter)
        {
            bool followedByPunct = mention.EndToken.Next != null && mention.EndToken.Next.IsCharOf(",.)");
            if (!followedByPunct && !Pullenti.Ner.Core.BracketHelper.IsBracket(mention.EndToken.Next, false))
            {
                continue;
            }
        }

        Pullenti.Ner.Referent target;
        List<Pullenti.Ner.Referent> candidates = mention.Termin.Tag as List<Pullenti.Ner.Referent>;
        if (candidates != null && candidates.Count == 1)
        {
            target = candidates[0];
        }
        else
        {
            target = mention.Termin.Tag as Pullenti.Ner.Referent;
        }
        if (target == null)
        {
            continue;
        }

        Pullenti.Ner.Weapon.Internal.WeaponItemToken before = Pullenti.Ner.Weapon.Internal.WeaponItemToken.TryParse(mention.BeginToken.Previous, null, false, true);
        if (before != null && before.Typ == Pullenti.Ner.Weapon.Internal.WeaponItemToken.Typs.Brand)
        {
            target.AddSlot(WeaponReferent.ATTR_BRAND, before.Value, false, 0);
            mention.BeginToken = before.BeginToken;
        }
        Pullenti.Ner.ReferentToken embedded = new Pullenti.Ner.ReferentToken(target, mention.BeginToken, mention.EndToken);
        kit.EmbedToken(embedded);
        t = embedded;
    }
}
// Recomputes the "general referent" relations between the referents of all analyzers.
//  1. Wraps every referent in a Node (stored in Referent.Tag) and indexes the
//     referents per analyzer name by their compare strings.
//  2. Records which nodes reference which through slot values (RefsFrom / RefsTo).
//  3. Inside every index bucket of 2..3000 referents, links each pair where exactly
//     one side can be general for the other (GenFrom / GenTo).
//  4. For nodes with several generals, drops those that another remaining general
//     already has among its parents (IsInGenParentsOrHigher).
//  5. A node with exactly one general either absorbs it (when that general has no
//     other specializations: slots and occurrences are merged and the general is
//     marked deleted) or gets it as GeneralReferent.
//  6. Lets _correctReferents fix every token, removes deleted referents from their
//     analyzer data and clears all tags.
public static void RefreshGenerals(Pullenti.Ner.Processor proc, Pullenti.Ner.Core.AnalysisKit kit)
{
    Dictionary<string, Dictionary<string, List<Pullenti.Ner.Referent>>> index = new Dictionary<string, Dictionary<string, List<Pullenti.Ner.Referent>>>();
    List<Node> nodes = new List<Node>();

    foreach (Pullenti.Ner.Analyzer analyzer in proc.Analyzers)
    {
        Pullenti.Ner.Core.AnalyzerData data = kit.GetAnalyzerData(analyzer);
        if (data == null)
        {
            continue;
        }
        foreach (Pullenti.Ner.Referent referent in data.Referents)
        {
            Node node = new Node() { Ref = referent, Ad = data };
            nodes.Add(node);
            referent.Tag = node;

            Dictionary<string, List<Pullenti.Ner.Referent>> byString;
            if (!index.TryGetValue(analyzer.Name, out byString))
            {
                byString = new Dictionary<string, List<Pullenti.Ner.Referent>>();
                index.Add(analyzer.Name, byString);
            }

            List<string> keys = referent.GetCompareStrings();
            if (keys == null || keys.Count == 0)
            {
                continue;
            }
            foreach (string key in keys)
            {
                if (key == null)
                {
                    continue;
                }
                List<Pullenti.Ner.Referent> bucket;
                if (!byString.TryGetValue(key, out bucket))
                {
                    bucket = new List<Pullenti.Ner.Referent>();
                    byString.Add(key, bucket);
                }
                bucket.Add(referent);
            }
        }
    }

    foreach (Node source in nodes)
    {
        foreach (Pullenti.Ner.Slot slot in source.Ref.Slots)
        {
            if (!(slot.Value is Pullenti.Ner.Referent))
            {
                continue;
            }
            Pullenti.Ner.Referent referenced = slot.Value as Pullenti.Ner.Referent;
            Node target = referenced.Tag as Node;
            if (target == null)
            {
                continue;
            }
            if (target.RefsFrom == null)
            {
                target.RefsFrom = new List<Node>();
            }
            target.RefsFrom.Add(source);
            if (source.RefsTo == null)
            {
                source.RefsTo = new List<Node>();
            }
            source.RefsTo.Add(target);
        }
    }

    foreach (Dictionary<string, List<Pullenti.Ner.Referent>> byString in index.Values)
    {
        foreach (List<Pullenti.Ner.Referent> bucket in byString.Values)
        {
            if (bucket.Count < 2 || bucket.Count > 3000)
            {
                continue;
            }
            for (int i = 0; i < bucket.Count; i++)
            {
                for (int j = i + 1; j < bucket.Count; j++)
                {
                    Node general = null;
                    Node special = null;
                    if (bucket[i].CanBeGeneralFor(bucket[j]) && !bucket[j].CanBeGeneralFor(bucket[i]))
                    {
                        general = bucket[i].Tag as Node;
                        special = bucket[j].Tag as Node;
                    }
                    else if (bucket[j].CanBeGeneralFor(bucket[i]) && !bucket[i].CanBeGeneralFor(bucket[j]))
                    {
                        general = bucket[j].Tag as Node;
                        special = bucket[i].Tag as Node;
                    }
                    if (general == null || special == null)
                    {
                        continue;
                    }

                    if (general.GenFrom == null)
                    {
                        general.GenFrom = new List<Node>();
                    }
                    if (!general.GenFrom.Contains(special))
                    {
                        general.GenFrom.Add(special);
                    }
                    if (special.GenTo == null)
                    {
                        special.GenTo = new List<Node>();
                    }
                    if (!special.GenTo.Contains(general))
                    {
                        special.GenTo.Add(general);
                    }
                }
            }
        }
    }

    foreach (Node node in nodes)
    {
        if (node.GenTo == null || node.GenTo.Count <= 1)
        {
            continue;
        }
        // Iterate backwards because entries are removed while scanning.
        for (int i = node.GenTo.Count - 1; i >= 0; i--)
        {
            Node parent = node.GenTo[i];
            bool redundant = false;
            for (int j = 0; j < node.GenTo.Count; j++)
            {
                if (j != i && node.GenTo[j].IsInGenParentsOrHigher(parent))
                {
                    redundant = true;
                }
            }
            if (redundant)
            {
                parent.GenFrom.Remove(node);
                node.GenTo.RemoveAt(i);
            }
        }
    }

    foreach (Node node in nodes)
    {
        if (node.Deleted || node.GenTo == null || node.GenTo.Count != 1)
        {
            continue;
        }
        Node parent = node.GenTo[0];
        if (parent.GenFrom.Count != 1)
        {
            node.Ref.GeneralReferent = parent.Ref;
            continue;
        }

        node.Ref.MergeSlots(parent.Ref, true);
        parent.Ref.Tag = node.Ref;
        parent.ReplaceValues(node);
        foreach (Pullenti.Ner.TextAnnotation occurrence in parent.Ref.Occurrence)
        {
            node.Ref.AddOccurence(occurrence);
        }
        parent.Deleted = true;
    }

    Pullenti.Ner.Token token = kit.FirstToken;
    while (token != null)
    {
        _correctReferents(token);
        token = token.Next;
    }

    foreach (Node node in nodes)
    {
        if (node.Deleted)
        {
            node.Ad.RemoveReferent(node.Ref);
        }
        node.Ref.Tag = null;
    }
}
// Main keyword extraction function.
//  1. Runs a DenominationAnalyzer on the kit when the processor has no active one.
//  2. First pass over the tokens: tokens that already carry a referent are handled by
//     _addReferents; a verb that does not start a noun phrase becomes a Predicate
//     keyword; every suitable word of a noun phrase becomes an Object keyword, and a
//     phrase with several such words additionally gets a combined Object keyword
//     that references its parts.
//  3. Second pass: joins an Object keyword with a following Object keyword that is
//     linked through "OF" (with an optional English article) or stands in the
//     genitive case, provided the ChildWords limits checked below are met.
//  4. Optionally sorts the referents by rank (SortKeywordsByRank) and creates an
//     annotation referent (AnnotationMaxSentences > 0).
public override void Process(Pullenti.Ner.Core.AnalysisKit kit)
{
    Pullenti.Ner.Core.AnalyzerData ad = kit.GetAnalyzerData(this);

    bool hasDenoms = false;
    foreach (Pullenti.Ner.Analyzer a in kit.Processor.Analyzers)
    {
        if ((a is Pullenti.Ner.Denomination.DenominationAnalyzer) && !a.IgnoreThisAnalyzer)
        {
            hasDenoms = true;
        }
    }
    if (!hasDenoms)
    {
        Pullenti.Ner.Denomination.DenominationAnalyzer a = new Pullenti.Ner.Denomination.DenominationAnalyzer();
        a.Process(kit);
    }

    List<KeywordReferent> li = new List<KeywordReferent>();
    StringBuilder tmp = new StringBuilder();
    List<string> tmp2 = new List<string>();

    // max is the total token count; together with cur it is passed to the ranking helpers.
    int max = 0;
    for (Pullenti.Ner.Token t = kit.FirstToken; t != null; t = t.Next)
    {
        max++;
    }

    int cur = 0;
    for (Pullenti.Ner.Token t = kit.FirstToken; t != null; t = t.Next, cur++)
    {
        Pullenti.Ner.Referent r = t.GetReferent();
        if (r != null)
        {
            t = this._addReferents(ad, t, cur, max);
            continue;
        }
        if (!(t is Pullenti.Ner.TextToken))
        {
            continue;
        }
        if (!t.Chars.IsLetter || (t.LengthChar < 3))
        {
            continue;
        }
        string term = (t as Pullenti.Ner.TextToken).Term;
        if (term == "ЕСТЬ")
        {
            // "ЕСТЬ" is kept only when the previous text token is a verb.
            if ((t.Previous is Pullenti.Ner.TextToken) && t.Previous.Morph.Class.IsVerb)
            {
            }
            else
            {
                continue;
            }
        }

        Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t, Pullenti.Ner.Core.NounPhraseParseAttr.AdjectiveCanBeLast | Pullenti.Ner.Core.NounPhraseParseAttr.ParsePreposition, 0, null);
        if (npt == null)
        {
            Pullenti.Morph.MorphClass mc = t.GetMorphClassInDictionary();
            if (mc.IsVerb && !mc.IsPreposition)
            {
                if ((t as Pullenti.Ner.TextToken).IsVerbBe)
                {
                    continue;
                }
                if (t.IsValue("МОЧЬ", null) || t.IsValue("WOULD", null))
                {
                    continue;
                }
                KeywordReferent kref = new KeywordReferent() { Typ = KeywordType.Predicate };
                string norm = t.GetNormalCaseText(Pullenti.Morph.MorphClass.Verb, Pullenti.Morph.MorphNumber.Singular, Pullenti.Morph.MorphGender.Undefined, false);
                if (norm == null)
                {
                    norm = (t as Pullenti.Ner.TextToken).Lemma;
                }
                // Strip the reflexive ending "СЯ" (keeping "Ь"); ordinal comparison, since
                // this is an exact suffix test, not a linguistic one.
                if (norm.EndsWith("ЬСЯ", StringComparison.Ordinal))
                {
                    norm = norm.Substring(0, norm.Length - 2);
                }
                kref.AddSlot(KeywordReferent.ATTR_VALUE, norm, false, 0);
                List<Pullenti.Semantic.Utils.DerivateGroup> drv = Pullenti.Semantic.Utils.DerivateService.FindDerivates(norm, true, t.Morph.Language);
                _addNormals(kref, drv, norm);
                kref = ad.RegisterReferent(kref) as KeywordReferent;
                _setRank(kref, cur, max);
                Pullenti.Ner.ReferentToken rt1 = new Pullenti.Ner.ReferentToken(ad.RegisterReferent(kref), t, t) { Morph = t.Morph };
                kit.EmbedToken(rt1);
                t = rt1;
                continue;
            }
            continue;
        }

        if (npt.InternalNoun != null)
        {
            continue;
        }
        if (npt.EndToken.IsValue("ЦЕЛОМ", null) || npt.EndToken.IsValue("ЧАСТНОСТИ", null))
        {
            if (npt.Preposition != null)
            {
                t = npt.EndToken;
                continue;
            }
        }
        if (npt.EndToken.IsValue("СТОРОНЫ", null) && npt.Preposition != null && npt.Preposition.Normal == "С")
        {
            t = npt.EndToken;
            continue;
        }
        if (npt.BeginToken == npt.EndToken)
        {
            Pullenti.Morph.MorphClass mc = t.GetMorphClassInDictionary();
            if (mc.IsPreposition)
            {
                continue;
            }
            else if (mc.IsAdverb)
            {
                if (t.IsValue("ПОТОМ", null))
                {
                    continue;
                }
            }
        }

        li.Clear();
        // t0 tracks the first token of the phrase after embedding (it is replaced
        // when the first word itself becomes a keyword token).
        Pullenti.Ner.Token t0 = t;
        for (Pullenti.Ner.Token tt = t; tt != null && tt.EndChar <= npt.EndChar; tt = tt.Next)
        {
            if (!(tt is Pullenti.Ner.TextToken))
            {
                continue;
            }
            // NOTE(review): the result of this check is not used - it looks like a
            // debugging leftover; kept so that the sequence of calls is unchanged.
            if (tt.IsValue("NATURAL", null))
            {
            }
            if ((tt.LengthChar < 3) || !tt.Chars.IsLetter)
            {
                continue;
            }
            Pullenti.Morph.MorphClass mc = tt.GetMorphClassInDictionary();
            if ((mc.IsPreposition || mc.IsPronoun || mc.IsPersonalPronoun) || mc.IsConjunction)
            {
                if (tt.IsValue("ОТНОШЕНИЕ", null))
                {
                }
                else
                {
                    continue;
                }
            }
            if (mc.IsMisc)
            {
                if (Pullenti.Ner.Core.MiscHelper.IsEngArticle(tt))
                {
                    continue;
                }
            }
            KeywordReferent kref = new KeywordReferent() { Typ = KeywordType.Object };
            string norm = (tt as Pullenti.Ner.TextToken).Lemma;
            kref.AddSlot(KeywordReferent.ATTR_VALUE, norm, false, 0);
            if (norm != "ЕСТЬ")
            {
                List<Pullenti.Semantic.Utils.DerivateGroup> drv = Pullenti.Semantic.Utils.DerivateService.FindDerivates(norm, true, tt.Morph.Language);
                _addNormals(kref, drv, norm);
            }
            kref = ad.RegisterReferent(kref) as KeywordReferent;
            _setRank(kref, cur, max);
            Pullenti.Ner.ReferentToken rt1 = new Pullenti.Ner.ReferentToken(kref, tt, tt) { Morph = tt.Morph };
            kit.EmbedToken(rt1);
            if (tt == t && li.Count == 0)
            {
                t0 = rt1;
            }
            t = rt1;
            li.Add(kref);
        }

        if (li.Count > 1)
        {
            KeywordReferent kref = new KeywordReferent() { Typ = KeywordType.Object };
            tmp2.Clear();
            foreach (KeywordReferent kw in li)
            {
                string s = kw.GetStringValue(KeywordReferent.ATTR_VALUE);
                string n = kw.GetStringValue(KeywordReferent.ATTR_NORMAL);
                // Prefer the normal form of a part, fall back to its value.
                tmp2.Add(n != null ? n : s);
                kref.AddSlot(KeywordReferent.ATTR_REF, kw, false, 0);
            }
            string val = npt.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Singular, Pullenti.Morph.MorphGender.Undefined, false);
            kref.AddSlot(KeywordReferent.ATTR_VALUE, val, false, 0);

            // The normal form of the phrase is the sorted, space-joined list of the parts.
            tmp.Length = 0;
            tmp2.Sort();
            foreach (string s in tmp2)
            {
                if (tmp.Length > 0)
                {
                    tmp.Append(' ');
                }
                tmp.Append(s);
            }
            string norm = tmp.ToString();
            if (norm != val)
            {
                kref.AddSlot(KeywordReferent.ATTR_NORMAL, norm, false, 0);
            }
            kref = ad.RegisterReferent(kref) as KeywordReferent;
            _setRank(kref, cur, max);
            Pullenti.Ner.ReferentToken rt1 = new Pullenti.Ner.ReferentToken(kref, t0, t) { Morph = npt.Morph };
            kit.EmbedToken(rt1);
            t = rt1;
        }
    }

    cur = 0;
    for (Pullenti.Ner.Token t = kit.FirstToken; t != null; t = t.Next, cur++)
    {
        KeywordReferent kw = t.GetReferent() as KeywordReferent;
        if (kw == null || kw.Typ != KeywordType.Object)
        {
            continue;
        }
        if (t.Next == null || kw.ChildWords > 2)
        {
            continue;
        }
        Pullenti.Ner.Token t1 = t.Next;
        if (t1.IsValue("OF", null) && (t1.WhitespacesAfterCount < 3) && t1.Next != null)
        {
            t1 = t1.Next;
            if ((t1 is Pullenti.Ner.TextToken) && Pullenti.Ner.Core.MiscHelper.IsEngArticle(t1) && t1.Next != null)
            {
                t1 = t1.Next;
            }
        }
        else if (!t1.Morph.Case.IsGenitive || t.WhitespacesAfterCount > 1)
        {
            continue;
        }
        KeywordReferent kw2 = t1.GetReferent() as KeywordReferent;
        if (kw2 == null)
        {
            continue;
        }
        if (kw == kw2)
        {
            continue;
        }
        if (kw2.Typ != KeywordType.Object || (kw.ChildWords + kw2.ChildWords) > 3)
        {
            continue;
        }
        KeywordReferent kwUn = new KeywordReferent();
        kwUn.Union(kw, kw2, Pullenti.Ner.Core.MiscHelper.GetTextValue(t1, t1, Pullenti.Ner.Core.GetTextAttr.No));
        kwUn = ad.RegisterReferent(kwUn) as KeywordReferent;
        _setRank(kwUn, cur, max);
        Pullenti.Ner.ReferentToken rt1 = new Pullenti.Ner.ReferentToken(kwUn, t, t1) { Morph = t.Morph };
        kit.EmbedToken(rt1);
        t = rt1;
    }

    if (SortKeywordsByRank)
    {
        List<Pullenti.Ner.Referent> all = new List<Pullenti.Ner.Referent>(ad.Referents);
        all.Sort(new CompByRank());
        ad.Referents = all;
    }
    if (AnnotationMaxSentences > 0)
    {
        KeywordReferent ano = Pullenti.Ner.Keyword.Internal.AutoannoSentToken.CreateAnnotation(kit, AnnotationMaxSentences);
        if (ano != null)
        {
            ad.RegisterReferent(ano);
        }
    }
}
// Runs every non-ignored analyzer over the kit and then recalculates the
// generalization relations.
//  - A user break (m_Breaked) stops the loop.
//  - When TimeoutSeconds is exceeded, the remaining analyzers are skipped except the
//    one named "INSTRUMENT".
//  - An exception thrown by an analyzer (or by the recalculation) is wrapped and
//    reported through OnMessage / ar.AddException and does not stop the processing.
//   kit   - analysis container passed to the analyzers
//   ar    - result that receives log lines and exceptions
//   noLog - suppresses messages, progress events and exception reporting
void _process2(Pullenti.Ner.Core.AnalysisKit kit, AnalysisResult ar, bool noLog)
{
    string msg;
    Stopwatch sw = Stopwatch.StartNew();
    bool stopByTimeout = false;
    List<Analyzer> anals = new List<Analyzer>(m_Analyzers);
    for (int ii = 0; ii < anals.Count; ii++)
    {
        Analyzer c = anals[ii];
        if (c.IgnoreThisAnalyzer)
        {
            continue;
        }
        // The timeout branch below also raises m_Breaked. Without the extra check the
        // next iteration would report a user break and leave the loop, so an
        // "INSTRUMENT" analyzer placed later in the list could never run.
        if (m_Breaked && !stopByTimeout)
        {
            if (!noLog)
            {
                msg = "Процесс прерван пользователем";
                this.OnMessage(msg);
                ar.Log.Add(msg);
            }
            break;
        }
        if (TimeoutSeconds > 0 && !stopByTimeout)
        {
            if ((DateTime.Now - kit.StartDate).TotalSeconds > TimeoutSeconds)
            {
                m_Breaked = true;
                if (!noLog)
                {
                    msg = "Процесс прерван по таймауту";
                    this.OnMessage(msg);
                    ar.Log.Add(msg);
                }
                stopByTimeout = true;
            }
        }
        if (stopByTimeout && c.Name != "INSTRUMENT")
        {
            continue;
        }
        if (!noLog)
        {
            this.OnProgressHandler(c, new ProgressChangedEventArgs(0, string.Format("Работа \"{0}\"", c.Caption)));
        }
        try
        {
            sw.Reset();
            sw.Start();
            c.Process(kit);
            sw.Stop();
            Pullenti.Ner.Core.AnalyzerData dat = kit.GetAnalyzerData(c);
            if (!noLog)
            {
                msg = string.Format("Анализатор \"{0}\" выделил {1} объект(ов) за {2}", c.Caption, (dat == null ? 0 : dat.Referents.Count), OutSecs(sw.ElapsedMilliseconds));
                this.OnMessage(msg);
                ar.Log.Add(msg);
            }
        }
        catch (Exception ex)
        {
            if (!noLog)
            {
                ex = new Exception(string.Format("Ошибка в анализаторе \"{0}\" ({1})", c.Caption, ex.Message), ex);
                this.OnMessage(ex);
                ar.AddException(ex);
            }
        }
    }

    if (!noLog)
    {
        this.OnProgressHandler(null, new ProgressChangedEventArgs(0, "Пересчёт отношений обобщения"));
    }
    try
    {
        sw.Reset();
        sw.Start();
        Pullenti.Ner.Core.Internal.GeneralRelationHelper.RefreshGenerals(this, kit);
        sw.Stop();
        if (!noLog)
        {
            msg = string.Format("Отношение обобщение пересчитано за {0}", OutSecs(sw.ElapsedMilliseconds));
            this.OnMessage(msg);
            ar.Log.Add(msg);
        }
    }
    catch (Exception ex)
    {
        if (!noLog)
        {
            ex = new Exception("Ошибка пересчёта отношения обобщения", ex);
            this.OnMessage(ex);
            ar.AddException(ex);
        }
    }
}