/// <summary>
/// Notifies <c>Progress</c> subscribers about the current completion percentage and
/// asks <c>Cancel</c> subscribers whether processing should stop.
/// </summary>
/// <param name="pos">current position; ignored unless it lies in 0..<paramref name="max"/></param>
/// <param name="max">total amount of work; nothing is reported unless it is positive</param>
/// <param name="kit">analysis kit, passed as the sender of the <c>Cancel</c> event</param>
/// <returns><c>false</c> when a <c>Cancel</c> subscriber set <c>CancelEventArgs.Cancel</c>, otherwise <c>true</c></returns>
protected bool OnProgress(int pos, int max, Pullenti.Ner.Core.AnalysisKit kit)
{
    bool ret = true;
    if (Progress == null)
    {
        return ret;
    }
    if (pos < 0 || pos > max || max <= 0)
    {
        return ret;
    }

    // 64-bit arithmetic keeps 100 * pos from overflowing for large values and always
    // yields a value in 0..100 (the former large-max branch divided by max / 1000 and
    // therefore produced 0..1000).
    int percent = (int)((100L * pos) / max);

    // Subscribers are only called when the integer percentage actually changes.
    if (percent != lastPercent)
    {
        ProgressChangedEventArgs arg = new ProgressChangedEventArgs(percent, null);
        Progress(this, arg);
        if (Cancel != null)
        {
            CancelEventArgs cea = new CancelEventArgs();
            Cancel(kit, cea);
            ret = !cea.Cancel;
        }
    }
    lastPercent = percent;
    return ret;
}
/// <summary>
/// Walks the token chain, tries to attach a referent either after an ontology keyword
/// (optionally followed by ':') or directly at a referent token / line start, and embeds
/// every successful result into the kit.
/// </summary>
/// <param name="kit">analysis kit that supplies the tokens and receives embedded tokens</param>
public override void Process(Pullenti.Ner.Core.AnalysisKit kit)
{
    Pullenti.Ner.Core.AnalyzerData data = kit.GetAnalyzerData(this);
    for (Pullenti.Ner.Token cur = kit.FirstToken; cur != null; cur = cur.Next)
    {
        Pullenti.Ner.ReferentToken found = null;

        // Keyword lookup is attempted only on letter tokens.
        Pullenti.Ner.Core.TerminToken keyword = (cur.Chars.IsLetter ? m_Ontology.TryParse(cur, Pullenti.Ner.Core.TerminParseAttr.No) : null);
        if (keyword != null)
        {
            Pullenti.Ner.Token after = keyword.EndToken.Next;
            if (after != null && after.IsChar(':'))
            {
                after = after.Next;
            }
            found = this.TryAttach(after, true);
            if (found != null)
            {
                // The result is widened so that it starts at the keyword.
                found.BeginToken = cur;
            }
        }

        if (found == null)
        {
            if (!(cur is Pullenti.Ner.ReferentToken) && !cur.IsNewlineBefore)
            {
                continue;
            }
            found = this.TryAttach(cur, false);
            if (found == null)
            {
                continue;
            }
        }

        found.Referent = data.RegisterReferent(found.Referent);
        kit.EmbedToken(found);
        cur = found;
    }
}
/// <summary>
/// Creates a token spanning <paramref name="begin"/>..<paramref name="end"/> that is bound to
/// <paramref name="entity"/>. If <c>Morph</c> is still null after the base constructor ran,
/// an empty <c>MorphCollection</c> is assigned.
/// </summary>
/// <param name="entity">referent stored in <c>Referent</c></param>
/// <param name="begin">first token, forwarded to the base constructor</param>
/// <param name="end">last token, forwarded to the base constructor</param>
/// <param name="kit">optional analysis kit, forwarded to the base constructor</param>
public ReferentToken(Referent entity, Token begin, Token end, Pullenti.Ner.Core.AnalysisKit kit = null)
    : base(begin, end, kit)
{
    Referent = entity;
    if (Morph != null)
    {
        return;
    }
    Morph = new MorphCollection();
}
/// <summary>
/// Restores this token from <paramref name="stream"/>: base data first, then two strings
/// (<c>Term</c>, <c>Lemma</c>) and two short values, read in exactly that order.
/// </summary>
/// <param name="stream">source stream positioned at the serialized token</param>
/// <param name="kit">analysis kit forwarded to the base implementation</param>
/// <param name="vers">serialization format version forwarded to the base implementation</param>
internal override void Deserialize(Stream stream, Pullenti.Ner.Core.AnalysisKit kit, int vers)
{
    base.Deserialize(stream, kit, vers);

    // Each value is read and assigned before the next read so the stream order is kept.
    string term = Pullenti.Ner.Core.Internal.SerializerHelper.DeserializeString(stream);
    Term = term;

    string lemma = Pullenti.Ner.Core.Internal.SerializerHelper.DeserializeString(stream);
    Lemma = lemma;

    InvariantPrefixLengthOfMorphVars =
        Pullenti.Ner.Core.Internal.SerializerHelper.DeserializeShort(stream);
    MaxLengthOfMorphVars =
        Pullenti.Ner.Core.Internal.SerializerHelper.DeserializeShort(stream);
}
/// <summary>
/// Scans line starts for lists of good attributes, wraps each list into a
/// <c>GoodReferent</c>, embeds the resulting tokens into the kit and finally adds all
/// collected goods to the analyzer data. Progress is reported once per 100000 characters.
/// </summary>
/// <param name="kit">analysis kit that supplies the text and tokens</param>
public override void Process(Pullenti.Ner.Core.AnalysisKit kit)
{
    Pullenti.Ner.Core.AnalyzerData data = kit.GetAnalyzerData(this);

    int chunk = 100000;
    int total = (((kit.Sofa.Text.Length + chunk) - 1)) / chunk;
    if (total == 0)
    {
        total = 1;
    }
    int done = 0;
    int threshold = 0;

    List<GoodReferent> collected = new List<GoodReferent>();
    for (Pullenti.Ner.Token t = kit.FirstToken; t != null; t = t.Next)
    {
        if (!t.IsNewlineBefore)
        {
            continue;
        }

        // Report progress each time the position passes the next chunk boundary;
        // a false result means cancellation was requested.
        if (t.BeginChar > threshold)
        {
            threshold += chunk;
            done++;
            if (!this.OnProgress(done, total, kit))
            {
                break;
            }
        }

        // A non-letter token at the line start is stepped over when something follows it.
        if (!t.Chars.IsLetter && t.Next != null)
        {
            t = t.Next;
        }

        List<Pullenti.Ner.ReferentToken> attrs = Pullenti.Ner.Goods.Internal.GoodAttrToken.TryParseList(t);
        if (attrs == null || attrs.Count == 0)
        {
            continue;
        }

        GoodReferent good = new GoodReferent();
        for (int n = 0; n < attrs.Count; n++)
        {
            Pullenti.Ner.ReferentToken attr = attrs[n];
            attr.Referent = data.RegisterReferent(attr.Referent);
            if (good.FindSlot(GoodReferent.ATTR_ATTR, attr.Referent, true) == null)
            {
                good.AddSlot(GoodReferent.ATTR_ATTR, attr.Referent, false, 0);
            }
            kit.EmbedToken(attr);
        }
        collected.Add(good);

        Pullenti.Ner.ReferentToken whole = new Pullenti.Ner.ReferentToken(good, attrs[0], attrs[attrs.Count - 1]);
        kit.EmbedToken(whole);
        t = whole;
    }

    // Goods are appended to the referent list directly, after the scan has finished.
    for (int n = 0; n < collected.Count; n++)
    {
        data.Referents.Add(collected[n]);
    }
}
/// <summary>
/// Restores this token from <paramref name="stream"/>: base data first, then an integer
/// that, when positive, is a 1-based index into <c>kit.Entities</c> selecting <c>Referent</c>.
/// </summary>
/// <param name="stream">source stream positioned at the serialized token</param>
/// <param name="kit">analysis kit whose <c>Entities</c> list is indexed</param>
/// <param name="vers">serialization format version forwarded to the base implementation</param>
internal override void Deserialize(Stream stream, Pullenti.Ner.Core.AnalysisKit kit, int vers)
{
    base.Deserialize(stream, kit, vers);

    int entityNumber = Pullenti.Ner.Core.Internal.SerializerHelper.DeserializeInt(stream);
    if (entityNumber <= 0)
    {
        // Zero or negative: no referent is assigned.
        return;
    }
    Referent = kit.Entities[entityNumber - 1];
}
/// <summary>
/// Runs <c>_process</c> from the first token of the kit and registers the resulting
/// title-page referent, if one was produced.
/// </summary>
/// <param name="kit">analysis kit that supplies the tokens</param>
public override void Process(Pullenti.Ner.Core.AnalysisKit kit)
{
    Pullenti.Ner.Core.AnalyzerData data = kit.GetAnalyzerData(this);

    // The end token reported by _process is not used here.
    Pullenti.Ner.Token endToken;
    TitlePageReferent titlePage = _process(kit.FirstToken, 0, kit, out endToken);
    if (titlePage == null)
    {
        return;
    }
    data.RegisterReferent(titlePage);
}
/// <summary>
/// Loads the "ShortNames.txt" resource once and fills <c>m_Shorts_Names</c>.
/// Each line is read as: first token (value "F" means feminine, anything else masculine),
/// then the full name, then the short forms up to the next line start. Every short form
/// is mapped to a <c>ShortnameVar</c> carrying the full name and gender.
/// </summary>
/// <remarks>
/// Malformed lines (nothing after the first token, or non-text tokens) are skipped instead of
/// raising a NullReferenceException.
/// </remarks>
public static void Initialize()
{
    if (m_Inited)
    {
        return;
    }
    m_Inited = true;

    string obj = ResourceHelper.GetString("ShortNames.txt");
    if (obj == null)
    {
        return;
    }

    Pullenti.Ner.Core.AnalysisKit kit = new Pullenti.Ner.Core.AnalysisKit(new Pullenti.Ner.SourceOfAnalysis(obj));
    for (Pullenti.Ner.Token t = kit.FirstToken; t != null; t = t.Next)
    {
        if (!t.IsNewlineBefore)
        {
            continue;
        }

        Pullenti.Morph.MorphGender g = (t.IsValue("F", null) ? Pullenti.Morph.MorphGender.Feminie : Pullenti.Morph.MorphGender.Masculine);

        t = t.Next;
        if (t == null)
        {
            // The text ended right after the first token of the line: nothing to record.
            break;
        }

        // A non-text token in the name position makes the whole line unusable.
        Pullenti.Ner.TextToken nameToken = t as Pullenti.Ner.TextToken;
        string nam = (nameToken == null ? null : nameToken.Term);

        List<string> shos = new List<string>();
        for (t = t.Next; t != null; t = t.Next)
        {
            if (t.IsNewlineBefore)
            {
                break;
            }
            Pullenti.Ner.TextToken shortToken = t as Pullenti.Ner.TextToken;
            if (shortToken != null)
            {
                shos.Add(shortToken.Term);
            }
        }

        if (nameToken != null)
        {
            foreach (string s in shos)
            {
                List<ShortnameVar> li = null;
                if (!m_Shorts_Names.TryGetValue(s, out li))
                {
                    m_Shorts_Names.Add(s, (li = new List<ShortnameVar>()));
                }
                li.Add(new ShortnameVar() { Name = nam, Gender = g });
            }
        }

        if (t == null)
        {
            break;
        }
        // Step back so that the outer loop's t.Next lands on the first token of the next line.
        t = t.Previous;
    }
}
/// <summary>
/// Restores the common token state from <paramref name="stream"/>: four integers
/// (begin char, end char, attributes, chars value — the last two narrowed to short)
/// followed by the morphology collection.
/// </summary>
/// <param name="stream">source stream positioned at the serialized token</param>
/// <param name="kit">analysis kit stored in <c>Kit</c></param>
/// <param name="vers">serialization format version (not used by this implementation)</param>
internal virtual void Deserialize(Stream stream, Pullenti.Ner.Core.AnalysisKit kit, int vers)
{
    Kit = kit;

    m_BeginChar = Pullenti.Ner.Core.Internal.SerializerHelper.DeserializeInt(stream);
    m_EndChar = Pullenti.Ner.Core.Internal.SerializerHelper.DeserializeInt(stream);
    m_Attrs = (short)Pullenti.Ner.Core.Internal.SerializerHelper.DeserializeInt(stream);

    // The CharsInfo instance is created before its value is read, as an object initializer would do.
    Pullenti.Morph.CharsInfo chars = new Pullenti.Morph.CharsInfo();
    chars.Value = (short)Pullenti.Ner.Core.Internal.SerializerHelper.DeserializeInt(stream);
    Chars = chars;

    m_Morph = new MorphCollection();
    m_Morph.Deserialize(stream);
}
/// <summary>
/// Tries <c>TryParse</c> at every token; each hit is registered in the analyzer data,
/// embedded into the kit, and scanning resumes after the embedded token.
/// </summary>
/// <param name="kit">analysis kit that supplies the tokens</param>
public override void Process(Pullenti.Ner.Core.AnalysisKit kit)
{
    Pullenti.Ner.Core.AnalyzerData data = kit.GetAnalyzerData(this);
    for (Pullenti.Ner.Token cur = kit.FirstToken; cur != null; cur = cur.Next)
    {
        Pullenti.Ner.ReferentToken parsed = TryParse(cur);
        if (parsed == null)
        {
            continue;
        }
        parsed.Referent = data.RegisterReferent(parsed.Referent);
        kit.EmbedToken(parsed);
        cur = parsed;
    }
}
/// <summary>
/// Continue processing a result that was produced by another processor.
/// </summary>
/// <param name="ar">the result produced by the other processor; a null value is ignored</param>
public void ProcessNext(AnalysisResult ar)
{
    if (ar != null)
    {
        // Build a kit bound to this processor and seed it from the existing result.
        Pullenti.Ner.Core.AnalysisKit kit = new Pullenti.Ner.Core.AnalysisKit();
        kit.Processor = this;
        kit.Ontology = ar.Ontology;
        kit.InitFrom(ar);

        this._process2(kit, ar, false);
        this._createRes(kit, ar, ar.Ontology, false);

        ar.FirstToken = kit.FirstToken;
    }
}
/// <summary>
/// Copies the referents of every analyzer into <paramref name="ar"/>, specific analyzers
/// first and the others second, optionally attaching each referent to the external ontology.
/// </summary>
/// <param name="kit">analysis kit holding the per-analyzer data</param>
/// <param name="ar">result that receives the entities and log messages</param>
/// <param name="extOntology">external ontology to attach referents to, or null</param>
/// <param name="noLog">when true, no summary message is produced</param>
internal void _createRes(Pullenti.Ner.Core.AnalysisKit kit, AnalysisResult ar, ExtOntology extOntology, bool noLog)
{
    Stopwatch timer = Stopwatch.StartNew();
    int attachedCount = 0;

    // Pass 0 handles analyzers with IsSpecific set, pass 1 the remaining ones.
    for (int pass = 0; pass < 2; pass++)
    {
        bool wantSpecific = (pass == 0);
        foreach (Analyzer analyzer in Analyzers)
        {
            if (analyzer.IsSpecific != wantSpecific)
            {
                continue;
            }

            Pullenti.Ner.Core.AnalyzerData data = kit.GetAnalyzerData(analyzer);
            if (data == null || data.Referents.Count <= 0)
            {
                continue;
            }

            if (extOntology != null)
            {
                foreach (Referent referent in data.Referents)
                {
                    if (referent.OntologyItems != null)
                    {
                        continue;
                    }
                    var attached = extOntology.AttachReferent(referent);
                    referent.OntologyItems = attached;
                    if (attached != null)
                    {
                        attachedCount++;
                    }
                }
            }

            ar.Entities.AddRange(data.Referents);
        }
    }
    timer.Stop();

    if (extOntology == null || noLog)
    {
        return;
    }
    string msg = string.Format("Привязано {0} объектов к внешней отнологии ({1} элементов) за {2}", attachedCount, extOntology.Items.Count, OutSecs(timer.ElapsedMilliseconds));
    this.OnMessage(msg);
    ar.Log.Add(msg);
}
/// <summary>
/// Restores this number token from <paramref name="stream"/>. In format version 0 the value
/// is stored as an 8-byte integer and converted to its string form; in later versions it is
/// stored as a string. The spelling type follows as an integer.
/// </summary>
/// <param name="stream">source stream positioned at the serialized token</param>
/// <param name="kit">analysis kit forwarded to the base implementation</param>
/// <param name="vers">serialization format version</param>
/// <exception cref="EndOfStreamException">
/// the stream ends before the 8 bytes of a version-0 value could be read
/// </exception>
internal override void Deserialize(Stream stream, Pullenti.Ner.Core.AnalysisKit kit, int vers)
{
    base.Deserialize(stream, kit, vers);

    if (vers == 0)
    {
        byte[] buf = new byte[8];

        // Stream.Read may legitimately return fewer bytes than requested, so keep reading
        // until the buffer is full instead of decoding a partially filled buffer.
        int filled = 0;
        while (filled < buf.Length)
        {
            int got = stream.Read(buf, filled, buf.Length - filled);
            if (got <= 0)
            {
                throw new EndOfStreamException("Unexpected end of stream while reading a number token value");
            }
            filled += got;
        }

        long lo = BitConverter.ToInt64(buf, 0);
        Value = lo.ToString();
    }
    else
    {
        Value = Pullenti.Ner.Core.Internal.SerializerHelper.DeserializeString(stream);
    }

    Typ = (NumberSpellingType)Pullenti.Ner.Core.Internal.SerializerHelper.DeserializeInt(stream);
}
/// <summary>
/// Builds a document fragment tree from the first token of the kit and asks it to create
/// its referent in this analyzer's data. Does nothing when there are no tokens or no
/// document could be built.
/// </summary>
/// <param name="kit">analysis kit that supplies the tokens</param>
public override void Process(Pullenti.Ner.Core.AnalysisKit kit)
{
    Pullenti.Ner.Token t = kit.FirstToken;
    if (t == null)
    {
        return;
    }

    Pullenti.Ner.Instrument.Internal.FragToken dfr = Pullenti.Ner.Instrument.Internal.FragToken.CreateDocument(t, 0, InstrumentKind.Undefined);
    if (dfr == null)
    {
        return;
    }

    Pullenti.Ner.Core.AnalyzerData ad = kit.GetAnalyzerData(this);
    // The returned referent is not needed here; the call is made for its effect on ad.
    dfr.CreateReferent(ad);
}
/// <summary>
/// At every token that can start a sentence, tries <c>TryParseThesis</c>; each hit is
/// registered in <paramref name="ad"/>, embedded into the kit, and scanning resumes after it.
/// </summary>
/// <param name="kit">analysis kit that supplies the tokens</param>
/// <param name="ad">analyzer data in which found referents are registered</param>
public static void Process(Pullenti.Ner.Core.AnalysisKit kit, Pullenti.Ner.Core.AnalyzerData ad)
{
    for (Pullenti.Ner.Token cur = kit.FirstToken; cur != null; cur = cur.Next)
    {
        if (Pullenti.Ner.Core.MiscHelper.CanBeStartOfSentence(cur))
        {
            Pullenti.Ner.ReferentToken thesis = TryParseThesis(cur);
            if (thesis != null)
            {
                thesis.Referent = ad.RegisterReferent(thesis.Referent);
                kit.EmbedToken(thesis);
                cur = thesis;
            }
        }
    }
}
/// <summary>
/// Parses a list of named items at every token (using the local ontology of the analyzer
/// data), tries to attach a referent to the list, and embeds each successful result.
/// </summary>
/// <param name="kit">analysis kit that supplies the tokens</param>
public override void Process(Pullenti.Ner.Core.AnalysisKit kit)
{
    Pullenti.Ner.Core.AnalyzerDataWithOntology data = kit.GetAnalyzerData(this) as Pullenti.Ner.Core.AnalyzerDataWithOntology;
    for (Pullenti.Ner.Token cur = kit.FirstToken; cur != null; cur = cur.Next)
    {
        List<Pullenti.Ner.Named.Internal.NamedItemToken> items = Pullenti.Ner.Named.Internal.NamedItemToken.TryParseList(cur, data.LocalOntology);
        if (items != null && items.Count != 0)
        {
            Pullenti.Ner.ReferentToken attached = _tryAttach(items);
            if (attached != null)
            {
                attached.Referent = data.RegisterReferent(attached.Referent);
                kit.EmbedToken(attached);
                cur = attached;
            }
        }
    }
}
/// <summary>
/// Tries to parse a good attribute at every token, turns each parsed attribute into a
/// registered referent token and embeds it. Progress is reported once per 100000 characters.
/// </summary>
/// <param name="kit">analysis kit that supplies the text and tokens</param>
public override void Process(Pullenti.Ner.Core.AnalysisKit kit)
{
    Pullenti.Ner.Core.AnalyzerData data = kit.GetAnalyzerData(this);

    int chunk = 100000;
    int total = (((kit.Sofa.Text.Length + chunk) - 1)) / chunk;
    if (total == 0)
    {
        total = 1;
    }
    int done = 0;
    int threshold = 0;

    for (Pullenti.Ner.Token t = kit.FirstToken; t != null; t = t.Next)
    {
        // Report progress at each chunk boundary; a false result means cancellation.
        if (t.BeginChar > threshold)
        {
            threshold += chunk;
            done++;
            if (!this.OnProgress(done, total, kit))
            {
                break;
            }
        }

        Pullenti.Ner.Goods.Internal.GoodAttrToken parsed = Pullenti.Ner.Goods.Internal.GoodAttrToken.TryParse(t, null, true, true);
        if (parsed == null)
        {
            continue;
        }

        GoodAttributeReferent attr = parsed._createAttr();
        if (attr != null)
        {
            Pullenti.Ner.ReferentToken wrapped = new Pullenti.Ner.ReferentToken(attr, parsed.BeginToken, parsed.EndToken);
            wrapped.Referent = data.RegisterReferent(attr);
            kit.EmbedToken(wrapped);
            t = wrapped;
        }
        else
        {
            // No referent could be built: skip the parsed span without embedding anything.
            t = parsed.EndToken;
        }
    }
}
/// <summary>
/// Reads one token from <paramref name="stream"/>. A leading short selects the kind:
/// 0 = no token (null is returned), 1 = text, 2 = number, 3 = referent, anything else = meta.
/// For meta tokens the nested token chain is read as well and linked as begin/end tokens.
/// </summary>
/// <param name="stream">source stream</param>
/// <param name="kit">analysis kit passed to the created tokens</param>
/// <param name="vers">serialization format version</param>
/// <returns>the restored token, or null when the stored kind is 0</returns>
static Pullenti.Ner.Token DeserializeToken(Stream stream, Pullenti.Ner.Core.AnalysisKit kit, int vers)
{
    short kind = DeserializeShort(stream);

    Pullenti.Ner.Token token;
    switch (kind)
    {
        case 0:
            return null;
        case 1:
            token = new Pullenti.Ner.TextToken(null, kit);
            break;
        case 2:
            token = new Pullenti.Ner.NumberToken(null, null, null, Pullenti.Ner.NumberSpellingType.Digit, kit);
            break;
        case 3:
            token = new Pullenti.Ner.ReferentToken(null, null, null, kit);
            break;
        default:
            token = new Pullenti.Ner.MetaToken(null, null, kit);
            break;
    }

    token.Deserialize(stream, kit, vers);

    Pullenti.Ner.MetaToken meta = token as Pullenti.Ner.MetaToken;
    if (meta != null)
    {
        Pullenti.Ner.Token inner = DeserializeTokens(stream, kit, vers);
        if (inner != null)
        {
            meta.m_BeginToken = inner;
            // Walk the nested chain; the last visited token ends up as the end token.
            while (inner != null)
            {
                meta.m_EndToken = inner;
                inner = inner.Next;
            }
        }
    }
    return token;
}
/// <summary>
/// Reads a counted sequence of tokens from <paramref name="stream"/>, links each token to
/// its predecessor through <c>Previous</c>, then calls <c>_corrPrevNext</c> for every meta
/// token of the resulting chain.
/// </summary>
/// <param name="stream">source stream</param>
/// <param name="kit">analysis kit passed to the created tokens</param>
/// <param name="vers">serialization format version</param>
/// <returns>the first token of the chain, or null when the stored count is 0 or no token was read</returns>
public static Pullenti.Ner.Token DeserializeTokens(Stream stream, Pullenti.Ner.Core.AnalysisKit kit, int vers)
{
    int count = DeserializeInt(stream);
    if (count == 0)
    {
        return null;
    }

    Pullenti.Ner.Token head = null;
    Pullenti.Ner.Token last = null;
    for (int n = 0; n < count; n++)
    {
        Pullenti.Ner.Token item = DeserializeToken(stream, kit, vers);
        if (item != null)
        {
            if (head == null)
            {
                head = item;
            }
            if (last != null)
            {
                item.Previous = last;
            }
            last = item;
        }
    }

    Pullenti.Ner.Token walker = head;
    while (walker != null)
    {
        if (walker is Pullenti.Ner.MetaToken)
        {
            _corrPrevNext(walker as Pullenti.Ner.MetaToken, walker.Previous, walker.Next);
        }
        walker = walker.Next;
    }
    return head;
}
/// <summary>
/// Splits the text into mail lines, groups the lines into blocks (one block per message
/// in a thread, started by "From" header lines) and, for every block, registers
/// <c>MailReferent</c> objects of kind Head, Hello, Body and Tail with their text and occurrence.
/// </summary>
/// <param name="kit">analysis kit that supplies the tokens</param>
public override void Process(Pullenti.Ner.Core.AnalysisKit kit)
{
    // Step 1: parse the token chain into a flat list of mail lines.
    List <Pullenti.Ner.Mail.Internal.MailLine> lines = new List <Pullenti.Ner.Mail.Internal.MailLine>();
    for (Pullenti.Ner.Token t = kit.FirstToken; t != null; t = t.Next)
    {
        Pullenti.Ner.Mail.Internal.MailLine ml = Pullenti.Ner.Mail.Internal.MailLine.Parse(t, 0, 0);
        if (ml == null)
        {
            continue;
        }
        // NOTE(review): empty branch with a magic number — looks like a leftover debugging hook; confirm and remove.
        if (lines.Count == 91)
        {
        }
        lines.Add(ml);
        t = ml.EndToken;
    }
    if (lines.Count == 0)
    {
        return;
    }
    // Step 2: group the lines into blocks. 'i' is shared between the outer and the inner loop.
    int i;
    List <List <Pullenti.Ner.Mail.Internal.MailLine> > blocks = new List <List <Pullenti.Ner.Mail.Internal.MailLine> >();
    List <Pullenti.Ner.Mail.Internal.MailLine> blk = null;
    for (i = 0; i < lines.Count; i++)
    {
        Pullenti.Ner.Mail.Internal.MailLine ml = lines[i];
        if (ml.Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.From)
        {
            // A From line opens a new block when it must be the first line, is the very first line,
            // or one of the next two lines is From/Hello ...
            bool isNew = ml.MustBeFirstLine || i == 0;
            if (((i + 2) < lines.Count) && (((lines[i + 1].Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.From || lines[i + 2].Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.From || lines[i + 1].Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.Hello) || lines[i + 2].Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.Hello)))
            {
                isNew = true;
            }
            // ... or the nearest preceding typed line is a BestRegards line ...
            if (!isNew)
            {
                for (int j = i - 1; j >= 0; j--)
                {
                    if (lines[j].Typ != Pullenti.Ner.Mail.Internal.MailLine.Types.Undefined)
                    {
                        if (lines[j].Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.BestRegards)
                        {
                            isNew = true;
                        }
                        break;
                    }
                }
            }
            // ... or the line itself contains a referent whose type name is "DATE" or "URI".
            if (!isNew)
            {
                for (Pullenti.Ner.Token tt = ml.BeginToken; tt != null && tt.EndChar <= ml.EndChar; tt = tt.Next)
                {
                    if (tt.GetReferent() != null)
                    {
                        if (tt.GetReferent().TypeName == "DATE" || tt.GetReferent().TypeName == "URI")
                        {
                            isNew = true;
                        }
                    }
                }
            }
            if (isNew)
            {
                blk = new List <Pullenti.Ner.Mail.Internal.MailLine>();
                blocks.Add(blk);
                // Collect the header lines of the new block.
                for (; i < lines.Count; i++)
                {
                    if (lines[i].Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.From)
                    {
                        if (blk.Count > 0 && lines[i].MustBeFirstLine)
                        {
                            break;
                        }
                        blk.Add(lines[i]);
                    }
                    else if (((i + 1) < lines.Count) && lines[i + 1].Typ == 
Pullenti.Ner.Mail.Internal.MailLine.Types.From)
                    {
                        // A non-From line directly followed by a From line: decide whether it still
                        // belongs to this header.
                        int j;
                        for (j = 0; j < blk.Count; j++)
                        {
                            if (blk[j].Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.From)
                            {
                                if (blk[j].IsRealFrom || blk[j].MustBeFirstLine || blk[j].MailAddr != null)
                                {
                                    break;
                                }
                            }
                        }
                        // No "strong" From line collected so far: keep the line in the header.
                        if (j >= blk.Count)
                        {
                            blk.Add(lines[i]);
                            continue;
                        }
                        // Otherwise stop the header if the following run of From lines contains a "strong" one.
                        bool ok = false;
                        for (j = i + 1; j < lines.Count; j++)
                        {
                            if (lines[j].Typ != Pullenti.Ner.Mail.Internal.MailLine.Types.From)
                            {
                                break;
                            }
                            if (lines[j].IsRealFrom || lines[j].MustBeFirstLine)
                            {
                                ok = true;
                                break;
                            }
                            if (lines[j].MailAddr != null)
                            {
                                ok = true;
                                break;
                            }
                        }
                        if (ok)
                        {
                            break;
                        }
                        blk.Add(lines[i]);
                    }
                    else
                    {
                        break;
                    }
                }
                // Compensate for the outer loop's i++ so the line that stopped the scan is revisited.
                i--;
                continue;
            }
        }
        if (blk == null)
        {
            blocks.Add((blk = new List <Pullenti.Ner.Mail.Internal.MailLine>()));
        }
        blk.Add(lines[i]);
    }
    if (blocks.Count == 0)
    {
        return;
    }
    // Step 3: create the referents for every block. 'lines' is reused for the current block.
    Pullenti.Ner.Core.AnalyzerData ad = kit.GetAnalyzerData(this);
    for (int j = 0; j < blocks.Count; j++)
    {
        lines = blocks[j];
        if (lines.Count == 0)
        {
            continue;
        }
        i = 0;
        // Head: the leading run of From lines (a single non-From line is tolerated when a From line follows).
        if (lines[0].Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.From)
        {
            Pullenti.Ner.Token t1 = lines[0].EndToken;
            for (; i < lines.Count; i++)
            {
                if (lines[i].Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.From)
                {
                    t1 = lines[i].EndToken;
                }
                else if (((i + 1) < lines.Count) && lines[i + 1].Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.From)
                {
                }
                else
                {
                    break;
                }
            }
            MailReferent mail = new MailReferent() { Kind = MailKind.Head };
            Pullenti.Ner.ReferentToken mt = new Pullenti.Ner.ReferentToken(mail, lines[0].BeginToken, t1);
            mail.Text = Pullenti.Ner.Core.MiscHelper.GetTextValueOfMetaToken(mt, Pullenti.Ner.Core.GetTextAttr.KeepRegister);
            ad.RegisterReferent(mail);
            mail.AddOccurenceOfRefTok(mt);
        }
        // i0 = index of the first line after the head; t2 = first token of the tail (null when no tail was found).
        int i0 = i;
        Pullenti.Ner.Token t2 = null;
        int err = 0;
        // Search for the tail start by scanning the block backwards.
        for (i = lines.Count - 1; i >= i0; i--)
        {
            Pullenti.Ner.Mail.Internal.MailLine li = lines[i];
            if (li.Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.BestRegards)
            {
                t2 = lines[i].BeginToken;
                // Extend the tail upwards over further short BestRegards lines.
                for (--i; i >= i0; i--)
                {
                    if (lines[i].Typ == 
Pullenti.Ner.Mail.Internal.MailLine.Types.BestRegards && (lines[i].Words < 2))
                    {
                        t2 = lines[i].BeginToken;
                    }
                    else if ((i > i0 && (lines[i].Words < 3) && lines[i - 1].Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.BestRegards) && (lines[i - 1].Words < 2))
                    {
                        i--;
                        t2 = lines[i].BeginToken;
                    }
                    else
                    {
                        break;
                    }
                }
                break;
            }
            // A short line with referents is a tail candidate and resets the error counter.
            if (li.Refs.Count > 0 && (li.Words < 3) && i > i0)
            {
                err = 0;
                t2 = li.BeginToken;
                continue;
            }
            // A line with more than 10 words discards the current candidate.
            if (li.Words > 10)
            {
                t2 = null;
                continue;
            }
            // More than two lines with more than 2 words also discard the candidate.
            if (li.Words > 2)
            {
                if ((++err) > 2)
                {
                    t2 = null;
                }
            }
        }
        // Fallback: the last untyped line without words whose first referent is a person.
        if (t2 == null)
        {
            for (i = lines.Count - 1; i >= i0; i--)
            {
                Pullenti.Ner.Mail.Internal.MailLine li = lines[i];
                if (li.Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.Undefined)
                {
                    if (li.Refs.Count > 0 && (li.Refs[0] is Pullenti.Ner.Person.PersonReferent))
                    {
                        if (li.Words == 0 && i > i0)
                        {
                            t2 = li.BeginToken;
                            break;
                        }
                    }
                }
            }
        }
        // Hello: the first Hello line, reachable only through untyped lines without words and referents.
        for (int ii = i0; ii < lines.Count; ii++)
        {
            if (lines[ii].Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.Hello)
            {
                MailReferent mail = new MailReferent() { Kind = MailKind.Hello };
                Pullenti.Ner.ReferentToken mt = new Pullenti.Ner.ReferentToken(mail, lines[i0].BeginToken, lines[ii].EndToken);
                if (mt.LengthChar > 0)
                {
                    mail.Text = Pullenti.Ner.Core.MiscHelper.GetTextValueOfMetaToken(mt, Pullenti.Ner.Core.GetTextAttr.KeepRegister);
                    ad.RegisterReferent(mail);
                    mail.AddOccurenceOfRefTok(mt);
                    i0 = ii + 1;
                }
                break;
            }
            else if (lines[ii].Typ != Pullenti.Ner.Mail.Internal.MailLine.Types.Undefined || lines[ii].Words > 0 || lines[ii].Refs.Count > 0)
            {
                break;
            }
        }
        if (i0 < lines.Count)
        {
            // Body: from line i0 up to the token before the tail (or to the end of the block when
            // there is no tail). Skipped when the tail starts at the very first token of the chain.
            if (t2 != null && t2.Previous == null)
            {
            }
            else
            {
                MailReferent mail = new MailReferent() { Kind = MailKind.Body };
                Pullenti.Ner.ReferentToken mt = new Pullenti.Ner.ReferentToken(mail, lines[i0].BeginToken, (t2 != null && t2.Previous != null ? 
t2.Previous : lines[lines.Count - 1].EndToken));
                if (mt.LengthChar > 0)
                {
                    mail.Text = Pullenti.Ner.Core.MiscHelper.GetTextValueOfMetaToken(mt, Pullenti.Ner.Core.GetTextAttr.KeepRegister);
                    ad.RegisterReferent(mail);
                    mail.AddOccurenceOfRefTok(mt);
                }
            }
            // Tail: from t2 to the end of the block; referents of the tail lines are added to it.
            if (t2 != null)
            {
                MailReferent mail = new MailReferent() { Kind = MailKind.Tail };
                Pullenti.Ner.ReferentToken mt = new Pullenti.Ner.ReferentToken(mail, t2, lines[lines.Count - 1].EndToken);
                if (mt.LengthChar > 0)
                {
                    mail.Text = Pullenti.Ner.Core.MiscHelper.GetTextValueOfMetaToken(mt, Pullenti.Ner.Core.GetTextAttr.KeepRegister);
                    ad.RegisterReferent(mail);
                    mail.AddOccurenceOfRefTok(mt);
                }
                for (i = i0; i < lines.Count; i++)
                {
                    if (lines[i].BeginChar >= t2.BeginChar)
                    {
                        foreach (Pullenti.Ner.Referent r in lines[i].Refs)
                        {
                            mail.AddRef(r, 0);
                        }
                    }
                }
            }
        }
    }
}
// Main function for extracting objects.
// Runs at most two passes over the token chain: the first pass may detect new
// denominations (via the external ontology or TryAttach); the second pass runs only if
// the first one detected something and then only matches against the local ontology.
public override void Process(Pullenti.Ner.Core.AnalysisKit kit)
{
    Pullenti.Ner.Core.AnalyzerDataWithOntology ad = kit.GetAnalyzerData(this) as Pullenti.Ner.Core.AnalyzerDataWithOntology;
    for (int k = 0; k < 2; k++)
    {
        bool detectNewDenoms = false;
        // Start time of the pass, used for the one-minute limit below.
        DateTime dt = DateTime.Now;
        for (Pullenti.Ner.Token t = kit.FirstToken; t != null; t = t.Next)
        {
            // Candidates must be preceded by whitespace, a comma, or a token that can open a bracket sequence.
            if (t.IsWhitespaceBefore)
            {
            }
            else if (t.Previous != null && ((t.Previous.IsCharOf(",") || Pullenti.Ner.Core.BracketHelper.CanBeStartOfSequence(t.Previous, false, false))))
            {
            }
            else
            {
                continue;
            }
            Pullenti.Ner.ReferentToken rt0 = this.TryAttachSpec(t);
            if (rt0 != null)
            {
                rt0.Referent = ad.RegisterReferent(rt0.Referent);
                kit.EmbedToken(rt0);
                t = rt0;
                continue;
            }
            if (!t.Chars.IsLetter)
            {
                continue;
            }
            if (!this.CanBeStartOfDenom(t))
            {
                continue;
            }
            // Abort the pass once it has been running for more than one minute.
            if (((DateTime.Now - dt)).TotalMinutes > 1)
            {
                break;
            }
            // First choice: a denomination already known to the local ontology; a clone with
            // cleared occurrences is embedded (it is not registered again).
            List <Pullenti.Ner.Core.IntOntologyToken> ot = null;
            ot = ad.LocalOntology.TryAttach(t, null, false);
            if (ot != null && (ot[0].Item.Referent is DenominationReferent))
            {
                if (this.CheckAttach(ot[0].BeginToken, ot[0].EndToken))
                {
                    DenominationReferent cl = ot[0].Item.Referent.Clone() as DenominationReferent;
                    cl.Occurrence.Clear();
                    Pullenti.Ner.ReferentToken rt = new Pullenti.Ner.ReferentToken(cl, ot[0].BeginToken, ot[0].EndToken);
                    kit.EmbedToken(rt);
                    t = rt;
                    continue;
                }
            }
            // Everything below runs on the first pass only.
            if (k > 0)
            {
                continue;
            }
            // Second choice: the external ontology of the kit.
            if (t != null && t.Kit.Ontology != null)
            {
                if ((((ot = t.Kit.Ontology.AttachToken(DenominationReferent.OBJ_TYPENAME, t)))) != null)
                {
                    if (this.CheckAttach(ot[0].BeginToken, ot[0].EndToken))
                    {
                        DenominationReferent dr = new DenominationReferent();
                        dr.MergeSlots(ot[0].Item.Referent, true);
                        Pullenti.Ner.ReferentToken rt = new Pullenti.Ner.ReferentToken(ad.RegisterReferent(dr), ot[0].BeginToken, ot[0].EndToken);
                        kit.EmbedToken(rt);
                        t = rt;
                        continue;
                    }
                }
            }
            // Last choice: general attachment; a hit marks that a second pass is worthwhile.
            rt0 = this.TryAttach(t, false);
            if (rt0 != null)
            {
                rt0.Referent = ad.RegisterReferent(rt0.Referent);
                kit.EmbedToken(rt0);
                detectNewDenoms = true;
                t = rt0;
                // Stop the pass once the local ontology holds more than 1000 items.
                if 
(ad.LocalOntology.Items.Count > 1000)
                {
                    break;
                }
            }
        }
        if (!detectNewDenoms)
        {
            break;
        }
    }
}
/// <summary>
/// Run the analysis. The base implementation is empty.
/// </summary>
/// <param name="kit">container with the data</param>
public virtual void Process(Pullenti.Ner.Core.AnalysisKit kit) { }
/// <summary>
/// Extracts measures from the token chain. Unit names found in the external ontology are
/// offered to the parser as additional units, and afterwards every extracted
/// <c>UnitReferent</c> is linked to the first ontology item sharing one of its names.
/// </summary>
/// <param name="kit">analysis kit that supplies the tokens and the optional external ontology</param>
public override void Process(Pullenti.Ner.Core.AnalysisKit kit)
{
    Pullenti.Ner.Core.AnalyzerData ad = kit.GetAnalyzerData(this);
    // Collect names of ontology units whose m_Unit is null as additional termins.
    Pullenti.Ner.Core.TerminCollection addunits = null;
    if (kit.Ontology != null)
    {
        addunits = new Pullenti.Ner.Core.TerminCollection();
        foreach (Pullenti.Ner.ExtOntologyItem r in kit.Ontology.Items)
        {
            UnitReferent uu = r.Referent as UnitReferent;
            if (uu == null)
            {
                continue;
            }
            if (uu.m_Unit != null)
            {
                continue;
            }
            foreach (Pullenti.Ner.Slot s in uu.Slots)
            {
                if (s.TypeName == UnitReferent.ATTR_NAME || s.TypeName == UnitReferent.ATTR_FULLNAME)
                {
                    addunits.Add(new Pullenti.Ner.Core.Termin(s.Value as string) { Tag = uu });
                }
            }
        }
    }
    for (Pullenti.Ner.Token t = kit.FirstToken; t != null; t = t.Next)
    {
        // Try the minimal parser first, then the full one.
        Pullenti.Ner.Measure.Internal.MeasureToken mt = Pullenti.Ner.Measure.Internal.MeasureToken.TryParseMinimal(t, addunits, false);
        if (mt == null)
        {
            mt = Pullenti.Ner.Measure.Internal.MeasureToken.TryParse(t, addunits, true, false, false, false);
        }
        if (mt == null)
        {
            continue;
        }
        List <Pullenti.Ner.ReferentToken> rts = mt.CreateRefenetsTokensWithRegister(ad, true);
        if (rts == null)
        {
            continue;
        }
        for (int i = 0; i < rts.Count; i++)
        {
            Pullenti.Ner.ReferentToken rt = rts[i];
            t.Kit.EmbedToken(rt);
            t = rt;
            // Later tokens that begin or end where the embedded one does are re-pointed at it.
            for (int j = i + 1; j < rts.Count; j++)
            {
                if (rts[j].BeginToken == rt.BeginToken)
                {
                    rts[j].BeginToken = t;
                }
                if (rts[j].EndToken == rt.EndToken)
                {
                    rts[j].EndToken = t;
                }
            }
        }
    }
    // Link extracted units back to the external ontology by name.
    if (kit.Ontology != null)
    {
        foreach (Pullenti.Ner.Referent e in ad.Referents)
        {
            UnitReferent u = e as UnitReferent;
            if (u == null)
            {
                continue;
            }
            foreach (Pullenti.Ner.ExtOntologyItem r in kit.Ontology.Items)
            {
                UnitReferent uu = r.Referent as UnitReferent;
                if (uu == null)
                {
                    continue;
                }
                bool ok = false;
                foreach (Pullenti.Ner.Slot s in uu.Slots)
                {
                    if (s.TypeName == UnitReferent.ATTR_NAME || s.TypeName == UnitReferent.ATTR_FULLNAME)
                    {
                        if (u.FindSlot(null, s.Value, true) != null)
                        {
                            ok = true;
                            break;
                        }
                    }
                }
                // Only the first matching ontology item is attached.
                if (ok)
                {
                    u.OntologyItems = new List <Pullenti.Ner.ExtOntologyItem>();
                    
u.OntologyItems.Add(r);
                    break;
                }
            }
        }
    }
}
/// <summary>
/// Full processing of a text: builds the analysis kit (tokenization/morphology), runs the
/// analyzers through <c>_process2</c>, collects the entities and fills the analysis result,
/// writing timing messages to the log unless <paramref name="noLog"/> is set.
/// </summary>
/// <param name="text">text to analyze</param>
/// <param name="ontoRegine">stored in the kit as <c>OntoRegime</c>; when true, no entities are copied into the result</param>
/// <param name="noLog">when true, log messages are suppressed</param>
/// <param name="extOntology">optional external ontology</param>
/// <param name="lang">optional language passed to the kit constructor</param>
/// <returns>the filled analysis result</returns>
internal AnalysisResult _process(SourceOfAnalysis text, bool ontoRegine, bool noLog, ExtOntology extOntology = null, Pullenti.Morph.MorphLang lang = null)
{
    m_Breaked = false;
    this.PrepareProgress();
    Stopwatch sw0 = Stopwatch.StartNew();
    this.ManageReferentLinks();
    if (!noLog)
    {
        this.OnProgressHandler(this, new ProgressChangedEventArgs(0, "Морфологический анализ"));
    }
    Pullenti.Ner.Core.AnalysisKit kit = new Pullenti.Ner.Core.AnalysisKit(text, false, lang, OnProgressHandler) { Ontology = extOntology, Processor = this, OntoRegime = ontoRegine };
    AnalysisResult ar = new AnalysisResult();
    // The stopwatch is paused here and resumed before the analyzers run.
    sw0.Stop();
    string msg;
    // NOTE(review): unlike the 0% notification above, this one is not guarded by noLog — confirm that is intended.
    this.OnProgressHandler(this, new ProgressChangedEventArgs(100, string.Format("Морфологический анализ завершён")));
    // Count the tokens for the log message.
    int k = 0;
    for (Token t = kit.FirstToken; t != null; t = t.Next)
    {
        k++;
    }
    if (!noLog)
    {
        msg = string.Format("Из {0} символов текста выделено {1} термов за {2} ms", text.Text.Length, k, sw0.ElapsedMilliseconds);
        if (!kit.BaseLanguage.IsUndefined)
        {
            msg += string.Format(", базовый язык {0}", kit.BaseLanguage.ToString());
        }
        this.OnMessage(msg);
        ar.Log.Add(msg);
        if (text.CrlfCorrectedCount > 0)
        {
            ar.Log.Add(string.Format("{0} переходов на новую строку заменены на пробел", text.CrlfCorrectedCount));
        }
        if (kit.FirstToken == null)
        {
            ar.Log.Add("Пустой текст");
        }
    }
    sw0.Start();
    if (kit.FirstToken != null)
    {
        this._process2(kit, ar, noLog);
    }
    if (!ontoRegine)
    {
        this._createRes(kit, ar, extOntology, noLog);
    }
    sw0.Stop();
    if (!noLog)
    {
        // The throughput figure is added only for runs longer than five seconds;
        // f is characters per millisecond.
        if (sw0.ElapsedMilliseconds > 5000)
        {
            float f = (float)text.Text.Length;
            f /= sw0.ElapsedMilliseconds;
            msg = string.Format("Обработка {0} знаков выполнена за {1} ({2} Kb/sec)", text.Text.Length, OutSecs(sw0.ElapsedMilliseconds), f);
        }
        else
        {
            msg = string.Format("Обработка {0} знаков выполнена за {1}", text.Text.Length, OutSecs(sw0.ElapsedMilliseconds));
        }
        this.OnMessage(msg);
        ar.Log.Add(msg);
    }
    // Flag the result when the configured timeout has been exceeded.
    if (TimeoutSeconds > 0)
    {
        if (((DateTime.Now - kit.StartDate)).TotalSeconds > TimeoutSeconds)
        {
            ar.IsTimeoutBreaked = true;
        }
    }
    ar.Sofa 
= text;
    if (!ontoRegine)
    {
        ar.Entities.AddRange(kit.Entities);
    }
    ar.FirstToken = kit.FirstToken;
    ar.Ontology = extOntology;
    ar.BaseLanguage = kit.BaseLanguage;
    return(ar);
}
/// <summary>
/// Recomputes generalization relations between the referents of all analyzers: builds a
/// graph of referents, finds pairs where one referent can be general for the other (but
/// not vice versa), prunes redundant parents, then either merges a parent into its only
/// child or sets the child's <c>GeneralReferent</c>, and finally fixes tokens and removes
/// merged referents.
/// </summary>
/// <param name="proc">processor whose analyzers are inspected</param>
/// <param name="kit">analysis kit holding the per-analyzer data and the token chain</param>
public static void RefreshGenerals(Pullenti.Ner.Processor proc, Pullenti.Ner.Core.AnalysisKit kit)
{
    // all: analyzer name -> compare string -> referents sharing that string.
    Dictionary <string, Dictionary <string, List <Pullenti.Ner.Referent> > > all = new Dictionary <string, Dictionary <string, List <Pullenti.Ner.Referent> > >();
    List <Node> allRefs = new List <Node>();
    foreach (Pullenti.Ner.Analyzer a in proc.Analyzers)
    {
        Pullenti.Ner.Core.AnalyzerData ad = kit.GetAnalyzerData(a);
        if (ad == null)
        {
            continue;
        }
        foreach (Pullenti.Ner.Referent r in ad.Referents)
        {
            // Each referent gets a graph node, temporarily stored in its Tag.
            Node nod = new Node() { Ref = r, Ad = ad };
            allRefs.Add(nod);
            r.Tag = nod;
            Dictionary <string, List <Pullenti.Ner.Referent> > si;
            if (!all.TryGetValue(a.Name, out si))
            {
                all.Add(a.Name, (si = new Dictionary <string, List <Pullenti.Ner.Referent> >()));
            }
            List <string> strs = r.GetCompareStrings();
            if (strs == null || strs.Count == 0)
            {
                continue;
            }
            foreach (string s in strs)
            {
                if (s == null)
                {
                    continue;
                }
                List <Pullenti.Ner.Referent> li;
                if (!si.TryGetValue(s, out li))
                {
                    si.Add(s, (li = new List <Pullenti.Ner.Referent>()));
                }
                li.Add(r);
            }
        }
    }
    // Record which nodes reference which through slot values.
    foreach (Node r in allRefs)
    {
        foreach (Pullenti.Ner.Slot s in r.Ref.Slots)
        {
            if (s.Value is Pullenti.Ner.Referent)
            {
                Pullenti.Ner.Referent to = s.Value as Pullenti.Ner.Referent;
                Node tn = to.Tag as Node;
                if (tn == null)
                {
                    continue;
                }
                if (tn.RefsFrom == null)
                {
                    tn.RefsFrom = new List <Node>();
                }
                tn.RefsFrom.Add(r);
                if (r.RefsTo == null)
                {
                    r.RefsTo = new List <Node>();
                }
                r.RefsTo.Add(tn);
            }
        }
    }
    // Pairwise comparison inside each bucket; buckets with more than 3000 referents are skipped.
    foreach (Dictionary <string, List <Pullenti.Ner.Referent> > ty in all.Values)
    {
        foreach (List <Pullenti.Ner.Referent> li in ty.Values)
        {
            if (li.Count < 2)
            {
                continue;
            }
            if (li.Count > 3000)
            {
                continue;
            }
            for (int i = 0; i < li.Count; i++)
            {
                for (int j = i + 1; j < li.Count; j++)
                {
                    // n1 = the more general node, n2 = the more specific one (one-directional relation only).
                    Node n1 = null;
                    Node n2 = null;
                    if (li[i].CanBeGeneralFor(li[j]) && !li[j].CanBeGeneralFor(li[i]))
                    {
                        n1 = li[i].Tag as Node;
                        n2 = li[j].Tag as Node;
                    }
                    else if (li[j].CanBeGeneralFor(li[i]) && !li[i].CanBeGeneralFor(li[j]))
                    {
                        n1 = li[j].Tag as Node;
                        n2 = li[i].Tag as Node;
                    }
                    if 
(n1 != null && n2 != null)
                    {
                        if (n1.GenFrom == null)
                        {
                            n1.GenFrom = new List <Node>();
                        }
                        if (!n1.GenFrom.Contains(n2))
                        {
                            n1.GenFrom.Add(n2);
                        }
                        if (n2.GenTo == null)
                        {
                            n2.GenTo = new List <Node>();
                        }
                        if (!n2.GenTo.Contains(n1))
                        {
                            n2.GenTo.Add(n1);
                        }
                    }
                }
            }
        }
    }
    // Drop a parent p of n when another parent of n reports IsInGenParentsOrHigher(p).
    foreach (Node n in allRefs)
    {
        if (n.GenTo != null && n.GenTo.Count > 1)
        {
            for (int i = n.GenTo.Count - 1; i >= 0; i--)
            {
                Node p = n.GenTo[i];
                bool del = false;
                for (int j = 0; j < n.GenTo.Count; j++)
                {
                    if (j != i && n.GenTo[j].IsInGenParentsOrHigher(p))
                    {
                        del = true;
                    }
                }
                if (del)
                {
                    p.GenFrom.Remove(n);
                    n.GenTo.RemoveAt(i);
                }
            }
        }
    }
    // A node with exactly one parent: if the parent has only this child, the parent is merged
    // into the child and marked deleted; otherwise the parent becomes the child's GeneralReferent.
    foreach (Node n in allRefs)
    {
        if (!n.Deleted && n.GenTo != null && n.GenTo.Count == 1)
        {
            Node p = n.GenTo[0];
            if (p.GenFrom.Count == 1)
            {
                n.Ref.MergeSlots(p.Ref, true);
                p.Ref.Tag = n.Ref;
                p.ReplaceValues(n);
                foreach (Pullenti.Ner.TextAnnotation o in p.Ref.Occurrence)
                {
                    n.Ref.AddOccurence(o);
                }
                p.Deleted = true;
            }
            else
            {
                n.Ref.GeneralReferent = p.Ref;
            }
        }
    }
    for (Pullenti.Ner.Token t = kit.FirstToken; t != null; t = t.Next)
    {
        _correctReferents(t);
    }
    // Remove merged referents from their analyzer data and clear the temporary tags.
    foreach (Node n in allRefs)
    {
        if (n.Deleted)
        {
            n.Ad.RemoveReferent(n.Ref);
        }
        n.Ref.Tag = null;
    }
}
/// <summary>
/// Runs every non-ignored analyzer over the kit, honouring user interruption and the
/// timeout, then recomputes generalization relations. Exceptions thrown by an analyzer or
/// by the recomputation are caught; they are reported and stored in the result only when
/// logging is enabled.
/// </summary>
/// <param name="kit">analysis kit to process</param>
/// <param name="ar">result that receives log messages and exceptions</param>
/// <param name="noLog">when true, messages, progress notifications and exception reports are suppressed</param>
void _process2(Pullenti.Ner.Core.AnalysisKit kit, AnalysisResult ar, bool noLog)
{
    string msg;
    Stopwatch sw = Stopwatch.StartNew();
    bool stopByTimeout = false;
    // Iterate over a copy of the analyzer list.
    List <Analyzer> anals = new List <Analyzer>(m_Analyzers);
    for (int ii = 0; ii < anals.Count; ii++)
    {
        Analyzer c = anals[ii];
        if (c.IgnoreThisAnalyzer)
        {
            continue;
        }
        if (m_Breaked)
        {
            if (!noLog)
            {
                msg = "Процесс прерван пользователем";
                this.OnMessage(msg);
                ar.Log.Add(msg);
            }
            break;
        }
        if (TimeoutSeconds > 0 && !stopByTimeout)
        {
            if (((DateTime.Now - kit.StartDate)).TotalSeconds > TimeoutSeconds)
            {
                // NOTE(review): setting m_Breaked here makes the check above break the loop on the
                // next non-ignored analyzer (with the "interrupted by user" message), so the
                // INSTRUMENT exception below seems to apply only to the analyzer being visited when
                // the timeout is detected — confirm intended behavior.
                m_Breaked = true;
                if (!noLog)
                {
                    msg = "Процесс прерван по таймауту";
                    this.OnMessage(msg);
                    ar.Log.Add(msg);
                }
                stopByTimeout = true;
            }
        }
        // After a timeout only the analyzer named "INSTRUMENT" is still allowed to run.
        if (stopByTimeout)
        {
            if (c.Name == "INSTRUMENT")
            {
            }
            else
            {
                continue;
            }
        }
        if (!noLog)
        {
            this.OnProgressHandler(c, new ProgressChangedEventArgs(0, string.Format("Работа \"{0}\"", c.Caption)));
        }
        try
        {
            sw.Reset();
            sw.Start();
            c.Process(kit);
            sw.Stop();
            Pullenti.Ner.Core.AnalyzerData dat = kit.GetAnalyzerData(c);
            if (!noLog)
            {
                msg = string.Format("Анализатор \"{0}\" выделил {1} объект(ов) за {2}", c.Caption, (dat == null ? 0 : dat.Referents.Count), OutSecs(sw.ElapsedMilliseconds));
                this.OnMessage(msg);
                ar.Log.Add(msg);
            }
        }
        catch (Exception ex)
        {
            // A failing analyzer does not stop the others; with noLog the exception is dropped silently.
            if (!noLog)
            {
                ex = new Exception(string.Format("Ошибка в анализаторе \"{0}\" ({1})", c.Caption, ex.Message), ex);
                this.OnMessage(ex);
                ar.AddException(ex);
            }
        }
    }
    if (!noLog)
    {
        this.OnProgressHandler(null, new ProgressChangedEventArgs(0, "Пересчёт отношений обобщения"));
    }
    try
    {
        sw.Reset();
        sw.Start();
        Pullenti.Ner.Core.Internal.GeneralRelationHelper.RefreshGenerals(this, kit);
        sw.Stop();
        if (!noLog)
        {
            msg = string.Format("Отношение обобщение пересчитано за {0}", OutSecs(sw.ElapsedMilliseconds));
            this.OnMessage(msg);
            ar.Log.Add(msg);
        }
    }
    catch (Exception ex)
    {
        if (!noLog)
        {
            ex = new Exception("Ошибка пересчёта отношения обобщения", ex);
            this.OnMessage(ex);
            ar.AddException(ex);
        }
    }
}
/// <summary>
/// Extracts phone numbers: at every token parses a sequence of phone items, looks
/// backwards for a preceding phone referent, handles prefix items, attaches the phone
/// referents and embeds them into the kit.
/// </summary>
/// <param name="kit">analysis kit that supplies the tokens</param>
public override void Process(Pullenti.Ner.Core.AnalysisKit kit)
{
    PhoneAnalizerData ad = kit.GetAnalyzerData(this) as PhoneAnalizerData;
    for (Pullenti.Ner.Token t = kit.FirstToken; t != null; t = t.Next)
    {
        List<Pullenti.Ner.Phone.Internal.PhoneItemToken> pli = Pullenti.Ner.Phone.Internal.PhoneItemToken.TryAttachAll(t, 15);
        if (pli == null || pli.Count == 0)
            continue;
        // Look backwards for a previous phone. Other referent tokens and parenthesized groups are
        // stepped over; kkk counts the remaining tokens that are neither one of ",;/\" nor "and".
        PhoneReferent prevPhone = null;
        int kkk = 0;
        for (Pullenti.Ner.Token tt = t.Previous; tt != null; tt = tt.Previous)
        {
            if (tt.GetReferent() is PhoneReferent)
            {
                prevPhone = tt.GetReferent() as PhoneReferent;
                break;
            }
            else if (tt is Pullenti.Ner.ReferentToken)
            {
            }
            else if (tt.IsChar(')'))
            {
                // Find the matching '(' within at most 100 tokens.
                Pullenti.Ner.Token ttt = tt.Previous;
                int cou = 0;
                for (; ttt != null; ttt = ttt.Previous)
                {
                    if (ttt.IsChar('('))
                        break;
                    else if ((++cou) > 100)
                        break;
                }
                if (ttt == null || !ttt.IsChar('('))
                    break;
                tt = ttt;
            }
            else if (!tt.IsCharOf(",;/\\") && !tt.IsAnd)
            {
                if ((++kkk) > 5)
                    break;
                if (tt.IsNewlineBefore || tt.IsNewlineAfter)
                    break;
            }
        }
        // Consume leading prefix items; j ends up as the index of the first non-prefix item.
        int j = 0;
        bool isPhoneBefore = false;
        bool isPref = false;
        PhoneKind ki = PhoneKind.Undefined;
        while (j < pli.Count)
        {
            if (pli[j].ItemType == Pullenti.Ner.Phone.Internal.PhoneItemToken.PhoneItemType.Prefix)
            {
                if (ki == PhoneKind.Undefined)
                    ki = pli[j].Kind;
                isPref = true;
                isPhoneBefore = true;
                j++;
                if ((j < pli.Count) && pli[j].ItemType == Pullenti.Ner.Phone.Internal.PhoneItemToken.PhoneItemType.Delim)
                    j++;
            }
            else if (((j + 1) < pli.Count) && pli[j + 1].ItemType == Pullenti.Ner.Phone.Internal.PhoneItemToken.PhoneItemType.Prefix && j == 0)
            {
                // The first item is followed by a prefix: take its kind and drop it.
                if (ki == PhoneKind.Undefined)
                    ki = pli[0].Kind;
                isPref = true;
                pli.RemoveAt(0);
            }
            else
                break;
        }
        if (prevPhone != null)
            isPhoneBefore = true;
        // A lone number item preceded by a token that m_Schemes recognizes is skipped.
        if (pli.Count == 1 && pli[0].ItemType == Pullenti.Ner.Phone.Internal.PhoneItemToken.PhoneItemType.Number)
        {
            Pullenti.Ner.Token tt = t.Previous;
            if ((tt is Pullenti.Ner.TextToken) && !tt.Chars.IsLetter)
                tt = tt.Previous;
            if (tt is Pullenti.Ner.TextToken)
            {
                if (Pullenti.Ner.Uri.UriAnalyzer.m_Schemes.TryParse(tt, 
Pullenti.Ner.Core.TerminParseAttr.No) != null)
                    continue;
            }
        }
        List<Pullenti.Ner.ReferentToken> rts = this.TryAttach(pli, j, isPhoneBefore, prevPhone);
        // Retry from the first later prefix item when the first attempt failed.
        if (rts == null)
        {
            for (j = 1; j < pli.Count; j++)
            {
                if (pli[j].ItemType == Pullenti.Ner.Phone.Internal.PhoneItemToken.PhoneItemType.Prefix)
                {
                    pli.RemoveRange(0, j);
                    rts = this.TryAttach(pli, 1, true, prevPhone);
                    break;
                }
            }
        }
        if (rts == null)
            // Nothing attached: skip the whole parsed item sequence.
            t = pli[pli.Count - 1].EndToken;
        else
        {
            // Inherit the kind of a directly preceding non-mobile phone when no prefix set one.
            if ((ki == PhoneKind.Undefined && prevPhone != null && !isPref) && prevPhone.Kind != PhoneKind.Mobile && kkk == 0)
                ki = prevPhone.Kind;
            foreach (Pullenti.Ner.ReferentToken rt in rts)
            {
                PhoneReferent ph = rt.Referent as PhoneReferent;
                if (ki != PhoneKind.Undefined)
                    ph.Kind = ki;
                else
                {
                    // For the first phone, a single-letter marker at a line start (or after a table
                    // control char) is absorbed into the token; the fax and mobile markers also set the kind.
                    if (rt == rts[0] && (rt.WhitespacesBeforeCount < 3))
                    {
                        Pullenti.Ner.Token tt1 = rt.BeginToken.Previous;
                        if (tt1 != null && tt1.IsTableControlChar)
                            tt1 = tt1.Previous;
                        if ((tt1 is Pullenti.Ner.TextToken) && ((tt1.IsNewlineBefore || ((tt1.Previous != null && tt1.Previous.IsTableControlChar)))))
                        {
                            string term = (tt1 as Pullenti.Ner.TextToken).Term;
                            if (term == "T" || term == "Т")
                                rt.BeginToken = tt1;
                            else if (term == "Ф" || term == "F")
                            {
                                ph.Kind = (ki = PhoneKind.Fax);
                                rt.BeginToken = tt1;
                            }
                            else if (term == "M" || term == "М")
                            {
                                ph.Kind = (ki = PhoneKind.Mobile);
                                rt.BeginToken = tt1;
                            }
                        }
                    }
                    ph.Correct();
                }
                rt.Referent = ad.RegisterReferent(rt.Referent);
                kit.EmbedToken(rt);
                t = rt;
            }
        }
    }
}
// Two-pass weapon extraction.
// Pass 1 embeds every weapon referent produced by TryAttach and indexes it by model
// (with and without the brand in front) and by name.
// Pass 2 re-scans the text and embeds further mentions that match those indexes.
public override void Process(Pullenti.Ner.Core.AnalysisKit kit)
{
    Pullenti.Ner.Core.AnalyzerData data = kit.GetAnalyzerData(this);
    Pullenti.Ner.Core.TerminCollection modelTermins = new Pullenti.Ner.Core.TerminCollection();
    Dictionary<string, List<Pullenti.Ner.Referent>> referentsByModel = new Dictionary<string, List<Pullenti.Ner.Referent>>();
    Pullenti.Ner.Core.TerminCollection nameTermins = new Pullenti.Ner.Core.TerminCollection();

    // Pass 1: primary recognition.
    for (Pullenti.Ner.Token t = kit.FirstToken; t != null; t = t.Next)
    {
        List<Pullenti.Ner.Weapon.Internal.WeaponItemToken> parsed = Pullenti.Ner.Weapon.Internal.WeaponItemToken.TryParseList(t, 10);
        if (parsed == null)
        {
            continue;
        }
        List<Pullenti.Ner.ReferentToken> attached = this.TryAttach(parsed, false);
        if (attached == null)
        {
            continue;
        }
        foreach (Pullenti.Ner.ReferentToken rt in attached)
        {
            rt.Referent = data.RegisterReferent(rt.Referent);
            kit.EmbedToken(rt);
            t = rt;
            foreach (Pullenti.Ner.Slot slot in rt.Referent.Slots)
            {
                if (slot.TypeName == WeaponReferent.ATTR_NAME)
                {
                    nameTermins.Add(new Pullenti.Ner.Core.Termin(slot.Value.ToString()) { Tag = rt.Referent });
                    continue;
                }
                if (slot.TypeName != WeaponReferent.ATTR_MODEL)
                {
                    continue;
                }
                // First round indexes the bare model, second round "<brand> <model>".
                // Keys that start with a digit are not indexed.
                string key = slot.Value.ToString();
                for (int round = 0; round < 2; round++)
                {
                    if (!char.IsDigit(key[0]))
                    {
                        List<Pullenti.Ner.Referent> owners;
                        if (!referentsByModel.TryGetValue(key, out owners))
                        {
                            owners = new List<Pullenti.Ner.Referent>();
                            referentsByModel.Add(key, owners);
                        }
                        if (!owners.Contains(rt.Referent))
                        {
                            owners.Add(rt.Referent);
                        }
                        modelTermins.AddString(key, owners, null, false);
                    }
                    if (round > 0)
                    {
                        break;
                    }
                    string brand = rt.Referent.GetStringValue(WeaponReferent.ATTR_BRAND);
                    if (brand == null)
                    {
                        break;
                    }
                    key = string.Format("{0} {1}", brand, key);
                }
            }
        }
    }

    if (referentsByModel.Count == 0 && nameTermins.Termins.Count == 0)
    {
        return;
    }

    // Pass 2: secondary mentions by known name or model.
    for (Pullenti.Ner.Token t = kit.FirstToken; t != null; t = t.Next)
    {
        // A known name that exactly fills a bracket sequence is embedded with its brackets.
        Pullenti.Ner.Core.BracketSequenceToken bracket = Pullenti.Ner.Core.BracketHelper.TryParse(t, Pullenti.Ner.Core.BracketParseAttr.No, 10);
        if (bracket != null)
        {
            Pullenti.Ner.Core.TerminToken inner = nameTermins.TryParse(t.Next, Pullenti.Ner.Core.TerminParseAttr.No);
            if (inner != null && inner.EndToken.Next == bracket.EndToken)
            {
                Pullenti.Ner.ReferentToken wrapped = new Pullenti.Ner.ReferentToken(inner.Termin.Tag as Pullenti.Ner.Referent, bracket.BeginToken, bracket.EndToken);
                kit.EmbedToken(wrapped);
                t = wrapped;
                continue;
            }
        }

        if (!(t is Pullenti.Ner.TextToken) || !t.Chars.IsLetter)
        {
            continue;
        }

        // Models are tried first; names only for tokens that are not all lower case.
        Pullenti.Ner.Core.TerminToken hit = modelTermins.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
        if (hit == null && !t.Chars.IsAllLower)
        {
            hit = nameTermins.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
        }
        if (hit == null)
        {
            continue;
        }

        // Without whitespace after the match, the next token must be one of ",.)" or a bracket.
        if (!hit.IsWhitespaceAfter)
        {
            Pullenti.Ner.Token following = hit.EndToken.Next;
            bool closingPunctuation = following != null && following.IsCharOf(",.)");
            if (!closingPunctuation && !Pullenti.Ner.Core.BracketHelper.IsBracket(following, false))
            {
                continue;
            }
        }

        // A model termin carries a list of referents; it is usable only when unambiguous.
        List<Pullenti.Ner.Referent> candidates = hit.Termin.Tag as List<Pullenti.Ner.Referent>;
        Pullenti.Ner.Referent target = (candidates != null && candidates.Count == 1)
            ? candidates[0]
            : hit.Termin.Tag as Pullenti.Ner.Referent;
        if (target == null)
        {
            continue;
        }

        // Absorb a brand item parsed from the token before the match, if there is one.
        Pullenti.Ner.Weapon.Internal.WeaponItemToken brandItem = Pullenti.Ner.Weapon.Internal.WeaponItemToken.TryParse(hit.BeginToken.Previous, null, false, true);
        if (brandItem != null && brandItem.Typ == Pullenti.Ner.Weapon.Internal.WeaponItemToken.Typs.Brand)
        {
            target.AddSlot(WeaponReferent.ATTR_BRAND, brandItem.Value, false, 0);
            hit.BeginToken = brandItem.BeginToken;
        }
        Pullenti.Ner.ReferentToken mention = new Pullenti.Ner.ReferentToken(target, hit.BeginToken, hit.EndToken);
        kit.EmbedToken(mention);
        t = mention;
    }
}
// Main keyword-extraction routine.
// 1. Runs a DenominationAnalyzer over the kit when no enabled one is registered
//    in the processor.
// 2. First pass: tokens that already carry a referent are handed to _addReferents;
//    verbs outside noun phrases become Predicate keywords; every significant word
//    of a noun phrase becomes an Object keyword, and a phrase with several such
//    words additionally yields a combined Object keyword.
// 3. Second pass: joins adjacent Object keywords ("X of Y", or X followed by a
//    genitive Y) into a union keyword.
// 4. Optionally sorts the referents by rank and registers an auto-annotation.
public override void Process(Pullenti.Ner.Core.AnalysisKit kit)
{
    Pullenti.Ner.Core.AnalyzerData ad = kit.GetAnalyzerData(this);

    bool hasDenoms = false;
    foreach (Pullenti.Ner.Analyzer a in kit.Processor.Analyzers)
    {
        if ((a is Pullenti.Ner.Denomination.DenominationAnalyzer) && !a.IgnoreThisAnalyzer)
        {
            hasDenoms = true;
        }
    }
    if (!hasDenoms)
    {
        Pullenti.Ner.Denomination.DenominationAnalyzer denoms = new Pullenti.Ner.Denomination.DenominationAnalyzer();
        denoms.Process(kit);
    }

    // Scratch buffers reused for every noun phrase.
    List<KeywordReferent> li = new List<KeywordReferent>();
    StringBuilder tmp = new StringBuilder();
    List<string> tmp2 = new List<string>();

    // max = number of top-level tokens, cur = loop iteration index;
    // both are passed to _setRank / _addReferents.
    int max = 0;
    for (Pullenti.Ner.Token t = kit.FirstToken; t != null; t = t.Next)
    {
        max++;
    }

    int cur = 0;
    for (Pullenti.Ner.Token t = kit.FirstToken; t != null; t = t.Next, cur++)
    {
        Pullenti.Ner.Referent r = t.GetReferent();
        if (r != null)
        {
            t = this._addReferents(ad, t, cur, max);
            continue;
        }
        if (!(t is Pullenti.Ner.TextToken))
        {
            continue;
        }
        if (!t.Chars.IsLetter || (t.LengthChar < 3))
        {
            continue;
        }
        string term = (t as Pullenti.Ner.TextToken).Term;
        // "ЕСТЬ" is only considered when the previous text token is a verb.
        if (term == "ЕСТЬ" && !((t.Previous is Pullenti.Ner.TextToken) && t.Previous.Morph.Class.IsVerb))
        {
            continue;
        }

        Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t, Pullenti.Ner.Core.NounPhraseParseAttr.AdjectiveCanBeLast | Pullenti.Ner.Core.NounPhraseParseAttr.ParsePreposition, 0, null);
        if (npt == null)
        {
            // Not a noun phrase: only verbs yield a keyword (of type Predicate).
            Pullenti.Morph.MorphClass mc = t.GetMorphClassInDictionary();
            if (mc.IsVerb && !mc.IsPreposition)
            {
                if ((t as Pullenti.Ner.TextToken).IsVerbBe)
                {
                    continue;
                }
                if (t.IsValue("МОЧЬ", null) || t.IsValue("WOULD", null))
                {
                    continue;
                }
                KeywordReferent kref = new KeywordReferent() { Typ = KeywordType.Predicate };
                string norm = t.GetNormalCaseText(Pullenti.Morph.MorphClass.Verb, Pullenti.Morph.MorphNumber.Singular, Pullenti.Morph.MorphGender.Undefined, false);
                if (norm == null)
                {
                    norm = (t as Pullenti.Ner.TextToken).Lemma;
                }
                // Drop the trailing "СЯ" of a "...ЬСЯ" form, keeping the soft sign.
                // Ordinal comparison keeps the check independent of the current culture.
                if (norm.EndsWith("ЬСЯ", System.StringComparison.Ordinal))
                {
                    norm = norm.Substring(0, norm.Length - 2);
                }
                kref.AddSlot(KeywordReferent.ATTR_VALUE, norm, false, 0);
                List<Pullenti.Semantic.Utils.DerivateGroup> drv = Pullenti.Semantic.Utils.DerivateService.FindDerivates(norm, true, t.Morph.Language);
                _addNormals(kref, drv, norm);
                kref = ad.RegisterReferent(kref) as KeywordReferent;
                _setRank(kref, cur, max);
                // kref is already the registered instance, so it is passed as is
                // (same as in the noun-phrase and union branches below).
                Pullenti.Ner.ReferentToken rt1 = new Pullenti.Ner.ReferentToken(kref, t, t) { Morph = t.Morph };
                kit.EmbedToken(rt1);
                t = rt1;
            }
            continue;
        }

        if (npt.InternalNoun != null)
        {
            continue;
        }
        // Prepositional phrases ending in these words are skipped as a whole.
        if (npt.EndToken.IsValue("ЦЕЛОМ", null) || npt.EndToken.IsValue("ЧАСТНОСТИ", null))
        {
            if (npt.Preposition != null)
            {
                t = npt.EndToken;
                continue;
            }
        }
        if (npt.EndToken.IsValue("СТОРОНЫ", null) && npt.Preposition != null && npt.Preposition.Normal == "С")
        {
            t = npt.EndToken;
            continue;
        }
        if (npt.BeginToken == npt.EndToken)
        {
            // Single-token phrase: ignore prepositions and the adverb "ПОТОМ".
            Pullenti.Morph.MorphClass mc = t.GetMorphClassInDictionary();
            if (mc.IsPreposition)
            {
                continue;
            }
            if (mc.IsAdverb && t.IsValue("ПОТОМ", null))
            {
                continue;
            }
        }

        // One Object keyword per significant word of the noun phrase.
        li.Clear();
        Pullenti.Ner.Token t0 = t;
        for (Pullenti.Ner.Token tt = t; tt != null && tt.EndChar <= npt.EndChar; tt = tt.Next)
        {
            if (!(tt is Pullenti.Ner.TextToken))
            {
                continue;
            }
            if ((tt.LengthChar < 3) || !tt.Chars.IsLetter)
            {
                continue;
            }
            Pullenti.Morph.MorphClass mc = tt.GetMorphClassInDictionary();
            // Function words are skipped, except the word "ОТНОШЕНИЕ".
            if ((mc.IsPreposition || mc.IsPronoun || mc.IsPersonalPronoun) || mc.IsConjunction)
            {
                if (!tt.IsValue("ОТНОШЕНИЕ", null))
                {
                    continue;
                }
            }
            if (mc.IsMisc && Pullenti.Ner.Core.MiscHelper.IsEngArticle(tt))
            {
                continue;
            }
            KeywordReferent kref = new KeywordReferent() { Typ = KeywordType.Object };
            string norm = (tt as Pullenti.Ner.TextToken).Lemma;
            kref.AddSlot(KeywordReferent.ATTR_VALUE, norm, false, 0);
            if (norm != "ЕСТЬ")
            {
                List<Pullenti.Semantic.Utils.DerivateGroup> drv = Pullenti.Semantic.Utils.DerivateService.FindDerivates(norm, true, tt.Morph.Language);
                _addNormals(kref, drv, norm);
            }
            kref = ad.RegisterReferent(kref) as KeywordReferent;
            _setRank(kref, cur, max);
            Pullenti.Ner.ReferentToken rt1 = new Pullenti.Ner.ReferentToken(kref, tt, tt) { Morph = tt.Morph };
            kit.EmbedToken(rt1);
            // Remember the first embedded token so the combined token can start there.
            if (tt == t && li.Count == 0)
            {
                t0 = rt1;
            }
            t = rt1;
            li.Add(kref);
        }

        if (li.Count > 1)
        {
            // Combined keyword for the whole phrase, referencing each word keyword.
            KeywordReferent kref = new KeywordReferent() { Typ = KeywordType.Object };
            tmp2.Clear();
            foreach (KeywordReferent kw in li)
            {
                string s = kw.GetStringValue(KeywordReferent.ATTR_VALUE);
                string n = kw.GetStringValue(KeywordReferent.ATTR_NORMAL);
                tmp2.Add(n != null ? n : s);
                kref.AddSlot(KeywordReferent.ATTR_REF, kw, false, 0);
            }
            string val = npt.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Singular, Pullenti.Morph.MorphGender.Undefined, false);
            kref.AddSlot(KeywordReferent.ATTR_VALUE, val, false, 0);

            // NORMAL = the sorted, space-joined normal forms of the words;
            // stored only when it differs from VALUE.
            tmp.Length = 0;
            tmp2.Sort();
            foreach (string s in tmp2)
            {
                if (tmp.Length > 0)
                {
                    tmp.Append(' ');
                }
                tmp.Append(s);
            }
            string norm = tmp.ToString();
            if (norm != val)
            {
                kref.AddSlot(KeywordReferent.ATTR_NORMAL, norm, false, 0);
            }
            kref = ad.RegisterReferent(kref) as KeywordReferent;
            _setRank(kref, cur, max);
            Pullenti.Ner.ReferentToken rt1 = new Pullenti.Ner.ReferentToken(kref, t0, t) { Morph = npt.Morph };
            kit.EmbedToken(rt1);
            t = rt1;
        }
    }

    // Second pass: unite neighbouring Object keywords.
    cur = 0;
    for (Pullenti.Ner.Token t = kit.FirstToken; t != null; t = t.Next, cur++)
    {
        KeywordReferent kw = t.GetReferent() as KeywordReferent;
        if (kw == null || kw.Typ != KeywordType.Object)
        {
            continue;
        }
        if (t.Next == null || kw.ChildWords > 2)
        {
            continue;
        }
        Pullenti.Ner.Token t1 = t.Next;
        if (t1.IsValue("OF", null) && (t1.WhitespacesAfterCount < 3) && t1.Next != null)
        {
            // "X of [article] Y": step over "of" and an optional English article.
            t1 = t1.Next;
            if ((t1 is Pullenti.Ner.TextToken) && Pullenti.Ner.Core.MiscHelper.IsEngArticle(t1) && t1.Next != null)
            {
                t1 = t1.Next;
            }
        }
        else if (!t1.Morph.Case.IsGenitive || t.WhitespacesAfterCount > 1)
        {
            continue;
        }
        KeywordReferent kw2 = t1.GetReferent() as KeywordReferent;
        if (kw2 == null)
        {
            continue;
        }
        if (kw == kw2)
        {
            continue;
        }
        if (kw2.Typ != KeywordType.Object || (kw.ChildWords + kw2.ChildWords) > 3)
        {
            continue;
        }
        KeywordReferent kwUn = new KeywordReferent();
        kwUn.Union(kw, kw2, Pullenti.Ner.Core.MiscHelper.GetTextValue(t1, t1, Pullenti.Ner.Core.GetTextAttr.No));
        kwUn = ad.RegisterReferent(kwUn) as KeywordReferent;
        _setRank(kwUn, cur, max);
        Pullenti.Ner.ReferentToken rt1 = new Pullenti.Ner.ReferentToken(kwUn, t, t1) { Morph = t.Morph };
        kit.EmbedToken(rt1);
        t = rt1;
    }

    if (SortKeywordsByRank)
    {
        List<Pullenti.Ner.Referent> all = new List<Pullenti.Ner.Referent>(ad.Referents);
        all.Sort(new CompByRank());
        ad.Referents = all;
    }
    if (AnnotationMaxSentences > 0)
    {
        KeywordReferent ano = Pullenti.Ner.Keyword.Internal.AutoannoSentToken.CreateAnnotation(kit, AnnotationMaxSentences);
        if (ano != null)
        {
            ad.RegisterReferent(ano);
        }
    }
}
// Creates a token bound to the given analysis kit; begin and end are stored
// in m_BeginChar and m_EndChar respectively.
public Token(Pullenti.Ner.Core.AnalysisKit kit, int begin, int end)
{
    this.Kit = kit;
    this.m_BeginChar = begin;
    this.m_EndChar = end;
}