/// <summary>
/// Scans the token chain and embeds every referent token produced by <c>TryAttach</c>.
/// </summary>
/// <remarks>
/// Two attachment attempts are made per token: first after a keyword matched by
/// <c>m_Ontology</c> (an optional ':' after the keyword is skipped), and, if that gave
/// nothing, directly on the token when it is a referent token or starts a new line.
/// </remarks>
/// <param name="kit">Analysis kit that supplies the token chain and the analyzer data.</param>
public override void Process(Pullenti.Ner.Core.AnalysisKit kit)
{
    Pullenti.Ner.Core.AnalyzerData data = kit.GetAnalyzerData(this);
    Pullenti.Ner.Token cur = kit.FirstToken;
    while (cur != null)
    {
        Pullenti.Ner.ReferentToken found = null;

        // Attempt 1: a keyword from the ontology followed by the entity itself.
        Pullenti.Ner.Core.TerminToken keyword = (cur.Chars.IsLetter ? m_Ontology.TryParse(cur, Pullenti.Ner.Core.TerminParseAttr.No) : null);
        if (keyword != null)
        {
            Pullenti.Ner.Token after = keyword.EndToken.Next;
            if (after != null && after.IsChar(':'))
            {
                after = after.Next;
            }
            found = this.TryAttach(after, true);
            if (found != null)
            {
                // The keyword becomes part of the resulting token.
                found.BeginToken = cur;
            }
        }

        // Attempt 2: the token itself, only for referent tokens or line starts.
        if (found == null)
        {
            if ((cur is Pullenti.Ner.ReferentToken) || cur.IsNewlineBefore)
            {
                found = this.TryAttach(cur, false);
            }
        }

        if (found != null)
        {
            found.Referent = data.RegisterReferent(found.Referent);
            kit.EmbedToken(found);
            cur = found;
        }
        cur = cur.Next;
    }
}
/// <summary>
/// Builds one <c>GoodReferent</c> per line that yields a list of attribute tokens,
/// embeds the attribute tokens and the enclosing good token into the chain, and finally
/// appends all created goods to the analyzer data.
/// </summary>
/// <param name="kit">Analysis kit that supplies the text, the token chain and the analyzer data.</param>
public override void Process(Pullenti.Ner.Core.AnalysisKit kit)
{
    const int chunkSize = 100000;

    Pullenti.Ner.Core.AnalyzerData data = kit.GetAnalyzerData(this);

    // Progress is reported once per chunk of characters; at least one chunk is assumed.
    int chunkCount = (kit.Sofa.Text.Length + chunkSize - 1) / chunkSize;
    if (chunkCount == 0)
    {
        chunkCount = 1;
    }
    int chunkNo = 0;
    int chunkBorder = 0;

    List<GoodReferent> created = new List<GoodReferent>();
    for (Pullenti.Ner.Token tok = kit.FirstToken; tok != null; tok = tok.Next)
    {
        // Only tokens that start a line are considered.
        if (!tok.IsNewlineBefore)
        {
            continue;
        }
        if (tok.BeginChar > chunkBorder)
        {
            chunkBorder += chunkSize;
            chunkNo++;
            if (!this.OnProgress(chunkNo, chunkCount, kit))
            {
                break;
            }
        }

        // Skip one leading non-letter token (when something follows it).
        if (!tok.Chars.IsLetter && tok.Next != null)
        {
            tok = tok.Next;
        }

        List<Pullenti.Ner.ReferentToken> attrs = Pullenti.Ner.Goods.Internal.GoodAttrToken.TryParseList(tok);
        if (attrs == null || attrs.Count == 0)
        {
            continue;
        }

        GoodReferent item = new GoodReferent();
        for (int n = 0; n < attrs.Count; n++)
        {
            Pullenti.Ner.ReferentToken attr = attrs[n];
            attr.Referent = data.RegisterReferent(attr.Referent);
            // Each registered attribute is attached to the good only once.
            if (item.FindSlot(GoodReferent.ATTR_ATTR, attr.Referent, true) == null)
            {
                item.AddSlot(GoodReferent.ATTR_ATTR, attr.Referent, false, 0);
            }
            kit.EmbedToken(attr);
        }
        created.Add(item);

        Pullenti.Ner.ReferentToken whole = new Pullenti.Ner.ReferentToken(item, attrs[0], attrs[attrs.Count - 1]);
        kit.EmbedToken(whole);
        tok = whole;
    }

    // Goods are added to the referent list directly, after the whole pass.
    foreach (GoodReferent item in created)
    {
        data.Referents.Add(item);
    }
}
/// <summary>
/// Runs <c>_process</c> from the first token and registers the resulting title page
/// referent, if one was produced.
/// </summary>
/// <param name="kit">Analysis kit that supplies the token chain and the analyzer data.</param>
public override void Process(Pullenti.Ner.Core.AnalysisKit kit)
{
    Pullenti.Ner.Core.AnalyzerData data = kit.GetAnalyzerData(this);

    // The end token reported by _process is not needed here.
    Pullenti.Ner.Token endToken;
    TitlePageReferent titlePage = _process(kit.FirstToken, 0, kit, out endToken);
    if (titlePage == null)
    {
        return;
    }
    data.RegisterReferent(titlePage);
}
/// <summary>
/// Tries <c>TryParse</c> on every token; each successful result is registered and
/// embedded, and scanning resumes after the embedded token.
/// </summary>
/// <param name="kit">Analysis kit that supplies the token chain and the analyzer data.</param>
public override void Process(Pullenti.Ner.Core.AnalysisKit kit)
{
    Pullenti.Ner.Core.AnalyzerData data = kit.GetAnalyzerData(this);
    Pullenti.Ner.Token cur = kit.FirstToken;
    while (cur != null)
    {
        Pullenti.Ner.ReferentToken parsed = TryParse(cur);
        if (parsed != null)
        {
            parsed.Referent = data.RegisterReferent(parsed.Referent);
            kit.EmbedToken(parsed);
            cur = parsed;
        }
        cur = cur.Next;
    }
}
/// <summary>
/// Collects the referents of all analyzers into <paramref name="ar"/>, specific analyzers
/// first and the remaining ones second, optionally attaching each referent to the
/// external ontology on the way.
/// </summary>
/// <param name="kit">Analysis kit holding the per-analyzer data.</param>
/// <param name="ar">Result that receives the entities and the log message.</param>
/// <param name="extOntology">External ontology to attach referents to; may be null.</param>
/// <param name="noLog">When true, no message is emitted or logged.</param>
internal void _createRes(Pullenti.Ner.Core.AnalysisKit kit, AnalysisResult ar, ExtOntology extOntology, bool noLog)
{
    Stopwatch timer = Stopwatch.StartNew();
    int attachedCount = 0;

    // Two passes over the analyzers: IsSpecific == true first, then IsSpecific == false.
    foreach (bool wantSpecific in new bool[] { true, false })
    {
        foreach (Analyzer analyzer in Analyzers)
        {
            if (analyzer.IsSpecific != wantSpecific)
            {
                continue;
            }
            Pullenti.Ner.Core.AnalyzerData data = kit.GetAnalyzerData(analyzer);
            if (data == null || data.Referents.Count <= 0)
            {
                continue;
            }
            if (extOntology != null)
            {
                foreach (Referent entity in data.Referents)
                {
                    // Referents that already carry ontology items are left untouched.
                    if (entity.OntologyItems != null)
                    {
                        continue;
                    }
                    entity.OntologyItems = extOntology.AttachReferent(entity);
                    if (entity.OntologyItems != null)
                    {
                        attachedCount++;
                    }
                }
            }
            ar.Entities.AddRange(data.Referents);
        }
    }
    timer.Stop();

    if (extOntology == null || noLog)
    {
        return;
    }
    string msg = string.Format("Привязано {0} объектов к внешней отнологии ({1} элементов) за {2}", attachedCount, extOntology.Items.Count, OutSecs(timer.ElapsedMilliseconds));
    this.OnMessage(msg);
    ar.Log.Add(msg);
}
/// <summary>
/// Builds a document fragment tree from the first token and lets it create its referents
/// in this analyzer's data container.
/// </summary>
/// <remarks>
/// Nothing is done when the kit has no tokens or when no document fragment can be created.
/// The referent returned by <c>CreateReferent</c> is not used by this method.
/// </remarks>
/// <param name="kit">Analysis kit that supplies the token chain and the analyzer data.</param>
public override void Process(Pullenti.Ner.Core.AnalysisKit kit)
{
    Pullenti.Ner.Token first = kit.FirstToken;
    if (first == null)
    {
        return;
    }

    Pullenti.Ner.Instrument.Internal.FragToken document = Pullenti.Ner.Instrument.Internal.FragToken.CreateDocument(first, 0, InstrumentKind.Undefined);
    if (document == null)
    {
        return;
    }

    Pullenti.Ner.Core.AnalyzerData ad = kit.GetAnalyzerData(this);
    document.CreateReferent(ad);
}
/// <summary>
/// Creates the unit referent for this token together with the chain of its base units,
/// links every unit to its base unit and optionally registers all of them.
/// </summary>
/// <param name="ad">Analyzer data used for registration; when null nothing is registered.</param>
/// <returns>The (possibly registered) referent of this unit itself.</returns>
public Pullenti.Ner.Measure.UnitReferent CreateReferentWithRegister(Pullenti.Ner.Core.AnalyzerData ad)
{
    // Source of the referent, in priority order: known unit, unknown name, external ontology.
    Pullenti.Ner.Measure.UnitReferent own = ExtOnto;
    if (Unit != null)
    {
        own = _createReferent(Unit);
    }
    else if (UnknownName != null)
    {
        own = new Pullenti.Ner.Measure.UnitReferent();
        own.AddSlot(Pullenti.Ner.Measure.UnitReferent.ATTR_NAME, UnknownName, false, 0);
        own.IsUnknown = true;
    }

    if (Pow != 1)
    {
        own.AddSlot(Pullenti.Ner.Measure.UnitReferent.ATTR_POW, Pow.ToString(), false, 0);
    }

    // chain[0] is this unit, chain[k] is the base unit of chain[k - 1].
    List<Pullenti.Ner.Measure.UnitReferent> chain = new List<Pullenti.Ner.Measure.UnitReferent>();
    chain.Add(own);
    if (Unit != null)
    {
        Unit baseUnit = Unit.BaseUnit;
        while (baseUnit != null)
        {
            chain.Add(_createReferent(baseUnit));
            baseUnit = baseUnit.BaseUnit;
        }
    }

    // Walk from the deepest base unit up, so that a unit is registered before
    // it is referenced from the unit derived from it.
    int pos = chain.Count - 1;
    while (pos >= 0)
    {
        if (ad != null)
        {
            chain[pos] = ad.RegisterReferent(chain[pos]) as Pullenti.Ner.Measure.UnitReferent;
        }
        if (pos > 0)
        {
            Pullenti.Ner.Measure.UnitReferent derived = chain[pos - 1];
            derived.AddSlot(Pullenti.Ner.Measure.UnitReferent.ATTR_BASEUNIT, chain[pos], false, 0);
            Unit derivedUnit = derived.Tag as Unit;
            if (derivedUnit.BaseMultiplier != 0)
            {
                derived.AddSlot(Pullenti.Ner.Measure.UnitReferent.ATTR_BASEFACTOR, Pullenti.Ner.Core.NumberHelper.DoubleToString(derivedUnit.BaseMultiplier), false, 0);
            }
        }
        pos--;
    }
    return chain[0];
}
/// <summary>
/// Tries <c>TryParseThesis</c> at every token that can start a sentence and embeds
/// each recognized thesis into the token chain.
/// </summary>
/// <param name="kit">Analysis kit that supplies the token chain.</param>
/// <param name="ad">Analyzer data in which the referents are registered.</param>
public static void Process(Pullenti.Ner.Core.AnalysisKit kit, Pullenti.Ner.Core.AnalyzerData ad)
{
    Pullenti.Ner.Token cur = kit.FirstToken;
    while (cur != null)
    {
        if (Pullenti.Ner.Core.MiscHelper.CanBeStartOfSentence(cur))
        {
            Pullenti.Ner.ReferentToken thesis = TryParseThesis(cur);
            if (thesis != null)
            {
                thesis.Referent = ad.RegisterReferent(thesis.Referent);
                kit.EmbedToken(thesis);
                cur = thesis;
            }
        }
        cur = cur.Next;
    }
}
/// <summary>
/// Parses good attributes at every token, registers the created attribute referents and
/// embeds them into the token chain, reporting progress per chunk of text.
/// </summary>
/// <param name="kit">Analysis kit that supplies the text, the token chain and the analyzer data.</param>
public override void Process(Pullenti.Ner.Core.AnalysisKit kit)
{
    const int chunkSize = 100000;

    Pullenti.Ner.Core.AnalyzerData data = kit.GetAnalyzerData(this);

    // Progress is reported once per chunk of characters; at least one chunk is assumed.
    int chunkCount = (kit.Sofa.Text.Length + chunkSize - 1) / chunkSize;
    if (chunkCount == 0)
    {
        chunkCount = 1;
    }
    int chunkNo = 0;
    int chunkBorder = 0;

    for (Pullenti.Ner.Token tok = kit.FirstToken; tok != null; tok = tok.Next)
    {
        if (tok.BeginChar > chunkBorder)
        {
            chunkBorder += chunkSize;
            chunkNo++;
            if (!this.OnProgress(chunkNo, chunkCount, kit))
            {
                break;
            }
        }

        Pullenti.Ner.Goods.Internal.GoodAttrToken parsed = Pullenti.Ner.Goods.Internal.GoodAttrToken.TryParse(tok, null, true, true);
        if (parsed == null)
        {
            continue;
        }

        GoodAttributeReferent attribute = parsed._createAttr();
        if (attribute == null)
        {
            // Nothing to register, but the parsed span is still skipped.
            tok = parsed.EndToken;
            continue;
        }

        Pullenti.Ner.ReferentToken embedded = new Pullenti.Ner.ReferentToken(attribute, parsed.BeginToken, parsed.EndToken);
        embedded.Referent = data.RegisterReferent(attribute);
        kit.EmbedToken(embedded);
        tok = embedded;
    }
}
/// <summary>
/// Tries to build a territory token that starts at the first item of <paramref name="li"/>:
/// either from the remaining items via <c>TryAttachTerritory</c>, or from an organization
/// found right after the first item.
/// </summary>
/// <param name="li">Territory items; the first one supplies the begin token (and morphology).</param>
/// <param name="ad">Analyzer data passed through to <c>TryAttachTerritory</c>.</param>
/// <returns>The resulting token, or null when nothing could be attached.</returns>
static Pullenti.Ner.ReferentToken _tryAttachPureTerr(List<TerrItemToken> li, Pullenti.Ner.Core.AnalyzerData ad)
{
    TerrItemToken head = li[0];
    Pullenti.Ner.Token afterHead = head.EndToken.Next;
    if (afterHead == null)
    {
        return null;
    }

    // An opening bracket right after the head item is stepped over.
    Pullenti.Ner.Token probe = afterHead;
    if (Pullenti.Ner.Core.BracketHelper.CanBeStartOfSequence(probe, true, false))
    {
        probe = probe.Next;
    }

    if (li.Count > 1)
    {
        List<TerrItemToken> rest = new List<TerrItemToken>(li);
        rest.RemoveAt(0);

        Pullenti.Ner.ReferentToken terr = TryAttachTerritory(rest, ad, false, null, null);
        if (terr == null && rest.Count == 2)
        {
            // Retry with the third argument set to true when exactly one of the two items has a termin.
            bool firstHasTermin = rest[0].TerminItem != null;
            bool secondHasTermin = rest[1].TerminItem != null;
            if (firstHasTermin != secondHasTermin)
            {
                terr = TryAttachTerritory(rest, ad, true, null, null);
            }
        }
        if (terr != null)
        {
            if ((terr.Referent as Pullenti.Ner.Geo.GeoReferent).IsState)
            {
                return null;
            }
            terr.BeginToken = head.BeginToken;
            terr.Morph = head.Morph;
            return terr;
        }
    }

    Pullenti.Ner.Address.Internal.AddressItemToken org = Pullenti.Ner.Address.Internal.AddressItemToken.TryAttachOrg(probe);
    if (org == null)
    {
        return null;
    }
    Pullenti.Ner.ReferentToken result = org.CreateGeoOrgTerr();
    if (result == null)
    {
        return null;
    }
    result.BeginToken = head.BeginToken;

    // If an opening bracket was skipped, a closing bracket after the result is absorbed.
    if (probe != afterHead && Pullenti.Ner.Core.BracketHelper.CanBeEndOfSequence(result.EndToken.Next, false, null, false))
    {
        result.EndToken = result.EndToken.Next;
    }
    return result;
}
/// <summary>
/// Recognizes measures in the token chain, embeds the resulting referent tokens and,
/// when the kit has an external ontology, links unit referents to ontology items.
/// </summary>
/// <param name="kit">Analysis kit that supplies the token chain, the ontology and the analyzer data.</param>
public override void Process(Pullenti.Ner.Core.AnalysisKit kit)
{
    Pullenti.Ner.Core.AnalyzerData data = kit.GetAnalyzerData(this);

    // Step 1: collect names of ontology units that have no m_Unit into an extra termin collection.
    Pullenti.Ner.Core.TerminCollection extraUnits = null;
    if (kit.Ontology != null)
    {
        extraUnits = new Pullenti.Ner.Core.TerminCollection();
        foreach (Pullenti.Ner.ExtOntologyItem item in kit.Ontology.Items)
        {
            UnitReferent ontoUnit = item.Referent as UnitReferent;
            if (ontoUnit == null || ontoUnit.m_Unit != null)
            {
                continue;
            }
            foreach (Pullenti.Ner.Slot slot in ontoUnit.Slots)
            {
                bool isName = slot.TypeName == UnitReferent.ATTR_NAME || slot.TypeName == UnitReferent.ATTR_FULLNAME;
                if (!isName)
                {
                    continue;
                }
                extraUnits.Add(new Pullenti.Ner.Core.Termin(slot.Value as string) { Tag = ontoUnit });
            }
        }
    }

    // Step 2: parse measures and embed the produced tokens.
    for (Pullenti.Ner.Token tok = kit.FirstToken; tok != null; tok = tok.Next)
    {
        Pullenti.Ner.Measure.Internal.MeasureToken measure = Pullenti.Ner.Measure.Internal.MeasureToken.TryParseMinimal(tok, extraUnits, false)
            ?? Pullenti.Ner.Measure.Internal.MeasureToken.TryParse(tok, extraUnits, true, false, false, false);
        if (measure == null)
        {
            continue;
        }
        List<Pullenti.Ner.ReferentToken> produced = measure.CreateRefenetsTokensWithRegister(data, true);
        if (produced == null)
        {
            continue;
        }
        for (int idx = 0; idx < produced.Count; idx++)
        {
            Pullenti.Ner.ReferentToken embedded = produced[idx];
            tok.Kit.EmbedToken(embedded);
            tok = embedded;

            // Later tokens that shared a boundary with the embedded one now start/end on it.
            for (int later = idx + 1; later < produced.Count; later++)
            {
                if (produced[later].BeginToken == embedded.BeginToken)
                {
                    produced[later].BeginToken = tok;
                }
                if (produced[later].EndToken == embedded.EndToken)
                {
                    produced[later].EndToken = tok;
                }
            }
        }
    }

    // Step 3: link each registered unit to the first ontology unit sharing a name value.
    if (kit.Ontology == null)
    {
        return;
    }
    foreach (Pullenti.Ner.Referent entity in data.Referents)
    {
        UnitReferent unit = entity as UnitReferent;
        if (unit == null)
        {
            continue;
        }
        foreach (Pullenti.Ner.ExtOntologyItem item in kit.Ontology.Items)
        {
            UnitReferent ontoUnit = item.Referent as UnitReferent;
            if (ontoUnit == null)
            {
                continue;
            }
            bool matched = false;
            foreach (Pullenti.Ner.Slot slot in ontoUnit.Slots)
            {
                if (slot.TypeName != UnitReferent.ATTR_NAME && slot.TypeName != UnitReferent.ATTR_FULLNAME)
                {
                    continue;
                }
                if (unit.FindSlot(null, slot.Value, true) != null)
                {
                    matched = true;
                    break;
                }
            }
            if (!matched)
            {
                continue;
            }
            unit.OntologyItems = new List<Pullenti.Ner.ExtOntologyItem>();
            unit.OntologyItems.Add(item);
            break;
        }
    }
}
/// <summary>
/// Parses the text into mail lines, groups the lines into blocks and, for every block,
/// registers <c>MailReferent</c> objects of kinds Head, Hello, Body and Tail.
/// </summary>
/// <remarks>
/// The referents are registered in the analyzer data and receive an occurrence via
/// <c>AddOccurenceOfRefTok</c>; nothing is embedded into the token chain here.
/// </remarks>
/// <param name="kit">Analysis kit that supplies the token chain and the analyzer data.</param>
public override void Process(Pullenti.Ner.Core.AnalysisKit kit) {
    // Pass 1: split the token chain into consecutive mail lines.
    List <Pullenti.Ner.Mail.Internal.MailLine> lines = new List <Pullenti.Ner.Mail.Internal.MailLine>();
    for (Pullenti.Ner.Token t = kit.FirstToken; t != null; t = t.Next) {
        Pullenti.Ner.Mail.Internal.MailLine ml = Pullenti.Ner.Mail.Internal.MailLine.Parse(t, 0, 0);
        if (ml == null) { continue; }
        // NOTE(review): empty statement with no effect — looks like a leftover debugging hook.
        if (lines.Count == 91) { }
        lines.Add(ml);
        // Continue after the parsed line.
        t = ml.EndToken;
    }
    if (lines.Count == 0) { return; }
    // Pass 2: group lines into blocks; a new block is opened at a From line judged to start a header.
    int i;
    List <List <Pullenti.Ner.Mail.Internal.MailLine> > blocks = new List <List <Pullenti.Ner.Mail.Internal.MailLine> >();
    List <Pullenti.Ner.Mail.Internal.MailLine> blk = null;
    for (i = 0; i < lines.Count; i++) {
        Pullenti.Ner.Mail.Internal.MailLine ml = lines[i];
        if (ml.Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.From) {
            // A From line starts a new block when it is flagged as a first line or is the very first line...
            bool isNew = ml.MustBeFirstLine || i == 0;
            // ...or when one of the next two lines is From or Hello (only checked if both exist)...
            if (((i + 2) < lines.Count) && (((lines[i + 1].Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.From || lines[i + 2].Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.From || lines[i + 1].Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.Hello) || lines[i + 2].Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.Hello))) { isNew = true; }
            // ...or when the nearest preceding line of a defined type is BestRegards...
            if (!isNew) {
                for (int j = i - 1; j >= 0; j--) {
                    if (lines[j].Typ != Pullenti.Ner.Mail.Internal.MailLine.Types.Undefined) {
                        if (lines[j].Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.BestRegards) { isNew = true; }
                        break;
                    }
                }
            }
            // ...or when the line itself contains a referent whose type name is "DATE" or "URI".
            if (!isNew) {
                for (Pullenti.Ner.Token tt = ml.BeginToken; tt != null && tt.EndChar <= ml.EndChar; tt = tt.Next) {
                    if (tt.GetReferent() != null) {
                        if (tt.GetReferent().TypeName == "DATE" || tt.GetReferent().TypeName == "URI") { isNew = true; }
                    }
                }
            }
            if (isNew) {
                blk = new List <Pullenti.Ner.Mail.Internal.MailLine>();
                blocks.Add(blk);
                // Collect the header lines of the new block; the shared index i is advanced here.
                for (; i < lines.Count; i++) {
                    if (lines[i].Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.From) {
                        // A From line flagged as first line ends a non-empty header.
                        if (blk.Count > 0 && lines[i].MustBeFirstLine) { break; }
                        blk.Add(lines[i]);
                    } else if (((i + 1) < lines.Count) && lines[i + 1].Typ == 
Pullenti.Ner.Mail.Internal.MailLine.Types.From) {
                        // A non-From line directly followed by a From line.
                        int j;
                        // Look for a From line in the block that is a real From, a first line or has a mail address.
                        for (j = 0; j < blk.Count; j++) {
                            if (blk[j].Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.From) {
                                if (blk[j].IsRealFrom || blk[j].MustBeFirstLine || blk[j].MailAddr != null) { break; }
                            }
                        }
                        // None found: the line is kept in the header.
                        if (j >= blk.Count) { blk.Add(lines[i]); continue; }
                        // Otherwise stop if the following run of From lines contains such a line as well.
                        bool ok = false;
                        for (j = i + 1; j < lines.Count; j++) {
                            if (lines[j].Typ != Pullenti.Ner.Mail.Internal.MailLine.Types.From) { break; }
                            if (lines[j].IsRealFrom || lines[j].MustBeFirstLine) { ok = true; break; }
                            if (lines[j].MailAddr != null) { ok = true; break; }
                        }
                        if (ok) { break; }
                        blk.Add(lines[i]);
                    } else { break; }
                }
                // Step back so that the outer loop's i++ lands on the first line not consumed above.
                i--;
                continue;
            }
        }
        // Any other line goes to the current block (created on demand).
        if (blk == null) { blocks.Add((blk = new List <Pullenti.Ner.Mail.Internal.MailLine>())); }
        blk.Add(lines[i]);
    }
    if (blocks.Count == 0) { return; }
    // Pass 3: create the referents for every block; 'lines' is reused for the current block.
    Pullenti.Ner.Core.AnalyzerData ad = kit.GetAnalyzerData(this);
    for (int j = 0; j < blocks.Count; j++) {
        lines = blocks[j];
        if (lines.Count == 0) { continue; }
        i = 0;
        // Head: the leading From lines (a non-From line is tolerated if a From line follows it).
        if (lines[0].Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.From) {
            Pullenti.Ner.Token t1 = lines[0].EndToken;
            for (; i < lines.Count; i++) {
                if (lines[i].Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.From) { t1 = lines[i].EndToken; } else if (((i + 1) < lines.Count) && lines[i + 1].Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.From) { } else { break; }
            }
            MailReferent mail = new MailReferent() { Kind = MailKind.Head };
            Pullenti.Ner.ReferentToken mt = new Pullenti.Ner.ReferentToken(mail, lines[0].BeginToken, t1);
            mail.Text = Pullenti.Ner.Core.MiscHelper.GetTextValueOfMetaToken(mt, Pullenti.Ner.Core.GetTextAttr.KeepRegister);
            ad.RegisterReferent(mail);
            mail.AddOccurenceOfRefTok(mt);
        }
        // i0 is the index of the first line after the head; t2 will mark the first token of the tail.
        int i0 = i;
        Pullenti.Ner.Token t2 = null;
        int err = 0;
        // Scan backwards from the last line looking for the start of the tail.
        for (i = lines.Count - 1; i >= i0; i--) {
            Pullenti.Ner.Mail.Internal.MailLine li = lines[i];
            if (li.Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.BestRegards) {
                t2 = lines[i].BeginToken;
                // Extend the tail upwards over further short BestRegards lines.
                for (--i; i >= i0; i--) {
                    if (lines[i].Typ == 
Pullenti.Ner.Mail.Internal.MailLine.Types.BestRegards && (lines[i].Words < 2)) { t2 = lines[i].BeginToken; } else if ((i > i0 && (lines[i].Words < 3) && lines[i - 1].Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.BestRegards) && (lines[i - 1].Words < 2)) { i--; t2 = lines[i].BeginToken; } else { break; }
                }
                break;
            }
            // A short line (fewer than 3 words) with referents moves the tail start up and resets the error counter.
            if (li.Refs.Count > 0 && (li.Words < 3) && i > i0) { err = 0; t2 = li.BeginToken; continue; }
            // A line with more than 10 words discards the tail candidate.
            if (li.Words > 10) { t2 = null; continue; }
            // More than two lines with over 2 words discard the tail candidate as well.
            if (li.Words > 2) {
                if ((++err) > 2) { t2 = null; }
            }
        }
        // Fallback: an Undefined line with no words whose first referent is a person starts the tail.
        if (t2 == null) {
            for (i = lines.Count - 1; i >= i0; i--) {
                Pullenti.Ner.Mail.Internal.MailLine li = lines[i];
                if (li.Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.Undefined) {
                    if (li.Refs.Count > 0 && (li.Refs[0] is Pullenti.Ner.Person.PersonReferent)) {
                        if (li.Words == 0 && i > i0) { t2 = li.BeginToken; break; }
                    }
                }
            }
        }
        // Hello: a Hello line reached from i0 with only Undefined lines without words and referents before it.
        for (int ii = i0; ii < lines.Count; ii++) {
            if (lines[ii].Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.Hello) {
                MailReferent mail = new MailReferent() { Kind = MailKind.Hello };
                Pullenti.Ner.ReferentToken mt = new Pullenti.Ner.ReferentToken(mail, lines[i0].BeginToken, lines[ii].EndToken);
                if (mt.LengthChar > 0) {
                    mail.Text = Pullenti.Ner.Core.MiscHelper.GetTextValueOfMetaToken(mt, Pullenti.Ner.Core.GetTextAttr.KeepRegister);
                    ad.RegisterReferent(mail);
                    mail.AddOccurenceOfRefTok(mt);
                    // The body starts after the greeting.
                    i0 = ii + 1;
                }
                break;
            } else if (lines[ii].Typ != Pullenti.Ner.Mail.Internal.MailLine.Types.Undefined || lines[ii].Words > 0 || lines[ii].Refs.Count > 0) { break; }
        }
        if (i0 < lines.Count) {
            // Body: from line i0 up to the token before the tail (or to the end of the block when there is no tail).
            // No body is created when the tail starts at the very first token of the chain.
            if (t2 != null && t2.Previous == null) { } else {
                MailReferent mail = new MailReferent() { Kind = MailKind.Body };
                Pullenti.Ner.ReferentToken mt = new Pullenti.Ner.ReferentToken(mail, lines[i0].BeginToken, (t2 != null && t2.Previous != null ? 
t2.Previous : lines[lines.Count - 1].EndToken));
                if (mt.LengthChar > 0) {
                    mail.Text = Pullenti.Ner.Core.MiscHelper.GetTextValueOfMetaToken(mt, Pullenti.Ner.Core.GetTextAttr.KeepRegister);
                    ad.RegisterReferent(mail);
                    mail.AddOccurenceOfRefTok(mt);
                }
            }
            // Tail: from t2 to the end of the block.
            if (t2 != null) {
                MailReferent mail = new MailReferent() { Kind = MailKind.Tail };
                Pullenti.Ner.ReferentToken mt = new Pullenti.Ner.ReferentToken(mail, t2, lines[lines.Count - 1].EndToken);
                if (mt.LengthChar > 0) {
                    mail.Text = Pullenti.Ner.Core.MiscHelper.GetTextValueOfMetaToken(mt, Pullenti.Ner.Core.GetTextAttr.KeepRegister);
                    ad.RegisterReferent(mail);
                    mail.AddOccurenceOfRefTok(mt);
                }
                // Referents found on the tail lines are added to the tail referent
                // (this happens even when the tail was not registered above).
                for (i = i0; i < lines.Count; i++) {
                    if (lines[i].BeginChar >= t2.BeginChar) {
                        foreach (Pullenti.Ner.Referent r in lines[i].Refs) { mail.AddRef(r, 0); }
                    }
                }
            }
        }
    }
}
/// <summary>
/// Runs every non-ignored analyzer over the kit, then recalculates the generalization
/// relations, logging timings and errors unless <paramref name="noLog"/> is set.
/// </summary>
/// <remarks>
/// Exceptions thrown by an analyzer or by the recalculation are caught; they are reported
/// only when logging is enabled.
/// NOTE(review): once the timeout fires, <c>m_Breaked</c> is set, so the next iteration
/// takes the "interrupted by user" branch — confirm this is the intended interplay with
/// the "INSTRUMENT" exception below.
/// </remarks>
/// <param name="kit">Analysis kit to process.</param>
/// <param name="ar">Result that receives log lines and exceptions.</param>
/// <param name="noLog">When true, no messages, progress events or exceptions are reported.</param>
void _process2(Pullenti.Ner.Core.AnalysisKit kit, AnalysisResult ar, bool noLog)
{
    string text;
    Stopwatch timer = Stopwatch.StartNew();
    bool timedOut = false;

    // Iterate over a snapshot of the analyzer list.
    List<Analyzer> snapshot = new List<Analyzer>(m_Analyzers);
    foreach (Analyzer analyzer in snapshot)
    {
        if (analyzer.IgnoreThisAnalyzer)
        {
            continue;
        }
        if (m_Breaked)
        {
            if (!noLog)
            {
                text = "Процесс прерван пользователем";
                this.OnMessage(text);
                ar.Log.Add(text);
            }
            break;
        }

        bool checkTimeout = TimeoutSeconds > 0 && !timedOut;
        if (checkTimeout && (DateTime.Now - kit.StartDate).TotalSeconds > TimeoutSeconds)
        {
            m_Breaked = true;
            if (!noLog)
            {
                text = "Процесс прерван по таймауту";
                this.OnMessage(text);
                ar.Log.Add(text);
            }
            timedOut = true;
        }
        // After a timeout only the analyzer named "INSTRUMENT" is still run.
        if (timedOut && analyzer.Name != "INSTRUMENT")
        {
            continue;
        }

        if (!noLog)
        {
            this.OnProgressHandler(analyzer, new ProgressChangedEventArgs(0, string.Format("Работа \"{0}\"", analyzer.Caption)));
        }
        try
        {
            timer.Reset();
            timer.Start();
            analyzer.Process(kit);
            timer.Stop();
            Pullenti.Ner.Core.AnalyzerData data = kit.GetAnalyzerData(analyzer);
            if (!noLog)
            {
                text = string.Format("Анализатор \"{0}\" выделил {1} объект(ов) за {2}", analyzer.Caption, (data == null ? 0 : data.Referents.Count), OutSecs(timer.ElapsedMilliseconds));
                this.OnMessage(text);
                ar.Log.Add(text);
            }
        }
        catch (Exception ex)
        {
            if (!noLog)
            {
                Exception wrapped = new Exception(string.Format("Ошибка в анализаторе \"{0}\" ({1})", analyzer.Caption, ex.Message), ex);
                this.OnMessage(wrapped);
                ar.AddException(wrapped);
            }
        }
    }

    if (!noLog)
    {
        this.OnProgressHandler(null, new ProgressChangedEventArgs(0, "Пересчёт отношений обобщения"));
    }
    try
    {
        timer.Reset();
        timer.Start();
        Pullenti.Ner.Core.Internal.GeneralRelationHelper.RefreshGenerals(this, kit);
        timer.Stop();
        if (!noLog)
        {
            text = string.Format("Отношение обобщение пересчитано за {0}", OutSecs(timer.ElapsedMilliseconds));
            this.OnMessage(text);
            ar.Log.Add(text);
        }
    }
    catch (Exception ex)
    {
        if (!noLog)
        {
            Exception wrapped = new Exception("Ошибка пересчёта отношения обобщения", ex);
            this.OnMessage(wrapped);
            ar.AddException(wrapped);
        }
    }
}
/// <summary>
/// Converts this measure token (and its nested tokens) into referent tokens, registering
/// the created referents in <paramref name="ad"/> when it is not null.
/// </summary>
/// <remarks>
/// Two shapes are produced:
/// a composite measure built from <c>Internals</c> (when there is no own number, or the own
/// number is a plus/minus percent), or a plain measure built from <c>Nums</c> to which the
/// internal measures are attached as references.
/// Reference slots (<c>ATTR_REF</c>) always hold a referent, never a token.
/// </remarks>
/// <param name="ad">Analyzer data used for registration; may be null.</param>
/// <param name="register">Passed to <c>Nums</c> to control registration of the plain measure.</param>
/// <returns>
/// The created tokens, the last one being the measure itself; null when the token is judged
/// doubtful or when there is neither a number nor internal measures to build from.
/// </returns>
public List<Pullenti.Ner.ReferentToken> CreateRefenetsTokensWithRegister(Pullenti.Ner.Core.AnalyzerData ad, bool register = true)
{
    if (Internals.Count == 0)
    {
        // Without a number and without internals there is nothing to build
        // (previously this dereferenced a null Nums).
        if (Nums == null)
        {
            return null;
        }
        if (!Reliable)
        {
            if (Nums.Units.Count == 1 && Nums.Units[0].IsDoubt)
            {
                // A single doubtful unit is accepted only in the cases below;
                // otherwise a full range (both bounds) is required.
                if (Nums.Units[0].UnknownName != null)
                {
                }
                else if (Nums.IsNewlineBefore)
                {
                }
                else if (Nums.Units[0].BeginToken.LengthChar > 1 && Nums.Units[0].BeginToken.GetMorphClassInDictionary().IsUndefined)
                {
                }
                else if (Nums.FromVal == null || Nums.ToVal == null)
                {
                    return null;
                }
            }
        }
    }

    List<Pullenti.Ner.ReferentToken> res = new List<Pullenti.Ner.ReferentToken>();

    if ((Nums == null || Nums.PlusMinusPercent) && Internals.Count > 0)
    {
        // ---- Composite measure built from the internal measures ----
        List<Pullenti.Ner.ReferentToken> liEx = null;
        if (InternalEx != null)
        {
            liEx = InternalEx.CreateRefenetsTokensWithRegister(ad, true);
            if (liEx != null)
            {
                res.AddRange(liEx);
            }
        }

        Pullenti.Ner.Measure.MeasureReferent mr = new Pullenti.Ner.Measure.MeasureReferent();
        string templ0 = "1";
        string templ = null;
        if (Name != null)
        {
            mr.AddSlot(Pullenti.Ner.Measure.MeasureReferent.ATTR_NAME, Name, false, 0);
        }

        List<Pullenti.Ner.Measure.MeasureReferent> ints = new List<Pullenti.Ner.Measure.MeasureReferent>();
        for (int k = 0; k < Internals.Count; k++)
        {
            MeasureToken ii = Internals[k];
            ii.Reliable = true;
            List<Pullenti.Ner.ReferentToken> li = ii.CreateRefenetsTokensWithRegister(ad, false);
            if (li == null)
            {
                continue;
            }
            res.AddRange(li);
            Pullenti.Ner.Measure.MeasureReferent mr0 = res[res.Count - 1].Referent as Pullenti.Ner.Measure.MeasureReferent;

            // Link the extra measure by its referent (the token itself must not be stored
            // in a slot), and only when the extra list actually has an element.
            if (liEx != null && liEx.Count > 0)
            {
                mr0.AddSlot(Pullenti.Ner.Measure.MeasureReferent.ATTR_REF, liEx[liEx.Count - 1].Referent, false, 0);
            }

            // The template of the first internal measure becomes the outer template.
            if (k == 0 && !IsEmpty)
            {
                templ0 = mr0.Template;
                mr0.Template = "1";
            }
            if (ad != null)
            {
                mr0 = ad.RegisterReferent(mr0) as Pullenti.Ner.Measure.MeasureReferent;
            }
            mr.AddSlot(Pullenti.Ner.Measure.MeasureReferent.ATTR_VALUE, mr0, false, 0);
            ints.Add(mr0);

            if (templ == null)
            {
                templ = "1";
            }
            else
            {
                int nu = mr.GetStringValues(Pullenti.Ner.Measure.MeasureReferent.ATTR_VALUE).Count;
                templ = string.Format("{0}{1}{2}", templ, (IsSet ? ", " : " × "), nu);
            }
        }

        if (IsSet)
        {
            templ = "{" + templ + "}";
        }
        if (templ0 != "1")
        {
            templ = templ0.Replace("1", templ);
        }
        if (Nums != null && Nums.PlusMinusPercent && Nums.SingleVal != null)
        {
            templ = string.Format("[{0} ±{1}%]", templ, Internals.Count + 1);
            mr.AddValue(Nums.SingleVal.Value);
        }
        mr.Template = templ;

        // Propagate the length unit to internal measures that have no unit of their own.
        bool hasLength = false;
        Pullenti.Ner.Measure.UnitReferent uref = null;
        for (int i = 0; i < ints.Count; i++)
        {
            if (ints[i].Kind == Pullenti.Ner.Measure.MeasureKind.Length)
            {
                hasLength = true;
                uref = ints[i].GetSlotValue(Pullenti.Ner.Measure.MeasureReferent.ATTR_UNIT) as Pullenti.Ner.Measure.UnitReferent;
            }
            else if (ints[i].Units.Count > 0)
            {
                break;
            }
        }
        if (ints.Count > 1 && hasLength && uref != null)
        {
            foreach (Pullenti.Ner.Measure.MeasureReferent inner in ints)
            {
                if (inner.FindSlot(Pullenti.Ner.Measure.MeasureReferent.ATTR_UNIT, null, true) == null)
                {
                    inner.AddSlot(Pullenti.Ner.Measure.MeasureReferent.ATTR_UNIT, uref, false, 0);
                    inner.Kind = Pullenti.Ner.Measure.MeasureKind.Length;
                }
            }
        }

        // Three lengths make a volume; three unit-less values do so only for size-like names.
        if (ints.Count == 3)
        {
            if (ints[0].Kind == Pullenti.Ner.Measure.MeasureKind.Length && ints[1].Kind == Pullenti.Ner.Measure.MeasureKind.Length && ints[2].Kind == Pullenti.Ner.Measure.MeasureKind.Length)
            {
                mr.Kind = Pullenti.Ner.Measure.MeasureKind.Volume;
            }
            else if (ints[0].Units.Count == 0 && ints[1].Units.Count == 0 && ints[2].Units.Count == 0)
            {
                string nam = mr.GetStringValue(Pullenti.Ner.Measure.MeasureReferent.ATTR_NAME);
                if (nam != null)
                {
                    if (nam.Contains("РАЗМЕР") || nam.Contains("ГАБАРИТ"))
                    {
                        mr.Kind = Pullenti.Ner.Measure.MeasureKind.Volume;
                    }
                }
            }
        }
        // Two lengths make an area.
        if (ints.Count == 2)
        {
            if (ints[0].Kind == Pullenti.Ner.Measure.MeasureKind.Length && ints[1].Kind == Pullenti.Ner.Measure.MeasureKind.Length)
            {
                mr.Kind = Pullenti.Ner.Measure.MeasureKind.Area;
            }
        }

        if (!IsEmpty)
        {
            if (ad != null)
            {
                mr = ad.RegisterReferent(mr) as Pullenti.Ner.Measure.MeasureReferent;
            }
            res.Add(new Pullenti.Ner.ReferentToken(mr, BeginToken, EndToken));
        }
        return res;
    }

    // ---- Plain measure built from the own number; internals become references ----
    List<Pullenti.Ner.ReferentToken> re2 = Nums.CreateRefenetsTokensWithRegister(ad, Name, register);
    foreach (MeasureToken ii in Internals)
    {
        List<Pullenti.Ner.ReferentToken> li = ii.CreateRefenetsTokensWithRegister(ad, true);
        if (li == null)
        {
            continue;
        }
        res.AddRange(li);
        re2[re2.Count - 1].Referent.AddSlot(Pullenti.Ner.Measure.MeasureReferent.ATTR_REF, res[res.Count - 1].Referent, false, 0);
    }
    // The measure token spans this whole token.
    re2[re2.Count - 1].BeginToken = BeginToken;
    re2[re2.Count - 1].EndToken = EndToken;
    res.AddRange(re2);
    return res;
}
/// <summary>
/// Creates a geo referent of type "АДМИНИСТРАТИВНЫЙ ОКРУГ" from the first item when its
/// termin is marked as a Moscow region.
/// </summary>
/// <remarks>
/// A doubtful item is accepted only when a city check succeeds right after it or when the
/// following address items start with a street.
/// </remarks>
/// <param name="li">Territory items; only the first one is examined.</param>
/// <param name="ad">Analyzer data (not used by this method).</param>
/// <returns>A token spanning the first item, or null when the item does not qualify.</returns>
static Pullenti.Ner.ReferentToken _tryAttachMoscowAO(List<TerrItemToken> li, Pullenti.Ner.Core.AnalyzerData ad)
{
    if (li[0].TerminItem == null || !li[0].TerminItem.IsMoscowRegion)
    {
        return null;
    }

    if (li[0].IsDoubt)
    {
        bool ok = false;
        if (CityAttachHelper.CheckCityAfter(li[0].EndToken.Next))
        {
            ok = true;
        }
        else
        {
            List<Pullenti.Ner.Address.Internal.AddressItemToken> ali = Pullenti.Ner.Address.Internal.AddressItemToken.TryParseList(li[0].EndToken.Next, null, 2);
            if (ali != null && ali.Count > 0 && ali[0].Typ == Pullenti.Ner.Address.Internal.AddressItemToken.ItemType.Street)
            {
                ok = true;
            }
        }
        if (!ok)
        {
            return null;
        }
    }

    Pullenti.Ner.Geo.GeoReferent reg = new Pullenti.Ner.Geo.GeoReferent();
    string typ = "АДМИНИСТРАТИВНЫЙ ОКРУГ";
    reg.AddTyp(typ);

    // Strip the type phrase (and the character before it) from the end of the canonic name.
    string name = li[0].TerminItem.CanonicText;
    if (Pullenti.Morph.LanguageHelper.EndsWith(name, typ))
    {
        int keep = name.Length - typ.Length - 1;
        // When the canonic text is exactly the type phrase, keep would be -1 and
        // Substring would throw; the name is then left as it is.
        if (keep >= 0)
        {
            name = name.Substring(0, keep).Trim();
        }
    }
    reg.AddName(name);

    return new Pullenti.Ner.ReferentToken(reg, li[0].BeginToken, li[0].EndToken);
}
/// <summary>
/// Recognizes weapons in two passes: the first pass attaches full descriptions and collects
/// their models and names; the second pass marks later bare mentions of those models and
/// names as references to the already found weapons.
/// </summary>
/// <param name="kit">Analysis kit that supplies the token chain and the analyzer data.</param>
public override void Process(Pullenti.Ner.Core.AnalysisKit kit)
{
    Pullenti.Ner.Core.AnalyzerData data = kit.GetAnalyzerData(this);
    Pullenti.Ner.Core.TerminCollection modelTermins = new Pullenti.Ner.Core.TerminCollection();
    Dictionary<string, List<Pullenti.Ner.Referent>> weaponsByModel = new Dictionary<string, List<Pullenti.Ner.Referent>>();
    Pullenti.Ner.Core.TerminCollection nameTermins = new Pullenti.Ner.Core.TerminCollection();

    // ---- Pass 1: full descriptions ----
    for (Pullenti.Ner.Token cur = kit.FirstToken; cur != null; cur = cur.Next)
    {
        List<Pullenti.Ner.Weapon.Internal.WeaponItemToken> items = Pullenti.Ner.Weapon.Internal.WeaponItemToken.TryParseList(cur, 10);
        if (items == null)
        {
            continue;
        }
        List<Pullenti.Ner.ReferentToken> attached = this.TryAttach(items, false);
        if (attached == null)
        {
            continue;
        }
        foreach (Pullenti.Ner.ReferentToken weapon in attached)
        {
            weapon.Referent = data.RegisterReferent(weapon.Referent);
            kit.EmbedToken(weapon);
            cur = weapon;

            foreach (Pullenti.Ner.Slot slot in weapon.Referent.Slots)
            {
                if (slot.TypeName == WeaponReferent.ATTR_MODEL)
                {
                    // The model is indexed twice: as is, and prefixed with the brand (if any).
                    string key = slot.Value.ToString();
                    bool brandPrefixed = false;
                    while (true)
                    {
                        // Keys starting with a digit are not indexed.
                        if (!char.IsDigit(key[0]))
                        {
                            List<Pullenti.Ner.Referent> owners;
                            if (!weaponsByModel.TryGetValue(key, out owners))
                            {
                                owners = new List<Pullenti.Ner.Referent>();
                                weaponsByModel.Add(key, owners);
                            }
                            if (!owners.Contains(weapon.Referent))
                            {
                                owners.Add(weapon.Referent);
                            }
                            modelTermins.AddString(key, owners, null, false);
                        }
                        if (brandPrefixed)
                        {
                            break;
                        }
                        string brand = weapon.Referent.GetStringValue(WeaponReferent.ATTR_BRAND);
                        if (brand == null)
                        {
                            break;
                        }
                        key = string.Format("{0} {1}", brand, key);
                        brandPrefixed = true;
                    }
                }
                else if (slot.TypeName == WeaponReferent.ATTR_NAME)
                {
                    nameTermins.Add(new Pullenti.Ner.Core.Termin(slot.Value.ToString()) { Tag = weapon.Referent });
                }
            }
        }
    }

    if (weaponsByModel.Count == 0 && nameTermins.Termins.Count == 0)
    {
        return;
    }

    // ---- Pass 2: bare mentions of known models and names ----
    for (Pullenti.Ner.Token cur = kit.FirstToken; cur != null; cur = cur.Next)
    {
        // A known name that fills a bracket sequence completely.
        Pullenti.Ner.Core.BracketSequenceToken brackets = Pullenti.Ner.Core.BracketHelper.TryParse(cur, Pullenti.Ner.Core.BracketParseAttr.No, 10);
        if (brackets != null)
        {
            Pullenti.Ner.Core.TerminToken inner = nameTermins.TryParse(cur.Next, Pullenti.Ner.Core.TerminParseAttr.No);
            if (inner != null && inner.EndToken.Next == brackets.EndToken)
            {
                Pullenti.Ner.ReferentToken bracketed = new Pullenti.Ner.ReferentToken(inner.Termin.Tag as Pullenti.Ner.Referent, brackets.BeginToken, brackets.EndToken);
                kit.EmbedToken(bracketed);
                cur = bracketed;
                continue;
            }
        }

        if (!(cur is Pullenti.Ner.TextToken) || !cur.Chars.IsLetter)
        {
            continue;
        }

        // Models first; names only for tokens that are not all lower case.
        Pullenti.Ner.Core.TerminToken match = modelTermins.TryParse(cur, Pullenti.Ner.Core.TerminParseAttr.No);
        if (match == null && !cur.Chars.IsAllLower)
        {
            match = nameTermins.TryParse(cur, Pullenti.Ner.Core.TerminParseAttr.No);
        }
        if (match == null)
        {
            continue;
        }

        // Without whitespace after the match, only one of ",.)" or a bracket may follow.
        if (!match.IsWhitespaceAfter)
        {
            Pullenti.Ner.Token follower = match.EndToken.Next;
            bool punctuationFollows = follower != null && follower.IsCharOf(",.)");
            if (!punctuationFollows && !Pullenti.Ner.Core.BracketHelper.IsBracket(follower, false))
            {
                continue;
            }
        }

        // A model is usable only when it identifies exactly one weapon.
        Pullenti.Ner.Referent target;
        List<Pullenti.Ner.Referent> candidates = match.Termin.Tag as List<Pullenti.Ner.Referent>;
        if (candidates != null && candidates.Count == 1)
        {
            target = candidates[0];
        }
        else
        {
            target = match.Termin.Tag as Pullenti.Ner.Referent;
        }
        if (target == null)
        {
            continue;
        }

        // A brand standing right before the mention is absorbed and added to the weapon.
        Pullenti.Ner.Weapon.Internal.WeaponItemToken before = Pullenti.Ner.Weapon.Internal.WeaponItemToken.TryParse(match.BeginToken.Previous, null, false, true);
        if (before != null && before.Typ == Pullenti.Ner.Weapon.Internal.WeaponItemToken.Typs.Brand)
        {
            target.AddSlot(WeaponReferent.ATTR_BRAND, before.Value, false, 0);
            match.BeginToken = before.BeginToken;
        }
        Pullenti.Ner.ReferentToken mention = new Pullenti.Ner.ReferentToken(target, match.BeginToken, match.EndToken);
        kit.EmbedToken(mention);
        cur = mention;
    }
}
/// <summary>
/// Recomputes the "general / specific" relations between the referents of all analyzers:
/// builds candidate pairs from shared compare strings, prunes indirect parents, merges a
/// referent into its only child where possible and sets <c>GeneralReferent</c> otherwise.
/// </summary>
/// <param name="proc">Processor whose analyzers are inspected.</param>
/// <param name="kit">Analysis kit holding the analyzer data and the token chain.</param>
public static void RefreshGenerals(Pullenti.Ner.Processor proc, Pullenti.Ner.Core.AnalysisKit kit)
{
    // analyzer name -> compare string -> referents sharing that string
    Dictionary<string, Dictionary<string, List<Pullenti.Ner.Referent>>> index = new Dictionary<string, Dictionary<string, List<Pullenti.Ner.Referent>>>();
    List<Node> nodes = new List<Node>();

    // Step 1: wrap every referent into a node (stored in Tag) and index it by its compare strings.
    foreach (Pullenti.Ner.Analyzer analyzer in proc.Analyzers)
    {
        Pullenti.Ner.Core.AnalyzerData data = kit.GetAnalyzerData(analyzer);
        if (data == null)
        {
            continue;
        }
        foreach (Pullenti.Ner.Referent entity in data.Referents)
        {
            Node node = new Node() { Ref = entity, Ad = data };
            nodes.Add(node);
            entity.Tag = node;

            Dictionary<string, List<Pullenti.Ner.Referent>> byString;
            if (!index.TryGetValue(analyzer.Name, out byString))
            {
                byString = new Dictionary<string, List<Pullenti.Ner.Referent>>();
                index.Add(analyzer.Name, byString);
            }

            List<string> keys = entity.GetCompareStrings();
            if (keys == null || keys.Count == 0)
            {
                continue;
            }
            foreach (string key in keys)
            {
                if (key == null)
                {
                    continue;
                }
                List<Pullenti.Ner.Referent> bucket;
                if (!byString.TryGetValue(key, out bucket))
                {
                    bucket = new List<Pullenti.Ner.Referent>();
                    byString.Add(key, bucket);
                }
                bucket.Add(entity);
            }
        }
    }

    // Step 2: record which nodes reference which through slot values.
    foreach (Node source in nodes)
    {
        foreach (Pullenti.Ner.Slot slot in source.Ref.Slots)
        {
            Pullenti.Ner.Referent referenced = slot.Value as Pullenti.Ner.Referent;
            if (referenced == null)
            {
                continue;
            }
            Node target = referenced.Tag as Node;
            if (target == null)
            {
                continue;
            }
            if (target.RefsFrom == null)
            {
                target.RefsFrom = new List<Node>();
            }
            target.RefsFrom.Add(source);
            if (source.RefsTo == null)
            {
                source.RefsTo = new List<Node>();
            }
            source.RefsTo.Add(target);
        }
    }

    // Step 3: inside every bucket find strictly one-directional "can be general for" pairs.
    foreach (Dictionary<string, List<Pullenti.Ner.Referent>> byString in index.Values)
    {
        foreach (List<Pullenti.Ner.Referent> bucket in byString.Values)
        {
            // Singletons have no pairs; very large buckets are skipped.
            if (bucket.Count < 2 || bucket.Count > 3000)
            {
                continue;
            }
            for (int a = 0; a < bucket.Count; a++)
            {
                for (int b = a + 1; b < bucket.Count; b++)
                {
                    Pullenti.Ner.Referent first = bucket[a];
                    Pullenti.Ner.Referent second = bucket[b];
                    Node general = null;
                    Node specific = null;
                    if (first.CanBeGeneralFor(second) && !second.CanBeGeneralFor(first))
                    {
                        general = first.Tag as Node;
                        specific = second.Tag as Node;
                    }
                    else if (second.CanBeGeneralFor(first) && !first.CanBeGeneralFor(second))
                    {
                        general = second.Tag as Node;
                        specific = first.Tag as Node;
                    }
                    if (general == null || specific == null)
                    {
                        continue;
                    }
                    if (general.GenFrom == null)
                    {
                        general.GenFrom = new List<Node>();
                    }
                    if (!general.GenFrom.Contains(specific))
                    {
                        general.GenFrom.Add(specific);
                    }
                    if (specific.GenTo == null)
                    {
                        specific.GenTo = new List<Node>();
                    }
                    if (!specific.GenTo.Contains(general))
                    {
                        specific.GenTo.Add(general);
                    }
                }
            }
        }
    }

    // Step 4: drop a parent when another parent of the same node already has it among its own parents.
    foreach (Node node in nodes)
    {
        if (node.GenTo == null || node.GenTo.Count <= 1)
        {
            continue;
        }
        for (int idx = node.GenTo.Count - 1; idx >= 0; idx--)
        {
            Node parent = node.GenTo[idx];
            bool redundant = false;
            for (int other = 0; other < node.GenTo.Count; other++)
            {
                if (other != idx && node.GenTo[other].IsInGenParentsOrHigher(parent))
                {
                    redundant = true;
                }
            }
            if (redundant)
            {
                parent.GenFrom.Remove(node);
                node.GenTo.RemoveAt(idx);
            }
        }
    }

    // Step 5: a parent with a single child is merged into that child; otherwise it becomes the general referent.
    foreach (Node node in nodes)
    {
        if (node.Deleted || node.GenTo == null || node.GenTo.Count != 1)
        {
            continue;
        }
        Node parent = node.GenTo[0];
        if (parent.GenFrom.Count != 1)
        {
            node.Ref.GeneralReferent = parent.Ref;
            continue;
        }
        node.Ref.MergeSlots(parent.Ref, true);
        // The merged referent points to its replacement through Tag.
        parent.Ref.Tag = node.Ref;
        parent.ReplaceValues(node);
        foreach (Pullenti.Ner.TextAnnotation occurrence in parent.Ref.Occurrence)
        {
            node.Ref.AddOccurence(occurrence);
        }
        parent.Deleted = true;
    }

    // Step 6: fix the tokens, then remove merged referents and clear the temporary tags.
    for (Pullenti.Ner.Token tok = kit.FirstToken; tok != null; tok = tok.Next)
    {
        _correctReferents(tok);
    }
    foreach (Node node in nodes)
    {
        if (node.Deleted)
        {
            node.Ad.RemoveReferent(node.Ref);
        }
        node.Ref.Tag = null;
    }
}
/// <summary>
/// Creates referent tokens for the units of this number-with-units token followed by a
/// token for the measure itself, whose template encodes a single value, a value with
/// deviation, an approximate value or a range.
/// </summary>
/// <remarks>
/// NOTE(review): when <c>Not</c> is set and one bound is missing, the bounds and their
/// inclusion flags are swapped in place on this object, so calling the method twice swaps
/// them back — confirm callers invoke it only once per token.
/// </remarks>
/// <param name="ad">Analyzer data passed to the unit tokens and used for registration; may be null.</param>
/// <param name="name">Measure name; the literal "T =" is replaced by "ТЕМПЕРАТУРА"; null means no name.</param>
/// <param name="regist">When true (and <paramref name="ad"/> is not null) the measure is registered.</param>
/// <returns>Unit tokens, then tokens of the divisor (if any), then the measure token as the last element.</returns>
public List<Pullenti.Ner.ReferentToken> CreateRefenetsTokensWithRegister(Pullenti.Ner.Core.AnalyzerData ad, string name, bool regist = true)
{
    if (name == "T =")
    {
        name = "ТЕМПЕРАТУРА";
    }

    List<Pullenti.Ner.ReferentToken> res = new List<Pullenti.Ner.ReferentToken>();
    foreach (UnitToken u in Units)
    {
        Pullenti.Ner.ReferentToken rt = new Pullenti.Ner.ReferentToken(u.CreateReferentWithRegister(ad), u.BeginToken, u.EndToken);
        res.Add(rt);
    }

    Pullenti.Ner.Measure.MeasureReferent mr = new Pullenti.Ner.Measure.MeasureReferent();
    // In the template, digits are 1-based positions of the values added to the measure.
    string templ = "1";
    if (SingleVal != null)
    {
        mr.AddValue(SingleVal.Value);
        if (PlusMinus != null)
        {
            templ = string.Format("[1 ±2{0}]", (PlusMinusPercent ? "%" : ""));
            mr.AddValue(PlusMinus.Value);
        }
        else if (About)
        {
            templ = "~1";
        }
    }
    else
    {
        // A negated half-open range is turned into the opposite half-open range.
        if (Not && ((FromVal == null || ToVal == null)))
        {
            bool b = FromInclude;
            FromInclude = ToInclude;
            ToInclude = b;
            double? v = FromVal;
            FromVal = ToVal;
            ToVal = v;
        }

        int num = 1;
        if (FromVal != null)
        {
            mr.AddValue(FromVal.Value);
            templ = (FromInclude ? "[1" : "]1");
            num++;
        }
        else
        {
            templ = "]";
        }
        if (ToVal != null)
        {
            mr.AddValue(ToVal.Value);
            templ = string.Format("{0} .. {1}{2}", templ, num, (ToInclude ? ']' : '['));
        }
        else
        {
            // Open upper bound; the literal must stay on a single line.
            templ += " .. [";
        }
    }
    mr.Template = templ;

    // At this point res holds only the unit tokens.
    foreach (Pullenti.Ner.ReferentToken rt in res)
    {
        mr.AddSlot(Pullenti.Ner.Measure.MeasureReferent.ATTR_UNIT, rt.Referent, false, 0);
    }
    if (name != null)
    {
        mr.AddSlot(Pullenti.Ner.Measure.MeasureReferent.ATTR_NAME, name, false, 0);
    }
    if (DivNum != null)
    {
        List<Pullenti.Ner.ReferentToken> dn = DivNum.CreateRefenetsTokensWithRegister(ad, null, true);
        res.AddRange(dn);
        mr.AddSlot(Pullenti.Ner.Measure.MeasureReferent.ATTR_REF, dn[dn.Count - 1].Referent, false, 0);
    }

    Pullenti.Ner.Measure.MeasureKind ki = UnitToken.CalcKind(Units);
    if (ki != Pullenti.Ner.Measure.MeasureKind.Undefined)
    {
        mr.Kind = ki;
    }
    if (regist && ad != null)
    {
        mr = ad.RegisterReferent(mr) as Pullenti.Ner.Measure.MeasureReferent;
    }
    res.Add(new Pullenti.Ner.ReferentToken(mr, BeginToken, EndToken));
    return res;
}
/// <summary>
/// Main keyword-extraction routine: walks the token chain of <paramref name="kit"/> and embeds
/// tokens carrying <c>KeywordReferent</c> objects.
/// </summary>
/// <remarks>
/// Steps performed:
/// 1. If no enabled DenominationAnalyzer is among kit.Processor.Analyzers, one is run on the kit.
/// 2. First pass: tokens that already have a referent are delegated to _addReferents; for text
///    tokens of at least three letters a Predicate keyword is created for verbs outside a noun
///    phrase, and an Object keyword for each qualifying word of a noun phrase (plus one combined
///    Object keyword when the phrase produced more than one).
/// 3. Second pass: two neighbouring Object keywords are united when the second one is genitive
///    or is attached through "OF" (optionally followed by an English article).
/// 4. Optionally sorts ad.Referents by rank and registers an annotation referent.
/// </remarks>
/// <param name="kit">Analysis kit whose token chain is scanned and modified in place.</param>
public override void Process(Pullenti.Ner.Core.AnalysisKit kit)
{
    Pullenti.Ner.Core.AnalyzerData ad = kit.GetAnalyzerData(this);

    // Make sure denominations have been extracted before keywords are built.
    bool hasDenoms = false;
    foreach (Pullenti.Ner.Analyzer a in kit.Processor.Analyzers)
    {
        if ((a is Pullenti.Ner.Denomination.DenominationAnalyzer) && !a.IgnoreThisAnalyzer)
        {
            hasDenoms = true;
            break;
        }
    }
    if (!hasDenoms)
    {
        Pullenti.Ner.Denomination.DenominationAnalyzer denomAnalyzer = new Pullenti.Ner.Denomination.DenominationAnalyzer();
        denomAnalyzer.Process(kit);
    }

    List<KeywordReferent> li = new List<KeywordReferent>();
    StringBuilder tmp = new StringBuilder();
    List<string> tmp2 = new List<string>();

    // max = number of top-level tokens; handed to _setRank together with the running index.
    int max = 0;
    for (Pullenti.Ner.Token t = kit.FirstToken; t != null; t = t.Next)
    {
        max++;
    }

    int cur = 0;
    for (Pullenti.Ner.Token t = kit.FirstToken; t != null; t = t.Next, cur++)
    {
        Pullenti.Ner.Referent r = t.GetReferent();
        if (r != null)
        {
            t = this._addReferents(ad, t, cur, max);
            continue;
        }
        if (!(t is Pullenti.Ner.TextToken))
        {
            continue;
        }
        if (!t.Chars.IsLetter || (t.LengthChar < 3))
        {
            continue;
        }

        string term = (t as Pullenti.Ner.TextToken).Term;
        if (term == "ЕСТЬ")
        {
            // "ЕСТЬ" is only processed when the previous text token is a verb.
            bool afterVerb = (t.Previous is Pullenti.Ner.TextToken) && t.Previous.Morph.Class.IsVerb;
            if (!afterVerb)
            {
                continue;
            }
        }

        Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t, Pullenti.Ner.Core.NounPhraseParseAttr.AdjectiveCanBeLast | Pullenti.Ner.Core.NounPhraseParseAttr.ParsePreposition, 0, null);
        if (npt == null)
        {
            // Not a noun phrase: only verbs are turned into (Predicate) keywords.
            Pullenti.Morph.MorphClass verbClass = t.GetMorphClassInDictionary();
            if (!verbClass.IsVerb || verbClass.IsPreposition)
            {
                continue;
            }
            if ((t as Pullenti.Ner.TextToken).IsVerbBe)
            {
                continue;
            }
            if (t.IsValue("МОЧЬ", null) || t.IsValue("WOULD", null))
            {
                continue;
            }

            KeywordReferent predicate = new KeywordReferent() { Typ = KeywordType.Predicate };
            string verbNorm = t.GetNormalCaseText(Pullenti.Morph.MorphClass.Verb, Pullenti.Morph.MorphNumber.Singular, Pullenti.Morph.MorphGender.Undefined, false);
            if (verbNorm == null)
            {
                verbNorm = (t as Pullenti.Ner.TextToken).Lemma;
            }
            // Strip the trailing "СЯ" (the "Ь" is kept). Ordinal: this is a fixed suffix test.
            if (verbNorm.EndsWith("ЬСЯ", System.StringComparison.Ordinal))
            {
                verbNorm = verbNorm.Substring(0, verbNorm.Length - 2);
            }
            predicate.AddSlot(KeywordReferent.ATTR_VALUE, verbNorm, false, 0);
            List<Pullenti.Semantic.Utils.DerivateGroup> verbDrv = Pullenti.Semantic.Utils.DerivateService.FindDerivates(verbNorm, true, t.Morph.Language);
            _addNormals(predicate, verbDrv, verbNorm);
            predicate = ad.RegisterReferent(predicate) as KeywordReferent;
            _setRank(predicate, cur, max);
            // NOTE(review): predicate is already registered above; this second RegisterReferent
            // call looks redundant - confirm before removing.
            Pullenti.Ner.ReferentToken predicateToken = new Pullenti.Ner.ReferentToken(ad.RegisterReferent(predicate), t, t) { Morph = t.Morph };
            kit.EmbedToken(predicateToken);
            t = predicateToken;
            continue;
        }

        if (npt.InternalNoun != null)
        {
            continue;
        }

        // Skip prepositional phrases ending with "ЦЕЛОМ" / "ЧАСТНОСТИ" and "С ... СТОРОНЫ".
        if (npt.EndToken.IsValue("ЦЕЛОМ", null) || npt.EndToken.IsValue("ЧАСТНОСТИ", null))
        {
            if (npt.Preposition != null)
            {
                t = npt.EndToken;
                continue;
            }
        }
        if (npt.EndToken.IsValue("СТОРОНЫ", null) && npt.Preposition != null && npt.Preposition.Normal == "С")
        {
            t = npt.EndToken;
            continue;
        }

        if (npt.BeginToken == npt.EndToken)
        {
            // Single-token phrase: drop prepositions and the adverb "ПОТОМ".
            Pullenti.Morph.MorphClass singleClass = t.GetMorphClassInDictionary();
            if (singleClass.IsPreposition)
            {
                continue;
            }
            if (singleClass.IsAdverb && t.IsValue("ПОТОМ", null))
            {
                continue;
            }
        }

        // One Object keyword per qualifying word of the noun phrase.
        li.Clear();
        Pullenti.Ner.Token t0 = t;
        for (Pullenti.Ner.Token tt = t; tt != null && tt.EndChar <= npt.EndChar; tt = tt.Next)
        {
            if (!(tt is Pullenti.Ner.TextToken))
            {
                continue;
            }
            if ((tt.LengthChar < 3) || !tt.Chars.IsLetter)
            {
                continue;
            }
            Pullenti.Morph.MorphClass wordClass = tt.GetMorphClassInDictionary();
            if ((wordClass.IsPreposition || wordClass.IsPronoun || wordClass.IsPersonalPronoun) || wordClass.IsConjunction)
            {
                // "ОТНОШЕНИЕ" is kept even though it matched one of the service classes.
                if (!tt.IsValue("ОТНОШЕНИЕ", null))
                {
                    continue;
                }
            }
            if (wordClass.IsMisc && Pullenti.Ner.Core.MiscHelper.IsEngArticle(tt))
            {
                continue;
            }

            KeywordReferent word = new KeywordReferent() { Typ = KeywordType.Object };
            string lemma = (tt as Pullenti.Ner.TextToken).Lemma;
            word.AddSlot(KeywordReferent.ATTR_VALUE, lemma, false, 0);
            if (lemma != "ЕСТЬ")
            {
                List<Pullenti.Semantic.Utils.DerivateGroup> wordDrv = Pullenti.Semantic.Utils.DerivateService.FindDerivates(lemma, true, tt.Morph.Language);
                _addNormals(word, wordDrv, lemma);
            }
            word = ad.RegisterReferent(word) as KeywordReferent;
            _setRank(word, cur, max);
            Pullenti.Ner.ReferentToken wordToken = new Pullenti.Ner.ReferentToken(word, tt, tt) { Morph = tt.Morph };
            kit.EmbedToken(wordToken);
            if (tt == t && li.Count == 0)
            {
                // Remember the embedded token that now stands at the start of the phrase.
                t0 = wordToken;
            }
            t = wordToken;
            li.Add(word);
        }

        if (li.Count > 1)
        {
            // Combined keyword for the whole phrase; it references every single-word keyword.
            KeywordReferent phrase = new KeywordReferent() { Typ = KeywordType.Object };
            tmp2.Clear();
            foreach (KeywordReferent kw in li)
            {
                string value = kw.GetStringValue(KeywordReferent.ATTR_VALUE);
                string normal = kw.GetStringValue(KeywordReferent.ATTR_NORMAL);
                tmp2.Add(normal ?? value);
                phrase.AddSlot(KeywordReferent.ATTR_REF, kw, false, 0);
            }
            string val = npt.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Singular, Pullenti.Morph.MorphGender.Undefined, false);
            phrase.AddSlot(KeywordReferent.ATTR_VALUE, val, false, 0);

            // The normal form is the sorted, space-joined list of the parts' normal forms.
            tmp.Length = 0;
            tmp2.Sort();
            foreach (string part in tmp2)
            {
                if (tmp.Length > 0)
                {
                    tmp.Append(' ');
                }
                tmp.Append(part);
            }
            string phraseNorm = tmp.ToString();
            if (phraseNorm != val)
            {
                phrase.AddSlot(KeywordReferent.ATTR_NORMAL, phraseNorm, false, 0);
            }
            phrase = ad.RegisterReferent(phrase) as KeywordReferent;
            _setRank(phrase, cur, max);
            Pullenti.Ner.ReferentToken phraseToken = new Pullenti.Ner.ReferentToken(phrase, t0, t) { Morph = npt.Morph };
            kit.EmbedToken(phraseToken);
            t = phraseToken;
        }
    }

    // Second pass: unite pairs of neighbouring Object keywords.
    cur = 0;
    for (Pullenti.Ner.Token t = kit.FirstToken; t != null; t = t.Next, cur++)
    {
        KeywordReferent kw = t.GetReferent() as KeywordReferent;
        if (kw == null || kw.Typ != KeywordType.Object)
        {
            continue;
        }
        if (t.Next == null || kw.ChildWords > 2)
        {
            continue;
        }

        Pullenti.Ner.Token t1 = t.Next;
        if (t1.IsValue("OF", null) && (t1.WhitespacesAfterCount < 3) && t1.Next != null)
        {
            // "X OF [article] Y": step over "OF" and an optional English article.
            t1 = t1.Next;
            if ((t1 is Pullenti.Ner.TextToken) && Pullenti.Ner.Core.MiscHelper.IsEngArticle(t1) && t1.Next != null)
            {
                t1 = t1.Next;
            }
        }
        else if (!t1.Morph.Case.IsGenitive || t.WhitespacesAfterCount > 1)
        {
            continue;
        }

        KeywordReferent kw2 = t1.GetReferent() as KeywordReferent;
        if (kw2 == null)
        {
            continue;
        }
        if (kw == kw2)
        {
            continue;
        }
        if (kw2.Typ != KeywordType.Object || (kw.ChildWords + kw2.ChildWords) > 3)
        {
            continue;
        }

        KeywordReferent kwUn = new KeywordReferent();
        kwUn.Union(kw, kw2, Pullenti.Ner.Core.MiscHelper.GetTextValue(t1, t1, Pullenti.Ner.Core.GetTextAttr.No));
        kwUn = ad.RegisterReferent(kwUn) as KeywordReferent;
        _setRank(kwUn, cur, max);
        Pullenti.Ner.ReferentToken unionToken = new Pullenti.Ner.ReferentToken(kwUn, t, t1) { Morph = t.Morph };
        kit.EmbedToken(unionToken);
        t = unionToken;
    }

    if (SortKeywordsByRank)
    {
        List<Pullenti.Ner.Referent> all = new List<Pullenti.Ner.Referent>(ad.Referents);
        all.Sort(new CompByRank());
        ad.Referents = all;
    }

    if (AnnotationMaxSentences > 0)
    {
        KeywordReferent ano = Pullenti.Ner.Keyword.Internal.AutoannoSentToken.CreateAnnotation(kit, AnnotationMaxSentences);
        if (ano != null)
        {
            ad.RegisterReferent(ano);
        }
    }
}
/// <summary>
/// Tries to build a territory referent token from a sequence of parsed territory items.
/// </summary>
/// <param name="li">Territory items to analyse; null or an empty list yields null.</param>
/// <param name="ad">Analyzer data; only forwarded to the _tryAttachMoscowAO and _tryAttachPureTerr helpers.</param>
/// <param name="attachAlways">When true, several of the rejection checks in the body are skipped or replaced by others.</param>
/// <param name="cits">Optional city items; must be non-empty when non-null (cits[0] is read unconditionally). A leading City item, or a leading Noun item followed by more items, sets canBeCityBefore.</param>
/// <param name="exists">Optional already-known geo referents; finding the candidate name among them accepts a name that would otherwise be rejected as a dictionary word.</param>
/// <returns>A ReferentToken holding a GeoReferent (new, taken from exObj.Tag, or an existing neighbouring one), or null when nothing is recognised.</returns>
public static Pullenti.Ner.ReferentToken TryAttachTerritory(List <TerrItemToken> li, Pullenti.Ner.Core.AnalyzerData ad, bool attachAlways = false, List <CityItemToken> cits = null, List <Pullenti.Ner.Geo.GeoReferent> exists = null) { if (li == null || li.Count == 0) { return(null); } TerrItemToken exObj = null; TerrItemToken newName = null; List <TerrItemToken> adjList = new List <TerrItemToken>(); TerrItemToken noun = null; TerrItemToken addNoun = null; Pullenti.Ner.ReferentToken rt = _tryAttachMoscowAO(li, ad); if (rt != null) { return(rt); } if (li[0].TerminItem != null && li[0].TerminItem.CanonicText == "ТЕРРИТОРИЯ") { Pullenti.Ner.ReferentToken res2 = _tryAttachPureTerr(li, ad); return(res2); }
// Two items where one carries Rzd and the other RzdDir: build a GeoReferent named after RzdDir that references the Rzd referent.
if (li.Count == 2) { if (li[0].Rzd != null && li[1].RzdDir != null) { Pullenti.Ner.Geo.GeoReferent rzd = new Pullenti.Ner.Geo.GeoReferent(); rzd.AddName(li[1].RzdDir); rzd.AddTypTer(li[0].Kit.BaseLanguage); rzd.AddSlot(Pullenti.Ner.Geo.GeoReferent.ATTR_REF, li[0].Rzd.Referent, false, 0); rzd.AddExtReferent(li[0].Rzd); return(new Pullenti.Ner.ReferentToken(rzd, li[0].BeginToken, li[1].EndToken)); } if (li[1].Rzd != null && li[0].RzdDir != null) { Pullenti.Ner.Geo.GeoReferent rzd = new Pullenti.Ner.Geo.GeoReferent(); rzd.AddName(li[0].RzdDir); rzd.AddTypTer(li[0].Kit.BaseLanguage); rzd.AddSlot(Pullenti.Ner.Geo.GeoReferent.ATTR_REF, li[1].Rzd.Referent, false, 0); rzd.AddExtReferent(li[1].Rzd); return(new Pullenti.Ner.ReferentToken(rzd, li[0].BeginToken, li[1].EndToken)); } } bool canBeCityBefore = false; bool adjTerrBefore = false; if (cits != null) { if (cits[0].Typ == CityItemToken.ItemType.City) { canBeCityBefore = true; } else if (cits[0].Typ == CityItemToken.ItemType.Noun && cits.Count > 1) { canBeCityBefore = true; } }
// Scan the items, assigning each to exObj (has OntoItem), noun / adjList (has TerminItem) or newName (neither); k stops at the first item that does not fit.
int k; for (k = 0; k < li.Count; k++) { if (li[k].OntoItem != null) { if (exObj != null || newName != null) { break; } if (noun != null) { if (k == 1) { if (noun.TerminItem.CanonicText == "РАЙОН" || noun.TerminItem.CanonicText == "ОБЛАСТЬ" || 
noun.TerminItem.CanonicText == "СОЮЗ") { if (li[k].OntoItem.Referent is Pullenti.Ner.Geo.GeoReferent) { if ((li[k].OntoItem.Referent as Pullenti.Ner.Geo.GeoReferent).IsState) { break; } } bool ok = false; Pullenti.Ner.Token tt = li[k].EndToken.Next; if (tt == null) { ok = true; } else if (tt.IsCharOf(",.")) { ok = true; } if (!ok) { ok = MiscLocationHelper.CheckGeoObjectBefore(li[0].BeginToken); } if (!ok) { Pullenti.Ner.Address.Internal.AddressItemToken adr = Pullenti.Ner.Address.Internal.AddressItemToken.TryParse(tt, null, false, false, null); if (adr != null) { if (adr.Typ == Pullenti.Ner.Address.Internal.AddressItemToken.ItemType.Street) { ok = true; } } } if (!ok) { break; } } if (li[k].OntoItem != null) { if (noun.BeginToken.IsValue("МО", null) || noun.BeginToken.IsValue("ЛО", null)) { return(null); } } } } exObj = li[k]; } else if (li[k].TerminItem != null) { if (noun != null) { break; } if (li[k].TerminItem.IsAlwaysPrefix && k > 0) { break; } if (k > 0 && li[k].IsDoubt) { if (li[k].BeginToken == li[k].EndToken && li[k].BeginToken.IsValue("ЗАО", null)) { break; } } if (li[k].TerminItem.IsAdjective || li[k].IsGeoInDictionary) { adjList.Add(li[k]); } else { if (exObj != null) { Pullenti.Ner.Geo.GeoReferent geo = exObj.OntoItem.Referent as Pullenti.Ner.Geo.GeoReferent; if (geo == null) { break; } if (exObj.IsAdjective && ((li[k].TerminItem.CanonicText == "СОЮЗ" || li[k].TerminItem.CanonicText == "ФЕДЕРАЦИЯ"))) { string str = exObj.OntoItem.ToString(); if (!str.Contains(li[k].TerminItem.CanonicText)) { return(null); } } if (li[k].TerminItem.CanonicText == "РАЙОН" || li[k].TerminItem.CanonicText == "ОКРУГ" || li[k].TerminItem.CanonicText == "КРАЙ") { StringBuilder tmp = new StringBuilder(); foreach (Pullenti.Ner.Slot s in geo.Slots) { if (s.TypeName == Pullenti.Ner.Geo.GeoReferent.ATTR_TYPE) { tmp.AppendFormat("{0};", s.Value); } } if (!tmp.ToString().ToUpper().Contains(li[k].TerminItem.CanonicText)) { if (k != 1 || newName != null) { break; } newName = li[0]; 
newName.IsAdjective = true; newName.OntoItem = null; exObj = null; } } } noun = li[k]; if (k == 0) { TerrItemToken tt = TerrItemToken.TryParse(li[k].BeginToken.Previous, null, true, false, null); if (tt != null && tt.Morph.Class.IsAdjective) { adjTerrBefore = true; } } } } else { if (exObj != null) { break; } if (newName != null) { break; } newName = li[k]; } }
// Validate the collected combination and derive name / altName / fullName; most rejections return null from here.
string name = null; string altName = null; string fullName = null; Pullenti.Ner.MorphCollection morph = null; if (exObj != null) { if (exObj.IsAdjective && !exObj.Morph.Language.IsEn && noun == null) { if (attachAlways && exObj.EndToken.Next != null) { Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(exObj.BeginToken, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null); if (exObj.EndToken.Next.IsCommaAnd) { } else if (npt == null) { } else { Pullenti.Ner.Address.Internal.StreetItemToken str = Pullenti.Ner.Address.Internal.StreetItemToken.TryParse(exObj.EndToken.Next, null, false, null, false); if (str != null) { if (str.Typ == Pullenti.Ner.Address.Internal.StreetItemType.Noun && str.EndToken == npt.EndToken) { return(null); } } } } else { CityItemToken cit = CityItemToken.TryParse(exObj.EndToken.Next, null, false, null); if (cit != null && ((cit.Typ == CityItemToken.ItemType.Noun || cit.Typ == CityItemToken.ItemType.City))) { Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(exObj.BeginToken, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null); if (npt != null && npt.EndToken == cit.EndToken) { } else { return(null); } } else if (exObj.BeginToken.IsValue("ПОДНЕБЕСНЫЙ", null)) { } else { return(null); } } } if (noun == null && exObj.CanBeCity) { CityItemToken cit0 = CityItemToken.TryParseBack(exObj.BeginToken.Previous); if (cit0 != null && cit0.Typ != CityItemToken.ItemType.ProperName) { return(null); } } if (exObj.IsDoubt && noun == null) { bool ok2 = false; if (_canBeGeoAfter(exObj.EndToken.Next)) { ok2 = true; } else if (!exObj.CanBeSurname 
&& !exObj.CanBeCity) { if ((exObj.EndToken.Next != null && exObj.EndToken.Next.IsChar(')') && exObj.BeginToken.Previous != null) && exObj.BeginToken.Previous.IsChar('(')) { ok2 = true; } else if (exObj.Chars.IsLatinLetter && exObj.BeginToken.Previous != null) { if (exObj.BeginToken.Previous.IsValue("IN", null)) { ok2 = true; } else if (exObj.BeginToken.Previous.IsValue("THE", null) && exObj.BeginToken.Previous.Previous != null && exObj.BeginToken.Previous.Previous.IsValue("IN", null)) { ok2 = true; } } } if (!ok2) { CityItemToken cit0 = CityItemToken.TryParseBack(exObj.BeginToken.Previous); if (cit0 != null && cit0.Typ != CityItemToken.ItemType.ProperName) { } else if (MiscLocationHelper.CheckGeoObjectBefore(exObj.BeginToken.Previous)) { } else { return(null); } } } name = exObj.OntoItem.CanonicText; morph = exObj.Morph; } else if (newName != null) { if (noun == null) { return(null); } for (int j = 1; j < k; j++) { if (li[j].IsNewlineBefore && !li[0].IsNewlineBefore) { if (Pullenti.Ner.Core.BracketHelper.CanBeStartOfSequence(li[j].BeginToken, false, false)) { } else { return(null); } } } morph = noun.Morph; if (newName.IsAdjective) { if (noun.TerminItem.Acronym == "АО") { if (noun.BeginToken != noun.EndToken) { return(null); } if (newName.Morph.Gender != Pullenti.Morph.MorphGender.Feminie) { return(null); } } Pullenti.Ner.Geo.GeoReferent geoBefore = null; Pullenti.Ner.Token tt0 = li[0].BeginToken.Previous; if (tt0 != null && tt0.IsCommaAnd) { tt0 = tt0.Previous; } if (!li[0].IsNewlineBefore && tt0 != null) { geoBefore = tt0.GetReferent() as Pullenti.Ner.Geo.GeoReferent; } if (li.IndexOf(noun) < li.IndexOf(newName)) { if (noun.TerminItem.IsState) { return(null); } if (newName.CanBeSurname && geoBefore == null) { if (((noun.Morph.Case & newName.Morph.Case)).IsUndefined) { return(null); } } if (Pullenti.Ner.Core.MiscHelper.IsExistsInDictionary(newName.BeginToken, newName.EndToken, Pullenti.Morph.MorphClass.Adjective | Pullenti.Morph.MorphClass.Pronoun | 
Pullenti.Morph.MorphClass.Verb)) { if (noun.BeginToken != newName.BeginToken) { if (geoBefore == null) { if (li.Count == 2 && _canBeGeoAfter(li[1].EndToken.Next)) { } else if (li.Count == 3 && li[2].TerminItem != null && _canBeGeoAfter(li[2].EndToken.Next)) { } else if (newName.IsGeoInDictionary) { } else if (newName.EndToken.IsNewlineAfter) { } else { return(null); } } } } Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(newName.EndToken, Pullenti.Ner.Core.NounPhraseParseAttr.ParsePronouns, 0, null); if (npt != null && npt.EndToken != newName.EndToken) { if (li.Count >= 3 && li[2].TerminItem != null && npt.EndToken == li[2].EndToken) { addNoun = li[2]; } else { return(null); } } Pullenti.Ner.ReferentToken rtp = newName.Kit.ProcessReferent("PERSON", newName.BeginToken); if (rtp != null) { return(null); } name = Pullenti.Ner.Core.ProperNameHelper.GetNameEx(newName.BeginToken, newName.EndToken, Pullenti.Morph.MorphClass.Adjective, Pullenti.Morph.MorphCase.Undefined, noun.TerminItem.Gender, false, false); } else { bool ok = false; if (((k + 1) < li.Count) && li[k].TerminItem == null && li[k + 1].TerminItem != null) { ok = true; } else if ((k < li.Count) && li[k].OntoItem != null) { ok = true; } else if (k == li.Count && !newName.IsAdjInDictionary) { ok = true; } else if (MiscLocationHelper.CheckGeoObjectBefore(li[0].BeginToken) || canBeCityBefore) { ok = true; } else if (MiscLocationHelper.CheckGeoObjectAfter(li[k - 1].EndToken, false)) { ok = true; } else if (li.Count == 3 && k == 2) { CityItemToken cit = CityItemToken.TryParse(li[2].BeginToken, null, false, null); if (cit != null) { if (cit.Typ == CityItemToken.ItemType.City || cit.Typ == CityItemToken.ItemType.Noun) { ok = true; } } } else if (li.Count == 2) { ok = _canBeGeoAfter(li[li.Count - 1].EndToken.Next); } if (!ok && !li[0].IsNewlineBefore && !li[0].Chars.IsAllLower) { Pullenti.Ner.ReferentToken rt00 = li[0].Kit.ProcessReferent("PERSONPROPERTY", li[0].BeginToken.Previous); 
if (rt00 != null) { ok = true; } } if (noun.TerminItem != null && noun.TerminItem.IsStrong && newName.IsAdjective) { ok = true; } if (noun.IsDoubt && adjList.Count == 0 && geoBefore == null) { return(null); } name = Pullenti.Ner.Core.ProperNameHelper.GetNameEx(newName.BeginToken, newName.EndToken, Pullenti.Morph.MorphClass.Adjective, Pullenti.Morph.MorphCase.Undefined, noun.TerminItem.Gender, false, false); if (!ok && !attachAlways) { if (Pullenti.Ner.Core.MiscHelper.IsExistsInDictionary(newName.BeginToken, newName.EndToken, Pullenti.Morph.MorphClass.Adjective | Pullenti.Morph.MorphClass.Pronoun | Pullenti.Morph.MorphClass.Verb)) { if (exists != null) { foreach (Pullenti.Ner.Geo.GeoReferent e in exists) { if (e.FindSlot(Pullenti.Ner.Geo.GeoReferent.ATTR_NAME, name, true) != null) { ok = true; break; } } } if (!ok) { return(null); } } } fullName = string.Format("{0} {1}", Pullenti.Ner.Core.ProperNameHelper.GetNameEx(li[0].BeginToken, noun.BeginToken.Previous, Pullenti.Morph.MorphClass.Adjective, Pullenti.Morph.MorphCase.Undefined, noun.TerminItem.Gender, false, false), noun.TerminItem.CanonicText); } } else { if (!attachAlways || ((noun.TerminItem != null && noun.TerminItem.CanonicText == "ФЕДЕРАЦИЯ"))) { bool isLatin = noun.Chars.IsLatinLetter && newName.Chars.IsLatinLetter; if (li.IndexOf(noun) > li.IndexOf(newName)) { if (!isLatin) { return(null); } } if (!newName.IsDistrictName && !Pullenti.Ner.Core.BracketHelper.CanBeStartOfSequence(newName.BeginToken, false, false)) { if (adjList.Count == 0 && Pullenti.Ner.Core.MiscHelper.IsExistsInDictionary(newName.BeginToken, newName.EndToken, Pullenti.Morph.MorphClass.Noun | Pullenti.Morph.MorphClass.Pronoun)) { if (li.Count == 2 && noun.IsCityRegion && (noun.WhitespacesAfterCount < 2)) { } else { return(null); } } if (!isLatin) { if ((noun.TerminItem.IsRegion && !attachAlways && ((!adjTerrBefore || newName.IsDoubt))) && !noun.IsCityRegion && !noun.TerminItem.IsSpecificPrefix) { if 
(!MiscLocationHelper.CheckGeoObjectBefore(noun.BeginToken)) { if (!noun.IsDoubt && noun.BeginToken != noun.EndToken) { } else if ((noun.TerminItem.IsAlwaysPrefix && li.Count == 2 && li[0] == noun) && li[1] == newName) { } else { return(null); } } } if (noun.IsDoubt && adjList.Count == 0) { if (noun.TerminItem.Acronym == "МО" || noun.TerminItem.Acronym == "ЛО") { if (k == (li.Count - 1) && li[k].TerminItem != null) { addNoun = li[k]; k++; } else if (li.Count == 2 && noun == li[0] && newName.ToString().EndsWith("совет")) { } else { return(null); } } else { return(null); } } Pullenti.Ner.ReferentToken pers = newName.Kit.ProcessReferent("PERSON", newName.BeginToken); if (pers != null) { return(null); } } } } name = Pullenti.Ner.Core.MiscHelper.GetTextValue(newName.BeginToken, newName.EndToken, Pullenti.Ner.Core.GetTextAttr.No); if (newName.BeginToken != newName.EndToken) { for (Pullenti.Ner.Token ttt = newName.BeginToken.Next; ttt != null && ttt.EndChar <= newName.EndChar; ttt = ttt.Next) { if (ttt.Chars.IsLetter) { TerrItemToken ty = TerrItemToken.TryParse(ttt, null, false, false, null); if ((ty != null && ty.TerminItem != null && noun != null) && ((ty.TerminItem.CanonicText.Contains(noun.TerminItem.CanonicText) || noun.TerminItem.CanonicText.Contains(ty.TerminItem.CanonicText)))) { name = Pullenti.Ner.Core.MiscHelper.GetTextValue(newName.BeginToken, ttt.Previous, Pullenti.Ner.Core.GetTextAttr.No); break; } } } } if (adjList.Count > 0) { Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(adjList[0].BeginToken, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null); if (npt != null && npt.EndToken == noun.EndToken) { altName = string.Format("{0} {1}", npt.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false), name); } } } } else { if ((li.Count == 1 && noun != null && noun.EndToken.Next != null) && (noun.EndToken.Next.GetReferent() is Pullenti.Ner.Geo.GeoReferent)) { 
Pullenti.Ner.Geo.GeoReferent g = noun.EndToken.Next.GetReferent() as Pullenti.Ner.Geo.GeoReferent; if (noun.TerminItem != null) { string tyy = noun.TerminItem.CanonicText.ToLower(); bool ooo = false; if (g.FindSlot(Pullenti.Ner.Geo.GeoReferent.ATTR_TYPE, tyy, true) != null) { ooo = true; } else if (tyy.EndsWith("район") && g.FindSlot(Pullenti.Ner.Geo.GeoReferent.ATTR_TYPE, "район", true) != null) { ooo = true; } if (ooo) { return new Pullenti.Ner.ReferentToken(g, noun.BeginToken, noun.EndToken.Next) { Morph = noun.BeginToken.Morph }; } } } if ((li.Count == 1 && noun == li[0] && li[0].TerminItem != null) && TerrItemToken.TryParse(li[0].EndToken.Next, null, true, false, null) == null && TerrItemToken.TryParse(li[0].BeginToken.Previous, null, true, false, null) == null) { if (li[0].Morph.Number == Pullenti.Morph.MorphNumber.Plural) { return(null); } int cou = 0; string str = li[0].TerminItem.CanonicText.ToLower(); for (Pullenti.Ner.Token tt = li[0].BeginToken.Previous; tt != null; tt = tt.Previous) { if (tt.IsNewlineAfter) { cou += 10; } else { cou++; } if (cou > 500) { break; } Pullenti.Ner.Geo.GeoReferent g = tt.GetReferent() as Pullenti.Ner.Geo.GeoReferent; if (g == null) { continue; } bool ok = true; cou = 0; for (tt = li[0].EndToken.Next; tt != null; tt = tt.Next) { if (tt.IsNewlineBefore) { cou += 10; } else { cou++; } if (cou > 500) { break; } TerrItemToken tee = TerrItemToken.TryParse(tt, null, true, false, null); if (tee == null) { continue; } ok = false; break; } if (ok) { for (int ii = 0; g != null && (ii < 3); g = g.Higher, ii++) { if (g.FindSlot(Pullenti.Ner.Geo.GeoReferent.ATTR_TYPE, str, true) != null) { return new Pullenti.Ner.ReferentToken(g, li[0].BeginToken, li[0].EndToken) { Morph = noun.BeginToken.Morph }; } } } break; } } return(null); }
// Build the GeoReferent, or reuse the one stored in exObj.Tag.
Pullenti.Ner.Geo.GeoReferent ter = null; if (exObj != null && (exObj.Tag is Pullenti.Ner.Geo.GeoReferent)) { ter = exObj.Tag as Pullenti.Ner.Geo.GeoReferent; } else { ter = new Pullenti.Ner.Geo.GeoReferent(); 
if (exObj != null) { Pullenti.Ner.Geo.GeoReferent geo = exObj.OntoItem.Referent as Pullenti.Ner.Geo.GeoReferent; if (geo != null && !geo.IsCity) { ter.MergeSlots2(geo, li[0].Kit.BaseLanguage); } else { ter.AddName(name); } if (noun == null && exObj.CanBeCity) { ter.AddTypCity(li[0].Kit.BaseLanguage); } else { } } else if (newName != null) { ter.AddName(name); if (altName != null) { ter.AddName(altName); } } if (noun != null) { if (noun.TerminItem.CanonicText == "АО") { ter.AddTyp((li[0].Kit.BaseLanguage.IsUa ? "АВТОНОМНИЙ ОКРУГ" : "АВТОНОМНЫЙ ОКРУГ")); } else if (noun.TerminItem.CanonicText == "МУНИЦИПАЛЬНОЕ СОБРАНИЕ" || noun.TerminItem.CanonicText == "МУНІЦИПАЛЬНЕ ЗБОРИ") { ter.AddTyp((li[0].Kit.BaseLanguage.IsUa ? "МУНІЦИПАЛЬНЕ УТВОРЕННЯ" : "МУНИЦИПАЛЬНОЕ ОБРАЗОВАНИЕ")); } else if (noun.TerminItem.Acronym == "МО" && addNoun != null) { ter.AddTyp(addNoun.TerminItem.CanonicText); } else { if (noun.TerminItem.CanonicText == "СОЮЗ" && exObj != null && exObj.EndChar > noun.EndChar) { return new Pullenti.Ner.ReferentToken(ter, exObj.BeginToken, exObj.EndToken) { Morph = exObj.Morph }; } ter.AddTyp(noun.TerminItem.CanonicText); if (noun.TerminItem.IsRegion && ter.IsState) { ter.AddTypReg(li[0].Kit.BaseLanguage); } } } if (ter.IsState && ter.IsRegion) { foreach (TerrItemToken a in adjList) { if (a.TerminItem.IsRegion) { ter.AddTypReg(li[0].Kit.BaseLanguage); break; } } } if (ter.IsState) { if (fullName != null) { ter.AddName(fullName); } } }
// Wrap items 0..k-1 into the result token, set its morphology, then adjust the token's begin/end.
Pullenti.Ner.ReferentToken res = new Pullenti.Ner.ReferentToken(ter, li[0].BeginToken, li[k - 1].EndToken); if (noun != null && noun.Morph.Class.IsNoun) { res.Morph = noun.Morph; } else { res.Morph = new Pullenti.Ner.MorphCollection(); for (int ii = 0; ii < k; ii++) { foreach (Pullenti.Morph.MorphBaseInfo v in li[ii].Morph.Items) { Pullenti.Morph.MorphBaseInfo bi = new Pullenti.Morph.MorphBaseInfo(); bi.CopyFrom(v); if (noun != null) { if (bi.Class.IsAdjective) { bi.Class = Pullenti.Morph.MorphClass.Noun; } } res.Morph.AddItem(bi); 
} } } if (li[0].TerminItem != null && li[0].TerminItem.IsSpecificPrefix) { res.BeginToken = li[0].EndToken.Next; } if (addNoun != null && addNoun.EndChar > res.EndChar) { res.EndToken = addNoun.EndToken; } if ((res.BeginToken.Previous is Pullenti.Ner.TextToken) && (res.WhitespacesBeforeCount < 2)) { Pullenti.Ner.TextToken tt = res.BeginToken.Previous as Pullenti.Ner.TextToken; if (tt.Term == "АР") { foreach (string ty in ter.Typs) { if (ty.Contains("республика") || ty.Contains("республіка")) { res.BeginToken = tt; break; } } } } return(res); }
/// <summary>
/// Wraps a referent token into a keyword token, recursing into referent tokens nested inside it.
/// </summary>
/// <param name="ad">Analyzer data in which the created keyword referents are registered.</param>
/// <param name="t">Token to process; returned unchanged unless it is a ReferentToken with a referent of a handled kind.</param>
/// <param name="cur">Passed through to _setRank.</param>
/// <param name="max">Passed through to _setRank.</param>
/// <returns>The newly embedded keyword token, or <paramref name="t"/> itself when nothing was created.</returns>
Pullenti.Ner.Token _addReferents(Pullenti.Ner.Core.AnalyzerData ad, Pullenti.Ner.Token t, int cur, int max)
{
    if (!(t is Pullenti.Ner.ReferentToken))
    {
        return(t);
    }

    Pullenti.Ner.Referent referent = t.GetReferent();
    if (referent == null)
    {
        return(t);
    }

    // Denominations: a Referent keyword whose normal forms are the denomination's values.
    Pullenti.Ner.Denomination.DenominationReferent denomination = referent as Pullenti.Ner.Denomination.DenominationReferent;
    if (denomination != null)
    {
        KeywordReferent denomKeyword = new KeywordReferent() { Typ = KeywordType.Referent };
        foreach (Pullenti.Ner.Slot slot in denomination.Slots)
        {
            if (slot.TypeName != Pullenti.Ner.Denomination.DenominationReferent.ATTR_VALUE)
            {
                continue;
            }
            denomKeyword.AddSlot(KeywordReferent.ATTR_NORMAL, slot.Value, false, 0);
        }
        denomKeyword.AddSlot(KeywordReferent.ATTR_REF, denomination, false, 0);

        Pullenti.Ner.ReferentToken denomToken = new Pullenti.Ner.ReferentToken(ad.RegisterReferent(denomKeyword), t, t);
        t.Kit.EmbedToken(denomToken);
        return(denomToken);
    }

    // Phones, URIs and bank data never become keywords.
    bool isSkippedKind = (referent is Pullenti.Ner.Phone.PhoneReferent)
        || (referent is Pullenti.Ner.Uri.UriReferent)
        || (referent is Pullenti.Ner.Bank.BankDataReferent);
    if (isSkippedKind)
    {
        return(t);
    }

    // Money: an Object keyword whose normal form is the currency.
    Pullenti.Ner.Money.MoneyReferent money = referent as Pullenti.Ner.Money.MoneyReferent;
    if (money != null)
    {
        KeywordReferent moneyKeyword = new KeywordReferent() { Typ = KeywordType.Object };
        moneyKeyword.AddSlot(KeywordReferent.ATTR_NORMAL, money.Currency, false, 0);

        Pullenti.Ner.ReferentToken moneyToken = new Pullenti.Ner.ReferentToken(ad.RegisterReferent(moneyKeyword), t, t);
        t.Kit.EmbedToken(moneyToken);
        return(moneyToken);
    }

    string typeName = referent.TypeName;
    if (typeName == "DATE" || typeName == "DATERANGE" || typeName == "BOOKLINKREF")
    {
        return(t);
    }

    // Process referent tokens nested inside this one before wrapping the outer token.
    Pullenti.Ner.Token inner = (t as Pullenti.Ner.MetaToken).BeginToken;
    while (inner != null && inner.EndChar <= t.EndChar)
    {
        if (inner is Pullenti.Ner.ReferentToken)
        {
            this._addReferents(ad, inner, cur, max);
        }
        inner = inner.Next;
    }

    KeywordReferent keyword = new KeywordReferent() { Typ = KeywordType.Referent };

    // For GEO referents the "ALPHA2" value is preferred; otherwise the referent's string form is used.
    string normal = (typeName == "GEO") ? referent.GetStringValue("ALPHA2") : null;
    if (normal == null)
    {
        normal = referent.ToString(true, null, 0);
    }
    if (normal != null)
    {
        keyword.AddSlot(KeywordReferent.ATTR_NORMAL, normal.ToUpper(), false, 0);
    }
    keyword.AddSlot(KeywordReferent.ATTR_REF, t.GetReferent(), false, 0);
    _setRank(keyword, cur, max);

    Pullenti.Ner.ReferentToken keywordToken = new Pullenti.Ner.ReferentToken(ad.RegisterReferent(keyword), t, t);
    t.Kit.EmbedToken(keywordToken);
    return(keywordToken);
}