/// <summary>
/// Runs the analyzer over the token chain of <paramref name="kit"/> in two passes.
/// Pass 1 parses item lists with TransItemToken.TryParseList, turns them into referent
/// tokens via TryAttach, registers and embeds them, and collects the model and name
/// strings of every embedded referent.
/// Pass 2 rescans the chain and embeds referent tokens for mentions that match one of the
/// collected model or name strings.
/// </summary>
/// <param name="kit">Analysis kit whose token chain is scanned and modified in place.</param>
public override void Process(Pullenti.Ner.Core.AnalysisKit kit)
{
    Pullenti.Ner.Core.AnalyzerData data = kit.GetAnalyzerData(this);
    Pullenti.Ner.Core.TerminCollection modelTermins = new Pullenti.Ner.Core.TerminCollection();
    Dictionary<string, List<Pullenti.Ner.Referent>> referentsByModel = new Dictionary<string, List<Pullenti.Ner.Referent>>();
    Pullenti.Ner.Core.TerminCollection nameTermins = new Pullenti.Ner.Core.TerminCollection();

    // Pass 1: full item lists.
    Pullenti.Ner.Token cur = kit.FirstToken;
    while (cur != null)
    {
        List<Pullenti.Ner.Transport.Internal.TransItemToken> items = Pullenti.Ner.Transport.Internal.TransItemToken.TryParseList(cur, 10);
        List<Pullenti.Ner.ReferentToken> attached = null;
        if (items != null)
        {
            attached = this.TryAttach(items, false);
        }
        if (attached != null)
        {
            foreach (Pullenti.Ner.ReferentToken refTok in attached)
            {
                // Walk back over at most 1000 tokens looking for an earlier transport referent
                // that already holds every slot of the new one; if found, reuse it.
                // The walk starts from the current position, which moves as tokens are embedded.
                int scanned = 0;
                Pullenti.Ner.Token back = cur.Previous;
                while (back != null && scanned < 1000)
                {
                    TransportReferent earlier = back.GetReferent() as TransportReferent;
                    if (earlier != null && _coversAllSlots(earlier, refTok.Referent))
                    {
                        refTok.Referent = earlier;
                        break;
                    }
                    back = back.Previous;
                    scanned++;
                }

                refTok.Referent = data.RegisterReferent(refTok.Referent);
                kit.EmbedToken(refTok);
                cur = refTok;

                foreach (Pullenti.Ner.Slot slot in refTok.Referent.Slots)
                {
                    if (slot.TypeName == TransportReferent.ATTR_MODEL)
                    {
                        // Index the bare model string and, when a brand is known, "<brand> <model>".
                        string modelKey = slot.Value.ToString();
                        _indexModelKey(modelKey, refTok.Referent, referentsByModel, modelTermins);
                        string brand = refTok.Referent.GetStringValue(TransportReferent.ATTR_BRAND);
                        if (brand != null)
                        {
                            _indexModelKey(string.Format("{0} {1}", brand, modelKey), refTok.Referent, referentsByModel, modelTermins);
                        }
                    }
                    else if (slot.TypeName == TransportReferent.ATTR_NAME)
                    {
                        nameTermins.Add(new Pullenti.Ner.Core.Termin(slot.Value.ToString()) { Tag = refTok.Referent });
                    }
                }
            }
        }
        cur = cur.Next;
    }

    // Nothing was collected, so there is nothing to look for in pass 2.
    if (referentsByModel.Count == 0 && nameTermins.Termins.Count == 0)
    {
        return;
    }

    // Pass 2: mentions by model or name only.
    for (Pullenti.Ner.Token t = kit.FirstToken; t != null; t = t.Next)
    {
        t = _embedKnownMention(kit, t, modelTermins, nameTermins);
    }
}

// Returns true when every slot of candidate is found in earlier via FindSlot(type, value, true).
private static bool _coversAllSlots(TransportReferent earlier, Pullenti.Ner.Referent candidate)
{
    foreach (Pullenti.Ner.Slot slot in candidate.Slots)
    {
        if (earlier.FindSlot(slot.TypeName, slot.Value, true) == null)
        {
            return false;
        }
    }
    return true;
}

// Records referent under key (unless the key starts with a digit) and adds the key to the model termins,
// tagged with the list of referents sharing that key.
private static void _indexModelKey(string key, Pullenti.Ner.Referent referent, Dictionary<string, List<Pullenti.Ner.Referent>> referentsByModel, Pullenti.Ner.Core.TerminCollection modelTermins)
{
    if (char.IsDigit(key[0]))
    {
        return;
    }
    List<Pullenti.Ner.Referent> bucket;
    if (!referentsByModel.TryGetValue(key, out bucket))
    {
        bucket = new List<Pullenti.Ner.Referent>();
        referentsByModel.Add(key, bucket);
    }
    if (!bucket.Contains(referent))
    {
        bucket.Add(referent);
    }
    modelTermins.AddString(key, bucket, null, false);
}

// Tries to embed a referent token for a known name/model mention starting at t.
// Returns the embedded token when one was created, otherwise t itself, so the caller can continue from it.
private static Pullenti.Ner.Token _embedKnownMention(Pullenti.Ner.Core.AnalysisKit kit, Pullenti.Ner.Token t, Pullenti.Ner.Core.TerminCollection modelTermins, Pullenti.Ner.Core.TerminCollection nameTermins)
{
    // A bracket sequence whose content is exactly a known name.
    Pullenti.Ner.Core.BracketSequenceToken br = Pullenti.Ner.Core.BracketHelper.TryParse(t, Pullenti.Ner.Core.BracketParseAttr.No, 10);
    if (br != null)
    {
        Pullenti.Ner.Core.TerminToken inner = nameTermins.TryParse(t.Next, Pullenti.Ner.Core.TerminParseAttr.No);
        if (inner != null && inner.EndToken.Next == br.EndToken)
        {
            Pullenti.Ner.ReferentToken quoted = new Pullenti.Ner.ReferentToken(inner.Termin.Tag as Pullenti.Ner.Referent, br.BeginToken, br.EndToken);
            kit.EmbedToken(quoted);
            return quoted;
        }
    }

    if (!(t is Pullenti.Ner.TextToken) || !t.Chars.IsLetter)
    {
        return t;
    }

    // Models are tried first; names only for tokens that are not all-lowercase.
    Pullenti.Ner.Core.TerminToken hit = modelTermins.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
    if (hit == null && !t.Chars.IsAllLower)
    {
        hit = nameTermins.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
    }
    if (hit == null)
    {
        return t;
    }

    // With no whitespace after the match, accept only if one of ",.)" or a bracket follows.
    if (!hit.IsWhitespaceAfter)
    {
        Pullenti.Ner.Token after = hit.EndToken.Next;
        bool closingPunct = after != null && after.IsCharOf(",.)");
        if (!closingPunct && !Pullenti.Ner.Core.BracketHelper.IsBracket(after, false))
        {
            return t;
        }
    }

    // A model tag is a list of referents and is used only when it holds exactly one;
    // a name tag is the referent itself.
    Pullenti.Ner.Referent target;
    List<Pullenti.Ner.Referent> bucket = hit.Termin.Tag as List<Pullenti.Ner.Referent>;
    if (bucket != null && bucket.Count == 1)
    {
        target = bucket[0];
    }
    else
    {
        target = hit.Termin.Tag as Pullenti.Ner.Referent;
    }
    if (target == null)
    {
        return t;
    }

    // If a brand item directly precedes the match, add it to the referent and widen the span.
    Pullenti.Ner.Transport.Internal.TransItemToken before = Pullenti.Ner.Transport.Internal.TransItemToken.TryParse(hit.BeginToken.Previous, null, false, true);
    if (before != null && before.Typ == Pullenti.Ner.Transport.Internal.TransItemToken.Typs.Brand)
    {
        target.AddSlot(TransportReferent.ATTR_BRAND, before.Value, false, 0);
        hit.BeginToken = before.BeginToken;
    }
    Pullenti.Ner.ReferentToken mention = new Pullenti.Ner.ReferentToken(target, hit.BeginToken, hit.EndToken);
    kit.EmbedToken(mention);
    return mention;
}
/// <summary>
/// Tries to recognise a single weapon item starting at token <paramref name="t"/>.
/// The checks run in this order: bracketed item, ontology term (noun / brand / name / model),
/// prefixed number, short upper-case model designation, capitalised word after a known item,
/// the "МАРКА" keyword, calibre, and the "ПРОИЗВОДСТВО" keyword followed by an organisation
/// or geo referent.
/// </summary>
/// <param name="t">First token of the candidate item; null yields null.</param>
/// <param name="prev">Previously parsed item, or null; drives the capitalised-word heuristic.</param>
/// <param name="afterConj">Only forwarded to the recursive calls in this method.</param>
/// <param name="attachHigh">Only forwarded to the recursive call for bracketed items.</param>
/// <returns>The recognised item, or null when nothing matched.</returns>
static WeaponItemToken _TryParse(Pullenti.Ner.Token t, WeaponItemToken prev, bool afterConj, bool attachHigh = false)
{
    if (t == null)
    {
        return null;
    }

    // Item wrapped in brackets: parse the inside, then widen the span over the brackets.
    if (Pullenti.Ner.Core.BracketHelper.IsBracket(t, true))
    {
        WeaponItemToken wit = _TryParse(t.Next, prev, afterConj, attachHigh);
        if (wit != null)
        {
            if (wit.EndToken.Next == null)
            {
                wit.BeginToken = t;
                return wit;
            }
            if (Pullenti.Ner.Core.BracketHelper.IsBracket(wit.EndToken.Next, true))
            {
                wit.BeginToken = t;
                wit.EndToken = wit.EndToken.Next;
                return wit;
            }
        }
    }

    // Ontology lookup; the termin tag carries the item type.
    Pullenti.Ner.Core.TerminToken tok = m_Ontology.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
    if (tok != null)
    {
        WeaponItemToken res = new WeaponItemToken(t, tok.EndToken);
        res.Typ = (Typs)tok.Termin.Tag;
        if (res.Typ == Typs.Noun)
        {
            res.Value = tok.Termin.CanonicText;
            if (tok.Termin.Tag2 != null)
            {
                res.IsDoubt = true;
            }
            // Absorb following brand items and dictionary adjectives into the noun.
            for (Pullenti.Ner.Token tt = res.EndToken.Next; tt != null; tt = tt.Next)
            {
                if (tt.WhitespacesBeforeCount > 2)
                {
                    break;
                }
                WeaponItemToken wit = _TryParse(tt, null, false, false);
                if (wit != null)
                {
                    if (wit.Typ == Typs.Brand)
                    {
                        res.InnerTokens.Add(wit);
                        res.EndToken = (tt = wit.EndToken);
                        continue;
                    }
                    break;
                }
                if (!(tt is Pullenti.Ner.TextToken))
                {
                    break;
                }
                Pullenti.Morph.MorphClass mc = tt.GetMorphClassInDictionary();
                if (mc == Pullenti.Morph.MorphClass.Adjective)
                {
                    // Rebuild AltValue as "<earlier adjectives><this adjective> <noun>".
                    if (res.AltValue == null)
                    {
                        res.AltValue = res.Value;
                    }
                    if (res.AltValue.EndsWith(res.Value))
                    {
                        res.AltValue = res.AltValue.Substring(0, res.AltValue.Length - res.Value.Length);
                    }
                    res.AltValue = string.Format("{0}{1} {2}", res.AltValue, (tt as Pullenti.Ner.TextToken).Term, res.Value);
                    res.EndToken = tt;
                    continue;
                }
                break;
            }
            return res;
        }
        if (res.Typ == Typs.Brand || res.Typ == Typs.Name)
        {
            res.Value = tok.Termin.CanonicText;
            return res;
        }
        if (res.Typ == Typs.Model)
        {
            res.Value = tok.Termin.CanonicText;
            if (tok.Termin.Tag2 is List<Pullenti.Ner.Core.Termin>)
            {
                // Tag2 lists further termins; each becomes an inner token over the same span.
                List<Pullenti.Ner.Core.Termin> li = tok.Termin.Tag2 as List<Pullenti.Ner.Core.Termin>;
                foreach (Pullenti.Ner.Core.Termin to in li)
                {
                    WeaponItemToken wit = new WeaponItemToken(t, tok.EndToken) { Typ = (Typs)to.Tag, Value = to.CanonicText, IsInternal = tok.BeginToken == tok.EndToken };
                    res.InnerTokens.Add(wit);
                    if (to.AdditionalVars != null && to.AdditionalVars.Count > 0)
                    {
                        wit.AltValue = to.AdditionalVars[0].CanonicText;
                    }
                }
            }
            res._correctModel();
            return res;
        }
    }

    // Number introduced by a number prefix.
    Pullenti.Ner.Token nnn = Pullenti.Ner.Core.MiscHelper.CheckNumberPrefix(t);
    if (nnn != null)
    {
        Pullenti.Ner.Transport.Internal.TransItemToken tit = Pullenti.Ner.Transport.Internal.TransItemToken._attachNumber(nnn, true);
        if (tit != null)
        {
            WeaponItemToken res = new WeaponItemToken(t, tit.EndToken) { Typ = Typs.Number };
            res.Value = tit.Value;
            res.AltValue = tit.AltValue;
            return res;
        }
    }

    // Short (fewer than 4 characters) all-upper-case word: candidate model designation.
    if (((t is Pullenti.Ner.TextToken) && t.Chars.IsLetter && t.Chars.IsAllUpper) && (t.LengthChar < 4))
    {
        if ((t.Next != null && ((t.Next.IsHiphen || t.Next.IsChar('.'))) && (t.Next.WhitespacesAfterCount < 2)) && (t.Next.Next is Pullenti.Ner.NumberToken))
        {
            // The separator is included in the span, so _correctModel starts at the number.
            WeaponItemToken res = new WeaponItemToken(t, t.Next) { Typ = Typs.Model, IsDoubt = true };
            res.Value = (t as Pullenti.Ner.TextToken).Term;
            res._correctModel();
            return res;
        }
        if ((t.Next is Pullenti.Ner.NumberToken) && !t.IsWhitespaceAfter)
        {
            WeaponItemToken res = new WeaponItemToken(t, t) { Typ = Typs.Model, IsDoubt = true };
            res.Value = (t as Pullenti.Ner.TextToken).Term;
            res._correctModel();
            return res;
        }
        // "СП" followed by a model or brand is treated as the noun "ПИСТОЛЕТ".
        if ((t as Pullenti.Ner.TextToken).Term == "СП" && (t.WhitespacesAfterCount < 3) && (t.Next is Pullenti.Ner.TextToken))
        {
            WeaponItemToken pp = _TryParse(t.Next, null, false, false);
            if (pp != null && ((pp.Typ == Typs.Model || pp.Typ == Typs.Brand)))
            {
                WeaponItemToken res = new WeaponItemToken(t, t) { Typ = Typs.Noun };
                res.Value = "ПИСТОЛЕТ";
                res.AltValue = "СЛУЖЕБНЫЙ ПИСТОЛЕТ";
                return res;
            }
        }
    }

    // Non-lowercase word longer than 2 characters, accepted only after a noun/model/brand
    // or (with no previous item) right after a comma/"and" token.
    if (((t is Pullenti.Ner.TextToken) && t.Chars.IsLetter && !t.Chars.IsAllLower) && t.LengthChar > 2)
    {
        bool ok = false;
        if (prev != null && ((prev.Typ == Typs.Noun || prev.Typ == Typs.Model || prev.Typ == Typs.Brand)))
        {
            ok = true;
        }
        else if (prev == null && t.Previous != null && t.Previous.IsCommaAnd)
        {
            ok = true;
        }
        if (ok)
        {
            WeaponItemToken res = new WeaponItemToken(t, t) { Typ = Typs.Name, IsDoubt = true };
            res.Value = (t as Pullenti.Ner.TextToken).Term;
            // Join a hyphenated second word that has the same character class.
            if ((t.Next != null && t.Next.IsHiphen && (t.Next.Next is Pullenti.Ner.TextToken)) && t.Next.Next.Chars == t.Chars)
            {
                res.Value = string.Format("{0}-{1}", res.Value, (t.Next.Next as Pullenti.Ner.TextToken).Term);
                res.EndToken = t.Next.Next;
            }
            if (prev != null && prev.Typ == Typs.Noun)
            {
                res.Typ = Typs.Brand;
            }
            // A directly attached number turns the word into a model.
            if (res.EndToken.Next != null && res.EndToken.Next.IsHiphen && (res.EndToken.Next.Next is Pullenti.Ner.NumberToken))
            {
                res.Typ = Typs.Model;
                res._correctModel();
            }
            else if (!res.EndToken.IsWhitespaceAfter && (res.EndToken.Next is Pullenti.Ner.NumberToken))
            {
                res.Typ = Typs.Model;
                res._correctModel();
            }
            return res;
        }
    }

    // Keyword "МАРКА" followed by the brand.
    if (t.IsValue("МАРКА", null))
    {
        WeaponItemToken res = _TryParse(t.Next, prev, afterConj, false);
        if (res != null && res.Typ == Typs.Brand)
        {
            res.BeginToken = t;
            return res;
        }
        if (Pullenti.Ner.Core.BracketHelper.CanBeStartOfSequence(t.Next, true, false))
        {
            Pullenti.Ner.Core.BracketSequenceToken br = Pullenti.Ner.Core.BracketHelper.TryParse(t.Next, Pullenti.Ner.Core.BracketParseAttr.No, 100);
            if (br != null)
            {
                return new WeaponItemToken(t, br.EndToken) { Typ = Typs.Brand, Value = Pullenti.Ner.Core.MiscHelper.GetTextValue(br.BeginToken, br.EndToken, Pullenti.Ner.Core.GetTextAttr.No) };
            }
        }
        if (((t is Pullenti.Ner.TextToken) && (t.Next is Pullenti.Ner.TextToken) && t.Next.LengthChar > 1) && !t.Next.Chars.IsAllLower)
        {
            // The brand is the word after the keyword; t is only the "МАРКА" marker,
            // so the value must come from t.Next.
            return new WeaponItemToken(t, t.Next) { Typ = Typs.Brand, Value = (t.Next as Pullenti.Ner.TextToken).Term };
        }
    }

    // Keyword "КАЛИБР", optional hyphen or colon, then a single numeric value.
    if (t.IsValue("КАЛИБР", "КАЛІБР"))
    {
        Pullenti.Ner.Token tt1 = t.Next;
        if (tt1 != null && ((tt1.IsHiphen || tt1.IsChar(':'))))
        {
            tt1 = tt1.Next;
        }
        Pullenti.Ner.Measure.Internal.NumbersWithUnitToken num = Pullenti.Ner.Measure.Internal.NumbersWithUnitToken.TryParse(tt1, null, false, false, false, false);
        if (num != null && num.SingleVal != null)
        {
            return new WeaponItemToken(t, num.EndToken) { Typ = Typs.Caliber, Value = Pullenti.Ner.Core.NumberHelper.DoubleToString(num.SingleVal.Value) };
        }
    }

    // Number with the single unit "мм", or a number followed by the word "КАЛИБР".
    if (t is Pullenti.Ner.NumberToken)
    {
        Pullenti.Ner.Measure.Internal.NumbersWithUnitToken num = Pullenti.Ner.Measure.Internal.NumbersWithUnitToken.TryParse(t, null, false, false, false, false);
        if (num != null && num.SingleVal != null)
        {
            if (num.Units.Count == 1 && num.Units[0].Unit != null && num.Units[0].Unit.NameCyr == "мм")
            {
                return new WeaponItemToken(t, num.EndToken) { Typ = Typs.Caliber, Value = Pullenti.Ner.Core.NumberHelper.DoubleToString(num.SingleVal.Value) };
            }
            if (num.EndToken.Next != null && num.EndToken.Next.IsValue("КАЛИБР", "КАЛІБР"))
            {
                return new WeaponItemToken(t, num.EndToken.Next) { Typ = Typs.Caliber, Value = Pullenti.Ner.Core.NumberHelper.DoubleToString(num.SingleVal.Value) };
            }
        }
    }

    // Keyword "ПРОИЗВОДСТВО", optional hyphen or colon, then an organisation or geo referent.
    if (t.IsValue("ПРОИЗВОДСТВО", "ВИРОБНИЦТВО"))
    {
        Pullenti.Ner.Token tt1 = t.Next;
        if (tt1 != null && ((tt1.IsHiphen || tt1.IsChar(':'))))
        {
            tt1 = tt1.Next;
        }
        if (tt1 is Pullenti.Ner.ReferentToken)
        {
            if ((tt1.GetReferent() is Pullenti.Ner.Org.OrganizationReferent) || (tt1.GetReferent() is Pullenti.Ner.Geo.GeoReferent))
            {
                return new WeaponItemToken(t, tt1) { Typ = Typs.Developer, Ref = tt1.GetReferent() };
            }
        }
    }
    return null;
}

/// <summary>
/// Extends a model item over a following number and any single letters attached to it,
/// rewriting Value as "&lt;Value&gt;-&lt;number and letters&gt;" and setting AltValue via
/// MiscHelper.CreateCyrLatAlternative. A further "-N", "\N" or "/N" written without
/// whitespace is then appended to both Value and AltValue.
/// </summary>
void _correctModel()
{
    Pullenti.Ner.Token tt = EndToken.Next;
    if (tt == null || tt.WhitespacesBeforeCount > 2)
    {
        return;
    }
    // NOTE(review): IsValue is given the whole string ":\/." here, whereas the check at the
    // end of this method uses IsCharOf("\\/"). This looks like it was meant to be IsCharOf,
    // so as written probably only the hyphen separator is ever skipped - confirm before changing.
    if (tt.IsValue(":\\/.", null) || tt.IsHiphen)
    {
        tt = tt.Next;
    }
    if (tt is Pullenti.Ner.NumberToken)
    {
        StringBuilder tmp = new StringBuilder();
        tmp.Append((tt as Pullenti.Ner.NumberToken).Value);
        // The alphabet of the first character of Value decides how trailing letters are mapped.
        bool isLat = Pullenti.Morph.LanguageHelper.IsLatinChar(Value[0]);
        EndToken = tt;
        for (tt = tt.Next; tt != null; tt = tt.Next)
        {
            if ((tt is Pullenti.Ner.TextToken) && tt.LengthChar == 1 && tt.Chars.IsLetter)
            {
                if (!tt.IsWhitespaceBefore || ((tt.Previous != null && tt.Previous.IsHiphen)))
                {
                    char ch = (tt as Pullenti.Ner.TextToken).Term[0];
                    EndToken = tt;
                    char ch2 = (char)0;
                    // Map the letter into the alphabet of Value when a counterpart exists.
                    if (Pullenti.Morph.LanguageHelper.IsLatinChar(ch) && !isLat)
                    {
                        ch2 = Pullenti.Morph.LanguageHelper.GetCyrForLat(ch);
                        if (ch2 != ((char)0))
                        {
                            ch = ch2;
                        }
                    }
                    else if (Pullenti.Morph.LanguageHelper.IsCyrillicChar(ch) && isLat)
                    {
                        ch2 = Pullenti.Morph.LanguageHelper.GetLatForCyr(ch);
                        if (ch2 != ((char)0))
                        {
                            ch = ch2;
                        }
                    }
                    tmp.Append(ch);
                    continue;
                }
            }
            break;
        }
        Value = string.Format("{0}-{1}", Value, tmp.ToString());
        AltValue = Pullenti.Ner.Core.MiscHelper.CreateCyrLatAlternative(Value);
    }
    // Optional second numeric part, written without any whitespace around the separator.
    if (!EndToken.IsWhitespaceAfter && EndToken.Next != null && ((EndToken.Next.IsHiphen || EndToken.Next.IsCharOf("\\/"))))
    {
        if (!EndToken.Next.IsWhitespaceAfter && (EndToken.Next.Next is Pullenti.Ner.NumberToken))
        {
            EndToken = EndToken.Next.Next;
            Value = string.Format("{0}-{1}", Value, (EndToken as Pullenti.Ner.NumberToken).Value);
            if (AltValue != null)
            {
                AltValue = string.Format("{0}-{1}", AltValue, (EndToken as Pullenti.Ner.NumberToken).Value);
            }
        }
    }
}
/// <summary>
/// Builds a TransportReferent from the leading items of <paramref name="its"/> and wraps it in
/// a referent token. Items are consumed until one would repeat an already filled attribute,
/// conflict with the brand or kind collected so far, or (for an organisation) has a defined
/// non-genitive case. Items of a type not handled here are skipped.
/// Extra referent tokens are produced for further ship names after a conjunction, or for
/// further numbers separated by a comma/"and" token.
/// </summary>
/// <param name="its">Parsed items; the first item's BeginToken starts the main referent token.</param>
/// <param name="attach">Passed through to TransportReferent.Check.</param>
/// <returns>The referent tokens, or null when TransportReferent.Check rejects the referent.</returns>
List<Pullenti.Ner.ReferentToken> TryAttach(List<Pullenti.Ner.Transport.Internal.TransItemToken> its, bool attach)
{
    TransportReferent tr = new TransportReferent();
    Pullenti.Ner.Token lastToken = null;
    bool brandIsDoubt = false;
    int i = 0;

    while (i < its.Count)
    {
        Pullenti.Ner.Transport.Internal.TransItemToken item = its[i];
        bool stop = false;      // the item cannot be merged: end of this referent
        bool consumed = true;   // the item type is handled here and extends the span

        if (item.Typ == Pullenti.Ner.Transport.Internal.TransItemToken.Typs.Noun)
        {
            if (tr.FindSlot(TransportReferent.ATTR_TYPE, null, true) != null || !_acceptKind(tr, item))
            {
                stop = true;
            }
            else
            {
                tr.AddSlot(TransportReferent.ATTR_TYPE, item.Value, false, 0);
                if (item.AltValue != null)
                {
                    tr.AddSlot(TransportReferent.ATTR_TYPE, item.AltValue, false, 0);
                }
                if (item.State != null)
                {
                    tr.AddGeo(item.State);
                }
            }
        }
        else if (item.Typ == Pullenti.Ner.Transport.Internal.TransItemToken.Typs.Geo)
        {
            if (item.State != null)
            {
                tr.AddGeo(item.State);
            }
            else if (item.Ref != null)
            {
                tr.AddGeo(item.Ref);
            }
        }
        else if (item.Typ == Pullenti.Ner.Transport.Internal.TransItemToken.Typs.Brand)
        {
            // A repeated brand is tolerated only when it has the same value as the stored one.
            bool differentBrand = tr.FindSlot(TransportReferent.ATTR_BRAND, null, true) != null
                && tr.FindSlot(TransportReferent.ATTR_BRAND, item.Value, true) == null;
            if (differentBrand || !_acceptKind(tr, item))
            {
                stop = true;
            }
            else
            {
                tr.AddSlot(TransportReferent.ATTR_BRAND, item.Value, false, 0);
                brandIsDoubt = item.IsDoubt;
            }
        }
        else if (item.Typ == Pullenti.Ner.Transport.Internal.TransItemToken.Typs.Model)
        {
            if (tr.FindSlot(TransportReferent.ATTR_MODEL, null, true) != null)
            {
                stop = true;
            }
            else
            {
                tr.AddSlot(TransportReferent.ATTR_MODEL, item.Value, false, 0);
                if (item.AltValue != null)
                {
                    tr.AddSlot(TransportReferent.ATTR_MODEL, item.AltValue, false, 0);
                }
            }
        }
        else if (item.Typ == Pullenti.Ner.Transport.Internal.TransItemToken.Typs.Class)
        {
            if (tr.FindSlot(TransportReferent.ATTR_CLASS, null, true) != null)
            {
                stop = true;
            }
            else
            {
                tr.AddSlot(TransportReferent.ATTR_CLASS, item.Value, false, 0);
            }
        }
        else if (item.Typ == Pullenti.Ner.Transport.Internal.TransItemToken.Typs.Name)
        {
            if (tr.FindSlot(TransportReferent.ATTR_NAME, null, true) != null)
            {
                stop = true;
            }
            else
            {
                tr.AddSlot(TransportReferent.ATTR_NAME, item.Value, false, 0);
                if (item.AltValue != null)
                {
                    tr.AddSlot(TransportReferent.ATTR_NAME, item.AltValue, false, 0);
                }
            }
        }
        else if (item.Typ == Pullenti.Ner.Transport.Internal.TransItemToken.Typs.Number)
        {
            if (tr.FindSlot(TransportReferent.ATTR_NUMBER, null, true) != null || !_acceptKind(tr, item))
            {
                stop = true;
            }
            else
            {
                tr.AddSlot(TransportReferent.ATTR_NUMBER, item.Value, false, 0);
                // For number items the alternative value is stored as the number region.
                if (item.AltValue != null)
                {
                    tr.AddSlot(TransportReferent.ATTR_NUMBER_REGION, item.AltValue, false, 0);
                }
            }
        }
        else if (item.Typ == Pullenti.Ner.Transport.Internal.TransItemToken.Typs.Org)
        {
            if (tr.FindSlot(TransportReferent.ATTR_ORG, null, true) != null
                || (!item.Morph.Case.IsUndefined && !item.Morph.Case.IsGenitive))
            {
                stop = true;
            }
            else
            {
                tr.AddSlot(TransportReferent.ATTR_ORG, item.Ref, true, 0);
            }
        }
        else if (item.Typ == Pullenti.Ner.Transport.Internal.TransItemToken.Typs.Date)
        {
            if (tr.FindSlot(TransportReferent.ATTR_DATE, null, true) != null)
            {
                stop = true;
            }
            else
            {
                tr.AddSlot(TransportReferent.ATTR_DATE, item.Ref, true, 0);
            }
        }
        else if (item.Typ == Pullenti.Ner.Transport.Internal.TransItemToken.Typs.Route)
        {
            if (tr.FindSlot(TransportReferent.ATTR_ROUTEPOINT, null, true) != null)
            {
                stop = true;
            }
            else
            {
                foreach (object point in item.RouteItems)
                {
                    tr.AddSlot(TransportReferent.ATTR_ROUTEPOINT, point, false, 0);
                }
            }
        }
        else
        {
            consumed = false;
        }

        if (stop)
        {
            break;
        }
        if (consumed)
        {
            lastToken = item.EndToken;
        }
        i++;
    }

    if (!tr.Check(attach, brandIsDoubt))
    {
        return null;
    }

    List<Pullenti.Ner.ReferentToken> res = new List<Pullenti.Ner.ReferentToken>
    {
        new Pullenti.Ner.ReferentToken(tr, its[0].BeginToken, lastToken)
    };

    if ((i < its.Count) && tr.Kind == TransportKind.Ship && its[i - 1].Typ == Pullenti.Ner.Transport.Internal.TransItemToken.Typs.Name)
    {
        // Ship whose last merged item was a name: each further name after a conjunction
        // yields a copy of the referent with its name replaced.
        while (i < its.Count)
        {
            Pullenti.Ner.Transport.Internal.TransItemToken extra = its[i];
            if (extra.Typ != Pullenti.Ner.Transport.Internal.TransItemToken.Typs.Name || !extra.IsAfterConjunction)
            {
                break;
            }
            TransportReferent sibling = new TransportReferent();
            sibling.MergeSlots(tr, true);
            sibling.AddSlot(TransportReferent.ATTR_NAME, extra.Value, true, 0);
            res.Add(new Pullenti.Ner.ReferentToken(sibling, extra.BeginToken, extra.EndToken));
            i++;
        }
    }
    else if (i == its.Count && its[its.Count - 1].Typ == Pullenti.Ner.Transport.Internal.TransItemToken.Typs.Number)
    {
        // All items merged and the last one was a number: pick up further numbers that follow
        // a comma/"and" token, copying every non-number slot of the main referent.
        Pullenti.Ner.Token sep = lastToken.Next;
        while (sep != null && sep.IsCommaAnd)
        {
            Pullenti.Ner.Transport.Internal.TransItemToken nn = Pullenti.Ner.Transport.Internal.TransItemToken._attachRusAutoNumber(sep.Next);
            if (nn == null)
            {
                nn = Pullenti.Ner.Transport.Internal.TransItemToken._attachNumber(sep.Next, false);
            }
            if (nn == null || nn.Typ != Pullenti.Ner.Transport.Internal.TransItemToken.Typs.Number)
            {
                break;
            }
            TransportReferent sibling = new TransportReferent();
            foreach (Pullenti.Ner.Slot slot in tr.Slots)
            {
                if (slot.TypeName == TransportReferent.ATTR_NUMBER)
                {
                    continue;
                }
                // The region is copied only when the new number does not bring its own.
                if (slot.TypeName == TransportReferent.ATTR_NUMBER_REGION && nn.AltValue != null)
                {
                    continue;
                }
                sibling.AddSlot(slot.TypeName, slot.Value, false, 0);
            }
            sibling.AddSlot(TransportReferent.ATTR_NUMBER, nn.Value, true, 0);
            if (nn.AltValue != null)
            {
                sibling.AddSlot(TransportReferent.ATTR_NUMBER_REGION, nn.AltValue, true, 0);
            }
            res.Add(new Pullenti.Ner.ReferentToken(sibling, nn.BeginToken, nn.EndToken));
            sep = nn.EndToken.Next;
        }
    }
    return res;
}

// Applies the item's kind to the referent. Returns false when both kinds are defined and differ;
// otherwise assigns the item's kind (if defined) and returns true.
private static bool _acceptKind(TransportReferent tr, Pullenti.Ner.Transport.Internal.TransItemToken item)
{
    if (item.Kind == TransportKind.Undefined)
    {
        return true;
    }
    if (tr.Kind != TransportKind.Undefined && item.Kind != tr.Kind)
    {
        return false;
    }
    tr.Kind = item.Kind;
    return true;
}