/// <summary>
/// Post-filter for a termin match: decides whether the match may be kept.
/// </summary>
/// <param name="tok">The termin match to validate.</param>
/// <returns>
/// false when (a) the termin acronym is "SA" and the text token before the match
/// (skipping one optional '.') has the term "U"; (b) the match is the single token
/// "CO" immediately followed by a hyphen; or (c) the match is directly followed,
/// without whitespace, by a number token. true otherwise.
/// </returns>
static bool _checkTok(Pullenti.Ner.Core.TerminToken tok)
{
    if (tok.Termin.Acronym == "SA")
    {
        // Look one token back, stepping over a single dot.
        Pullenti.Ner.Token before = tok.BeginToken.Previous;
        if (before != null && before.IsChar('.'))
        {
            before = before.Previous;
        }
        Pullenti.Ner.TextToken beforeText = before as Pullenti.Ner.TextToken;
        if (beforeText != null && beforeText.Term == "U")
        {
            return false;
        }
    }
    else if (tok.BeginToken.IsValue("CO", null) && tok.BeginToken == tok.EndToken)
    {
        Pullenti.Ner.Token after = tok.EndToken.Next;
        if (after != null && after.IsHiphen)
        {
            return false;
        }
    }

    // A number glued to the end of the match invalidates it.
    return tok.IsWhitespaceAfter || !(tok.EndToken.Next is Pullenti.Ner.NumberToken);
}
/// <summary>
/// Tries to recognise a term from <c>m_Nords</c> at <paramref name="t"/>, optionally
/// followed by a second term from the same collection (joined by a hyphen, or directly
/// adjacent when the first token is an adjective).
/// </summary>
/// <param name="t">Start token; must be a text token.</param>
/// <returns>
/// A meta-token starting at <paramref name="t"/>; it ends at <paramref name="t"/> for a
/// single term, or at the end of the second term when one is found (in which case the
/// morphology of the second term is used). null when <paramref name="t"/> is not a text
/// token or is not matched by <c>m_Nords</c>.
/// </returns>
public static Pullenti.Ner.MetaToken TryAttachNordWest(Pullenti.Ner.Token t)
{
    if (!(t is Pullenti.Ner.TextToken))
    {
        return null;
    }

    Pullenti.Ner.Core.TerminToken tok = m_Nords.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
    if (tok == null)
    {
        return null;
    }

    Pullenti.Ner.MetaToken res = new Pullenti.Ner.MetaToken(t, t) { Morph = t.Morph };

    // Locate the candidate for the second part of a compound.
    // NOTE(review): the original tested !t.IsWhitespaceAfter twice in this condition;
    // the duplicate was possibly meant to be !t.Next.IsWhitespaceAfter - confirm.
    Pullenti.Ner.Token t1 = null;
    if (t.Next != null && t.Next.IsHiphen && !t.IsWhitespaceAfter)
    {
        t1 = t.Next.Next;
    }
    else if (t.Morph.Class.IsAdjective && (t.WhitespacesAfterCount < 2))
    {
        t1 = t.Next;
    }

    if (t1 != null)
    {
        tok = m_Nords.TryParse(t1, Pullenti.Ner.Core.TerminParseAttr.No);
        if (tok != null)
        {
            res.EndToken = tok.EndToken;
            res.Morph = tok.Morph;
        }
    }
    return res;
}
/// <summary>
/// Tries to recognise a postfix at <paramref name="t"/> without a preceding number.
/// </summary>
/// <param name="t">Token where the postfix is expected to start.</param>
/// <returns>
/// A <c>NumberExToken</c> with an empty value whose type comes from the matched termin's
/// Tag (the termin itself is stored in <c>Tag</c>); or the result of
/// <c>_attachSpecPostfix</c> when <c>m_Postfixes</c> has no match. null when
/// <paramref name="t"/> is null or nothing is recognised. A non-null result is passed
/// through <c>_correctExtTypes</c> before being returned.
/// </returns>
public static Pullenti.Ner.Core.NumberExToken TryAttachPostfixOnly(Pullenti.Ner.Token t)
{
    if (t == null)
    {
        return null;
    }

    Pullenti.Ner.Core.TerminToken tok = m_Postfixes.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
    Pullenti.Ner.Core.NumberExToken res = null;
    if (tok != null)
    {
        // The statement terminator belongs directly after the object initializer.
        res = new Pullenti.Ner.Core.NumberExToken(t, tok.EndToken, "", Pullenti.Ner.NumberSpellingType.Digit, (Pullenti.Ner.Core.NumberExType)tok.Termin.Tag) { Tag = tok.Termin };
    }
    else
    {
        res = _attachSpecPostfix(t);
    }

    if (res != null)
    {
        _correctExtTypes(res);
    }
    return res;
}
/// <summary>
/// Walks the token chain of <paramref name="kit"/> and embeds every referent token
/// produced by <c>TryAttach</c>.
/// </summary>
/// <remarks>
/// A letter token matched by <c>m_Ontology</c> acts as a keyword: the attachment is tried
/// on the token after the match (skipping one optional ':') with the flag set to true, and
/// a successful result is widened to start at the keyword. If that yields nothing, tokens
/// that are referent tokens or start a new line are tried directly with the flag false.
/// </remarks>
/// <param name="kit">Analysis kit that supplies the tokens and receives the embedded results.</param>
public override void Process(Pullenti.Ner.Core.AnalysisKit kit)
{
    Pullenti.Ner.Core.AnalyzerData data = kit.GetAnalyzerData(this);

    Pullenti.Ner.Token cur = kit.FirstToken;
    while (cur != null)
    {
        Pullenti.Ner.ReferentToken found = null;

        if (cur.Chars.IsLetter)
        {
            Pullenti.Ner.Core.TerminToken keyword = m_Ontology.TryParse(cur, Pullenti.Ner.Core.TerminParseAttr.No);
            if (keyword != null)
            {
                Pullenti.Ner.Token body = keyword.EndToken.Next;
                if (body != null && body.IsChar(':'))
                {
                    body = body.Next;
                }
                found = this.TryAttach(body, true);
                if (found != null)
                {
                    found.BeginToken = cur;
                }
            }
        }

        if (found == null)
        {
            if ((cur is Pullenti.Ner.ReferentToken) || cur.IsNewlineBefore)
            {
                found = this.TryAttach(cur, false);
            }
        }

        if (found != null)
        {
            found.Referent = data.RegisterReferent(found.Referent);
            kit.EmbedToken(found);
            // Continue after the freshly embedded token.
            cur = found;
        }

        cur = cur.Next;
    }
}
/// <summary>
/// Tries to parse a delimiter expression starting at <paramref name="t"/>.
/// </summary>
/// <remarks>
/// A leading comma/"and" token is absorbed into the delimiter that follows it. Otherwise
/// the token must match <c>m_Onto</c>; a directly following delimiter of the same type is
/// merged in (which also clears the doubt flag). A pronoun start is rejected when a noun
/// phrase beginning at the same token extends past the delimiter.
/// </remarks>
/// <param name="t">Start token; must be a text token.</param>
/// <returns>The parsed delimiter, or null.</returns>
public static DelimToken TryParse(Pullenti.Ner.Token t)
{
    if (!(t is Pullenti.Ner.TextToken))
    {
        return null;
    }

    if (t.IsCommaAnd)
    {
        DelimToken afterComma = TryParse(t.Next);
        if (afterComma == null)
        {
            return null;
        }
        afterComma.BeginToken = t;
        return afterComma;
    }

    Pullenti.Ner.Core.TerminToken hit = m_Onto.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
    if (hit == null)
    {
        return null;
    }

    DelimToken delim = new DelimToken(t, hit.EndToken)
    {
        Typ = (DelimType)hit.Termin.Tag,
        Doublt = hit.Termin.Tag2 != null
    };

    // Glue a following delimiter of the same kind onto this one.
    DelimToken follower = TryParse(delim.EndToken.Next);
    if (follower != null && follower.Typ == delim.Typ)
    {
        delim.EndToken = follower.EndToken;
        delim.Doublt = false;
    }

    if (t.Morph.Class.IsPronoun)
    {
        Pullenti.Ner.Core.NounPhraseToken phrase = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t, Pullenti.Ner.Core.NounPhraseParseAttr.ParseAdverbs, 0, null);
        if (phrase != null && phrase.EndChar > delim.EndChar)
        {
            return null;
        }
    }

    return delim;
}
/// <summary>
/// Tries to recognise an item at <paramref name="t"/> using <c>m_Ontology</c>, or the
/// special form "and COMPANY" / "and CO" (with an optional trailing dot).
/// </summary>
/// <param name="t">Start token; must be a text token.</param>
/// <param name="canBeCyr">When true, the ontology is also consulted for non-Latin tokens.</param>
/// <returns>
/// The recognised item: for an ontology match, <c>FullValue</c> is the lower-cased canonic
/// text and <c>ShortValue</c> the acronym; for the "and company" form, <c>FullValue</c> is
/// "company". null when nothing matches or <c>_checkTok</c> rejects the match.
/// </returns>
public static OrgItemEngItem TryAttach(Pullenti.Ner.Token t, bool canBeCyr = false)
{
    // "is" is false for null, so this also covers a null token.
    if (!(t is Pullenti.Ner.TextToken))
    {
        return null;
    }

    Pullenti.Ner.Core.TerminToken match = null;
    if (canBeCyr)
    {
        match = m_Ontology.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
    }

    if (t.Chars.IsLatinLetter)
    {
        match = m_Ontology.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
    }
    else if (match == null)
    {
        // Non-Latin token without an ontology match: only "and COMPANY"/"and CO" is accepted.
        if (!t.IsAnd || t.Next == null)
        {
            return null;
        }
        if (!(t.Next.IsValue("COMPANY", null) || t.Next.IsValue("CO", null)))
        {
            return null;
        }
        OrgItemEngItem company = new OrgItemEngItem(t, t.Next);
        company.FullValue = "company";
        Pullenti.Ner.Token dot = company.EndToken.Next;
        if (dot != null && dot.IsChar('.'))
        {
            company.EndToken = dot;
        }
        return company;
    }

    if (match == null)
    {
        return null;
    }
    if (!_checkTok(match))
    {
        return null;
    }

    OrgItemEngItem item = new OrgItemEngItem(match.BeginToken, match.EndToken);
    item.FullValue = match.Termin.CanonicText.ToLower();
    item.ShortValue = match.Termin.Acronym;
    return item;
}
/// <summary>
/// Extends <paramref name="ex"/> over what follows it: first a correction of the primary
/// type via <c>_corrExTyp2</c>, then an optional second postfix introduced by '/', '\'
/// or the word "НА".
/// </summary>
/// <remarks>
/// The second postfix is taken from <c>m_Postfixes</c> and ignored when its type is
/// <c>Money</c>. When accepted, it is stored in <c>ExTyp2</c>, the token is extended to
/// cover it, and <c>_corrExTyp2</c> is applied once more to the second type.
/// </remarks>
/// <param name="ex">Token to adjust in place (ExTyp, ExTyp2 and EndToken may change).</param>
static void _correctExtTypes(Pullenti.Ner.Core.NumberExToken ex)
{
    Pullenti.Ner.Token t = ex.EndToken.Next;
    if (t == null)
    {
        return;
    }

    Pullenti.Ner.Core.NumberExType ty = ex.ExTyp;
    Pullenti.Ner.Token tt = _corrExTyp2(t, ref ty);
    if (tt != null)
    {
        ex.ExTyp = ty;
        ex.EndToken = tt;
        t = tt.Next;
    }

    if (t == null || t.Next == null)
    {
        return;
    }

    // Only a separator ('/', '\' or "НА") may introduce the second unit.
    if (!t.IsCharOf("/\\") && !t.IsValue("НА", null))
    {
        return;
    }

    Pullenti.Ner.Core.TerminToken tok = m_Postfixes.TryParse(t.Next, Pullenti.Ner.Core.TerminParseAttr.No);
    if (tok == null || ((Pullenti.Ner.Core.NumberExType)tok.Termin.Tag) == Pullenti.Ner.Core.NumberExType.Money)
    {
        return;
    }

    ex.ExTyp2 = (Pullenti.Ner.Core.NumberExType)tok.Termin.Tag;
    ex.EndToken = tok.EndToken;
    ty = ex.ExTyp2;

    // NOTE(review): ex.EndToken.Next can be null here; the first call above is guarded
    // against null, this one never was - confirm _corrExTyp2 tolerates a null token.
    tt = _corrExTyp2(ex.EndToken.Next, ref ty);
    if (tt != null)
    {
        ex.ExTyp2 = ty;
        ex.EndToken = tt;
    }
}
/// <summary>
/// Tries to recognise an abbreviation from <c>TerrItemToken.m_GeoAbbrs</c> at a Latin-letter
/// token and wrap a copy of the associated geo referent into a referent token.
/// </summary>
/// <param name="t">Start token.</param>
/// <returns>
/// A referent token spanning the abbreviation (plus one trailing '.' if present) that
/// carries a clone of the termin's <c>GeoReferent</c> with its occurrences cleared; null
/// when the token is null, not Latin, not matched, or the termin's Tag is not a geo referent.
/// </returns>
public static Pullenti.Ner.ReferentToken TryAttachStateUSATerritory(Pullenti.Ner.Token t)
{
    if (t == null || !t.Chars.IsLatinLetter)
    {
        return null;
    }

    Pullenti.Ner.Core.TerminToken abbr = TerrItemToken.m_GeoAbbrs.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
    Pullenti.Ner.Geo.GeoReferent proto = (abbr == null) ? null : (abbr.Termin.Tag as Pullenti.Ner.Geo.GeoReferent);
    if (proto == null)
    {
        return null;
    }

    // Absorb the dot that terminates the abbreviation.
    Pullenti.Ner.Token follower = abbr.EndToken.Next;
    if (follower != null && follower.IsChar('.'))
    {
        abbr.EndToken = follower;
    }

    // Work on a copy so the ontology's referent is not modified.
    Pullenti.Ner.Referent copy = proto.Clone();
    copy.Occurrence.Clear();
    return new Pullenti.Ner.ReferentToken(copy, abbr.BeginToken, abbr.EndToken);
}
/// <summary>
/// Tries to parse a single weapon description item starting at <paramref name="t"/>.
/// </summary>
/// <remarks>
/// The alternatives are tried in this order: a bracketed item; an ontology term (noun,
/// brand, name or model); a number introduced by a number prefix; a short upper-case
/// abbreviation followed by a number (doubtful model) or the abbreviation "СП" before a
/// model/brand; a capitalised word after a noun/model/brand or after a comma/"and"
/// (doubtful name); the keyword "МАРКА" followed by a brand; a caliber (keyword "КАЛИБР"
/// or a number in "мм"); and the keyword "ПРОИЗВОДСТВО" followed by an organization or
/// geo referent.
/// </remarks>
/// <param name="t">Start token.</param>
/// <param name="prev">Previously parsed item, or null; used by the capitalised-word heuristic.</param>
/// <param name="afterConj">Only forwarded to recursive calls.</param>
/// <param name="attachHigh">Only forwarded to the recursive call for a bracketed item.</param>
/// <returns>The parsed item, or null when nothing is recognised.</returns>
static WeaponItemToken _TryParse(Pullenti.Ner.Token t, WeaponItemToken prev, bool afterConj, bool attachHigh = false)
{
    if (t == null)
    {
        return null;
    }

    // Item enclosed in brackets: accept it when the closing bracket (or end of text) follows.
    if (Pullenti.Ner.Core.BracketHelper.IsBracket(t, true))
    {
        WeaponItemToken wit = _TryParse(t.Next, prev, afterConj, attachHigh);
        if (wit != null)
        {
            if (wit.EndToken.Next == null)
            {
                wit.BeginToken = t;
                return wit;
            }
            if (Pullenti.Ner.Core.BracketHelper.IsBracket(wit.EndToken.Next, true))
            {
                wit.BeginToken = t;
                wit.EndToken = wit.EndToken.Next;
                return wit;
            }
        }
    }

    Pullenti.Ner.Core.TerminToken tok = m_Ontology.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
    if (tok != null)
    {
        WeaponItemToken res = new WeaponItemToken(t, tok.EndToken);
        res.Typ = (Typs)tok.Termin.Tag;
        if (res.Typ == Typs.Noun)
        {
            res.Value = tok.Termin.CanonicText;
            if (tok.Termin.Tag2 != null)
            {
                res.IsDoubt = true;
            }
            // Absorb following brands (as inner tokens) and adjectives (into AltValue).
            for (Pullenti.Ner.Token tt = res.EndToken.Next; tt != null; tt = tt.Next)
            {
                if (tt.WhitespacesBeforeCount > 2)
                {
                    break;
                }
                WeaponItemToken wit = _TryParse(tt, null, false, false);
                if (wit != null)
                {
                    if (wit.Typ == Typs.Brand)
                    {
                        res.InnerTokens.Add(wit);
                        res.EndToken = (tt = wit.EndToken);
                        continue;
                    }
                    break;
                }
                if (!(tt is Pullenti.Ner.TextToken))
                {
                    break;
                }
                Pullenti.Morph.MorphClass mc = tt.GetMorphClassInDictionary();
                if (mc == Pullenti.Morph.MorphClass.Adjective)
                {
                    if (res.AltValue == null)
                    {
                        res.AltValue = res.Value;
                    }
                    // Strip the trailing noun so the adjective is inserted before it.
                    if (res.AltValue.EndsWith(res.Value))
                    {
                        res.AltValue = res.AltValue.Substring(0, res.AltValue.Length - res.Value.Length);
                    }
                    res.AltValue = string.Format("{0}{1} {2}", res.AltValue, (tt as Pullenti.Ner.TextToken).Term, res.Value);
                    res.EndToken = tt;
                    continue;
                }
                break;
            }
            return res;
        }
        if (res.Typ == Typs.Brand || res.Typ == Typs.Name)
        {
            res.Value = tok.Termin.CanonicText;
            return res;
        }
        if (res.Typ == Typs.Model)
        {
            res.Value = tok.Termin.CanonicText;
            if (tok.Termin.Tag2 is List<Pullenti.Ner.Core.Termin>)
            {
                List<Pullenti.Ner.Core.Termin> li = tok.Termin.Tag2 as List<Pullenti.Ner.Core.Termin>;
                foreach (Pullenti.Ner.Core.Termin to in li)
                {
                    WeaponItemToken wit = new WeaponItemToken(t, tok.EndToken) { Typ = (Typs)to.Tag, Value = to.CanonicText, IsInternal = tok.BeginToken == tok.EndToken };
                    res.InnerTokens.Add(wit);
                    if (to.AdditionalVars != null && to.AdditionalVars.Count > 0)
                    {
                        wit.AltValue = to.AdditionalVars[0].CanonicText;
                    }
                }
            }
            res._correctModel();
            return res;
        }
    }

    // Number introduced by a number prefix.
    Pullenti.Ner.Token nnn = Pullenti.Ner.Core.MiscHelper.CheckNumberPrefix(t);
    if (nnn != null)
    {
        Pullenti.Ner.Transport.Internal.TransItemToken tit = Pullenti.Ner.Transport.Internal.TransItemToken._attachNumber(nnn, true);
        if (tit != null)
        {
            WeaponItemToken res = new WeaponItemToken(t, tit.EndToken) { Typ = Typs.Number };
            res.Value = tit.Value;
            res.AltValue = tit.AltValue;
            return res;
        }
    }

    // Short upper-case abbreviation (fewer than 4 characters).
    if (((t is Pullenti.Ner.TextToken) && t.Chars.IsLetter && t.Chars.IsAllUpper) && (t.LengthChar < 4))
    {
        if ((t.Next != null && ((t.Next.IsHiphen || t.Next.IsChar('.'))) && (t.Next.WhitespacesAfterCount < 2)) && (t.Next.Next is Pullenti.Ner.NumberToken))
        {
            WeaponItemToken res = new WeaponItemToken(t, t.Next) { Typ = Typs.Model, IsDoubt = true };
            res.Value = (t as Pullenti.Ner.TextToken).Term;
            res._correctModel();
            return res;
        }
        if ((t.Next is Pullenti.Ner.NumberToken) && !t.IsWhitespaceAfter)
        {
            WeaponItemToken res = new WeaponItemToken(t, t) { Typ = Typs.Model, IsDoubt = true };
            res.Value = (t as Pullenti.Ner.TextToken).Term;
            res._correctModel();
            return res;
        }
        if ((t as Pullenti.Ner.TextToken).Term == "СП" && (t.WhitespacesAfterCount < 3) && (t.Next is Pullenti.Ner.TextToken))
        {
            WeaponItemToken pp = _TryParse(t.Next, null, false, false);
            if (pp != null && ((pp.Typ == Typs.Model || pp.Typ == Typs.Brand)))
            {
                WeaponItemToken res = new WeaponItemToken(t, t) { Typ = Typs.Noun };
                res.Value = "ПИСТОЛЕТ";
                res.AltValue = "СЛУЖЕБНЫЙ ПИСТОЛЕТ";
                return res;
            }
        }
    }

    // Capitalised word in a context that suggests a name, brand or model.
    if (((t is Pullenti.Ner.TextToken) && t.Chars.IsLetter && !t.Chars.IsAllLower) && t.LengthChar > 2)
    {
        bool ok = false;
        if (prev != null && ((prev.Typ == Typs.Noun || prev.Typ == Typs.Model || prev.Typ == Typs.Brand)))
        {
            ok = true;
        }
        else if (prev == null && t.Previous != null && t.Previous.IsCommaAnd)
        {
            ok = true;
        }
        if (ok)
        {
            WeaponItemToken res = new WeaponItemToken(t, t) { Typ = Typs.Name, IsDoubt = true };
            res.Value = (t as Pullenti.Ner.TextToken).Term;
            if ((t.Next != null && t.Next.IsHiphen && (t.Next.Next is Pullenti.Ner.TextToken)) && t.Next.Next.Chars == t.Chars)
            {
                res.Value = string.Format("{0}-{1}", res.Value, (t.Next.Next as Pullenti.Ner.TextToken).Term);
                res.EndToken = t.Next.Next;
            }
            if (prev != null && prev.Typ == Typs.Noun)
            {
                res.Typ = Typs.Brand;
            }
            if (res.EndToken.Next != null && res.EndToken.Next.IsHiphen && (res.EndToken.Next.Next is Pullenti.Ner.NumberToken))
            {
                res.Typ = Typs.Model;
                res._correctModel();
            }
            else if (!res.EndToken.IsWhitespaceAfter && (res.EndToken.Next is Pullenti.Ner.NumberToken))
            {
                res.Typ = Typs.Model;
                res._correctModel();
            }
            return res;
        }
    }

    // Keyword "МАРКА" followed by a brand, a bracketed sequence or a non-lower-case word.
    if (t.IsValue("МАРКА", null))
    {
        WeaponItemToken res = _TryParse(t.Next, prev, afterConj, false);
        if (res != null && res.Typ == Typs.Brand)
        {
            res.BeginToken = t;
            return res;
        }
        if (Pullenti.Ner.Core.BracketHelper.CanBeStartOfSequence(t.Next, true, false))
        {
            Pullenti.Ner.Core.BracketSequenceToken br = Pullenti.Ner.Core.BracketHelper.TryParse(t.Next, Pullenti.Ner.Core.BracketParseAttr.No, 100);
            if (br != null)
            {
                return new WeaponItemToken(t, br.EndToken) { Typ = Typs.Brand, Value = Pullenti.Ner.Core.MiscHelper.GetTextValue(br.BeginToken, br.EndToken, Pullenti.Ner.Core.GetTextAttr.No) };
            }
        }
        if (((t is Pullenti.Ner.TextToken) && (t.Next is Pullenti.Ner.TextToken) && t.Next.LengthChar > 1) && !t.Next.Chars.IsAllLower)
        {
            // NOTE(review): Value is taken from t (the keyword itself), not from t.Next -
            // possibly unintended; behaviour kept as in the original, confirm.
            return new WeaponItemToken(t, t.Next) { Typ = Typs.Brand, Value = (t as Pullenti.Ner.TextToken).Term };
        }
    }

    // Caliber introduced by its keyword.
    if (t.IsValue("КАЛИБР", "КАЛІБР"))
    {
        Pullenti.Ner.Token tt1 = t.Next;
        if (tt1 != null && ((tt1.IsHiphen || tt1.IsChar(':'))))
        {
            tt1 = tt1.Next;
        }
        Pullenti.Ner.Measure.Internal.NumbersWithUnitToken num = Pullenti.Ner.Measure.Internal.NumbersWithUnitToken.TryParse(tt1, null, false, false, false, false);
        if (num != null && num.SingleVal != null)
        {
            return new WeaponItemToken(t, num.EndToken) { Typ = Typs.Caliber, Value = Pullenti.Ner.Core.NumberHelper.DoubleToString(num.SingleVal.Value) };
        }
    }

    // Caliber written as a number in "мм" or as a number followed by the keyword.
    if (t is Pullenti.Ner.NumberToken)
    {
        Pullenti.Ner.Measure.Internal.NumbersWithUnitToken num = Pullenti.Ner.Measure.Internal.NumbersWithUnitToken.TryParse(t, null, false, false, false, false);
        if (num != null && num.SingleVal != null)
        {
            if (num.Units.Count == 1 && num.Units[0].Unit != null && num.Units[0].Unit.NameCyr == "мм")
            {
                return new WeaponItemToken(t, num.EndToken) { Typ = Typs.Caliber, Value = Pullenti.Ner.Core.NumberHelper.DoubleToString(num.SingleVal.Value) };
            }
            if (num.EndToken.Next != null && num.EndToken.Next.IsValue("КАЛИБР", "КАЛІБР"))
            {
                return new WeaponItemToken(t, num.EndToken.Next) { Typ = Typs.Caliber, Value = Pullenti.Ner.Core.NumberHelper.DoubleToString(num.SingleVal.Value) };
            }
        }
    }

    // Developer: keyword followed by an organization or geo referent.
    if (t.IsValue("ПРОИЗВОДСТВО", "ВИРОБНИЦТВО"))
    {
        Pullenti.Ner.Token tt1 = t.Next;
        if (tt1 != null && ((tt1.IsHiphen || tt1.IsChar(':'))))
        {
            tt1 = tt1.Next;
        }
        if (tt1 is Pullenti.Ner.ReferentToken)
        {
            if ((tt1.GetReferent() is Pullenti.Ner.Org.OrganizationReferent) || (tt1.GetReferent() is Pullenti.Ner.Geo.GeoReferent))
            {
                return new WeaponItemToken(t, tt1) { Typ = Typs.Developer, Ref = tt1.GetReferent() };
            }
        }
    }

    return null;
}

/// <summary>
/// Extends a model item over a following number (plus single-letter suffixes) and over one
/// further number joined by a hyphen or slash, updating <c>Value</c>, <c>AltValue</c> and
/// <c>EndToken</c> accordingly.
/// </summary>
/// <remarks>
/// Suffix letters are transliterated to the alphabet of the first character of
/// <c>Value</c> when a counterpart exists. <c>Value</c> must be non-empty when a number
/// follows, because its first character is inspected.
/// </remarks>
void _correctModel()
{
    Pullenti.Ner.Token tt = EndToken.Next;
    if (tt == null || tt.WhitespacesBeforeCount > 2)
    {
        return;
    }
    // NOTE(review): IsValue(":\\/.") compares against the whole string; IsCharOf may
    // have been intended for a single separator character - confirm.
    if (tt.IsValue(":\\/.", null) || tt.IsHiphen)
    {
        tt = tt.Next;
    }
    if (tt is Pullenti.Ner.NumberToken)
    {
        StringBuilder tmp = new StringBuilder();
        tmp.Append((tt as Pullenti.Ner.NumberToken).Value);
        bool isLat = Pullenti.Morph.LanguageHelper.IsLatinChar(Value[0]);
        EndToken = tt;
        // Collect single-letter suffixes glued to the number (or attached via hyphen).
        for (tt = tt.Next; tt != null; tt = tt.Next)
        {
            if ((tt is Pullenti.Ner.TextToken) && tt.LengthChar == 1 && tt.Chars.IsLetter)
            {
                if (!tt.IsWhitespaceBefore || ((tt.Previous != null && tt.Previous.IsHiphen)))
                {
                    char ch = (tt as Pullenti.Ner.TextToken).Term[0];
                    EndToken = tt;
                    char ch2 = (char)0;
                    if (Pullenti.Morph.LanguageHelper.IsLatinChar(ch) && !isLat)
                    {
                        ch2 = Pullenti.Morph.LanguageHelper.GetCyrForLat(ch);
                        if (ch2 != ((char)0))
                        {
                            ch = ch2;
                        }
                    }
                    else if (Pullenti.Morph.LanguageHelper.IsCyrillicChar(ch) && isLat)
                    {
                        ch2 = Pullenti.Morph.LanguageHelper.GetLatForCyr(ch);
                        if (ch2 != ((char)0))
                        {
                            ch = ch2;
                        }
                    }
                    tmp.Append(ch);
                    continue;
                }
            }
            break;
        }
        Value = string.Format("{0}-{1}", Value, tmp.ToString());
        AltValue = Pullenti.Ner.Core.MiscHelper.CreateCyrLatAlternative(Value);
    }
    if (!EndToken.IsWhitespaceAfter && EndToken.Next != null && ((EndToken.Next.IsHiphen || EndToken.Next.IsCharOf("\\/"))))
    {
        if (!EndToken.Next.IsWhitespaceAfter && (EndToken.Next.Next is Pullenti.Ner.NumberToken))
        {
            EndToken = EndToken.Next.Next;
            Value = string.Format("{0}-{1}", Value, (EndToken as Pullenti.Ner.NumberToken).Value);
            if (AltValue != null)
            {
                AltValue = string.Format("{0}-{1}", AltValue, (EndToken as Pullenti.Ner.NumberToken).Value);
            }
        }
    }
}
/// <summary>
/// Builds a MailLine from t0 up to the last token before the next line break and tries to
/// classify it as From, Hello or BestRegards (Typ stays Undefined otherwise).
/// </summary>
/// <remarks>
/// Steps, in order:
/// 1. First pass over the line: leading quote characters (greater-than sign, vertical bar)
///    increment Lev; a match of m_FromWords followed by ':' marks the line as From; person,
///    geo, address, person-property referents and referents whose TypeName is "PHONE",
///    "URI" or "ORGANIZATION" are collected into Refs. Table control characters and
///    hyphens are skipped.
/// 2. Hello: counts m_HelloWords matches, name-like tokens and other tokens; the line is
///    Hello when at least one hello word was found and fewer than 3 other tokens were
///    counted. The line may be shortened to the last non-letter text token seen.
/// 3. BestRegards: counts m_RegardWords matches (also as the last word of a noun phrase);
///    the line is BestRegards when the counter is positive at the end. This step writes
///    to the Tag property of the tokens it walks over.
/// 4. From: forwarded/original-message phrases (Russian and English), a leading DATE
///    referent or "IL GIORNO" / "ON date" followed by a URI or person and a "wrote"-like
///    word, or a URI/person followed by "ПИСАТЬ" and '('. This step calls Parse
///    recursively with lev + 1 and may extend the line over the following line.
/// NOTE(review): several inner loops dereference t or t.Previous without a null check
/// (for example "t.EndChar" in the genitive loop of step 3, "t.Previous.IsChar" and
/// "t.EndChar >= next.BeginChar" in step 4) - presumably safe on real input, confirm.
/// NOTE(review): the empty statements "if (t0.IsValue("HAVE", null)) { }" and
/// "if (t != t0) { }" have no effect; they look like leftovers - confirm.
/// </remarks>
/// <param name="t0">First token of the line; null yields null.</param>
/// <param name="lev">Recursion depth; the first recursive call in step 4 is made only while lev is below 5.</param>
/// <param name="maxCount">When positive, the first pass stops once its token counter exceeds this value.</param>
/// <returns>The parsed line, or null when t0 is null.</returns>
public static MailLine Parse(Pullenti.Ner.Token t0, int lev, int maxCount = 0) { if (t0 == null) { return(null); } MailLine res = new MailLine(t0, t0); bool pr = true; int cou = 0; for (Pullenti.Ner.Token t = t0; t != null; t = t.Next, cou++) { if (t.IsNewlineBefore && t0 != t) { break; } if (maxCount > 0 && cou > maxCount) { break; } res.EndToken = t; if (t.IsTableControlChar || t.IsHiphen) { continue; } if (pr) { if ((t is Pullenti.Ner.TextToken) && t.IsCharOf(">|")) { res.Lev++; } else { pr = false; Pullenti.Ner.Core.TerminToken tok = m_FromWords.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No); if (tok != null && tok.EndToken.Next != null && tok.EndToken.Next.IsChar(':')) { res.Typ = Types.From; t = tok.EndToken.Next; continue; } } } if (t is Pullenti.Ner.ReferentToken) { Pullenti.Ner.Referent r = t.GetReferent(); if (r != null) { if ((((r is Pullenti.Ner.Person.PersonReferent) || (r is Pullenti.Ner.Geo.GeoReferent) || (r is Pullenti.Ner.Address.AddressReferent)) || r.TypeName == "PHONE" || r.TypeName == "URI") || (r is Pullenti.Ner.Person.PersonPropertyReferent) || r.TypeName == "ORGANIZATION") { res.Refs.Add(r); } } } } if (res.Typ == Types.Undefined) { Pullenti.Ner.Token t = t0; for (; t != null && (t.EndChar < res.EndChar); t = t.Next) { if (!t.IsHiphen && t.Chars.IsLetter) { break; } } int ok = 0; int nams = 0; int oth = 0; Pullenti.Ner.Token lastComma = null; for (; t != null && (t.EndChar < res.EndChar); t = t.Next) { if (t.GetReferent() is Pullenti.Ner.Person.PersonReferent) { nams++; continue; } if (t is Pullenti.Ner.TextToken) { if (!t.Chars.IsLetter) { lastComma = t; continue; } Pullenti.Ner.Core.TerminToken tok = m_HelloWords.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No); if (tok != null) { ok++; t = tok.EndToken; continue; } if (t.IsValue("ВСЕ", null) || t.IsValue("ALL", null) || t.IsValue("TEAM", null)) { nams++; continue; } Pullenti.Ner.Person.Internal.PersonItemToken pit = Pullenti.Ner.Person.Internal.PersonItemToken.TryAttach(t, null, 
Pullenti.Ner.Person.Internal.PersonItemToken.ParseAttr.No, null); if (pit != null) { nams++; t = pit.EndToken; continue; } } if ((++oth) > 3) { if (ok > 0 && lastComma != null) { res.EndToken = lastComma; oth = 0; } break; } } if ((oth < 3) && ok > 0) { res.Typ = Types.Hello; } } if (res.Typ == Types.Undefined) { int okWords = 0; if (t0.IsValue("HAVE", null)) { } for (Pullenti.Ner.Token t = t0; t != null && t.EndChar <= res.EndChar; t = t.Next) { if (!(t is Pullenti.Ner.TextToken)) { continue; } if (t.IsChar('<')) { Pullenti.Ner.Core.BracketSequenceToken br = Pullenti.Ner.Core.BracketHelper.TryParse(t, Pullenti.Ner.Core.BracketParseAttr.No, 100); if (br != null) { t = br.EndToken; continue; } } if (!t.IsLetters || t.IsTableControlChar) { continue; } Pullenti.Ner.Core.TerminToken tok = m_RegardWords.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No); if (tok != null) { okWords++; for (; t != null && t.EndChar <= tok.EndChar; t = t.Next) { t.Tag = tok.Termin; } t = tok.EndToken; if ((t.Next is Pullenti.Ner.TextToken) && t.Next.Morph.Case.IsGenitive) { for (t = t.Next; t.EndChar <= res.EndChar; t = t.Next) { if (t.Morph.Class.IsConjunction) { continue; } Pullenti.Ner.Core.NounPhraseToken npt1 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null); if (npt1 == null) { break; } if (!npt1.Morph.Case.IsGenitive) { break; } for (; t.EndChar < npt1.EndChar; t = t.Next) { t.Tag = t; } t.Tag = t; } } continue; } if ((t.Morph.Class.IsPreposition || t.Morph.Class.IsConjunction || t.Morph.Class.IsMisc) || t.IsValue("C", null)) { continue; } if ((okWords > 0 && t.Previous != null && t.Previous.IsComma) && t.Previous.BeginChar > t0.BeginChar && !t.Chars.IsAllLower) { res.EndToken = t.Previous; break; } Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null); if (npt == null) { if ((res.EndChar - t.EndChar) > 10) { okWords = 0; } break; } tok = 
m_RegardWords.TryParse(npt.EndToken, Pullenti.Ner.Core.TerminParseAttr.No); if (tok != null && (npt.EndToken is Pullenti.Ner.TextToken)) { string term = (npt.EndToken as Pullenti.Ner.TextToken).Term; if (term == "ДЕЛ") { tok = null; } } if (tok == null) { if (npt.Noun.IsValue("НАДЕЖДА", null)) { t.Tag = t; } else if (okWords > 0 && t.IsValue("NICE", null) && ((res.EndChar - npt.EndChar) < 13)) { t.Tag = t; } else { okWords = 0; } break; } okWords++; for (; t != null && t.EndChar <= tok.EndChar; t = t.Next) { t.Tag = tok.Termin; } t = tok.EndToken; } if (okWords > 0) { res.Typ = Types.BestRegards; } } if (res.Typ == Types.Undefined) { Pullenti.Ner.Token t = t0; for (; t != null && (t.EndChar < res.EndChar); t = t.Next) { if (!(t is Pullenti.Ner.TextToken)) { break; } else if (!t.IsHiphen && t.Chars.IsLetter) { break; } } if (t != null) { if (t != t0) { } if (((t.IsValue("ПЕРЕСЫЛАЕМОЕ", null) || t.IsValue("ПЕРЕАДРЕСОВАННОЕ", null))) && t.Next != null && t.Next.IsValue("СООБЩЕНИЕ", null)) { res.Typ = Types.From; res.MustBeFirstLine = true; } else if ((t.IsValue("НАЧАЛО", null) && t.Next != null && ((t.Next.IsValue("ПЕРЕСЫЛАЕМОЕ", null) || t.Next.IsValue("ПЕРЕАДРЕСОВАННОЕ", null)))) && t.Next.Next != null && t.Next.Next.IsValue("СООБЩЕНИЕ", null)) { res.Typ = Types.From; res.MustBeFirstLine = true; } else if (t.IsValue("ORIGINAL", null) && t.Next != null && ((t.Next.IsValue("MESSAGE", null) || t.Next.IsValue("APPOINTMENT", null)))) { res.Typ = Types.From; res.MustBeFirstLine = true; } else if (t.IsValue("ПЕРЕСЛАНО", null) && t.Next != null && t.Next.IsValue("ПОЛЬЗОВАТЕЛЕМ", null)) { res.Typ = Types.From; res.MustBeFirstLine = true; } else if (((t.GetReferent() != null && t.GetReferent().TypeName == "DATE")) || ((t.IsValue("IL", null) && t.Next != null && t.Next.IsValue("GIORNO", null))) || ((t.IsValue("ON", null) && (t.Next is Pullenti.Ner.ReferentToken) && t.Next.GetReferent().TypeName == "DATE"))) { bool hasFrom = false; bool hasDate = t.GetReferent() != null && 
t.GetReferent().TypeName == "DATE"; if (t.IsNewlineAfter && (lev < 5)) { MailLine res1 = Parse(t.Next, lev + 1, 0); if (res1 != null && res1.Typ == Types.Hello) { res.Typ = Types.From; } } MailLine next = Parse(res.EndToken.Next, lev + 1, 0); if (next != null) { if (next.Typ != Types.Undefined) { next = null; } } int tmax = res.EndChar; if (next != null) { tmax = next.EndChar; } Pullenti.Ner.Core.BracketSequenceToken br1 = null; for (; t != null && t.EndChar <= tmax; t = t.Next) { if (t.IsValue("ОТ", null) || t.IsValue("FROM", null)) { hasFrom = true; } else if (t.GetReferent() != null && ((t.GetReferent().TypeName == "URI" || (t.GetReferent() is Pullenti.Ner.Person.PersonReferent)))) { if (t.GetReferent().TypeName == "URI" && hasDate) { if (br1 != null) { hasFrom = true; next = null; } if (t.Previous.IsChar('<') && t.Next != null && t.Next.IsChar('>')) { t = t.Next; if (t.Next != null && t.Next.IsChar(':')) { t = t.Next; } if (t.IsNewlineAfter) { hasFrom = true; next = null; } } } for (t = t.Next; t != null && t.EndChar <= res.EndChar; t = t.Next) { if (t.IsValue("HA", null) && t.Next != null && t.Next.IsValue("SCRITTO", null)) { hasFrom = true; break; } else if (((t.IsValue("НАПИСАТЬ", null) || t.IsValue("WROTE", null))) && ((res.EndChar - t.EndChar) < 10)) { hasFrom = true; break; } } if (hasFrom) { res.Typ = Types.From; if (next != null && t.EndChar >= next.BeginChar) { res.EndToken = next.EndToken; } } break; } else if (br1 == null && !t.IsChar('<') && Pullenti.Ner.Core.BracketHelper.CanBeStartOfSequence(t, true, false)) { br1 = Pullenti.Ner.Core.BracketHelper.TryParse(t, Pullenti.Ner.Core.BracketParseAttr.No, 100); if (br1 != null) { t = br1.EndToken; } } } } else { bool hasUri = false; for (; t != null && (t.EndChar < res.EndChar); t = t.Next) { if (t.GetReferent() != null && ((t.GetReferent().TypeName == "URI" || (t.GetReferent() is Pullenti.Ner.Person.PersonReferent)))) { hasUri = true; } else if (t.IsValue("ПИСАТЬ", null) && hasUri) { if (t.Next != null 
&& t.Next.IsChar('(')) { if (hasUri) { res.Typ = Types.From; } break; } } } } } } return(res); }
/// <summary>
/// Tries to parse one item of a named entity (a type word, a well-known name, a referent
/// or a free capitalised name) starting at <paramref name="t"/>.
/// </summary>
/// <remarks>
/// Alternatives, in order: a referent token whose referent is a person, person property,
/// geo or organization; a match in <c>m_Types</c> (optionally also a well-known name from
/// <c>m_Names</c> of the same kind); a match in <c>m_Names</c>; a compound recognised by
/// <c>MiscLocationHelper.TryAttachNordWest</c> combined with a geo referent or a location
/// item; a capitalised noun phrase whose noun is a type word; a bracketed sequence; and
/// finally any non-lower-case word longer than two characters (optionally with a
/// hyphenated continuation in the same alphabet).
/// </remarks>
/// <param name="t">Start token.</param>
/// <param name="locOnto">Only forwarded to recursive calls.</param>
/// <returns>The parsed item, or null when nothing is recognised.</returns>
public static NamedItemToken TryParse(Pullenti.Ner.Token t, Pullenti.Ner.Core.IntOntologyCollection locOnto)
{
    if (t == null)
    {
        return null;
    }

    if (t is Pullenti.Ner.ReferentToken)
    {
        Pullenti.Ner.Referent r = t.GetReferent();
        if ((r.TypeName == "PERSON" || r.TypeName == "PERSONPROPERTY" || (r is Pullenti.Ner.Geo.GeoReferent)) || r.TypeName == "ORGANIZATION")
        {
            return new NamedItemToken(t, t) { Ref = r, Morph = t.Morph };
        }
        return null;
    }

    Pullenti.Ner.Core.TerminToken typ = m_Types.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
    Pullenti.Ner.Core.TerminToken nam = m_Names.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);

    // Type word, possibly coinciding with a well-known name of the same kind.
    if (typ != null)
    {
        if (!(t is Pullenti.Ner.TextToken))
        {
            return null;
        }
        NamedItemToken res = new NamedItemToken(typ.BeginToken, typ.EndToken) { Morph = typ.Morph, Chars = typ.Chars };
        res.Kind = (Pullenti.Ner.Named.NamedEntityKind)typ.Termin.Tag;
        res.TypeValue = typ.Termin.CanonicText;
        if ((nam != null && nam.EndToken == typ.EndToken && !t.Chars.IsAllLower) && ((Pullenti.Ner.Named.NamedEntityKind)nam.Termin.Tag) == res.Kind)
        {
            res.NameValue = nam.Termin.CanonicText;
            res.IsWellknown = true;
        }
        return res;
    }

    // Well-known name.
    if (nam != null)
    {
        if (nam.BeginToken.Chars.IsAllLower)
        {
            return null;
        }
        NamedItemToken res = new NamedItemToken(nam.BeginToken, nam.EndToken) { Morph = nam.Morph, Chars = nam.Chars };
        res.Kind = (Pullenti.Ner.Named.NamedEntityKind)nam.Termin.Tag;
        res.NameValue = nam.Termin.CanonicText;

        // The name counts as well-known only when it stands as a separate word:
        // not glued to a previous token, and glued on the right at most to
        // punctuation that is itself followed by whitespace.
        bool ok = true;
        if (!t.IsWhitespaceBefore && t.Previous != null)
        {
            ok = false;
        }
        else if (!t.IsWhitespaceAfter && t.Next != null)
        {
            if (!(t.Next.IsCharOf(",.;!?") && t.Next.IsWhitespaceAfter))
            {
                ok = false;
            }
        }
        if (ok)
        {
            res.IsWellknown = true;
            res.TypeValue = nam.Termin.Tag2 as string;
        }
        return res;
    }

    // Compound recognised by TryAttachNordWest, alone or as a modifier of a location.
    Pullenti.Ner.MetaToken adj = Pullenti.Ner.Geo.Internal.MiscLocationHelper.TryAttachNordWest(t);
    if (adj != null)
    {
        if (adj.Morph.Class.IsNoun)
        {
            if (adj.EndToken.IsValue("ВОСТОК", null))
            {
                // A single-token match is not accepted here.
                if (adj.BeginToken == adj.EndToken)
                {
                    return null;
                }
                NamedItemToken re = new NamedItemToken(t, adj.EndToken) { Morph = adj.Morph };
                re.Kind = Pullenti.Ner.Named.NamedEntityKind.Location;
                re.NameValue = Pullenti.Ner.Core.MiscHelper.GetTextValue(t, adj.EndToken, Pullenti.Ner.Core.GetTextAttr.FirstNounGroupToNominative);
                re.IsWellknown = true;
                return re;
            }
            return null;
        }
        if (adj.WhitespacesAfterCount > 2)
        {
            return null;
        }
        if ((adj.EndToken.Next is Pullenti.Ner.ReferentToken) && (adj.EndToken.Next.GetReferent() is Pullenti.Ner.Geo.GeoReferent))
        {
            NamedItemToken re = new NamedItemToken(t, adj.EndToken.Next) { Morph = adj.EndToken.Next.Morph };
            re.Kind = Pullenti.Ner.Named.NamedEntityKind.Location;
            re.NameValue = Pullenti.Ner.Core.MiscHelper.GetTextValue(t, adj.EndToken.Next, Pullenti.Ner.Core.GetTextAttr.FirstNounGroupToNominative);
            re.IsWellknown = true;
            re.Ref = adj.EndToken.Next.GetReferent();
            return re;
        }
        NamedItemToken res = TryParse(adj.EndToken.Next, locOnto);
        if (res != null && res.Kind == Pullenti.Ner.Named.NamedEntityKind.Location)
        {
            string s = adj.GetNormalCaseText(Pullenti.Morph.MorphClass.Adjective, Pullenti.Morph.MorphNumber.Singular, res.Morph.Gender, false);
            if (s != null)
            {
                if (res.NameValue == null)
                {
                    res.NameValue = s.ToUpper();
                }
                else
                {
                    res.NameValue = string.Format("{0} {1}", s.ToUpper(), res.NameValue);
                    res.TypeValue = null;
                }
                res.BeginToken = t;
                res.Chars = t.Chars;
                res.IsWellknown = true;
                return res;
            }
        }
    }

    // Capitalised "adjective(s) + type word" phrase that does not start a sentence.
    if (t.Chars.IsCapitalUpper && !Pullenti.Ner.Core.MiscHelper.CanBeStartOfSentence(t))
    {
        Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
        if (npt != null && npt.Adjectives.Count > 0)
        {
            NamedItemToken test = TryParse(npt.Noun.BeginToken, locOnto);
            if (test != null && test.EndToken == npt.EndToken && test.TypeValue != null)
            {
                test.BeginToken = t;
                StringBuilder tmp = new StringBuilder();
                foreach (Pullenti.Ner.MetaToken a in npt.Adjectives)
                {
                    string s = a.GetNormalCaseText(Pullenti.Morph.MorphClass.Adjective, Pullenti.Morph.MorphNumber.Singular, test.Morph.Gender, false);
                    if (tmp.Length > 0)
                    {
                        tmp.Append(' ');
                    }
                    tmp.Append(s);
                }
                test.NameValue = tmp.ToString();
                test.Chars = t.Chars;
                if (test.Kind == Pullenti.Ner.Named.NamedEntityKind.Location)
                {
                    test.IsWellknown = true;
                }
                return test;
            }
        }
    }

    // Name in brackets/quotes.
    if ((Pullenti.Ner.Core.BracketHelper.IsBracket(t, true) && t.Next != null && t.Next.Chars.IsLetter) && !t.Next.Chars.IsAllLower)
    {
        Pullenti.Ner.Core.BracketSequenceToken br = Pullenti.Ner.Core.BracketHelper.TryParse(t, Pullenti.Ner.Core.BracketParseAttr.No, 100);
        if (br != null)
        {
            NamedItemToken res = new NamedItemToken(t, br.EndToken);
            res.IsInBracket = true;
            res.NameValue = Pullenti.Ner.Core.MiscHelper.GetTextValue(t, br.EndToken, Pullenti.Ner.Core.GetTextAttr.No);
            nam = m_Names.TryParse(t.Next, Pullenti.Ner.Core.TerminParseAttr.No);
            if (nam != null && nam.EndToken == br.EndToken.Previous)
            {
                res.Kind = (Pullenti.Ner.Named.NamedEntityKind)nam.Termin.Tag;
                res.IsWellknown = true;
                res.NameValue = nam.Termin.CanonicText;
            }
            return res;
        }
    }

    // Fallback: any non-lower-case word longer than two characters.
    if (((t is Pullenti.Ner.TextToken) && t.Chars.IsLetter && !t.Chars.IsAllLower) && t.LengthChar > 2)
    {
        NamedItemToken res = new NamedItemToken(t, t) { Morph = t.Morph };
        string str = (t as Pullenti.Ner.TextToken).Term;
        // Words with these endings keep their surface form instead of being normalised.
        if (str.EndsWith("О") || str.EndsWith("И") || str.EndsWith("Ы"))
        {
            res.NameValue = str;
        }
        else
        {
            res.NameValue = t.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false);
        }
        res.Chars = t.Chars;
        if (((!t.IsWhitespaceAfter && t.Next != null && t.Next.IsHiphen) && (t.Next.Next is Pullenti.Ner.TextToken) && !t.Next.Next.IsWhitespaceAfter) && t.Chars.IsCyrillicLetter == t.Next.Next.Chars.IsCyrillicLetter)
        {
            t = (res.EndToken = t.Next.Next);
            res.NameValue = string.Format("{0}-{1}", res.NameValue, t.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false));
        }
        return res;
    }

    return null;
}
/// <summary>
/// Tries to recognise one element of an identity-document description
/// (keyword, series, number, code, date, organisation or address) starting at <paramref name="t"/>.
/// </summary>
/// <param name="t">First token to analyse. <c>null</c> is accepted and yields <c>null</c>.</param>
/// <param name="prev">
/// The element recognised just before this one, or <c>null</c> for the first element.
/// When it is <c>null</c> only a <c>Typs.Keyword</c> element can be returned.
/// </param>
/// <returns>The recognised element, or <c>null</c> when nothing matches.</returns>
static PersonIdToken TryParse(Pullenti.Ner.Token t, PersonIdToken prev)
{
    // This method calls itself with "the token after ..." which does not exist at the
    // end of the text; without this guard the first dereference below would throw.
    if (t == null)
    {
        return null;
    }

    // "СВИДЕТЕЛЬСТВО ..." (certificate): accepted as a keyword only when both a
    // registration word and an "individual entrepreneur" word follow.
    if (t.IsValue("СВИДЕТЕЛЬСТВО", null))
    {
        Pullenti.Ner.Token tt1 = t;
        bool ip = false;
        bool reg = false;
        for (Pullenti.Ner.Token tt = t.Next; tt != null; tt = tt.Next)
        {
            if (tt.IsCommaAnd || tt.Morph.Class.IsPreposition)
            {
                continue;
            }
            if (tt.IsValue("РЕГИСТРАЦИЯ", null) || tt.IsValue("РЕЕСТР", null) || tt.IsValue("ЗАРЕГИСТРИРОВАТЬ", null))
            {
                reg = true;
                tt1 = tt;
            }
            else if (tt.IsValue("ИНДИВИДУАЛЬНЫЙ", null) || tt.IsValue("ИП", null))
            {
                ip = true;
                tt1 = tt;
            }
            else if (tt.IsValue("ВНЕСЕНИЕ", null)
                || tt.IsValue("ГОСУДАРСТВЕННЫЙ", null)
                || tt.IsValue("ЕДИНЫЙ", null)
                || tt.IsValue("ЗАПИСЬ", null)
                || tt.IsValue("ПРЕДПРИНИМАТЕЛЬ", null))
            {
                tt1 = tt;
            }
            else if (tt.GetReferent() != null && tt.GetReferent().TypeName == "DATERANGE")
            {
                tt1 = tt;
            }
            else
            {
                break;
            }
        }
        if (reg && ip)
        {
            return new PersonIdToken(t, tt1)
            {
                Typ = Typs.Keyword,
                Value = "СВИДЕТЕЛЬСТВО О ГОСУДАРСТВЕННОЙ РЕГИСТРАЦИИ ФИЗИЧЕСКОГО ЛИЦА В КАЧЕСТВЕ ИНДИВИДУАЛЬНОГО ПРЕДПРИНИМАТЕЛЯ"
            };
        }
    }

    Pullenti.Ner.Core.TerminToken tok = m_Ontology.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
    if (tok != null)
    {
        Typs ty = (Typs)tok.Termin.Tag;
        PersonIdToken res = new PersonIdToken(tok.BeginToken, tok.EndToken) { Typ = ty, Value = tok.Termin.CanonicText };

        if (prev == null)
        {
            // First element of a document description: must be a keyword.
            if (ty != Typs.Keyword)
            {
                return null;
            }
            // Extend the keyword over a following country / citizenship mention.
            for (t = tok.EndToken.Next; t != null; t = t.Next)
            {
                Pullenti.Ner.Referent r = t.GetReferent();
                if (r != null && (r is Pullenti.Ner.Geo.GeoReferent))
                {
                    res.Referent = r;
                    res.EndToken = t;
                    continue;
                }
                if (t.IsValue("ГРАЖДАНИН", null) && t.Next != null && (t.Next.GetReferent() is Pullenti.Ner.Geo.GeoReferent))
                {
                    res.Referent = t.Next.GetReferent();
                    t = (res.EndToken = t.Next);
                    continue;
                }
                if (r != null)
                {
                    break;
                }
                PersonAttrToken ait = PersonAttrToken.TryAttach(t, null, PersonAttrToken.PersonAttrAttachAttrs.No);
                if (ait != null)
                {
                    if (ait.Referent != null)
                    {
                        foreach (Pullenti.Ner.Slot s in ait.Referent.Slots)
                        {
                            if (s.TypeName == Pullenti.Ner.Person.PersonPropertyReferent.ATTR_REF && (s.Value is Pullenti.Ner.Geo.GeoReferent))
                            {
                                res.Referent = s.Value as Pullenti.Ner.Referent;
                            }
                        }
                    }
                    res.EndToken = ait.EndToken;
                    break;
                }
                if (t.IsValue("ДАННЫЙ", null))
                {
                    res.EndToken = t;
                    continue;
                }
                break;
            }
            // Only a geo referent flagged as a state is kept on the keyword.
            if ((res.Referent is Pullenti.Ner.Geo.GeoReferent) && !(res.Referent as Pullenti.Ner.Geo.GeoReferent).IsState)
            {
                res.Referent = null;
            }
            return res;
        }

        if (ty == Typs.Number)
        {
            // "<number keyword>[:] 12 34 ..." - concatenate the number tokens on the same line.
            StringBuilder tmp = new StringBuilder();
            Pullenti.Ner.Token tt = tok.EndToken.Next;
            if (tt != null && tt.IsChar(':'))
            {
                tt = tt.Next;
            }
            for (; tt != null; tt = tt.Next)
            {
                if (tt.IsNewlineBefore || !(tt is Pullenti.Ner.NumberToken))
                {
                    break;
                }
                tmp.Append(tt.GetSourceText());
                res.EndToken = tt;
            }
            if (tmp.Length < 1)
            {
                return null;
            }
            res.Value = tmp.ToString();
            res.HasPrefix = true;
            return res;
        }

        if (ty == Typs.Seria)
        {
            StringBuilder tmp = new StringBuilder();
            Pullenti.Ner.Token tt = tok.EndToken.Next;
            if (tt != null && tt.IsChar(':'))
            {
                tt = tt.Next;
            }
            for (; tt != null; tt = tt.Next)
            {
                if (tt.IsNewlineBefore)
                {
                    break;
                }
                // A number prefix starts the number part - the series ends here.
                if (Pullenti.Ner.Core.MiscHelper.CheckNumberPrefix(tt) != null)
                {
                    break;
                }
                if (!(tt is Pullenti.Ner.NumberToken))
                {
                    // Letter part of the series: an upper-case roman numeral or a two-letter group.
                    if (!(tt is Pullenti.Ner.TextToken) || !tt.Chars.IsAllUpper)
                    {
                        break;
                    }
                    Pullenti.Ner.NumberToken nu = Pullenti.Ner.Core.NumberHelper.TryParseRoman(tt);
                    if (nu != null)
                    {
                        tmp.Append(nu.GetSourceText());
                        tt = nu.EndToken;
                    }
                    else if (tt.LengthChar != 2)
                    {
                        break;
                    }
                    else
                    {
                        tmp.Append((tt as Pullenti.Ner.TextToken).Term);
                        res.EndToken = tt;
                    }
                    if (tt.Next != null && tt.Next.IsHiphen)
                    {
                        tt = tt.Next;
                    }
                    continue;
                }
                if (tmp.Length >= 4)
                {
                    break;
                }
                tmp.Append(tt.GetSourceText());
                res.EndToken = tt;
            }
            if (tmp.Length < 4)
            {
                if (tmp.Length < 2)
                {
                    return null;
                }
                // A short series is accepted only when a number element follows it.
                Pullenti.Ner.Token tt1 = res.EndToken.Next;
                if (tt1 != null && tt1.IsComma)
                {
                    tt1 = tt1.Next;
                }
                PersonIdToken next = TryParse(tt1, res);
                if (next == null || next.Typ != Typs.Number)
                {
                    return null;
                }
            }
            res.Value = tmp.ToString();
            res.HasPrefix = true;
            return res;
        }

        if (ty == Typs.Code)
        {
            // Extend over following ':' / hyphen separators and number tokens.
            for (Pullenti.Ner.Token tt = res.EndToken.Next; tt != null; tt = tt.Next)
            {
                if (tt.IsCharOf(":") || tt.IsHiphen)
                {
                    continue;
                }
                if (tt is Pullenti.Ner.NumberToken)
                {
                    res.EndToken = tt;
                    continue;
                }
                break;
            }
        }

        if (ty == Typs.Address)
        {
            if (t.GetReferent() is Pullenti.Ner.Address.AddressReferent)
            {
                res.Referent = t.GetReferent();
                res.EndToken = t;
                return res;
            }
            for (Pullenti.Ner.Token tt = res.EndToken.Next; tt != null; tt = tt.Next)
            {
                if (tt.IsCharOf(":") || tt.IsHiphen || tt.Morph.Class.IsPreposition)
                {
                    continue;
                }
                if (tt.GetReferent() is Pullenti.Ner.Address.AddressReferent)
                {
                    res.Referent = tt.GetReferent();
                    res.EndToken = tt;
                }
                break;
            }
            // An address keyword without an address referent is useless.
            if (res.Referent == null)
            {
                return null;
            }
        }
        return res;
    }
    else if (prev == null)
    {
        return null;
    }

    // No ontology term: try a bare number, optionally introduced by a number prefix.
    Pullenti.Ner.Token t0 = t;
    Pullenti.Ner.Token t1 = Pullenti.Ner.Core.MiscHelper.CheckNumberPrefix(t0);
    if (t1 != null)
    {
        t = t1;
    }
    if (t is Pullenti.Ner.NumberToken)
    {
        StringBuilder tmp = new StringBuilder();
        PersonIdToken res = new PersonIdToken(t0, t) { Typ = Typs.Number };
        for (Pullenti.Ner.Token tt = t; tt != null; tt = tt.Next)
        {
            if (tt.IsNewlineBefore || !(tt is Pullenti.Ner.NumberToken))
            {
                break;
            }
            tmp.Append(tt.GetSourceText());
            res.EndToken = tt;
        }
        if (tmp.Length < 4)
        {
            if (tmp.Length < 2)
            {
                return null;
            }
            if (prev == null || prev.Typ != Typs.Keyword)
            {
                return null;
            }
            // Short digit group right after the keyword: treat it as a series
            // if a number element follows, otherwise reject it.
            PersonIdToken ne = TryParse(res.EndToken.Next, prev);
            if (ne != null && ne.Typ == Typs.Number)
            {
                res.Typ = Typs.Seria;
            }
            else
            {
                return null;
            }
        }
        res.Value = tmp.ToString();
        if (t0 != t)
        {
            res.HasPrefix = true;
        }
        return res;
    }

    if (t is Pullenti.Ner.ReferentToken)
    {
        Pullenti.Ner.Referent r = t.GetReferent();
        if (r != null)
        {
            if (r.TypeName == "DATE")
            {
                return new PersonIdToken(t, t) { Typ = Typs.Date, Referent = r };
            }
            if (r.TypeName == "ORGANIZATION")
            {
                return new PersonIdToken(t, t) { Typ = Typs.Org, Referent = r };
            }
            if (r.TypeName == "ADDRESS")
            {
                return new PersonIdToken(t, t) { Typ = Typs.Address, Referent = r };
            }
        }
    }

    // A non-lowercase word right after the keyword is a series if a number follows it.
    if (prev != null && prev.Typ == Typs.Keyword && (t is Pullenti.Ner.TextToken) && !t.Chars.IsAllLower && t.Chars.IsLetter)
    {
        PersonIdToken rr = TryParse(t.Next, prev);
        if (rr != null && rr.Typ == Typs.Number)
        {
            return new PersonIdToken(t, t) { Typ = Typs.Seria, Value = (t as Pullenti.Ner.TextToken).Term };
        }
    }

    // "ОТ <date>" / "ВІД <date>" (i.e. "dated ...").
    if (t != null && t.IsValue("ОТ", "ВІД") && (t.Next is Pullenti.Ner.ReferentToken) && t.Next.GetReferent().TypeName == "DATE")
    {
        return new PersonIdToken(t, t.Next) { Typ = Typs.Date, Referent = t.Next.GetReferent() };
    }
    return null;
}
/// <summary>
/// Runs the analyzer over the whole token chain of <paramref name="kit"/> in two passes.
/// Pass 1 parses weapon item sequences, registers the resulting referents and embeds
/// their tokens, remembering every model string (with and without brand) and every name.
/// Pass 2 re-scans the text and embeds further tokens wherever one of the remembered
/// models or names occurs.
/// </summary>
/// <param name="kit">Analysis kit supplying the token chain and analyzer data.</param>
public override void Process(Pullenti.Ner.Core.AnalysisKit kit)
{
    Pullenti.Ner.Core.AnalyzerData data = kit.GetAnalyzerData(this);
    Pullenti.Ner.Core.TerminCollection modelTermins = new Pullenti.Ner.Core.TerminCollection();
    Dictionary<string, List<Pullenti.Ner.Referent>> referentsByModel = new Dictionary<string, List<Pullenti.Ner.Referent>>();
    Pullenti.Ner.Core.TerminCollection nameTermins = new Pullenti.Ner.Core.TerminCollection();

    // ---- Pass 1: explicit weapon descriptions ----
    for (Pullenti.Ner.Token cur = kit.FirstToken; cur != null; cur = cur.Next)
    {
        List<Pullenti.Ner.Weapon.Internal.WeaponItemToken> items = Pullenti.Ner.Weapon.Internal.WeaponItemToken.TryParseList(cur, 10);
        if (items == null)
        {
            continue;
        }
        List<Pullenti.Ner.ReferentToken> attached = this.TryAttach(items, false);
        if (attached == null)
        {
            continue;
        }

        foreach (Pullenti.Ner.ReferentToken found in attached)
        {
            found.Referent = data.RegisterReferent(found.Referent);
            kit.EmbedToken(found);
            cur = found;

            foreach (Pullenti.Ner.Slot slot in found.Referent.Slots)
            {
                if (slot.TypeName == WeaponReferent.ATTR_MODEL)
                {
                    // Remember the bare model first, then (once) the "brand model" form.
                    string modelKey = slot.Value.ToString();
                    bool brandPrefixed = false;
                    while (true)
                    {
                        // Keys starting with a digit are not remembered.
                        if (!char.IsDigit(modelKey[0]))
                        {
                            List<Pullenti.Ner.Referent> owners;
                            if (!referentsByModel.TryGetValue(modelKey, out owners))
                            {
                                owners = new List<Pullenti.Ner.Referent>();
                                referentsByModel.Add(modelKey, owners);
                            }
                            if (!owners.Contains(found.Referent))
                            {
                                owners.Add(found.Referent);
                            }
                            modelTermins.AddString(modelKey, owners, null, false);
                        }
                        if (brandPrefixed)
                        {
                            break;
                        }
                        string brandName = found.Referent.GetStringValue(WeaponReferent.ATTR_BRAND);
                        if (brandName == null)
                        {
                            break;
                        }
                        modelKey = string.Format("{0} {1}", brandName, modelKey);
                        brandPrefixed = true;
                    }
                }
                else if (slot.TypeName == WeaponReferent.ATTR_NAME)
                {
                    Pullenti.Ner.Core.Termin nameTermin = new Pullenti.Ner.Core.Termin(slot.Value.ToString());
                    nameTermin.Tag = found.Referent;
                    nameTermins.Add(nameTermin);
                }
            }
        }
    }

    // Nothing was learnt in pass 1 - pass 2 has nothing to look for.
    if (referentsByModel.Count == 0 && nameTermins.Termins.Count == 0)
    {
        return;
    }

    // ---- Pass 2: bare mentions of known models / names ----
    for (Pullenti.Ner.Token cur = kit.FirstToken; cur != null; cur = cur.Next)
    {
        // A known name that fills a bracket sequence completely.
        Pullenti.Ner.Core.BracketSequenceToken bracket = Pullenti.Ner.Core.BracketHelper.TryParse(cur, Pullenti.Ner.Core.BracketParseAttr.No, 10);
        if (bracket != null)
        {
            Pullenti.Ner.Core.TerminToken inner = nameTermins.TryParse(cur.Next, Pullenti.Ner.Core.TerminParseAttr.No);
            if (inner != null && inner.EndToken.Next == bracket.EndToken)
            {
                Pullenti.Ner.ReferentToken bracketed = new Pullenti.Ner.ReferentToken(inner.Termin.Tag as Pullenti.Ner.Referent, bracket.BeginToken, bracket.EndToken);
                kit.EmbedToken(bracketed);
                cur = bracketed;
                continue;
            }
        }

        if (!(cur is Pullenti.Ner.TextToken) || !cur.Chars.IsLetter)
        {
            continue;
        }

        // Models are tried first; names only for tokens that are not all lower-case.
        Pullenti.Ner.Core.TerminToken hit = modelTermins.TryParse(cur, Pullenti.Ner.Core.TerminParseAttr.No);
        if (hit == null && !cur.Chars.IsAllLower)
        {
            hit = nameTermins.TryParse(cur, Pullenti.Ner.Core.TerminParseAttr.No);
        }
        if (hit == null)
        {
            continue;
        }

        // Without whitespace after the match, the next token must be one of ",.)" or a bracket.
        if (!hit.IsWhitespaceAfter
            && (hit.EndToken.Next == null || !hit.EndToken.Next.IsCharOf(",.)"))
            && !Pullenti.Ner.Core.BracketHelper.IsBracket(hit.EndToken.Next, false))
        {
            continue;
        }

        // A model tag is a list of referents and is used only when it holds exactly one;
        // otherwise the tag is taken as a single referent (name termins).
        List<Pullenti.Ner.Referent> candidates = hit.Termin.Tag as List<Pullenti.Ner.Referent>;
        Pullenti.Ner.Referent target = (candidates != null && candidates.Count == 1)
            ? candidates[0]
            : hit.Termin.Tag as Pullenti.Ner.Referent;
        if (target == null)
        {
            continue;
        }

        // Absorb a brand token standing immediately before the match.
        Pullenti.Ner.Weapon.Internal.WeaponItemToken before = Pullenti.Ner.Weapon.Internal.WeaponItemToken.TryParse(hit.BeginToken.Previous, null, false, true);
        if (before != null && before.Typ == Pullenti.Ner.Weapon.Internal.WeaponItemToken.Typs.Brand)
        {
            target.AddSlot(WeaponReferent.ATTR_BRAND, before.Value, false, 0);
            hit.BeginToken = before.BeginToken;
        }

        Pullenti.Ner.ReferentToken mention = new Pullenti.Ner.ReferentToken(target, hit.BeginToken, hit.EndToken);
        kit.EmbedToken(mention);
        cur = mention;
    }
}
/// <summary>
/// Tries to interpret the token at <paramref name="t"/> as one element of a
/// bibliographic reference (number, year, place, publisher, URL, page range,
/// delimiter, "see ..." marker and so on).
/// </summary>
/// <param name="t">Token to start from; <c>null</c> yields <c>null</c>.</param>
/// <param name="lev">Current recursion depth; parsing is abandoned once it exceeds 3.</param>
/// <returns>The recognised element, or <c>null</c> when nothing matches.</returns>
/// <remarks>
/// Several one-letter abbreviations are compared twice on purpose: once in Latin
/// and once in the look-alike Cyrillic letter.
/// </remarks>
static BookLinkToken _tryParse(Pullenti.Ner.Token t, int lev)
{
    if (t == null || lev > 3)
    {
        return null;
    }

    // "[ ... ]" - either a wrapped inner element or a bracketed number.
    if (t.IsChar('['))
    {
        BookLinkToken re = _tryParse(t.Next, lev + 1);
        if (re != null && re.EndToken.Next != null && re.EndToken.Next.IsChar(']'))
        {
            re.BeginToken = t;
            re.EndToken = re.EndToken.Next;
            return re;
        }
        if (re != null && re.EndToken.IsChar(']'))
        {
            re.BeginToken = t;
            return re;
        }
        if (re != null && (re.Typ == BookLinkTyp.Sostavitel || re.Typ == BookLinkTyp.Editors))
        {
            return re;
        }
        Pullenti.Ner.Core.BracketSequenceToken br = Pullenti.Ner.Core.BracketHelper.TryParse(t, Pullenti.Ner.Core.BracketParseAttr.No, 100);
        if (br != null && (br.EndToken.Previous is Pullenti.Ner.NumberToken) && (br.LengthChar < 30))
        {
            return new BookLinkToken(t, br.EndToken)
            {
                Typ = BookLinkTyp.Number,
                Value = Pullenti.Ner.Core.MiscHelper.GetTextValue(br.BeginToken.Next, br.EndToken.Previous, Pullenti.Ner.Core.GetTextAttr.No)
            };
        }
    }

    // Already-recognised entities.
    if (t is Pullenti.Ner.ReferentToken)
    {
        if (t.GetReferent() is Pullenti.Ner.Person.PersonReferent)
        {
            return TryParseAuthor(t, Pullenti.Ner.Person.Internal.FioTemplateType.Undefined);
        }
        if (t.GetReferent() is Pullenti.Ner.Geo.GeoReferent)
        {
            return new BookLinkToken(t, t) { Typ = BookLinkTyp.Geo, Ref = t.GetReferent() };
        }
        if (t.GetReferent() is Pullenti.Ner.Date.DateReferent)
        {
            Pullenti.Ner.Date.DateReferent dr = t.GetReferent() as Pullenti.Ner.Date.DateReferent;
            // A date counts as a year when it has a single slot, or when it follows a comma.
            if (dr.Slots.Count == 1 && dr.Year > 0)
            {
                return new BookLinkToken(t, t) { Typ = BookLinkTyp.Year, Value = dr.Year.ToString() };
            }
            if (dr.Year > 0 && t.Previous != null && t.Previous.IsComma)
            {
                return new BookLinkToken(t, t) { Typ = BookLinkTyp.Year, Value = dr.Year.ToString() };
            }
        }
        if (t.GetReferent() is Pullenti.Ner.Org.OrganizationReferent)
        {
            Pullenti.Ner.Org.OrganizationReferent org = t.GetReferent() as Pullenti.Ner.Org.OrganizationReferent;
            if (org.Kind == Pullenti.Ner.Org.OrganizationKind.Press)
            {
                return new BookLinkToken(t, t) { Typ = BookLinkTyp.Press, Ref = org };
            }
        }
        if (t.GetReferent() is Pullenti.Ner.Uri.UriReferent)
        {
            Pullenti.Ner.Uri.UriReferent uri = t.GetReferent() as Pullenti.Ner.Uri.UriReferent;
            if (uri.Scheme == "http" || uri.Scheme == "https" || uri.Scheme == "ftp" || uri.Scheme == null)
            {
                return new BookLinkToken(t, t) { Typ = BookLinkTyp.Url, Ref = uri };
            }
        }
    }

    // Dictionary terms.
    Pullenti.Ner.Core.TerminToken tok = m_Termins.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
    if (tok != null)
    {
        BookLinkTyp typ = (BookLinkTyp)tok.Termin.Tag;
        bool ok = true;
        if (typ == BookLinkTyp.Type || typ == BookLinkTyp.NameTail || typ == BookLinkTyp.ElectronRes)
        {
            // These kinds are accepted only directly after one of ".:[" or a hyphen.
            ok = t.Previous != null && (t.Previous.IsCharOf(".:[") || t.Previous.IsHiphen);
        }
        if (ok)
        {
            return new BookLinkToken(t, tok.EndToken) { Typ = typ, Value = tok.Termin.CanonicText };
        }
        if (typ == BookLinkTyp.ElectronRes)
        {
            // Otherwise an electronic-resource term still counts if a URI follows
            // after non-letter text tokens only.
            for (Pullenti.Ner.Token tt = tok.EndToken.Next; tt != null; tt = tt.Next)
            {
                if ((tt is Pullenti.Ner.TextToken) && !tt.Chars.IsLetter)
                {
                    continue;
                }
                if (tt.GetReferent() is Pullenti.Ner.Uri.UriReferent)
                {
                    return new BookLinkToken(t, tt) { Typ = BookLinkTyp.ElectronRes, Ref = tt.GetReferent() };
                }
                break;
            }
        }
    }

    // "/" or "//" delimiter.
    if (t.IsChar('/'))
    {
        BookLinkToken res = new BookLinkToken(t, t) { Typ = BookLinkTyp.Delimeter, Value = "/" };
        if (t.Next != null && t.Next.IsChar('/'))
        {
            res.EndToken = t.Next;
            res.Value = "//";
        }
        if (!t.IsWhitespaceBefore && !t.IsWhitespaceAfter)
        {
            // A slash glued to its neighbours is a delimiter only if one of the next
            // three tokens parses as a non-number element.
            int coo = 3;
            bool no = true;
            for (Pullenti.Ner.Token tt = t.Next; tt != null && coo > 0; tt = tt.Next, coo--)
            {
                BookLinkToken vvv = TryParse(tt, lev + 1);
                if (vvv != null && vvv.Typ != BookLinkTyp.Number)
                {
                    no = false;
                    break;
                }
            }
            if (no)
            {
                return null;
            }
        }
        return res;
    }

    // Plain digit number: number, year, or page count.
    if ((t is Pullenti.Ner.NumberToken)
        && (t as Pullenti.Ner.NumberToken).IntValue != null
        && (t as Pullenti.Ner.NumberToken).Typ == Pullenti.Ner.NumberSpellingType.Digit)
    {
        BookLinkToken res = new BookLinkToken(t, t) { Typ = BookLinkTyp.Number, Value = (t as Pullenti.Ner.NumberToken).Value.ToString() };
        int val = (t as Pullenti.Ner.NumberToken).IntValue.Value;
        if (val >= 1930 && (val < 2030))
        {
            res.Typ = BookLinkTyp.Year;
        }
        if (t.Next != null && t.Next.IsChar('.'))
        {
            res.EndToken = t.Next;
        }
        else if (t.Next != null && t.Next.LengthChar == 1 && !t.Next.Chars.IsLetter && t.Next.IsWhitespaceAfter)
        {
            res.EndToken = t.Next;
        }
        else if (t.Next is Pullenti.Ner.TextToken)
        {
            string term = (t.Next as Pullenti.Ner.TextToken).Term;
            if (term == "СТР" || term == "C" || term == "С" || term == "P" || term == "S" || term == "PAGES")
            {
                res.EndToken = t.Next;
                res.Typ = BookLinkTyp.Pages;
                res.Value = (t as Pullenti.Ner.NumberToken).Value.ToString();
            }
        }
        return res;
    }

    if (t is Pullenti.Ner.TextToken)
    {
        string term = (t as Pullenti.Ner.TextToken).Term;

        // Page / volume abbreviation followed by a number or number range.
        if (term == "СТР" || term == "C" || term == "С"
            || term == "ТОМ" || term == "T" || term == "Т"
            || term == "P" || term == "PP" || term == "V" || term == "VOL" || term == "S"
            || term == "СТОР"
            || t.IsValue("PAGE", null)
            || t.IsValue("СТРАНИЦА", "СТОРІНКА"))
        {
            Pullenti.Ner.Token tt = t.Next;
            while (tt != null && tt.IsCharOf(".:~"))
            {
                tt = tt.Next;
            }
            if (tt is Pullenti.Ner.NumberToken)
            {
                BookLinkToken res = new BookLinkToken(t, tt) { Typ = BookLinkTyp.PageRange };
                Pullenti.Ner.Token tt0 = tt;
                Pullenti.Ner.Token tt1 = tt;
                for (tt = tt.Next; tt != null; tt = tt.Next)
                {
                    if ((tt.IsCharOf(",") || tt.IsHiphen) && (tt.Next is Pullenti.Ner.NumberToken))
                    {
                        tt = tt.Next;
                        res.EndToken = tt;
                        tt1 = tt;
                        continue;
                    }
                    break;
                }
                res.Value = Pullenti.Ner.Core.MiscHelper.GetTextValue(tt0, tt1, Pullenti.Ner.Core.GetTextAttr.No);
                return res;
            }
        }

        // City abbreviation followed by ':' / ';' or by '.' and a suitable continuation.
        if (term == "M" || term == "М" || term == "СПБ" || term == "K" || term == "К")
        {
            if (t.Next != null && t.Next.IsCharOf(":;"))
            {
                return new BookLinkToken(t, t.Next) { Typ = BookLinkTyp.Geo };
            }
            if (t.Next != null && t.Next.IsCharOf("."))
            {
                BookLinkToken res = new BookLinkToken(t, t.Next) { Typ = BookLinkTyp.Geo };
                Pullenti.Ner.Token afterDot = t.Next.Next;
                if (afterDot != null && afterDot.IsCharOf(":;"))
                {
                    res.EndToken = afterDot;
                }
                else if (afterDot is Pullenti.Ner.NumberToken)
                {
                    // "M. 1990" - accepted as is.
                }
                else if (afterDot != null && afterDot.IsComma && (afterDot.Next is Pullenti.Ner.NumberToken))
                {
                    // "M., 1990" - accepted as is.
                }
                else
                {
                    return null;
                }
                return res;
            }
        }

        // "ПЕР. С <language>" (translated from ...).
        if (term == "ПЕР" || term == "ПЕРЕВ" || term == "ПЕРЕВОД")
        {
            Pullenti.Ner.Token tt = t;
            if (tt.Next != null && tt.Next.IsChar('.'))
            {
                tt = tt.Next;
            }
            if (tt.Next != null && (tt.Next.IsValue("C", null) || tt.Next.IsValue("С", null)))
            {
                tt = tt.Next;
                if (tt.Next == null || tt.WhitespacesAfterCount > 2)
                {
                    return null;
                }
                return new BookLinkToken(t, tt.Next) { Typ = BookLinkTyp.Translate };
            }
        }

        // "ТАМ ЖЕ" (ibid.).
        if (term == "ТАМ" || term == "ТАМЖЕ")
        {
            BookLinkToken res = new BookLinkToken(t, t) { Typ = BookLinkTyp.Tamze };
            if (t.Next != null && t.Next.IsValue("ЖЕ", null))
            {
                res.EndToken = t.Next;
            }
            return res;
        }

        // "see", "e.g.", "in more detail" markers, extended over trailing
        // punctuation, "ALSO", "В"/"IN" and a directly following marker of the same kind.
        if (term == "СМ" || term == "CM" || term == "НАПР" || term == "НАПРИМЕР"
            || term == "SEE" || term == "ПОДРОБНЕЕ" || term == "ПОДРОБНО")
        {
            BookLinkToken res = new BookLinkToken(t, t) { Typ = BookLinkTyp.See };
            for (t = t.Next; t != null; t = t.Next)
            {
                if (t.IsCharOf(".:") || t.IsValue("ALSO", null))
                {
                    res.EndToken = t;
                    continue;
                }
                if (t.IsValue("В", null) || t.IsValue("IN", null))
                {
                    res.EndToken = t;
                    continue;
                }
                BookLinkToken vvv = _tryParse(t, lev + 1);
                if (vvv != null && vvv.Typ == BookLinkTyp.See)
                {
                    res.EndToken = vvv.EndToken;
                }
                break;
            }
            return res;
        }

        // "БОЛЕЕ ПОДРОБНО ..." - prepend to a following "see" marker.
        if (term == "БОЛЕЕ")
        {
            BookLinkToken vvv = _tryParse(t.Next, lev + 1);
            if (vvv != null && vvv.Typ == BookLinkTyp.See)
            {
                vvv.BeginToken = t;
                return vvv;
            }
        }

        // Number prefix directly followed by a number token.
        Pullenti.Ner.Token no = Pullenti.Ner.Core.MiscHelper.CheckNumberPrefix(t);
        if (no is Pullenti.Ner.NumberToken)
        {
            return new BookLinkToken(t, no) { Typ = BookLinkTyp.N };
        }

        // "В 3 Т." / "В 2 КН." (in N volumes / books).
        if ((term == "B" || term == "В") && (t.Next is Pullenti.Ner.NumberToken) && (t.Next.Next is Pullenti.Ner.TextToken))
        {
            string term2 = (t.Next.Next as Pullenti.Ner.TextToken).Term;
            if (term2 == "Т" || term2 == "T" || term2.StartsWith("ТОМ")
                || term2 == "TT" || term2 == "ТТ"
                || term2 == "КН" || term2.StartsWith("КНИГ"))
            {
                return new BookLinkToken(t, t.Next.Next) { Typ = BookLinkTyp.Volume };
            }
        }
    }

    // "(1998)" - a year in parentheses, given either as a number or as a date referent.
    if (t.IsChar('('))
    {
        if ((t.Next is Pullenti.Ner.NumberToken)
            && (t.Next as Pullenti.Ner.NumberToken).IntValue != null
            && t.Next.Next != null
            && t.Next.Next.IsChar(')'))
        {
            int num = (t.Next as Pullenti.Ner.NumberToken).IntValue.Value;
            // Must look like a year and must not lie in the future.
            if (num > 1900 && num <= 2040 && num <= DateTime.Now.Year)
            {
                return new BookLinkToken(t, t.Next.Next) { Typ = BookLinkTyp.Year, Value = num.ToString() };
            }
        }
        if ((t.Next is Pullenti.Ner.ReferentToken)
            && (t.Next.GetReferent() is Pullenti.Ner.Date.DateReferent)
            && t.Next.Next != null
            && t.Next.Next.IsChar(')'))
        {
            int num = (t.Next.GetReferent() as Pullenti.Ner.Date.DateReferent).Year;
            if (num > 0)
            {
                return new BookLinkToken(t, t.Next.Next) { Typ = BookLinkTyp.Year, Value = num.ToString() };
            }
        }
    }
    return null;
}
/// <summary>
/// Scans forward from <paramref name="t"/> collecting bank requisites (URI referents whose
/// scheme is accepted by <c>_isBankReq</c>), the bank organisation and an optional
/// correspondent bank, and wraps them into a <c>BankDataReferent</c>.
/// </summary>
/// <param name="t">Token to start from; <c>null</c> yields <c>null</c>.</param>
/// <param name="keyWord">
/// <c>true</c> when the caller has already seen an introducing keyword; this relaxes the
/// requirement that a requisite must appear before other content.
/// </param>
/// <returns>
/// A referent token spanning from the start token to the last accepted requisite or
/// organisation, or <c>null</c> when the text does not form bank data. A result requires
/// a "Р/С" or "Л/С" requisite, plus either a second requisite or an organisation.
/// </returns>
Pullenti.Ner.ReferentToken TryAttach(Pullenti.Ner.Token t, bool keyWord)
{
    if (t == null)
    {
        return null;
    }
    Pullenti.Ner.Token t0 = t;
    Pullenti.Ner.Token t1 = t;
    List<string> urisKeys = null;
    List<Pullenti.Ner.Uri.UriReferent> uris = null;
    Pullenti.Ner.Referent org = null;
    Pullenti.Ner.Referent corOrg = null;
    bool orgIsBank = false;
    // Number of consecutive tokens that contributed nothing; too many of them end the scan.
    int empty = 0;
    Pullenti.Ner.Uri.UriReferent lastUri = null;

    for (; t != null; t = t.Next)
    {
        if (t.IsTableControlChar && t != t0)
        {
            break;
        }
        if (t.IsComma || t.Morph.Class.IsPreposition || t.IsCharOf("/\\"))
        {
            continue;
        }

        bool bankKeyword = false;
        // Skip "ПОЛНЫЙ НАИМЕНОВАНИЕ/НАЗВАНИЕ" ("full name") in front of the bank.
        if (t.IsValue("ПОЛНЫЙ", null) && t.Next != null
            && (t.Next.IsValue("НАИМЕНОВАНИЕ", null) || t.Next.IsValue("НАЗВАНИЕ", null)))
        {
            t = t.Next.Next;
            if (t == null)
            {
                break;
            }
        }

        if (t.IsValue("БАНК", null))
        {
            if ((t is Pullenti.Ner.ReferentToken) && t.GetReferent().TypeName == "ORGANIZATION")
            {
                bankKeyword = true;
            }
            // Look past an optional noun phrase and ':' for the bank itself.
            Pullenti.Ner.Token tt = t.Next;
            Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(tt, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
            if (npt != null)
            {
                tt = npt.EndToken.Next;
            }
            if (tt != null && tt.IsChar(':'))
            {
                tt = tt.Next;
            }
            if (tt != null)
            {
                if (!bankKeyword)
                {
                    t = tt;
                    bankKeyword = true;
                }
                else if (tt.GetReferent() != null && tt.GetReferent().TypeName == "ORGANIZATION")
                {
                    t = tt;
                }
            }
        }

        Pullenti.Ner.Referent r = t.GetReferent();
        if (r != null && r.TypeName == "ORGANIZATION")
        {
            // An organisation is a bank if it, or one of up to three ancestors, has KIND "Bank"
            // (compared ordinally, ignoring case), or if a bank keyword introduced it.
            bool isBank = false;
            int kk = 0;
            for (Pullenti.Ner.Referent rr = r; rr != null && (kk < 4); rr = rr.ParentReferent, kk++)
            {
                isBank = string.Equals(rr.GetStringValue("KIND") ?? "", "Bank", System.StringComparison.OrdinalIgnoreCase);
                if (isBank)
                {
                    break;
                }
            }
            if (!isBank && bankKeyword)
            {
                isBank = true;
            }
            if (!isBank && uris != null && urisKeys.Contains("ИНН"))
            {
                return null;
            }
            // "К/С ... В <org>" - presumably the correspondent account's bank.
            if (lastUri != null && lastUri.Scheme == "К/С" && t.Previous != null && t.Previous.IsValue("В", null))
            {
                corOrg = r;
                t1 = t;
            }
            else if (org == null || (!orgIsBank && isBank))
            {
                org = r;
                t1 = t;
                orgIsBank = isBank;
                if (isBank)
                {
                    continue;
                }
            }
            if (uris == null && !keyWord)
            {
                return null;
            }
            continue;
        }

        if (r is Pullenti.Ner.Uri.UriReferent)
        {
            Pullenti.Ner.Uri.UriReferent u = r as Pullenti.Ner.Uri.UriReferent;
            if (uris == null)
            {
                // The very first URI must be a bank requisite, otherwise this is not bank data.
                if (!_isBankReq(u.Scheme))
                {
                    return null;
                }
                if (u.Scheme == "ИНН" && t.IsNewlineAfter)
                {
                    return null;
                }
                uris = new List<Pullenti.Ner.Uri.UriReferent>();
                urisKeys = new List<string>();
            }
            else
            {
                // Later URIs end the block when foreign, repeated, or an "ИНН" after a gap.
                if (!_isBankReq(u.Scheme))
                {
                    break;
                }
                if (urisKeys.Contains(u.Scheme))
                {
                    break;
                }
                if (u.Scheme == "ИНН" && empty > 0)
                {
                    break;
                }
            }
            urisKeys.Add(u.Scheme);
            uris.Add(u);
            lastUri = u;
            t1 = t;
            empty = 0;
            continue;
        }
        else if (uris == null && !keyWord && !orgIsBank)
        {
            return null;
        }

        if (r != null && (r.TypeName == "GEO" || r.TypeName == "ADDRESS"))
        {
            empty++;
            continue;
        }

        if (t is Pullenti.Ner.TextToken)
        {
            if (t.IsValue("ПОЛНЫЙ", null) || t.IsValue("НАИМЕНОВАНИЕ", null) || t.IsValue("НАЗВАНИЕ", null))
            {
                // Filler words - neither counted as empty nor as content.
            }
            else if (t.Chars.IsLetter)
            {
                Pullenti.Ner.Core.TerminToken tok = m_Ontology.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
                if (tok != null)
                {
                    t = tok.EndToken;
                    empty = 0;
                }
                else
                {
                    empty++;
                    // A new line starting with "<noun phrase>:" looks like the next labelled field.
                    if (t.IsNewlineBefore)
                    {
                        Pullenti.Ner.Core.NounPhraseToken nnn = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                        if (nnn != null && nnn.EndToken.Next != null && nnn.EndToken.Next.IsChar(':'))
                        {
                            break;
                        }
                    }
                }
                if (uris == null)
                {
                    break;
                }
            }
        }

        if (empty > 2)
        {
            break;
        }
        if (empty > 0 && t.IsChar(':') && t.IsNewlineAfter)
        {
            break;
        }
        if ((t is Pullenti.Ner.NumberToken) && t.IsNewlineBefore && t.Next != null && !t.Next.Chars.IsLetter)
        {
            break;
        }
    }

    if (uris == null)
    {
        return null;
    }
    if (!urisKeys.Contains("Р/С") && !urisKeys.Contains("Л/С"))
    {
        return null;
    }
    if ((uris.Count < 2) && org == null)
    {
        return null;
    }

    BankDataReferent bdr = new BankDataReferent();
    foreach (Pullenti.Ner.Uri.UriReferent u in uris)
    {
        bdr.AddSlot(BankDataReferent.ATTR_ITEM, u, false, 0);
    }
    if (org != null)
    {
        bdr.AddSlot(BankDataReferent.ATTR_BANK, org, false, 0);
    }
    if (corOrg != null)
    {
        bdr.AddSlot(BankDataReferent.ATTR_CORBANK, corOrg, false, 0);
    }

    // If an organisation stands right before the block, inherit those of its
    // requisites whose scheme was not found in the block itself.
    Pullenti.Ner.Referent org0 = (t0.Previous == null ? null : t0.Previous.GetReferent());
    if (org0 != null && org0.TypeName == "ORGANIZATION")
    {
        foreach (Pullenti.Ner.Slot s in org0.Slots)
        {
            if (s.Value is Pullenti.Ner.Uri.UriReferent)
            {
                Pullenti.Ner.Uri.UriReferent u = s.Value as Pullenti.Ner.Uri.UriReferent;
                if (_isBankReq(u.Scheme) && !urisKeys.Contains(u.Scheme))
                {
                    bdr.AddSlot(BankDataReferent.ATTR_ITEM, u, false, 0);
                }
            }
        }
    }
    return new Pullenti.Ner.ReferentToken(bdr, t0, t1);
}
/// <summary>
/// Tries to recognise a single building block of a phone number at <paramref name="t0"/>:
/// a digit group, a delimiter, a country code after '+', a city code in brackets or
/// slashes, or a textual prefix.
/// </summary>
/// <param name="t0">Token to analyse; <c>null</c> yields <c>null</c>.</param>
/// <returns>The recognised item, or <c>null</c> when the token is not part of a phone number.</returns>
static PhoneItemToken _TryAttach(Pullenti.Ner.Token t0)
{
    if (t0 == null)
    {
        return null;
    }

    if (t0 is Pullenti.Ner.NumberToken)
    {
        // A number glued to a postfix is accepted only if a phone can be
        // parsed starting from the following token.
        if (Pullenti.Ner.Core.NumberHelper.TryParseNumberWithPostfix(t0) != null && !t0.IsWhitespaceAfter)
        {
            Pullenti.Ner.ReferentToken rt = t0.Kit.ProcessReferent("PHONE", t0.Next);
            if (rt == null)
            {
                return null;
            }
        }
        if ((t0 as Pullenti.Ner.NumberToken).Typ == Pullenti.Ner.NumberSpellingType.Digit && !t0.Morph.Class.IsAdjective)
        {
            return new PhoneItemToken(t0, t0) { ItemType = PhoneItemType.Number, Value = t0.GetSourceText() };
        }
        return null;
    }

    if (t0.IsChar('.'))
    {
        return new PhoneItemToken(t0, t0) { ItemType = PhoneItemType.Delim, Value = "." };
    }
    if (t0.IsHiphen)
    {
        return new PhoneItemToken(t0, t0) { ItemType = PhoneItemType.Delim, Value = "-" };
    }

    // "+<digits>" - country code with leading zeros stripped.
    if (t0.IsChar('+'))
    {
        if (!(t0.Next is Pullenti.Ner.NumberToken) || (t0.Next as Pullenti.Ner.NumberToken).Typ != Pullenti.Ner.NumberSpellingType.Digit)
        {
            return null;
        }
        string val = t0.Next.GetSourceText();
        int i;
        for (i = 0; i < val.Length; i++)
        {
            if (val[i] != '0')
            {
                break;
            }
        }
        // Nothing but zeros is not a country code.
        if (i >= val.Length)
        {
            return null;
        }
        if (i > 0)
        {
            val = val.Substring(i);
        }
        return new PhoneItemToken(t0, t0.Next) { ItemType = PhoneItemType.CountryCode, Value = val };
    }

    // U+2011 in front of a two-character number token is treated as a '-' delimiter.
    if (t0.IsChar((char)0x2011) && (t0.Next is Pullenti.Ner.NumberToken) && t0.Next.LengthChar == 2)
    {
        return new PhoneItemToken(t0, t0) { ItemType = PhoneItemType.Delim, Value = "-" };
    }

    if (t0.IsCharOf("("))
    {
        if (t0.Next is Pullenti.Ner.NumberToken)
        {
            // "(495)" or "(8-495)": digits, hyphens and dots up to the closing bracket.
            Pullenti.Ner.Token et = t0.Next;
            StringBuilder val = new StringBuilder();
            for (; et != null; et = et.Next)
            {
                if (et.IsChar(')'))
                {
                    break;
                }
                if ((et is Pullenti.Ner.NumberToken) && (et as Pullenti.Ner.NumberToken).Typ == Pullenti.Ner.NumberSpellingType.Digit)
                {
                    val.Append(et.GetSourceText());
                }
                else if (!et.IsHiphen && !et.IsChar('.'))
                {
                    return null;
                }
            }
            if (et == null || val.Length == 0)
            {
                return null;
            }
            return new PhoneItemToken(t0, et) { ItemType = PhoneItemType.CityCode, Value = val.ToString(), IsInBrackets = true };
        }

        // "(<prefix term>)" - a prefix term with no Tag that fills the brackets exactly.
        Pullenti.Ner.Core.TerminToken tt1 = m_PhoneTermins.TryParse(t0.Next, Pullenti.Ner.Core.TerminParseAttr.No);
        if (tt1 != null && tt1.Termin.Tag == null && tt1.EndToken.Next != null && tt1.EndToken.Next.IsChar(')'))
        {
            return new PhoneItemToken(t0, tt1.EndToken.Next) { ItemType = PhoneItemType.Prefix, IsInBrackets = true, Value = string.Empty };
        }
        return null;
    }

    // "/495/" - three-character number token between slashes.
    if (t0.IsChar('/')
        && (t0.Next is Pullenti.Ner.NumberToken)
        && t0.Next.Next != null
        && t0.Next.Next.IsChar('/')
        && t0.Next.LengthChar == 3)
    {
        return new PhoneItemToken(t0, t0.Next.Next)
        {
            ItemType = PhoneItemType.CityCode,
            Value = (t0.Next as Pullenti.Ner.NumberToken).Value.ToString(),
            IsInBrackets = true
        };
    }

    // Textual prefix.
    Pullenti.Ner.Token t1 = null;
    Pullenti.Ner.Phone.PhoneKind ki = Pullenti.Ner.Phone.PhoneKind.Undefined;
    if (t0.IsValue("Т", null)
        && t0.Next != null
        && t0.Next.IsCharOf("\\/")
        && t0.Next.Next != null
        && (t0.Next.Next.IsValue("Р", null) || t0.Next.Next.IsValue("М", null)))
    {
        // "Т/Р" maps to a work phone, "Т/М" to a mobile phone.
        t1 = t0.Next.Next;
        ki = (t1.IsValue("Р", null) ? Pullenti.Ner.Phone.PhoneKind.Work : Pullenti.Ner.Phone.PhoneKind.Mobile);
    }
    else
    {
        Pullenti.Ner.Core.TerminToken tt = m_PhoneTermins.TryParse(t0, Pullenti.Ner.Core.TerminParseAttr.No);
        if (tt == null || tt.Termin.Tag != null)
        {
            // "НОМЕР <prefix>" ("number ...") - extend a following prefix to start here.
            if (t0.IsValue("НОМЕР", null))
            {
                PhoneItemToken rr = _TryAttach(t0.Next);
                if (rr != null && rr.ItemType == PhoneItemType.Prefix)
                {
                    rr.BeginToken = t0;
                    return rr;
                }
            }
            return null;
        }
        if (tt.Termin.Tag2 is Pullenti.Ner.Phone.PhoneKind)
        {
            ki = (Pullenti.Ner.Phone.PhoneKind)tt.Termin.Tag2;
        }
        t1 = tt.EndToken;
    }

    PhoneItemToken res = new PhoneItemToken(t0, t1) { ItemType = PhoneItemType.Prefix, Value = string.Empty, Kind = ki };
    // Absorb trailing '.' / ':' into the prefix; table control characters are
    // stepped over without extending the token.
    while (true)
    {
        if (t1.Next != null && t1.Next.IsCharOf(".:"))
        {
            res.EndToken = (t1 = t1.Next);
        }
        else if (t1.Next != null && t1.Next.IsTableControlChar)
        {
            t1 = t1.Next;
        }
        else
        {
            break;
        }
    }

    // A lone single-character or all-upper-case token is a prefix only when followed by whitespace.
    if (t0 == t1 && (t0.BeginChar == t0.EndChar || t0.Chars.IsAllUpper))
    {
        if (!t0.IsWhitespaceAfter)
        {
            return null;
        }
    }
    return res;
}
internal static NumbersWithUnitToken _tryParse(Pullenti.Ner.Token t, Pullenti.Ner.Core.TerminCollection addUnits, bool second, bool canOmitNumber, bool canBeNan) { if (t == null) { return(null); } while (t != null) { if (t.IsCommaAnd || t.IsValue("НО", null)) { t = t.Next; } else { break; } } Pullenti.Ner.Token t0 = t; bool about = false; bool hasKeyw = false; bool isDiapKeyw = false; int minMax = 0; Pullenti.Ner.Token ttt = _isMinOrMax(t, ref minMax); if (ttt != null) { t = ttt.Next; if (t == null) { return(null); } } if (t == null) { return(null); } if (t.IsChar('~') || t.IsValue("ОКОЛО", null) || t.IsValue("ПРИМЕРНО", null)) { t = t.Next; about = true; hasKeyw = true; if (t == null) { return(null); } } if (t.IsValue("В", null) && t.Next != null) { if (t.Next.IsValue("ПРЕДЕЛ", null) || t.IsValue("ДИАПАЗОН", null)) { t = t.Next.Next; if (t == null) { return(null); } isDiapKeyw = true; } } if (t0.IsChar('(')) { NumbersWithUnitToken mt0 = _tryParse(t.Next, addUnits, false, false, false); if (mt0 != null && mt0.EndToken.Next != null && mt0.EndToken.Next.IsChar(')')) { if (second) { if (mt0.FromVal != null && mt0.ToVal != null && mt0.FromVal.Value == (-mt0.ToVal.Value)) { } else { return(null); } } mt0.BeginToken = t0; mt0.EndToken = mt0.EndToken.Next; List <UnitToken> uu = UnitToken.TryParseList(mt0.EndToken.Next, addUnits, false); if (uu != null && mt0.Units.Count == 0) { mt0.Units = uu; mt0.EndToken = uu[uu.Count - 1].EndToken; } return(mt0); } } bool plusminus = false; bool unitBefore = false; bool isAge = false; DiapTyp dty = DiapTyp.Undefined; Pullenti.Ner.MetaToken whd = null; List <UnitToken> uni = null; Pullenti.Ner.Core.TerminToken tok = (m_Termins == null ? 
null : m_Termins.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No)); if (tok != null) { if (tok.EndToken.IsValue("СТАРШЕ", null) || tok.EndToken.IsValue("МЛАДШЕ", null)) { isAge = true; } t = tok.EndToken.Next; dty = (DiapTyp)tok.Termin.Tag; hasKeyw = true; if (!tok.IsWhitespaceAfter) { if (t == null) { return(null); } if (t is Pullenti.Ner.NumberToken) { if (tok.BeginToken == tok.EndToken && !tok.Chars.IsAllLower) { return(null); } } else if (t.IsComma && t.Next != null && t.Next.IsValue("ЧЕМ", null)) { t = t.Next.Next; if (t != null && t.Morph.Class.IsPreposition) { t = t.Next; } } else if (t.IsCharOf(":,(") || t.IsTableControlChar) { } else { return(null); } } if (t != null && t.IsChar('(')) { uni = UnitToken.TryParseList(t.Next, addUnits, false); if (uni != null) { t = uni[uni.Count - 1].EndToken.Next; while (t != null) { if (t.IsCharOf("):")) { t = t.Next; } else { break; } } NumbersWithUnitToken mt0 = _tryParse(t, addUnits, false, canOmitNumber, false); if (mt0 != null && mt0.Units.Count == 0) { mt0.BeginToken = t0; mt0.Units = uni; return(mt0); } } whd = _tryParseWHL(t); if (whd != null) { t = whd.EndToken.Next; } } else if (t != null && t.IsValue("IP", null)) { uni = UnitToken.TryParseList(t, addUnits, false); if (uni != null) { t = uni[uni.Count - 1].EndToken.Next; } } if ((t != null && t.IsHiphen && t.IsWhitespaceBefore) && t.IsWhitespaceAfter) { t = t.Next; } } else if (t.IsChar('<')) { dty = DiapTyp.Ls; t = t.Next; hasKeyw = true; if (t != null && t.IsChar('=')) { t = t.Next; dty = DiapTyp.Le; } } else if (t.IsChar('>')) { dty = DiapTyp.Gt; t = t.Next; hasKeyw = true; if (t != null && t.IsChar('=')) { t = t.Next; dty = DiapTyp.Ge; } } else if (t.IsChar('≤')) { dty = DiapTyp.Le; hasKeyw = true; t = t.Next; } else if (t.IsChar('≥')) { dty = DiapTyp.Ge; hasKeyw = true; t = t.Next; } else if (t.IsValue("IP", null)) { uni = UnitToken.TryParseList(t, addUnits, false); if (uni != null) { t = uni[uni.Count - 1].EndToken.Next; } } else if (t.IsValue("ЗА", null) 
&& (t.Next is Pullenti.Ner.NumberToken)) { dty = DiapTyp.Ge; t = t.Next; } while (t != null && ((t.IsCharOf(":,") || t.IsValue("ЧЕМ", null) || t.IsTableControlChar))) { t = t.Next; } if (t != null) { if (t.IsChar('+') || t.IsValue("ПЛЮС", null)) { t = t.Next; if (t != null && !t.IsWhitespaceBefore) { if (t.IsHiphen) { t = t.Next; plusminus = true; } else if ((t.IsCharOf("\\/") && t.Next != null && !t.IsNewlineAfter) && t.Next.IsHiphen) { t = t.Next.Next; plusminus = true; } } } else if (second && (t.IsCharOf("\\/÷…~"))) { t = t.Next; } else if ((t.IsHiphen && t == t0 && !second) && m_Termins.TryParse(t.Next, Pullenti.Ner.Core.TerminParseAttr.No) != null) { tok = m_Termins.TryParse(t.Next, Pullenti.Ner.Core.TerminParseAttr.No); t = tok.EndToken.Next; dty = (DiapTyp)tok.Termin.Tag; } else if (t.IsHiphen && t == t0 && ((t.IsWhitespaceAfter || second))) { t = t.Next; } else if (t.IsChar('±')) { t = t.Next; plusminus = true; hasKeyw = true; } else if ((second && t.IsChar('.') && t.Next != null) && t.Next.IsChar('.')) { t = t.Next.Next; if (t != null && t.IsChar('.')) { t = t.Next; } } } Pullenti.Ner.NumberToken num = Pullenti.Ner.Core.NumberHelper.TryParseRealNumber(t, true, false); if (num == null) { uni = UnitToken.TryParseList(t, addUnits, false); if (uni != null) { unitBefore = true; t = uni[uni.Count - 1].EndToken.Next; bool delim = false; while (t != null) { if (t.IsCharOf(":,")) { delim = true; t = t.Next; } else if (t.IsHiphen && t.IsWhitespaceAfter) { delim = true; t = t.Next; } else { break; } } if (!delim) { if (t == null) { if (hasKeyw && canBeNan) { } else { return(null); } } else if (!t.IsWhitespaceBefore) { return(null); } if (t.Next != null && t.IsHiphen && t.IsWhitespaceAfter) { delim = true; t = t.Next; } } num = Pullenti.Ner.Core.NumberHelper.TryParseRealNumber(t, true, false); } } NumbersWithUnitToken res = null; double rval = (double)0; if (num == null) { Pullenti.Ner.Core.TerminToken tt = m_Spec.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No); if 
(tt != null) { rval = (double)tt.Termin.Tag; string unam = (string)tt.Termin.Tag2; foreach (Unit u in UnitsHelper.Units) { if (u.FullnameCyr == unam) { uni = new List <UnitToken>(); uni.Add(new UnitToken(t, t) { Unit = u }); break; } } if (uni == null) { return(null); } res = new NumbersWithUnitToken(t0, tt.EndToken) { About = about }; t = tt.EndToken.Next; } else { if (!canOmitNumber && !hasKeyw && !canBeNan) { return(null); } if ((uni != null && uni.Count == 1 && uni[0].BeginToken == uni[0].EndToken) && uni[0].LengthChar > 3) { rval = 1; res = new NumbersWithUnitToken(t0, uni[uni.Count - 1].EndToken) { About = about }; t = res.EndToken.Next; } else if (hasKeyw && canBeNan) { rval = double.NaN; res = new NumbersWithUnitToken(t0, t0) { About = about }; if (t != null) { res.EndToken = t.Previous; } else { for (t = t0; t != null; t = t.Next) { res.EndToken = t; } } } else { return(null); } } } else { if ((t == t0 && t0.IsHiphen && !t.IsWhitespaceBefore) && !t.IsWhitespaceAfter && (num.RealValue < 0)) { num = Pullenti.Ner.Core.NumberHelper.TryParseRealNumber(t.Next, true, false); if (num == null) { return(null); } } if (t == t0 && (t is Pullenti.Ner.NumberToken) && t.Morph.Class.IsAdjective) { Pullenti.Ner.TextToken nn = (t as Pullenti.Ner.NumberToken).EndToken as Pullenti.Ner.TextToken; if (nn == null) { return(null); } string norm = nn.GetNormalCaseText(Pullenti.Morph.MorphClass.Adjective, Pullenti.Morph.MorphNumber.Singular, Pullenti.Morph.MorphGender.Undefined, false); if ((norm.EndsWith("Ь") || norm == "ЧЕТЫРЕ" || norm == "ТРИ") || norm == "ДВА") { } else { Pullenti.Morph.MorphWordForm mi = Pullenti.Morph.MorphologyService.GetWordBaseInfo("КОКО" + nn.Term, null, false, false); if (mi.Class.IsAdjective) { return(null); } } } t = num.EndToken.Next; res = new NumbersWithUnitToken(t0, num.EndToken) { About = about }; rval = num.RealValue; } if (uni == null) { uni = UnitToken.TryParseList(t, addUnits, false); if (uni != null) { if ((plusminus && second && uni.Count >= 
1) && uni[0].Unit == UnitsHelper.uPercent) { res.EndToken = uni[0].EndToken; res.PlusMinusPercent = true; Pullenti.Ner.Token tt1 = uni[0].EndToken.Next; uni = UnitToken.TryParseList(tt1, addUnits, false); if (uni != null) { res.Units = uni; res.EndToken = uni[uni.Count - 1].EndToken; } } else { res.Units = uni; res.EndToken = uni[uni.Count - 1].EndToken; } t = res.EndToken.Next; } } else { res.Units = uni; if (uni.Count > 1) { List <UnitToken> uni1 = UnitToken.TryParseList(t, addUnits, false); if (((uni1 != null && uni1[0].Unit == uni[0].Unit && (uni1.Count < uni.Count)) && uni[uni1.Count].Pow == -1 && uni1[uni1.Count - 1].EndToken.Next != null) && uni1[uni1.Count - 1].EndToken.Next.IsCharOf("/\\")) { NumbersWithUnitToken num2 = _tryParse(uni1[uni1.Count - 1].EndToken.Next.Next, addUnits, false, false, false); if (num2 != null && num2.Units != null && num2.Units[0].Unit == uni[uni1.Count].Unit) { res.Units = uni1; res.DivNum = num2; res.EndToken = num2.EndToken; } } } } res.WHL = whd; if (dty != DiapTyp.Undefined) { if (dty == DiapTyp.Ge || dty == DiapTyp.From) { res.FromInclude = true; res.FromVal = rval; } else if (dty == DiapTyp.Gt) { res.FromInclude = false; res.FromVal = rval; } else if (dty == DiapTyp.Le || dty == DiapTyp.To) { res.ToInclude = true; res.ToVal = rval; } else if (dty == DiapTyp.Ls) { res.ToInclude = false; res.ToVal = rval; } } bool isSecondMax = false; if (!second) { int iii = 0; ttt = _isMinOrMax(t, ref iii); if (ttt != null && iii > 0) { isSecondMax = true; t = ttt.Next; } } NumbersWithUnitToken next = (second || plusminus || ((t != null && ((t.IsTableControlChar || t.IsNewlineBefore)))) ? 
null : _tryParse(t, addUnits, true, false, canBeNan)); if (next != null && (t.Previous is Pullenti.Ner.NumberToken)) { if (MeasureHelper.IsMultChar((t.Previous as Pullenti.Ner.NumberToken).EndToken)) { next = null; } } if (next != null && ((next.ToVal != null || next.SingleVal != null)) && next.FromVal == null) { if ((((next.BeginToken.IsChar('+') && next.SingleVal != null && !double.IsNaN(next.SingleVal.Value)) && next.EndToken.Next != null && next.EndToken.Next.IsCharOf("\\/")) && next.EndToken.Next.Next != null && next.EndToken.Next.Next.IsHiphen) && !hasKeyw && !double.IsNaN(rval)) { NumbersWithUnitToken next2 = _tryParse(next.EndToken.Next.Next.Next, addUnits, true, false, false); if (next2 != null && next2.SingleVal != null && !double.IsNaN(next2.SingleVal.Value)) { res.FromVal = rval - next2.SingleVal.Value; res.FromInclude = true; res.ToVal = rval + next.SingleVal.Value; res.ToInclude = true; if (next2.Units != null && res.Units.Count == 0) { res.Units = next2.Units; } res.EndToken = next2.EndToken; return(res); } } if (next.Units.Count > 0) { if (res.Units.Count == 0) { res.Units = next.Units; } else if (!UnitToken.CanBeEquals(res.Units, next.Units)) { next = null; } } else if (res.Units.Count > 0 && !unitBefore && !next.PlusMinusPercent) { next = null; } if (next != null) { res.EndToken = next.EndToken; } if (next != null && next.ToVal != null) { res.ToVal = next.ToVal; res.ToInclude = next.ToInclude; } else if (next != null && next.SingleVal != null) { if (next.BeginToken.IsCharOf("/\\")) { res.DivNum = next; res.SingleVal = rval; return(res); } else if (next.PlusMinusPercent) { res.SingleVal = rval; res.PlusMinus = next.SingleVal; res.PlusMinusPercent = true; res.ToInclude = true; } else { res.ToVal = next.SingleVal; res.ToInclude = true; } } if (next != null) { if (res.FromVal == null) { res.FromVal = rval; res.FromInclude = true; } return(res); } } else if ((next != null && next.FromVal != null && next.ToVal != null) && next.ToVal.Value == 
(-next.FromVal.Value)) { if (next.Units.Count == 1 && next.Units[0].Unit == UnitsHelper.uPercent && res.Units.Count > 0) { res.SingleVal = rval; res.PlusMinus = next.ToVal.Value; res.PlusMinusPercent = true; res.EndToken = next.EndToken; return(res); } if (next.Units.Count == 0) { res.SingleVal = rval; res.PlusMinus = next.ToVal.Value; res.EndToken = next.EndToken; return(res); } res.FromVal = next.FromVal + rval; res.FromInclude = true; res.ToVal = next.ToVal + rval; res.ToInclude = true; res.EndToken = next.EndToken; if (next.Units.Count > 0) { res.Units = next.Units; } return(res); } if (dty == DiapTyp.Undefined) { if (plusminus && ((!res.PlusMinusPercent || !second))) { res.FromInclude = true; res.FromVal = -rval; res.ToInclude = true; res.ToVal = rval; } else { res.SingleVal = rval; res.PlusMinusPercent = plusminus; } } if (isAge) { res.IsAge = true; } return(res); }
/// <summary>
/// Builds a <see cref="TitlePageReferent"/> from the token chain starting at <paramref name="begin"/>.
/// The leading lines are parsed for candidate names, the best one becomes the title, and then the
/// tokens are scanned for work type, speciality, persons, date, city and organization.
/// </summary>
/// <param name="begin">First token to analyse. The local copy is moved onto the embedded name token when both start at the same character.</param>
/// <param name="maxCharPos">Passed to the line parser; when positive it also stops the attribute scan at tokens that begin after this position.</param>
/// <param name="kit">Analysis kit that receives the embedded referent tokens. May be null, in which case the name occurrence is only recorded on the referent.</param>
/// <param name="endToken">Starts as <paramref name="begin"/> and is advanced to the furthest token that contributed to the result.</param>
/// <returns>The filled referent, or null when no lines were parsed or no slot was filled.</returns>
internal static TitlePageReferent _process(Pullenti.Ner.Token begin, int maxCharPos, Pullenti.Ner.Core.AnalysisKit kit, out Pullenti.Ner.Token endToken) {
    endToken = begin;
    TitlePageReferent res = new TitlePageReferent();
    Pullenti.Ner.Core.Termin term = null;
    List<Pullenti.Ner.Titlepage.Internal.Line> lines = Pullenti.Ner.Titlepage.Internal.Line.Parse(begin, 30, 1500, maxCharPos);
    if (lines.Count < 1) {
        return null;
    }

    // cou = number of lines that belong to the title page (everything before the first
    // line that can start the text/contents). Along the way, count how often each
    // "newlines before" value occurs on these lines.
    int cou = lines.Count;
    int minNewlinesCount = 10;
    Dictionary<int, int> linesCountStat = new Dictionary<int, int>();
    for (int i = 0; i < lines.Count; i++) {
        if (Pullenti.Ner.Titlepage.Internal.TitleNameToken.CanBeStartOfTextOrContent(lines[i].BeginToken, lines[i].EndToken)) {
            cou = i;
            break;
        }
        int j = lines[i].NewlinesBeforeCount;
        if (i > 0 && j > 0) {
            // Single lookup: a missing key leaves 'seen' at 0.
            int seen;
            linesCountStat.TryGetValue(j, out seen);
            linesCountStat[j] = seen + 1;
        }
    }

    // The most frequent gap becomes the line spacing handed to the name parser.
    int max = 0;
    foreach (KeyValuePair<int, int> kp in linesCountStat) {
        if (kp.Value > max) {
            max = kp.Value;
            minNewlinesCount = kp.Key;
        }
    }

    int endChar = (cou > 0 ? lines[cou - 1].EndChar : 0);
    if (maxCharPos > 0 && endChar > maxCharPos) {
        endChar = maxCharPos;
    }

    // Try every window of up to 5 consecutive lines as a candidate name.
    List<Pullenti.Ner.Titlepage.Internal.TitleNameToken> names = new List<Pullenti.Ner.Titlepage.Internal.TitleNameToken>();
    for (int i = 0; i < cou; i++) {
        for (int j = i; (j < cou) && (j < (i + 5)); j++) {
            if (j > i) {
                // Do not glue lines across a language switch or across a large vertical gap.
                if (lines[j - 1].IsPureEn && lines[j].IsPureRu) {
                    break;
                }
                if (lines[j - 1].IsPureRu && lines[j].IsPureEn) {
                    break;
                }
                if (lines[j].NewlinesBeforeCount >= (minNewlinesCount * 2)) {
                    break;
                }
            }
            Pullenti.Ner.Titlepage.Internal.TitleNameToken ttt = Pullenti.Ner.Titlepage.Internal.TitleNameToken.TryParse(lines[i].BeginToken, lines[j].EndToken, minNewlinesCount);
            if (ttt != null) {
                if (lines[i].IsPureEn) {
                    ttt.Morph.Language = Pullenti.Morph.MorphLang.EN;
                }
                else if (lines[i].IsPureRu) {
                    ttt.Morph.Language = Pullenti.Morph.MorphLang.RU;
                }
                names.Add(ttt);
            }
        }
    }
    Pullenti.Ner.Titlepage.Internal.TitleNameToken.Sort(names);

    Pullenti.Ner.ReferentToken nameRt = null;
    if (names.Count > 0) {
        // Take the first candidate after sorting, but if it is English prefer the
        // first Russian candidate with a positive rank.
        int i0 = 0;
        if (names[i0].Morph.Language.IsEn) {
            for (int ii = 1; ii < names.Count; ii++) {
                if (names[ii].Morph.Language.IsRu && names[ii].Rank > 0) {
                    i0 = ii;
                    break;
                }
            }
        }
        term = res.AddName(names[i0].BeginNameToken, names[i0].EndNameToken);
        if (names[i0].TypeValue != null) {
            res.AddType(names[i0].TypeValue);
        }
        if (names[i0].Speciality != null) {
            res.Speciality = names[i0].Speciality;
        }
        Pullenti.Ner.ReferentToken rt = new Pullenti.Ner.ReferentToken(res, names[i0].BeginToken, names[i0].EndToken);
        if (kit != null) {
            kit.EmbedToken(rt);
        }
        else {
            res.AddOccurence(new Pullenti.Ner.TextAnnotation(rt.BeginToken, rt.EndToken));
        }
        endToken = rt.EndToken;
        nameRt = rt;
        if (begin.BeginChar == rt.BeginChar) {
            begin = rt;
        }
    }

    // Embed every other occurrence of the name term in the kit, optionally with a
    // trailing dot and surrounding brackets/quotes.
    if (term != null && kit != null) {
        for (Pullenti.Ner.Token t = kit.FirstToken; t != null; t = t.Next) {
            Pullenti.Ner.Core.TerminToken tok = term.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
            if (tok == null) {
                continue;
            }
            Pullenti.Ner.Token t0 = t;
            Pullenti.Ner.Token t1 = tok.EndToken;
            if (t1.Next != null && t1.Next.IsChar('.')) {
                t1 = t1.Next;
            }
            if (Pullenti.Ner.Core.BracketHelper.CanBeStartOfSequence(t0.Previous, false, false) && Pullenti.Ner.Core.BracketHelper.CanBeEndOfSequence(t1.Next, false, null, false)) {
                t0 = t0.Previous;
                t1 = t1.Next;
            }
            Pullenti.Ner.ReferentToken rt = new Pullenti.Ner.ReferentToken(res, t0, t1);
            kit.EmbedToken(rt);
            t = rt;
        }
    }

    // Main scan: title-item keywords and referents already attached to the tokens.
    Pullenti.Ner.Titlepage.Internal.PersonRelations pr = new Pullenti.Ner.Titlepage.Internal.PersonRelations();
    // persTyp is the person role announced by the most recent keyword; it is reset by
    // any other title item and by the first non-person referent of a token.
    Pullenti.Ner.Titlepage.Internal.TitleItemToken.Types persTyp = Pullenti.Ner.Titlepage.Internal.TitleItemToken.Types.Undefined;
    List<Pullenti.Ner.Titlepage.Internal.TitleItemToken.Types> persTypes = pr.RelTypes;
    for (Pullenti.Ner.Token t = begin; t != null; t = t.Next) {
        if (maxCharPos > 0 && t.BeginChar > maxCharPos) {
            break;
        }
        if (t == nameRt) {
            continue;
        }
        Pullenti.Ner.Titlepage.Internal.TitleItemToken tpt = Pullenti.Ner.Titlepage.Internal.TitleItemToken.TryAttach(t);
        if (tpt != null) {
            persTyp = Pullenti.Ner.Titlepage.Internal.TitleItemToken.Types.Undefined;
            if (tpt.Typ == Pullenti.Ner.Titlepage.Internal.TitleItemToken.Types.Typ) {
                if (res.Types.Count == 0) {
                    res.AddType(tpt.Value);
                }
                else if (res.Types.Count == 1) {
                    string ty = res.Types[0].ToUpper();
                    if (ty == "РЕФЕРАТ") {
                        res.AddType(tpt.Value);
                    }
                    else if (ty == "АВТОРЕФЕРАТ") {
                        // An abstract followed by a dissertation kind is merged into one
                        // combined type value (Russian and Ukrainian variants).
                        if (tpt.Value == "КАНДИДАТСКАЯ ДИССЕРТАЦИЯ") {
                            res.AddSlot(TitlePageReferent.ATTR_TYPE, "автореферат кандидатской диссертации", true, 0);
                        }
                        else if (tpt.Value == "ДОКТОРСКАЯ ДИССЕРТАЦИЯ") {
                            res.AddSlot(TitlePageReferent.ATTR_TYPE, "автореферат докторской диссертации", true, 0);
                        }
                        else if (tpt.Value == "МАГИСТЕРСКАЯ ДИССЕРТАЦИЯ") {
                            res.AddSlot(TitlePageReferent.ATTR_TYPE, "автореферат магистерской диссертации", true, 0);
                        }
                        else if (tpt.Value == "КАНДИДАТСЬКА ДИСЕРТАЦІЯ") {
                            res.AddSlot(TitlePageReferent.ATTR_TYPE, "автореферат кандидатської дисертації", true, 0);
                        }
                        else if (tpt.Value == "ДОКТОРСЬКА ДИСЕРТАЦІЯ") {
                            res.AddSlot(TitlePageReferent.ATTR_TYPE, "автореферат докторської дисертації", true, 0);
                        }
                        else if (tpt.Value == "МАГІСТЕРСЬКА ДИСЕРТАЦІЯ") {
                            res.AddSlot(TitlePageReferent.ATTR_TYPE, "автореферат магістерської дисертації", true, 0);
                        }
                        else {
                            res.AddType(tpt.Value);
                        }
                    }
                    else if (tpt.Value == "РЕФЕРАТ" || tpt.Value == "АВТОРЕФЕРАТ") {
                        if (!ty.Contains(tpt.Value)) {
                            res.AddType(tpt.Value);
                        }
                    }
                }
            }
            else if (tpt.Typ == Pullenti.Ner.Titlepage.Internal.TitleItemToken.Types.Speciality) {
                if (res.Speciality == null) {
                    res.Speciality = tpt.Value;
                }
            }
            else if (persTypes.Contains(tpt.Typ)) {
                persTyp = tpt.Typ;
            }
            t = tpt.EndToken;
            if (t.EndChar > endToken.EndChar) {
                endToken = t;
            }
            if (t.Next != null && t.Next.IsCharOf(":-")) {
                t = t.Next;
            }
            continue;
        }
        if (t.EndChar > endChar) {
            break;
        }
        List<Pullenti.Ner.Referent> rli = t.GetReferents();
        if (rli == null) {
            continue;
        }
        // Skip referents introduced by "ИМЕНИ" / "ИМ" / "ИМ." ("named after ...").
        if (!t.IsNewlineBefore && (t.Previous is Pullenti.Ner.TextToken)) {
            string s = (t.Previous as Pullenti.Ner.TextToken).Term;
            if (s == "ИМЕНИ" || s == "ИМ") {
                continue;
            }
            if (s == "." && t.Previous.Previous != null && t.Previous.Previous.IsValue("ИМ", null)) {
                continue;
            }
        }
        foreach (Pullenti.Ner.Referent r in rli) {
            if (r is Pullenti.Ner.Person.PersonReferent) {
                // Only the first referent of the token is considered as a person.
                if (r != rli[0]) {
                    continue;
                }
                Pullenti.Ner.Person.PersonReferent p = r as Pullenti.Ner.Person.PersonReferent;
                if (persTyp != Pullenti.Ner.Titlepage.Internal.TitleItemToken.Types.Undefined) {
                    if (t.Previous != null && t.Previous.IsChar('.')) {
                        persTyp = Pullenti.Ner.Titlepage.Internal.TitleItemToken.Types.Undefined;
                    }
                }
                Pullenti.Ner.Titlepage.Internal.TitleItemToken.Types typ = pr.CalcTypFromAttrs(p);
                if (typ != Pullenti.Ner.Titlepage.Internal.TitleItemToken.Types.Undefined) {
                    pr.Add(p, typ, 1);
                    persTyp = typ;
                }
                else if (persTyp != Pullenti.Ner.Titlepage.Internal.TitleItemToken.Types.Undefined) {
                    pr.Add(p, persTyp, 1);
                }
                else if (t.Previous != null && t.Previous.IsChar('©')) {
                    persTyp = Pullenti.Ner.Titlepage.Internal.TitleItemToken.Types.Worker;
                    pr.Add(p, persTyp, 1);
                }
                else {
                    // No role known yet: look forward for the title itself.
                    for (Pullenti.Ner.Token tt = t.Next; tt != null; tt = tt.Next) {
                        Pullenti.Ner.Referent rr = tt.GetReferent();
                        if (rr == res) {
                            persTyp = Pullenti.Ner.Titlepage.Internal.TitleItemToken.Types.Worker;
                            break;
                        }
                        if (rr is Pullenti.Ner.Person.PersonReferent) {
                            // Test the person found ahead (rr). The earlier code passed the
                            // current person r, whose type is already Undefined in this
                            // branch, so the scan could never stop here.
                            if (pr.CalcTypFromAttrs(rr as Pullenti.Ner.Person.PersonReferent) != Pullenti.Ner.Titlepage.Internal.TitleItemToken.Types.Undefined) {
                                break;
                            }
                            else {
                                continue;
                            }
                        }
                        if (rr != null) {
                            break;
                        }
                        tpt = Pullenti.Ner.Titlepage.Internal.TitleItemToken.TryAttach(tt);
                        if (tpt != null) {
                            if (tpt.Typ != Pullenti.Ner.Titlepage.Internal.TitleItemToken.Types.Typ && tpt.Typ != Pullenti.Ner.Titlepage.Internal.TitleItemToken.Types.TypAndTheme) {
                                break;
                            }
                            tt = tpt.EndToken;
                            if (tt.EndChar > endToken.EndChar) {
                                endToken = tt;
                            }
                            continue;
                        }
                    }
                    // Still unknown: look backward for the title or a student/executor keyword.
                    if (persTyp == Pullenti.Ner.Titlepage.Internal.TitleItemToken.Types.Undefined) {
                        for (Pullenti.Ner.Token tt = t.Previous; tt != null; tt = tt.Previous) {
                            Pullenti.Ner.Referent rr = tt.GetReferent();
                            if (rr == res) {
                                persTyp = Pullenti.Ner.Titlepage.Internal.TitleItemToken.Types.Worker;
                                break;
                            }
                            if (rr != null) {
                                break;
                            }
                            if ((tt.IsValue("СТУДЕНТ", null) || tt.IsValue("СТУДЕНТКА", null) || tt.IsValue("СЛУШАТЕЛЬ", null)) || tt.IsValue("ДИПЛОМНИК", null) || tt.IsValue("ИСПОЛНИТЕЛЬ", null)) {
                                persTyp = Pullenti.Ner.Titlepage.Internal.TitleItemToken.Types.Worker;
                                break;
                            }
                            tpt = Pullenti.Ner.Titlepage.Internal.TitleItemToken.TryAttach(tt);
                            if (tpt != null && tpt.Typ != Pullenti.Ner.Titlepage.Internal.TitleItemToken.Types.Typ) {
                                break;
                            }
                        }
                    }
                    // A person whose role stayed Undefined is registered with the lower coefficient 0.5.
                    if (persTyp != Pullenti.Ner.Titlepage.Internal.TitleItemToken.Types.Undefined) {
                        pr.Add(p, persTyp, 1);
                    }
                    else {
                        pr.Add(p, persTyp, (float)0.5);
                    }
                    if (t.EndChar > endToken.EndChar) {
                        endToken = t;
                    }
                }
                continue;
            }
            if (r == rli[0]) {
                persTyp = Pullenti.Ner.Titlepage.Internal.TitleItemToken.Types.Undefined;
            }
            if (r is Pullenti.Ner.Date.DateReferent) {
                if (res.Date == null) {
                    res.Date = r as Pullenti.Ner.Date.DateReferent;
                    if (t.EndChar > endToken.EndChar) {
                        endToken = t;
                    }
                }
            }
            else if (r is Pullenti.Ner.Geo.GeoReferent) {
                if (res.City == null && (r as Pullenti.Ner.Geo.GeoReferent).IsCity) {
                    res.City = r as Pullenti.Ner.Geo.GeoReferent;
                    if (t.EndChar > endToken.EndChar) {
                        endToken = t;
                    }
                }
            }
            if (r is Pullenti.Ner.Org.OrganizationReferent) {
                Pullenti.Ner.Org.OrganizationReferent org = r as Pullenti.Ner.Org.OrganizationReferent;
                // An organization of type "курс" with a number 1..7 gives the student year.
                if (org.Types.Contains("курс") && org.Number != null) {
                    int i;
                    if (int.TryParse(org.Number, out i)) {
                        if (i > 0 && (i < 8)) {
                            res.StudentYear = i;
                        }
                    }
                }
                // Climb from departments to the first non-department owner.
                for (; org.Higher != null; org = org.Higher) {
                    if (org.Kind != Pullenti.Ner.Org.OrganizationKind.Department) {
                        break;
                    }
                }
                if (org.Kind != Pullenti.Ner.Org.OrganizationKind.Department) {
                    if (res.Org == null) {
                        res.Org = org;
                    }
                    else if (Pullenti.Ner.Org.OrganizationReferent.CanBeHigher(res.Org, org)) {
                        res.Org = org;
                    }
                }
                if (t.EndChar > endToken.EndChar) {
                    endToken = t;
                }
            }
            if ((r is Pullenti.Ner.Uri.UriReferent) || (r is Pullenti.Ner.Geo.GeoReferent)) {
                if (t.EndChar > endToken.EndChar) {
                    endToken = t;
                }
            }
        }
    }

    // Copy the collected persons into the referent, one slot per role that has an attribute name.
    foreach (Pullenti.Ner.Titlepage.Internal.TitleItemToken.Types ty in persTypes) {
        foreach (Pullenti.Ner.Person.PersonReferent p in pr.GetPersons(ty)) {
            if (pr.GetAttrNameForType(ty) != null) {
                res.AddSlot(pr.GetAttrNameForType(ty), p, false, 0);
            }
        }
    }
    // No author found: fall back to the first person with an undefined role.
    if (res.GetSlotValue(TitlePageReferent.ATTR_AUTHOR) == null) {
        foreach (Pullenti.Ner.Person.PersonReferent p in pr.GetPersons(Pullenti.Ner.Titlepage.Internal.TitleItemToken.Types.Undefined)) {
            res.AddSlot(TitlePageReferent.ATTR_AUTHOR, p, false, 0);
            break;
        }
    }
    // No city found: take it from the organization's geo slot when that is a city.
    if (res.City == null && res.Org != null) {
        Pullenti.Ner.Slot s = res.Org.FindSlot(Pullenti.Ner.Org.OrganizationReferent.ATTR_GEO, null, true);
        if (s != null && (s.Value is Pullenti.Ner.Geo.GeoReferent)) {
            if ((s.Value as Pullenti.Ner.Geo.GeoReferent).IsCity) {
                res.City = s.Value as Pullenti.Ner.Geo.GeoReferent;
            }
        }
    }
    // No date found: run the date analyzer right after each geo referent
    // (optionally skipping one ':' ',' or hyphen).
    if (res.Date == null) {
        for (Pullenti.Ner.Token t = begin; t != null && t.EndChar <= endChar; t = t.Next) {
            Pullenti.Ner.Geo.GeoReferent city = t.GetReferent() as Pullenti.Ner.Geo.GeoReferent;
            if (city == null) {
                continue;
            }
            if (t.Next is Pullenti.Ner.TextToken) {
                if (t.Next.IsCharOf(":,") || t.Next.IsHiphen) {
                    t = t.Next;
                }
            }
            Pullenti.Ner.ReferentToken rt = t.Kit.ProcessReferent(Pullenti.Ner.Date.DateAnalyzer.ANALYZER_NAME, t.Next);
            if (rt != null) {
                rt.SaveToLocalOntology();
                res.Date = rt.Referent as Pullenti.Ner.Date.DateReferent;
                if (kit != null) {
                    kit.EmbedToken(rt);
                }
                break;
            }
        }
    }

    if (res.Slots.Count == 0) {
        return null;
    }
    return res;
}
/// <summary>
/// Tries to parse a named measurement starting at <paramref name="t"/>: a name (noun phrase or
/// similar), optional units and clauses, then one or more numeric values.
/// </summary>
/// <param name="t">Start token. Must be a text token that is not a table control character, otherwise null is returned.</param>
/// <param name="addUnits">Additional unit terms, passed through to the unit and number parsers.</param>
/// <param name="canBeSet">When true, a single parsed value followed by another value with different units is returned as a set (<c>IsSet</c>) holding both as internals.</param>
/// <param name="canUnitsAbsent">When false, a value for which no units can be found makes the method return null.</param>
/// <param name="isResctriction">When true, a ':' ',' or '_' delimiter after the name makes the method return null; also passed to the multi-number parser.</param>
/// <param name="isSubval">When true, a plain run of letter tokens is accepted as the name, a leading preposition is kept, and the name text is not converted to nominative.</param>
/// <returns>The parsed measure token, or null when nothing suitable is found.</returns>
public static MeasureToken TryParse(Pullenti.Ner.Token t, Pullenti.Ner.Core.TerminCollection addUnits, bool canBeSet = true, bool canUnitsAbsent = false, bool isResctriction = false, bool isSubval = false) {
    if (!(t is Pullenti.Ner.TextToken)) {
        return null;
    }
    if (t.IsTableControlChar) {
        return null;
    }
    Pullenti.Ner.Token t0 = t;
    Pullenti.Ner.MetaToken whd = null;
    // minmax is filled in by _isMinOrMax; below, < 0 turns a single value into a lower
    // bound and > 0 into an upper bound.
    int minmax = 0;
    Pullenti.Ner.Token tt = NumbersWithUnitToken._isMinOrMax(t0, ref minmax);
    if (tt != null) {
        t = tt.Next;
    }

    // --- Step 1: find the token span that names the measured quantity. ---
    Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t, Pullenti.Ner.Core.NounPhraseParseAttr.ParsePreposition | Pullenti.Ner.Core.NounPhraseParseAttr.IgnoreBrackets, 0, null);
    if (npt == null) {
        whd = NumbersWithUnitToken._tryParseWHL(t);
        if (whd != null) {
            npt = new Pullenti.Ner.Core.NounPhraseToken(t0, whd.EndToken);
        }
        else if (t0.IsValue("КПД", null)) {
            npt = new Pullenti.Ner.Core.NounPhraseToken(t0, t0);
        }
        else if ((t0 is Pullenti.Ner.TextToken) && t0.LengthChar > 3 && t0.GetMorphClassInDictionary().IsUndefined) {
            npt = new Pullenti.Ner.Core.NounPhraseToken(t0, t0);
        }
        else if (t0.IsValue("T", null) && t0.Chars.IsAllLower) {
            // Lower-case "t", optionally followed by '='.
            npt = new Pullenti.Ner.Core.NounPhraseToken(t0, t0);
            t = t0;
            if (t.Next != null && t.Next.IsChar('=')) {
                npt.EndToken = t.Next;
            }
        }
        else if ((t0 is Pullenti.Ner.TextToken) && t0.Chars.IsLetter && isSubval) {
            if (NumbersWithUnitToken.TryParse(t, addUnits, false, false, false, false) != null) {
                return null;
            }
            // Sub-value: extend the name over following letter tokens and bracket
            // sequences until a wide gap, a non-text token or a number with units.
            npt = new Pullenti.Ner.Core.NounPhraseToken(t0, t0);
            for (t = t0.Next; t != null; t = t.Next) {
                if (t.WhitespacesBeforeCount > 2) {
                    break;
                }
                else if (!(t is Pullenti.Ner.TextToken)) {
                    break;
                }
                else if (!t.Chars.IsLetter) {
                    Pullenti.Ner.Core.BracketSequenceToken br = Pullenti.Ner.Core.BracketHelper.TryParse(t, Pullenti.Ner.Core.BracketParseAttr.No, 100);
                    if (br != null) {
                        npt.EndToken = t = br.EndToken;
                    }
                    else {
                        break;
                    }
                }
                else if (NumbersWithUnitToken.TryParse(t, addUnits, false, false, false, false) != null) {
                    break;
                }
                else {
                    npt.EndToken = t;
                }
            }
        }
        else {
            return null;
        }
    }
    else if (Pullenti.Ner.Core.NumberHelper.TryParseRealNumber(t, true, false) != null) {
        return null;
    }
    else {
        Pullenti.Ner.Date.Internal.DateItemToken dtok = Pullenti.Ner.Date.Internal.DateItemToken.TryAttach(t, null, false);
        if (dtok != null) {
            return null;
        }
    }

    Pullenti.Ner.Token t1 = npt.EndToken;
    t = npt.EndToken;
    Pullenti.Ner.MetaToken name = new Pullenti.Ner.MetaToken(npt.BeginToken, npt.EndToken) { Morph = npt.Morph };
    List<UnitToken> units = null;
    List<UnitToken> units2 = null;
    List<MeasureToken> internals = new List<MeasureToken>();
    bool not = false;

    // --- Step 2: walk the tokens between the name and the value, extending the name
    // and collecting units, bracketed parts and nested "ПРИ ..." measures. ---
    for (tt = t1.Next; tt != null; tt = tt.Next) {
        if (tt.IsNewlineBefore) {
            break;
        }
        if (tt.IsTableControlChar) {
            break;
        }
        Pullenti.Ner.Token tt2 = NumbersWithUnitToken._isMinOrMax(tt, ref minmax);
        if (tt2 != null) {
            t1 = t = tt = tt2;
            continue;
        }
        // Linking verbs are skipped; a preceding "НЕ" is remembered as negation.
        if ((tt.IsValue("БЫТЬ", null) || tt.IsValue("ДОЛЖЕН", null) || tt.IsValue("ДОЛЖНЫЙ", null)) || tt.IsValue("МОЖЕТ", null) || ((tt.IsValue("СОСТАВЛЯТЬ", null) && !tt.GetMorphClassInDictionary().IsAdjective))) {
            t1 = t = tt;
            if (tt.Previous.IsValue("НЕ", null)) {
                not = true;
            }
            continue;
        }
        Pullenti.Ner.MetaToken www = NumbersWithUnitToken._tryParseWHL(tt);
        if (www != null) {
            whd = www;
            t1 = t = tt = www.EndToken;
            continue;
        }
        // "ПРИ <measure>" is stored as an internal measure.
        if (tt.IsValue("ПРИ", null)) {
            MeasureToken mt1 = TryParse(tt.Next, addUnits, false, false, true, false);
            if (mt1 != null) {
                internals.Add(mt1);
                t1 = t = tt = mt1.EndToken;
                continue;
            }
            NumbersWithUnitToken n1 = NumbersWithUnitToken.TryParse(tt.Next, addUnits, false, false, false, false);
            if (n1 != null && n1.Units.Count > 0) {
                mt1 = new MeasureToken(n1.BeginToken, n1.EndToken) { Nums = n1 };
                internals.Add(mt1);
                t1 = t = tt = mt1.EndToken;
                continue;
            }
        }
        if (tt.IsValue("ПО", null) && tt.Next != null && tt.Next.IsValue("U", null)) {
            t1 = t = tt = tt.Next;
            continue;
        }
        if (internals.Count > 0) {
            if (tt.IsChar(':')) {
                break;
            }
            MeasureToken mt1 = TryParse(tt.Next, addUnits, false, false, true, false);
            if (mt1 != null && mt1.Reliable) {
                internals.Add(mt1);
                t1 = t = tt = mt1.EndToken;
                continue;
            }
        }
        // A number written in words that forms a noun phrase belongs to the name.
        if ((tt is Pullenti.Ner.NumberToken) && (tt as Pullenti.Ner.NumberToken).Typ == Pullenti.Ner.NumberSpellingType.Words) {
            Pullenti.Ner.Core.NounPhraseToken npt3 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(tt, Pullenti.Ner.Core.NounPhraseParseAttr.ParseNumericAsAdjective, 0, null);
            if (npt3 != null) {
                t1 = tt = npt3.EndToken;
                if (internals.Count == 0) {
                    name.EndToken = t1;
                }
                continue;
            }
        }
        // UPPERCASE-<number> without spaces belongs to the name.
        if (((tt.IsHiphen && !tt.IsWhitespaceBefore && !tt.IsWhitespaceAfter) && (tt.Next is Pullenti.Ner.NumberToken) && (tt.Previous is Pullenti.Ner.TextToken)) && tt.Previous.Chars.IsAllUpper) {
            t1 = tt = t = tt.Next;
            if (internals.Count == 0) {
                name.EndToken = t1;
            }
            continue;
        }
        // UPPERCASE<number> without a space belongs to the name.
        if (((tt is Pullenti.Ner.NumberToken) && !tt.IsWhitespaceBefore && (tt.Previous is Pullenti.Ner.TextToken)) && tt.Previous.Chars.IsAllUpper) {
            t1 = t = tt;
            if (internals.Count == 0) {
                name.EndToken = t1;
            }
            continue;
        }
        // <number>-<word> without spaces belongs to the name. tt.Next is checked for
        // null first so the last token of the chain cannot be dereferenced.
        if ((((tt is Pullenti.Ner.NumberToken) && !tt.IsWhitespaceAfter && tt.Next != null && tt.Next.IsHiphen) && !tt.Next.IsWhitespaceAfter && (tt.Next.Next is Pullenti.Ner.TextToken)) && tt.Next.Next.LengthChar > 2) {
            t1 = t = tt = tt.Next.Next;
            Pullenti.Ner.Core.NounPhraseToken npt1 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(tt, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
            if (npt1 != null && npt1.EndChar > tt.EndChar) {
                t1 = t = tt = npt1.EndToken;
            }
            if (internals.Count == 0) {
                name.EndToken = t1;
            }
            continue;
        }
        // "USB <number>..." : the number and everything glued to it belong to the name.
        if ((tt is Pullenti.Ner.NumberToken) && tt.Previous != null) {
            if (tt.Previous.IsValue("USB", null)) {
                t1 = t = tt;
                if (internals.Count == 0) {
                    name.EndToken = t1;
                }
                for (Pullenti.Ner.Token ttt = tt.Next; ttt != null; ttt = ttt.Next) {
                    if (ttt.IsWhitespaceBefore) {
                        break;
                    }
                    if (ttt.IsCharOf(",:")) {
                        break;
                    }
                    t1 = t = tt = ttt;
                    if (internals.Count == 0) {
                        name.EndToken = t1;
                    }
                }
                continue;
            }
        }
        // A number with units ends the name, unless a longer noun phrase starts here.
        NumbersWithUnitToken mt0 = NumbersWithUnitToken.TryParse(tt, addUnits, false, false, false, false);
        if (mt0 != null) {
            Pullenti.Ner.Core.NounPhraseToken npt1 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(tt, Pullenti.Ner.Core.NounPhraseParseAttr.ParseNumericAsAdjective | Pullenti.Ner.Core.NounPhraseParseAttr.ParsePreposition, 0, null);
            if (npt1 != null && npt1.EndChar > mt0.EndChar) {
                t1 = t = tt = npt1.EndToken;
                if (internals.Count == 0) {
                    name.EndToken = t1;
                }
                continue;
            }
            break;
        }
        // After ',' or '(' : a WHL token (see _tryParseWHL) or a unit list.
        if (((tt.IsComma || tt.IsChar('('))) && tt.Next != null) {
            www = NumbersWithUnitToken._tryParseWHL(tt.Next);
            if (www != null) {
                whd = www;
                t1 = t = tt = www.EndToken;
                if (tt.Next != null && tt.Next.IsComma) {
                    t1 = tt = tt.Next;
                }
                if (tt.Next != null && tt.Next.IsChar(')')) {
                    t1 = tt = tt.Next;
                    continue;
                }
            }
            List<UnitToken> uu = UnitToken.TryParseList(tt.Next, addUnits, false);
            if (uu != null) {
                t1 = t = uu[uu.Count - 1].EndToken;
                units = uu;
                if (tt.IsChar('(') && t1.Next != null && t1.Next.IsChar(')')) {
                    t1 = t = tt = t1.Next;
                    continue;
                }
                else if (t1.Next != null && t1.Next.IsChar('(')) {
                    uu = UnitToken.TryParseList(t1.Next.Next, addUnits, false);
                    if (uu != null && uu[uu.Count - 1].EndToken.Next != null && uu[uu.Count - 1].EndToken.Next.IsChar(')')) {
                        units2 = uu;
                        t1 = t = tt = uu[uu.Count - 1].EndToken.Next;
                        continue;
                    }
                    www = NumbersWithUnitToken._tryParseWHL(t1.Next);
                    if (www != null) {
                        whd = www;
                        t1 = t = tt = www.EndToken;
                        continue;
                    }
                }
                if (uu != null && uu.Count > 0 && !uu[0].IsDoubt) {
                    break;
                }
                if (t1.Next != null) {
                    if (t1.Next.IsTableControlChar || t1.IsNewlineAfter) {
                        break;
                    }
                }
                // Doubtful units in the middle of a line are discarded.
                units = null;
            }
        }
        if (Pullenti.Ner.Core.BracketHelper.CanBeStartOfSequence(tt, false, false) && !(tt.Next is Pullenti.Ner.NumberToken)) {
            Pullenti.Ner.Core.BracketSequenceToken br = Pullenti.Ner.Core.BracketHelper.TryParse(tt, Pullenti.Ner.Core.BracketParseAttr.No, 100);
            if (br != null) {
                t1 = t = tt = br.EndToken;
                continue;
            }
        }
        if (tt.IsValue("НЕ", null) && tt.Next != null) {
            Pullenti.Morph.MorphClass mc = tt.Next.GetMorphClassInDictionary();
            if (mc.IsAdverb || mc.IsMisc) {
                break;
            }
            continue;
        }
        Pullenti.Ner.Core.NounPhraseToken npt2 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(tt, Pullenti.Ner.Core.NounPhraseParseAttr.ParsePreposition | Pullenti.Ner.Core.NounPhraseParseAttr.IgnoreBrackets | Pullenti.Ner.Core.NounPhraseParseAttr.ParsePronouns, 0, null);
        if (npt2 == null) {
            if (tt.Morph.Class.IsPreposition || tt.Morph.Class.IsConjunction) {
                // A range keyword (m_Termins) stops the name unless a letter token follows it.
                Pullenti.Ner.Core.TerminToken to = NumbersWithUnitToken.m_Termins.TryParse(tt, Pullenti.Ner.Core.TerminParseAttr.No);
                if (to != null) {
                    if (!((to.EndToken.Next is Pullenti.Ner.TextToken) && to.EndToken.Next.IsLetters)) {
                        break;
                    }
                }
                t1 = tt;
                continue;
            }
            Pullenti.Morph.MorphClass mc = tt.GetMorphClassInDictionary();
            if (((tt is Pullenti.Ner.TextToken) && tt.Chars.IsLetter && tt.LengthChar > 1) && (((tt.Chars.IsAllUpper || mc.IsAdverb || mc.IsUndefined) || mc.IsAdjective))) {
                List<UnitToken> uu = UnitToken.TryParseList(tt, addUnits, false);
                if (uu != null) {
                    if (uu[0].LengthChar > 1 || uu.Count > 1) {
                        units = uu;
                        t1 = t = uu[uu.Count - 1].EndToken;
                        break;
                    }
                }
                t1 = t = tt;
                if (internals.Count == 0) {
                    name.EndToken = tt;
                }
                continue;
            }
            if (tt.IsComma) {
                continue;
            }
            if (tt.IsChar('.')) {
                if (!Pullenti.Ner.Core.MiscHelper.CanBeStartOfSentence(tt.Next)) {
                    continue;
                }
                List<UnitToken> uu = UnitToken.TryParseList(tt.Next, addUnits, false);
                if (uu != null) {
                    if (uu[0].LengthChar > 2 || uu.Count > 1) {
                        units = uu;
                        t1 = t = uu[uu.Count - 1].EndToken;
                        break;
                    }
                }
            }
            break;
        }
        t1 = t = tt = npt2.EndToken;
        // The phrase extends the name only when no internal measure has been collected,
        // it does not end with a limit/range word, and it ends with a letter token.
        if (internals.Count > 0) {
        }
        else if (t.IsValue("ПРЕДЕЛ", null) || t.IsValue("ГРАНИЦА", null) || t.IsValue("ДИАПАЗОН", null)) {
        }
        else if (t.Chars.IsLetter) {
            name.EndToken = t1;
        }
    }

    // --- Step 3: skip delimiters between the name and the value; after ':' a list of
    // sub-values may follow. ---
    for (t1 = t1.Next; t1 != null; t1 = t1.Next) {
        if (t1.IsTableControlChar) {
        }
        else if (t1.IsCharOf(":,_")) {
            if (isResctriction) {
                return null;
            }
            Pullenti.Ner.MetaToken www = NumbersWithUnitToken._tryParseWHL(t1.Next);
            if (www != null) {
                whd = www;
                t1 = t = www.EndToken;
                continue;
            }
            List<UnitToken> uu = UnitToken.TryParseList(t1.Next, addUnits, false);
            if (uu != null) {
                if (uu[0].LengthChar > 1 || uu.Count > 1) {
                    units = uu;
                    t1 = t = uu[uu.Count - 1].EndToken;
                    continue;
                }
            }
            if (t1.IsChar(':')) {
                List<MeasureToken> li = new List<MeasureToken>();
                for (Pullenti.Ner.Token ttt = t1.Next; ttt != null; ttt = ttt.Next) {
                    if (ttt.IsHiphen || ttt.IsTableControlChar) {
                        continue;
                    }
                    if ((ttt is Pullenti.Ner.TextToken) && !ttt.Chars.IsLetter) {
                        continue;
                    }
                    MeasureToken mt1 = TryParse(ttt, addUnits, true, true, false, true);
                    if (mt1 == null) {
                        break;
                    }
                    li.Add(mt1);
                    ttt = mt1.EndToken;
                    if (ttt.Next != null && ttt.Next.IsChar(';')) {
                        ttt = ttt.Next;
                    }
                    // Continue only after ';' or when the sub-value occupies its own line.
                    if (ttt.IsChar(';')) {
                    }
                    else if (ttt.IsNewlineAfter && mt1.IsNewlineBefore) {
                    }
                    else {
                        break;
                    }
                }
                if (li.Count > 1) {
                    MeasureToken res0 = new MeasureToken(t0, li[li.Count - 1].EndToken) { Internals = li, IsEmpty = true };
                    if (internals != null && internals.Count > 0) {
                        res0.InternalEx = internals[0];
                    }
                    string nam = Pullenti.Ner.Core.MiscHelper.GetTextValueOfMetaToken(name, Pullenti.Ner.Core.GetTextAttr.FirstNounGroupToNominative);
                    li[0].BeginToken = t0;
                    foreach (MeasureToken v in li) {
                        v.Name = string.Format("{0} ({1})", nam, v.Name ?? "").Trim();
                        if (v.Nums != null && v.Nums.Units.Count == 0 && units != null) {
                            v.Nums.Units = units;
                        }
                    }
                    return res0;
                }
            }
        }
        else if (t1.IsHiphen && t1.IsWhitespaceAfter && t1.IsWhitespaceBefore) {
        }
        else if (t1.IsHiphen && t1.Next != null && t1.Next.IsChar('(')) {
        }
        else {
            break;
        }
    }
    if (t1 == null) {
        return null;
    }

    // --- Step 4: parse the numeric value(s). ---
    List<NumbersWithUnitToken> mts = NumbersWithUnitToken.TryParseMulti(t1, addUnits, false, not, true, isResctriction);
    if (mts == null) {
        // Units are known and the value position follows ':' : produce a NaN value
        // ending at the colon. t1 is non-null here because of the check above.
        if (units != null && units.Count > 0 && t1.Previous.IsChar(':')) {
            t1 = t1.Previous;
            mts = new List<NumbersWithUnitToken>();
            mts.Add(new NumbersWithUnitToken(t0, t1) { SingleVal = double.NaN });
        }
        if (mts == null) {
            return null;
        }
    }
    NumbersWithUnitToken mt = mts[0];
    if (mt.BeginToken == mt.EndToken && !(mt.BeginToken is Pullenti.Ner.NumberToken)) {
        return null;
    }
    if (!isSubval && name.BeginToken.Morph.Class.IsPreposition) {
        name.BeginToken = name.BeginToken.Next;
    }
    if (mt.WHL != null) {
        whd = mt.WHL;
    }
    // Trim a trailing WHL token or unit list off the name (bounded to 10 passes).
    for (int kk = 0; kk < 10; kk++) {
        if (whd != null && whd.EndToken == name.EndToken) {
            name.EndToken = whd.BeginToken.Previous;
            continue;
        }
        if (units != null) {
            if (units[units.Count - 1].EndToken == name.EndToken) {
                name.EndToken = units[0].BeginToken.Previous;
                continue;
            }
        }
        break;
    }

    // Several values and no internal clauses: return a container with one internal per value.
    if (mts.Count > 1 && internals.Count == 0) {
        if (mt.Units.Count == 0) {
            if (units != null) {
                foreach (NumbersWithUnitToken m in mts) {
                    m.Units = units;
                }
            }
        }
        MeasureToken res1 = new MeasureToken(t0, mts[mts.Count - 1].EndToken) { Morph = name.Morph, Reliable = true };
        res1.Name = Pullenti.Ner.Core.MiscHelper.GetTextValueOfMetaToken(name, Pullenti.Ner.Core.GetTextAttr.FirstNounGroupToNominative);
        for (int k = 0; k < mts.Count; k++) {
            MeasureToken ttt = new MeasureToken(mts[k].BeginToken, mts[k].EndToken) { Nums = mts[k] };
            if (whd != null) {
                // Tag is expected to hold the per-value names; tolerate any other
                // content instead of failing on a null cast result.
                List<string> nams = whd.Tag as List<string>;
                if (nams != null && k < nams.Count) {
                    ttt.Name = nams[k];
                }
            }
            res1.Internals.Add(ttt);
        }
        // A trailing "± N%" applies to the whole group.
        Pullenti.Ner.Token tt1 = res1.EndToken.Next;
        if (tt1 != null && tt1.IsChar('±')) {
            NumbersWithUnitToken nn = NumbersWithUnitToken._tryParse(tt1, addUnits, true, false, false);
            if (nn != null && nn.PlusMinusPercent) {
                res1.EndToken = nn.EndToken;
                res1.Nums = nn;
                if (nn.Units.Count > 0 && units == null && mt.Units.Count == 0) {
                    foreach (NumbersWithUnitToken m in mts) {
                        m.Units = nn.Units;
                    }
                }
            }
        }
        return res1;
    }

    // A value glued to the preceding token is accepted only after ':' ')' ',' a table
    // control char or "IP", or when it starts with a hyphen and has non-doubtful units.
    if (!mt.IsWhitespaceBefore) {
        if (mt.BeginToken.Previous == null) {
            return null;
        }
        if (mt.BeginToken.Previous.IsCharOf(":),") || mt.BeginToken.Previous.IsTableControlChar || mt.BeginToken.Previous.IsValue("IP", null)) {
        }
        else if (mt.BeginToken.IsHiphen && mt.Units.Count > 0 && !mt.Units[0].IsDoubt) {
        }
        else {
            return null;
        }
    }
    if (mt.Units.Count == 0 && units != null) {
        mt.Units = units;
        // For a value with a divisor, the units from the first one with power -1 onward
        // are moved to the divisor (with the sign of the power flipped).
        if (mt.DivNum != null && units.Count > 1 && mt.DivNum.Units.Count == 0) {
            for (int i = 1; i < units.Count; i++) {
                if (units[i].Pow == -1) {
                    for (int j = i; j < units.Count; j++) {
                        mt.DivNum.Units.Add(units[j]);
                        units[j].Pow = -units[j].Pow;
                    }
                    mt.Units.RemoveRange(i, units.Count - i);
                    break;
                }
            }
        }
    }
    if ((minmax < 0) && mt.SingleVal != null) {
        mt.FromVal = mt.SingleVal;
        mt.FromInclude = true;
        mt.SingleVal = null;
    }
    if (minmax > 0 && mt.SingleVal != null) {
        mt.ToVal = mt.SingleVal;
        mt.ToInclude = true;
        mt.SingleVal = null;
    }
    if (mt.Units.Count == 0) {
        units = UnitToken.TryParseList(mt.EndToken.Next, addUnits, true);
        if (units == null) {
            if (!canUnitsAbsent) {
                return null;
            }
        }
        else {
            mt.Units = units;
        }
    }

    // --- Step 5: build the result. ---
    MeasureToken res = new MeasureToken(t0, mt.EndToken) { Morph = name.Morph, Internals = internals };
    // Name preceded by "<word>-" without spaces: include that word in the name.
    if (((!t0.IsWhitespaceBefore && t0.Previous != null && t0 == name.BeginToken) && t0.Previous.IsHiphen && !t0.Previous.IsWhitespaceBefore) && (t0.Previous.Previous is Pullenti.Ner.TextToken)) {
        name.BeginToken = res.BeginToken = name.BeginToken.Previous.Previous;
    }
    res.Name = Pullenti.Ner.Core.MiscHelper.GetTextValueOfMetaToken(name, (!isSubval ? Pullenti.Ner.Core.GetTextAttr.FirstNounGroupToNominative : Pullenti.Ner.Core.GetTextAttr.No));
    res.Nums = mt;
    // The result is reliable when a unit keyword lies inside the result span.
    foreach (UnitToken u in res.Nums.Units) {
        if (u.Keyword != null) {
            if (u.Keyword.BeginChar >= res.BeginChar) {
                res.Reliable = true;
            }
        }
    }
    res._parseInternals(addUnits);
    if (res.Internals.Count > 0 || !canBeSet) {
        return res;
    }

    // A second single value with different units right after (optionally after a
    // comma/"and") turns the result into a set of two measures.
    t1 = res.EndToken.Next;
    if (t1 != null && t1.IsCommaAnd) {
        t1 = t1.Next;
    }
    List<NumbersWithUnitToken> mts1 = NumbersWithUnitToken.TryParseMulti(t1, addUnits, false, false, false, false);
    if ((mts1 != null && mts1.Count == 1 && (t1.WhitespacesBeforeCount < 3)) && mts1[0].Units.Count > 0 && !UnitToken.CanBeEquals(mts[0].Units, mts1[0].Units)) {
        res.IsSet = true;
        res.Nums = null;
        res.Internals.Add(new MeasureToken(mt.BeginToken, mt.EndToken) { Nums = mt });
        res.Internals.Add(new MeasureToken(mts1[0].BeginToken, mts1[0].EndToken) { Nums = mts1[0] });
        res.EndToken = mts1[0].EndToken;
    }
    return res;
}
/// <summary>
/// Tries to build an organization-name item that starts at <paramref name="t"/>.
/// A phrase introduced by "ОРДЕНА" is returned as an item flagged <c>IsIgnoredPart</c>; otherwise the
/// parsing is delegated to <c>_TryAttach</c> and the result is post-processed: it may be extended by a
/// longer ontology term, a hyphen-joined token, an "and"-joined item, and a trailing chain of standard nouns.
/// </summary>
/// <param name="t">Token to start from; <c>null</c> yields <c>null</c>.</param>
/// <param name="prev">Previously attached name item, or <c>null</c>; forwarded to <c>_TryAttach</c> and used for the letter-case check.</param>
/// <param name="extOnto">When <c>true</c>, a geo referent or any text token except ';' is accepted verbatim if <c>_TryAttach</c> fails, and the ontology and letter-case checks are skipped.</param>
/// <param name="first">When <c>true</c>, a Cyrillic preposition other than "ПО" / "ПРИ" at <paramref name="t"/> is rejected.</param>
/// <returns>The parsed item, or <c>null</c> when nothing could be attached.</returns>
public static OrgItemNameToken TryAttach(Pullenti.Ner.Token t, OrgItemNameToken prev, bool extOnto, bool first) {
    if (t == null) {
        return null;
    }

    // "ОРДЕНА <...>": whatever follows the keyword is wrapped into an item marked IsIgnoredPart.
    if (t.IsValue("ОРДЕНА", null) && t.Next != null) {
        Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t.Next, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
        if (npt != null) {
            Pullenti.Ner.Token t1 = npt.EndToken;
            // After "ЗНАК" / "ДРУЖБА" one more noun phrase is consumed when it follows closely.
            if ((t1.IsValue("ЗНАК", null) || t1.IsValue("ДРУЖБА", null)) && (t1.WhitespacesAfterCount < 2)) {
                npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t1.Next, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                if (npt != null) {
                    t1 = npt.EndToken;
                }
            }
            return new OrgItemNameToken(t, t1) { IsIgnoredPart = true };
        }
        if (t.Next.GetMorphClassInDictionary().IsProperSurname) {
            return new OrgItemNameToken(t, t.Next) { IsIgnoredPart = true };
        }
        Pullenti.Ner.ReferentToken ppp = t.Kit.ProcessReferent("PERSON", t.Next);
        if (ppp != null) {
            return new OrgItemNameToken(t, ppp.EndToken) { IsIgnoredPart = true };
        }
        if ((t.WhitespacesAfterCount < 2) && Pullenti.Ner.Core.BracketHelper.CanBeStartOfSequence(t.Next, true, false)) {
            Pullenti.Ner.Core.BracketSequenceToken br = Pullenti.Ner.Core.BracketHelper.TryParse(t.Next, Pullenti.Ner.Core.BracketParseAttr.NearCloseBracket, 10);
            if (br != null && (br.LengthChar < 40)) {
                return new OrgItemNameToken(t, br.EndToken) { IsIgnoredPart = true };
            }
        }
    }

    // At the first position only the prepositions "ПО" and "ПРИ" are allowed to start a name.
    if (first && t.Chars.IsCyrillicLetter && t.Morph.Class.IsPreposition) {
        if (!t.IsValue("ПО", null) && !t.IsValue("ПРИ", null)) {
            return null;
        }
    }

    OrgItemNameToken res = _TryAttach(t, prev, extOnto);
    if (res == null) {
        if (extOnto) {
            if ((t.GetReferent() is Pullenti.Ner.Geo.GeoReferent) || ((t is Pullenti.Ner.TextToken) && !t.IsChar(';'))) {
                return new OrgItemNameToken(t, t) { Value = t.GetSourceText() };
            }
        }
        return null;
    }

    // Prefer a longer match from the external ontology's pure organization names.
    if (prev == null && !extOnto) {
        if (t.Kit.Ontology != null) {
            Pullenti.Ner.Org.OrganizationAnalyzer.OrgAnalyzerData ad = t.Kit.Ontology._getAnalyzerData(Pullenti.Ner.Org.OrganizationAnalyzer.ANALYZER_NAME) as Pullenti.Ner.Org.OrganizationAnalyzer.OrgAnalyzerData;
            if (ad != null) {
                Pullenti.Ner.Core.TerminToken tok = ad.OrgPureNames.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
                if (tok != null && tok.EndChar > res.EndChar) {
                    res.EndToken = tok.EndToken;
                }
            }
        }
    }

    // A lower-case predecessor followed by a non-lower-case item is accepted only for
    // Latin/Latin pairs or when the item starts with a standard noun.
    if (prev != null && !extOnto) {
        if ((prev.Chars.IsAllLower && !res.Chars.IsAllLower && !res.IsStdTail) && !res.IsStdName) {
            if (prev.Chars.IsLatinLetter && res.Chars.IsLatinLetter) {
            }
            else if (m_StdNouns.TryParse(res.BeginToken, Pullenti.Ner.Core.TerminParseAttr.No) != null) {
            }
            else {
                return null;
            }
        }
    }

    // Glue "X-Y" (hyphen without surrounding whitespace) into a single item.
    if ((res.EndToken.Next != null && !res.EndToken.IsWhitespaceAfter && res.EndToken.Next.IsHiphen) && !res.EndToken.Next.IsWhitespaceAfter) {
        Pullenti.Ner.TextToken tt = res.EndToken.Next.Next as Pullenti.Ner.TextToken;
        if (tt != null) {
            if (tt.Chars == res.Chars || tt.Chars.IsAllUpper) {
                res.EndToken = tt;
                res.Value = string.Format("{0}-{1}", res.Value, tt.Term);
            }
        }
    }

    // Glue "X and Y" when both sides share letter case and a grammatical case, and Y is not an organization type.
    if ((res.EndToken.Next != null && res.EndToken.Next.IsAnd && res.EndToken.WhitespacesAfterCount == 1) && res.EndToken.Next.WhitespacesAfterCount == 1) {
        OrgItemNameToken res1 = _TryAttach(res.EndToken.Next.Next, prev, extOnto);
        if (res1 != null && res1.Chars == res.Chars && OrgItemTypeToken.TryAttach(res.EndToken.Next.Next, false, null) == null) {
            if (!((res1.Morph.Case & res.Morph.Case)).IsUndefined) {
                res.EndToken = res1.EndToken;
                res.Value = string.Format("{0} {1} {2}", res.Value, (res.Kit.BaseLanguage.IsUa ? "ТА" : "И"), res1.Value);
            }
        }
    }

    // Count the standard nouns inside the item.
    for (Pullenti.Ner.Token tt = res.BeginToken; tt != null && tt.EndChar <= res.EndChar; tt = tt.Next) {
        if (m_StdNouns.TryParse(tt, Pullenti.Ner.Core.TerminParseAttr.No) != null) {
            res.StdOrgNameNouns++;
        }
    }

    // If the item ends with a standard noun, try to absorb a following chain of standard-noun phrases.
    if (m_StdNouns.TryParse(res.EndToken, Pullenti.Ner.Core.TerminParseAttr.No) != null) {
        int cou = 1;
        bool non = false;
        Pullenti.Ner.Token et = res.EndToken;
        if (!_isNotTermNoun(res.EndToken)) {
            non = true;
        }
        bool br = false; // true while inside an unclosed '(' ... ')'
        for (Pullenti.Ner.Token tt = res.EndToken.Next; tt != null; tt = tt.Next) {
            if (tt.IsTableControlChar) {
                break;
            }
            if (tt.IsChar('(')) {
                if (!non) {
                    break;
                }
                br = true;
                continue;
            }
            if (tt.IsChar(')')) {
                br = false;
                et = tt;
                break;
            }
            if (!(tt is Pullenti.Ner.TextToken)) {
                break;
            }
            if (tt.WhitespacesBeforeCount > 1) {
                if (tt.NewlinesBeforeCount > 1) {
                    break;
                }
                if (tt.Chars != res.EndToken.Chars) {
                    break;
                }
            }
            if (tt.Morph.Class.IsPreposition || tt.IsCommaAnd) {
                continue;
            }
            Pullenti.Morph.MorphClass dd = tt.GetMorphClassInDictionary();
            if (!dd.IsNoun && !dd.IsAdjective) {
                break;
            }
            Pullenti.Ner.Core.NounPhraseToken npt2 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(tt, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
            if (npt2 == null) {
                if (dd == Pullenti.Morph.MorphClass.Adjective) {
                    continue;
                }
                break;
            }
            if (m_StdNouns.TryParse(npt2.EndToken, Pullenti.Ner.Core.TerminParseAttr.No) == null) {
                break;
            }
            if (npt2.EndToken.Chars != res.EndToken.Chars) {
                break;
            }
            // These nouns (or a preceding "ПРИ") may start a separate organization: stop if one is recognized here.
            if ((npt2.EndToken.IsValue("УПРАВЛЕНИЕ", null) || npt2.EndToken.IsValue("ИНСТИТУТ", null) || npt2.EndToken.IsValue("УПРАВЛІННЯ", null)) || npt2.EndToken.IsValue("ІНСТИТУТ", null) || tt.Previous.IsValue("ПРИ", null)) {
                Pullenti.Ner.ReferentToken rt = tt.Kit.ProcessReferent(Pullenti.Ner.Org.OrganizationAnalyzer.ANALYZER_NAME, tt);
                if (rt != null) {
                    break;
                }
            }
            cou++;
            tt = npt2.EndToken;
            if (!_isNotTermNoun(tt)) {
                non = true;
                et = tt;
            }
        }
        // Commit the extension only if a qualifying noun was seen and no bracket is left open.
        if (non && !br) {
            res.StdOrgNameNouns += cou;
            res.EndToken = et;
        }
    }
    return res;
}
/// <summary>
/// Tries to parse a number followed by a unit or currency postfix, starting at <paramref name="t"/>.
/// Handles a leading money character, "(N) currency", an adjective that fuses a number word with a
/// postfix, fractional parts, "с половиной", a bracketed repetition of the value (stored as the
/// alternative value), and several special postfix forms.
/// </summary>
/// <param name="t">First token of the candidate; <c>null</c> yields <c>null</c>.</param>
/// <returns>The extended number token, or <c>null</c> when no number-with-postfix is recognized.</returns>
public static Pullenti.Ner.Core.NumberExToken TryParseNumberWithPostfix(Pullenti.Ner.Token t) {
    if (t == null) {
        return null;
    }
    Pullenti.Ner.Token t0 = t;

    // A single-character money sign in front of the number; isDollar keeps what IsMoneyChar returned.
    string isDollar = null;
    if (t.LengthChar == 1 && t.Next != null) {
        if ((isDollar = Pullenti.Ner.Core.NumberHelper.IsMoneyChar(t)) != null) {
            t = t.Next;
        }
    }

    Pullenti.Ner.NumberToken nt = t as Pullenti.Ner.NumberToken;
    if (nt == null) {
        // "(N) <money postfix>" not preceded by another number.
        if ((!(t.Previous is Pullenti.Ner.NumberToken) && t.IsChar('(') && (t.Next is Pullenti.Ner.NumberToken)) && t.Next.Next != null && t.Next.Next.IsChar(')')) {
            Pullenti.Ner.Core.TerminToken toks1 = m_Postfixes.TryParse(t.Next.Next.Next, Pullenti.Ner.Core.TerminParseAttr.No);
            if (toks1 != null && ((Pullenti.Ner.Core.NumberExType)toks1.Termin.Tag) == Pullenti.Ner.Core.NumberExType.Money) {
                Pullenti.Ner.NumberToken nt0 = t.Next as Pullenti.Ner.NumberToken;
                Pullenti.Ner.Core.NumberExToken res = new Pullenti.Ner.Core.NumberExToken(t, toks1.EndToken, nt0.Value, nt0.Typ, Pullenti.Ner.Core.NumberExType.Money) { AltRealValue = nt0.RealValue, Morph = toks1.BeginToken.Morph };
                return _correctMoney(res, toks1.BeginToken);
            }
        }

        // An adjective whose term splits into <number word><postfix>.
        Pullenti.Ner.TextToken tt = t as Pullenti.Ner.TextToken;
        if (tt == null || !tt.Morph.Class.IsAdjective) {
            return null;
        }
        string val = tt.Term;
        for (int i = 4; i < (val.Length - 5); i++) {
            string v = val.Substring(0, i);
            List<Pullenti.Ner.Core.Termin> li = Pullenti.Ner.Core.NumberHelper.m_Nums.FindTerminsByString(v, tt.Morph.Language);
            if (li == null) {
                continue;
            }
            string vv = val.Substring(i);
            List<Pullenti.Ner.Core.Termin> lii = m_Postfixes.FindTerminsByString(vv, tt.Morph.Language);
            if (lii != null && lii.Count > 0) {
                Pullenti.Ner.Core.NumberExToken re = new Pullenti.Ner.Core.NumberExToken(t, t, ((int)li[0].Tag).ToString(), Pullenti.Ner.NumberSpellingType.Words, (Pullenti.Ner.Core.NumberExType)lii[0].Tag) { Morph = t.Morph };
                _correctExtTypes(re);
                return re;
            }
            // Only the first prefix found among the number words is tried.
            break;
        }
        return null;
    }

    if (t.Next == null && isDollar == null) {
        return null;
    }
    double f = nt.RealValue;
    if (double.IsNaN(f)) {
        return null;
    }

    // Fractional part or digit groups following the number.
    Pullenti.Ner.Token t1 = nt.Next;
    if ((t1 != null && t1.IsCharOf(",.")) || ((t1 is Pullenti.Ner.NumberToken) && (t1.WhitespacesBeforeCount < 3))) {
        Pullenti.Ner.NumberToken tt11 = Pullenti.Ner.Core.NumberHelper.TryParseRealNumber(nt, false, false);
        if (tt11 != null) {
            t1 = tt11.EndToken.Next;
            f = tt11.RealValue;
        }
    }
    if (t1 == null) {
        if (isDollar == null) {
            return null;
        }
    }
    else if ((t1.Next != null && t1.Next.IsValue("С", "З") && t1.Next.Next != null) && t1.Next.Next.IsValue("ПОЛОВИНА", null)) {
        // "<unit> с половиной" adds one half.
        f += 0.5;
        t1 = t1.Next.Next;
    }
    if (t1 != null && t1.IsHiphen && t1.Next != null) {
        t1 = t1.Next;
    }

    bool det = false;   // set once a bracketed repetition of the value has been accepted
    double altf = f;    // alternative value taken from the bracketed repetition

    // Skip a two-character zero number written right after a hyphen.
    if (((t1 is Pullenti.Ner.NumberToken) && t1.Previous != null && t1.Previous.IsHiphen) && (t1 as Pullenti.Ner.NumberToken).IntValue == 0 && t1.LengthChar == 2) {
        t1 = t1.Next;
    }

    // "( <number or НОЛЬ> ... )": the value repeated in brackets.
    if ((t1 != null && t1.Next != null && t1.IsChar('(')) && ((t1.Next is Pullenti.Ner.NumberToken) || t1.Next.IsValue("НОЛЬ", null)) && t1.Next.Next != null) {
        Pullenti.Ner.NumberToken nt1 = t1.Next as Pullenti.Ner.NumberToken;
        double val = (double)0;
        if (nt1 != null) {
            val = nt1.RealValue;
        }
        if (Math.Floor(f) == Math.Floor(val)) {
            Pullenti.Ner.Token ttt = t1.Next.Next;
            if (ttt.IsChar(')')) {
                t1 = ttt.Next;
                det = true;
                if ((t1 is Pullenti.Ner.NumberToken) && (t1 as Pullenti.Ner.NumberToken).IntValue != null && (t1 as Pullenti.Ner.NumberToken).IntValue.Value == 0) {
                    t1 = t1.Next;
                }
            }
            else if (((((ttt is Pullenti.Ner.NumberToken) && ((ttt as Pullenti.Ner.NumberToken).RealValue < 100) && ttt.Next != null) && ttt.Next.IsChar('/') && ttt.Next.Next != null) && ttt.Next.Next.GetSourceText() == "100" && ttt.Next.Next.Next != null) && ttt.Next.Next.Next.IsChar(')')) {
                // "(N xx/100)": accepted when xx equals what GetDecimalRest100 returns for f.
                int rest = GetDecimalRest100(f);
                if ((ttt as Pullenti.Ner.NumberToken).IntValue != null && rest == (ttt as Pullenti.Ner.NumberToken).IntValue.Value) {
                    t1 = ttt.Next.Next.Next.Next;
                    det = true;
                }
            }
            else if ((ttt.IsValue("ЦЕЛЫХ", null) && (ttt.Next is Pullenti.Ner.NumberToken) && ttt.Next.Next != null) && ttt.Next.Next.Next != null && ttt.Next.Next.Next.IsChar(')')) {
                // "(N целых M десятых/сотых/...)": M is scaled by the named fraction.
                Pullenti.Ner.NumberToken num2 = ttt.Next as Pullenti.Ner.NumberToken;
                altf = num2.RealValue;
                if (ttt.Next.Next.IsValue("ДЕСЯТЫЙ", null)) {
                    altf /= 10;
                }
                else if (ttt.Next.Next.IsValue("СОТЫЙ", null)) {
                    altf /= 100;
                }
                else if (ttt.Next.Next.IsValue("ТЫСЯЧНЫЙ", null)) {
                    altf /= 1000;
                }
                else if (ttt.Next.Next.IsValue("ДЕСЯТИТЫСЯЧНЫЙ", null)) {
                    altf /= 10000;
                }
                else if (ttt.Next.Next.IsValue("СТОТЫСЯЧНЫЙ", null)) {
                    altf /= 100000;
                }
                else if (ttt.Next.Next.IsValue("МИЛЛИОННЫЙ", null)) {
                    altf /= 1000000;
                }
                // NOTE(review): when altf >= 1 the pattern is rejected but altf keeps the scaled value
                // instead of being reset to f (the Words/Digit branch below does reset it) - confirm intent.
                if (altf < 1) {
                    altf += val;
                    t1 = ttt.Next.Next.Next.Next;
                    det = true;
                }
            }
            else {
                // "(N <money postfix>)"
                Pullenti.Ner.Core.TerminToken toks1 = m_Postfixes.TryParse(ttt, Pullenti.Ner.Core.TerminParseAttr.No);
                if (toks1 != null) {
                    if (((Pullenti.Ner.Core.NumberExType)toks1.Termin.Tag) == Pullenti.Ner.Core.NumberExType.Money) {
                        if (toks1.EndToken.Next != null && toks1.EndToken.Next.IsChar(')')) {
                            Pullenti.Ner.Core.NumberExToken res = new Pullenti.Ner.Core.NumberExToken(t, toks1.EndToken.Next, nt.Value, nt.Typ, Pullenti.Ner.Core.NumberExType.Money) { RealValue = f, AltRealValue = altf, Morph = toks1.BeginToken.Morph };
                            return _correctMoney(res, toks1.BeginToken);
                        }
                    }
                }
                // "(<full number with postfix>)": parse the bracket content recursively.
                Pullenti.Ner.Core.NumberExToken res2 = TryParseNumberWithPostfix(t1.Next);
                if (res2 != null && res2.EndToken.Next != null && res2.EndToken.Next.IsChar(')')) {
                    res2.BeginToken = t;
                    res2.EndToken = res2.EndToken.Next;
                    res2.AltRealValue = res2.RealValue;
                    res2.RealValue = f;
                    _correctExtTypes(res2);
                    if (res2.WhitespacesAfterCount < 2) {
                        Pullenti.Ner.Core.TerminToken toks2 = m_Postfixes.TryParse(res2.EndToken.Next, Pullenti.Ner.Core.TerminParseAttr.No);
                        if (toks2 != null) {
                            if (((Pullenti.Ner.Core.NumberExType)toks2.Termin.Tag) == Pullenti.Ner.Core.NumberExType.Money) {
                                res2.EndToken = toks2.EndToken;
                            }
                        }
                    }
                    return res2;
                }
            }
        }
        else if (nt1 != null && nt1.Typ == Pullenti.Ner.NumberSpellingType.Words && nt.Typ == Pullenti.Ner.NumberSpellingType.Digit) {
            // Digits followed by a differing value in words: keep the words value as the alternative.
            altf = nt1.RealValue;
            Pullenti.Ner.Token ttt = t1.Next.Next;
            if (ttt.IsChar(')')) {
                t1 = ttt.Next;
                det = true;
            }
            if (!det) {
                altf = f;
            }
        }
    }

    // Skip a bracketed "(сумма ...)" remark.
    if ((t1 != null && t1.IsChar('(') && t1.Next != null) && t1.Next.IsValue("СУММА", null)) {
        Pullenti.Ner.Core.BracketSequenceToken br = Pullenti.Ner.Core.BracketHelper.TryParse(t1, Pullenti.Ner.Core.BracketParseAttr.No, 100);
        if (br != null) {
            t1 = br.EndToken.Next;
        }
    }

    // A leading money sign: the result is money regardless of what follows.
    if (isDollar != null) {
        Pullenti.Ner.Token te = null;
        if (t1 != null) {
            te = t1.Previous;
        }
        else {
            // No token after the number: walk to the last token of the chain.
            for (t1 = t0; t1 != null; t1 = t1.Next) {
                if (t1.Next == null) {
                    te = t1;
                }
            }
        }
        if (te == null) {
            return null;
        }
        if (te.IsHiphen && te.Next != null) {
            if (te.Next.IsValue("МИЛЛИОННЫЙ", null)) {
                f *= 1000000;
                altf *= 1000000;
                te = te.Next;
            }
            else if (te.Next.IsValue("МИЛЛИАРДНЫЙ", null)) {
                f *= 1000000000;
                altf *= 1000000000;
                te = te.Next;
            }
        }
        if (!te.IsWhitespaceAfter && (te.Next is Pullenti.Ner.TextToken)) {
            if (te.Next.IsValue("M", null)) {
                f *= 1000000;
                altf *= 1000000;
                te = te.Next;
            }
            else if (te.Next.IsValue("BN", null)) {
                f *= 1000000000;
                altf *= 1000000000;
                te = te.Next;
            }
        }
        return new Pullenti.Ner.Core.NumberExToken(t0, te, "", nt.Typ, Pullenti.Ner.Core.NumberExType.Money) { RealValue = f, AltRealValue = altf, ExTypParam = isDollar };
    }

    if (t1 == null || (t1.IsNewlineBefore && !det)) {
        return null;
    }

    Pullenti.Ner.Core.TerminToken toks = m_Postfixes.TryParse(t1, Pullenti.Ner.Core.TerminParseAttr.No);
    if ((toks == null && det && (t1 is Pullenti.Ner.NumberToken)) && (t1 as Pullenti.Ner.NumberToken).Value == "0") {
        toks = m_Postfixes.TryParse(t1.Next, Pullenti.Ner.Core.TerminParseAttr.No);
    }

    // A bare "р" is taken as roubles only if one of the 10 preceding tokens is a money-related word or a MONEY referent.
    if (toks == null && t1.IsChar('р')) {
        int cou = 10;
        for (Pullenti.Ner.Token ttt = t0.Previous; ttt != null && cou > 0; ttt = ttt.Previous, cou--) {
            if (ttt.IsValue("СУММА", null) || ttt.IsValue("НАЛИЧНЫЙ", null) || ttt.IsValue("БАЛАНС", null)) {
            }
            else if (ttt.GetReferent() != null && ttt.GetReferent().TypeName == "MONEY") {
            }
            else {
                continue;
            }
            toks = new Pullenti.Ner.Core.TerminToken(t1, t1) { Termin = m_Postfixes.FindTerminsByCanonicText("RUB")[0] };
            if (t1.Next != null && t1.Next.IsChar('.')) {
                toks.EndToken = t1.Next;
            }
            Pullenti.Ner.Core.NumberExType ty = (Pullenti.Ner.Core.NumberExType)toks.Termin.Tag;
            return new Pullenti.Ner.Core.NumberExToken(t, toks.EndToken, nt.Value, nt.Typ, ty) { RealValue = f, AltRealValue = altf, Morph = toks.BeginToken.Morph, ExTypParam = "RUB" };
        }
    }

    if (toks != null) {
        t1 = toks.EndToken;
        // Absorb the dot of an abbreviated postfix, unless the postfix is the full canonical word or not a letter.
        if (!t1.IsChar('.') && t1.Next != null && t1.Next.IsChar('.')) {
            if ((t1 is Pullenti.Ner.TextToken) && t1.IsValue(toks.Termin.Terms[0].CanonicalText, null)) {
            }
            else if (!t1.Chars.IsLetter) {
            }
            else {
                t1 = t1.Next;
            }
        }
        if (toks.Termin.CanonicText == "LTL") {
            return null;
        }
        // A one-token postfix that is a stand-alone preposition or conjunction is rejected.
        if (toks.BeginToken == t1) {
            if (t1.Morph.Class.IsPreposition || t1.Morph.Class.IsConjunction) {
                if (t1.IsWhitespaceBefore && t1.IsWhitespaceAfter) {
                    return null;
                }
            }
        }
        Pullenti.Ner.Core.NumberExType ty = (Pullenti.Ner.Core.NumberExType)toks.Termin.Tag;
        Pullenti.Ner.Core.NumberExToken res = new Pullenti.Ner.Core.NumberExToken(t, t1, nt.Value, nt.Typ, ty) { RealValue = f, AltRealValue = altf, Morph = toks.BeginToken.Morph };
        if (ty != Pullenti.Ner.Core.NumberExType.Money) {
            _correctExtTypes(res);
            return res;
        }
        return _correctMoney(res, toks.BeginToken);
    }

    Pullenti.Ner.Core.NumberExToken pfx = _attachSpecPostfix(t1);
    if (pfx != null) {
        pfx.BeginToken = t;
        pfx.Value = nt.Value;
        pfx.Typ = nt.Typ;
        pfx.RealValue = f;
        pfx.AltRealValue = altf;
        return pfx;
    }

    // "<N> <preposition/conjunction> <M unit>": borrow the unit of the following number (not after "НА").
    if (t1.Next != null && (t1.Morph.Class.IsPreposition || t1.Morph.Class.IsConjunction)) {
        if (t1.IsValue("НА", null)) {
        }
        else {
            Pullenti.Ner.Core.NumberExToken nn = TryParseNumberWithPostfix(t1.Next);
            if (nn != null) {
                return new Pullenti.Ner.Core.NumberExToken(t, t, nt.Value, nt.Typ, nn.ExTyp) { RealValue = f, AltRealValue = altf, ExTyp2 = nn.ExTyp2, ExTypParam = nn.ExTypParam };
            }
        }
    }

    // "<N>смх<M>" and similar: a unit fused with the multiplication sign (Cyrillic or Latin spelling).
    if (!t1.IsWhitespaceAfter && (t1.Next is Pullenti.Ner.NumberToken) && (t1 is Pullenti.Ner.TextToken)) {
        string term = (t1 as Pullenti.Ner.TextToken).Term;
        Pullenti.Ner.Core.NumberExType ty = Pullenti.Ner.Core.NumberExType.Undefined;
        if (term == "СМХ" || term == "CMX") {
            ty = Pullenti.Ner.Core.NumberExType.Santimeter;
        }
        else if (term == "MX" || term == "МХ") {
            ty = Pullenti.Ner.Core.NumberExType.Meter;
        }
        else if (term == "MMX" || term == "ММХ") {
            ty = Pullenti.Ner.Core.NumberExType.Millimeter;
        }
        if (ty != Pullenti.Ner.Core.NumberExType.Undefined) {
            return new Pullenti.Ner.Core.NumberExToken(t, t1, nt.Value, nt.Typ, ty) { RealValue = f, AltRealValue = altf, MultAfter = true };
        }
    }
    return null;
}
/// <summary>
/// Tries to parse an adverb-like token starting at <paramref name="t"/>: a negated adverb ("НЕ ..."),
/// the pairs "ДРУГ ... ДРУГ" and "САМ ... СЕБЯ" (optionally with prepositions), a term from
/// <c>m_Termins</c>, a dictionary adverb, or "НА ВСТРЕЧА" followed by an each-other phrase.
/// </summary>
/// <param name="t">First token of the candidate; <c>null</c> yields <c>null</c>.</param>
/// <returns>The parsed token, or <c>null</c> when nothing matches.</returns>
public static AdverbToken TryParse(Pullenti.Ner.Token t) {
    if (t == null) {
        return null;
    }

    // "НЕ <adverb>": parse the rest and mark the result as negated.
    if ((t is Pullenti.Ner.TextToken) && (t as Pullenti.Ner.TextToken).Term == "НЕ") {
        AdverbToken nn = TryParse(t.Next);
        if (nn != null) {
            nn.Not = true;
            nn.BeginToken = t;
            return nn;
        }
    }

    // Remember the start, then step over a leading preposition.
    Pullenti.Ner.Token t0 = t;
    if (t.Next != null && t.Morph.Class.IsPreposition) {
        t = t.Next;
    }

    if (t.IsValue("ДРУГ", null) || t.IsValue("САМ", null)) {
        Pullenti.Ner.Token t1 = t.Next;
        if (t1 != null && t1.Morph.Class.IsPreposition) {
            t1 = t1.Next;
        }
        if (t1 != null) {
            if (t1.IsValue("ДРУГ", null) && t.IsValue("ДРУГ", null)) {
                return new AdverbToken(t0, t1) { Typ = Pullenti.Semantic.SemAttributeType.EachOther };
            }
            if (t1.IsValue("СЕБЯ", null) && t.IsValue("САМ", null)) {
                return new AdverbToken(t0, t1) { Typ = Pullenti.Semantic.SemAttributeType.Himelf };
            }
        }
    }

    Pullenti.Ner.Core.TerminToken tok = m_Termins.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
    if (tok != null) {
        AdverbToken res = new AdverbToken(t0, tok.EndToken) { Typ = (Pullenti.Semantic.SemAttributeType)tok.Termin.Tag };
        t = res.EndToken.Next;
        if (t != null && t.IsComma) {
            t = t.Next;
        }
        // Comparatives absorb a following "ЧЕМ" (optionally after a comma).
        if (res.Typ == Pullenti.Semantic.SemAttributeType.Less || res.Typ == Pullenti.Semantic.SemAttributeType.Great) {
            if (t != null && t.IsValue("ЧЕМ", null)) {
                res.EndToken = t;
            }
        }
        return res;
    }

    Pullenti.Morph.MorphClass mc = t.GetMorphClassInDictionary();
    if (mc.IsAdverb) {
        return new AdverbToken(t, t);
    }

    // "НА ВСТРЕЧА" directly followed by an each-other phrase: only the two words form the result.
    if (t.IsValue("ВСТРЕЧА", null) && t.Previous != null && t.Previous.IsValue("НА", null)) {
        AdverbToken ne = TryParse(t.Next);
        if (ne != null && ne.Typ == Pullenti.Semantic.SemAttributeType.EachOther) {
            return new AdverbToken(t.Previous, t);
        }
    }
    return null;
}
/// <summary>
/// Core parser for one organization-name item starting at <paramref name="t"/>.
/// Tries, in order: a referent token (denomination, or a Latin geo name followed by a Latin item),
/// a standard tail, a standard name, an English legal-form item, a ", X и Y" enumeration,
/// an "&amp;" / "AND" / "UND" continuation, a noun phrase compatible with <paramref name="prev"/>,
/// an "and" continuation, a prepositional phrase, an "OF ..." phrase and finally a single word
/// (with hyphen / slash / dot continuations).
/// </summary>
/// <param name="t">Token to start from; <c>null</c> yields <c>null</c>.</param>
/// <param name="prev">Previously attached item, or <c>null</c>; several branches only apply when it is present.</param>
/// <param name="extOnto">Forwarded to recursive calls; when <c>true</c>, any noun phrase is accepted in the second noun-phrase branch.</param>
/// <returns>The parsed item, or <c>null</c>.</returns>
static OrgItemNameToken _TryAttach(Pullenti.Ner.Token t, OrgItemNameToken prev, bool extOnto) {
    if (t == null) {
        return null;
    }

    // Tokens that already carry a referent.
    Pullenti.Ner.Referent r = t.GetReferent();
    if (r != null) {
        if (r.TypeName == "DENOMINATION") {
            return new OrgItemNameToken(t, t) { Value = r.ToString(true, t.Kit.BaseLanguage, 0), IsDenomination = true };
        }
        if ((r is Pullenti.Ner.Geo.GeoReferent) && t.Chars.IsLatinLetter) {
            OrgItemNameToken res2 = _TryAttach(t.Next, prev, extOnto);
            if (res2 != null && res2.Chars.IsLatinLetter) {
                res2.BeginToken = t;
                res2.Value = string.Format("{0} {1}", Pullenti.Ner.Core.MiscHelper.GetTextValueOfMetaToken(t as Pullenti.Ner.MetaToken, Pullenti.Ner.Core.GetTextAttr.No), res2.Value);
                res2.IsInDictionary = false;
                return res2;
            }
        }
        return null;
    }

    Pullenti.Ner.TextToken tt = t as Pullenti.Ner.TextToken;
    if (tt == null) {
        return null;
    }
    OrgItemNameToken res = null;

    // Standard tails (also right after a comma); a non-null Tag marks an empty word instead of a tail.
    Pullenti.Ner.Core.TerminToken tok = m_StdTails.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
    if (tok == null && t.IsChar(',')) {
        tok = m_StdTails.TryParse(t.Next, Pullenti.Ner.Core.TerminParseAttr.No);
    }
    if (tok != null) {
        return new OrgItemNameToken(t, tok.EndToken) { Value = tok.Termin.CanonicText, IsStdTail = tok.Termin.Tag == null, IsEmptyWord = tok.Termin.Tag != null, Morph = tok.Morph };
    }
    if ((tok = m_StdNames.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No)) != null) {
        return new OrgItemNameToken(t, tok.EndToken) { Value = tok.Termin.CanonicText, IsStdName = true };
    }

    // English legal-form item (also right after a comma) is treated as a standard tail.
    OrgItemEngItem eng = OrgItemEngItem.TryAttach(t, false);
    if (eng == null && t.IsChar(',')) {
        eng = OrgItemEngItem.TryAttach(t.Next, false);
    }
    if (eng != null) {
        return new OrgItemNameToken(t, eng.EndToken) { Value = eng.FullValue, IsStdTail = true };
    }

    // A lower-case word may only follow a lower-case or capitalized item.
    if (tt.Chars.IsAllLower && prev != null) {
        if (!prev.Chars.IsAllLower && !prev.Chars.IsCapitalUpper) {
            return null;
        }
    }

    // ", X и Y": X is returned when X and Y agree with prev in letter case and grammatical case
    // and neither starts an organization type.
    if (tt.IsChar(',') && prev != null) {
        Pullenti.Ner.Core.NounPhraseToken npt1 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t.Next, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
        if (npt1 == null || npt1.Chars != prev.Chars || ((npt1.Morph.Case & prev.Morph.Case)).IsUndefined) {
            return null;
        }
        OrgItemTypeToken ty = OrgItemTypeToken.TryAttach(t.Next, false, null);
        if (ty != null) {
            return null;
        }
        if (npt1.EndToken.Next == null || !npt1.EndToken.Next.IsValue("И", null)) {
            return null;
        }
        Pullenti.Ner.Token t1 = npt1.EndToken.Next;
        Pullenti.Ner.Core.NounPhraseToken npt2 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t1.Next, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
        if (npt2 == null || npt2.Chars != prev.Chars || ((npt2.Morph.Case & npt1.Morph.Case & prev.Morph.Case)).IsUndefined) {
            return null;
        }
        ty = OrgItemTypeToken.TryAttach(t1.Next, false, null);
        if (ty != null) {
            return null;
        }
        res = new OrgItemNameToken(npt1.BeginToken, npt1.EndToken) { Morph = npt1.Morph, Value = npt1.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false) };
        res.IsNounPhrase = true;
        res.IsAfterConjunction = true;
        if (prev.Preposition != null) {
            res.Preposition = prev.Preposition;
        }
        return res;
    }

    // "&" / "AND" / "UND" continuation.
    if ((tt.IsChar('&') || tt.IsValue("AND", null) || tt.IsValue("UND", null)) && prev != null) {
        if ((tt.Next is Pullenti.Ner.TextToken) && tt.LengthChar == 1 && tt.Next.Chars.IsLatinLetter) {
            res = new OrgItemNameToken(tt, tt.Next) { Chars = tt.Next.Chars };
            res.IsAfterConjunction = true;
            res.Value = "& " + (tt.Next as Pullenti.Ner.TextToken).Term;
            return res;
        }
        res = OrgItemNameToken.TryAttach(tt.Next, null, extOnto, false);
        if (res == null || res.Chars != prev.Chars) {
            return null;
        }
        res.IsAfterConjunction = true;
        res.Value = "& " + res.Value;
        return res;
    }

    if (!tt.Chars.IsLetter) {
        return null;
    }

    // NOTE(review): expinf is computed but never read below; kept because it is unclear from here
    // whether FindDerivates has side effects - confirm and remove if it does not.
    List<Pullenti.Semantic.Utils.DerivateGroup> expinf = null;
    if (prev != null && prev.EndToken.GetMorphClassInDictionary().IsNoun) {
        string wo = prev.EndToken.GetNormalCaseText(Pullenti.Morph.MorphClass.Noun, Pullenti.Morph.MorphNumber.Singular, Pullenti.Morph.MorphGender.Undefined, false);
        expinf = Pullenti.Semantic.Utils.DerivateService.FindDerivates(wo, true, prev.EndToken.Morph.Language);
    }

    Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
    if (npt != null && npt.InternalNoun != null) {
        npt = null;
    }

    // explOk: a semantic link could be created between the previous noun and this noun phrase.
    bool explOk = false;
    if (npt != null && prev != null && prev.EndToken.GetMorphClassInDictionary().IsNoun) {
        Pullenti.Ner.Core.NounPhraseToken npt0 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(prev.EndToken, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
        if (npt0 != null) {
            List<Pullenti.Semantic.Core.SemanticLink> links = Pullenti.Semantic.Core.SemanticHelper.TryCreateLinks(npt0, npt, null);
            if (links.Count > 0) {
                explOk = true;
            }
        }
    }

    if (npt != null && (explOk || npt.Morph.Case.IsGenitive || (prev != null && !((prev.Morph.Case & npt.Morph.Case)).IsUndefined))) {
        // Noun phrase that is linked to, genitive, or case-compatible with the previous item.
        Pullenti.Morph.MorphClass mc = npt.BeginToken.GetMorphClassInDictionary();
        if (mc.IsVerb || mc.IsPronoun) {
            return null;
        }
        if (mc.IsAdverb) {
            if (npt.BeginToken.Next != null && npt.BeginToken.Next.IsHiphen) {
            }
            else {
                return null;
            }
        }
        if (mc.IsPreposition) {
            return null;
        }
        if (mc.IsNoun && npt.Chars.IsAllLower) {
            Pullenti.Morph.MorphCase ca = npt.Morph.Case;
            if ((!ca.IsDative && !ca.IsGenitive && !ca.IsInstrumental) && !ca.IsPrepositional) {
                return null;
            }
        }
        res = new OrgItemNameToken(npt.BeginToken, npt.EndToken) { Morph = npt.Morph, Value = npt.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false) };
        res.IsNounPhrase = true;
        if ((npt.EndToken.WhitespacesAfterCount < 2) && (npt.EndToken.Next is Pullenti.Ner.TextToken)) {
            Pullenti.Ner.Core.NounPhraseToken npt2 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(npt.EndToken.Next, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
            if (npt2 != null && npt2.Morph.Case.IsGenitive && npt2.Chars.IsAllLower) {
                // A lower-case genitive phrase is appended unless it is a type, an eponym or a singular person property.
                OrgItemTypeToken typ = OrgItemTypeToken.TryAttach(npt.EndToken.Next, true, null);
                OrgItemEponymToken epo = OrgItemEponymToken.TryAttach(npt.EndToken.Next, false);
                Pullenti.Ner.ReferentToken rtt = t.Kit.ProcessReferent("PERSONPROPERTY", npt.EndToken.Next);
                if (typ == null && epo == null && (rtt == null || rtt.Morph.Number == Pullenti.Morph.MorphNumber.Plural)) {
                    res.EndToken = npt2.EndToken;
                    res.Value = string.Format("{0} {1}", res.Value, Pullenti.Ner.Core.MiscHelper.GetTextValueOfMetaToken(npt2, Pullenti.Ner.Core.GetTextAttr.No));
                }
            }
            else if (npt.EndToken.Next.IsComma && (npt.EndToken.Next.Next is Pullenti.Ner.TextToken)) {
                // ", <adjective+verb form agreeing with the phrase> <lower-case dative/genitive phrase>"
                Pullenti.Ner.Token tt2 = npt.EndToken.Next.Next;
                Pullenti.Morph.MorphClass mv2 = tt2.GetMorphClassInDictionary();
                if (mv2.IsAdjective && mv2.IsVerb) {
                    Pullenti.Morph.MorphBaseInfo bi = new Pullenti.Morph.MorphBaseInfo() { Case = npt.Morph.Case, Gender = npt.Morph.Gender, Number = npt.Morph.Number };
                    if (tt2.Morph.CheckAccord(bi, false, false)) {
                        npt2 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(tt2.Next, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                        if (npt2 != null && (npt2.Morph.Case.IsDative || npt2.Morph.Case.IsGenitive) && npt2.Chars.IsAllLower) {
                            res.EndToken = npt2.EndToken;
                            res.Value = string.Format("{0} {1}", res.Value, Pullenti.Ner.Core.MiscHelper.GetTextValue(npt.EndToken.Next, res.EndToken, Pullenti.Ner.Core.GetTextAttr.No));
                        }
                    }
                }
            }
        }
        if (explOk) {
            res.IsAfterConjunction = true;
        }
    }
    else if (npt != null && ((prev != null && prev.IsNounPhrase && npt.Morph.Case.IsInstrumental) || extOnto)) {
        res = new OrgItemNameToken(npt.BeginToken, npt.EndToken) { Morph = npt.Morph, Value = npt.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false) };
        res.IsNounPhrase = true;
    }
    else if (tt.IsAnd) {
        // "and <noun phrase>" agreeing in case with the previous item.
        res = TryAttach(tt.Next, prev, extOnto, false);
        if (res == null || !res.IsNounPhrase || prev == null) {
            return null;
        }
        if (((prev.Morph.Case & res.Morph.Case)).IsUndefined) {
            return null;
        }
        if (prev.Morph.Number != Pullenti.Morph.MorphNumber.Undefined && res.Morph.Number != Pullenti.Morph.MorphNumber.Undefined) {
            if (((prev.Morph.Number & res.Morph.Number)) == Pullenti.Morph.MorphNumber.Undefined) {
                if (prev.Chars != res.Chars) {
                    return null;
                }
                OrgItemTypeToken ty = OrgItemTypeToken.TryAttach(res.EndToken.Next, false, null);
                if (ty != null) {
                    return null;
                }
            }
        }
        // NOTE(review): reads Chars and writes the same value back; kept as in the original - confirm whether it matters.
        Pullenti.Morph.CharsInfo ci = res.Chars;
        res.Chars = ci;
        res.IsAfterConjunction = true;
        return res;
    }
    else if (((tt.Term == "ПО" || tt.Term == "ПРИ" || tt.Term == "ЗА") || tt.Term == "С" || tt.Term == "В") || tt.Term == "НА") {
        // Preposition + noun phrase; each preposition requires specific grammatical cases.
        npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t.Next, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
        if (npt != null) {
            if (m_VervotWords.TryParse(npt.EndToken, Pullenti.Ner.Core.TerminParseAttr.No) != null) {
                return null;
            }
            bool ok = false;
            if (tt.Term == "ПО") {
                ok = npt.Morph.Case.IsDative;
            }
            else if (tt.Term == "С") {
                ok = npt.Morph.Case.IsInstrumental;
            }
            else if (tt.Term == "ЗА") {
                ok = npt.Morph.Case.IsGenitive | npt.Morph.Case.IsInstrumental;
            }
            else if (tt.Term == "НА") {
                ok = npt.Morph.Case.IsPrepositional;
            }
            else if (tt.Term == "В") {
                ok = npt.Morph.Case.IsDative | npt.Morph.Case.IsPrepositional;
                if (ok) {
                    // Only "В СФЕРЕ ..." / "В ОБЛАСТИ ..." are accepted.
                    ok = false;
                    if (t.Next.IsValue("СФЕРА", null) || t.Next.IsValue("ОБЛАСТЬ", null)) {
                        ok = true;
                    }
                }
            }
            else if (tt.Term == "ПРИ") {
                ok = npt.Morph.Case.IsPrepositional;
                if (ok) {
                    // "ПРИ <organization>" belongs to another organization, not to this name.
                    if (OrgItemTypeToken.TryAttach(tt.Next, true, null) != null) {
                        ok = false;
                    }
                    else {
                        Pullenti.Ner.ReferentToken rt = tt.Kit.ProcessReferent(Pullenti.Ner.Org.OrganizationAnalyzer.ANALYZER_NAME, tt.Next);
                        if (rt != null) {
                            ok = false;
                        }
                    }
                }
                string s = npt.Noun.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false);
                if (s == "ПОДДЕРЖКА" || s == "УЧАСТИЕ") {
                    ok = false;
                }
            }
            else {
                ok = npt.Morph.Case.IsPrepositional;
            }
            if (ok) {
                res = new OrgItemNameToken(t, npt.EndToken) { Morph = npt.Morph, Value = npt.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Singular, Pullenti.Morph.MorphGender.Undefined, false), Chars = npt.Chars };
                res.IsNounPhrase = true;
                res.Preposition = tt.Term;
                // "ПО ДЕЛАМ / ВОПРОСАМ <genitive>[, <item> ... и <item>]": append the enumerated objects.
                if ((res.Value == "ДЕЛО" || res.Value == "ВОПРОС") && !res.IsNewlineAfter) {
                    OrgItemNameToken res2 = _TryAttach(res.EndToken.Next, res, extOnto);
                    if (res2 != null && res2.Morph.Case.IsGenitive) {
                        res.Value = string.Format("{0} {1}", res.Value, res2.Value);
                        res.EndToken = res2.EndToken;
                        for (Pullenti.Ner.Token ttt = res2.EndToken.Next; ttt != null; ttt = ttt.Next) {
                            if (!ttt.IsCommaAnd) {
                                break;
                            }
                            OrgItemNameToken res3 = _TryAttach(ttt.Next, res2, extOnto);
                            if (res3 == null) {
                                break;
                            }
                            res.Value = string.Format("{0} {1}", res.Value, res3.Value);
                            res.EndToken = res3.EndToken;
                            if (ttt.IsAnd) {
                                break;
                            }
                            ttt = res.EndToken;
                        }
                    }
                }
            }
        }
        if (res == null) {
            return null;
        }
    }
    else if (tt.Term == "OF") {
        // "OF [article] Xxx ...": collect the following Latin words up to a preposition or a wide gap.
        Pullenti.Ner.Token t1 = tt.Next;
        if (t1 != null && Pullenti.Ner.Core.MiscHelper.IsEngArticle(t1)) {
            t1 = t1.Next;
        }
        if (t1 != null && t1.Chars.IsLatinLetter && !t1.Chars.IsAllLower) {
            res = new OrgItemNameToken(t, t1) { Chars = t1.Chars, Morph = t1.Morph };
            for (Pullenti.Ner.Token ttt = t1.Next; ttt != null; ttt = ttt.Next) {
                if (ttt.WhitespacesBeforeCount > 2) {
                    break;
                }
                if (Pullenti.Ner.Core.MiscHelper.IsEngAdjSuffix(ttt)) {
                    ttt = ttt.Next;
                    continue;
                }
                if (!ttt.Chars.IsLatinLetter) {
                    break;
                }
                if (ttt.Morph.Class.IsPreposition) {
                    break;
                }
                t1 = (res.EndToken = ttt);
            }
            res.Value = Pullenti.Ner.Core.MiscHelper.GetTextValue(t, t1, Pullenti.Ner.Core.GetTextAttr.IgnoreArticles);
            res.Preposition = tt.Term;
            return res;
        }
    }

    // Fallback: a single word, extended through hyphens, slashes and dots.
    if (res == null) {
        if (tt.Chars.IsLatinLetter && tt.LengthChar == 1) {
        }
        else if (tt.Chars.IsAllLower || (tt.LengthChar < 2)) {
            if (!tt.Chars.IsLatinLetter || prev == null || !prev.Chars.IsLatinLetter) {
                return null;
            }
        }
        if (tt.Chars.IsCyrillicLetter) {
            Pullenti.Morph.MorphClass mc = tt.GetMorphClassInDictionary();
            if (mc.IsVerb || mc.IsAdverb) {
                return null;
            }
        }
        else if (tt.Chars.IsLatinLetter && !tt.IsWhitespaceAfter) {
            // A short Latin word glued to a number is not a name.
            if (!tt.IsWhitespaceAfter && (tt.LengthChar < 5)) {
                if (tt.Next is Pullenti.Ner.NumberToken) {
                    return null;
                }
            }
        }
        res = new OrgItemNameToken(tt, tt) { Value = tt.Term, Morph = tt.Morph };
        for (t = tt.Next; t != null; t = t.Next) {
            if (((t.IsHiphen || t.IsCharOf("\\/")) && t.Next != null && (t.Next is Pullenti.Ner.TextToken)) && !t.IsWhitespaceBefore && !t.IsWhitespaceAfter) {
                t = t.Next;
                res.EndToken = t;
                res.Value = string.Format("{0}{1}{2}", res.Value, (t.Previous.IsChar('.') ? '.' : '-'), (t as Pullenti.Ner.TextToken).Term);
            }
            else if (t.IsChar('.')) {
                if (!t.IsWhitespaceAfter && !t.IsWhitespaceBefore && (t.Next is Pullenti.Ner.TextToken)) {
                    res.EndToken = t.Next;
                    t = t.Next;
                    res.Value = string.Format("{0}.{1}", res.Value, (t as Pullenti.Ner.TextToken).Term);
                }
                else if ((t.Next != null && !t.IsNewlineAfter && t.Next.Chars.IsLatinLetter) && tt.Chars.IsLatinLetter) {
                    res.EndToken = t;
                }
                else {
                    break;
                }
            }
            else {
                break;
            }
        }
    }

    // Mark the item as a dictionary word if any non-service word inside it has a dictionary form.
    for (Pullenti.Ner.Token t0 = res.BeginToken; t0 != null; t0 = t0.Next) {
        if ((tt = t0 as Pullenti.Ner.TextToken) != null && tt.IsLetters) {
            if (!tt.Morph.Class.IsConjunction && !tt.Morph.Class.IsPreposition) {
                foreach (Pullenti.Morph.MorphBaseInfo mf in tt.Morph.Items) {
                    // Guarded cast: items that are not word forms are skipped instead of throwing.
                    Pullenti.Morph.MorphWordForm wf = mf as Pullenti.Morph.MorphWordForm;
                    if (wf != null && wf.IsInDictionary) {
                        res.IsInDictionary = true;
                    }
                }
            }
        }
        if (t0 == res.EndToken) {
            break;
        }
    }

    // An all-upper single token directly followed by a number (optionally via a hyphen): append the number.
    if (res.BeginToken == res.EndToken && res.BeginToken.Chars.IsAllUpper) {
        if (res.EndToken.Next != null && !res.EndToken.IsWhitespaceAfter) {
            Pullenti.Ner.Token t1 = res.EndToken.Next;
            if (t1.Next != null && !t1.IsWhitespaceAfter && t1.IsHiphen) {
                t1 = t1.Next;
            }
            if (t1 is Pullenti.Ner.NumberToken) {
                res.Value += (t1 as Pullenti.Ner.NumberToken).Value;
                res.EndToken = t1;
            }
        }
    }

    // A single token with IsLastLower set: the value is the source text cut after its last upper-case character.
    if (res.BeginToken == res.EndToken && res.BeginToken.Chars.IsLastLower) {
        string src = res.BeginToken.GetSourceText();
        for (int i = src.Length - 1; i >= 0; i--) {
            if (char.IsUpper(src[i])) {
                res.Value = src.Substring(0, i + 1);
                break;
            }
        }
    }
    return res;
}
/// <summary>
/// Tries to recognise a title item starting at <paramref name="t"/>: a theme marker ("ТЕМА",
/// "ПО/НА ТЕМА"), a translation note ("ПЕРЕВОД С ..."), a section header, a speciality, or a
/// keyword found in <c>m_Termins</c> (optionally wrapped in brackets).
/// </summary>
/// <param name="t">First token to analyse; null yields null.</param>
/// <returns>The recognised <see cref="TitleItemToken"/>, or null when nothing matches.</returns>
public static TitleItemToken TryAttach(Pullenti.Ner.Token t)
{
    // The method calls itself with ".Next" tokens, so null is a regular input at end of text.
    if (t == null)
    {
        return null;
    }
    Pullenti.Ner.TextToken tt = t as Pullenti.Ner.TextToken;
    if (tt != null)
    {
        Pullenti.Ner.Token t1 = (Pullenti.Ner.Token)tt;
        if (tt.Term == "ТЕМА")
        {
            // "ТЕМА <type>[:]" gives a combined item, otherwise a plain theme marker "ТЕМА[:]".
            TitleItemToken tit = TryAttach(tt.Next);
            if (tit != null && tit.Typ == Types.Typ)
            {
                t1 = tit.EndToken;
                if (t1.Next != null && t1.Next.IsChar(':'))
                {
                    t1 = t1.Next;
                }
                return new TitleItemToken(t, t1, Types.TypAndTheme) { Value = tit.Value };
            }
            if (tt.Next != null && tt.Next.IsChar(':'))
            {
                t1 = tt.Next;
            }
            return new TitleItemToken(tt, t1, Types.Theme);
        }
        if (tt.Term == "ПО" || tt.Term == "НА")
        {
            if (tt.Next != null && tt.Next.IsValue("ТЕМА", null))
            {
                t1 = tt.Next;
                if (t1.Next != null && t1.Next.IsChar(':'))
                {
                    t1 = t1.Next;
                }
                return new TitleItemToken(tt, t1, Types.Theme);
            }
        }
        if (tt.Term == "ПЕРЕВОД" || tt.Term == "ПЕР")
        {
            // "ПЕРЕВОД С <word>" / "ПЕР. С <word>"; both Latin and Cyrillic spellings of the
            // single-letter preposition are accepted.
            Pullenti.Ner.Token tt2 = tt.Next;
            if (tt2 != null && tt2.IsChar('.'))
            {
                tt2 = tt2.Next;
            }
            if (tt2 is Pullenti.Ner.TextToken)
            {
                if ((tt2 as Pullenti.Ner.TextToken).Term == "C" || (tt2 as Pullenti.Ner.TextToken).Term == "С")
                {
                    tt2 = tt2.Next;
                    if (tt2 is Pullenti.Ner.TextToken)
                    {
                        return new TitleItemToken(t, tt2, Types.Translate);
                    }
                }
            }
        }
        if (tt.Term == "СЕКЦИЯ" || tt.Term == "SECTION" || tt.Term == "СЕКЦІЯ")
        {
            t1 = tt.Next;
            if (t1 != null && t1.IsChar(':'))
            {
                t1 = t1.Next;
            }
            Pullenti.Ner.Core.BracketSequenceToken br = Pullenti.Ner.Core.BracketHelper.TryParse(t1, Pullenti.Ner.Core.BracketParseAttr.No, 100);
            if (br != null)
            {
                t1 = br.EndToken;
            }
            else if (t1 != tt.Next)
            {
                // A colon was skipped: take everything up to the end of the line.
                for (; t1 != null; t1 = t1.Next)
                {
                    if (t1.IsNewlineAfter)
                    {
                        break;
                    }
                }
                if (t1 == null)
                {
                    return null;
                }
            }
            if (t1 != tt.Next)
            {
                return new TitleItemToken(tt, t1, Types.Dust);
            }
        }
        // Speciality introduced by the keyword itself, by "<preposition> СПЕЦИАЛЬНОСТЬ",
        // or by a slash at the start of a line.
        t1 = null;
        if (tt.IsValue("СПЕЦИАЛЬНОСТЬ", "СПЕЦІАЛЬНІСТЬ"))
        {
            t1 = tt.Next;
        }
        else if (tt.Morph.Class.IsPreposition && tt.Next != null && tt.Next.IsValue("СПЕЦИАЛЬНОСТЬ", "СПЕЦІАЛЬНІСТЬ"))
        {
            t1 = tt.Next.Next;
        }
        else if (tt.IsChar('/') && tt.IsNewlineBefore)
        {
            t1 = tt.Next;
        }
        if (t1 != null)
        {
            if (t1.IsCharOf(":") || t1.IsHiphen)
            {
                t1 = t1.Next;
            }
            TitleItemToken spec = TryAttachSpeciality(t1, true);
            if (spec != null)
            {
                spec.BeginToken = t;
                return spec;
            }
        }
    }
    TitleItemToken sss = TryAttachSpeciality(t, false);
    if (sss != null)
    {
        return sss;
    }
    if (t is Pullenti.Ner.ReferentToken)
    {
        return null;
    }
    Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
    if (npt != null)
    {
        string s = npt.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false);
        Pullenti.Ner.Core.TerminToken tok = m_Termins.TryParse(npt.EndToken, Pullenti.Ner.Core.TerminParseAttr.No);
        if (tok != null)
        {
            Types ty = (Types)tok.Termin.Tag;
            if (ty == Types.Typ)
            {
                TitleItemToken tit = TryAttach(tok.EndToken.Next);
                if (tit != null && tit.Typ == Types.Theme)
                {
                    return new TitleItemToken(npt.BeginToken, tit.EndToken, Types.TypAndTheme) { Value = s };
                }
                // These bare nouns are rejected as a work type.
                if (s == "РАБОТА" || s == "РОБОТА" || s == "ПРОЕКТ")
                {
                    return null;
                }
                Pullenti.Ner.Token t1 = tok.EndToken;
                if (s == "ДИССЕРТАЦИЯ" || s == "ДИСЕРТАЦІЯ")
                {
                    // Look a few tokens ahead for the degree (doctor / candidate / master) and,
                    // when found, refine the value and extend the item to cover it.
                    int err = 0;
                    for (Pullenti.Ner.Token ttt = t1.Next; ttt != null; ttt = ttt.Next)
                    {
                        if (ttt.Morph.Class.IsPreposition)
                        {
                            continue;
                        }
                        if (ttt.IsValue("СОИСКАНИЕ", ""))
                        {
                            continue;
                        }
                        Pullenti.Ner.Core.NounPhraseToken npt1 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(ttt, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                        if (npt1 != null && npt1.Noun.IsValue("СТЕПЕНЬ", "СТУПІНЬ"))
                        {
                            t1 = (ttt = npt1.EndToken);
                            continue;
                        }
                        Pullenti.Ner.ReferentToken rt = t1.Kit.ProcessReferent("PERSON", ttt);
                        if (rt != null && (rt.Referent is Pullenti.Ner.Person.PersonPropertyReferent))
                        {
                            Pullenti.Ner.Person.PersonPropertyReferent ppr = rt.Referent as Pullenti.Ner.Person.PersonPropertyReferent;
                            if (ppr.Name == "доктор наук")
                            {
                                t1 = rt.EndToken;
                                s = "ДОКТОРСКАЯ ДИССЕРТАЦИЯ";
                                break;
                            }
                            else if (ppr.Name == "кандидат наук")
                            {
                                t1 = rt.EndToken;
                                s = "КАНДИДАТСКАЯ ДИССЕРТАЦИЯ";
                                break;
                            }
                            else if (ppr.Name == "магистр")
                            {
                                t1 = rt.EndToken;
                                s = "МАГИСТЕРСКАЯ ДИССЕРТАЦИЯ";
                                break;
                            }
                        }
                        if (ttt.IsValue("ДОКТОР", null) || ttt.IsValue("КАНДИДАТ", null) || ttt.IsValue("МАГИСТР", "МАГІСТР"))
                        {
                            t1 = ttt;
                            npt1 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(ttt.Next, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                            if (npt1 != null && npt1.EndToken.IsValue("НАУК", null))
                            {
                                t1 = npt1.EndToken;
                            }
                            s = (ttt.IsValue("МАГИСТР", "МАГІСТР") ? "МАГИСТЕРСКАЯ ДИССЕРТАЦИЯ" : (ttt.IsValue("ДОКТОР", null) ? "ДОКТОРСКАЯ ДИССЕРТАЦИЯ" : "КАНДИДАТСКАЯ ДИССЕРТАЦИЯ"));
                            break;
                        }
                        // Give up after a few tokens that match none of the patterns above.
                        if ((++err) > 3)
                        {
                            break;
                        }
                    }
                }
                if (t1.Next != null && t1.Next.IsChar('.'))
                {
                    t1 = t1.Next;
                }
                if (s.EndsWith("ОТЧЕТ") && t1.Next != null && t1.Next.IsValue("О", null))
                {
                    // "ОТЧЕТ О <noun phrase in prepositional case>" is absorbed into the item.
                    Pullenti.Ner.Core.NounPhraseToken npt1 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t1.Next, Pullenti.Ner.Core.NounPhraseParseAttr.ParsePreposition, 0, null);
                    if (npt1 != null && npt1.Morph.Case.IsPrepositional)
                    {
                        t1 = npt1.EndToken;
                    }
                }
                return new TitleItemToken(npt.BeginToken, t1, ty) { Value = s };
            }
        }
    }
    Pullenti.Ner.Core.TerminToken tok1 = m_Termins.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
    if (tok1 != null)
    {
        Pullenti.Ner.Token t1 = tok1.EndToken;
        TitleItemToken re = new TitleItemToken(tok1.BeginToken, t1, (Types)tok1.Termin.Tag);
        return re;
    }
    if (Pullenti.Ner.Core.BracketHelper.CanBeStartOfSequence(t, false, false))
    {
        // Keyword enclosed in brackets: the item spans the keyword through the closing bracket.
        tok1 = m_Termins.TryParse(t.Next, Pullenti.Ner.Core.TerminParseAttr.No);
        if (tok1 != null && Pullenti.Ner.Core.BracketHelper.CanBeEndOfSequence(tok1.EndToken.Next, false, null, false))
        {
            Pullenti.Ner.Token t1 = tok1.EndToken.Next;
            return new TitleItemToken(tok1.BeginToken, t1, (Types)tok1.Termin.Tag);
        }
    }
    return null;
}
/// <summary>
/// Tries to parse an additional (extension) phone number starting at <paramref name="t0"/>.
/// Accepted shapes: "*NNN" / "#NNN[-NNN]", or an optional comma / opening bracket followed by a
/// keyword from <c>m_PhoneTermins</c> and/or a number marker and then a 2..7 character number.
/// </summary>
/// <param name="t0">First token of the candidate; null yields null.</param>
/// <returns>A token of type <c>PhoneItemType.AddNumber</c>, or null when nothing matches.</returns>
public static PhoneItemToken TryAttachAdditional(Pullenti.Ner.Token t0)
{
    Pullenti.Ner.Token t = t0;
    if (t == null)
    {
        return null;
    }
    if (t.IsChar(','))
    {
        t = t.Next;
    }
    else if (t.IsCharOf("*#") && (t.Next is Pullenti.Ner.NumberToken))
    {
        string val0 = (t.Next as Pullenti.Ner.NumberToken).GetSourceText();
        Pullenti.Ner.Token t1 = t.Next;
        // Glue "NNN-NNN" written without spaces into a single value.
        if ((t1.Next != null && t1.Next.IsHiphen && !t1.IsWhitespaceAfter) && (t1.Next.Next is Pullenti.Ner.NumberToken) && !t1.Next.IsWhitespaceAfter)
        {
            t1 = t1.Next.Next;
            val0 += t1.GetSourceText();
        }
        if (val0.Length >= 3 && (val0.Length < 7))
        {
            return new PhoneItemToken(t, t1) { ItemType = PhoneItemType.AddNumber, Value = val0 };
        }
    }
    bool br = false;
    if (t != null && t.IsChar('('))
    {
        if (t.Previous != null && t.Previous.IsComma)
        {
            return null;
        }
        br = true;
        t = t.Next;
    }
    Pullenti.Ner.Core.TerminToken to = m_PhoneTermins.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
    if (to == null)
    {
        // Without a keyword the number is accepted only in brackets placed right after the
        // preceding text.
        if (!br)
        {
            return null;
        }
        if (t0.WhitespacesBeforeCount > 1)
        {
            return null;
        }
    }
    else if (to.Termin.Tag == null)
    {
        return null;
    }
    else
    {
        t = to.EndToken.Next;
    }
    if (t == null)
    {
        return null;
    }
    if (((t.IsValue("НОМЕР", null) || t.IsValue("N", null) || t.IsValue("#", null)) || t.IsValue("№", null) || t.IsValue("NUMBER", null)) || ((t.IsChar('+') && br)))
    {
        t = t.Next;
    }
    else if (t.IsValue("НОМ", null) || t.IsValue("ТЕЛ", null))
    {
        // Abbreviated marker, optionally followed by a period.
        t = t.Next;
        if (t != null && t.IsChar('.'))
        {
            t = t.Next;
        }
    }
    if (t != null && t.IsCharOf(":,") && !t.IsNewlineAfter)
    {
        t = t.Next;
    }
    if (!(t is Pullenti.Ner.NumberToken))
    {
        return null;
    }
    string val = (t as Pullenti.Ner.NumberToken).GetSourceText();
    if ((t.Next != null && t.Next.IsHiphen && !t.IsWhitespaceAfter) && (t.Next.Next is Pullenti.Ner.NumberToken))
    {
        val += t.Next.Next.GetSourceText();
        t = t.Next.Next;
    }
    if ((val.Length < 2) || val.Length > 7)
    {
        return null;
    }
    if (br)
    {
        // An opening bracket must be closed immediately after the number.
        if (t.Next == null || !t.Next.IsChar(')'))
        {
            return null;
        }
        t = t.Next;
    }
    PhoneItemToken res = new PhoneItemToken(t0, t) { ItemType = PhoneItemType.AddNumber, Value = val };
    return res;
}
/// <summary>
/// Builds a <see cref="BlockLine"/> for the text line that starts at <paramref name="t"/>:
/// determines the line end, skips a leading numbering ("1.", "II." ...), classifies the line
/// by keywords from <c>m_Ontology</c> / <paramref name="names"/> and collects word statistics.
/// </summary>
/// <param name="t">First token of the line; null yields null.</param>
/// <param name="names">Optional collection of known block names; may be null.</param>
/// <returns>
/// The described line, or null when <paramref name="t"/> is null or the line consists solely of
/// a list-like noun phrase ("СПИСОК", "УКАЗАТЕЛЬ", "ПОЛОЖЕНИЕ", "ВЫВОД", "РЕЗУЛЬТАТ") ending
/// exactly at the line end.
/// </returns>
public static BlockLine Create(Pullenti.Ner.Token t, Pullenti.Ner.Core.TerminCollection names)
{
    if (t == null)
    {
        return null;
    }
    BlockLine res = new BlockLine(t, t);
    // The line runs up to (not including) the first token that starts a new line.
    for (Pullenti.Ner.Token tt = t; tt != null; tt = tt.Next)
    {
        if (tt != t && tt.IsNewlineBefore)
        {
            break;
        }
        else
        {
            res.EndToken = tt;
        }
    }
    // Skip the leading numbering: arabic or roman numbers, each followed by '.' or by a word
    // that is not all-lowercase.
    while (t != null && t.Next != null && t.EndChar <= res.EndChar)
    {
        if (t is Pullenti.Ner.NumberToken)
        {
        }
        else
        {
            Pullenti.Ner.NumberToken rom = Pullenti.Ner.Core.NumberHelper.TryParseRoman(t);
            if (rom != null && rom.EndToken.Next != null)
            {
                t = rom.EndToken;
            }
            else
            {
                break;
            }
        }
        if (t.Next.IsChar('.'))
        {
        }
        else if ((t.Next is Pullenti.Ner.TextToken) && !t.Next.Chars.IsAllLower)
        {
        }
        else
        {
            break;
        }
        res.NumberEnd = t;
        t = t.Next;
        if (t.IsChar('.') && t.Next != null)
        {
            res.NumberEnd = t;
            t = t.Next;
        }
        if (t.IsNewlineBefore)
        {
            return res;
        }
    }
    Pullenti.Ner.Core.TerminToken tok = m_Ontology.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
    if (tok == null)
    {
        // Retry on the noun of a multi-token noun phrase (e.g. adjective + keyword).
        Pullenti.Ner.Core.NounPhraseToken npt1 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
        if (npt1 != null && npt1.EndToken != npt1.BeginToken)
        {
            tok = m_Ontology.TryParse(npt1.Noun.BeginToken, Pullenti.Ner.Core.TerminParseAttr.No);
        }
    }
    if (tok != null)
    {
        if (t.Previous != null && t.Previous.IsChar(':'))
        {
            tok = null;
        }
    }
    if (tok != null)
    {
        BlkTyps typ = (BlkTyps)tok.Termin.Tag;
        if (typ == BlkTyps.Conslusion)
        {
            // A conclusion keyword is accepted only alone on the line or as
            // "<keyword> <preposition> <chapter keyword>".
            if (t.IsNewlineAfter)
            {
            }
            else if (t.Next != null && t.Next.Morph.Class.IsPreposition && t.Next.Next != null)
            {
                Pullenti.Ner.Core.TerminToken tok2 = m_Ontology.TryParse(t.Next.Next, Pullenti.Ner.Core.TerminParseAttr.No);
                if (tok2 != null && ((BlkTyps)tok2.Termin.Tag) == BlkTyps.Chapter)
                {
                }
                else
                {
                    tok = null;
                }
            }
            else
            {
                tok = null;
            }
        }
        if (t.Kit.BaseLanguage != t.Morph.Language)
        {
            tok = null;
        }
        if (typ == BlkTyps.Index && !t.IsValue("ОГЛАВЛЕНИЕ", null))
        {
            if (!t.IsNewlineAfter && t.Next != null)
            {
                Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t.Next, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                if (npt != null && npt.IsNewlineAfter && npt.Morph.Case.IsGenitive)
                {
                    tok = null;
                }
                else if (npt == null)
                {
                    tok = null;
                }
            }
        }
        if ((typ == BlkTyps.Intro && tok != null && !tok.IsNewlineAfter) && t.IsValue("ВВЕДЕНИЕ", null))
        {
            // "ВВЕДЕНИЕ <genitive noun phrase>" is not treated as an introduction header.
            Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t.Next, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
            if (npt != null && npt.Morph.Case.IsGenitive)
            {
                tok = null;
            }
        }
        if (tok != null)
        {
            if (res.NumberEnd == null)
            {
                res.NumberEnd = tok.EndToken;
                if (res.NumberEnd.EndChar > res.EndChar)
                {
                    res.EndToken = res.NumberEnd;
                }
            }
            res.Typ = typ;
            t = tok.EndToken;
            if (t.Next != null && t.Next.IsCharOf(":."))
            {
                t = t.Next;
                res.EndToken = t;
            }
            if (t.IsNewlineAfter || t.Next == null)
            {
                return res;
            }
            t = t.Next;
        }
    }
    if (t.IsChar('§') && (t.Next is Pullenti.Ner.NumberToken))
    {
        res.Typ = BlkTyps.Chapter;
        res.NumberEnd = t;
        t = t.Next;
    }
    if (names != null)
    {
        Pullenti.Ner.Core.TerminToken tok2 = names.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
        if (tok2 != null && tok2.EndToken.IsNewlineAfter)
        {
            res.EndToken = tok2.EndToken;
            res.IsExistName = true;
            if (res.Typ == BlkTyps.Undefined)
            {
                // Re-classify the text after the numbering without the names collection;
                // anything that is not literature / intro / conclusion becomes a chapter.
                BlockLine li2 = Create((res.NumberEnd == null ? null : res.NumberEnd.Next), null);
                if (li2 != null && ((li2.Typ == BlkTyps.Literature || li2.Typ == BlkTyps.Intro || li2.Typ == BlkTyps.Conslusion)))
                {
                    res.Typ = li2.Typ;
                }
                else
                {
                    res.Typ = BlkTyps.Chapter;
                }
            }
            return res;
        }
    }
    Pullenti.Ner.Token t1 = res.EndToken;
    if ((((t1 is Pullenti.Ner.NumberToken) || t1.IsChar('.'))) && t1.Previous != null)
    {
        t1 = t1.Previous;
        if (t1.IsChar('.'))
        {
            // Trailing dots before the last token (presumably a table-of-contents leader —
            // confirm): remember the fact and step back over the run of dots.
            res.HasContentItemTail = true;
            for (; t1 != null && t1.BeginChar > res.BeginChar; t1 = t1.Previous)
            {
                if (!t1.IsChar('.'))
                {
                    break;
                }
            }
        }
    }
    // Word statistics over the remaining part of the line (up to t1).
    res.IsAllUpper = true;
    for (; t != null && t.EndChar <= t1.EndChar; t = t.Next)
    {
        if (!(t is Pullenti.Ner.TextToken) || !t.Chars.IsLetter)
        {
            res.NotWords++;
        }
        else
        {
            Pullenti.Morph.MorphClass mc = t.GetMorphClassInDictionary();
            if (mc.IsUndefined)
            {
                res.NotWords++;
            }
            else if (t.LengthChar > 2)
            {
                res.Words++;
            }
            if (!t.Chars.IsAllUpper)
            {
                res.IsAllUpper = false;
            }
            if ((t as Pullenti.Ner.TextToken).IsPureVerb)
            {
                if (!(t as Pullenti.Ner.TextToken).Term.EndsWith("ING"))
                {
                    res.HasVerb = true;
                }
            }
        }
    }
    if (res.Typ == BlkTyps.Undefined)
    {
        Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse((res.NumberEnd == null ? res.BeginToken : res.NumberEnd.Next), Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
        if (npt != null)
        {
            if (npt.Noun.IsValue("ХАРАКТЕРИСТИКА", null) || npt.Noun.IsValue("СОДЕРЖАНИЕ", "ЗМІСТ"))
            {
                // Must be followed only by genitive noun phrases to count as an introduction.
                bool ok = true;
                for (Pullenti.Ner.Token tt = npt.EndToken.Next; tt != null && tt.EndChar <= res.EndChar; tt = tt.Next)
                {
                    if (tt.IsChar('.'))
                    {
                        continue;
                    }
                    Pullenti.Ner.Core.NounPhraseToken npt2 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(tt, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                    if (npt2 == null || !npt2.Morph.Case.IsGenitive)
                    {
                        ok = false;
                        break;
                    }
                    tt = npt2.EndToken;
                    _extendToLineEnd(res, tt);
                }
                if (ok)
                {
                    res.Typ = BlkTyps.Intro;
                    res.IsExistName = true;
                }
            }
            else if (npt.Noun.IsValue("ВЫВОД", "ВИСНОВОК") || npt.Noun.IsValue("РЕЗУЛЬТАТ", "ДОСЛІДЖЕННЯ"))
            {
                bool ok = true;
                for (Pullenti.Ner.Token tt = npt.EndToken.Next; tt != null && tt.EndChar <= res.EndChar; tt = tt.Next)
                {
                    if (tt.IsCharOf(",.") || tt.IsAnd)
                    {
                        continue;
                    }
                    Pullenti.Ner.Core.NounPhraseToken npt1 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(tt, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                    if (npt1 != null)
                    {
                        if (npt1.Noun.IsValue("РЕЗУЛЬТАТ", "ДОСЛІДЖЕННЯ") || npt1.Noun.IsValue("РЕКОМЕНДАЦИЯ", "РЕКОМЕНДАЦІЯ") || npt1.Noun.IsValue("ИССЛЕДОВАНИЕ", "ДОСЛІДЖЕННЯ"))
                        {
                            tt = npt1.EndToken;
                            _extendToLineEnd(res, tt);
                            continue;
                        }
                    }
                    ok = false;
                    break;
                }
                if (ok)
                {
                    res.Typ = BlkTyps.Conslusion;
                    res.IsExistName = true;
                }
            }
            if (res.Typ == BlkTyps.Undefined && npt != null && npt.EndChar <= res.EndChar)
            {
                bool ok = false;
                int publ = 0;
                if (_isPub(npt))
                {
                    ok = true;
                    publ = 1;
                }
                else if ((npt.Noun.IsValue("СПИСОК", null) || npt.Noun.IsValue("УКАЗАТЕЛЬ", "ПОКАЖЧИК") || npt.Noun.IsValue("ПОЛОЖЕНИЕ", "ПОЛОЖЕННЯ")) || npt.Noun.IsValue("ВЫВОД", "ВИСНОВОК") || npt.Noun.IsValue("РЕЗУЛЬТАТ", "ДОСЛІДЖЕННЯ"))
                {
                    if (npt.EndChar == res.EndChar)
                    {
                        return null;
                    }
                    ok = true;
                }
                if (ok)
                {
                    if (npt.BeginToken == npt.EndToken && npt.Noun.IsValue("СПИСОК", null) && npt.EndChar == res.EndChar)
                    {
                        ok = false;
                    }
                    for (Pullenti.Ner.Token tt = npt.EndToken.Next; tt != null && tt.EndChar <= res.EndChar; tt = tt.Next)
                    {
                        if (tt.IsCharOf(",.:") || tt.IsAnd || tt.Morph.Class.IsPreposition)
                        {
                            continue;
                        }
                        if (tt.IsValue("ОТРАЖЕНЫ", "ВІДОБРАЖЕНІ"))
                        {
                            continue;
                        }
                        npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(tt, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                        if (npt == null)
                        {
                            ok = false;
                            break;
                        }
                        if (((_isPub(npt) || npt.Noun.IsValue("РАБОТА", "РОБОТА") || npt.Noun.IsValue("ИССЛЕДОВАНИЕ", "ДОСЛІДЖЕННЯ")) || npt.Noun.IsValue("АВТОР", null) || npt.Noun.IsValue("ТРУД", "ПРАЦЯ")) || npt.Noun.IsValue("ТЕМА", null) || npt.Noun.IsValue("ДИССЕРТАЦИЯ", "ДИСЕРТАЦІЯ"))
                        {
                            tt = npt.EndToken;
                            if (_isPub(npt))
                            {
                                publ++;
                            }
                            _extendToLineEnd(res, tt);
                            continue;
                        }
                        ok = false;
                        break;
                    }
                    if (ok)
                    {
                        res.Typ = BlkTyps.Literature;
                        res.IsExistName = true;
                        // A list header with no publication word in the first two thirds of the
                        // text is downgraded: Misc when numbered, Undefined otherwise.
                        if (publ == 0 && (res.EndChar < (((res.Kit.Sofa.Text.Length * 2) / 3))))
                        {
                            if (res.NumberEnd != null)
                            {
                                res.Typ = BlkTyps.Misc;
                            }
                            else
                            {
                                res.Typ = BlkTyps.Undefined;
                            }
                        }
                    }
                }
            }
        }
    }
    return res;
}

/// <summary>
/// When <paramref name="tt"/> ends beyond the current end of <paramref name="res"/>, moves the
/// line end to <paramref name="tt"/> and then forward to the last token of that physical line.
/// </summary>
static void _extendToLineEnd(BlockLine res, Pullenti.Ner.Token tt)
{
    if (tt.EndChar <= res.EndChar)
    {
        return;
    }
    res.EndToken = tt;
    if (tt.IsNewlineAfter)
    {
        return;
    }
    for (; res.EndToken.Next != null; res.EndToken = res.EndToken.Next)
    {
        if (res.EndToken.IsNewlineAfter)
        {
            break;
        }
    }
}
/// <summary>
/// Resolves the currency postfix that starts at <paramref name="t1"/> for the money value
/// <paramref name="res"/>, then tries to absorb a following "small money" part
/// (number + term from <c>m_SmallMoney</c>) into the value.
/// </summary>
/// <param name="res">Number token being completed; its ExTypParam, EndToken and value fields are updated.</param>
/// <param name="t1">Token where the currency postfix is expected; null yields null.</param>
/// <returns><paramref name="res"/> on success, null when no currency postfix is found.</returns>
static Pullenti.Ner.Core.NumberExToken _correctMoney(Pullenti.Ner.Core.NumberExToken res, Pullenti.Ner.Token t1)
{
    if (t1 == null)
    {
        return null;
    }
    List<Pullenti.Ner.Core.TerminToken> candidates = m_Postfixes.TryParseAll(t1, Pullenti.Ner.Core.TerminParseAttr.No);
    if (candidates == null || candidates.Count == 0)
    {
        return null;
    }

    // A GEO referent right after the currency word narrows the candidates by its ALPHA2 code.
    Pullenti.Ner.Token afterCurrency = candidates[0].EndToken.Next;
    string countryCode = null;
    if (afterCurrency != null)
    {
        Pullenti.Ner.Referent geo = afterCurrency.GetReferent();
        if (geo != null && geo.TypeName == "GEO")
        {
            countryCode = geo.GetStringValue("ALPHA2");
        }
    }
    if (countryCode != null)
    {
        string prefix = countryCode;
        candidates.RemoveAll(c => !c.Termin.CanonicText.StartsWith(prefix));
        if (candidates.Count == 0)
        {
            // Nothing matched the country: fall back to the unfiltered candidates.
            candidates = m_Postfixes.TryParseAll(t1, Pullenti.Ner.Core.TerminParseAttr.No);
        }
    }

    if (candidates.Count > 1)
    {
        // Still ambiguous: prefer a default country for the most common currency names.
        string defaultCode;
        switch (candidates[0].Termin.Terms[0].CanonicalText)
        {
            case "РУБЛЬ":
            case "RUBLE":
                defaultCode = "RU";
                break;
            case "ДОЛЛАР":
            case "ДОЛАР":
            case "DOLLAR":
                defaultCode = "US";
                break;
            case "ФУНТ":
            case "POUND":
                defaultCode = "UK";
                break;
            default:
                defaultCode = null;
                break;
        }
        if (defaultCode != null)
        {
            candidates.RemoveAll(c => !c.Termin.CanonicText.StartsWith(defaultCode) && c.Termin.CanonicText != "GBP");
        }
        // In the ambiguous case the GEO token is never absorbed below.
        countryCode = null;
    }
    if (candidates.Count < 1)
    {
        return null;
    }

    res.ExTypParam = candidates[0].Termin.CanonicText;
    if (countryCode != null && afterCurrency != null)
    {
        res.EndToken = afterCurrency;
    }

    Pullenti.Ner.Token next = res.EndToken.Next;
    if (next != null && next.IsCommaAnd)
    {
        next = next.Next;
    }

    Pullenti.Ner.NumberToken minor = next as Pullenti.Ner.NumberToken;
    if (minor != null && next.Next != null && (next.WhitespacesAfterCount < 4))
    {
        Pullenti.Ner.Token unitStart = next.Next;
        // Skip a bracketed repetition of the same number: "50 (50) ...".
        bool hasBracketedNumber = (unitStart.IsChar('(') && (unitStart.Next is Pullenti.Ner.NumberToken))
            && unitStart.Next.Next != null
            && unitStart.Next.Next.IsChar(')');
        if (hasBracketedNumber && minor.Value == (unitStart.Next as Pullenti.Ner.NumberToken).Value)
        {
            unitStart = unitStart.Next.Next.Next;
        }
        Pullenti.Ner.Core.TerminToken small = m_SmallMoney.TryParse(unitStart, Pullenti.Ner.Core.TerminParseAttr.No);
        if (small == null && unitStart != null && unitStart.IsChar(')'))
        {
            small = m_SmallMoney.TryParse(unitStart.Next, Pullenti.Ner.Core.TerminParseAttr.No);
        }
        if (small != null && minor.IntValue != null)
        {
            // The termin tag is an int bound (presumably small units per main unit — confirm).
            int limit = (int)small.Termin.Tag;
            int amount = minor.IntValue.Value;
            if (amount < limit)
            {
                double fraction = (double)amount / limit;

                // Main value: add the fraction only when it has no fractional part yet;
                // a differing existing fraction is recorded as an alternative rest.
                double frac = res.RealValue - ((long)res.RealValue);
                int hundredths = (int)((frac * 100) + 0.0001);
                if (hundredths > 0 && amount != hundredths)
                {
                    res.AltRestMoney = amount;
                }
                else if (frac == 0)
                {
                    res.RealValue += fraction;
                }

                // Same rule for the alternative value.
                frac = res.AltRealValue - ((long)res.AltRealValue);
                hundredths = (int)((frac * 100) + 0.0001);
                if (hundredths > 0 && amount != hundredths)
                {
                    res.AltRestMoney = amount;
                }
                else if (frac == 0)
                {
                    res.AltRealValue += fraction;
                }
                res.EndToken = small.EndToken;
            }
        }
    }
    else if ((next is Pullenti.Ner.TextToken) && next.IsValue("НОЛЬ", null))
    {
        // "НОЛЬ <small money term>": only the span is extended, the value is unchanged.
        Pullenti.Ner.Core.TerminToken small = m_SmallMoney.TryParse(next.Next, Pullenti.Ner.Core.TerminParseAttr.No);
        if (small != null)
        {
            res.EndToken = small.EndToken;
        }
    }
    return res;
}
/// <summary>
/// Tries to parse a measurement unit starting at <paramref name="t"/>.
/// Handles a leading divider/multiplier, square/cubic prefixes, the "µ" prefix, units from
/// <c>UnitsHelper.Termins</c>, degree signs, percent, units from <paramref name="addUnits"/>
/// and, optionally, short unknown words.
/// </summary>
/// <param name="t">First token; null yields null.</param>
/// <param name="addUnits">Optional extra unit collection whose termin tags are <c>UnitReferent</c>; may be null.</param>
/// <param name="prev">Previously parsed unit of the same expression, or null.</param>
/// <param name="parseUnknownUnits">When true, a short unknown word may be returned as a doubtful unit.</param>
/// <returns>The parsed unit token, or null when nothing matches.</returns>
public static UnitToken TryParse(Pullenti.Ner.Token t, Pullenti.Ner.Core.TerminCollection addUnits, UnitToken prev, bool parseUnknownUnits = false)
{
    if (t == null)
    {
        return null;
    }
    Pullenti.Ner.Token t0 = t;
    int pow = 1;
    bool isNeg = false;
    // A divider ("/", "НА", "OF", "PER", or "В" after a previous unit) makes the power negative.
    if ((t.IsCharOf("\\/") || t.IsValue("НА", null) || t.IsValue("OF", null)) || t.IsValue("PER", null))
    {
        isNeg = true;
        t = t.Next;
    }
    else if (t.IsValue("В", null) && prev != null)
    {
        isNeg = true;
        t = t.Next;
    }
    else if (MeasureHelper.IsMultChar(t))
    {
        t = t.Next;
    }
    Pullenti.Ner.TextToken tt = t as Pullenti.Ner.TextToken;
    if (tt == null)
    {
        return null;
    }
    if (tt.Term == "КВ" || tt.Term == "КВАДР" || tt.IsValue("КВАДРАТНЫЙ", null))
    {
        pow = 2;
        tt = tt.Next as Pullenti.Ner.TextToken;
        if (tt != null && tt.IsChar('.'))
        {
            tt = tt.Next as Pullenti.Ner.TextToken;
        }
        if (tt == null)
        {
            return null;
        }
    }
    else if (tt.Term == "КУБ" || tt.Term == "КУБИЧ" || tt.IsValue("КУБИЧЕСКИЙ", null))
    {
        pow = 3;
        tt = tt.Next as Pullenti.Ner.TextToken;
        if (tt != null && tt.IsChar('.'))
        {
            tt = tt.Next as Pullenti.Ner.TextToken;
        }
        if (tt == null)
        {
            return null;
        }
    }
    else if (tt.Term == "µ")
    {
        // "µ" + unit: look up the Micro-factor unit whose Cyrillic name is "мк" + base name.
        UnitToken res = TryParse(tt.Next, addUnits, prev, false);
        // The recursive result may come from addUnits and then carries no Unit.
        if (res != null && res.Unit != null)
        {
            foreach (Unit u in UnitsHelper.Units)
            {
                if (u.Factor == UnitsFactors.Micro && string.Compare("мк" + u.NameCyr, res.Unit.NameCyr, true) == 0)
                {
                    res.Unit = u;
                    res.BeginToken = tt;
                    res.Pow = pow;
                    if (isNeg)
                    {
                        res.Pow = -pow;
                    }
                    return res;
                }
            }
        }
    }
    List<Pullenti.Ner.Core.TerminToken> toks = UnitsHelper.Termins.TryParseAll(tt, Pullenti.Ner.Core.TerminParseAttr.No);
    if (toks != null && toks.Count > 0)
    {
        if ((prev != null && tt == t0 && toks.Count == 1) && t.IsWhitespaceBefore)
        {
            return null;
        }
        if (toks[0].BeginToken == toks[0].EndToken && tt.Morph.Class.IsPreposition && (tt.WhitespacesAfterCount < 3))
        {
            // A single-token unit that is also a preposition: reject it when it actually
            // introduces a noun phrase, a non-digit number or another unit.
            if (Pullenti.Ner.Core.NounPhraseHelper.TryParse(tt, Pullenti.Ner.Core.NounPhraseParseAttr.ParsePreposition, 0, null) != null)
            {
                return null;
            }
            if (tt.Next is Pullenti.Ner.NumberToken)
            {
                if ((tt.Next as Pullenti.Ner.NumberToken).Typ != Pullenti.Ner.NumberSpellingType.Digit)
                {
                    return null;
                }
            }
            UnitToken nex = TryParse(tt.Next, addUnits, null, false);
            if (nex != null)
            {
                return null;
            }
        }
        if (toks[0].BeginToken == toks[0].EndToken && ((toks[0].BeginToken.IsValue("М", null) || toks[0].BeginToken.IsValue("M", null))) && toks[0].BeginToken.Chars.IsAllLower)
        {
            // Lower-case "м" after a unit of kind Length is taken as UnitsHelper.uMinute.
            if (prev != null && prev.Unit != null && prev.Unit.Kind == Pullenti.Ner.Measure.MeasureKind.Length)
            {
                UnitToken res = new UnitToken(t0, toks[0].EndToken) { Unit = UnitsHelper.uMinute };
                res.Pow = pow;
                if (isNeg)
                {
                    res.Pow = -pow;
                }
                return res;
            }
        }
        List<UnitToken> uts = new List<UnitToken>();
        foreach (Pullenti.Ner.Core.TerminToken tok in toks)
        {
            UnitToken res = new UnitToken(t0, tok.EndToken) { Unit = tok.Termin.Tag as Unit };
            res.Pow = pow;
            if (isNeg)
            {
                res.Pow = -pow;
            }
            // A unit with multiplier 1000000 written with a lower-case first letter is
            // replaced by the Milli-factor unit of the same Cyrillic name, if one exists.
            if (res.Unit.BaseMultiplier == 1000000 && (t0 is Pullenti.Ner.TextToken) && char.IsLower((t0 as Pullenti.Ner.TextToken).GetSourceText()[0]))
            {
                foreach (Unit u in UnitsHelper.Units)
                {
                    if (u.Factor == UnitsFactors.Milli && string.Compare(u.NameCyr, res.Unit.NameCyr, true) == 0)
                    {
                        res.Unit = u;
                        break;
                    }
                }
            }
            res._correct();
            res._checkDoubt();
            uts.Add(res);
        }
        // Prefer the candidate whose keyword starts latest, then the first non-doubtful one.
        int max = 0;
        UnitToken best = null;
        foreach (UnitToken ut in uts)
        {
            if (ut.Keyword != null)
            {
                if (ut.Keyword.BeginChar >= max)
                {
                    max = ut.Keyword.BeginChar;
                    best = ut;
                }
            }
        }
        if (best != null)
        {
            return best;
        }
        foreach (UnitToken ut in uts)
        {
            if (!ut.IsDoubt)
            {
                return ut;
            }
        }
        return uts[0];
    }
    // Degree sign, either as a character or written as "<o>" / "<0>".
    Pullenti.Ner.Token t1 = null;
    if (t.IsCharOf("º°"))
    {
        t1 = t;
    }
    else if ((t.IsChar('<') && t.Next != null && t.Next.Next != null) && t.Next.Next.IsChar('>') && ((t.Next.IsValue("О", null) || t.Next.IsValue("O", null) || (((t.Next is Pullenti.Ner.NumberToken) && (t.Next as Pullenti.Ner.NumberToken).Value == "0")))))
    {
        t1 = t.Next.Next;
    }
    if (t1 != null)
    {
        UnitToken res = new UnitToken(t0, t1) { Unit = UnitsHelper.uGradus };
        res._checkDoubt();
        t = t1.Next;
        if (t != null && t.IsComma)
        {
            t = t.Next;
        }
        if (t != null && t.IsValue("ПО", null))
        {
            t = t.Next;
        }
        if (t is Pullenti.Ner.TextToken)
        {
            string vv = (t as Pullenti.Ner.TextToken).Term;
            if (vv == "C" || vv == "С" || vv.StartsWith("ЦЕЛЬС"))
            {
                res.Unit = UnitsHelper.uGradusC;
                res.IsDoubt = false;
                res.EndToken = t;
            }
            if (vv == "F" || vv.StartsWith("ФАР"))
            {
                res.Unit = UnitsHelper.uGradusF;
                res.IsDoubt = false;
                res.EndToken = t;
            }
        }
        return res;
    }
    if ((t is Pullenti.Ner.TextToken) && ((t.IsValue("ОС", null) || t.IsValue("OC", null))))
    {
        // Degrees Celsius typed as a lower-case "o" followed by a capital "C".
        string str = t.GetSourceText();
        if (str == "оС" || str == "oC")
        {
            UnitToken res = new UnitToken(t, t) { Unit = UnitsHelper.uGradusC, IsDoubt = false };
            return res;
        }
    }
    if (t.IsChar('%'))
    {
        Pullenti.Ner.Token tt1 = t.Next;
        if (tt1 != null && tt1.IsChar('('))
        {
            tt1 = tt1.Next;
        }
        if ((tt1 is Pullenti.Ner.TextToken) && (tt1 as Pullenti.Ner.TextToken).Term.StartsWith("ОБ"))
        {
            // "% об." / "% (об.)" maps to UnitsHelper.uAlco.
            UnitToken re = new UnitToken(t, tt1) { Unit = UnitsHelper.uAlco };
            if (re.EndToken.Next != null && re.EndToken.Next.IsChar('.'))
            {
                re.EndToken = re.EndToken.Next;
            }
            if (re.EndToken.Next != null && re.EndToken.Next.IsChar(')') && t.Next.IsChar('('))
            {
                re.EndToken = re.EndToken.Next;
            }
            return re;
        }
        return new UnitToken(t, t) { Unit = UnitsHelper.uPercent };
    }
    if (addUnits != null)
    {
        Pullenti.Ner.Core.TerminToken tok = addUnits.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
        if (tok != null)
        {
            UnitToken res = new UnitToken(t0, tok.EndToken) { ExtOnto = tok.Termin.Tag as Pullenti.Ner.Measure.UnitReferent };
            // Include a trailing abbreviation period in the returned token.
            if (res.EndToken.Next != null && res.EndToken.Next.IsChar('.'))
            {
                res.EndToken = res.EndToken.Next;
            }
            res.Pow = pow;
            if (isNeg)
            {
                res.Pow = -pow;
            }
            res._correct();
            return res;
        }
    }
    if (!parseUnknownUnits)
    {
        return null;
    }
    if ((t.WhitespacesBeforeCount > 2 || !t.Chars.IsLetter || t.LengthChar > 5) || !(t is Pullenti.Ner.TextToken))
    {
        return null;
    }
    if (Pullenti.Ner.Core.MiscHelper.CanBeStartOfSentence(t))
    {
        return null;
    }
    // NOTE(review): a following '.' is currently NOT included in the unknown unit; the original
    // branch for it assigned t1 = t (a no-op). Confirm whether t.Next was intended.
    t1 = t;
    // An unknown unit must be followed by the end of text, a wide gap, a comma, a slash,
    // a table control character or a multiplication character.
    bool ok = false;
    if (t1.Next == null || t1.WhitespacesAfterCount > 2)
    {
        ok = true;
    }
    else if (t1.Next.IsComma || t1.Next.IsCharOf("\\/") || t1.Next.IsTableControlChar)
    {
        ok = true;
    }
    else if (MeasureHelper.IsMultChar(t1.Next))
    {
        ok = true;
    }
    if (!ok)
    {
        return null;
    }
    Pullenti.Morph.MorphClass mc = t.GetMorphClassInDictionary();
    if (!mc.IsUndefined && t.LengthChar > 7)
    {
        return null;
    }
    // NOTE(review): isNeg is not applied to Pow here, unlike the known-unit branches — confirm.
    UnitToken res1 = new UnitToken(t0, t1) { Pow = pow, IsDoubt = true };
    res1.UnknownName = (t as Pullenti.Ner.TextToken).GetSourceText();
    res1._correct();
    return res1;
}
/// <summary>
/// Tries to recognise a parenthetical (introductory) expression starting at <paramref name="t"/>:
/// a phrase from <c>m_Termins</c>, an adverb / comparative adjective before a comma or a
/// present-tense verb, a reference such as "В СООТВЕТСТВИИ С ..." / "СОГЛАСНО ..." / "В СИЛУ ...",
/// a prepositional noun phrase followed by a comma, or the word "ВЕДЬ".
/// </summary>
/// <param name="t">First token; null yields null.</param>
/// <returns>The recognised token (with <c>Ref</c> set when a referent is attached), or null.</returns>
public static ParenthesisToken TryAttach(Pullenti.Ner.Token t)
{
    if (t == null)
    {
        return null;
    }
    Pullenti.Ner.Core.TerminToken tok = m_Termins.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
    if (tok != null)
    {
        ParenthesisToken res = new ParenthesisToken(t, tok.EndToken);
        return res;
    }
    if (!(t is Pullenti.Ner.TextToken))
    {
        return null;
    }
    Pullenti.Morph.MorphClass mc = t.GetMorphClassInDictionary();
    bool ok = false;
    Pullenti.Ner.Token t1;
    if (mc.IsAdverb)
    {
        ok = true;
    }
    else if (mc.IsAdjective)
    {
        // Only adjectives carrying both the "сравн." and "кач.прил." attributes qualify.
        if (t.Morph.ContainsAttr("сравн.", null) && t.Morph.ContainsAttr("кач.прил.", null))
        {
            ok = true;
        }
    }
    if (ok && t.Next != null)
    {
        if (t.Next.IsChar(','))
        {
            return new ParenthesisToken(t, t);
        }
        t1 = t.Next;
        if (t1.GetMorphClassInDictionary() == Pullenti.Morph.MorphClass.Verb)
        {
            if (t1.Morph.ContainsAttr("н.вр.", null) && t1.Morph.ContainsAttr("нес.в.", null) && t1.Morph.ContainsAttr("дейст.з.", null))
            {
                return new ParenthesisToken(t, t1);
            }
        }
    }
    // Reference-like openers: t1 becomes the token where the referenced object should start.
    t1 = null;
    if ((t.IsValue("В", null) && t.Next != null && t.Next.IsValue("СООТВЕТСТВИЕ", null)) && t.Next.Next != null && t.Next.Next.Morph.Class.IsPreposition)
    {
        t1 = t.Next.Next.Next;
    }
    else if (t.IsValue("СОГЛАСНО", null))
    {
        t1 = t.Next;
    }
    else if (t.IsValue("В", null) && t.Next != null)
    {
        if (t.Next.IsValue("СИЛА", null))
        {
            t1 = t.Next.Next;
        }
        else if (t.Next.Morph.Class.IsAdjective || t.Next.Morph.Class.IsPronoun)
        {
            Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t.Next, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
            if (npt != null)
            {
                if (npt.Noun.IsValue("ВИД", null) || npt.Noun.IsValue("СЛУЧАЙ", null) || npt.Noun.IsValue("СФЕРА", null))
                {
                    return new ParenthesisToken(t, npt.EndToken);
                }
            }
        }
    }
    if (t1 != null)
    {
        if (t1.Next != null)
        {
            // Skip an intermediate "НОРМА / ПОЛОЖЕНИЕ / УКАЗАНИЕ" noun phrase.
            Pullenti.Ner.Core.NounPhraseToken npt1 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t1, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
            if (npt1 != null)
            {
                if (npt1.Noun.IsValue("НОРМА", null) || npt1.Noun.IsValue("ПОЛОЖЕНИЕ", null) || npt1.Noun.IsValue("УКАЗАНИЕ", null))
                {
                    t1 = npt1.EndToken.Next;
                }
            }
        }
    }
    // t1 may have become null above when the skipped noun phrase ended the text.
    if (t1 != null)
    {
        Pullenti.Ner.Referent r = t1.GetReferent();
        if (r != null)
        {
            ParenthesisToken res = new ParenthesisToken(t, t1) { Ref = r };
            if (t1.Next != null && t1.Next.IsComma)
            {
                // Extend over ", ... СИЛА/ДЕЙСТВИЕ ...," up to (not including) the closing comma.
                bool sila = false;
                for (Pullenti.Ner.Token ttt = t1.Next.Next; ttt != null; ttt = ttt.Next)
                {
                    if (ttt.IsValue("СИЛА", null) || ttt.IsValue("ДЕЙСТВИЕ", null))
                    {
                        sila = true;
                        continue;
                    }
                    if (ttt.IsComma)
                    {
                        if (sila)
                        {
                            res.EndToken = ttt.Previous;
                        }
                        break;
                    }
                    if (Pullenti.Ner.Core.BracketHelper.CanBeStartOfSequence(ttt, false, false))
                    {
                        break;
                    }
                }
            }
            return res;
        }
        Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t1, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
        if (npt != null)
        {
            return new ParenthesisToken(t, npt.EndToken);
        }
    }
    // "[НЕ] <preposition> <noun phrase>," or "... ОЧЕРЕДЬ".
    Pullenti.Ner.Token tt = t;
    if (tt.IsValue("НЕ", null))
    {
        tt = tt.Next;
    }
    // tt is null here when "НЕ" was the last token, so test for null before dereferencing.
    if (tt != null && tt.Morph.Class.IsPreposition)
    {
        tt = tt.Next;
        Pullenti.Ner.Core.NounPhraseToken npt1 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(tt, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
        if (npt1 != null)
        {
            tt = npt1.EndToken;
            if (tt.Next != null && tt.Next.IsComma)
            {
                return new ParenthesisToken(t, tt.Next);
            }
            if (npt1.Noun.IsValue("ОЧЕРЕДЬ", null))
            {
                return new ParenthesisToken(t, tt);
            }
        }
    }
    if (t.IsValue("ВЕДЬ", null))
    {
        return new ParenthesisToken(t, t);
    }
    return null;
}