/// <summary>
/// Collects the numeric part of an ISO-like designation that starts at <paramref name="t0"/>.
/// </summary>
/// <param name="t0">First token of the candidate; leading ':', '/', '\', hyphens and the word "IEC" are skipped.</param>
/// <param name="specChars">Characters accepted as separators between consecutive number tokens.</param>
/// <returns>
/// An item spanning from <paramref name="t0"/> to the last number token consumed, whose Value is the
/// numbers joined by the separators found between them; null when no number follows the skipped prefix.
/// </returns>
public static UriItemToken AttachISOContent(Pullenti.Ner.Token t0, string specChars)
{
    // Skip the optional prefix in front of the first number.
    Pullenti.Ner.Token cur = t0;
    while (cur != null && (cur.IsCharOf(":/\\") || cur.IsHiphen || cur.IsValue("IEC", null)))
    {
        cur = cur.Next;
    }
    if (!(cur is Pullenti.Ner.NumberToken))
    {
        return null;
    }

    Pullenti.Ner.Token lastNumber = cur;
    char pendingSeparator = '\0';
    StringBuilder collected = new StringBuilder();
    while (cur != null)
    {
        // Whitespace ends the designation (the check compares against the last number seen so far).
        if (cur.IsWhitespaceBefore && cur != lastNumber)
        {
            break;
        }
        if (cur is Pullenti.Ner.NumberToken)
        {
            // A separator is only emitted once it is known that another number follows it.
            if (pendingSeparator != '\0')
            {
                collected.Append(pendingSeparator);
            }
            pendingSeparator = '\0';
            lastNumber = cur;
            collected.Append(cur.GetSourceText());
        }
        else if ((cur is Pullenti.Ner.TextToken) && cur.IsCharOf(specChars))
        {
            pendingSeparator = cur.GetSourceText()[0];
        }
        else
        {
            break;
        }
        cur = cur.Next;
    }

    if (collected.Length == 0)
    {
        return null;
    }
    return new UriItemToken(t0, lastNumber) { Value = collected.ToString() };
}
/// <summary>
/// Builds a unit referent for an ontology entry that starts at <paramref name="begin"/>.
/// </summary>
/// <param name="begin">First token of the ontology entry; only text tokens are accepted.</param>
/// <returns>
/// A referent token for the unit parsed by UnitToken.TryParse when that succeeds; otherwise a new
/// UnitReferent named after the source text of <paramref name="begin"/>. Null when
/// <paramref name="begin"/> is not a text token.
/// </returns>
public override Pullenti.Ner.ReferentToken ProcessOntologyItem(Pullenti.Ner.Token begin)
{
    if (begin is Pullenti.Ner.TextToken)
    {
        Pullenti.Ner.Measure.Internal.UnitToken parsed = Pullenti.Ner.Measure.Internal.UnitToken.TryParse(begin, null, null, false);
        if (parsed == null)
        {
            // Not a known unit: register the raw text as the unit name.
            UnitReferent fallback = new UnitReferent();
            fallback.AddSlot(UnitReferent.ATTR_NAME, begin.GetSourceText(), false, 0);
            return new Pullenti.Ner.ReferentToken(fallback, begin, begin);
        }
        return new Pullenti.Ner.ReferentToken(parsed.CreateReferentWithRegister(null), parsed.BeginToken, parsed.EndToken);
    }
    return null;
}
/// <summary>
/// Builds a normalized value string from the tokens <paramref name="begin"/>..<paramref name="end"/>
/// and stores it in the ATTR_VALUE slot.
/// </summary>
/// <param name="begin">First token of the value.</param>
/// <param name="end">Last token of the value (inclusive).</param>
/// <remarks>
/// Number tokens contribute their source text, text tokens their Term; '-', '\' and '/' are all
/// written as '-'. Afterwards a '-' standing between a digit and a letter (in either order) is
/// removed. The cached m_Names field is reset.
/// </remarks>
internal void AddValue(Pullenti.Ner.Token begin, Pullenti.Ner.Token end)
{
    StringBuilder buffer = new StringBuilder();
    Pullenti.Ner.Token cur = begin;
    while (cur != null && cur.Previous != end)
    {
        if (cur is Pullenti.Ner.NumberToken)
        {
            buffer.Append(cur.GetSourceText());
        }
        else if (cur is Pullenti.Ner.TextToken)
        {
            string piece = (cur as Pullenti.Ner.TextToken).Term;
            buffer.Append(cur.IsCharOf("-\\/") ? "-" : piece);
        }
        cur = cur.Next;
    }

    int pos = 0;
    while (pos < buffer.Length)
    {
        bool hasNeighbours = pos > 0 && (pos + 1) < buffer.Length;
        if (buffer[pos] == '-' && hasNeighbours)
        {
            char left = buffer[pos - 1];
            char right = buffer[pos + 1];
            // Drop the dash only on a digit/letter boundary; digit-digit and letter-letter keep it.
            bool bothAlphanumeric = char.IsLetterOrDigit(left) && char.IsLetterOrDigit(right);
            if (bothAlphanumeric && char.IsDigit(left) != char.IsDigit(right))
            {
                buffer.Remove(pos, 1);
            }
        }
        pos++;
    }

    this.AddSlot(ATTR_VALUE, buffer.ToString(), false, 0);
    m_Names = null;
}
/// <summary>
/// Tries to recognize a measurement unit starting at token <paramref name="t"/>.
/// </summary>
/// <param name="t">Token to start from; null yields null.</param>
/// <param name="addUnits">Optional extra termin collection consulted after the built-in checks fail; a match is returned with ExtOnto set from the termin tag.</param>
/// <param name="prev">Previously parsed unit, if any; it enables the "В" prefix and the "м" = minute rule, and suppresses some matches.</param>
/// <param name="parseUnknownUnits">When true, a short letter token may be returned as an unknown (doubtful) unit.</param>
/// <returns>The recognized unit token, or null when nothing is recognized.</returns>
/// <remarks>
/// Steps, in order (the first one that succeeds returns):
/// 1. Prefix: slash/backslash, "НА", "OF", "PER" (or "В" when prev is not null) makes the power negative;
///    a multiplication character (MeasureHelper.IsMultChar) is skipped. The next token must be a text token.
/// 2. "КВ"/"КВАДР"/"КВАДРАТНЫЙ" set the power to 2, "КУБ"/"КУБИЧ"/"КУБИЧЕСКИЙ" to 3; an optional '.' is skipped.
///    "µ" parses the following token recursively and, if a unit with the Micro factor is found whose
///    NameCyr prefixed by "мк" equals the parsed unit's NameCyr (case-insensitive), returns it.
/// 3. Dictionary lookup via UnitsHelper.Termins.TryParseAll:
///    - rejected when prev is given, no prefix was consumed, there is exactly one match and whitespace precedes t;
///    - a single-token match that is morphologically a preposition (fewer than 3 whitespaces after) is rejected
///      when a prepositional noun phrase parses there, when a non-digit number token follows, or when the next
///      token itself parses as a unit;
///    - an all-lowercase single "М"/"M" after a unit of kind Length is returned as UnitsHelper.uMinute;
///    - otherwise one candidate per match is built; a candidate with BaseMultiplier 1000000 whose source text
///      starts with a lowercase letter is swapped for the Milli-factor unit with the same NameCyr
///      (presumably to tell "milli" from "mega" by letter case - TODO confirm);
///    - the candidate whose Keyword has the greatest BeginChar wins (later candidates win ties), else the first
///      candidate that is not doubtful, else the first candidate.
/// 4. A degree sign, or 'О'/'O'/"0" between angle brackets, gives uGradus; an optional comma and "ПО" are skipped and
///    a following "C"/"С"/"ЦЕЛЬС..." or "F"/"ФАР..." upgrades it to uGradusC / uGradusF.
/// 5. The literal source text "оС" / "oC" gives uGradusC.
/// 6. '%' gives uAlco when followed (optionally inside parentheses) by a term starting with "ОБ", else uPercent.
/// 7. Lookup in addUnits, when supplied.
/// 8. When parseUnknownUnits is true: a letter token of at most 5 characters, with at most 2 whitespaces before,
///    that cannot start a sentence and is followed by end of text, more than 2 whitespaces, a comma,
///    a slash, a table control char or a multiplication char is returned as a doubtful unknown unit.
///
/// NOTE(review): in step 2 only the local tt advances past the power word; steps 4-8 still test t - confirm intended.
/// NOTE(review): in step 7 tok.EndToken is extended over a trailing '.' only after res was constructed from it,
///   so the extension looks like it does not reach res - confirm whether res.EndToken was meant.
/// NOTE(review): in step 8 "if (t.Next is '.') t1 = t;" re-assigns the value t1 already has (t.Next may have been meant),
///   and the "LengthChar > 7" check cannot fire because "LengthChar > 5" has already returned null.
/// </remarks>
public static UnitToken TryParse(Pullenti.Ner.Token t, Pullenti.Ner.Core.TerminCollection addUnits, UnitToken prev, bool parseUnknownUnits = false) { if (t == null) { return(null); } Pullenti.Ner.Token t0 = t; int pow = 1; bool isNeg = false; if ((t.IsCharOf("\\/") || t.IsValue("НА", null) || t.IsValue("OF", null)) || t.IsValue("PER", null)) { isNeg = true; t = t.Next; } else if (t.IsValue("В", null) && prev != null) { isNeg = true; t = t.Next; } else if (MeasureHelper.IsMultChar(t)) { t = t.Next; } Pullenti.Ner.TextToken tt = t as Pullenti.Ner.TextToken; if (tt == null) { return(null); } if (tt.Term == "КВ" || tt.Term == "КВАДР" || tt.IsValue("КВАДРАТНЫЙ", null)) { pow = 2; tt = tt.Next as Pullenti.Ner.TextToken; if (tt != null && tt.IsChar('.')) { tt = tt.Next as Pullenti.Ner.TextToken; } if (tt == null) { return(null); } } else if (tt.Term == "КУБ" || tt.Term == "КУБИЧ" || tt.IsValue("КУБИЧЕСКИЙ", null)) { pow = 3; tt = tt.Next as Pullenti.Ner.TextToken; if (tt != null && tt.IsChar('.')) { tt = tt.Next as Pullenti.Ner.TextToken; } if (tt == null) { return(null); } } else if (tt.Term == "µ") { UnitToken res = TryParse(tt.Next, addUnits, prev, false); if (res != null) { foreach (Unit u in UnitsHelper.Units) { if (u.Factor == UnitsFactors.Micro && string.Compare("мк" + u.NameCyr, res.Unit.NameCyr, true) == 0) { res.Unit = u; res.BeginToken = tt; res.Pow = pow; if (isNeg) { res.Pow = -pow; } return(res); } } } } List <Pullenti.Ner.Core.TerminToken> toks = UnitsHelper.Termins.TryParseAll(tt, Pullenti.Ner.Core.TerminParseAttr.No); if (toks != null) { if ((prev != null && tt == t0 && toks.Count == 1) && t.IsWhitespaceBefore) { return(null); } if (toks[0].BeginToken == toks[0].EndToken && tt.Morph.Class.IsPreposition && (tt.WhitespacesAfterCount < 3)) { if (Pullenti.Ner.Core.NounPhraseHelper.TryParse(tt, Pullenti.Ner.Core.NounPhraseParseAttr.ParsePreposition, 0, null) != null) { return(null); } if (tt.Next is Pullenti.Ner.NumberToken) { if ((tt.Next as 
Pullenti.Ner.NumberToken).Typ != Pullenti.Ner.NumberSpellingType.Digit) { return(null); } } UnitToken nex = TryParse(tt.Next, addUnits, null, false); if (nex != null) { return(null); } } if (toks[0].BeginToken == toks[0].EndToken && ((toks[0].BeginToken.IsValue("М", null) || toks[0].BeginToken.IsValue("M", null))) && toks[0].BeginToken.Chars.IsAllLower) { if (prev != null && prev.Unit != null && prev.Unit.Kind == Pullenti.Ner.Measure.MeasureKind.Length) { UnitToken res = new UnitToken(t0, toks[0].EndToken) { Unit = UnitsHelper.uMinute }; res.Pow = pow; if (isNeg) { res.Pow = -pow; } return(res); } } List <UnitToken> uts = new List <UnitToken>(); foreach (Pullenti.Ner.Core.TerminToken tok in toks) { UnitToken res = new UnitToken(t0, tok.EndToken) { Unit = tok.Termin.Tag as Unit }; res.Pow = pow; if (isNeg) { res.Pow = -pow; } if (res.Unit.BaseMultiplier == 1000000 && (t0 is Pullenti.Ner.TextToken) && char.IsLower((t0 as Pullenti.Ner.TextToken).GetSourceText()[0])) { foreach (Unit u in UnitsHelper.Units) { if (u.Factor == UnitsFactors.Milli && string.Compare(u.NameCyr, res.Unit.NameCyr, true) == 0) { res.Unit = u; break; } } } res._correct(); res._checkDoubt(); uts.Add(res); } int max = 0; UnitToken best = null; foreach (UnitToken ut in uts) { if (ut.Keyword != null) { if (ut.Keyword.BeginChar >= max) { max = ut.Keyword.BeginChar; best = ut; } } } if (best != null) { return(best); } foreach (UnitToken ut in uts) { if (!ut.IsDoubt) { return(ut); } } return(uts[0]); } Pullenti.Ner.Token t1 = null; if (t.IsCharOf("º°")) { t1 = t; } else if ((t.IsChar('<') && t.Next != null && t.Next.Next != null) && t.Next.Next.IsChar('>') && ((t.Next.IsValue("О", null) || t.Next.IsValue("O", null) || (((t.Next is Pullenti.Ner.NumberToken) && (t.Next as Pullenti.Ner.NumberToken).Value == "0"))))) { t1 = t.Next.Next; } if (t1 != null) { UnitToken res = new UnitToken(t0, t1) { Unit = UnitsHelper.uGradus }; res._checkDoubt(); t = t1.Next; if (t != null && t.IsComma) { t = t.Next; } if (t 
!= null && t.IsValue("ПО", null)) { t = t.Next; } if (t is Pullenti.Ner.TextToken) { string vv = (t as Pullenti.Ner.TextToken).Term; if (vv == "C" || vv == "С" || vv.StartsWith("ЦЕЛЬС")) { res.Unit = UnitsHelper.uGradusC; res.IsDoubt = false; res.EndToken = t; } if (vv == "F" || vv.StartsWith("ФАР")) { res.Unit = UnitsHelper.uGradusF; res.IsDoubt = false; res.EndToken = t; } } return(res); } if ((t is Pullenti.Ner.TextToken) && ((t.IsValue("ОС", null) || t.IsValue("OC", null)))) { string str = t.GetSourceText(); if (str == "оС" || str == "oC") { UnitToken res = new UnitToken(t, t) { Unit = UnitsHelper.uGradusC, IsDoubt = false }; return(res); } } if (t.IsChar('%')) { Pullenti.Ner.Token tt1 = t.Next; if (tt1 != null && tt1.IsChar('(')) { tt1 = tt1.Next; } if ((tt1 is Pullenti.Ner.TextToken) && (tt1 as Pullenti.Ner.TextToken).Term.StartsWith("ОБ")) { UnitToken re = new UnitToken(t, tt1) { Unit = UnitsHelper.uAlco }; if (re.EndToken.Next != null && re.EndToken.Next.IsChar('.')) { re.EndToken = re.EndToken.Next; } if (re.EndToken.Next != null && re.EndToken.Next.IsChar(')') && t.Next.IsChar('(')) { re.EndToken = re.EndToken.Next; } return(re); } return(new UnitToken(t, t) { Unit = UnitsHelper.uPercent }); } if (addUnits != null) { Pullenti.Ner.Core.TerminToken tok = addUnits.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No); if (tok != null) { UnitToken res = new UnitToken(t0, tok.EndToken) { ExtOnto = tok.Termin.Tag as Pullenti.Ner.Measure.UnitReferent }; if (tok.EndToken.Next != null && tok.EndToken.Next.IsChar('.')) { tok.EndToken = tok.EndToken.Next; } res.Pow = pow; if (isNeg) { res.Pow = -pow; } res._correct(); return(res); } } if (!parseUnknownUnits) { return(null); } if ((t.WhitespacesBeforeCount > 2 || !t.Chars.IsLetter || t.LengthChar > 5) || !(t is Pullenti.Ner.TextToken)) { return(null); } if (Pullenti.Ner.Core.MiscHelper.CanBeStartOfSentence(t)) { return(null); } t1 = t; if (t.Next != null && t.Next.IsChar('.')) { t1 = t; } bool ok = false; if (t1.Next == 
null || t1.WhitespacesAfterCount > 2) { ok = true; } else if (t1.Next.IsComma || t1.Next.IsCharOf("\\/") || t1.Next.IsTableControlChar) { ok = true; } else if (MeasureHelper.IsMultChar(t1.Next)) { ok = true; } if (!ok) { return(null); } Pullenti.Morph.MorphClass mc = t.GetMorphClassInDictionary(); if (mc.IsUndefined) { } else if (t.LengthChar > 7) { return(null); } UnitToken res1 = new UnitToken(t0, t1) { Pow = pow, IsDoubt = true }; res1.UnknownName = (t as Pullenti.Ner.TextToken).GetSourceText(); res1._correct(); return(res1); }
/// <summary>
/// Tries to recognize one element of an identity-document description (keyword, series, number,
/// code, address, date or organization) starting at token <paramref name="t"/>.
/// </summary>
/// <param name="t">Token to start from. May be null (the method then returns null); recursive calls rely on this.</param>
/// <param name="prev">Previously recognized element, or null when parsing the first element. With null only a Keyword element can be returned.</param>
/// <returns>The recognized element, or null when nothing is recognized at <paramref name="t"/>.</returns>
static PersonIdToken TryParse(Pullenti.Ner.Token t, PersonIdToken prev)
{
    // The look-ahead calls below pass EndToken.Next / t.Next, which is null at the end of the text.
    if (t == null)
    {
        return null;
    }

    // Special keyword: certificate of state registration of an individual entrepreneur.
    if (t.IsValue("СВИДЕТЕЛЬСТВО", null))
    {
        Pullenti.Ner.Token tt1 = t;
        bool ip = false;
        bool reg = false;
        for (Pullenti.Ner.Token tt = t.Next; tt != null; tt = tt.Next)
        {
            if (tt.IsCommaAnd || tt.Morph.Class.IsPreposition)
            {
                continue;
            }
            if (tt.IsValue("РЕГИСТРАЦИЯ", null) || tt.IsValue("РЕЕСТР", null) || tt.IsValue("ЗАРЕГИСТРИРОВАТЬ", null))
            {
                reg = true;
                tt1 = tt;
            }
            else if (tt.IsValue("ИНДИВИДУАЛЬНЫЙ", null) || tt.IsValue("ИП", null))
            {
                ip = true;
                tt1 = tt;
            }
            else if ((tt.IsValue("ВНЕСЕНИЕ", null) || tt.IsValue("ГОСУДАРСТВЕННЫЙ", null) || tt.IsValue("ЕДИНЫЙ", null)) || tt.IsValue("ЗАПИСЬ", null) || tt.IsValue("ПРЕДПРИНИМАТЕЛЬ", null))
            {
                tt1 = tt;
            }
            else if (tt.GetReferent() != null && tt.GetReferent().TypeName == "DATERANGE")
            {
                tt1 = tt;
            }
            else
            {
                break;
            }
        }
        // Both a registration word and an "individual entrepreneur" word are required.
        if (reg && ip)
        {
            return new PersonIdToken(t, tt1) { Typ = Typs.Keyword, Value = "СВИДЕТЕЛЬСТВО О ГОСУДАРСТВЕННОЙ РЕГИСТРАЦИИ ФИЗИЧЕСКОГО ЛИЦА В КАЧЕСТВЕ ИНДИВИДУАЛЬНОГО ПРЕДПРИНИМАТЕЛЯ" };
        }
    }

    Pullenti.Ner.Core.TerminToken tok = m_Ontology.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
    if (tok != null)
    {
        Typs ty = (Typs)tok.Termin.Tag;
        PersonIdToken res = new PersonIdToken(tok.BeginToken, tok.EndToken) { Typ = ty, Value = tok.Termin.CanonicText };

        if (prev == null)
        {
            // The first element must be a document keyword; extend it over a following state / citizenship.
            if (ty != Typs.Keyword)
            {
                return null;
            }
            for (t = tok.EndToken.Next; t != null; t = t.Next)
            {
                Pullenti.Ner.Referent r = t.GetReferent();
                if (r != null && (r is Pullenti.Ner.Geo.GeoReferent))
                {
                    res.Referent = r;
                    res.EndToken = t;
                    continue;
                }
                if (t.IsValue("ГРАЖДАНИН", null) && t.Next != null && (t.Next.GetReferent() is Pullenti.Ner.Geo.GeoReferent))
                {
                    res.Referent = t.Next.GetReferent();
                    t = (res.EndToken = t.Next);
                    continue;
                }
                if (r != null)
                {
                    break;
                }
                PersonAttrToken ait = PersonAttrToken.TryAttach(t, null, PersonAttrToken.PersonAttrAttachAttrs.No);
                if (ait != null)
                {
                    if (ait.Referent != null)
                    {
                        foreach (Pullenti.Ner.Slot s in ait.Referent.Slots)
                        {
                            if (s.TypeName == Pullenti.Ner.Person.PersonPropertyReferent.ATTR_REF && (s.Value is Pullenti.Ner.Geo.GeoReferent))
                            {
                                res.Referent = s.Value as Pullenti.Ner.Referent;
                            }
                        }
                    }
                    res.EndToken = ait.EndToken;
                    break;
                }
                if (t.IsValue("ДАННЫЙ", null))
                {
                    res.EndToken = t;
                    continue;
                }
                break;
            }
            // Only a state is kept as the referent of the keyword.
            if ((res.Referent is Pullenti.Ner.Geo.GeoReferent) && !(res.Referent as Pullenti.Ner.Geo.GeoReferent).IsState)
            {
                res.Referent = null;
            }
            return res;
        }

        if (ty == Typs.Number)
        {
            // "<number keyword> [:] digits..." - concatenate number tokens up to the end of the line.
            StringBuilder tmp = new StringBuilder();
            Pullenti.Ner.Token tt = tok.EndToken.Next;
            if (tt != null && tt.IsChar(':'))
            {
                tt = tt.Next;
            }
            for (; tt != null; tt = tt.Next)
            {
                if (tt.IsNewlineBefore)
                {
                    break;
                }
                if (!(tt is Pullenti.Ner.NumberToken))
                {
                    break;
                }
                tmp.Append(tt.GetSourceText());
                res.EndToken = tt;
            }
            if (tmp.Length < 1)
            {
                return null;
            }
            res.Value = tmp.ToString();
            res.HasPrefix = true;
            return res;
        }

        if (ty == Typs.Seria)
        {
            StringBuilder tmp = new StringBuilder();
            Pullenti.Ner.Token tt = tok.EndToken.Next;
            if (tt != null && tt.IsChar(':'))
            {
                tt = tt.Next;
            }
            for (; tt != null; tt = tt.Next)
            {
                if (tt.IsNewlineBefore)
                {
                    break;
                }
                // A number prefix starts the document number, so the series ends here.
                if (Pullenti.Ner.Core.MiscHelper.CheckNumberPrefix(tt) != null)
                {
                    break;
                }
                if (!(tt is Pullenti.Ner.NumberToken))
                {
                    if (!(tt is Pullenti.Ner.TextToken))
                    {
                        break;
                    }
                    if (!tt.Chars.IsAllUpper)
                    {
                        break;
                    }
                    Pullenti.Ner.NumberToken nu = Pullenti.Ner.Core.NumberHelper.TryParseRoman(tt);
                    if (nu != null)
                    {
                        // NOTE(review): res.EndToken is not advanced in this roman-numeral branch - confirm intended.
                        tmp.Append(nu.GetSourceText());
                        tt = nu.EndToken;
                    }
                    else if (tt.LengthChar != 2)
                    {
                        break;
                    }
                    else
                    {
                        tmp.Append((tt as Pullenti.Ner.TextToken).Term);
                        res.EndToken = tt;
                    }
                    if (tt.Next != null && tt.Next.IsHiphen)
                    {
                        tt = tt.Next;
                    }
                    continue;
                }
                if (tmp.Length >= 4)
                {
                    break;
                }
                tmp.Append(tt.GetSourceText());
                res.EndToken = tt;
            }
            if (tmp.Length < 4)
            {
                if (tmp.Length < 2)
                {
                    return null;
                }
                // A short series is accepted only when a document number follows it.
                Pullenti.Ner.Token tt1 = res.EndToken.Next;
                if (tt1 != null && tt1.IsComma)
                {
                    tt1 = tt1.Next;
                }
                PersonIdToken next = TryParse(tt1, res);
                if (next == null || next.Typ != Typs.Number)
                {
                    return null;
                }
            }
            res.Value = tmp.ToString();
            res.HasPrefix = true;
            return res;
        }

        if (ty == Typs.Code)
        {
            // Extend over ':' / '-' separated number tokens.
            for (Pullenti.Ner.Token tt = res.EndToken.Next; tt != null; tt = tt.Next)
            {
                if (tt.IsCharOf(":") || tt.IsHiphen)
                {
                    continue;
                }
                if (tt is Pullenti.Ner.NumberToken)
                {
                    res.EndToken = tt;
                    continue;
                }
                break;
            }
        }

        if (ty == Typs.Address)
        {
            if (t.GetReferent() is Pullenti.Ner.Address.AddressReferent)
            {
                res.Referent = t.GetReferent();
                res.EndToken = t;
                return res;
            }
            for (Pullenti.Ner.Token tt = res.EndToken.Next; tt != null; tt = tt.Next)
            {
                if (tt.IsCharOf(":") || tt.IsHiphen || tt.Morph.Class.IsPreposition)
                {
                    continue;
                }
                if (tt.GetReferent() is Pullenti.Ner.Address.AddressReferent)
                {
                    res.Referent = tt.GetReferent();
                    res.EndToken = tt;
                }
                break;
            }
            if (res.Referent == null)
            {
                return null;
            }
        }
        return res;
    }
    else if (prev == null)
    {
        return null;
    }

    // No ontology term here: try a bare number (optionally with a number prefix).
    Pullenti.Ner.Token t0 = t;
    Pullenti.Ner.Token t1 = Pullenti.Ner.Core.MiscHelper.CheckNumberPrefix(t0);
    if (t1 != null)
    {
        t = t1;
    }
    if (t is Pullenti.Ner.NumberToken)
    {
        StringBuilder tmp = new StringBuilder();
        PersonIdToken res = new PersonIdToken(t0, t) { Typ = Typs.Number };
        for (Pullenti.Ner.Token tt = t; tt != null; tt = tt.Next)
        {
            if (tt.IsNewlineBefore || !(tt is Pullenti.Ner.NumberToken))
            {
                break;
            }
            tmp.Append(tt.GetSourceText());
            res.EndToken = tt;
        }
        if (tmp.Length < 4)
        {
            if (tmp.Length < 2)
            {
                return null;
            }
            if (prev == null || prev.Typ != Typs.Keyword)
            {
                return null;
            }
            // A short number right after the keyword is a series if a real number follows it.
            PersonIdToken ne = TryParse(res.EndToken.Next, prev);
            if (ne != null && ne.Typ == Typs.Number)
            {
                res.Typ = Typs.Seria;
            }
            else
            {
                return null;
            }
        }
        res.Value = tmp.ToString();
        if (t0 != t)
        {
            res.HasPrefix = true;
        }
        return res;
    }

    if (t is Pullenti.Ner.ReferentToken)
    {
        Pullenti.Ner.Referent r = t.GetReferent();
        if (r != null)
        {
            if (r.TypeName == "DATE")
            {
                return new PersonIdToken(t, t) { Typ = Typs.Date, Referent = r };
            }
            if (r.TypeName == "ORGANIZATION")
            {
                return new PersonIdToken(t, t) { Typ = Typs.Org, Referent = r };
            }
            if (r.TypeName == "ADDRESS")
            {
                return new PersonIdToken(t, t) { Typ = Typs.Address, Referent = r };
            }
        }
    }

    // A letter token right after the keyword is a series when a number follows it.
    if ((prev != null && prev.Typ == Typs.Keyword && (t is Pullenti.Ner.TextToken)) && !t.Chars.IsAllLower && t.Chars.IsLetter)
    {
        PersonIdToken rr = TryParse(t.Next, prev);
        if (rr != null && rr.Typ == Typs.Number)
        {
            return new PersonIdToken(t, t) { Typ = Typs.Seria, Value = (t as Pullenti.Ner.TextToken).Term };
        }
    }

    // "ОТ <date>" / "ВІД <date>".
    if ((t != null && t.IsValue("ОТ", "ВІД") && (t.Next is Pullenti.Ner.ReferentToken)) && t.Next.GetReferent().TypeName == "DATE")
    {
        return new PersonIdToken(t, t.Next) { Typ = Typs.Date, Referent = t.Next.GetReferent() };
    }
    return null;
}
/// <summary>
/// Creates the semantic object for the head noun of a noun phrase, links its adjectives and
/// internal noun as details, and registers it in the graph.
/// </summary>
/// <param name="gr">Graph that receives the new object and its links.</param>
/// <param name="npt">Parsed noun phrase.</param>
/// <returns>
/// The created object (also stored in the Tag of the noun's first token), or null when the noun
/// is a single referent token without a referent.
/// </returns>
public static Pullenti.Semantic.SemObject CreateNounGroup(Pullenti.Semantic.SemGraph gr, Pullenti.Ner.Core.NounPhraseToken npt)
{
    Pullenti.Ner.Token head = npt.Noun.BeginToken;
    Pullenti.Semantic.SemObject result = new Pullenti.Semantic.SemObject(gr);
    result.Tokens.Add(npt.Noun);

    // Object kind: personal pronoun, other pronoun, or plain noun.
    if (npt.Noun.Morph.Class.IsPersonalPronoun)
    {
        result.Typ = Pullenti.Semantic.SemObjectType.PersonalPronoun;
    }
    else if (npt.Noun.Morph.Class.IsPronoun)
    {
        result.Typ = Pullenti.Semantic.SemObjectType.Pronoun;
    }
    else
    {
        result.Typ = Pullenti.Semantic.SemObjectType.Noun;
    }

    bool spansSeveralTokens = npt.Noun.BeginToken != npt.Noun.EndToken;
    if (spansSeveralTokens)
    {
        // Multi-token noun: normal forms come from the whole span, morphology from the phrase.
        result.Morph.NormalCase = npt.Noun.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false);
        result.Morph.NormalFull = npt.Noun.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Singular, Pullenti.Morph.MorphGender.Undefined, false);
        result.Morph.Class = Pullenti.Morph.MorphClass.Noun;
        result.Morph.Number = npt.Morph.Number;
        result.Morph.Gender = npt.Morph.Gender;
        result.Morph.Case = npt.Morph.Case;
    }
    else if (head is Pullenti.Ner.TextToken)
    {
        // Take the first word form that agrees with the phrase morphology.
        foreach (Pullenti.Morph.MorphBaseInfo candidate in head.Morph.Items)
        {
            if (!candidate.CheckAccord(npt.Morph, false, false))
            {
                continue;
            }
            Pullenti.Morph.MorphWordForm wordForm = candidate as Pullenti.Morph.MorphWordForm;
            if (wordForm != null)
            {
                _setMorph(result, wordForm);
                break;
            }
        }
        if (result.Morph.NormalCase == null)
        {
            result.Morph.NormalCase = head.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false);
            result.Morph.NormalFull = head.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Singular, Pullenti.Morph.MorphGender.Undefined, false);
        }
        List <Pullenti.Semantic.Utils.DerivateGroup> groups = Pullenti.Semantic.Utils.DerivateService.FindDerivates(result.Morph.NormalFull, true, null);
        if (groups != null && groups.Count > 0)
        {
            result.Concept = groups[0];
        }
    }
    else if (head is Pullenti.Ner.ReferentToken)
    {
        Pullenti.Ner.Referent entity = (head as Pullenti.Ner.ReferentToken).Referent;
        if (entity == null)
        {
            return null;
        }
        string entityText = entity.ToString();
        result.Morph.NormalCase = entityText;
        result.Morph.NormalFull = entityText;
        result.Concept = entity;
    }
    else if (head is Pullenti.Ner.NumberToken)
    {
        Pullenti.Ner.NumberToken number = head as Pullenti.Ner.NumberToken;
        result.Morph.Gender = head.Morph.Gender;
        result.Morph.Number = head.Morph.Number;
        if (number.IntValue == null)
        {
            string raw = head.GetSourceText().ToUpper();
            result.Morph.NormalCase = raw;
            result.Morph.NormalFull = raw;
        }
        else
        {
            result.Morph.NormalCase = Pullenti.Ner.Core.NumberHelper.GetNumberAdjective(number.IntValue.Value, head.Morph.Gender, head.Morph.Number);
            result.Morph.NormalFull = Pullenti.Ner.Core.NumberHelper.GetNumberAdjective(number.IntValue.Value, Pullenti.Morph.MorphGender.Masculine, Pullenti.Morph.MorphNumber.Singular);
        }
    }

    head.Tag = result;

    if (npt.Adjectives.Count > 0)
    {
        foreach (Pullenti.Ner.MetaToken adjective in npt.Adjectives)
        {
            // With several nouns in the phrase only the first adjective is attached.
            bool isFirst = adjective == npt.Adjectives[0];
            if (npt.MultiNouns && !isFirst)
            {
                break;
            }
            Pullenti.Semantic.SemObject adjectiveSem = CreateNptAdj(gr, npt, adjective);
            if (adjectiveSem == null)
            {
                continue;
            }
            gr.AddLink(Pullenti.Semantic.SemLinkType.Detail, result, adjectiveSem, "какой", false, null);
        }
    }

    if (npt.InternalNoun != null)
    {
        Pullenti.Semantic.SemObject inner = CreateNounGroup(gr, npt.InternalNoun);
        if (inner != null)
        {
            gr.AddLink(Pullenti.Semantic.SemLinkType.Detail, result, inner, null, false, null);
        }
    }

    gr.Objects.Add(result);
    return result;
}
/// <summary>
/// Walks the token chain from FirstToken and replaces text tokens that are missing from the
/// morphological dictionary with a corrected token when the morphology service offers one.
/// </summary>
/// <param name="lang">Language used when a token has no language of its own, and for re-analysing the corrected word.</param>
/// <remarks>
/// Each replacement is recorded in CorrectedTokens (created on first use), keyed by the new token
/// with its source text as the value.
/// </remarks>
void CorrectWordsByMorph(Pullenti.Morph.MorphLang lang)
{
    for (Pullenti.Ner.Token cur = FirstToken; cur != null; cur = cur.Next)
    {
        Pullenti.Ner.TextToken word = cur as Pullenti.Ner.TextToken;
        if (word == null)
        {
            continue;
        }
        if (cur.Morph.ContainsAttr("прдктв.", null))
        {
            continue;
        }

        // Only words of 4+ characters that the dictionary does not know are candidates.
        Pullenti.Morph.MorphClass dictClass = cur.GetMorphClassInDictionary();
        bool isCandidate = dictClass.IsUndefined && cur.LengthChar >= 4;
        if (!isCandidate)
        {
            continue;
        }
        // Leave probable surnames (not all-lowercase) and all-uppercase tokens untouched.
        if (cur.Morph.Class.IsProperSurname && !cur.Chars.IsAllLower)
        {
            continue;
        }
        if (cur.Chars.IsAllUpper)
        {
            continue;
        }

        string corrected = Pullenti.Morph.MorphologyService.CorrectWord(word.Term, (cur.Morph.Language.IsUndefined ? lang : cur.Morph.Language));
        if (corrected == null)
        {
            continue;
        }
        List <Pullenti.Morph.MorphToken> analysed = Pullenti.Morph.MorphologyService.Process(corrected, lang, null);
        if (analysed == null || analysed.Count != 1)
        {
            continue;
        }

        Pullenti.Ner.TextToken replacement = new Pullenti.Ner.TextToken(analysed[0], this, cur.BeginChar, cur.EndChar) { Chars = cur.Chars, Term0 = word.Term };
        if (replacement.GetMorphClassInDictionary().IsProperSurname)
        {
            continue;
        }

        // Splice the replacement into the chain in place of the current token.
        if (cur != FirstToken)
        {
            cur.Previous.Next = replacement;
        }
        else
        {
            FirstToken = replacement;
        }
        replacement.Next = cur.Next;
        cur = replacement;

        if (CorrectedTokens == null)
        {
            CorrectedTokens = new Dictionary <Pullenti.Ner.Token, string>();
        }
        CorrectedTokens.Add(cur, cur.GetSourceText());
    }
}
/// <summary>
/// Tries to recognize an additional (extension) phone number starting at <paramref name="t0"/>.
/// </summary>
/// <param name="t0">First token of the candidate; null yields null.</param>
/// <returns>
/// A token with ItemType AddNumber and the digits in Value, or null when no extension is recognized.
/// </returns>
/// <remarks>
/// Two forms are handled:
/// - '*' or '#' immediately followed by a number (optionally "NNN-NNN" without spaces), accepted when
///   the combined digits are 3 to 6 characters long;
/// - an optional comma, an optional '(' and a termin from m_PhoneTermins, optionally followed by a
///   number word/sign, then a number of 2 to 7 characters (optionally "NN-NN"); when the opening
///   parenthesis was present the closing one is required.
/// </remarks>
public static PhoneItemToken TryAttachAdditional(Pullenti.Ner.Token t0)
{
    Pullenti.Ner.Token t = t0;
    if (t == null)
    {
        return null;
    }

    if (t.IsChar(','))
    {
        t = t.Next;
    }
    else if (t.IsCharOf("*#") && (t.Next is Pullenti.Ner.NumberToken))
    {
        string val0 = (t.Next as Pullenti.Ner.NumberToken).GetSourceText();
        Pullenti.Ner.Token t1 = t.Next;
        if ((t1.Next != null && t1.Next.IsHiphen && !t1.IsWhitespaceAfter) && (t1.Next.Next is Pullenti.Ner.NumberToken) && !t1.Next.IsWhitespaceAfter)
        {
            t1 = t1.Next.Next;
            val0 += t1.GetSourceText();
        }
        if (val0.Length >= 3 && (val0.Length < 7))
        {
            return new PhoneItemToken(t, t1) { ItemType = PhoneItemType.AddNumber, Value = val0 };
        }
        // Otherwise fall through and try the keyword form from the same token.
    }

    bool br = false;
    if (t != null && t.IsChar('('))
    {
        if (t.Previous != null && t.Previous.IsComma)
        {
            return null;
        }
        br = true;
        t = t.Next;
    }

    Pullenti.Ner.Core.TerminToken to = m_PhoneTermins.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
    if (to == null)
    {
        // Without a keyword the number is accepted only in parentheses placed close to the previous token.
        if (!br)
        {
            return null;
        }
        if (t0.WhitespacesBeforeCount > 1)
        {
            return null;
        }
    }
    else if (to.Termin.Tag == null)
    {
        return null;
    }
    else
    {
        t = to.EndToken.Next;
    }
    if (t == null)
    {
        return null;
    }

    if (((t.IsValue("НОМЕР", null) || t.IsValue("N", null) || t.IsValue("#", null)) || t.IsValue("№", null) || t.IsValue("NUMBER", null)) || ((t.IsChar('+') && br)))
    {
        t = t.Next;
    }
    else if (to == null && !br)
    {
        // Already excluded above (to == null implies br here); kept as in the original.
        return null;
    }
    else if (t.IsValue("НОМ", null) || t.IsValue("ТЕЛ", null))
    {
        t = t.Next;
        if (t != null && t.IsChar('.'))
        {
            t = t.Next;
        }
    }
    if (t != null && t.IsCharOf(":,") && !t.IsNewlineAfter)
    {
        t = t.Next;
    }
    if (!(t is Pullenti.Ner.NumberToken))
    {
        return null;
    }

    string val = (t as Pullenti.Ner.NumberToken).GetSourceText();
    if ((t.Next != null && t.Next.IsHiphen && !t.IsWhitespaceAfter) && (t.Next.Next is Pullenti.Ner.NumberToken))
    {
        val += t.Next.Next.GetSourceText();
        t = t.Next.Next;
    }
    if ((val.Length < 2) || val.Length > 7)
    {
        return null;
    }
    if (br)
    {
        if (t.Next == null || !t.Next.IsChar(')'))
        {
            return null;
        }
        t = t.Next;
    }

    PhoneItemToken res = new PhoneItemToken(t0, t) { ItemType = PhoneItemType.AddNumber, Value = val };
    return res;
}
internal static Pullenti.Ner.ReferentToken CreateReferentToken(Pullenti.Ner.Person.PersonReferent p, Pullenti.Ner.Token begin, Pullenti.Ner.Token end, Pullenti.Ner.MorphCollection morph, List <PersonAttrToken> attrs, Pullenti.Ner.Person.PersonAnalyzer.PersonAnalyzerData ad, bool forAttribute, bool afterBePredicate) { if (p == null) { return(null); } bool hasPrefix = false; if (attrs != null) { foreach (PersonAttrToken a in attrs) { if (a.Typ == PersonAttrTerminType.BestRegards) { hasPrefix = true; } else { if (a.BeginChar < begin.BeginChar) { begin = a.BeginToken; if ((a.EndToken.Next != null && a.EndToken.Next.IsChar(')') && begin.Previous != null) && begin.Previous.IsChar('(')) { begin = begin.Previous; } } if (a.Typ != PersonAttrTerminType.Prefix) { if (a.Age != null) { p.AddSlot(Pullenti.Ner.Person.PersonReferent.ATTR_AGE, a.Age, false, 0); } if (a.PropRef == null) { p.AddSlot(Pullenti.Ner.Person.PersonReferent.ATTR_ATTR, a.Value, false, 0); } else { p.AddSlot(Pullenti.Ner.Person.PersonReferent.ATTR_ATTR, a, false, 0); } } else if (a.Gender == Pullenti.Morph.MorphGender.Feminie && !p.IsFemale) { p.IsFemale = true; } else if (a.Gender == Pullenti.Morph.MorphGender.Masculine && !p.IsMale) { p.IsMale = true; } } } } else if ((begin.Previous is Pullenti.Ner.TextToken) && (begin.WhitespacesBeforeCount < 3)) { if ((begin.Previous as Pullenti.Ner.TextToken).Term == "ИП") { PersonAttrToken a = new PersonAttrToken(begin.Previous, begin.Previous); a.PropRef = new Pullenti.Ner.Person.PersonPropertyReferent(); a.PropRef.Name = "индивидуальный предприниматель"; p.AddSlot(Pullenti.Ner.Person.PersonReferent.ATTR_ATTR, a, false, 0); begin = begin.Previous; } } Pullenti.Ner.MorphCollection m0 = new Pullenti.Ner.MorphCollection(); foreach (Pullenti.Morph.MorphBaseInfo it in morph.Items) { Pullenti.Morph.MorphBaseInfo bi = new Pullenti.Morph.MorphBaseInfo(); bi.CopyFrom(it); bi.Number = Pullenti.Morph.MorphNumber.Singular; if (bi.Gender == Pullenti.Morph.MorphGender.Undefined) { 
if (p.IsMale && !p.IsFemale) { bi.Gender = Pullenti.Morph.MorphGender.Masculine; } if (!p.IsMale && p.IsFemale) { bi.Gender = Pullenti.Morph.MorphGender.Feminie; } } m0.AddItem(bi); } morph = m0; if ((attrs != null && attrs.Count > 0 && !attrs[0].Morph.Case.IsUndefined) && morph.Case.IsUndefined) { morph.Case = attrs[0].Morph.Case; if (attrs[0].Morph.Number == Pullenti.Morph.MorphNumber.Singular) { morph.Number = Pullenti.Morph.MorphNumber.Singular; } if (p.IsMale && !p.IsFemale) { morph.Gender = Pullenti.Morph.MorphGender.Masculine; } else if (p.IsFemale) { morph.Gender = Pullenti.Morph.MorphGender.Feminie; } } if (begin.Previous != null) { Pullenti.Ner.Token ttt = begin.Previous; if (ttt.IsValue("ИМЕНИ", "ІМЕНІ")) { forAttribute = true; } else { if (ttt.IsChar('.') && ttt.Previous != null) { ttt = ttt.Previous; } if (ttt.WhitespacesAfterCount < 3) { if (ttt.IsValue("ИМ", "ІМ")) { forAttribute = true; } } } } if (forAttribute) { return new Pullenti.Ner.ReferentToken(p, begin, end) { Morph = morph, MiscAttrs = (int)p.m_PersonIdentityTyp } } ; if ((begin.Previous != null && begin.Previous.IsCommaAnd && (begin.Previous.Previous is Pullenti.Ner.ReferentToken)) && (begin.Previous.Previous.GetReferent() is Pullenti.Ner.Person.PersonReferent)) { Pullenti.Ner.ReferentToken rt00 = begin.Previous.Previous as Pullenti.Ner.ReferentToken; for (Pullenti.Ner.Token ttt = (Pullenti.Ner.Token)rt00; ttt != null;) { if (ttt.Previous == null || !(ttt.Previous.Previous is Pullenti.Ner.ReferentToken)) { break; } if (!ttt.Previous.IsCommaAnd || !(ttt.Previous.Previous.GetReferent() is Pullenti.Ner.Person.PersonReferent)) { break; } rt00 = ttt.Previous.Previous as Pullenti.Ner.ReferentToken; ttt = rt00; } if (rt00.BeginToken.GetReferent() is Pullenti.Ner.Person.PersonPropertyReferent) { bool ok = false; if ((rt00.BeginToken as Pullenti.Ner.ReferentToken).EndToken.Next != null && (rt00.BeginToken as Pullenti.Ner.ReferentToken).EndToken.Next.IsChar(':')) { ok = true; } else if 
(rt00.BeginToken.Morph.Number == Pullenti.Morph.MorphNumber.Plural) { ok = true; } if (ok) { p.AddSlot(Pullenti.Ner.Person.PersonReferent.ATTR_ATTR, rt00.BeginToken.GetReferent(), false, 0); } } } if (ad != null) { if (ad.OverflowLevel > 10) { return new Pullenti.Ner.ReferentToken(p, begin, end) { Morph = morph, MiscAttrs = (int)p.m_PersonIdentityTyp } } ; ad.OverflowLevel++; } List <PersonAttrToken> attrs1 = null; bool hasPosition = false; bool openBr = false; for (Pullenti.Ner.Token t = end.Next; t != null; t = t.Next) { if (t.IsTableControlChar) { break; } if (t.IsNewlineBefore) { if (t.NewlinesBeforeCount > 2) { break; } if (attrs1 != null && attrs1.Count > 0) { break; } Pullenti.Ner.Mail.Internal.MailLine ml = Pullenti.Ner.Mail.Internal.MailLine.Parse(t, 0, 0); if (ml != null && ml.Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.From) { break; } if (t.Chars.IsCapitalUpper) { PersonAttrToken attr1 = PersonAttrToken.TryAttach(t, (ad == null ? null : ad.LocalOntology), PersonAttrToken.PersonAttrAttachAttrs.No); bool ok1 = false; if (attr1 != null) { if (hasPrefix || attr1.IsNewlineAfter || ((attr1.EndToken.Next != null && attr1.EndToken.Next.IsTableControlChar))) { ok1 = true; } else { for (Pullenti.Ner.Token tt2 = t.Next; tt2 != null && tt2.EndChar <= attr1.EndChar; tt2 = tt2.Next) { if (tt2.IsWhitespaceBefore) { ok1 = true; } } } } else { Pullenti.Ner.Token ttt = CorrectTailAttributes(p, t); if (ttt != null && ttt != t) { end = (t = ttt); continue; } } if (!ok1) { break; } } } if (t.IsHiphen || t.IsCharOf("_>|")) { continue; } if (t.IsValue("МОДЕЛЬ", null)) { break; } Pullenti.Ner.Token tt = CorrectTailAttributes(p, t); if (tt != t && tt != null) { end = (t = tt); continue; } bool isBe = false; if (t.IsChar('(') && t == end.Next) { openBr = true; t = t.Next; if (t == null) { break; } PersonItemToken pit1 = PersonItemToken.TryAttach(t, null, PersonItemToken.ParseAttr.No, null); if ((pit1 != null && t.Chars.IsCapitalUpper && pit1.EndToken.Next != null) && (t is 
Pullenti.Ner.TextToken) && pit1.EndToken.Next.IsChar(')')) { if (pit1.Lastname != null) { Pullenti.Morph.MorphBaseInfo inf = new Pullenti.Morph.MorphBaseInfo() { Case = Pullenti.Morph.MorphCase.Nominative }; if (p.IsMale) { inf.Gender |= Pullenti.Morph.MorphGender.Masculine; } if (p.IsFemale) { inf.Gender |= Pullenti.Morph.MorphGender.Feminie; } PersonMorphCollection sur = PersonIdentityToken.CreateLastname(pit1, inf); if (sur != null) { p.AddFioIdentity(sur, null, null); end = (t = pit1.EndToken.Next); continue; } } } if ((t is Pullenti.Ner.TextToken) && t.Chars.IsLatinLetter) { List <PersonItemToken> pits = PersonItemToken.TryAttachList(t, null, PersonItemToken.ParseAttr.CanBeLatin, 10); if (((pits != null && pits.Count >= 2 && pits.Count <= 3) && pits[0].Chars.IsLatinLetter && pits[1].Chars.IsLatinLetter) && pits[pits.Count - 1].EndToken.Next != null && pits[pits.Count - 1].EndToken.Next.IsChar(')')) { Pullenti.Ner.Person.PersonReferent pr2 = new Pullenti.Ner.Person.PersonReferent(); int cou = 0; foreach (PersonItemToken pi in pits) { foreach (Pullenti.Ner.Slot si in p.Slots) { if (si.TypeName == Pullenti.Ner.Person.PersonReferent.ATTR_FIRSTNAME || si.TypeName == Pullenti.Ner.Person.PersonReferent.ATTR_MIDDLENAME || si.TypeName == Pullenti.Ner.Person.PersonReferent.ATTR_LASTNAME) { if (Pullenti.Ner.Core.MiscHelper.CanBeEqualCyrAndLatSS(si.Value.ToString(), pi.Value)) { cou++; pr2.AddSlot(si.TypeName, pi.Value, false, 0); break; } } } } if (cou == pits.Count) { foreach (Pullenti.Ner.Slot si in pr2.Slots) { p.AddSlot(si.TypeName, si.Value, false, 0); } end = (t = pits[pits.Count - 1].EndToken.Next); continue; } } } } else if (t.IsComma) { t = t.Next; if ((t is Pullenti.Ner.TextToken) && (t as Pullenti.Ner.TextToken).IsValue("WHO", null)) { continue; } if ((t is Pullenti.Ner.TextToken) && t.Chars.IsLatinLetter) { List <PersonItemToken> pits = PersonItemToken.TryAttachList(t, null, PersonItemToken.ParseAttr.CanBeLatin, 10); if ((pits != null && pits.Count >= 2 && 
pits.Count <= 3) && pits[0].Chars.IsLatinLetter && pits[1].Chars.IsLatinLetter) { Pullenti.Ner.Person.PersonReferent pr2 = new Pullenti.Ner.Person.PersonReferent(); int cou = 0; foreach (PersonItemToken pi in pits) { foreach (Pullenti.Ner.Slot si in p.Slots) { if (si.TypeName == Pullenti.Ner.Person.PersonReferent.ATTR_FIRSTNAME || si.TypeName == Pullenti.Ner.Person.PersonReferent.ATTR_MIDDLENAME || si.TypeName == Pullenti.Ner.Person.PersonReferent.ATTR_LASTNAME) { if (Pullenti.Ner.Core.MiscHelper.CanBeEqualCyrAndLatSS(si.Value.ToString(), pi.Value)) { cou++; pr2.AddSlot(si.TypeName, pi.Value, false, 0); break; } } } } if (cou == pits.Count) { foreach (Pullenti.Ner.Slot si in pr2.Slots) { p.AddSlot(si.TypeName, si.Value, false, 0); } end = (t = pits[pits.Count - 1].EndToken); continue; } } } } else if ((t is Pullenti.Ner.TextToken) && (t as Pullenti.Ner.TextToken).IsVerbBe) { t = t.Next; } else if (t.IsAnd && t.IsWhitespaceAfter && !t.IsNewlineAfter) { if (t == end.Next) { break; } t = t.Next; } else if (t.IsHiphen && t == end.Next) { t = t.Next; } else if (t.IsChar('.') && t == end.Next && hasPrefix) { t = t.Next; } Pullenti.Ner.Token ttt2 = CreateNickname(p, t); if (ttt2 != null) { t = (end = ttt2); continue; } if (t == null) { break; } PersonAttrToken attr = null; attr = PersonAttrToken.TryAttach(t, (ad == null ? 
null : ad.LocalOntology), PersonAttrToken.PersonAttrAttachAttrs.No); if (attr == null) { if ((t != null && t.GetReferent() != null && t.GetReferent().TypeName == "GEO") && attrs1 != null && openBr) { continue; } if ((t.Chars.IsCapitalUpper && openBr && t.Next != null) && t.Next.IsChar(')')) { if (p.FindSlot(Pullenti.Ner.Person.PersonReferent.ATTR_LASTNAME, null, true) == null) { p.AddSlot(Pullenti.Ner.Person.PersonReferent.ATTR_LASTNAME, t.GetSourceText().ToUpper(), false, 0); t = t.Next; end = t; } } if (t != null && t.IsValue("КОТОРЫЙ", null) && t.Morph.Number == Pullenti.Morph.MorphNumber.Singular) { if (!p.IsFemale && t.Morph.Gender == Pullenti.Morph.MorphGender.Feminie) { p.IsFemale = true; p.CorrectData(); } else if (!p.IsMale && t.Morph.Gender == Pullenti.Morph.MorphGender.Masculine) { p.IsMale = true; p.CorrectData(); } } break; } if (attr.Morph.Number == Pullenti.Morph.MorphNumber.Plural) { break; } if (attr.Typ == PersonAttrTerminType.BestRegards) { break; } if (attr.IsDoubt) { if (hasPrefix) { } else if (t.IsNewlineBefore && attr.IsNewlineAfter) { } else if (t.Previous != null && ((t.Previous.IsHiphen || t.Previous.IsChar(':')))) { } else { break; } } if (!morph.Case.IsUndefined && !attr.Morph.Case.IsUndefined) { if (((morph.Case & attr.Morph.Case)).IsUndefined && !isBe) { break; } } if (openBr) { if (Pullenti.Ner.Person.PersonAnalyzer.TryAttachPerson(t, ad, false, 0, true) != null) { break; } } if (attrs1 == null) { if (t.Previous.IsComma && t.Previous == end.Next) { Pullenti.Ner.Token ttt = attr.EndToken.Next; if (ttt != null) { if (ttt.Morph.Class.IsVerb) { if (Pullenti.Ner.Core.MiscHelper.CanBeStartOfSentence(begin)) { } else { break; } } } } attrs1 = new List <PersonAttrToken>(); } attrs1.Add(attr); if (attr.Typ == PersonAttrTerminType.Position || attr.Typ == PersonAttrTerminType.King) { if (!isBe) { hasPosition = true; } } else if (attr.Typ != PersonAttrTerminType.Prefix) { if (attr.Typ == PersonAttrTerminType.Other && attr.Age != null) { } else { 
attrs1 = null; break; } } t = attr.EndToken; } if (attrs1 != null && hasPosition && attrs != null) { Pullenti.Ner.Token te1 = attrs[attrs.Count - 1].EndToken.Next; Pullenti.Ner.Token te2 = attrs1[0].BeginToken; if (te1.WhitespacesAfterCount > te2.WhitespacesBeforeCount && (te2.WhitespacesBeforeCount < 2)) { } else if (attrs1[0].Age != null) { } else if (((te1.IsHiphen || te1.IsChar(':'))) && !attrs1[0].IsNewlineBefore && ((te2.Previous.IsComma || te2.Previous == end))) { } else { foreach (PersonAttrToken a in attrs) { if (a.Typ == PersonAttrTerminType.Position) { Pullenti.Ner.Token te = attrs1[attrs1.Count - 1].EndToken; if (te.Next != null) { if (!te.Next.IsChar('.')) { attrs1 = null; break; } } } } } } if (attrs1 != null && !hasPrefix) { PersonAttrToken attr = attrs1[attrs1.Count - 1]; bool ok = false; if (attr.EndToken.Next != null && attr.EndToken.Next.Chars.IsCapitalUpper) { ok = true; } else { Pullenti.Ner.ReferentToken rt = Pullenti.Ner.Person.PersonAnalyzer.TryAttachPerson(attr.BeginToken, ad, false, -1, false); if (rt != null && (rt.Referent is Pullenti.Ner.Person.PersonReferent)) { ok = true; } } if (ok) { if (attr.BeginToken.WhitespacesBeforeCount > attr.EndToken.WhitespacesAfterCount) { attrs1 = null; } else if (attr.BeginToken.WhitespacesBeforeCount == attr.EndToken.WhitespacesAfterCount) { Pullenti.Ner.ReferentToken rt1 = Pullenti.Ner.Person.PersonAnalyzer.TryAttachPerson(attr.BeginToken, ad, false, -1, false); if (rt1 != null) { attrs1 = null; } } } } if (attrs1 != null) { foreach (PersonAttrToken a in attrs1) { if (a.Typ != PersonAttrTerminType.Prefix) { if (a.Age != null) { p.AddSlot(Pullenti.Ner.Person.PersonReferent.ATTR_AGE, a.Age, true, 0); } else if (a.PropRef == null) { p.AddSlot(Pullenti.Ner.Person.PersonReferent.ATTR_ATTR, a.Value, false, 0); } else { p.AddSlot(Pullenti.Ner.Person.PersonReferent.ATTR_ATTR, a, false, 0); } end = a.EndToken; if (a.Gender != Pullenti.Morph.MorphGender.Undefined && !p.IsFemale && !p.IsMale) { if (a.Gender == 
Pullenti.Morph.MorphGender.Masculine && !p.IsMale) { p.IsMale = true; p.CorrectData(); } else if (a.Gender == Pullenti.Morph.MorphGender.Feminie && !p.IsFemale) { p.IsFemale = true; p.CorrectData(); } } } } if (openBr) { if (end.Next != null && end.Next.IsChar(')')) { end = end.Next; } } } int crlfCou = 0; for (Pullenti.Ner.Token t = end.Next; t != null; t = t.Next) { if (t.IsTableControlChar) { break; } if (t.IsNewlineBefore) { Pullenti.Ner.Mail.Internal.MailLine ml = Pullenti.Ner.Mail.Internal.MailLine.Parse(t, 0, 0); if (ml != null && ml.Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.From) { break; } crlfCou++; } if (t.IsCharOf(":,(") || t.IsHiphen) { continue; } if (t.IsChar('.') && t == end.Next) { continue; } Pullenti.Ner.Referent r = t.GetReferent(); if (r != null) { if (r.TypeName == "PHONE" || r.TypeName == "URI" || r.TypeName == "ADDRESS") { string ty = r.GetStringValue("SCHEME"); if (r.TypeName == "URI") { if ((ty != "mailto" && ty != "skype" && ty != "ICQ") && ty != "http") { break; } } p.AddContact(r); end = t; crlfCou = 0; continue; } } if (r is Pullenti.Ner.Person.PersonIdentityReferent) { p.AddSlot(Pullenti.Ner.Person.PersonReferent.ATTR_IDDOC, r, false, 0); end = t; crlfCou = 0; continue; } if (r != null && r.TypeName == "ORGANIZATION") { if (t.Next != null && t.Next.Morph.Class.IsVerb) { break; } if (begin.Previous != null && begin.Previous.Morph.Class.IsVerb) { break; } if (t.WhitespacesAfterCount == 1) { break; } bool exist = false; foreach (Pullenti.Ner.Slot s in p.Slots) { if (s.TypeName == Pullenti.Ner.Person.PersonReferent.ATTR_ATTR && (s.Value is Pullenti.Ner.Person.PersonPropertyReferent)) { Pullenti.Ner.Person.PersonPropertyReferent pr = s.Value as Pullenti.Ner.Person.PersonPropertyReferent; if (pr.FindSlot(Pullenti.Ner.Person.PersonPropertyReferent.ATTR_REF, r, true) != null) { exist = true; break; } } else if (s.TypeName == Pullenti.Ner.Person.PersonReferent.ATTR_ATTR && (s.Value is PersonAttrToken)) { PersonAttrToken pr = s.Value as 
PersonAttrToken; if (pr.Referent.FindSlot(Pullenti.Ner.Person.PersonPropertyReferent.ATTR_REF, r, true) != null) { exist = true; break; } } } if (!exist) { PersonAttrToken pat = new PersonAttrToken(t, t); pat.PropRef = new Pullenti.Ner.Person.PersonPropertyReferent() { Name = "сотрудник" }; pat.PropRef.AddSlot(Pullenti.Ner.Person.PersonPropertyReferent.ATTR_REF, r, false, 0); p.AddSlot(Pullenti.Ner.Person.PersonReferent.ATTR_ATTR, pat, false, 0); } continue; } if (r != null) { break; } if (!hasPrefix || crlfCou >= 2) { break; } Pullenti.Ner.ReferentToken rt = t.Kit.ProcessReferent("PERSON", t); if (rt != null) { break; } } if (ad != null) { ad.OverflowLevel--; } if (begin.IsValue("НА", null) && begin.Next != null && begin.Next.IsValue("ИМЯ", null)) { Pullenti.Ner.Token t0 = begin.Previous; if (t0 != null && t0.IsComma) { t0 = t0.Previous; } if (t0 != null && (t0.GetReferent() is Pullenti.Ner.Person.PersonIdentityReferent)) { p.AddSlot(Pullenti.Ner.Person.PersonReferent.ATTR_IDDOC, t0.GetReferent(), false, 0); } } return(new Pullenti.Ner.ReferentToken(p, begin, end) { Morph = morph, MiscAttrs = (int)p.m_PersonIdentityTyp }); }
/// <summary>
/// Tries to read a single phone item that starts at <paramref name="t0"/>:
/// a digit group, a delimiter ('.', hyphen), a '+' country code, a city code
/// in brackets or between slashes, or a textual prefix found in m_PhoneTermins.
/// </summary>
/// <param name="t0">First token to inspect; null yields null.</param>
/// <returns>The recognised item, or null when no phone item starts at the token.</returns>
static PhoneItemToken _TryAttach(Pullenti.Ner.Token t0)
{
    if (t0 == null)
    {
        return null;
    }

    Pullenti.Ner.NumberToken number = t0 as Pullenti.Ner.NumberToken;
    if (number != null)
    {
        // When the number-with-postfix helper matches and nothing separates the number
        // from what follows, go on only if a PHONE referent can be processed from the next token.
        if (Pullenti.Ner.Core.NumberHelper.TryParseNumberWithPostfix(t0) != null && !t0.IsWhitespaceAfter)
        {
            Pullenti.Ner.ReferentToken rt = t0.Kit.ProcessReferent("PHONE", t0.Next);
            if (rt == null)
            {
                return null;
            }
        }
        if (number.Typ == Pullenti.Ner.NumberSpellingType.Digit && !t0.Morph.Class.IsAdjective)
        {
            return new PhoneItemToken(t0, t0) { ItemType = PhoneItemType.Number, Value = t0.GetSourceText() };
        }
        return null;
    }

    if (t0.IsChar('.'))
    {
        return new PhoneItemToken(t0, t0) { ItemType = PhoneItemType.Delim, Value = "." };
    }
    if (t0.IsHiphen)
    {
        return new PhoneItemToken(t0, t0) { ItemType = PhoneItemType.Delim, Value = "-" };
    }

    if (t0.IsChar('+'))
    {
        Pullenti.Ner.NumberToken codeToken = t0.Next as Pullenti.Ner.NumberToken;
        if (codeToken == null || codeToken.Typ != Pullenti.Ner.NumberSpellingType.Digit)
        {
            return null;
        }

        // Country code: source digits with leading zeros stripped; all zeros is rejected.
        string code = t0.Next.GetSourceText();
        int firstNonZero = 0;
        while (firstNonZero < code.Length && code[firstNonZero] == '0')
        {
            firstNonZero++;
        }
        if (firstNonZero >= code.Length)
        {
            return null;
        }
        if (firstNonZero > 0)
        {
            code = code.Substring(firstNonZero);
        }
        return new PhoneItemToken(t0, t0.Next) { ItemType = PhoneItemType.CountryCode, Value = code };
    }

    // U+2011 directly before a two-character number is reported as a "-" delimiter.
    if (t0.IsChar((char)0x2011) && (t0.Next is Pullenti.Ner.NumberToken) && t0.Next.LengthChar == 2)
    {
        return new PhoneItemToken(t0, t0) { ItemType = PhoneItemType.Delim, Value = "-" };
    }

    if (t0.IsCharOf("("))
    {
        if (t0.Next is Pullenti.Ner.NumberToken)
        {
            // "(digits[-.digits]...)" -> city code; any other token inside the brackets rejects.
            Pullenti.Ner.Token closing = t0.Next;
            StringBuilder digits = new StringBuilder();
            for (; closing != null; closing = closing.Next)
            {
                if (closing.IsChar(')'))
                {
                    break;
                }
                if ((closing is Pullenti.Ner.NumberToken) && (closing as Pullenti.Ner.NumberToken).Typ == Pullenti.Ner.NumberSpellingType.Digit)
                {
                    digits.Append(closing.GetSourceText());
                }
                else if (!closing.IsHiphen && !closing.IsChar('.'))
                {
                    return null;
                }
            }
            if (closing == null || digits.Length == 0)
            {
                return null;
            }
            return new PhoneItemToken(t0, closing) { ItemType = PhoneItemType.CityCode, Value = digits.ToString(), IsInBrackets = true };
        }

        // "(termin)" -> bracketed prefix, only for termins without a Tag.
        Pullenti.Ner.Core.TerminToken inBrackets = m_PhoneTermins.TryParse(t0.Next, Pullenti.Ner.Core.TerminParseAttr.No);
        if (inBrackets != null && inBrackets.Termin.Tag == null
            && inBrackets.EndToken.Next != null && inBrackets.EndToken.Next.IsChar(')'))
        {
            return new PhoneItemToken(t0, inBrackets.EndToken.Next) { ItemType = PhoneItemType.Prefix, IsInBrackets = true, Value = string.Empty };
        }
        return null;
    }

    // "/NNN/" (three-character number between slashes) is treated like a bracketed city code.
    if ((t0.IsChar('/') && (t0.Next is Pullenti.Ner.NumberToken) && t0.Next.Next != null) && t0.Next.Next.IsChar('/') && t0.Next.LengthChar == 3)
    {
        return new PhoneItemToken(t0, t0.Next.Next) { ItemType = PhoneItemType.CityCode, Value = (t0.Next as Pullenti.Ner.NumberToken).Value.ToString(), IsInBrackets = true };
    }

    Pullenti.Ner.Token t1 = null;
    Pullenti.Ner.Phone.PhoneKind ki = Pullenti.Ner.Phone.PhoneKind.Undefined;
    if ((t0.IsValue("Т", null) && t0.Next != null && t0.Next.IsCharOf("\\/")) && t0.Next.Next != null && ((t0.Next.Next.IsValue("Р", null) || t0.Next.Next.IsValue("М", null))))
    {
        // "Т/Р" -> work phone, "Т/М" -> mobile phone.
        t1 = t0.Next.Next;
        ki = (t1.IsValue("Р", null) ? Pullenti.Ner.Phone.PhoneKind.Work : Pullenti.Ner.Phone.PhoneKind.Mobile);
    }
    else
    {
        Pullenti.Ner.Core.TerminToken tt = m_PhoneTermins.TryParse(t0, Pullenti.Ner.Core.TerminParseAttr.No);
        if (tt == null || tt.Termin.Tag != null)
        {
            // "НОМЕР <prefix>" extends the following prefix item back to this word.
            if (t0.IsValue("НОМЕР", null))
            {
                PhoneItemToken rr = _TryAttach(t0.Next);
                if (rr != null && rr.ItemType == PhoneItemType.Prefix)
                {
                    rr.BeginToken = t0;
                    return rr;
                }
            }
            return null;
        }
        if (tt.Termin.Tag2 is Pullenti.Ner.Phone.PhoneKind)
        {
            ki = (Pullenti.Ner.Phone.PhoneKind)tt.Termin.Tag2;
        }
        t1 = tt.EndToken;
    }

    PhoneItemToken res = new PhoneItemToken(t0, t1) { ItemType = PhoneItemType.Prefix, Value = string.Empty, Kind = ki };

    // Absorb trailing '.' / ':' into the prefix; table control chars are stepped over
    // without extending the item.
    while (true)
    {
        Pullenti.Ner.Token following = t1.Next;
        if (following != null && following.IsCharOf(".:"))
        {
            t1 = following;
            res.EndToken = following;
        }
        else if (following != null && following.IsTableControlChar)
        {
            t1 = following;
        }
        else
        {
            break;
        }
    }

    // A one-token prefix that is a single character or all upper case must be followed by whitespace.
    if (t0 == t1 && ((t0.BeginChar == t0.EndChar || t0.Chars.IsAllUpper)))
    {
        if (!t0.IsWhitespaceAfter)
        {
            return null;
        }
    }
    return res;
}
/// <summary>
/// Tries to read one organization-name item starting at <paramref name="t"/>.
/// Handles the "ОРДЕНА ..." phrase as an ignored part, delegates the core parsing to
/// _TryAttach, and then extends the result over hyphenated continuations, "and"-joined
/// items and trailing standard nouns known to m_StdNouns.
/// </summary>
/// <param name="t">First token to inspect; null yields null.</param>
/// <param name="prev">Previously attached name item, or null.</param>
/// <param name="extOnto">When true, a GEO referent or any text token other than ';' is accepted as a fallback item.</param>
/// <param name="first">When true, a leading Cyrillic preposition other than "ПО"/"ПРИ" rejects the item.</param>
/// <returns>The recognised name item, or null.</returns>
public static OrgItemNameToken TryAttach(Pullenti.Ner.Token t, OrgItemNameToken prev, bool extOnto, bool first)
{
    if (t == null)
    {
        return null;
    }

    if (t.IsValue("ОРДЕНА", null) && t.Next != null)
    {
        Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t.Next, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
        if (npt != null)
        {
            Pullenti.Ner.Token t1 = npt.EndToken;
            // After "ЗНАК"/"ДРУЖБА" one more noun phrase is absorbed if it follows closely.
            if (((t1.IsValue("ЗНАК", null) || t1.IsValue("ДРУЖБА", null))) && (t1.WhitespacesAfterCount < 2))
            {
                npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t1.Next, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                if (npt != null)
                {
                    t1 = npt.EndToken;
                }
            }
            return new OrgItemNameToken(t, t1) { IsIgnoredPart = true };
        }
        if (t.Next.GetMorphClassInDictionary().IsProperSurname)
        {
            return new OrgItemNameToken(t, t.Next) { IsIgnoredPart = true };
        }
        Pullenti.Ner.ReferentToken ppp = t.Kit.ProcessReferent("PERSON", t.Next);
        if (ppp != null)
        {
            return new OrgItemNameToken(t, ppp.EndToken) { IsIgnoredPart = true };
        }
        if ((t.WhitespacesAfterCount < 2) && Pullenti.Ner.Core.BracketHelper.CanBeStartOfSequence(t.Next, true, false))
        {
            Pullenti.Ner.Core.BracketSequenceToken br = Pullenti.Ner.Core.BracketHelper.TryParse(t.Next, Pullenti.Ner.Core.BracketParseAttr.NearCloseBracket, 10);
            if (br != null && (br.LengthChar < 40))
            {
                return new OrgItemNameToken(t, br.EndToken) { IsIgnoredPart = true };
            }
        }
    }

    if (first && t.Chars.IsCyrillicLetter && t.Morph.Class.IsPreposition)
    {
        if (!t.IsValue("ПО", null) && !t.IsValue("ПРИ", null))
        {
            return null;
        }
    }

    OrgItemNameToken res = _TryAttach(t, prev, extOnto);
    if (res == null)
    {
        if (extOnto)
        {
            if ((t.GetReferent() is Pullenti.Ner.Geo.GeoReferent) || (((t is Pullenti.Ner.TextToken) && !t.IsChar(';'))))
            {
                return new OrgItemNameToken(t, t) { Value = t.GetSourceText() };
            }
        }
        return null;
    }

    // First item: prefer a longer match from the ontology's pure organization names.
    if (prev == null && !extOnto)
    {
        if (t.Kit.Ontology != null)
        {
            Pullenti.Ner.Org.OrganizationAnalyzer.OrgAnalyzerData ad = t.Kit.Ontology._getAnalyzerData(Pullenti.Ner.Org.OrganizationAnalyzer.ANALYZER_NAME) as Pullenti.Ner.Org.OrganizationAnalyzer.OrgAnalyzerData;
            if (ad != null)
            {
                Pullenti.Ner.Core.TerminToken tok = ad.OrgPureNames.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
                if (tok != null && tok.EndChar > res.EndChar)
                {
                    res.EndToken = tok.EndToken;
                }
            }
        }
    }

    // A lower-case item followed by a non-lower-case one is accepted only for Latin pairs
    // or when the new item starts with a standard noun.
    if (prev != null && !extOnto)
    {
        if ((prev.Chars.IsAllLower && !res.Chars.IsAllLower && !res.IsStdTail) && !res.IsStdName)
        {
            bool bothLatin = prev.Chars.IsLatinLetter && res.Chars.IsLatinLetter;
            if (!bothLatin && m_StdNouns.TryParse(res.BeginToken, Pullenti.Ner.Core.TerminParseAttr.No) == null)
            {
                return null;
            }
        }
    }

    // "X-Y" written without spaces: absorb Y when its casing matches or it is all upper case.
    if ((res.EndToken.Next != null && !res.EndToken.IsWhitespaceAfter && res.EndToken.Next.IsHiphen) && !res.EndToken.Next.IsWhitespaceAfter)
    {
        Pullenti.Ner.TextToken hyphenTail = res.EndToken.Next.Next as Pullenti.Ner.TextToken;
        if (hyphenTail != null)
        {
            if (hyphenTail.Chars == res.Chars || hyphenTail.Chars.IsAllUpper)
            {
                res.EndToken = hyphenTail;
                res.Value = string.Format("{0}-{1}", res.Value, hyphenTail.Term);
            }
        }
    }

    // "X and Y" with single spaces: merge when Y parses as a name item (not a type item),
    // has the same casing and shares a grammatical case with X.
    if ((res.EndToken.Next != null && res.EndToken.Next.IsAnd && res.EndToken.WhitespacesAfterCount == 1) && res.EndToken.Next.WhitespacesAfterCount == 1)
    {
        OrgItemNameToken res1 = _TryAttach(res.EndToken.Next.Next, prev, extOnto);
        if (res1 != null && res1.Chars == res.Chars && OrgItemTypeToken.TryAttach(res.EndToken.Next.Next, false, null) == null)
        {
            if (!((res1.Morph.Case & res.Morph.Case)).IsUndefined)
            {
                res.EndToken = res1.EndToken;
                res.Value = string.Format("{0} {1} {2}", res.Value, (res.Kit.BaseLanguage.IsUa ? "ТА" : "И"), res1.Value);
            }
        }
    }

    // Count standard nouns inside the item.
    for (Pullenti.Ner.Token inner = res.BeginToken; inner != null && inner.EndChar <= res.EndChar; inner = inner.Next)
    {
        if (m_StdNouns.TryParse(inner, Pullenti.Ner.Core.TerminParseAttr.No) != null)
        {
            res.StdOrgNameNouns++;
        }
    }

    // If the item ends with a standard noun, try to extend it over following noun phrases
    // that also end with standard nouns.
    if (m_StdNouns.TryParse(res.EndToken, Pullenti.Ner.Core.TerminParseAttr.No) != null)
    {
        int cou = 1;
        bool non = false;
        Pullenti.Ner.Token et = res.EndToken;
        if (!_isNotTermNoun(res.EndToken))
        {
            non = true;
        }
        bool bracketOpen = false;
        for (Pullenti.Ner.Token tt = res.EndToken.Next; tt != null; tt = tt.Next)
        {
            if (tt.IsTableControlChar)
            {
                break;
            }
            if (tt.IsChar('('))
            {
                if (!non)
                {
                    break;
                }
                bracketOpen = true;
                continue;
            }
            if (tt.IsChar(')'))
            {
                bracketOpen = false;
                et = tt;
                break;
            }
            if (!(tt is Pullenti.Ner.TextToken))
            {
                break;
            }
            if (tt.WhitespacesBeforeCount > 1)
            {
                if (tt.NewlinesBeforeCount > 1)
                {
                    break;
                }
                if (tt.Chars != res.EndToken.Chars)
                {
                    break;
                }
            }
            if (tt.Morph.Class.IsPreposition || tt.IsCommaAnd)
            {
                continue;
            }
            Pullenti.Morph.MorphClass dd = tt.GetMorphClassInDictionary();
            if (!dd.IsNoun && !dd.IsAdjective)
            {
                break;
            }
            Pullenti.Ner.Core.NounPhraseToken npt2 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(tt, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
            if (npt2 == null)
            {
                if (dd == Pullenti.Morph.MorphClass.Adjective)
                {
                    continue;
                }
                break;
            }
            if (m_StdNouns.TryParse(npt2.EndToken, Pullenti.Ner.Core.TerminParseAttr.No) == null)
            {
                break;
            }
            if (npt2.EndToken.Chars != res.EndToken.Chars)
            {
                break;
            }
            // For these head nouns, or after "ПРИ", stop if an organization referent can be processed here.
            if ((npt2.EndToken.IsValue("УПРАВЛЕНИЕ", null) || npt2.EndToken.IsValue("ИНСТИТУТ", null) || npt2.EndToken.IsValue("УПРАВЛІННЯ", null)) || npt2.EndToken.IsValue("ІНСТИТУТ", null) || tt.Previous.IsValue("ПРИ", null))
            {
                Pullenti.Ner.ReferentToken rt = tt.Kit.ProcessReferent(Pullenti.Ner.Org.OrganizationAnalyzer.ANALYZER_NAME, tt);
                if (rt != null)
                {
                    break;
                }
            }
            cou++;
            tt = npt2.EndToken;
            if (!_isNotTermNoun(tt))
            {
                non = true;
                et = tt;
            }
        }
        // Commit the extension only when _isNotTermNoun returned false for at least one
        // noun and no bracket was left open.
        if (non && !bracketOpen)
        {
            res.StdOrgNameNouns += cou;
            res.EndToken = et;
        }
    }
    return res;
}
/// <summary>
/// Analyses the fragment chi[ind] and fills its Children either with a citation
/// (when the fragment carries multi-line changes) or with list-head / list-item /
/// content fragments built from its lines.
/// </summary>
/// <param name="chi">Sibling fragments.</param>
/// <param name="ind">Index of the fragment to analyse.</param>
/// <returns>
/// -1 when nothing was done; otherwise the number of consecutive entries of
/// <paramref name="chi"/>, starting at <paramref name="ind"/>, covered by the analysis.
/// </returns>
static int _analizeListItems(List <FragToken> chi, int ind)
{
    if (ind >= chi.Count)
    {
        return -1;
    }

    FragToken frag = chi[ind];
    Pullenti.Ner.Instrument.InstrumentKind kind = frag.Kind;
    bool supportedKind =
        kind == Pullenti.Ner.Instrument.InstrumentKind.Chapter
        || kind == Pullenti.Ner.Instrument.InstrumentKind.Clause
        || kind == Pullenti.Ner.Instrument.InstrumentKind.Content
        || kind == Pullenti.Ner.Instrument.InstrumentKind.Item
        || kind == Pullenti.Ner.Instrument.InstrumentKind.Subitem
        || kind == Pullenti.Ner.Instrument.InstrumentKind.ClausePart
        || kind == Pullenti.Ner.Instrument.InstrumentKind.Indention;
    if (!supportedKind)
    {
        return -1;
    }

    // --- Fragment with multi-line changes: wrap the changed text into a Citation child. ---
    if (frag.HasChanges && frag.MultilineChangesValue != null)
    {
        Pullenti.Ner.MetaToken changed = frag.MultilineChangesValue;
        FragToken citation = new FragToken(changed.BeginToken, changed.EndToken);
        citation.Kind = Pullenti.Ner.Instrument.InstrumentKind.Citation;
        frag.Children.Add(citation);

        // Pull surrounding brackets (and a trailing ';' or '.') into the citation.
        if (Pullenti.Ner.Core.BracketHelper.IsBracket(citation.BeginToken.Previous, true))
        {
            citation.BeginToken = citation.BeginToken.Previous;
        }
        if (Pullenti.Ner.Core.BracketHelper.IsBracket(citation.EndToken.Next, true))
        {
            citation.EndToken = citation.EndToken.Next;
            if (citation.EndToken.Next != null && citation.EndToken.Next.IsCharOf(";."))
            {
                citation.EndToken = citation.EndToken.Next;
            }
        }
        frag.FillByContentChildren();
        // Intentionally empty: the read of Children[0].HasChanges is kept as in the original.
        if (frag.Children[0].HasChanges)
        {
        }

        // Work out which structural kind the cited text represents.
        Pullenti.Ner.Instrument.InstrumentKind citKind = Pullenti.Ner.Instrument.InstrumentKind.Undefined;
        Pullenti.Ner.Decree.DecreeChangeReferent change = changed.Tag as Pullenti.Ner.Decree.DecreeChangeReferent;
        if (change != null)
        {
            if (change.Value != null && change.Value.NewItems.Count > 0)
            {
                // Use the first word of the first new item as the attribute mnemonic.
                string mnem = change.Value.NewItems[0];
                int space = mnem.IndexOf(' ');
                if (space > 0)
                {
                    mnem = mnem.Substring(0, space);
                }
                citKind = Pullenti.Ner.Decree.Internal.PartToken._getInstrKindByTyp(Pullenti.Ner.Decree.Internal.PartToken._getTypeByAttrName(mnem));
            }
            else if (change.Owners.Count > 0 && (change.Owners[0] is Pullenti.Ner.Decree.DecreePartReferent) && change.Kind == Pullenti.Ner.Decree.DecreeChangeKind.New)
            {
                // Take the kind of the owner's slot with the greatest non-zero rank.
                Pullenti.Ner.Decree.DecreePartReferent owner = change.Owners[0] as Pullenti.Ner.Decree.DecreePartReferent;
                int bestRank = 0;
                foreach (Pullenti.Ner.Slot slot in owner.Slots)
                {
                    Pullenti.Ner.Decree.Internal.PartToken.ItemType itemType = Pullenti.Ner.Decree.Internal.PartToken._getTypeByAttrName(slot.TypeName);
                    if (itemType == Pullenti.Ner.Decree.Internal.PartToken.ItemType.Undefined)
                    {
                        continue;
                    }
                    int rank = Pullenti.Ner.Decree.Internal.PartToken._getRank(itemType);
                    if (rank == 0)
                    {
                        continue;
                    }
                    if (rank > bestRank || bestRank == 0)
                    {
                        bestRank = rank;
                        citKind = Pullenti.Ner.Decree.Internal.PartToken._getInstrKindByTyp(itemType);
                    }
                }
            }
        }

        // Parse the cited text either as content of the detected kind or as a whole document.
        FragToken sub = null;
        if (citKind != Pullenti.Ner.Instrument.InstrumentKind.Undefined && citKind != Pullenti.Ner.Instrument.InstrumentKind.Appendix)
        {
            sub = new FragToken(changed.BeginToken, changed.EndToken);
            ContentAnalyzeWhapper analyzer = new ContentAnalyzeWhapper();
            analyzer.Analyze(sub, null, true, citKind);
            sub.Kind = Pullenti.Ner.Instrument.InstrumentKind.Content;
        }
        else
        {
            sub = FragToken.CreateDocument(changed.BeginToken, changed.EndChar, citKind);
        }

        if (sub != null && sub.Children.Count != 0)
        {
            bool childrenSpanWholeContent =
                (sub.Kind == Pullenti.Ner.Instrument.InstrumentKind.Content && sub.Children.Count > 0 && sub.Children[0].BeginToken == sub.BeginToken)
                && sub.Children[sub.Children.Count - 1].EndToken == sub.EndToken;
            if (childrenSpanWholeContent)
            {
                // Content wrapper adds nothing: attach its children directly.
                citation.Children.AddRange(sub.Children);
            }
            else
            {
                citation.Children.Add(sub);
            }
        }
        return 1;
    }

    // --- Ordinary fragment: split into lines and detect list items. ---
    int endChar = frag.EndChar;
    if (frag.Itok == null)
    {
        frag.Itok = InstrToken1.Parse(frag.BeginToken, true, null, 0, null, false, frag.EndChar, false, false);
    }
    List <LineToken> lines = LineToken.ParseList(frag.BeginToken, endChar, null);
    if (lines == null || lines.Count < 1)
    {
        return -1;
    }

    int ret = 1;
    if (frag.Kind == Pullenti.Ner.Instrument.InstrumentKind.Content)
    {
        // Following Content siblings may continue the same list; Editions/Comment siblings are skipped.
        for (int k = ind + 1; k < chi.Count; k++)
        {
            if (chi[k].Kind == Pullenti.Ner.Instrument.InstrumentKind.Content)
            {
                List <LineToken> more = LineToken.ParseList(chi[k].BeginToken, chi[k].EndChar, lines[lines.Count - 1]);
                if (more == null || more.Count < 1)
                {
                    break;
                }
                if (!more[0].IsListItem)
                {
                    bool lowerCaseHead = (more.Count > 1 && more[1].IsListItem && more[0].EndToken.IsCharOf(":")) && !more[0].BeginToken.Chars.IsCapitalUpper;
                    if (!lowerCaseHead)
                    {
                        break;
                    }
                    more[0].IsListItem = true;
                }
                lines.AddRange(more);
                ret = (k - ind) + 1;
            }
            else if (chi[k].Kind != Pullenti.Ner.Instrument.InstrumentKind.Editions && chi[k].Kind != Pullenti.Ner.Instrument.InstrumentKind.Comment)
            {
                break;
            }
        }
    }
    if (lines.Count < 2)
    {
        return -1;
    }

    // Exactly two leading items that do not start at number 1 are not treated as a list.
    if (lines[0].IsListItem && lines[1].IsListItem && lines[0].Number != 1)
    {
        if (lines.Count == 2 || !lines[2].IsListItem)
        {
            lines[1].IsListItem = false;
            lines[0].IsListItem = false;
        }
    }

    // Drop isolated items, and runs that have no newline-separated member and no ':' before them.
    for (int i = 0; i < lines.Count; i++)
    {
        if (!lines[i].IsListItem)
        {
            continue;
        }
        if (i > 0 && lines[i - 1].IsListItem)
        {
            continue;
        }
        bool hasFollower = ((i + 1) < lines.Count) && lines[i + 1].IsListItem;
        if (!hasFollower)
        {
            lines[i].IsListItem = false;
            continue;
        }
        bool newLine = false;
        int j;
        for (j = i + 1; j < lines.Count && lines[j].IsListItem; j++)
        {
            if (lines[j].IsNewlineBefore)
            {
                newLine = true;
            }
        }
        if (newLine)
        {
            continue;
        }
        if (i > 0 && lines[i - 1].EndToken.IsChar(':'))
        {
            continue;
        }
        for (j = i; j < lines.Count && lines[j].IsListItem; j++)
        {
            lines[j].IsListItem = false;
        }
    }

    // A closing "...;" item followed by a final "... ." line: the final line is an item too.
    if (lines.Count > 2)
    {
        LineToken last = lines[lines.Count - 1];
        LineToken beforeLast = lines[lines.Count - 2];
        if ((!last.IsListItem && last.EndToken.IsChar('.') && beforeLast.IsListItem) && beforeLast.EndToken.IsChar(';'))
        {
            if ((last.LengthChar < (beforeLast.LengthChar * 2)) || last.BeginToken.Chars.IsAllLower)
            {
                last.IsListItem = true;
            }
        }
    }

    // Merge adjacent non-item lines, except a ':'-terminated line that introduces a list.
    for (int i = 0; i < (lines.Count - 1); i++)
    {
        if (lines[i].IsListItem || lines[i + 1].IsListItem)
        {
            continue;
        }
        bool nextIntroducesList = ((i + 2) < lines.Count) && lines[i + 2].IsListItem && lines[i + 1].EndToken.IsChar(':');
        if (nextIntroducesList)
        {
            continue;
        }
        lines[i].EndToken = lines[i + 1].EndToken;
        lines.RemoveAt(i + 1);
        i--;
    }

    // Numbered items interleaved with unnumbered ones: hang the unnumbered lines
    // under the preceding numbered item (stored in its Tag).
    for (int i = 0; i < (lines.Count - 1); i++)
    {
        if (!lines[i].IsListItem || lines[i].Number != 1)
        {
            continue;
        }
        bool ok = true;
        int num = 1;
        int nonum = 0;
        for (int j = i + 1; j < lines.Count; j++)
        {
            if (!lines[j].IsListItem)
            {
                ok = false;
                break;
            }
            if (lines[j].Number > 0)
            {
                num++;
                if (lines[j].Number != num)
                {
                    ok = false;
                    break;
                }
            }
            else
            {
                nonum++;
            }
        }
        if (!ok || nonum == 0 || (num < 2))
        {
            break;
        }
        LineToken owner = lines[i];
        for (int j = i + 1; j < lines.Count; j++)
        {
            if (lines[j].Number > 0)
            {
                owner = lines[j];
                continue;
            }
            List <LineToken> nested = owner.Tag as List <LineToken>;
            if (nested == null)
            {
                nested = new List <LineToken>();
                owner.Tag = nested;
            }
            owner.EndToken = lines[j].EndToken;
            nested.Add(lines[j]);
            lines.RemoveAt(j);
            j--;
        }
    }

    int listItemCount = 0;
    foreach (LineToken line in lines)
    {
        if (line.IsListItem)
        {
            listItemCount++;
        }
    }
    if (listItemCount < 2)
    {
        return -1;
    }

    // Per run of items: reset numbers unless they are 1,2,3...; and split off a head line
    // when the first item hides the common item marker in its middle.
    for (int i = 0; i < lines.Count; i++)
    {
        if (!lines[i].IsListItem)
        {
            continue;
        }
        int runStart = i;
        bool sequential = true;
        int expected = 1;
        for (; i < lines.Count; i++, expected++)
        {
            if (!lines[i].IsListItem)
            {
                break;
            }
            if (lines[i].Number != expected)
            {
                sequential = false;
            }
        }
        if (!sequential)
        {
            for (i = runStart; i < lines.Count; i++)
            {
                if (!lines[i].IsListItem)
                {
                    break;
                }
                lines[i].Number = 0;
            }
        }
        if (expected > 3
            && lines[runStart].BeginToken.GetSourceText() != lines[runStart + 1].BeginToken.GetSourceText()
            && lines[runStart + 1].BeginToken.GetSourceText() == lines[runStart + 2].BeginToken.GetSourceText())
        {
            string marker = lines[runStart + 1].BeginToken.GetSourceText();
            bool sameMarker = true;
            for (int j = runStart + 2; j < i; j++)
            {
                if (marker != lines[j].BeginToken.GetSourceText())
                {
                    sameMarker = false;
                    break;
                }
            }
            if (!sameMarker)
            {
                continue;
            }
            // Search the first item backwards for the marker token.
            Pullenti.Ner.Token split = null;
            bool found = false;
            for (split = lines[runStart].EndToken.Previous; split != null && split != lines[runStart].BeginToken; split = split.Previous)
            {
                if (split.GetSourceText() == marker)
                {
                    found = true;
                    break;
                }
            }
            if (found)
            {
                LineToken head = new LineToken(lines[runStart].BeginToken, split.Previous);
                lines[runStart].BeginToken = split;
                lines.Insert(runStart, head);
                i++;
            }
        }
    }

    // Materialise the lines as child fragments.
    foreach (LineToken line in lines)
    {
        line.CorrectBeginToken();
        FragToken child = new FragToken(line.BeginToken, line.EndToken);
        child.Kind = (line.IsListItem ? Pullenti.Ner.Instrument.InstrumentKind.ListItem : Pullenti.Ner.Instrument.InstrumentKind.Content);
        child.Number = line.Number;
        if (child.Kind == Pullenti.Ner.Instrument.InstrumentKind.Content && child.EndToken.IsChar(':'))
        {
            child.Kind = Pullenti.Ner.Instrument.InstrumentKind.ListHead;
        }
        frag.Children.Add(child);

        List <LineToken> nested = line.Tag as List <LineToken>;
        if (nested == null)
        {
            continue;
        }
        foreach (LineToken nestedLine in nested)
        {
            FragToken nestedItem = new FragToken(nestedLine.BeginToken, nestedLine.EndToken);
            nestedItem.Kind = Pullenti.Ner.Instrument.InstrumentKind.ListItem;
            child.Children.Add(nestedItem);
        }
        // Text of the item before its first nested line becomes a leading Content child.
        if (child.BeginChar < child.Children[0].BeginChar)
        {
            FragToken lead = new FragToken(child.BeginToken, child.Children[0].BeginToken.Previous);
            lead.Kind = Pullenti.Ner.Instrument.InstrumentKind.Content;
            child.Children.Insert(0, lead);
        }
    }
    return ret;
}
/// <summary>
/// Reads a numbering marker that starts at <paramref name="t"/> and records it in
/// <paramref name="res"/> (Numbers, NumTyp, NumSuffix, MinNumber, NumBeginToken,
/// NumEndToken, EndToken). Four forms are tried in order: a digit number below 3000
/// (optionally a range and dotted/bracketed sub-numbers), a number spelled in words,
/// a roman number, and a single letter followed by '.' or ')'.
/// </summary>
/// <param name="t">Token where the number is expected.</param>
/// <param name="res">Token being filled; modified in place.</param>
/// <param name="prev">Previous parsed token, used only to validate a letter marker; may be null.</param>
static void _parseNumber(Pullenti.Ner.Token t, InstrToken1 res, InstrToken1 prev)
{
    Pullenti.Ner.NumberToken asNumber = t as Pullenti.Ner.NumberToken;

    // ---------- 1. Digit number ----------
    if (asNumber != null && asNumber.IntValue != null
        && asNumber.Typ == Pullenti.Ner.NumberSpellingType.Digit
        && asNumber.IntValue.Value < 3000)
    {
        // Intentionally empty: kept exactly as in the original.
        if (res.Numbers.Count >= 4)
        {
        }
        if (t.Morph.Class.IsAdjective && res.TypContainerRank == 0)
        {
            return;
        }
        Pullenti.Ner.Core.NumberExToken withPostfix = Pullenti.Ner.Core.NumberHelper.TryParseNumberWithPostfix(t);
        if (withPostfix != null && !withPostfix.EndToken.IsWhitespaceBefore)
        {
            return;
        }

        // Number followed by a lower-case word: unless that word is a single glued letter,
        // record the bare number (only when it is the first one) and stop.
        if ((t.Next != null && (t.WhitespacesAfterCount < 3) && t.Next.Chars.IsLetter) && t.Next.Chars.IsAllLower)
        {
            bool gluedSingleLetter = !t.IsWhitespaceAfter && t.Next.LengthChar == 1;
            if (!gluedSingleLetter)
            {
                if (res.Numbers.Count == 0)
                {
                    res.NumTyp = NumberTypes.Digit;
                    res.Numbers.Add(asNumber.Value.ToString());
                    res.EndToken = t;
                    res.NumEndToken = t;
                    res.NumBeginToken = t;
                }
                return;
            }
        }

        res.NumTyp = (res.NumTyp == NumberTypes.Undefined ? NumberTypes.Digit : NumberTypes.Combo);
        if (res.Numbers.Count > 0 && t.IsWhitespaceBefore)
        {
            return;
        }
        if (res.Numbers.Count == 0)
        {
            res.NumBeginToken = t;
        }

        // Range "N-M" or "N)-M" with M > N: N goes to MinNumber, M becomes the number.
        Pullenti.Ner.Token cur = t;
        int lowValue = asNumber.IntValue.Value;
        Pullenti.Ner.NumberToken high = null;
        if (t.Next != null && t.Next.IsHiphen)
        {
            high = t.Next.Next as Pullenti.Ner.NumberToken;
        }
        if (high != null && high.IntValue != null && high.IntValue.Value > lowValue)
        {
            res.MinNumber = asNumber.Value.ToString();
            cur = high;
        }
        else
        {
            Pullenti.Ner.NumberToken highAfterBracket = null;
            if ((t.Next != null && t.Next.IsCharOf(")") && t.Next.Next != null) && t.Next.Next.IsHiphen)
            {
                highAfterBracket = t.Next.Next.Next as Pullenti.Ner.NumberToken;
            }
            if (highAfterBracket != null && highAfterBracket.IntValue != null && highAfterBracket.IntValue.Value > lowValue)
            {
                res.MinNumber = asNumber.Value.ToString();
                cur = highAfterBracket;
            }
        }

        res.Numbers.Add((cur as Pullenti.Ner.NumberToken).Value.ToString());
        res.NumEndToken = cur;
        res.EndToken = cur;
        res.NumSuffix = null;

        // Sub-numbers: ".N" / "_N", "(N)" / "<N>", or one glued letter after the first number.
        for (Pullenti.Ner.Token scan = cur.Next; scan != null && (res.Numbers.Count < 4); scan = scan.Next)
        {
            Pullenti.Ner.NumberToken following = scan.Next as Pullenti.Ner.NumberToken;
            bool dotted = false;
            bool bracketed = false;
            if ((scan.IsCharOf("._") && !scan.IsWhitespaceAfter && following != null)
                && (following.Typ == Pullenti.Ner.NumberSpellingType.Digit
                    || ((following.Typ == Pullenti.Ner.NumberSpellingType.Words) && scan.Next.Chars.IsLatinLetter && !scan.IsWhitespaceAfter)))
            {
                dotted = true;
            }
            else if ((scan.IsCharOf("(<") && following != null && scan.Next.Next != null) && scan.Next.Next.IsCharOf(")>"))
            {
                bracketed = true;
            }

            if (dotted || bracketed)
            {
                scan = scan.Next;
                res.Numbers.Add(following.Value.ToString());
                int depth = res.Numbers.Count;
                res.NumTyp = (depth == 2 ? NumberTypes.TwoDigits : (depth == 3 ? NumberTypes.ThreeDigits : NumberTypes.FourDigits));
                // Step onto a closing bracket: always for "(N)", otherwise only before a '.'.
                if ((scan.Next != null && scan.Next.IsCharOf(")>") && scan.Next.Next != null) && scan.Next.Next.IsChar('.'))
                {
                    scan = scan.Next;
                }
                else if (bracketed)
                {
                    scan = scan.Next;
                }
                res.NumEndToken = scan;
                res.EndToken = scan;
                cur = scan;
                continue;
            }

            if (((scan is Pullenti.Ner.TextToken) && scan.LengthChar == 1 && scan.Chars.IsLetter) && !scan.IsWhitespaceBefore && res.Numbers.Count == 1)
            {
                res.Numbers.Add((scan as Pullenti.Ner.TextToken).Term);
                res.NumTyp = NumberTypes.Combo;
                res.NumEndToken = scan;
                res.EndToken = scan;
                cur = scan;
                continue;
            }
            break;
        }

        if (cur.Next != null && cur.Next.IsCharOf(")."))
        {
            Pullenti.Ner.Token suffix = cur.Next;
            res.NumSuffix = suffix.GetSourceText();
            res.NumEndToken = suffix;
            res.EndToken = suffix;
        }
        return;
    }

    // ---------- 2. Number in words (only inside a container, as the first number) ----------
    if ((asNumber != null && asNumber.Typ == Pullenti.Ner.NumberSpellingType.Words && res.TypContainerRank > 0) && res.Numbers.Count == 0)
    {
        res.Numbers.Add(asNumber.Value.ToString());
        res.NumTyp = NumberTypes.Digit;
        res.NumBeginToken = t;
        Pullenti.Ner.Token wordsEnd = t;
        if (t.Next != null && t.Next.IsChar('.'))
        {
            wordsEnd = t.Next;
            res.NumSuffix = ".";
        }
        res.NumEndToken = wordsEnd;
        res.EndToken = wordsEnd;
        return;
    }

    // ---------- 3. Roman number ----------
    Pullenti.Ner.NumberToken roman = Pullenti.Ner.Core.NumberHelper.TryParseRoman(t);
    if (roman != null)
    {
        // A result of "10" directly before ')' and any result of "100" are discarded.
        bool tenBeforeBracket = (roman.Value == "10" && t.Next != null) && t.Next.IsChar(')');
        if (tenBeforeBracket || roman.Value == "100")
        {
            roman = null;
        }
    }
    if (roman != null && roman.Typ == Pullenti.Ner.NumberSpellingType.Roman)
    {
        res.NumTyp = (res.NumTyp == NumberTypes.Undefined ? NumberTypes.Roman : NumberTypes.Combo);
        if (res.Numbers.Count > 0 && t.IsWhitespaceBefore)
        {
            return;
        }
        if (res.Numbers.Count == 0)
        {
            res.NumBeginToken = t;
        }
        res.Numbers.Add(roman.Value.ToString());
        Pullenti.Ner.Token tail = roman.EndToken;
        res.NumEndToken = tail;
        res.EndToken = tail;

        if (res.NumTyp == NumberTypes.Roman && ((res.Typ == InstrToken1.Types.Chapter || res.Typ == InstrToken1.Types.Section || res.Typ == InstrToken1.Types.Line)))
        {
            // Up to two digit sub-numbers ("I.1", "I.1.2", also with '_' or '<' '>').
            NumberTypes[] deeperTypes = { NumberTypes.TwoDigits, NumberTypes.ThreeDigits };
            foreach (NumberTypes deeper in deeperTypes)
            {
                bool hasSubNumber = (tail.Next != null && tail.Next.IsCharOf("._<") && (tail.Next.Next is Pullenti.Ner.NumberToken))
                    && (tail.Next.Next as Pullenti.Ner.NumberToken).Typ == Pullenti.Ner.NumberSpellingType.Digit;
                if (!hasSubNumber)
                {
                    break;
                }
                tail = tail.Next.Next;
                res.Numbers.Add((tail as Pullenti.Ner.NumberToken).Value.ToString());
                res.NumTyp = deeper;
                if (tail.Next != null && tail.Next.IsChar('>'))
                {
                    tail = tail.Next;
                }
                res.NumEndToken = tail;
                res.EndToken = tail;
            }
        }

        if (tail.Next != null && tail.Next.IsCharOf(")."))
        {
            Pullenti.Ner.Token suffix = tail.Next;
            res.NumSuffix = suffix.GetSourceText();
            res.NumEndToken = suffix;
            res.EndToken = suffix;
        }
        return;
    }

    // ---------- 4. Single letter at the very beginning of res ----------
    Pullenti.Ner.TextToken letter = t as Pullenti.Ner.TextToken;
    if (!((letter != null && t.LengthChar == 1 && t.Chars.IsLetter) && t == res.BeginToken))
    {
        return;
    }

    // "aN." - letter glued to a number followed by a dot.
    if ((!t.IsWhitespaceAfter && (t.Next is Pullenti.Ner.NumberToken) && t.Next.Next != null) && t.Next.Next.IsChar('.'))
    {
        res.NumBeginToken = t;
        res.NumTyp = NumberTypes.Digit;
        res.Numbers.Add((t.Next as Pullenti.Ner.NumberToken).Value.ToString());
        res.NumSuffix = letter.Term + ".";
        Pullenti.Ner.Token dot = t.Next.Next;
        res.NumEndToken = dot;
        res.EndToken = dot;
        return;
    }

    if (t.Next == null || !t.Next.IsCharOf(".)"))
    {
        return;
    }

    // "a.N)" written without spaces - letter plus number, suffix ")".
    if (((t.Next.IsChar('.') && (t.Next.Next is Pullenti.Ner.NumberToken) && t.Next.Next.Next != null) && t.Next.Next.Next.IsChar(')') && !t.Next.IsWhitespaceAfter) && !t.Next.Next.IsWhitespaceAfter)
    {
        res.NumTyp = NumberTypes.TwoDigits;
        res.Numbers.Add(letter.Term);
        res.Numbers.Add((t.Next.Next as Pullenti.Ner.NumberToken).Value.ToString());
        res.NumSuffix = ")";
        res.NumBeginToken = t;
        Pullenti.Ner.Token closing = t.Next.Next.Next;
        res.NumEndToken = closing;
        res.EndToken = closing;
        return;
    }

    // "A." (upper-case letter) and "a.N" are not taken as letter markers.
    if (t.Next.IsChar('.') && ((t.Chars.IsAllUpper || (t.Next.Next is Pullenti.Ner.NumberToken))))
    {
        return;
    }

    // A dotted letter whose LastNumber is above 1 must continue the previous token's numbering.
    InstrToken1 probe = new InstrToken1(t, t.Next);
    probe.Numbers.Add(letter.Term);
    if (probe.LastNumber > 1 && t.Next.IsCharOf(".") && ((prev == null || (prev.LastNumber + 1) != probe.LastNumber)))
    {
        return;
    }

    res.NumTyp = NumberTypes.Letter;
    res.Numbers.Add(letter.Term);
    res.NumBeginToken = t;
    Pullenti.Ner.Token letterSuffix = t.Next;
    res.NumEndToken = letterSuffix;
    res.EndToken = letterSuffix;
    res.NumSuffix = letterSuffix.GetSourceText();
}
/// <summary>
/// Builds a textual name from the tokens lying between <paramref name="begin"/> and
/// <paramref name="end"/> (inclusive), optionally replacing words by a normal form
/// selected through the morphological filters.
/// </summary>
/// <param name="begin">First token of the range.</param>
/// <param name="end">Last token of the range.</param>
/// <param name="cla">Morph class filter; ignored when its Value is 0.</param>
/// <param name="mc">Morph case filter; ignored when undefined.</param>
/// <param name="gender">
/// Morph gender filter; ignored when Undefined. If no word form passes with the gender
/// filter, the search is repeated with only the class and case filters.
/// </param>
/// <param name="ignoreBracketsAndHiphens">
/// When true, bracket tokens are not emitted on their own: an opening "(", "&lt;" or "["
/// whose bracket sequence parses and ends inside the range is emitted as
/// " {open}{inner text}{close}", and hyphens with whitespace on either side are skipped.
/// </param>
/// <param name="ignoreGeoReferent">
/// When true, meta tokens other than <paramref name="begin"/> whose referent type name
/// is "GEO" are skipped.
/// </param>
/// <returns>
/// The assembled text, or null when an argument is null, when <paramref name="begin"/>
/// ends after <paramref name="end"/> starts (and they are different tokens), or when
/// nothing was collected.
/// </returns>
public static string GetNameEx(Pullenti.Ner.Token begin, Pullenti.Ner.Token end, Pullenti.Morph.MorphClass cla, Pullenti.Morph.MorphCase mc, Pullenti.Morph.MorphGender gender = Pullenti.Morph.MorphGender.Undefined, bool ignoreBracketsAndHiphens = false, bool ignoreGeoReferent = false)
{
    if (begin == null || end == null)
    {
        return null;
    }
    if (begin.EndChar > end.BeginChar && begin != end)
    {
        return null;
    }

    StringBuilder output = new StringBuilder();
    // Accumulates "a-b-..." parts of a hyphenated word until its last part is reached.
    string pendingPrefix = null;

    for (Pullenti.Ner.Token t = begin; t != null && t.EndChar <= end.EndChar; t = t.Next)
    {
        // Hard cap on the size of the produced text.
        if (output.Length > 1000)
        {
            break;
        }
        if (t.IsTableControlChar)
        {
            continue;
        }

        if (ignoreBracketsAndHiphens)
        {
            if (BracketHelper.IsBracket(t, false))
            {
                if (t == end)
                {
                    break;
                }
                if (t.IsCharOf("(<["))
                {
                    BracketSequenceToken seq = BracketHelper.TryParse(t, BracketParseAttr.No, 100);
                    if (seq != null && seq.EndChar <= end.EndChar)
                    {
                        string inner = GetNameEx(seq.BeginToken.Next, seq.EndToken.Previous, Pullenti.Morph.MorphClass.Undefined, Pullenti.Morph.MorphCase.Undefined, Pullenti.Morph.MorphGender.Undefined, ignoreBracketsAndHiphens, false);
                        if (inner != null)
                        {
                            // A bracket sequence closing the range that wraps exactly one
                            // non-letter, non-referent token is left out of the result.
                            bool dropSequence = seq.EndChar == end.EndChar
                                && seq.BeginToken.Next == seq.EndToken.Previous
                                && !seq.BeginToken.Next.Chars.IsLetter
                                && !(seq.BeginToken.Next is Pullenti.Ner.ReferentToken);
                            if (!dropSequence)
                            {
                                output.AppendFormat(" {0}{1}{2}", t.GetSourceText(), inner, seq.EndToken.GetSourceText());
                            }
                        }
                        // Resume after the closing bracket.
                        t = seq.EndToken;
                    }
                }
                continue;
            }
            if (t.IsHiphen)
            {
                if (t == end)
                {
                    break;
                }
                if (t.IsWhitespaceBefore || t.IsWhitespaceAfter)
                {
                    continue;
                }
            }
        }

        Pullenti.Ner.TextToken word = t as Pullenti.Ner.TextToken;
        if (word != null)
        {
            bool hasHyphenatedContinuation = !ignoreBracketsAndHiphens
                && word.Next != null
                && word.Next.IsHiphen
                && (word.Next.Next is Pullenti.Ner.TextToken)
                && word != end
                && word.Next != end;
            if (hasHyphenatedContinuation)
            {
                pendingPrefix = pendingPrefix == null
                    ? word.Term
                    : string.Format("{0}-{1}", pendingPrefix, word.Term);
                // Step onto the hyphen; the loop increment then moves to the next word.
                t = word.Next;
                // Kept from the original flow; the guard above already rules out word.Next == end.
                if (t == end)
                {
                    break;
                }
                continue;
            }

            string piece = null;
            if (cla.Value != 0 || !mc.IsUndefined || gender != Pullenti.Morph.MorphGender.Undefined)
            {
                piece = PickNormalCaseForName(word, cla, mc, gender);
                if (piece == null && gender != Pullenti.Morph.MorphGender.Undefined)
                {
                    // Nothing matched with the gender filter: retry on class and case only.
                    piece = PickNormalCaseForName(word, cla, mc, Pullenti.Morph.MorphGender.Undefined);
                }
            }
            if (piece == null)
            {
                piece = word.Term;
                if (word.Chars.IsLastLower && word.LengthChar > 2)
                {
                    // Use the source text, cut right after its last upper-case character
                    // (left whole when it contains no upper-case character).
                    piece = word.GetSourceText();
                    int lastUpper = piece.Length - 1;
                    while (lastUpper >= 0 && !char.IsUpper(piece[lastUpper]))
                    {
                        lastUpper--;
                    }
                    if (lastUpper >= 0)
                    {
                        piece = piece.Substring(0, lastUpper + 1);
                    }
                }
            }

            if (pendingPrefix != null)
            {
                string joiner = ignoreBracketsAndHiphens ? " " : "-";
                piece = string.Format("{0}{1}{2}", pendingPrefix, joiner, piece);
            }
            pendingPrefix = null;

            if (output.Length > 0 && piece.Length > 0)
            {
                if (char.IsLetterOrDigit(piece[0]))
                {
                    // No blank right after a hyphen already present in the output.
                    if (output[output.Length - 1] != '-')
                    {
                        output.Append(' ');
                    }
                }
                else if (!ignoreBracketsAndHiphens && BracketHelper.CanBeStartOfSequence(word, false, false))
                {
                    output.Append(' ');
                }
            }
            output.Append(piece);
        }
        else if (t is Pullenti.Ner.NumberToken)
        {
            if (output.Length > 0 && (t.IsWhitespaceBefore || output[output.Length - 1] != '-'))
            {
                output.Append(' ');
            }
            Pullenti.Ner.NumberToken number = t as Pullenti.Ner.NumberToken;
            bool singleAdjectiveWord = t.Morph.Class.IsAdjective
                && number.Typ == Pullenti.Ner.NumberSpellingType.Words
                && number.BeginToken == number.EndToken
                && (number.BeginToken is Pullenti.Ner.TextToken);
            if (singleAdjectiveWord)
            {
                // Adjective number spelled as one word: keep the word's term, not the value.
                output.Append((number.BeginToken as Pullenti.Ner.TextToken).Term);
            }
            else
            {
                output.Append(number.Value);
            }
        }
        else if (t is Pullenti.Ner.MetaToken)
        {
            if (ignoreGeoReferent && t != begin && t.GetReferent() != null && t.GetReferent().TypeName == "GEO")
            {
                continue;
            }
            Pullenti.Ner.MetaToken meta = t as Pullenti.Ner.MetaToken;
            string nested = GetNameEx(meta.BeginToken, meta.EndToken, cla, mc, gender, ignoreBracketsAndHiphens, ignoreGeoReferent);
            if (!string.IsNullOrEmpty(nested))
            {
                if (output.Length > 0 && (t.IsWhitespaceBefore || output[output.Length - 1] != '-'))
                {
                    output.Append(' ');
                }
                output.Append(nested);
            }
        }

        if (t == end)
        {
            break;
        }
    }

    return output.Length == 0 ? null : output.ToString();
}

/// <summary>
/// Scans the word forms of <paramref name="word"/> and returns the NormalCase of a form that
/// passes every active filter. The first passing form is taken; a later passing form
/// replaces it only when its NormalCase equals the token's Term.
/// </summary>
/// <param name="word">Text token whose morphology items are inspected.</param>
/// <param name="cla">Morph class filter; ignored when its Value is 0.</param>
/// <param name="mc">Morph case filter; ignored when undefined.</param>
/// <param name="gender">Morph gender filter; ignored when Undefined.</param>
/// <returns>The selected NormalCase, or null when no word form passes.</returns>
private static string PickNormalCaseForName(Pullenti.Ner.TextToken word, Pullenti.Morph.MorphClass cla, Pullenti.Morph.MorphCase mc, Pullenti.Morph.MorphGender gender)
{
    string chosen = null;
    foreach (Pullenti.Morph.MorphBaseInfo item in word.Morph.Items)
    {
        Pullenti.Morph.MorphWordForm form = item as Pullenti.Morph.MorphWordForm;
        if (form == null)
        {
            continue;
        }
        if (cla.Value != 0 && ((form.Class.Value & cla.Value)) == 0)
        {
            continue;
        }
        if (!mc.IsUndefined && ((form.Case & mc)).IsUndefined)
        {
            continue;
        }
        if (gender != Pullenti.Morph.MorphGender.Undefined && ((form.Gender & gender)) == Pullenti.Morph.MorphGender.Undefined)
        {
            continue;
        }
        if (chosen == null || form.NormalCase == word.Term)
        {
            chosen = form.NormalCase;
        }
    }
    return chosen;
}