/// <summary>
/// Some specific cases: recognises the "1C" / "1-C" denomination (Latin or
/// Cyrillic letter) and the generic "digits immediately followed by a
/// non-lowercase letter token" pattern.
/// </summary>
/// <param name="t">Token to start from; may be null.</param>
/// <returns>
/// A referent token wrapping a new <c>DenominationReferent</c>, or null when
/// neither special pattern matches.
/// </returns>
Pullenti.Ner.ReferentToken TryAttachSpec(Pullenti.Ner.Token t)
{
    if (t == null)
    {
        return null;
    }
    Pullenti.Ner.Token t0 = t;
    Pullenti.Ner.NumberToken nt = t as Pullenti.Ner.NumberToken;
    if (nt != null && nt.Typ == Pullenti.Ner.NumberSpellingType.Digit && nt.Value == "1")
    {
        // An optional hyphen may separate the digit from the letter.
        if (t.Next != null && t.Next.IsHiphen)
        {
            t = t.Next;
        }
        if ((t.Next is Pullenti.Ner.TextToken) && !t.Next.IsWhitespaceBefore)
        {
            if (t.Next.IsValue("C", null) || t.Next.IsValue("С", null))
            {
                // Both spellings are stored as values of the same referent.
                DenominationReferent dr = new DenominationReferent();
                dr.AddSlot(DenominationReferent.ATTR_VALUE, "1С", false, 0);
                dr.AddSlot(DenominationReferent.ATTR_VALUE, "1C", false, 0);
                return new Pullenti.Ner.ReferentToken(dr, t0, t.Next);
            }
        }
    }
    // Note: t may already point at the hyphen skipped above.
    if (((nt != null && nt.Typ == Pullenti.Ner.NumberSpellingType.Digit && (t.Next is Pullenti.Ner.TextToken)) && !t.IsWhitespaceAfter && !t.Next.Chars.IsAllLower) && t.Next.Chars.IsLetter)
    {
        DenominationReferent dr = new DenominationReferent();
        dr.AddSlot(DenominationReferent.ATTR_VALUE, string.Format("{0}{1}", nt.GetSourceText(), (t.Next as Pullenti.Ner.TextToken).Term), false, 0);
        return new Pullenti.Ner.ReferentToken(dr, t0, t.Next);
    }
    return null;
}
/// <summary>
/// Reads a single token from <paramref name="stream"/>. A leading short type
/// code selects the token class (0 = no token, 1 = text, 2 = number,
/// 3 = referent, anything else = meta token). When the created token is a
/// <c>MetaToken</c>, its inner token chain is read as well and its begin/end
/// tokens are set to the first and last tokens of that chain.
/// </summary>
/// <param name="stream">Source stream.</param>
/// <param name="kit">Analysis kit passed to the token constructors and deserializers.</param>
/// <param name="vers">Version value forwarded to the deserializers.</param>
/// <returns>The deserialized token, or null when the type code is 0.</returns>
static Pullenti.Ner.Token DeserializeToken(Stream stream, Pullenti.Ner.Core.AnalysisKit kit, int vers)
{
    short kind = DeserializeShort(stream);
    if (kind == 0)
    {
        return null;
    }

    Pullenti.Ner.Token result;
    switch (kind)
    {
        case 1:
            result = new Pullenti.Ner.TextToken(null, kit);
            break;
        case 2:
            result = new Pullenti.Ner.NumberToken(null, null, null, Pullenti.Ner.NumberSpellingType.Digit, kit);
            break;
        case 3:
            result = new Pullenti.Ner.ReferentToken(null, null, null, kit);
            break;
        default:
            result = new Pullenti.Ner.MetaToken(null, null, kit);
            break;
    }
    result.Deserialize(stream, kit, vers);

    Pullenti.Ner.MetaToken meta = result as Pullenti.Ner.MetaToken;
    if (meta == null)
    {
        return result;
    }

    // The nested tokens follow the meta token itself in the stream.
    Pullenti.Ner.Token inner = DeserializeTokens(stream, kit, vers);
    if (inner != null)
    {
        meta.m_BeginToken = inner;
        while (inner != null)
        {
            meta.m_EndToken = inner;
            inner = inner.Next;
        }
    }
    return result;
}
/// <summary>
/// Tries to read a domain name (or dotted IP-like sequence) starting at
/// <paramref name="t0"/>. Number tokens and text tokens that start with a
/// letter or one of '.', '-', '_' are concatenated (text lower-cased) until
/// something else is met.
/// </summary>
/// <param name="t0">First token of the candidate.</param>
/// <param name="check">
/// When true the result is accepted only if it looks like an IP address
/// (exactly four digit tokens in 0..255 separated only by dots) or if the last
/// token follows a '.' and is recognised by <c>m_StdGroups</c>.
/// </param>
/// <param name="canBeWhitspaces">
/// When true, whitespace inside the name is tolerated provided a look-ahead
/// on the same line reaches a token recognised by <c>m_StdGroups</c>.
/// </param>
/// <returns>
/// The token span with the lower-cased text in <c>Value</c>, or null when no
/// acceptable name is found (empty text, no dot, leading dot, two adjacent
/// dots, or a failed <paramref name="check"/>).
/// </returns>
public static UriItemToken AttachDomainName(Pullenti.Ner.Token t0, bool check, bool canBeWhitspaces)
{
    StringBuilder txt = new StringBuilder();
    Pullenti.Ner.Token t1 = t0;
    int ipCount = 0;
    bool isIp = true;
    for (Pullenti.Ner.Token t = t0; t != null; t = t.Next)
    {
        if (t.IsWhitespaceBefore && t != t0)
        {
            // Whitespace normally ends the name; with canBeWhitspaces we look
            // ahead on the same line for a known standard group.
            bool ok = false;
            if (!t.IsNewlineBefore && canBeWhitspaces)
            {
                for (Pullenti.Ner.Token tt1 = t; tt1 != null; tt1 = tt1.Next)
                {
                    if (tt1.IsChar('.') || tt1.IsHiphen)
                    {
                        continue;
                    }
                    if (tt1.IsWhitespaceBefore)
                    {
                        if (tt1.IsNewlineBefore)
                        {
                            break;
                        }
                        // A gap is allowed only right after '.' or a hyphen.
                        if (tt1.Previous != null && (tt1.Previous.IsChar('.') || tt1.Previous.IsHiphen))
                        {
                        }
                        else
                        {
                            break;
                        }
                    }
                    if (!(tt1 is Pullenti.Ner.TextToken))
                    {
                        break;
                    }
                    if (m_StdGroups.TryParse(tt1, Pullenti.Ner.Core.TerminParseAttr.No) != null)
                    {
                        ok = true;
                        break;
                    }
                    if (!tt1.Chars.IsLatinLetter)
                    {
                        break;
                    }
                }
            }
            if (!ok)
            {
                break;
            }
        }
        if (t is Pullenti.Ner.NumberToken)
        {
            Pullenti.Ner.NumberToken nt = t as Pullenti.Ner.NumberToken;
            if (nt.IntValue == null)
            {
                break;
            }
            txt.Append(nt.GetSourceText());
            t1 = t;
            if (nt.Typ == Pullenti.Ner.NumberSpellingType.Digit && nt.IntValue.Value >= 0 && (nt.IntValue.Value < 256))
            {
                ipCount++;
            }
            else
            {
                isIp = false;
            }
            continue;
        }
        Pullenti.Ner.TextToken tt = t as Pullenti.Ner.TextToken;
        if (tt == null)
        {
            break;
        }
        string src = tt.Term;
        char ch = src[0];
        if (!char.IsLetter(ch))
        {
            if (".-_".IndexOf(ch) < 0)
            {
                break;
            }
            if (ch != '.')
            {
                isIp = false;
            }
            if (ch == '-')
            {
                // Special case: stop at a hyphen that directly follows "vk.com".
                if (string.Compare(txt.ToString(), "vk.com", true) == 0)
                {
                    return new UriItemToken(t0, t1) { Value = txt.ToString().ToLower() };
                }
            }
        }
        else
        {
            isIp = false;
        }
        txt.Append(src.ToLower());
        t1 = t;
    }
    if (txt.Length == 0)
    {
        return null;
    }
    if (ipCount != 4)
    {
        isIp = false;
    }
    int i;
    int points = 0;
    for (i = 0; i < txt.Length; i++)
    {
        if (txt[i] == '.')
        {
            if (i == 0)
            {
                return null;
            }
            if (i >= (txt.Length - 1))
            {
                // Drop a trailing dot and step the end token back accordingly.
                txt.Length--;
                t1 = t1.Previous;
                break;
            }
            if (txt[i - 1] == '.' || txt[i + 1] == '.')
            {
                return null;
            }
            points++;
        }
    }
    if (points == 0)
    {
        return null;
    }
    if (check)
    {
        bool ok = isIp;
        if (!isIp)
        {
            if (txt.ToString() == "localhost")
            {
                ok = true;
            }
        }
        if (!ok && t1.Previous != null && t1.Previous.IsChar('.'))
        {
            if (m_StdGroups.TryParse(t1, Pullenti.Ner.Core.TerminParseAttr.No) != null)
            {
                ok = true;
            }
        }
        if (!ok)
        {
            return null;
        }
    }
    return new UriItemToken(t0, t1) { Value = txt.ToString().ToLower() };
}
/// <summary>
/// Walks backwards from <paramref name="t1"/> and collects the user part of a
/// mail address. A closing '}' starts group mode: user names separated by ';'
/// or ',' are gathered right-to-left until the opening '{' is reached.
/// </summary>
/// <param name="t1">Last token of the user part (or the closing '}').</param>
/// <returns>
/// A list of items (one per user; exactly one outside group mode), or null
/// when nothing could be collected or a group has no opening '{'.
/// </returns>
public static List<UriItemToken> AttachMailUsers(Pullenti.Ner.Token t1)
{
    if (t1 == null)
    {
        return null;
    }

    if (t1.IsChar('}'))
    {
        List<UriItemToken> group = AttachMailUsers(t1.Previous);
        if (group == null)
        {
            return null;
        }
        Pullenti.Ner.Token cursor = group[0].BeginToken.Previous;
        while (cursor != null)
        {
            if (cursor.IsChar('{'))
            {
                // The first item is stretched to cover the opening brace.
                group[0].BeginToken = cursor;
                return group;
            }
            if (!cursor.IsCharOf(";,"))
            {
                List<UriItemToken> earlier = AttachMailUsers(cursor);
                if (earlier == null)
                {
                    return null;
                }
                group.Insert(0, earlier[0]);
                cursor = earlier[0].BeginToken;
            }
            cursor = cursor.Previous;
        }
        return null;
    }

    StringBuilder text = new StringBuilder();
    Pullenti.Ner.Token first = t1;
    for (Pullenti.Ner.Token cur = t1; cur != null; cur = cur.Previous)
    {
        if (cur.IsWhitespaceAfter)
        {
            break;
        }

        string piece;
        Pullenti.Ner.NumberToken number = cur as Pullenti.Ner.NumberToken;
        if (number != null)
        {
            piece = number.GetSourceText();
        }
        else
        {
            Pullenti.Ner.TextToken word = cur as Pullenti.Ner.TextToken;
            if (word == null)
            {
                break;
            }
            piece = word.GetSourceText();
            char lead = piece[0];
            // Only letters and the punctuation '.', '-', '_' may appear.
            if (!char.IsLetter(lead) && ".-_".IndexOf(lead) < 0)
            {
                break;
            }
        }

        text.Insert(0, piece);
        first = cur;
    }

    if (text.Length == 0)
    {
        return null;
    }

    List<UriItemToken> result = new List<UriItemToken>();
    result.Add(new UriItemToken(first, t1) { Value = text.ToString().ToLower() });
    return result;
}
/// <summary>
/// Tries to recognise an eponym ("named after ...") part of an organization
/// name starting at <paramref name="t"/>.
/// </summary>
/// <remarks>
/// Non-text tokens are accepted only for a few fixed DATE referents
/// ("1 МАЯ", "7 ОКТЯБРЯ", "9 МАЯ", "8 МАРТА") or an age followed by a
/// capitalised Cyrillic token. Text tokens must be the word "ИМЕНИ"/"ІМЕНІ",
/// the abbreviation "ИМ"/"ІМ", or directly follow "ФОНД", "ХРАМ" or "ЦЕРКОВЬ".
/// After the prefix, a day-only DATE, a PERSON referent, an anniversary,
/// a geo referent or a sequence of person items yields the eponym values.
/// </remarks>
/// <param name="t">First token of the candidate; may be null.</param>
/// <param name="mustHasPrefix">
/// When true, only candidates introduced by "ИМЕНИ"/"ИМ" (or their Ukrainian
/// forms) are accepted.
/// </param>
/// <returns>The eponym token, or null when nothing is recognised.</returns>
public static OrgItemEponymToken TryAttach(Pullenti.Ner.Token t, bool mustHasPrefix = false)
{
    Pullenti.Ner.TextToken tt = t as Pullenti.Ner.TextToken;
    if (tt == null)
    {
        if (t == null)
        {
            return null;
        }
        Pullenti.Ner.Referent r1 = t.GetReferent();
        if (r1 != null && r1.TypeName == "DATE")
        {
            string str = r1.ToString().ToUpper();
            if ((str == "1 МАЯ" || str == "7 ОКТЯБРЯ" || str == "9 МАЯ") || str == "8 МАРТА")
            {
                OrgItemEponymToken dt = new OrgItemEponymToken(t, t) { Eponyms = new List<string>() };
                dt.Eponyms.Add(str);
                return dt;
            }
        }
        Pullenti.Ner.NumberToken age = Pullenti.Ner.Core.NumberHelper.TryParseAge(t);
        if ((age != null && (((age.EndToken.Next is Pullenti.Ner.TextToken) || (age.EndToken.Next is Pullenti.Ner.ReferentToken))) && (age.WhitespacesAfterCount < 3)) && !age.EndToken.Next.Chars.IsAllLower && age.EndToken.Next.Chars.IsCyrillicLetter)
        {
            OrgItemEponymToken dt = new OrgItemEponymToken(t, age.EndToken.Next) { Eponyms = new List<string>() };
            dt.Eponyms.Add(string.Format("{0} {1}", age.Value, dt.EndToken.GetSourceText().ToUpper()));
            return dt;
        }
        return null;
    }

    Pullenti.Ner.Token t1 = null;
    bool full = false;
    bool hasName = false;
    if (tt.Term == "ИМЕНИ" || tt.Term == "ІМЕНІ")
    {
        t1 = t.Next;
        full = true;
        hasName = true;
    }
    else if (((tt.Term == "ИМ" || tt.Term == "ІМ")) && tt.Next != null)
    {
        if (tt.Next.IsChar('.'))
        {
            t1 = tt.Next.Next;
            full = true;
        }
        else if ((tt.Next is Pullenti.Ner.TextToken) && tt.Chars.IsAllLower && !tt.Next.Chars.IsAllLower)
        {
            t1 = tt.Next;
        }
        hasName = true;
    }
    else if (tt.Previous != null && ((tt.Previous.IsValue("ФОНД", null) || tt.Previous.IsValue("ХРАМ", null) || tt.Previous.IsValue("ЦЕРКОВЬ", "ЦЕРКВА"))))
    {
        if ((!tt.Chars.IsCyrillicLetter || tt.Morph.Class.IsPreposition || tt.Morph.Class.IsConjunction) || !tt.Chars.IsLetter)
        {
            return null;
        }
        if (tt.WhitespacesBeforeCount != 1)
        {
            return null;
        }
        if (tt.Chars.IsAllLower)
        {
            return null;
        }
        if (tt.Morph.Class.IsAdjective)
        {
            Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(tt, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
            if (npt != null && npt.BeginToken != npt.EndToken)
            {
                return null;
            }
        }
        OrgItemNameToken na = OrgItemNameToken.TryAttach(tt, null, false, true);
        if (na != null)
        {
            if (na.IsEmptyWord || na.IsStdName || na.IsStdTail)
            {
                return null;
            }
        }
        t1 = tt;
    }
    if (t1 == null || ((t1.IsNewlineBefore && !full)))
    {
        return null;
    }
    if (tt.Previous != null && tt.Previous.Morph.Class.IsPreposition)
    {
        return null;
    }
    if (mustHasPrefix && !hasName)
    {
        return null;
    }

    // A day-without-year date after a full prefix is itself the eponym.
    Pullenti.Ner.Referent r = t1.GetReferent();
    if ((r != null && r.TypeName == "DATE" && full) && r.FindSlot("DAY", null, true) != null && r.FindSlot("YEAR", null, true) == null)
    {
        OrgItemEponymToken dt = new OrgItemEponymToken(t, t1) { Eponyms = new List<string>() };
        dt.Eponyms.Add(r.ToString().ToUpper());
        return dt;
    }

    bool holy = false;
    if ((t1.IsValue("СВЯТОЙ", null) || t1.IsValue("СВЯТИЙ", null) || t1.IsValue("СВ", null)) || t1.IsValue("СВЯТ", null))
    {
        t1 = t1.Next;
        holy = true;
        if (t1 != null && t1.IsChar('.'))
        {
            t1 = t1.Next;
        }
    }
    if (t1 == null)
    {
        return null;
    }

    Pullenti.Morph.MorphClass cl = t1.GetMorphClassInDictionary();
    if (cl.IsNoun || cl.IsAdjective)
    {
        Pullenti.Ner.ReferentToken rt = t1.Kit.ProcessReferent("PERSON", t1);
        if (rt != null && rt.Referent.TypeName == "PERSON" && rt.BeginToken != rt.EndToken)
        {
            string e = rt.Referent.GetStringValue("LASTNAME");
            if (e != null)
            {
                if (rt.EndToken.IsValue(e, null))
                {
                    OrgItemEponymToken re = new OrgItemEponymToken(t, rt.EndToken);
                    re.Eponyms.Add(rt.EndToken.GetSourceText());
                    return re;
                }
            }
        }
    }

    Pullenti.Ner.NumberToken nt = Pullenti.Ner.Core.NumberHelper.TryParseAnniversary(t1);
    if (nt != null && nt.Typ == Pullenti.Ner.NumberSpellingType.Age)
    {
        Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(nt.EndToken.Next, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
        if (npt != null)
        {
            string s = string.Format("{0}-{1} {2}", nt.Value, (t.Kit.BaseLanguage.IsUa ? "РОКІВ" : "ЛЕТ"), Pullenti.Ner.Core.MiscHelper.GetTextValue(npt.BeginToken, npt.EndToken, Pullenti.Ner.Core.GetTextAttr.No));
            OrgItemEponymToken res = new OrgItemEponymToken(t, npt.EndToken);
            res.Eponyms.Add(s);
            return res;
        }
    }

    List<PersonItemToken> its = PersonItemToken.TryAttach(t1);
    if (its == null)
    {
        if ((t1 is Pullenti.Ner.ReferentToken) && (t1.GetReferent() is Pullenti.Ner.Geo.GeoReferent))
        {
            string s = Pullenti.Ner.Core.MiscHelper.GetTextValue(t1, t1, Pullenti.Ner.Core.GetTextAttr.No);
            OrgItemEponymToken re = new OrgItemEponymToken(t, t1);
            re.Eponyms.Add(s);
            return re;
        }
        return null;
    }

    List<string> eponims = new List<string>();
    int i = 0;
    if (its[i].Typ == PersonItemType.LocaseWord)
    {
        i++;
    }
    if (i >= its.Count)
    {
        return null;
    }
    if (!full)
    {
        if (its[i].BeginToken.Morph.Class.IsAdjective && !its[i].BeginToken.Morph.Class.IsProperSurname)
        {
            return null;
        }
    }
    if (its[i].Typ == PersonItemType.Initial)
    {
        // Initial(s) + surname/name, optionally repeated through "and".
        i++;
        while (true)
        {
            if ((i < its.Count) && its[i].Typ == PersonItemType.Initial)
            {
                i++;
            }
            if (i >= its.Count || ((its[i].Typ != PersonItemType.Surname && its[i].Typ != PersonItemType.Name)))
            {
                break;
            }
            eponims.Add(its[i].Value);
            t1 = its[i].EndToken;
            if ((i + 2) >= its.Count || its[i + 1].Typ != PersonItemType.And || its[i + 2].Typ != PersonItemType.Initial)
            {
                break;
            }
            i += 3;
        }
    }
    else if (((i + 1) < its.Count) && its[i].Typ == PersonItemType.Name && its[i + 1].Typ == PersonItemType.Surname)
    {
        eponims.Add(its[i + 1].Value);
        t1 = its[i + 1].EndToken;
        i += 2;
        if ((((i + 2) < its.Count) && its[i].Typ == PersonItemType.And && its[i + 1].Typ == PersonItemType.Name) && its[i + 2].Typ == PersonItemType.Surname)
        {
            eponims.Add(its[i + 2].Value);
            t1 = its[i + 2].EndToken;
        }
    }
    else if (its[i].Typ == PersonItemType.Surname)
    {
        // Two trailing items with the same character class are merged into one surname.
        if (its.Count == (i + 2) && its[i].Chars == its[i + 1].Chars)
        {
            its[i].Value += (" " + its[i + 1].Value);
            its[i].EndToken = its[i + 1].EndToken;
            its.RemoveAt(i + 1);
        }
        eponims.Add(its[i].Value);
        if (((i + 1) < its.Count) && its[i + 1].Typ == PersonItemType.Name)
        {
            if ((i + 2) == its.Count)
            {
                i++;
            }
            else if (its[i + 2].Typ != PersonItemType.Surname)
            {
                i++;
            }
        }
        else if (((i + 1) < its.Count) && its[i + 1].Typ == PersonItemType.Initial)
        {
            if ((i + 2) == its.Count)
            {
                i++;
            }
            else if (its[i + 2].Typ == PersonItemType.Initial && (i + 3) == its.Count)
            {
                i += 2;
            }
        }
        else if (((i + 2) < its.Count) && its[i + 1].Typ == PersonItemType.And && its[i + 2].Typ == PersonItemType.Surname)
        {
            bool ok = true;
            Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(its[i + 2].BeginToken, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
            if (npt != null && !npt.Morph.Case.IsGenitive && !npt.Morph.Case.IsUndefined)
            {
                ok = false;
            }
            if (ok)
            {
                eponims.Add(its[i + 2].Value);
                i += 2;
            }
        }
        t1 = its[i].EndToken;
    }
    else if (its[i].Typ == PersonItemType.Name && holy)
    {
        t1 = its[i].EndToken;
        bool sec = false;
        if (((i + 1) < its.Count) && its[i].Chars == its[i + 1].Chars && its[i + 1].Typ != PersonItemType.Initial)
        {
            sec = true;
            t1 = its[i + 1].EndToken;
        }
        if (sec)
        {
            eponims.Add(string.Format("СВЯТ.{0} {1}", its[i].Value, its[i + 1].Value));
        }
        else
        {
            eponims.Add(string.Format("СВЯТ.{0}", its[i].Value));
        }
    }
    else if (full && (i + 1) == its.Count && ((its[i].Typ == PersonItemType.Name || its[i].Typ == PersonItemType.Surname)))
    {
        t1 = its[i].EndToken;
        eponims.Add(its[i].Value);
    }
    else if ((its[i].Typ == PersonItemType.Name && its.Count == 3 && (i + 2) < its.Count && its[i + 1].Typ == PersonItemType.Name) && its[i + 2].Typ == PersonItemType.Surname)
    {
        // The (i + 2) bound matters when a leading lower-case word moved i to 1:
        // with three items its[i + 2] would otherwise be out of range.
        t1 = its[i + 2].EndToken;
        eponims.Add(string.Format("{0} {1} {2}", its[i].Value, its[i + 1].Value, its[i + 2].Value));
        i += 2;
    }
    if (eponims.Count == 0)
    {
        return null;
    }
    return new OrgItemEponymToken(t, t1) { Eponyms = eponims };
}
/// <summary>
/// Collects the content of a URI starting at <paramref name="t0"/>.
/// First tries AttachDomainName(t0, true, canBeWhitespaces); if a domain is
/// found, scanning continues after it and the domain value is prepended to
/// the collected text. Then number tokens, text tokens that start with a
/// letter or with a character contained in <paramref name="chars"/>, and
/// some single-token referents are appended. Round and square brackets are
/// tracked via openChar: a closing bracket without the matching opening one
/// ends the scan.
/// </summary>
/// <param name="t0">First token of the candidate.</param>
/// <param name="chars">Non-letter characters allowed inside the content.</param>
/// <param name="canBeWhitespaces">
/// When true (and a domain was found) whitespace on the same line may be
/// skipped: after a hyphen, after '.' followed by a two-letter token, or when
/// a look-ahead reaches a slash/backslash or one of the page extensions
/// HTM, HTML, SHTML, ASP, ASPX, JSP.
/// </param>
/// <returns>
/// The collected item; the domain item (possibly null) when nothing, or
/// nothing containing a letter or digit, was collected after it; null when
/// the domain value is shorter than 3 characters or when the text (ignoring a
/// leading "//") equals "WWW" case-insensitively. A trailing '.' or '/' is
/// removed from the result.
/// </returns>
static UriItemToken _AttachUriContent(Pullenti.Ner.Token t0, string chars, bool canBeWhitespaces = false) { StringBuilder txt = new StringBuilder(); Pullenti.Ner.Token t1 = t0; UriItemToken dom = AttachDomainName(t0, true, canBeWhitespaces); if (dom != null) { if (dom.Value.Length < 3) { return(null); } } char openChar = (char)0; Pullenti.Ner.Token t = t0; if (dom != null) { t = dom.EndToken.Next; } for (; t != null; t = t.Next) { if (t != t0 && t.IsWhitespaceBefore) { if (t.IsNewlineBefore || !canBeWhitespaces) { break; } if (dom == null) { break; } if (t.Previous.IsHiphen) { } else if (t.Previous.IsCharOf(",;")) { break; } else if (t.Previous.IsChar('.') && t.Chars.IsLetter && t.LengthChar == 2) { } else { bool ok = false; Pullenti.Ner.Token tt1 = t; if (t.IsCharOf("\\/")) { tt1 = t.Next; } Pullenti.Ner.Token tt0 = tt1; for (; tt1 != null; tt1 = tt1.Next) { if (tt1 != tt0 && tt1.IsWhitespaceBefore) { break; } if (tt1 is Pullenti.Ner.NumberToken) { continue; } if (!(tt1 is Pullenti.Ner.TextToken)) { break; } string term1 = (tt1 as Pullenti.Ner.TextToken).Term; if (((term1 == "HTM" || term1 == "HTML" || term1 == "SHTML") || term1 == "ASP" || term1 == "ASPX") || term1 == "JSP") { ok = true; break; } if (!tt1.Chars.IsLetter) { if (tt1.IsCharOf("\\/")) { ok = true; break; } if (!tt1.IsCharOf(chars)) { break; } } else if (!tt1.Chars.IsLatinLetter) { break; } } if (!ok) { break; } } } if (t is Pullenti.Ner.NumberToken) { Pullenti.Ner.NumberToken nt = t as Pullenti.Ner.NumberToken; txt.Append(nt.GetSourceText()); t1 = t; continue; } Pullenti.Ner.TextToken tt = t as Pullenti.Ner.TextToken; if (tt == null) { Pullenti.Ner.ReferentToken rt = t as Pullenti.Ner.ReferentToken; if (rt != null && rt.BeginToken.IsValue("РФ", null)) { if (txt.Length > 0 && txt[txt.Length - 1] == '.') { txt.Append(rt.BeginToken.GetSourceText()); t1 = t; continue; } } if (rt != null && rt.Chars.IsLatinLetter && rt.BeginToken == rt.EndToken) { txt.Append(rt.BeginToken.GetSourceText()); t1 = t; 
continue; } break; } string src = tt.GetSourceText(); char ch = src[0]; if (!char.IsLetter(ch)) { if (chars.IndexOf(ch) < 0) { break; } if (ch == '(' || ch == '[') { openChar = ch; } else if (ch == ')') { if (openChar != '(') { break; } openChar = (char)0; } else if (ch == ']') { if (openChar != '[') { break; } openChar = (char)0; } } txt.Append(src); t1 = t; } if (txt.Length == 0) { return(dom); } int i; for (i = 0; i < txt.Length; i++) { if (char.IsLetterOrDigit(txt[i])) { break; } } if (i >= txt.Length) { return(dom); } if (txt[txt.Length - 1] == '.' || txt[txt.Length - 1] == '/') { txt.Length--; t1 = t1.Previous; } if (dom != null) { txt.Insert(0, dom.Value); } string tmp = txt.ToString(); if (tmp.StartsWith("\\\\")) { txt.Replace("\\\\", "//"); tmp = txt.ToString(); } if (tmp.StartsWith("//")) { tmp = tmp.Substring(2); } if (string.Compare(tmp, "WWW", true) == 0) { return(null); } UriItemToken res = new UriItemToken(t0, t1) { Value = txt.ToString() }; return(res); }
/// <summary>
/// Builds a BlockLine for the text line that starts at <paramref name="t"/>
/// and tries to classify it as a heading of a document block.
/// </summary>
/// <remarks>
/// Steps, in the order the code performs them:
/// 1. The result spans from <paramref name="t"/> to the last token before the
///    next line break.
/// 2. Leading numbering (number tokens or roman numerals, each optionally
///    followed by '.') is skipped and remembered in NumberEnd; if a line break
///    is reached while doing so the result is returned as is.
/// 3. The token after the numbering is looked up in m_Ontology (directly or
///    via the noun of a multi-token noun phrase). The hit is discarded when it
///    follows ':', when the token language differs from the kit base language,
///    and by several per-type checks (Conslusion, Index, Intro). An accepted
///    hit sets Typ.
/// 4. A '§' followed by a number marks the line as Chapter.
/// 5. If <paramref name="names"/> recognises a name that ends the line, the
///    result is returned with IsExistName set; an Undefined type is then taken
///    from a recursive Create call on the text after the numbering (only
///    Literature/Intro/Conslusion are taken over) or defaults to Chapter.
/// 6. Word statistics are gathered (Words, NotWords, IsAllUpper, HasVerb) and
///    a trailing "....number" tail sets HasContentItemTail.
/// 7. For a still Undefined type, noun-phrase heuristics on fixed Russian /
///    Ukrainian keywords may set Intro, Conslusion, Literature or Misc and may
///    extend EndToken to the end of the following line.
/// NOTE(review): "Conslusion" is the spelling of the enum member used by this
/// code; it is kept unchanged here.
/// </remarks>
/// <param name="t">First token of the line; null yields null.</param>
/// <param name="names">Optional collection of known names; may be null.</param>
/// <returns>
/// The described line, or null when <paramref name="t"/> is null or when the
/// line consists solely of a noun phrase whose noun is one of the list /
/// index / conclusion keywords checked near the end of the method.
/// </returns>
public static BlockLine Create(Pullenti.Ner.Token t, Pullenti.Ner.Core.TerminCollection names) { if (t == null) { return(null); } BlockLine res = new BlockLine(t, t); for (Pullenti.Ner.Token tt = t; tt != null; tt = tt.Next) { if (tt != t && tt.IsNewlineBefore) { break; } else { res.EndToken = tt; } } int nums = 0; while (t != null && t.Next != null && t.EndChar <= res.EndChar) { if (t is Pullenti.Ner.NumberToken) { } else { Pullenti.Ner.NumberToken rom = Pullenti.Ner.Core.NumberHelper.TryParseRoman(t); if (rom != null && rom.EndToken.Next != null) { t = rom.EndToken; } else { break; } } if (t.Next.IsChar('.')) { } else if ((t.Next is Pullenti.Ner.TextToken) && !t.Next.Chars.IsAllLower) { } else { break; } res.NumberEnd = t; t = t.Next; if (t.IsChar('.') && t.Next != null) { res.NumberEnd = t; t = t.Next; } if (t.IsNewlineBefore) { return(res); } nums++; } Pullenti.Ner.Core.TerminToken tok = m_Ontology.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No); if (tok == null) { Pullenti.Ner.Core.NounPhraseToken npt1 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null); if (npt1 != null && npt1.EndToken != npt1.BeginToken) { tok = m_Ontology.TryParse(npt1.Noun.BeginToken, Pullenti.Ner.Core.TerminParseAttr.No); } } if (tok != null) { if (t.Previous != null && t.Previous.IsChar(':')) { tok = null; } } if (tok != null) { BlkTyps typ = (BlkTyps)tok.Termin.Tag; if (typ == BlkTyps.Conslusion) { if (t.IsNewlineAfter) { } else if (t.Next != null && t.Next.Morph.Class.IsPreposition && t.Next.Next != null) { Pullenti.Ner.Core.TerminToken tok2 = m_Ontology.TryParse(t.Next.Next, Pullenti.Ner.Core.TerminParseAttr.No); if (tok2 != null && ((BlkTyps)tok2.Termin.Tag) == BlkTyps.Chapter) { } else { tok = null; } } else { tok = null; } } if (t.Kit.BaseLanguage != t.Morph.Language) { tok = null; } if (typ == BlkTyps.Index && !t.IsValue("ОГЛАВЛЕНИЕ", null)) { if (!t.IsNewlineAfter && t.Next != null) { Pullenti.Ner.Core.NounPhraseToken npt = 
Pullenti.Ner.Core.NounPhraseHelper.TryParse(t.Next, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null); if (npt != null && npt.IsNewlineAfter && npt.Morph.Case.IsGenitive) { tok = null; } else if (npt == null) { tok = null; } } } if ((typ == BlkTyps.Intro && tok != null && !tok.IsNewlineAfter) && t.IsValue("ВВЕДЕНИЕ", null)) { Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t.Next, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null); if (npt != null && npt.Morph.Case.IsGenitive) { tok = null; } } if (tok != null) { if (res.NumberEnd == null) { res.NumberEnd = tok.EndToken; if (res.NumberEnd.EndChar > res.EndChar) { res.EndToken = res.NumberEnd; } } res.Typ = typ; t = tok.EndToken; if (t.Next != null && t.Next.IsCharOf(":.")) { t = t.Next; res.EndToken = t; } if (t.IsNewlineAfter || t.Next == null) { return(res); } t = t.Next; } } if (t.IsChar('§') && (t.Next is Pullenti.Ner.NumberToken)) { res.Typ = BlkTyps.Chapter; res.NumberEnd = t; t = t.Next; } if (names != null) { Pullenti.Ner.Core.TerminToken tok2 = names.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No); if (tok2 != null && tok2.EndToken.IsNewlineAfter) { res.EndToken = tok2.EndToken; res.IsExistName = true; if (res.Typ == BlkTyps.Undefined) { BlockLine li2 = Create((res.NumberEnd == null ? 
null : res.NumberEnd.Next), null); if (li2 != null && ((li2.Typ == BlkTyps.Literature || li2.Typ == BlkTyps.Intro || li2.Typ == BlkTyps.Conslusion))) { res.Typ = li2.Typ; } else { res.Typ = BlkTyps.Chapter; } } return(res); } } Pullenti.Ner.Token t1 = res.EndToken; if ((((t1 is Pullenti.Ner.NumberToken) || t1.IsChar('.'))) && t1.Previous != null) { t1 = t1.Previous; if (t1.IsChar('.')) { res.HasContentItemTail = true; for (; t1 != null && t1.BeginChar > res.BeginChar; t1 = t1.Previous) { if (!t1.IsChar('.')) { break; } } } } res.IsAllUpper = true; for (; t != null && t.EndChar <= t1.EndChar; t = t.Next) { if (!(t is Pullenti.Ner.TextToken) || !t.Chars.IsLetter) { res.NotWords++; } else { Pullenti.Morph.MorphClass mc = t.GetMorphClassInDictionary(); if (mc.IsUndefined) { res.NotWords++; } else if (t.LengthChar > 2) { res.Words++; } if (!t.Chars.IsAllUpper) { res.IsAllUpper = false; } if ((t as Pullenti.Ner.TextToken).IsPureVerb) { if (!(t as Pullenti.Ner.TextToken).Term.EndsWith("ING")) { res.HasVerb = true; } } } } if (res.Typ == BlkTyps.Undefined) { Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse((res.NumberEnd == null ? 
res.BeginToken : res.NumberEnd.Next), Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null); if (npt != null) { if (npt.Noun.IsValue("ХАРАКТЕРИСТИКА", null) || npt.Noun.IsValue("СОДЕРЖАНИЕ", "ЗМІСТ")) { bool ok = true; for (Pullenti.Ner.Token tt = npt.EndToken.Next; tt != null && tt.EndChar <= res.EndChar; tt = tt.Next) { if (tt.IsChar('.')) { continue; } Pullenti.Ner.Core.NounPhraseToken npt2 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(tt, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null); if (npt2 == null || !npt2.Morph.Case.IsGenitive) { ok = false; break; } tt = npt2.EndToken; if (tt.EndChar > res.EndChar) { res.EndToken = tt; if (!tt.IsNewlineAfter) { for (; res.EndToken.Next != null; res.EndToken = res.EndToken.Next) { if (res.EndToken.IsNewlineAfter) { break; } } } } } if (ok) { res.Typ = BlkTyps.Intro; res.IsExistName = true; } } else if (npt.Noun.IsValue("ВЫВОД", "ВИСНОВОК") || npt.Noun.IsValue("РЕЗУЛЬТАТ", "ДОСЛІДЖЕННЯ")) { bool ok = true; for (Pullenti.Ner.Token tt = npt.EndToken.Next; tt != null && tt.EndChar <= res.EndChar; tt = tt.Next) { if (tt.IsCharOf(",.") || tt.IsAnd) { continue; } Pullenti.Ner.Core.NounPhraseToken npt1 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(tt, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null); if (npt1 != null) { if (npt1.Noun.IsValue("РЕЗУЛЬТАТ", "ДОСЛІДЖЕННЯ") || npt1.Noun.IsValue("РЕКОМЕНДАЦИЯ", "РЕКОМЕНДАЦІЯ") || npt1.Noun.IsValue("ИССЛЕДОВАНИЕ", "ДОСЛІДЖЕННЯ")) { tt = npt1.EndToken; if (tt.EndChar > res.EndChar) { res.EndToken = tt; if (!tt.IsNewlineAfter) { for (; res.EndToken.Next != null; res.EndToken = res.EndToken.Next) { if (res.EndToken.IsNewlineAfter) { break; } } } } continue; } } ok = false; break; } if (ok) { res.Typ = BlkTyps.Conslusion; res.IsExistName = true; } } if (res.Typ == BlkTyps.Undefined && npt != null && npt.EndChar <= res.EndChar) { bool ok = false; int publ = 0; if (_isPub(npt)) { ok = true; publ = 1; } else if ((npt.Noun.IsValue("СПИСОК", null) || npt.Noun.IsValue("УКАЗАТЕЛЬ", "ПОКАЖЧИК") 
|| npt.Noun.IsValue("ПОЛОЖЕНИЕ", "ПОЛОЖЕННЯ")) || npt.Noun.IsValue("ВЫВОД", "ВИСНОВОК") || npt.Noun.IsValue("РЕЗУЛЬТАТ", "ДОСЛІДЖЕННЯ")) { if (npt.EndChar == res.EndChar) { return(null); } ok = true; } if (ok) { if (npt.BeginToken == npt.EndToken && npt.Noun.IsValue("СПИСОК", null) && npt.EndChar == res.EndChar) { ok = false; } for (Pullenti.Ner.Token tt = npt.EndToken.Next; tt != null && tt.EndChar <= res.EndChar; tt = tt.Next) { if (tt.IsCharOf(",.:") || tt.IsAnd || tt.Morph.Class.IsPreposition) { continue; } if (tt.IsValue("ОТРАЖЕНЫ", "ВІДОБРАЖЕНІ")) { continue; } npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(tt, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null); if (npt == null) { ok = false; break; } if (((_isPub(npt) || npt.Noun.IsValue("РАБОТА", "РОБОТА") || npt.Noun.IsValue("ИССЛЕДОВАНИЕ", "ДОСЛІДЖЕННЯ")) || npt.Noun.IsValue("АВТОР", null) || npt.Noun.IsValue("ТРУД", "ПРАЦЯ")) || npt.Noun.IsValue("ТЕМА", null) || npt.Noun.IsValue("ДИССЕРТАЦИЯ", "ДИСЕРТАЦІЯ")) { tt = npt.EndToken; if (_isPub(npt)) { publ++; } if (tt.EndChar > res.EndChar) { res.EndToken = tt; if (!tt.IsNewlineAfter) { for (; res.EndToken.Next != null; res.EndToken = res.EndToken.Next) { if (res.EndToken.IsNewlineAfter) { break; } } } } continue; } ok = false; break; } if (ok) { res.Typ = BlkTyps.Literature; res.IsExistName = true; if (publ == 0 && (res.EndChar < (((res.Kit.Sofa.Text.Length * 2) / 3)))) { if (res.NumberEnd != null) { res.Typ = BlkTyps.Misc; } else { res.Typ = BlkTyps.Undefined; } } } } } } } return(res); }
/// <summary>
/// Tries to read a speciality code: up to three groups of digits optionally
/// separated by '.', ',' or a hyphen. A six-digit code is normalised to the
/// form "NN.NN.NN". The resulting token is extended to the end of the line
/// (bracket sequences are swallowed as a whole).
/// </summary>
/// <param name="t">First token of the candidate; may be null.</param>
/// <param name="keyWordBefore">
/// True when a keyword precedes the code. Without it, and when the code does
/// not start a line, stricter rules apply: every group must be two
/// characters long and the first separators must be a '.' with no
/// surrounding whitespace; the code must also have exactly six digits.
/// </param>
/// <returns>The speciality item, or null when the pattern does not match.</returns>
static TitleItemToken TryAttachSpeciality(Pullenti.Ner.Token t, bool keyWordBefore)
{
    if (t == null)
    {
        return null;
    }

    bool strict = !keyWordBefore && !t.IsNewlineBefore;
    StringBuilder code = null;
    Pullenti.Ner.Token start = t;
    int digitTotal = 0;

    int group = 0;
    while (group < 3)
    {
        Pullenti.Ner.NumberToken number = t as Pullenti.Ner.NumberToken;
        if (number == null)
        {
            break;
        }
        if (number.Typ != Pullenti.Ner.NumberSpellingType.Digit || number.Morph.Class.IsAdjective)
        {
            break;
        }
        if (code == null)
        {
            code = new StringBuilder();
        }
        if (strict && t.LengthChar != 2)
        {
            return null;
        }

        string digits = number.GetSourceText();
        digitTotal += digits.Length;
        code.Append(digits);

        if (t.Next == null)
        {
            break;
        }
        t = t.Next;
        if (t.IsCharOf(".,") || t.IsHiphen)
        {
            // In strict mode the first two separators must be a tight '.'.
            if (strict && (group < 2) && (!t.IsChar('.') || t.IsWhitespaceAfter || t.IsWhitespaceBefore))
            {
                return null;
            }
            if (t.Next != null)
            {
                t = t.Next;
            }
        }
        group++;
    }

    if (code == null || (digitTotal < 5))
    {
        return null;
    }
    if (digitTotal == 6)
    {
        code.Insert(4, '.');
        code.Insert(2, '.');
    }
    else if (!keyWordBefore)
    {
        return null;
    }

    // Extend the end token up to the end of the current line.
    Pullenti.Ner.Token cursor = t.Next;
    while (cursor != null && !cursor.IsNewlineBefore)
    {
        Pullenti.Ner.Core.BracketSequenceToken brackets = Pullenti.Ner.Core.BracketHelper.TryParse(cursor, Pullenti.Ner.Core.BracketParseAttr.No, 100);
        if (brackets != null)
        {
            cursor = brackets.EndToken;
        }
        t = cursor;
        cursor = cursor.Next;
    }

    return new TitleItemToken(start, t, Types.Speciality) { Value = code.ToString() };
}
/// <summary>
/// Tries to recognise a denomination (a short mix of letters, digits and a
/// few special characters) starting at <paramref name="t"/>.
/// The special cases of TryAttachSpec are tried first. Otherwise the tokens
/// following <paramref name="t"/> are scanned: digit numbers, text tokens of
/// at most three characters and the characters "+*&amp;^#@!" are accepted,
/// while slashes, underscores and hyphens act as separators.
/// </summary>
/// <param name="t">First token of the candidate; may be null.</param>
/// <param name="forOntology">
/// When true, whitespace does not stop the scan and the plausibility checks
/// (length, trailing separator, CheckAttach, ...) are skipped.
/// </param>
/// <returns>The referent token, or null when nothing acceptable is found.</returns>
public Pullenti.Ner.ReferentToken TryAttach(Pullenti.Ner.Token t, bool forOntology = false)
{
    if (t == null)
    {
        return null;
    }

    Pullenti.Ner.ReferentToken special = this.TryAttachSpec(t);
    if (special != null)
    {
        return special;
    }

    if (t.Chars.IsAllLower)
    {
        // A lower-case start is allowed only when glued to a following number
        // and standing at a left boundary.
        if (t.IsWhitespaceAfter || !(t.Next is Pullenti.Ner.NumberToken))
        {
            return null;
        }
        bool atLeftBoundary = t.Previous == null || t.IsWhitespaceBefore || t.Previous.IsCharOf(",:");
        if (!atLeftBoundary)
        {
            return null;
        }
    }

    StringBuilder tail = new StringBuilder();
    Pullenti.Ner.Token last = t;
    bool endsWithSeparator = false;
    bool valid = true;
    int numberCount = 0;
    int specialCount = 0;

    Pullenti.Ner.Token cur = t.Next;
    while (cur != null)
    {
        if (cur.IsWhitespaceBefore && !forOntology)
        {
            break;
        }
        if (cur.IsCharOf("/\\_") || cur.IsHiphen)
        {
            endsWithSeparator = true;
            tail.Append('-');
            cur = cur.Next;
            continue;
        }
        endsWithSeparator = false;

        Pullenti.Ner.NumberToken number = cur as Pullenti.Ner.NumberToken;
        if (number != null)
        {
            if (number.Typ != Pullenti.Ner.NumberSpellingType.Digit)
            {
                break;
            }
            last = number;
            tail.Append(number.GetSourceText());
            numberCount++;
            cur = cur.Next;
            continue;
        }

        Pullenti.Ner.TextToken word = cur as Pullenti.Ner.TextToken;
        if (word == null)
        {
            break;
        }
        if (word.LengthChar > 3)
        {
            valid = false;
            break;
        }
        if (!char.IsLetter(word.Term[0]))
        {
            if (word.IsCharOf(",:") || Pullenti.Ner.Core.BracketHelper.CanBeEndOfSequence(word, false, null, false))
            {
                break;
            }
            if (!word.IsCharOf("+*&^#@!"))
            {
                valid = false;
                break;
            }
            specialCount++;
        }
        last = word;
        tail.Append(word.GetSourceText());
        cur = cur.Next;
    }

    if (!forOntology)
    {
        if ((tail.Length < 1) || !valid || endsWithSeparator)
        {
            return null;
        }
        if (tail.Length > 12)
        {
            return null;
        }
        if (tail[tail.Length - 1] == '!')
        {
            return null;
        }
        if ((numberCount + specialCount) == 0)
        {
            return null;
        }
        if (!this.CheckAttach(t, last))
        {
            return null;
        }
    }

    DenominationReferent referent = new DenominationReferent();
    referent.AddValue(t, last);
    return new Pullenti.Ner.ReferentToken(referent, t, last);
}
/// <summary>
/// Tries to build a city referent from a "type noun + name" sequence, e.g. a
/// settlement-type word (or a Misc item) followed by a proper name or a known
/// city.
/// </summary>
/// <remarks>
/// For village-like types ("...ПОСЕЛОК", "...СЕЛИЩЕ", "ДЕРЕВНЯ", "СЕЛО") a
/// second noun may follow and becomes the second type. Whether the match is
/// trusted (<c>ok</c>) depends on doubt flags of the items and on context
/// (geo objects before/after, a following street, year, city, date, or the
/// preposition "В" in front). The list <paramref name="li"/> is modified:
/// items after the name are removed, and a leading Misc item is removed too.
/// </remarks>
/// <param name="li">City items; needs at least two elements, the first a Noun or Misc.</param>
/// <param name="oi">Receives the ontology item of the name in the "city word + known city" branch; null otherwise.</param>
/// <param name="always">When true, an untrusted match is accepted without the nearby-context check.</param>
/// <returns>The referent token for the city, or null when the sequence is rejected.</returns>
static Pullenti.Ner.ReferentToken _tryNounName(List<CityItemToken> li, out Pullenti.Ner.Core.IntOntologyItem oi, bool always)
{
    oi = null;
    if (li == null || (li.Count < 2) || ((li[0].Typ != CityItemToken.ItemType.Noun && li[0].Typ != CityItemToken.ItemType.Misc)))
    {
        return null;
    }
    bool ok = !li[0].Doubtful;
    if (ok && li[0].Typ == CityItemToken.ItemType.Misc)
    {
        ok = false;
    }
    string typ = (li[0].Typ == CityItemToken.ItemType.Misc ? null : li[0].Value);
    string typ2 = (li[0].Typ == CityItemToken.ItemType.Misc ? null : li[0].AltValue);
    string probAdj = null;
    int i1 = 1;
    if ((typ != null && li[i1].Typ == CityItemToken.ItemType.Noun && ((i1 + 1) < li.Count)) && li[0].WhitespacesAfterCount <= 1 && (((Pullenti.Morph.LanguageHelper.EndsWith(typ, "ПОСЕЛОК") || Pullenti.Morph.LanguageHelper.EndsWith(typ, "СЕЛИЩЕ") || typ == "ДЕРЕВНЯ") || typ == "СЕЛО")))
    {
        if (li[i1].BeginToken == li[i1].EndToken)
        {
            Pullenti.Ner.Address.Internal.AddressItemToken ooo = Pullenti.Ner.Address.Internal.AddressItemToken.TryAttachOrg(li[i1].BeginToken);
            if (ooo != null && ooo.RefToken != null)
            {
                return null;
            }
        }
        typ2 = li[i1].Value;
        if (typ2 == "СТАНЦИЯ" && li[i1].BeginToken.IsValue("СТ", null) && ((i1 + 1) < li.Count))
        {
            // "СТ" may also abbreviate an adjective; add it as an alternative
            // name prefix agreeing with the following name.
            Pullenti.Ner.MorphCollection m = li[i1 + 1].Morph;
            if (m.Number == Pullenti.Morph.MorphNumber.Plural)
            {
                probAdj = "СТАРЫЕ";
            }
            else if (m.Gender == Pullenti.Morph.MorphGender.Feminie)
            {
                probAdj = "СТАРАЯ";
            }
            else if (m.Gender == Pullenti.Morph.MorphGender.Masculine)
            {
                probAdj = "СТАРЫЙ";
            }
            else
            {
                probAdj = "СТАРОЕ";
            }
        }
        i1++;
    }
    string name = li[i1].Value ?? ((li[i1].OntoItem == null ? null : li[i1].OntoItem.CanonicText));
    string altName = li[i1].AltValue;
    if (name == null)
    {
        return null;
    }
    Pullenti.Ner.MorphCollection mc = li[0].Morph;
    if (i1 == 1 && li[i1].Typ == CityItemToken.ItemType.City && ((li[0].Value == "ГОРОД" || li[0].Value == "МІСТО" || li[0].Typ == CityItemToken.ItemType.Misc)))
    {
        if (typ == null && ((i1 + 1) < li.Count) && li[i1 + 1].Typ == CityItemToken.ItemType.Noun)
        {
            return null;
        }
        oi = li[i1].OntoItem;
        if (oi != null)
        {
            name = oi.CanonicText;
        }
        // oi may be null here (item without ontology entry), so guard the access.
        if (name.Length > 2 || (oi != null && oi.MiscAttr != null))
        {
            if (!li[1].Doubtful || ((oi != null && oi.MiscAttr != null)))
            {
                ok = true;
            }
            else if (!ok && !li[1].IsNewlineBefore)
            {
                if (li[0].GeoObjectBefore || li[1].GeoObjectAfter)
                {
                    ok = true;
                }
                else if (Pullenti.Ner.Address.Internal.StreetDefineHelper.CheckStreetAfter(li[1].EndToken.Next))
                {
                    ok = true;
                }
                else if (li[1].EndToken.Next != null && (li[1].EndToken.Next.GetReferent() is Pullenti.Ner.Date.DateReferent))
                {
                    ok = true;
                }
                else if ((li[1].WhitespacesBeforeCount < 2) && li[1].OntoItem != null)
                {
                    ok = true;
                }
            }
            if (li[1].Doubtful && li[1].EndToken.Next != null && li[1].EndToken.Chars == li[1].EndToken.Next.Chars)
            {
                ok = false;
            }
            if (li[0].BeginToken.Previous != null && li[0].BeginToken.Previous.IsValue("В", null))
            {
                ok = true;
            }
        }
        if (!ok)
        {
            ok = CheckYearAfter(li[1].EndToken.Next);
        }
        if (!ok)
        {
            ok = CheckCityAfter(li[1].EndToken.Next);
        }
    }
    else if ((li[i1].Typ == CityItemToken.ItemType.ProperName || li[i1].Typ == CityItemToken.ItemType.City))
    {
        if (((li[0].Value == "АДМИНИСТРАЦИЯ" || li[0].Value == "АДМІНІСТРАЦІЯ")) && i1 == 1)
        {
            return null;
        }
        if (li[i1].IsNewlineBefore)
        {
            if (li.Count != 2)
            {
                return null;
            }
        }
        if (!li[0].Doubtful)
        {
            ok = true;
            if (name.Length < 2)
            {
                ok = false;
            }
            else if ((name.Length < 3) && li[0].Morph.Number != Pullenti.Morph.MorphNumber.Singular)
            {
                ok = false;
            }
            if (li[i1].Doubtful && !li[i1].GeoObjectAfter && !li[0].GeoObjectBefore)
            {
                if (li[i1].Morph.Case.IsGenitive)
                {
                    if (li[i1].EndToken.Next == null || MiscLocationHelper.CheckGeoObjectAfter(li[i1].EndToken.Next, false) || Pullenti.Ner.Address.Internal.AddressItemToken.CheckHouseAfter(li[i1].EndToken.Next, false, true))
                    {
                    }
                    else if (li[0].BeginToken.Previous == null || MiscLocationHelper.CheckGeoObjectBefore(li[0].BeginToken))
                    {
                    }
                    else
                    {
                        ok = false;
                    }
                }
                if (ok)
                {
                    // A person property before and a person at the name position
                    // mean the "name" is really a person.
                    Pullenti.Ner.ReferentToken rt0 = li[i1].Kit.ProcessReferent("PERSONPROPERTY", li[0].BeginToken.Previous);
                    if (rt0 != null)
                    {
                        Pullenti.Ner.ReferentToken rt1 = li[i1].Kit.ProcessReferent("PERSON", li[i1].BeginToken);
                        if (rt1 != null)
                        {
                            ok = false;
                        }
                    }
                }
            }
            Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(li[i1].BeginToken, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
            if (npt != null)
            {
                if (npt.EndToken.EndChar > li[i1].EndChar && npt.Adjectives.Count > 0 && !npt.Adjectives[0].EndToken.Next.IsComma)
                {
                    ok = false;
                }
                else if (TerrItemToken.m_UnknownRegions.TryParse(npt.EndToken, Pullenti.Ner.Core.TerminParseAttr.FullwordsOnly) != null)
                {
                    // A region word is acceptable only next to a non-city geo referent.
                    bool ok1 = false;
                    if (li[0].BeginToken.Previous != null)
                    {
                        Pullenti.Ner.Token ttt = li[0].BeginToken.Previous;
                        if (ttt.IsComma && ttt.Previous != null)
                        {
                            ttt = ttt.Previous;
                        }
                        Pullenti.Ner.Geo.GeoReferent geo = ttt.GetReferent() as Pullenti.Ner.Geo.GeoReferent;
                        if (geo != null && !geo.IsCity)
                        {
                            ok1 = true;
                        }
                    }
                    if (npt.EndToken.Next != null)
                    {
                        Pullenti.Ner.Token ttt = npt.EndToken.Next;
                        if (ttt.IsComma && ttt.Next != null)
                        {
                            ttt = ttt.Next;
                        }
                        Pullenti.Ner.Geo.GeoReferent geo = ttt.GetReferent() as Pullenti.Ner.Geo.GeoReferent;
                        if (geo != null && !geo.IsCity)
                        {
                            ok1 = true;
                        }
                    }
                    if (!ok1)
                    {
                        return null;
                    }
                }
            }
            if (li[0].Value == "ПОРТ")
            {
                if (li[i1].Chars.IsAllUpper || li[i1].Chars.IsLatinLetter)
                {
                    return null;
                }
            }
        }
        else if (li[0].GeoObjectBefore)
        {
            ok = true;
        }
        else if (li[i1].GeoObjectAfter && !li[i1].IsNewlineAfter)
        {
            ok = true;
        }
        else
        {
            ok = CheckYearAfter(li[i1].EndToken.Next);
        }
        if (!ok)
        {
            ok = CheckStreetAfter(li[i1].EndToken.Next);
        }
        if (!ok && li[0].BeginToken.Previous != null && li[0].BeginToken.Previous.IsValue("В", null))
        {
            ok = true;
        }
    }
    else
    {
        return null;
    }
    if (!ok && !always)
    {
        if (MiscLocationHelper.CheckNearBefore(li[0].BeginToken.Previous) == null)
        {
            return null;
        }
    }
    if (li.Count > (i1 + 1))
    {
        li.RemoveRange(i1 + 1, li.Count - i1 - 1);
    }
    Pullenti.Ner.Geo.GeoReferent city = new Pullenti.Ner.Geo.GeoReferent();
    if (oi != null && oi.Referent != null)
    {
        city = oi.Referent.Clone() as Pullenti.Ner.Geo.GeoReferent;
        city.Occurrence.Clear();
    }
    if (!li[0].Morph.Case.IsUndefined && li[0].Morph.Gender != Pullenti.Morph.MorphGender.Undefined)
    {
        if (li[i1].EndToken.Morph.Class.IsAdjective && li[i1].BeginToken == li[i1].EndToken)
        {
            // Re-inflect an adjectival name to agree with the type noun.
            string nam = Pullenti.Ner.Core.ProperNameHelper.GetNameEx(li[i1].BeginToken, li[i1].EndToken, Pullenti.Morph.MorphClass.Adjective, li[0].Morph.Case, li[0].Morph.Gender, false, false);
            if (nam != null && nam != name)
            {
                name = nam;
            }
        }
    }
    if (li[0].Morph.Case.IsNominative)
    {
        if (altName != null)
        {
            city.AddName(altName);
        }
        altName = null;
    }
    city.AddName(name);
    if (probAdj != null)
    {
        city.AddName(probAdj + " " + name);
    }
    if (altName != null)
    {
        city.AddName(altName);
        if (probAdj != null)
        {
            city.AddName(probAdj + " " + altName);
        }
    }
    if (typ != null)
    {
        city.AddTyp(typ);
    }
    else if (!city.IsCity)
    {
        city.AddTypCity(li[0].Kit.BaseLanguage);
    }
    if (typ2 != null)
    {
        city.AddTyp(typ2.ToLower());
    }
    if (li[0].HigherGeo != null && GeoOwnerHelper.CanBeHigher(li[0].HigherGeo, city))
    {
        city.Higher = li[0].HigherGeo;
    }
    if (li[0].Typ == CityItemToken.ItemType.Misc)
    {
        li.RemoveAt(0);
    }
    Pullenti.Ner.ReferentToken res = new Pullenti.Ner.ReferentToken(city, li[0].BeginToken, li[li.Count - 1].EndToken) { Morph = mc };
    if (res.EndToken.Next != null && res.EndToken.Next.IsHiphen && (res.EndToken.Next.Next is Pullenti.Ner.NumberToken))
    {
        // "Name-NN" with a small number: the number becomes part of every name.
        Pullenti.Ner.NumberToken num = res.EndToken.Next.Next as Pullenti.Ner.NumberToken;
        if ((num.Typ == Pullenti.Ner.NumberSpellingType.Digit && !num.Morph.Class.IsAdjective && num.IntValue != null) && (num.IntValue.Value < 50))
        {
            foreach (Pullenti.Ner.Slot s in city.Slots)
            {
                if (s.TypeName == Pullenti.Ner.Geo.GeoReferent.ATTR_NAME)
                {
                    city.UploadSlot(s, string.Format("{0}-{1}", s.Value, num.Value));
                }
            }
            res.EndToken = num;
        }
    }
    if (li[0].BeginToken == li[0].EndToken && li[0].BeginToken.IsValue("ГОРОДОК", null))
    {
        if (Pullenti.Ner.Address.Internal.AddressItemToken.CheckHouseAfter(res.EndToken.Next, true, false))
        {
            return null;
        }
    }
    return res;
}
/// <summary>
/// Creates the semantic object for the head noun of a noun phrase, attaches it to the
/// head token through <c>Tag</c>, links the phrase adjectives and the internal noun to it
/// as details, and registers the object in the graph.
/// </summary>
/// <param name="gr">Graph that receives the new object and the links.</param>
/// <param name="npt">Noun phrase to convert.</param>
/// <returns>
/// The created object, or null when the head token is a referent token whose referent is null
/// (in that case nothing is added to the graph).
/// </returns>
public static Pullenti.Semantic.SemObject CreateNounGroup(Pullenti.Semantic.SemGraph gr, Pullenti.Ner.Core.NounPhraseToken npt)
{
    Pullenti.Ner.Token headToken = npt.Noun.BeginToken;
    Pullenti.Semantic.SemObject result = new Pullenti.Semantic.SemObject(gr);
    result.Tokens.Add(npt.Noun);

    // Personal pronouns take precedence over ordinary pronouns; everything else is a noun.
    Pullenti.Semantic.SemObjectType kind = Pullenti.Semantic.SemObjectType.Noun;
    if (npt.Noun.Morph.Class.IsPersonalPronoun)
    {
        kind = Pullenti.Semantic.SemObjectType.PersonalPronoun;
    }
    else if (npt.Noun.Morph.Class.IsPronoun)
    {
        kind = Pullenti.Semantic.SemObjectType.Pronoun;
    }
    result.Typ = kind;

    Pullenti.Ner.ReferentToken headAsReferent = headToken as Pullenti.Ner.ReferentToken;
    Pullenti.Ner.NumberToken headAsNumber = headToken as Pullenti.Ner.NumberToken;

    if (npt.Noun.BeginToken != npt.Noun.EndToken)
    {
        // Multi-token noun: normal forms come from the whole noun, grammar from the phrase.
        result.Morph.NormalCase = npt.Noun.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false);
        result.Morph.NormalFull = npt.Noun.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Singular, Pullenti.Morph.MorphGender.Undefined, false);
        result.Morph.Class = Pullenti.Morph.MorphClass.Noun;
        result.Morph.Number = npt.Morph.Number;
        result.Morph.Gender = npt.Morph.Gender;
        result.Morph.Case = npt.Morph.Case;
    }
    else if (headToken is Pullenti.Ner.TextToken)
    {
        // Take the first word form that agrees with the phrase morphology.
        foreach (Pullenti.Morph.MorphBaseInfo variant in headToken.Morph.Items)
        {
            if (!variant.CheckAccord(npt.Morph, false, false))
            {
                continue;
            }
            Pullenti.Morph.MorphWordForm wordForm = variant as Pullenti.Morph.MorphWordForm;
            if (wordForm == null)
            {
                continue;
            }
            _setMorph(result, wordForm);
            break;
        }
        if (result.Morph.NormalCase == null)
        {
            result.Morph.NormalCase = headToken.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false);
            result.Morph.NormalFull = headToken.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Singular, Pullenti.Morph.MorphGender.Undefined, false);
        }
        List <Pullenti.Semantic.Utils.DerivateGroup> groups = Pullenti.Semantic.Utils.DerivateService.FindDerivates(result.Morph.NormalFull, true, null);
        if (groups != null && groups.Count > 0)
        {
            result.Concept = groups[0];
        }
    }
    else if (headAsReferent != null)
    {
        Pullenti.Ner.Referent referent = headAsReferent.Referent;
        if (referent == null)
        {
            return null;
        }
        string referentText = referent.ToString();
        result.Morph.NormalCase = referentText;
        result.Morph.NormalFull = referentText;
        result.Concept = referent;
    }
    else if (headAsNumber != null)
    {
        result.Morph.Gender = headToken.Morph.Gender;
        result.Morph.Number = headToken.Morph.Number;
        if (headAsNumber.IntValue == null)
        {
            // No integer value available: fall back to the upper-cased source text.
            string upperSource = headToken.GetSourceText().ToUpper();
            result.Morph.NormalCase = upperSource;
            result.Morph.NormalFull = upperSource;
        }
        else
        {
            result.Morph.NormalCase = Pullenti.Ner.Core.NumberHelper.GetNumberAdjective(headAsNumber.IntValue.Value, headToken.Morph.Gender, headToken.Morph.Number);
            result.Morph.NormalFull = Pullenti.Ner.Core.NumberHelper.GetNumberAdjective(headAsNumber.IntValue.Value, Pullenti.Morph.MorphGender.Masculine, Pullenti.Morph.MorphNumber.Singular);
        }
    }

    headToken.Tag = result;

    foreach (Pullenti.Ner.MetaToken adjective in npt.Adjectives)
    {
        // With several nouns in the phrase only the first adjective is linked to this one.
        if (npt.MultiNouns && adjective != npt.Adjectives[0])
        {
            break;
        }
        Pullenti.Semantic.SemObject adjectiveSem = CreateNptAdj(gr, npt, adjective);
        if (adjectiveSem != null)
        {
            gr.AddLink(Pullenti.Semantic.SemLinkType.Detail, result, adjectiveSem, "какой", false, null);
        }
    }

    if (npt.InternalNoun != null)
    {
        Pullenti.Semantic.SemObject innerSem = CreateNounGroup(gr, npt.InternalNoun);
        if (innerSem != null)
        {
            gr.AddLink(Pullenti.Semantic.SemLinkType.Detail, result, innerSem, null, false, null);
        }
    }

    gr.Objects.Add(result);
    return result;
}
/// <summary>
/// Builds the token chain for a source text and runs the preprocessing passes.
/// </summary>
/// <remarks>
/// Steps, in order: morphological tokenization; optional per-word replacement through
/// <c>sofa.CorrectionDict</c>; the optional passes selected by the <c>sofa</c> flags
/// (<c>ClearDust</c>, <c>DoWordsMergingByMorph</c>, <c>DoWordCorrectionByMorph</c>);
/// <c>MergeLetters</c>; <c>DefineBaseLanguage</c>; optional embedding of number tokens.
/// Unless <paramref name="onlyTokenizing"/> is set, the morphology of out-of-dictionary
/// Cyrillic words is then narrowed using a neighbouring dictionary word with the same
/// two-character ending, and statistics are created.
/// </remarks>
/// <param name="sofa">Text to analyse; when null the constructor returns immediately.</param>
/// <param name="onlyTokenizing">When true, stop after tokenization and number embedding.</param>
/// <param name="lang">Language passed through to the morphology service.</param>
/// <param name="progress">Progress callback passed to the morphology service.</param>
public AnalysisKit(Pullenti.Ner.SourceOfAnalysis sofa = null, bool onlyTokenizing = false, Pullenti.Morph.MorphLang lang = null, ProgressChangedEventHandler progress = null)
{
    if (sofa == null)
    {
        return;
    }
    m_Sofa = sofa;
    StartDate = DateTime.Now;
    List <Pullenti.Morph.MorphToken> tokens = Pullenti.Morph.MorphologyService.Process(sofa.Text, lang, progress);
    Pullenti.Ner.Token t0 = null;
    if (tokens != null)
    {
        for (int ii = 0; ii < tokens.Count; ii++)
        {
            Pullenti.Morph.MorphToken mt = tokens[ii];
            Pullenti.Ner.TextToken tt = new Pullenti.Ner.TextToken(mt, this);
            if (sofa.CorrectionDict != null)
            {
                string corw;
                if (sofa.CorrectionDict.TryGetValue(mt.Term, out corw))
                {
                    // The replacement is used only when it tokenizes to exactly one token;
                    // the new token keeps the original character span and Chars.
                    List <Pullenti.Morph.MorphToken> ccc = Pullenti.Morph.MorphologyService.Process(corw, lang, null);
                    if (ccc != null && ccc.Count == 1)
                    {
                        Pullenti.Ner.TextToken tt1 = new Pullenti.Ner.TextToken(ccc[0], this, tt.BeginChar, tt.EndChar) { Term0 = tt.Term };
                        tt1.Chars = tt.Chars;
                        tt = tt1;
                        if (CorrectedTokens == null)
                        {
                            CorrectedTokens = new Dictionary <Pullenti.Ner.Token, string>();
                        }
                        CorrectedTokens.Add(tt, tt.GetSourceText());
                    }
                }
            }
            if (t0 == null)
            {
                FirstToken = tt;
            }
            else
            {
                t0.Next = tt;
            }
            t0 = tt;
        }
    }
    if (sofa.ClearDust)
    {
        this.ClearDust();
    }
    if (sofa.DoWordsMergingByMorph)
    {
        this.CorrectWordsByMerging(lang);
    }
    if (sofa.DoWordCorrectionByMorph)
    {
        this.CorrectWordsByMorph(lang);
    }
    this.MergeLetters();
    this.DefineBaseLanguage();
    if (sofa.CreateNumberTokens)
    {
        for (Pullenti.Ner.Token t = FirstToken; t != null; t = t.Next)
        {
            Pullenti.Ner.NumberToken nt = NumberHelper.TryParseNumber(t);
            if (nt == null)
            {
                continue;
            }
            this.EmbedToken(nt);
            t = nt;
        }
    }
    if (onlyTokenizing)
    {
        return;
    }
    for (Pullenti.Ner.Token t = FirstToken; t != null; t = t.Next)
    {
        if (t.Morph.Class.IsPreposition)
        {
            continue;
        }
        Pullenti.Morph.MorphClass mc = t.GetMorphClassInDictionary();
        if (!mc.IsUndefined || !t.Chars.IsCyrillicLetter || t.LengthChar <= 4)
        {
            continue;
        }
        // Last two characters of the unknown word.
        string tail = sofa.Text.Substring(t.EndChar - 1, 2);
        Pullenti.Ner.Token tte = null;

        // Look left first, stepping over one comma/"and", preposition or conjunction.
        Pullenti.Ner.Token tt = t.Previous;
        if (tt != null && (tt.IsCommaAnd || tt.Morph.Class.IsPreposition || tt.Morph.Class.IsConjunction))
        {
            tt = tt.Previous;
        }
        if (_hasSameTail(tt, t, sofa.Text, tail))
        {
            tte = tt;
        }
        if (tte == null)
        {
            // Nothing suitable on the left: try the same on the right.
            tt = t.Next;
            if (tt != null && (tt.IsCommaAnd || tt.Morph.Class.IsPreposition || tt.Morph.Class.IsConjunction))
            {
                tt = tt.Next;
            }
            if (_hasSameTail(tt, t, sofa.Text, tail))
            {
                tte = tt;
            }
        }
        if (tte != null)
        {
            t.Morph.RemoveItemsEx(tte.Morph, tte.GetMorphClassInDictionary());
        }
    }
    this.CreateStatistics();
}

/// <summary>
/// Checks whether <paramref name="candidate"/> is a dictionary word longer than four
/// characters that shares at least one morphological class bit with
/// <paramref name="unknown"/> and ends with the same two characters.
/// </summary>
/// <param name="candidate">Neighbouring token to test; may be null.</param>
/// <param name="unknown">The out-of-dictionary token being refined.</param>
/// <param name="text">Full source text from which the two-character ending is taken.</param>
/// <param name="tail">Two-character ending of <paramref name="unknown"/>.</param>
static bool _hasSameTail(Pullenti.Ner.Token candidate, Pullenti.Ner.Token unknown, string text, string tail)
{
    if (candidate == null)
    {
        return false;
    }
    if (candidate.GetMorphClassInDictionary().IsUndefined)
    {
        return false;
    }
    if ((candidate.Morph.Class.Value & unknown.Morph.Class.Value) == 0)
    {
        return false;
    }
    if (candidate.LengthChar <= 4)
    {
        return false;
    }
    return text.Substring(candidate.EndChar - 1, 2) == tail;
}
/// <summary>
/// Looks, within one sentence starting at <paramref name="t"/>, for the first non-adjective
/// number token that is directly followed by a noun phrase, and describes the whole
/// sentence together with that number and noun.
/// </summary>
/// <param name="t">Token that must be able to start a sentence.</param>
/// <returns>
/// The filled token, or null when <paramref name="t"/> cannot start a sentence, when the
/// next sentence begins before a suitable number is found, or when the number found has no
/// integer value.
/// </returns>
public static DefinitionWithNumericToken TryParse(Pullenti.Ner.Token t)
{
    if (!Pullenti.Ner.Core.MiscHelper.CanBeStartOfSentence(t))
    {
        return null;
    }

    Pullenti.Ner.Core.NounPhraseToken nounPhrase = null;
    Pullenti.Ner.NumberToken number = null;
    Pullenti.Ner.Token cur = t;
    while (cur != null)
    {
        // Reaching the next sentence without a match means failure.
        if (cur != t && Pullenti.Ner.Core.MiscHelper.CanBeStartOfSentence(cur))
        {
            return null;
        }
        bool candidate = (cur is Pullenti.Ner.NumberToken)
            && !(cur.WhitespacesAfterCount > 2 || cur == t)
            && !cur.Morph.Class.IsAdjective;
        if (candidate)
        {
            Pullenti.Ner.Core.NounPhraseToken parsed = Pullenti.Ner.Core.NounPhraseHelper.TryParse(cur.Next, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
            if (parsed != null)
            {
                number = cur as Pullenti.Ner.NumberToken;
                nounPhrase = parsed;
                break;
            }
        }
        cur = cur.Next;
    }
    if (number == null || number.IntValue == null)
    {
        return null;
    }

    Pullenti.Ner.Core.GetTextAttr textAttrs = Pullenti.Ner.Core.GetTextAttr.KeepQuotes | Pullenti.Ner.Core.GetTextAttr.KeepRegister;

    DefinitionWithNumericToken result = new DefinitionWithNumericToken(t, nounPhrase.EndToken);
    result.Number = number.IntValue.Value;
    result.NumberBeginChar = number.BeginChar;
    result.NumberEndChar = number.EndChar;
    result.Noun = nounPhrase.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Singular, Pullenti.Morph.MorphGender.Undefined, false);
    result.NounsGenetive = nounPhrase.GetMorphVariant(Pullenti.Morph.MorphCase.Genitive, true) ?? result.Noun;

    // Text before the number, then the "number + noun" fragment.
    result.Text = Pullenti.Ner.Core.MiscHelper.GetTextValue(t, number.Previous, textAttrs);
    if (number.IsWhitespaceBefore)
    {
        result.Text = result.Text + " ";
    }
    result.NumberSubstring = Pullenti.Ner.Core.MiscHelper.GetTextValue(number, nounPhrase.EndToken, textAttrs);
    result.Text = result.Text + result.NumberSubstring;

    // Extend the result up to the last token before the next sentence start.
    cur = nounPhrase.EndToken;
    while (cur != null && !Pullenti.Ner.Core.MiscHelper.CanBeStartOfSentence(cur))
    {
        result.EndToken = cur;
        cur = cur.Next;
    }

    if (result.EndToken != nounPhrase.EndToken)
    {
        if (nounPhrase.IsWhitespaceAfter)
        {
            result.Text = result.Text + " ";
        }
        result.Text = result.Text + Pullenti.Ner.Core.MiscHelper.GetTextValue(nounPhrase.EndToken.Next, result.EndToken, textAttrs);
    }
    return result;
}
/// <summary>
/// Tries to parse a number followed by a unit/currency postfix, or preceded by a
/// one-character money sign, starting at <paramref name="t"/>.
/// </summary>
/// <remarks>
/// Forms handled, in the order they are tried:
/// a one-character money sign before the number; a bracketed number followed by a money
/// postfix; a single adjective whose term splits into a number word plus a postfix;
/// a real number (via <c>TryParseRealNumber</c>); "… с половина" (+0.5); a bracketed
/// repetition of the value in words or as "NN/100" or "… ЦЕЛЫХ …"; a bracket starting with
/// "СУММА"; a postfix from <c>m_Postfixes</c>; a lone 'р' when one of the ten preceding
/// tokens indicates money; <c>_attachSpecPostfix</c>; a unit taken from a following number
/// after a preposition/conjunction; and the glued multipliers "СМХ", "МХ", "ММХ".
/// </remarks>
/// <param name="t">First token of the candidate; may be null.</param>
/// <returns>The parsed token, or null when nothing matches.</returns>
public static Pullenti.Ner.Core.NumberExToken TryParseNumberWithPostfix(Pullenti.Ner.Token t)
{
    if (t == null)
    {
        return null;
    }
    Pullenti.Ner.Token t0 = t;
    string isDollar = null;
    if (t.LengthChar == 1 && t.Next != null)
    {
        isDollar = Pullenti.Ner.Core.NumberHelper.IsMoneyChar(t);
        if (isDollar != null)
        {
            t = t.Next;
        }
    }
    Pullenti.Ner.NumberToken nt = t as Pullenti.Ner.NumberToken;
    if (nt == null)
    {
        // "(N) <money postfix>" that is not itself preceded by a number.
        if ((!(t.Previous is Pullenti.Ner.NumberToken) && t.IsChar('(') && (t.Next is Pullenti.Ner.NumberToken)) && t.Next.Next != null && t.Next.Next.IsChar(')'))
        {
            Pullenti.Ner.Core.TerminToken toks1 = m_Postfixes.TryParse(t.Next.Next.Next, Pullenti.Ner.Core.TerminParseAttr.No);
            if (toks1 != null && ((Pullenti.Ner.Core.NumberExType)toks1.Termin.Tag) == Pullenti.Ner.Core.NumberExType.Money)
            {
                Pullenti.Ner.NumberToken nt0 = t.Next as Pullenti.Ner.NumberToken;
                Pullenti.Ner.Core.NumberExToken res = new Pullenti.Ner.Core.NumberExToken(t, toks1.EndToken, nt0.Value, nt0.Typ, Pullenti.Ner.Core.NumberExType.Money) { AltRealValue = nt0.RealValue, Morph = toks1.BeginToken.Morph };
                return _correctMoney(res, toks1.BeginToken);
            }
        }
        Pullenti.Ner.TextToken tt = t as Pullenti.Ner.TextToken;
        if (tt == null || !tt.Morph.Class.IsAdjective)
        {
            return null;
        }
        // Compound adjective: try to split the term into <number word><postfix>.
        string val = tt.Term;
        for (int i = 4; i < (val.Length - 5); i++)
        {
            string v = val.Substring(0, i);
            List <Pullenti.Ner.Core.Termin> li = Pullenti.Ner.Core.NumberHelper.m_Nums.FindTerminsByString(v, tt.Morph.Language);
            if (li == null)
            {
                continue;
            }
            string vv = val.Substring(i);
            List <Pullenti.Ner.Core.Termin> lii = m_Postfixes.FindTerminsByString(vv, tt.Morph.Language);
            if (lii != null && lii.Count > 0)
            {
                Pullenti.Ner.Core.NumberExToken re = new Pullenti.Ner.Core.NumberExToken(t, t, ((int)li[0].Tag).ToString(), Pullenti.Ner.NumberSpellingType.Words, (Pullenti.Ner.Core.NumberExType)lii[0].Tag) { Morph = t.Morph };
                _correctExtTypes(re);
                return re;
            }
            // Only the first prefix that is a number word is examined.
            break;
        }
        return null;
    }
    if (t.Next == null && isDollar == null)
    {
        return null;
    }
    double f = nt.RealValue;
    if (double.IsNaN(f))
    {
        return null;
    }
    Pullenti.Ner.Token t1 = nt.Next;
    if ((t1 != null && t1.IsCharOf(",.")) || ((t1 is Pullenti.Ner.NumberToken) && (t1.WhitespacesBeforeCount < 3)))
    {
        Pullenti.Ner.NumberToken tt11 = Pullenti.Ner.Core.NumberHelper.TryParseRealNumber(nt, false, false);
        if (tt11 != null)
        {
            t1 = tt11.EndToken.Next;
            f = tt11.RealValue;
        }
    }
    if (t1 == null)
    {
        if (isDollar == null)
        {
            return null;
        }
    }
    else if ((t1.Next != null && t1.Next.IsValue("С", "З") && t1.Next.Next != null) && t1.Next.Next.IsValue("ПОЛОВИНА", null))
    {
        f += 0.5;
        t1 = t1.Next.Next;
    }
    if (t1 != null && t1.IsHiphen && t1.Next != null)
    {
        t1 = t1.Next;
    }
    // det: the value was confirmed by a bracketed repetition.
    bool det = false;
    double altf = f;
    if (((t1 is Pullenti.Ner.NumberToken) && t1.Previous != null && t1.Previous.IsHiphen) && (t1 as Pullenti.Ner.NumberToken).IntValue == 0 && t1.LengthChar == 2)
    {
        t1 = t1.Next;
    }
    if ((t1 != null && t1.Next != null && t1.IsChar('(')) && ((t1.Next is Pullenti.Ner.NumberToken) || t1.Next.IsValue("НОЛЬ", null)) && t1.Next.Next != null)
    {
        Pullenti.Ner.NumberToken nt1 = t1.Next as Pullenti.Ner.NumberToken;
        double val = (double)0;
        if (nt1 != null)
        {
            val = nt1.RealValue;
        }
        if (Math.Floor(f) == Math.Floor(val))
        {
            Pullenti.Ner.Token ttt = t1.Next.Next;
            if (ttt.IsChar(')'))
            {
                t1 = ttt.Next;
                det = true;
                if ((t1 is Pullenti.Ner.NumberToken) && (t1 as Pullenti.Ner.NumberToken).IntValue != null && (t1 as Pullenti.Ner.NumberToken).IntValue.Value == 0)
                {
                    t1 = t1.Next;
                }
            }
            else if (((((ttt is Pullenti.Ner.NumberToken) && ((ttt as Pullenti.Ner.NumberToken).RealValue < 100) && ttt.Next != null) && ttt.Next.IsChar('/') && ttt.Next.Next != null) && ttt.Next.Next.GetSourceText() == "100" && ttt.Next.Next.Next != null) && ttt.Next.Next.Next.IsChar(')'))
            {
                // "(N NN/100)": accepted only when NN equals the value returned by GetDecimalRest100(f).
                int rest = GetDecimalRest100(f);
                if ((ttt as Pullenti.Ner.NumberToken).IntValue != null && rest == (ttt as Pullenti.Ner.NumberToken).IntValue.Value)
                {
                    t1 = ttt.Next.Next.Next.Next;
                    det = true;
                }
            }
            else if ((ttt.IsValue("ЦЕЛЫХ", null) && (ttt.Next is Pullenti.Ner.NumberToken) && ttt.Next.Next != null) && ttt.Next.Next.Next != null && ttt.Next.Next.Next.IsChar(')'))
            {
                Pullenti.Ner.NumberToken num2 = ttt.Next as Pullenti.Ner.NumberToken;
                altf = num2.RealValue;
                if (ttt.Next.Next.IsValue("ДЕСЯТЫЙ", null))
                {
                    altf /= 10;
                }
                else if (ttt.Next.Next.IsValue("СОТЫЙ", null))
                {
                    altf /= 100;
                }
                else if (ttt.Next.Next.IsValue("ТЫСЯЧНЫЙ", null))
                {
                    altf /= 1000;
                }
                else if (ttt.Next.Next.IsValue("ДЕСЯТИТЫСЯЧНЫЙ", null))
                {
                    altf /= 10000;
                }
                else if (ttt.Next.Next.IsValue("СТОТЫСЯЧНЫЙ", null))
                {
                    altf /= 100000;
                }
                else if (ttt.Next.Next.IsValue("МИЛЛИОННЫЙ", null))
                {
                    altf /= 1000000;
                }
                // NOTE(review): when the fraction is not below 1, altf keeps the fraction value
                // instead of being reset to f (the Words branch below does reset it) — confirm intent.
                if (altf < 1)
                {
                    altf += val;
                    t1 = ttt.Next.Next.Next.Next;
                    det = true;
                }
            }
            else
            {
                Pullenti.Ner.Core.TerminToken toks1 = m_Postfixes.TryParse(ttt, Pullenti.Ner.Core.TerminParseAttr.No);
                if (toks1 != null)
                {
                    if (((Pullenti.Ner.Core.NumberExType)toks1.Termin.Tag) == Pullenti.Ner.Core.NumberExType.Money)
                    {
                        if (toks1.EndToken.Next != null && toks1.EndToken.Next.IsChar(')'))
                        {
                            Pullenti.Ner.Core.NumberExToken res = new Pullenti.Ner.Core.NumberExToken(t, toks1.EndToken.Next, nt.Value, nt.Typ, Pullenti.Ner.Core.NumberExType.Money) { RealValue = f, AltRealValue = altf, Morph = toks1.BeginToken.Morph };
                            return _correctMoney(res, toks1.BeginToken);
                        }
                    }
                }
                // "N (M unit)": the bracket contains its own number with postfix.
                Pullenti.Ner.Core.NumberExToken res2 = TryParseNumberWithPostfix(t1.Next);
                if (res2 != null && res2.EndToken.Next != null && res2.EndToken.Next.IsChar(')'))
                {
                    res2.BeginToken = t;
                    res2.EndToken = res2.EndToken.Next;
                    res2.AltRealValue = res2.RealValue;
                    res2.RealValue = f;
                    _correctExtTypes(res2);
                    if (res2.WhitespacesAfterCount < 2)
                    {
                        Pullenti.Ner.Core.TerminToken toks2 = m_Postfixes.TryParse(res2.EndToken.Next, Pullenti.Ner.Core.TerminParseAttr.No);
                        if (toks2 != null)
                        {
                            if (((Pullenti.Ner.Core.NumberExType)toks2.Termin.Tag) == Pullenti.Ner.Core.NumberExType.Money)
                            {
                                res2.EndToken = toks2.EndToken;
                            }
                        }
                    }
                    return res2;
                }
            }
        }
        else if (nt1 != null && nt1.Typ == Pullenti.Ner.NumberSpellingType.Words && nt.Typ == Pullenti.Ner.NumberSpellingType.Digit)
        {
            // Digits followed by a different value in words: keep the words value as alternative.
            altf = nt1.RealValue;
            Pullenti.Ner.Token ttt = t1.Next.Next;
            if (ttt.IsChar(')'))
            {
                t1 = ttt.Next;
                det = true;
            }
            if (!det)
            {
                altf = f;
            }
        }
    }
    if ((t1 != null && t1.IsChar('(') && t1.Next != null) && t1.Next.IsValue("СУММА", null))
    {
        Pullenti.Ner.Core.BracketSequenceToken br = Pullenti.Ner.Core.BracketHelper.TryParse(t1, Pullenti.Ner.Core.BracketParseAttr.No, 100);
        if (br != null)
        {
            t1 = br.EndToken.Next;
        }
    }
    if (isDollar != null)
    {
        Pullenti.Ner.Token te = null;
        if (t1 != null)
        {
            te = t1.Previous;
        }
        else
        {
            // The number reaches the end of the chain: take the very last token.
            for (t1 = t0; t1 != null; t1 = t1.Next)
            {
                if (t1.Next == null)
                {
                    te = t1;
                }
            }
        }
        if (te == null)
        {
            return null;
        }
        if (te.IsHiphen && te.Next != null)
        {
            if (te.Next.IsValue("МИЛЛИОННЫЙ", null))
            {
                f *= 1000000;
                altf *= 1000000;
                te = te.Next;
            }
            else if (te.Next.IsValue("МИЛЛИАРДНЫЙ", null))
            {
                f *= 1000000000;
                altf *= 1000000000;
                te = te.Next;
            }
        }
        if (!te.IsWhitespaceAfter && (te.Next is Pullenti.Ner.TextToken))
        {
            if (te.Next.IsValue("M", null))
            {
                f *= 1000000;
                altf *= 1000000;
                te = te.Next;
            }
            else if (te.Next.IsValue("BN", null))
            {
                f *= 1000000000;
                altf *= 1000000000;
                te = te.Next;
            }
        }
        return new Pullenti.Ner.Core.NumberExToken(t0, te, "", nt.Typ, Pullenti.Ner.Core.NumberExType.Money) { RealValue = f, AltRealValue = altf, ExTypParam = isDollar };
    }
    if (t1 == null || (t1.IsNewlineBefore && !det))
    {
        return null;
    }
    Pullenti.Ner.Core.TerminToken toks = m_Postfixes.TryParse(t1, Pullenti.Ner.Core.TerminParseAttr.No);
    if ((toks == null && det && (t1 is Pullenti.Ner.NumberToken)) && (t1 as Pullenti.Ner.NumberToken).Value == "0")
    {
        toks = m_Postfixes.TryParse(t1.Next, Pullenti.Ner.Core.TerminParseAttr.No);
    }
    if (toks == null && t1.IsChar('р'))
    {
        // A lone 'р' is taken as roubles only if one of the ten preceding tokens
        // is a money-related word or a MONEY referent.
        int cou = 10;
        for (Pullenti.Ner.Token ttt = t0.Previous; ttt != null && cou > 0; ttt = ttt.Previous, cou--)
        {
            bool moneyContext = ttt.IsValue("СУММА", null) || ttt.IsValue("НАЛИЧНЫЙ", null) || ttt.IsValue("БАЛАНС", null)
                || (ttt.GetReferent() != null && ttt.GetReferent().TypeName == "MONEY");
            if (!moneyContext)
            {
                continue;
            }
            toks = new Pullenti.Ner.Core.TerminToken(t1, t1) { Termin = m_Postfixes.FindTerminsByCanonicText("RUB")[0] };
            if (t1.Next != null && t1.Next.IsChar('.'))
            {
                toks.EndToken = t1.Next;
            }
            Pullenti.Ner.Core.NumberExType ty = (Pullenti.Ner.Core.NumberExType)toks.Termin.Tag;
            return new Pullenti.Ner.Core.NumberExToken(t, toks.EndToken, nt.Value, nt.Typ, ty) { RealValue = f, AltRealValue = altf, Morph = toks.BeginToken.Morph, ExTypParam = "RUB" };
        }
    }
    if (toks != null)
    {
        t1 = toks.EndToken;
        if (!t1.IsChar('.') && t1.Next != null && t1.Next.IsChar('.'))
        {
            // Absorb a following dot unless the postfix is written as its full canonical
            // text or does not consist of letters.
            bool isCanonical = (t1 is Pullenti.Ner.TextToken) && t1.IsValue(toks.Termin.Terms[0].CanonicalText, null);
            if (!isCanonical && t1.Chars.IsLetter)
            {
                t1 = t1.Next;
            }
        }
        if (toks.Termin.CanonicText == "LTL")
        {
            return null;
        }
        if (toks.BeginToken == t1)
        {
            if (t1.Morph.Class.IsPreposition || t1.Morph.Class.IsConjunction)
            {
                // A free-standing preposition/conjunction is not treated as a unit.
                if (t1.IsWhitespaceBefore && t1.IsWhitespaceAfter)
                {
                    return null;
                }
            }
        }
        Pullenti.Ner.Core.NumberExType ty = (Pullenti.Ner.Core.NumberExType)toks.Termin.Tag;
        Pullenti.Ner.Core.NumberExToken res = new Pullenti.Ner.Core.NumberExToken(t, t1, nt.Value, nt.Typ, ty) { RealValue = f, AltRealValue = altf, Morph = toks.BeginToken.Morph };
        if (ty != Pullenti.Ner.Core.NumberExType.Money)
        {
            _correctExtTypes(res);
            return res;
        }
        return _correctMoney(res, toks.BeginToken);
    }
    Pullenti.Ner.Core.NumberExToken pfx = _attachSpecPostfix(t1);
    if (pfx != null)
    {
        pfx.BeginToken = t;
        pfx.Value = nt.Value;
        pfx.Typ = nt.Typ;
        pfx.RealValue = f;
        pfx.AltRealValue = altf;
        return pfx;
    }
    if (t1.Next != null && (t1.Morph.Class.IsPreposition || t1.Morph.Class.IsConjunction))
    {
        if (!t1.IsValue("НА", null))
        {
            // "N <prep/conj> M unit": borrow the unit from the following number.
            Pullenti.Ner.Core.NumberExToken nn = TryParseNumberWithPostfix(t1.Next);
            if (nn != null)
            {
                return new Pullenti.Ner.Core.NumberExToken(t, t, nt.Value, nt.Typ, nn.ExTyp) { RealValue = f, AltRealValue = altf, ExTyp2 = nn.ExTyp2, ExTypParam = nn.ExTypParam };
            }
        }
    }
    if (!t1.IsWhitespaceAfter && (t1.Next is Pullenti.Ner.NumberToken) && (t1 is Pullenti.Ner.TextToken))
    {
        // Unit glued to a multiplication sign and followed directly by the next number.
        string term = (t1 as Pullenti.Ner.TextToken).Term;
        Pullenti.Ner.Core.NumberExType ty = Pullenti.Ner.Core.NumberExType.Undefined;
        if (term == "СМХ" || term == "CMX")
        {
            ty = Pullenti.Ner.Core.NumberExType.Santimeter;
        }
        else if (term == "MX" || term == "МХ")
        {
            ty = Pullenti.Ner.Core.NumberExType.Meter;
        }
        else if (term == "MMX" || term == "ММХ")
        {
            ty = Pullenti.Ner.Core.NumberExType.Millimeter;
        }
        if (ty != Pullenti.Ner.Core.NumberExType.Undefined)
        {
            return new Pullenti.Ner.Core.NumberExToken(t, t1, nt.Value, nt.Typ, ty) { RealValue = f, AltRealValue = altf, MultAfter = true };
        }
    }
    return null;
}
/// <summary>
/// Parses birth/death attributes that follow a person mention (for example
/// "родился …", "died …", "дата рождения: …", "(1900-1950)") and stores them in
/// <paramref name="p"/>.
/// </summary>
/// <param name="p">Person that receives the BORN / DIE / AGE slots; when null the text is only scanned.</param>
/// <param name="t0">First token after the person mention; may be null.</param>
/// <returns>The last token consumed, or <paramref name="t0"/> itself when nothing was recognised.</returns>
static Pullenti.Ner.Token CorrectTailAttributes(Pullenti.Ner.Person.PersonReferent p, Pullenti.Ner.Token t0)
{
    Pullenti.Ner.Token res = t0;
    Pullenti.Ner.Token t = t0;
    if (t != null && t.IsChar(','))
    {
        t = t.Next;
    }
    bool born = false;
    bool die = false;
    if (t != null && (t.IsValue("РОДИТЬСЯ", "НАРОДИТИСЯ") || t.IsValue("BORN", null)))
    {
        t = t.Next;
        born = true;
    }
    else if (t != null && (t.IsValue("УМЕРЕТЬ", "ПОМЕРТИ") || t.IsValue("СКОНЧАТЬСЯ", null) || t.IsValue("DIED", null)))
    {
        t = t.Next;
        die = true;
    }
    else if ((t != null && t.IsValue("ДАТА", null) && t.Next != null) && t.Next.IsValue("РОЖДЕНИЕ", "НАРОДЖЕННЯ"))
    {
        t = t.Next.Next;
        born = true;
    }
    // Skip prepositions, hyphens and colons between the keyword and the date.
    while (t != null && (t.Morph.Class.IsPreposition || t.IsHiphen || t.IsChar(':')))
    {
        t = t.Next;
    }
    if (t != null && t.GetReferent() != null)
    {
        Pullenti.Ner.Referent r = t.GetReferent();
        if (r.TypeName == "DATE")
        {
            Pullenti.Ner.Token t1 = t;
            // "<date> р." / "<date> рождения" marks a birth date after the fact.
            if (t.Next != null && (t.Next.IsValue("Р", null) || t.Next.IsValue("РОЖДЕНИЕ", "НАРОДЖЕННЯ")))
            {
                born = true;
                t1 = t.Next;
                if (t1.Next != null && t1.Next.IsChar('.'))
                {
                    t1 = t1.Next;
                }
            }
            if (born)
            {
                if (p != null)
                {
                    p.AddSlot(Pullenti.Ner.Person.PersonReferent.ATTR_BORN, r, false, 0);
                }
                res = t1;
                t = t1;
            }
            else if (die)
            {
                if (p != null)
                {
                    p.AddSlot(Pullenti.Ner.Person.PersonReferent.ATTR_DIE, r, false, 0);
                }
                res = t1;
                t = t1;
            }
        }
    }
    if (die && t != null)
    {
        Pullenti.Ner.NumberToken ag = Pullenti.Ner.Core.NumberHelper.TryParseAge(t.Next);
        if (ag != null)
        {
            if (p != null)
            {
                p.AddSlot(Pullenti.Ner.Person.PersonReferent.ATTR_AGE, ag.Value.ToString(), false, 0);
            }
            t = ag.EndToken.Next;
            res = ag.EndToken;
        }
    }
    if (t == null)
    {
        return res;
    }
    if (t.IsChar('('))
    {
        Pullenti.Ner.Core.BracketSequenceToken br = Pullenti.Ner.Core.BracketHelper.TryParse(t, Pullenti.Ner.Core.BracketParseAttr.No, 100);
        if (br != null)
        {
            Pullenti.Ner.Token t1 = t.Next;
            // An optional "род." prefix inside the brackets is skipped; a single date in
            // brackets is stored as the birth date whether or not the prefix is present.
            if (t1.IsValue("РОД", null))
            {
                t1 = t1.Next;
                if (t1 != null && t1.IsChar('.'))
                {
                    t1 = t1.Next;
                }
            }
            if (t1 is Pullenti.Ner.ReferentToken)
            {
                Pullenti.Ner.Referent r = t1.GetReferent();
                // A referent token may carry no referent; treat that as "nothing recognised"
                // instead of failing on r.TypeName.
                if (r != null && t1.Next == br.EndToken)
                {
                    if (r.TypeName == "DATERANGE")
                    {
                        Pullenti.Ner.Referent bd = r.GetSlotValue("FROM") as Pullenti.Ner.Referent;
                        Pullenti.Ner.Referent to = r.GetSlotValue("TO") as Pullenti.Ner.Referent;
                        if (bd != null && to != null)
                        {
                            if (p != null)
                            {
                                p.AddSlot(Pullenti.Ner.Person.PersonReferent.ATTR_BORN, bd, false, 0);
                                p.AddSlot(Pullenti.Ner.Person.PersonReferent.ATTR_DIE, to, false, 0);
                            }
                            res = br.EndToken;
                        }
                    }
                    else if (r.TypeName == "DATE")
                    {
                        if (p != null)
                        {
                            p.AddSlot(Pullenti.Ner.Person.PersonReferent.ATTR_BORN, r, false, 0);
                        }
                        res = br.EndToken;
                    }
                }
            }
        }
    }
    return res;
}
/// <summary>
/// Collects an ISBN-like value starting at <paramref name="t0"/>: digit number tokens
/// separated by hyphens, optionally ending with the letter X.
/// </summary>
/// <param name="t0">First token of the candidate value.</param>
/// <returns>
/// A token spanning the collected text, or null when the text contains fewer than seven digits.
/// </returns>
public static UriItemToken AttachISBN(Pullenti.Ner.Token t0)
{
    StringBuilder buffer = new StringBuilder();
    Pullenti.Ner.Token last = t0;
    int digitChars = 0;

    Pullenti.Ner.Token cur = t0;
    while (cur != null)
    {
        if (cur.IsTableControlChar)
        {
            break;
        }
        // A line break inside the value is tolerated only directly after a hyphen.
        if (cur.IsNewlineBefore && cur != t0)
        {
            bool continuesAfterHyphen = cur.Previous != null && cur.Previous.IsHiphen;
            if (!continuesAfterHyphen)
            {
                break;
            }
        }

        Pullenti.Ner.NumberToken number = cur as Pullenti.Ner.NumberToken;
        if (number != null)
        {
            if (number.Typ != Pullenti.Ner.NumberSpellingType.Digit || !number.Morph.Class.IsUndefined)
            {
                break;
            }
            string digits = number.GetSourceText();
            buffer.Append(digits);
            digitChars += digits.Length;
            last = cur;
            if (digitChars > 13)
            {
                break;
            }
            cur = cur.Next;
            continue;
        }

        Pullenti.Ner.TextToken text = cur as Pullenti.Ner.TextToken;
        if (text == null)
        {
            break;
        }
        string piece = text.Term;
        if (piece != "-" && piece != "Х" && piece != "X")
        {
            break;
        }
        // Both spellings of the final letter are stored as the second one.
        if (piece == "Х")
        {
            piece = "X";
        }
        buffer.Append(piece);
        last = cur;
        // The letter terminates the value; a hyphen lets it continue.
        if (piece != "-")
        {
            break;
        }
        cur = cur.Next;
    }

    string value = buffer.ToString();
    int digitCount = 0;
    foreach (char ch in value)
    {
        if (char.IsDigit(ch))
        {
            digitCount++;
        }
    }
    if (digitCount < 7)
    {
        return null;
    }
    return new UriItemToken(t0, last) { Value = value };
}
// Parses the numbering prefix of an instruction item starting at token t and records it in res
// (Numbers, NumTyp, NumBeginToken, NumEndToken, EndToken, NumSuffix, MinNumber).
// The method returns nothing; when no branch matches, res is left as it was.
//
// Parameters:
//   t    - token at which the number is expected.
//   res  - item being filled; its Numbers list may already contain earlier parts.
//   prev - previous item, used only in the single-letter branch to compare LastNumber values; may be null.
//
// Branches, in the order they are tried:
//   1. Digit number token with a non-null IntValue below 3000.
//      - Returns without changes when the token is an adjective and res.TypContainerRank == 0,
//        or when NumberHelper.TryParseNumberWithPostfix succeeds and its end token has no
//        whitespace before it.
//      - When the next token (fewer than 3 whitespaces away) is an all-lowercase letter token and is
//        not a single letter glued to the number: the number alone is recorded if res.Numbers is
//        empty, otherwise nothing is recorded; the method returns in both cases.
//      - NumTyp becomes Digit (if it was Undefined) or Combo; note that this assignment happens
//        before the "numbers already present and whitespace before t" early return.
//      - "N-M" and "N)-M" with M > N are stored as MinNumber = N and number M.
//      - Further parts are then collected while fewer than 4 numbers are stored (see the comment
//        before that loop below).
//      - A following ")" or "." becomes NumSuffix.
//   2. Number token spelled in words, accepted only when res.TypContainerRank > 0 and res.Numbers
//      is empty; a following "." becomes the suffix.
//   3. Roman number from NumberHelper.TryParseRoman; the value "10" directly followed by ')' and the
//      value "100" are rejected. For Chapter/Section/Line items up to two more digit parts
//      separated by one of "._<" are appended (TwoDigits / ThreeDigits).
//   4. Single-letter text token that is res.BeginToken: letter+number+"." (the letter goes into the
//      suffix), letter "." number ")" (TwoDigits), or letter followed by "." / ")" (Letter), the
//      last subject to the upper-case and prev.LastNumber checks visible in the code.
static void _parseNumber(Pullenti.Ner.Token t, InstrToken1 res, InstrToken1 prev) { if (((t is Pullenti.Ner.NumberToken) && (t as Pullenti.Ner.NumberToken).IntValue != null && (t as Pullenti.Ner.NumberToken).Typ == Pullenti.Ner.NumberSpellingType.Digit) && ((t as Pullenti.Ner.NumberToken).IntValue.Value < 3000)) { if (res.Numbers.Count >= 4) { } if (t.Morph.Class.IsAdjective && res.TypContainerRank == 0) { return; } Pullenti.Ner.Core.NumberExToken nwp = Pullenti.Ner.Core.NumberHelper.TryParseNumberWithPostfix(t); if (nwp != null) { if (nwp.EndToken.IsWhitespaceBefore) { } else { return; } } if ((t.Next != null && (t.WhitespacesAfterCount < 3) && t.Next.Chars.IsLetter) && t.Next.Chars.IsAllLower) { if (!t.IsWhitespaceAfter && t.Next.LengthChar == 1) { } else if (res.Numbers.Count == 0) { res.NumTyp = NumberTypes.Digit; res.Numbers.Add((t as Pullenti.Ner.NumberToken).Value.ToString()); res.NumBeginToken = (res.NumEndToken = (res.EndToken = t)); return; } else { return; } } if (res.NumTyp == NumberTypes.Undefined) { res.NumTyp = NumberTypes.Digit; } else { res.NumTyp = NumberTypes.Combo; } if (res.Numbers.Count > 0 && t.IsWhitespaceBefore) { return; } if (res.Numbers.Count == 0) { res.NumBeginToken = t; } if ((t.Next != null && t.Next.IsHiphen && (t.Next.Next is Pullenti.Ner.NumberToken)) && (t.Next.Next as Pullenti.Ner.NumberToken).IntValue != null && (t.Next.Next as Pullenti.Ner.NumberToken).IntValue.Value > (t as Pullenti.Ner.NumberToken).IntValue.Value) { res.MinNumber = (t as Pullenti.Ner.NumberToken).Value.ToString(); t = t.Next.Next; } else if (((t.Next != null && t.Next.IsCharOf(")") && t.Next.Next != null) && t.Next.Next.IsHiphen && (t.Next.Next.Next is Pullenti.Ner.NumberToken)) && (t.Next.Next.Next as Pullenti.Ner.NumberToken).IntValue != null && (t.Next.Next.Next as Pullenti.Ner.NumberToken).IntValue.Value > (t as Pullenti.Ner.NumberToken).IntValue.Value) { res.MinNumber = (t as Pullenti.Ner.NumberToken).Value.ToString(); t = t.Next.Next.Next; } 
// The (possibly range-adjusted) number is stored, then following parts are collected while fewer
// than 4 numbers are stored: a number after "." or "_" (ok1), a number enclosed in ( ) or < > (ok2),
// or - only directly after the first number - a single glued letter, which makes the type Combo.
res.Numbers.Add((t as Pullenti.Ner.NumberToken).Value.ToString()); res.EndToken = (res.NumEndToken = t); res.NumSuffix = null; for (Pullenti.Ner.Token ttt = t.Next; ttt != null && (res.Numbers.Count < 4); ttt = ttt.Next) { bool ok1 = false; bool ok2 = false; if ((ttt.IsCharOf("._") && !ttt.IsWhitespaceAfter && (ttt.Next is Pullenti.Ner.NumberToken)) && (((ttt.Next as Pullenti.Ner.NumberToken).Typ == Pullenti.Ner.NumberSpellingType.Digit || (((ttt.Next as Pullenti.Ner.NumberToken).Typ == Pullenti.Ner.NumberSpellingType.Words) && ttt.Next.Chars.IsLatinLetter && !ttt.IsWhitespaceAfter)))) { ok1 = true; } else if ((ttt.IsCharOf("(<") && (ttt.Next is Pullenti.Ner.NumberToken) && ttt.Next.Next != null) && ttt.Next.Next.IsCharOf(")>")) { ok2 = true; } if (ok1 || ok2) { ttt = ttt.Next; res.Numbers.Add((ttt as Pullenti.Ner.NumberToken).Value.ToString()); res.NumTyp = (res.Numbers.Count == 2 ? NumberTypes.TwoDigits : (res.Numbers.Count == 3 ? NumberTypes.ThreeDigits : NumberTypes.FourDigits)); if ((ttt.Next != null && ttt.Next.IsCharOf(")>") && ttt.Next.Next != null) && ttt.Next.Next.IsChar('.')) { ttt = ttt.Next; } else if (ok2) { ttt = ttt.Next; } t = (res.EndToken = (res.NumEndToken = ttt)); continue; } if (((ttt is Pullenti.Ner.TextToken) && ttt.LengthChar == 1 && ttt.Chars.IsLetter) && !ttt.IsWhitespaceBefore && res.Numbers.Count == 1) { res.Numbers.Add((ttt as Pullenti.Ner.TextToken).Term); res.NumTyp = NumberTypes.Combo; t = (res.EndToken = (res.NumEndToken = ttt)); continue; } break; } if (t.Next != null && t.Next.IsCharOf(").")) { res.NumSuffix = t.Next.GetSourceText(); t = (res.EndToken = (res.NumEndToken = t.Next)); } return; } if (((t is Pullenti.Ner.NumberToken) && (t as Pullenti.Ner.NumberToken).Typ == Pullenti.Ner.NumberSpellingType.Words && res.TypContainerRank > 0) && res.Numbers.Count == 0) { res.Numbers.Add((t as Pullenti.Ner.NumberToken).Value.ToString()); res.NumTyp = NumberTypes.Digit; res.NumBeginToken = t; if (t.Next != null && t.Next.IsChar('.')) { t 
= t.Next; res.NumSuffix = "."; } res.EndToken = (res.NumEndToken = t); return; } Pullenti.Ner.NumberToken nt = Pullenti.Ner.Core.NumberHelper.TryParseRoman(t); if ((nt != null && nt.Value == "10" && t.Next != null) && t.Next.IsChar(')')) { nt = null; } if (nt != null && nt.Value == "100") { nt = null; } if (nt != null && nt.Typ == Pullenti.Ner.NumberSpellingType.Roman) { if (res.NumTyp == NumberTypes.Undefined) { res.NumTyp = NumberTypes.Roman; } else { res.NumTyp = NumberTypes.Combo; } if (res.Numbers.Count > 0 && t.IsWhitespaceBefore) { return; } if (res.Numbers.Count == 0) { res.NumBeginToken = t; } res.Numbers.Add(nt.Value.ToString()); t = (res.EndToken = (res.NumEndToken = nt.EndToken)); if (res.NumTyp == NumberTypes.Roman && ((res.Typ == InstrToken1.Types.Chapter || res.Typ == InstrToken1.Types.Section || res.Typ == InstrToken1.Types.Line))) { if ((t.Next != null && t.Next.IsCharOf("._<") && (t.Next.Next is Pullenti.Ner.NumberToken)) && (t.Next.Next as Pullenti.Ner.NumberToken).Typ == Pullenti.Ner.NumberSpellingType.Digit) { t = t.Next.Next; res.Numbers.Add((t as Pullenti.Ner.NumberToken).Value.ToString()); res.NumTyp = NumberTypes.TwoDigits; if (t.Next != null && t.Next.IsChar('>')) { t = t.Next; } res.EndToken = (res.NumEndToken = t); if ((t.Next != null && t.Next.IsCharOf("._<") && (t.Next.Next is Pullenti.Ner.NumberToken)) && (t.Next.Next as Pullenti.Ner.NumberToken).Typ == Pullenti.Ner.NumberSpellingType.Digit) { t = t.Next.Next; res.Numbers.Add((t as Pullenti.Ner.NumberToken).Value.ToString()); res.NumTyp = NumberTypes.ThreeDigits; if (t.Next != null && t.Next.IsChar('>')) { t = t.Next; } res.EndToken = (res.NumEndToken = t); } } } if (t.Next != null && t.Next.IsCharOf(").")) { res.NumSuffix = t.Next.GetSourceText(); t = (res.EndToken = (res.NumEndToken = t.Next)); } return; } if (((t is Pullenti.Ner.TextToken) && t.LengthChar == 1 && t.Chars.IsLetter) && t == res.BeginToken) { if ((!t.IsWhitespaceAfter && (t.Next is Pullenti.Ner.NumberToken) && 
t.Next.Next != null) && t.Next.Next.IsChar('.')) { res.NumBeginToken = t; res.NumTyp = NumberTypes.Digit; res.Numbers.Add((t.Next as Pullenti.Ner.NumberToken).Value.ToString()); res.NumSuffix = (t as Pullenti.Ner.TextToken).Term + "."; t = (res.EndToken = (res.NumEndToken = t.Next.Next)); return; } if (t.Next != null && t.Next.IsCharOf(".)")) { if (((t.Next.IsChar('.') && (t.Next.Next is Pullenti.Ner.NumberToken) && t.Next.Next.Next != null) && t.Next.Next.Next.IsChar(')') && !t.Next.IsWhitespaceAfter) && !t.Next.Next.IsWhitespaceAfter) { res.NumTyp = NumberTypes.TwoDigits; res.Numbers.Add((t as Pullenti.Ner.TextToken).Term); res.Numbers.Add((t.Next.Next as Pullenti.Ner.NumberToken).Value.ToString()); res.NumSuffix = ")"; res.NumBeginToken = t; t = (res.EndToken = (res.NumEndToken = t.Next.Next.Next)); return; } if (t.Next.IsChar('.') && ((t.Chars.IsAllUpper || (t.Next.Next is Pullenti.Ner.NumberToken)))) { } else { InstrToken1 tmp1 = new InstrToken1(t, t.Next); tmp1.Numbers.Add((t as Pullenti.Ner.TextToken).Term); if (tmp1.LastNumber > 1 && t.Next.IsCharOf(".") && ((prev == null || (prev.LastNumber + 1) != tmp1.LastNumber))) { } else { if (res.Numbers.Count == 0) { res.NumBeginToken = t; } res.NumTyp = NumberTypes.Letter; res.Numbers.Add((t as Pullenti.Ner.TextToken).Term); res.NumBeginToken = t; t = (res.EndToken = (res.NumEndToken = t.Next)); res.NumSuffix = t.GetSourceText(); return; } } } } }
/// <summary>
/// Walks forward from <paramref name="t0"/> and concatenates the source text of consecutive tokens
/// into a single code string (presumably a BBK classification index, judging by the method name - confirm with callers).
/// </summary>
/// <param name="t0">First token of the candidate code; a null value yields a null result.</param>
/// <returns>
/// A <c>UriItemToken</c> covering the collected tokens with the glued text in <c>Value</c>,
/// or null when fewer than 3 characters or fewer than 2 digit characters were collected.
/// </returns>
public static UriItemToken AttachBBK(Pullenti.Ner.Token t0)
{
    StringBuilder code = new StringBuilder();
    Pullenti.Ner.Token last = t0;
    int digitChars = 0;

    for (Pullenti.Ner.Token cur = t0; cur != null; cur = cur.Next)
    {
        // A line break (other than before the first token) or a table control character ends the scan.
        if ((cur.IsNewlineBefore && cur != t0) || cur.IsTableControlChar)
        {
            break;
        }

        Pullenti.Ner.NumberToken number = cur as Pullenti.Ner.NumberToken;
        if (number != null)
        {
            // Only digit-spelled numbers whose morphology class is undefined are accepted.
            if (number.Typ != Pullenti.Ner.NumberSpellingType.Digit || !number.Morph.Class.IsUndefined)
            {
                break;
            }
            string digits = number.GetSourceText();
            code.Append(digits);
            digitChars += digits.Length;
            last = cur;
            continue;
        }

        Pullenti.Ner.TextToken text = cur as Pullenti.Ner.TextToken;
        if (text == null || text.IsChar(','))
        {
            break;
        }
        // An opening parenthesis is accepted only when a number follows it.
        if (text.IsChar('(') && !(text.Next is Pullenti.Ner.NumberToken))
        {
            break;
        }
        string piece = text.GetSourceText();
        // A letter-initial fragment must be glued to the preceding token (no whitespace before it).
        if (char.IsLetter(piece[0]) && text.IsWhitespaceBefore)
        {
            break;
        }
        code.Append(piece);
        last = cur;
    }

    if (code.Length < 3 || digitChars < 2)
    {
        return(null);
    }

    // Drop a trailing dot from both the text and the token span.
    if (code[code.Length - 1] == '.')
    {
        code.Length -= 1;
        last = last.Previous;
    }

    UriItemToken result = new UriItemToken(t0, last);
    result.Value = code.ToString();
    return(result);
}
/// <summary>
/// Tries to parse, starting at <paramref name="t"/>, a numeric value (or a range / plus-minus value)
/// with optional units, comparison keywords/signs and an "about" marker.
/// </summary>
/// <param name="t">Start token; leading commas/conjunctions and the word "НО" are skipped.</param>
/// <param name="addUnits">Extra termin collection forwarded to <c>UnitToken.TryParseList</c>.</param>
/// <param name="second">True for the recursive call that parses the second part of a range or plus-minus value.</param>
/// <param name="canOmitNumber">When true, a result may be produced even if no explicit number was found.</param>
/// <param name="canBeNan">When true and a keyword was seen, a missing number is represented as <c>double.NaN</c>.</param>
/// <returns>The parsed token, or null when nothing suitable starts at <paramref name="t"/>.</returns>
internal static NumbersWithUnitToken _tryParse(Pullenti.Ner.Token t, Pullenti.Ner.Core.TerminCollection addUnits, bool second, bool canOmitNumber, bool canBeNan)
{
    if (t == null)
    {
        return(null);
    }
    while (t != null)
    {
        if (t.IsCommaAnd || t.IsValue("НО", null))
        {
            t = t.Next;
        }
        else
        {
            break;
        }
    }
    Pullenti.Ner.Token t0 = t;
    bool about = false;
    bool hasKeyw = false;
    // NOTE(review): isDiapKeyw and minMax are assigned but never read in this method.
    bool isDiapKeyw = false;
    int minMax = 0;
    Pullenti.Ner.Token ttt = _isMinOrMax(t, ref minMax);
    if (ttt != null)
    {
        t = ttt.Next;
        if (t == null)
        {
            return(null);
        }
    }
    if (t == null)
    {
        return(null);
    }
    if (t.IsChar('~') || t.IsValue("ОКОЛО", null) || t.IsValue("ПРИМЕРНО", null))
    {
        t = t.Next;
        about = true;
        hasKeyw = true;
        if (t == null)
        {
            return(null);
        }
    }
    if (t.IsValue("В", null) && t.Next != null)
    {
        // FIX: the second alternative used to test t (already known to be "В"), so it could never match;
        // both keywords must be checked on the token that follows the preposition.
        if (t.Next.IsValue("ПРЕДЕЛ", null) || t.Next.IsValue("ДИАПАЗОН", null))
        {
            t = t.Next.Next;
            if (t == null)
            {
                return(null);
            }
            isDiapKeyw = true;
        }
    }
    // Whole expression wrapped in parentheses: parse the inside and require the closing ')'.
    if (t0.IsChar('('))
    {
        NumbersWithUnitToken mt0 = _tryParse(t.Next, addUnits, false, false, false);
        if (mt0 != null && mt0.EndToken.Next != null && mt0.EndToken.Next.IsChar(')'))
        {
            if (second)
            {
                // As a second part, only a symmetric range (From == -To) is accepted in parentheses.
                if (!(mt0.FromVal != null && mt0.ToVal != null && mt0.FromVal.Value == (-mt0.ToVal.Value)))
                {
                    return(null);
                }
            }
            mt0.BeginToken = t0;
            mt0.EndToken = mt0.EndToken.Next;
            List<UnitToken> uu = UnitToken.TryParseList(mt0.EndToken.Next, addUnits, false);
            if (uu != null && mt0.Units.Count == 0)
            {
                mt0.Units = uu;
                mt0.EndToken = uu[uu.Count - 1].EndToken;
            }
            return(mt0);
        }
    }
    bool plusminus = false;
    bool unitBefore = false;
    bool isAge = false;
    DiapTyp dty = DiapTyp.Undefined;
    Pullenti.Ner.MetaToken whd = null;
    List<UnitToken> uni = null;
    Pullenti.Ner.Core.TerminToken tok = (m_Termins == null ? null : m_Termins.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No));
    if (tok != null)
    {
        if (tok.EndToken.IsValue("СТАРШЕ", null) || tok.EndToken.IsValue("МЛАДШЕ", null))
        {
            isAge = true;
        }
        t = tok.EndToken.Next;
        dty = (DiapTyp)tok.Termin.Tag;
        hasKeyw = true;
        if (!tok.IsWhitespaceAfter)
        {
            if (t == null)
            {
                return(null);
            }
            if (t is Pullenti.Ner.NumberToken)
            {
                if (tok.BeginToken == tok.EndToken && !tok.Chars.IsAllLower)
                {
                    return(null);
                }
            }
            else if (t.IsComma && t.Next != null && t.Next.IsValue("ЧЕМ", null))
            {
                t = t.Next.Next;
                if (t != null && t.Morph.Class.IsPreposition)
                {
                    t = t.Next;
                }
            }
            else if (!(t.IsCharOf(":,(") || t.IsTableControlChar))
            {
                return(null);
            }
        }
        if (t != null && t.IsChar('('))
        {
            uni = UnitToken.TryParseList(t.Next, addUnits, false);
            if (uni != null)
            {
                t = uni[uni.Count - 1].EndToken.Next;
                while (t != null)
                {
                    if (t.IsCharOf("):"))
                    {
                        t = t.Next;
                    }
                    else
                    {
                        break;
                    }
                }
                NumbersWithUnitToken mt0 = _tryParse(t, addUnits, false, canOmitNumber, false);
                if (mt0 != null && mt0.Units.Count == 0)
                {
                    mt0.BeginToken = t0;
                    mt0.Units = uni;
                    return(mt0);
                }
            }
            whd = _tryParseWHL(t);
            if (whd != null)
            {
                t = whd.EndToken.Next;
            }
        }
        else if (t != null && t.IsValue("IP", null))
        {
            uni = UnitToken.TryParseList(t, addUnits, false);
            if (uni != null)
            {
                t = uni[uni.Count - 1].EndToken.Next;
            }
        }
        if ((t != null && t.IsHiphen && t.IsWhitespaceBefore) && t.IsWhitespaceAfter)
        {
            t = t.Next;
        }
    }
    else if (t.IsChar('<'))
    {
        dty = DiapTyp.Ls;
        t = t.Next;
        hasKeyw = true;
        if (t != null && t.IsChar('='))
        {
            t = t.Next;
            dty = DiapTyp.Le;
        }
    }
    else if (t.IsChar('>'))
    {
        dty = DiapTyp.Gt;
        t = t.Next;
        hasKeyw = true;
        if (t != null && t.IsChar('='))
        {
            t = t.Next;
            dty = DiapTyp.Ge;
        }
    }
    else if (t.IsChar('≤'))
    {
        dty = DiapTyp.Le;
        hasKeyw = true;
        t = t.Next;
    }
    else if (t.IsChar('≥'))
    {
        dty = DiapTyp.Ge;
        hasKeyw = true;
        t = t.Next;
    }
    else if (t.IsValue("IP", null))
    {
        uni = UnitToken.TryParseList(t, addUnits, false);
        if (uni != null)
        {
            t = uni[uni.Count - 1].EndToken.Next;
        }
    }
    else if (t.IsValue("ЗА", null) && (t.Next is Pullenti.Ner.NumberToken))
    {
        dty = DiapTyp.Ge;
        t = t.Next;
    }
    while (t != null && ((t.IsCharOf(":,") || t.IsValue("ЧЕМ", null) || t.IsTableControlChar)))
    {
        t = t.Next;
    }
    if (t != null)
    {
        // FIX: m_Termins is treated as possibly null above, so guard it here too, and parse only once.
        // The look-ahead conditions (hyphen, !second) are disjoint from the branches tested before it below.
        Pullenti.Ner.Core.TerminToken hyphenTok = null;
        if (t.IsHiphen && t == t0 && !second && m_Termins != null)
        {
            hyphenTok = m_Termins.TryParse(t.Next, Pullenti.Ner.Core.TerminParseAttr.No);
        }
        if (t.IsChar('+') || t.IsValue("ПЛЮС", null))
        {
            t = t.Next;
            if (t != null && !t.IsWhitespaceBefore)
            {
                // "+-" and "+/-" written without spaces mean plus-minus.
                if (t.IsHiphen)
                {
                    t = t.Next;
                    plusminus = true;
                }
                else if ((t.IsCharOf("\\/") && t.Next != null && !t.IsNewlineAfter) && t.Next.IsHiphen)
                {
                    t = t.Next.Next;
                    plusminus = true;
                }
            }
        }
        else if (second && (t.IsCharOf("\\/÷…~")))
        {
            t = t.Next;
        }
        else if (hyphenTok != null)
        {
            tok = hyphenTok;
            t = tok.EndToken.Next;
            dty = (DiapTyp)tok.Termin.Tag;
        }
        else if (t.IsHiphen && t == t0 && ((t.IsWhitespaceAfter || second)))
        {
            t = t.Next;
        }
        else if (t.IsChar('±'))
        {
            t = t.Next;
            plusminus = true;
            hasKeyw = true;
        }
        else if ((second && t.IsChar('.') && t.Next != null) && t.Next.IsChar('.'))
        {
            // ".." or "..." used as a range separator before the second value.
            t = t.Next.Next;
            if (t != null && t.IsChar('.'))
            {
                t = t.Next;
            }
        }
    }
    Pullenti.Ner.NumberToken num = Pullenti.Ner.Core.NumberHelper.TryParseRealNumber(t, true, false);
    if (num == null)
    {
        // No number here: maybe the units are written before the number.
        uni = UnitToken.TryParseList(t, addUnits, false);
        if (uni != null)
        {
            unitBefore = true;
            t = uni[uni.Count - 1].EndToken.Next;
            bool delim = false;
            while (t != null)
            {
                if (t.IsCharOf(":,"))
                {
                    delim = true;
                    t = t.Next;
                }
                else if (t.IsHiphen && t.IsWhitespaceAfter)
                {
                    delim = true;
                    t = t.Next;
                }
                else
                {
                    break;
                }
            }
            if (!delim)
            {
                if (t == null)
                {
                    if (!(hasKeyw && canBeNan))
                    {
                        return(null);
                    }
                }
                else if (!t.IsWhitespaceBefore)
                {
                    return(null);
                }
                // FIX: t may legitimately be null here (keyword + canBeNan case); the original dereferenced it.
                if (t != null && t.Next != null && t.IsHiphen && t.IsWhitespaceAfter)
                {
                    delim = true;
                    t = t.Next;
                }
            }
            num = Pullenti.Ner.Core.NumberHelper.TryParseRealNumber(t, true, false);
        }
    }
    NumbersWithUnitToken res = null;
    double rval = 0;
    if (num == null)
    {
        Pullenti.Ner.Core.TerminToken tt = m_Spec.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
        if (tt != null)
        {
            // Special termin: Tag carries the numeric value, Tag2 the Cyrillic full name of the unit.
            rval = (double)tt.Termin.Tag;
            string unam = (string)tt.Termin.Tag2;
            foreach (Unit u in UnitsHelper.Units)
            {
                if (u.FullnameCyr == unam)
                {
                    uni = new List<UnitToken>();
                    uni.Add(new UnitToken(t, t) { Unit = u });
                    break;
                }
            }
            if (uni == null)
            {
                return(null);
            }
            res = new NumbersWithUnitToken(t0, tt.EndToken) { About = about };
            t = tt.EndToken.Next;
        }
        else
        {
            if (!canOmitNumber && !hasKeyw && !canBeNan)
            {
                return(null);
            }
            if ((uni != null && uni.Count == 1 && uni[0].BeginToken == uni[0].EndToken) && uni[0].LengthChar > 3)
            {
                // A single one-token unit longer than 3 characters without a number counts as value 1.
                rval = 1;
                res = new NumbersWithUnitToken(t0, uni[uni.Count - 1].EndToken) { About = about };
                t = res.EndToken.Next;
            }
            else if (hasKeyw && canBeNan)
            {
                rval = double.NaN;
                res = new NumbersWithUnitToken(t0, t0) { About = about };
                if (t != null)
                {
                    res.EndToken = t.Previous;
                }
                else
                {
                    // Ran off the end of the chain: the result ends at the last token reachable from t0.
                    for (t = t0; t != null; t = t.Next)
                    {
                        res.EndToken = t;
                    }
                }
            }
            else
            {
                return(null);
            }
        }
    }
    else
    {
        // A hyphen glued on both sides at the very start was read as a minus sign: re-parse without it.
        if ((t == t0 && t0.IsHiphen && !t.IsWhitespaceBefore) && !t.IsWhitespaceAfter && (num.RealValue < 0))
        {
            num = Pullenti.Ner.Core.NumberHelper.TryParseRealNumber(t.Next, true, false);
            if (num == null)
            {
                return(null);
            }
        }
        if (t == t0 && (t is Pullenti.Ner.NumberToken) && t.Morph.Class.IsAdjective)
        {
            Pullenti.Ner.TextToken nn = (t as Pullenti.Ner.NumberToken).EndToken as Pullenti.Ner.TextToken;
            if (nn == null)
            {
                return(null);
            }
            string norm = nn.GetNormalCaseText(Pullenti.Morph.MorphClass.Adjective, Pullenti.Morph.MorphNumber.Singular, Pullenti.Morph.MorphGender.Undefined, false);
            if (!((norm.EndsWith("Ь") || norm == "ЧЕТЫРЕ" || norm == "ТРИ") || norm == "ДВА"))
            {
                // Probe the morphology with an artificial prefix; reject the number if the form is still an adjective.
                Pullenti.Morph.MorphWordForm mi = Pullenti.Morph.MorphologyService.GetWordBaseInfo("КОКО" + nn.Term, null, false, false);
                if (mi.Class.IsAdjective)
                {
                    return(null);
                }
            }
        }
        t = num.EndToken.Next;
        res = new NumbersWithUnitToken(t0, num.EndToken) { About = about };
        rval = num.RealValue;
    }
    if (uni == null)
    {
        uni = UnitToken.TryParseList(t, addUnits, false);
        if (uni != null)
        {
            if ((plusminus && second && uni.Count >= 1) && uni[0].Unit == UnitsHelper.uPercent)
            {
                // Plus-minus deviation given in percent; real units may follow the percent sign.
                res.EndToken = uni[0].EndToken;
                res.PlusMinusPercent = true;
                Pullenti.Ner.Token tt1 = uni[0].EndToken.Next;
                uni = UnitToken.TryParseList(tt1, addUnits, false);
                if (uni != null)
                {
                    res.Units = uni;
                    res.EndToken = uni[uni.Count - 1].EndToken;
                }
            }
            else
            {
                res.Units = uni;
                res.EndToken = uni[uni.Count - 1].EndToken;
            }
            t = res.EndToken.Next;
        }
    }
    else
    {
        res.Units = uni;
        if (uni.Count > 1)
        {
            List<UnitToken> uni1 = UnitToken.TryParseList(t, addUnits, false);
            if (((uni1 != null && uni1[0].Unit == uni[0].Unit && (uni1.Count < uni.Count)) && uni[uni1.Count].Pow == -1 && uni1[uni1.Count - 1].EndToken.Next != null) && uni1[uni1.Count - 1].EndToken.Next.IsCharOf("/\\"))
            {
                NumbersWithUnitToken num2 = _tryParse(uni1[uni1.Count - 1].EndToken.Next.Next, addUnits, false, false, false);
                // FIX: guard against an empty unit list before indexing its first element.
                if (num2 != null && num2.Units != null && num2.Units.Count > 0 && num2.Units[0].Unit == uni[uni1.Count].Unit)
                {
                    res.Units = uni1;
                    res.DivNum = num2;
                    res.EndToken = num2.EndToken;
                }
            }
        }
    }
    res.WHL = whd;
    if (dty != DiapTyp.Undefined)
    {
        if (dty == DiapTyp.Ge || dty == DiapTyp.From)
        {
            res.FromInclude = true;
            res.FromVal = rval;
        }
        else if (dty == DiapTyp.Gt)
        {
            res.FromInclude = false;
            res.FromVal = rval;
        }
        else if (dty == DiapTyp.Le || dty == DiapTyp.To)
        {
            res.ToInclude = true;
            res.ToVal = rval;
        }
        else if (dty == DiapTyp.Ls)
        {
            res.ToInclude = false;
            res.ToVal = rval;
        }
    }
    // NOTE(review): isSecondMax is assigned but never read in this method.
    bool isSecondMax = false;
    if (!second)
    {
        int iii = 0;
        ttt = _isMinOrMax(t, ref iii);
        if (ttt != null && iii > 0)
        {
            isSecondMax = true;
            t = ttt.Next;
        }
    }
    // Try to read a second value (range end / deviation) unless this already is the second part,
    // a plus-minus was read, or the next token starts a new line / table cell.
    NumbersWithUnitToken next = (second || plusminus || ((t != null && ((t.IsTableControlChar || t.IsNewlineBefore)))) ? null : _tryParse(t, addUnits, true, false, canBeNan));
    if (next != null && (t.Previous is Pullenti.Ner.NumberToken))
    {
        if (MeasureHelper.IsMultChar((t.Previous as Pullenti.Ner.NumberToken).EndToken))
        {
            next = null;
        }
    }
    if (next != null && ((next.ToVal != null || next.SingleVal != null)) && next.FromVal == null)
    {
        // Pattern "V +a/-b": asymmetric deviation around rval.
        if ((((next.BeginToken.IsChar('+') && next.SingleVal != null && !double.IsNaN(next.SingleVal.Value)) && next.EndToken.Next != null && next.EndToken.Next.IsCharOf("\\/")) && next.EndToken.Next.Next != null && next.EndToken.Next.Next.IsHiphen) && !hasKeyw && !double.IsNaN(rval))
        {
            NumbersWithUnitToken next2 = _tryParse(next.EndToken.Next.Next.Next, addUnits, true, false, false);
            if (next2 != null && next2.SingleVal != null && !double.IsNaN(next2.SingleVal.Value))
            {
                res.FromVal = rval - next2.SingleVal.Value;
                res.FromInclude = true;
                res.ToVal = rval + next.SingleVal.Value;
                res.ToInclude = true;
                if (next2.Units != null && res.Units.Count == 0)
                {
                    res.Units = next2.Units;
                }
                res.EndToken = next2.EndToken;
                return(res);
            }
        }
        if (next.Units.Count > 0)
        {
            if (res.Units.Count == 0)
            {
                res.Units = next.Units;
            }
            else if (!UnitToken.CanBeEquals(res.Units, next.Units))
            {
                next = null;
            }
        }
        else if (res.Units.Count > 0 && !unitBefore && !next.PlusMinusPercent)
        {
            next = null;
        }
        if (next != null)
        {
            res.EndToken = next.EndToken;
        }
        if (next != null && next.ToVal != null)
        {
            res.ToVal = next.ToVal;
            res.ToInclude = next.ToInclude;
        }
        else if (next != null && next.SingleVal != null)
        {
            if (next.BeginToken.IsCharOf("/\\"))
            {
                res.DivNum = next;
                res.SingleVal = rval;
                return(res);
            }
            else if (next.PlusMinusPercent)
            {
                res.SingleVal = rval;
                res.PlusMinus = next.SingleVal;
                res.PlusMinusPercent = true;
                res.ToInclude = true;
            }
            else
            {
                res.ToVal = next.SingleVal;
                res.ToInclude = true;
            }
        }
        if (next != null)
        {
            if (res.FromVal == null)
            {
                res.FromVal = rval;
                res.FromInclude = true;
            }
            return(res);
        }
    }
    else if ((next != null && next.FromVal != null && next.ToVal != null) && next.ToVal.Value == (-next.FromVal.Value))
    {
        // The second part is a symmetric range, i.e. a plus-minus deviation around rval.
        if (next.Units.Count == 1 && next.Units[0].Unit == UnitsHelper.uPercent && res.Units.Count > 0)
        {
            res.SingleVal = rval;
            res.PlusMinus = next.ToVal.Value;
            res.PlusMinusPercent = true;
            res.EndToken = next.EndToken;
            return(res);
        }
        if (next.Units.Count == 0)
        {
            res.SingleVal = rval;
            res.PlusMinus = next.ToVal.Value;
            res.EndToken = next.EndToken;
            return(res);
        }
        res.FromVal = next.FromVal + rval;
        res.FromInclude = true;
        res.ToVal = next.ToVal + rval;
        res.ToInclude = true;
        res.EndToken = next.EndToken;
        if (next.Units.Count > 0)
        {
            res.Units = next.Units;
        }
        return(res);
    }
    if (dty == DiapTyp.Undefined)
    {
        if (plusminus && ((!res.PlusMinusPercent || !second)))
        {
            res.FromInclude = true;
            res.FromVal = -rval;
            res.ToInclude = true;
            res.ToVal = rval;
        }
        else
        {
            res.SingleVal = rval;
            res.PlusMinusPercent = plusminus;
        }
    }
    if (isAge)
    {
        res.IsAge = true;
    }
    return(res);
}
/// <summary>
/// Tries to recognise one element of a personal identity document description starting at <paramref name="t"/>:
/// a keyword, series, number, code, date, organization or address item.
/// </summary>
/// <param name="t">Start token. May be null (recursive look-ahead calls pass <c>Next</c> tokens); null yields null.</param>
/// <param name="prev">Previously recognised item, or null when parsing the first item (then only a keyword is accepted).</param>
/// <returns>The recognised item, or null when nothing suitable starts at <paramref name="t"/>.</returns>
static PersonIdToken TryParse(Pullenti.Ner.Token t, PersonIdToken prev)
{
    // FIX: recursive look-ahead calls below may pass a null token; the original dereferenced it immediately.
    if (t == null)
    {
        return(null);
    }
    if (t.IsValue("СВИДЕТЕЛЬСТВО", null))
    {
        Pullenti.Ner.Token tt1 = t;
        bool ip = false;
        bool reg = false;
        for (Pullenti.Ner.Token tt = t.Next; tt != null; tt = tt.Next)
        {
            if (tt.IsCommaAnd || tt.Morph.Class.IsPreposition)
            {
                continue;
            }
            if (tt.IsValue("РЕГИСТРАЦИЯ", null) || tt.IsValue("РЕЕСТР", null) || tt.IsValue("ЗАРЕГИСТРИРОВАТЬ", null))
            {
                reg = true;
                tt1 = tt;
            }
            else if (tt.IsValue("ИНДИВИДУАЛЬНЫЙ", null) || tt.IsValue("ИП", null))
            {
                ip = true;
                tt1 = tt;
            }
            else if ((tt.IsValue("ВНЕСЕНИЕ", null) || tt.IsValue("ГОСУДАРСТВЕННЫЙ", null) || tt.IsValue("ЕДИНЫЙ", null)) || tt.IsValue("ЗАПИСЬ", null) || tt.IsValue("ПРЕДПРИНИМАТЕЛЬ", null))
            {
                tt1 = tt;
            }
            else if (tt.GetReferent() != null && tt.GetReferent().TypeName == "DATERANGE")
            {
                tt1 = tt;
            }
            else
            {
                break;
            }
        }
        // Both a registration word and an "individual entrepreneur" word must have been seen.
        if (reg && ip)
        {
            return(new PersonIdToken(t, tt1) { Typ = Typs.Keyword, Value = "СВИДЕТЕЛЬСТВО О ГОСУДАРСТВЕННОЙ РЕГИСТРАЦИИ ФИЗИЧЕСКОГО ЛИЦА В КАЧЕСТВЕ ИНДИВИДУАЛЬНОГО ПРЕДПРИНИМАТЕЛЯ" });
        }
    }
    Pullenti.Ner.Core.TerminToken tok = m_Ontology.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
    if (tok != null)
    {
        Typs ty = (Typs)tok.Termin.Tag;
        PersonIdToken res = new PersonIdToken(tok.BeginToken, tok.EndToken) { Typ = ty, Value = tok.Termin.CanonicText };
        if (prev == null)
        {
            // The first item must be a keyword; extend it over a following state / citizenship mention.
            if (ty != Typs.Keyword)
            {
                return(null);
            }
            for (t = tok.EndToken.Next; t != null; t = t.Next)
            {
                Pullenti.Ner.Referent r = t.GetReferent();
                if (r != null && (r is Pullenti.Ner.Geo.GeoReferent))
                {
                    res.Referent = r;
                    res.EndToken = t;
                    continue;
                }
                if (t.IsValue("ГРАЖДАНИН", null) && t.Next != null && (t.Next.GetReferent() is Pullenti.Ner.Geo.GeoReferent))
                {
                    res.Referent = t.Next.GetReferent();
                    t = (res.EndToken = t.Next);
                    continue;
                }
                if (r != null)
                {
                    break;
                }
                PersonAttrToken ait = PersonAttrToken.TryAttach(t, null, PersonAttrToken.PersonAttrAttachAttrs.No);
                if (ait != null)
                {
                    if (ait.Referent != null)
                    {
                        foreach (Pullenti.Ner.Slot s in ait.Referent.Slots)
                        {
                            if (s.TypeName == Pullenti.Ner.Person.PersonPropertyReferent.ATTR_REF && (s.Value is Pullenti.Ner.Geo.GeoReferent))
                            {
                                res.Referent = s.Value as Pullenti.Ner.Referent;
                            }
                        }
                    }
                    res.EndToken = ait.EndToken;
                    break;
                }
                if (t.IsValue("ДАННЫЙ", null))
                {
                    res.EndToken = t;
                    continue;
                }
                break;
            }
            // Only state-level geo referents are kept.
            if ((res.Referent is Pullenti.Ner.Geo.GeoReferent) && !(res.Referent as Pullenti.Ner.Geo.GeoReferent).IsState)
            {
                res.Referent = null;
            }
            return(res);
        }
        if (ty == Typs.Number)
        {
            StringBuilder tmp = new StringBuilder();
            Pullenti.Ner.Token tt = tok.EndToken.Next;
            if (tt != null && tt.IsChar(':'))
            {
                tt = tt.Next;
            }
            // Glue all number tokens that follow on the same line.
            for (; tt != null; tt = tt.Next)
            {
                if (tt.IsNewlineBefore)
                {
                    break;
                }
                if (!(tt is Pullenti.Ner.NumberToken))
                {
                    break;
                }
                tmp.Append(tt.GetSourceText());
                res.EndToken = tt;
            }
            if (tmp.Length < 1)
            {
                return(null);
            }
            res.Value = tmp.ToString();
            res.HasPrefix = true;
            return(res);
        }
        if (ty == Typs.Seria)
        {
            StringBuilder tmp = new StringBuilder();
            Pullenti.Ner.Token tt = tok.EndToken.Next;
            if (tt != null && tt.IsChar(':'))
            {
                tt = tt.Next;
            }
            // NOTE(review): nextNum is assigned but never read in this method.
            bool nextNum = false;
            for (; tt != null; tt = tt.Next)
            {
                if (tt.IsNewlineBefore)
                {
                    break;
                }
                if (Pullenti.Ner.Core.MiscHelper.CheckNumberPrefix(tt) != null)
                {
                    nextNum = true;
                    break;
                }
                if (!(tt is Pullenti.Ner.NumberToken))
                {
                    if (!(tt is Pullenti.Ner.TextToken))
                    {
                        break;
                    }
                    if (!tt.Chars.IsAllUpper)
                    {
                        break;
                    }
                    Pullenti.Ner.NumberToken nu = Pullenti.Ner.Core.NumberHelper.TryParseRoman(tt);
                    if (nu != null)
                    {
                        // NOTE(review): res.EndToken is not advanced in this Roman-numeral branch - confirm this is intended.
                        tmp.Append(nu.GetSourceText());
                        tt = nu.EndToken;
                    }
                    else if (tt.LengthChar != 2)
                    {
                        break;
                    }
                    else
                    {
                        tmp.Append((tt as Pullenti.Ner.TextToken).Term);
                        res.EndToken = tt;
                    }
                    if (tt.Next != null && tt.Next.IsHiphen)
                    {
                        tt = tt.Next;
                    }
                    continue;
                }
                if (tmp.Length >= 4)
                {
                    break;
                }
                tmp.Append(tt.GetSourceText());
                res.EndToken = tt;
            }
            if (tmp.Length < 4)
            {
                if (tmp.Length < 2)
                {
                    return(null);
                }
                // A short series is accepted only when a number item follows it.
                Pullenti.Ner.Token tt1 = res.EndToken.Next;
                if (tt1 != null && tt1.IsComma)
                {
                    tt1 = tt1.Next;
                }
                PersonIdToken next = TryParse(tt1, res);
                if (!(next != null && next.Typ == Typs.Number))
                {
                    return(null);
                }
            }
            res.Value = tmp.ToString();
            res.HasPrefix = true;
            return(res);
        }
        if (ty == Typs.Code)
        {
            for (Pullenti.Ner.Token tt = res.EndToken.Next; tt != null; tt = tt.Next)
            {
                if (tt.IsCharOf(":") || tt.IsHiphen)
                {
                    continue;
                }
                if (tt is Pullenti.Ner.NumberToken)
                {
                    res.EndToken = tt;
                    continue;
                }
                break;
            }
        }
        if (ty == Typs.Address)
        {
            if (t.GetReferent() is Pullenti.Ner.Address.AddressReferent)
            {
                res.Referent = t.GetReferent();
                res.EndToken = t;
                return(res);
            }
            for (Pullenti.Ner.Token tt = res.EndToken.Next; tt != null; tt = tt.Next)
            {
                if (tt.IsCharOf(":") || tt.IsHiphen || tt.Morph.Class.IsPreposition)
                {
                    continue;
                }
                if (tt.GetReferent() is Pullenti.Ner.Address.AddressReferent)
                {
                    res.Referent = tt.GetReferent();
                    res.EndToken = tt;
                }
                break;
            }
            if (res.Referent == null)
            {
                return(null);
            }
        }
        return(res);
    }
    else if (prev == null)
    {
        return(null);
    }
    Pullenti.Ner.Token t0 = t;
    Pullenti.Ner.Token t1 = Pullenti.Ner.Core.MiscHelper.CheckNumberPrefix(t0);
    if (t1 != null)
    {
        t = t1;
    }
    if (t is Pullenti.Ner.NumberToken)
    {
        StringBuilder tmp = new StringBuilder();
        PersonIdToken res = new PersonIdToken(t0, t) { Typ = Typs.Number };
        for (Pullenti.Ner.Token tt = t; tt != null; tt = tt.Next)
        {
            if (tt.IsNewlineBefore || !(tt is Pullenti.Ner.NumberToken))
            {
                break;
            }
            tmp.Append(tt.GetSourceText());
            res.EndToken = tt;
        }
        if (tmp.Length < 4)
        {
            if (tmp.Length < 2)
            {
                return(null);
            }
            if (prev == null || prev.Typ != Typs.Keyword)
            {
                return(null);
            }
            // A short digit group right after a keyword is a series if a number follows it.
            PersonIdToken ne = TryParse(res.EndToken.Next, prev);
            if (ne != null && ne.Typ == Typs.Number)
            {
                res.Typ = Typs.Seria;
            }
            else
            {
                return(null);
            }
        }
        res.Value = tmp.ToString();
        if (t0 != t)
        {
            res.HasPrefix = true;
        }
        return(res);
    }
    if (t is Pullenti.Ner.ReferentToken)
    {
        Pullenti.Ner.Referent r = t.GetReferent();
        if (r != null)
        {
            if (r.TypeName == "DATE")
            {
                return(new PersonIdToken(t, t) { Typ = Typs.Date, Referent = r });
            }
            if (r.TypeName == "ORGANIZATION")
            {
                return(new PersonIdToken(t, t) { Typ = Typs.Org, Referent = r });
            }
            if (r.TypeName == "ADDRESS")
            {
                return(new PersonIdToken(t, t) { Typ = Typs.Address, Referent = r });
            }
        }
    }
    if ((prev != null && prev.Typ == Typs.Keyword && (t is Pullenti.Ner.TextToken)) && !t.Chars.IsAllLower && t.Chars.IsLetter)
    {
        // A letter token after a keyword is a series if a number follows it.
        PersonIdToken rr = TryParse(t.Next, prev);
        if (rr != null && rr.Typ == Typs.Number)
        {
            return(new PersonIdToken(t, t) { Typ = Typs.Seria, Value = (t as Pullenti.Ner.TextToken).Term });
        }
    }
    // "ОТ <date>" ("ВІД" is the second value passed to IsValue): issue date introduced by a preposition.
    if (t != null && t.IsValue("ОТ", "ВІД") && (t.Next is Pullenti.Ner.ReferentToken))
    {
        Pullenti.Ner.Referent dateRef = t.Next.GetReferent();
        // FIX: guard against a referent token without a referent before reading TypeName.
        if (dateRef != null && dateRef.TypeName == "DATE")
        {
            return(new PersonIdToken(t, t.Next) { Typ = Typs.Date, Referent = dateRef });
        }
    }
    return(null);
}
/// <summary>
/// Builds a textual name from the tokens between <paramref name="begin"/> and <paramref name="end"/> (inclusive),
/// optionally replacing text tokens with a normal form selected by class, case and gender.
/// </summary>
/// <param name="begin">First token of the span.</param>
/// <param name="end">Last token of the span.</param>
/// <param name="cla">Morphological class filter for choosing a word form (a zero Value means no filter).</param>
/// <param name="mc">Case filter for choosing a word form (undefined means no filter).</param>
/// <param name="gender">Gender filter; when no form matches with it, the search is repeated without it.</param>
/// <param name="ignoreBracketsAndHiphens">When true, brackets are handled as separate groups and free-standing hyphens are skipped.</param>
/// <param name="ignoreGeoReferent">When true, meta tokens (other than the first) whose referent type is "GEO" are skipped.</param>
/// <returns>The assembled string, or null when the arguments are invalid or nothing was collected.</returns>
public static string GetNameEx(Pullenti.Ner.Token begin, Pullenti.Ner.Token end, Pullenti.Morph.MorphClass cla, Pullenti.Morph.MorphCase mc, Pullenti.Morph.MorphGender gender = Pullenti.Morph.MorphGender.Undefined, bool ignoreBracketsAndHiphens = false, bool ignoreGeoReferent = false)
{
    if (end == null || begin == null)
    {
        return(null);
    }
    if (begin.EndChar > end.BeginChar && begin != end)
    {
        return(null);
    }

    StringBuilder name = new StringBuilder();
    string pendingPrefix = null;

    for (Pullenti.Ner.Token cur = begin; cur != null && cur.EndChar <= end.EndChar; cur = cur.Next)
    {
        // Hard cap on the size of the result.
        if (name.Length > 1000)
        {
            break;
        }
        if (cur.IsTableControlChar)
        {
            continue;
        }

        if (ignoreBracketsAndHiphens)
        {
            if (BracketHelper.IsBracket(cur, false))
            {
                if (cur == end)
                {
                    break;
                }
                if (cur.IsCharOf("(<["))
                {
                    BracketSequenceToken seq = BracketHelper.TryParse(cur, BracketParseAttr.No, 100);
                    if (seq != null && seq.EndChar <= end.EndChar)
                    {
                        string inner = GetNameEx(seq.BeginToken.Next, seq.EndToken.Previous, Pullenti.Morph.MorphClass.Undefined, Pullenti.Morph.MorphCase.Undefined, Pullenti.Morph.MorphGender.Undefined, ignoreBracketsAndHiphens, false);
                        if (inner != null)
                        {
                            // A trailing bracket group holding a single non-letter, non-referent token is dropped.
                            bool dropGroup = (seq.EndChar == end.EndChar && seq.BeginToken.Next == seq.EndToken.Previous && !seq.BeginToken.Next.Chars.IsLetter) && !(seq.BeginToken.Next is Pullenti.Ner.ReferentToken);
                            if (!dropGroup)
                            {
                                name.AppendFormat(" {0}{1}{2}", cur.GetSourceText(), inner, seq.EndToken.GetSourceText());
                            }
                        }
                        cur = seq.EndToken;
                    }
                }
                continue;
            }
            if (cur.IsHiphen)
            {
                if (cur == end)
                {
                    break;
                }
                if (cur.IsWhitespaceBefore || cur.IsWhitespaceAfter)
                {
                    continue;
                }
            }
        }

        Pullenti.Ner.TextToken text = cur as Pullenti.Ner.TextToken;
        if (text != null)
        {
            // "word-word": remember the left part as a prefix and jump to the hyphen.
            if (!ignoreBracketsAndHiphens
                && (text.Next != null && text.Next.IsHiphen && (text.Next.Next is Pullenti.Ner.TextToken))
                && text != end && text.Next != end)
            {
                pendingPrefix = (pendingPrefix == null ? text.Term : string.Format("{0}-{1}", pendingPrefix, text.Term));
                cur = text.Next;
                if (cur == end)
                {
                    break;
                }
                continue;
            }

            string word = null;
            bool genderRequested = gender != Pullenti.Morph.MorphGender.Undefined;
            if (cla.Value != 0 || !mc.IsUndefined || genderRequested)
            {
                // Pass 0 applies every filter; pass 1 runs only when a gender was requested and
                // nothing matched, and repeats the search without the gender filter.
                for (int pass = 0; pass < 2; pass++)
                {
                    if (pass == 1 && !(word == null && genderRequested))
                    {
                        break;
                    }
                    bool filterByGender = pass == 0 && genderRequested;
                    foreach (Pullenti.Morph.MorphBaseInfo item in text.Morph.Items)
                    {
                        Pullenti.Morph.MorphWordForm form = item as Pullenti.Morph.MorphWordForm;
                        if (form == null)
                        {
                            continue;
                        }
                        if (cla.Value != 0 && ((form.Class.Value & cla.Value)) == 0)
                        {
                            continue;
                        }
                        if (!mc.IsUndefined && ((form.Case & mc)).IsUndefined)
                        {
                            continue;
                        }
                        if (filterByGender && ((form.Gender & gender)) == Pullenti.Morph.MorphGender.Undefined)
                        {
                            continue;
                        }
                        // The first match wins unless a later form's normal case equals the term itself.
                        if (word == null || form.NormalCase == text.Term)
                        {
                            word = form.NormalCase;
                        }
                    }
                }
            }

            if (word == null)
            {
                word = text.Term;
                if (text.Chars.IsLastLower && text.LengthChar > 2)
                {
                    // Take the source text and cut it after its last upper-case character (if any).
                    word = text.GetSourceText();
                    int cut = word.Length - 1;
                    while (cut >= 0 && !char.IsUpper(word[cut]))
                    {
                        cut--;
                    }
                    if (cut >= 0)
                    {
                        word = word.Substring(0, cut + 1);
                    }
                }
            }

            if (pendingPrefix != null)
            {
                string joiner = (ignoreBracketsAndHiphens ? " " : "-");
                word = string.Format("{0}{1}{2}", pendingPrefix, joiner, word);
            }
            pendingPrefix = null;

            if (name.Length > 0 && word.Length > 0)
            {
                if (char.IsLetterOrDigit(word[0]))
                {
                    // No space directly after a hyphen already in the buffer.
                    if (name[name.Length - 1] != '-')
                    {
                        name.Append(' ');
                    }
                }
                else if (!ignoreBracketsAndHiphens && BracketHelper.CanBeStartOfSequence(text, false, false))
                {
                    name.Append(' ');
                }
            }
            name.Append(word);
        }
        else if (cur is Pullenti.Ner.NumberToken)
        {
            if (name.Length > 0)
            {
                if (cur.IsWhitespaceBefore || name[name.Length - 1] != '-')
                {
                    name.Append(' ');
                }
            }
            Pullenti.Ner.NumberToken number = cur as Pullenti.Ner.NumberToken;
            bool singleAdjectiveWord = (cur.Morph.Class.IsAdjective && number.Typ == Pullenti.Ner.NumberSpellingType.Words && number.BeginToken == number.EndToken) && (number.BeginToken is Pullenti.Ner.TextToken);
            if (singleAdjectiveWord)
            {
                // A one-word adjective numeral is emitted as its term rather than its numeric value.
                name.Append((number.BeginToken as Pullenti.Ner.TextToken).Term);
            }
            else
            {
                name.Append(number.Value);
            }
        }
        else if (cur is Pullenti.Ner.MetaToken)
        {
            if ((ignoreGeoReferent && cur != begin && cur.GetReferent() != null) && cur.GetReferent().TypeName == "GEO")
            {
                continue;
            }
            Pullenti.Ner.MetaToken meta = cur as Pullenti.Ner.MetaToken;
            string nested = GetNameEx(meta.BeginToken, meta.EndToken, cla, mc, gender, ignoreBracketsAndHiphens, ignoreGeoReferent);
            if (!string.IsNullOrEmpty(nested))
            {
                if (name.Length > 0)
                {
                    if (cur.IsWhitespaceBefore || name[name.Length - 1] != '-')
                    {
                        name.Append(' ');
                    }
                }
                name.Append(nested);
            }
        }

        if (cur == end)
        {
            break;
        }
    }

    if (name.Length == 0)
    {
        return(null);
    }
    return(name.ToString());
}