/// <summary>
/// Reads a run of numbers joined by delimiter characters (for example "9001-2015")
/// and packs it into a single <see cref="UriItemToken"/>.
/// </summary>
/// <param name="t0">
/// First token to look at. Leading ':', '/', '\', hyphens and the word "IEC" are skipped
/// before the first number is expected.
/// </param>
/// <param name="specChars">Characters that are accepted as delimiters between numbers.</param>
/// <returns>
/// A token spanning from <paramref name="t0"/> to the last number consumed, whose Value is the
/// numbers joined by the delimiters found between them; null when no number follows the prefix.
/// </returns>
public static UriItemToken AttachISOContent(Pullenti.Ner.Token t0, string specChars)
{
    // Step over the separators (and the optional "IEC" marker) that may precede the number.
    Pullenti.Ner.Token cur = t0;
    while (cur != null && (cur.IsCharOf(":/\\") || cur.IsHiphen || cur.IsValue("IEC", null)))
    {
        cur = cur.Next;
    }

    // Covers both "ran out of tokens" and "first meaningful token is not a number".
    if (!(cur is Pullenti.Ner.NumberToken))
    {
        return null;
    }

    Pullenti.Ner.Token lastNumber = cur;
    char pendingDelim = '\0';
    StringBuilder buffer = new StringBuilder();

    while (cur != null)
    {
        // Whitespace terminates the run, except in front of the most recently accepted number.
        if (cur != lastNumber && cur.IsWhitespaceBefore)
        {
            break;
        }

        if (cur is Pullenti.Ner.NumberToken)
        {
            // A delimiter is emitted only once it is known to be followed by another number.
            if (pendingDelim != '\0')
            {
                buffer.Append(pendingDelim);
            }
            pendingDelim = '\0';
            lastNumber = cur;
            buffer.Append(cur.GetSourceText());
        }
        else if ((cur is Pullenti.Ner.TextToken) && cur.IsCharOf(specChars))
        {
            pendingDelim = cur.GetSourceText()[0];
        }
        else
        {
            break;
        }

        cur = cur.Next;
    }

    if (buffer.Length == 0)
    {
        return null;
    }

    return new UriItemToken(t0, lastNumber) { Value = buffer.ToString() };
}
/// <summary>
/// Quick look-ahead test: a short word (at most 4 characters) followed by a number,
/// by a slash/backslash/hyphen and then a number, or by one of the symbols "+*&amp;^#@!_".
/// </summary>
/// <param name="t">Candidate first token.</param>
/// <returns>true when the token and its follower match one of the shapes above.</returns>
bool CanBeStartOfDenom(Pullenti.Ner.Token t)
{
    // The head must be a letter token that has a follower on the same line.
    if (t == null || !t.Chars.IsLetter || t.Next == null || t.IsNewlineAfter)
    {
        return false;
    }

    bool isShortWord = (t is Pullenti.Ner.TextToken) && t.LengthChar <= 4;
    if (!isShortWord)
    {
        return false;
    }

    Pullenti.Ner.Token follower = t.Next;
    if (follower.Chars.IsLetter)
    {
        return false;
    }
    if (follower is Pullenti.Ner.NumberToken)
    {
        return true;
    }
    if (follower.IsCharOf("/\\") || follower.IsHiphen)
    {
        // A separator only counts when a number comes right after it.
        return follower.Next is Pullenti.Ner.NumberToken;
    }

    return follower.IsCharOf("+*&^#@!_");
}
/// <summary>
/// Scores the line break between two adjacent fragments.
/// </summary>
/// <param name="fr1">Fragment before the break.</param>
/// <param name="fr2">Fragment after the break.</param>
/// <returns>
/// 1 when <paramref name="fr1"/> is followed by more than one newline or ends with ':', ';' or '.';
/// -1 when a bracket sequence opened in <paramref name="fr1"/> reaches into <paramref name="fr2"/>,
/// when <paramref name="fr1"/> ends with a preposition/conjunction, or when one of the
/// lower-case checks below fires; 0 otherwise.
/// </returns>
static int _calcNewlineBetweenCoef(FragToken fr1, FragToken fr2)
{
    if (fr1.NewlinesAfterCount > 1)
    {
        return 1;
    }

    // A bracket sequence that starts in the first fragment and ends inside (or after the start of)
    // the second one yields -1.
    Pullenti.Ner.Token scan = fr1.BeginToken;
    while (scan != null && scan.EndChar <= fr1.EndChar)
    {
        if (Pullenti.Ner.Core.BracketHelper.CanBeStartOfSequence(scan, false, false))
        {
            Pullenti.Ner.Core.BracketSequenceToken seq = Pullenti.Ner.Core.BracketHelper.TryParse(scan, Pullenti.Ner.Core.BracketParseAttr.CanBeManyLines, 100);
            if (seq != null && seq.EndChar >= fr2.BeginChar)
            {
                return -1;
            }
        }
        scan = scan.Next;
    }

    Pullenti.Ner.Token tail = fr1.EndToken;
    if (tail.IsCharOf(":;."))
    {
        return 1;
    }
    if ((tail is Pullenti.Ner.TextToken) && (tail.Morph.Class.IsPreposition || tail.Morph.Class.IsConjunction))
    {
        return -1;
    }

    Pullenti.Ner.Token head = fr2.BeginToken;
    if (head is Pullenti.Ner.TextToken)
    {
        if (head.Chars.IsAllLower)
        {
            return -1;
        }
        if (Pullenti.Ner.Core.BracketHelper.CanBeStartOfSequence(head, false, false) && tail.Chars.IsAllLower)
        {
            return -1;
        }
    }
    else if ((head is Pullenti.Ner.NumberToken) && tail.Chars.IsAllLower)
    {
        return -1;
    }

    // Lower-case tail plus a second fragment that closes with ';'.
    return (tail.Chars.IsAllLower && fr2.EndToken.IsChar(';')) ? -1 : 0;
}
/// <summary>
/// Checks whether the token is a bracket or a quotation mark.
/// </summary>
/// <param name="t">Token to check.</param>
/// <param name="quotesOnly">When true, only quotation marks are accepted, not brackets.</param>
/// <returns>true if the token qualifies, false otherwise.</returns>
public static bool IsBracket(Pullenti.Ner.Token t, bool quotesOnly = false)
{
    if (t == null)
    {
        return false;
    }

    // Opening and closing characters are treated the same way.
    if (!t.IsCharOf(m_OpenChars) && !t.IsCharOf(m_CloseChars))
    {
        return false;
    }
    if (!quotesOnly)
    {
        return true;
    }

    // The quote filter is applied only to text tokens; anything else passes as-is.
    Pullenti.Ner.TextToken text = t as Pullenti.Ner.TextToken;
    if (text == null)
    {
        return true;
    }
    return m_Quotes.IndexOf(text.Term[0]) >= 0;
}
/// <summary>
/// Builds a normalized value string from the tokens in [begin, end] and stores it in the
/// ATTR_VALUE slot. Number tokens contribute their source text, text tokens their Term,
/// and '-', '\' and '/' are all written as '-'. A '-' that sits directly between a digit
/// and a non-digit letter (in either order) is then removed.
/// </summary>
/// <param name="begin">First token of the range.</param>
/// <param name="end">Last token of the range (inclusive).</param>
internal void AddValue(Pullenti.Ner.Token begin, Pullenti.Ner.Token end)
{
    StringBuilder text = new StringBuilder();

    Pullenti.Ner.Token cur = begin;
    while (cur != null && cur.Previous != end)
    {
        if (cur is Pullenti.Ner.NumberToken)
        {
            text.Append(cur.GetSourceText());
        }
        else
        {
            Pullenti.Ner.TextToken word = cur as Pullenti.Ner.TextToken;
            if (word != null)
            {
                text.Append(cur.IsCharOf("-\\/") ? "-" : word.Term);
            }
        }
        cur = cur.Next;
    }

    // Only interior positions can hold a removable dash, so the first and last index are skipped.
    for (int pos = 1; (pos + 1) < text.Length; pos++)
    {
        if (text[pos] != '-')
        {
            continue;
        }

        char left = text[pos - 1];
        char right = text[pos + 1];
        if (!char.IsLetterOrDigit(left) || !char.IsLetterOrDigit(right))
        {
            continue;
        }

        // Drop the dash on a digit/letter boundary; keep it between two digits or two letters.
        if (char.IsDigit(left) != char.IsDigit(right))
        {
            text.Remove(pos, 1);
        }
    }

    this.AddSlot(ATTR_VALUE, text.ToString(), false, 0);
    m_Names = null;
}
/// <summary>
/// Extends an already recognized number-with-postfix token by what follows it:
/// first a correction of the primary type via <c>_corrExTyp2</c>, then, after a '/', '\'
/// or the word "НА", a second (non-money) postfix that is stored in ExTyp2 and corrected
/// the same way. ExTyp, ExTyp2 and EndToken of <paramref name="ex"/> are updated in place.
/// </summary>
/// <param name="ex">Token to extend.</param>
static void _correctExtTypes(Pullenti.Ner.Core.NumberExToken ex)
{
    Pullenti.Ner.Token cursor = ex.EndToken.Next;
    if (cursor == null)
    {
        return;
    }

    // Primary type correction.
    Pullenti.Ner.Core.NumberExType kind = ex.ExTyp;
    Pullenti.Ner.Token corrected = _corrExTyp2(cursor, ref kind);
    if (corrected != null)
    {
        ex.ExTyp = kind;
        ex.EndToken = corrected;
        cursor = corrected.Next;
    }

    if (cursor == null || cursor.Next == null)
    {
        return;
    }

    // A second postfix is only looked for behind a divider.
    bool isDivider = cursor.IsCharOf("/\\") || cursor.IsValue("НА", null);
    if (!isDivider)
    {
        return;
    }

    Pullenti.Ner.Core.TerminToken postfix = m_Postfixes.TryParse(cursor.Next, Pullenti.Ner.Core.TerminParseAttr.No);
    if (postfix == null)
    {
        return;
    }

    Pullenti.Ner.Core.NumberExType postfixKind = (Pullenti.Ner.Core.NumberExType)postfix.Termin.Tag;
    if (postfixKind == Pullenti.Ner.Core.NumberExType.Money)
    {
        return;
    }

    ex.ExTyp2 = postfixKind;
    ex.EndToken = postfix.EndToken;

    // Secondary type correction, starting right after the postfix.
    kind = ex.ExTyp2;
    corrected = _corrExTyp2(ex.EndToken.Next, ref kind);
    if (corrected != null)
    {
        ex.ExTyp2 = kind;
        ex.EndToken = corrected;
    }
}
/// <summary>
/// Recognizes a single-token special postfix: the percent sign or a money character.
/// </summary>
/// <param name="t">Token to inspect; may be null.</param>
/// <returns>
/// A one-token <see cref="Pullenti.Ner.Core.NumberExToken"/> with an empty value and type
/// Percent (for '%') or Money (when <c>NumberHelper.IsMoneyChar</c> returns a non-null string,
/// which is stored in ExTypParam); null when <paramref name="t"/> is null or neither applies.
/// </returns>
static Pullenti.Ner.Core.NumberExToken _attachSpecPostfix(Pullenti.Ner.Token t)
{
    if (t == null)
    {
        return null;
    }

    if (t.IsCharOf("%"))
    {
        return new Pullenti.Ner.Core.NumberExToken(t, t, "", Pullenti.Ner.NumberSpellingType.Digit, Pullenti.Ner.Core.NumberExType.Percent);
    }

    string money = Pullenti.Ner.Core.NumberHelper.IsMoneyChar(t);
    if (money != null)
    {
        // The statement terminator belongs inside the block, directly after the object initializer.
        return new Pullenti.Ner.Core.NumberExToken(t, t, "", Pullenti.Ner.NumberSpellingType.Digit, Pullenti.Ner.Core.NumberExType.Money) { ExTypParam = money };
    }

    return null;
}
/// <summary>
/// Builds a <c>CanonicDecreeRefUri</c> for a referent token that refers either to a decree
/// (<c>DecreeReferent</c>) or to a part of a decree (<c>DecreePartReferent</c>), and narrows the
/// BeginChar..EndChar span of the result to a sub-range of the token's text.
/// </summary>
/// <param name="t">Token to inspect. Anything that is not a ReferentToken yields null.</param>
/// <returns>
/// The descriptor, or null when the token is not a ReferentToken, refers to neither kind of
/// referent, refers to a decree of kind Publisher, or is the right-hand side of a
/// "part - part" pair for which CreateRangeReferent succeeds.
/// </returns>
// Overview of the flow, as far as it can be read from the code below:
//
// DecreeReferent branch
//   * The initial span is the whole token. It is returned unchanged when the token is wrapped
//     in '(' ... ')' by its neighbours, or when MiscAttrs != 0.
//   * If the token itself starts with '(' and ends with ')', the span is the content between them.
//   * If the token is followed by a comma/"and" and another decree token, the items of that next
//     decree (cut at the first item followed by a newline) are kept in nextDecreeItems and used
//     to decide which leading items of the current decree to exclude from the span.
//   * The inner tokens are then walked: brackets and "(ДАЛЕЕ ..." trim the span; item lists from
//     DecreeToken.TryAttachList may set TypeWithGeo (Typ followed by Terr), cut the span before a
//     Name item, or move BeginChar to the first item when it is preceded by
//     "УТВЕРЖДЕННЫЙ"/"УТВЕРДИТЬ"/"УТВ" (IsAdopted is set in that case).
//   NOTE(review): several places dereference tt.Previous / l.BeginToken.Previous without a null
//   check; presumably the tokens involved are never first in the text - confirm.
//
// DecreePartReferent branch
//   * When the token is followed by a hyphen and another part token and CreateRangeReferent
//     returns a value, the result refers to that range referent, IsDiap is true and the span
//     extends to the end of the second token.
//   * An embedded DecreeReferent token is excluded from the span (together with a preceding
//     preposition when it is not at the start of the token).
//   * ptmin is the slot type with the highest PartToken._getRank among the referent's slots
//     (Prefix is ignored; a Part with rank below 1 is ranked as Paragraph when an ATTR_CLAUSE
//     slot exists). ptmin2/max2 track the runner-up but are not read afterwards in this method.
//   * Depending on _hasSameDecree before/after the token, the span is narrowed to the PartToken
//     of type ptmin, to its first value, or to a leading number sequence ("1.2.3", optionally
//     followed by "-" and another number sequence).
public static CanonicDecreeRefUri TryCreateCanonicDecreeRefUri(Pullenti.Ner.Token t) { if (!(t is Pullenti.Ner.ReferentToken)) { return(null); } Pullenti.Ner.Decree.DecreeReferent dr = t.GetReferent() as Pullenti.Ner.Decree.DecreeReferent; CanonicDecreeRefUri res; if (dr != null) { if (dr.Kind == Pullenti.Ner.Decree.DecreeKind.Publisher) { return(null); } res = new CanonicDecreeRefUri(t.Kit.Sofa.Text) { Ref = dr, BeginChar = t.BeginChar, EndChar = t.EndChar }; if ((t.Previous != null && t.Previous.IsChar('(') && t.Next != null) && t.Next.IsChar(')')) { return(res); } if ((t as Pullenti.Ner.ReferentToken).MiscAttrs != 0) { return(res); } Pullenti.Ner.ReferentToken rt = t as Pullenti.Ner.ReferentToken; if (rt.BeginToken.IsChar('(') && rt.EndToken.IsChar(')')) { res = new CanonicDecreeRefUri(t.Kit.Sofa.Text) { Ref = dr, BeginChar = rt.BeginToken.Next.BeginChar, EndChar = rt.EndToken.Previous.EndChar }; return(res); } List <DecreeToken> nextDecreeItems = null; if ((t.Next != null && t.Next.IsCommaAnd && (t.Next.Next is Pullenti.Ner.ReferentToken)) && (t.Next.Next.GetReferent() is Pullenti.Ner.Decree.DecreeReferent)) { nextDecreeItems = DecreeToken.TryAttachList((t.Next.Next as Pullenti.Ner.ReferentToken).BeginToken, null, 10, false); if (nextDecreeItems != null && nextDecreeItems.Count > 1) { for (int i = 0; i < (nextDecreeItems.Count - 1); i++) { if (nextDecreeItems[i].IsNewlineAfter) { nextDecreeItems.RemoveRange(i + 1, nextDecreeItems.Count - i - 1); break; } } } } bool wasTyp = false; bool wasNum = false; for (Pullenti.Ner.Token tt = (t as Pullenti.Ner.MetaToken).BeginToken; tt != null && tt.EndChar <= t.EndChar; tt = tt.Next) { if (tt.BeginChar == t.BeginChar && tt.IsChar('(') && tt.Next != null) { res.BeginChar = tt.Next.BeginChar; } if (tt.IsChar('(') && tt.Next != null && tt.Next.IsValue("ДАЛЕЕ", null)) { if (res.EndChar >= tt.BeginChar) { res.EndChar = tt.Previous.EndChar; } break; } if (tt.EndChar == t.EndChar && tt.IsChar(')')) { res.EndChar = 
tt.Previous.EndChar; for (Pullenti.Ner.Token tt1 = tt.Previous; tt1 != null && tt1.BeginChar >= res.BeginChar; tt1 = tt1.Previous) { if (tt1.IsChar('(') && tt1.Previous != null) { if (res.BeginChar < tt1.Previous.BeginChar) { res.EndChar = tt1.Previous.EndChar; } } } } List <DecreeToken> li = DecreeToken.TryAttachList(tt, null, 10, false); if (li != null && li.Count > 0) { for (int ii = 0; ii < (li.Count - 1); ii++) { if (li[ii].Typ == DecreeToken.ItemType.Typ && li[ii + 1].Typ == DecreeToken.ItemType.Terr) { res.TypeWithGeo = Pullenti.Ner.Core.MiscHelper.GetTextValue(li[ii].BeginToken, li[ii + 1].EndToken, Pullenti.Ner.Core.GetTextAttr.FirstNounGroupToNominativeSingle); } } if ((nextDecreeItems != null && nextDecreeItems.Count > 1 && (nextDecreeItems.Count < li.Count)) && nextDecreeItems[0].Typ != DecreeToken.ItemType.Typ) { int d = li.Count - nextDecreeItems.Count; int j; for (j = 0; j < nextDecreeItems.Count; j++) { if (nextDecreeItems[j].Typ != li[d + j].Typ) { break; } } if (j >= nextDecreeItems.Count) { li.RemoveRange(0, d); res.BeginChar = li[0].BeginChar; } } else if ((nextDecreeItems != null && nextDecreeItems.Count == 1 && nextDecreeItems[0].Typ == DecreeToken.ItemType.Name) && li.Count == 2 && li[1].Typ == DecreeToken.ItemType.Name) { res.BeginChar = li[1].BeginChar; res.EndChar = li[1].EndChar; break; } else if ((nextDecreeItems != null && nextDecreeItems.Count == 1 && nextDecreeItems[0].Typ == DecreeToken.ItemType.Number) && li[li.Count - 1].Typ == DecreeToken.ItemType.Number) { res.BeginChar = li[li.Count - 1].BeginChar; res.EndChar = li[li.Count - 1].EndChar; } for (int i = 0; i < li.Count; i++) { DecreeToken l = li[i]; if (l.BeginChar > t.EndChar) { li.RemoveRange(i, li.Count - i); break; } if (l.Typ == DecreeToken.ItemType.Name) { if (!wasNum) { if (dr.Kind == Pullenti.Ner.Decree.DecreeKind.Contract) { continue; } if (((i + 1) < li.Count) && ((li[i + 1].Typ == DecreeToken.ItemType.Date || li[i + 1].Typ == DecreeToken.ItemType.Number))) { continue; 
} } int ee = l.BeginToken.Previous.EndChar; if (ee > res.BeginChar && (ee < res.EndChar)) { res.EndChar = ee; } break; } if (l.Typ == DecreeToken.ItemType.Number) { wasNum = true; } if (i == 0) { if (l.Typ == DecreeToken.ItemType.Typ) { wasTyp = true; } else if (l.Typ == DecreeToken.ItemType.Owner || l.Typ == DecreeToken.ItemType.Org) { if (((i + 1) < li.Count) && ((li[1].Typ == DecreeToken.ItemType.Date || li[1].Typ == DecreeToken.ItemType.Number))) { wasTyp = true; } } if (wasTyp) { Pullenti.Ner.Token tt0 = l.BeginToken.Previous; if (tt0 != null && tt0.IsChar('.')) { tt0 = tt0.Previous; } if (tt0 != null && ((tt0.IsValue("УТВЕРЖДЕННЫЙ", null) || tt0.IsValue("УТВЕРДИТЬ", null) || tt0.IsValue("УТВ", null)))) { if (l.BeginChar > res.BeginChar) { res.BeginChar = l.BeginChar; if (res.EndChar < res.BeginChar) { res.EndChar = t.EndChar; } res.IsAdopted = true; } } } } } if (li.Count > 0) { tt = li[li.Count - 1].EndToken; if (tt.IsChar(')')) { tt = tt.Previous; } continue; } } if (wasTyp) { DecreeToken na = DecreeToken.TryAttachName(tt, dr.Typ0, true, false); if (na != null && tt.BeginChar > t.BeginChar) { Pullenti.Ner.Token tt1 = na.EndToken.Next; if (tt1 != null && tt1.IsCharOf(",()")) { tt1 = tt1.Next; } if (tt1 != null && (tt1.EndChar < t.EndChar)) { if (tt1.IsValue("УТВЕРЖДЕННЫЙ", null) || tt1.IsValue("УТВЕРДИТЬ", null) || tt1.IsValue("УТВ", null)) { tt = tt1; continue; } } if (tt.Previous != null && tt.Previous.IsChar(':') && na.EndChar <= res.EndChar) { res.BeginChar = tt.BeginChar; break; } if (tt.Previous.EndChar > res.BeginChar) { res.EndChar = tt.Previous.EndChar; break; } } } } return(res); } Pullenti.Ner.Decree.DecreePartReferent dpr = t.GetReferent() as Pullenti.Ner.Decree.DecreePartReferent; if (dpr == null) { return(null); } if ((t.Previous != null && t.Previous.IsHiphen && (t.Previous.Previous is Pullenti.Ner.ReferentToken)) && (t.Previous.Previous.GetReferent() is Pullenti.Ner.Decree.DecreePartReferent)) { if 
(Pullenti.Ner.Decree.DecreePartReferent.CreateRangeReferent(t.Previous.Previous.GetReferent() as Pullenti.Ner.Decree.DecreePartReferent, dpr) != null) { return(null); } } Pullenti.Ner.Token t1 = t; bool hasDiap = false; Pullenti.Ner.ReferentToken DiapRef = null; if ((t.Next != null && t.Next.IsHiphen && (t.Next.Next is Pullenti.Ner.ReferentToken)) && (t.Next.Next.GetReferent() is Pullenti.Ner.Decree.DecreePartReferent)) { Pullenti.Ner.Decree.DecreePartReferent diap = Pullenti.Ner.Decree.DecreePartReferent.CreateRangeReferent(dpr as Pullenti.Ner.Decree.DecreePartReferent, t.Next.Next.GetReferent() as Pullenti.Ner.Decree.DecreePartReferent); if (diap != null) { dpr = diap; hasDiap = true; t1 = t.Next.Next; DiapRef = t1 as Pullenti.Ner.ReferentToken; } } res = new CanonicDecreeRefUri(t.Kit.Sofa.Text) { Ref = dpr, BeginChar = t.BeginChar, EndChar = t1.EndChar, IsDiap = hasDiap }; if ((t.Previous != null && t.Previous.IsChar('(') && t1.Next != null) && t1.Next.IsChar(')')) { return(res); } for (Pullenti.Ner.Token tt = (t as Pullenti.Ner.MetaToken).BeginToken; tt != null && tt.EndChar <= t.EndChar; tt = tt.Next) { if (tt.GetReferent() is Pullenti.Ner.Decree.DecreeReferent) { if (tt.BeginChar > t.BeginChar) { res.EndChar = tt.Previous.EndChar; if (tt.Previous.Morph.Class.IsPreposition && tt.Previous.Previous != null) { res.EndChar = tt.Previous.Previous.EndChar; } } else if (tt.EndChar < t.EndChar) { res.BeginChar = tt.BeginChar; } break; } } bool hasSameBefore = _hasSameDecree(t, dpr, true); bool hasSameAfter = _hasSameDecree(t, dpr, false); PartToken.ItemType ptmin = PartToken.ItemType.Prefix; PartToken.ItemType ptmin2 = PartToken.ItemType.Prefix; int max = 0; int max2 = 00; foreach (Pullenti.Ner.Slot s in dpr.Slots) { PartToken.ItemType pt = PartToken._getTypeByAttrName(s.TypeName); if (pt == PartToken.ItemType.Prefix) { continue; } int co = PartToken._getRank(pt); if (co < 1) { if (pt == PartToken.ItemType.Part && 
dpr.FindSlot(Pullenti.Ner.Decree.DecreePartReferent.ATTR_CLAUSE, null, true) != null) { co = PartToken._getRank(PartToken.ItemType.Paragraph); } else { continue; } } if (co > max) { max2 = max; ptmin2 = ptmin; max = co; ptmin = pt; } else if (co > max2) { max2 = co; ptmin2 = pt; } } if (ptmin != PartToken.ItemType.Prefix) { for (Pullenti.Ner.Token tt = (t as Pullenti.Ner.MetaToken).BeginToken; tt != null && tt.EndChar <= res.EndChar; tt = tt.Next) { if (tt.BeginChar >= res.BeginChar) { PartToken pt = PartToken.TryAttach(tt, null, false, false); if (pt != null && pt.Typ == ptmin) { res.BeginChar = pt.BeginChar; res.EndChar = pt.EndChar; if (pt.Typ == PartToken.ItemType.Appendix && pt.EndToken.IsValue("К", null) && pt.BeginToken != pt.EndToken) { res.EndChar = pt.EndToken.Previous.EndChar; } if (pt.EndChar == t.EndChar) { if ((t.Next != null && t.Next.IsCommaAnd && (t.Next.Next is Pullenti.Ner.ReferentToken)) && (t.Next.Next.GetReferent() is Pullenti.Ner.Decree.DecreePartReferent)) { Pullenti.Ner.Token tt1 = (t.Next.Next as Pullenti.Ner.ReferentToken).BeginToken; bool ok = true; if (tt1.Chars.IsLetter) { ok = false; } if (ok) { foreach (PartToken.PartValue v in pt.Values) { res.BeginChar = v.BeginChar; res.EndChar = v.EndChar; break; } } } } if (!hasDiap) { return(res); } break; } } } if (hasDiap && DiapRef != null) { for (Pullenti.Ner.Token tt = DiapRef.BeginToken; tt != null && tt.EndChar <= DiapRef.EndChar; tt = tt.Next) { if (tt.IsChar(',')) { break; } if (tt != DiapRef.BeginToken && tt.IsWhitespaceBefore) { break; } res.EndChar = tt.EndChar; } return(res); } } if (((hasSameBefore || hasSameAfter)) && ptmin != PartToken.ItemType.Prefix) { for (Pullenti.Ner.Token tt = (t as Pullenti.Ner.MetaToken).BeginToken; tt != null && tt.EndChar <= res.EndChar; tt = tt.Next) { if (tt.BeginChar >= res.BeginChar) { PartToken pt = (!hasSameBefore ? 
PartToken.TryAttach(tt, null, false, false) : null); if (pt != null) { if (pt.Typ == ptmin) { foreach (PartToken.PartValue v in pt.Values) { res.BeginChar = v.BeginChar; res.EndChar = v.EndChar; return(res); } } tt = pt.EndToken; continue; } if ((tt is Pullenti.Ner.NumberToken) && tt.BeginChar == res.BeginChar) { res.EndChar = tt.EndChar; for (; tt != null && tt.Next != null;) { if (!tt.Next.IsChar('.') || tt.IsWhitespaceAfter || tt.Next.IsWhitespaceAfter) { break; } if (!(tt.Next.Next is Pullenti.Ner.NumberToken)) { break; } tt = tt.Next.Next; res.EndChar = tt.EndChar; } if (tt.Next != null && tt.Next.IsHiphen) { if (tt.Next.Next is Pullenti.Ner.NumberToken) { tt = tt.Next.Next; res.EndChar = tt.EndChar; for (; tt != null && tt.Next != null;) { if (!tt.Next.IsChar('.') || tt.IsWhitespaceAfter || tt.Next.IsWhitespaceAfter) { break; } if (!(tt.Next.Next is Pullenti.Ner.NumberToken)) { break; } tt = tt.Next.Next; res.EndChar = tt.EndChar; } } else if (tt.Next.Next != null && (tt.Next.Next.GetReferent() is Pullenti.Ner.Decree.DecreePartReferent) && hasDiap) { res.EndChar = (tt.Next.Next as Pullenti.Ner.MetaToken).BeginToken.EndChar; } } return(res); } if (Pullenti.Ner.Core.BracketHelper.CanBeStartOfSequence(tt, true, false) && tt.BeginChar == res.BeginChar && hasSameBefore) { Pullenti.Ner.Core.BracketSequenceToken br = Pullenti.Ner.Core.BracketHelper.TryParse(tt, Pullenti.Ner.Core.BracketParseAttr.No, 100); if (br != null && br.EndToken.Previous == tt.Next) { res.EndChar = br.EndChar; return(res); } } } } return(res); } if (!hasSameBefore && !hasSameAfter && ptmin != PartToken.ItemType.Prefix) { for (Pullenti.Ner.Token tt = (t as Pullenti.Ner.MetaToken).BeginToken; tt != null && tt.EndChar <= res.EndChar; tt = tt.Next) { if (tt.BeginChar >= res.BeginChar) { List <PartToken> pts = PartToken.TryAttachList(tt, false, 40); if (pts == null || pts.Count == 0) { break; } for (int i = 0; i < pts.Count; i++) { if (pts[i].Typ == ptmin) { res.BeginChar = pts[i].BeginChar; 
res.EndChar = pts[i].EndChar; tt = pts[i].EndToken; if (tt.Next != null && tt.Next.IsHiphen) { if (tt.Next.Next is Pullenti.Ner.NumberToken) { res.EndChar = tt.Next.Next.EndChar; } else if (tt.Next.Next != null && (tt.Next.Next.GetReferent() is Pullenti.Ner.Decree.DecreePartReferent) && hasDiap) { res.EndChar = (tt.Next.Next as Pullenti.Ner.MetaToken).BeginToken.EndChar; } } return(res); } } } } } return(res); }
/// <summary>
/// Tries to parse a "name + value(s) + unit(s)" measure starting at <paramref name="t"/>.
/// </summary>
/// <param name="t">Start token; must be a TextToken that is not a table control character.</param>
/// <param name="addUnits">Additional unit termins, passed through to the unit and number parsers.</param>
/// <param name="canBeSet">
/// When true and the result has no internals, a directly following single value whose units
/// cannot be equal to the first value's units turns the result into a set (IsSet = true, two Internals).
/// </param>
/// <param name="canUnitsAbsent">When true, a value for which no units can be found is still accepted.</param>
/// <param name="isResctriction">
/// When true, parsing fails as soon as ':', ',' or '_' is met after the name; the flag is also
/// forwarded to NumbersWithUnitToken.TryParseMulti.
/// </param>
/// <param name="isSubval">
/// When true, a plain letter token may serve as the name, a leading preposition is kept in the
/// name, and the name text is extracted with GetTextAttr.No instead of FirstNounGroupToNominative.
/// </param>
/// <returns>The parsed measure, or null when no measure can be recognized at this position.</returns>
// Overview of the flow, as far as it can be read from the code below:
//
// 1. Name head. An optional min/max marker (_isMinOrMax) is skipped, then a noun phrase is
//    parsed. Without a noun phrase the fallbacks are: a _tryParseWHL match, the word "КПД",
//    a word longer than 3 characters that is undefined in the dictionary, a lower-case "T"
//    (optionally with '='), or - with isSubval - a run of letter tokens. A noun phrase that
//    starts with a real number or a date item is rejected.
// 2. Name tail scan (first big loop, up to the next newline or table control character).
//    Extends the name and collects side information: min/max markers, modal/copula words
//    ("БЫТЬ", "ДОЛЖЕН", "ДОЛЖНЫЙ", "МОЖЕТ", "СОСТАВЛЯТЬ", with "НЕ" in front setting `not`),
//    WHL descriptors, nested measures after "ПРИ" (stored in `internals`), units given after a
//    comma or in parentheses (`units`, `units2`), bracket sequences and further noun phrases.
//    The loop stops at the first token that parses as a number with units.
// 3. Separator scan (second loop). Skips ':', ',', '_', table control characters and spaced
//    hyphens. After ':' a list of sub-measures is tried (recursive call with isSubval = true);
//    two or more of them produce an IsEmpty result whose Internals carry "name (subname)" names.
// 4. Values. NumbersWithUnitToken.TryParseMulti supplies the value(s). Several values without
//    internals give a result with one Internal per value (named from whd.Tag when present) and
//    an optional '±' tail. A single value gets units from `units` or from the following tokens,
//    and min/max markers convert SingleVal into FromVal / ToVal.
//    NOTE(review): `units2` is assigned but not read in this method, and the `t1 == null` tests
//    inside the `mts == null` branch come after an unconditional `if (t1 == null) return null`.
//    NOTE(review): `whd.Tag as List<string>` is indexed without a null check - presumably
//    _tryParseWHL always sets Tag to a list; confirm.
public static MeasureToken TryParse(Pullenti.Ner.Token t, Pullenti.Ner.Core.TerminCollection addUnits, bool canBeSet = true, bool canUnitsAbsent = false, bool isResctriction = false, bool isSubval = false) { if (!(t is Pullenti.Ner.TextToken)) { return(null); } if (t.IsTableControlChar) { return(null); } Pullenti.Ner.Token t0 = t; Pullenti.Ner.MetaToken whd = null; int minmax = 0; Pullenti.Ner.Token tt = NumbersWithUnitToken._isMinOrMax(t0, ref minmax); if (tt != null) { t = tt.Next; } Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t, Pullenti.Ner.Core.NounPhraseParseAttr.ParsePreposition | Pullenti.Ner.Core.NounPhraseParseAttr.IgnoreBrackets, 0, null); if (npt == null) { whd = NumbersWithUnitToken._tryParseWHL(t); if (whd != null) { npt = new Pullenti.Ner.Core.NounPhraseToken(t0, whd.EndToken); } else if (t0.IsValue("КПД", null)) { npt = new Pullenti.Ner.Core.NounPhraseToken(t0, t0); } else if ((t0 is Pullenti.Ner.TextToken) && t0.LengthChar > 3 && t0.GetMorphClassInDictionary().IsUndefined) { npt = new Pullenti.Ner.Core.NounPhraseToken(t0, t0); } else if (t0.IsValue("T", null) && t0.Chars.IsAllLower) { npt = new Pullenti.Ner.Core.NounPhraseToken(t0, t0); t = t0; if (t.Next != null && t.Next.IsChar('=')) { npt.EndToken = t.Next; } } else if ((t0 is Pullenti.Ner.TextToken) && t0.Chars.IsLetter && isSubval) { if (NumbersWithUnitToken.TryParse(t, addUnits, false, false, false, false) != null) { return(null); } npt = new Pullenti.Ner.Core.NounPhraseToken(t0, t0); for (t = t0.Next; t != null; t = t.Next) { if (t.WhitespacesBeforeCount > 2) { break; } else if (!(t is Pullenti.Ner.TextToken)) { break; } else if (!t.Chars.IsLetter) { Pullenti.Ner.Core.BracketSequenceToken br = Pullenti.Ner.Core.BracketHelper.TryParse(t, Pullenti.Ner.Core.BracketParseAttr.No, 100); if (br != null) { npt.EndToken = (t = br.EndToken); } else { break; } } else if (NumbersWithUnitToken.TryParse(t, addUnits, false, false, false, false) != null) { break; } 
else { npt.EndToken = t; } } } else { return(null); } } else if (Pullenti.Ner.Core.NumberHelper.TryParseRealNumber(t, true, false) != null) { return(null); } else { Pullenti.Ner.Date.Internal.DateItemToken dtok = Pullenti.Ner.Date.Internal.DateItemToken.TryAttach(t, null, false); if (dtok != null) { return(null); } } Pullenti.Ner.Token t1 = npt.EndToken; t = npt.EndToken; Pullenti.Ner.MetaToken name = new Pullenti.Ner.MetaToken(npt.BeginToken, npt.EndToken) { Morph = npt.Morph }; List <UnitToken> units = null; List <UnitToken> units2 = null; List <MeasureToken> internals = new List <MeasureToken>(); bool not = false; for (tt = t1.Next; tt != null; tt = tt.Next) { if (tt.IsNewlineBefore) { break; } if (tt.IsTableControlChar) { break; } Pullenti.Ner.Token tt2 = NumbersWithUnitToken._isMinOrMax(tt, ref minmax); if (tt2 != null) { t1 = (t = (tt = tt2)); continue; } if ((tt.IsValue("БЫТЬ", null) || tt.IsValue("ДОЛЖЕН", null) || tt.IsValue("ДОЛЖНЫЙ", null)) || tt.IsValue("МОЖЕТ", null) || ((tt.IsValue("СОСТАВЛЯТЬ", null) && !tt.GetMorphClassInDictionary().IsAdjective))) { t1 = (t = tt); if (tt.Previous.IsValue("НЕ", null)) { not = true; } continue; } Pullenti.Ner.MetaToken www = NumbersWithUnitToken._tryParseWHL(tt); if (www != null) { whd = www; t1 = (t = (tt = www.EndToken)); continue; } if (tt.IsValue("ПРИ", null)) { MeasureToken mt1 = TryParse(tt.Next, addUnits, false, false, true, false); if (mt1 != null) { internals.Add(mt1); t1 = (t = (tt = mt1.EndToken)); continue; } NumbersWithUnitToken n1 = NumbersWithUnitToken.TryParse(tt.Next, addUnits, false, false, false, false); if (n1 != null && n1.Units.Count > 0) { mt1 = new MeasureToken(n1.BeginToken, n1.EndToken) { Nums = n1 }; internals.Add(mt1); t1 = (t = (tt = mt1.EndToken)); continue; } } if (tt.IsValue("ПО", null) && tt.Next != null && tt.Next.IsValue("U", null)) { t1 = (t = (tt = tt.Next)); continue; } if (internals.Count > 0) { if (tt.IsChar(':')) { break; } MeasureToken mt1 = TryParse(tt.Next, addUnits, false, 
false, true, false); if (mt1 != null && mt1.Reliable) { internals.Add(mt1); t1 = (t = (tt = mt1.EndToken)); continue; } } if ((tt is Pullenti.Ner.NumberToken) && (tt as Pullenti.Ner.NumberToken).Typ == Pullenti.Ner.NumberSpellingType.Words) { Pullenti.Ner.Core.NounPhraseToken npt3 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(tt, Pullenti.Ner.Core.NounPhraseParseAttr.ParseNumericAsAdjective, 0, null); if (npt3 != null) { t1 = (tt = npt3.EndToken); if (internals.Count == 0) { name.EndToken = t1; } continue; } } if (((tt.IsHiphen && !tt.IsWhitespaceBefore && !tt.IsWhitespaceAfter) && (tt.Next is Pullenti.Ner.NumberToken) && (tt.Previous is Pullenti.Ner.TextToken)) && tt.Previous.Chars.IsAllUpper) { t1 = (tt = (t = tt.Next)); if (internals.Count == 0) { name.EndToken = t1; } continue; } if (((tt is Pullenti.Ner.NumberToken) && !tt.IsWhitespaceBefore && (tt.Previous is Pullenti.Ner.TextToken)) && tt.Previous.Chars.IsAllUpper) { t1 = (t = tt); if (internals.Count == 0) { name.EndToken = t1; } continue; } if ((((tt is Pullenti.Ner.NumberToken) && !tt.IsWhitespaceAfter && tt.Next.IsHiphen) && !tt.Next.IsWhitespaceAfter && (tt.Next.Next is Pullenti.Ner.TextToken)) && tt.Next.Next.LengthChar > 2) { t1 = (t = (tt = tt.Next.Next)); Pullenti.Ner.Core.NounPhraseToken npt1 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(tt, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null); if (npt1 != null && npt1.EndChar > tt.EndChar) { t1 = (t = (tt = npt1.EndToken)); } if (internals.Count == 0) { name.EndToken = t1; } continue; } if ((tt is Pullenti.Ner.NumberToken) && tt.Previous != null) { if (tt.Previous.IsValue("USB", null)) { t1 = (t = tt); if (internals.Count == 0) { name.EndToken = t1; } for (Pullenti.Ner.Token ttt = tt.Next; ttt != null; ttt = ttt.Next) { if (ttt.IsWhitespaceBefore) { break; } if (ttt.IsCharOf(",:")) { break; } t1 = (t = (tt = ttt)); if (internals.Count == 0) { name.EndToken = t1; } } continue; } } NumbersWithUnitToken mt0 = NumbersWithUnitToken.TryParse(tt, addUnits, 
false, false, false, false); if (mt0 != null) { Pullenti.Ner.Core.NounPhraseToken npt1 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(tt, Pullenti.Ner.Core.NounPhraseParseAttr.ParseNumericAsAdjective | Pullenti.Ner.Core.NounPhraseParseAttr.ParsePreposition, 0, null); if (npt1 != null && npt1.EndChar > mt0.EndChar) { t1 = (t = (tt = npt1.EndToken)); if (internals.Count == 0) { name.EndToken = t1; } continue; } break; } if (((tt.IsComma || tt.IsChar('('))) && tt.Next != null) { www = NumbersWithUnitToken._tryParseWHL(tt.Next); if (www != null) { whd = www; t1 = (t = (tt = www.EndToken)); if (tt.Next != null && tt.Next.IsComma) { t1 = (tt = tt.Next); } if (tt.Next != null && tt.Next.IsChar(')')) { t1 = (tt = tt.Next); continue; } } List <UnitToken> uu = UnitToken.TryParseList(tt.Next, addUnits, false); if (uu != null) { t1 = (t = uu[uu.Count - 1].EndToken); units = uu; if (tt.IsChar('(') && t1.Next != null && t1.Next.IsChar(')')) { t1 = (t = (tt = t1.Next)); continue; } else if (t1.Next != null && t1.Next.IsChar('(')) { uu = UnitToken.TryParseList(t1.Next.Next, addUnits, false); if (uu != null && uu[uu.Count - 1].EndToken.Next != null && uu[uu.Count - 1].EndToken.Next.IsChar(')')) { units2 = uu; t1 = (t = (tt = uu[uu.Count - 1].EndToken.Next)); continue; } www = NumbersWithUnitToken._tryParseWHL(t1.Next); if (www != null) { whd = www; t1 = (t = (tt = www.EndToken)); continue; } } if (uu != null && uu.Count > 0 && !uu[0].IsDoubt) { break; } if (t1.Next != null) { if (t1.Next.IsTableControlChar || t1.IsNewlineAfter) { break; } } units = null; } } if (Pullenti.Ner.Core.BracketHelper.CanBeStartOfSequence(tt, false, false) && !(tt.Next is Pullenti.Ner.NumberToken)) { Pullenti.Ner.Core.BracketSequenceToken br = Pullenti.Ner.Core.BracketHelper.TryParse(tt, Pullenti.Ner.Core.BracketParseAttr.No, 100); if (br != null) { t1 = (t = (tt = br.EndToken)); continue; } } if (tt.IsValue("НЕ", null) && tt.Next != null) { Pullenti.Morph.MorphClass mc = 
tt.Next.GetMorphClassInDictionary(); if (mc.IsAdverb || mc.IsMisc) { break; } continue; } if (tt.IsValue("ЯМЗ", null)) { } Pullenti.Ner.Core.NounPhraseToken npt2 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(tt, Pullenti.Ner.Core.NounPhraseParseAttr.ParsePreposition | Pullenti.Ner.Core.NounPhraseParseAttr.IgnoreBrackets | Pullenti.Ner.Core.NounPhraseParseAttr.ParsePronouns, 0, null); if (npt2 == null) { if (tt.Morph.Class.IsPreposition || tt.Morph.Class.IsConjunction) { Pullenti.Ner.Core.TerminToken to = NumbersWithUnitToken.m_Termins.TryParse(tt, Pullenti.Ner.Core.TerminParseAttr.No); if (to != null) { if ((to.EndToken.Next is Pullenti.Ner.TextToken) && to.EndToken.Next.IsLetters) { } else { break; } } t1 = tt; continue; } Pullenti.Morph.MorphClass mc = tt.GetMorphClassInDictionary(); if (((tt is Pullenti.Ner.TextToken) && tt.Chars.IsLetter && tt.LengthChar > 1) && (((tt.Chars.IsAllUpper || mc.IsAdverb || mc.IsUndefined) || mc.IsAdjective))) { List <UnitToken> uu = UnitToken.TryParseList(tt, addUnits, false); if (uu != null) { if (uu[0].LengthChar > 1 || uu.Count > 1) { units = uu; t1 = (t = uu[uu.Count - 1].EndToken); break; } } t1 = (t = tt); if (internals.Count == 0) { name.EndToken = tt; } continue; } if (tt.IsComma) { continue; } if (tt.IsChar('.')) { if (!Pullenti.Ner.Core.MiscHelper.CanBeStartOfSentence(tt.Next)) { continue; } List <UnitToken> uu = UnitToken.TryParseList(tt.Next, addUnits, false); if (uu != null) { if (uu[0].LengthChar > 2 || uu.Count > 1) { units = uu; t1 = (t = uu[uu.Count - 1].EndToken); break; } } } break; } t1 = (t = (tt = npt2.EndToken)); if (internals.Count > 0) { } else if (t.IsValue("ПРЕДЕЛ", null) || t.IsValue("ГРАНИЦА", null) || t.IsValue("ДИАПАЗОН", null)) { } else if (t.Chars.IsLetter) { name.EndToken = t1; } } Pullenti.Ner.Token t11 = t1; for (t1 = t1.Next; t1 != null; t1 = t1.Next) { if (t1.IsTableControlChar) { } else if (t1.IsCharOf(":,_")) { if (isResctriction) { return(null); } Pullenti.Ner.MetaToken www = 
NumbersWithUnitToken._tryParseWHL(t1.Next); if (www != null) { whd = www; t1 = (t = www.EndToken); continue; } List <UnitToken> uu = UnitToken.TryParseList(t1.Next, addUnits, false); if (uu != null) { if (uu[0].LengthChar > 1 || uu.Count > 1) { units = uu; t1 = (t = uu[uu.Count - 1].EndToken); continue; } } if (t1.IsChar(':')) { List <MeasureToken> li = new List <MeasureToken>(); for (Pullenti.Ner.Token ttt = t1.Next; ttt != null; ttt = ttt.Next) { if (ttt.IsHiphen || ttt.IsTableControlChar) { continue; } if ((ttt is Pullenti.Ner.TextToken) && !ttt.Chars.IsLetter) { continue; } MeasureToken mt1 = TryParse(ttt, addUnits, true, true, false, true); if (mt1 == null) { break; } li.Add(mt1); ttt = mt1.EndToken; if (ttt.Next != null && ttt.Next.IsChar(';')) { ttt = ttt.Next; } if (ttt.IsChar(';')) { } else if (ttt.IsNewlineAfter && mt1.IsNewlineBefore) { } else { break; } } if (li.Count > 1) { MeasureToken res0 = new MeasureToken(t0, li[li.Count - 1].EndToken) { Internals = li, IsEmpty = true }; if (internals != null && internals.Count > 0) { res0.InternalEx = internals[0]; } string nam = Pullenti.Ner.Core.MiscHelper.GetTextValueOfMetaToken(name, Pullenti.Ner.Core.GetTextAttr.FirstNounGroupToNominative); li[0].BeginToken = t0; foreach (MeasureToken v in li) { v.Name = string.Format("{0} ({1})", nam, v.Name ?? 
"").Trim(); if (v.Nums != null && v.Nums.Units.Count == 0 && units != null) { v.Nums.Units = units; } } return(res0); } } } else if (t1.IsHiphen && t1.IsWhitespaceAfter && t1.IsWhitespaceBefore) { } else if (t1.IsHiphen && t1.Next != null && t1.Next.IsChar('(')) { } else { break; } } if (t1 == null) { return(null); } List <NumbersWithUnitToken> mts = NumbersWithUnitToken.TryParseMulti(t1, addUnits, false, not, true, isResctriction); if (mts == null) { if (units != null && units.Count > 0) { if (t1 == null || t1.Previous.IsChar(':')) { mts = new List <NumbersWithUnitToken>(); if (t1 == null) { for (t1 = t11; t1 != null && t1.Next != null; t1 = t1.Next) { } } else { t1 = t1.Previous; } mts.Add(new NumbersWithUnitToken(t0, t1) { SingleVal = double.NaN }); } } if (mts == null) { return(null); } } NumbersWithUnitToken mt = mts[0]; if (mt.BeginToken == mt.EndToken && !(mt.BeginToken is Pullenti.Ner.NumberToken)) { return(null); } if (!isSubval && name.BeginToken.Morph.Class.IsPreposition) { name.BeginToken = name.BeginToken.Next; } if (mt.WHL != null) { whd = mt.WHL; } for (int kk = 0; kk < 10; kk++) { if (whd != null && whd.EndToken == name.EndToken) { name.EndToken = whd.BeginToken.Previous; continue; } if (units != null) { if (units[units.Count - 1].EndToken == name.EndToken) { name.EndToken = units[0].BeginToken.Previous; continue; } } break; } if (mts.Count > 1 && internals.Count == 0) { if (mt.Units.Count == 0) { if (units != null) { foreach (NumbersWithUnitToken m in mts) { m.Units = units; } } } MeasureToken res1 = new MeasureToken(t0, mts[mts.Count - 1].EndToken) { Morph = name.Morph, Reliable = true }; res1.Name = Pullenti.Ner.Core.MiscHelper.GetTextValueOfMetaToken(name, Pullenti.Ner.Core.GetTextAttr.FirstNounGroupToNominative); for (int k = 0; k < mts.Count; k++) { MeasureToken ttt = new MeasureToken(mts[k].BeginToken, mts[k].EndToken) { Nums = mts[k] }; if (whd != null) { List <string> nams = whd.Tag as List <string>; if (k < nams.Count) { ttt.Name = 
nams[k]; } } res1.Internals.Add(ttt); } Pullenti.Ner.Token tt1 = res1.EndToken.Next; if (tt1 != null && tt1.IsChar('±')) { NumbersWithUnitToken nn = NumbersWithUnitToken._tryParse(tt1, addUnits, true, false, false); if (nn != null && nn.PlusMinusPercent) { res1.EndToken = nn.EndToken; res1.Nums = nn; if (nn.Units.Count > 0 && units == null && mt.Units.Count == 0) { foreach (NumbersWithUnitToken m in mts) { m.Units = nn.Units; } } } } return(res1); } if (!mt.IsWhitespaceBefore) { if (mt.BeginToken.Previous == null) { return(null); } if (mt.BeginToken.Previous.IsCharOf(":),") || mt.BeginToken.Previous.IsTableControlChar || mt.BeginToken.Previous.IsValue("IP", null)) { } else if (mt.BeginToken.IsHiphen && mt.Units.Count > 0 && !mt.Units[0].IsDoubt) { } else { return(null); } } if (mt.Units.Count == 0 && units != null) { mt.Units = units; if (mt.DivNum != null && units.Count > 1 && mt.DivNum.Units.Count == 0) { for (int i = 1; i < units.Count; i++) { if (units[i].Pow == -1) { for (int j = i; j < units.Count; j++) { mt.DivNum.Units.Add(units[j]); units[j].Pow = -units[j].Pow; } mt.Units.RemoveRange(i, units.Count - i); break; } } } } if ((minmax < 0) && mt.SingleVal != null) { mt.FromVal = mt.SingleVal; mt.FromInclude = true; mt.SingleVal = null; } if (minmax > 0 && mt.SingleVal != null) { mt.ToVal = mt.SingleVal; mt.ToInclude = true; mt.SingleVal = null; } if (mt.Units.Count == 0) { units = UnitToken.TryParseList(mt.EndToken.Next, addUnits, true); if (units == null) { if (canUnitsAbsent) { } else { return(null); } } else { mt.Units = units; } } MeasureToken res = new MeasureToken(t0, mt.EndToken) { Morph = name.Morph, Internals = internals }; if (((!t0.IsWhitespaceBefore && t0.Previous != null && t0 == name.BeginToken) && t0.Previous.IsHiphen && !t0.Previous.IsWhitespaceBefore) && (t0.Previous.Previous is Pullenti.Ner.TextToken)) { name.BeginToken = (res.BeginToken = name.BeginToken.Previous.Previous); } res.Name = 
Pullenti.Ner.Core.MiscHelper.GetTextValueOfMetaToken(name, (!isSubval ? Pullenti.Ner.Core.GetTextAttr.FirstNounGroupToNominative : Pullenti.Ner.Core.GetTextAttr.No)); res.Nums = mt; foreach (UnitToken u in res.Nums.Units) { if (u.Keyword != null) { if (u.Keyword.BeginChar >= res.BeginChar) { res.Reliable = true; } } } res._parseInternals(addUnits); if (res.Internals.Count > 0 || !canBeSet) { return(res); } t1 = res.EndToken.Next; if (t1 != null && t1.IsCommaAnd) { t1 = t1.Next; } List <NumbersWithUnitToken> mts1 = NumbersWithUnitToken.TryParseMulti(t1, addUnits, false, false, false, false); if ((mts1 != null && mts1.Count == 1 && (t1.WhitespacesBeforeCount < 3)) && mts1[0].Units.Count > 0 && !UnitToken.CanBeEquals(mts[0].Units, mts1[0].Units)) { res.IsSet = true; res.Nums = null; res.Internals.Add(new MeasureToken(mt.BeginToken, mt.EndToken) { Nums = mt }); res.Internals.Add(new MeasureToken(mts1[0].BeginToken, mts1[0].EndToken) { Nums = mts1[0] }); res.EndToken = mts1[0].EndToken; } return(res); }
/// <summary>
/// Tries to parse a number followed by a postfix (measurement unit or currency)
/// starting at <paramref name="t"/>. A leading one-character money sign
/// (as recognised by NumberHelper.IsMoneyChar) is also accepted.
/// </summary>
/// <param name="t">First token of the candidate; may be null.</param>
/// <returns>The parsed token, or null when nothing suitable starts at <paramref name="t"/>.</returns>
public static Pullenti.Ner.Core.NumberExToken TryParseNumberWithPostfix(Pullenti.Ner.Token t)
{
    if (t == null)
    {
        return null;
    }
    Pullenti.Ner.Token t0 = t;
    string isDollar = null;
    if (t.LengthChar == 1 && t.Next != null)
    {
        if ((isDollar = Pullenti.Ner.Core.NumberHelper.IsMoneyChar(t)) != null)
        {
            t = t.Next;
        }
    }
    Pullenti.Ner.NumberToken nt = t as Pullenti.Ner.NumberToken;
    if (nt == null)
    {
        // Variant "(N) <money postfix>" that is not preceded by another number.
        if ((!(t.Previous is Pullenti.Ner.NumberToken) && t.IsChar('(') && (t.Next is Pullenti.Ner.NumberToken)) && t.Next.Next != null && t.Next.Next.IsChar(')'))
        {
            Pullenti.Ner.Core.TerminToken toks1 = m_Postfixes.TryParse(t.Next.Next.Next, Pullenti.Ner.Core.TerminParseAttr.No);
            if (toks1 != null && ((Pullenti.Ner.Core.NumberExType)toks1.Termin.Tag) == Pullenti.Ner.Core.NumberExType.Money)
            {
                Pullenti.Ner.NumberToken nt0 = t.Next as Pullenti.Ner.NumberToken;
                Pullenti.Ner.Core.NumberExToken res = new Pullenti.Ner.Core.NumberExToken(t, toks1.EndToken, nt0.Value, nt0.Typ, Pullenti.Ner.Core.NumberExType.Money) { AltRealValue = nt0.RealValue, Morph = toks1.BeginToken.Morph };
                return _correctMoney(res, toks1.BeginToken);
            }
        }
        // Variant: a single adjective word glued from a numeral prefix and a postfix suffix.
        Pullenti.Ner.TextToken tt = t as Pullenti.Ner.TextToken;
        if (tt == null || !tt.Morph.Class.IsAdjective)
        {
            return null;
        }
        string val = tt.Term;
        for (int i = 4; i < (val.Length - 5); i++)
        {
            string v = val.Substring(0, i);
            List<Pullenti.Ner.Core.Termin> li = Pullenti.Ner.Core.NumberHelper.m_Nums.FindTerminsByString(v, tt.Morph.Language);
            if (li == null)
            {
                continue;
            }
            string vv = val.Substring(i);
            List<Pullenti.Ner.Core.Termin> lii = m_Postfixes.FindTerminsByString(vv, tt.Morph.Language);
            if (lii != null && lii.Count > 0)
            {
                Pullenti.Ner.Core.NumberExToken re = new Pullenti.Ner.Core.NumberExToken(t, t, ((int)li[0].Tag).ToString(), Pullenti.Ner.NumberSpellingType.Words, (Pullenti.Ner.Core.NumberExType)lii[0].Tag) { Morph = t.Morph };
                _correctExtTypes(re);
                return re;
            }
            // Only the first prefix that resolves to a numeral is tried.
            break;
        }
        return null;
    }
    if (t.Next == null && isDollar == null)
    {
        return null;
    }
    double f = nt.RealValue;
    if (double.IsNaN(f))
    {
        return null;
    }
    Pullenti.Ner.Token t1 = nt.Next;
    if ((t1 != null && t1.IsCharOf(",.")) || ((t1 is Pullenti.Ner.NumberToken) && (t1.WhitespacesBeforeCount < 3)))
    {
        Pullenti.Ner.NumberToken tt11 = Pullenti.Ner.Core.NumberHelper.TryParseRealNumber(nt, false, false);
        if (tt11 != null)
        {
            t1 = tt11.EndToken.Next;
            f = tt11.RealValue;
        }
    }
    if (t1 == null)
    {
        if (isDollar == null)
        {
            return null;
        }
    }
    else if ((t1.Next != null && t1.Next.IsValue("С", "З") && t1.Next.Next != null) && t1.Next.Next.IsValue("ПОЛОВИНА", null))
    {
        f += 0.5;
        t1 = t1.Next.Next;
    }
    if (t1 != null && t1.IsHiphen && t1.Next != null)
    {
        t1 = t1.Next;
    }
    bool det = false;
    double altf = f;
    // Skip a two-character zero number that follows a hyphen.
    if (((t1 is Pullenti.Ner.NumberToken) && t1.Previous != null && t1.Previous.IsHiphen) && (t1 as Pullenti.Ner.NumberToken).IntValue == 0 && t1.LengthChar == 2)
    {
        t1 = t1.Next;
    }
    // Bracketed repetition of the value: "(N)", "(N NN/100)", "(N ЦЕЛЫХ ...)", "(N <postfix>)".
    if ((t1 != null && t1.Next != null && t1.IsChar('(')) && ((t1.Next is Pullenti.Ner.NumberToken) || t1.Next.IsValue("НОЛЬ", null)) && t1.Next.Next != null)
    {
        Pullenti.Ner.NumberToken nt1 = t1.Next as Pullenti.Ner.NumberToken;
        double val = (double)0;
        if (nt1 != null)
        {
            val = nt1.RealValue;
        }
        if (Math.Floor(f) == Math.Floor(val))
        {
            Pullenti.Ner.Token ttt = t1.Next.Next;
            if (ttt.IsChar(')'))
            {
                t1 = ttt.Next;
                det = true;
                if ((t1 is Pullenti.Ner.NumberToken) && (t1 as Pullenti.Ner.NumberToken).IntValue != null && (t1 as Pullenti.Ner.NumberToken).IntValue.Value == 0)
                {
                    t1 = t1.Next;
                }
            }
            else if (((((ttt is Pullenti.Ner.NumberToken) && ((ttt as Pullenti.Ner.NumberToken).RealValue < 100) && ttt.Next != null) && ttt.Next.IsChar('/') && ttt.Next.Next != null) && ttt.Next.Next.GetSourceText() == "100" && ttt.Next.Next.Next != null) && ttt.Next.Next.Next.IsChar(')'))
            {
                int rest = GetDecimalRest100(f);
                if ((ttt as Pullenti.Ner.NumberToken).IntValue != null && rest == (ttt as Pullenti.Ner.NumberToken).IntValue.Value)
                {
                    t1 = ttt.Next.Next.Next.Next;
                    det = true;
                }
            }
            else if ((ttt.IsValue("ЦЕЛЫХ", null) && (ttt.Next is Pullenti.Ner.NumberToken) && ttt.Next.Next != null) && ttt.Next.Next.Next != null && ttt.Next.Next.Next.IsChar(')'))
            {
                Pullenti.Ner.NumberToken num2 = ttt.Next as Pullenti.Ner.NumberToken;
                altf = num2.RealValue;
                if (ttt.Next.Next.IsValue("ДЕСЯТЫЙ", null))
                {
                    altf /= 10;
                }
                else if (ttt.Next.Next.IsValue("СОТЫЙ", null))
                {
                    altf /= 100;
                }
                else if (ttt.Next.Next.IsValue("ТЫСЯЧНЫЙ", null))
                {
                    altf /= 1000;
                }
                else if (ttt.Next.Next.IsValue("ДЕСЯТИТЫСЯЧНЫЙ", null))
                {
                    altf /= 10000;
                }
                else if (ttt.Next.Next.IsValue("СТОТЫСЯЧНЫЙ", null))
                {
                    altf /= 100000;
                }
                else if (ttt.Next.Next.IsValue("МИЛЛИОННЫЙ", null))
                {
                    altf /= 1000000;
                }
                if (altf < 1)
                {
                    altf += val;
                    t1 = ttt.Next.Next.Next.Next;
                    det = true;
                }
            }
            else
            {
                Pullenti.Ner.Core.TerminToken toks1 = m_Postfixes.TryParse(ttt, Pullenti.Ner.Core.TerminParseAttr.No);
                if (toks1 != null)
                {
                    if (((Pullenti.Ner.Core.NumberExType)toks1.Termin.Tag) == Pullenti.Ner.Core.NumberExType.Money)
                    {
                        if (toks1.EndToken.Next != null && toks1.EndToken.Next.IsChar(')'))
                        {
                            Pullenti.Ner.Core.NumberExToken res = new Pullenti.Ner.Core.NumberExToken(t, toks1.EndToken.Next, nt.Value, nt.Typ, Pullenti.Ner.Core.NumberExType.Money) { RealValue = f, AltRealValue = altf, Morph = toks1.BeginToken.Morph };
                            return _correctMoney(res, toks1.BeginToken);
                        }
                    }
                }
                // The bracket content may itself be a complete number-with-postfix.
                Pullenti.Ner.Core.NumberExToken res2 = TryParseNumberWithPostfix(t1.Next);
                if (res2 != null && res2.EndToken.Next != null && res2.EndToken.Next.IsChar(')'))
                {
                    res2.BeginToken = t;
                    res2.EndToken = res2.EndToken.Next;
                    res2.AltRealValue = res2.RealValue;
                    res2.RealValue = f;
                    _correctExtTypes(res2);
                    if (res2.WhitespacesAfterCount < 2)
                    {
                        Pullenti.Ner.Core.TerminToken toks2 = m_Postfixes.TryParse(res2.EndToken.Next, Pullenti.Ner.Core.TerminParseAttr.No);
                        if (toks2 != null)
                        {
                            if (((Pullenti.Ner.Core.NumberExType)toks2.Termin.Tag) == Pullenti.Ner.Core.NumberExType.Money)
                            {
                                res2.EndToken = toks2.EndToken;
                            }
                        }
                    }
                    return res2;
                }
            }
        }
        else if (nt1 != null && nt1.Typ == Pullenti.Ner.NumberSpellingType.Words && nt.Typ == Pullenti.Ner.NumberSpellingType.Digit)
        {
            // Digits followed by a different value written in words inside the brackets.
            altf = nt1.RealValue;
            Pullenti.Ner.Token ttt = t1.Next.Next;
            if (ttt.IsChar(')'))
            {
                t1 = ttt.Next;
                det = true;
            }
            if (!det)
            {
                altf = f;
            }
        }
    }
    // Skip a bracketed remark that starts with "СУММА".
    if ((t1 != null && t1.IsChar('(') && t1.Next != null) && t1.Next.IsValue("СУММА", null))
    {
        Pullenti.Ner.Core.BracketSequenceToken br = Pullenti.Ner.Core.BracketHelper.TryParse(t1, Pullenti.Ner.Core.BracketParseAttr.No, 100);
        if (br != null)
        {
            t1 = br.EndToken.Next;
        }
    }
    if (isDollar != null)
    {
        // Money sign came first: the number itself closes the token, optionally with a multiplier.
        Pullenti.Ner.Token te = null;
        if (t1 != null)
        {
            te = t1.Previous;
        }
        else
        {
            // No follower: walk to the last token of the chain.
            for (t1 = t0; t1 != null; t1 = t1.Next)
            {
                if (t1.Next == null)
                {
                    te = t1;
                }
            }
        }
        if (te == null)
        {
            return null;
        }
        if (te.IsHiphen && te.Next != null)
        {
            if (te.Next.IsValue("МИЛЛИОННЫЙ", null))
            {
                f *= 1000000;
                altf *= 1000000;
                te = te.Next;
            }
            else if (te.Next.IsValue("МИЛЛИАРДНЫЙ", null))
            {
                f *= 1000000000;
                altf *= 1000000000;
                te = te.Next;
            }
        }
        if (!te.IsWhitespaceAfter && (te.Next is Pullenti.Ner.TextToken))
        {
            if (te.Next.IsValue("M", null))
            {
                f *= 1000000;
                altf *= 1000000;
                te = te.Next;
            }
            else if (te.Next.IsValue("BN", null))
            {
                f *= 1000000000;
                altf *= 1000000000;
                te = te.Next;
            }
        }
        return new Pullenti.Ner.Core.NumberExToken(t0, te, "", nt.Typ, Pullenti.Ner.Core.NumberExType.Money) { RealValue = f, AltRealValue = altf, ExTypParam = isDollar };
    }
    if (t1 == null || (t1.IsNewlineBefore && !det))
    {
        return null;
    }
    Pullenti.Ner.Core.TerminToken toks = m_Postfixes.TryParse(t1, Pullenti.Ner.Core.TerminParseAttr.No);
    if ((toks == null && det && (t1 is Pullenti.Ner.NumberToken)) && (t1 as Pullenti.Ner.NumberToken).Value == "0")
    {
        toks = m_Postfixes.TryParse(t1.Next, Pullenti.Ner.Core.TerminParseAttr.No);
    }
    if (toks == null && t1.IsChar('р'))
    {
        // A lone 'р' counts as roubles only when one of the (up to 10) preceding tokens
        // is a money-related word or a MONEY referent.
        int cou = 10;
        for (Pullenti.Ner.Token ttt = t0.Previous; ttt != null && cou > 0; ttt = ttt.Previous, cou--)
        {
            if (ttt.IsValue("СУММА", null) || ttt.IsValue("НАЛИЧНЫЙ", null) || ttt.IsValue("БАЛАНС", null))
            {
            }
            else if (ttt.GetReferent() != null && ttt.GetReferent().TypeName == "MONEY")
            {
            }
            else
            {
                continue;
            }
            toks = new Pullenti.Ner.Core.TerminToken(t1, t1) { Termin = m_Postfixes.FindTerminsByCanonicText("RUB")[0] };
            if (t1.Next != null && t1.Next.IsChar('.'))
            {
                toks.EndToken = t1.Next;
            }
            Pullenti.Ner.Core.NumberExType ty = (Pullenti.Ner.Core.NumberExType)toks.Termin.Tag;
            return new Pullenti.Ner.Core.NumberExToken(t, toks.EndToken, nt.Value, nt.Typ, ty) { RealValue = f, AltRealValue = altf, Morph = toks.BeginToken.Morph, ExTypParam = "RUB" };
        }
    }
    if (toks != null)
    {
        t1 = toks.EndToken;
        // Swallow a trailing '.' only when the postfix looks like an abbreviation.
        if (!t1.IsChar('.') && t1.Next != null && t1.Next.IsChar('.'))
        {
            if ((t1 is Pullenti.Ner.TextToken) && t1.IsValue(toks.Termin.Terms[0].CanonicalText, null))
            {
            }
            else if (!t1.Chars.IsLetter)
            {
            }
            else
            {
                t1 = t1.Next;
            }
        }
        if (toks.Termin.CanonicText == "LTL")
        {
            return null;
        }
        if (toks.BeginToken == t1)
        {
            // A single-token postfix that is a free-standing preposition/conjunction is rejected.
            if (t1.Morph.Class.IsPreposition || t1.Morph.Class.IsConjunction)
            {
                if (t1.IsWhitespaceBefore && t1.IsWhitespaceAfter)
                {
                    return null;
                }
            }
        }
        Pullenti.Ner.Core.NumberExType ty = (Pullenti.Ner.Core.NumberExType)toks.Termin.Tag;
        Pullenti.Ner.Core.NumberExToken res = new Pullenti.Ner.Core.NumberExToken(t, t1, nt.Value, nt.Typ, ty) { RealValue = f, AltRealValue = altf, Morph = toks.BeginToken.Morph };
        if (ty != Pullenti.Ner.Core.NumberExType.Money)
        {
            _correctExtTypes(res);
            return res;
        }
        return _correctMoney(res, toks.BeginToken);
    }
    Pullenti.Ner.Core.NumberExToken pfx = _attachSpecPostfix(t1);
    if (pfx != null)
    {
        pfx.BeginToken = t;
        pfx.Value = nt.Value;
        pfx.Typ = nt.Typ;
        pfx.RealValue = f;
        pfx.AltRealValue = altf;
        return pfx;
    }
    if (t1.Next != null && (t1.Morph.Class.IsPreposition || t1.Morph.Class.IsConjunction))
    {
        if (t1.IsValue("НА", null))
        {
        }
        else
        {
            // "N <prep/conj> M unit": borrow the unit of the following number; the token covers only N.
            Pullenti.Ner.Core.NumberExToken nn = TryParseNumberWithPostfix(t1.Next);
            if (nn != null)
            {
                return new Pullenti.Ner.Core.NumberExToken(t, t, nt.Value, nt.Typ, nn.ExTyp) { RealValue = f, AltRealValue = altf, ExTyp2 = nn.ExTyp2, ExTypParam = nn.ExTypParam };
            }
        }
    }
    if (!t1.IsWhitespaceAfter && (t1.Next is Pullenti.Ner.NumberToken) && (t1 is Pullenti.Ner.TextToken))
    {
        // Unit glued to a multiplication sign and the next number, e.g. "СМХ<number>".
        string term = (t1 as Pullenti.Ner.TextToken).Term;
        Pullenti.Ner.Core.NumberExType ty = Pullenti.Ner.Core.NumberExType.Undefined;
        if (term == "СМХ" || term == "CMX")
        {
            ty = Pullenti.Ner.Core.NumberExType.Santimeter;
        }
        else if (term == "MX" || term == "МХ")
        {
            ty = Pullenti.Ner.Core.NumberExType.Meter;
        }
        else if (term == "MMX" || term == "ММХ")
        {
            ty = Pullenti.Ner.Core.NumberExType.Millimeter;
        }
        if (ty != Pullenti.Ner.Core.NumberExType.Undefined)
        {
            return new Pullenti.Ner.Core.NumberExToken(t, t1, nt.Value, nt.Typ, ty) { RealValue = f, AltRealValue = altf, MultAfter = true };
        }
    }
    return null;
}
/// <summary>
/// Tries to recognise an alternative ending of a phone number that starts at
/// <paramref name="t0"/>: a separator (slash, backslash, hyphen or bracket) followed by a number.
/// </summary>
/// <param name="t0">Candidate separator token; may be null.</param>
/// <param name="ph0">Previously built phone referent whose m_Template is consulted; may be null.</param>
/// <param name="pli">Items of the previously parsed phone number; may be null.</param>
/// <returns>An item of type <c>PhoneItemType.Alt</c>, or null when no alternative is found.</returns>
public static PhoneItemToken TryAttachAlternate(Pullenti.Ner.Token t0, Pullenti.Ner.Phone.PhoneReferent ph0, List<PhoneItemToken> pli)
{
    if (t0 == null)
    {
        return null;
    }
    // Slash followed by a number at most two characters long.
    if (t0.IsCharOf("\\/") && (t0.Next is Pullenti.Ner.NumberToken) && (t0.Next.EndChar - t0.Next.BeginChar) <= 1)
    {
        List<PhoneItemToken> pli1 = PhoneItemToken.TryAttachAll(t0.Next, 15);
        if (pli1 != null && pli1.Count > 1)
        {
            if (pli1[pli1.Count - 1].ItemType == PhoneItemType.Delim)
            {
                pli1.RemoveAt(pli1.Count - 1);
            }
            if (pli != null && pli1.Count <= pli.Count)
            {
                // The alternative must mirror the tail of the original item list:
                // same item types, and number groups of the same length.
                int ii;
                string num = "";
                for (ii = 0; ii < pli1.Count; ii++)
                {
                    PhoneItemToken p1 = pli1[ii];
                    PhoneItemToken p0 = pli[(pli.Count - pli1.Count) + ii];
                    if (p1.ItemType != p0.ItemType)
                    {
                        break;
                    }
                    if (p1.ItemType != PhoneItemType.Number && p1.ItemType != PhoneItemType.Delim)
                    {
                        break;
                    }
                    if (p1.ItemType == PhoneItemType.Number)
                    {
                        if (p1.LengthChar != p0.LengthChar)
                        {
                            break;
                        }
                        num += p1.Value;
                    }
                }
                if (ii >= pli1.Count)
                {
                    return new PhoneItemToken(t0, pli1[pli1.Count - 1].EndToken) { ItemType = PhoneItemType.Alt, Value = num };
                }
            }
        }
        return new PhoneItemToken(t0, t0.Next) { ItemType = PhoneItemType.Alt, Value = t0.Next.GetSourceText() };
    }
    // Hyphen + short number, accepted only at the end of text/line or before ',' or '.'.
    if (t0.IsHiphen && (t0.Next is Pullenti.Ner.NumberToken) && (t0.Next.EndChar - t0.Next.BeginChar) <= 1)
    {
        Pullenti.Ner.Token t1 = t0.Next.Next;
        bool ok = false;
        if (t1 == null)
        {
            ok = true;
        }
        else if (t1.IsNewlineBefore || t1.IsCharOf(",."))
        {
            ok = true;
        }
        if (ok)
        {
            return new PhoneItemToken(t0, t0.Next) { ItemType = PhoneItemType.Alt, Value = t0.Next.GetSourceText() };
        }
    }
    // Exactly two-character number in round brackets.
    if ((t0.IsChar('(') && (t0.Next is Pullenti.Ner.NumberToken) && (t0.Next.EndChar - t0.Next.BeginChar) == 1) && t0.Next.Next != null && t0.Next.Next.IsChar(')'))
    {
        return new PhoneItemToken(t0, t0.Next.Next) { ItemType = PhoneItemType.Alt, Value = t0.Next.GetSourceText() };
    }
    // Number whose length equals the trailing part of the referent's template.
    if ((t0.IsCharOf("/-") && (t0.Next is Pullenti.Ner.NumberToken) && ph0 != null && ph0.m_Template != null) && Pullenti.Morph.LanguageHelper.EndsWith(ph0.m_Template, ((t0.Next.EndChar - t0.Next.BeginChar) + 1).ToString()))
    {
        return new PhoneItemToken(t0, t0.Next) { ItemType = PhoneItemType.Alt, Value = t0.Next.GetSourceText() };
    }
    return null;
}
/// <summary>
/// Tries to parse an assertion of the form "[preamble,] noun-phrase verb ... ." starting at
/// <paramref name="t"/> and wraps it into a DefinitionReferent of kind Assertation.
/// </summary>
/// <param name="t">First token of the candidate sentence; may be null.</param>
/// <returns>
/// A referent token spanning from <paramref name="t"/> to the end of the sentence,
/// or null when the pattern does not match or no term text can be extracted.
/// </returns>
static Pullenti.Ner.ReferentToken TryParseThesis(Pullenti.Ner.Token t)
{
    if (t == null)
    {
        return null;
    }
    Pullenti.Ner.Token t0 = t;
    Pullenti.Ner.Token tt = t;
    Pullenti.Morph.MorphClass mc = tt.GetMorphClassInDictionary();
    Pullenti.Ner.MetaToken preamb = null;
    if (mc.IsConjunction)
    {
        return null;
    }
    if (t.IsValue("LET", null))
    {
        return null;
    }
    if (mc.IsPreposition || mc.IsMisc || mc.IsAdverb)
    {
        if (!Pullenti.Ner.Core.MiscHelper.IsEngArticle(tt))
        {
            // Introductory clause: scan forward to the comma that closes it.
            for (tt = tt.Next; tt != null; tt = tt.Next)
            {
                if (tt.IsComma)
                {
                    break;
                }
                if (tt.IsChar('('))
                {
                    Pullenti.Ner.Core.BracketSequenceToken br = Pullenti.Ner.Core.BracketHelper.TryParse(tt, Pullenti.Ner.Core.BracketParseAttr.No, 100);
                    if (br != null)
                    {
                        tt = br.EndToken;
                        continue;
                    }
                }
                if (Pullenti.Ner.Core.MiscHelper.CanBeStartOfSentence(tt))
                {
                    break;
                }
                Pullenti.Ner.Core.NounPhraseToken npt0 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(tt, Pullenti.Ner.Core.NounPhraseParseAttr.ParseNumericAsAdjective | Pullenti.Ner.Core.NounPhraseParseAttr.ReferentCanBeNoun, 0, null);
                if (npt0 != null)
                {
                    tt = npt0.EndToken;
                    continue;
                }
                if (tt.GetMorphClassInDictionary().IsVerb)
                {
                    break;
                }
            }
            if (tt == null || !tt.IsComma || tt.Next == null)
            {
                return null;
            }
            preamb = new Pullenti.Ner.MetaToken(t0, tt.Previous);
            tt = tt.Next;
        }
    }
    // t1 is the start of the subject (the term).
    Pullenti.Ner.Token t1 = tt;
    mc = tt.GetMorphClassInDictionary();
    Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(tt, Pullenti.Ner.Core.NounPhraseParseAttr.ParseNumericAsAdjective | Pullenti.Ner.Core.NounPhraseParseAttr.ReferentCanBeNoun | Pullenti.Ner.Core.NounPhraseParseAttr.ParseAdverbs, 0, null);
    if (npt == null && (tt is Pullenti.Ner.TextToken))
    {
        // Fall back to a one-token subject for all-upper words, or for capitalised
        // words that are proper names or follow a preamble.
        if (tt.Chars.IsAllUpper)
        {
            npt = new Pullenti.Ner.Core.NounPhraseToken(tt, tt);
        }
        else if (!tt.Chars.IsAllLower)
        {
            if (mc.IsProper || preamb != null)
            {
                npt = new Pullenti.Ner.Core.NounPhraseToken(tt, tt);
            }
        }
    }
    if (npt == null)
    {
        return null;
    }
    if (mc.IsPersonalPronoun)
    {
        return null;
    }
    // t2 is the verb that must directly follow the subject.
    Pullenti.Ner.Token t2 = npt.EndToken.Next;
    if (t2 == null || Pullenti.Ner.Core.MiscHelper.CanBeStartOfSentence(t2) || !(t2 is Pullenti.Ner.TextToken))
    {
        return null;
    }
    if (!t2.GetMorphClassInDictionary().IsVerb)
    {
        return null;
    }
    // t3 becomes the last token of the sentence.
    Pullenti.Ner.Token t3 = t2;
    for (tt = t2.Next; tt != null; tt = tt.Next)
    {
        if (!tt.GetMorphClassInDictionary().IsVerb)
        {
            break;
        }
    }
    for (; tt != null; tt = tt.Next)
    {
        if (tt.Next == null)
        {
            t3 = tt;
            break;
        }
        if (tt.IsCharOf(".;!?"))
        {
            if (Pullenti.Ner.Core.MiscHelper.CanBeStartOfSentence(tt.Next))
            {
                t3 = tt;
                break;
            }
        }
        if (!(tt is Pullenti.Ner.TextToken))
        {
            continue;
        }
        if (Pullenti.Ner.Core.BracketHelper.CanBeStartOfSequence(tt, false, false))
        {
            Pullenti.Ner.Core.BracketSequenceToken br = Pullenti.Ner.Core.BracketHelper.TryParse(tt, Pullenti.Ner.Core.BracketParseAttr.No, 100);
            if (br != null)
            {
                tt = br.EndToken;
                continue;
            }
        }
    }
    tt = t3;
    if (t3.IsCharOf(";.!?"))
    {
        tt = tt.Previous;
    }
    string txt = Pullenti.Ner.Core.MiscHelper.GetTextValue(t2, tt, Pullenti.Ner.Core.GetTextAttr.KeepRegister | Pullenti.Ner.Core.GetTextAttr.KeepQuotes);
    if (txt == null || (txt.Length < 15))
    {
        return null;
    }
    if (t0 != t1)
    {
        // Append the preamble (without its closing comma) after the predicate text.
        tt = t1.Previous;
        if (tt.IsComma)
        {
            tt = tt.Previous;
        }
        string txt0 = Pullenti.Ner.Core.MiscHelper.GetTextValue(t0, tt, Pullenti.Ner.Core.GetTextAttr.KeepRegister | Pullenti.Ner.Core.GetTextAttr.KeepQuotes);
        if (txt0 != null && txt0.Length > 10)
        {
            if (t0.Chars.IsCapitalUpper)
            {
                txt0 = char.ToLower(txt0[0]) + txt0.Substring(1);
            }
            txt = string.Format("{0}, {1}", txt, txt0);
        }
    }
    tt = t1;
    if (Pullenti.Ner.Core.MiscHelper.IsEngArticle(tt))
    {
        tt = tt.Next;
    }
    string nam = Pullenti.Ner.Core.MiscHelper.GetTextValue(tt, t2.Previous, Pullenti.Ner.Core.GetTextAttr.KeepQuotes);
    if (nam == null)
    {
        // GetTextValue can yield null (its result is null-checked for txt above);
        // without a term there is nothing to define.
        return null;
    }
    // Ordinal match so that the fixed Substring(9) below always removes exactly the matched prefix.
    if (nam.StartsWith("SO-CALLED", System.StringComparison.Ordinal))
    {
        nam = nam.Substring(9).Trim();
    }
    Pullenti.Ner.Definition.DefinitionReferent dr = new Pullenti.Ner.Definition.DefinitionReferent();
    dr.Kind = Pullenti.Ner.Definition.DefinitionKind.Assertation;
    dr.AddSlot(Pullenti.Ner.Definition.DefinitionReferent.ATTR_TERMIN, nam, false, 0);
    dr.AddSlot(Pullenti.Ner.Definition.DefinitionReferent.ATTR_VALUE, txt, false, 0);
    return new Pullenti.Ner.ReferentToken(dr, t0, t3);
}
/// <summary>
/// Tries to parse a single measurement unit starting at <paramref name="t"/>.
/// </summary>
/// <param name="t">First token of the candidate; may be null.</param>
/// <param name="addUnits">Optional extra unit dictionary consulted after the built-in ones; may be null.</param>
/// <param name="prev">Previously parsed unit of the same unit list, or null.</param>
/// <param name="parseUnknownUnits">When true, a short unknown word may be returned as a doubtful unit.</param>
/// <returns>The parsed unit token, or null.</returns>
public static UnitToken TryParse(Pullenti.Ner.Token t, Pullenti.Ner.Core.TerminCollection addUnits, UnitToken prev, bool parseUnknownUnits = false)
{
    if (t == null)
    {
        return null;
    }
    Pullenti.Ner.Token t0 = t;
    int pow = 1;
    bool isNeg = false;
    // A leading divider ("/", "НА", "OF", "PER", or "В" after a previous unit) makes the power negative.
    if ((t.IsCharOf("\\/") || t.IsValue("НА", null) || t.IsValue("OF", null)) || t.IsValue("PER", null))
    {
        isNeg = true;
        t = t.Next;
    }
    else if (t.IsValue("В", null) && prev != null)
    {
        isNeg = true;
        t = t.Next;
    }
    else if (MeasureHelper.IsMultChar(t))
    {
        t = t.Next;
    }
    Pullenti.Ner.TextToken tt = t as Pullenti.Ner.TextToken;
    if (tt == null)
    {
        return null;
    }
    if (tt.Term == "КВ" || tt.Term == "КВАДР" || tt.IsValue("КВАДРАТНЫЙ", null))
    {
        pow = 2;
        tt = tt.Next as Pullenti.Ner.TextToken;
        if (tt != null && tt.IsChar('.'))
        {
            tt = tt.Next as Pullenti.Ner.TextToken;
        }
        if (tt == null)
        {
            return null;
        }
    }
    else if (tt.Term == "КУБ" || tt.Term == "КУБИЧ" || tt.IsValue("КУБИЧЕСКИЙ", null))
    {
        pow = 3;
        tt = tt.Next as Pullenti.Ner.TextToken;
        if (tt != null && tt.IsChar('.'))
        {
            tt = tt.Next as Pullenti.Ner.TextToken;
        }
        if (tt == null)
        {
            return null;
        }
    }
    else if (tt.Term == "µ")
    {
        // Micro sign: parse the base unit and look for its "мк"-prefixed micro variant.
        UnitToken res = TryParse(tt.Next, addUnits, prev, false);
        // The recursive result may carry no Unit (the addUnits branch below sets only ExtOnto),
        // so guard before dereferencing it.
        if (res != null && res.Unit != null)
        {
            foreach (Unit u in UnitsHelper.Units)
            {
                if (u.Factor == UnitsFactors.Micro && string.Compare("мк" + u.NameCyr, res.Unit.NameCyr, true) == 0)
                {
                    res.Unit = u;
                    res.BeginToken = tt;
                    res.Pow = pow;
                    if (isNeg)
                    {
                        res.Pow = -pow;
                    }
                    return res;
                }
            }
        }
    }
    List<Pullenti.Ner.Core.TerminToken> toks = UnitsHelper.Termins.TryParseAll(tt, Pullenti.Ner.Core.TerminParseAttr.No);
    if (toks != null)
    {
        if ((prev != null && tt == t0 && toks.Count == 1) && t.IsWhitespaceBefore)
        {
            return null;
        }
        // A one-token match that is also a preposition is rejected when it reads as a real preposition.
        if (toks[0].BeginToken == toks[0].EndToken && tt.Morph.Class.IsPreposition && (tt.WhitespacesAfterCount < 3))
        {
            if (Pullenti.Ner.Core.NounPhraseHelper.TryParse(tt, Pullenti.Ner.Core.NounPhraseParseAttr.ParsePreposition, 0, null) != null)
            {
                return null;
            }
            if (tt.Next is Pullenti.Ner.NumberToken)
            {
                if ((tt.Next as Pullenti.Ner.NumberToken).Typ != Pullenti.Ner.NumberSpellingType.Digit)
                {
                    return null;
                }
            }
            UnitToken nex = TryParse(tt.Next, addUnits, null, false);
            if (nex != null)
            {
                return null;
            }
        }
        // Lower-case "м" after a unit of kind Length is returned as UnitsHelper.uMinute.
        if (toks[0].BeginToken == toks[0].EndToken && (toks[0].BeginToken.IsValue("М", null) || toks[0].BeginToken.IsValue("M", null)) && toks[0].BeginToken.Chars.IsAllLower)
        {
            if (prev != null && prev.Unit != null && prev.Unit.Kind == Pullenti.Ner.Measure.MeasureKind.Length)
            {
                UnitToken res = new UnitToken(t0, toks[0].EndToken) { Unit = UnitsHelper.uMinute };
                res.Pow = pow;
                if (isNeg)
                {
                    res.Pow = -pow;
                }
                return res;
            }
        }
        List<UnitToken> uts = new List<UnitToken>();
        foreach (Pullenti.Ner.Core.TerminToken tok in toks)
        {
            UnitToken res = new UnitToken(t0, tok.EndToken) { Unit = tok.Termin.Tag as Unit };
            res.Pow = pow;
            if (isNeg)
            {
                res.Pow = -pow;
            }
            // A unit with multiplier 1000000 written with a lower-case first letter
            // is replaced by the Milli unit of the same name.
            if (res.Unit.BaseMultiplier == 1000000 && (t0 is Pullenti.Ner.TextToken) && char.IsLower((t0 as Pullenti.Ner.TextToken).GetSourceText()[0]))
            {
                foreach (Unit u in UnitsHelper.Units)
                {
                    if (u.Factor == UnitsFactors.Milli && string.Compare(u.NameCyr, res.Unit.NameCyr, true) == 0)
                    {
                        res.Unit = u;
                        break;
                    }
                }
            }
            res._correct();
            res._checkDoubt();
            uts.Add(res);
        }
        // Preference order: the candidate whose keyword starts latest, then the first
        // non-doubtful candidate, then simply the first one.
        int max = 0;
        UnitToken best = null;
        foreach (UnitToken ut in uts)
        {
            if (ut.Keyword != null)
            {
                if (ut.Keyword.BeginChar >= max)
                {
                    max = ut.Keyword.BeginChar;
                    best = ut;
                }
            }
        }
        if (best != null)
        {
            return best;
        }
        foreach (UnitToken ut in uts)
        {
            if (!ut.IsDoubt)
            {
                return ut;
            }
        }
        return uts[0];
    }
    // Degree sign, either as a character or spelled "<o>" / "<0>".
    Pullenti.Ner.Token t1 = null;
    if (t.IsCharOf("º°"))
    {
        t1 = t;
    }
    else if ((t.IsChar('<') && t.Next != null && t.Next.Next != null) && t.Next.Next.IsChar('>') && (t.Next.IsValue("О", null) || t.Next.IsValue("O", null) || ((t.Next is Pullenti.Ner.NumberToken) && (t.Next as Pullenti.Ner.NumberToken).Value == "0")))
    {
        t1 = t.Next.Next;
    }
    if (t1 != null)
    {
        UnitToken res = new UnitToken(t0, t1) { Unit = UnitsHelper.uGradus };
        res._checkDoubt();
        t = t1.Next;
        if (t != null && t.IsComma)
        {
            t = t.Next;
        }
        if (t != null && t.IsValue("ПО", null))
        {
            t = t.Next;
        }
        if (t is Pullenti.Ner.TextToken)
        {
            string vv = (t as Pullenti.Ner.TextToken).Term;
            if (vv == "C" || vv == "С" || vv.StartsWith("ЦЕЛЬС"))
            {
                res.Unit = UnitsHelper.uGradusC;
                res.IsDoubt = false;
                res.EndToken = t;
            }
            if (vv == "F" || vv.StartsWith("ФАР"))
            {
                res.Unit = UnitsHelper.uGradusF;
                res.IsDoubt = false;
                res.EndToken = t;
            }
        }
        return res;
    }
    // "оС" typed with letters instead of a degree sign.
    if ((t is Pullenti.Ner.TextToken) && (t.IsValue("ОС", null) || t.IsValue("OC", null)))
    {
        string str = t.GetSourceText();
        if (str == "оС" || str == "oC")
        {
            UnitToken res = new UnitToken(t, t) { Unit = UnitsHelper.uGradusC, IsDoubt = false };
            return res;
        }
    }
    if (t.IsChar('%'))
    {
        // "% об." / "% (об.)" is returned as UnitsHelper.uAlco, otherwise plain percent.
        Pullenti.Ner.Token tt1 = t.Next;
        if (tt1 != null && tt1.IsChar('('))
        {
            tt1 = tt1.Next;
        }
        if ((tt1 is Pullenti.Ner.TextToken) && (tt1 as Pullenti.Ner.TextToken).Term.StartsWith("ОБ"))
        {
            UnitToken re = new UnitToken(t, tt1) { Unit = UnitsHelper.uAlco };
            if (re.EndToken.Next != null && re.EndToken.Next.IsChar('.'))
            {
                re.EndToken = re.EndToken.Next;
            }
            if (re.EndToken.Next != null && re.EndToken.Next.IsChar(')') && t.Next.IsChar('('))
            {
                re.EndToken = re.EndToken.Next;
            }
            return re;
        }
        return new UnitToken(t, t) { Unit = UnitsHelper.uPercent };
    }
    if (addUnits != null)
    {
        Pullenti.Ner.Core.TerminToken tok = addUnits.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
        if (tok != null)
        {
            UnitToken res = new UnitToken(t0, tok.EndToken) { ExtOnto = tok.Termin.Tag as Pullenti.Ner.Measure.UnitReferent };
            // NOTE(review): this moves tok.EndToken after res was already built from it, so the
            // trailing '.' is not part of res; possibly res.EndToken was intended - confirm.
            if (tok.EndToken.Next != null && tok.EndToken.Next.IsChar('.'))
            {
                tok.EndToken = tok.EndToken.Next;
            }
            res.Pow = pow;
            if (isNeg)
            {
                res.Pow = -pow;
            }
            res._correct();
            return res;
        }
    }
    if (!parseUnknownUnits)
    {
        return null;
    }
    // Unknown unit: a short word of letters, close to the previous token, not a sentence start.
    if ((t.WhitespacesBeforeCount > 2 || !t.Chars.IsLetter || t.LengthChar > 5) || !(t is Pullenti.Ner.TextToken))
    {
        return null;
    }
    if (Pullenti.Ner.Core.MiscHelper.CanBeStartOfSentence(t))
    {
        return null;
    }
    t1 = t;
    // NOTE(review): re-assigning t1 = t here is a no-op; possibly t.Next (the dot) was intended - confirm.
    if (t.Next != null && t.Next.IsChar('.'))
    {
        t1 = t;
    }
    bool ok = false;
    if (t1.Next == null || t1.WhitespacesAfterCount > 2)
    {
        ok = true;
    }
    else if (t1.Next.IsComma || t1.Next.IsCharOf("\\/") || t1.Next.IsTableControlChar)
    {
        ok = true;
    }
    else if (MeasureHelper.IsMultChar(t1.Next))
    {
        ok = true;
    }
    if (!ok)
    {
        return null;
    }
    Pullenti.Morph.MorphClass mc = t.GetMorphClassInDictionary();
    if (mc.IsUndefined)
    {
    }
    else if (t.LengthChar > 7)
    {
        return null;
    }
    UnitToken res1 = new UnitToken(t0, t1) { Pow = pow, IsDoubt = true };
    res1.UnknownName = (t as Pullenti.Ner.TextToken).GetSourceText();
    res1._correct();
    return res1;
}
/// <summary>
/// Tries to read a speciality code: up to three digit groups, optionally separated by
/// '.', ',' or a hyphen, starting at <paramref name="t"/>. The rest of the line
/// (bracket sequences included) is absorbed into the resulting token.
/// </summary>
/// <param name="t">First token of the candidate; may be null.</param>
/// <param name="keyWordBefore">True when a keyword precedes the code, which relaxes the checks.</param>
/// <returns>A token of type <c>Types.Speciality</c> whose Value holds the digits, or null.</returns>
static TitleItemToken TryAttachSpeciality(Pullenti.Ner.Token t, bool keyWordBefore)
{
    if (t == null)
    {
        return null;
    }

    // Without a keyword and not at a line start only the strict "NN.NN.NN" shape is accepted.
    bool strict = !keyWordBefore && !t.IsNewlineBefore;
    Pullenti.Ner.Token first = t;
    StringBuilder code = null;
    int digitTotal = 0;

    for (int group = 0; group < 3; group++)
    {
        Pullenti.Ner.NumberToken number = t as Pullenti.Ner.NumberToken;
        if (number == null)
        {
            break;
        }
        if (number.Typ != Pullenti.Ner.NumberSpellingType.Digit || number.Morph.Class.IsAdjective)
        {
            break;
        }
        if (code == null)
        {
            code = new StringBuilder();
        }
        if (strict && t.LengthChar != 2)
        {
            return null;
        }

        string digits = number.GetSourceText();
        digitTotal += digits.Length;
        code.Append(digits);

        if (t.Next == null)
        {
            break;
        }
        t = t.Next;

        bool isSeparator = t.IsCharOf(".,") || t.IsHiphen;
        if (!isSeparator)
        {
            continue;
        }
        if (strict && (group < 2))
        {
            // In strict mode the separator must be a dot with no whitespace around it.
            if (!t.IsChar('.') || t.IsWhitespaceAfter || t.IsWhitespaceBefore)
            {
                return null;
            }
        }
        if (t.Next != null)
        {
            t = t.Next;
        }
    }

    if (code == null || (digitTotal < 5))
    {
        return null;
    }
    if (digitTotal == 6)
    {
        // Exactly six digits are normalised to "NN.NN.NN".
        code.Insert(4, '.');
        code.Insert(2, '.');
    }
    else if (!keyWordBefore)
    {
        return null;
    }

    // Extend the token up to the end of the current line.
    for (Pullenti.Ner.Token cur = t.Next; cur != null; cur = cur.Next)
    {
        if (cur.IsNewlineBefore)
        {
            break;
        }
        Pullenti.Ner.Core.BracketSequenceToken bracket = Pullenti.Ner.Core.BracketHelper.TryParse(cur, Pullenti.Ner.Core.BracketParseAttr.No, 100);
        if (bracket != null)
        {
            cur = bracket.EndToken;
        }
        t = cur;
    }

    return new TitleItemToken(first, t, Types.Speciality) { Value = code.ToString() };
}
/// <summary>
/// Tries to build a PersonIdentityReferent from a keyword at <paramref name="t"/>
/// followed by items such as seria, number, date, address and organization.
/// </summary>
/// <param name="t">First token of the candidate; must be a letter token.</param>
/// <returns>
/// A referent token spanning from the keyword to the last consumed item,
/// or null when no keyword or no usable number is found.
/// </returns>
public static Pullenti.Ner.ReferentToken TryAttach(Pullenti.Ner.Token t)
{
    if (t == null || !t.Chars.IsLetter)
    {
        return null;
    }
    PersonIdToken noun = TryParse(t, null);
    if (noun == null)
    {
        return null;
    }

    // Collect the items that follow the keyword.
    List<PersonIdToken> items = new List<PersonIdToken>();
    for (Pullenti.Ner.Token cur = noun.EndToken.Next; cur != null; cur = cur.Next)
    {
        if (cur.IsTableControlChar)
        {
            break;
        }
        if (cur.IsCharOf(",:"))
        {
            continue;
        }
        PersonIdToken context = noun;
        if (items.Count > 0)
        {
            context = items[items.Count - 1];
        }
        PersonIdToken item = TryParse(cur, context);
        if (item == null)
        {
            bool skippable = cur.IsValue("ОТДЕЛ", null) || cur.IsValue("ОТДЕЛЕНИЕ", null);
            if (skippable)
            {
                continue;
            }
            break;
        }
        if (item.Typ == Typs.Keyword)
        {
            break;
        }
        items.Add(item);
        cur = item.EndToken;
    }
    if (items.Count == 0)
    {
        return null;
    }

    // The leading one or two items must yield the document number.
    PersonIdToken head = items[0];
    PersonIdToken second = (items.Count > 1 ? items[1] : null);
    string num;
    int i;
    if (head.Typ == Typs.Number)
    {
        if (second != null && second.Typ == Typs.Number && second.HasPrefix)
        {
            num = head.Value + second.Value;
            i = 2;
        }
        else
        {
            num = head.Value;
            i = 1;
        }
    }
    else if (head.Typ == Typs.Seria)
    {
        if (second != null && second.Typ == Typs.Number)
        {
            num = head.Value + second.Value;
            i = 2;
        }
        else if (head.Value.Length > 5)
        {
            num = head.Value;
            i = 1;
        }
        else
        {
            return null;
        }
    }
    else
    {
        return null;
    }

    Pullenti.Ner.Person.PersonIdentityReferent pid = new Pullenti.Ner.Person.PersonIdentityReferent();
    pid.Typ = noun.Value.ToLower();
    pid.Number = num;
    if (noun.Referent is Pullenti.Ner.Geo.GeoReferent)
    {
        pid.State = noun.Referent;
    }

    // Attach the remaining items; stop at the first unusable or repeated one.
    while (i < items.Count)
    {
        PersonIdToken item = items[i];
        if (item.Typ != Typs.Vidan && item.Typ != Typs.Code)
        {
            string slotName;
            if (item.Typ == Typs.Date && item.Referent != null)
            {
                slotName = Pullenti.Ner.Person.PersonIdentityReferent.ATTR_DATE;
            }
            else if (item.Typ == Typs.Address && item.Referent != null)
            {
                slotName = Pullenti.Ner.Person.PersonIdentityReferent.ATTR_ADDRESS;
            }
            else if (item.Typ == Typs.Org && item.Referent != null)
            {
                slotName = Pullenti.Ner.Person.PersonIdentityReferent.ATTR_ORG;
            }
            else
            {
                break;
            }
            if (pid.FindSlot(slotName, null, true) != null)
            {
                break;
            }
            pid.AddSlot(slotName, item.Referent, false, 0);
        }
        i++;
    }
    return new Pullenti.Ner.ReferentToken(pid, noun.BeginToken, items[i - 1].EndToken);
}
/// <summary>
/// Tries to parse one element of a personal identity document description at
/// <paramref name="t"/>: a keyword, a seria, a number, a code, a date, an organization or an address.
/// </summary>
/// <param name="t">Current token; may be null (the method calls itself with possibly-null followers).</param>
/// <param name="prev">Previously parsed element, or null when a leading keyword is expected.</param>
/// <returns>The parsed element, or null.</returns>
static PersonIdToken TryParse(Pullenti.Ner.Token t, PersonIdToken prev)
{
    if (t == null)
    {
        return null;
    }
    // Long multi-word keyword for the individual entrepreneur registration certificate.
    if (t.IsValue("СВИДЕТЕЛЬСТВО", null))
    {
        Pullenti.Ner.Token tt1 = t;
        bool ip = false;
        bool reg = false;
        for (Pullenti.Ner.Token tt = t.Next; tt != null; tt = tt.Next)
        {
            if (tt.IsCommaAnd || tt.Morph.Class.IsPreposition)
            {
                continue;
            }
            if (tt.IsValue("РЕГИСТРАЦИЯ", null) || tt.IsValue("РЕЕСТР", null) || tt.IsValue("ЗАРЕГИСТРИРОВАТЬ", null))
            {
                reg = true;
                tt1 = tt;
            }
            else if (tt.IsValue("ИНДИВИДУАЛЬНЫЙ", null) || tt.IsValue("ИП", null))
            {
                ip = true;
                tt1 = tt;
            }
            else if ((tt.IsValue("ВНЕСЕНИЕ", null) || tt.IsValue("ГОСУДАРСТВЕННЫЙ", null) || tt.IsValue("ЕДИНЫЙ", null)) || tt.IsValue("ЗАПИСЬ", null) || tt.IsValue("ПРЕДПРИНИМАТЕЛЬ", null))
            {
                tt1 = tt;
            }
            else if (tt.GetReferent() != null && tt.GetReferent().TypeName == "DATERANGE")
            {
                tt1 = tt;
            }
            else
            {
                break;
            }
        }
        if (reg && ip)
        {
            return new PersonIdToken(t, tt1) { Typ = Typs.Keyword, Value = "СВИДЕТЕЛЬСТВО О ГОСУДАРСТВЕННОЙ РЕГИСТРАЦИИ ФИЗИЧЕСКОГО ЛИЦА В КАЧЕСТВЕ ИНДИВИДУАЛЬНОГО ПРЕДПРИНИМАТЕЛЯ" };
        }
    }
    Pullenti.Ner.Core.TerminToken tok = m_Ontology.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
    if (tok != null)
    {
        Typs ty = (Typs)tok.Termin.Tag;
        PersonIdToken res = new PersonIdToken(tok.BeginToken, tok.EndToken) { Typ = ty, Value = tok.Termin.CanonicText };
        if (prev == null)
        {
            // Leading position: only a keyword is acceptable; it may be extended by a state reference.
            if (ty != Typs.Keyword)
            {
                return null;
            }
            for (t = tok.EndToken.Next; t != null; t = t.Next)
            {
                Pullenti.Ner.Referent r = t.GetReferent();
                if (r != null && (r is Pullenti.Ner.Geo.GeoReferent))
                {
                    res.Referent = r;
                    res.EndToken = t;
                    continue;
                }
                if (t.IsValue("ГРАЖДАНИН", null) && t.Next != null && (t.Next.GetReferent() is Pullenti.Ner.Geo.GeoReferent))
                {
                    res.Referent = t.Next.GetReferent();
                    t = (res.EndToken = t.Next);
                    continue;
                }
                if (r != null)
                {
                    break;
                }
                PersonAttrToken ait = PersonAttrToken.TryAttach(t, null, PersonAttrToken.PersonAttrAttachAttrs.No);
                if (ait != null)
                {
                    if (ait.Referent != null)
                    {
                        foreach (Pullenti.Ner.Slot s in ait.Referent.Slots)
                        {
                            if (s.TypeName == Pullenti.Ner.Person.PersonPropertyReferent.ATTR_REF && (s.Value is Pullenti.Ner.Geo.GeoReferent))
                            {
                                res.Referent = s.Value as Pullenti.Ner.Referent;
                            }
                        }
                    }
                    res.EndToken = ait.EndToken;
                    break;
                }
                if (t.IsValue("ДАННЫЙ", null))
                {
                    res.EndToken = t;
                    continue;
                }
                break;
            }
            // Only geo referents with IsState set are kept.
            if ((res.Referent is Pullenti.Ner.Geo.GeoReferent) && !(res.Referent as Pullenti.Ner.Geo.GeoReferent).IsState)
            {
                res.Referent = null;
            }
            return res;
        }
        if (ty == Typs.Number)
        {
            // Number prefix: concatenate the number tokens that follow on the same line.
            StringBuilder tmp = new StringBuilder();
            Pullenti.Ner.Token tt = tok.EndToken.Next;
            if (tt != null && tt.IsChar(':'))
            {
                tt = tt.Next;
            }
            for (; tt != null; tt = tt.Next)
            {
                if (tt.IsNewlineBefore)
                {
                    break;
                }
                if (!(tt is Pullenti.Ner.NumberToken))
                {
                    break;
                }
                tmp.Append(tt.GetSourceText());
                res.EndToken = tt;
            }
            if (tmp.Length < 1)
            {
                return null;
            }
            res.Value = tmp.ToString();
            res.HasPrefix = true;
            return res;
        }
        if (ty == Typs.Seria)
        {
            // Seria prefix: digits, roman numerals and two-letter upper-case groups.
            StringBuilder tmp = new StringBuilder();
            Pullenti.Ner.Token tt = tok.EndToken.Next;
            if (tt != null && tt.IsChar(':'))
            {
                tt = tt.Next;
            }
            for (; tt != null; tt = tt.Next)
            {
                if (tt.IsNewlineBefore)
                {
                    break;
                }
                if (Pullenti.Ner.Core.MiscHelper.CheckNumberPrefix(tt) != null)
                {
                    // A number prefix starts the next element.
                    break;
                }
                if (!(tt is Pullenti.Ner.NumberToken))
                {
                    if (!(tt is Pullenti.Ner.TextToken))
                    {
                        break;
                    }
                    if (!tt.Chars.IsAllUpper)
                    {
                        break;
                    }
                    Pullenti.Ner.NumberToken nu = Pullenti.Ner.Core.NumberHelper.TryParseRoman(tt);
                    if (nu != null)
                    {
                        tmp.Append(nu.GetSourceText());
                        tt = nu.EndToken;
                    }
                    else if (tt.LengthChar != 2)
                    {
                        break;
                    }
                    else
                    {
                        tmp.Append((tt as Pullenti.Ner.TextToken).Term);
                        res.EndToken = tt;
                    }
                    if (tt.Next != null && tt.Next.IsHiphen)
                    {
                        tt = tt.Next;
                    }
                    continue;
                }
                if (tmp.Length >= 4)
                {
                    break;
                }
                tmp.Append(tt.GetSourceText());
                res.EndToken = tt;
            }
            if (tmp.Length < 4)
            {
                // A short seria is accepted only when a number element follows it.
                if (tmp.Length < 2)
                {
                    return null;
                }
                Pullenti.Ner.Token tt1 = res.EndToken.Next;
                if (tt1 != null && tt1.IsComma)
                {
                    tt1 = tt1.Next;
                }
                PersonIdToken next = TryParse(tt1, res);
                if (next != null && next.Typ == Typs.Number)
                {
                }
                else
                {
                    return null;
                }
            }
            res.Value = tmp.ToString();
            res.HasPrefix = true;
            return res;
        }
        if (ty == Typs.Code)
        {
            for (Pullenti.Ner.Token tt = res.EndToken.Next; tt != null; tt = tt.Next)
            {
                if (tt.IsCharOf(":") || tt.IsHiphen)
                {
                    continue;
                }
                if (tt is Pullenti.Ner.NumberToken)
                {
                    res.EndToken = tt;
                    continue;
                }
                break;
            }
        }
        if (ty == Typs.Address)
        {
            if (t.GetReferent() is Pullenti.Ner.Address.AddressReferent)
            {
                res.Referent = t.GetReferent();
                res.EndToken = t;
                return res;
            }
            for (Pullenti.Ner.Token tt = res.EndToken.Next; tt != null; tt = tt.Next)
            {
                if (tt.IsCharOf(":") || tt.IsHiphen || tt.Morph.Class.IsPreposition)
                {
                    continue;
                }
                if (tt.GetReferent() is Pullenti.Ner.Address.AddressReferent)
                {
                    res.Referent = tt.GetReferent();
                    res.EndToken = tt;
                }
                break;
            }
            if (res.Referent == null)
            {
                return null;
            }
        }
        return res;
    }
    else if (prev == null)
    {
        return null;
    }
    // No ontology match: try a bare number (optionally with a number prefix).
    Pullenti.Ner.Token t0 = t;
    Pullenti.Ner.Token t1 = Pullenti.Ner.Core.MiscHelper.CheckNumberPrefix(t0);
    if (t1 != null)
    {
        t = t1;
    }
    if (t is Pullenti.Ner.NumberToken)
    {
        StringBuilder tmp = new StringBuilder();
        PersonIdToken res = new PersonIdToken(t0, t) { Typ = Typs.Number };
        for (Pullenti.Ner.Token tt = t; tt != null; tt = tt.Next)
        {
            if (tt.IsNewlineBefore || !(tt is Pullenti.Ner.NumberToken))
            {
                break;
            }
            tmp.Append(tt.GetSourceText());
            res.EndToken = tt;
        }
        if (tmp.Length < 4)
        {
            // A short number right after the keyword is treated as a seria if a number follows.
            if (tmp.Length < 2)
            {
                return null;
            }
            if (prev == null || prev.Typ != Typs.Keyword)
            {
                return null;
            }
            PersonIdToken ne = TryParse(res.EndToken.Next, prev);
            if (ne != null && ne.Typ == Typs.Number)
            {
                res.Typ = Typs.Seria;
            }
            else
            {
                return null;
            }
        }
        res.Value = tmp.ToString();
        if (t0 != t)
        {
            res.HasPrefix = true;
        }
        return res;
    }
    if (t is Pullenti.Ner.ReferentToken)
    {
        Pullenti.Ner.Referent r = t.GetReferent();
        if (r != null)
        {
            if (r.TypeName == "DATE")
            {
                return new PersonIdToken(t, t) { Typ = Typs.Date, Referent = r };
            }
            if (r.TypeName == "ORGANIZATION")
            {
                return new PersonIdToken(t, t) { Typ = Typs.Org, Referent = r };
            }
            if (r.TypeName == "ADDRESS")
            {
                return new PersonIdToken(t, t) { Typ = Typs.Address, Referent = r };
            }
        }
    }
    // A non-lower-case word right after the keyword is a seria if a number follows it.
    if ((prev != null && prev.Typ == Typs.Keyword && (t is Pullenti.Ner.TextToken)) && !t.Chars.IsAllLower && t.Chars.IsLetter)
    {
        PersonIdToken rr = TryParse(t.Next, prev);
        if (rr != null && rr.Typ == Typs.Number)
        {
            return new PersonIdToken(t, t) { Typ = Typs.Seria, Value = (t as Pullenti.Ner.TextToken).Term };
        }
    }
    if ((t.IsValue("ОТ", "ВІД") && (t.Next is Pullenti.Ner.ReferentToken)) && t.Next.GetReferent().TypeName == "DATE")
    {
        return new PersonIdToken(t, t.Next) { Typ = Typs.Date, Referent = t.Next.GetReferent() };
    }
    return null;
}
/// <summary>
/// Tries to recognise a single bibliographic-reference item starting at token <paramref name="t"/>.
/// The branches are tried in a fixed order: bracketed item, referent tokens, termins from
/// <c>m_Termins</c>, the '/' delimiter, digit numbers, textual markers, parenthesised year.
/// </summary>
/// <param name="t">First token to analyse; <c>null</c> yields <c>null</c>.</param>
/// <param name="lev">Current recursion depth; anything above 3 yields <c>null</c>.</param>
/// <returns>The recognised item, or <c>null</c> when no branch matches.</returns>
static BookLinkToken _tryParse(Pullenti.Ner.Token t, int lev)
{
    if (t == null || lev > 3)
    {
        return null;
    }

    // "[ ... ]": parse the content recursively and widen the result to include the brackets.
    if (t.IsChar('['))
    {
        BookLinkToken re = _tryParse(t.Next, lev + 1);
        if (re != null && re.EndToken.Next != null && re.EndToken.Next.IsChar(']'))
        {
            re.BeginToken = t;
            re.EndToken = re.EndToken.Next;
            return re;
        }
        if (re != null && re.EndToken.IsChar(']'))
        {
            re.BeginToken = t;
            return re;
        }
        if (re != null)
        {
            if (re.Typ == BookLinkTyp.Sostavitel || re.Typ == BookLinkTyp.Editors)
            {
                return re;
            }
        }
        // Otherwise accept a short bracket sequence whose last inner token is a number.
        Pullenti.Ner.Core.BracketSequenceToken br = Pullenti.Ner.Core.BracketHelper.TryParse(t, Pullenti.Ner.Core.BracketParseAttr.No, 100);
        if (br != null)
        {
            if ((br.EndToken.Previous is Pullenti.Ner.NumberToken) && (br.LengthChar < 30))
            {
                return new BookLinkToken(t, br.EndToken) { Typ = BookLinkTyp.Number, Value = Pullenti.Ner.Core.MiscHelper.GetTextValue(br.BeginToken.Next, br.EndToken.Previous, Pullenti.Ner.Core.GetTextAttr.No) };
            }
        }
    }

    // Already recognised entities: person, geo, date (year), press organization, URL.
    if (t is Pullenti.Ner.ReferentToken)
    {
        if (t.GetReferent() is Pullenti.Ner.Person.PersonReferent)
        {
            return TryParseAuthor(t, Pullenti.Ner.Person.Internal.FioTemplateType.Undefined);
        }
        if (t.GetReferent() is Pullenti.Ner.Geo.GeoReferent)
        {
            return new BookLinkToken(t, t) { Typ = BookLinkTyp.Geo, Ref = t.GetReferent() };
        }
        if (t.GetReferent() is Pullenti.Ner.Date.DateReferent)
        {
            Pullenti.Ner.Date.DateReferent dr = t.GetReferent() as Pullenti.Ner.Date.DateReferent;
            if (dr.Slots.Count == 1 && dr.Year > 0)
            {
                return new BookLinkToken(t, t) { Typ = BookLinkTyp.Year, Value = dr.Year.ToString() };
            }
            if (dr.Year > 0 && t.Previous != null && t.Previous.IsComma)
            {
                return new BookLinkToken(t, t) { Typ = BookLinkTyp.Year, Value = dr.Year.ToString() };
            }
        }
        if (t.GetReferent() is Pullenti.Ner.Org.OrganizationReferent)
        {
            Pullenti.Ner.Org.OrganizationReferent org = t.GetReferent() as Pullenti.Ner.Org.OrganizationReferent;
            if (org.Kind == Pullenti.Ner.Org.OrganizationKind.Press)
            {
                return new BookLinkToken(t, t) { Typ = BookLinkTyp.Press, Ref = org };
            }
        }
        if (t.GetReferent() is Pullenti.Ner.Uri.UriReferent)
        {
            Pullenti.Ner.Uri.UriReferent uri = t.GetReferent() as Pullenti.Ner.Uri.UriReferent;
            if ((uri.Scheme == "http" || uri.Scheme == "https" || uri.Scheme == "ftp") || uri.Scheme == null)
            {
                return new BookLinkToken(t, t) { Typ = BookLinkTyp.Url, Ref = uri };
            }
        }
    }

    // Dictionary termins; the termin's Tag carries the BookLinkTyp.
    Pullenti.Ner.Core.TerminToken tok = m_Termins.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
    if (tok != null)
    {
        BookLinkTyp typ = (BookLinkTyp)tok.Termin.Tag;
        bool ok = true;
        if (typ == BookLinkTyp.Type || typ == BookLinkTyp.NameTail || typ == BookLinkTyp.ElectronRes)
        {
            // These three kinds are accepted only right after '.', ':', '[' or a hyphen.
            ok = t.Previous != null && (t.Previous.IsCharOf(".:[") || t.Previous.IsHiphen);
        }
        if (ok)
        {
            return new BookLinkToken(t, tok.EndToken) { Typ = typ, Value = tok.Termin.CanonicText };
        }
        if (typ == BookLinkTyp.ElectronRes)
        {
            // Skip non-letter text tokens and accept the item if a URI referent follows.
            for (Pullenti.Ner.Token tt = tok.EndToken.Next; tt != null; tt = tt.Next)
            {
                if ((tt is Pullenti.Ner.TextToken) && !tt.Chars.IsLetter)
                {
                    continue;
                }
                if (tt.GetReferent() is Pullenti.Ner.Uri.UriReferent)
                {
                    return new BookLinkToken(t, tt) { Typ = BookLinkTyp.ElectronRes, Ref = tt.GetReferent() };
                }
                break;
            }
        }
    }

    // "/" or "//" delimiter.
    if (t.IsChar('/'))
    {
        BookLinkToken res = new BookLinkToken(t, t) { Typ = BookLinkTyp.Delimeter, Value = "/" };
        if (t.Next != null && t.Next.IsChar('/'))
        {
            res.EndToken = t.Next;
            res.Value = "//";
        }
        if (!t.IsWhitespaceBefore && !t.IsWhitespaceAfter)
        {
            // A slash glued to its neighbours counts only if one of the next three tokens
            // parses as an item other than a plain number.
            int coo = 3;
            bool no = true;
            for (Pullenti.Ner.Token tt = t.Next; tt != null && coo > 0; tt = tt.Next, coo--)
            {
                BookLinkToken vvv = TryParse(tt, lev + 1);
                if (vvv != null && vvv.Typ != BookLinkTyp.Number)
                {
                    no = false;
                    break;
                }
            }
            if (no)
            {
                return null;
            }
        }
        return res;
    }

    // Number written with digits: plain number, year (1930..2029) or page count.
    if ((t is Pullenti.Ner.NumberToken) && (t as Pullenti.Ner.NumberToken).IntValue != null && (t as Pullenti.Ner.NumberToken).Typ == Pullenti.Ner.NumberSpellingType.Digit)
    {
        BookLinkToken res = new BookLinkToken(t, t) { Typ = BookLinkTyp.Number, Value = (t as Pullenti.Ner.NumberToken).Value.ToString() };
        int val = (t as Pullenti.Ner.NumberToken).IntValue.Value;
        if (val >= 1930 && (val < 2030))
        {
            res.Typ = BookLinkTyp.Year;
        }
        if (t.Next != null && t.Next.IsChar('.'))
        {
            res.EndToken = t.Next;
        }
        else if ((t.Next != null && t.Next.LengthChar == 1 && !t.Next.Chars.IsLetter) && t.Next.IsWhitespaceAfter)
        {
            res.EndToken = t.Next;
        }
        else if (t.Next is Pullenti.Ner.TextToken)
        {
            string term = (t.Next as Pullenti.Ner.TextToken).Term;
            if (((term == "СТР" || term == "C" || term == "С") || term == "P" || term == "S") || term == "PAGES")
            {
                res.EndToken = t.Next;
                res.Typ = BookLinkTyp.Pages;
                res.Value = (t as Pullenti.Ner.NumberToken).Value.ToString();
            }
        }
        return res;
    }

    if (t is Pullenti.Ner.TextToken)
    {
        string term = (t as Pullenti.Ner.TextToken).Term;

        // Page / volume marker followed by a number or a comma/hyphen separated list of numbers.
        if (((((((term == "СТР" || term == "C" || term == "С") || term == "ТОМ" || term == "T") || term == "Т" || term == "P") || term == "PP" || term == "V") || term == "VOL" || term == "S") || term == "СТОР" || t.IsValue("PAGE", null)) || t.IsValue("СТРАНИЦА", "СТОРІНКА"))
        {
            Pullenti.Ner.Token tt = t.Next;
            while (tt != null)
            {
                if (tt.IsCharOf(".:~"))
                {
                    tt = tt.Next;
                }
                else
                {
                    break;
                }
            }
            if (tt is Pullenti.Ner.NumberToken)
            {
                BookLinkToken res = new BookLinkToken(t, tt) { Typ = BookLinkTyp.PageRange };
                Pullenti.Ner.Token tt0 = tt;
                Pullenti.Ner.Token tt1 = tt;
                for (tt = tt.Next; tt != null; tt = tt.Next)
                {
                    if (tt.IsCharOf(",") || tt.IsHiphen)
                    {
                        if (tt.Next is Pullenti.Ner.NumberToken)
                        {
                            tt = tt.Next;
                            res.EndToken = tt;
                            tt1 = tt;
                            continue;
                        }
                    }
                    break;
                }
                res.Value = Pullenti.Ner.Core.MiscHelper.GetTextValue(tt0, tt1, Pullenti.Ner.Core.GetTextAttr.No);
                return res;
            }
        }

        // Abbreviated place of publication ("М.", "СПб.", "К." ...).
        if ((term == "M" || term == "М" || term == "СПБ") || term == "K" || term == "К")
        {
            if (t.Next != null && t.Next.IsCharOf(":;"))
            {
                BookLinkToken re = new BookLinkToken(t, t.Next) { Typ = BookLinkTyp.Geo };
                return re;
            }
            if (t.Next != null && t.Next.IsCharOf("."))
            {
                BookLinkToken res = new BookLinkToken(t, t.Next) { Typ = BookLinkTyp.Geo };
                if (t.Next.Next != null && t.Next.Next.IsCharOf(":;"))
                {
                    res.EndToken = t.Next.Next;
                }
                else if (t.Next.Next != null && (t.Next.Next is Pullenti.Ner.NumberToken))
                {
                    // abbreviation directly followed by a number: keep the span as is
                }
                else if (t.Next.Next != null && t.Next.Next.IsComma && (t.Next.Next.Next is Pullenti.Ner.NumberToken))
                {
                    // abbreviation, comma, number: keep the span as is
                }
                else
                {
                    return null;
                }
                return res;
            }
        }

        // Translation marker ("пер. с ...").
        if (term == "ПЕР" || term == "ПЕРЕВ" || term == "ПЕРЕВОД")
        {
            Pullenti.Ner.Token tt = t;
            if (tt.Next != null && tt.Next.IsChar('.'))
            {
                tt = tt.Next;
            }
            if (tt.Next != null && ((tt.Next.IsValue("C", null) || tt.Next.IsValue("С", null))))
            {
                tt = tt.Next;
                if (tt.Next == null || tt.WhitespacesAfterCount > 2)
                {
                    return null;
                }
                BookLinkToken re = new BookLinkToken(t, tt.Next) { Typ = BookLinkTyp.Translate };
                return re;
            }
        }

        // "там же".
        if (term == "ТАМ" || term == "ТАМЖЕ")
        {
            BookLinkToken res = new BookLinkToken(t, t) { Typ = BookLinkTyp.Tamze };
            if (t.Next != null && t.Next.IsValue("ЖЕ", null))
            {
                res.EndToken = t.Next;
            }
            return res;
        }

        // "see"-style pointer; the tail absorbs '.', ':', "ALSO", "В"/"IN" and one nested See item.
        if (((term == "СМ" || term == "CM" || term == "НАПР") || term == "НАПРИМЕР" || term == "SEE") || term == "ПОДРОБНЕЕ" || term == "ПОДРОБНО")
        {
            BookLinkToken res = new BookLinkToken(t, t) { Typ = BookLinkTyp.See };
            for (t = t.Next; t != null; t = t.Next)
            {
                if (t.IsCharOf(".:") || t.IsValue("ALSO", null))
                {
                    res.EndToken = t;
                    continue;
                }
                if (t.IsValue("В", null) || t.IsValue("IN", null))
                {
                    res.EndToken = t;
                    continue;
                }
                BookLinkToken vvv = _tryParse(t, lev + 1);
                if (vvv != null && vvv.Typ == BookLinkTyp.See)
                {
                    res.EndToken = vvv.EndToken;
                }
                break;
            }
            return res;
        }
        if (term == "БОЛЕЕ")
        {
            BookLinkToken vvv = _tryParse(t.Next, lev + 1);
            if (vvv != null && vvv.Typ == BookLinkTyp.See)
            {
                vvv.BeginToken = t;
                return vvv;
            }
        }

        // Number prefix followed by a number.
        Pullenti.Ner.Token no = Pullenti.Ner.Core.MiscHelper.CheckNumberPrefix(t);
        if (no is Pullenti.Ner.NumberToken)
        {
            return new BookLinkToken(t, no) { Typ = BookLinkTyp.N };
        }

        // "В 3 т." / "В 2 кн." style volume count.
        if (((term == "B" || term == "В")) && (t.Next is Pullenti.Ner.NumberToken) && (t.Next.Next is Pullenti.Ner.TextToken))
        {
            string term2 = (t.Next.Next as Pullenti.Ner.TextToken).Term;
            if (((term2 == "Т" || term2 == "T" || term2.StartsWith("ТОМ")) || term2 == "TT" || term2 == "ТТ") || term2 == "КН" || term2.StartsWith("КНИГ"))
            {
                return new BookLinkToken(t, t.Next.Next) { Typ = BookLinkTyp.Volume };
            }
        }
    }

    // "(YYYY)" or "(date referent)" => year.
    if (t.IsChar('('))
    {
        if (((t.Next is Pullenti.Ner.NumberToken) && (t.Next as Pullenti.Ner.NumberToken).IntValue != null && t.Next.Next != null) && t.Next.Next.IsChar(')'))
        {
            int num = (t.Next as Pullenti.Ner.NumberToken).IntValue.Value;
            if (num > 1900 && num <= 2040)
            {
                if (num <= DateTime.Now.Year)
                {
                    return new BookLinkToken(t, t.Next.Next) { Typ = BookLinkTyp.Year, Value = num.ToString() };
                }
            }
        }
        if (((t.Next is Pullenti.Ner.ReferentToken) && (t.Next.GetReferent() is Pullenti.Ner.Date.DateReferent) && t.Next.Next != null) && t.Next.Next.IsChar(')'))
        {
            int num = (t.Next.GetReferent() as Pullenti.Ner.Date.DateReferent).Year;
            if (num > 0)
            {
                return new BookLinkToken(t, t.Next.Next) { Typ = BookLinkTyp.Year, Value = num.ToString() };
            }
        }
    }
    return null;
}
/// <summary>
/// Scans forward from <paramref name="t"/> collecting URI referents whose scheme is accepted by
/// <c>_isBankReq</c> together with organization referents, and wraps them into a
/// <c>BankDataReferent</c>.
/// </summary>
/// <param name="t">Token to start from; <c>null</c> yields <c>null</c>.</param>
/// <param name="keyWord">When true, an organization or other token may precede the first
/// requisite without aborting the attempt.</param>
/// <returns>
/// A referent token spanning from the start token to the last accepted requisite/organization,
/// or <c>null</c> when no requisites were found, when neither "Р/С" nor "Л/С" is among them,
/// or when there is a single requisite and no organization.
/// </returns>
Pullenti.Ner.ReferentToken TryAttach(Pullenti.Ner.Token t, bool keyWord)
{
    if (t == null)
    {
        return null;
    }
    Pullenti.Ner.Token t0 = t;
    Pullenti.Ner.Token t1 = t;
    List<string> urisKeys = null;
    List<Pullenti.Ner.Uri.UriReferent> uris = null;
    Pullenti.Ner.Referent org = null;
    Pullenti.Ner.Referent corOrg = null;
    bool orgIsBank = false;
    // Number of consecutive tokens that contributed nothing; the scan stops when it exceeds 2.
    int empty = 0;
    Pullenti.Ner.Uri.UriReferent lastUri = null;

    for (; t != null; t = t.Next)
    {
        if (t.IsTableControlChar && t != t0)
        {
            break;
        }
        if (t.IsComma || t.Morph.Class.IsPreposition || t.IsCharOf("/\\"))
        {
            continue;
        }
        bool bankKeyword = false;

        // Skip the two-word lead-in "ПОЛНЫЙ НАИМЕНОВАНИЕ/НАЗВАНИЕ".
        if (t.IsValue("ПОЛНЫЙ", null) && t.Next != null && ((t.Next.IsValue("НАИМЕНОВАНИЕ", null) || t.Next.IsValue("НАЗВАНИЕ", null))))
        {
            t = t.Next.Next;
            if (t == null)
            {
                break;
            }
        }

        // "БАНК [noun phrase] [:]" introduces the bank; step over the introduction.
        if (t.IsValue("БАНК", null))
        {
            if ((t is Pullenti.Ner.ReferentToken) && t.GetReferent().TypeName == "ORGANIZATION")
            {
                bankKeyword = true;
            }
            Pullenti.Ner.Token tt = t.Next;
            Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(tt, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
            if (npt != null)
            {
                tt = npt.EndToken.Next;
            }
            if (tt != null && tt.IsChar(':'))
            {
                tt = tt.Next;
            }
            if (tt != null)
            {
                if (!bankKeyword)
                {
                    t = tt;
                    bankKeyword = true;
                }
                else if (tt.GetReferent() != null && tt.GetReferent().TypeName == "ORGANIZATION")
                {
                    t = tt;
                }
            }
        }

        Pullenti.Ner.Referent r = t.GetReferent();
        if (r != null && r.TypeName == "ORGANIZATION")
        {
            // An organization is a bank if it or one of up to four ancestors has KIND == "Bank",
            // or if it was introduced by the bank keyword above.
            bool isBank = false;
            int kk = 0;
            for (Pullenti.Ner.Referent rr = r; rr != null && (kk < 4); rr = rr.ParentReferent, kk++)
            {
                isBank = string.Compare(rr.GetStringValue("KIND") ?? "", "Bank", true) == 0;
                if (isBank)
                {
                    break;
                }
            }
            if (!isBank && bankKeyword)
            {
                isBank = true;
            }
            if (!isBank && uris != null && urisKeys.Contains("ИНН"))
            {
                return null;
            }
            if ((lastUri != null && lastUri.Scheme == "К/С" && t.Previous != null) && t.Previous.IsValue("В", null))
            {
                // "<К/С> В <organization>": the organization is the correspondent bank.
                corOrg = r;
                t1 = t;
            }
            else if (org == null || ((!orgIsBank && isBank)))
            {
                org = r;
                t1 = t;
                orgIsBank = isBank;
                if (isBank)
                {
                    continue;
                }
            }
            if (uris == null && !keyWord)
            {
                return null;
            }
            continue;
        }

        if (r is Pullenti.Ner.Uri.UriReferent)
        {
            Pullenti.Ner.Uri.UriReferent u = r as Pullenti.Ner.Uri.UriReferent;
            if (uris == null)
            {
                // The first URI must itself be a bank requisite, otherwise nothing is attached.
                if (!_isBankReq(u.Scheme))
                {
                    return null;
                }
                if (u.Scheme == "ИНН" && t.IsNewlineAfter)
                {
                    return null;
                }
                uris = new List<Pullenti.Ner.Uri.UriReferent>();
                urisKeys = new List<string>();
            }
            else
            {
                // Later URIs end the block on a foreign scheme, a repeated scheme,
                // or an "ИНН" that follows unrelated tokens.
                if (!_isBankReq(u.Scheme))
                {
                    break;
                }
                if (urisKeys.Contains(u.Scheme))
                {
                    break;
                }
                if (u.Scheme == "ИНН")
                {
                    if (empty > 0)
                    {
                        break;
                    }
                }
            }
            urisKeys.Add(u.Scheme);
            uris.Add(u);
            lastUri = u;
            t1 = t;
            empty = 0;
            continue;
        }
        else if (uris == null && !keyWord && !orgIsBank)
        {
            return null;
        }

        if (r != null && ((r.TypeName == "GEO" || r.TypeName == "ADDRESS")))
        {
            empty++;
            continue;
        }
        if (t is Pullenti.Ner.TextToken)
        {
            if (t.IsValue("ПОЛНЫЙ", null) || t.IsValue("НАИМЕНОВАНИЕ", null) || t.IsValue("НАЗВАНИЕ", null))
            {
                // neutral lead-in words: neither counted nor terminating
            }
            else if (t.Chars.IsLetter)
            {
                Pullenti.Ner.Core.TerminToken tok = m_Ontology.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
                if (tok != null)
                {
                    t = tok.EndToken;
                    empty = 0;
                }
                else
                {
                    empty++;
                    if (t.IsNewlineBefore)
                    {
                        // A new line of the form "<noun phrase>:" ends the block.
                        Pullenti.Ner.Core.NounPhraseToken nnn = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                        if (nnn != null && nnn.EndToken.Next != null && nnn.EndToken.Next.IsChar(':'))
                        {
                            break;
                        }
                    }
                }
                if (uris == null)
                {
                    break;
                }
            }
        }
        if (empty > 2)
        {
            break;
        }
        if (empty > 0 && t.IsChar(':') && t.IsNewlineAfter)
        {
            break;
        }
        if (((t is Pullenti.Ner.NumberToken) && t.IsNewlineBefore && t.Next != null) && !t.Next.Chars.IsLetter)
        {
            break;
        }
    }

    if (uris == null)
    {
        return null;
    }
    // An account number ("Р/С" or "Л/С") is mandatory.
    if (!urisKeys.Contains("Р/С") && !urisKeys.Contains("Л/С"))
    {
        return null;
    }
    if ((uris.Count < 2) && org == null)
    {
        return null;
    }

    BankDataReferent bdr = new BankDataReferent();
    foreach (Pullenti.Ner.Uri.UriReferent u in uris)
    {
        bdr.AddSlot(BankDataReferent.ATTR_ITEM, u, false, 0);
    }
    if (org != null)
    {
        bdr.AddSlot(BankDataReferent.ATTR_BANK, org, false, 0);
    }
    if (corOrg != null)
    {
        bdr.AddSlot(BankDataReferent.ATTR_CORBANK, corOrg, false, 0);
    }

    // If an organization stands right before the block, borrow those of its requisites
    // whose scheme was not found in the block itself.
    Pullenti.Ner.Referent org0 = (t0.Previous == null ? null : t0.Previous.GetReferent());
    if (org0 != null && org0.TypeName == "ORGANIZATION")
    {
        foreach (Pullenti.Ner.Slot s in org0.Slots)
        {
            if (s.Value is Pullenti.Ner.Uri.UriReferent)
            {
                Pullenti.Ner.Uri.UriReferent u = s.Value as Pullenti.Ner.Uri.UriReferent;
                if (_isBankReq(u.Scheme))
                {
                    if (!urisKeys.Contains(u.Scheme))
                    {
                        bdr.AddSlot(BankDataReferent.ATTR_ITEM, u, false, 0);
                    }
                }
            }
        }
    }
    return new Pullenti.Ner.ReferentToken(bdr, t0, t1);
}
/// <summary>
/// Builds the referent token for person <paramref name="p"/> over [<paramref name="begin"/>,
/// <paramref name="end"/>], folding in the attributes found before the name
/// (<paramref name="attrs"/>) and then extending the span to the right over trailing
/// attributes, nicknames, Latin spellings of the name, contacts, identity documents and
/// organizations.
/// </summary>
/// <param name="p">Person referent to fill; <c>null</c> yields <c>null</c>.</param>
/// <param name="begin">First token of the person; may be moved left to cover attributes.</param>
/// <param name="end">Last token of the person; may be moved right while the tail is parsed.</param>
/// <param name="morph">Morphology of the name; a singular-number copy of its items is used for the result.</param>
/// <param name="attrs">Attributes attached before the name, or <c>null</c>.</param>
/// <param name="ad">Analyzer data (local ontology, recursion counter), or <c>null</c>.</param>
/// <param name="forAttribute">When true, the token is returned right after the leading
/// attributes are applied, without parsing the tail.</param>
/// <param name="afterBePredicate">Not read anywhere in this method body.</param>
/// <returns>The referent token for <paramref name="p"/>, or <c>null</c> if <paramref name="p"/> is <c>null</c>.</returns>
internal static Pullenti.Ner.ReferentToken CreateReferentToken(Pullenti.Ner.Person.PersonReferent p, Pullenti.Ner.Token begin, Pullenti.Ner.Token end, Pullenti.Ner.MorphCollection morph, List<PersonAttrToken> attrs, Pullenti.Ner.Person.PersonAnalyzer.PersonAnalyzerData ad, bool forAttribute, bool afterBePredicate)
{
    if (p == null)
    {
        return null;
    }

    // ---- 1. Apply the attributes that precede the name. ----
    bool hasPrefix = false;
    if (attrs != null)
    {
        foreach (PersonAttrToken a in attrs)
        {
            if (a.Typ == PersonAttrTerminType.BestRegards)
            {
                hasPrefix = true;
            }
            else
            {
                if (a.BeginChar < begin.BeginChar)
                {
                    begin = a.BeginToken;
                    // Attribute in parentheses: include the opening bracket in the span.
                    if ((a.EndToken.Next != null && a.EndToken.Next.IsChar(')') && begin.Previous != null) && begin.Previous.IsChar('('))
                    {
                        begin = begin.Previous;
                    }
                }
                if (a.Typ != PersonAttrTerminType.Prefix)
                {
                    if (a.Age != null)
                    {
                        p.AddSlot(Pullenti.Ner.Person.PersonReferent.ATTR_AGE, a.Age, false, 0);
                    }
                    if (a.PropRef == null)
                    {
                        p.AddSlot(Pullenti.Ner.Person.PersonReferent.ATTR_ATTR, a.Value, false, 0);
                    }
                    else
                    {
                        p.AddSlot(Pullenti.Ner.Person.PersonReferent.ATTR_ATTR, a, false, 0);
                    }
                }
                else if (a.Gender == Pullenti.Morph.MorphGender.Feminie && !p.IsFemale)
                {
                    p.IsFemale = true;
                }
                else if (a.Gender == Pullenti.Morph.MorphGender.Masculine && !p.IsMale)
                {
                    p.IsMale = true;
                }
            }
        }
    }
    else if ((begin.Previous is Pullenti.Ner.TextToken) && (begin.WhitespacesBeforeCount < 3))
    {
        // No explicit attributes: recognise the "ИП" abbreviation right before the name.
        if ((begin.Previous as Pullenti.Ner.TextToken).Term == "ИП")
        {
            PersonAttrToken a = new PersonAttrToken(begin.Previous, begin.Previous);
            a.PropRef = new Pullenti.Ner.Person.PersonPropertyReferent();
            a.PropRef.Name = "индивидуальный предприниматель";
            p.AddSlot(Pullenti.Ner.Person.PersonReferent.ATTR_ATTR, a, false, 0);
            begin = begin.Previous;
        }
    }

    // ---- 2. Build the result morphology: singular, gender taken from the person if undefined. ----
    Pullenti.Ner.MorphCollection m0 = new Pullenti.Ner.MorphCollection();
    foreach (Pullenti.Morph.MorphBaseInfo it in morph.Items)
    {
        Pullenti.Morph.MorphBaseInfo bi = new Pullenti.Morph.MorphBaseInfo();
        bi.CopyFrom(it);
        bi.Number = Pullenti.Morph.MorphNumber.Singular;
        if (bi.Gender == Pullenti.Morph.MorphGender.Undefined)
        {
            if (p.IsMale && !p.IsFemale)
            {
                bi.Gender = Pullenti.Morph.MorphGender.Masculine;
            }
            if (!p.IsMale && p.IsFemale)
            {
                bi.Gender = Pullenti.Morph.MorphGender.Feminie;
            }
        }
        m0.AddItem(bi);
    }
    morph = m0;
    if ((attrs != null && attrs.Count > 0 && !attrs[0].Morph.Case.IsUndefined) && morph.Case.IsUndefined)
    {
        morph.Case = attrs[0].Morph.Case;
        if (attrs[0].Morph.Number == Pullenti.Morph.MorphNumber.Singular)
        {
            morph.Number = Pullenti.Morph.MorphNumber.Singular;
        }
        if (p.IsMale && !p.IsFemale)
        {
            morph.Gender = Pullenti.Morph.MorphGender.Masculine;
        }
        else if (p.IsFemale)
        {
            morph.Gender = Pullenti.Morph.MorphGender.Feminie;
        }
    }

    // ---- 3. "ИМЕНИ <person>" / "ИМ. <person>" forces the attribute-only mode. ----
    if (begin.Previous != null)
    {
        Pullenti.Ner.Token ttt = begin.Previous;
        if (ttt.IsValue("ИМЕНИ", "ІМЕНІ"))
        {
            forAttribute = true;
        }
        else
        {
            if (ttt.IsChar('.') && ttt.Previous != null)
            {
                ttt = ttt.Previous;
            }
            if (ttt.WhitespacesAfterCount < 3)
            {
                if (ttt.IsValue("ИМ", "ІМ"))
                {
                    forAttribute = true;
                }
            }
        }
    }
    if (forAttribute)
    {
        return new Pullenti.Ner.ReferentToken(p, begin, end) { Morph = morph, MiscAttrs = (int)p.m_PersonIdentityTyp };
    }

    // ---- 4. Enumeration "<person>, <person> и <person>": inherit a shared property
    //         from the first person of the chain. ----
    if ((begin.Previous != null && begin.Previous.IsCommaAnd && (begin.Previous.Previous is Pullenti.Ner.ReferentToken)) && (begin.Previous.Previous.GetReferent() is Pullenti.Ner.Person.PersonReferent))
    {
        Pullenti.Ner.ReferentToken rt00 = begin.Previous.Previous as Pullenti.Ner.ReferentToken;
        // Walk left to the first person of the chain.
        for (Pullenti.Ner.Token ttt = (Pullenti.Ner.Token)rt00; ttt != null;)
        {
            if (ttt.Previous == null || !(ttt.Previous.Previous is Pullenti.Ner.ReferentToken))
            {
                break;
            }
            if (!ttt.Previous.IsCommaAnd || !(ttt.Previous.Previous.GetReferent() is Pullenti.Ner.Person.PersonReferent))
            {
                break;
            }
            rt00 = ttt.Previous.Previous as Pullenti.Ner.ReferentToken;
            ttt = rt00;
        }
        if (rt00.BeginToken.GetReferent() is Pullenti.Ner.Person.PersonPropertyReferent)
        {
            // The property is shared if it is followed by ':' or is plural.
            bool ok = false;
            if ((rt00.BeginToken as Pullenti.Ner.ReferentToken).EndToken.Next != null && (rt00.BeginToken as Pullenti.Ner.ReferentToken).EndToken.Next.IsChar(':'))
            {
                ok = true;
            }
            else if (rt00.BeginToken.Morph.Number == Pullenti.Morph.MorphNumber.Plural)
            {
                ok = true;
            }
            if (ok)
            {
                p.AddSlot(Pullenti.Ner.Person.PersonReferent.ATTR_ATTR, rt00.BeginToken.GetReferent(), false, 0);
            }
        }
    }

    // ---- 5. Recursion guard: the tail parsing below may re-enter person analysis. ----
    if (ad != null)
    {
        if (ad.OverflowLevel > 10)
        {
            return new Pullenti.Ner.ReferentToken(p, begin, end) { Morph = morph, MiscAttrs = (int)p.m_PersonIdentityTyp };
        }
        ad.OverflowLevel++;
    }

    // ---- 6. Parse attributes that follow the name. ----
    List<PersonAttrToken> attrs1 = null;
    bool hasPosition = false;
    bool openBr = false;
    for (Pullenti.Ner.Token t = end.Next; t != null; t = t.Next)
    {
        if (t.IsTableControlChar)
        {
            break;
        }
        if (t.IsNewlineBefore)
        {
            if (t.NewlinesBeforeCount > 2)
            {
                break;
            }
            if (attrs1 != null && attrs1.Count > 0)
            {
                break;
            }
            Pullenti.Ner.Mail.Internal.MailLine ml = Pullenti.Ner.Mail.Internal.MailLine.Parse(t, 0, 0);
            if (ml != null && ml.Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.From)
            {
                break;
            }
            if (t.Chars.IsCapitalUpper)
            {
                // A capitalised new line continues the person only if it is a whole-line
                // or multi-word attribute (or the person came with a BestRegards prefix).
                PersonAttrToken attr1 = PersonAttrToken.TryAttach(t, (ad == null ? null : ad.LocalOntology), PersonAttrToken.PersonAttrAttachAttrs.No);
                bool ok1 = false;
                if (attr1 != null)
                {
                    if (hasPrefix || attr1.IsNewlineAfter || ((attr1.EndToken.Next != null && attr1.EndToken.Next.IsTableControlChar)))
                    {
                        ok1 = true;
                    }
                    else
                    {
                        for (Pullenti.Ner.Token tt2 = t.Next; tt2 != null && tt2.EndChar <= attr1.EndChar; tt2 = tt2.Next)
                        {
                            if (tt2.IsWhitespaceBefore)
                            {
                                ok1 = true;
                            }
                        }
                    }
                }
                else
                {
                    Pullenti.Ner.Token ttt = CorrectTailAttributes(p, t);
                    if (ttt != null && ttt != t)
                    {
                        end = (t = ttt);
                        continue;
                    }
                }
                if (!ok1)
                {
                    break;
                }
            }
        }
        if (t.IsHiphen || t.IsCharOf("_>|"))
        {
            continue;
        }
        if (t.IsValue("МОДЕЛЬ", null))
        {
            break;
        }
        Pullenti.Ner.Token tt = CorrectTailAttributes(p, t);
        if (tt != t && tt != null)
        {
            end = (t = tt);
            continue;
        }
        // NOTE(review): isBe is never assigned true in this method, so the "!isBe" checks
        // below always pass - confirm whether the IsVerbBe branch was meant to set it.
        bool isBe = false;
        if (t.IsChar('(') && t == end.Next)
        {
            openBr = true;
            t = t.Next;
            if (t == null)
            {
                break;
            }
            // "(Surname)": alternative last name.
            PersonItemToken pit1 = PersonItemToken.TryAttach(t, null, PersonItemToken.ParseAttr.No, null);
            if ((pit1 != null && t.Chars.IsCapitalUpper && pit1.EndToken.Next != null) && (t is Pullenti.Ner.TextToken) && pit1.EndToken.Next.IsChar(')'))
            {
                if (pit1.Lastname != null)
                {
                    Pullenti.Morph.MorphBaseInfo inf = new Pullenti.Morph.MorphBaseInfo() { Case = Pullenti.Morph.MorphCase.Nominative };
                    if (p.IsMale)
                    {
                        inf.Gender |= Pullenti.Morph.MorphGender.Masculine;
                    }
                    if (p.IsFemale)
                    {
                        inf.Gender |= Pullenti.Morph.MorphGender.Feminie;
                    }
                    PersonMorphCollection sur = PersonIdentityToken.CreateLastname(pit1, inf);
                    if (sur != null)
                    {
                        p.AddFioIdentity(sur, null, null);
                        end = (t = pit1.EndToken.Next);
                        continue;
                    }
                }
            }
            // "(Latin Name)": accept only if every Latin item matches one of the person's name slots.
            if ((t is Pullenti.Ner.TextToken) && t.Chars.IsLatinLetter)
            {
                List<PersonItemToken> pits = PersonItemToken.TryAttachList(t, null, PersonItemToken.ParseAttr.CanBeLatin, 10);
                if (((pits != null && pits.Count >= 2 && pits.Count <= 3) && pits[0].Chars.IsLatinLetter && pits[1].Chars.IsLatinLetter) && pits[pits.Count - 1].EndToken.Next != null && pits[pits.Count - 1].EndToken.Next.IsChar(')'))
                {
                    Pullenti.Ner.Person.PersonReferent pr2 = new Pullenti.Ner.Person.PersonReferent();
                    int cou = 0;
                    foreach (PersonItemToken pi in pits)
                    {
                        foreach (Pullenti.Ner.Slot si in p.Slots)
                        {
                            if (si.TypeName == Pullenti.Ner.Person.PersonReferent.ATTR_FIRSTNAME || si.TypeName == Pullenti.Ner.Person.PersonReferent.ATTR_MIDDLENAME || si.TypeName == Pullenti.Ner.Person.PersonReferent.ATTR_LASTNAME)
                            {
                                if (Pullenti.Ner.Core.MiscHelper.CanBeEqualCyrAndLatSS(si.Value.ToString(), pi.Value))
                                {
                                    cou++;
                                    pr2.AddSlot(si.TypeName, pi.Value, false, 0);
                                    break;
                                }
                            }
                        }
                    }
                    if (cou == pits.Count)
                    {
                        foreach (Pullenti.Ner.Slot si in pr2.Slots)
                        {
                            p.AddSlot(si.TypeName, si.Value, false, 0);
                        }
                        end = (t = pits[pits.Count - 1].EndToken.Next);
                        continue;
                    }
                }
            }
        }
        else if (t.IsComma)
        {
            t = t.Next;
            if ((t is Pullenti.Ner.TextToken) && (t as Pullenti.Ner.TextToken).IsValue("WHO", null))
            {
                continue;
            }
            // ", Latin Name": same matching as in the bracketed case, without the closing bracket.
            if ((t is Pullenti.Ner.TextToken) && t.Chars.IsLatinLetter)
            {
                List<PersonItemToken> pits = PersonItemToken.TryAttachList(t, null, PersonItemToken.ParseAttr.CanBeLatin, 10);
                if ((pits != null && pits.Count >= 2 && pits.Count <= 3) && pits[0].Chars.IsLatinLetter && pits[1].Chars.IsLatinLetter)
                {
                    Pullenti.Ner.Person.PersonReferent pr2 = new Pullenti.Ner.Person.PersonReferent();
                    int cou = 0;
                    foreach (PersonItemToken pi in pits)
                    {
                        foreach (Pullenti.Ner.Slot si in p.Slots)
                        {
                            if (si.TypeName == Pullenti.Ner.Person.PersonReferent.ATTR_FIRSTNAME || si.TypeName == Pullenti.Ner.Person.PersonReferent.ATTR_MIDDLENAME || si.TypeName == Pullenti.Ner.Person.PersonReferent.ATTR_LASTNAME)
                            {
                                if (Pullenti.Ner.Core.MiscHelper.CanBeEqualCyrAndLatSS(si.Value.ToString(), pi.Value))
                                {
                                    cou++;
                                    pr2.AddSlot(si.TypeName, pi.Value, false, 0);
                                    break;
                                }
                            }
                        }
                    }
                    if (cou == pits.Count)
                    {
                        foreach (Pullenti.Ner.Slot si in pr2.Slots)
                        {
                            p.AddSlot(si.TypeName, si.Value, false, 0);
                        }
                        end = (t = pits[pits.Count - 1].EndToken);
                        continue;
                    }
                }
            }
        }
        else if ((t is Pullenti.Ner.TextToken) && (t as Pullenti.Ner.TextToken).IsVerbBe)
        {
            t = t.Next;
        }
        else if (t.IsAnd && t.IsWhitespaceAfter && !t.IsNewlineAfter)
        {
            if (t == end.Next)
            {
                break;
            }
            t = t.Next;
        }
        else if (t.IsHiphen && t == end.Next)
        {
            // Unreachable as written: hyphen tokens already hit "continue" above.
            t = t.Next;
        }
        else if (t.IsChar('.') && t == end.Next && hasPrefix)
        {
            t = t.Next;
        }

        Pullenti.Ner.Token ttt2 = CreateNickname(p, t);
        if (ttt2 != null)
        {
            t = (end = ttt2);
            continue;
        }
        if (t == null)
        {
            break;
        }
        PersonAttrToken attr = null;
        attr = PersonAttrToken.TryAttach(t, (ad == null ? null : ad.LocalOntology), PersonAttrToken.PersonAttrAttachAttrs.No);
        if (attr == null)
        {
            if ((t != null && t.GetReferent() != null && t.GetReferent().TypeName == "GEO") && attrs1 != null && openBr)
            {
                continue;
            }
            // "(... Word)": a capitalised word before ')' becomes the last name if none is set.
            if ((t.Chars.IsCapitalUpper && openBr && t.Next != null) && t.Next.IsChar(')'))
            {
                if (p.FindSlot(Pullenti.Ner.Person.PersonReferent.ATTR_LASTNAME, null, true) == null)
                {
                    p.AddSlot(Pullenti.Ner.Person.PersonReferent.ATTR_LASTNAME, t.GetSourceText().ToUpper(), false, 0);
                    t = t.Next;
                    end = t;
                }
            }
            // The relative pronoun "КОТОРЫЙ" in singular fixes an undecided gender.
            if (t != null && t.IsValue("КОТОРЫЙ", null) && t.Morph.Number == Pullenti.Morph.MorphNumber.Singular)
            {
                if (!p.IsFemale && t.Morph.Gender == Pullenti.Morph.MorphGender.Feminie)
                {
                    p.IsFemale = true;
                    p.CorrectData();
                }
                else if (!p.IsMale && t.Morph.Gender == Pullenti.Morph.MorphGender.Masculine)
                {
                    p.IsMale = true;
                    p.CorrectData();
                }
            }
            break;
        }
        if (attr.Morph.Number == Pullenti.Morph.MorphNumber.Plural)
        {
            break;
        }
        if (attr.Typ == PersonAttrTerminType.BestRegards)
        {
            break;
        }
        if (attr.IsDoubt)
        {
            // A doubtful attribute is kept only in one of the three supporting contexts below.
            if (hasPrefix)
            {
            }
            else if (t.IsNewlineBefore && attr.IsNewlineAfter)
            {
            }
            else if (t.Previous != null && ((t.Previous.IsHiphen || t.Previous.IsChar(':'))))
            {
            }
            else
            {
                break;
            }
        }
        if (!morph.Case.IsUndefined && !attr.Morph.Case.IsUndefined)
        {
            // The attribute must agree in case with the name.
            if (((morph.Case & attr.Morph.Case)).IsUndefined && !isBe)
            {
                break;
            }
        }
        if (openBr)
        {
            if (Pullenti.Ner.Person.PersonAnalyzer.TryAttachPerson(t, ad, false, 0, true) != null)
            {
                break;
            }
        }
        if (attrs1 == null)
        {
            if (t.Previous.IsComma && t.Previous == end.Next)
            {
                // "<person>, <attr> <verb>": the attribute probably starts another clause,
                // unless the person itself can start a sentence.
                Pullenti.Ner.Token ttt = attr.EndToken.Next;
                if (ttt != null)
                {
                    if (ttt.Morph.Class.IsVerb)
                    {
                        if (Pullenti.Ner.Core.MiscHelper.CanBeStartOfSentence(begin))
                        {
                        }
                        else
                        {
                            break;
                        }
                    }
                }
            }
            attrs1 = new List<PersonAttrToken>();
        }
        attrs1.Add(attr);
        if (attr.Typ == PersonAttrTerminType.Position || attr.Typ == PersonAttrTerminType.King)
        {
            if (!isBe)
            {
                hasPosition = true;
            }
        }
        else if (attr.Typ != PersonAttrTerminType.Prefix)
        {
            if (attr.Typ == PersonAttrTerminType.Other && attr.Age != null)
            {
            }
            else
            {
                // Any other attribute kind invalidates the whole tail.
                attrs1 = null;
                break;
            }
        }
        t = attr.EndToken;
    }

    // ---- 7. Decide whether trailing attributes really belong to this person. ----
    // attrs.Count > 0 guards the attrs[attrs.Count - 1] access against an empty list.
    if (attrs1 != null && hasPosition && attrs != null && attrs.Count > 0)
    {
        // NOTE(review): te1 (token after the last leading attribute) is dereferenced
        // without a null check - presumably it is always the first name token; confirm.
        Pullenti.Ner.Token te1 = attrs[attrs.Count - 1].EndToken.Next;
        Pullenti.Ner.Token te2 = attrs1[0].BeginToken;
        if (te1.WhitespacesAfterCount > te2.WhitespacesBeforeCount && (te2.WhitespacesBeforeCount < 2))
        {
        }
        else if (attrs1[0].Age != null)
        {
        }
        else if (((te1.IsHiphen || te1.IsChar(':'))) && !attrs1[0].IsNewlineBefore && ((te2.Previous.IsComma || te2.Previous == end)))
        {
        }
        else
        {
            // A position already stands before the name: drop the trailing attributes
            // unless they are followed by '.' or by the end of text.
            foreach (PersonAttrToken a in attrs)
            {
                if (a.Typ == PersonAttrTerminType.Position)
                {
                    Pullenti.Ner.Token te = attrs1[attrs1.Count - 1].EndToken;
                    if (te.Next != null)
                    {
                        if (!te.Next.IsChar('.'))
                        {
                            attrs1 = null;
                            break;
                        }
                    }
                }
            }
        }
    }
    if (attrs1 != null && !hasPrefix)
    {
        // The last trailing attribute may actually open the next person.
        PersonAttrToken attr = attrs1[attrs1.Count - 1];
        bool ok = false;
        if (attr.EndToken.Next != null && attr.EndToken.Next.Chars.IsCapitalUpper)
        {
            ok = true;
        }
        else
        {
            Pullenti.Ner.ReferentToken rt = Pullenti.Ner.Person.PersonAnalyzer.TryAttachPerson(attr.BeginToken, ad, false, -1, false);
            if (rt != null && (rt.Referent is Pullenti.Ner.Person.PersonReferent))
            {
                ok = true;
            }
        }
        if (ok)
        {
            if (attr.BeginToken.WhitespacesBeforeCount > attr.EndToken.WhitespacesAfterCount)
            {
                attrs1 = null;
            }
            else if (attr.BeginToken.WhitespacesBeforeCount == attr.EndToken.WhitespacesAfterCount)
            {
                Pullenti.Ner.ReferentToken rt1 = Pullenti.Ner.Person.PersonAnalyzer.TryAttachPerson(attr.BeginToken, ad, false, -1, false);
                if (rt1 != null)
                {
                    attrs1 = null;
                }
            }
        }
    }
    if (attrs1 != null)
    {
        foreach (PersonAttrToken a in attrs1)
        {
            if (a.Typ != PersonAttrTerminType.Prefix)
            {
                if (a.Age != null)
                {
                    p.AddSlot(Pullenti.Ner.Person.PersonReferent.ATTR_AGE, a.Age, true, 0);
                }
                else if (a.PropRef == null)
                {
                    p.AddSlot(Pullenti.Ner.Person.PersonReferent.ATTR_ATTR, a.Value, false, 0);
                }
                else
                {
                    p.AddSlot(Pullenti.Ner.Person.PersonReferent.ATTR_ATTR, a, false, 0);
                }
                end = a.EndToken;
                if (a.Gender != Pullenti.Morph.MorphGender.Undefined && !p.IsFemale && !p.IsMale)
                {
                    if (a.Gender == Pullenti.Morph.MorphGender.Masculine && !p.IsMale)
                    {
                        p.IsMale = true;
                        p.CorrectData();
                    }
                    else if (a.Gender == Pullenti.Morph.MorphGender.Feminie && !p.IsFemale)
                    {
                        p.IsFemale = true;
                        p.CorrectData();
                    }
                }
            }
        }
        if (openBr)
        {
            if (end.Next != null && end.Next.IsChar(')'))
            {
                end = end.Next;
            }
        }
    }

    // ---- 8. Attach contacts, identity documents and employer organizations that follow. ----
    int crlfCou = 0;
    for (Pullenti.Ner.Token t = end.Next; t != null; t = t.Next)
    {
        if (t.IsTableControlChar)
        {
            break;
        }
        if (t.IsNewlineBefore)
        {
            Pullenti.Ner.Mail.Internal.MailLine ml = Pullenti.Ner.Mail.Internal.MailLine.Parse(t, 0, 0);
            if (ml != null && ml.Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.From)
            {
                break;
            }
            crlfCou++;
        }
        if (t.IsCharOf(":,(") || t.IsHiphen)
        {
            continue;
        }
        if (t.IsChar('.') && t == end.Next)
        {
            continue;
        }
        Pullenti.Ner.Referent r = t.GetReferent();
        if (r != null)
        {
            if (r.TypeName == "PHONE" || r.TypeName == "URI" || r.TypeName == "ADDRESS")
            {
                string ty = r.GetStringValue("SCHEME");
                if (r.TypeName == "URI")
                {
                    // Only these URI schemes are treated as personal contacts.
                    if ((ty != "mailto" && ty != "skype" && ty != "ICQ") && ty != "http")
                    {
                        break;
                    }
                }
                p.AddContact(r);
                end = t;
                crlfCou = 0;
                continue;
            }
        }
        if (r is Pullenti.Ner.Person.PersonIdentityReferent)
        {
            p.AddSlot(Pullenti.Ner.Person.PersonReferent.ATTR_IDDOC, r, false, 0);
            end = t;
            crlfCou = 0;
            continue;
        }
        if (r != null && r.TypeName == "ORGANIZATION")
        {
            if (t.Next != null && t.Next.Morph.Class.IsVerb)
            {
                break;
            }
            if (begin.Previous != null && begin.Previous.Morph.Class.IsVerb)
            {
                break;
            }
            if (t.WhitespacesAfterCount == 1)
            {
                break;
            }
            // Add a generic "сотрудник" property unless an existing attribute already refers to it.
            bool exist = false;
            foreach (Pullenti.Ner.Slot s in p.Slots)
            {
                if (s.TypeName == Pullenti.Ner.Person.PersonReferent.ATTR_ATTR && (s.Value is Pullenti.Ner.Person.PersonPropertyReferent))
                {
                    Pullenti.Ner.Person.PersonPropertyReferent pr = s.Value as Pullenti.Ner.Person.PersonPropertyReferent;
                    if (pr.FindSlot(Pullenti.Ner.Person.PersonPropertyReferent.ATTR_REF, r, true) != null)
                    {
                        exist = true;
                        break;
                    }
                }
                else if (s.TypeName == Pullenti.Ner.Person.PersonReferent.ATTR_ATTR && (s.Value is PersonAttrToken))
                {
                    PersonAttrToken pr = s.Value as PersonAttrToken;
                    if (pr.Referent.FindSlot(Pullenti.Ner.Person.PersonPropertyReferent.ATTR_REF, r, true) != null)
                    {
                        exist = true;
                        break;
                    }
                }
            }
            if (!exist)
            {
                PersonAttrToken pat = new PersonAttrToken(t, t);
                pat.PropRef = new Pullenti.Ner.Person.PersonPropertyReferent() { Name = "сотрудник" };
                pat.PropRef.AddSlot(Pullenti.Ner.Person.PersonPropertyReferent.ATTR_REF, r, false, 0);
                p.AddSlot(Pullenti.Ner.Person.PersonReferent.ATTR_ATTR, pat, false, 0);
            }
            continue;
        }
        if (r != null)
        {
            break;
        }
        // Plain words are skipped only for "best regards" style signatures, for at most one line break.
        if (!hasPrefix || crlfCou >= 2)
        {
            break;
        }
        Pullenti.Ner.ReferentToken rt = t.Kit.ProcessReferent("PERSON", t);
        if (rt != null)
        {
            break;
        }
    }
    if (ad != null)
    {
        ad.OverflowLevel--;
    }

    // ---- 9. "<identity document>[,] НА ИМЯ <person>": link the document to the person. ----
    if (begin.IsValue("НА", null) && begin.Next != null && begin.Next.IsValue("ИМЯ", null))
    {
        Pullenti.Ner.Token t0 = begin.Previous;
        if (t0 != null && t0.IsComma)
        {
            t0 = t0.Previous;
        }
        if (t0 != null && (t0.GetReferent() is Pullenti.Ner.Person.PersonIdentityReferent))
        {
            p.AddSlot(Pullenti.Ner.Person.PersonReferent.ATTR_IDDOC, t0.GetReferent(), false, 0);
        }
    }
    return new Pullenti.Ner.ReferentToken(p, begin, end) { Morph = morph, MiscAttrs = (int)p.m_PersonIdentityTyp };
}
/// <summary>
/// Walks the token chain of <paramref name="kit"/>, recognizes phone numbers and embeds the
/// registered referent tokens back into the chain.
/// </summary>
/// <param name="kit">Analysis kit that supplies the tokens and receives the embedded results.</param>
public override void Process(Pullenti.Ner.Core.AnalysisKit kit) {
    PhoneAnalizerData data = kit.GetAnalyzerData(this) as PhoneAnalizerData;
    for (Pullenti.Ner.Token cur = kit.FirstToken; cur != null; cur = cur.Next) {
        List<Pullenti.Ner.Phone.Internal.PhoneItemToken> items = Pullenti.Ner.Phone.Internal.PhoneItemToken.TryAttachAll(cur, 15);
        if (items == null || items.Count == 0) {
            continue;
        }

        // Scan backwards for a phone referent located shortly before the current position.
        PhoneReferent prevPhone = null;
        int wordsBetween = 0;
        Pullenti.Ner.Token back = cur.Previous;
        while (back != null) {
            PhoneReferent found = back.GetReferent() as PhoneReferent;
            if (found != null) {
                prevPhone = found;
                break;
            }
            if (back is Pullenti.Ner.ReferentToken) {
                // Other referent tokens are stepped over without being counted.
            } else if (back.IsChar(')')) {
                // Jump over a bracketed fragment; give up when no '(' is found within 100 steps.
                Pullenti.Ner.Token open = back.Previous;
                int steps = 0;
                while (open != null && !open.IsChar('(')) {
                    if (++steps > 100) {
                        break;
                    }
                    open = open.Previous;
                }
                if (open == null || !open.IsChar('(')) {
                    break;
                }
                back = open;
            } else if (!back.IsCharOf(",;/\\") && !back.IsAnd) {
                if (++wordsBetween > 5) {
                    break;
                }
                if (back.IsNewlineBefore || back.IsNewlineAfter) {
                    break;
                }
            }
            back = back.Previous;
        }

        // Consume leading prefix items (and a prefix that directly follows the first item).
        int j = 0;
        bool isPhoneBefore = false;
        bool isPref = false;
        PhoneKind kind = PhoneKind.Undefined;
        while (j < items.Count) {
            bool prefixHere = items[j].ItemType == Pullenti.Ner.Phone.Internal.PhoneItemToken.PhoneItemType.Prefix;
            if (!prefixHere) {
                bool prefixNext = j == 0 && ((j + 1) < items.Count) && items[j + 1].ItemType == Pullenti.Ner.Phone.Internal.PhoneItemToken.PhoneItemType.Prefix;
                if (!prefixNext) {
                    break;
                }
                if (kind == PhoneKind.Undefined) {
                    kind = items[0].Kind;
                }
                isPref = true;
                items.RemoveAt(0);
                continue;
            }
            if (kind == PhoneKind.Undefined) {
                kind = items[j].Kind;
            }
            isPref = true;
            isPhoneBefore = true;
            j++;
            if ((j < items.Count) && items[j].ItemType == Pullenti.Ner.Phone.Internal.PhoneItemToken.PhoneItemType.Delim) {
                j++;
            }
        }
        if (prevPhone != null) {
            isPhoneBefore = true;
        }

        // A lone number that follows a URI scheme term is not treated as a phone.
        if (items.Count == 1 && items[0].ItemType == Pullenti.Ner.Phone.Internal.PhoneItemToken.PhoneItemType.Number) {
            Pullenti.Ner.Token before = cur.Previous;
            if ((before is Pullenti.Ner.TextToken) && !before.Chars.IsLetter) {
                before = before.Previous;
            }
            if ((before is Pullenti.Ner.TextToken) && Pullenti.Ner.Uri.UriAnalyzer.m_Schemes.TryParse(before, Pullenti.Ner.Core.TerminParseAttr.No) != null) {
                continue;
            }
        }

        List<Pullenti.Ner.ReferentToken> found_rts = this.TryAttach(items, j, isPhoneBefore, prevPhone);
        if (found_rts == null) {
            // Retry from the first prefix item that occurs later in the list.
            for (j = 1; j < items.Count; j++) {
                if (items[j].ItemType == Pullenti.Ner.Phone.Internal.PhoneItemToken.PhoneItemType.Prefix) {
                    items.RemoveRange(0, j);
                    found_rts = this.TryAttach(items, 1, true, prevPhone);
                    break;
                }
            }
        }
        if (found_rts == null) {
            // Nothing recognized: continue after the last parsed item.
            cur = items[items.Count - 1].EndToken;
            continue;
        }

        bool inheritKind = kind == PhoneKind.Undefined && prevPhone != null && !isPref
            && prevPhone.Kind != PhoneKind.Mobile && wordsBetween == 0;
        if (inheritKind) {
            kind = prevPhone.Kind;
        }
        foreach (Pullenti.Ner.ReferentToken rt in found_rts) {
            PhoneReferent phone = rt.Referent as PhoneReferent;
            if (kind != PhoneKind.Undefined) {
                phone.Kind = kind;
            } else {
                if (rt == found_rts[0] && (rt.WhitespacesBeforeCount < 3)) {
                    Pullenti.Ner.Token lead = rt.BeginToken.Previous;
                    if (lead != null && lead.IsTableControlChar) {
                        lead = lead.Previous;
                    }
                    bool atCellOrLineStart = (lead is Pullenti.Ner.TextToken)
                        && (lead.IsNewlineBefore || (lead.Previous != null && lead.Previous.IsTableControlChar));
                    if (atCellOrLineStart) {
                        // Single-letter markers; each is compared in two spellings as written in the literals.
                        string term = (lead as Pullenti.Ner.TextToken).Term;
                        bool isTel = term == "T" || term == "Т";
                        bool isFax = term == "Ф" || term == "F";
                        bool isMobile = term == "M" || term == "М";
                        if (isTel) {
                            rt.BeginToken = lead;
                        } else if (isFax) {
                            kind = PhoneKind.Fax;
                            phone.Kind = kind;
                            rt.BeginToken = lead;
                        } else if (isMobile) {
                            kind = PhoneKind.Mobile;
                            phone.Kind = kind;
                            rt.BeginToken = lead;
                        }
                    }
                }
                phone.Correct();
            }
            rt.Referent = data.RegisterReferent(rt.Referent);
            kit.EmbedToken(rt);
            cur = rt;
        }
    }
}
/// <summary>
/// Builds a <c>BlockLine</c> for the text line that starts at <paramref name="t"/>: determines the
/// line extent, an optional leading numbering, the block type and simple word statistics.
/// </summary>
/// <param name="t">First token of the line; when null the method returns null.</param>
/// <param name="names">Optional collection of known names; when a name is parsed right after the
/// numbering/keyword and ends at a line break, the line is marked as having an existing name.</param>
/// <returns>
/// The filled line description, or null when <paramref name="t"/> is null or when the first noun
/// phrase is one of the list-like nouns checked below and ends exactly at the end of the line.
/// </returns>
public static BlockLine Create(Pullenti.Ner.Token t, Pullenti.Ner.Core.TerminCollection names) {
    if (t == null) {
        return null;
    }
    BlockLine res = new BlockLine(t, t);
    // The line lasts until the next token that starts a new line.
    for (Pullenti.Ner.Token tt = t; tt != null; tt = tt.Next) {
        if (tt != t && tt.IsNewlineBefore) {
            break;
        } else {
            res.EndToken = tt;
        }
    }
    // Leading numbering: arabic or roman numbers, each followed by '.' or by a non-lowercase text token.
    while (t != null && t.Next != null && t.EndChar <= res.EndChar) {
        if (t is Pullenti.Ner.NumberToken) {
        } else {
            Pullenti.Ner.NumberToken rom = Pullenti.Ner.Core.NumberHelper.TryParseRoman(t);
            if (rom != null && rom.EndToken.Next != null) {
                t = rom.EndToken;
            } else {
                break;
            }
        }
        if (t.Next.IsChar('.')) {
        } else if ((t.Next is Pullenti.Ner.TextToken) && !t.Next.Chars.IsAllLower) {
        } else {
            break;
        }
        res.NumberEnd = t;
        t = t.Next;
        if (t.IsChar('.') && t.Next != null) {
            res.NumberEnd = t;
            t = t.Next;
        }
        if (t.IsNewlineBefore) {
            return res;
        }
    }
    // Block keyword from the ontology, either directly or on the noun of a multi-token noun phrase.
    Pullenti.Ner.Core.TerminToken tok = m_Ontology.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
    if (tok == null) {
        Pullenti.Ner.Core.NounPhraseToken npt1 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
        if (npt1 != null && npt1.EndToken != npt1.BeginToken) {
            tok = m_Ontology.TryParse(npt1.Noun.BeginToken, Pullenti.Ner.Core.TerminParseAttr.No);
        }
    }
    if (tok != null) {
        if (t.Previous != null && t.Previous.IsChar(':')) {
            tok = null;
        }
    }
    if (tok != null) {
        BlkTyps typ = (BlkTyps)tok.Termin.Tag;
        if (typ == BlkTyps.Conslusion) {
            // Accepted only when alone on the line or followed by "<preposition> <chapter keyword>".
            if (t.IsNewlineAfter) {
            } else if (t.Next != null && t.Next.Morph.Class.IsPreposition && t.Next.Next != null) {
                Pullenti.Ner.Core.TerminToken tok2 = m_Ontology.TryParse(t.Next.Next, Pullenti.Ner.Core.TerminParseAttr.No);
                if (tok2 != null && ((BlkTyps)tok2.Termin.Tag) == BlkTyps.Chapter) {
                } else {
                    tok = null;
                }
            } else {
                tok = null;
            }
        }
        if (t.Kit.BaseLanguage != t.Morph.Language) {
            tok = null;
        }
        if (typ == BlkTyps.Index && !t.IsValue("ОГЛАВЛЕНИЕ", null)) {
            if (!t.IsNewlineAfter && t.Next != null) {
                Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t.Next, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                if (npt != null && npt.IsNewlineAfter && npt.Morph.Case.IsGenitive) {
                    tok = null;
                } else if (npt == null) {
                    tok = null;
                }
            }
        }
        if ((typ == BlkTyps.Intro && tok != null && !tok.IsNewlineAfter) && t.IsValue("ВВЕДЕНИЕ", null)) {
            Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t.Next, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
            if (npt != null && npt.Morph.Case.IsGenitive) {
                tok = null;
            }
        }
        if (tok != null) {
            if (res.NumberEnd == null) {
                res.NumberEnd = tok.EndToken;
                if (res.NumberEnd.EndChar > res.EndChar) {
                    res.EndToken = res.NumberEnd;
                }
            }
            res.Typ = typ;
            t = tok.EndToken;
            if (t.Next != null && t.Next.IsCharOf(":.")) {
                t = t.Next;
                res.EndToken = t;
            }
            if (t.IsNewlineAfter || t.Next == null) {
                return res;
            }
            t = t.Next;
        }
    }
    if (t.IsChar('§') && (t.Next is Pullenti.Ner.NumberToken)) {
        res.Typ = BlkTyps.Chapter;
        res.NumberEnd = t;
        t = t.Next;
    }
    if (names != null) {
        Pullenti.Ner.Core.TerminToken tok2 = names.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
        if (tok2 != null && tok2.EndToken.IsNewlineAfter) {
            res.EndToken = tok2.EndToken;
            res.IsExistName = true;
            if (res.Typ == BlkTyps.Undefined) {
                // NOTE(review): with no numbering a null token is passed, so li2 is null and the type
                // becomes Chapter; kept as in the original - confirm this is intended.
                BlockLine li2 = Create((res.NumberEnd == null ? null : res.NumberEnd.Next), null);
                if (li2 != null && ((li2.Typ == BlkTyps.Literature || li2.Typ == BlkTyps.Intro || li2.Typ == BlkTyps.Conslusion))) {
                    res.Typ = li2.Typ;
                } else {
                    res.Typ = BlkTyps.Chapter;
                }
            }
            return res;
        }
    }
    // A trailing number or dot preceded by dots marks a table-of-contents style tail; t1 is moved
    // back over the dots so that they are excluded from the word statistics below.
    Pullenti.Ner.Token t1 = res.EndToken;
    if ((((t1 is Pullenti.Ner.NumberToken) || t1.IsChar('.'))) && t1.Previous != null) {
        t1 = t1.Previous;
        if (t1.IsChar('.')) {
            res.HasContentItemTail = true;
            for (; t1 != null && t1.BeginChar > res.BeginChar; t1 = t1.Previous) {
                if (!t1.IsChar('.')) {
                    break;
                }
            }
        }
    }
    res.IsAllUpper = true;
    for (; t != null && t.EndChar <= t1.EndChar; t = t.Next) {
        if (!(t is Pullenti.Ner.TextToken) || !t.Chars.IsLetter) {
            res.NotWords++;
        } else {
            Pullenti.Morph.MorphClass mc = t.GetMorphClassInDictionary();
            if (mc.IsUndefined) {
                res.NotWords++;
            } else if (t.LengthChar > 2) {
                res.Words++;
            }
            if (!t.Chars.IsAllUpper) {
                res.IsAllUpper = false;
            }
            if ((t as Pullenti.Ner.TextToken).IsPureVerb) {
                // Ordinal comparison: the suffix test is on normalized term text, not linguistic.
                if (!(t as Pullenti.Ner.TextToken).Term.EndsWith("ING", System.StringComparison.Ordinal)) {
                    res.HasVerb = true;
                }
            }
        }
    }
    if (res.Typ == BlkTyps.Undefined) {
        Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse((res.NumberEnd == null ? res.BeginToken : res.NumberEnd.Next), Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
        if (npt != null) {
            if (npt.Noun.IsValue("ХАРАКТЕРИСТИКА", null) || npt.Noun.IsValue("СОДЕРЖАНИЕ", "ЗМІСТ")) {
                // Intro: the head noun may only be followed by genitive noun phrases.
                bool ok = true;
                for (Pullenti.Ner.Token tt = npt.EndToken.Next; tt != null && tt.EndChar <= res.EndChar; tt = tt.Next) {
                    if (tt.IsChar('.')) {
                        continue;
                    }
                    Pullenti.Ner.Core.NounPhraseToken npt2 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(tt, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                    if (npt2 == null || !npt2.Morph.Case.IsGenitive) {
                        ok = false;
                        break;
                    }
                    tt = npt2.EndToken;
                    _extendToLineEnd(res, tt);
                }
                if (ok) {
                    res.Typ = BlkTyps.Intro;
                    res.IsExistName = true;
                }
            } else if (npt.Noun.IsValue("ВЫВОД", "ВИСНОВОК") || npt.Noun.IsValue("РЕЗУЛЬТАТ", "ДОСЛІДЖЕННЯ")) {
                // Conclusion: only result/recommendation/research noun phrases may follow.
                bool ok = true;
                for (Pullenti.Ner.Token tt = npt.EndToken.Next; tt != null && tt.EndChar <= res.EndChar; tt = tt.Next) {
                    if (tt.IsCharOf(",.") || tt.IsAnd) {
                        continue;
                    }
                    Pullenti.Ner.Core.NounPhraseToken npt1 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(tt, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                    if (npt1 != null) {
                        if (npt1.Noun.IsValue("РЕЗУЛЬТАТ", "ДОСЛІДЖЕННЯ") || npt1.Noun.IsValue("РЕКОМЕНДАЦИЯ", "РЕКОМЕНДАЦІЯ") || npt1.Noun.IsValue("ИССЛЕДОВАНИЕ", "ДОСЛІДЖЕННЯ")) {
                            tt = npt1.EndToken;
                            _extendToLineEnd(res, tt);
                            continue;
                        }
                    }
                    ok = false;
                    break;
                }
                if (ok) {
                    res.Typ = BlkTyps.Conslusion;
                    res.IsExistName = true;
                }
            }
            if (res.Typ == BlkTyps.Undefined && npt != null && npt.EndChar <= res.EndChar) {
                // Literature: a publication noun or a list-like noun followed only by allowed noun phrases.
                bool ok = false;
                int publ = 0;
                if (_isPub(npt)) {
                    ok = true;
                    publ = 1;
                } else if ((npt.Noun.IsValue("СПИСОК", null) || npt.Noun.IsValue("УКАЗАТЕЛЬ", "ПОКАЖЧИК") || npt.Noun.IsValue("ПОЛОЖЕНИЕ", "ПОЛОЖЕННЯ")) || npt.Noun.IsValue("ВЫВОД", "ВИСНОВОК") || npt.Noun.IsValue("РЕЗУЛЬТАТ", "ДОСЛІДЖЕННЯ")) {
                    if (npt.EndChar == res.EndChar) {
                        return null;
                    }
                    ok = true;
                }
                if (ok) {
                    if (npt.BeginToken == npt.EndToken && npt.Noun.IsValue("СПИСОК", null) && npt.EndChar == res.EndChar) {
                        ok = false;
                    }
                    for (Pullenti.Ner.Token tt = npt.EndToken.Next; tt != null && tt.EndChar <= res.EndChar; tt = tt.Next) {
                        if (tt.IsCharOf(",.:") || tt.IsAnd || tt.Morph.Class.IsPreposition) {
                            continue;
                        }
                        if (tt.IsValue("ОТРАЖЕНЫ", "ВІДОБРАЖЕНІ")) {
                            continue;
                        }
                        npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(tt, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                        if (npt == null) {
                            ok = false;
                            break;
                        }
                        if (((_isPub(npt) || npt.Noun.IsValue("РАБОТА", "РОБОТА") || npt.Noun.IsValue("ИССЛЕДОВАНИЕ", "ДОСЛІДЖЕННЯ")) || npt.Noun.IsValue("АВТОР", null) || npt.Noun.IsValue("ТРУД", "ПРАЦЯ")) || npt.Noun.IsValue("ТЕМА", null) || npt.Noun.IsValue("ДИССЕРТАЦИЯ", "ДИСЕРТАЦІЯ")) {
                            tt = npt.EndToken;
                            if (_isPub(npt)) {
                                publ++;
                            }
                            _extendToLineEnd(res, tt);
                            continue;
                        }
                        ok = false;
                        break;
                    }
                    if (ok) {
                        res.Typ = BlkTyps.Literature;
                        res.IsExistName = true;
                        // Without any publication noun, a match in the first two thirds of the text
                        // is downgraded: Misc when numbered, Undefined otherwise.
                        if (publ == 0 && (res.EndChar < (((res.Kit.Sofa.Text.Length * 2) / 3)))) {
                            if (res.NumberEnd != null) {
                                res.Typ = BlkTyps.Misc;
                            } else {
                                res.Typ = BlkTyps.Undefined;
                            }
                        }
                    }
                }
            }
        }
    }
    return res;
}

/// <summary>
/// When <paramref name="tt"/> ends beyond the current end of <paramref name="res"/>, moves the end of
/// the line to <paramref name="tt"/> and then forward to the first token followed by a line break.
/// </summary>
static void _extendToLineEnd(BlockLine res, Pullenti.Ner.Token tt) {
    if (tt.EndChar <= res.EndChar) {
        return;
    }
    res.EndToken = tt;
    if (tt.IsNewlineAfter) {
        return;
    }
    for (; res.EndToken.Next != null; res.EndToken = res.EndToken.Next) {
        if (res.EndToken.IsNewlineAfter) {
            break;
        }
    }
}
/// <summary>
/// Tries to assemble one phone referent from the parsed phone items starting at index
/// <paramref name="ind"/>.
/// </summary>
/// <param name="pli">Parsed phone items. The list may be truncated in place when its leading part
/// matches the template of <paramref name="prevPhone"/>.</param>
/// <param name="ind">Index of the first item to use.</param>
/// <param name="isPhoneBefore">Relaxes several checks below; callers pass true when a prefix or an
/// earlier phone was seen (assumption based on the call sites visible in this file).</param>
/// <param name="prevPhone">Previously recognized phone, or null. Its template and number are used
/// to accept and to complete shortened numbers.</param>
/// <param name="lev">Recursion depth; the method gives up when it exceeds 4.</param>
/// <returns>A referent token covering the recognized phone, or null when nothing acceptable was found.</returns>
Pullenti.Ner.ReferentToken _TryAttach_(List<Pullenti.Ner.Phone.Internal.PhoneItemToken> pli, int ind, bool isPhoneBefore, PhoneReferent prevPhone, int lev = 0) {
    if (ind >= pli.Count || lev > 4) return null;
    string countryCode = null;
    string cityCode = null;
    int j = ind;
    // If the leading items reproduce the previous phone's template, cut off everything after them
    // (except a single trailing prefix item).
    if (prevPhone != null && prevPhone.m_Template != null && pli[j].ItemType == Pullenti.Ner.Phone.Internal.PhoneItemToken.PhoneItemType.Number) {
        StringBuilder tmp = new StringBuilder();
        for (int jj = j; jj < pli.Count; jj++) {
            if (pli[jj].ItemType == Pullenti.Ner.Phone.Internal.PhoneItemToken.PhoneItemType.Number)
                tmp.Append(pli[jj].Value.Length);
            else if (pli[jj].ItemType == Pullenti.Ner.Phone.Internal.PhoneItemToken.PhoneItemType.Delim) {
                if (pli[jj].Value == " ") break;
                tmp.Append(pli[jj].Value);
                continue;
            }
            else break;
            string templ0 = tmp.ToString();
            if (templ0 == prevPhone.m_Template) {
                if ((jj + 1) < pli.Count) {
                    if (pli[jj + 1].ItemType == Pullenti.Ner.Phone.Internal.PhoneItemToken.PhoneItemType.Prefix && (jj + 2) == pli.Count) { }
                    else pli.RemoveRange(jj + 1, pli.Count - jj - 1);
                }
                break;
            }
        }
    }
    // Country code: explicit item, or an item that can be a country prefix when the rest parses as a phone.
    if ((j < pli.Count) && pli[j].ItemType == Pullenti.Ner.Phone.Internal.PhoneItemToken.PhoneItemType.CountryCode) {
        countryCode = pli[j].Value;
        if (countryCode != "8") {
            string cc = Pullenti.Ner.Phone.Internal.PhoneHelper.GetCountryPrefix(countryCode);
            if (cc != null && (cc.Length < countryCode.Length)) {
                cityCode = countryCode.Substring(cc.Length);
                countryCode = cc;
            }
        }
        j++;
    }
    else if ((j < pli.Count) && pli[j].CanBeCountryPrefix) {
        int k = j + 1;
        if ((k < pli.Count) && pli[k].ItemType == Pullenti.Ner.Phone.Internal.PhoneItemToken.PhoneItemType.Delim) k++;
        Pullenti.Ner.ReferentToken rrt = this._TryAttach_(pli, k, isPhoneBefore, null, lev + 1);
        if (rrt != null) {
            // A "ddd-ddd..." start after a phone marker is kept as part of the number itself.
            if ((((isPhoneBefore && pli[j + 1].ItemType == Pullenti.Ner.Phone.Internal.PhoneItemToken.PhoneItemType.Delim && pli[j + 1].BeginToken.IsHiphen) && pli[j].ItemType == Pullenti.Ner.Phone.Internal.PhoneItemToken.PhoneItemType.Number && pli[j].Value.Length == 3) && ((j + 2) < pli.Count) && pli[j + 2].ItemType == Pullenti.Ner.Phone.Internal.PhoneItemToken.PhoneItemType.Number) && pli[j + 2].Value.Length == 3) { }
            else {
                countryCode = pli[j].Value;
                j++;
            }
        }
    }
    // A number item starting with '8' or '7' while no country code is known yet.
    if (((j < pli.Count) && pli[j].ItemType == Pullenti.Ner.Phone.Internal.PhoneItemToken.PhoneItemType.Number && ((pli[j].Value[0] == '8' || pli[j].Value[0] == '7'))) && countryCode == null) {
        if (pli[j].Value.Length == 1) {
            countryCode = pli[j].Value;
            j++;
        }
        else if (pli[j].Value.Length == 4) {
            countryCode = pli[j].Value.Substring(0, 1);
            if (cityCode == null) cityCode = pli[j].Value.Substring(1);
            else cityCode += pli[j].Value.Substring(1);
            j++;
        }
        else if (pli[j].Value.Length == 11 && j == (pli.Count - 1) && isPhoneBefore) {
            // A single 11-digit item: first digit is the country code ('8' is dropped), the rest is the number.
            PhoneReferent ph0 = new PhoneReferent();
            if (pli[j].Value[0] != '8') ph0.CountryCode = pli[j].Value.Substring(0, 1);
            ph0.Number = pli[j].Value.Substring(1, 3) + pli[j].Value.Substring(4);
            return new Pullenti.Ner.ReferentToken(ph0, pli[0].BeginToken, pli[j].EndToken);
        }
        else if (cityCode == null && pli[j].Value.Length > 3 && ((j + 1) < pli.Count)) {
            int sum = 0;
            foreach (Pullenti.Ner.Phone.Internal.PhoneItemToken it in pli) {
                if (it.ItemType == Pullenti.Ner.Phone.Internal.PhoneItemToken.PhoneItemType.Number) sum += it.Value.Length;
            }
            if (sum == 11) {
                cityCode = pli[j].Value.Substring(1);
                j++;
            }
        }
    }
    if ((j < pli.Count) && pli[j].ItemType == Pullenti.Ner.Phone.Internal.PhoneItemToken.PhoneItemType.CityCode) {
        if (cityCode == null) cityCode = pli[j].Value;
        else cityCode += pli[j].Value;
        j++;
    }
    if ((j < pli.Count) && pli[j].ItemType == Pullenti.Ner.Phone.Internal.PhoneItemToken.PhoneItemType.Delim) j++;
    if ((countryCode == "8" && cityCode == null && ((j + 3) < pli.Count)) && pli[j].ItemType == Pullenti.Ner.Phone.Internal.PhoneItemToken.PhoneItemType.Number) {
        if (pli[j].Value.Length == 3 || pli[j].Value.Length == 4) {
            cityCode = pli[j].Value;
            j++;
            if ((j < pli.Count) && pli[j].ItemType == Pullenti.Ner.Phone.Internal.PhoneItemToken.PhoneItemType.Delim) j++;
        }
    }
    int normalNumLen = 0;
    if (countryCode == "421") normalNumLen = 9;
    StringBuilder num = new StringBuilder();
    StringBuilder templ = new StringBuilder();
    List<int> partLength = new List<int>();
    string delim = null;
    bool ok = false;
    string additional = null;
    bool std = false;
    // Fixed "number delim number delim number" layout right after the country code.
    if (countryCode != null && ((j + 4) < pli.Count) && j > 0) {
        if (((((pli[j - 1].Value == "-" || pli[j - 1].ItemType == Pullenti.Ner.Phone.Internal.PhoneItemToken.PhoneItemType.CountryCode)) && pli[j].ItemType == Pullenti.Ner.Phone.Internal.PhoneItemToken.PhoneItemType.Number && pli[j + 1].ItemType == Pullenti.Ner.Phone.Internal.PhoneItemToken.PhoneItemType.Delim) && pli[j + 2].ItemType == Pullenti.Ner.Phone.Internal.PhoneItemToken.PhoneItemType.Number && pli[j + 3].ItemType == Pullenti.Ner.Phone.Internal.PhoneItemToken.PhoneItemType.Delim) && pli[j + 4].ItemType == Pullenti.Ner.Phone.Internal.PhoneItemToken.PhoneItemType.Number) {
            if ((((pli[j].Value.Length + pli[j + 2].Value.Length) == 6 || ((pli[j].Value.Length == 4 && pli[j + 2].Value.Length == 5)))) && ((pli[j + 4].Value.Length == 4 || pli[j + 4].Value.Length == 1))) {
                num.Append(pli[j].Value);
                num.Append(pli[j + 2].Value);
                num.Append(pli[j + 4].Value);
                templ.AppendFormat("{0}{1}{2}{3}{4}", pli[j].Value.Length, pli[j + 1].Value, pli[j + 2].Value.Length, pli[j + 3].Value, pli[j + 4].Value.Length);
                std = true;
                ok = true;
                j += 5;
            }
        }
    }
    // General case: accumulate digits part by part, recording part lengths and the template.
    for (; j < pli.Count; j++) {
        if (std) break;
        if (pli[j].ItemType == Pullenti.Ner.Phone.Internal.PhoneItemToken.PhoneItemType.Delim) {
            if (pli[j].IsInBrackets) continue;
            if (j > 0 && pli[j - 1].IsInBrackets) continue;
            if (templ.Length > 0) templ.Append(pli[j].Value);
            if (delim == null) delim = pli[j].Value;
            else if (pli[j].Value != delim) {
                // A change of delimiter after "ddd(d) ddd" turns the first part into the city code.
                if ((partLength.Count == 2 && ((partLength[0] == 3 || partLength[0] == 4)) && cityCode == null) && partLength[1] == 3) {
                    cityCode = num.ToString().Substring(0, partLength[0]);
                    num.Remove(0, partLength[0]);
                    partLength.RemoveAt(0);
                    delim = pli[j].Value;
                    continue;
                }
                if (isPhoneBefore && ((j + 1) < pli.Count) && pli[j + 1].ItemType == Pullenti.Ner.Phone.Internal.PhoneItemToken.PhoneItemType.Number) {
                    if (num.Length < 6) continue;
                    if (normalNumLen > 0 && (num.Length + pli[j + 1].Value.Length) == normalNumLen) continue;
                }
                break;
            }
            else continue;
            // Reached only for the first delimiter seen (delim was null above).
            ok = false;
        }
        else if (pli[j].ItemType == Pullenti.Ner.Phone.Internal.PhoneItemToken.PhoneItemType.Number) {
            if (num.Length == 0 && pli[j].BeginToken.Previous != null && pli[j].BeginToken.Previous.IsTableControlChar) {
                Pullenti.Ner.Token tt = pli[pli.Count - 1].EndToken.Next;
                if (tt != null && tt.IsCharOf(",.")) tt = tt.Next;
                if (tt is Pullenti.Ner.NumberToken) return null;
            }
            if ((num.Length + pli[j].Value.Length) > 13) {
                if (j > 0 && pli[j - 1].ItemType == Pullenti.Ner.Phone.Internal.PhoneItemToken.PhoneItemType.Delim) j--;
                ok = true;
                break;
            }
            num.Append(pli[j].Value);
            partLength.Add(pli[j].Value.Length);
            templ.Append(pli[j].Value.Length);
            ok = true;
            if (num.Length > 10) {
                j++;
                if ((j < pli.Count) && pli[j].ItemType == Pullenti.Ner.Phone.Internal.PhoneItemToken.PhoneItemType.AddNumber) {
                    additional = pli[j].Value;
                    j++;
                }
                break;
            }
        }
        else if (pli[j].ItemType == Pullenti.Ner.Phone.Internal.PhoneItemToken.PhoneItemType.AddNumber) {
            additional = pli[j].Value;
            j++;
            break;
        }
        else break;
    }
    // A final bracketed 3-4 character item is taken as the additional number.
    if ((j == (pli.Count - 1) && pli[j].IsInBrackets && ((pli[j].Value.Length == 3 || pli[j].Value.Length == 4))) && additional == null) {
        additional = pli[j].Value;
        j++;
    }
    if ((j < pli.Count) && pli[j].ItemType == Pullenti.Ner.Phone.Internal.PhoneItemToken.PhoneItemType.Prefix && pli[j].IsInBrackets) {
        isPhoneBefore = true;
        j++;
    }
    // Try to split a long city code into country prefix + city code.
    if ((countryCode == null && cityCode != null && cityCode.Length > 3) && (num.Length < 8) && cityCode[0] != '8') {
        if ((cityCode.Length + num.Length) == 10) { }
        else {
            string cc = Pullenti.Ner.Phone.Internal.PhoneHelper.GetCountryPrefix(cityCode);
            if (cc != null) {
                if (cc.Length > 1 && (cityCode.Length - cc.Length) > 1) {
                    countryCode = cc;
                    cityCode = cityCode.Substring(cc.Length);
                }
            }
        }
    }
    if (countryCode == null && cityCode != null && cityCode.StartsWith("00")) {
        string cc = Pullenti.Ner.Phone.Internal.PhoneHelper.GetCountryPrefix(cityCode.Substring(2));
        if (cc != null) {
            if (cityCode.Length > (cc.Length + 3)) {
                countryCode = cc;
                cityCode = cityCode.Substring(cc.Length + 2);
            }
        }
    }
    // No digits collected: the "city code" may actually hold the whole number.
    if (num.Length == 0 && cityCode != null) {
        if (cityCode.Length == 10) {
            num.Append(cityCode.Substring(3));
            partLength.Add(num.Length);
            cityCode = cityCode.Substring(0, 3);
            ok = true;
        }
        else if (((cityCode.Length == 9 || cityCode.Length == 11 || cityCode.Length == 8)) && ((isPhoneBefore || countryCode != null))) {
            num.Append(cityCode);
            partLength.Add(num.Length);
            cityCode = null;
            ok = true;
        }
    }
    if (num.Length < 4) ok = false;
    // Short numbers need extra evidence.
    if (num.Length < 7) {
        if (cityCode != null && (cityCode.Length + num.Length) > 7) {
            if (!isPhoneBefore && cityCode.Length == 3) {
                int ii;
                for (ii = 0; ii < partLength.Count; ii++) {
                    if (partLength[ii] == 3) { }
                    else if (partLength[ii] > 3) break;
                    else if ((ii < (partLength.Count - 1)) || (partLength[ii] < 2)) break;
                }
                if (ii >= partLength.Count) {
                    if (countryCode == "61") { }
                    else ok = false;
                }
            }
        }
        else if (((num.Length == 6 || num.Length == 5)) && ((partLength.Count >= 1 && partLength.Count <= 3)) && isPhoneBefore) {
            if (pli[0].ItemType == Pullenti.Ner.Phone.Internal.PhoneItemToken.PhoneItemType.Prefix && pli[0].Kind == PhoneKind.Home) ok = false;
        }
        else if (prevPhone != null && prevPhone.Number != null && ((prevPhone.Number.Length == num.Length || prevPhone.Number.Length == (num.Length + 3) || prevPhone.Number.Length == (num.Length + 4)))) { }
        else if (num.Length > 4 && prevPhone != null && templ.ToString() == prevPhone.m_Template) ok = true;
        else ok = false;
    }
    if (delim == "." && countryCode == null && cityCode == null) ok = false;
    if ((isPhoneBefore && countryCode == null && cityCode == null) && num.Length > 10) {
        string cc = Pullenti.Ner.Phone.Internal.PhoneHelper.GetCountryPrefix(num.ToString());
        if (cc != null) {
            if ((num.Length - cc.Length) == 9) {
                countryCode = cc;
                num.Remove(0, cc.Length);
                ok = true;
            }
        }
    }
    // Plausibility of the part-length pattern.
    if (ok) {
        if (std) { }
        else if (prevPhone != null && prevPhone.Number != null && (((prevPhone.Number.Length == num.Length || prevPhone.Number.Length == (num.Length + 3) || prevPhone.Number.Length == (num.Length + 4)) || prevPhone.m_Template == templ.ToString()))) { }
        else if ((partLength.Count == 3 && partLength[0] == 3 && partLength[1] == 2) && partLength[2] == 2) { }
        else if (partLength.Count == 3 && isPhoneBefore) { }
        else if ((partLength.Count == 4 && ((partLength[0] + partLength[1]) == 3) && partLength[2] == 2) && partLength[3] == 2) { }
        else if ((partLength.Count == 4 && partLength[0] == 3 && partLength[1] == 3) && partLength[2] == 2 && partLength[3] == 2) { }
        else if (partLength.Count == 5 && (partLength[1] + partLength[2]) == 4 && (partLength[3] + partLength[4]) == 4) { }
        else if (partLength.Count > 4) ok = false;
        else if (partLength.Count > 3 && cityCode != null) ok = false;
        else if ((isPhoneBefore || cityCode != null || countryCode != null) || additional != null) ok = true;
        else {
            ok = false;
            if (((num.Length == 6 || num.Length == 7)) && (partLength.Count < 4) && j > 0) {
                // Accept when the next phone in the text has a compatible length.
                PhoneReferent nextPh = this.GetNextPhone(pli[j - 1].EndToken.Next, lev + 1);
                if (nextPh != null && nextPh.Number != null) {
                    int d = nextPh.Number.Length - num.Length;
                    if (d == 0 || d == 3 || d == 4) ok = true;
                }
            }
        }
    }
    Pullenti.Ner.Token end = (j > 0 ? pli[j - 1].EndToken : null);
    if (end == null) ok = false;
    // A bare number with no supporting context must be followed by whitespace.
    if ((ok && cityCode == null && countryCode == null) && prevPhone == null && !isPhoneBefore) {
        if (!end.IsWhitespaceAfter && end.Next != null) {
            Pullenti.Ner.Token tt = end.Next;
            if (tt.IsCharOf(".,)") && tt.Next != null) tt = tt.Next;
            if (!tt.IsWhitespaceBefore) ok = false;
        }
    }
    if (!ok) return null;
    // Drop a trailing delimiter from the template.
    if (templ.Length > 0 && !char.IsDigit(templ[templ.Length - 1])) templ.Length--;
    if ((countryCode == null && cityCode != null && cityCode.Length > 3) && num.Length > 6) {
        string cc = Pullenti.Ner.Phone.Internal.PhoneHelper.GetCountryPrefix(cityCode);
        if (cc != null && ((cc.Length + 1) < cityCode.Length)) {
            countryCode = cc;
            cityCode = cityCode.Substring(cc.Length);
        }
    }
    if (pli[0].BeginToken.Previous != null) {
        if (pli[0].BeginToken.Previous.IsValue("ГОСТ", null) || pli[0].BeginToken.Previous.IsValue("ТУ", null)) return null;
    }
    PhoneReferent ph = new PhoneReferent();
    if (countryCode != null) ph.CountryCode = countryCode;
    string number = num.ToString();
    if ((cityCode == null && num.Length > 7 && partLength.Count > 0) && (partLength[0] < 5)) {
        cityCode = number.Substring(0, partLength[0]);
        number = number.Substring(partLength[0]);
    }
    if (cityCode == null && num.Length == 11 && num[0] == '8') {
        cityCode = number.Substring(1, 3);
        number = number.Substring(4);
    }
    if (cityCode == null && num.Length == 10) {
        cityCode = number.Substring(0, 3);
        number = number.Substring(3);
    }
    if (cityCode != null) number = cityCode + number;
    else if (countryCode == null && prevPhone != null && prevPhone.Number != null) {
        // Complete a shortened number with the leading digits of the previous phone.
        // The Number null check mirrors the guards used for prevPhone earlier in this method.
        bool ok1 = false;
        if (prevPhone.Number.Length >= (number.Length + 2)) ok1 = true;
        else if (templ.Length > 0 && prevPhone.m_Template != null && Pullenti.Morph.LanguageHelper.EndsWith(prevPhone.m_Template, templ.ToString())) ok1 = true;
        if (ok1 && prevPhone.Number.Length > number.Length) number = prevPhone.Number.Substring(0, prevPhone.Number.Length - number.Length) + number;
    }
    if (ph.CountryCode == null && prevPhone != null && prevPhone.CountryCode != null) {
        if (prevPhone.Number != null && prevPhone.Number.Length == number.Length) ph.CountryCode = prevPhone.CountryCode;
    }
    // Reject numbers consisting only of zeros (or empty).
    ok = false;
    foreach (char d in number) {
        if (d != '0') {
            ok = true;
            break;
        }
    }
    if (!ok) return null;
    if (countryCode != null) {
        if (number.Length < 7) return null;
    }
    else {
        string s = Pullenti.Ner.Phone.Internal.PhoneHelper.GetCountryPrefix(number);
        if (s != null) {
            string num2 = number.Substring(s.Length);
            if (num2.Length >= 10 && num2.Length <= 11) {
                number = num2;
                if (s != "7") ph.CountryCode = s;
            }
        }
        if (number.Length == 8 && prevPhone == null) return null;
    }
    if (number.Length > 11) {
        if ((number.Length < 14) && ((countryCode == "1" || countryCode == "43"))) { }
        else return null;
    }
    ph.Number = number;
    if (additional != null) ph.AddSlot(PhoneReferent.ATTR_ADDNUMBER, additional, true, 0);
    if (!isPhoneBefore && end.Next != null && !end.IsNewlineAfter) {
        if (end.Next.IsCharOf("+=") || end.Next.IsHiphen) return null;
    }
    if (countryCode == "7") {
        if (number.Length != 10) return null;
    }
    ph.m_Template = templ.ToString();
    // A trailing prefix item on the same line is included and may define the kind.
    if (j == (pli.Count - 1) && pli[j].ItemType == Pullenti.Ner.Phone.Internal.PhoneItemToken.PhoneItemType.Prefix && !pli[j].IsNewlineBefore) {
        end = pli[j].EndToken;
        if (pli[j].Kind != PhoneKind.Undefined) ph.Kind = pli[j].Kind;
    }
    Pullenti.Ner.ReferentToken res = new Pullenti.Ner.ReferentToken(ph, pli[0].BeginToken, end);
    // A prefix separated from the number by a table control char is left out of the result span.
    if (pli.Count > 1 && pli[0].ItemType == Pullenti.Ner.Phone.Internal.PhoneItemToken.PhoneItemType.Prefix && pli[0].EndToken.Next != null && pli[0].EndToken.Next.IsTableControlChar) res.BeginToken = pli[1].BeginToken;
    return res;
}
/// <summary>
/// Tries to recognize a title-page item (theme marker, translation marker, section line,
/// speciality, document type or another term from <c>m_Termins</c>) starting at <paramref name="t"/>.
/// </summary>
/// <param name="t">First token of the candidate; may be null (the method calls itself with
/// <c>Next</c> tokens that can be null).</param>
/// <returns>The recognized item, or null when nothing matches.</returns>
public static TitleItemToken TryAttach(Pullenti.Ner.Token t) {
    if (t == null) {
        return null;
    }
    Pullenti.Ner.TextToken tt = t as Pullenti.Ner.TextToken;
    if (tt != null) {
        Pullenti.Ner.Token t1 = (Pullenti.Ner.Token)tt;
        if (tt.Term == "ТЕМА") {
            // "ТЕМА <type>[:]" gives a combined type-and-theme item, otherwise a plain theme marker.
            TitleItemToken tit = TryAttach(tt.Next);
            if (tit != null && tit.Typ == Types.Typ) {
                t1 = tit.EndToken;
                if (t1.Next != null && t1.Next.IsChar(':')) {
                    t1 = t1.Next;
                }
                return new TitleItemToken(t, t1, Types.TypAndTheme) { Value = tit.Value };
            }
            if (tt.Next != null && tt.Next.IsChar(':')) {
                t1 = tt.Next;
            }
            return new TitleItemToken(tt, t1, Types.Theme);
        }
        if (tt.Term == "ПО" || tt.Term == "НА") {
            if (tt.Next != null && tt.Next.IsValue("ТЕМА", null)) {
                t1 = tt.Next;
                if (t1.Next != null && t1.Next.IsChar(':')) {
                    t1 = t1.Next;
                }
                return new TitleItemToken(tt, t1, Types.Theme);
            }
        }
        if (tt.Term == "ПЕРЕВОД" || tt.Term == "ПЕР") {
            Pullenti.Ner.Token tt2 = tt.Next;
            if (tt2 != null && tt2.IsChar('.')) {
                tt2 = tt2.Next;
            }
            if (tt2 is Pullenti.Ner.TextToken) {
                // The one-letter word is compared in two spellings, exactly as in the literals.
                if ((tt2 as Pullenti.Ner.TextToken).Term == "C" || (tt2 as Pullenti.Ner.TextToken).Term == "С") {
                    tt2 = tt2.Next;
                    if (tt2 is Pullenti.Ner.TextToken) {
                        return new TitleItemToken(t, tt2, Types.Translate);
                    }
                }
            }
        }
        if (tt.Term == "СЕКЦИЯ" || tt.Term == "SECTION" || tt.Term == "СЕКЦІЯ") {
            // Section line: a bracketed name, or (after ':') everything up to the end of the line.
            t1 = tt.Next;
            if (t1 != null && t1.IsChar(':')) {
                t1 = t1.Next;
            }
            Pullenti.Ner.Core.BracketSequenceToken br = Pullenti.Ner.Core.BracketHelper.TryParse(t1, Pullenti.Ner.Core.BracketParseAttr.No, 100);
            if (br != null) {
                t1 = br.EndToken;
            } else if (t1 != tt.Next) {
                for (; t1 != null; t1 = t1.Next) {
                    if (t1.IsNewlineAfter) {
                        break;
                    }
                }
                if (t1 == null) {
                    return null;
                }
            }
            if (t1 != tt.Next) {
                return new TitleItemToken(tt, t1, Types.Dust);
            }
        }
        t1 = null;
        if (tt.IsValue("СПЕЦИАЛЬНОСТЬ", "СПЕЦІАЛЬНІСТЬ")) {
            t1 = tt.Next;
        } else if (tt.Morph.Class.IsPreposition && tt.Next != null && tt.Next.IsValue("СПЕЦИАЛЬНОСТЬ", "СПЕЦІАЛЬНІСТЬ")) {
            t1 = tt.Next.Next;
        } else if (tt.IsChar('/') && tt.IsNewlineBefore) {
            t1 = tt.Next;
        }
        if (t1 != null) {
            if (t1.IsCharOf(":") || t1.IsHiphen) {
                t1 = t1.Next;
            }
            TitleItemToken spec = TryAttachSpeciality(t1, true);
            if (spec != null) {
                spec.BeginToken = t;
                return spec;
            }
        }
    }
    TitleItemToken sss = TryAttachSpeciality(t, false);
    if (sss != null) {
        return sss;
    }
    if (t is Pullenti.Ner.ReferentToken) {
        return null;
    }
    Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
    if (npt != null) {
        string s = npt.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false);
        Pullenti.Ner.Core.TerminToken tok = m_Termins.TryParse(npt.EndToken, Pullenti.Ner.Core.TerminParseAttr.No);
        if (tok != null) {
            Types ty = (Types)tok.Termin.Tag;
            if (ty == Types.Typ) {
                TitleItemToken tit = TryAttach(tok.EndToken.Next);
                if (tit != null && tit.Typ == Types.Theme) {
                    // The original text had the ';' after the closing brace of this block,
                    // which left the return statement unterminated.
                    return new TitleItemToken(npt.BeginToken, tit.EndToken, Types.TypAndTheme) { Value = s };
                }
                if (s == "РАБОТА" || s == "РОБОТА" || s == "ПРОЕКТ") {
                    return null;
                }
                Pullenti.Ner.Token t1 = tok.EndToken;
                if (s == "ДИССЕРТАЦИЯ" || s == "ДИСЕРТАЦІЯ") {
                    // Look ahead for the degree to refine the dissertation kind; at most 4 unmatched tokens.
                    int err = 0;
                    for (Pullenti.Ner.Token ttt = t1.Next; ttt != null; ttt = ttt.Next) {
                        if (ttt.Morph.Class.IsPreposition) {
                            continue;
                        }
                        if (ttt.IsValue("СОИСКАНИЕ", "")) {
                            continue;
                        }
                        Pullenti.Ner.Core.NounPhraseToken npt1 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(ttt, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                        if (npt1 != null && npt1.Noun.IsValue("СТЕПЕНЬ", "СТУПІНЬ")) {
                            t1 = (ttt = npt1.EndToken);
                            continue;
                        }
                        Pullenti.Ner.ReferentToken rt = t1.Kit.ProcessReferent("PERSON", ttt);
                        if (rt != null && (rt.Referent is Pullenti.Ner.Person.PersonPropertyReferent)) {
                            Pullenti.Ner.Person.PersonPropertyReferent ppr = rt.Referent as Pullenti.Ner.Person.PersonPropertyReferent;
                            if (ppr.Name == "доктор наук") {
                                t1 = rt.EndToken;
                                s = "ДОКТОРСКАЯ ДИССЕРТАЦИЯ";
                                break;
                            } else if (ppr.Name == "кандидат наук") {
                                t1 = rt.EndToken;
                                s = "КАНДИДАТСКАЯ ДИССЕРТАЦИЯ";
                                break;
                            } else if (ppr.Name == "магистр") {
                                t1 = rt.EndToken;
                                s = "МАГИСТЕРСКАЯ ДИССЕРТАЦИЯ";
                                break;
                            }
                        }
                        if (ttt.IsValue("ДОКТОР", null) || ttt.IsValue("КАНДИДАТ", null) || ttt.IsValue("МАГИСТР", "МАГІСТР")) {
                            t1 = ttt;
                            npt1 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(ttt.Next, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                            if (npt1 != null && npt1.EndToken.IsValue("НАУК", null)) {
                                t1 = npt1.EndToken;
                            }
                            s = (ttt.IsValue("МАГИСТР", "МАГІСТР") ? "МАГИСТЕРСКАЯ ДИССЕРТАЦИЯ" : (ttt.IsValue("ДОКТОР", null) ? "ДОКТОРСКАЯ ДИССЕРТАЦИЯ" : "КАНДИДАТСКАЯ ДИССЕРТАЦИЯ"));
                            break;
                        }
                        if ((++err) > 3) {
                            break;
                        }
                    }
                }
                if (t1.Next != null && t1.Next.IsChar('.')) {
                    t1 = t1.Next;
                }
                // Ordinal comparison: s is normalized text and the suffix test is not linguistic.
                if (s.EndsWith("ОТЧЕТ", System.StringComparison.Ordinal) && t1.Next != null && t1.Next.IsValue("О", null)) {
                    Pullenti.Ner.Core.NounPhraseToken npt1 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t1.Next, Pullenti.Ner.Core.NounPhraseParseAttr.ParsePreposition, 0, null);
                    if (npt1 != null && npt1.Morph.Case.IsPrepositional) {
                        t1 = npt1.EndToken;
                    }
                }
                return new TitleItemToken(npt.BeginToken, t1, ty) { Value = s };
            }
        }
    }
    Pullenti.Ner.Core.TerminToken tok1 = m_Termins.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
    if (tok1 != null) {
        Pullenti.Ner.Token t1 = tok1.EndToken;
        TitleItemToken re = new TitleItemToken(tok1.BeginToken, t1, (Types)tok1.Termin.Tag);
        return re;
    }
    // A term enclosed in brackets: the item ends on the closing bracket.
    if (Pullenti.Ner.Core.BracketHelper.CanBeStartOfSequence(t, false, false)) {
        tok1 = m_Termins.TryParse(t.Next, Pullenti.Ner.Core.TerminParseAttr.No);
        if (tok1 != null && Pullenti.Ner.Core.BracketHelper.CanBeEndOfSequence(tok1.EndToken.Next, false, null, false)) {
            Pullenti.Ner.Token t1 = tok1.EndToken.Next;
            return new TitleItemToken(tok1.BeginToken, t1, (Types)tok1.Termin.Tag);
        }
    }
    return null;
}
/// <summary>
/// Tries to recognise an additional (extension) phone number starting at <paramref name="t0"/>.
/// </summary>
/// <remarks>
/// Two shapes are accepted:
/// 1. a short form "*NNN" / "#NNN", optionally continued as "NNN-NNN" without whitespace,
///    whose digits are 3..6 characters long;
/// 2. an optional leading comma or opening round bracket, then a term from m_PhoneTermins
///    (mandatory unless a bracket was opened), an optional number marker, an optional ':' / ',',
///    and a number of 2..7 characters (optionally "NNN-NNN"). If a bracket was opened,
///    the closing bracket must directly follow the number and becomes the end of the item.
/// </remarks>
/// <param name="t0">First token of the candidate; may be null.</param>
/// <returns>An item with ItemType = AddNumber and the collected digits in Value, or null.</returns>
public static PhoneItemToken TryAttachAdditional(Pullenti.Ner.Token t0) {
    Pullenti.Ner.Token t = t0;
    if (t == null) {
        return null;
    }
    if (t.IsChar(',')) {
        t = t.Next;
    } else if (t.IsCharOf("*#") && (t.Next is Pullenti.Ner.NumberToken)) {
        // Short form: "*123" or "#12-34".
        string val0 = (t.Next as Pullenti.Ner.NumberToken).GetSourceText();
        Pullenti.Ner.Token t1 = t.Next;
        if ((t1.Next != null && t1.Next.IsHiphen && !t1.IsWhitespaceAfter) && (t1.Next.Next is Pullenti.Ner.NumberToken) && !t1.Next.IsWhitespaceAfter) {
            t1 = t1.Next.Next;
            val0 += t1.GetSourceText();
        }
        if (val0.Length >= 3 && (val0.Length < 7)) {
            return new PhoneItemToken(t, t1) { ItemType = PhoneItemType.AddNumber, Value = val0 };
        }
        // Otherwise fall through and try the long form from the same token.
    }
    if (t == null) {
        // The comma was the last token: nothing left to analyse.
        return null;
    }
    bool br = false;
    if (t.IsChar('(')) {
        if (t.Previous != null && t.Previous.IsComma) {
            return null;
        }
        br = true;
        t = t.Next;
    }
    Pullenti.Ner.Core.TerminToken to = m_PhoneTermins.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
    if (to == null) {
        // Without a keyword the number is accepted only inside brackets placed close to the previous text.
        if (!br) {
            return null;
        }
        if (t0.WhitespacesBeforeCount > 1) {
            return null;
        }
    } else if (to.Termin.Tag == null) {
        return null;
    } else {
        t = to.EndToken.Next;
    }
    if (t == null) {
        return null;
    }
    if (((t.IsValue("НОМЕР", null) || t.IsValue("N", null) || t.IsValue("#", null)) || t.IsValue("№", null) || t.IsValue("NUMBER", null)) || ((t.IsChar('+') && br))) {
        t = t.Next;
    } else if (t.IsValue("НОМ", null) || t.IsValue("ТЕЛ", null)) {
        // Abbreviated marker, possibly followed by a dot.
        t = t.Next;
        if (t != null && t.IsChar('.')) {
            t = t.Next;
        }
    }
    if (t != null && t.IsCharOf(":,") && !t.IsNewlineAfter) {
        t = t.Next;
    }
    if (!(t is Pullenti.Ner.NumberToken)) {
        return null;
    }
    string val = (t as Pullenti.Ner.NumberToken).GetSourceText();
    if ((t.Next != null && t.Next.IsHiphen && !t.IsWhitespaceAfter) && (t.Next.Next is Pullenti.Ner.NumberToken)) {
        val += t.Next.Next.GetSourceText();
        t = t.Next.Next;
    }
    if ((val.Length < 2) || val.Length > 7) {
        return null;
    }
    if (br) {
        if (t.Next == null || !t.Next.IsChar(')')) {
            return null;
        }
        t = t.Next;
    }
    return new PhoneItemToken(t0, t) { ItemType = PhoneItemType.AddNumber, Value = val };
}
/// <summary>
/// Collects the textual content of a URI-like sequence starting at <paramref name="t0"/>.
/// </summary>
/// <remarks>
/// A domain name is first tried via AttachDomainName; a domain shorter than 3 characters rejects the
/// whole attempt. Then number tokens, letter tokens, single-token Latin referents and non-letter
/// characters listed in <paramref name="chars"/> are appended while round/square brackets stay balanced.
/// A whitespace gap ends the scan unless <paramref name="canBeWhitespaces"/> is set, a domain was found
/// and the continuation looks like a part of the same address. A trailing '.' or '/' is dropped.
/// </remarks>
/// <param name="t0">First token of the candidate.</param>
/// <param name="chars">Non-letter characters that may occur inside the content.</param>
/// <param name="canBeWhitespaces">Whether whitespace inside the address is tolerated.</param>
/// <returns>
/// The item spanning the collected tokens; the bare domain item (possibly null) when nothing with a
/// letter or digit was collected; null when the value is just "www" (ignoring case and a leading "//").
/// </returns>
static UriItemToken _AttachUriContent(Pullenti.Ner.Token t0, string chars, bool canBeWhitespaces = false) {
    UriItemToken domain = AttachDomainName(t0, true, canBeWhitespaces);
    if (domain != null && domain.Value.Length < 3) {
        return null;
    }

    StringBuilder content = new StringBuilder();
    Pullenti.Ner.Token last = t0;
    char pendingBracket = '\0';
    Pullenti.Ner.Token cur = (domain == null) ? t0 : domain.EndToken.Next;

    for (; cur != null; cur = cur.Next) {
        if (cur != t0 && cur.IsWhitespaceBefore) {
            // A gap is only bridged on the same line, when allowed, and after a recognised domain.
            if (cur.IsNewlineBefore || !canBeWhitespaces || domain == null) {
                break;
            }
            Pullenti.Ner.Token before = cur.Previous;
            if (before.IsHiphen) {
                // a hyphen before the gap: keep going
            } else if (before.IsCharOf(",;")) {
                break;
            } else if (before.IsChar('.') && cur.Chars.IsLetter && cur.LengthChar == 2) {
                // a dot followed by a two-letter word: keep going
            } else {
                // Look ahead (without crossing another gap) for a slash or a known page extension.
                bool looksLikePath = false;
                Pullenti.Ner.Token probeStart = cur.IsCharOf("\\/") ? cur.Next : cur;
                for (Pullenti.Ner.Token probe = probeStart; probe != null; probe = probe.Next) {
                    if (probe != probeStart && probe.IsWhitespaceBefore) {
                        break;
                    }
                    if (probe is Pullenti.Ner.NumberToken) {
                        continue;
                    }
                    Pullenti.Ner.TextToken probeText = probe as Pullenti.Ner.TextToken;
                    if (probeText == null) {
                        break;
                    }
                    string term = probeText.Term;
                    if (term == "HTM" || term == "HTML" || term == "SHTML" || term == "ASP" || term == "ASPX" || term == "JSP") {
                        looksLikePath = true;
                        break;
                    }
                    if (probe.Chars.IsLetter) {
                        if (!probe.Chars.IsLatinLetter) {
                            break;
                        }
                    } else if (probe.IsCharOf("\\/")) {
                        looksLikePath = true;
                        break;
                    } else if (!probe.IsCharOf(chars)) {
                        break;
                    }
                }
                if (!looksLikePath) {
                    break;
                }
            }
        }

        Pullenti.Ner.NumberToken number = cur as Pullenti.Ner.NumberToken;
        if (number != null) {
            content.Append(number.GetSourceText());
            last = cur;
            continue;
        }

        Pullenti.Ner.TextToken text = cur as Pullenti.Ner.TextToken;
        if (text == null) {
            Pullenti.Ner.ReferentToken refTok = cur as Pullenti.Ner.ReferentToken;
            if (refTok == null) {
                break;
            }
            // "РФ" is taken right after a dot; otherwise only a single-token Latin referent is taken.
            bool afterDot = content.Length > 0 && content[content.Length - 1] == '.';
            bool take = (refTok.BeginToken.IsValue("РФ", null) && afterDot)
                || (refTok.Chars.IsLatinLetter && refTok.BeginToken == refTok.EndToken);
            if (!take) {
                break;
            }
            content.Append(refTok.BeginToken.GetSourceText());
            last = cur;
            continue;
        }

        string source = text.GetSourceText();
        char first = source[0];
        if (!char.IsLetter(first)) {
            if (chars.IndexOf(first) < 0) {
                break;
            }
            if (first == '(' || first == '[') {
                pendingBracket = first;
            } else if (first == ')' || first == ']') {
                // A closing bracket must match the most recently opened one.
                char expected = (first == ')') ? '(' : '[';
                if (pendingBracket != expected) {
                    break;
                }
                pendingBracket = '\0';
            }
        }
        content.Append(source);
        last = cur;
    }

    if (content.Length == 0) {
        return domain;
    }
    bool hasAlnum = false;
    for (int k = 0; k < content.Length && !hasAlnum; k++) {
        hasAlnum = char.IsLetterOrDigit(content[k]);
    }
    if (!hasAlnum) {
        return domain;
    }

    char tail = content[content.Length - 1];
    if (tail == '.' || tail == '/') {
        content.Length--;
        last = last.Previous;
    }
    if (domain != null) {
        content.Insert(0, domain.Value);
    }

    string probeValue = content.ToString();
    if (probeValue.StartsWith("\\\\")) {
        content.Replace("\\\\", "//");
        probeValue = content.ToString();
    }
    if (probeValue.StartsWith("//")) {
        probeValue = probeValue.Substring(2);
    }
    if (string.Compare(probeValue, "WWW", true) == 0) {
        return null;
    }
    return new UriItemToken(t0, last) { Value = content.ToString() };
}
/// <summary>
/// Parses one line of a mail text starting at <paramref name="t0"/> and tries to classify it.
/// </summary>
/// <remarks>
/// The line runs from <paramref name="t0"/> to the last token before the next line break (or until more
/// than <paramref name="maxCount"/> tokens were seen when that limit is positive). Leading '&gt;' / '|'
/// characters increase Lev, and selected referents found on the line are added to Refs.
/// The type is then decided in this order: From (an m_FromWords term followed by ':'), Hello
/// (m_HelloWords), BestRegards (m_RegardWords) and finally From again for forwarded-message headers
/// and "date ... wrote" style quote headers. Tokens of a recognised regards phrase get their Tag set.
/// </remarks>
/// <param name="t0">First token of the line; may be null.</param>
/// <param name="lev">Recursion depth; the look-ahead right after a date token is done only while it is below 5.</param>
/// <param name="maxCount">Maximum number of tokens to scan, or 0 for no limit.</param>
/// <returns>The parsed line (Typ stays Undefined when nothing matched), or null when <paramref name="t0"/> is null.</returns>
public static MailLine Parse(Pullenti.Ner.Token t0, int lev, int maxCount = 0) {
    if (t0 == null) {
        return null;
    }
    MailLine res = new MailLine(t0, t0);
    bool pr = true;
    int cou = 0;
    // Pass 1: find the end of the line, count quote markers, detect "From:"-like prefix, collect referents.
    for (Pullenti.Ner.Token t = t0; t != null; t = t.Next, cou++) {
        if (t.IsNewlineBefore && t0 != t) {
            break;
        }
        if (maxCount > 0 && cou > maxCount) {
            break;
        }
        res.EndToken = t;
        if (t.IsTableControlChar || t.IsHiphen) {
            continue;
        }
        if (pr) {
            if ((t is Pullenti.Ner.TextToken) && t.IsCharOf(">|")) {
                res.Lev++;
            } else {
                pr = false;
                Pullenti.Ner.Core.TerminToken tok = m_FromWords.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
                if (tok != null && tok.EndToken.Next != null && tok.EndToken.Next.IsChar(':')) {
                    res.Typ = Types.From;
                    t = tok.EndToken.Next;
                    continue;
                }
            }
        }
        if (t is Pullenti.Ner.ReferentToken) {
            Pullenti.Ner.Referent r = t.GetReferent();
            if (r != null) {
                if ((((r is Pullenti.Ner.Person.PersonReferent) || (r is Pullenti.Ner.Geo.GeoReferent) || (r is Pullenti.Ner.Address.AddressReferent)) || r.TypeName == "PHONE" || r.TypeName == "URI") || (r is Pullenti.Ner.Person.PersonPropertyReferent) || r.TypeName == "ORGANIZATION") {
                    res.Refs.Add(r);
                }
            }
        }
    }
    // Pass 2: greeting line.
    if (res.Typ == Types.Undefined) {
        Pullenti.Ner.Token t = t0;
        for (; t != null && (t.EndChar < res.EndChar); t = t.Next) {
            if (!t.IsHiphen && t.Chars.IsLetter) {
                break;
            }
        }
        int ok = 0;
        int nams = 0;
        int oth = 0;
        Pullenti.Ner.Token lastComma = null;
        for (; t != null && (t.EndChar < res.EndChar); t = t.Next) {
            if (t.GetReferent() is Pullenti.Ner.Person.PersonReferent) {
                nams++;
                continue;
            }
            if (t is Pullenti.Ner.TextToken) {
                if (!t.Chars.IsLetter) {
                    lastComma = t;
                    continue;
                }
                Pullenti.Ner.Core.TerminToken tok = m_HelloWords.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
                if (tok != null) {
                    ok++;
                    t = tok.EndToken;
                    continue;
                }
                if (t.IsValue("ВСЕ", null) || t.IsValue("ALL", null) || t.IsValue("TEAM", null)) {
                    nams++;
                    continue;
                }
                Pullenti.Ner.Person.Internal.PersonItemToken pit = Pullenti.Ner.Person.Internal.PersonItemToken.TryAttach(t, null, Pullenti.Ner.Person.Internal.PersonItemToken.ParseAttr.No, null);
                if (pit != null) {
                    nams++;
                    t = pit.EndToken;
                    continue;
                }
            }
            if ((++oth) > 3) {
                // Too many foreign words: cut the greeting at the last non-letter token, if any.
                if (ok > 0 && lastComma != null) {
                    res.EndToken = lastComma;
                    oth = 0;
                }
                break;
            }
        }
        if ((oth < 3) && ok > 0) {
            res.Typ = Types.Hello;
        }
    }
    // Pass 3: closing ("best regards") line.
    if (res.Typ == Types.Undefined) {
        int okWords = 0;
        for (Pullenti.Ner.Token t = t0; t != null && t.EndChar <= res.EndChar; t = t.Next) {
            if (!(t is Pullenti.Ner.TextToken)) {
                continue;
            }
            if (t.IsChar('<')) {
                Pullenti.Ner.Core.BracketSequenceToken br = Pullenti.Ner.Core.BracketHelper.TryParse(t, Pullenti.Ner.Core.BracketParseAttr.No, 100);
                if (br != null) {
                    t = br.EndToken;
                    continue;
                }
            }
            if (!t.IsLetters || t.IsTableControlChar) {
                continue;
            }
            Pullenti.Ner.Core.TerminToken tok = m_RegardWords.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
            if (tok != null) {
                okWords++;
                for (; t != null && t.EndChar <= tok.EndChar; t = t.Next) {
                    t.Tag = tok.Termin;
                }
                t = tok.EndToken;
                if ((t.Next is Pullenti.Ner.TextToken) && t.Next.Morph.Case.IsGenitive) {
                    // Absorb following genitive noun phrases. The null check guards the end of the text,
                    // where the original loop dereferenced a null token.
                    for (t = t.Next; t != null && t.EndChar <= res.EndChar; t = t.Next) {
                        if (t.Morph.Class.IsConjunction) {
                            continue;
                        }
                        Pullenti.Ner.Core.NounPhraseToken npt1 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                        if (npt1 == null) {
                            break;
                        }
                        if (!npt1.Morph.Case.IsGenitive) {
                            break;
                        }
                        for (; t.EndChar < npt1.EndChar; t = t.Next) {
                            t.Tag = t;
                        }
                        t.Tag = t;
                    }
                    if (t == null) {
                        // Ran off the end of the text: nothing more to scan.
                        break;
                    }
                }
                continue;
            }
            if ((t.Morph.Class.IsPreposition || t.Morph.Class.IsConjunction || t.Morph.Class.IsMisc) || t.IsValue("C", null)) {
                continue;
            }
            if ((okWords > 0 && t.Previous != null && t.Previous.IsComma) && t.Previous.BeginChar > t0.BeginChar && !t.Chars.IsAllLower) {
                res.EndToken = t.Previous;
                break;
            }
            Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
            if (npt == null) {
                if ((res.EndChar - t.EndChar) > 10) {
                    okWords = 0;
                }
                break;
            }
            tok = m_RegardWords.TryParse(npt.EndToken, Pullenti.Ner.Core.TerminParseAttr.No);
            if (tok != null && (npt.EndToken is Pullenti.Ner.TextToken)) {
                string term = (npt.EndToken as Pullenti.Ner.TextToken).Term;
                if (term == "ДЕЛ") {
                    tok = null;
                }
            }
            if (tok == null) {
                if (npt.Noun.IsValue("НАДЕЖДА", null)) {
                    t.Tag = t;
                } else if (okWords > 0 && t.IsValue("NICE", null) && ((res.EndChar - npt.EndChar) < 13)) {
                    t.Tag = t;
                } else {
                    okWords = 0;
                }
                break;
            }
            okWords++;
            for (; t != null && t.EndChar <= tok.EndChar; t = t.Next) {
                t.Tag = tok.Termin;
            }
            t = tok.EndToken;
        }
        if (okWords > 0) {
            res.Typ = Types.BestRegards;
        }
    }
    // Pass 4: forwarded-message banners and "<date> ... <somebody> wrote" quote headers.
    if (res.Typ == Types.Undefined) {
        Pullenti.Ner.Token t = t0;
        for (; t != null && (t.EndChar < res.EndChar); t = t.Next) {
            if (!(t is Pullenti.Ner.TextToken)) {
                break;
            } else if (!t.IsHiphen && t.Chars.IsLetter) {
                break;
            }
        }
        if (t != null) {
            if (((t.IsValue("ПЕРЕСЫЛАЕМОЕ", null) || t.IsValue("ПЕРЕАДРЕСОВАННОЕ", null))) && t.Next != null && t.Next.IsValue("СООБЩЕНИЕ", null)) {
                res.Typ = Types.From;
                res.MustBeFirstLine = true;
            } else if ((t.IsValue("НАЧАЛО", null) && t.Next != null && ((t.Next.IsValue("ПЕРЕСЫЛАЕМОЕ", null) || t.Next.IsValue("ПЕРЕАДРЕСОВАННОЕ", null)))) && t.Next.Next != null && t.Next.Next.IsValue("СООБЩЕНИЕ", null)) {
                res.Typ = Types.From;
                res.MustBeFirstLine = true;
            } else if (t.IsValue("ORIGINAL", null) && t.Next != null && ((t.Next.IsValue("MESSAGE", null) || t.Next.IsValue("APPOINTMENT", null)))) {
                res.Typ = Types.From;
                res.MustBeFirstLine = true;
            } else if (t.IsValue("ПЕРЕСЛАНО", null) && t.Next != null && t.Next.IsValue("ПОЛЬЗОВАТЕЛЕМ", null)) {
                res.Typ = Types.From;
                res.MustBeFirstLine = true;
            } else if (((t.GetReferent() != null && t.GetReferent().TypeName == "DATE")) || ((t.IsValue("IL", null) && t.Next != null && t.Next.IsValue("GIORNO", null))) || ((t.IsValue("ON", null) && (t.Next is Pullenti.Ner.ReferentToken) && t.Next.GetReferent().TypeName == "DATE"))) {
                bool hasFrom = false;
                bool hasDate = t.GetReferent() != null && t.GetReferent().TypeName == "DATE";
                if (t.IsNewlineAfter && (lev < 5)) {
                    MailLine res1 = Parse(t.Next, lev + 1, 0);
                    if (res1 != null && res1.Typ == Types.Hello) {
                        res.Typ = Types.From;
                    }
                }
                // The header may wrap onto the next line, but only if that line has no type of its own.
                MailLine next = Parse(res.EndToken.Next, lev + 1, 0);
                if (next != null) {
                    if (next.Typ != Types.Undefined) {
                        next = null;
                    }
                }
                int tmax = res.EndChar;
                if (next != null) {
                    tmax = next.EndChar;
                }
                Pullenti.Ner.Core.BracketSequenceToken br1 = null;
                for (; t != null && t.EndChar <= tmax; t = t.Next) {
                    if (t.IsValue("ОТ", null) || t.IsValue("FROM", null)) {
                        hasFrom = true;
                    } else if (t.GetReferent() != null && ((t.GetReferent().TypeName == "URI" || (t.GetReferent() is Pullenti.Ner.Person.PersonReferent)))) {
                        if (t.GetReferent().TypeName == "URI" && hasDate) {
                            if (br1 != null) {
                                hasFrom = true;
                                next = null;
                            }
                            if (t.Previous.IsChar('<') && t.Next != null && t.Next.IsChar('>')) {
                                t = t.Next;
                                if (t.Next != null && t.Next.IsChar(':')) {
                                    t = t.Next;
                                }
                                if (t.IsNewlineAfter) {
                                    hasFrom = true;
                                    next = null;
                                }
                            }
                        }
                        for (t = t.Next; t != null && t.EndChar <= res.EndChar; t = t.Next) {
                            if (t.IsValue("HA", null) && t.Next != null && t.Next.IsValue("SCRITTO", null)) {
                                hasFrom = true;
                                break;
                            } else if (((t.IsValue("НАПИСАТЬ", null) || t.IsValue("WROTE", null))) && ((res.EndChar - t.EndChar) < 10)) {
                                hasFrom = true;
                                break;
                            }
                        }
                        if (hasFrom) {
                            res.Typ = Types.From;
                            // t == null means the scan ran past the last token of the text, i.e. past the
                            // start of the next line as well; the original dereferenced null here.
                            if (next != null && (t == null || t.EndChar >= next.BeginChar)) {
                                res.EndToken = next.EndToken;
                            }
                        }
                        break;
                    } else if (br1 == null && !t.IsChar('<') && Pullenti.Ner.Core.BracketHelper.CanBeStartOfSequence(t, true, false)) {
                        br1 = Pullenti.Ner.Core.BracketHelper.TryParse(t, Pullenti.Ner.Core.BracketParseAttr.No, 100);
                        if (br1 != null) {
                            t = br1.EndToken;
                        }
                    }
                }
            } else {
                bool hasUri = false;
                for (; t != null && (t.EndChar < res.EndChar); t = t.Next) {
                    if (t.GetReferent() != null && ((t.GetReferent().TypeName == "URI" || (t.GetReferent() is Pullenti.Ner.Person.PersonReferent)))) {
                        hasUri = true;
                    } else if (t.IsValue("ПИСАТЬ", null) && hasUri) {
                        if (t.Next != null && t.Next.IsChar('(')) {
                            if (hasUri) {
                                res.Typ = Types.From;
                            }
                            break;
                        }
                    }
                }
            }
        }
    }
    return res;
}
/// <summary>
/// Collects the user (local) part of a mail address by walking backwards from <paramref name="t1"/>.
/// </summary>
/// <remarks>
/// For a single user the tokens are gathered right-to-left while they are numbers, letter tokens or
/// one of the characters '.', '-', '_' and are not followed by whitespace. For the group form
/// "{a,b;c}" (when <paramref name="t1"/> is '}') every user between the braces is collected; the
/// first item is extended to start at '{'. If the opening brace is not found, null is returned.
/// The value is lower-cased with the invariant culture so that the result does not depend on the
/// current thread culture (for example, "I" must not become a dotless "ı" under a Turkish locale).
/// </remarks>
/// <param name="t1">Last token of the user part (the token before '@'), or '}' for a group; may be null.</param>
/// <returns>A non-empty list of user items in text order, or null when nothing was recognised.</returns>
public static List<UriItemToken> AttachMailUsers(Pullenti.Ner.Token t1) {
    if (t1 == null) {
        return null;
    }
    if (t1.IsChar('}')) {
        // Group form: take the last user, then keep prepending users until '{' is reached.
        List<UriItemToken> res0 = AttachMailUsers(t1.Previous);
        if (res0 == null) {
            return null;
        }
        t1 = res0[0].BeginToken.Previous;
        for (; t1 != null; t1 = t1.Previous) {
            if (t1.IsChar('{')) {
                res0[0].BeginToken = t1;
                return res0;
            }
            if (t1.IsCharOf(";,")) {
                continue;
            }
            List<UriItemToken> res1 = AttachMailUsers(t1);
            if (res1 == null) {
                return null;
            }
            res0.Insert(0, res1[0]);
            t1 = res1[0].BeginToken;
        }
        return null;
    }
    StringBuilder txt = new StringBuilder();
    Pullenti.Ner.Token t0 = t1;
    for (Pullenti.Ner.Token t = t1; t != null; t = t.Previous) {
        if (t.IsWhitespaceAfter) {
            break;
        }
        if (t is Pullenti.Ner.NumberToken) {
            Pullenti.Ner.NumberToken nt = t as Pullenti.Ner.NumberToken;
            txt.Insert(0, nt.GetSourceText());
            t0 = t;
            continue;
        }
        Pullenti.Ner.TextToken tt = t as Pullenti.Ner.TextToken;
        if (tt == null) {
            break;
        }
        string src = tt.GetSourceText();
        char ch = src[0];
        if (!char.IsLetter(ch)) {
            if (".-_".IndexOf(ch) < 0) {
                break;
            }
        }
        txt.Insert(0, src);
        t0 = t;
    }
    if (txt.Length == 0) {
        return null;
    }
    List<UriItemToken> res = new List<UriItemToken>();
    res.Add(new UriItemToken(t0, t1) { Value = txt.ToString().ToLowerInvariant() });
    return res;
}
/// <summary>
/// Tries to recognise an organization written as "Name ... &lt;type word&gt;", where the type word is
/// whatever the sibling TryAttach(token, canBeCyr) recognises, starting at <paramref name="t"/>.
/// </summary>
/// <remarks>
/// The name is accumulated token by token until a type word is found. Inside the name a bracketed geo
/// referent, a bracketed Latin organization type or a bracketed capitalised word is absorbed.
/// If the type word is the very first token, the name is taken from a bracket sequence that follows it.
/// When the name text contains a "(...)" fragment, the name without that fragment is added as the main
/// name and the full text as an alternative name.
/// If the candidate started with '(', the result is extended over a closing ')' that directly follows;
/// any other following token rejects the candidate.
/// </remarks>
/// <param name="t">First token of the candidate (may be an opening round bracket); may be null.</param>
/// <param name="canBeCyr">Whether Cyrillic name words are accepted in addition to Latin ones.</param>
/// <returns>A referent token wrapping a new OrganizationReferent, or null.</returns>
public static Pullenti.Ner.ReferentToken TryAttachOrg(Pullenti.Ner.Token t, bool canBeCyr = false) {
    if (t == null) {
        return null;
    }
    bool br = false;
    if (t.IsChar('(') && t.Next != null) {
        t = t.Next;
        br = true;
    }
    if (t is Pullenti.Ner.NumberToken) {
        // A number may start the name only when it is a capitalised numeral adjective written in words.
        if ((t as Pullenti.Ner.NumberToken).Typ == Pullenti.Ner.NumberSpellingType.Words && t.Morph.Class.IsAdjective && t.Chars.IsCapitalUpper) {
        } else {
            return null;
        }
    } else {
        if (t.Chars.IsAllLower) {
            return null;
        }
        if ((t.LengthChar < 3) && !t.Chars.IsLetter) {
            return null;
        }
        if (!t.Chars.IsLatinLetter) {
            if (!canBeCyr || !t.Chars.IsCyrillicLetter) {
                return null;
            }
        }
    }
    Pullenti.Ner.Token t0 = t;
    Pullenti.Ner.Token t1 = t0;
    int namWo = 0;
    OrgItemEngItem tok = null;
    Pullenti.Ner.Geo.GeoReferent geo = null;
    OrgItemTypeToken addTyp = null;
    for (; t != null; t = t.Next) {
        if (t != t0 && t.WhitespacesBeforeCount > 1) {
            break;
        }
        if (t.IsChar(')')) {
            break;
        }
        if (t.IsChar('(') && t.Next != null) {
            if ((t.Next.GetReferent() is Pullenti.Ner.Geo.GeoReferent) && t.Next.Next != null && t.Next.Next.IsChar(')')) {
                geo = t.Next.GetReferent() as Pullenti.Ner.Geo.GeoReferent;
                t = t.Next.Next;
                continue;
            }
            OrgItemTypeToken typ = OrgItemTypeToken.TryAttach(t.Next, true, null);
            if ((typ != null && typ.EndToken.Next != null && typ.EndToken.Next.IsChar(')')) && typ.Chars.IsLatinLetter) {
                addTyp = typ;
                t = typ.EndToken.Next;
                continue;
            }
            if (((t.Next is Pullenti.Ner.TextToken) && t.Next.Next != null && t.Next.Next.IsChar(')')) && t.Next.Chars.IsCapitalUpper) {
                t1 = (t = t.Next.Next);
                continue;
            }
            break;
        }
        // The type word may be separated from the name by one or two '.' / ',' characters.
        tok = TryAttach(t, canBeCyr);
        if (tok == null && t.IsCharOf(".,") && t.Next != null) {
            tok = TryAttach(t.Next, canBeCyr);
            if (tok == null && t.Next.IsCharOf(",.")) {
                tok = TryAttach(t.Next.Next, canBeCyr);
            }
        }
        if (tok != null) {
            if (tok.LengthChar == 1 && t0.Chars.IsCyrillicLetter) {
                return null;
            }
            break;
        }
        if (t.IsHiphen && !t.IsWhitespaceAfter && !t.IsWhitespaceBefore) {
            continue;
        }
        if (t.IsCharOf("&+") || t.IsAnd) {
            continue;
        }
        if (t.IsChar('.')) {
            if (t.Previous != null && t.Previous.LengthChar == 1) {
                continue;
            } else if (Pullenti.Ner.Core.MiscHelper.CanBeStartOfSentence(t.Next)) {
                break;
            }
        }
        if (!t.Chars.IsLatinLetter) {
            if (!canBeCyr || !t.Chars.IsCyrillicLetter) {
                break;
            }
        }
        if (t.Chars.IsAllLower) {
            if (t.Morph.Class.IsPreposition || t.Morph.Class.IsConjunction) {
                continue;
            }
            if (br) {
                continue;
            }
            break;
        }
        Pullenti.Morph.MorphClass mc = t.GetMorphClassInDictionary();
        if (mc.IsVerb) {
            if (t.Next != null && t.Next.Morph.Class.IsPreposition) {
                break;
            }
        }
        if (t.Next != null && t.Next.IsValue("OF", null)) {
            break;
        }
        if (t is Pullenti.Ner.TextToken) {
            namWo++;
        }
        t1 = t;
    }
    if (tok == null) {
        return null;
    }
    if (t0 == tok.BeginToken) {
        // Type word comes first: the name must follow in brackets/quotes.
        Pullenti.Ner.Core.BracketSequenceToken br2 = Pullenti.Ner.Core.BracketHelper.TryParse(tok.EndToken.Next, Pullenti.Ner.Core.BracketParseAttr.No, 100);
        if (br2 != null) {
            Pullenti.Ner.Org.OrganizationReferent org1 = new Pullenti.Ner.Org.OrganizationReferent();
            if (tok.ShortValue != null) {
                org1.AddTypeStr(tok.ShortValue);
            }
            org1.AddTypeStr(tok.FullValue);
            string nam1 = Pullenti.Ner.Core.MiscHelper.GetTextValue(br2.BeginToken, br2.EndToken, Pullenti.Ner.Core.GetTextAttr.No);
            if (nam1 != null) {
                org1.AddName(nam1, true, null);
                return new Pullenti.Ner.ReferentToken(org1, t0, br2.EndToken);
            }
        }
        return null;
    }
    Pullenti.Ner.Org.OrganizationReferent org = new Pullenti.Ner.Org.OrganizationReferent();
    Pullenti.Ner.Token te = tok.EndToken;
    if (tok.IsBank) {
        t1 = tok.EndToken;
    }
    if (tok.FullValue == "company" && (tok.WhitespacesAfterCount < 3)) {
        // "... Company Ltd": the word "company" becomes part of the name, the next type word is the type.
        OrgItemEngItem tok1 = TryAttach(tok.EndToken.Next, canBeCyr);
        if (tok1 != null) {
            t1 = tok.EndToken;
            tok = tok1;
            te = tok.EndToken;
        }
    }
    if (tok.FullValue == "company") {
        if (namWo == 0) {
            return null;
        }
    }
    string nam = Pullenti.Ner.Core.MiscHelper.GetTextValue(t0, t1, Pullenti.Ner.Core.GetTextAttr.IgnoreArticles);
    if (nam == "STOCK" && tok.FullValue == "company") {
        return null;
    }
    string altNam = null;
    if (string.IsNullOrEmpty(nam)) {
        return null;
    }
    if (nam.IndexOf('(') > 0) {
        int i1 = nam.IndexOf('(');
        int i2 = nam.IndexOf(')');
        if (i1 < i2) {
            altNam = nam;
            string tai = null;
            if ((i2 + 1) < nam.Length) {
                // Take the text AFTER the closing bracket (the original started at the bracket itself
                // and so left a stray ')' in the name).
                tai = nam.Substring(i2 + 1).Trim();
            }
            nam = nam.Substring(0, i1).Trim();
            if (!string.IsNullOrEmpty(tai)) {
                nam = string.Format("{0} {1}", nam, tai);
            }
        }
    }
    if (tok.IsBank) {
        org.AddTypeStr((tok.Kit.BaseLanguage.IsEn ? "bank" : "банк"));
        org.AddProfile(Pullenti.Ner.Org.OrgProfile.Finance);
        if ((t1.Next != null && t1.Next.IsValue("OF", null) && t1.Next.Next != null) && t1.Next.Next.Chars.IsLatinLetter) {
            // "Bank of X": extend the name over the "of ..." part.
            OrgItemNameToken nam0 = OrgItemNameToken.TryAttach(t1.Next, null, false, false);
            if (nam0 != null) {
                te = nam0.EndToken;
            } else {
                te = t1.Next.Next;
            }
            nam = Pullenti.Ner.Core.MiscHelper.GetTextValue(t0, te, Pullenti.Ner.Core.GetTextAttr.No);
            if (te.GetReferent() is Pullenti.Ner.Geo.GeoReferent) {
                org.AddGeoObject(te.GetReferent() as Pullenti.Ner.Geo.GeoReferent);
            }
        } else if (t0 == t1) {
            return null;
        }
    } else {
        if (tok.ShortValue != null) {
            org.AddTypeStr(tok.ShortValue);
        }
        org.AddTypeStr(tok.FullValue);
    }
    if (string.IsNullOrEmpty(nam)) {
        return null;
    }
    org.AddName(nam, true, null);
    if (altNam != null) {
        org.AddName(altNam, true, null);
    }
    Pullenti.Ner.ReferentToken res = new Pullenti.Ner.ReferentToken(org, t0, te);
    // One more type word may follow after '.' / ',' characters ("Acme Corp., Ltd").
    t = te;
    while (t.Next != null) {
        if (t.Next.IsCharOf(",.")) {
            t = t.Next;
        } else {
            break;
        }
    }
    if (t.WhitespacesAfterCount < 2) {
        tok = TryAttach(t.Next, canBeCyr);
        if (tok != null) {
            if (tok.ShortValue != null) {
                org.AddTypeStr(tok.ShortValue);
            }
            org.AddTypeStr(tok.FullValue);
            res.EndToken = tok.EndToken;
        }
    }
    if (geo != null) {
        org.AddGeoObject(geo);
    }
    if (addTyp != null) {
        org.AddType(addTyp, false);
    }
    if (!br) {
        return res;
    }
    t = res.EndToken;
    if (t.Next == null) {
        // End of text right after the organization: keep the current end token
        // (the original assigned null to EndToken in this case).
        return res;
    }
    if (!t.Next.IsChar(')')) {
        return null;
    }
    res.EndToken = t.Next;
    return res;
}
/// <summary>
/// Tries to recognise a denomination (a short letter/number/symbol code) that begins at <paramref name="t"/>.
/// </summary>
/// <remarks>
/// TryAttachSpec is consulted first and its result wins. Otherwise the tokens after <paramref name="t"/>
/// are consumed while they are digit numbers, text tokens of at most 3 characters, one of the symbols
/// "+*&amp;^#@!", or separators ('/', '\', '_', hyphen). A lower-case start is accepted only when it is
/// glued to a following number and is free-standing on the left. Unless <paramref name="forOntology"/>
/// is set, scanning stops at whitespace and the collected code must be non-empty, at most 12 characters,
/// must not end with a separator or '!', must contain a number or a symbol, and must pass CheckAttach.
/// </remarks>
/// <param name="t">First token of the candidate; may be null.</param>
/// <param name="forOntology">When true, whitespace does not stop the scan and the validity checks are skipped.</param>
/// <returns>A referent token with a new DenominationReferent, the TryAttachSpec result, or null.</returns>
public Pullenti.Ner.ReferentToken TryAttach(Pullenti.Ner.Token t, bool forOntology = false) {
    if (t == null) {
        return null;
    }
    Pullenti.Ner.ReferentToken special = this.TryAttachSpec(t);
    if (special != null) {
        return special;
    }

    if (t.Chars.IsAllLower) {
        bool gluedToNumber = !t.IsWhitespaceAfter && (t.Next is Pullenti.Ner.NumberToken);
        if (!gluedToNumber) {
            return null;
        }
        bool freeOnTheLeft = t.Previous == null || t.IsWhitespaceBefore || t.Previous.IsCharOf(",:");
        if (!freeOnTheLeft) {
            return null;
        }
    }

    StringBuilder signature = new StringBuilder();
    Pullenti.Ner.Token tail = t;
    bool trailingSeparator = false;
    bool wellFormed = true;
    int numberCount = 0;
    int symbolCount = 0;

    for (Pullenti.Ner.Token cur = t.Next; cur != null; cur = cur.Next) {
        if (cur.IsWhitespaceBefore && !forOntology) {
            break;
        }

        // Every kind of separator is normalised to '-' in the signature.
        bool isSeparator = cur.IsCharOf("/\\_") || cur.IsHiphen;
        trailingSeparator = isSeparator;
        if (isSeparator) {
            signature.Append('-');
            continue;
        }

        Pullenti.Ner.NumberToken number = cur as Pullenti.Ner.NumberToken;
        if (number != null) {
            if (number.Typ != Pullenti.Ner.NumberSpellingType.Digit) {
                break;
            }
            tail = number;
            signature.Append(number.GetSourceText());
            numberCount++;
            continue;
        }

        Pullenti.Ner.TextToken word = cur as Pullenti.Ner.TextToken;
        if (word == null) {
            break;
        }
        if (word.LengthChar > 3) {
            wellFormed = false;
            break;
        }
        if (!char.IsLetter(word.Term[0])) {
            if (word.IsCharOf(",:") || Pullenti.Ner.Core.BracketHelper.CanBeEndOfSequence(word, false, null, false)) {
                break;
            }
            if (!word.IsCharOf("+*&^#@!")) {
                wellFormed = false;
                break;
            }
            symbolCount++;
        }
        tail = word;
        signature.Append(word.GetSourceText());
    }

    if (!forOntology) {
        bool rejected = (signature.Length < 1) || !wellFormed || trailingSeparator || signature.Length > 12;
        if (rejected) {
            return null;
        }
        if (signature[signature.Length - 1] == '!') {
            return null;
        }
        if ((numberCount + symbolCount) == 0) {
            return null;
        }
        if (!this.CheckAttach(t, tail)) {
            return null;
        }
    }

    DenominationReferent denomination = new DenominationReferent();
    denomination.AddValue(t, tail);
    return new Pullenti.Ner.ReferentToken(denomination, t, tail);
}
/// <summary>
/// Looks for a nickname introduced by a keyword (ПРОЗВИЩЕ, КЛИЧКА, ПСЕВДОНИМ, ПСЕВДО, ПОЗЫВНОЙ)
/// starting at <paramref name="t"/> and stores every nickname found in <paramref name="pr"/>.
/// </summary>
/// <remarks>
/// Punctuation, prepositions and an opening round bracket before/around the keyword are skipped.
/// After the keyword either one or more bracket sequences (their inner text is the nickname) or a
/// run of one/two-item person names and single-token referents is accepted. Each nickname is added
/// to the ATTR_NICKNAME slot. If an opening round bracket was seen, a closing one right after a
/// nickname is included in the returned span.
/// </remarks>
/// <param name="pr">Person referent that receives the nickname slots.</param>
/// <param name="t">Token to start from; may be null.</param>
/// <returns>The last token of the nickname construction, or null when no nickname was found.</returns>
public static Pullenti.Ner.Token CreateNickname(Pullenti.Ner.Person.PersonReferent pr, Pullenti.Ner.Token t) {
    bool keywordSeen = false;
    bool insideRoundBracket = false;

    // Skip the lead-in up to the first token that is neither filler nor a keyword.
    for (; t != null; t = t.Next) {
        if (t.IsHiphen || t.IsComma || t.IsCharOf(".:;")) {
            continue;
        }
        if (t.Morph.Class.IsPreposition) {
            continue;
        }
        if (t.IsChar('(')) {
            insideRoundBracket = true;
            continue;
        }
        bool isKeyword = (t.IsValue("ПРОЗВИЩЕ", "ПРІЗВИСЬКО") || t.IsValue("КЛИЧКА", null) || t.IsValue("ПСЕВДОНИМ", "ПСЕВДОНІМ")) || t.IsValue("ПСЕВДО", null) || t.IsValue("ПОЗЫВНОЙ", "ПОЗИВНИЙ");
        if (!isKeyword) {
            break;
        }
        keywordSeen = true;
    }
    if (!keywordSeen || t == null) {
        return null;
    }

    if (Pullenti.Ner.Core.BracketHelper.IsBracket(t, true)) {
        // Quoted form: "nick1", "nick2" and "nick3".
        Pullenti.Ner.Core.BracketSequenceToken seq = Pullenti.Ner.Core.BracketHelper.TryParse(t, Pullenti.Ner.Core.BracketParseAttr.No, 100);
        if (seq == null) {
            return null;
        }
        string quoted = Pullenti.Ner.Core.MiscHelper.GetTextValue(seq.BeginToken.Next, seq.EndToken.Previous, Pullenti.Ner.Core.GetTextAttr.No);
        if (quoted == null) {
            return null;
        }
        pr.AddSlot(Pullenti.Ner.Person.PersonReferent.ATTR_NICKNAME, quoted, false, 0);
        t = seq.EndToken;
        for (Pullenti.Ner.Token next = t.Next; next != null; next = next.Next) {
            if (next.IsCommaAnd) {
                continue;
            }
            if (!Pullenti.Ner.Core.BracketHelper.IsBracket(next, true)) {
                break;
            }
            seq = Pullenti.Ner.Core.BracketHelper.TryParse(next, Pullenti.Ner.Core.BracketParseAttr.No, 100);
            if (seq == null) {
                break;
            }
            quoted = Pullenti.Ner.Core.MiscHelper.GetTextValue(seq.BeginToken.Next, seq.EndToken.Previous, Pullenti.Ner.Core.GetTextAttr.No);
            if (quoted != null) {
                pr.AddSlot(Pullenti.Ner.Person.PersonReferent.ATTR_NICKNAME, quoted, false, 0);
            }
            next = seq.EndToken;
            t = next;
        }
        if (insideRoundBracket && t.Next != null && t.Next.IsChar(')')) {
            t = t.Next;
        }
        return t;
    }

    // Unquoted form: a run of names / single-token referents.
    Pullenti.Ner.Token lastAccepted = null;
    for (; t != null; t = t.Next) {
        if (t.IsCommaAnd) {
            continue;
        }
        if (lastAccepted != null && t.Chars.IsAllLower) {
            break;
        }
        if (t.WhitespacesBeforeCount > 2) {
            break;
        }

        List<PersonItemToken> items = PersonItemToken.TryAttachList(t, null, PersonItemToken.ParseAttr.No, 10);
        if (items != null && (items.Count == 1 || items.Count == 2)) {
            Pullenti.Ner.Token itemsEnd = items[items.Count - 1].EndToken;
            string name = Pullenti.Ner.Core.MiscHelper.GetTextValue(items[0].BeginToken, itemsEnd, Pullenti.Ner.Core.GetTextAttr.No);
            if (name != null) {
                pr.AddSlot(Pullenti.Ner.Person.PersonReferent.ATTR_NICKNAME, name, false, 0);
                t = itemsEnd;
                if (insideRoundBracket && t.Next != null && t.Next.IsChar(')')) {
                    t = t.Next;
                }
                lastAccepted = t;
                continue;
            }
        }

        Pullenti.Ner.ReferentToken refTok = t as Pullenti.Ner.ReferentToken;
        if (refTok == null || t.Chars.IsAllLower || refTok.BeginToken != refTok.EndToken) {
            break;
        }
        string refText = Pullenti.Ner.Core.MiscHelper.GetTextValueOfMetaToken(refTok, Pullenti.Ner.Core.GetTextAttr.No);
        pr.AddSlot(Pullenti.Ner.Person.PersonReferent.ATTR_NICKNAME, refText, false, 0);
        if (insideRoundBracket && t.Next != null && t.Next.IsChar(')')) {
            t = t.Next;
        }
        lastAccepted = t;
    }
    return lastAccepted;
}