/// <summary>
/// Collects a run of tokens starting at <paramref name="t0"/> into a single code string
/// (digit tokens plus adjoining punctuation / letters) and wraps it into a <c>UriItemToken</c>.
/// </summary>
/// <param name="t0">First token of the candidate code.</param>
/// <returns>
/// The token covering the collected run with <c>Value</c> set to the concatenated source text,
/// or null when fewer than 3 characters or fewer than 2 digits were collected.
/// </returns>
public static UriItemToken AttachBBK(Pullenti.Ner.Token t0)
{
    StringBuilder buffer = new StringBuilder();
    Pullenti.Ner.Token last = t0;
    int digitCount = 0;

    for (Pullenti.Ner.Token cur = t0; cur != null; cur = cur.Next)
    {
        // The code never continues onto another line or across a table cell boundary.
        bool startsNewLine = cur.IsNewlineBefore && cur != t0;
        if (startsNewLine || cur.IsTableControlChar)
        {
            break;
        }

        Pullenti.Ner.NumberToken number = cur as Pullenti.Ner.NumberToken;
        if (number != null)
        {
            // Only plain digit numbers with undefined morphology class are accepted.
            if (number.Typ != Pullenti.Ner.NumberSpellingType.Digit || !number.Morph.Class.IsUndefined)
            {
                break;
            }
            string digits = number.GetSourceText();
            buffer.Append(digits);
            digitCount += digits.Length;
            last = cur;
            continue;
        }

        Pullenti.Ner.TextToken word = cur as Pullenti.Ner.TextToken;
        if (word == null || word.IsChar(','))
        {
            break;
        }
        // An opening bracket is kept only when a number follows it.
        if (word.IsChar('(') && !(word.Next is Pullenti.Ner.NumberToken))
        {
            break;
        }
        string source = word.GetSourceText();
        // A letter separated by whitespace starts ordinary text, not a code fragment.
        if (char.IsLetter(source[0]) && word.IsWhitespaceBefore)
        {
            break;
        }
        buffer.Append(source);
        last = cur;
    }

    if (buffer.Length < 3 || digitCount < 2)
    {
        return null;
    }

    // Drop a trailing dot from both the value and the covered span.
    if (buffer[buffer.Length - 1] == '.')
    {
        buffer.Length--;
        last = last.Previous;
    }

    UriItemToken result = new UriItemToken(t0, last);
    result.Value = buffer.ToString();
    return result;
}
/// <summary>
/// Tries to recognise a measurement unit starting at token <paramref name="t"/>.
/// </summary>
/// <param name="t">First token of the candidate unit; may be a leading divider ("/", "\", "НА", "OF", "PER",
/// or "В" when <paramref name="prev"/> is given) or a multiplication character.</param>
/// <param name="addUnits">Optional extra termin collection; a match yields a token whose <c>ExtOnto</c>
/// is the termin tag (and whose <c>Unit</c> is not set). May be null.</param>
/// <param name="prev">Previously parsed unit of the same expression, or null. It enables the "В" divider,
/// rejects a lone whitespace-separated termin, and turns a lower-case "м" after a length unit into minutes.</param>
/// <param name="parseUnknownUnits">When true, a short letter token (at most 5 characters) that is followed by
/// a boundary is returned as a doubtful unit with <c>UnknownName</c> set.</param>
/// <returns>The recognised unit token, or null when nothing was recognised.</returns>
public static UnitToken TryParse(Pullenti.Ner.Token t, Pullenti.Ner.Core.TerminCollection addUnits, UnitToken prev, bool parseUnknownUnits = false)
{
    if (t == null)
    {
        return null;
    }
    Pullenti.Ner.Token t0 = t;
    int pow = 1;
    bool isNeg = false;

    // Leading divider makes the power negative; a multiplication sign is simply skipped.
    if ((t.IsCharOf("\\/") || t.IsValue("НА", null) || t.IsValue("OF", null)) || t.IsValue("PER", null))
    {
        isNeg = true;
        t = t.Next;
    }
    else if (t.IsValue("В", null) && prev != null)
    {
        isNeg = true;
        t = t.Next;
    }
    else if (MeasureHelper.IsMultChar(t))
    {
        t = t.Next;
    }

    Pullenti.Ner.TextToken tt = t as Pullenti.Ner.TextToken;
    if (tt == null)
    {
        return null;
    }

    // NOTE(review): the power prefixes below advance tt but not t, while the non-termin
    // branches further down (degree, percent, addUnits, unknown unit) test t. Confirm this is intended.
    if (tt.Term == "КВ" || tt.Term == "КВАДР" || tt.IsValue("КВАДРАТНЫЙ", null))
    {
        pow = 2;
        tt = tt.Next as Pullenti.Ner.TextToken;
        if (tt != null && tt.IsChar('.'))
        {
            tt = tt.Next as Pullenti.Ner.TextToken;
        }
        if (tt == null)
        {
            return null;
        }
    }
    else if (tt.Term == "КУБ" || tt.Term == "КУБИЧ" || tt.IsValue("КУБИЧЕСКИЙ", null))
    {
        pow = 3;
        tt = tt.Next as Pullenti.Ner.TextToken;
        if (tt != null && tt.IsChar('.'))
        {
            tt = tt.Next as Pullenti.Ner.TextToken;
        }
        if (tt == null)
        {
            return null;
        }
    }
    else if (tt.Term == "µ")
    {
        UnitToken micro = TryParse(tt.Next, addUnits, prev, false);
        // The nested parse may return a token without Unit (the addUnits path sets only ExtOnto),
        // so Unit has to be checked before it is dereferenced.
        if (micro != null && micro.Unit != null)
        {
            // Unit names carry their factor prefix (the Mega -> Milli swap below relies on that),
            // so the micro variant of the parsed unit is named "мк" + its name.
            string microName = "мк" + micro.Unit.NameCyr;
            foreach (Unit u in UnitsHelper.Units)
            {
                if (u.Factor == UnitsFactors.Micro && string.Compare(u.NameCyr, microName, true) == 0)
                {
                    micro.Unit = u;
                    micro.BeginToken = tt;
                    micro.Pow = (isNeg ? -pow : pow);
                    return micro;
                }
            }
        }
    }

    List<Pullenti.Ner.Core.TerminToken> toks = UnitsHelper.Termins.TryParseAll(tt, Pullenti.Ner.Core.TerminParseAttr.No);
    if (toks != null)
    {
        if ((prev != null && tt == t0 && toks.Count == 1) && t.IsWhitespaceBefore)
        {
            return null;
        }

        // A one-token termin that is also a preposition is rejected when it reads as ordinary text.
        if (toks[0].BeginToken == toks[0].EndToken && tt.Morph.Class.IsPreposition && (tt.WhitespacesAfterCount < 3))
        {
            if (Pullenti.Ner.Core.NounPhraseHelper.TryParse(tt, Pullenti.Ner.Core.NounPhraseParseAttr.ParsePreposition, 0, null) != null)
            {
                return null;
            }
            if (tt.Next is Pullenti.Ner.NumberToken)
            {
                if ((tt.Next as Pullenti.Ner.NumberToken).Typ != Pullenti.Ner.NumberSpellingType.Digit)
                {
                    return null;
                }
            }
            UnitToken following = TryParse(tt.Next, addUnits, null, false);
            if (following != null)
            {
                return null;
            }
        }

        // Lower-case "м" / "m" right after a length unit is taken as minutes.
        if (toks[0].BeginToken == toks[0].EndToken && ((toks[0].BeginToken.IsValue("М", null) || toks[0].BeginToken.IsValue("M", null))) && toks[0].BeginToken.Chars.IsAllLower)
        {
            if (prev != null && prev.Unit != null && prev.Unit.Kind == Pullenti.Ner.Measure.MeasureKind.Length)
            {
                UnitToken minute = new UnitToken(t0, toks[0].EndToken) { Unit = UnitsHelper.uMinute };
                minute.Pow = (isNeg ? -pow : pow);
                return minute;
            }
        }

        List<UnitToken> uts = new List<UnitToken>();
        foreach (Pullenti.Ner.Core.TerminToken tok in toks)
        {
            UnitToken cand = new UnitToken(t0, tok.EndToken) { Unit = tok.Termin.Tag as Unit };
            cand.Pow = (isNeg ? -pow : pow);
            // A unit with multiplier 1000000 written with a lower-case first letter is replaced
            // by the Milli unit of the same (case-insensitive) name.
            if (cand.Unit.BaseMultiplier == 1000000 && (t0 is Pullenti.Ner.TextToken) && char.IsLower((t0 as Pullenti.Ner.TextToken).GetSourceText()[0]))
            {
                foreach (Unit u in UnitsHelper.Units)
                {
                    if (u.Factor == UnitsFactors.Milli && string.Compare(u.NameCyr, cand.Unit.NameCyr, true) == 0)
                    {
                        cand.Unit = u;
                        break;
                    }
                }
            }
            cand._correct();
            cand._checkDoubt();
            uts.Add(cand);
        }

        // Prefer the variant whose keyword starts latest, then the first non-doubtful one.
        int max = 0;
        UnitToken best = null;
        foreach (UnitToken ut in uts)
        {
            if (ut.Keyword != null)
            {
                if (ut.Keyword.BeginChar >= max)
                {
                    max = ut.Keyword.BeginChar;
                    best = ut;
                }
            }
        }
        if (best != null)
        {
            return best;
        }
        foreach (UnitToken ut in uts)
        {
            if (!ut.IsDoubt)
            {
                return ut;
            }
        }
        return uts[0];
    }

    // Degree sign, either as a character or written as "<O>" / "<0>".
    Pullenti.Ner.Token t1 = null;
    if (t.IsCharOf("º°"))
    {
        t1 = t;
    }
    else if ((t.IsChar('<') && t.Next != null && t.Next.Next != null) && t.Next.Next.IsChar('>') && ((t.Next.IsValue("О", null) || t.Next.IsValue("O", null) || (((t.Next is Pullenti.Ner.NumberToken) && (t.Next as Pullenti.Ner.NumberToken).Value == "0")))))
    {
        t1 = t.Next.Next;
    }
    if (t1 != null)
    {
        UnitToken degree = new UnitToken(t0, t1) { Unit = UnitsHelper.uGradus };
        degree._checkDoubt();
        t = t1.Next;
        if (t != null && t.IsComma)
        {
            t = t.Next;
        }
        if (t != null && t.IsValue("ПО", null))
        {
            t = t.Next;
        }
        if (t is Pullenti.Ner.TextToken)
        {
            string vv = (t as Pullenti.Ner.TextToken).Term;
            if (vv == "C" || vv == "С" || vv.StartsWith("ЦЕЛЬС"))
            {
                degree.Unit = UnitsHelper.uGradusC;
                degree.IsDoubt = false;
                degree.EndToken = t;
            }
            if (vv == "F" || vv.StartsWith("ФАР"))
            {
                degree.Unit = UnitsHelper.uGradusF;
                degree.IsDoubt = false;
                degree.EndToken = t;
            }
        }
        return degree;
    }

    // "oC" written with letters instead of the degree sign.
    if ((t is Pullenti.Ner.TextToken) && ((t.IsValue("ОС", null) || t.IsValue("OC", null))))
    {
        string str = t.GetSourceText();
        if (str == "оС" || str == "oC")
        {
            return new UnitToken(t, t) { Unit = UnitsHelper.uGradusC, IsDoubt = false };
        }
    }

    if (t.IsChar('%'))
    {
        Pullenti.Ner.Token tt1 = t.Next;
        if (tt1 != null && tt1.IsChar('('))
        {
            tt1 = tt1.Next;
        }
        if ((tt1 is Pullenti.Ner.TextToken) && (tt1 as Pullenti.Ner.TextToken).Term.StartsWith("ОБ"))
        {
            UnitToken alco = new UnitToken(t, tt1) { Unit = UnitsHelper.uAlco };
            if (alco.EndToken.Next != null && alco.EndToken.Next.IsChar('.'))
            {
                alco.EndToken = alco.EndToken.Next;
            }
            if (alco.EndToken.Next != null && alco.EndToken.Next.IsChar(')') && t.Next.IsChar('('))
            {
                alco.EndToken = alco.EndToken.Next;
            }
            return alco;
        }
        return new UnitToken(t, t) { Unit = UnitsHelper.uPercent };
    }

    if (addUnits != null)
    {
        Pullenti.Ner.Core.TerminToken tok = addUnits.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
        if (tok != null)
        {
            UnitToken ext = new UnitToken(t0, tok.EndToken) { ExtOnto = tok.Termin.Tag as Pullenti.Ner.Measure.UnitReferent };
            // Absorb a trailing abbreviation dot into the result itself; extending the temporary
            // termin token after the result was built would have no effect.
            if (tok.EndToken.Next != null && tok.EndToken.Next.IsChar('.'))
            {
                ext.EndToken = tok.EndToken.Next;
            }
            ext.Pow = (isNeg ? -pow : pow);
            ext._correct();
            return ext;
        }
    }

    if (!parseUnknownUnits)
    {
        return null;
    }
    if ((t.WhitespacesBeforeCount > 2 || !t.Chars.IsLetter || t.LengthChar > 5) || !(t is Pullenti.Ner.TextToken))
    {
        return null;
    }
    if (Pullenti.Ner.Core.MiscHelper.CanBeStartOfSentence(t))
    {
        return null;
    }

    // NOTE(review): a trailing '.' is not absorbed here, so a token directly followed by '.'
    // fails the boundary check below. Confirm whether the dot was meant to be included.
    t1 = t;
    bool ok = false;
    if (t1.Next == null || t1.WhitespacesAfterCount > 2)
    {
        ok = true;
    }
    else if (t1.Next.IsComma || t1.Next.IsCharOf("\\/") || t1.Next.IsTableControlChar)
    {
        ok = true;
    }
    else if (MeasureHelper.IsMultChar(t1.Next))
    {
        ok = true;
    }
    if (!ok)
    {
        return null;
    }

    // The divider sign is not applied to unknown units: Pow is always the positive power here.
    UnitToken unknown = new UnitToken(t0, t1) { Pow = pow, IsDoubt = true };
    unknown.UnknownName = (t as Pullenti.Ner.TextToken).GetSourceText();
    unknown._correct();
    return unknown;
}
/// <summary>
/// Scores the token span BeginToken..EndToken as a candidate title. The score is stored in Rank and,
/// on success, the detected name boundaries are stored in BeginNameToken / EndNameToken.
/// TypeValue and Speciality may also be set from title items found in or right after the span.
/// </summary>
/// <param name="minNewlinesCount">Baseline count of line breaks before a token; for every token except
/// BeginToken, each line break above this baseline lowers Rank by one.</param>
/// <returns>false when the span is rejected, true when Rank and the name boundaries were computed.</returns>
// Outline of the scoring performed below:
//  - Rank starts at 0 and loses 30 when BeginToken is all lower-case.
//  - Title items: Theme / TypAndTheme move the name start past the item (+20 when lower-case text follows,
//    otherwise +100); Typ counts as words; Dust / Speciality cost 20 (and reject the span at BeginToken);
//    Consultant / Boss / Editor cost 10 or 2; any other title item type rejects the span.
//  - Book-link tokens and an author at BeginToken lower Rank; more than 4 line breaks reject the span.
//  - Referent tokens: Uri / Phone reject the span; organization, geo, person and other referents lower Rank.
//  - Person attributes and organization type words adjust Rank and the word counters.
//  - A pure verb costs 30 and stops the scan.
//  - Finally Rank += words - notwords; the span is rejected when there are no words and Rank < 50,
//    or when the name start is missing or lies after the name end.
// NOTE(review): the second BookLinkTyp.N test (in the else-if) and the second GeoReferent test are
// unreachable because the preceding branch already handles them - confirm the intended types.
bool CalcRankAndValue(int minNewlinesCount) { Rank = 0; if (BeginToken.Chars.IsAllLower) { Rank -= 30; } int words = 0; int upWords = 0; int notwords = 0; int lineNumber = 0; Pullenti.Ner.Token tstart = BeginToken; Pullenti.Ner.Token tend = EndToken; for (Pullenti.Ner.Token t = BeginToken; t != EndToken.Next && t != null && t.EndChar <= EndToken.EndChar; t = t.Next) { if (t.IsNewlineBefore) { } TitleItemToken tit = TitleItemToken.TryAttach(t); if (tit != null) { if (tit.Typ == TitleItemToken.Types.Theme || tit.Typ == TitleItemToken.Types.TypAndTheme) { if (t != BeginToken) { if (lineNumber > 0) { return(false); } words = (upWords = (notwords = 0)); tstart = tit.EndToken.Next; } t = tit.EndToken; if (t.Next == null) { return(false); } if (t.Next.Chars.IsLetter && t.Next.Chars.IsAllLower) { Rank += 20; } else { Rank += 100; } tstart = t.Next; if (tit.Typ == TitleItemToken.Types.TypAndTheme) { TypeValue = tit.Value; } continue; } if (tit.Typ == TitleItemToken.Types.Typ) { if (t == BeginToken) { if (tit.EndToken.IsNewlineAfter) { TypeValue = tit.Value; Rank += 5; tstart = tit.EndToken.Next; } } t = tit.EndToken; words++; if (tit.BeginToken != tit.EndToken) { words++; } if (tit.Chars.IsAllUpper) { upWords++; } continue; } if (tit.Typ == TitleItemToken.Types.Dust || tit.Typ == TitleItemToken.Types.Speciality) { if (t == BeginToken) { return(false); } Rank -= 20; if (tit.Typ == TitleItemToken.Types.Speciality) { Speciality = tit.Value; } t = tit.EndToken; continue; } if (tit.Typ == TitleItemToken.Types.Consultant || tit.Typ == TitleItemToken.Types.Boss || tit.Typ == TitleItemToken.Types.Editor) { t = tit.EndToken; if (t.Next != null && ((t.Next.IsCharOf(":") || t.Next.IsHiphen || t.WhitespacesAfterCount > 4))) { Rank -= 10; } else { Rank -= 2; } continue; } return(false); } Pullenti.Ner.Booklink.Internal.BookLinkToken blt = Pullenti.Ner.Booklink.Internal.BookLinkToken.TryParse(t, 0); if (blt != null) { if (blt.Typ == Pullenti.Ner.Booklink.Internal.BookLinkTyp.Misc || 
blt.Typ == Pullenti.Ner.Booklink.Internal.BookLinkTyp.N || blt.Typ == Pullenti.Ner.Booklink.Internal.BookLinkTyp.Pages) { Rank -= 10; } else if (blt.Typ == Pullenti.Ner.Booklink.Internal.BookLinkTyp.N || blt.Typ == Pullenti.Ner.Booklink.Internal.BookLinkTyp.PageRange) { Rank -= 20; } } if (t == BeginToken && Pullenti.Ner.Booklink.Internal.BookLinkToken.TryParseAuthor(t, Pullenti.Ner.Person.Internal.FioTemplateType.Undefined) != null) { Rank -= 20; } if (t.IsNewlineBefore && t != BeginToken) { lineNumber++; if (lineNumber > 4) { return(false); } if (t.Chars.IsAllLower) { Rank += 10; } else if (t.Previous.IsChar('.')) { Rank -= 10; } else if (t.Previous.IsCharOf(",-")) { Rank += 10; } else { Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t.Previous, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null); if (npt != null && npt.EndChar >= t.EndChar) { Rank += 10; } } } if (t != BeginToken && t.NewlinesBeforeCount > minNewlinesCount) { Rank -= (t.NewlinesBeforeCount - minNewlinesCount); } Pullenti.Ner.Core.BracketSequenceToken bst = Pullenti.Ner.Core.BracketHelper.TryParse(t, Pullenti.Ner.Core.BracketParseAttr.No, 100); if (bst != null && bst.IsQuoteType && bst.EndToken.EndChar <= EndToken.EndChar) { if (words == 0) { tstart = bst.BeginToken; Rank += 10; if (bst.EndToken == EndToken) { tend = EndToken; Rank += 10; } } } List <Pullenti.Ner.Referent> rli = t.GetReferents(); if (rli != null) { foreach (Pullenti.Ner.Referent r in rli) { if (r is Pullenti.Ner.Org.OrganizationReferent) { if (t.IsNewlineBefore) { Rank -= 10; } else { Rank -= 4; } continue; } if ((r is Pullenti.Ner.Geo.GeoReferent) || (r is Pullenti.Ner.Person.PersonReferent)) { if (t.IsNewlineBefore) { Rank -= 5; if (t.IsNewlineAfter || t.Next == null) { Rank -= 20; } else if (t.Next.IsHiphen || (t.Next is Pullenti.Ner.NumberToken) || (t.Next.GetReferent() is Pullenti.Ner.Date.DateReferent)) { Rank -= 20; } else if (t != BeginToken) { Rank -= 20; } } continue; } if ((r is 
Pullenti.Ner.Geo.GeoReferent) || (r is Pullenti.Ner.Denomination.DenominationReferent)) { continue; } if ((r is Pullenti.Ner.Uri.UriReferent) || (r is Pullenti.Ner.Phone.PhoneReferent)) { return(false); } if (t.IsNewlineBefore) { Rank -= 4; } else { Rank -= 2; } if (t == BeginToken && (EndToken.GetReferent() is Pullenti.Ner.Person.PersonReferent)) { Rank -= 10; } } words++; if (t.Chars.IsAllUpper) { upWords++; } if (t == BeginToken) { if (t.IsNewlineAfter) { Rank -= 10; } else if (t.Next != null && t.Next.IsChar('.') && t.Next.IsNewlineAfter) { Rank -= 10; } } continue; } if (t is Pullenti.Ner.NumberToken) { if ((t as Pullenti.Ner.NumberToken).Typ == Pullenti.Ner.NumberSpellingType.Words) { words++; if (t.Chars.IsAllUpper) { upWords++; } } else { notwords++; } continue; } Pullenti.Ner.Person.Internal.PersonAttrToken pat = Pullenti.Ner.Person.Internal.PersonAttrToken.TryAttach(t, null, Pullenti.Ner.Person.Internal.PersonAttrToken.PersonAttrAttachAttrs.No); if (pat != null) { if (t.IsNewlineBefore) { if (!pat.Morph.Case.IsUndefined && !pat.Morph.Case.IsNominative) { } else if (pat.Chars.IsAllUpper) { } else { Rank -= 20; } } else if (t.Chars.IsAllLower) { Rank--; } for (; t != null; t = t.Next) { words++; if (t.Chars.IsAllUpper) { upWords++; } if (t == pat.EndToken) { break; } } continue; } Pullenti.Ner.Org.Internal.OrgItemTypeToken oitt = Pullenti.Ner.Org.Internal.OrgItemTypeToken.TryAttach(t, true, null); if (oitt != null) { if (oitt.Morph.Number != Pullenti.Morph.MorphNumber.Plural && !oitt.IsDoubtRootWord) { if (!oitt.Morph.Case.IsUndefined && !oitt.Morph.Case.IsNominative) { words++; if (t.Chars.IsAllUpper) { upWords++; } } else { Rank -= 4; if (t == BeginToken) { Rank -= 5; } } } else { words += 1; if (t.Chars.IsAllUpper) { upWords++; } } t = oitt.EndToken; continue; } Pullenti.Ner.TextToken tt = t as Pullenti.Ner.TextToken; if (tt != null) { if (tt.IsChar('©')) { Rank -= 10; } if (tt.IsChar('_')) { Rank--; } if (tt.Chars.IsLetter) { if (tt.LengthChar > 2) { 
words++; if (t.Chars.IsAllUpper) { upWords++; } } } else if (!tt.IsChar(',')) { notwords++; } if (tt.IsPureVerb) { { Rank -= 30; words--; } break; } if (tt == EndToken) { if (tt.Morph.Class.IsPreposition || tt.Morph.Class.IsConjunction) { Rank -= 10; } else if (tt.IsChar('.')) { Rank += 5; } } else if (tt.IsCharOf("._")) { Rank -= 5; } } } Rank += words; Rank -= notwords; if ((words < 1) && (Rank < 50)) { return(false); } if (tstart == null || tend == null) { return(false); } if (tstart.EndChar > tend.EndChar) { return(false); } TitleItemToken tit1 = TitleItemToken.TryAttach(EndToken.Next); if (tit1 != null && ((tit1.Typ == TitleItemToken.Types.Typ || tit1.Typ == TitleItemToken.Types.Speciality))) { if (tit1.EndToken.IsNewlineAfter) { Rank += 15; } else { Rank += 10; } if (tit1.Typ == TitleItemToken.Types.Speciality) { Speciality = tit1.Value; } } if (upWords > 4 && upWords > ((int)((0.8 * words)))) { if (tstart.Previous != null && (tstart.Previous.GetReferent() is Pullenti.Ner.Person.PersonReferent)) { Rank += (5 + upWords); } } BeginNameToken = tstart; EndNameToken = tend; return(true); }
/// <summary>
/// Tries to recognise a title-page item (theme marker, document type, translation note, section
/// heading, speciality, or a termin from <c>m_Termins</c>) starting at token <paramref name="t"/>.
/// </summary>
/// <param name="t">Token at which the item is expected to start.</param>
/// <returns>The recognised item, or null when nothing matches.</returns>
public static TitleItemToken TryAttach(Pullenti.Ner.Token t)
{
    Pullenti.Ner.TextToken tt = t as Pullenti.Ner.TextToken;
    if (tt != null)
    {
        Pullenti.Ner.Token t1 = (Pullenti.Ner.Token)tt;

        // "ТЕМА [<type>] [:]"
        if (tt.Term == "ТЕМА")
        {
            TitleItemToken tit = TryAttach(tt.Next);
            if (tit != null && tit.Typ == Types.Typ)
            {
                t1 = tit.EndToken;
                if (t1.Next != null && t1.Next.IsChar(':'))
                {
                    t1 = t1.Next;
                }
                return new TitleItemToken(t, t1, Types.TypAndTheme) { Value = tit.Value };
            }
            if (tt.Next != null && tt.Next.IsChar(':'))
            {
                t1 = tt.Next;
            }
            return new TitleItemToken(tt, t1, Types.Theme);
        }

        // "ПО ТЕМЕ [:]" / "НА ТЕМУ [:]"
        if (tt.Term == "ПО" || tt.Term == "НА")
        {
            if (tt.Next != null && tt.Next.IsValue("ТЕМА", null))
            {
                t1 = tt.Next;
                if (t1.Next != null && t1.Next.IsChar(':'))
                {
                    t1 = t1.Next;
                }
                return new TitleItemToken(tt, t1, Types.Theme);
            }
        }

        // "ПЕРЕВОД С <word>" / "ПЕР. С <word>"
        if (tt.Term == "ПЕРЕВОД" || tt.Term == "ПЕР")
        {
            Pullenti.Ner.Token tt2 = tt.Next;
            if (tt2 != null && tt2.IsChar('.'))
            {
                tt2 = tt2.Next;
            }
            if (tt2 is Pullenti.Ner.TextToken)
            {
                if ((tt2 as Pullenti.Ner.TextToken).Term == "C" || (tt2 as Pullenti.Ner.TextToken).Term == "С")
                {
                    tt2 = tt2.Next;
                    if (tt2 is Pullenti.Ner.TextToken)
                    {
                        return new TitleItemToken(t, tt2, Types.Translate);
                    }
                }
            }
        }

        // Section heading: bracketed name, or everything up to the end of the line after ':'.
        if (tt.Term == "СЕКЦИЯ" || tt.Term == "SECTION" || tt.Term == "СЕКЦІЯ")
        {
            t1 = tt.Next;
            if (t1 != null && t1.IsChar(':'))
            {
                t1 = t1.Next;
            }
            Pullenti.Ner.Core.BracketSequenceToken br = Pullenti.Ner.Core.BracketHelper.TryParse(t1, Pullenti.Ner.Core.BracketParseAttr.No, 100);
            if (br != null)
            {
                t1 = br.EndToken;
            }
            else if (t1 != tt.Next)
            {
                for (; t1 != null; t1 = t1.Next)
                {
                    if (t1.IsNewlineAfter)
                    {
                        break;
                    }
                }
                if (t1 == null)
                {
                    return null;
                }
            }
            if (t1 != tt.Next)
            {
                return new TitleItemToken(tt, t1, Types.Dust);
            }
        }

        // Speciality introduced by a keyword, by "<preposition> СПЕЦИАЛЬНОСТЬ", or by '/' at line start.
        t1 = null;
        if (tt.IsValue("СПЕЦИАЛЬНОСТЬ", "СПЕЦІАЛЬНІСТЬ"))
        {
            t1 = tt.Next;
        }
        else if (tt.Morph.Class.IsPreposition && tt.Next != null && tt.Next.IsValue("СПЕЦИАЛЬНОСТЬ", "СПЕЦІАЛЬНІСТЬ"))
        {
            t1 = tt.Next.Next;
        }
        else if (tt.IsChar('/') && tt.IsNewlineBefore)
        {
            t1 = tt.Next;
        }
        if (t1 != null)
        {
            if (t1.IsCharOf(":") || t1.IsHiphen)
            {
                t1 = t1.Next;
            }
            TitleItemToken spec = TryAttachSpeciality(t1, true);
            if (spec != null)
            {
                spec.BeginToken = t;
                return spec;
            }
        }
    }

    TitleItemToken sss = TryAttachSpeciality(t, false);
    if (sss != null)
    {
        return sss;
    }
    if (t is Pullenti.Ner.ReferentToken)
    {
        return null;
    }

    Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
    if (npt != null)
    {
        string s = npt.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false);
        Pullenti.Ner.Core.TerminToken tok = m_Termins.TryParse(npt.EndToken, Pullenti.Ner.Core.TerminParseAttr.No);
        if (tok != null)
        {
            Types ty = (Types)tok.Termin.Tag;
            if (ty == Types.Typ)
            {
                TitleItemToken tit = TryAttach(tok.EndToken.Next);
                if (tit != null && tit.Typ == Types.Theme)
                {
                    return new TitleItemToken(npt.BeginToken, tit.EndToken, Types.TypAndTheme) { Value = s };
                }
                // A bare "work" / "project" noun is not accepted as a document type.
                if (s == "РАБОТА" || s == "РОБОТА" || s == "ПРОЕКТ")
                {
                    return null;
                }
                Pullenti.Ner.Token t1 = tok.EndToken;
                if (s == "ДИССЕРТАЦИЯ" || s == "ДИСЕРТАЦІЯ")
                {
                    // Look ahead for the degree (doctor / candidate / master) to refine the type value;
                    // the scan gives up after more than 3 unrecognised tokens.
                    int err = 0;
                    for (Pullenti.Ner.Token ttt = t1.Next; ttt != null; ttt = ttt.Next)
                    {
                        if (ttt.Morph.Class.IsPreposition)
                        {
                            continue;
                        }
                        if (ttt.IsValue("СОИСКАНИЕ", ""))
                        {
                            continue;
                        }
                        Pullenti.Ner.Core.NounPhraseToken npt1 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(ttt, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                        if (npt1 != null && npt1.Noun.IsValue("СТЕПЕНЬ", "СТУПІНЬ"))
                        {
                            t1 = (ttt = npt1.EndToken);
                            continue;
                        }
                        Pullenti.Ner.ReferentToken rt = t1.Kit.ProcessReferent("PERSON", ttt);
                        if (rt != null && (rt.Referent is Pullenti.Ner.Person.PersonPropertyReferent))
                        {
                            Pullenti.Ner.Person.PersonPropertyReferent ppr = rt.Referent as Pullenti.Ner.Person.PersonPropertyReferent;
                            if (ppr.Name == "доктор наук")
                            {
                                t1 = rt.EndToken;
                                s = "ДОКТОРСКАЯ ДИССЕРТАЦИЯ";
                                break;
                            }
                            else if (ppr.Name == "кандидат наук")
                            {
                                t1 = rt.EndToken;
                                s = "КАНДИДАТСКАЯ ДИССЕРТАЦИЯ";
                                break;
                            }
                            else if (ppr.Name == "магистр")
                            {
                                t1 = rt.EndToken;
                                s = "МАГИСТЕРСКАЯ ДИССЕРТАЦИЯ";
                                break;
                            }
                        }
                        if (ttt.IsValue("ДОКТОР", null) || ttt.IsValue("КАНДИДАТ", null) || ttt.IsValue("МАГИСТР", "МАГІСТР"))
                        {
                            t1 = ttt;
                            npt1 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(ttt.Next, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                            if (npt1 != null && npt1.EndToken.IsValue("НАУК", null))
                            {
                                t1 = npt1.EndToken;
                            }
                            s = (ttt.IsValue("МАГИСТР", "МАГІСТР") ? "МАГИСТЕРСКАЯ ДИССЕРТАЦИЯ" : (ttt.IsValue("ДОКТОР", null) ? "ДОКТОРСКАЯ ДИССЕРТАЦИЯ" : "КАНДИДАТСКАЯ ДИССЕРТАЦИЯ"));
                            break;
                        }
                        if ((++err) > 3)
                        {
                            break;
                        }
                    }
                }
                if (t1.Next != null && t1.Next.IsChar('.'))
                {
                    t1 = t1.Next;
                }
                // "ОТЧЕТ О <noun phrase in prepositional case>" is absorbed into the item.
                if (s.EndsWith("ОТЧЕТ") && t1.Next != null && t1.Next.IsValue("О", null))
                {
                    Pullenti.Ner.Core.NounPhraseToken reportTopic = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t1.Next, Pullenti.Ner.Core.NounPhraseParseAttr.ParsePreposition, 0, null);
                    if (reportTopic != null && reportTopic.Morph.Case.IsPrepositional)
                    {
                        t1 = reportTopic.EndToken;
                    }
                }
                return new TitleItemToken(npt.BeginToken, t1, ty) { Value = s };
            }
        }
    }

    Pullenti.Ner.Core.TerminToken tok1 = m_Termins.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
    if (tok1 != null)
    {
        return new TitleItemToken(tok1.BeginToken, tok1.EndToken, (Types)tok1.Termin.Tag);
    }

    // A termin enclosed in brackets / quotes; the result ends on the closing bracket.
    if (Pullenti.Ner.Core.BracketHelper.CanBeStartOfSequence(t, false, false))
    {
        tok1 = m_Termins.TryParse(t.Next, Pullenti.Ner.Core.TerminParseAttr.No);
        if (tok1 != null && Pullenti.Ner.Core.BracketHelper.CanBeEndOfSequence(tok1.EndToken.Next, false, null, false))
        {
            Pullenti.Ner.Token closing = tok1.EndToken.Next;
            return new TitleItemToken(tok1.BeginToken, closing, (Types)tok1.Termin.Tag);
        }
    }
    return null;
}
/// <summary>
/// Tries to recognise one item of an organization name starting at token <paramref name="t"/>.
/// </summary>
/// <param name="t">Token at which the name item is expected to start; null yields null.</param>
/// <param name="prev">Previously attached name item, or null. It is used for case / character-style
/// agreement checks and enables the comma, "&amp;" / "AND" / "UND" and conjunction continuations.</param>
/// <param name="extOnto">When true, any noun phrase is accepted as a name item even if the
/// case-agreement conditions are not met.</param>
/// <returns>The recognised name item, or null.</returns>
static OrgItemNameToken _TryAttach(Pullenti.Ner.Token t, OrgItemNameToken prev, bool extOnto)
{
    if (t == null)
    {
        return null;
    }

    // Referent tokens: only denominations and Latin-written geo referents can take part in a name.
    Pullenti.Ner.Referent r = t.GetReferent();
    if (r != null)
    {
        if (r.TypeName == "DENOMINATION")
        {
            return new OrgItemNameToken(t, t) { Value = r.ToString(true, t.Kit.BaseLanguage, 0), IsDenomination = true };
        }
        if ((r is Pullenti.Ner.Geo.GeoReferent) && t.Chars.IsLatinLetter)
        {
            OrgItemNameToken res2 = _TryAttach(t.Next, prev, extOnto);
            if (res2 != null && res2.Chars.IsLatinLetter)
            {
                res2.BeginToken = t;
                res2.Value = string.Format("{0} {1}", Pullenti.Ner.Core.MiscHelper.GetTextValueOfMetaToken(t as Pullenti.Ner.MetaToken, Pullenti.Ner.Core.GetTextAttr.No), res2.Value);
                res2.IsInDictionary = false;
                return res2;
            }
        }
        return null;
    }

    Pullenti.Ner.TextToken tt = t as Pullenti.Ner.TextToken;
    if (tt == null)
    {
        return null;
    }
    OrgItemNameToken res = null;

    // Standard tails and standard names from the termin collections (a leading comma is allowed for tails).
    Pullenti.Ner.Core.TerminToken tok = m_StdTails.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
    if (tok == null && t.IsChar(','))
    {
        tok = m_StdTails.TryParse(t.Next, Pullenti.Ner.Core.TerminParseAttr.No);
    }
    if (tok != null)
    {
        return new OrgItemNameToken(t, tok.EndToken) { Value = tok.Termin.CanonicText, IsStdTail = tok.Termin.Tag == null, IsEmptyWord = tok.Termin.Tag != null, Morph = tok.Morph };
    }
    if ((((tok = m_StdNames.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No)))) != null)
    {
        return new OrgItemNameToken(t, tok.EndToken) { Value = tok.Termin.CanonicText, IsStdName = true };
    }
    OrgItemEngItem eng = OrgItemEngItem.TryAttach(t, false);
    if (eng == null && t.IsChar(','))
    {
        eng = OrgItemEngItem.TryAttach(t.Next, false);
    }
    if (eng != null)
    {
        return new OrgItemNameToken(t, eng.EndToken) { Value = eng.FullValue, IsStdTail = true };
    }

    if (tt.Chars.IsAllLower && prev != null)
    {
        if (!prev.Chars.IsAllLower && !prev.Chars.IsCapitalUpper)
        {
            return null;
        }
    }

    // ", <noun phrase> И <noun phrase>" continuing the previous item in the same style and case.
    if (tt.IsChar(',') && prev != null)
    {
        Pullenti.Ner.Core.NounPhraseToken npt1 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t.Next, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
        if (npt1 == null || npt1.Chars != prev.Chars || ((npt1.Morph.Case & prev.Morph.Case)).IsUndefined)
        {
            return null;
        }
        OrgItemTypeToken ty = OrgItemTypeToken.TryAttach(t.Next, false, null);
        if (ty != null)
        {
            return null;
        }
        if (npt1.EndToken.Next == null || !npt1.EndToken.Next.IsValue("И", null))
        {
            return null;
        }
        Pullenti.Ner.Token t1 = npt1.EndToken.Next;
        Pullenti.Ner.Core.NounPhraseToken npt2 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t1.Next, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
        if (npt2 == null || npt2.Chars != prev.Chars || ((npt2.Morph.Case & npt1.Morph.Case & prev.Morph.Case)).IsUndefined)
        {
            return null;
        }
        ty = OrgItemTypeToken.TryAttach(t1.Next, false, null);
        if (ty != null)
        {
            return null;
        }
        res = new OrgItemNameToken(npt1.BeginToken, npt1.EndToken) { Morph = npt1.Morph, Value = npt1.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false) };
        res.IsNounPhrase = true;
        res.IsAfterConjunction = true;
        if (prev.Preposition != null)
        {
            res.Preposition = prev.Preposition;
        }
        return res;
    }

    // "&" / "AND" / "UND" continuation.
    if (((tt.IsChar('&') || tt.IsValue("AND", null) || tt.IsValue("UND", null))) && prev != null)
    {
        if ((tt.Next is Pullenti.Ner.TextToken) && tt.LengthChar == 1 && tt.Next.Chars.IsLatinLetter)
        {
            res = new OrgItemNameToken(tt, tt.Next) { Chars = tt.Next.Chars };
            res.IsAfterConjunction = true;
            res.Value = "& " + (tt.Next as Pullenti.Ner.TextToken).Term;
            return res;
        }
        res = OrgItemNameToken.TryAttach(tt.Next, null, extOnto, false);
        if (res == null || res.Chars != prev.Chars)
        {
            return null;
        }
        res.IsAfterConjunction = true;
        res.Value = "& " + res.Value;
        return res;
    }

    if (!tt.Chars.IsLetter)
    {
        return null;
    }

    Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
    if (npt != null && npt.InternalNoun != null)
    {
        npt = null;
    }

    // explOk: a semantic link can be built from the previous item's last noun to this noun phrase.
    bool explOk = false;
    if (npt != null && prev != null && prev.EndToken.GetMorphClassInDictionary().IsNoun)
    {
        Pullenti.Ner.Core.NounPhraseToken npt0 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(prev.EndToken, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
        if (npt0 != null)
        {
            List<Pullenti.Semantic.Core.SemanticLink> links = Pullenti.Semantic.Core.SemanticHelper.TryCreateLinks(npt0, npt, null);
            if (links.Count > 0)
            {
                explOk = true;
            }
        }
    }

    if (npt != null && ((explOk || npt.Morph.Case.IsGenitive || ((prev != null && !((prev.Morph.Case & npt.Morph.Case)).IsUndefined)))))
    {
        Pullenti.Morph.MorphClass mc = npt.BeginToken.GetMorphClassInDictionary();
        if (mc.IsVerb || mc.IsPronoun)
        {
            return null;
        }
        if (mc.IsAdverb)
        {
            // An adverb is tolerated only as the first part of a hyphenated word.
            if (!(npt.BeginToken.Next != null && npt.BeginToken.Next.IsHiphen))
            {
                return null;
            }
        }
        if (mc.IsPreposition)
        {
            return null;
        }
        if (mc.IsNoun && npt.Chars.IsAllLower)
        {
            Pullenti.Morph.MorphCase ca = npt.Morph.Case;
            if ((!ca.IsDative && !ca.IsGenitive && !ca.IsInstrumental) && !ca.IsPrepositional)
            {
                return null;
            }
        }
        res = new OrgItemNameToken(npt.BeginToken, npt.EndToken) { Morph = npt.Morph, Value = npt.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false) };
        res.IsNounPhrase = true;
        if ((npt.EndToken.WhitespacesAfterCount < 2) && (npt.EndToken.Next is Pullenti.Ner.TextToken))
        {
            Pullenti.Ner.Core.NounPhraseToken npt2 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(npt.EndToken.Next, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
            if (npt2 != null && npt2.Morph.Case.IsGenitive && npt2.Chars.IsAllLower)
            {
                // Absorb a following lower-case genitive phrase unless it starts a type, an eponym
                // or a (non-plural) person property.
                OrgItemTypeToken typ = OrgItemTypeToken.TryAttach(npt.EndToken.Next, true, null);
                OrgItemEponymToken epo = OrgItemEponymToken.TryAttach(npt.EndToken.Next, false);
                Pullenti.Ner.ReferentToken rtt = t.Kit.ProcessReferent("PERSONPROPERTY", npt.EndToken.Next);
                if (typ == null && epo == null && ((rtt == null || rtt.Morph.Number == Pullenti.Morph.MorphNumber.Plural)))
                {
                    res.EndToken = npt2.EndToken;
                    res.Value = string.Format("{0} {1}", res.Value, Pullenti.Ner.Core.MiscHelper.GetTextValueOfMetaToken(npt2, Pullenti.Ner.Core.GetTextAttr.No));
                }
            }
            else if (npt.EndToken.Next.IsComma && (npt.EndToken.Next.Next is Pullenti.Ner.TextToken))
            {
                // ", <participle agreeing with the phrase> <lower-case dative/genitive phrase>"
                Pullenti.Ner.Token tt2 = npt.EndToken.Next.Next;
                Pullenti.Morph.MorphClass mv2 = tt2.GetMorphClassInDictionary();
                if (mv2.IsAdjective && mv2.IsVerb)
                {
                    Pullenti.Morph.MorphBaseInfo bi = new Pullenti.Morph.MorphBaseInfo() { Case = npt.Morph.Case, Gender = npt.Morph.Gender, Number = npt.Morph.Number };
                    if (tt2.Morph.CheckAccord(bi, false, false))
                    {
                        npt2 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(tt2.Next, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                        if (npt2 != null && ((npt2.Morph.Case.IsDative || npt2.Morph.Case.IsGenitive)) && npt2.Chars.IsAllLower)
                        {
                            res.EndToken = npt2.EndToken;
                            res.Value = string.Format("{0} {1}", res.Value, Pullenti.Ner.Core.MiscHelper.GetTextValue(npt.EndToken.Next, res.EndToken, Pullenti.Ner.Core.GetTextAttr.No));
                        }
                    }
                }
            }
        }
        if (explOk)
        {
            res.IsAfterConjunction = true;
        }
    }
    else if (npt != null && ((((prev != null && prev.IsNounPhrase && npt.Morph.Case.IsInstrumental)) || extOnto)))
    {
        res = new OrgItemNameToken(npt.BeginToken, npt.EndToken) { Morph = npt.Morph, Value = npt.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false) };
        res.IsNounPhrase = true;
    }
    else if (tt.IsAnd)
    {
        res = TryAttach(tt.Next, prev, extOnto, false);
        if (res == null || !res.IsNounPhrase || prev == null)
        {
            return null;
        }
        if (((prev.Morph.Case & res.Morph.Case)).IsUndefined)
        {
            return null;
        }
        if (prev.Morph.Number != Pullenti.Morph.MorphNumber.Undefined && res.Morph.Number != Pullenti.Morph.MorphNumber.Undefined)
        {
            if (((prev.Morph.Number & res.Morph.Number)) == Pullenti.Morph.MorphNumber.Undefined)
            {
                if (prev.Chars != res.Chars)
                {
                    return null;
                }
                OrgItemTypeToken ty = OrgItemTypeToken.TryAttach(res.EndToken.Next, false, null);
                if (ty != null)
                {
                    return null;
                }
            }
        }
        // NOTE(review): reads Chars and writes the same value back; kept in case the accessors
        // are not trivial - confirm and drop if they are.
        Pullenti.Morph.CharsInfo ci = res.Chars;
        res.Chars = ci;
        res.IsAfterConjunction = true;
        return res;
    }
    else if (((tt.Term == "ПО" || tt.Term == "ПРИ" || tt.Term == "ЗА") || tt.Term == "С" || tt.Term == "В") || tt.Term == "НА")
    {
        // Preposition + noun phrase in the case that the preposition requires.
        npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t.Next, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
        if (npt != null)
        {
            if (m_VervotWords.TryParse(npt.EndToken, Pullenti.Ner.Core.TerminParseAttr.No) != null)
            {
                return null;
            }
            bool ok = false;
            if (tt.Term == "ПО")
            {
                ok = npt.Morph.Case.IsDative;
            }
            else if (tt.Term == "С")
            {
                ok = npt.Morph.Case.IsInstrumental;
            }
            else if (tt.Term == "ЗА")
            {
                ok = npt.Morph.Case.IsGenitive | npt.Morph.Case.IsInstrumental;
            }
            else if (tt.Term == "НА")
            {
                ok = npt.Morph.Case.IsPrepositional;
            }
            else if (tt.Term == "В")
            {
                ok = npt.Morph.Case.IsDative | npt.Morph.Case.IsPrepositional;
                if (ok)
                {
                    // After "В" only "СФЕРА" / "ОБЛАСТЬ" phrases are accepted.
                    ok = false;
                    if (t.Next.IsValue("СФЕРА", null) || t.Next.IsValue("ОБЛАСТЬ", null))
                    {
                        ok = true;
                    }
                }
            }
            else if (tt.Term == "ПРИ")
            {
                ok = npt.Morph.Case.IsPrepositional;
                if (ok)
                {
                    // "ПРИ <organization>" attaches to another organization, not to this name.
                    if (OrgItemTypeToken.TryAttach(tt.Next, true, null) != null)
                    {
                        ok = false;
                    }
                    else
                    {
                        Pullenti.Ner.ReferentToken rt = tt.Kit.ProcessReferent(Pullenti.Ner.Org.OrganizationAnalyzer.ANALYZER_NAME, tt.Next);
                        if (rt != null)
                        {
                            ok = false;
                        }
                    }
                }
                string s = npt.Noun.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false);
                if (s == "ПОДДЕРЖКА" || s == "УЧАСТИЕ")
                {
                    ok = false;
                }
            }
            else
            {
                ok = npt.Morph.Case.IsPrepositional;
            }
            if (ok)
            {
                res = new OrgItemNameToken(t, npt.EndToken) { Morph = npt.Morph, Value = npt.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Singular, Pullenti.Morph.MorphGender.Undefined, false), Chars = npt.Chars };
                res.IsNounPhrase = true;
                res.Preposition = tt.Term;
                if (((res.Value == "ДЕЛО" || res.Value == "ВОПРОС")) && !res.IsNewlineAfter)
                {
                    // "ПО ДЕЛАМ / ВОПРОСАМ <genitive item>[, <item> ... И <item>]"
                    OrgItemNameToken res2 = _TryAttach(res.EndToken.Next, res, extOnto);
                    if (res2 != null && res2.Morph.Case.IsGenitive)
                    {
                        res.Value = string.Format("{0} {1}", res.Value, res2.Value);
                        res.EndToken = res2.EndToken;
                        for (Pullenti.Ner.Token ttt = res2.EndToken.Next; ttt != null; ttt = ttt.Next)
                        {
                            if (!ttt.IsCommaAnd)
                            {
                                break;
                            }
                            OrgItemNameToken res3 = _TryAttach(ttt.Next, res2, extOnto);
                            if (res3 == null)
                            {
                                break;
                            }
                            res.Value = string.Format("{0} {1}", res.Value, res3.Value);
                            res.EndToken = res3.EndToken;
                            if (ttt.IsAnd)
                            {
                                break;
                            }
                            ttt = res.EndToken;
                        }
                    }
                }
            }
        }
        if (res == null)
        {
            return null;
        }
    }
    else if (tt.Term == "OF")
    {
        Pullenti.Ner.Token t1 = tt.Next;
        if (t1 != null && Pullenti.Ner.Core.MiscHelper.IsEngArticle(t1))
        {
            t1 = t1.Next;
        }
        if (t1 != null && t1.Chars.IsLatinLetter && !t1.Chars.IsAllLower)
        {
            res = new OrgItemNameToken(t, t1) { Chars = t1.Chars, Morph = t1.Morph };
            for (Pullenti.Ner.Token ttt = t1.Next; ttt != null; ttt = ttt.Next)
            {
                if (ttt.WhitespacesBeforeCount > 2)
                {
                    break;
                }
                if (Pullenti.Ner.Core.MiscHelper.IsEngAdjSuffix(ttt))
                {
                    ttt = ttt.Next;
                    continue;
                }
                if (!ttt.Chars.IsLatinLetter)
                {
                    break;
                }
                if (ttt.Morph.Class.IsPreposition)
                {
                    break;
                }
                t1 = (res.EndToken = ttt);
            }
            res.Value = Pullenti.Ner.Core.MiscHelper.GetTextValue(t, t1, Pullenti.Ner.Core.GetTextAttr.IgnoreArticles);
            res.Preposition = tt.Term;
            return res;
        }
    }

    // Fallback: a single word, extended over "-", "/", "\" and "." joined parts.
    if (res == null)
    {
        if (tt.Chars.IsLatinLetter && tt.LengthChar == 1)
        {
        }
        else if (tt.Chars.IsAllLower || (tt.LengthChar < 2))
        {
            if (!tt.Chars.IsLatinLetter || prev == null || !prev.Chars.IsLatinLetter)
            {
                return null;
            }
        }
        if (tt.Chars.IsCyrillicLetter)
        {
            Pullenti.Morph.MorphClass mc = tt.GetMorphClassInDictionary();
            if (mc.IsVerb || mc.IsAdverb)
            {
                return null;
            }
        }
        else if (tt.Chars.IsLatinLetter && !tt.IsWhitespaceAfter)
        {
            if (!tt.IsWhitespaceAfter && (tt.LengthChar < 5))
            {
                if (tt.Next is Pullenti.Ner.NumberToken)
                {
                    return null;
                }
            }
        }
        res = new OrgItemNameToken(tt, tt) { Value = tt.Term, Morph = tt.Morph };
        for (t = tt.Next; t != null; t = t.Next)
        {
            if ((((t.IsHiphen || t.IsCharOf("\\/"))) && t.Next != null && (t.Next is Pullenti.Ner.TextToken)) && !t.IsWhitespaceBefore && !t.IsWhitespaceAfter)
            {
                t = t.Next;
                res.EndToken = t;
                res.Value = string.Format("{0}{1}{2}", res.Value, (t.Previous.IsChar('.') ? '.' : '-'), (t as Pullenti.Ner.TextToken).Term);
            }
            else if (t.IsChar('.'))
            {
                if (!t.IsWhitespaceAfter && !t.IsWhitespaceBefore && (t.Next is Pullenti.Ner.TextToken))
                {
                    res.EndToken = t.Next;
                    t = t.Next;
                    res.Value = string.Format("{0}.{1}", res.Value, (t as Pullenti.Ner.TextToken).Term);
                }
                else if ((t.Next != null && !t.IsNewlineAfter && t.Next.Chars.IsLatinLetter) && tt.Chars.IsLatinLetter)
                {
                    res.EndToken = t;
                }
                else
                {
                    break;
                }
            }
            else
            {
                break;
            }
        }
    }

    // Mark the item as a dictionary word when any of its letter tokens (other than
    // conjunctions / prepositions) has a dictionary word form.
    for (Pullenti.Ner.Token t0 = res.BeginToken; t0 != null; t0 = t0.Next)
    {
        Pullenti.Ner.TextToken word = t0 as Pullenti.Ner.TextToken;
        if (word != null && word.IsLetters)
        {
            if (!word.Morph.Class.IsConjunction && !word.Morph.Class.IsPreposition)
            {
                foreach (Pullenti.Morph.MorphBaseInfo mf in word.Morph.Items)
                {
                    // Items that are not word forms carry no dictionary flag and are skipped.
                    Pullenti.Morph.MorphWordForm wf = mf as Pullenti.Morph.MorphWordForm;
                    if (wf != null && wf.IsInDictionary)
                    {
                        res.IsInDictionary = true;
                    }
                }
            }
        }
        if (t0 == res.EndToken)
        {
            break;
        }
    }

    // A single all-upper token directly followed by [-]number absorbs the number into the value.
    if (res.BeginToken == res.EndToken && res.BeginToken.Chars.IsAllUpper)
    {
        if (res.EndToken.Next != null && !res.EndToken.IsWhitespaceAfter)
        {
            Pullenti.Ner.Token t1 = res.EndToken.Next;
            if (t1.Next != null && !t1.IsWhitespaceAfter && t1.IsHiphen)
            {
                t1 = t1.Next;
            }
            if (t1 is Pullenti.Ner.NumberToken)
            {
                res.Value += (t1 as Pullenti.Ner.NumberToken).Value;
                res.EndToken = t1;
            }
        }
    }

    // A single token with a lower-case ending: the value is the source text up to and including
    // its last upper-case character.
    if (res.BeginToken == res.EndToken && res.BeginToken.Chars.IsLastLower)
    {
        string src = res.BeginToken.GetSourceText();
        for (int i = src.Length - 1; i >= 0; i--)
        {
            if (char.IsUpper(src[i]))
            {
                res.Value = src.Substring(0, i + 1);
                break;
            }
        }
    }
    return res;
}