/// <summary>
/// Reads consecutive text tokens starting at <paramref name="t"/> and turns each of them into a
/// <see cref="PersonItemToken"/> classified as an initial, a conjunction, a lower-case word,
/// a proper name or a surname.
/// </summary>
/// <param name="t">Token to start scanning from.</param>
/// <returns>The collected items in text order, or <c>null</c> when no item was collected.</returns>
public static List<PersonItemToken> TryAttach(Pullenti.Ner.Token t)
{
    List<PersonItemToken> items = new List<PersonItemToken>();
    for (Pullenti.Ner.Token cur = t; cur != null; cur = cur.Next)
    {
        // A line break terminates the sequence once at least one item has been collected.
        if (items.Count > 0 && cur.IsNewlineBefore)
        {
            break;
        }
        Pullenti.Ner.TextToken word = cur as Pullenti.Ner.TextToken;
        if (word == null)
        {
            break;
        }
        string term = word.Term;
        if (!char.IsLetter(term[0]))
        {
            break;
        }

        // 'last' is the final token covered by the item built in this iteration.
        Pullenti.Ner.Token last = cur;
        PersonItemToken item;
        if ((term.Length == 1 || term == "ДЖ") && !word.Chars.IsAllLower)
        {
            // Single letter (or "ДЖ") that is not lower-case: an initial, optionally followed by a dot.
            if (cur.Next != null && cur.Next.IsChar('.'))
            {
                last = cur.Next;
            }
            item = new PersonItemToken(cur, last);
            item.Typ = Pullenti.Ner.Org.Internal.OrgItemEponymToken.PersonItemType.Initial;
            item.Value = term;
        }
        else if (word.IsAnd)
        {
            item = new PersonItemToken(cur, cur);
            item.Typ = Pullenti.Ner.Org.Internal.OrgItemEponymToken.PersonItemType.And;
        }
        else
        {
            if (word.Morph.Class.IsPronoun || word.Morph.Class.IsPersonalPronoun)
            {
                break;
            }
            if (word.Chars.IsAllLower)
            {
                Pullenti.Morph.MorphClass dictClass = word.GetMorphClassInDictionary();
                if (dictClass.IsPreposition || dictClass.IsVerb || dictClass.IsAdverb)
                {
                    break;
                }
                // A dot is absorbed only when it directly follows the word (no whitespace).
                if (cur.Next != null && !cur.IsWhitespaceAfter && cur.Next.IsChar('.'))
                {
                    last = cur.Next;
                }
                item = new PersonItemToken(cur, last);
                item.Typ = Pullenti.Ner.Org.Internal.OrgItemEponymToken.PersonItemType.LocaseWord;
                item.Value = term;
            }
            else if (word.Morph.Class.IsProperName)
            {
                item = new PersonItemToken(cur, cur);
                item.Typ = Pullenti.Ner.Org.Internal.OrgItemEponymToken.PersonItemType.Name;
                item.Value = term;
            }
            else
            {
                Pullenti.Ner.Token dash = cur.Next;
                if (dash != null && dash.IsHiphen && (dash.Next is Pullenti.Ner.TextToken) && !dash.IsWhitespaceAfter)
                {
                    // Hyphenated surname: both parts are joined with '-'.
                    last = dash.Next;
                    item = new PersonItemToken(cur, last);
                    item.Typ = Pullenti.Ner.Org.Internal.OrgItemEponymToken.PersonItemType.Surname;
                    item.Value = string.Format("{0}-{1}", term, (last as Pullenti.Ner.TextToken).Term);
                }
                else
                {
                    item = new PersonItemToken(cur, cur);
                    item.Typ = Pullenti.Ner.Org.Internal.OrgItemEponymToken.PersonItemType.Surname;
                    item.Value = term;
                }
            }
        }

        items.Add(item);
        // Continue after the last token consumed by this item.
        cur = last;
    }
    return items.Count == 0 ? null : items;
}
/// <summary>
/// Walks the token chain from <c>FirstToken</c> and replaces text tokens whose word is not found in
/// the dictionary with a token built from the spelling suggested by
/// <c>MorphologyService.CorrectWord</c>. Every replacement is recorded in <c>CorrectedTokens</c>.
/// </summary>
/// <param name="lang">
/// Language used for morphological processing of the corrected word, and for the correction itself
/// when the token's own language is undefined.
/// </param>
void CorrectWordsByMorph(Pullenti.Morph.MorphLang lang)
{
    for (Pullenti.Ner.Token cur = FirstToken; cur != null; cur = cur.Next)
    {
        Pullenti.Ner.TextToken word = cur as Pullenti.Ner.TextToken;
        if (word == null)
        {
            continue;
        }
        if (cur.Morph.ContainsAttr("прдктв.", null))
        {
            continue;
        }
        // Only words of 4+ characters that are unknown to the dictionary are candidates.
        Pullenti.Morph.MorphClass dictClass = cur.GetMorphClassInDictionary();
        if (!dictClass.IsUndefined || cur.LengthChar < 4)
        {
            continue;
        }
        // Skip non-lower-case surnames and fully upper-case words.
        if ((cur.Morph.Class.IsProperSurname && !cur.Chars.IsAllLower) || cur.Chars.IsAllUpper)
        {
            continue;
        }

        string fixedWord = Pullenti.Morph.MorphologyService.CorrectWord(word.Term, (cur.Morph.Language.IsUndefined ? lang : cur.Morph.Language));
        if (fixedWord == null)
        {
            continue;
        }
        // The corrected spelling must be processed into exactly one morph token.
        List<Pullenti.Morph.MorphToken> variants = Pullenti.Morph.MorphologyService.Process(fixedWord, lang, null);
        if (variants == null || variants.Count != 1)
        {
            continue;
        }

        Pullenti.Ner.TextToken replacement = new Pullenti.Ner.TextToken(variants[0], this, cur.BeginChar, cur.EndChar);
        replacement.Chars = cur.Chars;
        replacement.Term0 = word.Term;
        if (replacement.GetMorphClassInDictionary().IsProperSurname)
        {
            continue;
        }

        // Splice the replacement into the chain in place of the current token.
        if (cur == FirstToken)
        {
            FirstToken = replacement;
        }
        else
        {
            cur.Previous.Next = replacement;
        }
        replacement.Next = cur.Next;
        cur = replacement;

        if (CorrectedTokens == null)
        {
            CorrectedTokens = new Dictionary<Pullenti.Ner.Token, string>();
        }
        CorrectedTokens.Add(cur, cur.GetSourceText());
    }
}
/// <summary>
/// Tries to build a verb phrase from the text tokens starting at <paramref name="t"/>.
/// Every accepted token receives an internal type code <c>ty</c>: 1 (normal form taken as a verb),
/// 2 (normal form taken as an adverb, item flagged <c>IsAdverb</c>) or 3 (normal form taken as an
/// adjective). A token whose code stays 0 ends the scan, as does the first non-text token.
/// </summary>
/// <remarks>
/// - The token "НЕ" is remembered and attached to the next accepted item: that item's
///   <c>BeginToken</c> becomes the "НЕ" token and its <c>Not</c> flag is set.
/// - "НЕТ" is accepted with code 1 only if no verb has been accepted yet; "ДОПУСТИМО" gets code 3.
/// - Once a verb has been accepted, a further pure verb is accepted only if it carries the "инф."
///   attribute or directly follows an accepted "be" verb (<c>verbBeBefore</c>).
/// - If the very first token gets code 0 and <paramref name="canBePartition"/> is set, a preposition
///   is tried via <c>PrepositionHelper.TryParse</c>; the first item must then share a case with the
///   preposition's <c>NextCase</c>, otherwise the whole method returns null.
/// - For code 1 tokens with a defined case, the normal form is rebuilt through
///   <c>MorphologyService.GetWordform</c> on the term prefixed with "КК", and the two prefix
///   characters are stripped from the result.
/// - Trailing adverb items are removed from the result (the first item is never removed).
/// NOTE(review): <c>norm.EndsWith("ЙШИЙ")</c> is called without a null check — this assumes
/// <c>GetNormalCaseText</c> never returns null for an adjective token; confirm.
/// </remarks>
/// <param name="t">First token to examine.</param>
/// <param name="canBePartition">
/// When false, a verb-class token that is also an adjective or has a defined case stops the scan.
/// When true, such tokens may be accepted, adjectives are examined, and a leading preposition is allowed.
/// </param>
/// <param name="canBeAdjPartition">
/// Enables the adjective branch on its own, lets short-form adjectives ("к.ф.") through, and accepts
/// an adjective with code 3 whenever derivate groups are found for its normal form.
/// </param>
/// <param name="forceParse">When true, a lower-case token that is both noun and verb in the dictionary is accepted as a verb.</param>
/// <returns>
/// The verb phrase, or null when no item with code 1 or 3 was accepted or the preposition/case
/// check failed.
/// </returns>
static VerbPhraseToken TryParseRu(Pullenti.Ner.Token t, bool canBePartition, bool canBeAdjPartition, bool forceParse) { VerbPhraseToken res = null; Pullenti.Ner.Token t0 = t; Pullenti.Ner.Token not = null; bool hasVerb = false; bool verbBeBefore = false; PrepositionToken prep = null; for (; t != null; t = t.Next) { if (!(t is Pullenti.Ner.TextToken)) { break; } Pullenti.Ner.TextToken tt = t as Pullenti.Ner.TextToken; bool isParticiple = false; if (tt.Term == "НЕ") { not = t; continue; } int ty = 0; string norm = null; Pullenti.Morph.MorphClass mc = tt.GetMorphClassInDictionary(); if (tt.Term == "НЕТ") { if (hasVerb) { break; } ty = 1; } else if (tt.Term == "ДОПУСТИМО") { ty = 3; } else if (mc.IsAdverb && !mc.IsVerb) { ty = 2; } else if (tt.IsPureVerb || tt.IsVerbBe) { ty = 1; if (hasVerb) { if (!tt.Morph.ContainsAttr("инф.", null)) { if (verbBeBefore) { } else { break; } } } } else if (mc.IsVerb) { if (mc.IsPreposition || mc.IsMisc || mc.IsPronoun) { } else if (mc.IsNoun) { if (tt.Term == "СТАЛИ" || tt.Term == "СТЕКЛО" || tt.Term == "БЫЛИ") { ty = 1; } else if (!tt.Chars.IsAllLower && !MiscHelper.CanBeStartOfSentence(tt)) { ty = 1; } else if (mc.IsAdjective && canBePartition) { ty = 1; } else if (forceParse) { ty = 1; } } else if (mc.IsProper) { if (tt.Chars.IsAllLower) { ty = 1; } } else { ty = 1; } if (mc.IsAdjective) { isParticiple = true; } if (!tt.Morph.Case.IsUndefined) { isParticiple = true; } if (!canBePartition && isParticiple) { break; } if (hasVerb) { if (tt.Morph.ContainsAttr("инф.", null)) { } else if (!isParticiple) { } else { break; } } } else if ((mc.IsAdjective && tt.Morph.ContainsAttr("к.ф.", null) && tt.Term.EndsWith("О")) && NounPhraseHelper.TryParse(tt, NounPhraseParseAttr.No, 0, null) == null) { ty = 2; } else if (mc.IsAdjective && ((canBePartition || canBeAdjPartition))) { if (tt.Morph.ContainsAttr("к.ф.", null) && !canBeAdjPartition) { break; } norm = tt.GetNormalCaseText(Pullenti.Morph.MorphClass.Adjective, 
Pullenti.Morph.MorphNumber.Singular, Pullenti.Morph.MorphGender.Masculine, false); if (norm.EndsWith("ЙШИЙ")) { } else { List <Pullenti.Semantic.Utils.DerivateGroup> grs = Pullenti.Semantic.Utils.DerivateService.FindDerivates(norm, true, null); if (grs != null && grs.Count > 0) { bool hVerb = false; bool hPart = false; foreach (Pullenti.Semantic.Utils.DerivateGroup gr in grs) { foreach (Pullenti.Semantic.Utils.DerivateWord w in gr.Words) { if (w.Class.IsAdjective && w.Class.IsVerb) { if (w.Spelling == norm) { hPart = true; } } else if (w.Class.IsVerb) { hVerb = true; } } } if (hPart && hVerb) { ty = 3; } else if (canBeAdjPartition) { ty = 3; } if (ty != 3 && !string.IsNullOrEmpty(grs[0].Prefix) && norm.StartsWith(grs[0].Prefix)) { hVerb = false; hPart = false; string norm1 = norm.Substring(grs[0].Prefix.Length); grs = Pullenti.Semantic.Utils.DerivateService.FindDerivates(norm1, true, null); if (grs != null && grs.Count > 0) { foreach (Pullenti.Semantic.Utils.DerivateGroup gr in grs) { foreach (Pullenti.Semantic.Utils.DerivateWord w in gr.Words) { if (w.Class.IsAdjective && w.Class.IsVerb) { if (w.Spelling == norm1) { hPart = true; } } else if (w.Class.IsVerb) { hVerb = true; } } } } if (hPart && hVerb) { ty = 3; } } } } } if (ty == 0 && t == t0 && canBePartition) { prep = PrepositionHelper.TryParse(t); if (prep != null) { t = prep.EndToken; continue; } } if (ty == 0) { break; } if (res == null) { res = new VerbPhraseToken(t0, t); } res.EndToken = t; VerbPhraseItemToken it = new VerbPhraseItemToken(t, t) { Morph = new Pullenti.Ner.MorphCollection(t.Morph) }; if (not != null) { it.BeginToken = not; it.Not = true; not = null; } it.IsAdverb = ty == 2; if (prep != null && !t.Morph.Case.IsUndefined && res.Items.Count == 0) { if (((prep.NextCase & t.Morph.Case)).IsUndefined) { return(null); } it.Morph.RemoveItems(prep.NextCase); res.Preposition = prep; } if (norm == null) { norm = t.GetNormalCaseText((ty == 3 ? Pullenti.Morph.MorphClass.Adjective : (ty == 2 ? 
Pullenti.Morph.MorphClass.Adverb : Pullenti.Morph.MorphClass.Verb)), Pullenti.Morph.MorphNumber.Singular, Pullenti.Morph.MorphGender.Masculine, false); if (ty == 1 && !tt.Morph.Case.IsUndefined) { Pullenti.Morph.MorphWordForm mi = new Pullenti.Morph.MorphWordForm() { Case = Pullenti.Morph.MorphCase.Nominative, Number = Pullenti.Morph.MorphNumber.Singular, Gender = Pullenti.Morph.MorphGender.Masculine }; foreach (Pullenti.Morph.MorphBaseInfo mit in tt.Morph.Items) { if (mit is Pullenti.Morph.MorphWordForm) { mi.Misc = (mit as Pullenti.Morph.MorphWordForm).Misc; break; } } string nnn = Pullenti.Morph.MorphologyService.GetWordform("КК" + (t as Pullenti.Ner.TextToken).Term, mi); if (nnn != null) { norm = nnn.Substring(2); } } } it.Normal = norm; res.Items.Add(it); if (!hasVerb && ((ty == 1 || ty == 3))) { res.Morph = it.Morph; hasVerb = true; } if (ty == 1 || ty == 3) { if (ty == 1 && tt.IsVerbBe) { verbBeBefore = true; } else { verbBeBefore = false; } } } if (!hasVerb) { return(null); } for (int i = res.Items.Count - 1; i > 0; i--) { if (res.Items[i].IsAdverb) { res.Items.RemoveAt(i); res.EndToken = res.Items[i - 1].EndToken; } else { break; } } return(res); }
/// <summary>
/// Walks the token chain from <c>FirstToken</c> and glues together two neighbouring letter tokens
/// (optionally separated by a hyphen) when at least one of them is unknown to the dictionary and
/// their concatenation is a dictionary word. The pair is replaced in the chain by a single new
/// <c>TextToken</c> spanning both.
/// </summary>
/// <param name="lang">Language passed to <c>MorphologyService.Process</c> for the merged word.</param>
void CorrectWordsByMerging(Pullenti.Morph.MorphLang lang)
{
    for (Pullenti.Ner.Token left = FirstToken; left != null && left.Next != null; left = left.Next)
    {
        if (!left.Chars.IsLetter || left.LengthChar < 2)
        {
            continue;
        }
        Pullenti.Morph.MorphClass leftClass = left.GetMorphClassInDictionary();
        if (left.Morph.ContainsAttr("прдктв.", null))
        {
            continue;
        }

        // The right-hand part is the next token, or the one after a hyphen that is not followed by a line break.
        Pullenti.Ner.Token right = left.Next;
        if (right.IsHiphen && right.Next != null && !right.IsNewlineAfter)
        {
            right = right.Next;
        }
        if (right.LengthChar == 1)
        {
            continue;
        }
        if (!right.Chars.IsLetter || !left.Chars.IsLetter || right.Chars.IsLatinLetter != left.Chars.IsLatinLetter)
        {
            continue;
        }
        // Letter-case filter: the pair is skipped unless the right part is all lower-case
        // and the left part is not all upper-case.
        bool caseRejected = (right.Chars.IsAllUpper && !left.Chars.IsAllUpper)
            || !right.Chars.IsAllLower
            || left.Chars.IsAllUpper;
        if (caseRejected)
        {
            continue;
        }
        if (right.Morph.ContainsAttr("прдктв.", null))
        {
            continue;
        }
        // At least one of the two parts must be missing from the dictionary.
        Pullenti.Morph.MorphClass rightClass = right.GetMorphClassInDictionary();
        if (!rightClass.IsUndefined && !leftClass.IsUndefined)
        {
            continue;
        }

        string leftTerm = (left as Pullenti.Ner.TextToken).Term;
        string rightTerm = (right as Pullenti.Ner.TextToken).Term;
        if ((leftTerm.Length + rightTerm.Length) < 6)
        {
            continue;
        }
        string merged = leftTerm + rightTerm;
        List<Pullenti.Morph.MorphToken> variants = Pullenti.Morph.MorphologyService.Process(merged, lang, null);
        if (variants == null || variants.Count != 1)
        {
            continue;
        }
        // NOTE(review): 'merged' is at least 6 characters long here, so these two 4-letter
        // exclusions can never match; kept as in the original.
        if (merged == "ПОСТ" || merged == "ВРЕД")
        {
            continue;
        }

        Pullenti.Ner.TextToken joined = new Pullenti.Ner.TextToken(variants[0], this, left.BeginChar, right.EndChar);
        if (joined.GetMorphClassInDictionary().IsUndefined)
        {
            continue;
        }
        joined.Chars = left.Chars;

        // Splice the merged token into the chain in place of the pair.
        if (left == FirstToken)
        {
            FirstToken = joined;
        }
        else
        {
            left.Previous.Next = joined;
        }
        if (right.Next != null)
        {
            joined.Next = right.Next;
        }
        left = joined;
    }
}
/// <summary>
/// Tries to read one organization-name item starting at token <paramref name="t"/>.
/// The checks run in the order below; the first one that applies decides the result.
/// </summary>
/// <remarks>
/// 1. Token carrying a referent: a "DENOMINATION" referent yields a denomination item; a Latin-letter
///    <c>GeoReferent</c> is prepended to the item parsed recursively from the next token (only if that
///    item is Latin too); any other referent yields null.
/// 2. Non-text tokens yield null.
/// 3. A match in <c>m_StdTails</c> (also tried after a leading comma) yields a standard-tail item, or an
///    empty-word item when the termin has a non-null <c>Tag</c>.
/// 4. A match in <c>m_StdNames</c> yields a standard-name item.
/// 5. An <c>OrgItemEngItem</c> (also tried after a leading comma) yields a standard-tail item.
/// 6. A lower-case token following a <paramref name="prev"/> that is neither lower-case nor
///    capitalized yields null.
/// 7. Comma after <paramref name="prev"/>: accepts the pattern "noun phrase, "И", noun phrase" when both
///    phrases have the same <c>Chars</c> as <paramref name="prev"/>, share a case with it and are not
///    organization types. The returned item covers only the first noun phrase.
/// 8. '&amp;' / "AND" / "UND" after <paramref name="prev"/>: the following name item is returned with its
///    value prefixed by "&amp; ".
/// 9. Noun phrase at <paramref name="t"/>: accepted when semantically linked to the previous noun, when
///    genitive, or when sharing a case with <paramref name="prev"/>. It may be extended by a following
///    lower-case genitive phrase, or by ", participle + dative/genitive phrase".
/// 10. Noun phrase in the instrumental case after a noun-phrase <paramref name="prev"/>, or any noun
///     phrase when <paramref name="extOnto"/> is set.
/// 11. A conjunction token (<c>IsAnd</c>): the next item must be a noun phrase compatible with
///     <paramref name="prev"/> in case.
/// 12. Prepositions "ПО", "ПРИ", "ЗА", "С", "В", "НА" followed by a noun phrase in a case allowed for
///     that preposition. For "ДЕЛО"/"ВОПРОС" the item is extended with following genitive items.
/// 13. "OF" followed by a Latin non-lower-case token, extended over following Latin tokens.
/// 14. Fallback: a single word, extended over parts attached with a hyphen, slash, backslash or dot
///     and no surrounding whitespace.
/// After cases 9-14 the method also sets <c>IsInDictionary</c> if any letter token in the item has a
/// dictionary word form, appends a directly attached number to a single upper-case token, and cuts
/// the value of a single "last lower" token after its last upper-case character.
/// NOTE(review): <c>(mf as MorphWordForm).IsInDictionary</c> has no null check — assumes every item in
/// a text token's <c>Morph.Items</c> is a <c>MorphWordForm</c>; confirm.
/// NOTE(review): <c>expinf</c> is assigned from <c>FindDerivates</c> but never read in this method.
/// NOTE(review): <c>ci = res.Chars; res.Chars = ci;</c> assigns the property its own value — intent unclear.
/// NOTE(review): in the fallback loop, <c>t.Previous.IsChar('.')</c> is tested right after stepping past
/// a hyphen/slash, so presumably it is never true and the separator written is always '-'; confirm.
/// </remarks>
/// <param name="t">Token to parse from; null yields null.</param>
/// <param name="prev">Previously parsed name item, or null when this is the first one.</param>
/// <param name="extOnto">When true, any noun phrase is accepted (case 10); also passed on to nested calls.</param>
/// <returns>The parsed name item, or null when the token cannot start or continue a name.</returns>
static OrgItemNameToken _TryAttach(Pullenti.Ner.Token t, OrgItemNameToken prev, bool extOnto) { if (t == null) { return(null); } Pullenti.Ner.Referent r = t.GetReferent(); if (r != null) { if (r.TypeName == "DENOMINATION") { return new OrgItemNameToken(t, t) { Value = r.ToString(true, t.Kit.BaseLanguage, 0), IsDenomination = true } } ; if ((r is Pullenti.Ner.Geo.GeoReferent) && t.Chars.IsLatinLetter) { OrgItemNameToken res2 = _TryAttach(t.Next, prev, extOnto); if (res2 != null && res2.Chars.IsLatinLetter) { res2.BeginToken = t; res2.Value = string.Format("{0} {1}", Pullenti.Ner.Core.MiscHelper.GetTextValueOfMetaToken(t as Pullenti.Ner.MetaToken, Pullenti.Ner.Core.GetTextAttr.No), res2.Value); res2.IsInDictionary = false; return(res2); } } return(null); } Pullenti.Ner.TextToken tt = t as Pullenti.Ner.TextToken; if (tt == null) { return(null); } OrgItemNameToken res = null; Pullenti.Ner.Core.TerminToken tok = m_StdTails.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No); if (tok == null && t.IsChar(',')) { tok = m_StdTails.TryParse(t.Next, Pullenti.Ner.Core.TerminParseAttr.No); } if (tok != null) { return new OrgItemNameToken(t, tok.EndToken) { Value = tok.Termin.CanonicText, IsStdTail = tok.Termin.Tag == null, IsEmptyWord = tok.Termin.Tag != null, Morph = tok.Morph } } ; if ((((tok = m_StdNames.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No)))) != null) { return new OrgItemNameToken(t, tok.EndToken) { Value = tok.Termin.CanonicText, IsStdName = true } } ; OrgItemEngItem eng = OrgItemEngItem.TryAttach(t, false); if (eng == null && t.IsChar(',')) { eng = OrgItemEngItem.TryAttach(t.Next, false); } if (eng != null) { return new OrgItemNameToken(t, eng.EndToken) { Value = eng.FullValue, IsStdTail = true } } ; if (tt.Chars.IsAllLower && prev != null) { if (!prev.Chars.IsAllLower && !prev.Chars.IsCapitalUpper) { return(null); } } if (tt.IsChar(',') && prev != null) { Pullenti.Ner.Core.NounPhraseToken npt1 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t.Next, 
Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null); if (npt1 == null || npt1.Chars != prev.Chars || ((npt1.Morph.Case & prev.Morph.Case)).IsUndefined) { return(null); } OrgItemTypeToken ty = OrgItemTypeToken.TryAttach(t.Next, false, null); if (ty != null) { return(null); } if (npt1.EndToken.Next == null || !npt1.EndToken.Next.IsValue("И", null)) { return(null); } Pullenti.Ner.Token t1 = npt1.EndToken.Next; Pullenti.Ner.Core.NounPhraseToken npt2 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t1.Next, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null); if (npt2 == null || npt2.Chars != prev.Chars || ((npt2.Morph.Case & npt1.Morph.Case & prev.Morph.Case)).IsUndefined) { return(null); } ty = OrgItemTypeToken.TryAttach(t1.Next, false, null); if (ty != null) { return(null); } res = new OrgItemNameToken(npt1.BeginToken, npt1.EndToken) { Morph = npt1.Morph, Value = npt1.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false) }; res.IsNounPhrase = true; res.IsAfterConjunction = true; if (prev.Preposition != null) { res.Preposition = prev.Preposition; } return(res); } if (((tt.IsChar('&') || tt.IsValue("AND", null) || tt.IsValue("UND", null))) && prev != null) { if ((tt.Next is Pullenti.Ner.TextToken) && tt.LengthChar == 1 && tt.Next.Chars.IsLatinLetter) { res = new OrgItemNameToken(tt, tt.Next) { Chars = tt.Next.Chars }; res.IsAfterConjunction = true; res.Value = "& " + (tt.Next as Pullenti.Ner.TextToken).Term; return(res); } res = OrgItemNameToken.TryAttach(tt.Next, null, extOnto, false); if (res == null || res.Chars != prev.Chars) { return(null); } res.IsAfterConjunction = true; res.Value = "& " + res.Value; return(res); } if (!tt.Chars.IsLetter) { return(null); } List <Pullenti.Semantic.Utils.DerivateGroup> expinf = null; if (prev != null && prev.EndToken.GetMorphClassInDictionary().IsNoun) { string wo = prev.EndToken.GetNormalCaseText(Pullenti.Morph.MorphClass.Noun, Pullenti.Morph.MorphNumber.Singular, 
Pullenti.Morph.MorphGender.Undefined, false); expinf = Pullenti.Semantic.Utils.DerivateService.FindDerivates(wo, true, prev.EndToken.Morph.Language); } Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null); if (npt != null && npt.InternalNoun != null) { npt = null; } bool explOk = false; if (npt != null && prev != null && prev.EndToken.GetMorphClassInDictionary().IsNoun) { Pullenti.Ner.Core.NounPhraseToken npt0 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(prev.EndToken, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null); if (npt0 != null) { List <Pullenti.Semantic.Core.SemanticLink> links = Pullenti.Semantic.Core.SemanticHelper.TryCreateLinks(npt0, npt, null); if (links.Count > 0) { explOk = true; } } } if (npt != null && ((explOk || npt.Morph.Case.IsGenitive || ((prev != null && !((prev.Morph.Case & npt.Morph.Case)).IsUndefined))))) { Pullenti.Morph.MorphClass mc = npt.BeginToken.GetMorphClassInDictionary(); if (mc.IsVerb || mc.IsPronoun) { return(null); } if (mc.IsAdverb) { if (npt.BeginToken.Next != null && npt.BeginToken.Next.IsHiphen) { } else { return(null); } } if (mc.IsPreposition) { return(null); } if (mc.IsNoun && npt.Chars.IsAllLower) { Pullenti.Morph.MorphCase ca = npt.Morph.Case; if ((!ca.IsDative && !ca.IsGenitive && !ca.IsInstrumental) && !ca.IsPrepositional) { return(null); } } res = new OrgItemNameToken(npt.BeginToken, npt.EndToken) { Morph = npt.Morph, Value = npt.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false) }; res.IsNounPhrase = true; if ((npt.EndToken.WhitespacesAfterCount < 2) && (npt.EndToken.Next is Pullenti.Ner.TextToken)) { Pullenti.Ner.Core.NounPhraseToken npt2 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(npt.EndToken.Next, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null); if (npt2 != null && npt2.Morph.Case.IsGenitive && npt2.Chars.IsAllLower) { OrgItemTypeToken typ = 
OrgItemTypeToken.TryAttach(npt.EndToken.Next, true, null); OrgItemEponymToken epo = OrgItemEponymToken.TryAttach(npt.EndToken.Next, false); Pullenti.Ner.ReferentToken rtt = t.Kit.ProcessReferent("PERSONPROPERTY", npt.EndToken.Next); if (typ == null && epo == null && ((rtt == null || rtt.Morph.Number == Pullenti.Morph.MorphNumber.Plural))) { res.EndToken = npt2.EndToken; res.Value = string.Format("{0} {1}", res.Value, Pullenti.Ner.Core.MiscHelper.GetTextValueOfMetaToken(npt2, Pullenti.Ner.Core.GetTextAttr.No)); } } else if (npt.EndToken.Next.IsComma && (npt.EndToken.Next.Next is Pullenti.Ner.TextToken)) { Pullenti.Ner.Token tt2 = npt.EndToken.Next.Next; Pullenti.Morph.MorphClass mv2 = tt2.GetMorphClassInDictionary(); if (mv2.IsAdjective && mv2.IsVerb) { Pullenti.Morph.MorphBaseInfo bi = new Pullenti.Morph.MorphBaseInfo() { Case = npt.Morph.Case, Gender = npt.Morph.Gender, Number = npt.Morph.Number }; if (tt2.Morph.CheckAccord(bi, false, false)) { npt2 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(tt2.Next, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null); if (npt2 != null && ((npt2.Morph.Case.IsDative || npt2.Morph.Case.IsGenitive)) && npt2.Chars.IsAllLower) { res.EndToken = npt2.EndToken; res.Value = string.Format("{0} {1}", res.Value, Pullenti.Ner.Core.MiscHelper.GetTextValue(npt.EndToken.Next, res.EndToken, Pullenti.Ner.Core.GetTextAttr.No)); } } } } } if (explOk) { res.IsAfterConjunction = true; } } else if (npt != null && ((((prev != null && prev.IsNounPhrase && npt.Morph.Case.IsInstrumental)) || extOnto))) { res = new OrgItemNameToken(npt.BeginToken, npt.EndToken) { Morph = npt.Morph, Value = npt.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false) }; res.IsNounPhrase = true; } else if (tt.IsAnd) { res = TryAttach(tt.Next, prev, extOnto, false); if (res == null || !res.IsNounPhrase || prev == null) { return(null); } if (((prev.Morph.Case & res.Morph.Case)).IsUndefined) { return(null); } if 
(prev.Morph.Number != Pullenti.Morph.MorphNumber.Undefined && res.Morph.Number != Pullenti.Morph.MorphNumber.Undefined) { if (((prev.Morph.Number & res.Morph.Number)) == Pullenti.Morph.MorphNumber.Undefined) { if (prev.Chars != res.Chars) { return(null); } OrgItemTypeToken ty = OrgItemTypeToken.TryAttach(res.EndToken.Next, false, null); if (ty != null) { return(null); } } } Pullenti.Morph.CharsInfo ci = res.Chars; res.Chars = ci; res.IsAfterConjunction = true; return(res); } else if (((tt.Term == "ПО" || tt.Term == "ПРИ" || tt.Term == "ЗА") || tt.Term == "С" || tt.Term == "В") || tt.Term == "НА") { npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t.Next, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null); if (npt != null) { if (m_VervotWords.TryParse(npt.EndToken, Pullenti.Ner.Core.TerminParseAttr.No) != null) { return(null); } bool ok = false; if (tt.Term == "ПО") { ok = npt.Morph.Case.IsDative; } else if (tt.Term == "С") { ok = npt.Morph.Case.IsInstrumental; } else if (tt.Term == "ЗА") { ok = npt.Morph.Case.IsGenitive | npt.Morph.Case.IsInstrumental; } else if (tt.Term == "НА") { ok = npt.Morph.Case.IsPrepositional; } else if (tt.Term == "В") { ok = npt.Morph.Case.IsDative | npt.Morph.Case.IsPrepositional; if (ok) { ok = false; if (t.Next.IsValue("СФЕРА", null) || t.Next.IsValue("ОБЛАСТЬ", null)) { ok = true; } } } else if (tt.Term == "ПРИ") { ok = npt.Morph.Case.IsPrepositional; if (ok) { if (OrgItemTypeToken.TryAttach(tt.Next, true, null) != null) { ok = false; } else { Pullenti.Ner.ReferentToken rt = tt.Kit.ProcessReferent(Pullenti.Ner.Org.OrganizationAnalyzer.ANALYZER_NAME, tt.Next); if (rt != null) { ok = false; } } } string s = npt.Noun.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false); if (s == "ПОДДЕРЖКА" || s == "УЧАСТИЕ") { ok = false; } } else { ok = npt.Morph.Case.IsPrepositional; } if (ok) { res = new OrgItemNameToken(t, npt.EndToken) { Morph = npt.Morph, Value = npt.GetNormalCaseText(null, 
Pullenti.Morph.MorphNumber.Singular, Pullenti.Morph.MorphGender.Undefined, false), Chars = npt.Chars }; res.IsNounPhrase = true; res.Preposition = tt.Term; if (((res.Value == "ДЕЛО" || res.Value == "ВОПРОС")) && !res.IsNewlineAfter) { OrgItemNameToken res2 = _TryAttach(res.EndToken.Next, res, extOnto); if (res2 != null && res2.Morph.Case.IsGenitive) { res.Value = string.Format("{0} {1}", res.Value, res2.Value); res.EndToken = res2.EndToken; for (Pullenti.Ner.Token ttt = res2.EndToken.Next; ttt != null; ttt = ttt.Next) { if (!ttt.IsCommaAnd) { break; } OrgItemNameToken res3 = _TryAttach(ttt.Next, res2, extOnto); if (res3 == null) { break; } res.Value = string.Format("{0} {1}", res.Value, res3.Value); res.EndToken = res3.EndToken; if (ttt.IsAnd) { break; } ttt = res.EndToken; } } } } } if (res == null) { return(null); } } else if (tt.Term == "OF") { Pullenti.Ner.Token t1 = tt.Next; if (t1 != null && Pullenti.Ner.Core.MiscHelper.IsEngArticle(t1)) { t1 = t1.Next; } if (t1 != null && t1.Chars.IsLatinLetter && !t1.Chars.IsAllLower) { res = new OrgItemNameToken(t, t1) { Chars = t1.Chars, Morph = t1.Morph }; for (Pullenti.Ner.Token ttt = t1.Next; ttt != null; ttt = ttt.Next) { if (ttt.WhitespacesBeforeCount > 2) { break; } if (Pullenti.Ner.Core.MiscHelper.IsEngAdjSuffix(ttt)) { ttt = ttt.Next; continue; } if (!ttt.Chars.IsLatinLetter) { break; } if (ttt.Morph.Class.IsPreposition) { break; } t1 = (res.EndToken = ttt); } res.Value = Pullenti.Ner.Core.MiscHelper.GetTextValue(t, t1, Pullenti.Ner.Core.GetTextAttr.IgnoreArticles); res.Preposition = tt.Term; return(res); } } if (res == null) { if (tt.Chars.IsLatinLetter && tt.LengthChar == 1) { } else if (tt.Chars.IsAllLower || (tt.LengthChar < 2)) { if (!tt.Chars.IsLatinLetter || prev == null || !prev.Chars.IsLatinLetter) { return(null); } } if (tt.Chars.IsCyrillicLetter) { Pullenti.Morph.MorphClass mc = tt.GetMorphClassInDictionary(); if (mc.IsVerb || mc.IsAdverb) { return(null); } } else if (tt.Chars.IsLatinLetter && 
!tt.IsWhitespaceAfter) { if (!tt.IsWhitespaceAfter && (tt.LengthChar < 5)) { if (tt.Next is Pullenti.Ner.NumberToken) { return(null); } } } res = new OrgItemNameToken(tt, tt) { Value = tt.Term, Morph = tt.Morph }; for (t = tt.Next; t != null; t = t.Next) { if ((((t.IsHiphen || t.IsCharOf("\\/"))) && t.Next != null && (t.Next is Pullenti.Ner.TextToken)) && !t.IsWhitespaceBefore && !t.IsWhitespaceAfter) { t = t.Next; res.EndToken = t; res.Value = string.Format("{0}{1}{2}", res.Value, (t.Previous.IsChar('.') ? '.' : '-'), (t as Pullenti.Ner.TextToken).Term); } else if (t.IsChar('.')) { if (!t.IsWhitespaceAfter && !t.IsWhitespaceBefore && (t.Next is Pullenti.Ner.TextToken)) { res.EndToken = t.Next; t = t.Next; res.Value = string.Format("{0}.{1}", res.Value, (t as Pullenti.Ner.TextToken).Term); } else if ((t.Next != null && !t.IsNewlineAfter && t.Next.Chars.IsLatinLetter) && tt.Chars.IsLatinLetter) { res.EndToken = t; } else { break; } } else { break; } } } for (Pullenti.Ner.Token t0 = res.BeginToken; t0 != null; t0 = t0.Next) { if ((((tt = t0 as Pullenti.Ner.TextToken))) != null && tt.IsLetters) { if (!tt.Morph.Class.IsConjunction && !tt.Morph.Class.IsPreposition) { foreach (Pullenti.Morph.MorphBaseInfo mf in tt.Morph.Items) { if ((mf as Pullenti.Morph.MorphWordForm).IsInDictionary) { res.IsInDictionary = true; } } } } if (t0 == res.EndToken) { break; } } if (res.BeginToken == res.EndToken && res.BeginToken.Chars.IsAllUpper) { if (res.EndToken.Next != null && !res.EndToken.IsWhitespaceAfter) { Pullenti.Ner.Token t1 = res.EndToken.Next; if (t1.Next != null && !t1.IsWhitespaceAfter && t1.IsHiphen) { t1 = t1.Next; } if (t1 is Pullenti.Ner.NumberToken) { res.Value += (t1 as Pullenti.Ner.NumberToken).Value; res.EndToken = t1; } } } if (res.BeginToken == res.EndToken && res.BeginToken.Chars.IsLastLower) { string src = res.BeginToken.GetSourceText(); for (int i = src.Length - 1; i >= 0; i--) { if (char.IsUpper(src[i])) { res.Value = src.Substring(0, i + 1); break; } } } 
return(res); }