/// <summary>
/// Tries to build an organization referent from a name (Latin, optionally Cyrillic)
/// that is followed by a type item recognised by <c>TryAttach</c>, or from a type item
/// that is immediately followed by a bracketed name.
/// </summary>
/// <param name="t">First token of the candidate; may be an opening parenthesis that wraps the whole organization.</param>
/// <param name="canBeCyr">When true, Cyrillic tokens are accepted in the name in addition to Latin ones.</param>
/// <returns>
/// A <c>ReferentToken</c> wrapping a new <c>OrganizationReferent</c>, or null when no organization
/// could be attached at this position.
/// </returns>
public static Pullenti.Ner.ReferentToken TryAttachOrg(Pullenti.Ner.Token t, bool canBeCyr = false)
{
    if (t == null)
    {
        return null;
    }

    // "br" remembers that the candidate was opened with '(' so that the closing ')' is consumed at the end.
    bool br = false;
    if (t.IsChar('(') && t.Next != null)
    {
        t = t.Next;
        br = true;
    }

    // Validate the first token of the name.
    Pullenti.Ner.NumberToken firstNum = t as Pullenti.Ner.NumberToken;
    if (firstNum != null)
    {
        // A number may start the name only when it is spelled in words, is an adjective and is capitalised.
        if (!(firstNum.Typ == Pullenti.Ner.NumberSpellingType.Words && t.Morph.Class.IsAdjective && t.Chars.IsCapitalUpper))
        {
            return null;
        }
    }
    else
    {
        if (t.Chars.IsAllLower)
        {
            return null;
        }
        if ((t.LengthChar < 3) && !t.Chars.IsLetter)
        {
            return null;
        }
        if (!t.Chars.IsLatinLetter)
        {
            if (!canBeCyr || !t.Chars.IsCyrillicLetter)
            {
                return null;
            }
        }
    }

    Pullenti.Ner.Token t0 = t;      // first token of the name
    Pullenti.Ner.Token t1 = t0;     // last token accepted into the name so far
    int namWo = 0;                  // number of TextToken items accepted into the name
    OrgItemEngItem tok = null;      // type item that terminates the name
    Pullenti.Ner.Geo.GeoReferent geo = null;
    OrgItemTypeToken addTyp = null;

    // Walk forward collecting name tokens until a type item is found or the name cannot continue.
    for (; t != null; t = t.Next)
    {
        if (t != t0 && t.WhitespacesBeforeCount > 1)
        {
            break;
        }
        if (t.IsChar(')'))
        {
            break;
        }

        if (t.IsChar('(') && t.Next != null)
        {
            // "(<geo>)" - remember the geo object and skip the whole bracket.
            if ((t.Next.GetReferent() is Pullenti.Ner.Geo.GeoReferent) && t.Next.Next != null && t.Next.Next.IsChar(')'))
            {
                geo = t.Next.GetReferent() as Pullenti.Ner.Geo.GeoReferent;
                t = t.Next.Next;
                continue;
            }

            // "(<type>)" with a Latin type - remember it as an additional type and skip the bracket.
            OrgItemTypeToken typ = OrgItemTypeToken.TryAttach(t.Next, true, null);
            if ((typ != null && typ.EndToken.Next != null && typ.EndToken.Next.IsChar(')')) && typ.Chars.IsLatinLetter)
            {
                addTyp = typ;
                t = typ.EndToken.Next;
                continue;
            }

            // "(<Capitalised text token>)" - keep it inside the name span (the name then ends on ')').
            if (((t.Next is Pullenti.Ner.TextToken) && t.Next.Next != null && t.Next.Next.IsChar(')')) && t.Next.Chars.IsCapitalUpper)
            {
                t1 = (t = t.Next.Next);
                continue;
            }
            break;
        }

        // Look for the type item here, or after one or two '.' / ',' separators.
        tok = TryAttach(t, canBeCyr);
        if (tok == null && t.IsCharOf(".,") && t.Next != null)
        {
            tok = TryAttach(t.Next, canBeCyr);
            if (tok == null && t.Next.IsCharOf(",."))
            {
                tok = TryAttach(t.Next.Next, canBeCyr);
            }
        }
        if (tok != null)
        {
            // A one-character type item after a Cyrillic name is rejected.
            if (tok.LengthChar == 1 && t0.Chars.IsCyrillicLetter)
            {
                return null;
            }
            break;
        }

        // Connectors that may appear inside a name without becoming its last token.
        if (t.IsHiphen && !t.IsWhitespaceAfter && !t.IsWhitespaceBefore)
        {
            continue;
        }
        if (t.IsCharOf("&+") || t.IsAnd)
        {
            continue;
        }
        if (t.IsChar('.'))
        {
            // A dot after a one-character token is kept (e.g. an initial-like fragment).
            if (t.Previous != null && t.Previous.LengthChar == 1)
            {
                continue;
            }
            if (Pullenti.Ner.Core.MiscHelper.CanBeStartOfSentence(t.Next))
            {
                break;
            }
        }

        if (!t.Chars.IsLatinLetter)
        {
            if (!canBeCyr || !t.Chars.IsCyrillicLetter)
            {
                break;
            }
        }
        if (t.Chars.IsAllLower)
        {
            // Lower-case words are skipped only when they are prepositions/conjunctions
            // or when the whole candidate is inside brackets.
            if (t.Morph.Class.IsPreposition || t.Morph.Class.IsConjunction)
            {
                continue;
            }
            if (br)
            {
                continue;
            }
            break;
        }

        Pullenti.Morph.MorphClass mc = t.GetMorphClassInDictionary();
        if (mc.IsVerb)
        {
            if (t.Next != null && t.Next.Morph.Class.IsPreposition)
            {
                break;
            }
        }
        if (t.Next != null && t.Next.IsValue("OF", null))
        {
            break;
        }

        if (t is Pullenti.Ner.TextToken)
        {
            namWo++;
        }
        t1 = t;
    }

    if (tok == null)
    {
        return null;
    }

    // The type item is the very first token: the name must follow as a bracket sequence.
    if (t0 == tok.BeginToken)
    {
        Pullenti.Ner.Core.BracketSequenceToken br2 = Pullenti.Ner.Core.BracketHelper.TryParse(tok.EndToken.Next, Pullenti.Ner.Core.BracketParseAttr.No, 100);
        if (br2 != null)
        {
            Pullenti.Ner.Org.OrganizationReferent org1 = new Pullenti.Ner.Org.OrganizationReferent();
            if (tok.ShortValue != null)
            {
                org1.AddTypeStr(tok.ShortValue);
            }
            org1.AddTypeStr(tok.FullValue);
            string nam1 = Pullenti.Ner.Core.MiscHelper.GetTextValue(br2.BeginToken, br2.EndToken, Pullenti.Ner.Core.GetTextAttr.No);
            if (nam1 != null)
            {
                org1.AddName(nam1, true, null);
                return new Pullenti.Ner.ReferentToken(org1, t0, br2.EndToken);
            }
        }
        return null;
    }

    Pullenti.Ner.Org.OrganizationReferent org = new Pullenti.Ner.Org.OrganizationReferent();
    Pullenti.Ner.Token te = tok.EndToken;   // last token of the resulting referent token

    // For a bank the type item itself is part of the name.
    if (tok.IsBank)
    {
        t1 = tok.EndToken;
    }

    // "company" directly followed by another type item: "company" joins the name,
    // and the following item becomes the type.
    if (tok.FullValue == "company" && (tok.WhitespacesAfterCount < 3))
    {
        OrgItemEngItem tok1 = TryAttach(tok.EndToken.Next, canBeCyr);
        if (tok1 != null)
        {
            t1 = tok.EndToken;
            tok = tok1;
            te = tok.EndToken;
        }
    }
    if (tok.FullValue == "company")
    {
        if (namWo == 0)
        {
            return null;
        }
    }

    string nam = Pullenti.Ner.Core.MiscHelper.GetTextValue(t0, t1, Pullenti.Ner.Core.GetTextAttr.IgnoreArticles);
    if (nam == "STOCK" && tok.FullValue == "company")
    {
        return null;
    }
    string altNam = null;
    if (string.IsNullOrEmpty(nam))
    {
        return null;
    }

    // If the name contains "(...)", keep the full text as an alternative name and
    // use the text without the bracketed fragment as the main name.
    int i1 = nam.IndexOf('(');
    if (i1 > 0)
    {
        int i2 = nam.IndexOf(')');
        if (i1 < i2)
        {
            altNam = nam;
            string tai = null;
            if ((i2 + 1) < nam.Length)
            {
                // Start after the ')' (i2 + 1) so the tail does not begin with the bracket itself.
                tai = nam.Substring(i2 + 1).Trim();
            }
            nam = nam.Substring(0, i1).Trim();
            if (!string.IsNullOrEmpty(tai))
            {
                nam = string.Format("{0} {1}", nam, tai);
            }
        }
    }

    if (tok.IsBank)
    {
        org.AddTypeStr((tok.Kit.BaseLanguage.IsEn ? "bank" : "банк"));
        org.AddProfile(Pullenti.Ner.Org.OrgProfile.Finance);
        if ((t1.Next != null && t1.Next.IsValue("OF", null) && t1.Next.Next != null) && t1.Next.Next.Chars.IsLatinLetter)
        {
            // "... Bank of <Something>": extend the name through the "of" part.
            OrgItemNameToken nam0 = OrgItemNameToken.TryAttach(t1.Next, null, false, false);
            if (nam0 != null)
            {
                te = nam0.EndToken;
            }
            else
            {
                te = t1.Next.Next;
            }
            nam = Pullenti.Ner.Core.MiscHelper.GetTextValue(t0, te, Pullenti.Ner.Core.GetTextAttr.No);
            if (te.GetReferent() is Pullenti.Ner.Geo.GeoReferent)
            {
                org.AddGeoObject(te.GetReferent() as Pullenti.Ner.Geo.GeoReferent);
            }
        }
        else if (t0 == t1)
        {
            return null;
        }
    }
    else
    {
        if (tok.ShortValue != null)
        {
            org.AddTypeStr(tok.ShortValue);
        }
        org.AddTypeStr(tok.FullValue);
    }

    if (string.IsNullOrEmpty(nam))
    {
        return null;
    }
    org.AddName(nam, true, null);
    if (altNam != null)
    {
        org.AddName(altNam, true, null);
    }

    Pullenti.Ner.ReferentToken res = new Pullenti.Ner.ReferentToken(org, t0, te);

    // Skip trailing '.' / ',' and try to pick up one more type item right after them.
    t = te;
    while (t.Next != null && t.Next.IsCharOf(",."))
    {
        t = t.Next;
    }
    if (t.WhitespacesAfterCount < 2)
    {
        tok = TryAttach(t.Next, canBeCyr);
        if (tok != null)
        {
            if (tok.ShortValue != null)
            {
                org.AddTypeStr(tok.ShortValue);
            }
            org.AddTypeStr(tok.FullValue);
            res.EndToken = tok.EndToken;
        }
    }

    if (geo != null)
    {
        org.AddGeoObject(geo);
    }
    if (addTyp != null)
    {
        org.AddType(addTyp, false);
    }

    if (!br)
    {
        return res;
    }

    // The candidate started with '(': it must be closed by ')' or run to the end of the text.
    Pullenti.Ner.Token after = res.EndToken.Next;
    if (after == null)
    {
        // End of text: accept the result but keep the current EndToken (never assign null to it).
        return res;
    }
    if (after.IsChar(')'))
    {
        res.EndToken = after;
        return res;
    }
    return null;
}