Exemplo n.º 1
0
        public static Pullenti.Ner.ReferentToken TryAttachOrg(Pullenti.Ner.Token t, bool canBeCyr = false)
        {
            if (t == null)
            {
                return(null);
            }
            bool br = false;

            if (t.IsChar('(') && t.Next != null)
            {
                t  = t.Next;
                br = true;
            }
            if (t is Pullenti.Ner.NumberToken)
            {
                if ((t as Pullenti.Ner.NumberToken).Typ == Pullenti.Ner.NumberSpellingType.Words && t.Morph.Class.IsAdjective && t.Chars.IsCapitalUpper)
                {
                }
                else
                {
                    return(null);
                }
            }
            else
            {
                if (t.Chars.IsAllLower)
                {
                    return(null);
                }
                if ((t.LengthChar < 3) && !t.Chars.IsLetter)
                {
                    return(null);
                }
                if (!t.Chars.IsLatinLetter)
                {
                    if (!canBeCyr || !t.Chars.IsCyrillicLetter)
                    {
                        return(null);
                    }
                }
            }
            Pullenti.Ner.Token t0 = t;
            Pullenti.Ner.Token t1 = t0;
            int            namWo  = 0;
            OrgItemEngItem tok    = null;

            Pullenti.Ner.Geo.GeoReferent geo    = null;
            OrgItemTypeToken             addTyp = null;

            for (; t != null; t = t.Next)
            {
                if (t != t0 && t.WhitespacesBeforeCount > 1)
                {
                    break;
                }
                if (t.IsChar(')'))
                {
                    break;
                }
                if (t.IsChar('(') && t.Next != null)
                {
                    if ((t.Next.GetReferent() is Pullenti.Ner.Geo.GeoReferent) && t.Next.Next != null && t.Next.Next.IsChar(')'))
                    {
                        geo = t.Next.GetReferent() as Pullenti.Ner.Geo.GeoReferent;
                        t   = t.Next.Next;
                        continue;
                    }
                    OrgItemTypeToken typ = OrgItemTypeToken.TryAttach(t.Next, true, null);
                    if ((typ != null && typ.EndToken.Next != null && typ.EndToken.Next.IsChar(')')) && typ.Chars.IsLatinLetter)
                    {
                        addTyp = typ;
                        t      = typ.EndToken.Next;
                        continue;
                    }
                    if (((t.Next is Pullenti.Ner.TextToken) && t.Next.Next != null && t.Next.Next.IsChar(')')) && t.Next.Chars.IsCapitalUpper)
                    {
                        t1 = (t = t.Next.Next);
                        continue;
                    }
                    break;
                }
                tok = TryAttach(t, canBeCyr);
                if (tok == null && t.IsCharOf(".,") && t.Next != null)
                {
                    tok = TryAttach(t.Next, canBeCyr);
                    if (tok == null && t.Next.IsCharOf(",."))
                    {
                        tok = TryAttach(t.Next.Next, canBeCyr);
                    }
                }
                if (tok != null)
                {
                    if (tok.LengthChar == 1 && t0.Chars.IsCyrillicLetter)
                    {
                        return(null);
                    }
                    break;
                }
                if (t.IsHiphen && !t.IsWhitespaceAfter && !t.IsWhitespaceBefore)
                {
                    continue;
                }
                if (t.IsCharOf("&+") || t.IsAnd)
                {
                    continue;
                }
                if (t.IsChar('.'))
                {
                    if (t.Previous != null && t.Previous.LengthChar == 1)
                    {
                        continue;
                    }
                    else if (Pullenti.Ner.Core.MiscHelper.CanBeStartOfSentence(t.Next))
                    {
                        break;
                    }
                }
                if (!t.Chars.IsLatinLetter)
                {
                    if (!canBeCyr || !t.Chars.IsCyrillicLetter)
                    {
                        break;
                    }
                }
                if (t.Chars.IsAllLower)
                {
                    if (t.Morph.Class.IsPreposition || t.Morph.Class.IsConjunction)
                    {
                        continue;
                    }
                    if (br)
                    {
                        continue;
                    }
                    break;
                }
                Pullenti.Morph.MorphClass mc = t.GetMorphClassInDictionary();
                if (mc.IsVerb)
                {
                    if (t.Next != null && t.Next.Morph.Class.IsPreposition)
                    {
                        break;
                    }
                }
                if (t.Next != null && t.Next.IsValue("OF", null))
                {
                    break;
                }
                if (t is Pullenti.Ner.TextToken)
                {
                    namWo++;
                }
                t1 = t;
            }
            if (tok == null)
            {
                return(null);
            }
            if (t0 == tok.BeginToken)
            {
                Pullenti.Ner.Core.BracketSequenceToken br2 = Pullenti.Ner.Core.BracketHelper.TryParse(tok.EndToken.Next, Pullenti.Ner.Core.BracketParseAttr.No, 100);
                if (br2 != null)
                {
                    Pullenti.Ner.Org.OrganizationReferent org1 = new Pullenti.Ner.Org.OrganizationReferent();
                    if (tok.ShortValue != null)
                    {
                        org1.AddTypeStr(tok.ShortValue);
                    }
                    org1.AddTypeStr(tok.FullValue);
                    string nam1 = Pullenti.Ner.Core.MiscHelper.GetTextValue(br2.BeginToken, br2.EndToken, Pullenti.Ner.Core.GetTextAttr.No);
                    if (nam1 != null)
                    {
                        org1.AddName(nam1, true, null);
                        return(new Pullenti.Ner.ReferentToken(org1, t0, br2.EndToken));
                    }
                }
                return(null);
            }
            Pullenti.Ner.Org.OrganizationReferent org = new Pullenti.Ner.Org.OrganizationReferent();
            Pullenti.Ner.Token te = tok.EndToken;
            if (tok.IsBank)
            {
                t1 = tok.EndToken;
            }
            if (tok.FullValue == "company" && (tok.WhitespacesAfterCount < 3))
            {
                OrgItemEngItem tok1 = TryAttach(tok.EndToken.Next, canBeCyr);
                if (tok1 != null)
                {
                    t1  = tok.EndToken;
                    tok = tok1;
                    te  = tok.EndToken;
                }
            }
            if (tok.FullValue == "company")
            {
                if (namWo == 0)
                {
                    return(null);
                }
            }
            string nam = Pullenti.Ner.Core.MiscHelper.GetTextValue(t0, t1, Pullenti.Ner.Core.GetTextAttr.IgnoreArticles);

            if (nam == "STOCK" && tok.FullValue == "company")
            {
                return(null);
            }
            string altNam = null;

            if (string.IsNullOrEmpty(nam))
            {
                return(null);
            }
            if (nam.IndexOf('(') > 0)
            {
                int i1 = nam.IndexOf('(');
                int i2 = nam.IndexOf(')');
                if (i1 < i2)
                {
                    altNam = nam;
                    string tai = null;
                    if ((i2 + 1) < nam.Length)
                    {
                        tai = nam.Substring(i2).Trim();
                    }
                    nam = nam.Substring(0, i1).Trim();
                    if (tai != null)
                    {
                        nam = string.Format("{0} {1}", nam, tai);
                    }
                }
            }
            if (tok.IsBank)
            {
                org.AddTypeStr((tok.Kit.BaseLanguage.IsEn ? "bank" : "банк"));
                org.AddProfile(Pullenti.Ner.Org.OrgProfile.Finance);
                if ((t1.Next != null && t1.Next.IsValue("OF", null) && t1.Next.Next != null) && t1.Next.Next.Chars.IsLatinLetter)
                {
                    OrgItemNameToken nam0 = OrgItemNameToken.TryAttach(t1.Next, null, false, false);
                    if (nam0 != null)
                    {
                        te = nam0.EndToken;
                    }
                    else
                    {
                        te = t1.Next.Next;
                    }
                    nam = Pullenti.Ner.Core.MiscHelper.GetTextValue(t0, te, Pullenti.Ner.Core.GetTextAttr.No);
                    if (te.GetReferent() is Pullenti.Ner.Geo.GeoReferent)
                    {
                        org.AddGeoObject(te.GetReferent() as Pullenti.Ner.Geo.GeoReferent);
                    }
                }
                else if (t0 == t1)
                {
                    return(null);
                }
            }
            else
            {
                if (tok.ShortValue != null)
                {
                    org.AddTypeStr(tok.ShortValue);
                }
                org.AddTypeStr(tok.FullValue);
            }
            if (string.IsNullOrEmpty(nam))
            {
                return(null);
            }
            org.AddName(nam, true, null);
            if (altNam != null)
            {
                org.AddName(altNam, true, null);
            }
            Pullenti.Ner.ReferentToken res = new Pullenti.Ner.ReferentToken(org, t0, te);
            t = te;
            while (t.Next != null)
            {
                if (t.Next.IsCharOf(",."))
                {
                    t = t.Next;
                }
                else
                {
                    break;
                }
            }
            if (t.WhitespacesAfterCount < 2)
            {
                tok = TryAttach(t.Next, canBeCyr);
                if (tok != null)
                {
                    if (tok.ShortValue != null)
                    {
                        org.AddTypeStr(tok.ShortValue);
                    }
                    org.AddTypeStr(tok.FullValue);
                    res.EndToken = tok.EndToken;
                }
            }
            if (geo != null)
            {
                org.AddGeoObject(geo);
            }
            if (addTyp != null)
            {
                org.AddType(addTyp, false);
            }
            if (!br)
            {
                return(res);
            }
            t = res.EndToken;
            if (t.Next == null || t.Next.IsChar(')'))
            {
                res.EndToken = t.Next;
            }
            else
            {
                return(null);
            }
            return(res);
        }