Пример #1
0
        /// <summary>
        /// Tries to build one noun-phrase item starting at token <paramref name="t"/>,
        /// collecting the morphological variants under which the token can act as an
        /// adjective (<c>AdjMorph</c>) and/or as a noun (<c>NounMorph</c>).
        /// </summary>
        /// <param name="t">First token of the candidate item; <c>null</c> yields <c>null</c>.</param>
        /// <param name="items">Items already collected for the current phrase. May be <c>null</c> or empty;
        /// new variants are checked against them via <c>TryAccordVariant</c>.</param>
        /// <param name="attrs">Parsing flags (IgnoreParticiples, ReferentCanBeNoun, MultiNouns,
        /// ParseNumericAsAdjective, ParsePronouns are consulted here).</param>
        /// <returns>The parsed item, or <c>null</c> when the token cannot be part of a noun phrase.</returns>
        public static NounPhraseItem TryParse(Pullenti.Ner.Token t, List <NounPhraseItem> items, Pullenti.Ner.Core.NounPhraseParseAttr attrs)
        {
            if (t == null)
            {
                return(null);
            }
            Pullenti.Ner.Token t0 = t;
            bool _canBeSurname    = false;
            bool _isDoubtAdj      = false;

            Pullenti.Ner.ReferentToken rt = t as Pullenti.Ner.ReferentToken;
            // A referent wrapping exactly one text token: parse the inner token and
            // re-anchor the result on the wrapping referent token.
            if (rt != null && rt.BeginToken == rt.EndToken && (rt.BeginToken is Pullenti.Ner.TextToken))
            {
                NounPhraseItem res = TryParse(rt.BeginToken, items, attrs);
                if (res != null)
                {
                    res.BeginToken = (res.EndToken = t);
                    res.CanBeNoun  = true;
                    return(res);
                }
            }
            // Any other referent token is always treated as a noun whose normal value
            // is the referent's string representation.
            if (rt != null)
            {
                NounPhraseItem res = new NounPhraseItem(t, t);
                foreach (Pullenti.Morph.MorphBaseInfo m in t.Morph.Items)
                {
                    NounPhraseItemTextVar v = new NounPhraseItemTextVar(m, null);
                    v.NormalValue = t.GetReferent().ToString();
                    res.NounMorph.Add(v);
                }
                res.CanBeNoun = true;
                return(res);
            }
            bool hasLegalVerb = false;

            // Pre-filters for plain text tokens: each check below may reject the token outright.
            if (t is Pullenti.Ner.TextToken)
            {
                if (!t.Chars.IsLetter)
                {
                    return(null);
                }
                string str = (t as Pullenti.Ner.TextToken).Term;
                // Words ending in 'А' / 'О' get extra scrutiny of their dictionary word forms.
                if (str[str.Length - 1] == 'А' || str[str.Length - 1] == 'О')
                {
                    foreach (Pullenti.Morph.MorphBaseInfo wf in t.Morph.Items)
                    {
                        if ((wf is Pullenti.Morph.MorphWordForm) && (wf as Pullenti.Morph.MorphWordForm).IsInDictionary)
                        {
                            if (wf.Class.IsVerb)
                            {
                                Pullenti.Morph.MorphClass mc = t.GetMorphClassInDictionary();
                                if (!mc.IsNoun && ((attrs & Pullenti.Ner.Core.NounPhraseParseAttr.IgnoreParticiples)) == Pullenti.Ner.Core.NounPhraseParseAttr.No)
                                {
                                    if (!Pullenti.Morph.LanguageHelper.EndsWithEx(str, "ОГО", "ЕГО", null, null))
                                    {
                                        return(null);
                                    }
                                }
                                hasLegalVerb = true;
                            }
                            if (wf.Class.IsAdverb)
                            {
                                // Adverbs are rejected unless followed by a hyphen or listed as exceptions.
                                if (t.Next == null || !t.Next.IsHiphen)
                                {
                                    if ((str == "ВСЕГО" || str == "ДОМА" || str == "НЕСКОЛЬКО") || str == "МНОГО" || str == "ПОРЯДКА")
                                    {
                                    }
                                    else
                                    {
                                        return(null);
                                    }
                                }
                            }
                            if (wf.Class.IsAdjective)
                            {
                                // "к.ф." is presumably the short-form attribute - TODO confirm.
                                if (wf.ContainsAttr("к.ф.", null))
                                {
                                    if (t.GetMorphClassInDictionary() == Pullenti.Morph.MorphClass.Adjective)
                                    {
                                    }
                                    else
                                    {
                                        _isDoubtAdj = true;
                                    }
                                }
                            }
                        }
                    }
                }
                Pullenti.Morph.MorphClass mc0 = t.Morph.Class;
                // Possible surname (not all-lowercase): decide by the ending of the normal form.
                if (mc0.IsProperSurname && !t.Chars.IsAllLower)
                {
                    foreach (Pullenti.Morph.MorphBaseInfo wf in t.Morph.Items)
                    {
                        if (wf.Class.IsProperSurname && wf.Number != Pullenti.Morph.MorphNumber.Plural)
                        {
                            Pullenti.Morph.MorphWordForm wff = wf as Pullenti.Morph.MorphWordForm;
                            if (wff == null)
                            {
                                continue;
                            }
                            string s = ((wff.NormalFull ?? wff.NormalCase)) ?? "";
                            if (Pullenti.Morph.LanguageHelper.EndsWithEx(s, "ИН", "ЕН", "ЫН", null))
                            {
                                if (!wff.IsInDictionary)
                                {
                                    _canBeSurname = true;
                                }
                                else
                                {
                                    return(null);
                                }
                            }
                            if (wff.IsInDictionary && Pullenti.Morph.LanguageHelper.EndsWith(s, "ОВ"))
                            {
                                _canBeSurname = true;
                            }
                        }
                    }
                }
                // Possible first name (not all-lowercase): rejected unless one of the exceptions applies.
                if (mc0.IsProperName && !t.Chars.IsAllLower)
                {
                    foreach (Pullenti.Morph.MorphBaseInfo wff in t.Morph.Items)
                    {
                        Pullenti.Morph.MorphWordForm wf = wff as Pullenti.Morph.MorphWordForm;
                        if (wf == null)
                        {
                            continue;
                        }
                        if (wf.NormalCase == "ГОР")
                        {
                            continue;
                        }
                        if (wf.Class.IsProperName && wf.IsInDictionary)
                        {
                            if (wf.NormalCase == null || !wf.NormalCase.StartsWith("ЛЮБ"))
                            {
                                if (mc0.IsAdjective && t.Morph.ContainsAttr("неизм.", null))
                                {
                                }
                                else if (((attrs & Pullenti.Ner.Core.NounPhraseParseAttr.ReferentCanBeNoun)) == Pullenti.Ner.Core.NounPhraseParseAttr.ReferentCanBeNoun)
                                {
                                }
                                else
                                {
                                    // A dictionary first name is accepted only after a standard adjective.
                                    if (items == null || (items.Count < 1))
                                    {
                                        return(null);
                                    }
                                    if (!items[0].IsStdAdjective)
                                    {
                                        return(null);
                                    }
                                }
                            }
                        }
                    }
                }
                if (mc0.IsAdjective && t.Morph.ItemsCount == 1)
                {
                    // "в.ср.ст." is presumably a comparative-degree marker - TODO confirm.
                    if (t.Morph[0].ContainsAttr("в.ср.ст.", null))
                    {
                        return(null);
                    }
                }
                Pullenti.Morph.MorphClass mc1 = t.GetMorphClassInDictionary();
                if (mc1 == Pullenti.Morph.MorphClass.Verb && t.Morph.Case.IsUndefined)
                {
                    return(null);
                }
                if ((((attrs & Pullenti.Ner.Core.NounPhraseParseAttr.IgnoreParticiples)) == Pullenti.Ner.Core.NounPhraseParseAttr.IgnoreParticiples && t.Morph.Class.IsVerb && !t.Morph.Class.IsNoun) && !t.Morph.Class.IsProper)
                {
                    foreach (Pullenti.Morph.MorphBaseInfo wf in t.Morph.Items)
                    {
                        if (wf.Class.IsVerb)
                        {
                            // "дейст.з." is presumably the active-voice marker - TODO confirm.
                            if (wf.ContainsAttr("дейст.з.", null))
                            {
                                if (Pullenti.Morph.LanguageHelper.EndsWith((t as Pullenti.Ner.TextToken).Term, "СЯ"))
                                {
                                }
                                else
                                {
                                    return(null);
                                }
                            }
                        }
                    }
                }
            }
            // Up to two passes: pass 0 analyses t0 (or the word after a hyphen glued to it);
            // pass 1 runs only when pass 0 produced nothing and either a hyphenated form was
            // tried or the "БИЗНЕС" + next-word case set t1.
            Pullenti.Ner.Token t1 = null;
            for (int k = 0; k < 2; k++)
            {
                t = t1 ?? t0;
                if (k == 0)
                {
                    // Hyphenated compound: analyse the morphology of the part after the hyphen.
                    if (((t0 is Pullenti.Ner.TextToken) && t0.Next != null && t0.Next.IsHiphen) && t0.Next.Next != null)
                    {
                        if (!t0.IsWhitespaceAfter && !t0.Morph.Class.IsPronoun && !(t0.Next.Next is Pullenti.Ner.NumberToken))
                        {
                            if (!t0.Next.IsWhitespaceAfter)
                            {
                                t = t0.Next.Next;
                            }
                            else if (t0.Next.Next.Chars.IsAllLower && Pullenti.Morph.LanguageHelper.EndsWith((t0 as Pullenti.Ner.TextToken).Term, "О"))
                            {
                                t = t0.Next.Next;
                            }
                        }
                    }
                }
                NounPhraseItem it = new NounPhraseItem(t0, t)
                {
                    CanBeSurname = _canBeSurname
                };
                // NOTE(review): referent tokens already returned above, so this branch looks unreachable - confirm.
                if (t0 == t && (t0 is Pullenti.Ner.ReferentToken))
                {
                    it.CanBeNoun = true;
                    it.Morph     = new Pullenti.Ner.MorphCollection(t0.Morph);
                }
                bool canBePrepos = false;
                foreach (Pullenti.Morph.MorphBaseInfo v in t.Morph.Items)
                {
                    Pullenti.Morph.MorphWordForm wf = v as Pullenti.Morph.MorphWordForm;
                    // A verb variant that carries a case is taken as an adjective variant.
                    if (v.Class.IsVerb && !v.Case.IsUndefined)
                    {
                        it.CanBeAdj = true;
                        it.AdjMorph.Add(new NounPhraseItemTextVar(v, t));
                        continue;
                    }
                    if (v.Class.IsPreposition)
                    {
                        canBePrepos = true;
                    }
                    // ---- adjective role ----
                    if (v.Class.IsAdjective || ((v.Class.IsPronoun && !v.Class.IsPersonalPronoun && !v.ContainsAttr("неизм.", null))) || ((v.Class.IsNoun && (t is Pullenti.Ner.NumberToken))))
                    {
                        if (TryAccordVariant(items, (items == null ? 0 : items.Count), v, false))
                        {
                            bool isDoub = false;
                            if (v.ContainsAttr("к.ф.", null))
                            {
                                continue;
                            }
                            if (v.ContainsAttr("собир.", null) && !(t is Pullenti.Ner.NumberToken))
                            {
                                if (wf != null && wf.IsInDictionary)
                                {
                                    return(null);
                                }
                                continue;
                            }
                            if (v.ContainsAttr("сравн.", null))
                            {
                                continue;
                            }
                            bool ok = true;
                            if (t is Pullenti.Ner.TextToken)
                            {
                                string s = (t as Pullenti.Ner.TextToken).Term;
                                if (s == "ПРАВО" || s == "ПРАВА")
                                {
                                    ok = false;
                                }
                                else if (Pullenti.Morph.LanguageHelper.EndsWith(s, "ОВ") && t.GetMorphClassInDictionary().IsNoun)
                                {
                                    ok = false;
                                }
                            }
                            else if (t is Pullenti.Ner.NumberToken)
                            {
                                if (v.Class.IsNoun && t.Morph.Class.IsAdjective)
                                {
                                    ok = false;
                                }
                                else if (t.Morph.Class.IsNoun && ((attrs & Pullenti.Ner.Core.NounPhraseParseAttr.ParseNumericAsAdjective)) == Pullenti.Ner.Core.NounPhraseParseAttr.No)
                                {
                                    ok = false;
                                }
                            }
                            if (ok)
                            {
                                it.AdjMorph.Add(new NounPhraseItemTextVar(v, t));
                                it.CanBeAdj = true;
                                if (_isDoubtAdj && t0 == t)
                                {
                                    it.IsDoubtAdjective = true;
                                }
                                if (hasLegalVerb && wf != null && wf.IsInDictionary)
                                {
                                    it.CanBeNoun = true;
                                }
                                if (wf != null && wf.Class.IsPronoun)
                                {
                                    it.CanBeNoun = true;
                                    it.NounMorph.Add(new NounPhraseItemTextVar(v, t));
                                }
                            }
                        }
                    }
                    // ---- noun role ----
                    bool canBeNoun = false;
                    if (t is Pullenti.Ner.NumberToken)
                    {
                        // Number tokens never act as the noun here.
                    }
                    else if (v.Class.IsNoun || ((wf != null && wf.NormalCase == "САМ")))
                    {
                        canBeNoun = true;
                    }
                    else if (v.Class.IsPersonalPronoun)
                    {
                        if (items == null || items.Count == 0)
                        {
                            canBeNoun = true;
                        }
                        else
                        {
                            foreach (NounPhraseItem it1 in items)
                            {
                                if (it1.IsVerb)
                                {
                                    if (items.Count == 1 && !v.Case.IsNominative)
                                    {
                                        canBeNoun = true;
                                    }
                                    else
                                    {
                                        return(null);
                                    }
                                }
                            }
                            if (items.Count == 1)
                            {
                                if (items[0].CanBeAdjForPersonalPronoun)
                                {
                                    canBeNoun = true;
                                }
                            }
                        }
                    }
                    else if ((v.Class.IsPronoun && ((items == null || items.Count == 0 || ((items.Count == 1 && items[0].CanBeAdjForPersonalPronoun)))) && wf != null) && (((((wf.NormalCase == "ТОТ" || wf.NormalFull == "ТО" || wf.NormalCase == "ТО") || wf.NormalCase == "ЭТО" || wf.NormalCase == "ВСЕ") || wf.NormalCase == "ЧТО" || wf.NormalCase == "КТО") || wf.NormalFull == "КОТОРЫЙ" || wf.NormalCase == "КОТОРЫЙ")))
                    {
                        if (wf.NormalCase == "ВСЕ")
                        {
                            if (t.Next != null && t.Next.IsValue("РАВНО", null))
                            {
                                return(null);
                            }
                        }
                        canBeNoun = true;
                    }
                    else if (wf != null && ((wf.NormalFull ?? wf.NormalCase)) == "КОТОРЫЙ" && ((attrs & Pullenti.Ner.Core.NounPhraseParseAttr.ParsePronouns)) == Pullenti.Ner.Core.NounPhraseParseAttr.No)
                    {
                        return(null);
                    }
                    else if (v.Class.IsProper && (t is Pullenti.Ner.TextToken))
                    {
                        if (t.LengthChar > 4 || v.Class.IsProperName)
                        {
                            canBeNoun = true;
                        }
                    }
                    if (canBeNoun)
                    {
                        bool added = false;
                        // Multi-noun mode: every item after the first must be preceded by a conjunction.
                        if (items != null && items.Count > 1 && ((attrs & Pullenti.Ner.Core.NounPhraseParseAttr.MultiNouns)) != Pullenti.Ner.Core.NounPhraseParseAttr.No)
                        {
                            bool ok1 = true;
                            for (int ii = 1; ii < items.Count; ii++)
                            {
                                if (!items[ii].ConjBefore)
                                {
                                    ok1 = false;
                                    break;
                                }
                            }
                            if (ok1)
                            {
                                if (TryAccordVariant(items, (items == null ? 0 : items.Count), v, true))
                                {
                                    it.NounMorph.Add(new NounPhraseItemTextVar(v, t));
                                    it.CanBeNoun  = true;
                                    it.MultiNouns = true;
                                    added         = true;
                                }
                            }
                        }
                        if (!added)
                        {
                            if (TryAccordVariant(items, (items == null ? 0 : items.Count), v, false))
                            {
                                it.NounMorph.Add(new NounPhraseItemTextVar(v, t));
                                it.CanBeNoun = true;
                                if (v.Class.IsPersonalPronoun && t.Morph.ContainsAttr("неизм.", null) && !it.CanBeAdj)
                                {
                                    NounPhraseItemTextVar itt = new NounPhraseItemTextVar(v, t);
                                    itt.Case   = Pullenti.Morph.MorphCase.AllCases;
                                    itt.Number = Pullenti.Morph.MorphNumber.Undefined;
                                    // Empty check kept as in the original; it has no visible effect here.
                                    if (itt.NormalValue == null)
                                    {
                                    }
                                    it.AdjMorph.Add(itt);
                                    it.CanBeAdj = true;
                                }
                            }
                            // Fallback: a plural leading adjective followed by "<noun>, and <noun phrase>".
                            // The items != null guard prevents a NullReferenceException: items is
                            // nullable everywhere else in this method.
                            else if ((items != null && items.Count > 0 && items[0].AdjMorph.Count > 0 && items[0].AdjMorph[0].Number == Pullenti.Morph.MorphNumber.Plural) && !((items[0].AdjMorph[0].Case & v.Case)).IsUndefined && !items[0].AdjMorph[0].Class.IsVerb)
                            {
                                if (t.Next != null && t.Next.IsCommaAnd && (t.Next.Next is Pullenti.Ner.TextToken))
                                {
                                    Pullenti.Ner.Core.NounPhraseToken npt2 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t.Next.Next, attrs, 0, null);
                                    if (npt2 != null && npt2.Preposition == null && !((npt2.Morph.Case & v.Case & items[0].AdjMorph[0].Case)).IsUndefined)
                                    {
                                        it.NounMorph.Add(new NounPhraseItemTextVar(v, t));
                                        it.CanBeNoun = true;
                                    }
                                }
                            }
                        }
                    }
                }
                // Morphology came from the part after the hyphen: let each variant account for the prefix t0.
                if (t0 != t)
                {
                    foreach (NounPhraseItemTextVar v in it.AdjMorph)
                    {
                        v.CorrectPrefix(t0 as Pullenti.Ner.TextToken, false);
                    }
                    foreach (NounPhraseItemTextVar v in it.NounMorph)
                    {
                        v.CorrectPrefix(t0 as Pullenti.Ner.TextToken, true);
                    }
                }
                // Second pass yielding a pure noun: extend the item and build a "first-second" normal value.
                if (k == 1 && it.CanBeNoun && !it.CanBeAdj)
                {
                    if (t1 != null)
                    {
                        it.EndToken = t1;
                    }
                    else
                    {
                        // Pass 1 with t1 == null is reached only after pass 0 took the hyphen branch,
                        // which required t0.Next and t0.Next.Next to be non-null.
                        it.EndToken = t0.Next.Next;
                    }
                    foreach (NounPhraseItemTextVar v in it.NounMorph)
                    {
                        if (v.NormalValue != null && (v.NormalValue.IndexOf('-') < 0))
                        {
                            v.NormalValue = string.Format("{0}-{1}", v.NormalValue, it.EndToken.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false));
                        }
                    }
                }
                if (it.CanBeAdj)
                {
                    if (m_StdAdjectives.TryParse(it.BeginToken, Pullenti.Ner.Core.TerminParseAttr.No) != null)
                    {
                        it.IsStdAdjective = true;
                    }
                }
                // The token may also be a preposition: reject the noun reading when the
                // prepositional reading yields a phrase.
                if (canBePrepos && it.CanBeNoun)
                {
                    if (items != null && items.Count > 0)
                    {
                        Pullenti.Ner.Core.NounPhraseToken npt1 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t, Pullenti.Ner.Core.NounPhraseParseAttr.ParsePreposition | Pullenti.Ner.Core.NounPhraseParseAttr.ParsePronouns | Pullenti.Ner.Core.NounPhraseParseAttr.ParseVerbs, 0, null);
                        if (npt1 != null && npt1.EndChar > t.EndChar)
                        {
                            return(null);
                        }
                    }
                    else
                    {
                        Pullenti.Ner.Core.NounPhraseToken npt1 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t.Next, Pullenti.Ner.Core.NounPhraseParseAttr.ParsePronouns | Pullenti.Ner.Core.NounPhraseParseAttr.ParseVerbs, 0, null);
                        if (npt1 != null)
                        {
                            Pullenti.Morph.MorphCase mc = Pullenti.Morph.LanguageHelper.GetCaseAfterPreposition((t as Pullenti.Ner.TextToken).Lemma);
                            if (!((mc & npt1.Morph.Case)).IsUndefined)
                            {
                                return(null);
                            }
                        }
                    }
                }
                if (it.CanBeNoun || it.CanBeAdj || k == 1)
                {
                    // Pronouns may absorb a trailing particle (optionally attached with a tight hyphen).
                    if (it.BeginToken.Morph.Class.IsPronoun)
                    {
                        Pullenti.Ner.Token tt2 = it.EndToken.Next;
                        if ((tt2 != null && tt2.IsHiphen && !tt2.IsWhitespaceAfter) && !tt2.IsWhitespaceBefore)
                        {
                            tt2 = tt2.Next;
                        }
                        if (tt2 is Pullenti.Ner.TextToken)
                        {
                            string ss = (tt2 as Pullenti.Ner.TextToken).Term;
                            if ((ss == "ЖЕ" || ss == "БЫ" || ss == "ЛИ") || ss == "Ж")
                            {
                                it.EndToken = tt2;
                            }
                            else if (ss == "НИБУДЬ" || ss == "ЛИБО" || (((ss == "ТО" && tt2.Previous.IsHiphen)) && it.CanBeAdj))
                            {
                                it.EndToken = tt2;
                                foreach (NounPhraseItemTextVar m in it.AdjMorph)
                                {
                                    m.NormalValue = string.Format("{0}-{1}", m.NormalValue, ss);
                                    if (m.SingleNumberValue != null)
                                    {
                                        m.SingleNumberValue = string.Format("{0}-{1}", m.SingleNumberValue, ss);
                                    }
                                }
                            }
                        }
                    }
                    return(it);
                }
                if (t0 == t)
                {
                    // "БИЗНЕС" followed by a word with the same character class: retry with the next word.
                    if (t0.IsValue("БИЗНЕС", null) && t0.Next != null && t0.Next.Chars == t0.Chars)
                    {
                        t1 = t0.Next;
                        continue;
                    }
                    return(it);
                }
            }
            return(null);
        }
Пример #2
0
        /// <summary>
        /// Attaches a phone item at <paramref name="t0"/>. A non-prefix item is returned as is;
        /// a prefix item is extended to the right over further prefix items, a closing colon,
        /// noun phrases and proper words, until a stopping token is met.
        /// </summary>
        /// <param name="t0">Token at which attachment starts.</param>
        /// <returns>
        /// The attached item (possibly with a moved <c>EndToken</c>), or <c>null</c> when nothing
        /// attaches or the extension runs into a noun phrase ending with "ПОСЕЛЕНИЕ".
        /// </returns>
        public static PhoneItemToken TryAttach(Pullenti.Ner.Token t0)
        {
            PhoneItemToken prefix = _TryAttach(t0);

            // Nothing attached, or the item is not a prefix: no extension is attempted.
            if (prefix == null || prefix.ItemType != PhoneItemType.Prefix)
            {
                return(prefix);
            }
            for (Pullenti.Ner.Token cur = prefix.EndToken.Next; cur != null; cur = cur.Next)
            {
                // Table control characters and line starts end the prefix.
                if (cur.IsTableControlChar || cur.IsNewlineBefore)
                {
                    break;
                }
                PhoneItemToken following = _TryAttach(cur);
                if (following != null)
                {
                    // Only another prefix may be merged in; any other phone item stops the scan.
                    if (following.ItemType != PhoneItemType.Prefix)
                    {
                        break;
                    }
                    if (prefix.Kind == Pullenti.Ner.Phone.PhoneKind.Undefined)
                    {
                        prefix.Kind = following.Kind;
                    }
                    Pullenti.Ner.Token tail = following.EndToken;
                    prefix.EndToken = tail;
                    cur             = tail;
                    continue;
                }
                // A colon closes the prefix and is included in it.
                if (cur.IsChar(':'))
                {
                    prefix.EndToken = cur;
                    break;
                }
                // Only text tokens can extend the prefix, and never when the start token is one character long.
                if (!(cur is Pullenti.Ner.TextToken) || t0.LengthChar == 1)
                {
                    break;
                }
                Pullenti.Ner.Core.NounPhraseToken phrase = Pullenti.Ner.Core.NounPhraseHelper.TryParse(cur, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                if (phrase != null)
                {
                    // Jump to the end of the noun phrase and absorb it.
                    cur = phrase.EndToken;
                    if (cur.IsValue("ПОСЕЛЕНИЕ", null))
                    {
                        return(null);
                    }
                    prefix.EndToken = cur;
                }
                else if (cur.GetMorphClassInDictionary().IsProper)
                {
                    prefix.EndToken = cur;
                }
                else if (!cur.Morph.Class.IsPreposition)
                {
                    // Prepositions are stepped over without moving EndToken; anything else stops the scan.
                    break;
                }
            }
            return(prefix);
        }
Пример #3
0
        static OrgItemNameToken _TryAttach(Pullenti.Ner.Token t, OrgItemNameToken prev, bool extOnto)
        {
            if (t == null)
            {
                return(null);
            }
            Pullenti.Ner.Referent r = t.GetReferent();
            if (r != null)
            {
                if (r.TypeName == "DENOMINATION")
                {
                    return new OrgItemNameToken(t, t)
                           {
                               Value = r.ToString(true, t.Kit.BaseLanguage, 0), IsDenomination = true
                           }
                }
                ;
                if ((r is Pullenti.Ner.Geo.GeoReferent) && t.Chars.IsLatinLetter)
                {
                    OrgItemNameToken res2 = _TryAttach(t.Next, prev, extOnto);

                    if (res2 != null && res2.Chars.IsLatinLetter)
                    {
                        res2.BeginToken     = t;
                        res2.Value          = string.Format("{0} {1}", Pullenti.Ner.Core.MiscHelper.GetTextValueOfMetaToken(t as Pullenti.Ner.MetaToken, Pullenti.Ner.Core.GetTextAttr.No), res2.Value);
                        res2.IsInDictionary = false;
                        return(res2);
                    }
                }
                return(null);
            }
            Pullenti.Ner.TextToken tt = t as Pullenti.Ner.TextToken;
            if (tt == null)
            {
                return(null);
            }
            OrgItemNameToken res = null;

            Pullenti.Ner.Core.TerminToken tok = m_StdTails.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
            if (tok == null && t.IsChar(','))
            {
                tok = m_StdTails.TryParse(t.Next, Pullenti.Ner.Core.TerminParseAttr.No);
            }
            if (tok != null)
            {
                return new OrgItemNameToken(t, tok.EndToken)
                       {
                           Value = tok.Termin.CanonicText, IsStdTail = tok.Termin.Tag == null, IsEmptyWord = tok.Termin.Tag != null, Morph = tok.Morph
                       }
            }
            ;
            if ((((tok = m_StdNames.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No)))) != null)
            {
                return new OrgItemNameToken(t, tok.EndToken)
                       {
                           Value = tok.Termin.CanonicText, IsStdName = true
                       }
            }
            ;
            OrgItemEngItem eng = OrgItemEngItem.TryAttach(t, false);

            if (eng == null && t.IsChar(','))
            {
                eng = OrgItemEngItem.TryAttach(t.Next, false);
            }
            if (eng != null)
            {
                return new OrgItemNameToken(t, eng.EndToken)
                       {
                           Value = eng.FullValue, IsStdTail = true
                       }
            }
            ;
            if (tt.Chars.IsAllLower && prev != null)
            {
                if (!prev.Chars.IsAllLower && !prev.Chars.IsCapitalUpper)
                {
                    return(null);
                }
            }
            if (tt.IsChar(',') && prev != null)
            {
                Pullenti.Ner.Core.NounPhraseToken npt1 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t.Next, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);

                if (npt1 == null || npt1.Chars != prev.Chars || ((npt1.Morph.Case & prev.Morph.Case)).IsUndefined)
                {
                    return(null);
                }
                OrgItemTypeToken ty = OrgItemTypeToken.TryAttach(t.Next, false, null);
                if (ty != null)
                {
                    return(null);
                }
                if (npt1.EndToken.Next == null || !npt1.EndToken.Next.IsValue("И", null))
                {
                    return(null);
                }
                Pullenti.Ner.Token t1 = npt1.EndToken.Next;
                Pullenti.Ner.Core.NounPhraseToken npt2 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t1.Next, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                if (npt2 == null || npt2.Chars != prev.Chars || ((npt2.Morph.Case & npt1.Morph.Case & prev.Morph.Case)).IsUndefined)
                {
                    return(null);
                }
                ty = OrgItemTypeToken.TryAttach(t1.Next, false, null);
                if (ty != null)
                {
                    return(null);
                }
                res = new OrgItemNameToken(npt1.BeginToken, npt1.EndToken)
                {
                    Morph = npt1.Morph, Value = npt1.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false)
                };
                res.IsNounPhrase       = true;
                res.IsAfterConjunction = true;
                if (prev.Preposition != null)
                {
                    res.Preposition = prev.Preposition;
                }
                return(res);
            }
            if (((tt.IsChar('&') || tt.IsValue("AND", null) || tt.IsValue("UND", null))) && prev != null)
            {
                if ((tt.Next is Pullenti.Ner.TextToken) && tt.LengthChar == 1 && tt.Next.Chars.IsLatinLetter)
                {
                    res = new OrgItemNameToken(tt, tt.Next)
                    {
                        Chars = tt.Next.Chars
                    };
                    res.IsAfterConjunction = true;
                    res.Value = "& " + (tt.Next as Pullenti.Ner.TextToken).Term;
                    return(res);
                }
                res = OrgItemNameToken.TryAttach(tt.Next, null, extOnto, false);
                if (res == null || res.Chars != prev.Chars)
                {
                    return(null);
                }
                res.IsAfterConjunction = true;
                res.Value = "& " + res.Value;
                return(res);
            }
            if (!tt.Chars.IsLetter)
            {
                return(null);
            }
            List <Pullenti.Semantic.Utils.DerivateGroup> expinf = null;

            if (prev != null && prev.EndToken.GetMorphClassInDictionary().IsNoun)
            {
                string wo = prev.EndToken.GetNormalCaseText(Pullenti.Morph.MorphClass.Noun, Pullenti.Morph.MorphNumber.Singular, Pullenti.Morph.MorphGender.Undefined, false);
                expinf = Pullenti.Semantic.Utils.DerivateService.FindDerivates(wo, true, prev.EndToken.Morph.Language);
            }
            Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
            if (npt != null && npt.InternalNoun != null)
            {
                npt = null;
            }
            bool explOk = false;

            if (npt != null && prev != null && prev.EndToken.GetMorphClassInDictionary().IsNoun)
            {
                Pullenti.Ner.Core.NounPhraseToken npt0 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(prev.EndToken, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                if (npt0 != null)
                {
                    List <Pullenti.Semantic.Core.SemanticLink> links = Pullenti.Semantic.Core.SemanticHelper.TryCreateLinks(npt0, npt, null);
                    if (links.Count > 0)
                    {
                        explOk = true;
                    }
                }
            }
            if (npt != null && ((explOk || npt.Morph.Case.IsGenitive || ((prev != null && !((prev.Morph.Case & npt.Morph.Case)).IsUndefined)))))
            {
                Pullenti.Morph.MorphClass mc = npt.BeginToken.GetMorphClassInDictionary();
                if (mc.IsVerb || mc.IsPronoun)
                {
                    return(null);
                }
                if (mc.IsAdverb)
                {
                    if (npt.BeginToken.Next != null && npt.BeginToken.Next.IsHiphen)
                    {
                    }
                    else
                    {
                        return(null);
                    }
                }
                if (mc.IsPreposition)
                {
                    return(null);
                }
                if (mc.IsNoun && npt.Chars.IsAllLower)
                {
                    Pullenti.Morph.MorphCase ca = npt.Morph.Case;
                    if ((!ca.IsDative && !ca.IsGenitive && !ca.IsInstrumental) && !ca.IsPrepositional)
                    {
                        return(null);
                    }
                }
                res = new OrgItemNameToken(npt.BeginToken, npt.EndToken)
                {
                    Morph = npt.Morph, Value = npt.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false)
                };
                res.IsNounPhrase = true;
                if ((npt.EndToken.WhitespacesAfterCount < 2) && (npt.EndToken.Next is Pullenti.Ner.TextToken))
                {
                    Pullenti.Ner.Core.NounPhraseToken npt2 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(npt.EndToken.Next, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                    if (npt2 != null && npt2.Morph.Case.IsGenitive && npt2.Chars.IsAllLower)
                    {
                        OrgItemTypeToken           typ = OrgItemTypeToken.TryAttach(npt.EndToken.Next, true, null);
                        OrgItemEponymToken         epo = OrgItemEponymToken.TryAttach(npt.EndToken.Next, false);
                        Pullenti.Ner.ReferentToken rtt = t.Kit.ProcessReferent("PERSONPROPERTY", npt.EndToken.Next);
                        if (typ == null && epo == null && ((rtt == null || rtt.Morph.Number == Pullenti.Morph.MorphNumber.Plural)))
                        {
                            res.EndToken = npt2.EndToken;
                            res.Value    = string.Format("{0} {1}", res.Value, Pullenti.Ner.Core.MiscHelper.GetTextValueOfMetaToken(npt2, Pullenti.Ner.Core.GetTextAttr.No));
                        }
                    }
                    else if (npt.EndToken.Next.IsComma && (npt.EndToken.Next.Next is Pullenti.Ner.TextToken))
                    {
                        Pullenti.Ner.Token        tt2 = npt.EndToken.Next.Next;
                        Pullenti.Morph.MorphClass mv2 = tt2.GetMorphClassInDictionary();
                        if (mv2.IsAdjective && mv2.IsVerb)
                        {
                            Pullenti.Morph.MorphBaseInfo bi = new Pullenti.Morph.MorphBaseInfo()
                            {
                                Case = npt.Morph.Case, Gender = npt.Morph.Gender, Number = npt.Morph.Number
                            };
                            if (tt2.Morph.CheckAccord(bi, false, false))
                            {
                                npt2 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(tt2.Next, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                                if (npt2 != null && ((npt2.Morph.Case.IsDative || npt2.Morph.Case.IsGenitive)) && npt2.Chars.IsAllLower)
                                {
                                    res.EndToken = npt2.EndToken;
                                    res.Value    = string.Format("{0} {1}", res.Value, Pullenti.Ner.Core.MiscHelper.GetTextValue(npt.EndToken.Next, res.EndToken, Pullenti.Ner.Core.GetTextAttr.No));
                                }
                            }
                        }
                    }
                }
                if (explOk)
                {
                    res.IsAfterConjunction = true;
                }
            }
            else if (npt != null && ((((prev != null && prev.IsNounPhrase && npt.Morph.Case.IsInstrumental)) || extOnto)))
            {
                res = new OrgItemNameToken(npt.BeginToken, npt.EndToken)
                {
                    Morph = npt.Morph, Value = npt.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false)
                };
                res.IsNounPhrase = true;
            }
            else if (tt.IsAnd)
            {
                res = TryAttach(tt.Next, prev, extOnto, false);
                if (res == null || !res.IsNounPhrase || prev == null)
                {
                    return(null);
                }
                if (((prev.Morph.Case & res.Morph.Case)).IsUndefined)
                {
                    return(null);
                }
                if (prev.Morph.Number != Pullenti.Morph.MorphNumber.Undefined && res.Morph.Number != Pullenti.Morph.MorphNumber.Undefined)
                {
                    if (((prev.Morph.Number & res.Morph.Number)) == Pullenti.Morph.MorphNumber.Undefined)
                    {
                        if (prev.Chars != res.Chars)
                        {
                            return(null);
                        }
                        OrgItemTypeToken ty = OrgItemTypeToken.TryAttach(res.EndToken.Next, false, null);
                        if (ty != null)
                        {
                            return(null);
                        }
                    }
                }
                Pullenti.Morph.CharsInfo ci = res.Chars;
                res.Chars = ci;
                res.IsAfterConjunction = true;
                return(res);
            }
            else if (((tt.Term == "ПО" || tt.Term == "ПРИ" || tt.Term == "ЗА") || tt.Term == "С" || tt.Term == "В") || tt.Term == "НА")
            {
                npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t.Next, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                if (npt != null)
                {
                    if (m_VervotWords.TryParse(npt.EndToken, Pullenti.Ner.Core.TerminParseAttr.No) != null)
                    {
                        return(null);
                    }
                    bool ok = false;
                    if (tt.Term == "ПО")
                    {
                        ok = npt.Morph.Case.IsDative;
                    }
                    else if (tt.Term == "С")
                    {
                        ok = npt.Morph.Case.IsInstrumental;
                    }
                    else if (tt.Term == "ЗА")
                    {
                        ok = npt.Morph.Case.IsGenitive | npt.Morph.Case.IsInstrumental;
                    }
                    else if (tt.Term == "НА")
                    {
                        ok = npt.Morph.Case.IsPrepositional;
                    }
                    else if (tt.Term == "В")
                    {
                        ok = npt.Morph.Case.IsDative | npt.Morph.Case.IsPrepositional;
                        if (ok)
                        {
                            ok = false;
                            if (t.Next.IsValue("СФЕРА", null) || t.Next.IsValue("ОБЛАСТЬ", null))
                            {
                                ok = true;
                            }
                        }
                    }
                    else if (tt.Term == "ПРИ")
                    {
                        ok = npt.Morph.Case.IsPrepositional;
                        if (ok)
                        {
                            if (OrgItemTypeToken.TryAttach(tt.Next, true, null) != null)
                            {
                                ok = false;
                            }
                            else
                            {
                                Pullenti.Ner.ReferentToken rt = tt.Kit.ProcessReferent(Pullenti.Ner.Org.OrganizationAnalyzer.ANALYZER_NAME, tt.Next);
                                if (rt != null)
                                {
                                    ok = false;
                                }
                            }
                        }
                        string s = npt.Noun.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false);
                        if (s == "ПОДДЕРЖКА" || s == "УЧАСТИЕ")
                        {
                            ok = false;
                        }
                    }
                    else
                    {
                        ok = npt.Morph.Case.IsPrepositional;
                    }
                    if (ok)
                    {
                        res = new OrgItemNameToken(t, npt.EndToken)
                        {
                            Morph = npt.Morph, Value = npt.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Singular, Pullenti.Morph.MorphGender.Undefined, false), Chars = npt.Chars
                        };
                        res.IsNounPhrase = true;
                        res.Preposition  = tt.Term;
                        if (((res.Value == "ДЕЛО" || res.Value == "ВОПРОС")) && !res.IsNewlineAfter)
                        {
                            OrgItemNameToken res2 = _TryAttach(res.EndToken.Next, res, extOnto);
                            if (res2 != null && res2.Morph.Case.IsGenitive)
                            {
                                res.Value    = string.Format("{0} {1}", res.Value, res2.Value);
                                res.EndToken = res2.EndToken;
                                for (Pullenti.Ner.Token ttt = res2.EndToken.Next; ttt != null; ttt = ttt.Next)
                                {
                                    if (!ttt.IsCommaAnd)
                                    {
                                        break;
                                    }
                                    OrgItemNameToken res3 = _TryAttach(ttt.Next, res2, extOnto);
                                    if (res3 == null)
                                    {
                                        break;
                                    }
                                    res.Value    = string.Format("{0} {1}", res.Value, res3.Value);
                                    res.EndToken = res3.EndToken;
                                    if (ttt.IsAnd)
                                    {
                                        break;
                                    }
                                    ttt = res.EndToken;
                                }
                            }
                        }
                    }
                }
                if (res == null)
                {
                    return(null);
                }
            }
            else if (tt.Term == "OF")
            {
                Pullenti.Ner.Token t1 = tt.Next;
                if (t1 != null && Pullenti.Ner.Core.MiscHelper.IsEngArticle(t1))
                {
                    t1 = t1.Next;
                }
                if (t1 != null && t1.Chars.IsLatinLetter && !t1.Chars.IsAllLower)
                {
                    res = new OrgItemNameToken(t, t1)
                    {
                        Chars = t1.Chars, Morph = t1.Morph
                    };
                    for (Pullenti.Ner.Token ttt = t1.Next; ttt != null; ttt = ttt.Next)
                    {
                        if (ttt.WhitespacesBeforeCount > 2)
                        {
                            break;
                        }
                        if (Pullenti.Ner.Core.MiscHelper.IsEngAdjSuffix(ttt))
                        {
                            ttt = ttt.Next;
                            continue;
                        }
                        if (!ttt.Chars.IsLatinLetter)
                        {
                            break;
                        }
                        if (ttt.Morph.Class.IsPreposition)
                        {
                            break;
                        }
                        t1 = (res.EndToken = ttt);
                    }
                    res.Value       = Pullenti.Ner.Core.MiscHelper.GetTextValue(t, t1, Pullenti.Ner.Core.GetTextAttr.IgnoreArticles);
                    res.Preposition = tt.Term;
                    return(res);
                }
            }
            if (res == null)
            {
                if (tt.Chars.IsLatinLetter && tt.LengthChar == 1)
                {
                }
                else if (tt.Chars.IsAllLower || (tt.LengthChar < 2))
                {
                    if (!tt.Chars.IsLatinLetter || prev == null || !prev.Chars.IsLatinLetter)
                    {
                        return(null);
                    }
                }
                if (tt.Chars.IsCyrillicLetter)
                {
                    Pullenti.Morph.MorphClass mc = tt.GetMorphClassInDictionary();
                    if (mc.IsVerb || mc.IsAdverb)
                    {
                        return(null);
                    }
                }
                else if (tt.Chars.IsLatinLetter && !tt.IsWhitespaceAfter)
                {
                    if (!tt.IsWhitespaceAfter && (tt.LengthChar < 5))
                    {
                        if (tt.Next is Pullenti.Ner.NumberToken)
                        {
                            return(null);
                        }
                    }
                }
                res = new OrgItemNameToken(tt, tt)
                {
                    Value = tt.Term, Morph = tt.Morph
                };
                for (t = tt.Next; t != null; t = t.Next)
                {
                    if ((((t.IsHiphen || t.IsCharOf("\\/"))) && t.Next != null && (t.Next is Pullenti.Ner.TextToken)) && !t.IsWhitespaceBefore && !t.IsWhitespaceAfter)
                    {
                        t            = t.Next;
                        res.EndToken = t;
                        res.Value    = string.Format("{0}{1}{2}", res.Value, (t.Previous.IsChar('.') ? '.' : '-'), (t as Pullenti.Ner.TextToken).Term);
                    }
                    else if (t.IsChar('.'))
                    {
                        if (!t.IsWhitespaceAfter && !t.IsWhitespaceBefore && (t.Next is Pullenti.Ner.TextToken))
                        {
                            res.EndToken = t.Next;
                            t            = t.Next;
                            res.Value    = string.Format("{0}.{1}", res.Value, (t as Pullenti.Ner.TextToken).Term);
                        }
                        else if ((t.Next != null && !t.IsNewlineAfter && t.Next.Chars.IsLatinLetter) && tt.Chars.IsLatinLetter)
                        {
                            res.EndToken = t;
                        }
                        else
                        {
                            break;
                        }
                    }
                    else
                    {
                        break;
                    }
                }
            }
            for (Pullenti.Ner.Token t0 = res.BeginToken; t0 != null; t0 = t0.Next)
            {
                if ((((tt = t0 as Pullenti.Ner.TextToken))) != null && tt.IsLetters)
                {
                    if (!tt.Morph.Class.IsConjunction && !tt.Morph.Class.IsPreposition)
                    {
                        foreach (Pullenti.Morph.MorphBaseInfo mf in tt.Morph.Items)
                        {
                            if ((mf as Pullenti.Morph.MorphWordForm).IsInDictionary)
                            {
                                res.IsInDictionary = true;
                            }
                        }
                    }
                }
                if (t0 == res.EndToken)
                {
                    break;
                }
            }
            if (res.BeginToken == res.EndToken && res.BeginToken.Chars.IsAllUpper)
            {
                if (res.EndToken.Next != null && !res.EndToken.IsWhitespaceAfter)
                {
                    Pullenti.Ner.Token t1 = res.EndToken.Next;
                    if (t1.Next != null && !t1.IsWhitespaceAfter && t1.IsHiphen)
                    {
                        t1 = t1.Next;
                    }
                    if (t1 is Pullenti.Ner.NumberToken)
                    {
                        res.Value   += (t1 as Pullenti.Ner.NumberToken).Value;
                        res.EndToken = t1;
                    }
                }
            }
            if (res.BeginToken == res.EndToken && res.BeginToken.Chars.IsLastLower)
            {
                string src = res.BeginToken.GetSourceText();
                for (int i = src.Length - 1; i >= 0; i--)
                {
                    if (char.IsUpper(src[i]))
                    {
                        res.Value = src.Substring(0, i + 1);
                        break;
                    }
                }
            }
            return(res);
        }
Пример #4
0
        /// <summary>
        /// Tries to build an <see cref="OrgItemNameToken"/> that starts at token <paramref name="t"/>.
        /// A leading "ОРДЕНА ..." sequence is returned as an ignored part; otherwise the main parse is
        /// delegated to <c>_TryAttach</c> and the result is then widened over a hyphenated continuation,
        /// a following conjunction, and a trailing chain of noun phrases ending in <c>m_StdNouns</c> terms.
        /// </summary>
        /// <param name="t">First token of the candidate; <c>null</c> yields <c>null</c>.</param>
        /// <param name="prev">Previously attached name item, or <c>null</c> when there is none.</param>
        /// <param name="extOnto">
        /// Forwarded to <c>_TryAttach</c>. When set and the parse fails, a single-token item is returned
        /// for a geo referent or for any text token other than ';'. When set, the ontology extension and
        /// the letter-case consistency check against <paramref name="prev"/> are skipped.
        /// </param>
        /// <param name="first">
        /// When set, a Cyrillic preposition at <paramref name="t"/> other than "ПО" / "ПРИ" is rejected.
        /// </param>
        /// <returns>The attached name item, or <c>null</c> when nothing could be attached.</returns>
        public static OrgItemNameToken TryAttach(Pullenti.Ner.Token t, OrgItemNameToken prev, bool extOnto, bool first)
        {
            if (t == null)
            {
                return null;
            }
            if (t.IsValue("ОРДЕНА", null) && t.Next != null)
            {
                // "ОРДЕНА <...>": whatever is recognized after the word is returned as an ignored part.
                Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t.Next, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                if (npt != null)
                {
                    Pullenti.Ner.Token t1 = npt.EndToken;
                    if ((t1.IsValue("ЗНАК", null) || t1.IsValue("ДРУЖБА", null)) && (t1.WhitespacesAfterCount < 2))
                    {
                        // After "ЗНАК" / "ДРУЖБА" one more closely following noun phrase is absorbed.
                        npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t1.Next, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                        if (npt != null)
                        {
                            t1 = npt.EndToken;
                        }
                    }
                    return new OrgItemNameToken(t, t1)
                    {
                        IsIgnoredPart = true
                    };
                }
                if (t.Next.GetMorphClassInDictionary().IsProperSurname)
                {
                    return new OrgItemNameToken(t, t.Next)
                    {
                        IsIgnoredPart = true
                    };
                }
                Pullenti.Ner.ReferentToken ppp = t.Kit.ProcessReferent("PERSON", t.Next);
                if (ppp != null)
                {
                    return new OrgItemNameToken(t, ppp.EndToken)
                    {
                        IsIgnoredPart = true
                    };
                }
                if ((t.WhitespacesAfterCount < 2) && Pullenti.Ner.Core.BracketHelper.CanBeStartOfSequence(t.Next, true, false))
                {
                    Pullenti.Ner.Core.BracketSequenceToken br = Pullenti.Ner.Core.BracketHelper.TryParse(t.Next, Pullenti.Ner.Core.BracketParseAttr.NearCloseBracket, 10);

                    // Only short bracketed sequences (under 40 characters) are swallowed.
                    if (br != null && (br.LengthChar < 40))
                    {
                        return new OrgItemNameToken(t, br.EndToken)
                        {
                            IsIgnoredPart = true
                        };
                    }
                }
            }
            if (first && t.Chars.IsCyrillicLetter && t.Morph.Class.IsPreposition)
            {
                if (!t.IsValue("ПО", null) && !t.IsValue("ПРИ", null))
                {
                    return null;
                }
            }
            OrgItemNameToken res = _TryAttach(t, prev, extOnto);

            if (res == null)
            {
                if (extOnto)
                {
                    // Fallback when the main parse fails: wrap the single token using its source text.
                    if ((t.GetReferent() is Pullenti.Ner.Geo.GeoReferent) || ((t is Pullenti.Ner.TextToken) && !t.IsChar(';')))
                    {
                        return new OrgItemNameToken(t, t)
                        {
                            Value = t.GetSourceText()
                        };
                    }
                }
                return null;
            }
            if (prev == null && !extOnto)
            {
                if (t.Kit.Ontology != null)
                {
                    Pullenti.Ner.Org.OrganizationAnalyzer.OrgAnalyzerData ad = t.Kit.Ontology._getAnalyzerData(Pullenti.Ner.Org.OrganizationAnalyzer.ANALYZER_NAME) as Pullenti.Ner.Org.OrganizationAnalyzer.OrgAnalyzerData;
                    if (ad != null)
                    {
                        Pullenti.Ner.Core.TerminToken tok = ad.OrgPureNames.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);

                        // A longer match from the ontology's OrgPureNames moves the end token (Value is left as is).
                        if (tok != null && tok.EndChar > res.EndChar)
                        {
                            res.EndToken = tok.EndToken;
                        }
                    }
                }
            }
            if (prev != null && !extOnto)
            {
                if ((prev.Chars.IsAllLower && !res.Chars.IsAllLower && !res.IsStdTail) && !res.IsStdName)
                {
                    // A non-lowercase item after an all-lowercase one is accepted only when both are Latin
                    // or when the item starts with a term from m_StdNouns.
                    bool bothLatin = prev.Chars.IsLatinLetter && res.Chars.IsLatinLetter;
                    if (!bothLatin && m_StdNouns.TryParse(res.BeginToken, Pullenti.Ner.Core.TerminParseAttr.No) == null)
                    {
                        return null;
                    }
                }
            }
            if ((res.EndToken.Next != null && !res.EndToken.IsWhitespaceAfter && res.EndToken.Next.IsHiphen) && !res.EndToken.Next.IsWhitespaceAfter)
            {
                // "<name>-<word>" written without spaces: append the word after the hyphen.
                Pullenti.Ner.TextToken tt = res.EndToken.Next.Next as Pullenti.Ner.TextToken;
                if (tt != null)
                {
                    if (tt.Chars == res.Chars || tt.Chars.IsAllUpper)
                    {
                        res.EndToken = tt;
                        res.Value    = string.Format("{0}-{1}", res.Value, tt.Term);
                    }
                }
            }
            if ((res.EndToken.Next != null && res.EndToken.Next.IsAnd && res.EndToken.WhitespacesAfterCount == 1) && res.EndToken.Next.WhitespacesAfterCount == 1)
            {
                // "<name> and <name>": merge when the second part has the same chars info, is not an
                // organization type and shares at least one morphological case with the first part.
                OrgItemNameToken res1 = _TryAttach(res.EndToken.Next.Next, prev, extOnto);
                if (res1 != null && res1.Chars == res.Chars && OrgItemTypeToken.TryAttach(res.EndToken.Next.Next, false, null) == null)
                {
                    if (!((res1.Morph.Case & res.Morph.Case)).IsUndefined)
                    {
                        res.EndToken = res1.EndToken;
                        res.Value    = string.Format("{0} {1} {2}", res.Value, (res.Kit.BaseLanguage.IsUa ? "ТА" : "И"), res1.Value);
                    }
                }
            }
            // Count the tokens inside the result at which an m_StdNouns term can be parsed.
            for (Pullenti.Ner.Token tt = res.BeginToken; tt != null && tt.EndChar <= res.EndChar; tt = tt.Next)
            {
                if (m_StdNouns.TryParse(tt, Pullenti.Ner.Core.TerminParseAttr.No) != null)
                {
                    res.StdOrgNameNouns++;
                }
            }
            if (m_StdNouns.TryParse(res.EndToken, Pullenti.Ner.Core.TerminParseAttr.No) != null)
            {
                // The result ends with an m_StdNouns term: look ahead for further noun phrases that
                // also end with such a term and, if the chain qualifies, extend the result over them.
                int  cou = 1;                                  // starts at 1 and grows per accepted noun phrase
                bool non = !_isNotTermNoun(res.EndToken);      // set once _isNotTermNoun returns false for some noun
                Pullenti.Ner.Token et = res.EndToken;          // candidate new end token
                bool br = false;                               // true while a '(' has been seen without its ')'
                for (Pullenti.Ner.Token tt = res.EndToken.Next; tt != null; tt = tt.Next)
                {
                    if (tt.IsTableControlChar)
                    {
                        break;
                    }
                    if (tt.IsChar('('))
                    {
                        if (!non)
                        {
                            break;
                        }
                        br = true;
                        continue;
                    }
                    if (tt.IsChar(')'))
                    {
                        br = false;
                        et = tt;
                        break;
                    }
                    if (!(tt is Pullenti.Ner.TextToken))
                    {
                        break;
                    }
                    if (tt.WhitespacesBeforeCount > 1)
                    {
                        // A wider gap is tolerated only without an empty line and with unchanged chars info.
                        if (tt.NewlinesBeforeCount > 1)
                        {
                            break;
                        }
                        if (tt.Chars != res.EndToken.Chars)
                        {
                            break;
                        }
                    }
                    if (tt.Morph.Class.IsPreposition || tt.IsCommaAnd)
                    {
                        continue;
                    }
                    Pullenti.Morph.MorphClass dd = tt.GetMorphClassInDictionary();
                    if (!dd.IsNoun && !dd.IsAdjective)
                    {
                        break;
                    }
                    Pullenti.Ner.Core.NounPhraseToken npt2 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(tt, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                    if (npt2 == null)
                    {
                        // A lone adjective is skipped; anything else ends the chain.
                        if (dd == Pullenti.Morph.MorphClass.Adjective)
                        {
                            continue;
                        }
                        break;
                    }
                    if (m_StdNouns.TryParse(npt2.EndToken, Pullenti.Ner.Core.TerminParseAttr.No) == null)
                    {
                        break;
                    }
                    if (npt2.EndToken.Chars != res.EndToken.Chars)
                    {
                        break;
                    }
                    if ((npt2.EndToken.IsValue("УПРАВЛЕНИЕ", null) || npt2.EndToken.IsValue("ИНСТИТУТ", null) || npt2.EndToken.IsValue("УПРАВЛІННЯ", null)) || npt2.EndToken.IsValue("ІНСТИТУТ", null) || tt.Previous.IsValue("ПРИ", null))
                    {
                        // Stop if the organization analyzer recognizes a referent starting at this token.
                        Pullenti.Ner.ReferentToken rt = tt.Kit.ProcessReferent(Pullenti.Ner.Org.OrganizationAnalyzer.ANALYZER_NAME, tt);
                        if (rt != null)
                        {
                            break;
                        }
                    }
                    cou++;
                    tt = npt2.EndToken;
                    if (!_isNotTermNoun(tt))
                    {
                        non = true;
                        et  = tt;
                    }
                }
                // Apply the extension only if a qualifying noun was found and no bracket is left open.
                if (non && !br)
                {
                    res.StdOrgNameNouns += cou;
                    res.EndToken         = et;
                }
            }
            return res;
        }
Пример #5
0
        /// <summary>
        /// Tries to build a city referent from the first item of <paramref name="li"/> when that item is
        /// typed as <c>CityItemToken.ItemType.City</c>, using the surrounding tokens to confirm the name.
        /// </summary>
        /// <param name="li">Candidate city items; only <c>li[0]</c> is inspected. A null or empty list yields null.</param>
        /// <param name="oi">Set to <c>li[0].OntoItem</c> (which may be null) once the first item has passed
        /// the type check; null when the method bails out before that.</param>
        /// <param name="always">When true, a referent is produced even without contextual confirmation,
        /// unless the PERSON check below switches the flag off.</param>
        /// <returns>A referent token spanning <c>li[0]</c> with a <c>GeoReferent</c>, or null when rejected.</returns>
        static Pullenti.Ner.ReferentToken _tryNameExist(List <CityItemToken> li, out Pullenti.Ner.Core.IntOntologyItem oi, bool always)
        {
            oi = null;
            // Guard against an empty list too: li[0] is dereferenced right here and throughout the method.
            if (li == null || li.Count == 0 || li[0].Typ != CityItemToken.ItemType.City)
            {
                return(null);
            }
            oi = li[0].OntoItem;
            Pullenti.Ner.TextToken tt = li[0].BeginToken as Pullenti.Ner.TextToken;
            if (tt == null)
            {
                return(null);
            }
            bool   ok  = false;
            // Prefer the canonic ontology text; fall back to the item's own value.
            string nam = (oi == null ? li[0].Value : oi.CanonicText);

            if (nam == null)
            {
                return(null);
            }
            if (nam == "РИМ")
            {
                // Special case for the name "РИМ": accepted in the exact form "РИМ" unless the next
                // text token is a proper second name, or in the form "РИМЕ" right after the token "В".
                if (tt.Term == "РИМ")
                {
                    if (!((tt.Next is Pullenti.Ner.TextToken) && tt.Next.GetMorphClassInDictionary().IsProperSecname))
                    {
                        ok = true;
                    }
                }
                else if (tt.Previous != null && tt.Previous.IsValue("В", null) && tt.Term == "РИМЕ")
                {
                    ok = true;
                }
            }
            else if (oi != null && oi.Referent != null && oi.Owner.IsExtOntology)
            {
                // The name comes from an external ontology with an attached referent.
                ok = true;
            }
            else if (nam.EndsWith("ГРАД") || nam.EndsWith("СК"))
            {
                ok = true;
            }
            else if (nam.EndsWith("TOWN") || nam.StartsWith("SAN"))
            {
                ok = true;
            }
            else if (li[0].Chars.IsLatinLetter && li[0].BeginToken.Previous != null && ((li[0].BeginToken.Previous.IsValue("IN", null) || li[0].BeginToken.Previous.IsValue("FROM", null))))
            {
                ok = true;
            }
            else
            {
                // Look forward on the same line: skip commas, opening brackets, prepositions, conjunctions
                // and misc words; the first other token confirms the name only if it is a geo referent
                // written in the same script (Cyrillic or not) as the candidate.
                for (Pullenti.Ner.Token tt2 = li[0].EndToken.Next; tt2 != null; tt2 = tt2.Next)
                {
                    if (tt2.IsNewlineBefore)
                    {
                        break;
                    }
                    if ((tt2.IsCharOf(",(") || tt2.Morph.Class.IsPreposition || tt2.Morph.Class.IsConjunction) || tt2.Morph.Class.IsMisc)
                    {
                        continue;
                    }
                    if ((tt2.GetReferent() is Pullenti.Ner.Geo.GeoReferent) && tt2.Chars.IsCyrillicLetter == li[0].Chars.IsCyrillicLetter)
                    {
                        ok = true;
                    }
                    break;
                }
                if (!ok)
                {
                    // Same idea looking backward; a geo referent before the name needs extra vetting.
                    for (Pullenti.Ner.Token tt2 = li[0].BeginToken.Previous; tt2 != null; tt2 = tt2.Previous)
                    {
                        if (tt2.IsNewlineAfter)
                        {
                            break;
                        }
                        if ((tt2.IsCharOf(",)") || tt2.Morph.Class.IsPreposition || tt2.Morph.Class.IsConjunction) || tt2.Morph.Class.IsMisc)
                        {
                            continue;
                        }
                        if ((tt2.GetReferent() is Pullenti.Ner.Geo.GeoReferent) && tt2.Chars.IsCyrillicLetter == li[0].Chars.IsCyrillicLetter)
                        {
                            ok = true;
                        }
                        if (ok)
                        {
                            // Reject when the text parses as a street only if the candidate word is included.
                            List <Pullenti.Ner.Address.Internal.StreetItemToken> sits = Pullenti.Ner.Address.Internal.StreetItemToken.TryParseList(li[0].BeginToken, null, 10);
                            if (sits != null && sits.Count > 1)
                            {
                                Pullenti.Ner.Address.Internal.AddressItemToken ss = Pullenti.Ner.Address.Internal.StreetDefineHelper.TryParseStreet(sits, false, false);
                                if (ss != null)
                                {
                                    sits.RemoveAt(0);
                                    if (Pullenti.Ner.Address.Internal.StreetDefineHelper.TryParseStreet(sits, false, false) == null)
                                    {
                                        ok = false;
                                    }
                                }
                            }
                        }
                        if (ok)
                        {
                            if (li.Count > 1 && li[1].Typ == CityItemToken.ItemType.ProperName && (li[1].WhitespacesBeforeCount < 3))
                            {
                                // A proper name follows closely after the candidate.
                                ok = false;
                            }
                            else
                            {
                                Pullenti.Morph.MorphClass mc = li[0].BeginToken.GetMorphClassInDictionary();
                                if (mc.IsProperName || mc.IsProperSurname || mc.IsAdjective)
                                {
                                    ok = false;
                                }
                                else
                                {
                                    // Reject when a noun phrase starting at the candidate extends beyond it.
                                    Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(li[0].BeginToken, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                                    if (npt != null && npt.EndChar > li[0].EndChar)
                                    {
                                        ok = false;
                                    }
                                }
                            }
                        }
                        if (Pullenti.Ner.Address.Internal.AddressItemToken.TryAttachOrg(li[0].BeginToken) != null)
                        {
                            // An organization can be attached at the candidate position.
                            ok = false;
                        }
                        // Only the first significant token before the name is examined.
                        break;
                    }
                }
            }
            if (always)
            {
                // A doubtful, surname-like word set apart by whitespace that also starts a PERSON entity
                // must not be forced into a city.
                if (li[0].WhitespacesBeforeCount > 3 && li[0].Doubtful && li[0].BeginToken.GetMorphClassInDictionary().IsProperSurname)
                {
                    Pullenti.Ner.ReferentToken pp = li[0].Kit.ProcessReferent("PERSON", li[0].BeginToken);
                    if (pp != null)
                    {
                        always = false;
                    }
                }
            }
            if (li[0].BeginToken.Chars.IsLatinLetter && li[0].BeginToken == li[0].EndToken)
            {
                // Single Latin word followed (optionally after a comma) by a Latin text token shorter
                // than 3 characters that is not all lower-case: the earlier confirmation is withdrawn.
                Pullenti.Ner.Token tt1 = li[0].EndToken.Next;
                if (tt1 != null && tt1.IsChar(','))
                {
                    tt1 = tt1.Next;
                }
                if (((tt1 is Pullenti.Ner.TextToken) && tt1.Chars.IsLatinLetter && (tt1.LengthChar < 3)) && !tt1.Chars.IsAllLower)
                {
                    ok = false;
                }
            }
            if (!ok && !always)
            {
                return(null);
            }
            Pullenti.Ner.Geo.GeoReferent city = null;
            if (oi != null && (oi.Referent is Pullenti.Ner.Geo.GeoReferent) && !oi.Owner.IsExtOntology)
            {
                // Internal ontology entry: reuse a clone of its referent without the recorded occurrences.
                city = oi.Referent.Clone() as Pullenti.Ner.Geo.GeoReferent;
                city.Occurrence.Clear();
            }
            else
            {
                city = new Pullenti.Ner.Geo.GeoReferent();
                city.AddName(nam);
                if (oi != null && (oi.Referent is Pullenti.Ner.Geo.GeoReferent))
                {
                    city.MergeSlots2(oi.Referent as Pullenti.Ner.Geo.GeoReferent, li[0].Kit.BaseLanguage);
                }
                if (!city.IsCity)
                {
                    city.AddTypCity(li[0].Kit.BaseLanguage);
                }
            }
            return(new Pullenti.Ner.ReferentToken(city, li[0].BeginToken, li[0].EndToken)
            {
                Morph = li[0].Morph
            });
        }
Пример #6
0
        /// <summary>
        /// Tries to build a city referent from a sequence that starts with a type word
        /// (<c>ItemType.Noun</c>) or a <c>ItemType.Misc</c> item, followed by a city or proper name.
        /// </summary>
        /// <param name="li">Candidate items; at least two are required. On success the list is modified:
        /// items after the name are removed, and a leading Misc item is removed as well.</param>
        /// <param name="oi">Receives the ontology item of the name when the first branch
        /// (a type word followed by a City item) is taken; null otherwise.</param>
        /// <param name="always">When true, the result is returned even if the context did not confirm it.</param>
        /// <returns>A referent token with the constructed <c>GeoReferent</c>, or null when rejected.</returns>
        static Pullenti.Ner.ReferentToken _tryNounName(List <CityItemToken> li, out Pullenti.Ner.Core.IntOntologyItem oi, bool always)
        {
            oi = null;
            if (li == null || (li.Count < 2) || ((li[0].Typ != CityItemToken.ItemType.Noun && li[0].Typ != CityItemToken.ItemType.Misc)))
            {
                return(null);
            }
            bool ok = !li[0].Doubtful;

            // A Misc head never confirms the match by itself.
            if (ok && li[0].Typ == CityItemToken.ItemType.Misc)
            {
                ok = false;
            }
            string typ     = (li[0].Typ == CityItemToken.ItemType.Misc ? null : li[0].Value);
            string typ2    = (li[0].Typ == CityItemToken.ItemType.Misc ? null : li[0].AltValue);
            string probAdj = null;
            // Index of the item holding the name; moves to 2 when two type words precede it.
            int    i1      = 1;

            Pullenti.Ner.Referent org = null;
            // Two type words in a row (a settlement/village type followed by another noun): the second
            // noun becomes the secondary type and the name is taken from the next item.
            if ((typ != null && li[i1].Typ == CityItemToken.ItemType.Noun && ((i1 + 1) < li.Count)) && li[0].WhitespacesAfterCount <= 1 && (((Pullenti.Morph.LanguageHelper.EndsWith(typ, "ПОСЕЛОК") || Pullenti.Morph.LanguageHelper.EndsWith(typ, "СЕЛИЩЕ") || typ == "ДЕРЕВНЯ") || typ == "СЕЛО")))
            {
                if (li[i1].BeginToken == li[i1].EndToken)
                {
                    // The second word is really an organization reference: not a city.
                    Pullenti.Ner.Address.Internal.AddressItemToken ooo = Pullenti.Ner.Address.Internal.AddressItemToken.TryAttachOrg(li[i1].BeginToken);
                    if (ooo != null && ooo.RefToken != null)
                    {
                        return(null);
                    }
                }
                typ2 = li[i1].Value;
                if (typ2 == "СТАНЦИЯ" && li[i1].BeginToken.IsValue("СТ", null) && ((i1 + 1) < li.Count))
                {
                    // The token "СТ" was read as "СТАНЦИЯ"; presumably it may also abbreviate the adjective
                    // "СТАРЫЙ", so keep that adjective in the form agreeing with the following item and
                    // register "<adjective> <name>" as an additional name later.
                    Pullenti.Ner.MorphCollection m = li[i1 + 1].Morph;
                    if (m.Number == Pullenti.Morph.MorphNumber.Plural)
                    {
                        probAdj = "СТАРЫЕ";
                    }
                    else if (m.Gender == Pullenti.Morph.MorphGender.Feminie)
                    {
                        probAdj = "СТАРАЯ";
                    }
                    else if (m.Gender == Pullenti.Morph.MorphGender.Masculine)
                    {
                        probAdj = "СТАРЫЙ";
                    }
                    else
                    {
                        probAdj = "СТАРОЕ";
                    }
                }
                i1++;
            }
            string name    = li[i1].Value ?? ((li[i1].OntoItem == null ? null : li[i1].OntoItem.CanonicText));
            string altName = li[i1].AltValue;

            if (name == null)
            {
                return(null);
            }
            Pullenti.Ner.MorphCollection mc = li[0].Morph;
            if (i1 == 1 && li[i1].Typ == CityItemToken.ItemType.City && ((li[0].Value == "ГОРОД" || li[0].Value == "МІСТО" || li[0].Typ == CityItemToken.ItemType.Misc)))
            {
                // Branch 1: "ГОРОД"/"МІСТО" (or a Misc head) followed by a City item.
                if (typ == null && ((i1 + 1) < li.Count) && li[i1 + 1].Typ == CityItemToken.ItemType.Noun)
                {
                    return(null);
                }
                oi = li[i1].OntoItem;
                if (oi != null)
                {
                    name = oi.CanonicText;
                }
                // oi may be null here (the City item has no ontology entry), so it must be checked
                // before reading MiscAttr; otherwise short names would throw NullReferenceException.
                if (name.Length > 2 || (oi != null && oi.MiscAttr != null))
                {
                    if (!li[1].Doubtful || ((oi != null && oi.MiscAttr != null)))
                    {
                        ok = true;
                    }
                    else if (!ok && !li[1].IsNewlineBefore)
                    {
                        if (li[0].GeoObjectBefore || li[1].GeoObjectAfter)
                        {
                            ok = true;
                        }
                        else if (Pullenti.Ner.Address.Internal.StreetDefineHelper.CheckStreetAfter(li[1].EndToken.Next))
                        {
                            ok = true;
                        }
                        else if (li[1].EndToken.Next != null && (li[1].EndToken.Next.GetReferent() is Pullenti.Ner.Date.DateReferent))
                        {
                            ok = true;
                        }
                        else if ((li[1].WhitespacesBeforeCount < 2) && li[1].OntoItem != null)
                        {
                            // The original distinguished IsNewlineAfter here but accepted in both cases.
                            ok = true;
                        }
                    }
                    // A doubtful name followed by a token with the same character class is rejected...
                    if (li[1].Doubtful && li[1].EndToken.Next != null && li[1].EndToken.Chars == li[1].EndToken.Next.Chars)
                    {
                        ok = false;
                    }
                    // ...unless the whole group is preceded by the token "В".
                    if (li[0].BeginToken.Previous != null && li[0].BeginToken.Previous.IsValue("В", null))
                    {
                        ok = true;
                    }
                }
                if (!ok)
                {
                    ok = CheckYearAfter(li[1].EndToken.Next);
                }
                if (!ok)
                {
                    ok = CheckCityAfter(li[1].EndToken.Next);
                }
            }
            else if ((li[i1].Typ == CityItemToken.ItemType.ProperName || li[i1].Typ == CityItemToken.ItemType.City))
            {
                // Branch 2: any type word followed by a proper name or a city.
                if (((li[0].Value == "АДМИНИСТРАЦИЯ" || li[0].Value == "АДМІНІСТРАЦІЯ")) && i1 == 1)
                {
                    return(null);
                }
                if (li[i1].IsNewlineBefore)
                {
                    // A name on a new line is tolerated only when the list is exactly type + name.
                    if (li.Count != 2)
                    {
                        return(null);
                    }
                }
                if (!li[0].Doubtful)
                {
                    ok = true;
                    // Very short names are not accepted.
                    if (name.Length < 2)
                    {
                        ok = false;
                    }
                    else if ((name.Length < 3) && li[0].Morph.Number != Pullenti.Morph.MorphNumber.Singular)
                    {
                        ok = false;
                    }
                    if (li[i1].Doubtful && !li[i1].GeoObjectAfter && !li[0].GeoObjectBefore)
                    {
                        if (li[i1].Morph.Case.IsGenitive)
                        {
                            // A doubtful genitive name survives only at a text boundary or next to a
                            // geo object / house number.
                            if (li[i1].EndToken.Next == null || MiscLocationHelper.CheckGeoObjectAfter(li[i1].EndToken.Next, false) || Pullenti.Ner.Address.Internal.AddressItemToken.CheckHouseAfter(li[i1].EndToken.Next, false, true))
                            {
                            }
                            else if (li[0].BeginToken.Previous == null || MiscLocationHelper.CheckGeoObjectBefore(li[0].BeginToken))
                            {
                            }
                            else
                            {
                                ok = false;
                            }
                        }
                        if (ok)
                        {
                            // A PERSONPROPERTY parsed at the preceding token plus a PERSON parsed at the
                            // name: treat the name as a person, not a city.
                            Pullenti.Ner.ReferentToken rt0 = li[i1].Kit.ProcessReferent("PERSONPROPERTY", li[0].BeginToken.Previous);
                            if (rt0 != null)
                            {
                                Pullenti.Ner.ReferentToken rt1 = li[i1].Kit.ProcessReferent("PERSON", li[i1].BeginToken);
                                if (rt1 != null)
                                {
                                    ok = false;
                                }
                            }
                        }
                    }
                    Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(li[i1].BeginToken, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                    if (npt != null)
                    {
                        if (npt.EndToken.EndChar > li[i1].EndChar && npt.Adjectives.Count > 0 && !npt.Adjectives[0].EndToken.Next.IsComma)
                        {
                            // The name is only the adjective part of a longer noun phrase.
                            ok = false;
                        }
                        else if (TerrItemToken.m_UnknownRegions.TryParse(npt.EndToken, Pullenti.Ner.Core.TerminParseAttr.FullwordsOnly) != null)
                        {
                            // The phrase ends with a term from m_UnknownRegions: require a non-city geo
                            // referent directly before or after (optionally across a comma).
                            bool ok1 = false;
                            if (li[0].BeginToken.Previous != null)
                            {
                                Pullenti.Ner.Token ttt = li[0].BeginToken.Previous;
                                if (ttt.IsComma && ttt.Previous != null)
                                {
                                    ttt = ttt.Previous;
                                }
                                Pullenti.Ner.Geo.GeoReferent geo = ttt.GetReferent() as Pullenti.Ner.Geo.GeoReferent;
                                if (geo != null && !geo.IsCity)
                                {
                                    ok1 = true;
                                }
                            }
                            if (npt.EndToken.Next != null)
                            {
                                Pullenti.Ner.Token ttt = npt.EndToken.Next;
                                if (ttt.IsComma && ttt.Next != null)
                                {
                                    ttt = ttt.Next;
                                }
                                Pullenti.Ner.Geo.GeoReferent geo = ttt.GetReferent() as Pullenti.Ner.Geo.GeoReferent;
                                if (geo != null && !geo.IsCity)
                                {
                                    ok1 = true;
                                }
                            }
                            if (!ok1)
                            {
                                return(null);
                            }
                        }
                    }
                    if (li[0].Value == "ПОРТ")
                    {
                        if (li[i1].Chars.IsAllUpper || li[i1].Chars.IsLatinLetter)
                        {
                            return(null);
                        }
                    }
                }
                else if (li[0].GeoObjectBefore)
                {
                    ok = true;
                }
                else if (li[i1].GeoObjectAfter && !li[i1].IsNewlineAfter)
                {
                    ok = true;
                }
                else
                {
                    ok = CheckYearAfter(li[i1].EndToken.Next);
                }
                if (!ok)
                {
                    ok = CheckStreetAfter(li[i1].EndToken.Next);
                }
                if (!ok && li[0].BeginToken.Previous != null && li[0].BeginToken.Previous.IsValue("В", null))
                {
                    ok = true;
                }
            }
            else
            {
                return(null);
            }
            if (!ok && !always)
            {
                // Last chance: a "near" marker recognised before the group.
                if (MiscLocationHelper.CheckNearBefore(li[0].BeginToken.Previous) == null)
                {
                    return(null);
                }
            }
            // Drop everything after the name item; the caller's list is modified.
            if (li.Count > (i1 + 1))
            {
                li.RemoveRange(i1 + 1, li.Count - i1 - 1);
            }
            Pullenti.Ner.Geo.GeoReferent city = new Pullenti.Ner.Geo.GeoReferent();
            if (oi != null && oi.Referent != null)
            {
                // Reuse a clone of the ontology referent only when it really is a GeoReferent;
                // otherwise keep the fresh instance instead of dereferencing a null cast result.
                Pullenti.Ner.Geo.GeoReferent cloned = oi.Referent.Clone() as Pullenti.Ner.Geo.GeoReferent;
                if (cloned != null)
                {
                    city = cloned;
                    city.Occurrence.Clear();
                }
            }
            if (!li[0].Morph.Case.IsUndefined && li[0].Morph.Gender != Pullenti.Morph.MorphGender.Undefined)
            {
                // A single-token adjectival name is re-derived with the case and gender of the type word.
                if (li[i1].EndToken.Morph.Class.IsAdjective && li[i1].BeginToken == li[i1].EndToken)
                {
                    string nam = Pullenti.Ner.Core.ProperNameHelper.GetNameEx(li[i1].BeginToken, li[i1].EndToken, Pullenti.Morph.MorphClass.Adjective, li[0].Morph.Case, li[0].Morph.Gender, false, false);
                    if (nam != null && nam != name)
                    {
                        name = nam;
                    }
                }
            }
            if (li[0].Morph.Case.IsNominative)
            {
                // With a nominative type word the alternative name is added before the main one and
                // gets no probAdj variant.
                if (altName != null)
                {
                    city.AddName(altName);
                }
                altName = null;
            }
            city.AddName(name);
            if (probAdj != null)
            {
                city.AddName(probAdj + " " + name);
            }
            if (altName != null)
            {
                city.AddName(altName);
                if (probAdj != null)
                {
                    city.AddName(probAdj + " " + altName);
                }
            }
            if (typ != null)
            {
                city.AddTyp(typ);
            }
            else if (!city.IsCity)
            {
                city.AddTypCity(li[0].Kit.BaseLanguage);
            }
            if (typ2 != null)
            {
                city.AddTyp(typ2.ToLower());
            }
            if (li[0].HigherGeo != null && GeoOwnerHelper.CanBeHigher(li[0].HigherGeo, city))
            {
                city.Higher = li[0].HigherGeo;
            }
            // A Misc head is not part of the resulting span.
            if (li[0].Typ == CityItemToken.ItemType.Misc)
            {
                li.RemoveAt(0);
            }
            Pullenti.Ner.ReferentToken res = new Pullenti.Ner.ReferentToken(city, li[0].BeginToken, li[li.Count - 1].EndToken)
            {
                Morph = mc
            };
            // "<name>-<digits>" with a small number (below 50): append the number to every name slot
            // and extend the span over it.
            if (res.EndToken.Next != null && res.EndToken.Next.IsHiphen && (res.EndToken.Next.Next is Pullenti.Ner.NumberToken))
            {
                Pullenti.Ner.NumberToken num = res.EndToken.Next.Next as Pullenti.Ner.NumberToken;
                if ((num.Typ == Pullenti.Ner.NumberSpellingType.Digit && !num.Morph.Class.IsAdjective && num.IntValue != null) && (num.IntValue.Value < 50))
                {
                    foreach (Pullenti.Ner.Slot s in city.Slots)
                    {
                        if (s.TypeName == Pullenti.Ner.Geo.GeoReferent.ATTR_NAME)
                        {
                            city.UploadSlot(s, string.Format("{0}-{1}", s.Value, num.Value));
                        }
                    }
                    res.EndToken = num;
                }
            }
            // The single word "ГОРОДОК" followed by a house number is rejected.
            if (li[0].BeginToken == li[0].EndToken && li[0].BeginToken.IsValue("ГОРОДОК", null))
            {
                if (Pullenti.Ner.Address.Internal.AddressItemToken.CheckHouseAfter(res.EndToken.Next, true, false))
                {
                    return(null);
                }
            }
            return(res);
        }
Пример #7
0
        public static Pullenti.Ner.ReferentToken TryAttachTerritory(List <TerrItemToken> li, Pullenti.Ner.Core.AnalyzerData ad, bool attachAlways = false, List <CityItemToken> cits = null, List <Pullenti.Ner.Geo.GeoReferent> exists = null)
        {
            if (li == null || li.Count == 0)
            {
                return(null);
            }
            TerrItemToken        exObj   = null;
            TerrItemToken        newName = null;
            List <TerrItemToken> adjList = new List <TerrItemToken>();
            TerrItemToken        noun    = null;
            TerrItemToken        addNoun = null;

            Pullenti.Ner.ReferentToken rt = _tryAttachMoscowAO(li, ad);
            if (rt != null)
            {
                return(rt);
            }
            if (li[0].TerminItem != null && li[0].TerminItem.CanonicText == "ТЕРРИТОРИЯ")
            {
                Pullenti.Ner.ReferentToken res2 = _tryAttachPureTerr(li, ad);
                return(res2);
            }
            if (li.Count == 2)
            {
                if (li[0].Rzd != null && li[1].RzdDir != null)
                {
                    Pullenti.Ner.Geo.GeoReferent rzd = new Pullenti.Ner.Geo.GeoReferent();
                    rzd.AddName(li[1].RzdDir);
                    rzd.AddTypTer(li[0].Kit.BaseLanguage);
                    rzd.AddSlot(Pullenti.Ner.Geo.GeoReferent.ATTR_REF, li[0].Rzd.Referent, false, 0);
                    rzd.AddExtReferent(li[0].Rzd);
                    return(new Pullenti.Ner.ReferentToken(rzd, li[0].BeginToken, li[1].EndToken));
                }
                if (li[1].Rzd != null && li[0].RzdDir != null)
                {
                    Pullenti.Ner.Geo.GeoReferent rzd = new Pullenti.Ner.Geo.GeoReferent();
                    rzd.AddName(li[0].RzdDir);
                    rzd.AddTypTer(li[0].Kit.BaseLanguage);
                    rzd.AddSlot(Pullenti.Ner.Geo.GeoReferent.ATTR_REF, li[1].Rzd.Referent, false, 0);
                    rzd.AddExtReferent(li[1].Rzd);
                    return(new Pullenti.Ner.ReferentToken(rzd, li[0].BeginToken, li[1].EndToken));
                }
            }
            bool canBeCityBefore = false;
            bool adjTerrBefore   = false;

            if (cits != null)
            {
                if (cits[0].Typ == CityItemToken.ItemType.City)
                {
                    canBeCityBefore = true;
                }
                else if (cits[0].Typ == CityItemToken.ItemType.Noun && cits.Count > 1)
                {
                    canBeCityBefore = true;
                }
            }
            int k;

            for (k = 0; k < li.Count; k++)
            {
                if (li[k].OntoItem != null)
                {
                    if (exObj != null || newName != null)
                    {
                        break;
                    }
                    if (noun != null)
                    {
                        if (k == 1)
                        {
                            if (noun.TerminItem.CanonicText == "РАЙОН" || noun.TerminItem.CanonicText == "ОБЛАСТЬ" || noun.TerminItem.CanonicText == "СОЮЗ")
                            {
                                if (li[k].OntoItem.Referent is Pullenti.Ner.Geo.GeoReferent)
                                {
                                    if ((li[k].OntoItem.Referent as Pullenti.Ner.Geo.GeoReferent).IsState)
                                    {
                                        break;
                                    }
                                }
                                bool ok = false;
                                Pullenti.Ner.Token tt = li[k].EndToken.Next;
                                if (tt == null)
                                {
                                    ok = true;
                                }
                                else if (tt.IsCharOf(",."))
                                {
                                    ok = true;
                                }
                                if (!ok)
                                {
                                    ok = MiscLocationHelper.CheckGeoObjectBefore(li[0].BeginToken);
                                }
                                if (!ok)
                                {
                                    Pullenti.Ner.Address.Internal.AddressItemToken adr = Pullenti.Ner.Address.Internal.AddressItemToken.TryParse(tt, null, false, false, null);
                                    if (adr != null)
                                    {
                                        if (adr.Typ == Pullenti.Ner.Address.Internal.AddressItemToken.ItemType.Street)
                                        {
                                            ok = true;
                                        }
                                    }
                                }
                                if (!ok)
                                {
                                    break;
                                }
                            }
                            if (li[k].OntoItem != null)
                            {
                                if (noun.BeginToken.IsValue("МО", null) || noun.BeginToken.IsValue("ЛО", null))
                                {
                                    return(null);
                                }
                            }
                        }
                    }
                    exObj = li[k];
                }
                else if (li[k].TerminItem != null)
                {
                    if (noun != null)
                    {
                        break;
                    }
                    if (li[k].TerminItem.IsAlwaysPrefix && k > 0)
                    {
                        break;
                    }
                    if (k > 0 && li[k].IsDoubt)
                    {
                        if (li[k].BeginToken == li[k].EndToken && li[k].BeginToken.IsValue("ЗАО", null))
                        {
                            break;
                        }
                    }
                    if (li[k].TerminItem.IsAdjective || li[k].IsGeoInDictionary)
                    {
                        adjList.Add(li[k]);
                    }
                    else
                    {
                        if (exObj != null)
                        {
                            Pullenti.Ner.Geo.GeoReferent geo = exObj.OntoItem.Referent as Pullenti.Ner.Geo.GeoReferent;
                            if (geo == null)
                            {
                                break;
                            }
                            if (exObj.IsAdjective && ((li[k].TerminItem.CanonicText == "СОЮЗ" || li[k].TerminItem.CanonicText == "ФЕДЕРАЦИЯ")))
                            {
                                string str = exObj.OntoItem.ToString();
                                if (!str.Contains(li[k].TerminItem.CanonicText))
                                {
                                    return(null);
                                }
                            }
                            if (li[k].TerminItem.CanonicText == "РАЙОН" || li[k].TerminItem.CanonicText == "ОКРУГ" || li[k].TerminItem.CanonicText == "КРАЙ")
                            {
                                StringBuilder tmp = new StringBuilder();
                                foreach (Pullenti.Ner.Slot s in geo.Slots)
                                {
                                    if (s.TypeName == Pullenti.Ner.Geo.GeoReferent.ATTR_TYPE)
                                    {
                                        tmp.AppendFormat("{0};", s.Value);
                                    }
                                }
                                if (!tmp.ToString().ToUpper().Contains(li[k].TerminItem.CanonicText))
                                {
                                    if (k != 1 || newName != null)
                                    {
                                        break;
                                    }
                                    newName             = li[0];
                                    newName.IsAdjective = true;
                                    newName.OntoItem    = null;
                                    exObj = null;
                                }
                            }
                        }
                        noun = li[k];
                        if (k == 0)
                        {
                            TerrItemToken tt = TerrItemToken.TryParse(li[k].BeginToken.Previous, null, true, false, null);
                            if (tt != null && tt.Morph.Class.IsAdjective)
                            {
                                adjTerrBefore = true;
                            }
                        }
                    }
                }
                else
                {
                    if (exObj != null)
                    {
                        break;
                    }
                    if (newName != null)
                    {
                        break;
                    }
                    newName = li[k];
                }
            }
            string name     = null;
            string altName  = null;
            string fullName = null;

            Pullenti.Ner.MorphCollection morph = null;
            if (exObj != null)
            {
                if (exObj.IsAdjective && !exObj.Morph.Language.IsEn && noun == null)
                {
                    if (attachAlways && exObj.EndToken.Next != null)
                    {
                        Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(exObj.BeginToken, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                        if (exObj.EndToken.Next.IsCommaAnd)
                        {
                        }
                        else if (npt == null)
                        {
                        }
                        else
                        {
                            Pullenti.Ner.Address.Internal.StreetItemToken str = Pullenti.Ner.Address.Internal.StreetItemToken.TryParse(exObj.EndToken.Next, null, false, null, false);
                            if (str != null)
                            {
                                if (str.Typ == Pullenti.Ner.Address.Internal.StreetItemType.Noun && str.EndToken == npt.EndToken)
                                {
                                    return(null);
                                }
                            }
                        }
                    }
                    else
                    {
                        CityItemToken cit = CityItemToken.TryParse(exObj.EndToken.Next, null, false, null);
                        if (cit != null && ((cit.Typ == CityItemToken.ItemType.Noun || cit.Typ == CityItemToken.ItemType.City)))
                        {
                            Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(exObj.BeginToken, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                            if (npt != null && npt.EndToken == cit.EndToken)
                            {
                            }
                            else
                            {
                                return(null);
                            }
                        }
                        else if (exObj.BeginToken.IsValue("ПОДНЕБЕСНЫЙ", null))
                        {
                        }
                        else
                        {
                            return(null);
                        }
                    }
                }
                if (noun == null && exObj.CanBeCity)
                {
                    CityItemToken cit0 = CityItemToken.TryParseBack(exObj.BeginToken.Previous);
                    if (cit0 != null && cit0.Typ != CityItemToken.ItemType.ProperName)
                    {
                        return(null);
                    }
                }
                if (exObj.IsDoubt && noun == null)
                {
                    bool ok2 = false;
                    if (_canBeGeoAfter(exObj.EndToken.Next))
                    {
                        ok2 = true;
                    }
                    else if (!exObj.CanBeSurname && !exObj.CanBeCity)
                    {
                        if ((exObj.EndToken.Next != null && exObj.EndToken.Next.IsChar(')') && exObj.BeginToken.Previous != null) && exObj.BeginToken.Previous.IsChar('('))
                        {
                            ok2 = true;
                        }
                        else if (exObj.Chars.IsLatinLetter && exObj.BeginToken.Previous != null)
                        {
                            if (exObj.BeginToken.Previous.IsValue("IN", null))
                            {
                                ok2 = true;
                            }
                            else if (exObj.BeginToken.Previous.IsValue("THE", null) && exObj.BeginToken.Previous.Previous != null && exObj.BeginToken.Previous.Previous.IsValue("IN", null))
                            {
                                ok2 = true;
                            }
                        }
                    }
                    if (!ok2)
                    {
                        CityItemToken cit0 = CityItemToken.TryParseBack(exObj.BeginToken.Previous);
                        if (cit0 != null && cit0.Typ != CityItemToken.ItemType.ProperName)
                        {
                        }
                        else if (MiscLocationHelper.CheckGeoObjectBefore(exObj.BeginToken.Previous))
                        {
                        }
                        else
                        {
                            return(null);
                        }
                    }
                }
                name  = exObj.OntoItem.CanonicText;
                morph = exObj.Morph;
            }
            else if (newName != null)
            {
                if (noun == null)
                {
                    return(null);
                }
                for (int j = 1; j < k; j++)
                {
                    if (li[j].IsNewlineBefore && !li[0].IsNewlineBefore)
                    {
                        if (Pullenti.Ner.Core.BracketHelper.CanBeStartOfSequence(li[j].BeginToken, false, false))
                        {
                        }
                        else
                        {
                            return(null);
                        }
                    }
                }
                morph = noun.Morph;
                if (newName.IsAdjective)
                {
                    if (noun.TerminItem.Acronym == "АО")
                    {
                        if (noun.BeginToken != noun.EndToken)
                        {
                            return(null);
                        }
                        if (newName.Morph.Gender != Pullenti.Morph.MorphGender.Feminie)
                        {
                            return(null);
                        }
                    }
                    Pullenti.Ner.Geo.GeoReferent geoBefore = null;
                    Pullenti.Ner.Token           tt0       = li[0].BeginToken.Previous;
                    if (tt0 != null && tt0.IsCommaAnd)
                    {
                        tt0 = tt0.Previous;
                    }
                    if (!li[0].IsNewlineBefore && tt0 != null)
                    {
                        geoBefore = tt0.GetReferent() as Pullenti.Ner.Geo.GeoReferent;
                    }
                    if (li.IndexOf(noun) < li.IndexOf(newName))
                    {
                        if (noun.TerminItem.IsState)
                        {
                            return(null);
                        }
                        if (newName.CanBeSurname && geoBefore == null)
                        {
                            if (((noun.Morph.Case & newName.Morph.Case)).IsUndefined)
                            {
                                return(null);
                            }
                        }
                        if (Pullenti.Ner.Core.MiscHelper.IsExistsInDictionary(newName.BeginToken, newName.EndToken, Pullenti.Morph.MorphClass.Adjective | Pullenti.Morph.MorphClass.Pronoun | Pullenti.Morph.MorphClass.Verb))
                        {
                            if (noun.BeginToken != newName.BeginToken)
                            {
                                if (geoBefore == null)
                                {
                                    if (li.Count == 2 && _canBeGeoAfter(li[1].EndToken.Next))
                                    {
                                    }
                                    else if (li.Count == 3 && li[2].TerminItem != null && _canBeGeoAfter(li[2].EndToken.Next))
                                    {
                                    }
                                    else if (newName.IsGeoInDictionary)
                                    {
                                    }
                                    else if (newName.EndToken.IsNewlineAfter)
                                    {
                                    }
                                    else
                                    {
                                        return(null);
                                    }
                                }
                            }
                        }
                        Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(newName.EndToken, Pullenti.Ner.Core.NounPhraseParseAttr.ParsePronouns, 0, null);
                        if (npt != null && npt.EndToken != newName.EndToken)
                        {
                            if (li.Count >= 3 && li[2].TerminItem != null && npt.EndToken == li[2].EndToken)
                            {
                                addNoun = li[2];
                            }
                            else
                            {
                                return(null);
                            }
                        }
                        Pullenti.Ner.ReferentToken rtp = newName.Kit.ProcessReferent("PERSON", newName.BeginToken);
                        if (rtp != null)
                        {
                            return(null);
                        }
                        name = Pullenti.Ner.Core.ProperNameHelper.GetNameEx(newName.BeginToken, newName.EndToken, Pullenti.Morph.MorphClass.Adjective, Pullenti.Morph.MorphCase.Undefined, noun.TerminItem.Gender, false, false);
                    }
                    else
                    {
                        bool ok = false;
                        if (((k + 1) < li.Count) && li[k].TerminItem == null && li[k + 1].TerminItem != null)
                        {
                            ok = true;
                        }
                        else if ((k < li.Count) && li[k].OntoItem != null)
                        {
                            ok = true;
                        }
                        else if (k == li.Count && !newName.IsAdjInDictionary)
                        {
                            ok = true;
                        }
                        else if (MiscLocationHelper.CheckGeoObjectBefore(li[0].BeginToken) || canBeCityBefore)
                        {
                            ok = true;
                        }
                        else if (MiscLocationHelper.CheckGeoObjectAfter(li[k - 1].EndToken, false))
                        {
                            ok = true;
                        }
                        else if (li.Count == 3 && k == 2)
                        {
                            CityItemToken cit = CityItemToken.TryParse(li[2].BeginToken, null, false, null);
                            if (cit != null)
                            {
                                if (cit.Typ == CityItemToken.ItemType.City || cit.Typ == CityItemToken.ItemType.Noun)
                                {
                                    ok = true;
                                }
                            }
                        }
                        else if (li.Count == 2)
                        {
                            ok = _canBeGeoAfter(li[li.Count - 1].EndToken.Next);
                        }
                        if (!ok && !li[0].IsNewlineBefore && !li[0].Chars.IsAllLower)
                        {
                            Pullenti.Ner.ReferentToken rt00 = li[0].Kit.ProcessReferent("PERSONPROPERTY", li[0].BeginToken.Previous);
                            if (rt00 != null)
                            {
                                ok = true;
                            }
                        }
                        if (noun.TerminItem != null && noun.TerminItem.IsStrong && newName.IsAdjective)
                        {
                            ok = true;
                        }
                        if (noun.IsDoubt && adjList.Count == 0 && geoBefore == null)
                        {
                            return(null);
                        }
                        name = Pullenti.Ner.Core.ProperNameHelper.GetNameEx(newName.BeginToken, newName.EndToken, Pullenti.Morph.MorphClass.Adjective, Pullenti.Morph.MorphCase.Undefined, noun.TerminItem.Gender, false, false);
                        if (!ok && !attachAlways)
                        {
                            if (Pullenti.Ner.Core.MiscHelper.IsExistsInDictionary(newName.BeginToken, newName.EndToken, Pullenti.Morph.MorphClass.Adjective | Pullenti.Morph.MorphClass.Pronoun | Pullenti.Morph.MorphClass.Verb))
                            {
                                if (exists != null)
                                {
                                    foreach (Pullenti.Ner.Geo.GeoReferent e in exists)
                                    {
                                        if (e.FindSlot(Pullenti.Ner.Geo.GeoReferent.ATTR_NAME, name, true) != null)
                                        {
                                            ok = true;
                                            break;
                                        }
                                    }
                                }
                                if (!ok)
                                {
                                    return(null);
                                }
                            }
                        }
                        fullName = string.Format("{0} {1}", Pullenti.Ner.Core.ProperNameHelper.GetNameEx(li[0].BeginToken, noun.BeginToken.Previous, Pullenti.Morph.MorphClass.Adjective, Pullenti.Morph.MorphCase.Undefined, noun.TerminItem.Gender, false, false), noun.TerminItem.CanonicText);
                    }
                }
                else
                {
                    if (!attachAlways || ((noun.TerminItem != null && noun.TerminItem.CanonicText == "ФЕДЕРАЦИЯ")))
                    {
                        bool isLatin = noun.Chars.IsLatinLetter && newName.Chars.IsLatinLetter;
                        if (li.IndexOf(noun) > li.IndexOf(newName))
                        {
                            if (!isLatin)
                            {
                                return(null);
                            }
                        }
                        if (!newName.IsDistrictName && !Pullenti.Ner.Core.BracketHelper.CanBeStartOfSequence(newName.BeginToken, false, false))
                        {
                            if (adjList.Count == 0 && Pullenti.Ner.Core.MiscHelper.IsExistsInDictionary(newName.BeginToken, newName.EndToken, Pullenti.Morph.MorphClass.Noun | Pullenti.Morph.MorphClass.Pronoun))
                            {
                                if (li.Count == 2 && noun.IsCityRegion && (noun.WhitespacesAfterCount < 2))
                                {
                                }
                                else
                                {
                                    return(null);
                                }
                            }
                            if (!isLatin)
                            {
                                if ((noun.TerminItem.IsRegion && !attachAlways && ((!adjTerrBefore || newName.IsDoubt))) && !noun.IsCityRegion && !noun.TerminItem.IsSpecificPrefix)
                                {
                                    if (!MiscLocationHelper.CheckGeoObjectBefore(noun.BeginToken))
                                    {
                                        if (!noun.IsDoubt && noun.BeginToken != noun.EndToken)
                                        {
                                        }
                                        else if ((noun.TerminItem.IsAlwaysPrefix && li.Count == 2 && li[0] == noun) && li[1] == newName)
                                        {
                                        }
                                        else
                                        {
                                            return(null);
                                        }
                                    }
                                }
                                if (noun.IsDoubt && adjList.Count == 0)
                                {
                                    if (noun.TerminItem.Acronym == "МО" || noun.TerminItem.Acronym == "ЛО")
                                    {
                                        if (k == (li.Count - 1) && li[k].TerminItem != null)
                                        {
                                            addNoun = li[k];
                                            k++;
                                        }
                                        else if (li.Count == 2 && noun == li[0] && newName.ToString().EndsWith("совет"))
                                        {
                                        }
                                        else
                                        {
                                            return(null);
                                        }
                                    }
                                    else
                                    {
                                        return(null);
                                    }
                                }
                                Pullenti.Ner.ReferentToken pers = newName.Kit.ProcessReferent("PERSON", newName.BeginToken);
                                if (pers != null)
                                {
                                    return(null);
                                }
                            }
                        }
                    }
                    name = Pullenti.Ner.Core.MiscHelper.GetTextValue(newName.BeginToken, newName.EndToken, Pullenti.Ner.Core.GetTextAttr.No);
                    if (newName.BeginToken != newName.EndToken)
                    {
                        for (Pullenti.Ner.Token ttt = newName.BeginToken.Next; ttt != null && ttt.EndChar <= newName.EndChar; ttt = ttt.Next)
                        {
                            if (ttt.Chars.IsLetter)
                            {
                                TerrItemToken ty = TerrItemToken.TryParse(ttt, null, false, false, null);
                                if ((ty != null && ty.TerminItem != null && noun != null) && ((ty.TerminItem.CanonicText.Contains(noun.TerminItem.CanonicText) || noun.TerminItem.CanonicText.Contains(ty.TerminItem.CanonicText))))
                                {
                                    name = Pullenti.Ner.Core.MiscHelper.GetTextValue(newName.BeginToken, ttt.Previous, Pullenti.Ner.Core.GetTextAttr.No);
                                    break;
                                }
                            }
                        }
                    }
                    if (adjList.Count > 0)
                    {
                        Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(adjList[0].BeginToken, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                        if (npt != null && npt.EndToken == noun.EndToken)
                        {
                            altName = string.Format("{0} {1}", npt.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false), name);
                        }
                    }
                }
            }
            else
            {
                if ((li.Count == 1 && noun != null && noun.EndToken.Next != null) && (noun.EndToken.Next.GetReferent() is Pullenti.Ner.Geo.GeoReferent))
                {
                    Pullenti.Ner.Geo.GeoReferent g = noun.EndToken.Next.GetReferent() as Pullenti.Ner.Geo.GeoReferent;
                    if (noun.TerminItem != null)
                    {
                        string tyy = noun.TerminItem.CanonicText.ToLower();
                        bool   ooo = false;
                        if (g.FindSlot(Pullenti.Ner.Geo.GeoReferent.ATTR_TYPE, tyy, true) != null)
                        {
                            ooo = true;
                        }
                        else if (tyy.EndsWith("район") && g.FindSlot(Pullenti.Ner.Geo.GeoReferent.ATTR_TYPE, "район", true) != null)
                        {
                            ooo = true;
                        }
                        if (ooo)
                        {
                            return new Pullenti.Ner.ReferentToken(g, noun.BeginToken, noun.EndToken.Next)
                                   {
                                       Morph = noun.BeginToken.Morph
                                   }
                        }
                        ;
                    }
                }
                if ((li.Count == 1 && noun == li[0] && li[0].TerminItem != null) && TerrItemToken.TryParse(li[0].EndToken.Next, null, true, false, null) == null && TerrItemToken.TryParse(li[0].BeginToken.Previous, null, true, false, null) == null)
                {
                    if (li[0].Morph.Number == Pullenti.Morph.MorphNumber.Plural)
                    {
                        return(null);
                    }
                    int    cou = 0;
                    string str = li[0].TerminItem.CanonicText.ToLower();
                    for (Pullenti.Ner.Token tt = li[0].BeginToken.Previous; tt != null; tt = tt.Previous)
                    {
                        if (tt.IsNewlineAfter)
                        {
                            cou += 10;
                        }
                        else
                        {
                            cou++;
                        }
                        if (cou > 500)
                        {
                            break;
                        }
                        Pullenti.Ner.Geo.GeoReferent g = tt.GetReferent() as Pullenti.Ner.Geo.GeoReferent;
                        if (g == null)
                        {
                            continue;
                        }
                        bool ok = true;
                        cou = 0;
                        for (tt = li[0].EndToken.Next; tt != null; tt = tt.Next)
                        {
                            if (tt.IsNewlineBefore)
                            {
                                cou += 10;
                            }
                            else
                            {
                                cou++;
                            }
                            if (cou > 500)
                            {
                                break;
                            }
                            TerrItemToken tee = TerrItemToken.TryParse(tt, null, true, false, null);
                            if (tee == null)
                            {
                                continue;
                            }
                            ok = false;
                            break;
                        }
                        if (ok)
                        {
                            for (int ii = 0; g != null && (ii < 3); g = g.Higher, ii++)
                            {
                                if (g.FindSlot(Pullenti.Ner.Geo.GeoReferent.ATTR_TYPE, str, true) != null)
                                {
                                    return new Pullenti.Ner.ReferentToken(g, li[0].BeginToken, li[0].EndToken)
                                           {
                                               Morph = noun.BeginToken.Morph
                                           }
                                }
                                ;
                            }
                        }
                        break;
                    }
                }
                return(null);
            }
            Pullenti.Ner.Geo.GeoReferent ter = null;
            if (exObj != null && (exObj.Tag is Pullenti.Ner.Geo.GeoReferent))
            {
                ter = exObj.Tag as Pullenti.Ner.Geo.GeoReferent;
            }
            else
            {
                ter = new Pullenti.Ner.Geo.GeoReferent();
                if (exObj != null)
                {
                    Pullenti.Ner.Geo.GeoReferent geo = exObj.OntoItem.Referent as Pullenti.Ner.Geo.GeoReferent;
                    if (geo != null && !geo.IsCity)
                    {
                        ter.MergeSlots2(geo, li[0].Kit.BaseLanguage);
                    }
                    else
                    {
                        ter.AddName(name);
                    }
                    if (noun == null && exObj.CanBeCity)
                    {
                        ter.AddTypCity(li[0].Kit.BaseLanguage);
                    }
                    else
                    {
                    }
                }
                else if (newName != null)
                {
                    ter.AddName(name);
                    if (altName != null)
                    {
                        ter.AddName(altName);
                    }
                }
                if (noun != null)
                {
                    if (noun.TerminItem.CanonicText == "АО")
                    {
                        ter.AddTyp((li[0].Kit.BaseLanguage.IsUa ? "АВТОНОМНИЙ ОКРУГ" : "АВТОНОМНЫЙ ОКРУГ"));
                    }
                    else if (noun.TerminItem.CanonicText == "МУНИЦИПАЛЬНОЕ СОБРАНИЕ" || noun.TerminItem.CanonicText == "МУНІЦИПАЛЬНЕ ЗБОРИ")
                    {
                        ter.AddTyp((li[0].Kit.BaseLanguage.IsUa ? "МУНІЦИПАЛЬНЕ УТВОРЕННЯ" : "МУНИЦИПАЛЬНОЕ ОБРАЗОВАНИЕ"));
                    }
                    else if (noun.TerminItem.Acronym == "МО" && addNoun != null)
                    {
                        ter.AddTyp(addNoun.TerminItem.CanonicText);
                    }
                    else
                    {
                        if (noun.TerminItem.CanonicText == "СОЮЗ" && exObj != null && exObj.EndChar > noun.EndChar)
                        {
                            return new Pullenti.Ner.ReferentToken(ter, exObj.BeginToken, exObj.EndToken)
                                   {
                                       Morph = exObj.Morph
                                   }
                        }
                        ;
                        ter.AddTyp(noun.TerminItem.CanonicText);
                        if (noun.TerminItem.IsRegion && ter.IsState)
                        {
                            ter.AddTypReg(li[0].Kit.BaseLanguage);
                        }
                    }
                }
                if (ter.IsState && ter.IsRegion)
                {
                    foreach (TerrItemToken a in adjList)
                    {
                        if (a.TerminItem.IsRegion)
                        {
                            ter.AddTypReg(li[0].Kit.BaseLanguage);
                            break;
                        }
                    }
                }
                if (ter.IsState)
                {
                    if (fullName != null)
                    {
                        ter.AddName(fullName);
                    }
                }
            }
            Pullenti.Ner.ReferentToken res = new Pullenti.Ner.ReferentToken(ter, li[0].BeginToken, li[k - 1].EndToken);
            if (noun != null && noun.Morph.Class.IsNoun)
            {
                res.Morph = noun.Morph;
            }
            else
            {
                res.Morph = new Pullenti.Ner.MorphCollection();
                for (int ii = 0; ii < k; ii++)
                {
                    foreach (Pullenti.Morph.MorphBaseInfo v in li[ii].Morph.Items)
                    {
                        Pullenti.Morph.MorphBaseInfo bi = new Pullenti.Morph.MorphBaseInfo();
                        bi.CopyFrom(v);
                        if (noun != null)
                        {
                            if (bi.Class.IsAdjective)
                            {
                                bi.Class = Pullenti.Morph.MorphClass.Noun;
                            }
                        }
                        res.Morph.AddItem(bi);
                    }
                }
            }
            if (li[0].TerminItem != null && li[0].TerminItem.IsSpecificPrefix)
            {
                res.BeginToken = li[0].EndToken.Next;
            }
            if (addNoun != null && addNoun.EndChar > res.EndChar)
            {
                res.EndToken = addNoun.EndToken;
            }
            if ((res.BeginToken.Previous is Pullenti.Ner.TextToken) && (res.WhitespacesBeforeCount < 2))
            {
                Pullenti.Ner.TextToken tt = res.BeginToken.Previous as Pullenti.Ner.TextToken;
                if (tt.Term == "АР")
                {
                    foreach (string ty in ter.Typs)
                    {
                        if (ty.Contains("республика") || ty.Contains("республіка"))
                        {
                            res.BeginToken = tt;
                            break;
                        }
                    }
                }
            }
            return(res);
        }
Пример #8
0
        /// <summary>
        /// Builds the candidate semantic links going from a master noun phrase to a dependent element.
        /// </summary>
        /// <param name="npt1">master noun phrase; a null value makes the method return null</param>
        /// <param name="slave">dependent element: a VerbPhraseToken is forwarded to _tryCreateInf,
        /// a SemanticAbstractSlave is processed here, any other type produces an empty list</param>
        /// <param name="gr">derivate group used for the control-model lookups; may be null
        /// (TryCreateLinks passes null when no derivate groups were found for the master)</param>
        /// <returns>null when npt1 or slave is null, the result of _tryCreateInf for a verb phrase,
        /// otherwise the list of link variants (possibly empty)</returns>
        static List <SemanticLink> _tryCreateNoun(Pullenti.Ner.Core.NounPhraseToken npt1, Pullenti.Ner.MetaToken slave, Pullenti.Semantic.Utils.DerivateGroup gr)
        {
            if (npt1 == null || slave == null)
            {
                return(null);
            }
            if (slave is Pullenti.Ner.Core.VerbPhraseToken)
            {
                return(_tryCreateInf(npt1, slave as Pullenti.Ner.Core.VerbPhraseToken, gr));
            }
            SemanticAbstractSlave sla2 = slave as SemanticAbstractSlave;
            List <SemanticLink>   res  = new List <SemanticLink>();

            if (sla2 == null)
            {
                return(res);
            }
            Pullenti.Semantic.Utils.ControlModelItem cit = FindControlItem(npt1, gr);
            _createRoles(cit, sla2.Preposition, sla2.Morph.Case, res, false, false);

            // A lone Agent link answering the instrumental question may be overridden by the role
            // that the group's first model item (when it is a verb item) assigns to that question.
            // gr is allowed to be null here (see the gr != null guard further down and the null
            // passed by TryCreateLinks), so it must be checked before it is dereferenced.
            if (res.Count == 1 && res[0].Role == SemanticRole.Agent && res[0].Question == Pullenti.Semantic.Utils.ControlModelQuestion.BaseInstrumental)
            {
                if (gr != null && gr.Model.Items.Count > 0 && gr.Model.Items[0].Typ == Pullenti.Semantic.Utils.ControlModelItemType.Verb && gr.Model.Items[0].Links.ContainsKey(Pullenti.Semantic.Utils.ControlModelQuestion.BaseInstrumental))
                {
                    res[0].Role = gr.Model.Items[0].Links[Pullenti.Semantic.Utils.ControlModelQuestion.BaseInstrumental];
                }
            }
            bool ok = false;

            Pullenti.Semantic.Utils.DerivateWord w = FindWordInGroup(npt1, gr);

            // Both word lists are matched against the last token of the slave's source, so without
            // a source neither loop can match; the check is loop-invariant and is done once here.
            if (sla2.Source != null)
            {
                if (w != null && w.NextWords != null && w.NextWords.Count > 0)
                {
                    foreach (string n in w.NextWords)
                    {
                        if (sla2.Source.EndToken.IsValue(n, null))
                        {
                            ok = true;
                            break;
                        }
                    }
                }
                if (gr != null && gr.Model.Pacients.Count > 0)
                {
                    foreach (string n in gr.Model.Pacients)
                    {
                        if (sla2.Source.EndToken.IsValue(n, null))
                        {
                            ok = true;
                            break;
                        }
                    }
                }
            }
            if (ok)
            {
                // The slave ends with a listed word: make sure at least one link exists
                // (a genitive Pacient link flagged as an idiom), then boost every link.
                if (res.Count == 0)
                {
                    res.Add(new SemanticLink()
                    {
                        Question = Pullenti.Semantic.Utils.ControlModelQuestion.BaseGenetive, Role = SemanticRole.Pacient, Idiom = true
                    });
                }
                foreach (SemanticLink r in res)
                {
                    r.Rank += 4;
                    if (r.Role == SemanticRole.Common)
                    {
                        r.Role = SemanticRole.Strong;
                    }
                    // Extra rank when the slave starts at the token right after the master.
                    if (npt1.EndToken.Next == sla2.BeginToken)
                    {
                        r.Rank += 2;
                    }
                    r.Idiom = true;
                }
            }
            return(res);
        }
Пример #9
0
        /// <summary>
        /// Попробовать создать семантическую связь между элементами.
        /// Элементом м.б. именная (NounPhraseToken) или глагольная группа (VerbPhraseToken).
        /// </summary>
        /// <param name="master">основной элемент</param>
        /// <param name="slave">стыкуемый элемент (также м.б. SemanticAbstractSlave)</param>
        /// <param name="onto">дополнительный онтологический словарь</param>
        /// <returns>список вариантов (возможно, пустой)</returns>
        public static List <SemanticLink> TryCreateLinks(Pullenti.Ner.MetaToken master, Pullenti.Ner.MetaToken slave, ISemanticOnto onto = null)
        {
            List <SemanticLink> res = new List <SemanticLink>();

            Pullenti.Ner.Core.VerbPhraseToken vpt1 = master as Pullenti.Ner.Core.VerbPhraseToken;
            Pullenti.Ner.Core.VerbPhraseToken vpt2 = slave as Pullenti.Ner.Core.VerbPhraseToken;
            Pullenti.Ner.Core.NounPhraseToken npt1 = master as Pullenti.Ner.Core.NounPhraseToken;
            if (slave is Pullenti.Ner.Core.NounPhraseToken)
            {
                slave = SemanticAbstractSlave.CreateFromNoun(slave as Pullenti.Ner.Core.NounPhraseToken);
            }
            SemanticAbstractSlave sla2 = slave as SemanticAbstractSlave;

            if (vpt2 != null)
            {
                if (!vpt2.FirstVerb.IsVerbInfinitive || !vpt2.LastVerb.IsVerbInfinitive)
                {
                    return(res);
                }
            }
            List <Pullenti.Semantic.Utils.DerivateGroup> grs = FindDerivates(master);

            if (grs == null || grs.Count == 0)
            {
                List <SemanticLink> rl = (vpt1 != null ? _tryCreateVerb(vpt1, slave, null) : _tryCreateNoun(npt1, slave, null));
                if (rl != null)
                {
                    res.AddRange(rl);
                }
            }
            else
            {
                foreach (Pullenti.Semantic.Utils.DerivateGroup gr in grs)
                {
                    List <SemanticLink> rl = (vpt1 != null ? _tryCreateVerb(vpt1, slave, gr) : _tryCreateNoun(npt1, slave, gr));
                    if (rl == null || rl.Count == 0)
                    {
                        continue;
                    }
                    res.AddRange(rl);
                }
            }
            if ((npt1 != null && sla2 != null && sla2.Morph.Case.IsGenitive) && sla2.Preposition == null)
            {
                if (npt1.Noun.BeginToken.GetMorphClassInDictionary().IsPersonalPronoun)
                {
                }
                else
                {
                    bool hasGen = false;
                    foreach (SemanticLink r in res)
                    {
                        if (r.Question == Pullenti.Semantic.Utils.ControlModelQuestion.BaseGenetive)
                        {
                            hasGen = true;
                            break;
                        }
                    }
                    if (!hasGen)
                    {
                        res.Add(new SemanticLink()
                        {
                            Modelled = true, Master = npt1, Slave = sla2, Rank = 0.5, Question = Pullenti.Semantic.Utils.ControlModelQuestion.BaseGenetive
                        });
                    }
                }
            }
            if (onto != null)
            {
                string str1 = GetKeyword(master);
                string str2 = GetKeyword(slave);
                if (str2 != null)
                {
                    if (onto.CheckLink(str1, str2))
                    {
                        if (res.Count > 0)
                        {
                            foreach (SemanticLink r in res)
                            {
                                r.Rank += 3;
                                if (r.Role == SemanticRole.Common)
                                {
                                    r.Role = SemanticRole.Strong;
                                }
                            }
                        }
                        else
                        {
                            res.Add(new SemanticLink()
                            {
                                Role = SemanticRole.Strong, Master = master, Slave = slave, Rank = 3
                            });
                        }
                    }
                }
            }
            if (npt1 != null)
            {
                if (((npt1.Adjectives.Count > 0 && npt1.Adjectives[0].BeginToken.Morph.Class.IsPronoun)) || npt1.Anafor != null)
                {
                    foreach (SemanticLink r in res)
                    {
                        if (r.Question == Pullenti.Semantic.Utils.ControlModelQuestion.BaseGenetive)
                        {
                            r.Rank -= 0.5;
                            if (r.Role == SemanticRole.Strong)
                            {
                                r.Role = SemanticRole.Common;
                            }
                        }
                    }
                }
            }
            foreach (SemanticLink r in res)
            {
                if (r.Role == SemanticRole.Strong)
                {
                    foreach (SemanticLink rr in res)
                    {
                        if (rr != r && rr.Role != SemanticRole.Strong)
                        {
                            rr.Rank /= 2;
                        }
                    }
                }
            }
            for (int i = 0; i < res.Count; i++)
            {
                for (int j = 0; j < (res.Count - 1); j++)
                {
                    if (res[j].CompareTo(res[j + 1]) > 0)
                    {
                        SemanticLink r = res[j];
                        res[j]     = res[j + 1];
                        res[j + 1] = r;
                    }
                }
            }
            foreach (SemanticLink r in res)
            {
                r.Master = master;
                r.Slave  = slave;
            }
            return(res);
        }