Exemple #1
0
 // Checks that every levelled slot of this referent lies on a strictly lower
 // level than the slots of the argument.
 // upperParts: the referent expected to hold the higher-level parts.
 // ignoreEquals: not read anywhere in this method.
 // Returns true when no conflict is found (always true for a DecreeReferent argument).
 internal bool IsAllItemsLessLevel(Pullenti.Ner.Referent upperParts, bool ignoreEquals)
 {
     // A decree is accepted as the upper referent without further checks.
     if (upperParts is DecreeReferent)
     {
         return true;
     }

     foreach (Pullenti.Ner.Slot own in Slots)
     {
         int ownLevel = this._getLevel(own.TypeName);

         // Only slots with a non-negative level take part in the comparison.
         if (ownLevel >= 0)
         {
             bool upperHasSameType = upperParts.FindSlot(own.TypeName, null, true) != null;
             if (upperHasSameType)
             {
                 // Same slot type on both sides: the values have to match as well.
                 if (upperParts.FindSlot(own.TypeName, own.Value, true) == null)
                 {
                     return false;
                 }
             }
             else
             {
                 // Otherwise no slot of the upper referent may be on the same or a higher level.
                 foreach (Pullenti.Ner.Slot upper in upperParts.Slots)
                 {
                     if (this._getLevel(upper.TypeName) >= ownLevel)
                     {
                         return false;
                     }
                 }
             }
         }
     }

     return true;
 }
Exemple #2
0
        /// <summary>
        /// Checks whether the given referent may act as the ATTR_REF of this one,
        /// based on this referent's <c>Name</c> and the kind of <paramref name="r"/>.
        /// </summary>
        /// <param name="r">Candidate referent; only geo referents and referents whose type name is "ORGANIZATION" can be accepted.</param>
        /// <returns>true when the combination is allowed; false otherwise, including when <c>Name</c> or <paramref name="r"/> is null.</returns>
        public bool CanHasRef(Pullenti.Ner.Referent r)
        {
            string nam = Name;

            if (nam == null || r == null)
            {
                return(false);
            }

            Pullenti.Ner.Geo.GeoReferent g = r as Pullenti.Ner.Geo.GeoReferent;
            if (g != null)
            {
                // "президент" (president) / "губернатор" (governor): states and regions only.
                if (Pullenti.Morph.LanguageHelper.EndsWithEx(nam, "президент", "губернатор", null, null))
                {
                    return(g.IsState || g.IsRegion);
                }
                // "мэр" (mayor) / "градоначальник" (city head): cities only.
                if (nam == "мэр" || nam == "градоначальник")
                {
                    return(g.IsCity);
                }
                // "глава" (head): any geo referent is accepted.
                if (nam == "глава")
                {
                    return(true);
                }
                return(false);
            }
            if (r.TypeName == "ORGANIZATION")
            {
                // The names handled in the geo branch above are never attached to an organization.
                if ((Pullenti.Morph.LanguageHelper.EndsWith(nam, "губернатор") || nam == "мэр" || nam == "градоначальник") || nam == "президент")
                {
                    return(false);
                }
                // "министр" (minister) requires a slot with the value "министерство" (ministry).
                if (nam.Contains("министр"))
                {
                    if (r.FindSlot(null, "министерство", true) == null)
                    {
                        return(false);
                    }
                }
                // "директор" (director) is rejected when a slot has the value "суд" (court).
                // Ordinal comparison keeps the result independent of the current culture,
                // in line with the ordinal Contains/== checks used elsewhere in this method.
                if (nam.EndsWith("директор", System.StringComparison.Ordinal))
                {
                    if ((r.FindSlot(null, "суд", true)) != null)
                    {
                        return(false);
                    }
                }
                return(true);
            }
            return(false);
        }
Exemple #3
0
        /// <summary>
        /// Tries to recognize an eponym (the "named after ..." part of an organization name)
        /// starting at token <paramref name="t"/>.
        /// </summary>
        /// <param name="t">Token where the eponym is expected to start; null yields null.</param>
        /// <param name="mustHasPrefix">
        /// When true, text tokens only produce a result if the "ИМЕНИ"/"ИМ" prefix word was found.
        /// The date/age handling for non-text tokens returns before this flag is checked.
        /// </param>
        /// <returns>A token covering the eponym with its <c>Eponyms</c> list filled, or null when nothing was recognized.</returns>
        public static OrgItemEponymToken TryAttach(Pullenti.Ner.Token t, bool mustHasPrefix = false)
        {
            Pullenti.Ner.TextToken tt = t as Pullenti.Ner.TextToken;
            if (tt == null)
            {
                // Non-text token: only a few fixed dates or an "age + capitalized word" pair are accepted.
                if (t == null)
                {
                    return(null);
                }
                Pullenti.Ner.Referent r1 = t.GetReferent();
                if (r1 != null && r1.TypeName == "DATE")
                {
                    string str = r1.ToString().ToUpper();
                    // May 1, October 7, May 9, March 8.
                    if ((str == "1 МАЯ" || str == "7 ОКТЯБРЯ" || str == "9 МАЯ") || str == "8 МАРТА")
                    {
                        OrgItemEponymToken dt = new OrgItemEponymToken(t, t)
                        {
                            Eponyms = new List <string>()
                        };
                        dt.Eponyms.Add(str);
                        return(dt);
                    }
                }
                Pullenti.Ner.NumberToken age = Pullenti.Ner.Core.NumberHelper.TryParseAge(t);
                // The token after the age must be a text/referent token, close by, Cyrillic and not all lower case.
                if ((age != null && (((age.EndToken.Next is Pullenti.Ner.TextToken) || (age.EndToken.Next is Pullenti.Ner.ReferentToken))) && (age.WhitespacesAfterCount < 3)) && !age.EndToken.Next.Chars.IsAllLower && age.EndToken.Next.Chars.IsCyrillicLetter)
                {
                    OrgItemEponymToken dt = new OrgItemEponymToken(t, age.EndToken.Next)
                    {
                        Eponyms = new List <string>()
                    };
                    dt.Eponyms.Add(string.Format("{0} {1}", age.Value, dt.EndToken.GetSourceText().ToUpper()));
                    return(dt);
                }
                return(null);
            }
            Pullenti.Ner.Token t1 = null;   // first token of the name itself (after any prefix)
            bool full             = false;  // set for "ИМЕНИ" and for "ИМ" followed by a dot
            bool hasName          = false;  // set when the "ИМЕНИ"/"ИМ" prefix word was seen

            if (tt.Term == "ИМЕНИ" || tt.Term == "ІМЕНІ")
            {
                t1      = t.Next;
                full    = true;
                hasName = true;
            }
            else if (((tt.Term == "ИМ" || tt.Term == "ІМ")) && tt.Next != null)
            {
                if (tt.Next.IsChar('.'))
                {
                    t1   = tt.Next.Next;
                    full = true;
                }
                else if ((tt.Next is Pullenti.Ner.TextToken) && tt.Chars.IsAllLower && !tt.Next.Chars.IsAllLower)
                {
                    // Lower-case "им" without a dot is accepted only before a word that is not all lower case.
                    t1 = tt.Next;
                }
                hasName = true;
            }
            else if (tt.Previous != null && ((tt.Previous.IsValue("ФОНД", null) || tt.Previous.IsValue("ХРАМ", null) || tt.Previous.IsValue("ЦЕРКОВЬ", "ЦЕРКВА"))))
            {
                // No prefix, but the previous word is "ФОНД"/"ХРАМ"/"ЦЕРКОВЬ" (fund/temple/church):
                // the current word itself may be the name if it passes the filters below.
                if ((!tt.Chars.IsCyrillicLetter || tt.Morph.Class.IsPreposition || tt.Morph.Class.IsConjunction) || !tt.Chars.IsLetter)
                {
                    return(null);
                }
                if (tt.WhitespacesBeforeCount != 1)
                {
                    return(null);
                }
                if (tt.Chars.IsAllLower)
                {
                    return(null);
                }
                if (tt.Morph.Class.IsAdjective)
                {
                    // An adjective that opens a multi-token noun phrase is rejected.
                    Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(tt, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                    if (npt != null && npt.BeginToken != npt.EndToken)
                    {
                        return(null);
                    }
                }
                OrgItemNameToken na = OrgItemNameToken.TryAttach(tt, null, false, true);
                if (na != null)
                {
                    if (na.IsEmptyWord || na.IsStdName || na.IsStdTail)
                    {
                        return(null);
                    }
                }
                t1 = tt;
            }
            if (t1 == null || ((t1.IsNewlineBefore && !full)))
            {
                return(null);
            }
            if (tt.Previous != null && tt.Previous.Morph.Class.IsPreposition)
            {
                return(null);
            }
            if (mustHasPrefix && !hasName)
            {
                return(null);
            }
            Pullenti.Ner.Referent r = t1.GetReferent();
            // A DATE referent with a DAY slot and no YEAR slot is taken as the eponym when the prefix was full.
            if ((r != null && r.TypeName == "DATE" && full) && r.FindSlot("DAY", null, true) != null && r.FindSlot("YEAR", null, true) == null)
            {
                OrgItemEponymToken dt = new OrgItemEponymToken(t, t1)
                {
                    Eponyms = new List <string>()
                };
                dt.Eponyms.Add(r.ToString().ToUpper());
                return(dt);
            }
            bool holy = false;  // set when a "СВЯТОЙ"/"СВЯТИЙ"/"СВ"/"СВЯТ" (saint) word precedes the name

            if ((t1.IsValue("СВЯТОЙ", null) || t1.IsValue("СВЯТИЙ", null) || t1.IsValue("СВ", null)) || t1.IsValue("СВЯТ", null))
            {
                t1   = t1.Next;
                holy = true;
                if (t1 != null && t1.IsChar('.'))
                {
                    t1 = t1.Next;
                }
            }
            if (t1 == null)
            {
                return(null);
            }
            Pullenti.Morph.MorphClass cl = t1.GetMorphClassInDictionary();
            if (cl.IsNoun || cl.IsAdjective)
            {
                // Ask the PERSON analyzer; accept a multi-token person whose last token is the last name.
                Pullenti.Ner.ReferentToken rt = t1.Kit.ProcessReferent("PERSON", t1);
                if (rt != null && rt.Referent.TypeName == "PERSON" && rt.BeginToken != rt.EndToken)
                {
                    string e = rt.Referent.GetStringValue("LASTNAME");
                    if (e != null)
                    {
                        if (rt.EndToken.IsValue(e, null))
                        {
                            OrgItemEponymToken re = new OrgItemEponymToken(t, rt.EndToken);
                            re.Eponyms.Add(rt.EndToken.GetSourceText());
                            return(re);
                        }
                    }
                }
            }
            Pullenti.Ner.NumberToken nt = Pullenti.Ner.Core.NumberHelper.TryParseAnniversary(t1);
            if (nt != null && nt.Typ == Pullenti.Ner.NumberSpellingType.Age)
            {
                // Anniversary followed by a noun phrase: "<N>-ЛЕТ <phrase>" ("РОКІВ" when the base language is Ukrainian).
                Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(nt.EndToken.Next, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                if (npt != null)
                {
                    string             s   = string.Format("{0}-{1} {2}", nt.Value, (t.Kit.BaseLanguage.IsUa ? "РОКІВ" : "ЛЕТ"), Pullenti.Ner.Core.MiscHelper.GetTextValue(npt.BeginToken, npt.EndToken, Pullenti.Ner.Core.GetTextAttr.No));
                    OrgItemEponymToken res = new OrgItemEponymToken(t, npt.EndToken);
                    res.Eponyms.Add(s);
                    return(res);
                }
            }
            List <PersonItemToken> its = PersonItemToken.TryAttach(t1);

            if (its == null)
            {
                // No person items: a geo referent token is still accepted as the eponym.
                if ((t1 is Pullenti.Ner.ReferentToken) && (t1.GetReferent() is Pullenti.Ner.Geo.GeoReferent))
                {
                    string             s  = Pullenti.Ner.Core.MiscHelper.GetTextValue(t1, t1, Pullenti.Ner.Core.GetTextAttr.No);
                    OrgItemEponymToken re = new OrgItemEponymToken(t, t1);
                    re.Eponyms.Add(s);
                    return(re);
                }
                return(null);
            }
            List <string> eponims = new List <string>();
            int           i       = 0;
            int           j;

            // Skip one leading lower-case word. The Count check keeps an empty list from
            // throwing here; it is then rejected by the bounds check just below.
            if (its.Count > 0 && its[i].Typ == PersonItemType.LocaseWord)
            {
                i++;
            }
            if (i >= its.Count)
            {
                return(null);
            }
            if (!full)
            {
                if (its[i].BeginToken.Morph.Class.IsAdjective && !its[i].BeginToken.Morph.Class.IsProperSurname)
                {
                    return(null);
                }
            }
            if (its[i].Typ == PersonItemType.Initial)
            {
                // Initial(s) + surname/name, optionally repeated through "and" + initial.
                i++;
                while (true)
                {
                    if ((i < its.Count) && its[i].Typ == PersonItemType.Initial)
                    {
                        i++;
                    }
                    if (i >= its.Count || ((its[i].Typ != PersonItemType.Surname && its[i].Typ != PersonItemType.Name)))
                    {
                        break;
                    }
                    eponims.Add(its[i].Value);
                    t1 = its[i].EndToken;
                    if ((i + 2) >= its.Count || its[i + 1].Typ != PersonItemType.And || its[i + 2].Typ != PersonItemType.Initial)
                    {
                        break;
                    }
                    i += 3;
                }
            }
            else if (((i + 1) < its.Count) && its[i].Typ == PersonItemType.Name && its[i + 1].Typ == PersonItemType.Surname)
            {
                // Name + surname, optionally followed by "and" + name + surname.
                eponims.Add(its[i + 1].Value);
                t1 = its[i + 1].EndToken;
                i += 2;
                if ((((i + 2) < its.Count) && its[i].Typ == PersonItemType.And && its[i + 1].Typ == PersonItemType.Name) && its[i + 2].Typ == PersonItemType.Surname)
                {
                    eponims.Add(its[i + 2].Value);
                    t1 = its[i + 2].EndToken;
                }
            }
            else if (its[i].Typ == PersonItemType.Surname)
            {
                // Two trailing items with equal Chars are glued into one surname value.
                if (its.Count == (i + 2) && its[i].Chars == its[i + 1].Chars)
                {
                    its[i].Value   += (" " + its[i + 1].Value);
                    its[i].EndToken = its[i + 1].EndToken;
                    its.RemoveAt(i + 1);
                }
                eponims.Add(its[i].Value);
                if (((i + 1) < its.Count) && its[i + 1].Typ == PersonItemType.Name)
                {
                    // Swallow a following name unless it is itself followed by a surname.
                    if ((i + 2) == its.Count)
                    {
                        i++;
                    }
                    else if (its[i + 2].Typ != PersonItemType.Surname)
                    {
                        i++;
                    }
                }
                else if (((i + 1) < its.Count) && its[i + 1].Typ == PersonItemType.Initial)
                {
                    // Swallow one or two trailing initials when they end the item list.
                    if ((i + 2) == its.Count)
                    {
                        i++;
                    }
                    else if (its[i + 2].Typ == PersonItemType.Initial && (i + 3) == its.Count)
                    {
                        i += 2;
                    }
                }
                else if (((i + 2) < its.Count) && its[i + 1].Typ == PersonItemType.And && its[i + 2].Typ == PersonItemType.Surname)
                {
                    // "Surname and Surname": the second is rejected only when it parses as a
                    // noun phrase whose case is defined and not genitive.
                    bool ok = true;
                    Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(its[i + 2].BeginToken, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                    if (npt != null && !npt.Morph.Case.IsGenitive && !npt.Morph.Case.IsUndefined)
                    {
                        ok = false;
                    }
                    if (ok)
                    {
                        eponims.Add(its[i + 2].Value);
                        i += 2;
                    }
                }
                t1 = its[i].EndToken;
            }
            else if (its[i].Typ == PersonItemType.Name && holy)
            {
                // Saint's name, optionally with a second non-initial item that has the same Chars.
                t1 = its[i].EndToken;
                bool sec = false;
                if (((i + 1) < its.Count) && its[i].Chars == its[i + 1].Chars && its[i + 1].Typ != PersonItemType.Initial)
                {
                    sec = true;
                    t1  = its[i + 1].EndToken;
                }
                if (sec)
                {
                    eponims.Add(string.Format("СВЯТ.{0} {1}", its[i].Value, its[i + 1].Value));
                }
                else
                {
                    eponims.Add(string.Format("СВЯТ.{0}", its[i].Value));
                }
            }
            else if (full && (i + 1) == its.Count && ((its[i].Typ == PersonItemType.Name || its[i].Typ == PersonItemType.Surname)))
            {
                // A single trailing name or surname is enough after a full prefix.
                t1 = its[i].EndToken;
                eponims.Add(its[i].Value);
            }
            else if ((((i + 2) < its.Count) && its[i].Typ == PersonItemType.Name && its.Count == 3 && its[i + 1].Typ == PersonItemType.Name) && its[i + 2].Typ == PersonItemType.Surname)
            {
                // Name + name + surname in a three-item list. The explicit (i + 2) bound prevents
                // an out-of-range read of its[i + 2] when a leading lower-case word was skipped (i == 1).
                t1 = its[i + 2].EndToken;
                eponims.Add(string.Format("{0} {1} {2}", its[i].Value, its[i + 1].Value, its[i + 2].Value));
                i += 2;
            }
            if (eponims.Count == 0)
            {
                return(null);
            }
            return(new OrgItemEponymToken(t, t1)
            {
                Eponyms = eponims
            });
        }
Exemple #4
0
        /// <summary>
        /// Copies the slots of <paramref name="obj"/> into this referent, filtering names and
        /// types by language, then removes type values made redundant by more specific ones
        /// and duplicate ATTR_HIGHER slots.
        /// </summary>
        /// <param name="obj">Referent whose slots are merged in.</param>
        /// <param name="lang">Language used to decide which name/type values are kept.</param>
        internal void MergeSlots2(Pullenti.Ner.Referent obj, Pullenti.Morph.MorphLang lang)
        {
            // Pass 1: copy every slot; for names and types keep only values whose first character
            // matches the language (Latin for English, non-Latin otherwise) and skip " ССР" endings.
            foreach (Pullenti.Ner.Slot src in obj.Slots)
            {
                bool nameOrType = src.TypeName == ATTR_NAME || src.TypeName == ATTR_TYPE;
                if (nameOrType)
                {
                    string text = (string)src.Value;
                    bool startsLatin = Pullenti.Morph.LanguageHelper.IsLatinChar(text[0]);
                    if (startsLatin != lang.IsEn)
                    {
                        continue;
                    }
                    if (Pullenti.Morph.LanguageHelper.EndsWith(text, " ССР"))
                    {
                        continue;
                    }
                }
                this.AddSlot(src.TypeName, src.Value, false, src.Count);
            }

            // Pass 2: if the filter left this referent without any name (then: any type) while
            // the source has some, take all of the source's values of that kind unfiltered.
            foreach (string attr in new string[] { ATTR_NAME, ATTR_TYPE })
            {
                bool missingHere = this.FindSlot(attr, null, true) == null;
                if (!missingHere || obj.FindSlot(attr, null, true) == null)
                {
                    continue;
                }
                foreach (Pullenti.Ner.Slot src in obj.Slots)
                {
                    if (src.TypeName == attr)
                    {
                        this.AddSlot(src.TypeName, src.Value, false, src.Count);
                    }
                }
            }

            // A territory that also has a country code or a state/empire type loses the "территория" type.
            if (IsTerritory)
            {
                bool stateLike = Alpha2 != null;
                if (!stateLike)
                {
                    foreach (string kind in new string[] { "государство", "держава", "империя", "імперія", "state" })
                    {
                        if (this.FindSlot(ATTR_TYPE, kind, true) != null)
                        {
                            stateLike = true;
                            break;
                        }
                    }
                }
                if (stateLike)
                {
                    Pullenti.Ner.Slot territory = this.FindSlot(ATTR_TYPE, "территория", true);
                    if (territory != null)
                    {
                        Slots.Remove(territory);
                    }
                }
            }

            // A state loses the first generic "region" type value found.
            if (IsState)
            {
                for (int k = 0; k < Slots.Count; k++)
                {
                    Pullenti.Ner.Slot cur = Slots[k];
                    if (cur.TypeName != ATTR_TYPE)
                    {
                        continue;
                    }
                    string kind = cur.Value.ToString();
                    if (kind == "регион" || kind == "регіон" || kind == "region")
                    {
                        Slots.Remove(cur);
                        break;
                    }
                }
            }

            // A city loses its generic "город"/"місто"/"city" type when another type value
            // is also recognized by _isCity.
            if (IsCity)
            {
                Pullenti.Ner.Slot generic = this.FindSlot(ATTR_TYPE, "город", true);
                if (generic == null)
                {
                    generic = this.FindSlot(ATTR_TYPE, "місто", true);
                }
                if (generic == null)
                {
                    generic = this.FindSlot(ATTR_TYPE, "city", true);
                }
                if (generic != null)
                {
                    for (int k = 0; k < Slots.Count; k++)
                    {
                        Pullenti.Ner.Slot other = Slots[k];
                        if (other.TypeName == ATTR_TYPE && other != generic && _isCity((string)other.Value))
                        {
                            Slots.Remove(generic);
                            break;
                        }
                    }
                }
            }

            // Keep only the first ATTR_HIGHER slot.
            bool higherSeen = false;
            int pos = 0;
            while (pos < Slots.Count)
            {
                if (Slots[pos].TypeName != ATTR_HIGHER)
                {
                    pos++;
                }
                else if (higherSeen)
                {
                    // Removal shifts the next slot into this position, so pos stays put.
                    Slots.RemoveAt(pos);
                }
                else
                {
                    higherSeen = true;
                    pos++;
                }
            }

            this._mergeExtReferents(obj);
        }
Exemple #5
0
        /// <summary>
        /// Decides whether this geo referent and <paramref name="obj"/> may denote the same object.
        /// </summary>
        /// <param name="obj">Referent to compare with; anything that is not a <c>GeoReferent</c> yields false.</param>
        /// <param name="typ">Comparison mode; only passed on to the recursive comparison of higher referents.</param>
        /// <returns>true when no check below rules the pair out.</returns>
        public override bool CanBeEquals(Pullenti.Ner.Referent obj, Pullenti.Ner.Core.ReferentsEqualType typ)
        {
            GeoReferent geo = obj as GeoReferent;

            if (geo == null)
            {
                return(false);
            }
            // Equal non-null Alpha2 values decide the comparison immediately.
            if (geo.Alpha2 != null && geo.Alpha2 == Alpha2)
            {
                return(true);
            }
            if (IsCity != geo.IsCity)
            {
                return(false);
            }
            if (IsUnion != geo.IsUnion)
            {
                return(false);
            }
            if (IsUnion)
            {
                // Unions are equal exactly when their ATTR_REF values match in both directions.
                foreach (Pullenti.Ner.Slot s in Slots)
                {
                    if (s.TypeName == ATTR_REF && obj.FindSlot(ATTR_REF, s.Value, true) == null)
                    {
                        return(false);
                    }
                }
                foreach (Pullenti.Ner.Slot s in obj.Slots)
                {
                    if (s.TypeName == ATTR_REF && this.FindSlot(ATTR_REF, s.Value, true) == null)
                    {
                        return(false);
                    }
                }
                return(true);
            }
            Pullenti.Ner.Referent ref1 = this.GetSlotValue(ATTR_REF) as Pullenti.Ner.Referent;
            Pullenti.Ner.Referent ref2 = geo.GetSlotValue(ATTR_REF) as Pullenti.Ner.Referent;
            // When both sides carry an ATTR_REF referent, it must be the same one.
            if (ref1 != null && ref2 != null && ref1 != ref2)
            {
                return(false);
            }
            bool r  = IsRegion || IsState;
            bool r1 = geo.IsRegion || geo.IsState;

            // NOTE(review): the original nested an IsTerritory comparison here whose both
            // outcomes returned false; it is collapsed. Confirm no territory exception was intended.
            if (r != r1)
            {
                return(false);
            }

            // At least one name has to be shared.
            bool eqNames = false;

            foreach (Pullenti.Ner.Slot s in Slots)
            {
                if (s.TypeName == ATTR_NAME && geo.FindSlot(s.TypeName, s.Value, true) != null)
                {
                    eqNames = true;
                    break;
                }
            }
            if (!eqNames)
            {
                return(false);
            }
            if (IsRegion && geo.IsRegion)
            {
                // Two regions need a compatible type: an identical value, or one value ending with the other.
                List <string> typs1 = Typs;
                List <string> typs2 = geo.Typs;
                bool          ok    = false;
                foreach (string t in typs1)
                {
                    if (typs2.Contains(t))
                    {
                        ok = true;
                    }
                    else
                    {
                        foreach (string tt in typs2)
                        {
                            if (Pullenti.Morph.LanguageHelper.EndsWith(tt, t) || Pullenti.Morph.LanguageHelper.EndsWith(t, tt))
                            {
                                ok = true;
                                break;
                            }
                        }
                    }
                    // One match is enough; further scanning cannot change the result.
                    if (ok)
                    {
                        break;
                    }
                }
                if (!ok)
                {
                    return(false);
                }
            }
            if (Higher != null && geo.Higher != null)
            {
                if (_checkRoundDep(this) || _checkRoundDep(geo))
                {
                    return(false);
                }
                // The higher referents must be compatible directly or with one level skipped on either side.
                bool higherOk = Higher.CanBeEquals(geo.Higher, typ)
                                || (geo.Higher.Higher != null && Higher.CanBeEquals(geo.Higher.Higher, typ))
                                || (Higher.Higher != null && Higher.Higher.CanBeEquals(geo.Higher, typ));
                if (!higherOk)
                {
                    return(false);
                }
            }
            return(true);
        }
Exemple #6
0
        /// <summary>
        /// Registers a new entity or binds it to an existing one. Entities obtained during analysis
        /// must be saved through this function. This is where coreference is resolved, i.e. where
        /// entities that correspond to the same object of the text are merged.
        /// </summary>
        /// <param name="referent">the entity to save</param>
        /// <returns>the same referent instance, or another one if it was merged with a previously extracted entity</returns>
        public virtual Pullenti.Ner.Referent RegisterReferent(Pullenti.Ner.Referent referent)
        {
            if (referent == null)
            {
                return null;
            }

            // Save the attached external referents first, unless the nesting counter is already above 2.
            if (referent.m_ExtReferents != null && m_RegRefLevel <= 2)
            {
                foreach (Pullenti.Ner.ReferentToken ext in referent.m_ExtReferents)
                {
                    Pullenti.Ner.Referent before = ext.Referent;
                    m_RegRefLevel++;
                    ext.SaveToLocalOntology();
                    m_RegRefLevel--;

                    // Re-point slots only if saving replaced the token's referent with another instance.
                    if (before != ext.Referent && ext.Referent != null)
                    {
                        foreach (Pullenti.Ner.Slot slot in referent.Slots)
                        {
                            if (slot.Value == before)
                            {
                                referent.UploadSlot(slot, ext.Referent);
                            }
                        }
                        if (referent.m_ExtReferents != null)
                        {
                            foreach (Pullenti.Ner.ReferentToken sibling in referent.m_ExtReferents)
                            {
                                foreach (Pullenti.Ner.Slot slot in sibling.Referent.Slots)
                                {
                                    if (slot.Value == before)
                                    {
                                        referent.UploadSlot(slot, ext.Referent);
                                    }
                                }
                            }
                        }
                    }
                }
                referent.m_ExtReferents = null;
            }

            List <Pullenti.Ner.Referent> candidates = null;

            if (m_Referents.Contains(referent))
            {
                return referent;
            }

            // Scan the registered referents from the newest backwards, at most 999 of them.
            int idx = m_Referents.Count - 1;
            while (idx >= 0 && (m_Referents.Count - idx) < 1000)
            {
                Pullenti.Ner.Referent known = m_Referents[idx];
                idx--;

                if (!known.CanBeEquals(referent, ReferentsEqualType.WithinOneText))
                {
                    continue;
                }
                // Pairs where one side can be general for the other are not merge candidates.
                if (known.CanBeGeneralFor(referent) || referent.CanBeGeneralFor(known))
                {
                    continue;
                }
                if (candidates == null)
                {
                    candidates = new List <Pullenti.Ner.Referent>();
                }
                candidates.Add(known);
            }

            if (candidates != null)
            {
                // Exactly one candidate: merge into it and return it.
                if (candidates.Count == 1)
                {
                    candidates[0].MergeSlots(referent, true);
                    return candidates[0];
                }

                // Several candidates: reuse only one whose slots match in both directions (no merge).
                foreach (Pullenti.Ner.Referent cand in candidates)
                {
                    if (cand.Slots.Count != referent.Slots.Count)
                    {
                        continue;
                    }
                    bool same = true;
                    foreach (Pullenti.Ner.Slot slot in referent.Slots)
                    {
                        if (cand.FindSlot(slot.TypeName, slot.Value, true) == null)
                        {
                            same = false;
                            break;
                        }
                    }
                    if (!same)
                    {
                        continue;
                    }
                    foreach (Pullenti.Ner.Slot slot in cand.Slots)
                    {
                        if (referent.FindSlot(slot.TypeName, slot.Value, true) == null)
                        {
                            same = false;
                            break;
                        }
                    }
                    if (same)
                    {
                        return cand;
                    }
                }
            }

            m_Referents.Add(referent);
            return referent;
        }