예제 #1
0
        /// <summary>
        /// Collects the leaf tags found beneath <paramref name="parent"/>.
        /// A leaf tag is one whose children have all passed coreference resolution.
        /// </summary>
        /// <param name="parent">Element whose direct entity children are examined.</param>
        /// <param name="list">Output list that receives the collected tags.</param>
        /// <returns>
        /// The leaf flag computed for the last direct entity child that was examined;
        /// <c>true</c> when no child was examined.
        /// </returns>
        private bool GetLeafNodes(XElement parent, List <XElement> list)
        {
            bool leaf = true;

            foreach (XElement entity in parent.Elements(UnitTextType.ENTITY))
            {
                // Entities that carry a FULLNAME attribute are skipped entirely.
                if (!entity.IsContainAttribute(BlockAttribute.FULLNAME))
                {
                    List <XElement> nested = new List <XElement>();
                    leaf = GetLeafNodes(entity, nested);
                    CoreferenceType entityType = GetCoreferenceType(entity);

                    foreach (XElement candidate in nested)
                    {
                        CoreferenceType candidateType = GetCoreferenceType(candidate);

                        // A nested leaf is dropped (not propagated upward) in any of these cases.
                        bool samePerson   = (entityType == CoreferenceType.Person) && (candidateType == entityType);
                        bool unknownOwner = (entityType == CoreferenceType.Unknown) && (candidateType != entityType);
                        bool defaultOwner = (entityType == CoreferenceType.Default);

                        if (!(samePerson || unknownOwner || defaultOwner))
                        {
                            // Propagating a nested leaf means this entity is not a leaf itself.
                            list.Add(candidate);
                            leaf = false;
                        }
                    }

                    if (leaf)
                    {
                        list.Add(entity);
                    }
                }
            }
            return leaf;
        }
예제 #2
0
        /// <summary>
        /// Gathers the child entities of <paramref name="node"/>, optionally following the
        /// node's link to another node instead of reading its own children.
        /// </summary>
        /// <param name="node">Node whose children are collected.</param>
        /// <param name="prevLink">Links already followed; a newly followed link is appended to it.</param>
        /// <param name="withoutLinks">When <c>true</c>, the node's link is ignored and only its own children are read.</param>
        /// <returns>A new list with the collected child elements.</returns>
        private List <XElement> GetNodeChilds(XElement node, List <string> prevLink, bool withoutLinks)
        {
            List <XElement> result = new List <XElement>();
            string          target = withoutLinks ? "" : GetNodeLink(node);

            // No link to follow: walk the node's own entity children.
            if (target == "")
            {
                CoreferenceType ownType = GetCoreferenceType(node);
                foreach (XElement entity in node.Elements(UnitTextType.ENTITY))
                {
                    CoreferenceType entityType = GetCoreferenceType(entity);

                    // Only a child of the parent's own type that is neither Organization
                    // nor Unknown is expanded recursively; every other child is taken as-is.
                    bool expand = (entityType != CoreferenceType.Organization) &&
                                  (entityType != CoreferenceType.Unknown) &&
                                  (entityType == ownType);
                    if (expand)
                    {
                        result.AddRange(GetNodeChilds(entity, prevLink, withoutLinks));
                    }
                    else
                    {
                        result.Add(entity);
                    }
                }
                return result;
            }

            // First time this link is seen: remember it and continue from the linked node.
            if (!prevLink.Contains(target))
            {
                prevLink.Add(target);
                XElement linked;
                if (GetLinkFromDictionary(target, out linked))
                {
                    result.AddRange(GetNodeChilds(linked, prevLink, withoutLinks));
                }
                return result;
            }

            // The link was already followed: among all remembered links pick the
            // expansion (computed without following links) that has the most elements.
            List <XElement> longest = new List <XElement>();
            foreach (string id in prevLink)
            {
                XElement linked;
                if (!GetLinkFromDictionary(id, out linked))
                {
                    continue;
                }

                List <XElement> expansion = GetNodeChilds(linked, prevLink, true);
                if (expansion.Count > longest.Count)
                {
                    longest = expansion;
                }
            }
            result.AddRange(longest);
            return result;
        }
예제 #3
0
        /// <summary>
        /// Returns the coreference type of an entity.
        /// </summary>
        /// <param name="node">The entity element; its TYPE attribute is parsed as an <c>EntityType</c>.</param>
        /// <returns>
        /// The coreference type mapped from the entity type, or <c>CoreferenceType.Default</c>
        /// when the entity type has no explicit mapping.
        /// </returns>
        public CoreferenceType GetCoreferenceType(XElement node)
        {
            string     rawType    = node.Attribute(BlockAttribute.TYPE.ToString()).Value;
            EntityType entityType = (EntityType)Enum.Parse(typeof(EntityType), rawType);

            // The entity type is passed by reference, so this call may replace it.
            SpecialUnknownProperTest(node, ref entityType);

            switch (entityType)
            {
            case EntityType.PersonName:
            case EntityType.PersonInfo:
            case EntityType.PersonNoInfo:
                return CoreferenceType.Person;

            case EntityType.OrgName:
                return CoreferenceType.Organization;

            case EntityType.OrgAbbr:
                return CoreferenceType.Abbreviation;

            case EntityType.OrgNoInfo:
                return CoreferenceType.WeakOrganization;

            case EntityType.Unknown:
            case EntityType.UnknownProper:
            case EntityType.NounVerb:
                return CoreferenceType.Unknown;

            default:
                return CoreferenceType.Default;
            }
        }
예제 #4
0
        /// <summary>
        /// Decides whether the current node (<c>_CurrentNode</c>) needs to be updated with
        /// respect to the node it is compared against (<c>_CompareNode</c>).
        /// The method maintains a single flag, <c>allEqual</c>, which is cleared and re-set
        /// by several consecutive checks; the result is the negation of that flag.
        /// </summary>
        /// <returns><c>true</c> when the nodes are not considered equal after all checks.</returns>
        private bool NeedUpdateNode()
        {
            bool            allEqual   = true;
            Array           attributes = null;
            CoreferenceType type       = GetCoreferenceType(_CurrentNode);

            // Choose the attribute set to inspect from the current node's coreference type.
            // Types not listed here leave attributes null, which skips the attribute check.
            switch (type)
            {
            case CoreferenceType.Person:
                attributes = _PersonAttributes;
                break;

            case CoreferenceType.Unknown:
            case CoreferenceType.Organization:
            case CoreferenceType.WeakOrganization:
                attributes = _UsualAttributes;
                break;
            }

            if (attributes != null)
            {
                foreach (BlockAttribute attribute in attributes)
                {
                    // Presumably AttributeUpper yields the upper-cased attribute value - confirm.
                    string currentValue = _CurrentNode.AttributeUpper(attribute);
                    string compareValue = _CompareNode.AttributeUpper(attribute);
                    // NOTE(review): the flag is cleared when BOTH values are empty and the values
                    // are never compared with each other. This looks inverted for a flag named
                    // allEqual - confirm the intended condition.
                    if (currentValue.IsEmpty() && compareValue.IsEmpty())
                    {
                        allEqual = false;
                    }
                }
            }
            if (allEqual)
            {
                // NOTE(review): allEqual is passed by ref and is also the assignment target, so
                // anything NeedUpdateField writes through the ref is overwritten by the negated
                // return value.
                allEqual = !NeedUpdateField(GetNodeChildTextUpper(_CurrentNode),
                                            GetNodeChildTextUpper(_CompareNode),
                                            ref allEqual);
            }

            if (!allEqual)
            {
                // Nodes nested one inside the other are treated as equal.
                if (_CurrentNode.Descendants().Contains(_CompareNode) ||
                    _CompareNode.Descendants().Contains(_CurrentNode))
                {
                    allEqual = true;
                }
                else if (_CompareNode.IsContainAttribute(BlockAttribute.LINK))
                {
                    // The compared node's link matching the ID or LINK of the current node,
                    // or of any of its descendants, also makes the nodes equal.
                    string link = _CompareNode.Attribute(BlockAttribute.LINK);
                    foreach (XElement parent in _CurrentNode.DescendantsAndSelf())
                    {
                        if (link.IsEqual(parent.Attribute(BlockAttribute.ID)) ||
                            link.IsEqual(parent.Attribute(BlockAttribute.LINK)))
                        {
                            allEqual = true;
                            break;
                        }
                    }
                }
            }

            if (!allEqual)
            {
                // Last chance: compare the two subtrees returned by GetFirstParent
                // (presumably the outermost enclosing element of each node - confirm).
                XElement currentFirstParent = GetFirstParent(_CurrentNode);
                XElement compareFirstParent = GetFirstParent(_CompareNode);
                // The subtrees are compared only when their total descendant counts match.
                if (currentFirstParent.Descendants().Count() == compareFirstParent.Descendants().Count())
                {
                    bool allExist = true;
                    // Every entity in the current subtree must have at least one entity in the
                    // compared subtree with matching PROF and PRE attribute values.
                    foreach (XElement node in currentFirstParent.DescendantsAndSelf(UnitTextType.ENTITY.ToString()))
                    {
                        int result =
                            (from child in compareFirstParent.DescendantsAndSelf()
                             where child.Name.IsEqual(UnitTextType.ENTITY) &&
                             node.AttributeUpper(BlockAttribute.PROF).IsEqual(child.AttributeUpper(BlockAttribute.PROF)) &&
                             node.AttributeUpper(BlockAttribute.PRE).IsEqual(child.AttributeUpper(BlockAttribute.PRE))
                             select child).Count();
                        if (result == 0)
                        {
                            allExist = false;
                            break;
                        }
                    }
                    if (allExist)
                    {
                        allEqual = true;
                    }
                }
            }
            return(!allEqual);
        }
예제 #5
0
        /// <summary>
        /// Runs the coreference algorithm selected by <paramref name="type"/> for every
        /// entity of a suitable type, pairing it with the other elements of
        /// <paramref name="collection"/>.
        /// </summary>
        /// <param name="collection">Ordered elements that the entity is compared against.</param>
        /// <param name="entities">Entities to process.</param>
        /// <param name="type">Selects the algorithm; only Abbreviation, Organization and Person are handled.</param>
        /// <param name="isDownAnalyzing">
        /// When <c>true</c>, elements after the entity are also tried if nothing before it matched.
        /// </param>
        private void Coreference(List <XElement> collection, List <XElement> entities, CoreferenceType type, bool isDownAnalyzing = true)
        {
            CoreferenceAlgorithm resolver;

            if (type == CoreferenceType.Abbreviation)
            {
                resolver = AbbreviationNodeCoreference;
            }
            else if (type == CoreferenceType.Organization)
            {
                resolver = OrganizatioNodeCoreference;
            }
            else if (type == CoreferenceType.Person)
            {
                resolver = PersonNodeCoreference;
            }
            else
            {
                // No algorithm exists for any other type.
                return;
            }

            foreach (XElement entity in entities)
            {
                CoreferenceType entityType = GetCoreferenceType(entity);

                // An entity qualifies when its type equals the requested one; a weak
                // organization additionally qualifies for the Organization pass.
                bool qualifies = (entityType == type) ||
                                 ((type == CoreferenceType.Organization) &&
                                  (entityType == CoreferenceType.WeakOrganization));
                if (!qualifies)
                {
                    continue;
                }

                _CurrentNode = entity;
                string selfId   = _CurrentNode.Attribute(BlockAttribute.ID);
                int    position = collection.IndexOf(entity);
                bool   resolved = false;

                // Scan backwards from the entity and stop at the first successful match.
                // Elements that already link to this entity are skipped.
                int j = position - 1;
                while (!resolved && (j >= 0))
                {
                    if (collection[j].Attribute(BlockAttribute.LINK) != selfId)
                    {
                        resolved = resolver(collection[j]);
                    }
                    --j;
                }

                // Nothing was found above: try every element below, ignoring the results.
                if (isDownAnalyzing && !resolved)
                {
                    for (int k = position + 1; k < collection.Count; ++k)
                    {
                        if (collection[k].Attribute(BlockAttribute.LINK) != selfId)
                        {
                            resolver(collection[k]);
                        }
                    }
                }
            }
        }