/// <summary>
/// Collects the leaf entity elements found under <paramref name="parent"/>.
/// Per the original author's note, a "leaf" is an element whose children
/// have all gone through coreference resolution.
/// </summary>
/// <param name="parent">element whose direct ENTITY children are examined</param>
/// <param name="list">output list that receives the collected elements</param>
/// <returns>
/// <c>true</c> when no ENTITY child without a FULLNAME attribute was visited;
/// otherwise the leaf flag computed for the last such child.
/// </returns>
private bool GetLeafNodes(XElement parent, List<XElement> list)
{
    bool leafFlag = true;

    foreach (XElement entity in parent.Elements(UnitTextType.ENTITY))
    {
        // Entities that already carry FULLNAME are skipped entirely.
        if (!entity.IsContainAttribute(BlockAttribute.FULLNAME))
        {
            List<XElement> nestedLeaves = new List<XElement>();
            leafFlag = GetLeafNodes(entity, nestedLeaves);
            CoreferenceType ownerKind = GetCoreferenceType(entity);

            foreach (XElement candidate in nestedLeaves)
            {
                CoreferenceType candidateKind = GetCoreferenceType(candidate);

                // A nested leaf is "absorbed" (not reported separately) when:
                //   - the owner has the Default type, or
                //   - the owner is a Person and the candidate is a Person too, or
                //   - the owner is Unknown and the candidate is anything but Unknown.
                bool absorbed;
                if (ownerKind == CoreferenceType.Default)
                {
                    absorbed = true;
                }
                else if (ownerKind == CoreferenceType.Person)
                {
                    absorbed = (candidateKind == ownerKind);
                }
                else if (ownerKind == CoreferenceType.Unknown)
                {
                    absorbed = (candidateKind != ownerKind);
                }
                else
                {
                    absorbed = false;
                }

                if (!absorbed)
                {
                    // The candidate is reported on its own, so the owner is no longer a leaf.
                    list.Add(candidate);
                    leafFlag = false;
                }
            }

            if (leafFlag)
            {
                list.Add(entity);
            }
        }
    }

    return leafFlag;
}
/// <summary>
/// Builds the list of child entity elements for <paramref name="node"/>,
/// optionally following the node's link to another element.
/// </summary>
/// <param name="node">element whose children are gathered</param>
/// <param name="prevLink">
/// links already followed during this traversal; a newly followed link is appended to it
/// </param>
/// <param name="withoutLinks">when <c>true</c>, the node's link is not looked up at all</param>
/// <returns>a new list with the gathered elements (possibly empty)</returns>
private List<XElement> GetNodeChilds(XElement node, List<string> prevLink, bool withoutLinks)
{
    string target = withoutLinks ? "" : GetNodeLink(node);
    List<XElement> gathered = new List<XElement>();

    if (target == "")
    {
        // No link to follow: walk the node's own ENTITY children.
        CoreferenceType ownKind = GetCoreferenceType(node);
        foreach (XElement nested in node.Elements(UnitTextType.ENTITY))
        {
            CoreferenceType nestedKind = GetCoreferenceType(nested);
            bool expandInPlace = (nestedKind == ownKind)
                                 && (nestedKind != CoreferenceType.Organization)
                                 && (nestedKind != CoreferenceType.Unknown);
            if (expandInPlace)
            {
                // Same type as the parent (and not Organization/Unknown): replace by its own children.
                gathered.AddRange(GetNodeChilds(nested, prevLink, withoutLinks));
            }
            else
            {
                gathered.Add(nested);
            }
        }
        return gathered;
    }

    XElement resolved;

    if (!prevLink.Contains(target))
    {
        // First time this link is seen: remember it and continue from the linked element.
        prevLink.Add(target);
        if (GetLinkFromDictionary(target, out resolved))
        {
            gathered.AddRange(GetNodeChilds(resolved, prevLink, withoutLinks));
        }
        return gathered;
    }

    // The link was already visited: among all remembered links pick the
    // expansion (computed without following links) that has the most elements.
    List<XElement> longest = new List<XElement>();
    foreach (string id in prevLink)
    {
        if (!GetLinkFromDictionary(id, out resolved))
        {
            continue;
        }

        List<XElement> expansion = GetNodeChilds(resolved, prevLink, true);
        if (expansion.Count > longest.Count)
        {
            longest = expansion;
        }
    }
    gathered.AddRange(longest);
    return gathered;
}
/// <summary>
/// Returns the coreference type of an entity element.
/// </summary>
/// <param name="node">entity element; its TYPE attribute is parsed as an <c>EntityType</c> name</param>
/// <returns>
/// the coreference type mapped from the entity type, or <c>CoreferenceType.Default</c>
/// when the entity type has no explicit mapping
/// </returns>
public CoreferenceType GetCoreferenceType(XElement node)
{
    string rawType = node.Attribute(BlockAttribute.TYPE.ToString()).Value;
    EntityType entityType = (EntityType)Enum.Parse(typeof(EntityType), rawType);

    // The helper receives the parsed type by reference and may replace it.
    SpecialUnknownProperTest(node, ref entityType);

    switch (entityType)
    {
        case EntityType.PersonName:
        case EntityType.PersonInfo:
        case EntityType.PersonNoInfo:
            return CoreferenceType.Person;

        case EntityType.OrgName:
            return CoreferenceType.Organization;

        case EntityType.OrgAbbr:
            return CoreferenceType.Abbreviation;

        case EntityType.OrgNoInfo:
            return CoreferenceType.WeakOrganization;

        case EntityType.Unknown:
        case EntityType.UnknownProper:
        case EntityType.NounVerb:
            return CoreferenceType.Unknown;

        default:
            return CoreferenceType.Default;
    }
}
/// <summary>
/// Decides whether the current node (<c>_CurrentNode</c>) should be updated
/// from the node it is being compared with (<c>_CompareNode</c>).
/// The method tracks a single "nodes are equal" flag through several
/// successive checks and returns its negation.
/// </summary>
/// <returns><c>true</c> when the equality flag ended up <c>false</c>; otherwise <c>false</c></returns>
private bool NeedUpdateNode()
{
    bool allEqual = true;
    Array attributes = null;
    CoreferenceType type = GetCoreferenceType(_CurrentNode);
    // Choose the attribute set to inspect by the coreference type of the current node;
    // for any other type no attributes are inspected.
    switch (type)
    {
    case CoreferenceType.Person:
        attributes = _PersonAttributes;
        break;
    case CoreferenceType.Unknown:
    case CoreferenceType.Organization:
    case CoreferenceType.WeakOrganization:
        attributes = _UsualAttributes;
        break;
    }
    if (attributes != null)
    {
        foreach (BlockAttribute attribute in attributes)
        {
            string currentValue = _CurrentNode.AttributeUpper(attribute);
            string compareValue = _CompareNode.AttributeUpper(attribute);
            // NOTE(review): the flag is cleared when BOTH values are empty; non-empty
            // values are never compared here — confirm this condition is intended.
            if (currentValue.IsEmpty() && compareValue.IsEmpty())
            {
                allEqual = false;
            }
        }
    }
    if (allEqual)
    {
        // The flag is passed by ref AND overwritten with the negated result of the call.
        allEqual = !NeedUpdateField(GetNodeChildTextUpper(_CurrentNode), GetNodeChildTextUpper(_CompareNode), ref allEqual);
    }
    if (!allEqual)
    {
        // Nodes nested one inside the other are treated as equal.
        if (_CurrentNode.Descendants().Contains(_CompareNode) || _CompareNode.Descendants().Contains(_CurrentNode))
        {
            allEqual = true;
        }
        else if (_CompareNode.IsContainAttribute(BlockAttribute.LINK))
        {
            // The compared node links to something: treat as equal if the current node
            // or any of its descendants has that value as its ID or its LINK.
            string link = _CompareNode.Attribute(BlockAttribute.LINK);
            foreach (XElement parent in _CurrentNode.DescendantsAndSelf())
            {
                if (link.IsEqual(parent.Attribute(BlockAttribute.ID)) || link.IsEqual(parent.Attribute(BlockAttribute.LINK)))
                {
                    allEqual = true;
                    break;
                }
            }
        }
    }
    if (!allEqual)
    {
        XElement currentFirstParent = GetFirstParent(_CurrentNode);
        XElement compareFirstParent = GetFirstParent(_CompareNode);
        // Last check: the two "first parent" subtrees have the same number of descendants
        // and every ENTITY element on the current side has at least one ENTITY element on
        // the compared side with matching PROF and PRE attribute values.
        if (currentFirstParent.Descendants().Count() == compareFirstParent.Descendants().Count())
        {
            bool allExist = true;
            foreach (XElement node in currentFirstParent.DescendantsAndSelf(UnitTextType.ENTITY.ToString()))
            {
                int result = (from child in compareFirstParent.DescendantsAndSelf()
                              where child.Name.IsEqual(UnitTextType.ENTITY) && node.AttributeUpper(BlockAttribute.PROF).IsEqual(child.AttributeUpper(BlockAttribute.PROF)) && node.AttributeUpper(BlockAttribute.PRE).IsEqual(child.AttributeUpper(BlockAttribute.PRE))
                              select child).Count();
                if (result == 0)
                {
                    allExist = false;
                    break;
                }
            }
            if (allExist)
            {
                allEqual = true;
            }
        }
    }
    return(!allEqual);
}
/// <summary>
/// Runs the coreference algorithm selected by <paramref name="type"/> for every
/// entity of a matching type, trying the elements that precede it in
/// <paramref name="collection"/> first and, optionally, the ones that follow it.
/// </summary>
/// <param name="collection">ordered elements that each entity is compared against</param>
/// <param name="entities">entities to process</param>
/// <param name="type">
/// kind of coreference to resolve; only Abbreviation, Organization and Person
/// have an algorithm, any other value makes the method return immediately
/// </param>
/// <param name="isDownAnalyzing">
/// when <c>true</c>, elements after the entity are also tried if nothing was found before it
/// </param>
private void Coreference(List<XElement> collection, List<XElement> entities, CoreferenceType type, bool isDownAnalyzing = true)
{
    CoreferenceAlgorithm resolver;
    switch (type)
    {
        case CoreferenceType.Abbreviation:
            resolver = AbbreviationNodeCoreference;
            break;
        case CoreferenceType.Organization:
            resolver = OrganizatioNodeCoreference;
            break;
        case CoreferenceType.Person:
            resolver = PersonNodeCoreference;
            break;
        default:
            return;
    }

    foreach (XElement entity in entities)
    {
        CoreferenceType entityKind = GetCoreferenceType(entity);

        // Organization coreference also accepts weak organizations;
        // the other kinds require an exact type match.
        bool accepted;
        if (type == CoreferenceType.Organization)
        {
            accepted = (entityKind == CoreferenceType.Organization)
                       || (entityKind == CoreferenceType.WeakOrganization);
        }
        else
        {
            accepted = (entityKind == type);
        }

        if (!accepted)
        {
            continue;
        }

        _CurrentNode = entity;
        string currentID = _CurrentNode.Attribute(BlockAttribute.ID);
        int position = collection.IndexOf(entity);
        bool matched = false;

        // Look upwards (towards the start of the collection) until the algorithm reports a match.
        for (int above = position - 1; (above >= 0) && !matched; above--)
        {
            // Elements that already link to the current entity are skipped.
            if (collection[above].Attribute(BlockAttribute.LINK) != currentID)
            {
                matched = resolver(collection[above]);
            }
        }

        if (!isDownAnalyzing || matched)
        {
            continue;
        }

        // Nothing was found above: run the algorithm on every element below.
        for (int below = position + 1; below < collection.Count; below++)
        {
            if (collection[below].Attribute(BlockAttribute.LINK) != currentID)
            {
                resolver(collection[below]);
            }
        }
    }
}