/// <summary>
/// Determines the similarity of a property whose NodeKind is IRI.
/// If the property has no nested metadata, the values are compared as literals of data type
/// <c>DataTypes.AnyUri</c>; otherwise the nested entities are compared.
/// </summary>
/// <param name="metadataComparisonProperty">The current metadata property to compare in the given resources</param>
/// <param name="resources">resources to compare</param>
/// <returns>The similarity result for a specific property.</returns>
private double CalculateIRISimilarity(MetadataComparisonProperty metadataComparisonProperty, Entity[] resources)
{
    // Without nested metadata the IRI is treated like any other literal value.
    if (metadataComparisonProperty.NestedMetadata.IsNullOrEmpty())
    {
        return CalculateLiteralSimilarity(metadataComparisonProperty.Key, DataTypes.AnyUri, resources);
    }

    return CalculateNestedLiteralSimilarity(metadataComparisonProperty, resources);
}
/// <summary>
/// Determines the differences of a property whose NodeKind is IRI.
/// If the property has no nested metadata, the property values of the resources are copied to the result;
/// otherwise the nested entities are compared with each other.
/// </summary>
/// <param name="metadataComparisonProperty">The current metadata property to compare in the given resources</param>
/// <param name="resources">resources to compare</param>
/// <returns>The comparison result for a specific property. While the returned key is the metadata key, the list contains the compared properties of both resources.</returns>
private IDictionary<string, IList<dynamic>> CalculateIRIDifference(MetadataComparisonProperty metadataComparisonProperty, params Entity[] resources)
{
    var hasNoNestedMetadata = metadataComparisonProperty.NestedMetadata.IsNullOrEmpty();

    return hasNoNestedMetadata
        ? ExtractIRIValues(metadataComparisonProperty, resources)
        : CalculateIRINestedValuesDifference(metadataComparisonProperty, resources);
}
/// <summary>
/// Collects the metadata of all entity types in the given configurations and merges it per metadata key
/// into one <see cref="MetadataComparisonProperty"/>.
/// </summary>
/// <param name="metadataComparisonConfigTypes">The entity types to load, grouped by metadata graph configuration.</param>
/// <returns>The merged comparison properties, passed through <c>OrderMetadata</c>.</returns>
/// <exception cref="ArgumentNullException">
/// If no configuration is given or one of the configurations contains no entity types.
/// </exception>
public IList<MetadataComparisonProperty> GetComparisonMetadata(IEnumerable<MetadataComparisonConfigTypesDto> metadataComparisonConfigTypes)
{
    if (metadataComparisonConfigTypes.IsNullOrEmpty())
    {
        throw new ArgumentNullException(nameof(metadataComparisonConfigTypes), "No MetadataComparisonConfigTypes were given");
    }

    if (metadataComparisonConfigTypes.Any(mcct => mcct.EntityTypes.IsNullOrEmpty()))
    {
        throw new ArgumentNullException(nameof(metadataComparisonConfigTypes), "No entity types were given for MetadataComparisonConfigTypes");
    }

    var mergedMetadata = new Dictionary<string, MetadataComparisonProperty>();

    foreach (var configTypes in metadataComparisonConfigTypes)
    {
        foreach (var entityType in configTypes.EntityTypes)
        {
            var metadataList = GetMetadataForEntityTypeInConfig(entityType, configTypes.MetadataGraphConfigurationId);

            foreach (var metadata in metadataList)
            {
                // First occurrence of this metadata key: create a new comparison property.
                if (!mergedMetadata.TryGetValue(metadata.Key, out var comparisonProperty))
                {
                    var properties = new Dictionary<string, IDictionary<string, dynamic>>
                    {
                        { entityType, metadata.Properties }
                    };
                    var newComparisonProperty = new MetadataComparisonProperty(metadata.Key, properties, metadata.NestedMetadata);
                    mergedMetadata.Add(metadata.Key, newComparisonProperty);
                    continue;
                }

                // The entity type was already merged into this comparison property.
                if (comparisonProperty.Properties.ContainsKey(entityType))
                {
                    continue;
                }

                comparisonProperty.Properties.Add(entityType, metadata.Properties);

                // Distributed endpoints are not different across different resource types.
                // Therefore, it is only necessary to ensure that the metadata of all endpoints are included in the metadata.
                // A nested metadata entry is missing only if NO existing entry has its key. The list is
                // materialized first so the target collection is not modified while it is being filtered.
                var missingNestedMetadata = metadata
                    .NestedMetadata
                    .Where(nm => comparisonProperty.NestedMetadata.All(t => t.Key != nm.Key))
                    .ToList();

                comparisonProperty.NestedMetadata.AddRange(missingNestedMetadata);
            }
        }
    }

    return OrderMetadata(mergedMetadata.Select(t => t.Value));
}
/// <summary>
/// Calculates the combined similarity of all nested properties by first calculating individual similarities of each property and then combining them as an average value.
/// For every nested entity of the first resource, the best matching nested entity of the same type in the
/// second resource is taken; the sum of these best matches is divided by the larger nested entity count.
/// </summary>
/// <param name="metadataComparisonProperty">The current metadata property to compare in the given resources</param>
/// <param name="resources">resources to compare</param>
/// <returns>
/// The similarity result for a specific property. Returns 0 if a required key is missing in one of the
/// resources or if neither resource contains nested entities.
/// </returns>
private double CalculateNestedLiteralSimilarity(MetadataComparisonProperty metadataComparisonProperty, Entity[] resources)
{
    double totalSimilarity = 0;

    try
    {
        var allFirstNestedEntities = resources[0].Properties[metadataComparisonProperty.Key].Select(x => ((Entity)x)).ToList();
        var allSecondNestedEntities = resources[1].Properties[metadataComparisonProperty.Key].Select(x => ((Entity)x)).ToList();

        // Without any nested entity the final division would be 0 / 0 and yield NaN.
        var largestEntityCount = Math.Max(allFirstNestedEntities.Count, allSecondNestedEntities.Count);
        if (largestEntityCount == 0)
        {
            return 0;
        }

        foreach (var nestedMetadata in metadataComparisonProperty.NestedMetadata)
        {
            var typedFirstNestedEntities = allFirstNestedEntities.Where(n => n.Properties[RDF.Type].First() == nestedMetadata.Key).ToList();
            var typedSecondNestedEntities = allSecondNestedEntities.Where(n => n.Properties[RDF.Type].First() == nestedMetadata.Key).ToList();

            double similarity = 0;

            foreach (var nestedEntityA in typedFirstNestedEntities)
            {
                double maxLocalSimilarity = 0;

                foreach (var nestedEntityB in typedSecondNestedEntities)
                {
                    double localSimilarity = 0;

                    foreach (var metadataProperty in nestedMetadata.Properties)
                    {
                        try
                        {
                            localSimilarity += Calculate(metadataProperty, new Entity[] { nestedEntityA, nestedEntityB });
                        }
                        catch (System.Exception)
                        {
                            // Best effort: a sub-property that cannot be compared contributes 0 to the average.
                        }
                    }

                    // A nested metadata without properties would otherwise produce 0 / 0 = NaN,
                    // which poisons every following Math.Max and sum.
                    var propertyCount = nestedMetadata.Properties.Count;
                    if (propertyCount > 0)
                    {
                        maxLocalSimilarity = Math.Max(maxLocalSimilarity, localSimilarity / propertyCount);
                    }
                }

                similarity += maxLocalSimilarity;
            }

            totalSimilarity += similarity;
        }

        return totalSimilarity / largestEntityCount;
    }
    catch (System.Exception ex) when (ex is KeyNotFoundException || ex is ArgumentNullException)
    {
        // The property (or the type of a nested entity) is missing in one of the resources.
        return 0;
    }
}
// A single resource type whose metadata contains neither a node kind nor a data type
// must not be reported as having one data type.
public void ContainsOneDatatype_SingleResourceType_NoMetadata()
{
    // Arrange
    var emptyTypeProperties = new Dictionary<string, dynamic>();
    var propertiesByEntityType = new Dictionary<string, IDictionary<string, dynamic>>
    {
        { Graph.Metadata.Constants.Resource.Type.GenericDataset, emptyTypeProperties }
    };
    var comparisonProperty = new MetadataComparisonProperty(Graph.Metadata.Constants.Resource.HasLabel, propertiesByEntityType, null);

    // Act
    var containsOneDatatype = comparisonProperty.ContainsOneDatatype(out var actualNodeKind, out var actualDataType);

    // Assert
    Assert.False(containsOneDatatype);
    Assert.Null(actualNodeKind);
    Assert.Null(actualDataType);
}
/// <summary>
/// Calculates the similarity of one metadata property between the given resources,
/// dispatching on the node kind reported by <c>ContainsOneDatatype</c>.
/// </summary>
/// <param name="metadataComparisonProperty">The current metadata property to compare in the given resources</param>
/// <param name="resources">resources to compare</param>
/// <returns>
/// The similarity for the property, or 0 if <c>ContainsOneDatatype</c> returns false
/// or the node kind is neither IRI nor Literal.
/// </returns>
public double Calculate(MetadataComparisonProperty metadataComparisonProperty, Entity[] resources)
{
    if (!metadataComparisonProperty.ContainsOneDatatype(out var nodeKind, out var dataType))
    {
        return 0;
    }

    if (nodeKind == Shacl.NodeKinds.IRI)
    {
        return CalculateIRISimilarity(metadataComparisonProperty, resources);
    }

    if (nodeKind == Shacl.NodeKinds.Literal)
    {
        return CalculateLiteralSimilarity(metadataComparisonProperty.Key, dataType, resources);
    }

    return 0;
}
/// <summary>
/// Calculates the differences of one metadata property between the given resources,
/// dispatching on the node kind reported by <c>ContainsOneDatatype</c>.
/// </summary>
/// <param name="metadataComparisonProperty">The current metadata property to compare in the given resources</param>
/// <param name="resources">resources to compare</param>
/// <returns>
/// The comparison result for the property, or an empty dictionary if <c>ContainsOneDatatype</c>
/// returns false or the node kind is neither IRI nor Literal.
/// </returns>
public IDictionary<string, IList<dynamic>> Calculate(MetadataComparisonProperty metadataComparisonProperty, Entity[] resources)
{
    if (metadataComparisonProperty.ContainsOneDatatype(out var nodeKind, out _))
    {
        if (nodeKind == Graph.Metadata.Constants.Shacl.NodeKinds.IRI)
        {
            return CalculateIRIDifference(metadataComparisonProperty, resources);
        }

        if (nodeKind == Graph.Metadata.Constants.Shacl.NodeKinds.Literal)
        {
            return CalculateLiteralValuesDifference(metadataComparisonProperty.Key, resources);
        }
    }

    // Nothing comparable: hand back an empty result.
    return new Dictionary<string, IList<dynamic>>();
}
/// <summary>
/// Inspects the per-resource-type properties of a MetadataComparisonProperty and reports their
/// NodeKind and DataType.
/// </summary>
/// <param name="metadataComparisonProperty">The metadata properties for comparison</param>
/// <param name="nodeKind">Nodekind of the current metadata, e.g. IRI or Literal; null if false is returned</param>
/// <param name="dataType">DataType of the current metadata, e.g. boolean or string; null if it could not be determined</param>
/// <returns>
/// False if <c>IsNotSingleValue</c> reports that the node kinds are not a single value; otherwise true.
/// </returns>
public static bool ContainsOneDatatype(this MetadataComparisonProperty metadataComparisonProperty, out string nodeKind, out string dataType)
{
    // Step 1: the resource types in comparison must agree on one NodeKind,
    // e.g. shacl:Literal or shacl:IRI
    var nodeKindsPerType = metadataComparisonProperty.Properties
        .Select(entry => entry.Value.GetValueOrNull(Graph.Metadata.Constants.Shacl.NodeKind, true));

    if (nodeKindsPerType.Any(candidate => candidate is IEnumerable<dynamic>))
    {
        throw new Exception.Models.BusinessException("Given metadata is incorrect, only single node kinds allowed");
    }

    if (IsNotSingleValue(nodeKindsPerType))
    {
        nodeKind = null;
        dataType = null;
        return false;
    }

    // Step 2: with one common NodeKind, check whether the resource types also agree on one data type.
    // Note: if the NodeKind is not shacl:Literal, the SHACL data type is not present in the resource types properties
    // e.g. rdfs:HTML, xmls:boolean, xmls:string
    var dataTypesPerType = metadataComparisonProperty.Properties
        .Select(entry => entry.Value.GetValueOrNull(Graph.Metadata.Constants.Shacl.Datatype, true));

    if (dataTypesPerType.Any(candidate => candidate is IEnumerable<dynamic>))
    {
        throw new Exception.Models.BusinessException("Given metadata is incorrect, only single data types allowed");
    }

    var dataTypeIsNotSingle = IsNotSingleValue(dataTypesPerType);

    nodeKind = nodeKindsPerType.First().ToString();

    if (dataTypeIsNotSingle)
    {
        dataType = null;
    }
    else
    {
        dataType = dataTypesPerType?.First().ToString();
    }

    return true;
}
/// <summary>
/// Copies the values of the given property from the first two resources into the result, keyed by resource id.
/// A resource that does not contain the property is left out of the result.
/// </summary>
/// <param name="metadataComparisonProperty">The current metadata property to compare in the given resources</param>
/// <param name="resources">resources to compare</param>
/// <exception cref="KeyNotFoundException">If both resources don't contain the key in the properties.</exception>
/// <returns>The comparison result for a specific property. While the returned key is the metadata key, the list contains the compared properties of both resources.</returns>
private IDictionary<string, IList<dynamic>> ExtractIRIValues(MetadataComparisonProperty metadataComparisonProperty, params Entity[] resources)
{
    var valuesByResourceId = new Dictionary<string, IList<dynamic>>();

    // Only the first two resources take part in the comparison.
    for (var index = 0; index < 2; index++)
    {
        var resource = resources[index];

        if (resource.Properties.TryGetValue(metadataComparisonProperty.Key, out List<dynamic> values))
        {
            valuesByResourceId.Add(resource.Id, values);
        }
    }

    if (valuesByResourceId.Count == 0)
    {
        throw new KeyNotFoundException($"Key {metadataComparisonProperty.Key} not found in properties.");
    }

    return valuesByResourceId;
}
/// <summary>
/// Calculates for resource properties with NodeKind IRI and nested metadata the difference between all subproperties in the list.
/// Nested entities are matched by PID URI and resource type. Matched pairs are compared sub-property by
/// sub-property; nested entities that exist in only one resource are added to that resource's result unchanged.
/// </summary>
/// <remarks>
/// The result contains an entry for a resource only if that resource has the property.
/// NOTE(review): a nested entity of the second resource that shares its PID URI with a nested entity of the
/// first resource but has a different type is neither compared nor added — confirm whether that is intended.
/// </remarks>
/// <param name="metadataComparisonProperty">The current metadata property to compare in the given resources</param>
/// <param name="resources">resources to compare</param>
/// <returns>The comparison result for a specific property. While the returned key is the metadata key, the list contains the compared properties of both resources.</returns>
private IDictionary<string, IList<dynamic>> CalculateIRINestedValuesDifference(MetadataComparisonProperty metadataComparisonProperty, params Entity[] resources)
{
    var propertyResults = new Dictionary<string, IList<dynamic>>();

    var allFirstNestedEntities = resources[0].Properties.TryGetValue(metadataComparisonProperty.Key, out List<dynamic> firstValues)
        ? firstValues?.Select(x => ((Entity)x)).ToList()
        : null;
    var allSecondNestedEntities = resources[1].Properties.TryGetValue(metadataComparisonProperty.Key, out List<dynamic> secondValues)
        ? secondValues?.Select(x => ((Entity)x)).ToList()
        : null;

    // Each resource that has the property gets its own result list. The second list must exist even if
    // the first resource lacks the property, otherwise the "inserted" entities below cannot be stored.
    if (allFirstNestedEntities != null)
    {
        propertyResults.Add(resources[0].Id, new List<dynamic>());
    }

    if (allSecondNestedEntities != null)
    {
        propertyResults.Add(resources[1].Id, new List<dynamic>());
    }

    if (allFirstNestedEntities != null)
    {
        // Compare all nested properties that are present in both resources.
        // The existence of the nested properties is given by the fact that both nested properties have the same PID URI and
        // the same resource type, e.g. Browsable Resource
        foreach (var firstEntity in allFirstNestedEntities)
        {
            // Nested entities without a PID URI cannot be matched and are skipped.
            if (!firstEntity.Properties.TryGetValue(Graph.Metadata.Constants.EnterpriseCore.PidUri, out List<dynamic> firstPidUri))
            {
                continue;
            }

            var secondEntity = allSecondNestedEntities?.SingleOrDefault(candidate =>
            {
                if (!candidate.Properties.TryGetValue(Graph.Metadata.Constants.EnterpriseCore.PidUri, out List<dynamic> candidatePidUri))
                {
                    return false;
                }

                if (((Entity)firstPidUri.First()).Id != ((Entity)candidatePidUri.First()).Id)
                {
                    return false;
                }

                if (firstEntity.Properties.TryGetValue(Graph.Metadata.Constants.RDF.Type, out List<dynamic> typesOfFirst) &&
                    candidate.Properties.TryGetValue(Graph.Metadata.Constants.RDF.Type, out List<dynamic> typesOfCandidate))
                {
                    if (typesOfFirst[0] == typesOfCandidate[0])
                    {
                        return true;
                    }
                }

                return false;
            });

            // If the second resource has no nested property with the same PID URI,
            // then add the found property to the first resource, mark all sub properties as deleted
            // and continue with the next property
            if (secondEntity == null)
            {
                // TODO SL: set all entries in entityA to <diffDel>
                propertyResults[resources[0].Id].Add(firstEntity);
                continue;
            }

            // If both properties are found with the same PID URI and the same resource type,
            // compare all sub - properties using the resource metadata and calculate the differences
            if (firstEntity.Properties.TryGetValue(Graph.Metadata.Constants.RDF.Type, out List<dynamic> firstEntityTypes))
            {
                var metadata = metadataComparisonProperty.NestedMetadata.First(m => m.Key == firstEntityTypes.First());

                var firstComparedEntity = new Entity() { Id = firstEntity.Id };
                var secondComparedEntity = new Entity() { Id = secondEntity.Id };

                foreach (var prop in metadata.Properties)
                {
                    try
                    {
                        // The PID URI is what matched the pair, so it is copied instead of compared.
                        if (prop.Key == Graph.Metadata.Constants.EnterpriseCore.PidUri)
                        {
                            firstComparedEntity.Properties.Add(prop.Key, firstEntity.Properties[Graph.Metadata.Constants.EnterpriseCore.PidUri]);
                            secondComparedEntity.Properties.Add(prop.Key, secondEntity.Properties[Graph.Metadata.Constants.EnterpriseCore.PidUri]);
                            continue;
                        }

                        var compareResult = CalculateLiteralValuesDifference(prop.Key, new Entity[] { firstEntity, secondEntity });
                        firstComparedEntity.Properties.Add(prop.Key, compareResult[firstEntity.Id].ToList());
                        secondComparedEntity.Properties.Add(prop.Key, compareResult[secondEntity.Id].ToList());
                    }
                    catch (KeyNotFoundException)
                    {
                        // Best effort: a sub-property without a comparison result is left out.
                    }
                }

                propertyResults[resources[0].Id].Add(firstComparedEntity);
                propertyResults[resources[1].Id].Add(secondComparedEntity);
            }
        }
    }

    // In the previous comparison it was ignored that there are properties of the second resource
    // that were not present in the first one. These must now be added and all sub properties must be marked as inserted.
    if (allSecondNestedEntities != null)
    {
        var secondEntitiesNotFoundInFirst = allSecondNestedEntities.Where(candidate =>
        {
            if (!candidate.Properties.TryGetValue(Graph.Metadata.Constants.EnterpriseCore.PidUri, out List<dynamic> secondPidUri))
            {
                return false;
            }

            // Without nested entities in the first resource, nothing can be "found in first".
            if (allFirstNestedEntities == null)
            {
                return true;
            }

            var secondPidUriId = ((Entity)secondPidUri.First()).Id;

            // TryGetValue: a first entity without PID URI simply does not match instead of throwing.
            return !allFirstNestedEntities.Any(known =>
                known.Properties.TryGetValue(Graph.Metadata.Constants.EnterpriseCore.PidUri, out List<dynamic> knownPidUri) &&
                ((Entity)knownPidUri.First()).Id == secondPidUriId);
        });

        // TODO SL: set all entries in entity to <diffIns>
        propertyResults[resources[1].Id].AddRange(secondEntitiesNotFoundInFirst);
    }

    return propertyResults;
}