/// <summary>
/// Calculates the maximum distance between any two values of the provided
/// attribute using the provided ISimilarityMeasure
/// </summary>
/// <param name="attribute">The attribute to retrieve the maximum distance for</param>
/// <param name="measure">The ISimilarityMeasure to use to calculate the distance</param>
/// <returns>the maximum distance found between any pair of values for the
/// given attribute, or 0 if the attribute is not in the global collection
/// or no pair of values produced a distance</returns>
private double CalculateMaxDistance(Data.Attributes.Attribute attribute, ISimilarityMeasure measure)
{
    var globalAttributes = GlobalAttributeCollection.GetInstance(this.scope);

    // Ensure that the global collection contains the attribute
    if (!globalAttributes.ContainsAttribute(attribute))
    {
        return 0;
    }

    List<string> values = new List<string>(globalAttributes.GetAttributeValues(attribute));

    // Stays null until a pair actually yields a distance. Seeding with
    // double.MinValue would leak that sentinel to the caller whenever
    // there are fewer than two values or the measure returns null for
    // every pair.
    double? maxDistance = null;

    for (int i = 1; i < values.Count; i++)
    {
        string current = values[i];

        // Compare against every earlier value so that each unordered
        // pair is evaluated exactly once
        for (int j = 0; j < i; j++)
        {
            double? distance = measure.CalculateDistance(current, values[j]);

            // A null result means the measure produced no distance for this pair
            if (!distance.HasValue)
            {
                continue;
            }

            maxDistance = maxDistance.HasValue
                ? Math.Max(maxDistance.Value, distance.Value)
                : distance.Value;
        }
    }

    return maxDistance ?? 0;
}
/// <summary>
/// Calculates the maximum distance between any two values of the provided
/// attribute using the provided ISimilarityMeasure
/// </summary>
/// <param name="attribute">The attribute to retrieve the maximum distance for</param>
/// <param name="measure">The ISimilarityMeasure to use to calculate the distance</param>
/// <returns>the maximum distance found between any pair of values for the
/// given attribute, or 0 if the attribute is not in the global collection
/// or no pair of values produced a distance</returns>
private double CalculateMaxDistance(Data.Attributes.Attribute attribute, ISimilarityMeasure measure)
{
    var collection = GlobalAttributeCollection.GetInstance(this.scope);

    // Ensure that the global collection contains the attribute
    if (!collection.ContainsAttribute(attribute))
    {
        return 0;
    }

    List<string> values = new List<string>(collection.GetAttributeValues(attribute));

    // Tracks whether any pair produced a distance. Without this the
    // method would return the double.MinValue seed when there are fewer
    // than two values or the measure returns null for every pair.
    bool foundDistance = false;
    double maxDistance = double.MinValue;

    for (int i = 0; i < values.Count; i++)
    {
        string a = values[i];

        // Only compare with values at lower indexes so that each
        // unordered pair is evaluated exactly once
        for (int j = 0; j < i; j++)
        {
            string b = values[j];
            double? distance = measure.CalculateDistance(a, b);

            // A null result means the measure produced no distance for this pair
            if (distance.HasValue)
            {
                maxDistance = Math.Max(maxDistance, distance.Value);
                foundDistance = true;
            }
        }
    }

    return foundDistance ? maxDistance : 0;
}
/// <summary>
/// Returns a list of tuples that contain the calculated distances
/// and the frequency of those distances
/// </summary>
/// <param name="attributeName">The name of the attribute that
/// distances are being calculated for</param>
/// <param name="measure">The similarity measure to be used</param>
/// <returns>a collection of distances and the number of times
/// that those distances occur, or null if the attribute is not in
/// the global attribute collection</returns>
private List<Tuple<double, int>> CalculateDistances(string attributeName, ISimilarityMeasure measure)
{
    var globalAttributes = GlobalAttributeCollection.GetInstance(this.scope);

    if (!globalAttributes.ContainsAttribute(attributeName))
    {
        return null;
    }

    List<Tuple<double, int>> distances = new List<Tuple<double, int>>();

    // Get the values for the attribute
    List<string> attributeValues = new List<string>(globalAttributes.GetAttributeValues(attributeName));

    // Look up the frequency of every value once up front instead of
    // repeating the lookup for each pair inside the nested loop.
    // The sum of the frequencies is the total number of nodes.
    int[] frequencies = new int[attributeValues.Count];
    long nodeCount = 0;

    for (int i = 0; i < attributeValues.Count; i++)
    {
        frequencies[i] = globalAttributes.GetFrequency(attributeName, attributeValues[i]);
        nodeCount += frequencies[i];
    }

    // Running total of the node pairs that received a distance
    long frequencyTotal = 0;

    // Compare every attribute value with each value before it so that
    // each unordered pair of values is evaluated exactly once
    for (int i = 1; i < attributeValues.Count; i++)
    {
        for (int j = 0; j < i; j++)
        {
            // Compute the distance for the two values
            double? distance = measure.CalculateDistance(attributeValues[i], attributeValues[j]);

            if (!distance.HasValue)
            {
                continue;
            }

            // Every node carrying the first value pairs with every
            // node carrying the second value
            int pairFrequency = frequencies[i] * frequencies[j];

            distances.Add(Tuple.Create<double, int>(distance.Value, pairFrequency));
            frequencyTotal += pairFrequency;
        }
    }

    // Since we only loop over unique attribute values we never make
    // the comparisons against nodes where the values would be the
    // same. We need to determine if this case has occurred and insert
    // the appropriate number of zero distance items.
    //
    // The number of unordered node pairs is C(n, 2) = n * (n - 1) / 2.
    // It is computed with integer arithmetic because a round trip
    // through Math.Exp and log-factorials can land slightly below the
    // true integer and drop one zero distance item. It also yields 0
    // for fewer than two nodes without evaluating a negative factorial.
    long combinations = nodeCount * (nodeCount - 1) / 2;

    // Pairs for which the measure returned no distance were never added
    // to frequencyTotal, so they are filled in as zero distance items too.
    long zeroDistanceItems = combinations - frequencyTotal;

    // Add in all the zero distance items that we need
    for (long i = 0; i < zeroDistanceItems; i++)
    {
        distances.Add(Tuple.Create<double, int>(0, 1));
    }

    return distances;
}
/// <summary>
/// Returns a list of tuples that contain the calculated distances
/// and the frequency of those distances
/// </summary>
/// <param name="attributeName">The name of the attribute that
/// distances are being calculated for</param>
/// <param name="measure">The similarity measure to be used</param>
/// <returns>a collection of distances and the number of times
/// that those distances occur, or null if the attribute is not in
/// the global attribute collection</returns>
private List<Tuple<double, int>> CalculateDistances(string attributeName, ISimilarityMeasure measure)
{
    var collection = GlobalAttributeCollection.GetInstance(this.scope);

    if (!collection.ContainsAttribute(attributeName))
    {
        return null;
    }

    // Get the values for the attribute
    List<string> attributeValues = new List<string>(collection.GetAttributeValues(attributeName));
    int valueCount = attributeValues.Count;

    // Fetch each value's frequency a single time; the nested loop below
    // would otherwise repeat the same lookup for every pair. Summing
    // the frequencies gives the total number of nodes.
    int[] valueFrequencies = new int[valueCount];
    long nodeCount = 0;

    for (int index = 0; index < valueCount; index++)
    {
        int frequency = collection.GetFrequency(attributeName, attributeValues[index]);
        valueFrequencies[index] = frequency;
        nodeCount += frequency;
    }

    List<Tuple<double, int>> distances = new List<Tuple<double, int>>();

    // Number of node pairs that have been given a distance so far
    long frequencyTotal = 0;

    for (int source = 0; source < valueCount; source++)
    {
        // Compare the current attribute value to the values before it,
        // which visits each unordered pair of values exactly once
        for (int target = 0; target < source; target++)
        {
            // Compute the distance for the two values
            double? distance = measure.CalculateDistance(attributeValues[source], attributeValues[target]);

            if (distance.HasValue)
            {
                // Each node with the source value pairs with each
                // node with the target value
                int pairFrequency = valueFrequencies[source] * valueFrequencies[target];

                distances.Add(Tuple.Create<double, int>(distance.Value, pairFrequency));

                // Keep a running total of the frequencies
                frequencyTotal += pairFrequency;
            }
        }
    }

    // Since we only loop over unique attribute values we never make
    // the comparisons against nodes where the values would be the
    // same. We need to determine if this case has occurred and insert
    // the appropriate number of zero distance items.
    //
    // C(n, 2) = n * (n - 1) / 2 is the number of unordered node pairs.
    // Integer arithmetic keeps it exact; going through Math.Exp and
    // log-factorials can come out slightly under the true integer and
    // lose one zero distance item. It is also 0 for fewer than two
    // nodes, so no negative factorial is ever evaluated.
    long combinations = nodeCount * (nodeCount - 1) / 2;

    // Pairs for which the measure returned no distance are not part of
    // frequencyTotal, so they are also filled in as zero distance items.
    long remaining = combinations - frequencyTotal;

    // Add in all the zero distance items that we need
    while (remaining > 0)
    {
        distances.Add(Tuple.Create<double, int>(0, 1));
        remaining--;
    }

    return distances;
}