예제 #1
0
        /// <summary>
        /// Calculates the largest pairwise distance between the values of the
        /// provided attribute using the provided ISimilarityMeasure
        /// </summary>
        /// <param name="attribute">The attribute to retrieve the maximum distance for</param>
        /// <param name="measure">The ISimilarityMeasure to use to calculate the distance</param>
        /// <returns>the largest non-null distance found between any two values of
        /// the attribute, or 0 when the attribute is not in the global collection
        /// or when no pair of values produced a distance</returns>
        private double CalculateMaxDistance(Data.Attributes.Attribute attribute, ISimilarityMeasure measure)
        {
            // Fetch the collection once instead of once per use
            var globalAttributes = GlobalAttributeCollection.GetInstance(this.scope);

            // Ensure that the global collection contains the attribute
            if (!globalAttributes.ContainsAttribute(attribute))
            {
                return 0;
            }

            List<string> values = new List<string>(globalAttributes.GetAttributeValues(attribute));

            double maxDistance = double.MinValue;

            // Tracks whether any pair actually yielded a distance, so that the
            // double.MinValue seed is never handed back to the caller
            bool distanceFound = false;

            // TODO: is there a more efficient method for handling this?
            for (int i = 0; i < values.Count; i++)
            {
                string a = values[i];

                // Compare the current value against every value that precedes
                // it, so each unordered pair is measured exactly once
                for (int j = 0; j < i; j++)
                {
                    string b = values[j];
                    double? distance = measure.CalculateDistance(a, b);

                    // A null distance means the measure produced no result
                    // for this pair, so it is skipped
                    if (distance != null)
                    {
                        maxDistance = Math.Max(maxDistance, distance.Value);
                        distanceFound = true;
                    }
                }
            }

            // Fall back to 0 (the same value used for a missing attribute)
            // when there were fewer than two values or no pair was measurable
            return distanceFound ? maxDistance : 0;
        }
예제 #2
0
        /// <summary>
        /// Calculates the largest pairwise distance between the values of the
        /// provided attribute using the provided ISimilarityMeasure
        /// </summary>
        /// <param name="attribute">The attribute to retrieve the maximum distance for</param>
        /// <param name="measure">The ISimilarityMeasure to use to calculate the distance</param>
        /// <returns>the largest non-null distance found between any two values of
        /// the attribute, or 0 when the attribute is not in the global collection
        /// or when no pair of values produced a distance</returns>
        private double CalculateMaxDistance(Data.Attributes.Attribute attribute, ISimilarityMeasure measure)
        {
            // Fetch the collection once instead of once per use
            var globalAttributes = GlobalAttributeCollection.GetInstance(this.scope);

            // Ensure that the global collection contains the attribute
            if (!globalAttributes.ContainsAttribute(attribute))
            {
                return 0;
            }

            List<string> values = new List<string>(globalAttributes.GetAttributeValues(attribute));

            double maxDistance = double.MinValue;

            // Tracks whether any pair actually yielded a distance, so that the
            // double.MinValue seed is never handed back to the caller
            bool distanceFound = false;

            // TODO: is there a more efficient method for handling this?
            for (int i = 0; i < values.Count; i++)
            {
                string a = values[i];

                // Compare the current value against every value that precedes
                // it, so each unordered pair is measured exactly once
                for (int j = 0; j < i; j++)
                {
                    string b = values[j];
                    double? distance = measure.CalculateDistance(a, b);

                    // A null distance means the measure produced no result
                    // for this pair, so it is skipped
                    if (distance != null)
                    {
                        maxDistance = Math.Max(maxDistance, distance.Value);
                        distanceFound = true;
                    }
                }
            }

            // Fall back to 0 (the same value used for a missing attribute)
            // when there were fewer than two values or no pair was measurable
            return distanceFound ? maxDistance : 0;
        }
예제 #3
0
        /// <summary>
        /// Returns a list of tuples that contain the calculated distances
        /// and the frequency of those distances
        /// </summary>
        /// <param name="attributeName">The name of the attribute that
        /// distances are being calculated for</param>
        /// <param name="measure">The similarity measure to be used</param>
        /// <returns>a collection of (distance, frequency) tuples: one tuple per
        /// pair of distinct attribute values with a non-null distance, weighted
        /// by the product of the two values' frequencies, followed by one
        /// (0, 1) tuple for every remaining node pair; null when the attribute
        /// is not in the global collection</returns>
        private List<Tuple<double, int>> CalculateDistances(string attributeName, ISimilarityMeasure measure)
        {
            // Fetch the collection once instead of once per use
            var globalAttributes = GlobalAttributeCollection.GetInstance(this.scope);

            if (!globalAttributes.ContainsAttribute(attributeName))
            {
                return null;
            }

            List<Tuple<double, int>> distances = new List<Tuple<double, int>>();

            // Get the values for the attribute
            List<string> attributeValues = new List<string>(globalAttributes.GetAttributeValues(attributeName));

            // Frequency of each value, indexed like attributeValues. Entry j is
            // always filled before it is read because the inner loop only
            // visits j < i.
            int[] frequencies = new int[attributeValues.Count];

            // Kept as long so the pair-count arithmetic below cannot overflow
            long pairedTotal = 0;
            long nodeCount = 0;

            // Loop over all the attribute values
            for (int i = 0; i < attributeValues.Count; i++)
            {
                // Get the frequency at which the source attribute value occurs
                int sourceFrequency = globalAttributes.GetFrequency(attributeName, attributeValues[i]);

                frequencies[i] = sourceFrequency;
                nodeCount += sourceFrequency;

                // Compare the current attribute value to every value that
                // precedes it, so each unordered pair is measured exactly once
                for (int j = 0; j < i; j++)
                {
                    // Compute the distance for the two values
                    double? distance = measure.CalculateDistance(attributeValues[i], attributeValues[j]);

                    if (distance == null)
                    {
                        continue;
                    }

                    // Number of node pairs that share this value pair
                    int pairFrequency = sourceFrequency * frequencies[j];

                    distances.Add(Tuple.Create<double, int>(distance.Value, pairFrequency));

                    // Keep a running total of the frequencies
                    pairedTotal += pairFrequency;
                }
            }

            // Since we only loop over unique attribute values we never make
            // the comparisons against nodes where the values would be the
            // same.  We need to determine if this case has occurred and insert
            // the appropriate number of zero distance items.
            // NOTE(review): pairs whose distance was null are not part of
            // pairedTotal either, so they are also filled in as zero distance
            // items here -- confirm that this is intended.

            // Number of unordered node pairs, C(nodeCount, 2), computed with
            // exact integer arithmetic.  This yields 0 for fewer than two nodes
            // and avoids the rounding error of an exp/log-factorial round trip,
            // which could drop one zero distance item.
            long combinations = nodeCount * (nodeCount - 1) / 2;

            // Add in all the zero distance items that we need
            for (long remaining = combinations - pairedTotal; remaining > 0; remaining--)
            {
                distances.Add(Tuple.Create<double, int>(0, 1));
            }

            return distances;
        }
예제 #4
0
        /// <summary>
        /// Returns a list of tuples that contain the calculated distances
        /// and the frequency of those distances
        /// </summary>
        /// <param name="attributeName">The name of the attribute that
        /// distances are being calculated for</param>
        /// <param name="measure">The similarity measure to be used</param>
        /// <returns>a collection of (distance, frequency) tuples: one tuple per
        /// pair of distinct attribute values with a non-null distance, weighted
        /// by the product of the two values' frequencies, followed by one
        /// (0, 1) tuple for every remaining node pair; null when the attribute
        /// is not in the global collection</returns>
        private List<Tuple<double, int>> CalculateDistances(string attributeName, ISimilarityMeasure measure)
        {
            // Fetch the collection once instead of once per use
            var globalAttributes = GlobalAttributeCollection.GetInstance(this.scope);

            if (!globalAttributes.ContainsAttribute(attributeName))
            {
                return null;
            }

            List<Tuple<double, int>> distances = new List<Tuple<double, int>>();

            // Get the values for the attribute
            List<string> attributeValues = new List<string>(globalAttributes.GetAttributeValues(attributeName));

            // Frequency of each value, indexed like attributeValues. Entry j is
            // always filled before it is read because the inner loop only
            // visits j < i.
            int[] frequencies = new int[attributeValues.Count];

            // Kept as long so the pair-count arithmetic below cannot overflow
            long pairedTotal = 0;
            long nodeCount = 0;

            // Loop over all the attribute values
            for (int i = 0; i < attributeValues.Count; i++)
            {
                // Get the frequency at which the source attribute value occurs
                int sourceFrequency = globalAttributes.GetFrequency(attributeName, attributeValues[i]);

                frequencies[i] = sourceFrequency;
                nodeCount += sourceFrequency;

                // Compare the current attribute value to every value that
                // precedes it, so each unordered pair is measured exactly once
                for (int j = 0; j < i; j++)
                {
                    // Compute the distance for the two values
                    double? distance = measure.CalculateDistance(attributeValues[i], attributeValues[j]);

                    if (distance == null)
                    {
                        continue;
                    }

                    // Number of node pairs that share this value pair
                    int pairFrequency = sourceFrequency * frequencies[j];

                    distances.Add(Tuple.Create<double, int>(distance.Value, pairFrequency));

                    // Keep a running total of the frequencies
                    pairedTotal += pairFrequency;
                }
            }

            // Since we only loop over unique attribute values we never make
            // the comparisons against nodes where the values would be the
            // same.  We need to determine if this case has occurred and insert
            // the appropriate number of zero distance items.
            // NOTE(review): pairs whose distance was null are not part of
            // pairedTotal either, so they are also filled in as zero distance
            // items here -- confirm that this is intended.

            // Number of unordered node pairs, C(nodeCount, 2), computed with
            // exact integer arithmetic.  This yields 0 for fewer than two nodes
            // and avoids the rounding error of an exp/log-factorial round trip,
            // which could drop one zero distance item.
            long combinations = nodeCount * (nodeCount - 1) / 2;

            // Add in all the zero distance items that we need
            for (long remaining = combinations - pairedTotal; remaining > 0; remaining--)
            {
                distances.Add(Tuple.Create<double, int>(0, 1));
            }

            return distances;
        }