예제 #1
0
        /// <summary>
        /// Computes the similarity of two values of the named attribute.
        /// The similarity measure is read from the per-attribute cache when an
        /// entry exists; otherwise the preferred measure is tried first and,
        /// failing that, a measure is determined from the two values.
        /// </summary>
        /// <param name="attributeName">The name of the attribute to measure similarity for</param>
        /// <param name="sourceValue">The source value for similarity measurement</param>
        /// <param name="targetValue">The target value for similarity measurement</param>
        /// <returns>the value returned by the four-argument overload for the resolved measure</returns>
        public double? ComputeAttributeSimilarity(string attributeName, string sourceValue, string targetValue)
        {
            ISimilarityMeasure measure;

            if (attributeToMeasure.ContainsKey(attributeName))
            {
                // A measure was already associated with this attribute
                measure = SimilarityMeasures[attributeToMeasure[attributeName]];
            }
            else
            {
                // Try the preferred measure first, then fall back to one
                // chosen from the values themselves
                measure = GetPrefferedSimilarityMeasure(attributeName);

                if (measure == null)
                {
                    measure = DetermineSimilarityMeasure(attributeName, sourceValue, targetValue);
                }

                // Cache the measure only when it is really part of the
                // collection.  IndexOf reports a negative index (-1 for the
                // standard list types) for a null or unknown measure; caching
                // that would make the next call for this attribute index
                // SimilarityMeasures with it and fail.
                int measureIndex = SimilarityMeasures.IndexOf(measure);

                if (measureIndex >= 0)
                {
                    this.attributeToMeasure.Add(attributeName, measureIndex);
                }
            }

            // Delegate the actual computation to the overload taking a measure
            return ComputeAttributeSimilarity(attributeName, sourceValue, targetValue, measure);
        }
예제 #2
0
 /// <summary>
 /// Initializes a new KMeansLocalSearch by storing the supplied arguments.
 /// </summary>
 /// <param name="dataset">Stored in the <c>_dataset</c> field.</param>
 /// <param name="maxIterations">Stored in the <c>_maxIterations</c> field.</param>
 /// <param name="similarityMeasure">Stored in the <c>_similarityMeasure</c> field.</param>
 /// <param name="evaluator">Assigned to <c>SolutionQualityEvaluator</c>.</param>
 public KMeansLocalSearch(DataMining.Data.Dataset dataset, int maxIterations, ISimilarityMeasure similarityMeasure, ClusteringQualityEvaluator evaluator)
 {
     _dataset = dataset;
     _maxIterations = maxIterations;
     SolutionQualityEvaluator = evaluator;
     _similarityMeasure = similarityMeasure;
 }
예제 #3
0
        /// <summary>
        /// Calculates the maximum distance for all values for the provided attribute
        /// name using the provided ISimilarityMeasure.  The name is resolved to an
        /// attribute instance and the work is delegated to the attribute overload.
        /// </summary>
        /// <param name="attributeName">The attribute name to retrieve the maximum distance for</param>
        /// <param name="measure">The ISimilarityMeasure to use to calculate the distance</param>
        /// <returns>the maximum distance value for all values for the given attribute,
        /// or 0 when no attribute with that name exists</returns>
        private double CalculateMaxDistance(string attributeName, ISimilarityMeasure measure)
        {
            // Resolve the attribute instance by name
            Data.Attributes.Attribute attributeFound = GlobalAttributeCollection.GetInstance(this.scope)
                .GetAttributes()
                .FirstOrDefault(attribute => attribute.Name == attributeName);

            // FirstOrDefault yields null when no attribute carries the name.
            // Report 0 here, the same value the attribute overload returns for
            // an attribute the collection does not contain, rather than
            // forwarding a null reference.
            if (attributeFound == null)
            {
                return 0;
            }

            return CalculateMaxDistance(attributeFound, measure);
        }
예제 #4
0
        /// <summary>
        /// Calculates the maximum distance over every pair of values of the
        /// provided attribute using the provided ISimilarityMeasure.
        /// </summary>
        /// <param name="attribute">The attribute to retrieve the maximum distance for</param>
        /// <param name="measure">The ISimilarityMeasure to use to calculate the distance</param>
        /// <returns>the largest distance the measure produced for any pair of values;
        /// 0 when the attribute is not in the global collection or when no pair
        /// produced a distance</returns>
        private double CalculateMaxDistance(Data.Attributes.Attribute attribute, ISimilarityMeasure measure)
        {
            var attributeCollection = GlobalAttributeCollection.GetInstance(this.scope);

            // Ensure that the global collection contains the attribute
            if (!attributeCollection.ContainsAttribute(attribute))
            {
                return 0;
            }

            List<string> values = new List<string>(attributeCollection.GetAttributeValues(attribute));

            // Start from 0 and track whether a distance was seen, so that an
            // attribute with fewer than two values (or only incomparable
            // values) no longer reports the double.MinValue sentinel.
            double maxDistance = 0;
            bool distanceSeen = false;

            // Each unordered pair (i, j) with j < i is compared exactly once
            for (int i = 0; i < values.Count; i++)
            {
                string a = values[i];

                for (int j = 0; j < i; j++)
                {
                    double? distance = measure.CalculateDistance(a, values[j]);

                    // A null distance means the measure gave no result for the pair
                    if (!distance.HasValue)
                    {
                        continue;
                    }

                    maxDistance = distanceSeen ? Math.Max(maxDistance, distance.Value) : distance.Value;
                    distanceSeen = true;
                }
            }

            return maxDistance;
        }
예제 #5
0
파일: KMeans.cs 프로젝트: skn123/iFourmi
 /// <summary>
 /// Initializes a new KMeans instance by storing the supplied arguments.
 /// </summary>
 /// <param name="dataset">Stored in the <c>_dataset</c> field.</param>
 /// <param name="clustersNumber">Stored in the <c>_clustersNumber</c> field.</param>
 /// <param name="similarityMeasure">Stored in the <c>_similarityMeasure</c> field.</param>
 /// <param name="maxtIterations">Stored in the <c>_maxIterations</c> field.</param>
 /// <param name="fireEvents">Stored in the <c>_fireEvents</c> field.</param>
 public KMeans(Dataset dataset, int clustersNumber, ISimilarityMeasure similarityMeasure, int maxtIterations, bool fireEvents)
 {
     _dataset = dataset;
     _clustersNumber = clustersNumber;
     _maxIterations = maxtIterations;
     _similarityMeasure = similarityMeasure;
     _fireEvents = fireEvents;
 }
예제 #6
0
 /// <summary>
 /// Initializes a new ClusterBMN without a training set by storing the
 /// supplied arguments.
 /// </summary>
 /// <param name="clustersNumber">Stored in the <c>_clustersNumber</c> field.</param>
 /// <param name="similarityMeasure">Stored in the <c>_similarityMeasure</c> field.</param>
 /// <param name="classificationMeasure">Stored in the <c>_classificationMeasure</c> field.</param>
 /// <param name="clusteringAlgorithm">Stored in the <c>_clusteringAlgorithm</c> field.</param>
 /// <param name="classificationAlgorithm">Stored in the <c>_classificationAlgorithm</c> field.</param>
 public ClusterBMN(int clustersNumber, ISimilarityMeasure similarityMeasure, IClassificationQualityMeasure classificationMeasure, IClusteringAlgorithm clusteringAlgorithm, IClassificationAlgorithm classificationAlgorithm)
 {
     _similarityMeasure = similarityMeasure;
     _clustersNumber = clustersNumber;
     _classificationMeasure = classificationMeasure;
     _clusteringAlgorithm = clusteringAlgorithm;
     _classificationAlgorithm = classificationAlgorithm;
 }
예제 #7
0
 /// <summary>
 /// Initializes a new ClusterBMN with a training set by storing the
 /// supplied arguments.
 /// </summary>
 /// <param name="trainingset">Stored in the <c>_trainingset</c> field.</param>
 /// <param name="clustersNumber">Stored in the <c>_clustersNumber</c> field.</param>
 /// <param name="similarityMeasure">Stored in the <c>_similarityMeasure</c> field.</param>
 /// <param name="classificationMeasure">Stored in the <c>_classificationMeasure</c> field.</param>
 /// <param name="clusteringAlgorithm">Stored in the <c>_clusteringAlgorithm</c> field.</param>
 /// <param name="classificationAlgorithm">Stored in the <c>_classificationAlgorithm</c> field.</param>
 public ClusterBMN(DataMining.Data.Dataset trainingset, int clustersNumber, ISimilarityMeasure similarityMeasure, IClassificationQualityMeasure classificationMeasure, IClusteringAlgorithm clusteringAlgorithm, IClassificationAlgorithm classificationAlgorithm)
 {
     _trainingset = trainingset;
     _similarityMeasure = similarityMeasure;
     _clustersNumber = clustersNumber;
     _classificationMeasure = classificationMeasure;
     _clusteringAlgorithm = clusteringAlgorithm;
     _classificationAlgorithm = classificationAlgorithm;
 }
예제 #8
0
        /// <summary>
        /// Initializes a new SimilarityRetriever with the measure and case
        /// threshold to use.
        /// </summary>
        /// <param name="measure">The similarity measure to store; must not be null.</param>
        /// <param name="threshold">The case threshold to store; must be at least 1.</param>
        /// <exception cref="ArgumentException"><paramref name="threshold"/> is less than 1.</exception>
        /// <exception cref="ArgumentNullException"><paramref name="measure"/> is null.</exception>
        public SimilarityRetriever(ISimilarityMeasure measure, int threshold)
        {
            // The threshold is validated (and stored) before the measure is
            // inspected, so an invalid threshold is reported first.
            if (threshold <= 0)
            {
                throw new ArgumentException("Case threshold should be greater than 0.", nameof(threshold));
            }

            this.threshold = threshold;

            if (measure == null)
            {
                throw new ArgumentNullException(nameof(measure));
            }

            this.measure = measure;
        }
예제 #9
0
 /// <summary>
 ///     Initializes a new <see cref="EvolutionaryDistanceCalculator{TProgram,TOutput}" /> that keeps
 ///     references to the given similarity measure, primitive set and operators.
 /// </summary>
 /// <param name="similarityMeasure">The similarity measure to guide the transformation search process.</param>
 /// <param name="primitiveSet">The primitives used for the crossover operator.</param>
 /// <param name="crossovers">The crossover operators to generate programs during the search.</param>
 /// <param name="mutations">The mutation operators to generate programs during the search.</param>
 public EvolutionaryDistanceCalculator(
     ISimilarityMeasure<TProgram> similarityMeasure,
     PrimitiveSet<TProgram> primitiveSet,
     IEnumerable<ICrossoverOperator<TProgram>> crossovers,
     IEnumerable<IMutationOperator<TProgram>> mutations)
 {
     // The arguments are stored as given; no copies are made here.
     _similarityMeasure = similarityMeasure;
     _primitiveSet = primitiveSet;
     _crossovers = crossovers;
     _mutations = mutations;
 }
예제 #10
0
 /// <summary>
 /// Initializes a new ClusteringSolution: stores the arguments, creates one
 /// Cluster per requested cluster and allocates the belongingness array.
 /// </summary>
 /// <param name="dataset">Stored in <c>_dataset</c>; its <c>Size</c> sets the length of <c>_belongingness</c>.</param>
 /// <param name="clustersNumber">The number of Cluster instances to create.</param>
 /// <param name="similarityMeasure">Stored in the <c>_similarityMeasure</c> field.</param>
 /// <param name="proximityMatrix">Assigned to <c>ProximityMatrix</c>.</param>
 public ClusteringSolution(Dataset dataset, int clustersNumber, ISimilarityMeasure similarityMeasure, double[,] proximityMatrix)
 {
     _dataset = dataset;
     ProximityMatrix = proximityMatrix;
     _similarityMeasure = similarityMeasure;

     // Every cluster receives this solution and its own position in the array
     _clusters = new Cluster[clustersNumber];
     for (int clusterIndex = 0; clusterIndex < _clusters.Length; clusterIndex++)
     {
         _clusters[clusterIndex] = new Cluster(this, clusterIndex);
     }

     // One slot per dataset entry
     _belongingness = new int[_dataset.Size];
 }
예제 #11
0
        /// <summary>
        /// Builds the AttributeSimilarityDescriptor for this criterion from the
        /// current input values.  The criterion is validated first.
        /// </summary>
        /// <returns>a descriptor holding the selected attribute, the similarity
        /// measure and the weight; null when the criterion is not both valid
        /// and active</returns>
        public AttributeSimilarityDescriptor GetSimilarityDescriptor()
        {
            // Refresh the validation state before it is inspected
            Validate();

            // Nothing to describe unless the criterion is valid and active
            if (!IsValid || !IsActive)
            {
                return null;
            }

            ISimilarityMeasure measureToUse;
            double weightToUse;

            if (currentMode == ClusteringToolMode.Simple)
            {
                if (UseExactSimilarity)
                {
                    // 'Exact' is selected: use the exact-match measure
                    measureToUse = AttributeSimilarityManager.Instance.GetSimilarityMeasureInstance(typeof(Berico.SnagL.Infrastructure.Similarity.ExactMatchSimilarityMeasure).FullName);
                }
                else
                {
                    // Otherwise use the default measure of the selected attribute
                    measureToUse = AttributeSimilarityManager.Instance.GetDefaultSimilarityMeasure(SelectedAttribute);
                }

                // Simple mode always uses a weight of 1 (no weighting)
                weightToUse = 1d;
            }
            else
            {
                // Use the measure that was picked explicitly
                measureToUse = SelectedSimilarityMeasure;

                // Weight is divided by 100 before it is handed to the descriptor
                weightToUse = Weight / 100d;
            }

            return new AttributeSimilarityDescriptor(SelectedAttribute, measureToUse, weightToUse);
        }
예제 #12
0
        /// <summary>
        /// Calculates the mean of all distances (over all values) for the given
        /// attribute using the specified similarity measure.
        /// </summary>
        /// <param name="attributeName">The attribute to compute the mean
        /// distance for</param>
        /// <param name="measure">The similarity measure to be used</param>
        /// <returns>the mean of the distances returned by CalculateDistances,
        /// or 0 when that list is null or empty</returns>
        private double CalculateDistanceMean(string attributeName, ISimilarityMeasure measure)
        {
            List<Tuple<double, int>> distances = CalculateDistances(attributeName, measure);

            // Only a non-empty list has a mean
            if (distances != null && distances.Count > 0)
            {
                return distances.Mean();
            }

            return 0;
        }
예제 #13
0
        /// <summary>
        /// Returns the standard deviation of all distances (over all values) for
        /// the given attribute and similarity measure.  The value is taken from
        /// the cache when present; otherwise it is calculated and cached.
        /// </summary>
        /// <param name="attributeName">The attribute to compute the standard
        /// deviation for</param>
        /// <param name="measure">The similarity measure to be used; its
        /// ToString() value forms part of the cache key</param>
        /// <returns>the standard deviation for all distances over all attribute
        /// values for the given attribute and similarity measure</returns>
        public double GetDistanceStandardDeviation(string attributeName, ISimilarityMeasure measure)
        {
            // Cache entries are keyed by attribute name and the measure's string form
            Tuple<string, string> cacheKey = Tuple.Create(attributeName, measure.ToString());

            double standardDeviation;

            if (!this.sdCache.TryGetValue(cacheKey, out standardDeviation))
            {
                // Not cached yet: compute the value and remember it
                standardDeviation = CalculateDistanceSD(attributeName, measure);
                this.sdCache[cacheKey] = standardDeviation;
            }

            return standardDeviation;
        }
예제 #14
0
        /// <summary>
        /// Returns the maximum distance for all values of the provided attribute
        /// using the provided ISimilarityMeasure.  The value is taken from the
        /// cache when present; otherwise it is calculated and cached.
        /// </summary>
        /// <param name="attributeName">The name of the attribute to retrieve the maximum distance for</param>
        /// <param name="measure">The ISimilarityMeasure to use to calculate the
        /// distance; its ToString() value forms part of the cache key</param>
        /// <returns>the maximum distance value for all values for the given attribute</returns>
        public double GetMaxDistance(string attributeName, ISimilarityMeasure measure)
        {
            // Cache entries are keyed by attribute name and the measure's string form
            Tuple<string, string> cacheKey = Tuple.Create(attributeName, measure.ToString());

            double maxDistance;

            if (!this.distancesCache.TryGetValue(cacheKey, out maxDistance))
            {
                // Not cached yet: compute the value and remember it
                maxDistance = CalculateMaxDistance(attributeName, measure);
                this.distancesCache[cacheKey] = maxDistance;
            }

            return maxDistance;
        }
예제 #15
0
        /// <summary>
        /// Calculates the standard deviation of all distances (over all values)
        /// for the given attribute using the specified similarity measure.
        /// </summary>
        /// <param name="attributeName">The attribute to compute the standard
        /// deviation for</param>
        /// <param name="measure">The similarity measure to be used</param>
        /// <returns>the standard deviation of the distances returned by
        /// CalculateDistances, or 0 when that list is null or empty</returns>
        private double CalculateDistanceSD(string attributeName, ISimilarityMeasure measure)
        {
            List<Tuple<double, int>> distances = CalculateDistances(attributeName, measure);

            // Without any distances there is nothing to deviate
            bool hasDistances = distances != null && distances.Count > 0;

            if (!hasDistances)
            {
                return 0;
            }

            // The mean is requested only once distances are known to exist
            return distances.StandardDeviation(GetDistanceMean(attributeName, measure));
        }
예제 #16
0
        /// <summary>
        /// Returns the similarity measure to use for the provided attribute.
        /// A measure already associated with the attribute wins; otherwise the
        /// measure type is chosen by inspecting the two values.
        /// </summary>
        /// <param name="attributeName">The name of the attribute</param>
        /// <param name="sourceValue">The source value</param>
        /// <param name="targetValue">The target value</param>
        /// <returns>the associated measure when one exists; otherwise the result
        /// of GetSimilarityMeasureInstance for the chosen measure type</returns>
        private ISimilarityMeasure DetermineSimilarityMeasure(string attributeName, string sourceValue, String targetValue)
        {
            // Reuse the measure that was already associated with this attribute
            if (attributeToMeasure.ContainsKey(attributeName))
            {
                return SimilarityMeasures[attributeToMeasure[attributeName]];
            }

            // Pick the measure type from the kind of data held in the two
            // values.  The checks run in this order: number, date, geo
            // coordinate; anything else is compared as a string.
            Type measureType;

            if (IsNumber(sourceValue, targetValue))
            {
                measureType = typeof(NumericSimilarityMeasure);
            }
            else if (IsDate(sourceValue, targetValue))
            {
                measureType = typeof(DateTimeSimilarityMeasure);
            }
            else if (GeoCoordinate.IsValid(sourceValue) && GeoCoordinate.IsValid(targetValue))
            {
                measureType = typeof(GeospatialSimilarityMeasure);
            }
            else
            {
                measureType = typeof(LevenshteinDistanceStringSimilarityMeasure);
            }

            // Note: the chosen measure is not added to attributeToMeasure here.
            return GetSimilarityMeasureInstance(measureType.FullName);
        }
예제 #17
0
        /// <summary>
        /// Returns the ISimilarityMeasure from the SimilarityMeasures collection
        /// whose runtime type equals the type named by the given string.
        /// </summary>
        /// <param name="attributeType">A type name passed to Type.GetType to
        /// identify the wanted ISimilarityMeasure class</param>
        /// <returns>the first measure in the collection with exactly that type;
        /// otherwise null</returns>
        public ISimilarityMeasure GetSimilarityMeasureInstance(string attributeType)
        {
            // Type.GetType yields null for a name it cannot resolve; no measure
            // then matches and the lookup below returns null.
            Type requestedType = Type.GetType(attributeType);

            return this.SimilarityMeasures.FirstOrDefault(candidate => candidate.GetType() == requestedType);
        }
예제 #18
0
        /// <summary>
        /// Returns the default similarity measure for the provided attribute.
        /// The preferred measure is used when one is set; otherwise the measure
        /// is derived from the first and last stored value of the attribute.
        /// </summary>
        /// <remarks>
        /// NOTE(review): this method does not read the attribute-to-measure
        /// cache itself, but DetermineSimilarityMeasure, as it appears in this
        /// file, does consult that cache first.  Confirm that this is the
        /// intended "default" behavior.
        /// </remarks>
        /// <param name="attributeName">The name of the target attribute</param>
        /// <returns>the default ISimilarityMeasure that should be used for the provided attribute</returns>
        /// <exception cref="ArgumentNullException"><paramref name="attributeName"/> is null or empty.</exception>
        public ISimilarityMeasure GetDefaultSimilarityMeasure(string attributeName)
        {
            // Validate parameter.  The parameter name reported by the exception
            // now matches the actual parameter (it was "AttributeName").
            if (string.IsNullOrEmpty(attributeName))
            {
                throw new ArgumentNullException("attributeName", "No valid attribute name was provided");
            }

            // A preferred similarity measure, when set, is the default
            ISimilarityMeasure defaultMeasure = GetPrefferedSimilarityMeasure(attributeName);

            if (defaultMeasure != null)
            {
                return defaultMeasure;
            }

            // No preferred measure is set, so the default is determined from
            // the data stored in the attribute.  The value collection is
            // looked up once and sampled at both ends.
            var attributeValues = GlobalAttributeCollection.GetInstance(this.scope).GetAttributeValues(attributeName);
            string firstValue = attributeValues.FirstOrDefault();
            string secondValue = attributeValues.LastOrDefault();

            // Without two usable sample values, fall back to string comparison
            if (string.IsNullOrEmpty(firstValue) || string.IsNullOrEmpty(secondValue))
            {
                return GetSimilarityMeasureInstance(typeof(LevenshteinDistanceStringSimilarityMeasure).FullName);
            }

            return DetermineSimilarityMeasure(attributeName, firstValue, secondValue);
        }
예제 #19
0
        /// <summary>
        /// Calculates the standard deviation of all distances (over all values)
        /// for the given attribute using the specified similarity measure.
        /// </summary>
        /// <param name="attributeName">The attribute to compute the standard
        /// deviation for</param>
        /// <param name="measure">The similarity measure to be used</param>
        /// <returns>the standard deviation of the distances returned by
        /// CalculateDistances, or 0 when that list is null or empty</returns>
        private double CalculateDistanceSD(string attributeName, ISimilarityMeasure measure)
        {
            List<Tuple<double, int>> distances = CalculateDistances(attributeName, measure);

            // The mean is requested only when there are distances to work with
            if (distances != null && distances.Count > 0)
            {
                double mean = GetDistanceMean(attributeName, measure);
                return distances.StandardDeviation(mean);
            }

            return 0;
        }
예제 #20
0
        /// <summary>
        /// Returns a list of tuples that contain the calculated distances
        /// and the frequency of those distances.  Each pair of distinct
        /// attribute values contributes one tuple (distance, product of the two
        /// value frequencies); the remaining node pairs are added as
        /// zero-distance tuples with a frequency of 1.
        /// </summary>
        /// <param name="attributeName">The name of the attribute that
        /// distances are being calculated for</param>
        /// <param name="measure">The similarity measure to be used</param>
        /// <returns>a collection of distances and the number of times
        /// that those distances occur; null when the global collection
        /// does not contain the attribute</returns>
        private List<Tuple<double, int>> CalculateDistances(string attributeName, ISimilarityMeasure measure)
        {
            var attributeCollection = GlobalAttributeCollection.GetInstance(this.scope);

            if (!attributeCollection.ContainsAttribute(attributeName))
                return null;

            // Get the values for the attribute
            List<string> attributeValues = new List<string>(attributeCollection.GetAttributeValues(attributeName));

            // Look up the frequency of every value once and total them up
            int[] frequencies = new int[attributeValues.Count];
            long nodeCount = 0;

            for (int i = 0; i < attributeValues.Count; i++)
            {
                frequencies[i] = attributeCollection.GetFrequency(attributeName, attributeValues[i]);
                nodeCount += frequencies[i];
            }

            List<Tuple<double, int>> distances = new List<Tuple<double, int>>();
            long frequencyTotal = 0;

            // Compare every unordered pair of values exactly once (j < i)
            for (int i = 0; i < attributeValues.Count; i++)
            {
                for (int j = 0; j < i; j++)
                {
                    double? distance = measure.CalculateDistance(attributeValues[i], attributeValues[j]);

                    // A null distance means the measure gave no result for the pair
                    if (!distance.HasValue)
                        continue;

                    int pairFrequency = frequencies[i] * frequencies[j];
                    distances.Add(Tuple.Create(distance.Value, pairFrequency));

                    // Keep a running total of the frequencies
                    frequencyTotal += pairFrequency;
                }
            }

            // Since we only loop over unique attribute values we never make
            // the comparisons against nodes where the values would be the
            // same, so those pairs are inserted as zero distance items.
            //
            // The number of node pairs is C(n, 2) = n * (n - 1) / 2.  It is
            // computed with integers: the previous exp(log-factorial) form was
            // inexact (a result just below a whole number dropped an item) and
            // evaluated a log-factorial of a negative number for n < 2.
            //
            // NOTE(review): pairs for which the measure returned null are not
            // part of frequencyTotal and therefore also end up as zero
            // distance items - confirm that this is intended.
            long combinations = nodeCount * (nodeCount - 1) / 2;
            long zeroDistanceCount = combinations - frequencyTotal;

            for (long k = 0; k < zeroDistanceCount; k++)
            {
                distances.Add(Tuple.Create(0d, 1));
            }

            return distances;
        }
예제 #21
0
        /// <summary>
        /// Calculates the mean of all distances (over all values) for the given
        /// attribute using the specified similarity measure.
        /// </summary>
        /// <param name="attributeName">The attribute to compute the mean
        /// distance for</param>
        /// <param name="measure">The similarity measure to be used</param>
        /// <returns>the mean of the distances returned by CalculateDistances,
        /// or 0 when that list is null or empty</returns>
        private double CalculateDistanceMean(string attributeName, ISimilarityMeasure measure)
        {
            List<Tuple<double, int>> distances = CalculateDistances(attributeName, measure);

            // An absent or empty list has no mean; report 0 instead
            bool hasDistances = distances != null && distances.Count > 0;

            if (!hasDistances)
            {
                return 0;
            }

            return distances.Mean();
        }
예제 #22
0
        /// <summary>
        /// Returns the maximum distance for all values of the provided attribute
        /// using the provided ISimilarityMeasure.  The value is taken from the
        /// cache when present; otherwise it is calculated and cached.
        /// </summary>
        /// <param name="attributeName">The name of the attribute to retrieve the maximum distance for</param>
        /// <param name="measure">The ISimilarityMeasure to use to calculate the
        /// distance; its ToString() value forms part of the cache key</param>
        /// <returns>the maximum distance value for all values for the given attribute</returns>
        public double GetMaxDistance(string attributeName, ISimilarityMeasure measure)
        {
            // Cache entries are keyed by attribute name and the measure's string form
            Tuple<string, string> cacheKey = Tuple.Create(attributeName, measure.ToString());

            double maxDistance;

            if (this.distancesCache.TryGetValue(cacheKey, out maxDistance))
            {
                return maxDistance;
            }

            // Not cached yet: compute the value and remember it
            maxDistance = CalculateMaxDistance(attributeName, measure);
            this.distancesCache[cacheKey] = maxDistance;

            return maxDistance;
        }
예제 #23
0
        /// <summary>
        /// Returns a list of tuples that contain the calculated distances
        /// and the frequency of those distances.  Each pair of distinct
        /// attribute values contributes one tuple (distance, product of the two
        /// value frequencies); the remaining node pairs are added as
        /// zero-distance tuples with a frequency of 1.
        /// </summary>
        /// <param name="attributeName">The name of the attribute that
        /// distances are being calculated for</param>
        /// <param name="measure">The similarity measure to be used</param>
        /// <returns>a collection of distances and the number of times
        /// that those distances occur; null when the global collection
        /// does not contain the attribute</returns>
        private List<Tuple<double, int>> CalculateDistances(string attributeName, ISimilarityMeasure measure)
        {
            var attributeCollection = GlobalAttributeCollection.GetInstance(this.scope);

            if (!attributeCollection.ContainsAttribute(attributeName))
            {
                return null;
            }

            // Get the values for the attribute
            List<string> attributeValues = new List<string>(attributeCollection.GetAttributeValues(attributeName));

            // Look up the frequency of every value once and total them up
            int[] frequencies = new int[attributeValues.Count];
            long nodeCount = 0;

            for (int i = 0; i < attributeValues.Count; i++)
            {
                frequencies[i] = attributeCollection.GetFrequency(attributeName, attributeValues[i]);
                nodeCount += frequencies[i];
            }

            List<Tuple<double, int>> distances = new List<Tuple<double, int>>();
            long frequencyTotal = 0;

            // Compare every unordered pair of values exactly once (j < i)
            for (int i = 0; i < attributeValues.Count; i++)
            {
                for (int j = 0; j < i; j++)
                {
                    double? distance = measure.CalculateDistance(attributeValues[i], attributeValues[j]);

                    // A null distance means the measure gave no result for the pair
                    if (!distance.HasValue)
                    {
                        continue;
                    }

                    int pairFrequency = frequencies[i] * frequencies[j];
                    distances.Add(Tuple.Create(distance.Value, pairFrequency));

                    // Keep a running total of the frequencies
                    frequencyTotal += pairFrequency;
                }
            }

            // Since we only loop over unique attribute values we never make
            // the comparisons against nodes where the values would be the
            // same, so those pairs are inserted as zero distance items.
            //
            // The number of node pairs is C(n, 2) = n * (n - 1) / 2.  It is
            // computed with integers: the previous exp(log-factorial) form was
            // inexact (a result just below a whole number dropped an item) and
            // evaluated a log-factorial of a negative number for n < 2.
            //
            // NOTE(review): pairs for which the measure returned null are not
            // part of frequencyTotal and therefore also end up as zero
            // distance items - confirm that this is intended.
            long combinations = nodeCount * (nodeCount - 1) / 2;
            long zeroDistanceCount = combinations - frequencyTotal;

            for (long k = 0; k < zeroDistanceCount; k++)
            {
                distances.Add(Tuple.Create(0d, 1));
            }

            return distances;
        }
예제 #24
0
        /// <summary>
        /// Calculates the maximum distance for all values for the provided attribute
        /// name using the provided ISimilarityMeasure.  The name is resolved to an
        /// attribute instance and the work is delegated to the attribute overload.
        /// </summary>
        /// <param name="attributeName">The attribute name to retrieve the maximum distance for</param>
        /// <param name="measure">The ISimilarityMeasure to use to calculate the distance</param>
        /// <returns>the maximum distance value for all values for the given attribute,
        /// or 0 when no attribute with that name exists</returns>
        private double CalculateMaxDistance(string attributeName, ISimilarityMeasure measure)
        {
            // Resolve the attribute instance by name
            Data.Attributes.Attribute attributeFound = GlobalAttributeCollection.GetInstance(this.scope)
                .GetAttributes()
                .FirstOrDefault(attribute => attribute.Name == attributeName);

            // FirstOrDefault yields null when no attribute carries the name.
            // Report 0 here, the same value the attribute overload returns for
            // an attribute the collection does not contain, rather than
            // forwarding a null reference.
            if (attributeFound == null)
                return 0;

            return CalculateMaxDistance(attributeFound, measure);
        }
예제 #25
0
        /// <summary>
        /// Calculates the maximum pairwise distance over all values of the
        /// provided attribute using the provided ISimilarityMeasure.
        /// </summary>
        /// <param name="attribute">The attribute to retrieve the maximum distance for</param>
        /// <param name="measure">The ISimilarityMeasure to use to calculate the distance</param>
        /// <returns>the largest non-null distance between any two distinct values of
        /// the attribute; 0 when the global collection does not contain the
        /// attribute or when no distance could be computed (fewer than two
        /// values, or every comparison returned null)</returns>
        private double CalculateMaxDistance(Data.Attributes.Attribute attribute, ISimilarityMeasure measure)
        {
            // Ensure that the global collection contains the attribute
            if (!GlobalAttributeCollection.GetInstance(this.scope).ContainsAttribute(attribute))
            {
                return 0;
            }

            List<string> values = new List<string>(GlobalAttributeCollection.GetInstance(this.scope).GetAttributeValues(attribute));

            // Start from 0 instead of double.MinValue so that "no distance was
            // computed" yields the same neutral result as "attribute not found"
            // rather than leaking a huge negative sentinel to the caller.
            double maxDistance = 0;
            bool distanceFound = false;

            //TODO:  IS THERE A MORE EFFICIENT METHOD FOR HANDLING THIS??
            for (int i = 0; i < values.Count; i++)
            {
                string a = values[i];

                // Compare the current item to every item that precedes it, so
                // each unordered pair is evaluated exactly once
                for (int j = 0; j < i; j++)
                {
                    string b = values[j];
                    double? distance = measure.CalculateDistance(a, b);

                    // A null distance means the pair could not be measured
                    if (distance == null)
                    {
                        continue;
                    }

                    // The first measured distance seeds the maximum; later ones
                    // are compared against it
                    maxDistance = distanceFound ? Math.Max(maxDistance, distance.Value) : distance.Value;
                    distanceFound = true;
                }
            }

            return maxDistance;
        }
예제 #26
0
        /// <summary>
        /// Seeds the shared random utility, builds and initializes a KMeans
        /// instance, attaches the post-iteration handler and returns the
        /// clusters it creates.
        /// </summary>
        /// <param name="seed">The seed passed to RandomUtility.Initialize</param>
        /// <param name="dataset">The dataset handed to the KMeans constructor</param>
        /// <param name="clustersNumber">The number of clusters handed to the KMeans constructor</param>
        /// <param name="similarityMeasure">The similarity measure handed to the KMeans constructor</param>
        /// <param name="maxIterations">The iteration limit handed to the KMeans constructor</param>
        /// <param name="fireEvents">Forwarded to the KMeans constructor</param>
        /// <returns>the result of KMeans.CreateClusters()</returns>
        public static ClusteringSolution CreateKMeansClusters(int seed, Dataset dataset, int clustersNumber, ISimilarityMeasure similarityMeasure, int maxIterations, bool fireEvents)
        {
            // Seed the random utility before the algorithm is constructed
            DataMining.Utilities.RandomUtility.Initialize(seed);

            var algorithm = new KMeans(dataset, clustersNumber, similarityMeasure, maxIterations, fireEvents);
            algorithm.Initialize();

            // The handler is attached only after initialization has completed
            algorithm.OnPostIteration += kmeans_OnPostIteration;

            ClusteringSolution solution = algorithm.CreateClusters();
            return solution;
        }
예제 #27
0
        /// <summary>
        /// Seeds the shared random utility, optionally attaches the
        /// post-colony-iteration handler to the supplied clustering algorithm,
        /// and builds a classifier through ClusterBMN.
        /// </summary>
        /// <param name="seed">The seed passed to RandomUtility.Initialize</param>
        /// <param name="clusterNumber">The number of clusters handed to ClusterBMN</param>
        /// <param name="dataset">The dataset handed to ClusterBMN</param>
        /// <param name="similarityMeasure">The similarity measure handed to ClusterBMN</param>
        /// <param name="accuracy">The classification quality measure handed to ClusterBMN</param>
        /// <param name="algorithm">The clustering algorithm handed to ClusterBMN</param>
        /// <param name="naive">The classification algorithm handed to ClusterBMN</param>
        /// <param name="fireEvents">When true, the handler is attached if the algorithm
        /// is an ACOClustering_IB and/or an ACOClustering_MB</param>
        /// <returns>the result of ClusterBMN.CreateClassifier() as a
        /// BayesianClusterMultinetClassifier (null if it is not of that type)</returns>
        public static BayesianClusterMultinetClassifier CreateClusteringBMNClassifier(int seed, int clusterNumber, Dataset dataset, ISimilarityMeasure similarityMeasure, IClassificationQualityMeasure accuracy, IClusteringAlgorithm algorithm, IClassificationAlgorithm naive, bool fireEvents)
        {
            DataMining.Utilities.RandomUtility.Initialize(seed);

            if (fireEvents)
            {
                // The two checks are independent: each matching type gets its
                // own subscription
                ACOClustering_IB iterationBased = algorithm as ACOClustering_IB;
                if (iterationBased != null)
                {
                    iterationBased.OnPostColonyIteration += antClustering_OnPostColonyIteration;
                }

                ACOClustering_MB mediodBased = algorithm as ACOClustering_MB;
                if (mediodBased != null)
                {
                    mediodBased.OnPostColonyIteration += antClustering_OnPostColonyIteration;
                }
            }

            var builder = new ClusterBMN(dataset, clusterNumber, similarityMeasure, accuracy, algorithm, naive);
            var classifier = builder.CreateClassifier() as BayesianClusterMultinetClassifier;

            return classifier;
        }
예제 #28
0
 /// <summary>
 /// Measures the similarity of the two specified values for the given
 /// attribute name by delegating directly to the supplied similarity
 /// measure.
 /// </summary>
 /// <param name="attributeName">The name of the attribute to measure similarity for</param>
 /// <param name="sourceValue">The source value for similarity measurement</param>
 /// <param name="targetValue">The target value for similarity measurement</param>
 /// <param name="measure">The similarity measure whose MeasureSimilarity method is invoked</param>
 /// <returns>the value produced by the supplied similarity measure</returns>
 public double? ComputeAttributeSimilarity(string attributeName, string sourceValue, string targetValue, ISimilarityMeasure measure)
 {
     // No measure selection happens here; the caller's measure is used as-is
     double? similarity = measure.MeasureSimilarity(attributeName, sourceValue, targetValue);

     return similarity;
 }
예제 #29
0
 /// <summary>
 /// Creates a new instance of AttributeSimilarityDescriptor and assigns
 /// each argument to the property of the same name.
 /// </summary>
 /// <param name="attributeName">The value assigned to AttributeName</param>
 /// <param name="similarityMeasure">The value assigned to SimilarityMeasure</param>
 /// <param name="weight">The value assigned to Weight</param>
 public AttributeSimilarityDescriptor(string attributeName, ISimilarityMeasure similarityMeasure, double weight)
 {
     this.AttributeName = attributeName;
     this.SimilarityMeasure = similarityMeasure;
     this.Weight = weight;
 }
예제 #30
0
 /// <summary>
 /// Computes the similarity between a source and a target value of the
 /// named attribute using the similarity measure passed in by the caller.
 /// </summary>
 /// <param name="attributeName">The name of the attribute to measure similarity for</param>
 /// <param name="sourceValue">The source value for similarity measurement</param>
 /// <param name="targetValue">The target value for similarity measurement</param>
 /// <param name="measure">The similarity measure that performs the calculation</param>
 /// <returns>the result of the measure's MeasureSimilarity call</returns>
 public double? ComputeAttributeSimilarity(string attributeName, string sourceValue, string targetValue, ISimilarityMeasure measure)
 {
     // Hand the work to the provided measure and pass its answer back
     double? result = measure.MeasureSimilarity(attributeName, sourceValue, targetValue);

     return result;
 }
예제 #31
0
        /// <summary>
        /// Seeds the shared random utility, assembles the ACO problem
        /// (invalidator, heuristic calculator, cohesion-based quality evaluator
        /// and k-means local search) and runs ACOClustering_MB to create clusters.
        /// </summary>
        /// <param name="seed">The seed passed to RandomUtility.Initialize</param>
        /// <param name="dataset">The dataset to cluster</param>
        /// <param name="clustersNumber">The number of clusters handed to ACOClustering_MB</param>
        /// <param name="similarityMeasure">The similarity measure used by the local search and the algorithm</param>
        /// <param name="maxIterations">The iteration limit handed to ACOClustering_MB</param>
        /// <param name="colonySize">The colony size handed to ACOClustering_MB</param>
        /// <param name="convergenceIterations">The convergence iteration count handed to ACOClustering_MB</param>
        /// <param name="fireEvents">When true, the post-colony-iteration handler is attached;
        /// when false no handler is subscribed</param>
        /// <param name="performLocalSearch">Forwarded to the ACOClustering_MB constructor</param>
        /// <returns>the result of ACOClustering_MB.CreateClusters()</returns>
        public static ClusteringSolution CreateACOClusters_MB(int seed, Dataset dataset, int clustersNumber, ISimilarityMeasure similarityMeasure, int maxIterations, int colonySize, int convergenceIterations, bool fireEvents, bool performLocalSearch)
        {
            DataMining.Utilities.RandomUtility.Initialize(seed);
            DefaultHeuristicCalculator <int> calculator  = new DefaultHeuristicCalculator <int>();
            ClusteringMBInvalidator          invalidator = new ClusteringMBInvalidator();

            // The same cohesion measure backs both the local-search evaluator
            // and the problem-level evaluator
            DataMining.ProximityMeasures.IClusteringQualityMeasure measure = new CohesionClusteringMeasure();
            ClusteringQualityEvaluator cohesionEvaluator = new ClusteringQualityEvaluator(measure);
            KMeansLocalSearch          localSearch       = new KMeansLocalSearch(dataset, 1, similarityMeasure, cohesionEvaluator);

            ACO.ProblemSpecifics.ISolutionQualityEvaluator <int> evaluator = new ClusteringQualityEvaluator(measure);
            Problem <int> problem = new Problem <int>(invalidator, calculator, evaluator, localSearch);

            ACOClustering_MB antClustering = new ACOClustering_MB(maxIterations, colonySize, convergenceIterations, problem, clustersNumber, similarityMeasure, dataset, performLocalSearch);

            // Honor the caller's request: previously the flag was ignored and
            // the handler was always attached
            if (fireEvents)
            {
                antClustering.OnPostColonyIteration += new EventHandler(antClustering_OnPostColonyIteration);
            }

            return antClustering.CreateClusters();
        }
예제 #32
0
        /// <summary>
        /// Returns the standard deviation for all distances (over all values) for the
        /// given attribute and using the specified similarity measure.  This method
        /// attempts to use the cached value.  If one doesn't exist, it is calculated
        /// and stored in the cache before being returned.
        /// </summary>
        /// <param name="attributeName">The attribute to compute the standard
        /// deviation for</param>
        /// <param name="measure">The similarity measure to be used; its ToString()
        /// value forms part of the cache key</param>
        /// <returns>the standard deviation for all distances over all attribute
        /// values for the given attribute and similarity measure</returns>
        public double GetDistanceStandardDeviation(string attributeName, ISimilarityMeasure measure)
        {
            // Cache entries are keyed by (attribute name, measure.ToString())
            Tuple<string, string> cacheKey = Tuple.Create(attributeName, measure.ToString());

            // Single lookup on a cache hit instead of ContainsKey followed by
            // the indexer
            double standardDeviation;
            if (!this.sdCache.TryGetValue(cacheKey, out standardDeviation))
            {
                // Cache miss: compute the value once and remember it
                standardDeviation = CalculateDistanceSD(attributeName, measure);
                this.sdCache[cacheKey] = standardDeviation;
            }

            return standardDeviation;
        }
예제 #33
0
        /// <summary>
        /// Seeds the shared random utility, assembles the ACO problem
        /// (invalidator, heuristic calculator, classification-based quality
        /// evaluator and k-means local search) and runs AntClustBMN_MB to
        /// create a classifier.
        /// </summary>
        /// <param name="seed">The seed passed to RandomUtility.Initialize</param>
        /// <param name="dataset">The dataset assigned to the evaluator and handed to the algorithm</param>
        /// <param name="maxIterations">The iteration limit handed to AntClustBMN_MB</param>
        /// <param name="colonySize">The colony size handed to AntClustBMN_MB</param>
        /// <param name="convergence">The convergence iteration count handed to AntClustBMN_MB</param>
        /// <param name="clustersNumber">The number of clusters handed to AntClustBMN_MB</param>
        /// <param name="similarityMeasure">The similarity measure used by the local search and the algorithm</param>
        /// <param name="classificationMeasure">The classification quality measure used by the evaluator and the algorithm</param>
        /// <param name="algorithm">The classification algorithm used by the evaluator and the algorithm</param>
        /// <param name="fireEvents">When true, the post-colony-iteration handler is attached;
        /// when false no handler is subscribed</param>
        /// <returns>the result of AntClustBMN_MB.CreateClassifier() as a
        /// BayesianClusterMultinetClassifier (null if it is not of that type)</returns>
        public static BayesianClusterMultinetClassifier CreateAntClustBMNClassifier_MB(int seed, Dataset dataset, int maxIterations, int colonySize, int convergence, int clustersNumber, ISimilarityMeasure similarityMeasure, IClassificationQualityMeasure classificationMeasure, IClassificationAlgorithm algorithm, bool fireEvents)
        {
            DataMining.Utilities.RandomUtility.Initialize(seed);
            DefaultHeuristicCalculator <int>         calculator  = new DefaultHeuristicCalculator <int>();
            ClusteringMBInvalidator                  invalidator = new ClusteringMBInvalidator();
            ClusteringClassificationQualityEvaluator evaluator   = new ClusteringClassificationQualityEvaluator(classificationMeasure, algorithm);

            // The evaluator receives the dataset before it is shared with the
            // local search and the problem
            evaluator.Dataset = dataset;
            KMeansLocalSearch localSearch = new KMeansLocalSearch(dataset, 1, similarityMeasure, evaluator);
            Problem <int>     problem     = new Problem <int>(invalidator, calculator, evaluator, localSearch);

            AntClustBMN_MB antClustBMN = new AntClustBMN_MB(maxIterations, colonySize, convergence, problem, clustersNumber, similarityMeasure, dataset, true, algorithm, classificationMeasure);

            // Honor the caller's request: previously the flag was ignored and
            // the handler was always attached
            if (fireEvents)
            {
                antClustBMN.OnPostColonyIteration += new EventHandler(antClustering_OnPostColonyIteration);
            }

            return antClustBMN.CreateClassifier() as BayesianClusterMultinetClassifier;
        }
예제 #34
0
 /// <summary>
 /// Creates a new instance of AttributeSimilarityDescriptor, storing the
 /// supplied attribute name, similarity measure and weight in the
 /// corresponding properties.
 /// </summary>
 /// <param name="attributeName">The value stored in AttributeName</param>
 /// <param name="similarityMeasure">The value stored in SimilarityMeasure</param>
 /// <param name="weight">The value stored in Weight</param>
 public AttributeSimilarityDescriptor(string attributeName, ISimilarityMeasure similarityMeasure, double weight)
 {
     this.AttributeName = attributeName;
     this.SimilarityMeasure = similarityMeasure;
     this.Weight = weight;
 }