/// <summary>
/// Returns the similarity of the two specified values for the given
/// attribute name. The measure is taken from the per-attribute cache when
/// one has already been recorded; otherwise the preferred measure is looked
/// up and, if none is returned, one is determined from the supplied values.
/// </summary>
/// <param name="attributeName">The name of the attribute to measure similarity for</param>
/// <param name="sourceValue">The source value for similarity measurement</param>
/// <param name="targetValue">The target value for similarity measurement</param>
/// <returns>the calculated similarity for the given attribute</returns>
public double? ComputeAttributeSimilarity(string attributeName, string sourceValue, string targetValue)
{
    ISimilarityMeasure measure = null;

    // Check if we have already identified an appropriate similarity measure
    if (attributeToMeasure.ContainsKey(attributeName))
    {
        // Get the pre-determined similarity measure
        measure = SimilarityMeasures[attributeToMeasure[attributeName]];
    }
    else
    {
        // Try and get the preferred similarity measure
        measure = GetPrefferedSimilarityMeasure(attributeName);

        // If we couldn't get a preferred similarity measure, try and
        // determine the most appropriate one to use
        if (measure == null)
        {
            measure = DetermineSimilarityMeasure(attributeName, sourceValue, targetValue);
        }

        // Cache the choice only when the measure is actually part of
        // SimilarityMeasures. A negative index (measure missing or null)
        // must not be cached: the cached branch above would use it to
        // index SimilarityMeasures on every later call for this attribute.
        int measureIndex = SimilarityMeasures.IndexOf(measure);
        if (measureIndex >= 0)
        {
            this.attributeToMeasure.Add(attributeName, measureIndex);
        }
    }

    // Call the main overload to get the similarity value
    return ComputeAttributeSimilarity(attributeName, sourceValue, targetValue, measure);
}
/// <summary>
/// Creates a local search instance and stores the supplied dataset,
/// iteration count, similarity measure and quality evaluator.
/// </summary>
/// <param name="dataset">Stored in <c>_dataset</c></param>
/// <param name="maxIterations">Stored in <c>_maxIterations</c></param>
/// <param name="similarityMeasure">Stored in <c>_similarityMeasure</c></param>
/// <param name="evaluator">Assigned to <c>SolutionQualityEvaluator</c></param>
public KMeansLocalSearch(DataMining.Data.Dataset dataset, int maxIterations, ISimilarityMeasure similarityMeasure, ClusteringQualityEvaluator evaluator)
{
    _dataset = dataset;
    _maxIterations = maxIterations;
    SolutionQualityEvaluator = evaluator;
    _similarityMeasure = similarityMeasure;
}
/// <summary>
/// Resolves the attribute with the given name from the global attribute
/// collection for this scope and delegates to the attribute-based overload
/// to compute the maximum distance.
/// </summary>
/// <param name="attributeName">The attribute name to retrieve the maximum distance for</param>
/// <param name="measure">The ISimilarityMeasure to use to calculate the distance</param>
/// <returns>the value returned by the attribute-based overload</returns>
private double CalculateMaxDistance(string attributeName, ISimilarityMeasure measure)
{
    // First attribute whose Name matches; null when no attribute matches
    Data.Attributes.Attribute matchingAttribute = GlobalAttributeCollection.GetInstance(this.scope)
        .GetAttributes()
        .FirstOrDefault(candidate => candidate.Name == attributeName);

    return CalculateMaxDistance(matchingAttribute, measure);
}
/// <summary>
/// Calculates the largest pairwise distance between the values of the
/// provided attribute using the provided ISimilarityMeasure.
/// </summary>
/// <param name="attribute">The attribute to retrieve the maximum distance for</param>
/// <param name="measure">The ISimilarityMeasure to use to calculate the distance</param>
/// <returns>
/// 0 when the global collection does not contain the attribute; otherwise
/// the largest non-null pairwise distance. When no pair produces a distance
/// (fewer than two values, or every distance is null) the initial value
/// <c>double.MinValue</c> is returned.
/// </returns>
private double CalculateMaxDistance(Data.Attributes.Attribute attribute, ISimilarityMeasure measure)
{
    // Nothing to measure if the attribute is unknown to the global collection
    if (!GlobalAttributeCollection.GetInstance(this.scope).ContainsAttribute(attribute))
    {
        return 0;
    }

    List<string> attributeValues = new List<string>(GlobalAttributeCollection.GetInstance(this.scope).GetAttributeValues(attribute));
    double largest = double.MinValue;

    // Visit every unordered pair (outer, inner) with inner < outer exactly once
    for (int outer = 0; outer < attributeValues.Count; outer++)
    {
        string current = attributeValues[outer];

        for (int inner = 0; inner < outer; inner++)
        {
            double? pairDistance = measure.CalculateDistance(current, attributeValues[inner]);

            // Pairs for which the measure yields no distance are ignored
            if (pairDistance.HasValue)
            {
                largest = Math.Max(largest, pairDistance.Value);
            }
        }
    }

    return largest;
}
/// <summary>
/// Creates a KMeans instance and stores the supplied configuration values
/// in the corresponding private fields.
/// </summary>
/// <param name="dataset">Stored in <c>_dataset</c></param>
/// <param name="clustersNumber">Stored in <c>_clustersNumber</c></param>
/// <param name="similarityMeasure">Stored in <c>_similarityMeasure</c></param>
/// <param name="maxtIterations">Stored in <c>_maxIterations</c></param>
/// <param name="fireEvents">Stored in <c>_fireEvents</c></param>
public KMeans(Dataset dataset, int clustersNumber, ISimilarityMeasure similarityMeasure, int maxtIterations, bool fireEvents)
{
    _dataset = dataset;
    _clustersNumber = clustersNumber;
    _maxIterations = maxtIterations;
    _similarityMeasure = similarityMeasure;
    _fireEvents = fireEvents;
}
/// <summary>
/// Creates a ClusterBMN without a training set and stores the supplied
/// measures and algorithms in the corresponding private fields.
/// </summary>
/// <param name="clustersNumber">Stored in <c>_clustersNumber</c></param>
/// <param name="similarityMeasure">Stored in <c>_similarityMeasure</c></param>
/// <param name="classificationMeasure">Stored in <c>_classificationMeasure</c></param>
/// <param name="clusteringAlgorithm">Stored in <c>_clusteringAlgorithm</c></param>
/// <param name="classificationAlgorithm">Stored in <c>_classificationAlgorithm</c></param>
public ClusterBMN(int clustersNumber, ISimilarityMeasure similarityMeasure, IClassificationQualityMeasure classificationMeasure, IClusteringAlgorithm clusteringAlgorithm, IClassificationAlgorithm classificationAlgorithm)
{
    _similarityMeasure = similarityMeasure;
    _clustersNumber = clustersNumber;
    _classificationMeasure = classificationMeasure;
    _clusteringAlgorithm = clusteringAlgorithm;
    _classificationAlgorithm = classificationAlgorithm;
}
/// <summary>
/// Creates a ClusterBMN and stores the supplied training set, measures and
/// algorithms in the corresponding private fields.
/// </summary>
/// <param name="trainingset">Stored in <c>_trainingset</c></param>
/// <param name="clustersNumber">Stored in <c>_clustersNumber</c></param>
/// <param name="similarityMeasure">Stored in <c>_similarityMeasure</c></param>
/// <param name="classificationMeasure">Stored in <c>_classificationMeasure</c></param>
/// <param name="clusteringAlgorithm">Stored in <c>_clusteringAlgorithm</c></param>
/// <param name="classificationAlgorithm">Stored in <c>_classificationAlgorithm</c></param>
public ClusterBMN(DataMining.Data.Dataset trainingset, int clustersNumber, ISimilarityMeasure similarityMeasure, IClassificationQualityMeasure classificationMeasure, IClusteringAlgorithm clusteringAlgorithm, IClassificationAlgorithm classificationAlgorithm)
{
    _trainingset = trainingset;
    _similarityMeasure = similarityMeasure;
    _clustersNumber = clustersNumber;
    _classificationMeasure = classificationMeasure;
    _clusteringAlgorithm = clusteringAlgorithm;
    _classificationAlgorithm = classificationAlgorithm;
}
/// <summary>
/// Creates a retriever that uses the given similarity measure and case
/// threshold.
/// </summary>
/// <param name="measure">The similarity measure to store; must not be null</param>
/// <param name="threshold">The case threshold; must be at least 1</param>
/// <exception cref="ArgumentException">Thrown when <paramref name="threshold"/> is less than 1</exception>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="measure"/> is null</exception>
public SimilarityRetriever(ISimilarityMeasure measure, int threshold)
{
    // The threshold is validated before the measure, so an invalid
    // threshold is reported first when both arguments are bad
    if (threshold <= 0)
    {
        throw new ArgumentException("Case threshold should be greater than 0.", nameof(threshold));
    }

    this.threshold = threshold;

    if (measure == null)
    {
        throw new ArgumentNullException(nameof(measure));
    }

    this.measure = measure;
}
/// <summary>
/// Initializes a new <see cref="EvolutionaryDistanceCalculator{TProgram,TOutput}" />,
/// storing the supplied similarity measure, primitive set and operators.
/// </summary>
/// <param name="similarityMeasure">The similarity measure to guide the transformation search process.</param>
/// <param name="primitiveSet">The primitives used for the crossover operator.</param>
/// <param name="crossovers">The crossover operators to generate programs during the search.</param>
/// <param name="mutations">The mutation operators to generate programs during the search.</param>
public EvolutionaryDistanceCalculator(
    ISimilarityMeasure<TProgram> similarityMeasure,
    PrimitiveSet<TProgram> primitiveSet,
    IEnumerable<ICrossoverOperator<TProgram>> crossovers,
    IEnumerable<IMutationOperator<TProgram>> mutations)
{
    _similarityMeasure = similarityMeasure;
    _primitiveSet = primitiveSet;
    _crossovers = crossovers;
    _mutations = mutations;
}
/// <summary>
/// Creates a clustering solution over the given dataset with
/// <paramref name="clustersNumber"/> clusters, each constructed with this
/// solution and its index, plus one belongingness slot per dataset item
/// (sized by <c>_dataset.Size</c>).
/// </summary>
/// <param name="dataset">Stored in <c>_dataset</c>; its <c>Size</c> sizes the belongingness array</param>
/// <param name="clustersNumber">The number of Cluster instances to create</param>
/// <param name="similarityMeasure">Stored in <c>_similarityMeasure</c></param>
/// <param name="proximityMatrix">Assigned to <c>ProximityMatrix</c></param>
public ClusteringSolution(Dataset dataset, int clustersNumber, ISimilarityMeasure similarityMeasure, double[,] proximityMatrix)
{
    _dataset = dataset;
    ProximityMatrix = proximityMatrix;
    _similarityMeasure = similarityMeasure;

    // Each Cluster receives this solution, so the fields above are assigned
    // before any Cluster is constructed
    _clusters = new Cluster[clustersNumber];
    int position = 0;
    while (position < _clusters.Length)
    {
        _clusters[position] = new Cluster(this, position);
        position++;
    }

    _belongingness = new int[_dataset.Size];
}
/// <summary>
/// Validates this criterion and, when it is both valid and active, builds
/// the AttributeSimilarityDescriptor that represents it.
/// </summary>
/// <returns>
/// a descriptor for the selected attribute, measure and weight; null when
/// this criterion is not valid or not active
/// </returns>
public AttributeSimilarityDescriptor GetSimilarityDescriptor()
{
    // Validate the control before inspecting its state
    Validate();

    // An invalid or inactive criterion produces no descriptor
    if (!IsValid || !IsActive)
    {
        return null;
    }

    ISimilarityMeasure measure;
    double weight;

    if (currentMode != ClusteringToolMode.Simple)
    {
        // Outside simple mode: use the explicitly selected measure and the
        // weight value scaled down by 100
        measure = SelectedSimilarityMeasure;
        weight = Weight / 100d;
    }
    else
    {
        if (UseExactSimilarity)
        {
            // 'Exact' selected: use the ExactMatchSimilarityMeasure instance
            measure = AttributeSimilarityManager.Instance.GetSimilarityMeasureInstance(typeof(Berico.SnagL.Infrastructure.Similarity.ExactMatchSimilarityMeasure).FullName);
        }
        else
        {
            // Otherwise use the default measure for the selected attribute
            measure = AttributeSimilarityManager.Instance.GetDefaultSimilarityMeasure(SelectedAttribute);
        }

        // Simple mode always uses a weight of 1 (no weighting)
        weight = 1d;
    }

    return new AttributeSimilarityDescriptor(SelectedAttribute, measure, weight);
}
/// <summary>
/// Calculates the mean of all distances (over all values) for the given
/// attribute using the specified similarity measure.
/// </summary>
/// <param name="attributeName">The attribute to compute the distance mean for</param>
/// <param name="measure">The similarity measure to be used</param>
/// <returns>
/// the mean of the calculated distances, or 0 when no distances are
/// available for the attribute
/// </returns>
private double CalculateDistanceMean(string attributeName, ISimilarityMeasure measure)
{
    List<Tuple<double, int>> distances = CalculateDistances(attributeName, measure);

    // Only a non-empty distance list has a mean
    if (distances != null && distances.Count != 0)
    {
        return distances.Mean();
    }

    return 0;
}
/// <summary>
/// Returns the standard deviation of all distances (over all values) for
/// the given attribute and similarity measure. Results are cached under a
/// key built from the attribute name and <c>measure.ToString()</c>; the
/// value is calculated only when no cached entry exists.
/// </summary>
/// <param name="attributeName">The attribute to compute the standard deviation for</param>
/// <param name="measure">The similarity measure to be used</param>
/// <returns>the cached or newly calculated standard deviation</returns>
public double GetDistanceStandardDeviation(string attributeName, ISimilarityMeasure measure)
{
    Tuple<string, string> cacheKey = Tuple.Create(attributeName, measure.ToString());

    // Serve the cached value when one exists
    if (this.sdCache.ContainsKey(cacheKey))
    {
        return this.sdCache[cacheKey];
    }

    // Otherwise calculate it, remember it and return it
    double standardDeviation = CalculateDistanceSD(attributeName, measure);
    this.sdCache[cacheKey] = standardDeviation;
    return standardDeviation;
}
/// <summary>
/// Returns the maximum distance over all values of the provided attribute
/// using the provided ISimilarityMeasure. Results are cached under a key
/// built from the attribute name and <c>measure.ToString()</c>.
/// </summary>
/// <param name="attributeName">The name of the attribute to retrieve the maximum distance for</param>
/// <param name="measure">The ISimilarityMeasure to use to calculate the distance</param>
/// <returns>the cached or newly calculated maximum distance</returns>
public double GetMaxDistance(string attributeName, ISimilarityMeasure measure)
{
    Tuple<string, string> cacheKey = Tuple.Create(attributeName, measure.ToString());

    // Serve the cached value when one exists
    if (this.distancesCache.ContainsKey(cacheKey))
    {
        return this.distancesCache[cacheKey];
    }

    // Otherwise calculate the maximum distance and cache it
    double maxDistance = CalculateMaxDistance(attributeName, measure);
    this.distancesCache[cacheKey] = maxDistance;
    return maxDistance;
}
/// <summary>
/// Calculates the standard deviation of all distances (over all values)
/// for the given attribute using the specified similarity measure.
/// </summary>
/// <param name="attributeName">The attribute to compute the standard deviation for</param>
/// <param name="measure">The similarity measure to be used</param>
/// <returns>
/// the standard deviation of the calculated distances, or 0 when no
/// distances are available for the attribute
/// </returns>
private double CalculateDistanceSD(string attributeName, ISimilarityMeasure measure)
{
    List<Tuple<double, int>> distances = CalculateDistances(attributeName, measure);

    if (distances != null && distances.Count != 0)
    {
        // The standard deviation is computed around the distance mean
        return distances.StandardDeviation(GetDistanceMean(attributeName, measure));
    }

    return 0;
}
/// <summary>
/// Returns the similarity measure to use for the provided attribute and
/// values. A measure already recorded for the attribute is returned as is;
/// otherwise the measure type is chosen from the values: numeric, then
/// date, then geospatial, falling back to Levenshtein string distance.
/// </summary>
/// <param name="attributeName">The name of the attribute</param>
/// <param name="sourceValue">The source value</param>
/// <param name="targetValue">The target value</param>
/// <returns>
/// the selected similarity measure, as returned by
/// GetSimilarityMeasureInstance for the chosen type
/// </returns>
private ISimilarityMeasure DetermineSimilarityMeasure(string attributeName, string sourceValue, String targetValue)
{
    // A measure already associated with this attribute wins
    if (attributeToMeasure.ContainsKey(attributeName))
    {
        return SimilarityMeasures[attributeToMeasure[attributeName]];
    }

    // Pick the measure type from the shape of the two values
    string measureTypeName;
    if (IsNumber(sourceValue, targetValue))
    {
        measureTypeName = typeof(NumericSimilarityMeasure).FullName;
    }
    else if (IsDate(sourceValue, targetValue))
    {
        measureTypeName = typeof(DateTimeSimilarityMeasure).FullName;
    }
    else if (GeoCoordinate.IsValid(sourceValue) && GeoCoordinate.IsValid(targetValue))
    {
        measureTypeName = typeof(GeospatialSimilarityMeasure).FullName;
    }
    else
    {
        measureTypeName = typeof(LevenshteinDistanceStringSimilarityMeasure).FullName;
    }

    return GetSimilarityMeasureInstance(measureTypeName);
}
/// <summary>
/// Returns the first ISimilarityMeasure in <c>SimilarityMeasures</c> whose
/// runtime type matches the specified type name.
/// </summary>
/// <param name="attributeType">
/// A string containing the name of an ISimilarityMeasure class, as accepted
/// by <c>Type.GetType</c>, or the type's <c>FullName</c>
/// </param>
/// <returns>the matching ISimilarityMeasure instance; otherwise null</returns>
public ISimilarityMeasure GetSimilarityMeasureInstance(string attributeType)
{
    // Try to resolve the requested type from its name
    Type requestedType = Type.GetType(attributeType);

    // Type.GetType returns null when it cannot resolve the name, which can
    // happen for a name that is not assembly-qualified. In that case match
    // on the instance's FullName instead, so that such measures are still
    // found rather than silently yielding null.
    return this.SimilarityMeasures.FirstOrDefault(measure =>
        requestedType != null
            ? measure.GetType() == requestedType
            : measure.GetType().FullName == attributeType);
}
/// <summary>
/// Returns an instance of the default similarity measure for the provided
/// attribute. This method does not check the cache because the cache
/// contains the currently associated similarity measure, which is not
/// necessarily the default.
/// </summary>
/// <param name="attributeName">The name of the target attribute</param>
/// <returns>
/// the preferred measure when one is assigned; otherwise the measure
/// determined from the first and last values stored for the attribute, or
/// the Levenshtein string measure when either of those values is null or
/// empty
/// </returns>
/// <exception cref="ArgumentNullException">
/// Thrown when <paramref name="attributeName"/> is null or empty
/// </exception>
public ISimilarityMeasure GetDefaultSimilarityMeasure(string attributeName)
{
    // Validate parameter; the reported parameter name matches the actual one
    if (string.IsNullOrEmpty(attributeName))
    {
        throw new ArgumentNullException("attributeName", "No valid attribute name was provided");
    }

    // Attempt to get the default similarity measure using the assigned
    // preferred similarity measure
    ISimilarityMeasure defaultMeasure = GetPrefferedSimilarityMeasure(attributeName);

    // If we have a good measure, we can return it
    if (defaultMeasure != null)
    {
        return defaultMeasure;
    }

    // No preferred similarity measure is set, so determine the default by
    // analyzing the data stored in the attribute. The values are fetched
    // once and sampled at both ends.
    var attributeValues = GlobalAttributeCollection.GetInstance(this.scope).GetAttributeValues(attributeName);
    string firstValue = attributeValues.FirstOrDefault();
    string secondValue = attributeValues.LastOrDefault();

    // Without two usable sample values, fall back to string comparison
    if (string.IsNullOrEmpty(firstValue) || string.IsNullOrEmpty(secondValue))
    {
        return GetSimilarityMeasureInstance(typeof(LevenshteinDistanceStringSimilarityMeasure).FullName);
    }

    return DetermineSimilarityMeasure(attributeName, firstValue, secondValue);
}
/// <summary>
/// Computes the standard deviation of the distances between all values of
/// the given attribute, using the specified similarity measure.
/// </summary>
/// <param name="attributeName">The attribute to compute the standard deviation for</param>
/// <param name="measure">The similarity measure to be used</param>
/// <returns>
/// the standard deviation of the calculated distances; 0 when the distance
/// list is null or empty
/// </returns>
private double CalculateDistanceSD(string attributeName, ISimilarityMeasure measure)
{
    List<Tuple<double, int>> allDistances = CalculateDistances(attributeName, measure);

    bool nothingToMeasure = allDistances == null || allDistances.Count == 0;
    if (nothingToMeasure)
    {
        return 0;
    }

    // The deviation is taken around the distance mean for this attribute
    double distanceMean = GetDistanceMean(attributeName, measure);
    return allDistances.StandardDeviation(distanceMean);
}
/// <summary>
/// Returns a list of tuples that contain the calculated distances between
/// the distinct values of an attribute and the frequency of those
/// distances.
/// </summary>
/// <param name="attributeName">The name of the attribute that
/// distances are being calculated for</param>
/// <param name="measure">The similarity measure to be used</param>
/// <returns>
/// a collection of (distance, frequency) tuples, padded with (0, 1) entries
/// for the pairings not produced by the value comparison; null when the
/// global collection does not contain the attribute
/// </returns>
private List<Tuple<double, int>> CalculateDistances(string attributeName, ISimilarityMeasure measure)
{
    if (!GlobalAttributeCollection.GetInstance(this.scope).ContainsAttribute(attributeName))
    {
        return null;
    }

    List<Tuple<double, int>> distances = new List<Tuple<double, int>>();
    double frequencyTotal = 0;
    int nodeCount = 0;

    // Get the values for the attribute
    List<string> attributeValues = new List<string>(GlobalAttributeCollection.GetInstance(this.scope).GetAttributeValues(attributeName));

    // Loop over all the attribute values
    for (int i = 0; i <= attributeValues.Count - 1; i++)
    {
        // Get the frequency at which the source attribute value occurs
        int sourceFrequency = GlobalAttributeCollection.GetInstance(this.scope).GetFrequency(attributeName, attributeValues[i]);
        nodeCount += sourceFrequency;

        // Compare the current attribute value to all earlier attribute
        // values, so each unordered pair is visited exactly once
        for (int j = 0; j < i; j++)
        {
            // Compute the distance for the two values
            double? distance = measure.CalculateDistance(attributeValues[i], attributeValues[j]);

            // NOTE(review): pairs with a null distance add no tuple and are
            // not counted in frequencyTotal, so they end up represented by
            // zero-distance entries below - confirm this is intended.
            if (distance != null)
            {
                // Get the frequency at which the target attribute value occurs
                int targetFrequency = GlobalAttributeCollection.GetInstance(this.scope).GetFrequency(attributeName, attributeValues[j]);

                distances.Add(Tuple.Create<double, int>(distance.Value, sourceFrequency * targetFrequency));

                // Keep a running total of the frequencies
                frequencyTotal += sourceFrequency * targetFrequency;
            }
        }
    }

    // Since we only loop over unique attribute values we never make the
    // comparisons against nodes where the values would be the same. We need
    // to determine if this case has occurred and insert the appropriate
    // number of zero distance items.

    // Number of unordered node pairs, C(nodeCount, 2), computed exactly.
    // Deriving it through Math.Exp of log-factorials is inexact and can land
    // just below the whole number, which would drop one zero-distance item;
    // it is also undefined for fewer than two nodes.
    double combinations = nodeCount * (nodeCount - 1.0) / 2.0;

    // Add in all the zero distance items that we need
    for (int i = 1; i <= combinations - frequencyTotal; i++)
    {
        distances.Add(Tuple.Create<double, int>(0, 1));
    }

    return distances;
}
/// <summary>
/// Computes the mean of the distances between all values of the given
/// attribute, using the specified similarity measure.
/// </summary>
/// <param name="attributeName">The attribute to compute the distance mean for</param>
/// <param name="measure">The similarity measure to be used</param>
/// <returns>
/// the mean of the calculated distances; 0 when the distance list is null
/// or empty
/// </returns>
private double CalculateDistanceMean(string attributeName, ISimilarityMeasure measure)
{
    List<Tuple<double, int>> allDistances = CalculateDistances(attributeName, measure);

    bool nothingToAverage = allDistances == null || allDistances.Count == 0;
    if (nothingToAverage)
    {
        return 0;
    }

    return allDistances.Mean();
}
/// <summary>
/// Gets the maximum distance over all values of the provided attribute for
/// the provided ISimilarityMeasure, calculating and caching it on first
/// use. The cache key combines the attribute name with
/// <c>measure.ToString()</c>.
/// </summary>
/// <param name="attributeName">The name of the attribute to retrieve the maximum distance for</param>
/// <param name="measure">The ISimilarityMeasure to use to calculate the distance</param>
/// <returns>the maximum distance value for all values for the given attribute</returns>
public double GetMaxDistance(string attributeName, ISimilarityMeasure measure)
{
    Tuple<string, string> lookupKey = Tuple.Create(attributeName, measure.ToString());

    bool alreadyCached = this.distancesCache.ContainsKey(lookupKey);
    if (alreadyCached)
    {
        return this.distancesCache[lookupKey];
    }

    // Not cached yet: calculate, store, and hand back the fresh value
    double calculated = CalculateMaxDistance(attributeName, measure);
    this.distancesCache[lookupKey] = calculated;
    return calculated;
}
/// <summary>
/// Returns a list of tuples that contain the calculated distances between
/// the distinct values of an attribute and the frequency of those
/// distances.
/// </summary>
/// <param name="attributeName">The name of the attribute that
/// distances are being calculated for</param>
/// <param name="measure">The similarity measure to be used</param>
/// <returns>
/// a collection of (distance, frequency) tuples, padded with (0, 1) entries
/// for the pairings not produced by the value comparison; null when the
/// global collection does not contain the attribute
/// </returns>
private List<Tuple<double, int>> CalculateDistances(string attributeName, ISimilarityMeasure measure)
{
    if (!GlobalAttributeCollection.GetInstance(this.scope).ContainsAttribute(attributeName))
    {
        return null;
    }

    List<Tuple<double, int>> distances = new List<Tuple<double, int>>();
    double frequencyTotal = 0;
    int nodeCount = 0;

    // Get the values for the attribute
    List<string> attributeValues = new List<string>(GlobalAttributeCollection.GetInstance(this.scope).GetAttributeValues(attributeName));

    // Loop over all the attribute values
    for (int i = 0; i <= attributeValues.Count - 1; i++)
    {
        // Get the frequency at which the source attribute value occurs
        int sourceFrequency = GlobalAttributeCollection.GetInstance(this.scope).GetFrequency(attributeName, attributeValues[i]);
        nodeCount += sourceFrequency;

        // Compare the current attribute value to all earlier attribute
        // values, so each unordered pair is visited exactly once
        for (int j = 0; j < i; j++)
        {
            // Compute the distance for the two values
            double? distance = measure.CalculateDistance(attributeValues[i], attributeValues[j]);

            // NOTE(review): pairs with a null distance add no tuple and are
            // not counted in frequencyTotal, so they end up represented by
            // zero-distance entries below - confirm this is intended.
            if (distance != null)
            {
                // Get the frequency at which the target attribute value occurs
                int targetFrequency = GlobalAttributeCollection.GetInstance(this.scope).GetFrequency(attributeName, attributeValues[j]);

                distances.Add(Tuple.Create<double, int>(distance.Value, sourceFrequency * targetFrequency));

                // Keep a running total of the frequencies
                frequencyTotal += sourceFrequency * targetFrequency;
            }
        }
    }

    // Since we only loop over unique attribute values we never make the
    // comparisons against nodes where the values would be the same. We need
    // to determine if this case has occurred and insert the appropriate
    // number of zero distance items.

    // Number of unordered node pairs, C(nodeCount, 2), computed exactly.
    // Deriving it through Math.Exp of log-factorials is inexact and can land
    // just below the whole number, which would drop one zero-distance item;
    // it is also undefined for fewer than two nodes.
    double combinations = nodeCount * (nodeCount - 1.0) / 2.0;

    // Add in all the zero distance items that we need
    for (int i = 1; i <= combinations - frequencyTotal; i++)
    {
        distances.Add(Tuple.Create<double, int>(0, 1));
    }

    return distances;
}
/// <summary>
/// Looks up the attribute with the given name in the global attribute
/// collection for this scope and computes its maximum distance through the
/// attribute-based overload.
/// </summary>
/// <param name="attributeName">The attribute name to retrieve the maximum distance for</param>
/// <param name="measure">The ISimilarityMeasure to use to calculate the distance</param>
/// <returns>the value returned by the attribute-based overload</returns>
private double CalculateMaxDistance(string attributeName, ISimilarityMeasure measure)
{
    // Take the first attribute with a matching Name (null if none matches)
    Data.Attributes.Attribute resolved = GlobalAttributeCollection
        .GetInstance(this.scope)
        .GetAttributes()
        .FirstOrDefault(item => item.Name == attributeName);

    double maxDistance = CalculateMaxDistance(resolved, measure);
    return maxDistance;
}
/// <summary>
/// Finds the largest distance between any two values of the provided
/// attribute, as reported by the provided ISimilarityMeasure.
/// </summary>
/// <param name="attribute">The attribute to retrieve the maximum distance for</param>
/// <param name="measure">The ISimilarityMeasure to use to calculate the distance</param>
/// <returns>
/// 0 when the attribute is not in the global collection; otherwise the
/// largest non-null pairwise distance, or <c>double.MinValue</c> when no
/// pair produced a distance
/// </returns>
private double CalculateMaxDistance(Data.Attributes.Attribute attribute, ISimilarityMeasure measure)
{
    // Ensure that the global collection contains the attribute
    bool isKnownAttribute = GlobalAttributeCollection.GetInstance(this.scope).ContainsAttribute(attribute);
    if (!isKnownAttribute)
    {
        return 0;
    }

    double best = double.MinValue;
    List<string> items = new List<string>(GlobalAttributeCollection.GetInstance(this.scope).GetAttributeValues(attribute));

    // Compare each value against every value that precedes it
    for (int row = 0; row < items.Count; row++)
    {
        string left = items[row];

        for (int column = 0; column < row; column++)
        {
            string right = items[column];
            double? measured = measure.CalculateDistance(left, right);

            // Skip pairs the measure could not produce a distance for
            if (!measured.HasValue)
            {
                continue;
            }

            best = Math.Max(best, measured.Value);
        }
    }

    return best;
}
/// <summary>
/// Initializes the random utility with the given seed, builds and
/// initializes a KMeans instance, subscribes the post-iteration handler
/// and returns the clusters it creates.
/// </summary>
/// <param name="seed">Passed to <c>RandomUtility.Initialize</c></param>
/// <param name="dataset">Forwarded to the KMeans constructor</param>
/// <param name="clustersNumber">Forwarded to the KMeans constructor</param>
/// <param name="similarityMeasure">Forwarded to the KMeans constructor</param>
/// <param name="maxIterations">Forwarded to the KMeans constructor</param>
/// <param name="fireEvents">Forwarded to the KMeans constructor</param>
/// <returns>the result of <c>KMeans.CreateClusters</c></returns>
public static ClusteringSolution CreateKMeansClusters(int seed, Dataset dataset, int clustersNumber, ISimilarityMeasure similarityMeasure, int maxIterations, bool fireEvents)
{
    DataMining.Utilities.RandomUtility.Initialize(seed);

    KMeans clusterer = new KMeans(dataset, clustersNumber, similarityMeasure, maxIterations, fireEvents);
    clusterer.Initialize();

    // The handler is attached after initialization, as in the original flow
    clusterer.OnPostIteration += kmeans_OnPostIteration;

    ClusteringSolution solution = clusterer.CreateClusters();
    return solution;
}
/// <summary>
/// Initializes the random utility with the given seed, optionally hooks the
/// post-colony-iteration handler onto ACO clustering algorithms, and
/// creates a classifier through a new ClusterBMN.
/// </summary>
/// <param name="seed">Passed to <c>RandomUtility.Initialize</c></param>
/// <param name="clusterNumber">Forwarded to the ClusterBMN constructor</param>
/// <param name="dataset">Forwarded to the ClusterBMN constructor as the training set</param>
/// <param name="similarityMeasure">Forwarded to the ClusterBMN constructor</param>
/// <param name="accuracy">Forwarded to the ClusterBMN constructor</param>
/// <param name="algorithm">The clustering algorithm; receives the event handler when it is an ACO clustering type and events are enabled</param>
/// <param name="naive">Forwarded to the ClusterBMN constructor</param>
/// <param name="fireEvents">When true, the post-colony-iteration handler is subscribed</param>
/// <returns>
/// the created classifier cast with <c>as</c>, so null when it is not a
/// BayesianClusterMultinetClassifier
/// </returns>
public static BayesianClusterMultinetClassifier CreateClusteringBMNClassifier(int seed, int clusterNumber, Dataset dataset, ISimilarityMeasure similarityMeasure, IClassificationQualityMeasure accuracy, IClusteringAlgorithm algorithm, IClassificationAlgorithm naive, bool fireEvents)
{
    DataMining.Utilities.RandomUtility.Initialize(seed);

    if (fireEvents)
    {
        // The two checks are independent: an algorithm matching both types
        // is subscribed through both
        ACOClustering_IB iterationBased = algorithm as ACOClustering_IB;
        if (iterationBased != null)
        {
            iterationBased.OnPostColonyIteration += antClustering_OnPostColonyIteration;
        }

        ACOClustering_MB medoidBased = algorithm as ACOClustering_MB;
        if (medoidBased != null)
        {
            medoidBased.OnPostColonyIteration += antClustering_OnPostColonyIteration;
        }
    }

    ClusterBMN builder = new ClusterBMN(dataset, clusterNumber, similarityMeasure, accuracy, algorithm, naive);
    return builder.CreateClassifier() as BayesianClusterMultinetClassifier;
}
/// <summary>
/// Returns the similarity of the two specified values for the given
/// attribute name, as computed by the supplied similarity measure.
/// </summary>
/// <param name="attributeName">The name of the attribute to measure similarity for</param>
/// <param name="sourceValue">The source value for similarity measurement</param>
/// <param name="targetValue">The target value for similarity measurement</param>
/// <param name="measure">The similarity measure whose <c>MeasureSimilarity</c> is invoked</param>
/// <returns>the value returned by the measure for the given attribute and values</returns>
public double? ComputeAttributeSimilarity(string attributeName, string sourceValue, string targetValue, ISimilarityMeasure measure)
{
    // Delegate entirely to the supplied measure
    double? similarity = measure.MeasureSimilarity(attributeName, sourceValue, targetValue);
    return similarity;
}
/// <summary>
/// Creates a new instance of AttributeSimilarityDescriptor using
/// the provided property values.
/// </summary>
/// <param name="attributeName">Assigned to <c>AttributeName</c></param>
/// <param name="similarityMeasure">Assigned to <c>SimilarityMeasure</c></param>
/// <param name="weight">Assigned to <c>Weight</c></param>
public AttributeSimilarityDescriptor(string attributeName, ISimilarityMeasure similarityMeasure, double weight)
{
    this.AttributeName = attributeName;
    this.SimilarityMeasure = similarityMeasure;
    this.Weight = weight;
}
/// <summary>
/// Computes the similarity of two values of the named attribute by
/// invoking the supplied similarity measure.
/// </summary>
/// <param name="attributeName">The name of the attribute to measure similarity for</param>
/// <param name="sourceValue">The source value for similarity measurement</param>
/// <param name="targetValue">The target value for similarity measurement</param>
/// <param name="measure">The similarity measure to execute</param>
/// <returns>the result of <c>measure.MeasureSimilarity</c> for the given arguments</returns>
public double? ComputeAttributeSimilarity(string attributeName, string sourceValue, string targetValue, ISimilarityMeasure measure)
{
    // Execute the similarity measure and hand its result straight back
    double? result = measure.MeasureSimilarity(attributeName, sourceValue, targetValue);
    return result;
}
/// <summary>
/// Initializes the random utility with the given seed, assembles the
/// clustering problem (invalidator, heuristic calculator, cohesion-based
/// evaluators and a KMeans local search), runs an ACOClustering_MB over it
/// and returns the clusters it creates.
/// </summary>
/// <param name="seed">Passed to <c>RandomUtility.Initialize</c></param>
/// <param name="dataset">Forwarded to the local search and the ACO algorithm</param>
/// <param name="clustersNumber">Forwarded to the ACO algorithm</param>
/// <param name="similarityMeasure">Forwarded to the local search and the ACO algorithm</param>
/// <param name="maxIterations">Forwarded to the ACO algorithm</param>
/// <param name="colonySize">Forwarded to the ACO algorithm</param>
/// <param name="convergenceIterations">Forwarded to the ACO algorithm</param>
/// <param name="fireEvents">Not read by this method; the handler is always subscribed</param>
/// <param name="performLocalSearch">Forwarded to the ACO algorithm</param>
/// <returns>the result of <c>ACOClustering_MB.CreateClusters</c></returns>
public static ClusteringSolution CreateACOClusters_MB(int seed, Dataset dataset, int clustersNumber, ISimilarityMeasure similarityMeasure, int maxIterations, int colonySize, int convergenceIterations, bool fireEvents, bool performLocalSearch)
{
    DataMining.Utilities.RandomUtility.Initialize(seed);

    // Problem components
    DefaultHeuristicCalculator<int> heuristicCalculator = new DefaultHeuristicCalculator<int>();
    ClusteringMBInvalidator solutionInvalidator = new ClusteringMBInvalidator();
    DataMining.ProximityMeasures.IClusteringQualityMeasure cohesionMeasure = new CohesionClusteringMeasure();

    // The local search gets its own evaluator instance, separate from the
    // one handed to the problem
    ClusteringQualityEvaluator localSearchEvaluator = new ClusteringQualityEvaluator(cohesionMeasure);
    KMeansLocalSearch kmeansLocalSearch = new KMeansLocalSearch(dataset, 1, similarityMeasure, localSearchEvaluator);
    ACO.ProblemSpecifics.ISolutionQualityEvaluator<int> problemEvaluator = new ClusteringQualityEvaluator(cohesionMeasure);

    Problem<int> clusteringProblem = new Problem<int>(solutionInvalidator, heuristicCalculator, problemEvaluator, kmeansLocalSearch);

    ACOClustering_MB colony = new ACOClustering_MB(maxIterations, colonySize, convergenceIterations, clusteringProblem, clustersNumber, similarityMeasure, dataset, performLocalSearch);
    colony.OnPostColonyIteration += antClustering_OnPostColonyIteration;

    return colony.CreateClusters();
}
/// <summary>
/// Gets the standard deviation of all distances (over all values) for the
/// given attribute and similarity measure, calculating and caching it on
/// first use. The cache key combines the attribute name with
/// <c>measure.ToString()</c>.
/// </summary>
/// <param name="attributeName">The attribute to compute the standard deviation for</param>
/// <param name="measure">The similarity measure to be used</param>
/// <returns>the standard deviation for all distances over all attribute
/// values for the given attribute and similarity measure</returns>
public double GetDistanceStandardDeviation(string attributeName, ISimilarityMeasure measure)
{
    Tuple<string, string> lookupKey = Tuple.Create(attributeName, measure.ToString());

    bool alreadyCached = this.sdCache.ContainsKey(lookupKey);
    if (alreadyCached)
    {
        return this.sdCache[lookupKey];
    }

    // Not cached yet: calculate, store, and hand back the fresh value
    double calculated = CalculateDistanceSD(attributeName, measure);
    this.sdCache[lookupKey] = calculated;
    return calculated;
}
/// <summary>
/// Initializes the random utility with the given seed, assembles the
/// clustering-classification problem (invalidator, heuristic calculator,
/// evaluator bound to the dataset and a KMeans local search), runs an
/// AntClustBMN_MB over it and returns the classifier it creates.
/// </summary>
/// <param name="seed">Passed to <c>RandomUtility.Initialize</c></param>
/// <param name="dataset">Assigned to the evaluator and forwarded to the local search and the algorithm</param>
/// <param name="maxIterations">Forwarded to the algorithm</param>
/// <param name="colonySize">Forwarded to the algorithm</param>
/// <param name="convergence">Forwarded to the algorithm</param>
/// <param name="clustersNumber">Forwarded to the algorithm</param>
/// <param name="similarityMeasure">Forwarded to the local search and the algorithm</param>
/// <param name="classificationMeasure">Forwarded to the evaluator and the algorithm</param>
/// <param name="algorithm">Forwarded to the evaluator and the algorithm</param>
/// <param name="fireEvents">Not read by this method; the handler is always subscribed</param>
/// <returns>
/// the created classifier cast with <c>as</c>, so null when it is not a
/// BayesianClusterMultinetClassifier
/// </returns>
public static BayesianClusterMultinetClassifier CreateAntClustBMNClassifier_MB(int seed, Dataset dataset, int maxIterations, int colonySize, int convergence, int clustersNumber, ISimilarityMeasure similarityMeasure, IClassificationQualityMeasure classificationMeasure, IClassificationAlgorithm algorithm, bool fireEvents)
{
    DataMining.Utilities.RandomUtility.Initialize(seed);

    // Problem components
    DefaultHeuristicCalculator<int> heuristicCalculator = new DefaultHeuristicCalculator<int>();
    ClusteringMBInvalidator solutionInvalidator = new ClusteringMBInvalidator();
    ClusteringClassificationQualityEvaluator qualityEvaluator = new ClusteringClassificationQualityEvaluator(classificationMeasure, algorithm)
    {
        Dataset = dataset
    };

    // The same evaluator serves both the local search and the problem
    KMeansLocalSearch kmeansLocalSearch = new KMeansLocalSearch(dataset, 1, similarityMeasure, qualityEvaluator);
    Problem<int> clusteringProblem = new Problem<int>(solutionInvalidator, heuristicCalculator, qualityEvaluator, kmeansLocalSearch);

    // Local search is always enabled here (the literal true argument)
    AntClustBMN_MB colony = new AntClustBMN_MB(maxIterations, colonySize, convergence, clusteringProblem, clustersNumber, similarityMeasure, dataset, true, algorithm, classificationMeasure);
    colony.OnPostColonyIteration += antClustering_OnPostColonyIteration;

    return colony.CreateClassifier() as BayesianClusterMultinetClassifier;
}