/// <summary>
/// Initializes this algorithm, asks the clustering algorithm for a clustering
/// solution, and builds a cluster multinet classifier from that solution, the
/// configured classification algorithm and the training set.
/// </summary>
/// <returns>The classifier that was built; it is also stored in <c>_BMNClassifier</c>.</returns>
public IClassifier CreateClassifier()
{
    this.Initialize();

    // The clustering result is kept on the instance because the classifier
    // construction below reads it from the field.
    this._clusteringSolution = this._clusteringAlgorithm.CreateClusters();

    this._BMNClassifier = BayesianClusterMultinetClassifier.ConstructClusterBMNClassifier(
        this._clusteringSolution,
        this._classificationAlgorithm,
        this._trainingset);

    return this._BMNClassifier;
}
/// <summary>
/// Refines the ant's solution: the ant's current assignment is handed to
/// <c>_kmeans</c>, the clusters it creates are turned back into a solution and
/// trail, and the ant adopts them only when their evaluated quality is
/// strictly higher than the quality of its current solution.
/// </summary>
/// <param name="ant">Ant whose <c>Solution</c> and <c>Trail</c> may be replaced.</param>
public void PerformLocalSearch(Ant<ClusterExampleAssignment> ant)
{
    Solution<ClusterExampleAssignment> current = ant.Solution;

    _kmeans.SetAssignment(current.ToList());
    ClusteringSolution refined = _kmeans.CreateClusters();

    Solution<ClusterExampleAssignment> candidate = new Solution<ClusterExampleAssignment>();
    List<int> candidateTrail = new List<int>();

    foreach (Cluster cluster in refined.Clusters)
    {
        foreach (DataMining.Data.Example example in cluster.Examples)
        {
            // Component index is laid out as (example, cluster) pairs: example-major, cluster-minor.
            int index = (example.Index * ClustersNumber) + cluster.Label;
            ClusterExampleAssignment assignment = new ClusterExampleAssignment(example.Index, cluster.Label);

            candidate.Components.Add(new DecisionComponent<ClusterExampleAssignment>(index, assignment));
            candidateTrail.Add(index);
        }
    }

    this.SolutionQualityEvaluator.EvaluateSolutionQuality(candidate);

    // Keep the ant untouched unless the refined solution is strictly better.
    if (!(candidate.Quality > current.Quality))
    {
        return;
    }

    ant.Solution = candidate;
    ant.Trail = candidateTrail;
}
/// <summary>
/// Refines the ant's solution: the ant's current assignment is handed to
/// <c>_kmeans</c>, the medoids of the clusters it creates become the components
/// of a new solution, and the ant adopts that solution (and its trail) only
/// when its evaluated quality is strictly higher than the current one.
/// </summary>
/// <param name="ant">Ant whose <c>Solution</c> and <c>Trail</c> may be replaced.</param>
public void PerformLocalSearch(Ant<int> ant)
{
    Solution<int> current = ant.Solution;

    _kmeans.SetAssignment(current.ToList());
    ClusteringSolution refined = _kmeans.CreateClusters();

    Solution<int> candidate = new Solution<int>();
    List<int> candidateTrail = new List<int>();

    int[] medoids = refined.GetMedoids();
    for (int m = 0; m < medoids.Length; m++)
    {
        // Each medoid's example index serves as both the component index and its element.
        int medoid = medoids[m];
        candidate.Components.Add(new DecisionComponent<int>(medoid, medoid));
        candidateTrail.Add(medoid);
    }

    this.SolutionQualityEvaluator.EvaluateSolutionQuality(candidate);

    // Keep the ant untouched unless the refined solution is strictly better.
    if (!(candidate.Quality > current.Quality))
    {
        return;
    }

    ant.Solution = candidate;
    ant.Trail = candidateTrail;
}
/*
 * Runs the algorithm from centroids produced by initCentroids(), repeating
 * the whole procedure `randomStarts` times and keeping the solution that
 * compares lowest via CompareTo.
 *
 * randomStarts  - number of independent restarts; when it is 0 or negative
 *                 no run happens and null is returned.
 * maxIterations - upper bound on assignment/update rounds per restart.
 *
 * Returns the best solution found, or null when no restart was executed.
 */
public ClusteringSolution runKmeans(int randomStarts = 1, int maxIterations = 10000)
{
    ClusteringSolution best = null;

    for (int start = 0; start < randomStarts; start++)
    {
        ClusteringSolution candidate = new ClusteringSolution(dataset.Length, this.K);
        Entity[] centroids = initCentroids();

        // Alternate assignment and centroid update until the centroids stop
        // changing or the iteration budget is used up.
        int rounds = 0;
        bool centroidsChanged;
        do
        {
            computeClusters(candidate, centroids);
            rounds++;
            centroidsChanged = computeCentroids(candidate, ref centroids);
        }
        while (centroidsChanged && rounds < maxIterations);

        candidate.computeSolutionWCSS(this.dataset, centroids);

        // The first restart is accepted unconditionally; later ones must compare lower.
        if (start == 0 || candidate.CompareTo(best) < 0)
        {
            best = candidate;
        }
    }

    return best;
}
/*
 * Recomputes every centroid as the mean of the data points currently mapped
 * to its cluster in `sol`.
 *
 * sol          - solution whose mapDataPointToCluster[i] gives the cluster
 *                index of dataset[i].
 * oldCentroids - current centroids; replaced by the freshly computed array
 *                when at least one centroid differs from its old value.
 *
 * Returns true if the centroids changed, false otherwise.
 *
 * A cluster with no members keeps the freshly constructed Entity (it is
 * never summed into nor divided).
 */
private bool computeCentroids(ClusteringSolution sol, ref Entity[] oldCentroids)
{
    int[] counters = new int[K];
    Entity[] temp = new Entity[K]; // temp variable for new centroids

    for (int i = 0; i < K; i++)
    {
        temp[i] = new Entity(oldCentroids[0].Dimensions);
    }

    // Accumulate the per-cluster sums and member counts.
    for (int i = 0; i < dataset.Length; i++)
    {
        int cluster = sol.mapDataPointToCluster[i];
        temp[cluster] = temp[cluster] + dataset[i];
        counters[cluster]++;
    }

    // Turn each sum into a mean. This must happen exactly once per cluster:
    // dividing inside a loop over the data points would divide a cluster
    // with n members by n a total of n times.
    for (int c = 0; c < K; c++)
    {
        if (counters[c] > 0)
        {
            temp[c] = temp[c] / counters[c];
        }
    }

    for (int i = 0; i < K; i++)
    {
        if (oldCentroids[i].Equals(temp[i]) == false) // one at least changed
        {
            oldCentroids = temp; // update centroids
            return true;
        }
    }

    // nothing changed
    return false;
}
/// <summary>
/// Prepares a run: creates a fresh clustering solution and construction graph,
/// pushes the cluster count, proximity matrix and clustering solution into the
/// problem's local search, component invalidator and quality evaluators, then
/// resets pheromone, heuristic values and the best-ant bookkeeping.
/// </summary>
public override void Initialize()
{
    this._clusteringSolution = new ClusteringSolution(this._dataset, this._clustersNumber, this._similarityMeasure);
    this._graph = ConstructionGraphBuilder.BuildMBClusteringConstructionGraph(this.Dataset, this._clustersNumber);

    // Configure the local search with the cluster count and the proximity
    // matrix exposed by the solution created above.
    KMeansLocalSearch localSearch = (KMeansLocalSearch)this.Problem.LocalSearch;
    localSearch.ClustersNumber = this._clustersNumber;
    localSearch.ProximityMatrix = this._clusteringSolution.ProximityMatrix;

    ClusteringMBInvalidator invalidator = (ClusteringMBInvalidator)this.Problem.ComponentInvalidator;
    invalidator.ClustersNumber = this._clustersNumber;

    // Both the problem's evaluator and the local search's evaluator are
    // pointed at the same clustering solution instance.
    ClusteringQualityEvaluator problemEvaluator = (ClusteringQualityEvaluator)this.Problem.SolutionQualityEvaluator;
    problemEvaluator.ClusteringSolution = this._clusteringSolution;

    ClusteringQualityEvaluator localSearchEvaluator = (ClusteringQualityEvaluator)localSearch.SolutionQualityEvaluator;
    localSearchEvaluator.ClusteringSolution = this._clusteringSolution;

    this.ConstructionGraph.InitializePheromone(1);
    this.ConstructionGraph.SetHeuristicValues(this._problem.HeuristicsCalculator, false);

    this._bestAnt = null;
    this._iterationBestAnt = null;
}
/*
 * Runs the algorithm once, starting from centroids supplied by the caller.
 *
 * userInputCentroids - starting centroids; each one is logged before the run.
 * maxIterations      - upper bound on assignment/update rounds.
 *
 * Returns the solution holding the final point-to-cluster mapping.
 */
public ClusteringSolution runKmeans(Entity[] userInputCentroids, int maxIterations = 1000)
{
    ClusteringSolution result = new ClusteringSolution(dataset.Length, this.K);

    for (int c = 0; c < userInputCentroids.Length; c++)
    {
        Debug.Log("Centroid: " + userInputCentroids[c]);
    }

    // Alternate assignment and centroid update until the centroids stop
    // changing or the iteration budget is used up.
    int rounds = 0;
    bool centroidsChanged;
    do
    {
        computeClusters(result, userInputCentroids);
        rounds++;
        centroidsChanged = computeCentroids(result, ref userInputCentroids);
    }
    while (centroidsChanged && rounds < maxIterations);

    return result;
}
/*
 * Maps every data point to the centroid with the smallest squared Euclidean
 * distance and records the centroid's index in sol.mapDataPointToCluster.
 * On ties the centroid with the lowest index wins (strict '<' comparison).
 *
 * sol       - solution whose point-to-cluster mapping is overwritten.
 * centroids - candidate cluster centres.
 */
public void computeClusters(ClusteringSolution sol, Entity[] centroids)
{
    for (int point = 0; point < dataset.Length; point++)
    {
        float closest = float.MaxValue;

        for (int c = 0; c < centroids.Length; c++)
        {
            float distance = dataset[point].computeSquareEuclideanDistance(centroids[c]);
            if (distance >= closest)
            {
                continue;
            }

            closest = distance;
            sol.mapDataPointToCluster[point] = c;
        }
    }
}
/// <summary>
/// Creates the clustering solution for the configured dataset, marks the
/// algorithm as initialized and initializes the assignment.
/// </summary>
/// <exception cref="Exception">
/// Thrown with the message "Uninitialized Algorithm" when the dataset or the
/// similarity measure has not been set.
/// </exception>
public void Initialize()
{
    if (_dataset == null)
    {
        throw new Exception("Uninitialized Algorithm");
    }

    if (this._similarityMeasure != null)
    {
        // A precomputed proximity matrix is passed along when one is available.
        if (_proximityMatrix != null)
        {
            this._clusteringSolution = new ClusteringSolution(this._dataset, this._clustersNumber, this._similarityMeasure, _proximityMatrix);
        }
        else
        {
            this._clusteringSolution = new ClusteringSolution(this._dataset, this._clustersNumber, this._similarityMeasure);
        }
    }
    else
    {
        throw new Exception("Uninitialized Algorithm");
    }

    this._initialized = true;
    this.InitializeAssignment();
}
} // End getSquaredScaledDistancefromPoint

// Returns the squared scaled distance between the centers of two active clusters.
//
// ActiveClusterIndex1 / ActiveClusterIndex2 - active-cluster indices. In distributed
//   execution mode an index at or beyond ClusteringSolution.NumberLocalActiveClusters
//   is resolved against the transported-cluster storage; otherwise it is mapped
//   through ClusteringSolution.RealClusterIndices into Solution.Y_k_i_.
// Solution - solution supplying the local centers and sigmas.
//
// The sigma passed to the distance computation is the one belonging to the FIRST cluster.
public static double getSquaredScaledDistanceTweenActiveClusters(int ActiveClusterIndex1, int ActiveClusterIndex2, ClusteringSolution Solution)
{
    double[] firstarg;
    double[] secondarg;
    double[] sigma;

    if (Solution.DistributedExecutionMode && (ActiveClusterIndex1 >= ClusteringSolution.NumberLocalActiveClusters))
    {
        int RemoteIndex1 = ActiveClusterIndex1 - ClusteringSolution.NumberLocalActiveClusters;
        firstarg = DistributedClusteringSolution.StorageforTransportedClusters.TotalTransportedY_t_i[RemoteIndex1];
        sigma = DistributedClusteringSolution.StorageforTransportedClusters.TotalTransportedSigma_t_i[RemoteIndex1];
    }
    else
    {
        int RealClusterIndex1 = ClusteringSolution.RealClusterIndices[ActiveClusterIndex1];
        firstarg = Solution.Y_k_i_[RealClusterIndex1];
        sigma = Solution.Sigma_k_i_[RealClusterIndex1];
    }

    // The locality test for the second cluster must look at ActiveClusterIndex2.
    // Testing ActiveClusterIndex1 here (as the code previously did) picks the wrong
    // storage whenever one cluster is local and the other is remote, producing a
    // negative RemoteIndex2 or an out-of-range RealClusterIndices lookup.
    if (Solution.DistributedExecutionMode && (ActiveClusterIndex2 >= ClusteringSolution.NumberLocalActiveClusters))
    {
        int RemoteIndex2 = ActiveClusterIndex2 - ClusteringSolution.NumberLocalActiveClusters;
        secondarg = DistributedClusteringSolution.StorageforTransportedClusters.TotalTransportedY_t_i[RemoteIndex2];
    }
    else
    {
        int RealClusterIndex2 = ClusteringSolution.RealClusterIndices[ActiveClusterIndex2];
        secondarg = Solution.Y_k_i_[RealClusterIndex2];
    }

    return getSquaredScaledDistancebetweenVectors(firstarg, secondarg, sigma);
} // End getSquaredScaledDistanceTweenActiveClusters
} // End getClusterDistancefromPoint

// Returns the squared scaled distance between a point and the center of an active cluster.
//
// LocalToProcessIndex - index of the point in Program.PointPosition.
// ActiveClusterIndex  - active-cluster index. In distributed execution mode an index at
//   or beyond ClusteringSolution.NumberLocalActiveClusters is resolved against the
//   transported-cluster storage; otherwise it is mapped through
//   ClusteringSolution.RealClusterIndices into the solution's own arrays.
// Solution            - solution supplying the local centers and sigmas.
public static double getSquaredScaledDistancePointActiveCluster(int LocalToProcessIndex, int ActiveClusterIndex, ClusteringSolution Solution)
{
    double[] center;
    double[] sigma;

    bool isTransported = Solution.DistributedExecutionMode
        && (ActiveClusterIndex >= ClusteringSolution.NumberLocalActiveClusters);

    if (isTransported)
    {
        int transportedSlot = ActiveClusterIndex - ClusteringSolution.NumberLocalActiveClusters;
        center = DistributedClusteringSolution.StorageforTransportedClusters.TotalTransportedY_t_i[transportedSlot];
        sigma = DistributedClusteringSolution.StorageforTransportedClusters.TotalTransportedSigma_t_i[transportedSlot];
    }
    else
    {
        int realCluster = ClusteringSolution.RealClusterIndices[ActiveClusterIndex];
        center = Solution.Y_k_i_[realCluster];
        sigma = Solution.Sigma_k_i_[realCluster];
    }

    return getSquaredScaledDistancebetweenVectors(Program.PointPosition[LocalToProcessIndex], center, sigma);
} // End getSquaredScaledDistancePointActiveCluster
} // End getEigenvalue(double[,] SecondDerivMatrix)

// Estimates, by repeated multiply-and-normalize (power) iteration, one eigenvalue/eigenvector
// pair for every cluster whose Splittable_k_ flag equals 1.
//
// Outline of what the code below does:
//   1. Rejects distributed execution mode and any Program.SigmaMethod > 0 by throwing the
//      exception returned from DAVectorUtility.SALSAError.
//   2. Points CenterEigenvector / CenterEigenvalue at the solution's Eigenvector_k_i /
//      Eigenvalue_k and allocates FirstTerm, CenterEigenstatus and CenterEigenconvergence
//      (one entry per cluster, sized by Ncent_Global).
//   3. Seeds every splittable cluster with the same random unit vector.
//   4. For at most Program.PowerIterationLimit rounds, accumulates over all points the vector
//      M * ((Y - x) . v) * (Y - x) per cluster, takes that sum as the new vector, forms the
//      eigenvalue candidate (old.new)/(old.old), tests convergence and renormalizes.
//   5. For converged clusters sets CenterEigenstatus to 1, stores the summed M values in
//      FirstTerm and overwrites CenterEigenvalue with FirstTerm - eigenvalue / Temperature.
//
// Solution - the clustering solution to analyse; it is stored in CurrentSolution.
//
// NOTE(review): `new Random()` is unseeded, so the start vector differs between runs — confirm
// that run-to-run reproducibility is not required here.
public void SetAllEigenvaluesIteratively(ClusteringSolution Solution)
{
    if (Solution.DistributedExecutionMode)
    {
        Exception e = DAVectorUtility.SALSAError(" Illegal Eigenvalue and Parallelization Combination ");
        throw (e);
    }
    if (Program.SigmaMethod > 0)
    {
        Exception e = DAVectorUtility.SALSAError(" Illegal Eigenvalue and Sigma Method Combination " + Program.SigmaMethod.ToString());
        throw (e);
    }

    this.CurrentSolution = Solution;
    // These two assignments store the solution's own members, so writes to CenterEigenvector /
    // CenterEigenvalue below presumably land in the solution's arrays (assumes they are
    // reference-type arrays — TODO confirm).
    this.CenterEigenvector = this.CurrentSolution.Eigenvector_k_i;
    this.CenterEigenvalue = this.CurrentSolution.Eigenvalue_k;
    this.InitVector = new double[Program.ParameterVectorDimension];
    this.FirstTerm = new double[this.CurrentSolution.Ncent_Global];
    this.CenterEigenstatus = new int[this.CurrentSolution.Ncent_Global];
    this.CenterEigenconvergence = new int[this.CurrentSolution.Ncent_Global];

    // Build a random start vector with components drawn from [-0.5, 0.5) and scale it to unit length.
    Random random = new Random();
    double InitNorm = 0.0;
    for (int VectorIndex = 0; VectorIndex < Program.ParameterVectorDimension; VectorIndex++)
    {
        InitVector[VectorIndex] = -0.5 + random.NextDouble();
        InitNorm += InitVector[VectorIndex] * InitVector[VectorIndex];
    }
    InitNorm = 1.0 / Math.Sqrt(InitNorm);
    for (int VectorIndex = 0; VectorIndex < Program.ParameterVectorDimension; VectorIndex++)
    {
        InitVector[VectorIndex] *= InitNorm;
    }

    // Initialization Loop over Clusters
    // Resets the per-cluster bookkeeping and copies the start vector into every splittable cluster.
    int somethingtodo = 0;
    for (int ClusterIndex = 0; ClusterIndex < this.CurrentSolution.Ncent_Global; ClusterIndex++)
    {
        this.CenterEigenconvergence[ClusterIndex] = 0;
        this.CenterEigenstatus[ClusterIndex] = 0;
        this.FirstTerm[ClusterIndex] = 0;
        if (this.CurrentSolution.Splittable_k_[ClusterIndex] != 1)
        {
            continue;
        }
        ++somethingtodo;
        for (int VectorIndex = 0; VectorIndex < Program.ParameterVectorDimension; VectorIndex++)
        {
            this.CenterEigenvector[ClusterIndex][VectorIndex] = InitVector[VectorIndex];
        }
    } // End Loop over Clusters

    // No splittable cluster at all: nothing to compute.
    if (somethingtodo == 0)
    {
        return;
    }

    // Reduction accumulators that live across all power iterations:
    // FindClusterFirstTerm collects the M values per cluster (filled during iteration 0 only),
    // FindNumberScalarProducts counts the scalar products performed.
    GlobalReductions.FindVectorDoubleSum FindClusterFirstTerm = new GlobalReductions.FindVectorDoubleSum(DAVectorUtility.ThreadCount, this.CurrentSolution.Ncent_Global);
    GlobalReductions.FindDoubleSum FindNumberScalarProducts = new GlobalReductions.FindDoubleSum(DAVectorUtility.ThreadCount);

    for (int NumPowerIterations = 0; NumPowerIterations < Program.PowerIterationLimit; NumPowerIterations++)
    {
        // Count clusters that are local (LocalStatus == 1), splittable and not yet converged;
        // stop iterating as soon as none remain.
        somethingtodo = 0;
        for (int ClusterIndex = 0; ClusterIndex < this.CurrentSolution.Ncent_Global; ClusterIndex++)
        {
            if (this.CurrentSolution.LocalStatus[ClusterIndex] != 1)
            {
                continue;
            }
            if (this.CurrentSolution.Splittable_k_[ClusterIndex] != 1)
            {
                continue;
            }
            if (this.CenterEigenconvergence[ClusterIndex] == 0)
            {
                ++somethingtodo;
            }
        }
        if (somethingtodo == 0)
        {
            break;
        }

        // Fresh per-iteration accumulator for the new (unnormalized) power vectors of all clusters.
        GlobalReductions.FindVectorDoubleSum3 FindNewPowerVectors = new GlobalReductions.FindVectorDoubleSum3(DAVectorUtility.ThreadCount, Program.ParameterVectorDimension, this.CurrentSolution.Ncent_Global);

        Parallel.For(0, Program.ParallelOptions.MaxDegreeOfParallelism, Program.ParallelOptions, (ThreadNo) =>
        {
            FindNewPowerVectors.startthread(ThreadNo);
            double[] PartVector = new double[Program.ParameterVectorDimension];
            // Each thread walks its own contiguous slice of points; the start is made relative
            // by subtracting DAVectorUtility.PointStart_Process.
            int indexlen = DAVectorUtility.PointsperThread[ThreadNo];
            int beginpoint = DAVectorUtility.StartPointperThread[ThreadNo] - DAVectorUtility.PointStart_Process;
            for (int alpha = beginpoint; alpha < indexlen + beginpoint; alpha++)
            {
                int IndirectSize = this.CurrentSolution.NumClusters_alpha_[alpha];
                for (int IndirectClusterIndex = 0; IndirectClusterIndex < IndirectSize; IndirectClusterIndex++)
                { // Loop over Clusters for this point
                    int RealClusterIndex = -1;
                    int RemoteIndex = -1;
                    int ActiveClusterIndex = -1;
                    VectorAnnealIterate.ClusterPointersforaPoint(alpha, IndirectClusterIndex, ref RealClusterIndex, ref ActiveClusterIndex, ref RemoteIndex);
                    if (this.CurrentSolution.Splittable_k_[RealClusterIndex] != 1)
                    {
                        continue;
                    }
                    double Mvalue = this.CurrentSolution.M_alpha_kpointer_[alpha][IndirectClusterIndex];
                    // The M sum per cluster does not depend on the iteration, so it is gathered only once.
                    if (NumPowerIterations == 0)
                    {
                        FindClusterFirstTerm.addapoint(ThreadNo, Mvalue, RealClusterIndex);
                    }
                    // PartVector = Y - x (cluster center minus point); multiplier = (Y - x) . current vector.
                    double multiplier = 0.0;
                    for (int VectorIndex = 0; VectorIndex < Program.ParameterVectorDimension; VectorIndex++)
                    {
                        PartVector[VectorIndex] = this.CurrentSolution.Y_k_i_[RealClusterIndex][VectorIndex] - Program.PointPosition[alpha][VectorIndex];
                        multiplier += PartVector[VectorIndex] * CenterEigenvector[RealClusterIndex][VectorIndex];
                    }
                    FindNumberScalarProducts.addapoint(ThreadNo, 1.0);
                    // Contribution of this point to the new vector: M * multiplier * (Y - x).
                    double wgt = Mvalue * multiplier;
                    for (int VectorIndex = 0; VectorIndex < Program.ParameterVectorDimension; VectorIndex++)
                    {
                        PartVector[VectorIndex] *= wgt;
                    }
                    FindNewPowerVectors.addapoint(ThreadNo, PartVector, RealClusterIndex);
                }
            } // End Loop over points
        }); // End loop initialing Point dependent quantities

        FindNewPowerVectors.sumoverthreadsandmpi();

        for (int ClusterIndex = 0; ClusterIndex < this.CurrentSolution.Ncent_Global; ClusterIndex++)
        {
            if (this.CurrentSolution.LocalStatus[ClusterIndex] != 1)
            {
                continue;
            }
            if ((this.CurrentSolution.Splittable_k_[ClusterIndex] != 1) || (this.CenterEigenconvergence[ClusterIndex] != 0))
            {
                continue;
            }
            double[] sums = new double[3]; // Old.New Old.Old New.New
            for (int loop = 0; loop < 3; loop++)
            {
                sums[loop] = 0.0;
            }
            for (int VectorIndex = 0; VectorIndex < Program.ParameterVectorDimension; VectorIndex++)
            {
                // TotalVectorSum is read as a flat array indexed cluster-major, component-minor.
                int TotalIndex = VectorIndex + ClusterIndex * Program.ParameterVectorDimension;
                double newvalue = FindNewPowerVectors.TotalVectorSum[TotalIndex];
                double oldvalue = CenterEigenvector[ClusterIndex][VectorIndex];
                sums[0] += oldvalue * newvalue;
                sums[1] += oldvalue * oldvalue;
                sums[2] += newvalue * newvalue;
                CenterEigenvector[ClusterIndex][VectorIndex] = newvalue;
            }

            // Decide if finished and set eigenvalue
            double CandidateEigenvalue = sums[0] / sums[1];
            bool LegalEigenvalue = (CandidateEigenvalue > 0.0);
            DAVectorUtility.SynchronizeMPIvariable(ref LegalEigenvalue);

            // Check if converged
            // Do this in one process ONLY
            if ((NumPowerIterations > 5) && LegalEigenvalue)
            { // Arbitrary choice for Number of Power Iterations Cut
                // Despite its name, EigenvalueDone counts the convergence criteria that FAILED;
                // zero means the cluster has converged.
                int EigenvalueDone = 0;
                if (DAVectorUtility.MPI_Rank == 0)
                { // Decisions can only be made in one process
                    // Criterion 1: eigenvalue changed by more than the relative tolerance since the previous round.
                    if (Math.Abs(CandidateEigenvalue - this.CenterEigenvalue[ClusterIndex]) > CandidateEigenvalue * Program.eigenvaluechange)
                    {
                        ++EigenvalueDone;
                    }
                    double delta = sums[2] - 2.0 * sums[0] * CandidateEigenvalue + sums[1] * CandidateEigenvalue * CandidateEigenvalue; // (Ax- Eigenvalue*Axold)**2
                    // Criterion 2: squared residual exceeds the relative eigenvector tolerance.
                    if (Math.Abs(delta) > CandidateEigenvalue * CandidateEigenvalue * Program.eigenvectorchange)
                    {
                        ++EigenvalueDone;
                    }
                } // End Test on Convergence
                DAVectorUtility.SynchronizeMPIvariable(ref EigenvalueDone);
                // Record convergence as 1 + iteration number so that a non-zero entry marks "converged".
                if (EigenvalueDone == 0)
                {
                    this.CenterEigenconvergence[ClusterIndex] = 1 + NumPowerIterations;
                }
            }
            this.CenterEigenvalue[ClusterIndex] = CandidateEigenvalue;

            // Normalize current Power Vector to 1
            double wgt = 1.0 / Math.Sqrt(sums[2]);
            for (int VectorIndex = 0; VectorIndex < Program.ParameterVectorDimension; VectorIndex++)
            {
                CenterEigenvector[ClusterIndex][VectorIndex] *= wgt;
            }
        } // End Loop over Clusters
    } // End Loop over NumPowerIterations

    FindClusterFirstTerm.sumoverthreadsandmpi();
    FindNumberScalarProducts.sumoverthreadsandmpi();
    Program.SumEigenSPCalcs += FindNumberScalarProducts.Total;

    // Final pass: only local, splittable clusters that converged get status 1; for those the
    // stored eigenvalue is replaced by FirstTerm - eigenvalue / Temperature.
    for (int ClusterIndex = 0; ClusterIndex < this.CurrentSolution.Ncent_Global; ClusterIndex++)
    {
        this.CenterEigenstatus[ClusterIndex] = 0;
        if (this.CurrentSolution.LocalStatus[ClusterIndex] != 1)
        {
            continue;
        }
        if ((this.CurrentSolution.Splittable_k_[ClusterIndex] != 1) || (this.CenterEigenconvergence[ClusterIndex] <= 0))
        {
            continue;
        }
        this.CenterEigenstatus[ClusterIndex] = 1;
        this.FirstTerm[ClusterIndex] = FindClusterFirstTerm.TotalVectorSum[ClusterIndex];
        double tmp = this.CenterEigenvalue[ClusterIndex] / this.CurrentSolution.Temperature;
        this.CenterEigenvalue[ClusterIndex] = this.FirstTerm[ClusterIndex] - tmp;
    }
} // End SetEigenvaluesIteratively(ClusteringSolution Solution)
/// <summary>
/// Builds a multinet classifier holding one Bayesian network classifier per
/// cluster: for each cluster the classification algorithm's dataset is set to
/// the cluster converted to a dataset, a classifier is created from it, and
/// the result is registered under the cluster's label.
/// </summary>
/// <param name="clusteringSolution">Clustering whose clusters each receive their own classifier.</param>
/// <param name="BayesianClassificationAlgorithms">Algorithm used to create the per-cluster classifiers; its <c>Dataset</c> property is overwritten for every cluster.</param>
/// <param name="trainingSet">Training set whose metadata is handed to the multinet classifier.</param>
/// <returns>The populated multinet classifier.</returns>
public static BayesianClusterMultinetClassifier ConstructClusterBMNClassifier(ClusteringSolution clusteringSolution, IClassificationAlgorithm BayesianClassificationAlgorithms, DataMining.Data.Dataset trainingSet)
{
    BayesianClusterMultinetClassifier multinet = new BayesianClusterMultinetClassifier(trainingSet.Metadata, clusteringSolution);

    foreach (Cluster cluster in clusteringSolution.Clusters)
    {
        BayesianClassificationAlgorithms.Dataset = cluster.ConvertToDataset();

        // NOTE(review): `as` yields null when the created classifier is not a
        // BayesianNetworkClassifier; that null is registered as-is — confirm this is intended.
        BayesianNetworkClassifier perClusterNetwork =
            BayesianClassificationAlgorithms.CreateClassifier() as BayesianNetworkClassifier;

        multinet.AddBayesianNetworkClassifier(cluster.Label, perClusterNetwork);
    }

    return multinet;
}
/// <summary>
/// Creates an empty multinet classifier bound to the given metadata and
/// clustering solution; the per-cluster classifier table starts out empty.
/// </summary>
/// <param name="metadata">Metadata stored in <c>_metadata</c>.</param>
/// <param name="clusteringSolution">Clustering solution stored in <c>_clusteringSolution</c>.</param>
private BayesianClusterMultinetClassifier(DataMining.Data.Metadata metadata, ClusteringSolution clusteringSolution)
{
    _metadata = metadata;
    _clusteringSolution = clusteringSolution;

    // Keyed by int; the visible factory registers entries under each cluster's label.
    _bayesianNetworkClassfiers = new Dictionary<int, BayesianNetworkClassifier>();
}