/// <summary>
/// Trains a KMeans model (algorithm variant 2) on synthetic two-attribute samples
/// generated around three known centers, then persists the trained model to "Test01.json".
/// </summary>
public void Test_Save()
{
    // Three generating centers in (Height, Weight) space.
    double[][] centers = new double[3][];
    centers[0] = new double[] { 5.0, 5.0 };
    centers[1] = new double[] { 15.0, 15.0 };
    centers[2] = new double[] { 30.0, 30.0 };

    string[] attributes = new string[] { "Height", "Weight" };

    int attributeCount = attributes.Length; // 2 in this demo (height, weight)
    int clusterCount = 3;                   // must be between 2 and the number of data tuples
    int maxIterations = 300;                // trial and error

    ClusteringSettings settings = new ClusteringSettings(maxIterations, clusterCount, attributeCount, KmeansAlgorithm: 2);

    // Creates the learning api object.
    LearningApi api = new LearningApi(loadDescriptor());

    // Generate 10000 noisy samples around the known centers.
    var rawData = Helpers.CreateSampleData(centers, 2, 10000, 0.5);

    KMeansAlgorithm kMeans = new KMeansAlgorithm(settings);

    // Train, then save the resulting model.
    kMeans.Run(rawData, api.Context);

    string fileName = "Test01.json";
    kMeans.Save(rootFolder + fileName);
}
/// <summary>
/// Runs k-means over the configured points/area and converts the result into a drawable image.
/// </summary>
private async Task <DrawingImage> GetDrawingImage()
{
    var algorithm = new KMeansAlgorithm(_points, _areaPoints);
    var clusteringResult = await algorithm.GetResultAsync();
    return clusteringResult.GetDrawingImage();
}
/// <summary>
/// UseKMeans is an extension that call KMeans through LearningAPI.
/// </summary>
/// <param name="api">the LearningAPI object</param>
/// <param name="settings">the desired clustering settings</param>
/// <param name="maxDistance">optional per-cluster maximum distances handed to the algorithm</param>
/// <returns>the same <see cref="LearningApi"/> instance, for chaining</returns>
public static LearningApi UseKMeans(this LearningApi api, ClusteringSettings settings, double[] maxDistance = null)
{
    // NOTE(review): the module is registered under the key "Rbm", which looks like a
    // copy/paste leftover from an RBM extension. Kept as-is because callers may look
    // the module up by this exact name — confirm before renaming.
    var algorithm = new KMeansAlgorithm(settings.Clone(), maxDistance);
    api.AddModule(algorithm, "Rbm");
    return api;
}
/// <summary>
/// Generates the requested number of random points within (maxX, maxY), clusters them
/// into the requested number of classes and renders the result as a drawing image.
/// </summary>
private async Task <DrawingImage> GetDrawingImage(int countOfPoints, int countOfClasses, int maxX, int maxY)
{
    var randomPoints = countOfPoints.GenerateRandomPoints(maxX, maxY);
    var algorithm = new KMeansAlgorithm(randomPoints, countOfClasses);
    var clusteringResult = await algorithm.GetResultAsync();
    return clusteringResult.GetDrawingImage();
}
/// <summary>
/// Builds the experiment: loads the points from file, seeds variant-dependent random
/// clusters and runs the k-means algorithm, storing the resulting iteration count.
/// </summary>
/// <param name="variant">variant used to randomize the initial clusters</param>
public Experiment1(int variant)
{
    // Load the input samples from the backing file.
    points = FileService.readPoints();
    kmeans = new KMeansAlgorithm();
    // Initialize the clusters randomly according to the requested variant.
    this.RandClusters(variant);
    // StartAlgorithm mutates both points and clusters (ref) and returns the iteration count.
    iteration = kmeans.StartAlgorithm(ref points, ref clusters);
}
/// <summary>
/// Console entry point: routes to Problem 1 (k-means clustering of patient data)
/// or Problem 2 (matrix emission probability display) based on user input.
/// </summary>
public static void Main()
{
    Console.WriteLine("Would you like to Run Problem 1 or Problem 2? [1/2]");
    String userInput = Console.ReadLine();

    // Console.ReadLine can return null at end-of-stream; compare with the constant on
    // the left so a null answer falls through instead of throwing.
    if ("1".Equals(userInput))
    {
        DataSetIO dataSet;
        Console.WriteLine("Would you like to create a new data set? [y/n]");
        userInput = Console.ReadLine();
        if ("y".Equals(userInput))
        {
            dataSet = new DataSetIO(true);
            Console.WriteLine("How many entries would you like in your data set? [Any Number]");
            userInput = Console.ReadLine();

            // Validate instead of letting Convert.ToInt32 throw FormatException on bad input.
            int entryCount;
            while (!int.TryParse(userInput, out entryCount))
            {
                Console.WriteLine("Please enter a valid whole number of entries.");
                userInput = Console.ReadLine();
            }

            dataSet.CreateDataSet(entryCount);
            Console.WriteLine("Created dataset.");
        }
        else
        {
            dataSet = new DataSetIO(false);
            Console.WriteLine("Using Existing Dataset");
        }

        dataSet.ReadDataSet();

        KMeansAlgorithm kMeans = new KMeansAlgorithm();
        List <Patient> patients = dataSet.getPatientsList();

        ClusterIO.ClearDataSet();

        // Seed the algorithm from the data set, then cluster and report outliers.
        kMeans.DetermineInitialValues(patients);
        foreach (var patient in patients)
        {
            kMeans.IntializePoints(patient);
        }
        kMeans.PlantSeeds();
        kMeans.CalculateClusterContents();
        kMeans.DetermineOutliers();
    }
    else
    {
        Console.WriteLine("Please enter a path in the format of 0,1,2 for numbers [0-2]");
        userInput = Console.ReadLine();
        Matrix.DisplayProbability(Matrix.RequestedEmisition(userInput));
        //MatrixIO.ReadMatrixData();
    }
}
/// <summary>
/// Verifies that KMeansAlgorithm.UpdateMeans supports incremental (minibatch) mean
/// calculation: processing a set in two halves while carrying over the previous mean
/// and sample count must reproduce the mean of the full set.
/// </summary>
public void Test_IncrementalMeanAverage()
{
    // Test samples.
    double[][] data = new double[10][];

    // Each sample belongs to some cluster.
    int[] clustering = new int[data.Length];

    for (int i = 0; i < 10; i++)
    {
        data[i] = new double[] { i };
        clustering[i] = 0; // We have a single cluster. Every sample belongs to that cluster.
    }

    double[][] means = new double[1][];
    means[0] = new double[] { 0 };

    KMeansAlgorithm.UpdateMeans(data, clustering, means);

    // Mean of 0,1,2,3,4,5,6,7,8,9 is 4.5
    Assert.True(means[0][0] == 4.5);

    data = new double[5][];

    // Each sample belongs to some cluster.
    clustering = new int[data.Length];

    for (int i = 0; i < 5; i++)
    {
        data[i] = new double[] { i };
        clustering[i] = 0; // We have a single cluster. Every sample belongs to that cluster.
    }

    // Mean of 0,1,2,3,4 is 2
    KMeansAlgorithm.UpdateMeans(data, clustering, means, 0, new double[] { 0 });
    Assert.True(means[0][0] == 2);

    data = new double[5][];

    // Each sample belongs to some cluster.
    clustering = new int[data.Length];

    for (int i = 0; i < 5; i++)
    {
        data[i] = new double[] { i + 5 };
        clustering[i] = 0; // We have a single cluster. Every sample belongs to that cluster.
    }

    KMeansAlgorithm.UpdateMeans(data, clustering, means, 5, new double[] { 2 });

    // M1 = mean of 0,1,2,3,4
    // M2 = mean of 5,6,7,8,9
    // Mean of M1 and M2 together is 4.5
    // (1/(q1+q2))*[q1*M1+q2*M2]
    // where q1 is number of elements inside of M1 and q2 number of elements inside of M2
    Assert.True(means[0][0] == 4.5);
}
/// <summary>
/// Loads a previously saved KMeans model from "Test01.json" and verifies that a
/// trained instance with clusters was restored.
/// </summary>
public void Test_Load()
{
    const string fileName = "Test01.json";

    var kMeans = new KMeansAlgorithm(new ClusteringSettings(0, 2, 2));
    kMeans.Load(rootFolder + fileName);

    Assert.True(kMeans.Instance != null);
    Assert.True(kMeans.Instance.Clusters != null);
}
/// <summary>
/// Clusters the points of the uploaded CSV file with k-means and returns, for every
/// point, its assigned cluster number together with the final centroids.
/// </summary>
/// <param name="file">CSV file whose rows are the points to cluster</param>
/// <param name="numberOfClusters">desired number of clusters (default 3)</param>
/// <returns>200 OK with the point/cluster assignments and the centroids</returns>
public ActionResult <KMeansResponse> Post([FromForm(Name = "file")] IFormFile file, [FromQuery] int numberOfClusters = 3)
{
    var lines = _getScvRows.GetLines(file);
    var points = lines.Transform();
    var result = new KMeansResponse();
    // (removed an unused local "centroid" that was allocated here and never read)

    List <List <double> > clusterCenters = AlgorithmsUtils.MakeInitialSeeds(points, numberOfClusters);

    bool stop = false;
    Dictionary <List <double>, List <double> > clusters = null;

    // Iterate until the recalculated centers stop moving.
    while (!stop)
    {
        _logger.LogInformation($"Iteration = {iteration}");
        iteration++;

        clusters = KMeansAlgorithm.MakeClusters(points, clusterCenters);
        List <List <double> > oldClusterCenters = clusterCenters;

        //recalculete center of clusters
        clusterCenters = KMeansAlgorithm.RecalculateCoordinateOfClusterCenters(clusters, clusterCenters);

        if (ListUtils.IsListEqualsToAnother(clusterCenters, oldClusterCenters))
        {
            // Converged: build the response; cluster numbers are 1-based.
            int counter = 1;
            stop = true;
            result.Centroids = new Centroids();
            var list = new List <PointsAndClusterNumber>();

            foreach (var center in clusterCenters)
            {
                // All points whose assigned center equals this center.
                var map = clusters.Where(point => ListUtils.IsListEqualsToAnother(point.Value, center));
                foreach (var item in map)
                {
                    var pointAndCluster = new PointsAndClusterNumber() { Point = new List <double>() };
                    pointAndCluster.Point = item.Key;
                    pointAndCluster.ClusterNumber = counter;
                    list.Add(pointAndCluster);
                }
                counter++;
            }

            result.PointsAndClusterNumber = list;
            result.Centroids.Centroid = clusterCenters;
        }
    }

    return Ok(result);
}
/// <summary>
/// Recomputes the outputs: refreshes the hidden-layer outputs, lets the k-means helper
/// reassign the sample points to their nearest centroids, then pushes each hidden-layer
/// vector through the output neuron and collects the results.
/// </summary>
public void DoStuffer()
{
    Outputs.Clear();
    CalculateHiddenLayerOutputs();

    // Hand the points to the k-means helper, reassign, then read the updated set back.
    KMeansAlgorithm.SamplePoints = SamplePoints;
    KMeansAlgorithm.AssignSamplePointsToNearestCentroids();
    SamplePoints = KMeansAlgorithm.SamplePoints;

    for (var index = 0; index < hiddenLayerOutputs.Count; index++)
    {
        _neuron.Inputs = hiddenLayerOutputs[index];
        _neuron.CalculateOutput();
        Outputs.Add(_neuron.Output);
    }
}
/// <summary>
/// calculateNearestCluster is a function that determines the nearest cluster and calculates the distance between those two clusters.
/// </summary>
/// <param name="Centroids">the centroids of the clusters</param>
/// <param name="SamplesInClusters">number of samples in each cluster</param>
/// <returns>Tuple of two Items: <br />
/// - Item 1: contains the number of nearest cluster <br />
/// - Item 2: contains the distance to the nearest cluster
/// </returns>
private static Tuple <int[], double[]> calculateNearestCluster(double[][] Centroids, int[] SamplesInClusters)
{
    int[] nearestClusters = new int[Centroids.Length];
    double[] nearestDistances = new double[Centroids.Length];
    int Code;
    string Message = "Function <calculateNearestCluster>: ";
    try
    {
        for (int current = 0; current < Centroids.Length; current++)
        {
            // Empty clusters have no meaningful neighbour; mark them with -1.
            if (SamplesInClusters[current] == 0)
            {
                nearestClusters[current] = -1;
                nearestDistances[current] = -1;
                continue;
            }

            nearestDistances[current] = double.MaxValue;
            for (int other = 0; other < Centroids.Length; other++)
            {
                // Skip the cluster itself and any empty cluster.
                if (current == other || SamplesInClusters[other] == 0)
                {
                    continue;
                }

                double distance = KMeansAlgorithm.calculateDistance(Centroids[current], Centroids[other]);
                if (distance < nearestDistances[current])
                {
                    nearestDistances[current] = distance;
                    nearestClusters[current] = other;
                }
            }
        }

        return Tuple.Create(nearestClusters, nearestDistances);
    }
    catch (Exception Ex)
    {
        Code = 400;
        Message += "Unhandled exception:\t" + Ex.ToString();
        throw new KMeansException(Code, Message);
    }
}
/// <summary>
/// adjustInClusterMaxDistance is a function that recalculates/approximate the maximum distance in the cluster for partial clustering
/// </summary>
/// <param name="cluster">index of the cluster</param>
/// <param name="res">score of the latest (partial) k-means run</param>
/// <param name="oldCentroids">centroids as they were before the latest run</param>
private void adjustInClusterMaxDistance(int cluster, KMeansScore res, double[][] oldCentroids)
{
    // Distance between the newly computed centroid and the stored one.
    double newDistance = KMeansAlgorithm.calculateDistance(res.Model.Clusters[cluster].Centroid, this.Score.Centroids[cluster]);

    // Upper bound: previous max distance plus how far the stored centroid moved.
    double previousBound = this.Score.InClusterMaxDistance[cluster] + KMeansAlgorithm.calculateDistance(this.Score.Centroids[cluster], oldCentroids[cluster]);

    // Keep the larger of the two as the approximated in-cluster max distance.
    if (previousBound > newDistance)
    {
        newDistance = previousBound;
    }

    this.Score.InClusterMaxDistance[cluster] = newDistance;
}
/// <summary>
/// Initialization Prior to receiving any inputs, the region is initialized by computing a list of initial potential
/// synapses for each column. This consists of a random set of inputs selected from the input space. Each input is
/// represented by a synapse and assigned a random permanence value. The random permanence values are chosen with two
/// criteria. First, the values are chosen to be in a small range around connectedPerm (the minimum permanence value
/// at which a synapse is considered "connected"). This enables potential synapses to become connected (or
/// disconnected) after a small number of training iterations. Second, each column has a natural center over the
/// input region, and the permanence values have a bias towards this center (they have higher values near the
/// center).
/// </summary>
/// <param name="input">the input whose matrix defines the input space</param>
public void Init(HtmInput input)
{
    _input = input;
    _columnList = new List <HtmColumn>();
    _activeColumns = new List <HtmColumn>();

    // All linear indices of the input matrix; used to pick random synapse positions.
    var inputIndexList = new List <int>();
    for (int i = 0; i < input.Matrix.GetLength(0) * input.Matrix.GetLength(1); i++)
    {
        inputIndexList.Add(i);
    }

    // Place the columns at k-means cluster centers computed over the input area.
    IEnumerable <KMeansCluster> clusters = KMeansAlgorithm.FindMatrixClusters(input.Matrix.GetLength(0), input.Matrix.GetLength(1), HtmParameters.ColumnsCount);

    foreach (KMeansCluster cluster in clusters)
    {
        // Shuffle the indices so each column gets its own random subset of inputs.
        List <int> htmSynapses = inputIndexList.Shuffle(Ran).ToList();
        var synapses = new List <HtmForwardSynapse>();

        for (int j = 0; j < HtmParameters.AmountOfPotentialSynapses; j++)
        {
            // NOTE(review): both Y and X are derived from GetLength(0); for a
            // non-square matrix one of them probably should use GetLength(1) — confirm.
            var newSynapse = new HtmForwardSynapse(HtmParameters.ConnectedPermanence)
            {
                Input = input,
                Y = htmSynapses[j] / input.Matrix.GetLength(0),
                X = htmSynapses[j] % input.Matrix.GetLength(0),
                Permanance = (Ran.Next(5)) / (double)10, // one of 0.0, 0.1, 0.2, 0.3, 0.4
            };
            synapses.Add(newSynapse);
        }

        _columnList.Add(new HtmColumn
        {
            Y = (int)Math.Round(cluster.Location.Y),
            X = (int)Math.Round(cluster.Location.X),
            PotentialSynapses = synapses
        });
    }

    // Removed a redundant trailing re-initialization of _activeColumns: it is assigned
    // a fresh list at the top of this method and never touched in between.
}
/// <summary>
/// Runs the optimal-number-of-clusters search (KmeansAlgorithm 2, numClusters set to 0)
/// on normalized "SIN X" function data and writes the resulting quality measures
/// (D, D', Fmin) for cluster counts 2..10 to a CSV file.
/// </summary>
public void Test_OptimalNumberOfCLusters()
{
    // directory to load
    string loadDirectory = rootFolder + "Functions\\";
    string FunctionName = "SIN X"; //without extension
    string savePath = rootFolder + "Optimal Clusters\\" + FunctionName + " Results.csv";

    double[][] function = Helpers.LoadFunctionData(loadDirectory + FunctionName + "\\" + FunctionName + ".csv");
    function = TestFunctionGenerators.normalizeData(function);

    int numAttributes = 2;  // attributes per sample
    int numClusters = 0;    // 0 here; the run is expected to determine the cluster count (asserted > 1 below)
    int maxCount = 300;     // trial and error

    ClusteringSettings settings = new ClusteringSettings(maxCount, numClusters, numAttributes, KmeansAlgorithm: 2);

    // Creates learning api object
    LearningApi api = new LearningApi();
    api.UseActionModule <object, double[][]>((data, ctx) =>
    {
        return KMeansAlgorithm.transposeFunction(function);
    });

    api.UseKMeans(settings);

    // train
    var resp = api.Run() as KMeansScore;
    Assert.True(resp.Model.NumberOfClusters > 1);

    // Collect the measures per candidate cluster count and persist them.
    double[][] OptimalClustersResults = new double[4][];
    OptimalClustersResults[0] = new double[] { 2, 3, 4, 5, 6, 7, 8, 9, 10 };
    OptimalClustersResults[1] = resp.Model.D;
    OptimalClustersResults[2] = resp.Model.DPrime;
    OptimalClustersResults[3] = resp.Model.Fmin;

    Helpers.Write2CSVFile(OptimalClustersResults, savePath);
}
/// <summary>
/// Trains the k-means function-recognition module on similar "SIN_SIN X" functions and
/// then tests recognition against the trained function plus three foreign functions,
/// writing centroids, max distances, raw results and a true/false matrix to CSV files.
/// </summary>
public void Test_FunctionRecognition()
{
    int numAttributes = 3;
    int numClusters = 2;
    int maxCount = 300;

    // directory to load
    string loadDirectory = rootFolder + "Functions\\";
    // directory to save
    string saveDirectory = rootFolder + "Function Recognition\\";

    string TrainedFuncName = "SIN_SIN X";

    // functions' paths: the first entry is the trained function, the rest are foreign.
    // (Three commented-out alternative path sets were removed here as dead code.)
    string[] FunctionPaths = new string[]
    {
        loadDirectory + TrainedFuncName + "\\NRP5-10\\" + TrainedFuncName + " SimilarFunctions Normalized NRP5-10.csv",
        loadDirectory + "SIN_COS X\\NRP5-10\\SIN_COS X SimilarFunctions Normalized NRP5-10.csv",
        loadDirectory + "COS_COS X\\NRP5-10\\COS_COS X SimilarFunctions Normalized NRP5-10.csv",
        loadDirectory + "COS_SIN X\\NRP5-10\\COS_SIN X SimilarFunctions Normalized NRP5-10.csv"
    };

    int numTrainFun = 800;
    int numTestFun = 200;

    double[][] loadedSimFunctions = Helpers.LoadFunctionData(FunctionPaths[0]);

    int numLoadedFunc = 0;

    ClusteringSettings settings = new ClusteringSettings(maxCount, numClusters, numAttributes, KmeansAlgorithm: 2);

    LearningApi api = new LearningApi();
    api.UseActionModule <object, double[][]>((funcData, ctx) =>
    {
        // Each api.Run feeds the next similar function from the loaded set.
        numLoadedFunc++;
        return KMeansAlgorithm.transposeFunction(KMeansAlgorithm.selectFunction(loadedSimFunctions, numLoadedFunc, numAttributes));
    });

    api.UseKMeansFunctionRecognitionModule(settings);

    KMeansFunctionRecognitonScore res = new KMeansFunctionRecognitonScore();

    //train
    for (int i = 0; i < numTrainFun; i++)
    {
        res = api.Run() as KMeansFunctionRecognitonScore;
    }

    // save the formed clusters (just for plotting the function recognition results)
    Helpers.Write2CSVFile(res.Centroids, saveDirectory + "Calculated Centroids.csv");
    double[][] tempMaxDistance = new double[1][];
    tempMaxDistance[0] = res.InClusterMaxDistance;
    Helpers.Write2CSVFile(tempMaxDistance, saveDirectory + "Calculated Max Distance.csv");

    // save the trained clusters in a persistant location (just for plotting the clusters)
    Helpers.Write2CSVFile(res.Centroids, saveDirectory + TrainedFuncName + "\\Calculated Centroids C" + numClusters + ".csv");
    Helpers.Write2CSVFile(tempMaxDistance, saveDirectory + TrainedFuncName + "\\Calculated Max Distance C" + numClusters + ".csv");

    // start testing for function recognition
    double[] testingResults = new double[numTestFun * FunctionPaths.Length];
    double[][] data;

    for (int l = 0; l < FunctionPaths.Length; l++)
    {
        loadedSimFunctions = Helpers.LoadFunctionData(FunctionPaths[l]);
        for (int i = 0; i < numTestFun; i++)
        {
            // Test functions start right after the training range.
            data = KMeansAlgorithm.transposeFunction(KMeansAlgorithm.selectFunction(loadedSimFunctions, numTrainFun + i + 1, numAttributes));
            var predictionResult = api.Algorithm.Predict(data, null) as KMeansFunctionRecognitionResult;
            testingResults[i + l * numTestFun] = predictionResult.Result ? 1 : 0;
        }
    }

    // save results
    double[][] tempFunResults = new double[1][];
    tempFunResults[0] = testingResults;
    Helpers.Write2CSVFile(tempFunResults, saveDirectory + "Results.csv");

    double[][] TFMat = createTrueFalseMatrix(testingResults, FunctionPaths.Length);
    Helpers.Write2CSVFile(TFMat, saveDirectory + "TrueFalseMatrix.csv");
}
/// <summary>
/// Generates the centroids: hands the sample points to the k-means helper, lets it
/// create one centroid per hidden neuron, then stores the resulting centroids.
/// </summary>
public void GenerateCentroids()
{
    KMeansAlgorithm.SamplePoints = SamplePoints;
    // "GenrateCentroids" is the helper's (misspelled) API name.
    KMeansAlgorithm.GenrateCentroids(HiddenNeuronsNumber);
    Centroids = KMeansAlgorithm.Centroids;
}
/// <summary>
/// Verifies partial (incremental) k-means training: training on dataset 1 and then
/// continuing with dataset 2 must yield centroids equal to the sample-count-weighted
/// combination of the two independent runs, and the in-cluster max distance must grow
/// at least by the centroid shift.
/// </summary>
public void Test_TrainPartials()
{
    // Generating centers of the first dataset.
    double[][] clusterCenters = new double[3][];
    clusterCenters[0] = new double[] { 5.0, 5.0 };
    clusterCenters[1] = new double[] { 15.0, 15.0 };
    clusterCenters[2] = new double[] { 30.0, 30.0 };

    // Generating centers of the second dataset (close to the first set).
    double[][] clusterCenters2 = new double[3][];
    clusterCenters2[0] = new double[] { 6, 5 };
    clusterCenters2[1] = new double[] { 17, 18 };
    clusterCenters2[2] = new double[] { 28, 30 };

    string[] attributes = new string[] { "Height", "Weight" };

    int numAttributes = attributes.Length;  // 2 in this demo (height,weight)
    int numClusters = 3;  // vary this to experiment (must be between 2 and number data tuples)
    int maxCount = 300;  // trial and error

    // Buffers to capture the results of the first partial run.
    double[][] apiResp1Centroid = new double[numClusters][];
    double[] apiResp1MaxDistance = new double[numClusters];
    double[] apiResp1NumSamples = new double[numClusters];

    ClusteringSettings settings = new ClusteringSettings(maxCount, numClusters, numAttributes, KmeansAlgorithm: 2);

    // Creates learning api object
    LearningApi api = new LearningApi(loadDescriptor());
    LearningApi api2 = new LearningApi(loadDescriptor());

    double[][] rawData = Helpers.CreateSampleData(clusterCenters, 2, 10000, 0.5);
    double[][] rawData2 = Helpers.CreateSampleData(clusterCenters2, 2, 5000, 0.5);

    int runNum = 0;

    // api serves rawData on the first run and rawData2 afterwards.
    api.UseActionModule <object, double[][]>((data, ctx) =>
    {
        if (runNum == 0)
        {
            return (rawData);
        }
        else
        {
            return (rawData2);
        }
    });

    // api2 always serves rawData2 only.
    api2.UseActionModule <object, double[][]>((data, ctx) =>
    {
        return (rawData2);
    });

    // start api2 that runs only second raw data (rawData2)
    api2.UseKMeans(settings);

    // train
    var api2Resp = api2.Run() as KMeansScore;
    Assert.True(api2Resp.Model.Clusters != null);
    Assert.True(api2Resp.Model.Clusters.Length == clusterCenters.Length);

    // start api that runs first raw data (rawData) and save results in variables
    api.UseKMeans(settings);

    // train
    var apiResp = api.Run() as KMeansScore;
    Assert.True(apiResp.Model.Clusters != null);
    Assert.True(apiResp.Model.Clusters.Length == clusterCenters.Length);

    // save first run results in variables
    for (int i = 0; i < numClusters; i++)
    {
        apiResp1Centroid[i] = apiResp.Model.Clusters[i].Centroid;
        apiResp1MaxDistance[i] = apiResp.Model.Clusters[i].InClusterMaxDistance;
        apiResp1NumSamples[i] = apiResp.Model.Clusters[i].NumberOfSamples;
    }

    // run with new data
    runNum++;

    // continue partial api run using second raw data (rawData2)
    // NOTE(review): this new settings instance (carrying initialCentroids) is never
    // handed back to api — api keeps the module built from the original settings.
    // Confirm whether api.Run below is meant to pick these settings up.
    settings = new ClusteringSettings(maxCount, numClusters, numAttributes, KmeansAlgorithm: 2, initialCentroids: apiResp1Centroid);

    // train
    apiResp = api.Run() as KMeansScore;
    Assert.True(apiResp.Model.Clusters != null);
    Assert.True(apiResp.Model.Clusters.Length == clusterCenters.Length);

    //// compare results
    double f, res;
    for (int i = 0; i < numClusters; i++)
    {
        // partial formula f*res
        f = (double)1 / apiResp.Model.Clusters[i].NumberOfSamples;
        for (int j = 0; j < numAttributes; j++)
        {
            // Sample-count-weighted combination of the two runs' centroids.
            res = apiResp1Centroid[i][j] * apiResp1NumSamples[i] + api2Resp.Model.Clusters[i].Centroid[j] * api2Resp.Model.Clusters[i].NumberOfSamples;

            // partial centroid check
            Assert.True(apiResp.Model.Clusters[i].Centroid[j] == f * res);
        }

        // max distance in cluster check
        Assert.True(apiResp.Model.Clusters[i].InClusterMaxDistance >= apiResp1MaxDistance[i] + KMeansAlgorithm.calculateDistance(apiResp1Centroid[i], apiResp.Model.Clusters[i].Centroid));
    }
}
/// <summary>
/// Predicts if the specified function fits in the trainined MIN-MAX cluster interval.
/// All calculated clusters must fit in trained cluster MIN-MAX intervals.
/// </summary>
/// <param name="funcData">function samples to classify (transposed form)</param>
/// <param name="ctx">learning context passed through to the internal k-means training</param>
/// <returns>a <see cref="KMeansFunctionRecognitionResult"/> with per-cluster results, overall result and loss</returns>
public IResult Predict(double[][] funcData, IContext ctx)
{
    // Cluster the incoming function from scratch (no initial centroids) so its
    // clusters can be compared against the trained ones.
    // (A commented-out block that copied Score.Centroids into InitialCentroids was
    // removed here as dead code.)
    this.Settings.InitialCentroids = null;

    KMeansAlgorithm kmeans = new KMeansAlgorithm(this.Settings.Clone());
    kmeans.Instance = null;
    KMeansScore res = kmeans.Train(funcData, ctx) as KMeansScore;

    int scores = 0;
    KMeansFunctionRecognitionResult predRes = new KMeansFunctionRecognitionResult();
    predRes.ResultsPerCluster = new bool[Settings.NumberOfClusters];

    double[][] results = new double[Settings.NumberOfClusters][];

    if (this.Settings.FuncRecogMethod == 1)
    {
        // Method 1: a cluster matches when its centroid lies within the trained
        // in-cluster max distance, scaled by the tolerance percentage.
        double currDistance;
        for (int i = 0; i < results.Length; i++)
        {
            currDistance = KMeansAlgorithm.calculateDistance(Score.Centroids[i], res.Model.Clusters[i].Centroid);
            if (currDistance <= Score.InClusterMaxDistance[i] * (1.0 + this.Settings.Tolerance / 100.0))
            {
                predRes.ResultsPerCluster[i] = true;
                scores++;
            }
            else
            {
                predRes.ResultsPerCluster[i] = false;
            }
        }

        predRes.Result = (scores == Settings.NumberOfClusters);
        predRes.Loss = ((float)scores) / (Settings.NumberOfClusters);
    }
    else
    {
        // Method 2 (MIN-MAX): every centroid dimension must fall inside the trained
        // [MinCentroid, MaxCentroid] interval.
        for (int i = 0; i < results.Length; i++)
        {
            results[i] = new double[Settings.NumOfDimensions];
            for (int dim = 0; dim < Settings.NumOfDimensions; dim++)
            {
                if (res.Model.Clusters[i].Centroid[dim] >= Score.MinCentroid[i][dim] &&
                    res.Model.Clusters[i].Centroid[dim] <= Score.MaxCentroid[i][dim])
                {
                    results[i][dim] = 1;
                    scores++;
                }
                else
                {
                    results[i][dim] = 0;
                }
            }

            // The cluster result is true only if all dimensions fit. Hoisted out of the
            // dimension loop: the original evaluated this once per dimension with the
            // same final outcome.
            predRes.ResultsPerCluster[i] = results[i].Count(r => r == 1) == Settings.NumOfDimensions;
        }

        predRes.Result = (scores == Settings.NumberOfClusters * Settings.NumOfDimensions);
        predRes.Loss = ((float)scores) / (Settings.NumberOfClusters * Settings.NumOfDimensions);
    }

    return predRes;
}
/// <summary>
/// Determines the number of clusters that best separates two function families:
/// for every cluster count k in [MinNumClusters, MaxNumClusters] it trains one
/// function-recognition model per family and scores how disjoint the two families'
/// clusters are.
/// </summary>
/// <param name="functions1">samples of the first function family (stacked per dimension)</param>
/// <param name="functions2">samples of the second function family (stacked per dimension)</param>
/// <param name="settings">clustering settings; NumberOfClusters and InitialCentroids are overwritten per k</param>
/// <param name="MinNumClusters">smallest cluster count to try (raised to 2 if lower)</param>
/// <param name="MaxNumClusters">largest cluster count to try</param>
/// <param name="Fmins_k">out: best separation measure per tried cluster count</param>
/// <returns>the cluster count with the largest separation measure, or -1 if no k separates the families</returns>
public static int OptimalNumberOfClusters_TwoFunctions(double[][] functions1, double[][] functions2, ClusteringSettings settings, int MinNumClusters, int MaxNumClusters, out double[] Fmins_k)
{
    // At least two clusters are required.
    if (MinNumClusters < 2)
    {
        MinNumClusters = 2;
    }

    double[][] oneFunction = new double[settings.NumOfDimensions][];
    int numFun = 0;
    double dist;
    int score = 0;
    KMeansFunctionRecognitionAlgorithm kMeansFR1, kMeansFR2;
    KMeansFunctionRecognitonScore res1, res2;
    double Fmax = 0;
    double Fmin = double.MaxValue;
    double[] Fmins;
    Fmins_k = new double[MaxNumClusters - MinNumClusters + 1];
    double F = 0;
    int cluster = -1;

    for (int k = MinNumClusters; k <= MaxNumClusters; k++)
    {
        settings.InitialCentroids = null;
        settings.NumberOfClusters = k;
        kMeansFR1 = new KMeansFunctionRecognitionAlgorithm(settings);
        kMeansFR2 = new KMeansFunctionRecognitionAlgorithm(settings);
        res1 = new KMeansFunctionRecognitonScore();
        res2 = new KMeansFunctionRecognitonScore();

        // Train model 1 on every function of the first family.
        numFun = functions1.Length / settings.NumOfDimensions;
        for (int f = 0; f < numFun; f++)
        {
            oneFunction = KMeansAlgorithm.transposeFunction(KMeansAlgorithm.selectFunction(functions1, f + 1, settings.NumOfDimensions));
            res1 = kMeansFR1.Run(oneFunction, null) as KMeansFunctionRecognitonScore;
        }

        // Train model 2 on every function of the second family.
        numFun = functions2.Length / settings.NumOfDimensions;
        settings.InitialCentroids = null;
        for (int f = 0; f < numFun; f++)
        {
            oneFunction = KMeansAlgorithm.transposeFunction(KMeansAlgorithm.selectFunction(functions2, f + 1, settings.NumOfDimensions));
            res2 = kMeansFR2.Run(oneFunction, null) as KMeansFunctionRecognitonScore;
        }

        Fmins = new double[k];

        // check if there is a non intersection cluster
        for (int i = 0; i < k; i++)
        {
            Fmin = double.MaxValue;
            score = 0;
            for (int j = 0; j < k; j++)
            {
                dist = KMeansAlgorithm.calculateDistance(res1.Centroids[i], res2.Centroids[j]);
                // Clusters i and j do not intersect when the centroid distance exceeds
                // the sum of both in-cluster max distances.
                if (dist > res1.InClusterMaxDistance[i] + res2.InClusterMaxDistance[j])
                {
                    score++;
                }
            }

            // Cluster i of family 1 intersects no cluster of family 2.
            if (score == k)
            {
                //calculate F;
                for (int j = 0; j < k; j++)
                {
                    F = KMeansAlgorithm.calculateDistance(res1.Centroids[i], res2.Centroids[j]) / (res1.InClusterMaxDistance[i] + res2.InClusterMaxDistance[j]);

                    // select min F among the two functions
                    if (F < Fmin)
                    {
                        Fmin = F;
                    }
                }
            }

            //save Fmin of each centroid
            if (Fmin != double.MaxValue)
            {
                Fmins[i] = Fmin;
            }
        }

        // save max Fmin per number of clusters
        for (int i = 0; i < k; i++)
        {
            if (Fmins[i] > Fmins_k[k - MinNumClusters])
            {
                Fmins_k[k - MinNumClusters] = Fmins[i];
            }
        }
    }

    //select max F among different number of cluters
    for (int i = 0; i < Fmins_k.Length; i++)
    {
        if (Fmins_k[i] > Fmax)
        {
            Fmax = Fmins_k[i];
            cluster = i + MinNumClusters;
        }
    }

    return (cluster);
}
/// <summary>
/// calculateMoreStatistics is a function that calculates statistics of a cluster. These statistics are dependent on other clusters.
/// </summary>
/// <param name="RawData">data to be clustered</param>
/// <param name="DataToClusterMapping">contains the assigned cluster number for each sample of the RawData</param>
/// <param name="Centroids">the centroids of the clusters</param>
/// <param name="NearestCluster">nearest cluster number</param>
/// <param name="NearestForeignSampleInNearestCluster">nearest sample belonging of the nearest cluster to this cluster's centroid</param>
/// <param name="DistanceToNearestForeignSampleInNearestCluster">distance between the nearest sample of the nearest cluster and this cluster's centroid</param>
/// <param name="NearestForeignSample">nearest sample not belonging to this cluster and this cluster's centroid</param>
/// <param name="DistanceToNearestForeignSample">distance between the nearest foreign sample and this cluster's centroid</param>
/// <param name="ClusterOfNearestForeignSample">the cluster to which the nearest foreign sample belongs</param>
private static void calculateMoreStatistics(double[][] RawData, int[] DataToClusterMapping, double[][] Centroids, int[] NearestCluster, out double[][] NearestForeignSampleInNearestCluster, out double[] DistanceToNearestForeignSampleInNearestCluster, out double[][] NearestForeignSample, out double[] DistanceToNearestForeignSample, out int[] ClusterOfNearestForeignSample)
{
    int Code;
    string Message = "Function <calculateMoreStatistics>: ";
    try
    {
        NearestForeignSampleInNearestCluster = new double[Centroids.Length][];
        DistanceToNearestForeignSampleInNearestCluster = new double[Centroids.Length];
        NearestForeignSample = new double[Centroids.Length][];
        DistanceToNearestForeignSample = new double[Centroids.Length];
        ClusterOfNearestForeignSample = new int[Centroids.Length];

        // Initialize outputs: -1/null sentinels for empty clusters, MaxValue otherwise.
        for (int i = 0; i < Centroids.Length; i++)
        {
            //in case of empty cluster
            if (NearestCluster[i] == -1)
            {
                NearestForeignSampleInNearestCluster[i] = null;
                NearestForeignSample[i] = null;
                DistanceToNearestForeignSampleInNearestCluster[i] = -1;
                DistanceToNearestForeignSample[i] = -1;
                ClusterOfNearestForeignSample[i] = -1;
            }
            else
            {
                DistanceToNearestForeignSampleInNearestCluster[i] = double.MaxValue;
                DistanceToNearestForeignSample[i] = double.MaxValue;
            }
        }

        double curDistance;

        // Single pass over all samples, updating the per-cluster minima.
        for (int i = 0; i < RawData.Length; i++)
        {
            for (int j = 0; j < Centroids.Length; j++)
            {
                //skip if sample belong to the cluster itself or the cluster is empty
                if (DataToClusterMapping[i] == j || NearestCluster[j] == -1)
                {
                    continue;
                }

                curDistance = KMeansAlgorithm.calculateDistance(RawData[i], Centroids[j]);

                // Nearest sample to centroid j among all samples not in cluster j.
                if (curDistance < DistanceToNearestForeignSample[j])
                {
                    DistanceToNearestForeignSample[j] = curDistance;
                    NearestForeignSample[j] = RawData[i];
                    ClusterOfNearestForeignSample[j] = DataToClusterMapping[i];
                }

                // Nearest sample to centroid j among the samples of j's nearest cluster.
                if (DataToClusterMapping[i] == NearestCluster[j])
                {
                    if (curDistance < DistanceToNearestForeignSampleInNearestCluster[j])
                    {
                        DistanceToNearestForeignSampleInNearestCluster[j] = curDistance;
                        NearestForeignSampleInNearestCluster[j] = RawData[i];
                    }
                }
            }
        }
    }
    catch (Exception Ex)
    {
        Code = 400;
        Message += "Unhandled exception:\t" + Ex.ToString();
        throw new KMeansException(Code, Message);
    }
}
/// <summary>
/// Verifies incremental mean calculation over growing sample sets: folding the second
/// half of a set into the mean of the first half must reproduce the mean of the full
/// set (up to two decimal places).
/// </summary>
public void Test_IncrementalMeanAverageSet()
{
    for (int sampleCount = 100; sampleCount < 150000; sampleCount += 15000)
    {
        int half = sampleCount / 2;

        // Full batch: samples 0 .. sampleCount-1, all assigned to a single cluster.
        double[][] samples = new double[sampleCount][];
        int[] assignments = new int[samples.Length];
        for (int i = 0; i < sampleCount; i++)
        {
            samples[i] = new double[] { i };
            assignments[i] = 0;
        }

        double[][] means = new double[1][];
        means[0] = new double[] { 0 };

        KMeansAlgorithm.UpdateMeans(samples, assignments, means);
        var fullMean = means[0][0];

        // First minibatch: samples 0 .. half-1.
        samples = new double[half][];
        assignments = new int[samples.Length];
        for (int i = 0; i < half; i++)
        {
            samples[i] = new double[] { i };
            assignments[i] = 0;
        }

        KMeansAlgorithm.UpdateMeans(samples, assignments, means, 0, new double[] { 0 });
        var firstHalfMean = means[0][0];

        // Second minibatch: samples half .. sampleCount-1, folded into the first mean.
        samples = new double[half][];
        assignments = new int[samples.Length];
        for (int i = 0; i < half; i++)
        {
            samples[i] = new double[] { i + half };
            assignments[i] = 0;
        }

        KMeansAlgorithm.UpdateMeans(samples, assignments, means, half, new double[] { firstHalfMean });
        var incrementalMean = means[0][0];

        // (1/(q1+q2))*[q1*M1+q2*M2] — the combined minibatch mean must equal the
        // full-batch mean, where q1/q2 are the element counts behind M1/M2.
        Assert.True(Math.Round(incrementalMean, 2) == Math.Round(fullMean, 2));
    }
}
/// <summary>
/// RecommendedNumberOfClusters is a function that gives a recommended number of clusters for the given samples based on some provided methods.
/// </summary>
/// <param name="rawData">The samples to be clustered</param>
/// <param name="kmeansMaxIterations">Maximum allowed number of Kmeans iteration for clustering</param>
/// <param name="kmeansAlgorithm">The desired Kmeans clustering algorithm (1 or 2)
/// <ul style="list-style-type:none">
/// <li> - 1: Centoids are the nearest samples to the means</li>
/// <li> - 2: Centoids are the means</li>
/// </ul></param>
/// <param name="numberOfAttributes">Number of attributes for each sample</param>
/// <param name="maxNumberOfClusters">Maximum desired number of clusters</param>
/// <param name="minNumberOfClusters">Minimum desired number of clusters</param>
/// <param name="method">Integer 0,1,2 or 3 representing the method to be used
/// <ul style = "list-style-type:none" >
/// <li> - Method 0: Radial method in which the farthest sample of each cluster must be closer to the cluster centoid than the nearest foreign sample of the other clusters </li>
/// <li> - Method 1: Standard Deviation method in which the standard deviation in each cluster must be less than the desired standard deviation </li>
/// <li> - Method 2: Both. uses radial and standard deviation methods at the same time </li>
/// <li> - Method 3: Balanced clusters method in which the clusters contain the closest number of samples</li>
/// </ul>
/// </param>
/// <param name="standardDeviation">The desired standard deviation upper limit in each cluster</param>
/// <param name="centroids">Initial Centroids</param>
/// <returns>The recommended number of clusters for the given samples based on the specified method,
/// or 0 when no cluster count satisfied the constraints (methods 0-2 only).</returns>
/// <exception cref="KMeansException">Thrown with a specific code on invalid arguments
/// (100 null data, 104 maxNumberOfClusters &lt; 2, 107 bad attribute count, 108 bad iteration count,
/// 122 bad method, 123 missing standard deviation), or with code 400 on any unexpected failure.</exception>
public int RecommendedNumberOfClusters(double[][] rawData, int kmeansMaxIterations, int numberOfAttributes, int maxNumberOfClusters, int minNumberOfClusters, int method, double[] standardDeviation, int kmeansAlgorithm = 1, double[][] centroids = null)
{
    int recommendedNumbersOfCluster;
    int Code;
    string Message = "Function <RecommendedNumberOfClusters>: ";
    try
    {
        // FIX: rawData must be validated before it is dereferenced below
        // (rawData.Length); previously a null input surfaced as a generic
        // code-400 "Unhandled exception" instead of its dedicated code 100.
        if (rawData == null)
        {
            Code = 100;
            Message += "RawData is null";
            throw new KMeansException(Code, Message);
        }
        if (maxNumberOfClusters < 2)
        {
            Code = 104;
            Message += "Maximum number of clusters must be at least 2";
            throw new KMeansException(Code, Message);
        }
        // Cannot ask for more clusters than there are samples.
        int MaxClusters = Math.Min(rawData.Length, maxNumberOfClusters);
        if (minNumberOfClusters < 2)
        {
            minNumberOfClusters = 2;
        }
        if (method > 3 || method < 0)
        {
            Code = 122;
            Message += "Method must be either 0,1,2 or 3";
            throw new KMeansException(Code, Message);
        }
        if ((method == 1 || method == 2) && standardDeviation == null)
        {
            Code = 123;
            Message += "Parameter StdDev is needed";
            throw new KMeansException(Code, Message);
        }
        if (kmeansMaxIterations < 1)
        {
            Code = 108;
            Message += "Unacceptable number of maximum iterations";
            throw new KMeansException(Code, Message);
        }
        if (numberOfAttributes < 1)
        {
            Code = 107;
            Message += "Unacceptable number of attributes. Must be at least 1";
            throw new KMeansException(Code, Message);
        }
        // Any value other than 2 falls back to algorithm 1.
        if (kmeansAlgorithm != 2)
        {
            kmeansAlgorithm = 1;
        }

        // checks that all the samples have same number of attributes
        KMeansAlgorithm.verifyRawDataConsistency(rawData, numberOfAttributes);

        double[][] Centroids;
        int IterationReached = -1;
        int[] kMeansResponse;
        Cluster[] cluster;
        bool isRadial, isStandardDeviation;
        double[] balancedError = new double[MaxClusters - minNumberOfClusters + 1];

        for (int i = minNumberOfClusters; i <= MaxClusters; i++)
        {
            // cluster the data with number of clusters equals to i
            kMeansResponse = KMeansAlgorithm.runKMeansAlgorithm(rawData, i, numberOfAttributes, kmeansMaxIterations, kmeansAlgorithm, centroids, out Centroids, out IterationReached);
            cluster = ClusteringResults.CreateClusteringResult(rawData, kMeansResponse, Centroids, i);
            isRadial = true;
            isStandardDeviation = true;
            if (method == 0 || method == 2)
            {
                // radial method check
                isRadial = radialClustersCheck(cluster);
            }
            if (method == 1 || method == 2)
            {
                // standard deviation check
                isStandardDeviation = stdDeviationClustersCheck(cluster, standardDeviation);
            }
            if (method == 3)
            {
                // balanced check: accumulate the per-cluster squared deviation of the
                // sample count from the average count (a variance-like imbalance score)
                balancedError[i - minNumberOfClusters] = 0;
                double[] countSamples = new double[i];
                double average = 0;
                for (int c = 0; c < i; c++)
                {
                    countSamples[c] = cluster[c].ClusterData.Length;
                    average = average + countSamples[c] / i;
                }
                for (int c = 0; c < i; c++)
                {
                    // error calculation
                    balancedError[i - minNumberOfClusters] = balancedError[i - minNumberOfClusters] + Math.Pow(countSamples[c] - average, 2) / i;
                }
            }
            else if (isRadial && isStandardDeviation)
            {
                // first cluster count satisfying the radial/stddev constraints wins
                recommendedNumbersOfCluster = i;
                return (recommendedNumbersOfCluster);
            }
        }

        if (method == 3)
        {
            // get minimum value (most balanced solution)
            int minIndex = 0;
            for (int l = 1; l < balancedError.Length; l++)
            {
                if (balancedError[l] < balancedError[minIndex])
                {
                    minIndex = l;
                }
            }
            recommendedNumbersOfCluster = minIndex + minNumberOfClusters;
            return (recommendedNumbersOfCluster);
        }

        // no cluster count satisfied the desired constraints
        recommendedNumbersOfCluster = 0;
        return (recommendedNumbersOfCluster);
    }
    catch (KMeansException)
    {
        // FIX: rethrow validation/algorithm exceptions as-is so their specific
        // error codes reach the caller instead of being rewrapped as code 400.
        throw;
    }
    catch (Exception Ex)
    {
        Code = 400;
        Message += "Unhandled exception:\t" + Ex.ToString();
        throw new KMeansException(Code, Message);
    }
}
/// <summary>
/// Data of a single function for which KMeans will be calculated.
/// Trains KMeans on the given data and folds the resulting centroids into the
/// accumulated <c>Score</c> across all functions trained so far, using the
/// averaging strategy selected by <c>Settings.FuncRecogMethod</c>.
/// </summary>
/// <param name="data">Samples of a single function; one row per sample.</param>
/// <param name="ctx">Learning API context passed through to the trainer.</param>
/// <returns>The accumulated <c>Score</c> updated with this training run.</returns>
public IScore Run(double[][] data, IContext ctx)
{
    /*
     * if (this.Score.NomOfTrainedFunctions == 1)
     * {
     *  this.Settings.InitialCentroids = new double[this.Settings.NumberOfClusters][];
     *  for (int i = 0; i < this.Settings.NumberOfClusters; i++)
     *  {
     *      this.Settings.InitialCentroids[i] = this.Score.Centroids[i];
     *  }
     * }*/

    // Always start the trainer from scratch (no warm-start centroids).
    this.Settings.InitialCentroids = null;

    KMeansAlgorithm kmeans = new KMeansAlgorithm(this.Settings.Clone());

    KMeansScore res = kmeans.Train(data, ctx) as KMeansScore;

    this.Score.NomOfTrainedFunctions += 1;

    if (this.Settings.FuncRecogMethod == 1)
    {
        // Method 1: running average of centroids over all trained functions.
        double[][] oldCentroids = this.Score.Centroids;

        for (int clusterIndx = 0; clusterIndx < res.Model.Clusters.Length; clusterIndx++)
        {
            if (this.Score.NomOfTrainedFunctions == 1)
            {
                // First function: adopt its centroids directly.
                this.Score.Centroids[clusterIndx] = res.Model.Clusters[clusterIndx].Centroid;
                //this.Score.InClusterMaxDistance[clusterIndx] = res.Model.Clusters[clusterIndx].InClusterMaxDistance;
            }
            else
            {
                // Incremental mean: new = (current + old * (n-1)) / n, per dimension.
                for (int d = 0; d < this.Settings.NumOfDimensions; d++)
                {
                    this.Score.Centroids[clusterIndx][d] = (res.Model.Clusters[clusterIndx].Centroid[d] + oldCentroids[clusterIndx][d] * (this.Score.NomOfTrainedFunctions - 1)) / this.Score.NomOfTrainedFunctions;
                }

                adjustInClusterMaxDistance(clusterIndx, res, oldCentroids);
            }
        }
    }
    else
    {
        // Other methods: track per-dimension min/max of the centroids seen so far,
        // then place the accumulated centroid midway between them.
        //Debug.WriteLine($"C0: {res.Model.Clusters[0].Centroid[0]},{res.Model.Clusters[0].Centroid[1]}");
        //Debug.WriteLine($"C1: {res.Model.Clusters[1].Centroid[0]},{res.Model.Clusters[0].Centroid[1]}");
        //Debug.WriteLine($"C2: {res.Model.Clusters[2].Centroid[0]},{res.Model.Clusters[0].Centroid[1]}");
        //Debug.WriteLine($"C3: {res.Model.Clusters[3].Centroid[0]},{res.Model.Clusters[0].Centroid[1]}");

        for (int clusterIndx = 0; clusterIndx < res.Model.Clusters.Length; clusterIndx++)
        {
            for (int dim = 0; dim < this.Settings.NumOfDimensions; dim++)
            {
                if (res.Model.Clusters[clusterIndx].Centroid[dim] > this.Score.MaxCentroid[clusterIndx][dim])
                {
                    this.Score.MaxCentroid[clusterIndx][dim] = res.Model.Clusters[clusterIndx].Centroid[dim];
                }

                if (res.Model.Clusters[clusterIndx].Centroid[dim] < this.Score.MinCentroid[clusterIndx][dim])
                {
                    this.Score.MinCentroid[clusterIndx][dim] = res.Model.Clusters[clusterIndx].Centroid[dim];
                }
            }
        }

        for (int clusterIndex = 0; clusterIndex < res.Model.Clusters.Length; clusterIndex++)
        {
            this.Score.Centroids[clusterIndex] = new double[Settings.NumOfDimensions];

            for (int dim = 0; dim < Settings.NumOfDimensions; dim++)
            {
                // Midpoint between min and max. NOTE(review): both branches compute
                // the same midpoint algebraically; the split on sign is presumably
                // historical — confirm before simplifying.
                if (this.Score.MinCentroid[clusterIndex][dim] >= 0)
                {
                    this.Score.Centroids[clusterIndex][dim] = (this.Score.MaxCentroid[clusterIndex][dim] + this.Score.MinCentroid[clusterIndex][dim]) / 2;
                }
                else
                {
                    this.Score.Centroids[clusterIndex][dim] = ((this.Score.MaxCentroid[clusterIndex][dim] - this.Score.MinCentroid[clusterIndex][dim]) / 2) + this.Score.MinCentroid[clusterIndex][dim];
                }
            }
        }
    }

    return (Score);
}
/// <summary>
/// Start-button handler: reads the desired centroid count from the UI and either
/// starts the animated KMeans run (_mode == 0, driven by a DispatcherTimer) or
/// computes the full result immediately and prepares a KNN classifier (_mode == 1).
/// Error dialogs are shown in Italian (the application's UI language).
/// </summary>
private void btnStart_Click(object sender, RoutedEventArgs e)
{
    int nc;
    bool ok = int.TryParse(txtCentroidN.Text, out nc);
    // TODO: needs cleanup (original note: "va messo a posto")
    if (!ok || nc <= 0)
    {
        // Input must be a positive integer.
        MessageBox.Show("Inserire un numero intero maggiore di 0!", "Errore", MessageBoxButton.OK, MessageBoxImage.Error);
        return;
    }
    try
    {
        if (_mode == 0)
        {
            // Animated mode: capture the drawing options from the UI controls.
            _differciateCentroids = (bool)chbDiff.IsChecked;
            _timerSecondsDrawingLength = sldVel.Value;
            _pointRadius = sldGP.Value;
            _centroidRadius = sldGC.Value;

            _alg = new KMeansAlgorithm(nc);
            foreach (KMeans.Point p in _points)
            {
                _alg.AddPoint(p);
            }
            foreach (Centroid c in _centroids)
            {
                _alg.AddCentroid(c);
            }
            if (_differciateCentroids)
            {
                // One distinct random color per centroid.
                GenerateRandomCentroidsColor(nc);
            }
            _alg.InitializeAlgorithm();

            // Drive the step-by-step drawing with a timer; each tick advances the algorithm.
            _dt = new DispatcherTimer();
            _dt.Interval = TimeSpan.FromSeconds(_timerSecondsDrawingLength);
            _dt.Tick += Dt_Tick;
            _dt.Start();
        }
        else if (_mode == 1)
        {
            // Immediate mode: also needs k for the subsequent KNN classifier.
            int k;
            bool ok2 = int.TryParse(txtK.Text, out k);
            // TODO: needs cleanup (original note: "va messo a posto")
            if (!ok2 || k <= 0)
            {
                MessageBox.Show("Inserire un numero intero maggiore di 0!", "Errore", MessageBoxButton.OK, MessageBoxImage.Error);
                return;
            }
            _alg = new KMeansAlgorithm(nc);
            foreach (KMeans.Point p in _points)
            {
                _alg.AddPoint(p);
            }
            foreach (Centroid c in _centroids)
            {
                _alg.AddCentroid(c);
            }
            // Run KMeans to completion and refresh the UI with the final clustering.
            _centroids = _alg.CalculateResult();
            UpdateDataGrids();
            cnvGraphic.Children.Clear();
            _pointsColorList = ColorCalculatedPoints();
            _calculatedAndSet = true;
            // KNN is seeded with the final centroids.
            _knnAlg = new KNNAlgorithm(k, _centroids);
        }
    }
    catch (KMeansException ex)
    {
        // Algorithm-specific failure (e.g. invalid configuration).
        MessageBox.Show(ex.Message, "Algorithm error", MessageBoxButton.OK, MessageBoxImage.Error);
    }
    catch (Exception ex)
    {
        // Any other unexpected failure.
        MessageBox.Show(ex.Message, "Error", MessageBoxButton.OK, MessageBoxImage.Error);
    }
}
/// <summary>
/// Calculates the statistics and properties of a single cluster that do not depend
/// on any other cluster: the distance of every sample to the centroid, the farthest
/// sample, the attribute-wise mean and the attribute-wise standard deviation.
/// </summary>
/// <param name="cls">The cluster whose statistics are (re)calculated in place.</param>
private static void calculateStatistics(Cluster cls)
{
    int Code;
    string Message = "Function <calculateStatistics>: ";
    try
    {
        int sampleCount = cls.ClusterData.Length;
        int attributeCount = cls.Centroid.Length;

        cls.ClusterDataDistanceToCentroid = new double[sampleCount];
        cls.Mean = new double[attributeCount];
        cls.StandardDeviation = new double[attributeCount];
        cls.InClusterMaxDistance = -1;

        if (sampleCount == 0)
        {
            // Empty cluster: zero out every statistic and mark that there is
            // no meaningful nearest cluster.
            cls.InClusterFarthestSampleIndex = 0;
            cls.InClusterMaxDistance = 0;
            cls.InClusterFarthestSample = new double[attributeCount];
            for (int attr = 0; attr < attributeCount; attr++)
            {
                cls.Mean[attr] = 0;
                cls.Centroid[attr] = 0;
                cls.InClusterFarthestSample[attr] = 0;
            }
            cls.NearestCluster = -1;
            return;
        }

        // First pass: per-sample distance to the centroid, farthest-sample
        // tracking, and the incremental attribute-wise mean.
        for (int s = 0; s < sampleCount; s++)
        {
            double distance = KMeansAlgorithm.calculateDistance(cls.ClusterData[s], cls.Centroid);
            cls.ClusterDataDistanceToCentroid[s] = distance;
            if (distance > cls.InClusterMaxDistance)
            {
                cls.InClusterFarthestSampleIndex = s;
                cls.InClusterFarthestSample = cls.ClusterData[s];
                cls.InClusterMaxDistance = distance;
            }
            for (int attr = 0; attr < attributeCount; attr++)
            {
                cls.Mean[attr] += cls.ClusterData[s][attr] / sampleCount;
            }
        }

        // Second pass: attribute-wise variance, then its square root as the
        // standard deviation.
        double[] variance = new double[attributeCount];
        for (int s = 0; s < sampleCount; s++)
        {
            for (int attr = 0; attr < attributeCount; attr++)
            {
                variance[attr] += Math.Pow((cls.ClusterData[s][attr] - cls.Mean[attr]), 2) / sampleCount;
            }
        }
        for (int attr = 0; attr < attributeCount; attr++)
        {
            cls.StandardDeviation[attr] = Math.Sqrt(variance[attr]);
        }
    }
    catch (Exception Ex)
    {
        Code = 400;
        Message += "Unhandled exception:\t" + Ex.ToString();
        throw new KMeansException(Code, Message);
    }
}