/// <summary>
/// Computes the natural clustering of the resulting trajectories (behaviors) along with the behavioral entropy
/// (diversity).
/// </summary>
/// <param name="evaluationUnits">The agent/maze evaluations to cluster and compute entropy.</param>
/// <param name="clusterImprovementThreshold">
///     The number of cluster additions that are permitted without further
///     maximization of silhouette width. When this is exceeded, the incremental cluster additions will stop and the
///     number of clusters resulting in the highest silhouette width will be considered optimal.
/// </param>
/// <returns>The cluster diversity unit carrying the optimal cluster count and the Shannon entropy.</returns>
public static ClusterDiversityUnit CalculateNaturalClustering(
    IList<MazeNavigatorEvaluationUnit> evaluationUnits, int clusterImprovementThreshold)
{
    Dictionary<int, double> clusterSilhouetteMap = new Dictionary<int, double>();
    Tuple<int, double> clusterWithMaxSilhouetteWidth = null;

    // Always start with zero clusters and increment on first iteration of loop
    const int initClusterCnt = 0;

    // Only consider successful trials
    IList<MazeNavigatorEvaluationUnit> successfulEvaluations =
        evaluationUnits.Where(eu => eu.IsMazeSolved).ToList();

    // Define the trajectory matrix in which to store all trajectory points for each trajectory
    // (this becomes a collection of observation vectors that's fed into k-means)
    double[][] trajectoryMatrix = new double[successfulEvaluations.Count][];

    // Get the maximum observation vector length (max simulation runtime)
    // (multiplied by 2 to account for each timestep containing a 2-dimensional position)
    int maxObservationLength = successfulEvaluations.Max(x => x.NumTimesteps) * 2;

    for (int idx = 0; idx < successfulEvaluations.Count; idx++)
    {
        // If there are fewer observations than the total elements in the observation vector,
        // fill out the vector with the existing observations and set the rest equal to the last
        // position in the simulation
        if (successfulEvaluations[idx].AgentTrajectory.Length < maxObservationLength)
        {
            trajectoryMatrix[idx] =
                successfulEvaluations[idx].AgentTrajectory.Concat(
                    Enumerable.Repeat(
                        successfulEvaluations[idx].AgentTrajectory[
                            successfulEvaluations[idx].AgentTrajectory.Length - 1],
                        maxObservationLength - successfulEvaluations[idx].AgentTrajectory.Length)).ToArray();
        }
        // If they are equal, just set the trajectory points
        else
        {
            trajectoryMatrix[idx] = successfulEvaluations[idx].AgentTrajectory;
        }
    }

    // Set the initial cluster count
    int clusterCount = initClusterCnt;

    // Continue loop until the maximum number of iterations without silhouette width improvement has elapsed
    // (also don't allow number of clusters to match the number of observations)
    while (clusterWithMaxSilhouetteWidth == null ||
           (clusterSilhouetteMap.Count <= clusterImprovementThreshold ||
            clusterSilhouetteMap.Where(
                    csm => (csm.Key - initClusterCnt) >= clusterSilhouetteMap.Count - clusterImprovementThreshold)
                .Any(csm => csm.Value >= clusterWithMaxSilhouetteWidth.Item2)) &&
           clusterCount < trajectoryMatrix.Length - 1)
    {
        // Increment cluster count
        clusterCount++;

        // Create a new k-means instance with the specified number of clusters
        var kmeans = new KMeans(clusterCount);

        // TODO: The below logic is in support of a work-around to an Accord.NET bug wherein
        // TODO: an internal random number generator sometimes generates out-of-bounds values
        // TODO: (i.e. a probability that is not between 0 and 1)
        // TODO: https://github.com/accord-net/framework/issues/259
        // Use uniform initialization
        kmeans.UseSeeding = Seeding.Uniform;

        // Determine the resulting clusters
        var clusters = kmeans.Learn(trajectoryMatrix);

        // Compute the silhouette width for the current number of clusters
        double silhouetteWidth = ComputeSilhouetteWidth(clusters, trajectoryMatrix);

        // Add the silhouette width to the map with the current cluster count
        clusterSilhouetteMap.Add(clusterCount, silhouetteWidth);

        // If greater than the max silhouette width, reset the cluster with the max
        if (clusterWithMaxSilhouetteWidth == null || silhouetteWidth > clusterWithMaxSilhouetteWidth.Item2)
        {
            clusterWithMaxSilhouetteWidth = new Tuple<int, double>(clusterCount, silhouetteWidth);
        }
    }

    // Rerun k-means with the optimal cluster count (the count that produced the highest
    // silhouette width, per the doc comment above, rather than the last count attempted)
    var optimalClustering = new KMeans(clusterWithMaxSilhouetteWidth.Item1);

    // TODO: The below logic is in support of a work-around to an Accord.NET bug wherein
    // TODO: an internal random number generator sometimes generates out-of-bounds values
    // TODO: (i.e. a probability that is not between 0 and 1)
    // TODO: https://github.com/accord-net/framework/issues/259
    // Use uniform initialization
    optimalClustering.UseSeeding = Seeding.Uniform;

    // Determine cluster assignments
    optimalClustering.Learn(trajectoryMatrix);

    double sumLogProportion = 0.0;

    // Compute the Shannon entropy of the population
    for (int idx = 0; idx < optimalClustering.Clusters.Count; idx++)
    {
        sumLogProportion += optimalClustering.Clusters[idx].Proportion *
                            Math.Log(optimalClustering.Clusters[idx].Proportion, 2);
    }

    // Multiply by negative one to get the Shannon entropy
    double shannonEntropy = sumLogProportion * -1;

    // Return the resulting cluster diversity info
    return new ClusterDiversityUnit(optimalClustering.Clusters.Count, shannonEntropy);
}
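// The ComputeSilhouetteWidth helper referenced above is not shown in this snippet. The sketch
// below is a minimal, illustrative silhouette-width computation, assuming plain Euclidean
// distance over the trajectory matrix and a KMeansClusterCollection whose Decide method labels
// each observation; it is not the original project's helper.
private static double ComputeSilhouetteWidth(KMeansClusterCollection clusters, double[][] data)
{
    int[] labels = clusters.Decide(data);
    int k = clusters.Count;
    double total = 0.0;

    for (int i = 0; i < data.Length; i++)
    {
        // Sum of distances from observation i to the members of every cluster
        double[] sumDist = new double[k];
        int[] counts = new int[k];
        for (int j = 0; j < data.Length; j++)
        {
            if (j == i) continue;
            sumDist[labels[j]] += Accord.Math.Distance.Euclidean(data[i], data[j]);
            counts[labels[j]]++;
        }

        // a(i): mean intra-cluster distance; b(i): smallest mean distance to another cluster
        double a = counts[labels[i]] > 0 ? sumDist[labels[i]] / counts[labels[i]] : 0.0;
        double b = double.MaxValue;
        for (int c = 0; c < k; c++)
        {
            if (c == labels[i] || counts[c] == 0) continue;
            b = Math.Min(b, sumDist[c] / counts[c]);
        }

        // s(i) = (b - a) / max(a, b); assumes k >= 2 so that b is defined
        total += (b - a) / Math.Max(a, b);
    }

    // The overall silhouette width is the mean silhouette over all observations
    return total / data.Length;
}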
public Dictionary<int, List<int>> apply(Matrix<double> objCoords, int K, int depth)
{
    Dictionary<int, List<int>> dict = new Dictionary<int, List<int>>();

    // Copy the matrix into the jagged array expected by KMeans
    double[][] objects = new double[objCoords.RowCount][];
    for (int rowIdx = 0; rowIdx < objCoords.RowCount; rowIdx++)
    {
        objects[rowIdx] = new double[objCoords.ColumnCount];
        for (int columnIdx = 0; columnIdx < objCoords.ColumnCount; columnIdx++)
        {
            objects[rowIdx][columnIdx] = objCoords[rowIdx, columnIdx];
        }
    }

    KMeans kmeans = new KMeans(K);
    int[] clusterIDs = kmeans.Compute(objects);

    // Group object indices by their assigned cluster
    for (int objIdx = 0; objIdx < clusterIDs.Length; objIdx++)
    {
        int clusterID = clusterIDs[objIdx];
        List<int> currentElements;
        if (dict.TryGetValue(clusterID, out currentElements))
        {
            currentElements.Add(objIdx);
        }
        else
        {
            dict[clusterID] = new List<int> { objIdx };
        }
    }

    return dict;
}
public static void CompareStratMeansVar(KMeans km1, KMeans km2, out double[] meanPvalues, out double[] varPvalues)
{
    meanPvalues = null;
    varPvalues = null;

    int nPv1 = km1.Clusters.Count;
    int nPv2 = km2.Clusters.Count;
    if (nPv1 != nPv2)
    {
        System.Windows.Forms.MessageBox.Show("Not the same number of strata! Models are not comparable!");
        return;
    }

    meanPvalues = new double[nPv1];
    varPvalues = new double[nPv2];

    // Compare each pair of corresponding strata via paired t-tests
    for (int i = 0; i < nPv1; i++)
    {
        KMeansCluster kmC1 = km1.Clusters[i];
        KMeansCluster kmC2 = km2.Clusters[i];
        double[] means1 = kmC1.Mean;
        double[] means2 = kmC2.Mean;
        double[,] cov1 = kmC1.Covariance;
        double[,] cov2 = kmC2.Covariance;
        double m, v;
        PairedTTestPValues(means1, cov1, means2, cov2, out m, out v);
        meanPvalues[i] = m;
        varPvalues[i] = v;
    }
}
private void btnGenerateRandom_Click(object sender, EventArgs e)
{
    k = (int)numClusters.Value;

    // Generate data with n Gaussian distributions
    double[][][] data = new double[k][][];

    for (int i = 0; i < k; i++)
    {
        // Create random centroid to place the Gaussian distribution
        double[] mean = Matrix.Random(2, -6.0, +6.0);

        // Create random covariance matrix for the distribution
        double[,] covariance = Accord.Statistics.Tools.RandomCovariance(2, -5, 5);

        // Create the Gaussian distribution
        var gaussian = new MultivariateNormalDistribution(mean, covariance);

        int samples = Accord.Math.Tools.Random.Next(150, 250);
        data[i] = gaussian.Generate(samples);
    }

    // Join the generated data
    mixture = Matrix.Stack(data);

    // Update the scatter plot
    CreateScatterplot(graph, mixture, k);

    // Forget previous initialization
    kmeans = null;
}
private void btnRun_Click(object sender, EventArgs e)
{
    // Retrieve the number of clusters
    int k = (int)numClusters.Value;

    // Load original image
    Bitmap image = Properties.Resources.leaf;

    // Transform the image into an array of pixel values
    double[][] pixels = image.ToDoubleArray();

    // Create a K-Means algorithm using given k and a
    // square Euclidean distance as distance metric.
    KMeans kmeans = new KMeans(k, Distance.SquareEuclidean);

    // Compute the K-Means algorithm until the difference in
    // cluster centroids between two iterations is below 0.05
    int[] idx = kmeans.Compute(pixels, 0.05);

    // Replace every pixel with its corresponding centroid
    pixels.ApplyInPlace((x, i) => kmeans.Clusters.Centroids[idx[i]]);

    // Show resulting image in the picture box
    pictureBox.Image = pixels.ToBitmap(image.Width, image.Height);
}
public void KMeansConstructorTest()
{
    Accord.Math.Tools.SetupGenerator(0);

    // Declare some observations
    double[][] observations =
    {
        new double[] { -5, -2, -1 },
        new double[] { -5, -5, -6 },
        new double[] {  2,  1,  1 },
        new double[] {  1,  1,  2 },
        new double[] {  1,  2,  2 },
        new double[] {  3,  1,  2 },
        new double[] { 11,  5,  4 },
        new double[] { 15,  5,  6 },
        new double[] { 10,  5,  6 },
    };

    double[][] orig = observations.MemberwiseClone();

    // Create a new K-Means algorithm with 3 clusters
    KMeans kmeans = new KMeans(3);

    // Compute the algorithm, retrieving an integer array
    // containing the labels for each of the observations
    int[] labels = kmeans.Compute(observations);

    // As a result, the first two observations should belong to the
    // same cluster (thus having the same label). The same should
    // happen to the next four observations and to the last three.
    Assert.AreEqual(labels[0], labels[1]);

    Assert.AreEqual(labels[2], labels[3]);
    Assert.AreEqual(labels[2], labels[4]);
    Assert.AreEqual(labels[2], labels[5]);

    Assert.AreEqual(labels[6], labels[7]);
    Assert.AreEqual(labels[6], labels[8]);

    Assert.AreNotEqual(labels[0], labels[2]);
    Assert.AreNotEqual(labels[2], labels[6]);
    Assert.AreNotEqual(labels[0], labels[6]);

    int[] labels2 = kmeans.Clusters.Nearest(observations);
    Assert.IsTrue(labels.IsEqual(labels2));

    // the data must not have changed!
    Assert.IsTrue(orig.IsEqual(observations));
}
/// <summary>
/// Deserialization
/// </summary>
public Gesture(SerializationInfo info, StreamingContext ctxt)
{
    // Get the values from info and assign them to the appropriate properties
    this.name = (String)info.GetValue("name", typeof(string));
    this.sampleDimensionsCount = (int)info.GetValue("sampleDimensionsCount", typeof(int));
    this.frameCount = (int)info.GetValue("frameCount", typeof(int));
    this.trainingSampleCount = (int)info.GetValue("trainingSampleCount", typeof(int));
    this.alphabetCount = (int)info.GetValue("alphabetCount", typeof(int));
    this.statesCount = (int)info.GetValue("statesCount", typeof(int));
    this.trainingDataKMeans = (KMeans)info.GetValue("trainingDataKMeans", typeof(KMeans));
    this.recognitionThreshold = (double)info.GetValue("recognitionThreshold", typeof(double));
    this.model = (HiddenMarkovModel)info.GetValue("model", typeof(HiddenMarkovModel));
}
public int[] Classify()
{
    var clusterData = features.Select();

    // Estimate the number of clusters with BSAS before running k-means
    var theta = MinMaxTheta(clusterData);
    var args = new BsasArgs(theta, m_iterations, clusterData, m_thetaStepNum);
    var bsas = new BasicSequentialAlgorithmicScheme(args);
    var clusters = bsas.CalculateClasses();

    var kmeans = new KMeans(clusters, Distance.Euclidean);
    var idx = kmeans.Compute(clusterData);
    return idx;
}
/// <summary>
/// The cluster.
/// </summary>
/// <param name="clustersCount">The clusters count.</param>
/// <param name="data">The data.</param>
/// <returns>The <see cref="T:int[]"/>.</returns>
public int[] Cluster(int clustersCount, double[][] data)
{
    KMeans kMeans = new KMeans(clustersCount);
    var clusters = kMeans.Learn(data);

    var result = new int[data.Length];
    for (int i = 0; i < result.Length; i++)
    {
        result[i] = clusters.Decide(data[i]);
    }

    return result;
}
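// Aside: in the same Accord.NET API, Decide also accepts a whole batch, so the loop above can
// be collapsed; a minimal equivalent sketch (the method name is illustrative):
public int[] ClusterBatch(int clustersCount, double[][] data)
{
    // Learn the clusters and label every observation in a single call
    KMeans kMeans = new KMeans(clustersCount);
    return kMeans.Learn(data).Decide(data);
}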
/// <summary>
/// Initializes a new instance of GestureProcessingLibrary.Gesture for the specified gesture name.
/// </summary>
/// <param name="name">The name of the gesture to be modeled</param>
/// <param name="alphabetCount">The number of alphabet signs in the model</param>
/// <param name="statesCount">The number of hidden states in the model</param>
public Gesture(string name, int alphabetCount = 8, int statesCount = 12)
{
    this.name = name;
    this.trainingData = DataMethods.LoadPositionData(this.name);
    this.sampleDimensionsCount = trainingData[0][0].Count();
    this.frameCount = trainingData[0].Count();
    this.trainingSampleCount = trainingData.Count();
    this.statesCount = statesCount;
    this.alphabetCount = alphabetCount;
    this.trainingDataKMeans = new KMeans(alphabetCount);
    this.TrainModel();
}
public static void Main()
{
    modshogun.init_shogun_with_defaults();
    int k = 3;
    // already tried init_random(17)
    Math.init_random(17);

    double[,] fm_train = Load.load_numbers("../data/fm_train_real.dat");

    RealFeatures feats_train = new RealFeatures(fm_train);
    EuclideanDistance distance = new EuclideanDistance(feats_train, feats_train);

    KMeans kmeans = new KMeans(k, distance);
    kmeans.train();

    double[,] out_centers = kmeans.get_cluster_centers();
    kmeans.get_radiuses();
}
public static Output Clustering(double[,] patches, int numberOfClusters)
{
    // "Generator.Seed" sets a random seed for the framework's main internal number generator,
    // which gets a reference to the random number generator used internally by the Accord.NET
    // classes and methods. If set to a value less than or equal to zero, all generators will
    // start with the same fixed seed, even among multiple threads. If set to any other value,
    // the generators in other threads will start with fixed, but different, seeds.
    // This method should be called before other computations.
    Accord.Math.Random.Generator.Seed = 0;

    KMeans kmeans = new KMeans(k: numberOfClusters)
    {
        UseSeeding = Seeding.KMeansPlusPlus,
        Distance = default(Cosine),
    };

    var clusters = kmeans.Learn(patches.ToJagged());

    // Get the cluster size (proportion) and centroid of each cluster
    Dictionary<int, double> clusterIdSize = new Dictionary<int, double>();
    Dictionary<int, double[]> clusterIdCentroid = new Dictionary<int, double[]>();
    foreach (var clust in clusters.Clusters)
    {
        clusterIdSize.Add(clust.Index, clust.Proportion);
        clusterIdCentroid.Add(clust.Index, clust.Centroid);
    }

    var output = new Output()
    {
        ClusterIdCentroid = clusterIdCentroid,
        ClusterIdSize = clusterIdSize,
        Clusters = clusters,
    };

    return output;
}
/// <summary>
/// Runs the K-Means algorithm.
/// </summary>
private void runKMeans()
{
    // Retrieve the number of clusters
    int k = (int)numClusters.Value;

    // Load original image
    Bitmap image = Properties.Resources.leaf;

    // Create converters
    ImageToArray imageToArray = new ImageToArray(min: -1, max: +1);
    ArrayToImage arrayToImage = new ArrayToImage(image.Width, image.Height, min: -1, max: +1);

    // Transform the image into an array of pixel values
    double[][] pixels;
    imageToArray.Convert(image, out pixels);

    // Create a K-Means algorithm using given k and a
    // square Euclidean distance as distance metric.
    KMeans kmeans = new KMeans(k, new SquareEuclidean())
    {
        Tolerance = 0.05
    };

    // Compute the K-Means algorithm until the difference in
    // cluster centroids between two iterations is below 0.05
    int[] idx = kmeans.Learn(pixels).Decide(pixels);

    // Replace every pixel with its corresponding centroid
    pixels.Apply((x, i) => kmeans.Clusters.Centroids[idx[i]], result: pixels);

    // Show resulting image in the picture box
    Bitmap result;
    arrayToImage.Convert(pixels, out result);
    pictureBox.Image = result;
}
internal static ArrayList run(int para)
{
    modshogun.init_shogun_with_defaults();
    int k = para;
    init_random(17);

    DoubleMatrix fm_train = Load.load_numbers("../data/fm_train_real.dat");

    RealFeatures feats_train = new RealFeatures(fm_train);
    EuclidianDistance distance = new EuclidianDistance(feats_train, feats_train);

    KMeans kmeans = new KMeans(k, distance);
    kmeans.train();

    DoubleMatrix out_centers = kmeans.get_cluster_centers();
    kmeans.get_radiuses();

    ArrayList result = new ArrayList();
    result.Add(kmeans);
    result.Add(out_centers);
    modshogun.exit_shogun();
    return result;
}
public void KMeansMoreClustersThanSamples()
{
    Accord.Math.Tools.SetupGenerator(0);

    // Declare some observations
    double[][] observations =
    {
        new double[] { -5, -2, -1 },
        new double[] { -5, -5, -6 },
        new double[] {  2,  1,  1 },
        new double[] {  1,  1,  2 },
        new double[] {  1,  2,  2 },
        new double[] {  3,  1,  2 },
        new double[] { 11,  5,  4 },
        new double[] { 15,  5,  6 },
        new double[] { 10,  5,  6 },
    };

    double[][] orig = observations.MemberwiseClone();

    // Requesting more clusters than there are samples should fail
    KMeans kmeans = new KMeans(15);

    bool thrown = false;
    try
    {
        int[] labels = kmeans.Compute(observations);
    }
    catch (ArgumentException)
    {
        thrown = true;
    }

    Assert.IsTrue(thrown);
}
public void DeserializationTest1()
{
    MemoryStream stream = new MemoryStream(Properties.Resources.kmeans);

    KMeans kmeans = Serializer.Load<KMeans>(stream);

    KMeans kbase = new KMeans(3);

    Assert.AreEqual(kbase.Iterations, kmeans.Iterations);
    Assert.AreEqual(kbase.MaxIterations, kmeans.MaxIterations);
    Assert.AreEqual(kbase.Tolerance, kmeans.Tolerance);
    Assert.AreEqual(kbase.UseSeeding, kmeans.UseSeeding);
    Assert.AreEqual(kbase.ComputeCovariances, kmeans.ComputeCovariances);
    Assert.AreEqual(kbase.ComputeError, kmeans.ComputeError);
    Assert.AreEqual(kbase.Error, kmeans.Error);
    Assert.IsTrue(kbase.ComputeError);
    Assert.IsTrue(kbase.ComputeCovariances);
    Assert.AreEqual(kbase.Distance.GetType(), kmeans.Distance.GetType());
}
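// The kmeans resource deserialized above was produced beforehand; a minimal sketch of the
// matching save step, assuming Accord.NET's Serializer.Save and an illustrative file path:
KMeans kmeans = new KMeans(3);
kmeans.Learn(observations);              // 'observations' as in the tests above
Serializer.Save(kmeans, "kmeans.bin");   // hypothetical path

// Later, restore the model exactly as the test does:
KMeans restored = Serializer.Load<KMeans>("kmeans.bin");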
private void Calculate()
{
    KMeans kMeans = new KMeans(Points, Clusters);
    var finished = false;
    while (!finished)
    {
        // Run one clustering pass and redraw the chart
        var distance = kMeans.Cluster();
        var clusters = kMeans.GetClusters();
        Thread.Sleep(2000);
        UpdateGraphData(clusters);

        // Stop once the centroids no longer move
        if (distance == 0)
        {
            finished = true;
        }
    }
    MessageBox.Show(@"Done");
}
private void buildModel()
{
    if (inputMatrix == null)
        getMatrix();

    kmeans = new KMeans(k);
    kmeans.Compute(inputMatrix, 0.0001);

    lbl = new List<string>();
    for (int i = 0; i < k; i++)
    {
        lbl.Add(i.ToString());
    }
}
/// <summary>
/// Runs the K-Means algorithm.
/// </summary>
private void runKMeans()
{
    // Retrieve the number of clusters
    int k = (int)numClusters.Value;

    // Load original image
    Bitmap image = Properties.Resources.leaf;

    // Create converters
    ImageToArray imageToArray = new ImageToArray(min: -1, max: +1);
    ArrayToImage arrayToImage = new ArrayToImage(image.Width, image.Height, min: -1, max: +1);

    // Transform the image into an array of pixel values
    double[][] pixels;
    imageToArray.Convert(image, out pixels);

    // Create a K-Means algorithm using given k and a
    // square Euclidean distance as distance metric.
    KMeans kmeans = new KMeans(k, Distance.SquareEuclidean)
    {
        Tolerance = 0.05
    };

    // Compute the K-Means algorithm until the difference in
    // cluster centroids between two iterations is below 0.05
    int[] idx = kmeans.Compute(pixels);

    // Replace every pixel with its corresponding centroid
    pixels.ApplyInPlace((x, i) => kmeans.Clusters.Centroids[idx[i]]);

    // Show resulting image in the picture box
    Bitmap result;
    arrayToImage.Convert(pixels, out result);
    pictureBox.Image = result;
}
public void runKMeans()
{
    KMeans.runKMeans(weightsByCreature, creatures);
    Debug.Log("updating clusters");
}
private void makeKMeans()
{
    kmeans = new KMeans(k);
    KMeansClusterCollection kmeansColl = kmeans.Clusters;

    // Rebuild each cluster from the stored means, proportions, and covariances
    for (int i = 0; i < k; i++)
    {
        double[] mns = meansLst[i];
        double p = proportionsLst[i];
        double[,] scov = scovLst[i];
        KMeansCluster kc = new KMeansCluster(kmeansColl, i);
        kc.Mean = mns;
        kc.Covariance = scov;
        kc.Proportion = p;
    }
}
// K-Means
private void Kmeans(Dictionary<int, Dictionary<int, double>> data, int clusters, string type, string codage)
{
    double[,] db = null;
    if (data.Count > 1)
    {
        if (codage != "Binaire")
        {
            db = convertData(data);
        }
        else
        {
            db = convertDataBin(data);
        }
    }
    else
    {
        return;
    }
    Console.WriteLine("Data converted");

    cluster = KMeans.ClusterDataSet(clusters, db, type);
    // MessageBox.Show(this, "Number of clusters: " + cluster.Count);
    Console.WriteLine("K-means computed");

    // Map each station to the cluster it belongs to
    stationCluster = new Dictionary<int, int>();
    for (int i = 0; i < cluster.Count; i++)
    {
        Console.WriteLine("Cluster: " + i + " - Size: " + cluster[i].Count);
        foreach (int z in cluster[i].getValues)
        {
            int temp = stations[z];
            stationCluster.Add(temp, i);
        }
    }

    // Draw the clusters on the scrollable map
    ScrollableMaps maps = new ScrollableMaps();
    maps.drawClusters(stationCluster);
    PictureBox map = maps.mapBox;
    map.SizeMode = PictureBoxSizeMode.Zoom;
    map.Dock = DockStyle.Fill;

    Panel tempPanel = new Panel();
    tempPanel.Dock = DockStyle.Fill;

    combo1 = new ComboBox();
    combo1.Text = "Display statistics for the clusters";
    combo1.Items.Add("All clusters");
    combo1.Items.Add("Altitudes");
    combo1.Items.Add("Points of interest");
    for (int i = 0; i < cluster.Count; i++)
    {
        combo1.Items.Add(i);
    }
    combo1.SelectedIndexChanged += changeCluster;
    combo1.Dock = DockStyle.Top;

    tempPanel.Controls.Add(combo1);
    tempPanel.Controls.Add(map);
    panel.addControls(tempPanel);
}
/// <summary>
/// Initializes the Gaussian Mixture Models using K-Means
/// parameters as an initial parameter guess.
/// </summary>
private void btnInitialize_Click(object sender, EventArgs e)
{
    // Creates and computes a new
    // K-Means clustering algorithm:
    kmeans = new KMeans(k);
    kmeans.Compute(mixture);

    // Classify all instances in mixture data
    int[] classifications = kmeans.Clusters.Nearest(mixture);

    // Draw the classifications
    updateGraph(classifications);
}
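// The handler above only produces the initial guess; the follow-up step would hand the K-Means
// result to the mixture model. A sketch, assuming the Accord.NET constructor overload that
// initializes a GaussianMixtureModel from a computed KMeans:
var gmm = new GaussianMixtureModel(kmeans);

// Refine the hard K-Means partition into a full Gaussian mixture via Expectation-Maximization
double logLikelihood = gmm.Compute(mixture);

// Posterior-based classification of the same data
int[] classifications = gmm.Gaussians.Nearest(mixture);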
public static Tuple<Dictionary<string, Dictionary<int, List<Room>>>, Dictionary<string, double[,]>> Factorize(
    List<Room> rooms, int dimensionReduction)
{
    // Flatten every room layer into one column of a per-layer matrix
    Dictionary<string, double[,]> matrices = new Dictionary<string, double[,]>();
    int column = 0;
    int size = 0;
    foreach (var room in rooms)
    {
        foreach (var layer in room.objects)
        {
            if (!matrices.ContainsKey(layer.Key))
            {
                size = layer.Value.GetLength(0) * layer.Value.GetLength(1);
                matrices[layer.Key] = new double[rooms.Count, size];
            }
            matrices[layer.Key].FillColumn(column, layer.Value);
        }
        column++;
    }

    Dictionary<string, double[,]> Ws = new Dictionary<string, double[,]>();
    Dictionary<string, double[,]> Hs = new Dictionary<string, double[,]>();
    Dictionary<string, double[,]> components = new Dictionary<string, double[,]>();

    // Reduce each layer matrix with PCA and store the per-room coefficients
    foreach (var mat in matrices)
    {
        PrincipalComponentAnalysis pca = new PrincipalComponentAnalysis(mat.Value);
        pca.Compute();
        for (int ii = 0; ii < dimensionReduction; ii++)
        {
            pca.ComponentMatrix.rowToMatrix(ii, 12, 10).matToBitmap(0, 0).Save("pca" + mat.Key + ii + ".png");
        }
        components[mat.Key] = pca.ComponentMatrix;
        for (int jj = 0; jj < rooms.Count; jj++)
        {
            rooms[jj].setCoefficients(mat.Key, pca.Result, jj, dimensionReduction);
        }

        /*
         * NMF nmf = new NMF(mat.Value, dimensionReduction, 2000);
         * Ws[mat.Key] = nmf.LeftNonnegativeFactors;
         * Hs[mat.Key] = nmf.RightNonnegativeFactors;
         * for (int ii = 0; ii < rooms.Count; ii++) {
         *     rooms[ii].setCoefficients(mat.Key, nmf.RightNonnegativeFactors, ii);
         * }
         * string str = "";
         * for (int xx = 0; xx < nmf.RightNonnegativeFactors.GetLength(1); xx++) {
         *     for (int jj = 0; jj < nmf.RightNonnegativeFactors.GetLength(0); jj++) {
         *         str += nmf.RightNonnegativeFactors[jj, xx] + ",";
         *     }
         *     str += "\n";
         * }
         * System.IO.File.WriteAllText(mat.Key + "W.txt", str);
         * str = "";
         * for (int xx = 0; xx < nmf.LeftNonnegativeFactors.GetLength(1); xx++) {
         *     for (int jj = 0; jj < nmf.LeftNonnegativeFactors.GetLength(0); jj++) {
         *         str += nmf.LeftNonnegativeFactors[jj, xx] + ",";
         *     }
         *     str += "\n";
         * }
         * System.IO.File.WriteAllText(mat.Key + "H.txt", str);
         * for (int ii = 0; ii < nmf.LeftNonnegativeFactors.GetLength(1); ii++) {
         *     double[,] W = nmf.LeftNonnegativeFactors.rowToMatrix(ii, 12, 10);
         *     W.matToBitmap(0, 25).Save(mat.Key + ii + "W.png");
         * }
         * if (mat.Key == "blocks") {
         *     double[,] reconstructed = new double[12, 10];
         *     for (int ii = 0; ii < rooms[0].coefficients["blocks"].Length; ii++) {
         *         double w = rooms[0].coefficients["blocks"][ii];
         *         int counter = 0;
         *         for (int xx = 0; xx < 12; xx++) {
         *             for (int yy = 0; yy < 10; yy++) {
         *                 reconstructed[xx, yy] += w * Ws["blocks"][counter, ii];
         *                 counter++;
         *             }
         *         }
         *     }
         *     reconstructed.matToBitmap(0, 1).Save("room0Reconstructed.png");
         * }
         */
    }

    // Gather the PCA coefficients of every room into one observation vector each
    int counter = 0;
    // double[,] xy = new double[rooms.Count, 2];
    double[][] clusterData = new double[rooms.Count][];
    foreach (var room in rooms)
    {
        int compCounter = 0;
        double[] coeffs = new double[room.coefficients.Count * dimensionReduction];
        foreach (var comp in room.coefficients)
        {
            foreach (var coef in comp.Value)
            {
                coeffs[compCounter] = coef;
                compCounter++;
            }
        }
        clusterData[counter] = coeffs;
        Room reconstructed = room.reconstruct(components, 1);
        reconstructed.toBitmap().Save("room" + counter + "Reconstructed.png");
        counter++;
    }

    // Cluster the rooms in coefficient space
    int numberofClusters = 25;
    KMeans kmeans = new KMeans(numberofClusters);
    kmeans.Tolerance = 0.5;
    int[] clusters = kmeans.Compute(clusterData);

    Dictionary<string, SortedSet<int>> clusteredRooms = new Dictionary<string, SortedSet<int>>();
    Dictionary<int, SortedSet<string>> roomClusters = new Dictionary<int, SortedSet<string>>();
    Dictionary<string, Dictionary<int, List<Room>>> output = new Dictionary<string, Dictionary<int, List<Room>>>();
    int[] clusterCounts = new int[numberofClusters];
    for (int ii = 0; ii < rooms.Count; ii++)
    {
        rooms[ii].setType();
        if (!clusteredRooms.ContainsKey(rooms[ii].roomType))
        {
            output[rooms[ii].roomType] = new Dictionary<int, List<Room>>();
            clusteredRooms[rooms[ii].roomType] = new SortedSet<int>();
        }
        if (!output[rooms[ii].roomType].ContainsKey(clusters[ii]))
        {
            output[rooms[ii].roomType][clusters[ii]] = new List<Room>();
        }
        if (!roomClusters.ContainsKey(clusters[ii]))
        {
            roomClusters[clusters[ii]] = new SortedSet<string>();
        }
        output[rooms[ii].roomType][clusters[ii]].Add(rooms[ii]);
        roomClusters[clusters[ii]].Add(rooms[ii].roomType);
        clusterCounts[clusters[ii]]++;
        clusteredRooms[rooms[ii].roomType].Add(clusters[ii]);
        // Console.WriteLine(ii + " " + clusters[ii]);
    }

    for (int ii = 0; ii < clusterCounts.Length; ii++)
    {
        string str = "";
        foreach (var roomtype in roomClusters[ii])
        {
            str += roomtype + " ";
        }
        // Console.WriteLine("Cluster " + ii + " = " + clusterCounts[ii] + " : " + str);
    }
    foreach (var roomType in clusteredRooms)
    {
        string str = "";
        foreach (var cluster in roomType.Value)
        {
            str += cluster + " ";
        }
        // Console.WriteLine(roomType.Key + " " + str);
    }

    return new Tuple<Dictionary<string, Dictionary<int, List<Room>>>, Dictionary<string, double[,]>>(output, components);
}
public void IndexFiles(FileInfo[] imageFiles, System.ComponentModel.BackgroundWorker IndexBgWorker,
    Action<string> logWriter, LocateSettings locateSetting = null)
{
    // For time profiling
    long extractingTime, kMeanTime = 0, calcBagOfVisualTime = 0;
    Stopwatch sw1;

    SimpleSurfSift.LoCATe descriptorExtractor = new SimpleSurfSift.LoCATe();

    sw1 = Stopwatch.StartNew();
    logWriter("Index started, extracting Descriptors...");

    List<double[]> ListofDescriptorsForCookBook = new List<double[]>();
    List<LoCATeRecord> ListOfAllImageDescriptors = new List<LoCATeRecord>();

    int totalFileCount = imageFiles.Length;
    if (totalFileCount == 0)
    {
        logWriter("No files to index");
        return;
    }

    for (int i = 0; i < totalFileCount; i++)
    {
        var fi = imageFiles[i];
        using (Bitmap observerImage = (Bitmap)Image.FromFile(fi.FullName))
        {
            List<double[]> locateDescriptors = descriptorExtractor.extract(observerImage, "SURF");
            ListOfAllImageDescriptors.Add(new LoCATeRecord
            {
                Id = i,
                ImageName = fi.Name,
                ImagePath = fi.FullName,
                LoCATeDescriptors = locateDescriptors
            });
            if (locateSetting.IsCodeBookNeedToBeCreated)
            {
                if (locateDescriptors.Count > 4)
                {
                    // Sample a random subset of each image's descriptors for codebook training
                    RandomHelper randNumGenerator = new RandomHelper();
                    List<int> randIndexes = randNumGenerator.GetRandomNumberInRange(0, locateDescriptors.Count, 10d);
                    foreach (int index in randIndexes)
                    {
                        ListofDescriptorsForCookBook.Add(locateDescriptors[index]);
                    }
                }
                else
                {
                    Debug.WriteLine(fi.Name + " skipped from index because it didn't have significant features");
                }
            }
        }
        IndexBgWorker.ReportProgress(i);
    }
    sw1.Stop();
    extractingTime = Convert.ToInt32(sw1.Elapsed.TotalSeconds);

    double[][] codeBook = null;
    if (locateSetting.IsCodeBookNeedToBeCreated)
    {
        logWriter("Indexing, Calculating Mean...");
        sw1.Reset();
        sw1.Start();

        // Build the visual codebook: the k-means centroids become the visual words
        KMeans kMeans = new KMeans(locateSetting.SizeOfCodeBook);
        kMeans.Compute(ListofDescriptorsForCookBook.ToArray());
        codeBook = kMeans.Clusters.Centroids;

        //------------Save codebook
        string fullFileName = locateSetting.CodeBookFullPath;
        if (File.Exists(fullFileName))
        {
            File.Delete(fullFileName);
        }
        using (FileStream fs = new FileStream(fullFileName, FileMode.Create, FileAccess.Write, FileShare.None))
        {
            System.Runtime.Serialization.Formatters.Binary.BinaryFormatter bf =
                new System.Runtime.Serialization.Formatters.Binary.BinaryFormatter();
            bf.Serialize(fs, codeBook);
            fs.Close();
        }
        sw1.Stop();
        kMeanTime = Convert.ToInt32(sw1.Elapsed.TotalSeconds);
    }
    else
    {
        string fullFileName = locateSetting.CodeBookFullPath;
        if (!File.Exists(fullFileName))
        {
            string msg = string.Format("Couldn't find {0}, Please Index before querying with Locate", fullFileName);
            throw new InvalidOperationException(msg);
        }
        using (FileStream fs = new FileStream(fullFileName, FileMode.Open, FileAccess.Read, FileShare.None))
        {
            System.Runtime.Serialization.Formatters.Binary.BinaryFormatter bf =
                new System.Runtime.Serialization.Formatters.Binary.BinaryFormatter();
            codeBook = (double[][])bf.Deserialize(fs);
            fs.Close();
        }
    }

    logWriter("Indexing, Calculating Bag of Visual Words...");
    sw1.Reset();
    sw1.Start();
    List<LoCaTeBoWRecord> ListOfImageVisualBagOfWorks = new List<LoCaTeBoWRecord>();
    for (int i = 0; i < ListOfAllImageDescriptors.Count; i++)
    {
        double[] visualWordForImage = createVisualWord(ListOfAllImageDescriptors[i].LoCATeDescriptors, codeBook);
        LoCaTeBoWRecord rec = new LoCaTeBoWRecord
        {
            Id = ListOfAllImageDescriptors[i].Id,
            ImageName = ListOfAllImageDescriptors[i].ImageName,
            ImagePath = ListOfAllImageDescriptors[i].ImagePath,
            VisaulWord = visualWordForImage
        };
        ListOfImageVisualBagOfWorks.Add(rec);
        IndexBgWorker.ReportProgress(i);
    }

    logWriter("Indexing, Calculating ltcData...");
    int[] histogramSumOfAllVisualWords = null;

    //------------Creating sum histogram of all words
    double[][] AllDatas = ListOfImageVisualBagOfWorks.Select(des => des.VisaulWord).ToArray();
    histogramSumOfAllVisualWords = createIndex((double[][])(AllDatas));

    //------------Creating image records data
    LoCaTeDataSet locateDS = new LoCaTeDataSet
    {
        AllImageRecordSet = ListOfImageVisualBagOfWorks,
        HistogramSumOfAllVisualWords = histogramSumOfAllVisualWords
    };

    logWriter("Indexing, Saving Image Data...");

    //------------Save image records
    string ImageRecordName = Path.Combine(DirectoryHelper.SaveDirectoryPath, "LoCATeImageRecords.bin");
    if (File.Exists(ImageRecordName))
    {
        File.Delete(ImageRecordName);
    }
    using (FileStream fs = new FileStream(ImageRecordName, FileMode.Create, FileAccess.Write, FileShare.None))
    {
        System.Runtime.Serialization.Formatters.Binary.BinaryFormatter bf =
            new System.Runtime.Serialization.Formatters.Binary.BinaryFormatter();
        bf.Serialize(fs, locateDS);
        fs.Close();
    }
    sw1.Stop();
    calcBagOfVisualTime = Convert.ToInt32(sw1.Elapsed.TotalSeconds);

    logWriter(string.Format("Extracting: {0} sec, KMeanTime: {1} sec, CalcBagOfVisalTime: {2} sec",
        extractingTime, kMeanTime, calcBagOfVisualTime));
}
public string kmeans(DataTable tbl)
{
    // Encode the categorical table into numeric symbols
    Codification codebook = new Codification(tbl);
    DataTable symbols = codebook.Apply(tbl);
    double[][] inputs = symbols.ToIntArray("Clump Thickness", "Uniformity of Cell Size",
        "Uniformity of Cell Shape", "Marginal Adhesion", "Single Epithelial Cell Size",
        "Bare Nuclei", "Bland Chromatin", "Normal Nucleoli", "Mitoses").ToDouble();
    int[] outputs = symbols.ToIntArray("Class").GetColumn(0);

    // Cluster the observations into two groups
    KMeans kmeans = new KMeans(2);
    int[] labels = kmeans.Compute(inputs);

    // Assign the incoming sample to the nearest cluster
    int c = kmeans.Clusters.Nearest(new double[]
    {
        Convert.ToInt32(inputlar[0]), Convert.ToInt32(inputlar[1]), Convert.ToInt32(inputlar[2]),
        Convert.ToInt32(inputlar[3]), Convert.ToInt32(inputlar[4]), Convert.ToInt32(inputlar[5]),
        Convert.ToInt32(inputlar[6]), Convert.ToInt32(inputlar[7]), Convert.ToInt32(inputlar[8])
    });
    return c.ToString();
}
static void Main(string[] args)
{
#if Cluster
    // output file
    List<string> outputLines = new List<string>();
    DateTime timeStart = DateTime.Now; // was 'new DateTime()', which measures from year 0001

    // Some example documents.
    string[] documents = new GetTweets().GetTweetsFromExcelFile("Train_NN.xlsx");

    // Apply TF*IDF to the documents and get the resulting vectors.
    double[][] inputs = TFIDF.Transform(documents, 0);
    Console.WriteLine("time to transformation " + (DateTime.Now - timeStart));
    outputLines.Add("time to transformation " + (DateTime.Now - timeStart));
    Console.WriteLine("TFIDF transformation done...");

    inputs = TFIDF.Normalize(inputs);
    Console.WriteLine("time to Normalization " + (DateTime.Now - timeStart));
    outputLines.Add("time to Normalization " + (DateTime.Now - timeStart));
    Console.WriteLine("TFIDF Normalization done...");
    //inputs = Accord.Math.Norm.Norm2(inputs);

    string[] topics = TFIDF.Topics(documents, 5);
    Console.WriteLine("time to topics " + (DateTime.Now - timeStart));
    outputLines.Add("time to topics " + (DateTime.Now - timeStart));
    Console.WriteLine("Topics gathered...");

    //Random random = new Random();
    //double[][] rand = new double[inputs.Length][];
    //for (int i = 0; i < inputs.Length; i++)
    //{
    //    rand[i] = new double[inputs[i].Length];
    //    for (int j = 0; j < inputs[i].Length; j++)
    //    {
    //        rand[i][j] = random.NextDouble();
    //    }
    //}
    //Console.WriteLine("time to generate random numbers " + (DateTime.Now - timeStart));
    //outputLines.Add("time to topics " + (DateTime.Now - timeStart));
    //Console.WriteLine("Randoms generated...");

    KMeans cluster = new KMeans(topics.Length, Distance.Cosine);
    //cluster.MaxIterations = 1;
    //cluster.Randomize(rand);
    int[] index = cluster.Compute(inputs);
    Console.WriteLine("time to cluster " + (DateTime.Now - timeStart));
    outputLines.Add("time to cluster " + (DateTime.Now - timeStart));
    Console.WriteLine("Clustering done...");

    //Accord.Statistics.Analysis.PrincipalComponentAnalysis pca = new Accord.Statistics.Analysis.PrincipalComponentAnalysis(inputs, Accord.Statistics.Analysis.AnalysisMethod.Center);
    //pca.Compute();
    //double[][] newinput = pca.Transform(inputs, 2);
    //ScatterplotBox.Show("KMeans Clustering of Tweets", newinput, index).Hold();

    // Write the documents of every cluster, headed by its dominant topic.
    // Note: KMeans.Compute labels clusters 0..k-1, so compare against i rather than i + 1.
    for (int i = 0; i < topics.Length; i++)
    {
        outputLines.Add(Convert.ToString(i + 1));
        List<string> topicDecider = new List<string>();
        string[] topicString;
        int j = 0;
        foreach (int x in index)
        {
            if (x == i)
            {
                topicDecider.Add(documents[j]);
            }
            j++;
        }
        topicString = TFIDF.Topics(topicDecider.ToArray(), topicDecider.Count / 2);
        if (topicString.Length == 0)
        {
            outputLines.Add("--------------------------------------------------------");
            outputLines.Add("TOPIC: other");
            outputLines.Add("--------------------------------------------------------");
        }
        else
        {
            outputLines.Add("--------------------------------------------------------");
            outputLines.Add("TOPIC: " + topicString[0]);
            outputLines.Add("--------------------------------------------------------");
        }
        j = 0;
        foreach (int x in index)
        {
            if (x == i)
            {
                outputLines.Add("Tweet ID " + j + ":\t" + documents[j]);
            }
            j++;
        }
        outputLines.Add(""); outputLines.Add(""); outputLines.Add(""); outputLines.Add("");
    }

    System.IO.File.WriteAllLines(@"Train_NN_2.txt", outputLines.ToArray());
    Console.WriteLine("Output is written...");
#else
    // output file
    List<string> outputLines = new List<string>();
    DateTime timeStart = DateTime.Now; // was 'new DateTime()', which measures from year 0001

    // Some example documents.
    string[] documents_Train = new GetTweets().GetTweetsFromExcelFile("Train_NN.xlsx");
    double[][] Train_Labels = new GetTweets().GetLabelsFromExcelFile("Train_Labels.xlsx");

    // Apply TF*IDF to the documents and get the resulting vectors.
    double[][] inputs = TFIDF.Transform(documents_Train, 0);
    Console.WriteLine("time to transformation " + (DateTime.Now - timeStart));
    outputLines.Add("time to transformation " + (DateTime.Now - timeStart));
    Console.WriteLine("TFIDF transformation done...");

    inputs = TFIDF.Normalize(inputs);
    Console.WriteLine("time to Normalization " + (DateTime.Now - timeStart));
    outputLines.Add("time to Normalization " + (DateTime.Now - timeStart));
    Console.WriteLine("TFIDF Normalization done...");

    //double[][] inputs;
    double[][] train_input = new double[140][];
    double[][] outputs;
    double[][] testInputs = new double[1000 - 140][];
    double[][] testOutputs = new double[1000 - 140][];

    // The first 140 rows are the training set
    for (int i = 0; i < 140; i++)
    {
        train_input[i] = new double[inputs[i].Length];
        for (int j = 0; j < inputs[i].Length; j++)
        {
            train_input[i][j] = inputs[i][j];
        }
    }

    // The remaining rows are the test set (was 'inputs[i]', which re-copied training rows)
    for (int i = 0; i < 1000 - 140; i++)
    {
        testInputs[i] = new double[inputs[i + 140].Length];
        for (int j = 0; j < inputs[i + 140].Length; j++)
        {
            testInputs[i][j] = inputs[i + 140][j];
        }
    }

    // The first 500 data rows will be for training. The rest will be for testing.
    //testInputs = inputs.Skip(500).ToArray();
    //testOutputs = outputs.Skip(500).ToArray();
    //inputs = inputs.Take(500).ToArray();
    //outputs = outputs.Take(500).ToArray();

    // Setup the deep belief network and initialize with random weights.
    DeepBeliefNetwork network = new DeepBeliefNetwork(train_input.First().Length, 7);
    new GaussianWeights(network, 0.1).Randomize();
    network.UpdateVisibleWeights();

    // Setup the learning algorithm.
    DeepBeliefNetworkLearning teacher = new DeepBeliefNetworkLearning(network)
    {
        Algorithm = (h, v, i) => new ContrastiveDivergenceLearning(h, v)
        {
            LearningRate = 0.1,
            Momentum = 0.5,
            Decay = 0.001,
        }
    };

    // Setup batches of input for learning.
    int batchCount = Math.Max(1, train_input.Length / 100);
    // Create mini-batches to speed learning.
    int[] groups = Accord.Statistics.Tools.RandomGroups(train_input.Length, batchCount);
    double[][][] batches = train_input.Subgroups(groups);
    // Learning data for the specified layer.
    double[][][] layerData;

    // Unsupervised learning on each hidden layer, except for the output layer.
    for (int layerIndex = 0; layerIndex < network.Machines.Count - 1; layerIndex++)
    {
        teacher.LayerIndex = layerIndex;
        layerData = teacher.GetLayerInput(batches);
        for (int i = 0; i < 200; i++)
        {
            double error = teacher.RunEpoch(layerData) / train_input.Length;
            if (i % 10 == 0)
            {
                Console.WriteLine(i + ", Error = " + error);
            }
        }
    }

    // Supervised learning on entire network, to provide output classification.
    var teacher2 = new BackPropagationLearning(network)
    {
        LearningRate = 0.1,
        Momentum = 0.5
    };

    // Transpose the label matrix so each row holds the 7 outputs of one sample
    double[][] Train_Labels_T = new double[140][];
    for (int i = 0; i < 140; i++)
    {
        Train_Labels_T[i] = new double[7];
        for (int j = 0; j < 7; j++)
        {
            Train_Labels_T[i][j] = Train_Labels[j][i];
        }
    }

    // Run supervised learning.
    for (int i = 0; i < 500; i++)
    {
        double error = teacher2.RunEpoch(train_input, Train_Labels_T) / train_input.Length;
        if (i % 10 == 0)
        {
            Console.WriteLine(i + ", Error = " + error);
        }
    }

    outputLines.Add("time to Training " + (DateTime.Now - timeStart));

    // Test the resulting accuracy.
    double[][] outputValues = new double[testInputs.Length][];
    for (int i = 0; i < testInputs.Length; i++)
    {
        outputValues[i] = network.Compute(testInputs[i]);
    }

    outputLines.Add("time to Testing/clustering " + (DateTime.Now - timeStart));
    outputLines.Add(""); outputLines.Add(""); outputLines.Add("");

    List<string> class1 = new List<string>();
    List<string> class2 = new List<string>();
    List<string> class3 = new List<string>();
    List<string> class4 = new List<string>();
    List<string> class5 = new List<string>();
    List<string> class6 = new List<string>();
    List<string> class7 = new List<string>();

    // Creating output file: the first 140 tweets carry known topics; the rest
    // are assigned to the class with the highest network output.
    for (int i = 0; i < documents_Train.Length; i++)
    {
        if (i < 10 && i > -1)
        {
            if (i == 0)
            {
                class1.Add("-------------------------------");
                class1.Add("TOPIC: WEATHER");
                class1.Add("-------------------------------");
            }
            class1.Add("Training_Tweet:\t" + documents_Train[i]);
        }
        if (i < 20 && i > 9)
        {
            if (i == 10)
            {
                class2.Add("-------------------------------");
                class2.Add("TOPIC: MUSIC");
                class2.Add("-------------------------------");
            }
            class2.Add("Training_Tweet:\t" + documents_Train[i]);
        }
        if (i < 30 && i > 19)
        {
            if (i == 20)
            {
                class3.Add("-------------------------------");
                class3.Add("TOPIC: ITALY");
                class3.Add("-------------------------------");
            }
            class3.Add("Training_Tweet:\t" + documents_Train[i]);
        }
        if (i < 40 && i > 29)
        {
            if (i == 30)
            {
                class4.Add("-------------------------------");
                class4.Add("TOPIC: FOOD");
                class4.Add("-------------------------------");
            }
            class4.Add("Training_Tweet:\t" + documents_Train[i]);
        }
        if (i < 50 && i > 39)
        {
            if (i == 40)
            {
                class5.Add("-------------------------------");
                class5.Add("TOPIC: FASHION");
                class5.Add("-------------------------------");
            }
            class5.Add("Training_Tweet:\t" + documents_Train[i]);
        }
        if (i < 60 && i > 49)
        {
            if (i == 50)
            {
                class6.Add("-------------------------------");
                class6.Add("TOPIC: FOOTBALL");
                class6.Add("-------------------------------");
            }
            class6.Add("Training_Tweet:\t" + documents_Train[i]);
        }
        if (i < 140 && i > 59)
        {
            if (i == 60)
            {
                class7.Add("-------------------------------");
                class7.Add("TOPIC: OTHER");
                class7.Add("-------------------------------");
            }
            class7.Add("Training_Tweet:\t" + documents_Train[i]);
        }
        if (i >= 140)
        {
            int what = outputValues[i - 140].IndexOf(outputValues[i - 140].Max());
            switch (what)
            {
                case 0: class1.Add("Test_Tweet:\t" + documents_Train[i]); break;
                case 1: class2.Add("Test_Tweet:\t" + documents_Train[i]); break;
                case 2: class3.Add("Test_Tweet:\t" + documents_Train[i]); break;
                case 3: class4.Add("Test_Tweet:\t" + documents_Train[i]); break;
                case 4: class5.Add("Test_Tweet:\t" + documents_Train[i]); break;
                case 5: class6.Add("Test_Tweet:\t" + documents_Train[i]); break;
                case 6: class7.Add("Test_Tweet:\t" + documents_Train[i]); break;
            }
        }
    }

    outputLines.Add(""); outputLines.Add(""); outputLines.Add("");
    outputLines.AddRange(class1);
    outputLines.Add(""); outputLines.Add(""); outputLines.Add("");
    outputLines.AddRange(class2);
    outputLines.Add(""); outputLines.Add(""); outputLines.Add("");
    outputLines.AddRange(class3);
    outputLines.Add(""); outputLines.Add(""); outputLines.Add("");
    outputLines.AddRange(class4);
    outputLines.Add(""); outputLines.Add(""); outputLines.Add("");
    outputLines.AddRange(class5);
    outputLines.Add(""); outputLines.Add(""); outputLines.Add("");
    outputLines.AddRange(class6);
    outputLines.Add(""); outputLines.Add(""); outputLines.Add("");
    outputLines.AddRange(class7);
    outputLines.Add(""); outputLines.Add(""); outputLines.Add("");

    System.IO.File.WriteAllLines(@"Train_NN_With_Test_2.txt", outputLines.ToArray());
    Console.Write("Press any key to quit ..");
#endif
    Console.ReadKey();
}
public void Test_TrainPartials()
{
    double[][] clusterCenters = new double[3][];
    clusterCenters[0] = new double[] { 5.0, 5.0 };
    clusterCenters[1] = new double[] { 15.0, 15.0 };
    clusterCenters[2] = new double[] { 30.0, 30.0 };

    double[][] clusterCenters2 = new double[3][];
    clusterCenters2[0] = new double[] { 6, 5 };
    clusterCenters2[1] = new double[] { 17, 18 };
    clusterCenters2[2] = new double[] { 28, 30 };

    string[] attributes = new string[] { "Height", "Weight" };

    int numAttributes = attributes.Length; // 2 in this demo (height, weight)
    int numClusters = 3;                   // vary this to experiment (must be between 2 and number of data tuples)
    int maxCount = 300;                    // trial and error

    double[][] apiResp1Centroid = new double[numClusters][];
    double[] apiResp1MaxDistance = new double[numClusters];
    double[] apiResp1NumSamples = new double[numClusters];

    ClusteringSettings settings = new ClusteringSettings(maxCount, numClusters, numAttributes, KmeansAlgorithm: 2);

    // Creates learning api objects
    LearningApi api = new LearningApi(loadDescriptor());
    LearningApi api2 = new LearningApi(loadDescriptor());

    double[][] rawData = Helpers.CreateSampleData(clusterCenters, 2, 10000, 0.5);
    double[][] rawData2 = Helpers.CreateSampleData(clusterCenters2, 2, 5000, 0.5);

    int runNum = 0;

    api.UseActionModule<object, double[][]>((data, ctx) =>
    {
        if (runNum == 0)
            return rawData;
        else
            return rawData2;
    });

    api2.UseActionModule<object, double[][]>((data, ctx) =>
    {
        return rawData2;
    });

    // start api2 that runs only second raw data (rawData2)
    api2.UseKMeans(settings);

    // train
    var api2Resp = api2.Run() as KMeansScore;
    Assert.True(api2Resp.Model.Clusters != null);
    Assert.True(api2Resp.Model.Clusters.Length == clusterCenters.Length);

    // start api that runs first raw data (rawData) and save results in variables
    api.UseKMeans(settings);

    // train
    var apiResp = api.Run() as KMeansScore;
    Assert.True(apiResp.Model.Clusters != null);
    Assert.True(apiResp.Model.Clusters.Length == clusterCenters.Length);

    // save first run results in variables
    for (int i = 0; i < numClusters; i++)
    {
        apiResp1Centroid[i] = apiResp.Model.Clusters[i].Centroid;
        apiResp1MaxDistance[i] = apiResp.Model.Clusters[i].InClusterMaxDistance;
        apiResp1NumSamples[i] = apiResp.Model.Clusters[i].NumberOfSamples;
    }

    // run with new data
    runNum++;

    // continue partial api run using second raw data (rawData2)
    settings = new ClusteringSettings(maxCount, numClusters, numAttributes, KmeansAlgorithm: 2,
        initialCentroids: apiResp1Centroid);

    // train
    apiResp = api.Run() as KMeansScore;
    Assert.True(apiResp.Model.Clusters != null);
    Assert.True(apiResp.Model.Clusters.Length == clusterCenters.Length);

    // compare results
    double f, res;
    for (int i = 0; i < numClusters; i++)
    {
        // partial formula f*res
        f = (double)1 / apiResp.Model.Clusters[i].NumberOfSamples;
        for (int j = 0; j < numAttributes; j++)
        {
            res = apiResp1Centroid[i][j] * apiResp1NumSamples[i] +
                  api2Resp.Model.Clusters[i].Centroid[j] * api2Resp.Model.Clusters[i].NumberOfSamples;

            // partial centroid check
            Assert.True(apiResp.Model.Clusters[i].Centroid[j] == f * res);
        }

        // max distance in cluster check
        Assert.True(apiResp.Model.Clusters[i].InClusterMaxDistance >=
                    apiResp1MaxDistance[i] +
                    KMeans.calculateDistance(apiResp1Centroid[i], apiResp.Model.Clusters[i].Centroid));
    }
}
public void dayWorkSchedule(DateTime date)
{
    Func<WorkSchedule, bool> predicat = (w => w.workSchedule_date == date);
    List<WorkSchedule> workSchedules = selectWorkSchedule(predicat);
    List<Distributors> alldistributors = new List<Distributors>();
    List<Distributors> ezerList = new List<Distributors>();
    foreach (WorkSchedule item in workSchedules)
    {
        ezerList = selectDistributors(d => d.distributors_id == item.distributor_id);
        if (ezerList.Count != 1)
        {
            throw new Exception("Error while searching for this distributor");
        }
        alldistributors.Add(ezerList[0]);
    }

    List<Recipients> allrecipients = recipientsPackageByDay(date);
    double[][] Coordinates = new double[allrecipients.Count][];
    int i = 0;
    foreach (Recipients item in allrecipients)
    {
        Coordinates[i] = getLatLongFromAddress(item.recipients_address);
        i++;
    }

    // Create a new K-Means algorithm with 3 clusters
    KMeans kmeans = new KMeans(3);

    // Compute the algorithm, retrieving the collection of learned clusters
    KMeansClusterCollection clusters = kmeans.Learn(Coordinates);

    // Label each recipient coordinate with its cluster
    int[] labels = clusters.Decide(Coordinates);

    List<Recipients> group0 = new List<Recipients>();
    List<Recipients> group1 = new List<Recipients>();
    List<Recipients> group2 = new List<Recipients>();
    for (int k = 0; k < labels.Length; k++)
    {
        if (labels[k] == 0) group0.Add(allrecipients[k]);
        if (labels[k] == 1) group1.Add(allrecipients[k]);
        if (labels[k] == 2) group2.Add(allrecipients[k]);
    }

    // Compute the average coordinate of each group
    double lat = 0;
    double lon = 0;
    double[] d0 = new double[2];
    double[] d1 = new double[2];
    double[] d2 = new double[2];
    foreach (var item in group0)
    {
        d0 = getLatLongFromAddress(item.recipients_address);
        lat += d0[0];
        lon += d0[1];
    }
    d0[0] = lat / group0.Count;
    d0[1] = lon / group0.Count;

    lat = 0;
    lon = 0;
    foreach (var item in group1)
    {
        d1 = getLatLongFromAddress(item.recipients_address);
        lat += d1[0];
        lon += d1[1];
    }
    d1[0] = lat / group1.Count;
    d1[1] = lon / group1.Count;

    lat = 0;
    lon = 0;
    foreach (var item in group2)
    {
        d2 = getLatLongFromAddress(item.recipients_address);
        lat += d2[0];
        lon += d2[1];
    }
    d2[0] = lat / group2.Count;
    d2[1] = lon / group2.Count;

    // double[] d0 = getLatLongFromAddress(group0[0].recipients_address);
    // double[] d1 = getLatLongFromAddress(group1[0].recipients_address);
    // double[] d2 = getLatLongFromAddress(group2[0].recipients_address);

    // Distance from every distributor to every group center
    double[,] distance = new double[3, 3];
    for (i = 0; i < 3; i++)
    {
        double[] d3 = getLatLongFromAddress(alldistributors[i].distributors_address);
        for (int j = 0; j < 3; j++)
        {
            if (j == 0) distance[i, j] = dalImp.addressCalculations.calculateDistance(d0, d3);
            if (j == 1) distance[i, j] = dalImp.addressCalculations.calculateDistance(d1, d3);
            if (j == 2) distance[i, j] = dalImp.addressCalculations.calculateDistance(d2, d3);
        }
    }

    int[,] minIndex = new int[3, 3];
    for (i = 0; i < 3; i++)
    {
        for (int j = 0; j < 3; j++)
        {
            minIndex[i, j] = findMinDist(distance[i, 0], distance[i, 1], distance[i, 2], j);
        }
    }

    // Assign each distributor the closest group; ties are resolved in the else branches
    if ((minIndex[0, 0] != minIndex[2, 0]) && (minIndex[0, 0] != minIndex[1, 0]) && (minIndex[2, 0] != minIndex[1, 0]))
    {
        if (minIndex[0, 0] == 0)
        {
            UpdateWorkSchedule(new WorkSchedule(alldistributors[0].distributors_id, date, group0));
            if (minIndex[1, 0] == 1)
            {
                UpdateWorkSchedule(new WorkSchedule(alldistributors[1].distributors_id, date, group1));
                UpdateWorkSchedule(new WorkSchedule(alldistributors[2].distributors_id, date, group2));
            }
            else if (minIndex[1, 0] == 2)
            {
                UpdateWorkSchedule(new WorkSchedule(alldistributors[1].distributors_id, date, group2));
                UpdateWorkSchedule(new WorkSchedule(alldistributors[2].distributors_id, date, group1));
            }
        }
        else if (minIndex[0, 0] == 1)
        {
            UpdateWorkSchedule(new WorkSchedule(alldistributors[0].distributors_id, date, group1));
            if (minIndex[1, 0] == 0)
            {
                UpdateWorkSchedule(new WorkSchedule(alldistributors[1].distributors_id, date, group0));
                UpdateWorkSchedule(new WorkSchedule(alldistributors[2].distributors_id, date, group2));
            }
            else if (minIndex[1, 0] == 2)
            {
                UpdateWorkSchedule(new WorkSchedule(alldistributors[1].distributors_id, date, group2));
                UpdateWorkSchedule(new WorkSchedule(alldistributors[2].distributors_id, date, group0));
            }
        }
        else if (minIndex[0, 0] == 2)
        {
            UpdateWorkSchedule(new WorkSchedule(alldistributors[0].distributors_id, date, group2));
            if (minIndex[1, 0] == 0)
            {
                UpdateWorkSchedule(new WorkSchedule(alldistributors[1].distributors_id, date, group0));
                UpdateWorkSchedule(new WorkSchedule(alldistributors[2].distributors_id, date, group1));
            }
            else if (minIndex[1, 0] == 1)
            {
                UpdateWorkSchedule(new WorkSchedule(alldistributors[1].distributors_id, date, group1));
                UpdateWorkSchedule(new WorkSchedule(alldistributors[2].distributors_id, date, group0));
            }
        }
    }
    else
    {
        if ((minIndex[0, 0] == minIndex[2, 0]) && (minIndex[0, 0] == minIndex[1, 0]) && (minIndex[2, 0] == minIndex[1, 0]))
        {
            UpdateWorkSchedule(new WorkSchedule(alldistributors[minIndex[0, 0]].distributors_id, date, group0));
            if (minIndex[1, 1] == minIndex[2, 1])
            {
                UpdateWorkSchedule(new WorkSchedule(alldistributors[minIndex[1, 1]].distributors_id, date, group1));
                UpdateWorkSchedule(new WorkSchedule(alldistributors[minIndex[2, 2]].distributors_id, date, group2));
            }
            else
            {
                UpdateWorkSchedule(new WorkSchedule(alldistributors[minIndex[1, 1]].distributors_id, date, group1));
                UpdateWorkSchedule(new WorkSchedule(alldistributors[minIndex[2, 1]].distributors_id, date, group2));
            }
        }
        else
        {
            int place;
            if (minIndex[1, 0] == minIndex[2, 0])
            {
                UpdateWorkSchedule(new WorkSchedule(alldistributors[minIndex[0, 0]].distributors_id, date, group0));
                place = minIndex[1, 1] + minIndex[1, 1] % 3;
                UpdateWorkSchedule(new WorkSchedule(alldistributors[minIndex[1, 0]].distributors_id, date, group1));
                UpdateWorkSchedule(new WorkSchedule(alldistributors[place].distributors_id, date, group2));
            }
            else
            {
                if (minIndex[0, 0] == minIndex[2, 0])
                {
                    UpdateWorkSchedule(new WorkSchedule(alldistributors[minIndex[1, 0]].distributors_id, date, group1));
                    place = minIndex[0, 0] + minIndex[1, 0] % 3;
                    UpdateWorkSchedule(new WorkSchedule(alldistributors[minIndex[0, 0]].distributors_id, date, group1));
                    UpdateWorkSchedule(new WorkSchedule(alldistributors[place].distributors_id, date, group2));
                }
                else
                {
                    UpdateWorkSchedule(new WorkSchedule(alldistributors[minIndex[2, 0]].distributors_id, date, group2));
                    place = minIndex[0, 0] + minIndex[2, 0] % 3;
                    UpdateWorkSchedule(new WorkSchedule(alldistributors[minIndex[1, 1]].distributors_id, date, group0));
                    UpdateWorkSchedule(new WorkSchedule(alldistributors[place].distributors_id, date, group1));
                }
            }
        }
    }
}
public void learn_test_weights()
{
    #region doc_learn_weights
    Accord.Math.Random.Generator.Seed = 0;

    // A common desire when doing clustering is to attempt to find how to
    // weight the different components / columns of a dataset, giving them
    // different importances depending on the end goal of the clustering task.

    // Declare some observations
    double[][] observations =
    {
        new double[] { -5, -2, -1 },
        new double[] { -5, -5, -6 },
        new double[] {  2,  1,  1 },
        new double[] {  1,  1,  2 },
        new double[] {  1,  2,  2 },
        new double[] {  3,  1,  2 },
        new double[] { 11,  5,  4 },
        new double[] { 15,  5,  6 },
        new double[] { 10,  5,  6 },
    };

    // Create a new K-Means algorithm
    KMeans kmeans = new KMeans(k: 3)
    {
        // For example, let's say we would like to consider the importance of
        // the first column as 0.1, the second column as 0.7 and the third as 1.1
        Distance = new WeightedSquareEuclidean(new double[] { 0.1, 0.7, 1.1 })
    };

    // Compute and retrieve the data centroids
    var clusters = kmeans.Learn(observations);

    // Use the centroids to partition all the data
    int[] labels = clusters.Decide(observations);
    #endregion

    Assert.AreEqual(labels[0], labels[2]);
    Assert.AreEqual(labels[0], labels[3]);
    Assert.AreEqual(labels[0], labels[4]);

    Assert.AreEqual(labels[6], labels[7]);
    Assert.AreEqual(labels[6], labels[8]);

    Assert.AreNotEqual(labels[0], labels[1]);
    Assert.AreNotEqual(labels[2], labels[6]);
    Assert.AreNotEqual(labels[0], labels[6]);

    int[] labels2 = kmeans.Clusters.Decide(observations);
    Assert.IsTrue(labels.IsEqual(labels2));

    var c = new KMeansClusterCollection.KMeansCluster[clusters.Count];
    int i = 0;
    foreach (var cluster in clusters)
    {
        c[i++] = cluster;
    }

    for (i = 0; i < c.Length; i++)
    {
        Assert.AreSame(c[i], clusters[i]);
    }
}
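// When no domain knowledge dictates the weights, one common heuristic is inverse-variance
// weighting, which puts all columns on a comparable scale. The helper below is an illustrative
// sketch (not part of Accord.NET); it assumes System.Linq and the same WeightedSquareEuclidean
// distance used in the test above.
static double[] InverseVarianceWeights(double[][] data)
{
    int cols = data[0].Length;
    var weights = new double[cols];
    for (int j = 0; j < cols; j++)
    {
        // Population variance of column j; its reciprocal becomes the weight
        double mean = data.Average(row => row[j]);
        double variance = data.Average(row => (row[j] - mean) * (row[j] - mean));
        weights[j] = variance > 0 ? 1.0 / variance : 1.0;
    }
    return weights;
}

// Usage with the observations declared above:
var weighted = new KMeans(k: 3)
{
    Distance = new WeightedSquareEuclidean(InverseVarianceWeights(observations))
};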
public void DeserializationTest1()
{
    MemoryStream stream = new MemoryStream(Properties.Resources.kmeans);

    BinaryFormatter bf = new BinaryFormatter();
    object o = bf.DeserializeAnyVersion(stream);
    KMeans kmeans = (KMeans)o;

    KMeans kbase = new KMeans(3);

    Assert.AreEqual(kbase.Iterations, kmeans.Iterations);
    Assert.AreEqual(kbase.MaxIterations, kmeans.MaxIterations);
    Assert.AreEqual(kbase.Tolerance, kmeans.Tolerance);
    Assert.AreEqual(kbase.UseCentroidSeeding, kmeans.UseCentroidSeeding);
    Assert.AreEqual(kbase.ComputeInformation, kmeans.ComputeInformation);
    Assert.AreEqual(kbase.Distance, kmeans.Distance);
}
private Image<Bgr, Byte> kmeans()
{
    int trainSampleCount = 1500;
    int sigma = 60;

    // Generate three Gaussian blobs of training points
    Matrix<float> trainData = new Matrix<float>(trainSampleCount, 2);
    Matrix<float> trainData1 = trainData.GetRows(0, trainSampleCount / 3, 1);
    trainData1.GetCols(0, 1).SetRandNormal(new MCvScalar(100), new MCvScalar(sigma));
    trainData1.GetCols(1, 2).SetRandNormal(new MCvScalar(300), new MCvScalar(sigma));

    Matrix<float> trainData2 = trainData.GetRows(trainSampleCount / 3, 2 * trainSampleCount / 3, 1);
    trainData2.SetRandNormal(new MCvScalar(400), new MCvScalar(sigma));

    Matrix<float> trainData3 = trainData.GetRows(2 * trainSampleCount / 3, trainSampleCount, 1);
    trainData3.GetCols(0, 1).SetRandNormal(new MCvScalar(300), new MCvScalar(sigma));
    trainData3.GetCols(1, 2).SetRandNormal(new MCvScalar(100), new MCvScalar(sigma));

    PointF[] points = new PointF[trainSampleCount];
    for (int i = 0; i < points.Length; ++i)
    {
        points[i] = new PointF(trainData[i, 0], trainData[i, 1]);
    }

    // Generic k-means over PointF with squared Euclidean distance and mean centroids
    var km = new KMeans<PointF>(points, 3,
        (a, b) => ((a.X - b.X) * (a.X - b.X) + (a.Y - b.Y) * (a.Y - b.Y)),
        list => new PointF(list.Average(p => p.X), list.Average(p => p.Y)));

    int it = 0;
    MyTimer timer = new MyTimer();
    timer.Restart();
    //var cluster = km.Cluster();
    var cluster = km.AnnealCluster(
        (a, b) => new PointF(a.X + b.X, a.Y + b.Y),
        (a, b) => new PointF(a.X - b.X, a.Y - b.Y),
        (p, v) => new PointF((float)(p.X / v), (float)(p.Y / v)),
        out it);
    var time = timer.Stop();
    this.Text = String.Format("n={0}, k={1}, time={2}ms, iter={3}.", trainSampleCount, 3, time, it);

    // Color the plane by nearest cluster center
    Image<Bgr, Byte> img = new Image<Bgr, byte>(500, 500);
    for (int y = 0; y < 500; ++y)
    {
        for (int x = 0; x < 500; ++x)
        {
            double d0 = (x - cluster[0].Center.X) * (x - cluster[0].Center.X) +
                        (y - cluster[0].Center.Y) * (y - cluster[0].Center.Y);
            double d1 = (x - cluster[1].Center.X) * (x - cluster[1].Center.X) +
                        (y - cluster[1].Center.Y) * (y - cluster[1].Center.Y);
            double d2 = (x - cluster[2].Center.X) * (x - cluster[2].Center.X) +
                        (y - cluster[2].Center.Y) * (y - cluster[2].Center.Y);

            Bgr color = new Bgr(0, 0, 0);
            if (d0 < d1 && d0 < d2)
            {
                color = new Bgr(20, 0, 0);
            }
            if (d1 < d0 && d1 < d2)
            {
                color = new Bgr(0, 20, 0);
            }
            if (d2 < d0 && d2 < d1)
            {
                color = new Bgr(0, 0, 20);
            }
            img[y, x] = color;
        }
    }

    // Draw the points, the cluster centers, and the true distribution circles
    Bgr[] colors = new[] { new Bgr(128, 0, 0), new Bgr(0, 128, 0), new Bgr(0, 0, 128) };
    Bgr[] centers = new[] { new Bgr(255, 0, 0), new Bgr(0, 255, 0), new Bgr(0, 0, 255) };
    for (int i = 0; i < 3; ++i)
    {
        foreach (var p in cluster[i])
        {
            img.Draw(new CircleF(p, 2), colors[i], 1);
        }
        img.Draw(new CircleF(cluster[i].Center, 5), centers[i], 3);
    }

    img.Draw(new CircleF(new PointF(100, 300), sigma), new Bgr(128, 128, 128), 2);
    img.Draw(new CircleF(new PointF(100, 300), 3), new Bgr(128, 128, 128), 2);
    img.Draw(new CircleF(new PointF(300, 100), sigma), new Bgr(128, 128, 128), 2);
    img.Draw(new CircleF(new PointF(300, 100), 3), new Bgr(128, 128, 128), 2);
    img.Draw(new CircleF(new PointF(400, 400), sigma), new Bgr(128, 128, 128), 2);
    img.Draw(new CircleF(new PointF(400, 400), 3), new Bgr(128, 128, 128), 2);

    return img;
}
private List<String> GetSimilaresDatabaseKmeans(List<Double> descriptoresEntrada)
{
    ModeloSimilitudEntities db = new ModeloSimilitudEntities();
    List<canciones> listaCanciones = db.canciones.ToList();

    // Normalize the input descriptor vector and load the normalized feature matrix
    Double[] vectorEntrada = Normalizar(descriptoresEntrada.ToArray());
    Double[][] matriz = csvtoMatrix("descriptoresNormalizados");

    // Cluster the catalog with k-means under the Chebyshev distance,
    // then find which cluster the input vector falls into
    int nClusters = 7;
    KMeans kmeans = new KMeans(nClusters, Accord.Math.Distance.Chebyshev);
    int[] indices = kmeans.Compute(matriz);
    int cluster = kmeans.Nearest(vectorEntrada);

    // Pick the 10 nearest neighbors within the input's cluster
    int nroSimilares = 10;
    int[] indiceSimilar = new int[nroSimilares];
    for (int j = 0; j < nroSimilares; j++)
    {
        // Double.MaxValue instead of a fixed sentinel of 1000, which silently
        // failed whenever every in-cluster distance exceeded the sentinel
        Double distancia = Double.MaxValue;
        for (int i = 0; i < indices.Length; i++)
        {
            if (indiceSimilar.Contains(i) || cluster != indices[i]) continue;
            Double distanciaTemp = Accord.Math.Distance.Chebyshev(vectorEntrada, matriz[i]);
            if (distanciaTemp < distancia)
            {
                distancia = distanciaTemp;
                indiceSimilar[j] = i;
            }
        }
    }

    List<String> listaSimilares = new List<String>();
    foreach (int i in indiceSimilar)
    {
        listaSimilares.Add(listaCanciones[i].id_spotify.Substring(14));
    }
    return listaSimilares;
}
public void buildModel(string modelPath) { outmodelpath = modelPath; using (System.IO.StreamReader sr = new System.IO.StreamReader(outmodelpath)) { dataPrepBase.modelTypes mType = (dataPrepBase.modelTypes)Enum.Parse(typeof(dataPrepBase.modelTypes), sr.ReadLine()); if (mType != dataPrepBase.modelTypes.StrataCovCorr) { System.Windows.Forms.MessageBox.Show("Not a StrataCovCorr Model!!", "Error", System.Windows.Forms.MessageBoxButtons.OK, System.Windows.Forms.MessageBoxIcon.Error); return; } inpath = sr.ReadLine(); VariableFieldNames = sr.ReadLine().Split(new char[] { ',' }); n = System.Convert.ToInt32(sr.ReadLine()); prop = System.Convert.ToDouble(sr.ReadLine()); k = System.Convert.ToInt32(sr.ReadLine()); lbl = sr.ReadLine().Split(new char[] { ',' }).ToList(); kmeans = new KMeans(k); KMeansClusterCollection kmeansColl = kmeans.Clusters; for (int i = 0; i < k; i++) { double[] mns = (from s in (sr.ReadLine().Split(new char[] { ',' })) select System.Convert.ToDouble(s)).ToArray(); string[] covVlsStr = sr.ReadLine().Split(new char[] { ',' }); double p = System.Convert.ToDouble(sr.ReadLine()); double[,] cov = new double[VariableFieldNames.Length, VariableFieldNames.Length]; for (int j = 0; j < VariableFieldNames.Length; j++) { for (int l = 0; l < VariableFieldNames.Length; l++) { int indexVl = (j * VariableFieldNames.Length) + l; cov[l, j] = System.Convert.ToDouble(covVlsStr[indexVl]); } } KMeansCluster kc = new KMeansCluster(kmeansColl, i); kc.Mean = mns; kc.Covariance = cov; kc.Proportion = p; } sr.Close(); } }
private void bClus_Click(object sender, RoutedEventArgs e) { int k = Int16.Parse(kKmeans.Text); int nB = classPoints[false].Count; int nR = classPoints[true].Count; if (nR + nB < 1) { textBlock.Text = "No dots."; return; } if (k < 1) { textBlock.Text = "Number of classes must be > 0."; return; } double[][] inputs = new double[nB + nR][]; int i = 0; foreach (System.Windows.Point p in classPoints[false]) { inputs[i++] = new double[] { p.X, p.Y }; } foreach (System.Windows.Point p in classPoints[true]) { inputs[i++] = new double[] { p.X, p.Y }; } KMeans kmeans = new KMeans(k: k); var clusters = kmeans.Learn(inputs); int[] labels = clusters.Decide(inputs); var centroids = clusters.Centroids; for (int j = 0; j < k; ++j) { Ellipse dot = new Ellipse(); dot.Width = 11; dot.Height = 11; dot.StrokeThickness = 2; dot.Fill = blackBrush; Canvas.SetTop(dot, centroids[j][1] - 5); Canvas.SetLeft(dot, centroids[j][0] - 5); cvsML.Children.Add(dot); double max = 0; for (int z = 0; z < labels.Length; ++z) { if (labels[z] == j) { double d = Math.Sqrt(Math.Pow(centroids[j][0] - inputs[z][0], 2) + Math.Pow(centroids[j][1] - inputs[z][1], 2)); if (d > max) { max = d; } } } Ellipse circle = new Ellipse(); circle.Width = 2 * max + 10; circle.Height = 2 * max + 10; circle.StrokeThickness = 2; circle.Stroke = blackBrush; Canvas.SetTop(circle, centroids[j][1] - max - 5); Canvas.SetLeft(circle, centroids[j][0] - max - 5); cvsML.Children.Add(circle); } }
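The inner scan above finds, for each centroid, the farthest point assigned to it; that radius sizes the enclosing circle drawn on the canvas. Pulled out as a helper (a sketch with hypothetical names, extracted from the loop above):

// Hypothetical helper: the radius of cluster j is the Euclidean distance from
// its centroid to the farthest input carrying label j.
static double ClusterRadius(double[] centroid, double[][] inputs, int[] labels, int j)
{
    double max = 0;
    for (int z = 0; z < labels.Length; ++z)
    {
        if (labels[z] != j) continue;
        double d = Math.Sqrt(Math.Pow(centroid[0] - inputs[z][0], 2) +
                             Math.Pow(centroid[1] - inputs[z][1], 2));
        if (d > max) max = d;
    }
    return max;
}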
/// <summary> /// Train on number of clusters using gap statistic /// </summary> private async Task ComputeK(int maxK = 100, int B = 10, int driverID = 0, DateTime?startDate = null, DateTime?endDate = null) { double[] Wk = new double[maxK]; double[][] Wref_kb = new double[maxK][]; double[] Gap = new double[maxK]; double[] sd = new double[maxK]; KMeansClusterCollection[] clusterCollections = new KMeansClusterCollection[maxK]; // obtain dataset IEnumerable <Leg> legs = driverID == 0 ? await _legRepository.ListAsync() : await _legRepository.ListForDriverAsync(driverID); if (startDate == null) { startDate = DateTime.MinValue; } if (endDate == null) { endDate = DateTime.MaxValue; } legs = legs.Where(leg => leg.StartTime.CompareTo(startDate) >= 0 && leg.StartTime.CompareTo(endDate) < 0); double[][] dataset = GetDataset(legs); // first cluster the dataset varying K for (int k = 1; k <= maxK; k++) { KMeans kMeans = new KMeans(k) { // distance function for geographic coordinates Distance = new GeographicDistance() }; clusterCollections[k - 1] = kMeans.Learn(dataset); double[][][] clusterData = ClusterPoints(dataset, k, clusterCollections[k - 1]); // sum of pairwise distances Wk[k - 1] = ComputeWk(clusterData, clusterCollections[k - 1]); } // then generate the reference data sets double[] lowerBounds = new double[4]; double[] boxDimensions = new double[4]; for (int i = 0; i < 4; i++) { lowerBounds[i] = dataset.Select(l => l[i]).Min(); boxDimensions[i] = dataset.Select(l => l[i]).Max() - lowerBounds[i]; } CorrectLongitudeBounds(lowerBounds, boxDimensions, 1); CorrectLongitudeBounds(lowerBounds, boxDimensions, 3); Random random = new Random(); for (int k = 1; k <= maxK; k++) { Wref_kb[k - 1] = new double[B]; for (int c = 0; c < B; c++) { double[][] refDataset = new double[dataset.Length][]; for (int i = 0; i < refDataset.Length; i++) { double[] dataPoint = new double[4]; for (int j = 0; j < 4; j++) { dataPoint[j] = random.NextDouble() * boxDimensions[j] + lowerBounds[j]; if ((j == 1 || j == 3) && dataPoint[j] > 180) { dataPoint[j] -= 360; } } refDataset[i] = dataPoint; } // cluster reference dataset KMeans refKmeans = new KMeans(k); refKmeans.Distance = new GeographicDistance(); KMeansClusterCollection refClusters = refKmeans.Learn(refDataset); // points in each cluster double[][][] refClusterData = ClusterPoints(refDataset, k, refClusters); // compute pairwise distance sum for refDataset Wref_kb[k - 1][c] = ComputeWk(refClusterData, refClusters); } // compute gap statistic double l_avg = Wref_kb[k - 1].Select(x => Log(x)).Average(); Gap[k - 1] = l_avg - Log(Wk[k - 1]); sd[k - 1] = Sqrt(Wref_kb[k - 1].Select(x => (Log(x) - l_avg) * (Log(x) - l_avg)).Average()); // decide optimal k if (k > 1 && Gap[k - 2] >= Gap[k - 1] - sd[k - 1]) { ClusterCollection = clusterCollections[k - 2]; NumberOfClustersLastChanged = DateTime.Now; return; } } }
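The early-exit test at the end of ComputeK implements the usual gap-statistic stopping rule: choose the smallest k whose gap is within one simulation standard deviation of the gap at k+1. Isolated from the method above, as coded there (a sketch over the same Gap and sd arrays):

// Sketch of the decision rule used above: k clusters are considered optimal
// once Gap(k) >= Gap(k+1) - sd(k+1); array slot i holds the value for i+1 clusters.
static int ChooseK(double[] gap, double[] sd)
{
    for (int k = 1; k < gap.Length; k++)
    {
        if (gap[k - 1] >= gap[k] - sd[k])
            return k;
    }
    return gap.Length; // no early stop: fall back to the largest k tried
}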
public void KMeansConstructorTest2() { Accord.Math.Tools.SetupGenerator(0); // Declare some observations double[][] observations = { new double[] { -5, -2, -1 }, new double[] { -5, -5, -6 }, new double[] { 2, 1, 1 }, new double[] { 1, 1, 2 }, new double[] { 1, 2, 2 }, new double[] { 3, 1, 2 }, new double[] { 11, 5, 4 }, new double[] { 15, 5, 6 }, new double[] { 10, 5, 6 }, }; double error, e; // Create a new algorithm KMeans kmeans = new KMeans(3); kmeans.Randomize(observations); // Save the first initialization double[][] initial = kmeans.Clusters.Centroids; // Compute the first K-Means kmeans.Compute(observations, out error); // Create more K-Means algorithms // with the same initializations for (int i = 0; i < 1000; i++) { kmeans = new KMeans(3); kmeans.Clusters.Centroids = initial; kmeans.Compute(observations, out e); Assert.AreEqual(error, e); } // Create more K-Means algorithms // without the same initialization bool differ = false; for (int i = 0; i < 1000; i++) { kmeans = new KMeans(3); kmeans.Compute(observations, out e); if (error != e) differ = true; } Assert.IsTrue(differ); }
public void initKMeans() { KMeans.createRandomCenters(weightsByCreature); Debug.Log("initializing KMeans"); }
public override Structuring BuildStructuring() { if (Structurings == null || Set == null) { throw new NullReferenceException(); } if (IContainerProgressBar != null) { IContainerProgressBar.ResetProgressBar(1, 1, true); IContainerProgressBar.UpdateProgressBar(0, "Running QMI algorithm...", true); } List <Attribute> list_att = new List <Attribute>(); int cont = 0; foreach (Structuring s in Structurings) { foreach (Cluster c in s.Clusters.Values) { Attribute att = new Attribute("x" + cont, null); cont++; att.AttributeType = AttributeType.Numeric; list_att.Add(att); } } Set newset = new Set("Artificial"); newset.Attributes = new Attributes(list_att); newset.ElementType = ElementType.Numeric; foreach (Element e in Set.Elements) { List <object> values = new List <object>(); foreach (Structuring s in Structurings) { foreach (Cluster c in s.Clusters.Values) { double temp = c.HaveElement(e) ? 1 : 0; temp = temp - ((double)c.ElementsCount / (double)Set.ElementsCount); values.Add(temp); } } Element newelement = new Element(newset, values); newelement.Name = e.Name; newelement.Index = e.Index; newset.AddElement(newelement); } KMeans kms = new KMeans(newset, new EuclideanDistance() { AttributesToCalculateProximity = newset.Attributes.Values }); kms.ClustersCount = ClusterCount; kms.IterationsCount = IterationsCount; kms.Seed = Environment.TickCount; kms.IContainerProgressBar = IContainerProgressBar; Structuring art_struct = kms.BuildStructuring(); List <Cluster> clusters = new List <Cluster>(); cont = 0; foreach (Cluster c in art_struct.Clusters.Values) { Cluster temp = new Cluster("C-" + cont); cont++; foreach (Element item in c.Elements) { temp.AddElement(Set[item.Index]); } clusters.Add(temp); } Dictionary <string, Cluster> temp_dic = new Dictionary <string, Cluster>(); foreach (Cluster item in clusters) { temp_dic.Add(item.Name, item); } Structuring real_struct = new Partition() { Clusters = temp_dic }; return(real_struct); }
public void updateColors() { updateWeightsByCreature(); Debug.Log("updating colors"); KMeans.setColors(weightsByCreature, creatures); }
private static float[] PrepareVector(KMeans kmeans, IVectorPack <float> x, CancellationToken cancellationToken) { return(kmeans.Transform(x, null, false, null, cancellationToken)); }
public void Test_IncrementalMeanAverageSet() { for (int numOfSamples = 100; numOfSamples < 150000; numOfSamples += 15000) { // Test samples. double[][] data = new double[numOfSamples][]; // Each sample belongs to some cluster. int[] clustering = new int[data.Length]; for (int i = 0; i < numOfSamples; i++) { data[i] = new double[] { i }; clustering[i] = 0; // We have a single cluster. Every sample belongs to that cluster. } double[][] means = new double[1][]; means[0] = new double[] { 0 }; KMeans.updateMeans(data, clustering, means); // Mean of numOfSamples var mean = means[0][0]; data = new double[numOfSamples / 2][]; // Each sample belongs to some cluster. clustering = new int[data.Length]; for (int i = 0; i < numOfSamples / 2; i++) { data[i] = new double[] { i }; clustering[i] = 0; // We have a single cluster. Every sample belongs to that cluster. } // Calculate mean of numOfSamples/2 KMeans.updateMeans(data, clustering, means, 0, new double[] { 0 }); // Mean of numOfSamples/2 var mean1 = means[0][0]; data = new double[numOfSamples / 2][]; // Each sample belongs to some cluster. clustering = new int[data.Length]; for (int i = 0; i < numOfSamples / 2; i++) { data[i] = new double[] { i + (numOfSamples / 2) }; clustering[i] = 0; // We have a single cluster. Every sample belongs to that cluster. } KMeans.updateMeans(data, clustering, means, numOfSamples / 2, new double[] { mean1 }); // Mean for numbers from numOfSamples/2 to numOfSamples var mean2 = means[0][0]; // M1 = mean of numOfSamples/2 (minibatch 1) // M2 = mean for numbers from numOfSamples/2 to numOfSamples (minibatch 2) // mean is batch for numbers from 1 to numOfSamples // (1/q1+q2)[q1*M1+q2*M2] // where q1 is number of elements inside of M1 and q2 number of elements inside of M2 Assert.True(Math.Round(mean2, 2) == Math.Round(mean, 2)); } }
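The identity this test checks is that minibatch means combine by sample-count weighting: if q1 samples average to M1 and q2 samples average to M2, the full-batch mean is (q1*M1 + q2*M2)/(q1 + q2). As a one-line sketch:

// Weighted merge of two partial means; this is what the incremental
// updateMeans calls above are expected to reproduce.
static double MergeMeans(double m1, long q1, double m2, long q2)
    => (q1 * m1 + q2 * m2) / (q1 + q2);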
public void buildModel() { dataPrepStrata dStrat; if(InValueRaster==null) { dStrat = new dataPrepStrata(InTable, VariableFieldNames, StrataField); } else { dStrat = new dataPrepStrata(InValueRaster,InStrataRaster); } inpath = dStrat.InPath; n = dStrat.N; kmeans = dStrat.Model; lbl = dStrat.Labels; k = lbl.Count; prop = 1; }
public void KMeansConstructorTest3() { // Create a new algorithm KMeans kmeans = new KMeans(3); Assert.IsNotNull(kmeans.Clusters); Assert.IsNotNull(kmeans.Distance); Assert.IsNotNull(kmeans.Clusters.Centroids); Assert.IsNotNull(kmeans.Clusters.Count); Assert.IsNotNull(kmeans.Clusters.Covariances); Assert.IsNotNull(kmeans.Clusters.Proportions); }
internal static HandleRef getCPtr(KMeans obj) { return((obj == null) ? new HandleRef(null, IntPtr.Zero) : obj.swigCPtr); }
public List <List <Instance> > FindClusters(InstanceModel model, List <Instance> instances, out List <IEmergingPattern> selectedPatterns) { NominalFeature classFeature = null; FeatureInformation backupFeatureInformation = null; string[] backupClassValues = null; double[] backupClassByInstance = null; bool isClassPresent = true; if (model.ClassFeature() == null) { isClassPresent = false; classFeature = new NominalFeature("class", model.Features.Length); var backupFeatures = model.Features; model.Features = new Feature[backupFeatures.Length + 1]; for (int i = 0; i < backupFeatures.Length; i++) { model.Features[i] = backupFeatures[i]; } model.Features[backupFeatures.Length] = classFeature; } else { classFeature = model.ClassFeature() as NominalFeature; backupFeatureInformation = classFeature.FeatureInformation; backupClassValues = classFeature.Values; backupClassByInstance = new double[instances.Count]; for (int i = 0; i < instances.Count; i++) { backupClassByInstance[i] = instances[i][classFeature]; instances[i][classFeature] = 0; } } classFeature.FeatureInformation = new NominalFeatureInformation() { Distribution = new double[] { 1, 1, 1, 1, 1 }, Ratio = new double[] { 1, 1, 1, 1, 1 }, ValueProbability = new double[] { 1, 1, 1, 1, 1 } }; classFeature.Values = new string[1] { "Unknown" }; var Miner = new UnsupervisedRandomForestMiner() { ClusterCount = ClusterCount, TreeCount = 100 }; var patterns = Miner.Mine(model, instances, classFeature); var instIdx = new Dictionary <Instance, int>(); for (int i = 0; i < instances.Count; i++) { instIdx.Add(instances[i], i); } int[,] similarityMatrix = new int[instances.Count, instances.Count + 1]; var coverSetByPattern = new Dictionary <IEmergingPattern, HashSet <Instance> >(); foreach (var pattern in patterns) { if (pattern != null) { var currentCluster = new List <int>(); var currentCoverSet = new HashSet <Instance>(); for (int i = 0; i < instances.Count; i++) { if (pattern.IsMatch(instances[i])) { currentCluster.Add(i); currentCoverSet.Add(instances[i]); } } for (int i = 0; i < currentCluster.Count; i++) { for (int j = 0; j < currentCluster.Count; j++) { similarityMatrix[currentCluster[i], currentCluster[j]] += 1; similarityMatrix[currentCluster[i], instances.Count] += 1; } } coverSetByPattern.Add(pattern, currentCoverSet); } } var kmeans = new KMeans() { K = ClusterCount, classFeature = classFeature, similarityMatrix = similarityMatrix, instIdx = instIdx }; var clusterList = kmeans.FindClusters(instances); var patternClusterList = new List <List <IEmergingPattern> >(); for (int i = 0; i < ClusterCount; i++) { patternClusterList.Add(new List <IEmergingPattern>()); } foreach (var pattern in patterns) { if (pattern != null) { var bestIdx = 0; var maxCoverCount = int.MinValue; pattern.Supports = new double[ClusterCount]; pattern.Counts = new double[ClusterCount]; HashSet <Instance> bestCover = null; for (int i = 0; i < ClusterCount; i++) { HashSet <Instance> currentCover = new HashSet <Instance>(coverSetByPattern[pattern].Intersect(clusterList[i])); var currentCoverCount = currentCover.Count; pattern.Counts[i] = currentCoverCount; pattern.Supports[i] = 1.0 * currentCoverCount / clusterList[i].Count; if (currentCoverCount > maxCoverCount) { maxCoverCount = currentCoverCount; bestIdx = i; bestCover = currentCover; } } coverSetByPattern[pattern] = bestCover; patternClusterList[bestIdx].Add(pattern); } } selectedPatterns = FilterPatterns(instances, patternClusterList); if (isClassPresent) { classFeature.FeatureInformation = backupFeatureInformation; 
classFeature.Values = backupClassValues; for (int i = 0; i < instances.Count; i++) { instances[i][classFeature] = backupClassByInstance[i]; } } return(clusterList); }
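The nested loop in the middle of this method builds what is effectively a co-association matrix: every pattern that matches a set of instances increments the pairwise similarity of all instances in that set, with an extra column accumulating running row totals. Condensed into a hypothetical helper:

// Hypothetical condensed form of the similarity update above: a pattern covering
// the instance indexes in `covered` raises similarity[i, j] for every pair, and
// accumulates a row total in the extra column at index n (= instance count).
static void AddPatternCover(int[,] similarity, List<int> covered, int n)
{
    foreach (int i in covered)
    {
        foreach (int j in covered)
        {
            similarity[i, j] += 1;
            similarity[i, n] += 1;
        }
    }
}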
private static void clusterSC()
{
    int s = 100, K = 100;
    string[] templatefiles = Directory.GetFiles(@"D:\Play Data\my_template_data\sc\", "*.sc")
        .Take(180).ToArray();
    int templatecount = templatefiles.Length;

    #region Load templates
    Debug("Opening {0} templates --------------------------------------------", templatecount);
    double[][] templates = new double[templatecount * s][];
    MyTimer timer = new MyTimer();
    timer.Restart();
    for (int i = 0; i < templatefiles.Length; ++i)
    {
        string file = templatefiles[i];
        string filename = Path.GetFileNameWithoutExtension(file);
        using (var fs = new FileStream(file, FileMode.Open))
        {
            using (var br = new BinaryReader(fs))
            {
                for (int j = 0; j < s; ++j)
                {
                    templates[i * s + j] = new double[60];
                    for (int k = 0; k < 60; ++k)
                    {
                        templates[i * s + j][k] = br.ReadDouble();
                    }
                }
            }
        }
        if (i % 100 == 0) Debug("{0} templates done", i);
    }
    Debug("Template loading finished in {0} ms.", timer.Stop());
    #endregion

    #region Clustering
    timer.Restart();
    KMeans<double[]> kmeans = new KMeans<double[]>(templates, K,
        Jim.OCR.ShapeContext2D.ShapeContext.HistCost,
        scs =>
        {
            double[] scnew = new double[60];
            for (int k = 0; k < 60; ++k)
            {
                scnew[k] = scs.Average(sc => sc[k]);
            }
            return scnew;
        });
    kmeans.MaxIterate = 100;
    var cluster = kmeans.Cluster();
    Debug("Clustering finished in {0} ms.", timer.Stop());
    #endregion

    using (var fs = new FileStream(Path.Combine(@"D:\Play Data\my_template_data\sm-" + K, templatecount + ".sm"), FileMode.Create))
    {
        using (var bw = new BinaryWriter(fs))
        {
            for (int i = 0; i < K; ++i)
            {
                for (int k = 0; k < 60; ++k)
                {
                    bw.Write(cluster[i].Center[k]);
                }
            }
        }
    }
}
public void IndexFiles(FileInfo[] imageFiles, System.ComponentModel.BackgroundWorker IndexBgWorker, Action<string> logWriter, LocateSettings locateSetting = null) { //For Time Profilling long extractingTime, kMeanTime = 0, calcBagOfVisualTime = 0; Stopwatch sw1; SimpleSurfSift.LoCATe descriptorExtractor = new SimpleSurfSift.LoCATe(); sw1 = Stopwatch.StartNew(); logWriter("Index started, extracting Descriptors..."); List<double[]> ListofDescriptorsForCookBook = new List<double[]>(); List<LoCATeRecord> ListOfAllImageDescriptors = new List<LoCATeRecord>(); int totalFileCount = imageFiles.Length; if (totalFileCount == 0) { logWriter("No files to index"); return; }; for (int i = 0; i < totalFileCount; i++) { var fi = imageFiles[i]; using (Bitmap observerImage = (Bitmap)Image.FromFile(fi.FullName)) { List<double[]> locateDescriptors = descriptorExtractor.extract(observerImage, "SURF"); ListOfAllImageDescriptors.Add(new LoCATeRecord { Id = i, ImageName = fi.Name, ImagePath = fi.FullName, LoCATeDescriptors = locateDescriptors }); if (locateSetting.IsCodeBookNeedToBeCreated) { if (locateDescriptors.Count > 4) { RandomHelper randNumGenerator = new RandomHelper(); List<int> randIndexes = randNumGenerator.GetRandomNumberInRange(0, locateDescriptors.Count, 10d); foreach (int index in randIndexes) { ListofDescriptorsForCookBook.Add(locateDescriptors[index]); } } else { Debug.WriteLine(fi.Name + " skip from index, because it didn't have significant feature"); } } } IndexBgWorker.ReportProgress(i); } sw1.Stop(); extractingTime = Convert.ToInt32(sw1.Elapsed.TotalSeconds); double[][] codeBook = null; if (locateSetting.IsCodeBookNeedToBeCreated) { logWriter("Indexing, Calculating Mean..."); sw1.Reset(); sw1.Start(); KMeans kMeans = new KMeans(locateSetting.SizeOfCodeBook); kMeans.Compute(ListofDescriptorsForCookBook.ToArray()); codeBook = kMeans.Clusters.Centroids; //------------Save CookBook string fullFileName = locateSetting.CodeBookFullPath; if (File.Exists(fullFileName)) File.Delete(fullFileName); using (FileStream fs = new FileStream(fullFileName, FileMode.Create, FileAccess.Write, FileShare.None)) { System.Runtime.Serialization.Formatters.Binary.BinaryFormatter bf = new System.Runtime.Serialization.Formatters.Binary.BinaryFormatter(); bf.Serialize(fs, codeBook); fs.Close(); } sw1.Stop(); kMeanTime = Convert.ToInt32(sw1.Elapsed.TotalSeconds); } else { string fullFileName = locateSetting.CodeBookFullPath; if (!File.Exists(fullFileName)) { string msg = string.Format("Couldn't find {0}, Please Index before querying with Locate", fullFileName); throw new InvalidOperationException(msg); } using (FileStream fs = new FileStream(fullFileName, FileMode.Open, FileAccess.Read, FileShare.None)) { System.Runtime.Serialization.Formatters.Binary.BinaryFormatter bf = new System.Runtime.Serialization.Formatters.Binary.BinaryFormatter(); codeBook = (double[][])bf.Deserialize(fs); fs.Close(); } } logWriter("Indexing, Calculating Bag of Visual Words..."); sw1.Reset(); sw1.Start(); List<LoCaTeBoWRecord> ListOfImageVisualBagOfWorks = new List<LoCaTeBoWRecord>(); for (int i = 0; i < ListOfAllImageDescriptors.Count; i++) { double[] visualWordForImage = createVisualWord(ListOfAllImageDescriptors[i].LoCATeDescriptors, codeBook); LoCaTeBoWRecord rec = new LoCaTeBoWRecord { Id = ListOfAllImageDescriptors[i].Id, ImageName = ListOfAllImageDescriptors[i].ImageName, ImagePath = ListOfAllImageDescriptors[i].ImagePath, VisaulWord = visualWordForImage }; ListOfImageVisualBagOfWorks.Add(rec); IndexBgWorker.ReportProgress(i); } 
logWriter("Indexing, Calculating ltcData..."); int[] histogramSumOfAllVisualWords = null; //------------Creating sum histogram of all words double[][] AllDatas = ListOfImageVisualBagOfWorks.Select(des => des.VisaulWord).ToArray(); histogramSumOfAllVisualWords = createIndex((double[][])(AllDatas)); //------------Creating Image Records Data LoCaTeDataSet locateDS = new LoCaTeDataSet { AllImageRecordSet = ListOfImageVisualBagOfWorks, HistogramSumOfAllVisualWords = histogramSumOfAllVisualWords }; logWriter("Indexing, Saving Image Data..."); //------------Save CookBook string ImageRecordName = Path.Combine(DirectoryHelper.SaveDirectoryPath, "LoCATeImageRecords.bin"); if (File.Exists(ImageRecordName)) File.Delete(ImageRecordName); using (FileStream fs = new FileStream(ImageRecordName, FileMode.Create, FileAccess.Write, FileShare.None)) { System.Runtime.Serialization.Formatters.Binary.BinaryFormatter bf = new System.Runtime.Serialization.Formatters.Binary.BinaryFormatter(); bf.Serialize(fs, locateDS); fs.Close(); } sw1.Stop(); calcBagOfVisualTime = Convert.ToInt32(sw1.Elapsed.TotalSeconds); logWriter(string.Format("Extracting: {0} sec, KMeanTime: {1} sec, CalcBagOfVisalTime: {2} sec", extractingTime, kMeanTime, calcBagOfVisualTime)); }
public (List <string> classes, KMeans kmeans, OneVsAllSupportVectorMachine svm) FinishLearning( int vectorLength, CancellationToken cancellationToken) { // count classes List <string> classes = new List <string>(this.features.Select(x => x.truth).ToLookup(x => x).Select(x => x.Key)); if (classes.Count < 2) { throw new ArgumentException(); } classes.Sort(); // count vectors int numberOfVectors = this.features.Sum(x => x.features.Count); // copy vectors Dictionary <IVector <float>, float> vectors = new Dictionary <IVector <float>, float>(numberOfVectors); for (int i = 0, ii = this.features.Count; i < ii; i++) { FeatureDetectors.Features f = this.features[i].features; for (int j = 0, jj = f.Count, len = f.Length, off = 0; j < jj; j++, off += len) { ////DenseVectorF vector = new DenseVectorF(len, f.X, off); SparseVectorF vector = SparseVectorF.FromDense(len, f.X, off); vectors[vector] = vectors.TryGetValue(vector, out float weight) ? weight + 1.0f : 1.0f; } } cancellationToken.ThrowIfCancellationRequested(); // learn k-means KMeans kmeans = KMeans.Learn( vectorLength, KMeansSeeding.Random, 2, default(EuclideanDistance), vectors.Keys.ToList(), vectors.Values.ToList(), cancellationToken); cancellationToken.ThrowIfCancellationRequested(); // learn svm Dictionary <string, int> classesLookup = classes.ToDictionary((x, i) => x, (x, i) => i); SequentualMinimalOptimization smo = new SequentualMinimalOptimization(new ChiSquare()) { Algorithm = SMOAlgorithm.LibSVM, Tolerance = 0.01f, }; List <float[]> svmx = new List <float[]>(this.features.Count); List <int> svmy = new List <int>(this.features.Count); for (int i = 0, ii = this.features.Count; i < ii; i++) { (FeatureDetectors.Features features, string truth) = this.features[i]; svmx.Add(PointsOfInterestClassifier.PrepareVector(kmeans, features, cancellationToken)); svmy.Add(classesLookup[truth]); } cancellationToken.ThrowIfCancellationRequested(); OneVsAllSupportVectorMachine svm = OneVsAllSupportVectorMachine.Learn( smo, classes.Count, svmx, svmy, null, cancellationToken); cancellationToken.ThrowIfCancellationRequested(); return(classes, kmeans, svm); }
public IActionResult Partition2(int partitionCount) { using (var db = new FusekiContext()) { var articles = db.Articles.Where(el => el.Published == true) .Include(el => el.Tags).Take(20).ToList(); //get a tag vector for each article. var allTags = new HashSet <string>(); //TODO: What happens if we remove all tags which only occur once. foreach (var article in articles) { foreach (var tag in article.Tags) { allTags.Add(tag.Name); } } var newAllTags = new HashSet <string>(); foreach (var t in allTags) { var relatedArticles = db.Articles.Where(el => el.Tags.Select(tag => tag.Name).Contains(t)); if (relatedArticles.Count() > 1) { newAllTags.Add(t); } } allTags = newAllTags; var allTagsOrdered = allTags.OrderBy(el => el); var obs = new List <List <double> >(); var dict = new Dictionary <string, object>(); foreach (var article in articles) { var articleTags = article.Tags.Select(el => el.Name); var vector = new List <double>(); foreach (var tag in allTagsOrdered) { if (articleTags.Contains(tag)) { vector.Add(1); } else { vector.Add(0); } } obs.Add(vector); } var vecvec = obs.Select(el => el.ToArray()).ToArray(); var kmeans = new KMeans(k: partitionCount); var clusters = kmeans.Learn(vecvec); dict["Kmeans Error"] = kmeans.Error; dict["dimensionality"] = kmeans.Dimension; dict["Iterations"] = kmeans.Iterations; dict["MaxIterations"] = kmeans.MaxIterations; dict["Tolerance"] = kmeans.Tolerance; int[] labels = clusters.Decide(vecvec); //labels is array[articleId] => partitionNumber var ii = 0; var psets = new List <PartitionSet <Article> >(); //this is totally fake. TODO: refactor these to be dumber - no need to have comparators etc. var dm = new DistanceMetrics <Article>((a, b) => Comparators.GetTagCommonality(a, b), (a, b) => Comparators.ArticleKeyLookup(a, b)); while (ii < partitionCount) { //TODO: is accord zero indexed? psets.Add(new PartitionSet <Article>(dm, ii)); ii++; } var index = 0; foreach (var l in labels) { var article = articles[index]; index++; psets[l].Add(article); } var partitiondata = new PartitionData <Article>(psets, dict); var model = new ArticlePartitionModel(partitiondata); return(View("ArticlePartitions", model)); } }
public void learn_test_mixed()
{
    #region doc_learn_mixed
    Accord.Math.Random.Generator.Seed = 0;

    // Declare some mixed discrete and continuous observations
    double[][] observations =
    {
        //  (categorical) (discrete) (continuous)
        new double[] { 1, -1, -2.2 },
        new double[] { 1, -6, -5.5 },
        new double[] { 2,  1,  1.1 },
        new double[] { 2,  2,  1.2 },
        new double[] { 2,  2,  2.6 },
        new double[] { 3,  2,  1.4 },
        new double[] { 3,  4,  5.2 },
        new double[] { 1,  6,  5.1 },
        new double[] { 1,  6,  5.9 },
    };

    // Create a new codification algorithm to convert
    // the mixed variables above into all continuous:
    var codification = new Codification<double>()
    {
        CodificationVariable.Categorical,
        CodificationVariable.Discrete,
        CodificationVariable.Continuous
    };

    // Learn the codification from observations
    var model = codification.Learn(observations);

    // Transform the mixed observations into only continuous:
    double[][] newObservations = model.ToDouble().Transform(observations);

    // (newObservations will be equivalent to)
    double[][] expected =
    {
        //  (one hot)   (discrete) (continuous)
        new double[] { 1, 0, 0, -1, -2.2 },
        new double[] { 1, 0, 0, -6, -5.5 },
        new double[] { 0, 1, 0,  1,  1.1 },
        new double[] { 0, 1, 0,  2,  1.2 },
        new double[] { 0, 1, 0,  2,  2.6 },
        new double[] { 0, 0, 1,  2,  1.4 },
        new double[] { 0, 0, 1,  4,  5.2 },
        new double[] { 1, 0, 0,  6,  5.1 },
        new double[] { 1, 0, 0,  6,  5.9 },
    };

    // Create a new K-Means algorithm
    KMeans kmeans = new KMeans(k: 3);

    // Compute and retrieve the data centroids
    var clusters = kmeans.Learn(observations);

    // Use the centroids to partition all the data
    int[] labels = clusters.Decide(observations);
    #endregion

    Assert.IsTrue(expected.IsEqual(newObservations, 1e-8));

    Assert.AreEqual(3, codification.NumberOfInputs);
    Assert.AreEqual(5, codification.NumberOfOutputs);
    Assert.AreEqual(3, codification.Columns.Count);
    Assert.AreEqual("0", codification.Columns[0].ColumnName);
    Assert.AreEqual(3, codification.Columns[0].NumberOfSymbols);
    Assert.AreEqual(1, codification.Columns[0].NumberOfInputs);
    Assert.AreEqual(1, codification.Columns[0].NumberOfOutputs);
    Assert.AreEqual(3, codification.Columns[0].NumberOfClasses);
    Assert.AreEqual(CodificationVariable.Categorical, codification.Columns[0].VariableType);
    Assert.AreEqual("1", codification.Columns[1].ColumnName);
    Assert.AreEqual(1, codification.Columns[1].NumberOfSymbols);
    Assert.AreEqual(1, codification.Columns[1].NumberOfInputs);
    Assert.AreEqual(1, codification.Columns[1].NumberOfOutputs);
    Assert.AreEqual(1, codification.Columns[1].NumberOfClasses);
    Assert.AreEqual(CodificationVariable.Discrete, codification.Columns[1].VariableType);
    Assert.AreEqual("2", codification.Columns[2].ColumnName);
    Assert.AreEqual(1, codification.Columns[2].NumberOfSymbols);
    Assert.AreEqual(1, codification.Columns[2].NumberOfInputs);
    Assert.AreEqual(1, codification.Columns[2].NumberOfOutputs);
    Assert.AreEqual(1, codification.Columns[2].NumberOfClasses);
    Assert.AreEqual(CodificationVariable.Continuous, codification.Columns[2].VariableType);

    Assert.AreEqual(labels[0], labels[2]);
    Assert.AreEqual(labels[0], labels[3]);
    Assert.AreEqual(labels[0], labels[4]);
    Assert.AreEqual(labels[0], labels[5]);
    Assert.AreEqual(labels[6], labels[7]);
    Assert.AreEqual(labels[6], labels[8]);
    Assert.AreNotEqual(labels[0], labels[1]);
    Assert.AreNotEqual(labels[0], labels[6]);

    int[] labels2 = kmeans.Clusters.Decide(observations);
    Assert.IsTrue(labels.IsEqual(labels2));

    var c = new KMeansClusterCollection.KMeansCluster[clusters.Count];
    int i = 0;
    foreach (var cluster in clusters)
    {
        c[i++] = cluster;
    }

    for (i = 0; i < c.Length; i++)
    {
        Assert.AreSame(c[i], clusters[i]);
    }
}
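Worth noting: the k-means call in this test runs on the original mixed observations, not on the codified newObservations. If clustering the all-continuous representation is preferred, the same API accepts it (a sketch, not part of the test):

// Cluster the one-hot-plus-continuous representation instead of the raw mix.
var mixedKMeans = new KMeans(k: 3);
var mixedClusters = mixedKMeans.Learn(newObservations);
int[] mixedLabels = mixedClusters.Decide(newObservations);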
public void buildModel() { if (inputMatrix == null) getMatrix(); switch (cType) { case clusterType.KMEANS: KMeans kmeans = new KMeans(k); kmeans.Compute(inputMatrix, precision); clusterCollection = kmeans.Clusters; model = kmeans; break; case clusterType.BINARY: BinarySplit bSplit = new BinarySplit(k); bSplit.Compute(inputMatrix, precision); clusterCollection = bSplit.Clusters; model = bSplit; //Console.WriteLine("BinarySplit"); break; case clusterType.GAUSSIANMIXTURE: GaussianMixtureModel gModel = new GaussianMixtureModel(k); gModel.Compute(inputMatrix, precision); clusterCollection = gModel.Gaussians; model = gModel; break; default: break; } lbl = new List<string>(); for (int i = 0; i < k; i++) { lbl.Add(i.ToString()); } }
public void learn_test()
{
    #region doc_learn
    Accord.Math.Random.Generator.Seed = 0;

    // Declare some observations
    double[][] observations =
    {
        new double[] { -5, -2, -1 },
        new double[] { -5, -5, -6 },
        new double[] {  2,  1,  1 },
        new double[] {  1,  1,  2 },
        new double[] {  1,  2,  2 },
        new double[] {  3,  1,  2 },
        new double[] { 11,  5,  4 },
        new double[] { 15,  5,  6 },
        new double[] { 10,  5,  6 },
    };

    // Create a new K-Means algorithm
    KMeans kmeans = new KMeans(k: 3);

    // Compute and retrieve the data centroids
    var clusters = kmeans.Learn(observations);

    // Use the centroids to partition all the data
    int[] labels = clusters.Decide(observations);
    #endregion

    Assert.AreEqual(labels[0], labels[1]);
    Assert.AreEqual(labels[2], labels[3]);
    Assert.AreEqual(labels[2], labels[4]);
    Assert.AreEqual(labels[2], labels[5]);
    Assert.AreEqual(labels[6], labels[7]);
    Assert.AreEqual(labels[6], labels[8]);
    Assert.AreNotEqual(labels[0], labels[2]);
    Assert.AreNotEqual(labels[2], labels[6]);
    Assert.AreNotEqual(labels[0], labels[6]);

    int[] labels2 = kmeans.Clusters.Decide(observations);
    Assert.IsTrue(labels.IsEqual(labels2));

    // the data must not have changed!
    double[][] orig =
    {
        new double[] { -5, -2, -1 },
        new double[] { -5, -5, -6 },
        new double[] {  2,  1,  1 },
        new double[] {  1,  1,  2 },
        new double[] {  1,  2,  2 },
        new double[] {  3,  1,  2 },
        new double[] { 11,  5,  4 },
        new double[] { 15,  5,  6 },
        new double[] { 10,  5,  6 },
    };
    Assert.IsTrue(orig.IsEqual(observations));

    var c = new KMeansClusterCollection.KMeansCluster[clusters.Count];
    int i = 0;
    foreach (var cluster in clusters)
    {
        c[i++] = cluster;
    }

    for (i = 0; i < c.Length; i++)
    {
        Assert.AreSame(c[i], clusters[i]);
    }
}
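After Learn, the fitted centroids live on the cluster collection, and new points can be assigned to the nearest centroid. A short follow-on sketch (the single-point Decide overload and the query point are assumptions, not part of the test):

// Retrieve the fitted centroids and classify a previously unseen point.
double[][] centroids = kmeans.Clusters.Centroids;
int assigned = kmeans.Clusters.Decide(new double[] { 12, 5, 5 }); // hypothetical query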
private void setKMeansCluster(System.IO.StreamReader sr) { KMeans kmeans = new KMeans(k); KMeansClusterCollection kmeansColl = kmeans.Clusters; for (int i = 0; i < k; i++) { double[] mns = (from s in (sr.ReadLine().Split(new char[] { ',' })) select System.Convert.ToDouble(s)).ToArray(); string[] covVlsStr = sr.ReadLine().Split(new char[] { ',' }); double p = System.Convert.ToDouble(sr.ReadLine()); double[,] cov = new double[VariableFieldNames.Length, VariableFieldNames.Length]; for (int j = 0; j < VariableFieldNames.Length; j++) { for (int l = 0; l < VariableFieldNames.Length; l++) { int indexVl = (j * VariableFieldNames.Length) + l; cov[l, j] = System.Convert.ToDouble(covVlsStr[indexVl]); } } KMeansCluster kc = new KMeansCluster(kmeansColl, i); kc.Mean = mns; kc.Covariance = cov; kc.Proportion = p; } clusterCollection = kmeansColl; model = kmeans; }
static void Main(string[] args) { // 1. allocate a new dataflow computation. using (var computation = NewComputation.FromArgs(ref args)) { if (args.Length != 6) { PrintHelp(); return; } Int32 procid = computation.Configuration.ProcessID; Int32 thread_num = computation.Configuration.WorkerCount; Int32 worker_num = computation.Configuration.Processes; Int32 dimension = Int32.Parse(args[0]); Int32 cluster_num = Int32.Parse(args[1]); Int32 iteration_num = Int32.Parse(args[2]); Int32 partition_num = Int32.Parse(args[3]); double sample_num_m = Convert.ToDouble(args[4]); Int64 spin_wait = Int64.Parse(args[5]); Console.Out.WriteLine("dimension: " + dimension); Console.Out.WriteLine("cluster_num: " + cluster_num); Console.Out.WriteLine("iteration_num: " + iteration_num); Console.Out.WriteLine("partition_num: " + partition_num); Console.Out.WriteLine("sample_num_m: " + sample_num_m); Console.Out.WriteLine("spin_wait: " + spin_wait); Console.Out.WriteLine("procid: " + procid); Console.Out.WriteLine("worker_num: " + worker_num); Console.Out.WriteLine("thread_num: " + thread_num); Console.Out.Flush(); KMeans km = new KMeans(dimension, cluster_num, iteration_num, partition_num, sample_num_m, spin_wait, procid, worker_num, thread_num); Stream <SampleBatch, Epoch> samples = km.GenerateSamples().AsNaiadStream(computation); samples = samples.PartitionBy(s => (int)(s[0][0])); var end_samples = samples.Iterate((lc, s) => km.Advance(s), iteration_num, "KMeans"); // var output = end_samples.Subscribe(x => { // Console.Out.WriteLine("Final center 0: " + PrintList(km.means_[0])); // Console.Out.Flush(); // }); Console.Out.WriteLine("Before Activate!"); Console.Out.Flush(); // start the computation, fixing the structure of the dataflow graph. computation.Activate(); Console.Out.WriteLine("After Activate!"); Console.Out.Flush(); // block until all work is finished. computation.Join(); Console.Out.WriteLine("After Join!"); double average_total = km.total_times_.GetRange(truncate_index_, iteration_num - truncate_index_).Average(); double average_compute = km.compute_times_.GetRange(truncate_index_, iteration_num - truncate_index_).Average(); double average_idle = average_total - average_compute; Console.Out.WriteLine("*** Average for the last {0:D2} iterations: compute(ms): {1:F2} total(ms): {2:F2} (idle(ms): {3:F2})", iteration_num - truncate_index_, 1000 * average_compute, 1000 * average_total, 1000 * average_idle); for (int i = 0; i < cluster_num; ++i) { Console.Out.WriteLine("Final center {0:D2}: {1:S}: ", i, PrintList(km.means_[i])); } Console.Out.WriteLine("Samples Counts: " + PrintList(km.sample_counter)); Console.Out.WriteLine("Reduce Level 1 Counts: " + PrintList(km.reduce_l1_counter_)); Console.Out.WriteLine("Reduce Level 2 Counts: " + PrintList(km.reduce_l2_counter_)); Console.Out.WriteLine("Sync Level 1 Counts: " + PrintList(km.sync_l1_counter_)); Console.Out.WriteLine("Sync Level 2 Counts: " + PrintList(km.sync_l2_counter_)); Console.Out.WriteLine("Sync Tags: " + PrintHashSet(km.sync_tags_)); Console.Out.WriteLine("Reduce Tags: " + PrintHashSet(km.reduce_tags_)); Console.Out.WriteLine("Clustering Tags: " + PrintHashSet(km.clustering_tags_)); Console.Out.Flush(); } }
void Update() { if (Input.GetKeyDown(KeyCode.F)) { ServiceContainers.Explode(ForceScale); return; } if (Input.GetKeyDown(KeyCode.C)) { var itemsTmp = ServiceContainers.Select(c => c.transform.position).ToArray(); var resultTmp = KMeans.Cluster(itemsTmp, ClusterCount, Iterations, 0); foreach (var cluster in resultTmp.clusters) { var force = Random.onUnitSphere * ForceScale; cluster.Select(i => ServiceContainers[i]).AddForce(force); } return; } if (Input.GetKeyDown(KeyCode.A)) { foreach (var spring in ServiceContainers.SelectMany(s => s.gameObject.GetComponents <SpringJoint>())) { spring.maxDistance = 3.0f; spring.damper = 1.0f; } } if (Input.GetKeyDown(KeyCode.Space)) { if (running) { timeScaleBackup = Time.timeScale; Time.timeScale = 0; running = false; } else { Time.timeScale = timeScaleBackup; running = true; clusterized = false; } } if (running || clusterized) { return; } var items = ServiceContainers.Select(c => c.transform.position).ToArray(); var result = KMeans.Cluster(items, ClusterCount, Iterations, 0); for (var i = 0; i < result.clusters.Length; i++) { var color = Color.HSVToRGB(1f * i / result.clusters.Length, 1f, 1f); for (var j = 0; j < result.clusters[i].Length; j++) { var index = result.clusters[i][j]; ServiceContainers[index].GetComponent <MeshRenderer>().material.color = color; } } clusterized = true; }
static void Main()
{
    Accord.Math.Random.Generator.Seed = 1232;

    // Declare some observations
    double[][] observations =
    {
        new double[] { 291.5, 81.5 }, new double[] { 316, 87.5 }, new double[] { 337, 92.5 }, new double[] { 367, 87 },
        new double[] { 363.5, 102 }, new double[] { 378, 105 }, new double[] { 411, 108.5 }, new double[] { 428.5, 116.5 },
        new double[] { 465.5, 120 }, new double[] { 477, 111.5 }, new double[] { 448.5, 124.5 }, new double[] { 276, 126.5 },
        new double[] { 503.5, 129 }, new double[] { 474, 126.5 }, new double[] { 485.5, 129 }, new double[] { 293.5, 134.5 },
        new double[] { 313, 138.5 }, new double[] { 333.5, 146.5 }, new double[] { 355, 147.5 }, new double[] { 373.5, 152.5 },
        new double[] { 393, 160 }, new double[] { 413.5, 161 }, new double[] { 98.5, 327.5 }, new double[] { 113, 338.5 },
        new double[] { 130.5, 344.5 }, new double[] { 146.5, 347.5 }, new double[] { 171, 355 }, new double[] { 189, 364.5 },
        new double[] { 223.5, 372 }, new double[] { 208, 374.5 }, new double[] { 237, 365.5 }, new double[] { 74, 379 },
        new double[] { 232, 379.5 }, new double[] { 262, 385.5 }, new double[] { 244, 384 }, new double[] { 92, 388.5 },
        new double[] { 112, 395 }, new double[] { 131, 405 }, new double[] { 152, 408.5 }, new double[] { 170.5, 415.5 },
        new double[] { 485, 421.5 }, new double[] { 546, 421.5 }, new double[] { 742, 421.5 }, new double[] { 189, 425 },
        new double[] { 506.5, 424.5 }, new double[] { 528.5, 424.5 }, new double[] { 583, 424.5 }, new double[] { 604, 424.5 },
        new double[] { 624.5, 424.5 }, new double[] { 653, 430 }, new double[] { 695, 424.5 }, new double[] { 764.5, 424.5 },
        new double[] { 721, 425 }, new double[] { 208, 428.5 }, new double[] { 242, 436.5 }, new double[] { 267, 445 },
        new double[] { 286.5, 452 },
    };

    // Create a new K-Means algorithm
    KMeans kmeans = new KMeans(k: 3);

    // Compute and retrieve the data centroids
    var clusters = kmeans.Learn(observations);

    // Use the centroids to partition all the data
    int[] labels = clusters.Decide(observations);

    Console.WriteLine("Hello World!");

    // Keep the console window open in debug mode.
    Console.WriteLine("Press any key to exit.");
    Console.ReadKey();
}
private void btnInitialize_Click(object sender, EventArgs e) { kmeans = new KMeans(k); kmeans.Compute(mixture); // Classify all instances in mixture data int[] classifications = kmeans.Classify(mixture); // Draw the classifications updateGraph(classifications); }
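This last snippet uses the older Compute/Classify surface of Accord.NET. With the Learn/Decide API seen in the other examples in this collection, the equivalent would be (a sketch, assuming mixture is a double[][]):

// Same clustering expressed with the newer API used elsewhere above.
var clusterCollection = new KMeans(k).Learn(mixture);
int[] classifications = clusterCollection.Decide(mixture);
updateGraph(classifications);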