/// <summary>
/// Trains a kernel centroid on samples of the sinc function and then prints how far
/// points on the curve, and points deliberately moved off it, lie from that centroid.
/// </summary>
private static void Main()
{
    // x-coordinates probed after training. The same seven values are used for the
    // on-curve report and for the off-curve report.
    double[] probeXs = { -1.5, -1.5, -0, -0.5, -4.1, -1.5, -0.5 };

    // One distortion per probe, applied to Sinc(x) to push the point off the curve.
    Func<double, double>[] offCurve =
    {
        s => s + 4,
        s => s + 3,
        s => -s,
        s => -s,
        s => s + 2,
        s => s + 0.9,
        s => s + 1,
    };

    // Radial basis kernel over 2x1 column vectors. The centroid takes the kernel, a
    // tolerance (smaller is more accurate but uses more dictionary vectors, so more
    // time and memory) and a cap on the number of dictionary vectors, which bounds
    // the runtime cost.
    using (var kernel = new RadialBasisKernel<double, Matrix<double>>(0.1d, 2, 1))
    using (var centroid = new KCentroid<double, RadialBasisKernel<double, Matrix<double>>>(kernel, 0.01, 15))
    using (var point = Matrix<double>.CreateTemplateParameterizeMatrix(2, 1))
    {
        // Train on sinc samples taken at x = -15, -14, ..., 8.
        var x = -15d;
        while (x <= 8)
        {
            point[0] = x;
            point[1] = Sinc(x);
            centroid.Train(point);
            x += 1;
        }

        using (var stats = new RunningStats<double>())
        {
            // Distances for points that really are on the curve. They should all be
            // similar; they are accumulated in `stats` so the mean and standard
            // deviation are available for the comparison below.
            Console.WriteLine("Points that are on the sinc function:");
            foreach (var probe in probeXs)
            {
                point[0] = probe;
                point[1] = Sinc(point[0]);
                Console.WriteLine($" {centroid.Operator(point)}");
                stats.Add(centroid.Operator(point));
            }

            Console.WriteLine();

            // Distances for points that are NOT on the curve. These should be clearly
            // larger; Scale() expresses each one in standard deviations from the
            // on-curve mean, so "clearly larger" here means more than 3 or 4.
            Console.WriteLine("Points that are NOT on the sinc function:");
            for (var i = 0; i < probeXs.Length; ++i)
            {
                point[0] = probeXs[i];
                point[1] = offCurve[i](Sinc(point[0]));
                Console.WriteLine($" {centroid.Operator(point)} is {stats.Scale(centroid.Operator(point))} standard deviations from sinc.");
            }

            // Summary statistics of the on-curve distances.
            Console.WriteLine($"\nmean: {stats.Mean}");
            Console.WriteLine($"standard deviation: {stats.StdDev}");

            // Reference output:
            /*
             * Points that are on the sinc function:
             *     0.869913
             *     0.869913
             *     0.873408
             *     0.872807
             *     0.870432
             *     0.869913
             *     0.872807
             *
             * Points that are NOT on the sinc function:
             *     1.06366 is 119.65 standard deviations from sinc.
             *     1.02212 is 93.8106 standard deviations from sinc.
             *     0.921382 is 31.1458 standard deviations from sinc.
             *     0.918439 is 29.3147 standard deviations from sinc.
             *     0.931428 is 37.3949 standard deviations from sinc.
             *     0.898018 is 16.6121 standard deviations from sinc.
             *     0.914425 is 26.8183 standard deviations from sinc.
             *
             * mean: 0.871313
             * standard deviation: 0.00160756
             */

            // In this example the centroid clearly separates the off-curve points
            // from the points that belong to the sinc function.
        }
    }
}
/// <summary>
/// Builds three rings of 2-D sample points, clusters them with kernel k-means (k = 3),
/// prints the predicted cluster of every sample and the dictionary size of each centre,
/// then clusters the same samples again with spectral clustering.
/// </summary>
private static void Main()
{
    // Radial basis kernel over 2x1 column vectors.
    using (var rbk = new RadialBasisKernel<double, Matrix<double>>(0.1d, 2, 1))
    // The kcentroid represents each cluster centre. It takes the kernel, a tolerance
    // (smaller is more accurate but uses more dictionary vectors, so more time and
    // memory) and a cap on the number of dictionary vectors, which bounds the
    // runtime cost.
    using (var kc = new KCentroid<double, RadialBasisKernel<double, Matrix<double>>>(rbk, 0.01, 8))
    // The kkmeans object creates its centres with the configuration taken from kc.
    using (var test = new KKMeans<double, RadialBasisKernel<double, Matrix<double>>>(kc))
    {
        var samples = new List<Matrix<double>>();
        try
        {
            using (var m = Matrix<double>.CreateTemplateParameterizeMatrix(2, 1))
            using (var rnd = new Rand())
            {
                // 50 points per class.
                const int num = 50;

                // A small ring near the origin, a large ring around the origin, and a
                // ring around the point (25, 25).
                AddCircleSamples(samples, m, rnd, num, 0.5d, 0, 0);
                AddCircleSamples(samples, m, rnd, num, 10.0, 0, 0);
                AddCircleSamples(samples, m, rnd, num, 4.0, 25, 25);

                // Run k-means with k = 3, i.e. ask for 3 clusters.
                test.NumberOfCenters = 3;

                // k-means needs initial centres; PickInitialCenters tries to find
                // points that are far apart.
                // NOTE(review): initialCenters is never disposed here. If the call
                // returns newly allocated matrices they may need releasing too -
                // confirm against the library.
                var initialCenters = Dlib.PickInitialCenters(3, samples, test.Kernel);

                // Run the k-means algorithm on the sample set.
                test.Train(samples, initialCenters);

                // Print the predicted class of every sample, one row per index with
                // one column per ring. In this example all points are correctly
                // identified.
                for (var i = 0; i < samples.Count / 3; ++i)
                {
                    Console.Write($"{test.Operator(samples[i])} ");
                    Console.Write($"{test.Operator(samples[i + num])} ");
                    Console.WriteLine($"{test.Operator(samples[i + 2 * num])}");
                }

                // Print how many dictionary vectors each centre used. The maximum of 8
                // is reached; raising the limit in the kcentroid constructor would
                // raise these numbers, but 8 is enough to cluster this dataset.
                Console.WriteLine($"num dictionary vectors for center 0: {test.GetKCentroid(0).DictionarySize}");
                Console.WriteLine($"num dictionary vectors for center 1: {test.GetKCentroid(1).DictionarySize}");
                Console.WriteLine($"num dictionary vectors for center 2: {test.GetKCentroid(2).DictionarySize}");

                // The same non-linear clustering problem solved with spectral
                // clustering. The result says which cluster each sample belongs to.
                using (var tmp = new RadialBasisKernel<double, Matrix<double>>(0.1, 2, 1))
                {
                    var assignments = Dlib.SpectralCluster(tmp, samples, 3);
                    using (var mat = Dlib.Mat(assignments))
                    {
                        Console.WriteLine($"{mat}");
                    }
                }
            }
        }
        finally
        {
            // Release the cloned sample matrices even when clustering throws;
            // previously an exception skipped this call and leaked every sample.
            samples.DisposeElement();
        }
    }
}

/// <summary>
/// Appends <paramref name="count"/> random points lying on a circle to
/// <paramref name="samples"/>.
/// </summary>
/// <param name="samples">List that receives a clone of every generated point.</param>
/// <param name="scratch">Reusable 2x1 matrix used to build each point before cloning.</param>
/// <param name="rnd">Random source; two values are drawn per point (sign first, then x).</param>
/// <param name="count">Number of points to generate.</param>
/// <param name="radius">Radius of the circle.</param>
/// <param name="centerX">Amount added to the x-coordinate of every point.</param>
/// <param name="centerY">Amount added to the y-coordinate of every point.</param>
private static void AddCircleSamples(
    List<Matrix<double>> samples,
    Matrix<double> scratch,
    Rand rnd,
    int count,
    double radius,
    double centerX,
    double centerY)
{
    for (var i = 0; i < count; ++i)
    {
        // Choose the upper or the lower half of the circle.
        var sign = rnd.GetRandomDouble() < 0.5 ? -1d : 1d;

        // Draw x in [-radius, radius) and solve x^2 + y^2 = radius^2 for y.
        scratch[0] = 2 * radius * rnd.GetRandomDouble() - radius;
        scratch[1] = sign * Math.Sqrt(radius * radius - scratch[0] * scratch[0]);

        // Move the point onto the requested centre.
        scratch[0] += centerX;
        scratch[1] += centerY;

        // Store a copy, because the scratch matrix is overwritten on the next pass.
        samples.Add(scratch.Clone());
    }
}