/// <summary>
/// Learns a <see cref="KMeans"/> model by grouping the given data points into <paramref name="k"/> clusters.
/// </summary>
/// <param name="k">The number of clusters.</param>
/// <param name="seeding">The cluster initialization algorithm.</param>
/// <param name="maxiter">
/// The number of assignment/update iterations to run.
/// The loop always runs this many iterations; this method performs no early convergence test.
/// </param>
/// <param name="distance">The distance function.</param>
/// <param name="x">The data points to clusterize.</param>
/// <param name="weights">The <c>weight</c> of importance for each data point; <b>null</b> gives every point a weight of 1.</param>
/// <param name="cancellationToken">The token used to notify the method that the operation should be canceled.</param>
/// <returns>
/// The <see cref="KMeans"/> clusterizer learned by this method.
/// </returns>
/// <exception cref="ArgumentNullException">
/// <para><paramref name="x"/> is <b>null</b>.</para>
/// <para>-or-</para>
/// <para><paramref name="distance"/> is <b>null</b>.</para>
/// </exception>
/// <exception cref="ArgumentException">
/// <para><paramref name="weights"/> is not <b>null</b> and the number of elements in <paramref name="weights"/> does not match the number of elements in <paramref name="x"/>.</para>
/// <para>-or-</para>
/// <para><paramref name="x"/> contains no elements.</para>
/// </exception>
/// <exception cref="OperationCanceledException">
/// Cancellation was requested through <paramref name="cancellationToken"/>.
/// </exception>
public static KMeans Learn(
    int k,
    KMeansSeeding seeding,
    int maxiter,
    IVectorDistance<float, IVector<float>, float> distance,
    IList<IVector<float>> x,
    IList<float> weights,
    CancellationToken cancellationToken)
{
    if (x == null)
    {
        throw new ArgumentNullException(nameof(x));
    }

    // The documented contract promises this exception; enforce it here instead of
    // relying on whatever the cluster collection does with a null distance.
    if (distance == null)
    {
        throw new ArgumentNullException(nameof(distance));
    }

    if (weights != null && weights.Count != x.Count)
    {
        throw new ArgumentException("The number of weights must match the number of input vectors.", nameof(weights));
    }

    // The vector dimension is taken from the first sample, so at least one sample is required.
    if (x.Count == 0)
    {
        throw new ArgumentException("The collection of input vectors must not be empty.", nameof(x));
    }

    int sampleCount = x.Count;
    int dimension = x[0].Length;

    KMeansClusterCollection clusters = new KMeansClusterCollection(k, dimension, distance);
    switch (seeding)
    {
        case KMeansSeeding.KMeansPlusPlus:
            clusters.KMeansPlusPlusSeeding(x, weights, cancellationToken);
            break;

        default:
            clusters.RandomSeeding(x, weights, cancellationToken);
            break;
    }

    // Per-cluster accumulated weights and per-cluster accumulated vectors
    // (cluster i owns the slice [i * dimension, (i + 1) * dimension) of means).
    float[] counts = new float[k];
    float[] means = new float[k * dimension];
    object sync = new object();

    for (int iter = 0; iter < maxiter; iter++)
    {
        cancellationToken.ThrowIfCancellationRequested();

        // reset means and counts (freshly allocated arrays are already zero on the first pass)
        if (iter > 0)
        {
            Vectors.Set(counts.Length, 0.0f, counts, 0);
            Vectors.Set(means.Length, 0.0f, means, 0);
        }

        // assign vectors to new clusters
        CommonParallel.For(
            0,
            sampleCount,
            (a, b) =>
            {
                // Each range [a, b) accumulates into private buffers so the shared
                // arrays are only touched once per range, under the lock below.
                float[] lcounts = new float[counts.Length];
                float[] lmeans = new float[means.Length];

                for (int i = a; i < b; i++)
                {
                    int index = clusters.Assign(x[i]);
                    float weight = weights?[i] ?? 1.0f;

                    lcounts[index] += weight;

                    // NOTE(review): presumably adds weight * x[i] into the cluster's slice of lmeans - confirm against AddProductC.
                    x[i].AddProductC(weight, lmeans, index * dimension);
                }

                lock (sync)
                {
                    // NOTE(review): presumably element-wise adds the local buffers into the shared ones - confirm against Mathematics.Add.
                    Mathematics.Add(lcounts.Length, lcounts, 0, counts, 0);
                    Mathematics.Add(lmeans.Length, lmeans, 0, means, 0);
                }
            },
            new ParallelOptions());

        // calculate new centroids; clusters that received no weight keep their previous centroid
        for (int i = 0, off = 0; i < k; i++, off += dimension)
        {
            if (counts[i] != 0)
            {
                Mathematics.DivC(dimension, means, off, counts[i], clusters[i].Centroid, 0);
            }
        }
    }

    return new KMeans(clusters)
    {
        Seeding = seeding,
    };
}
/// <summary>
/// Creates a <see cref="KMeans"/> instance backed by the supplied collection of clusters.
/// </summary>
/// <param name="clusters">The collection of clusters stored by this instance.</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="clusters"/> is <b>null</b>.
/// </exception>
private KMeans(KMeansClusterCollection clusters)
{
    // Reject a missing collection up front so the field is never left null.
    if (clusters is null)
    {
        throw new ArgumentNullException(nameof(clusters));
    }

    this.clusters = clusters;
}