Exemple #1
0
        /// <summary>
        /// Learns a <see cref="KMeans"/> model by partitioning the given data points into <paramref name="k"/> clusters.
        /// </summary>
        /// <param name="k">The number of clusters.</param>
        /// <param name="seeding">The cluster initialization algorithm.</param>
        /// <param name="maxiter">The maximum number of iterations.</param>
        /// <param name="distance">The distance function.</param>
        /// <param name="x">The data points to clusterize. Must contain at least one element; the dimension of the model is taken from the first element.</param>
        /// <param name="weights">The <c>weight</c> of importance for each data point, or <b>null</b> to weight every point as <c>1.0</c>.</param>
        /// <param name="cancellationToken">The token used to notify the clusterizer that the operation should be canceled.</param>
        /// <returns>
        /// The <see cref="KMeans"/> clusterizer learned by this method.
        /// </returns>
        /// <exception cref="ArgumentNullException">
        /// <para><paramref name="x"/> is <b>null</b>.</para>
        /// <para>-or-</para>
        /// <para><paramref name="distance"/> is <b>null</b>.</para>
        /// </exception>
        /// <exception cref="ArgumentException">
        /// <para><paramref name="weights"/> is not <b>null</b> and the number of elements in <paramref name="weights"/> does not match the number of elements in <paramref name="x"/>.</para>
        /// <para>-or-</para>
        /// <para><paramref name="x"/> is empty.</para>
        /// </exception>
        /// <exception cref="OperationCanceledException">
        /// <paramref name="cancellationToken"/> was canceled before an iteration started.
        /// </exception>
        public static KMeans Learn(
            int k,
            KMeansSeeding seeding,
            int maxiter,
            IVectorDistance <float, IVector <float>, float> distance,
            IList <IVector <float> > x,
            IList <float> weights,
            CancellationToken cancellationToken)
        {
            if (x == null)
            {
                throw new ArgumentNullException(nameof(x));
            }

            // The contract documents this exception; fail here instead of relying on a callee to notice.
            if (distance == null)
            {
                throw new ArgumentNullException(nameof(distance));
            }

            if (weights != null && weights.Count != x.Count)
            {
                throw new ArgumentException("The number of weights must match the number of input vectors.", nameof(weights));
            }

            // The dimension is read from x[0] below, so an empty input cannot be processed.
            if (x.Count == 0)
            {
                throw new ArgumentException("The collection of input vectors must not be empty.", nameof(x));
            }

            int sampleCount = x.Count;
            int dimension   = x[0].Length;

            KMeansClusterCollection clusters = new KMeansClusterCollection(k, dimension, distance);

            // choose the initial centroids
            switch (seeding)
            {
            case KMeansSeeding.KMeansPlusPlus:
                clusters.KMeansPlusPlusSeeding(x, weights, cancellationToken);
                break;

            default:
                clusters.RandomSeeding(x, weights, cancellationToken);
                break;
            }

            // Global accumulators: one weighted count per cluster and one
            // dimension-sized slot per cluster (cluster i starts at i * dimension).
            float[] counts = new float[k];
            float[] means  = new float[k * dimension];
            object  sync   = new object();

            for (int iter = 0; iter < maxiter; iter++)
            {
                cancellationToken.ThrowIfCancellationRequested();

                // reset means and counts (freshly allocated arrays are already zero on the first pass)
                if (iter > 0)
                {
                    Vectors.Set(counts.Length, 0.0f, counts, 0);
                    Vectors.Set(means.Length, 0.0f, means, 0);
                }

                // assign vectors to new clusters
                CommonParallel.For(
                    0,
                    sampleCount,
                    (a, b) =>
                    {
                        // Each range [a, b) accumulates into private buffers so the
                        // lock is taken once per range instead of once per sample.
                        float[] lcounts = new float[counts.Length];
                        float[] lmeans  = new float[means.Length];

                        for (int i = a; i < b; i++)
                        {
                            int index    = clusters.Assign(x[i]);
                            float weight = weights?[i] ?? 1.0f;

                            lcounts[index] += weight;

                            // presumably adds weight * x[i] into the slot of the assigned cluster
                            x[i].AddProductC(weight, lmeans, index * dimension);
                        }

                        // merge the private buffers into the shared accumulators
                        lock (sync)
                        {
                            Mathematics.Add(lcounts.Length, lcounts, 0, counts, 0);
                            Mathematics.Add(lmeans.Length, lmeans, 0, means, 0);
                        }
                    },
                    new ParallelOptions());

                // calculate new centroids
                for (int i = 0, off = 0; i < k; i++, off += dimension)
                {
                    // a cluster with zero accumulated weight keeps its previous centroid
                    if (counts[i] != 0)
                    {
                        Mathematics.DivC(dimension, means, off, counts[i], clusters[i].Centroid, 0);
                    }
                }
            }

            return new KMeans(clusters)
            {
                Seeding = seeding,
            };
        }
Exemple #2
0
 /// <summary>
 /// Creates a <see cref="KMeans"/> instance that stores the supplied cluster collection.
 /// </summary>
 /// <param name="clusters">The cluster collection kept by this instance.</param>
 /// <exception cref="ArgumentNullException">
 /// Thrown when <paramref name="clusters"/> is <b>null</b>.
 /// </exception>
 private KMeans(KMeansClusterCollection clusters)
 {
     // reject a missing collection before touching any state
     if (clusters is null)
     {
         throw new ArgumentNullException(nameof(clusters));
     }

     this.clusters = clusters;
 }