Exemplo n.º 1
0
        /// <summary>
        /// Fills S[q][i] and J[q][i] for every i in [imin, imax] by trying each admissible
        /// start index j of the last cluster, one by one.
        /// </summary>
        /// <param name="imin">First column to fill.</param>
        /// <param name="imax">Last column to fill (inclusive).</param>
        /// <param name="q">Row being filled; row q - 1 is read.</param>
        /// <param name="S">Cost matrix; row q is written.</param>
        /// <param name="J">Backtrack matrix; row q is written.</param>
        /// <param name="sum_x">Forwarded to WithinCluster.Dissimilarity.</param>
        /// <param name="sum_x_sq">Forwarded to WithinCluster.Dissimilarity.</param>
        /// <param name="sum_w">Forwarded to WithinCluster.Dissimilarity.</param>
        /// <param name="sum_w_sq">Forwarded to WithinCluster.Dissimilarity.</param>
        /// <param name="criterion">Dissimilarity used for the within-cluster cost.</param>
        public static void Quadratic(
            int imin, int imax, int q,
            number[][] S, int[][] J,
            number[] sum_x, number[] sum_x_sq, number[] sum_w, number[] sum_w_sq,
            DissimilarityType criterion)
        {
            // Assumption: each cluster must have at least one point.
            for (int last = imin; last <= imax; ++last)
            {
                number[] previousRow = S[q - 1];
                number[] currentRow  = S[q];
                int[]    splitRow    = J[q];

                // Baseline candidate: the last cluster starts (and ends) at `last`.
                currentRow[last] = previousRow[last - 1];
                splitRow[last]   = last;

                // Starts below max(q, J[q - 1][last]) are never examined.
                int lowestStart = Math.Max(q, J[q - 1][last]);

                // Walk the remaining starts downwards; a strict '<' keeps the
                // largest start index among equal costs.
                int start = last - 1;
                while (start >= lowestStart)
                {
                    number candidate = previousRow[start - 1] + WithinCluster.Dissimilarity(criterion, start, last, sum_x, sum_x_sq, sum_w, sum_w_sq);

                    if (candidate < currentRow[last])
                    {
                        currentRow[last] = candidate;
                        splitRow[last]   = start;
                    }

                    --start;
                }
            }
        }
Exemplo n.º 2
0
        /// <summary>
        /// Entry point for filling row q with the SMAWK-based routine: builds the initial
        /// candidate list q, q + 1, ..., imax and starts the recursion with a step of 1.
        /// </summary>
        /// <param name="imin">First column to fill.</param>
        /// <param name="imax">Last column to fill (inclusive); also the largest candidate.</param>
        /// <param name="q">Row being filled; also the smallest candidate.</param>
        /// <param name="S">Cost matrix, forwarded to the recursion.</param>
        /// <param name="J">Backtrack matrix, forwarded to the recursion.</param>
        /// <param name="sum_x">Forwarded unchanged.</param>
        /// <param name="sum_x_sq">Forwarded unchanged.</param>
        /// <param name="sum_w">Forwarded unchanged.</param>
        /// <param name="sum_w_sq">Forwarded unchanged.</param>
        /// <param name="criterion">Dissimilarity used for the within-cluster cost.</param>
        public static void SMAWK(
            int imin, int imax, int q,
            number[][] S, int[][] J,
            number[] sum_x, number[] sum_x_sq, number[] sum_w, number[] sum_w_sq,
            DissimilarityType criterion)
        {
            int candidateCount = imax - q + 1;
            int[] candidates   = new int[candidateCount];

            // candidates[k] = q + k, i.e. the consecutive indices q .. imax.
            for (int offset = 0; offset < candidateCount; offset++)
            {
                candidates[offset] = q + offset;
            }

            SMAWK(imin, imax, 1, q, candidates, S, J, sum_x, sum_x_sq, sum_w, sum_w_sq, criterion);
        }
Exemplo n.º 3
0
        /// <summary>
        /// For each column i = imin, imin + istep, ... up to imax, scans the candidate
        /// start indices in <paramref name="js"/> and stores the cheapest total cost in
        /// S[q][i] and the chosen start in J[q][i].
        /// </summary>
        /// <param name="imin">First column to fill.</param>
        /// <param name="imax">Last column to fill (inclusive).</param>
        /// <param name="istep">Distance between consecutive columns.</param>
        /// <param name="q">Row being filled; row q - 1 is read.</param>
        /// <param name="js">Candidate start indices, scanned from left to right.</param>
        /// <param name="S">Cost matrix; row q is written.</param>
        /// <param name="J">Backtrack matrix; row q is written.</param>
        /// <param name="sum_x">Forwarded to WithinCluster.Dissimilarity.</param>
        /// <param name="sum_x_sq">Forwarded to WithinCluster.Dissimilarity.</param>
        /// <param name="sum_w">Forwarded to WithinCluster.Dissimilarity.</param>
        /// <param name="sum_w_sq">Forwarded to WithinCluster.Dissimilarity.</param>
        /// <param name="criterion">Dissimilarity used for the within-cluster cost.</param>
        private static void FindMinFromCandidates(
            int imin, int imax, int istep, int q,
            int[] js,
            number[][] S, int[][] J,
            number[] sum_x, number[] sum_x_sq, number[] sum_w, number[] sum_w_sq,
            DissimilarityType criterion)
        {
            // Position in js of the most recent winner; the next column starts there.
            int carriedIndex = 0;

            int column = imin;
            while (column <= imax)
            {
                int firstIndex = carriedIndex;
                int firstStart = js[firstIndex];

                // Seed S[q][column] and J[q][column] with the first candidate.
                S[q][column] = S[q - 1][firstStart - 1] + WithinCluster.Dissimilarity(criterion, firstStart, column, sum_x, sum_x_sq, sum_w, sum_w_sq);
                J[q][column] = firstStart;

                for (int r = firstIndex + 1; r < js.Length; ++r)
                {
                    int start = js[r];

                    // Candidates below J[q - 1][column] are skipped without being evaluated.
                    if (start >= J[q - 1][column])
                    {
                        // A start beyond the column itself ends the scan for this column.
                        if (start > column)
                        {
                            break;
                        }

                        number total = S[q - 1][start - 1] + WithinCluster.Dissimilarity(criterion, start, column, sum_x, sum_x_sq, sum_w, sum_w_sq);

                        // '<=' lets a later candidate win ties.
                        if (total <= S[q][column])
                        {
                            S[q][column] = total;
                            J[q][column] = start;
                            carriedIndex = r;
                        }
                    }
                }

                column += istep;
            }
        }
Exemplo n.º 4
0
        /// <summary>
        /// Dispatches to the within-cluster cost routine selected by
        /// <paramref name="disType"/> for the index range j..i.
        /// </summary>
        /// <param name="disType">Which dissimilarity to evaluate.</param>
        /// <param name="j">First index of the range.</param>
        /// <param name="i">Last index of the range.</param>
        /// <param name="sum_x">Passed to SABS (L1) and SSQ (L2).</param>
        /// <param name="sum_x_sq">Passed to SSQ (L2).</param>
        /// <param name="sum_w">Passed to SABS (L1) and SSQ (L2, L2Y).</param>
        /// <param name="sum_w_sq">Passed to SSQ (L2Y); optional.</param>
        /// <returns>The value of the selected routine, or 0 when no case matches.</returns>
        public static number Dissimilarity(DissimilarityType disType, int j, int i,
                                           number[] sum_x, number[] sum_x_sq, number[] sum_w, number[] sum_w_sq = null)
        {
            switch (disType)
            {
            case DissimilarityType.L1:
                return SABS(j, i, sum_x, sum_w);

            case DissimilarityType.L2:
                return SSQ(j, i, sum_x, sum_x_sq, sum_w);

            case DissimilarityType.L2Y:
                // Same SSQ routine, fed with the sum_w / sum_w_sq arrays instead.
                return SSQ(j, i, sum_w, sum_w_sq);

            default:
                // Unrecognised criteria yield zero.
                return 0;
            }
        }
Exemplo n.º 5
0
        /// <summary>
        /// Fills the dynamic programming matrices <paramref name="S"/> and
        /// <paramref name="J"/> (both K x N, K = S.Length, N = S[0].Length).
        /// </summary>
        /// <remarks>
        /// Row 0 is initialised directly: S[0][0] = 0, S[0][i] is the dissimilarity of the
        /// index range 0..i, and J[0][i] = 0. Rows 1..K-1 are filled by the routine selected
        /// through <paramref name="method"/>; in the last row only column N - 1 is computed.
        /// All indices are 0-based. Returns without doing anything when S has no rows or
        /// its rows have no columns.
        /// </remarks>
        /// <param name="x">Values to cluster; the original notes require them to be sorted.</param>
        /// <param name="w">Weights; null or empty means equally weighted.</param>
        /// <param name="S">Cost matrix to fill.</param>
        /// <param name="J">Backtrack matrix to fill.</param>
        /// <param name="method">Row-filling routine: Linear, LogLinear or Quadratic.</param>
        /// <param name="criterion">Dissimilarity used for the within-cluster cost.</param>
        /// <exception cref="ArgumentOutOfRangeException">
        /// <paramref name="method"/> is not one of the known methods (detected when the
        /// first row with q &gt;= 1 is filled).
        /// </exception>
        public static void FillMatrix(number[] x, number[] w, number[][] S, int[][] J, Method method, DissimilarityType criterion)
        {
            int K = S.Length;
            if (K == 0)
            {
                return; // no rows to fill
            }

            int N = S[0].Length;
            if (N == 0)
            {
                return; // no columns to fill
            }

            // Decide once whether weights are in play instead of re-testing per element.
            bool weighted = !(w == null || w.Length == 0);

            number[] sum_x    = new number[N];
            number[] sum_x_sq = new number[N];

            // The weight prefix sums exist only in the weighted case; otherwise null is
            // forwarded to the dissimilarity routines.
            number[] sum_w    = weighted ? new number[N] : null;
            number[] sum_w_sq = weighted ? new number[N] : null;

            // Middle element of x, subtracted from every value before accumulating
            // (the original notes give numerical stability as the reason).
            number shift = x[N / 2];

            if (!weighted)
            {
                sum_x[0]    = x[0] - shift;
                sum_x_sq[0] = (x[0] - shift) * (x[0] - shift);
            }
            else
            {
                sum_x[0]    = w[0] * (x[0] - shift);
                sum_x_sq[0] = w[0] * (x[0] - shift) * (x[0] - shift);

                sum_w[0]    = w[0];
                sum_w_sq[0] = w[0] * w[0];
            }

            S[0][0] = 0;
            J[0][0] = 0;

            for (int i = 1; i < N; ++i)
            {
                // Running prefix sums; the arithmetic order is kept exactly as before so
                // floating-point results do not change.
                if (!weighted)
                {
                    sum_x[i]    = sum_x[i - 1] + x[i] - shift;
                    sum_x_sq[i] = sum_x_sq[i - 1] + (x[i] - shift) * (x[i] - shift);
                }
                else
                {
                    sum_x[i]    = sum_x[i - 1] + w[i] * (x[i] - shift);
                    sum_x_sq[i] = sum_x_sq[i - 1] + w[i] * (x[i] - shift) * (x[i] - shift);
                    sum_w[i]    = sum_w[i - 1] + w[i];
                    sum_w_sq[i] = sum_w_sq[i - 1] + w[i] * w[i];
                }

                // Row 0: a single cluster covering indices 0..i.
                S[0][i] = WithinCluster.Dissimilarity(criterion, 0, i, sum_x, sum_x_sq, sum_w, sum_w_sq);
                J[0][i] = 0;
            }

            for (int q = 1; q < K; ++q)
            {
                int imin;
                if (q < K - 1)
                {
                    imin = Math.Max(1, q);
                }
                else
                {
                    // No need to compute S[K-1][0] ... S[K-1][N-2]
                    imin = N - 1;
                }

                if (method == Method.Linear)
                {
                    Fill.SMAWK(imin, N - 1, q, S, J, sum_x, sum_x_sq, sum_w, sum_w_sq, criterion);
                }
                else if (method == Method.LogLinear)
                {
                    Fill.LogLinear(imin, N - 1, q, q, N - 1, S, J, sum_x, sum_x_sq, sum_w, sum_w_sq, criterion);
                }
                else if (method == Method.Quadratic)
                {
                    Fill.Quadratic(imin, N - 1, q, S, J, sum_x, sum_x_sq, sum_w, sum_w_sq, criterion);
                }
                else
                {
                    throw new ArgumentOutOfRangeException("method", method, "ERROR: unknown method " + method + "!");
                }
            }
        }
Exemplo n.º 6
0
        /// <summary>
        /// Clusters the one-dimensional values in <paramref name="x"/>: sorts them if
        /// needed, fills the dynamic programming matrices, selects the number of clusters
        /// and backtracks to obtain the assignment, centers, within-cluster costs and sizes.
        /// </summary>
        /// <remarks>
        /// All arrays are 0-based. An empty <paramref name="x"/> yields empty
        /// <paramref name="clusters"/>, <paramref name="centers"/>,
        /// <paramref name="withinss"/> and <paramref name="size"/>. When all values of
        /// <paramref name="x"/> are equal, a single cluster with index 0 is returned.
        /// </remarks>
        /// <param name="x">Values to cluster, not necessarily sorted.</param>
        /// <param name="y">Optional second array (weights); null, or all entries equal, is treated as equally weighted.</param>
        /// <param name="Kmin">Minimum number of clusters expected.</param>
        /// <param name="Kmax">Maximum number of clusters expected; capped at the number of distinct values.</param>
        /// <param name="clusters">Cluster index of each element, in the original order of <paramref name="x"/>.</param>
        /// <param name="centers">Per-cluster centers, as produced by the backtracking routine.</param>
        /// <param name="withinss">Per-cluster within-cluster costs, as produced by the backtracking routine.</param>
        /// <param name="size">Per-cluster sizes, as produced by the backtracking routine.</param>
        /// <param name="BIC">Array handed to SelectLevels; allocated here with Kmax - Kmin entries.</param>
        /// <param name="method">Routine used to fill the dynamic programming matrices.</param>
        /// <param name="criterion">Dissimilarity used for the within-cluster cost.</param>
        private static void KMeans(number[] x, number[] y, int Kmin, int Kmax, out int[] clusters, out number[] centers, out number[] withinss, out number[] size, out double[] BIC, Method method, DissimilarityType criterion)
        {
            int N = x.Length;

            clusters = new int[N];

            // NOTE(review): this holds Kmax - Kmin entries; confirm against SelectLevels
            // whether one entry per candidate K (Kmax - Kmin + 1) is expected.
            BIC = new double[Kmax - Kmin];

            if (N == 0)
            {
                // Nothing to cluster. Previously this path fell through to x[0] and threw.
                centers  = new number[0];
                withinss = new number[0];
                size     = new number[0];
                return;
            }

            // order[i] is the position in x of the i-th smallest element.
            int[] order = new int[N];

            for (int i = 0; i < order.Length; ++i)
            {
                order[i] = i;
            }

            bool is_sorted = true;

            for (int i = 0; i < N - 1; ++i)
            {
                if (x[i] > x[i + 1])
                {
                    is_sorted = false;
                    break;
                }
            }

            number[] x_sorted;

            if (is_sorted)
            {
                x_sorted = x;
            }
            else
            {
                x_sorted = new number[N];

                Array.Copy(x, x_sorted, N);

                // Sorts the keys and applies the same permutation to `order`, so
                // x_sorted[i] == x[order[i]] afterwards without a second pass.
                Array.Sort(x_sorted, order);
            }

            // Weights count as "equal" when none were given or all entries match.
            bool is_equally_weighted = true;

            if (y != null)
            {
                for (int i = 1; i < N; ++i)
                {
                    if (y[i] != y[i - 1])
                    {
                        is_equally_weighted = false;
                        break;
                    }
                }
            }

            number[] y_sorted = null;

            if (!is_equally_weighted)
            {
                y_sorted = new number[N];

                for (int i = 0; i < N; ++i)
                {
                    y_sorted[i] = y[order[i]];
                }
            }

            // Count distinct values (N >= 1 here, so there is at least one).
            int nUnique = 1;

            for (int i = 1; i < N; i++)
            {
                if (x_sorted[i - 1] != x_sorted[i])
                {
                    nUnique++;
                }
            }

            Kmax = nUnique < Kmax ? nUnique : Kmax;

            if (nUnique > 1)
            { // The case when not all elements are equal.
                number[][] S = new number[Kmax][];
                int[][]    J = new int[Kmax][];

                for (int i = 0; i < Kmax; i++)
                {
                    S[i] = new number[N];
                    J[i] = new int[N];
                }

                // Fill in dynamic programming matrix
                DynamicProgramming.FillMatrix(x_sorted, y_sorted, S, J, method, criterion);

                int Kopt;

                if (is_equally_weighted)
                {
                    Kopt = NonWeighted.SelectLevels(x_sorted, J, Kmin, Kmax, BIC);
                }
                else
                {
                    switch (criterion)
                    {
                    case DissimilarityType.L2Y:
                        Kopt = NonWeighted.SelectLevels(y_sorted, J, Kmin, Kmax, BIC);
                        break;

                    default:
                        Kopt = Weighted.SelectLevels(x_sorted, y_sorted, J, Kmin, Kmax, BIC);
                        break;
                    }
                }

                centers  = new number[Kopt];
                withinss = new number[Kopt];
                size     = new number[Kopt];

                if (Kopt < Kmax)
                { // Keep only the first Kopt rows of J for backtracking
                    Array.Resize(ref J, Kopt);
                }

                int[] cluster_sorted = new int[N];

                // Backtrack to find the clusters beginning and ending indices
                if (is_equally_weighted && criterion == DissimilarityType.L1)
                {
                    DynamicProgramming.BacktrackL1(x_sorted, J, cluster_sorted, centers, withinss, size);
                }
                else if (is_equally_weighted && criterion == DissimilarityType.L2)
                {
                    DynamicProgramming.Backtrack(x_sorted, J, cluster_sorted, centers, withinss, size);
                }
                else if (criterion == DissimilarityType.L2Y)
                {
                    DynamicProgramming.BacktrackL2Y(x_sorted, y_sorted, J, cluster_sorted, centers, withinss, size);
                }
                else
                {
                    DynamicProgramming.BacktrackWeighted(x_sorted, y_sorted, J, cluster_sorted, centers, withinss, size);
                }

                for (int i = 0; i < N; ++i)
                {
                    // Obtain clustering on data in the original order
                    clusters[order[i]] = cluster_sorted[i];
                }
            }
            else
            {
                // A single cluster that contains all elements; `clusters` is already
                // all zeros from its allocation.
                centers  = new number[1];
                withinss = new number[1];
                size     = new number[1];

                centers[0]  = x[0];
                withinss[0] = 0;

                // NOTE(review): with unequal weights this is N * y[0], not the sum of
                // the weights - confirm that this is the intended size.
                size[0] = N * (is_equally_weighted ? 1 : y[0]);
            }
        }
Exemplo n.º 7
0
        /// <summary>
        /// Fills S[q][i] and J[q][i] for i in [imin, imax] by divide and conquer: the
        /// middle column is solved first, and its chosen start index bounds the search
        /// range of the two halves, which are then filled recursively.
        /// </summary>
        /// <param name="imin">First column to fill.</param>
        /// <param name="imax">Last column to fill (inclusive).</param>
        /// <param name="q">Row being filled; row q - 1 is read.</param>
        /// <param name="jmin">Lower bound for the start index; applied only when imin &gt; q.</param>
        /// <param name="jmax">Upper bound for the start index; applied only when imax &lt; N - 1.</param>
        /// <param name="S">Cost matrix; row q is written.</param>
        /// <param name="J">Backtrack matrix; row q is written.</param>
        /// <param name="sum_x">Forwarded to WithinCluster.Dissimilarity.</param>
        /// <param name="sum_x_sq">Forwarded to WithinCluster.Dissimilarity.</param>
        /// <param name="sum_w">Forwarded to WithinCluster.Dissimilarity.</param>
        /// <param name="sum_w_sq">Forwarded to WithinCluster.Dissimilarity.</param>
        /// <param name="criterion">Dissimilarity used for every within-cluster cost in this routine.</param>
        public static void LogLinear(
            int imin, int imax, int q,
            int jmin, int jmax,
            number[][] S, int[][] J,
            number[] sum_x, number[] sum_x_sq, number[] sum_w, number[] sum_w_sq,
            DissimilarityType criterion)
        {
            if (imin > imax)
            {
                return;
            }

            int N = S[0].Length;
            int i = (imin + imax) / 2;

            // Initialization of S[q][i]: the last cluster consists of point i alone.
            S[q][i] = S[q - 1][i - 1];
            J[q][i] = i;

            int jlow = q; // the lower end for j

            if (imin > q)
            {
                jlow = Math.Max(jlow, jmin);
            }
            jlow = Math.Max(jlow, J[q - 1][i]);

            int jhigh = i - 1; // the upper end for j
            if (imax < N - 1)
            {
                jhigh = Math.Min(jhigh, jmax);
            }

            // The range [jlow, jhigh] is narrowed from both ends: each pass examines the
            // current j (from the top) and the current jlow (from the bottom).
            for (int j = jhigh; j >= jlow; --j)
            {
                // compute s(j,i) with the requested criterion, exactly like s(jlow,i)
                // below, so that both candidates are measured by the same cost function.
                number sji = WithinCluster.Dissimilarity(criterion, j, i, sum_x, sum_x_sq, sum_w, sum_w_sq);

                // Early exit: s(j,i) + S[q-1][jlow-1] already reaches the best cost so far.
                // NOTE(review): presumably a lower bound for all remaining candidates -
                // confirm the monotonicity assumptions for each criterion.
                if (sji + S[q - 1][jlow - 1] >= S[q][i])
                {
                    break;
                }

                // Examine the lower bound of the cluster border
                // compute s(jlow, i)
                number sjlowi = WithinCluster.Dissimilarity(criterion, jlow, i, sum_x, sum_x_sq, sum_w, sum_w_sq);

                number SSQ_jlow = sjlowi + S[q - 1][jlow - 1];

                if (SSQ_jlow < S[q][i])
                {
                    // shrink the lower bound
                    S[q][i] = SSQ_jlow;
                    J[q][i] = jlow;
                }

                jlow++;

                number SSQ_j = sji + S[q - 1][j - 1];
                if (SSQ_j < S[q][i])
                {
                    S[q][i] = SSQ_j;
                    J[q][i] = j;
                }
            }

            // Left half: starts are bounded above by the start chosen for column i.
            jmin = (imin > q) ? J[q][imin - 1] : q;
            jmax = J[q][i];

            LogLinear(imin, i - 1, q, jmin, jmax,
                      S, J, sum_x, sum_x_sq, sum_w,
                      sum_w_sq, criterion);

            // Right half: starts are bounded below by the start chosen for column i.
            jmin = J[q][i];
            jmax = (imax < N - 1) ? J[q][imax + 1] : imax;

            LogLinear(i + 1, imax, q, jmin, jmax,
                      S, J, sum_x, sum_x_sq, sum_w,
                      sum_w_sq, criterion);
        }
Exemplo n.º 8
0
        /// <summary>
        /// Recursive step of the SMAWK-based row fill for columns imin, imin + istep, ...
        /// up to imax of row q.
        /// </summary>
        /// <remarks>
        /// With at most one column left, the minimum is taken directly from the
        /// candidates. Otherwise the candidate list is reduced, the columns at odd
        /// positions (0-based; step 2 * istep starting at imin + istep) are solved
        /// recursively with the reduced list, and the columns at even positions are
        /// filled afterwards with <paramref name="js"/>.
        /// </remarks>
        /// <param name="imin">First column to fill.</param>
        /// <param name="imax">Last column to fill (inclusive).</param>
        /// <param name="istep">Distance between consecutive columns at this level.</param>
        /// <param name="q">Row being filled.</param>
        /// <param name="js">Candidate start indices for this level.</param>
        /// <param name="S">Cost matrix, forwarded to the helpers.</param>
        /// <param name="J">Backtrack matrix, forwarded to the helpers.</param>
        /// <param name="sum_x">Forwarded unchanged.</param>
        /// <param name="sum_x_sq">Forwarded unchanged.</param>
        /// <param name="sum_w">Forwarded unchanged.</param>
        /// <param name="sum_w_sq">Forwarded unchanged.</param>
        /// <param name="criterion">Dissimilarity used for the within-cluster cost.</param>
        private static void SMAWK(
            int imin, int imax, int istep, int q,
            int[] js,
            number[][] S, int[][] J,
            number[] sum_x, number[] sum_x_sq, number[] sum_w, number[] sum_w_sq,
            DissimilarityType criterion)
        {
            if (imax <= imin)
            {
                // Base case: only one element left.
                FindMinFromCandidates(
                    imin, imax, istep, q, js, S, J, sum_x, sum_x_sq, sum_w,
                    sum_w_sq, criterion
                    );
                return;
            }

            // REDUCE: obtain the candidate list used for the odd positions.
            int[] js_odd;

            ReduceInPlace(imin, imax, istep, q, js, out js_odd,
                          S, J, sum_x, sum_x_sq, sum_w,
                          sum_w_sq, criterion);

            int istepx2  = (istep << 1);
            int imin_odd = (imin + istep);

            // Largest column of the form imin_odd + k * istepx2 that does not exceed imax.
            int imax_odd = (imin_odd + (imax - imin_odd) / istepx2 * istepx2);

            // Recursion on odd rows (0-based):
            SMAWK(imin_odd, imax_odd, istepx2,
                  q, js_odd, S, J, sum_x, sum_x_sq, sum_w,
                  sum_w_sq, criterion);

            // Even positions are filled last; FillEvenPositions reads J[q] at the
            // neighbouring odd columns.
            FillEvenPositions(imin, imax, istep, q, js,
                              S, J, sum_x, sum_x_sq, sum_w,
                              sum_w_sq, criterion);
        }
Exemplo n.º 9
0
        // Fills S[q][i] and J[q][i] for the columns i = imin, imin + 2*istep, ... <= imax
        // (the "even" positions, 0-based, of the current SMAWK level).
        //
        // For each such column the candidates in js are scanned between a lower bound jl
        // and an upper bound jmax; the upper bound comes from J[q][i + istep] when that
        // column lies within imax.
        // NOTE(review): presumably the columns at i + istep were already filled by the
        // recursive SMAWK call, and js is in ascending order - confirm against the caller.
        //
        //   imin, imax, istep : column range and spacing at this recursion level
        //   q                 : row being filled; row q - 1 is read
        //   js                : candidate start indices
        //   S, J              : cost and backtrack matrices; row q is written
        //   sum_*, criterion  : forwarded to WithinCluster.Dissimilarity
        private static void FillEvenPositions(
            int imin, int imax, int istep, int q,
            int[] js,
            number[][] S, int[][] J,
            number[] sum_x, number[] sum_x_sq, number[] sum_w, number[] sum_w_sq,
            DissimilarityType criterion)
        {
            // Derive j for even rows (0-based)
            int n       = js.Length;
            int istepx2 = (istep << 1);
            // Lower bound for the candidate value; starts at the first candidate.
            int jl      = js[0];

            // r is the cursor into js; it is shared across columns and never reset.
            for (int i = imin, r = 0; i <= imax; i += istepx2)
            {
                // auto jmin = (i == imin) ? js[0] : J[q][i - istep];
                while (js[r] < jl)
                {
                    // Increase r until it points to a value of at least jl
                    r++;
                }

                // Initialize S[q][i] and J[q][i] with the first admissible candidate
                S[q][i] = S[q - 1][js[r] - 1] +
                          WithinCluster.Dissimilarity(criterion, js[r], i, sum_x, sum_x_sq, sum_w, sum_w_sq);
                // ssq(js[r], i, sum_x, sum_x_sq, sum_w);
                J[q][i] = js[r]; // rmin

                // Look for minimum S upto jmax within js.
                // jh is the start chosen for the next column (i + istep) when that column
                // is within range, otherwise the last candidate in js.
                int jh = (i + istep <= imax)
                  ? J[q][i + istep] : js[n - 1];

                // A cluster ending at i cannot start after i.
                int jmax = Math.Min(jh, i);

                // Cost of the range [jmax, i]; used below in the early-exit test.
                number sjimin = WithinCluster.Dissimilarity(criterion, jmax, i, sum_x, sum_x_sq, sum_w, sum_w_sq);
                // ssq(jmax, i, sum_x, sum_x_sq, sum_w)


                // Scan the remaining candidates, starting right after the one used for
                // the initialization, while they do not exceed jmax.
                for (++r; r < n && js[r] <= jmax; r++)
                {
                    int jabs = js[r];

                    if (jabs > i)
                    {
                        break;
                    }

                    // Candidates below J[q - 1][i] are skipped without being evaluated.
                    if (jabs < J[q - 1][i])
                    {
                        continue;
                    }

                    number s = WithinCluster.Dissimilarity(criterion, jabs, i, sum_x, sum_x_sq, sum_w, sum_w_sq);
                    // (ssq(jabs, i, sum_x, sum_x_sq, sum_w));
                    number Sj = (S[q - 1][jabs - 1] + s);

                    if (Sj <= S[q][i])
                    {
                        // '<=' lets a later candidate win ties.
                        S[q][i] = Sj;
                        J[q][i] = js[r];
                    }
                    else if (S[q - 1][jabs - 1] + sjimin > S[q][i])
                    {
                        // Even with the cost of the range [jmax, i], this prefix cost
                        // already exceeds the best total; stop scanning for this column.
                        // NOTE(review): presumably relies on S[q - 1] not decreasing in
                        // j - confirm.
                        break;
                    }

                    /*else if(S[q-1][js[rmin]-1] + s > S[q][i]) {
                     *  break;
                     * }*/
                }

                // Step the cursor back by one and raise the lower bound to jh for the
                // next even column.
                r--;
                jl = jh;
            }
        }
Exemplo n.º 10
0
        //SMAWK

        /// <summary>
        /// Produces in <paramref name="js_red"/> a reduced copy of the candidate list
        /// <paramref name="js"/> holding at most as many entries as there are columns
        /// imin, imin + istep, ... up to imax.
        /// </summary>
        /// <remarks>
        /// The reduction compacts entries inside <paramref name="js_red"/> itself. That
        /// array is a private copy, so <paramref name="js"/> is left untouched for the
        /// caller, which still needs the full list afterwards. When the list is already
        /// short enough, the copy is returned unchanged.
        /// </remarks>
        /// <param name="imin">First column.</param>
        /// <param name="imax">Last column (inclusive).</param>
        /// <param name="istep">Distance between consecutive columns.</param>
        /// <param name="q">Row being filled; row q - 1 is read.</param>
        /// <param name="js">Candidate start indices; not modified.</param>
        /// <param name="js_red">Receives the reduced candidate list.</param>
        /// <param name="S">Cost matrix; only row q - 1 is read.</param>
        /// <param name="J">Backtrack matrix; not used by this routine.</param>
        /// <param name="sum_x">Forwarded to WithinCluster.Dissimilarity.</param>
        /// <param name="sum_x_sq">Forwarded to WithinCluster.Dissimilarity.</param>
        /// <param name="sum_w">Forwarded to WithinCluster.Dissimilarity.</param>
        /// <param name="sum_w_sq">Forwarded to WithinCluster.Dissimilarity.</param>
        /// <param name="criterion">Dissimilarity used for the within-cluster cost.</param>
        private static void ReduceInPlace(
            int imin, int imax, int istep, int q,
            int[] js, out int[] js_red,
            number[][] S, int[][] J,
            number[] sum_x, number[] sum_x_sq, number[] sum_w, number[] sum_w_sq,
            DissimilarityType criterion)
        {
            // Number of columns at this level.
            int N = (imax - imin) / istep + 1;

            // Work on a copy: assigning the reference would let the compaction below
            // (and any deeper recursion level) overwrite the caller's candidate list.
            js_red = (int[])js.Clone();

            if (N >= js.Length)
            {
                return;
            }

            // Two positions to move candidate j's back and forth
            int left  = -1; // points to last favorable position / column
            int right = 0;  // points to current position / column

            int m = js_red.Length;

            while (m > N)
            { // js_red has more than N positions / columns
                int p = (left + 1);

                int    i  = (imin + p * istep);
                int    j  = (js_red[right]);
                number Sl = (S[q - 1][j - 1] + WithinCluster.Dissimilarity(criterion, j, i, sum_x, sum_x_sq, sum_w, sum_w_sq));

                int    jplus1  = (js_red[right + 1]);
                number Slplus1 = (S[q - 1][jplus1 - 1] + WithinCluster.Dissimilarity(criterion, jplus1, i, sum_x, sum_x_sq, sum_w, sum_w_sq));

                if (Sl < Slplus1 && p < N - 1)
                {
                    js_red[++left] = j; // keep j as the favorable candidate for column p
                    right++;            // move on to next position / column p+1
                }
                else if (Sl < Slplus1 && p == N - 1)
                {
                    js_red[++right] = j; // delete position / column p+1
                    m--;
                }
                else
                {     // (Sl >= Slplus1)
                    if (p > 0)
                    { // i > imin
                      // delete position / column p and
                      //   move back to previous position / column p-1:
                        js_red[right] = js_red[left--];
                    }
                    else
                    {
                        right++; // delete position / column 0
                    }
                    m--;
                }
            }

            // Close the gap between the kept prefix and the unexamined tail.
            for (int r = (left + 1); r < m; ++r)
            {
                js_red[r] = js_red[right++];
            }

            Array.Resize(ref js_red, m);
        }