// Fills S[q][i] and J[q][i] for every column i in [imin, imax] by scanning
// all admissible start indices j of the last cluster.
//
// For each i the candidates are j = i (cost S[q-1][i-1], used as the seed)
// and j = i-1 down to max(q, J[q-1][i]), each costing
// S[q-1][j-1] + WithinCluster.Dissimilarity(criterion, j, i, ...).
// The comparison is strict, so on ties the larger j is kept.
//
// Assumption: each cluster must have at least one point.
public static void Quadratic(
    int imin, int imax, int q,
    number[][] S, int[][] J,
    number[] sum_x, number[] sum_x_sq, number[] sum_w, number[] sum_w_sq,
    DissimilarityType criterion)
{
    if (imin > imax)
    {
        // Empty column range: nothing to fill.
        return;
    }

    number[] prevCost = S[q - 1];
    number[] rowCost = S[q];
    int[] prevStart = J[q - 1];
    int[] rowStart = J[q];

    for (int col = imin; col <= imax; col++)
    {
        // Seed: the last cluster consists of the single point at col.
        number best = prevCost[col - 1];
        int bestStart = col;

        // The start index may not drop below q nor below the previous row's optimum.
        int lowest = Math.Max(q, prevStart[col]);

        int cand = col - 1;
        while (cand >= lowest)
        {
            number total = prevCost[cand - 1]
                + WithinCluster.Dissimilarity(criterion, cand, col, sum_x, sum_x_sq, sum_w, sum_w_sq);

            if (total < best)
            {
                best = total;
                bestStart = cand;
            }

            cand--;
        }

        rowCost[col] = best;
        rowStart[col] = bestStart;
    }
}
// For every column i = imin, imin + istep, ... <= imax, picks the start
// index among the candidate list js that minimises
// S[q-1][j-1] + WithinCluster.Dissimilarity(criterion, j, i, ...),
// and stores the minimum in S[q][i] and the chosen j in J[q][i].
//
// The scan for a column starts at the position in js of the last
// candidate accepted so far (initially 0); candidates below J[q-1][i] are
// skipped and the scan stops at the first candidate above i.
// Ties are resolved in favour of the later candidate (<=).
private static void FindMinFromCandidates(
    int imin, int imax, int istep, int q, int[] js,
    number[][] S, int[][] J,
    number[] sum_x, number[] sum_x_sq, number[] sum_w, number[] sum_w_sq,
    DissimilarityType criterion)
{
    // Position in js of the most recently accepted candidate.
    int carry = 0;

    for (int col = imin; col <= imax; col += istep)
    {
        // Seed the column with the candidate carried over from earlier columns.
        int seed = js[carry];
        number best = S[q - 1][seed - 1]
            + WithinCluster.Dissimilarity(criterion, seed, col, sum_x, sum_x_sq, sum_w, sum_w_sq);
        int bestStart = seed;

        int scan = carry + 1;
        while (scan < js.Length)
        {
            int cand = js[scan];

            if (cand >= J[q - 1][col])
            {
                if (cand > col)
                {
                    // A cluster cannot start after the column it ends at.
                    break;
                }

                number total = S[q - 1][cand - 1]
                    + WithinCluster.Dissimilarity(criterion, cand, col, sum_x, sum_x_sq, sum_w, sum_w_sq);

                if (total <= best)
                {
                    best = total;
                    bestStart = cand;
                    carry = scan;
                }
            }

            scan++;
        }

        S[q][col] = best;
        J[q][col] = bestStart;
    }
}
// Fills the dynamic-programming cost matrix S and backtrack matrix J.
//
//   x:         one-dimensional vector to be clustered; must be sorted
//   w:         optional weights; null or empty means equally weighted
//   S:         K x N matrix. Row 0 is set to
//              WithinCluster.Dissimilarity(criterion, 0, i, ...); rows
//              q >= 1 are filled by the routine selected with `method`
//   J:         K x N backtrack matrix, filled alongside S
//   method:    Linear (SMAWK), LogLinear or Quadratic row filler
//   criterion: forwarded to WithinCluster.Dissimilarity / the row fillers
//
// Throws Exception when K > 1 and `method` is none of the three known values.
//
// NOTE: All vector indices in this program start at position 0.
public static void FillMatrix(number[] x, number[] w, number[][] S, int[][] J, Method method, DissimilarityType criterion)
{
    int K = S.Length;
    int N = S[0].Length;

    // Decide once whether weights are in use; w is not modified below.
    bool weighted = !(w == null || w.Length == 0);

    // Prefix sums over the shifted data (and over the weights when present).
    number[] sum_x = new number[N];
    number[] sum_x_sq = new number[N];
    number[] sum_w = null;
    number[] sum_w_sq = null;

    // Middle element of x (the median when x is sorted). Used to shift the
    // values of x to improve numerical stability.
    number shift = x[N / 2];

    if (!weighted)
    {
        // equally weighted
        sum_x[0] = x[0] - shift;
        sum_x_sq[0] = (x[0] - shift) * (x[0] - shift);
    }
    else
    {
        // unequally weighted
        sum_x[0] = w[0] * (x[0] - shift);
        sum_x_sq[0] = w[0] * (x[0] - shift) * (x[0] - shift);
        sum_w = new number[N];
        sum_w_sq = new number[N];
        sum_w[0] = w[0];
        sum_w_sq[0] = w[0] * w[0];
    }

    S[0][0] = 0;
    J[0][0] = 0;

    for (int i = 1; i < N; ++i)
    {
        if (!weighted)
        {
            // equally weighted
            sum_x[i] = sum_x[i - 1] + x[i] - shift;
            sum_x_sq[i] = sum_x_sq[i - 1] + (x[i] - shift) * (x[i] - shift);
        }
        else
        {
            // unequally weighted
            sum_x[i] = sum_x[i - 1] + w[i] * (x[i] - shift);
            sum_x_sq[i] = sum_x_sq[i - 1] + w[i] * (x[i] - shift) * (x[i] - shift);
            sum_w[i] = sum_w[i - 1] + w[i];
            sum_w_sq[i] = sum_w_sq[i - 1] + w[i] * w[i];
        }

        // Initialize for q = 0: a single cluster covering points 0..i.
        S[0][i] = WithinCluster.Dissimilarity(criterion, 0, i, sum_x, sum_x_sq, sum_w, sum_w_sq);
        J[0][i] = 0;
    }

    for (int q = 1; q < K; ++q)
    {
        int imin;
        if (q < K - 1)
        {
            imin = Math.Max(1, q);
        }
        else
        {
            // No need to compute S[K-1][0] ... S[K-1][N-2]
            imin = N - 1;
        }

        if (method == Method.Linear)
        {
            Fill.SMAWK(imin, N - 1, q, S, J, sum_x, sum_x_sq, sum_w, sum_w_sq, criterion);
        }
        else if (method == Method.LogLinear)
        {
            Fill.LogLinear(imin, N - 1, q, q, N - 1, S, J, sum_x, sum_x_sq, sum_w, sum_w_sq, criterion);
        }
        else if (method == Method.Quadratic)
        {
            Fill.Quadratic(imin, N - 1, q, S, J, sum_x, sum_x_sq, sum_w, sum_w_sq, criterion);
        }
        else
        {
            throw new Exception("ERROR: unknown method " + method + "!");
        }
    }
}
// Fills S[q][i] and J[q][i] for columns imin..imax by divide and conquer:
// the middle column is solved first, and its optimal start index then
// bounds the search range of the two recursive halves.
//
//   imin, imax: inclusive column range to fill; an empty range returns at once
//   q:          row (cluster index) being filled; row q-1 must already be filled
//   jmin, jmax: bounds on the start index j inherited from the caller.
//               jmin is applied only when imin > q, jmax only when
//               imax < N-1
//   criterion:  forwarded to WithinCluster.Dissimilarity for every cost
public static void LogLinear(
    int imin, int imax, int q, int jmin, int jmax,
    number[][] S, int[][] J,
    number[] sum_x, number[] sum_x_sq, number[] sum_w, number[] sum_w_sq,
    DissimilarityType criterion)
{
    if (imin > imax)
    {
        return;
    }

    int N = S[0].Length;

    int i = (imin + imax) / 2;

    // Initialization of S[q][i]: the last cluster holds only point i.
    S[q][i] = S[q - 1][i - 1];
    J[q][i] = i;

    int jlow = q; // the lower end for j

    if (imin > q)
    {
        jlow = Math.Max(jlow, jmin);
    }
    jlow = Math.Max(jlow, J[q - 1][i]);

    int jhigh = i - 1; // the upper end for j
    if (imax < N - 1)
    {
        jhigh = Math.Min(jhigh, jmax);
    }

    // j walks down from jhigh while jlow walks up; each pass evaluates both ends.
    for (int j = jhigh; j >= jlow; --j)
    {
        // compute s(j,i) with the requested criterion, the same way
        // s(jlow,i) is computed below, so the two costs are comparable
        number sji = WithinCluster.Dissimilarity(criterion, j, i, sum_x, sum_x_sq, sum_w, sum_w_sq);

        // MS May 11, 2016 Added: stop as soon as this sum already reaches
        // the best value found so far.
        if (sji + S[q - 1][jlow - 1] >= S[q][i])
        {
            break;
        }

        // Examine the lower bound of the cluster border
        // compute s(jlow, i)
        number sjlowi = WithinCluster.Dissimilarity(criterion, jlow, i, sum_x, sum_x_sq, sum_w, sum_w_sq);

        number SSQ_jlow = sjlowi + S[q - 1][jlow - 1];

        if (SSQ_jlow < S[q][i])
        {
            // shrink the lower bound
            S[q][i] = SSQ_jlow;
            J[q][i] = jlow;
        }
        jlow++;

        number SSQ_j = sji + S[q - 1][j - 1];
        if (SSQ_j < S[q][i])
        {
            S[q][i] = SSQ_j;
            J[q][i] = j;
        }
    }

    // Left half: start indices are bounded above by the optimum just found.
    jmin = (imin > q) ? J[q][imin - 1] : q;
    jmax = J[q][i];
    LogLinear(imin, i - 1, q, jmin, jmax, S, J, sum_x, sum_x_sq, sum_w, sum_w_sq, criterion);

    // Right half: start indices are bounded below by the optimum just found.
    jmin = J[q][i];
    jmax = (imax < N - 1) ? J[q][imax + 1] : imax;
    LogLinear(i + 1, imax, q, jmin, jmax, S, J, sum_x, sum_x_sq, sum_w, sum_w_sq, criterion);
}
// Fills S[q][i] and J[q][i] for the columns i = imin, imin + 2*istep, ...
// (the "even" positions, 0-based) using the candidate start indices in js.
//
// For each such column the search over js is limited from below by the
// upper bound used for the previous even column (initially js[0]) and from
// above by min(J[q][i + istep], i), or min(js[last], i) when i + istep
// lies beyond imax. Ties are resolved in favour of the later candidate (<=).
private static void FillEvenPositions(
    int imin, int imax, int istep, int q, int[] js,
    number[][] S, int[][] J,
    number[] sum_x, number[] sum_x_sq, number[] sum_w, number[] sum_w_sq,
    DissimilarityType criterion)
{
    int count = js.Length;
    int stride = (istep << 1);

    // Smallest admissible start index for the current column.
    int lowerJ = js[0];

    // Cursor into js; it is shared across columns and only moves forward,
    // apart from the one-step rewind at the end of each column.
    int pos = 0;

    for (int row = imin; row <= imax; row += stride)
    {
        // Advance the cursor to the first candidate that is at least lowerJ.
        while (js[pos] < lowerJ)
        {
            pos++;
        }

        // Seed S[q][row] and J[q][row] with that candidate.
        int seed = js[pos];
        S[q][row] = S[q - 1][seed - 1]
            + WithinCluster.Dissimilarity(criterion, seed, row, sum_x, sum_x_sq, sum_w, sum_w_sq);
        J[q][row] = seed;

        // Upper bound on the start index for this column.
        int upperJ;
        if (row + istep <= imax)
        {
            upperJ = J[q][row + istep];
        }
        else
        {
            upperJ = js[count - 1];
        }

        int cap = Math.Min(upperJ, row);

        // Dissimilarity of the cluster that starts at the cap; used for the
        // early-exit test below.
        number capCost = WithinCluster.Dissimilarity(criterion, cap, row, sum_x, sum_x_sq, sum_w, sum_w_sq);

        pos++;
        while (pos < count && js[pos] <= cap)
        {
            int cand = js[pos];

            if (cand > row)
            {
                break;
            }

            if (cand >= J[q - 1][row])
            {
                number within = WithinCluster.Dissimilarity(criterion, cand, row, sum_x, sum_x_sq, sum_w, sum_w_sq);
                number total = (S[q - 1][cand - 1] + within);

                if (total <= S[q][row])
                {
                    S[q][row] = total;
                    J[q][row] = cand;
                }
                else if (S[q - 1][cand - 1] + capCost > S[q][row])
                {
                    break;
                }
            }

            pos++;
        }

        // Step back one position so the next column re-examines it.
        pos--;
        lowerJ = upperJ;
    }
}
//SMAWK
// REDUCE step: produces in js_red a list of candidate start indices taken
// from js, cut down to N = (imax - imin) / istep + 1 entries (the number
// of columns imin, imin + istep, ... <= imax). When js already has no more
// than N entries, js_red is simply a copy of js.
//
// js_red is always a fresh array: the compaction below overwrites entries
// as it goes, so it must not run on the caller's js array itself.
private static void ReduceInPlace(
    int imin, int imax, int istep, int q, int[] js, out int[] js_red,
    number[][] S, int[][] J,
    number[] sum_x, number[] sum_x_sq, number[] sum_w, number[] sum_w_sq,
    DissimilarityType criterion)
{
    int N = (imax - imin) / istep + 1;

    // Copy instead of aliasing so that js is left untouched for the caller.
    js_red = (int[])js.Clone();

    if (N >= js.Length)
    {
        return;
    }

    // Two positions to move candidate j's back and forth
    int left = -1; // points to last favorable position / column
    int right = 0; // points to current position / column

    int m = js_red.Length;

    while (m > N)
    {
        // js_red has more than N positions / columns

        int p = (left + 1);

        int i = (imin + p * istep);
        int j = (js_red[right]);
        number Sl = (S[q - 1][j - 1]
            + WithinCluster.Dissimilarity(criterion, j, i, sum_x, sum_x_sq, sum_w, sum_w_sq));

        int jplus1 = (js_red[right + 1]);
        number Slplus1 = (S[q - 1][jplus1 - 1]
            + WithinCluster.Dissimilarity(criterion, jplus1, i, sum_x, sum_x_sq, sum_w, sum_w_sq));

        if (Sl < Slplus1 && p < N - 1)
        {
            js_red[++left] = j; // i += istep;
            right++; // move on to next position / column p+1
        }
        else if (Sl < Slplus1 && p == N - 1)
        {
            js_red[++right] = j; // delete position / column p+1
            m--;
        }
        else
        {
            // (Sl >= Slplus1)
            if (p > 0)
            {
                // i > imin
                // delete position / column p and
                // move back to previous position / column p-1:
                js_red[right] = js_red[left--];
                // p --; // i -= istep;
            }
            else
            {
                right++; // delete position / column 0
            }
            m--;
        }
    }

    // Close the gap between the kept prefix and the unprocessed tail.
    for (int r = (left + 1); r < m; ++r)
    {
        js_red[r] = js_red[right++];
    }

    Array.Resize(ref js_red, m);
    return;
}