/// LB_Keogh 2: lower bound computed against the envelope of the DATA.
/// The envelopes l/u were built up front (while the chunk was read), so this
/// routine only normalizes them on the fly and accumulates the distance of the
/// sorted query outside the envelope.
///
/// Variable explanation:
/// qo:  sorted query
/// cb:  (output) contribution at each position, reused for early abandoning in DTW
/// l,u: lower and upper envelope of the current data chunk
/// I:   offset of the current subsequence inside the chunk
public MDVector dataCumulative(
    int[] order, MDVector[] qo, MDVector[] cb, MDVector[] l, MDVector[] u,
    int I, int len, MDVector mean, MDVector std,
    double bsf = double.PositiveInfinity) // best so far
{
    _lb.set(0);
    _d.set(0);

    // Visit positions in the query's sorted order; abandon once the bound exceeds bsf.
    for (int i = 0; i < len && _lb < bsf; i++)
    {
        // z-normalize the chunk envelope at this position on the fly.
        _uu = Utilities.normalize(u[order[i] + I], mean, std, _uu);
        _ll = Utilities.normalize(l[order[i] + I], mean, std, _ll);

        MDVector contribution = _d; // zero when the query point lies inside the envelope
        if (qo[i] > _uu)
        {
            contribution = Utilities.distanceSquared(qo[i], _uu, _result);
        }
        else if (qo[i] < _ll)
        {
            contribution = Utilities.distanceSquared(qo[i], _ll, _result);
        }

        _lb = MDVector.add(_lb, contribution, _lb);
        cb[order[i]].set(contribution);
    }

    return _lb;
}
/// Quick constant-time lower bound (LB_Kim).
/// The classic LB_Kim inspects top, bottom, first and last points in O(m); after
/// z-normalization the top and bottom give no significant benefit, so only the
/// first/last few points are used, which costs O(1). The pruning power is still
/// non-trivial, especially for short queries (e.g. length 128).
public MDVector hierarchy(MDVector[] t, MDVector[] q, long j, int len, MDVector mean, MDVector std, double bsf = double.PositiveInfinity)
{
    MDVector best;

    // Level 1: one point at the front and one at the back.
    _x0 = Utilities.normalize(t[j], mean, std, _x0);
    _y0 = Utilities.normalize(t[(len - 1 + j)], mean, std, _y0);
    _lb = MDVector.add(
        Utilities.distanceSquared(_x0, q[0], _result1),
        Utilities.distanceSquared(_y0, q[len - 1], _result2),
        _lb);
    if (_lb >= bsf)
    {
        return _lb;
    }

    // Level 2: two points at the front.
    _x1 = Utilities.normalize(t[(j + 1)], mean, std, _x1);
    best = Utilities.min(
        Utilities.distanceSquared(_x1, q[0], _result1),
        Utilities.distanceSquared(_x0, q[1], _result2));
    best = Utilities.min(best, Utilities.distanceSquared(_x1, q[1], _result3));
    _lb = MDVector.add(_lb, best, _lb);
    if (_lb >= bsf)
    {
        return _lb;
    }

    // Level 2: two points at the back.
    _y1 = Utilities.normalize(t[(len - 2 + j)], mean, std, _y1);
    best = Utilities.min(
        Utilities.distanceSquared(_y1, q[len - 1], _result1),
        Utilities.distanceSquared(_y0, q[len - 2], _result2));
    best = Utilities.min(best, Utilities.distanceSquared(_y1, q[len - 2], _result3));
    _lb = MDVector.add(_lb, best, _lb);
    if (_lb >= bsf)
    {
        return _lb;
    }

    // Level 3: three points at the front.
    _x2 = Utilities.normalize(t[(j + 2)], mean, std, _x2);
    best = Utilities.min(
        Utilities.distanceSquared(_x0, q[2], _result1),
        Utilities.distanceSquared(_x1, q[2], _result2));
    best = Utilities.min(best, Utilities.distanceSquared(_x2, q[2], _result3));
    best = Utilities.min(best, Utilities.distanceSquared(_x2, q[1], _result4));
    best = Utilities.min(best, Utilities.distanceSquared(_x2, q[0], _result5));
    _lb = MDVector.add(_lb, best, _lb);
    if (_lb >= bsf)
    {
        return _lb;
    }

    // Level 3: three points at the back.
    _y2 = Utilities.normalize(t[(len - 3 + j)], mean, std, _y2);
    best = Utilities.min(
        Utilities.distanceSquared(_y0, q[len - 3], _result1),
        Utilities.distanceSquared(_y1, q[len - 3], _result2));
    best = Utilities.min(best, Utilities.distanceSquared(_y2, q[len - 3], _result3));
    best = Utilities.min(best, Utilities.distanceSquared(_y2, q[len - 2], _result4));
    best = Utilities.min(best, Utilities.distanceSquared(_y2, q[len - 1], _result5));
    _lb = MDVector.add(_lb, best, _lb);

    return _lb;
}
/// Runs the full UCR-suite style subsequence search of `query` against the data
/// stream in _data. Data is consumed in overlapping chunks of EPOCH points; every
/// window is z-normalized on the fly from running sums, and candidates are pruned
/// by a cascade of lower bounds — LB_Kim, LB_Keogh (query envelope), LB_Keogh2
/// (data envelope) — before the early-abandoning DTW is computed.
/// Returns the best-k match locations and distances, sorted by distance.
public DTWResult warp(Query query)
{
    // Prepare/reset the k-NN search state.
    jumpsize = query.Length;
    wk = 0;
    _wk = 0;
    lastloc = 0;
    for (int tmpk = 0; tmpk < bestk; tmpk++)
    {
        _kvec[tmpk] = double.PositiveInfinity;
        _lvec[tmpk] = 0;
    }

    double bsf;                          // best-so-far DTW distance
    MDVector[] t;                        // circular data array (doubled, see below)
    MDVector[] tz, cb, cb1, cb2;         // normalized window + cumulative bounds
    MDVector d;
    int i, j;
    int dataIndex = 0;                   // read cursor into _data
    int matchIndex = 0;                  // absolute start index of a candidate match
    int kim = 0, keogh = 0, keogh2 = 0;  // pruning counters (diagnostics only)
    double distance = 0;

    // Prepare the query object (normalization, sorting, envelopes).
    query.process();

    cb = new MDVector[query.Length];
    cb1 = new MDVector[query.Length];
    cb2 = new MDVector[query.Length];
    t = new MDVector[query.Length * 2];
    for (i = 0; i < t.Length; i++)
    {
        t[i] = new MDVector(_dimensions);
    }
    tz = new MDVector[query.Length];
    for (i = 0; i < tz.Length; i++)
    {
        tz[i] = new MDVector(_dimensions);
    }

    // Initialize the cumulative lower bounds.
    for (i = 0; i < query.Length; i++)
    {
        cb[i] = new MDVector(_dimensions);
        cb[i].set(0);
        cb1[i] = new MDVector(_dimensions);
        cb1[i].set(0);
        cb2[i] = new MDVector(_dimensions);
        cb2[i].set(0);
    }

    // Initialize search state.
    bsf = double.PositiveInfinity;
    i = 0; /// current index of the data in current chunk of size EPOCH
    j = 0; /// the starting index of the data in the circular array, t
    ex.set(0);
    ex2.set(0);
    bool done = false;
    int it = 0, ep = 0, k = 0;
    int I; /// the starting index of the data in current chunk of size EPOCH
    LemireEnvelope lemireEnvelope = new LemireEnvelope(EPOCH, query.WarpingWindow, _dimensions);
    DTWCalculator dtwCalculator = new DTWCalculator(query.Length, query.WarpingWindow, _dimensions);

    while (!done)
    {
        // Obtain the first query.Length-1 points of this chunk.
        ep = 0;
        if (it == 0)
        {
            // First chunk: read fresh points from the data stream.
            for (k = 0; k < query.Length - 1; k++)
            {
                if (dataIndex < _data.Count)
                {
                    buffer[k].set(_data[dataIndex++]);
                }
            }
        }
        else
        {
            // Later chunks: overlap with the tail of the previous chunk so no
            // window straddling a chunk boundary is missed.
            for (k = 0; k < query.Length - 1; k++)
            {
                buffer[k].set(buffer[EPOCH - query.Length + 1 + k]);
            }
        }

        // Fill the buffer up to EPOCH points, or until all data has been read.
        ep = query.Length - 1;
        while (ep < EPOCH)
        {
            if (dataIndex >= _data.Count)
            {
                break;
            }
            buffer[ep].set(_data[dataIndex++]);
            ep++;
        }

        // Data are read in chunks of size EPOCH; when nothing new was read, stop.
        if (ep <= query.Length - 1)
        {
            done = true;
        }
        else
        {
            // Streaming (Lemire-style) min/max envelope over the whole chunk.
            lemireEnvelope.process(buffer, ep);

            /// Do main task here..
            ex.set(0);
            ex2.set(0);
            for (i = 0; i < ep; i++)
            {
                // Take one point of the chunk at a time.
                d = buffer[i];

                // Accumulate running sum and sum of squares for mean/std.
                ex = MDVector.add(ex, d, ex);
                result = MDVector.multiply(d, d, result);
                ex2 = MDVector.add(ex2, result, ex2);

                // t is a circular array keeping the current window ...
                t[i % query.Length].set(d);
                // ... stored twice so a full window can be read without modulo.
                t[(i % query.Length) + query.Length].set(d);

                // Start matching once query.Length points are available.
                if (i >= query.Length - 1)
                {
                    // Window mean and standard deviation from the running sums.
                    mean = MDVector.divide(ex, query.Length, mean);
                    std = MDVector.divide(ex2, query.Length, std);
                    mean2 = MDVector.multiply(mean, mean, mean2);
                    std = MDVector.subtract(std, mean2, std);
                    std.sqrt();

                    // Start location of the window inside the circular array t ...
                    j = (i + 1) % query.Length;
                    // ... and inside the current chunk.
                    I = i - (query.Length - 1);

                    // 1) Constant-time LB_Kim prunes the obvious non-matches.
                    lb_kim = _kimLb.hierarchy(t, query.BaseValues, j, query.Length, mean, std, bsf);
                    if (lb_kim < bsf)
                    {
                        // 2) Linear-time LB_Keogh against the QUERY envelope
                        //    (uo/lo); z-normalization of t happens on the fly.
                        lb_k = _keoghLb.cumulative(query.Ordered, t, query.OrderedUpperEnvelope, query.OrderedLowerEnvelope, cb1, j, query.Length, mean, std, bsf);
                        if (lb_k < bsf)
                        {
                            // z-normalize the window; for better optimization this
                            // could be merged into the call above.
                            for (k = 0; k < query.Length; k++)
                            {
                                tz[k] = Utilities.normalize(t[(k + j)], mean, std, tz[k]);
                            }

                            // 3) Second LB_Keogh against the DATA envelope of this
                            //    chunk (qo is the sorted query, tz the normalized data).
                            lb_k2 = _keoghLb.dataCumulative(query.Ordered, query.OrderedValues, cb2, lemireEnvelope.Lower, lemireEnvelope.Upper, I, query.Length, mean, std, bsf);
                            if (lb_k2 < bsf)
                            {
                                // Use the tighter of the two Keogh bounds for DTW early
                                // abandoning; cb becomes a suffix-cumulative sum here.
                                if (lb_k > lb_k2)
                                {
                                    cb[query.Length - 1].set(cb1[query.Length - 1]);
                                    for (k = query.Length - 2; k >= 0; k--)
                                    {
                                        cb[k] = MDVector.add(cb[k + 1], cb1[k], cb[k]);
                                    }
                                }
                                else
                                {
                                    cb[query.Length - 1].set(cb2[query.Length - 1]);
                                    for (k = query.Length - 2; k >= 0; k--)
                                    {
                                        cb[k] = MDVector.add(cb[k + 1], cb2[k], cb[k]);
                                    }
                                }

                                // 4) Full DTW with early abandoning against bsf.
                                distance = dtwCalculator.distance(tz, query.BaseValues, cb, bsf);
                                if (distance < bsf)
                                {
                                    // Real starting location of this match in the file.
                                    matchIndex = (it) * (EPOCH - query.Length + 1) + i - query.Length + 1;
                                    if (bestk == 1)
                                    {
                                        // NOTE(review): the PREVIOUS best (bsf) is stored in
                                        // _kvec[0] paired with the NEW matchIndex before bsf
                                        // is tightened — confirm this pairing is intended.
                                        _kvec[0] = bsf;
                                        _lvec[0] = matchIndex;
                                        bsf = distance;
                                    }
                                    else
                                    {
                                        bsf = ucr_set_knn(distance, matchIndex);
                                    }
                                }
                            }
                            else
                            {
                                keogh2++;
                            }
                        }
                        else
                        {
                            keogh++;
                        }
                    }
                    else
                    {
                        kim++;
                    }

                    // Slide the window: remove the oldest point from the running sums.
                    ex = MDVector.subtract(ex, t[j], ex);
                    result = MDVector.multiply(t[j], t[j], result);
                    ex2 = MDVector.subtract(ex2, result, ex2);
                }
            }

            // If the last chunk was smaller than EPOCH there is no more data.
            if (ep < EPOCH)
            {
                done = true;
            }
            else
            {
                it++;
            }
        }
    }

    // Sort distances ascending, carrying the matching locations along as items.
    Array.Sort(_kvec, _lvec);
    return(new DTWResult() { Locations = _lvec, Distances = _kvec });
}
/// Dynamic Time Warping distance with a Sakoe-Chiba band and early abandoning.
/// A, B: data window and query, respectively.
/// cb:   suffix-cumulative lower bound used for early abandoning.
/// The band half-width is _warpingWindow; only a strip of 2*_warpingWindow+1
/// cells per row is kept, in two alternating row buffers.
public double distance(MDVector[] A, MDVector[] B, MDVector[] cb, double bsf = double.PositiveInfinity)
{
    int row, col, band;

    // Both cost rows start out as +infinity across the whole band.
    for (band = 0; band < 2 * _warpingWindow + 1; band++)
    {
        _costPrevious[band].set(float.PositiveInfinity);
        _cost[band].set(float.PositiveInfinity);
    }

    for (row = 0; row < _length; row++)
    {
        band = Utilities.max(0, _warpingWindow - row);
        _minCost.set(float.PositiveInfinity);

        for (col = Utilities.max(0, row - _warpingWindow); col <= Utilities.min(_length - 1, row + _warpingWindow); col++, band++)
        {
            // Top-left cell seeds the matrix.
            if ((row == 0) && (col == 0))
            {
                _cost[band] = Utilities.distanceSquared(A[0], B[0], _cost[band]);
                _minCost.set(_cost[band]);
                continue;
            }

            // Left neighbor (same row), if it exists inside the band.
            if ((col >= 1) && (band >= 1))
            {
                _y.set(_cost[band - 1]);
            }
            else
            {
                _y.set(float.PositiveInfinity);
            }
            // Upper neighbor (previous row), if it exists inside the band.
            if ((row >= 1) && (band + 1 <= 2 * _warpingWindow))
            {
                _x.set(_costPrevious[band + 1]);
            }
            else
            {
                _x.set(float.PositiveInfinity);
            }
            // Diagonal neighbor, if it exists.
            if ((row >= 1) && (col >= 1))
            {
                _z.set(_costPrevious[band]);
            }
            else
            {
                _z.set(float.PositiveInfinity);
            }

            // Classic DTW recurrence: cheapest predecessor plus local distance.
            _cost[band].set(
                MDVector.add(
                    Utilities.min(Utilities.min(_x, _y), _z),
                    Utilities.distanceSquared(A[row], B[col], _result),
                    _result));

            // Track the row minimum for early abandoning (a column minimum
            // would work as well).
            if (_cost[band] < _minCost)
            {
                _minCost.set(_cost[band]);
            }
        }

        // Abandon when even the row minimum plus the remaining lower bound
        // already reaches bsf.
        if (((row + _warpingWindow) < (_length - 1)) && (MDVector.add(_minCost, cb[row + _warpingWindow + 1], _result) >= bsf))
        {
            return _result.absSum(); // _result was just computed in the condition above
        }

        // Swap the two cost rows for the next iteration.
        _costTemp = _cost;
        _cost = _costPrevious;
        _costPrevious = _costTemp;
    }
    band--;

    // The final DTW distance sits in the last filled cell of the band
    // (the middle of our strip rather than the corner of an O(_length^2) matrix).
    return _costPrevious[band].absSum();
}