// Overwrite this vector's components with a copy of v's components.
// Assumes v has at least this.Dimensions components; no resize occurs.
public void set(MDVector v)
{
    int dims = this.Dimensions;
    for (int d = 0; d < dims; d++)
    {
        _values[d] = v[d];
    }
}
// Derive the per-query state from the raw samples collected so far:
// length, effective warping window, envelope object, and all scratch
// arrays used by the ordered (sorted-by-importance) lower-bound passes.
private void setup()
{
    int n = _values.Count;
    this.Length = n;
    this.WarpingWindow = calculateWarpingWindow(n, _warpingWindow);

    _sortingBuffer = new OrderedVector[n];
    BaseValues = new MDVector[n];
    OrderedValues = new MDVector[n];
    Ordered = new int[n];
    _envelope = new LemireEnvelope(n, this.WarpingWindow, _dimensions);
    OrderedUpperEnvelope = new MDVector[n];
    OrderedLowerEnvelope = new MDVector[n];

    for (int idx = 0; idx < n; idx++)
    {
        // BaseValues shares the sample objects; the Ordered* arrays get
        // fresh vectors to be filled in later.
        BaseValues[idx] = _values[idx];
        _sortingBuffer[idx] = new OrderedVector() { Vector = _values[idx], Index = idx };
        OrderedValues[idx] = new MDVector(_dimensions);
        OrderedUpperEnvelope[idx] = new MDVector(_dimensions);
        OrderedLowerEnvelope[idx] = new MDVector(_dimensions);
    }
}
// Constructor.
// dimensions:    dimensionality of each data/query sample
// k:             number of nearest neighbours to track (best-k search)
// warpingWindow: Sakoe-Chiba band size as a fraction of query length
// All scratch vectors used by the search loop are allocated here once,
// so warp() itself performs no per-candidate allocations.
public DTW(int dimensions = 1, int k = 1, float warpingWindow = 0.05f)
{
    _dimensions = dimensions;
    _warpingWindow = warpingWindow;
    bestk = k;

    _data = new List<MDVector>();
    _kimLb = new KimLowerBound(dimensions);
    _keoghLb = new KeoghLowerBound(dimensions);

    ex = new MDVector(dimensions);
    ex2 = new MDVector(dimensions);
    mean = new MDVector(dimensions);
    mean2 = new MDVector(dimensions);
    std = new MDVector(dimensions);
    result = new MDVector(dimensions);
    lb_kim = new MDVector(dimensions);
    lb_k = new MDVector(dimensions);
    lb_k2 = new MDVector(dimensions);

    buffer = new MDVector[EPOCH];
    for (int b = 0; b < buffer.Length; b++)
    {
        buffer[b] = new MDVector(dimensions);
    }

    // best-k result slots: distances and matching locations.
    _kvec = new double[bestk];
    _lvec = new int[bestk];
}
// Z-score normalization: result = (value - mean) / std (zero mean, unit
// variance). subtract() writes into and returns `result`, so the two
// in-place steps chain; `result` is returned for call chaining.
public static MDVector normalize(MDVector value, MDVector mean, MDVector std, MDVector result)
{
    return MDVector.divide(MDVector.subtract(value, mean, result), std, result);
}
// Copy constructor: allocates a vector of the same dimensionality and
// deep-copies every component of `vector`.
public MDVector(MDVector vector) : this(vector.Dimensions)
{
    int dims = this.Dimensions;
    for (int d = 0; d < dims; d++)
    {
        _values[d] = vector._values[d];
    }
}
// Element-wise (Hadamard) product of v1 and v2, written into `result`
// and returned. Aliasing `result` with an input is safe: each component
// is read before it is overwritten.
public static MDVector multiply(MDVector v1, MDVector v2, MDVector result)
{
    int dims = v1.Dimensions;
    for (int d = 0; d < dims; d++)
    {
        result[d] = v1[d] * v2[d];
    }
    return result;
}
// Append one sample to the query sequence. The sample is stored as a
// defensive copy so later mutation by the caller cannot affect the query.
// Throws ArgumentException when the sample's dimensionality does not match
// the dimensionality this object was configured with.
public void addQueryItem(MDVector vector)
{
    if (vector.Dimensions != _dimensions)
    {
        // ArgumentException (instead of bare Exception) lets callers
        // distinguish caller errors from unexpected failures; it still
        // derives from Exception, so existing catch blocks keep working.
        throw new ArgumentException(
            "Dimension of query item added [" + vector.Dimensions + "] does not match expected [" + _dimensions + "].",
            nameof(vector));
    }
    _values.Add(new MDVector(vector));
}
// Append one sample to the data (haystack) sequence, stored as a
// defensive copy. Throws ArgumentException when the sample's
// dimensionality does not match the configured dimensionality.
public void addDataItem(MDVector vector)
{
    if (vector.Dimensions != _dimensions)
    {
        // ArgumentException (instead of bare Exception) identifies this
        // as a caller error; it derives from Exception, so existing
        // catch blocks keep working.
        throw new ArgumentException(
            "Dimension of data item added [" + vector.Dimensions + "] does not match expected [" + _dimensions + "].",
            nameof(vector));
    }
    _data.Add(new MDVector(vector));
}
// Scalar division: each component of v1 divided by d, written into
// `result` and returned. No zero check — IEEE-754 semantics apply
// (division by zero yields infinity/NaN).
public static MDVector divide(MDVector v1, double d, MDVector result)
{
    int dims = v1.Dimensions;
    for (int c = 0; c < dims; c++)
    {
        result[c] = v1[c] / d;
    }
    return result;
}
// Element-wise difference v1 - v2, written into `result` and returned.
// Aliasing `result` with an input is safe: each component is read
// before it is overwritten.
public static MDVector subtract(MDVector v1, MDVector v2, MDVector result)
{
    int dims = v1.Dimensions;
    for (int c = 0; c < dims; c++)
    {
        result[c] = v1[c] - v2[c];
    }
    return result;
}
private MDVector _result; // scratch vector for distance computations

// Pre-allocate every temporary vector used by the lower-bound routines
// so the per-candidate loops in cumulative()/dataCumulative() are
// allocation-free.
public KeoghLowerBound(int dimensions = 1)
{
    _result = new MDVector(dimensions);
    _z = new MDVector(dimensions);
    _uu = new MDVector(dimensions);
    _ll = new MDVector(dimensions);
    _d = new MDVector(dimensions);
    _lb = new MDVector(dimensions);
}
// Scalar multiplication. The operator form has no caller-supplied
// buffer, so a fresh vector is allocated and returned.
public static MDVector operator *(MDVector v1, double d)
{
    MDVector scaled = new MDVector(v1.Dimensions);
    for (int c = 0; c < v1.Dimensions; c++)
    {
        scaled[c] = v1[c] * d;
    }
    return scaled;
}
private MDVector _result1, _result2, _result3, _result4, _result5; // scratch vectors for distance computations

// Pre-allocate the normalized-point buffers (first/last three points of
// the candidate) and five distance scratch vectors used by hierarchy(),
// keeping the pruning path allocation-free.
public KimLowerBound(int dimensions = 1)
{
    _lb = new MDVector(dimensions);

    // z-normalized front (x) and back (y) candidate points.
    _x0 = new MDVector(dimensions);
    _y0 = new MDVector(dimensions);
    _x1 = new MDVector(dimensions);
    _y1 = new MDVector(dimensions);
    _x2 = new MDVector(dimensions);
    _y2 = new MDVector(dimensions);

    _result1 = new MDVector(dimensions);
    _result2 = new MDVector(dimensions);
    _result3 = new MDVector(dimensions);
    _result4 = new MDVector(dimensions);
    _result5 = new MDVector(dimensions);
}
/// LB_Keogh 2: lower bound built from the envelope of the DATA (the mirror
/// of cumulative(), which uses the query's envelope). The data envelopes
/// were already created (by LemireEnvelope, in the caller) when each chunk
/// of data was read.
///
/// Variable explanation:
///   order : sorted indices for the query (most "important" points first)
///   qo    : sorted query
///   cb    : (output) bound contribution at each position; used later for
///           early abandoning inside DTW
///   l, u  : lower and upper envelope of the current data chunk (raw, not
///           normalized — they are normalized on the fly below)
///   I     : start offset of the candidate subsequence within the chunk
///   len   : query length
///   bsf   : best-so-far distance; the loop abandons once _lb reaches it
/// Returns the accumulated lower bound vector _lb (a reused field — the
/// caller must consume it before the next call).
public MDVector dataCumulative(
    int[] order, MDVector[] qo, MDVector[] cb, MDVector[] l, MDVector[] u,
    int I, int len, MDVector mean, MDVector std,
    double bsf = double.PositiveInfinity) // best so far
{
    MDVector a;
    _lb.set(0);
    _d.set(0); // zero vector: the contribution when qo[i] lies inside the envelope
    for (int i = 0; i < len && _lb < bsf; i++)
    {
        // Normalize the envelope values with the candidate's mean/std,
        // writing into reusable scratch vectors.
        _uu = Utilities.normalize(u[order[i] + I], mean, std, _uu);
        _ll = Utilities.normalize(l[order[i] + I], mean, std, _ll);
        a = _d; // default: zero contribution (point inside the envelope)
        if (qo[i] > _uu)
        {
            a = Utilities.distanceSquared(qo[i], _uu, _result);
        }
        else
        {
            if (qo[i] < _ll)
            {
                a = Utilities.distanceSquared(qo[i], _ll, _result);
            }
        }
        _lb = MDVector.add(_lb, a, _lb);
        cb[order[i]].set(a); // record per-position contribution (copied, so scratch reuse is safe)
    }
    return(_lb);
}
// Constructor.
// warpingWindow: Sakoe-Chiba band size as a fraction of query length
// dimensions:    dimensionality of each query sample; must be >= 1
// Length/WarpingWindow stay 0 until the query is processed (see setup()).
// Throws ArgumentOutOfRangeException when dimensions < 1.
public Query(float warpingWindow, int dimensions = 1)
{
    if (dimensions < 1)
    {
        // Include the parameter name and a message so the failure is
        // diagnosable at the call site (the bare ctor reported neither).
        throw new ArgumentOutOfRangeException(nameof(dimensions), "Query dimensionality must be at least 1.");
    }
    _dimensions = dimensions;
    _warpingWindow = warpingWindow;
    _mean = new MDVector(dimensions);
    _stdDeviation = new MDVector(dimensions);
    _values = new List<MDVector>();
    _processed = false;
    this.Length = 0;
    this.WarpingWindow = 0;
}
/// LB_Keogh 1: lower bound built from the envelope of the QUERY. Because
/// the query is fixed, its envelope is created once at the beginning.
///
/// Variable explanation:
///   order  : sorted indices for the query (most "important" points first)
///   uo, lo : upper and lower envelopes of the query, already sorted
///   t      : circular array keeping the current data
///   j      : index of the starting location in t
///   cb     : (output) bound contribution at each position; used later for
///            early abandoning inside DTW
///   bsf    : best-so-far distance; the loop abandons once _lb reaches it
/// Returns the accumulated lower bound vector _lb (a reused field — the
/// caller must consume it before the next call).
public MDVector cumulative(
    int[] order, MDVector[] t, MDVector[] uo, MDVector[] lo, MDVector[] cb,
    long j, int len, MDVector mean, MDVector std,
    double bsf = double.PositiveInfinity) // best so far
{
    MDVector a;
    _lb.set(0);
    _d.set(0); // zero vector: the contribution when the data point lies inside the envelope
    for (int i = 0; i < len && _lb < bsf; i++)
    {
        // z-normalize the data point on the fly into a reusable scratch vector.
        _z = Utilities.normalize(t[(order[i] + j)], mean, std, _z);
        a = _d; // default: zero contribution (point inside the envelope)
        if (_z > uo[i])
        {
            a = Utilities.distanceSquared(_z, uo[i], _result);
        }
        else if (_z < lo[i])
        {
            a = Utilities.distanceSquared(_z, lo[i], _result);
        }
        _lb += a;
        cb[order[i]].set(a); // record per-position contribution (copied, so scratch reuse is safe)
    }
    return(_lb);
}
private MDVector _result; // scratch vector for distance computations

// Constructor.
// length:     sequence length of the DTW comparison
// window:     Sakoe-Chiba warping band half-width (in samples)
// dimensions: dimensionality of each sample
// Pre-allocates two rolling cost rows of width 2*window+1 — instead of a
// matrix of size O(length^2) (or O(length*window)) — plus the scratch
// vectors used inside the DP loop.
public DTWCalculator(int length, int window, int dimensions = 1)
{
    _length = length;
    _warpingWindow = window;
    _dimensions = dimensions;

    _x = new MDVector(dimensions);
    _y = new MDVector(dimensions);
    _z = new MDVector(dimensions);
    _minCost = new MDVector(dimensions);
    _result = new MDVector(dimensions);

    int bandWidth = 2 * window + 1;
    _cost = new MDVector[bandWidth];
    _costPrevious = new MDVector[bandWidth];
    for (int c = 0; c < bandWidth; c++)
    {
        _cost[c] = new MDVector(dimensions);
        _costPrevious[c] = new MDVector(dimensions);
    }
}
// Return whichever vector compares larger under MDVector's overloaded
// '>' operator. No copy is made — the chosen argument itself is returned.
public static MDVector max(MDVector x, MDVector y)
{
    if (x > y)
    {
        return x;
    }
    return y;
}
// Return whichever vector compares smaller under MDVector's overloaded
// '<' operator. No copy is made — the chosen argument itself is returned.
public static MDVector min(MDVector x, MDVector y)
{
    if (x < y)
    {
        return x;
    }
    return y;
}
/*
 * Component-wise squared difference (x - y)^2, written into `result` and
 * returned. This is the squared-L2 building block: the square root is
 * deliberately omitted, since the DTW pipeline only compares unit-less
 * magnitudes and skipping sqrt saves computation.
 */
public static MDVector distanceSquared(MDVector x, MDVector y, MDVector result)
{
    MDVector diff = MDVector.subtract(x, y, result); // diff aliases result
    return MDVector.multiply(diff, diff, result);
}
/// Calculate quick lower bound (LB_Kim style).
/// Usually LB_Kim takes O(m) time, finding top, bottom, first and last
/// points. However, because of z-normalization the top and bottom cannot
/// give significant benefits, and using only the first and last points can
/// be computed in constant time. The pruning power of LB_Kim is
/// non-trivial, especially when the query is not long, say of length 128.
///
/// t:    circular data array; j: start index within it; q: query;
/// bsf:  best-so-far distance — each stage below returns early once the
///       accumulated bound _lb reaches it.
/// Returns _lb (a reused field — consume before the next call).
/// The five _resultN scratch vectors let min() compare candidate distances
/// without any distance overwriting another within a stage.
public MDVector hierarchy(MDVector[] t, MDVector[] q, long j, int len, MDVector mean, MDVector std, double bsf = double.PositiveInfinity)
{
    MDVector d;

    /// 1 point at front and back
    _x0 = Utilities.normalize(t[j], mean, std, _x0);
    _y0 = Utilities.normalize(t[(len - 1 + j)], mean, std, _y0);
    _lb = MDVector.add(
        Utilities.distanceSquared(_x0, q[0], _result1),
        Utilities.distanceSquared(_y0, q[len - 1], _result2),
        _lb);
    if (_lb >= bsf) { return(_lb); }

    /// 2 points at front: best alignment of the 2nd data/query points
    _x1 = Utilities.normalize(t[(j + 1)], mean, std, _x1);
    d = Utilities.min(
        Utilities.distanceSquared(_x1, q[0], _result1),
        Utilities.distanceSquared(_x0, q[1], _result2));
    d = Utilities.min(d, Utilities.distanceSquared(_x1, q[1], _result3));
    _lb = MDVector.add(_lb, d, _lb);
    if (_lb >= bsf) { return(_lb); }

    /// 2 points at back (mirror of the stage above)
    _y1 = Utilities.normalize(t[(len - 2 + j)], mean, std, _y1);
    d = Utilities.min(
        Utilities.distanceSquared(_y1, q[len - 1], _result1),
        Utilities.distanceSquared(_y0, q[len - 2], _result2));
    d = Utilities.min(d, Utilities.distanceSquared(_y1, q[len - 2], _result3));
    _lb = MDVector.add(_lb, d, _lb);
    if (_lb >= bsf) { return(_lb); }

    /// 3 points at front: cheapest pairing involving the 3rd point
    _x2 = Utilities.normalize(t[(j + 2)], mean, std, _x2);
    d = Utilities.min(
        Utilities.distanceSquared(_x0, q[2], _result1),
        Utilities.distanceSquared(_x1, q[2], _result2));
    d = Utilities.min(d, Utilities.distanceSquared(_x2, q[2], _result3));
    d = Utilities.min(d, Utilities.distanceSquared(_x2, q[1], _result4));
    d = Utilities.min(d, Utilities.distanceSquared(_x2, q[0], _result5));
    _lb = MDVector.add(_lb, d, _lb);
    if (_lb >= bsf) { return(_lb); }

    /// 3 points at back (mirror of the stage above)
    _y2 = Utilities.normalize(t[(len - 3 + j)], mean, std, _y2);
    d = Utilities.min(
        Utilities.distanceSquared(_y0, q[len - 3], _result1),
        Utilities.distanceSquared(_y1, q[len - 3], _result2));
    d = Utilities.min(d, Utilities.distanceSquared(_y2, q[len - 3], _result3));
    d = Utilities.min(d, Utilities.distanceSquared(_y2, q[len - 2], _result4));
    d = Utilities.min(d, Utilities.distanceSquared(_y2, q[len - 1], _result5));
    _lb = MDVector.add(_lb, d, _lb);
    return(_lb);
}
/// Run the full UCR-style subsequence search of `query` over the data
/// stream added via addDataItem(): data is consumed in chunks of EPOCH,
/// each candidate window is z-normalized on the fly via running sums, and
/// candidates are pruned by the LB_Kim -> LB_Keogh (query envelope) ->
/// LB_Keogh (data envelope) cascade before the (early-abandoning) DTW
/// distance is computed. Returns the best-k locations and distances.
public DTWResult warp(Query query)
{
    //prepare/reset knn search:
    jumpsize = query.Length;
    wk = 0; _wk = 0; lastloc = 0;
    for (int tmpk = 0; tmpk < bestk; tmpk++) { _kvec[tmpk] = double.PositiveInfinity; _lvec[tmpk] = 0; }

    double bsf;                 // best-so-far
    MDVector[] t;               // data array (circular, doubled — see below)
    MDVector[] tz, cb, cb1, cb2; // z-normalized candidate; cumulative bounds
    MDVector d;
    int i, j;
    int dataIndex = 0;
    int matchIndex = 0;
    int kim = 0, keogh = 0, keogh2 = 0; // pruning counters (statistics only; not returned)
    double distance = 0;

    // prepare query object (builds envelopes / sorted order)
    query.process();

    cb = new MDVector[query.Length];
    cb1 = new MDVector[query.Length];
    cb2 = new MDVector[query.Length];
    t = new MDVector[query.Length * 2];
    for (i = 0; i < t.Length; i++) { t[i] = new MDVector(_dimensions); }
    tz = new MDVector[query.Length];
    for (i = 0; i < tz.Length; i++) { tz[i] = new MDVector(_dimensions); }

    // Initialize the cummulative lower bound
    for (i = 0; i < query.Length; i++)
    {
        cb[i] = new MDVector(_dimensions); cb[i].set(0);
        cb1[i] = new MDVector(_dimensions); cb1[i].set(0);
        cb2[i] = new MDVector(_dimensions); cb2[i].set(0);
    }

    // Initialize
    bsf = double.PositiveInfinity;
    i = 0;              /// current index of the data in current chunk of size EPOCH
    j = 0;              /// the starting index of the data in the circular array, t
    ex.set(0);          // running sum of samples in the current window
    ex2.set(0);         // running sum of squared samples
    bool done = false;
    int it = 0, ep = 0, k = 0;
    int I;              /// the starting index of the data in current chunk of size EPOCH
    LemireEnvelope lemireEnvelope = new LemireEnvelope(EPOCH, query.WarpingWindow, _dimensions);
    DTWCalculator dtwCalculator = new DTWCalculator(query.Length, query.WarpingWindow, _dimensions);

    while (!done)
    {
        // Read first query.Length-1 points: fresh from the data on the
        // first iteration, otherwise carried over from the previous chunk
        // so windows spanning the chunk boundary are still examined.
        ep = 0;
        if (it == 0)
        {
            for (k = 0; k < query.Length - 1; k++)
            {
                if (dataIndex < _data.Count) { buffer[k].set(_data[dataIndex++]); }
            }
        }
        else
        {
            for (k = 0; k < query.Length - 1; k++) { buffer[k].set(buffer[EPOCH - query.Length + 1 + k]); }
        }

        // Read buffer of size EPOCH or until all data has been read.
        ep = query.Length - 1;
        while (ep < EPOCH)
        {
            if (dataIndex >= _data.Count) { break; }
            buffer[ep].set(_data[dataIndex++]);
            ep++;
        }

        // Data are read in chunks of size EPOCH.
        // When there is nothing left to read, the loop ends.
        if (ep <= query.Length - 1)
        {
            done = true;
        }
        else
        {
            // Build the chunk-wide upper/lower envelopes once per chunk.
            lemireEnvelope.process(buffer, ep);

            /// Do main task here..
            ex.set(0); ex2.set(0);
            for (i = 0; i < ep; i++)
            {
                // A bunch of data has been read; pick one point at a time.
                d = buffer[i];

                // Calculate running sum and sum of squares (for mean/std).
                ex = MDVector.add(ex, d, ex);
                result = MDVector.multiply(d, d, result);
                ex2 = MDVector.add(ex2, result, ex2);

                // t is a circular array keeping the current data;
                // doubled in size to avoid the modulo "%" operator on reads.
                t[i % query.Length].set(d);
                t[(i % query.Length) + query.Length].set(d);

                // Start once there are at least query.Length points buffered.
                if (i >= query.Length - 1)
                {
                    // Window mean and std from the running sums:
                    // std = sqrt(E[x^2] - E[x]^2).
                    mean = MDVector.divide(ex, query.Length, mean);
                    std = MDVector.divide(ex2, query.Length, std);
                    mean2 = MDVector.multiply(mean, mean, mean2);
                    std = MDVector.subtract(std, mean2, std);
                    std.sqrt();

                    // compute the start location of the data in the current circular array, t
                    j = (i + 1) % query.Length;
                    // the start location of the data in the current chunk
                    I = i - (query.Length - 1);

                    // Cascade 1: constant-time LB_Kim prune.
                    lb_kim = _kimLb.hierarchy(t, query.BaseValues, j, query.Length, mean, std, bsf);
                    if (lb_kim < bsf)
                    {
                        // Cascade 2: linear-time LB_Keogh with the QUERY's
                        // envelope; t is z-normalized on the fly.
                        lb_k = _keoghLb.cumulative(query.Ordered, t, query.OrderedUpperEnvelope, query.OrderedLowerEnvelope, cb1, j, query.Length, mean, std, bsf);
                        if (lb_k < bsf)
                        {
                            // z-normalize the candidate once; for better
                            // optimization this could be merged into the
                            // previous pass.
                            for (k = 0; k < query.Length; k++)
                            {
                                tz[k] = Utilities.normalize(t[(k + j)], mean, std, tz[k]);
                            }

                            // Cascade 3: LB_Keogh with the DATA's envelope
                            // (sorted query vs chunk-wide envelopes).
                            lb_k2 = _keoghLb.dataCumulative(query.Ordered, query.OrderedValues, cb2, lemireEnvelope.Lower, lemireEnvelope.Upper, I, query.Length, mean, std, bsf);
                            if (lb_k2 < bsf)
                            {
                                // Choose the tighter of the two Keogh bounds
                                // and build the suffix-cumulative cb used for
                                // early abandoning inside DTW.
                                if (lb_k > lb_k2)
                                {
                                    cb[query.Length - 1].set(cb1[query.Length - 1]);
                                    for (k = query.Length - 2; k >= 0; k--) { cb[k] = MDVector.add(cb[k + 1], cb1[k], cb[k]); }
                                }
                                else
                                {
                                    cb[query.Length - 1].set(cb2[query.Length - 1]);
                                    for (k = query.Length - 2; k >= 0; k--) { cb[k] = MDVector.add(cb[k + 1], cb2[k], cb[k]); }
                                }

                                // Compute DTW, abandoning early when possible.
                                distance = dtwCalculator.distance(tz, query.BaseValues, cb, bsf);
                                if (distance < bsf)
                                {
                                    // Update bsf; matchIndex is the real
                                    // starting location within the whole stream.
                                    matchIndex = (it) * (EPOCH - query.Length + 1) + i - query.Length + 1;
                                    if (bestk == 1)
                                    {
                                        // NOTE(review): this stores the PREVIOUS
                                        // bsf with the NEW matchIndex — the
                                        // distance/location pair in slot 0 looks
                                        // mismatched (expected _kvec[0] = distance?).
                                        // Confirm against ucr_set_knn's contract.
                                        _kvec[0] = bsf; _lvec[0] = matchIndex; bsf = distance;
                                    }
                                    else
                                    {
                                        bsf = ucr_set_knn(distance, matchIndex);
                                    }
                                }
                            }
                            else { keogh2++; }
                        }
                        else { keogh++; }
                    }
                    else { kim++; }

                    // Slide the window: remove the oldest point from the
                    // running sum and sum of squares.
                    ex = MDVector.subtract(ex, t[j], ex);
                    result = MDVector.multiply(t[j], t[j], result);
                    ex2 = MDVector.subtract(ex2, result, ex2);
                }
            }
            // If the last chunk was smaller than EPOCH there is no more
            // data: terminate; otherwise advance to the next chunk.
            if (ep < EPOCH) { done = true; } else { it++; }
        }
    }
    // Sort the k results by distance (locations permuted in lockstep).
    Array.Sort(_kvec, _lvec);
    return(new DTWResult() { Locations = _lvec, Distances = _kvec });
}
/// Calculate the Dynamic Time Warping distance with early abandoning.
/// A, B : data and query, respectively (both z-normalized, length _length)
/// cb   : cummulative bound used for early abandoning (suffix sums built
///        by the caller from the Keogh bounds)
/// bsf  : best-so-far distance; once a whole row's minimum plus the
///        remaining lower bound reaches it, the search is abandoned
/// _warpingWindow: size of the Sakoe-Chiba warping band.
/// The DP uses two rolling rows (_cost/_costPrevious) of width
/// 2*_warpingWindow+1, where k maps column j into the band.
public double distance(MDVector[] A, MDVector[] B, MDVector[] cb, double bsf = double.PositiveInfinity)
{
    int i, j, k;

    // Reset both band rows to +inf before the DP starts.
    for (k = 0; k < 2 * _warpingWindow + 1; k++)
    {
        _costPrevious[k].set(float.PositiveInfinity);
        _cost[k].set(float.PositiveInfinity);
    }

    for (i = 0; i < _length; i++)
    {
        // k is the band-local index of the first in-band column of row i.
        k = Utilities.max(0, _warpingWindow - i);
        _minCost.set(float.PositiveInfinity);

        for (j = Utilities.max(0, i - _warpingWindow); j <= Utilities.min(_length - 1, i + _warpingWindow); j++, k++)
        {
            // Initialize all row and column
            if ((i == 0) && (j == 0))
            {
                _cost[k] = Utilities.distanceSquared(A[0], B[0], _cost[k]);
                _minCost.set(_cost[k]);
                continue;
            }

            // Gather the three DP predecessors, treating out-of-band /
            // out-of-range neighbours as +inf:
            //   _y = cost(i, j-1), _x = cost(i-1, j), _z = cost(i-1, j-1).
            if ((j - 1 < 0) || (k - 1 < 0)) { _y.set(float.PositiveInfinity); }
            else { _y.set(_cost[k - 1]); }
            if ((i - 1 < 0) || (k + 1 > 2 * _warpingWindow)) { _x.set(float.PositiveInfinity); }
            else { _x.set(_costPrevious[k + 1]); }
            if ((i - 1 < 0) || (j - 1 < 0)) { _z.set(float.PositiveInfinity); }
            else { _z.set(_costPrevious[k]); }

            // Classic DTW recurrence: best predecessor + local distance.
            _cost[k].set(
                MDVector.add(
                    Utilities.min(Utilities.min(_x, _y), _z),
                    Utilities.distanceSquared(A[i], B[j], _result),
                    _result));

            // Track the minimum cost in this row for early abandoning
            // (a column-wise minimum would also work).
            if (_cost[k] < _minCost) { _minCost.set(_cost[k]); }
        }

        // Abandon early if the row minimum plus the remaining cumulative
        // lower bound already reaches bsf.
        if (((i + _warpingWindow) < (_length - 1)) && (MDVector.add(_minCost, cb[i + _warpingWindow + 1], _result) >= bsf))
        {
            return(_result.absSum()); // _result was calculated in if() above
        }

        // Move current array to previous array (swap the rolling rows).
        _costTemp = _cost;
        _cost = _costPrevious;
        _costPrevious = _costTemp;
    }
    k--;

    // The DTW distance sits in the last cell of the conceptual
    // _length x _length matrix — i.e. at the middle of our band row.
    return(_costPrevious[k].absSum());
}