// Fraction of the sorted dist(p, q) values used as the lower-quantile cut-off in Luo_2011_Period.
// The literal 100.0 keeps the division in floating point.
private double K_LOWER_PERCENT = (5 / 100.0); // note: double type

/*
 * Runs the streaming discord-detection loop for the selected algorithm.
 *
 * First pass: runs the algorithm's offline routine on the initial buffer.
 * Every later pass: takes the next point of stream_data, slides raw_buffer by one point,
 * runs the algorithm's online step, refreshes the chart and records result[0] (dist) and
 * result[1] (loc). When stream_data is exhausted, the recorded distances, locations and the
 * total elapsed time are written under "Output\<data name>\<algorithm>" and the method returns.
 *
 * file_name       : only used to derive the output folder name and the output file extension.
 * algorithm       : "HOTSAX_Squeezer_Stream", "BFHS", "HOTSAX_Stream", "Bounding_Box" or
 *                   "Sanchez_Method"; any other value matches no switch case.
 * result          : reassigned by every algorithm call; result[0] is the distance, result[1] the location.
 * norm_buffer     : normalized buffer used by the HOTSAX / Squeezer / BFHS branches.
 * data_to_calc_w  : only its Count is read here (into an otherwise unused local).
 * stream_data     : the points fed to the online step, one per loop iteration.
 * N_LENGTH        : subsequence length.
 * W_LENGTH        : passed through to the algorithms (word length / PAA size — TODO confirm).
 * raw_buffer      : raw sliding window; mutated in place (Add + RemoveAt(0)) on every online step.
 * threshold_mean, threshold_std : maximum drift of mean/std before the buffer is re-normalized.
 * threshold_sim   : passed through to the Squeezer routines.
 * R, maxEntry, minEntry : passed through to the PAA bounds and the R-tree.
 * period          : passed to NewOnlineAlgorithm (Sanchez_Method), with 2 * period as its start point.
 */
private void doStream(String file_name, String algorithm, List <double> result, List <double> norm_buffer, List <double> data_to_calc_w, List <double> stream_data, int N_LENGTH, int W_LENGTH, List <double> raw_buffer, double threshold_mean, double threshold_std, double threshold_sim, int R, int maxEntry, int minEntry, int period)
{
    bool is_the_first_time = true;//true until the offline routine has been run once
    // Both lengths are captured once and never refreshed.
    int raw_buffer_len = raw_buffer.Count;
    int norm_buffer_len = norm_buffer.Count;
    //get last raw segment:
    //List<double> last_raw_segment = raw_buffer.GetRange(raw_buffer_len - N_LENGTH, N_LENGTH);
    //indices of the limited search space (used in case (b)):
    List <int> candidate_list = new List <int>();
    int index_stream = 0; //index of the next point to read from stream_data
    int index_table = 0; //incremented per non-normalizing step, reset to 0 on re-normalization
    int i_b = 0; //number of case (b) steps so far
    int data_calc_w_len = data_to_calc_w.Count; // NOTE(review): never read afterwards
    //result of the algorithm call:
    //List<double> result
    //result[0]: dist
    //result[1]: loc
    double next_data_point;
    double currDist;
    System.Diagnostics.Stopwatch watch2;; //times a single step
    long elapsedMs2 = 0;
    double small_match_dist = 0;
    List <double> first_segment;
    double new_norm_point;
    AugmentedTrie tree;
    Dictionary <string, int> count_table;
    Dictionary <int, string> total_table;
    count_table = new Dictionary <string, int>();
    total_table = new Dictionary <int, string>();
    //Making the root node:
    HOTSAX.TreeNode root = new HOTSAX.TreeNode('R');
    //init the path (to print the tree later)
    List <char> path = new List <char>();
    // Making the augmented tree:
    tree = new AugmentedTrie(root, path);
    // State for the Squeezer variant
    List <int> cluster_belong = new List <int>();
    List <Cluster_Squeezer> b_cluster = new List <Cluster_Squeezer>();
    List <int> lCluster_NonMember = new List <int>();
    // State for the Bounding_Box variant (also reused by Sanchez_Method)
    int this_id_item = int.MinValue;
    List <int> this_id_itemList = new List <int>();
    RTree <int> this_RTree = new RTree <int>(maxEntry, minEntry);
    List <BoundingBox.Rectangle> this_recList = new List <BoundingBox.Rectangle>();
    double this_best_so_far_dist = -Constants.INFINITE;
    double this_best_so_far_loc = -1;
    BoundingBox.Rectangle new_rec;
    // Placeholder variables; NOTE(review): none of the four is referenced again in this method.
    int dumb = 0;
    List <int> dumb_list = new List <int>();
    List <BoundingBox.Rectangle> dumb_rectlist = new List <BoundingBox.Rectangle>();
    RTree <int> dumb_rtree = new RTree <int>(maxEntry, minEntry);
    // State for Sanchez_Method
    double this_best_so_far_dist_TheMostDiscord = -Constants.INFINITE;
    // Statistics of the buffer at the time of the last normalization.
    double old_mean = MathFuncs.CalcMean(raw_buffer);
    double old_std = MathFuncs.CalcStd(raw_buffer, old_mean);
    double new_mean, new_std;
    // Per-step history, written to disk when the stream ends.
    List <double> result_dist = new List <double>();
    List <double> result_loc = new List <double>();
    var watch = System.Diagnostics.Stopwatch.StartNew();//times the whole run
    int num_nor = 0; //number of re-normalizations
    while (true)
    {
        manualResetEvent.WaitOne(Timeout.Infinite);//blocks here while paused (PAUSE / RESUME buttons)
        if (is_the_first_time)
        {
            watch2 = System.Diagnostics.Stopwatch.StartNew();//times this step
            switch (algorithm)
            {
            case "HOTSAX_Squeezer_Stream":
                result = BitClusterDiscord.squeezer(ref lCluster_NonMember, ref cluster_belong, ref b_cluster, norm_buffer, N_LENGTH, W_LENGTH, threshold_sim, BitClusterDiscord.PAA);
                Console.WriteLine("Case 1");
                break;

            case "BFHS":
            case "HOTSAX_Stream":
                result = HOTSAX.HOTSAX.originalHOTSAX(0, norm_buffer, N_LENGTH, W_LENGTH, ref tree, ref count_table, ref total_table, true);
                break;

            case "Bounding_Box":
                result = BoundingBox.BoundingBoxDiscordDiscovery.RunOfflineMinDist(raw_buffer, N_LENGTH, maxEntry, minEntry, R, W_LENGTH, ref this_id_item, ref this_id_itemList, ref this_recList, ref this_RTree, true);
                this_best_so_far_dist = result[0];
                this_best_so_far_loc = result[1];
                break;

            case "Sanchez_Method":
                result = BoundingBox.BoundingBoxDiscordDiscovery.RunOfflineMinDist(raw_buffer, N_LENGTH, maxEntry, minEntry, R, W_LENGTH, ref this_id_item, ref this_id_itemList, ref this_recList, ref this_RTree, true);
                // Only the distance is kept (as the threshold); the reported result is "no discord".
                this_best_so_far_dist_TheMostDiscord = result[0];
                result = new List <double> { -1, -1 };
                break;
            }
            is_the_first_time = false;
            watch2.Stop();
            elapsedMs2 = watch2.ElapsedMilliseconds;
            //print through console
            Console.WriteLine("\n\t best_so_far_dist = " + result.ElementAt(0) + "\n\t best_so_far_loc = " + result.ElementAt(1) + "\n\t execution_time = " + elapsedMs2);
            Console.WriteLine("----------------- finished i = {0}--------------- \n\n", index_stream - 1);
            Console.WriteLine("Finished the first call.");
            if (chart_timeSeries.IsHandleCreated)
            {
                try
                {
                    // run updateChart through this.Invoke; the raw buffer is charted for the R-tree algorithms
                    if (algorithm == "Bounding_Box" || algorithm == "Sanchez_Method")
                    {
                        this.Invoke((MethodInvoker) delegate { updateChart(raw_buffer, (int)(result[1]), N_LENGTH, index_stream, elapsedMs2); });
                    }
                    else
                    {
                        this.Invoke((MethodInvoker) delegate { updateChart(norm_buffer, (int)(result[1]), N_LENGTH, index_stream, elapsedMs2); });
                    }
                }
                catch
                {
                    // NOTE(review): every exception from Invoke/updateChart is swallowed silently here.
                }
            }
        }
        else
        {
            if (index_stream >= stream_data.Count)
            {
                // Stream exhausted: persist the history and leave the loop.
                watch.Stop();//stop timer
                var elapsedMs = watch.ElapsedMilliseconds;
                Console.WriteLine("Streaming is Done in " + elapsedMs + ".\nKeep going...Please wait");
                // Write File
                string[] all_dist = result_dist.Select(dist => dist.ToString()).ToArray();
                string[] all_loc = result_loc.Select(loc => loc.ToString()).ToArray();
                var data_name = System.IO.Path.GetFileNameWithoutExtension(file_name);
                var extension = System.IO.Path.GetExtension(file_name);
                string newPath = "Output\\" + data_name + "\\" + algorithm;
                System.IO.Directory.CreateDirectory(newPath);
                System.IO.File.WriteAllLines(newPath + "\\dist" + extension, all_dist);
                System.IO.File.WriteAllLines(newPath + "\\loc" + extension, all_loc);
                using (System.IO.StreamWriter file = new System.IO.StreamWriter(newPath + "\\time" + extension, false))
                {
                    file.WriteLine(elapsedMs);
                }
                // NOTE(review): this control is written directly, whereas the chart is updated via this.Invoke — confirm which thread runs doStream.
                this.txt_speed.Text = "0";
                // NOTE(review): the form instance is created but not used afterwards in this method.
                Statistical_Form statistical_form = new Statistical_Form(algorithm, file_name, result_loc, result_dist, elapsedMs);
                System.Windows.Forms.MessageBox.Show("stream_data ran out of points");
                Console.WriteLine("num norm: " + num_nor);
                return;
            }
            //get the next data point:
            next_data_point = stream_data[index_stream];
            // The new mean is derived from the old mean, the outgoing point raw_buffer[0] and the incoming point.
            new_mean = MathFuncs.CalcNewMean(old_mean, raw_buffer_len, raw_buffer[0], next_data_point);
            //store the last subsequence of the buffer for Bounding Box and Sanchez algorithms:
            List <double> last_sub = raw_buffer.GetRange(raw_buffer.Count - N_LENGTH, N_LENGTH);
            //get the first subsequence before the buffer is updated (helps to find the small match in Liu's method)
            List <double> first_sub = raw_buffer.GetRange(0, N_LENGTH);
            //update raw_buffer: append the new point, drop the oldest one
            raw_buffer.Add(next_data_point);
            raw_buffer.RemoveAt(0);
            new_std = MathFuncs.CalcStd(raw_buffer, new_mean);
            watch2 = System.Diagnostics.Stopwatch.StartNew();//times this step
            switch (algorithm)
            {
            case "HOTSAX_Squeezer_Stream":
            case "BFHS":
            case "HOTSAX_Stream":
                // Mean and std both stayed within their thresholds: keep the old normalization.
                if ((Math.Abs(new_mean - old_mean) <= threshold_mean) && (Math.Abs(new_std - old_std) <= threshold_std))
                {
                    // Normalize the new point with the statistics of the last normalization.
                    new_norm_point = (next_data_point - old_mean) / old_std;
                    index_table++;
                    norm_buffer.Add(new_norm_point);
                    //calc 'currDist': distance between the current discord and the newest subsequence
                    currDist = MathFuncs.EuDistance(norm_buffer.GetRange((int)result[1], N_LENGTH), norm_buffer.GetRange(norm_buffer.Count - N_LENGTH, N_LENGTH));
                    //case (a): full search; always taken for "BFHS" and when the current discord is at location 0
                    if (currDist < result[0] || (int)(result[1]) == 0 || algorithm == "BFHS")
                    {
                        Console.WriteLine("--- Running case (a)... ---");
                        if (algorithm == "HOTSAX_Squeezer_Stream")
                        {
                            // A copy of norm_buffer is passed to the Squeezer routine.
                            result = BitClusterDiscord.squeezer(ref lCluster_NonMember, ref cluster_belong, ref b_cluster, norm_buffer.Select(point => point).ToList(), N_LENGTH, W_LENGTH, threshold_sim, BitClusterDiscord.PAA, index_table, false);
                            //update buffer:
                            norm_buffer.RemoveAt(0);
                            Console.WriteLine("B_cluster.Count: " + b_cluster.Count.ToString());
                        }
                        else
                        {
                            //call the original HOTSAX ver3:
                            // NOTE(review): norm_buffer is not trimmed in this branch; presumably originalHOTSAX drops the oldest point itself — confirm.
                            result = HOTSAX.HOTSAX.originalHOTSAX(index_table, norm_buffer, N_LENGTH, W_LENGTH, ref tree, ref count_table, ref total_table);
                        }
                    }
                    else // case (b), we can limit candidate_list:
                    {
                        Console.WriteLine("--------------- Running case (b)... -----------------");
                        /* Find the candidate_list: */
                        candidate_list.Clear(); //reset the list
                        //The local discord at time t (shifted by one because the buffer slides):
                        candidate_list.Add((int)(result[1]) - 1);
                        //The subsequence (m-n+1, n)(t+1):
                        candidate_list.Add(norm_buffer_len - N_LENGTH);
                        //The small matches of subsequence (1, n)(t):
                        first_segment = norm_buffer.GetRange(0, N_LENGTH);
                        for (int j = N_LENGTH; j <= norm_buffer_len - N_LENGTH; j++)
                        {
                            small_match_dist = MathFuncs.EuDistance(norm_buffer.GetRange(j, N_LENGTH), first_segment);
                            if (small_match_dist < result[0])
                            {
                                // The current discord itself was already added above.
                                if ((int)(result[1]) != j)
                                {
                                    candidate_list.Add(j - 1);
                                }
                            }
                        }
                        if (algorithm == "HOTSAX_Squeezer_Stream")
                        {
                            result = BitClusterDiscord.squeezerAgain(ref lCluster_NonMember, candidate_list, index_table, ref cluster_belong, ref b_cluster, norm_buffer.Select(point => point).ToList(), N_LENGTH, W_LENGTH, threshold_sim, BitClusterDiscord.PAA);
                            //update buffer:
                            norm_buffer.RemoveAt(0);
                            Console.WriteLine("B_cluster.Count: " + b_cluster.Count.ToString());
                        }
                        else
                        {
                            //update buffer (here the oldest point is dropped BEFORE the search):
                            norm_buffer.RemoveAt(0);
                            //search on the candidates only
                            result = HOTSAX.HOTSAX.candidateHOTSAX(candidate_list, index_table, norm_buffer, N_LENGTH, W_LENGTH, ref tree, ref count_table, ref total_table);
                        }
                        i_b++; //count number of cases (b) - just for testing
                        Console.WriteLine("len(candidate_list) = {0}, Number of cases (b) is {1}/{2}", candidate_list.Count, i_b, index_stream);
                    } //end else - case (b)
                }
                else
                {
                    // Mean or std drifted past its threshold: re-normalize the whole buffer,
                    // reset all index structures and rerun the offline routine.
                    Console.WriteLine("--------------- Normalizing buffer ... -----------------");
                    num_nor++;
                    index_table = 0;
                    norm_buffer = MathFuncs.zScoreNorm(raw_buffer, raw_buffer_len);
                    count_table = new Dictionary <string, int>();
                    total_table = new Dictionary <int, string>();
                    cluster_belong = new List <int>();
                    b_cluster = new List <Cluster_Squeezer>();
                    //Making the root node:
                    root = new HOTSAX.TreeNode('R');
                    //init the path (to print the tree later)
                    path = new List <char>();
                    // Making the augmented tree:
                    tree = new AugmentedTrie(root, path);
                    if (algorithm == "HOTSAX_Squeezer_Stream")
                    {
                        result = BitClusterDiscord.squeezer(ref lCluster_NonMember, ref cluster_belong, ref b_cluster, norm_buffer, N_LENGTH, W_LENGTH, threshold_sim, BitClusterDiscord.PAA);
                    }
                    else
                    {
                        result = HOTSAX.HOTSAX.originalHOTSAX(0, norm_buffer, N_LENGTH, W_LENGTH, ref tree, ref count_table, ref total_table, true);
                    }
                    // The reference statistics are only refreshed when a re-normalization happens.
                    old_std = new_std;
                    old_mean = new_mean;
                }
                break;

            case "Bounding_Box":
                //update last_sub at time t to get new_sub at time (t+1):
                last_sub.Add(next_data_point);
                last_sub.RemoveAt(0);
                List <double> new_sub = last_sub; // the same object; the variable is also assigned and used by the "Sanchez_Method" case below
                // Insert the new entry into the tree:
                this_id_item++;
                // Add the new rec to the tree:
                new_rec = new BoundingBox.Rectangle(Utils.MathFuncs.PAA_Lower(new_sub, W_LENGTH, R).ToArray(), Utils.MathFuncs.PAA_Upper(new_sub, W_LENGTH, R).ToArray(), raw_buffer.Count - N_LENGTH + 1 + index_stream);
                this_RTree.Add(new_rec, this_id_item);
                this_recList.Add(new_rec);
                this_id_itemList.Add(this_id_item);
                //remove the oldest entry (the lists are append-only, so position index_stream is the oldest live one):
                this_RTree.Delete(this_recList[index_stream], this_id_itemList[index_stream]);
                result = BoundingBoxDiscordDiscovery.RunOnline_LiuMethod_edited(raw_buffer, index_stream, first_sub, this_RTree, this_best_so_far_dist, (int)this_best_so_far_loc, N_LENGTH, W_LENGTH);
                this_best_so_far_dist = result[0];
                this_best_so_far_loc = result[1];
                break;

            case "Sanchez_Method":
                //update last_sub at time t to get new_sub at time (t+1):
                last_sub.Add(next_data_point);
                last_sub.RemoveAt(0);
                new_sub = last_sub; // the same object
                // Insert the new entry into the tree:
                this_id_item++;
                // Add the new rec to the tree:
                new_rec = new BoundingBox.Rectangle(Utils.MathFuncs.PAA_Lower(new_sub, W_LENGTH, R).ToArray(), Utils.MathFuncs.PAA_Upper(new_sub, W_LENGTH, R).ToArray(), raw_buffer.Count - N_LENGTH + 1 + index_stream);
                this_RTree.Add(new_rec, this_id_item);
                this_recList.Add(new_rec);
                this_id_itemList.Add(this_id_item);
                //remove the oldest entry:
                this_RTree.Delete(this_recList[index_stream], this_id_itemList[index_stream]);
                result = BoundingBoxDiscordDiscovery.NewOnlineAlgorithm(raw_buffer, 2 * period, index_stream, period, new_sub, this_RTree, N_LENGTH, W_LENGTH, R, maxEntry, minEntry, ref this_best_so_far_dist_TheMostDiscord);
                break;
            }
            watch2.Stop(); //stop timer
            elapsedMs2 = watch2.ElapsedMilliseconds;
            //print through console
            Console.WriteLine("\n\t best_so_far_dist = " + result.ElementAt(0) + "\n\t best_so_far_loc = " + result.ElementAt(1) + "\n\t execution_time = " + elapsedMs2);
            Console.WriteLine("----------------- finished i = {0}--------------- \n\n", index_stream);
            index_stream++;// advance to the next data point
        }//end else
        // This chart refresh runs on every pass, including the first one (which already refreshed above).
        if (chart_timeSeries.IsHandleCreated)
        {
            try
            {
                // run updateChart through this.Invoke; the raw buffer is charted for the R-tree algorithms
                if (algorithm == "Bounding_Box" || algorithm == "Sanchez_Method")
                {
                    this.Invoke((MethodInvoker) delegate { updateChart(raw_buffer, (int)(result[1]), N_LENGTH, index_stream, elapsedMs2); });
                }
                else
                {
                    this.Invoke((MethodInvoker) delegate { updateChart(norm_buffer, (int)(result[1]), N_LENGTH, index_stream, elapsedMs2); });
                }
            }
            catch
            {
                // NOTE(review): every exception from Invoke/updateChart is swallowed silently here.
            }
        }
        // Store result
        result_dist.Add(result.ElementAt(0));
        result_loc.Add(result.ElementAt(1));
        //Thread.Sleep(2000);
    }
}
/*
 * Fig. 11 ('Luo_2011' paper): estimates the period as the median gap between two
 * neighboring local minima of dist(p, q) and writes it to txt_period.
 *
 * Steps:
 *   1. Read the series named in txt_data_to_calc_W and z-normalize it.
 *   2. Pick a random subsequence p of length N_length (from txt_NLength) and compute
 *      dist(p, q) for every start position q.
 *   3. Keep the positions whose distance is within the lower K_LOWER_PERCENT quantile.
 *   4. Collect the gaps larger than 2 between consecutive kept positions and report their median.
 *
 * If N_length does not fit the data, or if no gap larger than 2 exists, a message box is
 * shown and txt_period is left unchanged (both situations used to raise an
 * ArgumentOutOfRangeException / ArgumentException).
 */
private void Luo_2011_Period()
{
    List <double> data_to_calc_w = IOFuncs.readStreamFile(txt_data_to_calc_W.Text);
    int data_len = data_to_calc_w.Count;
    int N_length = Convert.ToInt16(txt_NLength.Text);

    // Without at least one full subsequence neither Random.Next nor GetRange below can succeed.
    if (N_length <= 0 || N_length > data_len)
    {
        System.Windows.Forms.MessageBox.Show("Cannot estimate the period: the subsequence length must be between 1 and the number of data points.");
        return;
    }

    //normalize data:
    List <double> norm_data = MathFuncs.zScoreNorm(data_to_calc_w, data_len);

    //choose a random p in [0, data_len - N_length] and take its segment:
    Random random_obj = new Random();
    int p = random_obj.Next(data_len - N_length + 1);
    List <double> p_segment = norm_data.GetRange(p, N_length);

    //go through all q(s), compute dist(p,q) for every q:
    List <double> dist_pq = new List <double>(data_len - N_length + 1);
    for (int q = 0; q <= (data_len - N_length); q++)
    {
        dist_pq.Add(MathFuncs.EuDistance(p_segment, norm_data.GetRange(q, N_length)));
    }

    /* Find the lower K_LOWER_PERCENT quantile of all distances calculated in the previous step.*/
    int dist_pq_len = dist_pq.Count;
    int k_percent_index = (int)(dist_pq_len * K_LOWER_PERCENT);
    double cp = dist_pq.OrderBy(n => n).ElementAt(k_percent_index);

    //get all positions q such that dist(p, q) <= cp; q is visited in ascending order,
    //so Q is already sorted and needs no extra sort.
    List <int> Q = new List <int>();
    for (int q = 0; q < dist_pq_len; q++)
    {
        if (dist_pq[q] <= cp)
        {
            Q.Add(q);
        }
    }

    //calc lag 1 (delta); gaps of 2 or less are ignored:
    List <int> delta = new List <int>();
    for (int k = 0; k < Q.Count - 1; k++)
    {
        int gap = Q[k + 1] - Q[k];
        if (gap > 2)
        {
            delta.Add(gap);
        }
    }

    // No usable gap: there is no median to report.
    if (delta.Count == 0)
    {
        System.Windows.Forms.MessageBox.Show("Cannot estimate the period: no gap larger than 2 was found between the local minima.");
        return;
    }

    //the median of the sorted gaps is the estimated period:
    delta.Sort();
    int delta_median = delta[delta.Count / 2];
    txt_period.Text = delta_median.ToString();
}
}// end

/*
 * Slides the HOTSAX index structures forward by one subsequence and then searches for the
 * discord among the given candidates only.
 *
 * candidate_list : start positions (in norm_data) used as the outer loop.
 * index          : offset between positions in norm_data and the keys of total_table;
 *                  key (index - 1) is dropped and key (index + norm_data.Count - N_LENGTH) is added.
 * norm_data      : normalized buffer; its last N_LENGTH points form the new subsequence.
 * tree, count_table, total_table : index structures, updated in place.
 *
 * Returns { best_so_far_dist, best_so_far_loc }; { 0, 0 } when no candidate qualifies.
 */
public static List <double> candidateHOTSAX(List <int> candidate_list, int index, List <double> norm_data, int N_LENGTH, int W_LENGTH, ref AugmentedTrie tree, ref Dictionary <string, int> count_table, ref Dictionary <int, string> total_table)
{
    // Word of the subsequence leaving the window, and word of the one entering it.
    List <double> incoming = norm_data.GetRange(norm_data.Count - N_LENGTH, N_LENGTH);
    string leavingWord = total_table[index - 1];
    string enteringWord = convertSegmentToWord(incoming, N_LENGTH, W_LENGTH);

    // --- count_table: one occurrence less of the leaving word, one more of the entering word ---
    int occurrences;
    if (count_table.TryGetValue(leavingWord, out occurrences))
    {
        count_table[leavingWord] = occurrences - 1;
    }
    if (count_table.TryGetValue(enteringWord, out occurrences))
    {
        count_table[enteringWord] = occurrences + 1;
    }
    else
    {
        count_table.Add(enteringWord, 1);
    }

    // --- total_table: replace the oldest key by the newest one ---
    int leavingKey = index - 1;
    int enteringKey = index + norm_data.Count - N_LENGTH;
    total_table.Remove(leavingKey);
    total_table.Add(enteringKey, enteringWord);

    // --- tree: move the position from the old leaf to the new leaf ---
    TreeNode leavingLeaf = tree.FindtheLeaf(leavingWord);
    TreeNode enteringLeaf = tree.FindtheLeaf(enteringWord);
    leavingLeaf.GetDataNode().Remove(leavingKey);
    enteringLeaf.GetDataNode().Add(enteringKey);

    // --- discord search restricted to the candidates ---
    double bestDist = 0;
    int bestLoc = 0;
    // Positions already ruled out as q in an inner loop (new bool[] starts out all false).
    bool[] ruledOut = new bool[norm_data.Count];

    foreach (int p in candidate_list)
    {
        if (ruledOut[p])
        {
            continue;
        }

        double nearest = Constants.INFINITE;
        string wordOfP = total_table[p + index];
        List <int> inner = InnerArrangement(index, total_table.Count, count_table, tree, wordOfP);
        bool abandoned = false;

        foreach (int q in inner)
        {
            // Overlapping subsequences are self-matches.
            if (Math.Abs(p - q) < N_LENGTH)
            {
                continue;
            }

            double d = MathFuncs.EuDistance(norm_data.GetRange(p, N_LENGTH), norm_data.GetRange(q, N_LENGTH));
            if (d < bestDist)
            {
                // p has a neighbour closer than the best discord so far, so neither p nor q can win.
                ruledOut[q] = true;
                abandoned = true;
                break;
            }
            if (d < nearest)
            {
                nearest = d;
            }
        }

        if (abandoned)
        {
            continue;
        }
        if (nearest > bestDist)
        {
            bestDist = nearest;
            bestLoc = p;
        }
    }

    return new List <double> { bestDist, bestLoc };
}
/*
 * new_online_algorithm:
 * Decides whether the newest subsequence of the buffer is a discord, i.e. whether its
 * nearest non-overlapping neighbour is farther away than a threshold distance.
 *
 * Every `period` steps (index_stream % period == 0) the threshold is recomputed by running
 * RunOfflineMinDist on buffer[0 .. startPoint) and is stored in
 * this_best_so_far_dist_TheMostDiscord.
 *
 * new_sub    : the newest subsequence (compared against the others).
 * this_RTree : its level-1 nodes supply the positions to compare with.
 *
 * Returns { nearest_neighbor_dist, location } for a discord, { -1, -1 } otherwise.
 */
public static List <double> NewOnlineAlgorithm(List <double> buffer, int startPoint, int index_stream, int period, List <double> new_sub, RTree <int> this_RTree, int this_NLength, int this_D, int this_R, int maxEntry, int minEntry, ref double this_best_so_far_dist_TheMostDiscord)
{
    // Refresh the threshold once per period.
    if (index_stream % period == 0)
    {
        List <double> head = buffer.GetRange(0, startPoint);

        // Throw-away arguments required by the RunOfflineMinDist signature.
        int unusedId = 0;
        List <int> unusedIds = new List <int>();
        List <Rectangle> unusedRects = new List <Rectangle>();
        RTree <int> unusedTree = new RTree <int>(maxEntry, minEntry);

        List <double> offline = RunOfflineMinDist(head, this_NLength, maxEntry, minEntry, this_R, this_D, ref unusedId, ref unusedIds, ref unusedRects, ref unusedTree, false);
        this_best_so_far_dist_TheMostDiscord = offline[0];
        Console.WriteLine("update data (for calculating thres), at index_stream " + index_stream);
    }

    double threshold = this_best_so_far_dist_TheMostDiscord;

    // Start position of the newest subsequence.
    int q_outer = buffer.Count - this_NLength;

    // Leaf nodes of the tree, smallest first.
    Dictionary <int, Node <int> > nodeMap = this_RTree.getNodeMap();
    List <Node <int> > leaves = (from node in nodeMap.Values
                                 where node.level == 1
                                 orderby node.entryCount
                                 select node).ToList();

    // Build the inner list: the entries sharing a leaf with q go to the front.
    List <int> innerList = new List <int>();
    foreach (Node <int> leaf in leaves)
    {
        List <int> idsWithoutQ = (from mbr in leaf.entries
                                  where (mbr != null) && (mbr.getIndexSubSeq(index_stream + 1) != q_outer)
                                  select mbr.getIndexSubSeq(index_stream + 1)).ToList();

        bool leafHoldsOnlyOthers = (idsWithoutQ.Count == leaf.entryCount);
        if (leafHoldsOnlyOthers)
        {
            innerList.AddRange(idsWithoutQ);
        }
        else
        {
            // Something was filtered out of this leaf (q itself): give its entries priority.
            innerList.InsertRange(0, idsWithoutQ);
        }
    }

    // Nearest-neighbour scan; stops as soon as a neighbour closer than the threshold is seen.
    double nearest = Constants.INFINITE;
    foreach (int p_inner in innerList)
    {
        if (Math.Abs(p_inner - q_outer) < this_NLength)
        {
            continue; // overlapping => self-match
        }

        double d = MathFuncs.EuDistance(new_sub, buffer.GetRange(p_inner, this_NLength));
        if (d < nearest)
        {
            nearest = d;
        }
        if (d < threshold)
        {
            break;
        }
    }

    if (nearest > threshold)
    {
        Console.WriteLine("Discord!\nbest_so_far_loc = " + q_outer + "\nbest_so_far_dist = " + nearest);
        return(new List <double>() { nearest, q_outer });
    }

    Console.WriteLine("No discord");
    return(new List <double>() { -1, -1 }); //"-1" means there is no discord.
}
} //end RunOnline_LiuEditedCaseA

/*
 * Called by RunOnline_LiuMethod_edited.
 * Searches for the discord among the entries of the R-tree leaves, trying first_candidate and
 * second_candidate first and skipping every later candidate that is not a "small match" of
 * removed_sub.
 *
 * buffer                : the series the subsequences are read from.
 * index_stream          : passed (as index_stream + 1) to getIndexSubSeq and printed at the end.
 * first_candidate, second_candidate : written to positions 0 and 1 of the candidate list.
 * removed_sub           : subsequence each candidate is compared with in the small-match check.
 * this_RTree            : its level-1 nodes supply the candidates and the pruning rectangles.
 * this_NLength          : subsequence length.
 * this_D                : passed to MathFuncs.PAA and used in the MINDIST scale factor.
 * this_best_so_far_dist : candidates from position 2 on are only examined when their distance
 *                         to removed_sub is below this value.
 *
 * Returns { best_so_far_dist, best_so_far_loc }; { 0, 0 } when no candidate qualifies.
 */
public static List <double> LiuEdited_CaseB(List <double> buffer, int index_stream, int first_candidate, int second_candidate, List <double> removed_sub, RTree <int> this_RTree, int this_NLength, int this_D, double this_best_so_far_dist)
{
    // Three parallel lists, one slot per leaf entry:
    //   candidateList[k]   - subsequence position of the entry
    //   beginIndexInner[k] - slot at which the entry's leaf starts (inner loops start there)
    //   indexOfLeafMBRS[k] - index of the entry's leaf in leafNodes / leafMBRs
    List <int> candidateList = new List <int>();
    List <int> beginIndexInner = new List <int>();
    List <int> indexOfLeafMBRS = new List <int>();
    Dictionary <int, Node <int> > nodeMap = this_RTree.getNodeMap();
    // Leaf nodes (level 1), ordered by ascending entry count.
    List <Node <int> > leafNodes = nodeMap.Values.Where(node => ((node.level == 1))).OrderBy(node => node.entryCount).ToList();
    List <Rectangle> leafMBRs = leafNodes.Select(node => node.mbr).ToList(); // rectangles of the leaf nodes, in the order of leafNodes
    for (int num = 0; num < leafNodes.Count; num++)
    {
        List <Rectangle> leafEntries = leafNodes[num].entries.Where(mbr => mbr != null).Select(mbr => mbr).ToList();
        if (leafEntries.Count > 0)
        {
            int beginIndex = candidateList.Count;
            // per the original author's note, getIndexSubSeq(index_stream + 1) subtracts "index_stream + 1" from the stored index — TODO confirm
            candidateList.AddRange(leafEntries.Select(mbr => mbr.getIndexSubSeq(index_stream + 1)));
            // Adds beginIndex once per entry (same effect as Enumerable.Repeat(beginIndex, leafEntries.Count)).
            beginIndexInner.AddRange(Enumerable.Range(1, leafEntries.Count).Select(x => beginIndex));
            indexOfLeafMBRS.AddRange(Enumerable.Repeat(num, leafEntries.Count));
        }
    } // end for
    // move the two given candidates to the head of candidateList
    // NOTE(review): the loop ends after the FIRST match (count < 1), so at most one iteration
    // copies a head element away before both head slots are overwritten below; the element
    // previously in the other head slot may be lost and a candidate may appear twice.
    // indexOfLeafMBRS is not permuted together with the other two lists, and if neither
    // candidate occurs in candidateList, index runs past the end. Confirm the intent.
    int count = 0;
    int index = 0;
    while (count < 1)
    {
        if (candidateList[index] == first_candidate)
        {
            candidateList[index] = candidateList[0];
            int temp = beginIndexInner[index];
            beginIndexInner[index] = beginIndexInner[0];
            beginIndexInner[0] = temp;
            count++;
        }
        if (candidateList[index] == second_candidate)
        {
            candidateList[index] = candidateList[1];
            int temp = beginIndexInner[index];
            beginIndexInner[index] = beginIndexInner[1];
            beginIndexInner[1] = temp;
            count++;
        }
        index++;
    }
    candidateList[0] = first_candidate;
    candidateList[1] = second_candidate;
    double best_so_far_dist = 0;
    int best_so_far_loc = 0;
    double nearest_neighbor_dist = 0;
    double dist = 0;
    bool break_to_outer_loop = false;
    // is_skipped_at_p[q] is set when q was seen closer than best_so_far_dist to some p.
    bool[] is_skipped_at_p = new bool[buffer.Count];
    for (int i = 0; i < buffer.Count; i++)
    {
        is_skipped_at_p[i] = false;
    }
    for (int i = 0; i < candidateList.Count; i++)
    {
        int p = candidateList[i];
        //check small_match: only the two head candidates are exempt from this filter
        double small_match = Utils.MathFuncs.EuDistance(buffer.GetRange(p, this_NLength), removed_sub);
        if (i >= 2 && small_match >= this_best_so_far_dist)
        {
            continue;
        }
        if (is_skipped_at_p[p])
        {
            //p was visited at inner loop before
            continue;
        }
        else
        {
            List <double> subseq_p = buffer.GetRange(p, this_NLength);
            //Rectangle p_rectangle = recList[p];
            List <double> P_PAA = MathFuncs.PAA(subseq_p, this_D);
            nearest_neighbor_dist = Constants.INFINITE;
            // eliminatedMBR[k] is set once leaf k has been pruned for this p.
            List <bool> eliminatedMBR = new List <bool>();
            for (int k = 0; k < leafMBRs.Count; k++)
            {
                eliminatedMBR.Add(false);
            }
            // Counter compared against the leaf index to detect the first entry of a leaf.
            int indexMBRLeaf = -1;
            for (int j = 0; j < candidateList.Count; j++)// inner loop
            {
                // int q = innerList[j];
                // The inner loop walks all slots circularly, starting at beginIndexInner[i].
                int index_inner = (beginIndexInner[i] + j) % candidateList.Count;
                int q = candidateList[index_inner];
                int index_MBRInnner = (beginIndexInner[i] + j) % candidateList.Count;
                int MBRInnner = indexOfLeafMBRS[index_MBRInnner];
                if (indexMBRLeaf < MBRInnner)//the first entry of the next node ?
                {
                    indexMBRLeaf++;

                    /* Test:
                     * if (indexMBRInnner[j] == MBRInnner)
                     *  Console.WriteLine("OK");*/
                    //calc minDist:
                    //double minDist = MathFuncs.MINDIST(p_rectangle, leafMBRs[MBRInnner], (NLength / (double)(D)));
                    double minDist = MathFuncs.MINDIST(P_PAA, leafMBRs[MBRInnner], (this_NLength / (double)(this_D)));
                    //if (minDist_keo > minDist)
                    //{
                    //    Console.WriteLine("STOPPP");
                    //    return;
                    //}
                    if (minDist >= nearest_neighbor_dist)
                    {
                        // The leaf cannot improve the nearest neighbour: prune it for this p.
                        eliminatedMBR[MBRInnner] = true;
                        continue;// pruned => skip to the next one
                    }
                    else
                    {
                        if (Math.Abs(p - q) < this_NLength)
                        {
                            continue;// self-match => skip to the next one
                        }
                        //calculate the Distance between p and q
                        dist = MathFuncs.EuDistance(subseq_p, buffer.GetRange(q, this_NLength));
                        if (dist < best_so_far_dist)
                        {
                            //skip the element q in the outer loop, because if (p,q) is not a solution, neither is (q,p).
                            is_skipped_at_p[q] = true;
                            break_to_outer_loop = true; //tells the outer loop to move on to the next p
                            break; // leave the inner loop first
                        }
                        if (dist < nearest_neighbor_dist)
                        {
                            nearest_neighbor_dist = dist;
                        }
                    }
                }
                else // still the same node
                {
                    if (eliminatedMBR[MBRInnner]) // can prune ?
                    {
                        continue;
                    }
                    else //do it normally
                    {
                        if (Math.Abs(p - q) < this_NLength)
                        {
                            continue;// self-match => skip to the next one
                        }
                        else
                        {
                            //calculate the Distance between p and q
                            dist = MathFuncs.EuDistance(subseq_p, buffer.GetRange(q, this_NLength));
                            if (dist < best_so_far_dist)
                            {
                                //skip the element q in the outer loop, because if (p,q) is not a solution, neither is (q,p).
                                is_skipped_at_p[q] = true;
                                break_to_outer_loop = true; //tells the outer loop to move on to the next p
                                break; // leave the inner loop first
                            }
                            if (dist < nearest_neighbor_dist)
                            {
                                nearest_neighbor_dist = dist;
                            }
                        }
                    }
                } //end ELSE
            } //end for inner loop
            //Console.WriteLine("num_leaf_skips="+ num_leaf_skips);
            if (break_to_outer_loop)
            {
                break_to_outer_loop = false; //reset
                continue; //go to the next p in outer loop
            }
            // p survived the inner loop: its nearest neighbour is the new best if it is farther away.
            if (nearest_neighbor_dist > best_so_far_dist)
            {
                best_so_far_dist = nearest_neighbor_dist;
                best_so_far_loc = p;
            }
            ////////////////////////
        }
    } // end for
    //print and return the results:
    Console.WriteLine("index_stream = " + index_stream);
    Console.WriteLine("best_so_far_loc = " + best_so_far_loc);
    Console.WriteLine("best_so_far_dist = " + best_so_far_dist);
    List <double> result = new List <double> { best_so_far_dist, best_so_far_loc };
    return(result);
} // end RunOnline_Liu_edit
//////////// Helper Functions ///////////////

/*
 * Called by RunOnline_LiuMethod_edited.
 * Full discord search over all entries of the R-tree leaves (the original author describes it
 * as almost the same as the offline minDist version).
 *
 * inputData    : the series the subsequences are read from.
 * index_stream : passed (as index_stream + 1) to getIndexSubSeq and printed at the end.
 * this_RTree   : its level-1 nodes supply the candidates and the pruning rectangles.
 * this_NLength : subsequence length.
 * this_D       : passed to MathFuncs.PAA and used in the MINDIST scale factor.
 *
 * Returns { best_so_far_dist, best_so_far_loc }; { 0, 0 } when no candidate qualifies.
 */
public static List <double> LiuEdited_CaseA(List <double> inputData, int index_stream, RTree <int> this_RTree, int this_NLength, int this_D)
{
    // Three parallel lists, one slot per leaf entry: subsequence position, first slot of the
    // entry's leaf, and index of the entry's leaf.
    List <int> positions = new List <int>();
    List <int> leafStart = new List <int>();
    List <int> leafOf = new List <int>();

    // Positions already ruled out as q in an inner loop (new bool[] starts out all false).
    bool[] ruledOut = new bool[inputData.Count];

    double bestDist = 0;
    int bestLoc = 0;

    // Leaf nodes (level 1), ordered by ascending entry count, and their rectangles.
    Dictionary <int, Node <int> > nodeMap = this_RTree.getNodeMap();
    List <Node <int> > leaves = nodeMap.Values.Where(node => node.level == 1).OrderBy(node => node.entryCount).ToList();
    List <Rectangle> leafMBRs = leaves.Select(node => node.mbr).ToList();

    for (int leafNo = 0; leafNo < leaves.Count; leafNo++)
    {
        List <Rectangle> entries = leaves[leafNo].entries.Where(mbr => mbr != null).ToList();
        int firstSlot = positions.Count;
        foreach (Rectangle entry in entries)
        {
            // per the original author's note, getIndexSubSeq(index_stream + 1) subtracts "index_stream + 1" from the stored index — TODO confirm
            positions.Add(entry.getIndexSubSeq(index_stream + 1));
            leafStart.Add(firstSlot);
            leafOf.Add(leafNo);
        }
    }

    int total = positions.Count;
    double scale = this_NLength / (double)(this_D);

    for (int outer = 0; outer < total; outer++)
    {
        int p = positions[outer];
        if (ruledOut[p])
        {
            continue; // p was already seen too close to another subsequence
        }

        List <double> subseq_p = inputData.GetRange(p, this_NLength);
        List <double> paaOfP = MathFuncs.PAA(subseq_p, this_D);
        double nearest = Constants.INFINITE;
        bool[] prunedLeaf = new bool[leafMBRs.Count];
        int leafCounter = -1; // compared against the leaf index to detect the first entry of a leaf
        bool abandoned = false;

        // Walk all slots circularly, starting with the leaf that holds p.
        for (int step = 0; step < total; step++)
        {
            int slot = (leafStart[outer] + step) % total;
            int q = positions[slot];
            int leaf = leafOf[slot];

            if (leafCounter < leaf)
            {
                // First entry of the next leaf: test whether the whole leaf can be pruned.
                leafCounter++;
                double minDist = MathFuncs.MINDIST(paaOfP, leafMBRs[leaf], scale);
                if (minDist >= nearest)
                {
                    prunedLeaf[leaf] = true;
                    continue;
                }
            }
            else if (prunedLeaf[leaf])
            {
                continue;
            }

            if (Math.Abs(p - q) < this_NLength)
            {
                continue; // overlapping => self-match
            }

            double d = MathFuncs.EuDistance(subseq_p, inputData.GetRange(q, this_NLength));
            if (d < bestDist)
            {
                // If (p,q) is not a solution, neither is (q,p).
                ruledOut[q] = true;
                abandoned = true;
                break;
            }
            if (d < nearest)
            {
                nearest = d;
            }
        }

        if (abandoned)
        {
            continue;
        }
        if (nearest > bestDist)
        {
            bestDist = nearest;
            bestLoc = p;
        }
    }

    Console.WriteLine("index_stream=" + index_stream);
    Console.WriteLine("best_so_far_loc=" + bestLoc);
    Console.WriteLine("best_so_far_dist=" + bestDist);
    return(new List <double> { bestDist, bestLoc });
} //end RunOnline_LiuEditedCaseA
////////////// Main Functions //////////////

/// <summary>
/// Offline discord search over <paramref name="inputData"/> using an R-tree and MINDIST-based leaf pruning.
/// Every subsequence of length <paramref name="NLength"/> is inserted into a fresh R-tree as a rectangle built
/// from <c>MathFuncs.PAA_Lower</c> / <c>MathFuncs.PAA_Upper</c>. The subsequences are then visited leaf by leaf;
/// for each candidate p the inner loop scans all candidates circularly, starting at the beginning of p's own leaf,
/// and keeps the smallest Euclidean distance to a non-overlapping subsequence. The candidate whose nearest-neighbour
/// distance is largest is reported.
/// </summary>
/// <param name="inputData">The series to search.</param>
/// <param name="NLength">Subsequence length; also the self-match exclusion radius (|p - q| &lt; NLength is skipped).</param>
/// <param name="maxEntry">Passed to the <c>RTree</c> constructor.</param>
/// <param name="minEntry">Passed to the <c>RTree</c> constructor; must satisfy minEntry &lt;= maxEntry / 2.</param>
/// <param name="R">Forwarded to <c>PAA_Lower</c> / <c>PAA_Upper</c> (meaning is defined there, not visible here).</param>
/// <param name="D">Forwarded to <c>PAA</c>, <c>PAA_Lower</c>, <c>PAA_Upper</c>; NLength / D is handed to <c>MINDIST</c>.</param>
/// <param name="this_id_item">Receives the last item id inserted into the tree, only when <paramref name="is_first_time"/> is true.</param>
/// <param name="this_id_itemList">Receives all inserted item ids, only when <paramref name="is_first_time"/> is true.</param>
/// <param name="this_rectList">Receives all inserted rectangles, only when <paramref name="is_first_time"/> is true.</param>
/// <param name="this_RTree">Receives the built R-tree, only when <paramref name="is_first_time"/> is true.</param>
/// <param name="is_first_time">When true, the tree and its bookkeeping lists are exported through the ref parameters.</param>
/// <returns>
/// A two-element list: [0] the best nearest-neighbour distance found, [1] the start index of that subsequence.
/// Returns {0, -1} when the min/max entry requirement is violated or when no candidate qualifies.
/// </returns>
public static List <double> RunOfflineMinDist(List <double> inputData, int NLength, int maxEntry, int minEntry, int R, int D, ref int this_id_item, ref List <int> this_id_itemList, ref List <Rectangle> this_rectList, ref RTree <int> this_RTree, bool is_first_time)
{
    int id_item = int.MinValue;
    RTree <int> rtree = new RTree <int>(maxEntry, minEntry);

    List <int> candidateList = new List <int>();   // subsequence start positions, grouped by leaf
    List <int> beginIndexInner = new List <int>(); // per candidate: index in candidateList where its leaf starts
    List <int> indexOfLeafMBRS = new List <int>(); // per candidate: index of its leaf in leafMBRs

    double best_so_far_dist = 0;
    int best_so_far_loc = -1;

    // A freshly allocated bool[] is already all false.
    bool[] is_skipped_at_p = new bool[inputData.Count];

    if (minEntry > maxEntry / 2)
    {
        MessageBox.Show("Requirement: MinNodePerEntry <= MaxNodePerEntry/2");
        return(new List <double> { best_so_far_dist, best_so_far_loc });
    }

    // Insert one rectangle per sliding-window subsequence.
    List <Rectangle> recList = new List <Rectangle>();
    List <int> id_itemList = new List <int>();
    for (int i = 0; i <= inputData.Count - NLength; i++)
    {
        List <double> subseq = inputData.GetRange(i, NLength);
        id_item++;
        Rectangle new_rec = new Rectangle(MathFuncs.PAA_Lower(subseq, D, R).ToArray(), MathFuncs.PAA_Upper(subseq, D, R).ToArray(), i);
        rtree.Add(new_rec, id_item);
        recList.Add(new_rec);
        id_itemList.Add(id_item);
    }

    // Collect the leaf nodes (level == 1), ordered by entry count.
    Dictionary <int, Node <int> > nodeMap = rtree.getNodeMap();
    List <Node <int> > leafNodes = nodeMap.Values.Where(node => node.level == 1).OrderBy(node => node.entryCount).ToList();
    List <Rectangle> leafMBRs = leafNodes.Select(node => node.mbr).ToList(); // same order as leafNodes

    // Flatten the leaves into candidateList; leaves without entries contribute nothing.
    for (int i = 0; i < leafNodes.Count; i++)
    {
        List <Rectangle> leafEntries = leafNodes[i].entries.Where(mbr => mbr != null).ToList();
        if (leafEntries.Count > 0)
        {
            int beginIndex = candidateList.Count;
            candidateList.AddRange(leafEntries.Select(mbr => mbr.getIndexSubSeq()));
            beginIndexInner.AddRange(Enumerable.Repeat(beginIndex, leafEntries.Count));
            indexOfLeafMBRS.AddRange(Enumerable.Repeat(i, leafEntries.Count));
        }
    }

    // Per-leaf "pruned for the current p" flags; allocated once and cleared for every p.
    bool[] eliminatedMBR = new bool[leafMBRs.Count];

    for (int i = 0; i < candidateList.Count; i++) // outer loop
    {
        int p = candidateList[i];
        if (is_skipped_at_p[p])
        {
            // p was already ruled out while it served as q in an earlier inner loop
            continue;
        }

        List <double> subseq_p = inputData.GetRange(p, NLength);
        List <double> P_PAA = MathFuncs.PAA(subseq_p, D);

        // NOTE(review): if every q is a self-match, this stays at Constants.INFINITE and p is reported below.
        double nearest_neighbor_dist = Constants.INFINITE;
        bool break_to_outer_loop = false;
        Array.Clear(eliminatedMBR, 0, eliminatedMBR.Length);

        // The scan starts at the first entry of p's own leaf and wraps around, so each leaf is
        // visited as one contiguous run. A leaf change is therefore detected by comparing with
        // the previously visited leaf index. (The former "counter < leafIndex; counter++" test
        // only worked when the scan started at leaf 0: otherwise it re-ran MINDIST for several
        // entries of the start leaf and never tested the leaves reached after the wrap-around.)
        int lastLeaf = -1;
        int scanStart = beginIndexInner[i];

        for (int j = 0; j < candidateList.Count; j++) // inner loop
        {
            int index_inner = (scanStart + j) % candidateList.Count;
            int q = candidateList[index_inner];
            int leaf = indexOfLeafMBRS[index_inner];

            if (leaf != lastLeaf) // first entry of the next leaf
            {
                lastLeaf = leaf;
                double minDist = MathFuncs.MINDIST(P_PAA, leafMBRs[leaf], (NLength / (double)(D)));
                if (minDist >= nearest_neighbor_dist)
                {
                    // Presumably MINDIST lower-bounds the distance to anything in the leaf (TODO confirm
                    // in MathFuncs); under that assumption no entry here can improve the nearest neighbour.
                    eliminatedMBR[leaf] = true;
                    continue;
                }
            }
            else if (eliminatedMBR[leaf]) // still inside a leaf that was pruned
            {
                continue;
            }

            if (Math.Abs(p - q) < NLength)
            {
                continue; // self-match => skip to the next one
            }

            double dist = MathFuncs.EuDistance(subseq_p, inputData.GetRange(q, NLength));
            if (dist < best_so_far_dist)
            {
                // p cannot be the discord, and neither can q: its nearest neighbour is at most dist away.
                is_skipped_at_p[q] = true;
                break_to_outer_loop = true;
                break;
            }
            if (dist < nearest_neighbor_dist)
            {
                nearest_neighbor_dist = dist;
            }
        } // end inner loop

        if (break_to_outer_loop)
        {
            continue; // early-abandoned => next p
        }

        if (nearest_neighbor_dist > best_so_far_dist)
        {
            best_so_far_dist = nearest_neighbor_dist;
            best_so_far_loc = p;
        }
    } // end outer loop

    if (is_first_time)
    {
        this_id_item = id_item;
        this_id_itemList = id_itemList;
        this_RTree = rtree;
        this_rectList = recList;
    }

    return(new List <double> { best_so_far_dist, best_so_far_loc });
}