// Single-pass clustering of the converted subsequences of 'data'.
        //
        // Each subsequence is compared (via simComputation) with every cluster built so far.
        // It joins the most similar cluster when that similarity is >= 'threshold';
        // otherwise it starts a new cluster of its own.
        //
        // Parameters:
        //   data                  - raw time series handed to the conversion helper.
        //   type_convert_raw_data - selects the conversion: Discord_Time_Series.BIT_SERIES uses
        //                           HelperFunctions.bitSeriesDataset, Discord_Time_Series.PLA uses
        //                           HelperFunctions.PLA, any other value uses HelperFunctions.PAA.
        //   b_cluster (out)       - the clusters that were created, in creation order.
        //   cluster_belong (out)  - for each converted subsequence, the index into b_cluster of
        //                           the cluster it was assigned to.
        //   threshold             - minimum similarity required to join an existing cluster.
        //   N_LENGTH, W_LENGTH    - forwarded unchanged to the conversion helper.
        //   file_name             - not used by this method.
        //
        // Side effects: writes two progress lines to the console and shuffles each cluster's
        // member index list before returning.
        //
        // An empty converted dataset yields empty output lists instead of failing on the
        // lookup of subsequence 0.
        static public void squeezerCluster(RawDataType data, int type_convert_raw_data, out List <Cluster_Squeezer> b_cluster, out List <int> cluster_belong, double threshold, int N_LENGTH, int W_LENGTH, string file_name)
        {
            // Convert the raw series into the representation requested by the caller.
            Dictionary <int, BitseriesType> bit_series_data;

            if (type_convert_raw_data == Discord_Time_Series.BIT_SERIES)
            {
                bit_series_data = HelperFunctions.bitSeriesDataset(data, N_LENGTH, W_LENGTH);
            }
            else if (type_convert_raw_data == Discord_Time_Series.PLA)
            {
                bit_series_data = HelperFunctions.PLA(data, N_LENGTH, W_LENGTH);
            }
            else
            {
                bit_series_data = HelperFunctions.PAA(data, N_LENGTH, W_LENGTH);
            }

            int bit_series_data_length = bit_series_data.Count;

            b_cluster      = new List <Cluster_Squeezer>();
            cluster_belong = new List <int>(bit_series_data_length);

            // -1 marks a subsequence that has not been assigned to any cluster yet.
            for (int i = 0; i < bit_series_data_length; i++)
            {
                cluster_belong.Add(-1);
            }

            // Seed the first cluster with subsequence 0. Guarded so that an empty dataset
            // does not fail on bit_series_data[0] / cluster_belong[0].
            if (bit_series_data_length > 0)
            {
                Cluster_Squeezer first_cluster = new Cluster_Squeezer(new Cluster(0, 1), new List <Dictionary <int, int> >());
                b_cluster.Add(first_cluster);
                first_cluster.getCluster().addToListMemberIndice(0);
                first_cluster.updateCountElements(bit_series_data[0]);
                cluster_belong[0] = 0;
            }

            // Assign every remaining subsequence.
            for (int j = 1; j < bit_series_data_length; j++)
            {
                double sim_max       = 0;
                int    cluster_index = 0; // stays 0 when no cluster has a similarity above 0

                // Find the existing cluster with the highest similarity (first one wins on ties).
                for (int i = 0; i < b_cluster.Count; i++)
                {
                    double sim_value = simComputation(b_cluster[i], bit_series_data[j]);
                    if (sim_max < sim_value)
                    {
                        cluster_index = i;
                        sim_max       = sim_value;
                    }
                }

                if (sim_max >= threshold)
                {
                    // Similar enough: join the best matching cluster.
                    Cluster_Squeezer best_cluster = b_cluster[cluster_index];
                    cluster_belong[j] = cluster_index;
                    best_cluster.getCluster().plusOneToNumberOfMembers();
                    best_cluster.getCluster().addToListMemberIndice(j);
                    best_cluster.updateCountElements(bit_series_data[j]);
                }
                else
                {
                    // Too different from every cluster: start a new one centred on j.
                    Cluster_Squeezer new_cluster = new Cluster_Squeezer(new Cluster(j, 1), new List <Dictionary <int, int> >());
                    b_cluster.Add(new_cluster);
                    cluster_belong[j] = b_cluster.Count - 1;
                    new_cluster.getCluster().addToListMemberIndice(j);
                    new_cluster.updateCountElements(bit_series_data[j]);
                }
            }

            Console.WriteLine("The number of clusters is " + b_cluster.Count);

            // Randomise the member order inside every cluster.
            foreach (Cluster_Squeezer list_members in b_cluster)
            {
                list_members.getCluster().getListMemberIndice().Shuffle();
            }

            Console.WriteLine("End shuffle.");

            // No radius is computed for these clusters; this method never calls setRadius.
        } //end squeezerCluster() function
Esempio n. 2
0
        // Algorithm 1: clusters the bit-series subsequences of the series stored in 'file_name'
        // around K_CENTERS centres chosen by getKRandomCenter.
        //
        // Parameters:
        //   b_cluster (out)      - the K_CENTERS clusters, each with its member list and radius set.
        //   cluster_belong (out) - for each subsequence, the index into b_cluster of its cluster.
        //   K_CENTERS            - number of clusters to build.
        //   N_LENGTH, W_LENGTH   - forwarded to HelperFunctions.bitSeriesDataset; N_LENGTH is also
        //                          the slice length used when computing each cluster radius.
        //   file_name            - file read through IOFunctions.readFile.
        //
        // Each subsequence is assigned to the centre with the smallest
        // HelperFunctions.bitSeriesDistance; on ties the lowest cluster index wins.
        //
        // NOTE(review): every cluster is created with 'new Cluster(center, 1)' (presumably an
        // initial member count of 1), but the centre is only added to a member list when the
        // assignment pass reaches it, where plusOneToNumberOfMembers is called again. The
        // count may therefore end up one higher than the member list - confirm against Cluster.
        static public void bitCluster(out List <Cluster> b_cluster, out List <int> cluster_belong, int K_CENTERS, int N_LENGTH, int W_LENGTH, string file_name)
        {
            //read data
            RawDataType data = IOFunctions.readFile(file_name);

            //get bit series data from original data
            Dictionary <int, BitseriesType> bit_series_data = HelperFunctions.bitSeriesDataset(data, N_LENGTH, W_LENGTH);

            int bit_series_data_length = bit_series_data.Count;

            b_cluster      = new List <Cluster>();                   //store k clusters
            cluster_belong = new List <int>(bit_series_data_length); //store the cluster each point belongs to

            // -1 marks a point that has not been assigned to any cluster yet.
            for (int i = 0; i < bit_series_data_length; i++)
            {
                cluster_belong.Add(-1);
            }

            //initialize K centers
            List <int> k_centers = getKRandomCenter(bit_series_data_length, K_CENTERS);

            //(line 1 - 4 of the paper): initialize k clusters, then append them to 'b_cluster'
            for (int i = 0; i < K_CENTERS; i++)
            {
                b_cluster.Add(new Cluster(k_centers[i], 1));
            }

            //(line 5-15 in the paper): assign every point to its nearest centre
            for (int j = 0; j < bit_series_data_length; j++)
            {
                if (cluster_belong[j] != -1)
                {
                    continue; // already assigned
                }

                int p = 0; // index of the nearest centre found so far

                if (K_CENTERS > 1)
                {
                    // Keep the distance to the current best centre instead of recomputing it
                    // for every candidate: it only changes when p changes.
                    double dist_p = HelperFunctions.bitSeriesDistance(bit_series_data[j], bit_series_data[b_cluster[0].getCenterIndex()]);

                    for (int i = 1; i < K_CENTERS; i++)
                    {
                        double dist_i = HelperFunctions.bitSeriesDistance(bit_series_data[j], bit_series_data[b_cluster[i].getCenterIndex()]);

                        if (dist_i < dist_p)
                        {
                            p      = i;
                            dist_p = dist_i;
                        }
                    }
                }

                // line 12-14 in the paper
                cluster_belong[j] = p;
                b_cluster[p].plusOneToNumberOfMembers();
                b_cluster[p].addToListMemberIndice(j);
            }

            // Compute each cluster's radius from the raw values of its centre and its members.
            for (int i = 0; i < K_CENTERS; i++)
            {
                List <double> center_value = data.GetRange(b_cluster[i].getCenterIndex(), N_LENGTH);
                double        radius       = HelperFunctions.calculateRadius(data, b_cluster[i].getListMemberIndice(), center_value, N_LENGTH);
                b_cluster[i].setRadius(radius);
            }
        }//end of function
        // Algorithm 2: searches the series stored in 'file_name' for its discord, using the
        // clusters produced by BitCluster.bitCluster (algorithm 1) to order and prune the search.
        //
        // Parameters:
        //   data (out)         - the raw series read through IOFunctions.readFile.
        //   N_LENGTH           - subsequence length; also the self-match exclusion width.
        //   W_LENGTH           - forwarded to bitCluster.
        //   K_CENTERS          - number of clusters; forwarded to bitCluster and the loop builders.
        //   file_name          - input file (bitCluster reads the same file again itself).
        //
        // Returns a tuple (best_so_far_dist, best_so_far_loc): the largest nearest-neighbour
        // distance found among the candidates that were not pruned, and the start index of
        // that candidate. Both are 0 when no candidate survives.
        //
        // Side effect: writes a progress message to the console.
        static public Tuple <double, int> bitClusterDiscord(out RawDataType data, int N_LENGTH, int W_LENGTH, int K_CENTERS, string file_name)
        {
            //read data
            data = IOFunctions.readFile(file_name); //raw timeseries data

            //line 6: run algorithm 1 to have b_cluster
            // (the bit-series dataset is built inside bitCluster; it is not needed here)
            List <Cluster> b_cluster;
            List <int>     cluster_belong;

            BitCluster.bitCluster(out b_cluster, out cluster_belong, K_CENTERS, N_LENGTH, W_LENGTH, file_name);

            Console.WriteLine("Algorithm1 is Done !\nKeep going...Please wait");

            // Order in which candidate subsequences p are examined.
            List <int> outer_loop = getOuterLoop(b_cluster, K_CENTERS);

            double best_so_far_dist = 0;
            int    best_so_far_loc  = 0;

            // Indexed by subsequence index (the values p and q), not by position in outer_loop,
            // so it is sized like cluster_belong, which p and q must index as well.
            // Elements of a new bool[] already start as false.
            bool[] is_skip_at_p = new bool[cluster_belong.Count];

            foreach (int p in outer_loop)
            {
                if (is_skip_at_p[p])
                {
                    //p was ruled out while it was visited as q in an earlier inner loop
                    continue;
                }

                double nearest_neighbor_dist = INFINITE;

                int cluster_of_cur_outer = cluster_belong[p]; // which cluster p currently belongs to

                List <int> inner_loop = getInnerLoop(b_cluster, K_CENTERS, cluster_of_cur_outer);

                bool p_is_pruned = false; // set when p cannot improve on the best discord so far

                foreach (int q in inner_loop)
                {
                    if (Math.Abs(p - q) < N_LENGTH)
                    {
                        continue; // self-match => skip to the next one
                    }

                    int cluster_of_cur_inner = cluster_belong[q]; // cluster of q

                    // The slices are rebuilt for every q on purpose: whether the helpers below
                    // modify their arguments is not visible here, so they are never shared.
                    RawDataType p_center = data.GetRange(b_cluster[cluster_of_cur_outer].getCenterIndex(), N_LENGTH);
                    RawDataType q_center = data.GetRange(b_cluster[cluster_of_cur_inner].getCenterIndex(), N_LENGTH);

                    // NOTE(review): p is abandoned when the cluster-level distance is
                    // >= best_so_far_dist. Whether distanceBetween2Clusters bounds dist(p, q)
                    // from above or below is not visible here - confirm the pruning direction.
                    if (distanceBetween2Clusters(p_center, q_center, b_cluster[cluster_of_cur_outer].getRadius(), b_cluster[cluster_of_cur_inner].getRadius()) >= best_so_far_dist)
                    {
                        p_is_pruned = true;
                        break;
                    }

                    //calculate the distance between p and q
                    double dist = HelperFunctions.gaussDistance(data.GetRange(p, N_LENGTH), data.GetRange(q, N_LENGTH));

                    if (dist < best_so_far_dist)
                    {
                        //skip q in the outer loop too: if (p,q) is not a solution, neither is (q,p)
                        is_skip_at_p[q] = true;

                        p_is_pruned = true;
                        break;
                    }

                    if (dist < nearest_neighbor_dist)
                    {
                        nearest_neighbor_dist = dist;
                    }
                }

                if (p_is_pruned)
                {
                    continue; //go to the next p in outer loop
                }

                if (nearest_neighbor_dist > best_so_far_dist)
                {
                    best_so_far_dist = nearest_neighbor_dist;
                    best_so_far_loc  = p;
                }
            }

            return(new Tuple <double, int>(best_so_far_dist, best_so_far_loc));
        }