Ejemplo n.º 1
0
    /// <summary>
    /// Entry point of the k-means driver: parses the six positional arguments,
    /// builds the dataflow, runs it to completion, then prints timing averages,
    /// the final centers and the various counters/tags collected by <c>KMeans</c>.
    /// </summary>
    /// <param name="args">
    /// Command-line arguments. After <c>NewComputation.FromArgs</c> has processed
    /// them, exactly six must remain, in this order: dimension, cluster_num,
    /// iteration_num, partition_num, sample_num_m, spin_wait. If the count is
    /// wrong or any value fails to parse, the help text is printed and the
    /// method returns without running anything.
    /// </param>
    static void Main(string[] args)
    {
        // 1. allocate a new dataflow computation.
        // args is passed by ref; presumably FromArgs removes the options it
        // consumes itself, so the checks below see only what is left — TODO confirm.
        using (var computation = NewComputation.FromArgs(ref args))
        {
            if (args.Length != 6)
            {
                PrintHelp();
                return;
            }

            // NOTE: thread_num is read from WorkerCount and worker_num from
            // Processes; the names follow what the KMeans constructor is given.
            Int32 procid     = computation.Configuration.ProcessID;
            Int32 thread_num = computation.Configuration.WorkerCount;
            Int32 worker_num = computation.Configuration.Processes;

            // Parse with TryParse so that a malformed value is handled like a
            // wrong argument count (help text) instead of an unhandled
            // FormatException/OverflowException. The TryParse overloads use the
            // same culture and number styles as the Parse/Convert calls they
            // replace, so every previously accepted input is still accepted.
            Int32  dimension;
            Int32  cluster_num;
            Int32  iteration_num;
            Int32  partition_num;
            double sample_num_m;
            Int64  spin_wait;
            if (!Int32.TryParse(args[0], out dimension)     ||
                !Int32.TryParse(args[1], out cluster_num)   ||
                !Int32.TryParse(args[2], out iteration_num) ||
                !Int32.TryParse(args[3], out partition_num) ||
                !Double.TryParse(args[4], out sample_num_m) ||
                !Int64.TryParse(args[5], out spin_wait))
            {
                Console.Out.WriteLine("Could not parse one or more numeric arguments.");
                PrintHelp();
                return;
            }

            Console.Out.WriteLine("dimension: " + dimension);
            Console.Out.WriteLine("cluster_num: " + cluster_num);
            Console.Out.WriteLine("iteration_num: " + iteration_num);
            Console.Out.WriteLine("partition_num: " + partition_num);
            Console.Out.WriteLine("sample_num_m: " + sample_num_m);
            Console.Out.WriteLine("spin_wait: " + spin_wait);
            Console.Out.WriteLine("procid: " + procid);
            Console.Out.WriteLine("worker_num: " + worker_num);
            Console.Out.WriteLine("thread_num: " + thread_num);
            Console.Out.Flush();

            KMeans km =
                new KMeans(dimension,
                           cluster_num,
                           iteration_num,
                           partition_num,
                           sample_num_m,
                           spin_wait,
                           procid,
                           worker_num,
                           thread_num);

            // 2. build the dataflow graph: generated samples -> partition -> loop.
            Stream <SampleBatch, Epoch> samples = km.GenerateSamples().AsNaiadStream(computation);
            // The partition key is element [0][0] of each batch, truncated to int.
            samples = samples.PartitionBy(s => (int)(s[0][0]));
            // The loop body is km.Advance; iteration_num is handed to Iterate as
            // the iteration bound. The result is kept only for the optional
            // subscription below.
            var end_samples = samples.Iterate((lc, s) => km.Advance(s), iteration_num, "KMeans");
            // var output = end_samples.Subscribe(x => {
            //                                           Console.Out.WriteLine("Final center 0: " + PrintList(km.means_[0]));
            //                                           Console.Out.Flush();
            //                                        });

            Console.Out.WriteLine("Before Activate!");
            Console.Out.Flush();
            // start the computation, fixing the structure of the dataflow graph.
            computation.Activate();
            Console.Out.WriteLine("After Activate!");
            Console.Out.Flush();

            // block until all work is finished.
            computation.Join();
            Console.Out.WriteLine("After Join!");


            // 3. report timings, skipping the first truncate_index_ iterations.
            // Guarded: with iteration_num <= truncate_index_ the range would be
            // empty (Average() throws) or negative (GetRange throws), which
            // would abort before the final results below are printed.
            Int32 measured_iterations = iteration_num - truncate_index_;
            if (measured_iterations > 0)
            {
                double average_total   = km.total_times_.GetRange(truncate_index_, measured_iterations).Average();
                double average_compute = km.compute_times_.GetRange(truncate_index_, measured_iterations).Average();
                double average_idle    = average_total - average_compute;
                // Values are scaled by 1000 for the "(ms)" labels — presumably the
                // recorded times are in seconds; verify against KMeans.
                Console.Out.WriteLine("*** Average for the last {0:D2} iterations: compute(ms): {1:F2} total(ms): {2:F2} (idle(ms): {3:F2})",
                                      measured_iterations, 1000 * average_compute, 1000 * average_total, 1000 * average_idle);
            }
            else
            {
                Console.Out.WriteLine("*** Averages skipped: iteration_num ({0}) must be greater than the {1} leading iterations that are excluded.",
                                      iteration_num, truncate_index_);
            }


            // 4. dump final centers and bookkeeping collected by KMeans.
            for (int i = 0; i < cluster_num; ++i)
            {
                Console.Out.WriteLine("Final center {0:D2}: {1:S}: ", i, PrintList(km.means_[i]));
            }
            Console.Out.WriteLine("Samples Counts: " + PrintList(km.sample_counter));
            Console.Out.WriteLine("Reduce Level 1 Counts: " + PrintList(km.reduce_l1_counter_));
            Console.Out.WriteLine("Reduce Level 2 Counts: " + PrintList(km.reduce_l2_counter_));
            Console.Out.WriteLine("Sync Level 1 Counts: " + PrintList(km.sync_l1_counter_));
            Console.Out.WriteLine("Sync Level 2 Counts: " + PrintList(km.sync_l2_counter_));
            Console.Out.WriteLine("Sync Tags: " + PrintHashSet(km.sync_tags_));
            Console.Out.WriteLine("Reduce Tags: " + PrintHashSet(km.reduce_tags_));
            Console.Out.WriteLine("Clustering Tags: " + PrintHashSet(km.clustering_tags_));
            Console.Out.Flush();
        }
    }