Example #1
        public NaiadSolution(string[] args)
        {
            rawUsers          = new List <User>();
            rawPosts          = new List <Post>();
            rawComments       = new List <Comment>();
            rawCommentedEdges = new List <CommentedEdge>();
            rawLikesEdges     = new List <LikesEdge>();
            rawPostEdges      = new List <PostEdge>();
            rawSubmitterEdges = new List <SubmitterEdge>();
            rawFriendEdges    = new List <FriendEdge>();

            computation = NewComputation.FromArgs(ref args);

            users          = computation.NewInputCollection <User>();
            posts          = computation.NewInputCollection <Post>();
            comments       = computation.NewInputCollection <Comment>();
            commentedEdges = computation.NewInputCollection <CommentedEdge>();
            likesEdges     = computation.NewInputCollection <LikesEdge>();
            postEdges      = computation.NewInputCollection <PostEdge>();
            submitterEdges = computation.NewInputCollection <SubmitterEdge>();
            friendEdges    = computation.NewInputCollection <FriendEdge>();

            actualEpoch = -1;

            isDisposed = false;
        }
Example #2
        public void Execute(string[] args)
        {
            // allocate a new computation from command line arguments.
            using (var computation = NewComputation.FromArgs(ref args))
            {
                var nodeCount = args.Length == 3 ? Convert.ToInt32(args[1]) : 1000;
                var edgeCount = args.Length == 3 ? Convert.ToInt32(args[2]) : 2000;

                #region Generate a local fraction of input data

                var random            = new Random(0);
                var processes         = computation.Configuration.Processes;
                var thisProcess       = computation.Configuration.ProcessID;
                var graphFragmentList = new List <Pair <int, int> >();
                for (int i = 0; i < edgeCount; i++)
                {
                    // ensure we generate the same graph no matter how many processes there are
                    var edge = new Pair <int, int>(random.Next(nodeCount), random.Next(nodeCount));
                    if ((i % processes) == thisProcess)
                    {
                        graphFragmentList.Add(edge);
                    }
                }
                //graphFragmentList.Add(new Pair<int, int>(100, 100000000));
                //graphFragmentList.Add(new Pair<int, int>(200000000, 200));
                var graphFragment = graphFragmentList.ToArray();

                #endregion

                Console.WriteLine("size of graphFragmentList: {0}", graphFragmentList.Count());
                Console.WriteLine("Computing components of a random graph on {0} nodes and {1} edges", nodeCount, edgeCount);

                Stopwatch stopwatch = new Stopwatch();

                // convert array of edges to single-epoch stream.
                var edges = graphFragment.AsNaiadStream(computation)
                            .Synchronize(x => true);

                // symmetrize the graph by adding in transposed edges.
                edges = edges.Select(x => new Pair <int, int>(x.Second, x.First))
                        .Concat(edges);

                edges.DirectedReachability()
                .Subscribe(list => Console.WriteLine("labeled {0} nodes in {1}", list.Count(), stopwatch.Elapsed));

                edges.Subscribe(list =>
                {
                    int numEdges = 0;
                    foreach (var element in list)
                    {
                        numEdges++;
                    }
                    Console.WriteLine("# of edges is : {0}", numEdges);
                });

                stopwatch.Start();
                computation.Activate();     // start graph computation
                computation.Join();         // block until computation completes
            }
        }
Example #3
        public void Execute(string[] args)
        {
            var containerName = args[1];
            var directoryName = args[2];

            var nodeCount = int.Parse(args[3]);
            var edgeCount = int.Parse(args[4]);

            CloudStorageAccount storageAccount = CloudStorageAccount.DevelopmentStorageAccount;

            var container = storageAccount.CreateCloudBlobClient()
                            .GetContainerReference(containerName);

            container.CreateIfNotExists();

            // allocate a new computation from command line arguments.
            using (var computation = NewComputation.FromArgs(ref args))
            {
                // create a new input from a constant data source
                var source = new ConstantDataSource <GraphProperties>(new GraphProperties(nodeCount, edgeCount));
                var input  = computation.NewInput(source);

                // generate the graph, partition by edge source, and write to Azure.
                input.SelectMany(x => GenerateGraph(x.NodeCount, x.EdgeCount))
                .PartitionBy(x => x.First)
                .WriteBinaryToAzureBlobs(container, directoryName + "/edges-{0}");

                // start job and wait.
                computation.Activate();
                computation.Join();
            }
        }
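The GenerateGraph helper invoked above is not included in this snippet. A minimal sketch, assuming it simply emits random edge pairs the way the other examples in this collection do, might look like:

        // Hypothetical sketch of GenerateGraph (assumed, not part of the original snippet):
        // deterministically yields edgeCount random edges as Pair<int, int> values.
        private static IEnumerable<Pair<int, int>> GenerateGraph(int nodeCount, int edgeCount)
        {
            var random = new Random(0);     // fixed seed, so every run produces the same graph
            for (int i = 0; i < edgeCount; i++)
            {
                yield return new Pair<int, int>(random.Next(nodeCount), random.Next(nodeCount));
            }
        }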
Example #4
    static void Main(string[] args)
    {
        // 1. allocate a new dataflow computation.
        using (var computation = NewComputation.FromArgs(ref args))
        {
            int iterations             = 3;
            int counts                 = 5;
            Stream <Node, Epoch> nodes = GenerateNodes(counts, computation.Configuration.ProcessID).AsNaiadStream(computation);
            // nodes = nodes.PartitionBy(x => x.source);

            nodes.IterateAndAccumulate((lc, x) => x, x => Print(x), iterations, "LogisticRegression");

            // nodes.Iterate((lc , x) => Operate(lc.EnterLoop(x)), iterations, "LogisticRegression");

            Console.Out.WriteLine("Proc ID: " + computation.Configuration.ProcessID);
            Console.Out.Flush();

            // 2. define an object which accepts input strings.
            // var source = new BatchedDataSource<string>();

            // 3. convert the data source into a Naiad stream of strings.
            // var input = computation.NewInput(source);

            // 4. request a notification for each batch of strings
            // received.
            // var output = input.Subscribe(x =>
            //     {
            //     foreach (var line
            //       in x)
            //     Console.WriteLine(line);
            //     });

            Console.Out.WriteLine("Before Activate!");
            Console.Out.Flush();

            // 5. start the computation, fixing the structure of
            // the dataflow graph.
            computation.Activate();

            Console.Out.WriteLine("After Activate!");
            Console.Out.Flush();

            // 6. read inputs from the console as long as the
            // user supplies them.
            for (var l = Console.ReadLine(); l.Length > 0; l
                     = Console.ReadLine())
            {
            }
            // source.OnNext(l.Split());

            // 7. signal that the source is now complete.
            // source.OnCompleted();

            // 8. block until all work is finished.
            computation.Join();
        }
    }
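GenerateNodes and Print are referenced above but not shown. A minimal sketch of GenerateNodes, assuming Node is the GraphLINQ node struct and each process contributes its own block of node indices, could be:

    // Hypothetical sketch of GenerateNodes (assumed, not part of the original snippet):
    // each process emits `count` nodes whose indices do not collide across processes.
    static IEnumerable<Node> GenerateNodes(int count, int processId)
    {
        for (int i = 0; i < count; i++)
        {
            yield return new Node(processId * count + i);
        }
    }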
Example #5
        public void Execute(string[] args)
        {
            using (var computation = NewComputation.FromArgs(ref args))
            {
                int iterations = int.Parse(args[1]);

                // first construct a simple graph with a feedback loop.
                var inputStream = (new int[] { }).AsNaiadStream(computation);

                var loopContext = new LoopContext <Epoch>(inputStream.Context, "loop");
                var feedback    = loopContext.Delay <int>();
                var ingress     = loopContext.EnterLoop(inputStream);

                feedback.Input = Barrier.MakeStage(ingress, feedback.Output, iterations);

                // prepare measurement callbacks
                var sw       = new Stopwatch();
                var lastTime = 0L;
                var times    = new List <double>(iterations);

                computation.OnStartup        += (c, y) => { sw.Start(); };
                computation.OnFrontierChange += (v, b) =>
                {
                    var now = sw.ElapsedTicks;

                    if (lastTime > 0)
                    {
                        times.Add(1000.0 * (now - lastTime) / (double)Stopwatch.Frequency);
                    }

                    lastTime = now;
                };

                Console.WriteLine("Running barrier latency test with {0} iterations, vertices={1}", iterations, ingress.ForStage.Placement.Count);

                // start computation and block
                computation.Activate();
                computation.Join();

                // print results
                times.Sort();

                var percentiles = new[] { 0.00, 0.01, 0.05, 0.10, 0.25, 0.50, 0.75, 0.90, 0.95, 0.99 };
                var latencies   = percentiles.Select(f => times[(int)(iterations * f)]).ToArray();

                Console.WriteLine("Ran {0} iterations on {1} processes; this is process {2}", times.Count - 1, computation.Configuration.Processes, computation.Configuration.ProcessID);

                Console.WriteLine("%-ile\tLatency (ms)");
                for (int i = 0; i < latencies.Length; i++)
                {
                    Console.WriteLine("{0:0.00}:\t{1:0.00}", percentiles[i], latencies[i]);
                }

                Console.WriteLine("max:\t{0:0.00}", latencies[latencies.Length - 1]);
            }
        }
Example #6
File: Reachability.cs Project: omidm/naiad
        public void Execute(string[] args)
        {
            // a controller manages an instance of Naiad
            using (var computation = NewComputation.FromArgs(ref args))
            {
                // define a graph input from a filename and some transformations.
                var edgeStrings = new[] { args[1] }.AsNaiadStream(computation)
                .SelectMany(x => ReadLines(x))
                .Select(x => x.Split())
                .Select(x => x[0].PairWith(x[1]));

                // define reachability roots from a second filename.
                var rootStrings = new[] { args[2] }.AsNaiadStream(computation)
                .SelectMany(x => ReadLines(x));

                // convert (string, string) -> edge and string -> node.
                Stream <Edge, Epoch> edges;  // will eventually hold stream of edges
                Stream <Node, Epoch> roots;  // will eventually hold stream of roots

                // an autorenamer context is used to consistently rename identifiers.
                using (var renamer = new AutoRenamer <string>())
                {
                    var tempEdges = edgeStrings.RenameUsing(renamer, x => x.First)              // use the first string to find a name
                                    .Select(x => x.node.WithValue(x.value.Second))              // discard the first string
                                    .RenameUsing(renamer, x => x.value)                         // use the second string to find a name
                                    .Select(x => new Edge(x.value.node, x.node));               // discard the second string and form an edge

                    var tempRoots = rootStrings.RenameUsing(renamer, x => x)                    // use the string itself to find a name
                                    .Select(x => x.node);                                       // discard the string and keep the node

                    // FinishRenaming only after all RenameUsing
                    edges = tempEdges.FinishRenaming(renamer);
                    roots = tempRoots.FinishRenaming(renamer);
                }

                // iteratively expand reachable set as pairs (node, isReachable).
                var limit = roots.Select(x => x.WithValue(true))
                            .IterateAndAccumulate((lc, x) => x.TransmitAlong(lc.EnterLoop(edges))           // transmit (node, true) values along edges
                                                  .StateMachine((bool b, bool s) => true),                  // any received value sets the state to true
                                                  x => x.node.index,                                        // partitioning information
                                                  Int32.MaxValue,                                           // the number of iterations
                                                  "Reachability")                                           // a nice descriptive name
                            .Concat(roots.Select(x => x.WithValue(true)))                                   // add the original trusted nodes
                            .NodeAggregate((a, b) => true)
                            .Where(x => x.value);                                                           // aggregate, for the originals

                // print the results onto the screen (or write to file, as appropriate)
                limit.Select(x => x.node.index)
                .Subscribe(x => Console.WriteLine(x.Count()));

                // start the computation and wait until it finishes
                computation.Activate();
                computation.Join();
            }
        }
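The ReadLines helper used to expand each filename above is not part of the snippet. A minimal sketch, assuming it lazily yields the lines of a text file, might be:

        // Hypothetical sketch of ReadLines (assumed, not part of the original snippet):
        // lazily yields each line of the named text file.
        private static IEnumerable<string> ReadLines(string filename)
        {
            using (var reader = new System.IO.StreamReader(filename))
            {
                for (var line = reader.ReadLine(); line != null; line = reader.ReadLine())
                {
                    yield return line;
                }
            }
        }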
Example #7
        public void Execute(string[] args)
        {
            using (var computation = NewComputation.FromArgs(ref args))
            {
                var keyvals = new BatchedDataSource <Pair <string, string> >();
                var queries = new BatchedDataSource <string>();

                computation.NewInput(keyvals)
                .KeyValueLookup(computation.NewInput(queries))
                .Subscribe(list => { foreach (var l in list)
                                     {
                                         Console.WriteLine("value[\"{0}\"]:\t\"{1}\"", l.First, l.Second);
                                     }
                           });

                computation.Activate();

                if (computation.Configuration.ProcessID == 0)
                {
                    Console.WriteLine("Enter two strings to insert/overwrite a (key, value) pairs.");
                    Console.WriteLine("Enter one string to look up a key.");

                    // repeatedly read lines and introduce records based on their structure.
                    // note: it is important to advance both inputs in order to make progress.
                    for (var line = Console.ReadLine(); line.Length > 0; line = Console.ReadLine())
                    {
                        var split = line.Split();

                        if (split.Length == 1)
                        {
                            queries.OnNext(line);
                            keyvals.OnNext();
                        }
                        if (split.Length == 2)
                        {
                            queries.OnNext();
                            keyvals.OnNext(split[0].PairWith(split[1]));
                        }
                        if (split.Length > 2)
                        {
                            Console.Error.WriteLine("error: lines with three or more strings are not understood.");
                        }
                    }
                }

                keyvals.OnCompleted();
                queries.OnCompleted();

                computation.Join();
            }
        }
Example #8
        public void Execute(string[] args)
        {
            var containerName  = args[1];
            var directoryName  = args[2];
            var outputblobName = args[3];

            CloudStorageAccount storageAccount = CloudStorageAccount.DevelopmentStorageAccount;

            var container = storageAccount.CreateCloudBlobClient()
                            .GetContainerReference(containerName);

            if (!container.Exists())
            {
                throw new Exception("No such container exists");
            }

            // allocate a new computation from command line arguments.
            using (var computation = NewComputation.FromArgs(ref args))
            {
                // Set Console.Out to point at an Azure blob bearing the process id.
                // See the important note at end of method about closing Console.Out.
                computation.Controller.SetConsoleOut(container, "stdout-{0}.txt");

                System.Diagnostics.Stopwatch stopwatch = new System.Diagnostics.Stopwatch();

                // read the edges from azure storage
                var edges = computation.ReadBinaryFromAzureBlobs <Pair <int, int> >(container, directoryName);

                // symmetrize the graph by adding in transposed edges.
                edges = edges.Select(x => new Pair <int, int>(x.Second, x.First))
                        .Concat(edges);

                // invoke directed reachability
                var result = edges.DirectedReachability();

                // listen to the output for reporting, and also write the output somewhere in Azure
                result.Subscribe(list => Console.WriteLine("labeled {0} nodes in {1}", list.Count(), stopwatch.Elapsed));
                result.WriteBinaryToAzureBlobs(container, outputblobName);

                stopwatch.Start();

                // start computation and wait.
                computation.Activate();
                computation.Join();
            }


            // very important to close the stream to flush writes to Azure.
            Console.Out.Close();
        }
Example #9
        public void Execute(string[] args)
        {
            // the first thing to do is to allocate a computation from args.
            using (var computation = NewComputation.FromArgs(ref args))
            {
                // loading data
                //Console.WriteLine("Grep program starts");
                if (args.Length < 5)
                {
                    string parameters = "";
                    for (int i = 0; i < args.Length; ++i)
                    {
                        parameters = parameters + " " + args[i];
                    }
                    Console.WriteLine("current parameters: " + parameters);
                    Console.WriteLine("usage: Examples.exe terasort <inputPath> <numFiles> <pattern> <outputPath>");
                    return;
                }
                //for (int i = 0; i < args.Length; ++i){
                //    Console.WriteLine("the "+i+"th argument is " + args[i] );
                //}
                string inputDir         = args[1];
                int    numFiles         = Int32.Parse(args[2]);
                string pattern          = args[3];
                string outputPathFormat = args[4] + "{0}";

                var text = loadDiskFiles(computation, inputDir, numFiles);
                //text.Subscribe(l =>
                //{
                //    Console.WriteLine("input, # od records: " + l.Count());
                //});

                // computation
                var result = text.Where(x => x.Contains(pattern));


                //result.Subscribe(l =>
                //{
                //    Console.WriteLine("result, # of records: " + l.Count());
                //});

                //dumping
                Action <string, System.IO.BinaryWriter> writer = writeByte;
                result.WriteToFiles <string>(outputPathFormat, writer);

                Console.WriteLine("deploy successfully");
                computation.Activate();
                computation.Join();
            }
        }
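Neither loadDiskFiles nor writeByte appears in this snippet. A rough sketch of both, assuming the input directory holds numFiles files named by index and that records are written out as UTF-8 text lines, might look like:

        // Hypothetical sketch of loadDiskFiles (assumed, not part of the original snippet):
        // distributes the input files round-robin across processes and reads each file
        // as a Naiad stream of text lines.
        private static Stream<string, Epoch> loadDiskFiles(OneOffComputation computation, string inputDir, int numFiles)
        {
            var processes   = computation.Configuration.Processes;
            var thisProcess = computation.Configuration.ProcessID;

            return Enumerable.Range(0, numFiles)
                             .Where(i => (i % processes) == thisProcess)                    // each process reads only its share
                             .Select(i => System.IO.Path.Combine(inputDir, i.ToString()))   // assumed file-naming scheme
                             .AsNaiadStream(computation)
                             .SelectMany(file => System.IO.File.ReadLines(file));
        }

        // Hypothetical sketch of writeByte (assumed): writes one record per line as UTF-8 bytes.
        private static void writeByte(string record, System.IO.BinaryWriter writer)
        {
            writer.Write(System.Text.Encoding.UTF8.GetBytes(record + "\n"));
        }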
Example #10
    static void Main(string[] args)
    {
        // 1. allocate a new dataflow computation.
        using (var computation = NewComputation.FromArgs(ref args))
        {
            Console.Out.WriteLine("Proc ID: " + computation.Configuration.ProcessID);
            Console.Out.Flush();

            // 2. define an object which accepts input strings.
            var source = new BatchedDataSource <string>();

            // 3. convert the data source into a Naiad stream of strings.
            var input = computation.NewInput(source);

            // 4. request a notification for each batch of strings
            // received.
            var output = input.Subscribe(x =>
            {
                foreach (var line
                         in x)
                {
                    Console.WriteLine(line);
                }
            });

            Console.Out.WriteLine("Before Activate!");
            Console.Out.Flush();

            // 5. start the computation, fixing the structure of
            // the dataflow graph.
            computation.Activate();

            Console.Out.WriteLine("After Activate!");
            Console.Out.Flush();

            // 6. read inputs from the console as long as the
            // user supplies them.
            for (var l = Console.ReadLine(); l.Length > 0; l
                     = Console.ReadLine())
            {
                source.OnNext(l.Split());
            }

            // 7. signal that the source is now complete.
            source.OnCompleted();

            // 8. block until all work is finished.
            computation.Join();
        }
    }
Example #11
        public void Execute(string[] args)
        {
            using (OneOffComputation computation = NewComputation.FromArgs(ref args))
            {
                int numToExchange = args.Length > 1 ? int.Parse(args[1]) : 1000000;

                Stream <int, Epoch> input = computation.NewInput(new ConstantDataSource <int>(5));

                Stream <int, Epoch>           stream   = ProducerVertex.MakeStage(numToExchange, computation.Configuration.WorkerCount, input);
                Stage <ConsumerVertex, Epoch> consumer = ConsumerVertex.MakeStage(numToExchange, computation.Configuration.WorkerCount, stream);

                computation.Activate();
                computation.Join();
            }
        }
Example #12
        public void Execute(string[] args)
        {
            using (OneOffComputation computation = NewComputation.FromArgs(ref args))
            {
                int numToExchange = args.Length > 1 ? int.Parse(args[1]) : 1000000;
                int producers     = Int32.Parse(args[2]);
                int consumers     = Int32.Parse(args[3]);

                var exchange = args.Length > 4 && args[4] == "exchange";

                var input = new Pair <int, int>[] { }.AsNaiadStream(computation);

                Stream <Pair <int, int>, Epoch> stream = ProducerVertex.MakeStage(numToExchange, 0, producers, computation.Configuration.WorkerCount, input);
                Stage <ConsumerVertex, Epoch>   consumer = ConsumerVertex.MakeStage(numToExchange, computation.Configuration.Processes - consumers, computation.Configuration.Processes, computation.Configuration.WorkerCount, exchange, stream);

                computation.Activate();
                computation.Join();
            }
        }
Example #13
        public void Execute(string[] args)
        {
            // allocate a new computation from command line arguments.
            using (var computation = NewComputation.FromArgs(ref args))
            {
                var nodeCount = args.Length == 3 ? Convert.ToInt32(args[1]) : 1000;
                var edgeCount = args.Length == 3 ? Convert.ToInt32(args[2]) : 2000;

                #region Generate a local fraction of input data

                var random        = new Random(0);
                var processes     = computation.Configuration.Processes;
                var graphFragment = new Pair <int, int> [edgeCount / processes];
                for (int i = 0; i < graphFragment.Length; i++)
                {
                    graphFragment[i] = new Pair <int, int>(random.Next(nodeCount), random.Next(nodeCount));
                }

                #endregion

                Console.WriteLine("Computing components of a random graph on {0} nodes and {1} edges", nodeCount, edgeCount);

                Stopwatch stopwatch = new Stopwatch();

                // convert array of edges to single-epoch stream.
                var edges = graphFragment.AsNaiadStream(computation)
                            .Synchronize();

                // symmetrize the graph by adding in transposed edges.
                edges = edges.Select(x => new Pair <int, int>(x.Second, x.First))
                        .Concat(edges);

                edges.DirectedReachability()
                .Subscribe(list => Console.WriteLine("labeled {0} nodes in {1}", list.Count(), stopwatch.Elapsed));

                stopwatch.Start();
                computation.Activate();     // start graph computation
                computation.Join();         // block until computation completes
            }
        }
Example #14
        public void Execute(string[] args)
        {
            using (var computation = NewComputation.FromArgs(ref args))
            {
                computation.Controller.SetConsoleOut(computation.DefaultBlobContainer("naiad-outputs"), "out-{0}.txt");
                computation.Controller.SetConsoleError(computation.DefaultBlobContainer("naiad-outputs"), "err-{0}.txt");

                if (args.Length == 4)
                {
                    var containerName   = args[1];
                    var inputDirectory  = args[2];
                    var outputDirectory = args[3];

                    if (!inputDirectory.Equals(outputDirectory))
                    {
                        var container = computation.DefaultBlobContainer(containerName);

                        computation.ReadTextFromAzureBlobs(container, inputDirectory)
                        .PartitionBy(x => x.GetHashCode())
                        .WriteTextToAzureBlobs(container, outputDirectory + "/part-{0}-{1}.txt");
                    }
                    else
                    {
                        Console.Error.WriteLine("ERROR: Input directory name ({0}) equals output directory name ({1})", inputDirectory, outputDirectory);
                    }
                }
                else
                {
                    Console.Error.WriteLine("repartition requires three additional arguments: " + this.Usage);
                }

                computation.Activate();
                computation.Join();

                Console.Out.Close();
                Console.Error.Close();
            }
        }
Example #15
        public void Execute(string[] args)
        {
            // the first thing to do is to allocate a computation from args.
            using (var computation = NewComputation.FromArgs(ref args))
            {
                // 1. Make a new data source, to which we will supply strings.
                var source = new BatchedDataSource <string>();

                // 2. Attach source, and apply count extension method.
                var counts = computation.NewInput(source).StreamingCount();

                // 3. Subscribe to the resulting stream with a callback to print the outputs.
                counts.Subscribe(list => { foreach (var element in list)
                                           {
                                               Console.WriteLine(element);
                                           }
                                 });

                computation.Activate();       // activate the execution of this graph (no new stages allowed).

                if (computation.Configuration.ProcessID == 0)
                {
                    // with our dataflow graph defined, we can start soliciting strings from the user.
                    Console.WriteLine("Start entering lines of text. An empty line will exit the program.");
                    Console.WriteLine("Naiad will display counts (and changes in counts) of words you type.");

                    // read lines of input and hand them to the input, until an empty line appears.
                    for (var line = Console.ReadLine(); line.Length > 0; line = Console.ReadLine())
                    {
                        source.OnNext(line.Split());
                    }
                }

                source.OnCompleted();   // signal the end of the input.
                computation.Join();     // waits until the graph has finished executing.
            }
        }
Example #16
File: WordCount.cs Project: omidm/naiad
        /// <summary>
        /// Executes a word counting Naiad program.
        /// </summary>
        /// <param name="config">Naiad controller configuration</param>
        /// <param name="args">Remaining arguments</param>
        public void Execute(string[] args)
        {
            // first, construct a Naiad controller.
            using (var computation = NewComputation.FromArgs(ref args))
            {
                // create an incrementally updateable collection
                var text = computation.NewInputCollection <string>();

                // segment strings, count, and print
                text.SelectMany(x => x.Split(' '))
                .Count(y => y, (k, c) => k + ":" + c)       // yields "word:count" for each word
                .Subscribe(l => { foreach (var element in l)
                                  {
                                      Console.WriteLine(element);
                                  }
                           });

                computation.Activate();

                if (computation.Configuration.ProcessID == 0)
                {
                    Console.WriteLine("Start entering lines of text. An empty line will exit the program.");
                    Console.WriteLine("Naiad will display counts (and changes in counts) of words you type.");

                    var line = Console.ReadLine();
                    for (int i = 0; line != ""; i++)
                    {
                        text.OnNext(line);
                        computation.Sync(i);
                        line = Console.ReadLine();
                    }
                }

                text.OnCompleted(); // closes input
                computation.Join();
            }
        }
Example #17
    // public class Sample {
    //   public Sample() {
    //     vector = new List<float>();
    //     // vector = new List<float>(new float[dimension]);
    //     // label = 0;
    //   }
    //   public List<float> vector;
    //   public float label;
    // }

    static void Main(string[] args)
    {
        // 1. allocate a new dataflow computation.
        using (var computation = NewComputation.FromArgs(ref args))
        {
            Int32 iterations = 10;
            if (args.Length >= 1)
            {
                iterations = Int32.Parse(args[0]);
            }

            Console.Out.WriteLine("iterations: " + iterations);
            Console.Out.Flush();

            int counts    = 5;
            int dimension = 10;
            Stream <Sample, Epoch> nodes =
                GenerateNodes(dimension, counts, computation.Configuration.ProcessID).AsNaiadStream(computation);
            // nodes = nodes.PartitionBy(x => x.source);

            // nodes.IterateAndAccumulate((lc, x)  => x, x => Print(x), iterations, "LogisticRegression");

            var end_nodes  = nodes.Iterate((lc, x) => Operate(x), iterations, "LogisticRegression");
            var node_count = Microsoft.Research.Naiad.Frameworks.Lindi.ExtensionMethods.Count(end_nodes);
            end_nodes.WriteToFiles("output_nodes_{0}.txt", (record, writer) => writer.Write(true));
            node_count.WriteToFiles("output_count_{0}.txt", (record, writer) => writer.Write(true));
            // var end_nodes  = nodes.IterateAndAccumulate((lc , x) => Operate(x), iterations, "LogisticRegression");

            Console.Out.WriteLine("Proc ID: " + computation.Configuration.ProcessID);
            Console.Out.Flush();

            var output = node_count.Subscribe(x => {
                foreach (var e in x)
                {
                    Console.WriteLine("vector: " + PrintList(e.First) + " count: " + e.Second);
                }
            });

            // 2. define an object which accepts input strings.
            // var source = new BatchedDataSource<string>();

            // 3. convert the data source into a Naiad stream of strings.
            // var input = computation.NewInput(source);

            // 4.request a notification for each batch of strings
            // received.
            // var output = input.Subscribe(x =>
            //     {
            //     foreach (var line
            //       in x)
            //     Console.WriteLine(line);
            //     });

            Console.Out.WriteLine("Before Activate!");
            Console.Out.Flush();

            // 5. start the computation, fixing the structure of
            // the dataflow graph.
            computation.Activate();

            Console.Out.WriteLine("After Activate!");
            Console.Out.Flush();

            // 6. read inputs from the console as long as the
            // user supplies them.
            // for (var l = Console.ReadLine(); l.Length > 0; l
            //     = Console.ReadLine()) {}
            //   // source.OnNext(l.Split());

            // 7. signal that the source is now complete.
            // source.OnCompleted();

            // 8. block until all work is finished.
            computation.Join();

            Console.Out.WriteLine("After Join!");
            Console.Out.Flush();
        }
    }
Example #18
        public void Execute(string[] args)
        {
            using (var computation = NewComputation.FromArgs(ref args))
            {
                // establish numbers of nodes and edges from input or from defaults.
                if (args.Length == 3)
                {
                    nodeCount = Convert.ToInt32(args[1]);
                    edgeCount = Convert.ToInt32(args[2]);
                }

                // generate a random graph
                var random = new Random(0);
                var graph  = new IntPair[edgeCount];
                for (int i = 0; i < edgeCount; i++)
                {
                    graph[i] = new IntPair(random.Next(nodeCount), random.Next(nodeCount));
                }

                var stopwatch = System.Diagnostics.Stopwatch.StartNew();

                // set up the CC computation
                var edges = computation.NewInputCollection <IntPair>();

                //Func<IntPair, int> priorityFunction = node => 0;
                //Func<IntPair, int> priorityFunction = node => Math.Min(node.t, 100);
                Func <IntPair, int> priorityFunction = node => 65536 * (node.t < 10 ? node.t : 10 + Convert.ToInt32(Math.Log(1 + node.t) / Math.Log(2.0)));

                var output = edges.ConnectedComponents(priorityFunction)
                             .Count(n => n.t, (l, c) => c)       // counts results with each label
                             .Consolidate()
                             .Subscribe(l =>
                {
                    Console.Error.WriteLine("Time to process: {0}", stopwatch.Elapsed);
                    foreach (var result in l.OrderBy(x => x.record))
                    {
                        Console.Error.WriteLine(result);
                    }
                });

                Console.Error.WriteLine("Running connected components on a random graph ({0} nodes, {1} edges)", nodeCount, edgeCount);
                Console.Error.WriteLine("For each size, the number of components of that size (may take a moment):");

                computation.Activate();

                edges.OnNext(computation.Configuration.ProcessID == 0 ? graph : Enumerable.Empty <IntPair>());

                // if we are up for interactive access ...
                if (computation.Configuration.Processes == 1)
                {
                    output.Sync(0);

                    Console.WriteLine();
                    Console.WriteLine("Next: sequentially rewiring random edges (press [enter] each time):");

                    for (int i = 0; true; i++)
                    {
                        Console.ReadLine();
                        stopwatch.Restart();
                        var newEdge = new IntPair(random.Next(nodeCount), random.Next(nodeCount));
                        Console.WriteLine("Rewiring edge: {0} -> {1}", graph[i], newEdge);
                        edges.OnNext(new[] { new Weighted <IntPair>(graph[i], -1), new Weighted <IntPair>(newEdge, 1) });
                        output.Sync(i + 1);
                    }
                }

                edges.OnCompleted();
                computation.Join();
            }
        }
Example #19
        public void Execute(string[] args)
        {
            using (var computation = NewComputation.FromArgs(ref args))
            {
                //// either read inputs from a file, or generate them randomly.
                //Stream<Edge, Epoch> edges;
                //if (args.Length == 1)
                //{
                //    // generate a random graph in each process; pagerank computation is performed on the union of edges.
                //    edges = GenerateEdges(1000000, 20000000, computation.Configuration.ProcessID).AsNaiadStream(computation);
                //}
                //else
                //{
                //    var text = args.Skip(1)
                //                   .AsNaiadStream(computation)
                //                   .Distinct()
                //                   .SelectMany(x => x.ReadLinesOfText());

                //    edges = text.Where(x => !x.StartsWith("#"))
                //                .Select(x => x.Split())
                //                .Select(x => new Edge(new Node(Int32.Parse(x[0])), new Node(Int32.Parse(x[1]))));
                //}

                // loading data
                //Console.WriteLine("Grep program starts");
                if (args.Length < 5)
                {
                    string parameters = "";
                    for (int i = 0; i < args.Length; ++i)
                    {
                        parameters = parameters + " " + args[i];
                    }
                    Console.WriteLine("current parameters: " + parameters);
                    Console.WriteLine("usage: Examples.exe pr <inputPath> <numIters> <outputPath> <numIterations>");
                    return;
                }
                //for (int i = 0; i < args.Length; ++i){
                //    Console.WriteLine("the "+i+"th argument is " + args[i] );
                //}
                string inputDir         = args[1];
                int    numFiles         = Int32.Parse(args[2]);
                string outputPathFormat = args[3] + "{0}";
                var    iterations       = Int32.Parse(args[4]);

                var stopwatch = System.Diagnostics.Stopwatch.StartNew();
                var text      = loadDiskFiles(computation, inputDir, numFiles);

                //this will make the whole process slow!
                //var barrierL = text.Select(x => 1);
                //barrierL.Subscribe( list => {
                //    Console.WriteLine("number of edges: " + list.Count());
                //});
                //Console.WriteLine("loading finished at " + stopwatch.Elapsed);

                var edges = text.Select(x => x.Split())
                            .Select(x => new Edge(new Node(Int32.Parse(x[0])), new Node(Int32.Parse(x[1]))));
                Console.Out.WriteLine("Started up!");
                Console.Out.Flush();

                edges = edges.PartitionBy(x => x.source);

                // capture degrees before trimming leaves. (Note: CountNodes has a big problem!)
                var degrees = edges.Select(x => x.source)
                              .CountNodes();


                // initial distribution of ranks.
                var start = degrees.Select(x => x.node.WithValue(0.15f));

                // define an iterative pagerank computation, add initial values, aggregate up the results and print them to the screen.

                //var result = start.NodeJoin(degrees, (rank, degree) => degree > 0 ? rank * (0.85f / degree) : 0.0f)
                //        .GraphReduce(edges, (x, y) => x + y, false)
                //        .Select(x => x.node.WithValue((float)(x.value + 0.15)))
                //        .NodeJoin(degrees, (rank, degree) => degree > 0 ? rank * (0.85f / degree) : 0.0f)
                //        .GraphReduce(edges, (x, y) => x + y, false);

                var ranks = start.Iterate((lc, deltas) => deltas.PageRankStep(lc.EnterLoop(degrees),
                                                                              lc.EnterLoop(edges)),
                                          x => x.node.index,
                                          iterations,
                                          "PageRank");


                computation.OnFrontierChange += (x, y) => { Console.WriteLine(stopwatch.Elapsed + "\t" + string.Join(", ", y.NewFrontier)); Console.Out.Flush(); };

                //dumping
                //Action<NodeWithValue<float>, System.IO.BinaryWriter> writer = writeNodeWithValue;
                //rank.WriteToFiles<NodeWithValue<float>>(outputPathFormat, writeNodeWithValue);

                Console.WriteLine("deploy successfully");

                // start computation, and block until completion.
                computation.Activate();
                computation.Join();
            }
        }
Example #20
        public void Execute(string[] args)
        {
            // allocate a new computation from command line arguments.
            using (var computation = NewComputation.FromArgs(ref args))
            {
                //var nodeCount = args.Length == 3 ? Convert.ToInt32(args[1]) : 1000;
                //var edgeCount = args.Length == 3 ? Convert.ToInt32(args[2]) : 2000;

                //#region Generate a local fraction of input data

                //var random = new Random(0);
                //var processes = computation.Configuration.Processes;
                //var thisProcess = computation.Configuration.ProcessID;
                //var graphFragmentList = new List<Pair<int, int>>();
                //for (int i = 0; i < edgeCount; i++)
                //{
                //    // ensure we generate the same graph no matter how many processes there are
                //    var edge = new Pair<int, int>(random.Next(nodeCount), random.Next(nodeCount));
                //    if ((i % processes) == thisProcess)
                //    {
                //        graphFragmentList.Add(edge);
                //    }
                //}
                //var graphFragment = graphFragmentList.ToArray();

                //#endregion

                //Console.WriteLine("size of graphFragmentList: {0}", graphFragmentList.Count());
                //Console.WriteLine("Computing components of a random graph on {0} nodes and {1} edges", nodeCount, edgeCount);

                Stopwatch stopwatch = new Stopwatch();

                //// convert array of edges to single-epoch stream.
                //var edges = graphFragment.AsNaiadStream(computation)
                //                         .Synchronize(x => true);


                if (args.Length < 5)
                {
                    string parameters = "";
                    for (int i = 0; i < args.Length; ++i)
                    {
                        parameters = parameters + " " + args[i];
                    }
                    Console.WriteLine("current parameters: " + parameters);
                    Console.WriteLine("usage: Examples.exe sssp <inputPath> <numFiles> <outputPath> <srcId>");
                    return;
                }
                //for (int i = 0; i < args.Length; ++i){
                //    Console.WriteLine("the "+i+"th argument is " + args[i] );
                //}
                string inputDir         = args[1];
                int    numFiles         = Int32.Parse(args[2]);
                string outputPathFormat = args[3] + "{0}";
                var    srcId            = Int32.Parse(args[4]);

                //loading
                var text = loadDiskFiles(computation, inputDir, numFiles);

                //building graph
                var edges = text.Select(x => x.Split())
                            .Select(x => new Pair <int, int>(Int32.Parse(x[0]), Int32.Parse(x[1])));

                var result = edges.SSSP(srcId);
                //.Subscribe(list => {
                //    Console.WriteLine("labeled {0} nodes in {1}", list.Count(), stopwatch.Elapsed);
                //    foreach (var element in list) {
                //        Console.WriteLine("vertex: " + element.First + ",value: " + element.Second);
                //    }
                //});

                //edges.Subscribe(list =>
                //{
                //    int numEdges = 0;
                //    foreach (var element in list)
                //    {
                //        numEdges++;
                //    }
                //    Console.WriteLine("# of edges is : {0}", numEdges);
                //});

                //dumping
                Action <Pair <int, float>, System.IO.BinaryWriter> writer = writePair;
                result.WriteToFiles <Pair <int, float> >(outputPathFormat, writer);

                //Console.WriteLine("deploy successfully");

                stopwatch.Start();
                computation.OnFrontierChange += (x, y) => { Console.WriteLine(stopwatch.Elapsed + "\t" + string.Join(", ", y.NewFrontier)); Console.Out.Flush(); };
                computation.Activate();     // start graph computation
                computation.Join();         // block until computation completes
            }
        }
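The writePair callback handed to WriteToFiles above is not included. A minimal sketch, assuming each (node, distance) pair is written as a UTF-8 text line, might be:

        // Hypothetical sketch of writePair (assumed, not part of the original snippet):
        // writes "nodeId distance" as one UTF-8 line per record.
        private static void writePair(Pair<int, float> record, System.IO.BinaryWriter writer)
        {
            var line = string.Format("{0} {1}\n", record.First, record.Second);
            writer.Write(System.Text.Encoding.UTF8.GetBytes(line));
        }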
Example #21
File: KMeans.cs Project: omidm/naiad
    static void Main(string[] args)
    {
        // 1. allocate a new dataflow computation.
        using (var computation = NewComputation.FromArgs(ref args))
        {
            if (args.Length != 6)
            {
                PrintHelp();
                return;
            }

            Int32 procid     = computation.Configuration.ProcessID;
            Int32 thread_num = computation.Configuration.WorkerCount;
            Int32 worker_num = computation.Configuration.Processes;

            Int32  dimension     = Int32.Parse(args[0]);
            Int32  cluster_num   = Int32.Parse(args[1]);
            Int32  iteration_num = Int32.Parse(args[2]);
            Int32  partition_num = Int32.Parse(args[3]);
            double sample_num_m  = Convert.ToDouble(args[4]);
            Int64  spin_wait     = Int64.Parse(args[5]);

            Console.Out.WriteLine("dimension: " + dimension);
            Console.Out.WriteLine("cluster_num: " + cluster_num);
            Console.Out.WriteLine("iteration_num: " + iteration_num);
            Console.Out.WriteLine("partition_num: " + partition_num);
            Console.Out.WriteLine("sample_num_m: " + sample_num_m);
            Console.Out.WriteLine("spin_wait: " + spin_wait);
            Console.Out.WriteLine("procid: " + procid);
            Console.Out.WriteLine("worker_num: " + worker_num);
            Console.Out.WriteLine("thread_num: " + thread_num);
            Console.Out.Flush();

            KMeans km =
                new KMeans(dimension,
                           cluster_num,
                           iteration_num,
                           partition_num,
                           sample_num_m,
                           spin_wait,
                           procid,
                           worker_num,
                           thread_num);

            Stream <SampleBatch, Epoch> samples = km.GenerateSamples().AsNaiadStream(computation);
            samples = samples.PartitionBy(s => (int)(s[0][0]));
            var end_samples = samples.Iterate((lc, s) => km.Advance(s), iteration_num, "KMeans");
            // var output = end_samples.Subscribe(x => {
            //                                           Console.Out.WriteLine("Final center 0: " + PrintList(km.means_[0]));
            //                                           Console.Out.Flush();
            //                                        });

            Console.Out.WriteLine("Before Activate!");
            Console.Out.Flush();
            // start the computation, fixing the structure of the dataflow graph.
            computation.Activate();
            Console.Out.WriteLine("After Activate!");
            Console.Out.Flush();

            // block until all work is finished.
            computation.Join();
            Console.Out.WriteLine("After Join!");


            double average_total   = km.total_times_.GetRange(truncate_index_, iteration_num - truncate_index_).Average();
            double average_compute = km.compute_times_.GetRange(truncate_index_, iteration_num - truncate_index_).Average();
            double average_idle    = average_total - average_compute;
            Console.Out.WriteLine("*** Average for the last {0:D2} iterations: compute(ms): {1:F2} total(ms): {2:F2} (idle(ms): {3:F2})",
                                  iteration_num - truncate_index_, 1000 * average_compute, 1000 * average_total, 1000 * average_idle);


            for (int i = 0; i < cluster_num; ++i)
            {
                Console.Out.WriteLine("Final center {0:D2}: {1:S}: ", i, PrintList(km.means_[i]));
            }
            Console.Out.WriteLine("Samples Counts: " + PrintList(km.sample_counter));
            Console.Out.WriteLine("Reduce Level 1 Counts: " + PrintList(km.reduce_l1_counter_));
            Console.Out.WriteLine("Reduce Level 2 Counts: " + PrintList(km.reduce_l2_counter_));
            Console.Out.WriteLine("Sync Level 1 Counts: " + PrintList(km.sync_l1_counter_));
            Console.Out.WriteLine("Sync Level 2 Counts: " + PrintList(km.sync_l2_counter_));
            Console.Out.WriteLine("Sync Tags: " + PrintHashSet(km.sync_tags_));
            Console.Out.WriteLine("Reduce Tags: " + PrintHashSet(km.reduce_tags_));
            Console.Out.WriteLine("Clustering Tags: " + PrintHashSet(km.clustering_tags_));
            Console.Out.Flush();
        }
    }
Example #22
        public void Execute(string[] args)
        {
            using (var computation = NewComputation.FromArgs(ref args))
            {
                // establish numbers of nodes and edges from input or from defaults.
                if (args.Length == 3)
                {
                    nodeCount = Convert.ToInt32(args[1]);
                    edgeCount = Convert.ToInt32(args[2]);
                }

                // generate a random graph
                var random = new Random(0);
                var graph  = new Edge[edgeCount];
                for (int i = 0; i < edgeCount; i++)
                {
                    graph[i] = new Edge(random.Next(nodeCount), random.Next(nodeCount));
                }

                // set up the SCC computation
                var edges = computation.NewInputCollection <Edge>();

                var stopwatch = System.Diagnostics.Stopwatch.StartNew();

                var result = edges.TrimLeavesAndFlip()
                             .TrimLeavesAndFlip()
                             .SCC()
                             .Subscribe(x => Console.WriteLine("{1}\tNet edge changes within SCCs: {0}", x.Sum(y => y.weight), stopwatch.Elapsed));

                Console.WriteLine("Strongly connected components on a random graph ({0} nodes, {1} edges)", nodeCount, edgeCount);
                Console.WriteLine("Reporting the numbers of edges within SCCs (may take a moment):");

                computation.Activate();

                // input graph and wait
                if (computation.Configuration.ProcessID == 0)
                {
                    edges.OnNext(graph);
                }
                else
                {
                    edges.OnNext();
                }

                result.Sync(0);

                Console.WriteLine("Computation completed");

                // if we are up for interactive access ...
                if (computation.Configuration.Processes == 1)
                {
                    Console.WriteLine();
                    Console.WriteLine("Press [enter] repeatedly to rewire random edges in the graph. (\"done\" to exit)");

                    for (int i = 0; i < graph.Length; i++)
                    {
                        var line = Console.ReadLine();
                        if (line == "done")
                        {
                            break;
                        }
                        stopwatch.Restart();
                        var newEdge = new Edge(random.Next(nodeCount), random.Next(nodeCount));
                        Console.WriteLine("Rewiring edge: {0} -> {1}", graph[i], newEdge);
                        edges.OnNext(new[] { new Weighted <Edge>(graph[i], -1), new Weighted <Edge>(newEdge, 1) });
                        result.Sync(i + 1);
                    }
                }

                edges.OnCompleted();
                computation.Join();
            }
        }
Example #23
    static void Main(string[] args)
    {
        // 1. allocate a new dataflow computation.
        using (var computation = NewComputation.FromArgs(ref args))
        {
            // print help when it is explicitly requested, or when the argument count is wrong.
            if (args.Length == 1 && (args[0] == "--help" || args[0] == "-h"))
            {
                PrintHelp();
                return;
            }

            if (args.Length != 5)
            {
                PrintHelp();
                return;
            }


            Int32 procid        = computation.Configuration.ProcessID;
            Int32 dimension     = Int32.Parse(args[0]);
            Int32 iteration_num = Int32.Parse(args[1]);
            Int64 partition_num = Int64.Parse(args[2]);
            Int64 sample_num_m  = Int64.Parse(args[3]);
            Int64 worker_num    = Int64.Parse(args[4]);

            Console.Out.WriteLine("**NOTE: Worker num should be equal to core num!");
            Console.Out.WriteLine("procid: " + procid);
            Console.Out.WriteLine("dimension: " + dimension);
            Console.Out.WriteLine("iteration_num: " + iteration_num);
            Console.Out.WriteLine("partition_num: " + partition_num);
            Console.Out.WriteLine("sample_num_m: " + sample_num_m);
            Console.Out.WriteLine("worker_num (should be core num): " + worker_num);
            Console.Out.Flush();

            LogisticRegression lr =
                new LogisticRegression(procid,
                                       dimension,
                                       iteration_num,
                                       partition_num,
                                       sample_num_m,
                                       worker_num);

            Stream <Sample, Epoch> samples = lr.GenerateSamples().AsNaiadStream(computation);

            // partition the samples based on the first element.
            samples = samples.PartitionBy(s => (int)(s[0]));


            var end_samples = samples.Iterate((lc, s) => lr.Advance(s), iteration_num, "LogisticRegression");

            var output = end_samples.Subscribe(x => {
                Console.Out.WriteLine("Final weight: " + PrintList(lr.weight_));
                Console.Out.Flush();
            });

            Console.Out.WriteLine("Before Activate!");
            Console.Out.Flush();

            // start the computation, fixing the structure of the dataflow graph.
            computation.Activate();

            Console.Out.WriteLine("After Activate!");
            Console.Out.Flush();

            // block until all work is finished.
            computation.Join();

            Console.Out.WriteLine("After Join!");
            Console.Out.WriteLine("Counter 1 from procid: " + lr.procid_ + " " + PrintList(lr.counter_1_));
            Console.Out.WriteLine("Counter 2 from procid: " + lr.procid_ + " " + PrintList(lr.counter_2_));
            Console.Out.WriteLine("Counter 3 from procid: " + lr.procid_ + " " + PrintList(lr.counter_3_));
            Console.Out.Flush();
        }
    }
Example #24
        public void Execute(string[] args)
        {
            using (var computation = NewComputation.FromArgs(ref args))
            {
                // establish numbers of nodes and edges from input or from defaults.
                if (args.Length == 3)
                {
                    nodeCount = Convert.ToInt32(args[1]);
                    edgeCount = Convert.ToInt32(args[2]);
                }

                // generate a random graph
                var random = new Random(0);
                var graph  = new IntPair[edgeCount];
                for (int i = 0; i < edgeCount; i++)
                {
                    graph[i] = new IntPair(random.Next(nodeCount), random.Next(nodeCount));
                }

                // set up the CC computation
                var edges = computation.NewInputCollection <IntPair>();

                var stopwatch = System.Diagnostics.Stopwatch.StartNew();

                var colors = edges.Where(x => x.s != x.t)
                             .Color();

                var output = colors.Select(x => x.t)          // just keep the color (to count)
                             .Output
                             .Subscribe((i, l) => Console.WriteLine("Time to process: {0}", stopwatch.Elapsed));

                // set to enable a correctness test, at the cost of more memory and computation.
                var testCorrectness = false;
                if (testCorrectness)
                {
                    edges.Where(x => x.s != x.t)
                    .Join(colors, e => e.s, c => c.s, e => e.t, c => c.t, (s, t, c) => new IntPair(c, t))
                    .Join(colors, e => e.t, c => c.s, e => e.s, c => c.t, (t, s, c) => new IntPair(s, c))
                    .Where(p => p.s == p.t)
                    .Consolidate()
                    .Subscribe(l => Console.WriteLine("Coloring errors: {0}", l.Length));
                }

                Console.WriteLine("Running graph coloring on a random graph ({0} nodes, {1} edges)", nodeCount, edgeCount);
                Console.WriteLine("Reporting the time taken to (re)color the graph after each update:");

                computation.Activate();

                edges.OnNext(computation.Configuration.ProcessID == 0 ? graph : Enumerable.Empty <IntPair>());

                output.Sync(0);

                // if we are up for interactive access ...
                if (computation.Configuration.Processes == 1)
                {
                    Console.WriteLine();
                    Console.WriteLine("Next: sequentially rewiring random edges (press [enter] each time):");

                    for (int i = 0; i < graph.Length; i++)
                    {
                        Console.ReadLine();
                        stopwatch.Restart();
                        var newEdge = new IntPair(random.Next(nodeCount), random.Next(nodeCount));
                        Console.WriteLine("Rewiring edge: {0} -> {1}", graph[i], newEdge);
                        edges.OnNext(new[] { new Weighted <IntPair>(graph[i], -1), new Weighted <IntPair>(newEdge, 1) });
                        output.Sync(i + 1);
                    }
                }

                edges.OnCompleted();

                computation.Join();
            }
        }
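The coloring example manipulates an IntPair record whose definition is not included in this snippet. Below is a hedged sketch of what it looks like, inferred only from the usage above (fields s and t, a two-argument constructor, printable via {0}); the equality and hashing members are assumptions added so the record behaves sensibly in Naiad collections.

using System;

public struct IntPair : IEquatable<IntPair>
{
    public int s;   // source node (or node, in the coloring output)
    public int t;   // target node (or color, in the coloring output)

    public IntPair(int s, int t) { this.s = s; this.t = t; }

    public bool Equals(IntPair other) { return this.s == other.s && this.t == other.t; }
    public override bool Equals(object obj) { return obj is IntPair && Equals((IntPair)obj); }
    public override int GetHashCode() { return s ^ (t << 16); }
    public override string ToString() { return string.Format("[{0} {1}]", s, t); }
}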
Example #25
0
File: PageRank.cs Project: omidm/naiad
        public void Execute(string[] args)
        {
            using (var computation = NewComputation.FromArgs(ref args))
            {
                // either read inputs from a file, or generate them randomly.
                Stream <Edge, Epoch> edges;
                if (args.Length == 1)
                {
                    // generate a random graph in each process; pagerank computation is performed on the union of edges.
                    edges = GenerateEdges(1000000, 20000000, computation.Configuration.ProcessID).AsNaiadStream(computation);
                }
                else
                {
                    var text = args.Skip(1)
                               .AsNaiadStream(computation)
                               .Distinct()
                               .SelectMany(x => x.ReadLinesOfText());

                    edges = text.Where(x => !x.StartsWith("#"))
                            .Select(x => x.Split())
                            .Select(x => new Edge(new Node(Int32.Parse(x[0])), new Node(Int32.Parse(x[1]))));
                }

                Console.Out.WriteLine("Started up!");
                Console.Out.Flush();

                edges = edges.PartitionBy(x => x.source);

                // capture degrees before trimming leaves.
                var degrees = edges.Select(x => x.source)
                              .CountNodes();

                // removes edges to pages with zero out-degree.
                var trim = false;
                if (trim)
                {
                    edges = edges.Select(x => x.target.WithValue(x.source))
                            .FilterBy(degrees.Select(x => x.node))
                            .Select(x => new Edge(x.value, x.node));
                }

                // initial distribution of ranks.
                var start = degrees.Select(x => x.node.WithValue(0.15f))
                            .PartitionBy(x => x.node.index);

                // define an iterative pagerank computation, add initial values, aggregate up the results and print them to the screen.
                var iterations = 10;
                var ranks      = start.IterateAndAccumulate((lc, deltas) => deltas.PageRankStep(lc.EnterLoop(degrees),
                                                                                                lc.EnterLoop(edges)),
                                                            x => x.node.index,
                                                            iterations,
                                                            "PageRank")
                                 .Concat(start)                             // add initial ranks in for correctness.
                                 .NodeAggregate((x, y) => x + y)            // accumulate up the ranks.
                                 .Where(x => x.value > 0.0f);               // report only positive ranks.

                var stopwatch = System.Diagnostics.Stopwatch.StartNew();
                computation.OnFrontierChange += (x, y) => { Console.WriteLine(stopwatch.Elapsed + "\t" + string.Join(", ", y.NewFrontier)); Console.Out.Flush(); };

                // start computation, and block until completion.
                computation.Activate();
                computation.Join();
            }
        }
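The random-graph branch above calls a GenerateEdges helper that is not shown in this example. The sketch below is an assumption-laden illustration of such a helper (seeding by process id and drawing endpoints uniformly are guesses, not the original implementation); Edge and Node are the same GraphLINQ record types used elsewhere in these examples.

using System;
using System.Collections.Generic;

public static class EdgeGeneratorSketch
{
    public static IEnumerable<Edge> GenerateEdges(int nodeCount, int edgeCount, int processId)
    {
        // seed by process id so each process contributes a distinct fragment of the graph.
        var random = new Random(processId);
        for (int i = 0; i < edgeCount; i++)
            yield return new Edge(new Node(random.Next(nodeCount)), new Node(random.Next(nodeCount)));
    }
}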
Example #26
0
File: Program.cs Project: ggevay/cfl-naiad
        public static void Main(string[] args)
        {
            using (var computation = NewComputation.FromArgs(ref args))
            {
                Console.WriteLine("computation.Configuration.WorkerCount (--threads): " + computation.Configuration.WorkerCount);

                int numDays = int.Parse(args[1]);                  // This has to be inside the using, because the FromArgs call removes the Naiad-specific arguments from args
                Console.WriteLine("numDays: " + numDays);

                //Console.WriteLine ("computation.Configuration.ProcessID: " + computation.Configuration.ProcessID);

                var initYesterdayCounts     = Enumerable.Empty <Pair <int, int> >().AsNaiadStream(computation);
                Stream <int, Epoch> dayInit = null;
                if (computation.Configuration.ProcessID == 0)
                {
                    dayInit = new[] { 1 }.AsNaiadStream(computation);
                }
                else
                {
                    dayInit = Enumerable.Empty <int>().AsNaiadStream(computation);
                }

                initYesterdayCounts.Iterate((lc, yesterdayCounts) => {
                    var dayDelayed = lc.Delay <int>(numDays - 1);
                    var dayIngress = lc.EnterLoop(dayInit);
                    var dayHead    = dayIngress.Concat(dayDelayed.Output);

                    var dayTail = dayHead.Select(x => x + 1);

                    dayDelayed.Input = dayTail;

                    //var visits = day.SelectMany(x => ("/home/ggevay/Dropbox/cfl_testdata/ClickCount/in/clickLog_" + x).ReadLinesOfText());
                    var visits = dayHead.PartitionBy(x => x).SelectMany(x => (args[0] + x).ReadLinesOfText());
                    //var uri = day.Select(x => new Uri("hdfs://cloud-11:44000/user/ggevay/ClickCountGenerated/0.05/25000000/in/clickLog_" + x));
                    //var visits = uri.FromHdfsText();

                    visits = visits.PartitionBy(x => x);

                    var todayCounts = visits                                                                                         //Synchronize(x => true)
                                      .Select(x => x.PairWith(1))
                                      .Aggregate(p => p.First, p => p.Second, (x, y) => x + y, (key, state) => key.PairWith(state)); //.Synchronize(x => true);



                    var summed = todayCounts                                                                                 //.Synchronize(x => true)
                                 .Join(yesterdayCounts, x => x.First, x => x.First, (x, y) => Math.Abs(x.Second - y.Second)) //.Synchronize(x => true)
                                 .Aggregate <int, int, int, int, IterationIn <Epoch> >(x => 0, x => x, (x, y) => x + y, (key, state) => state, true);

                    lc.ExitLoop(summed).Subscribe(x =>
                    {
                        foreach (var line in x)
                        {
                            Console.WriteLine(line);
                        }
                    });


                    return todayCounts;                   //.Synchronize(x => true);
                },
                                            numDays - 1,
                                            "ClickCount iteration");

                computation.Activate();
                computation.Join();

                if (computation.Configuration.ProcessID == 0)
                {
                    Console.WriteLine("Computation finished");
                }
            }
        }
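The Delay/EnterLoop/ExitLoop wiring above is the subtle part: the loop variable (the current day) is fed back through a Delay operator so that iteration i + 1 sees the value produced in iteration i. The stripped-down sketch below shows only that feedback pattern, with the per-day work replaced by a simple increment; it is an illustration written against the same LoopContext and Iterate calls used above, and the namespaces and the trivial update are assumptions rather than the original program.

using System;
using System.Linq;
using Microsoft.Research.Naiad;
using Microsoft.Research.Naiad.Frameworks.Lindi;
using Microsoft.Research.Naiad.Input;

public static class LoopFeedbackSketch
{
    public static void Main(string[] args)
    {
        using (var computation = NewComputation.FromArgs(ref args))
        {
            var state = Enumerable.Empty<int>().AsNaiadStream(computation);   // the iterated state (unused here)
            var init  = new[] { 1 }.AsNaiadStream(computation);               // initial value of the loop variable

            state.Iterate((lc, s) =>
            {
                var delayed = lc.Delay<int>(4);                    // feedback edge, four iterations deep
                var head    = lc.EnterLoop(init).Concat(delayed.Output);
                var tail    = head.Select(x => x + 1);             // the "work" done in each iteration
                delayed.Input = tail;

                // values leaving the loop after each pass: 2, 3, 4, ...
                lc.ExitLoop(tail).Subscribe(xs => Console.WriteLine(string.Join(", ", xs)));

                return s;                                          // the iterated state passes through unchanged
            }, 4, "FeedbackSketch");

            computation.Activate();
            computation.Join();
        }
    }
}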
Example #27
0
        public void Execute(string[] args)
        {
            // the first thing to do is to allocate a computation from args.
            using (var computation = NewComputation.FromArgs(ref args))
            {
                // loading data
                if (args.Length < 5)
                {
                    Console.WriteLine("usage: Examples.exe terasort <inputDir> <numFiles> <outputDir> <numWorkers=6>");
                    return;
                }

                String inputDir         = args[1];
                int    numFiles         = Int32.Parse(args[2]);
                string outputPathFormat = args[3] + "{0}";
                int    numWorkers       = Int32.Parse(args[4]);

                var stopwatch = System.Diagnostics.Stopwatch.StartNew();
                var text      = loadDiskFiles(computation, inputDir, numFiles);
                //text.Subscribe(l => {
                //    foreach (var v in l) Console.WriteLine("key is " + v.First + " , value is " + v.Second);
                //});

                // computation
                TeraSortPartitioner partitioner = new TeraSortPartitioner(computation.Configuration.Processes * numWorkers);
                //var result = text.TeraSort(partitioner);

                var result = text.PartitionBy(x => partitioner.getPartition(x.First)).TeraSort();


                //var result = text.PartitionBy(x => x.First.GetHashCode()).TeraSort();

                //result.Subscribe(l =>
                //{
                //    foreach (var v in l) Console.WriteLine("key is " + v.First + " , value is " + v.Second);
                //});
                computation.OnFrontierChange += (x, y) => { Console.WriteLine(stopwatch.Elapsed + "\t" + string.Join(", ", y.NewFrontier)); Console.Out.Flush(); };
                //long max = BitConverter.ToInt64(new Byte[] { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00 }, 0);
                //Console.WriteLine("long max equal to " + max);

                //dumping
                // Action< Pair<byte[], byte[]> , System.IO.BinaryWriter > writer = writePair;
                //result.WriteToFiles<Pair<byte[], byte[]>>(outputPathFormat, writer);
                computation.Activate();
                computation.Join();
            }
        }
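The TeraSortPartitioner constructed above is not shown in this snippet; the commented-out experiments only hint that keys are interpreted as fixed-width binary values. The following is a hedged sketch of such a range partitioner, assuming byte-array keys and the 7-byte key space suggested by those comments; the real class layout may well differ.

using System;

public class TeraSortPartitioner
{
    private readonly long rangePerPart;

    public TeraSortPartitioner(int numPartitions)
    {
        long maxKey = 0x00FFFFFFFFFFFFFFL;              // largest 7-byte key value
        this.rangePerPart = maxKey / numPartitions + 1; // size of each contiguous key range
    }

    public int getPartition(byte[] key)
    {
        // interpret the first 7 key bytes as a non-negative big-endian integer.
        long keyValue = 0;
        for (int i = 0; i < 7 && i < key.Length; i++)
            keyValue = (keyValue << 8) | key[i];

        return (int)(keyValue / rangePerPart);
    }
}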
Example #28
0
        static void ExecuteNaiad(string[] args, string dataDir, string uriBase)
        {
            string ukFile          = Path.Combine(dataDir, @"uk-2007-05");
            string twitterFile     = Path.Combine(dataDir, @"twitter_rv.bin");
            string livejournalFile = Path.Combine(dataDir, @"livejournal.bin");

            var configuration = Configuration.FromArgs(ref args);

            var algorithm = args[1];
            var dataset   = args[2];

            #region file partitioning
            if (algorithm == "partition" && dataset == "twitter")
            {
                var stopwatch = System.Diagnostics.Stopwatch.StartNew();

                using (var computation = NewComputation.FromConfig(configuration))
                {
                    int parts  = Int32.Parse(args[3]);
                    var format = Path.Combine(dataDir, @"twitter-part-{0}-of-" + (parts * parts).ToString());

                    computation.LoadGraph(twitterFile)
                    .Partition(parts, parts)
                    .WriteBinaryToFiles(format);

                    computation.Activate();
                    computation.Join();
                }

                Console.WriteLine(stopwatch.Elapsed);
            }

            if (algorithm == "repartition" && dataset == "twitter")
            {
                var stopwatch = System.Diagnostics.Stopwatch.StartNew();

                using (var computation = NewComputation.FromConfig(configuration))
                {
                    int parts = Int32.Parse(args[3]);

                    computation.ReadHdfsBinaryCollection <Edge>(new Uri(uriBase + "twitter-10"))
                    .Partition(parts, parts)
                    .WriteHdfsBinary(new Uri(uriBase + "twitter-" + parts), 1024 * 1024, -1L, 100L * 1024L * 1024L * 1024L);

                    computation.Activate();
                    computation.Join();
                }

                Console.WriteLine(stopwatch.Elapsed);
            }

            if (algorithm == "compact" && dataset == "twitter")
            {
                using (var computation = NewComputation.FromConfig(configuration))
                {
                    var edges = System.IO.File.OpenRead(twitterFile)
                                .ReadEdges()
                                .AsNaiadStream(computation);

                    using (var renamer = new AutoRenamer <Int32>())
                    {
                        var newEdges = edges.RenameUsing(renamer, edge => edge.source)
                                       .Select(x => new Edge(x.node, x.value.target))
                                       .RenameUsing(renamer, edge => edge.target)
                                       .Select(x => new Edge(x.value.source, x.node));

                        edges = newEdges.FinishRenaming(renamer);
                    }

                    computation.Activate();
                    computation.Join();
                }
            }
            #endregion

            #region page rank
            if (algorithm == "pagerank" && dataset == "twitter")
            {
                var stopwatch = System.Diagnostics.Stopwatch.StartNew();

                using (var computation = NewComputation.FromConfig(configuration))
                {
                    computation.OnFrontierChange += (x, y) => { Console.WriteLine(System.DateTime.Now + "\t" + string.Join(", ", y.NewFrontier)); System.GC.GetTotalMemory(true); };

                    var edges = System.IO.File.OpenRead(twitterFile)
                                .ReadEdges()
                                .AsNaiadStream(computation);

                    edges.PageRank(20, "twitter").Subscribe();

                    computation.Activate();
                    computation.Join();
                }

                Console.WriteLine(stopwatch.Elapsed);
            }

            if (algorithm == "pagerank" && dataset == "livejournal")
            {
                var stopwatch = System.Diagnostics.Stopwatch.StartNew();

                using (var computation = NewComputation.FromConfig(configuration))
                {
                    computation.OnFrontierChange += (x, y) => { Console.WriteLine(System.DateTime.Now + "\t" + string.Join(", ", y.NewFrontier)); };

                    var edges = System.IO.File.OpenRead(livejournalFile)
                                .ReadEdges()
                                .AsNaiadStream(computation);

                    edges.PageRank(20, "livejournal").Subscribe();

                    computation.Activate();
                    computation.Join();
                }

                Console.WriteLine(stopwatch.Elapsed);
            }
            #endregion

            #region connected components
            if (algorithm == "connectedcomponents" && dataset == "uk-2007-05")
            {
                var stopwatch = System.Diagnostics.Stopwatch.StartNew();

                using (var computation = NewComputation.FromConfig(configuration))
                {
                    var format = Path.Combine(dataDir, @"uk-2007-05-part-{0}-of-{1}");

                    var extraInput = new[] { string.Format(format, 3, 4) }.AsNaiadStream(computation)
                    .PartitionBy(x => 3)
                    .ReadGraph();

                    computation.LoadGraph(format, 3, 4)
                    .UnionFind(106000000)
                    .PartitionBy(x => 3)
                    .Concat(extraInput)
                    .UnionFind(106000000);

                    computation.Activate();
                    computation.Join();
                }

                Console.WriteLine(stopwatch.Elapsed);
            }

            if (algorithm == "connectedcomponents" && dataset == "twitter")
            {
                using (Microsoft.Research.Peloponnese.Hdfs.HdfsInstance hdfs = new Microsoft.Research.Peloponnese.Hdfs.HdfsInstance(new Uri(uriBase)))
                {
                    // HDFS needs to be initialized from the main thread before distributed use
                    bool exists = hdfs.IsFileExists("/dummy");
                }

                var readWatch = System.Diagnostics.Stopwatch.StartNew();

                using (var controller = NewController.FromConfig(configuration))
                {
                    using (var readComputation = controller.NewComputation())
                    {
                        int parts      = (args.Length > 4) ? Int32.Parse(args[4]) : 1;
                        int machines   = (args.Length > 5) ? Int32.Parse(args[5]) : 1;
                        int another    = (args.Length > 6) ? Int32.Parse(args[6]) : 1;
                        var format     = new Uri(@uriBase + "twitter-40");
                        var collection = readComputation
                                         .ReadHdfsBinaryCollection <Edge>(format);

                        Stream <int[], Epoch> readStuff = null;

                        switch (args[3])
                        {
                        case "sp":
                            readStuff = collection.GroupEdgesSingleProcess(parts, parts);
                            break;

                        case "pp":
                            readStuff = collection.GroupEdgesPartsPerProcess(parts, parts, 16);
                            break;

                        case "op":
                            readStuff = collection.GroupEdgesOnePerProcess(parts, parts, 16);
                            break;

                        case "hp":
                            readStuff = collection.GroupEdgesHierarchyPerProcess(parts, machines, 16);
                            break;

                        case "hhp":
                            readStuff = collection.GroupEdgesHierarchyPerProcess(parts, machines * another, 16);
                            break;

                        default:
                            throw new ApplicationException("Grouping type must be sp, pp, op, hp or hhp");
                        }

                        var sink = new InterGraphDataSink <int[]>(readStuff);

                        readComputation.Activate();
                        readComputation.Join();

                        Console.WriteLine("Reading done: " + readWatch.Elapsed);

                        for (int i = 0; i < 20; ++i)
                        {
                            var stopwatch = System.Diagnostics.Stopwatch.StartNew();

                            using (var computation = controller.NewComputation())
                            {
                                var firstStage = computation.NewInput(sink.NewDataSource())
                                                 .ReformatInts();

                                if (parts * machines * another > 1)
                                {
                                    firstStage = firstStage
                                                 .UnionFindStruct(65000000, parts * machines * another, machines * another);
                                }

                                switch (args[3])
                                {
                                case "sp":
                                    firstStage
                                    .PartitionBy(x => parts * parts)
                                    .UnionFind(65000000);
                                    break;

                                case "pp":
                                    firstStage
                                    .PartitionBy(x => 16 * parts)
                                    .UnionFind(65000000);
                                    break;

                                case "op":
                                    firstStage
                                    .PartitionBy(x => 16 * (parts * parts))
                                    .UnionFind(65000000);
                                    break;

                                case "hp":
                                    if (parts * parts < 16)
                                    {
                                        firstStage
                                        .PartitionBy(x => 16 * x.destination + (parts * parts))
                                        .UnionFindStruct(65000000, 0, 0)
                                        .PartitionBy(x => 16 * (machines * machines))
                                        .UnionFind(65000000);
                                    }
                                    else
                                    {
                                        firstStage
                                        .PartitionBy(x => 16 * (x.destination + (machines * machines)))
                                        .UnionFindStruct(65000000, 0, 0)
                                        .PartitionBy(x => 16 * ((machines * machines) + (machines * machines)))
                                        .UnionFind(65000000);
                                    }
                                    break;

                                case "hhp":
                                    firstStage
                                    .PartitionBy(x => 16 * ((x.destination / (machines * machines)) + (machines * machines * another * another)) + (x.destination % (machines * machines)))
                                    .UnionFindStruct(65000000, -machines * another, another)
                                    .PartitionBy(x => 16 * (x.destination + (another * another) + (machines * machines * another * another)))
                                    .UnionFindStruct(65000000, -another, 1)
                                    .PartitionBy(x => 16 * ((another * another) + (another * another) + (machines * machines * another * another)))
                                    .UnionFind(65000000);
                                    break;

                                default:
                                    throw new ApplicationException("Grouping type must be sp, pp, op, hp or hhp");
                                }

                                computation.Activate();
                                computation.Join();
                            }

                            Console.WriteLine(stopwatch.Elapsed);
                        }
                    }

                    controller.Join();
                }
            }

            if (algorithm == "hashtablecc" && dataset == "twitter")
            {
                using (Microsoft.Research.Peloponnese.Hdfs.HdfsInstance hdfs = new Microsoft.Research.Peloponnese.Hdfs.HdfsInstance(new Uri(uriBase)))
                {
                    // HDFS needs to be initialized from the main thread before distributed use
                    bool exists = hdfs.IsFileExists("/dummy");
                }

                var readWatch = System.Diagnostics.Stopwatch.StartNew();

                using (var controller = NewController.FromConfig(configuration))
                {
                    using (var readComputation = controller.NewComputation())
                    {
                        int parts      = (args.Length > 4) ? Int32.Parse(args[4]) : 1;
                        int machines   = (args.Length > 5) ? Int32.Parse(args[5]) : 1;
                        int another    = (args.Length > 6) ? Int32.Parse(args[6]) : 1;
                        var format     = new Uri(@uriBase + "twitter-40");
                        var collection = readComputation
                                         .ReadHdfsBinaryCollection <Edge>(format);

                        Stream <int[], Epoch> readStuff = null;

                        switch (args[3])
                        {
                        case "sp":
                            readStuff = collection.GroupEdgesSingleProcess(parts, parts);
                            break;

                        case "pp":
                            readStuff = collection.GroupEdgesPartsPerProcess(parts, parts, 16);
                            break;

                        case "op":
                            readStuff = collection.GroupEdgesOnePerProcess(parts, parts, 16);
                            break;

                        case "hp":
                            readStuff = collection.GroupEdgesHierarchyPerProcess(parts, machines, 16);
                            break;

                        case "hhp":
                            readStuff = collection.GroupEdgesHierarchyPerProcess(parts, machines * another, 16);
                            break;

                        default:
                            throw new ApplicationException("Grouping type must be sp, pp, op, hp or hhp");
                        }

                        var sink = new InterGraphDataSink <int[]>(readStuff);

                        readComputation.Activate();
                        readComputation.Join();

                        Console.WriteLine("Reading done: " + readWatch.Elapsed);

                        for (int i = 0; i < 20; ++i)
                        {
                            var stopwatch = System.Diagnostics.Stopwatch.StartNew();

                            using (var computation = controller.NewComputation())
                            {
                                var firstStage = computation.NewInput(sink.NewDataSource())
                                                 .ReformatInts()
                                                 .UnionFindHashTable(65000000, parts * machines * another, machines * another);

                                switch (args[3])
                                {
                                case "sp":
                                    firstStage
                                    .PartitionBy(x => parts * parts)
                                    .UnionFind(65000000);
                                    break;

                                case "pp":
                                    firstStage
                                    .PartitionBy(x => 16 * parts)
                                    .UnionFind(65000000);
                                    break;

                                case "op":
                                    firstStage
                                    .PartitionBy(x => 16 * (parts * parts))
                                    .UnionFind(65000000);
                                    break;

                                case "hp":
                                    if (parts * parts < 16)
                                    {
                                        firstStage
                                        .PartitionBy(x => 16 * x.destination + (parts * parts))
                                        .UnionFindStruct(65000000, 0, 0)
                                        .PartitionBy(x => 16 * (machines * machines))
                                        .UnionFind(65000000);
                                    }
                                    else
                                    {
                                        firstStage
                                        .PartitionBy(x => 16 * (x.destination + (machines * machines)))
                                        .UnionFindStruct(65000000, 0, 0)
                                        .PartitionBy(x => 16 * ((machines * machines) + (machines * machines)))
                                        .UnionFind(65000000);
                                    }
                                    break;

                                case "hhp":
                                    firstStage
                                    .PartitionBy(x => 16 * ((x.destination / (machines * machines)) + (machines * machines * another * another)) + (x.destination % (machines * machines)))
                                    .UnionFindStruct(65000000, -machines * another, another)
                                    .PartitionBy(x => 16 * (x.destination + (another * another) + (machines * machines * another * another)))
                                    .UnionFindStruct(65000000, -another, 1)
                                    .PartitionBy(x => 16 * ((another * another) + (another * another) + (machines * machines * another * another)))
                                    .UnionFind(65000000);
                                    break;

                                default:
                                    throw new ApplicationException("Grouping type must be sp, pp, op, hp or hhp");
                                }

                                computation.Activate();
                                computation.Join();
                            }

                            Console.WriteLine(stopwatch.Elapsed);
                        }
                    }

                    controller.Join();
                }
            }

            if (algorithm == "hashtableonlycc" && dataset == "twitter")
            {
                using (Microsoft.Research.Peloponnese.Hdfs.HdfsInstance hdfs = new Microsoft.Research.Peloponnese.Hdfs.HdfsInstance(new Uri(uriBase)))
                {
                    // HDFS needs to be initialized from the main thread before distributed use
                    bool exists = hdfs.IsFileExists("/dummy");
                }

                var readWatch = System.Diagnostics.Stopwatch.StartNew();

                using (var controller = NewController.FromConfig(configuration))
                {
                    using (var readComputation = controller.NewComputation())
                    {
                        int parts      = (args.Length > 4) ? Int32.Parse(args[4]) : 1;
                        int machines   = (args.Length > 5) ? Int32.Parse(args[5]) : 1;
                        int another    = (args.Length > 6) ? Int32.Parse(args[6]) : 1;
                        var format     = new Uri(@uriBase + "twitter-40");
                        var collection = readComputation
                                         .ReadHdfsBinaryCollection <Edge>(format);

                        Stream <int[], Epoch> readStuff = null;

                        switch (args[3])
                        {
                        case "sp":
                            readStuff = collection.GroupEdgesSingleProcess(parts, parts);
                            break;

                        case "pp":
                            readStuff = collection.GroupEdgesPartsPerProcess(parts, parts, 16);
                            break;

                        case "op":
                            readStuff = collection.GroupEdgesOnePerProcess(parts, parts, 16);
                            break;

                        case "hp":
                            readStuff = collection.GroupEdgesHierarchyPerProcess(parts, machines, 16);
                            break;

                        case "hhp":
                            readStuff = collection.GroupEdgesHierarchyPerProcess(parts, machines * another, 16);
                            break;

                        default:
                            throw new ApplicationException("Grouping type must be sp, pp, op, hp or hhp");
                        }

                        var sink = new InterGraphDataSink <int[]>(readStuff);

                        readComputation.Activate();
                        readComputation.Join();

                        Console.WriteLine("Reading done: " + readWatch.Elapsed);

                        for (int i = 0; i < 20; ++i)
                        {
                            var stopwatch = System.Diagnostics.Stopwatch.StartNew();

                            using (var computation = controller.NewComputation())
                            {
                                var firstStage = computation.NewInput(sink.NewDataSource())
                                                 .ReformatInts();

                                if (parts * machines * another > 1)
                                {
                                    firstStage = firstStage
                                                 .UnionFindHashTable(65000000, parts * machines * another, machines * another);
                                }

                                switch (args[3])
                                {
                                case "sp":
                                    firstStage
                                    .PartitionBy(x => parts * parts)
                                    .UnionFindHashTable(65000000);
                                    break;

                                case "pp":
                                    firstStage
                                    .PartitionBy(x => 16 * parts)
                                    .UnionFindHashTable(65000000);
                                    break;

                                case "op":
                                    firstStage
                                    .PartitionBy(x => 16 * (parts * parts))
                                    .UnionFindHashTable(65000000);
                                    break;

                                case "hp":
                                    if (parts * parts < 16)
                                    {
                                        firstStage
                                        .PartitionBy(x => 16 * x.destination + (parts * parts))
                                        .UnionFindHashTable(65000000, 0, 0)
                                        .PartitionBy(x => 16 * (machines * machines))
                                        .UnionFindHashTable(65000000);
                                    }
                                    else
                                    {
                                        firstStage
                                        .PartitionBy(x => 16 * (x.destination + (machines * machines)))
                                        .UnionFindHashTable(65000000, 0, 0)
                                        .PartitionBy(x => 16 * ((machines * machines) + (machines * machines)))
                                        .UnionFindHashTable(65000000);
                                    }
                                    break;

                                case "hhp":
                                    firstStage
                                    .PartitionBy(x => 16 * ((x.destination / (machines * machines)) + (machines * machines * another * another)) + (x.destination % (machines * machines)))
                                    .UnionFindHashTable(65000000, -machines * another, another)
                                    .PartitionBy(x => 16 * (x.destination + (another * another) + (machines * machines * another * another)))
                                    .UnionFindHashTable(65000000, -another, 1)
                                    .PartitionBy(x => 16 * ((another * another) + (another * another) + (machines * machines * another * another)))
                                    .UnionFindHashTable(65000000);
                                    break;

                                default:
                                    throw new ApplicationException("Grouping type must be sp, pp, op, hp or hhp");
                                }

                                computation.Activate();
                                computation.Join();
                            }

                            Console.WriteLine(stopwatch.Elapsed);
                        }
                    }

                    controller.Join();
                }
            }


            if (algorithm == "connectedcomponents" && dataset == "livejournal")
            {
                var stopwatch = System.Diagnostics.Stopwatch.StartNew();

                using (var computation = NewComputation.FromConfig(configuration))
                {
                    var edges = System.IO.File.OpenRead(livejournalFile)
                                .ReadEdges()
                                .AsNaiadStream(computation);

                    edges.UnionFind(5000000)
                    .PartitionBy(x => 0)
                    .UnionFind(5000000);

                    computation.Activate();
                    computation.Join();
                }

                Console.WriteLine(stopwatch.Elapsed);
            }
            #endregion
        }
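Several branches above read edges from a local binary file via a ReadEdges extension that is not included in this snippet. A hedged sketch of such a reader follows; the "two little-endian Int32s per edge" layout is purely an assumption for illustration, since the actual formats of twitter_rv.bin and livejournal.bin are not documented here, and Edge/Node are again the GraphLINQ record types used elsewhere in these examples.

using System;
using System.Collections.Generic;
using System.IO;

public static class EdgeReaderSketch
{
    // Assumed layout: each edge is stored as a pair of Int32 node ids.
    public static IEnumerable<Edge> ReadEdges(this FileStream stream)
    {
        using (var reader = new BinaryReader(stream))
        {
            while (stream.Position + 8 <= stream.Length)
            {
                var source = reader.ReadInt32();
                var target = reader.ReadInt32();
                yield return new Edge(new Node(source), new Node(target));
            }
        }
    }
}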
Example #29
0
File: SearchIndex.cs Project: xyuan/Naiad
        public void Execute(string[] args)
        {
            int documentCount = 100000;
            int vocabulary    = 100000;
            int batchSize     = 10000;
            int iterations    = 10;

            using (var computation = NewComputation.FromArgs(ref args))
            {
                #region building up input data

                if (args.Length == 5)
                {
                    documentCount = Convert.ToInt32(args[1]);
                    vocabulary    = Convert.ToInt32(args[2]);
                    batchSize     = Convert.ToInt32(args[3]);
                    iterations    = Convert.ToInt32(args[4]);
                }

                var             random = new Random(0);
                List <Document> docs   = Enumerable.Range(0, documentCount)
                                         .Select(i => new Document(Enumerable.Range(0, 10)
                                                                   .Select(j => String.Format("{0}", random.Next(vocabulary)))
                                                                   .Aggregate((x, y) => x + " " + y), i)).ToList <Document>();

                List <Query>[] queryBatches = new List <Query> [iterations];

                for (int i = 0; i < iterations; i++)
                {
                    queryBatches[i] = Enumerable.Range(i * batchSize, batchSize)
                                      .Select(j => new Query(String.Format("{0}", j % vocabulary), j, 1))
                                      .ToList();
                }

                #endregion

                // declare inputs for documents and queries.
                var documents = computation.NewInputCollection <Document>();
                var queries   = computation.NewInputCollection <Query>();

                // each document is broken down into a collection of terms, each with associated identifier.
                var dTerms = documents.SelectMany(doc => doc.text.Split(' ').Select(term => new Document(term, doc.id)))
                             .Distinct();

                // each query is broken down into a collection of terms, each with associated identifier and threshold.
                var qTerms = queries.SelectMany(query => query.text.Split(' ').Select(term => new Query(term, query.id, query.threshold)))
                             .Distinct();

                // doc terms and query terms are joined, matching pairs are counted and returned if the count exceeds the threshold.
                var results = dTerms.Join(qTerms, d => d.text, q => q.text, (d, q) => new Match(d.id, q.id, q.threshold))
                              .Count(match => match)
                              .Select(pair => new Match(pair.First.document, pair.First.query, pair.First.threshold - (int)pair.Second))
                              .Where(match => match.threshold <= 0)
                              .Select(match => new Pair <int, int>(match.document, match.query));

                // subscribe to the output in case we are interested in the results
                var subscription = results.Subscribe(list => Console.WriteLine("matches found: {0}", list.Length));

                computation.Activate();

                #region Prepare some fake documents to put in the collection

                // creates many documents each containing 10 words from [0, ... vocabulary-1].
                int share_size = docs.Count / computation.Configuration.Processes;

                documents.OnNext(docs.GetRange(computation.Configuration.ProcessID * share_size, share_size));
                queries.OnNext();

                //Console.WriteLine("Example SearchIndex in Naiad. Step 1: indexing documents, step 2: issuing queries.");
                Console.WriteLine("Indexing {0} random documents, {1} terms (please wait)", documentCount, 10 * documentCount);
                subscription.Sync(0);

                #endregion

                #region Issue batches of queries and assess performance

                if (computation.Configuration.ProcessID == 0)
                {
                    Console.WriteLine("Issuing {0} rounds of batches of {1} queries (press [enter] to start)", iterations, batchSize);
                    Console.ReadLine();
                }

                for (int i = 0; i < iterations; i++)
                {
                    // we round-robin through query terms. more advanced queries are possible.
                    if (computation.Configuration.ProcessID == 0)
                    {
                        queries.OnNext(queryBatches[i]); // introduce new queries.
                    }
                    else
                    {
                        queries.OnNext();
                    }

                    documents.OnNext();          // indicate no new docs.
                    subscription.Sync(i + 1);    // block until round is done.
                }

                documents.OnCompleted();
                queries.OnCompleted();

                #endregion

                computation.Join();
            }
        }
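The search-index example assumes Document, Query, and Match record types that are defined elsewhere. The sketch below reconstructs them only from the constructors and fields used above; the equality and hashing members are assumptions added so the records work cleanly under Distinct, Join, and Count.

using System;

public struct Document : IEquatable<Document>
{
    public string text;
    public int id;
    public Document(string text, int id) { this.text = text; this.id = id; }
    public bool Equals(Document other) { return text == other.text && id == other.id; }
    public override int GetHashCode() { return (text == null ? 0 : text.GetHashCode()) ^ id; }
}

public struct Query : IEquatable<Query>
{
    public string text;
    public int id;
    public int threshold;
    public Query(string text, int id, int threshold) { this.text = text; this.id = id; this.threshold = threshold; }
    public bool Equals(Query other) { return text == other.text && id == other.id && threshold == other.threshold; }
    public override int GetHashCode() { return (text == null ? 0 : text.GetHashCode()) ^ id ^ threshold; }
}

public struct Match : IEquatable<Match>
{
    public int document;
    public int query;
    public int threshold;
    public Match(int document, int query, int threshold) { this.document = document; this.query = query; this.threshold = threshold; }
    public bool Equals(Match other) { return document == other.document && query == other.query && threshold == other.threshold; }
    public override int GetHashCode() { return document ^ (query << 8) ^ (threshold << 16); }
}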