Exemplo n.º 1
0
        /// <summary>
        /// Builds this process's fraction of a random graph, symmetrizes it, and prints the
        /// number of records reported by the DirectedReachability computation.
        /// </summary>
        /// <param name="args">Command-line arguments; when exactly two remain after the
        /// controller has been constructed they are read as node count and edge count.</param>
        public void Execute(string[] args)
        {
            // the controller is constructed from the command line arguments.
            using (var controller = NewController.FromArgs(ref args))
            {
                // defaults are used unless exactly two arguments are left.
                int nodeCount = 100000;
                int edgeCount = 200000;
                if (args.Length == 2)
                {
                    nodeCount = Convert.ToInt32(args[0]);
                    edgeCount = Convert.ToInt32(args[1]);
                }

                #region Generate a local fraction of input data

                var rng          = new Random(0);
                var processCount = controller.Configuration.Processes;
                var localEdges   = new Pair<int, int>[edgeCount / processCount];
                for (int index = 0; index < localEdges.Length; index++)
                {
                    // draw the two endpoints in the same order as a single constructor call would.
                    var first  = rng.Next(nodeCount);
                    var second = rng.Next(nodeCount);
                    localEdges[index] = new Pair<int, int>(first, second);
                }

                #endregion

                Console.WriteLine("Computing components of a random graph on {0} nodes and {1} edges", nodeCount, edgeCount);

                // a fresh graph manager hosts the computation.
                using (var manager = controller.NewGraph())
                {
                    // turn the local edge array into a stream.
                    var edges = localEdges.AsNaiadStream(manager).Synchronize();

                    // add the transposed edges so that every edge is present in both directions.
                    var transposed = edges.Select(x => new Pair<int, int>(x.v2, x.v1));
                    edges = transposed.Concat(edges);

                    // report the size of each batch of output.
                    var labels = edges.DirectedReachability();
                    labels.Subscribe(records => Console.WriteLine("labeled {0} nodes", records.Count()));

                    manager.Activate();     // start graph computation
                    manager.Join();         // block until computation completes
                }

                controller.Join();
            }
        }
Exemplo n.º 2
0
        /// <summary>
        /// Executes a word counting Naiad program: lines read from the console are split on
        /// spaces and "word:count" records are printed for each batch of output.
        /// </summary>
        /// <param name="args">Command-line arguments, passed by reference to NewController.FromArgs</param>
        public void Execute(string[] args)
        {
            // first, construct a Naiad controller.
            using (var controller = NewController.FromArgs(ref args))
            {
                using (var graph = controller.NewGraph())
                {
                    // create an incrementally updateable collection
                    var text = new IncrementalCollection<string>(graph);

                    // segment strings, count, and print
                    text.SelectMany(x => x.Split(' '))
                    .Count(y => y, (k, c) => k + ":" + c)       // yields "word:count" for each word
                    .Subscribe(l =>
                    {
                        foreach (var element in l)
                        {
                            Console.WriteLine(element);
                        }
                    });

                    graph.Activate();

                    Console.WriteLine("Start entering lines of text. An empty line will exit the program.");
                    Console.WriteLine("Naiad will display counts (and changes in counts) of words you type.");

                    // Console.ReadLine returns null once the input is exhausted (closed or redirected
                    // stdin). Stop on null as well as on an empty line, so that null is never pushed
                    // into the collection where x.Split(' ') would be invoked on it.
                    var line = Console.ReadLine();
                    for (int i = 0; !string.IsNullOrEmpty(line); i++)
                    {
                        text.OnNext(line);
                        graph.Sync(i);      // wait for round i before reading the next line
                        line = Console.ReadLine();
                    }

                    text.OnCompleted(); // closes input
                    graph.Join();
                }

                controller.Join();  // blocks until flushed
            }
        }
Exemplo n.º 3
0
        /// <summary>
        /// Runs the SCC computation over a random graph and, when there is a single process,
        /// lets the user rewire edges one at a time and prints the reported changes.
        /// </summary>
        /// <param name="args">Command-line arguments; when exactly three remain after the
        /// controller has been constructed, args[1] and args[2] replace the node and edge counts.</param>
        public void Execute(string[] args)
        {
            using (var controller = NewController.FromArgs(ref args))
            {
                // establish numbers of nodes and edges from input or from defaults.
                // (nodeCount and edgeCount are declared outside this method.)
                if (args.Length == 3)
                {
                    nodeCount = Convert.ToInt32(args[1]);
                    edgeCount = Convert.ToInt32(args[2]);
                }

                // generate a random graph
                var random = new Random(0);
                var graph  = new Edge[edgeCount];
                for (int i = 0; i < edgeCount; i++)
                {
                    graph[i] = new Edge(random.Next(nodeCount), random.Next(nodeCount));
                }

                using (var manager = controller.NewGraph())
                {
                    // set up the SCC computation
                    var edges = new IncrementalCollection<Edge>(manager);

                    var stopwatch = System.Diagnostics.Stopwatch.StartNew();

                    var result = edges.TrimLeavesAndFlip()
                                 .TrimLeavesAndFlip()
                                 .SCC()
                                 .Subscribe(x => Console.WriteLine("{1}\tNet edge changes within SCCs: {0}", x.Sum(y => y.weight), stopwatch.Elapsed));

                    Console.WriteLine("Strongly connected components on a random graph ({0} nodes, {1} edges)", nodeCount, edgeCount);
                    Console.WriteLine("Reporting the numbers of edges within SCCs (may take a moment):");

                    manager.Activate();

                    // input graph and wait: only process 0 supplies the edges, the others
                    // supply an empty batch.
                    if (controller.Configuration.ProcessID == 0)
                    {
                        edges.OnNext(graph);
                    }
                    else
                    {
                        edges.OnNext();
                    }

                    result.Sync(0);

                    Console.WriteLine("Computation completed");

                    // if we are up for interactive access ...
                    if (controller.Configuration.Processes == 1)
                    {
                        Console.WriteLine();
                        Console.WriteLine("Press [enter] repeatedly to rewire random edges in the graph. (\"done\" to exit)");

                        for (int i = 0; i < graph.Length; i++)
                        {
                            var line = Console.ReadLine();

                            // Console.ReadLine returns null once input is exhausted; treat that like
                            // "done" so closed or redirected input does not rewire every edge unprompted.
                            if (line == null || line == "done")
                            {
                                break;
                            }

                            stopwatch.Restart();
                            var newEdge = new Edge(random.Next(nodeCount), random.Next(nodeCount));
                            Console.WriteLine("Rewiring edge: {0} -> {1}", graph[i], newEdge);

                            // retract the old edge and introduce the new one in a single batch.
                            edges.OnNext(new[] { new Weighted<Edge>(graph[i], -1), new Weighted<Edge>(newEdge, 1) });
                            result.Sync(i + 1);
                        }
                    }

                    edges.OnCompleted();
                    manager.Join();
                }

                controller.Join();
            }
        }
Exemplo n.º 4
0
        /// <summary>
        /// Runs connected components over a random graph and reports the distribution of
        /// component sizes, either to an Azure blob container or to the console.
        /// </summary>
        /// <param name="args">Command-line arguments; after the controller has been constructed
        /// they must hold the results container name, the node count and the edge count.</param>
        public static void Main(string[] args)
        {
            using (var controller = NewController.FromArgs(ref args))
            {
                // all three positional arguments are required.
                if (args.Length < 3)
                {
                    Console.Error.WriteLine("Usage: results_container_name node_count edge_count");
                    System.Environment.Exit(0);
                }

                string containerName = args[0];
                int    nodeCount     = Convert.ToInt32(args[1]);
                int    edgeCount     = Convert.ToInt32(args[2]);

                // For Azure storage emulator:
                string connectionString = "UseDevelopmentStorage=true;";

                // To use Windows Azure Storage, uncomment this line, and substitute your account name and key:
                // string connectionString = "DefaultEndpointsProtocol=https;AccountName=[Account name];AccountKey=[Account key]";

                // results go to Azure whenever a connection string is present.
                bool writeToAzure = connectionString != null;

                Console.Error.WriteLine("Running connected components on a random graph ({0} nodes, {1} edges).", nodeCount, edgeCount);
                if (!writeToAzure)
                {
                    Console.Error.WriteLine("Writing results to console");
                }
                else
                {
                    Console.Error.WriteLine("Writing results to container {0}", containerName);
                }

                // build the random edge list.
                var rng       = new Random(0);
                var edgeArray = new IntPair[edgeCount];
                for (int index = 0; index < edgeCount; index++)
                {
                    var first  = rng.Next(nodeCount);
                    var second = rng.Next(nodeCount);
                    edgeArray[index] = new IntPair(first, second);
                }

                var stopwatch = System.Diagnostics.Stopwatch.StartNew();

                using (var computation = controller.NewComputation())
                {
                    // input collection that receives the edges.
                    var edges = computation.NewInputCollection<IntPair>();

                    // no prioritization; uncomment this and comment the next for slowness!
                    // Func<IntPair, int> priorityFunction = node => 0;

                    // Labels below 10 each get their own priority; larger labels are grouped
                    // by the rounded base-2 logarithm of (1 + label).
                    Func<IntPair, int> priorityFunction = node => 65536 * (node.t < 10 ? node.t : 10 + Convert.ToInt32(Math.Log(1 + node.t) / Math.Log(2.0)));

                    // connected components over the edge collection.
                    var labeledVertices = edges.ConnectedComponents(priorityFunction);

                    // number of vertices carrying each label.
                    var componentSizes = labeledVertices.Count(n => n.t, (l, c) => new Pair<int, long>(l, c));

                    // drop the labels and consolidate, leaving the number of components per size.
                    var sizeDistribution = componentSizes.Select(x => x.Second).Consolidate();

                    if (!writeToAzure)
                    {
                        // print every record of each output batch.
                        sizeDistribution.Subscribe(batch =>
                        {
                            foreach (var entry in batch)
                            {
                                Console.WriteLine(entry);
                            }
                        });
                    }
                    else
                    {
                        var account   = CloudStorageAccount.Parse(connectionString);
                        var blobs     = account.CreateCloudBlobClient();
                        var container = blobs.GetContainerReference(containerName);
                        container.CreateIfNotExists();

                        // blob names follow the "componentSizes-part-{0}-{1}" format.
                        sizeDistribution.Output
                        .WriteTextToAzureBlobs(container, "componentSizes-part-{0}-{1}");
                    }

                    computation.Activate();

                    // only process 0 supplies the graph; every other process supplies nothing.
                    var localInput = controller.Configuration.ProcessID == 0 ? edgeArray : Enumerable.Empty<IntPair>();
                    edges.OnCompleted(localInput);

                    computation.Join();
                }

                controller.Join();
            }
        }
Exemplo n.º 5
0
        /// <summary>
        /// Runs the Color computation over a random graph and, when there is a single process,
        /// rewires one edge per press of [enter], printing the elapsed time for each update.
        /// </summary>
        /// <param name="args">Command-line arguments; when exactly three remain after the
        /// controller has been constructed, args[1] and args[2] replace the node and edge counts.</param>
        public void Execute(string[] args)
        {
            using (var controller = NewController.FromArgs(ref args))
            {
                // optional overrides for the node and edge counts (declared outside this method).
                if (args.Length == 3)
                {
                    nodeCount = Convert.ToInt32(args[1]);
                    edgeCount = Convert.ToInt32(args[2]);
                }

                // build the random edge list.
                var rng       = new Random(0);
                var edgeArray = new IntPair[edgeCount];
                for (int index = 0; index < edgeCount; index++)
                {
                    var first  = rng.Next(nodeCount);
                    var second = rng.Next(nodeCount);
                    edgeArray[index] = new IntPair(first, second);
                }

                using (var manager = controller.NewGraph())
                {
                    // input collection that receives the edges.
                    var edges = new IncrementalCollection<IntPair>(manager);

                    var stopwatch = System.Diagnostics.Stopwatch.StartNew();

                    // self-loops are removed before coloring.
                    var withoutLoops = edges.Where(x => x.s != x.t);
                    var colors       = withoutLoops.Color();

                    // just keep the color, and print the elapsed time for each output.
                    var colorOnly    = colors.Select(x => x.t);
                    var subscription = colorOnly.Output
                                       .Subscribe((i, l) => Console.WriteLine("Time to process: {0}", stopwatch.Elapsed));

                    // set to enable a correctness test, at the cost of more memory and computation.
                    var testCorrectness = false;
                    if (testCorrectness)
                    {
                        edges.Where(x => x.s != x.t)
                        .Join(colors, e => e.s, c => c.s, e => e.t, c => c.t, (s, t, c) => new IntPair(c, t))
                        .Join(colors, e => e.t, c => c.s, e => e.s, c => c.t, (t, s, c) => new IntPair(s, c))
                        .Where(p => p.s == p.t)
                        .Consolidate()
                        .Subscribe(l => Console.WriteLine("Coloring errors: {0}", l.Length));
                    }

                    Console.WriteLine("Running graph coloring on a random graph ({0} nodes, {1} edges)", nodeCount, edgeCount);
                    Console.WriteLine("For each color, the nodes with that color:");

                    manager.Activate();

                    // only process 0 supplies the graph; every other process supplies nothing.
                    var localInput = controller.Configuration.ProcessID == 0 ? edgeArray : Enumerable.Empty<IntPair>();
                    edges.OnNext(localInput);

                    subscription.Sync(0);

                    // interactive rewiring is offered only when running as a single process.
                    if (controller.Configuration.Processes == 1)
                    {
                        Console.WriteLine();
                        Console.WriteLine("Next: sequentially rewiring random edges (press [enter] each time):");

                        for (int step = 0; step < edgeArray.Length; step++)
                        {
                            Console.ReadLine();
                            stopwatch.Restart();

                            var first       = rng.Next(nodeCount);
                            var second      = rng.Next(nodeCount);
                            var replacement = new IntPair(first, second);
                            Console.WriteLine("Rewiring edge: {0} -> {1}", edgeArray[step], replacement);

                            // retract the old edge and introduce its replacement in one batch.
                            var retraction = new Weighted<IntPair>(edgeArray[step], -1);
                            var insertion  = new Weighted<IntPair>(replacement, 1);
                            edges.OnNext(new[] { retraction, insertion });

                            subscription.Sync(step + 1);
                        }
                    }

                    edges.OnCompleted();

                    manager.Join();
                }

                controller.Join();
            }
        }
Exemplo n.º 6
0
        /// <summary>
        /// Loads a graph of integer pairs, normalizes it, builds two indices over it and runs a
        /// Triangles query against those indices, printing elapsed-time progress to the console.
        /// </summary>
        /// <param name="args">Command-line arguments, passed by reference to NewController.FromArgs.
        /// With useAzure false, args[0] is used as a format string and args[1] as a part count;
        /// with useAzure true, args[0] and args[1] are read as a container and a prefix.</param>
        public static void Main(string[] args)
        {
            // started before the controller so that every progress message shows total elapsed time.
            var stopwatch = System.Diagnostics.Stopwatch.StartNew();

            using (var controller = NewController.FromArgs(ref args))
            {
                Console.WriteLine("{0}\tController constructed", stopwatch.Elapsed);

                InterGraphDataSink <Pair <int, int> > graph;

                // hard-coded to false, so the Azure branch below is not taken as written.
                var useAzure = false;
                if (useAzure)
                {
                    var container = args[0];
                    var prefix    = args[1];

                    #if false
                    graph = controller.NewAzureInterGraphStream(container, prefix, stream => stream.Select(x => x.ParseString()));
                    #else
#if true
                    graph = controller.NewNaiadAzureInterGraphStream <Pair <Int32, Int32> >(container, prefix);
#else
                    using (var computation = controller.NewComputation())
                    {
                        //controller.SetConsoleOut(computation.DefaultBlobContainer("naiad-output"), "output-{0}.txt");
                        //controller.SetConsoleError(computation.DefaultBlobContainer("naiad-output"), "error-{0}.txt");

                        var graphContainer = computation.DefaultBlobContainer(container);

                        var data = computation.ReadFromAzureBlobs(graphContainer, prefix, stream => ReadEdges(stream, Int32.MaxValue))
                                   //.Where(x => x.First != x.Second)
                                   //.Select(x => x.First < x.Second ? x : x.Second.PairWith(x.First))
                                   //.Distinct()
                                   .WriteBinaryToAzureBlobs(graphContainer, "twitter-oriented/part-{0:000}-{1:000}.bin");

                        computation.Activate();
                        computation.Join();
                    }

                    controller.Join();

                    return;
#endif
#endif
                }
                else
                {
                    // args[0] is a format string applied to each part number in [0, parts) to
                    // produce the string that ReadEdges is called on.
                    var format = args[0];
                    var parts  = Int32.Parse(args[1]);

                    graph = controller.NewInterGraphStream(Enumerable.Range(0, parts), stream => stream.Distinct()
                                                           .SelectMany(x => string.Format(format, x).ReadEdges()));
                }

                Console.WriteLine("{0} Graph data loaded.", stopwatch.Elapsed);

                // normalize the graph to remove self-loops, point from smaller id to larger, and remove duplicates.
#if true
                graph = controller.NewInterGraphStream(graph, edges => edges.Where(x => x.First != x.Second)
                                                       .Select(x => x.First < x.Second ? x : x.Second.PairWith(x.First))
                                                       //.GroupBy(x => true, (k, l) => l.RandomlyRename())
                                                       .Distinct());
#endif

                // symmetrize the graph, just because...
                // graph = controller.NewInterGraphStream(graph, edges => edges.Select(x => x.Second.PairWith(x.First)).Concat(edges));

                Console.WriteLine("{0} Graph data normalized", stopwatch.Elapsed);

                // re-orient graph edges from lower undirected degree to larger undirected degree.
                // graph = controller.NewInterGraphStream(graph, edges => edges.GroupBy(x => true, (k,l) => l.RenameByDegree()));
                // graph = controller.NewInterGraphStream(graph, edges => edges.OrientFromLowToHighDegree());

                // NOTE(review): both orientation steps above are commented out, so this message is
                // printed without the graph having been re-oriented.
                Console.WriteLine("{0} Graph data oriented from low to high degree", stopwatch.Elapsed);

                // we build two indices, one keyed on "no" attributes and one keyed on the first.
                var emptyKeyIndex = graph.ToEmptyIndex(controller, x => x.First);
                var denseKeyIndex = graph.ToDenseKeyIndex(controller, x => x.First, x => x.Second);

                graph.Seal();   // perhaps release the memory associated with graph. probably better to make it disposable.

                Console.WriteLine("{0} Relation prefix indices built", stopwatch.Elapsed);
                Console.WriteLine("{0} Starting query construction", stopwatch.Elapsed);

                using (var computation = controller.NewComputation())
                {
                    // we seed the computation with a single "true" record.
                    var queryseed = new BatchedDataSource <bool>();

                    var triangles = computation.NewInput(queryseed).Distinct()
                                    .Triangles(emptyKeyIndex, denseKeyIndex);

                    // optional things to comment/uncomment, based on what sort of output we would like to see.
                    // triangles.Where(x => false).Subscribe();
                    // triangles.Expand().Where(x => { Console.WriteLine("{0} Triangle observed: {1}", stopwatch.Elapsed, x); return true; });
                    // sum the Second.Length values of all records under a single key and print each resulting total.
                    triangles.Select(x => x.Second.Length).Aggregate(x => true, y => y, (x, y) => x + y, (k, sum) => sum, true).Subscribe(x => { foreach (var y in x)
                                                                                                                                                 {
                                                                                                                                                     Console.WriteLine("Triangles: {0}", y);
                                                                                                                                                 }
                                                                                                                                          });

                    // the first batch carries no records; the "true" seed is supplied by OnCompleted below.
                    queryseed.OnNext();

                    computation.Activate();
                    computation.Sync(0);

                    Console.WriteLine("{0} Synchronized", stopwatch.Elapsed);
                    System.Threading.Thread.Sleep(1000);
                    Console.WriteLine("{0} Starting query execution", stopwatch.Elapsed);

                    queryseed.OnCompleted(true);

                    computation.Join();
                }

                controller.Join();
            }

            Console.WriteLine("{0} All triangles listed", stopwatch.Elapsed);
            Console.Out.Close();
        }
Exemplo n.º 7
0
        /// <summary>
        /// Indexes a set of random documents and then issues batches of queries against them,
        /// printing the number of matches found for each round.
        /// </summary>
        /// <param name="args">Command-line arguments; when exactly five remain after the controller
        /// has been constructed, args[1]..args[4] replace the document count, vocabulary size,
        /// batch size and iteration count.</param>
        public void Execute(string[] args)
        {
            int documentCount = 100000;
            int vocabulary    = 100000;
            int batchSize     = 10000;
            int iterations    = 10;

            using (var controller = NewController.FromArgs(ref args))
            {
                #region building up input data

                if (args.Length == 5)
                {
                    documentCount = Convert.ToInt32(args[1]);
                    vocabulary    = Convert.ToInt32(args[2]);
                    batchSize     = Convert.ToInt32(args[3]);
                    iterations    = Convert.ToInt32(args[4]);
                }

                // creates many documents each containing 10 words from [0, ... vocabulary-1].
                var            random = new Random(0);
                List<Document> docs   = Enumerable.Range(0, documentCount)
                                        .Select(i => new Document(Enumerable.Range(0, 10)
                                                                  .Select(j => String.Format("{0}", random.Next(vocabulary)))
                                                                  .Aggregate((x, y) => x + " " + y), i)).ToList<Document>();

                // one list of single-term queries per iteration, cycling through the vocabulary.
                List<Query>[] queryBatches = new List<Query>[iterations];

                for (int i = 0; i < iterations; i++)
                {
                    queryBatches[i] = Enumerable.Range(i * batchSize, batchSize)
                                      .Select(j => new Query(String.Format("{0}", j % vocabulary), j, 1))
                                      .ToList();
                }

                #endregion

                using (var manager = controller.NewGraph())
                {
                    // declare inputs for documents and queries.
                    var documents = new IncrementalCollection<Document>(manager);
                    var queries   = new IncrementalCollection<Query>(manager);

                    // each document is broken down into a collection of terms, each with associated identifier.
                    var dTerms = documents.SelectMany(doc => doc.text.Split(' ').Select(term => new Document(term, doc.id)))
                                 .Distinct();

                    // each query is broken down into a collection of terms, each with associated identifier and threshold.
                    var qTerms = queries.SelectMany(query => query.text.Split(' ').Select(term => new Query(term, query.id, query.threshold)))
                                 .Distinct();

                    // doc terms and query terms are joined, matching pairs are counted and returned if the count exceeds the threshold.
                    var results = dTerms.Join(qTerms, d => d.text, q => q.text, (d, q) => new Match(d.id, q.id, q.threshold))
                                  .Count(match => match)
                                  .Select(pair => new Match(pair.v1.document, pair.v1.query, pair.v1.threshold - (int)pair.v2))
                                  .Where(match => match.threshold <= 0)
                                  .Select(match => new Pair<int, int>(match.document, match.query));

                    // subscribe to the output in case we are interested in the results
                    var subscription = results.Subscribe(list => Console.WriteLine("matches found: {0}", list.Length));

                    manager.Activate();

                    #region Hand this process its share of the documents

                    int processCount = controller.Configuration.Processes;
                    int processId    = controller.Configuration.ProcessID;
                    int shareSize    = docs.Count / processCount;
                    int shareStart   = processId * shareSize;

                    // The integer division above discards a remainder when the document count is not a
                    // multiple of the process count; give those trailing documents to the last process
                    // so none are silently dropped. Assumes ProcessID ranges over [0, Processes) --
                    // TODO confirm against the controller configuration.
                    int shareCount = processId == processCount - 1 ? docs.Count - shareStart : shareSize;

                    documents.OnNext(docs.GetRange(shareStart, shareCount));
                    queries.OnNext();

                    //Console.WriteLine("Example SearchIndex in Naiad. Step 1: indexing documents, step 2: issuing queries.");
                    Console.WriteLine("Indexing {0} random documents, {1} terms (please wait)", documentCount, 10 * documentCount);
                    subscription.Sync(0);

                    #endregion

                    #region Issue batches of queries and assess performance

                    Console.WriteLine("Issuing {0} rounds of batches of {1} queries (press [enter] to start)", iterations, batchSize);
                    Console.ReadLine();

                    var stopwatch = System.Diagnostics.Stopwatch.StartNew();

                    for (int i = 0; i < iterations; i++)
                    {
                        // we round-robin through query terms. more advanced queries are possible.
                        // only process 0 introduces queries; the others contribute an empty batch.
                        if (controller.Configuration.ProcessID == 0)
                        {
                            queries.OnNext(queryBatches[i]); // introduce new queries.
                        }
                        else
                        {
                            queries.OnNext();
                        }

                        documents.OnNext();         // indicate no new docs.
                        subscription.Sync(i + 1);   // block until round is done.
                    }

                    documents.OnCompleted();
                    queries.OnCompleted();

                    // NOTE(review): the controller is joined here, before manager.Join(), and again
                    // after the graph manager is disposed. The early call looks redundant -- confirm
                    // against the controller's Join semantics before removing it.
                    controller.Join();

                    #endregion

                    manager.Join();
                }

                controller.Join();
            }
        }
Exemplo n.º 8
0
        /// <summary>
        /// Runs connected components over a random graph, printing for each component size the
        /// number of components of that size, and, when there is a single process, rewires one
        /// edge per press of [enter].
        /// </summary>
        /// <param name="args">Command-line arguments; when exactly three remain after the
        /// controller has been constructed, args[1] and args[2] replace the node and edge counts.</param>
        public void Execute(string[] args)
        {
            using (var controller = NewController.FromArgs(ref args))
            {
                // establish numbers of nodes and edges from input or from defaults.
                // (nodeCount and edgeCount are declared outside this method.)
                if (args.Length == 3)
                {
                    nodeCount = Convert.ToInt32(args[1]);
                    edgeCount = Convert.ToInt32(args[2]);
                }

                // generate a random graph
                var random = new Random(0);
                var graph  = new IntPair[edgeCount];
                for (int i = 0; i < edgeCount; i++)
                {
                    graph[i] = new IntPair(random.Next(nodeCount), random.Next(nodeCount));
                }

                var stopwatch = System.Diagnostics.Stopwatch.StartNew();

                using (var manager = controller.NewGraph())
                {
                    // set up the CC computation
                    var edges = new IncrementalCollection<IntPair>(manager);

                    //manager.Frontier.OnFrontierChanged += Frontier_OnFrontierChanged;

                    // Labels below 10 each get their own priority; larger labels are grouped by the
                    // rounded base-2 logarithm of (1 + label).
                    //Func<IntPair, int> priorityFunction = node => 0;
                    //Func<IntPair, int> priorityFunction = node => Math.Min(node.t, 100);
                    Func<IntPair, int> priorityFunction = node => 65536 * (node.t < 10 ? node.t : 10 + Convert.ToInt32(Math.Log(1 + node.t) / Math.Log(2.0)));

                    var output = edges.ConnectedComponents(priorityFunction)
                                 .Count(n => n.t, (l, c) => c)       // counts results with each label
                                 .Consolidate()
                                 .Subscribe(l =>
                    {
                        Console.Error.WriteLine("Time to process: {0}", stopwatch.Elapsed);
                        foreach (var result in l.OrderBy(x => x.record))
                        {
                            Console.Error.WriteLine(result);
                        }
                    });

                    Console.Error.WriteLine("Running connected components on a random graph ({0} nodes, {1} edges)", nodeCount, edgeCount);
                    Console.Error.WriteLine("For each size, the number of components of that size (may take a moment):");

                    manager.Activate();

                    // only process 0 supplies the graph; every other process supplies nothing.
                    edges.OnNext(controller.Configuration.ProcessID == 0 ? graph : Enumerable.Empty<IntPair>());

                    // if we are up for interactive access ...
                    if (controller.Configuration.Processes == 1)
                    {
                        output.Sync(0);

                        Console.WriteLine();
                        Console.WriteLine("Next: sequentially rewiring random edges (press [enter] each time):");

                        // Stop once every original edge has been rewired. An unbounded loop would index
                        // graph[i] past the end of the array and would never reach the OnCompleted and
                        // Join calls below.
                        for (int i = 0; i < graph.Length; i++)
                        {
                            Console.ReadLine();
                            stopwatch.Restart();
                            var newEdge = new IntPair(random.Next(nodeCount), random.Next(nodeCount));
                            Console.WriteLine("Rewiring edge: {0} -> {1}", graph[i], newEdge);

                            // retract the old edge and introduce the new one in a single batch.
                            edges.OnNext(new[] { new Weighted<IntPair>(graph[i], -1), new Weighted<IntPair>(newEdge, 1) });
                            output.Sync(i + 1);
                        }
                    }

                    edges.OnCompleted();
                    manager.Join();
                }

                controller.Join();
            }
        }