/// <summary>
/// Generates this process's share of a random graph, symmetrizes it, runs
/// DirectedReachability over the edges and prints how many nodes were labeled.
/// </summary>
/// <param name="args">
/// Command-line arguments. They are first passed by reference to NewController.FromArgs;
/// if exactly two values remain afterwards they are parsed as node count and edge count,
/// otherwise the defaults (100000 nodes, 200000 edges) are used.
/// </param>
public void Execute(string[] args)
{
    // allocate a new controller from command line arguments.
    using (var controller = NewController.FromArgs(ref args))
    {
        var nodeCount = args.Length == 2 ? Convert.ToInt32(args[0]) : 100000;
        var edgeCount = args.Length == 2 ? Convert.ToInt32(args[1]) : 200000;

        #region Generate a local fraction of input data
        var processes = controller.Configuration.Processes;
        var processId = controller.Configuration.ProcessID;

        // Seed with the process id: a shared constant seed would make every process
        // generate exactly the same edges, so the "fractions" would all be duplicates.
        // Process 0 still uses seed 0, so single-process runs are unchanged.
        var random = new Random(processId);

        // Spread the remainder of the division over the first few processes so that
        // the fragments add up to edgeCount instead of silently dropping edges.
        var localEdgeCount = edgeCount / processes + (processId < edgeCount % processes ? 1 : 0);

        var graphFragment = new Pair<int, int>[localEdgeCount];
        for (int i = 0; i < graphFragment.Length; i++)
        {
            graphFragment[i] = new Pair<int, int>(random.Next(nodeCount), random.Next(nodeCount));
        }
        #endregion

        Console.WriteLine("Computing components of a random graph on {0} nodes and {1} edges", nodeCount, edgeCount);

        // allocate a new graph manager for the computation.
        using (var manager = controller.NewGraph())
        {
            // convert array of edges to single-epoch stream.
            var edges = graphFragment.AsNaiadStream(manager)
                                     .Synchronize();

            // symmetrize the graph by adding in transposed edges.
            edges = edges.Select(x => new Pair<int, int>(x.v2, x.v1))
                         .Concat(edges);

            edges.DirectedReachability()
                 .Subscribe(list => Console.WriteLine("labeled {0} nodes", list.Count()));

            manager.Activate();     // start graph computation
            manager.Join();         // block until computation completes
        }

        controller.Join();
    }
}
/// <summary>
/// Executes a word counting Naiad program: reads lines from the console, splits them
/// on spaces and prints "word:count" entries after every line.
/// </summary>
/// <param name="args">Arguments handed (by reference) to NewController.FromArgs</param>
public void Execute(string[] args)
{
    // first, construct a Naiad controller.
    using (var controller = NewController.FromArgs(ref args))
    {
        using (var graph = controller.NewGraph())
        {
            // create an incrementally updateable collection
            var text = new IncrementalCollection<string>(graph);

            // segment strings, count, and print
            text.SelectMany(x => x.Split(' '))
                .Count(y => y, (k, c) => k + ":" + c)   // yields "word:count" for each word
                .Subscribe(l =>
                {
                    foreach (var element in l)
                    {
                        Console.WriteLine(element);
                    }
                });

            graph.Activate();

            Console.WriteLine("Start entering lines of text. An empty line will exit the program.");
            Console.WriteLine("Naiad will display counts (and changes in counts) of words you type.");

            var line = Console.ReadLine();

            // Console.ReadLine returns null once the input stream is exhausted (closed or
            // redirected stdin). Treat that like an empty line; comparing only against ""
            // would loop forever and feed null into the collection.
            for (int i = 0; !string.IsNullOrEmpty(line); i++)
            {
                text.OnNext(line);
                graph.Sync(i);      // wait for epoch i before prompting for the next line
                line = Console.ReadLine();
            }

            text.OnCompleted();     // closes input
            graph.Join();
        }

        controller.Join();          // blocks until flushed
    }
}
/// <summary>
/// Builds a random graph, feeds it through two TrimLeavesAndFlip passes followed by SCC,
/// and reports the summed weights of each output batch together with the elapsed time.
/// When running as a single process, edges can then be rewired interactively.
/// </summary>
/// <param name="args">
/// Command-line arguments, passed by reference to NewController.FromArgs. If exactly three
/// remain, args[1] and args[2] overwrite nodeCount and edgeCount (both declared outside
/// this method).
/// </param>
public void Execute(string[] args)
{
    using (var controller = NewController.FromArgs(ref args))
    {
        // establish numbers of nodes and edges from input or from defaults.
        if (args.Length == 3)
        {
            nodeCount = Convert.ToInt32(args[1]);
            edgeCount = Convert.ToInt32(args[2]);
        }

        // generate a random graph
        var random = new Random(0);
        var graph = new Edge[edgeCount];
        for (int i = 0; i < edgeCount; i++)
        {
            graph[i] = new Edge(random.Next(nodeCount), random.Next(nodeCount));
        }

        using (var manager = controller.NewGraph())
        {
            // set up the CC computation
            var edges = new IncrementalCollection<Edge>(manager);

            var stopwatch = System.Diagnostics.Stopwatch.StartNew();

            var result = edges.TrimLeavesAndFlip()
                              .TrimLeavesAndFlip()
                              .SCC()
                              .Subscribe(x => Console.WriteLine("{1}\tNet edge changes within SCCs: {0}", x.Sum(y => y.weight), stopwatch.Elapsed));

            Console.WriteLine("Strongly connected components on a random graph ({0} nodes, {1} edges)", nodeCount, edgeCount);
            Console.WriteLine("Reporting the numbers of edges within SCCs (may take a moment):");

            manager.Activate();

            // input graph and wait: only process 0 supplies data, the others send an empty epoch.
            if (controller.Configuration.ProcessID == 0)
            {
                edges.OnNext(graph);
            }
            else
            {
                edges.OnNext();
            }

            result.Sync(0);

            Console.WriteLine("Computation completed");

            // if we are up for interactive access ...
            if (controller.Configuration.Processes == 1)
            {
                Console.WriteLine();
                Console.WriteLine("Press [enter] repeatedly to rewire random edges in the graph. (\"done\" to exit)");

                for (int i = 0; i < graph.Length; i++)
                {
                    var line = Console.ReadLine();

                    // ReadLine returns null when stdin is exhausted; without this check the
                    // loop would rewire every remaining edge without waiting for the user.
                    if (line == null || line == "done")
                    {
                        break;
                    }

                    stopwatch.Restart();
                    var newEdge = new Edge(random.Next(nodeCount), random.Next(nodeCount));
                    Console.WriteLine("Rewiring edge: {0} -> {1}", graph[i], newEdge);

                    // retract the old edge and introduce its replacement in the same epoch.
                    edges.OnNext(new[] { new Weighted<Edge>(graph[i], -1), new Weighted<Edge>(newEdge, 1) });
                    result.Sync(i + 1);
                }
            }

            edges.OnCompleted();
            manager.Join();
        }

        controller.Join();
    }
}
/// <summary>
/// Runs connected components on a random graph and writes the distribution of
/// component sizes either to an Azure blob container or to the console.
/// </summary>
/// <param name="args">
/// Command-line arguments, passed by reference to NewController.FromArgs. Afterwards at
/// least three must remain: results container name, node count and edge count.
/// </param>
public static void Main(string[] args)
{
    using (var controller = NewController.FromArgs(ref args))
    {
        if (args.Length < 3)
        {
            Console.Error.WriteLine("Usage: results_container_name node_count edge_count");

            // A usage error is a failure: report it with a non-zero exit code.
            System.Environment.Exit(1);
        }

        string containerName = args[0];
        int nodeCount = Convert.ToInt32(args[1]);
        int edgeCount = Convert.ToInt32(args[2]);

        // For Azure storage emulator:
        string connectionString = "UseDevelopmentStorage=true;";

        // To use Windows Azure Storage, uncomment this line, and substitute your account name and key:
        // string connectionString = "DefaultEndpointsProtocol=https;AccountName=[Account name];AccountKey=[Account key]";

        Console.Error.WriteLine("Running connected components on a random graph ({0} nodes, {1} edges).", nodeCount, edgeCount);

        // connectionString doubles as a switch: setting it to null selects console output.
        if (connectionString != null)
        {
            Console.Error.WriteLine("Writing results to container {0}", containerName);
        }
        else
        {
            Console.Error.WriteLine("Writing results to console");
        }

        // generate a random graph
        var random = new Random(0);
        var graph = new IntPair[edgeCount];
        for (int i = 0; i < edgeCount; i++)
        {
            graph[i] = new IntPair(random.Next(nodeCount), random.Next(nodeCount));
        }

        using (var computation = controller.NewComputation())
        {
            // set up the CC computation
            var edges = computation.NewInputCollection<IntPair>();

            // no prioritization; uncomment this and comment the next for slowness!
            // Func<IntPair, int> priorityFunction = node => 0;

            // Introduce labels in priority order. Labels 0 through 9 inclusive are introduced sequentially,
            // following by exponentially-growing sets of labels.
            Func<IntPair, int> priorityFunction = node => 65536 * (node.t < 10 ? node.t : 10 + Convert.ToInt32(Math.Log(1 + node.t) / Math.Log(2.0)));

            // Perform the connected components algorithm on the collection of edges.
            var labeledVertices = edges.ConnectedComponents(priorityFunction);

            // Count the number of vertices in each component.
            var componentSizes = labeledVertices.Count(n => n.t, (l, c) => new Pair<int, long>(l, c)); // counts results with each label

            // Ignore the labels and consolidate to find the number of components having each size.
            var sizeDistribution = componentSizes.Select(x => x.Second).Consolidate();

            if (connectionString != null)
            {
                var account = CloudStorageAccount.Parse(connectionString);
                var container = account.CreateCloudBlobClient().GetContainerReference(containerName);
                container.CreateIfNotExists();

                // Write the results to the given Azure blob container. Blob names follow the
                // pattern "componentSizes-part-{0}-{1}"; the two placeholders are filled in by
                // WriteTextToAzureBlobs (presumably process and worker indices - TODO confirm).
                sizeDistribution.Output
                                .WriteTextToAzureBlobs(container, "componentSizes-part-{0}-{1}");
            }
            else
            {
                // Write the results to the console.
                sizeDistribution.Subscribe(xs =>
                {
                    foreach (var x in xs)
                    {
                        Console.WriteLine(x);
                    }
                });
            }

            computation.Activate();

            // Only process 0 supplies the edges; every other process contributes an empty input.
            edges.OnCompleted(controller.Configuration.ProcessID == 0 ? graph : Enumerable.Empty<IntPair>());

            computation.Join();
        }

        controller.Join();
    }
}
/// <summary>
/// Colors a random graph with the Color operator and prints the elapsed time for each
/// round of output. In single-process mode every edge is then replaced, one per key press,
/// by a fresh random edge.
/// </summary>
/// <param name="args">
/// Command-line arguments, passed by reference to NewController.FromArgs. If exactly three
/// remain, args[1] and args[2] overwrite nodeCount and edgeCount (both declared outside
/// this method).
/// </param>
public void Execute(string[] args)
{
    using (var controller = NewController.FromArgs(ref args))
    {
        // take graph dimensions from the command line when both are supplied.
        if (args.Length == 3)
        {
            nodeCount = Convert.ToInt32(args[1]);
            edgeCount = Convert.ToInt32(args[2]);
        }

        // build the random edge list (source is drawn before target).
        var rng = new Random(0);
        var graph = new IntPair[edgeCount];
        var next = 0;
        while (next < edgeCount)
        {
            var source = rng.Next(nodeCount);
            var target = rng.Next(nodeCount);
            graph[next] = new IntPair(source, target);
            next++;
        }

        using (var manager = controller.NewGraph())
        {
            // input collection of edges for the coloring dataflow.
            var edges = new IncrementalCollection<IntPair>(manager);

            var stopwatch = System.Diagnostics.Stopwatch.StartNew();

            // self-loops are discarded before coloring.
            var loopFree = edges.Where(edge => edge.s != edge.t);
            var colors = loopFree.Color();

            // keep only the color of each record; report timing for every output batch.
            var colorOnly = colors.Select(colored => colored.t);
            var output = colorOnly.Output
                                  .Subscribe((epoch, records) => Console.WriteLine("Time to process: {0}", stopwatch.Elapsed));

            // flip to true for a correctness check, at the cost of more memory and computation.
            var testCorrectness = false;
            if (testCorrectness)
            {
                // attach both endpoint colors to every edge and count edges whose ends match.
                edges.Where(edge => edge.s != edge.t)
                     .Join(colors, e => e.s, c => c.s, e => e.t, c => c.t, (s, t, c) => new IntPair(c, t))
                     .Join(colors, e => e.t, c => c.s, e => e.s, c => c.t, (t, s, c) => new IntPair(s, c))
                     .Where(pair => pair.s == pair.t)
                     .Consolidate()
                     .Subscribe(errors => Console.WriteLine("Coloring errors: {0}", errors.Length));
            }

            Console.WriteLine("Running graph coloring on a random graph ({0} nodes, {1} edges)", nodeCount, edgeCount);
            Console.WriteLine("For each color, the nodes with that color:");

            manager.Activate();

            // process 0 supplies the whole graph; all other processes supply nothing.
            var initialEdges = controller.Configuration.ProcessID == 0 ? graph : Enumerable.Empty<IntPair>();
            edges.OnNext(initialEdges);
            output.Sync(0);

            // interactive rewiring is only offered when running as a single process.
            if (controller.Configuration.Processes == 1)
            {
                Console.WriteLine();
                Console.WriteLine("Next: sequentially rewiring random edges (press [enter] each time):");

                var round = 0;
                while (round < graph.Length)
                {
                    Console.ReadLine();
                    stopwatch.Restart();

                    var newEdge = new IntPair(rng.Next(nodeCount), rng.Next(nodeCount));
                    Console.WriteLine("Rewiring edge: {0} -> {1}", graph[round], newEdge);

                    // one retraction and one insertion make up the epoch's update.
                    var retraction = new Weighted<IntPair>(graph[round], -1);
                    var insertion = new Weighted<IntPair>(newEdge, 1);
                    edges.OnNext(new[] { retraction, insertion });

                    output.Sync(round + 1);
                    round++;
                }
            }

            edges.OnCompleted();
            manager.Join();
        }

        controller.Join();
    }
}
/// <summary>
/// Loads a graph into an inter-graph stream, normalizes it, builds two indices over it
/// and runs a triangle query whose result is aggregated and printed as "Triangles: n".
/// Progress messages are prefixed with the time elapsed since the start of Main.
/// </summary>
/// <param name="args">
/// Command-line arguments, passed by reference to NewController.FromArgs. With useAzure
/// hard-coded to false, args[0] is used as a format string (formatted with each part
/// index) and args[1] as the number of parts. Neither is checked for presence.
/// </param>
public static void Main(string[] args)
{
    var stopwatch = System.Diagnostics.Stopwatch.StartNew();
    using (var controller = NewController.FromArgs(ref args))
    {
        Console.WriteLine("{0}\tController constructed", stopwatch.Elapsed);
        InterGraphDataSink <Pair <int, int> > graph;

        // Compile-time style switch: as written, only the else branch below ever runs.
        var useAzure = false;
        if (useAzure)
        {
            var container = args[0];
            var prefix = args[1];
            // Of the three alternatives below only the "#if true" one is compiled.
#if false
            graph = controller.NewAzureInterGraphStream(container, prefix, stream => stream.Select(x => x.ParseString()));
#else
#if true
            graph = controller.NewNaiadAzureInterGraphStream <Pair <Int32, Int32> >(container, prefix);
#else
            // Disabled branch: reads edges from blobs and writes them back out in binary
            // form, then returns without running the triangle query.
            using (var computation = controller.NewComputation())
            {
                //controller.SetConsoleOut(computation.DefaultBlobContainer("naiad-output"), "output-{0}.txt");
                //controller.SetConsoleError(computation.DefaultBlobContainer("naiad-output"), "error-{0}.txt");
                var graphContainer = computation.DefaultBlobContainer(container);
                var data = computation.ReadFromAzureBlobs(graphContainer, prefix, stream => ReadEdges(stream, Int32.MaxValue))
                           //.Where(x => x.First != x.Second)
                           //.Select(x => x.First < x.Second ? x : x.Second.PairWith(x.First))
                           //.Distinct()
                           .WriteBinaryToAzureBlobs(graphContainer, "twitter-oriented/part-{0:000}-{1:000}.bin");
                computation.Activate();
                computation.Join();
            }
            controller.Join();
            return;
#endif
#endif
        }
        else
        {
            var format = args[0];
            var parts = Int32.Parse(args[1]);

            // One entry per part index; each index is formatted into a string on which ReadEdges is called.
            graph = controller.NewInterGraphStream(Enumerable.Range(0, parts), stream => stream.Distinct()
                    .SelectMany(x => string.Format(format, x).ReadEdges()));
        }
        Console.WriteLine("{0} Graph data loaded.", stopwatch.Elapsed);

        // normalize the graph to remove self-loops, point from smaller id to larger, and remove duplicates.
#if true
        graph = controller.NewInterGraphStream(graph, edges => edges.Where(x => x.First != x.Second)
                .Select(x => x.First < x.Second ? x : x.Second.PairWith(x.First))
                //.GroupBy(x => true, (k, l) => l.RandomlyRename())
                .Distinct());
#endif

        // symmetrize the graph, just because...
        // graph = controller.NewInterGraphStream(graph, edges => edges.Select(x => x.Second.PairWith(x.First)).Concat(edges));
        Console.WriteLine("{0} Graph data normalized", stopwatch.Elapsed);

        // re-orient graph edges from lower undirected degree to larger undirected degree.
        // graph = controller.NewInterGraphStream(graph, edges => edges.GroupBy(x => true, (k,l) => l.RenameByDegree()));
        // graph = controller.NewInterGraphStream(graph, edges => edges.OrientFromLowToHighDegree());
        // NOTE(review): both re-orientation statements above are commented out, yet the
        // message below is still printed.
        Console.WriteLine("{0} Graph data oriented from low to high degree", stopwatch.Elapsed);

        // we build two indices, one keyed on "no" attributes and one keyed on the first.
        var emptyKeyIndex = graph.ToEmptyIndex(controller, x => x.First);
        var denseKeyIndex = graph.ToDenseKeyIndex(controller, x => x.First, x => x.Second);
        graph.Seal(); // perhaps release the memory associated with graph. probably better to make it disposable.
        Console.WriteLine("{0} Relation prefix indices built", stopwatch.Elapsed);
        Console.WriteLine("{0} Starting query construction", stopwatch.Elapsed);

        using (var computation = controller.NewComputation())
        {
            // we seed the computation with a single "true" record.
            var queryseed = new BatchedDataSource <bool>();
            var triangles = computation.NewInput(queryseed).Distinct()
                            .Triangles(emptyKeyIndex, denseKeyIndex);

            // optional things to comment/uncomment, based on what sort of output we would like to see.
            // triangles.Where(x => false).Subscribe();
            // triangles.Expand().Where(x => { Console.WriteLine("{0} Triangle observed: {1}", stopwatch.Elapsed, x); return true; });

            // Sum the Second.Length of every triangles record under one constant key and print each total.
            triangles.Select(x => x.Second.Length).Aggregate(x => true, y => y, (x, y) => x + y, (k, sum) => sum, true).Subscribe(x => { foreach (var y in x) { Console.WriteLine("Triangles: {0}", y); } });

            // First epoch carries no data; it is synchronized before the seed record is sent.
            queryseed.OnNext();
            computation.Activate();
            computation.Sync(0);
            Console.WriteLine("{0} Synchronized", stopwatch.Elapsed);

            // NOTE(review): fixed one-second pause before the query starts; the reason is not visible here.
            System.Threading.Thread.Sleep(1000);
            Console.WriteLine("{0} Starting query execution", stopwatch.Elapsed);

            // Supplies the single "true" seed record and closes the input.
            queryseed.OnCompleted(true);
            computation.Join();
        }
        controller.Join();
    }
    Console.WriteLine("{0} All triangles listed", stopwatch.Elapsed);
    Console.Out.Close();
}
/// <summary>
/// Indexes a set of random documents, then issues batches of single-term queries against
/// them and prints how many (document, query) matches each round produced.
/// </summary>
/// <param name="args">
/// Command-line arguments, passed by reference to NewController.FromArgs. If exactly five
/// remain, args[1]..args[4] override document count, vocabulary size, batch size and the
/// number of iterations.
/// </param>
public void Execute(string[] args)
{
    int documentCount = 100000;
    int vocabulary = 100000;
    int batchSize = 10000;
    int iterations = 10;

    using (var controller = NewController.FromArgs(ref args))
    {
        #region building up input data
        if (args.Length == 5)
        {
            documentCount = Convert.ToInt32(args[1]);
            vocabulary = Convert.ToInt32(args[2]);
            batchSize = Convert.ToInt32(args[3]);
            iterations = Convert.ToInt32(args[4]);
        }

        // creates many documents each containing 10 words from [0, ... vocabulary-1].
        // The fixed seed makes every process generate the same list, so each can take its own slice.
        var random = new Random(0);
        List<Document> docs = Enumerable.Range(0, documentCount)
                                        .Select(i => new Document(Enumerable.Range(0, 10)
                                                                            .Select(j => String.Format("{0}", random.Next(vocabulary)))
                                                                            .Aggregate((x, y) => x + " " + y), i))
                                        .ToList<Document>();

        // batch i holds queries with ids [i * batchSize, (i + 1) * batchSize), each for the term (id % vocabulary).
        List<Query>[] queryBatches = new List<Query>[iterations];
        for (int i = 0; i < iterations; i++)
        {
            queryBatches[i] = Enumerable.Range(i * batchSize, batchSize)
                                        .Select(j => new Query(String.Format("{0}", j % vocabulary), j, 1))
                                        .ToList();
        }
        #endregion

        using (var manager = controller.NewGraph())
        {
            // declare inputs for documents and queries.
            var documents = new IncrementalCollection<Document>(manager);
            var queries = new IncrementalCollection<Query>(manager);

            // each document is broken down into a collection of terms, each with associated identifier.
            var dTerms = documents.SelectMany(doc => doc.text.Split(' ').Select(term => new Document(term, doc.id)))
                                  .Distinct();

            // each query is broken down into a collection of terms, each with associated identifier and threshold.
            var qTerms = queries.SelectMany(query => query.text.Split(' ').Select(term => new Query(term, query.id, query.threshold)))
                                .Distinct();

            // doc terms and query terms are joined, matching pairs are counted and returned if the count exceeds the threshold.
            var results = dTerms.Join(qTerms, d => d.text, q => q.text, (d, q) => new Match(d.id, q.id, q.threshold))
                                .Count(match => match)
                                .Select(pair => new Match(pair.v1.document, pair.v1.query, pair.v1.threshold - (int)pair.v2))
                                .Where(match => match.threshold <= 0)
                                .Select(match => new Pair<int, int>(match.document, match.query));

            // subscribe to the output in case we are interested in the results
            var subscription = results.Subscribe(list => Console.WriteLine("matches found: {0}", list.Length));

            manager.Activate();

            #region Load this process's share of the documents
            int processes = controller.Configuration.Processes;
            int processId = controller.Configuration.ProcessID;
            int shareSize = docs.Count / processes;
            int shareStart = processId * shareSize;

            // The last process also takes the remainder of the division; otherwise the
            // trailing (docs.Count % processes) documents would never be indexed.
            int shareCount = processId == processes - 1 ? docs.Count - shareStart : shareSize;

            documents.OnNext(docs.GetRange(shareStart, shareCount));
            queries.OnNext();

            //Console.WriteLine("Example SearchIndex in Naiad. Step 1: indexing documents, step 2: issuing queries.");
            Console.WriteLine("Indexing {0} random documents, {1} terms (please wait)", documentCount, 10 * documentCount);
            subscription.Sync(0);
            #endregion

            #region Issue batches of queries and assess performance
            Console.WriteLine("Issuing {0} rounds of batches of {1} queries (press [enter] to start)", iterations, batchSize);
            Console.ReadLine();

            var stopwatch = System.Diagnostics.Stopwatch.StartNew();

            for (int i = 0; i < iterations; i++)
            {
                // we round-robin through query terms. more advanced queries are possible.
                if (controller.Configuration.ProcessID == 0)
                {
                    queries.OnNext(queryBatches[i]);    // introduce new queries.
                }
                else
                {
                    queries.OnNext();
                }

                documents.OnNext();                     // indicate no new docs.
                subscription.Sync(i + 1);               // block until round is done.
            }

            documents.OnCompleted();
            queries.OnCompleted();
            #endregion

            // Join the graph first and the controller only after leaving its scope, in the
            // same order as the other examples; the extra controller.Join() that used to
            // run before manager.Join() has been removed.
            manager.Join();
        }

        controller.Join();
    }
}
/// <summary>
/// Runs prioritized connected components on a random graph and prints, to standard error,
/// the consolidated per-label counts ordered by record, along with the elapsed time.
/// In single-process mode every edge is then replaced, one per key press, by a fresh
/// random edge.
/// </summary>
/// <param name="args">
/// Command-line arguments, passed by reference to NewController.FromArgs. If exactly three
/// remain, args[1] and args[2] overwrite nodeCount and edgeCount (both declared outside
/// this method).
/// </param>
public void Execute(string[] args)
{
    using (var controller = NewController.FromArgs(ref args))
    {
        // establish numbers of nodes and edges from input or from defaults.
        if (args.Length == 3)
        {
            nodeCount = Convert.ToInt32(args[1]);
            edgeCount = Convert.ToInt32(args[2]);
        }

        // generate a random graph
        var random = new Random(0);
        var graph = new IntPair[edgeCount];
        for (int i = 0; i < edgeCount; i++)
        {
            graph[i] = new IntPair(random.Next(nodeCount), random.Next(nodeCount));
        }

        var stopwatch = System.Diagnostics.Stopwatch.StartNew();

        using (var manager = controller.NewGraph())
        {
            // set up the CC computation
            var edges = new IncrementalCollection<IntPair>(manager);

            //manager.Frontier.OnFrontierChanged += Frontier_OnFrontierChanged;

            //Func<IntPair, int> priorityFunction = node => 0;
            //Func<IntPair, int> priorityFunction = node => Math.Min(node.t, 100);

            // values below 10 get their own priority; larger ones are grouped by log2(1 + t).
            Func<IntPair, int> priorityFunction = node => 65536 * (node.t < 10 ? node.t : 10 + Convert.ToInt32(Math.Log(1 + node.t) / Math.Log(2.0)));

            var output = edges.ConnectedComponents(priorityFunction)
                              .Count(n => n.t, (l, c) => c)  // counts results with each label
                              .Consolidate()
                              .Subscribe(l =>
                              {
                                  Console.Error.WriteLine("Time to process: {0}", stopwatch.Elapsed);
                                  foreach (var result in l.OrderBy(x => x.record))
                                  {
                                      Console.Error.WriteLine(result);
                                  }
                              });

            Console.Error.WriteLine("Running connected components on a random graph ({0} nodes, {1} edges)", nodeCount, edgeCount);
            Console.Error.WriteLine("For each size, the number of components of that size (may take a moment):");

            manager.Activate();

            // only process 0 supplies the edges; the others contribute an empty input.
            edges.OnNext(controller.Configuration.ProcessID == 0 ? graph : Enumerable.Empty<IntPair>());

            // if we are up for interactive access ...
            if (controller.Configuration.Processes == 1)
            {
                output.Sync(0);

                Console.WriteLine();
                Console.WriteLine("Next: sequentially rewiring random edges (press [enter] each time):");

                // Stop once every edge has been rewired. The loop used to be unbounded, which
                // made graph[i] throw IndexOutOfRangeException after graph.Length rounds and
                // left OnCompleted/Join below unreached.
                for (int i = 0; i < graph.Length; i++)
                {
                    Console.ReadLine();
                    stopwatch.Restart();
                    var newEdge = new IntPair(random.Next(nodeCount), random.Next(nodeCount));
                    Console.WriteLine("Rewiring edge: {0} -> {1}", graph[i], newEdge);

                    // retract the old edge and introduce its replacement in the same epoch.
                    edges.OnNext(new[] { new Weighted<IntPair>(graph[i], -1), new Weighted<IntPair>(newEdge, 1) });
                    output.Sync(i + 1);
                }
            }

            edges.OnCompleted();
            manager.Join();
        }

        controller.Join();
    }
}