public void Execute(string[] args)
{
    // allocate a new computation from command line arguments.
    using (var computation = NewComputation.FromArgs(ref args))
    {
        var nodeCount = args.Length == 3 ? Convert.ToInt32(args[1]) : 1000;
        var edgeCount = args.Length == 3 ? Convert.ToInt32(args[2]) : 2000;

        #region Generate a local fraction of input data
        var random = new Random(0);
        var processes = computation.Configuration.Processes;
        var thisProcess = computation.Configuration.ProcessID;

        var graphFragmentList = new List<Pair<int, int>>();
        for (int i = 0; i < edgeCount; i++)
        {
            // ensure we generate the same graph no matter how many processes there are
            var edge = new Pair<int, int>(random.Next(nodeCount), random.Next(nodeCount));
            if ((i % processes) == thisProcess)
                graphFragmentList.Add(edge);
        }

        var graphFragment = graphFragmentList.ToArray();
        #endregion

        Console.WriteLine("size of graphFragmentList: {0}", graphFragmentList.Count);
        Console.WriteLine("Computing components of a random graph on {0} nodes and {1} edges", nodeCount, edgeCount);

        Stopwatch stopwatch = new Stopwatch();

        // convert array of edges to single-epoch stream.
        var edges = graphFragment.AsNaiadStream(computation)
                                 .Synchronize(x => true);

        // symmetrize the graph by adding in transposed edges.
        edges = edges.Select(x => new Pair<int, int>(x.Second, x.First))
                     .Concat(edges);

        edges.DirectedReachability()
             .Subscribe(list => Console.WriteLine("labeled {0} nodes in {1}", list.Count(), stopwatch.Elapsed));

        edges.Subscribe(list => Console.WriteLine("# of edges is: {0}", list.Count()));

        stopwatch.Start();
        computation.Activate();   // start graph computation
        computation.Join();       // block until computation completes
    }
}
public NaiadSolution(string[] args)
{
    rawUsers = new List<User>();
    rawPosts = new List<Post>();
    rawComments = new List<Comment>();
    rawCommentedEdges = new List<CommentedEdge>();
    rawLikesEdges = new List<LikesEdge>();
    rawPostEdges = new List<PostEdge>();
    rawSubmitterEdges = new List<SubmitterEdge>();
    rawFriendEdges = new List<FriendEdge>();

    computation = NewComputation.FromArgs(ref args);

    users = computation.NewInputCollection<User>();
    posts = computation.NewInputCollection<Post>();
    comments = computation.NewInputCollection<Comment>();
    commentedEdges = computation.NewInputCollection<CommentedEdge>();
    likesEdges = computation.NewInputCollection<LikesEdge>();
    postEdges = computation.NewInputCollection<PostEdge>();
    submitterEdges = computation.NewInputCollection<SubmitterEdge>();
    friendEdges = computation.NewInputCollection<FriendEdge>();

    actualEpoch = -1;
    isDisposed = false;
}
public void Execute(string[] args)
{
    var containerName = args[1];
    var directoryName = args[2];
    var nodeCount = int.Parse(args[3]);
    var edgeCount = int.Parse(args[4]);

    CloudStorageAccount storageAccount = CloudStorageAccount.DevelopmentStorageAccount;

    var container = storageAccount.CreateCloudBlobClient()
                                  .GetContainerReference(containerName);
    container.CreateIfNotExists();

    // allocate a new computation from command line arguments.
    using (var computation = NewComputation.FromArgs(ref args))
    {
        // create a new input from a constant data source.
        var source = new ConstantDataSource<GraphProperties>(new GraphProperties(nodeCount, edgeCount));
        var input = computation.NewInput(source);

        // generate the graph, partition by edge source, and write to Azure.
        input.SelectMany(x => GenerateGraph(x.NodeCount, x.EdgeCount))
             .PartitionBy(x => x.First)
             .WriteBinaryToAzureBlobs(container, directoryName + "/edges-{0}");

        // start job and wait.
        computation.Activate();
        computation.Join();
    }
}
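// A minimal sketch of the GenerateGraph helper used above (an assumption; the real
// method is defined elsewhere in this example): emit edgeCount random edges over
// nodeCount nodes, seeded deterministically so every run produces the same graph.
private static IEnumerable<Pair<int, int>> GenerateGraph(int nodeCount, int edgeCount)
{
    var random = new Random(0);
    for (int i = 0; i < edgeCount; i++)
        yield return new Pair<int, int>(random.Next(nodeCount), random.Next(nodeCount));
}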
static void Main(string[] args)
{
    // 1. allocate a new dataflow computation.
    using (var computation = NewComputation.FromArgs(ref args))
    {
        int iterations = 3;
        int counts = 5;

        Stream<Node, Epoch> nodes = GenerateNodes(counts, computation.Configuration.ProcessID).AsNaiadStream(computation);
        // nodes = nodes.PartitionBy(x => x.source);

        nodes.IterateAndAccumulate((lc, x) => x, x => Print(x), iterations, "LogisticRegression");
        // nodes.Iterate((lc, x) => Operate(lc.EnterLoop(x)), iterations, "LogisticRegression");

        Console.Out.WriteLine("Proc ID: " + computation.Configuration.ProcessID);
        Console.Out.Flush();

        // 2-4. (omitted here; see the console example later in this file) define a
        // BatchedDataSource, convert it to a Naiad stream, and subscribe to print
        // each batch of strings.

        Console.Out.WriteLine("Before Activate!");
        Console.Out.Flush();

        // 5. start the computation, fixing the structure of the dataflow graph.
        computation.Activate();

        Console.Out.WriteLine("After Activate!");
        Console.Out.Flush();

        // 6. block on console input until the user supplies an empty line.
        for (var l = Console.ReadLine(); l.Length > 0; l = Console.ReadLine()) { }

        // 7. (omitted) signal that the source is complete: source.OnCompleted();

        // 8. block until all work is finished.
        computation.Join();
    }
}
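// Hypothetical sketches of the GenerateNodes and Print helpers referenced above (both
// are assumptions; the real definitions are not shown in the source). GenerateNodes
// fabricates a few nodes per process, and Print serves as the partitioning function
// of IterateAndAccumulate while logging each record as a side effect.
private static IEnumerable<Node> GenerateNodes(int counts, int processID)
{
    // tag nodes with the process id so records differ across processes.
    for (int i = 0; i < counts; i++)
        yield return new Node(processID * counts + i);
}

private static int Print(Node x)
{
    Console.WriteLine("node: " + x);
    return x.GetHashCode();   // partition by the node's hash
}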
public void Execute(string[] args)
{
    using (var computation = NewComputation.FromArgs(ref args))
    {
        int iterations = int.Parse(args[1]);

        // first construct a simple graph with a feedback loop.
        var inputStream = (new int[] { }).AsNaiadStream(computation);

        var loopContext = new LoopContext<Epoch>(inputStream.Context, "loop");
        var feedback = loopContext.Delay<int>();
        var ingress = loopContext.EnterLoop(inputStream);

        feedback.Input = Barrier.MakeStage(ingress, feedback.Output, iterations);

        // prepare measurement callbacks.
        var sw = new Stopwatch();
        var lastTime = 0L;
        var times = new List<double>(iterations);

        computation.OnStartup += (c, y) => { sw.Start(); };
        computation.OnFrontierChange += (v, b) =>
        {
            var now = sw.ElapsedTicks;
            if (lastTime > 0)
                times.Add(1000.0 * (now - lastTime) / (double)Stopwatch.Frequency);
            lastTime = now;
        };

        Console.WriteLine("Running barrier latency test with {0} iterations, vertices={1}", iterations, ingress.ForStage.Placement.Count);

        // start computation and block.
        computation.Activate();
        computation.Join();

        // print results.
        times.Sort();

        var percentiles = new[] { 0.00, 0.01, 0.05, 0.10, 0.25, 0.50, 0.75, 0.90, 0.95, 0.99 };
        var latencies = percentiles.Select(f => times[(int)(iterations * f)]).ToArray();

        Console.WriteLine("Ran {0} iterations on {1} processes; this is process {2}", times.Count - 1, computation.Configuration.Processes, computation.Configuration.ProcessID);
        Console.WriteLine("%-ile\tLatency (ms)");
        for (int i = 0; i < latencies.Length; i++)
            Console.WriteLine("{0:0.00}:\t{1:0.00}", percentiles[i], latencies[i]);

        // report the true maximum (the original printed the 0.99 percentile here).
        Console.WriteLine("max:\t{0:0.00}", times[times.Count - 1]);
    }
}
public void Execute(string[] args)
{
    // a controller manages an instance of Naiad.
    using (var computation = NewComputation.FromArgs(ref args))
    {
        // define a graph input from a filename and some transformations.
        var edgeStrings = new[] { args[1] }.AsNaiadStream(computation)
                                           .SelectMany(x => ReadLines(x))
                                           .Select(x => x.Split())
                                           .Select(x => x[0].PairWith(x[1]));

        // define reachability roots from a second filename.
        var rootStrings = new[] { args[2] }.AsNaiadStream(computation)
                                           .SelectMany(x => ReadLines(x));

        // convert (string, string) -> edge and string -> node.
        Stream<Edge, Epoch> edges;   // will eventually hold stream of edges
        Stream<Node, Epoch> roots;   // will eventually hold stream of roots

        // an autorenamer context is used to consistently rename identifiers.
        using (var renamer = new AutoRenamer<string>())
        {
            var tempEdges = edgeStrings.RenameUsing(renamer, x => x.First)            // use the first string to find a name
                                       .Select(x => x.node.WithValue(x.value.Second)) // discard the first string
                                       .RenameUsing(renamer, x => x.value)            // use the second string to find a name
                                       .Select(x => new Edge(x.value.node, x.node));  // discard the second string and form an edge

            var tempRoots = rootStrings.RenameUsing(renamer, x => x)                  // use the string itself to find a name
                                       .Select(x => x.node);                          // discard the string and keep the node

            // FinishRenaming only after all RenameUsing calls.
            edges = tempEdges.FinishRenaming(renamer);
            roots = tempRoots.FinishRenaming(renamer);
        }

        // iteratively expand the reachable set as pairs (node, isReachable).
        var limit = roots.Select(x => x.WithValue(true))
                         .IterateAndAccumulate((lc, x) => x.TransmitAlong(lc.EnterLoop(edges))      // transmit (node, true) values along edges
                                                           .StateMachine((bool b, bool s) => true), // any received value sets the state to true
                                               x => x.node.index,   // partitioning information
                                               Int32.MaxValue,      // the number of iterations
                                               "Reachability")      // a nice descriptive name
                         .Concat(roots.Select(x => x.WithValue(true)))  // add the original trusted nodes
                         .NodeAggregate((a, b) => true)                 // aggregate, including the originals
                         .Where(x => x.value);

        // print the results onto the screen (or write to file, as appropriate).
        limit.Select(x => x.node.index)
             .Subscribe(x => Console.WriteLine(x.Count()));

        // start the computation and wait until it finishes.
        computation.Activate();
        computation.Join();
    }
}
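// A minimal sketch of the ReadLines helper used above (an assumption; the real method
// is defined elsewhere): lazily stream the lines of a text file, so that SelectMany can
// flatten a stream of filenames into a stream of lines.
private static IEnumerable<string> ReadLines(string filename)
{
    using (var reader = new System.IO.StreamReader(filename))
        while (!reader.EndOfStream)
            yield return reader.ReadLine();
}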
public void Execute(string[] args)
{
    using (var computation = NewComputation.FromArgs(ref args))
    {
        var keyvals = new BatchedDataSource<Pair<string, string>>();
        var queries = new BatchedDataSource<string>();

        computation.NewInput(keyvals)
                   .KeyValueLookup(computation.NewInput(queries))
                   .Subscribe(list =>
                   {
                       foreach (var l in list)
                           Console.WriteLine("value[\"{0}\"]:\t\"{1}\"", l.First, l.Second);
                   });

        computation.Activate();

        if (computation.Configuration.ProcessID == 0)
        {
            Console.WriteLine("Enter two strings to insert/overwrite a (key, value) pair.");
            Console.WriteLine("Enter one string to look up a key.");

            // repeatedly read lines and introduce records based on their structure.
            // note: it is important to advance both inputs in order to make progress.
            for (var line = Console.ReadLine(); line.Length > 0; line = Console.ReadLine())
            {
                var split = line.Split();
                if (split.Length == 1)
                {
                    queries.OnNext(line);
                    keyvals.OnNext();
                }
                if (split.Length == 2)
                {
                    queries.OnNext();
                    keyvals.OnNext(split[0].PairWith(split[1]));
                }
                if (split.Length > 2)
                    Console.Error.WriteLine("error: lines with three or more strings are not understood.");
            }
        }

        keyvals.OnCompleted();
        queries.OnCompleted();

        computation.Join();
    }
}
public void Execute(string[] args)
{
    var containerName = args[1];
    var directoryName = args[2];
    var outputblobName = args[3];

    CloudStorageAccount storageAccount = CloudStorageAccount.DevelopmentStorageAccount;

    var container = storageAccount.CreateCloudBlobClient()
                                  .GetContainerReference(containerName);
    if (!container.Exists())
        throw new Exception("No such container exists");

    // allocate a new computation from command line arguments.
    using (var computation = NewComputation.FromArgs(ref args))
    {
        // set Console.Out to point at an Azure blob bearing the process id.
        // see the important note at the end of the method about closing Console.Out.
        computation.Controller.SetConsoleOut(container, "stdout-{0}.txt");

        System.Diagnostics.Stopwatch stopwatch = new System.Diagnostics.Stopwatch();

        // read the edges from azure storage.
        var edges = computation.ReadBinaryFromAzureBlobs<Pair<int, int>>(container, directoryName);

        // symmetrize the graph by adding in transposed edges.
        edges = edges.Select(x => new Pair<int, int>(x.Second, x.First))
                     .Concat(edges);

        // invoke directed reachability.
        var result = edges.DirectedReachability();

        // listen to the output for reporting, and also write the output somewhere in Azure.
        result.Subscribe(list => Console.WriteLine("labeled {0} nodes in {1}", list.Count(), stopwatch.Elapsed));
        result.WriteBinaryToAzureBlobs(container, outputblobName);

        stopwatch.Start();

        // start computation and wait.
        computation.Activate();
        computation.Join();
    }

    // very important to close the stream to flush writes to Azure.
    Console.Out.Close();
}
public void Execute(string[] args)
{
    // the first thing to do is to allocate a computation from args.
    using (var computation = NewComputation.FromArgs(ref args))
    {
        // loading data
        if (args.Length < 5)
        {
            Console.WriteLine("current parameters: " + string.Join(" ", args));
            Console.WriteLine("usage: Examples.exe grep <inputPath> <numFiles> <pattern> <outputPath>");
            return;
        }

        string inputDir = args[1];
        int numFiles = Int32.Parse(args[2]);
        string pattern = args[3];
        string outputPathFormat = args[4] + "{0}";

        var text = loadDiskFiles(computation, inputDir, numFiles);

        // computation: keep only the lines containing the pattern.
        var result = text.Where(x => x.Contains(pattern));

        // dumping
        Action<string, System.IO.BinaryWriter> writer = writeByte;
        result.WriteToFiles<string>(outputPathFormat, writer);

        Console.WriteLine("deployed successfully");

        computation.Activate();
        computation.Join();
    }
}
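// Hedged sketches of the loadDiskFiles and writeByte helpers referenced above (both are
// assumptions; the real definitions live elsewhere in this example). loadDiskFiles deals
// each of the numbered input files to one process and streams its lines; writeByte
// serializes one record for WriteToFiles.
private static Stream<string, Epoch> loadDiskFiles(Computation computation, string inputDir, int numFiles)
{
    var processes = computation.Configuration.Processes;
    var thisProcess = computation.Configuration.ProcessID;

    return Enumerable.Range(0, numFiles)
                     .Where(i => (i % processes) == thisProcess)   // each process reads its share
                     .Select(i => System.IO.Path.Combine(inputDir, i.ToString()))
                     .AsNaiadStream(computation)
                     .SelectMany(path => System.IO.File.ReadLines(path));
}

private static void writeByte(string record, System.IO.BinaryWriter writer)
{
    writer.Write(System.Text.Encoding.UTF8.GetBytes(record + "\n"));
}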
static void Main(string[] args)
{
    // 1. allocate a new dataflow computation.
    using (var computation = NewComputation.FromArgs(ref args))
    {
        Console.Out.WriteLine("Proc ID: " + computation.Configuration.ProcessID);
        Console.Out.Flush();

        // 2. define an object which accepts input strings.
        var source = new BatchedDataSource<string>();

        // 3. convert the data source into a Naiad stream of strings.
        var input = computation.NewInput(source);

        // 4. request a notification for each batch of strings received.
        var output = input.Subscribe(x =>
        {
            foreach (var line in x)
                Console.WriteLine(line);
        });

        Console.Out.WriteLine("Before Activate!");
        Console.Out.Flush();

        // 5. start the computation, fixing the structure of the dataflow graph.
        computation.Activate();

        Console.Out.WriteLine("After Activate!");
        Console.Out.Flush();

        // 6. read inputs from the console as long as the user supplies them.
        for (var l = Console.ReadLine(); l.Length > 0; l = Console.ReadLine())
            source.OnNext(l.Split());

        // 7. signal that the source is now complete.
        source.OnCompleted();

        // 8. block until all work is finished.
        computation.Join();
    }
}
public void Execute(string[] args)
{
    using (OneOffComputation computation = NewComputation.FromArgs(ref args))
    {
        int numToExchange = args.Length > 1 ? int.Parse(args[1]) : 1000000;

        Stream<int, Epoch> input = computation.NewInput(new ConstantDataSource<int>(5));

        Stream<int, Epoch> stream = ProducerVertex.MakeStage(numToExchange, computation.Configuration.WorkerCount, input);
        Stage<ConsumerVertex, Epoch> consumer = ConsumerVertex.MakeStage(numToExchange, computation.Configuration.WorkerCount, stream);

        computation.Activate();
        computation.Join();
    }
}
public void Execute(string[] args)
{
    using (OneOffComputation computation = NewComputation.FromArgs(ref args))
    {
        // note: args[2] and args[3] are required below, so args[1] must be supplied
        // as well despite the default here.
        int numToExchange = args.Length > 1 ? int.Parse(args[1]) : 1000000;
        int producers = Int32.Parse(args[2]);
        int consumers = Int32.Parse(args[3]);
        var exchange = args.Length > 4 && args[4] == "exchange";

        var input = new Pair<int, int>[] { }.AsNaiadStream(computation);

        Stream<Pair<int, int>, Epoch> stream = ProducerVertex.MakeStage(numToExchange, 0, producers, computation.Configuration.WorkerCount, input);
        Stage<ConsumerVertex, Epoch> consumer = ConsumerVertex.MakeStage(numToExchange, computation.Configuration.Processes - consumers, computation.Configuration.Processes, computation.Configuration.WorkerCount, exchange, stream);

        computation.Activate();
        computation.Join();
    }
}
public void Execute(string[] args)
{
    // allocate a new computation from command line arguments.
    using (var computation = NewComputation.FromArgs(ref args))
    {
        var nodeCount = args.Length == 3 ? Convert.ToInt32(args[1]) : 1000;
        var edgeCount = args.Length == 3 ? Convert.ToInt32(args[2]) : 2000;

        #region Generate a local fraction of input data
        var random = new Random(0);
        var processes = computation.Configuration.Processes;

        // note: every process seeds Random(0), so all processes generate the same
        // fragment; the variant earlier in this file deals distinct edges to processes.
        var graphFragment = new Pair<int, int>[edgeCount / processes];
        for (int i = 0; i < graphFragment.Length; i++)
            graphFragment[i] = new Pair<int, int>(random.Next(nodeCount), random.Next(nodeCount));
        #endregion

        Console.WriteLine("Computing components of a random graph on {0} nodes and {1} edges", nodeCount, edgeCount);

        Stopwatch stopwatch = new Stopwatch();

        // convert array of edges to single-epoch stream.
        var edges = graphFragment.AsNaiadStream(computation)
                                 .Synchronize();

        // symmetrize the graph by adding in transposed edges.
        edges = edges.Select(x => new Pair<int, int>(x.Second, x.First))
                     .Concat(edges);

        edges.DirectedReachability()
             .Subscribe(list => Console.WriteLine("labeled {0} nodes in {1}", list.Count(), stopwatch.Elapsed));

        stopwatch.Start();
        computation.Activate();   // start graph computation
        computation.Join();       // block until computation completes
    }
}
public void Execute(string[] args)
{
    using (var computation = NewComputation.FromArgs(ref args))
    {
        computation.Controller.SetConsoleOut(computation.DefaultBlobContainer("naiad-outputs"), "out-{0}.txt");
        computation.Controller.SetConsoleError(computation.DefaultBlobContainer("naiad-outputs"), "err-{0}.txt");

        if (args.Length == 4)
        {
            var containerName = args[1];
            var inputDirectory = args[2];
            var outputDirectory = args[3];

            if (!inputDirectory.Equals(outputDirectory))
            {
                var container = computation.DefaultBlobContainer(containerName);

                computation.ReadTextFromAzureBlobs(container, inputDirectory)
                           .PartitionBy(x => x.GetHashCode())
                           .WriteTextToAzureBlobs(container, outputDirectory + "/part-{0}-{1}.txt");
            }
            else
            {
                Console.Error.WriteLine("ERROR: Input directory name ({0}) equals output directory name ({1})", inputDirectory, outputDirectory);
            }
        }
        else
        {
            Console.Error.WriteLine("repartition requires three additional arguments: " + this.Usage);
        }

        computation.Activate();
        computation.Join();

        Console.Out.Close();
        Console.Error.Close();
    }
}
public void Execute(string[] args)
{
    // the first thing to do is to allocate a computation from args.
    using (var computation = NewComputation.FromArgs(ref args))
    {
        // 1. make a new data source, to which we will supply strings.
        var source = new BatchedDataSource<string>();

        // 2. attach source, and apply count extension method.
        var counts = computation.NewInput(source).StreamingCount();

        // 3. subscribe to the resulting stream with a callback to print the outputs.
        counts.Subscribe(list =>
        {
            foreach (var element in list)
                Console.WriteLine(element);
        });

        computation.Activate();   // activate the execution of this graph (no new stages allowed).

        if (computation.Configuration.ProcessID == 0)
        {
            // with our dataflow graph defined, we can start soliciting strings from the user.
            Console.WriteLine("Start entering lines of text. An empty line will exit the program.");
            Console.WriteLine("Naiad will display counts (and changes in counts) of words you type.");

            // read lines of input and hand them to the input, until an empty line appears.
            for (var line = Console.ReadLine(); line.Length > 0; line = Console.ReadLine())
                source.OnNext(line.Split());
        }

        source.OnCompleted();   // signal the end of the input.
        computation.Join();     // waits until the graph has finished executing.
    }
}
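// A plausible sketch of the StreamingCount extension used above (an assumption; the real
// definition is elsewhere in this example). It illustrates the general shape of a custom
// stateful Naiad operator: a UnaryVertex (from Microsoft.Research.Naiad.Frameworks) that
// keeps a running count per word and emits the updated count each time a word arrives.
public static class StreamingCountExtension
{
    public static Stream<Pair<string, long>, Epoch> StreamingCount(this Stream<string, Epoch> words)
    {
        // route each word to a single vertex so that its count lives in one place.
        return words.NewUnaryStage((i, s) => new CountVertex(i, s),
                                   w => w.GetHashCode(),
                                   p => p.First.GetHashCode(),
                                   "StreamingCount");
    }

    private class CountVertex : UnaryVertex<string, Pair<string, long>, Epoch>
    {
        private readonly Dictionary<string, long> counts = new Dictionary<string, long>();

        public override void OnReceive(Message<string, Epoch> message)
        {
            var output = this.Output.GetBufferForTime(message.Time);
            for (int i = 0; i < message.length; i++)
            {
                var word = message.payload[i];

                long count;
                this.counts.TryGetValue(word, out count);
                this.counts[word] = count + 1;

                output.Send(word.PairWith(count + 1));   // emit the updated count
            }
        }

        public CountVertex(int index, Stage<Epoch> stage) : base(index, stage) { }
    }
}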
/// <summary>
/// Executes a word counting Naiad program.
/// </summary>
/// <param name="args">Remaining arguments</param>
public void Execute(string[] args)
{
    // first, construct a Naiad controller.
    using (var computation = NewComputation.FromArgs(ref args))
    {
        // create an incrementally updateable collection.
        var text = computation.NewInputCollection<string>();

        // segment strings, count, and print.
        text.SelectMany(x => x.Split(' '))
            .Count(y => y, (k, c) => k + ":" + c)   // yields "word:count" for each word
            .Subscribe(l =>
            {
                foreach (var element in l)
                    Console.WriteLine(element);
            });

        computation.Activate();

        if (computation.Configuration.ProcessID == 0)
        {
            Console.WriteLine("Start entering lines of text. An empty line will exit the program.");
            Console.WriteLine("Naiad will display counts (and changes in counts) of words you type.");

            var line = Console.ReadLine();
            for (int i = 0; line != ""; i++)
            {
                text.OnNext(line);
                computation.Sync(i);
                line = Console.ReadLine();
            }
        }

        text.OnCompleted();   // closes input
        computation.Join();
    }
}
public void Execute(string[] args)
{
    int documentCount = 100000;
    int vocabulary = 100000;
    int batchSize = 10000;
    int iterations = 10;

    using (var computation = NewComputation.FromArgs(ref args))
    {
        #region building up input data
        if (args.Length == 5)
        {
            documentCount = Convert.ToInt32(args[1]);
            vocabulary = Convert.ToInt32(args[2]);
            batchSize = Convert.ToInt32(args[3]);
            iterations = Convert.ToInt32(args[4]);
        }

        var random = new Random(0);

        List<Document> docs = Enumerable.Range(0, documentCount)
                                        .Select(i => new Document(Enumerable.Range(0, 10)
                                                                            .Select(j => String.Format("{0}", random.Next(vocabulary)))
                                                                            .Aggregate((x, y) => x + " " + y), i))
                                        .ToList<Document>();

        List<Query>[] queryBatches = new List<Query>[iterations];
        for (int i = 0; i < iterations; i++)
        {
            queryBatches[i] = Enumerable.Range(i * batchSize, batchSize)
                                        .Select(j => new Query(String.Format("{0}", j % vocabulary), j, 1))
                                        .ToList();
        }
        #endregion

        // declare inputs for documents and queries.
        var documents = computation.NewInputCollection<Document>();
        var queries = computation.NewInputCollection<Query>();

        // each document is broken down into a collection of terms, each with associated identifier.
        var dTerms = documents.SelectMany(doc => doc.text.Split(' ').Select(term => new Document(term, doc.id)))
                              .Distinct();

        // each query is broken down into a collection of terms, each with associated identifier and threshold.
        var qTerms = queries.SelectMany(query => query.text.Split(' ').Select(term => new Query(term, query.id, query.threshold)))
                            .Distinct();

        // doc terms and query terms are joined, matching pairs are counted and returned if the count exceeds the threshold.
        var results = dTerms.Join(qTerms, d => d.text, q => q.text, (d, q) => new Match(d.id, q.id, q.threshold))
                            .Count(match => match)
                            .Select(pair => new Match(pair.First.document, pair.First.query, pair.First.threshold - (int)pair.Second))
                            .Where(match => match.threshold <= 0)
                            .Select(match => new Pair<int, int>(match.document, match.query));

        // subscribe to the output in case we are interested in the results.
        var subscription = results.Subscribe(list => Console.WriteLine("matches found: {0}", list.Length));

        computation.Activate();

        #region Prepare some fake documents to put in the collection
        // creates many documents each containing 10 words from [0, ... vocabulary-1].
        int share_size = docs.Count / computation.Configuration.Processes;
        documents.OnNext(docs.GetRange(computation.Configuration.ProcessID * share_size, share_size));
        queries.OnNext();

        //Console.WriteLine("Example SearchIndex in Naiad. Step 1: indexing documents, step 2: issuing queries.");
        Console.WriteLine("Indexing {0} random documents, {1} terms (please wait)", documentCount, 10 * documentCount);
        subscription.Sync(0);
        #endregion

        #region Issue batches of queries and assess performance
        if (computation.Configuration.ProcessID == 0)
        {
            Console.WriteLine("Issuing {0} rounds of batches of {1} queries (press [enter] to start)", iterations, batchSize);
            Console.ReadLine();
        }

        for (int i = 0; i < iterations; i++)
        {
            // we round-robin through query terms. more advanced queries are possible.
            if (computation.Configuration.ProcessID == 0)
                queries.OnNext(queryBatches[i]);   // introduce new queries.
            else
                queries.OnNext();

            documents.OnNext();         // indicate no new docs.
            subscription.Sync(i + 1);   // block until round is done.
        }

        documents.OnCompleted();
        queries.OnCompleted();
        #endregion

        computation.Join();
    }
}
public void Execute(string[] args)
{
    using (var computation = NewComputation.FromArgs(ref args))
    {
        // establish numbers of nodes and edges from input or from defaults.
        if (args.Length == 3)
        {
            nodeCount = Convert.ToInt32(args[1]);
            edgeCount = Convert.ToInt32(args[2]);
        }

        // generate a random graph.
        var random = new Random(0);
        var graph = new IntPair[edgeCount];
        for (int i = 0; i < edgeCount; i++)
            graph[i] = new IntPair(random.Next(nodeCount), random.Next(nodeCount));

        var stopwatch = System.Diagnostics.Stopwatch.StartNew();

        // set up the CC computation.
        var edges = computation.NewInputCollection<IntPair>();

        // alternative priority functions:
        //Func<IntPair, int> priorityFunction = node => 0;
        //Func<IntPair, int> priorityFunction = node => Math.Min(node.t, 100);
        Func<IntPair, int> priorityFunction = node => 65536 * (node.t < 10 ? node.t : 10 + Convert.ToInt32(Math.Log(1 + node.t) / Math.Log(2.0)));

        var output = edges.ConnectedComponents(priorityFunction)
                          .Count(n => n.t, (l, c) => c)   // counts results with each label
                          .Consolidate()
                          .Subscribe(l =>
                          {
                              Console.Error.WriteLine("Time to process: {0}", stopwatch.Elapsed);
                              foreach (var result in l.OrderBy(x => x.record))
                                  Console.Error.WriteLine(result);
                          });

        Console.Error.WriteLine("Running connected components on a random graph ({0} nodes, {1} edges)", nodeCount, edgeCount);
        Console.Error.WriteLine("For each size, the number of components of that size (may take a moment):");

        computation.Activate();

        edges.OnNext(computation.Configuration.ProcessID == 0 ? graph : Enumerable.Empty<IntPair>());

        // if we are up for interactive access ...
        if (computation.Configuration.Processes == 1)
        {
            output.Sync(0);

            Console.WriteLine();
            Console.WriteLine("Next: sequentially rewiring random edges (press [enter] each time):");

            for (int i = 0; true; i++)
            {
                Console.ReadLine();
                stopwatch.Restart();

                var newEdge = new IntPair(random.Next(nodeCount), random.Next(nodeCount));
                Console.WriteLine("Rewiring edge: {0} -> {1}", graph[i], newEdge);

                edges.OnNext(new[] { new Weighted<IntPair>(graph[i], -1), new Weighted<IntPair>(newEdge, 1) });
                output.Sync(i + 1);
            }
        }

        edges.OnCompleted();
        computation.Join();
    }
}
public void Execute(string[] args)
{
    using (var computation = NewComputation.FromArgs(ref args))
    {
        // establish numbers of nodes and edges from input or from defaults.
        if (args.Length == 3)
        {
            nodeCount = Convert.ToInt32(args[1]);
            edgeCount = Convert.ToInt32(args[2]);
        }

        // generate a random graph.
        var random = new Random(0);
        var graph = new Edge[edgeCount];
        for (int i = 0; i < edgeCount; i++)
            graph[i] = new Edge(random.Next(nodeCount), random.Next(nodeCount));

        // set up the SCC computation.
        var edges = computation.NewInputCollection<Edge>();

        var stopwatch = System.Diagnostics.Stopwatch.StartNew();

        var result = edges.TrimLeavesAndFlip()
                          .TrimLeavesAndFlip()
                          .SCC()
                          .Subscribe(x => Console.WriteLine("{1}\tNet edge changes within SCCs: {0}", x.Sum(y => y.weight), stopwatch.Elapsed));

        Console.WriteLine("Strongly connected components on a random graph ({0} nodes, {1} edges)", nodeCount, edgeCount);
        Console.WriteLine("Reporting the numbers of edges within SCCs (may take a moment):");

        computation.Activate();

        // input graph and wait.
        if (computation.Configuration.ProcessID == 0)
            edges.OnNext(graph);
        else
            edges.OnNext();

        result.Sync(0);
        Console.WriteLine("Computation completed");

        // if we are up for interactive access ...
        if (computation.Configuration.Processes == 1)
        {
            Console.WriteLine();
            Console.WriteLine("Press [enter] repeatedly to rewire random edges in the graph. (\"done\" to exit)");

            for (int i = 0; i < graph.Length; i++)
            {
                var line = Console.ReadLine();
                if (line == "done")
                    break;

                stopwatch.Restart();

                var newEdge = new Edge(random.Next(nodeCount), random.Next(nodeCount));
                Console.WriteLine("Rewiring edge: {0} -> {1}", graph[i], newEdge);

                edges.OnNext(new[] { new Weighted<Edge>(graph[i], -1), new Weighted<Edge>(newEdge, 1) });
                result.Sync(i + 1);
            }
        }

        edges.OnCompleted();
        computation.Join();
    }
}
public void Execute(string[] args)
{
    // allocate a new computation from command line arguments.
    using (var computation = NewComputation.FromArgs(ref args))
    {
        if (args.Length < 5)
        {
            Console.WriteLine("current parameters: " + string.Join(" ", args));
            Console.WriteLine("usage: Examples.exe sssp <inputPath> <numFiles> <outputPath> <srcId>");
            return;
        }

        string inputDir = args[1];
        int numFiles = Int32.Parse(args[2]);
        string outputPathFormat = args[3] + "{0}";
        var srcId = Int32.Parse(args[4]);

        Stopwatch stopwatch = new Stopwatch();

        // loading
        var text = loadDiskFiles(computation, inputDir, numFiles);

        // building graph
        var edges = text.Select(x => x.Split())
                        .Select(x => new Pair<int, int>(Int32.Parse(x[0]), Int32.Parse(x[1])));

        var result = edges.SSSP(srcId);

        // dumping
        Action<Pair<int, float>, System.IO.BinaryWriter> writer = writePair;
        result.WriteToFiles<Pair<int, float>>(outputPathFormat, writer);

        stopwatch.Start();
        computation.OnFrontierChange += (x, y) =>
        {
            Console.WriteLine(stopwatch.Elapsed + "\t" + string.Join(", ", y.NewFrontier));
            Console.Out.Flush();
        };

        computation.Activate();   // start graph computation
        computation.Join();       // block until computation completes
    }
}
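// A minimal sketch of the writePair helper used above (an assumption; the real
// definition is elsewhere in this example): serialize one (vertex, distance) record
// for WriteToFiles.
private static void writePair(Pair<int, float> record, System.IO.BinaryWriter writer)
{
    writer.Write(record.First);    // vertex id
    writer.Write(record.Second);   // shortest-path distance
}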
static void Main(string[] args)
{
    // 1. allocate a new dataflow computation.
    using (var computation = NewComputation.FromArgs(ref args))
    {
        if (args.Length != 6)
        {
            PrintHelp();
            return;
        }

        Int32 procid = computation.Configuration.ProcessID;
        Int32 thread_num = computation.Configuration.WorkerCount;
        Int32 worker_num = computation.Configuration.Processes;

        Int32 dimension = Int32.Parse(args[0]);
        Int32 cluster_num = Int32.Parse(args[1]);
        Int32 iteration_num = Int32.Parse(args[2]);
        Int32 partition_num = Int32.Parse(args[3]);
        double sample_num_m = Convert.ToDouble(args[4]);
        Int64 spin_wait = Int64.Parse(args[5]);

        Console.Out.WriteLine("dimension: " + dimension);
        Console.Out.WriteLine("cluster_num: " + cluster_num);
        Console.Out.WriteLine("iteration_num: " + iteration_num);
        Console.Out.WriteLine("partition_num: " + partition_num);
        Console.Out.WriteLine("sample_num_m: " + sample_num_m);
        Console.Out.WriteLine("spin_wait: " + spin_wait);
        Console.Out.WriteLine("procid: " + procid);
        Console.Out.WriteLine("worker_num: " + worker_num);
        Console.Out.WriteLine("thread_num: " + thread_num);
        Console.Out.Flush();

        KMeans km = new KMeans(dimension, cluster_num, iteration_num, partition_num, sample_num_m, spin_wait, procid, worker_num, thread_num);

        Stream<SampleBatch, Epoch> samples = km.GenerateSamples().AsNaiadStream(computation);
        samples = samples.PartitionBy(s => (int)(s[0][0]));

        var end_samples = samples.Iterate((lc, s) => km.Advance(s), iteration_num, "KMeans");

        // var output = end_samples.Subscribe(x =>
        // {
        //     Console.Out.WriteLine("Final center 0: " + PrintList(km.means_[0]));
        //     Console.Out.Flush();
        // });

        Console.Out.WriteLine("Before Activate!");
        Console.Out.Flush();

        // start the computation, fixing the structure of the dataflow graph.
        computation.Activate();

        Console.Out.WriteLine("After Activate!");
        Console.Out.Flush();

        // block until all work is finished.
        computation.Join();
        Console.Out.WriteLine("After Join!");

        double average_total = km.total_times_.GetRange(truncate_index_, iteration_num - truncate_index_).Average();
        double average_compute = km.compute_times_.GetRange(truncate_index_, iteration_num - truncate_index_).Average();
        double average_idle = average_total - average_compute;

        Console.Out.WriteLine("*** Average for the last {0:D2} iterations: compute(ms): {1:F2} total(ms): {2:F2} (idle(ms): {3:F2})",
                              iteration_num - truncate_index_, 1000 * average_compute, 1000 * average_total, 1000 * average_idle);

        for (int i = 0; i < cluster_num; ++i)
            Console.Out.WriteLine("Final center {0:D2}: {1}", i, PrintList(km.means_[i]));

        Console.Out.WriteLine("Samples Counts: " + PrintList(km.sample_counter));
        Console.Out.WriteLine("Reduce Level 1 Counts: " + PrintList(km.reduce_l1_counter_));
        Console.Out.WriteLine("Reduce Level 2 Counts: " + PrintList(km.reduce_l2_counter_));
        Console.Out.WriteLine("Sync Level 1 Counts: " + PrintList(km.sync_l1_counter_));
        Console.Out.WriteLine("Sync Level 2 Counts: " + PrintList(km.sync_l2_counter_));
        Console.Out.WriteLine("Sync Tags: " + PrintHashSet(km.sync_tags_));
        Console.Out.WriteLine("Reduce Tags: " + PrintHashSet(km.reduce_tags_));
        Console.Out.WriteLine("Clustering Tags: " + PrintHashSet(km.clustering_tags_));
        Console.Out.Flush();
    }
}
static void Main(string[] args)
{
    // 1. allocate a new dataflow computation.
    using (var computation = NewComputation.FromArgs(ref args))
    {
        // check for --help before validating the argument count;
        // in the original ordering the help branch was unreachable.
        if (args.Length == 1 && (args[0] == "--help" || args[0] == "-h"))
        {
            PrintHelp();
            return;
        }

        if (args.Length != 5)
        {
            PrintHelp();
            return;
        }

        Int32 procid = computation.Configuration.ProcessID;
        Int32 dimension = Int32.Parse(args[0]);
        Int32 iteration_num = Int32.Parse(args[1]);
        Int64 partition_num = Int32.Parse(args[2]);
        Int64 sample_num_m = Int64.Parse(args[3]);
        Int64 worker_num = Int64.Parse(args[4]);

        Console.Out.WriteLine("**NOTE: Worker num should be equal to core num!");
        Console.Out.WriteLine("procid: " + procid);
        Console.Out.WriteLine("dimension: " + dimension);
        Console.Out.WriteLine("iteration_num: " + iteration_num);
        Console.Out.WriteLine("partition_num: " + partition_num);
        Console.Out.WriteLine("sample_num_m: " + sample_num_m);
        Console.Out.WriteLine("worker_num (should be core num): " + worker_num);
        Console.Out.Flush();

        LogisticRegression lr = new LogisticRegression(procid, dimension, iteration_num, partition_num, sample_num_m, worker_num);

        Stream<Sample, Epoch> samples = lr.GenerateSamples().AsNaiadStream(computation);

        // partition the samples based on the first element.
        samples = samples.PartitionBy(s => (int)(s[0]));

        var end_samples = samples.Iterate((lc, s) => lr.Advance(s), iteration_num, "LogisticRegression");

        var output = end_samples.Subscribe(x =>
        {
            Console.Out.WriteLine("Final weight: " + PrintList(lr.weight_));
            Console.Out.Flush();
        });

        Console.Out.WriteLine("Before Activate!");
        Console.Out.Flush();

        // start the computation, fixing the structure of the dataflow graph.
        computation.Activate();

        Console.Out.WriteLine("After Activate!");
        Console.Out.Flush();

        // block until all work is finished.
        computation.Join();
        Console.Out.WriteLine("After Join!");

        Console.Out.WriteLine("Counter 1 from procid: " + lr.procid_ + " " + PrintList(lr.counter_1_));
        Console.Out.WriteLine("Counter 2 from procid: " + lr.procid_ + " " + PrintList(lr.counter_2_));
        Console.Out.WriteLine("Counter 3 from procid: " + lr.procid_ + " " + PrintList(lr.counter_3_));
        Console.Out.Flush();
    }
}
public void Execute(string[] args)
{
    using (var computation = NewComputation.FromArgs(ref args))
    {
        // establish numbers of nodes and edges from input or from defaults.
        if (args.Length == 3)
        {
            nodeCount = Convert.ToInt32(args[1]);
            edgeCount = Convert.ToInt32(args[2]);
        }

        // generate a random graph.
        var random = new Random(0);
        var graph = new IntPair[edgeCount];
        for (int i = 0; i < edgeCount; i++)
            graph[i] = new IntPair(random.Next(nodeCount), random.Next(nodeCount));

        // set up the CC computation.
        var edges = computation.NewInputCollection<IntPair>();

        var stopwatch = System.Diagnostics.Stopwatch.StartNew();

        var colors = edges.Where(x => x.s != x.t)
                          .Color();

        var output = colors.Select(x => x.t)   // just keep the color (to count)
                           .Output
                           .Subscribe((i, l) => Console.WriteLine("Time to process: {0}", stopwatch.Elapsed));

        // set to enable a correctness test, at the cost of more memory and computation.
        var testCorrectness = false;
        if (testCorrectness)
        {
            edges.Where(x => x.s != x.t)
                 .Join(colors, e => e.s, c => c.s, e => e.t, c => c.t, (s, t, c) => new IntPair(c, t))
                 .Join(colors, e => e.t, c => c.s, e => e.s, c => c.t, (t, s, c) => new IntPair(s, c))
                 .Where(p => p.s == p.t)
                 .Consolidate()
                 .Subscribe(l => Console.WriteLine("Coloring errors: {0}", l.Length));
        }

        Console.WriteLine("Running graph coloring on a random graph ({0} nodes, {1} edges)", nodeCount, edgeCount);
        Console.WriteLine("For each color, the nodes with that color:");

        computation.Activate();

        edges.OnNext(computation.Configuration.ProcessID == 0 ? graph : Enumerable.Empty<IntPair>());
        output.Sync(0);

        // if we are up for interactive access ...
        if (computation.Configuration.Processes == 1)
        {
            Console.WriteLine();
            Console.WriteLine("Next: sequentially rewiring random edges (press [enter] each time):");

            for (int i = 0; i < graph.Length; i++)
            {
                Console.ReadLine();
                stopwatch.Restart();

                var newEdge = new IntPair(random.Next(nodeCount), random.Next(nodeCount));
                Console.WriteLine("Rewiring edge: {0} -> {1}", graph[i], newEdge);

                edges.OnNext(new[] { new Weighted<IntPair>(graph[i], -1), new Weighted<IntPair>(newEdge, 1) });
                output.Sync(i + 1);
            }
        }

        edges.OnCompleted();
        computation.Join();
    }
}
// Sample (defined elsewhere) pairs a List<float> vector with a float label.
static void Main(string[] args)
{
    // 1. allocate a new dataflow computation.
    using (var computation = NewComputation.FromArgs(ref args))
    {
        Int32 iterations = 10;
        if (args.Length >= 1)
            iterations = Int32.Parse(args[0]);

        Console.Out.WriteLine("iterations: " + iterations);
        Console.Out.Flush();

        int counts = 5;
        int dimension = 10;

        Stream<Sample, Epoch> nodes = GenerateNodes(dimension, counts, computation.Configuration.ProcessID).AsNaiadStream(computation);
        // nodes = nodes.PartitionBy(x => x.source);

        var end_nodes = nodes.Iterate((lc, x) => Operate(x), iterations, "LogisticRegression");
        var node_count = Microsoft.Research.Naiad.Frameworks.Lindi.ExtensionMethods.Count(end_nodes);

        end_nodes.WriteToFiles("output_nodes_{0}.txt", (record, writer) => writer.Write(true));
        node_count.WriteToFiles("output_count_{0}.txt", (record, writer) => writer.Write(true));

        Console.Out.WriteLine("Proc ID: " + computation.Configuration.ProcessID);
        Console.Out.Flush();

        var output = node_count.Subscribe(x =>
        {
            foreach (var e in x)
                Console.WriteLine("vector: " + PrintList(e.First) + " count: " + e.Second);
        });

        Console.Out.WriteLine("Before Activate!");
        Console.Out.Flush();

        // start the computation, fixing the structure of the dataflow graph.
        computation.Activate();

        Console.Out.WriteLine("After Activate!");
        Console.Out.Flush();

        // block until all work is finished.
        computation.Join();

        Console.Out.WriteLine("After Join!");
        Console.Out.Flush();
    }
}
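// A hypothetical sketch of the Operate helper used in the loop body above (an
// assumption; the real definition is not shown in the source): one iteration step
// that rescales each sample's vector, standing in for a real gradient-update step.
private static Stream<Sample, IterationIn<Epoch>> Operate(Stream<Sample, IterationIn<Epoch>> samples)
{
    return samples.Select(s =>
    {
        // placeholder per-iteration update over the sample's vector.
        for (int i = 0; i < s.vector.Count; i++)
            s.vector[i] = s.vector[i] * 0.9f;
        return s;
    });
}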
public void Execute(string[] args)
{
    using (var computation = NewComputation.FromArgs(ref args))
    {
        // either read inputs from a file, or generate them randomly.
        Stream<Edge, Epoch> edges;
        if (args.Length == 1)
        {
            // generate a random graph in each process; pagerank computation is performed on the union of edges.
            edges = GenerateEdges(1000000, 20000000, computation.Configuration.ProcessID).AsNaiadStream(computation);
        }
        else
        {
            var text = args.Skip(1)
                           .AsNaiadStream(computation)
                           .Distinct()
                           .SelectMany(x => x.ReadLinesOfText());

            edges = text.Where(x => !x.StartsWith("#"))
                        .Select(x => x.Split())
                        .Select(x => new Edge(new Node(Int32.Parse(x[0])), new Node(Int32.Parse(x[1]))));
        }

        Console.Out.WriteLine("Started up!");
        Console.Out.Flush();

        edges = edges.PartitionBy(x => x.source);

        // capture degrees before trimming leaves.
        var degrees = edges.Select(x => x.source)
                           .CountNodes();

        // removes edges to pages with zero out-degree.
        var trim = false;
        if (trim)
        {
            edges = edges.Select(x => x.target.WithValue(x.source))
                         .FilterBy(degrees.Select(x => x.node))
                         .Select(x => new Edge(x.value, x.node));
        }

        // initial distribution of ranks.
        var start = degrees.Select(x => x.node.WithValue(0.15f))
                           .PartitionBy(x => x.node.index);

        // define an iterative pagerank computation, add initial values, aggregate up the results and print them to the screen.
        var iterations = 10;
        var ranks = start.IterateAndAccumulate((lc, deltas) => deltas.PageRankStep(lc.EnterLoop(degrees), lc.EnterLoop(edges)),
                                               x => x.node.index,
                                               iterations,
                                               "PageRank")
                         .Concat(start)                    // add initial ranks in for correctness.
                         .NodeAggregate((x, y) => x + y)   // accumulate up the ranks.
                         .Where(x => x.value > 0.0f);      // report only positive ranks.

        var stopwatch = System.Diagnostics.Stopwatch.StartNew();

        computation.OnFrontierChange += (x, y) =>
        {
            Console.WriteLine(stopwatch.Elapsed + "\t" + string.Join(", ", y.NewFrontier));
            Console.Out.Flush();
        };

        // start computation, and block until completion.
        computation.Activate();
        computation.Join();
    }
}
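// A minimal sketch of the GenerateEdges helper used above (an assumption; the real
// method is defined elsewhere in this example): each process generates its own
// deterministically seeded share of the random edges, and the computation runs on
// the union of these shares.
private static IEnumerable<Edge> GenerateEdges(int nodeCount, int edgeCount, int processID)
{
    var random = new Random(processID);   // seed per process so shares differ.
    for (int i = 0; i < edgeCount; i++)
        yield return new Edge(new Node(random.Next(nodeCount)), new Node(random.Next(nodeCount)));
}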
public static void Main(string[] args)
{
    using (var computation = NewComputation.FromArgs(ref args))
    {
        Console.WriteLine("computation.Configuration.WorkerCount (--threads): " + computation.Configuration.WorkerCount);

        // this has to be inside the using block, because the FromArgs call removes
        // the Naiad-specific arguments from args.
        int numDays = int.Parse(args[1]);
        Console.WriteLine("numDays: " + numDays);

        var initYesterdayCounts = Enumerable.Empty<Pair<int, int>>().AsNaiadStream(computation);

        Stream<int, Epoch> dayInit = null;
        if (computation.Configuration.ProcessID == 0)
            dayInit = new[] { 1 }.AsNaiadStream(computation);
        else
            dayInit = Enumerable.Empty<int>().AsNaiadStream(computation);

        initYesterdayCounts.Iterate((lc, yesterdayCounts) =>
        {
            // circulate the day counter through a delay so each iteration processes one day.
            var dayDelayed = lc.Delay<int>(numDays - 1);
            var dayIngress = lc.EnterLoop(dayInit);
            var dayHead = dayIngress.Concat(dayDelayed.Output);
            var dayTail = dayHead.Select(x => x + 1);
            dayDelayed.Input = dayTail;

            // read the visits for the current day.
            var visits = dayHead.PartitionBy(x => x).SelectMany(x => (args[0] + x).ReadLinesOfText());
            visits = visits.PartitionBy(x => x);

            // count today's visits per page.
            var todayCounts = visits.Select(x => x.PairWith(1))
                                    .Aggregate(p => p.First, p => p.Second, (x, y) => x + y, (key, state) => key.PairWith(state));

            // compare against yesterday's counts, summing the absolute differences.
            var summed = todayCounts.Join(yesterdayCounts, x => x.First, x => x.First, (x, y) => Math.Abs(x.Second - y.Second))
                                    .Aggregate<int, int, int, int, IterationIn<Epoch>>(x => 0, x => x, (x, y) => x + y, (key, state) => state, true);

            lc.ExitLoop(summed).Subscribe(x =>
            {
                foreach (var line in x)
                    Console.WriteLine(line);
            });

            return todayCounts;
        }, numDays - 1, "ClickCount iteration");

        computation.Activate();
        computation.Join();

        if (computation.Configuration.ProcessID == 0)
            Console.WriteLine("Computation finished");
    }
}
public void Execute(string[] args)
{
    // the first thing to do is to allocate a computation from args.
    using (var computation = NewComputation.FromArgs(ref args))
    {
        // loading data
        if (args.Length < 5)
        {
            Console.WriteLine("usage: Examples.exe terasort <inputDir> <numFiles> <outputDir> <numWorkers>");
            return;   // without this, the argument accesses below would throw.
        }

        String inputDir = args[1];
        int numFiles = Int32.Parse(args[2]);
        string outputPathFormat = args[3] + "{0}";
        int numWorkers = Int32.Parse(args[4]);

        var stopwatch = System.Diagnostics.Stopwatch.StartNew();

        var text = loadDiskFiles(computation, inputDir, numFiles);

        // computation: range-partition records by key, then sort within partitions.
        TeraSortPartitioner partitioner = new TeraSortPartitioner(computation.Configuration.Processes * numWorkers);
        var result = text.PartitionBy(x => partitioner.getPartition(x.First)).TeraSort();

        computation.OnFrontierChange += (x, y) =>
        {
            Console.WriteLine(stopwatch.Elapsed + "\t" + string.Join(", ", y.NewFrontier));
            Console.Out.Flush();
        };

        // dumping (disabled)
        // Action<Pair<byte[], byte[]>, System.IO.BinaryWriter> writer = writePair;
        // result.WriteToFiles<Pair<byte[], byte[]>>(outputPathFormat, writer);

        computation.Activate();
        computation.Join();
    }
}
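// A hypothetical sketch of TeraSortPartitioner (an assumption; the real class is
// defined elsewhere in this example, and may operate on byte[] keys rather than the
// string keys assumed here): range-partition keys by their leading byte so that
// partition i holds keys that sort before those in partition i+1, making the
// concatenation of per-partition sorted outputs globally sorted.
internal class TeraSortPartitioner
{
    private readonly int numPartitions;

    public TeraSortPartitioner(int numPartitions)
    {
        this.numPartitions = numPartitions;
    }

    public int getPartition(string key)
    {
        // TeraSort keys are 10 bytes; bucketing on the first byte keeps ranges ordered.
        int leading = key.Length > 0 ? key[0] : 0;
        return Math.Min(numPartitions - 1, leading * numPartitions / 256);
    }
}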
public void Execute(string[] args)
{
    using (var computation = NewComputation.FromArgs(ref args))
    {
        // loading data
        if (args.Length < 5)
        {
            Console.WriteLine("current parameters: " + string.Join(" ", args));
            Console.WriteLine("usage: Examples.exe pr <inputPath> <numFiles> <outputPath> <numIterations>");
            return;
        }

        string inputDir = args[1];
        int numFiles = Int32.Parse(args[2]);
        string outputPathFormat = args[3] + "{0}";
        var iterations = Int32.Parse(args[4]);

        var stopwatch = System.Diagnostics.Stopwatch.StartNew();

        var text = loadDiskFiles(computation, inputDir, numFiles);

        var edges = text.Select(x => x.Split())
                        .Select(x => new Edge(new Node(Int32.Parse(x[0])), new Node(Int32.Parse(x[1]))));

        Console.Out.WriteLine("Started up!");
        Console.Out.Flush();

        edges = edges.PartitionBy(x => x.source);

        // capture degrees before trimming leaves (note: CountNodes is problematic here).
        var degrees = edges.Select(x => x.source)
                           .CountNodes();

        // initial distribution of ranks.
        var start = degrees.Select(x => x.node.WithValue(0.15f));

        // define an iterative pagerank computation.
        var ranks = start.Iterate((lc, deltas) => deltas.PageRankStep(lc.EnterLoop(degrees), lc.EnterLoop(edges)),
                                  x => x.node.index,
                                  iterations,
                                  "PageRank");

        computation.OnFrontierChange += (x, y) =>
        {
            Console.WriteLine(stopwatch.Elapsed + "\t" + string.Join(", ", y.NewFrontier));
            Console.Out.Flush();
        };

        // dumping (disabled)
        // Action<NodeWithValue<float>, System.IO.BinaryWriter> writer = writeNodeWithValue;
        // ranks.WriteToFiles<NodeWithValue<float>>(outputPathFormat, writeNodeWithValue);

        Console.WriteLine("deployed successfully");

        // start computation, and block until completion.
        computation.Activate();
        computation.Join();
    }
}
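// A minimal sketch of the writeNodeWithValue helper mentioned in the disabled dump step
// above (an assumption; the real definition is elsewhere in this example): serialize one
// (node, rank) record for WriteToFiles.
private static void writeNodeWithValue(NodeWithValue<float> record, System.IO.BinaryWriter writer)
{
    writer.Write(record.node.index);   // node id
    writer.Write(record.value);        // pagerank value
}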