static void ExecuteNaiad(string[] args, string dataDir, string uriBase)
{
    string ukFile = Path.Combine(dataDir, @"uk-2007-05");
    string twitterFile = Path.Combine(dataDir, @"twitter_rv.bin");
    string livejournalFile = Path.Combine(dataDir, @"livejournal.bin");

    var configuration = Configuration.FromArgs(ref args);
    var algorithm = args[1];
    var dataset = args[2];

    #region file partitioning
    if (algorithm == "partition" && dataset == "twitter")
    {
        var stopwatch = System.Diagnostics.Stopwatch.StartNew();
        using (var computation = NewComputation.FromConfig(configuration))
        {
            int parts = Int32.Parse(args[3]);
            var format = Path.Combine(dataDir, @"twitter-part-{0}-of-" + (parts * parts).ToString());

            computation.LoadGraph(twitterFile)
                       .Partition(parts, parts)
                       .WriteBinaryToFiles(format);

            computation.Activate();
            computation.Join();
        }
        Console.WriteLine(stopwatch.Elapsed);
    }

    if (algorithm == "repartition" && dataset == "twitter")
    {
        var stopwatch = System.Diagnostics.Stopwatch.StartNew();
        using (var computation = NewComputation.FromConfig(configuration))
        {
            int parts = Int32.Parse(args[3]);

            computation.ReadHdfsBinaryCollection<Edge>(new Uri(uriBase + "twitter-10"))
                       .Partition(parts, parts)
                       .WriteHdfsBinary(new Uri(uriBase + "twitter-" + parts),
                                        1024 * 1024, -1L, 100L * 1024L * 1024L * 1024L);

            computation.Activate();
            computation.Join();
        }
        Console.WriteLine(stopwatch.Elapsed);
    }

    if (algorithm == "compact" && dataset == "twitter")
    {
        using (var computation = NewComputation.FromConfig(configuration))
        {
            var edges = System.IO.File.OpenRead(twitterFile)
                                      .ReadEdges()
                                      .AsNaiadStream(computation);

            using (var renamer = new AutoRenamer<Int32>())
            {
                // Rename both endpoints into a compact id space: first sources, then targets.
                var newEdges = edges.RenameUsing(renamer, edge => edge.source)
                                    .Select(x => new Edge(x.node, x.value.target))
                                    .RenameUsing(renamer, edge => edge.target)
                                    .Select(x => new Edge(x.value.source, x.node));

                edges = newEdges.FinishRenaming(renamer);
            }

            computation.Activate();
            computation.Join();
        }
    }
    #endregion

    #region page rank
    if (algorithm == "pagerank" && dataset == "twitter")
    {
        var stopwatch = System.Diagnostics.Stopwatch.StartNew();
        using (var computation = NewComputation.FromConfig(configuration))
        {
            computation.OnFrontierChange += (x, y) =>
            {
                Console.WriteLine(System.DateTime.Now + "\t" + string.Join(", ", y.NewFrontier));
                System.GC.GetTotalMemory(true);    // forces a full collection between epochs
            };

            var edges = System.IO.File.OpenRead(twitterFile)
                                      .ReadEdges()
                                      .AsNaiadStream(computation);

            edges.PageRank(20, "twitter").Subscribe();

            computation.Activate();
            computation.Join();
        }
        Console.WriteLine(stopwatch.Elapsed);
    }

    if (algorithm == "pagerank" && dataset == "livejournal")
    {
        var stopwatch = System.Diagnostics.Stopwatch.StartNew();
        using (var computation = NewComputation.FromConfig(configuration))
        {
            computation.OnFrontierChange += (x, y) =>
            {
                Console.WriteLine(System.DateTime.Now + "\t" + string.Join(", ", y.NewFrontier));
            };

            var edges = System.IO.File.OpenRead(livejournalFile)
                                      .ReadEdges()
                                      .AsNaiadStream(computation);

            edges.PageRank(20, "livejournal").Subscribe();

            computation.Activate();
            computation.Join();
        }
        Console.WriteLine(stopwatch.Elapsed);
    }
    #endregion
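
    // ReadEdges() is defined elsewhere in this project; a minimal sketch of the reader it is
    // assumed to implement is below, on the assumption that twitter_rv.bin and livejournal.bin
    // store edges as consecutive little-endian Int32 (source, target) pairs. The helper name
    // and the layout assumption are illustrative, not confirmed by this file.
    //
    //   static IEnumerable<Edge> ReadEdgesSketch(System.IO.Stream stream)
    //   {
    //       using (var reader = new System.IO.BinaryReader(stream))
    //       {
    //           // Eight bytes per edge: two Int32s. Stop when fewer than eight bytes remain.
    //           while (reader.BaseStream.Position + 8 <= reader.BaseStream.Length)
    //               yield return new Edge(reader.ReadInt32(), reader.ReadInt32());
    //       }
    //   }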
    #region connected components
    if (algorithm == "connectedcomponents" && dataset == "uk-2007-05")
    {
        var stopwatch = System.Diagnostics.Stopwatch.StartNew();
        using (var computation = NewComputation.FromConfig(configuration))
        {
            var format = Path.Combine(dataDir, @"uk-2007-05-part-{0}-of-{1}");

            var extraInput = new[] { string.Format(format, 3, 4) }.AsNaiadStream(computation)
                                                                  .PartitionBy(x => 3)
                                                                  .ReadGraph();

            computation.LoadGraph(format, 3, 4)
                       .UnionFind(106000000)
                       .PartitionBy(x => 3)
                       .Concat(extraInput)
                       .UnionFind(106000000);

            computation.Activate();
            computation.Join();
        }
        Console.WriteLine(stopwatch.Elapsed);
    }

    if (algorithm == "connectedcomponents" && dataset == "twitter")
    {
        // HDFS needs to be initialized from the main thread before distributed use.
        using (var hdfs = new Microsoft.Research.Peloponnese.Hdfs.HdfsInstance(new Uri(uriBase)))
        {
            bool exists = hdfs.IsFileExists("/dummy");
        }

        var readWatch = System.Diagnostics.Stopwatch.StartNew();
        using (var controller = NewController.FromConfig(configuration))
        {
            using (var readComputation = controller.NewComputation())
            {
                int parts = (args.Length > 4) ? Int32.Parse(args[4]) : 1;
                int machines = (args.Length > 5) ? Int32.Parse(args[5]) : 1;
                int another = (args.Length > 6) ? Int32.Parse(args[6]) : 1;

                var format = new Uri(uriBase + "twitter-40");
                var collection = readComputation.ReadHdfsBinaryCollection<Edge>(format);

                Stream<int[], Epoch> readStuff = null;
                switch (args[3])
                {
                    case "sp":
                        readStuff = collection.GroupEdgesSingleProcess(parts, parts);
                        break;
                    case "pp":
                        readStuff = collection.GroupEdgesPartsPerProcess(parts, parts, 16);
                        break;
                    case "op":
                        readStuff = collection.GroupEdgesOnePerProcess(parts, parts, 16);
                        break;
                    case "hp":
                        readStuff = collection.GroupEdgesHierarchyPerProcess(parts, machines, 16);
                        break;
                    case "hhp":
                        readStuff = collection.GroupEdgesHierarchyPerProcess(parts, machines * another, 16);
                        break;
                    default:
                        throw new ApplicationException("Grouping type must be sp, pp, op, hp or hhp");
                }

                var sink = new InterGraphDataSink<int[]>(readStuff);
                readComputation.Activate();
                readComputation.Join();

                Console.WriteLine("Reading done: " + readWatch.Elapsed);

                // Run the computation twenty times over the cached input to measure steady-state performance.
                for (int i = 0; i < 20; ++i)
                {
                    var stopwatch = System.Diagnostics.Stopwatch.StartNew();
                    using (var computation = controller.NewComputation())
                    {
                        var firstStage = computation.NewInput(sink.NewDataSource())
                                                    .ReformatInts();

                        if (parts * machines * another > 1)
                        {
                            firstStage = firstStage.UnionFindStruct(65000000, parts * machines * another, machines * another);
                        }

                        switch (args[3])
                        {
                            case "sp":
                                firstStage.PartitionBy(x => parts * parts)
                                          .UnionFind(65000000);
                                break;
                            case "pp":
                                firstStage.PartitionBy(x => 16 * parts)
                                          .UnionFind(65000000);
                                break;
                            case "op":
                                firstStage.PartitionBy(x => 16 * (parts * parts))
                                          .UnionFind(65000000);
                                break;
                            case "hp":
                                if (parts * parts < 16)
                                {
                                    firstStage.PartitionBy(x => 16 * x.destination + (parts * parts))
                                              .UnionFindStruct(65000000, 0, 0)
                                              .PartitionBy(x => 16 * (machines * machines))
                                              .UnionFind(65000000);
                                }
                                else
                                {
                                    firstStage.PartitionBy(x => 16 * (x.destination + (machines * machines)))
                                              .UnionFindStruct(65000000, 0, 0)
                                              .PartitionBy(x => 16 * ((machines * machines) + (machines * machines)))
                                              .UnionFind(65000000);
                                }
                                break;
                            case "hhp":
                                firstStage.PartitionBy(x => 16 * ((x.destination / (machines * machines)) + (machines * machines * another * another)) + (x.destination % (machines * machines)))
                                          .UnionFindStruct(65000000, -machines * another, another)
                                          .PartitionBy(x => 16 * (x.destination + (another * another) + (machines * machines * another * another)))
                                          .UnionFindStruct(65000000, -another, 1)
                                          .PartitionBy(x => 16 * ((another * another) + (another * another) + (machines * machines * another * another)))
                                          .UnionFind(65000000);
                                break;
                            default:
                                throw new ApplicationException("Grouping type must be sp, pp, op, hp or hhp");
                        }

                        computation.Activate();
                        computation.Join();
                    }
                    Console.WriteLine(stopwatch.Elapsed);
                }
            }
            controller.Join();
        }
    }
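
    // Reading of the staged plan above: each UnionFindStruct stage runs union-find over its
    // share of edges and forwards only the edges that connected two previously separate
    // components, so every PartitionBy/union-find round shrinks the data to (at most) a
    // spanning forest before a final stage stitches the forests together. Sequentially, the
    // final stage amounts to the following (survivingEdges is hypothetical; UnionFindSketch
    // is the illustrative class defined after this method):
    //
    //   var uf = new UnionFindSketch(65000000);
    //   foreach (var edge in survivingEdges)       // forest edges forwarded by all workers
    //       uf.Union(edge.source, edge.target);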
    // Variant of the run above in which the first union-find stage always uses the
    // hash-table implementation.
    if (algorithm == "hashtablecc" && dataset == "twitter")
    {
        // HDFS needs to be initialized from the main thread before distributed use.
        using (var hdfs = new Microsoft.Research.Peloponnese.Hdfs.HdfsInstance(new Uri(uriBase)))
        {
            bool exists = hdfs.IsFileExists("/dummy");
        }

        var readWatch = System.Diagnostics.Stopwatch.StartNew();
        using (var controller = NewController.FromConfig(configuration))
        {
            using (var readComputation = controller.NewComputation())
            {
                int parts = (args.Length > 4) ? Int32.Parse(args[4]) : 1;
                int machines = (args.Length > 5) ? Int32.Parse(args[5]) : 1;
                int another = (args.Length > 6) ? Int32.Parse(args[6]) : 1;

                var format = new Uri(uriBase + "twitter-40");
                var collection = readComputation.ReadHdfsBinaryCollection<Edge>(format);

                Stream<int[], Epoch> readStuff = null;
                switch (args[3])
                {
                    case "sp":
                        readStuff = collection.GroupEdgesSingleProcess(parts, parts);
                        break;
                    case "pp":
                        readStuff = collection.GroupEdgesPartsPerProcess(parts, parts, 16);
                        break;
                    case "op":
                        readStuff = collection.GroupEdgesOnePerProcess(parts, parts, 16);
                        break;
                    case "hp":
                        readStuff = collection.GroupEdgesHierarchyPerProcess(parts, machines, 16);
                        break;
                    case "hhp":
                        readStuff = collection.GroupEdgesHierarchyPerProcess(parts, machines * another, 16);
                        break;
                    default:
                        throw new ApplicationException("Grouping type must be sp, pp, op, hp or hhp");
                }

                var sink = new InterGraphDataSink<int[]>(readStuff);
                readComputation.Activate();
                readComputation.Join();

                Console.WriteLine("Reading done: " + readWatch.Elapsed);

                for (int i = 0; i < 20; ++i)
                {
                    var stopwatch = System.Diagnostics.Stopwatch.StartNew();
                    using (var computation = controller.NewComputation())
                    {
                        var firstStage = computation.NewInput(sink.NewDataSource())
                                                    .ReformatInts()
                                                    .UnionFindHashTable(65000000, parts * machines * another, machines * another);

                        switch (args[3])
                        {
                            case "sp":
                                firstStage.PartitionBy(x => parts * parts)
                                          .UnionFind(65000000);
                                break;
                            case "pp":
                                firstStage.PartitionBy(x => 16 * parts)
                                          .UnionFind(65000000);
                                break;
                            case "op":
                                firstStage.PartitionBy(x => 16 * (parts * parts))
                                          .UnionFind(65000000);
                                break;
                            case "hp":
                                if (parts * parts < 16)
                                {
                                    firstStage.PartitionBy(x => 16 * x.destination + (parts * parts))
                                              .UnionFindStruct(65000000, 0, 0)
                                              .PartitionBy(x => 16 * (machines * machines))
                                              .UnionFind(65000000);
                                }
                                else
                                {
                                    firstStage.PartitionBy(x => 16 * (x.destination + (machines * machines)))
                                              .UnionFindStruct(65000000, 0, 0)
                                              .PartitionBy(x => 16 * ((machines * machines) + (machines * machines)))
                                              .UnionFind(65000000);
                                }
                                break;
                            case "hhp":
                                firstStage.PartitionBy(x => 16 * ((x.destination / (machines * machines)) + (machines * machines * another * another)) + (x.destination % (machines * machines)))
                                          .UnionFindStruct(65000000, -machines * another, another)
                                          .PartitionBy(x => 16 * (x.destination + (another * another) + (machines * machines * another * another)))
                                          .UnionFindStruct(65000000, -another, 1)
                                          .PartitionBy(x => 16 * ((another * another) + (another * another) + (machines * machines * another * another)))
                                          .UnionFind(65000000);
                                break;
                            default:
                                throw new ApplicationException("Grouping type must be sp, pp, op, hp or hhp");
                        }

                        computation.Activate();
                        computation.Join();
                    }
                    Console.WriteLine(stopwatch.Elapsed);
                }
            }
            controller.Join();
        }
    }
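
    // UnionFindHashTable presumably keeps its parent pointers in a hash table rather than in a
    // preallocated array, paying memory only for node ids actually observed; the dense-array
    // variant sketched after this method trades that memory for cheaper lookups. An
    // illustrative Find over a Dictionary (hypothetical field and method, not the project's
    // code):
    //
    //   private readonly Dictionary<int, int> parent = new Dictionary<int, int>();
    //
    //   public int Find(int node)
    //   {
    //       int next;
    //       // Nodes absent from the table are implicitly their own roots.
    //       while (this.parent.TryGetValue(node, out next) && next != node)
    //           node = next;
    //       return node;
    //   }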
    // Variant in which every union-find stage uses the hash-table implementation.
    if (algorithm == "hashtableonlycc" && dataset == "twitter")
    {
        // HDFS needs to be initialized from the main thread before distributed use.
        using (var hdfs = new Microsoft.Research.Peloponnese.Hdfs.HdfsInstance(new Uri(uriBase)))
        {
            bool exists = hdfs.IsFileExists("/dummy");
        }

        var readWatch = System.Diagnostics.Stopwatch.StartNew();
        using (var controller = NewController.FromConfig(configuration))
        {
            using (var readComputation = controller.NewComputation())
            {
                int parts = (args.Length > 4) ? Int32.Parse(args[4]) : 1;
                int machines = (args.Length > 5) ? Int32.Parse(args[5]) : 1;
                int another = (args.Length > 6) ? Int32.Parse(args[6]) : 1;

                var format = new Uri(uriBase + "twitter-40");
                var collection = readComputation.ReadHdfsBinaryCollection<Edge>(format);

                Stream<int[], Epoch> readStuff = null;
                switch (args[3])
                {
                    case "sp":
                        readStuff = collection.GroupEdgesSingleProcess(parts, parts);
                        break;
                    case "pp":
                        readStuff = collection.GroupEdgesPartsPerProcess(parts, parts, 16);
                        break;
                    case "op":
                        readStuff = collection.GroupEdgesOnePerProcess(parts, parts, 16);
                        break;
                    case "hp":
                        readStuff = collection.GroupEdgesHierarchyPerProcess(parts, machines, 16);
                        break;
                    case "hhp":
                        readStuff = collection.GroupEdgesHierarchyPerProcess(parts, machines * another, 16);
                        break;
                    default:
                        throw new ApplicationException("Grouping type must be sp, pp, op, hp or hhp");
                }

                var sink = new InterGraphDataSink<int[]>(readStuff);
                readComputation.Activate();
                readComputation.Join();

                Console.WriteLine("Reading done: " + readWatch.Elapsed);

                for (int i = 0; i < 20; ++i)
                {
                    var stopwatch = System.Diagnostics.Stopwatch.StartNew();
                    using (var computation = controller.NewComputation())
                    {
                        var firstStage = computation.NewInput(sink.NewDataSource())
                                                    .ReformatInts();

                        if (parts * machines * another > 1)
                        {
                            firstStage = firstStage.UnionFindHashTable(65000000, parts * machines * another, machines * another);
                        }

                        switch (args[3])
                        {
                            case "sp":
                                firstStage.PartitionBy(x => parts * parts)
                                          .UnionFindHashTable(65000000);
                                break;
                            case "pp":
                                firstStage.PartitionBy(x => 16 * parts)
                                          .UnionFindHashTable(65000000);
                                break;
                            case "op":
                                firstStage.PartitionBy(x => 16 * (parts * parts))
                                          .UnionFindHashTable(65000000);
                                break;
                            case "hp":
                                if (parts * parts < 16)
                                {
                                    firstStage.PartitionBy(x => 16 * x.destination + (parts * parts))
                                              .UnionFindHashTable(65000000, 0, 0)
                                              .PartitionBy(x => 16 * (machines * machines))
                                              .UnionFindHashTable(65000000);
                                }
                                else
                                {
                                    firstStage.PartitionBy(x => 16 * (x.destination + (machines * machines)))
                                              .UnionFindHashTable(65000000, 0, 0)
                                              .PartitionBy(x => 16 * ((machines * machines) + (machines * machines)))
                                              .UnionFindHashTable(65000000);
                                }
                                break;
                            case "hhp":
                                firstStage.PartitionBy(x => 16 * ((x.destination / (machines * machines)) + (machines * machines * another * another)) + (x.destination % (machines * machines)))
                                          .UnionFindHashTable(65000000, -machines * another, another)
                                          .PartitionBy(x => 16 * (x.destination + (another * another) + (machines * machines * another * another)))
                                          .UnionFindHashTable(65000000, -another, 1)
                                          .PartitionBy(x => 16 * ((another * another) + (another * another) + (machines * machines * another * another)))
                                          .UnionFindHashTable(65000000);
                                break;
                            default:
                                throw new ApplicationException("Grouping type must be sp, pp, op, hp or hhp");
                        }

                        computation.Activate();
                        computation.Join();
                    }
                    Console.WriteLine(stopwatch.Elapsed);
                }
            }
            controller.Join();
        }
    }

    if (algorithm == "connectedcomponents" && dataset == "livejournal")
    {
        var stopwatch = System.Diagnostics.Stopwatch.StartNew();
        using (var computation = NewComputation.FromConfig(configuration))
        {
            var edges = System.IO.File.OpenRead(livejournalFile)
                                      .ReadEdges()
                                      .AsNaiadStream(computation);

            edges.UnionFind(5000000)
                 .PartitionBy(x => 0)
                 .UnionFind(5000000);

            computation.Activate();
            computation.Join();
        }
        Console.WriteLine(stopwatch.Elapsed);
    }
    #endregion
}
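
// The UnionFind, UnionFindStruct, and UnionFindHashTable operators used above are defined
// elsewhere in this project. For reference only, here is a minimal, self-contained sketch of
// the dense array-based union-find such an operator is assumed to build on (path halving on
// Find, union by attaching the larger root id to the smaller); the class name and API are
// illustrative, not the project's.
class UnionFindSketch
{
    private readonly int[] parent;

    public UnionFindSketch(int nodes)
    {
        // Each node starts as its own root.
        this.parent = new int[nodes];
        for (int i = 0; i < nodes; ++i)
            this.parent[i] = i;
    }

    // Follow parent pointers to the root, halving the path as we go.
    public int Find(int node)
    {
        while (this.parent[node] != node)
        {
            this.parent[node] = this.parent[this.parent[node]];
            node = this.parent[node];
        }
        return node;
    }

    // Returns true if the edge joined two components (i.e. it belongs to the spanning forest),
    // which is the test a distributed stage would use to decide whether to forward the edge.
    public bool Union(int source, int target)
    {
        int s = Find(source);
        int t = Find(target);

        if (s == t)
            return false;

        if (s < t)
            this.parent[t] = s;
        else
            this.parent[s] = t;

        return true;
    }
}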