/// <summary>
/// Builds the "MapReduce" job: one map task per entry of <c>pathes</c>, followed by a
/// single reduce task that takes every map task's output files as its input.
/// </summary>
/// <returns>
/// The job, with all map tasks added first and the reduce task added last.
/// </returns>
public Shared.Japi.Job CreateJob()
{
    var job = new Job { Name = "MapReduce" };
    var assembly = new UploadAssembly(typeof(Map).Assembly);

    // Map stage: each input path gets its own task that produces one partial result file.
    Console.WriteLine("Generating \"map\" tasks..");
    var mapTasks = new List<Task>();
    var mapIndex = 0;
    foreach (var inputPath in pathes)
    {
        var mapTask = new Task();
        mapTask.EntryPoint = new EntryPoint { Assembly = assembly, ClassName = typeof(Map).FullName };
        mapTask.InputFiles = new List<File> { new UploadFile(inputPath) };
        mapTask.Name = string.Format("MapReduce.Map({0})", mapIndex);

        var mapParameters = new Dictionary<string, string>();
        mapParameters.Add("MapReduce.MapIndex", mapIndex.ToString());
        mapParameters.Add("MapReduce.ItemsCount", takeTop.ToString());
        mapTask.Parameters = mapParameters;

        var partialResult = new CreateFile(string.Format("PartialResult_{0}", mapIndex));
        mapTask.OutputFiles = Sequence.Items(partialResult)
            .Cast<File>()
            .ToList();

        mapTasks.Add(mapTask);
        mapIndex++;
    }

    // Reduce stage: a single task that depends on all map tasks and reads all of their outputs.
    Console.WriteLine("Generating \"reduce\" tasks...");
    var reduceTask = new Task();
    reduceTask.EntryPoint = new EntryPoint { Assembly = assembly, ClassName = typeof(Reduce).FullName };
    reduceTask.InputFiles = mapTasks
        .SelectMany(mapTask => mapTask.OutputFiles)
        .ToList();
    reduceTask.Name = "MapReduce.Reduce()";
    reduceTask.Parameters = new Dictionary<string, string>
    {
        { "MapReduce.ItemsCount", takeTop.ToString() }
    };
    reduceTask.OutputFiles = new List<File> { new CreateFile("Result") };
    reduceTask.Dependencies = mapTasks;

    job.Tasks.AddRange(mapTasks);
    job.Tasks.Add(reduceTask);
    return job;
}
/// <summary>
/// Builds the "DF.SDK.ComputeWordFrequencies" job as three layers of tasks:
/// <list type="number">
/// <item>one <c>BuildWordsList</c> task per entry of <c>pathes</c>, each producing
/// <c>nodes</c> output files tagged with a target "Node" metadata value;</item>
/// <item><c>nodes</c> <c>TakeTopList</c> tasks, each consuming the first-layer output
/// files whose "Node" metadata equals its own index;</item>
/// <item>a final <c>TakeTopList</c> task that consumes all second-layer outputs and
/// produces the "Result" file.</item>
/// </list>
/// </summary>
/// <returns>
/// The job, with first-layer tasks added first, then second-layer tasks, then the final task.
/// </returns>
public Job CreateJob()
{
    var job = new Job { Name = "DF.SDK.ComputeWordFrequencies" };
    var assembly = new UploadAssembly(typeof(BuildWordsList).Assembly);

    // 1st layer: one task per input path, fanning out into one file per target node.
    Console.WriteLine("Generating 1st layer tasks...");
    var fstLayer = new List<Task>();
    var n = 0;
    foreach (var path in pathes)
    {
        fstLayer.Add(new Task
        {
            EntryPoint = new EntryPoint { Assembly = assembly, ClassName = typeof(BuildWordsList).FullName },
            InputFiles = new List<File> { new UploadFile(path) },
            // The type name is passed as an argument so it is never interpreted as format text.
            Name = string.Format("{0}({1})", typeof(BuildWordsList).FullName, n),
            Parameters = new Dictionary<string, string>
            {
                { "Layer.NextSize", nodes.ToString() },
                { "Node", n.ToString() }
            },
            OutputFiles = (from target in Enumerable.Range(0, nodes)
                           // No trailing ')' here: the file name must not end with an unbalanced parenthesis.
                           let name = string.Format("HashDistribute_{0}_to_{1}", n, target)
                           select new CreateFile(name)
                           {
                               Metadata = new Dictionary<string, string>
                               {
                                   { "Node", target.ToString() }
                               }
                           })
                          .Cast<File>()
                          .ToList()
        });
        n++;
    }

    // Index the first-layer outputs by target node once, instead of re-scanning and
    // re-parsing every file for each second-layer task. ToLookup keeps the source order
    // within each key, so each task receives its files in the same order as a filtered scan.
    var filesByTargetNode = fstLayer
        .SelectMany(task => task.OutputFiles)
        .ToLookup(file => int.Parse(file.Metadata["Node"]));

    // 2nd layer: one task per node; each depends on the whole first layer.
    Console.WriteLine("Generating 2nd layer tasks...");
    var sndLayer = new List<Task>();
    for (var node = 0; node < nodes; node++)
    {
        sndLayer.Add(new Task
        {
            EntryPoint = new EntryPoint { Assembly = assembly, ClassName = typeof(TakeTopList).FullName },
            // A missing key yields an empty sequence, so a node with no files gets an empty list.
            InputFiles = filesByTargetNode[node].ToList(),
            Name = string.Format("{0}({1})", typeof(TakeTopList).FullName, node),
            Parameters = new Dictionary<string, string>
            {
                { "TakeTop", takeTop.ToString() }
            },
            OutputFiles = new List<File> { new CreateFile(string.Format("TakeTopList_{0}", node)) },
            Dependencies = fstLayer
        });
    }

    // 3rd layer: a single task that merges every second-layer output into "Result".
    Console.WriteLine("Generating 3rd layer tasks...");
    var resultTask = new Task
    {
        EntryPoint = new EntryPoint { Assembly = assembly, ClassName = typeof(TakeTopList).FullName },
        InputFiles = sndLayer
            .SelectMany(task => task.OutputFiles)
            .ToList(),
        Name = typeof(TakeTopList).FullName + "(last)",
        Parameters = new Dictionary<string, string>
        {
            { "TakeTop", takeTop.ToString() }
        },
        OutputFiles = new List<File> { new CreateFile("Result") },
        Dependencies = sndLayer
    };

    job.Tasks.AddRange(fstLayer);
    job.Tasks.AddRange(sndLayer);
    job.Tasks.Add(resultTask);
    return job;
}