/// <summary>
/// Map step: reads the first input file, counts how often each word longer than
/// two characters occurs, and writes the <c>MapReduce.ItemsCount</c> most frequent
/// words to the first output file.
/// </summary>
public override void Execute()
{
    currentNodeIndex = this.Parameter<int>("MapReduce.MapIndex");
    topItemsCount = this.Parameter<int>("MapReduce.ItemsCount");
    traceMessage = string.Format("AISTek.DataFlow.Sdk.Samples.MapReduce.Map({0})", currentNodeIndex);

    // Outer trace spans the whole step; the nested traces time each phase separately.
    using (Perfomance.BeginTrace(traceMessage).BindToConsole().Start())
    {
        var contents = ReadFile(Task.InputFiles.First());

        List<Word> rankedWords;
        using (Perfomance.Trace(traceMessage + ": Build groups").BindToConsole())
        {
            // Split on runs of non-word characters, drop tokens of length <= 2,
            // count identical tokens, then order by descending count.
            rankedWords = Regex.Split(contents, @"\W+")
                .Where(token => token.Length > 2)
                .GroupBy(token => token)
                .Select(bucket => new { Word = bucket.Key, Count = bucket.Count() })
                .OrderByDescending(entry => entry.Count)
                .Select(entry => new Word(entry.Word, entry.Count))
                .ToList();
        }

        using (Perfomance.Trace(traceMessage + ": Write results").BindToConsole())
        {
            // Only the leading topItemsCount entries are written out.
            var topWords = new Words(rankedWords.Take(topItemsCount));
            var serialized = topWords.Serialize();
            serialized.ToExistingFile(Repository, Task.OutputFiles.First());
        }
    }
}
/// <summary>
/// Merges the word lists from every input file, sums the counts of entries that
/// share the same value, and writes the <c>TakeTop</c> entries with the highest
/// totals to the first output file.
/// </summary>
public override void Execute()
{
    takeTop = this.Parameter<int>("TakeTop");
    var traceName = "AISTek.DataFlow.Sdk.Samples.HashDistribute.TakeTopList()";

    // Outer trace spans the whole step; the nested traces time each phase separately.
    using (Perfomance.BeginTrace(traceName).BindToConsole().Start())
    {
        var collected = new List<Word>();
        using (Perfomance.Trace(traceName + ": Read data").BindToConsole())
        {
            // Deserialize each input in turn and append its entries to one flat list.
            foreach (var inputFile in Task.InputFiles)
            {
                var container = Repository.Deserialize<Words>(inputFile);
                collected.AddRange(container);
            }
        }

        List<Word> topEntries;
        using (Perfomance.Trace(traceName + ": Build list").BindToConsole())
        {
            // Combine entries with the same value into one entry carrying the summed count,
            // then keep the takeTop largest.
            topEntries = collected
                .GroupBy(entry => entry.Value)
                .Select(bucket => new Word(bucket.Key, bucket.Sum(entry => entry.Count)))
                .OrderByDescending(merged => merged.Count)
                .Take(takeTop)
                .ToList();
        }

        using (Perfomance.Trace(traceName + ": Write result").BindToConsole())
        {
            var output = new Words(topEntries);
            var serialized = output.Serialize();
            serialized.ToExistingFile(Repository, Task.OutputFiles.First());
        }
    }
}
/// <summary>
/// Reads the first input file, counts how often each word longer than two
/// characters occurs, assigns every distinct word a node via <c>NodeHash</c>, and
/// writes one word list per node index in <c>[0, Layer.NextSize)</c> to the output
/// file whose "Node" metadata matches that index.
/// </summary>
public override void Execute()
{
    nextNodesCount = this.Parameter<int>("Layer.NextSize");
    var currentNode = this.Parameter<int>("Node");
    traceMessage = string.Format("AISTek.DataFlow.Sdk.Samples.HashDistribute.BuildWordsList({0})", currentNode);

    // Outer trace spans the whole step; the nested traces time each phase separately.
    using (Perfomance.BeginTrace(traceMessage).BindToConsole().Start())
    {
        var contents = ReadFile(Task.InputFiles.First());

        List<Group> buckets;
        using (Perfomance.Trace(traceMessage + ": Build groups").BindToConsole())
        {
            // Split on runs of non-word characters, drop tokens of length <= 2,
            // then record each distinct token with its count and target node.
            buckets = Regex.Split(contents, @"\W+")
                .Where(token => token.Length > 2)
                .GroupBy(token => token)
                .Select(occurrences =>
                {
                    var term = occurrences.Key;
                    var total = occurrences.Count();
                    var targetNode = NodeHash(term, nextNodesCount);
                    return new Group {Word = term, Node = targetNode, Count = total};
                })
                .ToList();
        }

        using (Perfomance.Trace(traceMessage + ": Write outputs").BindToConsole())
        {
            for (var nodeIndex = 0; nodeIndex < nextNodesCount; nodeIndex++)
            {
                // Copy the loop counter so the lambdas below capture a per-iteration value.
                var current = nodeIndex;

                // Throws if no output file carries this node index in its "Node" metadata.
                var destination = Task.OutputFiles.First(file => file.Metadata["Node"] == current.ToString());

                var nodeWords = new Words(
                    buckets
                        .Where(bucket => bucket.Node == current)
                        .Select(bucket => new Word(bucket.Word, bucket.Count)));

                var serialized = nodeWords.Serialize();
                serialized.ToExistingFile(Repository, destination);
            }
        }
    }
}