예제 #1
0
파일: Map.cs 프로젝트: kapitanov/diploma
        /// <summary>
        /// Counts how often each word occurs in the text obtained from the first input
        /// file and stores the most frequent words in the first output file.
        /// </summary>
        /// <remarks>
        /// Reads the "MapReduce.MapIndex" and "MapReduce.ItemsCount" task parameters.
        /// The text is split on runs of non-word characters; tokens of two characters
        /// or fewer are ignored. Words are ranked by descending occurrence count and
        /// only the first <c>topItemsCount</c> of them are written.
        /// </remarks>
        public override void Execute()
        {
            currentNodeIndex = this.Parameter<int>("MapReduce.MapIndex");
            topItemsCount = this.Parameter<int>("MapReduce.ItemsCount");
            traceMessage = string.Format("AISTek.DataFlow.Sdk.Samples.MapReduce.Map({0})", currentNodeIndex);

            using (Perfomance.BeginTrace(traceMessage).BindToConsole().Start())
            {
                var content = ReadFile(Task.InputFiles.First());
                List<Word> rankedWords;

                using (Perfomance.Trace(traceMessage + ": Build groups").BindToConsole())
                {
                    // Group identical tokens, count each group, then rank by count (highest first).
                    rankedWords = Regex.Split(content, @"\W+")
                        .Where(token => token.Length > 2)
                        .GroupBy(token => token)
                        .Select(bucket => new {Word = bucket.Key, Count = bucket.Count()})
                        .OrderByDescending(entry => entry.Count)
                        .Select(entry => new Word(entry.Word, entry.Count))
                        .ToList();
                }

                using (Perfomance.Trace(traceMessage + ": Write results").BindToConsole())
                {
                    // Only the leading topItemsCount entries of the ranked list are persisted.
                    var topWords = rankedWords.Take(topItemsCount);
                    new Words(topWords)
                        .Serialize()
                        .ToExistingFile(Repository, Task.OutputFiles.First());
                }
            }
        }
예제 #2
0
        /// <summary>
        /// Merges the word lists deserialized from every input file, sums the counts of
        /// entries that share the same value, and writes the highest-count entries to
        /// the first output file.
        /// </summary>
        /// <remarks>
        /// Reads the "TakeTop" task parameter, which limits how many merged entries are
        /// kept after ordering by descending count.
        /// </remarks>
        public override void Execute()
        {
            takeTop = this.Parameter<int>("TakeTop");
            var traceMessage = "AISTek.DataFlow.Sdk.Samples.HashDistribute.TakeTopList()";

            using (Perfomance.BeginTrace(traceMessage).BindToConsole().Start())
            {
                var collected = new List<Word>();

                using (Perfomance.Trace(traceMessage + ": Read data").BindToConsole())
                {
                    // Each input file holds a serialized Words container; append them all.
                    foreach (var file in Task.InputFiles)
                    {
                        var wordsContainer = Repository.Deserialize<Words>(file);
                        collected.AddRange(wordsContainer);
                    }
                }

                List<Word> topEntries;

                using (Perfomance.Trace(traceMessage + ": Build list").BindToConsole())
                {
                    // Collapse duplicates by value, summing their counts, then keep the top ones.
                    topEntries = collected
                        .GroupBy(item => item.Value)
                        .Select(bucket => new Word(bucket.Key, bucket.Sum(x => x.Count)))
                        .OrderByDescending(merged => merged.Count)
                        .Take(takeTop)
                        .ToList();
                }

                using (Perfomance.Trace(traceMessage + ": Write result").BindToConsole())
                {
                    new Words(topEntries)
                        .Serialize()
                        .ToExistingFile(Repository, Task.OutputFiles.First());
                }
            }
        }
예제 #3
0
        /// <summary>
        /// Counts the words in the text obtained from the first input file, assigns each
        /// distinct word to a node via <c>NodeHash</c>, and writes one word list per node.
        /// </summary>
        /// <remarks>
        /// Reads the "Layer.NextSize" and "Node" task parameters. The text is split on
        /// runs of non-word characters; tokens of two characters or fewer are ignored.
        /// For every node index in <c>[0, nextNodesCount)</c> the matching output file is
        /// the first one whose "Node" metadata entry equals the index rendered as a string.
        /// A node with no assigned words still gets an (empty) word list written.
        /// </remarks>
        public override void Execute()
        {
            nextNodesCount = this.Parameter<int>("Layer.NextSize");
            var nodeId = this.Parameter<int>("Node");
            traceMessage = string.Format("AISTek.DataFlow.Sdk.Samples.HashDistribute.BuildWordsList({0})", nodeId);

            using (Perfomance.BeginTrace(traceMessage)
                .BindToConsole()
                .Start())
            {
                var text = ReadFile(Task.InputFiles.First());
                List<Group> wordGroups;

                using (Perfomance.Trace(traceMessage + ": Build groups").BindToConsole())
                {
                    // One Group per distinct word: its occurrence count and its target node.
                    wordGroups = (from word in Regex.Split(text, @"\W+")
                                  where word.Length > 2
                                  group word by word
                                  into groups
                                  let word = groups.Key
                                  let count = groups.Count()
                                  let node = NodeHash(word, nextNodesCount)
                                  select new Group {Word = word, Node = node, Count = count})
                        .ToList();
                }

                using (Perfomance.Trace(traceMessage + ": Write outputs").BindToConsole())
                {
                    // Bucket the groups by target node once, instead of re-scanning the whole
                    // list for every node. ToLookup keeps the original element order inside
                    // each bucket and yields an empty sequence for a node without any words.
                    var groupsByNode = wordGroups.ToLookup(wordGroup => wordGroup.Node);

                    for (var i = 0; i < nextNodesCount; i++)
                    {
                        // Render the index once rather than once per candidate output file.
                        var nodeKey = i.ToString();
                        var link = Task.OutputFiles.First(x => x.Metadata["Node"] == nodeKey);
                        var words = new Words(groupsByNode[i]
                            .Select(wordGroup => new Word(wordGroup.Word, wordGroup.Count)));
                        words.Serialize().ToExistingFile(Repository, link);
                    }
                }
            }
        }