Beispiel #1
0
        /// <summary>
        /// Counts word occurrences in a text file on the calling thread and returns the most frequent ones.
        /// </summary>
        /// <param name="InputFile">File to scan; its lines are streamed with <c>File.ReadLines</c>.</param>
        /// <param name="Separators">Characters that delimit words within a line; empty entries are discarded.</param>
        /// <param name="TopCount">Maximum number of word/count pairs to return.</param>
        /// <returns>
        /// At most <paramref name="TopCount"/> pairs, selected by descending count. Words are counted
        /// case-insensitively (invariant culture); the returned dictionary itself uses the default comparer.
        /// </returns>
        public static IDictionary <string, uint> GetTopWordsSequential(FileInfo InputFile, char[] Separators, uint TopCount)
        {
            // Take() expects an int. A plain (int) cast of a value above int.MaxValue wraps to a
            // negative number (or throws in a checked build), which would yield an empty result,
            // so clamp first.
            var limit = TopCount > int.MaxValue ? int.MaxValue : (int)TopCount;

            // Case-insensitive accumulator: "Word" and "word" share one counter
            var counts = new Dictionary <string, uint>(StringComparer.InvariantCultureIgnoreCase);

            foreach (var line in File.ReadLines(InputFile.FullName))
            {
                foreach (var word in line.Split(Separators, StringSplitOptions.RemoveEmptyEntries))
                {
                    // Only words accepted by TrackWordsClass.IsValidWord are recorded
                    if (TrackWordsClass.IsValidWord(word))
                    {
                        TrackWordsClass.TrackWordsOccurrence(counts, word);
                    }
                }
            }

            // Entries are inserted in descending count order.
            // NOTE(review): Dictionary does not formally guarantee enumeration order - confirm callers do not rely on it.
            return counts
                   .OrderByDescending(kv => kv.Value)
                   .Take(limit)
                   .ToDictionary(kv => kv.Key, kv => kv.Value);
        }
Beispiel #2
0
        /// <summary>
        /// Counts word occurrences in a text file with <c>Parallel.ForEach</c>: each worker fills a
        /// private dictionary (map), and the private dictionaries are merged into one result under a lock (reduce).
        /// </summary>
        /// <param name="InputFile">File to scan; its lines are streamed with <c>File.ReadLines</c>.</param>
        /// <param name="Separators">Characters that delimit words within a line; empty entries are discarded.</param>
        /// <param name="TopCount">Maximum number of word/count pairs to return.</param>
        /// <returns>
        /// At most <paramref name="TopCount"/> pairs, selected by descending count. Words are counted
        /// case-insensitively (invariant culture); the returned dictionary itself uses the default comparer.
        /// </returns>
        public static IDictionary <string, uint> GetTopWordsParallelForEachMapReduce(FileInfo InputFile, char[] Separators, uint TopCount)
        {
            // Take() expects an int. A plain (int) cast of a value above int.MaxValue wraps to a
            // negative number (or throws in a checked build), which would yield an empty result,
            // so clamp first.
            var limit = TopCount > int.MaxValue ? int.MaxValue : (int)TopCount;

            // Shared result dictionary; only touched inside the lock in the final step below
            var result = new Dictionary <string, uint>(StringComparer.InvariantCultureIgnoreCase);

            Parallel.ForEach(
                File.ReadLines(InputFile.FullName),
                new ParallelOptions {
                MaxDegreeOfParallelism = Environment.ProcessorCount
            },
                // Per-worker accumulator, so the hot loop needs no synchronization
                () => new Dictionary <string, uint>(StringComparer.InvariantCultureIgnoreCase),
                (line, state, index, localDic) =>
            {
                foreach (var word in line.Split(Separators, StringSplitOptions.RemoveEmptyEntries))
                {
                    // Only words accepted by TrackWordsClass.IsValidWord are recorded
                    if (TrackWordsClass.IsValidWord(word))
                    {
                        TrackWordsClass.TrackWordsOccurrence(localDic, word);
                    }
                }
                return localDic;
            },
                localDic =>
            {
                // Merge this worker's counts into the shared result
                lock (result)
                {
                    foreach (var pair in localDic)
                    {
                        // Single lookup: 'current' stays 0 when the key is not present yet
                        uint current;
                        result.TryGetValue(pair.Key, out current);
                        result[pair.Key] = current + pair.Value;
                    }
                }
            }
                );

            // Entries are inserted in descending count order.
            // NOTE(review): Dictionary does not formally guarantee enumeration order - confirm callers do not rely on it.
            return result
                   .OrderByDescending(kv => kv.Value)
                   .Take(limit)
                   .ToDictionary(kv => kv.Key, kv => kv.Value);
        }
 /// <summary>
 /// Counts word occurrences in a text file with the four-delegate PLINQ <c>Aggregate</c> overload:
 /// seed factory, per-line accumulation, combination of partial dictionaries, and final selection.
 /// </summary>
 /// <param name="InputFile">File to scan; its lines are streamed with <c>File.ReadLines</c>.</param>
 /// <param name="Separators">Characters that delimit words within a line; empty entries are discarded.</param>
 /// <param name="TopCount">Maximum number of word/count pairs to return.</param>
 /// <returns>
 /// At most <paramref name="TopCount"/> pairs, selected by descending count. Words are counted
 /// case-insensitively (invariant culture); the returned dictionary itself uses the default comparer.
 /// </returns>
 public static IDictionary <string, uint> GetTopWordsPLINQMapReduce(FileInfo InputFile, char[] Separators, uint TopCount)
 {
     // Take() expects an int. A plain (int) cast of a value above int.MaxValue wraps to a
     // negative number (or throws in a checked build), which would yield an empty result,
     // so clamp first.
     var limit = TopCount > int.MaxValue ? int.MaxValue : (int)TopCount;

     return File.ReadLines(InputFile.FullName)
            .AsParallel()
            // Degree of parallelism is left to PLINQ's default
            .Aggregate(
                // #1 seed: a fresh case-insensitive dictionary per partial aggregation
                () => new Dictionary <string, uint>(StringComparer.InvariantCultureIgnoreCase),
                // #2 accumulate: fold one line into the partial dictionary
                (localDic, line) =>
     {
         foreach (var word in line.Split(Separators, StringSplitOptions.RemoveEmptyEntries))
         {
             // Only words accepted by TrackWordsClass.IsValidWord are recorded
             if (TrackWordsClass.IsValidWord(word))
             {
                 TrackWordsClass.TrackWordsOccurrence(localDic, word);
             }
         }
         return localDic;
     },
                // #3 combine: add the counts of one partial dictionary into another
                (finalResult, localDic) =>
     {
         foreach (var pair in localDic)
         {
             // Single lookup: 'current' stays 0 when the key is not present yet
             uint current;
             finalResult.TryGetValue(pair.Key, out current);
             finalResult[pair.Key] = current + pair.Value;
         }
         return finalResult;
     },
                // #4 select: keep the most frequent entries.
                // NOTE(review): Dictionary does not formally guarantee enumeration order - confirm callers do not rely on it.
                finalResult => finalResult
                .OrderByDescending(kv => kv.Value)
                .Take(limit)
                .ToDictionary(kv => kv.Key, kv => kv.Value)
                );
 }
Beispiel #4
0
        /// <summary>
        /// Counts word occurrences in a text file: lines are split and filtered through a PLINQ query,
        /// and the resulting words are then counted one by one on the calling thread.
        /// </summary>
        /// <param name="InputFile">File to scan; its lines are streamed with <c>File.ReadLines</c>.</param>
        /// <param name="Separators">Characters that delimit words within a line; empty entries are discarded.</param>
        /// <param name="TopCount">Maximum number of word/count pairs to return.</param>
        /// <returns>
        /// At most <paramref name="TopCount"/> pairs, selected by descending count. Words are counted
        /// case-insensitively (invariant culture); the returned dictionary itself uses the default comparer.
        /// </returns>
        public static IDictionary <string, uint> GetTopWordsPLINQNaive(FileInfo InputFile, char[] Separators, uint TopCount)
        {
            // Take() expects an int. A plain (int) cast of a value above int.MaxValue wraps to a
            // negative number (or throws in a checked build), which would yield an empty result,
            // so clamp first.
            var limit = TopCount > int.MaxValue ? int.MaxValue : (int)TopCount;

            // Parallel query: split every line and keep only the words accepted by IsValidWord
            var validWords = File.ReadLines(InputFile.FullName)
                             .AsParallel()
                             .SelectMany(l => l.Split(Separators, StringSplitOptions.RemoveEmptyEntries))
                             .Where(TrackWordsClass.IsValidWord);

            // Counting happens in this foreach, so the plain Dictionary is only written by this thread
            var counts = new Dictionary <string, uint>(StringComparer.InvariantCultureIgnoreCase);
            foreach (var word in validWords)
            {
                TrackWordsClass.TrackWordsOccurrence(counts, word);
            }

            // Entries are inserted in descending count order.
            // NOTE(review): Dictionary does not formally guarantee enumeration order - confirm callers do not rely on it.
            return counts
                   .OrderByDescending(kv => kv.Value)
                   .Take(limit)
                   .ToDictionary(kv => kv.Key, kv => kv.Value);
        }