/// <summary>
/// Reads <paramref name="InputFile"/> line by line on the calling thread, tallies every
/// accepted word, and returns the <paramref name="TopCount"/> entries with the highest counts.
/// </summary>
/// <param name="InputFile">File whose lines are streamed with <c>File.ReadLines</c>.</param>
/// <param name="Separators">
/// Characters passed to <c>string.Split</c> to break each line into words; empty entries are removed.
/// </param>
/// <param name="TopCount">Maximum number of entries to return.</param>
/// <returns>
/// A new dictionary mapping each selected word to its count. Entries are inserted in
/// descending count order, but <c>Dictionary</c> does not guarantee enumeration order.
/// </returns>
/// <remarks>
/// The tally dictionary compares keys with <c>StringComparer.InvariantCultureIgnoreCase</c>.
/// The returned dictionary is built without a comparer, so its lookups use the default
/// (case-sensitive) string comparison.
/// Word filtering and counting are delegated to <c>TrackWordsClass.IsValidWord</c> and
/// <c>TrackWordsClass.TrackWordsOccurrence</c>, which are defined elsewhere; presumably the
/// latter increments the word's count in the supplied dictionary.
/// </remarks>
public static IDictionary<string, uint> GetTopWordsSequential(FileInfo InputFile, char[] Separators, uint TopCount)
{
    var counts = new Dictionary<string, uint>(StringComparer.InvariantCultureIgnoreCase);

    foreach (var line in File.ReadLines(InputFile.FullName))
    {
        foreach (var word in line.Split(Separators, StringSplitOptions.RemoveEmptyEntries))
        {
            // Only words accepted by the validity check are tallied.
            if (TrackWordsClass.IsValidWord(word))
            {
                TrackWordsClass.TrackWordsOccurrence(counts, word);
            }
        }
    }

    // Clamp before converting: a TopCount above int.MaxValue would otherwise wrap to a
    // negative int (making Take return nothing) or throw in a checked context.
    var limit = (int)Math.Min(TopCount, (uint)int.MaxValue);

    return counts
        .OrderByDescending(kv => kv.Value)
        .Take(limit)
        .ToDictionary(kv => kv.Key, kv => kv.Value);
}
/// <summary>
/// Tallies words from <paramref name="InputFile"/> using <c>Parallel.ForEach</c> with a
/// per-task local dictionary (map step) that is merged into a shared dictionary under a
/// lock when each task finishes (reduce step), then returns the
/// <paramref name="TopCount"/> entries with the highest counts.
/// </summary>
/// <param name="InputFile">File whose lines are streamed with <c>File.ReadLines</c>.</param>
/// <param name="Separators">
/// Characters passed to <c>string.Split</c> to break each line into words; empty entries are removed.
/// </param>
/// <param name="TopCount">Maximum number of entries to return.</param>
/// <returns>
/// A new dictionary mapping each selected word to its count. Entries are inserted in
/// descending count order, but <c>Dictionary</c> does not guarantee enumeration order.
/// </returns>
/// <remarks>
/// Both the local and the shared tally dictionaries compare keys with
/// <c>StringComparer.InvariantCultureIgnoreCase</c>. The returned dictionary is built
/// without a comparer, so its lookups use the default (case-sensitive) string comparison.
/// Word filtering and counting are delegated to <c>TrackWordsClass.IsValidWord</c> and
/// <c>TrackWordsClass.TrackWordsOccurrence</c>, which are defined elsewhere; presumably the
/// latter increments the word's count in the supplied dictionary.
/// </remarks>
public static IDictionary<string, uint> GetTopWordsParallelForEachMapReduce(FileInfo InputFile, char[] Separators, uint TopCount)
{
    var totals = new Dictionary<string, uint>(StringComparer.InvariantCultureIgnoreCase);

    Parallel.ForEach(
        File.ReadLines(InputFile.FullName),
        new ParallelOptions { MaxDegreeOfParallelism = Environment.ProcessorCount },
        // Local init: each task starts with its own private tally, so the loop body needs no locking.
        () => new Dictionary<string, uint>(StringComparer.InvariantCultureIgnoreCase),
        // Body: tally the accepted words of one line into the task-local dictionary.
        (line, loopState, lineIndex, localCounts) =>
        {
            foreach (var word in line.Split(Separators, StringSplitOptions.RemoveEmptyEntries))
            {
                if (TrackWordsClass.IsValidWord(word))
                {
                    TrackWordsClass.TrackWordsOccurrence(localCounts, word);
                }
            }

            return localCounts;
        },
        // Local finally: fold the task-local tally into the shared totals.
        localCounts =>
        {
            // Several tasks can reach this point concurrently; Dictionary is not safe for
            // concurrent writes, so the merge is serialized.
            lock (totals)
            {
                foreach (var pair in localCounts)
                {
                    // TryGetValue leaves 'existing' at 0 when the key is absent, so one
                    // read plus one write covers both the "new key" and "known key" cases.
                    uint existing;
                    totals.TryGetValue(pair.Key, out existing);
                    totals[pair.Key] = existing + pair.Value;
                }
            }
        });

    // Clamp before converting: a TopCount above int.MaxValue would otherwise wrap to a
    // negative int (making Take return nothing) or throw in a checked context.
    var limit = (int)Math.Min(TopCount, (uint)int.MaxValue);

    return totals
        .OrderByDescending(kv => kv.Value)
        .Take(limit)
        .ToDictionary(kv => kv.Key, kv => kv.Value);
}
/// <summary>
/// Tallies words from <paramref name="InputFile"/> with a single PLINQ <c>Aggregate</c>
/// call: each partition accumulates into its own dictionary, the partition dictionaries
/// are combined, and the <paramref name="TopCount"/> entries with the highest counts are
/// selected from the combined result.
/// </summary>
/// <param name="InputFile">File whose lines are streamed with <c>File.ReadLines</c>.</param>
/// <param name="Separators">
/// Characters passed to <c>string.Split</c> to break each line into words; empty entries are removed.
/// </param>
/// <param name="TopCount">Maximum number of entries to return.</param>
/// <returns>
/// A new dictionary mapping each selected word to its count. Entries are inserted in
/// descending count order, but <c>Dictionary</c> does not guarantee enumeration order.
/// </returns>
/// <remarks>
/// No degree of parallelism is specified, so PLINQ uses its default.
/// The accumulator dictionaries compare keys with
/// <c>StringComparer.InvariantCultureIgnoreCase</c>. The returned dictionary is built
/// without a comparer, so its lookups use the default (case-sensitive) string comparison.
/// Word filtering and counting are delegated to <c>TrackWordsClass.IsValidWord</c> and
/// <c>TrackWordsClass.TrackWordsOccurrence</c>, which are defined elsewhere; presumably the
/// latter increments the word's count in the supplied dictionary.
/// </remarks>
public static IDictionary<string, uint> GetTopWordsPLINQMapReduce(FileInfo InputFile, char[] Separators, uint TopCount)
{
    // Clamp before converting: a TopCount above int.MaxValue would otherwise wrap to a
    // negative int (making Take return nothing) or throw in a checked context.
    var limit = (int)Math.Min(TopCount, (uint)int.MaxValue);

    return File.ReadLines(InputFile.FullName)
        .AsParallel()
        .Aggregate(
            // #1 Seed factory: a fresh accumulator per partition.
            () => new Dictionary<string, uint>(StringComparer.InvariantCultureIgnoreCase),
            // #2 Update: tally the accepted words of one line into the partition's accumulator.
            (partitionCounts, line) =>
            {
                foreach (var word in line.Split(Separators, StringSplitOptions.RemoveEmptyEntries))
                {
                    if (TrackWordsClass.IsValidWord(word))
                    {
                        TrackWordsClass.TrackWordsOccurrence(partitionCounts, word);
                    }
                }

                return partitionCounts;
            },
            // #3 Combine: add one partition's counts into the running combined accumulator.
            (combined, partitionCounts) =>
            {
                foreach (var pair in partitionCounts)
                {
                    // TryGetValue leaves 'existing' at 0 when the key is absent, so one
                    // read plus one write covers both the "new key" and "known key" cases.
                    uint existing;
                    combined.TryGetValue(pair.Key, out existing);
                    combined[pair.Key] = existing + pair.Value;
                }

                return combined;
            },
            // #4 Result selector: keep the highest-count entries.
            combined => combined
                .OrderByDescending(kv => kv.Value)
                .Take(limit)
                .ToDictionary(kv => kv.Key, kv => kv.Value));
}
/// <summary>
/// Splits and filters the lines of <paramref name="InputFile"/> with a PLINQ query, then
/// tallies the resulting words on the calling thread and returns the
/// <paramref name="TopCount"/> entries with the highest counts.
/// </summary>
/// <param name="InputFile">File whose lines are streamed with <c>File.ReadLines</c>.</param>
/// <param name="Separators">
/// Characters passed to <c>string.Split</c> to break each line into words; empty entries are removed.
/// </param>
/// <param name="TopCount">Maximum number of entries to return.</param>
/// <returns>
/// A new dictionary mapping each selected word to its count. Entries are inserted in
/// descending count order, but <c>Dictionary</c> does not guarantee enumeration order.
/// </returns>
/// <remarks>
/// Only the split/filter stage is parallel; the counting loop is an ordinary
/// <c>foreach</c> over the query results into a single dictionary.
/// The tally dictionary compares keys with <c>StringComparer.InvariantCultureIgnoreCase</c>.
/// The returned dictionary is built without a comparer, so its lookups use the default
/// (case-sensitive) string comparison.
/// Word filtering and counting are delegated to <c>TrackWordsClass.IsValidWord</c> and
/// <c>TrackWordsClass.TrackWordsOccurrence</c>, which are defined elsewhere; presumably the
/// latter increments the word's count in the supplied dictionary.
/// </remarks>
public static IDictionary<string, uint> GetTopWordsPLINQNaive(FileInfo InputFile, char[] Separators, uint TopCount)
{
    // Deferred parallel query: lines are split and filtered as the loop below consumes it.
    var acceptedWords = File.ReadLines(InputFile.FullName)
        .AsParallel()
        .SelectMany(l => l.Split(Separators, StringSplitOptions.RemoveEmptyEntries))
        .Where(TrackWordsClass.IsValidWord);

    var counts = new Dictionary<string, uint>(StringComparer.InvariantCultureIgnoreCase);

    foreach (var word in acceptedWords)
    {
        TrackWordsClass.TrackWordsOccurrence(counts, word);
    }

    // Clamp before converting: a TopCount above int.MaxValue would otherwise wrap to a
    // negative int (making Take return nothing) or throw in a checked context.
    var limit = (int)Math.Min(TopCount, (uint)int.MaxValue);

    return counts
        .OrderByDescending(kv => kv.Value)
        .Take(limit)
        .ToDictionary(kv => kv.Key, kv => kv.Value);
}