public static IDictionary <string, uint> GetTopWordsParallelForEachConcurrentDictionary(FileInfo InputFile, char[] Separators, uint TopCount)
{
    // Shared, thread-safe word-count map with case-insensitive keys.
    var counts = new ConcurrentDictionary <string, uint>(StringComparer.InvariantCultureIgnoreCase);

    var options = new ParallelOptions { MaxDegreeOfParallelism = Environment.ProcessorCount };

    // Process the file line-by-line on the thread pool; each worker writes
    // directly into the concurrent dictionary.
    Parallel.ForEach(
        File.ReadLines(InputFile.FullName),
        options,
        (line, state, index) =>
        {
            foreach (var word in line.Split(Separators, StringSplitOptions.RemoveEmptyEntries))
            {
                // Only count words accepted by the shared validity filter.
                if (TrackWordsClass.IsValidWord(word))
                {
                    counts.AddOrUpdate(word, 1, (key, current) => current + 1);
                }
            }
        });

    // Highest counts first, truncated to the requested number of entries.
    return counts
           .OrderByDescending(pair => pair.Value)
           .Take((int)TopCount)
           .ToDictionary(pair => pair.Key, pair => pair.Value);
}
public static IDictionary <string, uint> GetTopWordsSequential(FileInfo InputFile, char[] Separators, uint TopCount)
{
    // Single-threaded baseline: one pass over the file, one plain dictionary.
    var counts = new Dictionary <string, uint>(StringComparer.InvariantCultureIgnoreCase);

    foreach (var line in File.ReadLines(InputFile.FullName))
    {
        foreach (var word in line.Split(Separators, StringSplitOptions.RemoveEmptyEntries))
        {
            // Only count words accepted by the shared validity filter.
            if (TrackWordsClass.IsValidWord(word))
            {
                TrackWordsClass.TrackWordsOccurrence(counts, word);
            }
        }
    }

    // Highest counts first, truncated to the requested number of entries.
    return counts
           .OrderByDescending(pair => pair.Value)
           .Take((int)TopCount)
           .ToDictionary(pair => pair.Key, pair => pair.Value);
}
public static IDictionary <string, uint> GetTopWordsPLINQConcurrentDictionary(FileInfo InputFile, char[] Separators, uint TopCount)
{
    // Shared, thread-safe word-count map with case-insensitive keys.
    var counts = new ConcurrentDictionary <string, uint>(StringComparer.InvariantCultureIgnoreCase);

    // PLINQ fans the lines out across worker threads; every worker updates
    // the concurrent dictionary directly.
    File.ReadLines(InputFile.FullName)
        .AsParallel()
        .ForAll(line =>
        {
            foreach (var word in line.Split(Separators, StringSplitOptions.RemoveEmptyEntries))
            {
                // Only count words accepted by the shared validity filter.
                if (TrackWordsClass.IsValidWord(word))
                {
                    counts.AddOrUpdate(word, 1, (key, current) => current + 1);
                }
            }
        });

    // Highest counts first, truncated to the requested number of entries.
    return counts
           .OrderByDescending(pair => pair.Value)
           .Take((int)TopCount)
           .ToDictionary(pair => pair.Key, pair => pair.Value);
}
/// <summary>
/// Counts word occurrences with Parallel.ForEach using per-thread local
/// dictionaries (map phase) that are merged under a lock (reduce phase),
/// then returns the <paramref name="TopCount"/> most frequent words.
/// </summary>
public static IDictionary <string, uint> GetTopWordsParallelForEachMapReduce(FileInfo InputFile, char[] Separators, uint TopCount)
{
    // Final merged result; only touched inside the lock below.
    var result = new Dictionary <string, uint>(StringComparer.InvariantCultureIgnoreCase);

    Parallel.ForEach(
        File.ReadLines(InputFile.FullName),
        new ParallelOptions { MaxDegreeOfParallelism = Environment.ProcessorCount },
        // Each worker thread gets its own lock-free local dictionary.
        () => new Dictionary <string, uint>(StringComparer.InvariantCultureIgnoreCase),
        (line, state, index, localDic) =>
        {
            foreach (var word in line.Split(Separators, StringSplitOptions.RemoveEmptyEntries))
            {
                // Only count words accepted by the shared validity filter.
                if (!TrackWordsClass.IsValidWord(word))
                {
                    continue;
                }
                TrackWordsClass.TrackWordsOccurrence(localDic, word);
            }
            return (localDic);
        },
        // Reduce: merge each thread's local counts into the shared result.
        localDic =>
        {
            lock (result)
            {
                foreach (var pair in localDic)
                {
                    // FIX: single TryGetValue lookup instead of the original
                    // ContainsKey + indexer double lookup.
                    result[pair.Key] = result.TryGetValue(pair.Key, out var existing)
                        ? existing + pair.Value
                        : pair.Value;
                }
            }
        }
    );

    // Highest counts first, truncated to the requested number of entries.
    return (result
            .OrderByDescending(kv => kv.Value)
            .Take((int)TopCount)
            .ToDictionary(kv => kv.Key, kv => kv.Value));
}
/// <summary>
/// Counts word occurrences with a producer/consumer pipeline: the calling
/// thread reads lines into a bounded BlockingCollection while a fixed pool
/// of long-running worker tasks drains it, then returns the
/// <paramref name="TopCount"/> most frequent words.
/// </summary>
public static IDictionary <string, uint> GetTopWordsProducerConsumer(FileInfo InputFile, char[] Separators, uint TopCount)
{
    // Pipeline limits: number of consumer tasks and queue backpressure bound.
    const int WorkerCount = 12;
    const int BoundedCapacity = 10000;

    var result = new ConcurrentDictionary <string, uint>(StringComparer.InvariantCultureIgnoreCase);

    // FIX: BlockingCollection implements IDisposable; the original leaked it.
    using (var blockingCollection = new BlockingCollection <string>(BoundedCapacity))
    {
        // Consumer: drain lines until the collection is marked complete.
        Action work = () =>
        {
            foreach (var line in blockingCollection.GetConsumingEnumerable())
            {
                foreach (var word in line.Split(Separators, StringSplitOptions.RemoveEmptyEntries))
                {
                    // Only count words accepted by the shared validity filter.
                    if (!TrackWordsClass.IsValidWord(word))
                    {
                        continue;
                    }
                    result.AddOrUpdate(word, 1, (key, oldVal) => oldVal + 1);
                }
            }
        };

        // Start the workers on dedicated threads (LongRunning hint).
        var tasks = Enumerable.Range(1, WorkerCount)
                    .Select(n => Task.Factory.StartNew(work, CancellationToken.None, TaskCreationOptions.LongRunning, TaskScheduler.Default))
                    .ToArray();

        // Produce on the calling thread; Add blocks when the queue is full.
        // FIX: CompleteAdding now runs in a finally block so workers are
        // unblocked even if reading the file throws mid-way.
        try
        {
            foreach (var line in File.ReadLines(InputFile.FullName))
            {
                blockingCollection.Add(line);
            }
        }
        finally
        {
            blockingCollection.CompleteAdding();
        }

        // Wait for all consumers to finish (surfaces worker exceptions).
        Task.WaitAll(tasks);
    }

    // Highest counts first, truncated to the requested number of entries.
    return (result
            .OrderByDescending(kv => kv.Value)
            .Take((int)TopCount)
            .ToDictionary(kv => kv.Key, kv => kv.Value));
}
/// <summary>
/// Counts word occurrences with a background producer task feeding a bounded
/// BlockingCollection that PLINQ consumes in parallel, then returns the
/// <paramref name="TopCount"/> most frequent words.
/// </summary>
public static IDictionary <string, uint> GetTopWordsPLINQProducerConsumer(FileInfo InputFile, char[] Separators, uint TopCount)
{
    // Pipeline limits: PLINQ degree of parallelism and queue backpressure bound.
    const int WorkerCount = 12;
    const int BoundedCapacity = 10000;

    var result = new ConcurrentDictionary <string, uint>(StringComparer.InvariantCultureIgnoreCase);

    // FIX: BlockingCollection implements IDisposable; the original leaked it.
    using (var blockingCollection = new BlockingCollection <string>(BoundedCapacity))
    {
        // Per-line consumer: split, filter, and count words.
        Action <string> work = line =>
        {
            foreach (var word in line.Split(Separators, StringSplitOptions.RemoveEmptyEntries))
            {
                // Only count words accepted by the shared validity filter.
                if (!TrackWordsClass.IsValidWord(word))
                {
                    continue;
                }
                result.AddOrUpdate(word, 1, (key, oldVal) => oldVal + 1);
            }
        };

        // FIX: keep the producer task (the original discarded it, so its
        // exceptions were unobserved), and run CompleteAdding in a finally
        // block so the consumers below cannot block forever if the file
        // read fails part-way through.
        var producer = Task.Run(() =>
        {
            try
            {
                foreach (var line in File.ReadLines(InputFile.FullName))
                {
                    blockingCollection.Add(line);
                }
            }
            finally
            {
                blockingCollection.CompleteAdding();
            }
        });

        // Consume in parallel; NotBuffered keeps items streaming through.
        blockingCollection
        .GetConsumingEnumerable()
        .AsParallel()
        .WithDegreeOfParallelism(WorkerCount)
        .WithMergeOptions(ParallelMergeOptions.NotBuffered)
        .ForAll(work);

        // Surface any exception thrown while reading the file.
        producer.Wait();
    }

    // Highest counts first, truncated to the requested number of entries.
    return (result
            .OrderByDescending(kv => kv.Value)
            .Take((int)TopCount)
            .ToDictionary(kv => kv.Key, kv => kv.Value));
}
/// <summary>
/// Counts word occurrences with PLINQ's four-delegate Aggregate overload:
/// per-partition local dictionaries (seed + accumulator) are combined into a
/// single dictionary, which the result selector sorts and truncates to the
/// <paramref name="TopCount"/> most frequent words.
/// </summary>
public static IDictionary <string, uint> GetTopWordsPLINQMapReduce(FileInfo InputFile, char[] Separators, uint TopCount)
{
    return (File.ReadLines(InputFile.FullName)
            .AsParallel()
            // Let the runtime pick the degree of parallelism.
            //.WithDegreeOfParallelism(12)
            .Aggregate(
                // #1 Seed: one private dictionary per partition (no locking needed).
                () => new Dictionary <string, uint>(StringComparer.InvariantCultureIgnoreCase),
                // #2 Accumulator: count valid words from this partition's lines.
                (localDic, line) =>
                {
                    foreach (var word in line.Split(Separators, StringSplitOptions.RemoveEmptyEntries))
                    {
                        // Only count words accepted by the shared validity filter.
                        if (!TrackWordsClass.IsValidWord(word))
                        {
                            continue;
                        }
                        TrackWordsClass.TrackWordsOccurrence(localDic, word);
                    }
                    return localDic;
                },
                // #3 Combiner: merge a partition's counts into the running total.
                (finalResult, localDic) =>
                {
                    foreach (var pair in localDic)
                    {
                        // FIX: single TryGetValue lookup instead of the original
                        // ContainsKey + indexer double lookup.
                        finalResult[pair.Key] = finalResult.TryGetValue(pair.Key, out var existing)
                            ? existing + pair.Value
                            : pair.Value;
                    }
                    return finalResult;
                },
                // #4 Result selector: highest counts first, truncated to TopCount.
                finalResult => finalResult
                .OrderByDescending(kv => kv.Value)
                .Take((int)TopCount)
                .ToDictionary(kv => kv.Key, kv => kv.Value)
            ));
}
public static IDictionary <string, uint> GetTopWordsProducerConsumerEasier(FileInfo InputFile, char[] Separators, uint TopCount)
{
    // Pipeline limits: number of consumer workers and queue backpressure bound.
    const int WorkerCount = 12;
    const int BoundedCapacity = 10000;

    var counts = new ConcurrentDictionary <string, uint>(StringComparer.InvariantCultureIgnoreCase);

    // Per-line consumer: split, filter, and count words.
    Action <string> work = line =>
    {
        foreach (var word in line.Split(Separators, StringSplitOptions.RemoveEmptyEntries))
        {
            // Only count words accepted by the shared validity filter.
            if (TrackWordsClass.IsValidWord(word))
            {
                counts.AddOrUpdate(word, 1, (key, oldVal) => oldVal + 1);
            }
        }
    };

    // The helper queue owns the workers and the bounded buffer; worker
    // exceptions surface through its OnException event.
    var pcq = new ProducerConsumerQueue <string>(work, WorkerCount, BoundedCapacity);
    pcq.OnException += (sender, ex) => Console.WriteLine("Oooops: " + ex.Message);

    // Produce on the calling thread, then signal end of input.
    foreach (var line in File.ReadLines(InputFile.FullName))
    {
        pcq.Add(line);
    }
    pcq.CompleteAdding();

    // Block until every queued line has been consumed.
    pcq.Completion.Wait();

    // Highest counts first, truncated to the requested number of entries.
    return counts
           .OrderByDescending(pair => pair.Value)
           .Take((int)TopCount)
           .ToDictionary(pair => pair.Key, pair => pair.Value);
}
public static IDictionary <string, uint> GetTopWordsPLINQNaive(FileInfo InputFile, char[] Separators, uint TopCount)
{
    // Parallel query that splits lines into valid words; counting itself is
    // done sequentially below, which is why this variant is "naive".
    var validWords = File.ReadLines(InputFile.FullName)
                     .AsParallel()
                     .SelectMany(line => line.Split(Separators, StringSplitOptions.RemoveEmptyEntries))
                     .Where(TrackWordsClass.IsValidWord);

    // Tally the words on the calling thread.
    var counts = new Dictionary <string, uint>(StringComparer.InvariantCultureIgnoreCase);
    foreach (var word in validWords)
    {
        TrackWordsClass.TrackWordsOccurrence(counts, word);
    }

    // Highest counts first, truncated to the requested number of entries.
    return counts
           .OrderByDescending(pair => pair.Value)
           .Take((int)TopCount)
           .ToDictionary(pair => pair.Key, pair => pair.Value);
}
public static IDictionary <string, uint> GetTopWordsDataFlow(FileInfo InputFile, char[] Separators, uint TopCount)
{
    // Pipeline limits: parallelism of the counting stage and buffer bound.
    const int WorkerCount = 12;
    const int BoundedCapacity = 10000;

    var counts = new ConcurrentDictionary <string, uint>(StringComparer.InvariantCultureIgnoreCase);

    // Stage 1: bounded buffer of raw lines (provides backpressure).
    var lineBuffer = new BufferBlock <string>(
        new DataflowBlockOptions { BoundedCapacity = BoundedCapacity });

    // Stage 2: split each line into words (single-threaded, bounded).
    var splitWordsBlock = new TransformManyBlock <string, string>(
        line => line.Split(Separators, StringSplitOptions.RemoveEmptyEntries),
        new ExecutionDataflowBlockOptions { MaxDegreeOfParallelism = 1, BoundedCapacity = BoundedCapacity });

    // Stage 3: group words into batches to cut per-message overhead.
    var batchBlock = new BatchBlock <string>(5000);

    // Stage 4: filter and count each batch, in parallel.
    var countBlock = new ActionBlock <string[]>(words =>
    {
        foreach (var word in words)
        {
            // Only count words accepted by the shared validity filter.
            if (TrackWordsClass.IsValidWord(word))
            {
                counts.AddOrUpdate(word, 1, (key, oldVal) => oldVal + 1);
            }
        }
    }, new ExecutionDataflowBlockOptions { MaxDegreeOfParallelism = WorkerCount });

    // Wire the stages together, propagating completion down the chain.
    var linkOptions = new DataflowLinkOptions { PropagateCompletion = true };
    lineBuffer.LinkTo(splitWordsBlock, linkOptions);
    splitWordsBlock.LinkTo(batchBlock, linkOptions);
    batchBlock.LinkTo(countBlock, linkOptions);

    // Produce: SendAsync honors the bounded capacity; Wait applies
    // that backpressure to this (synchronous) producer thread.
    foreach (var line in File.ReadLines(InputFile.FullName))
    {
        lineBuffer.SendAsync(line).Wait();
    }
    lineBuffer.Complete();

    // Wait for the final stage to drain.
    countBlock.Completion.Wait();

    // Highest counts first, truncated to the requested number of entries.
    return counts
           .OrderByDescending(pair => pair.Value)
           .Take((int)TopCount)
           .ToDictionary(pair => pair.Key, pair => pair.Value);
}