Пример #1
0
        /// <summary>
        /// Counts the words of a text file with PLINQ, tallying into one shared
        /// <see cref="ConcurrentDictionary{TKey, TValue}"/>, and returns the most frequent ones.
        /// </summary>
        /// <param name="InputFile">File whose lines are read.</param>
        /// <param name="Separators">Characters on which each line is split into words.</param>
        /// <param name="TopCount">Maximum number of words to return.</param>
        /// <returns>
        /// A new dictionary mapping at most <paramref name="TopCount"/> words to their counts,
        /// selected by descending count.
        /// </returns>
        public static IDictionary<string, uint> GetTopWordsPLINQConcurrentDictionary(FileInfo InputFile, char[] Separators, uint TopCount)
        {
            // One tally shared by every PLINQ worker; keys are compared with the
            // invariant culture, ignoring case.
            var counts = new ConcurrentDictionary<string, uint>(StringComparer.InvariantCultureIgnoreCase);

            // File.ReadLines streams the file, so lines reach the workers as they are read.
            var parallelLines = File.ReadLines(InputFile.FullName).AsParallel();

            parallelLines.ForAll(line =>
            {
                var tokens = line.Split(Separators, StringSplitOptions.RemoveEmptyEntries);
                foreach (var token in tokens)
                {
                    // Only words accepted by TrackWordsClass.IsValidWord are counted.
                    if (TrackWordsClass.IsValidWord(token))
                    {
                        // Thread-safe insert-or-increment on the shared tally.
                        counts.AddOrUpdate(token, 1, (existingKey, current) => current + 1);
                    }
                }
            });

            // Rank by count, keep the leading TopCount entries, and copy them into a plain dictionary.
            var ranked = counts.OrderByDescending(entry => entry.Value);
            var leaders = ranked.Take((int)TopCount);
            return leaders.ToDictionary(entry => entry.Key, entry => entry.Value);
        }
Пример #2
0
        /// <summary>
        /// Counts the words of a text file with <see cref="Parallel"/>.ForEach, tallying into one shared
        /// <see cref="ConcurrentDictionary{TKey, TValue}"/>, and returns the most frequent ones.
        /// </summary>
        /// <param name="InputFile">File whose lines are read.</param>
        /// <param name="Separators">Characters on which each line is split into words.</param>
        /// <param name="TopCount">Maximum number of words to return.</param>
        /// <returns>
        /// A new dictionary mapping at most <paramref name="TopCount"/> words to their counts,
        /// selected by descending count.
        /// </returns>
        public static IDictionary<string, uint> GetTopWordsParallelForEachConcurrentDictionary(FileInfo InputFile, char[] Separators, uint TopCount)
        {
            // Shared tally; keys are compared with the invariant culture, ignoring case.
            var tally = new ConcurrentDictionary<string, uint>(StringComparer.InvariantCultureIgnoreCase);

            var lines = File.ReadLines(InputFile.FullName);

            // Cap the loop at one concurrent operation per logical processor.
            var options = new ParallelOptions { MaxDegreeOfParallelism = Environment.ProcessorCount };

            Parallel.ForEach(lines, options, (line, loopState, lineNumber) =>
            {
                // Split the line, then keep only the words TrackWordsClass.IsValidWord accepts.
                var validWords = line
                    .Split(Separators, StringSplitOptions.RemoveEmptyEntries)
                    .Where(candidate => TrackWordsClass.IsValidWord(candidate));

                foreach (var validWord in validWords)
                {
                    // Thread-safe insert-or-increment on the shared tally.
                    tally.AddOrUpdate(validWord, 1, (existingKey, seen) => seen + 1);
                }
            });

            // Highest counts first, truncated to TopCount, copied into a plain dictionary.
            return tally
                .OrderByDescending(entry => entry.Value)
                .Take((int)TopCount)
                .ToDictionary(entry => entry.Key, entry => entry.Value);
        }
Пример #3
0
        /// <summary>
        /// Single-threaded baseline: counts the words of a text file and returns the most frequent ones.
        /// </summary>
        /// <param name="InputFile">File whose lines are read.</param>
        /// <param name="Separators">Characters on which each line is split into words.</param>
        /// <param name="TopCount">Maximum number of words to return.</param>
        /// <returns>
        /// A new dictionary mapping at most <paramref name="TopCount"/> words to their counts,
        /// selected by descending count.
        /// </returns>
        public static IDictionary<string, uint> GetTopWordsSequential(FileInfo InputFile, char[] Separators, uint TopCount)
        {
            // Keys are compared with the invariant culture, ignoring case.
            var tally = new Dictionary<string, uint>(StringComparer.InvariantCultureIgnoreCase);

            // Lazy pipeline: each line is split as it is read, and each word is
            // validated immediately before it is tracked.
            var acceptedWords = File.ReadLines(InputFile.FullName)
                .SelectMany(line => line.Split(Separators, StringSplitOptions.RemoveEmptyEntries))
                .Where(candidate => TrackWordsClass.IsValidWord(candidate));

            foreach (var acceptedWord in acceptedWords)
            {
                // NOTE(review): TrackWordsOccurrence is defined elsewhere; presumably it
                // increments the word's count in the dictionary - confirm.
                TrackWordsClass.TrackWordsOccurrence(tally, acceptedWord);
            }

            // Highest counts first, truncated to TopCount, copied into a new dictionary.
            var ranked = tally.OrderByDescending(entry => entry.Value);
            return ranked
                .Take((int)TopCount)
                .ToDictionary(entry => entry.Key, entry => entry.Value);
        }
Пример #4
0
        /// <summary>
        /// Counts the words of a text file with <see cref="Parallel"/>.ForEach using per-worker
        /// dictionaries that are merged into one result, and returns the most frequent words.
        /// </summary>
        /// <param name="InputFile">File whose lines are read.</param>
        /// <param name="Separators">Characters on which each line is split into words.</param>
        /// <param name="TopCount">Maximum number of words to return.</param>
        /// <returns>
        /// A new dictionary mapping at most <paramref name="TopCount"/> words to their counts,
        /// selected by descending count.
        /// </returns>
        public static IDictionary<string, uint> GetTopWordsParallelForEachMapReduce(FileInfo InputFile, char[] Separators, uint TopCount)
        {
            // Merged totals. While the loop runs this is only touched inside the lock below.
            var result = new Dictionary<string, uint>(StringComparer.InvariantCultureIgnoreCase);

            Parallel.ForEach(
                File.ReadLines(InputFile.FullName),
                new ParallelOptions { MaxDegreeOfParallelism = Environment.ProcessorCount },
                // Local init: every worker starts with its own private dictionary.
                () => new Dictionary<string, uint>(StringComparer.InvariantCultureIgnoreCase),
                // Body ("map"): count the words of one line into the worker's private dictionary.
                (line, state, index, localDic) =>
                {
                    foreach (var word in line.Split(Separators, StringSplitOptions.RemoveEmptyEntries))
                    {
                        // Skip words rejected by TrackWordsClass.IsValidWord
                        if (!TrackWordsClass.IsValidWord(word))
                        {
                            continue;
                        }
                        TrackWordsClass.TrackWordsOccurrence(localDic, word);
                    }
                    return localDic;
                },
                // Local finally ("reduce"): fold the worker's counts into the shared result.
                localDic =>
                {
                    lock (result)
                    {
                        foreach (var pair in localDic)
                        {
                            // TryGetValue leaves 0 in mergedSoFar for a word not seen yet, so one
                            // lookup plus one write replaces ContainsKey + indexer read + indexer write
                            // and keeps the time spent under the lock short.
                            uint mergedSoFar;
                            result.TryGetValue(pair.Key, out mergedSoFar);
                            result[pair.Key] = mergedSoFar + pair.Value;
                        }
                    }
                });

            // Highest counts first, truncated to TopCount, copied into a new dictionary.
            return result
                .OrderByDescending(kv => kv.Value)
                .Take((int)TopCount)
                .ToDictionary(kv => kv.Key, kv => kv.Value);
        }
Пример #5
0
        /// <summary>
        /// Counts the words of a text file with a hand-rolled producer/consumer pipeline: the calling
        /// thread reads lines into a bounded <see cref="BlockingCollection{T}"/> while a fixed set of
        /// worker tasks consumes them. Returns the most frequent words.
        /// </summary>
        /// <param name="InputFile">File whose lines are read.</param>
        /// <param name="Separators">Characters on which each line is split into words.</param>
        /// <param name="TopCount">Maximum number of words to return.</param>
        /// <returns>
        /// A new dictionary mapping at most <paramref name="TopCount"/> words to their counts,
        /// selected by descending count.
        /// </returns>
        public static IDictionary<string, uint> GetTopWordsProducerConsumer(FileInfo InputFile, char[] Separators, uint TopCount)
        {
            // Number of consumer tasks and maximum number of queued lines
            const int WorkerCount     = 12;
            const int BoundedCapacity = 10000;
            var       result          = new ConcurrentDictionary<string, uint>(StringComparer.InvariantCultureIgnoreCase);

            // Bounded queue: Add blocks the producer once BoundedCapacity lines are waiting
            var blockingCollection = new BlockingCollection<string>(BoundedCapacity);

            // Worker: drain the queue until adding is completed and the queue is empty
            Action work = () =>
            {
                foreach (var line in blockingCollection.GetConsumingEnumerable())
                {
                    foreach (var word in line.Split(Separators, StringSplitOptions.RemoveEmptyEntries))
                    {
                        // Skip words rejected by TrackWordsClass.IsValidWord
                        if (!TrackWordsClass.IsValidWord(word))
                        {
                            continue;
                        }
                        // Thread-safe insert-or-increment
                        result.AddOrUpdate(word, 1, (key, oldVal) => oldVal + 1);
                    }
                }
            };

            // Start the workers as long-running tasks on the default scheduler
            var tasks = Enumerable
                        .Range(1, WorkerCount)
                        .Select(n => Task.Factory.StartNew(work, CancellationToken.None, TaskCreationOptions.LongRunning, TaskScheduler.Default))
                        .ToArray();

            try
            {
                // Produce: feed every line of the file to the workers
                foreach (var line in File.ReadLines(InputFile.FullName))
                {
                    blockingCollection.Add(line);
                }
            }
            finally
            {
                // Always signal end-of-input. Without this, a read failure would leave every
                // worker blocked in GetConsumingEnumerable forever.
                blockingCollection.CompleteAdding();
            }

            // Wait for the workers to finish draining the queue
            Task.WaitAll(tasks);

            // Highest counts first, truncated to TopCount, copied into a plain dictionary.
            return result
                .OrderByDescending(kv => kv.Value)
                .Take((int)TopCount)
                .ToDictionary(kv => kv.Key, kv => kv.Value);
        }
Пример #6
0
        /// <summary>
        /// Counts the words of a text file with a producer task that reads lines into a bounded
        /// <see cref="BlockingCollection{T}"/> and a PLINQ query that consumes them.
        /// Returns the most frequent words.
        /// </summary>
        /// <param name="InputFile">File whose lines are read.</param>
        /// <param name="Separators">Characters on which each line is split into words.</param>
        /// <param name="TopCount">Maximum number of words to return.</param>
        /// <returns>
        /// A new dictionary mapping at most <paramref name="TopCount"/> words to their counts,
        /// selected by descending count.
        /// </returns>
        public static IDictionary<string, uint> GetTopWordsPLINQProducerConsumer(FileInfo InputFile, char[] Separators, uint TopCount)
        {
            // PLINQ degree of parallelism and maximum number of queued lines
            const int WorkerCount     = 12;
            const int BoundedCapacity = 10000;
            // Shared tally; keys are compared with the invariant culture, ignoring case
            var result = new ConcurrentDictionary<string, uint>(StringComparer.InvariantCultureIgnoreCase);

            // Bounded queue: Add blocks the producer once BoundedCapacity lines are waiting
            var blockingCollection = new BlockingCollection<string>(BoundedCapacity);

            // Worker: count the valid words of one line
            Action<string> work = line =>
            {
                foreach (var word in line.Split(Separators, StringSplitOptions.RemoveEmptyEntries))
                {
                    // Skip words rejected by TrackWordsClass.IsValidWord
                    if (!TrackWordsClass.IsValidWord(word))
                    {
                        continue;
                    }
                    // Thread-safe insert-or-increment
                    result.AddOrUpdate(word, 1, (key, oldVal) => oldVal + 1);
                }
            };

            // Produce on a separate task so this thread can drive the consuming query.
            var producer = Task.Run(() =>
            {
                try
                {
                    foreach (var line in File.ReadLines(InputFile.FullName))
                    {
                        blockingCollection.Add(line);
                    }
                }
                finally
                {
                    // Always signal end-of-input. If reading fails (for example the file is
                    // missing) the consuming query below would otherwise block forever.
                    blockingCollection.CompleteAdding();
                }
            });

            // Consume: returns once adding is completed and the queue has been drained
            blockingCollection
            .GetConsumingEnumerable()
            .AsParallel()
            .WithDegreeOfParallelism(WorkerCount)
            .WithMergeOptions(ParallelMergeOptions.NotBuffered)
            .ForAll(work);

            // Observe the producer so a read failure reaches the caller instead of being lost.
            // GetAwaiter().GetResult() rethrows the original exception rather than wrapping it
            // in an AggregateException.
            producer.GetAwaiter().GetResult();

            // Highest counts first, truncated to TopCount, copied into a plain dictionary.
            return result
                .OrderByDescending(kv => kv.Value)
                .Take((int)TopCount)
                .ToDictionary(kv => kv.Key, kv => kv.Value);
        }
Пример #7
0
 /// <summary>
 /// Counts the words of a text file with a single PLINQ <c>Aggregate</c> call: each partition
 /// counts into its own dictionary, the partial dictionaries are merged, and the most
 /// frequent words are selected from the merged result.
 /// </summary>
 /// <param name="InputFile">File whose lines are read.</param>
 /// <param name="Separators">Characters on which each line is split into words.</param>
 /// <param name="TopCount">Maximum number of words to return.</param>
 /// <returns>
 /// A new dictionary mapping at most <paramref name="TopCount"/> words to their counts,
 /// selected by descending count.
 /// </returns>
 public static IDictionary<string, uint> GetTopWordsPLINQMapReduce(FileInfo InputFile, char[] Separators, uint TopCount)
 {
     return File.ReadLines(InputFile.FullName)
         .AsParallel()
         // No WithDegreeOfParallelism call: the degree of parallelism is left to PLINQ.
         .Aggregate(
             // #1 Seed factory: a fresh private dictionary for each partition
             () => new Dictionary<string, uint>(StringComparer.InvariantCultureIgnoreCase),
             // #2 Update: count the words of one line into the partition's dictionary
             (localDic, line) =>
             {
                 foreach (var word in line.Split(Separators, StringSplitOptions.RemoveEmptyEntries))
                 {
                     // Skip words rejected by TrackWordsClass.IsValidWord
                     if (!TrackWordsClass.IsValidWord(word))
                     {
                         continue;
                     }
                     TrackWordsClass.TrackWordsOccurrence(localDic, word);
                 }
                 return localDic;
             },
             // #3 Combine: merge one partition's counts into the running total
             (finalResult, localDic) =>
             {
                 foreach (var pair in localDic)
                 {
                     // TryGetValue leaves 0 in mergedSoFar for a word not seen yet, so one lookup
                     // plus one write replaces ContainsKey + indexer read + indexer write.
                     uint mergedSoFar;
                     finalResult.TryGetValue(pair.Key, out mergedSoFar);
                     finalResult[pair.Key] = mergedSoFar + pair.Value;
                 }
                 return finalResult;
             },
             // #4 Result selector: highest counts first, truncated to TopCount
             finalResult => finalResult
                 .OrderByDescending(kv => kv.Value)
                 .Take((int)TopCount)
                 .ToDictionary(kv => kv.Key, kv => kv.Value));
 }
Пример #8
0
        /// <summary>
        /// Counts the words of a text file by pushing its lines through a
        /// <c>ProducerConsumerQueue</c> and returns the most frequent words.
        /// </summary>
        /// <param name="InputFile">File whose lines are read.</param>
        /// <param name="Separators">Characters on which each line is split into words.</param>
        /// <param name="TopCount">Maximum number of words to return.</param>
        /// <returns>
        /// A new dictionary mapping at most <paramref name="TopCount"/> words to their counts,
        /// selected by descending count.
        /// </returns>
        public static IDictionary<string, uint> GetTopWordsProducerConsumerEasier(FileInfo InputFile, char[] Separators, uint TopCount)
        {
            const int WorkerCount = 12;
            const int BoundedCapacity = 10000;

            // Shared tally; keys are compared with the invariant culture, ignoring case.
            var tally = new ConcurrentDictionary<string, uint>(StringComparer.InvariantCultureIgnoreCase);

            // Per-line handler given to the queue: count every valid word of the line.
            Action<string> countWordsInLine = text =>
            {
                var tokens = text.Split(Separators, StringSplitOptions.RemoveEmptyEntries);
                foreach (var token in tokens)
                {
                    if (TrackWordsClass.IsValidWord(token))
                    {
                        // Thread-safe insert-or-increment on the shared tally.
                        tally.AddOrUpdate(token, 1, (existingKey, current) => current + 1);
                    }
                }
            };

            // NOTE(review): ProducerConsumerQueue is declared elsewhere; presumably it runs the
            // handler on WorkerCount workers over a buffer of BoundedCapacity items - confirm.
            var queue = new ProducerConsumerQueue<string>(countWordsInLine, WorkerCount, BoundedCapacity);

            // Whatever the queue reports through OnException is written to the console.
            queue.OnException += (sender, ex) => Console.WriteLine("Oooops: " + ex.Message);

            // Produce: hand every line of the file to the queue, then mark the input as finished.
            var lines = File.ReadLines(InputFile.FullName);
            foreach (var text in lines)
            {
                queue.Add(text);
            }
            queue.CompleteAdding();

            // Block until the queue signals completion.
            queue.Completion.Wait();

            // Highest counts first, truncated to TopCount, copied into a plain dictionary.
            var ranked = tally.OrderByDescending(entry => entry.Value);
            return ranked
                .Take((int)TopCount)
                .ToDictionary(entry => entry.Key, entry => entry.Value);
        }
Пример #9
0
        /// <summary>
        /// Counts the words of a text file with a TPL Dataflow pipeline
        /// (buffer lines, split into words, batch words, count) and returns the most frequent words.
        /// </summary>
        /// <param name="InputFile">File whose lines are read.</param>
        /// <param name="Separators">Characters on which each line is split into words.</param>
        /// <param name="TopCount">Maximum number of words to return.</param>
        /// <returns>
        /// A new dictionary mapping at most <paramref name="TopCount"/> words to their counts,
        /// selected by descending count.
        /// </returns>
        public static IDictionary<string, uint> GetTopWordsDataFlow(FileInfo InputFile, char[] Separators, uint TopCount)
        {
            const int WorkerCount = 12;
            const int BoundedCapacity = 10000;

            // Shared tally; keys are compared with the invariant culture, ignoring case.
            var tally = new ConcurrentDictionary<string, uint>(StringComparer.InvariantCultureIgnoreCase);

            // Stage 1: bounded entry buffer for raw lines.
            var lineBufferOptions = new DataflowBlockOptions { BoundedCapacity = BoundedCapacity };
            var lineBuffer = new BufferBlock<string>(lineBufferOptions);

            // Stage 2: turn each line into its words, one line at a time.
            var splitterOptions = new ExecutionDataflowBlockOptions
            {
                MaxDegreeOfParallelism = 1,
                BoundedCapacity = BoundedCapacity
            };
            var wordSplitter = new TransformManyBlock<string, string>(
                text => text.Split(Separators, StringSplitOptions.RemoveEmptyEntries),
                splitterOptions);

            // Stage 3: group words into arrays of 5000 so the counting stage works on chunks.
            var wordBatcher = new BatchBlock<string>(5000);

            // Stage 4: count the valid words of each batch, up to WorkerCount batches concurrently.
            var counterOptions = new ExecutionDataflowBlockOptions { MaxDegreeOfParallelism = WorkerCount };
            var wordCounter = new ActionBlock<string[]>(batch =>
            {
                foreach (var candidate in batch)
                {
                    if (TrackWordsClass.IsValidWord(candidate))
                    {
                        // Thread-safe insert-or-increment on the shared tally.
                        tally.AddOrUpdate(candidate, 1, (existingKey, current) => current + 1);
                    }
                }
            }, counterOptions);

            // Wire the stages together; completion flows from each stage to the next.
            var propagate = new DataflowLinkOptions { PropagateCompletion = true };
            lineBuffer.LinkTo(wordSplitter, propagate);
            wordSplitter.LinkTo(wordBatcher, propagate);
            wordBatcher.LinkTo(wordCounter, propagate);

            // Produce: post each line, blocking until the send operation finishes.
            var lines = File.ReadLines(InputFile.FullName);
            foreach (var text in lines)
            {
                var pendingSend = lineBuffer.SendAsync(text);
                pendingSend.Wait();
            }

            // No more input; completion propagates through the links to the last stage.
            lineBuffer.Complete();

            // Block until the counting stage has completed.
            wordCounter.Completion.Wait();

            // Highest counts first, truncated to TopCount, copied into a plain dictionary.
            var ranked = tally.OrderByDescending(entry => entry.Value);
            return ranked
                .Take((int)TopCount)
                .ToDictionary(entry => entry.Key, entry => entry.Value);
        }