RunFuzzyMatchPLINQ(
                string[] wordsLookup, 
                IEnumerable<string> files)
        {
           
            var matchSet = await (
                from contentFile in files.Traverse(f => File.ReadAllTextAsync(f))
                
                from words in contentFile.Traverse(text =>
                    WordRegex.Value.Split(text).Where(w => !IgnoreWords.Contains(w)))
                let wordSet = words.Flatten().AsSet()
            
                from bestMatch in wordsLookup.Traverse(wl => JaroWinklerModule.bestMatchTask(wordSet, wl, threshold))
                select bestMatch.Flatten());
            
            
            // NOTES
            // Here is the code that leverages the "ReadFileLinesAndFlatten" method
            
            //var matchSet = await (
            //    from contentFile in files.Traverse(f => ReadFileLinesAndFlatten(f))
            //    let wordSet = contentFile.Flatten().AsSet()
            //    from bestMatch in wordsLookup.Traverse(wl => JaroWinklerModule.bestMatchTask(wordSet, wl, threshold))
            //    select bestMatch.Flatten());

            return PrintSummary(matchSet.AsSet());
        }
            RunFuzzyMatchSequential(
                string[] wordsLookup, 
                IEnumerable<string> files)
        {
            // Sequential workflow -> how can we parallelize this work?
            // The collection 'matchSet' cannot be shared among threads
            // (a hedged Parallel.ForEach sketch follows after this method)

            var matchSet = new HashSet<WordDistanceStruct>();

            foreach (var file in files)
            {
                string readText = File.ReadAllText(file);

                var words = readText.Split(punctuation.Value)
                    .Where(w => !IgnoreWords.Contains(w))
                    .AsSet();

                foreach (var wl in wordsLookup)
                {
                    var bestMatch = JaroWinklerModule.bestMatch(words, wl, threshold);
                    matchSet.AddRange(bestMatch);
                }
            }

            return PrintSummary(matchSet);
        }
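        // A hedged sketch (not part of the workshop code) of one way to parallelize the
        // sequential loop above: Parallel.ForEach with per-thread partial HashSets that are
        // merged under a lock once per worker. The method name is hypothetical, and it reuses
        // the snippet's helpers (punctuation, IgnoreWords, AsSet, AddRange, JaroWinklerModule,
        // threshold, PrintSummary) as-is. Assumes System.Linq and System.Threading.Tasks.
        static void RunFuzzyMatchParallelForEachSketch(
            string[] wordsLookup,
            IEnumerable<string> files)
        {
            var matchSet = new HashSet<WordDistanceStruct>();
            var gate = new object();

            Parallel.ForEach(
                files,
                () => new HashSet<WordDistanceStruct>(),          // thread-local partial result
                (file, loopState, local) =>
                {
                    var words = File.ReadAllText(file)
                        .Split(punctuation.Value)
                        .Where(w => !IgnoreWords.Contains(w))
                        .AsSet();

                    foreach (var wl in wordsLookup)
                        local.AddRange(JaroWinklerModule.bestMatch(words, wl, threshold));

                    return local;
                },
                local => { lock (gate) matchSet.AddRange(local); }); // merge each worker's set once

            PrintSummary(matchSet);
        }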
        RunFuzzyMatchTaskProcessAsCompleteAbstracted(
            string[] wordsLookup,
            IEnumerable <string> files)
        {
            var matchSet = new HashSet <WordDistanceStruct>();

            // TODO (4) : Implement a reusable function called "ContinueAsComplete" to abstract the implementation of the
            // previous method "RunFuzzyMatchTaskProcessAsCompleteBasic".
            // The function "ContinueAsComplete" should satisfy the following signature:
            // Signature :
            // IEnumerable<Task<R>> ContinueAsComplete<T, R>(this IEnumerable<T> input, Func<T, Task<R>> selector)
            // (a sketch of one possible implementation follows after this method)
            //
            // C# : go to "Module 2\TaskAsComplete.cs" and add the missing code (4)
            // F# : go to the FSharp project "Module 2\TaskAsComplete.fs" and add the missing code (4)

            foreach (var textTask in files.ContinueAsComplete(file => File.ReadAllTextAsync(file)))
            {
                var text = await textTask;

                var words = WordRegex.Value
                            .Split(text)
                            .Where(w => !IgnoreWords.Contains(w))
                            .AsSet();

                foreach (var matchTask in wordsLookup.ContinueAsComplete(
                             wl => JaroWinklerModule.bestMatchTask(words, wl, threshold)))
                {
                    matchSet.AddRange(await matchTask);
                }
            }

            return(PrintSummary(matchSet));
        }
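        // A minimal sketch of one possible "ContinueAsComplete" implementation for TODO (4)
        // above (an assumption, not the workshop's TaskAsComplete.cs solution): start every
        // task eagerly, then expose proxy tasks that complete in finish order by routing each
        // result through a TaskCompletionSource. Extension method, so it would live in a
        // static class; assumes System.Linq, System.Threading and System.Threading.Tasks.
        public static IEnumerable<Task<R>> ContinueAsComplete<T, R>(
            this IEnumerable<T> input, Func<T, Task<R>> selector)
        {
            var tasks   = input.Select(selector).ToList();
            var proxies = tasks.Select(_ => new TaskCompletionSource<R>()).ToList();
            int index   = -1;

            foreach (var task in tasks)
            {
                task.ContinueWith(t =>
                {
                    // Each finished task fills the next free proxy, so callers observe
                    // results in completion order rather than input order.
                    var proxy = proxies[Interlocked.Increment(ref index)];
                    if (t.IsFaulted) proxy.TrySetException(t.Exception.InnerExceptions);
                    else if (t.IsCanceled) proxy.TrySetCanceled();
                    else proxy.TrySetResult(t.Result);
                }, TaskContinuationOptions.ExecuteSynchronously);
            }

            return proxies.Select(p => p.Task);
        }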
        RunFuzzyMatchTaskContinuation(
            string[] wordsLookup,
            IEnumerable <string> files)
        {
            // Let's start by converting the I/O operation to be asynchronous
            // The continuation-passing style avoids blocking any threads
            //
            // What about error handling? And cancellation (if any)?

            var matchSet = new HashSet <WordDistanceStruct>();

            foreach (var file in files)
            {
                var readFileTask = File.ReadAllTextAsync(file);

                IEnumerable <WordDistanceStruct[]> bestMatches =
                    await readFileTask
                    .ContinueWith(readText =>
                {
                    return(WordRegex.Value.Split(readText.Result)
                           .Where(w => !IgnoreWords.Contains(w)));
                })
                    .ContinueWith(words =>
                {
                    var tasks = (from wl in wordsLookup
                                 select JaroWinklerModule.bestMatchTask(words.Result, wl, threshold)).ToList();

                    return(Task.WhenAll(tasks));
                }).Unwrap();

                matchSet.AddRange(bestMatches.Flatten());
            }
            return(PrintSummary(matchSet));
        }
        public static void RunFuzzyMatchPipeline(
            string[] wordsLookup,
            IList <string> files)
        {
            var pipeline = Pipeline <string, string[]> .Create(file => File.ReadAllLinesAsync(file));

            pipeline
            .Then(lines =>
                  lines.SelectMany(l => l.Split(punctuation.Value)
                                   .Where(w => !IgnoreWords.Contains(w))).AsSet()
                  )
            .Then(wordSet =>
                  wordsLookup.Traverse(wl => JaroWinklerModule.bestMatchTask(wordSet, wl, threshold))
                  )
            .Then(matcheSet =>
                  PrintSummary(matcheSet.Flatten().AsSet())
                  );

            foreach (var file in files)
            {
                Console.WriteLine($"analyzing file {file}");
                pipeline.Enqueue(file);
            }
            // End C# Pipeline
        }
        public static void RunFuzzyMatchPipelineFSharp(
            string[] wordsLookup,
            IList <string> files)
        {
            // TODO (3) : In the previous example you implemented the monadic operator SelectMany (usually called Bind)
            // This operator enables the compiler to understand the monadic (LINQ) pattern, which allows you to write
            // expressive/declarative code in LINQ style
            // Let's implement a parallel Pipeline that allows you to keep the continuation semantic,
            // with the advantage of running
            // the transformations in parallel
            //
            // Implement the "Then" operator (instance method) that can be used to create and fluently compose a pipeline
            // for example:
            // pipeline.Then( .... ).Then(...)
            //
            // Also implement the logic in the "Enqueue" method
            //
            // F# : go to the FSharp project "Module 2\Pipeline.fs" and add the missing code (3.a and 3.b)
            //
            // To correctly handle a Multi-Producer/Multi-Consumer scenario,
            // take a look at these options
            //      BlockingCollection<TInput>.TryAddToAny
            //      BlockingCollection<TInput>.TryTakeFromAny
            // (a small C# sketch of a TryTakeFromAny consumer loop follows after this method)
            //
            // When you are complete, uncomment the code and run it

            // TODO (3) Start F# Pipeline
            var pipelineFSharp =
                Pipeline.Pipeline <string, string[]>
                .Create(file => File.ReadAllLinesAsync(file))
                .Then(lines =>
                      lines.SelectMany(l => l.Split(punctuation.Value)
                                       .Where(w => !IgnoreWords.Contains(w))).AsSet()
                      )
                .Then(wordSet =>
                      wordsLookup.Traverse(wl => JaroWinklerModule.bestMatchTask(wordSet, wl, threshold))
                      )
                .Then(matcheSet =>
                      matcheSet.Flatten().AsSet()
                      );

            pipelineFSharp.Execute(4, CancellationToken.None);

            var unit = (Unit)Activator.CreateInstance(typeof(Unit), true);

            foreach (var file in files)
            {
                pipelineFSharp.Enqueue(file,
                                       (tup =>
                {
                    Console.WriteLine($"analyzing file {file}");
                    PrintSummary(tup.Item2);
                    return(unit);
                }));
            }
            // End F# Pipeline
        }
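        // A hypothetical C# sketch of the Multi-Producer/Multi-Consumer hint in TODO (3)
        // above (names and shape are assumptions, not the Pipeline.fs solution): worker
        // threads drain an array of BlockingCollection buffers with the static
        // TryTakeFromAny helper. Assumes System.Collections.Concurrent, System.Linq
        // and System.Threading.
        static void ConsumeFromAnyBufferSketch<TInput>(
            BlockingCollection<TInput>[] buffers,
            Action<TInput> process,
            CancellationToken token)
        {
            while (!token.IsCancellationRequested && !buffers.All(b => b.IsCompleted))
            {
                // Waits up to 100 ms for an item from any buffer; returns the buffer index or -1.
                if (BlockingCollection<TInput>.TryTakeFromAny(buffers, out TInput item, 100) >= 0)
                    process(item);
            }
        }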
            RunFuzzyMatchBetterTaskContinuation(
                string[] wordsLookup, 
                IEnumerable<string> files)
        {
            // Ideally, we should handle potential errors and cancellation in every continuation
            // That is a lot of repeated code, which goes against the DRY principle

            var matchSet = new HashSet<WordDistanceStruct>();

            foreach (var file in files)
            {
                var readFileTask = File.ReadAllTextAsync(file);
                var bestMatches = await readFileTask
                    .ContinueWith(readText =>
                    {
                        switch (readText.Status)
                        {
                            case TaskStatus.Faulted:
                                Exception ex = readText.Exception;
                                while (ex is AggregateException && ex.InnerException != null)
                                    ex = ex.InnerException;
                                // do something with ex
                                return null;
                            case TaskStatus.Canceled:
                                // do something because Task cancelled
                                return null;
                            default:
                                return WordRegex.Value.Split(readText.Result)
                                    .Where(w => !IgnoreWords.Contains(w));
                        }
                    })
                    .ContinueWith(words =>
                    {
                        switch (words.Status)
                        {
                            case TaskStatus.Faulted:
                                Exception ex = words.Exception;
                                while (ex is AggregateException && ex.InnerException != null)
                                    ex = ex.InnerException;
                                // do something with ex
                                return null;
                            case TaskStatus.Canceled:
                                // do something because Task cancelled
                                return null;
                            default:
                                return wordsLookup.Traverse(wl =>
                                    JaroWinklerModule.bestMatchTask(words.Result, wl, threshold));
                        }
                    }).Unwrap();

                matchSet.AddRange(bestMatches.Flatten());
            }

            return PrintSummary(matchSet);
        }
        // Listing 2.23 Fast Fuzzy Match using precomputation
        public static Func <string, string> PartialFuzzyMatch(List <string> words) //#A
        {
            var wordSet = new HashSet <string>(words);                             //#B

            return(word =>
                   (from w in wordSet.AsParallel()
                    select JaroWinklerModule.Match(w, word))
                   .OrderByDescending(w => w.Distance)
                   .Select(w => w.Word)
                   .FirstOrDefault());                  //#C
        }
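        // A hypothetical usage of PartialFuzzyMatch (the word list and queries are made up
        // for illustration): the HashSet is precomputed once, and the returned closure is
        // then reused for many lookups without rebuilding the set each time.
        public static void PartialFuzzyMatchUsageSketch()
        {
            Func<string, string> fastLookup =
                PartialFuzzyMatch(new List<string> { "magic", "majority", "margin" });

            string m1 = fastLookup("majik");   // reuses the precomputed HashSet
            string m2 = fastLookup("margins"); // no set construction on this call either
            Console.WriteLine($"{m1}, {m2}");
        }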
        RunFuzzyMatchTaskProcessAsCompleteBasic(
            string[] wordsLookup,
            IEnumerable <string> files)
        {
            // An alternative pattern to parallelize the FuzzyMatch is "process as complete"
            // The idea of this pattern is to start the execution of all the operations (tasks)
            // at the same time, and then process them as they complete, instead of waiting for all the operations
            // to be completed before continuing.
            // In other words, this pattern returns a sequence of tasks which will be observed to complete with the same set
            // of results as the given input tasks, but in the order in which the original tasks complete.
            //
            // Here is a simple implementation:

            var matchSet = new HashSet <WordDistanceStruct>();

            var readFileTasks =
                (from file in files
                 select File.ReadAllTextAsync(file)
                ).ToList();

            while (readFileTasks.Count > 0)
            {
                await Task.WhenAny(readFileTasks)
                .ContinueWith(async readTask =>
                {
                    var finishedReadTask = readTask.Result;
                    readFileTasks.Remove(finishedReadTask);

                    var words = WordRegex.Value
                                .Split(finishedReadTask.Result)
                                .Where(w => !IgnoreWords.Contains(w));

                    var matchTasks =
                        (from wl in wordsLookup
                         select JaroWinklerModule.bestMatchTask(words, wl, threshold)
                        ).ToList();

                    while (matchTasks.Count > 0)
                    {
                        await Task.WhenAny(matchTasks)
                        .ContinueWith(matchTask =>
                        {
                            var finishedMatchTask = matchTask.Result;
                            matchTasks.Remove(finishedMatchTask);

                            matchSet.AddRange(finishedMatchTask.Result);
                        });
                    }
                }).Unwrap();   // Unwrap so the outer await waits for the inner async lambda to finish, not just start
            }

            return(PrintSummary(matchSet));
        }
        RunFuzzyMatchTaskComposition(
            string[] wordsLookup,
            IEnumerable <string> files)
        {
            // A better approach is to create a custom operator that preserves
            // the continuation semantic, while handling errors/exceptions and applying a transformation
            // Signatures :
            //     Task<TOut> Then<TIn, TOut>(this Task<TIn> task, Func<TIn, TOut> next)  : Functor
            //     Task<TOut> Then<TIn, TOut>(this Task<TIn> task, Func<TIn, Task<TOut>> next)   : Bind

            // Traverse the given files in parallel
            // TODO (1) : Implement a reusable and optimized function called "Then" that satisfies the previous signatures
            // (a sketch of one possible implementation follows after this method)
            // C# : go to the "Module 1\TaskCompoistion.cs" and add the missing code in TODO (1)
            // F# : go to the FSharp project "Module 1\TaskCompoistion.fs" and add the missing code
            //
            // Optional/bonus function to implement with signature :
            // Task<TOut> SelectMany<TIn, TMid, TOut>(this Task<TIn> input, Func<TIn, Task<TMid>> f, Func<TIn, TMid, TOut> projection)

            return
                (files.Traverse(file => File.ReadAllTextAsync(file))
                 .Then(fileContent =>
                       fileContent
                       .SelectMany(text => WordRegex.Value.Split(text))
                       .Where(w => !IgnoreWords.Contains(w))
                       .AsSet()
                       )
                 .Then(wordsSplit =>
                       wordsLookup.Traverse(wl => JaroWinklerModule.bestMatchTask(wordsSplit, wl, threshold))
                       )
                 .Then(matcheSet => PrintSummary(matcheSet.Flatten().AsSet())));

            // NOTES
            // In this scenario, and only for demo purposes, we are reading the text asynchronously
            // in one operation, and then we are treating the text as a single string.
            // In the case that the text is a large string, there are some performance penalties, especially
            // during the Regex Split. A better approach is to read the text files line by line and run the Regex against
            // chunks of strings.
            // One solution is to create a Task that reads, splits and flattens the input text
            // in one operation. The method "ReadFileLinesAndFlatten" (in the "TaskEx" static class)
            // implements this design.
            // Feel free to check the method and use it if you would like.
            // Here is the code that replaces the previous code.

            //return
            //    files.Traverse(file => ReadFileLinesAndFlatten(file))
            //        .Then(wordsSplit =>
            //        {
            //            var words = wordsSplit.Flatten();
            //            return wordsLookup.Traverse(wl => JaroWinklerModule.bestMatchTask(words, wl, threshold));
            //        })
            //        .Then(matcheSet => PrintSummary(matcheSet.Flatten().AsSet()));
        }
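        // A minimal sketch of the two "Then" combinators described in TODO (1) above. This is
        // an assumed implementation built on async/await (which already propagates faults and
        // cancellation), not necessarily the workshop's TaskCompoistion.cs solution. Extension
        // methods, so they would live in a static class.
        public static async Task<TOut> Then<TIn, TOut>(this Task<TIn> task, Func<TIn, TOut> next)
            => next(await task.ConfigureAwait(false));                              // Functor (map)

        public static async Task<TOut> Then<TIn, TOut>(this Task<TIn> task, Func<TIn, Task<TOut>> next)
            => await next(await task.ConfigureAwait(false)).ConfigureAwait(false);  // Bind (flatMap)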
        // Listing 2.22 A fuzzy match
        public static string FuzzyMatch(List <string> words, string word)
        {
            var wordSet = new HashSet <string>(words);   //#A

            string bestMatch =
                (from w in wordSet.AsParallel()         //#B
                 select JaroWinklerModule.Match(w, word))
                .OrderByDescending(w => w.Distance)
                .Select(w => w.Word)
                .FirstOrDefault();

            return(bestMatch);                           //#C
        }
            RunFuzzyMatchTaskComposition(
                string[] wordsLookup, 
                IEnumerable<string> files)
        {

            
            return
                files.Traverse(file => File.ReadAllTextAsync(file))
                    .Then(fileContent =>
                         fileContent
                             .SelectMany(text => WordRegex.Value.Split(text))
                             .Where(w => !IgnoreWords.Contains(w))
                             .AsSet()
                    )
                    .Then(wordsSplit =>
                        wordsLookup.Traverse(wl => JaroWinklerModule.bestMatchTask(wordsSplit, wl, threshold))
                    )
                    .Then(matcheSet => PrintSummary(matcheSet.Flatten().AsSet()));

            // NOTES
            // In this scenario, and only for demo purposes, we are reading the text asynchronously
            // in one operation, and then we are treating the text as a single string.
            // In the case that the text is a large string, there are some performance penalties, especially
            // during the Regex Split. A better approach is to read the text files line by line and run the Regex against
            // chunks of strings.
            // One solution is to create a Task that reads, splits and flattens the input text
            // in one operation. The method "ReadFileLinesAndFlatten" (in the "TaskEx" static class)
            // implements this design.
            // Feel free to check the method and use it if you would like.
            // Here is the code that replaces the previous code.
            
            //return
            //    files.Traverse(file => ReadFileLinesAndFlatten(file))
            //        .Then(wordsSplit =>
            //        {
            //            var words = wordsSplit.Flatten();
            //            return wordsLookup.Traverse(wl => JaroWinklerModule.bestMatchTask(words, wl, threshold));
            //        })
            //        .Then(matcheSet => PrintSummary(matcheSet.Flatten().AsSet()));
        }
        RunFuzzyMatchTaskProcessAsCompleteAbstracted(
            string[] wordsLookup,
            IEnumerable <string> files)
        {
            var matchSet = new HashSet <WordDistanceStruct>();

            foreach (var textTask in files.ContinueAsComplete(file => File.ReadAllTextAsync(file)))
            {
                var text = await textTask;

                var words = WordRegex.Value
                            .Split(text)
                            .Where(w => !IgnoreWords.Contains(w))
                            .AsSet();

                foreach (var matchTask in wordsLookup.ContinueAsComplete(
                             wl => JaroWinklerModule.bestMatchTask(words, wl, threshold)))
                {
                    matchSet.AddRange(await matchTask);
                }
            }

            return(PrintSummary(matchSet));
        }
        RunFuzzyMatchPLINQ(
            string[] wordsLookup,
            IEnumerable <string> files)
        {
            // TODO (2) : After having completed TODO (1), we should be able to
            // effortlessly implement a LINQ pattern over Task.
            // Rename the "Then" function you implemented to SelectMany, so that these three following signatures
            // are satisfied :
            //
            // Task<TOut> SelectMany<TIn, TMid, TOut>(this Task<TIn> input, Func<TIn, Task<TMid>> f, Func<TIn, TMid, TOut> projection)
            // Task<TOut> SelectMany<TIn, TOut>(this Task<TIn> first, Func<TIn, Task<TOut>> next)
            // Task<TOut> Select<TIn, TOut>(this Task<TIn> task, Func<TIn, TOut> projection)
            // (a hedged sketch of these operators follows after this method)
            //
            // Then uncomment the following code, add the missing code and run it

            var matchSet = await (
                from contentFile in files.Traverse(f => File.ReadAllTextAsync(f))
                from words in contentFile.Traverse(text =>
                    WordRegex.Value.Split(text).Where(w => !IgnoreWords.Contains(w)))
                let wordSet = words.Flatten().AsSet()
                // TODO (2)
                from bestMatch in wordsLookup.Traverse(wl => JaroWinklerModule.bestMatchTask(wordSet, wl, threshold))
                select bestMatch.Flatten());


            // NOTES
            // Here is the code that leverages the "ReadFileLinesAndFlatten" method

            //var matchSet = await (
            //    from contentFile in files.Traverse(f => ReadFileLinesAndFlatten(f))
            //    let wordSet = contentFile.Flatten().AsSet()
            //    from bestMatch in wordsLookup.Traverse(wl => JaroWinklerModule.bestMatchTask(wordSet, wl, threshold))
            //    select bestMatch.Flatten());

            return(PrintSummary(matchSet.AsSet()));
        }
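        // A hedged sketch of the Task LINQ operators requested in TODO (2) above, following
        // the three signatures listed in the comment (an assumed implementation, not the
        // workshop solution). Extension methods, so they would live in a static class.
        public static async Task<TOut> Select<TIn, TOut>(
            this Task<TIn> task, Func<TIn, TOut> projection)
            => projection(await task.ConfigureAwait(false));

        public static async Task<TOut> SelectMany<TIn, TOut>(
            this Task<TIn> first, Func<TIn, Task<TOut>> next)
            => await next(await first.ConfigureAwait(false)).ConfigureAwait(false);

        public static async Task<TOut> SelectMany<TIn, TMid, TOut>(
            this Task<TIn> input, Func<TIn, Task<TMid>> f, Func<TIn, TMid, TOut> projection)
        {
            TIn value = await input.ConfigureAwait(false);
            TMid mid  = await f(value).ConfigureAwait(false);
            // This overload is what lets the compiler chain multiple 'from' clauses over Task.
            return projection(value, mid);
        }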
        // Example F#
        public static async Task RunFuzzyMatchAgentFSharp(string[] wordsLookup, IList <string> files)
        {
            var cts = new CancellationTokenSource();
            var opt = new ExecutionDataflowBlockOptions
            {
                BoundedCapacity        = 10,
                MaxDegreeOfParallelism = 4,
                CancellationToken      = cts.Token
            };

            var inputBlock = new BufferBlock <string>(opt);

            var readLinesBlock =
                new TransformBlock <string, string>(
                    file => File.ReadAllTextAsync(file, cts.Token), opt);

            var splitWordsBlock = new TransformBlock <string, string[]>(
                text => WordRegex.Value.Split(text).Where(w => !IgnoreWords.Contains(w)).AsSet().ToArray(), opt);

            var foundMatchesBlock =
                new TransformBlock <string[], WordDistanceStruct[]>(async wordSet =>
            {
                var matches =
                    await wordsLookup.Traverse(wl => JaroWinklerModule.bestMatchTask(wordSet, wl, threshold));
                return(matches.Flatten().ToArray());
            }, opt);


            var linkOptions = new DataflowLinkOptions {
                PropagateCompletion = true
            };

            // TODO (7) (for F#)
            // Implement a Reactive MailboxProcessor in F#.
            // Go to the Fsharp project, Module 3 and follow the instructions (7.a)
            // then, uncomment the following code and remove the previous code that uses
            // the Agent based on TPL Dataflow

            var agent =
                new ReactiveAgent.AgentObservable <WordDistanceStruct[], Dictionary <string, HashSet <string> > >
                    (new Dictionary <string, HashSet <string> >(),
                    (state, matches) =>
            {
                var matchesDic = matches
                                 .GroupBy(w => w.Word).ToDictionary(k => k.Key,
                                                                    v => v.Select(w => w.Match).AsSet());

                // Cloning the state is important to stay race-condition free
                // or use an immutable collection
                var newState = Clone(state);
                foreach (var match in matchesDic)
                {
                    if (newState.TryGetValue(match.Key, out HashSet <string> values))
                    {
                        values.AddRange(match.Value);
                        newState[match.Key] = values;
                    }
                    else
                    {
                        newState.Add(match.Key, match.Value);
                    }
                }

                return(newState);
            });



            IDisposable disposeAll = new CompositeDisposable(
                inputBlock.LinkTo(readLinesBlock, linkOptions),
                readLinesBlock.LinkTo(splitWordsBlock, linkOptions),
                splitWordsBlock.LinkTo(foundMatchesBlock, linkOptions),
                foundMatchesBlock.LinkTo(agent),
                agent.AsObservable().Subscribe(
                    summaryMatches => PrintSummary(summaryMatches))
                );

            cts.Token.Register(disposeAll.Dispose);

            foreach (var file in files)
            {
                await inputBlock.SendAsync(file, cts.Token);
            }

            // inputBlock.Complete();
            // await foundMatchesBlock.Completion.ContinueWith(_ => disposeAll.Dispose());
        }
        public static async Task RunFuzzyMatchDataFlow(string[] wordsLookup, IList <string> files)
        {
            var cts = new CancellationTokenSource();
            var opt = new ExecutionDataflowBlockOptions
            {
                BoundedCapacity = 10,
                // TODO, change this value and check what is happening
                MaxDegreeOfParallelism = 1,
                CancellationToken      = cts.Token
            };

            int fileCount = files.Count;

            var inputBlock = new BufferBlock <string>(opt);

            var readLinesBlock =
                new TransformBlock <string, string>(
                    async file => await File.ReadAllTextAsync(file, cts.Token), opt);

            var splitWordsBlock =
                new TransformBlock <string, HashSet <string> >(
                    text => WordRegex.Value.Split(text).Where(w => !IgnoreWords.Contains(w)).AsSet(), opt);

            var batch =
                new BatchBlock <HashSet <string> >(fileCount);

            var foundMatchesBlock =
                new TransformBlock <HashSet <string>[], WordDistanceStruct[]>(
                    async wordSet =>
            {
                var wordSetFlatten = wordSet.Flatten().AsSet();
                var matches        =
                    await wordsLookup.Traverse(wl =>
                                               JaroWinklerModule.bestMatchTask(wordSetFlatten, wl, threshold));
                return(matches.Flatten().ToArray());
            }, opt);


            // TODO (5)
            // Implement a block named "printBlock", which prints the output of
            // the foundMatchesBlock using the "PrintSummary" method
            // Then link it to the "foundMatchesBlock" block
            // var printBlock = // missing code
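            // One possible shape for TODO (5), left commented out and hedged as a sketch
            // rather than the intended solution: an ActionBlock that prints each batch of
            // matches with the snippet's PrintSummary/AsSet helpers.
            // var printBlock = new ActionBlock<WordDistanceStruct[]>(
            //     matches => PrintSummary(matches.AsSet()), opt);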

            var linkOptions = new DataflowLinkOptions {
                PropagateCompletion = true
            };

            IDisposable disposeAll = new CompositeDisposable(
                inputBlock.LinkTo(readLinesBlock, linkOptions),
                readLinesBlock.LinkTo(splitWordsBlock, linkOptions),
                splitWordsBlock.LinkTo(batch, linkOptions),
                batch.LinkTo(foundMatchesBlock, linkOptions)
                // TODO: uncomment this code after
                // implementing TODO (5)
                // foundMatchesBlock.LinkTo(printBlock)
                );

            cts.Token.Register(disposeAll.Dispose);

            // TODO (6)
            // After completing TODO (5), remove or unlink the printBlock, and replace the output of the "foundMatchesBlock" block
            // with Reactive Extensions' "AsObservable", maintaining the call to the "PrintSummary" method
            // (a hedged sketch follows below)
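            // A hedged sketch for TODO (6) (an assumption about the intended wiring): observe
            // the block's output with Reactive Extensions instead of a linked print block.
            // var printSubscription =
            //     foundMatchesBlock.AsObservable()
            //                      .Subscribe(matches => PrintSummary(matches.AsSet()));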


            foreach (var file in files)
            {
                await inputBlock.SendAsync(file, cts.Token);
            }

            inputBlock.Complete();
            await foundMatchesBlock.Completion.ContinueWith(_ => disposeAll.Dispose());
        }
        // C# example
        public static async Task RunFuzzyMatchAgentCSharp(string[] wordsLookup, IList <string> files)
        {
            var cts = new CancellationTokenSource();
            var opt = new ExecutionDataflowBlockOptions
            {
                BoundedCapacity        = 10,
                MaxDegreeOfParallelism = 4,
                CancellationToken      = cts.Token
            };

            var inputBlock = new BufferBlock <string>(opt);

            var readLinesBlock =
                new TransformBlock <string, string>(
                    async file => await File.ReadAllTextAsync(file, cts.Token), opt);

            var splitWordsBlock =
                new TransformBlock <string, string[]>(
                    text => WordRegex.Value.Split(text).Where(w => !IgnoreWords.Contains(w)).AsSet().ToArray(), opt);

            var foundMatchesBlock =
                new TransformBlock <string[], WordDistanceStruct[]>(async wordSet =>
            {
                var matches =
                    await wordsLookup.Traverse(wl => JaroWinklerModule.bestMatchTask(wordSet, wl, threshold));
                return(matches.Flatten().ToArray());
            }, opt);


            var linkOptions = new DataflowLinkOptions {
                PropagateCompletion = true
            };

            // TODO (7) (for C#)
            // Implement a stateful agent using the TPL Dataflow.
            // The Agent should have an internal state protected from external access.
            // The function passed into the constructor applies a projection/reduction to the incoming messages and the current state,
            // returning a new state
            // (see AgentAggregator.cs; a sketch of one possible shape follows after this method)
            var agent = Agent.Start(new Dictionary <string, HashSet <string> >(),
                                    (Dictionary <string, HashSet <string> > state, WordDistanceStruct[] matches) =>
            {
                var matchesDic = matches
                                 .GroupBy(w => w.Word)
                                 .ToDictionary(
                    k => k.Key,
                    v => v.Select(w => w.Match).AsSet());

                var newState = Clone(state);
                foreach (var match in matchesDic)
                {
                    if (newState.TryGetValue(match.Key, out HashSet <string> values))
                    {
                        values.AddRange(match.Value);
                        newState[match.Key] = values;
                    }
                    else
                    {
                        newState.Add(match.Key, match.Value);
                    }
                }

                return(newState);
            });

            IDisposable disposeAll = new CompositeDisposable(
                inputBlock.LinkTo(readLinesBlock, linkOptions),
                readLinesBlock.LinkTo(splitWordsBlock, linkOptions),
                splitWordsBlock.LinkTo(foundMatchesBlock, linkOptions),
                foundMatchesBlock.LinkTo(agent),
                agent.AsObservable()
                .Subscribe(
                    summaryMatches => PrintSummary(summaryMatches))
                );

            cts.Token.Register(disposeAll.Dispose);

            foreach (var file in files)
            {
                await inputBlock.SendAsync(file, cts.Token);
            }

            //  inputBlock.Complete();
            //  await foundMatchesBlock.Completion.ContinueWith(_ =>
            //      disposeAll.Dispose());
        }
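        // A minimal sketch for TODO (7) in C# (an assumed shape; see AgentAggregator.cs for
        // the actual exercise). The state lives only inside a single-threaded ActionBlock
        // message loop, so it never needs external locking. The workshop's agent is also
        // linkable and observable (LinkTo / AsObservable above); this sketch only covers the
        // ITargetBlock side. Assumes System, System.Threading.Tasks and
        // System.Threading.Tasks.Dataflow.
        public class StatefulDataflowAgentSketch<TState, TMessage> : ITargetBlock<TMessage>
        {
            private TState state;
            private readonly ActionBlock<TMessage> actionBlock;

            public StatefulDataflowAgentSketch(TState initialState, Func<TState, TMessage, TState> processor)
            {
                state = initialState;
                // MaxDegreeOfParallelism = 1 guarantees the state is updated by one message at a time.
                actionBlock = new ActionBlock<TMessage>(
                    message => state = processor(state, message),
                    new ExecutionDataflowBlockOptions { MaxDegreeOfParallelism = 1 });
            }

            public Task Completion => actionBlock.Completion;
            public void Complete() => actionBlock.Complete();
            public void Fault(Exception exception) => ((ITargetBlock<TMessage>)actionBlock).Fault(exception);

            public DataflowMessageStatus OfferMessage(
                DataflowMessageHeader messageHeader, TMessage messageValue,
                ISourceBlock<TMessage> source, bool consumeToAccept)
                => ((ITargetBlock<TMessage>)actionBlock)
                   .OfferMessage(messageHeader, messageValue, source, consumeToAccept);
        }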
        public static (bool Matched, string Word) CpuMatch(string word)
        {
            var match = JaroWinklerModule.bestMatch(WordsToSearch, word, 0.9);

            return(match.Any() ? (true, match.First().Match) : (false, null));
        }