Example #1
0
        /// <summary>
        /// Sample entry point: registers the built-in English models, sets up console output and logging,
        /// loads a sentence detector and two <c>AveragePerceptronTagger</c> instances, then runs the
        /// entity recognition demonstrations.
        /// </summary>
        private static async Task Main()
        {
            // The English built-in models have to be registered before anything is loaded.
            // Note that Storage.Current is not configured by this sample.
            Catalyst.Models.English.Register();

            Console.OutputEncoding = Encoding.UTF8;

            var consoleLogging = LoggerFactory.Create(logging => logging.AddConsole());
            ApplicationLogging.SetLoggerFactory(consoleLogging);

            // Catalyst currently supports 3 different types of models for Named Entity Recognition (NER):
            // - Gazetteer-like(i.e. [Spotter](https://github.com/curiosity-ai/catalyst/blob/master/Catalyst/src/Models/EntityRecognition/Spotter.cs))
            // - Regex-like(i.e. [PatternSpotter](https://github.com/curiosity-ai/catalyst/blob/master/Catalyst/src/Models/EntityRecognition/PatternSpotter.cs))
            // - Perceptron (i.e. [AveragePerceptronEntityRecognizer](https://github.com/curiosity-ai/catalyst/blob/master/Catalyst/src/Models/EntityRecognition/AveragePerceptronEntityRecognizer.cs))

            // The loaded instances below are not used afterwards; the calls are kept only for the loading itself.
            _ = await SentenceDetector.FromStoreAsync(Language.English, -1, "");

            var manuallyLoadedTagger = new AveragePerceptronTagger(Language.English, 0, "");
            await manuallyLoadedTagger.LoadDataAsync();

            _ = await AveragePerceptronTagger.FromStoreAsync(Language.English, -1, "");

            await DemonstrateAveragePerceptronEntityRecognizerAndPatternSpotter();

            DemonstrateSpotter();
        }
Example #2
0
        /// <summary>
        /// Sample entry point: trains a FastText supervised multi-label classifier on the Reuters corpus,
        /// auto-tunes it, and prints F1 / precision / recall for the train and test sets.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static async Task Main(string[] args)
        {
            Console.OutputEncoding = Encoding.UTF8;

            var consoleLogging = LoggerFactory.Create(logging => logging.AddConsole());
            ApplicationLogging.SetLoggerFactory(consoleLogging);

            // Model storage: the online repository, backed by the local folder ./catalyst-models/
            var localModels = new DiskStorage("catalyst-models");
            Storage.Current = new OnlineRepositoryStorage(localModels);

            // Download the Reuters corpus if necessary
            var(train, test) = await Corpus.Reuters.GetAsync();

            // The corpus text is untokenized so far, so run it through the English pipeline first
            var nlp = Pipeline.For(Language.English);

            var trainDocs = nlp.Process(train).ToArray();
            var testDocs  = nlp.Process(test).ToArray();

            // FastText supervised classifier with a multi-label loss (OneVsAll)
            var fastText = new FastText(Language.English, 0, "Reuters-Classifier");

            var settings = fastText.Data;
            settings.Type                    = FastText.ModelType.Supervised;
            settings.Loss                    = FastText.LossType.OneVsAll;
            settings.LearningRate            = 1f;
            settings.Dimensions              = 256;
            settings.Epoch                   = 100;
            settings.MinimumWordNgramsCounts = 5;
            settings.MaximumWordNgrams       = 3;
            settings.MinimumCount            = 5;

            fastText.Train(trainDocs);

            // You can also auto-tune the model using the algorithm from https://ai.facebook.com/blog/fasttext-blog-post-open-source-in-brief/
            fastText.AutoTuneTrain(trainDocs, testDocs, new FastText.AutoTuneOptions());

            // Compute predictions for both sets, each wrapped in its own Measure scope
            var predTrain = PredictAll(trainDocs, "Computing train-set predictions");
            var predTest  = PredictAll(testDocs, "Computing test set predictions");

            var resultsTrain = ComputeStats(predTrain);
            var resultsTest  = ComputeStats(predTest);

            Console.WriteLine("\n\n\n--- Results ---\n\n\n");
            foreach (var(trainStats, testStats) in resultsTrain.Zip(resultsTest))
            {
                Console.WriteLine($"\tScore cutoff: {trainStats.Cutoff:n2} Train: F1={trainStats.F1:n2} P={trainStats.Precision:n2} R={trainStats.Recall:n2} Test: F1={testStats.F1:n2} P={testStats.Precision:n2} R={testStats.Recall:n2}");
            }

            Console.ReadLine();

            // Predicts every document in parallel and maps each document to its prediction.
            Dictionary<IDocument, Dictionary<string, float>> PredictAll(IDocument[] documents, string description)
            {
                using (new Measure(Logger, description, documents.Length))
                {
                    return documents.AsParallel()
                                    .Select(document => (Doc: document, Pred: fastText.Predict(document)))
                                    .ToDictionary(pair => pair.Doc, pair => pair.Pred);
                }
            }
        }
Example #3
0
        /// <summary>
        /// Entry point of the training tool: configures logging and output, parses the command line,
        /// and runs every training step whose input path was supplied.
        /// </summary>
        /// <param name="args">Command-line arguments, parsed into <c>CommandLineOptions</c>.</param>
        static async Task Main(string[] args)
        {
            var consoleLogging = LoggerFactory.Create(logging => logging.AddConsole());
            ApplicationLogging.SetLoggerFactory(consoleLogging);
            ForceInvariantCultureAndUTF8Output();

            var parsedArguments = Parser.Default.ParseArguments <CommandLineOptions>(args);

            // When parsing fails nothing is trained: the error branch just returns a completed task
            await parsedArguments.MapResult(
                options => RunTrainingAsync(options),
                error => Task.CompletedTask);

            // Configures storage and process priority, then runs the requested training steps in order.
            async Task RunTrainingAsync(CommandLineOptions options)
            {
                var localModels = new DiskStorage(options.DiskStoragePath);

                if (string.IsNullOrWhiteSpace(options.Token))
                {
                    Storage.Current = localModels;
                }
                else
                {
                    // A token was given: use the writeable online models repository (for uploading)
                    Storage.Current = new OnlineWriteableRepositoryStorage(localModels, options.Token);
                }

                Thread.CurrentThread.Priority = ThreadPriority.AboveNormal;

                using (var currentProcess = Process.GetCurrentProcess())
                {
                    currentProcess.PriorityClass = ProcessPriorityClass.High;
                }

                var universalDependencies = options.UniversalDependenciesPath;
                if (!string.IsNullOrWhiteSpace(universalDependencies))
                {
                    TrainSentenceDetector.Train(universalDependencies);
                    TrainPOSTagger.Train(udSource: universalDependencies, ontonotesSource: options.OntonotesPath);
                }

                if (!string.IsNullOrWhiteSpace(options.WikiNERPath))
                {
                    // One model per language, trained sequentially in this order
                    var wikiNerLanguages = new[]
                    {
                        Language.English,
                        Language.French,
                        Language.German,
                        Language.Spanish,
                        Language.Italian,
                        Language.Portuguese,
                        Language.Russian,
                        Language.Dutch,
                        Language.Polish,
                    };

                    foreach (var language in wikiNerLanguages)
                    {
                        await TrainWikiNER.TrainAsync(options.WikiNERPath, language, 0, "WikiNER");
                    }
                }

                var languageSentences = options.FastTextLanguageSentencesPath;
                if (!string.IsNullOrWhiteSpace(languageSentences))
                {
                    TrainLanguageDetector.Train(languageSentences);
                    TrainLanguageDetector.Test(languageSentences);
                }

                if (!string.IsNullOrWhiteSpace(options.LanguageJsonPath))
                {
                    TrainLanguageDetector.CreateLanguageDetector(options.LanguageJsonPath);
                }
            }
        }
Example #4
0
        /// <summary>
        /// Entry point: enables console logging and runs the <c>NNTest</c> test.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            var consoleLogging = LoggerFactory.Create(logging => logging.AddConsole());
            ApplicationLogging.SetLoggerFactory(consoleLogging);

            // Only the NNTest run is active here
            new NNTest().Test();
        }
Example #5
0
        /// <summary>
        /// Sample entry point: runs the two language detection models available in Catalyst (CLD2-based and
        /// FastText-based) on short and long text samples and prints their guesses next to the actual language,
        /// then prints the top 10 FastText predictions for the Spanish long sample.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        public static async Task Main(string[] args)
        {
            Console.OutputEncoding = Encoding.UTF8;
            ApplicationLogging.SetLoggerFactory(LoggerFactory.Create(lb => lb.AddConsole()));

            //Need to register the languages we want to use first
            Catalyst.Models.English.Register();

            //This example shows the two language detection models available on Catalyst.
            //The first is derived from the Chrome former language detection code Compact Language Detector 2 (https://github.com/CLD2Owners/cld2)
            //and the newer model is derived from Facebook's FastText language detection dataset (see: https://fasttext.cc/blog/2017/10/02/blog-post.html)

            //Configures the model storage to use the local folder ./catalyst-models/
            Storage.Current = new DiskStorage("catalyst-models");

            var cld2LanguageDetector = await LanguageDetector.FromStoreAsync(Language.Any, Version.Latest, "");

            var fastTextLanguageDetector = await FastTextLanguageDetector.FromStoreAsync(Language.Any, Version.Latest, "");

            //We show below the detection on short and longer samples. You can expect lower precision on shorter texts, as there is less information for the model to work with
            //It's also interesting to see the kind of mistakes these models make, such as detecting Welsh as Gaelic_Scottish_Gaelic

            foreach (var(lang, text) in Data.ShortSamples)
            {
                PrintDetectedLanguages(lang, text);
            }

            foreach (var(lang, text) in Data.LongSamples)
            {
                PrintDetectedLanguages(lang, text);
            }

            // You can also access all predictions via the Predict method:
            var allPredictions = fastTextLanguageDetector.Predict(new Document(Data.LongSamples[Language.Spanish]));

            Console.WriteLine("\n\nTop 10 predictions and scores for the Spanish sample:");
            foreach (var kv in allPredictions.OrderByDescending(kv => kv.Value).Take(10))
            {
                Console.WriteLine($"{kv.Key.ToString().PadRight(40)}\tScore: {kv.Value:n2}");
            }

            // Runs both detectors on the same text and prints the actual language next to each detector's result.
            // Each detector gets its own Document so the Language read back is the one that detector produced.
            void PrintDetectedLanguages(Language actual, string text)
            {
                var fastTextDoc = new Document(text);
                fastTextLanguageDetector.Process(fastTextDoc);

                var cld2Doc = new Document(text);
                cld2LanguageDetector.Process(cld2Doc);

                Console.WriteLine(text);
                Console.WriteLine($"Actual:\t{actual}\nFT:\t{fastTextDoc.Language}\nCLD2:\t{cld2Doc.Language}");
                Console.WriteLine();
            }
        }
Example #6
0
        /// <summary>
        /// Sample entry point: runs the pre-trained WikiNER entity recognizer on sample documents, prints the
        /// recognized entities, then adds a <c>Neuralyzer</c> to the pipeline and processes the first sample again.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        public static async Task Main(string[] args)
        {
            Console.OutputEncoding = Encoding.UTF8;

            var consoleLogging = LoggerFactory.Create(logging => logging.AddConsole());
            ApplicationLogging.SetLoggerFactory(consoleLogging);

            // This example uses the WikiNER model, trained on the data provided by the paper "Learning multilingual named entity recognition from Wikipedia", Artificial Intelligence 194 (DOI: 10.1016/j.artint.2012.03.006)
            // The training data was sourced from the following repository: https://github.com/dice-group/FOX/tree/master/input/Wikiner

            // Model storage: the online repository, backed by the local folder ./catalyst-models/
            var localModels = new DiskStorage("catalyst-models");
            Storage.Current = new OnlineRepositoryStorage(localModels);

            // Build the English pipeline and append the WikiNER model to it
            Console.WriteLine("Loading models... This might take a bit longer the first time you run this sample, as the models have to be downloaded from the online repository");
            var nlp = await Pipeline.ForAsync(Language.English);

            var wikiNer = await AveragePerceptronEntityRecognizer.FromStoreAsync(language: Language.English, version: Version.Latest, tag: "WikiNER");
            nlp.Add(wikiNer);

            // A single document is processed with nlp.ProcessSingle
            var singleDocument = new Document(Data.Sample_1, Language.English);

            nlp.ProcessSingle(singleDocument);

            // Multiple documents can be processed in parallel (i.e. multithreading) by passing an IEnumerable<IDocument> to nlp.Process
            var processedDocuments = nlp.Process(MultipleDocuments());

            // Print all recognized entities. Note how the WikiNER model mistakes Amazon for a location on Data.Sample_1
            PrintDocumentEntities(singleDocument);
            foreach (var processed in processedDocuments)
            {
                PrintDocumentEntities(processed);
            }

            // Entity recognition mistakes can be corrected with the Neuralyzer class.
            // It uses the pattern matching entity recognition class to perform "forget-entity" and "add-entity"
            // passes on the document, after all other processes in the NLP pipeline have run
            var neuralyzer = new Neuralyzer(Language.English, 0, "WikiNER-sample-fixes");

            // Forget: a single token "Amazon" that was tagged with entity type "Location"
            neuralyzer.TeachForgetPattern(
                "Location",
                "Amazon",
                pattern => pattern.Add(new PatternUnit(P.Single().WithToken("Amazon").WithEntityType("Location"))));

            // Add: entity type "Organization" for a single token "Amazon"
            neuralyzer.TeachAddPattern(
                "Organization",
                "Amazon",
                pattern => pattern.Add(new PatternUnit(P.Single().WithToken("Amazon"))));

            // Attach the Neuralyzer to the pipeline
            nlp.UseNeuralyzer(neuralyzer);

            // Processing the same sample again now yields "Amazon" as entity type "Organization"
            var correctedDocument = new Document(Data.Sample_1, Language.English);

            nlp.ProcessSingle(correctedDocument);
            PrintDocumentEntities(correctedDocument);
        }
Example #7
0
        /// <summary>
        /// Sample entry point: sets up UTF-8 console output and console logging, then runs the
        /// entity recognition demonstrations.
        /// </summary>
        private static async Task Main()
        {
            Console.OutputEncoding = Encoding.UTF8;

            var consoleLogging = LoggerFactory.Create(logging => logging.AddConsole());
            ApplicationLogging.SetLoggerFactory(consoleLogging);

            // Catalyst currently supports 3 different types of models for Named Entity Recognition (NER):
            // - Gazetteer-like(i.e. [Spotter](https://github.com/curiosity-ai/catalyst/blob/master/Catalyst/src/Models/EntityRecognition/Spotter.cs))
            // - Regex-like(i.e. [PatternSpotter](https://github.com/curiosity-ai/catalyst/blob/master/Catalyst/src/Models/EntityRecognition/PatternSpotter.cs))
            // - Perceptron (i.e. [AveragePerceptronEntityRecognizer](https://github.com/curiosity-ai/catalyst/blob/master/Catalyst/src/Models/EntityRecognition/AveragePerceptronEntityRecognizer.cs))

            // The perceptron / pattern demo is awaited first, the spotter demo runs afterwards
            await DemonstrateAveragePerceptronEntityRecognizerAndPatternSpotter();

            DemonstrateSpotter();
        }
Example #8
0
        /// <summary>
        /// Sample entry point: trains an LDA topic model on the Reuters training set, stores it, loads it back
        /// and prints the predicted topics for every test document that gets a prediction.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static async Task Main(string[] args)
        {
            const string separator = "------------------------------------------";

            Console.OutputEncoding = Encoding.UTF8;

            var consoleLogging = LoggerFactory.Create(logging => logging.AddConsole());
            ApplicationLogging.SetLoggerFactory(consoleLogging);

            // The languages we want to use have to be registered first
            Catalyst.Models.English.Register();

            // Model storage: the local folder ./catalyst-models/
            Storage.Current = new DiskStorage("catalyst-models");

            // Download the Reuters corpus if necessary
            var(train, test) = await Corpus.Reuters.GetAsync();

            // The corpus text is untokenized so far, so run it through the English pipeline first
            var nlp = Pipeline.For(Language.English);

            var trainDocs = nlp.Process(train).ToArray();
            var testDocs  = nlp.Process(test).ToArray();

            // Train an LDA topic model on the training dataset and store it
            using (var trainedModel = new LDA(Language.English, 0, "reuters-lda"))
            {
                trainedModel.Data.NumberOfTopics = 20; // Arbitrary number of topics
                trainedModel.Train(trainDocs, Environment.ProcessorCount);
                await trainedModel.StoreAsync();
            }

            // Load the stored model back and describe the topics of each test document
            using (var storedModel = await LDA.FromStoreAsync(Language.English, 0, "reuters-lda"))
            {
                foreach (var doc in testDocs)
                {
                    if (!storedModel.TryPredict(doc, out var topics))
                    {
                        continue;
                    }

                    // A topic that cannot be described contributes an empty line
                    var topicLines = topics.Select(topic =>
                    {
                        if (!storedModel.TryDescribeTopic(topic.TopicID, out var description))
                        {
                            return "";
                        }

                        return $"[{topic.Score:n3}] => {description.ToString()}";
                    });

                    var docTopics = string.Join("\n", topicLines);

                    Console.WriteLine(separator);
                    Console.WriteLine(doc.Value);
                    Console.WriteLine(separator);
                    Console.WriteLine(docTopics);
                    Console.WriteLine(separator + "\n\n");
                }
            }
        }
Example #9
0
        /// <summary>
        /// Sample entry point: registers the built-in English models, sets up console output and logging,
        /// then runs the spotter sample followed by the perceptron / pattern spotter sample.
        /// </summary>
        private static async Task Main()
        {
            // The English built-in models have to be registered before use.
            // Storage.Current is left untouched: per the original note, the default English models
            // are consumed from NuGet, so configuring an online repository is no longer necessary.
            Catalyst.Models.English.Register();

            Console.OutputEncoding = Encoding.UTF8;

            var consoleLogging = LoggerFactory.Create(logging => logging.AddConsole());
            ApplicationLogging.SetLoggerFactory(consoleLogging);

            // Catalyst currently supports 3 different types of models for Named Entity Recognition (NER):
            // - Gazetteer-like(i.e. [Spotter](https://github.com/curiosity-ai/catalyst/blob/master/Catalyst/src/Models/EntityRecognition/Spotter.cs))
            // - Regex-like(i.e. [PatternSpotter](https://github.com/curiosity-ai/catalyst/blob/master/Catalyst/src/Models/EntityRecognition/PatternSpotter.cs))
            // - Perceptron (i.e. [AveragePerceptronEntityRecognizer](https://github.com/curiosity-ai/catalyst/blob/master/Catalyst/src/Models/EntityRecognition/AveragePerceptronEntityRecognizer.cs))

            SpotterSample();

            await AveragePerceptronEntityRecognizerAndPatternSpotterSample();
        }
Example #10
0
        /// <summary>
        /// Entry point: registers MSBuild, configures ZLogger console logging with a "[level] [HH:mm:ss] " prefix,
        /// compiles a small in-memory C# source file through <c>CompilationProcessor</c> and logs every output file.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static async Task Main(string[] args)
        {
            // The source file handed to the compiler; kept verbatim
            var appSource = @"
using System;
using H5;

namespace Test
{
    internal static class App
    {
        private static int HelloWorld;
        private static void Main()
        {
            Console.WriteLine(nameof(HelloWorld));
        }
    }
}
";

            Microsoft.Build.Locator.MSBuildLocator.RegisterDefaults();

            var loggerFactory = LoggerFactory.Create(
                logging => logging.SetMinimumLevel(LogLevel.Information)
                                  .AddZLoggerConsole(
                                      console => console.PrefixFormatter =
                                          (buffer, entry) => ZString.Utf8Format(
                                              buffer,
                                              "[{0}] [{1:D2}:{2:D2}:{3:D2}] ",
                                              GetLogLevelString(entry.LogLevel),
                                              entry.Timestamp.LocalDateTime.Hour,
                                              entry.Timestamp.LocalDateTime.Minute,
                                              entry.Timestamp.LocalDateTime.Second)));

            ApplicationLogging.SetLoggerFactory(loggerFactory);

            var settings = new H5DotJson_AssemblySettings();

            var request = new CompilationRequest("App", settings)
                          .NoPackageResources()
                          .NoHTML()
                          .WithPackageReference("h5", NuGetVersion.Parse("0.0.8537"))
                          .WithPackageReference("h5.Core", NuGetVersion.Parse("0.0.8533"))
                          .WithSourceFile("App.cs", appSource);

            var compiledJavascript = await CompilationProcessor.CompileAsync(request);

            foreach (var(fileName, contents) in compiledJavascript.Output)
            {
                Logger.ZLogInformation("File: {0}\n\n----------------------------\n\n{1}\n\n----------------------------\n\n", fileName, contents);
            }

            // Wait a moment so all log messages get printed before the process exits
            await Task.Delay(1000);
        }
Example #11
0
        /// <summary>
        /// Entry point of the training tool: configures logging and output, parses the command line,
        /// and runs every preparation / training step whose input path was supplied.
        /// </summary>
        /// <remarks>
        /// Two debugging leftovers were removed: a <c>true ||</c> that forced disk-only storage and ignored
        /// <c>options.Token</c>, and a bare <c>return;</c> that made the WikiNER and language detector steps unreachable.
        /// </remarks>
        /// <param name="args">Command-line arguments, parsed into <c>CommandLineOptions</c>.</param>
        static async Task Main(string[] args)
        {
            ApplicationLogging.SetLoggerFactory(LoggerFactory.Create(builder => builder.AddConsole()));
            ForceInvariantCultureAndUTF8Output();

            await Parser.Default
            .ParseArguments <CommandLineOptions>(args)
            .MapResult(
                async options =>
            {
                if (string.IsNullOrWhiteSpace(options.Token))
                {
                    Storage.Current = new DiskStorage(options.DiskStoragePath);
                }
                else
                {
                    //For uploading on the online models repository
                    Storage.Current = new OnlineWriteableRepositoryStorage(new DiskStorage(options.DiskStoragePath), options.Token);
                }

                Thread.CurrentThread.Priority = ThreadPriority.AboveNormal;
                ThreadPool.SetMinThreads(Environment.ProcessorCount * 2, Environment.ProcessorCount * 2);
                ThreadPool.SetMaxThreads(Environment.ProcessorCount * 20, Environment.ProcessorCount * 20);

                using (var p = Process.GetCurrentProcess())
                {
                    p.PriorityClass = ProcessPriorityClass.High;
                }

                await CreateProjectsIfNeeded(options.LanguagesDirectory);

                if (!string.IsNullOrWhiteSpace(options.HoldOffHungerData))
                {
                    await PrepareBritishToAmericanSpellings.RunAsync(options.HoldOffHungerData, options.LanguagesDirectory);
                }

                if (!string.IsNullOrWhiteSpace(options.SpacyLookupsData))
                {
                    await PrepareSpacyLookups.RunAsync(options.SpacyLookupsData, options.LanguagesDirectory);
                }

                if (!string.IsNullOrWhiteSpace(options.UniversalDependenciesPath))
                {
                    // NOTE(review): sentence detector training was already disabled in the original; left disabled here - confirm whether it should run
                    //await TrainSentenceDetector.Train(options.UniversalDependenciesPath, options.LanguagesDirectory);
                    await TrainPOSTagger.Train(udSource: options.UniversalDependenciesPath, ontonotesSource: options.OntonotesPath, languagesDirectory: options.LanguagesDirectory);
                }

                if (!string.IsNullOrWhiteSpace(options.WikiNERPath))
                {
                    //One model per language, trained sequentially in this order
                    var wikiNerLanguages = new[]
                    {
                        Language.English,
                        Language.French,
                        Language.German,
                        Language.Spanish,
                        Language.Italian,
                        Language.Portuguese,
                        Language.Russian,
                        Language.Dutch,
                        Language.Polish,
                    };

                    foreach (var language in wikiNerLanguages)
                    {
                        await TrainWikiNER.TrainAsync(options.WikiNERPath, language, 0, "WikiNER", options.LanguagesDirectory);
                    }
                }

                if (!string.IsNullOrWhiteSpace(options.FastTextLanguageSentencesPath))
                {
                    TrainLanguageDetector.Train(options.FastTextLanguageSentencesPath);
                    TrainLanguageDetector.Test(options.FastTextLanguageSentencesPath);
                }

                if (!string.IsNullOrWhiteSpace(options.LanguageJsonPath))
                {
                    TrainLanguageDetector.CreateLanguageDetector(options.LanguageJsonPath);
                }
            },
                //Parse failures are ignored: nothing is trained
                error => Task.CompletedTask);
        }
Example #12
0
        /// <summary>
        /// Sample entry point demonstrating three entity recognition models: the pre-trained WikiNER
        /// <c>AveragePerceptronEntityRecognizer</c>, a custom <c>PatternSpotter</c>, and a gazetteer-like <c>Spotter</c>,
        /// plus a <c>Neuralyzer</c> used to correct a recognition mistake.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        public static async Task Main(string[] args)
        {
            Console.OutputEncoding = Encoding.UTF8;
            ApplicationLogging.SetLoggerFactory(LoggerFactory.Create(lb => lb.AddConsole()));

            // Catalyst currently supports 3 different types of models for Named Entity Recognition (NER):
            // - Gazetteer-like(i.e. [Spotter](https://github.com/curiosity-ai/catalyst/blob/master/Catalyst/src/Models/EntityRecognition/Spotter.cs))
            // - Regex-like(i.e. [PatternSpotter](https://github.com/curiosity-ai/catalyst/blob/master/Catalyst/src/Models/EntityRecognition/PatternSpotter.cs))
            // - Perceptron (i.e. [AveragePerceptronEntityRecognizer](https://github.com/curiosity-ai/catalyst/blob/master/Catalyst/src/Models/EntityRecognition/AveragePerceptronEntityRecognizer.cs))



            // For training an AveragePerceptronModel, check the source-code here: https://github.com/curiosity-ai/catalyst/blob/master/Catalyst.Training/src/TrainWikiNER.cs
            // This example uses the pre-trained WikiNER model, trained on the data provided by the paper "Learning multilingual named entity recognition from Wikipedia", Artificial Intelligence 194 (DOI: 10.1016/j.artint.2012.03.006)
            // The training data was sourced from the following repository: https://github.com/dice-group/FOX/tree/master/input/Wikiner


            //Configures the model storage to use the online repository backed by the local folder ./catalyst-models/
            Storage.Current = new OnlineRepositoryStorage(new DiskStorage("catalyst-models"));

            //Create a new pipeline for the english language, and add the WikiNER model to it
            Console.WriteLine("Loading models... This might take a bit longer the first time you run this sample, as the models have to be downloaded from the online repository");
            var nlp = await Pipeline.ForAsync(Language.English);

            nlp.Add(await AveragePerceptronEntityRecognizer.FromStoreAsync(language: Language.English, version: Version.Latest, tag: "WikiNER"));



            //Another available model for NER is the PatternSpotter, which is the conceptual equivalent of a RegEx on raw text, but operating on the tokenized form of the text.

            //Adds a custom pattern spotter for the pattern: single("is" / VERB) + multiple(NOUN/PROPN/AUX/DET/ADJ)
            var isApattern = new PatternSpotter(Language.English, 0, tag: "is-a-pattern", captureTag: "IsA");

            isApattern.NewPattern("Is+Noun", mp => mp.Add(new PatternUnit(P.Single().WithToken("is").WithPOS(PartOfSpeech.VERB)),
                                                          new PatternUnit(P.Multiple().WithPOS(PartOfSpeech.NOUN, PartOfSpeech.PROPN, PartOfSpeech.AUX, PartOfSpeech.DET, PartOfSpeech.ADJ))
                                                          ));

            nlp.Add(isApattern);



            //For processing a single document, you can call nlp.ProcessSingle
            var doc = new Document(Data.Sample_1, Language.English);

            nlp.ProcessSingle(doc);

            //For processing multiple documents in parallel (i.e. multithreading), you can call nlp.Process on an IEnumerable<IDocument> enumerable
            var docs = nlp.Process(MultipleDocuments());

            //This will print all recognized entities. You can also see how the WikiNER model makes a mistake on recognizing Amazon as a location on Data.Sample_1
            PrintDocumentEntities(doc);
            foreach (var d in docs)
            {
                PrintDocumentEntities(d);
            }



            //For correcting Entity Recognition mistakes, you can use the Neuralyzer class.
            //This class uses the Pattern Matching entity recognition class to perform "forget-entity" and "add-entity"
            //passes on the document, after it has been processed by all other processes in the NLP pipeline
            var neuralizer = new Neuralyzer(Language.English, 0, "WikiNER-sample-fixes");

            //Teach the Neuralyzer class to forget the match for a single token "Amazon" with entity type "Location"
            neuralizer.TeachForgetPattern("Location", "Amazon", mp => mp.Add(new PatternUnit(P.Single().WithToken("Amazon").WithEntityType("Location"))));

            //Teach the Neuralyzer class to add the entity type Organization for a match for the single token "Amazon"
            neuralizer.TeachAddPattern("Organization", "Amazon", mp => mp.Add(new PatternUnit(P.Single().WithToken("Amazon"))));

            //Add the Neuralyzer to the pipeline
            nlp.UseNeuralyzer(neuralizer);

            //Now you can see that "Amazon" is correctly recognized as the entity type "Organization"
            var doc2 = new Document(Data.Sample_1, Language.English);

            nlp.ProcessSingle(doc2);
            PrintDocumentEntities(doc2);



            //Another way to perform entity recognition is to use a gazetteer-like model. For example, here is one for capturing a set of programming languages
            var spotter = new Spotter(Language.Any, 0, "programming", "ProgrammingLanguage");

            spotter.Data.IgnoreCase = true; //In some cases, it might be better to set it to false, and only add upper/lower-case exceptions as required

            spotter.AddEntry("C#");
            spotter.AddEntry("Python");
            spotter.AddEntry("Python 3"); //entries can have more than one word, and will be automatically tokenized on whitespace
            spotter.AddEntry("C++");
            spotter.AddEntry("Rust");
            spotter.AddEntry("Java");

            var nlp2 = Pipeline.TokenizerFor(Language.English);

            nlp2.Add(spotter); //When adding a spotter model, the model propagates any exceptions on tokenization to the pipeline's tokenizer

            var docAboutProgramming = new Document(Data.SampleProgramming, Language.English);

            //Process with nlp2, the pipeline that holds the spotter; the first pipeline (nlp) does not contain it
            nlp2.ProcessSingle(docAboutProgramming);

            PrintDocumentEntities(docAboutProgramming);
        }