コード例 #1
0
        /// <summary>
        /// Sample entry point: registers the built-in English models, configures console
        /// output and logging, loads a sentence detector and two taggers, and then runs the
        /// entity-recognition demonstrations.
        /// </summary>
        private static async Task Main()
        {
            // Register the built-in English models before anything tries to load them.
            Catalyst.Models.English.Register();

            // Optional storage override (disabled):
            //Storage.Current = new OnlineRepositoryStorage(new DiskStorage("catalyst-models"));

            // Emit UTF-8 on the console and route library logging to it.
            Console.OutputEncoding = Encoding.UTF8;
            ApplicationLogging.SetLoggerFactory(LoggerFactory.Create(lb => lb.AddConsole()));

            // Catalyst currently supports 3 different types of models for Named Entity Recognition (NER):
            // - Gazetteer-like(i.e. [Spotter](https://github.com/curiosity-ai/catalyst/blob/master/Catalyst/src/Models/EntityRecognition/Spotter.cs))
            // - Regex-like(i.e. [PatternSpotter](https://github.com/curiosity-ai/catalyst/blob/master/Catalyst/src/Models/EntityRecognition/PatternSpotter.cs))
            // - Perceptron (i.e. [AveragePerceptronEntityRecognizer](https://github.com/curiosity-ai/catalyst/blob/master/Catalyst/src/Models/EntityRecognition/AveragePerceptronEntityRecognizer.cs))

            // Debugging aid (disabled): list the resources embedded in the English models assembly.
            //var s = typeof(Catalyst.Models.English).Assembly.GetManifestResourceStream($"{typeof(Catalyst.Models.English).Assembly.GetName().Name}.Resources.sentence-detector.bin");
            //foreach(var name in typeof(Catalyst.Models.English).Assembly.GetManifestResourceNames())
            //{
            //    Console.WriteLine(name);
            //}

            // Load the English sentence detector; the instance is not used further in this method.
            var englishSentenceDetector = await SentenceDetector.FromStoreAsync(Language.English, -1, "");

            // Construct a tagger directly and load its data explicitly...
            var directTagger = new AveragePerceptronTagger(Language.English, 0, "");
            await directTagger.LoadDataAsync();

            // ...and obtain another one through the store helper; it is not used further either.
            var storedTagger = await AveragePerceptronTagger.FromStoreAsync(Language.English, -1, "");

            // Run the demos: the awaited one first, then the synchronous one.
            await DemonstrateAveragePerceptronEntityRecognizerAndPatternSpotter();
            DemonstrateSpotter();
        }
コード例 #2
0
        /// <summary>
        /// Asynchronously assembles a <see cref="Pipeline"/> for a single language. A
        /// <see cref="FastTokenizer"/> is always added first; a <see cref="SentenceDetector"/>
        /// and an <see cref="AveragePerceptronTagger"/> follow when requested.
        /// </summary>
        /// <param name="language">Language passed to the pipeline and to every process added to it.</param>
        /// <param name="sentenceDetector">When <c>true</c>, a sentence detector is loaded and added.</param>
        /// <param name="tagger">When <c>true</c>, an average perceptron tagger is loaded and added.</param>
        /// <returns>The assembled pipeline.</returns>
        public static async Task <Pipeline> ForAsync(Language language, bool sentenceDetector = true, bool tagger = true)
        {
            var pipeline = new Pipeline(language);
            pipeline.Add(new FastTokenizer(language));

            if (sentenceDetector)
            {
                // Loaded with 0 and "" as the remaining FromStoreAsync arguments.
                var detector = await SentenceDetector.FromStoreAsync(language, 0, "");
                pipeline.Add(detector);
            }

            if (tagger)
            {
                var loadedTagger = await AveragePerceptronTagger.FromStoreAsync(language, 0, "");
                pipeline.Add(loadedTagger);
            }

            return pipeline;
        }
コード例 #3
0
        /// <summary>
        /// Synchronously assembles a <see cref="Pipeline"/> for a single language. A
        /// <see cref="FastTokenizer"/> is always added first; a <see cref="SentenceDetector"/>
        /// and an <see cref="AveragePerceptronTagger"/> follow when requested.
        /// </summary>
        /// <param name="language">Language passed to the pipeline and to every process added to it.</param>
        /// <param name="sentenceDetector">When <c>true</c>, a sentence detector is loaded and added.</param>
        /// <param name="tagger">When <c>true</c>, an average perceptron tagger is loaded and added.</param>
        /// <returns>The assembled pipeline.</returns>
        public static Pipeline For(Language language, bool sentenceDetector = true, bool tagger = true)
        {
            var pipeline = new Pipeline(language);
            pipeline.Add(new FastTokenizer(language));

            if (sentenceDetector)
            {
                // The asynchronous load is waited on via WaitResult() because this method is synchronous.
                var detector = SentenceDetector.FromStoreAsync(language, -1, "").WaitResult();
                pipeline.Add(detector);
            }

            if (tagger)
            {
                var loadedTagger = AveragePerceptronTagger.FromStoreAsync(language, -1, "").WaitResult();
                pipeline.Add(loadedTagger);
            }

            return pipeline;
        }
コード例 #4
0
        /// <summary>
        /// Asynchronously builds a tokenizing <see cref="Pipeline"/> for <paramref name="language"/>:
        /// a <see cref="FastTokenizer"/> plus, optionally, a sentence detector.
        /// </summary>
        /// <remarks>
        /// Loading the sentence detector is best-effort. The model for the requested language is
        /// tried first (<c>Language.Any</c> is mapped to English). If that fails and the requested
        /// model was not already the English one, the English model is tried as a fallback. When
        /// no model can be loaded, a warning is logged and the pipeline is returned with the
        /// tokenizer only; load failures are never propagated to the caller.
        /// </remarks>
        /// <param name="language">Language assigned to the pipeline and used for the tokenizer.</param>
        /// <param name="sentenceDetector">When <c>true</c>, attempts to add a sentence detector.</param>
        /// <returns>The assembled pipeline.</returns>
        public static async Task <Pipeline> TokenizerForAsync(Language language, bool sentenceDetector = true)
        {
            var p = new Pipeline()
            {
                Language = language
            };

            p.Add(new FastTokenizer(language));

            if (!sentenceDetector)
            {
                return(p);
            }

            //Uses english sentence detector as a default
            var requested = (language == Language.Any) ? Language.English : language;
            var requestedIsEnglish = requested == Language.English;

            IProcess sd = null;

            try
            {
                sd = await SentenceDetector.FromStoreAsync(requested, -1, "");

                p.Add(sd);
            }
            catch (System.Exception ex)
            {
                if (requestedIsEnglish)
                {
                    // The English model *is* the fallback, so retrying the same load would only
                    // repeat the failure; report it once and carry on without a detector.
                    Logger.LogWarning(ex, "Could not find sentence detector model for language {LANGUAGE}. Continuing without one", Language.English);
                }
                else
                {
                    Logger.LogWarning(ex, "Could not find sentence detector model for language {LANGUAGE}. Falling back to english model", language);
                }
            }

            // Fall back only when nothing was loaded and English has not been attempted yet.
            if (sd is null && !requestedIsEnglish)
            {
                try
                {
                    sd = await SentenceDetector.FromStoreAsync(Language.English, -1, "");

                    p.Add(sd);
                }
                catch (System.Exception ex)
                {
                    Logger.LogWarning(ex, "Could not find sentence detector model for language {LANGUAGE}. Continuing without one", Language.English);
                }
            }

            return(p);
        }
コード例 #5
0
        /// <summary>
        /// Asynchronously assembles one <see cref="Pipeline"/> covering several languages. For each
        /// language, in enumeration order, a <see cref="FastTokenizer"/> is added, followed by a
        /// <see cref="SentenceDetector"/> and an <see cref="AveragePerceptronTagger"/> when requested.
        /// The resulting pipeline's language is set to <c>Language.Any</c>.
        /// </summary>
        /// <param name="languages">Languages to create processes for.</param>
        /// <param name="sentenceDetector">When <c>true</c>, a sentence detector is loaded per language.</param>
        /// <param name="tagger">When <c>true</c>, an average perceptron tagger is loaded per language.</param>
        /// <returns>A pipeline built from all collected processes.</returns>
        public static async Task <Pipeline> For(IEnumerable <Language> languages, bool sentenceDetector = true, bool tagger = true)
        {
            var steps = new List <IProcess>();

            foreach (var current in languages)
            {
                steps.Add(new FastTokenizer(current));

                if (sentenceDetector)
                {
                    var detector = await SentenceDetector.FromStoreAsync(current, -1, "");
                    steps.Add(detector);
                }

                if (tagger)
                {
                    var loadedTagger = await AveragePerceptronTagger.FromStoreAsync(current, -1, "");
                    steps.Add(loadedTagger);
                }
            }

            // The processes of every language are combined into a single pipeline.
            return new Pipeline(steps)
            {
                Language = Language.Any
            };
        }