/// <summary>
/// Sample entry point: registers the English built-in models, configures the console
/// and logging, loads a sentence detector and two taggers, then runs both demonstrations.
/// </summary>
private static async Task Main()
{
    // The English built-in models have to be registered before anything is loaded.
    Catalyst.Models.English.Register();

    // Alternative storage configuration, left disabled:
    //Storage.Current = new OnlineRepositoryStorage(new DiskStorage("catalyst-models"));

    Console.OutputEncoding = Encoding.UTF8;

    var consoleLoggerFactory = LoggerFactory.Create(lb => lb.AddConsole());
    ApplicationLogging.SetLoggerFactory(consoleLoggerFactory);

    // Catalyst currently supports 3 different types of models for Named Entity Recognition (NER):
    //  - Gazetteer-like (i.e. [Spotter](https://github.com/curiosity-ai/catalyst/blob/master/Catalyst/src/Models/EntityRecognition/Spotter.cs))
    //  - Regex-like (i.e. [PatternSpotter](https://github.com/curiosity-ai/catalyst/blob/master/Catalyst/src/Models/EntityRecognition/PatternSpotter.cs))
    //  - Perceptron (i.e. [AveragePerceptronEntityRecognizer](https://github.com/curiosity-ai/catalyst/blob/master/Catalyst/src/Models/EntityRecognition/AveragePerceptronEntityRecognizer.cs))

    // Debugging aid, left disabled: lists the resources embedded in the English models assembly.
    //var s = typeof(Catalyst.Models.English).Assembly.GetManifestResourceStream($"{typeof(Catalyst.Models.English).Assembly.GetName().Name}.Resources.sentence-detector.bin");
    //foreach(var name in typeof(Catalyst.Models.English).Assembly.GetManifestResourceNames())
    //{
    //    Console.WriteLine(name);
    //}

    // The loaded models are not used afterwards; the calls are kept so the same loads still happen.
    _ = await SentenceDetector.FromStoreAsync(Language.English, -1, "");

    var manuallyLoadedTagger = new AveragePerceptronTagger(Language.English, 0, "");
    await manuallyLoadedTagger.LoadDataAsync();

    _ = await AveragePerceptronTagger.FromStoreAsync(Language.English, -1, "");

    await DemonstrateAveragePerceptronEntityRecognizerAndPatternSpotter();
    DemonstrateSpotter();
}
/// <summary>
/// Asynchronously builds a <see cref="Pipeline"/> for a single language. The pipeline always
/// starts with a <see cref="FastTokenizer"/>; a sentence detector and a tagger are appended
/// on request, in that order.
/// </summary>
/// <param name="language">Language passed to the pipeline, the tokenizer and the model loaders.</param>
/// <param name="sentenceDetector">When true, a <see cref="SentenceDetector"/> loaded via FromStoreAsync is added.</param>
/// <param name="tagger">When true, an <see cref="AveragePerceptronTagger"/> loaded via FromStoreAsync is added.</param>
/// <returns>The assembled pipeline.</returns>
public static async Task<Pipeline> ForAsync(Language language, bool sentenceDetector = true, bool tagger = true)
{
    var pipeline = new Pipeline(language);
    pipeline.Add(new FastTokenizer(language));

    // NOTE(review): the models are requested with 0 here, while the synchronous For overload
    // passes -1 for the same argument - confirm whether the difference is intentional.
    if (sentenceDetector)
    {
        var detector = await SentenceDetector.FromStoreAsync(language, 0, "");
        pipeline.Add(detector);
    }

    if (tagger)
    {
        var posTagger = await AveragePerceptronTagger.FromStoreAsync(language, 0, "");
        pipeline.Add(posTagger);
    }

    return pipeline;
}
/// <summary>
/// Synchronously builds a <see cref="Pipeline"/> for a single language. The pipeline always
/// starts with a <see cref="FastTokenizer"/>; a sentence detector and a tagger are appended
/// on request, in that order.
/// </summary>
/// <param name="language">Language passed to the pipeline, the tokenizer and the model loaders.</param>
/// <param name="sentenceDetector">When true, a <see cref="SentenceDetector"/> is loaded and added.</param>
/// <param name="tagger">When true, an <see cref="AveragePerceptronTagger"/> is loaded and added.</param>
/// <returns>The assembled pipeline.</returns>
public static Pipeline For(Language language, bool sentenceDetector = true, bool tagger = true)
{
    var pipeline = new Pipeline(language);
    pipeline.Add(new FastTokenizer(language));

    // Each model comes from an async loader; its result is obtained through the project's
    // WaitResult() helper so that this overload can stay synchronous.
    if (sentenceDetector)
    {
        var detector = SentenceDetector.FromStoreAsync(language, -1, "").WaitResult();
        pipeline.Add(detector);
    }

    if (tagger)
    {
        var posTagger = AveragePerceptronTagger.FromStoreAsync(language, -1, "").WaitResult();
        pipeline.Add(posTagger);
    }

    return pipeline;
}
/// <summary>
/// Asynchronously builds a tokenizing <see cref="Pipeline"/>: a <see cref="FastTokenizer"/>
/// optionally followed by a sentence detector. Loading the sentence detector is best-effort;
/// if it fails, the English model is tried, and if that fails too the pipeline is returned
/// without a sentence detector.
/// </summary>
/// <param name="language">
/// Language of the pipeline and tokenizer. <c>Language.Any</c> is mapped to
/// <c>Language.English</c> when the sentence detector is loaded.
/// </param>
/// <param name="sentenceDetector">When false, no sentence detector is loaded at all.</param>
/// <returns>The assembled pipeline. Model-loading failures are logged as warnings, not thrown.</returns>
public static async Task<Pipeline> TokenizerForAsync(Language language, bool sentenceDetector = true)
{
    var p = new Pipeline() { Language = language };
    p.Add(new FastTokenizer(language));

    if (sentenceDetector)
    {
        IProcess sd = null;

        try
        {
            //Uses english sentence detector as a default
            sd = await SentenceDetector.FromStoreAsync((language == Language.Any) ? Language.English : language, -1, "");
            p.Add(sd);
        }
        catch (System.Exception ex)
        {
            // The exception is passed to the logger so the cause of the failure is not lost.
            Logger.LogWarning(ex, "Could not find sentence detector model for language {LANGUAGE}. Falling back to english model", language);
        }

        // sd is still null only when the load above threw before assigning it.
        if (sd is null)
        {
            try
            {
                sd = await SentenceDetector.FromStoreAsync(Language.English, -1, "");
                p.Add(sd);
            }
            catch (System.Exception ex)
            {
                // Deliberately swallowed: callers get a tokenizer-only pipeline instead of an error.
                Logger.LogWarning(ex, "Could not find sentence detector model for language {LANGUAGE}. Continuing without one", Language.English);
            }
        }
    }

    return p;
}
/// <summary>
/// Asynchronously builds one <see cref="Pipeline"/> covering several languages. For each
/// language, in enumeration order, a <see cref="FastTokenizer"/> is added, followed on request
/// by a sentence detector and a tagger. The resulting pipeline's language is <c>Language.Any</c>.
/// </summary>
/// <param name="languages">Languages to add processes for; enumerated once.</param>
/// <param name="sentenceDetector">When true, a <see cref="SentenceDetector"/> is loaded per language.</param>
/// <param name="tagger">When true, an <see cref="AveragePerceptronTagger"/> is loaded per language.</param>
/// <returns>The pipeline built from all collected processes.</returns>
public static async Task<Pipeline> For(IEnumerable<Language> languages, bool sentenceDetector = true, bool tagger = true)
{
    var steps = new List<IProcess>();

    foreach (var current in languages)
    {
        steps.Add(new FastTokenizer(current));

        if (sentenceDetector)
        {
            var detector = await SentenceDetector.FromStoreAsync(current, -1, "");
            steps.Add(detector);
        }

        if (tagger)
        {
            var posTagger = await AveragePerceptronTagger.FromStoreAsync(current, -1, "");
            steps.Add(posTagger);
        }
    }

    return new Pipeline(steps) { Language = Language.Any };
}