Exemplo n.º 1
0
        /// <summary>
        /// Program entry point: sets up console logging, parses the command line and
        /// runs every training step whose input path was supplied.
        /// </summary>
        /// <param name="args">Raw command-line arguments, parsed into <see cref="CommandLineOptions"/>.</param>
        /// <returns>
        /// A task that completes once all requested steps have run; when argument
        /// parsing fails, an already-completed task is returned and nothing is trained.
        /// </returns>
        static async Task Main(string[] args)
        {
            ApplicationLogging.SetLoggerFactory(LoggerFactory.Create(logging => logging.AddConsole()));
            ForceInvariantCultureAndUTF8Output();

            var parsed = Parser.Default.ParseArguments<CommandLineOptions>(args);

            await parsed.MapResult(
                async opts =>
                {
                    // Models are always backed by a disk storage rooted at DiskStoragePath.
                    var localStorage = new DiskStorage(opts.DiskStoragePath);

                    if (!string.IsNullOrWhiteSpace(opts.Token))
                    {
                        // A token was given: wrap the disk storage for uploading
                        // to the online models repository.
                        Storage.Current = new OnlineWriteableRepositoryStorage(localStorage, opts.Token);
                    }
                    else
                    {
                        Storage.Current = localStorage;
                    }

                    // Raise the priority of both the current thread and the whole process.
                    Thread.CurrentThread.Priority = ThreadPriority.AboveNormal;

                    using (var currentProcess = Process.GetCurrentProcess())
                    {
                        currentProcess.PriorityClass = ProcessPriorityClass.High;
                    }

                    var universalDependenciesPath = opts.UniversalDependenciesPath;
                    if (!string.IsNullOrWhiteSpace(universalDependenciesPath))
                    {
                        TrainSentenceDetector.Train(universalDependenciesPath);
                        TrainPOSTagger.Train(udSource: universalDependenciesPath, ontonotesSource: opts.OntonotesPath);
                    }

                    var wikiNerPath = opts.WikiNERPath;
                    if (!string.IsNullOrWhiteSpace(wikiNerPath))
                    {
                        // Trained strictly one after another, in this order.
                        var wikiNerLanguages = new[]
                        {
                            Language.English,
                            Language.French,
                            Language.German,
                            Language.Spanish,
                            Language.Italian,
                            Language.Portuguese,
                            Language.Russian,
                            Language.Dutch,
                            Language.Polish,
                        };

                        foreach (var language in wikiNerLanguages)
                        {
                            await TrainWikiNER.TrainAsync(wikiNerPath, language, 0, "WikiNER");
                        }
                    }

                    var languageSentencesPath = opts.FastTextLanguageSentencesPath;
                    if (!string.IsNullOrWhiteSpace(languageSentencesPath))
                    {
                        TrainLanguageDetector.Train(languageSentencesPath);
                        TrainLanguageDetector.Test(languageSentencesPath);
                    }

                    if (!string.IsNullOrWhiteSpace(opts.LanguageJsonPath))
                    {
                        TrainLanguageDetector.CreateLanguageDetector(opts.LanguageJsonPath);
                    }
                },
                errors => Task.CompletedTask);
        }
Exemplo n.º 2
0
        /// <summary>
        /// Program entry point: sets up console logging, parses the command line,
        /// prepares the language projects and runs every data-preparation or training
        /// step whose input path was supplied.
        /// </summary>
        /// <param name="args">Raw command-line arguments, parsed into <see cref="CommandLineOptions"/>.</param>
        /// <returns>
        /// A task that completes once all requested steps have run; when argument
        /// parsing fails, an already-completed task is returned and nothing is executed.
        /// </returns>
        static async Task Main(string[] args)
        {
            ApplicationLogging.SetLoggerFactory(LoggerFactory.Create(builder => builder.AddConsole()));
            ForceInvariantCultureAndUTF8Output();

            await Parser.Default
            .ParseArguments <CommandLineOptions>(args)
            .MapResult(
                async options =>
            {
                // Honor the Token option again: a leftover "true ||" used to force the
                // disk-only branch, so the online repository storage was never selected.
                if (string.IsNullOrWhiteSpace(options.Token))
                {
                    Storage.Current = new DiskStorage(options.DiskStoragePath);
                }
                else
                {
                    //For uploading on the online models repository
                    Storage.Current = new OnlineWriteableRepositoryStorage(new DiskStorage(options.DiskStoragePath), options.Token);
                }

                // Raise thread/process priority and widen the thread-pool limits
                // relative to the number of logical processors.
                Thread.CurrentThread.Priority = ThreadPriority.AboveNormal;
                ThreadPool.SetMinThreads(Environment.ProcessorCount * 2, Environment.ProcessorCount * 2);
                ThreadPool.SetMaxThreads(Environment.ProcessorCount * 20, Environment.ProcessorCount * 20);

                using (var p = Process.GetCurrentProcess())
                {
                    p.PriorityClass = ProcessPriorityClass.High;
                }

                await CreateProjectsIfNeeded(options.LanguagesDirectory);

                if (!string.IsNullOrWhiteSpace(options.HoldOffHungerData))
                {
                    await PrepareBritishToAmericanSpellings.RunAsync(options.HoldOffHungerData, options.LanguagesDirectory);
                }

                if (!string.IsNullOrWhiteSpace(options.SpacyLookupsData))
                {
                    await PrepareSpacyLookups.RunAsync(options.SpacyLookupsData, options.LanguagesDirectory);
                }

                if (!string.IsNullOrWhiteSpace(options.UniversalDependenciesPath))
                {
                    // NOTE(review): sentence-detector training was commented out in the original; left disabled - confirm whether it should be restored:
                    //await TrainSentenceDetector.Train(options.UniversalDependenciesPath, options.LanguagesDirectory);
                    await TrainPOSTagger.Train(udSource: options.UniversalDependenciesPath, ontonotesSource: options.OntonotesPath, languagesDirectory: options.LanguagesDirectory);
                }

                // A stray unconditional "return;" used to sit here, turning every step
                // below into dead code; it has been removed so the options are honored.

                if (!string.IsNullOrWhiteSpace(options.WikiNERPath))
                {
                    // Trained strictly one after another, in this order.
                    var wikiNerLanguages = new[]
                    {
                        Language.English,
                        Language.French,
                        Language.German,
                        Language.Spanish,
                        Language.Italian,
                        Language.Portuguese,
                        Language.Russian,
                        Language.Dutch,
                        Language.Polish,
                    };

                    foreach (var language in wikiNerLanguages)
                    {
                        await TrainWikiNER.TrainAsync(options.WikiNERPath, language, 0, "WikiNER", options.LanguagesDirectory);
                    }
                }

                if (!string.IsNullOrWhiteSpace(options.FastTextLanguageSentencesPath))
                {
                    TrainLanguageDetector.Train(options.FastTextLanguageSentencesPath);
                    TrainLanguageDetector.Test(options.FastTextLanguageSentencesPath);
                }

                if (!string.IsNullOrWhiteSpace(options.LanguageJsonPath))
                {
                    TrainLanguageDetector.CreateLanguageDetector(options.LanguageJsonPath);
                }
            },
                error => Task.CompletedTask);
        }