Esempio n. 1
0
        /// <summary>
        /// Glosses <paramref name="input"/> and writes the result to standard output as a JSON array.
        /// </summary>
        /// <remarks>
        /// Each array element is an object with a <c>"word"</c> property (the gloss's <c>Foreign</c> value)
        /// and a <c>"definitions"</c> array containing every entry of its <c>GlossCandidates</c>.
        /// </remarks>
        /// <param name="dataDir">
        /// Root data directory. The kana table (<c>character/kana.txt</c>), the MeCab dictionary
        /// (<c>mecab/ipadic</c>) and the JMdict files (<c>dictionaries/JMdict_e.gz</c> and
        /// <c>dictionaries/JMdict_e.cache</c>) are resolved relative to it.
        /// </param>
        /// <param name="input">Text handed to the glosser.</param>
        private static void RunAutomaticGlossing(string dataDir, string input)
        {
            var kana = new KanaProperties2(Path.Combine(dataDir, "character", "kana.txt"), Encoding.UTF8);

            using (var mecab = new MeCabIpadic(new MeCabParam {
                DicDir = Path.Combine(dataDir, "mecab", "ipadic"), UseMemoryMappedFile = true
            }))
            using (var dict = JMDictLookup.Create(Path.Combine(dataDir, "dictionaries", "JMdict_e.gz"), Path.Combine(dataDir, "dictionaries", "JMdict_e.cache")))
            // The writer is disposable, so scope it with 'using'. CloseOutput = false keeps
            // disposal from closing Console.Out, which this method does not own.
            using (var jsonWriter = new JsonTextWriter(Console.Out) { CloseOutput = false })
            {
                var parser  = new SentenceParser(mecab, dict);
                var glosser = new AutoGlosserNext(parser, dict, kana);
                var glosses = glosser.Gloss(input);

                jsonWriter.WriteStartArray();
                foreach (var gloss in glosses)
                {
                    jsonWriter.WriteStartObject();

                    jsonWriter.WritePropertyName("word");
                    jsonWriter.WriteValue(gloss.Foreign);

                    jsonWriter.WritePropertyName("definitions");
                    jsonWriter.WriteStartArray();
                    foreach (var glossCandidate in gloss.GlossCandidates)
                    {
                        jsonWriter.WriteValue(glossCandidate);
                    }
                    jsonWriter.WriteEndArray();

                    jsonWriter.WriteEndObject();
                }
                jsonWriter.WriteEndArray();

                // Push everything to the console before the dictionary resources are torn down.
                jsonWriter.Flush();
            }
        }
        /// <summary>
        /// Entry point: builds the four dictionary/corpus updater processes, starts all of them
        /// and waits until every one has finished.
        /// </summary>
        /// <param name="args">
        /// Command-line arguments. The first argument, if present, is passed to
        /// <c>GetDataDirectory</c> to determine the data directory.
        /// </param>
        /// <returns>A task that completes once all updater tasks have completed.</returns>
        public static async Task Main(string[] args)
        {
            var dataDirectory   = GetDataDirectory(args.ElementAtOrDefault(0));
            var dictionariesDir = Path.Combine(dataDirectory, "dictionaries");
            var characterDir    = Path.Combine(dataDirectory, "character");
            var corporaDir      = Path.Combine(dataDirectory, "corpora");

            // One HttpClient shared by all updaters; disposed when Main returns, which is
            // only after Task.WhenAll below has observed every updater task completing.
            using var httpClient = new HttpClient();

            using var mecab = new MeCabIpadic(new MeCabParam
            {
                DicDir = Path.Combine(dataDirectory, "mecab", "ipadic"),
                UseMemoryMappedFile = true
            });

            var jmdictUpdater = new JMDictUpdaterProcess(
                httpClient,
                "http://ftp.edrdg.org/pub/Nihongo/JMdict_e.gz",
                Path.Combine(dictionariesDir, "JMdict_e.gz"),
                Path.Combine(dictionariesDir, "JMdict_e.cache"),
                Path.Combine(dictionariesDir, "JMdict_e.gz.new"),
                Path.Combine(dictionariesDir, "JMdict_e.new.cache"),
                Path.Combine(dictionariesDir, "jmdict_tested_schema.xml"));
            jmdictUpdater.OnUpdateStatusChange += UpdaterStatusUpdate(jmdictUpdater.Name);

            var jmnedictUpdater = new JMNedictUpdaterProcess(
                httpClient,
                "http://ftp.edrdg.org/pub/Nihongo/JMnedict.xml.gz",
                Path.Combine(dictionariesDir, "JMnedict.xml.gz"),
                Path.Combine(dictionariesDir, "JMnedict.xml.cache"),
                Path.Combine(dictionariesDir, "JMnedict.xml.gz.new"),
                Path.Combine(dictionariesDir, "JMnedict.xml.new.cache"),
                Path.Combine(dictionariesDir, "jmnedict_tested_schema.xml"));
            jmnedictUpdater.OnUpdateStatusChange += UpdaterStatusUpdate(jmnedictUpdater.Name);

            var kanjiDictUpdater = new KanjiDictUpdaterProcess(
                httpClient,
                "http://ftp.edrdg.org/pub/Nihongo/kanjidic2.xml.gz",
                Path.Combine(characterDir, "kanjidic2.xml.gz"),
                Path.Combine(characterDir, "kanjidic2.xml.gz.new"));
            kanjiDictUpdater.OnUpdateStatusChange += UpdaterStatusUpdate(kanjiDictUpdater.Name);

            // The Tanaka updater is the only one that also receives the MeCab instance.
            var tanakaUpdater = new TanakaUpdaterProcess(
                httpClient,
                "http://ftp.edrdg.org/pub/Nihongo/examples.utf.gz",
                Path.Combine(corporaDir, "examples.utf.gz"),
                Path.Combine(corporaDir, "examples.utf.gz.new"),
                Path.Combine(corporaDir, "tanaka.cache"),
                Path.Combine(corporaDir, "tanaka.cache.new"),
                mecab);
            tanakaUpdater.OnUpdateStatusChange += UpdaterStatusUpdate(tanakaUpdater.Name);

            // Start every updater before awaiting any of them, then wait for the whole set.
            var jmdictTask    = jmdictUpdater.Execute();
            var jmnedictTask  = jmnedictUpdater.Execute();
            var kanjiDictTask = kanjiDictUpdater.Execute();
            var tanakaTask    = tanakaUpdater.Execute();

            await Task.WhenAll(jmdictTask, jmnedictTask, kanjiDictTask, tanakaTask);
        }
 /// <summary>
 /// Wires up the application's <c>Startup</c> and <c>Exit</c> handlers.
 /// </summary>
 /// <remarks>
 /// On startup the main window is created, given a <c>MainWindowVM</c> holding the four
 /// updater processes (JMdict, JMnedict, KanjiDic and Tanaka corpus) and shown. Source URLs
 /// are read from the application settings; data files live under the <c>Data</c> folder
 /// next to the application's base directory. On exit the shared <c>HttpClient</c> and
 /// MeCab instance created at startup are disposed.
 /// </remarks>
 public App()
 {
     // Shared resources: created by the Startup handler, released by the Exit handler.
     // They stay null if Startup never ran or failed before reaching their construction.
     HttpClient  httpClient = null;
     MeCabIpadic mecab      = null;

     // The handler contains no awaits, so it is a plain synchronous lambda.
     Startup += (sender, args) =>
     {
         var baseDir = AppDomain.CurrentDomain.BaseDirectory;
         var dataDir = Path.Combine(baseDir, "Data");
         var window  = new MainWindow();

         httpClient = new HttpClient();
         mecab      = new MeCabIpadic(new MeCabParam
         {
             DicDir = Path.Combine(dataDir, "mecab", "ipadic"),
             UseMemoryMappedFile = true
         });

         window.DataContext = new MainWindowVM(new UpdaterProcess[]
         {
             new JMDictUpdaterProcess(
                 httpClient,
                 ConfigurationManager.AppSettings["JMDictSourceUrl"],
                 Path.Combine(dataDir, "dictionaries", "JMdict_e.gz"),
                 Path.Combine(dataDir, "dictionaries", "JMdict_e.cache"),
                 Path.Combine(dataDir, "dictionaries", "JMdict_e.gz.new"),
                 Path.Combine(dataDir, "dictionaries", "JMdict_e.new.cache"),
                 Path.Combine(dataDir, "dictionaries", "jmdict_tested_schema.xml")),
             new JMNedictUpdaterProcess(
                 httpClient,
                 ConfigurationManager.AppSettings["JMNeDictSourceUrl"],
                 Path.Combine(dataDir, "dictionaries", "JMnedict.xml.gz"),
                 Path.Combine(dataDir, "dictionaries", "JMnedict.xml.cache"),
                 Path.Combine(dataDir, "dictionaries", "JMnedict.xml.gz.new"),
                 Path.Combine(dataDir, "dictionaries", "JMnedict.xml.new.cache"),
                 Path.Combine(dataDir, "dictionaries", "jmnedict_tested_schema.xml")),
             new KanjiDictUpdaterProcess(
                 httpClient,
                 ConfigurationManager.AppSettings["KanjiDicSourceUrl"],
                 Path.Combine(dataDir, "character", "kanjidic2.xml.gz"),
                 Path.Combine(dataDir, "character", "kanjidic2.xml.gz.new")),
             new TanakaUpdaterProcess(
                 httpClient,
                 ConfigurationManager.AppSettings["TanakaCorpusSourceUrl"],
                 Path.Combine(dataDir, "corpora", "examples.utf.gz"),
                 Path.Combine(dataDir, "corpora", "examples.utf.gz.new"),
                 Path.Combine(dataDir, "corpora", "tanaka.cache"),
                 Path.Combine(dataDir, "corpora", "tanaka.cache.new"),
                 mecab)
         });
         window.Show();
     };
     Exit += (sender, args) =>
     {
         // Release what Startup created.
         // NOTE(review): assumes no updater is expected to keep running past application exit.
         mecab?.Dispose();
         httpClient?.Dispose();
     };
 }