Beispiel #1
0
        /// <summary>
        /// Loads the special expressions dictionary from the file at
        /// <c>PathHelper.SpecialReadingsPath</c>.
        /// </summary>
        /// <remarks>
        /// Blank lines and lines whose first character is ';' are skipped. Every other line is
        /// split on <c>SeparatorHelper.FileFieldSeparator</c>: field 0 is the kanji reading,
        /// field 1 the kana reading, and an optional third field is an explicitly written
        /// furigana solution. Lines sharing the same kanji reading are accumulated as several
        /// readings of a single <c>SpecialExpression</c>.
        /// </remarks>
        private void LoadSpecialExpressions()
        {
            _specialExpressions = new Dictionary<string, SpecialExpression>();
            foreach (string line in File.ReadAllLines(PathHelper.SpecialReadingsPath))
            {
                // The whitespace check guarantees at least one character, so indexing is safe.
                if (string.IsNullOrWhiteSpace(line) || line[0] == ';')
                {
                    continue;
                }

                string[] split        = line.Split(SeparatorHelper.FileFieldSeparator);
                string   kanjiReading = split[0];
                string   kanaReading  = split[1];

                VocabEntry v = new VocabEntry(kanjiReading, kanaReading);

                // Read the solution if it is explicitly written (exactly three fields).
                // Otherwise build one furigana part holding the whole kana reading over
                // indexes 0 .. kanjiReading.Length - 1.
                FuriganaSolution solution = split.Length == 3
                    ? FuriganaSolution.Parse(split[2], v)
                    : new FuriganaSolution(v, new FuriganaPart(kanaReading, 0, kanjiReading.Length - 1));

                // Add the special reading to an existing expression, or create the expression.
                // TryGetValue performs a single lookup instead of ContainsKey + indexer.
                SpecialReading    specialReading = new SpecialReading(kanaReading, solution);
                SpecialExpression existing;
                if (_specialExpressions.TryGetValue(kanjiReading, out existing))
                {
                    existing.Readings.Add(specialReading);
                }
                else
                {
                    _specialExpressions.Add(kanjiReading, new SpecialExpression(kanjiReading, specialReading));
                }
            }
        }
Beispiel #2
0
        /// <summary>
        /// Writes the single solution of each solution set to the text file at
        /// <c>OutputPath</c> and to a JSON array file, logging the outcome of every set.
        /// </summary>
        /// <param name="solutions">The solution sets to process; enumerated once.</param>
        /// <remarks>
        /// A solution whose string form is already in <c>AlreadyWritten</c> is not written
        /// again, but still counts as a success in the final summary.
        /// </remarks>
        public void Write(IEnumerable<FuriganaSolutionSet> solutions)
        {
            int success = 0, total = 0;

            log4net.ILog logger = log4net.LogManager.GetLogger("Writer");

            // Stopwatch is monotonic; DateTime.Now differences can be skewed by DST or
            // system clock adjustments while the process runs.
            System.Diagnostics.Stopwatch stopwatch = System.Diagnostics.Stopwatch.StartNew();

            // Only the file name (no directory) of OutputPath is kept here, so the JSON file
            // path is relative and resolves against the current working directory.
            string jsonFileName = $"{Path.GetFileNameWithoutExtension(OutputPath)}.json";

            using (var stream = new StreamWriter(OutputPath, false, Encoding.UTF8))
            using (var jsonStream = new StreamWriter(jsonFileName, false, Encoding.UTF8))
            using (var jsonWriter = new JsonTextWriter(jsonStream))
            {
                jsonWriter.WriteStartArray();
                var jsonSerializer = new JsonSerializer();
                jsonSerializer.Converters.Add(new FuriganaSolutionJsonSerializer());
                foreach (FuriganaSolutionSet solution in solutions)
                {
                    FuriganaSolution singleSolution = solution.GetSingleSolution();

                    // Log markers: "X" = empty set, "➕" = non-empty set without a single
                    // solution, "◯" = a single solution is available.
                    if (!solution.Any())
                    {
                        logger.InfoFormat("X    {0}|{1}|???", solution.Vocab.KanjiReading, solution.Vocab.KanaReading);
                    }
                    else if (singleSolution == null)
                    {
                        logger.InfoFormat("➕   {0}", solution);
                    }
                    else
                    {
                        logger.InfoFormat("◯   {0}", solution);
                    }

                    if (singleSolution != null)
                    {
                        // Compute the string form once; it is the de-duplication key and
                        // also the line written to the text output.
                        string serialized = singleSolution.ToString();
                        if (!AlreadyWritten.Contains(serialized))
                        {
                            stream.WriteLine(serialized);
                            AlreadyWritten.Add(serialized);
                            jsonSerializer.Serialize(jsonWriter, singleSolution);
                        }

                        success++;
                    }

                    total++;
                }
                jsonWriter.WriteEndArray();
            }

            stopwatch.Stop();

            logger.InfoFormat("Successfuly ended process with {0} out of {1} successfuly found furigana strings.", success, total);
            logger.InfoFormat("Process took {0} seconds.", stopwatch.Elapsed.TotalSeconds);
        }
        /// <summary>
        /// Writes the single solution of each solution set to the text file at
        /// <c>OutputPath</c> and to a JSON array file, logging the outcome of every set.
        /// </summary>
        /// <param name="solutions">The solution sets to process; enumerated once.</param>
        /// <remarks>
        /// A solution whose string form is already in <c>AlreadyWritten</c> is not written
        /// again, but still counts as a success in the final summary.
        /// </remarks>
        public void Write(IEnumerable<FuriganaSolutionSet> solutions)
        {
            int success = 0, total = 0;
            var logger  = LogManager.GetCurrentClassLogger();

            // Stopwatch is monotonic; DateTime.Now differences can be skewed by DST or
            // system clock adjustments while the process runs.
            System.Diagnostics.Stopwatch stopwatch = System.Diagnostics.Stopwatch.StartNew();

            // Only the file name (no directory) of OutputPath is kept here, so the JSON file
            // path is relative and resolves against the current working directory.
            string jsonFileName = $"{Path.GetFileNameWithoutExtension(OutputPath)}.json";

            using (var stream = new StreamWriter(OutputPath, false, Encoding.UTF8))
            using (var jsonStream = new StreamWriter(jsonFileName, false, Encoding.UTF8))
            using (var jsonWriter = new JsonTextWriter(jsonStream))
            {
                jsonWriter.WriteStartArray();
                var jsonSerializer = new JsonSerializer();
                jsonSerializer.Converters.Add(new FuriganaSolutionJsonSerializer());
                foreach (FuriganaSolutionSet solution in solutions)
                {
                    FuriganaSolution singleSolution = solution.GetSingleSolution();

                    // Log markers: "X" = empty set, "➕" = non-empty set without a single
                    // solution, "◯" = a single solution is available.
                    if (!solution.Any())
                    {
                        logger.Info($"X    {solution.Vocab.KanjiReading}|{solution.Vocab.KanaReading}|???");
                    }
                    else if (singleSolution == null)
                    {
                        logger.Info($"➕   {solution}");
                    }
                    else
                    {
                        logger.Info($"◯   {solution}");
                    }

                    if (singleSolution != null)
                    {
                        // Compute the string form once; it is the de-duplication key and
                        // also the line written to the text output.
                        string serialized = singleSolution.ToString();
                        if (!AlreadyWritten.Contains(serialized))
                        {
                            stream.WriteLine(serialized);
                            AlreadyWritten.Add(serialized);
                            jsonSerializer.Serialize(jsonWriter, singleSolution);
                        }

                        success++;
                    }

                    total++;
                }
                jsonWriter.WriteEndArray();
            }

            stopwatch.Stop();
            TimeSpan duration = stopwatch.Elapsed;

            logger.Info($"Successfuly ended process with {success} out of {total} successfuly found furigana strings.");
            logger.Info($"Process took {duration}.");
        }
Beispiel #4
0
        // Checks that BreakIntoParts on "アカガエル科" yields a leading kana chunk without
        // furigana followed by the kanji "科" carrying the reading "か".
        public void Test_BreakIntoParts_Akagaeruka()
        {
            // Arrange: a single furigana part "か" at index 5, the position of "科".
            VocabEntry       entry    = new VocabEntry("アカガエル科", "アカガエルか");
            FuriganaPart     kaPart   = new FuriganaPart("か", 5);
            FuriganaSolution furigana = new FuriganaSolution(entry, kaPart);

            // Act
            var brokenDown = furigana.BreakIntoParts().ToList();

            // Assert: the count is checked first so the indexing below cannot go out of range.
            Assert.AreEqual(2, brokenDown.Count);

            var kanaChunk = brokenDown[0];
            Assert.AreEqual("アカガエル", kanaChunk.Text);
            Assert.IsNull(kanaChunk.Furigana);

            var kanjiChunk = brokenDown[1];
            Assert.AreEqual("科", kanjiChunk.Text);
            Assert.AreEqual("か", kanjiChunk.Furigana);
        }
Beispiel #5
0
        /// <summary>
        /// Looks the vocab entry up in the override list of the resource set. When an override
        /// exists, yields one new solution that carries the override's furigana and the given
        /// vocab entry; otherwise yields nothing.
        /// </summary>
        protected override IEnumerable<FuriganaSolution> DoSolve(FuriganaResourceSet r, VocabEntry v)
        {
            FuriganaSolution overridden = r.GetOverride(v);
            if (overridden == null)
            {
                // No override registered for this entry.
                yield break;
            }

            // Build a fresh solution bound to the requested vocab entry.
            FuriganaSolution rebound = new FuriganaSolution()
            {
                Furigana = overridden.Furigana,
                Vocab = v
            };
            yield return rebound;
        }
Beispiel #6
0
        /// <summary>
        /// Rebuilds the furigana override list from the file at
        /// <c>PathHelper.OverrideFuriganaPath</c>. Blank lines and lines starting with ';'
        /// are ignored; each remaining line contributes one entry keyed by the string form of
        /// its vocab entry (fields 0 and 1), with the solution parsed from field 2.
        /// </summary>
        private void LoadOverrideList()
        {
            _overrideList = new Dictionary<string, FuriganaSolution>();

            string[] fileLines = File.ReadAllLines(PathHelper.OverrideFuriganaPath);
            foreach (string current in fileLines)
            {
                // Only process lines that carry data: not blank and not a ';' comment.
                if (!string.IsNullOrWhiteSpace(current) && current[0] != ';')
                {
                    string[] fields = current.Split(SeparatorHelper.FileFieldSeparator);

                    string           key   = new VocabEntry(fields[0], fields[1]).ToString();
                    FuriganaSolution value = FuriganaSolution.Parse(fields[2], null);

                    _overrideList.Add(key, value);
                }
            }
        }
        /// <summary>
        /// Writes the single solution of each solution set to the text file at
        /// <c>OutputPath</c>, logging the outcome of every set.
        /// </summary>
        /// <param name="solutions">The solution sets to process; enumerated once.</param>
        /// <remarks>
        /// A solution whose string form is already in <c>AlreadyWritten</c> is not written
        /// again, but still counts as a success in the final summary.
        /// </remarks>
        public void Write(IEnumerable<FuriganaSolutionSet> solutions)
        {
            int success = 0, total = 0;

            log4net.ILog logger = log4net.LogManager.GetLogger("Writer");

            // Stopwatch is monotonic; DateTime.Now differences can be skewed by DST or
            // system clock adjustments while the process runs.
            System.Diagnostics.Stopwatch stopwatch = System.Diagnostics.Stopwatch.StartNew();

            using (StreamWriter stream = new StreamWriter(OutputPath, false, Encoding.UTF8))
            {
                foreach (FuriganaSolutionSet solution in solutions)
                {
                    FuriganaSolution singleSolution = solution.GetSingleSolution();

                    // Log markers: "X" = empty set, "➕" = non-empty set without a single
                    // solution, "◯" = a single solution is available.
                    if (!solution.Any())
                    {
                        logger.InfoFormat("X    {0}|{1}|???", solution.Vocab.KanjiReading, solution.Vocab.KanaReading);
                    }
                    else if (singleSolution == null)
                    {
                        logger.InfoFormat("➕   {0}", solution);
                    }
                    else
                    {
                        logger.InfoFormat("◯   {0}", solution);
                    }

                    if (singleSolution != null)
                    {
                        // Compute the string form once; it is the de-duplication key and
                        // also the line written to the output.
                        string serialized = singleSolution.ToString();
                        if (!AlreadyWritten.Contains(serialized))
                        {
                            stream.WriteLine(serialized);
                            AlreadyWritten.Add(serialized);
                        }

                        success++;
                    }

                    total++;
                }
            }

            stopwatch.Stop();

            logger.InfoFormat("Successfuly ended process with {0} out of {1} successfuly found furigana strings.", success, total);
            logger.InfoFormat("Process took {0} seconds.", stopwatch.Elapsed.TotalSeconds);
        }
Beispiel #8
0
        // Runs the furigana business logic (built on the Jmdict dictionary file) for the given
        // kanji/kana pair and checks that its single solution equals the one parsed from
        // expectedFurigana.
        public void Test_Furigana(string kanjiReading, string kanaReading, string expectedFurigana)
        {
            VocabEntry          v        = new VocabEntry(kanjiReading, kanaReading);
            FuriganaBusiness    business = new FuriganaBusiness(DictionaryFile.Jmdict);
            FuriganaSolutionSet result   = business.Execute(v);

            // Query the result once and reuse it for both the null check and the comparison.
            FuriganaSolution actual = result.GetSingleSolution();
            if (actual == null)
            {
                // Fail with context so the offending test case is identifiable from the report.
                Assert.Fail("GetSingleSolution() returned null for " + kanjiReading + " (" + kanaReading
                            + "); expected " + expectedFurigana + ".");
            }

            Assert.AreEqual(FuriganaSolution.Parse(expectedFurigana, v), actual);
        }
Beispiel #9
0
        // Checks that BreakIntoParts on "大人買い" yields three chunks: "大人" read "おとな",
        // "買" read "が", and the trailing kana "い" without furigana.
        public void Test_BreakIntoParts_Otonagai()
        {
            // Arrange: "おとな" spans indexes 0-1 ("大人"), "が" sits at index 2 ("買").
            VocabEntry       entry     = new VocabEntry("大人買い", "おとながい");
            FuriganaPart     otonaPart = new FuriganaPart("おとな", 0, 1);
            FuriganaPart     gaPart    = new FuriganaPart("が", 2);
            FuriganaSolution furigana  = new FuriganaSolution(entry, otonaPart, gaPart);

            // Act
            var brokenDown = furigana.BreakIntoParts().ToList();

            // Assert: the count is checked first so the indexing below cannot go out of range.
            Assert.AreEqual(3, brokenDown.Count);

            var otona = brokenDown[0];
            Assert.AreEqual("大人", otona.Text);
            Assert.AreEqual("おとな", otona.Furigana);

            var ga = brokenDown[1];
            Assert.AreEqual("買", ga.Text);
            Assert.AreEqual("が", ga.Furigana);

            var trailingKana = brokenDown[2];
            Assert.AreEqual("い", trailingKana.Text);
            Assert.IsNull(trailingKana.Furigana);
        }
Beispiel #10
0
        /// <summary>
        /// Attempts to solve furigana in cases where there are no consecutive kanji in the kanji string,
        /// using regular expressions.
        /// </summary>
        /// <param name="r">Resource set used to decide whether a character is a kanji.</param>
        /// <param name="v">Vocab entry whose kanji reading is matched against its kana reading.</param>
        /// <returns>
        /// At most one solution. Nothing is yielded when the entry contains consecutive kanji,
        /// when either regex fails to match the kana reading, or when the greedy and lazy
        /// matches do not produce equal, non-null solutions.
        /// </returns>
        protected override IEnumerable<FuriganaSolution> DoSolve(FuriganaResourceSet r, VocabEntry v)
        {
            // We are using both a greedy expression and a lazy expression because we want to make sure
            // there is only one way to read them. If the result differs with a greedy or a lazy expression,
            // it means that we cannot tell how the entry should be read.
            string    regGreedy         = "^";
            string    regLazy           = "^";
            bool      consecutiveMarker = false;
            List<int> kanjiIndexes      = new List<int>(4);

            for (int i = 0; i < v.KanjiReading.Length; i++)
            {
                char  c = v.KanjiReading[i];
                Kanji k = r.GetKanji(c);
                if (k == null)
                {
                    // Not a kanji: the character must match itself literally, with no capture
                    // group. It is escaped so that regex metacharacters (".", "?", "(", "*", ...)
                    // are not interpreted as pattern syntax, and it no longer goes through
                    // string.Format, which threw FormatException on "{" or "}".
                    string literal = Regex.Escape(c.ToString());
                    regGreedy        += literal;
                    regLazy          += literal;
                    consecutiveMarker = false;
                }
                else if (consecutiveMarker)
                {
                    // Consecutive kanji. The vocab entry is not eligible for this solution.
                    yield break;
                }
                else
                {
                    // Add a capture group for the kanji and remember its index.
                    regGreedy        += "(.+)";
                    regLazy          += "(.+?)";
                    consecutiveMarker = true;
                    kanjiIndexes.Add(i);
                }
            }
            regGreedy += "$";
            regLazy   += "$";

            // Example regex:
            // For 持ち運ぶ (もちはこぶ)
            // The regexes would be:
            // ^(.+)ち(.+)ぶ$
            // ^(.+?)ち(.+?)ぶ$

            Regex regexGreedy = new Regex(regGreedy);
            Regex regexLazy   = new Regex(regLazy);
            Match matchGreedy = regexGreedy.Match(v.KanaReading);
            Match matchLazy   = regexLazy.Match(v.KanaReading);

            if (matchGreedy.Success && matchLazy.Success)
            {
                // Obtain both solutions.
                FuriganaSolution greedySolution = MakeSolutionFromMatch(v, matchGreedy, kanjiIndexes);
                FuriganaSolution lazySolution   = MakeSolutionFromMatch(v, matchLazy, kanjiIndexes);

                // Only an unambiguous reading is accepted: both solutions must be non-null and equal.
                if (greedySolution != null && lazySolution != null && greedySolution.Equals(lazySolution))
                {
                    // They are equivalent, so return just one of them (the greedy one).
                    yield return greedySolution;
                }
            }
        }