/// <summary>
        /// Helper for TryReading: looks for a special expression that starts at the current kanji
        /// position and whose kana reading matches the kana found at the current kana position.
        /// Every match is explored further by recursing into TryReading.
        /// </summary>
        /// <param name="r">Resource set queried for special expressions.</param>
        /// <param name="v">Vocab entry being solved.</param>
        /// <param name="currentIndexKanji">Index in the kanji string where the expression must start.</param>
        /// <param name="currentIndexKana">Index in the kana string where the expression reading must start.</param>
        /// <param name="currentCut">Furigana parts found so far. A clone of it is extended for each match.</param>
        /// <returns>The solutions reached through a matching special expression, if any.</returns>
        private IEnumerable <FuriganaSolution> FindSpecialExpressions(FuriganaResourceSet r, VocabEntry v,
                                                                      int currentIndexKanji, int currentIndexKana, List <FuriganaPart> currentCut)
        {
            int lastKanjiIndex = v.KanjiReading.Length - 1;

            // Try the longest candidate first, then shorten it one character at a time.
            for (int endIndex = lastKanjiIndex; endIndex >= currentIndexKanji; endIndex--)
            {
                string candidate = v.KanjiReading.Substring(currentIndexKanji, endIndex - currentIndexKanji + 1);
                SpecialExpression expression = r.GetExpression(candidate);
                if (expression == null)
                {
                    // Nothing is registered for this substring.
                    continue;
                }

                bool startsAtFirstCharacter = currentIndexKanji == 0;
                bool endsAtLastCharacter    = endIndex == lastKanjiIndex;

                foreach (SpecialReading special in ReadingExpander.GetPotentialSpecialReadings(
                             expression, startsAtFirstCharacter, endsAtLastCharacter))
                {
                    int kanaEnd = currentIndexKana + special.KanaReading.Length;
                    if (v.KanaReading.Length < kanaEnd)
                    {
                        // Not enough kana left to hold this reading.
                        continue;
                    }

                    string upcomingKana = v.KanaReading.Substring(currentIndexKana, special.KanaReading.Length);
                    if (upcomingKana != special.KanaReading)
                    {
                        continue;
                    }

                    // The reading matches. Shift the furigana of the expression to its position in
                    // the word, append them to a copy of the current cut and keep reading after it.
                    List<FuriganaPart> extendedCut = currentCut.Clone();
                    extendedCut.AddRange(special.Furigana.Furigana.Select(
                        part => new FuriganaPart(part.Value, part.StartIndex + currentIndexKanji, part.EndIndex + currentIndexKanji)));

                    foreach (FuriganaSolution found in TryReading(r, v, endIndex + 1, kanaEnd, extendedCut))
                    {
                        yield return found;
                    }
                }
            }
        }
 /// <summary>
 /// Attempts to solve furigana by walking through the kanji reading string and matching each
 /// kanji against its known readings.
 /// </summary>
 /// <param name="r">Resource set passed on to the recursive reader.</param>
 /// <param name="v">Vocab entry to solve.</param>
 /// <returns>Every solution produced by TryReading when started at the beginning of both strings.</returns>
 protected override IEnumerable <FuriganaSolution> DoSolve(FuriganaResourceSet r, VocabEntry v)
 {
     // Start at the first character of both readings, with no furigana found yet.
     List<FuriganaPart> emptyCut = new List<FuriganaPart>();

     foreach (FuriganaSolution candidate in TryReading(r, v, 0, 0, emptyCut))
     {
         yield return candidate;
     }
 }
Exemple #3
0
        /// <summary>
        /// Solves entries whose kanji reading and kana reading have the same number of characters,
        /// by pairing the characters position by position.
        /// </summary>
        /// <param name="r">Resource set used to look up kanji.</param>
        /// <param name="v">Vocab entry to solve.</param>
        /// <returns>
        /// At most one solution, holding one furigana part per kanji. Nothing is returned when the
        /// lengths differ, when a character is neither a kanji nor kana, when a kana character is not
        /// equivalent to its counterpart in the kana reading, or when no kanji was found.
        /// </returns>
        protected override IEnumerable <FuriganaSolution> DoSolve(FuriganaResourceSet r, VocabEntry v)
        {
            if (v.KanjiReading.Length != v.KanaReading.Length)
            {
                yield break;
            }

            List<FuriganaPart> furigana = new List<FuriganaPart>();

            for (int position = 0; position < v.KanjiReading.Length; position++)
            {
                char written = v.KanjiReading[position];

                if (r.GetKanji(written) != null)
                {
                    // A kanji takes the kana located at the same position as its reading.
                    furigana.Add(new FuriganaPart(v.KanaReading[position].ToString(), position));
                    continue;
                }

                // Not a kanji: it has to be kana, and it has to agree with the kana reading.
                // Anything else is probably a trap, so give up.
                string writtenText = written.ToString();
                if (!KanaHelper.IsAllKana(writtenText) ||
                    !KanaHelper.AreEquivalent(writtenText, v.KanaReading[position].ToString()))
                {
                    yield break;
                }
            }

            if (furigana.Count > 0)
            {
                yield return new FuriganaSolution(v, furigana);
            }
        }
 /// <summary>
 /// Solves entries whose kanji reading is a single character that is not kana: the whole kana
 /// reading is attributed to that character.
 /// </summary>
 /// <param name="r">Resource set. Not used by this solver.</param>
 /// <param name="v">Vocab entry to solve.</param>
 /// <returns>One solution when the entry qualifies; nothing otherwise.</returns>
 protected override IEnumerable <FuriganaSolution> DoSolve(FuriganaResourceSet r, VocabEntry v)
 {
     if (v.KanjiReading.Length != 1 || KanaHelper.IsAllKana(v.KanjiReading))
     {
         // More (or less) than one character, or a lone kana character: not our case.
         yield break;
     }

     FuriganaPart wholeReading = new FuriganaPart(v.KanaReading, 0, 0);
     yield return new FuriganaSolution(v, wholeReading);
 }
        /// <summary>
        /// Recursively walks through the kanji reading string and looks for every way the kana
        /// reading can be cut so that each piece matches what is read in the kanji string.
        /// </summary>
        /// <param name="r">Resource set.</param>
        /// <param name="v">Vocab to solve.</param>
        /// <param name="currentIndexKanji">Current position in the kanji string. Used for recursion.</param>
        /// <param name="currentIndexKana">Current position in the kana string. Used for recursion.</param>
        /// <param name="currentCut">Current furigana parts. Used for recursion.</param>
        /// <returns>
        /// The solutions found from the given position. When special expressions yield at least one
        /// solution, only those solutions are returned.
        /// </returns>
        private IEnumerable <FuriganaSolution> TryReading(FuriganaResourceSet r, VocabEntry v,
                                                          int currentIndexKanji, int currentIndexKana, List <FuriganaPart> currentCut)
        {
            int kanjiLength = v.KanjiReading.Length;
            int kanaLength  = v.KanaReading.Length;

            if (currentIndexKanji >= kanjiLength || currentIndexKana >= kanaLength)
            {
                // At least one of the strings is exhausted. The cut is valid only when both readings
                // ended exactly together; any other combination is a broken case with no result.
                if (currentIndexKanji == kanjiLength && currentIndexKana == kanaLength)
                {
                    yield return new FuriganaSolution(v, currentCut);
                }

                yield break;
            }

            // Special expressions come first. If they give anything, the general case is skipped.
            bool solvedBySpecialExpression = false;
            foreach (FuriganaSolution special in FindSpecialExpressions(r, v, currentIndexKanji, currentIndexKana, currentCut))
            {
                solvedBySpecialExpression = true;
                yield return special;
            }

            if (solvedBySpecialExpression)
            {
                yield break;
            }

            // General case: look at the current character.
            char current = v.KanjiReading[currentIndexKanji];
            if (currentIndexKanji > 0 && current == '々')
            {
                // The repeater mark stands for the character that precedes it.
                current = v.KanjiReading[currentIndexKanji - 1];
            }

            // Read it as a kanji when the resource set knows it, as kana otherwise.
            Kanji kanji = r.GetKanji(current);
            IEnumerable<FuriganaSolution> continuation = kanji == null
                ? ReadAsKana(r, v, currentIndexKanji, currentIndexKana, currentCut, current)
                : ReadAsKanji(r, v, currentIndexKanji, currentIndexKana, currentCut, current, kanji);

            foreach (FuriganaSolution found in continuation)
            {
                yield return found;
            }
        }
Exemple #6
0
        /// <summary>
        /// Attempts to solve the given vocab entry.
        /// </summary>
        /// <param name="r">Set of resources required by solvers.</param>
        /// <param name="v">Entry to attempt to solve.</param>
        /// <returns>
        /// The solutions found, if any. They are produced lazily, while the result is enumerated.
        /// </returns>
        /// <exception cref="InvalidOperationException">
        /// Thrown during enumeration when a solution returned by <c>DoSolve</c> fails its <c>Check</c>.
        /// </exception>
        public IEnumerable <FuriganaSolution> Solve(FuriganaResourceSet r, VocabEntry v)
        {
            foreach (FuriganaSolution solution in DoSolve(r, v))
            {
                if (!solution.Check())
                {
                    // Throw a specific exception type instead of the bare System.Exception.
                    // Callers catching Exception still catch it, and the message is unchanged.
                    throw new InvalidOperationException("The solution did not pass the check test.");
                }

                yield return solution;
            }
        }
        /// <summary>
        /// Solves entries made of two characters where the second one repeats the first (either
        /// through the repeater mark, e.g. 中々, or by being the same character) and whose kana
        /// reading has an even length: each character receives one half of the kana reading.
        /// </summary>
        /// <param name="r">Resource set. Not used by this solver.</param>
        /// <param name="v">Vocab entry to solve.</param>
        /// <returns>One solution when the entry qualifies; nothing otherwise.</returns>
        protected override IEnumerable <FuriganaSolution> DoSolve(FuriganaResourceSet r, VocabEntry v)
        {
            if (v.KanjiReading.Length != 2 || v.KanaReading.Length % 2 != 0)
            {
                yield break;
            }

            char second = v.KanjiReading[1];
            if (second != '々' && second != v.KanjiReading[0])
            {
                // The second character does not repeat the first one.
                yield break;
            }

            // Cut the kana reading right in the middle.
            int    half       = v.KanaReading.Length / 2;
            string firstHalf  = v.KanaReading.Substring(0, half);
            string secondHalf = v.KanaReading.Substring(half);

            yield return new FuriganaSolution(v, new FuriganaPart(firstHalf, 0), new FuriganaPart(secondHalf, 1));
        }
Exemple #8
0
        /// <summary>
        /// Attempts to solve furigana by looking the entry up in the override list of the resource set.
        /// </summary>
        /// <param name="r">Resource set providing the overrides.</param>
        /// <param name="v">Vocab entry to solve.</param>
        /// <returns>
        /// A new solution carrying the furigana of the override and bound to <paramref name="v"/>,
        /// or nothing when no override exists.
        /// </returns>
        protected override IEnumerable <FuriganaSolution> DoSolve(FuriganaResourceSet r, VocabEntry v)
        {
            FuriganaSolution overrideSolution = r.GetOverride(v);
            if (overrideSolution == null)
            {
                yield break;
            }

            // Build a fresh solution rather than handing out the stored one.
            FuriganaSolution rebound = new FuriganaSolution();
            rebound.Furigana = overrideSolution.Furigana;
            rebound.Vocab    = v;

            yield return rebound;
        }
        /// <summary>
        /// Helper for TryReading, used when the current character of the kanji string is not a kanji.
        /// Checks that it agrees with the current character of the kana string and, if it does,
        /// carries on with TryReading one character further in both strings.
        /// </summary>
        /// <param name="r">Resource set.</param>
        /// <param name="v">Vocab entry being solved.</param>
        /// <param name="currentIndexKanji">Current position in the kanji string.</param>
        /// <param name="currentIndexKana">Current position in the kana string.</param>
        /// <param name="currentCut">Furigana parts found so far. Passed on unchanged.</param>
        /// <param name="c">Character being read in the kanji string.</param>
        /// <returns>The solutions found past this character, if the characters agree.</returns>
        private IEnumerable <FuriganaSolution> ReadAsKana(FuriganaResourceSet r, VocabEntry v,
                                                          int currentIndexKanji, int currentIndexKana, List <FuriganaPart> currentCut, char c)
        {
            char spoken = v.KanaReading[currentIndexKana];

            // Accept an exact match, or a match once both characters are converted to hiragana.
            if (c != spoken && KanaHelper.ToHiragana(c.ToString()) != KanaHelper.ToHiragana(spoken.ToString()))
            {
                yield break;
            }

            // Reading kana adds no furigana, so the same cut goes on.
            int nextKanji = currentIndexKanji + 1;
            int nextKana  = currentIndexKana + 1;
            foreach (FuriganaSolution found in TryReading(r, v, nextKanji, nextKana, currentCut))
            {
                yield return found;
            }
        }
        /// <summary>
        /// Helper for TryReading, used when the current character is a kanji. Consumes growing
        /// slices of the kana string and, each time a slice equals one of the potential readings of
        /// the kanji, carries on with TryReading after that slice.
        /// </summary>
        /// <param name="r">Resource set.</param>
        /// <param name="v">Vocab entry being solved.</param>
        /// <param name="currentIndexKanji">Position of the kanji in the kanji string.</param>
        /// <param name="currentIndexKana">Position in the kana string where the reading must start.</param>
        /// <param name="currentCut">Furigana parts found so far. A clone of it is extended for each match.</param>
        /// <param name="c">Character being read. Not used by this method.</param>
        /// <param name="k">Kanji whose potential readings are tested.</param>
        /// <returns>The solutions found for every reading of the kanji that fits.</returns>
        private IEnumerable <FuriganaSolution> ReadAsKanji(FuriganaResourceSet r, VocabEntry v,
                                                           int currentIndexKanji, int currentIndexKana, List <FuriganaPart> currentCut, char c, Kanji k)
        {
            int  kanjiLeftAfterThis = v.KanjiReading.Length - currentIndexKanji - 1;
            bool isFirstCharacter   = currentIndexKanji == 0;
            bool isLastCharacter    = currentIndexKanji == v.KanjiReading.Length - 1;

            List<string> candidateReadings = ReadingExpander.GetPotentialKanjiReadings(
                k, isFirstCharacter, isLastCharacter, UseNanori);

            // lastKanaIndex is the index of the last kana consumed by the slice being tested.
            for (int lastKanaIndex = currentIndexKana;
                 lastKanaIndex < v.KanaReading.Length && lastKanaIndex < currentIndexKana + MaxKanaPerKanji;
                 lastKanaIndex++)
            {
                int kanaLeftAfterThis = v.KanaReading.Length - lastKanaIndex - 1;
                if (kanaLeftAfterThis < kanjiLeftAfterThis)
                {
                    // Fewer kana left than characters still to read: longer slices cannot work either.
                    yield break;
                }

                int    sliceLength = lastKanaIndex - currentIndexKana + 1;
                string slice       = v.KanaReading.Substring(currentIndexKana, sliceLength);

                foreach (string reading in candidateReadings)
                {
                    if (reading != slice)
                    {
                        continue;
                    }

                    // Match: attribute the reading to the kanji in a copy of the cut, then go on.
                    List<FuriganaPart> extendedCut = currentCut.Clone();
                    extendedCut.Add(new FuriganaPart(reading, currentIndexKanji));

                    foreach (FuriganaSolution found in TryReading(r, v, currentIndexKanji + 1, lastKanaIndex + 1, extendedCut))
                    {
                        yield return found;
                    }
                }

                // Keep growing the slice to follow the other potential reading paths.
            }
        }
Exemple #11
0
        /// <summary>
        /// Attempts to solve furigana in cases where there are no consecutive kanji in the kanji string,
        /// using regular expressions.
        /// </summary>
        /// <remarks>
        /// The kanji string is turned into two patterns in which every kanji becomes a capture group
        /// and every other character is matched literally. One pattern is greedy, the other lazy.
        /// A solution is returned only when both patterns match the kana reading and lead to
        /// equal solutions, i.e. when there is a single way to cut the kana reading.
        /// </remarks>
        /// <param name="r">Resource set used to look up kanji.</param>
        /// <param name="v">Vocab entry to solve.</param>
        /// <returns>At most one solution; nothing when the entry is not eligible or is ambiguous.</returns>
        protected override IEnumerable <FuriganaSolution> DoSolve(FuriganaResourceSet r, VocabEntry v)
        {
            // We are using both a greedy expression and a lazy expression because we want to make sure
            // there is only one way to read them. If the result differs with a greedy or a lazy expression,
            // it means that we do not know how to read the entry.
            string     regGreedy         = "^";
            string     regLazy           = "^";
            bool       consecutiveMarker = false;
            List <int> kanjiIndexes      = new List <int>(4);

            for (int i = 0; i < v.KanjiReading.Length; i++)
            {
                char  c = v.KanjiReading[i];
                Kanji k = r.GetKanji(c);
                if (k == null)
                {
                    // Add the character to the patterns as a literal. No capture group for kana.
                    // It must be escaped: a raw '.', '(' or '+' would be read as regex syntax and
                    // either change the meaning of the pattern or make it invalid.
                    string literal = Regex.Escape(c.ToString());
                    regGreedy        += literal;
                    regLazy          += literal;
                    consecutiveMarker = false;
                }
                else if (consecutiveMarker)
                {
                    // Consecutive kanji. The vocab entry is not eligible for this solution.
                    yield break;
                }
                else
                {
                    // Add a capture group for the kanji.
                    regGreedy        += "(.+)";
                    regLazy          += "(.+?)";
                    consecutiveMarker = true;
                    kanjiIndexes.Add(i);
                }
            }
            regGreedy += "$";
            regLazy   += "$";

            // Example regex:
            // For 持ち運ぶ (もちはこぶ)
            // The regexes would be:
            // ^(.+)ち(.+)ぶ$
            // ^(.+?)ち(.+?)ぶ$

            Regex regexGreedy = new Regex(regGreedy);
            Regex regexLazy   = new Regex(regLazy);
            Match matchGreedy = regexGreedy.Match(v.KanaReading);
            Match matchLazy   = regexLazy.Match(v.KanaReading);

            if (matchGreedy.Success && matchLazy.Success)
            {
                // Obtain both solutions.
                FuriganaSolution greedySolution = MakeSolutionFromMatch(v, matchGreedy, kanjiIndexes);
                FuriganaSolution lazySolution   = MakeSolutionFromMatch(v, matchLazy, kanjiIndexes);

                // Are both solutions non-null and equivalent?
                if (greedySolution != null && lazySolution != null && greedySolution.Equals(lazySolution))
                {
                    // They are, so the cut is unambiguous. Return one of them (the greedy one).
                    yield return greedySolution;
                }
            }
        }
        /// <summary>
        /// Attempts to solve furigana by reading the kana string and attributing kanji a reading based
        /// not on the readings of the kanji, but on the kana characters that come up.
        /// </summary>
        /// <param name="r">Resource set used to look up kanji and special expressions.</param>
        /// <param name="v">Vocab entry to solve.</param>
        /// <returns>
        /// At most one solution, returned only when the whole kana reading has been consumed once
        /// the end of the kanji reading is reached.
        /// </returns>
        protected override IEnumerable <FuriganaSolution> DoSolve(FuriganaResourceSet r, VocabEntry v)
        {
            // Basically, we are reading the kanji reading character by character, eating the kana from
            // the kana reading and associating each kanji the piece of kana that comes next.
            // The thing is, we are taking advantage that kanji readings cannot start with certain
            // kana (ん and the small characters).
            // If we just stumbled upon a kanji and the next characters of the kana string are of these
            // impossible start kana, we can automatically associate them with the kanji.
            // Now this will work only for a number of vocab, but it does significantly improve the results.
            // It is especially good for 2-characters compounds that use unusual readings.

            // Example: 阿呆陀羅 (あほんだら)
            // Read the あ for 阿;
            // Read the ほ for 呆;
            // Read the ん: it's an impossible start character, so it goes with 呆 as well;
            // Read the だ for 陀;
            // Read the ら for 羅.

            // "kana" holds the part of the kana reading that has not been consumed yet.
            string kana = v.KanaReading;
            List <FuriganaPart> furigana = new List <FuriganaPart>();

            for (int i = 0; i < v.KanjiReading.Length; i++)
            {
                if (kana.Length == 0)
                {
                    // We still have characters to browse in our kanji reading, but
                    // there are no more kana to consume. Cannot solve.
                    yield break;
                }

                char c = v.KanjiReading[i];
                // Check for special expressions starting at i, longest candidate first.
                bool foundExpression = false;
                for (int j = v.KanjiReading.Length - 1; j >= i; j--)
                {
                    string            lookup     = v.KanjiReading.Substring(i, (j - i) + 1);
                    SpecialExpression expression = r.GetExpression(lookup);
                    if (expression != null)
                    {
                        // We found an expression.
                        foreach (SpecialReading expressionReading in ReadingExpander.GetPotentialSpecialReadings(
                                     expression, i == 0, j == v.KanjiReading.Length - 1))
                        {
                            if (kana.Length >= expressionReading.KanaReading.Length &&
                                kana.Substring(0, expressionReading.KanaReading.Length) == expressionReading.KanaReading)
                            {
                                // The reading matches.
                                // Eat the kana chain. The furigana parts of the expression are shifted by i,
                                // and AddRange runs before i is reassigned below, so the lambda sees the
                                // start index of the expression.
                                furigana.AddRange(expressionReading.Furigana.Furigana
                                                  .Select(fp => new FuriganaPart(fp.Value, fp.StartIndex + i, fp.EndIndex + i)));
                                kana            = kana.Substring(expressionReading.KanaReading.Length);
                                // Jump to the last character of the expression; the outer loop then moves past it.
                                i               = j;
                                foundExpression = true;
                                break;
                            }
                        }

                        if (foundExpression)
                        {
                            break;
                        }
                    }
                }

                if (foundExpression)
                {
                    continue;
                }

                // Normal process: eat the first character of our kana string.
                string eaten = kana.First().ToString();
                kana = kana.Substring(1);
                Kanji k = r.GetKanji(c);
                if (k != null)
                {
                    // On a kanji case, also eat consecutive "impossible start characters"
                    // (ん, ょ, ゃ, ゅ, っ)
                    while (kana.Length > 0 && ImpossibleCutStart.Contains(kana.First()))
                    {
                        eaten += kana.First();
                        kana   = kana.Substring(1);
                    }

                    furigana.Add(new FuriganaPart(eaten, i));
                }
                else if (!KanaHelper.IsAllKana(c.ToString()))
                {
                    // The character is neither a kanji nor a kana.
                    // Cannot solve.
                    yield break;
                }
                else
                {
                    if (eaten != c.ToString())
                    {
                        // The character browsed is a kana but is not the
                        // character that we just ate. We made a mistake
                        // in one of the kanji readings, meaning that we...
                        // Cannot solve.
                        yield break;
                    }
                }
            }

            if (kana.Length == 0)
            {
                // We consumed the whole kana string.
                // The case is solved.
                yield return(new FuriganaSolution(v, furigana));
            }
        }
Exemple #13
0
 /// <summary>
 /// Solver-specific logic, implemented by each concrete solver: produces the candidate
 /// furigana solutions for the given vocab entry.
 /// </summary>
 /// <param name="r">Set of resources available to the solver.</param>
 /// <param name="v">Entry to attempt to solve.</param>
 /// <returns>The solutions found, if any.</returns>
 protected abstract IEnumerable <FuriganaSolution> DoSolve(FuriganaResourceSet r, VocabEntry v);
        /// <summary>
        /// Solves entries whose kanji reading contains exactly one kanji: the characters written
        /// before and after that kanji are stripped from both ends of the kana reading, and whatever
        /// is left is attributed to the kanji.
        /// </summary>
        /// <param name="r">Resource set used to look up kanji.</param>
        /// <param name="v">Vocab entry to solve.</param>
        /// <returns>
        /// At most one solution. Nothing is returned when the entry does not contain exactly one
        /// kanji, when the surrounding characters do not match the ends of the kana reading, or when
        /// no kana is left for the kanji.
        /// </returns>
        protected override IEnumerable <FuriganaSolution> DoSolve(FuriganaResourceSet r, VocabEntry v)
        {
            int kanjiCount = v.KanjiReading.Count(c => r.GetKanji(c) != null);
            if (kanjiCount != 1)
            {
                yield break;
            }

            int    kanjiIndex  = 0;
            string kanaReading = v.KanaReading;

            // Browse the kanji reading and eat characters from the start of the kana reading
            // until we get to the kanji character.
            for (int i = 0; i < v.KanjiReading.Length; i++)
            {
                char c = v.KanjiReading[i];
                if (r.GetKanji(c) != null)
                {
                    // We are on the kanji. Remember where it is and stop.
                    kanjiIndex = i;
                    break;
                }

                // The length check matters: reading the first character of an exhausted kana
                // string would throw instead of reporting "cannot solve".
                if (kanaReading.Length == 0 || kanaReading[0] != c)
                {
                    // There is something wrong. Readings don't add up. Can't solve.
                    yield break;
                }

                kanaReading = kanaReading.Substring(1);
            }

            // Now browse in reverse and eat characters from the end of the kana reading
            // until we get back to the kanji character.
            for (int i = v.KanjiReading.Length - 1; i >= 0; i--)
            {
                char c = v.KanjiReading[i];
                if (r.GetKanji(c) != null)
                {
                    // We are on the kanji. Stop.
                    break;
                }

                if (kanaReading.Length == 0 || kanaReading[kanaReading.Length - 1] != c)
                {
                    // There is something wrong. Readings don't add up. Can't solve.
                    yield break;
                }

                kanaReading = kanaReading.Substring(0, kanaReading.Length - 1);
            }

            if (kanaReading.Length == 0)
            {
                // The surrounding characters consumed the whole kana reading, leaving nothing
                // for the kanji. An empty reading is not a solution.
                yield break;
            }

            // What is left of kanaReading after eating the kana before and after the kanji
            // is the reading of our kanji.
            yield return new FuriganaSolution(v, new FuriganaPart(kanaReading, kanjiIndex));
        }