예제 #1
0
        /// <summary>
        /// Helper for TryReading. Starting at <paramref name="currentIndexKanji"/>, looks up
        /// substrings of the kanji reading as special expressions, longest substring first.
        /// For every expression reading whose kana matches the kana reading at
        /// <paramref name="currentIndexKana"/>, a copy of the current cut is extended with the
        /// expression's furigana parts and the search continues through TryReading.
        /// </summary>
        /// <param name="r">Resource set queried for special expressions.</param>
        /// <param name="v">Vocab entry whose kanji and kana readings are being aligned.</param>
        /// <param name="currentIndexKanji">Index in the kanji reading where the expression must start.</param>
        /// <param name="currentIndexKana">Index in the kana reading where the expression's kana must start.</param>
        /// <param name="currentCut">Furigana parts collected so far; it is cloned, never modified.</param>
        /// <returns>Every solution yielded by the recursive TryReading calls.</returns>
        private IEnumerable <FuriganaSolution> FindSpecialExpressions(FuriganaResourceSet r, VocabEntry v,
                                                                      int currentIndexKanji, int currentIndexKana, List <FuriganaPart> currentCut)
        {
            // Longest candidate first: from "everything that remains" down to a single character.
            for (int length = v.KanjiReading.Length - currentIndexKanji; length > 0; length--)
            {
                int nextIndexKanji = currentIndexKanji + length;

                SpecialExpression expression = r.GetExpression(v.KanjiReading.Substring(currentIndexKanji, length));
                if (expression == null)
                {
                    continue;
                }

                bool startsAtBeginning = currentIndexKanji == 0;
                bool reachesEnd        = nextIndexKanji == v.KanjiReading.Length;

                foreach (SpecialReading expressionReading in
                         ReadingExpander.GetPotentialSpecialReadings(expression, startsAtBeginning, reachesEnd))
                {
                    int nextIndexKana = currentIndexKana + expressionReading.KanaReading.Length;

                    // Not enough kana left to hold this reading.
                    if (v.KanaReading.Length < nextIndexKana)
                    {
                        continue;
                    }

                    // The kana at the current position must be exactly the expression's kana.
                    if (string.CompareOrdinal(v.KanaReading, currentIndexKana,
                                              expressionReading.KanaReading, 0,
                                              expressionReading.KanaReading.Length) != 0)
                    {
                        continue;
                    }

                    // Match: shift the expression's furigana to its position in the word
                    // and carry on from the first character after the expression.
                    List<FuriganaPart> extendedCut = currentCut.Clone();
                    foreach (var part in expressionReading.Furigana.Furigana)
                    {
                        extendedCut.Add(new FuriganaPart(part.Value,
                                                         part.StartIndex + currentIndexKanji,
                                                         part.EndIndex + currentIndexKanji));
                    }

                    foreach (FuriganaSolution solution in TryReading(r, v, nextIndexKanji, nextIndexKana, extendedCut))
                    {
                        yield return solution;
                    }
                }
            }
        }
예제 #2
0
        /// <summary>
        /// Helper for TryReading. Treats the character at <paramref name="currentIndexKanji"/> as
        /// the kanji <paramref name="k"/> and tries progressively longer kana strings starting at
        /// <paramref name="currentIndexKana"/>. Whenever the tested kana string equals one of the
        /// kanji's potential readings, a copy of the cut is extended with that reading and the
        /// search continues through TryReading.
        /// </summary>
        /// <param name="r">Resource set, passed through to TryReading.</param>
        /// <param name="v">Vocab entry whose kanji and kana readings are being aligned.</param>
        /// <param name="currentIndexKanji">Index of the kanji in the kanji reading.</param>
        /// <param name="currentIndexKana">Index in the kana reading where the kanji's reading must start.</param>
        /// <param name="currentCut">Furigana parts collected so far; it is cloned, never modified.</param>
        /// <param name="c">The character being read. Not used by this method.</param>
        /// <param name="k">The kanji whose potential readings are tested.</param>
        /// <returns>Every solution yielded by the recursive TryReading calls.</returns>
        private IEnumerable <FuriganaSolution> ReadAsKanji(FuriganaResourceSet r, VocabEntry v,
                                                           int currentIndexKanji, int currentIndexKana, List <FuriganaPart> currentCut, char c, Kanji k)
        {
            // Number of characters of the kanji reading that still follow this kanji.
            int kanjiLeftAfterThis = v.KanjiReading.Length - currentIndexKanji - 1;

            bool isFirstCharacter = currentIndexKanji == 0;
            bool isLastCharacter  = currentIndexKanji == v.KanjiReading.Length - 1;
            List<string> candidateReadings =
                ReadingExpander.GetPotentialKanjiReadings(k, isFirstCharacter, isLastCharacter, UseNanori);

            // kanaEnd is the exclusive end of the kana slice under test; it grows by one each pass,
            // capped by both the kana reading length and MaxKanaPerKanji.
            int kanaEnd = currentIndexKana + 1;
            while (kanaEnd <= v.KanaReading.Length && kanaEnd <= currentIndexKana + MaxKanaPerKanji)
            {
                // Fewer kana left than characters still to read: longer slices cannot work either.
                if (v.KanaReading.Length - kanaEnd < kanjiLeftAfterThis)
                {
                    yield break;
                }

                string slice = v.KanaReading.Substring(currentIndexKana, kanaEnd - currentIndexKana);

                foreach (string reading in candidateReadings)
                {
                    if (reading != slice)
                    {
                        continue;
                    }

                    // This reading fits: record it for the kanji and explore what follows.
                    List<FuriganaPart> extendedCut = currentCut.Clone();
                    extendedCut.Add(new FuriganaPart(reading, currentIndexKanji));

                    foreach (FuriganaSolution solution in TryReading(r, v, currentIndexKanji + 1, kanaEnd, extendedCut))
                    {
                        yield return solution;
                    }
                }

                // Widen the slice to follow other potential reading paths.
                kanaEnd++;
            }
        }
예제 #3
0
        /// <summary>
        /// Attempts to solve furigana by reading the kana string and attributing kanji a reading based
        /// not on the readings of the kanji, but on the kana characters that come up.
        /// </summary>
        /// <param name="r">Resource set queried for special expressions and kanji.</param>
        /// <param name="v">Vocab entry whose kanji reading and kana reading are being aligned.</param>
        /// <returns>
        /// One solution when the kanji reading is walked to its end and the kana reading is consumed
        /// exactly; nothing otherwise.
        /// </returns>
        protected override IEnumerable <FuriganaSolution> DoSolve(FuriganaResourceSet r, VocabEntry v)
        {
            // We walk the kanji reading character by character, eating kana from the kana reading
            // and giving each kanji the piece of kana that comes next.
            // This relies on the fact that a kanji reading cannot start with certain kana
            // (ん and the small characters): when such kana directly follow the kana just given to
            // a kanji, they are attached to that same kanji.
            // This only works for part of the vocabulary, but it significantly improves the results,
            // especially for 2-character compounds that use unusual readings.
            //
            // Example: 阿呆陀羅 (あほんだら)
            //   Read the あ for 阿;
            //   Read the ほ for 呆;
            //   Read the ん: it is an impossible start character, so it goes with 呆 as well;
            //   Read the だ for 陀;
            //   Read the ら for 羅.

            string kana         = v.KanaReading;
            string kanjiReading = v.KanjiReading;
            List<FuriganaPart> furigana = new List<FuriganaPart>();

            // Position of the next kana not consumed yet. Using a cursor avoids re-slicing the
            // kana string every time something is eaten.
            int kanaIndex = 0;

            for (int i = 0; i < kanjiReading.Length; i++)
            {
                if (kanaIndex >= kana.Length)
                {
                    // We still have characters to browse in our kanji reading, but
                    // there are no more kana to consume. Cannot solve.
                    yield break;
                }

                char c = kanjiReading[i];

                // Check for special expressions starting at i, longest candidate first.
                // expressionEnd receives the index of the last character of the matched expression.
                int expressionEnd = -1;
                for (int j = kanjiReading.Length - 1; j >= i && expressionEnd < 0; j--)
                {
                    SpecialExpression expression = r.GetExpression(kanjiReading.Substring(i, (j - i) + 1));
                    if (expression == null)
                    {
                        continue;
                    }

                    // Copy of i for the lambda below, so it never observes the later reassignment of i.
                    int offset = i;

                    foreach (SpecialReading expressionReading in ReadingExpander.GetPotentialSpecialReadings(
                                 expression, i == 0, j == kanjiReading.Length - 1))
                    {
                        string expressionKana = expressionReading.KanaReading;
                        bool   fits           = kana.Length - kanaIndex >= expressionKana.Length;
                        if (!fits ||
                            string.CompareOrdinal(kana, kanaIndex, expressionKana, 0, expressionKana.Length) != 0)
                        {
                            continue;
                        }

                        // The reading matches: take the expression's furigana, shifted to its
                        // position in the word, and eat the matching kana.
                        furigana.AddRange(expressionReading.Furigana.Furigana
                                          .Select(fp => new FuriganaPart(fp.Value, fp.StartIndex + offset, fp.EndIndex + offset)));
                        kanaIndex    += expressionKana.Length;
                        expressionEnd = j;
                        break;
                    }
                }

                if (expressionEnd >= 0)
                {
                    // Resume right after the expression (the loop increment moves past its last character).
                    i = expressionEnd;
                    continue;
                }

                // Normal process: eat the next character of our kana string.
                int eatenStart = kanaIndex;
                kanaIndex++;

                Kanji k = r.GetKanji(c);
                if (k != null)
                {
                    // On a kanji case, also eat consecutive "impossible start characters"
                    // (ん, ょ, ゃ, ゅ, っ)
                    while (kanaIndex < kana.Length && ImpossibleCutStart.Contains(kana[kanaIndex]))
                    {
                        kanaIndex++;
                    }

                    furigana.Add(new FuriganaPart(kana.Substring(eatenStart, kanaIndex - eatenStart), i));
                }
                else if (!KanaHelper.IsAllKana(c.ToString()))
                {
                    // The character is neither a kanji nor a kana.
                    // Cannot solve.
                    yield break;
                }
                else if (kana[eatenStart] != c)
                {
                    // The character browsed is a kana but is not the character that we just ate.
                    // We made a mistake in one of the kanji readings.
                    // Cannot solve.
                    yield break;
                }
            }

            if (kanaIndex == kana.Length)
            {
                // We consumed the whole kana string.
                // The case is solved.
                yield return new FuriganaSolution(v, furigana);
            }
        }