/// <summary>
        /// Subpart of TryReading. Starting at the current kanji index, looks up every substring
        /// of the kanji reading (longest first) as a special expression. For each potential
        /// reading of a found expression that matches the kana reading at the current kana
        /// index, the search continues through TryReading past the matched portion.
        /// </summary>
        /// <param name="r">Resource set queried for special expressions.</param>
        /// <param name="v">Vocab entry whose kanji and kana readings are being matched.</param>
        /// <param name="currentIndexKanji">Index in the kanji reading where the lookup starts.</param>
        /// <param name="currentIndexKana">Index in the kana reading where a reading must match.</param>
        /// <param name="currentCut">Furigana parts found so far; copied through Clone() before
        /// the parts of a matching expression are appended.</param>
        /// <returns>Every solution yielded by TryReading for each matching expression reading.</returns>
        private IEnumerable<FuriganaSolution> FindSpecialExpressions(FuriganaResourceSet r, VocabEntry v,
            int currentIndexKanji, int currentIndexKana, List<FuriganaPart> currentCut)
        {
            // Walk the candidate substrings by length, from "everything that is left" down to
            // a single character.
            for (int length = v.KanjiReading.Length - currentIndexKanji; length >= 1; length--)
            {
                string candidate = v.KanjiReading.Substring(currentIndexKanji, length);
                SpecialExpression expression = r.GetExpression(candidate);
                if (expression == null)
                {
                    continue;
                }

                // Index of the first kanji-reading character after the candidate.
                int nextIndexKanji = currentIndexKanji + length;
                bool startsAtFirstChar = currentIndexKanji == 0;
                bool endsAtLastChar = nextIndexKanji == v.KanjiReading.Length;

                foreach (SpecialReading expressionReading in ReadingExpander.GetPotentialSpecialReadings(
                             expression, startsAtFirstChar, endsAtLastChar))
                {
                    int readingLength = expressionReading.KanaReading.Length;
                    int nextIndexKana = currentIndexKana + readingLength;

                    // Not enough kana left to hold this reading.
                    if (v.KanaReading.Length < nextIndexKana)
                    {
                        continue;
                    }

                    // The kana at the current position must be exactly this reading.
                    if (string.CompareOrdinal(v.KanaReading, currentIndexKana,
                                              expressionReading.KanaReading, 0, readingLength) != 0)
                    {
                        continue;
                    }

                    // The reading matches: extend a copy of the cut with the parts of the
                    // expression, shifted to their position in the whole kanji reading.
                    List<FuriganaPart> extendedCut = currentCut.Clone();
                    extendedCut.AddRange(expressionReading.Furigana.Furigana.Select(
                        fp => new FuriganaPart(fp.Value, fp.StartIndex + currentIndexKanji, fp.EndIndex + currentIndexKanji)));

                    foreach (FuriganaSolution solution in TryReading(r, v, nextIndexKanji, nextIndexKana, extendedCut))
                    {
                        yield return solution;
                    }
                }
            }
        }
示例#2
0
        /// <summary>
        /// Given a special reading expression, returns all potential kana readings the expression could use.
        /// Starts from the readings of the expression, then adds a variant ending with a small tsu
        /// (unless the expression ends the string) and variants with an altered first kana
        /// (unless the expression starts the string).
        /// </summary>
        /// <param name="sp">Target special reading expression.</param>
        /// <param name="isFirstChar">Set to true if the first character of the expression is the first
        /// character of the string that the expression is found in.</param>
        /// <param name="isLastChar">Set to true if the last character of the expression is the last
        /// character of the string that the expression is found in.</param>
        /// <returns>A list containing all potential readings the expression could assume, passed
        /// through Distinct().</returns>
        public static List<SpecialReading> GetPotentialSpecialReadings(SpecialExpression sp, bool isFirstChar, bool isLastChar)
        {
            List<SpecialReading> candidates = new List<SpecialReading>(sp.Readings);

            // Final small tsu: only relevant when something follows the expression.
            if (!isLastChar)
            {
                List<SpecialReading> tsuVariants = new List<SpecialReading>();
                foreach (SpecialReading reading in candidates)
                {
                    string kana = reading.KanaReading;
                    if (!SmallTsuRendakuList.Contains(kana.Last()))
                    {
                        continue;
                    }

                    // Swap the last kana of the reading for a small tsu.
                    string variantKana = kana.Remove(kana.Length - 1) + "っ";
                    FuriganaSolution variantFurigana = new FuriganaSolution(
                        reading.Furigana.Vocab, reading.Furigana.Furigana.Clone());
                    SpecialReading variant = new SpecialReading(variantKana, variantFurigana);

                    // Apply the same swap to the furigana parts covering the last character.
                    int lastIndex = variant.Furigana.Vocab.KanjiReading.Length - 1;
                    foreach (FuriganaPart part in variant.Furigana.GetPartsForIndex(lastIndex))
                    {
                        part.Value = part.Value.Substring(0, part.Value.Length - 1) + "っ";
                    }

                    tsuVariants.Add(variant);
                }

                candidates.AddRange(tsuVariants);
            }

            // Rendaku: only relevant when something precedes the expression. This pass also
            // covers the small tsu variants added above.
            if (!isFirstChar)
            {
                List<SpecialReading> rendakuVariants = new List<SpecialReading>();
                foreach (SpecialReading reading in candidates)
                {
                    char firstKana = reading.KanaReading.First();
                    if (!RendakuDictionary.ContainsKey(firstKana))
                    {
                        continue;
                    }

                    foreach (char ren in RendakuDictionary[firstKana])
                    {
                        string replacement = ren.ToString();

                        // Swap the first kana of the reading for its alternative.
                        string variantKana = replacement + reading.KanaReading.Substring(1);
                        FuriganaSolution variantFurigana = new FuriganaSolution(
                            reading.Furigana.Vocab, reading.Furigana.Furigana.Clone());
                        SpecialReading variant = new SpecialReading(variantKana, variantFurigana);

                        // Apply the same swap to the furigana parts covering the first character.
                        foreach (FuriganaPart part in variant.Furigana.GetPartsForIndex(0))
                        {
                            part.Value = replacement + part.Value.Substring(1);
                        }

                        rendakuVariants.Add(variant);
                    }
                }

                candidates.AddRange(rendakuVariants);
            }

            return candidates.Distinct().ToList();
        }
示例#3
0
        /// <summary>
        /// Translates a SpecialExpression to its standard Expression equivalent by dispatching
        /// on the special node type.
        /// </summary>
        /// <param name="specialExpression">Special expression to translate. Its operands are
        /// copied to a list and handed to the translator selected for its node type.</param>
        /// <returns>The expression built by the selected translator.</returns>
        /// <remarks>
        /// A node type without a case below ends in the default branch, which throws the
        /// error created by Error.BadArgument. No case exists for Count, Like, Min, Max, Sum,
        /// Average or In.
        /// NOTE(review): Hour, Minute and Millisecond are handled but there is no Second case;
        /// confirm whether the enum defines one.
        /// </remarks>
        protected virtual Expression Translate(SpecialExpression specialExpression)
        {
            var operands = specialExpression.Operands.ToList();
            SpecialExpressionType nodeType = specialExpression.SpecialNodeType;

            switch (nodeType)
            {
                // Node types with a dedicated translator.
                case SpecialExpressionType.IsNull:
                    return TranslateIsNull(operands);
                case SpecialExpressionType.IsNotNull:
                    return TranslateIsNotNull(operands);
                case SpecialExpressionType.Concat:
                    return TranslateConcat(operands);
                case SpecialExpressionType.StringLength:
                    return TranslateStringLength(operands);
                case SpecialExpressionType.Now:
                    return GetDateTimeNowCall(operands);
                case SpecialExpressionType.DateDiffInMilliseconds:
                    return GetCallDateDiffInMilliseconds(operands);

                // Node types forwarded to a standard call under an explicit name.
                case SpecialExpressionType.ToUpper:
                    return GetStandardCallInvoke("ToUpper", operands);
                case SpecialExpressionType.ToLower:
                    return GetStandardCallInvoke("ToLower", operands);
                case SpecialExpressionType.StringInsert:
                    return GetStandardCallInvoke("Insert", operands);

                // Node types forwarded to a standard call named after the enum member.
                case SpecialExpressionType.Substring:
                case SpecialExpressionType.Trim:
                case SpecialExpressionType.LTrim:
                case SpecialExpressionType.RTrim:
                case SpecialExpressionType.Replace:
                case SpecialExpressionType.Remove:
                case SpecialExpressionType.IndexOf:
                case SpecialExpressionType.Year:
                case SpecialExpressionType.Month:
                case SpecialExpressionType.Day:
                case SpecialExpressionType.Hour:
                case SpecialExpressionType.Minute:
                case SpecialExpressionType.Millisecond:
                case SpecialExpressionType.Date:
                    return GetStandardCallInvoke(nodeType.ToString(), operands);

                default:
                    throw Error.BadArgument("S0078: Implement translator for {0}", nodeType);
            }
        }
        /// <summary>
        /// Attempts to solve furigana by reading the kana string and attributing kanji a reading based
        /// not on the readings of the kanji, but on the kana characters that come up.
        /// </summary>
        /// <param name="r">Resource set used to look up special expressions and kanji.</param>
        /// <param name="v">Vocab entry whose kanji reading and kana reading are matched.</param>
        /// <returns>One solution when both readings are consumed exactly; nothing when the
        /// entry cannot be solved this way.</returns>
        protected override IEnumerable<FuriganaSolution> DoSolve(FuriganaResourceSet r, VocabEntry v)
        {
            // The kanji reading is browsed character by character while kana are consumed from
            // the front of the kana reading: each kanji receives the next piece of kana.
            // This relies on the fact that a kanji reading cannot start with certain kana
            // (ん and the small characters). When such kana directly follow the kana given to a
            // kanji, they are attached to that same kanji.
            // This only works for part of the vocab, but it significantly improves the results,
            // especially for 2-character compounds that use unusual readings.
            //
            // Example: 阿呆陀羅 (あほんだら)
            //   あ goes to 阿;
            //   ほ goes to 呆;
            //   ん is an impossible start character, so it goes to 呆 as well;
            //   だ goes to 陀;
            //   ら goes to 羅.

            string remainingKana = v.KanaReading;
            List<FuriganaPart> parts = new List<FuriganaPart>();
            string kanji = v.KanjiReading;
            int lastKanjiIndex = kanji.Length - 1;

            for (int i = 0; i < kanji.Length; i++)
            {
                // Characters are left in the kanji reading but every kana is consumed.
                if (remainingKana.Length == 0)
                {
                    yield break;
                }

                char current = kanji[i];

                // Look for a special expression starting here, longest substring first, and
                // keep the first potential reading that the remaining kana start with.
                SpecialReading matchedReading = null;
                int matchedEnd = -1;
                for (int j = lastKanjiIndex; j >= i && matchedReading == null; j--)
                {
                    SpecialExpression expression = r.GetExpression(kanji.Substring(i, (j - i) + 1));
                    if (expression == null)
                    {
                        continue;
                    }

                    foreach (SpecialReading candidate in ReadingExpander.GetPotentialSpecialReadings(
                                 expression, i == 0, j == lastKanjiIndex))
                    {
                        int readingLength = candidate.KanaReading.Length;
                        if (remainingKana.Length >= readingLength &&
                            string.CompareOrdinal(remainingKana, 0, candidate.KanaReading, 0, readingLength) == 0)
                        {
                            matchedReading = candidate;
                            matchedEnd = j;
                            break;
                        }
                    }
                }

                if (matchedReading != null)
                {
                    // Take the parts of the expression, shifted to their position in the kanji
                    // reading, consume the matching kana and resume after the expression.
                    int offset = i;
                    parts.AddRange(matchedReading.Furigana.Furigana.Select(
                        fp => new FuriganaPart(fp.Value, fp.StartIndex + offset, fp.EndIndex + offset)));
                    remainingKana = remainingKana.Substring(matchedReading.KanaReading.Length);
                    i = matchedEnd;
                    continue;
                }

                // Normal process: consume the first remaining kana.
                string eaten = remainingKana[0].ToString();
                remainingKana = remainingKana.Substring(1);

                if (r.GetKanji(current) != null)
                {
                    // For a kanji, also consume the consecutive "impossible start characters"
                    // (ん, ょ, ゃ, ゅ, っ) that follow.
                    int extra = 0;
                    while (extra < remainingKana.Length && ImpossibleCutStart.Contains(remainingKana[extra]))
                    {
                        extra++;
                    }

                    eaten += remainingKana.Substring(0, extra);
                    remainingKana = remainingKana.Substring(extra);

                    parts.Add(new FuriganaPart(eaten, i));
                    continue;
                }

                // Not a kanji: the character must be a kana, and it must be the very kana that
                // was just consumed. Otherwise an earlier attribution was wrong, or the
                // character is neither kanji nor kana. Cannot solve.
                string currentText = current.ToString();
                if (!KanaHelper.IsAllKana(currentText) || eaten != currentText)
                {
                    yield break;
                }
            }

            // Solved only when the whole kana string has been consumed.
            if (remainingKana.Length == 0)
            {
                yield return new FuriganaSolution(v, parts);
            }
        }