/// <summary>
/// Subpart of TryReading. Looks for a special expression that starts at the current kanji
/// index and, for each of its potential readings that matches the kana string at the current
/// kana index, carries on the search through TryReading.
/// </summary>
/// <param name="r">Resource set queried for special expressions.</param>
/// <param name="v">Vocab entry being processed.</param>
/// <param name="currentIndexKanji">Index in the kanji reading where the expression must start.</param>
/// <param name="currentIndexKana">Index in the kana reading where the expression reading must start.</param>
/// <param name="currentCut">Furigana parts gathered so far. A clone of this list is extended;
/// nothing is added to the list passed in.</param>
/// <returns>Every solution produced by the nested TryReading calls, yielded lazily.</returns>
private IEnumerable<FuriganaSolution> FindSpecialExpressions(FuriganaResourceSet r, VocabEntry v,
    int currentIndexKanji, int currentIndexKana, List<FuriganaPart> currentCut)
{
    bool startsAtFirstChar = currentIndexKanji == 0;

    // Try the longest candidate substring first, then shrink it one character at a time.
    for (int end = v.KanjiReading.Length - 1; end >= currentIndexKanji; end--)
    {
        string candidate = v.KanjiReading.Substring(currentIndexKanji, end - currentIndexKanji + 1);
        SpecialExpression expression = r.GetExpression(candidate);
        if (expression == null)
        {
            continue;
        }

        bool endsAtLastChar = end == v.KanjiReading.Length - 1;
        foreach (SpecialReading reading in ReadingExpander.GetPotentialSpecialReadings(
                     expression, startsAtFirstChar, endsAtLastChar))
        {
            // The reading must fit in what is left of the kana string and match it exactly.
            bool fits = v.KanaReading.Length >= currentIndexKana + reading.KanaReading.Length;
            if (!fits || string.CompareOrdinal(v.KanaReading, currentIndexKana,
                    reading.KanaReading, 0, reading.KanaReading.Length) != 0)
            {
                continue;
            }

            // The reading matches. Shift its furigana parts to their position inside the
            // vocab entry and iterate with this possibility.
            List<FuriganaPart> extendedCut = currentCut.Clone();
            foreach (var part in reading.Furigana.Furigana)
            {
                extendedCut.Add(new FuriganaPart(part.Value,
                    part.StartIndex + currentIndexKanji, part.EndIndex + currentIndexKanji));
            }

            int nextIndexKana = currentIndexKana + reading.KanaReading.Length;
            foreach (FuriganaSolution solution in TryReading(r, v, end + 1, nextIndexKana, extendedCut))
            {
                yield return solution;
            }
        }
    }
}
/// <summary>
/// Given a special reading expression, returns all potential kana readings the expression
/// could use, including the variants obtained by altering its first or last kana.
/// </summary>
/// <param name="sp">Target special reading expression.</param>
/// <param name="isFirstChar">Set to true if the first character of the expression is the first
/// character of the string that the expression is found in. When false, variants with a
/// replaced first kana are generated.</param>
/// <param name="isLastChar">Set to true if the last character of the expression is the last
/// character of the string that the expression is found in. When false, variants ending with
/// a small tsu are generated.</param>
/// <returns>A new list holding the expression's own readings followed by the generated
/// variants, with duplicates removed.</returns>
public static List<SpecialReading> GetPotentialSpecialReadings(SpecialExpression sp, bool isFirstChar, bool isLastChar)
{
    List<SpecialReading> readings = new List<SpecialReading>(sp.Readings);

    // Final small tsu: only relevant when something follows the expression.
    if (!isLastChar)
    {
        // Variants are appended behind the readings being scanned. The count is captured
        // first so that this pass only looks at the readings that existed before it started.
        int scanCount = readings.Count;
        for (int n = 0; n < scanCount; n++)
        {
            SpecialReading source = readings[n];
            if (!SmallTsuRendakuList.Contains(source.KanaReading.Last()))
            {
                continue;
            }

            string variantKana = source.KanaReading.Substring(0, source.KanaReading.Length - 1) + "っ";
            SpecialReading variant = new SpecialReading(variantKana,
                new FuriganaSolution(source.Furigana.Vocab, source.Furigana.Furigana.Clone()));

            // Apply the same ending to the furigana parts attached to the last kanji index.
            int lastKanjiIndex = variant.Furigana.Vocab.KanjiReading.Length - 1;
            foreach (FuriganaPart part in variant.Furigana.GetPartsForIndex(lastKanjiIndex))
            {
                part.Value = part.Value.Remove(part.Value.Length - 1) + "っ";
            }

            readings.Add(variant);
        }
    }

    // Rendaku: only relevant when something precedes the expression. This pass also covers
    // the small tsu variants appended above.
    if (!isFirstChar)
    {
        int scanCount = readings.Count;
        for (int n = 0; n < scanCount; n++)
        {
            SpecialReading source = readings[n];
            char firstKana = source.KanaReading.First();
            if (!RendakuDictionary.ContainsKey(firstKana))
            {
                continue;
            }

            foreach (char replacement in RendakuDictionary[firstKana])
            {
                string replacementText = replacement.ToString();
                string variantKana = replacementText + source.KanaReading.Substring(1);
                SpecialReading variant = new SpecialReading(variantKana,
                    new FuriganaSolution(source.Furigana.Vocab, source.Furigana.Furigana.Clone()));

                // Apply the same first kana to the furigana parts attached to kanji index 0.
                foreach (FuriganaPart part in variant.Furigana.GetPartsForIndex(0))
                {
                    part.Value = replacementText + part.Value.Substring(1);
                }

                readings.Add(variant);
            }
        }
    }

    return readings.Distinct().ToList();
}
/// <summary>
/// Translates a SpecialExpression to its standard Expression equivalent.
/// </summary>
/// <param name="specialExpression">Special expression to translate. Its operands are
/// materialized into a list and handed to the translator chosen from its node type.</param>
/// <returns>The expression built by the translator selected for the node type.</returns>
/// <remarks>
/// Node types without a case below end up at the final throw. The original code carried
/// commented-out placeholders for Count, Like, Min, Max, Sum, Average and In.
/// NOTE(review): the date/time group has no case for a seconds component - confirm whether
/// that is intentional.
/// </remarks>
protected virtual Expression Translate(SpecialExpression specialExpression)
{
    var operands = specialExpression.Operands.ToList();
    var nodeType = specialExpression.SpecialNodeType;

    switch (nodeType) // SETuse
    {
        // Node types with a dedicated translator.
        case SpecialExpressionType.IsNull:
            return TranslateIsNull(operands);
        case SpecialExpressionType.IsNotNull:
            return TranslateIsNotNull(operands);
        case SpecialExpressionType.Concat:
            return TranslateConcat(operands);
        case SpecialExpressionType.StringLength:
            return TranslateStringLength(operands);
        case SpecialExpressionType.Now:
            return GetDateTimeNowCall(operands);
        case SpecialExpressionType.DateDiffInMilliseconds:
            return GetCallDateDiffInMilliseconds(operands);

        // Node types mapped to a standard call through an explicit name.
        case SpecialExpressionType.ToUpper:
            return GetStandardCallInvoke("ToUpper", operands);
        case SpecialExpressionType.ToLower:
            return GetStandardCallInvoke("ToLower", operands);
        case SpecialExpressionType.StringInsert:
            return GetStandardCallInvoke("Insert", operands);

        // Node types whose own name is passed as the standard call name.
        case SpecialExpressionType.Substring:
        case SpecialExpressionType.Trim:
        case SpecialExpressionType.LTrim:
        case SpecialExpressionType.RTrim:
        case SpecialExpressionType.Replace:
        case SpecialExpressionType.Remove:
        case SpecialExpressionType.IndexOf:
        case SpecialExpressionType.Year:
        case SpecialExpressionType.Month:
        case SpecialExpressionType.Day:
        case SpecialExpressionType.Hour:
        case SpecialExpressionType.Minute:
        case SpecialExpressionType.Millisecond:
        case SpecialExpressionType.Date:
            return GetStandardCallInvoke(nodeType.ToString(), operands);
    }

    // Every handled case returned above, so reaching this point means no translator exists.
    throw Error.BadArgument("S0078: Implement translator for {0}", nodeType);
}
/// <summary>
/// Attempts to solve furigana by walking through the kanji reading and giving each kanji the
/// kana that come up next in the kana string, instead of relying on the readings of the kanji.
/// </summary>
/// <param name="r">Resource set used to look up special expressions and kanji.</param>
/// <param name="v">Vocab entry to solve.</param>
/// <returns>A single solution when the whole kana string gets consumed exactly;
/// nothing otherwise.</returns>
protected override IEnumerable<FuriganaSolution> DoSolve(FuriganaResourceSet r, VocabEntry v)
{
    // The kanji reading is browsed character by character while kana are eaten from the
    // front of the kana reading, each kanji receiving the piece of kana that comes next.
    // This relies on the fact that a kanji reading cannot start with certain kana
    // (ん and the small characters, per the original author's notes): when such characters
    // directly follow the kana eaten for a kanji, they are given to that same kanji.
    // It only works for some vocab, but it is especially good for 2-character compounds
    // that use unusual readings.
    //
    // Example: 阿呆陀羅 (あほんだら)
    //   あ goes to 阿;
    //   ほ goes to 呆;
    //   ん is an impossible start character, so it goes to 呆 as well;
    //   だ goes to 陀;
    //   ら goes to 羅.
    string remaining = v.KanaReading;
    List<FuriganaPart> parts = new List<FuriganaPart>();

    for (int i = 0; i < v.KanjiReading.Length; i++)
    {
        if (remaining.Length == 0)
        {
            // Characters are left in the kanji reading but every kana has been consumed.
            // Cannot solve.
            yield break;
        }

        // Look for a special expression starting at i, longest candidate first. The search
        // stops at the first reading that matches the front of the remaining kana.
        SpecialReading matched = null;
        int matchedEnd = i;
        for (int end = v.KanjiReading.Length - 1; end >= i && matched == null; end--)
        {
            SpecialExpression expression = r.GetExpression(v.KanjiReading.Substring(i, end - i + 1));
            if (expression == null)
            {
                continue;
            }

            foreach (SpecialReading candidate in ReadingExpander.GetPotentialSpecialReadings(
                         expression, i == 0, end == v.KanjiReading.Length - 1))
            {
                int length = candidate.KanaReading.Length;
                if (remaining.Length >= length
                    && string.CompareOrdinal(remaining, 0, candidate.KanaReading, 0, length) == 0)
                {
                    matched = candidate;
                    matchedEnd = end;
                    break;
                }
            }
        }

        if (matched != null)
        {
            // The reading matches: take its furigana parts (shifted to their position in the
            // vocab entry), eat the matching kana and jump to the end of the expression.
            foreach (var part in matched.Furigana.Furigana)
            {
                parts.Add(new FuriganaPart(part.Value, part.StartIndex + i, part.EndIndex + i));
            }

            remaining = remaining.Substring(matched.KanaReading.Length);
            i = matchedEnd;
            continue;
        }

        // Normal process: eat the first character of the remaining kana.
        char current = v.KanjiReading[i];
        string eaten = remaining.Substring(0, 1);
        remaining = remaining.Substring(1);

        if (r.GetKanji(current) != null)
        {
            // On a kanji, also eat the consecutive characters found in ImpossibleCutStart.
            int extra = 0;
            while (extra < remaining.Length && ImpossibleCutStart.Contains(remaining[extra]))
            {
                extra++;
            }

            eaten += remaining.Substring(0, extra);
            remaining = remaining.Substring(extra);
            parts.Add(new FuriganaPart(eaten, i));
            continue;
        }

        // Not a kanji. Either the character is not a kana at all, or it is a kana that
        // differs from the one just eaten, which means an earlier kanji was given the wrong
        // kana. Both ways: cannot solve.
        string currentText = current.ToString();
        if (!KanaHelper.IsAllKana(currentText) || eaten != currentText)
        {
            yield break;
        }
    }

    if (remaining.Length == 0)
    {
        // The whole kana string was consumed. The case is solved.
        yield return new FuriganaSolution(v, parts);
    }
}