Exemple #1
0
        /// <summary>
        /// Attempts to find furigana solutions for the given vocab entry.
        /// </summary>
        /// <param name="v">Entry to solve.</param>
        /// <returns>
        /// The solution set produced by <c>Process</c>. When the kanji reading is null or
        /// whitespace, or the kana reading is null, a solution set freshly created for
        /// <paramref name="v"/> is returned without running any solver.
        /// </returns>
        public FuriganaSolutionSet Execute(VocabEntry v)
        {
            if (v.KanjiReading == null || v.KanaReading == null || string.IsNullOrWhiteSpace(v.KanjiReading))
            {
                // Cannot solve when we do not have a kanji or kana reading.
                return new FuriganaSolutionSet(v);
            }

            FuriganaSolutionSet result = Process(v);

            // Ordinal comparison: we are testing for one exact character, and the
            // culture-sensitive default of StartsWith(string) can give surprising answers.
            if (!result.Any() && v.KanjiReading.StartsWith("御", System.StringComparison.Ordinal))
            {
                // When a word starts with 御 (honorific, often used), try to override the
                // result by replacing it with an お or a ご. It will sometimes bring a
                // result where the kanji form wouldn't.
                string withoutHonorific = v.KanjiReading.Substring(1);

                // NOTE(review): the kana reading is passed as the FIRST constructor argument here,
                // whereas other two-string VocabEntry constructions pass the kanji reading first.
                // Argument order is left untouched; confirm it is intended.
                result = Process(new VocabEntry(v.KanaReading, "お" + withoutHonorific));

                if (!result.Any())
                {
                    result = Process(new VocabEntry(v.KanaReading, "ご" + withoutHonorific));
                }

                // The fallback results were computed on a substitute entry; re-attach the original.
                result.Vocab = v;
            }

            return result;
        }
Exemple #2
0
        /// <summary>
        /// Runs the solvers against the given entry and gathers their solutions.
        /// Solvers are visited in the order <c>Solvers</c> yields them. As soon as a solver
        /// with a lower priority than the current level is reached while solutions already
        /// exist, the remaining solvers are skipped.
        /// </summary>
        /// <param name="v">Entry to solve.</param>
        /// <returns>The set holding every solution accepted by <c>SafeAdd</c>.</returns>
        private FuriganaSolutionSet Process(VocabEntry v)
        {
            var collected = new FuriganaSolutionSet(v);

            // Start at the priority level of the first solver.
            // presumably Solvers is sorted by descending priority — confirm with the owner of Solvers.
            int activePriority = Solvers.First().Priority;

            foreach (FuriganaSolver solver in Solvers)
            {
                bool priorityDropped = solver.Priority < activePriority;

                if (priorityDropped && collected.Any())
                {
                    // A lower priority level begins and we already have answers: stop here.
                    break;
                }

                if (priorityDropped)
                {
                    // Nothing found at the previous level: move down to this one.
                    activePriority = solver.Priority;
                }

                // SafeAdd is handed every solution the solver produces.
                collected.SafeAdd(solver.Solve(ResourceSet, v));
            }

            return collected;
        }
Exemple #3
0
        /// <summary>
        /// Loads the special expressions dictionary.
        /// </summary>
        /// <remarks>
        /// Reads every line of <c>PathHelper.SpecialReadingsPath</c>. Blank lines and lines
        /// starting with ';' are ignored. Other lines are split on
        /// <c>SeparatorHelper.FileFieldSeparator</c>: field 0 is the kanji reading, field 1 the
        /// kana reading, and, when there are exactly three fields, field 2 is an explicit
        /// furigana solution. Several lines may share a kanji reading; their readings are
        /// accumulated on the same <c>SpecialExpression</c>.
        /// </remarks>
        private void LoadSpecialExpressions()
        {
            _specialExpressions = new Dictionary <string, SpecialExpression>();
            foreach (string line in File.ReadAllLines(PathHelper.SpecialReadingsPath))
            {
                // The whitespace check guarantees at least one character, so line[0] is safe.
                if (string.IsNullOrWhiteSpace(line) || line[0] == ';')
                {
                    continue;
                }

                string[] fields       = line.Split(SeparatorHelper.FileFieldSeparator);
                string   kanjiReading = fields[0];
                string   kanaReading  = fields[1];

                VocabEntry v = new VocabEntry(kanjiReading, kanaReading);

                // Read the solution if it is explicitly written. Otherwise build one furigana
                // part carrying the kana reading over kanji indexes 0 to kanjiReading.Length - 1.
                FuriganaSolution solution = fields.Length == 3
                    ? FuriganaSolution.Parse(fields[2], v)
                    : new FuriganaSolution(v, new FuriganaPart(kanaReading, 0, kanjiReading.Length - 1));

                // Add the special reading to the existing expression, or create the expression.
                SpecialReading specialReading = new SpecialReading(kanaReading, solution);

                // Single lookup instead of ContainsKey followed by the indexer.
                SpecialExpression existing;
                if (_specialExpressions.TryGetValue(kanjiReading, out existing))
                {
                    existing.Readings.Add(specialReading);
                }
                else
                {
                    _specialExpressions.Add(kanjiReading, new SpecialExpression(kanjiReading, specialReading));
                }
            }
        }
Exemple #4
0
        /// <summary>
        /// Parses a reading element node and applies its kana reading to the vocab list.
        /// </summary>
        /// <remarks>
        /// Target selection:
        /// - When the list is empty (no kanji reading was parsed), a new blank entry is appended
        ///   and becomes the only target.
        /// - Otherwise the targets are the existing entries, restricted to those whose kanji
        ///   reading appears among the reading constraint nodes when such nodes exist.
        /// A target without a kana reading receives this one. A target that already has a kana
        /// reading causes a new entry (same kanji reading, this kana reading) to be appended,
        /// unless the list already contains that exact pair.
        /// </remarks>
        /// <param name="xreadingElement">Element to parse.</param>
        /// <param name="vocabList">Vocab list to be updated.</param>
        private void ParseReading(XElement xreadingElement, List <VocabEntry> vocabList)
        {
            VocabEntry[] targets;
            if (vocabList.Any())
            {
                // Existing kanji readings: collect the constraint values, if any.
                string[] constraints = xreadingElement.Elements(XmlNode_ReadingConstraint)
                                       .Select(node => node.Value)
                                       .ToArray();

                // The snapshot (ToArray) lets us append to vocabList while walking the targets.
                targets = constraints.Any()
                    ? vocabList.Where(entry => constraints.Contains(entry.KanjiReading)).ToArray()
                    : vocabList.ToArray();
            }
            else
            {
                // No kanji reading at all: the reading stands on its own entry.
                VocabEntry kanaOnly = new VocabEntry();
                vocabList.Add(kanaOnly);
                targets = new VocabEntry[] { kanaOnly };
            }

            // The kana reading carried by this node.
            string kanaReading = xreadingElement.Element(XmlNode_KanaReading).Value;

            foreach (VocabEntry target in targets)
            {
                if (string.IsNullOrEmpty(target.KanaReading))
                {
                    // First kana reading for this target: fill it in place.
                    target.KanaReading = kanaReading;
                    continue;
                }

                // The target already has a kana reading. Append a sibling entry unless the
                // exact (kanji, kana) pair is already present.
                bool pairAlreadyListed = vocabList.Any(
                    entry => entry.KanjiReading == target.KanjiReading && entry.KanaReading == kanaReading);

                if (!pairAlreadyListed)
                {
                    vocabList.Add(new VocabEntry()
                    {
                        KanjiReading = target.KanjiReading,
                        KanaReading  = kanaReading
                    });
                }
            }
        }
Exemple #5
0
        /// <summary>
        /// Selects the entry to edit and rebuilds the translation view-models.
        /// </summary>
        /// <remarks>
        /// When <paramref name="entryId"/> is non-empty, the entry is looked up in the realm.
        /// If <c>Entry</c> is still null afterwards, a new entry stamped with the current date
        /// is created and added to the realm. <c>AddTranslation</c> is called when the entry
        /// has no translation.
        /// </remarks>
        /// <param name="entryId">Identifier of the entry to load; may be null or empty.</param>
        public void SetEntry(string entryId)
        {
            bool idProvided = !string.IsNullOrEmpty(entryId);
            if (idProvided)
            {
                Entry = _realm.Find <VocabEntry>(entryId);
            }

            if (Entry == null)
            {
                // Nothing loaded: create a new entry and add it to the realm.
                var created = new VocabEntry();
                created.Metadata = new EntryMetadata { Date = DateTimeOffset.Now };
                Entry = created;

                _realm.Write(() => {
                    _realm.Add(Entry);
                });
            }

            bool noTranslation = Entry.Translations == null || Entry.Translations.Count == 0;
            if (noTranslation)
            {
                AddTranslation();
            }

            // One view-model per translation, in the order of the entry's translations.
            Translations = Entry.Translations
                           .Select(translation => new TranslationViewModel(translation))
                           .ToList();
        }
        /// <summary>
        /// Opens a details page for the given entry by pushing it onto the navigation stack.
        /// </summary>
        /// <param name="entry">Entry whose <c>Id</c> is handed to the details page.</param>
        internal async void EditEntry(VocabEntry entry)
        {
            var page = new VocabEntryDetailsPage {
                EntryId = entry.Id
            };

            // Await the navigation: the method was declared async but never awaited, so the
            // task was dropped and any navigation failure went unobserved.
            await Navigation.PushAsync(page);
        }
Exemple #7
0
 /// <summary>
 /// Builds the form and resets the quiz state: an empty history list,
 /// no current entry, and a go-back index of zero.
 /// </summary>
 public Form1()
 {
     InitializeComponent();

     // No entry is selected until the first prompt is produced.
     vocabEntry = null;

     // History of the entries shown so far.
     vocabEntryList = new List <VocabEntry>();

     gobackIndex = 0;
 }
        /// <summary>
        /// Recursive iterator that walks the kanji reading string and enumerates the ways the
        /// kana reading can be cut so that it lines up with it.
        /// </summary>
        /// <remarks>
        /// At each position, special expressions are tried first. If they produce at least one
        /// solution, nothing else is tried for that position. Otherwise the current character is
        /// read as a kanji when the resource set knows it, or as kana when it does not.
        /// </remarks>
        /// <param name="r">Resource set.</param>
        /// <param name="v">Vocab to solve.</param>
        /// <param name="currentIndexKanji">Current position in the kanji string. Used for recursion.</param>
        /// <param name="currentIndexKana">Current position in the kana string. Used for recursion.</param>
        /// <param name="currentCut">Current furigana parts. Used for recursion.</param>
        private IEnumerable <FuriganaSolution> TryReading(FuriganaResourceSet r, VocabEntry v,
                                                          int currentIndexKanji, int currentIndexKana, List <FuriganaPart> currentCut)
        {
            if (currentIndexKanji == v.KanjiReading.Length && currentIndexKana == v.KanaReading.Length)
            {
                // Both strings were consumed exactly: the current cut is a complete solution.
                yield return new FuriganaSolution(v, currentCut);
                yield break;
            }

            if (currentIndexKanji >= v.KanjiReading.Length || currentIndexKana >= v.KanaReading.Length)
            {
                // One string ran out before the other: dead end.
                yield break;
            }

            // Special expressions take precedence over the per-character reading.
            bool anySpecial = false;
            foreach (FuriganaSolution special in FindSpecialExpressions(r, v, currentIndexKanji, currentIndexKana, currentCut))
            {
                anySpecial = true;
                yield return special;
            }

            if (anySpecial)
            {
                yield break;
            }

            // General case. The repeater mark 々 stands for the character before it,
            // provided there is one.
            char raw = v.KanjiReading[currentIndexKanji];
            char c   = (raw == '々' && currentIndexKanji > 0)
                ? v.KanjiReading[currentIndexKanji - 1]
                : raw;

            Kanji k = r.GetKanji(c);

            // Known kanji: consume kana matching one of its readings.
            // Anything else: expect the same kana in the kana string.
            IEnumerable <FuriganaSolution> continuations = k != null
                ? ReadAsKanji(r, v, currentIndexKanji, currentIndexKana, currentCut, c, k)
                : ReadAsKana(r, v, currentIndexKanji, currentIndexKana, currentCut, c);

            foreach (FuriganaSolution continuation in continuations)
            {
                yield return continuation;
            }
        }
        /// <summary>
        /// Parses the dictionary file and returns entries.
        /// </summary>
        /// <remarks>
        /// This is an iterator: the file is loaded and parsed when enumeration starts, and
        /// vocab entries are yielded one dictionary entry at a time.
        /// </remarks>
        /// <returns>Every vocab entry built from the kanji and reading elements of each entry node.</returns>
        public IEnumerable <VocabEntry> Execute()
        {
            // Load the file as an XML document.
            // NOTE(review): the file is read into a string and re-encoded as UTF-8 before being
            // handed to the XML reader, which holds several copies in memory. This may be a
            // deliberate encoding normalization; confirm before switching to a file stream.
            XDocument xdoc;

            using (var stream = new MemoryStream(Encoding.UTF8.GetBytes(File.ReadAllText(DictionaryFilePath))))
            {
                // DTD processing is enabled and the entity/document character limits are raised
                // to long.MaxValue.
                var settings = new XmlReaderSettings();
                settings.DtdProcessing             = DtdProcessing.Parse;
                settings.MaxCharactersFromEntities = long.MaxValue;
                settings.MaxCharactersInDocument   = long.MaxValue;
                using (var reader = XmlReader.Create(stream, settings))
                {
                    xdoc = XDocument.Load(reader);
                }
            }

            // Browse each vocab entry.
            foreach (XElement xentry in xdoc.Root.Elements(XmlNode_Entry))
            {
                List <VocabEntry> vocabList = new List <VocabEntry>();

                // One vocab per kanji element, carrying its writing.
                foreach (XElement xkanjiElement in xentry.Elements(XmlNode_KanjiElement))
                {
                    VocabEntry vocab = new VocabEntry();
                    vocab.KanjiReading = xkanjiElement.Element(XmlNode_KanjiReading).Value;
                    vocabList.Add(vocab);
                }

                // Materialize the reading elements once, so the "more than one reading" test
                // does not re-enumerate the XML children on every iteration.
                List <XElement> xreadingElements   = xentry.Elements(XmlNode_ReadingElement).ToList();
                bool            hasSeveralReadings = xreadingElements.Count > 1;

                foreach (XElement xreadingElement in xreadingElements)
                {
                    // Exclude the node if it contains the no kanji node, and is not the only reading.
                    // This is a behavior that seems to be implemented in Jisho (example word: 台詞).
                    if (hasSeveralReadings && xreadingElement.HasElement(XmlNode_NoKanji))
                    {
                        continue;
                    }

                    // Parse the reading. The list will be expanded and/or its elements filled with
                    // the available info.
                    ParseReading(xreadingElement, vocabList);
                }

                // Yield all vocab entries parsed from this entry.
                foreach (VocabEntry entry in vocabList)
                {
                    yield return entry;
                }
            }
        }
Exemple #10
0
 /// <summary>
 /// Removes the last translation of the entry when its content is null or empty.
 /// Does nothing when the entry has no translation list or the list is empty.
 /// </summary>
 /// <param name="entry">Entry whose trailing translation is inspected.</param>
 private void trimEmptyTranslation(VocabEntry entry)
 {
     // Last() throws on an empty sequence, so bail out before calling it.
     if (entry.Translations == null || !entry.Translations.Any())
     {
         return;
     }

     var last = entry.Translations.Last();
     if (string.IsNullOrEmpty(last.Content))
     {
         // The removal is performed inside a realm write.
         _realm.Write(() => {
             entry.Translations.Remove(last);
         });
     }
 }
Exemple #11
0
        /// <summary>
        /// Attempts to solve the given vocab entry.
        /// Each solution produced by <c>DoSolve</c> is verified with its <c>Check</c> method
        /// before being handed to the caller.
        /// </summary>
        /// <param name="r">Set of resources required by solvers.</param>
        /// <param name="v">Entry to attempt to solve.</param>
        /// <returns>The solutions found, if any, yielded lazily.</returns>
        /// <exception cref="Exception">A produced solution failed its check.</exception>
        public IEnumerable <FuriganaSolution> Solve(FuriganaResourceSet r, VocabEntry v)
        {
            foreach (FuriganaSolution candidate in DoSolve(r, v))
            {
                if (candidate.Check())
                {
                    yield return candidate;
                    continue;
                }

                // A solver produced a solution that fails its own check.
                throw new Exception("The solution did not pass the check test.");
            }
        }
Exemple #12
0
        /// <summary>
        /// アカガエル科 with a single furigana part on index 5 must break into two parts:
        /// the leading text without furigana, then 科 carrying か.
        /// </summary>
        public void Test_BreakIntoParts_Akagaeruka()
        {
            // Arrange
            var entry     = new VocabEntry("アカガエル科", "アカガエルか");
            var kaOnIndex = new FuriganaPart("か", 5);
            var solution  = new FuriganaSolution(entry, kaOnIndex);

            // Act
            var parts = solution.BreakIntoParts().ToList();

            // Assert
            Assert.AreEqual(2, parts.Count);

            var leadingText = parts[0];
            Assert.AreEqual("アカガエル", leadingText.Text);
            Assert.IsNull(leadingText.Furigana);

            var annotatedKanji = parts[1];
            Assert.AreEqual("科", annotatedKanji.Text);
            Assert.AreEqual("か", annotatedKanji.Furigana);
        }
Exemple #13
0
        /// <summary>
        /// Solves the given kanji/kana pair and checks that the single solution matches the
        /// expected furigana string. Fails when there is no single solution.
        /// </summary>
        /// <param name="kanjiReading">Kanji reading of the word under test.</param>
        /// <param name="kanaReading">Kana reading of the word under test.</param>
        /// <param name="expectedFurigana">Expected solution, in the format accepted by <c>FuriganaSolution.Parse</c>.</param>
        public void Test_Furigana(string kanjiReading, string kanaReading, string expectedFurigana)
        {
            // Arrange
            var v        = new VocabEntry(kanjiReading, kanaReading);
            var business = new FuriganaBusiness(DictionaryFile.Jmdict);

            // Act
            FuriganaSolutionSet result = business.Execute(v);

            // Assert
            if (result.GetSingleSolution() == null)
            {
                Assert.Fail();
            }

            Assert.AreEqual(FuriganaSolution.Parse(expectedFurigana, v), result.GetSingleSolution());
        }
Exemple #14
0
        /// <summary>
        /// 大人買い with おとな spanning indexes 0-1 and が on index 2 must break into three
        /// parts: 大人 (おとな), 買 (が), and the trailing い without furigana.
        /// </summary>
        public void Test_BreakIntoParts_Otonagai()
        {
            // Arrange
            var entry    = new VocabEntry("大人買い", "おとながい");
            var otona    = new FuriganaPart("おとな", 0, 1);
            var ga       = new FuriganaPart("が", 2);
            var solution = new FuriganaSolution(entry, otona, ga);

            // Act
            var parts = solution.BreakIntoParts().ToList();

            // Assert
            Assert.AreEqual(3, parts.Count);

            var compound = parts[0];
            Assert.AreEqual("大人", compound.Text);
            Assert.AreEqual("おとな", compound.Furigana);

            var singleKanji = parts[1];
            Assert.AreEqual("買", singleKanji.Text);
            Assert.AreEqual("が", singleKanji.Furigana);

            var trailingKana = parts[2];
            Assert.AreEqual("い", trailingKana.Text);
            Assert.IsNull(trailingKana.Furigana);
        }
        /// <summary>
        /// Subpart of TryReading. Compares the current character of the kanji reading with the
        /// current character of the kana reading. When they are identical, or identical after
        /// both are passed through <c>KanaHelper.ToHiragana</c>, recursion continues on the next
        /// character of each string with the cut unchanged. Otherwise nothing is yielded.
        /// </summary>
        private IEnumerable <FuriganaSolution> ReadAsKana(FuriganaResourceSet r, VocabEntry v,
                                                          int currentIndexKanji, int currentIndexKana, List <FuriganaPart> currentCut, char c)
        {
            char kanaChar = v.KanaReading[currentIndexKana];

            if (c != kanaChar && KanaHelper.ToHiragana(c.ToString()) != KanaHelper.ToHiragana(kanaChar.ToString()))
            {
                // The two strings disagree at this position: no solution down this path.
                yield break;
            }

            // Reading plain kana adds no furigana, so the same cut is carried forward.
            foreach (FuriganaSolution next in TryReading(r, v, currentIndexKanji + 1, currentIndexKana + 1, currentCut))
            {
                yield return next;
            }
        }
        /// <summary>
        /// Creates the view-model for the given entry.
        /// </summary>
        /// <remarks>
        /// The close command and the time code are always initialized. When
        /// <paramref name="vm"/> is null, nothing else is set. The translation title is taken
        /// from the first translation and left untouched when the entry has none.
        /// </remarks>
        /// <param name="vm">Entry to display; may be null.</param>
        public AudioViewModel(VocabEntry vm)
        {
            CloseCommand = new Command(Close);

            TimeCode = "55:55";

            if (vm == null)
            {
                return;
            }

            _entry     = vm;
            EntryTitle = _entry.Title;

            // Indexing [0] on a missing or empty translation list would throw; guard it.
            if (_entry.Translations != null && _entry.Translations.Count > 0)
            {
                TranslationTitle = _entry.Translations[0].Title;
            }
        }
Exemple #17
0
        /// <summary>
        /// Creates a furigana solution from a regex match computed in the DoSolve method.
        /// </summary>
        /// <param name="v">Vocab entry the solution belongs to.</param>
        /// <param name="match">Match whose capture groups 1..n hold the kana of each kanji.</param>
        /// <param name="kanjiIndexes">Index in the kanji reading for each capture group, in order.</param>
        /// <returns>
        /// The solution, or null when the number of capture groups (excluding group 0) differs
        /// from the number of kanji indexes.
        /// </returns>
        private FuriganaSolution MakeSolutionFromMatch(VocabEntry v, Match match, List <int> kanjiIndexes)
        {
            int captureCount = match.Groups.Count - 1;
            if (captureCount != kanjiIndexes.Count)
            {
                return null;
            }

            var furigana = new List <FuriganaPart>(captureCount);

            // Group 0 is the whole match; capture n + 1 pairs with kanji index n.
            for (int n = 0; n < captureCount; n++)
            {
                string captured = match.Groups[n + 1].Value;
                furigana.Add(new FuriganaPart(captured, kanjiIndexes[n]));
            }

            return new FuriganaSolution(v, furigana);
        }
        /// <summary>
        /// Subpart of TryReading. Tries progressively longer slices of the kana reading, starting
        /// at the current kana position, against the potential readings of the current kanji.
        /// Every slice that equals a potential reading is added to a copy of the cut, and
        /// recursion continues on TryReading after it.
        /// </summary>
        private IEnumerable <FuriganaSolution> ReadAsKanji(FuriganaResourceSet r, VocabEntry v,
                                                           int currentIndexKanji, int currentIndexKana, List <FuriganaPart> currentCut, char c, Kanji k)
        {
            // Number of kanji-string characters left after the current one.
            int kanjiLeft = v.KanjiReading.Length - currentIndexKanji - 1;

            bool isFirstChar = currentIndexKanji == 0;
            bool isLastChar  = currentIndexKanji == v.KanjiReading.Length - 1;

            List <string> candidateReadings = ReadingExpander.GetPotentialKanjiReadings(k, isFirstChar, isLastChar, UseNanori);

            // sliceEnd is the index of the last kana included in the slice under test.
            for (int sliceEnd = currentIndexKana;
                 sliceEnd < v.KanaReading.Length && sliceEnd < currentIndexKana + MaxKanaPerKanji;
                 sliceEnd++)
            {
                int kanaLeft = v.KanaReading.Length - sliceEnd - 1;
                if (kanaLeft < kanjiLeft)
                {
                    // Fewer kana would remain than characters left in the kanji string.
                    // Longer slices only make it worse, so stop entirely.
                    yield break;
                }

                string slice = v.KanaReading.Substring(currentIndexKana, (sliceEnd - currentIndexKana) + 1);

                foreach (string reading in candidateReadings)
                {
                    if (reading != slice)
                    {
                        continue;
                    }

                    // Match: extend a copy of the cut with this kanji's furigana and recurse.
                    List <FuriganaPart> extendedCut = currentCut.Clone();
                    extendedCut.Add(new FuriganaPart(reading, currentIndexKanji));

                    foreach (FuriganaSolution next in TryReading(r, v, currentIndexKanji + 1, sliceEnd + 1, extendedCut))
                    {
                        yield return next;
                    }
                }

                // Fall through to the next, longer slice to follow other reading paths.
            }
        }
Exemple #19
0
        /// <summary>
        /// Advances the quiz by one step, depending on <c>promptState</c>.
        /// In the answering state a new current entry is chosen, its <c>portuguese</c> text is
        /// shown, and the state becomes prompting. In the prompting state the gender string is
        /// shown in front of the text, and the state becomes answering.
        /// </summary>
        private void formNextAction()
        {
            if (promptState == PromptState.answering)
            {
                bool goingBack = gobackIndex > 0;
                if (goingBack)
                {
                    // Return to the second-to-last entry of the history and reset the counter.
                    gobackIndex = 0;
                    vocabEntry  = vocabEntryList[vocabEntryList.Count - 2];
                }
                else
                {
                    // Draw a new random entry and record it in the history.
                    vocabEntry = vocab.getRandomVocabEntry();
                    vocabEntryList.Add(vocabEntry);
                }

                label1.Text = vocabEntry.portuguese;
                label2.Text = "";
                label3.Text = "";

                promptState = PromptState.prompting;
            }
            else if (promptState == PromptState.prompting)
            {
                label1.Text = vocabEntry.genderToString() + " " + vocabEntry.portuguese;
                label3.Text = "";
                label2.Text = "";

                promptState = PromptState.answering;
            }
        }
        /// <summary>
        /// Solves cases where the kanji reading is a single character repeated once
        /// (written twice, or followed by the repeater mark 々) and the kana reading has an
        /// even length: each character receives one half of the kana.
        /// </summary>
        protected override IEnumerable <FuriganaSolution> DoSolve(FuriganaResourceSet r, VocabEntry v)
        {
            if (v.KanjiReading.Length != 2 || v.KanaReading.Length % 2 != 0 ||
                (v.KanjiReading[1] != '々' && v.KanjiReading[1] != v.KanjiReading[0]))
            {
                // Not a two-character repetition, or the kana cannot be split evenly.
                yield break;
            }

            // e.g. 中々: cut the kana string in the middle.
            int    half       = v.KanaReading.Length / 2;
            string firstHalf  = v.KanaReading.Substring(0, half);
            string secondHalf = v.KanaReading.Substring(half);

            yield return new FuriganaSolution(v,
                                              new FuriganaPart(firstHalf, 0),
                                              new FuriganaPart(secondHalf, 1));
        }
Exemple #21
0
        /// <summary>
        /// Gets all possible reading cuts of the kana reading of the specified vocab entry, considering
        /// the length of the kanji reading string.
        /// </summary>
        /// <param name="v">Vocab entry to cut.</param>
        /// <returns>List of all possible reading cuts, as produced by <c>GetCuts</c>.</returns>
        /// <example>
        /// 頑張る (がんばる)
        /// => が.ん.ばる,
        ///    が.んば.る,
        ///    がん.ば.る
        /// </example>
        public static IEnumerable <string> GetAllPossibleCuts(VocabEntry v)
        {
            // The kanji reading length is passed to GetCuts as the second argument.
            // The local was previously computed but never used.
            int cutCount = v.KanjiReading.Length;

            return GetCuts(v.KanaReading, cutCount);
        }
        /// <summary>
        /// Stores the entry in <c>vocabEntries</c> under its <c>Text</c>, replacing any entry
        /// already stored under that key. An entry without an object id receives a newly
        /// generated one first.
        /// </summary>
        /// <param name="entry">Entry to store.</param>
        public void SaveVocabEntry(VocabEntry entry)
        {
            bool lacksId = entry.ObjectId == null;
            if (lacksId)
            {
                // First save of this entry: give it an identifier.
                entry.ObjectId = BsonObjectId.GenerateNewId();
            }

            string key = entry.Text;
            vocabEntries[key] = entry;
        }
Exemple #23
0
        /// <summary>
        /// Gets the override solution matching the given vocab entry.
        /// The lookup key is the entry's <c>ToString()</c> value.
        /// </summary>
        /// <param name="v">Entry to look for in the override list.</param>
        /// <returns>The matching solution if found. Null otherwise.</returns>
        public FuriganaSolution GetOverride(VocabEntry v)
        {
            // Single lookup instead of ContainsKey followed by the indexer. TryGetValue leaves
            // the out variable at its default (null) when the key is absent.
            FuriganaSolution found;
            _overrideList.TryGetValue(v.ToString(), out found);

            return found;
        }
 /// <summary>
 /// Removes the given entry from the realm inside a write.
 /// </summary>
 /// <param name="entry">Entry to remove.</param>
 private void DeleteEntry(VocabEntry entry)
 {
     _realm.Write(() => {
         _realm.Remove(entry);
     });
 }
Exemple #25
0
        /// <summary>
        /// Attempts to solve cases where the length of the kanji reading matches the length of the
        /// kana reading, by pairing the characters one to one.
        /// </summary>
        /// <remarks>
        /// Each character known to the resource set as a kanji receives the kana at the same
        /// index. Any other character must be kana and equivalent to the kana at the same index;
        /// otherwise no solution is produced. A solution is yielded only when at least one kanji
        /// was paired.
        /// </remarks>
        protected override IEnumerable <FuriganaSolution> DoSolve(FuriganaResourceSet r, VocabEntry v)
        {
            if (v.KanjiReading.Length != v.KanaReading.Length)
            {
                yield break;
            }

            var pairedKanji = new List <FuriganaPart>();

            for (int position = 0; position < v.KanjiReading.Length; position++)
            {
                if (r.GetKanji(v.KanjiReading[position]) != null)
                {
                    // Kanji: it takes the single kana sitting at the same position.
                    pairedKanji.Add(new FuriganaPart(v.KanaReading[position].ToString(), position));
                    continue;
                }

                // Neither a kanji nor kana: give up on this entry.
                // Kana that is not equivalent to its counterpart: give up as well.
                if (!KanaHelper.IsAllKana(v.KanjiReading[position].ToString()) ||
                    !KanaHelper.AreEquivalent(v.KanjiReading[position].ToString(), v.KanaReading[position].ToString()))
                {
                    yield break;
                }
            }

            if (pairedKanji.Count > 0)
            {
                yield return new FuriganaSolution(v, pairedKanji);
            }
        }
Exemple #26
0
        /// <summary>
        /// Attempts to solve furigana in cases where there are no consecutive kanji in the kanji string,
        /// using regular expressions.
        /// </summary>
        /// <remarks>
        /// The kanji reading is turned into two anchored patterns: every kanji becomes a capture
        /// group (greedy in one pattern, lazy in the other) and every other character is matched
        /// literally. A solution is yielded only when both patterns match the kana reading and
        /// lead to equal solutions, meaning the cut is unambiguous.
        /// </remarks>
        protected override IEnumerable <FuriganaSolution> DoSolve(FuriganaResourceSet r, VocabEntry v)
        {
            // We are using both a greedy expression and a lazy expression because we want to make sure
            // there is only one way to read them. If the result differs with a greedy or a lazy expression,
            // it means that we do not know how to read the word.
            string     regGreedy         = "^";
            string     regLazy           = "^";
            bool       consecutiveMarker = false;
            List <int> kanjiIndexes      = new List <int>(4);

            for (int i = 0; i < v.KanjiReading.Length; i++)
            {
                char  c = v.KanjiReading[i];
                Kanji k = r.GetKanji(c);
                if (k == null)
                {
                    // Add the character as a literal. No capture group for kana.
                    // It must be escaped: a raw '.', '(', '+', '?' etc. would alter the pattern,
                    // and the previous string.Format call threw on '{' or '}'.
                    string literal = Regex.Escape(c.ToString());
                    regGreedy        += literal;
                    regLazy          += literal;
                    consecutiveMarker = false;
                }
                else if (consecutiveMarker)
                {
                    // Consecutive kanji. The vocab entry is not eligible for this solution.
                    yield break;
                }
                else
                {
                    // Add a capture group for the kanji and remember its position.
                    regGreedy        += "(.+)";
                    regLazy          += "(.+?)";
                    consecutiveMarker = true;
                    kanjiIndexes.Add(i);
                }
            }
            regGreedy += "$";
            regLazy   += "$";

            // Example regex:
            // For 持ち運ぶ (もちはこぶ)
            // The regexes would be:
            // ^(.+)ち(.+)ぶ$
            // ^(.+?)ち(.+?)ぶ$

            Regex regexGreedy = new Regex(regGreedy);
            Regex regexLazy   = new Regex(regLazy);
            Match matchGreedy = regexGreedy.Match(v.KanaReading);
            Match matchLazy   = regexLazy.Match(v.KanaReading);

            if (matchGreedy.Success && matchLazy.Success)
            {
                // Obtain both solutions.
                FuriganaSolution greedySolution = MakeSolutionFromMatch(v, matchGreedy, kanjiIndexes);
                FuriganaSolution lazySolution   = MakeSolutionFromMatch(v, matchLazy, kanjiIndexes);

                // Only an unambiguous cut is accepted: both must exist and be equal.
                if (greedySolution != null && lazySolution != null && greedySolution.Equals(lazySolution))
                {
                    // They are equal, so returning either is the same; return the greedy one.
                    yield return greedySolution;
                }
            }
        }
        /// <summary>
        /// Subpart of TryReading. Looks for special expressions starting at the current kanji
        /// position, from the longest possible substring down to a single character.
        /// For each expression reading whose kana matches the kana string at the current kana
        /// position, recursion continues on TryReading after the expression.
        /// </summary>
        private IEnumerable <FuriganaSolution> FindSpecialExpressions(FuriganaResourceSet r, VocabEntry v,
                                                                      int currentIndexKanji, int currentIndexKana, List <FuriganaPart> currentCut)
        {
            bool startsAtBeginning = currentIndexKanji == 0;

            // lastIndex is the index of the last kanji-string character included in the lookup.
            for (int lastIndex = v.KanjiReading.Length - 1; lastIndex >= currentIndexKanji; lastIndex--)
            {
                string candidate = v.KanjiReading.Substring(currentIndexKanji, (lastIndex - currentIndexKanji) + 1);

                SpecialExpression expression = r.GetExpression(candidate);
                if (expression == null)
                {
                    continue;
                }

                bool endsAtEnd = lastIndex == v.KanjiReading.Length - 1;

                foreach (SpecialReading special in ReadingExpander.GetPotentialSpecialReadings(
                             expression, startsAtBeginning, endsAtEnd))
                {
                    // The kana string must be long enough and must contain this reading
                    // at the current kana position.
                    if (v.KanaReading.Length < currentIndexKana + special.KanaReading.Length ||
                        v.KanaReading.Substring(currentIndexKana, special.KanaReading.Length) != special.KanaReading)
                    {
                        continue;
                    }

                    // The expression's furigana parts are indexed from its own start;
                    // shift them by the current kanji position.
                    List <FuriganaPart> extendedCut = currentCut.Clone();
                    extendedCut.AddRange(special.Furigana.Furigana
                                         .Select(part => new FuriganaPart(part.Value, part.StartIndex + currentIndexKanji, part.EndIndex + currentIndexKanji)));

                    foreach (FuriganaSolution next in TryReading(r, v, lastIndex + 1,
                                                                 currentIndexKana + special.KanaReading.Length, extendedCut))
                    {
                        yield return next;
                    }
                }
            }
        }
        /// <summary>
        /// Solves furigana by consuming the kana reading from left to right while walking the
        /// kanji reading, giving each kanji the kana that come up next instead of relying on
        /// the known readings of the kanji.
        /// </summary>
        /// <param name="r">Resource set used to look up special expressions and kanji.</param>
        /// <param name="v">Vocab entry providing the kanji reading and the kana reading.</param>
        /// <returns>
        /// One solution when the kana reading has been consumed exactly; nothing otherwise.
        /// </returns>
        protected override IEnumerable<FuriganaSolution> DoSolve(FuriganaResourceSet r, VocabEntry v)
        {
            // Principle: kanji readings cannot begin with certain kana (ん and the small
            // characters). Whenever a kanji has just received a kana and the following kana
            // are such "impossible start" characters, they necessarily belong to that same kanji.
            // This only solves part of the vocab, but it noticeably improves the results,
            // in particular for 2-character compounds that use unusual readings.
            //
            // Example: 阿呆陀羅 (あほんだら)
            //   あ goes to 阿;
            //   ほ goes to 呆;
            //   ん cannot start a reading, so it is attached to 呆 too;
            //   だ goes to 陀;
            //   ら goes to 羅.

            string remaining = v.KanaReading;
            string kanjiText = v.KanjiReading;
            List<FuriganaPart> parts = new List<FuriganaPart>();
            int lastIndex = kanjiText.Length - 1;

            int position = 0;
            while (position < kanjiText.Length)
            {
                if (remaining.Length == 0)
                {
                    // Characters are left in the kanji reading but the kana ran out: unsolvable.
                    yield break;
                }

                // Special expressions first, trying the longest substring starting at the
                // current position and shrinking it one character at a time.
                // matchedEnd stays negative until an expression reading has been consumed.
                int matchedEnd = -1;
                for (int end = lastIndex; end >= position && matchedEnd < 0; end--)
                {
                    SpecialExpression expression =
                        r.GetExpression(kanjiText.Substring(position, end - position + 1));
                    if (expression == null)
                    {
                        continue;
                    }

                    foreach (SpecialReading reading in ReadingExpander.GetPotentialSpecialReadings(
                                 expression, position == 0, end == lastIndex))
                    {
                        string candidate = reading.KanaReading;
                        if (candidate.Length > remaining.Length ||
                            remaining.Substring(0, candidate.Length) != candidate)
                        {
                            // The remaining kana do not start with this reading.
                            continue;
                        }

                        // The reading matches: shift its furigana parts to the current
                        // position and consume the corresponding kana.
                        int offset = position;
                        parts.AddRange(reading.Furigana.Furigana.Select(
                                           fp => new FuriganaPart(fp.Value, fp.StartIndex + offset, fp.EndIndex + offset)));
                        remaining  = remaining.Substring(candidate.Length);
                        matchedEnd = end;
                        break;
                    }
                }

                if (matchedEnd >= 0)
                {
                    // Resume right after the last character covered by the expression.
                    position = matchedEnd + 1;
                    continue;
                }

                // Regular case: consume exactly one kana for the current character.
                char current = kanjiText[position];
                string eaten = remaining[0].ToString();
                remaining = remaining.Substring(1);

                if (r.GetKanji(current) != null)
                {
                    // For a kanji, also swallow every directly following
                    // "impossible start" kana (ん, ょ, ゃ, ゅ, っ).
                    int extra = 0;
                    while (extra < remaining.Length && ImpossibleCutStart.Contains(remaining[extra]))
                    {
                        extra++;
                    }

                    eaten    += remaining.Substring(0, extra);
                    remaining = remaining.Substring(extra);
                    parts.Add(new FuriganaPart(eaten, position));
                }
                else
                {
                    string currentText = current.ToString();

                    // Give up when the character is neither a kanji nor a kana, or when it is
                    // a kana that differs from the one just consumed (meaning an earlier kanji
                    // was given the wrong amount of kana).
                    if (!KanaHelper.IsAllKana(currentText) || eaten != currentText)
                    {
                        yield break;
                    }
                }

                position++;
            }

            if (remaining.Length != 0)
            {
                // Unconsumed kana are left over: not a valid solution.
                yield break;
            }

            // The whole kana string was consumed: the entry is solved.
            yield return new FuriganaSolution(v, parts);
        }
Exemple #29
0
 /// <summary>
 /// Solver-specific hook: each concrete solver overrides this method to produce its
 /// furigana solutions for the given vocab entry.
 /// </summary>
 /// <param name="r">Resource set handed to the solver; the overrides visible here use it for kanji and special expression lookups.</param>
 /// <param name="v">Vocab entry to solve; the overrides visible here read its kanji reading and kana reading.</param>
 /// <returns>The solutions found; an override may yield none when it cannot solve the entry.</returns>
 protected abstract IEnumerable <FuriganaSolution> DoSolve(FuriganaResourceSet r, VocabEntry v);
 /// <summary>
 /// Attempts to solve furigana by reading the kanji reading string and finding
 /// matching kanji readings.
 /// </summary>
 /// <param name="r">Resource set forwarded to <c>TryReading</c>.</param>
 /// <param name="v">Vocab entry to solve.</param>
 /// <returns>Every solution produced by <c>TryReading</c>, in the order it yields them.</returns>
 protected override IEnumerable<FuriganaSolution> DoSolve(FuriganaResourceSet r, VocabEntry v)
 {
     // Start the search with both indexes at 0 and an empty list of furigana parts.
     var candidates = TryReading(r, v, 0, 0, new List<FuriganaPart>());

     foreach (FuriganaSolution candidate in candidates)
     {
         yield return candidate;
     }
 }
Exemple #31
0
 /// <summary>
 /// Creates a view model around the given vocab entry.
 /// </summary>
 /// <param name="model">Vocab entry kept as the backing model of this view model.</param>
 public VocabEntryViewModel(VocabEntry model)
 {
     this._model = model;
 }