/// <summary>
/// Checks that a full hiragana sentence (including Japanese punctuation)
/// is converted to the expected katakana sentence by
/// <c>KataHiraConvert.ConvertHiraToKata</c>.
/// </summary>
public void TestHiraToKataSimpleConvert()
{
    const string hiraganaInput = "そのことばにじいくはけんそんをすることもなく、また、こうていをすることもなく、おだやかなびえみのひょうじょうだけをみせてくれた。";
    const string katakanaExpected = "ソノコトバニジイクハケンソンヲスルコトモナク、マタ、コウテイヲスルコトモナク、オダヤカナビエミノヒョウジョウダケヲミセテクレタ。";

    string converted = KataHiraConvert.ConvertHiraToKata(hiraganaInput);

    Assert.AreEqual(katakanaExpected, converted);
}
/// <summary>
/// Checks the conversion of extended kana combinations (small-vowel
/// digraphs such as "ふぁ"/"ファ" and "ヴ"-based syllables) in both
/// directions: hiragana to katakana and katakana back to hiragana.
/// </summary>
public void TestKataExtend()
{
    string sentence = "ふぁふぃふぇふぉふゅうぃうぇヴぁヴぃヴぇヴぉつぁつぃつぉちぇしぇじぇてぃとぅいぇうぉでぃでゅ";
    string expected = "ファフィフェフォフュウィウェヴァヴィヴェヴォツァツィツォチェシェジェティトゥイェウォディデュ";

    // Forward direction. This result used to be overwritten by the reverse
    // conversion before any assertion, so it was never actually verified.
    string actualKata = KataHiraConvert.ConvertHiraToKata(sentence);
    Assert.AreEqual(expected, actualKata);

    // Reverse direction: converting the katakana form must give back the
    // original input (which keeps "ヴ" as-is).
    string actualHira = KataHiraConvert.ConvertKataToHira(expected);
    Assert.AreEqual(sentence, actualHira);
}
/// <summary>
/// Runs <c>KataHiraConvert.ConvertHiraToKata</c> against every row of the
/// "RomaHiraKata.txt" table. Each row is split on '@'; the second field is
/// the conversion input and the third field is the expected output.
/// </summary>
public void TestHiraToKataFullTable()
{
    string tablePath = Locations.ABS_DICT_CONVERT_PATH + "RomaHiraKata.txt";

    using (FileStream tableStream = File.OpenRead(tablePath))
    using (StreamReader tableReader = new StreamReader(tableStream))
    {
        while (!tableReader.EndOfStream)
        {
            // Field 0 is not used here (presumably the romaji form, judging
            // by the file name -- confirm against the table).
            string[] fields = tableReader.ReadLine().Split('@');

            string converted = KataHiraConvert.ConvertHiraToKata(fields[1]);
            string expectedKatakana = fields[2];

            Assert.AreEqual(expectedKatakana, converted);
        }
    }
}
/// <summary>
/// Overrides the reading and pronunciation of a kanji word with the first
/// reading element of its single dictionary match, when that reading differs
/// from the current one.
/// </summary>
/// <param name="wordSurface">Surface form of the word being checked.</param>
/// <param name="matchWords">Dictionary entries matched for the word; only acted on when there is exactly one.</param>
/// <param name="readingStr">Current reading; replaced by the katakana form of the dictionary reading when they differ.</param>
/// <param name="pronunStr">Current pronunciation; set to the same value as <paramref name="readingStr"/> when the reading is replaced.</param>
private void MakeSureReadingAndPronunIsCorrect(string wordSurface, List<JmdictEntity> matchWords, ref string readingStr, ref string pronunStr)
{
    // Only a single, unambiguous match for a word containing kanji is trusted,
    // and never while IsCombineAll is set.
    if (IsCombineAll || matchWords.Count != 1 || !StringHelper.IsHaveKanji(wordSurface))
    {
        return;
    }

    var onlyMatch = matchWords[0];
    bool surfaceIsListed =
        onlyMatch.RepresentWord.ContainsExtend(wordSurface, StringComparison.OrdinalIgnoreCase)
        || onlyMatch.KanjiElement.ContainsExtend(wordSurface, StringComparison.OrdinalIgnoreCase);
    if (!surfaceIsListed)
    {
        return;
    }

    var dictionaryReadings = JmdictWord.ParseReadElement(onlyMatch);
    var dictionaryReading = KataHiraConvert.ConvertHiraToKata(dictionaryReadings[0].Word);
    if (readingStr.EqualsOrdinalIgnore(dictionaryReading))
    {
        return;
    }

    readingStr = dictionaryReading;
    pronunStr = dictionaryReading;
}
/// <summary>
/// Repeats the perfect-match dictionary lookup with the word's surface
/// converted from katakana to hiragana, and merges any hits into the
/// matches already found.
/// </summary>
/// <param name="selectedWord">Word whose surface is converted for the second lookup.</param>
/// <param name="japEngDictionary">Dictionary database passed to the lookup.</param>
/// <param name="perfectMatches">Matches found so far.</param>
/// <returns>
/// A new list holding the union of <paramref name="perfectMatches"/> and the
/// new hits (compared with <c>JmdictEntity.EqualComparer</c>), or
/// <paramref name="perfectMatches"/> itself when the second lookup finds nothing.
/// </returns>
private static List<JmdictEntity> TrySearchByConvertToHira(WordInformation selectedWord, Database japEngDictionary, List<JmdictEntity> perfectMatches)
{
    var hiraganaSurface = KataHiraConvert.ConvertKataToHira(selectedWord.Surface);

    // Same grammatical information as the selected word; only the surface differs.
    var hiraganaWord = new WordInformation(
        selectedWord.FirstConjugationType,
        selectedWord.FirstConjugationForm,
        selectedWord.BaseForm,
        selectedWord.PartOfSpeech);
    hiraganaWord.AddWordPart(hiraganaSurface, selectedWord.Reading, selectedWord.Pronunciation);

    var hiraganaMatches = FindTokenPerfectMatchInDictionary(hiraganaWord, japEngDictionary);

    return hiraganaMatches.Count > 0
        ? perfectMatches.Union(hiraganaMatches, JmdictEntity.EqualComparer).ToList()
        : perfectMatches;
}
/// <summary>
/// Produces a reading and pronunciation for a piece of text.
/// </summary>
/// <param name="unTokenized">Text to derive the reading and pronunciation from.</param>
/// <param name="reading">
/// For text containing kanji: the concatenated token readings, or a single
/// space if any token has "*" as its reading or pronunciation. For
/// hiragana-only text: its katakana conversion. For katakana-only text: the
/// text itself. Otherwise an empty string.
/// </param>
/// <param name="pronunication">
/// Built the same way as <paramref name="reading"/>, using token
/// pronunciations in the kanji case.
/// </param>
private void GetUntokenReadingAndPronunication(string unTokenized, out string reading, out string pronunication)
{
    if (!StringHelper.IsHaveKanji(unTokenized))
    {
        if (StringHelper.IsHiraganaOnly(unTokenized))
        {
            reading = KataHiraConvert.ConvertHiraToKata(unTokenized);
            pronunication = reading;
        }
        else if (StringHelper.IsKatakanaOnly(unTokenized))
        {
            reading = unTokenized;
            pronunication = unTokenized;
        }
        else
        {
            reading = "";
            pronunication = "";
        }

        return;
    }

    var tokens = tokenizer.Tokenize(unTokenized);
    var readingText = new StringBuilder();
    var pronunciationText = new StringBuilder();

    foreach (var token in tokens)
    {
        // "*" marks a token without a usable reading/pronunciation; in that
        // case everything collected so far is discarded in favour of " ".
        if (token.Reading.Equals("*", StringComparison.OrdinalIgnoreCase)
            || token.Pronunciation.Equals("*", StringComparison.OrdinalIgnoreCase))
        {
            reading = " ";
            pronunication = " ";
            return;
        }

        readingText.Append(token.Reading);
        pronunciationText.Append(token.Pronunciation);
    }

    reading = readingText.ToString();
    pronunication = pronunciationText.ToString();
}
/// <summary>
/// Derives a hiragana reading for the base form of a word by keeping the
/// leading part of the surface reading and appending the tail of the base form.
/// </summary>
/// <param name="word">Word supplying the reading, surface and base form.</param>
/// <returns>
/// The hiragana reading unchanged when the surface is one character long or
/// the base form is null; otherwise the reading prefix left after stripping
/// the trailing characters shared by surface and reading, followed by the
/// base form from the corresponding position onward.
/// </returns>
private static string GetBaseFormReadingOfWordWithKanji(WordInformation word)
{
    string hiragana = KataHiraConvert.ConvertKataToHira(word.Reading);
    string surface = word.Surface;

    // A one-character surface (a lone kanji, per the original note) already
    // has its base-form reading; without a base form there is nothing to append.
    if (surface.Length == 1 || word.BaseForm == null)
    {
        return hiragana;
    }

    string baseForm = word.BaseForm;

    // Walk backwards over the trailing characters that surface and reading
    // have in common. Index 0 of either string is never compared.
    int surfacePos = surface.Length - 1;
    int readingPos = hiragana.Length - 1;
    while (surfacePos > 0 && readingPos > 0 && surface[surfacePos] == hiragana[readingPos])
    {
        surfacePos--;
        readingPos--;
    }

    int baseTailStart = surfacePos + 1;
    if (baseTailStart >= baseForm.Length)
    {
        // NOTE(review): this returns one character less than the prefix used
        // in the other branch (readingPos instead of readingPos + 1) --
        // confirm this is intended.
        return hiragana.Substring(0, readingPos);
    }

    return hiragana.Substring(0, readingPos + 1) + baseForm.Substring(baseTailStart);
}