/// <summary>
/// Copies the kanji at the reader's current position to <paramref name="output"/>,
/// together with the following characters covered by the first dictionary candidate
/// (in enumeration order) for which <c>GetYomiFor</c> returns a reading.
/// </summary>
/// <param name="input">Reader whose current character is the kanji to process.</param>
/// <param name="output">Writer that receives the kanji and any matched trailing characters.</param>
/// <returns>Always <c>true</c>; at least the leading kanji is consumed and written.</returns>
public bool ToKanji(KakasiReader input, TextWriter output)
{
    char headKanji = itaijiDictionary.Get((char)input.Get());
    SortedSet<KanjiYomi>.Enumerator candidates = kanwaDictionary.Lookup(headKanji);

    string lookahead = null;
    int lookaheadLength = 0;
    int matchedLength = 0;

    while (candidates.MoveNext())
    {
        KanjiYomi candidate = candidates.Current;
        int candidateLength = candidate.Length;

        if (lookahead == null)
        {
            // The look-ahead buffer is read once, sized from the first candidate only,
            // and every character is normalised through the itaiji dictionary.
            char[] buffer = new char[candidateLength + 1];
            lookaheadLength = input.More(buffer);
            for (int i = 0; i < lookaheadLength; i++)
            {
                buffer[i] = itaijiDictionary.Get(buffer[i]);
            }
            lookahead = new string(buffer, 0, lookaheadLength);
        }

        if (candidateLength < matchedLength)
        {
            break;
        }

        // Candidates longer than the available look-ahead are skipped without a lookup.
        if (candidateLength <= lookaheadLength && candidate.GetYomiFor(lookahead) != null)
        {
            matchedLength = candidateLength;
            break;
        }
    }

    // If the match ends in a small tsu (U+3063) and the next look-ahead character is
    // hiragana, that character is taken along with the match.
    bool endsWithSmallTsu = matchedLength > 0
        && lookaheadLength > matchedLength
        && lookahead[matchedLength - 1] == '\u3063';
    if (endsWithSmallTsu)
    {
        UnicodeBlock followingBlock = UnicodeBlock.Of(lookahead[matchedLength]);
        if (followingBlock.Equals(UnicodeBlock.Hiragana))
        {
            matchedLength++;
        }
    }

    // +1 accounts for the leading kanji itself.
    input.Consume(matchedLength + 1);
    output.Write(headKanji);
    if (matchedLength > 0)
    {
        output.Write(lookahead.ToCharArray(), 0, matchedLength);
    }

    return true;
}
/// <summary>
/// Tells whether <paramref name="ch"/> counts as hiragana: either the prolonged sound
/// mark (U+30FC) or a character whose <c>UnicodeBlock</c> is <c>Hiragana</c>.
/// </summary>
/// <param name="ch">Character code; negative values are never hiragana.</param>
/// <returns><c>true</c> when the character is treated as hiragana.</returns>
private static bool IsHiragana(int ch)
{
    // prolonged sound mark
    bool isProlongedSoundMark = ch == '\u30fc';

    return isProlongedSoundMark
        || (ch >= 0 && UnicodeBlock.Of((char)ch).Equals(UnicodeBlock.Hiragana));
}
/// <summary>
/// Tells whether <paramref name="ch"/> counts as katakana: either one of the voice
/// sound marks (U+309B, U+309C) or a character whose <c>UnicodeBlock</c> is <c>Katakana</c>.
/// </summary>
/// <param name="ch">Character code; negative values are never katakana.</param>
/// <returns><c>true</c> when the character is treated as katakana.</returns>
private static bool IsKatakana(int ch)
{
    // voice sound mark
    bool isVoiceSoundMark = ch == '\u309b' || ch == '\u309c';

    return isVoiceSoundMark
        || (ch >= 0 && UnicodeBlock.Of((char)ch).Equals(UnicodeBlock.Katakana));
}
/// <summary>
/// Runs the conversion loop: reads characters from <c>input</c> until it reports a
/// negative value, dispatches each position to the converter registered for the
/// character's Unicode block, and finally flushes <c>output</c>.
/// </summary>
public void Do()
{
    for (int ch = input.Get(); ch >= 0; ch = input.Get())
    {
        UnicodeBlock block = UnicodeBlock.Of((char)ch);

        IConverter blockConverter = null;
        if (block.Equals(UnicodeBlock.CJKUnifiedIdeographs))
        {
            blockConverter = kanjiConverter;
        }
        else if (block.Equals(UnicodeBlock.Hiragana))
        {
            blockConverter = hiraganaConverter;
        }
        else if (block.Equals(UnicodeBlock.Katakana))
        {
            blockConverter = katakanaConverter;
        }
        else if (block.Equals(UnicodeBlock.HalfKana))
        {
            blockConverter = halfKanaConverter;
        }

        // Falls back to the default converter both for unlisted blocks and when the
        // block-specific converter is null.
        IConverter converter = blockConverter ?? defaultConverter;

        output.PutSeparator();

        bool converted = converter.Convert(input, output);
        if (!converted)
        {
            // The converter declined: skip one character, echoing it only in wakachigaki mode.
            input.Consume(1);
            if (wakachigakiMode)
            {
                output.Write((char)ch);
            }
        }
    }

    output.Flush();
}
/// <summary>
/// Copies a run of characters from <paramref name="input"/> to <paramref name="output"/>
/// unchanged. The run starts at the current character and continues while each
/// following character maps to the same Unicode block as the first one.
/// </summary>
/// <param name="input">Reader positioned at the start of the run.</param>
/// <param name="output">Writer that receives the copied characters.</param>
/// <returns><c>false</c> if the reader has no character available; otherwise <c>true</c>.</returns>
public bool Convert(KakasiReader input, TextWriter output)
{
    int ch = input.Get();
    if (ch < 0)
    {
        return false;
    }

    // The block of the first character is taken as-is (no special-case mapping).
    UnicodeBlock runBlock = UnicodeBlock.Of((char)ch);
    bool sameRun;

    do
    {
        input.Consume(1);
        output.Write((char)ch);

        ch = input.Get();
        if (ch < 0)
        {
            sameRun = false;
        }
        else
        {
            // kurikaesi (U+3005), shime (U+3006), katakana small ka (U+30F5) and
            // katakana small ke (U+30F6) are classified as CJK ideographs here.
            bool countsAsKanji = ch == '\u3005'
                || ch == '\u3006'
                || ch == '\u30f5'
                || ch == '\u30f6';

            UnicodeBlock nextBlock;
            if (countsAsKanji)
            {
                nextBlock = UnicodeBlock.CJKUnifiedIdeographs;
            }
            else
            {
                nextBlock = UnicodeBlock.Of((char)ch);
            }

            sameRun = nextBlock.Equals(runBlock);
        }
    }
    while (sameRun);

    return true;
}
/// <summary>
/// Reads dictionary lines from <paramref name="reader"/> until end of stream and
/// registers each kanji/reading pair through <c>AddItem</c>.
/// </summary>
/// <remarks>
/// A line is "reading, separators, entries". Lines that are empty or do not start with
/// a hiragana/katakana character are skipped silently. Lines with no separator after
/// the reading, or nothing after the separators, are reported on standard error and
/// skipped. If the entry part starts with '/', it is read as a '/'-separated candidate
/// list; otherwise a single kanji token is read.
/// </remarks>
/// <param name="reader">Source of dictionary lines.</param>
public void Load(StreamReader reader)
{
    const string Separators = " ,\t";

    string line;
    while ((line = reader.ReadLine()) != null)
    {
        int length = line.Length;
        if (length == 0)
        {
            continue;
        }

        UnicodeBlock firstBlock = UnicodeBlock.Of(line[0]);
        bool startsWithKana = firstBlock.Equals(UnicodeBlock.Hiragana)
            || firstBlock.Equals(UnicodeBlock.Katakana);
        if (!startsWithKana)
        {
            continue;
        }

        // The reading runs from the start of the line up to the first separator.
        int index = 1;
        while (index < length && Separators.IndexOf(line[index]) < 0)
        {
            index++;
        }
        if (index >= length)
        {
            Console.Error.WriteLine("KanwaDictionary: Ignored line: " + line);
            continue;
        }

        // A trailing lowercase ASCII letter on the reading is split off as the
        // okurigana marker; '0' is used when there is none.
        char okurigana = '0';
        int yomiLength = index;
        char lastOfYomi = line[index - 1];
        if (lastOfYomi >= 'a' && lastOfYomi <= 'z')
        {
            okurigana = lastOfYomi;
            yomiLength = index - 1;
        }
        string yomi = line.Substring(0, yomiLength);

        // Skip the separator run between the reading and the entries.
        index++;
        while (index < length && Separators.IndexOf(line[index]) >= 0)
        {
            index++;
        }
        if (index >= length)
        {
            Console.Error.WriteLine("KanwaDictionary: Ignored line: " + line);
            continue;
        }

        if (line[index] == '/')
        {
            while (true)
            {
                StringBuilder candidate = new StringBuilder();
                bool restartAtBracket = false;

                for (++index; index < length; index++)
                {
                    char ch = line[index];
                    if (ch == '/')
                    {
                        break;
                    }
                    if (ch == ';')
                    {
                        // Jump to the last character so the next pass ends the line.
                        index = length - 1;
                        break;
                    }
                    if (ch == '[')
                    {
                        restartAtBracket = true;
                        break;
                    }
                    candidate.Append(ch);
                }

                if (restartAtBracket)
                {
                    // Drop what was collected and start a new candidate after the '['.
                    continue;
                }
                if (index >= length)
                {
                    break;
                }

                AddItem(candidate.ToString(), yomi, okurigana);
            }
        }
        else
        {
            // Single entry: the kanji token extends to the next separator or end of line.
            int end = index + 1;
            while (end < length && Separators.IndexOf(line[end]) < 0)
            {
                end++;
            }
            AddItem(line.Substring(index, end - index), yomi, okurigana);
        }
    }
}
/// <summary>
/// Registers one kanji/reading pair in the dictionary, keyed by the first kanji character.
/// </summary>
/// <remarks>
/// Every kanji character is mapped through <c>ItaijiDictionary</c>; the first mapped
/// character becomes the key and the remainder is stored in the <c>KanjiYomi</c>.
/// Katakana in the reading is converted to hiragana (U+30F4 becomes U+3046 U+309B).
/// The entry is ignored silently when <paramref name="kanji"/> is null, empty, or does
/// not start with a CJK unified ideograph. It is ignored with a message on standard
/// error when the reading contains a character outside the hiragana/katakana blocks.
/// </remarks>
/// <param name="kanji">Kanji spelling of the entry.</param>
/// <param name="yomi">Reading in hiragana and/or katakana.</param>
/// <param name="okurigana">Okurigana marker passed through to <c>KanjiYomi</c>.</param>
public void AddItem(string kanji, string yomi, char okurigana)
{
    // An empty candidate (e.g. "//" in a dictionary line) used to crash on kanji[0].
    if (string.IsNullOrEmpty(kanji))
    {
        return;
    }

    UnicodeBlock kanjiBlock = UnicodeBlock.Of(kanji[0]);
    if (!kanjiBlock.Equals(UnicodeBlock.CJKUnifiedIdeographs))
    {
        return;
    }

    ItaijiDictionary itaiji = ItaijiDictionary.GetInstance();
    int kanjiLength = kanji.Length;
    StringBuilder kanjiBuffer = new StringBuilder(kanjiLength);
    for (int index = 0; index < kanjiLength; index++)
    {
        kanjiBuffer.Append(itaiji.Get(kanji[index]));
    }

    char key = kanjiBuffer[0];
    kanji = kanjiBuffer.ToString().Substring(1);

    int yomiLength = yomi.Length;
    StringBuilder yomiBuffer = new StringBuilder(yomiLength);
    for (int index = 0; index < yomiLength; index++)
    {
        char ch = yomi[index];
        UnicodeBlock block = UnicodeBlock.Of(ch);
        if (!block.Equals(UnicodeBlock.Hiragana) && !block.Equals(UnicodeBlock.Katakana))
        {
            Console.Error.WriteLine("KanwaDictionary: Ignored item:" + " kanji=" + kanjiBuffer + " yomi=" + yomi);
            return;
        }

        if ((ch >= '\u30a1' && ch <= '\u30f3') || ch == '\u30fd' || ch == '\u30fe')
        {
            // Katakana -> hiragana: the two ranges are 0x60 code points apart.
            yomiBuffer.Append((char)(ch - 0x60));
        }
        else if (ch == '\u30f4')
        {
            // 'vu' is written as hiragana u followed by the voiced sound mark.
            yomiBuffer.Append('\u3046');
            yomiBuffer.Append('\u309b');
        }
        else
        {
            yomiBuffer.Append(ch);
        }
    }
    yomi = yomiBuffer.ToString();

    KanjiYomi kanjiYomi = new KanjiYomi(kanji, yomi, okurigana);

    // Single lookup instead of ContainsKey + Add + indexer.
    SortedSet<KanjiYomi> list;
    if (!contentsTable.TryGetValue(key, out list))
    {
        list = new SortedSet<KanjiYomi>();
        contentsTable.Add(key, list);
    }
    list.Add(kanjiYomi);
}
/// <summary>
/// Looks up readings for the kanji at the reader's current position and the characters
/// after it, and writes the reading(s) to <paramref name="output"/>.
/// </summary>
/// <remarks>
/// When <c>HeikiMode</c> is off, the search stops at the first candidate that yields a
/// reading. When it is on, it keeps collecting readings until a candidate shorter than
/// the current match is reached. Several readings are written as <c>{a|b}</c>. In
/// <c>FuriganaMode</c> the original text is written first and the reading is wrapped in
/// <c>[</c> and <c>]</c>.
/// </remarks>
/// <param name="input">Reader whose current character is the kanji to convert.</param>
/// <param name="output">Writer that receives the converted text.</param>
/// <returns>
/// <c>false</c> when no reading was found (nothing is consumed or written by this
/// method); otherwise <c>true</c>.
/// </returns>
internal bool ToHiragana(KakasiReader input, TextWriter output)
{
    char headKanji = itaijiDictionary.Get((char)input.Get());
    SortedSet<KanjiYomi>.Enumerator candidates = kanwaDictionary.Lookup(headKanji);
    HashSet<string> readings = new HashSet<string>();

    string lookahead = null;
    int lookaheadLength = 0;
    int matchedLength = 0;

    while (candidates.MoveNext())
    {
        KanjiYomi candidate = candidates.Current;

        if (lookahead == null)
        {
            // The look-ahead buffer is read once, sized from the first candidate only,
            // and normalised through the itaiji dictionary.
            char[] buffer = new char[candidate.Length + 1];
            lookaheadLength = input.More(buffer);
            for (int i = 0; i < lookaheadLength; i++)
            {
                buffer[i] = itaijiDictionary.Get(buffer[i]);
            }
            lookahead = new string(buffer, 0, lookaheadLength);
        }

        Logger.Log("kanjiYomi: " + candidate.Kanji + "," + candidate.Yomi + "," + candidate.Okurigana + "," + candidate.Length + "," + lookaheadLength);

        if (candidate.Length < matchedLength)
        {
            break;
        }

        if (candidate.Length <= lookaheadLength)
        {
            string reading = candidate.GetYomiFor(lookahead);
            if (reading != null)
            {
                readings.Add(reading);
                matchedLength = candidate.Length;
                if (!HeikiMode)
                {
                    break;
                }
            }
        }
    }

    if (readings.Count == 0)
    {
        return false;
    }

    // If the match ends in a small tsu (U+3063) and the next look-ahead character is
    // hiragana, that character is consumed too and appended after each reading.
    char trailingKana = '\0';
    bool endsWithSmallTsu = matchedLength > 0
        && lookaheadLength > matchedLength
        && lookahead[matchedLength - 1] == '\u3063';
    if (endsWithSmallTsu)
    {
        char following = lookahead[matchedLength];
        if (UnicodeBlock.Of(following).Equals(UnicodeBlock.Hiragana))
        {
            matchedLength++;
            trailingKana = following;
        }
    }

    // +1 accounts for the leading kanji itself.
    input.Consume(matchedLength + 1);

    if (FuriganaMode)
    {
        output.Write(headKanji);
        if (matchedLength > 0)
        {
            output.Write(lookahead.ToCharArray(), 0, matchedLength);
        }
        output.Write('[');
    }

    bool hasAlternatives = readings.Count > 1;
    if (hasAlternatives)
    {
        output.Write('{');
    }

    bool isFirst = true;
    foreach (string reading in readings)
    {
        if (!isFirst)
        {
            output.Write('|');
        }
        output.Write(reading);
        if (trailingKana != '\0')
        {
            output.Write(trailingKana);
        }
        isFirst = false;
    }

    if (hasAlternatives)
    {
        output.Write('}');
    }

    if (FuriganaMode)
    {
        output.Write(']');
    }

    return true;
}