Exemple #1
0
        /// <summary>
        /// Copies the character at the reader's current position, plus any following
        /// characters covered by the first matching kanwa-dictionary entry, to
        /// <paramref name="output"/> without converting them.
        /// </summary>
        /// <param name="input">Reader positioned on the character to process.</param>
        /// <param name="output">Writer that receives the copied characters.</param>
        /// <returns>Always <c>true</c>.</returns>
        public bool ToKanji(KakasiReader input, TextWriter output)
        {
            char head = itaijiDictionary.Get((char)input.Get());
            SortedSet<KanjiYomi>.Enumerator candidates = kanwaDictionary.Lookup(head);

            string lookahead = null;    // normalized characters following the head, read lazily
            int    available = 0;       // number of characters actually placed in lookahead
            int    matched   = 0;       // number of lookahead characters covered by the match

            while (candidates.MoveNext())
            {
                KanjiYomi candidate       = candidates.Current;
                int       candidateLength = candidate.Length;

                if (lookahead == null)
                {
                    // The buffer is sized from the first candidate only, with one extra slot.
                    char[] buffer = new char[candidateLength + 1];
                    available = input.More(buffer);
                    for (int i = 0; i < available; i++)
                    {
                        buffer[i] = itaijiDictionary.Get(buffer[i]);
                    }
                    lookahead = new string(buffer, 0, available);
                }

                if (candidateLength < matched)
                {
                    break;
                }
                // Candidates longer than the available look-ahead are skipped;
                // the first candidate that yields a reading ends the search.
                if (candidateLength <= available && candidate.GetYomiFor(lookahead) != null)
                {
                    matched = candidateLength;
                    break;
                }
            }

            // When the match ends with a small hiragana tsu (U+3063) and the next
            // look-ahead character is in the Hiragana block, take that character too.
            bool endsWithSmallTsu = matched > 0 &&
                                    available > matched &&
                                    lookahead[matched - 1] == '\u3063';
            if (endsWithSmallTsu &&
                UnicodeBlock.Of(lookahead[matched]).Equals(UnicodeBlock.Hiragana))
            {
                matched++;
            }

            // The head character itself accounts for the extra +1.
            input.Consume(matched + 1);
            output.Write(head);
            if (matched > 0)
            {
                output.Write(lookahead.ToCharArray(), 0, matched);
            }
            return true;
        }
Exemple #2
0
        /// <summary>
        /// Tells whether a code unit is treated as hiragana: either the prolonged
        /// sound mark (U+30FC) or any character whose block is <c>UnicodeBlock.Hiragana</c>.
        /// </summary>
        /// <param name="ch">Code unit to test; negative values are rejected.</param>
        /// <returns><c>true</c> when the value counts as hiragana, otherwise <c>false</c>.</returns>
        private static bool IsHiragana(int ch)
        {
            if (ch < 0)
            {
                return false;
            }

            // U+30FC (prolonged sound mark) is accepted explicitly before the block test.
            return ch == '\u30fc' ||
                   UnicodeBlock.Of((char)ch).Equals(UnicodeBlock.Hiragana);
        }
Exemple #3
0
        /// <summary>
        /// Tells whether a code unit is treated as katakana: either one of the sound
        /// marks U+309B / U+309C or any character whose block is <c>UnicodeBlock.Katakana</c>.
        /// </summary>
        /// <param name="ch">Code unit to test; negative values are rejected.</param>
        /// <returns><c>true</c> when the value counts as katakana, otherwise <c>false</c>.</returns>
        private static bool IsKatakana(int ch)
        {
            if (ch < 0)
            {
                return false;
            }

            // The two voiced/semi-voiced sound marks are accepted explicitly before the block test.
            bool isSoundMark = ch == '\u309b' || ch == '\u309c';
            return isSoundMark ||
                   UnicodeBlock.Of((char)ch).Equals(UnicodeBlock.Katakana);
        }
Exemple #4
0
 /// <summary>
 /// Runs the conversion loop: reads characters from <c>input</c> until it reports a
 /// negative value, picks a converter from the character's Unicode block, and lets it
 /// write to <c>output</c>. The output is flushed once the input is exhausted.
 /// </summary>
 public void Do()
 {
     for (int ch = input.Get(); ch >= 0; ch = input.Get())
     {
         UnicodeBlock block = UnicodeBlock.Of((char)ch);

         IConverter chosen = null;
         if (block.Equals(UnicodeBlock.CJKUnifiedIdeographs))
         {
             chosen = kanjiConverter;
         }
         else if (block.Equals(UnicodeBlock.Hiragana))
         {
             chosen = hiraganaConverter;
         }
         else if (block.Equals(UnicodeBlock.Katakana))
         {
             chosen = katakanaConverter;
         }
         else if (block.Equals(UnicodeBlock.HalfKana))
         {
             chosen = halfKanaConverter;
         }
         // Falls back both when no block matched and when the matched converter is null.
         chosen = chosen ?? defaultConverter;

         output.PutSeparator();
         bool converted = chosen.Convert(input, output);
         if (converted)
         {
             continue;
         }

         // The converter declined: skip the character, echoing it only in wakachigaki mode.
         input.Consume(1);
         if (wakachigakiMode)
         {
             output.Write((char)ch);
         }
     }
     output.Flush();
 }
Exemple #5
0
        /// <summary>
        /// Copies a run of characters from <paramref name="input"/> to
        /// <paramref name="output"/> unchanged. The run continues while each next
        /// character belongs to the same Unicode block as the first one.
        /// </summary>
        /// <param name="input">Reader positioned on the first character of the run.</param>
        /// <param name="output">Writer that receives the copied characters.</param>
        /// <returns>
        /// <c>false</c> when the reader has no character to offer (negative value);
        /// otherwise <c>true</c> after at least one character has been copied.
        /// </returns>
        public bool Convert(KakasiReader input, TextWriter output)
        {
            int current = input.Get();
            if (current < 0)
            {
                return false;
            }

            UnicodeBlock runBlock = UnicodeBlock.Of((char)current);
            bool         sameRun;

            do
            {
                input.Consume(1);
                output.Write((char)current);

                current = input.Get();
                if (current < 0)
                {
                    sameRun = false;
                }
                else
                {
                    // U+3005 (kurikaesi), U+3006 (shime), U+30F5 (katakana small ka) and
                    // U+30F6 (katakana small ke) are classified as CJK ideographs here.
                    bool countsAsKanji = current == '\u3005' || current == '\u3006' ||
                                         current == '\u30f5' || current == '\u30f6';
                    UnicodeBlock nextBlock;
                    if (countsAsKanji)
                    {
                        nextBlock = UnicodeBlock.CJKUnifiedIdeographs;
                    }
                    else
                    {
                        nextBlock = UnicodeBlock.Of((char)current);
                    }
                    sameRun = nextBlock.Equals(runBlock);
                }
            }
            while (sameRun);

            return true;
        }
Exemple #6
0
        /// <summary>
        /// Reads dictionary lines from <paramref name="reader"/> until the end of the
        /// stream and registers every (kanji, reading, okurigana) entry found through
        /// <c>AddItem</c>.
        /// </summary>
        /// <remarks>
        /// Each accepted line starts with a reading, followed by one or more separators
        /// (space, comma or tab), followed either by a '/'-delimited candidate list or by
        /// a single kanji token. Lines that do not fit are skipped; some of them are
        /// reported on standard error.
        /// NOTE(review): the '/'-delimited form presumably corresponds to the SKK
        /// dictionary format (going by the label name) - confirm.
        /// </remarks>
        /// <param name="reader">Source of dictionary lines.</param>
        public void Load(StreamReader reader)
        {
            while (true)
            {
                string line = reader.ReadLine();
                if (line == null)
                {
                    // End of stream.
                    break;
                }
                int length = line.Length;
                if (length == 0)
                {
                    continue;
                }
                // Only lines whose first character is hiragana or katakana are entries;
                // everything else is skipped silently.
                UnicodeBlock yomiBlock = UnicodeBlock.Of(line[0]);
                if (!yomiBlock.Equals(UnicodeBlock.Hiragana) && !yomiBlock.Equals(UnicodeBlock.Katakana))
                {
                    continue;
                }
                // Collect the reading: every character up to the first separator.
                StringBuilder yomiBuffer = new StringBuilder();
                yomiBuffer.Append(line[0]);
                int index = 1;
                for (; index < length; index++)
                {
                    char ch = line[index];
                    if (" ,\t".IndexOf(ch) >= 0)
                    {
                        break;
                    }
                    yomiBuffer.Append(ch);
                }
                if (index >= length)
                {
                    // No separator found: the line holds a reading but nothing else.
                    Console.Error.WriteLine("KanwaDictionary: Ignored line: " + line);
                    continue;
                }
                // A trailing lowercase ASCII letter on the reading is taken off and kept
                // as the okurigana marker; '0' is used when there is none.
                // At this point yomiBuffer holds exactly `index` characters.
                char okurigana = '0';
                char yomiLast  = yomiBuffer[index - 1];
                if (yomiLast >= 'a' && yomiLast <= 'z')
                {
                    okurigana         = yomiLast;
                    yomiBuffer.Length = index - 1;
                }
                string yomi = yomiBuffer.ToString();
                // Skip the run of separators between the reading and the kanji part.
                for (++index; index < length; index++)
                {
                    char ch = line[index];
                    if (" ,\t".IndexOf(ch) < 0)
                    {
                        break;
                    }
                }
                if (index >= length)
                {
                    // Only separators followed the reading.
                    Console.Error.WriteLine("KanwaDictionary: Ignored line: " + line);
                    continue;
                }
                if (line[index] == '/')
                {
                    // '/'-delimited candidate list: each pass of the while loop collects
                    // one candidate starting just after the current index.
SKK_LOOP:
                    while (true)
                    {
                        StringBuilder kanji = new StringBuilder();
                        for (++index; index < length; index++)
                        {
                            char ch = line[index];
                            if (ch == '/')
                            {
                                // End of this candidate.
                                break;
                            }
                            if (ch == ';')
                            {
                                // The candidate collected so far is still added below, but
                                // moving index to the last character means the next pass
                                // runs off the end, so nothing after ';' is parsed.
                                // NOTE(review): this also drops any later candidates on
                                // the same line - confirm that is intended.
                                index = length - 1;
                                break;
                            }
                            if (ch == '[')
                            {
                                // Jumps to the label above, which re-enters the while loop:
                                // the characters collected so far are discarded (a new
                                // StringBuilder is created) and scanning resumes right
                                // after '['.
                                // NOTE(review): this restarts rather than leaves the loop,
                                // so the text after '[' is parsed as further candidates -
                                // confirm that is intended.
                                goto SKK_LOOP;
                            }
                            kanji.Append(ch);
                        }
                        if (index >= length)
                        {
                            // Ran off the end of the line: a candidate that is not closed
                            // by '/' (or cut by ';') is not added.
                            break;
                        }
                        // NOTE(review): two adjacent '/' produce an empty string here.
                        AddItem(kanji.ToString(), yomi, okurigana);
                    }
                }
                else
                {
                    // Single kanji token: everything up to the next separator or end of line.
                    StringBuilder kanji = new StringBuilder();
                    kanji.Append(line[index]);
                    for (++index; index < length; index++)
                    {
                        char ch = line[index];
                        if (" ,\t".IndexOf(ch) >= 0)
                        {
                            break;
                        }
                        kanji.Append(ch);
                    }
                    AddItem(kanji.ToString(), yomi, okurigana);
                }
            }
        }
Exemple #7
0
        /// <summary>
        /// Registers one dictionary entry. The first kanji character (after itaiji
        /// normalization) becomes the lookup key; the remaining kanji characters, the
        /// reading converted to hiragana, and the okurigana marker are stored as a
        /// <c>KanjiYomi</c> in the sorted set kept for that key.
        /// </summary>
        /// <remarks>
        /// The entry is ignored when <paramref name="kanji"/> is null or empty, when its
        /// first character is not in the CJK Unified Ideographs block, or when
        /// <paramref name="yomi"/> contains a character that is neither hiragana nor
        /// katakana (this last case is reported on standard error).
        /// </remarks>
        /// <param name="kanji">Written form of the entry; its first character is the key.</param>
        /// <param name="yomi">Reading in hiragana and/or katakana.</param>
        /// <param name="okurigana">Okurigana marker passed through to <c>KanjiYomi</c>.</param>
        public void AddItem(string kanji, string yomi, char okurigana)
        {
            // An empty candidate has no first character to classify or use as a key;
            // indexing it would throw, so treat it like any other ignored item.
            if (string.IsNullOrEmpty(kanji))
            {
                return;
            }

            UnicodeBlock kanjiBlock = UnicodeBlock.Of(kanji[0]);
            if (!kanjiBlock.Equals(UnicodeBlock.CJKUnifiedIdeographs))
            {
                // Entries that do not start with a kanji are dropped silently.
                return;
            }

            // Normalize every kanji character through the itaiji dictionary.
            ItaijiDictionary itaiji      = ItaijiDictionary.GetInstance();
            int              kanjiLength = kanji.Length;
            StringBuilder    kanjiBuffer = new StringBuilder(kanjiLength);
            for (int index = 0; index < kanjiLength; index++)
            {
                kanjiBuffer.Append(itaiji.Get(kanji[index]));
            }
            char key = kanjiBuffer[0];

            // The stored kanji text excludes the key character.
            kanji = kanjiBuffer.ToString().Substring(1);

            int           yomiLength = yomi.Length;
            StringBuilder yomiBuffer = new StringBuilder(yomiLength);
            for (int index = 0; index < yomiLength; index++)
            {
                char         ch    = yomi[index];
                UnicodeBlock block = UnicodeBlock.Of(ch);
                if (!block.Equals(UnicodeBlock.Hiragana) && !block.Equals(UnicodeBlock.Katakana))
                {
                    Console.Error.WriteLine("KanwaDictionary: Ignored item:" +
                                            " kanji=" + kanjiBuffer + " yomi=" + yomi);
                    return;
                }
                if ((ch >= '\u30a1' && ch <= '\u30f3') ||
                    ch == '\u30fd' || ch == '\u30fe')
                {
                    // Katakana letters and iteration marks sit 0x60 above their
                    // hiragana counterparts.
                    yomiBuffer.Append((char)(ch - 0x60));
                }
                else if (ch == '\u30f4')
                {
                    // Katakana 'vu' becomes hiragana 'u' followed by the voiced sound mark.
                    yomiBuffer.Append('\u3046');
                    yomiBuffer.Append('\u309b');
                }
                else
                {
                    yomiBuffer.Append(ch);
                }
            }
            yomi = yomiBuffer.ToString();

            KanjiYomi kanjiYomi = new KanjiYomi(kanji, yomi, okurigana);

            // Single lookup: fetch the set for this key, creating it on first use.
            SortedSet<KanjiYomi> entries;
            if (!contentsTable.TryGetValue(key, out entries))
            {
                entries = new SortedSet<KanjiYomi>();
                contentsTable.Add(key, entries);
            }
            entries.Add(kanjiYomi);
        }
Exemple #8
0
        /// <summary>
        /// Looks up the character at the reader's current position in the kanwa
        /// dictionary and writes its reading(s) to <paramref name="output"/>.
        /// </summary>
        /// <remarks>
        /// With <c>HeikiMode</c> off the search stops at the first reading found; with it
        /// on, further candidates are examined and several readings are written as
        /// <c>{a|b}</c>. With <c>FuriganaMode</c> on, the matched source text is written
        /// first and the reading is wrapped in square brackets.
        /// </remarks>
        /// <param name="input">Reader positioned on the character to convert.</param>
        /// <param name="output">Writer that receives the result.</param>
        /// <returns>
        /// <c>false</c> when no reading was found (nothing is consumed or written);
        /// otherwise <c>true</c>.
        /// </returns>
        internal bool ToHiragana(KakasiReader input, TextWriter output)
        {
            char head = itaijiDictionary.Get((char)input.Get());
            SortedSet<KanjiYomi>.Enumerator candidates = kanwaDictionary.Lookup(head);

            HashSet<string> readings  = new HashSet<string>();
            string          lookahead = null;   // normalized characters following the head
            int             available = 0;      // number of characters held in lookahead
            int             matched   = 0;      // number of lookahead characters covered by the match

            while (candidates.MoveNext())
            {
                KanjiYomi candidate = candidates.Current;

                if (lookahead == null)
                {
                    // The buffer is sized from the first candidate only, with one extra slot.
                    char[] buffer = new char[candidate.Length + 1];
                    available = input.More(buffer);
                    for (int i = 0; i < available; i++)
                    {
                        buffer[i] = itaijiDictionary.Get(buffer[i]);
                    }
                    lookahead = new string(buffer, 0, available);
                }

                Logger.Log("kanjiYomi: " + candidate.Kanji + "," + candidate.Yomi + "," + candidate.Okurigana + "," + candidate.Length + "," + available);

                // Stop once candidates become shorter than what has already matched.
                if (candidate.Length < matched)
                {
                    break;
                }
                if (candidate.Length <= available)
                {
                    string reading = candidate.GetYomiFor(lookahead);
                    if (reading != null)
                    {
                        readings.Add(reading);
                        matched = candidate.Length;
                        if (!HeikiMode)
                        {
                            break;
                        }
                    }
                }
            }

            if (readings.Count == 0)
            {
                return false;
            }

            // When the match ends with a small hiragana tsu (U+3063) and the next
            // look-ahead character is in the Hiragana block, take that character too
            // and append it after every reading.
            char trailing = '\0';
            bool endsWithSmallTsu = matched > 0 &&
                                    available > matched &&
                                    lookahead[matched - 1] == '\u3063';
            if (endsWithSmallTsu)
            {
                char following = lookahead[matched];
                if (UnicodeBlock.Of(following).Equals(UnicodeBlock.Hiragana))
                {
                    matched++;
                    trailing = following;
                }
            }
            bool hasTrailing = trailing != '\0';

            // The head character itself accounts for the extra +1.
            input.Consume(matched + 1);

            if (FuriganaMode)
            {
                output.Write(head);
                if (matched > 0)
                {
                    output.Write(lookahead.ToCharArray(), 0, matched);
                }
                output.Write('[');
            }

            if (readings.Count == 1)
            {
                output.Write(readings.FirstOrDefault());
                if (hasTrailing)
                {
                    output.Write(trailing);
                }
            }
            else
            {
                // More than one reading: {reading1|reading2|...}
                output.Write('{');
                bool first = true;
                foreach (string reading in readings)
                {
                    if (!first)
                    {
                        output.Write('|');
                    }
                    first = false;

                    output.Write(reading);
                    if (hasTrailing)
                    {
                        output.Write(trailing);
                    }
                }
                output.Write('}');
            }

            if (FuriganaMode)
            {
                output.Write(']');
            }
            return true;
        }
Exemple #9
0
 /// <summary>
 /// Tells whether <paramref name="block"/> is one of the three blocks checked here:
 /// CJK Unified Ideographs, Hiragana or Katakana.
 /// </summary>
 /// <param name="block">Unicode block to classify.</param>
 /// <returns><c>true</c> for any of the three blocks, otherwise <c>false</c>.</returns>
 private bool IsJapanese(UnicodeBlock block)
 {
     if (block.Equals(UnicodeBlock.CJKUnifiedIdeographs))
     {
         return true;
     }
     if (block.Equals(UnicodeBlock.Hiragana))
     {
         return true;
     }
     return block.Equals(UnicodeBlock.Katakana);
 }