C# (CSharp) JapaneseToRomajiFilenameConverter.Converter TextToken 예제들

프로그래밍 언어: C# (CSharp)

네임스페이스/패키지 이름: JapaneseToRomajiFilenameConverter.Converter

클래스/타입: TextToken

hotexamples.com에서의 예제들: 2

C# (CSharp) JapaneseToRomajiFilenameConverter.Converter TextToken - 2개의 예제가 발견되었습니다. 이것들은 오픈소스 프로젝트에서 추출된 C# (CSharp)의 JapaneseToRomajiFilenameConverter.Converter.TextToken에 대한 실세계 최고 등급의 예제들입니다. 예제들을 평가하여 예제의 품질 향상에 도움을 줄 수 있습니다.

자주 사용되는 메소드들

보기 숨기기

GetTextTokens(1)

예제 #1

파일 보기

        /// <summary>
        /// Translates <paramref name="inText"/> token by token and returns the joined result.
        /// The text is normalized (form KC), split with <c>TextToken.GetTextTokens</c>, and each
        /// token is translated with the map/particle lists that match its token type.
        /// </summary>
        /// <param name="inText">Text to translate. Must not be null.</param>
        /// <param name="languagePair">
        /// Kept for caller compatibility; this method body does not read it.
        /// </param>
        /// <returns>The concatenated token translations, normalized to form KC.</returns>
        /// <remarks>
        /// All four map/particle files are read from disk on every call, so any exception thrown
        /// by <c>File.ReadAllLines</c> propagates to the caller.
        /// </remarks>
        public static string Translate(string inText, string languagePair = LanguagePair)
        {
            // TODO: return early when the text is already translated / romanized
            // (an IsTranslated(inText) check); Japanese punctuation must be considered too.

            // Normalize to convert full-width characters
            inText = inText.Normalize(NormalizationForm.FormKC);

            // Split the text into separate sequential tokens
            List<TextToken> textTokens = TextToken.GetTextTokens(inText);

            // Load the map and particle lists once, up front, so every token reuses them
            string       hirakanjiMapPath = Path.Combine(Maps.DirectoryPath, Maps.HirakanjiLatn);
            List<string> hirakanjiMaps    = new List<string>(File.ReadAllLines(hirakanjiMapPath));

            string       hirakanjiParticlesPath = Path.Combine(Particles.DirectoryPath, Particles.HirakanjiLatn);
            List<string> hirakanjiParticles     = new List<string>(File.ReadAllLines(hirakanjiParticlesPath));

            string       kataMapPath = Path.Combine(Maps.DirectoryPath, Maps.KataEn);
            List<string> kataMaps    = new List<string>(File.ReadAllLines(kataMapPath));

            string       kataParticlesPath = Path.Combine(Particles.DirectoryPath, Particles.KataEn);
            List<string> kataParticles     = new List<string>(File.ReadAllLines(kataParticlesPath));

            // Translate each token and join the pieces; a StringBuilder avoids re-copying
            // the accumulated text on every iteration
            StringBuilder outText = new StringBuilder();

            foreach (TextToken textToken in textTokens)
            {
                switch (textToken.Type)
                {
                case TokenType.HiraganaKanji:
                    outText.Append(textToken.Translate(hirakanjiMaps, hirakanjiParticles));
                    break;

                case TokenType.Katakana:
                    outText.Append(textToken.Translate(kataMaps, kataParticles));
                    break;

                case TokenType.Latin:
                default:
                    // Latin and any other token type use the parameterless overload
                    outText.Append(textToken.Translate());
                    break;
                }
            }

            // Normalize the joined output as well
            return outText.ToString().Normalize(NormalizationForm.FormKC);
        }

예제 #2

파일 보기

파일: TextToken.cs 프로젝트: Adawesome/Japanese-To-Romaji-Filename-Converter

        /// <summary>
        /// Walks the characters of <paramref name="inText"/> and groups consecutive characters
        /// of the same token type into sequential tokens.
        /// </summary>
        /// <example>
        /// "Cake 01. ヴァンパイア雪降る夜" => ["Cake 01. ", "ヴァンパイア", "雪降る夜"]
        /// </example>
        /// <param name="inText">Text to split.</param>
        /// <returns>The non-empty tokens in the order they occur in the text.</returns>
        public static List<TextToken> GetTextTokens(string inText)
        {
            List<TextToken> tokens = new List<TextToken>();

            // The first run is arbitrarily seeded as Latin; the seed token starts empty and is
            // only kept if it actually receives text.
            TokenType runType = TokenType.Latin;
            TextToken pending = new TextToken(runType);

            foreach (char ch in inText)
            {
                string chText = ch.ToString();

                // Classify the character. A prolonged sound character takes the type of the
                // run it appears in, so it never starts a new token.
                TokenType charType =
                    Unicode.IsProlongedChar(ch) ? runType
                    : (Unicode.IsHiragana(chText) || Unicode.IsKanji(chText)) ? TokenType.HiraganaKanji
                    : Unicode.IsKatakana(chText) ? TokenType.Katakana
                    : TokenType.Latin;

                if (charType == runType)
                {
                    // Still inside the current run: extend the pending token
                    pending.Text += chText;
                    continue;
                }

                // The type changed, so a new token begins here. Its prefix depends on the
                // previous/current token types and boundary characters (eg. a space before
                // the new token) and stays empty when there was no preceding text.
                string prefix = "";

                if (!string.IsNullOrEmpty(pending.Text))
                {
                    // Flush the finished token, then derive the prefix from its last character
                    tokens.Add(pending);

                    string finishedText = pending.Text;
                    char lastOfFinished = finishedText[finishedText.Length - 1];
                    prefix = GetTokenPrefix(runType, charType, lastOfFinished, ch);
                }

                pending = new TextToken(charType, chText, prefix);
                runType = charType;
            }

            // Flush the trailing token, if it holds any text
            if (!string.IsNullOrEmpty(pending.Text))
            {
                tokens.Add(pending);
            }

            return tokens;
        }