Exemplo n.º 1
0
        // Loop through characters in a string and split them into sequential tokens
        // eg. "Cake 01. ヴァンパイア雪降る夜"
        // => ["Cake 01. ", "ヴァンパイア", "雪降る夜"]
        /// <summary>
        /// Splits <paramref name="inText"/> into consecutive tokens, starting a new
        /// token each time the token type of the characters changes.
        /// </summary>
        /// <param name="inText">
        /// Text to tokenize. It is processed one UTF-16 <c>char</c> at a time.
        /// A null or empty value yields an empty list.
        /// </param>
        /// <returns>
        /// The tokens in input order. Tokens whose text is empty are never added.
        /// </returns>
        public List <TextToken> GetTextTokens(string inText)
        {
            var textTokens = new List <TextToken>();

            // Nothing to split; this also keeps a null input from throwing in the loop
            if (string.IsNullOrEmpty(inText))
            {
                return(textTokens);
            }

            // Start with arbitrary token type
            var prevCharTokenType = TokenType.Latin;
            var currCharTokenType = prevCharTokenType;

            var currToken = new TextToken(currCharTokenType);

            foreach (var c in inText)
            {
                var cs = c.ToString();

                // Classify the current character
                if (Unicode.IsProlongedChar(c))
                {
                    // Takes the type of the token being built, so it never starts a new token
                    currCharTokenType = prevCharTokenType;
                }
                else if (Unicode.IsHiragana(cs) || Unicode.IsKanji(cs))
                {
                    currCharTokenType = TokenType.HiraganaKanji;
                }
                else if (Unicode.IsKatakana(cs))
                {
                    currCharTokenType = TokenType.Katakana;
                }
                else
                {
                    // Everything that is not recognized above is treated as Latin
                    currCharTokenType = TokenType.Latin;
                }

                // Same type as before: keep extending the current token
                if (prevCharTokenType == currCharTokenType)
                {
                    currToken.Text += cs;
                    continue;
                }

                // Type changed: close the current token and open a new one.
                // The prefix of the new token depends on the prev/curr tokens
                // (eg. add a space before the new token).
                var tokenPrefix = "";

                if (!string.IsNullOrEmpty(currToken.Text))
                {
                    // Only tokens that actually contain text are stored
                    textTokens.Add(currToken);

                    // The token just stored is the previous one; its text is
                    // non-empty here, so reading its last character is safe
                    var prevLastChar = currToken.Text[currToken.Text.Length - 1];
                    tokenPrefix = GetTokenPrefix(prevCharTokenType,
                                                 currCharTokenType,
                                                 prevLastChar, c);
                }

                currToken = new TextToken(currCharTokenType, cs, tokenPrefix);
                prevCharTokenType = currCharTokenType;
            }

            // Add last token to the list
            if (!string.IsNullOrEmpty(currToken.Text))
            {
                textTokens.Add(currToken);
            }

            return(textTokens);
        }
Exemplo n.º 2
0
 /// <summary>
 /// Reports whether <paramref name="text"/> contains no character that
 /// <c>Unicode.IsJapanese</c> accepts.
 /// </summary>
 /// <param name="text">Text to inspect; each <c>char</c> is checked on its own.</param>
 /// <returns>
 /// <c>true</c> when no character is flagged (including for an empty string);
 /// <c>false</c> as soon as one character is flagged.
 /// </returns>
 public bool IsTranslated(string text)
 {
     // Every character, taken as a one-character string, must fail the Japanese check
     return(text.All(ch => !Unicode.IsJapanese(ch.ToString())));
 }