/// <summary>
/// Loops through the characters of a string and splits them into sequential
/// tokens, starting a new token whenever the character type changes.
/// eg. "Cake 01. ヴァンパイア雪降る夜"
///     => ["Cake 01. ", "ヴァンパイア", "雪降る夜"]
/// </summary>
/// <param name="inText">Text to tokenize. May be null or empty.</param>
/// <returns>
/// The tokens in order of appearance. The list is empty when
/// <paramref name="inText"/> is null or empty.
/// </returns>
public List<TextToken> GetTextTokens(string inText)
{
    var textTokens = new List<TextToken>();

    // Nothing to tokenize. Guarding here also prevents the
    // NullReferenceException the loop below would raise on null input.
    if (string.IsNullOrEmpty(inText))
    {
        return textTokens;
    }

    // Start with arbitrary token type
    var prevCharTokenType = TokenType.Latin;
    var currCharTokenType = prevCharTokenType;
    var currToken = new TextToken(currCharTokenType);

    // NOTE(review): iterating by char classifies each UTF-16 code unit on its
    // own; confirm the Unicode helpers behave as intended for surrogate pairs.
    foreach (var c in inText)
    {
        var cs = c.ToString();

        if (Unicode.IsProlongedChar(c))
        {
            // A prolonged char inherits the previous character's type,
            // so it is appended to the token currently being built.
            currCharTokenType = prevCharTokenType;
        }
        else if (Unicode.IsHiragana(cs) || Unicode.IsKanji(cs))
        {
            currCharTokenType = TokenType.HiraganaKanji;
        }
        else if (Unicode.IsKatakana(cs))
        {
            currCharTokenType = TokenType.Katakana;
        }
        else
        {
            // Everything that is not recognised above is treated as Latin
            currCharTokenType = TokenType.Latin;
        }

        // Check if there is a new token
        if (prevCharTokenType == currCharTokenType)
        {
            // Same token
            currToken.Text += cs;
            continue;
        }

        // New token
        // Modifies the prefix of the token depending on prev/curr tokens
        // eg. Add space before curr token
        var tokenPrefix = "";
        if (!string.IsNullOrEmpty(currToken.Text))
        {
            // Add token to token list if there is text in it
            textTokens.Add(currToken);

            // The finished token is non-empty here, so its last character
            // can be read directly to work out the prefix of the new token.
            var prevLastChar = currToken.Text[currToken.Text.Length - 1];
            tokenPrefix = GetTokenPrefix(prevCharTokenType, currCharTokenType, prevLastChar, c);
        }

        // Create new token
        currToken = new TextToken(currCharTokenType, cs, tokenPrefix);
        prevCharTokenType = currCharTokenType;
    }

    // Add last token to the list
    if (!string.IsNullOrEmpty(currToken.Text))
    {
        textTokens.Add(currToken);
    }

    return textTokens;
}
/// <summary>
/// Reports whether a text counts as translated, i.e. none of its characters
/// is reported as Japanese by <c>Unicode.IsJapanese</c>.
/// </summary>
/// <param name="text">Text to inspect.</param>
/// <returns>
/// <c>true</c> when no character of <paramref name="text"/> is Japanese
/// (including when the text is empty); otherwise <c>false</c>.
/// </returns>
public bool IsTranslated(string text)
{
    // "Every character is non-Japanese" is the same condition as
    // "no character is Japanese"; evaluation stops at the first Japanese char.
    bool hasNoJapanese = text.All(ch => !Unicode.IsJapanese(ch.ToString()));
    return hasNoJapanese;
}