/// <summary>
/// Converts every sentence of token IDs into its sentence of words.
/// </summary>
/// <param name="sentences">Sentences, each given as an array of token IDs.</param>
/// <param name="vocabulary">Vocabulary whose <c>Words</c> indexer maps a token ID to its word.</param>
/// <returns>A new list with one word array per input sentence, in the same order.</returns>
public static List<string[]> TokenIdsToText(List<int[]> sentences, Vocabulary vocabulary)
{
    // The single-sentence overload already returns a freshly allocated array,
    // so no additional copy is needed before collecting the results.
    return sentences
        .Select(sentence => TokenIdsToText(sentence, vocabulary))
        .ToList();
}
/// <summary>
/// Prints line pairs from two token-ID files in which a given token occurs.
/// </summary>
/// <remarks>
/// First prints the looked-up word wrapped in double underscores. Then both files are read
/// line by line in lock-step; each line is expected to hold whitespace-separated integer
/// token IDs. For every line pair whose selected side contains <paramref name="tokenId"/>,
/// the zero-based line index is printed in brackets, followed by the sentence from the first
/// file, the sentence from the second file (the selected side with the token emphasized),
/// and an empty line. Reading stops after <paramref name="count"/> matches or as soon as
/// either file ends.
/// </remarks>
/// <param name="filename1">Path of the first token-ID file.</param>
/// <param name="filename2">Path of the second token-ID file.</param>
/// <param name="vocabulary1">Vocabulary used to render token IDs from the first file.</param>
/// <param name="vocabulary2">Vocabulary used to render token IDs from the second file.</param>
/// <param name="tokenId">Token ID to search for.</param>
/// <param name="count">Maximum number of matching line pairs to print.</param>
/// <param name="first">
/// <c>true</c> to search (and emphasize) in the first file using <paramref name="vocabulary1"/>;
/// <c>false</c> to use the second file and <paramref name="vocabulary2"/>.
/// </param>
private static void BackTranslate(string filename1, string filename2, Vocabulary vocabulary1, Vocabulary vocabulary2, int tokenId, int count, bool first)
{
    var word = first ? vocabulary1.Words[tokenId] : vocabulary2.Words[tokenId];
    Console.WriteLine($"__{word}__");

    // Splitting on whitespace with RemoveEmptyEntries tolerates blank lines and repeated
    // separators (which would otherwise reach int.Parse as empty strings and throw).
    // Token IDs are machine-readable data, so they are parsed culture-independently.
    Func<string, int[]> parseTokenIds = line => line
        .Split((char[])null, StringSplitOptions.RemoveEmptyEntries)
        .Select(token => int.Parse(token, System.Globalization.CultureInfo.InvariantCulture))
        .ToArray();

    using (var file1 = new StreamReader(filename1, Encoding.UTF8, true))
    using (var file2 = new StreamReader(filename2, Encoding.UTF8, true))
    {
        var matches = 0;
        var lineIndex = 0;
        string line1, line2;

        // Check the match limit first so no further lines are consumed once it is reached.
        while (matches < count
               && (line1 = file1.ReadLine()) != null
               && (line2 = file2.ReadLine()) != null)
        {
            var tokens1 = parseTokenIds(line1);
            var tokens2 = parseTokenIds(line2);

            var searched = first ? tokens1 : tokens2;
            if (Array.IndexOf(searched, tokenId) >= 0)
            {
                matches++;
                Console.WriteLine($"[{lineIndex}]");

                if (first)
                {
                    Emphasize(tokens1, tokenId, vocabulary1);
                    Console.WriteLine(string.Join(" ", Data.TokenIdsToText(tokens2, vocabulary2)));
                }
                else
                {
                    Console.WriteLine(string.Join(" ", Data.TokenIdsToText(tokens1, vocabulary1)));
                    Emphasize(tokens2, tokenId, vocabulary2);
                }

                Console.WriteLine();
            }

            lineIndex++;
        }
    }
}
/// <summary>
/// Converts one sentence of token IDs into its words.
/// </summary>
/// <param name="sentence">Token IDs of the sentence.</param>
/// <param name="vocabulary">Vocabulary whose <c>Words</c> indexer maps a token ID to its word.</param>
/// <returns>A new array holding the word for each token ID, in the same order.</returns>
public static string[] TokenIdsToText(int[] sentence, Vocabulary vocabulary)
{
    var words =
        from id in sentence
        select vocabulary.Words[id];

    return words.ToArray();
}
/// <summary>
/// Writes a sentence to the console as space-separated words, wrapping every occurrence
/// of the given token in double underscores for better readability.
/// </summary>
/// <param name="sentence">Token IDs of the sentence.</param>
/// <param name="tokenId">Token ID whose word is highlighted.</param>
/// <param name="vocabulary">Vocabulary whose <c>Words</c> indexer maps a token ID to its word.</param>
private static void Emphasize(IEnumerable<int> sentence, int tokenId, Vocabulary vocabulary)
{
    var text = sentence
        .Select(id => id == tokenId
            ? "__" + vocabulary.Words[id] + "__"
            : vocabulary.Words[id])
        .ToArray();

    Console.WriteLine(string.Join(" ", text));
}