/// <summary>
/// Checks that a single <c>ReduceOnce</c> call, started at the word containing
/// "こと" / "もの", yields a word with the expected surface, pronunciation,
/// reading and base form.
/// </summary>
public void TestReduceWithBaseForm()
{
    AssertReduceOnceWithBaseForm("始めてみることにした", 5, "こと", "ことにした", "コトニシタ", "コトニシタ", "ことにする");
    AssertReduceOnceWithBaseForm("というものであった", 3, "もの", "ものであった", "モノデアッタ", "モノデアッタ", "ものである");
}

/// <summary>
/// Tokenizes and combines <paramref name="sentence"/>, locates the first combined word
/// whose surface contains <paramref name="anchor"/>, runs one reduction from that index
/// and asserts the properties of the returned word.
/// </summary>
/// <param name="sentence">Text handed to the tokenizer.</param>
/// <param name="expectedCombinedCount">Number of words expected after combining, before reducing.</param>
/// <param name="anchor">Substring identifying the word at which the reduction starts.</param>
/// <param name="expectedSurface">Expected <c>Surface</c> of the reduced word.</param>
/// <param name="expectedPronunciation">Expected <c>Pronunciation</c> of the reduced word.</param>
/// <param name="expectedReading">Expected <c>Reading</c> of the reduced word.</param>
/// <param name="expectedBaseForm">Expected <c>BaseForm</c> of the reduced word.</param>
private void AssertReduceOnceWithBaseForm(
    string sentence,
    int expectedCombinedCount,
    string anchor,
    string expectedSurface,
    string expectedPronunciation,
    string expectedReading,
    string expectedBaseForm)
{
    var tokens = tokenizer.Tokenize(sentence);
    var words = WordConjungateCombiner.Combine(tokens, dictionary);
    Assert.AreEqual(expectedCombinedCount, words.Count);

    int index = words.FindIndex(0, words.Count, x => x.Surface.Contains(anchor));
    // FindIndex returns -1 when nothing matches; fail here with a readable message
    // instead of handing an invalid index to ReduceOnce.
    Assert.IsTrue(index >= 0, "No combined word contains \"" + anchor + "\" in \"" + sentence + "\".");

    var reducer = new WordListReducer();
    // The trailing 'true' is passed exactly as before; its meaning is defined by ReduceOnce.
    var newWord = reducer.ReduceOnce(index, words, dictionary, true);

    Assert.AreEqual(expectedSurface, newWord.Surface);
    Assert.AreEqual(expectedPronunciation, newWord.Pronunciation);
    Assert.AreEqual(expectedReading, newWord.Reading);
    Assert.AreEqual(expectedBaseForm, newWord.BaseForm);
}
/// <summary>
/// Runs every sample in the <c>TestReducer.csv</c> resource through
/// tokenize, combine and <c>ReduceAll</c>, then compares the resulting word
/// surfaces with the expected ones listed in the file.
/// </summary>
/// <remarks>
/// Each non-blank line is split on tabs: field 0 is the input sentence and
/// field 1 is the list of expected surfaces separated by spaces.
/// </remarks>
public void TestAllSampleSentences()
{
    using (var examples = File.OpenRead(RESOURCE + "/TestReducer.csv"))
    using (var reader = new StreamReader(examples))
    {
        int lineNumber = 0;
        while (!reader.EndOfStream)
        {
            var line = reader.ReadLine();
            lineNumber++;
            if (String.IsNullOrWhiteSpace(line))
            {
                continue;
            }

            var split = line.Split(new string[] { "\t" }, StringSplitOptions.RemoveEmptyEntries);
            // A line without both fields used to surface as an IndexOutOfRangeException on split[1].
            Assert.IsTrue(
                split.Length >= 2,
                "TestReducer.csv line " + lineNumber + " has no tab-separated expectation: \"" + line + "\"");

            var location = "TestReducer.csv line " + lineNumber + " (\"" + split[0] + "\")";

            var tokens = tokenizer.Tokenize(split[0]);
            var actualWords = WordConjungateCombiner.Combine(tokens, dictionary);
            WordListReducer reducer = new WordListReducer();
            actualWords = reducer.ReduceAll(actualWords, dictionary);

            var expectedWords = split[1].Split(new string[] { " " }, StringSplitOptions.RemoveEmptyEntries);

            // Too few words used to fail with an ArgumentOutOfRangeException from the indexer below.
            // NOTE(review): only the first expectedWords.Length words are compared, exactly as before;
            // if the CSV always lists the complete segmentation, consider asserting equal counts.
            Assert.IsTrue(
                actualWords.Count >= expectedWords.Length,
                location + ": expected at least " + expectedWords.Length + " words but got " + actualWords.Count);

            for (int i = 0; i < expectedWords.Length; i++)
            {
                Assert.AreEqual(
                    expectedWords[i],
                    actualWords[i].Surface,
                    location + ": surface mismatch at word index " + i);
            }
        }
    }
}
/// <summary>
/// Reduces "だいがく" and expects one dictionary word, then reduces the
/// truncated input "だいが" and expects the two words "だい" and "が".
/// </summary>
public void TestFullAndPartialWordMatchInDictionary()
{
    // Complete input: expected to come back as a single word flagged as in the dictionary.
    var fullTokens = tokenizer.Tokenize("だいがく");
    var fullCombined = WordConjungateCombiner.Combine(fullTokens, dictionary);
    var fullReduced = new WordListReducer().ReduceAll(fullCombined, dictionary);

    Assert.AreEqual(1, fullReduced.Count);
    var whole = fullReduced[0];
    Assert.AreEqual("だいがく", whole.Surface);
    Assert.AreEqual("ダイガク", whole.Reading);
    Assert.AreEqual("ダイガク", whole.Pronunciation);
    Assert.IsTrue(whole.IsInDictionary);

    // Truncated input: expected to come back as two separate words.
    var partialTokens = tokenizer.Tokenize("だいが");
    var partialCombined = WordConjungateCombiner.Combine(partialTokens, dictionary);
    var partialReduced = new WordListReducer().ReduceAll(partialCombined, dictionary);

    Assert.AreEqual(2, partialReduced.Count);
    var head = partialReduced[0];
    var tail = partialReduced[1];
    Assert.AreEqual("だい", head.Surface);
    Assert.AreEqual("が", tail.Surface);
    Assert.AreEqual("ダイ", head.Reading);
    Assert.AreEqual("ガ", tail.Reading);
    Assert.AreEqual("ダイ", head.Pronunciation);
    Assert.AreEqual("ガ", tail.Pronunciation);
}
/// <summary>
/// Runs one reduction from index 1 of "私だからな" and expects the returned
/// word to be "だから" with matching pronunciation and reading.
/// </summary>
public void TestIfRemoveWrongEndIndex()
{
    var combined = WordConjungateCombiner.Combine(tokenizer.Tokenize("私だからな"), dictionary);

    // Index 1 is the start position handed to ReduceOnce.
    var reduced = new WordListReducer().ReduceOnce(1, combined, dictionary, true);

    Assert.AreEqual("だから", reduced.Surface);
    Assert.AreEqual("ダカラ", reduced.Pronunciation);
    Assert.AreEqual("ダカラ", reduced.Reading);
}
/// <summary>
/// Reduces "今日は一歩も" and checks that the word at index 2 is "一歩"
/// with pronunciation and reading "イッポ".
/// </summary>
public void TestWrongReadingAfterReduce()
{
    var combined = WordConjungateCombiner.Combine(tokenizer.Tokenize("今日は一歩も"), dictionary);
    var reduced = new WordListReducer().ReduceAll(combined, dictionary);

    var third = reduced[2];
    Assert.AreEqual("一歩", third.Surface);
    Assert.AreEqual("イッポ", third.Pronunciation);
    Assert.AreEqual("イッポ", third.Reading);
}
/// <summary>
/// Finds the combined word containing "かも" in a long sentence, runs one
/// reduction from that index and expects "かもしれない" with matching
/// pronunciation and reading.
/// </summary>
public void TestReduceOnce()
{
    var tokens = tokenizer.Tokenize("そこで私たちを待っている幸福が、私たちが望むような幸福ではないかもしれない。");
    var words = WordConjungateCombiner.Combine(tokens, dictionary);

    int index = words.FindIndex(0, words.Count, x => x.Surface.Contains("かも"));
    // FindIndex returns -1 when nothing matches; fail here with a readable message
    // instead of handing an invalid index to ReduceOnce.
    Assert.IsTrue(index >= 0, "No combined word contains \"かも\".");

    WordListReducer reducer = new WordListReducer();
    var newWord = reducer.ReduceOnce(index, words, dictionary, true);

    Assert.AreEqual("かもしれない", newWord.Surface);
    Assert.AreEqual("カモシレナイ", newWord.Pronunciation);
    Assert.AreEqual("カモシレナイ", newWord.Reading);
}
/// <summary>
/// Reduces "洋ゲーで日本語ローカライズされてない" and checks that the words at
/// index 0 and index 3 are flagged as in the dictionary and not as unknown,
/// and that index 0 is "洋ゲー" with pronunciation "ヨーゲー" and reading "ヨウゲー".
/// </summary>
public void TestReduceUnkownWord()
{
    var tokens = tokenizer.Tokenize("洋ゲーで日本語ローカライズされてない");
    var words = WordConjungateCombiner.Combine(tokens, dictionary);
    WordListReducer reducer = new WordListReducer();
    var newWords = reducer.ReduceAll(words, dictionary);

    // Indices 0 and 3 are read below; report a short list as an assertion failure
    // rather than an out-of-range exception from the indexer.
    Assert.IsTrue(newWords.Count > 3, "Expected at least 4 words after reduction but got " + newWords.Count);

    Assert.IsTrue(newWords[0].IsInDictionary);
    Assert.IsFalse(newWords[0].IsUnknownWord);
    Assert.AreEqual("洋ゲー", newWords[0].Surface);
    Assert.AreEqual("ヨーゲー", newWords[0].Pronunciation);
    Assert.AreEqual("ヨウゲー", newWords[0].Reading);

    Assert.IsTrue(newWords[3].IsInDictionary);
    Assert.IsFalse(newWords[3].IsUnknownWord);
}
/// <summary>
/// Reduces "ここら辺には見るべきところがたくさんあります。" and checks surface,
/// reading and pronunciation of the first ten resulting words.
/// </summary>
/// <remarks>
/// The expectation for "は" differs between reading ("ハ") and pronunciation ("ワ").
/// </remarks>
public void TestParticle()
{
    var combined = WordConjungateCombiner.Combine(
        tokenizer.Tokenize("ここら辺には見るべきところがたくさんあります。"),
        dictionary);
    var reduced = new WordListReducer().ReduceAll(combined, dictionary);

    var expectedSurfaces = new string[]
    {
        "ここら辺", "に", "は", "見る", "べき", "ところ", "が", "たくさん", "あります", "。"
    };
    var expectedReadings = new string[]
    {
        "ココラヘン", "ニ", "ハ", "ミル", "ベキ", "トコロ", "ガ", "タクサン", "アリマス", "。"
    };
    var expectedPronunciations = new string[]
    {
        "ココラヘン", "ニ", "ワ", "ミル", "ベキ", "トコロ", "ガ", "タクサン", "アリマス", "。"
    };

    // Assertion order is kept: all surfaces, then all readings, then all pronunciations.
    for (int i = 0; i < expectedSurfaces.Length; i++)
    {
        Assert.AreEqual(expectedSurfaces[i], reduced[i].Surface);
    }

    for (int i = 0; i < expectedReadings.Length; i++)
    {
        Assert.AreEqual(expectedReadings[i], reduced[i].Reading);
    }

    for (int i = 0; i < expectedPronunciations.Length; i++)
    {
        Assert.AreEqual(expectedPronunciations[i], reduced[i].Pronunciation);
    }
}