public void TestReduceWithBaseForm()
        {
            // Checks that a single reduction reports the dictionary base form of the
            // merged word in addition to its surface, reading and pronunciation.

            // Scenario 1: the combiner must yield five words; reducing once from the
            // word containing "こと" is expected to give "ことにした" (base form "ことにする").
            var kotoTokens = tokenizer.Tokenize("始めてみることにした");
            var kotoWords  = WordConjungateCombiner.Combine(kotoTokens, dictionary);
            Assert.AreEqual(5, kotoWords.Count);

            int kotoIndex   = kotoWords.FindIndex(0, kotoWords.Count, w => w.Surface.Contains("こと"));
            var kotoReducer = new WordListReducer();
            var kotoResult  = kotoReducer.ReduceOnce(kotoIndex, kotoWords, dictionary, true);

            Assert.AreEqual("ことにした", kotoResult.Surface);
            Assert.AreEqual("コトニシタ", kotoResult.Pronunciation);
            Assert.AreEqual("コトニシタ", kotoResult.Reading);
            Assert.AreEqual("ことにする", kotoResult.BaseForm);

            // Scenario 2: the combiner must yield three words; reducing once from the
            // word containing "もの" is expected to give "ものであった" (base form "ものである").
            var monoTokens = tokenizer.Tokenize("というものであった");
            var monoWords  = WordConjungateCombiner.Combine(monoTokens, dictionary);
            Assert.AreEqual(3, monoWords.Count);

            int monoIndex   = monoWords.FindIndex(0, monoWords.Count, w => w.Surface.Contains("もの"));
            var monoReducer = new WordListReducer();
            var monoResult  = monoReducer.ReduceOnce(monoIndex, monoWords, dictionary, true);

            Assert.AreEqual("ものであった", monoResult.Surface);
            Assert.AreEqual("モノデアッタ", monoResult.Pronunciation);
            Assert.AreEqual("モノデアッタ", monoResult.Reading);
            Assert.AreEqual("ものである", monoResult.BaseForm);
        }
        /// <summary>
        /// Runs every non-blank line of <c>TestReducer.csv</c> through the tokenizer,
        /// the conjugation combiner and <c>WordListReducer.ReduceAll</c>, then compares
        /// the resulting word surfaces with the expected ones listed on that line.
        /// </summary>
        /// <remarks>
        /// Each line holds the input sentence, a tab, and the expected surfaces
        /// separated by spaces. Only the first <c>expectedWords.Length</c> reduced words
        /// are compared; additional trailing words in the actual result are not checked.
        /// Every assertion carries the line number and sentence so a failure can be
        /// traced back to the offending sample.
        /// </remarks>
        public void TestAllSampleSentences()
        {
            using (var examples = File.OpenRead(RESOURCE + "/TestReducer.csv"))
            using (var reader = new StreamReader(examples))
            {
                int lineNumber = 0;

                while (!reader.EndOfStream)
                {
                    var line = reader.ReadLine();
                    lineNumber++;

                    if (String.IsNullOrWhiteSpace(line))
                    {
                        continue;
                    }

                    var split = line.Split(new string[] { "\t" }, StringSplitOptions.RemoveEmptyEntries);

                    // A line without a tab-separated expectation would otherwise crash on
                    // split[1]; report it as a readable assertion failure instead.
                    Assert.IsTrue(
                        split.Length >= 2,
                        "TestReducer.csv line " + lineNumber + " has no tab-separated expected words: " + line);

                    var             sentence    = split[0];
                    var             context     = "TestReducer.csv line " + lineNumber + " (" + sentence + ")";
                    var             tokens      = tokenizer.Tokenize(sentence);
                    var             actualWords = WordConjungateCombiner.Combine(tokens, dictionary);
                    WordListReducer reducer     = new WordListReducer();
                    actualWords = reducer.ReduceAll(actualWords, dictionary);

                    var expectedWords = split[1].Split(new string[] { " " }, StringSplitOptions.RemoveEmptyEntries);

                    // Fewer reduced words than expected used to surface as an
                    // out-of-range exception inside the loop below.
                    Assert.IsTrue(
                        actualWords.Count >= expectedWords.Length,
                        context + ": expected at least " + expectedWords.Length
                        + " words but the reducer produced " + actualWords.Count);

                    for (int i = 0; i < expectedWords.Length; i++)
                    {
                        Assert.AreEqual(
                            expectedWords[i],
                            actualWords[i].Surface,
                            context + ": surface mismatch at word index " + i);
                    }
                }
            }
        }
        /// <summary>
        /// Compares a full match ("だいがく") with a partial one ("だいが"): the former is
        /// expected to reduce to a single in-dictionary word, the latter to the two words
        /// "だい" and "が".
        /// </summary>
        public void TestFullAndPartialWordMatchInDictionary()
        {
            // Full match: everything collapses into one word.
            var fullTokens  = tokenizer.Tokenize("だいがく");
            var fullWords   = WordConjungateCombiner.Combine(fullTokens, dictionary);
            var fullReducer = new WordListReducer();
            fullWords = fullReducer.ReduceAll(fullWords, dictionary);

            Assert.AreEqual(1, fullWords.Count);
            Assert.AreEqual("だいがく", fullWords[0].Surface);
            Assert.AreEqual("ダイガク", fullWords[0].Reading);
            Assert.AreEqual("ダイガク", fullWords[0].Pronunciation);
            Assert.IsTrue(fullWords[0].IsInDictionary);

            // Partial match: the input stays split into two words.
            var partialTokens  = tokenizer.Tokenize("だいが");
            var partialWords   = WordConjungateCombiner.Combine(partialTokens, dictionary);
            var partialReducer = new WordListReducer();
            partialWords = partialReducer.ReduceAll(partialWords, dictionary);

            Assert.AreEqual(2, partialWords.Count);

            Assert.AreEqual("だい", partialWords[0].Surface);
            Assert.AreEqual("が", partialWords[1].Surface);

            Assert.AreEqual("ダイ", partialWords[0].Reading);
            Assert.AreEqual("ガ", partialWords[1].Reading);

            Assert.AreEqual("ダイ", partialWords[0].Pronunciation);
            Assert.AreEqual("ガ", partialWords[1].Pronunciation);
        }
        /// <summary>
        /// Reduces "私だからな" once, starting at word index 1, and expects the merged
        /// word to be "だから" (reading and pronunciation "ダカラ").
        /// </summary>
        public void TestIfRemoveWrongEndIndex()
        {
            var sentenceTokens = tokenizer.Tokenize("私だからな");
            var combined       = WordConjungateCombiner.Combine(sentenceTokens, dictionary);

            var listReducer = new WordListReducer();
            var merged      = listReducer.ReduceOnce(1, combined, dictionary, true);

            Assert.AreEqual("だから", merged.Surface);
            Assert.AreEqual("ダカラ", merged.Pronunciation);
            Assert.AreEqual("ダカラ", merged.Reading);
        }
        /// <summary>
        /// Fully reduces "今日は一歩も" and expects the word at index 2 to be "一歩"
        /// with reading and pronunciation "イッポ".
        /// </summary>
        public void TestWrongReadingAfterReduce()
        {
            var sentenceTokens = tokenizer.Tokenize("今日は一歩も");
            var combined       = WordConjungateCombiner.Combine(sentenceTokens, dictionary);

            var listReducer = new WordListReducer();
            var reduced     = listReducer.ReduceAll(combined, dictionary);

            Assert.AreEqual("一歩", reduced[2].Surface);
            Assert.AreEqual("イッポ", reduced[2].Pronunciation);
            Assert.AreEqual("イッポ", reduced[2].Reading);
        }
        /// <summary>
        /// Locates the first word containing "かも" in a long sentence, reduces once
        /// from that position and expects the merged word "かもしれない"
        /// (reading and pronunciation "カモシレナイ").
        /// </summary>
        public void TestReduceOnce()
        {
            var sentenceTokens = tokenizer.Tokenize("そこで私たちを待っている幸福が、私たちが望むような幸福ではないかもしれない。");
            var combined       = WordConjungateCombiner.Combine(sentenceTokens, dictionary);

            // Start the reduction at the first word whose surface contains "かも".
            int startIndex = combined.FindIndex(0, combined.Count, w => w.Surface.Contains("かも"));

            var listReducer = new WordListReducer();
            var merged      = listReducer.ReduceOnce(startIndex, combined, dictionary, true);

            Assert.AreEqual("かもしれない", merged.Surface);
            Assert.AreEqual("カモシレナイ", merged.Pronunciation);
            Assert.AreEqual("カモシレナイ", merged.Reading);
        }
        /// <summary>
        /// Fully reduces "洋ゲーで日本語ローカライズされてない" and expects the words at
        /// indices 0 and 3 to be flagged as in-dictionary and not unknown; the first
        /// word must additionally be "洋ゲー" with pronunciation "ヨーゲー" and reading
        /// "ヨウゲー".
        /// </summary>
        public void TestReduceUnkownWord()
        {
            var sentenceTokens = tokenizer.Tokenize("洋ゲーで日本語ローカライズされてない");
            var combined       = WordConjungateCombiner.Combine(sentenceTokens, dictionary);

            var listReducer = new WordListReducer();
            var reduced     = listReducer.ReduceAll(combined, dictionary);

            // First word: dictionary flags, then surface / pronunciation / reading.
            Assert.IsTrue(reduced[0].IsInDictionary);
            Assert.IsTrue(!reduced[0].IsUnknownWord);
            Assert.AreEqual("洋ゲー", reduced[0].Surface);
            Assert.AreEqual("ヨーゲー", reduced[0].Pronunciation);
            Assert.AreEqual("ヨウゲー", reduced[0].Reading);

            // Fourth word: only the dictionary flags are checked.
            Assert.IsTrue(reduced[3].IsInDictionary);
            Assert.IsTrue(!reduced[3].IsUnknownWord);
        }
        /// <summary>
        /// Fully reduces "ここら辺には見るべきところがたくさんあります。" and checks the
        /// surface, reading and pronunciation of the first ten resulting words. The
        /// expectations differ between reading and pronunciation only for "は"
        /// (reading "ハ", pronunciation "ワ").
        /// </summary>
        public void TestParticle()
        {
            string[] expectedSurfaces =
            {
                "ここら辺", "に", "は", "見る", "べき", "ところ", "が", "たくさん", "あります", "。"
            };
            string[] expectedReadings =
            {
                "ココラヘン", "ニ", "ハ", "ミル", "ベキ", "トコロ", "ガ", "タクサン", "アリマス", "。"
            };
            string[] expectedPronunciations =
            {
                "ココラヘン", "ニ", "ワ", "ミル", "ベキ", "トコロ", "ガ", "タクサン", "アリマス", "。"
            };

            var sentenceTokens = tokenizer.Tokenize("ここら辺には見るべきところがたくさんあります。");
            var words          = WordConjungateCombiner.Combine(sentenceTokens, dictionary);
            var listReducer    = new WordListReducer();
            words = listReducer.ReduceAll(words, dictionary);

            // Checked in three passes: all surfaces, then all readings, then all pronunciations.
            for (int i = 0; i < expectedSurfaces.Length; i++)
            {
                Assert.AreEqual(expectedSurfaces[i], words[i].Surface);
            }

            for (int i = 0; i < expectedReadings.Length; i++)
            {
                Assert.AreEqual(expectedReadings[i], words[i].Reading);
            }

            for (int i = 0; i < expectedPronunciations.Length; i++)
            {
                Assert.AreEqual(expectedPronunciations[i], words[i].Pronunciation);
            }
        }