/// <summary>
/// Verifies <c>ExtractAsList</c> for single n-gram sizes and for size ranges.
/// The expected sequences keep repeated n-grams and are compared in order
/// (<c>CollectionAssert.AreEqual</c>), so both ordering and duplicates matter.
/// </summary>
/// <remarks>
/// <c>Tokens</c>, the size constants and the n-gram fixtures (<c>one</c>,
/// <c>one_two</c>, ...) are declared outside this method. The unigram
/// expectation implies the token sequence
/// one, two, three, four, five, one, two, three, four.
/// </remarks>
public void TestExtractAsList()
{
    // --- Single size: unigrams ---
    var unigramExtractor = new NGramExtractor(Unigram);
    IList<NGram> unigrams = unigramExtractor.ExtractAsList(Tokens);
    var expectedUnigrams = new[]
    {
        one, two, three, four, five,
        one, two, three, four
    };
    CollectionAssert.AreEqual(expectedUnigrams, unigrams);

    // --- Single size: bigrams ---
    var bigramExtractor = new NGramExtractor(Bigram);
    IList<NGram> bigrams = bigramExtractor.ExtractAsList(Tokens);
    var expectedBigrams = new[]
    {
        one_two, two_three, three_four, four_five, five_one,
        one_two, two_three, three_four
    };
    CollectionAssert.AreEqual(expectedBigrams, bigrams);

    // --- Single size: trigrams ---
    var trigramExtractor = new NGramExtractor(Trigram);
    IList<NGram> trigrams = trigramExtractor.ExtractAsList(Tokens);
    var expectedTrigrams = new[]
    {
        one_two_three, two_three_four, three_four_five, four_five_one, five_one_two,
        one_two_three, two_three_four
    };
    CollectionAssert.AreEqual(expectedTrigrams, trigrams);

    // --- Range: unigram..bigram ---
    // Expected output is grouped by start position: the shorter n-gram first,
    // then the longer one starting at the same token.
    var uniToBiExtractor = new NGramExtractor(Unigram, Bigram);
    IList<NGram> uniToBi = uniToBiExtractor.ExtractAsList(Tokens);
    var expectedUniToBi = new[]
    {
        one, one_two,
        two, two_three,
        three, three_four,
        four, four_five,
        five, five_one,
        one, one_two,
        two, two_three,
        three, three_four,
        four
    };
    CollectionAssert.AreEqual(expectedUniToBi, uniToBi);

    // --- Range: bigram..trigram ---
    var biToTriExtractor = new NGramExtractor(Bigram, Trigram);
    IList<NGram> biToTri = biToTriExtractor.ExtractAsList(Tokens);
    var expectedBiToTri = new[]
    {
        one_two, one_two_three,
        two_three, two_three_four,
        three_four, three_four_five,
        four_five, four_five_one,
        five_one, five_one_two,
        one_two, one_two_three,
        two_three, two_three_four,
        three_four
    };
    CollectionAssert.AreEqual(expectedBiToTri, biToTri);

    // --- Range: unigram..trigram ---
    // The expectation includes bigrams as well, i.e. the two arguments are
    // expected to act as an inclusive range of sizes.
    var uniToTriExtractor = new NGramExtractor(Unigram, Trigram);
    IList<NGram> uniToTri = uniToTriExtractor.ExtractAsList(Tokens);
    var expectedUniToTri = new[]
    {
        one, one_two, one_two_three,
        two, two_three, two_three_four,
        three, three_four, three_four_five,
        four, four_five, four_five_one,
        five, five_one, five_one_two,
        one, one_two, one_two_three,
        two, two_three, two_three_four,
        three, three_four,
        four
    };
    CollectionAssert.AreEqual(expectedUniToTri, uniToTri);
}
/// <summary>
/// Verifies <c>ExtractAsSet</c> for unigrams, bigrams and trigrams.
/// Each expectation lists every distinct n-gram once and is compared with
/// <c>CollectionAssert.AreEquivalent</c>, so element order is not significant.
/// </summary>
/// <remarks>
/// <c>Tokens</c>, the size constants and the n-gram fixtures are declared
/// outside this method.
/// </remarks>
public void TestExtractAsSet()
{
    // Unigrams: five distinct tokens are expected.
    var unigramExtractor = new NGramExtractor(Unigram);
    ISet<NGram> unigramSet = unigramExtractor.ExtractAsSet(Tokens);
    var expectedUnigrams = new[]
    {
        one, two, three, four, five
    };
    CollectionAssert.AreEquivalent(expectedUnigrams, unigramSet);

    // Bigrams: five distinct pairs are expected.
    var bigramExtractor = new NGramExtractor(Bigram);
    ISet<NGram> bigramSet = bigramExtractor.ExtractAsSet(Tokens);
    var expectedBigrams = new[]
    {
        one_two, two_three, three_four, four_five, five_one
    };
    CollectionAssert.AreEquivalent(expectedBigrams, bigramSet);

    // Trigrams: five distinct triples are expected.
    var trigramExtractor = new NGramExtractor(Trigram);
    ISet<NGram> trigramSet = trigramExtractor.ExtractAsSet(Tokens);
    var expectedTrigrams = new[]
    {
        one_two_three, two_three_four, three_four_five, four_five_one, five_one_two
    };
    CollectionAssert.AreEquivalent(expectedTrigrams, trigramSet);
}
/// <summary>
/// Verifies <c>ExtractAsDictionary</c> for unigrams, bigrams and trigrams.
/// Each expectation maps an n-gram to the integer the extractor is expected
/// to report for it; the comparison uses
/// <c>CollectionAssert.AreEquivalent</c>, so entry order is not significant.
/// </summary>
/// <remarks>
/// <c>Tokens</c>, the size constants and the n-gram fixtures are declared
/// outside this method. The expected values are consistent with occurrence
/// counts over the token sequence exercised by the list test.
/// </remarks>
public void TestExtractAsDictionary()
{
    // Unigrams.
    var unigramExtractor = new NGramExtractor(Unigram);
    IDictionary<NGram, int> unigramCounts = unigramExtractor.ExtractAsDictionary(Tokens);
    var expectedUnigramCounts = new Dictionary<NGram, int>
    {
        { one, 2 },
        { two, 2 },
        { three, 2 },
        { four, 2 },
        { five, 1 }
    };
    CollectionAssert.AreEquivalent(expectedUnigramCounts, unigramCounts);

    // Bigrams.
    var bigramExtractor = new NGramExtractor(Bigram);
    IDictionary<NGram, int> bigramCounts = bigramExtractor.ExtractAsDictionary(Tokens);
    var expectedBigramCounts = new Dictionary<NGram, int>
    {
        { one_two, 2 },
        { two_three, 2 },
        { three_four, 2 },
        { four_five, 1 },
        { five_one, 1 }
    };
    CollectionAssert.AreEquivalent(expectedBigramCounts, bigramCounts);

    // Trigrams.
    var trigramExtractor = new NGramExtractor(Trigram);
    IDictionary<NGram, int> trigramCounts = trigramExtractor.ExtractAsDictionary(Tokens);
    var expectedTrigramCounts = new Dictionary<NGram, int>
    {
        { one_two_three, 2 },
        { two_three_four, 2 },
        { four_five_one, 1 },
        { three_four_five, 1 },
        { five_one_two, 1 }
    };
    CollectionAssert.AreEquivalent(expectedTrigramCounts, trigramCounts);
}
/// <summary>
/// Verifies that the extractor also works on letter-level tokens: the word
/// "beşiktaş" is split into one-character strings and bigrams plus trigrams
/// are extracted from them.
/// </summary>
/// <remarks>
/// The original test produced a result but never asserted on it, so it could
/// only fail if extraction threw. It now checks the number of n-grams
/// returned: for n tokens a bigram..trigram extraction is expected to yield
/// (n - 1) bigrams and (n - 2) trigrams, the same relationship that
/// <c>TestExtractAsList</c> pins for word tokens (9 tokens, 8 + 7 n-grams).
/// </remarks>
public void TestExtractLetterNGramsAsList()
{
    var extractor = new NGramExtractor(Bigram, Trigram);

    // Materialise the letters once so the expected count is derived from the
    // exact tokens handed to the extractor rather than from a hard-coded
    // length (which would depend on how the source file encodes 'ş').
    string[] letters = "beşiktaş".ToCharArray().Select(x => x.ToString()).ToArray();
    IEnumerable<string> tokens = letters;

    IList<NGram> actual = extractor.ExtractAsList(tokens);

    int expectedCount = (letters.Length - 1) + (letters.Length - 2);
    Assert.AreEqual(expectedCount, actual.Count);
}