Ejemplo n.º 1
0
        // Checks that ExtractAsList returns the expected n-grams for six extractor configurations
        // built from Unigram, Bigram and Trigram. CollectionAssert.AreEqual is used throughout, so
        // both the order of the n-grams and their repetitions are part of the expectation.
        public void TestExtractAsList()
        {
            // Unigram only.
            IList<NGram> unigrams = new NGramExtractor(Unigram).ExtractAsList(Tokens);
            CollectionAssert.AreEqual(
                new[] { one, two, three, four, five, one, two, three, four },
                unigrams);

            // Bigram only.
            IList<NGram> bigrams = new NGramExtractor(Bigram).ExtractAsList(Tokens);
            CollectionAssert.AreEqual(
                new[] { one_two, two_three, three_four, four_five, five_one, one_two, two_three, three_four },
                bigrams);

            // Trigram only.
            IList<NGram> trigrams = new NGramExtractor(Trigram).ExtractAsList(Tokens);
            CollectionAssert.AreEqual(
                new[]
                {
                    one_two_three, two_three_four, three_four_five, four_five_one,
                    five_one_two, one_two_three, two_three_four
                },
                trigrams);

            // Unigram + Bigram: the expected list interleaves the n-grams by start position,
            // shorter n-gram first.
            IList<NGram> unigramsAndBigrams = new NGramExtractor(Unigram, Bigram).ExtractAsList(Tokens);
            CollectionAssert.AreEqual(
                new[]
                {
                    one, one_two,
                    two, two_three,
                    three, three_four,
                    four, four_five,
                    five, five_one,
                    one, one_two,
                    two, two_three,
                    three, three_four,
                    four
                },
                unigramsAndBigrams);

            // Bigram + Trigram.
            IList<NGram> bigramsAndTrigrams = new NGramExtractor(Bigram, Trigram).ExtractAsList(Tokens);
            CollectionAssert.AreEqual(
                new[]
                {
                    one_two, one_two_three,
                    two_three, two_three_four,
                    three_four, three_four_five,
                    four_five, four_five_one,
                    five_one, five_one_two,
                    one_two, one_two_three,
                    two_three, two_three_four,
                    three_four
                },
                bigramsAndTrigrams);

            // Unigram + Trigram: note that the expected list also contains bigrams, so the two
            // arguments appear to be treated as an inclusive size range (confirm against NGramExtractor).
            IList<NGram> unigramsToTrigrams = new NGramExtractor(Unigram, Trigram).ExtractAsList(Tokens);
            CollectionAssert.AreEqual(
                new[]
                {
                    one, one_two, one_two_three,
                    two, two_three, two_three_four,
                    three, three_four, three_four_five,
                    four, four_five, four_five_one,
                    five, five_one, five_one_two,
                    one, one_two, one_two_three,
                    two, two_three, two_three_four,
                    three, three_four,
                    four
                },
                unigramsToTrigrams);
        }
Ejemplo n.º 2
0
        // Checks that ExtractAsSet returns each distinct n-gram exactly once for the Unigram, Bigram
        // and Trigram configurations. CollectionAssert.AreEquivalent ignores element order.
        public void TestExtractAsSet()
        {
            // Unigram only.
            ISet<NGram> unigrams = new NGramExtractor(Unigram).ExtractAsSet(Tokens);
            CollectionAssert.AreEquivalent(
                new[] { one, two, three, four, five },
                unigrams);

            // Bigram only.
            ISet<NGram> bigrams = new NGramExtractor(Bigram).ExtractAsSet(Tokens);
            CollectionAssert.AreEquivalent(
                new[] { one_two, two_three, three_four, four_five, five_one },
                bigrams);

            // Trigram only.
            ISet<NGram> trigrams = new NGramExtractor(Trigram).ExtractAsSet(Tokens);
            CollectionAssert.AreEquivalent(
                new[] { one_two_three, two_three_four, three_four_five, four_five_one, five_one_two },
                trigrams);
        }
Ejemplo n.º 3
0
        // Checks that ExtractAsDictionary maps every distinct n-gram to the number of times it occurs
        // in Tokens, for the Unigram, Bigram and Trigram configurations. The comparison uses
        // CollectionAssert.AreEquivalent, so the order of the key/value pairs does not matter.
        public void TestExtractAsDictionary()
        {
            // Unigram only.
            IDictionary<NGram, int> unigramCounts = new NGramExtractor(Unigram).ExtractAsDictionary(Tokens);
            var expectedUnigramCounts = new Dictionary<NGram, int>
            {
                { one, 2 }, { two, 2 }, { three, 2 }, { four, 2 }, { five, 1 }
            };
            CollectionAssert.AreEquivalent(expectedUnigramCounts, unigramCounts);

            // Bigram only.
            IDictionary<NGram, int> bigramCounts = new NGramExtractor(Bigram).ExtractAsDictionary(Tokens);
            var expectedBigramCounts = new Dictionary<NGram, int>
            {
                { one_two, 2 }, { two_three, 2 }, { three_four, 2 }, { four_five, 1 }, { five_one, 1 }
            };
            CollectionAssert.AreEquivalent(expectedBigramCounts, bigramCounts);

            // Trigram only.
            IDictionary<NGram, int> trigramCounts = new NGramExtractor(Trigram).ExtractAsDictionary(Tokens);
            var expectedTrigramCounts = new Dictionary<NGram, int>
            {
                { one_two_three, 2 },
                { two_three_four, 2 },
                { four_five_one, 1 },
                { three_four_five, 1 },
                { five_one_two, 1 }
            };
            CollectionAssert.AreEquivalent(expectedTrigramCounts, trigramCounts);
        }
Ejemplo n.º 4
0
 // Extracts letter-level n-grams from the word "beşiktaş" with a (Bigram, Trigram) extractor and
 // checks how many n-grams come back. Previously this test asserted nothing, so it could only
 // fail if the extractor threw.
 public void TestExtractLetterNGramsAsList()
 {
     var extractor = new NGramExtractor(Bigram, Trigram);

     // One single-character string per letter. Materialized once so the letters can be counted
     // without re-running the LINQ query.
     string[]             letters = "beşiktaş".Select(x => x.ToString()).ToArray();
     IEnumerable <string> tokens  = letters;
     IList <NGram>        actual  = extractor.ExtractAsList(tokens);

     // n tokens contain n - 1 bigrams and n - 2 trigrams (the word has more than two letters,
     // so neither term is negative). The count is derived from the token count rather than
     // hard-coded so it does not depend on how the source file encodes the letter 'ş'.
     int expectedCount = (letters.Length - 1) + (letters.Length - 2);
     Assert.AreEqual(expectedCount, actual.Count);
 }