Пример #1
0
        public void BiGramParsingService_should_tokenize_string_correctly()
        {
            // Verifies that TokenizeInput turns the sentence into one token per word,
            // in the original word order, with "The" lower-cased to "the" and
            // repeated words ("the", "quick") kept as separate tokens.
            var input = "The quick brown fox and the quick blue hare";

            var expectedResults = new string[]
            {
                "the",
                "quick",
                "brown",
                "fox",
                "and",
                "the",
                "quick",
                "blue",
                "hare"
            };

            var sut = new BiGramParsingService();

            var actualResults = sut.TokenizeInput(input);

            // Compare the sequences directly rather than asserting on a pre-computed
            // boolean: the element-by-element check is the same, but a failure then
            // reports the differing tokens instead of just "expected True".
            Assert.Equal(expectedResults, actualResults);
        }
Пример #2
0
        public void BiGramParsingService_should_remove_non_alphanumeric_characters_from_string()
        {
            // Verifies that RemoveNonAlphanumericCharacters drops the punctuation runs
            // placed before, inside and after the sentence, while the letters and the
            // single spaces between words come back unchanged.

            // Arrange
            string noisySentence = ".`~!@#$%^&*()_-+=The quick brown fox and.`~!@#$%^&*()_-+= the quick blue hare.`~!@#$%^&*()_-+=";
            string cleanSentence = "The quick brown fox and the quick blue hare";
            var parser = new BiGramParsingService();

            // Act
            var strippedSentence = parser.RemoveNonAlphanumericCharacters(noisySentence);

            // Assert
            Assert.Equal(cleanSentence, strippedSentence);
        }
Пример #3
0
        public void BiGramParsingService_should_return_correct_bigrams()
        {
            // Verifies that GetBiGrams maps every token that has a successor to the
            // list of tokens that follow it, one entry per occurrence (so "the" maps
            // to "quick" twice). The final token "hare" has no successor and is not
            // expected as a key.
            var tokens = new string[]
            {
                "the",
                "quick",
                "brown",
                "fox",
                "and",
                "the",
                "quick",
                "blue",
                "hare"
            };

            var expectedResults = new Dictionary<string, List<string>>
            {
                { "the", new List<string> { "quick", "quick" } },
                { "quick", new List<string> { "brown", "blue" } },
                { "brown", new List<string> { "fox" } },
                { "fox", new List<string> { "and" } },
                { "and", new List<string> { "the" } },
                { "blue", new List<string> { "hare" } }
            };

            var sut = new BiGramParsingService();

            var actualResults = sut.GetBiGrams(tokens);

            // Sort the keys before comparing: the enumeration order of a dictionary
            // is not something the test should depend on, only the set of keys.
            Assert.Equal(
                expectedResults.Keys.OrderBy(key => key).ToList(),
                actualResults.Keys.OrderBy(key => key).ToList());

            // The key sets match at this point, so every expected key can be looked
            // up in the actual result. Follower lists are compared in order, and a
            // failure reports the offending list rather than a bare boolean.
            foreach (var expectedEntry in expectedResults)
            {
                Assert.Equal(expectedEntry.Value, actualResults[expectedEntry.Key]);
            }
        }
Пример #4
0
        public void BiGramParsingService_should_return_correct_histogram()
        {
            // Verifies that GetHistogram turns a "token -> followers" map into a
            // count per "token follower" pair: "the" followed by "quick" twice is
            // expected as the single entry "the quick" with a count of 2.
            var biGrams = new Dictionary<string, List<string>>
            {
                { "the", new List<string> { "quick", "quick" } },
                { "quick", new List<string> { "brown", "blue" } },
                { "brown", new List<string> { "fox" } },
                { "fox", new List<string> { "and" } },
                { "and", new List<string> { "the" } },
                { "blue", new List<string> { "hare" } }
            };

            var expectedResults = new Dictionary<string, int>
            {
                { "the quick", 2 },
                { "quick brown", 1 },
                { "brown fox", 1 },
                { "fox and", 1 },
                { "and the", 1 },
                { "quick blue", 1 },
                { "blue hare", 1 }
            };

            var sut = new BiGramParsingService();

            var actualResults = sut.GetHistogram(biGrams);

            // Sort both sides by key so the comparison does not depend on the order
            // in which either collection enumerates its entries.
            var expectedEntries = expectedResults.OrderBy(kvp => kvp.Key).ToList();
            var actualEntries = actualResults.OrderBy(kvp => kvp.Key).ToList();

            // Asserting on the entry lists (instead of a pre-computed boolean) makes
            // a failure show which pair or count differs.
            Assert.Equal(expectedEntries, actualEntries);
        }