Example #1
0
        // Expects that adding the one-character phrase "a" to an index constructed
        // with 4 (presumably the n-gram length, going by the sibling tests) leaves
        // the index without any n-grams.
        public void CheckTooShortWord()
        {
            var shortPhraseIndex = new NGramSearch.IntersectionCountIndex<int>(4);
            shortPhraseIndex.Add(1, "a");

            var storedNGrams = shortPhraseIndex.GetAllNGrams();

            Assert.Empty(storedNGrams);
        }
Example #2
0
        // Searching for "me" in an index holding "me" (id 1) and "you" (id 2) must
        // return exactly one hit: id 1 with a similarity of 3.
        public void MeHas3BiGramTest()
        {
            var index = new NGramSearch.IntersectionCountIndex<long>(2);

            index.Add(1, "me");
            index.Add(2, "you");

            var result = index.Search("me");

            // Assert.Single returns the only element, so the search result is
            // enumerated once instead of being re-queried for every assertion.
            var match = Assert.Single(result);
            Assert.Equal(1, match.Id);
            Assert.Equal(3, match.Similarity); // shared bigrams: _m me e_
        }
Example #3
0
        // Verifies the running total reported by GetAllNGrams() for an index
        // constructed with 4: 3 after "abcd", 6 after "defg" and 7 after "abc".
        public void CheckTotalFourgramCount()
        {
            var fourgramIndex = new NGramSearch.IntersectionCountIndex<int>(4);

            fourgramIndex.Add(1, "abcd");
            var totalAfterAbcd = fourgramIndex.GetAllNGrams().Count();
            Assert.Equal(3, totalAfterAbcd);

            fourgramIndex.Add(2, "defg");
            var totalAfterDefg = fourgramIndex.GetAllNGrams().Count();
            Assert.Equal(6, totalAfterDefg);

            // "abc" is expected to contribute only one unseen four-gram: "abc_".
            fourgramIndex.Add(3, "abc");
            var totalAfterAbc = fourgramIndex.GetAllNGrams().Count();
            Assert.Equal(7, totalAfterAbc);
        }
Example #4
0
        // Verifies the running total reported by GetAllNGrams() for an index
        // constructed with 3: 4 after "abcd", 8 after "defg" and 9 after "abc".
        public void CheckTotalTrigramCount()
        {
            var trigramIndex = new NGramSearch.IntersectionCountIndex<int>(3);

            trigramIndex.Add(1, "abcd");
            var totalAfterAbcd = trigramIndex.GetAllNGrams().Count();
            Assert.Equal(4, totalAfterAbcd);

            trigramIndex.Add(2, "defg");
            var totalAfterDefg = trigramIndex.GetAllNGrams().Count();
            Assert.Equal(8, totalAfterDefg);

            // "abc" is expected to contribute only one unseen trigram: "bc_".
            trigramIndex.Add(3, "abc");
            var totalAfterAbc = trigramIndex.GetAllNGrams().Count();
            Assert.Equal(9, totalAfterAbc);
        }
Example #5
0
        // Verifies the running total reported by GetAllNGrams() for an index
        // constructed with 2: 5 after "abcd", 10 after "defg" and 11 after "abc".
        public void CheckTotalBigramCount()
        {
            var bigramIndex = new NGramSearch.IntersectionCountIndex<int>(2);

            bigramIndex.Add(1, "abcd");
            var totalAfterAbcd = bigramIndex.GetAllNGrams().Count();
            Assert.Equal(5, totalAfterAbcd);

            bigramIndex.Add(2, "defg");
            var totalAfterDefg = bigramIndex.GetAllNGrams().Count();
            Assert.Equal(10, totalAfterDefg);

            // "abc" is expected to contribute only one unseen bigram: "c_".
            bigramIndex.Add(3, "abc");
            var totalAfterAbc = bigramIndex.GetAllNGrams().Count();
            Assert.Equal(11, totalAfterAbc);
        }
Example #6
0
        // Searching a default-constructed index for "aaa" must return exactly one
        // hit: "first" (phrase "aaaa") with a similarity of 3.
        public void SimpleCount()
        {
            var source = new NGramSearch.IntersectionCountIndex<string>();

            source.Add("first", "aaaa");
            source.Add("second", "bbbb");

            var result = source.Search("aaa");

            // Assert.Single returns the only element, so the search result is
            // enumerated once instead of calling First() again per assertion.
            var match = Assert.Single(result);
            Assert.Equal("first", match.Id);
            Assert.Equal(3, match.Similarity); // shared n-grams: _aa aaa aa_
        }
Example #7
0
        // Searching four actor names for "robert" must return exactly one hit:
        // id 3 ("robert de niro") with a similarity of 5.
        public void RobertHas5FourGrams()
        {
            var actors = new NGramSearch.IntersectionCountIndex<int>(4);

            actors.Add(1, "johnny depp");
            actors.Add(2, "al pacino");
            actors.Add(3, "robert de niro");
            actors.Add(4, "kevin spacey");

            var result = actors.Search("robert");

            // Assert.Single returns the only element, so the search result is
            // enumerated once instead of calling First() again per assertion.
            var match = Assert.Single(result);
            Assert.Equal(3, match.Id);
            Assert.Equal(5, match.Similarity); // shared four-grams: _rob robe ober bert ert_
        }
Example #8
0
        // Indexes six company names and checks the two n-grams with the highest
        // TotalPhraseNGramCount: " ag" and "ag ", each expected with a count of 4.
        public void CheckNgramCount()
        {
            var germanFirms = new NGramSearch.IntersectionCountIndex<int>();

            germanFirms.Add(1, "volkswagen ag");
            germanFirms.Add(2, "daimler ag");
            germanFirms.Add(3, "allianz se");
            germanFirms.Add(4, "bmw ag");
            germanFirms.Add(5, "siemens ag");
            germanFirms.Add(6, "lange uhren gmbh");

            // Highest count first; ties are ordered by the n-gram text so the
            // positions asserted below are deterministic.
            var result = germanFirms.GetAllNGrams()
                         .OrderByDescending(x => x.TotalPhraseNGramCount)
                         .ThenBy(x => x.NGram)
                         .ToList();

            // Top entry.
            Assert.Equal(" ag", result[0].NGram);
            Assert.Equal(4, result[0].TotalPhraseNGramCount);

            // Runner-up (the original asserted this count twice).
            Assert.Equal("ag ", result[1].NGram);
            Assert.Equal(4, result[1].TotalPhraseNGramCount);
        }