Пример #1
0
        /// <summary>
        /// Indexes two entries with disjoint texts and checks that searching for a
        /// shortened form of the first text returns only the first entry.
        /// </summary>
        public void LengthComparison()
        {
            // Arrange
            var index = new NGramSearch.SorensenDiceCoefficientIndex<string>();
            index.Add("first", "aaaa");
            index.Add("second", "bbbb");

            // Act
            var matches = index.Search("aaa");

            // Assert: exactly one hit, and it is the "aaaa" entry.
            Assert.Single(matches);
            var topMatch = matches.First();
            Assert.Equal("first", topMatch.Id);

            // NOTE(review): an exact similarity assertion (expected 3.0 / 4, precision 8)
            // was left disabled here; confirm the expected value before re-enabling it.
        }
Пример #2
0
        /// <summary>
        /// Indexes several company names and verifies both the set of returned ids and
        /// the similarity values for a full-name query ("bmw ag") and a partial query ("bmw").
        /// </summary>
        public void LengthComparison()
        {
            var germanFirms = new NGramSearch.SorensenDiceCoefficientIndex<int>();

            germanFirms.Add(1, "volkswagen ag");
            germanFirms.Add(2, "daimler ag");
            germanFirms.Add(3, "allianz se");
            germanFirms.Add(4, "bmw ag");
            germanFirms.Add(5, "siemens ag");
            germanFirms.Add(6, "lange uhren gmbh");

            var result = germanFirms.Search("bmw ag");

            // Only the entries containing "ag" (ids 1, 2, 4, 5) may be returned.
            // Assert.All takes an Action<T>, so the per-item check must itself assert;
            // a bare Contains(...) call would have its boolean result silently discarded
            // and the check could never fail.
            var idsContainingAg = new[] { 1, 2, 4, 5 };
            Assert.All(result.Select(r => r.Id), id => Assert.Contains(id, idsContainingAg));
            Assert.Equal(4, result.First().Id);                  // 4 must be first
            Assert.Equal(1.0, result.First().Similarity, 8);     // "bmw ag" has 100% similarity

            result = germanFirms.Search("bmw");

            Assert.Single(result);
            Assert.Equal(4, result.First().Id);
            // _bm bmw mw_ & _bm bmw mw_ w_a _ag ag_ : 2 * 3:intersection / (3 + 6) = 2 / 3
            Assert.Equal(2.0 / 3, result.First().Similarity, 8);
        }
        /// <summary>
        /// Walks through hand-computed Sorensen-Dice similarities for a small index.
        /// The n-gram listings in the comments follow the padded trigram notation used
        /// throughout these tests ('_' marks a word boundary).
        /// </summary>
        public void SorensenDiceEasyTest()
        {
            var dice = new NGramSearch.SorensenDiceCoefficientIndex<int>();

            // Indexed text "abcd" -> _ab abc bcd cd_
            dice.Add(1, "abcd");

            // Query "ab" -> _ab ab_ ; 2 * 1 / (4 + 2) = 1 / 3
            var hitsForAb = dice.Search("ab");
            Assert.Equal(1.0 / 3, hitsForAb.Single().Similarity, 8);

            // Query "abc" -> _ab abc bc_ ; 2 * 2 / (4 + 3) = 4 / 7
            var hitsForAbc = dice.Search("abc");
            Assert.Equal(4.0 / 7, hitsForAbc.Single().Similarity, 8);

            // Query "abcd abcd" -> _ab abc bcd cd_ d_a _ab abc bcd cd_ ; 2 * 4 / (4 + 9) = 8 / 13
            var hitsForRepeated = dice.Search("abcd abcd");
            Assert.Equal(8.0 / 13, hitsForRepeated.Single().Similarity, 8);

            // Indexed text "xyz xyz" -> _xy xyz yz_ z_x _xy xyz yz_
            dice.Add(2, "xyz xyz");

            // Query "xyz" -> _xy xyz yz_ ; 2 * 3 / (7 + 3) = 0.6
            var hitsForXyz = dice.Search("xyz");
            Assert.Equal(0.6, hitsForXyz.First().Similarity, 8);
        }