/// <summary>
/// Indexes six company names and checks that searching for "bmw ag" and "bmw"
/// ranks entry 4 ("bmw ag") first with the expected Sorensen-Dice similarity.
/// </summary>
public void LengthComparison()
{
    var germanFirms = new NGramSearch.SorensenDiceCoefficientIndex<int>();
    germanFirms.Add(1, "volkswagen ag");
    germanFirms.Add(2, "daimler ag");
    germanFirms.Add(3, "allianz se");
    germanFirms.Add(4, "bmw ag");
    germanFirms.Add(5, "siemens ag");
    germanFirms.Add(6, "lange uhren gmbh");

    // Materialize once so the assertions below all inspect the same snapshot.
    var fullNameMatches = germanFirms.Search("bmw ag").ToList();

    // Only the entries containing "ag" (ids 1, 2, 4, 5) may be returned.
    // The inner Assert.Contains is what makes this check effective: Assert.All takes
    // an Action, so a lambda that merely returns a bool would never fail the test.
    int[] agFirmIds = { 1, 2, 4, 5 };
    Assert.All(fullNameMatches.Select(r => r.Id), id => Assert.Contains(id, agFirmIds));

    // 4 must be first, and "bmw ag" against itself has 100% similarity.
    var bestFullNameMatch = fullNameMatches.First();
    Assert.Equal(4, bestFullNameMatch.Id);
    Assert.Equal(1.0, bestFullNameMatch.Similarity, 8);

    var shortNameMatches = germanFirms.Search("bmw").ToList();
    var onlyMatch = Assert.Single(shortNameMatches);
    Assert.Equal(4, onlyMatch.Id);

    // query:  _bm bmw mw_                     (3 trigrams)
    // entry:  _bm bmw mw_ w_a _ag ag_         (6 trigrams)
    // 2 * 3 (intersection) / (3 + 6) = 2 / 3
    Assert.Equal(2.0 / 3, onlyMatch.Similarity, 8);
}
/// <summary>
/// Indexes "aaaa" and "bbbb" under string ids and checks that searching for "aaa"
/// returns exactly one hit, the entry stored as "first".
/// </summary>
public void LengthComparison()
{
    var index = new NGramSearch.SorensenDiceCoefficientIndex<string>();
    index.Add("first", "aaaa");
    index.Add("second", "bbbb");

    var matches = index.Search("aaa");

    Assert.Single(matches);

    var topHit = matches.First();
    Assert.Equal("first", topHit.Id);

    // The similarity value is not asserted here; the check below is disabled in the original.
    // Original note: aaa contains 3, aaaa contains 4.
    //Assert.Equal(3.0/4, topHit.Similarity, 8);
}
/// <summary>
/// Checks hand-computed Sorensen-Dice similarities for a few short queries.
/// The comments list the trigrams of each string, with '_' marking padding and spaces.
/// </summary>
public void SorensenDiceEasyTest()
{
    var dice = new NGramSearch.SorensenDiceCoefficientIndex<int>();

    // entry 1: _ab abc bcd cd_                                  (4 trigrams)
    dice.Add(1, "abcd");

    // query:   _ab ab_                                          (2 trigrams)
    // 2 * 1 / (4 + 2) = 1 / 3
    var prefixTwoChars = dice.Search("ab");
    Assert.Equal(1.0 / 3, prefixTwoChars.Single().Similarity, 8);

    // query:   _ab abc bc_                                      (3 trigrams)
    // 2 * 2 / (4 + 3) = 4 / 7
    var prefixThreeChars = dice.Search("abc");
    Assert.Equal(4.0 / 7, prefixThreeChars.Single().Similarity, 8);

    // query:   _ab abc bcd cd_ d_a _ab abc bcd cd_              (9 trigrams)
    // 2 * 4 / (4 + 9) = 8 / 13
    var doubledWord = dice.Search("abcd abcd");
    Assert.Equal(8.0 / 13, doubledWord.Single().Similarity, 8);

    // entry 2: _xy xyz yz_ z_x _xy xyz yz_                      (7 trigrams)
    dice.Add(2, "xyz xyz");

    // query:   _xy xyz yz_                                      (3 trigrams)
    // 2 * 3 / (7 + 3) = 0.6
    var halfOfDoubledEntry = dice.Search("xyz");
    Assert.Equal(0.6, halfOfDoubledEntry.First().Similarity, 8);
}