// Searching a bigram index that holds "me" and "you" for the term "me" must
// yield exactly one hit: id 1, with a similarity of 3.
public void MeHas3BiGramTest()
{
    var bigramIndex = new NGramSearch.IntersectionCountIndex<long>(2);
    bigramIndex.Add(1, "me");
    bigramIndex.Add(2, "you");

    var result = bigramIndex.Search("me");

    Assert.Single(result);
    var match = result.Single();
    Assert.Equal(1, match.Id);
    // Bigrams expected to be shared with the query: _m me e_
    Assert.Equal(3, match.Similarity);
}
// Checks how many entries GetAllNGrams() reports for a 4-gram index after each
// of three phrases is added: 3, then 6, then 7.
public void CheckTotalFourgramCount()
{
    var fourgramIndex = new NGramSearch.IntersectionCountIndex<int>(4);

    fourgramIndex.Add(1, "abcd");
    var afterFirstPhrase = fourgramIndex.GetAllNGrams().Count();
    Assert.Equal(3, afterFirstPhrase);

    fourgramIndex.Add(2, "defg");
    var afterSecondPhrase = fourgramIndex.GetAllNGrams().Count();
    Assert.Equal(6, afterSecondPhrase);

    // new is "abc_"
    fourgramIndex.Add(3, "abc");
    var afterThirdPhrase = fourgramIndex.GetAllNGrams().Count();
    Assert.Equal(7, afterThirdPhrase);
}
// Checks how many entries GetAllNGrams() reports for a trigram index after each
// of three phrases is added: 4, then 8, then 9.
public void CheckTotalTrigramCount()
{
    var trigramIndex = new NGramSearch.IntersectionCountIndex<int>(3);

    trigramIndex.Add(1, "abcd");
    var afterFirstPhrase = trigramIndex.GetAllNGrams().Count();
    Assert.Equal(4, afterFirstPhrase);

    trigramIndex.Add(2, "defg");
    var afterSecondPhrase = trigramIndex.GetAllNGrams().Count();
    Assert.Equal(8, afterSecondPhrase);

    // new is "bc_"
    trigramIndex.Add(3, "abc");
    var afterThirdPhrase = trigramIndex.GetAllNGrams().Count();
    Assert.Equal(9, afterThirdPhrase);
}
// Checks how many entries GetAllNGrams() reports for a bigram index after each
// of three phrases is added: 5, then 10, then 11.
public void CheckTotalBigramCount()
{
    var bigramIndex = new NGramSearch.IntersectionCountIndex<int>(2);

    bigramIndex.Add(1, "abcd");
    var afterFirstPhrase = bigramIndex.GetAllNGrams().Count();
    Assert.Equal(5, afterFirstPhrase);

    bigramIndex.Add(2, "defg");
    var afterSecondPhrase = bigramIndex.GetAllNGrams().Count();
    Assert.Equal(10, afterSecondPhrase);

    // new is "c_"
    bigramIndex.Add(3, "abc");
    var afterThirdPhrase = bigramIndex.GetAllNGrams().Count();
    Assert.Equal(11, afterThirdPhrase);
}
// With a default-constructed index holding "aaaa" and "bbbb", searching for
// "aaa" must return only the "first" entry, with a similarity of 3.
public void SimpleCount()
{
    var phraseIndex = new NGramSearch.IntersectionCountIndex<string>();
    phraseIndex.Add("first", "aaaa");
    phraseIndex.Add("second", "bbbb");

    var result = phraseIndex.Search("aaa");

    Assert.Single(result);
    var topHit = result.First();
    Assert.Equal("first", topHit.Id);
    // N-grams expected to be shared with the query: _aa aaa aa_
    Assert.Equal(3, topHit.Similarity);
}
// In a 4-gram index of four actor names, searching for "robert" must return
// only "robert de niro" (id 3), with a similarity of 5.
public void RobertHas5FourGrams()
{
    var actorIndex = new NGramSearch.IntersectionCountIndex<int>(4);
    actorIndex.Add(1, "johnny depp");
    actorIndex.Add(2, "al pacino");
    actorIndex.Add(3, "robert de niro");
    actorIndex.Add(4, "kevin spacey");

    var result = actorIndex.Search("robert");

    Assert.Single(result);
    var topHit = result.First();
    Assert.Equal(3, topHit.Id);
    // 4-grams expected to be shared with the query: _rob robe ober bert ert_
    Assert.Equal(5, topHit.Similarity);
}
// Adding the one-character phrase "a" to a 4-gram index must leave
// GetAllNGrams() empty.
public void CheckTooShortWord()
{
    var fourgramIndex = new NGramSearch.IntersectionCountIndex<int>(4);
    fourgramIndex.Add(1, "a");

    var allNGrams = fourgramIndex.GetAllNGrams();

    Assert.Empty(allNGrams);
}
// Verifies the TotalPhraseNGramCount values reported by GetAllNGrams() for six
// company names. After ordering by TotalPhraseNGramCount (descending) and then
// by NGram, the first two entries must be " ag" and "ag ", each with a count of 4.
public void CheckNgramCount()
{
    var germanFirms = new NGramSearch.IntersectionCountIndex<int>();
    germanFirms.Add(1, "volkswagen ag");
    germanFirms.Add(2, "daimler ag");
    germanFirms.Add(3, "allianz se");
    germanFirms.Add(4, "bmw ag");
    germanFirms.Add(5, "siemens ag");
    germanFirms.Add(6, "lange uhren gmbh");

    // Materialize once so the indexed assertions below all see the same ordering.
    var ranked = germanFirms.GetAllNGrams()
        .OrderByDescending(x => x.TotalPhraseNGramCount)
        .ThenBy(x => x.NGram)
        .ToList();

    // Each entry is checked exactly once; the original repeated the count
    // assertion for ranked[1], which added no coverage.
    Assert.Equal(4, ranked[0].TotalPhraseNGramCount);
    Assert.Equal(" ag", ranked[0].NGram);
    Assert.Equal(4, ranked[1].TotalPhraseNGramCount);
    Assert.Equal("ag ", ranked[1].NGram);
}