/// <summary>
/// Tokenizes <paramref name="content"/> with <c>Shingling.Tokenize</c> and hands the
/// resulting shingles to the <c>ComputeHash</c> overload that accepts them.
/// </summary>
/// <typeparam name="THash">Value-type hash implementation producing <typeparamref name="TRes"/>.</typeparam>
/// <typeparam name="TRes">Result type produced by <typeparamref name="THash"/>.</typeparam>
/// <param name="self">The simhash instance the computation is performed on.</param>
/// <param name="content">Raw text to tokenize and hash.</param>
/// <param name="hash">Hash implementation forwarded unchanged to the shingle overload.</param>
/// <returns>Whatever the shingle-based <c>ComputeHash</c> overload returns.</returns>
private static SimhashResult ComputeHash<THash, TRes>(this Simhash self, string content, THash hash)
    where THash : struct, IHash<TRes>
    where TRes : IHashResult<TRes>
{
    // Scratch builder passed to the tokenizer; its initial capacity is the input length.
    var scratch = new StringBuilder(content.Length);

    return self.ComputeHash<THash, TRes>(Shingling.Tokenize(content, scratch), hash);
}
/// <summary>
/// Test helper: checks that two Chinese sentences differing only in punctuation and spacing
/// hash to distance zero, and that three near-identical English sentences stay within a
/// distance of less than 3 from the reference sentence.
/// </summary>
/// <typeparam name="THash">Hash implementation under test.</typeparam>
/// <typeparam name="TRes">Result type produced by <typeparamref name="THash"/>.</typeparam>
/// <param name="hash">Hash instance passed to every <c>ComputeHash</c> call.</param>
private void Chinese<THash, TRes>(THash hash)
    where THash : IHash<TRes>
    where TRes : IHashResult<TRes>
{
    var hasher = new SimhashLib.Simhash();

    // Same Chinese words, different punctuation/spacing: expected to hash identically.
    var punctuated = hasher.ComputeHash<THash, TRes>("你好 世界! 呼噜。", hash);
    var compact = hasher.ComputeHash<THash, TRes>("你好,世界呼噜", hash);
    Assert.Equal(0, punctuated.Distance(compact));

    // English sentences that differ only in casing, punctuation and the final word.
    var fullSentence = hasher.ComputeHash<THash, TRes>("How are you? I Am fine. ablar ablar xyz blar blar blar blar blar blar blar Thanks.", hash);
    var endsWithThan = hasher.ComputeHash<THash, TRes>("How are you i am fine.ablar ablar xyz blar blar blar blar blar blar blar than", hash);
    var endsWithThank = hasher.ComputeHash<THash, TRes>("How are you i am fine.ablar ablar xyz blar blar blar blar blar blar blar thank", hash);

    Assert.True(fullSentence.Distance(endsWithThank) < 3);
    Assert.True(endsWithThan.Distance(endsWithThank) < 3);
}
/// <summary>
/// Checks basic properties of <c>Distance</c>: two different sentences are a positive
/// distance apart, a hash is at distance zero from itself, and the hash of "1" is at a
/// non-zero distance from a longer sentence.
/// </summary>
/// <typeparam name="THash">Hash implementation under test.</typeparam>
/// <typeparam name="TRes">Result type produced by <typeparamref name="THash"/>.</typeparam>
/// <param name="hash">Hash instance passed to every <c>ComputeHash</c> call.</param>
public void Distance<THash, TRes>(THash hash)
    where THash : IHash<TRes>
    where TRes : IHashResult<TRes>
{
    var hasher = new SimhashLib.Simhash();

    var shortSentence = hasher.ComputeHash<THash, TRes>("How are you? I AM fine. Thanks. And you?", hash);
    var longSentence = hasher.ComputeHash<THash, TRes>("How old are you? :-) i am fine. Thanks. And you?", hash);

    // The distance between the two different sentences is measured twice, as in the original.
    var crossFirst = shortSentence.Distance(longSentence);
    var crossSecond = shortSentence.Distance(longSentence);
    Assert.True(crossFirst > 0);
    Assert.True(crossSecond > 0);

    // Distance of a hash to itself, likewise measured twice.
    var selfFirst = longSentence.Distance(longSentence);
    var selfSecond = longSentence.Distance(longSentence);
    Assert.Equal(0, selfFirst);
    Assert.Equal(0, selfSecond);

    var singleChar = hasher.ComputeHash<THash, TRes>("1", hash);
    Assert.NotEqual(0, singleChar.Distance(longSentence));
}
/// <summary>
/// Test helper: hashes several short, similar strings and asserts that no two of them
/// produce equal results.
/// </summary>
/// <remarks>
/// Pairs are selected by position rather than by filtering on <c>Equals</c>. Filtering out
/// equal results before asserting inequality would discard exactly the colliding pairs the
/// test is meant to catch, so the assertion could never fail.
/// </remarks>
/// <typeparam name="THash">Hash implementation under test.</typeparam>
/// <typeparam name="TRes">Result type produced by <typeparamref name="THash"/>.</typeparam>
/// <param name="hash">Hash instance passed to every <c>ComputeHash</c> call.</param>
private void Short<THash, TRes>(THash hash)
    where THash : IHash<TRes>
    where TRes : IHashResult<TRes>
{
    var simhash = new SimhashLib.Simhash();
    var inputs = new List<string> { "aa", "aaa", "aaaa", "aaaab", "aaaaabb", "aaaaabbb" };
    var hashes = inputs.Select(s => simhash.ComputeHash<THash, TRes>(s, hash)).ToList();

    for (var i = 0; i < hashes.Count; i++)
    {
        for (var j = 0; j < hashes.Count; j++)
        {
            // Skip only the comparison of an entry with itself; every other pair must differ.
            if (i != j)
            {
                Assert.NotEqual(hashes[i], hashes[j]);
            }
        }
    }
}
/// <summary>
/// Hashes <paramref name="content"/> by calling the generic <c>ComputeHash</c> with a
/// newly constructed <c>Md5Hash</c>.
/// </summary>
/// <param name="self">The simhash instance the computation is performed on.</param>
/// <param name="content">Text to hash.</param>
/// <returns>The result of the generic <c>ComputeHash</c> call.</returns>
public static SimhashResult ComputeHashByMd5(this Simhash self, string content)
{
    var md5 = new Md5Hash();
    return self.ComputeHash<Md5Hash, Md5HashResult>(content, md5);
}
/// <summary>
/// Hashes <paramref name="tokens"/> by calling the generic <c>ComputeHash</c> with a
/// <c>JenkinsHash</c> constructed from <paramref name="seed"/>.
/// </summary>
/// <param name="self">The simhash instance the computation is performed on.</param>
/// <param name="tokens">Tokens to hash.</param>
/// <param name="seed">Value passed to the <c>JenkinsHash</c> constructor; defaults to <c>default(uint)</c>.</param>
/// <returns>The result of the generic <c>ComputeHash</c> call.</returns>
public static SimhashResult ComputeHashByJenkins(this Simhash self, List<string> tokens, uint seed = default)
{
    var jenkins = new JenkinsHash(seed);
    return self.ComputeHash<JenkinsHash, JenkinsHashResult>(tokens, jenkins);
}
/// <summary>
/// Hashes <paramref name="content"/> by calling the generic <c>ComputeHash</c> with a
/// <c>JenkinsHash</c> constructed from <paramref name="seed"/>.
/// </summary>
/// <param name="self">The simhash instance the computation is performed on.</param>
/// <param name="content">Text to hash.</param>
/// <param name="seed">Value passed to the <c>JenkinsHash</c> constructor; defaults to <c>default(uint)</c>.</param>
/// <returns>The result of the generic <c>ComputeHash</c> call.</returns>
public static SimhashResult ComputeHashByJenkins(this Simhash self, string content, uint seed = default)
{
    var jenkins = new JenkinsHash(seed);
    return self.ComputeHash<JenkinsHash, JenkinsHashResult>(content, jenkins);
}
/// <summary>
/// Hashes <paramref name="tokens"/> by calling the generic <c>ComputeHash</c> with a
/// newly constructed <c>MurmurHash3</c>.
/// </summary>
/// <param name="self">The simhash instance the computation is performed on.</param>
/// <param name="tokens">Tokens to hash.</param>
/// <returns>The result of the generic <c>ComputeHash</c> call.</returns>
public static SimhashResult ComputeHashByMurmurHash3(this Simhash self, List<string> tokens)
{
    var murmur = new MurmurHash3();
    return self.ComputeHash<MurmurHash3, MurmurHash3Result>(tokens, murmur);
}
/// <summary>
/// Hashes <paramref name="content"/> by calling the generic <c>ComputeHash</c> with a
/// newly constructed <c>MurmurHash3</c>.
/// </summary>
/// <param name="self">The simhash instance the computation is performed on.</param>
/// <param name="content">Text to hash.</param>
/// <returns>The result of the generic <c>ComputeHash</c> call.</returns>
public static SimhashResult ComputeHashByMurmurHash3(this Simhash self, string content)
{
    var murmur = new MurmurHash3();
    return self.ComputeHash<MurmurHash3, MurmurHash3Result>(content, murmur);
}
/// <summary>
/// Hashes <paramref name="tokens"/> by calling the generic <c>ComputeHash</c> with a
/// newly constructed <c>Md5Hash</c>.
/// </summary>
/// <param name="self">The simhash instance the computation is performed on.</param>
/// <param name="tokens">Tokens to hash.</param>
/// <returns>The result of the generic <c>ComputeHash</c> call.</returns>
public static SimhashResult ComputeHashByMd5(this Simhash self, List<string> tokens)
{
    var md5 = new Md5Hash();
    return self.ComputeHash<Md5Hash, Md5HashResult>(tokens, md5);
}