Beispiel #1
0
        /// <summary>
        /// Tokenizes <paramref name="content"/> with <c>Shingling.Tokenize</c> and forwards the
        /// result, together with <paramref name="hash"/>, to the matching <c>ComputeHash</c> overload.
        /// </summary>
        /// <typeparam name="THash">Hash implementation (a struct implementing <c>IHash&lt;TRes&gt;</c>).</typeparam>
        /// <typeparam name="TRes">Result type produced by <typeparamref name="THash"/>.</typeparam>
        /// <param name="self">The simhash instance the computation is delegated to.</param>
        /// <param name="content">Text to tokenize and hash.</param>
        /// <param name="hash">Hash instance passed through to the delegated overload.</param>
        /// <returns>Whatever the delegated <c>ComputeHash</c> overload returns for the tokens.</returns>
        private static SimhashResult ComputeHash <THash, TRes>(this Simhash self, string content, THash hash)
            where THash : struct, IHash <TRes>
            where TRes : IHashResult <TRes>
        {
            // Scratch buffer handed to the tokenizer; capacity is presized to the input length.
            var scratch = new StringBuilder(content.Length);

            return self.ComputeHash <THash, TRes>(Shingling.Tokenize(content, scratch), hash);
        }
Beispiel #2
0
        /// <summary>
        /// Checks that two Chinese strings differing only in spacing/punctuation hash to distance 0,
        /// and that near-identical English sentences stay within a distance below 3.
        /// </summary>
        /// <param name="hash">Hash instance passed to every <c>ComputeHash</c> call.</param>
        private void Chinese <THash, TRes>(THash hash)
            where THash : IHash <TRes>
            where TRes : IHashResult <TRes>
        {
            var hasher = new SimhashLib.Simhash();

            // Same Chinese text, with and without extra spacing and punctuation.
            var spaced  = hasher.ComputeHash <THash, TRes>("你好 世界!  呼噜。", hash);
            var compact = hasher.ComputeHash <THash, TRes>("你好,世界呼噜", hash);

            var chineseDistance = spaced.Distance(compact);
            Assert.Equal(0, chineseDistance);

            // English sentence and two variants that differ in case, punctuation and the last word.
            var full      = hasher.ComputeHash <THash, TRes>("How are you? I Am fine. ablar ablar xyz blar blar blar blar blar blar blar Thanks.", hash);
            var truncated = hasher.ComputeHash <THash, TRes>("How are you i am fine.ablar ablar xyz blar blar blar blar blar blar blar than", hash);
            var reference = hasher.ComputeHash <THash, TRes>("How are you i am fine.ablar ablar xyz blar blar blar blar blar blar blar thank", hash);

            var fullToReference = full.Distance(reference);
            Assert.True(fullToReference < 3);

            var truncatedToReference = truncated.Distance(reference);
            Assert.True(truncatedToReference < 3);
        }
Beispiel #3
0
        /// <summary>
        /// Checks <c>Distance</c> on hashes of different texts (expected non-zero) and on a hash
        /// compared with itself (expected zero).
        /// </summary>
        /// <param name="hash">Hash instance passed to every <c>ComputeHash</c> call.</param>
        public void Distance <THash, TRes>(THash hash)
            where THash : IHash <TRes>
            where TRes : IHashResult <TRes>
        {
            var hasher = new SimhashLib.Simhash();

            var first  = hasher.ComputeHash <THash, TRes>("How are you? I AM fine. Thanks. And you?", hash);
            var second = hasher.ComputeHash <THash, TRes>("How old are you? :-) i am fine. Thanks. And you?", hash);

            // Each distance is computed twice and both results are asserted.
            var crossDistance      = first.Distance(second);
            var crossDistanceAgain = first.Distance(second);

            Assert.True(crossDistance > 0);
            Assert.True(crossDistanceAgain > 0);

            var selfDistance      = second.Distance(second);
            var selfDistanceAgain = second.Distance(second);

            Assert.Equal(0, selfDistance);
            Assert.Equal(0, selfDistanceAgain);

            // A one-character input must not collapse onto the longer sentence's hash.
            var tiny = hasher.ComputeHash <THash, TRes>("1", hash);
            var tinyDistance = tiny.Distance(second);

            Assert.NotEqual(0, tinyDistance);
        }
Beispiel #4
0
        /// <summary>
        /// Checks that a set of short, closely related inputs all produce distinct hash results.
        /// </summary>
        /// <param name="hash">Hash instance passed to every <c>ComputeHash</c> call.</param>
        private void Short <THash, TRes>(THash hash)
            where THash : IHash <TRes>
            where TRes : IHashResult <TRes>
        {
            var simhash = new SimhashLib.Simhash();

            var inputs = new List <string>()
            {
                "aa", "aaa", "aaaa", "aaaab", "aaaaabb", "aaaaabbb"
            };

            var results = inputs.Select(s => simhash.ComputeHash <THash, TRes>(s, hash)).ToList();

            // Pair results by position instead of filtering on Equals: pre-filtering to
            // "not equal" pairs made the NotEqual assertion impossible to fail, so
            // colliding hashes for different inputs went undetected.
            for (var i = 0; i < results.Count; i++)
            {
                for (var j = 0; j < results.Count; j++)
                {
                    if (i == j)
                    {
                        continue;
                    }

                    Assert.NotEqual(results[i], results[j]);
                }
            }
        }
Beispiel #5
0
 /// <summary>
 /// Hashes <paramref name="content"/> by delegating to <c>ComputeHash</c> with a new <c>Md5Hash</c>.
 /// </summary>
 /// <param name="self">The simhash instance the computation is delegated to.</param>
 /// <param name="content">Text to hash.</param>
 /// <returns>The result of the delegated <c>ComputeHash</c> call.</returns>
 public static SimhashResult ComputeHashByMd5(this Simhash self, string content)
 {
     var md5 = new Md5Hash();
     return self.ComputeHash <Md5Hash, Md5HashResult>(content, md5);
 }
Beispiel #6
0
 /// <summary>
 /// Hashes <paramref name="tokens"/> by delegating to <c>ComputeHash</c> with a new
 /// <c>JenkinsHash</c> constructed from <paramref name="seed"/>.
 /// </summary>
 /// <param name="self">The simhash instance the computation is delegated to.</param>
 /// <param name="tokens">Tokens to hash.</param>
 /// <param name="seed">Value passed to the <c>JenkinsHash</c> constructor; defaults to <c>default(uint)</c>.</param>
 /// <returns>The result of the delegated <c>ComputeHash</c> call.</returns>
 public static SimhashResult ComputeHashByJenkins(this Simhash self, List <string> tokens, uint seed = default)
 {
     var jenkins = new JenkinsHash(seed);
     return self.ComputeHash <JenkinsHash, JenkinsHashResult>(tokens, jenkins);
 }
Beispiel #7
0
 /// <summary>
 /// Hashes <paramref name="content"/> by delegating to <c>ComputeHash</c> with a new
 /// <c>JenkinsHash</c> constructed from <paramref name="seed"/>.
 /// </summary>
 /// <param name="self">The simhash instance the computation is delegated to.</param>
 /// <param name="content">Text to hash.</param>
 /// <param name="seed">Value passed to the <c>JenkinsHash</c> constructor; defaults to <c>default(uint)</c>.</param>
 /// <returns>The result of the delegated <c>ComputeHash</c> call.</returns>
 public static SimhashResult ComputeHashByJenkins(this Simhash self, string content, uint seed = default)
 {
     var jenkins = new JenkinsHash(seed);
     return self.ComputeHash <JenkinsHash, JenkinsHashResult>(content, jenkins);
 }
Beispiel #8
0
 /// <summary>
 /// Hashes <paramref name="tokens"/> by delegating to <c>ComputeHash</c> with a new <c>MurmurHash3</c>.
 /// </summary>
 /// <param name="self">The simhash instance the computation is delegated to.</param>
 /// <param name="tokens">Tokens to hash.</param>
 /// <returns>The result of the delegated <c>ComputeHash</c> call.</returns>
 public static SimhashResult ComputeHashByMurmurHash3(this Simhash self, List <string> tokens)
 {
     var murmur = new MurmurHash3();
     return self.ComputeHash <MurmurHash3, MurmurHash3Result>(tokens, murmur);
 }
Beispiel #9
0
 /// <summary>
 /// Hashes <paramref name="content"/> by delegating to <c>ComputeHash</c> with a new <c>MurmurHash3</c>.
 /// </summary>
 /// <param name="self">The simhash instance the computation is delegated to.</param>
 /// <param name="content">Text to hash.</param>
 /// <returns>The result of the delegated <c>ComputeHash</c> call.</returns>
 public static SimhashResult ComputeHashByMurmurHash3(this Simhash self, string content)
 {
     var murmur = new MurmurHash3();
     return self.ComputeHash <MurmurHash3, MurmurHash3Result>(content, murmur);
 }
Beispiel #10
0
 /// <summary>
 /// Hashes <paramref name="tokens"/> by delegating to <c>ComputeHash</c> with a new <c>Md5Hash</c>.
 /// </summary>
 /// <param name="self">The simhash instance the computation is delegated to.</param>
 /// <param name="tokens">Tokens to hash.</param>
 /// <returns>The result of the delegated <c>ComputeHash</c> call.</returns>
 public static SimhashResult ComputeHashByMd5(this Simhash self, List <string> tokens)
 {
     var md5 = new Md5Hash();
     return self.ComputeHash <Md5Hash, Md5HashResult>(tokens, md5);
 }