/// <summary>
/// Collects the ids of indexed objects whose stored fingerprint lies within
/// <c>_kDistance</c> of the given simhash, as measured by
/// <c>SimhashResult.Distance</c>.
/// </summary>
/// <param name="simhash">The simhash to find near-duplicates for.</param>
/// <returns>
/// The set of matching object ids (currently <see cref="long"/>); empty when
/// no candidate is close enough.
/// </returns>
/// <exception cref="InvalidOperationException">
/// <c>Simhash.FpSize</c> differs from the fingerprint size this index was built with.
/// </exception>
public HashSet<long> GetNearDups(SimhashResult simhash)
{
    if (Simhash.FpSize != _fpSize)
    {
        // A specific exception type with a message replaces the former bare
        // `new Exception()`; it still derives from Exception, so existing
        // catch blocks keep working.
        throw new InvalidOperationException(
            $"Simhash fingerprint size ({Simhash.FpSize}) does not match the index fingerprint size ({_fpSize}).");
    }

    var nearDuplicateIds = new HashSet<long>();

    foreach (var key in GetEnumerableKeys(simhash))
    {
        if (!_bucket.TryGetValue(key, out var candidates))
        {
            continue;
        }

        foreach (var candidate in candidates)
        {
            // Each bucket entry is a "<fingerprint>,<objId>" string.
            // NOTE(review): parsing uses the current culture, as before; confirm
            // it matches however the entries are formatted when they are added.
            var fields = candidate.Split(',');
            var fingerprint = Convert.ToUInt64(fields[0]);
            var objId = Convert.ToInt64(fields[1]);

            // Keys only pre-select candidates; the actual distance decides membership.
            var distance = simhash.Distance(new SimhashResult(fingerprint));
            if (distance <= _kDistance)
            {
                nearDuplicateIds.Add(objId);
            }
        }
    }

    return nearDuplicateIds;
}