/// <summary>
/// Small smoke test: builds two overlapping sets of upper-case tokens and
/// writes the value returned by <c>MinHash.Similarity</c> for them to
/// standard output.
/// </summary>
/// <remarks>
/// The <c>MinHash</c> instance is constructed with the combined element
/// count of both sets (3 + 4 = 7). The sets share three tokens; the second
/// additionally contains "USA".
/// </remarks>
public static void Test()
{
    // Insertion order matches the original sequence of Add calls.
    var firstTokens = new HashSet<string> { "FRANCISCO", "MISSION", "SAN" };
    var secondTokens = new HashSet<string> { "SAN", "FRANCISCO", "MISSION", "USA" };

    int combinedCount = firstTokens.Count + secondTokens.Count;
    MinHash hasher = new MinHash(combinedCount);

    Console.WriteLine(hasher.Similarity(firstTokens, secondTokens));
}
/// <summary>
/// Finds the index of the set whose min-hash signature is most similar to
/// the signature of the set at <paramref name="setIndex"/>.
/// </summary>
/// <remarks>
/// Works in two phases:
/// 1. Candidate generation: for every band, the <c>ROWSINBAND</c> signature
///    values of the query set are summed and the sum is used as the key into
///    <c>m_lshBuckets</c>; every index stored under that key is a candidate.
/// 2. Ranking: each candidate (other than the query itself) is scored by the
///    fraction of signature positions in which it agrees with the query, the
///    usual min-hash similarity estimate.
/// </remarks>
/// <param name="setIndex">Row of the query set in <c>m_minHashMatrix</c>.</param>
/// <param name="minHasher">
/// Not used at present; the similarity is derived directly from the
/// signature matrix. Kept so existing callers continue to compile.
/// </param>
/// <returns>
/// The index of the candidate with the highest estimated similarity, or -1
/// when no candidate has an estimated similarity greater than zero.
/// </returns>
public int FindClosest(int setIndex, MinHash minHasher)
{
    // First find potential "close" candidates.
    HashSet<int> potentialSetIndexes = new HashSet<int>();
    for (int band = 0; band < m_numBands; band++)
    {
        // Combine all signature values of this band into a single bucket key.
        // NOTE(review): this must mirror the way m_lshBuckets was populated;
        // the population code is not visible here.
        int sum = 0;
        for (int row = 0; row < ROWSINBAND; row++)
        {
            sum += m_minHashMatrix[setIndex, band * ROWSINBAND + row];
        }

        foreach (var bucketMember in m_lshBuckets[sum])
        {
            potentialSetIndexes.Add(bucketMember);
        }
    }

    // From the candidates, estimate similarity from the signatures and find
    // the index of the closest set.
    // The signature length equals the column range read during banding above.
    int signatureLength = m_numBands * ROWSINBAND;
    int minIndex = -1;
    double similarityOfMinIndex = 0.0;
    foreach (int candidateIndex in potentialSetIndexes.Where(index => index != setIndex))
    {
        // Assumes bucket entries are valid row indexes into m_minHashMatrix
        // (TODO confirm against the code that fills m_lshBuckets).
        int matchingPositions = 0;
        for (int position = 0; position < signatureLength; position++)
        {
            if (m_minHashMatrix[setIndex, position] == m_minHashMatrix[candidateIndex, position])
            {
                matchingPositions++;
            }
        }

        // signatureLength cannot be zero here: with no bands or rows the
        // candidate set is empty and this loop body is never entered.
        double similarity = (double)matchingPositions / signatureLength;
        if (similarity > similarityOfMinIndex)
        {
            similarityOfMinIndex = similarity;
            minIndex = candidateIndex;
        }
    }

    return minIndex;
}