Beispiel #1
0
        /// <summary>
        /// Builds two small string sets, passes them to <c>MinHash.Similarity</c>
        /// and writes the returned value to standard output.
        /// </summary>
        public static void Test()
        {
            // Three of the four distinct words appear in both sets.
            var first  = new HashSet<string> { "FRANCISCO", "MISSION", "SAN" };
            var second = new HashSet<string> { "SAN", "FRANCISCO", "MISSION", "USA" };

            // The constructor receives the combined element count of both sets.
            var hasher = new MinHash(first.Count + second.Count);

            // 'var' keeps whatever type Similarity returns, so the same
            // WriteLine overload is selected as when the call is inlined.
            var similarity = hasher.Similarity(first, second);

            Console.Out.WriteLine(similarity);
        }
Beispiel #2
0
        /// <summary>
        /// Finds the set whose min-hash row agrees most with the row of
        /// <paramref name="setIndex"/>, considering only sets that share at least
        /// one LSH bucket with it.
        /// </summary>
        /// <param name="setIndex">First-dimension index into <c>m_minHashMatrix</c> of the set to find a neighbour for.</param>
        /// <param name="minHasher">
        /// Currently unused: the similarity is estimated directly from the rows of
        /// <c>m_minHashMatrix</c>. The parameter is kept so existing callers keep compiling.
        /// </param>
        /// <returns>
        /// The index of the candidate with the highest estimated similarity, or -1 when
        /// there is no candidate whose row matches in at least one position.
        /// </returns>
        public int FindClosest(int setIndex, MinHash minHasher)
        {
            // Step 1: collect candidates - every set index stored in a bucket that
            // one of this set's bands maps to.
            HashSet<int> potentialSetIndexes = new HashSet<int>();

            for (int b = 0; b < m_numBands; b++)
            {
                // The bucket key of a band is the plain sum of its ROWSINBAND min-hash values.
                int sum = 0;

                for (int i = 0; i < ROWSINBAND; i++)
                {
                    sum += m_minHashMatrix[setIndex, b * ROWSINBAND + i];
                }

                foreach (var i in m_lshBuckets[sum])
                {
                    potentialSetIndexes.Add(i);
                }
            }

            // Step 2: rank the candidates. The similarity estimate is the fraction of
            // positions (over the m_numBands * ROWSINBAND positions used for banding)
            // at which both rows hold the same min-hash value.
            int signatureLength = m_numBands * ROWSINBAND;

            int    minIndex             = -1;
            double similarityOfMinIndex = 0.0;

            foreach (int candidateIndex in potentialSetIndexes.Where(i => i != setIndex))
            {
                int matches = 0;

                for (int h = 0; h < signatureLength; h++)
                {
                    if (m_minHashMatrix[setIndex, h] == m_minHashMatrix[candidateIndex, h])
                    {
                        matches++;
                    }
                }

                double similarity = (double)matches / signatureLength;

                // Strict comparison: the first candidate reaching the best value wins,
                // and a candidate with zero matching positions is never selected.
                if (similarity > similarityOfMinIndex)
                {
                    similarityOfMinIndex = similarity;
                    minIndex             = candidateIndex;
                }
            }

            return minIndex;
        }