Example #1
0
 /// <summary>
 /// Fills slots <c>0</c> through <c>K - 1</c> of <c>_functionsArray</c>, assigning each slot
 /// the result of a separate <c>HashFunctionGenerator.Generate(Range)</c> call.
 /// </summary>
 private void InitializeFunctionsArray()
 {
     long slot = 0;

     while (slot < K)
     {
         // One generator call per slot, so every slot receives its own result.
         _functionsArray[slot] = HashFunctionGenerator.Generate(Range);
         slot++;
     }
 }
        /// <summary>
        /// Initializes a new instance of the <see cref="LocalitySensitiveHashingCalculator"/> class.
        /// </summary>
        /// <param name="documents">The documents whose similarity is to be compared.</param>
        /// <param name="k">The number of MinHash functions to generate.</param>
        /// <param name="r">The r value, i.e. the row size of a band.</param>
        public LocalitySensitiveHashingCalculator(IList<Document> documents, int k, int r)
        {
            Documents = documents;

            // The universal set is every distinct word id that occurs in any of the documents.
            var allWordIds = documents.SelectMany(document => document.Words.Select(word => word.Id));
            UniversalSet = allWordIds.Distinct().ToArray();

            // The generator receives k together with the size of the universal set.
            HashFunctions = HashFunctionGenerator.Generate(k, UniversalSet.Length);

            _r = r;
        }
        /// <summary>
        /// Builds the MinHash signature matrix for a 0/1 matrix whose first dimension is indexed
        /// by element (row) and whose second dimension is indexed by document (column).
        /// </summary>
        /// <remarks>
        /// Side effects: <c>HashFunctions</c> is regenerated on every call (using
        /// <c>NumberOfHashFunctions</c> and the row count of <paramref name="boolMatrix"/>), and
        /// the elapsed time in milliseconds is written to the console.
        /// </remarks>
        /// <param name="boolMatrix">
        /// The input matrix; a cell equal to <c>1</c> marks the element as present in the document.
        /// </param>
        /// <returns>
        /// A matrix with one row per hash function and one column per document. Each cell holds
        /// the smallest hash value over the 1-based row numbers of the elements present in that
        /// document, or <see cref="double.PositiveInfinity"/> when the document column has no cell
        /// equal to <c>1</c>.
        /// </returns>
        public double[,] GetMinHashSignatureMatrix(int[,] boolMatrix)
        {
            int elementCount  = boolMatrix.GetLength(0);
            int documentCount = boolMatrix.GetLength(1);

            HashFunctions = HashFunctionGenerator.Generate(NumberOfHashFunctions, elementCount);

            var minHashStopwatch = Stopwatch.StartNew();

            double[,] minHashes = new double[HashFunctions.Length, documentCount];

            // Initialise all minhashes as infinity so that any real hash value replaces them.
            for (int i = 0; i < minHashes.GetLength(0); i++)
            {
                for (int j = 0; j < minHashes.GetLength(1); j++)
                {
                    minHashes[i, j] = double.PositiveInfinity;
                }
            }

            // Iterate through the boolean matrix, document by document.
            for (int docIndex = 0; docIndex < documentCount; docIndex++)
            {
                for (int elIndex = 0; elIndex < elementCount; elIndex++)
                {
                    if (boolMatrix[elIndex, docIndex] != 1)
                    {
                        continue;
                    }

                    // Hash functions are fed the 1-based row number, not the 0-based index.
                    var rowNumber = elIndex + 1;

                    for (int hashFuncIndex = 0; hashFuncIndex < HashFunctions.Length; hashFuncIndex++)
                    {
                        var hashValue = HashFunctions[hashFuncIndex](rowNumber);

                        if (hashValue < minHashes[hashFuncIndex, docIndex])
                        {
                            minHashes[hashFuncIndex, docIndex] = hashValue;
                        }
                    }
                }
            }

            // Stop before reading so the logging itself is not timed. ElapsedMilliseconds is used
            // because Stopwatch.ElapsedTicks is measured in Stopwatch.Frequency units, which are
            // not TimeSpan ticks, so dividing it by TimeSpan.TicksPerMillisecond is wrong on
            // platforms where the two tick sizes differ.
            minHashStopwatch.Stop();
            Console.WriteLine($"MinHash Signatures computed in: {minHashStopwatch.ElapsedMilliseconds}ms");

            return minHashes;
        }