/// <summary>
/// Fills slots 0 through K - 1 of <c>_functionsArray</c>, assigning each one the
/// result of a separate <c>HashFunctionGenerator.Generate(Range)</c> call.
/// </summary>
private void InitializeFunctionsArray()
{
    long slot = 0;
    while (slot < K)
    {
        // One Generate call per slot, so every slot receives its own result.
        _functionsArray[slot] = HashFunctionGenerator.Generate(Range);
        slot++;
    }
}
/// <summary>
/// Creates a new <see cref="LocalitySensitiveHashingCalculator"/> for the given documents.
/// </summary>
/// <param name="documents">The documents whose similarity is to be compared.</param>
/// <param name="k">The number of MinHash functions to generate.</param>
/// <param name="r">The r value, i.e. the row size of a band.</param>
public LocalitySensitiveHashingCalculator(IList<Document> documents, int k, int r)
{
    Documents = documents;

    // The universal set is every distinct word id appearing in any of the documents.
    var distinctWordIds = documents
        .SelectMany(document => document.Words.Select(word => word.Id))
        .Distinct();
    UniversalSet = distinctWordIds.ToArray();

    // The generator receives the requested function count and the universal set size.
    HashFunctions = HashFunctionGenerator.Generate(k, UniversalSet.Length);
    _r = r;
}
/// <summary>
/// Computes the MinHash signature matrix for a boolean element/document matrix.
/// </summary>
/// <param name="boolMatrix">
/// Matrix indexed as [element, document]; an entry equal to 1 marks the element as
/// present in that document. Any other value is treated as absent.
/// </param>
/// <returns>
/// A matrix indexed as [hash function, document]. Each cell holds the smallest hash
/// value produced by that hash function over the 1-based row numbers of the elements
/// present in the document, or <see cref="double.PositiveInfinity"/> when the document
/// contains no element.
/// </returns>
/// <remarks>
/// Replaces <c>HashFunctions</c> with a freshly generated set sized by
/// <c>NumberOfHashFunctions</c> and the row count of <paramref name="boolMatrix"/>,
/// and writes the elapsed computation time to the console.
/// </remarks>
public double[,] GetMinHashSignatureMatrix(int[,] boolMatrix)
{
    int elementCount = boolMatrix.GetLength(0);
    int documentCount = boolMatrix.GetLength(1);

    HashFunctions = HashFunctionGenerator.Generate(NumberOfHashFunctions, elementCount);

    // Timing intentionally starts after hash function generation, as before.
    var minHashStopwatch = Stopwatch.StartNew();

    double[,] minHashes = new double[HashFunctions.Length, documentCount];

    // Initialise all minhashes as infinity so any real hash value replaces them.
    for (int i = 0; i < minHashes.GetLength(0); i++)
    {
        for (int j = 0; j < minHashes.GetLength(1); j++)
        {
            minHashes[i, j] = double.PositiveInfinity;
        }
    }

    // Iterate through the boolean matrix, one document (column) at a time.
    for (int docIndex = 0; docIndex < documentCount; docIndex++)
    {
        for (int elIndex = 0; elIndex < elementCount; elIndex++)
        {
            if (boolMatrix[elIndex, docIndex] != 1)
            {
                continue;
            }

            // Hash functions are fed 1-based row numbers.
            var rowNumber = elIndex + 1;

            for (int hashFuncIndex = 0; hashFuncIndex < HashFunctions.Length; hashFuncIndex++)
            {
                var hashValue = HashFunctions[hashFuncIndex](rowNumber);
                if (hashValue < minHashes[hashFuncIndex, docIndex])
                {
                    minHashes[hashFuncIndex, docIndex] = hashValue;
                }
            }
        }
    }

    // Stop before reporting. Stopwatch.ElapsedTicks is expressed in Stopwatch.Frequency
    // units, not TimeSpan ticks, so dividing it by TimeSpan.TicksPerMillisecond gives a
    // wrong figure on platforms where the two differ; ElapsedMilliseconds is always correct.
    minHashStopwatch.Stop();
    Console.WriteLine($"MinHash Signatures computed in: {minHashStopwatch.ElapsedMilliseconds}ms");

    return minHashes;
}