/// <summary>
/// Builds the n-gram profile, word-id list and min-hash signature for each listing,
/// reporting incremental progress on the given job while doing so.
/// </summary>
/// <remarks>
/// Despite the method name, every field read or written here is a <c>*_description</c>
/// field (<c>description</c>, <c>ngrams_description</c>, <c>word_ids_description</c>,
/// <c>minhash_description</c>).
/// Listings that already carry an n-gram profile or a min-hash are not recomputed.
/// </remarks>
/// <param name="listings">Listings to process; their description fields are updated in place.</param>
/// <param name="job_id">Identifier of the job whose progress percentage is incremented.</param>
private static void BuildTitleMinHashes(List <Listing> listings, string job_id)
{
    // Maps every distinct n-gram seen across all listings to a dense integer id.
    var universe = new Dictionary<string, int>();

    // This instance is only used to build n-gram profiles via GetProfile.
    var profiler = new MinHash(200000, minHashCount);

    // Both passes below report the same per-listing increment, so compute it once.
    var singleItemProgress = ProgressManager.CalculateLoopIncrement(listings.Count, 0.2M);

    // Pass 1: build n-grams for each listing (if not done already) and
    // register them in the universe of n-grams.
    foreach (var listing in listings)
    {
        if (!listing.ngrams_description.Any())
        {
            listing.ngrams_description = profiler.GetProfile(listing.description, nGramLength);
        }

        foreach (var ngram in listing.ngrams_description.Keys)
        {
            if (!universe.ContainsKey(ngram))
            {
                // Ids are assigned sequentially from 0, so the next free id
                // is always the current size of the universe.
                universe.Add(ngram, universe.Count);
            }
        }

        ProgressManager.IncrementJobPercentBy(job_id, singleItemProgress);
    }

    // Pass 2: now that the universe size is known, compute the min-hashes.
    var hasher = new MinHash(universe.Count, minHashCount);

    foreach (var listing in listings)
    {
        // Skip listings that already have a min-hash, but still fall through to the
        // progress update below so that the job percentage accounts for every listing.
        // NOTE(review): a pre-existing min-hash was presumably computed against a
        // different universe of word ids — confirm that mixing them is intended.
        if (!listing.minhash_description.Any())
        {
            // Set word ids for the document.
            // NOTE(review): ids are appended to whatever word_ids_description already
            // holds; confirm it is always empty when minhash_description is empty.
            foreach (var ngram in listing.ngrams_description.Keys)
            {
                listing.word_ids_description.Add(universe[ngram]);
            }

            // Calculate the min-hash for the listing.
            listing.minhash_description = hasher.GetMinHash(listing.word_ids_description);
        }

        ProgressManager.IncrementJobPercentBy(job_id, singleItemProgress);
    }
}