// Rebuilds the Lucene index from scratch by reading the gallery database in batches.
//
// The index in 'directory' is emptied first. Published packages are then fetched
// repeatedly, each time asking only for packages whose key is greater than the highest
// key already indexed (a high-water mark held in a local that starts at 0). The loop
// ends when a fetch returns no packages. For a fresh index this avoids the cost of the
// diff-based update path.
//
//   sqlConnectionString - connection string handed to the GalleryExport queries
//   directory           - Lucene directory that is emptied and then repopulated
//   frameworks          - source of the project frameworks list (loaded once, up front)
//   log                 - optional writer for progress output; DefaultTraceWriter when null
//   perfTracker         - optional perf tracker; a new PerfEventTracker when null
public static void RebuildIndex(string sqlConnectionString, Lucene.Net.Store.Directory directory, FrameworksList frameworks, TextWriter log = null, PerfEventTracker perfTracker = null)
{
    if (ReferenceEquals(perfTracker, null))
    {
        perfTracker = new PerfEventTracker();
    }
    if (ReferenceEquals(log, null))
    {
        log = DefaultTraceWriter;
    }

    Stopwatch timer = Stopwatch.StartNew();

    using (perfTracker.TrackEvent("RebuildIndex", string.Empty))
    {
        // Start from an empty index: everything is re-added below.
        CreateNewEmptyIndex(directory);

        var projectFxs = frameworks.Load();

        log.WriteLine("get curated feeds by PackageRegistration");
        IDictionary<int, IEnumerable<string>> feeds =
            GalleryExport.GetFeedsByPackageRegistration(sqlConnectionString, log, verbose: false);

        // Highest Package.Key indexed so far; each batch only asks for keys above it.
        int highWaterMark = 0;
        bool morePackages = true;

        while (morePackages)
        {
            log.WriteLine("get the checksums from the gallery");
            IDictionary<int, int> checksums =
                GalleryExport.FetchGalleryChecksums(sqlConnectionString, highWaterMark);

            log.WriteLine("get packages from gallery where the Package.Key > {0}", highWaterMark);
            List<Package> batch =
                GalleryExport.GetPublishedPackagesSince(sqlConnectionString, highWaterMark, log, verbose: false);

            // An empty batch means the gallery has nothing beyond the high-water mark.
            morePackages = batch.Count != 0;

            if (morePackages)
            {
                log.WriteLine("associate the feeds and checksum data with each packages");
                List<IndexDocumentData> batchDocuments = MakeIndexDocumentData(batch, feeds, checksums);

                // Advance the mark before indexing so the next fetch starts after this batch.
                highWaterMark = batchDocuments.Max(doc => doc.Package.Key);

                AddPackagesToIndex(batchDocuments, directory, log, projectFxs, perfTracker);

                // Per-batch perf summary; skipped when either the log or the tracker
                // is the shared "Null" instance, since the output would be discarded.
                if (!(ReferenceEquals(TextWriter.Null, log) || ReferenceEquals(PerfEventTracker.Null, perfTracker)))
                {
                    SummarizePerf(log, perfTracker);
                }
            }
        }
    }

    // Final summary, written unconditionally once the "RebuildIndex" event has been disposed.
    SummarizePerf(log, perfTracker);

    timer.Stop();
    log.WriteLine("all done, took {0}", timer.Elapsed);
}
// Brings the index in line with the gallery database using a checksum diff.
//
// Steps: fetch the per-key checksums from the database, fetch the keys the index holds
// for the same min..max key range, sort the differences into adds / updates / deletes,
// load document data for the affected packages, and hand everything to
// PackageIndexing.UpdateIndex (which also receives the WhatIf flag).
public override void Execute()
{
    var searcherManager = GetSearcherManager();

    IDictionary<int, int> databaseChecksums = GalleryExport.FetchGalleryChecksums(SqlConnectionString);
    Log.WriteLine("fetched {0} keys from database", databaseChecksums.Count);

    Tuple<int, int> keyRange = GalleryExport.FindMinMaxKey(databaseChecksums);
    int minKey = keyRange.Item1;
    int maxKey = keyRange.Item2;
    Log.WriteLine("min = {0}, max = {1}", minKey, maxKey);

    // The key range is currently queried through the in-memory Searcher client; the
    // original comment says this is meant to go through the Search Service eventually.
    var rangeResult = Searcher.KeyRangeQuery(searcherManager, minKey, maxKey);
    IDictionary<int, int> index = ParseRangeResult(rangeResult);
    Log.WriteLine("fetched {0} keys from index", index.Count);

    List<int> adds = new List<int>();
    List<int> updates = new List<int>();
    List<int> deletes = new List<int>();
    SortIntoAddsUpdateDeletes(databaseChecksums, index, adds, updates, deletes);

    Log.WriteLine("{0} adds", adds.Count);
    Log.WriteLine("{0} updates", updates.Count);
    Log.WriteLine("{0} deletes", deletes.Count);

    // NOTE(review): only adds and updates are checked here, so a run in which deletes
    // are the sole pending change returns without calling UpdateIndex - confirm intended.
    bool nothingToAddOrUpdate = adds.Count == 0 && updates.Count == 0;
    if (nothingToAddOrUpdate)
    {
        return;
    }

    IDictionary<int, IEnumerable<string>> feeds =
        GalleryExport.GetFeedsByPackageRegistration(SqlConnectionString, Log, verbose: false);

    IDictionary<int, IndexDocumentData> packages =
        PackageIndexing.LoadDocumentData(SqlConnectionString, adds, updates, deletes, feeds, databaseChecksums, Log);

    Lucene.Net.Store.Directory directory = searcherManager.Directory;

    // Resolves a package key to its document data. Keys already loaded above are served
    // from 'packages'; any other key is fetched from the gallery on demand and cached.
    Func<int, IndexDocumentData> packageFetcher = key =>
    {
        IndexDocumentData document;
        if (!packages.TryGetValue(key, out document))
        {
            // Not part of the preloaded set: a different document is being modified.
            var fetched = GalleryExport.GetPackages(SqlConnectionString, new List<int> { key }, Log, verbose: false);
            var built = PackageIndexing.MakeIndexDocumentData(fetched, feeds, databaseChecksums);
            document = built[0];
            packages[key] = document;
        }
        return document;
    };

    var perfTracker = new PerfEventTracker();

    PackageIndexing.UpdateIndex(
        WhatIf,
        adds,
        updates,
        deletes,
        packageFetcher,
        directory,
        Log,
        perfTracker,
        searcherManager.Frameworks.Load());
}