/// <summary>
/// Tags <paramref name="doc"/> as compatible with <paramref name="projectFx"/> and, when the
/// package is listed, offers it as the candidate for that framework's "latest" facets.
/// A facet that already has a candidate is left untouched, so the first document offered wins.
/// </summary>
private static void ProcessCompatibleVersion(string packageId, PerfEventTracker perfTracker, IDictionary<string, FacetedDocument> candidateNewFacets, FacetedDocument doc, FrameworkName projectFx)
{
    using (perfTracker.TrackEvent("ProcessCompatibleVersion", "{0} v{1} (fx:{2})", packageId, doc.Version, projectFx))
    {
        // Every compatible document gets the compatibility facet, listed or not.
        doc.AddFacet(Facets.Compatible(projectFx));

        // Unlisted packages never take part in the "latest version" bookkeeping.
        if (!doc.Data.Package.Listed)
        {
            return;
        }

        string prereleaseKey = Facets.LatestPrereleaseVersion(projectFx);
        string stableKey = Facets.LatestStableVersion(projectFx);

        // Any listed version may be the latest prerelease.
        if (!candidateNewFacets.ContainsKey(prereleaseKey))
        {
            candidateNewFacets.Add(prereleaseKey, doc);
        }

        // Only versions without a special (prerelease) suffix may be the latest stable.
        bool isStable = String.IsNullOrEmpty(doc.Version.SpecialVersion);
        if (isStable && !candidateNewFacets.ContainsKey(stableKey))
        {
            candidateNewFacets.Add(stableKey, doc);
        }
    }
}
/// <summary>
/// Reads every document whose "Id" term equals the lower-cased <paramref name="id"/> from
/// <paramref name="reader"/> and wraps each one in a <see cref="FacetedDocument"/>.
/// </summary>
/// <returns>The matching documents; an empty list when the id is not in the index.</returns>
private static List<FacetedDocument> CollectExistingDocuments(PerfEventTracker perfTracker, IndexReader reader, string id)
{
    var documents = new List<FacetedDocument>();

    // The term-docs enumerator is disposable; release it once the postings have been walked
    // instead of leaving it to the finalizer. It is created before the perf event starts so
    // the tracked span still covers only the iteration.
    using (var docs = reader.TermDocs(new Term("Id", id.ToLowerInvariant())))
    using (perfTracker.TrackEvent("GetExistingDocuments", id))
    {
        while (docs.Next())
        {
            documents.Add(new FacetedDocument(reader.Document(docs.Doc)));
        }
    }

    return documents;
}
/// <summary>
/// Writes one line per tracked event (average, slowest and fastest occurrence with their
/// payloads) to <paramref name="log"/>, then clears the tracker.
/// </summary>
private static void SummarizePerf(TextWriter log, PerfEventTracker perfTracker)
{
    log.WriteLine("Perf Summary:");

    foreach (var eventName in perfTracker.GetEvents())
    {
        var stats = perfTracker.GetSummary(eventName);
        var slowest = stats.Max;
        var fastest = stats.Min;

        // Argument positions matter: the average is slot {5} even though it is printed first.
        log.WriteLine(
            " {0} Avg:{5:0.00}ms Max: {1:0.00}ms({2}) Min: {3:0.00}ms({4})",
            eventName,
            slowest.Duration.TotalMilliseconds,
            slowest.Payload,
            fastest.Duration.TotalMilliseconds,
            fastest.Payload,
            stats.Average.TotalMilliseconds);
    }

    // Reset so the next summary only covers events recorded after this point.
    perfTracker.Clear();
}
/// <summary>
/// Deletes the documents for <paramref name="packageKeys"/> from the index, recalculates the
/// facets of the remaining documents that share their package ids, rewrites any document that
/// became dirty, and commits with a "delete" commit description.
/// </summary>
/// <remarks>
/// Failures are reported to <paramref name="log"/> and not rethrown, so callers keep the
/// non-throwing behavior they had before.
/// </remarks>
private static void ApplyDeletes(List<int> packageKeys, Func<int, IndexDocumentData> fetch, Lucene.Net.Store.Directory directory, TextWriter log, PerfEventTracker perfTracker, IEnumerable<FrameworkName> projectFxs)
{
    log.WriteLine("ApplyDeletes");

    try
    {
        // Collect all the packages (lazily; GroupBy below enumerates this exactly once)
        var packages = packageKeys.Select(k => fetch(k));

        using (IndexWriter indexWriter = CreateIndexWriter(directory, false))
        {
            var dirtyDocuments = new List<FacetedDocument>();
            IDictionary<string, string> commitUserData;

            using (var reader = indexWriter.GetReader())
            {
                commitUserData = reader.CommitUserData;

                // Group by Id
                foreach (var group in packages.GroupBy(p => p.Package.PackageRegistration.Id))
                {
                    // Collect existing documents through the reader we already hold rather
                    // than opening (and leaking) a fresh reader for every group.
                    List<FacetedDocument> existing = CollectExistingDocuments(perfTracker, reader, group.Key);

                    // Remove the documents we need to remove
                    foreach (var package in group)
                    {
                        Query query = NumericRangeQuery.NewIntRange("Key", package.Package.Key, package.Package.Key, true, true);
                        indexWriter.DeleteDocuments(query);

                        // Parse the version once per package, and only when there is something
                        // to compare it against.
                        if (existing.Count > 0)
                        {
                            SemanticVersion deletedVersion = SemanticVersion.Parse(package.Package.NormalizedVersion);
                            existing.RemoveAll(d => deletedVersion.Equals(d.Version));
                        }
                    }

                    // Recalculate facets
                    UpdateFacets(group.Key, existing, projectFxs, perfTracker);

                    // Add dirty documents
                    dirtyDocuments.AddRange(existing.Where(d => d.Dirty));
                }
            }

            // Process dirty documents
            WriteDirtyDocuments(dirtyDocuments, indexWriter, perfTracker);

            commitUserData["count"] = packageKeys.Count.ToString();
            commitUserData["commit-description"] = "delete";

            log.WriteLine("Commit {0} deletes", packageKeys.Count);
            indexWriter.Commit(commitUserData);
        }
    }
    catch (Exception ex)
    {
        // Previously every failure vanished silently. Keep the best-effort contract
        // but leave a trace so a failed delete pass can be diagnosed.
        log.WriteLine("ApplyDeletes failed: {0}", ex);
    }
}
/// <summary>
/// Fetches the packages for <paramref name="packageKeys"/>, works out which documents are
/// dirty per package id, writes them, and commits with "add" commit metadata that carries
/// over the previous "last-edits-index-time" value.
/// </summary>
private static void ApplyAdds(List<int> packageKeys, Func<int, IndexDocumentData> fetch, Lucene.Net.Store.Directory directory, TextWriter log, PerfEventTracker perfTracker, IEnumerable<FrameworkName> projectFxs)
{
    log.WriteLine("ApplyAdds");

    // Collect all the packages (lazily; GroupBy below enumerates this exactly once)
    var packages = packageKeys.Select(k => fetch(k));

    using (IndexWriter indexWriter = CreateIndexWriter(directory, false))
    {
        IDictionary<string, string> commitUserData;
        var dirtyDocuments = new List<FacetedDocument>();

        using (var reader = indexWriter.GetReader())
        {
            commitUserData = reader.CommitUserData;

            foreach (var group in packages.GroupBy(p => p.Package.PackageRegistration.Id))
            {
                var newDirtyDocs = DetermineDirtyDocuments(projectFxs, perfTracker, reader, group.Key, group);
                dirtyDocuments.AddRange(newDirtyDocs);
            }
        }

        WriteDirtyDocuments(dirtyDocuments, indexWriter, perfTracker);

        // TryGetValue works whether the dictionary's indexer throws on a missing key or
        // returns null, so the fallback below is reachable in both cases. Missing commit
        // metadata is treated the same way as a missing key.
        string lastEditsIndexTime;
        if (commitUserData == null
            || !commitUserData.TryGetValue("last-edits-index-time", out lastEditsIndexTime)
            || lastEditsIndexTime == null)
        {
            // this should never happen but if it did Lucene would throw
            lastEditsIndexTime = DateTime.MinValue.ToString();
        }

        log.WriteLine("Commit {0} adds", packageKeys.Count);
        indexWriter.Commit(PackageIndexing.CreateCommitMetadata(lastEditsIndexTime, packageKeys.Max(), packageKeys.Count, "add"));
    }
}
/// <summary>
/// Applies the given adds, updates and deletes to the index in that order. In what-if mode
/// the WhatIf_* variants are invoked instead of the real ones.
/// </summary>
/// <param name="log">Destination for progress output; <c>DefaultTraceWriter</c> is used when null.</param>
/// <param name="perfTracker">Perf tracker handed to the real (non-what-if) operations; a fresh tracker is used when null.</param>
public static void UpdateIndex(bool whatIf, List<int> adds, List<int> updates, List<int> deletes, Func<int, IndexDocumentData> fetch, Lucene.Net.Store.Directory directory, TextWriter log, PerfEventTracker perfTracker, IEnumerable<FrameworkName> projectFxs)
{
    log = log ?? DefaultTraceWriter;

    // Default the tracker the same way RebuildIndex does, so a null argument does not fail
    // deep inside ApplyAdds/ApplyUpdates/ApplyDeletes.
    perfTracker = perfTracker ?? new PerfEventTracker();

    if (whatIf)
    {
        log.WriteLine("WhatIf mode");

        Apply(adds, keys => WhatIf_ApplyAdds(keys, fetch, directory, log));
        Apply(updates, keys => WhatIf_ApplyUpdates(keys, fetch, directory, log));
        Apply(deletes, keys => WhatIf_ApplyDeletes(keys, fetch, directory, log));
    }
    else
    {
        Apply(adds, keys => ApplyAdds(keys, fetch, directory, log, perfTracker, projectFxs));
        Apply(updates, keys => ApplyUpdates(keys, fetch, directory, log, perfTracker, projectFxs));
        Apply(deletes, keys => ApplyDeletes(keys, fetch, directory, log, perfTracker, projectFxs));
    }
}
// Incrementally builds an index from the gallery, using the highest package key seen so far
// as a high water mark for the next fetch.
// Useful for building a fresh index: in that case it is more efficient than the diff-ing approach.
public static void RebuildIndex(string sqlConnectionString, Lucene.Net.Store.Directory directory, FrameworksList frameworks, TextWriter log = null, PerfEventTracker perfTracker = null)
{
    if (perfTracker == null)
    {
        perfTracker = new PerfEventTracker();
    }
    if (log == null)
    {
        log = DefaultTraceWriter;
    }

    Stopwatch timer = Stopwatch.StartNew();

    using (perfTracker.TrackEvent("RebuildIndex", String.Empty))
    {
        // Start from an empty index; everything is re-added below.
        CreateNewEmptyIndex(directory);

        var projectFxs = frameworks.Load();

        log.WriteLine("get curated feeds by PackageRegistration");
        IDictionary<int, IEnumerable<string>> feeds = GalleryExport.GetFeedsByPackageRegistration(sqlConnectionString, log, verbose: false);

        int highWaterMark = 0;
        for (;;)
        {
            log.WriteLine("get the checksums from the gallery");
            IDictionary<int, int> checksums = GalleryExport.FetchGalleryChecksums(sqlConnectionString, highWaterMark);

            log.WriteLine("get packages from gallery where the Package.Key > {0}", highWaterMark);
            List<Package> batch = GalleryExport.GetPublishedPackagesSince(sqlConnectionString, highWaterMark, log, verbose: false);

            // An empty batch means there is nothing left to index.
            if (batch.Count == 0)
            {
                break;
            }

            log.WriteLine("associate the feeds and checksum data with each packages");
            List<IndexDocumentData> batchData = MakeIndexDocumentData(batch, feeds, checksums);

            highWaterMark = batchData.Max(d => d.Package.Key);

            AddPackagesToIndex(batchData, directory, log, projectFxs, perfTracker);

            // Summarize performance per batch, unless either the log or the tracker is the
            // "null" instance, in which case the summary would be wasted work.
            bool summaryIsPointless = ReferenceEquals(TextWriter.Null, log) || ReferenceEquals(PerfEventTracker.Null, perfTracker);
            if (!summaryIsPointless)
            {
                SummarizePerf(log, perfTracker);
            }
        }
    }

    // Final summary, emitted after the enclosing "RebuildIndex" event has been closed.
    SummarizePerf(log, perfTracker);

    timer.Stop();
    log.WriteLine("all done, took {0}", timer.Elapsed);
}
/// <summary>
/// Recomputes facets for all documents of one package id. Existing (non-new) documents
/// contribute their current "latest" facets, new documents are tagged and offered as
/// candidates from highest to lowest version, and each project framework's latest-stable
/// and latest-prerelease facets are then reconciled.
/// </summary>
private static void UpdateFacets(string packageId, IList<FacetedDocument> documents, IEnumerable<FrameworkName> projectFxs, PerfEventTracker perfTracker)
{
    using (perfTracker.TrackEvent("UpdateFacets", "{0} ({1} items)", packageId, documents.Count))
    {
        // Gather the "latest" facets currently held by documents that were already indexed.
        IDictionary<string, List<FacetedDocument>> existingFacets =
            new Dictionary<string, List<FacetedDocument>>(StringComparer.OrdinalIgnoreCase);

        using (perfTracker.TrackEvent("FindExistingFacets", packageId))
        {
            foreach (var indexedDoc in documents)
            {
                if (indexedDoc.IsNew)
                {
                    continue;
                }

                foreach (var fx in projectFxs)
                {
                    AddToExistingFacetsList(existingFacets, indexedDoc, fx, Facets.LatestStableVersion(fx));
                    AddToExistingFacetsList(existingFacets, indexedDoc, fx, Facets.LatestPrereleaseVersion(fx));
                }
            }
        }

        IDictionary<string, FacetedDocument> candidateNewFacets = new Dictionary<string, FacetedDocument>();

        // New documents are visited from highest to lowest version.
        var newDocs = (from d in documents
                       where d.IsNew
                       orderby d.Version descending
                       select d).ToList();
        documents = null; // Done with the master list of all documents

        using (perfTracker.TrackEvent("DetermineNewLatestVersions", packageId))
        {
            foreach (var newDoc in newDocs)
            {
                bool hasSpecialVersion = !String.IsNullOrEmpty(newDoc.Version.SpecialVersion);
                if (hasSpecialVersion)
                {
                    newDoc.AddFacet(Facets.PrereleaseVersion);
                }

                if (newDoc.Data.Package.Listed)
                {
                    newDoc.AddFacet(Facets.Listed);
                }

                // Parse the package's supported frameworks, timing each parse individually.
                var packageFxs = newDoc.Data.Package.SupportedFrameworks
                    .Select(supported =>
                    {
                        using (perfTracker.TrackEvent("ParseFrameworkName", supported.TargetFramework))
                        {
                            return VersionUtility.ParseFrameworkName(supported.TargetFramework);
                        }
                    })
                    .ToList();

                // Process each target framework
                foreach (var fx in projectFxs)
                {
                    bool applies = fx == FrameworksList.AnyFramework || VersionUtility.IsCompatible(fx, packageFxs);
                    if (applies)
                    {
                        ProcessCompatibleVersion(packageId, perfTracker, candidateNewFacets, newDoc, fx);
                    }
                }
            }
        }

        // Adjust facets as needed
        using (perfTracker.TrackEvent("AdjustProjectFxes", packageId))
        {
            foreach (var fx in projectFxs)
            {
                using (perfTracker.TrackEvent("AdjustProjectFx", "{0} ({1})", packageId, fx.FullName))
                {
                    UpdateLatestVersionFacet(existingFacets, candidateNewFacets, Facets.LatestStableVersion(fx));
                    UpdateLatestVersionFacet(existingFacets, candidateNewFacets, Facets.LatestPrereleaseVersion(fx));
                }
            }
        }
    }
}
/// <summary>
/// Combines the documents already indexed for <paramref name="id"/> with new documents built
/// from <paramref name="data"/>, recomputes their facets, and returns the ones flagged dirty.
/// </summary>
/// <returns>A deferred sequence over the combined list, filtered on <c>Dirty</c>.</returns>
private static IEnumerable<FacetedDocument> DetermineDirtyDocuments(IEnumerable<FrameworkName> projectFxs, PerfEventTracker perfTracker, IndexReader reader, string id, IEnumerable<IndexDocumentData> data)
{
    using (perfTracker.TrackEvent("Processdata", id))
    {
        // Start from whatever the index already holds for this id.
        List<FacetedDocument> allDocs = CollectExistingDocuments(perfTracker, reader, id);

        // Append one new document per incoming package.
        using (perfTracker.TrackEvent("CreateNewDocuments", id))
        {
            allDocs.AddRange(data.Select(item => new FacetedDocument(item)));
        }

        // Facet processing is what marks documents as dirty.
        UpdateFacets(id, allDocs, projectFxs, perfTracker);

        return from d in allDocs
               where d.Dirty
               select d;
    }
}
/// <summary>
/// Replaces the dirty documents in the index: deletes each one by its query, flushes the
/// deletes, then adds a freshly built Lucene document for each.
/// </summary>
private static void WriteDirtyDocuments(List<FacetedDocument> dirtyDocs, IndexWriter indexWriter, PerfEventTracker perfTracker)
{
    // Decide once which documents are rewritten so the delete pass and the add pass agree.
    // Previously only the delete pass filtered on Dirty, so a clean document in the list
    // would have been added again without its old copy being removed.
    List<FacetedDocument> toRewrite = dirtyDocs.Where(d => d.Dirty).ToList();

    // Delete dirty documents and flush
    foreach (var dirtyDoc in toRewrite)
    {
        indexWriter.DeleteDocuments(dirtyDoc.GetQuery());
    }

    using (perfTracker.TrackEvent("FlushingDeletes", ""))
    {
        indexWriter.Flush(triggerMerge: false, flushDocStores: true, flushDeletes: true);
    }

    // (Re-)add dirty documents
    foreach (var dirtyDoc in toRewrite)
    {
        using (perfTracker.TrackEvent("AddDocument", "{0} v{1}", dirtyDoc.Id, dirtyDoc.Version))
        {
            indexWriter.AddDocument(CreateLuceneDocument(dirtyDoc));
        }
    }
}
/// <summary>
/// Indexes one batch of packages: computes the dirty documents per package id against a
/// read-only view of the index, writes them, and commits with "add" metadata that records
/// the highest package key in the batch.
/// </summary>
private static void AddToIndex(Lucene.Net.Store.Directory directory, List<IndexDocumentData> rangeToIndex, TextWriter log, IEnumerable<FrameworkName> projectFxs, PerfEventTracker perfTracker)
{
    log.WriteLine("begin AddToIndex");

    var groups = rangeToIndex.GroupBy(d => d.Package.PackageRegistration.Id).ToList();

    // Collect documents to change
    var dirtyDocs = new List<FacetedDocument>();
    using (var reader = IndexReader.Open(directory, readOnly: true))
    using (perfTracker.TrackEvent("CalculateChanges", ""))
    {
        foreach (var group in groups)
        {
            var newDirtyDocs = DetermineDirtyDocuments(projectFxs, perfTracker, reader, group.Key, group);

            // (Re-)Add any dirty documents to the index
            dirtyDocs.AddRange(newDirtyDocs);
        }
    }

    using (IndexWriter indexWriter = CreateIndexWriter(directory, create: false))
    {
        WriteDirtyDocuments(dirtyDocs, indexWriter, perfTracker);

        int highestPackageKey = rangeToIndex.Max(i => i.Package.Key);

        log.WriteLine("about to commit {0} packages", rangeToIndex.Count);

        // The reader is only needed to read the previous commit metadata; dispose it
        // straight away instead of leaking it.
        IDictionary<string, string> commitUserData;
        using (var commitReader = indexWriter.GetReader())
        {
            commitUserData = commitReader.CommitUserData;
        }

        // TryGetValue works whether the dictionary's indexer throws on a missing key or
        // returns null, so the fallback below is reachable in both cases.
        string lastEditsIndexTime;
        if (commitUserData == null
            || !commitUserData.TryGetValue("last-edits-index-time", out lastEditsIndexTime)
            || lastEditsIndexTime == null)
        {
            // this should never happen but if it did Lucene would throw
            lastEditsIndexTime = DateTime.MinValue.ToString();
        }

        indexWriter.Commit(PackageIndexing.CreateCommitMetadata(lastEditsIndexTime, highestPackageKey, rangeToIndex.Count, "add"));

        log.WriteLine("commit done");
    }

    log.WriteLine("end AddToIndex");
}
/// <summary>
/// Feeds <paramref name="indexDocumentData"/> to <c>AddToIndex</c> in consecutive slices of at
/// most <c>MaxDocumentsPerCommit</c> items.
/// </summary>
private static void AddPackagesToIndex(List<IndexDocumentData> indexDocumentData, Lucene.Net.Store.Directory directory, TextWriter log, IEnumerable<FrameworkName> projectFxs, PerfEventTracker perfTracker)
{
    log.WriteLine("About to add {0} packages", indexDocumentData.Count);

    int offset = 0;
    while (offset < indexDocumentData.Count)
    {
        // The final slice may be shorter than a full batch.
        int remaining = indexDocumentData.Count - offset;
        int batchSize = remaining < MaxDocumentsPerCommit ? remaining : MaxDocumentsPerCommit;

        List<IndexDocumentData> batch = indexDocumentData.GetRange(offset, batchSize);
        AddToIndex(directory, batch, log, projectFxs, perfTracker);

        offset += MaxDocumentsPerCommit;
    }
}
/// <summary>
/// Compares the gallery checksums with the keys currently in the index, sorts the differences
/// into adds, updates and deletes, and applies them through <c>PackageIndexing.UpdateIndex</c>.
/// </summary>
public override void Execute()
{
    var manager = GetSearcherManager();

    IDictionary<int, int> databaseChecksums = GalleryExport.FetchGalleryChecksums(SqlConnectionString);

    Log.WriteLine("fetched {0} keys from database", databaseChecksums.Count);

    Tuple<int, int> minMax = GalleryExport.FindMinMaxKey(databaseChecksums);

    Log.WriteLine("min = {0}, max = {1}", minMax.Item1, minMax.Item2);

    // For now, use the in-memory Searcher client (the stated intent is to eventually go
    // through the Search Service instead).
    IDictionary<int, int> index = ParseRangeResult(
        Searcher.KeyRangeQuery(manager, minMax.Item1, minMax.Item2));

    Log.WriteLine("fetched {0} keys from index", index.Count);

    List<int> adds = new List<int>();
    List<int> updates = new List<int>();
    List<int> deletes = new List<int>();

    SortIntoAddsUpdateDeletes(databaseChecksums, index, adds, updates, deletes);

    Log.WriteLine("{0} adds", adds.Count);
    Log.WriteLine("{0} updates", updates.Count);
    Log.WriteLine("{0} deletes", deletes.Count);

    // Nothing to do only when all three lists are empty. Checking adds and updates alone
    // meant a run with only pending deletes returned here and never applied them.
    if (adds.Count == 0 && updates.Count == 0 && deletes.Count == 0)
    {
        return;
    }

    IDictionary<int, IEnumerable<string>> feeds = GalleryExport.GetFeedsByPackageRegistration(SqlConnectionString, Log, verbose: false);
    IDictionary<int, IndexDocumentData> packages = PackageIndexing.LoadDocumentData(SqlConnectionString, adds, updates, deletes, feeds, databaseChecksums, Log);

    Lucene.Net.Store.Directory directory = manager.Directory;

    // Serves document data from the preloaded set, falling back to a single-package gallery
    // query (cached afterwards) for keys that were not preloaded.
    Func<int, IndexDocumentData> packageFetcher = (key) =>
    {
        IndexDocumentData knownDoc;
        if (packages.TryGetValue(key, out knownDoc))
        {
            return knownDoc;
        }
        else
        {
            // We're modifying a different document
            var pkgs = GalleryExport.GetPackages(SqlConnectionString, new List<int>() { key }, Log, verbose: false);
            var docs = PackageIndexing.MakeIndexDocumentData(pkgs, feeds, databaseChecksums);
            packages[key] = docs[0];
            return docs[0];
        }
    };

    var perfTracker = new PerfEventTracker();
    PackageIndexing.UpdateIndex(WhatIf, adds, updates, deletes, packageFetcher, directory, Log, perfTracker, manager.Frameworks.Load());
}