/// <summary>
/// Refreshes the locally cached package catalog for the current registry and
/// records the number of catalog entries in <c>ResultsCount</c>.
/// </summary>
/// <remarks>
/// Depending on the state of the cache this either rebuilds everything (schema
/// version mismatch), performs an incremental update (forced download with a
/// known revision), performs a full download (no known revision), or just
/// re-counts the entries already stored in the registry database.
/// Any exception raised while updating is logged (except for a few fatal
/// exception types) and rethrown to the caller.
/// </remarks>
/// <returns>
/// <c>false</c> if the database semaphore could not be acquired within five
/// minutes; otherwise <c>true</c>.
/// </returns>
public override async Task<bool> ExecuteAsync() {
    var dbFilename = DatabaseCacheFilePath;
    bool catalogUpdated = false;
    string filename = null;
    string registryCacheDirectory = null;
    string registryCachePath = null;
    string registryCacheFilePath = null;

    // Use a semaphore instead of a mutex because await may return to a thread other than the calling thread.
    using (var semaphore = GetDatabaseSemaphore()) {
        // Wait until file is downloaded/parsed if another download is already in session.
        // Allows user to open multiple npm windows and show progress bar without file-in-use errors.
        bool success = await Task.Run(() => semaphore.WaitOne(TimeSpan.FromMinutes(5)));
        if (!success) {
            // Return immediately so that the user can explicitly decide to refresh on failure.
            return false;
        }

        try {
            // Resolved inside the try block so that a failure here still reaches
            // the finally block and releases the semaphore acquired above.
            Uri registryUrl = await GetRegistryUrl();
            OnOutputLogged(string.Format(Resources.InfoRegistryUrl, registryUrl));

            DbVersion version = null;
            RegistryInfo registryInfo = null;
            RegistryFileMapping registryFileMapping = null;

            Directory.CreateDirectory(Path.GetDirectoryName(dbFilename));

            using (var db = new SQLiteConnection(dbFilename)) {
                // prevent errors from occurring when table doesn't exist
                db.CreateCatalogTableIfNotExists();
                version = db.Table<DbVersion>().FirstOrDefault();
                registryFileMapping = db.Table<RegistryFileMapping>().FirstOrDefault(info => info.RegistryUrl == registryUrl.ToString());
            }

            // Reuse the folder already mapped to this registry, or allocate a new one.
            registryCacheDirectory = registryFileMapping != null ? registryFileMapping.DbFileLocation : Guid.NewGuid().ToString();
            registryCachePath = Path.Combine(CachePath, registryCacheDirectory);
            registryCacheFilePath = Path.Combine(registryCachePath, RegistryCacheFilename);

            Directory.CreateDirectory(Path.GetDirectoryName(registryCacheFilePath));

            if (File.Exists(registryCacheFilePath)) {
                using (var registryDb = new SQLiteConnection(registryCacheFilePath)) {
                    // prevent errors from occurring when table doesn't exist
                    registryDb.CreateRegistryTableIfNotExists();
                    registryInfo = registryDb.Table<RegistryInfo>().FirstOrDefault();
                }
            }

            bool correctDatabaseSchema = version != null && version.Id == _databaseSchemaVersion;
            bool incrementalUpdate = correctDatabaseSchema && _forceDownload && registryInfo != null && registryInfo.Revision > 0;
            bool fullUpdate = correctDatabaseSchema && (registryInfo == null || registryInfo.Revision <= 0);

            if (!correctDatabaseSchema) {
                // Schema changed: throw away the whole cache and rebuild it.
                OnOutputLogged(Resources.InfoCatalogUpgrade);
                SafeDeleteFolder(CachePath);

                CreateCatalogDatabaseAndInsertEntries(dbFilename, registryUrl, registryCacheDirectory);

                // NOTE(review): this branch downloads into CachePath while the other
                // branches use registryCachePath - confirm that this is intentional.
                filename = await UpdatePackageCache(registryUrl, CachePath);
                catalogUpdated = true;
            } else if (incrementalUpdate) {
                filename = await UpdatePackageCache(registryUrl, registryCachePath, registryInfo.Revision);
                catalogUpdated = true;
            } else if (fullUpdate) {
                CreateCatalogDatabaseAndInsertEntries(dbFilename, registryUrl, registryCacheDirectory);

                filename = await UpdatePackageCache(registryUrl, registryCachePath);
                catalogUpdated = true;
            }

            if (catalogUpdated) {
                var fileInfo = new FileInfo(filename);
                OnOutputLogged(String.Format(Resources.InfoReadingBytesFromPackageCache, fileInfo.Length, filename, fileInfo.LastWriteTime));

                using (var reader = new StreamReader(filename)) {
                    await Task.Run(() => ParseResultsAndAddToDatabase(reader, registryCacheFilePath, registryUrl.ToString()));
                }
            }

            using (var db = new SQLiteConnection(registryCacheFilePath)) {
                db.CreateRegistryTableIfNotExists();
                ResultsCount = db.Table<CatalogEntry>().Count();
            }
        } catch (Exception ex) {
            if (ex is StackOverflowException ||
                ex is OutOfMemoryException ||
                ex is ThreadAbortException ||
                ex is AccessViolationException) {
                throw;
            }
            // assume the results are corrupted
            OnOutputLogged(ex.ToString());
            throw;
        } finally {
            if (ResultsCount == null) {
                OnOutputLogged(string.Format(Resources.DownloadOrParsingFailed, CachePath));
                // Delete the registry folder by its path under CachePath. The bare
                // directory name would not identify the folder, and the path is
                // still null if the failure happened before it was computed.
                if (registryCachePath != null) {
                    SafeDeleteFolder(registryCachePath);
                }
            } else if (ResultsCount <= 0) {
                // Database file exists, but is corrupt. Delete database, so that we can download the file next time around.
                OnOutputLogged(string.Format(Resources.DatabaseCorrupt, dbFilename));
                if (registryCachePath != null) {
                    SafeDeleteFolder(registryCachePath);
                }
            }

            semaphore.Release();
        }
    }

    LastRefreshed = File.GetLastWriteTime(registryCacheFilePath);

    OnOutputLogged(String.Format(Resources.InfoCurrentTime, DateTime.Now));
    OnOutputLogged(String.Format(Resources.InfoLastRefreshed, LastRefreshed));
    if (ResultsCount != null) {
        OnOutputLogged(String.Format(Resources.InfoNumberOfResults, ResultsCount));
    }

    return true;
}
/// <summary>
/// Reads the registry catalog JSON from <paramref name="reader"/> and stores
/// its packages in the SQLite database at <paramref name="dbFilename"/>, all
/// inside a single transaction.
/// </summary>
/// <param name="reader">Source of the catalog JSON; consumed by a <c>JsonTextReader</c> that is disposed here.</param>
/// <param name="dbFilename">Path of the registry database file; its directory is created if necessary.</param>
/// <param name="registryUrl">Registry URL stored with the revision taken from the "_updated" property.</param>
internal void ParseResultsAndAddToDatabase(TextReader reader, string dbFilename, string registryUrl) {
    Directory.CreateDirectory(Path.GetDirectoryName(dbFilename));

    using (var db = new SQLiteConnection(dbFilename)) {
        db.RunInTransaction(() => {
            db.CreateRegistryTableIfNotExists();

            using (var jsonReader = new JsonTextReader(reader)) {
                while (jsonReader.Read()) {
                    if (JsonToken.PropertyName != jsonReader.TokenType) {
                        continue;
                    }

                    if ((string)jsonReader.Value == "_updated") {
                        // The value that follows "_updated" is stored as the registry revision.
                        jsonReader.Read();
                        db.InsertOrReplace(new RegistryInfo() {
                            RegistryUrl = registryUrl,
                            Revision = (long)jsonReader.Value,
                            UpdatedOn = DateTime.Now
                        });
                        continue;
                    }

                    var builder = new NodeModuleBuilder();
                    JToken token = null;

#if DEV14_OR_LATER
                    try {
#endif
                        token = JToken.ReadFrom(jsonReader);
#if DEV14_OR_LATER
                    } catch (JsonReaderException) {
                        // Reached end of file, so continue.
                        break;
                    }
#endif

                    var module = token.FirstOrDefault();
                    while (module != null) {
                        try {
                            builder.Name = (string)module["name"];

                            // Entries without a name are skipped. This must not use
                            // 'continue': that would bypass the code below that reads
                            // the next token, leaving 'module' unchanged and the loop
                            // spinning forever on the same entry.
                            if (!string.IsNullOrEmpty(builder.Name)) {
                                builder.AppendToDescription((string)module["description"] ?? string.Empty);

                                var time = module["time"];
                                if (time != null) {
                                    builder.AppendToDate((string)time["modified"]);
                                }

                                var distTags = module["dist-tags"];
                                if (distTags != null) {
                                    var latestVersion = distTags
                                        .OfType<JProperty>()
                                        .Where(v => (string)v.Name == "latest")
                                        .Select(v => (string)v.Value)
                                        .FirstOrDefault();

                                    if (!string.IsNullOrEmpty(latestVersion)) {
                                        try {
                                            builder.LatestVersion = SemverVersion.Parse(latestVersion);
                                        } catch (SemverVersionFormatException) {
                                            OnOutputLogged(String.Format(Resources.InvalidPackageSemVersion, latestVersion, builder.Name));
                                        }
                                    }
                                }

                                var versions = module["versions"];
                                if (versions != null) {
                                    builder.AvailableVersions = GetVersions(versions);
                                }

                                AddKeywords(builder, module["keywords"]);
                                AddAuthor(builder, module["author"]);
                                AddHomepage(builder, module["homepage"]);

                                var package = builder.Build();
                                InsertCatalogEntry(db, package);
                            }
                        } catch (InvalidOperationException) {
                            // Occurs if a JValue appears where we expect JProperty
                        } catch (ArgumentException) {
                            OnOutputLogged(string.Format(Resources.ParsingError, builder.Name));
                            // Keep whatever was collected before the error, provided the package has a name.
                            if (!string.IsNullOrEmpty(builder.Name)) {
                                var package = builder.Build();
                                InsertCatalogEntry(db, package);
                            }
                        }

                        builder.Reset();

                        // Always advance to the next token, whether or not the current
                        // entry was stored.
#if DEV14_OR_LATER
                        try {
#endif
                            token = JToken.ReadFrom(jsonReader);
#if DEV14_OR_LATER
                        } catch (JsonReaderException) {
                            // Reached end of file, so continue.
                            break;
                        }
#endif
                        module = token.FirstOrDefault();
                    }
                }
            }

            // FTS doesn't support INSERT OR REPLACE. This is the most efficient way to bypass that limitation.
            db.Execute("DELETE FROM CatalogEntry WHERE docid NOT IN (SELECT MAX(docid) FROM CatalogEntry GROUP BY Name)");
        });
    }
}
/// <summary>
/// Parses the catalog JSON supplied by <paramref name="reader"/> and writes its
/// packages to the SQLite database at <paramref name="dbFilename"/> within one
/// transaction. Both the object-literal and the array-literal catalog formats
/// are accepted.
/// </summary>
/// <param name="reader">Source of the catalog JSON; wrapped in a <c>JsonTextReader</c> that is disposed here.</param>
/// <param name="dbFilename">Path of the registry database file; its directory is created if necessary.</param>
/// <param name="registryUrl">Registry URL recorded alongside the catalog revision.</param>
/// <exception cref="JsonException">The data starts with neither an object nor an array.</exception>
internal void ParseResultsAndAddToDatabase(TextReader reader, string dbFilename, string registryUrl) {
    Directory.CreateDirectory(Path.GetDirectoryName(dbFilename));

    using (var db = new SQLiteConnection(dbFilename)) {
        db.RunInTransaction(() => {
            db.CreateRegistryTableIfNotExists();

            using (var catalogReader = new JsonTextReader(reader)) {
                /*
                 The catalog payload comes in two shapes.

                 1. An object literal. It begins with an "_updated" property holding
                    the timestamp at which the data was retrieved, followed by one
                    property per package: the property name is the package name and
                    the value is an object literal with the package info, e.g.

                    {
                      "_updated": 1413573404788,
                      "unlink-empty-files": {
                        "name": "unlink-empty-files",
                        "description": "given a directory, unlink (remove) all files with a length of 0",
                        "dist-tags": { "latest": "1.0.1" },
                        "maintainers": [ { "name": "kesla", ...

                 2. An array literal whose elements are package object literals like
                    the property values above, e.g.

                    [
                      {"name":"008-somepackage","description":"Test Package","dist-tags":{"latest":"1.1.1"}...
                    , {"name":"01-simple","description":"That is the first app in order to study the ..."
                    , ...

                    This shape carries no "_updated" property, so an approximate
                    revision timestamp is generated below instead.

                 The npm client appears to accept both shapes as well; see
                 https://github.com/npm/npm/blob/2.x-release/lib/cache/update-index.js#L87
                */
                catalogReader.Read();
                var firstToken = catalogReader.TokenType;

                if (firstToken == JsonToken.StartObject) {
                    ReadPackagesFromObject(db, catalogReader, registryUrl);
                } else if (firstToken == JsonToken.StartArray) {
                    // Approximate revision: the moment 30 minutes before now (so that
                    // it predates the start of the download request), expressed as a
                    // JavaScript timestamp (milliseconds since the start of 1970).
                    // Subtracting "epoch + 30 minutes" from now yields exactly that.
                    var epochPlusThirtyMinutes = new DateTime(1970, 1, 1, 0, 30, 0, DateTimeKind.Utc);
                    double revisionMilliseconds = (DateTime.UtcNow - epochPlusThirtyMinutes).TotalMilliseconds;

                    ReadPackagesFromArray(db, catalogReader);

                    var registryInfo = new RegistryInfo() {
                        RegistryUrl = registryUrl,
                        Revision = (long)revisionMilliseconds,
                        UpdatedOn = DateTime.Now
                    };
                    db.InsertOrReplace(registryInfo);
                } else {
                    throw new JsonException("Unexpected JSON token at start of NPM catalog data");
                }
            }

            // FTS doesn't support INSERT OR REPLACE. This is the most efficient way to bypass that limitation.
            db.Execute("DELETE FROM CatalogEntry WHERE docid NOT IN (SELECT MAX(docid) FROM CatalogEntry GROUP BY Name)");
        });
    }
}