/// <summary>
/// Set up a batch run: remember the options and create the OPDS client from them.
/// </summary>
/// <param name="opts">The options for this run; the same object is stored and handed to the OPDS client.</param>
public BatchProcessBooks(BatchOptions opts)
{
	_options = opts;

	// Asking for very verbose output implies ordinary verbose output as well.
	var wantsAllOutput = _options.VeryVerbose;
	if (wantsAllOutput)
	{
		_options.Verbose = true;
	}

	_opdsClient = new OpdsClient(opts);
}
/// <summary>
/// Set up a fetch run: remember the options, make the filtered catalog preserve whitespace,
/// and create the OPDS client from the options.
/// </summary>
/// <param name="opts">The options for this run; the same object is stored and handed to the OPDS client.</param>
public FetchFromOPDS(FetchOptions opts)
{
	_options = opts;

	// Asking for very verbose output implies ordinary verbose output as well.
	var wantsAllOutput = _options.VeryVerbose;
	if (wantsAllOutput)
	{
		_options.Verbose = true;
	}

	_filteredCatalog.PreserveWhitespace = true;
	_opdsClient = new OpdsClient(opts);
}
/// <summary>
/// Update the parse "books" rows of the imported books that were (re)uploaded in this run.
/// For each such book this stamps the RoseGarden importer name and version, the update source
/// and the upload time; takes the book out of circulation and flags it for a librarian when a
/// book with a matching title, author and bookshelf is already in circulation; removes the
/// "system:Incoming" tag; sets the reading level tag from the OPDS entry; and finally hands any
/// matching books to FixRelatedBooksTable.
/// Books whose folder is still listed in _previouslyLoadedBooks were not re-uploaded and are skipped.
/// </summary>
private void UpdateParseTables()
{
	var bookDirs = ReadBloomBulkUploadLogFile();
	GetBookInformationFromBookFolders(bookDirs,
		out Dictionary<string, string> instanceIdToTitle,
		out Dictionary<string, XmlDocument> instanceIdToOpds,
		out Dictionary<string, string> instanceIdToFolder);
	ParseClient parseClient = new ParseClient(_options.UploadUser, _options.UploadPassword);
	LoadAllBooks(parseClient);
	string importedFilter = "{\"importedBookSourceUrl\": {\"$regex\": \".\"}}";
	IEnumerable<Book> bookList = parseClient.GetBooks(importedFilter, new[] { "uploader" });
	var uploadDate = new Date(DateTime.UtcNow);
	// The closing brace of the JSON object is deliberately left off here: per-book fields may
	// be appended before the object is closed further down.
	var updateJsonBase = String.Format("{{ \"{0}\":\"{1}\", \"{2}\":{3}, \"{4}\":{5}, \"{6}\":\"{7}\", \"{8}\": {9}",
		Book.kImporterNameField, "RoseGarden",              // possibly unneeded, but loudly claim RoseGarden did the import
		Book.kImporterMajorVersionField, _majorVersion,     // version stamp so we can update for new versions of RoseGarden
		Book.kImporterMinorVersionField, _minorVersion,
		"updateSource", "*****@*****.**",                   // very important so we don't add system:Incoming tag
		"lastUploaded", uploadDate.ToJson()                 // timestamp so we can check later for books modified on OPDS source
	);
	foreach (var book in bookList)
	{
		if (instanceIdToTitle.TryGetValue(book.BookInstanceId, out string localTitle) &&
			instanceIdToOpds.TryGetValue(book.BookInstanceId, out XmlDocument opdsEntry) &&
			instanceIdToFolder.TryGetValue(book.BookInstanceId, out string folder) &&
			!_previouslyLoadedBooks.Contains(folder))   // If we didn't reupload, don't try to update table.
		{
			var needUpdate = book.ImporterName != "RoseGarden" ||
				book.ImporterMajorVersion != _majorVersion ||
				book.ImporterMinorVersion != _minorVersion;
			if (localTitle != book.Title)
			{
				Console.WriteLine("WARNING: mismatch in titles from local to parse server: \"{0}\" vs \"{1}\"", localTitle, book.Title);
			}
			var updateJsonBldr = new StringBuilder(updateJsonBase);
			var updateTags = false;
			// Matching titles that vary in case or whitespace doesn't seem feasible using
			// parse queries.  So we preload everything locally and index by a normalized
			// title to find matching titles.
			var matchingBooks = FindBooksWithMatchingTitle(Program.NormalizeToCompare(book.Title));
			var related = new HashSet<Book>();
			foreach (var oldBook in matchingBooks)
			{
				if (oldBook.ObjectId == book.ObjectId)
				{
					continue;   // a better query would avoid this check
				}
				if (SameAuthor(book, oldBook) && SameBookshelf(book, oldBook))
				{
					if (_options.VeryVerbose)
					{
						Console.WriteLine("DEBUG: found matching book for \"{0}\"", book.Title);
					}
					if (book.InCirculation != false && oldBook.InCirculation != false)
					{
						book.InCirculation = false;
						updateJsonBldr.Append(", \"inCirculation\":false");
						needUpdate = true;
					}
					// add tag for librarian?
					// Tags may be null (it is checked for null below), so guard before using it here.
					if (book.Tags != null && !book.Tags.Contains("todo:check duplicate import"))
					{
						book.Tags.Add("todo:check duplicate import");
						updateTags = true;
					}
					related.Add(oldBook);
				}
			}
			if (book.Tags != null)
			{
				if (_options.VeryVerbose)
				{
					foreach (var tag in book.Tags)
					{
						Console.WriteLine("DEBUG: initial parse books table tags: tag=\"{0}\"", tag);
					}
				}
				if (book.Tags.Contains("system:Incoming"))
				{
					updateTags = true;
					book.Tags.Remove("system:Incoming");
				}
				var nsmgrOpds = OpdsClient.CreateNameSpaceManagerForOpdsDocument(opdsEntry);
				var levelNode = opdsEntry.SelectSingleNode("/a:feed/a:entry/lrmi:educationalAlignment[@alignmentType='readingLevel']", nsmgrOpds) as XmlElement;
				var level = levelNode?.GetAttribute("targetName");
				if (!String.IsNullOrWhiteSpace(level))
				{
					var levelTag = GetTagForLrmiReadingLevel(level);
					if (!book.Tags.Contains(levelTag))
					{
						// This removal step may just be paranoid.  But I think there should always be at most one level tag!
						// Removing while enumerating is safe only because we leave the loop immediately afterwards.
						foreach (var tag in book.Tags)
						{
							if (tag.StartsWith("level:", StringComparison.InvariantCulture))
							{
								book.Tags.Remove(tag);
								break;
							}
						}
						updateTags = true;
						book.Tags.Add(levelTag);
					}
				}
				if (updateTags)
				{
					updateJsonBldr.Append(", \"tags\":[");
					var sep = "";
					foreach (var tag in book.Tags)
					{
						// Escape the tag so that a quote or backslash in it cannot break the JSON.
						updateJsonBldr.AppendFormat("{0}\"{1}\"", sep, EscapeJsonString(tag));
						sep = ",";
					}
					updateJsonBldr.Append("]");
					needUpdate = true;
				}
			}
			if (needUpdate)
			{
				if (_options.Verbose)
				{
					Console.WriteLine("INFO: updating bloomlibrary books table with RoseGarden importer values for {0}", book.Title);
				}
				updateJsonBldr.Append(" }");
				if (_options.VeryVerbose)
				{
					Console.WriteLine("DEBUG: updateJson={0}", updateJsonBldr);
				}
				var response = parseClient.UpdateObject("books", book.ObjectId, updateJsonBldr.ToString());
				if (response.StatusCode != System.Net.HttpStatusCode.OK)
				{
					Console.WriteLine("WARNING: updating the book table for \"{0}\" failed: {1}", book.Title, response.Content);
				}
			}
			if (related.Count > 0)
			{
				FixRelatedBooksTable(book, related, parseClient);
			}
		}
	}
}

/// <summary>
/// Escape a string so that it can be placed between double quotes in a JSON document.
/// </summary>
/// <param name="value">The raw text; null is treated as an empty string.</param>
/// <returns>The text with quotes, backslashes and control characters escaped.</returns>
private static string EscapeJsonString(string value)
{
	if (String.IsNullOrEmpty(value))
	{
		return String.Empty;
	}
	var escaped = new StringBuilder(value.Length + 8);
	foreach (var ch in value)
	{
		switch (ch)
		{
			case '"': escaped.Append("\\\""); break;
			case '\\': escaped.Append("\\\\"); break;
			case '\b': escaped.Append("\\b"); break;
			case '\f': escaped.Append("\\f"); break;
			case '\n': escaped.Append("\\n"); break;
			case '\r': escaped.Append("\\r"); break;
			case '\t': escaped.Append("\\t"); break;
			default:
				if (ch < ' ')
				{
					// Remaining control characters must be written as \uXXXX in JSON.
					escaped.Append("\\u").Append(((int)ch).ToString("x4"));
				}
				else
				{
					escaped.Append(ch);
				}
				break;
		}
	}
	return escaped.ToString();
}
/// <summary>
/// Check the books beneath the upload folder against those in BloomBulkUploadLog.txt in that folder,
/// checking RoseGarden version numbers and upload timestamps against OPDS update timestamps.  If
/// any books that have already been uploaded actually do need to be updated, remove them from the
/// BloomBulkUploadLog.txt file.  Books listed in the log file that no longer exist in the library
/// are removed from the log file as well so that they get uploaded again.
/// Keep a record of which books remain in BloomBulkUploadLog.txt so that we don't try to update
/// their parse records after the upload finishes.
/// </summary>
private void PrepareForUpload()
{
	var bookDirs = ReadBloomBulkUploadLogFile();
	foreach (var dir in bookDirs)
	{
		if (IsValidFolder(dir))
		{
			_previouslyLoadedBooks.Add(dir);
		}
	}
	GetBookInformationFromBookFolders(_previouslyLoadedBooks,
		out Dictionary<string, string> instanceIdToTitle,
		out Dictionary<string, XmlDocument> instanceIdToOpds,
		out Dictionary<string, string> instanceIdToFolder);
	Program.GetVersionNumbers(out _majorVersion, out _minorVersion);
	ParseClient parseClient = new ParseClient(_options.UploadUser, _options.UploadPassword);
	string importedFilter = "{\"importedBookSourceUrl\": {\"$regex\": \".\"}}";
	IEnumerable<Book> bookList = parseClient.GetBooks(importedFilter, new[] { "uploader" });
	int updatedBooks = 0;
	var uploadedInstances = new HashSet<string>();
	// Check for books that are newer and thus need to be re-uploaded.
	foreach (var book in bookList)
	{
		uploadedInstances.Add(book.BookInstanceId);
		if (instanceIdToTitle.ContainsKey(book.BookInstanceId) &&
			instanceIdToOpds.TryGetValue(book.BookInstanceId, out XmlDocument opdsEntry) &&
			instanceIdToFolder.TryGetValue(book.BookInstanceId, out string bookDir))
		{
			// First check if RoseGarden is a newer version: if so, updating the book is indicated.
			var needUpdate = book.ImporterName == "RoseGarden" &&
				(book.ImporterMajorVersion < _majorVersion ||
					(book.ImporterMajorVersion == _majorVersion && book.ImporterMinorVersion < _minorVersion));
			if (!needUpdate)
			{
				// Check if the book source is newer than the uploaded book: if so, updating the book is indicated.
				var nsmgrOpds = OpdsClient.CreateNameSpaceManagerForOpdsDocument(opdsEntry);
				var updateNode = opdsEntry.SelectSingleNode("/a:feed/a:entry/a:updated", nsmgrOpds) as XmlElement;
				var bookUpdated = updateNode?.InnerText.Trim();
				if (!String.IsNullOrWhiteSpace(bookUpdated))
				{
					// A book without an upload timestamp is treated as out of date.  The timestamps
					// are machine-readable ISO strings, so compare them ordinally rather than by culture.
					needUpdate = book.LastUploaded == null ||
						string.CompareOrdinal(book.LastUploaded.Iso, bookUpdated) < 0;
				}
			}
			if (needUpdate)
			{
				_previouslyLoadedBooks.Remove(bookDir);
				++updatedBooks;
				if (_options.VeryVerbose)
				{
					Console.WriteLine("DEBUG: will re-upload \"{0}\" because it appears to be newer.", Path.GetFileName(bookDir));
				}
			}
		}
	}
	// Check for books that have been deleted and thus need to be re-uploaded.
	foreach (var instanceAndFolder in instanceIdToFolder)
	{
		if (uploadedInstances.Contains(instanceAndFolder.Key))
		{
			continue;
		}
		// Book no longer exists in the library: we need to reload it!
		var missingDir = instanceAndFolder.Value;
		_previouslyLoadedBooks.Remove(missingDir);
		++updatedBooks;
		if (_options.VeryVerbose)
		{
			Console.WriteLine("DEBUG: will re-upload \"{0}\" because it does not exist in bloomlibrary.", Path.GetFileName(missingDir));
		}
	}
	if (updatedBooks > 0)
	{
		// Rewrite the log file so that it lists only the books that do not need uploading again.
		// If no such books remain, the file is simply deleted.
		var logfile = GetBloomBulkUploadLogFilePath();
		File.Delete(logfile);
		if (_previouslyLoadedBooks.Count > 0)
		{
			var lines = new List<string>();
			lines.AddRange(_previouslyLoadedBooks);
			lines.Add("");
			lines.Add("");
			lines.Add("All finished!");
			lines.Add("In order to repeat the uploading, this file will need to be deleted.");
			File.WriteAllLines(logfile, lines);
		}
	}
}
/// <summary>
/// Select the catalog entries that need to be converted.  The root catalog is used when neither a
/// language name nor a publisher was given in the options; otherwise the filtered catalog is used.
/// Entries without an EPUB acquisition link are ignored.  An entry whose EPUB is not yet among the
/// known bloomlibrary books is always selected.  An entry for a known book is selected when the
/// book was imported by an older RoseGarden version, when the catalog timestamp is later than the
/// book's upload timestamp (or the book has no upload timestamp), or when ForceConvert is set.
/// </summary>
/// <returns>The entries to process, in catalog order.</returns>
private List<XmlElement> GetEntriesToProcess()
{
	var noFilterRequested = String.IsNullOrWhiteSpace(_options.LanguageName) &&
		String.IsNullOrWhiteSpace(_options.Publisher);
	var catalog = noFilterRequested ? _rootCatalog : _filteredCatalog;
	_nsmgr = OpdsClient.CreateNameSpaceManagerForOpdsDocument(catalog);
	var allEntries = catalog.DocumentElement.SelectNodes("/a:feed/a:entry", _nsmgr).Cast<XmlElement>().ToList();
	Program.GetVersionNumbers(out int currentMajor, out int currentMinor);

	var selected = new List<XmlElement>();
	foreach (var entry in allEntries)
	{
		var epubLink = entry.SelectSingleNode("./a:link[contains(@rel, 'http://opds-spec.org/acquisition') and @type='application/epub+zip']", _nsmgr) as XmlElement;
		var epubPath = epubLink?.GetAttribute("href");
		if (String.IsNullOrWhiteSpace(epubPath))
		{
			continue;   // no usable EPUB link in this entry
		}

		if (!_bloomlibraryBooks.TryGetValue(epubPath, out Book existing))
		{
			// We don't have this book yet: add it to the collection for processing.
			selected.Add(entry);
			continue;
		}

		// We have this book already, but is either the book or RoseGarden newer than before?
		bool needBook = existing.ImporterMajorVersion < currentMajor ||
			(existing.ImporterMajorVersion == currentMajor && existing.ImporterMinorVersion < currentMinor);
		if (needBook)
		{
			if (_options.VeryVerbose)
			{
				Console.WriteLine("DEBUG: \"{0}\" is already imported, but needs to be updated for a new version of RoseGarden.", existing.Title);
			}
		}
		else
		{
			// The updated element should exist: it does in both the GDL and StoryWeaver catalog entries.
			// But we'll check a couple of other possible fields in the entry just in case.
			var timestampXml = (entry.SelectSingleNode("./a:updated", _nsmgr) as XmlElement)
				?? (entry.SelectSingleNode("./a:published", _nsmgr) as XmlElement)
				?? (entry.SelectSingleNode("./dc:created", _nsmgr) as XmlElement);
			var catalogUpdated = timestampXml?.InnerText;
			if (!String.IsNullOrWhiteSpace(catalogUpdated))
			{
				needBook = existing.LastUploaded == null ||
					string.CompareOrdinal(existing.LastUploaded.Iso, catalogUpdated) < 0;
				if (needBook && _options.VeryVerbose)
				{
					Console.WriteLine("DEBUG: \"{0}\" is already imported, but needs to be updated.", existing.Title);
				}
			}
		}

		if (needBook || _options.ForceConvert)
		{
			// Reaching here with needBook false means the conversion is being forced.
			if (!needBook && _options.VeryVerbose)
			{
				Console.WriteLine("DEBUG: \"{0}\" is imported and apparently up to date, but will be converted afresh", existing.Title);
			}
			selected.Add(entry);
		}
		else if (_options.VeryVerbose)
		{
			Console.WriteLine("DEBUG: \"{0}\" is already imported and still up to date with RoseGarden and the catalog entry.", existing.Title);
		}
	}
	return selected;
}