Esempio n. 1
0
 /// <summary>
 /// Stores the supplied options and creates the OPDS client from them.
 /// </summary>
 /// <param name="opts">Options for this run.  If VeryVerbose is set, Verbose is turned on as well.</param>
 public BatchProcessBooks(BatchOptions opts)
 {
     _options = opts;
     // Asking for very verbose output also switches on ordinary verbose output.
     if (opts.VeryVerbose)
     {
         _options.Verbose = true;
     }
     _opdsClient = new OpdsClient(opts);
 }
Esempio n. 2
0
 /// <summary>
 /// Stores the supplied options, makes the filtered catalog document preserve
 /// whitespace, and creates the OPDS client from the options.
 /// </summary>
 /// <param name="opts">Options for this run.  If VeryVerbose is set, Verbose is turned on as well.</param>
 public FetchFromOPDS(FetchOptions opts)
 {
     _options = opts;
     // Asking for very verbose output also switches on ordinary verbose output.
     if (opts.VeryVerbose)
     {
         _options.Verbose = true;
     }
     _filteredCatalog.PreserveWhitespace = true;
     _opdsClient = new OpdsClient(opts);
 }
Esempio n. 3
0
        /// <summary>
        /// Update the parse "books" table rows for the imported books that were (re)uploaded in this run.
        /// For each such book this stamps the RoseGarden importer name/version and the upload time, and
        /// adjusts the tags: "system:Incoming" is removed and the reading-level tag is taken from the OPDS
        /// entry.  If another book with a matching normalized title, the same author and the same bookshelf
        /// already exists, the imported book is taken out of circulation (when both are in circulation),
        /// tagged "todo:check duplicate import", and passed to FixRelatedBooksTable together with its matches.
        /// </summary>
        /// <remarks>
        /// Books whose folder is still listed in _previouslyLoadedBooks were not re-uploaded and are skipped.
        /// A failed update is reported as a warning on the console; it does not stop the loop.
        /// </remarks>
        private void UpdateParseTables()
        {
            var bookDirs = ReadBloomBulkUploadLogFile();

            GetBookInformationFromBookFolders(bookDirs, out Dictionary <string, string> instanceIdToTitle, out Dictionary <string, XmlDocument> instanceIdToOpds,
                                              out Dictionary <string, string> instanceIdToFolder);
            ParseClient parseClient = new ParseClient(_options.UploadUser, _options.UploadPassword);

            LoadAllBooks(parseClient);
            string             importedFilter = "{\"importedBookSourceUrl\": {\"$regex\": \".\"}}";
            IEnumerable <Book> bookList       = parseClient.GetBooks(importedFilter, new[] { "uploader" });
            var uploadDate     = new Date(DateTime.UtcNow);
            // Common prefix of every update request; the closing brace is appended just before sending
            // so that per-book fields (inCirculation, tags) can be added in between.
            var updateJsonBase = String.Format("{{ \"{0}\":\"{1}\", \"{2}\":{3}, \"{4}\":{5}, \"{6}\":\"{7}\", \"{8}\": {9}",
                                               Book.kImporterNameField, "RoseGarden",          // possibly unneeded, but loudly claim RoseGarden did the import
                                               Book.kImporterMajorVersionField, _majorVersion, // version stamp so we can update for new versions of RoseGarden
                                               Book.kImporterMinorVersionField, _minorVersion,
                                               "updateSource", "*****@*****.**", // very important so we don't add system:Incoming tag
                                               "lastUploaded", uploadDate.ToJson()             // timestamp so we can check later for books modified on OPDS source
                                               );

            foreach (var book in bookList)
            {
                if (instanceIdToTitle.TryGetValue(book.BookInstanceId, out string localTitle) &&
                    instanceIdToOpds.TryGetValue(book.BookInstanceId, out XmlDocument opdsEntry) &&
                    instanceIdToFolder.TryGetValue(book.BookInstanceId, out string folder) &&
                    !_previouslyLoadedBooks.Contains(folder))                           // If we didn't reupload, don't try to update table.
                {
                    var needUpdate = book.ImporterName != "RoseGarden" || book.ImporterMajorVersion != _majorVersion || book.ImporterMinorVersion != _minorVersion;
                    if (localTitle != book.Title)
                    {
                        Console.WriteLine("WARNING: mismatch in titles from local to parse server: \"{0}\" vs \"{1}\"", localTitle, book.Title);
                    }
                    var updateJsonBldr = new StringBuilder(updateJsonBase);
                    var updateTags     = false;
                    // Matching titles that vary in case or whitespace doesn't seem feasible using
                    // parse queries.  So we preload everything locally and index by a normalized
                    // title to find matching titles.
                    var matchingBooks = FindBooksWithMatchingTitle(Program.NormalizeToCompare(book.Title));
                    var related       = new HashSet <Book>();
                    foreach (var oldBook in matchingBooks)
                    {
                        if (oldBook.ObjectId == book.ObjectId)
                        {
                            continue;                               // a better query would avoid this check
                        }
                        if (SameAuthor(book, oldBook) && SameBookshelf(book, oldBook))
                        {
                            if (_options.VeryVerbose)
                            {
                                Console.WriteLine("DEBUG: found matching book for \"{0}\"", book.Title);
                            }
                            if (book.InCirculation != false && oldBook.InCirculation != false)
                            {
                                book.InCirculation = false;
                                updateJsonBldr.Append(", \"inCirculation\":false");
                                needUpdate = true;
                            }
                            // add tag for librarian?
                            // Tags can be null (it is null-checked further down), so guard here as well
                            // instead of crashing on a book that has no tags at all.
                            if (book.Tags != null && !book.Tags.Contains("todo:check duplicate import"))
                            {
                                book.Tags.Add("todo:check duplicate import");
                                updateTags = true;
                            }
                            related.Add(oldBook);
                        }
                    }
                    if (book.Tags != null)
                    {
                        if (_options.VeryVerbose)
                        {
                            foreach (var tag in book.Tags)
                            {
                                Console.WriteLine("DEBUG: initial parse books table tags: tag=\"{0}\"", tag);
                            }
                        }
                        if (book.Tags.Contains("system:Incoming"))
                        {
                            updateTags = true;
                            book.Tags.Remove("system:Incoming");
                        }
                        var nsmgrOpds = OpdsClient.CreateNameSpaceManagerForOpdsDocument(opdsEntry);
                        var levelNode = opdsEntry.SelectSingleNode("/a:feed/a:entry/lrmi:educationalAlignment[@alignmentType='readingLevel']", nsmgrOpds) as XmlElement;
                        var level     = levelNode?.GetAttribute("targetName");
                        if (!String.IsNullOrWhiteSpace(level))
                        {
                            var levelTag = GetTagForLrmiReadingLevel(level);
                            if (!book.Tags.Contains(levelTag))
                            {
                                // This removal step may just be paranoid.  But I think there should always be at most one level tag!
                                // Find the first existing level tag, then remove it after the loop so the
                                // collection is never modified while it is being enumerated.
                                string staleLevelTag = null;
                                foreach (var tag in book.Tags)
                                {
                                    if (tag.StartsWith("level:", StringComparison.InvariantCulture))
                                    {
                                        staleLevelTag = tag;
                                        break;
                                    }
                                }
                                if (staleLevelTag != null)
                                {
                                    book.Tags.Remove(staleLevelTag);
                                }
                                updateTags = true;
                                book.Tags.Add(levelTag);
                            }
                        }
                        if (updateTags)
                        {
                            updateJsonBldr.Append(", \"tags\":[");
                            var sep = "";
                            foreach (var tag in book.Tags)
                            {
                                // Escape backslashes and double quotes so an unusual tag cannot break the JSON.
                                var escapedTag = (tag ?? String.Empty).Replace("\\", "\\\\").Replace("\"", "\\\"");
                                updateJsonBldr.AppendFormat("{0}\"{1}\"", sep, escapedTag);
                                sep = ",";
                            }
                            updateJsonBldr.Append("]");
                            needUpdate = true;
                        }
                    }
                    if (needUpdate)
                    {
                        if (_options.Verbose)
                        {
                            Console.WriteLine("INFO: updating bloomlibrary books table with RoseGarden importer values for {0}", book.Title);
                        }
                        updateJsonBldr.Append(" }");
                        if (_options.VeryVerbose)
                        {
                            Console.WriteLine("DEBUG: updateJson={0}", updateJsonBldr);
                        }
                        var response = parseClient.UpdateObject("books", book.ObjectId, updateJsonBldr.ToString());
                        if (response.StatusCode != System.Net.HttpStatusCode.OK)
                        {
                            Console.WriteLine("WARNING: updating the book table for \"{0}\" failed: {1}", book.Title, response.Content);
                        }
                    }

                    if (related.Count > 0)
                    {
                        FixRelatedBooksTable(book, related, parseClient);
                    }
                }
            }
        }
Esempio n. 4
0
        /// <summary>
        /// Check the books beneath the upload folder against those in BloomBulkUploadLog.txt in that folder,
        /// checking RoseGarden version numbers and upload timestamps against OPDS update timestamps.  If
        /// any books that have already been uploaded actually do need to be updated, remove them from the
        /// BloomBulkUploadLog.txt file.
        /// Keep a record of which books remain in BloomBulkUploadLog.txt so that we don't try to update
        /// their parse records after the upload finishes.
        /// </summary>
        /// <remarks>
        /// A book is scheduled for re-upload when (a) it was imported by an older RoseGarden version,
        /// (b) it has no recorded upload time or its OPDS "updated" timestamp is later than that time, or
        /// (c) it is listed in the log file but no longer exists in the library.
        /// The log file is only rewritten (or deleted, if nothing remains) when at least one book changed status.
        /// </remarks>
        private void PrepareForUpload()
        {
            var bookDirs = ReadBloomBulkUploadLogFile();

            foreach (var dir in bookDirs)
            {
                if (IsValidFolder(dir))
                {
                    _previouslyLoadedBooks.Add(dir);
                }
            }
            GetBookInformationFromBookFolders(_previouslyLoadedBooks, out Dictionary <string, string> instanceIdToTitle, out Dictionary <string, XmlDocument> instanceIdToOpds,
                                              out Dictionary <string, string> instanceIdToFolder);
            Program.GetVersionNumbers(out _majorVersion, out _minorVersion);
            ParseClient        parseClient    = new ParseClient(_options.UploadUser, _options.UploadPassword);
            string             importedFilter = "{\"importedBookSourceUrl\": {\"$regex\": \".\"}}";
            IEnumerable <Book> bookList       = parseClient.GetBooks(importedFilter, new[] { "uploader" });
            int updatedBooks      = 0;
            var uploadedInstances = new HashSet <string>();

            // Check for books that are newer and thus need to be re-uploaded.
            foreach (var book in bookList)
            {
                uploadedInstances.Add(book.BookInstanceId);
                // Only books known to all three local lookups are candidates; the title itself is not needed here.
                if (instanceIdToTitle.ContainsKey(book.BookInstanceId) &&
                    instanceIdToOpds.TryGetValue(book.BookInstanceId, out XmlDocument opdsEntry) &&
                    instanceIdToFolder.TryGetValue(book.BookInstanceId, out string dir))
                {
                    // First check if RoseGarden is a newer version: if so, updating the book is indicated.
                    var needUpdate = book.ImporterName == "RoseGarden" &&
                                     (book.ImporterMajorVersion < _majorVersion ||
                                      (book.ImporterMajorVersion == _majorVersion && book.ImporterMinorVersion < _minorVersion));
                    if (!needUpdate)
                    {
                        // Check if the book source is newer than the uploaded book: if so, updating the book is indicated.
                        var nsmgrOpds  = OpdsClient.CreateNameSpaceManagerForOpdsDocument(opdsEntry);
                        var updateNode = opdsEntry.SelectSingleNode("/a:feed/a:entry/a:updated", nsmgrOpds) as XmlElement;
                        if (updateNode != null)
                        {
                            var bookUpdated = updateNode.InnerText.Trim();
                            // A book without a recorded upload time is treated as out of date (the same rule
                            // GetEntriesToProcess uses).  The timestamps are machine-readable strings, so
                            // compare them ordinally rather than by culture rules.
                            needUpdate = book.LastUploaded == null ||
                                         string.CompareOrdinal(book.LastUploaded.Iso, bookUpdated) < 0;
                        }
                    }
                    if (needUpdate)
                    {
                        _previouslyLoadedBooks.Remove(dir);
                        ++updatedBooks;
                        if (_options.VeryVerbose)
                        {
                            Console.WriteLine("DEBUG: will re-upload \"{0}\" because it appears to be newer.", Path.GetFileName(dir));
                        }
                    }
                }
            }
            // Check for books that have been deleted and thus need to be re-uploaded.
            foreach (var instanceAndFolder in instanceIdToFolder)
            {
                if (!uploadedInstances.Contains(instanceAndFolder.Key))
                {
                    // Book no longer exists in the library: we need to reload it!
                    var dir = instanceAndFolder.Value;
                    _previouslyLoadedBooks.Remove(dir);
                    ++updatedBooks;
                    if (_options.VeryVerbose)
                    {
                        Console.WriteLine("DEBUG: will re-upload \"{0}\" because it does not exist in bloomlibrary.", Path.GetFileName(dir));
                    }
                }
            }
            if (updatedBooks > 0)
            {
                // Rewrite the log so it lists only the books that do not need uploading again.
                var logfile = GetBloomBulkUploadLogFilePath();
                File.Delete(logfile);
                if (_previouslyLoadedBooks.Count > 0)
                {
                    var lines = new List <string>();
                    lines.AddRange(_previouslyLoadedBooks);
                    lines.Add("");
                    lines.Add("");
                    lines.Add("All finished!");
                    lines.Add("In order to repeat the uploading, this file will need to be deleted.");
                    File.WriteAllLines(logfile, lines);
                }
            }
        }
Esempio n. 5
0
        /// <summary>
        /// Collect the catalog entries that should be converted.  The root catalog is used when neither
        /// a language name nor a publisher option is set; otherwise the filtered catalog is used.
        /// An entry is selected when it has an epub acquisition link and either is not yet among the
        /// known bloomlibrary books, was imported by an older RoseGarden version, has a catalog timestamp
        /// later than the recorded upload time (or no recorded upload time), or ForceConvert is set.
        /// </summary>
        /// <returns>The entries to process, in catalog order.</returns>
        private List <XmlElement> GetEntriesToProcess()
        {
            bool unfiltered = String.IsNullOrWhiteSpace(_options.LanguageName) && String.IsNullOrWhiteSpace(_options.Publisher);
            var  catalog    = unfiltered ? _rootCatalog : _filteredCatalog;

            _nsmgr = OpdsClient.CreateNameSpaceManagerForOpdsDocument(catalog);
            var catalogEntries = catalog.DocumentElement.SelectNodes("/a:feed/a:entry", _nsmgr).Cast <XmlElement>().ToList();

            Program.GetVersionNumbers(out int currentMajor, out int currentMinor);
            var selected = new List <XmlElement>();

            foreach (var entry in catalogEntries)
            {
                // Entries without a usable epub acquisition link cannot be imported: skip them.
                var epubLink = entry.SelectSingleNode("./a:link[contains(@rel, 'http://opds-spec.org/acquisition') and @type='application/epub+zip']", _nsmgr) as XmlElement;
                var epubPath = epubLink?.GetAttribute("href");
                if (String.IsNullOrWhiteSpace(epubPath))
                {
                    continue;
                }

                // A book we have never imported is always processed.
                if (!_bloomlibraryBooks.TryGetValue(epubPath, out Book known))
                {
                    selected.Add(entry);
                    continue;
                }

                // Already imported: is RoseGarden itself newer than the version that imported it?
                bool outdated = known.ImporterMajorVersion < currentMajor ||
                                (known.ImporterMajorVersion == currentMajor && known.ImporterMinorVersion < currentMinor);
                if (outdated)
                {
                    if (_options.VeryVerbose)
                    {
                        Console.WriteLine("DEBUG: \"{0}\" is already imported, but needs to be updated for a new version of RoseGarden.", known.Title);
                    }
                }
                else
                {
                    // Otherwise compare the catalog timestamp with the upload time.  The updated element
                    // is expected to exist; published and dc:created are fallbacks, tried in that order.
                    var stampNode = (entry.SelectSingleNode("./a:updated", _nsmgr) as XmlElement)
                                    ?? (entry.SelectSingleNode("./a:published", _nsmgr) as XmlElement)
                                    ?? (entry.SelectSingleNode("./dc:created", _nsmgr) as XmlElement);
                    var catalogUpdated = stampNode?.InnerText;
                    if (!String.IsNullOrWhiteSpace(catalogUpdated))
                    {
                        outdated = known.LastUploaded == null || string.CompareOrdinal(known.LastUploaded.Iso, catalogUpdated) < 0;
                        if (outdated && _options.VeryVerbose)
                        {
                            Console.WriteLine("DEBUG: \"{0}\" is already imported, but needs to be updated.", known.Title);
                        }
                    }
                }

                if (outdated || _options.ForceConvert)
                {
                    // Reaching here while not outdated means the conversion is being forced.
                    if (!outdated && _options.VeryVerbose)
                    {
                        Console.WriteLine("DEBUG: \"{0}\" is imported and apparently up to date, but will be converted afresh", known.Title);
                    }
                    selected.Add(entry);
                }
                else if (_options.VeryVerbose)
                {
                    Console.WriteLine("DEBUG: \"{0}\" is already imported and still up to date with RoseGarden and the catalog entry.", known.Title);
                }
            }
            return selected;
        }