/// <summary>
/// Prompts the user to pick a folder, clears out all existing books from the
/// database, and recursively reads in every .rdf file under the picked folder.
/// </summary>
/// <param name="bookdb">Database context that is cleared and repopulated.</param>
/// <returns>The number of books read (NRead), or -1 if the user cancelled the picker.</returns>
public static async Task<int> ReadDirAsync(BookDataContext bookdb)
{
    var picker = new FolderPicker()
    {
        SuggestedStartLocation = PickerLocationId.DocumentsLibrary,
    };
    picker.FileTypeFilter.Add(".rdf");
    var folder = await picker.PickSingleFolderAsync();
    if (folder == null)
    {
        return -1; // user cancelled the folder picker.
    }
    NextIndexLogged = LogNIndex;
    NRead = 0;
    // Destructive: remove every existing book before re-reading the directory.
    CommonQueries.BookRemoveAll(bookdb);
    await ReadDirAsyncFolder(bookdb, folder);
    CommonQueries.BookSaveChanges(bookdb);
    // NOTE(review): removed dead local `totlog = logsb.ToString()`; the captured
    // log text was never used.
    return NRead;
}
/// <summary>
/// Ensures the given book's DownloadData marks the file as downloaded at the
/// given folder/filename: creates a new DownloadData if none exists, otherwise
/// refreshes an existing record whose status isn't Downloaded yet.
/// </summary>
/// <param name="bookdb">Database context to update and save.</param>
/// <param name="bookId">Id of the book; logged as an error if not found.</param>
/// <param name="folderPath">Folder the downloaded file lives in.</param>
/// <param name="filename">Name of the downloaded file.</param>
public static void DownloadedBookEnsureFileMarkedAsDownloaded(BookDataContext bookdb, string bookId, string folderPath, string filename)
{
    NQueries++;
    var book = BookGet(bookdb, bookId);
    if (book == null)
    {
        App.Error($"ERROR: trying to ensure that {bookId} is downloaded, but it's not a valid book");
        return;
    }
    var dd = book.DownloadData;
    if (dd == null)
    {
        dd = new DownloadData()
        {
            BookId = bookId,
            FilePath = folderPath,
            FileName = filename,
            CurrFileStatus = DownloadData.FileStatus.Downloaded,
            DownloadDate = DateTimeOffset.Now,
        };
        // Denormalized copy of the download date for fast querying/sorting.
        book.DenormDownloadDate = dd.DownloadDate.ToUnixTimeSeconds();
        CommonQueries.DownloadedBookAdd(bookdb, dd, CommonQueries.ExistHandling.IfNotExists);
        CommonQueries.BookSaveChanges(bookdb);
    }
    else if (dd.CurrFileStatus != DownloadData.FileStatus.Downloaded)
    {
        // BUGFIX: also refresh FileName — previously only the path was updated,
        // so a file saved under a different name kept its stale name in the DB.
        dd.FilePath = folderPath;
        dd.FileName = filename;
        dd.CurrFileStatus = DownloadData.FileStatus.Downloaded;
        BookSaveChanges(bookdb);
    }
}
/// <summary>
/// Saves a user note for a book, creating the book's BookNotes container first
/// if it doesn't exist. New notes (Id == 0) are appended; existing notes are
/// persisted by the SaveChanges call alone.
/// </summary>
/// <param name="bookdb">Database context to update and save.</param>
/// <param name="note">The note to save; its BookId selects the BookNotes container.</param>
public static void BookNoteSave(BookDataContext bookdb, UserNote note)
{
    var bn = CommonQueries.BookNotesFind(bookdb, note.BookId);
    if (bn == null)
    {
        bn = new BookNotes();
        bn.BookId = note.BookId;
        CommonQueries.BookNotesAdd(bookdb, bn, CommonQueries.ExistHandling.IfNotExists);
        // Re-find so we work with the tracked entity that was actually added.
        bn = CommonQueries.BookNotesFind(bookdb, note.BookId);
    }
    // BUGFIX: guard against the re-find still returning null (matches the
    // error-handling style of BookNavigationDataEnsure); previously this would
    // have thrown a NullReferenceException on bn.Notes below.
    if (bn == null)
    {
        App.Error($"ERROR: trying to save note for {note.BookId}, but can't find or create its BookNotes.");
        return;
    }
    if (note.Id == 0) // Hasn't been saved before. The id is 0.
    {
        bn.Notes.Add(note);
    }
    CommonQueries.BookSaveChanges(bookdb);
}
/// <summary>
/// Returns the navigation data for a book, creating (and saving) a fresh empty
/// record if none exists yet. Logs an error if the record still can't be found
/// after creation; the return value may then be null.
/// </summary>
/// <param name="bookdb">Database context to query and update.</param>
/// <param name="bookData">Book whose BookId keys the navigation data.</param>
/// <returns>The tracked BookNavigationData, or null on failure.</returns>
public static BookNavigationData BookNavigationDataEnsure(BookDataContext bookdb, BookData bookData)
{
    var navData = CommonQueries.BookNavigationDataFind(bookdb, bookData.BookId);
    if (navData == null)
    {
        // Not present yet: add an empty record, then re-read it so we hand back
        // the tracked entity, and persist the addition.
        var fresh = new BookNavigationData()
        {
            BookId = bookData.BookId,
        };
        CommonQueries.BookNavigationDataAdd(bookdb, fresh, CommonQueries.ExistHandling.IfNotExists);
        navData = CommonQueries.BookNavigationDataFind(bookdb, bookData.BookId);
        CommonQueries.BookSaveChanges(bookdb);
    }
    if (navData == null)
    {
        App.Error($"ERROR: trying to ensure navigation data, but don't have one.");
    }
    return navData;
}
/// <summary>
/// Builds a BookMarkFile from the books that carry user data — either the full
/// set or only the recent set, depending on the requested file type. Each book
/// is trimmed down via CreateBookMarkBookData before being included.
/// </summary>
/// <param name="fileType">FullFile selects all books with user data; anything else selects the recent set.</param>
/// <returns>A new BookMarkFile whose BookMarkList holds the trimmed books.</returns>
public static BookMarkFile CreateBookMarkFile(BookMarkFileType fileType)
{
    var bookdb = BookDataContext.Get();
    var sourceBooks = (fileType == BookMarkFileType.FullFile)
        ? CommonQueries.BookGetAllWhichHaveUserData(bookdb)
        : CommonQueries.BookGetRecentWhichHaveUserData(bookdb);

    // We only save some of the BookData fields in a book mark file.
    // Don't bother with the full file list (total waste of time), or the people list.
    var trimmed = new List<BookData>();
    foreach (var sourceBook in sourceBooks)
    {
        trimmed.Add(CreateBookMarkBookData(sourceBook));
    }

    return new BookMarkFile()
    {
        BookMarkList = trimmed,
    };
}
/// <summary>
/// Adds the bookData into the Books database, but only if it's not already present.
/// If it's already present, behavior depends on <paramref name="handling"/>:
/// IfNotExists leaves the existing row alone; CatalogOverrideFast merges catalog
/// data into rows that came from a bookmark file; SmartCatalogOverride additionally
/// reconciles the per-book file list against the new catalog entry.
/// Changes are NOT saved here — the caller is responsible for SaveChanges.
/// </summary>
/// <param name="bookdb">Database context whose Books set is updated.</param>
/// <param name="book">The incoming (catalog or bookmark) book data.</param>
/// <param name="handling">How to treat a book that already exists.</param>
/// <returns>0=not added, 1=added. Technical is the count of the number added.</returns>
public static int BookAdd(BookDataContext bookdb, BookData book, ExistHandling handling)
{
    int retval = 0;
    NQueries++;
    // NOTE(review): locks on the context object itself; other methods presumably
    // use the same convention — confirm before changing the lock target.
    lock (bookdb)
    {
        switch (handling)
        {
            case ExistHandling.IfNotExists:
                // Simple case: insert only when the BookId isn't present.
                if (bookdb.Books.Find(book.BookId) == null)
                {
                    bookdb.Books.Add(book);
                    retval++;
                }
                break;
            case ExistHandling.CatalogOverrideFast:
            {
                var dbbook = bookdb.Books.Find(book.BookId);
                if (dbbook == null)
                {
                    bookdb.Books.Add(book);
                    retval++;
                }
                else // have to be smart.
                {
                    if (dbbook.BookSource.StartsWith(BookData.BookSourceBookMarkFile))
                    {
                        // The database was added to from a bookmark file.
                        // For these books, the dbbook top-level data isn't correct but the user data is correct.
                        // At the same time, the new book top-level data IS correct, but the user data is not correct.
                        BookData.Merge(dbbook, book);
                        retval++;
                    }
                }
            }
            break;
            case ExistHandling.SmartCatalogOverride:
            {
                var dbbook = bookdb.Books.Find(book.BookId);
                if (dbbook == null)
                {
                    bookdb.Books.Add(book);
                    retval++;
                }
                else // have to be smart.
                {
                    if (dbbook.BookSource.StartsWith(BookData.BookSourceBookMarkFile))
                    {
                        // The database was added to from a bookmark file.
                        // For these books, the dbbook top-level data isn't correct but the user data is correct.
                        // At the same time, the new book top-level data IS correct, but the user data is not correct.
                        BookData.Merge(dbbook, book);
                        retval++;
                    }
                    else
                    {
                        // Grab the full data including the number of files
                        dbbook = CommonQueries.BookGetFiles(bookdb, book.BookId);
                        var mustReplace = book.Files.Count != dbbook.Files.Count;
                        // In case the files don't match exactly....
                        if (!mustReplace)
                        {
                            //TODO: make faster? Or keep because it's needed functionality?
                            mustReplace = !BookData.FilesMatch(book, dbbook);
                        }
                        if (mustReplace)
                        {
                            //FAIL: project gutenberg LOVES changing their URLs. If the old list doesn't match the
                            // new list in number of files, then dump ALL the old values and replace them with the
                            // new ones.
                            // TODO: actually verify that the files match?
                            // Can't use clear because it doesn't work: dbbook.Files.Clear();
                            // (Seriously: it doesn't work because Files doesn't implement it and will throw)
                            // Remove back-to-front so indices stay valid as we delete.
                            for (int i = dbbook.Files.Count - 1; i >= 0; i--)
                            {
                                dbbook.Files.RemoveAt(i);
                            }
                            foreach (var file in book.Files)
                            {
                                if (file.Id != 0)
                                {
                                    file.Id = 0; // if it's straight from the catalog, it should have no id
                                }
                                dbbook.Files.Add(file);
                            }
                            retval++;
                        }
                    }
                }
            }
            break;
        }
        return(retval);
    }
}
/// <summary>
/// Merges the changes from a single read-in bookmarkfile into the local database.
/// Books unknown locally are inserted as "bookmark source" placeholder rows
/// (their EF ids reset to 0 so they re-insert cleanly); known books get their
/// Review, NavigationData, and Notes merged. Saves incrementally every
/// CHANGES_PER_SAVE changes and once more at the end.
/// </summary>
/// <param name="bmf">The bookmark file whose BookMarkList is merged in.</param>
/// <returns>Total number of changes applied to the database.</returns>
private static async Task<int> MergeAsync(BookMarkFile bmf)
{
    int nchanges = 0;
    // Now let's be very smart about combining this file in with the original.
    var bookdb = BookDataContext.Get();
    var currbooks = CommonQueries.BookGetAllWhichHaveUserData(bookdb);
    const int CHANGES_PER_SAVE = 1000;
    int nextDbSaveChange = CHANGES_PER_SAVE;
    foreach (var external in bmf.BookMarkList)
    {
        // Look in the in-memory list first, then fall back to a direct DB lookup.
        var book = currbooks.Find(b => b.BookId == external.BookId);
        if (book == null)
        {
            book = CommonQueries.BookGet(bookdb, external.BookId);
        }
        if (book == null)
        {
            // Prepend the BookMarkSource so that the book is clearly labeled
            // as being from a bookmark file (and therefore this is kind of a fake entry)
            if (!external.BookSource.StartsWith(BookData.BookSourceBookMarkFile))
            {
                external.BookSource = BookData.BookSourceBookMarkFile + external.BookSource;
            }
            // Must set all these ids to zero so that they get re-set by EF.
            if (external.Review != null)
            {
                external.Review.Id = 0;
            }
            if (external.Notes != null)
            {
                external.Notes.Id = 0;
                foreach (var note in external.Notes.Notes)
                {
                    note.Id = 0;
                }
            }
            if (external.NavigationData != null)
            {
                external.NavigationData.Id = 0;
            }
            external.DownloadData = null; // on this computer, nothing has been downloaded.
            CommonQueries.BookAdd(bookdb, external, CommonQueries.ExistHandling.IfNotExists);
            nchanges++;
            App.Error($"NOTE: adding external {external.BookId} name {external.Title}");
        }
        else
        {
            // Great -- now I can merge the UserReview, Notes, and BookNavigationData.
            int nbookchanges = 0;
            if (external.Review != null)
            {
                if (book.Review == null)
                {
                    external.Review.Id = 0; // clear it out so that EF will set to the correct value.
                    book.Review = external.Review;
                    nbookchanges++;
                }
                else
                {
                    nbookchanges += book.Review.Merge(external.Review);
                }
            }
            if (external.NavigationData != null)
            {
                if (book.NavigationData == null)
                {
                    external.NavigationData.Id = 0; // clear it out so that EF will set to the correct value.
                    book.NavigationData = external.NavigationData;
                    nbookchanges++;
                }
                else
                {
                    nbookchanges += book.NavigationData.Merge(external.NavigationData);
                }
            }
            if (external.Notes != null)
            {
                if (book.Notes == null)
                {
                    // Copy them all over
                    book.Notes = new BookNotes()
                    {
                        BookId = external.Notes.BookId,
                    };
                    foreach (var note in external.Notes.Notes)
                    {
                        note.Id = 0; // reset to zero to insert into the current book.
                        book.Notes.Notes.Add(note);
                    }
                    nbookchanges++;
                }
                else
                {
                    // Add in only the changed notes. The ids will not be the same
                    nbookchanges += book.Notes.Merge(external.Notes);
                }
            }
            if (nbookchanges > 0)
            {
                ; // hook to hang the debugger on.
            }
            nchanges += nbookchanges;
            // Save periodically so a huge merge doesn't accumulate unbounded pending changes.
            if (nchanges > nextDbSaveChange)
            {
                await bookdb.SaveChangesAsync();
                nextDbSaveChange = nchanges + CHANGES_PER_SAVE;
            }
        }
    }
    // And save at the end!
    if (nchanges > 0)
    {
        await bookdb.SaveChangesAsync();
    }
    return(nchanges);
}
/// <summary>
/// Reads a Gutenberg catalog file — a ZIP wrapping one giant rdf-files.tar —
/// adding each contained .rdf book record to the database via Read(). Saves
/// periodically (accumulating the whole ~60K-book catalog before one save
/// takes too much memory and crashes) and reports progress to the UI sink.
/// </summary>
/// <param name="ui">Progress callback sink (new books, totals, completion).</param>
/// <param name="bookdb">Database the books are written to.</param>
/// <param name="file">The catalog .zip file to read.</param>
/// <param name="token">Checked between entries; cancellation stops the read.</param>
/// <param name="updateType">Passed through to Read(); defaults to Full.</param>
/// <returns>The number of new book files added.</returns>
public static async Task<int> ReadZipTarRdfFileAsync(IndexReader ui, BookDataContext bookdb, Windows.Storage.StorageFile file, CancellationToken token, UpdateType updateType = UpdateType.Full)
{
    SaveAfterNFiles = SaveSkipCount;
    UiAfterNNodes = NodeReadCount;
    // FAIL: Gutenberg includes bad files
    HashSet<string> KnownBadFiles = new HashSet<string>()
    {
        "cache/epub/0/pg0.rdf",
        "cache/epub/999999/pg999999.rdf",
    };
    var startTime = DateTime.Now;
    int nnewfiles = 0;
    int nnodes = 0;
    List<BookData> newBooks = new List<BookData>();
    try
    {
        using (var stream = await file.OpenAsync(Windows.Storage.FileAccessMode.Read))
        {
            using (var reader = ReaderFactory.Open(stream.AsStream()))
            {
                while (reader.MoveToNextEntry())
                {
                    if (token.IsCancellationRequested)
                    {
                        break;
                    }
                    System.Diagnostics.Debug.WriteLine($"ZIPREAD: {reader.Entry.Key} size {reader.Entry.Size}");
                    // Is the rdf-files.tar file that Gutenberg uses.
                    // The zip file has one giant TAR file (rdf-files.tar) embedded in it.
                    if (reader.Entry.Key.EndsWith(".tar"))
                    {
                        using (var tarStream = reader.OpenEntryStream())
                        {
                            using (var tarReader = ReaderFactory.Open(tarStream))
                            {
                                while (tarReader.MoveToNextEntry())
                                {
                                    // Pull the whole entry into memory; each .rdf entry is small.
                                    MemoryStream ms = new MemoryStream((int)tarReader.Entry.Size);
                                    tarReader.WriteEntryTo(ms);
                                    ms.Position = 0;
                                    var sr = new StreamReader(ms);
                                    var text = sr.ReadToEnd();
                                    nnodes++;
                                    if (token.IsCancellationRequested)
                                    {
                                        break;
                                    }
                                    if (KnownBadFiles.Contains(tarReader.Entry.Key))
                                    {
                                        // Skip known bad files like entry 999999 -- has weird values for lots of stuff!
                                    }
                                    else
                                    {
                                        // Got a book; let the UI know.
                                        newBooks.Clear();
                                        if (tarReader.Entry.Key.Contains("62548"))
                                        {
                                            ; // useful hook for debugging.
                                        }
                                        // Reads and saves to database. And does a fancy merge if needed.
                                        int newCount = 0;
                                        try
                                        {
                                            newCount = Read(bookdb, tarReader.Entry.Key, text, newBooks, updateType);
                                        }
                                        catch (Exception rdfex)
                                        {
                                            // One bad .rdf shouldn't abort the whole catalog; log and keep going.
                                            Log($"Error: file {file.Name} name {tarReader.Entry.Key} exception {rdfex.Message}");
                                            newCount = 0;
                                        }
                                        nnewfiles += newCount;

                                        // BUGFIX: the old `if (nnewfiles > 6000 && nnewfiles < 9000)` assigned
                                        // the same value (100) in both branches, so the conditional was dead code.
                                        SaveSkipCount = 100; // save very frequently. Otherwise, ka-boom!

                                        if (nnewfiles >= SaveAfterNFiles)
                                        {
                                            // FAIL: must save periodically. Can't accumulate a large number
                                            // of books (e..g, 60K books in the catalog) and then save all at
                                            // once; it will take up too much memory and will crash.
                                            Log($"At index {CommonQueries.BookCount(bookdb)} file {file.Name} nfiles {nnewfiles}");
                                            CommonQueries.BookSaveChanges(bookdb);
                                            // Try resetting the singleton to reduce the number of crashes.
                                            BookDataContext.ResetSingleton("InitialBookData.Db");
                                            await Task.Delay(100); // Try a pause to reduce crashes.
                                            SaveAfterNFiles += SaveSkipCount;
                                        }
                                        if (newCount > 0)
                                        {
                                            foreach (var bookData in newBooks)
                                            {
                                                await ui.OnAddNewBook(bookData);
                                            }
                                        }
                                        if (nnodes >= UiAfterNNodes)
                                        {
                                            await ui.OnTotalBooks(nnodes);
                                            UiAfterNNodes += NodeReadCount;
                                        }
                                    }
                                }
                            }
                        }
                    }
                }
            }
        }
    }
    catch (Exception readEx)
    {
        // Catch-all: a truncated or corrupt download otherwise takes down the app.
        Log($"Error: reading Gutenberg ZIP file exception {readEx.Message}");
    }
    await ui.OnReadComplete(nnodes, nnewfiles);
    var delta = DateTime.Now.Subtract(startTime).TotalSeconds;
    // NOTE: double division — when nnewfiles is 0 these stats print Infinity, which is harmless.
    System.Diagnostics.Debug.WriteLine($"READ: {nnewfiles} in {delta} seconds = {nnewfiles / delta} fps or {delta / nnewfiles * 1000} ms per file");
    CommonQueries.BookSaveChanges(bookdb); // Woot, woot! I've got good book data!
    return nnewfiles;
}
/// <summary>
/// Recursively reads every .rdf file under the given folder into the database,
/// paging through folder items 1000 at a time. Stops early once the database
/// holds MaxFilesChecked books. Exceptions are logged, not rethrown.
/// </summary>
/// <param name="bookdb">Database the parsed books are written to.</param>
/// <param name="folder">Folder to scan; subfolders are recursed into.</param>
private static async Task ReadDirAsyncFolder(BookDataContext bookdb, StorageFolder folder)
{
    const uint pageSize = 1000;
    uint pageStart = 0;
    try
    {
        while (true)
        {
            var page = await folder.GetItemsAsync(pageStart, pageSize);
            if (page == null || page.Count == 0)
            {
                break; // nothing left to read.
            }
            foreach (var item in page)
            {
                if (item is StorageFile file && file.Name.EndsWith(".rdf"))
                {
                    var text = await FileIO.ReadTextAsync(file);
                    Read(bookdb, file.Name, text);
                    if (NRead >= NextIndexLogged)
                    {
                        Log($"At index {CommonQueries.BookCount(bookdb)} file {file.Name}");
                        NextIndexLogged += LogNIndex;
                    }
                    if (CommonQueries.BookCount(bookdb) >= MaxFilesChecked)
                    {
                        return; // hit the cap; stop the whole scan.
                    }
                }
                else if (item is StorageFolder subfolder)
                {
                    await ReadDirAsyncFolder(bookdb, subfolder);
                    if (CommonQueries.BookCount(bookdb) >= MaxFilesChecked)
                    {
                        return; // recursion hit the cap; stop here too.
                    }
                }
            }
            if (page.Count < pageSize)
            {
                break; // short page means this was the last one.
            }
            pageStart += pageSize;
        }
    }
    catch (Exception ex)
    {
        Log($"ERROR: got exception while reading {ex.Message}");
    }
}