/// <summary>
/// Returns the navigation data for <paramref name="bookData"/>'s BookId, adding a new
/// record first when the lookup finds none.
/// </summary>
/// <param name="bookdb">Context handed to every CommonQueries call made here.</param>
/// <param name="bookData">Book whose BookId keys the lookup and the new record.</param>
/// <returns>
/// The record that was found; null (after reporting through App.Error) when the lookup
/// still finds nothing after the add.
/// </returns>
public static BookNavigationData BookNavigationDataEnsure(BookDataContext bookdb, BookData bookData)
        {
            var existing = CommonQueries.BookNavigationDataFind(bookdb, bookData.BookId);
            if (existing != null)
            {
                // Already present: nothing to add or save.
                return existing;
            }

            // Nothing stored yet for this book: add a fresh record keyed by its BookId.
            var fresh = new BookNavigationData();
            fresh.BookId = bookData.BookId;
            CommonQueries.BookNavigationDataAdd(bookdb, fresh, CommonQueries.ExistHandling.IfNotExists);

            // Look the record up again rather than returning the local instance, then save.
            var stored = CommonQueries.BookNavigationDataFind(bookdb, bookData.BookId);
            CommonQueries.BookSaveChanges(bookdb);

            if (stored == null)
            {
                App.Error($"ERROR: trying to ensure navigation data, but don't have one.");
            }
            return stored;
        }
// Example no. 2
// 0
        /// <summary>
        /// Opens <paramref name="file"/> as an archive, walks every entry whose key ends in ".tar",
        /// and hands the text of each inner entry to <c>Read</c>. Changes are saved periodically
        /// while reading and once more at the end; progress is reported through <paramref name="ui"/>.
        /// </summary>
        /// <param name="ui">Receives the OnAddNewBook, OnTotalBooks and OnReadComplete callbacks.</param>
        /// <param name="bookdb">Context passed to <c>Read</c> and to the CommonQueries count/save calls.</param>
        /// <param name="file">Archive file that is opened for reading.</param>
        /// <param name="token">
        /// Polled before each outer entry and each inner entry. Once cancellation is requested the
        /// loops stop; the method still calls OnReadComplete, saves, and returns normally.
        /// </param>
        /// <param name="updateType">Forwarded unchanged to <c>Read</c>.</param>
        /// <returns>The sum of the counts returned by <c>Read</c> for the entries processed.</returns>
        /// <remarks>
        /// Exceptions are logged and not rethrown: one thrown by <c>Read</c> only skips that entry,
        /// any other exception ends the archive walk early.
        /// </remarks>
        public static async Task <int> ReadZipTarRdfFileAsync(IndexReader ui, BookDataContext bookdb, Windows.Storage.StorageFile file, CancellationToken token, UpdateType updateType = UpdateType.Full)
        {
            SaveAfterNFiles = SaveSkipCount;
            UiAfterNNodes   = NodeReadCount;

            // FAIL: Gutenberg includes bad files
            var knownBadFiles = new HashSet<string>()
            {
                "cache/epub/0/pg0.rdf",
                "cache/epub/999999/pg999999.rdf",
            };

            // Stopwatch is monotonic, so the timing below is unaffected by wall-clock adjustments.
            var stopwatch = System.Diagnostics.Stopwatch.StartNew();
            int nnewfiles = 0;
            int nnodes    = 0;
            var newBooks  = new List<BookData>();

            try
            {
                using (var stream = await file.OpenAsync(Windows.Storage.FileAccessMode.Read))
                using (var reader = ReaderFactory.Open(stream.AsStream()))
                {
                    while (reader.MoveToNextEntry())
                    {
                        if (token.IsCancellationRequested)
                        {
                            break;
                        }
                        System.Diagnostics.Debug.WriteLine($"ZIPREAD: {reader.Entry.Key} size {reader.Entry.Size}");

                        // Is the rdf-files.tar file that Gutenberg uses.
                        // The zip file has one giant TAR file (rdf-files.tar) embedded in it.
                        // Ordinal comparison: entry keys are paths, not culture-sensitive text.
                        if (!reader.Entry.Key.EndsWith(".tar", StringComparison.Ordinal))
                        {
                            continue;
                        }

                        using (var tarStream = reader.OpenEntryStream())
                        using (var tarReader = ReaderFactory.Open(tarStream))
                        {
                            while (tarReader.MoveToNextEntry())
                            {
                                // Every inner entry counts as a node, including ones skipped below.
                                nnodes++;
                                if (token.IsCancellationRequested)
                                {
                                    break;
                                }

                                var entryKey = tarReader.Entry.Key;
                                if (knownBadFiles.Contains(entryKey))
                                {
                                    // Skip known bad files like entry 999999 -- has weird values for lots of stuff!
                                    // The entry body is deliberately not read; the next MoveToNextEntry
                                    // call advances past it.
                                    continue;
                                }

                                // Buffer the entry and decode it; both streams are released right away.
                                string text;
                                using (var ms = new MemoryStream((int)tarReader.Entry.Size))
                                {
                                    tarReader.WriteEntryTo(ms);
                                    ms.Position = 0;
                                    using (var sr = new StreamReader(ms))
                                    {
                                        text = sr.ReadToEnd();
                                    }
                                }

                                // Got a book; let the UI know.
                                newBooks.Clear();

                                // Reads and saves to database. And does a fancy merge if needed.
                                int newCount;
                                try
                                {
                                    newCount = Read(bookdb, entryKey, text, newBooks, updateType);
                                }
                                catch (Exception rdfex)
                                {
                                    // A failing entry is logged and contributes nothing; reading continues.
                                    Log($"Error: file {file.Name} name {entryKey} exception {rdfex.Message}");
                                    newCount = 0;
                                }
                                nnewfiles += newCount;

                                SaveSkipCount = 100; // save very frequently. Otherwise, ka-boom!

                                if (nnewfiles >= SaveAfterNFiles)
                                {
                                    // FAIL: must save periodically. Can't accumulate a large number
                                    // of books (e.g., 60K books in the catalog) and then save all at
                                    // once; it will take up too much memory and will crash.
                                    Log($"At index {CommonQueries.BookCount(bookdb)} file {file.Name} nfiles {nnewfiles}");
                                    CommonQueries.BookSaveChanges(bookdb);

                                    // Try resetting the singleton to reduce the number of crashes.
                                    // NOTE(review): bookdb keeps being used after this reset -- confirm
                                    // that the instance passed in is still valid afterwards.
                                    BookDataContext.ResetSingleton("InitialBookData.Db");
                                    await Task.Delay(100); // Try a pause to reduce crashes.

                                    SaveAfterNFiles += SaveSkipCount;
                                }
                                if (newCount > 0)
                                {
                                    foreach (var bookData in newBooks)
                                    {
                                        await ui.OnAddNewBook(bookData);
                                    }
                                }
                                if (nnodes >= UiAfterNNodes)
                                {
                                    await ui.OnTotalBooks(nnodes);

                                    UiAfterNNodes += NodeReadCount;
                                }
                            }
                        }
                    }
                }
            }
            catch (Exception readEx)
            {
                // Something bad happened while walking the archive; log it and fall through so
                // the completion callback and the final save still run.
                Log($"Error: reading Gutenberg ZIP file exception {readEx.Message}");
            }

            // NOTE(review): the UI is told reading is complete before the final save below runs.
            await ui.OnReadComplete(nnodes, nnewfiles);

            var delta = stopwatch.Elapsed.TotalSeconds;

            System.Diagnostics.Debug.WriteLine($"READ: {nnewfiles} in {delta} seconds = {nnewfiles / delta} fps or {delta / nnewfiles * 1000} ms per file");

            CommonQueries.BookSaveChanges(bookdb); // Woot, woot! I've got good book data!
            return nnewfiles;
        }