예제 #1
0
        /// <summary>
        /// Reads every file referenced by <paramref name="contentRef"/> and returns the loaded content.
        /// </summary>
        /// <param name="contentRef">References to the HTML, CSS, image, font and remaining files of a book.</param>
        /// <returns>
        /// An <see cref="EpubContent"/> whose <c>AllFiles</c> map contains the HTML, CSS, image and font entries,
        /// plus every other entry of <c>contentRef.AllFiles</c>, the latter read via <c>ReadByteContentFile</c>.
        /// </returns>
        private static async Task <EpubContent> ReadContent(EpubContentRef contentRef)
        {
            EpubContent content = new EpubContent();

            // Typed collections are read one after another, text files first.
            content.Html   = await ReadTextContentFiles(contentRef.Html).ConfigureAwait(false);
            content.Css    = await ReadTextContentFiles(contentRef.Css).ConfigureAwait(false);
            content.Images = await ReadByteContentFiles(contentRef.Images).ConfigureAwait(false);
            content.Fonts  = await ReadByteContentFiles(contentRef.Fonts).ConfigureAwait(false);

            // Merge the typed collections into the combined map.
            content.AllFiles = new Dictionary <string, EpubContentFile>();
            foreach (var textEntry in content.Html.Concat(content.Css))
            {
                content.AllFiles.Add(textEntry.Key, textEntry.Value);
            }
            foreach (var byteEntry in content.Images.Concat(content.Fonts))
            {
                content.AllFiles.Add(byteEntry.Key, byteEntry.Value);
            }

            // Anything referenced but not loaded above is read as raw bytes.
            foreach (var fileRef in contentRef.AllFiles)
            {
                if (content.AllFiles.ContainsKey(fileRef.Key))
                {
                    continue;
                }
                var remainingFile = await ReadByteContentFile(fileRef.Value).ConfigureAwait(false);
                content.AllFiles.Add(fileRef.Key, remainingFile);
            }
            return content;
        }
예제 #2
0
        /// <summary>
        /// Lets the user pick an EPUB file, reads it into memory and shows its cover,
        /// title, author and top-level chapter titles in the window.
        /// </summary>
        /// <remarks>
        /// The cover bytes are also written next to the book as "&lt;book path&gt;.jpg".
        /// That export is best-effort: the displayed image is decoded from memory, so a
        /// failed or locked file no longer prevents the cover from being shown.
        /// </remarks>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void Load_Click(object sender, RoutedEventArgs e)
        {
            var openFileDialog = new Microsoft.Win32.OpenFileDialog()
            {
                Filter = "Epub Files (*.epub)|*.epub"
            };

            if (openFileDialog.ShowDialog() != true)
            {
                // Dialog was cancelled or closed: nothing to load.
                return;
            }

            // Opens a book and reads all of its content into memory
            epubBook = EpubReader.ReadBook(openFileDialog.FileName);

            // Book's title and authors (comma separated list)
            string title  = epubBook.Title;
            string author = epubBook.Author;

            // Book's cover image (null if there is no cover)
            byte[] coverImageContent = epubBook.CoverImage;
            if (coverImageContent != null)
            {
                string coverPath = openFileDialog.FileName + ".jpg";
                try
                {
                    // WriteAllBytes overwrites an existing file, so no prior delete is needed.
                    File.WriteAllBytes(coverPath, coverImageContent);
                }
                catch (IOException ex)
                {
                    System.Diagnostics.Debug.WriteLine("Could not export cover to " + coverPath + ": " + ex.Message);
                }
                catch (UnauthorizedAccessException ex)
                {
                    System.Diagnostics.Debug.WriteLine("Could not export cover to " + coverPath + ": " + ex.Message);
                }

                // Decode the cover of the book that was just opened. OnLoad makes the
                // bitmap read the whole stream during EndInit, so the stream can be
                // disposed right after.
                BitmapImage imageSource = new BitmapImage();
                using (MemoryStream coverImageStream = new MemoryStream(coverImageContent))
                {
                    imageSource.BeginInit();
                    imageSource.CacheOption  = BitmapCacheOption.OnLoad;
                    imageSource.StreamSource = coverImageStream;
                    imageSource.EndInit();
                }
                Image1.Source = imageSource;
            }
            else
            {
                // Do not keep showing the cover of a previously loaded book.
                Image1.Source = null;
            }

            Info.Text       = "Title: " + title + "\n" + "Author: " + author + "\n";
            Info.FontWeight = FontWeights.Bold;

            // Replace (rather than append to) the chapter list of a previously loaded book.
            Chapters.Items.Clear();
            foreach (EpubChapter chapter in epubBook.Chapters)
            {
                Chapters.Items.Add(chapter.Title);
            }
        }
예제 #3
0
        /// <summary>
        /// Loads every file listed in the book's package manifest from the archive into an in-memory <see cref="EpubContent"/>.
        /// </summary>
        /// <param name="epubArchive">Archive the manifest entries are read from.</param>
        /// <param name="book">Book providing <c>Schema.Package.Manifest</c> and <c>Schema.ContentDirectoryPath</c>.</param>
        /// <returns>
        /// Content with all manifest files in <c>AllFiles</c> (keyed by manifest href). XHTML 1.1 files are also
        /// added to <c>Html</c>, CSS files to <c>Css</c>, GIF/JPEG/PNG/SVG files to <c>Images</c> and
        /// TrueType/OpenType files to <c>Fonts</c>.
        /// </returns>
        /// <exception cref="Exception">A manifest entry is missing from the archive or is larger than <see cref="Int32.MaxValue"/> bytes.</exception>
        public static EpubContent ReadContentFilesToMemory(ZipArchive epubArchive, EpubBook book)
        {
            EpubContent content = new EpubContent();
            content.Html     = new Dictionary<string, EpubTextContentFile>();
            content.Css      = new Dictionary<string, EpubTextContentFile>();
            content.Images   = new Dictionary<string, EpubByteContentFile>();
            content.Fonts    = new Dictionary<string, EpubByteContentFile>();
            content.AllFiles = new Dictionary<string, EpubContentFile>();

            foreach (EpubManifestItem item in book.Schema.Package.Manifest)
            {
                string entryPath = ZipPathUtils.Combine(book.Schema.ContentDirectoryPath, item.Href);

                ZipArchiveEntry entry = epubArchive.GetEntry(entryPath);
                if (entry == null)
                {
                    throw new Exception(String.Format("EPUB parsing error: file {0} not found in archive.", entryPath));
                }
                if (entry.Length > Int32.MaxValue)
                {
                    throw new Exception(String.Format("EPUB parsing error: file {0} is bigger than 2 Gb.", entryPath));
                }

                string          fileName    = item.Href;
                string          mimeType    = item.MediaType;
                EpubContentType contentType = GetContentTypeByContentMimeType(mimeType);

                // These content types are read as text; everything else is read as raw bytes.
                bool isTextContent =
                    contentType == EpubContentType.XHTML_1_1 ||
                    contentType == EpubContentType.CSS ||
                    contentType == EpubContentType.OEB1_DOCUMENT ||
                    contentType == EpubContentType.OEB1_CSS ||
                    contentType == EpubContentType.XHTML_1_1XML ||
                    contentType == EpubContentType.DTBOOK ||
                    contentType == EpubContentType.DTBOOK_NCX;

                if (isTextContent)
                {
                    EpubTextContentFile textFile = new EpubTextContentFile();
                    textFile.FileName        = fileName;
                    textFile.ContentMimeType = mimeType;
                    textFile.ContentType     = contentType;

                    using (Stream entryStream = entry.Open())
                    {
                        if (entryStream == null)
                        {
                            throw new Exception(String.Format("Incorrect EPUB file: content file \"{0}\" specified in manifest is not found", fileName));
                        }
                        using (StreamReader reader = new StreamReader(entryStream))
                        {
                            textFile.Content = reader.ReadToEnd();
                        }
                    }

                    // Only these two text types have a dedicated collection.
                    if (contentType == EpubContentType.XHTML_1_1)
                    {
                        content.Html.Add(fileName, textFile);
                    }
                    else if (contentType == EpubContentType.CSS)
                    {
                        content.Css.Add(fileName, textFile);
                    }

                    // NOTE(review): an earlier (Russian) comment here said the content does not reach AllFiles
                    // because there is no EpubTextContentFile -> EpubContentFile (string -> byte[]) conversion;
                    // the text file object itself is added below - confirm whether that note is still relevant.
                    content.AllFiles.Add(fileName, textFile);
                }
                else
                {
                    EpubByteContentFile byteFile = new EpubByteContentFile();
                    byteFile.FileName        = fileName;
                    byteFile.ContentMimeType = mimeType;
                    byteFile.ContentType     = contentType;

                    using (Stream entryStream = entry.Open())
                    {
                        if (entryStream == null)
                        {
                            throw new Exception(String.Format("Incorrect EPUB file: content file \"{0}\" specified in manifest is not found", fileName));
                        }
                        // Pre-size the buffer with the entry's reported length.
                        using (MemoryStream buffer = new MemoryStream((int)entry.Length))
                        {
                            entryStream.CopyTo(buffer);
                            byteFile.Content = buffer.ToArray();
                        }
                    }

                    if (contentType == EpubContentType.IMAGE_GIF ||
                        contentType == EpubContentType.IMAGE_JPEG ||
                        contentType == EpubContentType.IMAGE_PNG ||
                        contentType == EpubContentType.IMAGE_SVG)
                    {
                        content.Images.Add(fileName, byteFile);
                    }
                    else if (contentType == EpubContentType.FONT_TRUETYPE ||
                             contentType == EpubContentType.FONT_OPENTYPE)
                    {
                        content.Fonts.Add(fileName, byteFile);
                    }

                    content.AllFiles.Add(fileName, byteFile);
                }
            }
            return content;
        }
예제 #4
0
        /// <summary>
        /// Handles the load button click: opens "test.epub", builds the index page and
        /// navigates the reader web view to it, showing a progress overlay for each phase.
        /// </summary>
        /// <remarks>
        /// If the book cannot be opened (<c>OpenBookAsync</c> yields null) the progress overlay
        /// is hidden and the handler returns without touching the reader view.
        /// </remarks>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private async void Button_Click(object sender, RoutedEventArgs e)
        {
            progressbar.Text = "Загрузка книги";
            await progressbar.ShowAsync();

            // Opening a book
            currentEpubBook = await EpubReader.OpenBookAsync("test.epub");

            if (currentEpubBook == null)
            {
                // Nothing was loaded: everything below dereferences the book, so stop here
                // and leave the load button enabled for another attempt.
                await progressbar.HideAsync();
                return;
            }

            loadEbookButton.Content   = "Loaded";
            loadEbookButton.IsEnabled = false;

            // TODO: the AllFiles dictionary of the book content does not seem to work well at the moment; needs tracing.
            // TODO: determine the first file of the book, via the spine or via the guide.
            // TODO: track taps on the screen and on its edges to page forward and backward.
            // TODO: detect when the last column of a file is on screen and the next file has to be loaded.

            await progressbar.HideAsync();

            progressbar.Text = "Форматирование";
            await progressbar.ShowAsync();

            // NOTE(review): this local is populated but not passed to CreateIndex() or used
            // afterwards in this method - confirm whether CreateIndex() should receive it.
            IndexFileSceleton index = new IndexFileSceleton();

            index.author     = currentEpubBook.Author;
            index.title      = currentEpubBook.Title;
            index.height     = (int)bookReaderWebViewControl.ActualHeight;
            index.chapters   = currentEpubBook.Chapters;
            index.xhtmlFiles = currentEpubBook.Content.Html;

            // --- Streaming HTML+JS content directly from the memory ---
            CreateIndex();

            Uri url = bookReaderWebViewControl.BuildLocalStreamUri("MemoryTag", "index.html");

            bookReaderWebViewControl.NavigateToLocalStreamUri(url, myMemoryResolver);

            // Now we could have a look at the chapters list
            chaptersMenuButton.IsEnabled = true;
            await progressbar.HideAsync();
        }
예제 #5
0
        /// <summary>
        /// Reads the EPUB at <paramref name="fileName"/>, writes its fonts to the instance storage
        /// "Fonts" folder and converts the files of its reading order into one flow document.
        /// </summary>
        /// <param name="fileName">Path of the EPUB file; also stored in <c>Settings.BookSettings.BookFileName</c>.</param>
        /// <returns>
        /// The flow document, and a dictionary that maps each key of the book's font collection
        /// to the path of the font file written to disk.
        /// </returns>
        public (HtmlToFlowDocument.Dom.FlowDocument Document, Dictionary <string, string> FontDictionary) OpenEbook(string fileName)
        {
            var epubBook = EpubReader.ReadBook(fileName);

            _bookContent = epubBook.Content;

            Dictionary <string, EpubTextContentFile> cssFiles = _bookContent.Css;
            var readingOrder = epubBook.ReadingOrder;

            // ----------------- handle fonts ------------------------------
            var fontDictionary = new Dictionary <string, string>(); // Key is the font name, value is the absolute path to the font file
            var fontPath       = Path.Combine(_instanceStorageService.InstanceStoragePath, "Fonts");

            Directory.CreateDirectory(fontPath);

            foreach (var entry in _bookContent.Fonts)
            {
                // Only the file-name part is used, so every font lands directly in the Fonts folder.
                var fontFileName = Path.Combine(fontPath, Path.GetFileName(entry.Value.FileName));
                File.WriteAllBytes(fontFileName, entry.Value.Content);
                fontDictionary.Add(entry.Key, fontFileName);
            }

            // -------------------------------------------------------------

            // Resolves a style sheet referenced from an HTML file; returns null when it cannot be found.
            string GetStyleSheet(string name, string htmlFileNameReferencedFrom)
            {
                EpubTextContentFile cssFile;
                // calculate absolute name with reference to htmlFileNameReferencedFrom
                var absoluteName = HtmlToFlowDocument.CssStylesheets.GetAbsoluteFileNameForFileRelativeToHtmlFile(name, htmlFileNameReferencedFrom);

                if (cssFiles.TryGetValue(absoluteName, out cssFile))
                {
                    return(cssFile.Content);
                }

                // if this could not resolve the name, then try to go to parent directories
                var referencePath = htmlFileNameReferencedFrom;
                int separatorIndex;
                while ((separatorIndex = referencePath.LastIndexOf('/')) >= 0)
                {
                    // Cut at the separator itself. The previous "idx - 1" also dropped the last
                    // character of the parent segment and threw ArgumentOutOfRangeException for
                    // a separator at index 0 (a path starting with "/").
                    referencePath = referencePath.Substring(0, separatorIndex);
                    absoluteName  = HtmlToFlowDocument.CssStylesheets.GetAbsoluteFileNameForFileRelativeToHtmlFile(name, referencePath);
                    if (cssFiles.TryGetValue(absoluteName, out cssFile))
                    {
                        return(cssFile.Content);
                    }
                }

                // if this was not successful, then try it with the name alone
                if (cssFiles.TryGetValue(name, out cssFile))
                {
                    return(cssFile.Content);
                }

                return(null);
            }

            // Entire HTML content of the book
            var converter = new HtmlToFlowDocument.Converter()
            {
                AttachSourceAsTags = true
            };
            var flowDocument = new HtmlToFlowDocument.Dom.FlowDocument();

            foreach (EpubTextContentFile htmlFile in readingOrder)
            {
                string htmlContent = htmlFile.Content;
                var    textElement = converter.ConvertXHtml(htmlContent, false, GetStyleSheet, htmlFile.FileName); // create sections
                flowDocument.AppendChild(textElement);                                                             // and add them to the flow document
            }
            Settings.BookSettings.BookFileName = fileName;
            return(flowDocument, fontDictionary);
        }
예제 #6
0
        /// <summary>
        /// Reads every not-yet-finished .epub file in <paramref name="files"/>, collects sentence-like
        /// text fragments from its HTML files and passes the collected text to <c>AnylizeText</c>.
        /// </summary>
        /// <remarks>
        /// Runs synchronously despite its name. Every processed file, successful or not, is added to
        /// <c>FinishedBooks</c> and appended to "finished.txt" under <c>ROOT</c>.
        /// NOTE(review): collected fragments are shared across all books of one call, so the text handed to
        /// <c>AnylizeText</c> for a book also contains the fragments of the books before it - confirm this is intended.
        /// </remarks>
        /// <param name="files">Candidate files; entries without an ".epub" extension are skipped.</param>
        public static void GetDataAsync(List <FileInfo> files)
        {
            // Fragments in insertion order, plus a set with the same elements for O(1) duplicate checks.
            List <string>    lines     = new List <string>();
            HashSet <string> seenLines = new HashSet <string>();

            // Built once instead of once per HTML file: text between a '>' and the next '<'.
            Regex  textBetweenTags     = new Regex(">(.*?)<");
            char[] sentencePunctuation = { '.', '?', '!' };

            foreach (FileInfo fi in files)
            {
                if (!fi.Extension.Equals(".epub", StringComparison.OrdinalIgnoreCase))
                {
                    continue;
                }
                if (FinishedBooks.Contains(fi.Name))
                {
                    continue;
                }

                try
                {
                    EpubBook epubBook = EpubReader.ReadBook(fi.FullName);
                    string   title    = epubBook.Title;

                    foreach (EpubTextContentFile htmlFile in epubBook.Content.Html.Values)
                    {
                        foreach (Match m in textBetweenTags.Matches(htmlFile.Content))
                        {
                            string value = m.Groups[1].Value.Replace("&nbsp;", string.Empty).Trim();

                            // Keep multi-word fragments longer than 3 characters that were not collected yet
                            // and do not start with a lower-case character.
                            // NOTE(review): the duplicate check compares the raw fragment with the stored
                            // (transformed) fragments, as before - confirm that is intended.
                            if (string.IsNullOrEmpty(value) || value.Length <= 3 ||
                                seenLines.Contains(value) || value.Split(' ').Length < 2 ||
                                char.IsLower(value[0]))
                            {
                                continue;
                            }

                            // Must contain at least one of '.', '?' or '!'.
                            if (value.IndexOfAny(sentencePunctuation) < 0)
                            {
                                continue;
                            }

                            // Must start with a letter or a dash.
                            if (!char.IsLetter(value[0]) && value[0] != '-')
                            {
                                continue;
                            }

                            string transformed = Program.CyrilicCharConvertor.Transform(value);
                            lines.Add(transformed);
                            seenLines.Add(transformed);
                        }
                    }

                    // Every fragment is preceded by a single space, including the first one.
                    string Content = lines.Count == 0 ? "" : " " + string.Join(" ", lines);
                    AnylizeText(Content);
                    Console.WriteLine($"[SUCC] - {title}");
                }
                catch (Exception e)
                {
                    Console.WriteLine($"[ERR] - {fi.Name}");
                    // Keep the failure reason instead of discarding it; standard output stays unchanged.
                    Console.Error.WriteLine(e);
                }
                finally
                {
                    FinishedBooks.Add(fi.Name);
                    File.AppendAllText(ROOT + @"\finished.txt", fi.Name + Environment.NewLine);
                }
            }
        }