/// <summary>
/// Reads every file referenced by <paramref name="contentRef"/> into memory and
/// returns the result as a single <see cref="EpubContent"/>.
/// </summary>
/// <param name="contentRef">References to the HTML, CSS, image, font and remaining files of a book.</param>
/// <returns>
/// The loaded content. <c>AllFiles</c> holds the HTML, CSS, image and font files first,
/// followed by every referenced file that was not already part of one of those groups.
/// </returns>
private static async Task<EpubContent> ReadContent(EpubContentRef contentRef)
{
    EpubContent content = new EpubContent();

    // The four typed groups are loaded one after another, text files first.
    content.Html = await ReadTextContentFiles(contentRef.Html).ConfigureAwait(false);
    content.Css = await ReadTextContentFiles(contentRef.Css).ConfigureAwait(false);
    content.Images = await ReadByteContentFiles(contentRef.Images).ConfigureAwait(false);
    content.Fonts = await ReadByteContentFiles(contentRef.Fonts).ConfigureAwait(false);

    // Build the combined index in the order HTML, CSS, images, fonts.
    content.AllFiles = new Dictionary<string, EpubContentFile>();
    foreach (var htmlEntry in content.Html)
    {
        content.AllFiles.Add(htmlEntry.Key, htmlEntry.Value);
    }
    foreach (var cssEntry in content.Css)
    {
        content.AllFiles.Add(cssEntry.Key, cssEntry.Value);
    }
    foreach (var imageEntry in content.Images)
    {
        content.AllFiles.Add(imageEntry.Key, imageEntry.Value);
    }
    foreach (var fontEntry in content.Fonts)
    {
        content.AllFiles.Add(fontEntry.Key, fontEntry.Value);
    }

    // Anything referenced by the book that is not in one of the typed groups
    // is read as raw bytes and appended to the index.
    foreach (var fileRef in contentRef.AllFiles)
    {
        if (content.AllFiles.ContainsKey(fileRef.Key))
        {
            continue;
        }

        var otherFile = await ReadByteContentFile(fileRef.Value).ConfigureAwait(false);
        content.AllFiles.Add(fileRef.Key, otherFile);
    }

    return content;
}
/// <summary>
/// Handles the "Load" button: lets the user pick an EPUB file, reads it into memory
/// and shows its title, author, cover image and chapter titles in the window.
/// </summary>
/// <remarks>
/// The cover image is also written next to the selected file as "&lt;file name&gt;.jpg".
/// </remarks>
/// <param name="sender">The control that raised the event.</param>
/// <param name="e">Event data for the click.</param>
private void Load_Click(object sender, RoutedEventArgs e)
{
    var openFileDialog = new Microsoft.Win32.OpenFileDialog()
    {
        Filter = "Epub Files (*.epub)|*.epub"
    };

    // ShowDialog returns a nullable bool; anything but true means nothing was chosen.
    if (openFileDialog.ShowDialog() != true)
    {
        return;
    }

    // Opens a book and reads all of its content into memory
    epubBook = EpubReader.ReadBook(openFileDialog.FileName);

    string title = epubBook.Title;

    // Book's authors (comma separated list)
    string author = epubBook.Author;

    // Book's cover image (null if there is no cover)
    byte[] coverImageContent = epubBook.CoverImage;
    if (coverImageContent != null)
    {
        // WriteAllBytes overwrites an existing file, so no prior delete is needed.
        File.WriteAllBytes(openFileDialog.FileName + ".jpg", coverImageContent);

        // Show the cover of the book that was just opened. The image is decoded from
        // the in-memory bytes; OnLoad makes the bitmap read the whole stream during
        // EndInit, so the stream can be disposed right afterwards.
        BitmapImage imageSource = new BitmapImage();
        using (MemoryStream coverImageStream = new MemoryStream(coverImageContent))
        {
            imageSource.BeginInit();
            imageSource.CacheOption = BitmapCacheOption.OnLoad;
            imageSource.StreamSource = coverImageStream;
            imageSource.EndInit();
        }
        Image1.Source = imageSource;
    }

    Info.Text = "Title: " + title + "\n" + "Author: " + author + "\n";
    Info.FontWeight = FontWeights.Bold;

    // List the titles of the book's chapters.
    foreach (EpubChapter chapter in epubBook.Chapters)
    {
        Chapters.Items.Add(chapter.Title);
    }
}
/// <summary>
/// Reads every file listed in the book's manifest out of the EPUB archive into an
/// in-memory <see cref="EpubContent"/> structure.
/// </summary>
/// <param name="epubArchive">The opened EPUB archive the entries are read from.</param>
/// <param name="book">The book whose schema supplies the manifest and the content directory path.</param>
/// <returns>
/// The content that was read: XHTML 1.1 files in <c>Html</c>, CSS files in <c>Css</c>,
/// GIF/JPEG/PNG/SVG files in <c>Images</c>, TrueType/OpenType files in <c>Fonts</c>,
/// and every manifest file in <c>AllFiles</c>. Keys are the manifest hrefs.
/// </returns>
/// <exception cref="Exception">
/// A manifest item has no entry in the archive, or its entry is larger than <see cref="Int32.MaxValue"/> bytes.
/// </exception>
public static EpubContent ReadContentFilesToMemory(ZipArchive epubArchive, EpubBook book)
{
    EpubContent content = new EpubContent();
    content.Html = new Dictionary<string, EpubTextContentFile>();
    content.Css = new Dictionary<string, EpubTextContentFile>();
    content.Images = new Dictionary<string, EpubByteContentFile>();
    content.Fonts = new Dictionary<string, EpubByteContentFile>();
    content.AllFiles = new Dictionary<string, EpubContentFile>();

    foreach (EpubManifestItem item in book.Schema.Package.Manifest)
    {
        // Manifest hrefs are combined with the content directory to get the archive path.
        string entryPath = ZipPathUtils.Combine(book.Schema.ContentDirectoryPath, item.Href);
        ZipArchiveEntry entry = epubArchive.GetEntry(entryPath);
        if (entry == null)
        {
            throw new Exception(String.Format("EPUB parsing error: file {0} not found in archive.", entryPath));
        }
        if (entry.Length > Int32.MaxValue)
        {
            throw new Exception(String.Format("EPUB parsing error: file {0} is bigger than 2 Gb.", entryPath));
        }

        string href = item.Href;
        string mimeType = item.MediaType;
        EpubContentType type = GetContentTypeByContentMimeType(mimeType);

        // These content types are read as text; everything else is read as raw bytes.
        bool isText = type == EpubContentType.XHTML_1_1
            || type == EpubContentType.CSS
            || type == EpubContentType.OEB1_DOCUMENT
            || type == EpubContentType.OEB1_CSS
            || type == EpubContentType.XHTML_1_1XML
            || type == EpubContentType.DTBOOK
            || type == EpubContentType.DTBOOK_NCX;

        if (isText)
        {
            EpubTextContentFile textFile = new EpubTextContentFile();
            textFile.FileName = href;
            textFile.ContentMimeType = mimeType;
            textFile.ContentType = type;

            using (Stream entryStream = entry.Open())
            {
                if (entryStream == null)
                {
                    throw new Exception(String.Format("Incorrect EPUB file: content file \"{0}\" specified in manifest is not found", href));
                }
                using (StreamReader reader = new StreamReader(entryStream))
                {
                    textFile.Content = reader.ReadToEnd();
                }
            }

            // Only XHTML 1.1 and CSS have a dedicated dictionary; the other text
            // types are reachable through AllFiles only.
            if (type == EpubContentType.XHTML_1_1)
            {
                content.Html.Add(href, textFile);
            }
            else if (type == EpubContentType.CSS)
            {
                content.Css.Add(href, textFile);
            }

            // The text file object itself is registered in AllFiles; its content
            // stays a string and is not converted to byte[].
            content.AllFiles.Add(href, textFile);
        }
        else
        {
            EpubByteContentFile byteFile = new EpubByteContentFile();
            byteFile.FileName = href;
            byteFile.ContentMimeType = mimeType;
            byteFile.ContentType = type;

            using (Stream entryStream = entry.Open())
            {
                if (entryStream == null)
                {
                    throw new Exception(String.Format("Incorrect EPUB file: content file \"{0}\" specified in manifest is not found", href));
                }
                // The buffer is pre-sized to the entry length (checked above to fit in an int).
                using (MemoryStream buffer = new MemoryStream((int)entry.Length))
                {
                    entryStream.CopyTo(buffer);
                    byteFile.Content = buffer.ToArray();
                }
            }

            bool isImage = type == EpubContentType.IMAGE_GIF
                || type == EpubContentType.IMAGE_JPEG
                || type == EpubContentType.IMAGE_PNG
                || type == EpubContentType.IMAGE_SVG;
            bool isFont = type == EpubContentType.FONT_TRUETYPE
                || type == EpubContentType.FONT_OPENTYPE;

            if (isImage)
            {
                content.Images.Add(href, byteFile);
            }
            else if (isFont)
            {
                content.Fonts.Add(href, byteFile);
            }

            content.AllFiles.Add(href, byteFile);
        }
    }

    return content;
}
/// <summary>
/// Handles the load button: opens "test.epub", creates the index page and navigates
/// the reader web view to it, updating the progress indicator text along the way.
/// </summary>
/// <param name="sender">The control that raised the event.</param>
/// <param name="e">Event data for the click.</param>
private async void Button_Click(object sender, RoutedEventArgs e)
{
    progressbar.Text = "Загрузка книги";
    await progressbar.ShowAsync();

    // Opening a book
    currentEpubBook = await EpubReader.OpenBookAsync("test.epub");
    if (currentEpubBook == null)
    {
        // Nothing was loaded: hide the indicator and stop here instead of
        // dereferencing the missing book below.
        await progressbar.HideAsync();
        return;
    }

    loadEbookButton.Content = "Loaded";
    loadEbookButton.IsEnabled = false;

    // TODO: the AllFiles dictionary of the book content does not seem to work well at the moment; needs tracing.
    // TODO: determine the first file of the book, via the spine or via the guide.
    // TODO: track taps on the screen and on its edges to page forward and back.
    // TODO: detect when the last column of a file is on screen and the next file has to be loaded.

    await progressbar.HideAsync();
    progressbar.Text = "Форматирование";
    await progressbar.ShowAsync();

    // NOTE(review): this object is populated but never passed to CreateIndex() or
    // stored anywhere in this method — confirm whether CreateIndex() should receive it.
    IndexFileSceleton index = new IndexFileSceleton();
    index.author = currentEpubBook.Author;
    index.title = currentEpubBook.Title;
    index.height = (int)bookReaderWebViewControl.ActualHeight;
    index.chapters = currentEpubBook.Chapters;
    index.xhtmlFiles = currentEpubBook.Content.Html;

    // Streaming HTML+JS content directly from memory
    CreateIndex();
    Uri url = bookReaderWebViewControl.BuildLocalStreamUri("MemoryTag", "index.html");
    bookReaderWebViewControl.NavigateToLocalStreamUri(url, myMemoryResolver);

    // Now the chapters list can be opened
    chaptersMenuButton.IsEnabled = true;

    await progressbar.HideAsync();
}
/// <summary>
/// Opens an EPUB file, writes its fonts to the instance storage folder and converts
/// the HTML files of its reading order into one flow document.
/// </summary>
/// <param name="fileName">Path of the EPUB file to open.</param>
/// <returns>
/// The flow document built from the reading order, and a dictionary that maps each font key
/// of the book to the absolute path of the font file written to disk.
/// </returns>
public (HtmlToFlowDocument.Dom.FlowDocument Document, Dictionary<string, string> FontDictionary) OpenEbook(string fileName)
{
    var epubBook = EpubReader.ReadBook(fileName);
    _bookContent = epubBook.Content;
    Dictionary<string, EpubTextContentFile> cssFiles = _bookContent.Css;
    var readingOrder = epubBook.ReadingOrder;

    // ----------------- handle fonts ------------------------------
    // Key is the font name, value is the absolute path to the font file
    var fontDictionary = new Dictionary<string, string>();
    var fontPath = Path.Combine(_instanceStorageService.InstanceStoragePath, "Fonts");
    Directory.CreateDirectory(fontPath);

    foreach (var entry in _bookContent.Fonts)
    {
        // Only the file name part is kept; all fonts end up directly in the Fonts folder.
        var fontFileName = Path.Combine(fontPath, Path.GetFileName(entry.Value.FileName));
        File.WriteAllBytes(fontFileName, entry.Value.Content);
        fontDictionary.Add(entry.Key, fontFileName);
    }
    // -------------------------------------------------------------

    // Returns the content of the style sheet `name` referenced from the given HTML file,
    // or null when no matching CSS file is found in the book.
    string GetStyleSheet(string name, string htmlFileNameReferencedFrom)
    {
        EpubTextContentFile cssFile;

        // calculate absolute name with reference to htmlFileNameReferencedFrom
        var absoluteName = HtmlToFlowDocument.CssStylesheets.GetAbsoluteFileNameForFileRelativeToHtmlFile(name, htmlFileNameReferencedFrom);
        if (cssFiles.TryGetValue(absoluteName, out cssFile))
        {
            return cssFile.Content;
        }

        // if this could not resolve the name, then try to go to parent directories
        var referencedFrom = htmlFileNameReferencedFrom;
        int slashIndex;
        while ((slashIndex = referencedFrom.LastIndexOf('/')) >= 0)
        {
            // Cut at the last slash itself. Cutting one character earlier would also drop
            // the last character of the parent segment and would throw for a slash at index 0.
            referencedFrom = referencedFrom.Substring(0, slashIndex);
            absoluteName = HtmlToFlowDocument.CssStylesheets.GetAbsoluteFileNameForFileRelativeToHtmlFile(name, referencedFrom);
            if (cssFiles.TryGetValue(absoluteName, out cssFile))
            {
                return cssFile.Content;
            }
        }

        // if this was not successful, then try it with the name alone
        if (cssFiles.TryGetValue(name, out cssFile))
        {
            return cssFile.Content;
        }

        // A missing style sheet is reported as null rather than as an exception.
        return null;
    }

    // Entire HTML content of the book
    var converter = new HtmlToFlowDocument.Converter() { AttachSourceAsTags = true };
    var flowDocument = new HtmlToFlowDocument.Dom.FlowDocument();

    foreach (EpubTextContentFile htmlFile in readingOrder)
    {
        // create a section for each file and add it to the flow document
        var textElement = converter.ConvertXHtml(htmlFile.Content, false, GetStyleSheet, htmlFile.FileName);
        flowDocument.AppendChild(textElement);
    }

    Settings.BookSettings.BookFileName = fileName;
    return (flowDocument, fontDictionary);
}
/// <summary>
/// Reads every not-yet-processed ".epub" file in <paramref name="files"/>, collects sentence-like
/// text fragments found between HTML tags and passes the collected text to <c>AnylizeText</c>.
/// </summary>
/// <remarks>
/// Despite its name, this method runs synchronously. Each processed file name is added to
/// <c>FinishedBooks</c> and appended to "finished.txt", whether or not reading it succeeded.
/// </remarks>
/// <param name="files">The files to inspect; files without the ".epub" extension are skipped.</param>
/// <exception cref="ArgumentNullException"><paramref name="files"/> is null.</exception>
public static void GetDataAsync(List<FileInfo> files)
{
    if (files == null)
    {
        throw new ArgumentNullException(nameof(files));
    }

    // NOTE(review): the collected lines are shared by all books, so the text analysed for a
    // book also contains the lines of the books processed before it — confirm this is intended.
    List<string> lines = new List<string>();

    // Mirrors `lines` so the duplicate check does not scan the whole list for every fragment.
    HashSet<string> knownLines = new HashSet<string>();

    // Captures the text between a closing '>' and the next '<'.
    Regex textBetweenTags = new Regex(">(.*?)<");
    char[] sentenceMarks = { '.', '?', '!' };

    foreach (FileInfo fi in files)
    {
        if (!string.Equals(fi.Extension, ".epub", StringComparison.OrdinalIgnoreCase))
        {
            continue;
        }
        if (FinishedBooks.Contains(fi.Name))
        {
            continue;
        }

        try
        {
            EpubBook epubBook = EpubReader.ReadBook(fi.FullName);
            string title = epubBook.Title;

            foreach (EpubTextContentFile htmlFile in epubBook.Content.Html.Values)
            {
                foreach (Match m in textBetweenTags.Matches(htmlFile.Content))
                {
                    // Strip non-breaking spaces (as a character or as an HTML entity) only.
                    // Ordinary spaces must survive: a fragment needs at least one of them
                    // to pass the filter below.
                    string value = m.Groups[1].Value
                        .Replace("\u00A0", string.Empty)
                        .Replace("&nbsp;", string.Empty)
                        .Trim();

                    // Too short, or already collected.
                    if (value.Length <= 3 || knownLines.Contains(value))
                    {
                        continue;
                    }

                    // Must contain a space and must not start with a lower-case character.
                    char first = value[0];
                    if (value.IndexOf(' ') < 0 || char.IsLower(first))
                    {
                        continue;
                    }

                    // Must contain at least one of '.', '?' or '!'.
                    if (value.IndexOfAny(sentenceMarks) < 0)
                    {
                        continue;
                    }

                    // Must start with a letter or with '-'.
                    if (!char.IsLetter(first) && first != '-')
                    {
                        continue;
                    }

                    string converted = Program.CyrilicCharConvertor.Transform(value);
                    lines.Add(converted);
                    knownLines.Add(converted);
                }
            }

            // Every line is preceded by a single space, including the first one.
            string content = lines.Count == 0 ? "" : " " + string.Join(" ", lines);
            AnylizeText(content);
            Console.WriteLine($"[SUCC] - {title}");
        }
        catch (Exception e)
        {
            // One broken book must not stop the batch, but the reason should not be lost.
            Console.WriteLine($"[ERR] - {fi.Name}");
            Console.Error.WriteLine(e.Message);
        }
        finally
        {
            FinishedBooks.Add(fi.Name);
            File.AppendAllText(ROOT + @"\finished.txt", fi.Name + Environment.NewLine);
        }
    }
}