/// <summary>
/// Creates the index.html needed for the JS+HTML book layout from all files included in the book
/// and stores it in <c>currentEpubBook.Content.Html</c> under the key "index.html".
/// After this, index.html can be used as the entry point for rendering the book in a WebView.
/// </summary>
/// <remarks>
/// If an "index.html" entry already exists it is overwritten and the update is reported
/// on the debug output.
/// </remarks>
private void CreateIndex()
{
    const string indexFileName = "index.html";

    IndexFileMonocleGenerator generator = new IndexFileMonocleGenerator(currentEpubBook.Title, currentEpubBook.Author, 530);
    EpubTextContentFile indexFile = generator.CreateIndex(currentEpubBook.Chapters, currentEpubBook.Content.Html);

    // Look the key up first instead of relying on Add() throwing ArgumentException:
    // replacing an existing entry is an expected case, not an exceptional one.
    bool replacesExisting = currentEpubBook.Content.Html.ContainsKey(indexFileName);
    currentEpubBook.Content.Html[indexFileName] = indexFile;

    if (replacesExisting)
    {
        Debug.WriteLine($"\n---------------------------\nAn element with Key = \"index.html\" UPDATED.\n");
    }
}
/// <summary>
/// Writes the text of an EPUB text content file to the console: the content is parsed as HTML,
/// every text node is trimmed and printed on its own line, followed by one empty line.
/// </summary>
/// <param name="textContentFile">Content file whose <c>Content</c> is parsed as HTML.</param>
private static void PrintTextContentFile(EpubTextContentFile textContentFile)
{
    HtmlDocument htmlDocument = new HtmlDocument();
    htmlDocument.LoadHtml(textContentFile.Content);

    StringBuilder sb = new StringBuilder();

    // SelectNodes returns null (not an empty collection) when the XPath matches nothing,
    // e.g. for an empty document, so guard before enumerating.
    var textNodes = htmlDocument.DocumentNode.SelectNodes("//text()");
    if (textNodes != null)
    {
        foreach (HtmlNode node in textNodes)
        {
            sb.AppendLine(node.InnerText.Trim());
        }
    }

    string contentText = sb.ToString();
    Console.WriteLine(contentText);
    Console.WriteLine();
}
/// <summary>
/// Imports a single EPUB text content file as a topic below <paramref name="parentFolder"/>.
/// </summary>
/// <param name="parentFolder">Element passed to <c>CreateSMTopic</c> as the parent of the new topic.</param>
/// <param name="book">Book passed to <c>CreateChapterReference</c>.</param>
/// <param name="text">Content file whose <c>Content</c> is imported.</param>
/// <param name="htmlFolder">Passed to <c>UpdateLocalLinks</c> together with <paramref name="imageFolder"/>.</param>
/// <param name="imageFolder">Passed to <c>UpdateLocalLinks</c> together with <paramref name="htmlFolder"/>.</param>
/// <returns>The id returned by <c>CreateSMTopic</c>, or -1 if any step throws.</returns>
private int ImportChapter(IElement parentFolder, EpubBook book, EpubTextContentFile text, string htmlFolder, string imageFolder)
{
    int importedId = -1;

    try
    {
        string markup = text.Content;
        var references = CreateChapterReference(book, text.Content);
        markup = UpdateLocalLinks(markup, htmlFolder, imageFolder);

        var topicId = CreateSMTopic(markup, references, parentFolder);

        // The looked-up element is not used, but the registry access stays inside the try
        // so that a failure here still results in -1.
        var registeredElement = Svc.SM.Registry.Element[topicId];

        importedId = topicId;
    }
    catch (Exception failure)
    {
        LogTo.Debug($"Failed to import chapter with exception {failure}");
    }

    return importedId;
}
/// <summary>
/// Loads the text of every referenced content file, one after another, and returns the
/// loaded files under the same keys as the incoming references.
/// </summary>
/// <param name="textContentFileRefs">Content file references keyed by name.</param>
/// <returns>A new dictionary with one loaded <c>EpubTextContentFile</c> per reference.</returns>
private static async Task<Dictionary<string, EpubTextContentFile>> ReadTextContentFiles(Dictionary<string, EpubTextContentFileRef> textContentFileRefs)
{
    var loadedFiles = new Dictionary<string, EpubTextContentFile>();

    foreach (var entry in textContentFileRefs)
    {
        EpubTextContentFileRef source = entry.Value;

        // Copy the metadata first, then read the text.
        var loaded = new EpubTextContentFile();
        loaded.FileName = source.FileName;
        loaded.ContentType = source.ContentType;
        loaded.ContentMimeType = source.ContentMimeType;
        loaded.Content = await source.ReadContentAsTextAsync().ConfigureAwait(false);

        loadedFiles.Add(entry.Key, loaded);
    }

    return loadedFiles;
}
/// <summary>
/// Builds the in-memory index.html from the template by injecting a generated function body
/// after the <c>getComponents:</c> marker (built from <paramref name="xhtmlFiles"/>) and after
/// the <c>getContents:</c> marker (built from <paramref name="chapters"/>).
/// </summary>
/// <param name="chapters">Chapters handed to <c>BuildChapterListToString</c>.</param>
/// <param name="xhtmlFiles">XHTML files handed to <c>BuildAllXHTMLFilesToString</c>.</param>
/// <returns>
/// A content file named "index.html" (XHTML 1.1, MIME type "text/html") holding the filled template.
/// The caller is responsible for adding it to the book's file list.
/// </returns>
/// <exception cref="InvalidOperationException">
/// The template does not contain one of the two markers.
/// </exception>
public EpubTextContentFile CreateIndex(List<EpubChapter> chapters, Dictionary<string, EpubTextContentFile> xhtmlFiles)
{
    const string componentsTag = "getComponents:";
    const string contentsTag = "getContents:";
    const string injectionPrefix = " function() {\nreturn [";
    const string injectionPostfix = "];\n},\n";

    string indexFileContent = _templateContent;

    // Inject the component list right after the "getComponents:" marker.
    string allComponents = BuildAllXHTMLFilesToString(xhtmlFiles);
    indexFileContent = InjectAfterMarker(indexFileContent, componentsTag, injectionPrefix + allComponents + injectionPostfix);

    // The contents marker is searched in the already modified text, as before.
    string allContent = BuildChapterListToString(chapters);
    indexFileContent = InjectAfterMarker(indexFileContent, contentsTag, injectionPrefix + allContent + injectionPostfix);

    EpubTextContentFile indexMemoryItem = new EpubTextContentFile()
    {
        Content = indexFileContent,
        FileName = "index.html",
        ContentType = EpubContentType.XHTML_1_1,
        ContentMimeType = "text/html"
    };

    Debug.WriteLine(string.Format("-------- Here comes the final result:------- \n{0}\n ------------------------------", indexFileContent));
    return indexMemoryItem;
}

/// <summary>
/// Inserts <paramref name="payload"/> directly after the first ordinal occurrence of
/// <paramref name="marker"/> in <paramref name="template"/>.
/// </summary>
private static string InjectAfterMarker(string template, string marker, string payload)
{
    // Ordinal search: the markers are code tokens, not linguistic text.
    int markerIndex = template.IndexOf(marker, StringComparison.Ordinal);
    if (markerIndex < 0)
    {
        // Without this check a missing marker would insert at (marker.Length - 1)
        // and silently produce a corrupted index file.
        throw new InvalidOperationException(string.Format("Index template does not contain the \"{0}\" marker.", marker));
    }

    return template.Insert(markerIndex + marker.Length, payload);
}
/// <summary>
/// Handles one SQS message that requests parsing of an EPUB file: loads the file from S3,
/// builds the Book / Chapter / Subchapter structure from the book's navigation items and
/// queues one text-to-speech conversion request per nested navigation item.
/// </summary>
/// <remarks>
/// Only nested navigation items are converted; the content of a top-level item itself is not read.
/// The last queued request has <c>NotifyOwner</c> set. When the book yields no requests at all,
/// nothing is queued.
/// </remarks>
/// <param name="message">SQS message whose body is a JSON-serialized <c>ParseEpubContract</c>.</param>
public async Task ProcessMessageAsync(SQSEvent.SQSMessage message)
{
    ParseEpubContract parseEpubContract = JsonConvert.DeserializeObject<ParseEpubContract>(message.Body);

    string bucketName = Environment.GetEnvironmentVariable("ALEXA_READER_BUCKET");
    string filePath = $"{parseEpubContract.FolderName}/{parseEpubContract.FileName}";

    EpubBook epubBook;
    using (Stream fileStream = AwsService.S3.GetObject(filePath, bucketName))
    {
        // Opens a book and reads all of its content into memory,
        // so the stream can be released as soon as this returns.
        epubBook = EpubReader.ReadBook(fileStream);
    }

    Book book = new Book();
    book.Uuid = Guid.NewGuid().ToString();
    book.Title = epubBook.Title;
    book.Chapters = new List<Chapter>();
    book.Owner = parseEpubContract.User;
    book.EpubFilePath = filePath;

    // Case-insensitive set instead of ToLower(): independent of the current culture.
    HashSet<string> chaptersToSkip = new HashSet<string>(StringComparer.OrdinalIgnoreCase)
    {
        "index", "preface", "glossary", "quick glossary"
    };
    List<EpubNavigationItem> validChapters = epubBook.Navigation
        .Where(c => !chaptersToSkip.Contains(c.Title))
        .ToList();

    List<ConvertTextToSpeechContract> convertTextToSpeechContracts = new List<ConvertTextToSpeechContract>();
    foreach (EpubNavigationItem epubChapter in validChapters)
    {
        Chapter chapter = new Chapter();
        chapter.Uuid = Guid.NewGuid().ToString();
        chapter.Title = epubChapter.Title;
        chapter.Subchapters = new List<Subchapter>();

        // Nested chapters
        // NOTE(review): assumes NestedItems and HtmlContentFile are never null — confirm against the EPUB library.
        List<EpubNavigationItem> subChapters = epubChapter.NestedItems;
        foreach (var subChapter in subChapters)
        {
            EpubTextContentFile content = subChapter.HtmlContentFile;
            string stripped = StripHTML(content.Content);

            // The same id names the sub-chapter and its audio file.
            string id = Guid.NewGuid().ToString();
            string audioFilePath = $"{parseEpubContract.FolderName}/{id}.mp3";

            chapter.Subchapters.Add(new Subchapter
            {
                AudioFilePath = audioFilePath,
                Uuid = id,
                CurrentTimePosition = 0
            });
            convertTextToSpeechContracts.Add(new ConvertTextToSpeechContract
            {
                TextContent = stripped,
                AudioFilePathToSave = audioFilePath,
                Owner = parseEpubContract.User
            });
        }

        chapter.CurrentSubchapterId = chapter.Subchapters.FirstOrDefault()?.Uuid;
        book.Chapters.Add(chapter);
    }

    book.CurrentChapterId = book.Chapters.FirstOrDefault()?.Uuid;

    // Previously Last() threw InvalidOperationException for a book without sub-chapters.
    if (convertTextToSpeechContracts.Count > 0)
    {
        string queueUrl = Environment.GetEnvironmentVariable("CONVERSION_QUEUE_URL");
        string messageGroupId = Guid.NewGuid().ToString();

        // Only the final request asks for the owner to be notified.
        // The flag is set before serialization so it is part of the message body.
        convertTextToSpeechContracts[convertTextToSpeechContracts.Count - 1].NotifyOwner = true;

        List<SendMessageBatchRequestEntry> messages = new List<SendMessageBatchRequestEntry>();
        foreach (ConvertTextToSpeechContract contract in convertTextToSpeechContracts)
        {
            string messageDeduplicationId = Guid.NewGuid().ToString();
            messages.Add(new SendMessageBatchRequestEntry
            {
                Id = messageDeduplicationId,
                MessageBody = JsonConvert.SerializeObject(contract),
                MessageGroupId = messageGroupId,
                MessageDeduplicationId = messageDeduplicationId
            });
        }

        // Send in batches of 10 entries.
        List<List<SendMessageBatchRequestEntry>> messageGroups = SplitList(messages, 10).ToList();
        foreach (List<SendMessageBatchRequestEntry> messageGroup in messageGroups)
        {
            AwsService.SQS.SendMessageBatch(messageGroup, queueUrl);
        }
    }

    // NOTE(review): txtContent is not declared in this method; presumably a member defined
    // elsewhere — confirm. The synthesis response is not used by this method.
    var synthesisRequest = new SynthesizeSpeechRequest
    {
        Engine = Engine.Neural,
        OutputFormat = "mp3",
        Text = txtContent,
        TextType = "text",
        VoiceId = VoiceId.Joanna,
        LanguageCode = LanguageCode.EnUS
    };
    var client = new AmazonPollyClient(RegionEndpoint.USEast1);

    // Await instead of Wait()/Result: no blocked thread, and failures surface as the
    // original exception rather than an AggregateException.
    await client.SynthesizeSpeechAsync(synthesisRequest);
}
/// <summary>
/// Reads every file listed in the book's manifest from the EPUB archive into an in-memory
/// <c>EpubContent</c> structure.
/// </summary>
/// <remarks>
/// Text-like content types are read as strings, everything else as bytes. XHTML 1.1 files are
/// additionally registered in <c>Html</c>, CSS files in <c>Css</c>, GIF/JPEG/PNG/SVG images in
/// <c>Images</c> and TrueType/OpenType fonts in <c>Fonts</c>. Every file is registered in
/// <c>AllFiles</c>. The manifest item's <c>Href</c> is used as the dictionary key.
/// </remarks>
/// <param name="epubArchive">Archive in which the manifest entries are looked up.</param>
/// <param name="book">Book whose <c>Schema.Package.Manifest</c> and <c>Schema.ContentDirectoryPath</c> are used.</param>
/// <returns>The populated content structure.</returns>
/// <exception cref="InvalidDataException">
/// A manifest item is missing from the archive or is larger than <see cref="Int32.MaxValue"/> bytes.
/// </exception>
public static EpubContent ReadContentFilesToMemory(ZipArchive epubArchive, EpubBook book)
{
    EpubContent result = new EpubContent
    {
        Html = new Dictionary<string, EpubTextContentFile>(),
        Css = new Dictionary<string, EpubTextContentFile>(),
        Images = new Dictionary<string, EpubByteContentFile>(),
        Fonts = new Dictionary<string, EpubByteContentFile>(),
        AllFiles = new Dictionary<string, EpubContentFile>()
    };

    foreach (EpubManifestItem manifestItem in book.Schema.Package.Manifest)
    {
        string contentFilePath = ZipPathUtils.Combine(book.Schema.ContentDirectoryPath, manifestItem.Href);
        ZipArchiveEntry contentFileEntry = epubArchive.GetEntry(contentFilePath);
        if (contentFileEntry == null)
        {
            throw new InvalidDataException(String.Format("EPUB parsing error: file {0} not found in archive.", contentFilePath));
        }
        // The content is buffered in memory, so it has to fit an int-sized buffer.
        if (contentFileEntry.Length > Int32.MaxValue)
        {
            throw new InvalidDataException(String.Format("EPUB parsing error: file {0} is bigger than 2 Gb.", contentFilePath));
        }

        string fileName = manifestItem.Href;
        string contentMimeType = manifestItem.MediaType;
        EpubContentType contentType = GetContentTypeByContentMimeType(contentMimeType);

        switch (contentType)
        {
            case EpubContentType.XHTML_1_1:
            case EpubContentType.CSS:
            case EpubContentType.OEB1_DOCUMENT:
            case EpubContentType.OEB1_CSS:
            case EpubContentType.XHTML_1_1XML:
            case EpubContentType.DTBOOK:
            case EpubContentType.DTBOOK_NCX:
                EpubTextContentFile epubTextContentFile = new EpubTextContentFile
                {
                    FileName = fileName,
                    ContentMimeType = contentMimeType,
                    ContentType = contentType
                };
                epubTextContentFile.Content = ReadEntryAsText(contentFileEntry);

                switch (contentType)
                {
                    case EpubContentType.XHTML_1_1:
                        result.Html.Add(fileName, epubTextContentFile);
                        break;
                    case EpubContentType.CSS:
                        result.Css.Add(fileName, epubTextContentFile);
                        break;
                }

                // NOTE(review): the original (Russian) comment here said that text content does not
                // end up in AllFiles because there is no conversion from EpubTextContentFile to
                // EpubContentFile (string to byte[]). The entry is nevertheless added below —
                // confirm what consumers of AllFiles expect for text files.
                result.AllFiles.Add(fileName, epubTextContentFile);
                break;

            default:
                EpubByteContentFile epubByteContentFile = new EpubByteContentFile
                {
                    FileName = fileName,
                    ContentMimeType = contentMimeType,
                    ContentType = contentType
                };
                epubByteContentFile.Content = ReadEntryAsBytes(contentFileEntry);

                switch (contentType)
                {
                    case EpubContentType.IMAGE_GIF:
                    case EpubContentType.IMAGE_JPEG:
                    case EpubContentType.IMAGE_PNG:
                    case EpubContentType.IMAGE_SVG:
                        result.Images.Add(fileName, epubByteContentFile);
                        break;
                    case EpubContentType.FONT_TRUETYPE:
                    case EpubContentType.FONT_OPENTYPE:
                        result.Fonts.Add(fileName, epubByteContentFile);
                        break;
                }

                result.AllFiles.Add(fileName, epubByteContentFile);
                break;
        }
    }

    return result;
}

/// <summary>
/// Reads the whole archive entry as text using <see cref="StreamReader"/>'s default encoding detection.
/// </summary>
private static string ReadEntryAsText(ZipArchiveEntry entry)
{
    // ZipArchiveEntry.Open() either returns a stream or throws, so no null check is needed.
    using (Stream contentStream = entry.Open())
    using (StreamReader streamReader = new StreamReader(contentStream))
    {
        return streamReader.ReadToEnd();
    }
}

/// <summary>
/// Reads the whole archive entry into a byte array. The caller must have verified that
/// the entry length fits into an <see cref="Int32"/>.
/// </summary>
private static byte[] ReadEntryAsBytes(ZipArchiveEntry entry)
{
    using (Stream contentStream = entry.Open())
    using (MemoryStream memoryStream = new MemoryStream((int)entry.Length))
    {
        contentStream.CopyTo(memoryStream);
        return memoryStream.ToArray();
    }
}