示例#1
0
        /// <summary>
        /// Generates the index.html page that ties all files of the current book into a JS+HTML layout
        /// and stores it in <c>currentEpubBook.Content.Html</c> under the key "index.html".
        /// The generated file can then be used as the entry point for rendering the book in a WebView.
        /// An already existing "index.html" entry is replaced.
        /// </summary>
        private void CreateIndex()
        {
            const string indexKey = "index.html";

            // NOTE(review): the meaning of the literal 530 is defined by IndexFileMonocleGenerator's constructor — confirm there.
            IndexFileMonocleGenerator generator = new IndexFileMonocleGenerator(currentEpubBook.Title, currentEpubBook.Author, 530);
            EpubTextContentFile       indexFile = generator.CreateIndex(currentEpubBook.Chapters, currentEpubBook.Content.Html);

            var htmlFiles = currentEpubBook.Content.Html;

            // Decide between "add" and "replace" up front instead of relying on a thrown
            // ArgumentException; regenerating the index is an expected situation, not an error.
            bool replacesExisting = htmlFiles.ContainsKey(indexKey);
            htmlFiles[indexKey] = indexFile;

            if (replacesExisting)
            {
                Debug.WriteLine("\n---------------------------\nAn element with Key = \"index.html\" UPDATED.\n");
            }
        }
        /// <summary>
        /// Writes the plain text of an HTML content file to the console: every text node of the
        /// parsed document is trimmed and printed on its own line, followed by one empty line.
        /// </summary>
        /// <param name="textContentFile">Content file whose <c>Content</c> holds the HTML markup to print.</param>
        private static void PrintTextContentFile(EpubTextContentFile textContentFile)
        {
            HtmlDocument htmlDocument = new HtmlDocument();

            htmlDocument.LoadHtml(textContentFile.Content);
            StringBuilder sb = new StringBuilder();

            // HtmlAgilityPack returns null (not an empty collection) when the XPath matches nothing,
            // e.g. for an empty document, so the result has to be checked before enumerating it.
            var textNodes = htmlDocument.DocumentNode.SelectNodes("//text()");

            if (textNodes != null)
            {
                foreach (HtmlNode node in textNodes)
                {
                    sb.AppendLine(node.InnerText.Trim());
                }
            }

            Console.WriteLine(sb.ToString());
            Console.WriteLine();
        }
示例#3
0
 /// <summary>
 /// Imports one chapter of a book: builds its references, rewrites the local links in its HTML
 /// and creates a topic from the result under <paramref name="parentFolder"/>.
 /// </summary>
 /// <param name="parentFolder">Element passed to <c>CreateSMTopic</c> as the parent of the new topic.</param>
 /// <param name="book">Book the chapter belongs to; forwarded to <c>CreateChapterReference</c>.</param>
 /// <param name="text">Content file holding the chapter's HTML.</param>
 /// <param name="htmlFolder">Forwarded to <c>UpdateLocalLinks</c>.</param>
 /// <param name="imageFolder">Forwarded to <c>UpdateLocalLinks</c>.</param>
 /// <returns>The id returned by <c>CreateSMTopic</c>, or -1 when any step throws.</returns>
 private int ImportChapter(IElement parentFolder, EpubBook book, EpubTextContentFile text, string htmlFolder, string imageFolder)
 {
     try
     {
         string originalHtml = text.Content;
         var    references   = CreateChapterReference(book, text.Content);
         string linkedHtml   = UpdateLocalLinks(originalHtml, htmlFolder, imageFolder);

         var topicId = CreateSMTopic(linkedHtml, references, parentFolder);

         // The looked-up element is not used afterwards; the lookup is kept because
         // a failure here is routed to the catch block below.
         var topicElement = Svc.SM.Registry.Element[topicId];

         return topicId;
     }
     catch (Exception ex)
     {
         // Any failure is logged and reported to the caller as -1 instead of propagating.
         LogTo.Debug($"Failed to import chapter with exception {ex}");
         return -1;
     }
 }
        /// <summary>
        /// Loads the text of every referenced content file and returns the loaded files
        /// under the same keys as the incoming references.
        /// </summary>
        /// <param name="textContentFileRefs">References to the text content files, by key.</param>
        /// <returns>A new dictionary with one loaded <c>EpubTextContentFile</c> per reference.</returns>
        private static async Task <Dictionary <string, EpubTextContentFile> > ReadTextContentFiles(Dictionary <string, EpubTextContentFileRef> textContentFileRefs)
        {
            var loadedFiles = new Dictionary <string, EpubTextContentFile>();

            foreach (var entry in textContentFileRefs)
            {
                EpubTextContentFileRef fileRef = entry.Value;

                // Metadata is copied from the reference first; the text itself is read afterwards.
                var loadedFile = new EpubTextContentFile
                {
                    FileName        = fileRef.FileName,
                    ContentType     = fileRef.ContentType,
                    ContentMimeType = fileRef.ContentMimeType
                };

                // Files are read one after another, in the enumeration order of the dictionary.
                string text = await fileRef.ReadContentAsTextAsync().ConfigureAwait(false);
                loadedFile.Content = text;

                loadedFiles.Add(entry.Key, loadedFile);
            }

            return loadedFiles;
        }
        /// <summary>
        /// Builds the in-memory "index.html" file from the template by injecting a JavaScript
        /// function body after the "getComponents:" tag (list of all XHTML files) and after the
        /// "getContents:" tag (list of chapters).
        /// </summary>
        /// <param name="chapters">Chapters rendered by <c>BuildChapterListToString</c>.</param>
        /// <param name="xhtmlFiles">XHTML files rendered by <c>BuildAllXHTMLFilesToString</c>.</param>
        /// <returns>A new content file named "index.html" holding the filled-in template.</returns>
        /// <exception cref="System.InvalidOperationException">
        /// The template does not contain the "getComponents:" or the "getContents:" tag.
        /// </exception>
        public EpubTextContentFile CreateIndex(List <EpubChapter> chapters, Dictionary <string, EpubTextContentFile> xhtmlFiles)
        {
            const string componentsTag    = "getComponents:";
            const string contentsTag      = "getContents:";
            const string injectionPrefix  = " function() {\nreturn [";
            const string injectionPostfix = "];\n},\n";

            string template = _templateContent;

            // Both tags are located in the untouched template with an ordinal (culture-independent)
            // search, so injected text can never be mistaken for a tag.
            int componentsTagIndex = template.IndexOf(componentsTag, System.StringComparison.Ordinal);
            int contentsTagIndex   = template.IndexOf(contentsTag, System.StringComparison.Ordinal);

            // A missing tag used to yield -1 and silently insert the text at a wrong offset.
            if (componentsTagIndex < 0)
            {
                throw new System.InvalidOperationException("Index template does not contain the \"" + componentsTag + "\" tag.");
            }
            if (contentsTagIndex < 0)
            {
                throw new System.InvalidOperationException("Index template does not contain the \"" + contentsTag + "\" tag.");
            }

            int componentsInsertAt = componentsTagIndex + componentsTag.Length;
            int contentsInsertAt   = contentsTagIndex + contentsTag.Length;

            string componentsBlock = injectionPrefix + BuildAllXHTMLFilesToString(xhtmlFiles) + injectionPostfix;
            string contentsBlock   = injectionPrefix + BuildChapterListToString(chapters) + injectionPostfix;

            // Insert at the later position first so that the earlier offset stays valid.
            string indexFileContent = componentsInsertAt > contentsInsertAt
                ? template.Insert(componentsInsertAt, componentsBlock).Insert(contentsInsertAt, contentsBlock)
                : template.Insert(contentsInsertAt, contentsBlock).Insert(componentsInsertAt, componentsBlock);

            // The caller is responsible for adding the returned item to the book's HTML file list.
            EpubTextContentFile indexMemoryItem = new EpubTextContentFile()
            {
                Content         = indexFileContent,
                FileName        = "index.html",
                ContentType     = EpubContentType.XHTML_1_1,
                ContentMimeType = "text/html"
            };

            Debug.WriteLine(string.Format("-------- Here comes the final result:------- \n{0}\n ------------------------------", indexFileContent));
            return indexMemoryItem;
        }
示例#6
0
        /// <summary>
        /// Handles one SQS message describing an uploaded EPUB file: downloads the file from S3,
        /// parses it, builds the chapter/subchapter structure, queues one text-to-speech conversion
        /// request per subchapter (the last request is flagged to notify the owner) and finally
        /// issues a speech synthesis request to Amazon Polly.
        /// </summary>
        /// <param name="message">SQS message whose body is a JSON-serialized <c>ParseEpubContract</c>.</param>
        /// <returns>A task that completes when the Polly request has finished.</returns>
        public async Task ProcessMessageAsync(SQSEvent.SQSMessage message)
        {
            ParseEpubContract parseEpubContract =
                JsonConvert.DeserializeObject <ParseEpubContract>(message.Body);

            string bucketName = Environment.GetEnvironmentVariable("ALEXA_READER_BUCKET");
            string filePath   = $"{parseEpubContract.FolderName}/{parseEpubContract.FileName}";

            // ReadBook loads the whole book into memory, so the S3 stream can be released right after.
            EpubBook epubBook;
            using (Stream fileStream = AwsService.S3.GetObject(filePath, bucketName))
            {
                epubBook = EpubReader.ReadBook(fileStream);
            }

            // NOTE(review): 'book' is populated below but is not stored or returned anywhere
            // in this method — confirm whether persisting it is missing.
            Book book = new Book();

            book.Uuid         = Guid.NewGuid().ToString();
            book.Title        = epubBook.Title;
            book.Chapters     = new List <Chapter>();
            book.Owner        = parseEpubContract.User;
            book.EpubFilePath = filePath;

            // Titles are matched case-insensitively with an ordinal comparer, so the result
            // does not depend on the current culture.
            HashSet <string> chaptersToSkip = new HashSet <string>(StringComparer.OrdinalIgnoreCase)
            {
                "index",
                "preface",
                "glossary",
                "quick glossary"
            };

            List <EpubNavigationItem> validChapters = epubBook.Navigation
                                                      .Where(c => !chaptersToSkip.Contains(c.Title))
                                                      .ToList();

            List <ConvertTextToSpeechContract> convertTextToSpeechContracts =
                new List <ConvertTextToSpeechContract>();

            foreach (EpubNavigationItem epubChapter in validChapters)
            {
                Chapter chapter = new Chapter();
                chapter.Uuid        = Guid.NewGuid().ToString();
                chapter.Title       = epubChapter.Title;
                chapter.Subchapters = new List <Subchapter>();

                // Every nested navigation item becomes one subchapter with its own audio file.
                foreach (EpubNavigationItem subChapter in epubChapter.NestedItems)
                {
                    EpubTextContentFile content = subChapter.HtmlContentFile;

                    string stripped = StripHTML(content.Content);

                    string id            = Guid.NewGuid().ToString();
                    string audioFilePath = $"{parseEpubContract.FolderName}/{id}.mp3";

                    chapter.Subchapters.Add(new Subchapter
                    {
                        AudioFilePath       = audioFilePath,
                        Uuid                = id,
                        CurrentTimePosition = 0
                    });

                    convertTextToSpeechContracts.Add(new ConvertTextToSpeechContract
                    {
                        TextContent         = stripped,
                        AudioFilePathToSave = audioFilePath,
                        Owner = parseEpubContract.User
                    });
                }

                chapter.CurrentSubchapterId = chapter.Subchapters.FirstOrDefault()?.Uuid;
                book.Chapters.Add(chapter);
            }

            book.CurrentChapterId = book.Chapters.FirstOrDefault()?.Uuid;

            string queueUrl       = Environment.GetEnvironmentVariable("CONVERSION_QUEUE_URL");
            string messageGroupId = Guid.NewGuid().ToString();

            // A book without any subchapter produces no conversion requests; previously this
            // case crashed with "Sequence contains no elements" on Last().
            if (convertTextToSpeechContracts.Count > 0)
            {
                // Only the final conversion request triggers the owner notification.
                convertTextToSpeechContracts[convertTextToSpeechContracts.Count - 1].NotifyOwner = true;

                List <SendMessageBatchRequestEntry> messages =
                    new List <SendMessageBatchRequestEntry>(convertTextToSpeechContracts.Count);

                foreach (ConvertTextToSpeechContract contract in convertTextToSpeechContracts)
                {
                    // One fresh GUID serves both as batch entry id and as deduplication id.
                    string messageDeduplicationId = Guid.NewGuid().ToString();

                    messages.Add(new SendMessageBatchRequestEntry
                    {
                        Id                     = messageDeduplicationId,
                        MessageBody            = JsonConvert.SerializeObject(contract),
                        MessageGroupId         = messageGroupId,
                        MessageDeduplicationId = messageDeduplicationId
                    });
                }

                // Messages are sent in groups of 10 entries.
                foreach (List <SendMessageBatchRequestEntry> messageGroup in SplitList(messages, 10).ToList())
                {
                    AwsService.SQS.SendMessageBatch(messageGroup, queueUrl);
                }
            }

            // NOTE(review): 'txtContent' is not declared in this method — presumably a member
            // defined elsewhere; confirm what text is expected to be synthesized here.
            var synthesisRequest = new SynthesizeSpeechRequest
            {
                Engine       = Engine.Neural,
                OutputFormat = "mp3",
                Text         = txtContent,
                TextType     = "text",
                VoiceId      = VoiceId.Joanna,
                LanguageCode = LanguageCode.EnUS
            };

            // Await the request instead of blocking on Wait()/Result inside an async method,
            // and dispose the client once the call has completed. The response is not used.
            using (var client = new AmazonPollyClient(RegionEndpoint.USEast1))
            {
                await client.SynthesizeSpeechAsync(synthesisRequest).ConfigureAwait(false);
            }
        }
示例#7
0
        /// <summary>
        /// Loads every file listed in the book's manifest from the EPUB archive into memory and
        /// groups the loaded files by kind (HTML, CSS, images, fonts) plus a map of all files.
        /// </summary>
        /// <param name="epubArchive">Opened EPUB archive the content files are read from.</param>
        /// <param name="book">Book whose schema supplies the manifest and the content directory path.</param>
        /// <returns>An <c>EpubContent</c> whose dictionaries are keyed by the manifest item's href.</returns>
        public static EpubContent ReadContentFilesToMemory(ZipArchive epubArchive, EpubBook book)
        {
            EpubContent content = new EpubContent
            {
                Html     = new Dictionary <string, EpubTextContentFile>(),
                Css      = new Dictionary <string, EpubTextContentFile>(),
                Images   = new Dictionary <string, EpubByteContentFile>(),
                Fonts    = new Dictionary <string, EpubByteContentFile>(),
                AllFiles = new Dictionary <string, EpubContentFile>()
            };

            foreach (EpubManifestItem item in book.Schema.Package.Manifest)
            {
                string          entryPath = ZipPathUtils.Combine(book.Schema.ContentDirectoryPath, item.Href);
                ZipArchiveEntry entry     = epubArchive.GetEntry(entryPath);

                if (entry == null)
                {
                    throw new Exception(String.Format("EPUB parsing error: file {0} not found in archive.", entryPath));
                }
                if (entry.Length > Int32.MaxValue)
                {
                    throw new Exception(String.Format("EPUB parsing error: file {0} is bigger than 2 Gb.", entryPath));
                }

                string          href     = item.Href;
                string          mimeType = item.MediaType;
                EpubContentType kind     = GetContentTypeByContentMimeType(mimeType);

                // These content types are loaded as text; everything else is loaded as raw bytes.
                bool isTextual = kind == EpubContentType.XHTML_1_1 ||
                                 kind == EpubContentType.CSS ||
                                 kind == EpubContentType.OEB1_DOCUMENT ||
                                 kind == EpubContentType.OEB1_CSS ||
                                 kind == EpubContentType.XHTML_1_1XML ||
                                 kind == EpubContentType.DTBOOK ||
                                 kind == EpubContentType.DTBOOK_NCX;

                if (isTextual)
                {
                    EpubTextContentFile textFile = new EpubTextContentFile
                    {
                        FileName        = href,
                        ContentMimeType = mimeType,
                        ContentType     = kind
                    };

                    using (Stream entryStream = entry.Open())
                    {
                        if (entryStream == null)
                        {
                            throw new Exception(String.Format("Incorrect EPUB file: content file \"{0}\" specified in manifest is not found", href));
                        }
                        using (StreamReader reader = new StreamReader(entryStream))
                        {
                            textFile.Content = reader.ReadToEnd();
                        }
                    }

                    // Only plain XHTML and CSS get a dedicated map; other text types appear in AllFiles only.
                    if (kind == EpubContentType.XHTML_1_1)
                    {
                        content.Html.Add(href, textFile);
                    }
                    else if (kind == EpubContentType.CSS)
                    {
                        content.Css.Add(href, textFile);
                    }

                    // Original note (translated): "At the moment content does not get into AllFiles because there is
                    // no conversion from EpubTextContentFile to EpubContentFile, i.e. from string to byte[]."
                    // NOTE(review): the text file IS added to AllFiles right here, so that note looks outdated — confirm.
                    content.AllFiles.Add(href, textFile);
                    continue;
                }

                EpubByteContentFile byteFile = new EpubByteContentFile
                {
                    FileName        = href,
                    ContentMimeType = mimeType,
                    ContentType     = kind
                };

                using (Stream entryStream = entry.Open())
                {
                    if (entryStream == null)
                    {
                        throw new Exception(String.Format("Incorrect EPUB file: content file \"{0}\" specified in manifest is not found", href));
                    }
                    // The buffer is pre-sized to the entry length, which was checked above to fit into an int.
                    using (MemoryStream buffer = new MemoryStream((int)entry.Length))
                    {
                        entryStream.CopyTo(buffer);
                        byteFile.Content = buffer.ToArray();
                    }
                }

                bool isImage = kind == EpubContentType.IMAGE_GIF ||
                               kind == EpubContentType.IMAGE_JPEG ||
                               kind == EpubContentType.IMAGE_PNG ||
                               kind == EpubContentType.IMAGE_SVG;
                bool isFont = kind == EpubContentType.FONT_TRUETYPE ||
                              kind == EpubContentType.FONT_OPENTYPE;

                if (isImage)
                {
                    content.Images.Add(href, byteFile);
                }
                else if (isFont)
                {
                    content.Fonts.Add(href, byteFile);
                }

                content.AllFiles.Add(href, byteFile);
            }

            return content;
        }