Пример #1
0
        /// <summary>
        /// Creates a copy of a book by writing it to a <see cref="MemoryStream"/> with an
        /// <see cref="EpubWriter"/> and then reading that stream back with <see cref="EpubReader"/>.
        /// </summary>
        /// <param name="book">The book to copy; it is handed to the <see cref="EpubWriter"/> constructor.</param>
        /// <returns>The <see cref="EpubBook"/> produced by reading the in-memory stream back.</returns>
        public static EpubBook MakeCopy(EpubBook book)
        {
            // NOTE(review): the buffer is intentionally not disposed here; it is unclear from
            // this method whether the returned book still reads from it - confirm before changing.
            var buffer = new MemoryStream();

            new EpubWriter(book).Write(buffer);

            // Rewind so the reader starts at the first byte that was just written.
            buffer.Position = 0;

            // The second argument is an empty string, as in the original call.
            return EpubReader.Read(buffer, string.Empty);
        }
Пример #2
0
 /// <summary>
 /// Lazily walks the entries of the zip archive at <paramref name="fileName"/> and, for every
 /// entry whose name ends with one of the <c>txtExtensions</c> suffixes, yields the entry name
 /// (normalized to start with "/") together with the number of non-empty tokens found in its
 /// UTF-8 decoded content.
 /// </summary>
 /// <param name="fileName">Path of the archive opened via <c>ZipFile.Read</c>.</param>
 /// <param name="password">Password forwarded to <c>EpubReader.GetMemoryStream</c> for each matching entry.</param>
 /// <returns>
 /// One (entry name, token count) tuple per matching entry. Because this is an iterator, the
 /// archive is opened on first enumeration and stays open until enumeration finishes or the
 /// enumerator is disposed.
 /// </returns>
 private static IEnumerable<Tuple<string, int>> CountHtmlWords(string fileName, string password)
 {
     // Tokens are delimited by whitespace and basic punctuation. Markup is not stripped,
     // so tag text contributes to the count as well.
     var separators = new[] { ' ', ';', '\r', '\n', '\t', ',', '.', '!', '?' };

     using (var zipFile = ZipFile.Read(fileName))
     {
         foreach (var entry in zipFile.Entries)
         {
             // Entry names are identifiers rather than linguistic text, so compare ordinally
             // instead of with culture-sensitive rules.
             var fullFileName = entry.FileName.StartsWith("/", StringComparison.Ordinal)
                 ? entry.FileName
                 : "/" + entry.FileName;

             if (!txtExtensions.Any(extension => fullFileName.EndsWith(extension, StringComparison.Ordinal)))
             {
                 continue;
             }

             using (var outputStream = EpubReader.GetMemoryStream(entry, password))
             {
                 var output = outputStream.ReadToEnd();
                 var text = Encoding.UTF8.GetString(output, 0, output.Length);

                 // Drop empty entries: adjacent separators (", ", ". ", "\r\n") would otherwise
                 // each add a phantom "word" and inflate the count.
                 var parts = text.Split(separators, StringSplitOptions.RemoveEmptyEntries);

                 yield return new Tuple<string, int>(fullFileName, parts.Length);
             }
         }
     }
 }
Пример #3
0
        /// <summary>
        /// Entry point. Extracts text from a range of pages of a hard-coded PDF file, then opens
        /// a hard-coded EPUB file, reads it with <c>EpubReader.Read</c> and converts it with
        /// <c>PayloadToPlainText</c>. Results are only stored in locals; nothing is returned or printed.
        /// </summary>
        public static void Main()
        {
            using (var reader = new PdfReader(new FileStream(@"C:\Users\d1mne\source\repos\CSharpPlayground\BookParsing\3.pdf", FileMode.Open)))
            {
                var text = new StringBuilder();

                // Extraction starts at page 3 and stops before page NumberOfPages - 1.
                // NOTE(review): presumably skips front/back matter - confirm the intended range.
                for (var i = 3; i < reader.NumberOfPages - 1; i++)
                {
                    text.Append(PdfTextExtractor.GetTextFromPage(reader, i));
                }

                // Not used afterwards; presumably kept for inspection in a debugger.
                var dumm = text.ToString();
            }

            // Dispose the EPUB stream deterministically, even if reading or conversion throws.
            // Previously it was never disposed.
            using (var fileStream = new FileStream(@"C:\Users\d1mne\source\repos\CSharpPlayground\BookParsing\1.epub", FileMode.Open))
            {
                // NOTE(review): ReadFully is defined elsewhere; if it consumes the stream, the
                // reader below starts from wherever it left the position - confirm this is intended.
                var dummy = ReadFully(fileStream);
                var epub = EpubReader.Read(fileStream, false);

                // The two lists are passed through unchanged; their meaning is defined by
                // PayloadToPlainText, which is not visible here.
                var plainText = epub.PayloadToPlainText(
                    new List<string>
                    {
                        "cover",
                        "title",
                        "copyright",
                        "table-of-content",
                        "table_of_content",
                        "kolofon",
                        "titel"
                    },
                    new List<string>
                    {
                        "-1.xhtml",
                        "-2.xhtml",
                        "-3.xhtml",
                        "-4.xhtml",
                        "-5.xhtml",
                        "-6.xhtml"
                    });
            }
        }