/// <summary>
/// Produces a copy of a book by serializing it into an in-memory stream
/// and then reading the serialized data back.
/// </summary>
/// <param name="book">The book to copy; it is handed to a new <c>EpubWriter</c>.</param>
/// <returns>The book produced by <c>EpubReader.Read</c> from the serialized data.</returns>
public static EpubBook MakeCopy(EpubBook book)
{
    var buffer = new MemoryStream();
    new EpubWriter(book).Write(buffer);

    // Rewind so the reader starts at the first written byte.
    buffer.Position = 0;

    return EpubReader.Read(buffer, string.Empty);
}
/// <summary>
/// Lazily yields, for every archive entry whose name ends with one of
/// <c>txtExtensions</c>, the entry name (prefixed with "/" when it does not
/// already start with one) together with the number of words in its content.
/// </summary>
/// <param name="fileName">Path of the zip archive to scan.</param>
/// <param name="password">Password passed to <c>EpubReader.GetMemoryStream</c> for each matching entry.</param>
/// <returns>One (entry name, word count) tuple per matching entry.</returns>
/// <remarks>
/// The content is decoded as UTF-8 and split on whitespace and basic punctuation.
/// Markup is not stripped, so tag text contributes to the count.
/// </remarks>
private static IEnumerable<Tuple<string, int>> CountHtmlWords(string fileName, string password)
{
    // Built once instead of on every iteration.
    var separators = new[] { ' ', ';', '\r', '\n', '\t', ',', '.', '!', '?' };

    using (var zipFile = ZipFile.Read(fileName))
    {
        foreach (var entry in zipFile.Entries)
        {
            // Entry names are identifiers, not linguistic text, so compare ordinally.
            var fullFileName = entry.FileName.StartsWith("/", StringComparison.Ordinal)
                ? entry.FileName
                : "/" + entry.FileName;

            if (!txtExtensions.Any(extension => fullFileName.EndsWith(extension, StringComparison.Ordinal)))
            {
                continue;
            }

            using (var outputStream = EpubReader.GetMemoryStream(entry, password))
            {
                var output = outputStream.ReadToEnd();
                var text = Encoding.UTF8.GetString(output, 0, output.Length);

                // Drop empty entries: adjacent delimiters (". ", "\r\n", ...) would
                // otherwise each add a phantom word, and empty text would count as one.
                var words = text.Split(separators, StringSplitOptions.RemoveEmptyEntries);

                yield return new Tuple<string, int>(fullFileName, words.Length);
            }
        }
    }
}
/// <summary>
/// Entry point: extracts text from a range of pages of a fixed PDF file, then
/// loads a fixed EPUB file and converts its payload to plain text.
/// </summary>
/// <remarks>
/// Both input paths are hard-coded. The results are only stored in locals and
/// are not written anywhere.
/// </remarks>
public static void Main()
{
    // Own the PDF stream explicitly so it is released even if the PdfReader constructor throws.
    using (var pdfStream = new FileStream(@"C:\Users\d1mne\source\repos\CSharpPlayground\BookParsing\3.pdf", FileMode.Open))
    using (var reader = new PdfReader(pdfStream))
    {
        var text = new StringBuilder();

        // Starts at page 3 and stops before the last page.
        for (var i = 3; i < reader.NumberOfPages - 1; i++)
        {
            text.Append(PdfTextExtractor.GetTextFromPage(reader, i));
        }

        var pdfText = text.ToString();
    }

    // Dispose the EPUB stream on every path, including when reading or parsing throws.
    using (var fileStream = new FileStream(@"C:\Users\d1mne\source\repos\CSharpPlayground\BookParsing\1.epub", FileMode.Open))
    {
        // NOTE(review): ReadFully presumably consumes the stream before EpubReader.Read
        // receives it; confirm the reader does not depend on the starting position.
        var rawBytes = ReadFully(fileStream);
        var epub = EpubReader.Read(fileStream, false);

        var plainText = epub.PayloadToPlainText(
            new List<string> { "cover", "title", "copyright", "table-of-content", "table_of_content", "kolofon", "titel" },
            new List<string> { "-1.xhtml", "-2.xhtml", "-3.xhtml", "-4.xhtml", "-5.xhtml", "-6.xhtml" });
    }
}