/// <summary>
/// Parses every page of <paramref name="source"/> and gathers the per-page details
/// together with the distinct fonts referenced across all pages.
/// </summary>
/// <param name="source">The PDF document whose pages are processed.</param>
/// <returns>
/// A <c>PdfDetails</c> whose <c>Pages</c> array holds one entry per page (in page order)
/// and whose <c>Fonts</c> list holds the de-duplicated fonts of all pages.
/// </returns>
/// <exception cref="System.ArgumentNullException">
/// Thrown when <paramref name="source"/> is <c>null</c>.
/// </exception>
public PdfDetails Parse(PdfDocument source)
{
    // Fail fast with a descriptive exception instead of a NullReferenceException below.
    if (source == null)
    {
        throw new System.ArgumentNullException(nameof(source));
    }

    var numberOfPages = source.GetNumberOfPages();
    var details = new PdfDetails
    {
        Pages = new PdfPageDetails[numberOfPages],
    };

    // Pages are requested with 1-based numbers but stored in a 0-based array.
    for (int pageNumber = 1; pageNumber <= numberOfPages; pageNumber++)
    {
        Log.Info("parsing page number " + pageNumber);

        var page = source.GetPage(pageNumber);
        var pageParser = CreatePageParser(page, pageNumber);

        // The page parser receives the content events; no additional content operators are registered.
        new PdfCanvasProcessor(pageParser, new Dictionary<string, IContentOperator>())
            .ProcessPageContent(page);

        details.Pages[pageNumber - 1] = pageParser.CreatePageDetails();
    }

    // Merge the fonts of all pages, de-duplicated via Distinct().
    details.Fonts = details.Pages.SelectMany(p => p.Fonts).Distinct().ToList();

    return details;
}
/// <summary>
/// Renders the parsed PDF as a single HTML string: an opening section produced by
/// <c>StartHtml</c>, one <c>AddPage</c> call per page, and a closing section produced
/// by <c>EndHtml</c>.
/// </summary>
/// <param name="pdf">The parsed PDF details to render.</param>
/// <returns>The complete contents of the string builder after all sections are appended.</returns>
public string ConvertPdf(PdfDetails pdf)
{
    var html = new StringBuilder();

    // The font reference is built once from the document fonts and shared by every page.
    var fonts = CreateFontRef(pdf.Fonts);

    // The lines of all pages, flattened, are handed to the HTML header step.
    var linesOfAllPages = pdf.Pages.SelectMany(p => p.Lines);

    StartHtml(html, Title(pdf), null, fonts, linesOfAllPages);

    foreach (var currentPage in pdf.Pages)
    {
        AddPage(currentPage, fonts, html);
    }

    EndHtml(html);

    return html.ToString();
}
/// <summary>
/// Supplies the title passed to <c>StartHtml</c> when converting a PDF.
/// This base implementation ignores <paramref name="pdf"/> and returns a fixed string;
/// the method is virtual so subclasses can supply their own title.
/// </summary>
/// <param name="pdf">The parsed PDF being converted (unused by this implementation).</param>
/// <returns>The fixed title text.</returns>
protected virtual string Title(PdfDetails pdf)
{
    const string defaultTitle = "pdf converted to html";
    return defaultTitle;
}
/// <summary>
/// Converts <paramref name="pdf"/> to HTML with <c>ConvertPdf</c> and writes the
/// resulting text to the file at <paramref name="path"/>.
/// </summary>
/// <param name="pdf">The parsed PDF details to convert.</param>
/// <param name="path">The file path handed to <c>File.WriteAllText</c>.</param>
public void SaveAsHtml(PdfDetails pdf, string path)
{
    // The conversion runs first (as the argument is evaluated), then the file is written.
    File.WriteAllText(path, ConvertPdf(pdf));
}