/// <summary>
/// Development driver: runs the PDF processing steps one after another
/// against fixed files on the developer machine.
/// </summary>
/// <param name="args">Command-line arguments; not read by this method.</param>
public static void Main(string[] args)
{
    // Paths that are used by more than one step below.
    const string jsonPath = "/Users/ryan/RiderProjects/Sharpen Pdf Parser/test.json";
    const string bookPdfPath = "/Users/ryan/Documents/Books/affordances.pdf";
    const string svgResourcePath =
        "/Users/ryan/RiderProjects/Sharpen Pdf Parser/Sharpen Pdf Parser/Resources/test.svg";

    // Step 1: extract the PDF content; the output copy goes to the temp directory.
    string extractedCopyPath = Path.Combine(Path.GetTempPath(), "test.pdf");
    ContentExtract.ExtractPdf(
        "/Users/ryan/RiderProjects/Sharpen Pdf Parser/Sharpen Pdf Parser/Resources/affordances.pdf",
        extractedCopyPath);

    // Step 2: write the extracted metrics to JSON, then load them back from the same file.
    BookMetrics extracted = ContentExtract.Book;
    extracted.ToJson(jsonPath);
    // The reloaded instance is not used afterwards; the call is kept as-is.
    BookMetrics reloaded = BookMetrics.FromJson(jsonPath);

    // Step 3: remove the text from the PDF.
    GhostScript.RemoveText(bookPdfPath);

    // Step 4: convert page "1" to SVG.
    Poppler.PdfToSvg(bookPdfPath, "1");

    // Step 5: prepare the SVG for Unity - temporary until they fix issues.
    Svg.RenderEmptyPathsExplicit(svgResourcePath);
    Svg.RenderRgbAsHex(svgResourcePath);
    Svg.WriteSvg("/Users/ryan/Documents/Books/parsed2.svg");
}
/// <summary>
/// Opens the PDF at <paramref name="input"/> (with a writer on
/// <paramref name="output"/>), stores its author, title and producer in a new
/// <c>BookMetrics</c> assigned to <c>book</c>, and adds one <c>PageMetrics</c>
/// per page holding the page size, rotation and the character/word metrics
/// collected by a <c>DeepExtractionStrategy</c>.
/// </summary>
/// <param name="input">Path of the PDF file to read.</param>
/// <param name="output">Path handed to the <c>PdfWriter</c> the document is opened with.</param>
public static void ExtractPdf(string input, string output)
{
    PdfDocument pdfDoc = new PdfDocument(
        new PdfReader(input),
        new PdfWriter(output));

    try
    {
        // Register the code-pages encoding provider before any text is extracted.
        EncodingProvider codePages = CodePagesEncodingProvider.Instance;
        Encoding.RegisterProvider(codePages);

        // Fetch the document info once and copy the metadata fields from it.
        var info = pdfDoc.GetDocumentInfo();
        book = new BookMetrics();
        book.Author = info.GetAuthor();
        book.Title = info.GetTitle();
        // The Publisher field is filled from the PDF "producer" entry.
        book.Publisher = info.GetProducer();

        int nbPages = pdfDoc.GetNumberOfPages();

        // Page numbers passed to GetPage are 1-based.
        for (int pageNumber = 1; pageNumber <= nbPages; pageNumber++)
        {
            var pdfPage = pdfDoc.GetPage(pageNumber);
            Rectangle size = pdfPage.GetPageSize();

            PageMetrics page = new PageMetrics(pageNumber);
            page.Width = size.GetWidth();
            page.Height = size.GetHeight();
            page.Rotation = pdfPage.GetRotation();

            // The strategy receives the page by reference and is driven by the
            // text extractor; the returned text itself is not used here.
            DeepExtractionStrategy strategy = new DeepExtractionStrategy(ref page);
            Console.WriteLine("Processing page {0}", pageNumber);
            PdfTextExtractor.GetTextFromPage(pdfPage, strategy);

            page.SortLines();
            page.WordMetrices = WordMetrics.FromChars(strategy.CharMetrices);
            page.CharMetrices = strategy.CharMetrices;
            book.AddPage(page);
        }
    }
    finally
    {
        // Always close the document so the reader/writer are released even
        // when extraction throws part-way through.
        pdfDoc.Close();
    }
}