static void Main()
{
    // Every document that is opened or created needs a MuPDF context.
    using var context = new MuPDFContext();

    // First input: a PDF file.
    using var pdfDocument = new MuPDFDocument(context, "Document1.pdf");

    // Rasterise the whole first page to a transparent PNG, with 1pt mapped to 1px (1x zoom).
    pdfDocument.SaveImage(0, 1, PixelFormats.RGBA, "Raster1.png", RasterOutputFileTypes.PNG);

    // Second input: an Open XPS file.
    using var xpsDocument = new MuPDFDocument(context, "Document2.oxps");

    // Region of the XPS page used both for the cropped raster and for the merged document.
    Rectangle cropRegion = new Rectangle(87, 360, 517, 790);

    // Rasterise just that region to a transparent PNG, with 1pt mapped to 2px (2x zoom).
    xpsDocument.SaveImage(0, cropRegion, 2, PixelFormats.RGBA, "Raster2.png", RasterOutputFileTypes.PNG);

    // Combine one page from each input into a single PDF file.
    MuPDFDocument.CreateDocument(
        context,
        "Merged.pdf",
        DocumentOutputFileTypes.PDF,
        true,
        // The first page of the PDF, using its full bounds.
        (pdfDocument.Pages[0], pdfDocument.Pages[0].Bounds, 1),
        // The first page of the XPS document, restricted to the crop region.
        (xpsDocument.Pages[0], cropRegion, 1));
}
static void Main()
{
    // Every document that is opened or created needs a MuPDF context.
    using var context = new MuPDFContext();

    // First input: a PDF file.
    using var pdfDocument = new MuPDFDocument(context, "Document1.pdf");

    // Rasterise the whole first page to a transparent PNG, with 1pt mapped to 1px (1x zoom).
    pdfDocument.SaveImage(0, 1, PixelFormats.RGBA, "Raster1.png", RasterOutputFileTypes.PNG);

    // Second input: an Open XPS file.
    using var xpsDocument = new MuPDFDocument(context, "Document2.oxps");

    // Region of the XPS page used both for the cropped raster and for the merged document.
    Rectangle cropRegion = new Rectangle(87, 360, 517, 790);

    // Rasterise just that region to a transparent PNG, with 1pt mapped to 2px (2x zoom).
    xpsDocument.SaveImage(0, cropRegion, 2, PixelFormats.RGBA, "Raster2.png", RasterOutputFileTypes.PNG);

    // Combine one page from each input into a single PDF file.
    MuPDFDocument.CreateDocument(
        context,
        "Merged.pdf",
        DocumentOutputFileTypes.PDF,
        true,
        // The first page of the PDF, using its full bounds.
        (pdfDocument.Pages[0], pdfDocument.Pages[0].Bounds, 1),
        // The first page of the XPS document, restricted to the crop region.
        (xpsDocument.Pages[0], cropRegion, 1));

    // Re-open the PNG produced above: it is the input for the OCR step.
    using var rasterDocument = new MuPDFDocument(context, "Raster1.png");

    // Run OCR on the first page to obtain its structured text representation.
    MuPDFStructuredTextPage structuredText = rasterDocument.GetStructuredTextPage(0, new TesseractLanguage(TesseractLanguage.Fast.Eng));

    // Walk every block of the page and write each of its text lines to the console.
    foreach (MuPDFStructuredTextBlock textBlock in structuredText)
    {
        foreach (MuPDFStructuredTextLine textLine in textBlock)
        {
            System.Console.WriteLine(textLine.Text);
        }
    }
}
/// <summary>
/// Parses an image URI into a page. This is intended to replace the default image URI interpreter in <c>VectSharp.SVG.Parser.ParseImageURI</c>. To do this, use something like:
/// <code>VectSharp.SVG.Parser.ParseImageURI = VectSharp.MuPDFUtils.ImageURIParser.Parser(VectSharp.SVG.Parser.ParseSVGURI);</code>
/// </summary>
/// <param name="parseSVG">A function to parse an SVG image uri into a page. You should pass <c>VectSharp.SVG.Parser.ParseSVGURI</c> as this argument.</param>
/// <returns>
/// A function to parse an image URI into a page. The returned function yields <see langword="null"/> when the URI does not start with <c>data:</c>;
/// it forwards SVG data URIs to <paramref name="parseSVG"/> unchanged; it draws raster formats on a new page of the same size as the image;
/// and it converts the first page of vector formats (PDF, XPS, EPUB) to SVG before handing it to <paramref name="parseSVG"/>.
/// </returns>
/// <exception cref="InvalidDataException">Thrown by the returned function when the data URI has no <c>,</c> separator, or when its encoding is neither empty (URL-encoded) nor <c>base64</c>.</exception>
public static Func<string, bool, Page> Parser(Func<string, bool, Page> parseSVG)
{
    return (string uri, bool interpolate) =>
    {
        const string dataScheme = "data:";

        // Only data URIs are handled here; the scheme is matched ordinally so that the result does not depend on the current culture.
        if (!uri.StartsWith(dataScheme, StringComparison.Ordinal))
        {
            return null;
        }

        // The header ("<mime type>[;<encoding>]") ends at the first comma; everything after it is the payload.
        int commaIndex = uri.IndexOf(',');

        if (commaIndex < 0)
        {
            throw new InvalidDataException("Invalid data URI: missing ',' separator!");
        }

        // Restricting the search for ';' to the header keeps a ';' in the payload from being mistaken for the encoding separator,
        // and lets URIs without an encoding part (e.g. "data:image/png,...") be treated as URL-encoded.
        string header = uri.Substring(dataScheme.Length, commaIndex - dataScheme.Length);
        int semicolonIndex = header.IndexOf(';');

        string mimeType = semicolonIndex < 0 ? header : header.Substring(0, semicolonIndex);
        string type = semicolonIndex < 0 ? "" : header.Substring(semicolonIndex + 1);

        // SVG images are delegated as they are.
        if (mimeType == "image/svg+xml")
        {
            return parseSVG(uri, interpolate);
        }

        bool isVector = false;
        InputFileTypes fileType;

        switch (mimeType)
        {
            case "image/png":
                fileType = InputFileTypes.PNG;
                break;
            case "image/jpeg":
            case "image/jpg":
                fileType = InputFileTypes.JPEG;
                break;
            case "image/gif":
                fileType = InputFileTypes.GIF;
                break;
            case "image/bmp":
                fileType = InputFileTypes.BMP;
                break;
            case "image/tiff":
            case "image/tif":
                fileType = InputFileTypes.TIFF;
                break;
            case "application/oxps":
            case "application/vnd.ms-xpsdocument":
                fileType = InputFileTypes.XPS;
                isVector = true;
                break;
            case "application/x-cbz":
                fileType = InputFileTypes.CBZ;
                break;
            case "application/epub+zip":
                fileType = InputFileTypes.EPUB;
                isVector = true;
                break;
            case "text/fb2+xml":
                fileType = InputFileTypes.FB2;
                break;
            case "image/x-portable-anymap":
                fileType = InputFileTypes.PNM;
                break;
            case "image/x-portable-arbitrarymap":
                fileType = InputFileTypes.PAM;
                break;
            case "application/pdf":
                fileType = InputFileTypes.PDF;
                isVector = true;
                break;
            default:
                // Unknown MIME types are opened as PDF but still go through the raster path.
                // NOTE(review): presumably a deliberate best-effort fallback - confirm.
                fileType = InputFileTypes.PDF;
                break;
        }

        string payload = uri.Substring(commaIndex + 1);
        byte[] parsed;

        switch (type)
        {
            case "base64":
                parsed = Convert.FromBase64String(payload);
                break;
            case "":
                // URL-encoded payload: every decoded character is narrowed to a single byte.
                parsed = System.Web.HttpUtility.UrlDecode(payload).Select(el => (byte)el).ToArray();
                break;
            default:
                throw new InvalidDataException("Unknown data stream type!");
        }

        if (!isVector)
        {
            // The buffer must stay pinned while the image is built from its address;
            // the finally block releases the pin even if the constructor throws.
            GCHandle handle = GCHandle.Alloc(parsed, GCHandleType.Pinned);
            RasterImageStream img;

            try
            {
                img = new RasterImageStream(handle.AddrOfPinnedObject(), parsed.Length, fileType, interpolate: interpolate);
            }
            finally
            {
                handle.Free();
            }

            Page pag = new Page(img.Width, img.Height);
            pag.Graphics.DrawRasterImage(0, 0, img);
            return pag;
        }

        // Vector formats: write the first page to a temporary SVG file, read it back and delegate to the SVG parser.
        string tempFile = Path.GetTempFileName();
        string svgSource;

        try
        {
            using (MuPDFContext context = new MuPDFContext())
            {
                using (MuPDFDocument document = new MuPDFDocument(context, parsed, fileType))
                {
                    MuPDFDocument.CreateDocument(context, tempFile, DocumentOutputFileTypes.SVG, true, document.Pages[0]);
                }
            }

            svgSource = File.ReadAllText(tempFile);
        }
        finally
        {
            // The temporary file is removed even when the conversion or the read fails.
            File.Delete(tempFile);
        }

        string tbr = "data:image/svg+xml;," + System.Web.HttpUtility.UrlEncode(svgSource);
        return parseSVG(tbr, interpolate);
    };
}