/// <summary>
/// Add a text annotation to a pdf and burn it into the page.
/// </summary>
/// <param name="pdf">Pdf to add annotation to</param>
/// <param name="textAnnotation">
/// Annotation object (position, size, text, font and target page).
/// A page value of 0 is treated as page 1; the passed object is not modified.
/// </param>
/// <returns>Pdf containing the annotation</returns>
public byte[] AddAnnotation(byte[] pdf, TextAnnotation textAnnotation)
{
    // Resolve the target page locally so the caller's annotation object is left untouched.
    var page = textAnnotation.Page == 0 ? 1 : textAnnotation.Page;

    // The source stream is kept open for as long as the pdf instance is in use
    // and is disposed after it.
    using (var sourceStream = new MemoryStream(pdf))
    using (var pdfInstance = GdPictureHelper.GetPDFInstance())
    {
        pdfInstance.LoadFromStream(sourceStream);
        pdfInstance.SelectPage(page);

        using (var annotationManager = new AnnotationManager())
        {
            annotationManager.InitFromGdPicturePDF(pdfInstance);

            var annotation = annotationManager.AddTextAnnot(
                textAnnotation.Left,
                textAnnotation.Top,
                textAnnotation.Width,
                textAnnotation.Height,
                textAnnotation.Text);
            annotation.FontSize = textAnnotation.FontSize;
            annotation.FontName = textAnnotation.FontName;
            annotation.ForeColor = textAnnotation.ForeColor;

            annotationManager.BurnAnnotationsToPage(false);

            using (var memoryStream = new MemoryStream())
            {
                annotationManager.SaveDocumentToPDF(memoryStream);

                // ToArray copies the whole buffer regardless of the stream position.
                return memoryStream.ToArray();
            }
        }
    }
}
/// <summary>
/// Match the blob against the templates of an ADR configuration.
/// </summary>
/// <param name="configuration">ADR configuration</param>
/// <param name="blob">Blob to match</param>
/// <returns>Best match if exists (rounded confidence greater than 0), else null</returns>
public ADRResult Match(byte[] configuration, byte[] blob)
{
    ADRResult result = null;

    var path = $"{temporaryDataDirectory}\\ADR\\";
    var fullPath = $"{path}{Guid.NewGuid()}.dat";

    using (var gdPictureImaging = GdPictureHelper.GetImagingInstance())
    {
        try
        {
            // The template configuration is loaded from a file path, so the bytes
            // are written to a temporary file first.
            DirectoryHelper.CreateDirectoryIfNotExists(path);
            File.WriteAllBytes(fullPath, configuration);
            gdPictureImaging.ADRLoadTemplateConfig(fullPath);

            int imageId = gdPictureImaging.CreateGdPictureImageFromByteArray(blob);
            if (imageId != 0)
            {
                try
                {
                    int closerTemplateID = gdPictureImaging.ADRGetCloserTemplateForGdPictureImage(imageId);
                    if (closerTemplateID != 0)
                    {
                        var tag = gdPictureImaging.ADRGetTemplateTag(closerTemplateID);
                        var accuracy = Math.Round(gdPictureImaging.ADRGetLastConfidence(), 2);

                        // A confidence that rounds to 0 is treated as "no match".
                        if (accuracy > 0.0)
                        {
                            result = new ADRResult { Tag = tag, Accuracy = accuracy };
                        }
                    }
                }
                finally
                {
                    gdPictureImaging.ReleaseGdPictureImage(imageId);
                }
            }
        }
        finally
        {
            // Always remove the temporary file, also when the image could not be
            // created or an exception was thrown.
            if (File.Exists(fullPath))
            {
                File.Delete(fullPath);
            }
        }
    }

    return result;
}
/// <summary>
/// Search text within a pdf
/// </summary>
/// <param name="pdf">Pdf as byte array</param>
/// <param name="searchTexts">Search texts; null, empty and whitespace-only entries are ignored</param>
/// <param name="caseSensitive">Gets or sets whether the search process is case sensitive</param>
/// <returns>
/// Search result list with one entry per (page, search text) pair that matched;
/// empty if there is nothing to search for.
/// </returns>
public IList<PdfSearchResult> Search(byte[] pdf, IList<string> searchTexts, bool caseSensitive = true)
{
    var result = new List<PdfSearchResult>();

    var terms = searchTexts?.Where(x => !string.IsNullOrWhiteSpace(x)).ToList();
    if (terms == null || terms.Count == 0)
    {
        return result;
    }

    // Ordinal comparison avoids culture-dependent lower-casing and the per-term,
    // per-page string copies that ToLower() would create.
    var comparison = caseSensitive
        ? System.StringComparison.Ordinal
        : System.StringComparison.OrdinalIgnoreCase;

    using (var stream = new MemoryStream(pdf))
    using (var pdfInstance = GdPictureHelper.GetPDFInstance())
    {
        pdfInstance.LoadFromStream(stream);

        var pageCount = pdfInstance.GetPageCount();
        for (int pageNumber = 1; pageNumber <= pageCount; pageNumber++)
        {
            if (pdfInstance.SelectPage(pageNumber) != GdPictureStatus.OK)
            {
                continue;
            }

            var text = pdfInstance.GetPageText();
            if (string.IsNullOrEmpty(text))
            {
                // Nothing on this page can match a non-blank search text.
                continue;
            }

            foreach (var term in terms)
            {
                if (text.IndexOf(term, comparison) >= 0)
                {
                    result.Add(new PdfSearchResult { PageNumber = pageNumber, SearchText = term });
                }
            }
        }
    }

    return result;
}
/// <summary>
/// Draw the first page of a layer pdf onto the pages of a pdf and return the result.
/// </summary>
/// <param name="pdf">Pdf to add the layer to</param>
/// <param name="layer">Layer file as byte array</param>
/// <param name="mode">Add mode (first page, each page, ...)</param>
/// <returns>Pdf with layer</returns>
public byte[] MergeLayer(byte[] pdf, byte[] layer, PdfLayerMode mode)
{
    using (var layerStream = new MemoryStream(layer))
    using (var layerPdf = GdPictureHelper.GetPDFInstance())
    {
        layerPdf.LoadFromStream(layerStream);
        layerPdf.SelectPage(1);

        using (var inputStream = new MemoryStream(pdf))
        using (var inputPdf = GdPictureHelper.GetPDFInstance())
        using (var outputPdf = GdPictureHelper.GetPDFInstance())
        {
            outputPdf.NewPDF();
            inputPdf.LoadFromStream(inputStream);

            for (int pageNumber = 1; pageNumber <= inputPdf.GetPageCount(); pageNumber++)
            {
                // Pages that cannot be selected in the source are left out of the output.
                if (inputPdf.SelectPage(pageNumber) != GdPictureStatus.OK)
                {
                    continue;
                }

                outputPdf.ClonePage(inputPdf, pageNumber);
                outputPdf.SelectPage(pageNumber);

                bool drawLayer = mode == PdfLayerMode.AllPages
                                 || (mode == PdfLayerMode.FirstPage && pageNumber == 1);
                if (drawLayer)
                {
                    // The layer is drawn at the origin using the layer page's own size.
                    outputPdf.DrawPage(layerPdf, 1, 0, 0, layerPdf.GetPageWidth(), layerPdf.GetPageHeight());
                }
            }

            using (var outputStream = new MemoryStream())
            {
                outputPdf.SaveToStream(outputStream);
                outputStream.Position = 0;
                return outputStream.ToArray();
            }
        }
    }
}
/// <summary>
/// Merge pdfs and return a single pdf file
/// </summary>
/// <param name="pdfs">List of pdfs</param>
/// <param name="options">
/// Pdf merge options. Accepted for interface compatibility; this method does not read any option.
/// </param>
/// <returns>Merged pdf; if exactly one pdf is passed, that same byte array is returned</returns>
/// <exception cref="ArgumentNullException"><paramref name="pdfs"/> is null</exception>
/// <exception cref="InvalidOperationException">The list is empty or the merge produced no document</exception>
public byte[] Merge(IList<byte[]> pdfs, MergeOption options = null)
{
    if (pdfs == null)
    {
        throw new ArgumentNullException(nameof(pdfs));
    }

    if (pdfs.Count == 0)
    {
        throw new InvalidOperationException("Could not merge 0 pdfs.");
    }

    if (pdfs.Count == 1)
    {
        return pdfs[0];
    }

    var sourceStreams = new List<MemoryStream>(pdfs.Count);
    var sourcePdfs = new List<GdPicturePDF>(pdfs.Count);
    GdPicturePDF mergedPdf = null;

    try
    {
        foreach (var pdf in pdfs)
        {
            // Register each resource before using it so the finally block
            // cleans it up even if loading fails half-way through the list.
            var sourceStream = new MemoryStream(pdf);
            sourceStreams.Add(sourceStream);

            var sourcePdf = GdPictureHelper.GetPDFInstance();
            sourcePdfs.Add(sourcePdf);

            sourcePdf.LoadFromStream(sourceStream);
        }

        using (var targetPdf = GdPictureHelper.GetPDFInstance())
        {
            mergedPdf = targetPdf.MergeDocuments(sourcePdfs.ToArray());
            if (mergedPdf == null)
            {
                throw new InvalidOperationException("Could not merge pdfs: merge returned no document.");
            }

            using (var stream = new MemoryStream())
            {
                mergedPdf.SaveToStream(stream);
                return stream.ToArray();
            }
        }
    }
    finally
    {
        // Dispose the merged document first, then the sources, then their streams.
        mergedPdf?.Dispose();

        foreach (var sourcePdf in sourcePdfs)
        {
            sourcePdf.Dispose();
        }

        foreach (var sourceStream in sourceStreams)
        {
            sourceStream.Dispose();
        }
    }
}
/// <summary>
/// Convert pdf to tiff
/// </summary>
/// <param name="data">Pdf as byte array (blob)</param>
/// <returns>Tiff as byte array (blob), or null if the pdf could not be loaded</returns>
public byte[] Convert(byte[] data)
{
    // We assume that GdPicture has been correctly installed and unlocked.
    using (var documentConverter = GdPictureHelper.GetGdPictureDocumentConverterInstance())
    using (var pdfStream = new MemoryStream(data))
    {
        GdPictureStatus loadStatus = documentConverter.LoadFromStream(pdfStream, GdPicture14.DocumentFormat.DocumentFormatPDF);
        if (loadStatus != GdPictureStatus.OK)
        {
            return null;
        }

        using (var tiffStream = new MemoryStream())
        {
            // The save status is not evaluated; whatever was written to the stream is returned.
            documentConverter.SaveAsTIFF(tiffStream, TiffCompression.TiffCompressionAUTO);
            return tiffStream.ToArray();
        }
    }
}
/// <summary>
/// Load image: disposes the current state, creates a new imaging instance and
/// reads multipage flag, page count and height of the loaded image.
/// </summary>
/// <param name="image">Image as byte array</param>
/// <returns>True if loading was successful (a non-zero image id was created)</returns>
public bool LoadImage(byte[] image)
{
    Dispose();

    imaging = GdPictureHelper.GetImagingInstance();
    imageId = imaging.CreateGdPictureImageFromByteArray(image);

    isMultipageImage = imaging.TiffIsMultiPage(imageId);
    PageCount = isMultipageImage
        ? imaging.TiffGetPageCount(imageId)
        : imaging.GetPageCount(imageId);
    ImageHeight = imaging.GetHeight(imageId);

    return imageId != 0;
}
/// <summary>
/// Create adr Configuration
/// </summary>
/// <param name="configurations">
/// List of configuration objects; each one contributes a template carrying its tag and image.
/// Entries whose image cannot be created are skipped.
/// </param>
/// <returns>Configuration as byte-array, or null if the configuration could not be saved</returns>
/// <exception cref="ArgumentNullException"><paramref name="configurations"/> is null</exception>
public byte[] CreateConfiguration(IList<ADRConfiguration> configurations)
{
    if (configurations == null)
    {
        throw new ArgumentNullException(nameof(configurations));
    }

    var path = $"{temporaryDataDirectory}\\ADR\\";
    var fullPath = $"{path}{Guid.NewGuid()}.dat";
    DirectoryHelper.CreateDirectoryIfNotExists(path);

    using (var gdPictureImaging = GdPictureHelper.GetImagingInstance())
    {
        // Add images
        foreach (var configuration in configurations)
        {
            int imageId = gdPictureImaging.CreateGdPictureImageFromByteArray(configuration.Blob);
            if (imageId == 0)
            {
                continue;
            }

            try
            {
                int templateId = gdPictureImaging.ADRCreateTemplateEmpty();
                gdPictureImaging.ADRSetTemplateTag(templateId, configuration.Tag);
                gdPictureImaging.ADRAddGdPictureImageToTemplate(templateId, imageId);
            }
            finally
            {
                // Release the image even if building the template throws.
                gdPictureImaging.ReleaseGdPictureImage(imageId);
            }
        }

        // Save file, get content and delete
        try
        {
            return gdPictureImaging.ADRSaveTemplateConfig(fullPath)
                ? File.ReadAllBytes(fullPath)
                : null;
        }
        finally
        {
            // Remove the temporary file in every case (failed save, failed read, success).
            if (File.Exists(fullPath))
            {
                File.Delete(fullPath);
            }
        }
    }
}
/// <summary>
/// Split an image by page range; every part is saved in TIFF format.
/// </summary>
/// <param name="image">Image to split (a single-page image is converted to a multipage tiff first)</param>
/// <param name="ranges">
/// List of page ranges. Ranges with a page count of 0 or less, or reaching beyond the
/// last page of the image, are ignored.
/// </param>
/// <returns>List of split images, one per accepted range; empty if no ranges were given</returns>
public IList<ImageSplitResult> Split(byte[] image, IList<PageNumberRange> ranges)
{
    var result = new List<ImageSplitResult>();

    // Check the ranges before touching GdPicture so that no image is created
    // (and left unreleased) when there is nothing to split.
    if (ranges == null || ranges.Count == 0)
    {
        return result;
    }

    using (var gdPictureImage = GdPictureHelper.GetImagingInstance())
    {
        int imageId = gdPictureImage.CreateGdPictureImageFromByteArray(image);
        try
        {
            if (!gdPictureImage.TiffIsMultiPage(imageId))
            {
                int singlePageImageId = imageId;
                imageId = gdPictureImage.TiffCreateMultiPageFromGdPictureImage(singlePageImageId);
                if (singlePageImageId != 0)
                {
                    gdPictureImage.ReleaseGdPictureImage(singlePageImageId);
                }
            }

            // The source image is only read from below, so its page count is constant.
            int totalPages = gdPictureImage.TiffGetPageCount(imageId);

            foreach (var range in ranges.Where(x => x.PageCount > 0 && x.StartPageNumber + (x.PageCount - 1) <= totalPages))
            {
                var newImageId = gdPictureImage.TiffCreateMultiPageFromGdPictureImage(imageId);
                try
                {
                    // NOTE(review): this loop deletes by ascending index while the page count
                    // shrinks, so it stops before every page has been visited. Left unchanged
                    // because the intended remaining pages are not clear from here - confirm
                    // against the GdPicture TiffDeletePage behavior.
                    for (int i = 1; i <= gdPictureImage.TiffGetPageCount(newImageId); i++)
                    {
                        gdPictureImage.TiffDeletePage(newImageId, i);
                    }

                    for (int i = 0; i < range.PageCount; i++)
                    {
                        gdPictureImage.SelectPage(imageId, range.StartPageNumber + i);
                        gdPictureImage.TiffAppendPageFromGdPictureImage(newImageId, imageId);
                    }

                    using (var targetStream = new MemoryStream())
                    {
                        // The last argument (4) is passed through unchanged from the original call.
                        gdPictureImage.SaveAsStream(newImageId, targetStream, GdPicture14.DocumentFormat.DocumentFormatTIFF, 4);

                        result.Add(new ImageSplitResult
                        {
                            Image = targetStream.ToArray(),
                            PageCount = range.PageCount,
                            Barcode = range.Barcode,
                            BarcodeType = range.BarcodeType
                        });
                    }
                }
                finally
                {
                    gdPictureImage.ReleaseGdPictureImage(newImageId);
                }
            }
        }
        finally
        {
            gdPictureImage.ReleaseGdPictureImage(imageId);
        }
    }

    return result;
}
}
/// <summary>
/// Run OCR on the currently loaded image and collect the recognized text per page.
/// </summary>
/// <param name="options">
/// OCR options; when null, <c>DefaultOptions</c> is used. If the options list no pages,
/// all pages (1..PageCount) are processed.
/// </param>
/// <returns>
/// OCR result containing one region result per processed page and the concatenated text.
/// Page selection / image errors are recorded in the result and the page is skipped;
/// an exception yields a result with empty text and the exception message.
/// </returns>
public OCRResult ExtractText(OCROption options)
{
    using (var ocr = GdPictureHelper.GetOCRInstance())
    {
        try
        {
            // Resolve the options once so the null fallback applies to every later
            // access, not just to ApplySettings.
            var effectiveOptions = options ?? DefaultOptions;

            var result = new OCRResult();
            var contentBuilder = new StringBuilder();

            ocr.SetImage(imageId);
            ApplySettings(ocr, effectiveOptions);

            var pages = new List<int>();
            if (effectiveOptions.Pages == null || effectiveOptions.Pages.Count == 0)
            {
                for (int i = 1; i <= PageCount; i++)
                {
                    pages.Add(i);
                }
            }
            else
            {
                pages.AddRange(effectiveOptions.Pages);
            }

            foreach (var page in pages)
            {
                GdPictureStatus status = isMultipageImage
                    ? imaging.TiffSelectPage(imageId, page)
                    : imaging.SelectPage(imageId, page);

                if (status != GdPictureStatus.OK)
                {
                    result.ErrorOccured = true;
                    result.ErrorMessages.Add($"Error during page selection. Page: {page}");
                    continue;
                }

                // The image is set again after the page selection, before running the OCR.
                if (ocr.SetImage(imageId) != GdPictureStatus.OK)
                {
                    result.ErrorOccured = true;
                    result.ErrorMessages.Add($"Error during setting image. Page: {page}");
                    continue;
                }

                var resultId = ocr.RunOCR();
                string text = ocr.GetOCRResultText(resultId);

                // Add result
                result.RegionResults.Add(new OCRRegionResult
                {
                    OptionName = effectiveOptions.OptionName,
                    Height = effectiveOptions.Height,
                    Left = effectiveOptions.Left,
                    Page = page,
                    Top = effectiveOptions.Top,
                    Width = effectiveOptions.Width,
                    Text = text
                });
                contentBuilder.AppendLine(text);
            }

            result.Text = contentBuilder.ToString();
            return result;
        }
        catch (Exception ex)
        {
            return new OCRResult
            {
                Text = "",
                ErrorMessages = new[] { ex.Message },
                ErrorOccured = true
            };
        }
        finally
        {
            // Release the OCR results on the success path as well as on the error path.
            ocr.ReleaseOCRResults();
        }
    }
}
/// <summary>
/// Initialize service: obtains a pdf instance from <c>GdPictureHelper.GetPDFInstance()</c>
/// and stores it in <c>pdfInstance</c>.
/// </summary>
public PdfContentExtractionService() { pdfInstance = GdPictureHelper.GetPDFInstance(); }
/// <summary>
/// Convert an image (all of its pages) to a pdf.
/// </summary>
/// <param name="data">Image as byte array</param>
/// <param name="embeddOCRText">
/// If true the pdf is created with PDF/A-1a conformance, otherwise as a plain pdf.
/// No OCR is executed by this method (the OCR call was commented out in the original code).
/// </param>
/// <param name="language">OCR language; currently unused because no OCR is executed</param>
/// <returns>Pdf as byte array</returns>
/// <exception cref="Exception">The image could not be created or the pdf could not be initialized</exception>
public byte[] Convert(byte[] data, bool embeddOCRText = true, string language = "deu")
{
    using (var pdfInstance = GdPictureHelper.GetPDFInstance())
    using (var gdPictureImaging = GdPictureHelper.GetImagingInstance())
    {
        int imageId = gdPictureImaging.CreateGdPictureImageFromByteArray(data);
        if (gdPictureImaging.GetStat() != GdPictureStatus.OK)
        {
            throw new Exception("Could not create gdpicture imaging instance");
        }

        byte[] pdf;
        try
        {
            GdPictureStatus state = embeddOCRText
                ? pdfInstance.NewPDF(PdfConformance.PDF_A_1a)
                : pdfInstance.NewPDF();

            if (state != GdPictureStatus.OK)
            {
                throw new Exception($"Culd not convert document. State: {state}");
            }

            int pageCount = gdPictureImaging.GetPageCount(imageId);
            for (int i = 1; i <= pageCount; i++)
            {
                // Pages that cannot be selected are skipped.
                if (gdPictureImaging.SelectPage(imageId, i) == GdPictureStatus.OK)
                {
                    pdfInstance.AddImageFromGdPictureImage(imageId, false, true);
                }
            }

            using (var stream = new MemoryStream())
            {
                pdfInstance.SaveToStream(stream);
                pdf = stream.ToArray();
            }
        }
        finally
        {
            // Release gdpicture image, also when the conversion fails with an exception.
            gdPictureImaging.ReleaseGdPictureImage(imageId);
        }

        // Close pdf document
        pdfInstance.CloseDocument();

        return pdf;
    }
}
/// <summary>
/// Execute QR barcode recognition on every page of the given document.
/// </summary>
/// <param name="blob">Blob to test</param>
/// <param name="fileExtension">
/// File extension, with or without dots, any casing. "pdf" blobs are converted to tiff first.
/// </param>
/// <returns>List of detected barcodes (type "QR") with the page they were found on</returns>
public IList<BarcodeRecognitionResult> Process(byte[] blob, string fileExtension)
{
    var results = new List<BarcodeRecognitionResult>();

    bool isPdf = string.Equals(fileExtension?.Replace(".", ""), "pdf", System.StringComparison.OrdinalIgnoreCase);
    byte[] imgData = isPdf ? pdfToTiffService.Convert(blob) : blob;

    using (var gdPictureImage = GdPictureHelper.GetImagingInstance())
    {
        int imageID = 0;
        try
        {
            imageID = gdPictureImage.CreateGdPictureImageFromByteArray(imgData);
            gdPictureImage.TiffOpenMultiPageForWrite(true);

            if (!gdPictureImage.TiffIsMultiPage(imageID))
            {
                // Convert to a multipage tiff so the page loop below works for every input.
                int imagetmp = imageID;
                imageID = gdPictureImage.TiffCreateMultiPageFromGdPictureImage(imagetmp);
                gdPictureImage.ReleaseGdPictureImage(imagetmp);
            }

            int pageCount = gdPictureImage.TiffGetPageCount(imageID);
            for (int pageNumber = 1; pageNumber <= pageCount; pageNumber++)
            {
                gdPictureImage.SelectPage(imageID, pageNumber);

                if (gdPictureImage.BarcodeQRReaderDoScan(imageID, BarcodeQRReaderScanMode.BestQuality) != GdPictureStatus.OK)
                {
                    continue;
                }

                int barcodeCount = gdPictureImage.BarcodeQRReaderGetBarcodeCount();
                for (int i = 1; i <= barcodeCount; i++)
                {
                    results.Add(new BarcodeRecognitionResult
                    {
                        Barcode = gdPictureImage.BarcodeQRReaderGetBarcodeValue(i),
                        BarcodeType = "QR",
                        Page = pageNumber
                    });
                }
            }
        }
        finally
        {
            // One cleanup path for success and failure; exceptions still propagate to the caller.
            if (imageID != 0)
            {
                gdPictureImage.BarcodeQRReaderClear();
                gdPictureImage.ReleaseGdPictureImage(imageID);
            }
        }
    }

    return results;
}
/// <summary>
/// Execute 1D barcode recognition on every page of the given document.
/// </summary>
/// <param name="blob">Blob to test</param>
/// <param name="fileExtension">
/// File extension, with or without dots, any casing. "pdf" blobs are rendered page by page,
/// everything else is loaded as an image.
/// </param>
/// <param name="options">
/// Settings/options: barcode types to look for (defaults to Code128, EAN13 and Code39 when
/// none are given), render DPI for pdf pages and optional black and white conversion.
/// </param>
/// <returns>List of detected barcodes with value, type and page</returns>
/// <exception cref="ArgumentNullException"><paramref name="options"/> is null</exception>
/// <exception cref="ArgumentException">A configured barcode type name is not a known reader type</exception>
public IList<BarcodeRecognitionResult> Process(byte[] blob, string fileExtension, BarcodeRecognitionOption options)
{
    if (options == null)
    {
        throw new ArgumentNullException(nameof(options));
    }

    var result = new List<BarcodeRecognitionResult>();

    var barcodeTypes = Barcode1DReaderType.Barcode1DReaderCode128
                       | Barcode1DReaderType.Barcode1DReaderEAN13
                       | Barcode1DReaderType.Barcode1DReaderCode39;

    if (options.BarcodeTypes != null && options.BarcodeTypes.Any())
    {
        barcodeTypes = Barcode1DReaderType.Barcode1DReaderNone;
        foreach (var type in options.BarcodeTypes)
        {
            if (string.IsNullOrWhiteSpace(type))
            {
                continue;
            }

            // Type names are given without the "Barcode1DReader" prefix and matched case-insensitively.
            barcodeTypes |= (Barcode1DReaderType)Enum.Parse(typeof(Barcode1DReaderType), $"Barcode1DReader{type}", true);
        }
    }

    using (var gdPictureImage = GdPictureHelper.GetImagingInstance())
    {
        // Scans the currently selected page of the given image and records every barcode found.
        // Shared by the pdf and the image branch.
        Action<int, int> scanPage = (scanImageId, pageNumber) =>
        {
            var status = gdPictureImage.Barcode1DReaderDoScan(scanImageId, Barcode1DReaderScanMode.BestQuality, barcodeTypes, false, 1);
            if (status != GdPictureStatus.OK)
            {
                return;
            }

            try
            {
                var barcodeAmount = gdPictureImage.Barcode1DReaderGetBarcodeCount();
                for (int i = 1; i <= barcodeAmount; i++)
                {
                    result.Add(new BarcodeRecognitionResult
                    {
                        Barcode = gdPictureImage.Barcode1DReaderGetBarcodeValue(i),
                        Page = pageNumber,
                        BarcodeType = gdPictureImage.Barcode1DReaderGetBarcodeType(i).ToString()
                    });
                }
            }
            finally
            {
                gdPictureImage.Barcode1DReaderClear();
            }
        };

        if (string.Equals(fileExtension?.Replace(".", ""), "pdf", StringComparison.OrdinalIgnoreCase))
        {
            using (var stream = new MemoryStream(blob))
            using (var gdPicturePdf = GdPictureHelper.GetPDFInstance())
            {
                gdPicturePdf.LoadFromStream(stream);

                var pageCount = gdPicturePdf.GetPageCount();
                for (int pageNumber = 1; pageNumber <= pageCount; pageNumber++)
                {
                    gdPicturePdf.SelectPage(pageNumber);

                    var imageID = gdPicturePdf.RenderPageToGdPictureImageEx(options.PdfToTiffDPI, true);
                    try
                    {
                        if (options.ConvertToBlackAndWhite)
                        {
                            gdPictureImage.FxBlackNWhite(imageID, BitonalReduction.Stucki);
                            gdPictureImage.ConvertTo1BppFast(imageID);
                        }

                        scanPage(imageID, pageNumber);
                    }
                    finally
                    {
                        // Release the rendered page even if conversion or scanning throws.
                        gdPictureImage.ReleaseGdPictureImage(imageID);
                    }
                }

                gdPicturePdf.CloseDocument();
            }
        }
        else
        {
            int imageID = gdPictureImage.CreateGdPictureImageFromByteArray(blob);
            try
            {
                gdPictureImage.TiffOpenMultiPageForWrite(true);

                if (!gdPictureImage.TiffIsMultiPage(imageID))
                {
                    int imagetmp = imageID;
                    imageID = gdPictureImage.TiffCreateMultiPageFromGdPictureImage(imagetmp);
                    gdPictureImage.ReleaseGdPictureImage(imagetmp);
                }

                if (options.ConvertToBlackAndWhite)
                {
                    // NOTE(review): the conversion runs once, before any page is selected -
                    // confirm whether it applies to every page of a multipage image.
                    gdPictureImage.FxBlackNWhite(imageID, BitonalReduction.Stucki);
                    gdPictureImage.ConvertTo1BppFast(imageID);
                }

                int pageCount = gdPictureImage.TiffGetPageCount(imageID);
                for (int pageNumber = 1; pageNumber <= pageCount; pageNumber++)
                {
                    gdPictureImage.TiffSelectPage(imageID, pageNumber);
                    scanPage(imageID, pageNumber);
                }
            }
            finally
            {
                gdPictureImage.ReleaseGdPictureImage(imageID);
            }
        }
    }

    return result;
}