/// <summary>
/// Runs OCR on a PDF by first querying its page count and then sending one OCR request per page chunk.
/// </summary>
/// <param name="pdfApiInstance">API client passed to the get-info and OCR request utilities.</param>
/// <param name="actionConfiguration">OCR configuration used to build the request parameters.</param>
/// <param name="fileToProcess">File being processed; its absolute path is forwarded to the request utilities.</param>
/// <param name="fileID">Identifier of the document to run OCR on.</param>
/// <param name="workerNumber">Number of the worker issuing the requests.</param>
/// <returns>
/// The response of the last OCR request that was sent. This is <c>null</c> when the page count could not be
/// retrieved or when an OCR request yielded no response. When a chunk reports an error, that chunk's response is
/// returned immediately so the caller can surface the error.
/// </returns>
private PdfOCRResponse HandleOCRPDF(PDFApi pdfApiInstance, PDFOCRActionConfiguration actionConfiguration, FileToProcess fileToProcess, string fileID, int workerNumber)
{
    // First get the number of pages of the PDF.
    // TODO: use appropriate event handler
    PdfGetInfoResponse getInfoResponse = PassportPDFRequestsUtilities.SendGetInfoRequest(
        pdfApiInstance,
        new PdfGetInfoParameters(fileID),
        workerNumber,
        fileToProcess.FileAbsolutePath,
        FileOperationStartEventHandler);

    // The OCR request below is null-checked, so the get-info request is guarded the same way
    // instead of being dereferenced blindly.
    if (getInfoResponse == null || getInfoResponse.Error != null)
    {
        return null;
    }

    PdfOCRParameters ocrParameters = PassportPDFParametersUtilities.GetOCRParameters(actionConfiguration, fileID);

    int pageCount = getInfoResponse.PageCount;
    int chunkLength = Math.Min(pageCount, FrameworkGlobals.PAGE_CHUNK_LENGTH_FOR_OCR_ACTION);

    // At least one chunk is always processed, even when the document fits in a single chunk.
    int chunkCount = pageCount > FrameworkGlobals.PAGE_CHUNK_LENGTH_FOR_OCR_ACTION
        ? (int)Math.Ceiling((double)pageCount / FrameworkGlobals.PAGE_CHUNK_LENGTH_FOR_OCR_ACTION)
        : 1;

    PdfOCRResponse ocrResponse = null;

    for (int chunkNumber = 1; chunkNumber <= chunkCount; chunkNumber++)
    {
        ocrParameters.PageRange = PassportPDFParametersUtilities.GetChunkProcessingPageRange(pageCount, chunkLength, chunkNumber, chunkCount);

        ocrResponse = PassportPDFRequestsUtilities.SendOCRRequest(
            pdfApiInstance,
            ocrParameters,
            workerNumber,
            fileToProcess.FileAbsolutePath,
            ocrParameters.PageRange,
            pageCount,
            FileChunkProcessingProgressEventHandler);

        // Stop on cancellation, on a missing response, or on a failed chunk. Continuing after a failed
        // chunk would let a later successful chunk overwrite the error, hiding it from the caller.
        if (_cancellationPending || ocrResponse == null || ocrResponse.Error != null)
        {
            return ocrResponse;
        }
    }

    return ocrResponse;
}
/// <summary>
/// Builds a workflow made of a PDF load operation followed by an OCR operation, saved as a PDF.
/// </summary>
/// <param name="ocrActionConfiguration">Configuration attached to the OCR operation.</param>
/// <returns>The workflow containing the two operations, in that order.</returns>
public static OperationsWorkflow CreatePDFOCRWorkflow(PDFOCRActionConfiguration ocrActionConfiguration)
{
    // The load step keeps whatever PDF version the document already has.
    Operation loadStep = new Operation(Operation.OperationType.LoadPDF, PdfVersion.PdfVersionRetainExisting);
    Operation ocrStep = new Operation(Operation.OperationType.OCRPDF, ocrActionConfiguration);

    List<Operation> steps = new List<Operation>();
    steps.Add(loadStep);
    steps.Add(ocrStep);

    return new OperationsWorkflow(steps, OperationsWorkflow.SaveOperationType.SavePDF);
}
/// <summary>
/// Walks the command-line arguments and feeds each one to the application-level parser and,
/// when that parser does not accept it, to the parser of every action configuration that was supplied.
/// </summary>
/// <param name="args">Raw command-line arguments.</param>
/// <param name="applicationConfiguration">Application configuration filled by the application-level parser.</param>
/// <param name="reduceActionConfiguration">Optional reduce configuration; skipped when <c>null</c>.</param>
/// <param name="ocrActionConfiguration">Optional OCR configuration; skipped when <c>null</c>.</param>
/// <param name="imageSaveAsPdfMrcActionConfiguration">Optional image-to-PDF-MRC configuration; skipped when <c>null</c>.</param>
/// <param name="loadAsPdfActionConfiguration">Optional load-as-PDF configuration; skipped when <c>null</c>.</param>
public static void ParseCommandLineArgs(string[] args, ApplicationConfiguration applicationConfiguration, PDFReduceActionConfiguration reduceActionConfiguration = null, PDFOCRActionConfiguration ocrActionConfiguration = null, ImageSaveAsPDFMRCActionConfiguration imageSaveAsPdfMrcActionConfiguration = null, DocumentLoadAsPDFActionConfiguration loadAsPdfActionConfiguration = null)
{
    int position = 0;

    while (position < args.Length)
    {
        int current = position;
        position++;

        // An argument accepted by the application-level parser is not offered to the action parsers.
        bool handledByApplication = ParseApplicationConfigurationArgument(args, current, applicationConfiguration);
        if (handledByApplication)
        {
            continue;
        }

        if (reduceActionConfiguration != null)
        {
            ParseReduceActionConfigurationArgument(args, current, reduceActionConfiguration);
        }

        if (ocrActionConfiguration != null)
        {
            ParseOCRActionConfigurationArgument(args, current, ocrActionConfiguration);
        }

        if (imageSaveAsPdfMrcActionConfiguration != null)
        {
            ParseSaveImageAsPdfMrcConfigurationArgument(args, current, imageSaveAsPdfMrcActionConfiguration);
        }

        if (loadAsPdfActionConfiguration != null)
        {
            ParseDocumentLoadAsPdfConfigurationArgument(args, current, loadAsPdfActionConfiguration);
        }
    }
}
/// <summary>
/// Executes every operation of a workflow, in order, on a single file.
/// </summary>
/// <param name="pdfApiInstance">API client passed to the PDF operation handlers.</param>
/// <param name="imageApiInstance">API client passed to the image load handler.</param>
/// <param name="workflow">Workflow whose <c>OperationsToBePerformed</c> are executed.</param>
/// <param name="fileToProcess">File the operations are applied to.</param>
/// <param name="workerNumber">Number of the worker running the workflow.</param>
/// <returns>
/// A result carrying the flags and file identifier collected along the way, or <c>null</c> when
/// cancellation is pending, a handler returns no response, or an operation reports an error.
/// </returns>
private WorkflowProcessingResult ProcessWorkflow(PDFApi pdfApiInstance, ImageApi imageApiInstance, OperationsWorkflow workflow, FileToProcess fileToProcess, int workerNumber)
{
    List<string> warningMessages = new List<string>();
    bool contentRemoved = false;
    bool versionChanged = false;
    bool linearized = false;
    string fileID = null;

    foreach (Operation operation in workflow.OperationsToBePerformed)
    {
        if (_cancellationPending)
        {
            return null;
        }

        // Per-operation outcome, filled in by whichever case handles the operation.
        Error stepError = null;
        ReduceErrorInfo reduceDetails = null;
        long tokensLeft = 0;

        switch (operation.Type)
        {
            case Operation.OperationType.LoadPDF:
            {
                PdfVersion targetVersion = (PdfVersion)operation.Parameters;
                PdfLoadDocumentResponse pdfLoadResult = HandleLoadPDF(pdfApiInstance, targetVersion, fileToProcess, workerNumber);

                if (pdfLoadResult == null)
                {
                    OnError(LogMessagesUtils.ReplaceMessageSequencesAndReferences(
                        FrameworkGlobals.MessagesLocalizer.GetString("message_invalid_response_received", FrameworkGlobals.ApplicationLanguage),
                        actionName: "Load"));
                    return null;
                }

                tokensLeft = pdfLoadResult.RemainingTokens;
                stepError = pdfLoadResult.Error;
                fileID = pdfLoadResult.FileId;
                break;
            }

            case Operation.OperationType.LoadImage:
            {
                ImageLoadResponse imageLoadResult = HandleLoadImage(imageApiInstance, fileToProcess, workerNumber);

                if (imageLoadResult == null)
                {
                    OnError(LogMessagesUtils.ReplaceMessageSequencesAndReferences(
                        FrameworkGlobals.MessagesLocalizer.GetString("message_invalid_response_received", FrameworkGlobals.ApplicationLanguage),
                        actionName: "Load"));
                    return null;
                }

                tokensLeft = imageLoadResult.RemainingTokens;
                stepError = imageLoadResult.Error;
                fileID = imageLoadResult.FileId;
                break;
            }

            case Operation.OperationType.ReducePDF:
            {
                PDFReduceActionConfiguration reduceSettings = (PDFReduceActionConfiguration)operation.Parameters;
                PdfReduceResponse reduceResult = HandleReducePDF(pdfApiInstance, reduceSettings, fileToProcess, fileID, workerNumber, warningMessages);

                if (reduceResult == null)
                {
                    OnError(LogMessagesUtils.ReplaceMessageSequencesAndReferences(
                        FrameworkGlobals.MessagesLocalizer.GetString("message_invalid_response_received", FrameworkGlobals.ApplicationLanguage),
                        actionName: "Reduce"));
                    return null;
                }

                tokensLeft = reduceResult.RemainingTokens;
                contentRemoved = reduceResult.ContentRemoved;
                versionChanged = reduceResult.VersionChanged;
                stepError = reduceResult.Error;
                reduceDetails = reduceResult.ErrorInfo;
                linearized = reduceSettings.FastWebView;
                break;
            }

            case Operation.OperationType.OCRPDF:
            {
                PDFOCRActionConfiguration ocrSettings = (PDFOCRActionConfiguration)operation.Parameters;
                PdfOCRResponse ocrResult = HandleOCRPDF(pdfApiInstance, ocrSettings, fileToProcess, fileID, workerNumber);

                if (ocrResult == null)
                {
                    OnError(LogMessagesUtils.ReplaceMessageSequencesAndReferences(
                        FrameworkGlobals.MessagesLocalizer.GetString("message_invalid_response_received", FrameworkGlobals.ApplicationLanguage),
                        actionName: "OCR"));
                    return null;
                }

                tokensLeft = ocrResult.RemainingTokens;
                stepError = ocrResult.Error;
                break;
            }
        }

        if (stepError == null)
        {
            // Operation succeeded: publish the token balance and move on to the next operation.
            RemainingTokensUpdateEventHandler.Invoke(tokensLeft);
            continue;
        }

        // Operation failed: prefer the reduce-specific message when reduce error details report a non-OK code.
        string failureMessage;
        if (reduceDetails != null && reduceDetails.ErrorCode != ReduceErrorCode.OK)
        {
            failureMessage = ErrorManager.GetMessageFromReduceActionError(reduceDetails, fileToProcess.FileAbsolutePath);
        }
        else
        {
            failureMessage = ErrorManager.GetMessageFromPassportPDFError(stepError, operation.Type, fileToProcess.FileAbsolutePath);
        }

        OnError(failureMessage);
        return null;
    }

    return new WorkflowProcessingResult(contentRemoved, versionChanged, linearized, fileID, warningMessages);
}
/// <summary>
/// Applies the command-line argument at <paramref name="index"/> to the OCR configuration when it is
/// one of the OCR switches; any other argument is ignored.
/// </summary>
/// <param name="args">Raw command-line arguments.</param>
/// <param name="index">Position of the argument to examine.</param>
/// <param name="ocrActionConfiguration">OCR configuration updated by recognised switches.</param>
private static void ParseOCRActionConfigurationArgument(string[] args, int index, PDFOCRActionConfiguration ocrActionConfiguration)
{
    // Switch names are matched case-insensitively. The invariant culture is required here: a
    // culture-sensitive ToUpper() maps 'i' to dotted 'İ' under Turkish/Azeri cultures, so
    // "/skippageswithtext" would never match its case label on those systems.
    switch (args[index].ToUpperInvariant())
    {
        case "/L":
            // The language is the next argument; the switch is ignored when nothing follows it.
            if (args.Length > index + 1)
            {
                ocrActionConfiguration.OCRLanguage = args[index + 1].Trim();
            }
            break;

        case "/SKIPPAGESWITHTEXT":
            ocrActionConfiguration.SkipPagesWithText = true;
            break;
    }
}