Example #1
0
        /// <summary>
        /// Runs OCR on the document identified by <paramref name="fileID"/>, sending one OCR request per page chunk.
        /// </summary>
        /// <param name="pdfApiInstance">API client forwarded to the request utilities.</param>
        /// <param name="actionConfiguration">OCR settings used to build the request parameters.</param>
        /// <param name="fileToProcess">File being processed; its absolute path is forwarded to the request utilities.</param>
        /// <param name="fileID">Identifier of the document, used for both the info request and the OCR requests.</param>
        /// <param name="workerNumber">Worker index forwarded to the request utilities.</param>
        /// <returns>
        /// The response of the last chunk that was sent. This is the first chunk response carrying an error when one occurs.
        /// Returns <c>null</c> when the page count could not be obtained or when a chunk request produced no response.
        /// </returns>
        private PdfOCRResponse HandleOCRPDF(PDFApi pdfApiInstance, PDFOCRActionConfiguration actionConfiguration, FileToProcess fileToProcess, string fileID, int workerNumber)
        {
            // The page count is required to split the document into chunks.
            PdfGetInfoResponse getInfoResponse = PassportPDFRequestsUtilities.SendGetInfoRequest(pdfApiInstance, new PdfGetInfoParameters(fileID), workerNumber, fileToProcess.FileAbsolutePath, FileOperationStartEventHandler);// todo: use appropriate event handler

            // A missing response is handled like a failed one instead of being dereferenced.
            if (getInfoResponse == null || getInfoResponse.Error != null)
            {
                return null;
            }

            PdfOCRParameters ocrParameters = PassportPDFParametersUtilities.GetOCRParameters(actionConfiguration, fileID);

            int pageCount   = getInfoResponse.PageCount;
            int chunkLength = Math.Min(pageCount, FrameworkGlobals.PAGE_CHUNK_LENGTH_FOR_OCR_ACTION);
            // Documents that fit in a single chunk are processed in one request.
            int chunkCount  = pageCount > FrameworkGlobals.PAGE_CHUNK_LENGTH_FOR_OCR_ACTION ? (int)Math.Ceiling((double)pageCount / FrameworkGlobals.PAGE_CHUNK_LENGTH_FOR_OCR_ACTION) : 1;

            PdfOCRResponse ocrResponse = null;

            for (int chunkNumber = 1; chunkNumber <= chunkCount; chunkNumber++)
            {
                ocrParameters.PageRange = PassportPDFParametersUtilities.GetChunkProcessingPageRange(pageCount, chunkLength, chunkNumber, chunkCount);

                ocrResponse = PassportPDFRequestsUtilities.SendOCRRequest(pdfApiInstance, ocrParameters, workerNumber, fileToProcess.FileAbsolutePath, ocrParameters.PageRange, pageCount, FileChunkProcessingProgressEventHandler);

                // Stop at the first chunk that fails. Continuing would overwrite the failing response
                // with a later one and hide the error from the caller, which only inspects the returned response.
                if (_cancellationPending || ocrResponse == null || ocrResponse.Error != null)
                {
                    return ocrResponse;
                }
            }

            return ocrResponse;
        }
        /// <summary>
        /// Builds the workflow used for the OCR action: a PDF load operation followed by an OCR operation,
        /// with the PDF save operation type.
        /// </summary>
        /// <param name="ocrActionConfiguration">Configuration attached to the OCR operation as its parameters.</param>
        /// <returns>A new <see cref="OperationsWorkflow"/> holding the two operations in that order.</returns>
        public static OperationsWorkflow CreatePDFOCRWorkflow(PDFOCRActionConfiguration ocrActionConfiguration)
        {
            // The load operation is created with the "retain existing" PDF version setting.
            Operation loadOperation = new Operation(Operation.OperationType.LoadPDF, PdfVersion.PdfVersionRetainExisting);
            Operation ocrOperation  = new Operation(Operation.OperationType.OCRPDF, ocrActionConfiguration);

            List<Operation> workflowOperations = new List<Operation>();
            workflowOperations.Add(loadOperation);
            workflowOperations.Add(ocrOperation);

            return new OperationsWorkflow(workflowOperations, OperationsWorkflow.SaveOperationType.SavePDF);
        }
Example #3
0
 /// <summary>
 /// Walks the command-line arguments and dispatches each one to the configuration parsers.
 /// </summary>
 /// <remarks>
 /// Every argument is first handed to the application configuration parser. When that parser returns
 /// <c>true</c> the argument is not offered to the action parsers. Otherwise it is passed to the parser of
 /// each action configuration that was supplied (non-null).
 /// </remarks>
 /// <param name="args">Command-line arguments to parse.</param>
 /// <param name="applicationConfiguration">Application configuration passed to the application argument parser.</param>
 /// <param name="reduceActionConfiguration">Optional reduce configuration; its parser is skipped when null.</param>
 /// <param name="ocrActionConfiguration">Optional OCR configuration; its parser is skipped when null.</param>
 /// <param name="imageSaveAsPdfMrcActionConfiguration">Optional image-to-PDF-MRC configuration; its parser is skipped when null.</param>
 /// <param name="loadAsPdfActionConfiguration">Optional load-as-PDF configuration; its parser is skipped when null.</param>
 public static void ParseCommandLineArgs(string[] args, ApplicationConfiguration applicationConfiguration, PDFReduceActionConfiguration reduceActionConfiguration = null, PDFOCRActionConfiguration ocrActionConfiguration = null, ImageSaveAsPDFMRCActionConfiguration imageSaveAsPdfMrcActionConfiguration = null, DocumentLoadAsPDFActionConfiguration loadAsPdfActionConfiguration = null)
 {
     for (int position = 0; position < args.Length; ++position)
     {
         bool handledByApplicationParser = ParseApplicationConfigurationArgument(args, position, applicationConfiguration);

         if (handledByApplicationParser)
         {
             continue;
         }

         // Not an application-level argument: offer it to every supplied action configuration.
         if (reduceActionConfiguration != null)
         {
             ParseReduceActionConfigurationArgument(args, position, reduceActionConfiguration);
         }

         if (ocrActionConfiguration != null)
         {
             ParseOCRActionConfigurationArgument(args, position, ocrActionConfiguration);
         }

         if (imageSaveAsPdfMrcActionConfiguration != null)
         {
             ParseSaveImageAsPdfMrcConfigurationArgument(args, position, imageSaveAsPdfMrcActionConfiguration);
         }

         if (loadAsPdfActionConfiguration != null)
         {
             ParseDocumentLoadAsPdfConfigurationArgument(args, position, loadAsPdfActionConfiguration);
         }
     }
 }
Example #4
0
        /// <summary>
        /// Executes the operations of <paramref name="workflow"/> in order for a single file.
        /// </summary>
        /// <remarks>
        /// After each successful operation the remaining token count taken from its response is passed to
        /// <c>RemainingTokensUpdateEventHandler</c>, when a handler is attached. Processing stops at the first
        /// operation that yields no response or reports an error; the failure is reported through <c>OnError</c>.
        /// </remarks>
        /// <param name="pdfApiInstance">API client used by the PDF operations.</param>
        /// <param name="imageApiInstance">API client used by the image load operation.</param>
        /// <param name="workflow">Workflow whose <c>OperationsToBePerformed</c> are executed.</param>
        /// <param name="fileToProcess">File the operations are applied to.</param>
        /// <param name="workerNumber">Worker index forwarded to the operation handlers.</param>
        /// <returns>
        /// A <see cref="WorkflowProcessingResult"/> built from the flags, file identifier and warnings collected
        /// along the way, or <c>null</c> when cancellation is pending, an operation returns no response, or an
        /// operation reports an error.
        /// </returns>
        private WorkflowProcessingResult ProcessWorkflow(PDFApi pdfApiInstance, ImageApi imageApiInstance, OperationsWorkflow workflow, FileToProcess fileToProcess, int workerNumber)
        {
            List<string> warningMessages = new List<string>();
            bool         contentRemoved  = false;
            bool         versionChanged  = false;
            bool         linearized      = false;
            string       fileID          = null;

            foreach (Operation operation in workflow.OperationsToBePerformed)
            {
                Error           actionError     = null;
                ReduceErrorInfo reduceErrorInfo = null;
                long            remainingTokens = 0;

                if (_cancellationPending)
                {
                    return null;
                }

                switch (operation.Type)
                {
                case Operation.OperationType.LoadPDF:
                    PdfVersion outputVersion = (PdfVersion)operation.Parameters;
                    PdfLoadDocumentResponse loadDocumentResponse = HandleLoadPDF(pdfApiInstance, outputVersion, fileToProcess, workerNumber);
                    if (loadDocumentResponse == null)
                    {
                        ReportInvalidResponse("Load");
                        return null;
                    }
                    remainingTokens = loadDocumentResponse.RemainingTokens;
                    actionError     = loadDocumentResponse.Error;
                    // The file identifier returned by the load is reused by the following operations.
                    fileID          = loadDocumentResponse.FileId;
                    break;

                case Operation.OperationType.LoadImage:
                    ImageLoadResponse imageLoadResponse = HandleLoadImage(imageApiInstance, fileToProcess, workerNumber);
                    if (imageLoadResponse == null)
                    {
                        ReportInvalidResponse("Load");
                        return null;
                    }
                    remainingTokens = imageLoadResponse.RemainingTokens;
                    actionError     = imageLoadResponse.Error;
                    fileID          = imageLoadResponse.FileId;
                    break;

                case Operation.OperationType.ReducePDF:
                    PDFReduceActionConfiguration reduceActionConfiguration = (PDFReduceActionConfiguration)operation.Parameters;
                    PdfReduceResponse            reduceResponse            = HandleReducePDF(pdfApiInstance, reduceActionConfiguration, fileToProcess, fileID, workerNumber, warningMessages);
                    if (reduceResponse == null)
                    {
                        ReportInvalidResponse("Reduce");
                        return null;
                    }
                    remainingTokens = reduceResponse.RemainingTokens;
                    contentRemoved  = reduceResponse.ContentRemoved;
                    versionChanged  = reduceResponse.VersionChanged;
                    actionError     = reduceResponse.Error;
                    reduceErrorInfo = reduceResponse.ErrorInfo;
                    linearized      = reduceActionConfiguration.FastWebView;
                    break;

                case Operation.OperationType.OCRPDF:
                    PDFOCRActionConfiguration ocrActionConfiguration = (PDFOCRActionConfiguration)operation.Parameters;
                    PdfOCRResponse            ocrResponse            = HandleOCRPDF(pdfApiInstance, ocrActionConfiguration, fileToProcess, fileID, workerNumber);
                    if (ocrResponse == null)
                    {
                        ReportInvalidResponse("OCR");
                        return null;
                    }
                    remainingTokens = ocrResponse.RemainingTokens;
                    actionError     = ocrResponse.Error;
                    break;
                }

                if (actionError != null)
                {
                    // Reduce-specific error details take precedence over the generic error when they
                    // carry a code other than OK.
                    string errorMessage;
                    if (reduceErrorInfo != null && reduceErrorInfo.ErrorCode != ReduceErrorCode.OK)
                    {
                        errorMessage = ErrorManager.GetMessageFromReduceActionError(reduceErrorInfo, fileToProcess.FileAbsolutePath);
                    }
                    else
                    {
                        errorMessage = ErrorManager.GetMessageFromPassportPDFError(actionError, operation.Type, fileToProcess.FileAbsolutePath);
                    }

                    OnError(errorMessage);
                    return null;
                }

                // Invoke through a local copy and only when a handler is attached, so a workflow
                // without a subscriber does not fail with a NullReferenceException.
                var tokensUpdateHandler = RemainingTokensUpdateEventHandler;
                if (tokensUpdateHandler != null)
                {
                    tokensUpdateHandler.Invoke(remainingTokens);
                }
            }

            return new WorkflowProcessingResult(contentRemoved, versionChanged, linearized, fileID, warningMessages);
        }

        /// <summary>
        /// Reports, through <c>OnError</c>, the localized "invalid response received" message for the given action.
        /// </summary>
        /// <param name="actionName">Action name passed to the message formatter.</param>
        private void ReportInvalidResponse(string actionName)
        {
            OnError(LogMessagesUtils.ReplaceMessageSequencesAndReferences(FrameworkGlobals.MessagesLocalizer.GetString("message_invalid_response_received", FrameworkGlobals.ApplicationLanguage), actionName: actionName));
        }
Example #5
0
        /// <summary>
        /// Applies the command-line argument at <paramref name="index"/> to the OCR configuration when it is
        /// one of the OCR switches; any other argument is ignored.
        /// </summary>
        /// <remarks>
        /// Recognized switches (case-insensitive): <c>/L</c>, which takes the following argument as the OCR
        /// language, and <c>/SKIPPAGESWITHTEXT</c>.
        /// </remarks>
        /// <param name="args">Full command-line argument array.</param>
        /// <param name="index">Position in <paramref name="args"/> of the argument to examine.</param>
        /// <param name="ocrActionConfiguration">Configuration updated when the argument is recognized.</param>
        private static void ParseOCRActionConfigurationArgument(string[] args, int index, PDFOCRActionConfiguration ocrActionConfiguration)
        {
            // Switch names are fixed identifiers, so upper-case with the invariant culture. The
            // culture-sensitive ToUpper() turns 'i' into 'İ' under Turkish cultures, which would make
            // "/skippageswithtext" impossible to match.
            switch (args[index].ToUpperInvariant())
            {
            case "/L":     // language is the next parameter
                // Ignored when "/L" is the last argument and no value follows it.
                if (args.Length > index + 1)
                {
                    ocrActionConfiguration.OCRLanguage = args[index + 1].Trim();
                }
                break;

            case "/SKIPPAGESWITHTEXT":
                ocrActionConfiguration.SkipPagesWithText = true;
                break;
            }
        }