// USE CASE: Using image processing tools in custom preprocessing.
// Recognizes three sample invoice images that are fed through a custom image source
// and exports every resulting document to the FCEExport folder.
public static void Using_image_processing_tools_in_custom_preprocessing(IEngine engine)
{
    trace("Create and configure an instance of FlexiCapture processor...");
    IFlexiCaptureProcessor flexiProcessor = engine.CreateFlexiCaptureProcessor();
    flexiProcessor.AddDocumentDefinitionFile(SamplesFolder + "\\SampleProject\\Templates\\Invoice_eng.fcdot");

    trace("Set up an image source with custom preprocessing...");
    // The preprocessing itself is performed by the image source object
    // (see the CustomPreprocessingImageSource class); this method only wires it up.
    string[] sampleImages =
    {
        SamplesFolder + "\\SampleImages\\Invoices_1.tif",
        SamplesFolder + "\\SampleImages\\Invoices_2.tif",
        SamplesFolder + "\\SampleImages\\Invoices_3.tif"
    };
    CustomPreprocessingImageSource preprocessingSource = new CustomPreprocessingImageSource(engine);
    foreach (string imagePath in sampleImages)
    {
        preprocessingSource.AddImageFile(imagePath);
    }
    flexiProcessor.SetCustomImageSource(preprocessingSource);

    traceBegin("Process the images...");
    int exportedDocuments = 0;
    for (IDocument recognized = flexiProcessor.RecognizeNextDocument();
         recognized != null;
         recognized = flexiProcessor.RecognizeNextDocument())
    {
        // Every document is expected to match the document definition in this sample
        assert(recognized.DocumentDefinition != null);

        flexiProcessor.ExportDocumentEx(recognized, SamplesFolder + "\\FCEExport", "NextDocument_" + exportedDocuments, null);
        exportedDocuments++;
    }

    // A null document ends the loop; no processing error is expected behind it in this sample
    IProcessingError lastError = flexiProcessor.GetLastProcessingError();
    assert(lastError == null);
    traceEnd("OK");

    trace("Check the results...");
    assert(exportedDocuments == 2);
}
// Classifies the PDF pages found under the source folder and sorts them into the
// classified or the reject folder.
//
// Pages are grouped by the part of the file path that precedes the first '_'; the pages of a
// group are then addressed as "<group>_<n>.pdf" for n = 1..getTotalPage(files, group).
// For every group:
//   * pages classified as INVOICE are moved to the classified folder only when the group also
//     contains at least one PURCHASEORDER page; otherwise they are rejected and the group is
//     reported through MissingInvoiceList;
//   * PURCHASEORDER pages are always moved to the reject folder;
//   * pages with an empty classification ("annex" pages) are rejected immediately and the
//     group is reported through AnnexPageList.
// Pages with any other classification name are left where they are.
//
// Outputs (always set, even when an exception propagates): IsClassified, LogMessages,
// AnnexPageList, MissingInvoiceList.
// Throws Exception when the source folder contains no *.pdf file.
protected override void Execute(CodeActivityContext context)
{
    Console.WriteLine("Loading FlexiCapture Engine for Classification ...");
    bool isClassified = false;
    List<LogMessage> logList = new List<LogMessage>();
    List<string> filenameList = new List<string>();
    List<string> annexPages = new List<string>();
    List<string> missingInvoices = new List<string>();
    int totalClassified = 0;
    int totalReject = 0;

    try
    {
        // Read the activity arguments
        string documentProjectId = DocumentProjectId.Get(context);
        Console.WriteLine("Document Project ID: " + documentProjectId);
        logList.Add(new LogMessage("Document Project ID: " + documentProjectId, LogType.Information));

        string classifierTemplatePath = ClassifierTemplate.Get(context);
        Console.WriteLine("Classifier path: " + classifierTemplatePath);
        logList.Add(new LogMessage("Classifier path: " + classifierTemplatePath, LogType.Information));

        string sourceFolder = FolderFile.Get(context);
        Console.WriteLine("image path: " + sourceFolder);
        logList.Add(new LogMessage("image path: " + sourceFolder, LogType.Information));

        string rejectFolder = RejectFolder.Get(context);
        Console.WriteLine("unknown folder: " + rejectFolder);
        // Log the reject folder itself (the original repeated the image path entry here)
        logList.Add(new LogMessage("unknown folder: " + rejectFolder, LogType.Information));
        if (!Directory.Exists(rejectFolder))
        {
            Directory.CreateDirectory(rejectFolder);
        }

        string classifiedFolder = ClassifiedFolder.Get(context);
        Console.WriteLine("Classified folder: " + classifiedFolder);
        logList.Add(new LogMessage("Classified folder: " + classifiedFolder, LogType.Information));
        if (!Directory.Exists(classifiedFolder))
        {
            Directory.CreateDirectory(classifiedFolder);
        }

        Console.WriteLine("Adding images to process...");
        string[] files = Directory.GetFiles(sourceFolder, "*.pdf", SearchOption.AllDirectories);
        if (files.Length == 0)
        {
            Console.WriteLine("pdf Files *.pdf is not found");
            logList.Add(new LogMessage("pdf Files *.pdf is not found", LogType.Error));
            throw new Exception("pdf Files *.pdf is not found");
        }

        foreach (string pdf in files)
        {
            Console.WriteLine("pdf: " + pdf);
            logList.Add(new LogMessage("pdf: " + pdf, LogType.Information));

            // Group key = everything before the first '_' of the full path.
            // NOTE(review): an underscore in a directory name also ends the key - confirm
            // that source folders never contain one.
            filenameList.Add(pdf.Split('_')[0]);
        }
        Console.WriteLine("Added images to process...");

        List<string> invoiceList = new List<string>();
        List<string> poList = new List<string>();
        foreach (string groupKey in filenameList.Distinct())
        {
            int total = getTotalPage(files, groupKey);
            invoiceList.Clear();
            poList.Clear();

            for (int i = 1; i <= total; i++)
            {
                string pdfFile = groupKey + "_" + i + ".pdf";
                string name = this.DetermineDocumentType(documentProjectId, pdfFile, classifierTemplatePath);
                Console.WriteLine("executing pdf: " + pdfFile + " as " + name);
                logList.Add(new LogMessage("executing pdf: " + pdfFile, LogType.Information));

                // Ordinal, case-insensitive matching: independent of the current culture
                // (ToUpper() breaks e.g. under tr-TR) and safe for a null name.
                if (string.Equals(name, "INVOICE", StringComparison.OrdinalIgnoreCase))
                {
                    invoiceList.Add(pdfFile);
                    continue;
                }

                if (string.Equals(name, "PURCHASEORDER", StringComparison.OrdinalIgnoreCase))
                {
                    poList.Add(pdfFile);
                    continue;
                }

                if (string.IsNullOrEmpty(name))
                {
                    // Unclassified page: treated as an annex and rejected right away
                    Console.WriteLine("annex page is found as " + pdfFile);
                    logList.Add(new LogMessage("annex page is found as " + pdfFile, LogType.Error));
                    annexPages.Add(groupKey);
                    MovePdfAndLog(logList, sourceFolder, rejectFolder, pdfFile,
                        "move to reject folder " + rejectFolder, "move to reject folder");
                    totalReject++;
                }
            }

            bool hasInvoice = invoiceList.Count > 0;
            bool hasPurchaseOrder = poList.Count > 0;

            if (hasInvoice && hasPurchaseOrder)
            {
                foreach (string pdfFile in invoiceList)
                {
                    MovePdfAndLog(logList, sourceFolder, classifiedFolder, pdfFile,
                        "move to classified folder " + classifiedFolder, "move to classified folder " + classifiedFolder);
                    totalClassified++;
                }
            }
            else
            {
                // Invoice or purchase order missing: report the group and reject its invoices
                missingInvoices.Add(groupKey);
                foreach (string pdfFile in invoiceList)
                {
                    MovePdfAndLog(logList, sourceFolder, rejectFolder, pdfFile,
                        "move to reject folder " + rejectFolder, "move to reject folder");
                    missingInvoices.Add(pdfFile);
                    totalReject++;
                }
            }

            // Purchase order pages end up in the reject folder in both cases
            foreach (string pdfFile in poList)
            {
                MovePdfAndLog(logList, sourceFolder, rejectFolder, pdfFile,
                    "move to reject folder " + rejectFolder, "move to reject folder");
                totalReject++;
            }
        }

        isClassified = true;
        string message = String.Format("Total Number of classified Invoice is {0} and Rejected is {1}", totalClassified, totalReject);
        logList.Add(new LogMessage(message, LogType.Information));

        // The processor is not created in this method; guard against it never having been
        // set up (presumably done inside DetermineDocumentType - verify).
        if (processor != null)
        {
            var msg = processor.GetLastProcessingError();
            if (msg != null)
            {
                var msgError = msg.MessageText();
                logList.Add(new LogMessage(msgError, LogType.Error));
                Console.WriteLine(msgError);
            }
        }
    }
    // NOTE(review): only ExecutionEngineException is handled here; every other exception
    // (including the "no pdf" one thrown above) propagates to the caller after the finally block.
#pragma warning disable CS0618 // Type or member is obsolete
    catch (ExecutionEngineException e)
#pragma warning restore CS0618 // Type or member is obsolete
    {
        isClassified = false;
        Console.WriteLine("error: " + e.Message);
        logList.Add(new LogMessage("throw exception because of " + e.Message, LogType.Error));
    }
    finally
    {
        Console.WriteLine("Released FlexiCapture Engine ...");
        UnloadEngine(ref engine);
        logList.Add(new LogMessage("Released FlexiCapture Engine ...", LogType.Information));
        IsClassified.Set(context, isClassified);
        LogMessages.Set(context, logList);
        AnnexPageList.Set(context, annexPages);
        MissingInvoiceList.Set(context, missingInvoices);
    }
}

// Writes consoleMessage to the console, appends logMessage to logList as an information
// entry and then moves pdfFile from sourceFolder to destinationFolder.
private void MovePdfAndLog(List<LogMessage> logList, string sourceFolder, string destinationFolder, string pdfFile, string consoleMessage, string logMessage)
{
    Console.WriteLine(consoleMessage);
    string fileNameWithoutExtension = Path.GetFileNameWithoutExtension(pdfFile);
    logList.Add(new LogMessage(logMessage, LogType.Information));
    MoveFileToDestinationFolder(sourceFolder, destinationFolder, pdfFile, pdfFile, fileNameWithoutExtension);
}
// Recognizes every PDF found under the source folder against the *.fcdot templates of the
// OCR template folder and routes each recognized document by its confidence level:
//   * confidence >= ConfidenceLevelLimitation: the document is exported to the export folder
//     and its PDF is moved there; if exporting or moving fails, the PDF is moved to the
//     reject folder and counted as an error;
//   * otherwise the PDF is moved to the "not confidence" folder and its name is reported
//     through NotConfidenceList.
// Documents that match no template are logged as errors and skipped.
//
// Outputs (always set, even when an exception propagates): IsRecognized, NonInvoiceList,
// Messages, TotalExported, TotalError, TotalNotConfidence, NotConfidenceList.
// Throws Exception when no *.fcdot template or no *.pdf file is found.
protected override void Execute(CodeActivityContext context)
{
    List<LogMessage> logList = new List<LogMessage>();
    List<string> nonInvoices = new List<string>(); // never filled in this method; published as-is
    List<string> notConfidenceList = new List<string>();

    LogInfoMessage(logList, "Loading FlexiCapture Engine for Recognition ...");

    string documentProjectId = DocumentProjectId.Get(context);
    LogInfoMessage(logList, "Get Document Project ID: " + documentProjectId);

    int confidenceLevelLimitation = ConfidenceLevelLimitation.Get(context);
    LogInfoMessage(logList, string.Format("Confidence Level Limitation: {0}", confidenceLevelLimitation));

    string sourceFolder = SourceFolder.Get(context);
    LogInfoMessage(logList, string.Format("Get PDF folder: {0}", sourceFolder));

    string ocrTemplate = OCRTemplateFolder.Get(context);
    LogInfoMessage(logList, string.Format("OCR Template Folder: {0}", ocrTemplate));

    string exportFolder = ExportFolder.Get(context);
    LogInfoMessage(logList, string.Format("Get Export Folder: {0}", exportFolder));
    EnsureFolderExists(exportFolder, logList);

    // For the next three folders the console text and the log text differ on purpose
    // (kept exactly as before).
    string recognizedFolder = RecognizedFolder.Get(context);
    Console.WriteLine("Get Recognize Folder: " + recognizedFolder);
    logList.Add(new LogMessage("Recognize Folder is: " + recognizedFolder, LogType.Information));
    EnsureFolderExists(recognizedFolder, logList);

    string notConfidenceFolder = NotConfidenceFolder.Get(context);
    Console.WriteLine("Get Not Confidence Folder: " + notConfidenceFolder);
    logList.Add(new LogMessage("Not Confidence Folder is: " + notConfidenceFolder, LogType.Information));
    EnsureFolderExists(notConfidenceFolder, logList);

    string rejectFolder = RejectFolder.Get(context);
    Console.WriteLine("Get Reject Folder: " + rejectFolder);
    logList.Add(new LogMessage("Reject Folder is: " + rejectFolder, LogType.Information));
    EnsureFolderExists(rejectFolder, logList);

    engine = LoadEngine(documentProjectId);

    Boolean isRecognized = false;
    int noError = 0;
    int noSuccess = 0;
    int noNotConfidence = 0;

    try
    {
        LogInfoMessage(logList, "Creating and configuring the FlexiCapture Processor...");
        processor = engine.CreateFlexiCaptureProcessor();

        LogInfoMessage(logList, "Adding Document Definition to process...");
        string[] templateFiles = Directory.GetFiles(ocrTemplate, "*.fcdot", SearchOption.AllDirectories);
        if (templateFiles.Length == 0)
        {
            string notFound = string.Format("OCR Template *.fcdot is not found in the folder {0}", ocrTemplate);
            LogErrorMessage(logList, notFound);
            throw new Exception(notFound);
        }

        foreach (string templateFile in templateFiles)
        {
            processor.AddDocumentDefinitionFile(templateFile);
            LogInfoMessage(logList, string.Format("OCR Template {0} is added", templateFile));
        }

        LogInfoMessage(logList, string.Format("Adding images to process..."));
        string[] pdfFiles = Directory.GetFiles(sourceFolder, "*.pdf", SearchOption.AllDirectories);
        if (pdfFiles.Length == 0)
        {
            string notFound = string.Format("PDF Files *.pdf is not found in the folder {0}.", sourceFolder);
            LogErrorMessage(logList, notFound);
            throw new Exception(notFound);
        }

        foreach (string pdfFile in pdfFiles)
        {
            processor.AddImageFile(pdfFile);
            LogInfoMessage(logList, string.Format("PDF Files {0} is added", pdfFile));
        }

        LogInfoMessage(logList, "Recognizing the images and exporting the results...");
        while (true)
        {
            // Reset the per-document confidence state kept on the activity
            confidenceLevel = 0;
            confidenceDetailLevel = 0;
            confidenceHeaderLevel = 0;
            totalConfidenceLevelHD = 0;

            IDocument document = processor.RecognizeNextDocument();
            if (document == null)
            {
                IProcessingError error = processor.GetLastProcessingError();
                if (error != null)
                {
                    // Processing error: log it and try the next document.
                    // NOTE(review): these errors are not added to noError - confirm intended.
                    LogErrorMessage(logList, string.Format("processing error because of {0}.", error.MessageText()));
                    continue;
                }

                // No error and no document: the image queue is exhausted
                LogInfoMessage(logList, string.Format("all PDF Files has been executed or no PDF file in the folder {0}", sourceFolder));
                break;
            }

            if (document.DocumentDefinition == null)
            {
                // No template matched the image; the file is left in place and skipped
                LogErrorMessage(logList, string.Format("PDF file is not matched with existing OCR Templates."));
                continue;
            }

            string originalPath = document.Pages[0].OriginalImagePath;
            string file = Path.GetFileName(originalPath);
            string fileNameWithoutExtension = Path.GetFileNameWithoutExtension(originalPath);
            LogInfoMessage(logList, string.Format("Recognizing pdf {0} is started.", originalPath));
            LogInfoMessage(logList, string.Format("Extracting data from pdf {0} is started", originalPath));

            FillSystemFields(document, file, logList);

            LogInfoMessage(logList, string.Format("Total Confidence is {0} ", totalConfidenceLevelHD));

            if (totalConfidenceLevelHD >= confidenceLevelLimitation)
            {
                try
                {
                    LogInfoMessage(logList, string.Format("Exporting process for pdf {0} is started ...", file));
                    processor.ExportDocument(document, exportFolder);
                    Console.WriteLine("Exporting process is completed ...");
                    MoveFileToDestinationFolder(sourceFolder, exportFolder, originalPath, fileNameWithoutExtension);
                    Console.WriteLine(string.Format("Moving {0} to Export folder {1} is completed", file, exportFolder));
                    logList.Add(new LogMessage("Exporting process is ended ...", LogType.Information));
                    noSuccess++;
                }
                catch (Exception e)
                {
                    noError++;
                    // Log the actual failure (previously the stale "export started" text
                    // was written to the log as the error entry).
                    LogErrorMessage(logList, string.Format("exporting is failed because of {0}.", e.Message));
                    MoveFileToDestinationFolder(sourceFolder, rejectFolder, originalPath, fileNameWithoutExtension);
                    LogInfoMessage(logList, string.Format("Moving pdf {0} to Reject folder {1} is completed", file, rejectFolder));
                }
            }
            else
            {
                // Below the threshold: park the file for manual handling (log only, no console)
                logList.Add(new LogMessage(
                    string.Format("Confidence of PDF {0} is {1} less than target confidence {2}", file, totalConfidenceLevelHD, confidenceLevelLimitation),
                    LogType.Error));
                notConfidenceList.Add(file);
                logList.Add(new LogMessage(
                    string.Format("Total number of not confidence is {0}", notConfidenceList.Count),
                    LogType.Information));
                MoveFileToDestinationFolder(sourceFolder, notConfidenceFolder, originalPath, fileNameWithoutExtension);
                LogInfoMessage(logList, string.Format("Moving pdf {0} to Not Confidence folder {1} is completed", file, notConfidenceFolder));
                noNotConfidence++;
            }
        }

        var msg = processor.GetLastProcessingError();
        if (msg != null)
        {
            LogErrorMessage(logList, string.Format("the processing error because of {0}.", msg.MessageText()));
            noError++;
        }

        // Summary goes to the log only
        logList.Add(new LogMessage(
            string.Format("No. of Not Confidence {0} and No. of Error {1} and No. of Exported to DB {2}", noNotConfidence, noError, noSuccess),
            LogType.Information));
        isRecognized = true;
    }
    finally
    {
        UnloadEngine(ref engine);
        LogInfoMessage(logList, string.Format("Released FlexiCapture Engine for Recognition..."));
        IsRecognized.Set(context, isRecognized);
        NonInvoiceList.Set(context, nonInvoices);
        Messages.Set(context, logList);
        TotalExported.Set(context, noSuccess);
        TotalError.Set(context, noError);
        TotalNotConfidence.Set(context, noNotConfidence);
        NotConfidenceList.Set(context, notConfidenceList);
    }
}

// Writes message to the console and appends it to logList as an information entry.
private static void LogInfoMessage(List<LogMessage> logList, string message)
{
    Console.WriteLine(message);
    logList.Add(new LogMessage(message, LogType.Information));
}

// Writes message to the console and appends it to logList as an error entry.
private static void LogErrorMessage(List<LogMessage> logList, string message)
{
    Console.WriteLine(message);
    logList.Add(new LogMessage(message, LogType.Error));
}

// Creates folder when it does not exist yet and logs the creation.
private static void EnsureFolderExists(string folder, List<LogMessage> logList)
{
    if (!Directory.Exists(folder))
    {
        Directory.CreateDirectory(folder);
        LogInfoMessage(logList, string.Format("Folder {0} is created completely", folder));
    }
}

// Walks every field of every section of document and fills the bookkeeping fields:
// INV_CONFIDENCE_LEVEL (also stored in totalConfidenceLevelHD / confidenceLevel),
// INV_STATUS ("Recognized") and FILE_NAME (file). Field names are matched after trimming,
// ignoring case, with an ordinal comparison so the result does not depend on the culture.
private void FillSystemFields(IDocument document, string file, List<LogMessage> logList)
{
    for (int i = 0; i < document.Sections.Count; i++)
    {
        var section = document.Sections[i];
        if (object.ReferenceEquals(section, null))
        {
            continue;
        }

        for (int d = 0; d < section.Children.Count; d++)
        {
            var child = section.Children[d];
            if (object.ReferenceEquals(child, null))
            {
                continue;
            }

            var field = ((IField)child);
            Console.WriteLine(string.Format("Extracting column {0} = {1}", field.Name, field.Value.AsText));

            string fieldName = field.Name.Trim();
            if (string.Equals(fieldName, "INV_CONFIDENCE_LEVEL", StringComparison.OrdinalIgnoreCase))
            {
                var value = TextFieldHelper.GetConfidenLevel(engine, document);
                totalConfidenceLevelHD = value;
                field.Value.AsInteger = value;
                confidenceLevel = value;
                LogInfoMessage(logList, string.Format("Confidence level of {0} is {1}", file, value));
            }

            if (string.Equals(fieldName, "INV_STATUS", StringComparison.OrdinalIgnoreCase))
            {
                ReplaceFieldText(field, "Recognized");
            }

            if (string.Equals(fieldName, "FILE_NAME", StringComparison.OrdinalIgnoreCase))
            {
                ReplaceFieldText(field, file);
            }
        }
    }
}

// Replaces the whole text value of field with text.
private void ReplaceFieldText(IField field, string text)
{
    var data = engine.CreateText(text, null);
    field.Value.AsText.Delete(0, field.Value.AsText.Length);
    field.Value.AsText.Insert(data, 0);
}