Exemple #1
0
        /// <summary>
        /// Tries to load the ABBYY engine for the configured document project and
        /// publishes the collected log entries through <c>LogMessages</c>.
        /// </summary>
        /// <remarks>
        /// The engine is passed to <c>UnloadEngine</c> and the log is published in the
        /// <c>finally</c> block, so both happen whether or not loading succeeded.
        /// </remarks>
        /// <param name="context">Activity context used to read <c>DocumentProjectId</c> and to set <c>LogMessages</c>.</param>
        protected override void Execute(CodeActivityContext context)
        {
            const string connectingText = "Trying to connect to ABBYY Engine ...";
            Console.WriteLine(connectingText);

            var entries = new List<LogMessage>
            {
                new LogMessage(connectingText, LogType.Information)
            };

            try
            {
                // Resolve the project id from the activity arguments, then load the engine with it.
                string projectId = DocumentProjectId.Get(context);
                Console.WriteLine(string.Concat("Document Project ID: ", projectId));

                engine = LoadEngine(projectId);
            }
            #pragma warning disable CS0618 // Type or member is obsolete
            catch (ExecutionEngineException failure)
            #pragma warning restore CS0618 // Type or member is obsolete
            {
                // Only this exception type is handled here; anything else propagates after the finally block.
                string reason = failure.Message;
                Console.WriteLine("error: " + reason);
                entries.Add(new LogMessage("Abbyy Engine is failed to connect because of " + reason, LogType.Error));
            }
            finally
            {
                UnloadEngine(ref engine);
                entries.Add(new LogMessage("ABBYY Engine is unloaded...", LogType.Information));
                LogMessages.Set(context, entries);
            }
        }
        /// <summary>
        /// Classifies the PDF pages found under the source folder and moves them to the
        /// classified folder or the reject folder.
        /// </summary>
        /// <remarks>
        /// <para>
        /// Files are grouped by the part of their path that precedes the first '_' character.
        /// For every group, pages are addressed as "group_1.pdf" .. "group_N.pdf", where N is
        /// the value returned by <c>getTotalPage</c>, and each page is classified with
        /// <c>DetermineDocumentType</c>.
        /// </para>
        /// <para>
        /// A group that contains at least one page classified as INVOICE and at least one page
        /// classified as PURCHASEORDER has its invoice pages moved to the classified folder;
        /// otherwise the group key and its invoice pages are added to the missing-invoice list
        /// and the invoice pages are rejected. Purchase-order pages and pages with an empty
        /// class name are always moved to the reject folder. Pages classified under any other
        /// name are left where they are.
        /// </para>
        /// <para>
        /// Outputs (<c>IsClassified</c>, <c>LogMessages</c>, <c>AnnexPageList</c>,
        /// <c>MissingInvoiceList</c>) are set in the <c>finally</c> block, so they are published
        /// even when an exception leaves the method.
        /// </para>
        /// </remarks>
        /// <param name="context">Activity context used to read the input arguments and to set the output arguments.</param>
        /// <exception cref="Exception">Thrown when no "*.pdf" file exists under the source folder.</exception>
        protected override void Execute(CodeActivityContext context)
        {
            Console.WriteLine("Loading FlexiCapture Engine for Classification ...");
            bool isClassified = false;
            List<LogMessage> logList = new List<LogMessage>();
            List<string> groupKeys = new List<string>();
            List<string> annexPages = new List<string>();
            List<string> missingInvoices = new List<string>();
            int totalClassified = 0;
            int totalReject = 0;

            try
            {
                //get Document Project Id
                string documentProjectId = DocumentProjectId.Get(context);
                Console.WriteLine("Document Project ID: " + documentProjectId);
                logList.Add(new LogMessage("Document Project ID: " + documentProjectId, LogType.Information));

                //get classifierTemplate path
                string classifierTemplatePath = ClassifierTemplate.Get(context);
                Console.WriteLine("Classifier path: " + classifierTemplatePath);
                logList.Add(new LogMessage("Classifier path: " + classifierTemplatePath, LogType.Information));

                //get image path
                string sourceFolder = FolderFile.Get(context);
                Console.WriteLine("image path: " + sourceFolder);
                logList.Add(new LogMessage("image path: " + sourceFolder, LogType.Information));

                //get unknown folder
                string rejectFolder = RejectFolder.Get(context);
                Console.WriteLine("unknown folder: " + rejectFolder);
                // The log entry now names the reject folder, matching the console line
                // (it used to repeat the "image path" entry).
                logList.Add(new LogMessage("unknown folder: " + rejectFolder, LogType.Information));
                if (!Directory.Exists(rejectFolder))
                {
                    Directory.CreateDirectory(rejectFolder);
                }

                //get classified Folder
                string classifiedFolder = ClassifiedFolder.Get(context);
                Console.WriteLine("Classified folder: " + classifiedFolder);
                logList.Add(new LogMessage("Classified folder: " + classifiedFolder, LogType.Information));
                if (!Directory.Exists(classifiedFolder))
                {
                    Directory.CreateDirectory(classifiedFolder);
                }

                // Moves one page to the reject folder, logs the move and counts it as rejected.
                Action<string> moveToReject = path =>
                {
                    Console.WriteLine("move to reject folder " + rejectFolder);
                    string nameOnly = Path.GetFileNameWithoutExtension(path);
                    logList.Add(new LogMessage("move to reject folder", LogType.Information));
                    MoveFileToDestinationFolder(sourceFolder, rejectFolder, path, path, nameOnly);
                    totalReject++;
                };

                Console.WriteLine("Adding images to process...");
                string[] files = Directory.GetFiles(sourceFolder, "*.pdf", SearchOption.AllDirectories);
                if (files.Length == 0)
                {
                    Console.WriteLine("pdf Files *.pdf is not found");
                    logList.Add(new LogMessage("pdf Files *.pdf is not found", LogType.Error));
                    throw new Exception("pdf Files *.pdf is not found");
                }

                foreach (string pdf in files)
                {
                    Console.WriteLine("pdf: " + pdf);
                    logList.Add(new LogMessage("pdf: " + pdf, LogType.Information));

                    // Group key = everything before the first '_' of the full path.
                    // NOTE(review): a '_' inside a directory name cuts the key short — confirm that
                    // source paths never contain one outside the file name.
                    groupKeys.Add(pdf.Split('_')[0]);
                }

                Console.WriteLine("Added images to process...");

                foreach (string groupKey in groupKeys.Distinct())
                {
                    int total = getTotalPage(files, groupKey);
                    List<string> invoiceList = new List<string>();
                    List<string> poList = new List<string>();

                    for (int i = 1; i <= total; i++)
                    {
                        string pdfFile = groupKey + "_" + i + ".pdf";

                        string name = this.DetermineDocumentType(documentProjectId, pdfFile, classifierTemplatePath);

                        Console.WriteLine("executing pdf: " + pdfFile + " as " + name);
                        logList.Add(new LogMessage("executing pdf: " + pdfFile, LogType.Information));

                        // Ordinal, case-insensitive comparison: independent of the current culture
                        // and safe when the classifier returns null.
                        if (string.Equals(name, "INVOICE", StringComparison.OrdinalIgnoreCase))
                        {
                            invoiceList.Add(pdfFile);
                        }
                        else if (string.Equals(name, "PURCHASEORDER", StringComparison.OrdinalIgnoreCase))
                        {
                            poList.Add(pdfFile);
                        }
                        else if (string.IsNullOrEmpty(name))
                        {
                            // No class name: the page is reported as an annex page and rejected.
                            Console.WriteLine("annex page is found as " + pdfFile);
                            logList.Add(new LogMessage("annex page is found as " + pdfFile, LogType.Error));
                            annexPages.Add(groupKey);
                            moveToReject(pdfFile);
                        }
                        // Pages classified under any other name are left untouched.
                    }

                    if (invoiceList.Count > 0 && poList.Count > 0)
                    {
                        // Both document types are present: the invoice pages are accepted.
                        foreach (string invoice in invoiceList)
                        {
                            Console.WriteLine("move to classified folder " + classifiedFolder);
                            string nameOnly = Path.GetFileNameWithoutExtension(invoice);
                            logList.Add(new LogMessage("move to classified folder " + classifiedFolder, LogType.Information));
                            MoveFileToDestinationFolder(sourceFolder, classifiedFolder, invoice, invoice, nameOnly);
                            totalClassified++;
                        }
                    }
                    else
                    {
                        // One of the two document types is missing: report the group and reject its invoice pages.
                        missingInvoices.Add(groupKey);

                        foreach (string invoice in invoiceList)
                        {
                            moveToReject(invoice);
                            missingInvoices.Add(invoice);
                        }
                    }

                    // Purchase-order pages go to the reject folder in both cases.
                    foreach (string purchaseOrder in poList)
                    {
                        moveToReject(purchaseOrder);
                    }
                }

                isClassified = true;
                string message = String.Format("Total Number of classified Invoice is {0} and Rejected is {1}", totalClassified, totalReject);
                logList.Add(new LogMessage(message, LogType.Information));

                var msg = processor.GetLastProcessingError();
                if (msg != null)
                {
                    var msgError = msg.MessageText();
                    logList.Add(new LogMessage(msgError, LogType.Error));
                    Console.WriteLine(msgError);
                }
            }
            // NOTE(review): only the obsolete ExecutionEngineException is handled here. Any other
            // exception (including the "not found" Exception thrown above) propagates to the caller
            // after the finally block, with IsClassified left at false — confirm this is intended.
#pragma warning disable CS0618 // Type or member is obsolete
            catch (ExecutionEngineException e)
#pragma warning restore CS0618 // Type or member is obsolete
            {
                isClassified = false;
                Console.WriteLine("error: " + e.Message);
                logList.Add(new LogMessage("throw exception because of " + e.Message, LogType.Error));
            }
            finally
            {
                Console.WriteLine("Released FlexiCapture Engine ...");
                UnloadEngine(ref engine);

                logList.Add(new LogMessage("Released FlexiCapture Engine ...", LogType.Information));
                IsClassified.Set(context, isClassified);
                LogMessages.Set(context, logList);
                AnnexPageList.Set(context, annexPages);
                MissingInvoiceList.Set(context, missingInvoices);
            }
        }
Exemple #3
0
        /// <summary>
        /// Recognizes every PDF under the source folder with the FlexiCapture processor and
        /// exports, rejects or sets aside each document depending on the outcome.
        /// </summary>
        /// <remarks>
        /// <para>
        /// All "*.fcdot" files under the OCR template folder are added as document definitions and
        /// all "*.pdf" files under the source folder are added as images. Documents are then pulled
        /// one by one with <c>RecognizeNextDocument</c> until it returns null without a processing error.
        /// </para>
        /// <para>
        /// For each recognized document the fields INV_CONFIDENCE_LEVEL, INV_STATUS and FILE_NAME
        /// are filled in. If the confidence value reaches the configured limit, the document is
        /// exported and its file is moved to the export folder; when that fails the file is moved to
        /// the reject folder. Documents below the limit are moved to the not-confidence folder.
        /// </para>
        /// <para>
        /// The engine is unloaded and all output arguments are set in the <c>finally</c> block.
        /// Exceptions raised inside the <c>try</c> block are not caught here and propagate to the caller.
        /// </para>
        /// </remarks>
        /// <param name="context">Activity context used to read the input arguments and to set the output arguments.</param>
        /// <exception cref="Exception">Thrown when no "*.fcdot" template or no "*.pdf" file is found.</exception>
        protected override void Execute(CodeActivityContext context)
        {
            List<LogMessage> logList = new List<LogMessage>();
            List<string> nonInvoices = new List<string>(); // published as-is; nothing in this method adds to it
            List<string> notConfidenceList = new List<string>();

            // Writes the text to the console and records it in the activity log with the given severity.
            Action<string, LogType> report = (text, type) =>
            {
                Console.WriteLine(text);
                logList.Add(new LogMessage(text, type));
            };

            // Creates the folder when it does not exist yet and reports the creation.
            Action<string> ensureFolder = folder =>
            {
                if (!Directory.Exists(folder))
                {
                    Directory.CreateDirectory(folder);
                    report(string.Format("Folder {0} is created completely", folder), LogType.Information);
                }
            };

            report("Loading FlexiCapture Engine for Recognition ...", LogType.Information);

            string documentProjectId = DocumentProjectId.Get(context);
            report("Get Document Project ID: " + documentProjectId, LogType.Information);

            int confidenceLevelLimitation = ConfidenceLevelLimitation.Get(context);
            report(string.Format("Confidence Level Limitation: {0}", confidenceLevelLimitation), LogType.Information);

            string sourceFolder = SourceFolder.Get(context);
            report(string.Format("Get PDF folder: {0}", sourceFolder), LogType.Information);

            string ocrTemplate = OCRTemplateFolder.Get(context);
            report(string.Format("OCR Template Folder: {0}", ocrTemplate), LogType.Information);

            string exportFolder = ExportFolder.Get(context);
            report(string.Format("Get Export Folder: {0}", exportFolder), LogType.Information);
            ensureFolder(exportFolder);

            // The next three folders use different wording on the console and in the log.
            string recognizedFolder = RecognizedFolder.Get(context);
            Console.WriteLine("Get Recognize Folder: " + recognizedFolder);
            logList.Add(new LogMessage("Recognize Folder is:  " + recognizedFolder, LogType.Information));
            ensureFolder(recognizedFolder);

            string notConfidenceFolder = NotConfidenceFolder.Get(context);
            Console.WriteLine("Get Not Confidence Folder: " + notConfidenceFolder);
            logList.Add(new LogMessage("Not Confidence Folder is:  " + notConfidenceFolder, LogType.Information));
            ensureFolder(notConfidenceFolder);

            string rejectFolder = RejectFolder.Get(context);
            Console.WriteLine("Get Reject Folder: " + rejectFolder);
            logList.Add(new LogMessage("Reject Folder is:  " + rejectFolder, LogType.Information));
            ensureFolder(rejectFolder);

            engine = LoadEngine(documentProjectId);

            bool isRecognized = false;
            int noError = 0;
            int noSuccess = 0;
            int noNotConfidence = 0;

            try
            {
                report("Creating and configuring the FlexiCapture Processor...", LogType.Information);
                processor = engine.CreateFlexiCaptureProcessor();

                report("Adding Document Definition to process...", LogType.Information);
                string[] templateFiles = Directory.GetFiles(ocrTemplate, "*.fcdot", SearchOption.AllDirectories);
                if (templateFiles.Length == 0)
                {
                    string notFound = string.Format("OCR Template *.fcdot is not found in the folder {0}", ocrTemplate);
                    report(notFound, LogType.Error);
                    throw new Exception(notFound);
                }
                foreach (string ocr in templateFiles)
                {
                    processor.AddDocumentDefinitionFile(ocr);
                    report(string.Format("OCR Temhplate {0} is added", ocr), LogType.Information);
                }

                report("Adding images to process...", LogType.Information);
                string[] pdfFiles = Directory.GetFiles(sourceFolder, "*.pdf", SearchOption.AllDirectories);
                if (pdfFiles.Length == 0)
                {
                    string notFound = string.Format("PDF Files *.pdf is not found in the folder {0}.", sourceFolder);
                    report(notFound, LogType.Error);
                    throw new Exception(notFound);
                }
                foreach (string pdfFile in pdfFiles)
                {
                    processor.AddImageFile(pdfFile);
                    report(string.Format("PDF Files {0} is added", pdfFile), LogType.Information);
                }

                report("Recognizing the images and exporting the results...", LogType.Information);

                while (true)
                {
                    // Reset the per-document confidence fields before recognizing the next document.
                    confidenceLevel        = 0;
                    confidenceDetailLevel  = 0;
                    confidenceHeaderLevel  = 0;
                    totalConfidenceLevelHD = 0;

                    IDocument document = processor.RecognizeNextDocument();
                    if (document == null)
                    {
                        IProcessingError error = processor.GetLastProcessingError();
                        if (error != null)
                        {
                            // Processing error: log it and go on with the next document.
                            // This failure is logged but not added to noError.
                            report(string.Format("processing error because of {0}.", error.MessageText()), LogType.Error);
                            continue;
                        }

                        // No document and no error: there is nothing left to process.
                        report(string.Format("all PDF Files has been executed or no PDF file in the folder {0}", sourceFolder), LogType.Information);
                        break;
                    }

                    if (document.DocumentDefinition == null)
                    {
                        // No template matched this image; it is logged as an error and skipped
                        // (the file itself is not moved).
                        report("PDF file is not matched with existing OCR Templates.", LogType.Error);
                        continue;
                    }

                    string originalPath             = document.Pages[0].OriginalImagePath;
                    string file                     = Path.GetFileName(originalPath);
                    string filenamewithoutextention = Path.GetFileNameWithoutExtension(originalPath);

                    report(string.Format("Recognizing pdf {0} is started.", originalPath), LogType.Information);

                    //set confident level and status
                    report(string.Format("Extracting data from pdf {0} is started", originalPath), LogType.Information);
                    for (int i = 0; i < document.Sections.Count; i++)
                    {
                        var section = document.Sections[i];
                        if (object.ReferenceEquals(section, null))
                        {
                            continue;
                        }

                        for (int d = 0; d < section.Children.Count; d++)
                        {
                            var child = section.Children[d];
                            if (object.ReferenceEquals(child, null))
                            {
                                continue;
                            }

                            var field = ((IField)child);

                            // Field dump goes to the console only.
                            Console.WriteLine(string.Format("Extracting column {0} = {1}", field.Name, field.Value.AsText));

                            // Ordinal, case-insensitive comparison keeps the match independent of the current culture.
                            string fieldName = field.Name.Trim();

                            if (string.Equals(fieldName, "INV_CONFIDENCE_LEVEL", StringComparison.OrdinalIgnoreCase))
                            {
                                var value = TextFieldHelper.GetConfidenLevel(engine, document);
                                totalConfidenceLevelHD = value;
                                field.Value.AsInteger  = value;
                                confidenceLevel        = value;

                                report(string.Format("Confidence level of {0} is {1}", file, value), LogType.Information);
                            }
                            else if (string.Equals(fieldName, "INV_STATUS", StringComparison.OrdinalIgnoreCase))
                            {
                                // Replace the current field text with the fixed status.
                                var data = engine.CreateText("Recognized", null);
                                field.Value.AsText.Delete(0, field.Value.AsText.Length);
                                field.Value.AsText.Insert(data, 0);
                            }
                            else if (string.Equals(fieldName, "FILE_NAME", StringComparison.OrdinalIgnoreCase))
                            {
                                // Replace the current field text with the source file name.
                                var data = engine.CreateText(file, null);
                                field.Value.AsText.Delete(0, field.Value.AsText.Length);
                                field.Value.AsText.Insert(data, 0);
                            }
                        }
                    }

                    report(string.Format("Total Confidence is {0} ", totalConfidenceLevelHD), LogType.Information);

                    //check confidence level validation
                    if (totalConfidenceLevelHD >= confidenceLevelLimitation)
                    {
                        try
                        {
                            report(string.Format("Exporting process for pdf {0} is started ...", file), LogType.Information);

                            processor.ExportDocument(document, exportFolder);
                            Console.WriteLine("Exporting process is completed ...");

                            MoveFileToDestinationFolder(sourceFolder, exportFolder, originalPath, filenamewithoutextention);
                            Console.WriteLine(string.Format("Moving {0} to Export folder {1} is completed", file, exportFolder));

                            logList.Add(new LogMessage("Exporting process is ended ...", LogType.Information));
                            noSuccess++;
                        }
                        catch (Exception e)
                        {
                            noError++;

                            // Log the actual failure reason. Previously the stale
                            // "Exporting process ... is started ..." text was logged as the error.
                            report(string.Format("exporting is failed because of {0}.", e.Message), LogType.Error);

                            MoveFileToDestinationFolder(sourceFolder, rejectFolder, originalPath, filenamewithoutextention);
                            report(string.Format("Moving pdf {0} to Reject folder {1} is completed", file, rejectFolder), LogType.Information);
                        }
                    }
                    else
                    {
                        // Below the limit: the reason is recorded in the log only (no console output).
                        logList.Add(new LogMessage(string.Format("Confidence of PDF {0} is {1} less than target confidence {2}", file, totalConfidenceLevelHD, confidenceLevelLimitation), LogType.Error));
                        notConfidenceList.Add(file);

                        logList.Add(new LogMessage(string.Format("Total number of not confidence is {0}", notConfidenceList.Count), LogType.Information));
                        MoveFileToDestinationFolder(sourceFolder, notConfidenceFolder, originalPath, filenamewithoutextention);
                        report(string.Format("Moving pdf {0} to Not Confidence folder {1} is completed", file, notConfidenceFolder), LogType.Information);
                        noNotConfidence++;
                    }
                }

                // Report an error the processor still holds after the loop has ended.
                var msg = processor.GetLastProcessingError();
                if (msg != null)
                {
                    report(string.Format("the processing error because of {0}.", msg.MessageText()), LogType.Error);
                    noError++;
                }

                logList.Add(new LogMessage(string.Format("No. of Not Confidence {0} and No. of Error {1} and No. of Exported to DB {2}", noNotConfidence, noError, noSuccess), LogType.Information));
                isRecognized = true;
            }
            finally
            {
                UnloadEngine(ref engine);
                report("Released FlexiCapture Engine for Recognition...", LogType.Information);

                IsRecognized.Set(context, isRecognized);
                NonInvoiceList.Set(context, nonInvoices);
                Messages.Set(context, logList);
                TotalExported.Set(context, noSuccess);
                TotalError.Set(context, noError);
                TotalNotConfidence.Set(context, noNotConfidence);
                NotConfidenceList.Set(context, notConfidenceList);
            }
        }