// USE CASE: Creating a Document Definition from a FlexiLayout (*.afl)
//
// Creates a Document Definition from the sample Invoice_eng.afl file, feeds one
// sample invoice image through a FlexiCapture processor that uses it, checks the
// recognized document and exports it under the "Invoice" name.
public static void Creating_a_Document_Definition_from_a_FlexiLayout(IEngine engine)
{
    trace("Create a Document Definition from an *.afl file...");
    IDocumentDefinition invoiceDefinition = engine.CreateDocumentDefinitionFromAFL(
        SamplesFolder + "\\SampleMisc\\Invoice_eng.afl",
        "English");

    // The new Document Definition can be saved to a file; this sample uses it
    // directly from memory.
    traceBegin("Use the Document Definition in FlexiCaptureProcessor...");
    IFlexiCaptureProcessor captureProcessor = engine.CreateFlexiCaptureProcessor();
    captureProcessor.AddDocumentDefinition(invoiceDefinition);

    // Queue the image of a single one-page document.
    string invoiceImage = SamplesFolder + "\\SampleImages\\Invoices_1.tif";
    captureProcessor.AddImageFile(invoiceImage);

    // Recognize the document and verify what came back.
    IDocument recognized = captureProcessor.RecognizeNextDocument();
    assert(recognized != null);
    assert(recognized.DocumentDefinition != null);
    assert(recognized.Pages.Count == 1);

    // Export the result.
    string exportFolder = SamplesFolder + "\\FCEExport";
    captureProcessor.ExportDocumentEx(recognized, exportFolder, "Invoice", null);

    traceEnd("OK");
}
// USE CASE: Creating a compound Document Definition
//
// Assembles a Document Definition in memory out of two sections -- a fixed one loaded
// from an XFD file and a flexible one loaded from an AFL file -- then recognizes a
// five-page sample document with it and exports the result under the "Mixed" name.
public static void Creating_a_compound_Document_Definition(IEngine engine)
{
    trace("Create an empty Document Definition in memory...");
    IDocumentDefinition compoundDefinition = engine.CreateDocumentDefinition();
    assert(compoundDefinition != null);

    trace("Set default language...");
    ILanguage english = engine.PredefinedLanguages.FindLanguage("English");
    assert(english != null);
    compoundDefinition.DefaultLanguage = english;

    // The default text type is switched before each section is added:
    // handprinted for the XFD section, normal for the AFL section.
    trace("Create a new fixed section from an XFD file...");
    compoundDefinition.DefaultTextType = TextTypeEnum.TT_Handprinted;
    compoundDefinition.Sections.AddNew("Banking")
        .LoadXFDDescription(SamplesFolder + "\\SampleMisc\\Banking_eng.xfd");

    trace("Create a new flexible section from an AFL file...");
    compoundDefinition.DefaultTextType = TextTypeEnum.TT_Normal;
    compoundDefinition.Sections.AddNew("Invoice")
        .LoadFlexibleDescription(SamplesFolder + "\\SampleMisc\\Invoice_eng.afl");

    // Modify the template as required. This sample loosens the shrink constraints:
    // copy the current analysis parameters, adjust the copy and assign it back.
    IPageAnalysisParams relaxedParams = engine.CreatePageAnalysisParams();
    relaxedParams.CopyFrom(compoundDefinition.PageAnalysisParams);
    relaxedParams.MaxHorizontalShrinkPercent = 20;
    relaxedParams.MaxVerticalShrinkPercent = 20;
    compoundDefinition.PageAnalysisParams = relaxedParams;

    trace("Check the Document Definition...");
    assert(compoundDefinition.Check());

    // The new Document Definition can be saved to a file; this sample uses it
    // directly from memory.
    traceBegin("Use the Document Definition in FlexiCaptureProcessor...");
    IFlexiCaptureProcessor captureProcessor = engine.CreateFlexiCaptureProcessor();
    captureProcessor.AddDocumentDefinition(compoundDefinition);

    // Images of a single multipage document, in page order.
    string[] pageImages =
    {
        "Banking_1.tif",
        "Banking_2.tif",
        "Banking_3.tif",
        "Invoices_2.tif",
        "Invoices_3.tif"
    };
    foreach (string pageImage in pageImages)
    {
        captureProcessor.AddImageFile(SamplesFolder + "\\SampleImages\\" + pageImage);
    }

    // Recognize the document; every queued image is expected to end up as one page.
    IDocument recognized = captureProcessor.RecognizeNextDocument();
    assert(recognized != null);
    assert(recognized.DocumentDefinition != null);
    assert(recognized.Pages.Count == pageImages.Length);

    captureProcessor.ExportDocumentEx(recognized, SamplesFolder + "\\FCEExport", "Mixed", null);

    traceEnd("OK");
}
// Auxiliary Tools ///////////////////////////////////////////////////////////

// Looks up the field definition called fieldName inside documentDefinition and
// switches its value type to valueType. The field must exist and must currently
// be of the text value type; both conditions, and the success of the assignment,
// are checked with assert. Returns the modified field definition.
static IFieldDefinition setFieldValueType(IDocumentDefinition documentDefinition, string fieldName, FieldValueTypeEnum valueType)
{
    IFieldDefinition target = findFieldDef(documentDefinition, fieldName);
    assert(target != null);

    // Only fields that still carry the text value type are converted.
    bool isTextField = target.ValueType == FieldValueTypeEnum.FVT_Text;
    assert(isTextField);

    target.ValueType = valueType;

    // Read the property back to confirm the new type was accepted.
    bool typeApplied = target.ValueType == valueType;
    assert(typeApplied);

    return target;
}
// Builds an in-memory Document Definition from the XML description at docDifPath.
//
// For every "_Document_Definition" element a section named "Section" with one page
// (template image: Config.parentDirectory + "_TemplateImageName") is added, and the
// default language and export parameters are set. For every "_Region" element a block
// of the given "_BlockTypeEnum" is then added to the first page, using the rectangle
// "_X1","_Y1","_X2","_Y2" and the name from "_Name".
//
// Parameters:
//   docDifPath - path of the XML file to load.
//   engine     - engine used to create the definition, export parameters and regions.
// Returns the populated Document Definition.
// A missing element surfaces as the InvalidOperationException thrown by First();
// a non-numeric coordinate surfaces as FormatException/OverflowException.
internal static IDocumentDefinition createDocDifinitionFromXML(string docDifPath, IEngine engine)
{
    IDocumentDefinition documentDefinition = engine.CreateDocumentDefinition();
    XDocument xDoc = XDocument.Load(docDifPath);

    // NOTE(review): "_DefaultLanguage" and "_ExportDestinationTypeEnum" are looked up from
    // the document root (xDoc) rather than from the current element (x), so every
    // "_Document_Definition" element sees the first occurrence in the file. Kept as-is;
    // confirm whether that is intended.
    var infoGeneral = from x in xDoc.Descendants("_Document_Definition")
                      select new
                      {
                          templateImageName = x.Descendants("_TemplateImageName").First().Value,
                          defaultLanguage = xDoc.Descendants("_DefaultLanguage").First().Value,
                          exportDestinationTypeEnum = xDoc.Descendants("_ExportDestinationTypeEnum").First().Value
                      };

    foreach (var general in infoGeneral)
    {
        documentDefinition.Sections.AddNew("Section").Pages.AddNew(Config.parentDirectory + general.templateImageName);
        documentDefinition.DefaultLanguage = engine.PredefinedLanguages.FindLanguage(general.defaultLanguage);
        documentDefinition.ExportParams = engine.CreateExportParams(
            (ExportDestinationTypeEnum)Enum.Parse(typeof(ExportDestinationTypeEnum), general.exportDestinationTypeEnum));
    }

    var infoRegions = from x in xDoc.Descendants("_Region")
                      select new
                      {
                          Name = x.Descendants("_Name").First().Value,
                          X1 = x.Descendants("_X1").First().Value,
                          Y1 = x.Descendants("_Y1").First().Value,
                          X2 = x.Descendants("_X2").First().Value,
                          Y2 = x.Descendants("_Y2").First().Value,
                          BlockTypeEnum = x.Descendants("_BlockTypeEnum").First().Value
                      };

    // The XML file is machine-readable data, so coordinates are parsed with the
    // invariant culture instead of whatever culture the current thread happens to use.
    System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;

    // All regions are parsed first, so a malformed entry fails before any block is added.
    List<Region> regions = new List<Region>();
    foreach (var info in infoRegions)
    {
        Region region = new Region();
        region.name = info.Name;
        region.x1 = int.Parse(info.X1, invariant);
        region.y1 = int.Parse(info.Y1, invariant);
        region.x2 = int.Parse(info.X2, invariant);
        region.y2 = int.Parse(info.Y2, invariant);
        region.blockTypeEnumElem = (BlockTypeEnum)Enum.Parse(typeof(BlockTypeEnum), info.BlockTypeEnum);
        regions.Add(region);
    }

    // Blocks always go onto the first page of the definition; this relies on a page
    // having been created by the "_Document_Definition" loop above.
    foreach (Region region in regions)
    {
        IRegion blockRegion = engine.CreateRegion();
        blockRegion.AddRect(region.x1, region.y1, region.x2, region.y2);
        documentDefinition.Pages[0].Blocks.AddNew(region.blockTypeEnumElem, blockRegion, region.name);
    }

    return documentDefinition;
}
// Creates a Document Definition from the XML description at docDifPath, validates it
// and registers it with processor, forcing the processor to apply it.
//
// Validation problems do not abort the configuration: the definition is registered
// regardless of the outcome. Instead of being silently discarded, a failed check, an
// invalid definition or any reported error text is written to System.Diagnostics.Trace.
//
// Parameters:
//   docDifPath - path of the XML file passed to createDocDifinitionFromXML.
//   processor  - processor that receives the Document Definition.
//   engine     - engine used to build the Document Definition.
internal static void DoConfigureProcessor(string docDifPath, IFlexiCaptureProcessor processor, IEngine engine)
{
    IDocumentDefinition documentDefinition = createDocDifinitionFromXML(docDifPath, engine);

    bool checkPassed = documentDefinition.Check();

    // Collect the error strings reported by the definition.
    IStringsCollection errors = documentDefinition.Errors;
    System.Text.StringBuilder errorReport = new System.Text.StringBuilder();
    for (int i = 0; i < errors.Count; i++)
    {
        errorReport.AppendLine(errors[i]);
    }

    bool isValid = documentDefinition.IsValid;

    if (!checkPassed || !isValid || errorReport.Length > 0)
    {
        System.Diagnostics.Trace.TraceWarning(
            "Document Definition built from '" + docDifPath + "' has problems"
            + " (Check=" + checkPassed + ", IsValid=" + isValid + "):"
            + Environment.NewLine + errorReport);
    }

    processor.AddDocumentDefinition(documentDefinition);
    processor.SetForceApplyDocumentDefinition(true);
}
// USE CASE: Loading and saving Document Definitions
//
// Creates an empty Document Definition, fills it from the sample Invoice_eng.fcdot
// file through its ICustomStorage interface, checks it and saves a copy next to the
// original as Invoice_engCOPY.fcdot.
public static void Loading_and_saving_Document_Definitions(IEngine engine)
{
    trace("Create an empty Document Definition in memory...");
    IDocumentDefinition definition = engine.CreateDocumentDefinition();
    assert(definition != null);

    trace("Load a Document Definition from file...");
    // Loading and saving go through the ICustomStorage view of the same object.
    ICustomStorage storage = definition as ICustomStorage;
    assert(storage != null);
    string templatesFolder = SamplesFolder + "\\SampleProject\\Templates";
    storage.LoadFromFile(templatesFolder + "\\Invoice_eng.fcdot");

    trace("Modify if required and check the modified version...");
    assert(definition.Check());

    trace("Save the modified copy...");
    storage.SaveToFile(templatesFolder + "\\Invoice_engCOPY.fcdot");
}
// Verifies a trained Document Definition by recognizing two further sample images
// (02.jpg and 03.jpg from rootFolder) with it.
//
// Each image is expected to yield a one-page document whose first field of the first
// section is named "ISBN" and holds the expected ISBN string for that image.
//
// Parameters:
//   engine                - engine used to create the FlexiCapture processor.
//   newDocumentDefinition - the Document Definition under test.
//   rootFolder            - folder containing 02.jpg and 03.jpg.
static void CheckTrainedDocumentDefinition(IEngine engine, IDocumentDefinition newDocumentDefinition, string rootFolder)
{
    IFlexiCaptureProcessor processor = engine.CreateFlexiCaptureProcessor();
    processor.AddDocumentDefinition(newDocumentDefinition);
    processor.AddImageFile(rootFolder + "\\02.jpg");
    processor.AddImageFile(rootFolder + "\\03.jpg");

    // Expected field values, in the order the images were queued.
    string[] expectedIsbns = { "0-517-59939-2", "0-8050-6176-2" };

    foreach (string expectedIsbn in expectedIsbns)
    {
        IDocument document = processor.RecognizeNextDocument();

        // Fail with a clear assertion, as the other use cases do, rather than with a
        // NullReferenceException when no document is produced.
        assert(document != null);
        assert(document.DocumentDefinition != null);
        assert(document.Pages.Count == 1);
        assert(document.Sections[0].Children[0].Name == "ISBN");
        assert(document.Sections[0].Children[0].Value.AsString == expectedIsbn);
    }
}
// USE CASE: Creating a Document Definition from an XML Form Definition
//
// Creates a Document Definition from the sample Banking_eng.xfd file, loosens its
// shrink constraints, recognizes a three-page sample document with it and exports
// the result under the "Banking" name.
public static void Creating_a_Document_Definition_from_an_XML_Form_Definition(IEngine engine)
{
    trace("Create a Document Definition from an *.xfd file...");
    IDocumentDefinition bankingDefinition = engine.CreateDocumentDefinitionFromXFD(
        SamplesFolder + "\\SampleMisc\\Banking_eng.xfd",
        "English");

    // Modify the template as required. This sample loosens the shrink constraints:
    // copy the current analysis parameters, adjust the copy and assign it back.
    IPageAnalysisParams relaxedParams = engine.CreatePageAnalysisParams();
    relaxedParams.CopyFrom(bankingDefinition.PageAnalysisParams);
    relaxedParams.MaxHorizontalShrinkPercent = 20;
    relaxedParams.MaxVerticalShrinkPercent = 20;
    bankingDefinition.PageAnalysisParams = relaxedParams;

    // The new Document Definition can be saved to a file; this sample uses it
    // directly from memory.
    traceBegin("Use the Document Definition in FlexiCaptureProcessor...");
    IFlexiCaptureProcessor captureProcessor = engine.CreateFlexiCaptureProcessor();
    captureProcessor.AddDocumentDefinition(bankingDefinition);

    // Images of a single multipage document, in page order.
    string[] pageImages = { "Banking_1.tif", "Banking_2.tif", "Banking_3.tif" };
    foreach (string pageImage in pageImages)
    {
        captureProcessor.AddImageFile(SamplesFolder + "\\SampleImages\\" + pageImage);
    }

    // Recognize the document and verify what came back.
    IDocument recognized = captureProcessor.RecognizeNextDocument();
    assert(recognized != null);
    assert(recognized.DocumentDefinition != null);
    assert(recognized.Pages.Count == pageImages.Length);

    // Export the result.
    captureProcessor.ExportDocumentEx(recognized, SamplesFolder + "\\FCEExport", "Banking", null);

    traceEnd("OK");
}
// Auxiliary Tools ///////////////////////////////////////////////////////////

// Changes the value type of the field named fieldName in documentDefinition.
// Asserts that the field is found, that it is a text-valued field beforehand and
// that it reports valueType afterwards. Returns the field definition that was changed.
static IFieldDefinition setFieldValueType(IDocumentDefinition documentDefinition, string fieldName, FieldValueTypeEnum valueType)
{
    IFieldDefinition fieldDefinition = findFieldDef(documentDefinition, fieldName);

    // Preconditions: the field exists and still has the text value type.
    assert(fieldDefinition != null);
    FieldValueTypeEnum typeBefore = fieldDefinition.ValueType;
    assert(typeBefore == FieldValueTypeEnum.FVT_Text);

    fieldDefinition.ValueType = valueType;

    // Postcondition: the property reads back as the requested type.
    FieldValueTypeEnum typeAfter = fieldDefinition.ValueType;
    assert(typeAfter == valueType);

    return fieldDefinition;
}
// Searches documentDefinition for a field definition called name.
// The Document Definition is viewed through its IFieldDefinition interface and used
// as the root handed to recursiveFindFieldDef, whose result is returned unchanged.
static IFieldDefinition findFieldDef(IDocumentDefinition documentDefinition, string name)
{
    return recursiveFindFieldDef(documentDefinition as IFieldDefinition, name);
}
// Checks a trained Document Definition against two sample images, 02.jpg and 03.jpg,
// taken from rootFolder.
//
// For each image the recognized document must exist, reference a Document Definition,
// consist of exactly one page, and expose "ISBN" as the first child of its first
// section with the ISBN value expected for that image.
//
// Parameters:
//   engine                - engine used to create the FlexiCapture processor.
//   newDocumentDefinition - the Document Definition being verified.
//   rootFolder            - folder that holds the two verification images.
static void CheckTrainedDocumentDefinition(IEngine engine, IDocumentDefinition newDocumentDefinition, string rootFolder)
{
    IFlexiCaptureProcessor processor = engine.CreateFlexiCaptureProcessor();
    processor.AddDocumentDefinition(newDocumentDefinition);
    processor.AddImageFile(rootFolder + "\\02.jpg");
    processor.AddImageFile(rootFolder + "\\03.jpg");

    // One expected ISBN per queued image, in queue order.
    string[] expectedValues = { "0-517-59939-2", "0-8050-6176-2" };

    for (int index = 0; index < expectedValues.Length; index++)
    {
        IDocument document = processor.RecognizeNextDocument();

        // Without this check a missing document would surface as a
        // NullReferenceException on the next line instead of a failed assertion.
        assert(document != null);
        assert(document.DocumentDefinition != null);
        assert(document.Pages.Count == 1);
        assert(document.Sections[0].Children[0].Name == "ISBN");
        assert(document.Sections[0].Children[0].Value.AsString == expectedValues[index]);
    }
}
// Returns the field definition called name found under documentDefinition.
// The lookup itself is delegated to recursiveFindFieldDef, starting from the
// Document Definition treated as an IFieldDefinition.
static IFieldDefinition findFieldDef(IDocumentDefinition documentDefinition, string name)
{
    IFieldDefinition searchRoot = documentDefinition as IFieldDefinition;
    IFieldDefinition match = recursiveFindFieldDef(searchRoot, name);
    return match;
}
// USE CASE: Configuring fields for better recognition results
//
// Creates a Document Definition from the sample Invoice_eng.afl file, assigns value
// types to several fields, walks through the different kinds of embedded recognition
// languages that can be attached to the "InvoiceNumber" field, and finally recognizes
// and exports one sample invoice with the configured definition.
public static void Configuring_fields_for_better_recognition_results(IEngine engine)
{
    trace("Create a Document Definition from a FlexiLayout...");
    IDocumentDefinition invoiceDefinition = engine.CreateDocumentDefinitionFromAFL(
        SamplesFolder + "\\SampleMisc\\Invoice_eng.afl",
        "English");
    assert(invoiceDefinition != null);

    trace("Configure data types...");
    setFieldValueType(invoiceDefinition, "InvoiceDate", FieldValueTypeEnum.FVT_DateTime);
    setFieldValueType(invoiceDefinition, "Quantity", FieldValueTypeEnum.FVT_Number);
    string[] currencyFields = { "UnitPrice", "Total", "TotalAmount" };
    foreach (string currencyField in currencyFields)
    {
        setFieldValueType(invoiceDefinition, currencyField, FieldValueTypeEnum.FVT_Currency);
    }

    trace("Configure recognition languages for text fields ...");
    IFieldDefinition invoiceNumberField = findFieldDef(invoiceDefinition, "InvoiceNumber");
    assert(invoiceNumberField != null);
    ITextRecognitionParams recognitionParams = invoiceNumberField.RecognitionParams.AsTextParams();

    // 1. An embedded language created from a data type.
    ILanguage dataTypeLanguage = recognitionParams.CreateEmbeddedLanguageByDataType(FieldValueTypeEnum.FVT_DateTime);
    recognitionParams.Language = dataTypeLanguage;

    // 2. An embedded language derived from the one currently assigned. The result is a
    //    distinct object that keeps the data-type category of its source.
    ILanguage derivedLanguage = recognitionParams.CreateEmbeddedLanguage(
        recognitionParams.Language.Type,
        recognitionParams.Language);
    assert(derivedLanguage != recognitionParams.Language);
    assert(derivedLanguage.LanguageCategory == LanguageCategoryEnum.LC_DataType);
    assert(derivedLanguage.DatatypeCategory == DatatypeCategoryEnum.TC_DateTime);
    recognitionParams.Language = derivedLanguage;

    // 3. A group language combining two predefined languages.
    ILanguage groupLanguage = recognitionParams.CreateEmbeddedLanguage(LanguageTypeEnum.LT_Group, null);
    groupLanguage.AsGroupLanguage().Add(engine.PredefinedLanguages.FindLanguage("English"));
    groupLanguage.AsGroupLanguage().Add(engine.PredefinedLanguages.FindLanguage("Russian"));
    recognitionParams.Language = groupLanguage;
    assert(recognitionParams.Language.Type == LanguageTypeEnum.LT_Group);
    assert(recognitionParams.Language.AsGroupLanguage().Count == 2);
    assert(recognitionParams.Language.AsGroupLanguage().Item(0).InternalName == "English");
    assert(recognitionParams.Language.AsGroupLanguage().Item(1).InternalName == "Russian");

    // 4. A simple language built from scratch: an explicit alphabet plus a regular expression.
    ILanguage alphabetLanguage = recognitionParams.CreateEmbeddedLanguage(LanguageTypeEnum.LT_Simple, null);
    alphabetLanguage.AsSimpleLanguage().set_LetterSet(LanguageLetterSetEnum.LLS_Alphabet, "ABCDEFGHIJKLMNOPQRSTUVWXYZ");
    alphabetLanguage.AsSimpleLanguage().RegularExpression = "[A-Z]{1-}";
    recognitionParams.Language = alphabetLanguage;
    assert(recognitionParams.Language.AsSimpleLanguage().RegularExpression.Length > 0);

    // 5. A simple language based on predefined English, extended with a user-defined dictionary.
    ILanguage dictionaryLanguage = recognitionParams.CreateEmbeddedLanguage(
        LanguageTypeEnum.LT_Simple,
        engine.PredefinedLanguages.FindLanguage("English"));
    assert(dictionaryLanguage.AsSimpleLanguage().UsePredefinedDictionary == true);
    assert(dictionaryLanguage.AsSimpleLanguage().UseUserDefinedDictionary == false);
    assert(dictionaryLanguage.AsSimpleLanguage().UserDefinedDictionary == null);

    // The user-defined dictionary is null until its use is switched on.
    dictionaryLanguage.AsSimpleLanguage().UseUserDefinedDictionary = true;
    FCEngine.IDictionary userDictionary = dictionaryLanguage.AsSimpleLanguage().UserDefinedDictionary;
    assert(userDictionary != null);
    assert(userDictionary.WordsCount == 0);

    userDictionary.AddWord("ONE", 1);
    userDictionary.AddWord("TWO", 1);
    userDictionary.AddWord("THREE", 1);
    assert(userDictionary.WordsCount == 3);

    // Trace the dictionary contents. At most 10 entries are visited; a reported
    // confidence of 0 ends the enumeration early.
    IEnumDictionaryWords wordEnumerator = userDictionary.EnumWords();
    for (int visited = 0; visited < 10; visited++)
    {
        int wordConfidence = 0;
        string dictionaryWord = wordEnumerator.Next(out wordConfidence);
        if (wordConfidence == 0)
        {
            break;
        }
        trace(dictionaryWord);
    }
    recognitionParams.Language = dictionaryLanguage;

    trace("Check the Document Definition...");
    assert(invoiceDefinition.Check());

    traceBegin("Use the Document Definition in FlexiCaptureProcessor...");
    IFlexiCaptureProcessor captureProcessor = engine.CreateFlexiCaptureProcessor();
    captureProcessor.AddDocumentDefinition(invoiceDefinition);

    // Queue the image of a single one-page document.
    captureProcessor.AddImageFile(SamplesFolder + "\\SampleImages\\Invoices_1.tif");

    // Recognize the document and verify what came back.
    IDocument recognized = captureProcessor.RecognizeNextDocument();
    assert(recognized != null);
    assert(recognized.DocumentDefinition != null);
    assert(recognized.Pages.Count == 1);

    captureProcessor.ExportDocumentEx(recognized, SamplesFolder + "\\FCEExport", "Invoice", null);

    traceEnd("OK");
}
// USE CASE: Creating a Document Definition by training on a set of images
//
// Creates a fresh training batch from two sample images, defines an "ISBN" data field
// and an "ISBNTag" reference element on the first page, submits every page for
// training and exports the trained definition to an AFL file. A Document Definition is
// then created from that file and verified with CheckTrainedDocumentDefinition.
public static void Creating_a_Document_Definition_by_training_on_a_set_of_images(IEngine engine)
{
    string rootFolder = SamplesFolder + "\\SampleImages\\Training\\ISBN";
    string batchFolder = rootFolder + "\\_TrainingBatch";
    string trainedTemplatePath = batchFolder + "\\NewTemplate.afl";

    // Start from a clean slate: remove a training batch left over from an earlier run.
    if (System.IO.Directory.Exists(batchFolder))
    {
        System.IO.Directory.Delete(batchFolder, true);
    }

    trace("Create training batch and populate it with images...");
    ITrainingBatch trainingBatch = engine.CreateTrainingBatch(batchFolder, "English");
    try
    {
        ITrainingDefinition trainingDefinition = trainingBatch.Definitions.AddNew("ISBN");

        trainingBatch.AddImageFile(rootFolder + "\\00.jpg");
        trainingBatch.AddImageFile(rootFolder + "\\01.jpg");

        trace("Use the first page to define document structure...");
        ITrainingPage firstPage = trainingBatch.Pages[0];

        // Each page must be prepared before its layout can be worked with. At this stage
        // the page is analyzed and primitive image objects are extracted (these can serve
        // as helpers in a user interface). Then an attempt is made to predict the page
        // layout based on the layout of verified pages, if any.
        firstPage.PrepareLayout();

        // At this point a user would draw boxes for fields and references. This sample
        // emulates that by looking for known text strings and placing fields on them.

        // Data field: the first image object whose text is the known ISBN value.
        for (int index = 0; index < firstPage.ImageObjects.Count; index++)
        {
            ITrainingImageObject candidate = firstPage.ImageObjects[index];
            if (candidate.RecognizedText != "978-1-4095-3439-6")
            {
                continue;
            }

            // This is the value to extract. Create a data field and define its
            // geometry on the current page.
            ITrainingField isbnField = trainingDefinition.Fields.AddNew("ISBN", TrainingFieldTypeEnum.TFT_Field);
            firstPage.SetFieldBlock(isbnField, candidate.Region);
            break;
        }

        // Reference element: the first image object whose text is the "ISBN" label.
        for (int index = 0; index < firstPage.ImageObjects.Count; index++)
        {
            ITrainingImageObject candidate = firstPage.ImageObjects[index];
            if (candidate.RecognizedText != "ISBN")
            {
                continue;
            }

            // This text is used for reference. Create a reference element and define
            // its geometry on the current page.
            ITrainingField isbnTag = trainingDefinition.Fields.AddNew("ISBNTag", TrainingFieldTypeEnum.TFT_ReferenceText);
            firstPage.SetFieldBlock(isbnTag, candidate.Region);
            break;
        }

        // Both items must have been found and defined.
        assert(trainingDefinition.Fields.Count == 2);

        // The first page is done: mark it as verified and ready for training.
        trainingBatch.SubmitPageForTraining(firstPage);

        traceBegin("Verify the computed layout on the remaining pages...");
        for (int pageIndex = 1; pageIndex < trainingBatch.Pages.Count; pageIndex++)
        {
            traceBegin(pageIndex.ToString() + "...");

            ITrainingPage remainingPage = trainingBatch.Pages[pageIndex];
            remainingPage.PrepareLayout();

            // At this point a user would verify and correct the computed layout. This
            // sample assumes the computed layout is correct, so the page is simply
            // marked as verified and ready for training.
            trainingBatch.SubmitPageForTraining(remainingPage);

            traceEnd("OK");
        }
        traceEnd("OK");

        trace("Export to AFL...");
        trainingDefinition.ExportToAFL(trainedTemplatePath);
    }
    finally
    {
        // The batch is closed whether or not training succeeded.
        trainingBatch.Close();
    }

    trace("Create document definition.");
    IDocumentDefinition trainedDocumentDefinition = engine.CreateDocumentDefinitionFromAFL(trainedTemplatePath, "English");

    trace("Use the new document definition with FlexiCapture Processor.");
    CheckTrainedDocumentDefinition(engine, trainedDocumentDefinition, rootFolder);
}
// USE CASE: Creating a Document Definition by training on a set of images
//
// Variant that lets the training batch drive the iteration: pages are fetched with
// PrepareNextPageNotSubmittedForTraining until none is left. On the first page an
// "ISBN" data field and an "ISBNTag" reference element are defined; every page is
// submitted for training. The trained definition is exported to an AFL file, turned
// into a Document Definition and verified with CheckTrainedDocumentDefinition.
public static void Creating_a_Document_Definition_by_training_on_a_set_of_images_LPAREN2RPAREN(IEngine engine)
{
    string rootFolder = SamplesFolder + "\\SampleImages\\Training\\ISBN";
    string batchFolder = rootFolder + "\\_TrainingBatch";
    string trainedTemplatePath = batchFolder + "\\NewTemplate.afl";

    // Start from a clean slate: remove a training batch left over from an earlier run.
    if (System.IO.Directory.Exists(batchFolder))
    {
        System.IO.Directory.Delete(batchFolder, true);
    }

    trace("Create training batch and populate it with images...");
    ITrainingBatch trainingBatch = engine.CreateTrainingBatch(batchFolder, "English");
    try
    {
        ITrainingDefinition trainingDefinition = trainingBatch.Definitions.AddNew("ISBN");

        trainingBatch.AddImageFile(rootFolder + "\\00.jpg");
        trainingBatch.AddImageFile(rootFolder + "\\01.jpg");

        traceBegin("The user iterates through the added images until all the pages have been submitted for training...");
        ITrainingPage currentPage = trainingBatch.PrepareNextPageNotSubmittedForTraining();
        while (currentPage != null)
        {
            traceBegin(currentPage.ID.ToString() + "...");

            // A user can draw fields and references on any page while inside this loop.
            // Any modification of the document definition resets the 'verified' flag for
            // all pages, and the loop automatically iterates through all pages again.
            // This sample emulates the user drawing fields on the first page only.
            bool isFirstPage = currentPage == trainingBatch.Pages[0];
            if (isFirstPage)
            {
                // Data field: the first image object whose text is the known ISBN value.
                for (int index = 0; index < currentPage.ImageObjects.Count; index++)
                {
                    ITrainingImageObject candidate = currentPage.ImageObjects[index];
                    if (candidate.RecognizedText != "978-1-4095-3439-6")
                    {
                        continue;
                    }

                    // This is the value to extract. Create a data field and define its
                    // geometry on the current page.
                    ITrainingField isbnField = trainingDefinition.Fields.AddNew("ISBN", TrainingFieldTypeEnum.TFT_Field);
                    currentPage.SetFieldBlock(isbnField, candidate.Region);
                    break;
                }

                // Reference element: the first image object whose text is the "ISBN" label.
                for (int index = 0; index < currentPage.ImageObjects.Count; index++)
                {
                    ITrainingImageObject candidate = currentPage.ImageObjects[index];
                    if (candidate.RecognizedText != "ISBN")
                    {
                        continue;
                    }

                    // This text is used for reference. Create a reference element and
                    // define its geometry on the current page.
                    ITrainingField isbnTag = trainingDefinition.Fields.AddNew("ISBNTag", TrainingFieldTypeEnum.TFT_ReferenceText);
                    currentPage.SetFieldBlock(isbnTag, candidate.Region);
                    break;
                }

                // Both items are assumed to have been defined successfully.
                assert(trainingDefinition.Fields.Count == 2);
            }

            // After defining the layout (or verifying the automatically computed layout
            // on subsequent pages), the result must be submitted for training. This sample
            // assumes the computed layout is always correct, so the page is simply marked
            // as verified and ready for training.
            trainingBatch.SubmitPageForTraining(currentPage);

            // Fetch the next page that requires attention. The method returns null once
            // all pages have been verified and submitted for training.
            currentPage = trainingBatch.PrepareNextPageNotSubmittedForTraining();

            traceEnd("OK");
        }
        traceEnd("OK");

        trace("Export to AFL...");
        trainingDefinition.ExportToAFL(trainedTemplatePath);
    }
    finally
    {
        // The batch is closed whether or not training succeeded.
        trainingBatch.Close();
    }

    trace("Create document definition.");
    IDocumentDefinition trainedDocumentDefinition = engine.CreateDocumentDefinitionFromAFL(trainedTemplatePath, "English");

    trace("Use the new document definition with FlexiCapture Processor.");
    CheckTrainedDocumentDefinition(engine, trainedDocumentDefinition, rootFolder);
}