// USE CASE: Creating a Document Definition by training on a set of images
//
// Builds a fresh training batch from two sample images, defines one data field ("ISBN") and one
// reference text element ("ISBNTag") on the first page, submits every page for training, exports
// the trained definition to an AFL file and finally creates a Document Definition from that file.
public static void Creating_a_Document_Definition_by_training_on_a_set_of_images(IEngine engine)
{
    string rootFolder = SamplesFolder + "\\SampleImages\\Training\\ISBN";
    string batchFolder = rootFolder + "\\_TrainingBatch";
    string aflPath = batchFolder + "\\NewTemplate.afl";

    // Remove any training batch left over from a previous run.
    if (System.IO.Directory.Exists(batchFolder))
    {
        System.IO.Directory.Delete(batchFolder, true);
    }

    trace("Create training batch and populate it with images...");
    ITrainingBatch trainingBatch = engine.CreateTrainingBatch(batchFolder, "English");
    try
    {
        ITrainingDefinition newDefinition = trainingBatch.Definitions.AddNew("ISBN");

        trainingBatch.AddImageFile(rootFolder + "\\00.jpg");
        trainingBatch.AddImageFile(rootFolder + "\\01.jpg");

        trace("Use the first page to define document structure...");
        ITrainingPage firstPage = trainingBatch.Pages[0];

        // A page must be prepared before its layout can be worked with. During preparation the
        // page is analyzed and primitive image objects are extracted (these can serve as helpers
        // in a user interface). Then an attempt is made to predict the page layout based on the
        // layout of already verified pages, if there are any.
        firstPage.PrepareLayout();

        // Normally the user would now draw boxes for fields and references. This sample emulates
        // that by searching the extracted image objects for known text strings and placing the
        // fields on the regions of the matching objects.

        // Data field: the first image object whose text is the known ISBN value.
        int objectIndex = 0;
        while (objectIndex < firstPage.ImageObjects.Count)
        {
            ITrainingImageObject candidate = firstPage.ImageObjects[objectIndex];
            if (candidate.RecognizedText == "978-1-4095-3439-6")
            {
                // This is the value we want to extract: create the data field and define its
                // geometry on the current page.
                ITrainingField isbnField = newDefinition.Fields.AddNew("ISBN", TrainingFieldTypeEnum.TFT_Field);
                firstPage.SetFieldBlock(isbnField, candidate.Region);
                break;
            }
            objectIndex++;
        }

        // Reference element: the first image object whose text is the "ISBN" label.
        objectIndex = 0;
        while (objectIndex < firstPage.ImageObjects.Count)
        {
            ITrainingImageObject candidate = firstPage.ImageObjects[objectIndex];
            if (candidate.RecognizedText == "ISBN")
            {
                // This text is used for reference: create the reference element and define its
                // geometry on the current page.
                ITrainingField isbnTag = newDefinition.Fields.AddNew("ISBNTag", TrainingFieldTypeEnum.TFT_ReferenceText);
                firstPage.SetFieldBlock(isbnTag, candidate.Region);
                break;
            }
            objectIndex++;
        }

        // Both searches are expected to have succeeded.
        assert(newDefinition.Fields.Count == 2);

        // The first page is done: mark it as verified and ready for training.
        trainingBatch.SubmitPageForTraining(firstPage);

        traceBegin("Verify the computed layout on the remaining pages...");
        int pageIndex = 1;
        while (pageIndex < trainingBatch.Pages.Count)
        {
            traceBegin(pageIndex.ToString() + "...");

            ITrainingPage page = trainingBatch.Pages[pageIndex];
            page.PrepareLayout();

            // Here the user would verify and correct the computed layout. The sample assumes the
            // computed layout is correct and simply marks the page as verified and ready for training.
            trainingBatch.SubmitPageForTraining(page);

            traceEnd("OK");
            pageIndex++;
        }
        traceEnd("OK");

        trace("Export to AFL...");
        newDefinition.ExportToAFL(aflPath);
    }
    finally
    {
        // Close the batch whether or not training succeeded.
        trainingBatch.Close();
    }

    trace("Create document definition.");
    IDocumentDefinition newDocumentDefinition = engine.CreateDocumentDefinitionFromAFL(aflPath, "English");

    trace("Use the new document definition with FlexiCapture Processor.");
    CheckTrainedDocumentDefinition(engine, newDocumentDefinition, rootFolder);
}
// USE CASE: Creating a Document Definition by training on a set of images
//
// Variant that walks the batch with PrepareNextPageNotSubmittedForTraining() instead of indexing
// the pages directly. The fields ("ISBN" data field and "ISBNTag" reference text) are defined on
// the first page; every page is submitted for training, the definition is exported to an AFL file
// and a Document Definition is created from that file.
public static void Creating_a_Document_Definition_by_training_on_a_set_of_images_LPAREN2RPAREN(IEngine engine)
{
    string rootFolder = SamplesFolder + "\\SampleImages\\Training\\ISBN";
    string batchFolder = rootFolder + "\\_TrainingBatch";
    string aflPath = batchFolder + "\\NewTemplate.afl";

    // Remove any training batch left over from a previous run.
    if (System.IO.Directory.Exists(batchFolder))
    {
        System.IO.Directory.Delete(batchFolder, true);
    }

    trace("Create training batch and populate it with images...");
    ITrainingBatch trainingBatch = engine.CreateTrainingBatch(batchFolder, "English");
    try
    {
        ITrainingDefinition newDefinition = trainingBatch.Definitions.AddNew("ISBN");

        trainingBatch.AddImageFile(rootFolder + "\\00.jpg");
        trainingBatch.AddImageFile(rootFolder + "\\01.jpg");

        traceBegin("The user iterates through the added images until all the pages have been submitted for training...");

        // The iterator section is intentionally empty: the next page is fetched at the end of the
        // loop body, before the closing trace call.
        for (ITrainingPage page = trainingBatch.PrepareNextPageNotSubmittedForTraining(); page != null; )
        {
            traceBegin(page.ID.ToString() + "...");

            // The user can 'draw' fields and references on any page while inside this loop. Any
            // modification of the document definition resets the 'verified' flag for all pages, and
            // the loop then automatically goes through all pages again. This sample emulates the
            // user 'drawing' fields on the first page only.
            bool isFirstPage = (page == trainingBatch.Pages[0]);
            if (isFirstPage)
            {
                // On the first page, search for known text strings and place fields on them.

                // Data field: the first image object whose text is the known ISBN value.
                int objectIndex = 0;
                while (objectIndex < page.ImageObjects.Count)
                {
                    ITrainingImageObject candidate = page.ImageObjects[objectIndex];
                    if (candidate.RecognizedText == "978-1-4095-3439-6")
                    {
                        // This is the value we want to extract: create the data field and define
                        // its geometry on the current page.
                        ITrainingField isbnField = newDefinition.Fields.AddNew("ISBN", TrainingFieldTypeEnum.TFT_Field);
                        page.SetFieldBlock(isbnField, candidate.Region);
                        break;
                    }
                    objectIndex++;
                }

                // Reference element: the first image object whose text is the "ISBN" label.
                objectIndex = 0;
                while (objectIndex < page.ImageObjects.Count)
                {
                    ITrainingImageObject candidate = page.ImageObjects[objectIndex];
                    if (candidate.RecognizedText == "ISBN")
                    {
                        // This text is used for reference: create the reference element and define
                        // its geometry on the current page.
                        ITrainingField isbnTag = newDefinition.Fields.AddNew("ISBNTag", TrainingFieldTypeEnum.TFT_ReferenceText);
                        page.SetFieldBlock(isbnTag, candidate.Region);
                        break;
                    }
                    objectIndex++;
                }

                // We assume that both items have been defined successfully.
                assert(newDefinition.Fields.Count == 2);
            }

            // Once the layout has been defined (or, for subsequent pages, the automatically computed
            // layout has been verified), the result must be submitted for training. The sample
            // assumes the computed layout is always correct, so the page is simply marked as
            // verified and ready for training.
            trainingBatch.SubmitPageForTraining(page);

            // Fetch the next page that requires attention. The method returns null once all pages
            // have been verified and submitted for training.
            page = trainingBatch.PrepareNextPageNotSubmittedForTraining();

            traceEnd("OK");
        }
        traceEnd("OK");

        trace("Export to AFL...");
        newDefinition.ExportToAFL(aflPath);
    }
    finally
    {
        // Close the batch whether or not training succeeded.
        trainingBatch.Close();
    }

    trace("Create document definition.");
    IDocumentDefinition newDocumentDefinition = engine.CreateDocumentDefinitionFromAFL(aflPath, "English");

    trace("Use the new document definition with FlexiCapture Processor.");
    CheckTrainedDocumentDefinition(engine, newDocumentDefinition, rootFolder);
}