/// <summary>
/// Processes each page image in <paramref name="fileInfos"/>: prepares its form attributes,
/// compares them against every not-yet-used entry of <c>BlockMasterForms</c>, and, when a master
/// scores above the confidence threshold, crops each master field out of the page, saves the crop,
/// runs OCR/OMR on it and records the result on both the new <c>FilledForm</c> and
/// <paramref name="formResults"/>.
/// </summary>
/// <param name="formResults">
/// Result accumulator. Its <c>OriginalDirectoryName</c> is used as the output directory; page counts,
/// confidences, timed-out pages, per-field results and status are written to it.
/// </param>
/// <param name="fileInfos">
/// The page images to process. A page whose <c>Image</c> is null is loaded from its
/// <c>ImageFileInfo.FullName</c> and the loaded image is stored back on the item.
/// </param>
/// <returns>
/// One <c>FilledForm</c> per page that was started, in input order. Returns early (with the forms
/// created so far) when more than two pages have timed out and none has matched. Returns
/// <c>null</c> when an unexpected exception reaches the outer handler.
/// </returns>
public List<FilledForm> ProcessOcr(ResultsForPrettyJson formResults, List<ImageInfo> fileInfos)
{
    try
    {
        var outDir = formResults.OriginalDirectoryName;
        var retForms = new List<FilledForm>();
        var usedMasters = new HashSet<MasterForm>();
        Stopwatch stopWatch = new Stopwatch();
        stopWatch.Start();
        formResults.PagesInPdf = fileInfos.Count;

        foreach (var ofi in fileInfos)
        {
            FilledForm newForm = new FilledForm();
            retForms.Add(newForm);
            newForm.ImageInfoMaster.InitialImage = ofi;
            newForm.Name = Path.GetFileNameWithoutExtension(ofi.ImageFileInfo.Name);
            if (ofi.Image == null)
            {
                ofi.Image = LoadImageFile(ofi.ImageFileInfo.FullName, 1, -1);
            }

            var par = new FormThreadCallParams() { ImageInfo = ofi, StopWatch = stopWatch, Form = newForm };

            // Timeouts below 50 seconds run the preparation on a worker thread so it can be cut off;
            // anything else runs inline with no timeout.
            if (PageTimeoutInSeconds < 50)
            {
                Thread t = new Thread(this.PrepareNewFormThreader);
                t.Start(par);
                if (!t.Join(TimeSpan.FromSeconds(PageTimeoutInSeconds)))
                {
                    // NOTE(review): Thread.Abort is not supported on .NET Core / .NET 5+ - confirm the target framework.
                    t.Abort();
                    formResults.TimedOutPages.Add(newForm.Name);
                    formResults.BestFormConfidence.Add(-1);
                    if (formResults.TimedOutPages.Count > 2 && formResults.PagesMappedToForm == 0)
                    {
                        formResults.Status = $"Form abandoned for timeout after {formResults.BestFormConfidence.Count} pages";
                        logger.Error(formResults.Status);
                        return retForms;
                    }

                    continue;
                }
            }
            else
            {
                PrepareNewFormThreader(par);
            }

            Debug.Assert(par.Attributes != null);
            var filledFormAttributes = par.Attributes;

            // Pick the unused master with the highest confidence above 85. bestConfidence tracks the
            // highest score seen even when no master clears the threshold.
            MasterForm currentMasterBlockForm = null;
            int bestConfidence = -1;
            int currentConfidence = 85;
            foreach (var master in BlockMasterForms)
            {
                if (usedMasters.Contains(master))
                {
                    continue;
                }

                var result = RecognitionEngine.CompareForm(master.Attributes, filledFormAttributes, null, null);
                if (result.Confidence > currentConfidence)
                {
                    currentMasterBlockForm = master;
                    bestConfidence = currentConfidence = result.Confidence;
                }
                else if (result.Confidence > bestConfidence)
                {
                    bestConfidence = result.Confidence;
                }
            }

            formResults.BestFormConfidence.Add(bestConfidence);

            if (currentMasterBlockForm != null)
            {
                formResults.MasterFormPages.Add(currentMasterBlockForm.Properties.Name);
                formResults.PagesMappedToForm++;
                logger.Info($"FilledForm matched {newForm.Name} {newForm.Status} {stopWatch.ElapsedMilliseconds} ");

                newForm.ImageInfoMaster.InitialImage = ofi;
                var centeredImage = ofi.Image.CloneAll();
                CleanupImage(centeredImage);
                newForm.ImageInfoMaster.CenteredImage = new ImageInfo() { Image = centeredImage };
                var omrImage = centeredImage.CloneAll();
                PrepareOmrImage(omrImage);
                newForm.ImageInfoMaster.OmrImage = new ImageInfo() { Image = omrImage };
                newForm.Status = "Matched";
                newForm.Master = currentMasterBlockForm;

                var alignment = RecognitionEngine.GetFormAlignment(newForm.Master.Attributes, newForm.Attributes, null);
                var fields = currentMasterBlockForm.ProcessingPages[0];
                var scaler = currentMasterBlockForm.Resolution;

                var subDirField = Path.Combine(outDir, "fields");
                var fileNameFieldOnly = Path.Combine(subDirField, newForm.Name + "_fields.jpg");
                var googleResultsFile = Path.Combine(subDirField, newForm.Name + "_google.json");

                // White canvas onto which every non-block text field crop is combined. It is only needed
                // until it has been saved, so it is disposed as soon as the save completes.
                using (var fieldsOnlyImage = RasterImage.Create(centeredImage.Width, centeredImage.Height, centeredImage.BitsPerPixel, 300, RasterColor.White))
                {
                    foreach (var field in fields)
                    {
                        var isBlock = field.Name.Contains("block");
                        var rect200 = alignment[0].AlignRectangle(field.Bounds);

                        // NOTE(review): scaler is forced to 300 here, which makes the "* 300 / scaler" terms
                        // below a no-op and ignores the master's Resolution - confirm this is intentional.
                        scaler = 300;
                        int fudge = isBlock ? 30 : 1;
                        var rect300 = new LeadRect(
                            rect200.Left * 300 / scaler - fudge,
                            rect200.Top * 300 / scaler - fudge,
                            rect200.Width * 300 / scaler + fudge,
                            rect200.Height * 300 / scaler + fudge);

                        try
                        {
                            // OMR fields are read from the OMR-prepared image; everything else from the centered image.
                            var imageInfoToUse = newForm.ImageInfoMaster.CenteredImage;
                            var zoneType = OcrZoneType.Text;
                            if (field.GetType() == typeof(OmrFormField))
                            {
                                imageInfoToUse = newForm.ImageInfoMaster.OmrImage;
                                zoneType = OcrZoneType.Omr;
                            }
                            else if (field.GetType() == typeof(ImageFormField))
                            {
                                zoneType = OcrZoneType.Graphic;
                            }

                            var image = imageInfoToUse.Image.CloneAll();
                            var subDir = Path.Combine(outDir, isBlock ? "blocks" : "fields");
                            var fileName = Path.Combine(subDir, newForm.Name + "_" + field.Name + ".jpg");
                            var imageField = new ImageField
                            {
                                Field = field,
                                FieldResult =
                                {
                                    FieldName = field.Name,
                                    IsBlock = isBlock,
                                    ImageFile = fileName,
                                    Bounds = rect300.ToString(),
                                    FieldType = zoneType.ToString(),
                                    Error = "None"
                                }
                            };
                            imageField.Rectangle = new Rectangle(rect300.X, rect300.Y, rect300.Width, rect300.Height);

                            try
                            {
                                EnsurePathExists(subDir);
                                CropCommand command = new CropCommand { Rectangle = rect300 };
                                command.Run(image);
                                RasterCodecs.Save(image, fileName, RasterImageFormat.Jpeg, bitsPerPixel: 8);

                                if (!isBlock && zoneType == OcrZoneType.Text)
                                {
                                    try
                                    {
                                        // The clone exists only to feed the combine command; dispose it once the
                                        // command has run so each text field does not leak an image.
                                        using (var combineSource = image.Clone())
                                        {
                                            var combiner = new CombineCommand();
                                            combiner.SourceImage = combineSource;
                                            combiner.DestinationRectangle = rect300;
                                            var regionBounds = image.GetRegionBounds(null);
                                            combiner.SourcePoint = new LeadPoint(regionBounds.X, regionBounds.Y);
                                            combiner.Flags = CombineCommandFlags.OperationOr | CombineCommandFlags.Destination0;
                                            combiner.Run(fieldsOnlyImage);
                                        }
                                    }
                                    catch (Exception exCombine)
                                    {
                                        // A failed combine only affects the combined image; the field itself is still processed.
                                        logger.Error(exCombine, $"error combining field {field.Name} {rect300}");
                                    }
                                }

                                var imageInfo = new ImageInfo() { Image = image, ImageFileInfo = new FileInfo(fileName) };
                                imageField.ImageInfo = imageInfo;

                                if (!isBlock && zoneType != OcrZoneType.Graphic)
                                {
                                    // NOTE(review): AutoDispose presumably hands ownership of `image` to the OCR page,
                                    // so imageField.ImageInfo.Image may refer to a disposed image afterwards - confirm.
                                    using (IOcrPage ocrPage = OcrEngine.CreatePage(image, OcrImageSharingMode.AutoDispose))
                                    {
                                        OcrZone ocrZone = new OcrZone
                                        {
                                            ZoneType = zoneType,
                                            Bounds = new LeadRect(fudge, fudge, image.ImageSize.Width - fudge, image.ImageSize.Height - fudge)
                                        };
                                        ocrPage.Zones.Add(ocrZone);
                                        ocrPage.Recognize(null);

                                        if (zoneType == OcrZoneType.Omr)
                                        {
                                            // NOTE(review): looks like leftover diagnostic logging for one specific field.
                                            if (field.Name.Contains("C2NGVD1929"))
                                            {
                                                logger.Info(ocrZone.Bounds);
                                            }

                                            GetOmrReading(ocrPage, field, imageField);
                                        }
                                        else if (zoneType == OcrZoneType.Text)
                                        {
                                            var resultsPage = GetPageConfidence(ocrPage);
                                            imageField.FieldResult.Confidence = resultsPage.Confidence;
                                            char[] crlf = { '\r', '\n' };
                                            imageField.FieldResult.Text = ocrPage.GetText(0).TrimEnd(crlf);
                                        }
                                    }
                                }

                                logger.Info(
                                    $"field {field.Name} {rect300} [{imageField.FieldResult.Text}] confidence: {imageField.FieldResult.Confidence}");
                            }
                            catch (Exception exField)
                            {
                                // A failing field is recorded with its error but does not stop the remaining fields.
                                logger.Error(exField, $"Error processing {field.Name}");
                                formResults.FieldsWithError++;
                                imageField.FieldResult.Error = exField.Message;
                            }

                            newForm.ImageFields.Add(imageField);
                            formResults.OcrFields.Add(imageField.FieldResult);
                            formResults.Status = "FormMatched";
                        }
                        catch (Exception ex)
                        {
                            logger.Error(ex, $"Error on field {field.Name} {rect300}");
                            newForm.Status = $"Error|Field {field.Name} {rect300}: [{ex.Message}]";
                        }
                    }

                    // The "fields" directory is otherwise only created while processing a non-block field,
                    // so make sure it exists before writing the combined image into it.
                    EnsurePathExists(subDirField);
                    RasterCodecs.Save(PrepareOmrImage(fieldsOnlyImage), fileNameFieldOnly, RasterImageFormat.Jpeg, bitsPerPixel: 8);
                }

                var googleResults = GoogleOcr(fileNameFieldOnly);
                if (googleResults.Count > 0)
                {
                    var json = JsonConvert.SerializeObject(googleResults, Formatting.Indented);
                    File.WriteAllText(googleResultsFile, json);
                    MergeGoogleOcr(newForm, googleResults);
                }

                // Each master may be matched by at most one page.
                usedMasters.Add(currentMasterBlockForm);
            }
            else
            {
                newForm.Status = "Unmatched|No MasterForm match";
            }

            logger.Info($"FilledForm processed {newForm.Name} {newForm.Status} {stopWatch.ElapsedMilliseconds} ");

            if (usedMasters.Count == BlockMasterForms.Count)
            {
                // Every master has been matched; remaining pages are not processed.
                logger.Info("found all master forms");
                break;
            }
        }

        stopWatch.Stop();
        return retForms;
    }
    catch (Exception ex)
    {
        logger.Error(ex, "Untrapped error found");
        return null;
    }
}
/// <summary>
/// Handles a change of the selected page: disposes the image currently shown in the viewer,
/// updates the page-confidence text box, shows a clone of the selected form's image at the chosen
/// page, and rebuilds the field-results grid for that page. Any exception is reported through
/// <c>Messager.ShowError</c>.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Event data (not used).</param>
private void _cmbSelectedPage_SelectedIndexChanged(object sender, EventArgs e)
{
    try
    {
        if (_filledFormViewer.Image != null)
        {
            _filledFormViewer.Image.Dispose();
        }

        var selectedForm = _filledForms[_cmbSelectedForm.SelectedIndex];
        int selectedPage = _cmbSelectedPage.SelectedIndex;

        // If the user chose to only recognize the first page, there will only be a recognition
        // confidence value for the first page.
        bool hasPageConfidence = selectedForm.Result.PageResults.Count >= _cmbSelectedPage.Items.Count;
        _txtPageConfidence.Enabled = hasPageConfidence;
        _txtPageConfidence.Text = hasPageConfidence
            ? selectedForm.Result.PageResults[selectedPage].Confidence.ToString() + "%"
            : "";

        // Image pages are 1-based while the combo box index is 0-based.
        selectedForm.Image.Page = selectedPage + 1;
        _filledFormViewer.Image = selectedForm.Image.Clone();

        _fieldResults.Rows.Clear();

        var processingPages = selectedForm.ProcessingPages;
        if (processingPages != null && processingPages.Count > selectedPage)
        {
            foreach (FormField field in processingPages[selectedPage])
            {
                // Columns: 0 = name, 1 = type label, 2 = value, 3 = confidence, 4 = bounds.
                string[] row = new string[5];
                row[0] = field.Name;

                TableFormField tableField = field as TableFormField;

                LeadRect alignedBounds;
                if (tableField != null)
                {
                    int boundsKey = Math.Max(tableField.ExpectedPages.IndexOf(selectedPage), selectedPage);
                    alignedBounds = tableField.PagesBounds.ContainsKey(boundsKey)
                        ? tableField.PagesBounds[boundsKey]
                        : LeadRect.Empty;
                }
                else if (field is UnStructuredTextFormField)
                {
                    // Unstructured text bounds are used as-is, without alignment.
                    alignedBounds = field.Bounds;
                }
                else if (field is OmrFormField)
                {
                    alignedBounds = selectedForm.Alignment[field.MasterPageNumber - 1].AlignOmrRectangle(field.Bounds);
                }
                else
                {
                    alignedBounds = selectedForm.Alignment[field.MasterPageNumber - 1].AlignRectangle(field.Bounds);
                }

                row[4] = alignedBounds.ToString();

                if (field.Result != null)
                {
                    // The order of these type checks is significant and must be kept.
                    if (field is TextFormField)
                    {
                        TextFormFieldResult textResult = ((TextFormField)field).Result as TextFormFieldResult;
                        row[1] = "Text";
                        row[2] = textResult.Text;
                        row[3] = textResult.AverageConfidence.ToString();
                    }
                    else if (field is UnStructuredTextFormField)
                    {
                        TextFormFieldResult unstructuredResult = ((UnStructuredTextFormField)field).Result as TextFormFieldResult;
                        row[1] = "Unstructured Text";
                        row[2] = unstructuredResult.Text;
                        row[3] = unstructuredResult.AverageConfidence.ToString();
                    }
                    else if (field is OmrFormField)
                    {
                        OmrFormFieldResult omrResult = ((OmrFormField)field).Result as OmrFormFieldResult;
                        row[1] = "Omr";
                        row[2] = omrResult.Text;
                        row[3] = omrResult.AverageConfidence.ToString();
                    }
                    else if (field is BarcodeFormField)
                    {
                        BarcodeFormFieldResult barcodeResult = ((BarcodeFormField)field).Result as BarcodeFormFieldResult;
                        row[1] = "Barcode";
                        // Each barcode overwrites the previous value, so the last one is what is shown.
                        for (int i = 0; i < barcodeResult.BarcodeData.Count; i++)
                        {
                            row[2] = GetDataString(barcodeResult.BarcodeData[i].GetData());
                        }
                        row[3] = "N/A";
                    }
                    else if (field is ImageFormField)
                    {
                        row[1] = "Image";
                        row[2] = "N/A";
                        row[3] = "N/A";
                    }
                    else if (field is TableFormField)
                    {
                        row[1] = "Table";
                        row[2] = "Double click here to view the results...";
                        row[3] = "N/A";
                    }
                    else if (field is SingleSelectionField)
                    {
                        OmrFormFieldResult selectionResult = ((SingleSelectionField)field).Result as OmrFormFieldResult;
                        row[1] = "SingleSelection";
                        row[2] = selectionResult.Text;
                        row[3] = selectionResult.AverageConfidence.ToString();
                    }
                    else if (field is BubbleWordField)
                    {
                        OmrFormFieldResult bubbleResult = ((BubbleWordField)field).Result as OmrFormFieldResult;
                        row[1] = "BubbleWord";
                        row[2] = bubbleResult.Text;
                        row[3] = bubbleResult.AverageConfidence.ToString();
                    }
#if LEADTOOLS_V20_OR_LATER
                    else if (field is OmrAnswerAreaField)
                    {
                        OmrFormFieldResult answerAreaResult = ((OmrAnswerAreaField)field).Result as OmrFormFieldResult;
                        row[1] = "OmrAnswerAreaField";
                        row[2] = answerAreaResult.Text;
                        row[3] = answerAreaResult.AverageConfidence.ToString();
                    }
                    else if (field is OmrDateField)
                    {
                        OmrFormFieldResult dateResult = ((OmrDateField)field).Result as OmrFormFieldResult;
                        row[1] = "OmrDateField";
                        row[2] = dateResult.Text;
                        row[3] = dateResult.AverageConfidence.ToString();
                    }
#endif //#if LEADTOOLS_V20_OR_LATER
                }

                _fieldResults.Rows.Add(row);

                if (tableField != null)
                {
                    // Colour the value cell of table rows blue.
                    _fieldResults.Rows[_fieldResults.Rows.Count - 1].Cells[2].Style.ForeColor = Color.Blue;
                }
            }

            if (_fieldResults.Rows.Count > 0)
            {
                _fieldResults.Rows[0].Selected = true;
            }
        }

        UpdateControls();
    }
    catch (Exception exp)
    {
        Messager.ShowError(this, exp);
    }
}