// Walks every page of the document, adds the statistics of each layout block
// to a running total and appends a RecognizedText entry per page.
// Each page is flushed (with argument false) after it has been handled.
// Returns the accumulated RecognitionStatistics.
private RecognitionStatistics computeStatistics(FREngine.IFRDocument frDocument)
{
    RecognitionStatistics stats = new RecognitionStatistics();
    stats.RecognizedText = new List<RecognizedText>();

    // NOTE(review): 'file' is never read in this method; it is kept only in
    // case the CustomFile constructor has side effects - confirm and remove.
    CustomFile file = new CustomFile();

    for (int pageNo = 0; pageNo < frDocument.Pages.Count; pageNo++)
    {
        FREngine.IFRPage currentPage = frDocument.Pages[pageNo];
        FREngine.ILayout pageLayout = currentPage.Layout;

        // The number of blocks is read once per page, before iterating.
        int total = pageLayout.Blocks.Count;
        int blockNo = 0;
        while (blockNo < total)
        {
            FREngine.IBlock currentBlock = pageLayout.Blocks[blockNo];

            // NOTE(review): relies on the RecognitionStatistics '+' operator
            // keeping the RecognizedText list of the left operand - confirm.
            stats += computeStatisticsForBlock(pageNo, currentBlock);
            blockNo++;
        }

        stats.RecognizedText.Add(new RecognizedText(currentPage, pageNo));
        currentPage.Flush(false);
    }

    return stats;
}
// Builds the text model of one page from its recognized layout.
//   frPage    - page whose layout blocks are converted
//   pageIndex - stored as the page index and as ParentIndex of every block
// After all blocks have been converted, ProcessLine() is invoked.
public RecognizedText(FREngine.IFRPage frPage, int pageIndex)
{
    // Start from empty collections and a fresh page model.
    Text = new List<Character>();
    words = new List<Word>();
    FREngine.ILayoutBlocks layoutBlocks = frPage.Layout.Blocks;
    lineAll = new List<CustomLine>();
    listWords = new List<CustomWord>();
    Page = new CustomPage();
    Page.Index = pageIndex;
    tableIndex = 0;

    int position = 0;
    while (position < layoutBlocks.Count)
    {
        FREngine.IBlock source = layoutBlocks[position];
        CustomBlock converted = buildTextFromBlock(source, position);

        // buildTextFromBlock may yield null; such blocks are left out.
        if (converted != null)
        {
            converted.ParentIndex = pageIndex;
            Page.BlockItems.Add(converted);
        }

        position++;
    }

    ProcessLine();
}
// Applies the clean-up operations enabled in the settings to the page image,
// in this order: garbage removal, noise removal, motion-blur removal,
// color suppression, colored-object removal.
// The settings are first passed (by ref) through checkProcessingSettingsForImage
// together with the image color type.
private void applyImageTransformations(FREngine.IFRPage page, ProcessingSettings settings)
{
    checkProcessingSettingsForImage(page.ImageDocument.ImageColorType, ref settings);

    if (settings.RemoveGarbage)
    {
        page.ImageDocument.RemoveGarbage(null, settings.GarbageSize);
    }

    if (settings.RemoveNoise)
    {
        page.ImageDocument.RemoveNoise(settings.NoiseModel, settings.HasLargeCharacters);
    }

    if (settings.RemoveMotionBlur)
    {
        page.ImageDocument.RemoveMotionBlur(null);
    }

    if (settings.SuppressColor)
    {
        page.ImageDocument.SuppressColorObjects(settings.HslHueToSuppress, settings.HslSaturationBoundaryToSuppress);
    }

    if (!settings.RemoveObjects)
    {
        return;
    }

    // Translate the settings color into the engine enum.
    // Blue is used for Blue itself and for any value not listed here.
    ProcessingSettings.ObjectsColorEnum requestedColor = settings.ColorToRemove;
    FREngine.ObjectsColorEnum engineColor;
    if (requestedColor == ProcessingSettings.ObjectsColorEnum.Red)
    {
        engineColor = FREngine.ObjectsColorEnum.OC_Red;
    }
    else if (requestedColor == ProcessingSettings.ObjectsColorEnum.Green)
    {
        engineColor = FREngine.ObjectsColorEnum.OC_Green;
    }
    else if (requestedColor == ProcessingSettings.ObjectsColorEnum.Yellow)
    {
        engineColor = FREngine.ObjectsColorEnum.OC_Yellow;
    }
    else
    {
        engineColor = FREngine.ObjectsColorEnum.OC_Blue;
    }

    // Translate the settings object type into the engine enum.
    // Background is used for Background itself and for any value not listed here.
    ProcessingSettings.ObjectsTypeEnum requestedType = settings.ObjectsTypeToRemove;
    FREngine.ObjectsTypeEnum engineType;
    if (requestedType == ProcessingSettings.ObjectsTypeEnum.Full)
    {
        engineType = FREngine.ObjectsTypeEnum.OT_Full;
    }
    else if (requestedType == ProcessingSettings.ObjectsTypeEnum.Stamp)
    {
        engineType = FREngine.ObjectsTypeEnum.OT_Stamp;
    }
    else
    {
        engineType = FREngine.ObjectsTypeEnum.OT_Background;
    }

    page.ImageDocument.RemoveColorObjects(null, engineColor, engineType);
}
// Applies the visual enhancements enabled in the settings to the page image:
// brightness equalization, texture smoothing and Floyd-Steinberg dithering.
// Dithering is applied only when both ConvertToBW and EnableDithering are set.
// The settings are first passed (by ref) through checkProcessingSettingsForImage
// together with the image color type.
private void applyVisualEnhancements(FREngine.IFRPage page, ProcessingSettings settings)
{
    checkProcessingSettingsForImage(page.ImageDocument.ImageColorType, ref settings);

    if (settings.EqualizeBrightness)
    {
        bool whiteBackground = settings.MakeWhiteBackground;
        page.ImageDocument.EqualizeBrightness(whiteBackground);
    }

    if (settings.SmoothTexture)
    {
        page.ImageDocument.SmoothTexture();
    }

    if (settings.ConvertToBW)
    {
        if (settings.EnableDithering)
        {
            page.ImageDocument.DitherImage(FREngine.DitheringMethodEnum.DM_FloydSteinberg);
        }
    }
}
// Creates a System.Drawing.Image from the page's image document.
// The black-and-white plane is used for ICT_BlackWhite pages, the color plane
// for ICT_Gray and ICT_Color pages.
// Returns a new image created from the engine's bitmap handle.
// Throws System.InvalidOperationException when no engine image is available
// for the page (unhandled color type or a null image plane).
private System.Drawing.Image getImageFromPage(FREngine.IFRPage frPage)
{
    FREngine.IImageDocument imageDocument = frPage.ImageDocument;
    FREngine.IImage image = null;

    switch (imageDocument.ImageColorType)
    {
        case FREngine.ImageColorTypeEnum.ICT_BlackWhite:
            image = imageDocument.BlackWhiteImage;
            break;
        case FREngine.ImageColorTypeEnum.ICT_Gray:
        case FREngine.ImageColorTypeEnum.ICT_Color:
            image = imageDocument.ColorImage;
            break;
    }

    // Fail with a descriptive error instead of a bare NullReferenceException.
    if (image == null)
    {
        throw new System.InvalidOperationException(
            "No image is available for the page (color type: " + imageDocument.ImageColorType + ")");
    }

    FREngine.IHandle hBitmap = image.GetBitmap(null);
    try
    {
        // The bitmap handle is closed right after the call either way,
        // as the original code did on the success path.
        return System.Drawing.Image.FromHbitmap(hBitmap.Handle);
    }
    finally
    {
        // Always release the engine handle, even when FromHbitmap throws.
        hBitmap.CloseHandle();
    }
}
// Detects the text orientation of every page of the given image file.
// The file is loaded into a temporary document using a copy of the supplied
// prepare-image mode with skew correction switched on.
// Returns one rotation per page; RT_UnknownRotation is stored for pages where
// orientation detection yields no result.
private FREngine.RotationTypeEnum[] detectOrientation(string imagePath, FREngine.PrepareImageMode _prepareImageMode, FREngine.PageProcessingParams pageProcessingParams)
{
    // Work on a copy so the caller's mode object is not modified.
    FREngine.PrepareImageMode loadMode = engine.CreatePrepareImageMode();
    loadMode.CopyFrom(_prepareImageMode);
    loadMode.CorrectSkew = true;
    loadMode.CorrectSkewMode = correctSkewFlags;

    FREngine.RotationTypeEnum[] detected = null;
    FREngine.FRDocument scratchDocument = engine.CreateFRDocument();
    try
    {
        scratchDocument.AddImageFile(imagePath, loadMode, null);

        int total = scratchDocument.Pages.Count;
        detected = new FREngine.RotationTypeEnum[total];

        int pageNo = 0;
        while (pageNo < total)
        {
            FREngine.IFRPage currentPage = scratchDocument.Pages[pageNo];
            FREngine.TextOrientation orientation = currentPage.DetectOrientation(
                null,
                pageProcessingParams.ObjectsExtractionParams,
                pageProcessingParams.RecognizerParams);

            if (orientation == null)
            {
                detected[pageNo] = FREngine.RotationTypeEnum.RT_UnknownRotation;
            }
            else
            {
                detected[pageNo] = orientation.RotationType;
            }

            currentPage.Flush(false);
            pageNo++;
        }
    }
    finally
    {
        // The temporary document is closed whether or not detection succeeded.
        scratchDocument.Close();
    }

    return detected;
}
// Splits the pages of the document (double-page or business-card split,
// depending on the settings) and post-processes every page produced by the
// split (crop, distortion correction, deskew - each only if enabled).
//   frDoc    - document whose pages are split in place
//   ppp      - page processing parameters; CorrectOrientation is switched on
//              here when an orientation correction mode is configured
//   settings - processing settings selecting the split type and clean-up steps
// Returns, for each page of the document after splitting, the index of the
// source page it came from. When no split is requested, a single-element
// array containing 0 is returned and the document is left untouched.
private int[] splitImage(FREngine.IFRDocument frDoc, FREngine.IPageProcessingParams ppp, ProcessingSettings settings)
{
    // Nothing to do when neither split mode is requested.
    // NOTE(review): the result has length 1 regardless of the page count -
    // confirm that callers expect this for multi-page documents.
    if (!settings.SplitPages && !settings.SplitBusinessCards)
    {
        return new int[] { 0 };
    }

    FREngine.PageSplittingParams pageSplittingParams = engine.CreatePageSplittingParams();
    // SplitPages takes precedence over SplitBusinessCards when both are set.
    pageSplittingParams.SplitType = settings.SplitPages
        ? FREngine.PageSplitTypeEnum.PST_DoublePageSplit
        : FREngine.PageSplitTypeEnum.PST_BusinessCardSplit;

    if (settings.CorrectOrientationMode != ProcessingSettings.OrientationCorrectionMode.None)
    {
        ppp.PagePreprocessingParams.CorrectOrientation = true;
    }

    // The split runs in sequential mode. The mode is an engine-wide setting,
    // so it is reset in 'finally' to avoid leaving the engine in sequential
    // mode when SplitPages throws.
    // NOTE(review): the mode is reset to MPM_Auto (as before), not to the value
    // it had on entry - confirm that Auto is always the intended state.
    FREngine.SplitRegions splitRegions;
    engine.MultiProcessingParams.MultiProcessingMode = FREngine.MultiProcessingModeEnum.MPM_Sequential;
    try
    {
        splitRegions = frDoc.SplitPages(null, pageSplittingParams, ppp.ObjectsExtractionParams);
    }
    finally
    {
        engine.MultiProcessingParams.MultiProcessingMode = FREngine.MultiProcessingModeEnum.MPM_Auto;
    }

    // -1 marks pages that are not reported in the split regions.
    int[] sourcePageIndices = new int[frDoc.Pages.Count];
    for (int pageIndex = 0; pageIndex < sourcePageIndices.Length; pageIndex++)
    {
        sourcePageIndices[pageIndex] = -1;
    }

    foreach (FREngine.SplitRegion item in splitRegions)
    {
        FREngine.IFRPage frPage = frDoc.Pages[item.CurrentPageIndex];
        sourcePageIndices[item.CurrentPageIndex] = item.SourcePageIndex;

        if (settings.CropImage)
        {
            frPage.ImageDocument.CropImage();
        }

        if (settings.CorrectDistortions)
        {
            frPage.CorrectGeometricalDistortions(ppp.ObjectsExtractionParams);
        }

        if (settings.DeskewImage)
        {
            frPage.ImageDocument.CorrectSkew(correctSkewFlags);
        }

        frPage.Flush(true);
    }

    // Pages without a split region get the source index following the
    // previous page's one (0 for the very first page).
    for (int pageIndex = 0; pageIndex < sourcePageIndices.Length; pageIndex++)
    {
        if (sourcePageIndices[pageIndex] == -1)
        {
            sourcePageIndices[pageIndex] = pageIndex == 0 ? 0 : sourcePageIndices[pageIndex - 1] + 1;
        }
    }

    return sourcePageIndices;
}
// Applies the geometrical operations enabled in the settings to the page image,
// in this order: crop, local contrast enhancement, orientation correction,
// inversion, geometrical distortion correction, deskew.
//   detectedRotation - rotation detected for the page; RT_UnknownRotation makes
//                      the method fall back to the mode configured in settings
// The settings are first passed (by ref) through checkProcessingSettingsForImage
// together with the image color type.
private void applyGeometricalTransformations(FREngine.IFRPage page, FREngine.IPageProcessingParams ppp, ProcessingSettings settings, FREngine.RotationTypeEnum detectedRotation)
{
    checkProcessingSettingsForImage(page.ImageDocument.ImageColorType, ref settings);

    if (settings.CropImage)
    {
        page.ImageDocument.CropImage();
    }

    if (settings.EnhanceLocalContrast)
    {
        page.ImageDocument.EnhanceLocalContrast();
    }

    bool correctionRequested =
        settings.CorrectOrientationMode != ProcessingSettings.OrientationCorrectionMode.None;
    if (correctionRequested && detectedRotation != FREngine.RotationTypeEnum.RT_NoRotation)
    {
        FREngine.RotationTypeEnum turn = FREngine.RotationTypeEnum.RT_NoRotation;
        bool flip = false;

        if (detectedRotation == FREngine.RotationTypeEnum.RT_UnknownRotation)
        {
            // No detected rotation: use the transformation chosen in the settings.
            ProcessingSettings.OrientationCorrectionMode mode = settings.CorrectOrientationMode;
            if (mode == ProcessingSettings.OrientationCorrectionMode.Rotate90CW)
            {
                turn = FREngine.RotationTypeEnum.RT_Clockwise;
            }
            else if (mode == ProcessingSettings.OrientationCorrectionMode.Rotate180UpsideDown)
            {
                turn = FREngine.RotationTypeEnum.RT_Upsidedown;
            }
            else if (mode == ProcessingSettings.OrientationCorrectionMode.Rotate90CCW)
            {
                turn = FREngine.RotationTypeEnum.RT_Counterclockwise;
            }
            else if (mode == ProcessingSettings.OrientationCorrectionMode.MirrorHorizontally)
            {
                flip = true;
            }
        }
        else if (detectedRotation == FREngine.RotationTypeEnum.RT_Clockwise)
        {
            // A detected rotation is undone by rotating the opposite way.
            turn = FREngine.RotationTypeEnum.RT_Counterclockwise;
        }
        else if (detectedRotation == FREngine.RotationTypeEnum.RT_Upsidedown)
        {
            turn = FREngine.RotationTypeEnum.RT_Upsidedown;
        }
        else if (detectedRotation == FREngine.RotationTypeEnum.RT_Counterclockwise)
        {
            turn = FREngine.RotationTypeEnum.RT_Clockwise;
        }

        // Transform is called even when neither a rotation nor a mirror was selected.
        page.ImageDocument.Transform(turn, flip, false);
    }

    if (settings.InvertImage)
    {
        // Same call with only the third argument set.
        page.ImageDocument.Transform(FREngine.RotationTypeEnum.RT_NoRotation, false, true);
    }

    if (settings.CorrectDistortions)
    {
        page.CorrectGeometricalDistortions(ppp.ObjectsExtractionParams);
    }

    if (settings.DeskewImage)
    {
        page.ImageDocument.CorrectSkew(correctSkewFlags);
    }
}
// Runs the whole preprocessing/recognition pipeline for one image file:
// loads the "DocumentConversion_Accuracy" profile, configures image loading,
// optionally detects page orientation, loads the image, applies geometrical
// transformations, splits pages, applies image clean-up, optionally runs
// recognition, applies visual enhancements, computes statistics and finally
// retrieves the preprocessed page images.
//   imagePath - path of the image file to process
//   settings  - processing settings controlling every step
// Returns the recognition statistics, including total processing time (measured
// up to the start of statistics computation), source page indices and the
// preprocessed image of every page.
// Throws Exception("No pages in a file") when the loaded document has no pages.
public RecognitionStatistics Process(string imagePath, ProcessingSettings settings)
{
    // A monotonic timer is used so that system clock adjustments (DST, time
    // synchronization) cannot distort the reported processing time.
    System.Diagnostics.Stopwatch stopwatch = System.Diagnostics.Stopwatch.StartNew();

    setStep("Applying profile...");
    engine.LoadPredefinedProfile("DocumentConversion_Accuracy");

    setStep("Applying settings...");
    FREngine.PrepareImageMode pim = engine.CreatePrepareImageMode();
    FREngine.DocumentProcessingParams dpp = engine.CreateDocumentProcessingParams();
    FREngine.PageProcessingParams ppp = dpp.PageProcessingParams;
    ppp.RecognizerParams.SetPredefinedTextLanguage(settings.Language);
    disableAllModifications(pim, ppp);

    // Resolution handling: 0 lets the engine pick the resolution automatically,
    // a positive value overrides both X and Y resolution.
    pim.AutoOverwriteResolution = false;
    if (settings.CorrectResolution)
    {
        if (settings.NewResolution == 0)
        {
            pim.AutoOverwriteResolution = true;
        }
        else if (settings.NewResolution > 0)
        {
            pim.OverwriteResolution = true;
            pim.XResolutionToOverwrite = settings.NewResolution;
            pim.YResolutionToOverwrite = settings.NewResolution;
        }
    }

    if (settings.ConvertToBW)
    {
        pim.DiscardColorImage = true;
    }

    if (settings.DeskewImage)
    {
        pim.CorrectSkew = true;
    }

    // Detect orientation for all pages (automatic mode only)
    setStep("Detecting orientation...");
    FREngine.RotationTypeEnum[] rotation = null;
    if (settings.CorrectOrientationMode == ProcessingSettings.OrientationCorrectionMode.Automatic)
    {
        rotation = detectOrientation(imagePath, pim, ppp);
    }

    setStep("Loading image...");
    // Create document
    FREngine.FRDocument frDocument = engine.CreateFRDocument();
    RecognitionStatistics recognitionStats = new RecognitionStatistics();
    try
    {
        // Add image file to document
        frDocument.AddImageFile(imagePath, pim, null);
        if (frDocument.Pages.Count == 0)
        {
            throw new Exception("No pages in a file");
        }

        setStep("Performing image modification...");
        for (int pageIndex = 0; pageIndex < frDocument.Pages.Count; pageIndex++)
        {
            FREngine.IFRPage frPage = frDocument.Pages[pageIndex];

            // Pages without a detected rotation are treated as "unknown".
            FREngine.RotationTypeEnum pageRotation = FREngine.RotationTypeEnum.RT_UnknownRotation;
            if (rotation != null && pageIndex < rotation.Length)
            {
                pageRotation = rotation[pageIndex];
            }

            applyGeometricalTransformations(frPage, ppp, settings, pageRotation);

            // NOTE(review): the explicit collection before each flush is kept from
            // the original code; presumably it releases engine wrappers - confirm.
            GC.Collect();
            GC.WaitForPendingFinalizers();
            frPage.Flush(true);
        }

        int[] sourcePageIndices = splitImage(frDocument, ppp, settings);

        for (int pageIndex = 0; pageIndex < frDocument.Pages.Count; pageIndex++)
        {
            FREngine.IFRPage frPage = frDocument.Pages[pageIndex];
            applyImageTransformations(frPage, settings);
            GC.Collect();
            GC.WaitForPendingFinalizers();
            frPage.Flush(true);
        }

        if (settings.IsRecognize)
        {
            setStep("Recognizing image...");
            frDocument.Process(dpp);
        }

        setStep("Applying visual enhancements...");
        for (int pageIndex = 0; pageIndex < frDocument.Pages.Count; pageIndex++)
        {
            FREngine.IFRPage frPage = frDocument.Pages[pageIndex];
            applyVisualEnhancements(frPage, settings);
            GC.Collect();
            GC.WaitForPendingFinalizers();
            frPage.Flush(true);
        }

        // Processing time covers everything up to this point; statistics
        // computation and image retrieval are not included.
        stopwatch.Stop();
        TimeSpan processingTime = stopwatch.Elapsed;

        setStep("Computing statistics...");
        recognitionStats = computeStatistics(frDocument);
        recognitionStats.TotalProcessingTime = processingTime;
        recognitionStats.SourcePageIndices = sourcePageIndices;

        setStep("Retrieving images...");
        // Allocate the result array once, unless the statistics already carry one.
        if (recognitionStats.PreprocessedImages == null)
        {
            recognitionStats.PreprocessedImages = new System.Drawing.Image[frDocument.Pages.Count];
        }

        for (int pageIndex = 0; pageIndex < frDocument.Pages.Count; pageIndex++)
        {
            FREngine.IFRPage frPage = frDocument.Pages[pageIndex];
            recognitionStats.PreprocessedImages[pageIndex] = getImageFromPage(frPage);
            GC.Collect();
            GC.WaitForPendingFinalizers();
            frPage.Flush(false);
        }
    }
    finally
    {
        // The document is closed on success and on failure.
        frDocument.Close();
    }

    return recognitionStats;
}