// Build the recognition statistics for a whole document.
// Walks every page, adds the per-block statistics (computeStatisticsForBlock) of
// each layout block to the running total, appends one RecognizedText entry per
// page, and flushes the page once it has been handled.
// Returns the accumulated RecognitionStatistics.
private RecognitionStatistics computeStatistics(FREngine.IFRDocument frDocument)
{
    RecognitionStatistics stats = new RecognitionStatistics();
    stats.RecognizedText = new List<RecognizedText>();

    // NOTE(review): this instance is never read in this method. It is kept because
    // the CustomFile constructor is not visible here and may have side effects -
    // confirm and remove if it does not.
    CustomFile file = new CustomFile();

    for (int pageNo = 0; pageNo < frDocument.Pages.Count; pageNo++)
    {
        FREngine.IFRPage page = frDocument.Pages[pageNo];
        FREngine.ILayout pageLayout = page.Layout;

        // The block count is read once per page, before any block is visited.
        int totalBlocks = pageLayout.Blocks.Count;
        int blockNo = 0;
        while (blockNo < totalBlocks)
        {
            FREngine.IBlock currentBlock = pageLayout.Blocks[blockNo];
            stats += computeStatisticsForBlock(pageNo, currentBlock);
            blockNo++;
        }

        // The page text entry is added after the block statistics have been merged,
        // so it lands on the instance produced by the last "+=".
        stats.RecognizedText.Add(new RecognizedText(page, pageNo));

        // NOTE(review): Flush(false) presumably unloads the page without saving - confirm
        // against the FineReader Engine documentation.
        page.Flush(false);
    }

    return stats;
}
// Detect the text orientation of every page in an image file.
// The file is loaded into a temporary document using a copy of the supplied
// prepare-image mode with skew correction switched on.
// Returns an array with one rotation per page; a page for which the engine
// returns no orientation is reported as RT_UnknownRotation.
private FREngine.RotationTypeEnum[] detectOrientation(string imagePath, FREngine.PrepareImageMode _prepareImageMode, FREngine.PageProcessingParams pageProcessingParams)
{
    // Work on a copy so that enabling skew correction does not modify the caller's mode.
    FREngine.PrepareImageMode skewCorrectingMode = engine.CreatePrepareImageMode();
    skewCorrectingMode.CopyFrom(_prepareImageMode);
    skewCorrectingMode.CorrectSkew = true;
    skewCorrectingMode.CorrectSkewMode = correctSkewFlags;

    FREngine.RotationTypeEnum[] detected = null;
    FREngine.FRDocument scratchDocument = engine.CreateFRDocument();
    try
    {
        scratchDocument.AddImageFile(imagePath, skewCorrectingMode, null);

        int total = scratchDocument.Pages.Count;
        detected = new FREngine.RotationTypeEnum[total];

        for (int i = 0; i < total; i++)
        {
            FREngine.IFRPage page = scratchDocument.Pages[i];
            FREngine.TextOrientation orientation = page.DetectOrientation(
                null,
                pageProcessingParams.ObjectsExtractionParams,
                pageProcessingParams.RecognizerParams);

            if (orientation == null)
            {
                detected[i] = FREngine.RotationTypeEnum.RT_UnknownRotation;
            }
            else
            {
                detected[i] = orientation.RotationType;
            }

            page.Flush(false);
        }
    }
    finally
    {
        // The temporary document is always closed, even when loading or detection fails.
        scratchDocument.Close();
    }

    return detected;
}
// Split the pages of a document (double-page or business-card split, depending on
// the settings) and post-process every page produced by the split.
//
// Parameters:
//   frDoc    - document whose pages are split in place.
//   ppp      - page processing parameters; CorrectOrientation is switched on in its
//              PagePreprocessingParams when orientation correction is requested.
//   settings - selects the split type and the per-page corrections (crop,
//              geometrical distortions, deskew).
//
// Returns an array that maps each page index after splitting to the index of the
// source page it came from. When no splitting is requested, a single-element
// array { 0 } is returned and the document is left untouched.
private int[] splitImage(FREngine.IFRDocument frDoc, FREngine.IPageProcessingParams ppp, ProcessingSettings settings)
{
    FREngine.PageSplittingParams pageSplittingParams = engine.CreatePageSplittingParams();
    pageSplittingParams.SplitType = FREngine.PageSplitTypeEnum.PST_None;
    if (settings.SplitPages)
    {
        pageSplittingParams.SplitType = FREngine.PageSplitTypeEnum.PST_DoublePageSplit;
    }
    else if (settings.SplitBusinessCards)
    {
        pageSplittingParams.SplitType = FREngine.PageSplitTypeEnum.PST_BusinessCardSplit;
    }
    else
    {
        // Nothing to split.
        // NOTE(review): the result has one element regardless of the page count -
        // confirm that callers expect this for multi-page documents.
        int[] emptyPageIndices = new int[1];
        emptyPageIndices[0] = 0;
        return emptyPageIndices;
    }

    if (settings.CorrectOrientationMode != ProcessingSettings.OrientationCorrectionMode.None)
    {
        ppp.PagePreprocessingParams.CorrectOrientation = true;
    }

    // The multiprocessing mode is engine-wide state. Splitting runs in sequential mode,
    // and the mode is put back to automatic in a finally block so that a failure inside
    // SplitPages cannot leave the engine stuck in sequential mode.
    FREngine.SplitRegions splitRegions;
    engine.MultiProcessingParams.MultiProcessingMode = FREngine.MultiProcessingModeEnum.MPM_Sequential;
    try
    {
        splitRegions = frDoc.SplitPages(null, pageSplittingParams, ppp.ObjectsExtractionParams);
    }
    finally
    {
        engine.MultiProcessingParams.MultiProcessingMode = FREngine.MultiProcessingModeEnum.MPM_Auto;
    }

    // -1 marks a page for which no split region has been reported.
    int[] sourcePageIndices = new int[frDoc.Pages.Count];
    for (int pageIndex = 0; pageIndex < sourcePageIndices.Length; pageIndex++)
    {
        sourcePageIndices[pageIndex] = -1;
    }

    // Record the source page of every split result and apply the requested corrections to it.
    foreach (FREngine.SplitRegion item in splitRegions)
    {
        FREngine.IFRPage frPage = frDoc.Pages[item.CurrentPageIndex];
        sourcePageIndices[item.CurrentPageIndex] = item.SourcePageIndex;

        if (settings.CropImage)
        {
            frPage.ImageDocument.CropImage();
        }
        if (settings.CorrectDistortions)
        {
            frPage.CorrectGeometricalDistortions(ppp.ObjectsExtractionParams);
        }
        if (settings.DeskewImage)
        {
            frPage.ImageDocument.CorrectSkew(correctSkewFlags);
        }

        frPage.Flush(true);
    }

    // Pages without a split region continue the numbering of the previous page
    // (the first page defaults to source index 0).
    for (int pageIndex = 0; pageIndex < sourcePageIndices.Length; pageIndex++)
    {
        if (sourcePageIndices[pageIndex] == -1)
        {
            sourcePageIndices[pageIndex] = pageIndex == 0 ? 0 : sourcePageIndices[pageIndex - 1] + 1;
        }
    }

    return sourcePageIndices;
}
// Run the complete processing pipeline on an image file and collect its statistics.
//
// Steps, in order: load the predefined profile, configure image preparation from the
// settings, optionally detect page orientation, load the image, apply geometrical
// transformations, split pages, apply image transformations, optionally recognize,
// apply visual enhancements, compute statistics and retrieve the preprocessed images.
//
// Parameters:
//   imagePath - path of the image file to load.
//   settings  - processing options controlling every step above.
//
// Returns the RecognitionStatistics of the document, including the processing time
// (measured up to the start of statistics computation), the source page indices
// produced by splitting, and one preprocessed image per page.
//
// Throws Exception("No pages in a file") when the loaded document has no pages.
// The document is always closed, whether processing succeeds or fails.
public RecognitionStatistics Process(string imagePath, ProcessingSettings settings)
{
    // A Stopwatch is used instead of DateTime.Now subtraction so that the measured
    // duration is not affected by system clock adjustments.
    System.Diagnostics.Stopwatch stopwatch = System.Diagnostics.Stopwatch.StartNew();

    setStep("Applying profile...");
    engine.LoadPredefinedProfile("DocumentConversion_Accuracy");

    setStep("Applying settings...");
    FREngine.PrepareImageMode pim = engine.CreatePrepareImageMode();
    FREngine.DocumentProcessingParams dpp = engine.CreateDocumentProcessingParams();
    FREngine.PageProcessingParams ppp = dpp.PageProcessingParams;
    ppp.RecognizerParams.SetPredefinedTextLanguage(settings.Language);
    disableAllModifications(pim, ppp);

    // Resolution handling: 0 means "let the engine pick", a positive value is forced.
    pim.AutoOverwriteResolution = false;
    if (settings.CorrectResolution)
    {
        if (settings.NewResolution == 0)
        {
            pim.AutoOverwriteResolution = true;
        }
        else if (settings.NewResolution > 0)
        {
            pim.OverwriteResolution = true;
            pim.XResolutionToOverwrite = settings.NewResolution;
            pim.YResolutionToOverwrite = settings.NewResolution;
        }
    }
    if (settings.ConvertToBW)
    {
        pim.DiscardColorImage = true;
    }
    if (settings.DeskewImage)
    {
        pim.CorrectSkew = true;
    }

    // Detect orientation for all pages (automatic mode only).
    setStep("Detecting orientation...");
    FREngine.RotationTypeEnum[] rotation = null;
    if (settings.CorrectOrientationMode == ProcessingSettings.OrientationCorrectionMode.Automatic)
    {
        rotation = detectOrientation(imagePath, pim, ppp);
    }

    setStep("Loading image...");
    // Create document
    FREngine.FRDocument frDocument = engine.CreateFRDocument();
    RecognitionStatistics recognitionStats = new RecognitionStatistics();
    try
    {
        // Add image file to document
        frDocument.AddImageFile(imagePath, pim, null);
        if (frDocument.Pages.Count == 0)
        {
            throw new Exception("No pages in a file");
        }

        setStep("Performing image modification...");
        for (int pageIndex = 0; pageIndex < frDocument.Pages.Count; pageIndex++)
        {
            FREngine.IFRPage frPage = frDocument.Pages[pageIndex];

            // Use the detected rotation when one is available for this page.
            FREngine.RotationTypeEnum pageRotation = FREngine.RotationTypeEnum.RT_UnknownRotation;
            if (rotation != null && pageIndex < rotation.Length)
            {
                pageRotation = rotation[pageIndex];
            }

            applyGeometricalTransformations(frPage, ppp, settings, pageRotation);

            // NOTE(review): the forced collection after each page presumably releases
            // engine interop objects promptly - confirm before removing.
            GC.Collect();
            GC.WaitForPendingFinalizers();
            frPage.Flush(true);
        }

        // Splitting may change the number of pages; later loops re-read Pages.Count.
        int[] sourcePageIndices = splitImage(frDocument, ppp, settings);

        for (int pageIndex = 0; pageIndex < frDocument.Pages.Count; pageIndex++)
        {
            FREngine.IFRPage frPage = frDocument.Pages[pageIndex];
            applyImageTransformations(frPage, settings);
            GC.Collect();
            GC.WaitForPendingFinalizers();
            frPage.Flush(true);
        }

        if (settings.IsRecognize)
        {
            setStep("Recognizing image...");
            frDocument.Process(dpp);
        }

        setStep("Applying visual enhancements...");
        for (int pageIndex = 0; pageIndex < frDocument.Pages.Count; pageIndex++)
        {
            FREngine.IFRPage frPage = frDocument.Pages[pageIndex];
            applyVisualEnhancements(frPage, settings);
            GC.Collect();
            GC.WaitForPendingFinalizers();
            frPage.Flush(true);
        }

        // The reported processing time excludes statistics computation and image retrieval.
        TimeSpan processingTime = stopwatch.Elapsed;

        setStep("Computing statistics...");
        recognitionStats = computeStatistics(frDocument);
        recognitionStats.TotalProcessingTime = processingTime;
        recognitionStats.SourcePageIndices = sourcePageIndices;

        setStep("Retrieving images...");
        for (int pageIndex = 0; pageIndex < frDocument.Pages.Count; pageIndex++)
        {
            // Allocate the image array on first use only.
            if (recognitionStats.PreprocessedImages == null)
            {
                recognitionStats.PreprocessedImages = new System.Drawing.Image[frDocument.Pages.Count];
            }

            FREngine.IFRPage frPage = frDocument.Pages[pageIndex];
            recognitionStats.PreprocessedImages[pageIndex] = getImageFromPage(frPage);
            GC.Collect();
            GC.WaitForPendingFinalizers();
            frPage.Flush(false);
        }
    }
    finally
    {
        frDocument.Close();
    }

    return recognitionStats;
}