/// <summary>
/// Runs the OCR engine over <paramref name="image"/> and collects every line of every
/// recognized region, in the order the engine reports them.
/// </summary>
/// <param name="image">The image to recognize. It is copied into a temporary bitmap, which is disposed before returning.</param>
/// <returns>
/// The recognized lines. If anything inside the recognition step throws, the exception message
/// is written to <see cref="Trace"/> and the lines collected so far (possibly none) are returned.
/// </returns>
public static List<Line> ExtractLinesFromImage(Image image)
{
    List<Line> result = new List<Line>();

    // The OCR resources are looked up next to the executing assembly.
    // NOTE(review): Substring(6) presumably strips the leading "file:\" that GetDirectoryName
    // leaves behind from the CodeBase URI - confirm this holds for UNC locations.
    string resourceRoot = Path.GetDirectoryName(System.Reflection.Assembly.GetExecutingAssembly().CodeBase);
    resourceRoot = resourceRoot.Substring(6);

    using (Ocr ocrEngine = new Ocr(resourceRoot))
    {
        try
        {
            // The bitmap copy is owned by this method, so dispose it once recognition is done.
            using (Bitmap bitmap = new Bitmap(image))
            {
                OcrParams ocrParams = new OcrParams
                {
                    CheckOrientation = true,
                    Language = Language.English
                };
                OcrResults ocrResults = ocrEngine.Recognize(bitmap, ocrParams);

                foreach (var region in ocrResults.Regions)
                {
                    foreach (var line in region.Lines)
                    {
                        result.Add(line);
                    }
                }
            }
        }
        catch (Exception ex)
        {
            // Best effort: recognition failures are traced, not propagated.
            Trace.WriteLine(ex.Message);
        }
    }

    return result;
}
/// <summary>
/// Decides whether OCR applies to the current options and stores the resulting
/// parameters in <c>ocrParams</c>.
/// </summary>
/// <remarks>
/// OCR is only considered when the output path or the email file name is a PDF file and
/// OCR has not been explicitly disabled. It is then used if any of these hold: the options
/// enable it, the options name an OCR language, the user config enables it, or the app
/// config state is <c>OcrState.Enabled</c>. When OCR is not used the language code is null.
/// </remarks>
private void ConfigureOcr()
{
    bool targetsPdf = IsPdfFile(options.OutputPath) || IsPdfFile(options.EmailFileName);

    bool ocrWanted = false;
    if (targetsPdf && !options.DisableOcr)
    {
        ocrWanted = options.EnableOcr
            || options.OcrLang != null
            || userConfigManager.Config.EnableOcr
            || appConfigManager.Config.OcrState == OcrState.Enabled;
    }

    // An explicit language from the options wins over the OCR manager's default.
    string languageCode = null;
    if (ocrWanted)
    {
        languageCode = options.OcrLang ?? ocrManager.DefaultParams?.LanguageCode;
    }

    var mode = ocrManager.DefaultParams?.Mode ?? OcrMode.Default;
    ocrParams = new OcrParams(languageCode, mode);
}
/// <summary>
/// Builds a PDF document from <paramref name="snapshots"/> and saves it to <paramref name="path"/>.
/// The work runs on a task started with <see cref="TaskCreationOptions.LongRunning"/>.
/// </summary>
/// <param name="path">Destination file; its parent directory is created via <c>PathHelper.EnsureParentDirExists</c>.</param>
/// <param name="snapshots">The pages to put into the document.</param>
/// <param name="settings">Metadata, encryption and compatibility settings for the document.</param>
/// <param name="ocrParams">OCR parameters; OCR is only attempted when a language code is present.</param>
/// <param name="progressCallback">Progress handler forwarded to the page-building step.</param>
/// <param name="cancelToken">Cancellation token forwarded to the page-building step.</param>
/// <returns>
/// False when the page-building step returns false (for the OCR path that is when cancellation
/// was requested), in which case nothing is saved; true once the document has been saved.
/// </returns>
public async Task<bool> Export(string path, ICollection<ScannedImage.Snapshot> snapshots, PdfSettings settings, OcrParams ocrParams, ProgressHandler progressCallback, CancellationToken cancelToken)
{
    Task<bool> exportTask = Task.Factory.StartNew(() =>
    {
        // A compatibility level forced by the app config overrides the per-export setting.
        var forcedCompat = appConfigManager.Config.ForcePdfCompat;
        var effectiveCompat = forcedCompat != PdfCompat.Default ? forcedCompat : settings.Compat;

        var pdf = new PdfDocument();

        var metadata = settings.Metadata;
        pdf.Info.Author = metadata.Author;
        pdf.Info.Creator = metadata.Creator;
        pdf.Info.Keywords = metadata.Keywords;
        pdf.Info.Subject = metadata.Subject;
        pdf.Info.Title = metadata.Title;

        // Encryption is only applied when requested and at least one password is set.
        var encryption = settings.Encryption;
        if (encryption.EncryptPdf)
        {
            bool hasOwnerPassword = !string.IsNullOrEmpty(encryption.OwnerPassword);
            bool hasUserPassword = !string.IsNullOrEmpty(encryption.UserPassword);
            if (hasOwnerPassword || hasUserPassword)
            {
                var security = pdf.SecuritySettings;
                security.DocumentSecurityLevel = PdfDocumentSecurityLevel.Encrypted128Bit;
                if (hasOwnerPassword)
                {
                    security.OwnerPassword = encryption.OwnerPassword;
                }
                if (hasUserPassword)
                {
                    security.UserPassword = encryption.UserPassword;
                }
                security.PermitAccessibilityExtractContent = encryption.AllowContentCopyingForAccessibility;
                security.PermitAnnotations = encryption.AllowAnnotations;
                security.PermitAssembleDocument = encryption.AllowDocumentAssembly;
                security.PermitExtractContent = encryption.AllowContentCopying;
                security.PermitFormsFill = encryption.AllowFormFilling;
                security.PermitFullQualityPrint = encryption.AllowFullQualityPrinting;
                security.PermitModifyDocument = encryption.AllowDocumentModification;
                security.PermitPrint = encryption.AllowPrinting;
            }
        }

        // Pick an OCR engine only if a language was requested and the active engine can handle it;
        // otherwise log why and fall back to exporting without OCR.
        IOcrEngine ocrEngine = null;
        if (ocrParams?.LanguageCode != null)
        {
            var candidate = ocrManager.ActiveEngine;
            if (candidate != null && candidate.CanProcess(ocrParams.LanguageCode))
            {
                ocrEngine = candidate;
            }
            else if (candidate == null)
            {
                // NOTE(review): the language code is passed although the message has no placeholder.
                Log.Error("Supported OCR engine not installed.", ocrParams.LanguageCode);
            }
            else
            {
                Log.Error("OCR files not available for '{0}'.", ocrParams.LanguageCode);
            }
        }

        bool built;
        if (ocrEngine == null)
        {
            built = BuildDocumentWithoutOcr(progressCallback, cancelToken, pdf, effectiveCompat, snapshots);
        }
        else
        {
            built = BuildDocumentWithOcr(progressCallback, cancelToken, pdf, effectiveCompat, snapshots, ocrEngine, ocrParams);
        }
        if (!built)
        {
            return false;
        }

        // Creation and modification dates share the same timestamp.
        var timestamp = DateTime.Now;
        pdf.Info.CreationDate = timestamp;
        pdf.Info.ModificationDate = timestamp;

        // PDF/A-1b needs two extra adjustments before the steps shared by all non-default levels.
        if (effectiveCompat == PdfCompat.PdfA1B)
        {
            PdfAHelper.SetCidStream(pdf);
            PdfAHelper.DisableTransparency(pdf);
        }
        if (effectiveCompat != PdfCompat.Default)
        {
            PdfAHelper.SetColorProfile(pdf);
            PdfAHelper.SetCidMap(pdf);
            PdfAHelper.CreateXmpMetadata(pdf, effectiveCompat);
        }

        PathHelper.EnsureParentDirExists(path);
        pdf.Save(path);
        return true;
    }, TaskCreationOptions.LongRunning);

    return await exportTask;
}
/// <summary>
/// Adds one page per snapshot to <paramref name="document"/>, queues OCR for each page and then
/// draws the recognized text onto the pages once the OCR tasks have finished.
/// </summary>
/// <param name="progressCallback">Called with (0, count) up front and again each time an OCR task completes without cancellation.</param>
/// <param name="cancelToken">Checked between the individual steps; when set, processing stops early.</param>
/// <param name="document">The document the pages are added to.</param>
/// <param name="compat">Compatibility level passed to the image drawing step.</param>
/// <param name="snapshots">The pages to render.</param>
/// <param name="ocrEngine">Engine used for the queued OCR requests.</param>
/// <param name="ocrParams">Parameters used for the queued OCR requests.</param>
/// <returns>True unless cancellation was requested by the time the method finishes.</returns>
private bool BuildDocumentWithOcr(ProgressHandler progressCallback, CancellationToken cancelToken, PdfDocument document, PdfCompat compat, ICollection<ScannedImage.Snapshot> snapshots, IOcrEngine ocrEngine, OcrParams ocrParams)
{
    int progress = 0;
    progressCallback(progress, snapshots.Count);

    List<(PdfPage, Task<OcrResult>)> ocrPairs = new List<(PdfPage, Task<OcrResult>)>();

    // Step 1: Create the pages, draw the images, and start OCR
    foreach (var snapshot in snapshots)
    {
        if (cancelToken.IsCancellationRequested)
        {
            break;
        }

        // A snapshot with no file format and no transforms is copied over as an existing PDF page.
        bool importedPdfPassThrough = snapshot.Source.FileFormat == null && !snapshot.TransformList.Any();

        PdfPage page;
        if (importedPdfPassThrough)
        {
            page = CopyPdfPageToDoc(document, snapshot.Source);
            if (PageContainsText(page))
            {
                // Since this page already contains text, don't use OCR
                // NOTE(review): skipped pages never advance the progress counter - confirm that is intended.
                continue;
            }
        }
        else
        {
            page = document.AddPage();
        }

        string tempImageFilePath = Path.Combine(Paths.Temp, Path.GetRandomFileName());

        using (Stream stream = scannedImageRenderer.RenderToStream(snapshot).Result)
        using (var img = XImage.FromStream(stream))
        {
            if (cancelToken.IsCancellationRequested)
            {
                break;
            }

            if (!importedPdfPassThrough)
            {
                DrawImageOnPage(page, img, compat);
            }

            if (cancelToken.IsCancellationRequested)
            {
                break;
            }

            // The temp image is only needed when the OCR queue has no cached result for this page.
            if (!ocrRequestQueue.HasCachedResult(ocrEngine, snapshot, ocrParams))
            {
                img.GdiImage.Save(tempImageFilePath);
            }
        }

        if (cancelToken.IsCancellationRequested)
        {
            // The temp file will not be handed to the OCR queue, so remove it here.
            File.Delete(tempImageFilePath);
            break;
        }

        // Start OCR
        var ocrTask = ocrRequestQueue.QueueForeground(ocrEngine, snapshot, tempImageFilePath, ocrParams, cancelToken);
        ocrTask.ContinueWith(task =>
        {
            // This is the best place to put progress reporting
            // Long-running OCR is done, and drawing text on the page (step 2) is very fast
            if (!cancelToken.IsCancellationRequested)
            {
                // Report the value returned by the atomic increment instead of re-reading the shared
                // counter, so continuations that finish concurrently each report their own count.
                int completed = Interlocked.Increment(ref progress);
                progressCallback(completed, snapshots.Count);
            }
        }, TaskContinuationOptions.ExecuteSynchronously);

        // Record the page and task for step 2
        ocrPairs.Add((page, ocrTask));
    }

    // Step 2: Wait for all the OCR results, and draw the text on each page
    foreach (var (page, ocrTask) in ocrPairs)
    {
        if (cancelToken.IsCancellationRequested)
        {
            break;
        }

        // Accessing Result blocks until this page's OCR task has finished.
        var ocrResult = ocrTask.Result;
        if (ocrResult == null)
        {
            continue;
        }

        DrawOcrTextOnPage(page, ocrResult);
    }

    return !cancelToken.IsCancellationRequested;
}
/// <summary>
/// Starts saving the given images as a single PDF and, when requested, emailing it afterwards.
/// </summary>
/// <param name="fileName">Target file name; placeholders are substituted using <paramref name="dateTime"/>.</param>
/// <param name="dateTime">Timestamp used for placeholder substitution.</param>
/// <param name="images">The images to export; a snapshot of each is taken before the background work starts.</param>
/// <param name="pdfSettings">Settings forwarded to the PDF exporter.</param>
/// <param name="ocrParams">OCR parameters forwarded to the PDF exporter.</param>
/// <param name="email">True to send <paramref name="emailMessage"/> after a successful export.</param>
/// <param name="emailMessage">The message to send; ignored when null or when <paramref name="email"/> is false.</param>
/// <returns>
/// False if the target file exists and the user declined to overwrite it; otherwise true, meaning
/// the background operation was started. The outcome is reported through <c>Success</c>.
/// </returns>
public bool Start(string fileName, DateTime dateTime, ICollection<ScannedImage> images, PdfSettings pdfSettings, OcrParams ocrParams, bool email, EmailMessage emailMessage)
{
    ProgressTitle = email ? MiscResources.EmailPdfProgress : MiscResources.SavePdfProgress;

    var subFileName = fileNamePlaceholders.SubstitutePlaceholders(fileName, dateTime);
    if (Directory.Exists(subFileName))
    {
        // Not supposed to be a directory, but ok...
        subFileName = fileNamePlaceholders.SubstitutePlaceholders(Path.Combine(subFileName, "$(n).pdf"), dateTime);
    }

    // Build the status after the directory fallback so the text names the file actually written
    // rather than the directory.
    Status = new OperationStatus
    {
        StatusText = string.Format(MiscResources.SavingFormat, Path.GetFileName(subFileName)),
        MaxProgress = images.Count
    };

    if (File.Exists(subFileName) && overwritePrompt.ConfirmOverwrite(subFileName) != DialogResult.Yes)
    {
        return false;
    }

    var snapshots = images.Select(x => x.Preserve()).ToList();
    RunAsync(async () =>
    {
        bool result = false;
        try
        {
            result = await pdfExporter.Export(subFileName, snapshots, pdfSettings, ocrParams, OnProgress, CancelToken);
        }
        catch (UnauthorizedAccessException ex)
        {
            InvokeError(MiscResources.DontHavePermission, ex);
        }
        catch (IOException ex)
        {
            // If the file exists, the I/O failure is reported as the file being in use.
            if (File.Exists(subFileName))
            {
                InvokeError(MiscResources.FileInUse, ex);
            }
            else
            {
                Log.ErrorException(MiscResources.ErrorSaving, ex);
                InvokeError(MiscResources.ErrorSaving, ex);
            }
        }
        catch (Exception ex)
        {
            Log.ErrorException(MiscResources.ErrorSaving, ex);
            InvokeError(MiscResources.ErrorSaving, ex);
        }
        finally
        {
            // The snapshots are only needed for the export itself.
            snapshots.ForEach(s => s.Dispose());
            GC.Collect();
        }

        if (result && email && emailMessage != null)
        {
            Status.StatusText = MiscResources.UploadingEmail;
            Status.CurrentProgress = 0;
            Status.MaxProgress = 1;
            Status.ProgressType = OperationProgressType.MB;
            InvokeStatusChanged();

            // The operation only counts as successful once the email provider reports success;
            // a cancelled or failed send must not inherit the export's "true" result.
            result = false;
            try
            {
                result = await emailProviderFactory.Default.SendEmail(emailMessage, OnProgress, CancelToken);
            }
            catch (OperationCanceledException)
            {
                // Cancellation is not an error; the result stays false.
            }
            catch (Exception ex)
            {
                Log.ErrorException(MiscResources.ErrorEmailing, ex);
                InvokeError(MiscResources.ErrorEmailing, ex);
            }
        }

        return result;
    });

    // Record a usage event only when the operation completed successfully.
    Success.ContinueWith(task =>
    {
        if (task.Result)
        {
            if (email)
            {
                Log.Event(EventType.Email, new EventParams
                {
                    Name = MiscResources.EmailPdf,
                    Pages = snapshots.Count,
                    FileFormat = ".pdf"
                });
            }
            else
            {
                Log.Event(EventType.SavePdf, new EventParams
                {
                    Name = MiscResources.SavePdf,
                    Pages = snapshots.Count,
                    FileFormat = ".pdf"
                });
            }
        }
    }, TaskContinuationOptions.OnlyOnRanToCompletion);

    return true;
}
/// <summary>
/// Starts saving the given images as one PDF, or as one PDF per page when
/// <c>pdfSettings.SinglePagePdf</c> is set, and optionally emails the result afterwards.
/// </summary>
/// <param name="fileName">Target file name template; placeholders are substituted using <paramref name="dateTime"/>.</param>
/// <param name="dateTime">Timestamp used for placeholder substitution.</param>
/// <param name="images">The images to export; a snapshot of each is taken before the background work starts.</param>
/// <param name="pdfSettings">Settings forwarded to the PDF exporter; also controls per-page output and "show folder".</param>
/// <param name="ocrParams">OCR parameters forwarded to the PDF exporter.</param>
/// <param name="email">True to send <paramref name="emailMessage"/> after a successful export.</param>
/// <param name="emailMessage">The message to send; every saved file is added to it as an attachment when it is not null.</param>
/// <returns>
/// False if a single output file already exists and the user declined to overwrite it; otherwise
/// true, meaning the background operation was started. The outcome is reported through <c>Success</c>.
/// </returns>
public bool Start(string fileName, DateTime dateTime, ICollection<ScannedImage> images, PdfSettings pdfSettings, OcrParams ocrParams, bool email, EmailMessage emailMessage)
{
    ProgressTitle = email ? MiscResources.EmailPdfProgress : MiscResources.SavePdfProgress;
    Status = new OperationStatus
    {
        MaxProgress = images.Count
    };

    if (Directory.Exists(fileNamePlaceholders.SubstitutePlaceholders(fileName, dateTime)))
    {
        // Not supposed to be a directory, but ok...
        fileName = Path.Combine(fileName, "$(n).pdf");
    }

    // A single output file is written unless per-page output is on and there is more than one image.
    var singleFile = !pdfSettings.SinglePagePdf || images.Count == 1;
    var subFileName = fileNamePlaceholders.SubstitutePlaceholders(fileName, dateTime);
    if (singleFile && File.Exists(subFileName) && overwritePrompt.ConfirmOverwrite(subFileName) != DialogResult.Yes)
    {
        return false;
    }

    var snapshots = images.Select(x => x.Preserve()).ToList();
    var snapshotsByFile = pdfSettings.SinglePagePdf
        ? snapshots.Select(x => new[] { x }).ToArray()
        : new[] { snapshots.ToArray() };

    // Remembered so the "show folder" step can open the directory of the file actually written.
    string firstSavedPath = null;

    RunAsync(async () =>
    {
        bool result = false;
        try
        {
            int digits = (int)Math.Floor(Math.Log10(snapshots.Count)) + 1;
            int i = 0;
            foreach (var snapshotArray in snapshotsByFile)
            {
                subFileName = fileNamePlaceholders.SubstitutePlaceholders(fileName, dateTime, true, i, singleFile ? 0 : digits);
                Status.StatusText = string.Format(MiscResources.SavingFormat, Path.GetFileName(subFileName));
                InvokeStatusChanged();

                if (singleFile && IsFileInUse(subFileName, out var ex))
                {
                    InvokeError(MiscResources.FileInUse, ex);
                    break;
                }

                // With several output files, progress is reported per file below, so the
                // exporter's own per-page progress is discarded.
                var progress = singleFile ? OnProgress : (ProgressHandler)((j, k) => { });
                result = await pdfExporter.Export(subFileName, snapshotArray, pdfSettings, ocrParams, progress, CancelToken);
                if (!result || CancelToken.IsCancellationRequested)
                {
                    break;
                }

                emailMessage?.Attachments.Add(new EmailAttachment
                {
                    FilePath = subFileName,
                    AttachmentName = Path.GetFileName(subFileName)
                });

                if (i == 0)
                {
                    FirstFileSaved = subFileName;
                    firstSavedPath = subFileName;
                }
                i++;

                if (!singleFile)
                {
                    OnProgress(i, snapshotsByFile.Length);
                }
            }
        }
        catch (UnauthorizedAccessException ex)
        {
            InvokeError(MiscResources.DontHavePermission, ex);
        }
        catch (IOException ex)
        {
            // If the file exists, the I/O failure is reported as the file being in use.
            if (File.Exists(subFileName))
            {
                InvokeError(MiscResources.FileInUse, ex);
            }
            else
            {
                Log.ErrorException(MiscResources.ErrorSaving, ex);
                InvokeError(MiscResources.ErrorSaving, ex);
            }
        }
        catch (Exception ex)
        {
            Log.ErrorException(MiscResources.ErrorSaving, ex);
            InvokeError(MiscResources.ErrorSaving, ex);
        }
        finally
        {
            // The snapshots are only needed for the export itself.
            snapshots.ForEach(s => s.Dispose());
            GC.Collect();
        }

        if (result && !CancelToken.IsCancellationRequested && email && emailMessage != null)
        {
            Status.StatusText = MiscResources.UploadingEmail;
            Status.CurrentProgress = 0;
            Status.MaxProgress = 1;
            Status.ProgressType = OperationProgressType.MB;
            InvokeStatusChanged();

            // The operation only counts as successful once the email provider reports success;
            // a cancelled or failed send must not inherit the export's "true" result.
            result = false;
            try
            {
                result = await emailProviderFactory.Default.SendEmail(emailMessage, OnProgress, CancelToken);
            }
            catch (OperationCanceledException)
            {
                // Cancellation is not an error; the result stays false.
            }
            catch (Exception ex)
            {
                Log.ErrorException(MiscResources.ErrorEmailing, ex);
                InvokeError(MiscResources.ErrorEmailing, ex);
            }
        }

        return result;
    });

    // Record a usage event (and optionally open the output folder) only on success.
    Success.ContinueWith(task =>
    {
        if (task.Result)
        {
            if (email)
            {
                Log.Event(EventType.Email, new Event
                {
                    Name = MiscResources.EmailPdf,
                    Pages = snapshots.Count,
                    FileFormat = ".pdf"
                });
            }
            else
            {
                Log.Event(EventType.SavePdf, new Event
                {
                    Name = MiscResources.SavePdf,
                    Pages = snapshots.Count,
                    FileFormat = ".pdf"
                });

                if (pdfSettings.ShowFolder)
                {
                    // Use the path of the file that was really written: the raw fileName template
                    // may still contain unsubstituted placeholders in its directory part.
                    string folder = Path.GetDirectoryName(firstSavedPath ?? fileName);

                    // Quote the folder so spaces and commas are not treated as argument separators.
                    string arguments = string.IsNullOrEmpty(folder) ? folder : "\"" + folder + "\"";
                    Process.Start("explorer.exe", arguments)?.Dispose();
                }
            }
        }
    }, TaskContinuationOptions.OnlyOnRanToCompletion);

    return true;
}