/// <summary>
/// Checks whether the given object is a <see cref="DefaultViewer"/> whose
/// IsActive, OutputFormat, Parameters and Path values all equal those of this instance.
/// </summary>
/// <param name="o">
/// object to compare with; anything that is not a
/// <see cref="DefaultViewer"/>
/// (including null) is reported as unequal
/// </param>
/// <returns>true if all compared members are equal, false otherwise</returns>
public override bool Equals(object o)
{
    if (!(o is DefaultViewer))
    {
        return false;
    }

    DefaultViewer other = (DefaultViewer)o;

    // Members are checked in declaration order; && stops at the first mismatch.
    return IsActive.Equals(other.IsActive)
        && OutputFormat.Equals(other.OutputFormat)
        && Parameters.Equals(other.Parameters)
        && Path.Equals(other.Path);
}
/// <summary>
/// Runs OCR on the provided input image file and returns the recognized
/// data as a single string.
/// </summary>
/// <remarks>
/// The image format is verified first. For
/// <see cref="OutputFormat.TXT"/>
/// the text produced by the engine is returned as is. For any other format the
/// text of every recognized item is written on its own line, and the items of
/// each page are followed by one additional line break.
/// </remarks>
/// <param name="input">
/// input image
/// <see cref="System.IO.FileInfo"/>
/// </param>
/// <param name="outputFormat">
/// <see cref="OutputFormat"/>
/// in which the engine should produce its result
/// </param>
/// <returns>
/// OCR result as a
/// <see cref="System.String"/>
/// , or an empty string if processing produced no result
/// </returns>
public String DoImageOcr(FileInfo input, OutputFormat outputFormat)
{
    VerifyImageFormatValidity(input);

    AbstractTesseract4OcrEngine.ITesseractOcrResult ocrResult = ProcessInputFiles(input, outputFormat);
    if (ocrResult == null)
    {
        return "";
    }

    if (outputFormat.Equals(OutputFormat.TXT))
    {
        return ((AbstractTesseract4OcrEngine.StringTesseractOcrResult)ocrResult).GetData();
    }

    IDictionary<int, IList<TextInfo>> textInfosByPage =
        ((AbstractTesseract4OcrEngine.TextInfoTesseractOcrResult)ocrResult).GetTextInfos();

    StringBuilder text = new StringBuilder();
    foreach (int pageNumber in textInfosByPage.Keys)
    {
        foreach (TextInfo item in textInfosByPage.Get(pageNumber))
        {
            text.Append(item.GetText());
            text.Append(Environment.NewLine);
        }

        // Extra line break marks the end of the page.
        text.Append(Environment.NewLine);
    }

    return text.ToString();
}
/// <summary>
/// Performs OCR for the provided image
/// and returns the result as a string in the required format.
/// </summary>
/// <remarks>
/// The page object created by the engine is disposed before returning, and the
/// input
/// <see cref="Tesseract.Pix"/>
/// is always handed to <c>DestroyPix</c>, even if processing throws.
/// </remarks>
/// <param name="tesseractInstance">
/// <see cref="Tesseract.TesseractEngine"/>
/// object used to perform OCR
/// </param>
/// <param name="pix">
/// input image as
/// <see cref="Tesseract.Pix"/>
/// to be processed; when null nothing is processed
/// </param>
/// <param name="outputFormat">
/// selected
/// <see cref="OutputFormat"/>
/// : HOCR yields hOCR markup, anything else yields plain text
/// </param>
/// <returns>
/// result as
/// <see cref="System.String"/>
/// in the required format, or null if no image was given
/// </returns>
internal String GetOcrResultAsString(TesseractEngine tesseractInstance, Pix pix, OutputFormat outputFormat)
{
    if (pix == null)
    {
        return null;
    }

    Page ocrPage = null;
    try
    {
        ocrPage = tesseractInstance.Process(pix);
        return outputFormat.Equals(OutputFormat.HOCR)
            ? ocrPage.GetHOCRText(0)
            : ocrPage.GetText();
    }
    finally
    {
        if (ocrPage != null)
        {
            ocrPage.Dispose();
        }

        DestroyPix(pix);
    }
}
/// <summary>
/// Performs OCR for the provided bitmap
/// and returns the result as a string in the required format.
/// </summary>
/// <remarks>
/// The page object created by the engine is disposed before returning, even if
/// processing throws. The bitmap itself is not disposed by this method.
/// </remarks>
/// <param name="tesseractInstance">
/// <see cref="Tesseract.TesseractEngine"/>
/// object used to perform OCR
/// </param>
/// <param name="image">
/// input
/// <see cref="System.Drawing.Bitmap"/>
/// to be processed; when null nothing is processed
/// </param>
/// <param name="outputFormat">
/// selected
/// <see cref="OutputFormat"/>
/// : HOCR yields hOCR markup, anything else yields plain text
/// </param>
/// <returns>
/// result as
/// <see cref="System.String"/>
/// in the required format, or null if no image was given
/// </returns>
internal String GetOcrResultAsString(TesseractEngine tesseractInstance, System.Drawing.Bitmap image, OutputFormat outputFormat)
{
    if (image == null)
    {
        return null;
    }

    Page ocrPage = null;
    try
    {
        ocrPage = tesseractInstance.Process(image);
        return outputFormat.Equals(OutputFormat.HOCR)
            ? ocrPage.GetHOCRText(0)
            : ocrPage.GetText();
    }
    finally
    {
        if (ocrPage != null)
        {
            ocrPage.Dispose();
        }
    }
}
/// <summary>Adds path to temporary output file with result.</summary>
/// <remarks>
/// The path is added without its extension (".hocr" or ".txt", depending on the
/// output format) and wrapped by <c>AddQuotes</c>.
/// </remarks>
/// <param name="command">result command as list of strings</param>
/// <param name="outputFile">output file with result</param>
/// <param name="outputFormat">
/// selected
/// <see cref="OutputFormat"/>
/// for tesseract
/// </param>
/// <param name="inputImagePath">
/// path to the input image; when it has the same parent directory as the
/// output file, only the output file name is added instead of its full path
/// </param>
/// <exception cref="Tesseract4OcrException">
/// if the output path cannot be built, e.g. the output file name does not
/// contain the expected extension; the original failure is kept as inner exception
/// </exception>
private void AddOutputFile(IList<String> command, FileInfo outputFile, OutputFormat outputFormat, String inputImagePath)
{
    String extension = outputFormat.Equals(OutputFormat.HOCR) ? ".hocr" : ".txt";
    try
    {
        // Workaround for a non-ASCII characters in path
        // Currently works only if the user words (or output files) reside in the same directory as the input image
        // Leaves only a filename in this case, otherwise - absolute path to output file
        String filePath = AreEqualParentDirectories(inputImagePath, outputFile.FullName)
            ? outputFile.Name
            : outputFile.FullName;

        // Cut at the LAST occurrence of the extension: an absolute path may contain
        // ".txt"/".hocr" inside a directory name, and cutting at the first occurrence
        // would then drop the rest of the path. A missing extension gives -1, which
        // makes Substring throw and is reported as TESSERACT_FAILED below.
        int extensionStart = filePath.LastIndexOf(extension, StringComparison.Ordinal);
        String fileName = filePath.Substring(0, extensionStart);

        LogManager.GetLogger(GetType()).Info(MessageFormatUtil.Format(
            Tesseract4LogMessageConstant.CREATED_TEMPORARY_FILE, outputFile.FullName));
        command.Add(AddQuotes(fileName));
    }
    catch (Exception e)
    {
        // NOSONAR
        // Keep the cause so the real failure is not lost.
        throw new Tesseract4OcrException(Tesseract4OcrException.TESSERACT_FAILED, e);
    }
}
/// <summary>
/// Makes sure a usable tesseract instance exists and applies all required
/// settings to it.
/// </summary>
/// <remarks>
/// A new instance is created when there is none yet or the current one has
/// been disposed. After that the following variables are set on the instance:
/// "tessedit_create_hocr" ("1" for HOCR output, otherwise "0") and
/// "user_defined_dpi" ("300"). When a user words file is configured,
/// "load_system_dawg" and "load_freq_dawg" are set to "0" and
/// "user_words_suffix" / "user_words_file" are taken from the engine
/// properties. Finally the remaining properties are applied through
/// <c>TesseractOcrUtil.SetTesseractProperties</c>.
/// </remarks>
/// <param name="outputFormat">
/// selected
/// <see cref="OutputFormat"/>
/// for tesseract
/// </param>
public virtual void InitializeTesseract(OutputFormat outputFormat)
{
    var properties = GetTesseract4OcrEngineProperties();
    String userWordsPath = properties.GetPathToUserWordsFile();

    bool needsNewInstance = GetTesseractInstance() == null
        || TesseractOcrUtil.IsTesseractInstanceDisposed(GetTesseractInstance());
    if (needsNewInstance)
    {
        tesseractInstance = TesseractOcrUtil.InitializeTesseractInstance(
            IsWindows(), GetTessData(), GetLanguagesAsString(), userWordsPath);
    }

    var engine = GetTesseractInstance();

    String createHocr = outputFormat.Equals(OutputFormat.HOCR) ? "1" : "0";
    engine.SetVariable("tessedit_create_hocr", createHocr);
    engine.SetVariable("user_defined_dpi", "300");

    if (userWordsPath != null)
    {
        engine.SetVariable("load_system_dawg", "0");
        engine.SetVariable("load_freq_dawg", "0");
        engine.SetVariable("user_words_suffix", properties.GetDefaultUserWordsSuffix());
        engine.SetVariable("user_words_file", userWordsPath);
    }

    TesseractOcrUtil.SetTesseractProperties(
        engine, GetTessData(), GetLanguagesAsString(), properties.GetPageSegMode(), userWordsPath);
}
/// <summary>
/// Checks whether the given object is a <see cref="ConversionProfile"/> whose
/// settings all equal those of this profile.
/// </summary>
/// <param name="o">
/// object to compare with; anything that is not a
/// <see cref="ConversionProfile"/>
/// (including null) is reported as unequal
/// </param>
/// <returns>true if every compared member is equal, false otherwise</returns>
public override bool Equals(object o)
{
    if (!(o is ConversionProfile))
    {
        return false;
    }

    var other = (ConversionProfile)o;

    // One comparison per member; && stops at the first mismatch.
    return AttachmentPage.Equals(other.AttachmentPage)
        && AutoSave.Equals(other.AutoSave)
        && BackgroundPage.Equals(other.BackgroundPage)
        && CoverPage.Equals(other.CoverPage)
        && EmailClient.Equals(other.EmailClient)
        && EmailSmtp.Equals(other.EmailSmtp)
        && Ftp.Equals(other.Ftp)
        && Ghostscript.Equals(other.Ghostscript)
        && JpegSettings.Equals(other.JpegSettings)
        && PdfSettings.Equals(other.PdfSettings)
        && PngSettings.Equals(other.PngSettings)
        && Printing.Equals(other.Printing)
        && Properties.Equals(other.Properties)
        && SaveDialog.Equals(other.SaveDialog)
        && Scripting.Equals(other.Scripting)
        && Stamping.Equals(other.Stamping)
        && TiffSettings.Equals(other.TiffSettings)
        && AuthorTemplate.Equals(other.AuthorTemplate)
        && FileNameTemplate.Equals(other.FileNameTemplate)
        && Guid.Equals(other.Guid)
        && Name.Equals(other.Name)
        && OpenViewer.Equals(other.OpenViewer)
        && OutputFormat.Equals(other.OutputFormat)
        && ShowProgress.Equals(other.ShowProgress)
        && SkipPrintDialog.Equals(other.SkipPrintDialog)
        && TitleTemplate.Equals(other.TitleTemplate);
}
/// <summary>
/// Checks whether the given object is a <see cref="ConversionProfile"/> whose
/// settings all equal those of this profile.
/// </summary>
/// <remarks>
/// ActionOrder is compared element by element with SequenceEqual; every other
/// member is compared with its own Equals.
/// </remarks>
/// <param name="o">
/// object to compare with; anything that is not a
/// <see cref="ConversionProfile"/>
/// (including null) is reported as unequal
/// </param>
/// <returns>true if every compared member is equal, false otherwise</returns>
public override bool Equals(object o)
{
    if (!(o is ConversionProfile))
    {
        return false;
    }

    ConversionProfile other = (ConversionProfile)o;

    // One comparison per member; && stops at the first mismatch.
    return AttachmentPage.Equals(other.AttachmentPage)
        && AutoSave.Equals(other.AutoSave)
        && BackgroundPage.Equals(other.BackgroundPage)
        && CoverPage.Equals(other.CoverPage)
        && CustomScript.Equals(other.CustomScript)
        && DropboxSettings.Equals(other.DropboxSettings)
        && EmailClientSettings.Equals(other.EmailClientSettings)
        && EmailSmtpSettings.Equals(other.EmailSmtpSettings)
        && ForwardToFurtherProfile.Equals(other.ForwardToFurtherProfile)
        && Ftp.Equals(other.Ftp)
        && Ghostscript.Equals(other.Ghostscript)
        && HttpSettings.Equals(other.HttpSettings)
        && JpegSettings.Equals(other.JpegSettings)
        && PdfSettings.Equals(other.PdfSettings)
        && PngSettings.Equals(other.PngSettings)
        && Printing.Equals(other.Printing)
        && Properties.Equals(other.Properties)
        && Scripting.Equals(other.Scripting)
        && Stamping.Equals(other.Stamping)
        && TextSettings.Equals(other.TextSettings)
        && TiffSettings.Equals(other.TiffSettings)
        && UserTokens.Equals(other.UserTokens)
        && Watermark.Equals(other.Watermark)
        && ActionOrder.SequenceEqual(other.ActionOrder)
        && AuthorTemplate.Equals(other.AuthorTemplate)
        && EnableWorkflowEditor.Equals(other.EnableWorkflowEditor)
        && FileNameTemplate.Equals(other.FileNameTemplate)
        && Guid.Equals(other.Guid)
        && KeywordTemplate.Equals(other.KeywordTemplate)
        && Name.Equals(other.Name)
        && OpenViewer.Equals(other.OpenViewer)
        && OpenWithPdfArchitect.Equals(other.OpenWithPdfArchitect)
        && OutputFormat.Equals(other.OutputFormat)
        && SaveFileTemporary.Equals(other.SaveFileTemporary)
        && ShowAllNotifications.Equals(other.ShowAllNotifications)
        && ShowOnlyErrorNotifications.Equals(other.ShowOnlyErrorNotifications)
        && ShowProgress.Equals(other.ShowProgress)
        && ShowQuickActions.Equals(other.ShowQuickActions)
        && SkipPrintDialog.Equals(other.SkipPrintDialog)
        && SubjectTemplate.Equals(other.SubjectTemplate)
        && TargetDirectory.Equals(other.TargetDirectory)
        && TitleTemplate.Equals(other.TitleTemplate);
}
/// <summary>
/// Parses the command line arguments and stores every recognized option in
/// the corresponding property.
/// </summary>
/// <remarks>
/// Option names are matched case-insensitively.
/// Options that take a value (-U, -P, -F, -O, -S, -D, -E, -N, -C, -A, -TO, -LC)
/// are applied only when they are followed by a non-empty argument; -A may be
/// given several times and each value is added to ParameterCollection.
/// Passing "print" (any casing) as the -E value also sets PrintOutput.
/// Switches (-?, /?, -L, -NR) take no value and are recognized at any position.
/// </remarks>
/// <param name="parameters">raw command line arguments</param>
/// <exception cref="System.ArgumentException">no arguments were supplied</exception>
/// <exception cref="System.FormatException">the value of -C or -LC is not a valid integer</exception>
/// <exception cref="System.OverflowException">the value of -C or -LC does not fit into an Int32</exception>
public void ReadArguments(string[] parameters)
{
    if (parameters == null || parameters.Length == 0)
    {
        throw new ArgumentException("No parameter is specified!");
    }

    for (int i = 0; i < parameters.Length; i++)
    {
        // Normalize once with the invariant culture so matching does not depend on the current culture.
        string option = parameters[i].ToUpperInvariant();

        bool hasValue = i + 1 < parameters.Length && parameters[i + 1].Length > 0;
        if (hasValue)
        {
            string value = parameters[i + 1];
            switch (option)
            {
                case "-U":
                    UserName = value;
                    break;
                case "-P":
                    Password = value;
                    break;
                case "-F":
                    ReportPath = value;
                    break;
                case "-O":
                    OutputPath = value;
                    break;
                case "-S":
                    ServerName = value;
                    break;
                case "-D":
                    DatabaseName = value;
                    break;
                case "-E":
                    OutputFormat = value;
                    if (OutputFormat.Equals("print", StringComparison.OrdinalIgnoreCase))
                    {
                        PrintOutput = true;
                    }
                    break;
                case "-N":
                    PrinterName = value;
                    break;
                case "-C":
                    // Conversion errors propagate unchanged with their original stack trace.
                    PrintCopy = Convert.ToInt32(value);
                    break;
                case "-A":
                    ParameterCollection.Add(value);
                    break;
                case "-TO":
                    MailTo = value;
                    break;
                case "-LC":
                    LCID = int.Parse(value);
                    break;
            }
        }

        // Switches do not need a following argument, so they are checked for every position.
        switch (option)
        {
            case "-?":
            case "/?":
                GetHelp = true;
                break;
            case "-L":
                EnableLog = true;
                break;
            case "-NR":
                Refresh = false;
                break;
        }
    }
}
/// <summary>Runs OCR on the provided input image file and collects the result.</summary>
/// <remarks>
/// Temporary output files are created for tesseract, filled by
/// <c>DoTesseractOcr</c>
/// and then read back. With image preprocessing enabled the image is processed
/// page by page with one output file per page; otherwise a single pass is made
/// with one output file per page of the image. Only TIFF images are treated as
/// possibly having more than one page. All temporary files are deleted before
/// returning. An
/// <see cref="System.IO.IOException"/>
/// is logged and not rethrown.
/// </remarks>
/// <param name="input">
/// input image
/// <see cref="System.IO.FileInfo"/>
/// </param>
/// <param name="outputFormat">
/// <see cref="OutputFormat"/>
/// for the result returned by
/// <see cref="iText.Pdfocr.IOcrEngine"/>
/// </param>
/// <returns>
/// <see cref="ITesseractOcrResult"/>
/// instance, either
/// <see cref="TextInfoTesseractOcrResult"/>
/// if the output format is HOCR, or
/// <see cref="StringTesseractOcrResult"/>
/// otherwise; null if nothing was processed
/// </returns>
private AbstractTesseract4OcrEngine.ITesseractOcrResult ProcessInputFiles(FileInfo input, OutputFormat outputFormat)
{
    IDictionary<int, IList<TextInfo>> textInfosByPage = new LinkedDictionary<int, IList<TextInfo>>();
    StringBuilder plainText = new StringBuilder();
    IList<FileInfo> tempFiles = new List<FileInfo>();
    AbstractTesseract4OcrEngine.ITesseractOcrResult ocrResult = null;
    try
    {
        // image needs to be paginated only if it's tiff
        // or preprocessing isn't required
        int pagesInImage = ImagePreprocessingUtil.IsTiffImage(input)
            ? ImagePreprocessingUtil.GetNumberOfPageTiff(input)
            : 1;
        bool preprocessing = GetTesseract4OcrEngineProperties().IsPreprocessingImages();
        int passes = preprocessing ? pagesInImage : 1;
        int filesPerPass = preprocessing ? 1 : pagesInImage;

        bool hocrRequested = outputFormat.Equals(OutputFormat.HOCR);
        String extension = hocrRequested ? ".hocr" : ".txt";

        for (int page = 1; page <= passes; page++)
        {
            for (int fileIndex = 0; fileIndex < filesPerPass; fileIndex++)
            {
                tempFiles.Add(CreateTempFile(extension));
            }

            DoTesseractOcr(input, tempFiles, outputFormat, page);

            if (hocrRequested)
            {
                IDictionary<int, IList<TextInfo>> parsed = TesseractHelper.ParseHocrFile(
                    tempFiles, GetTesseract4OcrEngineProperties().GetTextPositioning());
                if (preprocessing)
                {
                    textInfosByPage.Put(page, parsed.Get(1));
                }
                else
                {
                    textInfosByPage = parsed;
                }

                ocrResult = new AbstractTesseract4OcrEngine.TextInfoTesseractOcrResult(textInfosByPage);
            }
            else
            {
                foreach (FileInfo tempFile in tempFiles)
                {
                    if (File.Exists(System.IO.Path.Combine(tempFile.FullName)))
                    {
                        plainText.Append(TesseractHelper.ReadTxtFile(tempFile));
                    }
                }

                ocrResult = new AbstractTesseract4OcrEngine.StringTesseractOcrResult(plainText.ToString());
            }
        }
    }
    catch (System.IO.IOException e)
    {
        LogManager.GetLogger(GetType()).Error(MessageFormatUtil.Format(
            Tesseract4LogMessageConstant.CANNOT_OCR_INPUT_FILE, e.Message));
    }
    finally
    {
        foreach (FileInfo tempFile in tempFiles)
        {
            TesseractHelper.DeleteFile(tempFile.FullName);
        }
    }

    return ocrResult;
}
/// <summary>
/// Performs tesseract OCR using the command line tool for the selected page
/// of the input image.
/// </summary>
/// <remarks>
/// The command is assembled step by step (executable, tess data, input image,
/// output file, page segmentation mode, user words, languages, optional hOCR
/// output, default dpi) and then run through "cmd" on Windows or "bash"
/// otherwise.
/// <para>
/// A list of output files is accepted instead of a single file because the
/// page number parameter is not respected for TIFF images that do not require
/// preprocessing: in that case each page of the TIFF image is OCRed and the
/// list is expected to hold one file per page. Otherwise only one file is
/// expected.
/// </para>
/// <para>
/// On exit, a preprocessed copy of the image (when its path differs from the
/// input path) and a temporary user words file are deleted.
/// </para>
/// </remarks>
/// <param name="inputImage">
/// input image
/// <see cref="System.IO.FileInfo"/>
/// </param>
/// <param name="outputFiles">
/// <see cref="System.Collections.Generic.IList{T}"/>
/// of output files (one per each page)
/// </param>
/// <param name="outputFormat">
/// selected
/// <see cref="OutputFormat"/>
/// for tesseract
/// </param>
/// <param name="pageNumber">number of page to be processed</param>
internal override void DoTesseractOcr(FileInfo inputImage, IList<FileInfo> outputFiles, OutputFormat outputFormat, int pageNumber)
{
    ScheduledCheck();
    IList<String> commandArgs = new List<String>();
    String imagePath = null;
    try
    {
        imagePath = inputImage.FullName;

        // path to tesseract executable
        String configuredExecutable = GetPathToExecutable();
        if (String.IsNullOrEmpty(configuredExecutable))
        {
            throw new Tesseract4OcrException(Tesseract4OcrException.CANNOT_FIND_PATH_TO_TESSERACT_EXECUTABLE);
        }

        String execPath = IsWindows() ? AddQuotes(configuredExecutable) : configuredExecutable;
        commandArgs.Add(execPath);
        CheckTesseractInstalled(execPath);

        // path to tess data
        AddTessData(commandArgs);

        // validate languages before preprocessing started
        ValidateLanguages(GetTesseract4OcrEngineProperties().GetLanguages());

        // preprocess input file if needed and add it
        imagePath = PreprocessImage(inputImage, pageNumber);
        AddInputFile(commandArgs, imagePath);

        // move to image directory as tesseract cannot parse non ascii
        // characters in input path
        IList<String> changeDirectoryArgs = MoveToImageDirectory(imagePath);

        // output file
        AddOutputFile(commandArgs, outputFiles[0], outputFormat, imagePath);

        // page segmentation mode
        AddPageSegMode(commandArgs);

        // add user words if needed
        AddUserWords(commandArgs, imagePath);

        // required languages
        AddLanguages(commandArgs);

        if (outputFormat.Equals(OutputFormat.HOCR))
        {
            // path to hocr script
            SetHocrOutput(commandArgs);
        }

        // set default user defined dpi
        AddDefaultDpi(commandArgs);

        OnEvent();

        String shell = IsWindows() ? "cmd" : "bash";
        TesseractHelper.RunCommand(shell, CreateCommandList(changeDirectoryArgs, commandArgs));
    }
    catch (Tesseract4OcrException e)
    {
        LogManager.GetLogger(GetType()).Error(e.Message);
        throw new Tesseract4OcrException(e.Message, e);
    }
    finally
    {
        // Remove the preprocessed copy, but never the original input image.
        try
        {
            if (imagePath != null && !inputImage.FullName.Equals(imagePath))
            {
                TesseractHelper.DeleteFile(imagePath);
            }
        }
        catch (SecurityException e)
        {
            LogManager.GetLogger(GetType()).Error(MessageFormatUtil.Format(
                Tesseract4LogMessageConstant.CANNOT_DELETE_FILE, imagePath, e.Message));
        }

        // Remove the user words file only when it is marked as temporary.
        try
        {
            if (GetTesseract4OcrEngineProperties().GetPathToUserWordsFile() != null)
            {
                if (GetTesseract4OcrEngineProperties().IsUserWordsFileTemporary())
                {
                    TesseractHelper.DeleteFile(GetTesseract4OcrEngineProperties().GetPathToUserWordsFile());
                }
            }
        }
        catch (SecurityException e)
        {
            LogManager.GetLogger(GetType()).Error(MessageFormatUtil.Format(
                Tesseract4LogMessageConstant.CANNOT_DELETE_FILE,
                GetTesseract4OcrEngineProperties().GetPathToUserWordsFile(), e.Message));
        }
    }
}