/// <summary>
/// Converts <paramref name="sourceFile"/> into a PDF written to <paramref name="targetFile"/>,
/// using an engine borrowed from the engine pool.
/// </summary>
/// <param name="profile">Profile name passed on to <c>LoadFineReaderDocument</c>.</param>
/// <param name="sourceFile">The file to convert; its full path is handed to the engine.</param>
/// <param name="targetFile">The file the PDF export is written to.</param>
/// <returns>
/// A result whose <c>TargetFile</c> is set (and refreshed) on success. When loading, processing,
/// exporting or closing the document throws, the exception is logged and the result is returned
/// with <c>HasError</c> set and <c>ErrorMessage</c> holding the exception message.
/// </returns>
public TransformResult TransformDocument(string profile, FileInfo sourceFile, FileInfo targetFile)
{
    var retVal = new TransformResult();
    var engine = enginesPool.GetEngine();
    bool isRecycleRequired = false;

    try
    {
        IFRDocument fineReaderDocument = null;
        try
        {
            fineReaderDocument = LoadFineReaderDocument(sourceFile.FullName, profile, engine);
            var documentProcessingParams = CreateProcessingParams(engine);
            fineReaderDocument.Process(documentProcessingParams);
            fineReaderDocument.Export(targetFile.FullName, FileExportFormatEnum.FEF_PDF, null);

            // Everything OK
            retVal.TargetFile = targetFile;
            retVal.TargetFile.Refresh();
            return retVal;
        }
        finally
        {
            if (fineReaderDocument != null)
            {
                try
                {
                    fineReaderDocument.Close();
                }
                finally
                {
                    // Release the COM wrapper even when Close() throws; otherwise the
                    // reference would stay alive until the runtime finalizes it.
                    if (Marshal.IsComObject(fineReaderDocument))
                    {
                        Marshal.ReleaseComObject(fineReaderDocument);
                    }
                }
            }
        }
    }
    catch (Exception ex)
    {
        Log.Error(ex, $"Abbyy document conversion failed: {ex.Message}");

        // Let the pool decide whether this failure requires the engine to be recycled.
        isRecycleRequired = enginesPool.ShouldRestartEngine(ex);
        retVal.HasError = true;
        retVal.ErrorMessage = ex.Message;
    }
    finally
    {
        enginesPool.ReleaseEngine(engine, isRecycleRequired);
    }

    return retVal;
}
/// <summary>
/// Extracts the text of <paramref name="inputFile"/> by exporting it to a temporary text file
/// and reading that file back line by line, publishing progress events on the bus.
/// </summary>
/// <param name="inputFile">Path of the document to process; also reported as <c>File</c> in the published events.</param>
/// <param name="settings">
/// Supplies the maximum extraction size, the text extraction profile and the context that are
/// passed on to the result and to <c>LoadFineReaderDocument</c>.
/// </param>
/// <returns>
/// The extraction result. It is returned without any appended text when the document has exactly
/// one page and that page is reported as empty. When loading, processing, exporting, reading or
/// closing throws, the exception is logged and the result is returned with <c>HasError</c> set and
/// <c>ErrorMessage</c> holding the exception message.
/// </returns>
public ExtractionResult ExtractTextFromDocument(string inputFile, ITextExtractorSettings settings)
{
    var retVal = new ExtractionResult(settings.MaxExtractionSize);
    var outputFile = Path.Combine(Path.GetTempPath(), Guid.NewGuid().ToString("N") + ".txt");
    var engine = enginesPool.GetEngine();
    bool isRecycleRequired = false;

    try
    {
        IFRDocument fineReaderDocument = null;
        try
        {
            fineReaderDocument = LoadFineReaderDocument(inputFile, settings.TextExtractionProfile, engine, settings.Context);
            SubscribeExtractionEvents((FRDocument)fineReaderDocument);
            fineReaderDocument.Process();

            // Skip empty pages for single-page documents.
            if (fineReaderDocument.Pages.Count == 1)
            {
                // IsEmpty uses the settings that were defined through the profile.
                if (fineReaderDocument.Pages[0].IsEmpty())
                {
                    Log.Information("The page {inputFile} was detected as empty.", inputFile);
                    return retVal;
                }
            }

            fineReaderDocument.Export(outputFile, FileExportFormatEnum.FEF_TextUnicodeDefaults, null);

            // Read the contents of the exported file, stopping once the size limit is exceeded.
            using (var sr = new StreamReader(outputFile))
            {
                string line;
                while ((line = sr.ReadLine()) != null)
                {
                    retVal.Append(line);
                    if (retVal.LimitExceeded)
                    {
                        break;
                    }
                }
            }

            // Everything OK
            return retVal;
        }
        finally
        {
            if (fineReaderDocument != null)
            {
                try
                {
                    UnsubscribeExtractionEvents((FRDocument)fineReaderDocument);
                    fineReaderDocument.Close();
                }
                finally
                {
                    // Release the COM wrapper even when unsubscribing or Close() throws.
                    if (Marshal.IsComObject(fineReaderDocument))
                    {
                        Marshal.ReleaseComObject(fineReaderDocument);
                    }
                }
            }
        }
    }
    catch (Exception ex)
    {
        Log.Error(ex, $"Abbyy Textextraction failed: {ex.Message}");

        // Let the pool decide whether this failure requires the engine to be recycled.
        isRecycleRequired = enginesPool.ShouldRestartEngine(ex);
        retVal.HasError = true;
        retVal.ErrorMessage = ex.Message;

        // Push an error message to indicate that the item has failed
        bus.Publish<AbbyyProgressEvent>(new
        {
            __TimeToLive = TimeSpan.FromSeconds(3),
            File = inputFile,
            Process = ProcessType.TextExtraction,
            EventType = AbbyyEventType.AbbyyOnProgressEvent,
            HasFailed = true
        });
    }
    finally
    {
        try
        {
            // Push the final message to indicate that the item is finished.
            // NOTE(review): the value returned by Publish is discarded here — confirm that
            // fire-and-forget is intended.
            bus.Publish<AbbyyProgressEvent>(new
            {
                __TimeToLive = TimeSpan.FromSeconds(3),
                File = inputFile,
                IsComplete = true,
                Percentage = 100,
                Process = ProcessType.TextExtraction,
                EventType = AbbyyEventType.AbbyyOnProgressEvent
            });
        }
        finally
        {
            // The engine must go back to the pool and the temp file must be removed
            // even when publishing the final event fails.
            try
            {
                enginesPool.ReleaseEngine(engine, isRecycleRequired);
            }
            finally
            {
                if (File.Exists(outputFile))
                {
                    File.Delete(outputFile);
                }
            }
        }
    }

    return retVal;
}
/// <summary>
/// Converts <paramref name="inputFile"/> into a PDF written to <paramref name="targetFile"/>,
/// using an engine borrowed from the engine pool and publishing progress events on the bus.
/// </summary>
/// <param name="profile">Profile name passed on to <c>LoadFineReaderDocument</c>.</param>
/// <param name="inputFile">The file to convert; its full path is handed to the engine and reported in the published events.</param>
/// <param name="targetFile">The file the PDF export is written to.</param>
/// <param name="context">Job context passed on to <c>LoadFineReaderDocument</c>.</param>
/// <returns>
/// A result whose <c>TargetFile</c> is set (and refreshed) on success. When loading, processing,
/// exporting or closing the document throws, the exception is logged and the result is returned
/// with <c>HasError</c> set and <c>ErrorMessage</c> holding the exception message.
/// </returns>
public TransformResult TransformDocument(string profile, FileInfo inputFile, FileInfo targetFile, JobContext context)
{
    var retVal = new TransformResult();
    var engine = enginesPool.GetEngine();
    bool isRecycleRequired = false;

    try
    {
        IFRDocument fineReaderDocument = null;
        try
        {
            fineReaderDocument = LoadFineReaderDocument(inputFile.FullName, profile, engine, context);
            SubscribeTransformEvents((FRDocument)fineReaderDocument);
            fineReaderDocument.Process();
            fineReaderDocument.Export(targetFile.FullName, FileExportFormatEnum.FEF_PDF, null);

            // Everything OK
            retVal.TargetFile = targetFile;
            retVal.TargetFile.Refresh();
            return retVal;
        }
        finally
        {
            if (fineReaderDocument != null)
            {
                try
                {
                    UnsubscribeTransformEvents((FRDocument)fineReaderDocument);
                    fineReaderDocument.Close();
                }
                finally
                {
                    // Release the COM wrapper even when unsubscribing or Close() throws.
                    if (Marshal.IsComObject(fineReaderDocument))
                    {
                        Marshal.ReleaseComObject(fineReaderDocument);
                    }
                }
            }
        }
    }
    catch (Exception ex)
    {
        Log.Error(ex, $"Abbyy document conversion failed: {ex.Message}");

        // Let the pool decide whether this failure requires the engine to be recycled.
        isRecycleRequired = enginesPool.ShouldRestartEngine(ex);
        retVal.HasError = true;
        retVal.ErrorMessage = ex.Message;

        // Push an error message to indicate that the item has failed.
        // NOTE(review): the event reports the same full path that is handed to the engine;
        // this assumes AbbyyProgressEvent.File is a path string — confirm against the contract.
        bus.Publish<AbbyyProgressEvent>(new
        {
            __TimeToLive = TimeSpan.FromSeconds(3),
            File = inputFile.FullName,
            Process = ProcessType.Rendering,
            EventType = AbbyyEventType.AbbyyOnProgressEvent,
            HasFailed = true
        });
    }
    finally
    {
        try
        {
            // Push the final message to indicate that the item is finished
            bus.Publish<AbbyyProgressEvent>(new
            {
                __TimeToLive = TimeSpan.FromSeconds(3),
                File = inputFile.FullName,
                IsComplete = true,
                Percentage = 100,
                Process = ProcessType.Rendering,
                EventType = AbbyyEventType.AbbyyOnProgressEvent
            });
        }
        finally
        {
            // The engine must go back to the pool even when publishing the final event fails.
            enginesPool.ReleaseEngine(engine, isRecycleRequired);
        }
    }

    return retVal;
}
/// <summary>
/// Extracts the text of <paramref name="inputFile"/> by exporting it to a temporary text file
/// and reading that file back line by line.
/// </summary>
/// <param name="inputFile">Path of the document to process.</param>
/// <param name="settings">
/// Supplies the maximum extraction size for the result and the text extraction profile passed
/// on to <c>LoadFineReaderDocument</c>.
/// </param>
/// <returns>
/// The extraction result. It is returned without any appended text when the document has exactly
/// one page and that page is reported as empty. When loading, processing, exporting, reading or
/// closing throws, the exception is logged and the result is returned with <c>HasError</c> set and
/// <c>ErrorMessage</c> holding the exception message.
/// </returns>
public ExtractionResult ExtractTextFromDocument(string inputFile, ITextExtractorSettings settings)
{
    var retVal = new ExtractionResult(settings.MaxExtractionSize);
    var outputFile = Path.Combine(Path.GetTempPath(), Guid.NewGuid().ToString("N") + ".txt");
    var engine = enginesPool.GetEngine();
    bool isRecycleRequired = false;

    try
    {
        IFRDocument fineReaderDocument = null;
        try
        {
            fineReaderDocument = LoadFineReaderDocument(inputFile, settings.TextExtractionProfile, engine);
            var documentProcessingParams = CreateProcessingParams(engine);
            fineReaderDocument.Process(documentProcessingParams);

            // Skip empty pages for single-page documents.
            if (fineReaderDocument.Pages.Count == 1)
            {
                // IsEmpty uses the settings that were defined through the profile.
                if (fineReaderDocument.Pages[0].IsEmpty())
                {
                    return retVal;
                }
            }

            fineReaderDocument.Export(outputFile, FileExportFormatEnum.FEF_TextUnicodeDefaults, null);

            // Read the contents of the exported file, stopping once the size limit is exceeded.
            using (var sr = new StreamReader(outputFile))
            {
                string line;
                while ((line = sr.ReadLine()) != null)
                {
                    retVal.Append(line);
                    if (retVal.LimitExceeded)
                    {
                        break;
                    }
                }
            }

            // Everything OK
            return retVal;
        }
        finally
        {
            if (fineReaderDocument != null)
            {
                try
                {
                    fineReaderDocument.Close();
                }
                finally
                {
                    // Release the COM wrapper even when Close() throws.
                    if (Marshal.IsComObject(fineReaderDocument))
                    {
                        Marshal.ReleaseComObject(fineReaderDocument);
                    }
                }
            }
        }
    }
    catch (Exception ex)
    {
        Log.Error(ex, $"Abbyy Textextraction failed: {ex.Message}");

        // Let the pool decide whether this failure requires the engine to be recycled.
        isRecycleRequired = enginesPool.ShouldRestartEngine(ex);
        retVal.HasError = true;
        retVal.ErrorMessage = ex.Message;
    }
    finally
    {
        // The temp file must be removed even when handing the engine back fails.
        try
        {
            enginesPool.ReleaseEngine(engine, isRecycleRequired);
        }
        finally
        {
            if (File.Exists(outputFile))
            {
                File.Delete(outputFile);
            }
        }
    }

    return retVal;
}