Beispiel #1
0
        /// <summary>
        /// Converts <paramref name="sourceFile"/> into a PDF written to <paramref name="targetFile"/>, using an
        /// engine borrowed from the engine pool.
        /// </summary>
        /// <param name="profile">Profile handed to <c>LoadFineReaderDocument</c> when the source file is opened.</param>
        /// <param name="sourceFile">File to convert; only its full path is used.</param>
        /// <param name="targetFile">Destination of the exported PDF; refreshed and returned in the result on success.</param>
        /// <returns>
        /// A result with <c>TargetFile</c> set on success. If loading, processing, exporting or closing the
        /// document throws, the exception is logged and the result carries <c>HasError</c> and
        /// <c>ErrorMessage</c> instead.
        /// </returns>
        /// <remarks>
        /// The engine is acquired before the guarded region; exceptions from acquiring or releasing the engine
        /// are not converted into an error result and propagate to the caller.
        /// </remarks>
        public TransformResult TransformDocument(string profile, FileInfo sourceFile, FileInfo targetFile)
        {
            var retVal = new TransformResult();

            var  engine            = enginesPool.GetEngine();
            bool isRecycleRequired = false;

            try
            {
                IFRDocument fineReaderDocument = null;
                try
                {
                    fineReaderDocument = LoadFineReaderDocument(sourceFile.FullName, profile, engine);
                    var documentProcessingParams = CreateProcessingParams(engine);
                    fineReaderDocument.Process(documentProcessingParams);

                    fineReaderDocument.Export(targetFile.FullName, FileExportFormatEnum.FEF_PDF, null);

                    // Everything succeeded: refresh the FileInfo so it reflects the file that was just written.
                    retVal.TargetFile = targetFile;
                    retVal.TargetFile.Refresh();
                    return(retVal);
                }
                finally
                {
                    if (fineReaderDocument != null)
                    {
                        try
                        {
                            fineReaderDocument.Close();
                        }
                        finally
                        {
                            // Release the COM reference even when Close() throws; otherwise the
                            // reference would be leaked on exactly the paths where the engine is in trouble.
                            if (Marshal.IsComObject(fineReaderDocument))
                            {
                                Marshal.ReleaseComObject(fineReaderDocument);
                            }
                        }
                    }
                }
            }
            catch (Exception ex)
            {
                Log.Error(ex, $"Abbyy document conversion failed: {ex.Message}");

                // The pool decides, based on the exception, whether the engine must be restarted on release.
                isRecycleRequired   = enginesPool.ShouldRestartEngine(ex);
                retVal.HasError     = true;
                retVal.ErrorMessage = ex.Message;
            }
            finally
            {
                enginesPool.ReleaseEngine(engine, isRecycleRequired);
            }

            return(retVal);
        }
Beispiel #2
0
        /// <summary>
        /// Processes <paramref name="inputFile"/> with an engine borrowed from the engine pool, exports the
        /// document as Unicode text to a temporary file and collects that text line by line into the result.
        /// </summary>
        /// <param name="inputFile">Path of the document to extract text from.</param>
        /// <param name="settings">
        /// Supplies the extraction profile, the context passed to <c>LoadFineReaderDocument</c> and the
        /// maximum extraction size used to create the result.
        /// </param>
        /// <returns>
        /// The extraction result. It is returned without any appended text when a single-page document is
        /// detected as empty. If an exception occurs during extraction it is logged and the result carries
        /// <c>HasError</c> and <c>ErrorMessage</c>.
        /// </returns>
        /// <remarks>
        /// A progress event flagged as failed is published when extraction fails, and a final event flagged as
        /// complete is always published. The engine is returned to the pool and the temporary text file is
        /// removed in every case, including when publishing the final event throws.
        /// </remarks>
        public ExtractionResult ExtractTextFromDocument(string inputFile, ITextExtractorSettings settings)
        {
            var retVal     = new ExtractionResult(settings.MaxExtractionSize);
            var outputFile = Path.Combine(Path.GetTempPath(), Guid.NewGuid().ToString("N") + ".txt");

            var  engine            = enginesPool.GetEngine();
            bool isRecycleRequired = false;

            try
            {
                IFRDocument fineReaderDocument = null;
                try
                {
                    fineReaderDocument = LoadFineReaderDocument(inputFile, settings.TextExtractionProfile, engine, settings.Context);
                    SubscribeExtractionEvents((FRDocument)fineReaderDocument);
                    fineReaderDocument.Process();

                    // Skip empty pages for single-page documents.
                    if (fineReaderDocument.Pages.Count == 1)
                    {
                        // IsEmpty uses the settings that were defined through the profile.
                        if (fineReaderDocument.Pages[0].IsEmpty())
                        {
                            Log.Information("The page {inputFile} was detected as empty.", inputFile);
                            return(retVal);
                        }
                    }

                    fineReaderDocument.Export(outputFile, FileExportFormatEnum.FEF_TextUnicodeDefaults, null);

                    // Read the exported file line by line and stop as soon as the result reports
                    // that its size limit has been exceeded.
                    using (var sr = new StreamReader(outputFile))
                    {
                        string line;
                        while ((line = sr.ReadLine()) != null)
                        {
                            retVal.Append(line);
                            if (retVal.LimitExceeded)
                            {
                                break;
                            }
                        }
                    }

                    // Everything succeeded.
                    return(retVal);
                }
                finally
                {
                    if (fineReaderDocument != null)
                    {
                        // Each cleanup step is guarded so that a failure in an earlier step cannot
                        // prevent the document from being closed or its COM reference from being released.
                        try
                        {
                            UnsubscribeExtractionEvents((FRDocument)fineReaderDocument);
                        }
                        finally
                        {
                            try
                            {
                                fineReaderDocument.Close();
                            }
                            finally
                            {
                                if (Marshal.IsComObject(fineReaderDocument))
                                {
                                    Marshal.ReleaseComObject(fineReaderDocument);
                                }
                            }
                        }
                    }
                }
            }
            catch (Exception ex)
            {
                Log.Error(ex, $"Abbyy Textextraction failed: {ex.Message}");

                // The pool decides, based on the exception, whether the engine must be restarted on release.
                isRecycleRequired   = enginesPool.ShouldRestartEngine(ex);
                retVal.HasError     = true;
                retVal.ErrorMessage = ex.Message;

                // Push an error message to indicate that the item has failed.
                // NOTE(review): 'sourceFile' is neither a parameter nor a local of this method (the input is
                // 'inputFile'); presumably it is a member declared elsewhere - confirm it identifies this file.
                bus.Publish<AbbyyProgressEvent>(new
                {
                    __TimeToLive = TimeSpan.FromSeconds(3),
                    File         = sourceFile,
                    Process      = ProcessType.TextExtraction,
                    EventType    = AbbyyEventType.AbbyyOnProgressEvent,
                    HasFailed    = true
                });
            }
            finally
            {
                try
                {
                    // Push the final message to indicate that the item is finished.
                    bus.Publish<AbbyyProgressEvent>(new
                    {
                        __TimeToLive = TimeSpan.FromSeconds(3),
                        File         = sourceFile,
                        IsComplete   = true,
                        Percentage   = 100,
                        Process      = ProcessType.TextExtraction,
                        EventType    = AbbyyEventType.AbbyyOnProgressEvent
                    });
                }
                finally
                {
                    // A failure while publishing must not keep the engine checked out of the pool
                    // or leave the temporary export file behind.
                    try
                    {
                        enginesPool.ReleaseEngine(engine, isRecycleRequired);
                    }
                    finally
                    {
                        if (File.Exists(outputFile))
                        {
                            File.Delete(outputFile);
                        }
                    }
                }
            }

            return(retVal);
        }
Beispiel #3
0
        /// <summary>
        /// Converts <paramref name="inputFile"/> into a PDF written to <paramref name="targetFile"/>, using an
        /// engine borrowed from the engine pool, and publishes progress events for the job.
        /// </summary>
        /// <param name="profile">Profile handed to <c>LoadFineReaderDocument</c> when the input file is opened.</param>
        /// <param name="inputFile">File to convert; only its full path is used.</param>
        /// <param name="targetFile">Destination of the exported PDF; refreshed and returned in the result on success.</param>
        /// <param name="context">Job context forwarded to <c>LoadFineReaderDocument</c>.</param>
        /// <returns>
        /// A result with <c>TargetFile</c> set on success. If an exception occurs during conversion it is
        /// logged and the result carries <c>HasError</c> and <c>ErrorMessage</c> instead.
        /// </returns>
        /// <remarks>
        /// A progress event flagged as failed is published when conversion fails, and a final event flagged as
        /// complete is always published. The engine is returned to the pool in every case, including when
        /// publishing the final event throws.
        /// </remarks>
        public TransformResult TransformDocument(string profile, FileInfo inputFile, FileInfo targetFile, JobContext context)
        {
            var retVal = new TransformResult();

            var  engine            = enginesPool.GetEngine();
            bool isRecycleRequired = false;

            try
            {
                IFRDocument fineReaderDocument = null;
                try
                {
                    fineReaderDocument = LoadFineReaderDocument(inputFile.FullName, profile, engine, context);
                    SubscribeTransformEvents((FRDocument)fineReaderDocument);
                    fineReaderDocument.Process();

                    fineReaderDocument.Export(targetFile.FullName, FileExportFormatEnum.FEF_PDF, null);

                    // Everything succeeded: refresh the FileInfo so it reflects the file that was just written.
                    retVal.TargetFile = targetFile;
                    retVal.TargetFile.Refresh();
                    return(retVal);
                }
                finally
                {
                    if (fineReaderDocument != null)
                    {
                        // Each cleanup step is guarded so that a failure in an earlier step cannot
                        // prevent the document from being closed or its COM reference from being released.
                        try
                        {
                            UnsubscribeTransformEvents((FRDocument)fineReaderDocument);
                        }
                        finally
                        {
                            try
                            {
                                fineReaderDocument.Close();
                            }
                            finally
                            {
                                if (Marshal.IsComObject(fineReaderDocument))
                                {
                                    Marshal.ReleaseComObject(fineReaderDocument);
                                }
                            }
                        }
                    }
                }
            }
            catch (Exception ex)
            {
                Log.Error(ex, $"Abbyy document conversion failed: {ex.Message}");

                // The pool decides, based on the exception, whether the engine must be restarted on release.
                isRecycleRequired   = enginesPool.ShouldRestartEngine(ex);
                retVal.HasError     = true;
                retVal.ErrorMessage = ex.Message;

                // Push an error message to indicate that the item has failed.
                // NOTE(review): 'sourceFile' is neither a parameter nor a local of this method (the input is
                // 'inputFile'); presumably it is a member declared elsewhere - confirm it identifies this file.
                bus.Publish<AbbyyProgressEvent>(new
                {
                    __TimeToLive = TimeSpan.FromSeconds(3),
                    File         = sourceFile,
                    Process      = ProcessType.Rendering,
                    EventType    = AbbyyEventType.AbbyyOnProgressEvent,
                    HasFailed    = true
                });
            }
            finally
            {
                try
                {
                    // Push the final message to indicate that the item is finished.
                    bus.Publish<AbbyyProgressEvent>(new
                    {
                        __TimeToLive = TimeSpan.FromSeconds(3),
                        File         = sourceFile,
                        IsComplete   = true,
                        Percentage   = 100,
                        Process      = ProcessType.Rendering,
                        EventType    = AbbyyEventType.AbbyyOnProgressEvent
                    });
                }
                finally
                {
                    // A failure while publishing must not keep the engine checked out of the pool.
                    enginesPool.ReleaseEngine(engine, isRecycleRequired);
                }
            }

            return(retVal);
        }
Beispiel #4
0
        /// <summary>
        /// Processes <paramref name="inputFile"/> with an engine borrowed from the engine pool, exports the
        /// document as Unicode text to a temporary file and collects that text line by line into the result.
        /// </summary>
        /// <param name="inputFile">Path of the document to extract text from.</param>
        /// <param name="settings">
        /// Supplies the extraction profile and the maximum extraction size used to create the result.
        /// </param>
        /// <returns>
        /// The extraction result. It is returned without any appended text when a single-page document is
        /// detected as empty. If an exception occurs during extraction it is logged and the result carries
        /// <c>HasError</c> and <c>ErrorMessage</c>.
        /// </returns>
        /// <remarks>
        /// The engine is returned to the pool and the temporary text file is removed in every case, including
        /// when releasing the engine throws.
        /// </remarks>
        public ExtractionResult ExtractTextFromDocument(string inputFile, ITextExtractorSettings settings)
        {
            var retVal     = new ExtractionResult(settings.MaxExtractionSize);
            var outputFile = Path.Combine(Path.GetTempPath(), Guid.NewGuid().ToString("N") + ".txt");

            var  engine            = enginesPool.GetEngine();
            bool isRecycleRequired = false;

            try
            {
                IFRDocument fineReaderDocument = null;
                try
                {
                    fineReaderDocument = LoadFineReaderDocument(inputFile, settings.TextExtractionProfile, engine);
                    var documentProcessingParams = CreateProcessingParams(engine);
                    fineReaderDocument.Process(documentProcessingParams);

                    // Skip empty pages for single-page documents.
                    if (fineReaderDocument.Pages.Count == 1)
                    {
                        // IsEmpty uses the settings that were defined through the profile.
                        if (fineReaderDocument.Pages[0].IsEmpty())
                        {
                            return(retVal);
                        }
                    }

                    fineReaderDocument.Export(outputFile, FileExportFormatEnum.FEF_TextUnicodeDefaults, null);

                    // Read the exported file line by line and stop as soon as the result reports
                    // that its size limit has been exceeded.
                    using (var sr = new StreamReader(outputFile))
                    {
                        string line;
                        while ((line = sr.ReadLine()) != null)
                        {
                            retVal.Append(line);
                            if (retVal.LimitExceeded)
                            {
                                break;
                            }
                        }
                    }

                    // Everything succeeded.
                    return(retVal);
                }
                finally
                {
                    if (fineReaderDocument != null)
                    {
                        try
                        {
                            fineReaderDocument.Close();
                        }
                        finally
                        {
                            // Release the COM reference even when Close() throws.
                            if (Marshal.IsComObject(fineReaderDocument))
                            {
                                Marshal.ReleaseComObject(fineReaderDocument);
                            }
                        }
                    }
                }
            }
            catch (Exception ex)
            {
                Log.Error(ex, $"Abbyy Textextraction failed: {ex.Message}");

                // The pool decides, based on the exception, whether the engine must be restarted on release.
                isRecycleRequired   = enginesPool.ShouldRestartEngine(ex);
                retVal.HasError     = true;
                retVal.ErrorMessage = ex.Message;
            }
            finally
            {
                // A failure while releasing the engine must not leave the temporary export file behind.
                try
                {
                    enginesPool.ReleaseEngine(engine, isRecycleRequired);
                }
                finally
                {
                    if (File.Exists(outputFile))
                    {
                        File.Delete(outputFile);
                    }
                }
            }

            return(retVal);
        }