Esempio n. 1
0
        /// <summary>
        /// Runs Tesseract OCR on the PDF at <paramref name="pdfPath"/> and returns the outputs
        /// selected by <paramref name="resultTypeEnum"/>.
        /// </summary>
        /// <param name="pdfPath">Path of the PDF file to open.</param>
        /// <param name="resultTypeEnum">
        /// Flags selecting which outputs to populate: extracted text, the OCRed PDF bytes,
        /// and/or a fully-compressed copy of the OCRed PDF. <c>All</c> selects every output.
        /// </param>
        /// <param name="language">Language code handed to the OCR engine; defaults to "tur".</param>
        /// <returns>
        /// The populated <see cref="OcrResult"/>. If the document already yields non-empty text,
        /// no OCR is performed and an empty result is returned regardless of the requested flags.
        /// </returns>
        public OcrResult Ocr(string pdfPath, ResultTypeEnum resultTypeEnum, string language = "tur")
        {
            var result = new OcrResult();

            // PDFDoc is disposable; release it on every exit path.
            using (var doc = PDFDoc.Open(pdfPath))
            {
                // A document that already has a text layer is left untouched.
                if (!string.IsNullOrEmpty(doc.GetText()))
                {
                    return result;
                }

                var wantsAll        = resultTypeEnum.HasFlag(ResultTypeEnum.All);
                var wantsCompressed = wantsAll || resultTypeEnum.HasFlag(ResultTypeEnum.OcrCompressed);
                var wantsText       = wantsAll || resultTypeEnum.HasFlag(ResultTypeEnum.Text);
                var wantsBest       = wantsAll || resultTypeEnum.HasFlag(ResultTypeEnum.OcrBest);

                // Working directory used for the compressed copy; it is removed before returning.
                var workDirectory  = Path.Combine(Path.GetTempPath(), "Clock.hocr");
                var compressedPath = Path.Combine(workDirectory, Path.GetFileNameWithoutExtension(pdfPath) + "ocrCompressed" + Path.GetExtension(pdfPath));

                try
                {
                    doc.Ocr(Clock.Utils.OcrMode.Tesseract, language, WriteTextMode.Word, null);

                    if (wantsCompressed)
                    {
                        var reader = new PdfReader(doc.PDFBytes);

                        // FileMode.Create fails if the directory is missing, so make sure it exists.
                        System.IO.Directory.CreateDirectory(workDirectory);

                        // Own the output stream so the file handle is released even if stamping throws.
                        using (var output = new FileStream(compressedPath, FileMode.Create))
                        {
                            var stamper   = new PdfStamper(reader, output, PdfWriter.VERSION_1_5);
                            var pageCount = reader.NumberOfPages;

                            // Re-assign each page's content so it is rewritten by the stamper.
                            for (var page = 1; page <= pageCount; page++)
                            {
                                reader.SetPageContent(page, reader.GetPageContent(page));
                            }

                            stamper.SetFullCompression();
                            stamper.Close();
                        }
                    }

                    if (wantsText)
                    {
                        result.Text = doc.GetText();
                    }

                    if (wantsBest)
                    {
                        result.OcredBest = doc.PDFBytes;
                    }

                    if (wantsCompressed)
                    {
                        result.OcredCompressed = System.IO.File.ReadAllBytes(compressedPath);
                    }
                }
                finally
                {
                    // Clean up the working directory on failure as well as on success.
                    if (System.IO.Directory.Exists(workDirectory))
                    {
                        _fileHelper.DeleteDirectory(workDirectory);
                    }
                }
            }

            return result;
        }
Esempio n. 2
0
        /// <summary>
        /// Workflow activity body: loads the file system item identified by the workflow argument
        /// <c>ArgFileSystemItemId</c>, and if its PDF has no text, runs Tesseract OCR on it, saves the
        /// OCRed PDF to <c>TargetFilePathForPDFFile</c> and the extracted text to
        /// <c>TargetFilePathForTextFile</c>.
        /// </summary>
        /// <param name="context">Execution context supplying the workflow data and activity arguments.</param>
        /// <remarks>
        /// <c>Result</c> is set to <c>true</c> only when the method runs to completion; every failure
        /// path logs the problem and returns without setting it.
        /// </remarks>
        protected override void Execute(NativeActivityContext context)
        {
            WorkflowDataContext          dataContext = context.DataContext;
            PropertyDescriptorCollection properties  = dataContext.GetProperties();

            // Case-sensitive lookup by name; Find returns null when the argument is absent.
            PropertyDescriptor sessionIdProperty        = properties.Find("ArgSessionId", false);
            PropertyDescriptor fileSystemItemIdProperty = properties.Find("ArgFileSystemItemId", false);

            string sessionId = sessionIdProperty != null
                ? sessionIdProperty.GetValue(dataContext) as string
                : string.Empty;

            // A missing, null or non-int value is treated as 0 (reported below) instead of
            // throwing from an unboxing cast.
            int fileSystemItemId = fileSystemItemIdProperty != null
                ? (fileSystemItemIdProperty.GetValue(dataContext) as int?) ?? 0
                : 0;

            if (string.IsNullOrWhiteSpace(sessionId))
            {
                LogHelper.AddGeneralLog(GeneralLogTypeEnum.ActivityError, "SessionId is null.");
                return;
            }

            if (fileSystemItemId == 0)
            {
                LogHelper.AddSessionLog(SessionLogTypeEnum.ActivityError, sessionId, "FileSystemItemId is null.");
                return;
            }

            FileSystemItemDto fileSystemItem = null;

            try
            {
                using (var sqlDbContext = new SqlDbContext())
                {
                    fileSystemItem = sqlDbContext.FileSystemItems.FirstOrDefault(x => x.Id == fileSystemItemId);
                }
            }
            catch (Exception ex)
            {
                LogHelper.AddSessionLog(SessionLogTypeEnum.ActivityError, sessionId, $"Exception has been thrown when getting fileSystemItem. FileSystemItemId: {fileSystemItemId}", ex);
                return;
            }

            // FirstOrDefault yields null when no row matches; stop here rather than dereferencing
            // it later (including inside the OCR catch handler).
            if (fileSystemItem == null)
            {
                LogHelper.AddSessionLog(SessionLogTypeEnum.ActivityError, sessionId, $"FileSystemItem could not be found. FileSystemItemId: {fileSystemItemId}");
                return;
            }

            // NOTE(review): the previous code computed a non-colliding text file name but never used
            // it, so an existing text file has always been overwritten. That behaviour is kept here;
            // confirm whether overwriting is intended.
            try
            {
                TempData.Instance.TempPath = Path.GetTempPath();
                using (PDFDoc doc = PDFDoc.Open(fileSystemItem.FullPath))
                {
                    // Only OCR documents that have no text yet (null is treated like empty).
                    if (string.IsNullOrEmpty(doc.GetText()))
                    {
                        doc.Ocr(OcrMode.Tesseract, "tur", WriteTextMode.Word);
                        doc.Save(TargetFilePathForPDFFile.Get(context));
                        var ocrText = doc.GetText();
                        File.WriteAllText(TargetFilePathForTextFile.Get(context), ocrText);
                    }
                }
            }
            catch (Exception ex)
            {
                LogHelper.AddFileSystemItemLog(fileSystemItemId, fileSystemItem.SourceId, sessionId, FileSystemItemLogTypeEnum.StoppedWithError, $"Exception has been thrown in OCR operation. FileSystemItemId: {fileSystemItemId}", ex);
                return;
            }

            // Logged for every run that completes without error, including documents that already
            // had text and were therefore not OCRed.
            LogHelper.AddFileSystemItemLog(fileSystemItem.Id, fileSystemItem.SourceId, sessionId, FileSystemItemLogTypeEnum.TesseractOcrOk);

            Result.Set(context, true);
        }