Пример #1
0
        /// <summary>
        /// Extracts the record's document into a temporary working folder, runs Tesseract OCR
        /// over the extracted file, attaches the OCR output to the record via
        /// <c>AddOcrRendition</c> and saves the record.
        /// </summary>
        /// <param name="record">Record whose document is extracted and OCR'd.</param>
        /// <returns>
        /// <c>true</c> once <c>record.Save()</c> has returned; <c>false</c> when any step threw
        /// (the exception is logged through <c>Log.Error</c> and not propagated).
        /// </returns>
        public static bool UpdateOcrRendition(Record record)
        {
            string sourcePath = string.Empty;
            string textPath   = string.Empty;
            bool   saved      = false;

            try
            {
                // scratch folder beneath the temp path; created the first time it is needed
                string workFolder = Path.Combine(Path.GetTempPath(), "cmramble_ocr");
                if (Directory.Exists(workFolder) == false)
                {
                    Directory.CreateDirectory(workFolder);
                }

                // working file names are built from the record's Uri; clear leftovers from earlier runs
                sourcePath = Path.Combine(workFolder, $"{record.Uri}.{record.Extension}");
                textPath   = Path.Combine(workFolder, $"{record.Uri}.txt");
                FileHelper.Delete(sourcePath);
                FileHelper.Delete(textPath);

                // pull the document out to disk
                Log.Debug($"Extracting Record {record.Number}: {sourcePath}");
                record.GetDocument(sourcePath, false, "OCR", string.Empty);

                // run OCR; from here on the path returned by the OCR step is the one that is
                // attached and later cleaned up
                Log.Debug($"Tesseract Ocr Record {record.Number}: {sourcePath}");
                textPath = TesseractOcr.ExtractFromFile(sourcePath);

                // extension method; per the original note it drops any existing OCR rendition first
                record.AddOcrRendition(textPath);

                Log.Debug($"Saving Record {record.Number}");
                record.Save();
                Log.Debug($"Saved Record {record.Number}");

                saved = true;
            }
            catch (Exception ex)
            {
                // failures are reported through the log and surfaced to the caller as 'false'
                Log.Error(ex);
            }
            finally
            {
                // always remove the working files, whether or not the update succeeded
                FileHelper.Delete(sourcePath);
                FileHelper.Delete(textPath);
            }

            return saved;
        }
Пример #2
0
        /// <summary>
        /// Extracts <paramref name="sourceRendition"/> into a temporary working folder, runs
        /// Tesseract OCR over the extracted file, attaches the OCR output to
        /// <paramref name="record"/> via <c>AddOcrRendition</c> and saves the record.
        /// </summary>
        /// <param name="record">Record that receives the OCR rendition and is saved.</param>
        /// <param name="sourceRendition">Rendition whose content is extracted and OCR'd.</param>
        /// <returns>
        /// <c>true</c> once <c>record.Save()</c> has returned; <c>false</c> when the extract
        /// produced no file or when any step threw. Exceptions are never propagated; they are
        /// written to the trace listeners so the failure can be diagnosed.
        /// </returns>
        public static bool GenerateOcrRendition(Record record, RecordRendition sourceRendition)
        {
            bool   success           = false;
            string extractedFilePath = string.Empty;
            string ocrFilePath       = string.Empty;

            try
            {
                // get a temp working location on disk; CreateDirectory does nothing when the
                // directory already exists, so no separate existence check is needed
                var rootDirectory = Path.Combine(Path.GetTempPath(), "cmramble_ocr");
                Directory.CreateDirectory(rootDirectory);

                // formulate file names from the rendition's Uri, delete if they exist for some reason
                extractedFilePath = Path.Combine(rootDirectory, $"{sourceRendition.Uri}.{sourceRendition.Extension}");
                ocrFilePath       = Path.Combine(rootDirectory, $"{sourceRendition.Uri}.txt");
                FileHelper.Delete(extractedFilePath);
                FileHelper.Delete(ocrFilePath);

                // fetch document
                var extract = sourceRendition.GetExtractDocument();
                extract.FileName = Path.GetFileName(extractedFilePath);
                extract.DoExtract(Path.GetDirectoryName(extractedFilePath), true, false, "");

                // only continue when the extract actually produced the expected file
                if (!string.IsNullOrWhiteSpace(extract.FileName) && File.Exists(extractedFilePath))
                {
                    // from here on the path returned by the OCR step is the one attached and cleaned up
                    ocrFilePath = TesseractOcr.ExtractFromFile(extractedFilePath);
                    // use record extension method that removes existing OCR rendition (if exists)
                    record.AddOcrRendition(ocrFilePath);
                    record.Save();
                    success = true;
                }
            }
            catch (Exception ex)
            {
                // Still best-effort (the caller only sees 'false'), but no longer silent:
                // the exception, including its stack trace, is written to the trace listeners.
                System.Diagnostics.Trace.TraceError($"GenerateOcrRendition failed: {ex}");
            }
            finally
            {
                // always remove the working files, whether or not the rendition was generated
                FileHelper.Delete(extractedFilePath);
                FileHelper.Delete(ocrFilePath);
            }

            return success;
        }