/// <summary>
/// Extracts the record's document to a temporary folder, runs Tesseract OCR
/// over it, attaches the resulting text file to the record as its OCR
/// rendition and saves the record.
/// </summary>
/// <param name="record">Record whose document is extracted and OCR'd.</param>
/// <returns>
/// <c>true</c> when the record was saved with the new rendition;
/// <c>false</c> when any step threw (the exception is logged, not rethrown).
/// </returns>
public static bool UpdateOcrRendition(Record record)
{
    string documentPath = string.Empty;
    string textPath = string.Empty;

    try
    {
        // Working folder under the user's temp directory.
        string workFolder = Path.Combine(Path.GetTempPath(), "cmramble_ocr");
        if (!Directory.Exists(workFolder))
        {
            Directory.CreateDirectory(workFolder);
        }

        // File names are keyed on the record Uri; clear out anything left
        // behind by an earlier run before extracting.
        documentPath = Path.Combine(workFolder, $"{record.Uri}.{record.Extension}");
        textPath = Path.Combine(workFolder, $"{record.Uri}.txt");
        FileHelper.Delete(documentPath);
        FileHelper.Delete(textPath);

        // Pull the document out of the record onto disk.
        Log.Debug($"Extracting Record {record.Number}: {documentPath}");
        record.GetDocument(documentPath, false, "OCR", string.Empty);

        // OCR the extracted file; the helper reports where it wrote the text,
        // which replaces the provisional path so cleanup targets the real file.
        Log.Debug($"Tesseract Ocr Record {record.Number}: {documentPath}");
        textPath = TesseractOcr.ExtractFromFile(documentPath);

        // Extension method; per the original author's note it removes any
        // existing OCR rendition before adding the new one.
        record.AddOcrRendition(textPath);

        Log.Debug($"Saving Record {record.Number}");
        record.Save();
        Log.Debug($"Saved Record {record.Number}");

        return true;
    }
    catch (Exception ex)
    {
        Log.Error(ex);
        return false;
    }
    finally
    {
        // Temp files are removed on both the success and failure paths.
        FileHelper.Delete(documentPath);
        FileHelper.Delete(textPath);
    }
}
/// <summary>
/// Extracts the given source rendition to a temporary folder, runs Tesseract
/// OCR over it, attaches the resulting text file to the record as its OCR
/// rendition and saves the record.
/// </summary>
/// <param name="record">Record that receives the OCR rendition and is saved.</param>
/// <param name="sourceRendition">Rendition whose content is extracted and OCR'd.</param>
/// <returns>
/// <c>true</c> when the extracted file was found on disk, OCR'd and the record
/// saved; <c>false</c> when the extract produced no file or any step threw
/// (the exception is logged, not rethrown).
/// </returns>
public static bool GenerateOcrRendition(Record record, RecordRendition sourceRendition)
{
    bool success = false;
    string extractedFilePath = string.Empty;
    string ocrFilePath = string.Empty;

    try
    {
        // get a temp working location on disk
        var rootDirectory = Path.Combine(Path.GetTempPath(), "cmramble_ocr");
        if (!Directory.Exists(rootDirectory))
        {
            Directory.CreateDirectory(rootDirectory);
        }

        // formulate file name to extract (keyed on the rendition Uri) and
        // clear out anything left behind by an earlier run
        extractedFilePath = Path.Combine(rootDirectory, $"{sourceRendition.Uri}.{sourceRendition.Extension}");
        ocrFilePath = Path.Combine(rootDirectory, $"{sourceRendition.Uri}.txt");
        FileHelper.Delete(extractedFilePath);
        FileHelper.Delete(ocrFilePath);

        // fetch the rendition content into the working folder
        var extract = sourceRendition.GetExtractDocument();
        extract.FileName = Path.GetFileName(extractedFilePath);
        extract.DoExtract(Path.GetDirectoryName(extractedFilePath), true, false, "");

        // only continue when the extract actually produced the expected file
        if (!String.IsNullOrWhiteSpace(extract.FileName) && File.Exists(extractedFilePath))
        {
            // the helper reports where it wrote the text; keep that path so
            // the finally block cleans up the real output file
            ocrFilePath = TesseractOcr.ExtractFromFile(extractedFilePath);

            // use record extension method that removes existing OCR rendition (if exists)
            record.AddOcrRendition(ocrFilePath);
            record.Save();
            success = true;
        }
    }
    catch (Exception ex)
    {
        // Previously swallowed silently; log it the same way
        // UpdateOcrRendition does so failures can be diagnosed. Still
        // best-effort: the caller just sees false.
        Log.Error(ex);
    }
    finally
    {
        // temp files are removed on both the success and failure paths
        FileHelper.Delete(extractedFilePath);
        FileHelper.Delete(ocrFilePath);
    }

    return success;
}