//-------------------------------------------------------------------------
// Pulls every embedded image stream out of a PDF and hands each decoded image
// to SaveFrameFiles as one frame.
//
// The cross-reference table is walked twice:
//   pass 1 counts the image streams that can be decoded and gives up (returns
//          false) as soon as there are more images than pages;
//   pass 2 decodes the images again and saves them, numbering them 1, 2, 3...
//
// PDFSourcePath - path of the PDF to read.
// sourceAudit   - audit of the source document, passed through to SaveFrameFiles.
// exception     - always set to null; failure is signalled by the return value only.
// Returns true when at least one image was passed to SaveFrameFiles without an
// exception reaching this method, otherwise false.
private bool ExtractImages(String PDFSourcePath, Audit sourceAudit, out Exception exception)
{
    exception = null;
    iTextSharp.text.pdf.RandomAccessFileOrArray RAFObj = null;
    iTextSharp.text.pdf.PdfReader PDFReaderObj = null;
    int pageNumber = 0;
    bool ok = false;
    try
    {
        RAFObj = new iTextSharp.text.pdf.RandomAccessFileOrArray(PDFSourcePath);
        PDFReaderObj = new iTextSharp.text.pdf.PdfReader(RAFObj, null);
        string imageSubtype = iTextSharp.text.pdf.PdfName.IMAGE.ToString();

        // Pass 1: count decodable images.
        for (int i = 0; i < PDFReaderObj.XrefSize; i++)
        {
            iTextSharp.text.pdf.PdfObject PDFObj = PDFReaderObj.GetPdfObject(i);
            if (PDFObj == null || !PDFObj.IsStream())
            {
                continue;
            }
            iTextSharp.text.pdf.PdfStream PDFStremObj = (iTextSharp.text.pdf.PdfStream)PDFObj;
            iTextSharp.text.pdf.PdfObject subtype = PDFStremObj.Get(iTextSharp.text.pdf.PdfName.SUBTYPE);
            if (subtype == null || subtype.ToString() != imageSubtype)
            {
                continue;
            }
            try
            {
                iTextSharp.text.pdf.parser.PdfImageObject PdfImageObj =
                    new iTextSharp.text.pdf.parser.PdfImageObject((iTextSharp.text.pdf.PRStream)PDFStremObj);
                // The image is decoded only to prove it can be; release it right away
                // instead of leaking one GDI+ image per stream.
                using (System.Drawing.Image ImgPDF = PdfImageObj.GetDrawingImage())
                {
                    pageNumber++;
                }
                if (pageNumber > PDFReaderObj.NumberOfPages)
                {
                    return false;
                }
            }
            catch (Exception)
            {
                // Best effort: a stream that cannot be decoded is simply not counted.
            }
        }

        pageNumber = 0;
        PageCount = PDFReaderObj.NumberOfPages;

        // Pass 2: decode again and save each image as a frame.
        for (int i = 0; i < PDFReaderObj.XrefSize; i++)
        {
            iTextSharp.text.pdf.PdfObject PDFObj = PDFReaderObj.GetPdfObject(i);
            if (PDFObj == null || !PDFObj.IsStream())
            {
                continue;
            }
            iTextSharp.text.pdf.PdfStream PDFStremObj = (iTextSharp.text.pdf.PdfStream)PDFObj;
            iTextSharp.text.pdf.PdfObject subtype = PDFStremObj.Get(iTextSharp.text.pdf.PdfName.SUBTYPE);
            if (subtype == null || subtype.ToString() != imageSubtype)
            {
                continue;
            }
            try
            {
                iTextSharp.text.pdf.parser.PdfImageObject PdfImageObj =
                    new iTextSharp.text.pdf.parser.PdfImageObject((iTextSharp.text.pdf.PRStream)PDFStremObj);
                System.Drawing.Image ImgPDF = PdfImageObj.GetDrawingImage();
                pageNumber++;
                if (ImgPDF == null)
                {
                    // Nothing to save for this stream; it still consumes a page number.
                    continue;
                }
                try
                {
                    SaveFrameFiles(PDFSourcePath, sourceAudit, (Bitmap)ImgPDF, pageNumber, PageCount);
                    ok = true;
                }
                finally
                {
                    ImgPDF.Dispose();
                }
            }
            catch (Exception)
            {
                // Best effort: skip this image and carry on with the next stream.
            }
        }
        return ok;
    }
    catch (Exception)
    {
        return false;
    }
    finally
    {
        // The reader is null when its constructor threw; dereferencing it here would
        // replace the intended "return false" with a NullReferenceException.
        if (PDFReaderObj != null)
        {
            PDFReaderObj.Close();
            PDFReaderObj.Dispose();
        }
        else if (RAFObj != null)
        {
            RAFObj.Close();
        }
    }
}
//-------------------------------------------------------------------------
// Builds an Audit for fileName and, when the file is accessible, copies it into
// directory under the name "<SHA1><original extension>" and writes
// "<SHA1>.audit" next to it. The original file is removed afterwards when
// delete is true.
//
// directory is concatenated as-is, so it is expected to end with a path separator.
// The Audit is returned whether or not the file could be accessed.
public Audit CreateAuditAndMoveToTempFolder(string fileName, string directory, bool delete = true)
{
    string hash = GetSHA1FromFile(fileName);
    string copyPath = string.Concat(directory, hash, Path.GetExtension(fileName));
    string auditPath = string.Concat(directory, hash, ".audit");
    Audit result = new Audit(fileName, hash);

    if (!CanAccess(fileName))
    {
        return result;
    }

    File.Copy(fileName, copyPath, true);
    if (delete)
    {
        iOHelper.DeleteFile(fileName);
    }
    result.Save(auditPath);
    return result;
}
//-------------------------------------------------------------------------
// Returns a new Audit whose members listed below carry the same values as
// this instance. Members are copied by plain assignment.
public Audit Clone()
{
    Audit copy = new Audit();
    copy.fileName = fileName;
    copy.SHA1Hash = SHA1Hash;
    copy.type = type;
    copy.error = error;
    copy.dataFileName = dataFileName;
    copy.dataFileSHA1Hash = dataFileSHA1Hash;
    copy.sourceFilePath = sourceFilePath;
    copy.sourceFileName = sourceFileName;
    copy.sourceSHA1Hash = sourceSHA1Hash;
    copy.archiveFileName = archiveFileName;
    copy.sourcePage = sourcePage;
    return copy;
}
//-------------------------------------------------------------------------
// Moves a file (and its audit) out of the temp folder dir to destFileName.
//
// Order of operations: the audit is saved beside the destination first, then
// the file is copied (overwriting), and only then are the temp audit and the
// temp file deleted. Any exception is logged and swallowed.
//
// incomingFile contributes only its file name; dir is concatenated as-is.
public void MoveFileFromTemp(string dir, string incomingFile, Audit audit, string destFileName)
{
    string stagedFile = string.Concat(dir, Path.GetFileName(incomingFile));
    string stagedAudit = GetFileAuditName(stagedFile);
    string targetAudit = GetFileAuditName(destFileName);

    try
    {
        audit.Save(targetAudit);
        File.Copy(stagedFile, destFileName, true);
        iOHelper.DeleteFile(stagedAudit);
        iOHelper.DeleteFile(stagedFile);
    }
    catch (Exception moveError)
    {
        log.LogMessage(moveError);
    }
}
//-------------------------------------------------------------------------
// Marks the audit as failed with exMessage and moves the file from the temp
// folder dir into ManualNextProccessingFolder, naming it
// "<audit.sourceSHA1Hash><original extension>". The target folder is created
// when it does not exist yet.
public void MoveBadFile(string dir, string ManualNextProccessingFolder, string fileName, Audit audit, string exMessage)
{
    bool folderMissing = !Directory.Exists(ManualNextProccessingFolder);
    if (folderMissing)
    {
        Directory.CreateDirectory(ManualNextProccessingFolder);
    }

    audit.MadeFailedAudit(exMessage);

    string failedFilePath = string.Concat(
        ManualNextProccessingFolder,
        audit.sourceSHA1Hash,
        Path.GetExtension(fileName));

    MoveFileFromTemp(dir, fileName, audit, failedFilePath);
}
//-------------------------------------------------------------------------
// Makes sure fileName sits in the temp folder under its SHA1-based name with an
// audit file next to it, and returns the path to use for recognition.
//
// fileName      - file to prepare.
// tempdirectory - target temp folder; OcrAppConfig.TempEdFolder is used when
//                 null or empty. Concatenated as-is.
// showLog       - when false, the "Get file" and "File does not exist" messages
//                 are suppressed.
//
// Returns:
//   null - fileName does not exist;
//   ""   - the file has no audit and was created less than 2 seconds ago;
//   path - fileName itself when it is already a hash-named file inside the
//          temp folder, otherwise "<tempdirectory><SHA1><extension>".
public string GetFileForRecognize(string fileName, string tempdirectory, bool showLog = true)
{
    if (string.IsNullOrEmpty(tempdirectory))
    {
        tempdirectory = OcrAppConfig.TempEdFolder;
    }
    if (showLog)
    {
        log.LogMessage("Get file for recognize: " + fileName);
    }
    if (!File.Exists(fileName))
    {
        if (showLog)
        {
            log.LogMessage("File does not exist " + fileName);
        }
        return null;
    }

    string fileNameAudit = GetFileAuditName(fileName);
    // Hash once and reuse; hashing reads the file again each time.
    // NOTE(review): if GetSHA1FromFile can return "", Contains("") below is always
    // true and the destination name loses its hash part - confirm and guard if so.
    var sha1hash = GetSHA1FromFile(fileName);

    // Already a hash-named file inside the temp folder: use it in place.
    if (fileName.Contains(sha1hash) && fileName.Contains(tempdirectory))
    {
        if (!File.Exists(fileNameAudit))
        {
            iOHelper.DeleteFile(fileNameAudit);
            var audit = new Audit(fileName, sha1hash);
            audit.Save(fileNameAudit);
        }
        return fileName;
    }

    string destfileName = tempdirectory + sha1hash + Path.GetExtension(fileName);
    string destfileNameAudit = GetFileAuditName(destfileName);

    if (File.Exists(fileNameAudit))
    {
        // The file arrived with its audit: move both, or drop the incoming copy
        // when the destination already exists. Each step is best effort.
        try
        {
            if (!File.Exists(destfileName))
            {
                File.Move(fileName, destfileName);
            }
            else
            {
                File.Delete(fileName);
            }
        }
        catch (Exception ex)
        {
            log.LogMessage(ex);
        }
        try
        {
            if (!File.Exists(destfileNameAudit))
            {
                File.Move(fileNameAudit, destfileNameAudit);
            }
            else
            {
                File.Delete(fileNameAudit);
            }
        }
        catch (Exception ex)
        {
            log.LogMessage(ex);
        }
        return destfileName;
    }

    log.LogMessage("File " + fileNameAudit + " not exists");

    // Compare timestamps directly. Formatting the creation time to text and parsing
    // it back depended on the current culture and dropped the fractional second,
    // which made the file look up to one second older than it is.
    TimeSpan age = DateTime.Now - new FileInfo(fileName).CreationTime;
    if (age.TotalSeconds < 2)
    {
        return "";
    }

    CreateAuditAndMoveToTempFolder(fileName, tempdirectory);
    return destfileName;
}
//-------------------------------------------------------------------------
// Handles a file found in the temp folder. With a usable audit the file is
// published through Result; without one it is copied back to
// defaults.InputFolder (overwriting) and removed from the temp folder.
private void FileInTempFolder(string fileName)
{
    Exception auditError;
    string auditPath = utils.GetFileAuditName(fileName);
    Audit sourceAudit = new Audit(auditPath, out auditError);

    bool auditUsable = sourceAudit != null && File.Exists(auditPath);
    if (auditUsable)
    {
        ProcessingIncomingFileResult outcome = new ProcessingIncomingFileResult();
        outcome.SourceFileName = fileName;
        outcome.FileName = fileName;
        outcome.Audit = sourceAudit;
        Result = outcome;
        return;
    }

    log.LogMessage("___" + "FileInTempFolder (sourceAudit == null) : true");
    if (auditError != null)
    {
        log.LogMessage(auditError);
    }

    // A file without an audit file is sent back to the InputFolder.
    string returnPath = defaults.InputFolder + Path.GetFileName(fileName);
    File.Copy(fileName, returnPath, true);
    iOHelper.DeleteFile(fileName);
}
// Prepares a recognition session for fileName: loads and normalizes the bitmap,
// loads the audit that sits next to the file, and loads the region definitions.
//
// Nothing is thrown for the failures handled here. The error is stored in
// exception, reported through NotifyUpdated(NotifyKey.Exception, ...), and the
// constructor returns early, leaving the remaining members unset.
public Recognize(string fileName, Defaults defaults)
{
    this.defaults = defaults;
    if (String.IsNullOrEmpty(fileName) || !File.Exists(fileName))
    {
        exception = new Exception("File name is incorrect: " + fileName);
        NotifyUpdated(NotifyKey.Exception, exception);
        return;
    }

    FileName = fileName;
    // Swap only the trailing extension. A plain string Replace also rewrote any
    // earlier occurrence of the extension text inside the path (for example a
    // folder named "scans.pdf") and threw when the file had no extension at all.
    FileNameAudit = System.IO.Path.ChangeExtension(fileName, ".audit");

    #region Get Bitmap
    PDFLibNet.PDFWrapper pdfDoc = null;
    try
    {
        Image = Utils.GetBitmapFromFile(FileName, ref pdfDoc);
        Image = Utils.NormalizeBitmap(Image);
        Image = RecognitionTools.GetMonohromeNoIndexBitmap(Image);
    }
    catch (Exception ex)
    {
        exception = ex;
        NotifyUpdated(NotifyKey.Exception, exception);
        return;
    }
    finally
    {
        // GetBitmapFromFile may hand back an open PDF wrapper through the ref argument.
        if (pdfDoc != null)
        {
            pdfDoc.Dispose();
            pdfDoc = null;
        }
    }
    #endregion

    #region Get audit file
    FileAudit = Utils.GetAuditFromFile(FileNameAudit, out exception);
    if (FileAudit == null)
    {
        NotifyUpdated(NotifyKey.Exception, exception);
        return;
    }
    #endregion

    regionsExt = RecognitionTools.GetRegionsExt(out sheetIdentifiers, Defaults.ManualConfigsFolder);
}
//-------------------------------------------------------------------------
// Stores one page image as a frame in OcrAppConfig.TempFramesFolder.
//
// The bitmap is first written as TIFF into "<TempFolder>SaveFrame\" (that folder
// is emptied beforehand), hashed, and then copied to "<SHA1>.tiff" with a frame
// audit saved as "<SHA1>.audit". When both target files already exist the frame
// is treated as a duplicate and only the scratch file is removed.
//
// fileName    - source document, used only in the error log message.
// sourceAudit - audit of the source document; supplies the source hash and name
//               and produces the frame audit.
// entryBitmap - image to store. It is not disposed here.
// pageNumber  - 1-based page number, used in file names, the audit and the log.
// pageCount   - total number of pages, used in log messages only.
//
// Does nothing when workGetFramesFromPic is already set. Never throws for
// failures inside the save sequence: they are logged and swallowed.
private void SaveFrameFiles(string fileName, Audit sourceAudit, Bitmap entryBitmap, int pageNumber, int pageCount)
{
    if (workGetFramesFromPic)
    {
        return;
    }
    workGetFramesFromPic = true;
    try
    {
        var tempdir = OcrAppConfig.TempFolder + "SaveFrame\\";
        iOHelper.CreateDirectory(tempdir);
        var temp = tempdir + sourceAudit.sourceSHA1Hash + "_" + pageNumber + ".tiff";

        // Clear leftovers from earlier pages; a file that cannot be deleted is ignored.
        foreach (string leftover in Directory.GetFiles(tempdir))
        {
            try
            {
                iOHelper.DeleteFile(leftover);
            }
            catch (Exception)
            {
            }
        }

        entryBitmap.Save(temp, ImageFormat.Tiff);

        var sha1Hash = utils.GetSHA1FromFile(temp);
        var tempFileName = OcrAppConfig.TempFramesFolder + sha1Hash + ".tiff";
        var tempFileNameAudit = OcrAppConfig.TempFramesFolder + sha1Hash + ".audit";

        if (File.Exists(tempFileName) && File.Exists(tempFileNameAudit))
        {
            log.LogMessage("Frame exists page " + pageNumber.ToString() + " of " + pageCount.ToString() + " from " + sourceAudit.sourceFileName);
            iOHelper.DeleteFile(temp);
            return;
        }

        var audit = sourceAudit.GetFrameAudit(tempFileName, sha1Hash, pageNumber);
        audit.Save(tempFileNameAudit);
        log.LogMessage("Get frame page " + pageNumber.ToString() + " of " + pageCount.ToString() + " from " + sourceAudit.sourceFileName);
        File.Copy(temp, tempFileName, true);
    }
    catch (Exception ex)
    {
        // Still best effort, but leave a trace: a frame that silently fails to save
        // is otherwise indistinguishable from one that was stored.
        log.LogMessage(fileName + " Page" + pageNumber.ToString() + " ", ex);
    }
    finally
    {
        // Single place that releases the re-entrancy flag for every exit path above.
        workGetFramesFromPic = false;
    }
}
//-------------------------------------------------------------------------
// Splits an image file into its frames and stores each frame as
// "<SHA1>.tiff" plus "<SHA1>.audit" in OcrAppConfig.TempFramesFolder.
//
// The save sequence is written out here rather than delegated to SaveFrameFiles
// because this method holds workGetFramesFromPic, the same flag SaveFrameFiles
// checks before doing any work.
//
// fileName    - image file to split.
// sourceAudit - audit of the source document; supplies the source hash and name
//               and produces the frame audits.
// exception   - set only when the image could not be opened; errors while
//               saving frames are logged and leave it null.
//
// Does nothing when workGetFramesFromPic is already set.
private void GetFramesFromPic(string fileName, Audit sourceAudit, out Exception exception)
{
    exception = null;
    if (workGetFramesFromPic)
    {
        return;
    }
    workGetFramesFromPic = true;

    Bitmap entryBitmap;
    try
    {
        entryBitmap = GetPicFile(fileName);
    }
    catch (Exception ex)
    {
        exception = ex;
        workGetFramesFromPic = false;
        return;
    }

    int i = 0;
    try
    {
        Guid[] guids = entryBitmap.FrameDimensionsList;
        FrameDimension fd = new FrameDimension(guids[0]);
        int frameCount = entryBitmap.GetFrameCount(fd);
        PageCount = frameCount;

        for (i = 0; i < frameCount; i++)
        {
            int pageNumber = i + 1;
            entryBitmap.SelectActiveFrame(fd, i);

            var tempdir = OcrAppConfig.TempFolder + "SaveFrame\\";
            iOHelper.CreateDirectory(tempdir);

            // Clear leftovers from earlier frames; a file that cannot be deleted is ignored.
            foreach (string leftover in Directory.GetFiles(tempdir))
            {
                try
                {
                    iOHelper.DeleteFile(leftover);
                }
                catch (Exception)
                {
                }
            }

            // Parenthesised page number: "_" + i + 1 concatenated the digits and
            // produced "_01", "_11", ... instead of "_1", "_2", ...
            var temp = tempdir + sourceAudit.sourceSHA1Hash + "_" + pageNumber + ".tiff";
            entryBitmap.Save(temp, ImageFormat.Tiff);

            // NOTE(review): an earlier TODO asked to check this hash for "" - still open.
            var sha1Hash = utils.GetSHA1FromFile(temp);
            var tempFileName = OcrAppConfig.TempFramesFolder + sha1Hash + ".tiff";
            var tempFileNameAudit = OcrAppConfig.TempFramesFolder + sha1Hash + ".audit";

            log.LogMessage("Get frame page " + pageNumber.ToString() + " of " + frameCount.ToString() + " from " + sourceAudit.sourceFileName);

            if (File.Exists(tempFileName) && File.Exists(tempFileNameAudit))
            {
                log.LogMessage("Frame exists page " + pageNumber.ToString() + " of " + frameCount.ToString() + " from " + sourceAudit.sourceFileName);
                try
                {
                    iOHelper.DeleteFile(temp);
                }
                catch (Exception)
                {
                }
                continue;
            }

            var audit = sourceAudit.GetFrameAudit(tempFileName, sha1Hash, pageNumber);
            audit.Save(tempFileNameAudit);
            File.Copy(temp, tempFileName, true);
        }
    }
    catch (Exception ex)
    {
        log.LogMessage(fileName + " Page" + (i + 1).ToString() + " ", ex);
    }
    finally
    {
        workGetFramesFromPic = false;
        // Guarded so that a null bitmap cannot turn the logged failure into an
        // unhandled NullReferenceException during cleanup.
        if (entryBitmap != null)
        {
            entryBitmap.Dispose();
        }
    }
}
//-------------------------------------------------------------------------
// Produces one frame per page of a PDF.
//
// First tries ExtractImages, which saves the images embedded in the PDF. When
// that returns false, every page is rendered through the PDFLibNet wrapper
// (GetPDFDoc / GetBitmapFromPDFPage) and handed to SaveFrameFiles.
//
// fileName    - PDF file to process.
// sourceAudit - audit of the source document, passed through to the savers.
// exception   - set only when GetPDFDoc throws; other errors are logged with
//               the page being processed and leave it unchanged.
private void GetFramesFromPDF(string fileName, Audit sourceAudit, out Exception exception)
{
    PDFLibNet.PDFWrapper pdfDoc = null;
    int pageNumber = 0;
    exception = null;
    try
    {
        if (ExtractImages(fileName, sourceAudit, out exception))
        {
            return;
        }

        try
        {
            pdfDoc = GetPDFDoc(fileName);
        }
        catch (Exception ex)
        {
            exception = ex;
            return;
        }

        if (pdfDoc != null)
        {
            PageCount = pdfDoc.PageCount;
            // pageNumber drives the loop so the catch below reports the page that
            // actually failed instead of always "Page0".
            for (pageNumber = 1; pageNumber <= PageCount; pageNumber++)
            {
                using (var bmp = GetBitmapFromPDFPage(pdfDoc, pageNumber))
                {
                    SaveFrameFiles(fileName, sourceAudit, bmp, pageNumber, PageCount);
                }
            }
        }
    }
    catch (Exception ex)
    {
        log.LogMessage(fileName + " Page" + pageNumber.ToString() + " ", ex);
    }
    finally
    {
        if (pdfDoc != null)
        {
            pdfDoc.Dispose();
        }
    }
}
// Splits fileName into frames. PDF files go through GetFramesFromPDF, everything
// else through GetFramesFromPic. The exception member is cleared first and then
// receives whatever the chosen extractor reports.
public void GetFrames(string fileName, Audit sourceAudit)
{
    exception = null;

    if (!IsPDFFile(fileName))
    {
        GetFramesFromPic(fileName, sourceAudit, out exception);
        return;
    }

    GetFramesFromPDF(fileName, sourceAudit, out exception);
}
// Prepares a recognition session for one frame file.
//
// Steps, in order: store the cancellation token and options, validate the file
// name, load the bitmap through FramesAndBitmap, optionally normalize it, load
// the audit that belongs to the file, and finally load the region lists from
// ConfigsFolder.
//
// On each failure handled here the constructor logs Exception, raises
// NotifyUpdated(ExceptionEvent, Exception, null) and returns early, leaving the
// later members unset.
public Recognize(string fileName, OcrAppConfig defaults, CancellationToken token, bool auto = false, bool normalizeBitmap = true)
{
    SetCancellationToken(token);
    this.Auto = auto;
    this.defaults = defaults;
    recTools.frameFileName = fileName;

    bool fileUsable = !String.IsNullOrEmpty(fileName) && File.Exists(fileName);
    if (!fileUsable)
    {
        Exception = new Exception("File name is incorrect: " + fileName);
        log.LogMessage(Exception);
        NotifyUpdated(ExceptionEvent, Exception, null);
        return;
    }

    FileName = fileName;
    AuditFileName = utils.GetFileAuditName(fileName);

    #region Get Bitmap
    FramesAndBitmap frameLoader = new FramesAndBitmap();
    Bitmap = frameLoader.GetBitmapFromFile(FileName);
    if (frameLoader.Exception != null)
    {
        Exception = frameLoader.Exception;
        log.LogMessage(Exception);
        NotifyUpdated(ExceptionEvent, Exception, null);
        return;
    }

    if (normalizeBitmap)
    {
        Bitmap = recTools.NormalizeBitmap(Bitmap, out exception);
    }
    // Checked even when normalization was skipped, exactly as before.
    if (exception != null)
    {
        log.LogMessage(Exception);
        NotifyUpdated(ExceptionEvent, Exception, null);
        return;
    }
    #endregion

    #region Get audit file
    Audit = new Audit(AuditFileName, out exception);
    bool auditUsable = Audit != null && File.Exists(AuditFileName);
    if (!auditUsable)
    {
        log.LogMessage(Exception);
        NotifyUpdated(ExceptionEvent, Exception, null);
        return;
    }
    #endregion

    regionsList = recTools.GetAllRegions(ConfigsFolder);
    regionsListFLEX = recTools.GetAllRegionsFLEX(ConfigsFolder);
}