// when processing file
// - first move the file
// - then update the doc record to say processed

/// <summary>
/// Processes a single PDF: makes an archive copy, optionally extracts text and page
/// images, optionally writes the page and document-info records to the mongo
/// databases, and finally requests a refresh of the unfiled documents list.
/// </summary>
/// <param name="fileName">Path of the PDF file to process.</param>
/// <param name="uniqName">Unique document name used for the archive copy, images and records.</param>
/// <param name="bExtractImages">When true, page image files may be generated (see <paramref name="bDontOverwriteExistingImages"/>).</param>
/// <param name="bDontOverwriteExistingImages">When true, images are only generated if the image file for page 1 does not already exist.</param>
/// <param name="bExtractText">When true, text is extracted from the PDF; otherwise an empty <c>ScanPages</c> is used and the total page count stays 0.</param>
/// <param name="bRecogniseDoc">Not used by this method body.</param>
/// <param name="bAddToDocInfoDb">When true, the document-info record is added via <c>AddDocInfoRecToMongo</c>.</param>
/// <param name="bAddToDocPagesDb">When true, the pages record is added via <c>AddScanPagesRecToMongo</c>.</param>
/// <returns>
/// False if <c>ScanDocInfoRecordExists</c> returns false for <paramref name="uniqName"/> or if
/// the archive copy cannot be made; true otherwise.
/// </returns>
public bool ProcessPdfFile(string fileName, string uniqName, bool bExtractImages, bool bDontOverwriteExistingImages, bool bExtractText, bool bRecogniseDoc, bool bAddToDocInfoDb, bool bAddToDocPagesDb)
{
    // First check if doc details are already in db
    // NOTE(review): this bails out when the record does NOT exist - confirm that this
    // polarity is intended, given that the method can also add the doc-info record below.
    if (!ScanDocInfoRecordExists(uniqName))
    {
        return false;
    }

    // Make a copy of the file in the archive location (skipped if one is already there)
    string archiveFileName = ScanDocHandler.GetArchiveFileName(uniqName);
    if (Delimon.Win32.IO.File.Exists(archiveFileName))
    {
        logger.Info("Archive file already exists {0}", archiveFileName);
    }
    else
    {
        string statusStr = "";
        if (!CopyFile(fileName, archiveFileName, ref statusStr))
        {
            logger.Error("Can't make archive copy {0} excp {1}", archiveFileName, statusStr);
            return false;
        }
    }

    // Extract text blocks from file
    ScanPages scanPages = new ScanPages(uniqName);
    int totalNumPages = 0;
    if (bExtractText)
    {
        PdfTextAndLocExtractor pdfExtractor = new PdfTextAndLocExtractor();
        scanPages = pdfExtractor.ExtractDocInfo(uniqName, fileName, _scanConfig._maxPagesForText, ref totalNumPages);
    }

    // Extract images from file
    if (bExtractImages)
    {
        // Short-circuit so the file system is only probed when existing images must be kept;
        // the presence of the page 1 image is used as the "images already exist" marker.
        bool procImages = !bDontOverwriteExistingImages
            || !Delimon.Win32.IO.File.Exists(PdfRasterizer.GetFilenameOfImageOfPage(_scanConfig._docAdminImgFolderBase, uniqName, 1, false));
        if (procImages)
        {
            PdfRasterizer rs = new PdfRasterizer(fileName, THUMBNAIL_POINTS_PER_INCH);
            try
            {
                // The returned list of image file names is not needed here.
                rs.GeneratePageFiles(uniqName, scanPages, _scanConfig._docAdminImgFolderBase, _scanConfig._maxPagesForImages, false);
            }
            finally
            {
                // Always release the rasterizer, even if page generation throws.
                rs.Close();
            }
        }
    }

    // Form partial document info (the original path is stored with forward slashes)
    DateTime fileDateTime = Delimon.Win32.IO.File.GetCreationTime(fileName);
    ScanDocInfo scanDocInfo = new ScanDocInfo(uniqName, totalNumPages, scanPages.scanPagesText.Count, fileDateTime, fileName.Replace('\\', '/'), false);

    // Add records to mongo databases
    if (bAddToDocPagesDb)
    {
        AddScanPagesRecToMongo(scanPages);
    }
    if (bAddToDocInfoDb)
    {
        AddDocInfoRecToMongo(scanDocInfo);
    }

    // Request update to unfiled documents list
    _scanDocInfoCache.RequestUnfiledListUpdate();
    return true;
}
/// <summary>
/// DoWork handler that opens the PDF at the current background-loading file index,
/// renders each page to an image and queues a delegate on the dispatcher that converts
/// the image to a bitmap and appends a <c>PdfPageInfo</c> entry to <c>_pdfPageList</c>.
/// </summary>
/// <param name="sender">The <see cref="BackgroundWorker"/> running this handler.</param>
/// <param name="e">Work arguments; <c>Cancel</c> is set to true when cancellation was requested.</param>
private void AddPages_DoWork(object sender, DoWorkEventArgs e)
{
    BackgroundWorker worker = sender as BackgroundWorker;

    // Snapshot the file index once. The queued delegates run later on the dispatcher
    // thread, so re-reading the field there could pick up a different value than the
    // one used to open the file below.
    int fileIdx = _curBackgroundLoadingFileIdx;

    try
    {
        _pdfRasterizer = new ScanMonitorApp.PdfRasterizer(_curFileNames[fileIdx], POINTS_PER_INCH);
    }
    catch (Exception excp)
    {
        // Any failure to create the rasterizer is reported to the user as missing Ghostscript.
        logger.Error("PDF Editor requires Ghostscript {0}", excp.Message);
        MessageBox.Show("PDF Editor requires Ghostscript to be installed");
        return;
    }

    try
    {
        int startNewPageNum;
        int startNewFileNum;
        int pageTotal; // required by the out signature; not used further in this method
        GetFileAndPageOfLastOutDoc(out startNewFileNum, out startNewPageNum, out pageTotal);

        // Extract page images
        int numPages = _pdfRasterizer.NumPages();
        for (int i = 0; i < numPages; i++)
        {
            if (worker.CancellationPending)
            {
                e.Cancel = true;
                break;
            }

            // Rasterizer page numbers are 1-based.
            System.Drawing.Image pageImg = _pdfRasterizer.GetPageImage(i + 1, false);

            // Per-iteration values are passed as arguments so that each queued
            // delegate sees the values for its own page.
            object[] args = new object[] { i, startNewPageNum, startNewFileNum, pageImg };
            this.Dispatcher.BeginInvoke(
                (Action<int, int, int, System.Drawing.Image>)delegate(int pageIdx, int startNewPgNum, int startNewFilNum, System.Drawing.Image pagImg)
                {
                    BitmapImage bitmap = ConvertToBitmap(pagImg);
                    PdfPageInfo pgInfo = new PdfPageInfo();
                    pgInfo.PageNum = pageIdx + 1;
                    pgInfo.FileIndex = fileIdx;
                    pgInfo.ThumbBitmap = bitmap;
                    pgInfo.SplitAfter = false;
                    pgInfo.DeletePage = false;
                    pgInfo.PageRotation = 0;
                    pgInfo.ShowFileNum = (fileIdx > 0);
                    pgInfo.NewDocPageNum = pageIdx + startNewPgNum;
                    pgInfo.NewDocFileNum = startNewFilNum;
                    _pdfPageList.Add(pgInfo);
                },
                args);

            // NOTE(review): presumably this pause gives the dispatcher thread time to
            // process the queued page before the next one is rendered - confirm.
            Thread.Sleep(50);
            worker.ReportProgress(i * 100 / numPages, null);
        }
    }
    catch (Exception excp)
    {
        logger.Error("PDF Editor AddPages_DoWork failed excp {0}", excp.Message);
    }
    finally
    {
        // Close file
        _pdfRasterizer.Close();
        _pdfRasterizer = null;
    }
}