Beispiel #1
0
        /// <summary>
        /// Processes a scanned PDF file: makes sure an archive copy exists, optionally extracts
        /// text and page images, optionally writes the pages / doc-info records to the database,
        /// and finally requests a refresh of the unfiled-documents list.
        /// </summary>
        /// <param name="fileName">Path of the PDF file to process.</param>
        /// <param name="uniqName">Unique document name; used for the record lookup, the archive file name and the page image file names.</param>
        /// <param name="bExtractImages">When true, page images are generated (subject to <paramref name="bDontOverwriteExistingImages"/>).</param>
        /// <param name="bDontOverwriteExistingImages">When true, image generation is skipped if the image file for page 1 already exists.</param>
        /// <param name="bExtractText">When true, text is extracted from the PDF; otherwise a ScanPages built from <paramref name="uniqName"/> alone is used and the total page count stays 0.</param>
        /// <param name="bRecogniseDoc">Not used by this method; kept so existing callers keep compiling.</param>
        /// <param name="bAddToDocInfoDb">When true, the document info record is passed to AddDocInfoRecToMongo.</param>
        /// <param name="bAddToDocPagesDb">When true, the scan pages record is passed to AddScanPagesRecToMongo.</param>
        /// <returns>
        /// false when ScanDocInfoRecordExists returns false for <paramref name="uniqName"/> or when the
        /// archive copy cannot be made; true otherwise.
        /// </returns>
        public bool ProcessPdfFile(string fileName, string uniqName, bool bExtractImages, bool bDontOverwriteExistingImages, bool bExtractText, bool bRecogniseDoc,
                                bool bAddToDocInfoDb, bool bAddToDocPagesDb)
        {
            // First check if doc details are already in db
            // NOTE(review): this bails out when NO record exists for uniqName - confirm that this
            // polarity is intended (the records are only added further down in this method).
            if (!ScanDocInfoRecordExists(uniqName))
            {
                return false;
            }

            // Make a copy of the file in the archive location (an existing archive file is left as is)
            string archiveFileName = ScanDocHandler.GetArchiveFileName(uniqName);
            if (!Delimon.Win32.IO.File.Exists(archiveFileName))
            {
                string statusStr = "";
                bool bResult = CopyFile(fileName, archiveFileName, ref statusStr);
                if (!bResult)
                {
                    logger.Error("Can't make archive copy {0} excp {1}", archiveFileName, statusStr);
                    return false;
                }
            }
            else
            {
                logger.Info("Archive file already exists {0}", archiveFileName);
            }

            // Extract text blocks from file
            ScanPages scanPages = new ScanPages(uniqName);
            int totalNumPages = 0;
            if (bExtractText)
            {
                PdfTextAndLocExtractor pdfExtractor = new PdfTextAndLocExtractor();
                scanPages = pdfExtractor.ExtractDocInfo(uniqName, fileName, _scanConfig._maxPagesForText, ref totalNumPages);
            }

            // Extract images from file
            if (bExtractImages)
            {
                // Short-circuit: the file system is only probed when overwriting is disallowed.
                // The image of page 1 is used as the marker for "images already generated".
                bool procImages = !bDontOverwriteExistingImages
                    || !Delimon.Win32.IO.File.Exists(PdfRasterizer.GetFilenameOfImageOfPage(_scanConfig._docAdminImgFolderBase, uniqName, 1, false));
                if (procImages)
                {
                    PdfRasterizer rs = new PdfRasterizer(fileName, THUMBNAIL_POINTS_PER_INCH);
                    try
                    {
                        // The returned list of image file names is not needed here
                        rs.GeneratePageFiles(uniqName, scanPages, _scanConfig._docAdminImgFolderBase, _scanConfig._maxPagesForImages, false);
                    }
                    finally
                    {
                        // Always release the rasterizer, even if page generation throws
                        rs.Close();
                    }
                }
            }

            // Form partial document info (the original file path is stored with forward slashes)
            DateTime fileDateTime = Delimon.Win32.IO.File.GetCreationTime(fileName);
            ScanDocInfo scanDocInfo = new ScanDocInfo(uniqName, totalNumPages, scanPages.scanPagesText.Count, fileDateTime, fileName.Replace('\\', '/'), false);

            // Add records to mongo databases
            if (bAddToDocPagesDb)
            {
                AddScanPagesRecToMongo(scanPages);
            }
            if (bAddToDocInfoDb)
            {
                AddDocInfoRecToMongo(scanDocInfo);
            }

            // Request update to unfiled documents list
            _scanDocInfoCache.RequestUnfiledListUpdate();

            return true;
        }
        /// <summary>
        /// BackgroundWorker DoWork handler: opens the PDF currently being loaded, renders each page
        /// to an image and queues a UI-thread action that appends a PdfPageInfo for that page to
        /// the page list. Progress is reported after every page and cancellation is honoured
        /// between pages. The rasterizer is closed and the field cleared when the work ends.
        /// </summary>
        /// <param name="sender">The BackgroundWorker running this handler.</param>
        /// <param name="e">Work arguments; Cancel is set to true when a pending cancellation is seen.</param>
        private void AddPages_DoWork(object sender, DoWorkEventArgs e)
        {
            BackgroundWorker worker = sender as BackgroundWorker;

            // Capture the file index once: the UI callbacks queued below run later, and must tag
            // each page with the file it was rendered from even if the field has moved on by then.
            int fileIdx = _curBackgroundLoadingFileIdx;

            try
            {
                _pdfRasterizer = new ScanMonitorApp.PdfRasterizer(_curFileNames[fileIdx], POINTS_PER_INCH);
            }
            catch (Exception excp)
            {
                logger.Error("PDF Editor requires Ghostscript {0}", excp.Message);
                MessageBox.Show("PDF Editor requires Ghostscript to be installed");
                return;
            }

            try
            {
                int startNewPageNum;
                int startNewFileNum;
                int pageTotal;
                GetFileAndPageOfLastOutDoc(out startNewFileNum, out startNewPageNum, out pageTotal);

                // The page count is fetched once instead of twice per iteration
                int numPages = _pdfRasterizer.NumPages();

                // Extract page images
                for (int i = 0; i < numPages; i++)
                {
                    if (worker.CancellationPending)
                    {
                        e.Cancel = true;
                        break;
                    }

                    // Page numbers passed to the rasterizer are 1-based
                    // NOTE(review): pageImg is never disposed here - confirm whether ConvertToBitmap
                    // or the rasterizer owns it before adding a Dispose.
                    System.Drawing.Image pageImg = _pdfRasterizer.GetPageImage(i + 1, false);

                    // Values are passed as arguments so each queued action sees this iteration's data
                    object[] args = new object[] { i, startNewPageNum, startNewFileNum, pageImg };

                    this.Dispatcher.BeginInvoke((Action<int, int, int, System.Drawing.Image>)delegate(int pageIdx, int startNewPgNum, int startNewFilNum, System.Drawing.Image pagImg)
                    {
                        PdfPageInfo pgInfo = new PdfPageInfo();
                        pgInfo.PageNum = pageIdx + 1;
                        pgInfo.FileIndex = fileIdx;
                        pgInfo.ThumbBitmap = ConvertToBitmap(pagImg);
                        pgInfo.SplitAfter = false;
                        pgInfo.DeletePage = false;
                        pgInfo.PageRotation = 0;
                        // File number is only shown for files after the first one
                        pgInfo.ShowFileNum = (fileIdx > 0);
                        pgInfo.NewDocPageNum = pageIdx + startNewPgNum;
                        pgInfo.NewDocFileNum = startNewFilNum;
                        _pdfPageList.Add(pgInfo);
                    }, args);

                    // NOTE(review): presumably a throttle so the UI thread can keep up with the
                    // queued page additions - confirm before changing.
                    Thread.Sleep(50);
                    worker.ReportProgress(i * 100 / numPages, null);
                }
            }
            catch (Exception excp)
            {
                logger.Error("PDF Editor AddPages_DoWork failed excp {0}", excp.Message);
            }
            finally
            {
                // Close file
                _pdfRasterizer.Close();
                _pdfRasterizer = null;
            }
        }