C# (CSharp) Qiqqa.Documents.PDF.PDFRendering PDFTextExtractor.Job 예제들

프로그래밍 언어: C# (CSharp)

네임스페이스/패키지 이름: Qiqqa.Documents.PDF.PDFRendering

클래스/타입: PDFTextExtractor.Job

hotexamples.com에서의 예제들: 4

C# (CSharp) Qiqqa.Documents.PDF.PDFRendering PDFTextExtractor.Job - 4개의 예제를 찾았습니다. 아래는 오픈소스 프로젝트에서 추출한 Qiqqa.Documents.PDF.PDFRendering.PDFTextExtractor.Job의 실제 사용 예제 중 가장 높은 평가를 받은 C# (CSharp) 예제들입니다. 예제를 평가해 주시면 예제의 품질을 개선하는 데 도움이 됩니다.

예제 #1

파일 보기

파일: PDFRenderer.cs 프로젝트: project-renard-survey/qiqqa-open-source

        /// <summary>
        /// Discards the cached texts and queues every page of the document (1 to PageCount)
        /// as a forced single-page text extraction job in the given language.
        /// </summary>
        /// <param name="language">The language assigned to each queued job.</param>
        public void ForceOCRText(string language)
        {
            Logging.Info("Forcing OCR for document {0} in language {1}", document_fingerprint, language);

            // Drop whatever texts we are currently holding
            lock (texts)
            {
                texts.Clear();
            }

            // Every page (1 based) gets its own forced job
            int page_number = 1;
            while (page_number <= PageCount)
            {
                PDFTextExtractor.Job forced_job = new PDFTextExtractor.Job(this, page_number, TEXT_PAGES_PER_GROUP)
                {
                    force_job = true,
                    language = language
                };
                PDFTextExtractor.Instance.QueueJobSingle(forced_job);

                ++page_number;
            }
        }

예제 #2

파일 보기

        /// <summary>
        /// Flushes the cached texts, clears the existing OCR text, and then queues every page
        /// of the document (1 to PageCount) as a forced single-page text extraction job.
        /// </summary>
        /// <param name="language">The language assigned to each queued job; defaults to "eng".</param>
        public void ForceOCRText(string language = "eng")
        {
            Logging.Info("Forcing OCR for document {0} in language {1}", document_fingerprint, language);

            // Get rid of the texts we are holding in memory...
            FlushCachedTexts();

            // ...and of the results stored on disk too, otherwise the OCR is not truly FORCED to run again
            ClearOCRText();

            // Every page (1 based) gets its own forced job
            int page_number = 1;
            while (page_number <= PageCount)
            {
                PDFTextExtractor.Job forced_job = new PDFTextExtractor.Job(this, page_number)
                {
                    force_job = true,
                    language = language
                };
                PDFTextExtractor.Instance.QueueJobSingle(forced_job);

                ++page_number;
            }
        }

예제 #3

파일 보기

        /// <summary>
        /// Discards the cached texts and queues every page of the document (1 to PageCount)
        /// as a forced single-page text extraction job in the given language.
        /// </summary>
        /// <param name="language">The language assigned to each queued job.</param>
        public void ForceOCRText(string language)
        {
            Logging.Info("Forcing OCR for document {0} in language {1}", document_fingerprint, language);

            // Drop whatever texts we are currently holding.
            // The perf timer is started before the lock is requested and stopped once it has been acquired.
            Utilities.LockPerfTimer lock_timer = Utilities.LockPerfChecker.Start();
            lock (texts_lock)
            {
                lock_timer.LockPerfTimerStop();
                texts.Clear();
            }

            // Every page (1 based) gets its own forced job
            int page_number = 1;
            while (page_number <= PageCount)
            {
                PDFTextExtractor.Job forced_job = new PDFTextExtractor.Job(this, page_number, TEXT_PAGES_PER_GROUP)
                {
                    force_job = true,
                    language = language
                };
                PDFTextExtractor.Instance.QueueJobSingle(forced_job);

                ++page_number;
            }
        }

예제 #4

파일 보기

        /// <summary>
        /// Returns the OCR words on the page.  Null if the words are not yet available.
        /// The lookup order is: the in-memory cache, then the SINGLE page text file, then the
        /// GROUP text file.  A text file that fails to load is logged and deleted.
        /// The page will be queued for OCRing if the words are not available and
        /// <paramref name="queue_for_ocr"/> is set.
        /// </summary>
        /// <param name="page">The page number.  Page is 1 based.</param>
        /// <param name="queue_for_ocr">
        /// When true (the default) and the words could not be found, a text extraction job is queued
        /// for the page: a GROUP job if the GROUP file does not exist and the job group has not
        /// failed before, otherwise a SINGLE job.
        /// </param>
        /// <returns>The words on the page, or null if they are not yet available.</returns>
        public WordList GetOCRText(int page, bool queue_for_ocr = true)
        {
            //Utilities.LockPerfTimer l1_clk = Utilities.LockPerfChecker.Start();
            lock (texts_lock)
            {
                //l1_clk.LockPerfTimerStop();

                // First check our cache
                WordList cached_word_list = GetCachedOCRText(page);
                if (null != cached_word_list)
                {
                    return cached_word_list;
                }

                // Then check for an existing SINGLE file
                string single_filename = pdf_render_file_layer.MakeFilename_TextSingle(page);
                try
                {
                    if (File.Exists(single_filename))
                    {
                        // Get this ONE page
                        Dictionary<int, WordList> word_lists = WordList.ReadFromFile(single_filename, page);

                        // A missing page is reported the same way as a null page, so that the
                        // warning logged below names the page and file instead of a bare key lookup failure.
                        WordList word_list;
                        if (!word_lists.TryGetValue(page, out word_list) || null == word_list)
                        {
                            throw new Exception(String.Format("No words on page {0} in OCR file {1}", page, single_filename));
                        }

                        texts[page] = new TypedWeakReference<WordList>(word_list);
                        return word_list;
                    }
                }
                catch (Exception ex)
                {
                    // The file could not be used: drop it and fall through to the next source
                    Logging.Warn(ex, "There was an error loading the OCR text for {0} page {1}.", document_fingerprint, page);
                    FileTools.Delete(single_filename);
                }

                // Then check for an existing GROUP file
                string group_filename = pdf_render_file_layer.MakeFilename_TextGroup(page);
                try
                {
                    if (File.Exists(group_filename))
                    {
                        // Cache every page that the group file delivers
                        Dictionary<int, WordList> word_lists = WordList.ReadFromFile(group_filename);
                        foreach (var pair in word_lists)
                        {
                            texts[pair.Key] = new TypedWeakReference<WordList>(pair.Value);
                        }

                        // The group file may or may not have contained the page we are after
                        WordList word_list = GetCachedOCRText(page);
                        if (null != word_list)
                        {
                            return word_list;
                        }
                    }
                }
                catch (Exception ex)
                {
                    Logging.Warn(ex, "There was an error loading the OCR text group for {0} page {1}.", document_fingerprint, page);
                    FileTools.Delete(group_filename);
                }
            }

            // If we get this far then the text was not available so queue extraction
            if (queue_for_ocr)
            {
                // If we have never tried the GROUP version before, queue for it
                string filename          = pdf_render_file_layer.MakeFilename_TextGroup(page);
                PDFTextExtractor.Job job = new PDFTextExtractor.Job(this, page);

                if (!File.Exists(filename) && PDFTextExtractor.Instance.JobGroupHasNotFailedBefore(job))
                {
                    PDFTextExtractor.Instance.QueueJobGroup(job);
                }
                else
                {
                    PDFTextExtractor.Instance.QueueJobSingle(job);
                }
            }

            return null;
        }

        /// <summary>
        /// Looks the page up in the in-memory cache.  The caller must hold texts_lock.
        /// </summary>
        /// <param name="page">The page number.  Page is 1 based.</param>
        /// <returns>
        /// The cached words for the page, or null if the page has no cache entry or the
        /// entry's weak reference has no target.
        /// </returns>
        private WordList GetCachedOCRText(int page)
        {
            TypedWeakReference<WordList> word_list_weak;
            if (texts.TryGetValue(page, out word_list_weak) && null != word_list_weak)
            {
                return word_list_weak.TypedTarget;
            }

            return null;
        }