/// <summary>
/// Places a group job into the pending group queue, keyed by its queued-job token.
/// </summary>
/// <remarks>
/// A job already queued under the same token is left in place, unless the incoming
/// job is a FORCE job, in which case it overwrites the queued entry.
/// </remarks>
/// <param name="job">The job to queue.</param>
public void QueueJobGroup(Job job)
{
    string queued_token = NextJob.GetQueuedJobToken(job);

    lock (queue_lock)
    {
        // Ordinary jobs only fill an empty slot; FORCE jobs have priority and replace.
        bool already_queued = job_queue_group.ContainsKey(queued_token);
        if (already_queued && !job.force_job)
        {
            return;
        }

        job_queue_group[queued_token] = job;
    }
}
/// <summary>
/// Removes the token of a finished job from the matching "currently running" set
/// (group or single), logging an error when the token was not registered there.
/// </summary>
/// <param name="next_job">The job wrapper whose work has completed.</param>
private void RecordThatJobHasCompleted(NextJob next_job)
{
    string running_token = NextJob.GetCurrentJobToken(next_job.job, next_job.is_group);

    lock (queue_lock)
    {
        HashSet<string> running_jobs;
        if (next_job.is_group)
        {
            running_jobs = current_jobs_group;
        }
        else
        {
            running_jobs = current_jobs_single;
        }

        // Remove() returns false when the token was absent, which is the error case.
        bool was_running = running_jobs.Remove(running_token);
        if (!was_running)
        {
            Logging.Error("Job is not running, so can't remove it: {0}", running_token);
        }
    }
}
/// <summary>
/// Builds a <c>JobMetadata</c> from this record, resolving the stored type names
/// and deserializing the stored job parameter.
/// </summary>
/// <param name="jsonSerializerSettings">Settings passed to the parameter deserialization and to the chained job conversion.</param>
/// <returns>A new <c>JobMetadata</c> populated from this record; a chained <c>NextJob</c> is converted recursively.</returns>
public JobMetadata ToJob(JsonSerializerSettings jsonSerializerSettings)
{
    // throwOnError is true here: an unresolvable job type name raises instead of yielding null.
    Type resolvedJobType = Type.GetType(JobType, true);

    // The parameter type lookup does not request throwOnError; a null JobParam skips deserialization.
    var deserializedParam = JobParam?.FromJson(Type.GetType(JobParamType), jsonSerializerSettings);

    var chainedJob = NextJob?.ToJob(jsonSerializerSettings);

    var metadata = new JobMetadata
    {
        JobId = JobId,
        JobKey = JobKey,
        Status = Status,
        CountStarted = CountStarted,
        StartedExecuting = StartedExecuting,
        ExecutedMachine = ExecutedMachine,
        JobType = resolvedJobType,
        JobParam = deserializedParam,
        StartAt = StartAt,
        NextJob = chainedJob,
        Cron = Cron,
        Delay = Delay,
        ObsoleteInterval = ObsoleteInterval
    };

    return metadata;
}
/// <summary>
/// Reports whether a job with the same token is currently registered as running.
/// </summary>
/// <param name="job">The job to check.</param>
/// <param name="is_group">True when checking a group job; false for a single job.</param>
/// <param name="queue_lock_REMINDER">Not used by the body. NOTE(review): presumably a reminder that the caller must hold the queue lock — confirm against callers.</param>
/// <returns>
/// True when a matching group job is running, or (for single jobs only) when a
/// matching single job is running; otherwise false.
/// </returns>
private bool IsSimilarJobRunning(Job job, bool is_group, object queue_lock_REMINDER)
{
    // A running group job counts as "similar" for both group and single requests.
    bool group_job_running = current_jobs_group.Contains(NextJob.GetCurrentJobToken(job, true));
    if (group_job_running || is_group)
    {
        return group_job_running;
    }

    // Single request with no group job in flight: look at the running single jobs.
    return current_jobs_single.Contains(NextJob.GetCurrentJobToken(job, false));
}
/// <summary>
/// Builds a <c>JobMetadata</c> from this record, resolving the stored type names
/// (job type and optional repeat strategy) and deserializing the stored job parameter.
/// </summary>
/// <param name="jsonSerializerSettings">Settings passed to the parameter deserialization and to the chained job conversion.</param>
/// <returns>A new <c>JobMetadata</c> populated from this record; a chained <c>NextJob</c> is converted recursively.</returns>
public JobMetadata ToJob(JsonSerializerSettings jsonSerializerSettings)
{
    // throwOnError is true here: an unresolvable job type name raises instead of yielding null.
    Type resolvedJobType = Type.GetType(JobType, true);

    // The parameter type lookup does not request throwOnError; a null JobParam skips deserialization.
    var deserializedParam = JobParam?.FromJson(Type.GetType(JobParamType), jsonSerializerSettings);

    var chainedJob = NextJob?.ToJob(jsonSerializerSettings);

    // A null or empty strategy name maps to "no strategy"; anything else must resolve to a type.
    Type resolvedRepeatStrategy = null;
    if (!string.IsNullOrEmpty(RepeatStrategy))
    {
        resolvedRepeatStrategy = Type.GetType(RepeatStrategy, true);
    }

    var metadata = new JobMetadata
    {
        JobId = JobId,
        JobKey = JobKey,
        Status = Status,
        CountStarted = CountStarted,
        StartedExecuting = StartedExecuting,
        ExecutedMachine = ExecutedMachine,
        JobType = resolvedJobType,
        JobParam = deserializedParam,
        StartAt = StartAt,
        NextJob = chainedJob,
        Cron = Cron,
        Delay = Delay,
        ObsoleteInterval = ObsoleteInterval,
        RepeatStrategy = resolvedRepeatStrategy,
        MaxRepeatCount = MaxRepeatCount
    };

    return metadata;
}
/// <summary>
/// Copies this object's job fields into a new <c>JobDb</c> record.
/// </summary>
/// <returns>A new <c>JobDb</c>; a chained <c>NextJob</c> is converted recursively, and stays null when absent.</returns>
public JobDb ToJobDb()
{
    var chainedJobDb = NextJob?.ToJobDb();

    var record = new JobDb
    {
        JobKey = JobKey,
        JobId = JobId,
        Status = Status,
        JobType = JobType,
        JobParamType = JobParamType,
        JobParam = JobParam,
        CountStarted = CountStarted,
        StartedExecuting = StartedExecuting,
        ExecutedMachine = ExecutedMachine,
        StartAt = StartAt,
        NextJob = chainedJobDb,
        Cron = Cron,
        Delay = Delay,
        ObsoleteInterval = ObsoleteInterval,
        RepeatStrategy = RepeatStrategy,
        MaxRepeatCount = MaxRepeatCount
    };

    return record;
}
/// <summary>
/// Worker loop of the OCR/text-extraction thread: repeatedly takes the next queued job
/// and processes it as a GROUP or SINGLE job until shutdown is signalled.
/// </summary>
/// <remarks>
/// Each iteration, in order:
/// 1. On shutdown (or when <c>StillRunning</c> is false) the queues are flushed and the loop ends.
/// 2. While the library is busy adding PDFs, or background tasks are disabled, the loop sleeps and retries.
/// 3. Otherwise the next job is fetched; after waiting for the UI thread, all abort/delay conditions are
///    re-checked and the job is pushed back onto its queue if any of them holds.
/// 4. Jobs whose text already exists are skipped unless they are FORCE jobs.
/// 5. The job is processed with a temporary result file that is always deleted afterwards.
/// </remarks>
private void ThreadEntry()
{
    bool did_some_ocr_since_last_iteration = false;

    while (true)
    {
        if (Utilities.Shutdownable.ShutdownableManager.Instance.IsShuttingDown || !StillRunning)
        {
            int job_queue_group_count;
            int job_queue_single_count;
            GetJobCounts(out job_queue_group_count, out job_queue_single_count);

            Logging.Debug特("PDFTextExtractor: shutting down and flushing the queue ({0} + {1} items discarded)", job_queue_group_count, job_queue_single_count);

            FlushAllJobs();
            break;
        }

        // If this library is busy, skip it for now
        if (Library.IsBusyAddingPDFs)
        {
            // Get a count of how many jobs are left...
            int job_queue_group_count;
            int job_queue_single_count;
            GetJobCounts(out job_queue_group_count, out job_queue_single_count);

            // Only report the pause when there is work waiting; the flag makes the idle
            // branch further down clear the status message once the queue has drained.
            if (0 < job_queue_group_count || 0 < job_queue_single_count)
            {
                did_some_ocr_since_last_iteration = true;
                StatusManager.Instance.UpdateStatus("PDFOCR", "OCR paused while adding documents.");
            }

            Thread.Sleep(1000);
            continue;
        }

        if (ConfigurationManager.Instance.ConfigurationRecord.DisableAllBackgroundTasks)
        {
            Logging.Debug特("OCR/Textify daemons are forced to sleep via Configuration::DisableAllBackgroundTasks");
            Thread.Sleep(1000);
            continue;
        }

        // The using block disposes next_job on every exit path, including the `continue` statements below.
        using (NextJob next_job = GetNextJob())
        {
            if (null != next_job)
            {
                did_some_ocr_since_last_iteration = true;

                Logging.Debug特("Doing OCR for job '{0}'", next_job.job);

                long clk_duration;
                {
                    Stopwatch clk = Stopwatch.StartNew();

                    // Relinquish control to the UI thread to make sure responsiveness remains tolerable at 100% CPU load.
                    Utilities.GUI.WPFDoEvents.WaitForUIThreadActivityDone();

                    clk_duration = clk.ElapsedMilliseconds;
                }

                // The call above can take quite a while to complete, so check all abort/delay checks once again, just in case...:
                if (Utilities.Shutdownable.ShutdownableManager.Instance.IsShuttingDown || !StillRunning
                    || clk_duration > 100
                    || Library.IsBusyAddingPDFs
                    || ConfigurationManager.Instance.ConfigurationRecord.DisableAllBackgroundTasks
                    )
                {
                    Logging.Warn("Recheck job queue after WaitForUIThreadActivityDone took {0}ms or shutdown/delay signals were detected: {1}/{2}/{3}/{4}.",
                        clk_duration,
                        (Utilities.Shutdownable.ShutdownableManager.Instance.IsShuttingDown || !StillRunning) ? "+Shutdown+" : "-SD-",
                        clk_duration > 100 ? "+UI-wait+" : "-UI-",
                        Library.IsBusyAddingPDFs ? "+PDFAddPending+" : "-PDF-",
                        ConfigurationManager.Instance.ConfigurationRecord.DisableAllBackgroundTasks ? "+DisableBackgroundTasks+" : "-DB-"
                        );

                    // push the job onto the queue and start from the beginning:
                    if (next_job.is_group)
                    {
                        QueueJobGroup(next_job.job);
                    }
                    else
                    {
                        QueueJobSingle(next_job.job);
                    }
                    continue;
                }
                else
                {
                    // Get a count of how many jobs are left...
                    int job_queue_group_count;
                    int job_queue_single_count;
                    GetJobCounts(out job_queue_group_count, out job_queue_single_count);

                    // nitpick: we'll be one off in the counts as we have the current job as well, but I'm fine with an incidental 0/0/99% report.
                    int job_queue_total_count = job_queue_group_count + job_queue_single_count + 1;

                    StatusManager.Instance.UpdateStatus("PDFOCR", String.Format("{0} page(s) to textify and {1} page(s) to OCR.", job_queue_group_count, job_queue_single_count), 1, job_queue_total_count);
                }

                // If the text has somehow appeared before we get to process it (perhaps two requests for the same job)
                if (!next_job.job.force_job && null != next_job.job.pdf_renderer.GetOCRText(next_job.job.page, false))
                {
                    Logging.Info("Job '{0}' is redundant as text exists", next_job.job);
                    continue;
                }

                string temp_ocr_result_filename = TempFile.GenerateTempFilename("txt");

                try
                {
                    if (next_job.is_group)
                    {
                        ProcessNextJob_Group(next_job, temp_ocr_result_filename);
                    }
                    else
                    {
                        ProcessNextJob_Single(next_job, temp_ocr_result_filename);
                    }
                }
                catch (Exception ex)
                {
                    // A failing job must not take down the worker loop: log and move on.
                    Logging.Error(ex, "There was a problem processing job {0}", next_job.job);
                }
                finally
                {
                    try
                    {
                        // (it's okay to try to delete the tempfiles when we're terminating; the rest of the job has been skipped)
                        File.Delete(temp_ocr_result_filename);
                    }
                    catch (Exception ex)
                    {
                        Logging.Error(ex, "There was a problem deleting the temporary OCR file {0}", temp_ocr_result_filename);
                    }
                }
            }
            else
            {
                // Queue is empty: clear the status message once, then idle briefly.
                if (did_some_ocr_since_last_iteration)
                {
                    did_some_ocr_since_last_iteration = false;
                    StatusManager.Instance.ClearStatus("PDFOCR");
                }

                Thread.Sleep(500);
            }
        }
    }
}
/// <summary>
/// Runs the external OCR step for one page and stores the resulting text on success.
/// </summary>
/// <remarks>
/// On failure the method logs the problem and then either stops (application shutting down),
/// runs a "SINGLE-FAKE" OCR pass (when the failure may be due to an encrypted PDF), or only
/// logs that a retry will happen later.
/// </remarks>
/// <param name="next_job">The job wrapper describing the document, page and language.</param>
/// <param name="temp_ocr_result_filename">Path of the temporary file that receives the OCR output.</param>
private void ProcessNextJob_Single(NextJob next_job, string temp_ocr_result_filename)
{
    const string QUOTE = "\"";
    var job = next_job.job;

    // Command line: SINGLE "<pdf>" <page> "<output file>" "<encrypted password>" "<language>"
    string ocr_parameters =
        "SINGLE"
        + " " + QUOTE + job.pdf_renderer.DocumentPath + QUOTE
        + " " + job.page
        + " " + QUOTE + temp_ocr_result_filename + QUOTE
        + " " + QUOTE + ReversibleEncryption.Instance.EncryptString(job.pdf_renderer.PDFPassword) + QUOTE
        + " " + QUOTE + job.language + QUOTE;

    OCRExecReport report;
    if (CheckOCRProcessSuccess(ocr_parameters, out report))
    {
        job.pdf_renderer.StorePageTextSingle(job.page, temp_ocr_result_filename);
        return;
    }

    Logging.Error("Couldn't even perform OCR on the page, so giving up for {0}", job);

    // Before 'faking it' to stop the repeated (and failing) OCR attempts, rule out the edge
    // condition where the application is terminating, to prevent index/OCR pollution.
    if (ShutdownableManager.Instance.IsShuttingDown)
    {
        Logging.Info("Breaking out of SINGLE Job processing for {0} due to application termination", job);
        return;
    }

    if (!failureMaybeDueToEncryptedPDF(report))
    {
        Logging.Error("SEVERE OCR PROBLEM: Single page OCR on page {0} resulted in an error which cannot be easily resolved. We will attempt a RETRY later for {1}:\n command: {2}\n result: {3}\n error log: {4}", job.page, job, report.OCRParameters, report.exitCode, report.OCRStdioOut);
        return;
    }

    // Fake a word file to stop the OCR processes from recurring at later times.
    // NOTE(review): a successful fake pass is not followed by a StorePageTextSingle call here — confirm that is intended.
    string fake_parameters =
        "SINGLE-FAKE"
        + " " + QUOTE + job.pdf_renderer.DocumentPath + QUOTE
        + " " + job.page
        + " " + QUOTE + temp_ocr_result_filename + QUOTE;

    if (!CheckOCRProcessSuccess(fake_parameters, out report))
    {
        Logging.Error("SEVERE OCR PROBLEM: Couldn't even perform FAKE=DUMMY OCR on the page, so giving up for {0}:\n command: {1}\n result: {2}\n error log: {3}", job, report.OCRParameters, report.exitCode, report.OCRStdioOut);
    }
}
/// <summary>
/// Runs the external text-extraction step for the page group containing the job's page,
/// falling back to a SINGLE job for that page when the group attempt fails or has failed before.
/// </summary>
/// <param name="next_job">The job wrapper describing the document, page and language.</param>
/// <param name="temp_ocr_result_filename">Path of the temporary file that receives the extraction output.</param>
private void ProcessNextJob_Group(NextJob next_job, string temp_ocr_result_filename)
{
    // Previously failed GROUP attempts go straight to SINGLE mode, without even trying
    // GROUP mode again, as it would fail the second/third/etc. time around as well.
    if (!JobGroupHasNotFailedBefore(next_job.job))
    {
        QueueJobSingle(next_job.job);
        return;
    }

    // Page range of the group this page belongs to (1-based), clipped to the document length.
    int first_page = ((next_job.job.page - 1) / Job.TEXT_PAGES_PER_GROUP) * Job.TEXT_PAGES_PER_GROUP + 1;
    int last_page = Math.Min(first_page + Job.TEXT_PAGES_PER_GROUP - 1, next_job.job.pdf_renderer.PageCount);

    // Comma-separated page list, e.g. "1,2,3".
    StringBuilder page_list = new StringBuilder();
    for (int page = first_page; page <= last_page; ++page)
    {
        if (page_list.Length > 0)
        {
            page_list.Append(',');
        }
        page_list.Append(page);
    }
    string page_numbers_string = page_list.ToString();

    // Command line: GROUP "<pdf>" <pages> "<output file>" "<encrypted password>" "<language>"
    const string QUOTE = "\"";
    string ocr_parameters =
        "GROUP"
        + " " + QUOTE + next_job.job.pdf_renderer.DocumentPath + QUOTE
        + " " + page_numbers_string
        + " " + QUOTE + temp_ocr_result_filename + QUOTE
        + " " + QUOTE + ReversibleEncryption.Instance.EncryptString(next_job.job.pdf_renderer.PDFPassword) + QUOTE
        + " " + QUOTE + next_job.job.language + QUOTE;

    // The execution report is not needed here, hence the discard.
    if (CheckOCRProcessSuccess(ocr_parameters, out _))
    {
        next_job.job.pdf_renderer.StorePageTextGroup(next_job.job.page, Job.TEXT_PAGES_PER_GROUP, temp_ocr_result_filename);
        return;
    }

    // The group failed: remember its token so later jobs skip GROUP mode ...
    string failed_group_token = NextJob.GetCurrentJobToken(next_job.job, next_job.is_group);
    lock (queue_lock)
    {
        failed_pdf_group_tokens.Add(failed_group_token);
    }

    // ... and queue this page up for a single OCR attempt.
    QueueJobSingle(next_job.job);
}
/// <summary>
/// Worker loop of the OCR/text-extraction daemon: repeatedly takes the next queued job
/// and processes it as a GROUP or SINGLE job until shutdown is signalled.
/// </summary>
/// <remarks>
/// Each iteration, in order:
/// 1. On shutdown (or when <c>StillRunning</c> is false) the queues are flushed and the loop ends.
/// 2. While the library is busy (adding PDFs / regenerating tags), or background tasks are disabled,
///    the loop sleeps and retries.
/// 3. Otherwise the next job is fetched; after a round trip through the UI thread, all abort/delay
///    conditions are re-checked and the job is pushed back onto its queue if any of them holds.
/// 4. Jobs whose text already exists are skipped unless they are FORCE jobs.
/// 5. The job is processed with a temporary result file that is always deleted afterwards.
/// </remarks>
/// <param name="obj">The <c>Daemon</c> running this loop; it is cast and used for its <c>Sleep</c> calls.</param>
private void ThreadEntry(object obj)
{
    Daemon daemon = (Daemon)obj;

    bool did_some_ocr_since_last_iteration = false;

    while (true)
    {
        if (ShutdownableManager.Instance.IsShuttingDown || !StillRunning)
        {
            int job_queue_group_count;
            int job_queue_single_count;
            GetJobCounts(out job_queue_group_count, out job_queue_single_count);

            Logging.Debug特("PDFTextExtractor: shutting down and flushing the queue ({0} + {1} items discarded)", job_queue_group_count, job_queue_single_count);

            FlushAllJobs();
            break;
        }

        // If this library is busy, skip it for now
        if (Library.IsBusyAddingPDFs || Library.IsBusyRegeneratingTags)
        {
            // Get a count of how many jobs are left...
            int job_queue_group_count;
            int job_queue_single_count;
            GetJobCounts(out job_queue_group_count, out job_queue_single_count);

            if (0 < job_queue_group_count || 0 < job_queue_single_count)
            {
                did_some_ocr_since_last_iteration = true;
                StatusManager.Instance.UpdateStatus("PDFOCR", "OCR paused while adding documents.");
                // Stopping the timer makes the next progress update fire immediately once work resumes.
                ocr_working_next_notification_time.Stop();
            }

            daemon.Sleep(2000);
            continue;
        }

        if (ConfigurationManager.Instance.ConfigurationRecord.DisableAllBackgroundTasks)
        {
            Logging.Debug特("OCR/Textify daemons are forced to sleep via Configuration::DisableAllBackgroundTasks");
            daemon.Sleep(1000);
            continue;
        }

        // The using block disposes next_job on every exit path, including the `continue` statements below.
        using (NextJob next_job = GetNextJob())
        {
            if (null != next_job)
            {
                did_some_ocr_since_last_iteration = true;

                Logging.Debug("Doing OCR for job '{0}'", next_job.job);

                long clk_duration;
                {
                    Stopwatch clk = Stopwatch.StartNew();

                    // Relinquish control to the UI thread to make sure responsiveness remains tolerable at 100% CPU load.
                    WPFDoEvents.InvokeInUIThread(() =>
                    {
                        // do nothing.
                    });

                    clk_duration = clk.ElapsedMilliseconds;
                }

                // The call above can take quite a while to complete, so check all abort/delay checks once again, just in case...:
                bool aborting_or_busy_elsewhere = (
                    ShutdownableManager.Instance.IsShuttingDown || !StillRunning
                    || Library.IsBusyAddingPDFs || Library.IsBusyRegeneratingTags
                    || ConfigurationManager.Instance.ConfigurationRecord.DisableAllBackgroundTasks
                    );
                bool cpu_load_too_high_for_UI_responsiveness = (clk_duration > 300);
                bool dev_override = !ConfigurationManager.IsEnabled("TextExtraction");

                if (aborting_or_busy_elsewhere || cpu_load_too_high_for_UI_responsiveness || dev_override)
                {
                    Logging.Warn("Recheck job queue after WaitForUIThreadActivityDone took {0}ms or shutdown/delay signals were detected: {1}/{2}/{3}/{4}/{5}/{6}.",
                        clk_duration,
                        (ShutdownableManager.Instance.IsShuttingDown || !StillRunning) ? "+Shutdown+" : "-SD-",
                        cpu_load_too_high_for_UI_responsiveness ? "+UI-wait+" : "-UI-",
                        Library.IsBusyAddingPDFs ? "+PDFAddPending+" : "-PDF-",
                        ConfigurationManager.Instance.ConfigurationRecord.DisableAllBackgroundTasks ? "+DisableBackgroundTasks+" : "-DB-",
                        Library.IsBusyRegeneratingTags ? "+LibRegenerate+" : "-Regen-",
                        dev_override ? "+DevAdvSettings+" : "-DevAdvCfg-"
                        );

                    // push the job onto the queue and start from the beginning:
                    if (next_job.is_group)
                    {
                        QueueJobGroup(next_job.job);
                    }
                    else
                    {
                        QueueJobSingle(next_job.job);
                    }

                    // reduce CPU load by snoozing for a bit.
                    if ((cpu_load_too_high_for_UI_responsiveness || dev_override) && !ShutdownableManager.Instance.IsShuttingDown)
                    {
                        daemon.Sleep(1000);
                    }

                    continue;
                }
                else
                {
                    // Get a count of how many jobs are left...
                    int job_queue_group_count;
                    int job_queue_single_count;
                    GetJobCounts(out job_queue_group_count, out job_queue_single_count);

                    // nitpick: we'll be one off in the counts as we have the current job as well, but I'm fine with an incidental 0/0/99% report.
                    int job_queue_total_count = job_queue_group_count + job_queue_single_count + 1;

                    // Do not flood the status update system when we zip through the work queue very fast: only update the counts every second or so,
                    // but be sure to be the first to update the counts after work has been (temporarily) stopped:
                    if (!ocr_working_next_notification_time.IsRunning || ocr_working_next_notification_time.ElapsedMilliseconds >= 1000)
                    {
                        StatusManager.Instance.UpdateStatus("PDFOCR", String.Format("{0} page(s) to textify and {1} page(s) to OCR.", job_queue_group_count, job_queue_single_count), 1, job_queue_total_count);

                        // Restart the interval only when an update was actually sent. Restarting on every
                        // job would keep the elapsed time below 1000 ms for as long as jobs finish in
                        // under a second, so no further update would ever be sent during that run.
                        ocr_working_next_notification_time.Restart();
                    }
                }

                // If the text has somehow appeared before we get to process it (perhaps two requests for the same job)
                if (!next_job.job.force_job && null != next_job.job.pdf_renderer.GetOCRText(next_job.job.page, queue_for_ocr: false))
                {
                    if (next_job.is_group)
                    {
                        Logging.Info("{1} Job '{0}' is redundant as text exists", next_job.job, "GROUP");
                    }
                    else
                    {
                        Logging.Warn("{1} Job '{0}' is redundant as text exists", next_job.job, "SINGLE");
                    }
                    continue;
                }

                // Make sure the temp directory exists and has not been deleted by some cleanup tool while Qiqqa is still running:
                if (!Main.TempDirectoryCreator.CreateDirectoryIfNonExistent())
                {
                    Logging.Error(@"Qiqqa needs the directory {0} to exist for it to function properly. The directory was re-created as apparently some overzealous external cleanup routine/application has removed it while Qiqqa is still running.", TempFile.TempDirectoryForQiqqa);
                }

                string temp_ocr_result_filename = TempFile.GenerateTempFilename("txt");

                try
                {
                    if (next_job.is_group)
                    {
                        ProcessNextJob_Group(next_job, temp_ocr_result_filename);
                    }
                    else
                    {
                        if (!ConfigurationManager.IsEnabled("RenderPDFPagesForOCR"))
                        {
                            Logging.Info($"Cannot OCR a single PDF page for PDF document {next_job.job.pdf_renderer.Fingerprint}, page {next_job.job.page} as PDF page image rendering has been disabled due to Developer Override setting { "RenderPDFPagesForOCR" }=false");

                            // re-queue until setting is changed (or have it pending indefinitely)
                            QueueJobSingle(next_job.job);
                        }
                        else
                        {
                            ProcessNextJob_Single(next_job, temp_ocr_result_filename);
                        }
                    }
                }
                catch (Exception ex)
                {
                    // A failing job must not take down the worker loop: log and move on.
                    Logging.Error(ex, "There was a problem processing job {0}", next_job.job);
                }
                finally
                {
                    try
                    {
                        // (it's okay to try to delete the tempfiles when we're terminating; the rest of the job has been skipped)
                        File.Delete(temp_ocr_result_filename);
                    }
                    catch (Exception ex)
                    {
                        Logging.Error(ex, "There was a problem deleting the temporary OCR file {0}", temp_ocr_result_filename);
                    }
                }
            }
            else
            {
                // Queue is empty: clear the status message once, then idle briefly.
                if (did_some_ocr_since_last_iteration)
                {
                    did_some_ocr_since_last_iteration = false;
                    StatusManager.Instance.ClearStatus("PDFOCR");
                    ocr_working_next_notification_time.Stop();
                }

                daemon.Sleep(500);
            }
        }
    }
}
/// <summary>
/// Runs the external text-extraction step for the page group containing the job's page,
/// falling back to a SINGLE job for that page when the group attempt fails or has failed before.
/// </summary>
/// <param name="next_job">The job wrapper describing the document, page and language.</param>
/// <param name="temp_ocr_result_filename">Path of the temporary file that receives the extraction output.</param>
private void ProcessNextJob_Group(NextJob next_job, string temp_ocr_result_filename)
{
    // Previously failed attempts on this PDF file are queued for single OCR attempts instead.
    string known_failure_token = NextJob.GetCurrentJobToken(next_job.job, next_job.is_group, false);
    if (failed_pdf_group_tokens.Contains(known_failure_token))
    {
        QueueJobSingle(next_job.job);
        return;
    }

    // Page range of the group this page belongs to (1-based), clipped to the document length.
    int first_page = ((next_job.job.page - 1) / next_job.job.TEXT_PAGES_PER_GROUP) * next_job.job.TEXT_PAGES_PER_GROUP + 1;
    int last_page = Math.Min(first_page + next_job.job.TEXT_PAGES_PER_GROUP - 1, next_job.job.pdf_renderer.PageCount);

    // Comma-separated page list, e.g. "1,2,3".
    StringBuilder page_list = new StringBuilder();
    for (int page = first_page; page <= last_page; ++page)
    {
        if (page_list.Length > 0)
        {
            page_list.Append(',');
        }
        page_list.Append(page);
    }
    string page_numbers_string = page_list.ToString();

    // Command line: GROUP "<pdf>" <pages> "<output file>" "<encrypted password>" "<language>"
    const string QUOTE = "\"";
    string ocr_parameters =
        "GROUP"
        + " " + QUOTE + next_job.job.pdf_renderer.PDFFilename + QUOTE
        + " " + page_numbers_string
        + " " + QUOTE + temp_ocr_result_filename + QUOTE
        + " " + QUOTE + ReversibleEncryption.Instance.EncryptString(next_job.job.pdf_renderer.PDFUserPassword) + QUOTE
        + " " + QUOTE + next_job.job.language + QUOTE;

    // MAKE SURE THIS NUMBER IS LARGER THAN THE NUMBER IN THE ACTUAL QiqqaOCR so that QiqqaOCR has time to finish up...!
    int wait_seconds = 60;

    if (CheckOCRProcessSuccess(ocr_parameters, wait_seconds))
    {
        next_job.job.pdf_renderer.StorePageTextGroup(next_job.job.page, next_job.job.TEXT_PAGES_PER_GROUP, temp_ocr_result_filename);
        return;
    }

    // The group failed: remember its token so later jobs skip GROUP mode ...
    string failed_group_token = NextJob.GetCurrentJobToken(next_job.job, next_job.is_group, false);
    failed_pdf_group_tokens.Add(failed_group_token);

    // ... and queue this page up for a single OCR attempt.
    QueueJobSingle(next_job.job);
}
/// <summary>
/// Worker loop of the OCR thread: while <c>still_running</c> is set, takes the next queued job
/// and processes it as a GROUP or SINGLE job, keeping the "PDFOCR" status entry up to date.
/// </summary>
/// <remarks>
/// The loop pauses for a second while jobs are pending and the library is busy adding PDFs,
/// and idles for half a second when no job is available. Jobs whose text already exists are
/// skipped unless they are FORCE jobs. The temporary result file is deleted after every job.
/// </remarks>
void ThreadEntry()
{
    bool status_needs_clearing = false;

    while (still_running)
    {
        // Snapshot the queue sizes under the lock; the counts include the job fetched below.
        int pending_group;
        int pending_single;
        lock (queue_lock)
        {
            pending_group = job_queue_group.Count;
            pending_single = job_queue_single.Count;
        }
        int pending_total = pending_group + pending_single;

        bool work_is_pending = pending_group > 0 || pending_single > 0;
        if (work_is_pending && Library.IsBusyAddingPDFs)
        {
            StatusManager.Instance.UpdateStatus("PDFOCR", "OCR paused while adding documents.");
            Thread.Sleep(1000);
            continue;
        }

        // The using block disposes next_job on every exit path, including the `continue` statements below.
        using (NextJob next_job = GetNextJob())
        {
            if (null == next_job)
            {
                // Nothing to do: clear the status entry once after a run of work, then idle.
                if (status_needs_clearing)
                {
                    status_needs_clearing = false;
                    StatusManager.Instance.ClearStatus("PDFOCR");
                }

                Thread.Sleep(500);
                continue;
            }

            status_needs_clearing = true;

            Logging.Info("Doing OCR for job '{0}'", next_job.job);
            StatusManager.Instance.UpdateStatus("PDFOCR", String.Format("{0} page(s) to textify and {1} page(s) to OCR.", pending_group, pending_single), 1, pending_total);

            // The text may have appeared before we got here (perhaps two requests for the same job).
            bool text_already_present = !next_job.job.force_job && null != next_job.job.pdf_renderer.GetOCRText(next_job.job.page, false);
            if (text_already_present)
            {
                Logging.Info("Job '{0}' is redundant as text exists", next_job.job);
                continue;
            }

            string result_file = TempFile.GenerateTempFilename("txt");
            try
            {
                if (next_job.is_group)
                {
                    ProcessNextJob_Group(next_job, result_file);
                }
                else
                {
                    ProcessNextJob_Single(next_job, result_file);
                }
            }
            catch (Exception ex)
            {
                // A failing job must not take down the worker loop: log and move on.
                Logging.Error(ex, "There was a problem processing job {0}", next_job.job);
            }
            finally
            {
                try
                {
                    File.Delete(result_file);
                }
                catch (Exception ex)
                {
                    Logging.Error(ex, "There was a problem deleting the temporary OCR file {0}", result_file);
                }
            }
        }
    }
}