/// <summary>
/// Drives the login step, closes the splash screen, calls <c>application.Run()</c> and then
/// signals shutdown. Any exception is logged rather than propagated.
/// </summary>
private static void Main()
{
    Logging.Info("+static Main()");

    try
    {
        // Let the user pick a login while the splash screen is still showing.
        splashscreen_window.UpdateMessage("Logging in");
        LoginWindow login_dialog = new LoginWindow();
        login_dialog.ChooseLogin(splashscreen_window);

        splashscreen_window.Close();
        WPFDoEvents.WaitForUIThreadActivityDone();

        // A failure inside Run() is logged separately so that SignalShutdown() is still reached.
        try
        {
            application.Run();
        }
        catch (Exception run_failure)
        {
            Logging.Error(run_failure, "Exception caught at Main() application.Run(). Disaster.");
        }

        SignalShutdown();
    }
    catch (Exception main_failure)
    {
        Logging.Error(main_failure, "Exception caught at Main(). Disaster.");
    }

    Logging.Info("-static Main()");
}
/// <summary>
/// Builds a pivot of two axis maps: for every (y key, x key) pair the values that occur in both
/// value sets are collected.
/// </summary>
/// <param name="map_y_axis">Map whose sorted keys become the rows of the pivot.</param>
/// <param name="map_x_axis">Map whose sorted keys become the columns of the pivot.</param>
/// <returns>
/// A <c>PivotResult</c> carrying the sorted key lists and a [row, column] matrix of shared values.
/// Cells without any shared value stay <c>null</c>. When the "LibraryPivot" status is cancelled
/// while the matrix is being built, rows that were not processed also stay <c>null</c>.
/// </returns>
public static PivotResult GeneratePivot(MultiMapSet<string, string> map_y_axis, MultiMapSet<string, string> map_x_axis)
{
    List<string> y_keys = new List<string>(map_y_axis.Keys);
    List<string> x_keys = new List<string>(map_x_axis.Keys);
    y_keys.Sort();
    x_keys.Sort();

    // The column value sets are the same for every row: look them up once here instead of
    // repeating the lookup for every row on every worker thread.
    HashSet<string>[] x_value_sets = new HashSet<string>[x_keys.Count];
    for (int x = 0; x < x_keys.Count; ++x)
    {
        x_value_sets[x] = map_x_axis.Get(x_keys[x]);
    }

    List<string>[,] common_fingerprints = new List<string>[y_keys.Count, x_keys.Count];

    StatusManager.Instance.ClearCancelled("LibraryPivot");

    int y_progress = 0;
    Parallel.For(0, y_keys.Count, (y, loop_state) =>
    {
        // Rows complete in arbitrary order, so progress is a shared counter rather than the row index.
        int y_progress_locked = Interlocked.Increment(ref y_progress);
        if (General.HasPercentageJustTicked(y_progress_locked, y_keys.Count))
        {
            StatusManager.Instance.UpdateStatusBusy("LibraryPivot", "Building library pivot", y_progress_locked, y_keys.Count, true);

            WPFDoEvents.WaitForUIThreadActivityDone(); // HackityHack

            if (StatusManager.Instance.IsCancelled("LibraryPivot"))
            {
                Logging.Warn("User cancelled library pivot generation");
                // Break() only prevents later iterations from starting; this row is still completed below.
                loop_state.Break();
            }
        }

        HashSet<string> y_values = map_y_axis.Get(y_keys[y]);

        for (int x = 0; x < x_value_sets.Length; ++x)
        {
            // Intersect() is lazily evaluated: materialise it exactly once instead of running the
            // query a first time for Any() and a second time to copy the result.
            List<string> common_fingerprint = y_values.Intersect(x_value_sets[x]).ToList();
            if (common_fingerprint.Count > 0)
            {
                common_fingerprints[y, x] = common_fingerprint;
            }
        }
    });

    StatusManager.Instance.UpdateStatus("LibraryPivot", "Built library pivot");

    PivotResult pivot_result = new PivotResult();
    pivot_result.y_keys = y_keys;
    pivot_result.x_keys = x_keys;
    pivot_result.common_fingerprints = common_fingerprints;
    return pivot_result;
}
/// <summary>
/// Renders one PDF page (with annotations, highlights and inks drawn on top) into a
/// <see cref="DocumentPage"/> sized to <c>page_size</c>.
/// </summary>
/// <param name="page_zero_based">Zero-based index into the range being printed; <c>page_from</c> is added to obtain the PDF page.</param>
/// <returns>The rendered page. It is also remembered in <c>last_document_page</c> and disposed on the next call.</returns>
public override DocumentPage GetPage(int page_zero_based)
{
    // Hackity hack
    WPFDoEvents.WaitForUIThreadActivityDone();

    // Release the page handed out by the previous call.
    last_document_page?.Dispose();
    last_document_page = null;

    int page = page_from + page_zero_based;

    StatusManager.Instance.UpdateStatus("PDFPrinter", String.Format("Printing page {0} of {1}", page_zero_based + 1, PageCount), page_zero_based + 1, PageCount, true);

    // Render the page at 300 DPI...
    using (MemoryStream rendered_bytes = new MemoryStream(pdf_renderer.GetPageByDPIAsImage(page, 300)))
    using (Image rendered_image = Image.FromStream(rendered_bytes))
    {
        PDFOverlayRenderer.RenderAnnotations(rendered_image, pdf_document, page, null);
        PDFOverlayRenderer.RenderHighlights(rendered_image, pdf_document, page);
        PDFOverlayRenderer.RenderInks(rendered_image, pdf_document, page);

        BitmapSource page_bitmap = BitmapImageTools.CreateBitmapSourceFromImage(rendered_image);
        page_bitmap.Freeze();

        // Rotate the image if its orientation does not match the printer
        bool paper_is_portrait = page_size.Width < page_size.Height;
        bool paper_is_landscape = page_size.Width > page_size.Height;
        bool bitmap_is_portrait = page_bitmap.Width < page_bitmap.Height;
        bool bitmap_is_landscape = page_bitmap.Width > page_bitmap.Height;
        if ((paper_is_portrait && bitmap_is_landscape) || (paper_is_landscape && bitmap_is_portrait))
        {
            page_bitmap = new TransformedBitmap(page_bitmap, new RotateTransform(90));
            page_bitmap.Freeze();
        }

        DrawingVisual visual = new DrawingVisual();
        using (DrawingContext context = visual.RenderOpen())
        {
            // The bitmap is stretched over the whole printable page.
            context.DrawImage(page_bitmap, new Rect(0, 0, page_size.Width, page_size.Height));
        }

        ++total_pages_printed;

        last_document_page = new DocumentPage(visual);
        return last_document_page;
    }
}
/// <summary>
/// Rebuilds the pivot grid from the currently selected X axis, Y axis and identifier, fills in
/// the row/column titles, the cell contents and the totals, and stores the result in
/// <c>last_pivot_result</c> / <c>last_ObjGridControl</c>.
/// </summary>
/// <exception cref="GenericException">Thrown when the sum of the row totals differs from the sum of the column totals.</exception>
private void Regenerate()
{
    // Restrict the report to the given documents, if any were supplied.
    HashSet<string> parent_fingerprints = null;
    if (PDFDocuments != null && PDFDocuments.Count > 0)
    {
        parent_fingerprints = new HashSet<string>();
        foreach (var document in PDFDocuments)
        {
            parent_fingerprints.Add(document.Fingerprint);
        }
    }

    MultiMapSet<string, string> map_y_axis = LibraryPivotReportBuilder.GenerateAxisMap((string)ObjYAxis.SelectedItem, Library, parent_fingerprints);
    MultiMapSet<string, string> map_x_axis = LibraryPivotReportBuilder.GenerateAxisMap((string)ObjXAxis.SelectedItem, Library, parent_fingerprints);
    LibraryPivotReportBuilder.IdentifierImplementations.IdentifierImplementationDelegate identifier_implementation =
        LibraryPivotReportBuilder.IdentifierImplementations.GetIdentifierImplementation((string)ObjIdentifier.SelectedItem);

    LibraryPivotReportBuilder.PivotResult pivot_result = LibraryPivotReportBuilder.GeneratePivot(map_y_axis, map_x_axis);

    // Grid layout: row 0 / column 0 hold the titles, the last row / column hold the totals.
    GridControl grid = new GridControl();
    ObjGridControlHolder.Content = grid;
    grid.Model.RowCount = map_y_axis.Count + 2;
    grid.Model.ColumnCount = map_x_axis.Count + 2;

    int y_count = pivot_result.y_keys.Count;
    int x_count = pivot_result.x_keys.Count;
    int totals_row = y_count + 1;
    int totals_column = x_count + 1;

    // ROW/COLUMN Titles
    for (int row = 0; row < y_count; ++row)
    {
        grid.Model[row + 1, 0].CellValue = pivot_result.y_keys[row];
        grid.Model[row + 1, 0].CellValueType = typeof(string);
    }
    for (int column = 0; column < x_count; ++column)
    {
        grid.Model[0, column + 1].CellValue = pivot_result.x_keys[column];
        grid.Model[0, column + 1].CellValueType = typeof(string);
    }

    // Grid contents
    StatusManager.Instance.ClearCancelled("LibraryPivot");
    for (int row = 0; row < y_count; ++row)
    {
        if (General.HasPercentageJustTicked(row, y_count))
        {
            StatusManager.Instance.UpdateStatusBusy("LibraryPivot", "Building library pivot grid", row, y_count, true);

            WPFDoEvents.WaitForUIThreadActivityDone(); // HackityHack

            if (StatusManager.Instance.IsCancelled("LibraryPivot"))
            {
                Logging.Warn("User cancelled library pivot grid generation");
                break;
            }
        }

        for (int column = 0; column < x_count; ++column)
        {
            identifier_implementation(Library, pivot_result.common_fingerprints[row, column], grid.Model[row + 1, column + 1]);
        }
    }

    StatusManager.Instance.UpdateStatus("LibraryPivot", "Finished library pivot");

    // ROW/COLUMN Totals

    // Per-row totals go in the last column.
    int y_total = 0;
    for (int row = 0; row < y_count; ++row)
    {
        int row_total = 0;
        for (int column = 0; column < x_count; ++column)
        {
            row_total += pivot_result.common_fingerprints[row, column]?.Count ?? 0;
        }

        grid.Model[row + 1, totals_column].CellValue = row_total;
        grid.Model[row + 1, totals_column].CellValueType = typeof(int);

        y_total += row_total;
    }

    // Per-column totals go in the last row.
    int x_total = 0;
    for (int column = 0; column < x_count; ++column)
    {
        int column_total = 0;
        for (int row = 0; row < y_count; ++row)
        {
            column_total += pivot_result.common_fingerprints[row, column]?.Count ?? 0;
        }

        grid.Model[totals_row, column + 1].CellValue = column_total;
        grid.Model[totals_row, column + 1].CellValueType = typeof(int);

        x_total += column_total;
    }

    // Both sums run over the same matrix, so they must agree.
    int common_total = (x_total + y_total) / 2;
    bool totals_agree = (common_total == x_total) && (common_total == y_total);
    if (!totals_agree)
    {
        throw new GenericException("X and Y totals do not match?!");
    }

    grid.Model[totals_row, totals_column].CellValue = common_total;
    grid.Model[totals_row, totals_column].CellValueType = typeof(int);

    grid.Model[0, totals_column].CellValue = "TOTAL";
    grid.Model[0, totals_column].CellValueType = typeof(string);
    grid.Model[totals_row, 0].CellValue = "TOTAL";
    grid.Model[totals_row, 0].CellValueType = typeof(string);

    // Store the results for the toolbar buttons
    last_pivot_result = pivot_result;
    last_ObjGridControl = grid;
}
/// <summary>
/// Clears <c>StillRunning</c>, discards all queued jobs and queues a background work item that
/// joins the worker threads, aborting those that are still alive once
/// <c>Constants.MAX_WAIT_TIME_MS_AT_PROGRAM_SHUTDOWN</c> has elapsed.
/// </summary>
/// <remarks>This method returns right after queueing the work item; it does not wait for the threads to end.</remarks>
private void Shutdown()
{
    Logging.Info("Stopping PDFTextExtractor threads");
    StillRunning = false;

    GetJobCounts(out int job_queue_group_count, out int job_queue_single_count);
    Logging.Debug特("PDFTextExtractor::Shutdown: flushing the queue ({0} + {1} items discarded)", job_queue_group_count, job_queue_single_count);
    FlushAllJobs();

    SafeThreadPool.QueueUserWorkItem(state =>
    {
        Logging.Info("+Stopping PDFTextExtractor threads (async)");

        bool[] has_ended = new bool[NUM_OCR_THREADS];
        Stopwatch waited = Stopwatch.StartNew();

        for (;;)
        {
            // Give every thread that has not ended yet up to 150 ms to do so.
            int pending = 0;
            for (int i = 0; i < NUM_OCR_THREADS; ++i)
            {
                if (has_ended[i])
                {
                    continue;
                }

                if (threads[i].Join(150))
                {
                    has_ended[i] = true;
                    threads[i] = null;
                }
                else
                {
                    pending++;
                }
            }

            Logging.Info("Stopping PDFTextExtractor threads (async): {0} threads are pending.", pending);

            if (pending == 0)
            {
                break;
            }

            // abort the threads if they're taking way too long:
            if (waited.ElapsedMilliseconds >= Constants.MAX_WAIT_TIME_MS_AT_PROGRAM_SHUTDOWN)
            {
                for (int i = 0; i < NUM_OCR_THREADS; ++i)
                {
                    if (has_ended[i])
                    {
                        continue;
                    }

                    Logging.Error("Stopping PDFTextExtractor threads (async): timeout ({1} sec), hence ABORTing PDF/OCR thread {0}.", i, Constants.MAX_WAIT_TIME_MS_AT_PROGRAM_SHUTDOWN / 1000);
                    threads[i].Abort();
                }
            }

            WPFDoEvents.WaitForUIThreadActivityDone();
        }

        Logging.Info("-Stopping PDFTextExtractor threads (async) --> all done!");
    });

    Logging.Info("Stopped PDFTextExtractor");
}
/// <summary>
/// Worker loop: repeatedly takes the next job from the queue and processes it until shutdown is
/// signalled or <c>StillRunning</c> is cleared.
/// </summary>
/// <param name="obj">The <c>Daemon</c> this loop runs on; it is used for the sleeps between polls.</param>
/// <remarks>
/// The loop pauses while the library is adding PDFs or regenerating tags, and while
/// <c>DisableAllBackgroundTasks</c> is set. A job that was already taken from the queue when one
/// of those conditions (or a slow UI wait) is detected is pushed back onto the queue.
/// </remarks>
private void ThreadEntry(object obj)
{
    Daemon daemon = (Daemon)obj;

    // Set whenever a "PDFOCR" status message was posted, so it gets cleared once the queue runs dry.
    bool did_some_ocr_since_last_iteration = false;

    while (true)
    {
        if (Utilities.Shutdownable.ShutdownableManager.Instance.IsShuttingDown || !StillRunning)
        {
            int job_queue_group_count;
            int job_queue_single_count;
            GetJobCounts(out job_queue_group_count, out job_queue_single_count);

            Logging.Debug特("PDFTextExtractor: shutting down and flushing the queue ({0} + {1} items discarded)", job_queue_group_count, job_queue_single_count);

            FlushAllJobs();
            break;
        }

        // If this library is busy, skip it for now
        if (Library.IsBusyAddingPDFs || Library.IsBusyRegeneratingTags)
        {
            // Get a count of how many jobs are left...
            int job_queue_group_count;
            int job_queue_single_count;
            GetJobCounts(out job_queue_group_count, out job_queue_single_count);

            if (0 < job_queue_group_count || 0 < job_queue_single_count)
            {
                did_some_ocr_since_last_iteration = true;
                StatusManager.Instance.UpdateStatus("PDFOCR", "OCR paused while adding documents.");
                // Stopping the timer makes the first job after the pause report its counts immediately.
                ocr_working_next_notification_time.Stop();
            }

            daemon.Sleep(2000);
            continue;
        }

        if (ConfigurationManager.Instance.ConfigurationRecord.DisableAllBackgroundTasks)
        {
            Logging.Debug特("OCR/Textify daemons are forced to sleep via Configuration::DisableAllBackgroundTasks");
            daemon.Sleep(1000);
            continue;
        }

        using (NextJob next_job = GetNextJob())
        {
            if (null != next_job)
            {
                did_some_ocr_since_last_iteration = true;

                Logging.Debug("Doing OCR for job '{0}'", next_job.job);

                long clk_duration;
                {
                    Stopwatch clk = Stopwatch.StartNew();

                    // Relinquish control to the UI thread to make sure responsiveness remains tolerable at 100% CPU load.
                    WPFDoEvents.WaitForUIThreadActivityDone();

                    clk_duration = clk.ElapsedMilliseconds;
                }

                // The call above can take quite a while to complete, so check all abort/delay checks once again, just in case...:
                if (Utilities.Shutdownable.ShutdownableManager.Instance.IsShuttingDown || !StillRunning
                    || clk_duration > 100
                    || Library.IsBusyAddingPDFs
                    || Library.IsBusyRegeneratingTags
                    || ConfigurationManager.Instance.ConfigurationRecord.DisableAllBackgroundTasks)
                {
                    Logging.Warn("Recheck job queue after WaitForUIThreadActivityDone took {0}ms or shutdown/delay signals were detected: {1}/{2}/{3}/{4}/{5}.",
                        clk_duration,
                        (Utilities.Shutdownable.ShutdownableManager.Instance.IsShuttingDown || !StillRunning) ? "+Shutdown+" : "-SD-",
                        clk_duration > 100 ? "+UI-wait+" : "-UI-",
                        Library.IsBusyAddingPDFs ? "+PDFAddPending+" : "-PDF-",
                        ConfigurationManager.Instance.ConfigurationRecord.DisableAllBackgroundTasks ? "+DisableBackgroundTasks+" : "-DB-",
                        Library.IsBusyRegeneratingTags ? "+LibRegenerate+" : "-Regen-"
                    );

                    // push the job onto the queue and start from the beginning:
                    if (next_job.is_group)
                    {
                        QueueJobGroup(next_job.job);
                    }
                    else
                    {
                        QueueJobSingle(next_job.job);
                    }
                    continue;
                }
                else
                {
                    // Get a count of how many jobs are left...
                    int job_queue_group_count;
                    int job_queue_single_count;
                    GetJobCounts(out job_queue_group_count, out job_queue_single_count);

                    // nitpick: we'll be one off in the counts as we have the current job as well, but I'm fine with an incidental 0/0/99% report.
                    int job_queue_total_count = job_queue_group_count + job_queue_single_count + 1;

                    // Do not flood the status update system when we zip through the work queue very fast: only update the counts every second or so,
                    // but be sure to be the first to update the counts after work has been (temporarily) stopped:
                    if (!ocr_working_next_notification_time.IsRunning || ocr_working_next_notification_time.ElapsedMilliseconds >= 1000)
                    {
                        StatusManager.Instance.UpdateStatus("PDFOCR", String.Format("{0} page(s) to textify and {1} page(s) to OCR.", job_queue_group_count, job_queue_single_count), 1, job_queue_total_count);

                        // Restart the throttle timer only when a report was actually sent. Restarting it
                        // after every job would keep the elapsed time below one second whenever jobs
                        // complete quickly, so no further report would ever be sent.
                        ocr_working_next_notification_time.Restart();
                    }
                }

                // If the text has somehow appeared before we get to process it (perhaps two requests for the same job)
                if (!next_job.job.force_job && null != next_job.job.pdf_renderer.GetOCRText(next_job.job.page, queue_for_ocr: false))
                {
                    if (next_job.is_group)
                    {
                        Logging.Info("{1} Job '{0}' is redundant as text exists", next_job.job, "GROUP");
                    }
                    else
                    {
                        Logging.Warn("{1} Job '{0}' is redundant as text exists", next_job.job, "SINGLE");
                    }
                    continue;
                }

                // Make sure the temp directory exists and has not been deleted by some cleanup tool while Qiqqa is still running:
                if (!Main.TempDirectoryCreator.CreateDirectoryIfNonExistent())
                {
                    Logging.Error(@"Qiqqa needs the directory {0} to exist for it to function properly. The directory was re-created as apparently some overzealous external cleanup routine/application has removed it while Qiqqa is still running.", TempFile.TempDirectoryForQiqqa);
                }

                string temp_ocr_result_filename = TempFile.GenerateTempFilename("txt");

                try
                {
                    if (next_job.is_group)
                    {
                        ProcessNextJob_Group(next_job, temp_ocr_result_filename);
                    }
                    else
                    {
                        ProcessNextJob_Single(next_job, temp_ocr_result_filename);
                    }
                }
                catch (Exception ex)
                {
                    Logging.Error(ex, "There was a problem processing job {0}", next_job.job);
                }
                finally
                {
                    try
                    {
                        // (it's okay to try to delete the tempfiles when we're terminating; the rest of the job has been skipped)
                        File.Delete(temp_ocr_result_filename);
                    }
                    catch (Exception ex)
                    {
                        Logging.Error(ex, "There was a problem deleting the temporary OCR file {0}", temp_ocr_result_filename);
                    }
                }
            }
            else
            {
                // Nothing queued: clear any status that is still showing and poll again shortly.
                if (did_some_ocr_since_last_iteration)
                {
                    did_some_ocr_since_last_iteration = false;
                    StatusManager.Instance.ClearStatus("PDFOCR");
                    ocr_working_next_notification_time.Stop();
                }

                daemon.Sleep(500);
            }
        }
    }
}
/// <summary>
/// Directory-scan filter: decides whether the given file system entry is a PDF file that should
/// be imported into the library.
/// </summary>
/// <param name="obj">The file system entry under test.</param>
/// <returns>
/// <c>true</c> when the entry is a regular <c>.pdf</c> file that has not been processed before,
/// is larger than 66 bytes, could be fingerprinted, and whose fingerprint is neither in the
/// library nor already part of this import set; otherwise <c>false</c>.
/// </returns>
/// <exception cref="OperationCanceledException">
/// Thrown to abort the scan when the application is shutting down, when
/// <c>DisableAllBackgroundTasks</c> is set, or when the library or the folder watcher manager
/// is no longer available.
/// </exception>
internal bool DecideIfIncludeDuringDirScan(FileSystemEntryInfo obj)
{
    bool isRegularFile = !(obj.IsDevice || obj.IsDirectory || obj.IsMountPoint || /* obj.IsReparsePoint (hardlink!) || */ obj.IsOffline || obj.IsSystem || obj.IsTemporary);

    Logging.Debug("FolderWatcher: testing {1} '{0}' for inclusion in the Qiqqa library.", obj.FullPath, isRegularFile ? "regular File" : obj.IsDirectory ? "directory" : "node");

    if (Utilities.Shutdownable.ShutdownableManager.Instance.IsShuttingDown)
    {
        Logging.Info("FolderWatcher: Breaking out of inner processing loop due to daemon termination");
        throw new OperationCanceledException("FolderWatcher: Breaking out of inner processing loop due to daemon termination");
    }

    if (Qiqqa.Common.Configuration.ConfigurationManager.Instance.ConfigurationRecord.DisableAllBackgroundTasks)
    {
        Logging.Info("FolderWatcher: Breaking out of inner processing loop due to DisableAllBackgroundTasks");
        throw new OperationCanceledException("FolderWatcher: Breaking out of inner processing loop due to DisableAllBackgroundTasks");
    }

    if (library?.TypedTarget == null || folder_watcher_manager?.TypedTarget == null)
    {
        Logging.Info("FolderWatcher: Breaking out of inner processing loop due to disposed library and/or watch manager");
        throw new OperationCanceledException("FolderWatcher: Breaking out of inner processing loop due to disposed library and/or watch manager");
    }

    bool have_we_slept = false;

    // NOTE(review): a millisecond reading is compared against a constant whose name says
    // "seconds" -- confirm the unit in which MAX_SECONDS_PER_ITERATION is defined.
    if (watch_stats.index_processing_clock.ElapsedMilliseconds > MAX_SECONDS_PER_ITERATION)
    {
        Logging.Info("FolderWatcher: Taking a nap due to MAX_SECONDS_PER_ITERATION: {0} seconds consumed, {1} threads pending", watch_stats.index_processing_clock.ElapsedMilliseconds / 1E3, SafeThreadPool.QueuedThreadCount);

        // Collect various 'pending' counts to help produce a stretched sleep/delay period
        // in order to allow the other background tasks to keep up with the PDF series being
        // fed into them by this task.
        int thr_cnt = Math.Max(0, SafeThreadPool.QueuedThreadCount - 2);
        int queued_cnt = Qiqqa.Documents.Common.DocumentQueuedStorer.Instance.PendingQueueCount;
        Qiqqa.Documents.PDF.PDFRendering.PDFTextExtractor.Instance.GetJobCounts(out var textify_count, out var ocr_count);

        // The nap is capped at 60 seconds.
        int duration = 1 * 1000 + thr_cnt * 250 + queued_cnt * 20 + textify_count * 50 + ocr_count * 500;

        watch_stats.daemon.Sleep(Math.Min(60 * 1000, duration));

        // Relinquish control to the UI thread to make sure responsiveness remains tolerable at 100% CPU load.
        WPFDoEvents.WaitForUIThreadActivityDone();

        // reset:
        watch_stats.index_processing_clock.Restart();

        have_we_slept = true;
    }

    // only include *.pdf files. The extension is compared ordinally and case-insensitively so the
    // outcome does not depend on the current culture; a null extension simply does not match.
    if (!isRegularFile || !string.Equals(obj.Extension, ".pdf", StringComparison.OrdinalIgnoreCase))
    {
        return false;
    }

    // check if the given file isn't already present in the library:
    watch_stats.scanned_file_count++;

    // If we already have this file in the "cache since we started", skip it
    if (folder_watcher_manager.TypedTarget.HaveProcessedFile(obj.FullPath))
    {
        Logging.Debug("FolderWatcher is skipping {0} as it has already been processed", obj.FullPath);
        watch_stats.skipped_file_count++;
        return false;
    }

    // As we have slept a while, it's quite unsure whether that file still exists.
    // Include it only when it still exists and otherwise be sure to retrigger a scan to follow up
    // any other directory changes.
    if (have_we_slept && !File.Exists(obj.FullPath))
    {
        Logging.Info("FolderWatcher is skipping {0} as it has disappeared while we were sleeping", obj.FullPath);
        FolderContentsHaveChanged = true;
        return false;
    }

    // ignore zero-length and tiny sized files as those sure are buggy/illegal PDFs:
    //
    // https://stackoverflow.com/questions/17279712/what-is-the-smallest-possible-valid-pdf
    if (obj.FileSize <= 66)
    {
        Logging.Warn("FolderWatcher is skipping {0} as it is too small to be a valid PDF file @ {1} bytes", obj.FullPath, obj.FileSize);
        return false;
    }

    // Check that the file is not still locked - if it is, mark that the folder is still "changed" and come back later.
    //
    // We do this at the same time as calculating the file fingerprint as both actions require (costly) File I/O
    // and can be folded together: if the fingerprint fails, that's 99.9% sure a failure in the File I/O, hence
    // a locked or otherwise inaccessible file.
    string fingerprint;
    try
    {
        fingerprint = StreamFingerprint.FromFile(obj.FullPath);
    }
    catch (Exception ex)
    {
        Logging.Error(ex, "Watched folder contains file '{0}' which is locked, so coming back later...", obj.FullPath);
        FolderContentsHaveChanged = true;
        return false;
    }

    // check if the PDF is already known:
    PDFDocument doc = Library.GetDocumentByFingerprint(fingerprint);
    if (doc != null)
    {
        // Add this file to the list of processed files...
        Logging.Info("FolderWatcher is skipping {0} as it already exists in the library as fingerprint {1}, title: {2}", obj.FullPath, fingerprint, doc.TitleCombined);
        folder_watcher_manager.TypedTarget.RememberProcessedFile(obj.FullPath);
        watch_stats.skipped_file_count++;
        return false;
    }

    // The same content may show up under several paths during one scan: import it only once.
    if (watch_stats.file_hashes_added.TryGetValue(fingerprint, out var dupe_file_path))
    {
        Logging.Info("FolderWatcher is skipping {0} as it has already been included in the import set as file {1} which has the same fingerprint {2}", obj.FullPath, dupe_file_path, fingerprint);
        watch_stats.skipped_file_count++;
        return false;
    }

    watch_stats.file_hashes_added.Add(fingerprint, obj.FullPath);
    return true;
}
/// <summary>
/// Saves the queued documents one at a time until <c>PendingQueueCount</c> reports none left,
/// posting progress under the "DocumentQueuedStorer" status key.
/// </summary>
/// <param name="force_flush_no_matter_what">
/// Stored in <c>ForcedFlushRequested</c>. When <c>false</c>, the method asserts that it is not on
/// the UI thread, returns early while the library is adding PDFs, and yields to the UI thread
/// before each document.
/// </param>
private void FlushDocuments(bool force_flush_no_matter_what)
{
    ForcedFlushRequested = force_flush_no_matter_what;

    if (!force_flush_no_matter_what)
    {
        WPFDoEvents.AssertThisCodeIs_NOT_RunningInTheUIThread();
    }

    int flushed_so_far = 0;

    for (;;)
    {
        int still_queued = PendingQueueCount;
        int overall_count = flushed_so_far + still_queued;

        if (still_queued <= 0)
        {
            StatusManager.Instance.ClearStatus("DocumentQueuedStorer");
            return;
        }

        StatusManager.Instance.UpdateStatus("DocumentQueuedStorer", String.Format("{0}/{1} documents still to flush", still_queued, overall_count), flushed_so_far, overall_count);

        // ForcedFlushRequested is read again on every pass instead of relying on the parameter.
        if (!ForcedFlushRequested)
        {
            // No flushing while still adding... unless we're quitting the executable already.
            if (Library.IsBusyAddingPDFs)
            {
                return;
            }

            // Relinquish control to the UI thread to make sure responsiveness remains tolerable at 100% CPU load.
            WPFDoEvents.WaitForUIThreadActivityDone();
        }

        // grab one PDF to save/flush: the lock is held only while the entry is taken out of the
        // queue, not while the document is being saved.
        PDFDocument next_to_flush = null;

        // Utilities.LockPerfTimer l2_clk = Utilities.LockPerfChecker.Start();
        lock (documents_to_store_lock)
        {
            // l2_clk.LockPerfTimerStop();
            foreach (var queued in documents_to_store)
            {
                // Leave the loop right after removing: the enumeration must not continue once the
                // collection has been changed.
                next_to_flush = queued.Value;
                documents_to_store.Remove(queued.Key);
                break;
            }
        }

        if (next_to_flush != null)
        {
            next_to_flush.SaveToMetaData(ForcedFlushRequested);
            flushed_so_far++;
        }
    }
}