示例#1
0
        /// <summary>
        /// Runs <c>DoOCR</c> for every entry of the shared <c>page_numbers</c> list against the shared
        /// <c>pdf_filename</c>, then writes the collected word lists to <c>ocr_output_filename</c>.
        ///
        /// A page whose OCR result is null or empty gets a placeholder list from
        /// <c>FakeEngine.ConvertToWordList()</c> and is recorded as failed.
        ///
        /// Any exception is logged and stored in <c>exception_ocr</c>; <c>has_exited_ocr</c> is set to
        /// true on every exit path.
        /// </summary>
        /// <param name="arg">Not used by this method.</param>
        private static void ThreadOCRMainEntry(object arg)
        {
            // Declared outside the try block so the catch handler can report how far we got.
            string source_pdf   = "???";
            int    current_page = 0;

            try
            {
                List <int> pages;

                // Read the shared settings while holding the lock.
                lock (global_vars_access_lock)
                {
                    source_pdf = pdf_filename;
                    pages      = page_numbers;
                }

                var words_per_page   = new Dictionary <int, WordList>();
                var success_per_page = new Dictionary <int, bool>();

                foreach (int page in pages)
                {
                    current_page = page;

                    WordList words = DoOCR(source_pdf, current_page);

                    Logging.Info("We have an OCR word list of length {0} for page {1}", words?.Count, current_page);

                    // Check that we have something to write
                    bool has_words = (null != words && words.Count > 0);
                    if (!has_words)
                    {
                        // FAKE a word list to shut up Qiqqa for the time being!
                        words_per_page[current_page]   = FakeEngine.ConvertToWordList();
                        success_per_page[current_page] = false;
                        continue;
                    }

                    words_per_page[current_page]   = words;
                    success_per_page[current_page] = true;
                }

                Logging.Info("Writing OCR to file {0}", ocr_output_filename);

                // The status string handed to WriteToFile reflects the outcome of the FIRST requested page only.
                bool   first_page_ok = success_per_page[pages[0]];
                string status_text   = first_page_ok ? "OCR" : "OCR-Failed";
                WordList.WriteToFile(ocr_output_filename, words_per_page, status_text);
            }
            catch (Exception ex)
            {
                Logging.Error(ex, "Problem while doing OCR for file {0} @ page {1}", source_pdf, current_page);

                // Store the failure in the shared field under the lock.
                lock (global_vars_access_lock)
                {
                    exception_ocr = ex;
                }
            }
            finally
            {
                // Mark this run as finished, whether it succeeded or failed.
                lock (global_vars_access_lock)
                {
                    has_exited_ocr = true;
                }
            }
        }
示例#2
0
        /// <summary>
        /// Program entry point: dispatches to one of the engines based on the first argument.
        ///
        /// Arguments are:
        ///
        /// 1) mode: GROUP
        /// 2) pdf_filename
        /// 3) page number(s) - comma separated
        /// 4) ocr_output_filename - where the extracted word list info is stored
        /// 5) pdf_user_password - encrypted
        /// 6) language - (unused)
        ///
        /// or:
        ///
        /// 1) mode: SINGLE
        /// 2) pdf_filename
        /// 3) page number - only one page per run
        /// 4) ocr_output_filename - where the extracted word list info is stored
        /// 5) pdf_user_password - encrypted
        /// 6) language - default is 'eng'
        ///
        /// or:
        ///
        /// 1) mode: SINGLE-FAKE
        /// 2) pdf_filename
        /// 3) page number - only one page per run
        /// 4) ocr_output_filename - where the extracted word list info is stored
        ///
        /// 7) NOKILL (optional, matched case-insensitively) - wait for a key press before exiting
        /// </summary>
        /// <param name="args">The command line arguments as described above.</param>
        /// <returns>0 when the selected mode completed without throwing; -1 when an exception was caught and logged.</returns>
        private static int Main(string[] args)
        {
            // This is used to return any errors to the OS
            int  exit_code = 0;

            // Use an ordinal, case-insensitive match. The previous ToUpper() + CompareTo() pair was
            // culture-sensitive: under e.g. a Turkish culture "nokill" upper-cases to "NOKİLL" and
            // the flag would silently be ignored.
            bool no_kill   = (args.Length > 6 && string.Equals(args[6], "NOKILL", StringComparison.OrdinalIgnoreCase));

            try
            {
                Thread.CurrentThread.Name = "Main";

                // Check that we were given the right number of parameters
                if (args.Length < 1)
                {
                    throw new Exception("Not enough command line arguments");
                }

                // The mode switch itself is matched case-sensitively.
                string mode_switch = args[0];
                switch (mode_switch)
                {
                case "GROUP":
                    TextExtractEngine.MainEntry(args, no_kill);
                    break;

                case "SINGLE":
                    OCREngine.MainEntry(args, no_kill);
                    break;

                case "SINGLE-FAKE":
                    FakeEngine.MainEntry(args, no_kill);
                    break;

                default:
                    throw new Exception("Unknown mode switch: " + mode_switch);
                }
            }
            catch (Exception ex)
            {
                // Log the full command line together with the exception so the failure can be reproduced.
                StringBuilder sb = new StringBuilder();
                sb.AppendLine("--- Parameters ---");
                foreach (string arg in args)
                {
                    sb.Append(arg);
                    sb.Append(" ");
                }
                sb.AppendLine();

                sb.AppendLine("--- Exception ---");
                sb.AppendLine(ex.ToString());

                Logging.Error("There was an error in QiqqaOCR:\n{0}", sb.ToString());
                exit_code = -1;
            }

            // Check if we should exit
            if (no_kill)
            {
                Logging.Error("PAUSED");
                Console.ReadKey();
            }

            // This must be the last line the application executes, EVAR!
            Logging.ShutDown();

            return(exit_code);
        }