Пример #1
0
 /// <summary>
 /// Runs OCR on <paramref name="target"/> with a freshly created English
 /// <c>TesseractEngine</c> that reads its data from <c>./tessdata</c>.
 /// </summary>
 /// <param name="target">
 /// Image to recognise. This method takes ownership: the image is disposed before
 /// returning, both on success and on failure.
 /// </param>
 /// <param name="confidence">
 /// Mean confidence reported by the processed page, or 0 when an exception was caught.
 /// </param>
 /// <returns>The recognised text, or an empty string when an exception was caught.</returns>
 private string Ocr(Pix target, out double confidence)
 {
     try
     {
         using (var engine = new TesseractEngine(@"./tessdata", "eng", EngineMode.Default))
         using (var page = engine.Process(target))
         {
             var text = page.GetText();
             confidence = page.GetMeanConfidence();
             return text;
         }
     }
     catch (Exception e)
     {
         // Failures are logged and swallowed; the caller sees "" with confidence 0.
         Trace.TraceError(e.ToString());
         Debug.WriteLine("Unexpected Error: " + e.Message);
         Debug.WriteLine("Details: ");
         Debug.WriteLine(e.ToString());
     }
     finally
     {
         // Previously the image was released only on the success path, so it leaked
         // whenever engine creation or processing threw.
         if (target != null)
         {
             target.Dispose();
         }
     }

     confidence = 0;
     return "";
 }
Пример #2
0
        /// <summary>
        /// Converts an image to a <c>Pix</c>, extracts its cells and stores them row by row
        /// in an <see cref="OcrResult"/>.
        /// </summary>
        /// <param name="image">
        /// Source image; it is passed on as a <see cref="Bitmap"/> and disposed before this
        /// method returns, whether or not extraction succeeded.
        /// </param>
        /// <returns>
        /// An <see cref="OcrResult"/> constructed with <c>root = floor(sqrt(cell count))</c> and
        /// filled with the first <c>root * root</c> cells; any remaining cells are ignored.
        /// </returns>
        public static OcrResult PopulateMatrixFromImage(Image image)
        {
            Pix pix = null;

            try
            {
                pix = LoadBitmapToPix(image as Bitmap);

                List <string> cells = GetCellsFromPix(pix);
                int root   = (int)Math.Floor(Math.Sqrt(cells.Count));
                var result = new OcrResult(root);

                // Only root * root cells fit the square; root * root never exceeds cells.Count,
                // and it is 0 for an empty list, so the division below is never by zero.
                int usableCells = root * root;
                for (int cell = 0; cell < usableCells; cell++)
                {
                    result.SetCell(cell / root, cell % root, cells[cell]);
                }

                return result;
            }
            finally
            {
                // Release both images even when loading or cell extraction throws.
                if (image != null)
                {
                    image.Dispose();
                }
                if (pix != null)
                {
                    pix.Dispose();
                }
            }
        }
Пример #3
0
        /// <summary>
        /// Pre-processes an image, runs it through <c>TesseractOCRCore</c> and returns the text.
        /// </summary>
        /// <param name="ImageToUse">Image handed to <c>LoadImage</c>.</param>
        /// <param name="Zoomlevel">Value forwarded to <c>AccordImageProc.ImageProcessing</c>.</param>
        /// <param name="TimeTaken">
        /// Seconds elapsed from method entry until the confidence has been read
        /// (disposal of the page and image is not included).
        /// </param>
        /// <param name="Confidence">Mean confidence reported by the processed page.</param>
        /// <returns>The text returned by <c>Page.GetText</c>.</returns>
        public string OCRimage(Image ImageToUse, int Zoomlevel, out double TimeTaken, out float Confidence)
        {
            // Stopwatch is monotonic; DateTime.Now differences can jump with clock/DST changes.
            var timer = System.Diagnostics.Stopwatch.StartNew();

            // Load the image, then post-process it.
            // NOTE(review): LoadImage presumably populates ImageToOCR - confirm.
            LoadImage(ImageToUse);
            ImageToOCR = AccordImageProcessing.AccordImageProc.ImageProcessing(ImageToOCR, Zoomlevel);

            // Convert to Tesseract format via an in-memory BMP.
            byte[] imageBytes = ToByteArray(ImageToOCR, System.Drawing.Imaging.ImageFormat.Bmp);

            // 'using' releases the page first and then the image, also when OCR throws.
            using (Pix img = Pix.LoadFromMemory(imageBytes))
            using (Page page = TesseractOCRCore.Process(img))
            {
                string text = page.GetText();

                Confidence = page.GetMeanConfidence();
                TimeTaken  = timer.Elapsed.TotalSeconds;
                return text;
            }
        }
Пример #4
0
        /// <summary>
        /// Pre-processes an image, runs it through <c>TesseractOCRCore</c> and bundles the
        /// text, elapsed time and confidence into a <see cref="TesseractOCRResult"/>.
        /// </summary>
        /// <param name="ImageToUse">Image handed to <c>LoadImage</c>.</param>
        /// <param name="Zoomlevel">Value forwarded to <c>AccordImageProc.ImageProcessing</c>.</param>
        /// <returns>
        /// A result built from the recognised text, the seconds elapsed from method entry until
        /// the confidence has been read, and the page's mean confidence.
        /// </returns>
        public TesseractOCRResult OCRimage(Image ImageToUse, int Zoomlevel)
        {
            // Stopwatch is monotonic; DateTime.Now differences can jump with clock/DST changes.
            var timer = System.Diagnostics.Stopwatch.StartNew();

            // Load the image, then post-process it.
            // NOTE(review): LoadImage presumably populates ImageToOCR - confirm.
            LoadImage(ImageToUse);
            ImageToOCR = AccordImageProcessing.AccordImageProc.ImageProcessing(ImageToOCR, Zoomlevel);

            // 'using' releases the page first and then the image, also when OCR throws.
            using (Pix img = PixConverter.ToPix(ImageToOCR))
            using (Page page = TesseractOCRCore.Process(img))
            {
                string text       = page.GetText();
                float  confidence = page.GetMeanConfidence();
                double timeTaken  = timer.Elapsed.TotalSeconds;

                return new TesseractOCRResult(text, timeTaken, confidence);
            }
        }
Пример #5
0
 /// <summary>
 /// Disposes the given <see cref="Tesseract.Pix"/>; a <c>null</c> argument is ignored.
 /// </summary>
 /// <param name="pix">The <see cref="Tesseract.Pix"/> to dispose, or <c>null</c>.</param>
 internal static void DestroyPix(Pix pix)
 {
     if (pix == null)
     {
         return;
     }

     pix.Dispose();
 }
Пример #6
0
        /// <summary>
        /// Processes <paramref name="img"/> with <paramref name="engine"/> and wraps the outcome.
        /// </summary>
        /// <param name="img">Image to recognise; it is disposed before returning unless already disposed.</param>
        /// <param name="engine">Engine used to process the image.</param>
        /// <returns>
        /// A <see cref="SimpleRecognitionResult"/> carrying text and mean confidence, or an
        /// <see cref="ErrorResult"/> wrapping the exception when processing fails.
        /// </returns>
        public static IRecognitionResult ReadFile(Pix img, TesseractEngine engine)
        {
            Stopwatch stopwatch = Stopwatch.StartNew();

            try
            {
                using (Page page = engine.Process(img))
                {
                    return new SimpleRecognitionResult(page.GetText(), page.GetMeanConfidence());
                }
            }
            catch (Exception error)
            {
                // Failures are reported on the console and returned as a result object.
                Console.WriteLine(error);
                return new ErrorResult(error);
            }
            finally
            {
                stopwatch.Stop();
                Console.WriteLine("Processing time: " + stopwatch.Elapsed);

                // The image is always released here, regardless of the outcome above.
                bool imageIsNotDisposed = img != null && !img.IsDisposed;
                Console.WriteLine("imageIsNotDisposed " + imageIsNotDisposed);
                if (imageIsNotDisposed)
                {
                    img.Dispose();
                }
            }
        }
Пример #7
0
        /// <summary>
        /// Runs OCR on an image file, saves the thresholded image to
        /// <c>./adtemp/process.png</c>, echoes the recognised words to the console and
        /// returns the full page text.
        /// </summary>
        /// <param name="filename">Path of the image file to load.</param>
        /// <returns>
        /// A list holding the page text as its single element, or an empty list when the
        /// <c>ocr</c> engine is <c>null</c> or the image could not be loaded.
        /// </returns>
        public List <string> ocrDetectString(string filename)
        {
            List <string> result = new List <string>();

            if (ocr == null)
            {
                return result;
            }

            // 'using' guarantees that the image and page are released even if OCR throws.
            using (Pix pix = Pix.LoadFromFile(filename))
            {
                if (pix == null)
                {
                    return result;
                }

                using (Page page = ocr.Process(pix))
                {
                    // The thresholded image is a separate Pix that was never disposed before.
                    using (Pix processed = page.GetThresholdedImage())
                    {
                        processed.Save("./adtemp/process.png");
                    }

                    string text = page.GetText();

                    // Walk block -> paragraph -> line -> word and print each word.
                    using (var iter = page.GetIterator())
                    {
                        iter.Begin();
                        do
                        {
                            do
                            {
                                do
                                {
                                    do
                                    {
                                        if (iter.IsAtBeginningOf(PageIteratorLevel.Block))
                                        {
                                            Console.WriteLine("<BLOCK>");
                                        }

                                        Console.Write(iter.GetText(PageIteratorLevel.Word));
                                        Console.Write(" ");

                                        if (iter.IsAtFinalOf(PageIteratorLevel.TextLine, PageIteratorLevel.Word))
                                        {
                                            Console.WriteLine();
                                        }
                                    } while (iter.Next(PageIteratorLevel.TextLine, PageIteratorLevel.Word));

                                    if (iter.IsAtFinalOf(PageIteratorLevel.Para, PageIteratorLevel.TextLine))
                                    {
                                        Console.WriteLine();
                                    }
                                } while (iter.Next(PageIteratorLevel.Para, PageIteratorLevel.TextLine));
                            } while (iter.Next(PageIteratorLevel.Block, PageIteratorLevel.Para));
                        } while (iter.Next(PageIteratorLevel.Block));
                    }

                    Console.WriteLine(text);
                    result.Add(text);
                }
            }

            return result;
        }
Пример #8
0
        /// <summary>
        /// Recognises <paramref name="pix"/> as a single block with the shared <c>gOcr</c>
        /// engine and normalises the resulting text.
        /// </summary>
        /// <param name="pix">
        /// Image to recognise. This method takes ownership: it is disposed before
        /// returning, also when recognition throws.
        /// </param>
        /// <returns>
        /// The page text with two character substitutions applied and every "\n" replaced
        /// by <see cref="Environment.NewLine"/>.
        /// </returns>
        string DoOCR(Pix pix)
        {
            try
            {
                gOcr.DefaultPageSegMode = PageSegMode.SingleBlock;

                // The page is released before the image it was created from.
                using (Page pg = gOcr.Process(pix))
                {
                    String buf = pg.GetText();
                    return buf.Replace("ی", "ي").Replace("ه", "ە").Replace("\n", Environment.NewLine);
                }
            }
            finally
            {
                if (pix != null)
                {
                    pix.Dispose();
                }
            }
        }
Пример #9
0
        /// <summary>
        /// Creates a <see cref="Pix"/> with the same dimensions and resolution as
        /// <paramref name="img"/> and copies the bitmap's pixel data into it.
        /// </summary>
        /// <param name="img">Bitmap to convert; it is locked read-only for the duration of the copy.</param>
        /// <returns>The newly created <see cref="Pix"/>; the caller is responsible for disposing it.</returns>
        /// <remarks>
        /// Pixel data is transferred only for the 32bpp ARGB/RGB, 24bpp RGB, 8bpp indexed and
        /// 1bpp indexed formats. If anything throws during the copy, the new
        /// <see cref="Pix"/> is disposed and the exception is rethrown.
        /// </remarks>
        public static Pix ToPix(Bitmap img)
        {
            int depth  = GetPixDepth(img.PixelFormat);
            Pix result = Pix.Create(img.Width, img.Height, depth);

            result.XRes = (int)Math.Round(img.HorizontalResolution);
            result.YRes = (int)Math.Round(img.VerticalResolution);

            BitmapData locked = null;

            try
            {
                // Indexed bitmaps carry a palette that has to be copied as well.
                bool isIndexed = (img.PixelFormat & PixelFormat.Indexed) == PixelFormat.Indexed;
                if (isIndexed)
                {
                    CopyColormap(img, result);
                }

                Rectangle wholeImage = new Rectangle(0, 0, img.Width, img.Height);
                locked = img.LockBits(wholeImage, ImageLockMode.ReadOnly, img.PixelFormat);
                PixData target = result.GetData();

                switch (locked.PixelFormat)
                {
                    case PixelFormat.Format32bppArgb:
                        TransferDataFormat32bppArgb(locked, target);
                        break;

                    case PixelFormat.Format32bppRgb:
                        TransferDataFormat32bppRgb(locked, target);
                        break;

                    case PixelFormat.Format24bppRgb:
                        TransferDataFormat24bppRgb(locked, target);
                        break;

                    case PixelFormat.Format8bppIndexed:
                        TransferDataFormat8bppIndexed(locked, target);
                        break;

                    case PixelFormat.Format1bppIndexed:
                        TransferDataFormat1bppIndexed(locked, target);
                        break;
                }

                return result;
            }
            catch (Exception)
            {
                // Do not leak the half-filled image when the copy fails.
                result.Dispose();
                throw;
            }
            finally
            {
                if (locked != null)
                {
                    img.UnlockBits(locked);
                }
            }
        }
Пример #10
0
        /// <summary>
        /// Runs Tesseract on <c>./_tmp.bmp</c> with the "jpn" language data in
        /// <c>./tessdata/</c> and collects the recognised text paragraph by paragraph.
        /// </summary>
        /// <returns>
        /// One string per paragraph for which the iterator returned text, with every "\n"
        /// expanded to "\r\n".
        /// </returns>
        /// <exception cref="InvalidOperationException">Tesseract could not be initialised.</exception>
        private List <string> run_tessract()
        {
            List <string> re       = new List <string>();
            string        dataPath = "./tessdata/";
            // The original picked the language with the constant expression
            // `1 != 1 ? "eng" : "jpn"`, which always evaluates to "jpn".
            string               language  = "jpn";
            string               inputFile = "./_tmp.bmp";
            OcrEngineMode        oem       = OcrEngineMode.DEFAULT;
            PageSegmentationMode psm       = PageSegmentationMode.AUTO_OSD;

            TessBaseAPI tessBaseAPI = new TessBaseAPI();
            Pix         pix         = null;

            try
            {
                // Initialize tesseract-ocr
                if (!tessBaseAPI.Init(dataPath, language, oem))
                {
                    throw new InvalidOperationException("Could not initialize tesseract.");
                }

                // Set the Page Segmentation mode
                tessBaseAPI.SetPageSegMode(psm);

                // Set the input image
                pix = tessBaseAPI.SetImage(inputFile);

                tessBaseAPI.SetVariable("number", "1234567890");

                // Recognize image
                tessBaseAPI.Recognize();

                ResultIterator    resultIterator    = tessBaseAPI.GetIterator();
                PageIteratorLevel pageIteratorLevel = PageIteratorLevel.RIL_PARA;

                // Extract text from the result iterator, one paragraph per entry.
                do
                {
                    string str = resultIterator.GetUTF8Text(pageIteratorLevel);

                    if (str != null)
                    {
                        re.Add(str.Replace("\n", "\r\n"));
                    }
                } while (resultIterator.Next(pageIteratorLevel));
            }
            finally
            {
                // Release the native engine and image even when initialisation or
                // recognition throws; previously both leaked on any failure.
                tessBaseAPI.Dispose();
                if (pix != null)
                {
                    pix.Dispose();
                }
            }

            return re;
        }
Пример #11
0
        /// <summary>
        /// Runs a basic OCR on an image.
        /// </summary>
        /// <param name="pixImage">The image.</param>
        /// <param name="dataPath">The directory holding the tesseract data.</param>
        /// <param name="language">The language to use during the OCR process.</param>
        /// <param name="disposeImage">
        /// If true, the image is disposed once the OCR process has finished, including when
        /// it finished with an exception.
        /// </param>
        /// <returns>The text recognised on the image.</returns>
        public static string GetText(this Pix pixImage,
                                     string dataPath   = "./tessdata",
                                     string language   = "eng",
                                     bool disposeImage = true)
        {
            try
            {
                // Both are released at the end of this block, i.e. before the image itself.
                using var engine  = new TesseractEngine(dataPath, language, EngineMode.Default);
                using var process = engine.Process(pixImage);

                return process.GetText();
            }
            finally
            {
                // Previously the image was released only on success and before the page
                // created from it had been disposed.
                if (disposeImage)
                {
                    pixImage?.Dispose();
                }
            }
        }
Пример #12
0
        /// <summary>
        /// Ensures data folder exists, and run an OCR.
        /// </summary>
        /// <param name="pixImage">The image.</param>
        /// <param name="dataPath">The directory holding the tesseract data.</param>
        /// <param name="language">The language to use during the OCR process.</param>
        /// <param name="disposeImage">
        /// If true, the image is disposed once the operation has finished, including when the
        /// data download or the OCR process failed with an exception.
        /// </param>
        /// <returns>A task producing the text recognised on the image.</returns>
        public static async Task <string> GetTextAndEnsureData(this Pix pixImage,
                                                               string dataPath   = "./tessdata",
                                                               string language   = "eng",
                                                               bool disposeImage = true)
        {
            try
            {
                var tessdataDownloader = new TessdataDownloader();
                await tessdataDownloader.EnsureDataFolder(language, dataFolder : dataPath);

                // Both are released at the end of this block, i.e. before the image itself.
                using var engine  = new TesseractEngine(dataPath, language, EngineMode.Default);
                using var process = engine.Process(pixImage);

                return process.GetText();
            }
            finally
            {
                // Previously the image was released only on success and before the page
                // created from it had been disposed.
                if (disposeImage)
                {
                    pixImage?.Dispose();
                }
            }
        }
Пример #13
0
        /// <summary>
        /// Pre-processes an image, runs it through <c>TesseractOCRCore</c> and returns the
        /// hOCR output of the page (which includes word locations).
        /// </summary>
        /// <param name="ImageToUse">Image handed to <c>LoadImage</c>.</param>
        /// <param name="Zoomlevel">Value forwarded to <c>AccordImageProc.ImageProcessing</c>.</param>
        /// <returns>The string returned by <c>Page.GetHOCRText(1)</c>.</returns>
        public string OCRimagewithLocation(Image ImageToUse, int Zoomlevel)
        {
            // Load the image, then post-process it.
            // NOTE(review): LoadImage presumably populates ImageToOCR - confirm.
            LoadImage(ImageToUse);
            ImageToOCR = AccordImageProcessing.AccordImageProc.ImageProcessing(ImageToOCR, Zoomlevel);

            // 'using' releases the page first and then the image, also when OCR throws
            // (the original disposed the image before the page and leaked both on failure).
            using (Pix img = PixConverter.ToPix(ImageToOCR))
            using (Page page = TesseractOCRCore.Process(img))
            {
                return page.GetHOCRText(1);
            }
        }
Пример #14
0
        /// <summary>
        /// Recognises <c>./input.png</c> with the English data in <c>./tessdata/</c> and
        /// exports the result through <c>PdfRenderer</c>, using the input file's full path
        /// without its extension as the output name.
        /// </summary>
        /// <exception cref="InvalidOperationException">Tesseract could not be initialised.</exception>
        static void example4()
        {
            string               dataPath  = "./tessdata/";
            string               language  = "eng";
            string               inputFile = "./input.png";
            OcrEngineMode        oem       = OcrEngineMode.DEFAULT;
            PageSegmentationMode psm       = PageSegmentationMode.AUTO_OSD;

            TessBaseAPI tessBaseAPI = new TessBaseAPI();
            Pix         pix         = null;

            try
            {
                // Initialize tesseract-ocr
                if (!tessBaseAPI.Init(dataPath, language, oem))
                {
                    throw new InvalidOperationException("Could not initialize tesseract.");
                }

                // Set the Page Segmentation mode
                tessBaseAPI.SetPageSegMode(psm);

                // Set the input image
                pix = tessBaseAPI.SetImage(inputFile);

                // Recognize image
                tessBaseAPI.Recognize();

                // Ensure input name is set
                tessBaseAPI.SetInputName(inputFile);

                var    fileInfo     = new System.IO.FileInfo(inputFile);
                string tessDataPath = tessBaseAPI.GetDatapath();

                // Strip only the trailing extension. String.Replace would also remove the same
                // text elsewhere in the path and throws when the extension is empty.
                string outputName = System.IO.Path.ChangeExtension(fileInfo.FullName, null);

                // Call pdf renderer and export pdf
                using (var pdfRenderer = new PdfRenderer(outputName, tessDataPath, false))
                {
                    pdfRenderer.BeginDocument("tesseract.net searchable Pdf generation");
                    pdfRenderer.AddImage(tessBaseAPI);
                    pdfRenderer.EndDocument();
                }
            }
            finally
            {
                // Release the native engine and image even when a step above throws.
                tessBaseAPI.Dispose();
                if (pix != null)
                {
                    pix.Dispose();
                }
            }
        }
Пример #15
0
        /// <summary>
        /// Pre-processes an image, runs it through <c>TesseractOCRCore</c> and returns the
        /// hOCR output of the page (which includes word locations).
        /// </summary>
        /// <param name="ImageToUse">Image handed to <c>LoadImage</c>.</param>
        /// <param name="Zoomlevel">Value forwarded to <c>AccordImageProc.ImageProcessing</c>.</param>
        /// <returns>The string returned by <c>Page.GetHOCRText(1)</c>.</returns>
        public string OCRimagewithLocation(Image ImageToUse, int Zoomlevel)
        {
            // Load the image, then post-process it.
            // NOTE(review): LoadImage presumably populates ImageToOCR - confirm.
            LoadImage(ImageToUse);
            ImageToOCR = AccordImageProcessing.AccordImageProc.ImageProcessing(ImageToOCR, Zoomlevel);

            // Convert to Tesseract format via an in-memory BMP.
            byte[] imageBytes = ToByteArray(ImageToOCR, System.Drawing.Imaging.ImageFormat.Bmp);

            // 'using' releases the page first and then the image, also when OCR throws
            // (the original disposed the image before the page and leaked both on failure).
            using (Pix img = Pix.LoadFromMemory(imageBytes))
            using (Page page = TesseractOCRCore.Process(img))
            {
                return page.GetHOCRText(1);
            }
        }
Пример #16
0
        /// <summary>
        /// Loads the image at <paramref name="pathToImage"/> and passes each of its pages to
        /// <c>FillDocumentPage</c>, using one engine for the whole file.
        /// </summary>
        /// <param name="document">Document passed through to <c>FillDocumentPage</c>.</param>
        /// <param name="pathToImage">Path of a single-page image or a multi-page TIFF.</param>
        /// <param name="language">Language used to create the engine and forwarded per page.</param>
        /// <exception cref="FormatException">
        /// The file extension is accepted by neither <c>IsSinglePageImage</c> nor <c>IsMultiPageImage</c>.
        /// </exception>
        private void FillPagesData(ATAPY.Document.Data.Core.Document document, string pathToImage, string language)
        {
            string fileExtension = Path.GetExtension(pathToImage);

            using (var engine = new TesseractEngine(ENGINE_DATAPATH, language))
            {
                if (IsSinglePageImage(fileExtension))
                {
                    using (Pix singlePage = Pix.LoadFromFile(pathToImage))
                    {
                        FillDocumentPage(document, language, engine, singlePage);
                    }

                    return;
                }

                if (!IsMultiPageImage(fileExtension))
                {
                    throw new FormatException("Please specify path to the image file");
                }

                using (PixArray allPages = PixArray.LoadMultiPageTiffFromFile(pathToImage))
                {
                    foreach (Pix page in allPages)
                    {
                        FillDocumentPage(document, language, engine, page);
                    }
                }
            }
        }
Пример #17
0
        /// <summary>
        /// Recognises <c>./input.png</c> with the "chi_sim" data in <c>./tessdata/</c> and
        /// appends the text of every paragraph to a local <see cref="StringBuilder"/>.
        /// The collected text is not returned or printed.
        /// </summary>
        /// <exception cref="InvalidOperationException">Tesseract could not be initialised.</exception>
        static void example3()
        {
            string dataPath = "./tessdata/";
            // Use "eng" instead for English input.
            string               language  = "chi_sim";
            string               inputFile = "./input.png";
            OcrEngineMode        oem       = OcrEngineMode.DEFAULT;
            PageSegmentationMode psm       = PageSegmentationMode.AUTO_OSD;

            TessBaseAPI tessBaseAPI = new TessBaseAPI();
            Pix         pix         = null;

            try
            {
                // Initialize tesseract-ocr
                if (!tessBaseAPI.Init(dataPath, language, oem))
                {
                    throw new InvalidOperationException("Could not initialize tesseract.");
                }

                // Set the Page Segmentation mode
                tessBaseAPI.SetPageSegMode(psm);

                // Set the input image
                pix = tessBaseAPI.SetImage(inputFile);

                // Recognize image
                tessBaseAPI.Recognize();

                ResultIterator resultIterator = tessBaseAPI.GetIterator();

                // Extract text from result iterator
                StringBuilder     stringBuilder     = new StringBuilder();
                PageIteratorLevel pageIteratorLevel = PageIteratorLevel.RIL_PARA;

                do
                {
                    stringBuilder.Append(resultIterator.GetUTF8Text(pageIteratorLevel));
                } while (resultIterator.Next(pageIteratorLevel));
            }
            finally
            {
                // Release the native engine and image even when initialisation or
                // recognition throws; previously both leaked on any failure.
                tessBaseAPI.Dispose();
                if (pix != null)
                {
                    pix.Dispose();
                }
            }
        }
Пример #18
0
        /// <summary>
        /// Runs every *.jpg below a fixed directory through a chain of image clean-up steps
        /// and saves the result next to the source with a ".pdf" extension, then waits for
        /// a line of console input before exiting.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            var sourceDirectory = new DirectoryInfo(@"C:\Users\ACER\Desktop\Tool");

            foreach (var x in sourceDirectory.GetFiles("*.jpg", SearchOption.AllDirectories))
            {
                // NOTE(review): each step reassigns pixs and only the final value is disposed;
                // if a step returns a new Pix the previous one is presumably leaked - confirm
                // against the extension methods before changing this.
                Pix pixs = Pix.LoadFromFile(x.FullName);
                pixs = pixs.PixBackgroundNormSimple();
                pixs = pixs.PixConvertRGBToGray();
                pixs = pixs.PixFindSkewAndDeskew();
                pixs = pixs.PixTophat();
                pixs = pixs.PixInvert();
                pixs = pixs.PixGammaRTC();
                pixs = pixs.PixThresholdToBinary();

                /*
                 * pixs = Pix.Create(LeptonicaNativeApi.Native.pixBackgroundNormSimple(pixs.Reference, IntPtr.Zero, IntPtr.Zero));
                 * pixs = Pix.Create(LeptonicaNativeApi.Native.pixConvertRGBToGray(pixs.Reference, 0.5f, 0.3f, 0.2f));
                 *
                 * float angle, confidence;
                 * pixs = Pix.Create(LeptonicaNativeApi.Native.pixFindSkewAndDeskew(pixs.Reference, 2, out angle, out confidence));
                 *
                 * if(confidence > 2 && confidence < 3) {
                 *  angle *= (float) Math.PI / 180.0f;
                 *  pixs = Pix.Create(LeptonicaNativeApi.Native.pixRotate(pixs.Reference, angle, RotationMethod.Shear, RotationFill.White, pixs.Width, pixs.Height));
                 * }
                 *
                 * pixs = Pix.Create(LeptonicaNativeApi.Native.pixTophat(pixs.Reference, 17, 17, L_TOPHAT.BLACK));
                 * LeptonicaNativeApi.Native.pixInvert(pixs.Reference, pixs.Reference);
                 * LeptonicaNativeApi.Native.pixGammaTRC(pixs.Reference, pixs.Reference, 1.0f, 170, 245);
                 *
                 * pixs = Pix.Create(LeptonicaNativeApi.Native.pixThresholdToBinary(pixs.Reference, 35));*/
                string outputPath = x.ToNewExtension(".pdf").FullName;
                pixs.Save(outputPath, ImageSaveFormat.Lpdf);
                pixs.Dispose();
            }

            Console.WriteLine("Entre key to exit...");
            Console.ReadLine();
        }
Пример #19
0
        //public static void  clearFaceFlag() { faceFlag = 0; }
        /// <summary>
        /// Runs line-level OCR on the file of the document with the given id, stores the
        /// recognised lines (text plus bounding box) as JSON on the document and updates
        /// <c>FaceFlag</c> from the scaled image.
        /// </summary>
        /// <param name="id">Identifier passed to <c>DBService.FindDocumentById</c>.</param>
        /// <returns>
        /// The JSON-serialised list of <c>OCRText</c> entries, one per text line, whose
        /// <c>Coords</c> are formatted as "left,top,right,bottom".
        /// </returns>
        /// <exception cref="InvalidOperationException">Tesseract could not be initialised.</exception>
        public string RecognizeText(int id)
        {
            DBService dbs = new DBService();
            Document  doc = dbs.FindDocumentById(id);

            Image <Gray, Byte> img = scale(doc);

            //var image = PixConverter.ToPix(img.ToBitmap()))

            Tesseract.Native.DllImports.TesseractDirectory = System.Web.HttpContext.Current.Server.MapPath("~/Tesseract/bin/Debug/DLLS/");

            string dataPath = System.Web.HttpContext.Current.Server.MapPath("~/tessdata/");
            string language = "eng";

            string               inputFile = doc.Path;
            OcrEngineMode        oem       = OcrEngineMode.DEFAULT;
            PageSegmentationMode psm       = PageSegmentationMode.AUTO_OSD;

            List <OCRText> forJson = new List <OCRText>();

            TessBaseAPI tessBaseAPI = new TessBaseAPI();
            Pix         pix         = null;

            try
            {
                // Format explicitly: with a string second argument, Debug.WriteLine(string, string)
                // is the (message, category) overload, so "{0}" would never be substituted.
                System.Diagnostics.Debug.WriteLine(string.Format("The current version is {0}", tessBaseAPI.GetVersion()));

                // Initialize tesseract-ocr
                if (!tessBaseAPI.Init(dataPath, language, oem))
                {
                    throw new InvalidOperationException("Could not initialize tesseract.");
                }

                // Set the Page Segmentation mode
                tessBaseAPI.SetPageSegMode(psm);

                // Set the input image
                pix = tessBaseAPI.SetImage(inputFile);

                // Recognize image
                tessBaseAPI.Recognize();

                ResultIterator    resultIterator    = tessBaseAPI.GetIterator();
                PageIteratorLevel pageIteratorLevel = PageIteratorLevel.RIL_TEXTLINE;

                // Collect the text and bounding box of every text line.
                do
                {
                    int top, bottom, left, right;

                    string textContent = resultIterator.GetUTF8Text(pageIteratorLevel);
                    resultIterator.BoundingBox(pageIteratorLevel, out left, out top, out right, out bottom);
                    string coordsString = "" + left + "," + top + "," + right + "," + bottom;

                    forJson.Add(new OCRText()
                    {
                        Coords = coordsString, Text = textContent
                    });
                } while (resultIterator.Next(pageIteratorLevel));
            }
            finally
            {
                // Release the native engine and image even when initialisation or
                // recognition throws; previously both leaked on any failure.
                tessBaseAPI.Dispose();
                if (pix != null)
                {
                    pix.Dispose();
                }
            }

            var textForReturn = JsonConvert.SerializeObject(forJson);

            dbs.UpdateDocument(textForReturn, id);

            if (HasFace(img) == true)
            {
                FaceFlag = 1;
            }
            else
            {
                FaceFlag = 0;
            }

            return textForReturn;
        }
Пример #20
0
        /// <summary>
        /// Handles global key presses.
        /// </summary>
        /// <remarks>
        /// PrintScreen: captures and thresholds the screen, reads the part names from
        /// "rewards.jpg" with Tesseract, fetches price and ducat data for them, shows the best
        /// pick and updates the session totals. Any exception in this path is logged through
        /// <c>LogError</c> and swallowed.
        /// NumPad2 / NumPad3 / NumPad4: clears the boxes, shows or hides rows 3 and 4 and
        /// switches the stored number of people.
        /// Pause: exits the process.
        /// </remarks>
        /// <param name="e">Event data carrying the pressed key.</param>
        public new void KeyDown(KeyboardHookEventArgs e)
        {
            // Use Singleton design to store location information
            PartLocation currentLocation = PartLocation._instance;

            // Print Screen key was pressed
            if (e.Key == Keys.PrintScreen)
            {
                // Grab image from screen and convert to black and white using a threshold
                PrintScreenThreshold();

                // Process text from image
                try
                {
                    string               dataPath  = @"./tessdata";
                    string               language  = "eng";
                    string               inputFile = "rewards.jpg";
                    OcrEngineMode        oem       = OcrEngineMode.DEFAULT;
                    PageSegmentationMode psm       = PageSegmentationMode.SINGLE_LINE;

                    TessBaseAPI tessBaseAPI = new TessBaseAPI();
                    Pix         pix         = null;

                    try
                    {
                        // Initialize tesseract-ocr
                        if (!tessBaseAPI.Init(dataPath, language, oem))
                        {
                            throw new InvalidOperationException("Could not initialize tesseract");
                        }

                        // Set the Page Segmentation mode
                        tessBaseAPI.SetPageSegMode(psm);

                        // Warframe Relics are displayed using A-Z, a-z, and &
                        tessBaseAPI.SetVariable("tessedit_char_whitelist", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz&");

                        // Set input file
                        pix = tessBaseAPI.SetImage(inputFile);

                        TB_Part1.Text = GetText(tessBaseAPI, currentLocation.PartLoc1, 580);
                        TB_Part2.Text = GetText(tessBaseAPI, currentLocation.PartLoc2, 580);
                        if (currentLocation.NumPeople >= 3)
                        {
                            TB_Part3.Text = GetText(tessBaseAPI, currentLocation.PartLoc3, 580);
                        }
                        if (currentLocation.NumPeople == 4)
                        {
                            TB_Part4.Text = GetText(tessBaseAPI, currentLocation.PartLoc4, 580);
                        }
                    }
                    finally
                    {
                        // Release the native engine and image even when initialisation or a
                        // GetText call throws; previously both leaked on every failed key press.
                        tessBaseAPI.Dispose();
                        if (pix != null)
                        {
                            pix.Dispose();
                        }
                    }

                    // Grab latest order list for parts
                    // TODO: Try multiple threads! If that's too fast, cache prices for a duration
                    using (var client = new WebClient())
                    {
                        GetPriceJson(client, TB_Part1, TB_Plat1);
                        GetPriceJson(client, TB_Part2, TB_Plat2);
                        if (currentLocation.NumPeople >= 3)
                        {
                            GetPriceJson(client, TB_Part3, TB_Plat3);
                        }
                        if (currentLocation.NumPeople == 4)
                        {
                            GetPriceJson(client, TB_Part4, TB_Plat4);
                        }

                        // These get cached
                        GetDucatsJson(client, TB_Part1, TB_Ducats1);
                        GetDucatsJson(client, TB_Part2, TB_Ducats2);
                        if (currentLocation.NumPeople >= 3)
                        {
                            GetDucatsJson(client, TB_Part3, TB_Ducats3);
                        }
                        if (currentLocation.NumPeople == 4)
                        {
                            GetDucatsJson(client, TB_Part4, TB_Ducats4);
                        }
                    }

                    List <PartWorth> sortPartWorth = new List <PartWorth>();
                    sortPartWorth.Add(new PartWorth(TB_Part1, TB_Ducats1));
                    sortPartWorth.Add(new PartWorth(TB_Part2, TB_Ducats2));
                    if (currentLocation.NumPeople >= 3)
                    {
                        sortPartWorth.Add(new PartWorth(TB_Part3, TB_Ducats3));
                    }
                    if (currentLocation.NumPeople == 4)
                    {
                        sortPartWorth.Add(new PartWorth(TB_Part4, TB_Ducats4));
                    }

                    // Sort by Plat, then Ducats
                    sortPartWorth = sortPartWorth.OrderByDescending(o => Int32.Parse(o.platinum.Tag.ToString())).ThenByDescending(o => Int32.Parse(o.ducats.Tag.ToString())).ToList();

                    // If max Plat is low, sort by the reverse
                    if (Int32.Parse(sortPartWorth[0].platinum.Tag.ToString()) < 15)
                    {
                        sortPartWorth = sortPartWorth.OrderByDescending(o => Int32.Parse(o.ducats.Tag.ToString())).ThenByDescending(o => Int32.Parse(o.platinum.Tag.ToString())).ToList();
                    }

                    // Show best option
                    TB_Pick.Text = sortPartWorth[0].platinum.Text;

                    // Show current amount of Plat (and Ducats) made in the current session;
                    // a tag of -1 is skipped rather than added to the totals.
                    if (Int32.Parse(sortPartWorth[0].platinum.Tag.ToString()) != -1)
                    {
                        LB_Plat.Tag = Int32.Parse(LB_Plat.Tag.ToString()) + Int32.Parse(sortPartWorth[0].platinum.Tag.ToString());
                    }
                    if (Int32.Parse(sortPartWorth[0].ducats.Tag.ToString()) != -1)
                    {
                        LB_Ducs.Tag = Int32.Parse(LB_Ducs.Tag.ToString()) + Int32.Parse(sortPartWorth[0].ducats.Tag.ToString());
                    }

                    LB_Plat.Text = LB_Plat.Tag.ToString() + " p";
                    LB_Ducs.Text = LB_Ducs.Tag.ToString() + " d";
                }
                catch (Exception ex)
                {
                    LogError("KeyDown: " + ex.Message);
                }
            }
            else if (e.Key == Keys.NumPad2)
            {
                Debug.WriteLine("Switched to 2 people");

                ClearBox();
                VisibleRow(false, LB_Part3, TB_Part3, TB_Plat3, TB_Ducats3);
                VisibleRow(false, LB_Part4, TB_Part4, TB_Plat4, TB_Ducats4);

                currentLocation.SetPeople2();
            }
            else if (e.Key == Keys.NumPad3)
            {
                Debug.WriteLine("Switched to 3 people");

                ClearBox();
                VisibleRow(true, LB_Part3, TB_Part3, TB_Plat3, TB_Ducats3);
                VisibleRow(false, LB_Part4, TB_Part4, TB_Plat4, TB_Ducats4);

                currentLocation.SetPeople3();
            }
            else if (e.Key == Keys.NumPad4)
            {
                Debug.WriteLine("Switched to 4 people");

                ClearBox();
                VisibleRow(true, LB_Part3, TB_Part3, TB_Plat3, TB_Ducats3);
                VisibleRow(true, LB_Part4, TB_Part4, TB_Plat4, TB_Ducats4);

                currentLocation.SetPeople4();
            }
            else if (e.Key == Keys.Pause)
            {
                Environment.Exit(0);
            }
        }
Пример #21
0
        /// <summary>
        /// Extracts the text of a PDF file: the text elements of every page plus the
        /// OCR result of every image XObject referenced by the page resources.
        /// </summary>
        /// <param name="filename">Path of the PDF document to scan.</param>
        /// <param name="log">Logging section that receives progress and error messages.</param>
        /// <returns>
        /// The collected text, trimmed, with runs of spaces collapsed to a single space
        /// and runs of "\n" collapsed to a single "\n".
        /// </returns>
        protected override string DoScan(string filename, LoggingSection log)
        {
            PdfDocument   pdfDocument   = PdfReader.Open(filename);
            StringBuilder stringBuilder = new StringBuilder();

            for (int pageIndex = 0; pageIndex < pdfDocument.PageCount; pageIndex++)
            {
                log.Verbose($"Scanning page {pageIndex + 1} of {pdfDocument.PageCount}");
                PdfPage pdfPage = pdfDocument.Pages[pageIndex];
                //Extract text from text elements
                stringBuilder.Append($"{ExtractTextFromPdfPage(pdfPage)}{Environment.NewLine}");

                //Extract text from image elements with Tesseract OCR
                PdfDictionary resources = pdfPage.Elements.GetDictionary("/Resources");
                PdfDictionary xObjects  = resources?.Elements.GetDictionary("/XObject");
                if (xObjects != null)
                {
                    foreach (PdfItem item in xObjects.Elements.Values)
                    {
                        PdfReference  reference = item as PdfReference;
                        PdfDictionary xObject   = reference?.Value as PdfDictionary;
                        if (xObject == null || xObject.Elements.GetString("/Subtype") != "/Image")
                        {
                            continue;
                        }

                        Bitmap bitmap = PdfImageToBitmap(xObject);
                        if (bitmap == null)
                        {
                            log.Error("Could not extract bitmap from PDF image element. Seems like the PDF image filter type is not supported. Skipping element!");
                            continue;
                        }

                        try
                        {
                            log.Debug("Rotating image");
                            bitmap.RotateFlip(RotateFlipType.Rotate90FlipNone);
                            log.Debug("Upscaling image 2x");
                            BitmapUtils.Scale(ref bitmap, 2);
                            log.Debug("Grayscaling image");
                            BitmapUtils.GrayscaleWithLockBits(bitmap);
                            log.Debug("Denoising image");
                            BitmapUtils.DenoiseWithLockBits(bitmap);
                            log.Debug("Applying OCR on image");

                            using (Pix pix = PixConverter.ToPix(bitmap))
                            {
                                TesseractEngine tesseractEngine = Services.OCRProvider.AwaitResource();
                                try
                                {
                                    // The page must be disposed before the engine is handed back:
                                    // a Tesseract engine can only have one page open at a time.
                                    using (Page tesseractPage = tesseractEngine.Process(pix))
                                    {
                                        string text = tesseractPage.GetText();
                                        log.Debug($"Text is {text.Length} characters long");
                                        if (!string.IsNullOrWhiteSpace(text))
                                        {
                                            stringBuilder.Append(text.Replace("\n", " "));
                                        }
                                    }
                                }
                                catch (InvalidOperationException e)
                                {
                                    // Report the same 1-based page number as the progress message above.
                                    log.Error($"OCR failed on Page {pageIndex + 1} of file {filename}:\n{e.StackTrace}");
                                }
                                finally
                                {
                                    // Always give the engine back, otherwise a failure starves the provider.
                                    Services.OCRProvider.Feed(tesseractEngine);
                                }
                            }
                        }
                        finally
                        {
                            // Scale takes the bitmap by ref and may have replaced the instance;
                            // release whichever one we hold now.
                            // NOTE(review): assumes PdfImageToBitmap returns a bitmap owned by this method - confirm.
                            bitmap?.Dispose();
                        }
                    }
                }
                stringBuilder.Append("\n");
            }

            log.Debug("Trimming text");
            string documentText = stringBuilder.ToString().Trim();

            while (documentText.Contains("  "))
            {
                documentText = documentText.Replace("  ", " ");
            }
            while (documentText.Contains("\n\n"))
            {
                documentText = documentText.Replace("\n\n", "\n");
            }

            // Return the cleaned-up text; the raw builder content would discard the trimming above.
            return documentText;
        }