Example #1
0
        static void example3()
        {
            string               dataPath  = "./tessdata/";
            string               language  = "eng";
            string               inputFile = "./input.png";
            OcrEngineMode        oem       = OcrEngineMode.DEFAULT;
            PageSegmentationMode psm       = PageSegmentationMode.AUTO_OSD;

            TessBaseAPI tessBaseAPI = new TessBaseAPI();

            // Initialize tesseract-ocr
            if (!tessBaseAPI.Init(dataPath, language, oem))
            {
                throw new Exception("Could not initialize tesseract.");
            }

            // Set the Page Segmentation mode
            tessBaseAPI.SetPageSegMode(psm);

            // Set the input image
            Pix pix = tessBaseAPI.SetImage(inputFile);

            // Recognize image
            tessBaseAPI.Recognize();

            ResultIterator resultIterator = tessBaseAPI.GetIterator();

            // extract text from result iterator
            StringBuilder     stringBuilder     = new StringBuilder();
            PageIteratorLevel pageIteratorLevel = PageIteratorLevel.RIL_PARA;

            do
            {
                stringBuilder.Append(resultIterator.GetUTF8Text(pageIteratorLevel));
            } while (resultIterator.Next(pageIteratorLevel));

            tessBaseAPI.Dispose();
            pix.Dispose();
        }
        public void /*Image*/ GetTextFromImage(IWebElement element, string uniqueName)
        {
            Screenshot screenshot = ((ITakesScreenshot)Driver).GetScreenshot();

            string pth       = Assembly.GetCallingAssembly().CodeBase;
            string finalpth  = pth.Substring(0, pth.LastIndexOf("bin")) + "Screenshots/" + uniqueName + ".jpeg";
            string localpath = new Uri(finalpth).LocalPath;

            screenshot.SaveAsFile(localpath, ScreenshotImageFormat.Jpeg);

            Image     img  = Image.FromFile(localpath /*uniqueName*/);
            Rectangle rect = new Rectangle();

            if (element != null)
            {
                // Get the Width and Height of the WebElement using
                int width  = element.Size.Width;
                int height = element.Size.Height;

                // Get the Location of WebElement in a Point.
                // This will provide X & Y co-ordinates of the WebElement
                Point p = element.Location;

                // Create a rectangle using Width, Height and element location
                rect = new Rectangle(p.X, p.Y, width, height);
            }

            //croping the image based on rect.
            Bitmap bmpImage   = new Bitmap(img);
            var    cropedImag = bmpImage.Clone(rect, bmpImage.PixelFormat);


            string dataPath = @"C:\Betsold\AutomationTesting\Tests\testdata\";
            string language = "eng";
            string imgPath  = @"C:\Betsold\AutomationTesting\Tests\Screenshots\logo-test.jpeg";

            OcrEngineMode        oem = OcrEngineMode.LSTM_ONLY;
            PageSegmentationMode psm = PageSegmentationMode.AUTO;

            TessBaseAPI tessBaseAPI = new TessBaseAPI(dataPath, language, oem, psm);

            // Set the input image
            tessBaseAPI.SetImage(imgPath);

            var processedImage = tessBaseAPI.GetThresholdedImage();

            processedImage.Write(@"C:\Users\ibozhinovski\Desktop\", ImageFileFormatTypes.IFF_JFIF_JPEG);


            // Recognize image
            tessBaseAPI.Recognize();

            ResultIterator resultIterator = tessBaseAPI.GetIterator();

            // Extract text from result iterator
            StringBuilder     text = new StringBuilder();
            PageIteratorLevel pageIteratorLevel = PageIteratorLevel.RIL_PARA;

            do
            {
                text.Append(resultIterator.GetUTF8Text(pageIteratorLevel));
            } while (resultIterator.Next(pageIteratorLevel));

            tessBaseAPI.Dispose();

            Console.Read();

            /*
             * // croping the image based on rect.
             * Bitmap bmpImage = new Bitmap(img);
             * var cropedImag = bmpImage.Clone(rect, bmpImage.PixelFormat);
             *
             * var ocr = new TesseractEngine("./testdata", "eng");
             *
             * var page = ocr.Process(cropedImag);
             *
             * var result = page.GetText();
             *
             * Console.WriteLine(result);
             */
        }
Example #3
0
        protected override ApiResult ProcessClientQueueMessage(ImageArtifact message)
        {
            BitmapData bData = message.Image.LockBits(
                new Rectangle(0, 0, message.Image.Width, message.Image.Height), ImageLockMode.ReadOnly, message.Image.PixelFormat);
            int w = bData.Width, h = bData.Height, bpp = Image.GetPixelFormatSize(bData.PixelFormat) / 8;

            unsafe
            {
                TesseractImage.SetImage(new UIntPtr(bData.Scan0.ToPointer()), w, h, bpp, bData.Stride);
            }
            Pix = TesseractImage.GetInputImage();

            Debug("Pix has width: {0} height: {1} depth: {2} xres: {3} yres: {4}.", Pix.Width, Pix.Height, Pix.Depth,
                  Pix.XRes, Pix.YRes);
            List <string> text;

            using (var op = Begin("Tesseract OCR (fast)"))
            {
                TesseractImage.Recognize();
                ResultIterator resultIterator = TesseractImage.GetIterator();
                text = new List <string>();
                PageIteratorLevel pageIteratorLevel = PageIteratorLevel.RIL_PARA;
                do
                {
                    string ant = TextArtifact.GetAlphaNumericString(resultIterator.GetUTF8Text(pageIteratorLevel));
                    ant = string.Join(" ",
                                      ant.Split(new[] { ' ', '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries)
                                      .Where(word => TextArtifact.IsNumber(word) || word.Length > 3 || Pipeline.Dictionaries["common_words_en_3grams"].Contains(word)))
                          .Trim();
                    if (ant.IsEmpty())
                    {
                        continue;
                    }
                    else
                    {
                        text.Add(ant);
                    }
                }while (resultIterator.Next(pageIteratorLevel));

                if (text.Count > 0)
                {
                    string alltext = text.Aggregate((s1, s2) => s1 + " " + s2).Trim();
                    if (text.Count < 7)
                    {
                        Info("Artifact id {0} is likely a photo or non-text image.", message.Id);
                    }
                    else
                    {
                        message.OCRText = text;
                        Info("OCR Text: {0}", alltext);
                    }
                }
                else
                {
                    Info("No text recognized in artifact id {0}.", message.Id);
                }
                op.Complete();
            }

            message.Image.UnlockBits(bData);
            if (text.Count >= 7)
            {
                TextArtifact artifact = new TextArtifact(message.Name + ".txt", string.Join(Environment.NewLine, text.ToArray()));
                artifact.Source             = message.Source;
                artifact.CurrentProcess     = message.CurrentProcess;
                artifact.CurrentWindowTitle = message.CurrentWindowTitle;
                artifact.Image       = message;
                message.TextArtifact = artifact;
                EnqueueMessage(artifact);
                Info("{0} added artifact id {1} of type {2} from artifact {3}.", Name, artifact.Id, artifact.GetType(),
                     message.Id);
            }
            return(ApiResult.Success);
        }
Example #4
0
        protected override ApiResult ProcessClientQueueMessage(ImageArtifact message)
        {
            BitmapData bData = message.Image.LockBits(
                new Rectangle(0, 0, message.Image.Width, message.Image.Height), ImageLockMode.ReadOnly, message.Image.PixelFormat);
            int w = bData.Width, h = bData.Height, bpp = Image.GetPixelFormatSize(bData.PixelFormat) / 8;

            unsafe
            {
                TesseractImage.SetImage(new UIntPtr(bData.Scan0.ToPointer()), w, h, bpp, bData.Stride);
            }
            Pix = TesseractImage.GetInputImage();

            Debug("Pix has width: {0} height: {1} depth: {2} xres: {3} yres: {4}.", Pix.Width, Pix.Height, Pix.Depth,
                  Pix.XRes, Pix.YRes);
            List <string> text;

            using (var op = Begin("Tesseract OCR (fast)"))
            {
                TesseractImage.Recognize();
                ResultIterator resultIterator = TesseractImage.GetIterator();
                text = new List <string>();
                PageIteratorLevel pageIteratorLevel = PageIteratorLevel.RIL_PARA;
                do
                {
                    string r = resultIterator.GetUTF8Text(pageIteratorLevel);
                    if (r.IsEmpty())
                    {
                        continue;
                    }
                    text.Add(r.Trim());
                }while (resultIterator.Next(pageIteratorLevel));

                if (text.Count > 0)
                {
                    string alltext = text.Aggregate((s1, s2) => s1 + " " + s2).Trim();

                    if (text.Count < 7)
                    {
                        Info("Artifact id {0} is likely a photo or non-text image.", message.Id);
                    }
                    else
                    {
                        message.OCRText = text;
                        Info("OCR Text: {0}", alltext);
                    }
                }
                else
                {
                    Info("No text recognized in artifact id {0}.", message.Id);
                }
                op.Complete();
            }

            message.Image.UnlockBits(bData);
            if (text.Count >= 7)
            {
                TextArtifact artifact = new TextArtifact(message.Name + ".txt", text);
                EnqueueMessage(artifact);
                Info("{0} added artifact id {1} of type {2} from artifact {3}.", Name, artifact.Id, artifact.GetType(),
                     message.Id);
            }

            return(ApiResult.Success);
        }
Example #5
0
        //public static void  clearFaceFlag() { faceFlag = 0; }
        public string RecognizeText(int id)
        {
            DBService dbs = new DBService();
            Document  doc = dbs.FindDocumentById(id);

            Image <Gray, Byte> img = scale(doc);

            //var image = PixConverter.ToPix(img.ToBitmap()))

            Tesseract.Native.DllImports.TesseractDirectory = System.Web.HttpContext.Current.Server.MapPath("~/Tesseract/bin/Debug/DLLS/");
            TessBaseAPI tessBaseAPI = new TessBaseAPI();

            System.Diagnostics.Debug.WriteLine("The current version is {0}", tessBaseAPI.GetVersion());

            string dataPath = System.Web.HttpContext.Current.Server.MapPath("~/tessdata/");
            string language = "eng";

            string        inputFile = doc.Path;
            OcrEngineMode oem       = OcrEngineMode.DEFAULT;
            //OcrEngineMode oem = OcrEngineMode.DEFAULT;
            PageSegmentationMode psm = PageSegmentationMode.AUTO_OSD;

            // Initialize tesseract-ocr
            if (!tessBaseAPI.Init(dataPath, language, oem))
            {
                throw new Exception("Could not initialize tesseract.");
            }

            // Set the Page Segmentation mode
            tessBaseAPI.SetPageSegMode(psm);

            // Set the input image
            Pix pix = tessBaseAPI.SetImage(inputFile);

            // Recognize image
            tessBaseAPI.Recognize();

            ResultIterator resultIterator = tessBaseAPI.GetIterator();

            // extract text from result iterator
            StringBuilder stringBuilder = new StringBuilder();
            int           top, bottom, left, right, i = 0;

            List <OCRText> forJson = new List <OCRText>();

            PageIteratorLevel pageIteratorLevel = PageIteratorLevel.RIL_TEXTLINE;

            do
            {
                string textContent = resultIterator.GetUTF8Text(pageIteratorLevel);
                resultIterator.BoundingBox(pageIteratorLevel, out left, out top, out right, out bottom);
                string coordsString = "" + left + "," + top + "," + right + "," + bottom;

                forJson.Add(new OCRText()
                {
                    Coords = coordsString, Text = textContent
                });
            } while (resultIterator.Next(pageIteratorLevel));

            tessBaseAPI.Dispose();
            pix.Dispose();

            var textForReturn = JsonConvert.SerializeObject(forJson);

            dbs.UpdateDocument(textForReturn, id);

            if (HasFace(img) == true)
            {
                FaceFlag = 1;
            }
            else
            {
                FaceFlag = 0;
            }

            return(textForReturn);
        }