コード例 #1
0
ファイル: Ocr.cs プロジェクト: mezstd/PdfDocumentParser
 /// <summary>
 /// Processes the bitmap in <c>PageSegMode.OsdOnly</c> mode and returns the
 /// <c>DeskewAngle</c> reported by the page's layout analysis.
 /// </summary>
 /// <param name="b">Bitmap to analyse.</param>
 /// <returns>The deskew angle taken from the layout properties.</returns>
 public float DetectDeskewAngle(Bitmap b)
 {
     using (Tesseract.Page page = engine.Process(b, PageSegMode.OsdOnly))
     // The layout iterator wraps a native handle, so it is disposed together with the page.
     using (var layout = page.AnalyseLayout())
     {
         return layout.GetProperties().DeskewAngle;
     }
 }
コード例 #2
0
        /// <summary>
        /// Crops the source image, shows the crop in the preview control and runs OCR on it,
        /// writing the recognized text to the console.
        /// </summary>
        private void AnalyzeString()
        {
            // Crop the bitmap first; a reasonably tight region is easier for the OCR to handle.
            System.Drawing.Rectangle rect = new System.Drawing.Rectangle(20, 90, 450, 100);
            SrcImg = SrcImg.Clone(rect, SrcImg.PixelFormat);

            // Show the cropped image in the preview control.
            this.Dispatcher.Invoke(() => {
                this._Image_crop.Source = ConvertBitmapToIS(SrcImg);
            });

            // OCR starts here.
            string langPath = @"C:\tessdata";
            string lngStr   = "eng";

            // To test with an image file instead, load it from a path:
            //var img = new Bitmap(@"C:\test.jpg");
            var img = SrcImg;

            // Engine, Pix and Page are all disposable; the stacked usings release them
            // in reverse order (page, pix, engine) even if recognition throws.
            using (var tesseract = new Tesseract.TesseractEngine(langPath, lngStr))
            using (Pix pix = PixConverter.ToPix(img))
            using (Tesseract.Page page = tesseract.Process(pix))
            {
                // Print the recognized text.
                Console.WriteLine(page.GetText());
                Console.ReadLine(); // err?
            }
        }
コード例 #3
0
ファイル: Ocr.cs プロジェクト: sergeystoyan/PdfDocumentParser
 /// <summary>
 /// Processes the bitmap with the given page segmentation mode and returns
 /// the hOCR text of page number 0 (non-XHTML form).
 /// </summary>
 public string GetHtml(Bitmap b, PageSegMode pageSegMode)
 {
     using (Tesseract.Page ocrPage = engine.Process(b, pageSegMode))
     {
         string hocr = ocrPage.GetHOCRText(0, false);
         return hocr;
     }
 }
コード例 #4
0
        /// <summary>
        /// Character recognition process: runs OCR on an image file.
        /// </summary>
        /// <param name="imgPath">Path of the image file to recognize.</param>
        /// <param name="dataDirPath">Directory containing the <c>.traineddata</c> files.</param>
        /// <param name="lang">Language name; <c>lang + ".traineddata"</c> must exist in <paramref name="dataDirPath"/>.</param>
        /// <returns>
        /// The recognized text, or an error message string when the image file or the
        /// trained data file does not exist.
        /// </returns>
        private static string Character_recognition_Process(string imgPath, string dataDirPath, string lang)
        {
            if (!System.IO.File.Exists(imgPath))
            {
                return("画像のパスに画像が見つかりませんでした");
            }

            // Locate the trained language data.
            string traindedDataPath = System.IO.Path.Combine(dataDirPath, lang + ".traineddata");

            if (!System.IO.File.Exists(traindedDataPath))
            {
                return(lang + ".traineddataがみつかりませんでした");
            }

            // Character recognition. The bitmap and the page are disposed as well as the engine;
            // the page must be released before the engine it belongs to.
            using (TesseractEngine tesseract = new Tesseract.TesseractEngine(dataDirPath, lang))
            using (var img = new System.Drawing.Bitmap(imgPath))
            {
                // Restrict recognition to these characters.
                tesseract.SetVariable("tessedit_char_whitelist", "1234567890ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz.,");

                // Perform OCR.
                using (Tesseract.Page page = tesseract.Process(img))
                {
                    return(page.GetText());
                }
            }
        }
コード例 #5
0
        /// <summary>
        /// Click handler: runs OCR on the main window's current image, parses the recognized
        /// lines with the rule set and fills the matched/unknown UI item collections.
        /// </summary>
        /// <param name="sender">The button that raised the event; disabled while the analysis runs.</param>
        /// <param name="e">Event data (unused).</param>
        internal async void Analyze(object sender, RoutedEventArgs e)
        {
            var imagePath = WPFHelper.GetCurrentMainWindow().currentImageSource;
            if (imagePath == null)
            {
                return;
            }

            Button button = (Button)sender;
            button.IsEnabled = false;
            try
            {
                uiItemsMatched.Clear();
                uiItemsUnknown.Clear();

                // The bitmap loaded from disk is owned here, so dispose it together with the page.
                using (Bitmap bitmap = (Bitmap)Bitmap.FromFile(imagePath))
                using (Tesseract.Page p = engine.Process(bitmap, PageSegMode.Auto))
                {
                    StringParser  instance = new StringParser(ruleset);
                    ParsingResult result   = null;
                    string[]      split    = p.GetText().Split('\n');
                    string[]      ready    = split.Where((s) => { return(!string.IsNullOrWhiteSpace(s)); }).ToArray();
                    try
                    {
                        result = await Task.Run(() => { return(instance.Parse(ready)); });
                    }
                    catch (ParsingEntryNotFoundException)
                    {
                        // Retry with the first start marker of the rule set prepended to the lines.
                        string[] modified = new string[ready.Length + 1];
                        modified[0] = ruleset.startMarkers[0];
                        ready.CopyTo(modified, 1);
                        result = await Task.Run(() => { return(instance.Parse(modified)); });
                    }
                    ConstructUI(result.parsed, uiItemsMatched);
                    ConstructUI(result.unknown, uiItemsUnknown);
                }
            }
            finally
            {
                // Re-enable the button even when OCR or parsing throws; otherwise it stays disabled.
                button.IsEnabled = true;
            }
        }
コード例 #6
0
ファイル: Ocr.cs プロジェクト: sergeystoyan/PdfDocumentParser
 /// <summary>
 /// Processes the bitmap in <c>PageSegMode.OsdOnly</c> mode and returns the
 /// <c>Orientation</c> reported by the page's layout analysis.
 /// </summary>
 /// <param name="b">Bitmap to analyse.</param>
 /// <returns>The orientation taken from the layout properties.</returns>
 public Orientation DetectOrientation(Bitmap b)
 {
     using (Tesseract.Page page = engine.Process(b, PageSegMode.OsdOnly))
     // The layout iterator wraps a native handle, so it is disposed together with the page.
     using (var layout = page.AnalyseLayout())
     {
         return layout.GetProperties().Orientation;
     }
 }
コード例 #7
0
ファイル: Ocr.cs プロジェクト: sergeystoyan/PdfDocumentParser
 /// <summary>
 /// Processes the bitmap with the given page segmentation mode and returns
 /// the character boxes extracted from the resulting page.
 /// </summary>
 public List <CharBox> GetCharBoxs(Bitmap b, PageSegMode pageSegMode)
 {
     using (Tesseract.Page ocrPage = engine.Process(b, pageSegMode))
     {
         List <CharBox> charBoxes = getCharBoxs(ocrPage);
         return charBoxes;
     }
 }
コード例 #8
0
        /// <summary>
        /// Console entry point: reads an image path, runs OCR on it with the "tur" language data
        /// and prints the engine-creation time, the processing time and the recognized text.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        private static void Main(string[] args)
        {
            Console.Write("Input image path:");

            string input = Console.ReadLine();

            Stopwatch stopwatch = new Stopwatch();

            stopwatch.Start();

            // Engine, image and page hold native resources; dispose them deterministically.
            using (T.TesseractEngine engine = new T.TesseractEngine("tessdata", "tur", T.EngineMode.TesseractAndLstm))
            {
                stopwatch.Stop();

                Console.WriteLine("Engine creation :" + stopwatch.ElapsedMilliseconds.ToString() + " ms");

                stopwatch.Restart();

                string text;

                using (T.Pix image = T.Pix.LoadFromFile(input))
                using (T.Page page = engine.Process(image))
                {
                    text = page.GetText();

                    // Stop before disposal so the measurement covers load + recognition only, as before.
                    stopwatch.Stop();
                }

                Console.WriteLine("Process time :" + stopwatch.ElapsedMilliseconds.ToString() + " ms");

                Console.Write("Result: " + text);
            }

            Console.Read();
        }
コード例 #9
0
ファイル: Ocr.cs プロジェクト: sergeystoyan/PdfDocumentParser
        //Tesseract.Page getPage(Bitmap b)
        //{
        //    if (cachedPageBitmap != b)
        //    {
        //        cachedPage?.Dispose();
        //        cachedPage = engine.Process(b, PageSegMode.SparseTextOsd);
        //        cachedPageBitmap = b;
        //    }
        //    return cachedPage;
        //}
        //Bitmap cachedPageBitmap = null;
        //Tesseract.Page cachedPage = null;

        /// <summary>
        /// Processes the bitmap in <c>PageSegMode.OsdOnly</c> mode and returns the best
        /// orientation value detected for the page.
        /// </summary>
        /// <param name="b">Bitmap to analyse.</param>
        /// <param name="confidence">Receives the confidence reported by the detection.</param>
        /// <returns>The orientation value reported by <c>DetectBestOrientation</c>.</returns>
        public int DetectOrientationAngle(Bitmap b, out float confidence)
        {
            using (Tesseract.Page osdPage = engine.Process(b, PageSegMode.OsdOnly))
            {
                int angle;
                osdPage.DetectBestOrientation(out angle, out confidence);
                return angle;
            }
        }
コード例 #10
0
ファイル: Form1.cs プロジェクト: Vertx-RU/OCR
 /// <summary>
 /// Click handler: runs OCR (language "chi_sim") on the image shown in <c>pictureBox1</c>
 /// and puts the recognized text into <c>textBox1</c>.
 /// </summary>
 private void button2_Click(object sender, EventArgs e)
 {
     // Nothing to recognize when no image is loaded.
     if (pictureBox1.Image == null)
     {
         return;
     }

     // Release the page and engine left over from the previous click before replacing them;
     // the page is disposed first because it belongs to the engine.
     // NOTE(review): assumes no other code still uses the previous page/engine instances — confirm.
     if (page != null)
     {
         page.Dispose();
     }
     if (engine != null)
     {
         engine.Dispose();
     }

     //pictureBox1.Image = Clipboard.GetImage();
     engine        = new TesseractEngine(System.Environment.CurrentDirectory + "\\tessdata", "chi_sim", EngineMode.Default);
     img           = new Bitmap(pictureBox1.Image);
     page          = engine.Process(img);
     textBox1.Text = page.GetText();
 }
コード例 #11
0
ファイル: Play.cs プロジェクト: reinf92/AutoTune
        /// <summary>
        /// Runs English OCR on the bitmap using the trained data in <c>./tessdata</c>.
        /// </summary>
        /// <param name="bitmap">Image to recognize.</param>
        /// <returns>The recognized text.</returns>
        private string ExtractText(Bitmap bitmap)
        {
            // Both the engine and the page wrap native resources; without disposal every call leaks them.
            using (TesseractEngine engine = new TesseractEngine(@"./tessdata", "eng", EngineMode.Default))
            using (Tesseract.Page page = engine.Process(bitmap))
            {
                return(page.GetText());
            }
        }
コード例 #12
0
            /// <summary>
            /// Gets the text contained in an image.
            /// </summary>
            /// <param name="ImgData">Encoded image bytes.</param>
            /// <param name="TessData_Path">Path of the directory with the Tesseract trained data.</param>
            /// <param name="TessData_Language">Language to recognize.</param>
            /// <returns>The recognized text.</returns>
            public static string GetImgText(byte[] ImgData, string TessData_Path = @"./tessdata", string TessData_Language = "eng")
            {
                // Mat, the decoded image, the Pix and the Page all hold native memory, so each one
                // is scoped with a using and released as soon as it is no longer needed.
                using (Mat MatImg = Todo_OpenCvSharp4.ImgByteArrayToMat(ImgData))
                using (var inms = new MemoryStream(MatImg.ToBytes()))
                using (var outms = new MemoryStream())
                {
                    // Re-encode as TIFF in memory so it can be loaded as a Pix.
                    using (var decoded = System.Drawing.Bitmap.FromStream(inms))
                    {
                        decoded.Save(outms, System.Drawing.Imaging.ImageFormat.Tiff);
                    }

                    using (var pix = Pix.LoadTiffFromMemory(outms.ToArray()))
                    using (var engine = new TesseractEngine(TessData_Path, TessData_Language, EngineMode.Default))
                    using (Tesseract.Page page = engine.Process(pix))
                    {
                        return(page.GetText());
                    }
                }
            }
コード例 #13
0
ファイル: Ocr.cs プロジェクト: mezstd/PdfDocumentParser
 /// <summary>
 /// Recognizes the text inside rectangle <paramref name="r"/> of the bitmap.
 /// Returns <c>null</c> when <c>getScaled</c> reports failure for the rectangle.
 /// </summary>
 public string GetTextSurroundedByRectangle(Bitmap b, RectangleF r, PageSegMode pageSegMode)
 {
     bool scaled = getScaled(b, ref r);
     if (scaled)
     {
         // Coordinates are truncated to integers for the Tesseract region.
         Rect region = new Rect((int)r.X, (int)r.Y, (int)r.Width, (int)r.Height);
         using (Tesseract.Page ocrPage = engine.Process(b, region, pageSegMode))
         {
             return ocrPage.GetText();
         }
     }
     return null;
 }
コード例 #14
0
        /// <summary>
        /// Initializes the window, runs OCR on the test image and shows the recognized text
        /// in a message box.
        /// </summary>
        public MainWindow()
        {
            InitializeComponent();

            string result;

            // Bitmap, engine and page are disposable; release them once the text has been read.
            using (Bitmap img = new Bitmap("C:\\Users\\Bikalpa\\Documents\\testdll\\testimage.png"))
            using (TesseractEngine engine = new TesseractEngine("./tessdata", "eng", EngineMode.Default))
            using (Tesseract.Page page = engine.Process(img, PageSegMode.Auto))
            {
                result = page.GetText();
            }

            MessageBox.Show(result);
        }
コード例 #15
0
ファイル: Ocr.cs プロジェクト: sergeystoyan/PdfDocumentParser
        //!!!abandoned in favor of GetCharBoxsSurroundedByRectangle() because:
        //- it does not give good line endings;
        //- it does not explicitly accept TextAutoInsertSpace
        //public string GetTextSurroundedByRectangle(Bitmap b, RectangleF r, PageSegMode pageSegMode)
        //{
        //    if (!getScaled(b, ref r))
        //        return string.Empty;
        //    using (Tesseract.Page page = engine.Process(b, new Rect((int)r.X, (int)r.Y, (int)r.Width, (int)r.Height), pageSegMode))
        //    {
        //        return page.GetText();
        //    }
        //}

        /// <summary>
        /// Returns the character boxes recognized inside rectangle <paramref name="r"/> of the bitmap,
        /// or <c>null</c> when <c>getScaled</c> reports failure for the rectangle.
        /// </summary>
        public List <CharBox> GetCharBoxsSurroundedByRectangle(Bitmap b, RectangleF r, PageSegMode pageSegMode)
        {
            bool scaled = getScaled(b, ref r);
            if (scaled)
            {
                // Coordinates are truncated to integers for the Tesseract region.
                Rect region = new Rect((int)r.X, (int)r.Y, (int)r.Width, (int)r.Height);
                using (Tesseract.Page ocrPage = engine.Process(b, region, pageSegMode))
                {
                    return getCharBoxs(ocrPage);
                }
            }
            return null;
        }
コード例 #16
0
        /// <summary>
        /// Processes a region of the given image.
        /// </summary>
        /// <remarks>
        /// Only one page can be open at a time: the call throws while a previously
        /// returned page has not been disposed.
        /// </remarks>
        /// <param name="image">The image to process.</param>
        /// <param name="region">The image region to process; must lie within the image bounds.</param>
        /// <param name="pageSegMode">Page segmentation mode; <c>DefaultPageSegMode</c> is used when null.</param>
        /// <returns>The page for the processed image.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="image"/> is null.</exception>
        /// <exception cref="ArgumentException"><paramref name="region"/> extends outside the image.</exception>
        /// <exception cref="InvalidOperationException">Another page is still being processed.</exception>
        public Page Process(Pix image, Rect region, PageSegMode? pageSegMode = null)
        {
            if (image == null)
            {
                throw new ArgumentNullException("image");
            }

            bool regionOutsideImage = region.X1 < 0 || region.Y1 < 0 || region.X2 > image.Width || region.Y2 > image.Height;
            if (regionOutsideImage)
            {
                throw new ArgumentException("The image region to be processed must be within the image bounds.", "region");
            }

            if (processCount > 0)
            {
                throw new InvalidOperationException("Only one image can be processed at once. Please make sure you dispose of the page once your finished with it.");
            }

            processCount++;

            var effectiveMode = pageSegMode ?? DefaultPageSegMode;
            Interop.TessApi.BaseAPISetPageSegMode(handle, effectiveMode);
            Interop.TessApi.BaseApiSetImage(handle, image.Handle);
            Interop.TessApi.BaseApiSetRectangle(handle, region.X1, region.Y1, region.Width, region.Height);

            var result = new Page(this);
            result.Disposed += OnIteratorDisposed;
            return result;
        }
コード例 #17
0
        /// <summary>
        /// Page load handler: runs Chinese (chi_sim) OCR on a bundled image after converting it to grayscale.
        /// </summary>
        protected void Page_Load(object sender, EventArgs e)
        {
            string ocrTtxt = "";
            // chi_sim is the Simplified Chinese language data.
            const string language = "chi_sim";
            // The Tesseract package installed from NuGet is version 3.20 and the tessdata version must match it;
            // the path must also end with "\" or "/".
            string TessractData = AppDomain.CurrentDomain.BaseDirectory + @"tessdata\";

            // Everything below wraps native resources; dispose them so each request does not leak.
            using (TesseractEngine test = new TesseractEngine(TessractData, language))
            // Create the image object.
            using (Bitmap tmpVal = new Bitmap(AppDomain.CurrentDomain.BaseDirectory + @"Content\捕获.PNG"))
            // Convert to grayscale, which can improve the recognition rate.
            using (var tmpImage = Helper.Class.ToGray(tmpVal))
            //Page tmpPage = test.Process(tmpImage, pageSegMode: test.DefaultPageSegMode);
            using (Page tmpPage = test.Process(tmpImage))
            {
                ocrTtxt = tmpPage.GetText();
            }
        }
コード例 #18
0
ファイル: evSUNAT.cs プロジェクト: eincioch/ProyectoLabMedica
        /// <summary>
        /// Runs Tesseract on the finished captcha image and returns the recognized text,
        /// trimmed, upper-cased and with spaces removed.
        /// </summary>
        /// <returns>The normalized captcha text.</returns>
        public string UseTesseract()
        {
            string text = String.Empty;

            // ReadCapcha returns the already processed image.
            using (Bitmap bm = new Bitmap(ReadCapcha()))
            {
                // Instantiate the TesseractEngine field declared on the class.
                // NOTE(review): the previous engine instance held by the field is not disposed here
                // because its ownership is not visible from this method — confirm and dispose if safe.
                engine = new TesseractEngine(@".\tessdata", "eng", EngineMode.Default);
                engine.DefaultPageSegMode = PageSegMode.SingleBlock;

                // The page must be disposed; an undisposed page leaks its native result handle.
                using (Tesseract.Page p = engine.Process(bm))
                {
                    text = p.GetText().Trim().ToUpper().Replace(" ", "");
                }
                //  Console.WriteLine("Text recognized: " + text);
            }
            // Return the text obtained by the OCR.
            return(text);
        }
コード例 #19
0
        /// <summary>
        /// Downloads the captcha image, preprocesses it and runs OCR on it.
        /// The recognized text (trimmed, upper-cased, spaces removed) is stored in
        /// <c>TextoCapcha</c> and returned.
        /// </summary>
        /// <returns>The normalized captcha text.</returns>
        private String ReadCapcha()
        {
            // The original wrapped this body in try/catch with "throw ex;", which only reset the
            // stack trace; exceptions now propagate unchanged.

            // NOTE(review): this replaces the process-wide certificate validation callback;
            // what ValidarCertificado accepts is not visible here — confirm it does not accept every certificate.
            System.Net.ServicePointManager.ServerCertificateValidationCallback = new System.Net.Security.RemoteCertificateValidationCallback(ValidarCertificado);
            // Address used to fetch the captcha image.
            //HttpWebRequest myWebRequest = (HttpWebRequest)WebRequest.Create("http://www.sunat.gob.pe/cl-ti-itmrconsruc/captcha?accion=image&magic=2");
            HttpWebRequest myWebRequest = (HttpWebRequest)WebRequest.Create("http://e-consultaruc.sunat.gob.pe/cl-ti-itmrconsruc/captcha?accion=image&magic=2");
            myWebRequest.CookieContainer = myCookie;
            myWebRequest.Proxy           = null;
            myWebRequest.Credentials     = CredentialCache.DefaultCredentials;

            // Response, stream and images are disposable; release them deterministically.
            using (HttpWebResponse myWebResponse = (HttpWebResponse)myWebRequest.GetResponse())
            using (Stream myImgStream = myWebResponse.GetResponseStream())
            using (Image downloaded = Image.FromStream(myImgStream))
            using (Bitmap bm = new Bitmap(downloaded))
            {
                // Remove the color from the bitmap.
                qutarColor(bm);

                // Preprocess the image (character separation, alignment, etc.) so it is ready for the OCR.
                // NOTE(review): the object returned by PreProcessImage is not disposed here because
                // its ownership is not visible from this method.
                using (Bitmap bm2 = new Bitmap((Image)PreProcessImage(bm)))
                {
                    // Instantiate the TesseractEngine field declared on the class.
                    //engine = new TesseractEngine(@".\tessdata", "eng", EngineMode.Default);
                    string dataPath = HostingEnvironment.MapPath(@"~/tessdata");
                    //string dataPath = @"E:\Mario\Proyectos\IB2B.LocalizacionNew\IB2B.Localizacion\IB2B.Localizacion.Web\tessdata";

                    engine = new TesseractEngine(dataPath, "eng", EngineMode.Default);
                    engine.DefaultPageSegMode = PageSegMode.SingleBlock;
                    using (Tesseract.Page p = engine.Process(bm2))
                    {
                        TextoCapcha = p.GetText().Trim().ToUpper().Replace(" ", "");
                    }
                    //  Console.WriteLine("Text recognized: " + text);
                }
            }
            return(TextoCapcha);
        }
コード例 #20
0
        /// <summary>
        /// Click handler of the toolbar "Recognize" button: runs English OCR on the file at
        /// <c>imgPath</c> and shows the result in <c>tessTextBlock</c>.
        /// </summary>
        private void Button_Click(object sender, RoutedEventArgs e)
        {
            string path         = AppDomain.CurrentDomain.BaseDirectory;
            string rootPath     = path.Substring(0, path.LastIndexOf("bin"));
            string tessdataPath = string.Concat(rootPath, "tessdata"); // TODO: turn into a resource so it works after packaging

            string str = "识别结果:\n\n";

            // Engine (language: English), image and page are all disposed, also when recognition throws.
            using (TesseractEngine ocr = new TesseractEngine(tessdataPath, "eng", EngineMode.Default))
            using (var img = Pix.LoadFromFile(imgPath))
            //bit = PreprocesImage(bit); // image preprocessing, worth trying if the recognition rate is low
            using (Tesseract.Page page = ocr.Process(img))
            {
                str += page.GetText(); // recognized content
            }

            //string str = ImageToText(imgPath);
            tessTextBlock.Text = str;  // show the recognition result in the UI
            // TODO: handle recognition failure
        }
コード例 #21
0
        /// <summary>
        /// Runs English OCR on the image and returns the recognized text trimmed,
        /// upper-cased and with spaces removed.
        /// </summary>
        /// <param name="img">Image to recognize; it is copied into a temporary bitmap.</param>
        /// <returns>
        /// The normalized text, or <c>null</c> after showing a message box when a
        /// <see cref="TargetInvocationException"/> is caught.
        /// </returns>
        public string ImageFromText(Image img)
        {
            try
            {
                // Bitmap copy, engine and page are released in reverse order when the block exits.
                using (Bitmap bm = new Bitmap(img))
                using (TesseractEngine engine = new TesseractEngine(@"C:\Sistema\tessdata", "eng", EngineMode.Default))
                {
                    engine.DefaultPageSegMode = PageSegMode.SingleBlock;
                    using (Tesseract.Page p = engine.Process(bm))
                    {
                        return(p.GetText().Trim().ToUpper().Replace(" ", ""));
                    }
                }
            }
            catch (TargetInvocationException ex)
            {
                MessageBox.Show("Problema al obtener el codigo: " + ex.Message);
                return(null);
            }
        }
コード例 #22
0
ファイル: PageIterator.cs プロジェクト: Picazsoo/tesseract
 /// <summary>
 /// Creates an iterator over <paramref name="page"/> that wraps the given native handle.
 /// </summary>
 /// <param name="page">The page this iterator belongs to.</param>
 /// <param name="handle">Native iterator handle, stored as a <see cref="HandleRef"/> tied to this instance.</param>
 internal PageIterator(Page page, IntPtr handle)
 {
     this.handle = new HandleRef(this, handle);
     this.page = page;
 }
コード例 #23
0
ファイル: Ocr.cs プロジェクト: sergeystoyan/PdfDocumentParser
        /// <summary>
        /// Walks the page symbol by symbol and returns one <c>CharBox</c> for every symbol
        /// that has a bounding box. Box coordinates are multiplied by
        /// <c>Settings.Constants.Pdf2ImageResolutionRatio</c>.
        /// </summary>
        /// <remarks>
        /// Earlier experiments that walked the block/paragraph/line/word levels and auto-inserted
        /// space or line-break boxes were abandoned; only the flat symbol walk remains.
        /// </remarks>
        List <CharBox> getCharBoxs(Tesseract.Page page)
        {
            var boxes = new List <CharBox>();

            using (var symbols = page.GetIterator())
            {
                do
                {
                    // Symbols without a bounding box are skipped; the loop condition still advances the iterator.
                    if (!symbols.TryGetBoundingBox(PageIteratorLevel.Symbol, out Rect bounds))
                    {
                        continue;
                    }

                    string symbolText = symbols.GetText(PageIteratorLevel.Symbol);
                    var    scale      = Settings.Constants.Pdf2ImageResolutionRatio;

                    CharBox box = new CharBox
                    {
                        Char = symbolText,
                        R    = new RectangleF(bounds.X1 * scale, bounds.Y1 * scale, bounds.Width * scale, bounds.Height * scale)
                    };
                    boxes.Add(box);
                } while (symbols.Next(PageIteratorLevel.Symbol));
            }

            return boxes;
        }
コード例 #24
0
        /// <summary>
        /// Classifies an area given as a 2-D point grid. The grid is sampled down to 28x28 values
        /// that are fed to <c>ksvm</c>; the full grid is also rendered to a PNG file and passed
        /// through OCR.
        /// </summary>
        /// <param name="AddedAreaPoints">Point grid; cells equal to 1 are drawn black, all others white.</param>
        /// <param name="ImageID">Identifier appended to the name of the saved PNG file.</param>
        /// <returns><c>true</c> when the value computed by <c>ksvm</c> converts to 1; otherwise <c>false</c>.</returns>
        public Boolean Classification(int[,] AddedAreaPoints, string ImageID)
        {
            int sourceRows    = AddedAreaPoints.Rows();
            int sourceColumns = AddedAreaPoints.Columns();

            double XConvertor = (double)(sourceRows) / (double)(28);
            double YConvertor = (double)(sourceColumns) / (double)(28);

            // Sample the grid at 28x28 positions (floor of the scaled index) to build the classifier input.
            double[] input = new double[784];

            for (int i = 0; i < 28; i++)
            {
                for (int j = 0; j < 28; j++)
                {
                    int X = (int)(Math.Floor(XConvertor * i));
                    int Y = (int)(Math.Floor(YConvertor * j));
                    input[i * 28 + j] = AddedAreaPoints[X, Y];
                }
            }

            string imagePath = @"C:\Users\nhonarva\Documents\ResultsOfScaling\scaling" + ImageID + ".png";

            // Render the full-size grid; the bitmap is disposed as soon as it has been saved.
            using (Bitmap DestinationImage = new Bitmap(sourceRows, sourceColumns))
            {
                for (int i = 0; i < sourceRows; i++)
                {
                    for (int j = 0; j < sourceColumns; j++)
                    {
                        Color pixel = AddedAreaPoints[i, j] == 1 ? Color.Black : Color.White;
                        DestinationImage.SetPixel(i, j, pixel);
                    }
                }

                DestinationImage.Save(imagePath);
            }

            // Run OCR on the saved image. The results are not used for the classification below;
            // they are kept as in the original (handy while debugging).
            using (var image = Pix.LoadFromFile(imagePath))
            {
                page = _engine.Process(image, PageSegMode.SingleBlock);
                try
                {
                    string text = page.GetText();
                    double confidence = page.GetMeanConfidence();
                }
                finally
                {
                    // Always release the page, otherwise the engine refuses to process the next image.
                    page.Dispose();
                }
            }

            int actual = (int)ksvm.Compute(input);
            return actual == 1;
        }
コード例 #25
0
ファイル: RecognizeDoc.cs プロジェクト: Badou03080/earchive
        /*
        int LookingTextMarker(RecognazeRule Rule, Page page, out ResultIterator BestLineIter, out int word)
        {
            word = -1;
            BestLineIter = null;
            int BestDistance = 10000;

            ResultIterator LineIter = page.GetIterator();
            string[] Words = Rule.TextMarker.Split(new char[] {' '}, StringSplitOptions.RemoveEmptyEntries);
            int NumberOfWords = Words.Length;
            LineIter.Begin();
            do
            {
                int CurrentWordNumber = -1;
                int CurrentBestDistance = 10000;
                string Line = LineIter.GetText(PageIteratorLevel.TextLine);
                if(Line == null)
                    continue;
                string[] WordsOfLine = Line.Split(new char[] {' '}, StringSplitOptions.None);
                if(WordsOfLine.Length < NumberOfWords)
                    continue;

                for(int shift = 0; shift <= WordsOfLine.Length - NumberOfWords; shift++)
                {
                    int PassDistance = 0;
                    for(int i = 0; i < NumberOfWords; i++)
                    {
                        PassDistance += FuzzyStringComparer.GetDistanceLevenshtein(WordsOfLine[shift + i],
                                                                                      Words[i],
                                                                                      StringComparison.CurrentCultureIgnoreCase);
                    }
                    if(PassDistance < CurrentBestDistance)
                    {
                        CurrentBestDistance = PassDistance;
                        CurrentWordNumber = shift + 1;
                    }
                }
                if(CurrentBestDistance < BestDistance)
                {
                    AddToLog ("new best");
                    AddToLog (LineIter.GetText(PageIteratorLevel.Word));
                    word = CurrentWordNumber;
                    if(BestLineIter != null)
                        BestLineIter.Dispose();
                    BestLineIter = LineIter.Clone();
                    AddToLog (BestLineIter.GetText(PageIteratorLevel.TextLine));
                    BestDistance = CurrentBestDistance;
                }
            } while( LineIter.Next(PageIteratorLevel.TextLine));
            LineIter.Dispose();
            return BestDistance;
        } */
        /// <summary>
        /// Searches the page line by line for the run of words that is closest (summed Levenshtein
        /// distance, case-insensitive) to <paramref name="Text"/> and reports the top-left corner
        /// of the first word of that run.
        /// </summary>
        /// <param name="Text">Text to look for; it is split into words on spaces.</param>
        /// <param name="page">OCR page to search.</param>
        /// <param name="PosX">Receives the X1 of the best match's first word, or -1 when unknown.</param>
        /// <param name="PosY">Receives the Y1 of the best match's first word, or -1 when unknown.</param>
        /// <returns>The smallest summed distance found; 10000 when no line was compared.</returns>
        int GetTextPosition(string Text, Page page, out int PosX, out int PosY)
        {
            int BestDistance = 10000;
            PosX = -1;
            PosY = -1;
            char[] WordSeparators = new char[] {' '};
            string[] Words = Text.Split(WordSeparators, StringSplitOptions.RemoveEmptyEntries);
            int NumberOfWords = Words.Length;

            // using guarantees the native iterator is released even when an exception is thrown.
            using(ResultIterator LineIter = page.GetIterator())
            {
                LineIter.Begin();
                do
                {
                    int CurrentWordNumber = -1;
                    int CurrentBestDistance = 10000;
                    string Line = LineIter.GetText(PageIteratorLevel.TextLine);
                    if(Line == null)
                        continue;
                    Line = Line.Trim();
                    string[] WordsOfLine = Line.Split(WordSeparators, StringSplitOptions.None);
                    if(WordsOfLine.Length < NumberOfWords)
                        continue;

                    // Slide a window of NumberOfWords words over the line and keep the closest one.
                    for(int shift = 0; shift <= WordsOfLine.Length - NumberOfWords; shift++)
                    {
                        int PassDistance = 0;
                        for(int i = 0; i < NumberOfWords; i++)
                        {
                            PassDistance += FuzzyStringComparer.GetDistanceLevenshtein(WordsOfLine[shift + i],
                                                                                       Words[i],
                                                                                       StringComparison.CurrentCultureIgnoreCase);
                        }
                        if(PassDistance < CurrentBestDistance)
                        {
                            CurrentBestDistance = PassDistance;
                            CurrentWordNumber = shift;
                        }
                    }
                    if(CurrentBestDistance < BestDistance)
                    {
                        AddToLog ("new best");
                        AddToLog (Line);
                        BestDistance = CurrentBestDistance;
                        // Advance the iterator to the first word of the best window.
                        for(int i = 0; i < CurrentWordNumber; i++)
                        {
                            LineIter.Next(PageIteratorLevel.Word);
                        }
                        Rect Box;
                        if(LineIter.TryGetBoundingBox(PageIteratorLevel.Word, out Box))
                        {
                            PosX = Box.X1;
                            PosY = Box.Y1;
                        }
                        else
                        {
                            // No box available: report "unknown" instead of the default rectangle's 0,0.
                            PosX = -1;
                            PosY = -1;
                        }
                    }
                } while( LineIter.Next(PageIteratorLevel.TextLine));
            }
            return BestDistance;
        }
コード例 #26
0
        /// <summary>
        /// Processes a specified region in the image using the specified page layout analysis mode.
        /// </summary>
        /// <remarks>
        /// Only one page can be in process at any one time; a second call made while a page is still
        /// in process is rejected with an <see cref="InvalidOperationException"/>.
        /// </remarks>
        /// <param name="image">The image to process.</param>
        /// <param name="inputName">Sets the input file's name, only needed for training or loading a uzn file.</param>
        /// <param name="region">The image region to process; it must lie within the image bounds.</param>
        /// <param name="pageSegMode">The page layout analysis method to use; <c>DefaultPageSegMode</c> is used when this is <c>null</c>.</param>
        /// <returns>The page created for the image and region.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="image"/> is <c>null</c>.</exception>
        /// <exception cref="ArgumentException"><paramref name="region"/> extends outside the image.</exception>
        /// <exception cref="InvalidOperationException">Another page is still being processed.</exception>
        public Page Process(Pix image, string inputName, Rect region, PageSegMode? pageSegMode = null)
        {
            if (image == null) throw new ArgumentNullException("image");
            if (region.X1 < 0 || region.Y1 < 0 || region.X2 > image.Width || region.Y2 > image.Height)
                throw new ArgumentException("The image region to be processed must be within the image bounds.", "region");
            if (processCount > 0) throw new InvalidOperationException("Only one image can be processed at once. Please make sure you dispose of the page once your finished with it.");

            processCount++;
            try
            {
                var actualPageSegmentMode = pageSegMode ?? DefaultPageSegMode;
                Interop.TessApi.Native.BaseAPISetPageSegMode(handle, actualPageSegmentMode);
                Interop.TessApi.Native.BaseApiSetImage(handle, image.Handle);
                if (!String.IsNullOrEmpty(inputName))
                {
                    Interop.TessApi.Native.BaseApiSetInputName(handle, inputName);
                }

                var page = new Page(this, image, inputName, region, actualPageSegmentMode);
                page.Disposed += OnIteratorDisposed;
                return page;
            }
            catch
            {
                // No page was handed out, so no Disposed event will ever fire for this call.
                // Undo the increment here, otherwise the processCount check above would reject
                // every later call on this engine.
                // NOTE(review): assumes OnIteratorDisposed is what normally decrements processCount - confirm.
                processCount--;
                throw;
            }
        }
コード例 #27
0
ファイル: ResultIterator.cs プロジェクト: Picazsoo/tesseract
 /// <summary>
 /// Creates a result iterator by passing the page and the handle straight to the base constructor.
 /// </summary>
 /// <param name="page">The page forwarded to the base class.</param>
 /// <param name="handle">The handle forwarded to the base class.</param>
 internal ResultIterator(Page page, IntPtr handle) : base(page, handle) { }
コード例 #28
0
        /*
        int LookingTextMarker(RecognazeRule Rule, Page page, out ResultIterator BestLineIter, out int word)
        {
            word = -1;
            BestLineIter = null;
            int BestDistance = 10000;

            ResultIterator LineIter = page.GetIterator();
            string[] Words = Rule.TextMarker.Split(new char[] {' '}, StringSplitOptions.RemoveEmptyEntries);
            int NumberOfWords = Words.Length;
            LineIter.Begin();
            do
            {
                int CurrentWordNumber = -1;
                int CurrentBestDistance = 10000;
                string Line = LineIter.GetText(PageIteratorLevel.TextLine);
                if(Line == null)
                    continue;
                string[] WordsOfLine = Line.Split(new char[] {' '}, StringSplitOptions.None);
                if(WordsOfLine.Length < NumberOfWords)
                    continue;

                for(int shift = 0; shift <= WordsOfLine.Length - NumberOfWords; shift++)
                {
                    int PassDistance = 0;
                    for(int i = 0; i < NumberOfWords; i++)
                    {
                        PassDistance += FuzzyStringComparer.GetDistanceLevenshtein(WordsOfLine[shift + i],
                                                                                      Words[i],
                                                                                      StringComparison.CurrentCultureIgnoreCase);
                    }
                    if(PassDistance < CurrentBestDistance)
                    {
                        CurrentBestDistance = PassDistance;
                        CurrentWordNumber = shift + 1;
                    }
                }
                if(CurrentBestDistance < BestDistance)
                {
                    AddToLog ("new best");
                    AddToLog (LineIter.GetText(PageIteratorLevel.Word));
                    word = CurrentWordNumber;
                    if(BestLineIter != null)
                        BestLineIter.Dispose();
                    BestLineIter = LineIter.Clone();
                    AddToLog (BestLineIter.GetText(PageIteratorLevel.TextLine));
                    BestDistance = CurrentBestDistance;
                }
            } while( LineIter.Next(PageIteratorLevel.TextLine));
            LineIter.Dispose();
            return BestDistance;
        } */
        /// <summary>
        /// Scans the text lines of <paramref name="page"/> for the run of consecutive words whose distance
        /// to <paramref name="Text"/> (as reported by <c>FuzzyStringComparer.GetDistanceLevenshtein</c>,
        /// ignoring case) is smallest, and reports the position and baseline angle at that match.
        /// </summary>
        /// <remarks>
        /// For every starting word of a line, runs of 1 up to as many words as <paramref name="Text"/>
        /// contains are joined with single spaces and compared against the whole marker text.
        /// Each time a line improves on the best distance so far, the rules in
        /// <paramref name="AfterMarkerRules"/> are refreshed from that line.
        /// </remarks>
        /// <param name="Text">Marker text to look for.</param>
        /// <param name="page">Recognized page whose text lines are searched.</param>
        /// <param name="PosX">Receives X1 of the word-level bounding box read at the best match; -1 when no line produced a match.</param>
        /// <param name="PosY">Receives Y1 of the word-level bounding box read at the best match; -1 when no line produced a match.</param>
        /// <param name="AngleRad">Receives the angle, in radians, computed from the word-level baseline at the best match; 0 when no line produced a match.</param>
        /// <param name="AfterMarkerRules">
        /// Rules with <c>NextAfterTextMarker</c> set get <c>AfterTextMarkerValue</c> assigned from the word
        /// located <c>ShiftWordsCount</c> positions after the end of the match, when such a word exists.
        /// May be <c>null</c>.
        /// </param>
        /// <returns>The smallest distance found, or 10000 when no candidate was evaluated.</returns>
        int GetTextPosition(string Text, Page page, out int PosX, out int PosY, out double AngleRad, RecognazeRule[] AfterMarkerRules)
        {
            int BestDistance = 10000;
            PosX = -1;
            PosY = -1;
            AngleRad = 0;
            logger.Debug("Marker zone text:{0}", page.GetText());
            string[] Words = Text.Split(new char[] {' '}, StringSplitOptions.RemoveEmptyEntries);
            int NumberOfWords = Words.Length;

            ResultIterator LineIter = page.GetIterator();
            try
            {
                LineIter.Begin();
                do
                {
                    int CurrentWordNumber = -1;
                    int CurrentAfterWord = 0;
                    int CurrentBestDistance = 10000;
                    string Line = LineIter.GetText(PageIteratorLevel.TextLine);

                    if(String.IsNullOrEmpty(Line))
                        continue;
                    Line = Line.Trim();
                    string[] WordsOfLine = Line.Split(new char[] {' '}, StringSplitOptions.RemoveEmptyEntries);

                    if(WordsOfLine.Length == 0)
                        continue;

                    // Try every start word and every run length that still fits in the line.
                    for(int shift = 0; shift < WordsOfLine.Length; shift++)
                    {
                        for(int i = 1; i <= NumberOfWords && i <= WordsOfLine.Length - shift; i++)
                        {
                            string passString = String.Join(" ", WordsOfLine, shift, i);

                            int PassDistance = FuzzyStringComparer.GetDistanceLevenshtein(passString,
                                Text,
                                StringComparison.CurrentCultureIgnoreCase);
                            // Strict comparison: on a tie the earliest, shortest candidate wins.
                            if(PassDistance < CurrentBestDistance)
                            {
                                CurrentBestDistance = PassDistance;
                                CurrentWordNumber = shift;
                                CurrentAfterWord = shift + i;
                            }
                        }
                    }

                    if(CurrentBestDistance < BestDistance)
                    {
                        logger.Debug("new best");
                        logger.Debug(Line);

                        // Fill the rule fields with the data that follows the marker.
                        if(AfterMarkerRules != null)
                        {
                            foreach(RecognazeRule rule in AfterMarkerRules)
                            {
                                if(!rule.NextAfterTextMarker)
                                    continue;

                                // A negative shift must not index before the first word of the line.
                                int valueIndex = CurrentAfterWord + rule.ShiftWordsCount;
                                if(valueIndex >= 0 && valueIndex < WordsOfLine.Length)
                                {
                                    rule.AfterTextMarkerValue = WordsOfLine[valueIndex];
                                }
                            }
                        }

                        BestDistance = CurrentBestDistance;

                        // Step the iterator word by word to the start of the match; the loop condition
                        // below then moves it on to the next text line.
                        // NOTE(review): assumes the iterator's words line up with splitting the line on spaces - confirm.
                        for(int i = 0; i < CurrentWordNumber; i++)
                        {
                            LineIter.Next(PageIteratorLevel.Word);
                        }
                        Rect Box;
                        LineIter.TryGetBoundingBox(PageIteratorLevel.Word, out Box);
                        PosX = Box.X1;
                        PosY = Box.Y1;
                        logger.Debug("Position X1:{0} Y1:{1} X2:{2} Y2:{3}", Box.X1, Box.Y1, Box.X2, Box.Y2);
                        LineIter.TryGetBaseline(PageIteratorLevel.Word, out Box);
                        logger.Debug("BaseLine X1:{0} Y1:{1} X2:{2} Y2:{3}", Box.X1, Box.Y1, Box.X2, Box.Y2);
                        AngleRad = Math.Atan2(Box.Y2 - Box.Y1, Box.X2 - Box.X1); // slope angle of the baseline
                        double AngleGrad = AngleRad * (180/Math.PI);
                        logger.Debug("Angle rad:{0} grad:{1}", AngleRad, AngleGrad);
                    }

                } while( LineIter.Next(PageIteratorLevel.TextLine));
            }
            finally
            {
                // Release the iterator even when the scan above throws.
                LineIter.Dispose();
            }
            return BestDistance;
        }
コード例 #29
0
        /// <summary>
        /// Click handler that OCRs a five-page form, writes the extracted values into the
        /// <c>~/xml.xml</c> template and sends the result back as the download <c>MyXmlDocument.xml</c>.
        /// </summary>
        /// <remarks>
        /// When <c>checkBoxOCR</c> is unchecked, the pages are rasterized at 200x200 dpi from the uploaded
        /// PDF; otherwise the five separately uploaded page images are resized to 1700x2200 and used.
        /// Nothing is sent when the required uploads are missing or the single upload is not a PDF.
        /// </remarks>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        protected void Unnamed_Click(object sender, EventArgs e)
        {
            const int PageCount = 5;
            XmlDocument doc = new XmlDocument();
            string xml;

            if (!checkBoxOCR.Checked)
            {
                // Compare the extension case-insensitively so "FORM.PDF" is accepted as well.
                if (!fileUpload.HasFile ||
                    !String.Equals(Path.GetExtension(fileUpload.FileName), ".pdf", StringComparison.OrdinalIgnoreCase))
                {
                    return;
                }

                doc.Load(Server.MapPath("~/xml.xml"));
                using (var rasterizer = new GhostscriptRasterizer())
                {
                    rasterizer.Open(fileUpload.FileContent, gvi, true);
                    // One engine serves all pages; each page is disposed before the next one is processed.
                    using (var ocr = new TesseractEngine(Server.MapPath("./tessdata"), "eng"))
                    {
                        for (int pageNumber = 1; pageNumber <= PageCount; pageNumber++)
                        {
                            using (var pageImage = (System.Drawing.Image)rasterizer.GetPage(200, 200, pageNumber).Clone())
                            {
                                FillFieldsFromImage(doc, ocr, pageImage, FieldsForPage(pageNumber));
                            }
                        }
                    }
                }

                xml = doc.OuterXml;
            }
            else
            {
                var uploads = new[] { page1Upload, page2Upload, page3Upload, page4Upload, page5Upload };
                foreach (var upload in uploads)
                {
                    if (!upload.HasFile)
                    {
                        return;
                    }
                }

                doc.Load(Server.MapPath("~/xml.xml"));
                using (var ocr = new TesseractEngine(Server.MapPath("./tessdata"), "eng"))
                {
                    for (int index = 0; index < uploads.Length; index++)
                    {
                        // NOTE(review): the decoded source image is not disposed here because it is unknown
                        // whether ResizeImage returns a new image or the one it was given - confirm.
                        using (var pageImage = ResizeImage(System.Drawing.Image.FromStream(new MemoryStream(uploads[index].FileBytes)), 1700, 2200))
                        {
                            FillFieldsFromImage(doc, ocr, pageImage, FieldsForPage(index + 1));
                        }
                    }
                }

                using (var stream = new MemoryStream())
                {
                    doc.Save(stream);
                    // Save leaves the position at the end of the stream; rewind or nothing is read back.
                    stream.Position = 0;
                    using (var reader = new StreamReader(stream))
                    {
                        xml = reader.ReadToEnd();
                    }
                }
            }

            // Sent only after every OCR resource above has been released.
            SendXmlAttachment(xml);
        }

        /// <summary>Returns the field definitions for the given 1-based form page, or an empty list for any other number.</summary>
        private static List<Field> FieldsForPage(int pageNumber)
        {
            switch (pageNumber)
            {
            case 1:
                return Constants.fields1;

            case 2:
                return Constants.fields2;

            case 3:
                return Constants.fields3;

            case 4:
                return Constants.fields4;

            case 5:
                return Constants.fields5;

            default:
                return new List<Field>();
            }
        }

        /// <summary>OCRs one page image and writes the value of each field into the node selected by its <c>xmlField</c>.</summary>
        private void FillFieldsFromImage(XmlDocument doc, TesseractEngine ocr, System.Drawing.Image pageImage, List<Field> fields)
        {
            using (Tesseract.Page ocrPage = ocr.Process((Bitmap)pageImage))
            {
                foreach (Field field in fields)
                {
                    string text;
                    if (field.isCross)
                    {
                        // Cross fields are decided by whether the cropped region is blank, not by OCR text.
                        bool marked;
                        using (var copy = new Bitmap(pageImage))
                        {
                            marked = !Methods.IsBlank(Methods.CropImage(copy, field.rect.X1, field.rect.Y1, field.rect.Width, field.rect.Height));
                        }

                        if (field.specialTreatment)
                        {
                            text = marked ? field.value : "";
                        }
                        else
                        {
                            text = marked ? "Y" : (field.CrossNo ? "N" : "");
                        }
                    }
                    else
                    {
                        ocrPage.RegionOfInterest = field.rect;
                        text = ocrPage.GetText();
                    }
                    doc.SelectSingleNode(field.xmlField).InnerText = text;
                }
            }
        }

        /// <summary>Replaces the current response with the given XML text as a file download and ends the response.</summary>
        private void SendXmlAttachment(string xml)
        {
            Response.Clear();
            Response.AddHeader("Content-Disposition", "attachment;filename=MyXmlDocument.xml");
            // Content-Length counts bytes of the encoded body, which differs from the character count
            // as soon as the text contains non-ASCII characters.
            Response.AddHeader("Content-Length", Response.ContentEncoding.GetByteCount(xml).ToString());
            Response.ContentType = "application/octet-stream";
            Response.Write(xml);
            Response.End();
        }
コード例 #30
0
			/// <summary>
			/// Stores the given page and pix and subscribes <c>OnPageDisposed</c> to the page's
			/// <c>Disposed</c> event.
			/// </summary>
			/// <param name="page">The page whose <c>Disposed</c> event is observed.</param>
			/// <param name="pix">The pix kept alongside the page.</param>
			public PageDisposalHandle(Page page, Pix pix)
			{
				this.page = page;
				this.pix = pix;
				// NOTE(review): presumably the handler releases the pix once the page is disposed - confirm in OnPageDisposed.
				page.Disposed += OnPageDisposed;
			}