예제 #1
0
      /// <summary>
      /// Loads an image from disk, shows a scaled preview in the image box,
      /// runs OCR on the full-size image and writes the recognised words to the text box.
      /// </summary>
      /// <param name="fileName">Path of the image file to recognise.</param>
      private void PerformOCR(string fileName)
      {
         // Load the picture as a single-channel (grayscale) image
         Image<Gray, Byte> source = new Image<Gray, byte>(fileName);
         fileNameTextBox.Text = fileName;

         // The preview is never larger than the image box
         // (third Resize argument presumably preserves the aspect ratio -- confirm against the Emgu version in use)
         int previewWidth = Math.Min(source.Width, imageBox1.Width);
         int previewHeight = Math.Min(source.Height, imageBox1.Height);
         imageBox1.Image = source.Resize(previewWidth, previewHeight, true);

         // More language definition data can be downloaded from
         // http://code.google.com/p/tesseract-ocr/downloads/list
         // (Dutch, Spanish, German, Italian, French and English are listed there)
         Tesseract engine = new Tesseract();
         engine.Init("eng", numericalOnlyCheckBox.Checked);
         List<tessnet2.Word> words = engine.DoOCR(source.Bitmap, Rectangle.Empty);

         // Collect the text of every recognised word, in recognition order
         List<String> pieces = new List<String>(words.Count);
         foreach (Word recognised in words)
         {
            pieces.Add(recognised.Text);
         }

         // Show the words separated by single spaces
         textBox1.Text = String.Join(" ", pieces.ToArray());
      }
        /// <summary>
        /// Runs OCR over the whole bitmap, restricted to ASCII letters, digits and ":&lt;&gt;",
        /// and returns only the words whose confidence value is below 160.
        /// </summary>
        /// <param name="image">Bitmap to recognise.</param>
        /// <returns>A new list holding the accepted words (possibly empty).</returns>
        /// <exception cref="ClosedEyedVisuals_Exception">Wraps any exception raised while recognising.</exception>
        public List<Word> GetTextFromImage(Bitmap image)
        {
            try
            {
                var engine = new Tesseract();
                engine.SetVariable("tessedit_char_whitelist", "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789:<>");
                engine.Init("tessdata", "eng", false);

                List<Word> recognised = engine.DoOCR(image, Rectangle.Empty);

                // Weed out the bad matches: keep words scoring under 160
                // (presumably a lower tessnet2 confidence value means a better match -- confirm).
                return recognised.FindAll(delegate(Word candidate) { return candidate.Confidence < 160; });
            }
            catch (Exception e)
            {
                throw new ClosedEyedVisuals_Exception("OCR error:" + e.ToString());
            }
        }
예제 #3
0
        /// <summary>
        /// Runs OCR over the full area of the image stored at <paramref name="path"/>
        /// and concatenates the text of the recognised words.
        /// </summary>
        /// <remarks>
        /// The concatenated text is neither returned nor stored; the void signature is kept
        /// so existing callers continue to compile.
        /// </remarks>
        /// <param name="path">Path of the image file to recognise.</param>
        public void Return(string path)
        {
            Tesseract tesseract = new Tesseract();

            // The bitmap is owned by this method, so release it (and the file handle) when done.
            using (Bitmap bitmap = new Bitmap(path))
            {
                // NOTE(review): the original never initialised the engine before DoOCR.
                // A null data path with "eng" mirrors the other call sites in this file --
                // confirm where the tessdata folder actually lives.
                tesseract.Init(null, "eng", false);

                // Recognise the whole picture.
                Rectangle rect = new Rectangle(Point.Empty, bitmap.Size);
                var output = tesseract.DoOCR(bitmap, rect);

                string text = string.Empty;
                foreach (Word a in output)
                {
                    text += a.Text;
                }
            }
        }
예제 #4
0
        /// <summary>
        /// Scales <paramref name="input"/> to 100x44 pixels, runs English OCR on it and
        /// returns the recognised words joined by single spaces.
        /// </summary>
        /// <param name="input">Image to recognise; it is not modified or disposed.</param>
        /// <returns>The recognised text with leading/trailing whitespace removed; empty when nothing was recognised.</returns>
        private static string DoTesseract(Image input)
        {
            // The scaled copy belongs to this method, so dispose it once OCR has finished.
            using (var bmp = new Bitmap(input, new Size(100, 44)))
            {
                var ocr = new Tesseract();
                ocr.Init(null, "eng", false);

                var result = ocr.DoOCR(bmp, Rectangle.Empty);

                // Join instead of concatenating in a loop; Trim keeps the original contract.
                string[] texts = result.ConvertAll(item => item.Text).ToArray();
                return string.Join(" ", texts).Trim();
            }
        }
예제 #5
0
파일: clp.aspx.cs 프로젝트: teknet-dev/PR3
        // Function intended to detect the number of characters in an image.
        // Runs English OCR (system and frequency dictionaries switched off) over a copy of
        // imageToSplit, stores the accumulated string in 'mot' and the count in 'nbC'.
        // NOTE(review): the loop never reads 'word' and 'nbLettre' is never incremented, so the
        // method always returns 0; the intended counting logic cannot be recovered from here.
        public int getText(Image imageToSplit)
        {
            string s = "";

            // The Bitmap copy is created here, so it is released here.
            using (var image = new Bitmap(imageToSplit))
            {
                var ocr = new Tesseract();

                ocr.SetVariable("load_system_dawg", false);
                ocr.SetVariable("load_freq_dawg", false);
                ocr.Init(Server.MapPath(@"\tessdata\"), "eng", false);
                var result = ocr.DoOCR(image, Rectangle.Empty);

                foreach (tessnet2.Word word in result)
                {
                    // NOTE(review): appends the current value of Text.Text (not word.Text) once per
                    // recognised word, then overwrites Text.Text with 'let' -- confirm intent.
                    s += Text.Text;

                    Text.Text = let;
                }
            }

            int nbLettre = 0;

            mot = s;
            return(nbC = nbLettre);
        }
 /// <summary>
 /// Console entry point: runs OCR on a fixed test image and prints confidence, text and
 /// bounding box of every recognised word containing "aimon", then waits for Enter.
 /// </summary>
 /// <param name="args">Command-line arguments (unused).</param>
 static void Main(string[] args)
 {
     try
     {
         // Release the bitmap (and the lock on the file) once recognition is done.
         using (var image = new Bitmap(@"C:\OCRTest\saimon.jpg"))
         {
             var ocr = new Tesseract();
             // Restrict recognition to digits, ASCII letters and a few punctuation marks.
             ocr.SetVariable("tessedit_char_whitelist", "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz.,$-/#&=()\"':?");
             // C:\OCRTest\tessdata must contain the language package; without it Init fails.
             // NOTE(review): the third argument is presumably tessnet2's numeric-only mode, which
             // looks at odds with the letter whitelist above -- confirm.
             ocr.Init(@"C:\OCRTest\tessdata", "eng", true);
             var result = ocr.DoOCR(image, Rectangle.Empty);
             foreach (Word word in result)
             {
                 // Word has no 'contains' member; the match has to be made on its Text.
                 if (word.Text != null && word.Text.Contains("aimon"))
                 {
                     Console.WriteLine("" + word.Confidence + " " + word.Text + " " + word.Top + " " + word.Bottom + " " + word.Left + " " + word.Right);
                 }
             }
         }
         Console.ReadLine();
     }
     catch (Exception exception)
     {
         // Report the failure instead of silently swallowing it.
         Console.Error.WriteLine("OCR failed: " + exception);
     }
 }
예제 #7
0
        /// <summary>
        /// Click handler: lets the user pick an image file, shows it in ImgBox,
        /// runs English OCR on it and pops up one message box per recognised word.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="args">Event data (unused).</param>
        private void BtnOCR_Click(object sender, EventArgs args)
        {
            // The engine is prepared before the dialog opens, exactly as before.
            var engine = new Tesseract();
            engine.Init(@".\tessdata", "eng", false);

            using (OpenFileDialog picker = new OpenFileDialog())
            {
                picker.Filter = COMPATIBLE_FILETYPES;

                // Nothing to do when the user cancels.
                if (picker.ShowDialog() != DialogResult.OK)
                {
                    return;
                }

                Bitmap picture = (Bitmap)Image.FromFile(picker.FileName);
                ImgBox.Image = picture;

                foreach (var word in engine.DoOCR(picture, Rectangle.Empty))
                {
                    MessageBox.Show(word.Text);
                }
            }
        }
예제 #8
0
        /// <summary>
        /// Downloads the SUNAT captcha image, runs English OCR on it and stores the text of the
        /// last recognised word in the static <c>captcha</c> field.
        /// </summary>
        /// <returns>
        /// The value of <c>captcha</c> after recognition; when no word is recognised the
        /// field keeps whatever value it had before the call.
        /// </returns>
        public static string genCaptcha()
        {
            HttpWebRequest request = (HttpWebRequest)WebRequest.Create("http://www.sunat.gob.pe/cl-ti-itmrconsruc/captcha?accion=image");

            // NOTE(review): this changes a process-wide setting and selects SSL 3.0, which is
            // obsolete and insecure; the request URL above is plain http. Left unchanged to keep
            // behaviour -- consider removing.
            ServicePointManager.SecurityProtocol = SecurityProtocolType.Ssl3;
            request.CookieContainer = cokkie;

            // Response, stream and bitmap are all owned here; dispose them in reverse order so
            // the stream outlives the bitmap that was built from it.
            using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
            using (Stream responseStream = response.GetResponseStream())
            using (var image = new Bitmap(responseStream))
            {
                var ocr = new Tesseract();
                var path = HostingEnvironment.MapPath(@"~/Componentes/tessdata");

                ocr.Init(path, "eng", false);

                var result = ocr.DoOCR(image, Rectangle.Empty);

                // Each word overwrites the previous one, so only the last word survives.
                foreach (Word word in result)
                {
                    captcha = word.Text;
                }
            }

            return(captcha);
        }
        /// <summary>
        /// Runs English OCR on every file in C:\WindowsServiceInput\ and writes the recognised
        /// words as a JSON string array to C:\WindowsServiceOutput\&lt;name&gt;.json.
        /// Files whose JSON output already exists are skipped.
        /// </summary>
        private static void CreateJson()
        {
            const string inputDirectory = @"C:\WindowsServiceInput\";
            const string outputDirectory = @"C:\WindowsServiceOutput\";

            foreach (var file in new DirectoryInfo(inputDirectory).GetFiles())
            {
                // Check for the file that is actually written (<name>.json). The original tested
                // <name>.<ext> in the output folder, which never exists, so nothing was skipped.
                string outputPath = outputDirectory + Path.GetFileNameWithoutExtension(file.Name) + ".json";
                if (System.IO.File.Exists(outputPath))
                {
                    continue;
                }

                List <string> data = new List <string>();

                // Image.FromFile keeps the file locked until the image is disposed.
                using (var image = Image.FromFile(file.FullName))
                {
                    var ocr = new Tesseract();
                    ocr.Init(@"..\..\Content\tessdata", "eng", false);

                    // NOTE(review): the cast assumes every input file decodes to a raster Bitmap.
                    var result = ocr.DoOCR((Bitmap)image, Rectangle.Empty);
                    foreach (Word word in result)
                    {
                        data.Add(word.Text);
                    }
                }

                string json = JsonConvert.SerializeObject(data.ToArray());
                System.IO.File.WriteAllText(outputPath, json);
            }
        }
예제 #10
0
 /// <summary>
 /// Runs English OCR over the whole bitmap, limited to the characters in <c>alphabetnum</c>.
 /// </summary>
 /// <param name="bitmap">Bitmap to recognise.</param>
 /// <returns>The word list produced by the OCR engine.</returns>
 public List<Word> GetWordFromImage(Bitmap bitmap)
 {
     Tesseract engine = new Tesseract();

     // Whitelist first, then initialise with the default data path.
     engine.SetVariable("tessedit_char_whitelist", alphabetnum);
     engine.Init(null, "eng", false);

     return engine.DoOCR(bitmap, Rectangle.Empty);
 }
예제 #11
0
파일: Form1.cs 프로젝트: Olavz/byepass
        /// <summary>
        /// Runs digits-only OCR on a captured image. Every recognised word of at least five
        /// characters is copied to the clipboard; for every shorter word the method waits one
        /// second and triggers a new screen capture with the retry counter decremented.
        /// </summary>
        /// <param name="b">Captured image to recognise.</param>
        /// <param name="rect">Screen area handed back to CaptureScreen on a retry.</param>
        /// <param name="retry">Current retry counter, logged and passed on as retry - 1.</param>
        private void ProcessImageOCR(Bitmap b, Rectangle rect, int retry)
        {
            Log("Start processing password from image. (OCR)");

            var engine = new Tesseract();
            engine.SetVariable("tessedit_char_whitelist", "0123456789");
            engine.Init(tessResFolder, "eng", true);

            foreach (Word candidate in engine.DoOCR(b, Rectangle.Empty))
            {
                string text = candidate.Text;
                Log("OCR returned: " + text);

                if (text.Length < 5)
                {
                    // Too short to be accepted: pause, then capture again.
                    Thread.Sleep(1000);
                    Log("OCR not accepted. Retrying... " + retry);
                    CaptureScreen(rect, retry - 1);
                    continue;
                }

                Clipboard.SetText(text);
                Log("Password copied to clipboard.");
            }

            // In fast-run mode the application closes once processing is finished.
            if (isFastRun)
            {
                Application.Exit();
            }
        }
예제 #12
0
        /// <summary>
        /// Renders one PDF page at 200 DPI, splits it into horizontal bands using
        /// <c>PDFRegionLocator</c>, runs OCR on each band and collects the words.
        /// </summary>
        /// <param name="pdf_filename">Path of the PDF document.</param>
        /// <param name="page_number">Page to render and recognise.</param>
        /// <returns>All words found on the page, in the order the bands were processed.</returns>
        public static WordList DoOCR(string pdf_filename, int page_number)
        {
            Logging.Info("+Rendering page");
            SoraxPDFRenderer renderer = new SoraxPDFRenderer(pdf_filename, pdf_user_password, pdf_user_password);
            MemoryStream page_stream = new MemoryStream(renderer.GetPageByDPIAsImage(page_number, 200));
            Bitmap bitmap = (Bitmap)Image.FromStream(page_stream);
            Logging.Info("-Rendering page");

            Logging.Info("Startup directory is {0}", Environment.CurrentDirectory);
            Logging.Info("Language is '{0}'", language);

            Tesseract ocr = new Tesseract();
            ocr.Init(null, language, false);

            Logging.Info("+Doing OCR");

            // Work out which rectangles to recognise. Each band spans from the previous region's
            // y to the current region's y; the very first band therefore has zero height.
            PDFRegionLocator locator = new PDFRegionLocator(bitmap);
            PDFRegionLocator.Region previous = locator.regions[0];
            List <Rectangle> ocr_areas = new List <Rectangle>();
            int half_width = bitmap.Width / 2;

            foreach (PDFRegionLocator.Region current in locator.regions)
            {
                int band_height = current.y - previous.y;

                if (previous.state == PDFRegionLocator.SegmentState.BLANKS)
                {
                    // Left half first, then right half
                    ocr_areas.Add(new Rectangle(0, previous.y, half_width, band_height));
                    ocr_areas.Add(new Rectangle(half_width, previous.y, half_width, band_height));
                }
                else if (previous.state == PDFRegionLocator.SegmentState.PIXELS)
                {
                    // One rectangle across the full page width
                    ocr_areas.Add(new Rectangle(0, previous.y, bitmap.Width, band_height));
                }

                previous = current;
            }

            // Recognise each rectangle in turn
            WordList all_words = new WordList();

            foreach (Rectangle area in ocr_areas)
            {
                if (area.Width == 0 || area.Height == 0)
                {
                    Logging.Info("Skipping zero extent rectangle {0}", area.ToString());
                    continue;
                }

                Logging.Info("Doing OCR for region {0}", area.ToString());
                List <Word> found = ocr.DoOCR(bitmap, area);
                Logging.Info("Got {0} words", found.Count);
                all_words.AddRange(ConvertToWordList(found, area, bitmap));
            }

            Logging.Info("-Doing OCR");

            Logging.Info("Found {0} words", all_words.Count);

            return(all_words);
        }
예제 #13
0
        /// <summary>
        /// Background-worker handler that processes the ad at index <c>Pos</c> of <c>adList</c>:
        /// waits for the page to load, waits for a captcha image to appear in the document,
        /// recognises it with OCR, evaluates the recognised arithmetic expression, posts the
        /// answer and records the outcome in <c>adList[Pos].resInfo</c>.
        /// Cancellation is polled between the stages; on cancellation <c>e.Cancel</c> is set.
        /// </summary>
        /// <param name="sender">The <see cref="BackgroundWorker"/> that raised the event.</param>
        /// <param name="e">Event data; <c>Cancel</c> is set to true when the work was cancelled.</param>
        private void mainThread_DoWork(object sender, DoWorkEventArgs e)
        {
            var worker = sender as BackgroundWorker;

            // Initial pause of 5000..9999 ms (upper bound of Random.Next is exclusive).
            // Sem.WaitOne is used as the delay here -- presumably a wait handle that is only
            // signalled to abort early; confirm against its declaration.
            Random rnd = new Random();
            var delay = rnd.Next(5000, 10000);
            doMessage(flag.SendText, "Random Delay: " + delay.ToString() + " ms");
            Sem.WaitOne(delay);

            doMessage(flag.SendText, "Loading page...");
            doMessage(flag.ReportProgress, 10);

            // Poll every 20 ms while GetState() is true, bailing out on cancellation.
            while (GetState())
            {
                if (worker.CancellationPending == true)
                {
                    sendStopText();
                    e.Cancel = true;
                    return;
                }

                Application.DoEvents();
                Thread.Sleep(20);
            };

            // Wait for the time configured on the ad itself.
            doMessage(flag.ReportProgress, 50);
            doMessage(flag.SendText, "Waiting " + adList[Pos].Time.ToString() + " ms");
            Sem.WaitOne(adList[Pos].Time);

            if (worker.CancellationPending == true)
            {
                sendStopText();
                e.Cancel = true;
                return;
            }

            doMessage(flag.ReportProgress, 60);
            doMessage(flag.SendText, "Waiting for capcha");

            // Placeholder bitmap; replaced below once the captcha image is available.
            Bitmap pic = new Bitmap(1, 1);
            Stopwatch stopwatch = new Stopwatch();

            // Start the timeout stopwatch
            stopwatch.Start();

            // Waiting loop, implemented with a label and 'goto Start' further down
            Start:

            if (worker.CancellationPending == true)
            {
                sendStopText();
                e.Cancel = true;
                return;
            }

            string html = GetDocumentText();

            // Flash captcha, which cannot be solved: record the failure and stop.
            if (html.Contains("ACPuzzleUtil.callbacks"))
            {
                doMessage(flag.ReportProgress, 0);
                doMessage(flag.SendText, "Bad Captcha!");
                //adList[Pos].Clicked = false;
                adList[Pos].resInfo.AdResult = AdResult.FlashCapcha;
                adList[Pos].resInfo.Stat = AdStatus.Failture;
                doMessage(flag.UpdStatus, string.Empty);
                return;
            }

            // The captcha image is present: decode it from base64 and keep the 90x40 top-left area.
            if (html.Contains("captcha?c=view"))
            {
                byte[] captchaBytes = Convert.FromBase64String(GetBitmap());
                MemoryStream ms = new MemoryStream(captchaBytes);
                pic = Utils.cropImage(Image.FromStream(ms), new Rectangle(0, 0, 90, 40));
            }
            else
            {
                // Captcha not there yet: wait one second and try again
                Application.DoEvents();

                Sem.WaitOne(1000);

                // Give up once waitTimeout milliseconds have elapsed
                if (stopwatch.ElapsedMilliseconds >= waitTimeout)
                {
                    stopwatch.Stop();
                    doMessage(flag.SendText, "timeout!");
                    doMessage(flag.ReportProgress, 0);
                    //adList[Pos].Clicked = false;
                    adList[Pos].resInfo.AdResult = AdResult.Timeout;
                    adList[Pos].resInfo.Stat = AdStatus.Failture;
                    doMessage(flag.UpdStatus, string.Empty);
                    return;
                }
                else
                    goto Start;

            }

            stopwatch.Stop();

            if (worker.CancellationPending == true)
            {
                sendStopText();
                e.Cancel = true;
                return;
            }

            doMessage(flag.ReportProgress, 80);

            // Solve the captcha

            var ocr = new Tesseract();
            // Only digits plus '+' and '-' are allowed in the result
            ocr.SetVariable("tessedit_char_whitelist", "0123456789+-");
            ocr.Init(null, "eng", false);
               // adList[Pos].resInfo.CapImg = ocr.GetThresholdedImage(pic, Rectangle.Empty);
            adList[Pos].resInfo.CapImg = pic;

            // NOTE(review): indexing [0] throws if OCR returns no words; nothing here catches that.
            var solved = ocr.DoOCR(Utils.MakeGrayscale(pic), Rectangle.Empty)[0].Text;

            // Evaluate the recognised text as an expression via DataTable.Compute
            var dt = new DataTable();
            string result = string.Empty;

            try
            {
                result = dt.Compute(solved, null).ToString();
            }
            catch (Exception)
            {
                // Evaluation failed: log the raw text; 'result' stays empty and is still sent below.
                Utils.AddtoLog("Unsolved: " + solved);
            }
            finally
            {
                dt.Dispose();
            }

            adList[Pos].resInfo.CapStr = solved + " = " + result;

            if (worker.CancellationPending == true)
            {
                sendStopText();
                e.Cancel = true;
                return;
            }

            // Send the answer
            doMessage(flag.SendText, "Sending answer");
            doMessage(flag.ReportProgress, 90);

            // Take the 128 characters that follow "var h = '" in the page source.
            // NOTE(review): if the marker is missing IndexOf returns -1, so pozzz becomes 8 and
            // an unrelated substring is used (or Substring throws on a short document).
            var pozzz = html.IndexOf("var h = '") + 9;
            string longJonson = html.Substring(pozzz, 128);

            cookie = GetCookieContainer(GetCook());

            string res = SendPost(string.Format(vEndReq, longJonson, result, jsKey), vEndUrl, adList[Pos].Link);

            doMessage(flag.ReportProgress, 100);

            // A response of exactly "1" is treated as success; anything else as not solved.
            if (res == "1")
            {
                //adList[Pos].Clicked = true;
                adList[Pos].resInfo.AdResult = AdResult.Solved;
                adList[Pos].resInfo.Stat = AdStatus.Success;

            }
            else
            {
                adList[Pos].resInfo.AdResult = AdResult.NotSolved;
                adList[Pos].resInfo.Stat = AdStatus.Failture;
            }
            doMessage(flag.UpdStatus, string.Empty);
        }
예제 #14
0
        /// <summary>
        /// Looks up a taxpayer by RUC on the SUNAT website: downloads the captcha image,
        /// recognises it with OCR, requests the result page with that code and scrapes the
        /// result table into <c>resultado.contribuyente</c>.
        /// </summary>
        /// <param name="ruc">RUC number to query.</param>
        /// <returns>
        /// A <c>SunatDTO</c>. When the result table is parsed, <c>status</c> is 1 and
        /// <c>contribuyente</c> is filled. When any exception occurs, <c>status</c> is 0,
        /// <c>contribuyente</c> is null and <c>mensaje</c> carries the exception message.
        /// When the page title or table is not as expected, the DTO is returned without those
        /// fields being set by this method.
        /// </returns>
        private static async Task <SunatDTO> ConsultaSunat(string ruc)
        {
            SunatDTO resultado = new SunatDTO();

            try
            {
                // Set the base address only once on the shared client.
                if (httpClient.BaseAddress == null)
                {
                    httpClient.BaseAddress = new Uri("http://www.sunat.gob.pe/");
                }

                #region Captcha

                // NOTE(review): the response, its stream and the bitmap are never disposed.
                // NOTE(review): SecurityProtocol is a process-wide setting and SSL 3.0 is obsolete;
                // the captcha URL itself is plain http.
                HttpWebRequest httpWebRequest = (HttpWebRequest)WebRequest.Create("http://www.sunat.gob.pe/cl-ti-itmrconsruc/captcha?accion=image");
                ServicePointManager.SecurityProtocol = SecurityProtocolType.Ssl3;
                httpWebRequest.CookieContainer       = _cookies;
                Bitmap    bitmap    = new Bitmap(httpWebRequest.GetResponse().GetResponseStream());
                Tesseract tesseract = new Tesseract();
                string    str       = Path.Combine(Environment.CurrentDirectory, "Content/tessdata");
                // Creates the tessdata folder when it is missing.
                // NOTE(review): a freshly created folder contains no language data, so Init
                // presumably still fails in that case -- confirm.
                if (!Directory.Exists(str))
                {
                    Directory.CreateDirectory(str);
                }
                tesseract.Init(str, "eng", false);
                // Each word overwrites the previous one: only the last recognised word is kept.
                foreach (Word word in tesseract.DoOCR(bitmap, Rectangle.Empty))
                {
                    resultado.captcha = word.Text;
                }

                #endregion

                // Query SUNAT with the RUC and the recognised captcha code
                string rawResponseAsync = await getRawResponseAsync(string.Format("http://www.sunat.gob.pe/cl-ti-itmrconsruc/jcrS00Alias?accion=consPorRuc&nroRuc={0}&codigo={1}&tipdoc=1", ruc, resultado.captcha));

                #region Documento

                HtmlDocument htmlDocument = new HtmlDocument();
                htmlDocument.LoadHtml(rawResponseAsync.Trim());
                // Only a page titled exactly "Consulta RUC" is parsed.
                // NOTE(review): SelectNodes may yield null / FirstOrDefault may yield null here;
                // the resulting exception is handled by the outer catch.
                if (htmlDocument.DocumentNode.SelectNodes("//html[1]/head[1]/title[1]").FirstOrDefault <HtmlNode>().InnerText.Equals("Consulta RUC"))
                {
                    HtmlNode htmlNode = htmlDocument.DocumentNode.SelectNodes("//html[1]/body[1]/table[1]").FirstOrDefault();
                    if (htmlNode != null)
                    {
                        HtmlNodeCollection htmlNodeCollection1 = htmlNode.SelectNodes("tr");

                        #region Recorre respuesta y rellena propiedades

                        // Walk every table row; the label in the first cell and the number of
                        // cells (2, 3 or 4) decide which property is filled.
                        resultado.contribuyente = new Contribuyente();
                        foreach (HtmlNode htmlNode1 in htmlNodeCollection1)
                        {
                            HtmlNodeCollection htmlNodeCollection2 = htmlNode1.SelectNodes("td");
                            // NOTE(review): cells [0] and [1] are read here before any Count check.
                            if (htmlNodeCollection2[0].InnerHtml.Contains("Estado del"))
                            {
                                resultado.contribuyente.Estado = beautifulString(htmlNodeCollection2[1].InnerText);
                            }
                            // Rows with two cells: label + value
                            if (htmlNodeCollection2.Count == 2)
                            {
                                if (htmlNodeCollection2[0].InnerHtml.Contains("RUC:"))
                                {
                                    // Value is split on '-': first part is the RUC, second the business name.
                                    string[] strArray = htmlNodeCollection2[1].InnerText.Split('-');
                                    resultado.contribuyente.Ruc         = beautifulString(strArray[0]);
                                    resultado.contribuyente.RazonSocial = beautifulString(strArray[1].TrimStart());
                                }
                                if (htmlNodeCollection2[0].InnerHtml.Contains("Tipo Contribuyente:"))
                                {
                                    resultado.contribuyente.Tipo = htmlNodeCollection2[1].InnerText;
                                }
                                if (htmlNodeCollection2[0].InnerHtml.Contains("Nombre Comercial:"))
                                {
                                    resultado.contribuyente.NombreComercial = beautifulString(htmlNodeCollection2[1].InnerHtml);
                                }
                                if (htmlNodeCollection2[0].InnerHtml.Contains("Direcci&oacute;n del Domicilio Fiscal"))
                                {
                                    // Address is split on '-'; the last two parts are province and district.
                                    // The regex looks for the right-most run of two spaces to separate the
                                    // street address from the department.
                                    string[] strArray = htmlNodeCollection2[1].InnerText.Split('-');
                                    Regex    regex    = new Regex("  ", RegexOptions.RightToLeft);
                                    if (strArray.Length == 3)
                                    {
                                        // NOTE(review): the index is measured on beautifulString(strArray[0]) but
                                        // applied to the raw strArray[0]; they may not line up if beautifulString
                                        // changes the length -- confirm.
                                        int index = regex.Match(beautifulString(strArray[0])).Index;
                                        resultado.contribuyente.Direccion    = beautifulString(strArray[0].Substring(0, index));
                                        resultado.contribuyente.Departamento = beautifulString(strArray[0].Substring(index));
                                    }
                                    if (strArray.Length > 3)
                                    {
                                        // The address itself contained '-': glue all parts except the last two
                                        // back together (the '-' separators are not re-inserted).
                                        string empty = string.Empty;
                                        for (int index = 0; index < strArray.Length - 2; ++index)
                                        {
                                            empty += strArray[index];
                                        }
                                        int index1 = regex.Match(beautifulString(empty)).Index;
                                        if (index1 != 0)
                                        {
                                            resultado.contribuyente.Direccion    = beautifulString(empty.Substring(0, index1));
                                            resultado.contribuyente.Departamento = beautifulString(empty.Substring(index1));
                                        }
                                        else
                                        {
                                            // No double space found at a non-zero index: department is set to "-"
                                            resultado.contribuyente.Direccion    = beautifulString(empty);
                                            resultado.contribuyente.Departamento = "-";
                                        }
                                    }
                                    resultado.contribuyente.Provincia = beautifulString(strArray[strArray.Length - 2]);
                                    resultado.contribuyente.Distrito  = beautifulString(strArray[strArray.Length - 1]);
                                }
                                if (htmlNodeCollection2[0].InnerHtml.Contains("Sistema de Contabilidad:"))
                                {
                                    resultado.contribuyente.SisContabilidad = beautifulString(htmlNodeCollection2[1].InnerText);
                                }
                                if (htmlNodeCollection2[0].InnerHtml.Contains("Emisor electr&oacute;nico desde:"))
                                {
                                    resultado.contribuyente.EmisorElectronicoDesde = beautifulString(htmlNodeCollection2[1].InnerText);
                                }
                                if (htmlNodeCollection2[0].InnerHtml.Contains("Comprobantes Electr&oacute;nicos"))
                                {
                                    // Comma-separated list, stored as-is (entries are not cleaned)
                                    string[] strArray = htmlNodeCollection2[1].InnerText.Split(',');
                                    resultado.contribuyente.ComprobantesElectronicos = ((IEnumerable <string>)strArray).ToList <string>();
                                }
                                if (htmlNodeCollection2[0].InnerHtml.Contains("Afiliado al PLE desde:"))
                                {
                                    resultado.contribuyente.AfiliadoPLEDesde = beautifulString(htmlNodeCollection2[1].InnerText);
                                }
                                // The following rows carry a <select> whose <option> texts are collected.
                                // NOTE(review): the target lists are assumed to be initialised by Contribuyente.
                                if (htmlNodeCollection2[0].InnerHtml.Contains("Actividad(es) Econ&oacute;mica(s):"))
                                {
                                    HtmlNodeCollection htmlNodeCollection3 = htmlNodeCollection2[1].SelectNodes("select/option");
                                    if (htmlNodeCollection3 != null)
                                    {
                                        foreach (HtmlNode htmlNode2 in (IEnumerable <HtmlNode>)htmlNodeCollection3)
                                        {
                                            resultado.contribuyente.ActividadesEconomicas.Add(beautifulString(htmlNode2.InnerText));
                                        }
                                    }
                                }
                                if (htmlNodeCollection2[0].InnerHtml.Contains("Sistema de Emision Electronica:"))
                                {
                                    HtmlNodeCollection htmlNodeCollection3 = htmlNodeCollection2[1].SelectNodes("select/option");
                                    if (htmlNodeCollection3 != null)
                                    {
                                        foreach (HtmlNode htmlNode2 in (IEnumerable <HtmlNode>)htmlNodeCollection3)
                                        {
                                            resultado.contribuyente.SisEmisionElectronica.Add(beautifulString(htmlNode2.InnerText));
                                        }
                                    }
                                }
                                if (htmlNodeCollection2[0].InnerHtml.Contains("Padrones"))
                                {
                                    HtmlNodeCollection htmlNodeCollection3 = htmlNodeCollection2[1].SelectNodes("select/option");
                                    if (htmlNodeCollection3 != null)
                                    {
                                        foreach (HtmlNode htmlNode2 in (IEnumerable <HtmlNode>)htmlNodeCollection3)
                                        {
                                            resultado.contribuyente.Padrones.Add(beautifulString(htmlNode2.InnerText));
                                        }
                                    }
                                }
                                if (htmlNodeCollection2[0].InnerHtml.Contains("Condici&oacute;n del Contribuyente:"))
                                {
                                    resultado.contribuyente.Condicion = beautifulString(htmlNodeCollection2[1].InnerText);
                                }
                                if (htmlNodeCollection2[0].InnerHtml.Contains("Comprobantes de Pago c/aut."))
                                {
                                    HtmlNodeCollection htmlNodeCollection3 = htmlNodeCollection2[1].SelectNodes("select/option");
                                    if (htmlNodeCollection3 != null)
                                    {
                                        foreach (HtmlNode htmlNode2 in (IEnumerable <HtmlNode>)htmlNodeCollection3)
                                        {
                                            resultado.contribuyente.ComprobantesPagoAutImpresion.Add(beautifulString(htmlNode2.InnerText));
                                        }
                                    }
                                }
                            }
                            // Three-cell variant of the "Comprobantes de Pago" row: the third cell
                            // additionally carries the "obligado a emitir CPE" value.
                            if (htmlNodeCollection2.Count == 3 && htmlNodeCollection2[0].InnerHtml.Contains("Comprobantes de Pago c/aut."))
                            {
                                HtmlNodeCollection htmlNodeCollection3 = htmlNodeCollection2[1].SelectNodes("select/option");
                                if (htmlNodeCollection3 != null)
                                {
                                    foreach (HtmlNode htmlNode2 in (IEnumerable <HtmlNode>)htmlNodeCollection3)
                                    {
                                        resultado.contribuyente.ComprobantesPagoAutImpresion.Add(beautifulString(htmlNode2.InnerText));
                                    }
                                }
                                // Skips the first 22 characters -- presumably a fixed label prefix; confirm.
                                resultado.contribuyente.ObligadoEmitirCPE = beautifulString(htmlNodeCollection2[2].InnerText.Substring(22));
                            }
                            // Rows with four cells: label, value, second label, second value
                            if (htmlNodeCollection2.Count == 4)
                            {
                                if (htmlNodeCollection2[0].InnerHtml.Contains("Fecha de Inscripci&oacute;n:"))
                                {
                                    resultado.contribuyente.FechaInscripcion       = beautifulString(htmlNodeCollection2[1].InnerText);
                                    resultado.contribuyente.FechaInicioActividades = beautifulString(htmlNodeCollection2[3].InnerText);
                                }
                                if (htmlNodeCollection2[0].InnerHtml.Contains("Condici&oacute;n del Contribuyente:"))
                                {
                                    resultado.contribuyente.Condicion       = beautifulString(htmlNodeCollection2[1].InnerText);
                                    resultado.contribuyente.ProfesionOficio = beautifulString(htmlNodeCollection2[3].InnerText);
                                }
                                if (htmlNodeCollection2[0].InnerHtml.Contains("Sistema de Emisi&oacute;n de Comprobante:"))
                                {
                                    resultado.contribuyente.SisEmisionComprobante = beautifulString(htmlNodeCollection2[1].InnerText);
                                    resultado.contribuyente.ActComercioExterior   = beautifulString(htmlNodeCollection2[3].InnerText);
                                }
                            }
                        }

                        // Table parsed: report success
                        resultado.status  = 1;
                        resultado.mensaje = "Se ha encontrado contribuyente!";
                        return(resultado);

                        #endregion
                    }
                }

                #endregion

                // Title or table not as expected: return the DTO without status/mensaje being set here
                return(resultado);
            }
            catch (Exception ex)
            {
                // Any failure (network, OCR, parsing) is reported through the DTO rather than thrown
                resultado.status        = 0;
                resultado.contribuyente = null;
                resultado.mensaje       = ex.Message;
                return(resultado);
            }
        }
        /// <summary>
        /// Looks up a taxpayer by RUC: downloads the captcha image, reads it with OCR, submits the
        /// query together with the recognised captcha and scrapes the first HTML table of the answer.
        /// </summary>
        /// <param name="ruc">
        /// RUC number to query. Values starting with "10" read the address from table row 8
        /// instead of row 7.
        /// </param>
        /// <returns>
        /// <c>null</c> when the captcha download fails; otherwise a <see cref="Contribuyente"/> holding
        /// whatever fields could be extracted (possibly none when the query or the parsing fails).
        /// </returns>
        public async Task<Contribuyente> ConsultaRuc(string ruc)
        {
            Contribuyente contribuyente = new Contribuyente();

            try
            {
                ServicePointManager.DefaultConnectionLimit = 2;

                // Download the captcha image.
                using (HttpResponseMessage captchaResponse = await httpClient.GetAsync("cl-ti-itmrconsruc/captcha?accion=image"))
                {
                    if (!captchaResponse.IsSuccessStatusCode)
                    {
                        return null;
                    }

                    // The stream must outlive the Bitmap built on top of it, hence the nesting order.
                    using (Stream captchaStream = await captchaResponse.Content.ReadAsStreamAsync())
                    using (var image = new Bitmap(captchaStream))
                    using (var ocr = new Tesseract())
                    {
                        // Point the engine at the tessdata directory.
                        ocr.Init(rutaTessData, "eng", false);

                        // Build the text locally so that a previous call's captcha never leaks
                        // into this one (the shared field used to be appended to, never reset).
                        string captchaText = string.Empty;
                        foreach (Word word in ocr.DoOCR(image, Rectangle.Empty))
                        {
                            captchaText += word.Text;
                        }
                        captcha = captchaText;
                    }
                }

                // Query the RUC, sending the recognised captcha code along.
                string consultaUrl = $"cl-ti-itmrconsruc/jcrS03Alias?accion=consPorRuc&razSoc=&nroRuc={ruc}&nrodoc=&contexto=ti-it&tQuery=on&search1={ruc}&tipdoc=1&search2=&coddpto=&codprov=&coddist=&search3=&codigo={captcha.Trim().ToUpper()}&tipodocumento=1";
                using (HttpResponseMessage consultaResponse = await httpClient.GetAsync(consultaUrl))
                {
                    if (consultaResponse.IsSuccessStatusCode)
                    {
                        HtmlAgilityPack.HtmlDocument document = new HtmlAgilityPack.HtmlDocument();
                        document.LoadHtml(await consultaResponse.Content.ReadAsStringAsync());

                        // SelectNodes yields null (not an empty collection) when nothing matches.
                        var nodeTable = document.DocumentNode.SelectNodes("//table")?.FirstOrDefault();
                        if (nodeTable != null)
                        {
                            var rows = nodeTable.Elements("tr").ToArray();

                            // Row 1, cell 1: the first 11 characters are stored as the RUC and the
                            // text from index 13 onwards as the business name.
                            if (rows.Length > 1)
                            {
                                var cells = rows[1].Elements("td").ToArray();
                                if (cells.Length > 1)
                                {
                                    string consultaCliente = LimpiarEspacios(cells[1].InnerHtml.Trim());
                                    if (consultaCliente.Length >= 11)
                                    {
                                        contribuyente.RUC = consultaCliente.Substring(0, 11).Trim();
                                    }
                                    if (consultaCliente.Length >= 13)
                                    {
                                        contribuyente.RazonSocial = consultaCliente.Substring(13).Trim();
                                    }
                                }
                            }

                            // Address row: 8 for RUCs starting with "10", 7 otherwise.
                            int direccionRow = ruc.StartsWith("10", StringComparison.Ordinal) ? 8 : 7;
                            if (rows.Length > direccionRow)
                            {
                                var cells = rows[direccionRow].Elements("td").ToArray();
                                if (cells.Length > 1)
                                {
                                    contribuyente.Direccion = LimpiarEspacios(cells[1].InnerHtml.Trim()).Trim();
                                }
                            }
                        }
                    }
                }
            }
            catch (Exception ex)
            {
                // Best effort by design: callers get the partially filled result, but the failure
                // is at least traceable instead of vanishing silently.
                System.Diagnostics.Debug.WriteLine("ConsultaRuc failed: " + ex);
            }

            return contribuyente;
        }
예제 #16
0
        /// <summary>
        /// Background-worker entry point that processes the single ad at <c>adList[Pos]</c>:
        /// waits for the page, waits for the captcha image, solves it with OCR and posts the answer.
        /// </summary>
        /// <remarks>
        /// The outcome is written to <c>adList[Pos].resInfo</c> and announced through
        /// <c>doMessage(flag.UpdStatus, ...)</c>. Whenever cancellation is pending the handler sets
        /// <c>e.Cancel</c> and returns early.
        /// </remarks>
        /// <param name="sender">The <see cref="BackgroundWorker"/> running this handler.</param>
        /// <param name="e">Event arguments; <c>Cancel</c> is set when the work is cancelled.</param>
        private void mainThread_DoWork(object sender, DoWorkEventArgs e)
        {
            var worker = sender as BackgroundWorker;

            Random rnd   = new Random();
            var    delay = rnd.Next(5000, 10000);

            doMessage(flag.SendText, "Random Delay: " + delay.ToString() + " ms");
            Sem.WaitOne(delay);

            doMessage(flag.SendText, "Loading page...");
            doMessage(flag.ReportProgress, 10);

            // Poll until GetState() reports false (presumably "page finished loading" - TODO confirm).
            while (GetState())
            {
                if (StopIfCancellationPending(worker, e))
                {
                    return;
                }

                Application.DoEvents();
                Thread.Sleep(20);
            }

            doMessage(flag.ReportProgress, 50);
            doMessage(flag.SendText, "Waiting " + adList[Pos].Time.ToString() + " ms");
            Sem.WaitOne(adList[Pos].Time);

            if (StopIfCancellationPending(worker, e))
            {
                return;
            }

            doMessage(flag.ReportProgress, 60);
            doMessage(flag.SendText, "Waiting for capcha");

            Bitmap    pic;
            string    html;
            Stopwatch stopwatch = Stopwatch.StartNew();

            // Wait until the captcha image shows up in the document, the page turns out to carry
            // an unsupported captcha, or the timeout elapses.
            while (true)
            {
                if (StopIfCancellationPending(worker, e))
                {
                    return;
                }

                html = GetDocumentText();

                // Flash captcha, that we can't solve.
                if (html.Contains("ACPuzzleUtil.callbacks"))
                {
                    doMessage(flag.ReportProgress, 0);
                    doMessage(flag.SendText, "Bad Captcha!");
                    adList[Pos].resInfo.AdResult = AdResult.FlashCapcha;
                    adList[Pos].resInfo.Stat     = AdStatus.Failture;
                    doMessage(flag.UpdStatus, string.Empty);
                    return;
                }

                // The captcha image is available.
                if (html.Contains("captcha?c=view"))
                {
                    byte[]       captchaBytes = Convert.FromBase64String(GetBitmap());
                    // NOTE(review): the stream is deliberately left open - GDI+ images created from
                    // a stream may keep reading from it for their whole lifetime.
                    MemoryStream ms           = new MemoryStream(captchaBytes);
                    pic = Utils.cropImage(Image.FromStream(ms), new Rectangle(0, 0, 90, 40));
                    break;
                }

                // Keep waiting for the captcha.
                Application.DoEvents();
                Sem.WaitOne(1000);

                if (stopwatch.ElapsedMilliseconds >= waitTimeout)
                {
                    stopwatch.Stop();
                    doMessage(flag.SendText, "timeout!");
                    doMessage(flag.ReportProgress, 0);
                    adList[Pos].resInfo.AdResult = AdResult.Timeout;
                    adList[Pos].resInfo.Stat     = AdStatus.Failture;
                    doMessage(flag.UpdStatus, string.Empty);
                    return;
                }
            }

            stopwatch.Stop();

            if (StopIfCancellationPending(worker, e))
            {
                return;
            }

            doMessage(flag.ReportProgress, 80);

            // Solve the captcha: the whitelist restricts recognition to digits, '+' and '-'.
            string solved = null;
            using (var ocr = new Tesseract())
            {
                ocr.SetVariable("tessedit_char_whitelist", "0123456789+-");
                ocr.Init(null, "eng", false);
                adList[Pos].resInfo.CapImg = pic;

                var words = ocr.DoOCR(Utils.MakeGrayscale(pic), Rectangle.Empty);
                if (words != null && words.Count > 0)
                {
                    solved = words[0].Text;
                }
            }

            if (solved == null)
            {
                // OCR found nothing; indexing the empty result used to throw here.
                FailCurrentAd("Captcha text not recognized");
                return;
            }

            // Evaluate the recognised arithmetic expression.
            string result = string.Empty;
            using (var dt = new DataTable())
            {
                try
                {
                    result = dt.Compute(solved, null).ToString();
                }
                catch (Exception)
                {
                    // The answer is still submitted, with an empty result.
                    Utils.AddtoLog("Unsolved: " + solved);
                }
            }

            adList[Pos].resInfo.CapStr = solved + " = " + result;

            if (StopIfCancellationPending(worker, e))
            {
                return;
            }

            // Send the answer.
            doMessage(flag.SendText, "Sending answer");
            doMessage(flag.ReportProgress, 90);

            // The page embeds a 128-character value right after "var h = '".
            const string tokenMarker = "var h = '";
            const int    tokenLength = 128;
            int          markerPos   = html.IndexOf(tokenMarker, StringComparison.Ordinal);
            if (markerPos < 0 || markerPos + tokenMarker.Length + tokenLength > html.Length)
            {
                // Without the marker the old code posted 128 arbitrary characters (or threw).
                FailCurrentAd("Answer token not found");
                return;
            }
            string longJonson = html.Substring(markerPos + tokenMarker.Length, tokenLength);

            cookie = GetCookieContainer(GetCook());

            string res = SendPost(string.Format(vEndReq, longJonson, result, jsKey), vEndUrl, adList[Pos].Link);

            doMessage(flag.ReportProgress, 100);

            if (res == "1")
            {
                adList[Pos].resInfo.AdResult = AdResult.Solved;
                adList[Pos].resInfo.Stat     = AdStatus.Success;
            }
            else
            {
                adList[Pos].resInfo.AdResult = AdResult.NotSolved;
                adList[Pos].resInfo.Stat     = AdStatus.Failture;
            }
            doMessage(flag.UpdStatus, string.Empty);
        }

        /// <summary>
        /// Sends the stop text and flags the work as cancelled when cancellation has been requested.
        /// </summary>
        /// <returns><c>true</c> when the caller must stop processing.</returns>
        private bool StopIfCancellationPending(BackgroundWorker worker, DoWorkEventArgs e)
        {
            if (!worker.CancellationPending)
            {
                return false;
            }

            sendStopText();
            e.Cancel = true;
            return true;
        }

        /// <summary>
        /// Marks the current ad as not solved, following the same reporting sequence as the
        /// flash-captcha failure path.
        /// </summary>
        private void FailCurrentAd(string message)
        {
            doMessage(flag.ReportProgress, 0);
            doMessage(flag.SendText, message);
            adList[Pos].resInfo.AdResult = AdResult.NotSolved;
            adList[Pos].resInfo.Stat     = AdStatus.Failture;
            doMessage(flag.UpdStatus, string.Empty);
        }
예제 #17
0
        /// <summary>
        /// Renders one page of a PDF to a bitmap, splits it into horizontal regions and runs
        /// Tesseract OCR over each region.
        /// </summary>
        /// <param name="pdf_filename">Path of the PDF file to render.</param>
        /// <param name="page_number">Number of the page to render and OCR.</param>
        /// <returns>All words recognised on the page, converted through <c>ConvertToWordList</c>.</returns>
        public static WordList DoOCR(string pdf_filename, int page_number)
        {
            Logging.Info("+Rendering page {1} for PDF file {0}", pdf_filename, page_number);
            using (MemoryStream ms = new MemoryStream(SoraxPDFRenderer.GetPageByDPIAsImage(pdf_filename, pdf_user_password, page_number, 200)))
            using (Bitmap bitmap = (Bitmap)Image.FromStream(ms))
            {
                Logging.Info("-Rendering page #{0}", page_number);

                Logging.Info("Startup directory is {0}", Environment.CurrentDirectory);
                Logging.Info("Language is '{0}'", language);

                using (Tesseract ocr = new Tesseract())
                {
                    ocr.Init(null, language, false);

                    Logging.Info("+Doing OCR");

                    const int MIN_WIDTH = 0;

                    // Build a list of all the rectangles to process. Each rectangle spans from the
                    // previous region's y to the current region's y.
                    PDFRegionLocator        pdf_region_locator = new PDFRegionLocator(bitmap);
                    PDFRegionLocator.Region last_region        = pdf_region_locator.regions[0];
                    List<Rectangle>         rectangles         = new List<Rectangle>();
                    Rectangle               last_rectangle     = new Rectangle();
                    foreach (PDFRegionLocator.Region region in pdf_region_locator.regions)
                    {
                        int  rect_height     = region.y - last_region.y;
                        bool alarming_height = (rect_height <= 0);
                        int  clamped_height  = Math.Max(MIN_WIDTH, rect_height);

                        List<Rectangle> candidates = new List<Rectangle>(2);
                        if (last_region.state == PDFRegionLocator.SegmentState.BLANKS)
                        {
                            // Two half-width rectangles (presumably one per text column - TODO
                            // confirm). The left half used to be overwritten by the right one
                            // before it was ever stored, so it never reached the OCR engine.
                            candidates.Add(new Rectangle(0, last_region.y, bitmap.Width / 2, clamped_height));
                            candidates.Add(new Rectangle(bitmap.Width / 2, last_region.y, bitmap.Width / 2, clamped_height));
                        }
                        else if (last_region.state == PDFRegionLocator.SegmentState.PIXELS)
                        {
                            // Full-width rectangle.
                            candidates.Add(new Rectangle(0, last_region.y, bitmap.Width, clamped_height));
                        }
                        else
                        {
                            // Unknown state: keep the empty rectangle so the warning below still fires.
                            candidates.Add(new Rectangle());
                        }

                        foreach (Rectangle rectangle in candidates)
                        {
                            if (alarming_height || rectangle.Height <= 0)
                            {
                                Logging.Warn("Calculated region height is negative or zero: {0} :: Calculated region {1} <-- CURRENT:{2} - LAST:{3}", rect_height, rectangle, region, last_region);

                                // All candidates of a region share the same height: skip them all.
                                break;
                            }
                            else if (rectangles.Count > 0 && last_rectangle.X == rectangle.X && last_rectangle.Y == rectangle.Y)
                            {
                                // The Count check keeps the very first rectangle from being "merged"
                                // into the default (0,0,0,0) placeholder and thereby lost.
                                Logging.Warn("Overlapping subsequent rectangles will be merged :: CURRENT:{0} - LAST:{1}", rectangle, last_rectangle);
                                last_rectangle.Width  = Math.Max(last_rectangle.Width, rectangle.Width);
                                last_rectangle.Height = Math.Max(last_rectangle.Height, rectangle.Height);

                                // Rectangle is a struct: write the merged copy back into the list.
                                rectangles[rectangles.Count - 1] = last_rectangle;
                                Logging.Warn("--> Updated 'last' rectangle:{0}", last_rectangle);
                            }
                            else
                            {
                                rectangles.Add(rectangle);
                                last_rectangle = rectangle;
                            }
                        }

                        last_region = region;
                    }

                    // DEBUG CODE: Draw in the region rectangles
                    //
                    // When we run in NOKILL mode, we "know" we're running in a debugger or stand-alone environment
                    // intended for testing this code. Hence we should dump the regions image as part of the process.
                    // Note that the outlines are drawn onto the very bitmap that is OCRed below.
                    if (no_kill)
                    {
                        string bitmap_diag_path = pdf_filename + @"." + page_number + @"-ocr.png";

                        Logging.Info("Dumping regions-augmented page {0} PNG image to file {1}", page_number, bitmap_diag_path);
                        using (Graphics g = Graphics.FromImage(bitmap))
                        {
                            foreach (Rectangle rectangle in rectangles)
                            {
                                if (rectangle.Width <= MIN_WIDTH && rectangle.Height > MIN_WIDTH)
                                {
                                    DrawRectangleOutline(g, Pens.Purple, rectangle);
                                }
                                else if (rectangle.Width > MIN_WIDTH && rectangle.Height <= MIN_WIDTH)
                                {
                                    DrawRectangleOutline(g, Pens.PowderBlue, rectangle);
                                }
                                else if (rectangle.Width <= MIN_WIDTH && rectangle.Height <= MIN_WIDTH)
                                {
                                    DrawRectangleOutline(g, Pens.Red, rectangle);
                                }
                                else
                                {
                                    DrawRectangleOutline(g, Pens.LawnGreen, rectangle);
                                }
                            }
                        }

                        bitmap.Save(bitmap_diag_path, ImageFormat.Png);
                    }

                    // Do the OCR on each of the rectangles
                    WordList word_list = new WordList();
                    foreach (Rectangle rectangle in rectangles)
                    {
                        if (0 == rectangle.Width || 0 == rectangle.Height)
                        {
                            Logging.Info("Skipping zero extent rectangle {0}", rectangle.ToString());
                            continue;
                        }

                        Logging.Info("Doing OCR for region {0} on bitmap WxH: {1}x{2}", rectangle.ToString(), bitmap.Width, bitmap.Height);
                        List<Word> result = ocr.DoOCR(bitmap, rectangle);
                        Logging.Info("Got {0} words", result.Count);
                        word_list.AddRange(ConvertToWordList(result, rectangle, bitmap));
                    }

                    Logging.Info("-Doing OCR");

                    Logging.Info("Found {0} words ({1} @ #{2})", word_list.Count, pdf_filename, page_number);

#if false
                    Logging.Info("+Reordering words for columns");
                    WordList word_list_ordered = ColumnWordOrderer.ReorderWords(word_list);
                    Logging.Info("-Reordering words for columns");
                    word_list_ordered.WriteToFile(ocr_output_filename);
#endif

                    return word_list;
                }
            }
        }
예제 #18
0
        /// <summary>
        /// Walks a contour hierarchy looking for a license plate, runs OCR on the first plausible
        /// candidate and appends its results to the output lists.
        /// </summary>
        /// <remarks>
        /// A candidate needs an area above 400, at least three child contours and a width/height
        /// ratio strictly between 1.0 and 2.0. Contours that fail the child-count or ratio test have
        /// their children searched recursively. The scan of the current level stops after the first
        /// candidate whose OCR returns at least one word.
        /// </remarks>
        /// <param name="contours">First contour of the level to scan; siblings are reached through <c>HNext</c>.</param>
        /// <param name="gray">Grayscale image the plate regions are copied from.</param>
        /// <param name="canny">Edge image; only handed on to the recursive calls.</param>
        /// <param name="licensePlateImagesList">Receives the cropped plate image.</param>
        /// <param name="filteredLicensePlateImagesList">Receives the output of <c>FilterPlate</c> for the plate.</param>
        /// <param name="detectedLicensePlateRegionList">Receives the (rotation-normalised) bounding box.</param>
        /// <param name="licenses">Receives the OCR words of the plate.</param>
        private void FindLicensePlate(
            Contour<Point> contours, Image<Gray, Byte> gray, Image<Gray, Byte> canny,
            List<Image<Gray, Byte>> licensePlateImagesList, List<Image<Gray, Byte>> filteredLicensePlateImagesList, List<MCvBox2D> detectedLicensePlateRegionList,
            List<List<Word>> licenses)
        {
            for (; contours != null; contours = contours.HNext)
            {
                int numberOfChildren = GetNumberOfChildren(contours);

                // Without any children (characters) this cannot be a license plate region.
                if (numberOfChildren == 0)
                {
                    continue;
                }

                // Check the area of the candidate.
                if (contours.Area > 400)
                {
                    if (numberOfChildren < 3)
                    {
                        // Fewer than 3 children: not a plate itself (a plate is assumed to carry at
                        // least 3 characters), but one of its children might be. A null child list
                        // is harmless because the loop condition above rejects it.
                        FindLicensePlate(contours.VNext, gray, canny, licensePlateImagesList, filteredLicensePlateImagesList, detectedLicensePlateRegionList, licenses);
                        continue;
                    }

                    // Normalise the box so that its angle lies within [-45, 45] degrees.
                    MCvBox2D box = contours.GetMinAreaRect();
                    if (box.angle < -45.0)
                    {
                        float tmp = box.size.Width;
                        box.size.Width  = box.size.Height;
                        box.size.Height = tmp;
                        box.angle      += 90.0f;
                    }
                    else if (box.angle > 45.0)
                    {
                        float tmp = box.size.Width;
                        box.size.Width  = box.size.Height;
                        box.size.Height = tmp;
                        box.angle      -= 90.0f;
                    }

                    double whRatio = (double)box.size.Width / box.size.Height;
                    if (!(1.0 < whRatio && whRatio < 2.0))
                    {
                        // Wrong aspect ratio for a plate; still search the children.
                        FindLicensePlate(contours.VNext, gray, canny, licensePlateImagesList, filteredLicensePlateImagesList, detectedLicensePlateRegionList, licenses);
                        continue;
                    }

                    Image<Gray, Byte> plate         = gray.Copy(box);
                    Image<Gray, Byte> filteredPlate = FilterPlate(plate);

                    // OCR runs on the unfiltered plate, exactly as before; the bitmap handed to the
                    // engine is now the one that gets disposed.
                    List<Word> words;
                    using (Bitmap plateBitmap = plate.Bitmap)
                    {
                        words = _ocr.DoOCR(plateBitmap, plate.ROI);
                    }

                    if (words == null || words.Count == 0)
                    {
                        // Nothing readable: release the rejected images instead of leaking them.
                        filteredPlate.Dispose();
                        plate.Dispose();
                        continue;
                    }

                    licenses.Add(words);
                    licensePlateImagesList.Add(plate);
                    filteredLicensePlateImagesList.Add(filteredPlate);
                    detectedLicensePlateRegionList.Add(box);
                    return;
                }
            }
        }
예제 #19
0
        /// <summary>
        /// Captures a fixed 1200x300 area of the primary screen, saves it as
        /// <c>Screenshot{i}.png</c>, runs OCR on it and stores the translation of every recognised
        /// word whose vocabulary level lies above the user's current level.
        /// </summary>
        /// <remarks>
        /// Each word is looked up in the tables A1..C2. When the last table with exactly one match
        /// has a higher index than the table named by <c>level</c>, the pair held in
        /// <c>ing[t]</c>/<c>tur[t]</c> is inserted into the table named by <c>name</c>.
        /// The screenshot counter <c>i</c> is incremented at the end.
        /// </remarks>
        private void HandleHotkey()
        {
            string screenshotPath = "Screenshot" + i + ".png";

            // Take the screenshot and save it to disk.
            using (var bmpScreenshot = new Bitmap(1200, 300, PixelFormat.Format32bppArgb))
            {
                using (var gfxScreenshot = Graphics.FromImage(bmpScreenshot))
                {
                    gfxScreenshot.CopyFromScreen(150, 650, Screen.PrimaryScreen.Bounds.X, Screen.PrimaryScreen.Bounds.Y,
                                                 Screen.PrimaryScreen.Bounds.Size,
                                                 CopyPixelOperation.SourceCopy);
                }

                bmpScreenshot.Save(screenshotPath, ImageFormat.Png);
            }

            string[] levels = new string[] { "A1", "A2", "B1", "B2", "C1", "C2" };

            // Index of the user's current level; stays 0 when `level` matches none of the tables.
            int advance = 0;
            for (int j = 0; j < levels.Length; j++)
            {
                if (level == levels[j])
                {
                    advance = j;
                }
            }

            // Disposing the reloaded bitmap also releases the lock on the screenshot file.
            using (var image = new Bitmap(screenshotPath))
            using (var ocr = new Tesseract())
            {
                // NOTE(review): machine-specific tessdata path - should come from configuration.
                ocr.Init(@"C:/Users/Emin/Desktop/OCR_project/packages/NuGet.Tessnet2.1.1.1/content/Content/tessdata", "eng", false);
                var result = ocr.DoOCR(image, Rectangle.Empty);

                if (result != null && result.Count > 0)
                {
                    using (SQLiteConnection conn = new SQLiteConnection("data source=database.db"))
                    using (SQLiteCommand sqliteCommand = new SQLiteCommand(conn))
                    {
                        conn.Open();

                        int t = 0;
                        foreach (tessnet2.Word word in result)
                        {
                            // Normalise the OCR output. The invariant culture matters: under a
                            // Turkish locale ToLower() maps "I" to the dotless "ı" and the lookup
                            // of English words would silently fail.
                            string text = RemoveSpecialCharacters(word.Text);
                            text = text.Replace("0", "o");
                            text = text.Replace("1", "l");
                            text = text.Replace(".", "");
                            word.Text = text.ToLowerInvariant();

                            // Evaluated per word so a previous word's level can no longer
                            // trigger an insert for a word that was not found at all.
                            int founded = FindHighestMatchingLevel(sqliteCommand, levels, word.Text, t);
                            if (founded > advance)
                            {
                                SaveTranslation(sqliteCommand, t);
                            }

                            t++;
                        }
                    }
                }
            }

            i++;
        }

        /// <summary>
        /// Looks <paramref name="englishWord"/> up in every level table, copying each hit into
        /// <c>ing[slot]</c>/<c>tur[slot]</c>.
        /// </summary>
        /// <returns>Index of the last table with exactly one match, or 0 when there is none.</returns>
        private int FindHighestMatchingLevel(SQLiteCommand sqliteCommand, string[] levels, string englishWord, int slot)
        {
            int founded = 0;

            for (int levelIndex = 0; levelIndex < levels.Length; levelIndex++)
            {
                // Table names cannot be bound as parameters; they come from the fixed list above.
                // The OCR text, however, is bound instead of being spliced into the SQL.
                sqliteCommand.CommandText = "SELECT ing,tur from " + levels[levelIndex] + " where ing=@word";
                sqliteCommand.Parameters.Clear();
                sqliteCommand.Parameters.AddWithValue("@word", englishWord);

                int count = 0;
                using (SQLiteDataReader sqliteReader = sqliteCommand.ExecuteReader())
                {
                    while (sqliteReader.Read())
                    {
                        try
                        {
                            ing[slot] = sqliteReader[0].ToString();
                            tur[slot] = sqliteReader[1].ToString();
                        }
                        catch (Exception ex)
                        {
                            // Presumably guards against `slot` exceeding the buffers - TODO confirm.
                            System.Diagnostics.Debug.WriteLine("HandleHotkey: could not store lookup result: " + ex.Message);
                        }
                        count++;
                    }
                }

                if (count == 1)
                {
                    founded = levelIndex;
                }
            }

            return founded;
        }

        /// <summary>
        /// Inserts the pair held in <c>ing[slot]</c>/<c>tur[slot]</c> into the table named by
        /// <c>name</c>, then removes rows with an empty <c>eng</c> column.
        /// </summary>
        private void SaveTranslation(SQLiteCommand sqliteCommand, int slot)
        {
            try
            {
                if (!string.IsNullOrEmpty(ing[slot]) && !string.IsNullOrEmpty(tur[slot]))
                {
                    // NOTE(review): `name` is spliced in as a table name and must never carry
                    // user-controlled text.
                    query = "INSERT INTO " + name + "(eng, tur) VALUES (@ing, @tur)";
                    sqliteCommand.CommandText = query;
                    sqliteCommand.Parameters.Clear();
                    sqliteCommand.Parameters.AddWithValue("@ing", ing[slot]);
                    sqliteCommand.Parameters.AddWithValue("@tur", tur[slot]);
                    sqliteCommand.ExecuteNonQuery();
                }
            }
            catch (Exception ex)
            {
                // Best effort, as before - but leave a trace of what went wrong.
                System.Diagnostics.Debug.WriteLine("HandleHotkey: insert failed: " + ex.Message);
            }

            sqliteCommand.Parameters.Clear();
            sqliteCommand.CommandText = "DELETE FROM " + name + " where eng=''";
            sqliteCommand.ExecuteNonQuery();
        }
예제 #20
-1
        /// <summary>
        /// Runs OCR on <paramref name="bmp"/> and records a fault for every recognised word that
        /// matches one of the trigger patterns.
        /// </summary>
        /// <param name="bmp">Image to analyse.</param>
        /// <param name="filters">
        /// Triggers whose <c>triggerString</c> is used as a regular expression, matched
        /// case-insensitively. A null or empty list skips the OCR entirely.
        /// </param>
        /// <returns>The collected faults; empty when there are no triggers or no matches.</returns>
        public static TextAnalysisResult ProcessText(Bitmap bmp, List<TextTrigger> filters)
        {
            TextAnalysisResult analysisResult = new TextAnalysisResult();

            // OCR is expensive: without triggers there is nothing to look for.
            if (filters == null || filters.Count == 0)
            {
                Debug.WriteLine("TextAnalysis.ProcessText no text triggers");
                return analysisResult;
            }

            // Parsed with the current culture, as before.
            double confidenceFilter = double.Parse(ConfigurationManager.AppSettings["textFilterConfidence"]);

            List<Word> results;

            // The OCR run is serialised through lockObj.
            Stopwatch watch = Stopwatch.StartNew();
            lock (lockObj)
            {
                using (Tesseract tessocr = new Tesseract())
                {
                    tessocr.Init(configPath, "eng", false);
                    results = tessocr.DoOCR(bmp, Rectangle.Empty);
                }
            }
            watch.Stop();
            Debug.WriteLine("TextAnalysis.ProcessText OCR took {0}ms", watch.ElapsedMilliseconds);

            if (results == null)
            {
                return analysisResult;
            }

            // For this engine a LOWER confidence number means GREATER certainty.
            foreach (var resultWord in results.Where(w => w.Confidence < confidenceFilter))
            {
                string word = resultWord.Text;
                if (word == null)
                {
                    continue;
                }

                foreach (var trigger in filters)
                {
                    if (trigger.triggerString == null)
                    {
                        continue;
                    }

                    // Match case-insensitively through RegexOptions. Upper-casing the pattern
                    // itself rewrites its escapes (\d -> \D, \w -> \W, \b -> \B), inverting
                    // their meaning.
                    if (Regex.IsMatch(word, trigger.triggerString, RegexOptions.IgnoreCase | RegexOptions.CultureInvariant))
                    {
                        analysisResult.AddFault(trigger.userEmail, trigger.triggerString, word, (int)resultWord.Confidence);
                    }
                }
            }
            return analysisResult;
        }