/// <summary>
/// Loads a grayscale image from disk, shows a down-scaled copy in <c>imageBox1</c>,
/// runs Tesseract OCR over the full-resolution image and writes the recognized words,
/// joined by single spaces, into <c>textBox1</c>.
/// </summary>
/// <param name="fileName">Path of the image file to read and recognize.</param>
private void PerformOCR(string fileName)
{
    // The source image wraps unmanaged memory; release it as soon as OCR is finished.
    using (Image<Gray, Byte> image = new Image<Gray, byte>(fileName))
    {
        fileNameTextBox.Text = fileName;

        // Never display the image larger than the image box.
        int width = Math.Min(image.Width, imageBox1.Width);
        int height = Math.Min(image.Height, imageBox1.Height);

        // The resized image is a separate object handed to the image box,
        // so disposing the source image below does not affect the display.
        imageBox1.Image = image.Resize(width, height, true);

        // More language definition data can be downloaded from
        // http://code.google.com/p/tesseract-ocr/downloads/list
        // (Dutch, Spanish, German, Italian, French and English are supported).
        using (Tesseract ocr = new Tesseract())
        {
            ocr.Init("eng", numericalOnlyCheckBox.Checked);
            List<tessnet2.Word> result = ocr.DoOCR(image.Bitmap, Rectangle.Empty);

            // Collect the text of every recognized word and show it.
            String[] texts = result.ConvertAll<String>(delegate(Word w) { return w.Text; }).ToArray();
            textBox1.Text = String.Join(" ", texts);
        }
    }
}
/// <summary>
/// Runs Tesseract OCR (English, alphanumeric plus <c>:&lt;&gt;</c> whitelist) over the whole
/// bitmap and returns only the words whose <c>Confidence</c> value is below 160.
/// </summary>
/// <param name="image">Bitmap to recognize.</param>
/// <returns>The filtered list of recognized words (possibly empty).</returns>
/// <exception cref="ClosedEyedVisuals_Exception">
/// Thrown when anything fails during OCR; the message carries the original exception text.
/// </exception>
public List<Word> GetTextFromImage(Bitmap image)
{
    List<Word> result = new List<Word>();
    try
    {
        // The engine is disposable; release it even when OCR throws.
        using (var ocr = new Tesseract())
        {
            ocr.SetVariable("tessedit_char_whitelist", "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789:<>");
            ocr.Init("tessdata", "eng", false);

            List<Word> tempResult = ocr.DoOCR(image, Rectangle.Empty);

            // Weed out the bad match results.
            // NOTE(review): presumably a lower Confidence value means a better match in tessnet2 — confirm.
            foreach (Word word in tempResult)
            {
                if (word.Confidence < 160)
                {
                    result.Add(word);
                }
            }
        }
    }
    catch (Exception e)
    {
        throw new ClosedEyedVisuals_Exception("OCR error:" + e.ToString());
    }
    return result;
}
/// <summary>
/// Loads the bitmap at <paramref name="path"/>, runs OCR over its full area and
/// concatenates the recognized word texts.
/// </summary>
/// <param name="path">Path of the image file to recognize.</param>
/// <remarks>
/// NOTE(review): the concatenated text is not returned or stored anywhere, and the engine
/// is used without a prior Init call — this looks like unfinished/debug code; confirm intent.
/// </remarks>
public void Return(string path)
{
    // Both the engine and the bitmap hold native resources; release them deterministically.
    using (Tesseract tesseract = new Tesseract())
    using (Bitmap bitmap = new Bitmap(path))
    {
        // Recognize the whole image area.
        Rectangle rect = new Rectangle(new Point(0, 0), bitmap.Size);
        var output = tesseract.DoOCR(bitmap, rect);

        string text = string.Empty;
        foreach (Word a in output)
        {
            text += a.Text;
        }
    }
}
/// <summary>
/// Scales the input image to 100x44 pixels, runs English Tesseract OCR on it and returns
/// the recognized words joined by single spaces.
/// </summary>
/// <param name="input">Image to recognize.</param>
/// <returns>The recognized text with leading/trailing whitespace removed; empty when nothing was recognized.</returns>
private static string DoTesseract(Image input)
{
    // The scaled copy and the engine are only needed for the duration of this call.
    using (var bmp = new Bitmap(input, new Size(100, 44)))
    using (var ocr = new Tesseract())
    {
        ocr.Init(null, "eng", false);
        var result = ocr.DoOCR(bmp, Rectangle.Empty);

        // Join + Trim yields the same text as appending "word " per item and trimming.
        string[] texts = result.ConvertAll(item => item.Text).ToArray();
        return string.Join(" ", texts).Trim();
    }
}
/// <summary>
/// Function intended to detect the number of characters in an image (translated from the
/// original French comment). Runs English Tesseract OCR with the system and frequency
/// dictionaries disabled, stores the accumulated string in <c>mot</c> and the count in <c>nbC</c>.
/// </summary>
/// <param name="imageToSplit">Image to recognize.</param>
/// <returns>The value assigned to <c>nbC</c>.</returns>
/// <remarks>
/// NOTE(review): the loop never reads the recognized <c>word</c> and <c>nbLettre</c> is never
/// incremented, so this currently always returns 0 — the intended counting logic needs to be
/// confirmed before it can be fixed. The logic is left exactly as it was.
/// </remarks>
public int getText(Image imageToSplit)
{
    string s = "";
    int nbLettre = 0;

    // Release the bitmap copy and the OCR engine once recognition is done.
    using (var image = new Bitmap(imageToSplit))
    using (var ocr = new Tesseract())
    {
        ocr.SetVariable("load_system_dawg", false);
        ocr.SetVariable("load_freq_dawg", false);
        ocr.Init(Server.MapPath(@"\tessdata\"), "eng", false);

        var result = ocr.DoOCR(image, Rectangle.Empty);
        foreach (tessnet2.Word word in result)
        {
            // NOTE(review): appends the current content of Text.Text (not word.Text) once per word.
            s += Text.Text;
            Text.Text = let;
        }
    }

    mot = s;
    return (nbC = nbLettre);
}
/// <summary>
/// Console test driver: runs Tesseract OCR on a fixed sample image and prints confidence,
/// text and bounding box of every recognized word whose text contains "aimon".
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
static void Main(string[] args)
{
    try
    {
        using (var image = new Bitmap(@"C:\OCRTest\saimon.jpg"))
        using (var ocr = new Tesseract())
        {
            ocr.SetVariable("tessedit_char_whitelist", "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz.,$-/#&=()\"':?");

            // @"C:\OCRTest\tessdata" contains the language package; without it Init crashes the app.
            // NOTE(review): the last argument enables numeric-only mode although the whitelist
            // contains letters — confirm this is intended.
            ocr.Init(@"C:\OCRTest\tessdata", "eng", true);

            var result = ocr.DoOCR(image, Rectangle.Empty);
            foreach (Word word in result)
            {
                // Word has no Contains method of its own; the match must be done on its text.
                if (word.Text != null && word.Text.Contains("aimon"))
                {
                    Console.WriteLine("" + word.Confidence + " " + word.Text + " " + word.Top + " " + word.Bottom + " " + word.Left + " " + word.Right);
                }
            }
        }

        Console.ReadLine();
    }
    catch (Exception exception)
    {
        // Report the failure instead of silently swallowing it.
        Console.Error.WriteLine(exception);
    }
}
/// <summary>
/// Click handler: lets the user pick an image file, shows it in <c>ImgBox</c>, runs English
/// Tesseract OCR on it and displays every recognized word in its own message box.
/// </summary>
/// <param name="sender">Event source.</param>
/// <param name="args">Event data.</param>
private void BtnOCR_Click(object sender, EventArgs args)
{
    using (OpenFileDialog fileDialog = new OpenFileDialog() { Filter = COMPATIBLE_FILETYPES })
    {
        if (fileDialog.ShowDialog() != DialogResult.OK)
        {
            // Nothing selected: do not pay for engine initialization at all.
            return;
        }

        // The bitmap is handed to the image box, so it must stay alive after this method.
        Bitmap img = (Bitmap)Image.FromFile(fileDialog.FileName);
        ImgBox.Image = img;

        // Create the engine only when it is needed and release it afterwards.
        using (var ocr = new Tesseract())
        {
            ocr.Init(@".\tessdata", "eng", false);
            var result = ocr.DoOCR(img, Rectangle.Empty);
            foreach (var word in result)
            {
                MessageBox.Show(word.Text);
            }
        }
    }
}
/// <summary>
/// Downloads the SUNAT captcha image (sharing the <c>cokkie</c> container), runs English
/// Tesseract OCR on it and stores the text of the last recognized word in <c>captcha</c>.
/// </summary>
/// <returns>The current value of <c>captcha</c>.</returns>
/// <remarks>
/// NOTE(review): when OCR recognizes no word, <c>captcha</c> keeps whatever value it had before
/// this call — confirm whether it should be reset.
/// </remarks>
public static string genCaptcha()
{
    HttpWebRequest request = (HttpWebRequest)WebRequest.Create("http://www.sunat.gob.pe/cl-ti-itmrconsruc/captcha?accion=image");

    // NOTE(review): SSL 3.0 is obsolete and this setting is process-wide; the request itself is
    // plain http. Left unchanged because other code may rely on it — confirm and remove.
    ServicePointManager.SecurityProtocol = SecurityProtocolType.Ssl3;
    request.CookieContainer = cokkie;

    // Response, stream, bitmap and engine all hold native resources; release them in reverse order.
    using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
    using (Stream responseStream = response.GetResponseStream())
    using (var image = new Bitmap(responseStream))
    using (var ocr = new Tesseract())
    {
        var path = HostingEnvironment.MapPath(@"~/Componentes/tessdata");
        ocr.Init(path, "eng", false);

        var result = ocr.DoOCR(image, Rectangle.Empty);
        foreach (Word word in result)
        {
            // Each word overwrites the previous one; only the last recognized word is kept.
            captcha = word.Text;
        }
    }

    return (captcha);
}
/// <summary>
/// OCRs every file in <c>C:\WindowsServiceInput\</c> that has no JSON result yet and writes the
/// recognized word texts as a JSON string array to
/// <c>C:\WindowsServiceOutput\&lt;file name without extension&gt;.json</c>.
/// </summary>
private static void CreateJson()
{
    const string inputDirectory = @"C:\WindowsServiceInput\";
    const string outputDirectory = @"C:\WindowsServiceOutput\";

    var files = new DirectoryInfo(inputDirectory).GetFiles();
    foreach (var file in files)
    {
        // Check for the file this method actually writes. The previous check looked for
        // "<output dir>\<input file name>", which is never created here, so every input
        // file was re-processed on every run.
        string outputPath = outputDirectory + Path.GetFileNameWithoutExtension(file.Name) + ".json";
        if (File.Exists(outputPath))
        {
            continue;
        }

        List<string> data = new List<string>();

        // Image.FromFile keeps the input file locked until the image is disposed.
        using (var image = Image.FromFile(file.FullName))
        using (var ocr = new Tesseract())
        {
            ocr.Init(@"..\..\Content\tessdata", "eng", false);
            var result = ocr.DoOCR((Bitmap)image, Rectangle.Empty);
            foreach (Word word in result)
            {
                data.Add(word.Text);
            }
        }

        string json = JsonConvert.SerializeObject(data.ToArray());
        System.IO.File.WriteAllText(outputPath, json);
    }
}
/// <summary>
/// Runs English Tesseract OCR over the whole bitmap, restricted to the characters in
/// <c>alphabetnum</c>, and returns the recognized words.
/// </summary>
/// <param name="bitmap">Bitmap to recognize.</param>
/// <returns>The list of words produced by the OCR engine.</returns>
public List<Word> GetWordFromImage(Bitmap bitmap)
{
    // The engine is only needed for this call; release it before returning.
    using (Tesseract ocr = new Tesseract())
    {
        ocr.SetVariable("tessedit_char_whitelist", alphabetnum);
        ocr.Init(null, "eng", false);
        return ocr.DoOCR(bitmap, Rectangle.Empty);
    }
}
/// <summary>
/// Runs digits-only Tesseract OCR on a captured bitmap. Every recognized word of at least five
/// characters is copied to the clipboard; for every shorter word the method waits one second
/// and triggers a new capture with a decremented retry counter.
/// </summary>
/// <param name="b">Captured bitmap to recognize.</param>
/// <param name="rect">Screen rectangle passed on to <c>CaptureScreen</c> when retrying.</param>
/// <param name="retry">Remaining retry count, passed on decremented to <c>CaptureScreen</c>.</param>
/// <remarks>
/// NOTE(review): this method does not stop when <paramref name="retry"/> reaches zero —
/// presumably <c>CaptureScreen</c> enforces the limit; confirm.
/// </remarks>
private void ProcessImageOCR(Bitmap b, Rectangle rect, int retry)
{
    Log("Start processing password from image. (OCR)");

    // Finish with the engine before processing results, so that it is released
    // before any retry starts a new capture.
    System.Collections.Generic.List<Word> result;
    using (var ocr = new Tesseract())
    {
        ocr.SetVariable("tessedit_char_whitelist", "0123456789");
        ocr.Init(@tessResFolder, "eng", true);
        result = ocr.DoOCR(b, Rectangle.Empty);
    }

    foreach (Word word in result)
    {
        Log("OCR returned: " + word.Text);
        if (word.Text.Length >= 5)
        {
            Clipboard.SetText(word.Text);
            Log("Password copied to clipboard.");
        }
        else
        {
            Thread.Sleep(1000);
            Log("OCR not accepted. Retrying... " + retry);
            CaptureScreen(rect, retry - 1);
        }
    }

    if (isFastRun)
    {
        // Close the application as we saved results to clipboard.
        Application.Exit();
    }
}
/// <summary>
/// Renders one PDF page at 200 DPI, splits it into horizontal bands using
/// <c>PDFRegionLocator</c>, OCRs each band (two half-width columns after a BLANKS region,
/// one full-width rectangle after a PIXELS region) and returns all recognized words.
/// </summary>
/// <param name="pdf_filename">Path of the PDF file.</param>
/// <param name="page_number">Page to render and recognize.</param>
/// <returns>The words recognized on the page.</returns>
public static WordList DoOCR(string pdf_filename, int page_number)
{
    Logging.Info("+Rendering page");
    SoraxPDFRenderer renderer = new SoraxPDFRenderer(pdf_filename, pdf_user_password, pdf_user_password);

    // Keep the backing stream open for as long as the bitmap is used, then release it.
    using (MemoryStream ms = new MemoryStream(renderer.GetPageByDPIAsImage(page_number, 200)))
    {
        // NOTE(review): the bitmap itself is not disposed here because it is unclear whether
        // ConvertToWordList keeps a reference to it — confirm and wrap in using if it does not.
        Bitmap bitmap = (Bitmap)Image.FromStream(ms);
        Logging.Info("-Rendering page");

        Logging.Info("Startup directory is {0}", Environment.CurrentDirectory);
        Logging.Info("Language is '{0}'", language);

        using (Tesseract ocr = new Tesseract())
        {
            ocr.Init(null, language, false);

            Logging.Info("+Doing OCR");

            // Build a list of all the rectangles to process. Each rectangle spans from the
            // previous region's y to the current region's y.
            PDFRegionLocator pdf_region_locator = new PDFRegionLocator(bitmap);
            PDFRegionLocator.Region last_region = pdf_region_locator.regions[0];
            List<Rectangle> rectangles = new List<Rectangle>();
            foreach (PDFRegionLocator.Region region in pdf_region_locator.regions)
            {
                int band_height = region.y - last_region.y;

                if (last_region.state == PDFRegionLocator.SegmentState.BLANKS)
                {
                    // Left-hand and right-hand half of the page
                    rectangles.Add(new Rectangle(0, last_region.y, bitmap.Width / 2, band_height));
                    rectangles.Add(new Rectangle(bitmap.Width / 2, last_region.y, bitmap.Width / 2, band_height));
                }
                else if (last_region.state == PDFRegionLocator.SegmentState.PIXELS)
                {
                    // Full column
                    rectangles.Add(new Rectangle(0, last_region.y, bitmap.Width, band_height));
                }

                last_region = region;
            }

            // Do the OCR on each of the rectangles
            WordList word_list = new WordList();
            foreach (Rectangle rectangle in rectangles)
            {
                // Zero or negative extents (e.g. out-of-order regions) cannot be OCR'd.
                if (rectangle.Width <= 0 || rectangle.Height <= 0)
                {
                    Logging.Info("Skipping zero extent rectangle {0}", rectangle.ToString());
                    continue;
                }

                Logging.Info("Doing OCR for region {0}", rectangle.ToString());
                List<Word> result = ocr.DoOCR(bitmap, rectangle);
                Logging.Info("Got {0} words", result.Count);
                word_list.AddRange(ConvertToWordList(result, rectangle, bitmap));
            }

            Logging.Info("-Doing OCR");
            Logging.Info("Found {0} words", word_list.Count);

            return (word_list);
        }
    }
}
/// <summary>
/// Main Thread for processing one ad in AdList at Pos index value: waits a random delay,
/// waits for the page to load and for the ad's viewing time, waits (with timeout) for the
/// captcha image, solves it with digits-only OCR plus arithmetic evaluation and posts the answer.
/// The outcome is recorded in <c>adList[Pos].resInfo</c>.
/// </summary>
/// <param name="sender">The <see cref="BackgroundWorker"/> running this handler.</param>
/// <param name="e">Work arguments; <c>Cancel</c> is set when the worker was cancelled.</param>
private void mainThread_DoWork(object sender, DoWorkEventArgs e)
{
    var worker = (BackgroundWorker)sender;

    Random rnd = new Random();
    var delay = rnd.Next(5000, 10000);
    doMessage(flag.SendText, "Random Delay: " + delay.ToString() + " ms");
    Sem.WaitOne(delay);

    doMessage(flag.SendText, "Loading page...");
    doMessage(flag.ReportProgress, 10);
    while (GetState())
    {
        if (worker.CancellationPending)
        {
            sendStopText();
            e.Cancel = true;
            return;
        }
        Application.DoEvents();
        Thread.Sleep(20);
    }

    doMessage(flag.ReportProgress, 50);
    doMessage(flag.SendText, "Waiting " + adList[Pos].Time.ToString() + " ms");
    Sem.WaitOne(adList[Pos].Time);
    if (worker.CancellationPending)
    {
        sendStopText();
        e.Cancel = true;
        return;
    }

    doMessage(flag.ReportProgress, 60);
    doMessage(flag.SendText, "Waiting for capcha");

    Bitmap pic;
    string html;
    Stopwatch stopwatch = new Stopwatch();

    // Starting Timeout watch...
    stopwatch.Start();

    // Waiting loop: poll the document until the captcha image shows up, the unsolvable
    // flash captcha is detected, the worker is cancelled or the timeout elapses.
    while (true)
    {
        if (worker.CancellationPending)
        {
            sendStopText();
            e.Cancel = true;
            return;
        }

        html = GetDocumentText();

        // Flash captcha, that we can't solve.
        if (html.Contains("ACPuzzleUtil.callbacks"))
        {
            doMessage(flag.ReportProgress, 0);
            doMessage(flag.SendText, "Bad Captcha!");
            adList[Pos].resInfo.AdResult = AdResult.FlashCapcha;
            adList[Pos].resInfo.Stat = AdStatus.Failture;
            doMessage(flag.UpdStatus, string.Empty);
            return;
        }

        // We got the captcha image.
        if (html.Contains("captcha?c=view"))
        {
            byte[] captchaBytes = Convert.FromBase64String(GetBitmap());
            // NOTE(review): the stream is left open because it is unclear whether
            // Utils.cropImage copies the pixels — confirm before disposing it.
            MemoryStream ms = new MemoryStream(captchaBytes);
            pic = Utils.cropImage(Image.FromStream(ms), new Rectangle(0, 0, 90, 40));
            break;
        }

        // Continue waiting for captcha
        Application.DoEvents();
        Sem.WaitOne(1000);

        // Checking Timeout...
        if (stopwatch.ElapsedMilliseconds >= waitTimeout)
        {
            stopwatch.Stop();
            doMessage(flag.SendText, "timeout!");
            doMessage(flag.ReportProgress, 0);
            adList[Pos].resInfo.AdResult = AdResult.Timeout;
            adList[Pos].resInfo.Stat = AdStatus.Failture;
            doMessage(flag.UpdStatus, string.Empty);
            return;
        }
    }

    stopwatch.Stop();
    if (worker.CancellationPending)
    {
        sendStopText();
        e.Cancel = true;
        return;
    }

    doMessage(flag.ReportProgress, 80);

    // Solving captcha: digits and +/- only.
    adList[Pos].resInfo.CapImg = pic;
    string solved = null;
    using (var ocr = new Tesseract())
    {
        ocr.SetVariable("tessedit_char_whitelist", "0123456789+-");
        ocr.Init(null, "eng", false);
        var words = ocr.DoOCR(Utils.MakeGrayscale(pic), Rectangle.Empty);
        if (words != null && words.Count > 0)
        {
            solved = words[0].Text;
        }
    }

    // OCR may recognize nothing at all; record that as a failed attempt instead of
    // crashing the worker with an index-out-of-range exception.
    if (solved == null)
    {
        Utils.AddtoLog("Unsolved: OCR returned no text");
        doMessage(flag.ReportProgress, 0);
        adList[Pos].resInfo.AdResult = AdResult.NotSolved;
        adList[Pos].resInfo.Stat = AdStatus.Failture;
        doMessage(flag.UpdStatus, string.Empty);
        return;
    }

    // Evaluate the recognized arithmetic expression.
    string result = string.Empty;
    var dt = new DataTable();
    try
    {
        result = dt.Compute(solved, null).ToString();
    }
    catch (Exception)
    {
        Utils.AddtoLog("Unsolved: " + solved);
    }
    finally
    {
        dt.Dispose();
    }

    adList[Pos].resInfo.CapStr = solved + " = " + result;
    if (worker.CancellationPending)
    {
        sendStopText();
        e.Cancel = true;
        return;
    }

    // Sending Answer
    doMessage(flag.SendText, "Sending answer");
    doMessage(flag.ReportProgress, 90);

    // The 128-character token follows the "var h = '" marker in the page source.
    const string tokenMarker = "var h = '";
    const int tokenLength = 128;
    int markerPos = html.IndexOf(tokenMarker, StringComparison.Ordinal);
    if (markerPos < 0 || markerPos + tokenMarker.Length + tokenLength > html.Length)
    {
        // Without the token there is nothing valid to send.
        Utils.AddtoLog("Unsolved: answer token not found in page");
        doMessage(flag.ReportProgress, 0);
        adList[Pos].resInfo.AdResult = AdResult.NotSolved;
        adList[Pos].resInfo.Stat = AdStatus.Failture;
        doMessage(flag.UpdStatus, string.Empty);
        return;
    }
    string longJonson = html.Substring(markerPos + tokenMarker.Length, tokenLength);

    cookie = GetCookieContainer(GetCook());
    string res = SendPost(string.Format(vEndReq, longJonson, result, jsKey), vEndUrl, adList[Pos].Link);
    doMessage(flag.ReportProgress, 100);

    if (res == "1")
    {
        adList[Pos].resInfo.AdResult = AdResult.Solved;
        adList[Pos].resInfo.Stat = AdStatus.Success;
    }
    else
    {
        adList[Pos].resInfo.AdResult = AdResult.NotSolved;
        adList[Pos].resInfo.Stat = AdStatus.Failture;
    }

    doMessage(flag.UpdStatus, string.Empty);
}
/// <summary>
/// Queries the SUNAT taxpayer lookup for a RUC: downloads and OCRs the captcha, requests the
/// result page and scrapes the first table of the response into a <c>Contribuyente</c>.
/// </summary>
/// <param name="ruc">RUC number to look up.</param>
/// <returns>
/// A <c>SunatDTO</c>. On success <c>status</c> is 1 and <c>contribuyente</c> is populated; on any
/// exception <c>status</c> is 0, <c>contribuyente</c> is null and <c>mensaje</c> carries the
/// exception message. When the page is not a "Consulta RUC" page or has no table, the DTO is
/// returned with only <c>captcha</c> set.
/// </returns>
private static async Task<SunatDTO> ConsultaSunat(string ruc)
{
    SunatDTO resultado = new SunatDTO();
    try
    {
        if (httpClient.BaseAddress == null)
        {
            httpClient.BaseAddress = new Uri("http://www.sunat.gob.pe/");
        }

        // ---- Captcha ----
        HttpWebRequest httpWebRequest = (HttpWebRequest)WebRequest.Create("http://www.sunat.gob.pe/cl-ti-itmrconsruc/captcha?accion=image");
        // NOTE(review): SSL 3.0 is obsolete and this setting is process-wide; left unchanged
        // because other code may rely on it — confirm and remove.
        ServicePointManager.SecurityProtocol = SecurityProtocolType.Ssl3;
        httpWebRequest.CookieContainer = _cookies;

        // Await the response instead of blocking the calling thread, and release the
        // response, stream, bitmap and OCR engine as soon as the captcha has been read.
        using (WebResponse response = await httpWebRequest.GetResponseAsync())
        using (Stream responseStream = response.GetResponseStream())
        using (Bitmap bitmap = new Bitmap(responseStream))
        using (Tesseract tesseract = new Tesseract())
        {
            string str = Path.Combine(Environment.CurrentDirectory, "Content/tessdata");
            if (!Directory.Exists(str))
            {
                // NOTE(review): an empty tessdata folder will not make Init succeed — confirm intent.
                Directory.CreateDirectory(str);
            }
            tesseract.Init(str, "eng", false);

            // Each word overwrites the previous one; only the last recognized word is kept.
            foreach (Word word in tesseract.DoOCR(bitmap, Rectangle.Empty))
            {
                resultado.captcha = word.Text;
            }
        }

        // ---- SUNAT query ----
        string rawResponseAsync = await getRawResponseAsync(string.Format("http://www.sunat.gob.pe/cl-ti-itmrconsruc/jcrS00Alias?accion=consPorRuc&nroRuc={0}&codigo={1}&tipdoc=1", ruc, resultado.captcha));

        // ---- Document ----
        HtmlDocument htmlDocument = new HtmlDocument();
        htmlDocument.LoadHtml(rawResponseAsync.Trim());
        if (htmlDocument.DocumentNode.SelectNodes("//html[1]/head[1]/title[1]").FirstOrDefault<HtmlNode>().InnerText.Equals("Consulta RUC"))
        {
            HtmlNode htmlNode = htmlDocument.DocumentNode.SelectNodes("//html[1]/body[1]/table[1]").FirstOrDefault();
            if (htmlNode != null)
            {
                HtmlNodeCollection rows = htmlNode.SelectNodes("tr");

                // Walk the response rows and fill in the properties. Each row is identified by
                // the label in its first cell and by how many cells it has.
                resultado.contribuyente = new Contribuyente();
                foreach (HtmlNode row in rows)
                {
                    HtmlNodeCollection cells = row.SelectNodes("td");
                    if (cells == null || cells.Count == 0)
                    {
                        // A row without data cells carries nothing to extract.
                        continue;
                    }

                    string label = cells[0].InnerHtml;

                    if (label.Contains("Estado del"))
                    {
                        resultado.contribuyente.Estado = beautifulString(cells[1].InnerText);
                    }

                    if (cells.Count == 2)
                    {
                        if (label.Contains("RUC:"))
                        {
                            // Value has the form "<ruc> - <razon social>".
                            string[] strArray = cells[1].InnerText.Split('-');
                            resultado.contribuyente.Ruc = beautifulString(strArray[0]);
                            resultado.contribuyente.RazonSocial = beautifulString(strArray[1].TrimStart());
                        }
                        if (label.Contains("Tipo Contribuyente:"))
                        {
                            resultado.contribuyente.Tipo = cells[1].InnerText;
                        }
                        if (label.Contains("Nombre Comercial:"))
                        {
                            resultado.contribuyente.NombreComercial = beautifulString(cells[1].InnerHtml);
                        }
                        if (label.Contains("Dirección del Domicilio Fiscal"))
                        {
                            // Value is split on '-'; the last two parts are province and district,
                            // everything before is the address followed by the department.
                            string[] strArray = cells[1].InnerText.Split('-');
                            Regex regex = new Regex(" ", RegexOptions.RightToLeft);
                            if (strArray.Length == 3)
                            {
                                // The last space separates the address from the department.
                                int index = regex.Match(beautifulString(strArray[0])).Index;
                                resultado.contribuyente.Direccion = beautifulString(strArray[0].Substring(0, index));
                                resultado.contribuyente.Departamento = beautifulString(strArray[0].Substring(index));
                            }
                            if (strArray.Length > 3)
                            {
                                // The address itself contained '-' characters: glue the leading parts back together.
                                string empty = string.Empty;
                                for (int index = 0; index < strArray.Length - 2; ++index)
                                {
                                    empty += strArray[index];
                                }
                                int index1 = regex.Match(beautifulString(empty)).Index;
                                if (index1 != 0)
                                {
                                    resultado.contribuyente.Direccion = beautifulString(empty.Substring(0, index1));
                                    resultado.contribuyente.Departamento = beautifulString(empty.Substring(index1));
                                }
                                else
                                {
                                    resultado.contribuyente.Direccion = beautifulString(empty);
                                    resultado.contribuyente.Departamento = "-";
                                }
                            }
                            resultado.contribuyente.Provincia = beautifulString(strArray[strArray.Length - 2]);
                            resultado.contribuyente.Distrito = beautifulString(strArray[strArray.Length - 1]);
                        }
                        if (label.Contains("Sistema de Contabilidad:"))
                        {
                            resultado.contribuyente.SisContabilidad = beautifulString(cells[1].InnerText);
                        }
                        if (label.Contains("Emisor electrónico desde:"))
                        {
                            resultado.contribuyente.EmisorElectronicoDesde = beautifulString(cells[1].InnerText);
                        }
                        if (label.Contains("Comprobantes Electrónicos"))
                        {
                            string[] strArray = cells[1].InnerText.Split(',');
                            resultado.contribuyente.ComprobantesElectronicos = ((IEnumerable<string>)strArray).ToList<string>();
                        }
                        if (label.Contains("Afiliado al PLE desde:"))
                        {
                            resultado.contribuyente.AfiliadoPLEDesde = beautifulString(cells[1].InnerText);
                        }
                        if (label.Contains("Actividad(es) Económica(s):"))
                        {
                            HtmlNodeCollection options = cells[1].SelectNodes("select/option");
                            if (options != null)
                            {
                                foreach (HtmlNode option in (IEnumerable<HtmlNode>)options)
                                {
                                    resultado.contribuyente.ActividadesEconomicas.Add(beautifulString(option.InnerText));
                                }
                            }
                        }
                        if (label.Contains("Sistema de Emision Electronica:"))
                        {
                            HtmlNodeCollection options = cells[1].SelectNodes("select/option");
                            if (options != null)
                            {
                                foreach (HtmlNode option in (IEnumerable<HtmlNode>)options)
                                {
                                    resultado.contribuyente.SisEmisionElectronica.Add(beautifulString(option.InnerText));
                                }
                            }
                        }
                        if (label.Contains("Padrones"))
                        {
                            HtmlNodeCollection options = cells[1].SelectNodes("select/option");
                            if (options != null)
                            {
                                foreach (HtmlNode option in (IEnumerable<HtmlNode>)options)
                                {
                                    resultado.contribuyente.Padrones.Add(beautifulString(option.InnerText));
                                }
                            }
                        }
                        if (label.Contains("Condición del Contribuyente:"))
                        {
                            resultado.contribuyente.Condicion = beautifulString(cells[1].InnerText);
                        }
                        if (label.Contains("Comprobantes de Pago c/aut."))
                        {
                            HtmlNodeCollection options = cells[1].SelectNodes("select/option");
                            if (options != null)
                            {
                                foreach (HtmlNode option in (IEnumerable<HtmlNode>)options)
                                {
                                    resultado.contribuyente.ComprobantesPagoAutImpresion.Add(beautifulString(option.InnerText));
                                }
                            }
                        }
                    }

                    if (cells.Count == 3 && label.Contains("Comprobantes de Pago c/aut."))
                    {
                        HtmlNodeCollection options = cells[1].SelectNodes("select/option");
                        if (options != null)
                        {
                            foreach (HtmlNode option in (IEnumerable<HtmlNode>)options)
                            {
                                resultado.contribuyente.ComprobantesPagoAutImpresion.Add(beautifulString(option.InnerText));
                            }
                        }
                        // Skips a fixed 22-character prefix of the third cell.
                        resultado.contribuyente.ObligadoEmitirCPE = beautifulString(cells[2].InnerText.Substring(22));
                    }

                    if (cells.Count == 4)
                    {
                        // Four-cell rows hold two label/value pairs; values are in cells 1 and 3.
                        if (label.Contains("Fecha de Inscripción:"))
                        {
                            resultado.contribuyente.FechaInscripcion = beautifulString(cells[1].InnerText);
                            resultado.contribuyente.FechaInicioActividades = beautifulString(cells[3].InnerText);
                        }
                        if (label.Contains("Condición del Contribuyente:"))
                        {
                            resultado.contribuyente.Condicion = beautifulString(cells[1].InnerText);
                            resultado.contribuyente.ProfesionOficio = beautifulString(cells[3].InnerText);
                        }
                        if (label.Contains("Sistema de Emisión de Comprobante:"))
                        {
                            resultado.contribuyente.SisEmisionComprobante = beautifulString(cells[1].InnerText);
                            resultado.contribuyente.ActComercioExterior = beautifulString(cells[3].InnerText);
                        }
                    }
                }

                resultado.status = 1;
                resultado.mensaje = "Se ha encontrado contribuyente!";
                return (resultado);
            }
        }

        return (resultado);
    }
    catch (Exception ex)
    {
        // Any failure (network, OCR, unexpected page layout) is reported through the DTO.
        resultado.status = 0;
        resultado.contribuyente = null;
        resultado.mensaje = ex.Message;
        return (resultado);
    }
}
/// <summary>
/// Looks up a RUC on the SUNAT site: downloads and OCRs the captcha image, submits the query
/// with the recognized code and extracts RUC, business name and address from the first table
/// of the response.
/// </summary>
/// <param name="ruc">RUC number to look up.</param>
/// <returns>
/// <c>null</c> when the captcha image could not be downloaded; otherwise a <c>Contribuyente</c>
/// that is filled as far as parsing succeeded (it may be partially or completely empty).
/// </returns>
public async Task<Contribuyente> ConsultaRuc(string ruc)
{
    Contribuyente contribuyente = new Contribuyente();
    try
    {
        ServicePointManager.DefaultConnectionLimit = 2;

        // Download the captcha image.
        using (HttpResponseMessage responseMessage = await httpClient.GetAsync("cl-ti-itmrconsruc/captcha?accion=image"))
        {
            if (!responseMessage.IsSuccessStatusCode)
            {
                return (null);
            }

            // Start from an empty code: the field is appended to below and would otherwise
            // still contain the captcha text of a previous call.
            captcha = string.Empty;

            using (Stream responseStream = await responseMessage.Content.ReadAsStreamAsync())
            using (var image = new Bitmap(responseStream))
            using (var ocr = new Tesseract())
            {
                // Point the engine at the language data folder.
                ocr.Init(rutaTessData, "eng", false);

                // Convert the image to plain text.
                var result = ocr.DoOCR(image, Rectangle.Empty);
                foreach (Word word in result)
                {
                    captcha += word.Text;
                }
            }
        }

        // Query the RUC, sending the captcha code.
        using (HttpResponseMessage consultaResponse = await httpClient.GetAsync($"cl-ti-itmrconsruc/jcrS03Alias?accion=consPorRuc&razSoc=&nroRuc={ruc}&nrodoc=&contexto=ti-it&tQuery=on&search1={ruc}&tipdoc=1&search2=&coddpto=&codprov=&coddist=&search3=&codigo={captcha.Trim().ToUpper()}&tipodocumento=1"))
        {
            if (consultaResponse.IsSuccessStatusCode)
            {
                // HtmlAgilityPack document built from the response body.
                HtmlAgilityPack.HtmlDocument document = new HtmlAgilityPack.HtmlDocument();
                document.LoadHtml(await consultaResponse.Content.ReadAsStringAsync());

                // SelectNodes yields null when the page has no table at all.
                var tableNode = document.DocumentNode
                                .SelectNodes("//table")
                                ?.FirstOrDefault();
                if (tableNode != null)
                {
                    var rows = tableNode.Elements("tr").ToArray();

                    // Row 1: "<11-digit RUC> - <business name>".
                    var razonSocialCells = rows[1].Elements("td").ToArray();
                    string ConsultaCliente = LimpiarEspacios(razonSocialCells[1].InnerHtml.Trim());
                    contribuyente.RUC = ConsultaCliente.Substring(0, 11).Trim();
                    contribuyente.RazonSocial = ConsultaCliente.Substring(13).Trim();

                    // The address is in row 7, or in row 8 for RUCs starting with "10".
                    int direccionRow = ruc.StartsWith("10", StringComparison.Ordinal) ? 8 : 7;
                    var direccionCells = rows[direccionRow].Elements("td").ToArray();
                    string ConsultaDireccion = LimpiarEspacios(direccionCells[1].InnerHtml.Trim());
                    contribuyente.Direccion = ConsultaDireccion.Trim();
                }
            }
        }
    }
    catch (Exception ex)
    {
        // Best effort: keep returning whatever was parsed, but leave a trace of the failure
        // instead of swallowing it silently.
        System.Diagnostics.Trace.TraceError("ConsultaRuc failed for RUC {0}: {1}", ruc, ex);
    }

    return (contribuyente);
}
/// <summary>
/// Main Thread for processing one ad in AdList at Pos index value: waits a random delay,
/// waits for the page to load and for the ad's viewing time, waits (with timeout) for the
/// captcha image, solves it with digits-only OCR plus arithmetic evaluation and posts the answer.
/// The outcome is recorded in <c>adList[Pos].resInfo</c>.
/// </summary>
/// <param name="sender">The <see cref="BackgroundWorker"/> running this handler.</param>
/// <param name="e">Work arguments; <c>Cancel</c> is set when the worker was cancelled.</param>
private void mainThread_DoWork(object sender, DoWorkEventArgs e)
{
    var worker = (BackgroundWorker)sender;

    Random rnd = new Random();
    var delay = rnd.Next(5000, 10000);
    doMessage(flag.SendText, "Random Delay: " + delay.ToString() + " ms");
    Sem.WaitOne(delay);

    doMessage(flag.SendText, "Loading page...");
    doMessage(flag.ReportProgress, 10);
    while (GetState())
    {
        if (worker.CancellationPending)
        {
            sendStopText();
            e.Cancel = true;
            return;
        }
        Application.DoEvents();
        Thread.Sleep(20);
    }

    doMessage(flag.ReportProgress, 50);
    doMessage(flag.SendText, "Waiting " + adList[Pos].Time.ToString() + " ms");
    Sem.WaitOne(adList[Pos].Time);
    if (worker.CancellationPending)
    {
        sendStopText();
        e.Cancel = true;
        return;
    }

    doMessage(flag.ReportProgress, 60);
    doMessage(flag.SendText, "Waiting for capcha");

    Bitmap pic;
    string html;
    Stopwatch stopwatch = new Stopwatch();

    // Starting Timeout watch...
    stopwatch.Start();

    // Waiting loop: poll the document until the captcha image shows up, the unsolvable
    // flash captcha is detected, the worker is cancelled or the timeout elapses.
    while (true)
    {
        if (worker.CancellationPending)
        {
            sendStopText();
            e.Cancel = true;
            return;
        }

        html = GetDocumentText();

        // Flash captcha, that we can't solve.
        if (html.Contains("ACPuzzleUtil.callbacks"))
        {
            doMessage(flag.ReportProgress, 0);
            doMessage(flag.SendText, "Bad Captcha!");
            adList[Pos].resInfo.AdResult = AdResult.FlashCapcha;
            adList[Pos].resInfo.Stat = AdStatus.Failture;
            doMessage(flag.UpdStatus, string.Empty);
            return;
        }

        // We got the captcha image.
        if (html.Contains("captcha?c=view"))
        {
            byte[] captchaBytes = Convert.FromBase64String(GetBitmap());
            // NOTE(review): the stream is left open because it is unclear whether
            // Utils.cropImage copies the pixels — confirm before disposing it.
            MemoryStream ms = new MemoryStream(captchaBytes);
            pic = Utils.cropImage(Image.FromStream(ms), new Rectangle(0, 0, 90, 40));
            break;
        }

        // Continue waiting for captcha
        Application.DoEvents();
        Sem.WaitOne(1000);

        // Checking Timeout...
        if (stopwatch.ElapsedMilliseconds >= waitTimeout)
        {
            stopwatch.Stop();
            doMessage(flag.SendText, "timeout!");
            doMessage(flag.ReportProgress, 0);
            adList[Pos].resInfo.AdResult = AdResult.Timeout;
            adList[Pos].resInfo.Stat = AdStatus.Failture;
            doMessage(flag.UpdStatus, string.Empty);
            return;
        }
    }

    stopwatch.Stop();
    if (worker.CancellationPending)
    {
        sendStopText();
        e.Cancel = true;
        return;
    }

    doMessage(flag.ReportProgress, 80);

    // Solving captcha: digits and +/- only.
    adList[Pos].resInfo.CapImg = pic;
    string solved = null;
    using (var ocr = new Tesseract())
    {
        ocr.SetVariable("tessedit_char_whitelist", "0123456789+-");
        ocr.Init(null, "eng", false);
        var words = ocr.DoOCR(Utils.MakeGrayscale(pic), Rectangle.Empty);
        if (words != null && words.Count > 0)
        {
            solved = words[0].Text;
        }
    }

    // OCR may recognize nothing at all; record that as a failed attempt instead of
    // crashing the worker with an index-out-of-range exception.
    if (solved == null)
    {
        Utils.AddtoLog("Unsolved: OCR returned no text");
        doMessage(flag.ReportProgress, 0);
        adList[Pos].resInfo.AdResult = AdResult.NotSolved;
        adList[Pos].resInfo.Stat = AdStatus.Failture;
        doMessage(flag.UpdStatus, string.Empty);
        return;
    }

    // Evaluate the recognized arithmetic expression.
    string result = string.Empty;
    var dt = new DataTable();
    try
    {
        result = dt.Compute(solved, null).ToString();
    }
    catch (Exception)
    {
        Utils.AddtoLog("Unsolved: " + solved);
    }
    finally
    {
        dt.Dispose();
    }

    adList[Pos].resInfo.CapStr = solved + " = " + result;
    if (worker.CancellationPending)
    {
        sendStopText();
        e.Cancel = true;
        return;
    }

    // Sending Answer
    doMessage(flag.SendText, "Sending answer");
    doMessage(flag.ReportProgress, 90);

    // The 128-character token follows the "var h = '" marker in the page source.
    const string tokenMarker = "var h = '";
    const int tokenLength = 128;
    int markerPos = html.IndexOf(tokenMarker, StringComparison.Ordinal);
    if (markerPos < 0 || markerPos + tokenMarker.Length + tokenLength > html.Length)
    {
        // Without the token there is nothing valid to send.
        Utils.AddtoLog("Unsolved: answer token not found in page");
        doMessage(flag.ReportProgress, 0);
        adList[Pos].resInfo.AdResult = AdResult.NotSolved;
        adList[Pos].resInfo.Stat = AdStatus.Failture;
        doMessage(flag.UpdStatus, string.Empty);
        return;
    }
    string longJonson = html.Substring(markerPos + tokenMarker.Length, tokenLength);

    cookie = GetCookieContainer(GetCook());
    string res = SendPost(string.Format(vEndReq, longJonson, result, jsKey), vEndUrl, adList[Pos].Link);
    doMessage(flag.ReportProgress, 100);

    if (res == "1")
    {
        adList[Pos].resInfo.AdResult = AdResult.Solved;
        adList[Pos].resInfo.Stat = AdStatus.Success;
    }
    else
    {
        adList[Pos].resInfo.AdResult = AdResult.NotSolved;
        adList[Pos].resInfo.Stat = AdStatus.Failture;
    }

    doMessage(flag.UpdStatus, string.Empty);
}
/// <summary>
/// Renders one PDF page at 200 DPI, splits it into horizontal bands using
/// <c>PDFRegionLocator</c>, OCRs each band (two half-width columns after a BLANKS region,
/// one full-width rectangle after a PIXELS region) and returns all recognized words.
/// </summary>
/// <param name="pdf_filename">Path of the PDF file.</param>
/// <param name="page_number">Page to render and recognize.</param>
/// <returns>The words recognized on the page.</returns>
public static WordList DoOCR(string pdf_filename, int page_number)
{
    Logging.Info("+Rendering page {1} for PDF file {0}", pdf_filename, page_number);
    using (MemoryStream ms = new MemoryStream(SoraxPDFRenderer.GetPageByDPIAsImage(pdf_filename, pdf_user_password, page_number, 200)))
    {
        // NOTE(review): the bitmap itself is not disposed here because it is unclear whether
        // ConvertToWordList keeps a reference to it — confirm and wrap in using if it does not.
        Bitmap bitmap = (Bitmap)Image.FromStream(ms);

        Logging.Info("-Rendering page #{0}", page_number);

        Logging.Info("Startup directory is {0}", Environment.CurrentDirectory);
        Logging.Info("Language is '{0}'", language);

        using (Tesseract ocr = new Tesseract())
        {
            ocr.Init(null, language, false);

            Logging.Info("+Doing OCR");

            const int MIN_WIDTH = 0;

            // Build a list of all the rectangles to process. Each band spans from the previous
            // region's y to the current region's y.
            PDFRegionLocator pdf_region_locator = new PDFRegionLocator(bitmap);
            PDFRegionLocator.Region last_region = pdf_region_locator.regions[0];
            List<Rectangle> rectangles = new List<Rectangle>();

            foreach (PDFRegionLocator.Region region in pdf_region_locator.regions)
            {
                int rect_height = region.y - last_region.y;
                int band_height = Math.Max(MIN_WIDTH, rect_height);

                // Collect every rectangle this band contributes. A BLANKS band yields BOTH page
                // halves; assigning them to one variable in turn would drop the left half.
                List<Rectangle> candidates = new List<Rectangle>();
                if (last_region.state == PDFRegionLocator.SegmentState.BLANKS)
                {
                    // LHS
                    candidates.Add(new Rectangle(0, last_region.y, bitmap.Width / 2, band_height));
                    // RHS
                    candidates.Add(new Rectangle(bitmap.Width / 2, last_region.y, bitmap.Width / 2, band_height));
                }
                else if (last_region.state == PDFRegionLocator.SegmentState.PIXELS)
                {
                    // Full column
                    candidates.Add(new Rectangle(0, last_region.y, bitmap.Width, band_height));
                }
                else
                {
                    // Any other state contributes an empty rectangle, which is reported and skipped below.
                    candidates.Add(new Rectangle());
                }

                // All candidates of a band share the same height, so one check covers them all.
                Rectangle reported = candidates[candidates.Count - 1];
                if (rect_height <= 0 || reported.Height <= 0)
                {
                    Logging.Warn("Calculated region height is negative or zero: {0} :: Calculated region {1} <-- CURRENT:{2} - LAST:{3}", rect_height, reported, region, last_region);
                    // skip rectangle
                }
                else
                {
                    foreach (Rectangle rectangle in candidates)
                    {
                        int last_index = rectangles.Count - 1;
                        if (last_index >= 0 && rectangles[last_index].X == rectangle.X && rectangles[last_index].Y == rectangle.Y)
                        {
                            // Rectangle is a value type: grow a copy and store it back, otherwise
                            // the merged size never reaches the list that is OCR'd below.
                            Rectangle last_rectangle = rectangles[last_index];
                            Logging.Warn("Overlapping subsequent rectangles will be merged :: CURRENT:{0} - LAST:{1}", rectangle, last_rectangle);
                            last_rectangle.Width = Math.Max(last_rectangle.Width, rectangle.Width);
                            last_rectangle.Height = Math.Max(last_rectangle.Height, rectangle.Height);
                            rectangles[last_index] = last_rectangle;
                            Logging.Warn("--> Updated 'last' rectangle:{0}", last_rectangle);
                        }
                        else
                        {
                            rectangles.Add(rectangle);
                        }
                    }
                }

                last_region = region;
            }

            // DEBUG CODE: Draw in the region rectangles
            //
            // When we run in NOKILL mode, we "know" we're running in a debugger or stand-alone environment
            // intended for testing this code. Hence we should dump the regions image as part of the process.
            // NOTE(review): the outlines are drawn onto the same bitmap that is OCR'd afterwards.
            if (no_kill)
            {
                string bitmap_diag_path = pdf_filename + @"." + page_number + @"-ocr.png";

                Logging.Info("Dumping regions-augmented page {0} PNG image to file {1}", page_number, bitmap_diag_path);
                using (Graphics g = Graphics.FromImage(bitmap))
                {
                    foreach (Rectangle rectangle in rectangles)
                    {
                        if (rectangle.Width <= MIN_WIDTH && rectangle.Height > MIN_WIDTH)
                        {
                            DrawRectangleOutline(g, Pens.Purple, rectangle);
                        }
                        else if (rectangle.Width > MIN_WIDTH && rectangle.Height <= MIN_WIDTH)
                        {
                            DrawRectangleOutline(g, Pens.PowderBlue, rectangle);
                        }
                        else if (rectangle.Width <= MIN_WIDTH && rectangle.Height <= MIN_WIDTH)
                        {
                            DrawRectangleOutline(g, Pens.Red, rectangle);
                        }
                        else
                        {
                            DrawRectangleOutline(g, Pens.LawnGreen, rectangle);
                        }
                    }
                }

                bitmap.Save(bitmap_diag_path, ImageFormat.Png);
            }

            // Do the OCR on each of the rectangles
            WordList word_list = new WordList();
            foreach (Rectangle rectangle in rectangles)
            {
                if (0 == rectangle.Width || 0 == rectangle.Height)
                {
                    Logging.Info("Skipping zero extent rectangle {0}", rectangle.ToString());
                    continue;
                }

                Logging.Info("Doing OCR for region {0} on bitmap WxH: {1}x{2}", rectangle.ToString(), bitmap.Width, bitmap.Height);
                List<Word> result = ocr.DoOCR(bitmap, rectangle);
                Logging.Info("Got {0} words", result.Count);
                word_list.AddRange(ConvertToWordList(result, rectangle, bitmap));
            }

            Logging.Info("-Doing OCR");

            Logging.Info("Found {0} words ({1} @ #{2})", word_list.Count, pdf_filename, page_number);

#if false
            Logging.Info("+Reordering words for columns");
            WordList word_list_ordered = ColumnWordOrderer.ReorderWords(word_list);
            Logging.Info("-Reordering words for columns");
            word_list_ordered.WriteToFile(ocr_output_filename);
#endif

            return (word_list);
        }
    }
}
/// <summary>
/// Walks a chain of contours (following <c>HNext</c>) looking for a region that has the shape of a
/// license plate, runs OCR on the first matching region that yields text and records it.
/// </summary>
/// <param name="contours">First contour of the chain to inspect; <c>null</c> ends the search immediately.</param>
/// <param name="gray">Grayscale image from which the candidate plate region is copied.</param>
/// <param name="canny">Edge image; not read here, only forwarded to the recursive calls.</param>
/// <param name="licensePlateImagesList">Receives the cropped plate image of an accepted region.</param>
/// <param name="filteredLicensePlateImagesList">Receives the result of <c>FilterPlate</c> for an accepted region.</param>
/// <param name="detectedLicensePlateRegionList">Receives the (angle-normalised) bounding box of an accepted region.</param>
/// <param name="licenses">Receives the OCR words of an accepted region.</param>
/// <remarks>
/// The method returns as soon as one region has been accepted in the chain it is iterating. Regions
/// rejected for having too few children or the wrong aspect ratio are searched recursively through
/// <c>VNext</c>; a hit inside such a recursive call does not stop the outer iteration.
/// </remarks>
private void FindLicensePlate(
    Contour<Point> contours,
    Image<Gray, Byte> gray,
    Image<Gray, Byte> canny,
    List<Image<Gray, Byte>> licensePlateImagesList,
    List<Image<Gray, Byte>> filteredLicensePlateImagesList,
    List<MCvBox2D> detectedLicensePlateRegionList,
    List<List<Word>> licenses)
{
    for (; contours != null; contours = contours.HNext)
    {
        int numberOfChildren = GetNumberOfChildren(contours);

        // A contour without any children (characters) is not a license plate region.
        if (numberOfChildren == 0)
        {
            continue;
        }

        // Only contours whose area exceeds 400 are considered at all.
        if (contours.Area > 400)
        {
            if (numberOfChildren < 3)
            {
                // Fewer than 3 children: not a plate (a plate is assumed to carry at least 3 characters),
                // but one of its children may be one. A null VNext is handled by the loop condition.
                FindLicensePlate(contours.VNext, gray, canny, licensePlateImagesList, filteredLicensePlateImagesList, detectedLicensePlateRegionList, licenses);
                continue;
            }

            // Normalise the bounding box so that its angle lies within [-45, 45] degrees,
            // swapping width and height whenever the box is rotated by a quarter turn.
            MCvBox2D box = contours.GetMinAreaRect();
            if (box.angle < -45.0)
            {
                float tmp = box.size.Width;
                box.size.Width = box.size.Height;
                box.size.Height = tmp;
                box.angle += 90.0f;
            }
            else if (box.angle > 45.0)
            {
                float tmp = box.size.Width;
                box.size.Width = box.size.Height;
                box.size.Height = tmp;
                box.angle -= 90.0f;
            }

            double whRatio = (double)box.size.Width / box.size.Height;
            if (!(1.0 < whRatio && whRatio < 2.0))
            {
                // Wrong width/height ratio: not a plate, but search the children of this contour.
                Contour<Point> child = contours.VNext;
                if (child != null)
                {
                    FindLicensePlate(child, gray, canny, licensePlateImagesList, filteredLicensePlateImagesList, detectedLicensePlateRegionList, licenses);
                }
                continue;
            }

            Image<Gray, Byte> plate = gray.Copy(box);
            Image<Gray, Byte> filteredPlate = FilterPlate(plate);

            // NOTE(review): OCR is run on the unfiltered plate (as before), although a filtered version
            // is computed and stored alongside it - confirm that this is the intended OCR input.
            List<Word> words;
            using (Bitmap bmp = plate.Bitmap)
            {
                // Dispose the bitmap that is actually handed to the OCR engine; previously a throw-away
                // bitmap of the filtered plate was disposed while this one was never released.
                words = _ocr.DoOCR(bmp, plate.ROI);
            }

            if (words.Count == 0)
            {
                // Nothing recognised: the cropped images are not stored anywhere, so release them now.
                filteredPlate.Dispose();
                if (!ReferenceEquals(plate, filteredPlate))
                {
                    plate.Dispose();
                }
                continue;
            }

            licenses.Add(words);
            licensePlateImagesList.Add(plate);
            filteredLicensePlateImagesList.Add(filteredPlate);
            detectedLicensePlateRegionList.Add(box);
            return;
        }
    }
}
/// <summary>
/// Captures a fixed region of the screen, runs OCR on it and, for every recognised word whose
/// dictionary level is higher than the user's level, inserts the word pair into the user's table.
/// </summary>
/// <remarks>
/// <para>
/// The screenshot is written to <c>Screenshot{i}.png</c> in the working directory and read back for
/// OCR; the field <c>i</c> is incremented once the run has completed.
/// </para>
/// <para>
/// Each OCR word is normalised (special characters removed, "0" to "o", "1" to "l", dots dropped,
/// lower-cased) and looked up in the tables A1..C2. The last table in which the word occurs exactly
/// once determines its level. Matches are stored in the fields <c>ing</c> and <c>tur</c> at the
/// index of the word.
/// </para>
/// <para>
/// Values are passed to SQLite as parameters. Table names cannot be parameterised, so <c>name</c>
/// is still concatenated into the statement. NOTE(review): confirm that <c>name</c> never contains
/// untrusted text.
/// </para>
/// </remarks>
private void HandleHotkey()
{
    string screenshotPath = "Screenshot" + i + ".png";

    // Copy the screen, starting at screen position (150, 650), into a 1200x300 bitmap and save it.
    // The requested block size is the whole primary screen; anything outside the bitmap is clipped.
    using (var bmpScreenshot = new Bitmap(1200, 300, PixelFormat.Format32bppArgb))
    using (var gfxScreenshot = Graphics.FromImage(bmpScreenshot))
    {
        gfxScreenshot.CopyFromScreen(150, 650, Screen.PrimaryScreen.Bounds.X, Screen.PrimaryScreen.Bounds.Y, Screen.PrimaryScreen.Bounds.Size, CopyPixelOperation.SourceCopy);
        bmpScreenshot.Save(screenshotPath, ImageFormat.Png);
    }

    string[] levels = new string[] { "A1", "A2", "B1", "B2", "C1", "C2" };

    // Index of the user's current level. It does not depend on the word being processed, so it is
    // computed once; it stays 0 when the level is not one of the known names.
    int advance = 0;
    for (int j = 0; j < levels.Length; j++)
    {
        if (level == levels[j])
        {
            advance = j;
        }
    }

    // A bitmap loaded from a file keeps that file locked until it is disposed.
    using (var image = new Bitmap(screenshotPath))
    using (var ocr = new Tesseract())
    using (SQLiteConnection conn = new SQLiteConnection("data source=database.db"))
    using (SQLiteCommand sqliteCommand = new SQLiteCommand(conn))
    {
        ocr.Init(@"C:/Users/Emin/Desktop/OCR_project/packages/NuGet.Tessnet2.1.1.1/content/Content/tessdata", "eng", false);
        var result = ocr.DoOCR(image, Rectangle.Empty);

        // One connection serves all words instead of opening a new one per word.
        conn.Open();

        int t = 0;
        foreach (tessnet2.Word word in result)
        {
            // Invariant lower-casing: the culture-sensitive ToLower() maps "I" to a dotless "i"
            // under a Turkish UI culture, which would make English words unfindable.
            word.Text = RemoveSpecialCharacters(word.Text)
                .Replace("0", "o")
                .Replace("1", "l")
                .Replace(".", "")
                .ToLowerInvariant();

            // Reset per word; otherwise a word without any match inherits the level of the
            // previous word and triggers an insert of stale or empty data.
            int founded = 0;

            for (int levelIndex = 0; levelIndex < levels.Length; levelIndex++)
            {
                sqliteCommand.CommandText = "SELECT ing,tur from " + levels[levelIndex] + " where ing=@word";
                sqliteCommand.Parameters.Clear();
                sqliteCommand.Parameters.AddWithValue("@word", word.Text);

                int count = 0;
                using (SQLiteDataReader sqliteReader = sqliteCommand.ExecuteReader())
                {
                    while (sqliteReader.Read())
                    {
                        try
                        {
                            ing[t] = sqliteReader[0].ToString();
                            tur[t] = sqliteReader[1].ToString();
                        }
                        catch (Exception ex)
                        {
                            // Best effort, as before (e.g. more words than slots), but no longer silent.
                            System.Diagnostics.Debug.WriteLine("HandleHotkey: could not store match " + t + ": " + ex.Message);
                        }

                        count++;
                    }
                }

                if (count == 1)
                {
                    founded = levelIndex;
                }
            }

            if (founded > advance)
            {
                try
                {
                    if (!string.IsNullOrEmpty(ing[t]) && !string.IsNullOrEmpty(tur[t]))
                    {
                        query = "INSERT INTO " + name + "(eng, tur) VALUES (@eng, @tur)";
                        sqliteCommand.CommandText = query;
                        sqliteCommand.Parameters.Clear();
                        sqliteCommand.Parameters.AddWithValue("@eng", ing[t]);
                        sqliteCommand.Parameters.AddWithValue("@tur", tur[t]);
                        sqliteCommand.ExecuteNonQuery();
                    }
                }
                catch (Exception ex)
                {
                    // A failed insert must not abort the remaining words.
                    System.Diagnostics.Debug.WriteLine("HandleHotkey: insert failed for match " + t + ": " + ex.Message);
                }

                // Remove rows with an empty English word from the user's table.
                sqliteCommand.Parameters.Clear();
                sqliteCommand.CommandText = "DELETE FROM " + name + " where eng=''";
                sqliteCommand.ExecuteNonQuery();
            }

            t++;
        }
    }

    i++;
}
/// <summary>
/// Runs OCR over <paramref name="bmp"/> and reports every recognised word that matches one of the
/// given text triggers.
/// </summary>
/// <param name="bmp">Image to analyse; the whole image is passed to the OCR engine.</param>
/// <param name="filters">
/// Triggers to test each word against. <c>triggerString</c> is used as a regular expression and is
/// matched case-insensitively. A <c>null</c> or empty list skips OCR altogether.
/// </param>
/// <returns>
/// A result holding one fault per (word, trigger) match; it has no faults added when there are no
/// triggers or nothing matches.
/// </returns>
/// <remarks>
/// The confidence threshold is read from the <c>textFilterConfidence</c> application setting and is
/// parsed with the invariant culture, so the configured value must use "." as decimal separator.
/// Only words whose confidence value is below the threshold are tested (for this engine a lower
/// value means greater certainty).
/// </remarks>
public static TextAnalysisResult ProcessText(Bitmap bmp, List<TextTrigger> filters)
{
    TextAnalysisResult analysisResult = new TextAnalysisResult();

    // Without triggers there is nothing to report, and OCR is expensive - bail out early.
    if (filters == null || filters.Count == 0)
    {
        Debug.WriteLine("TextAnalysis.ProcessText no text triggers");
        return analysisResult;
    }

    // Configuration values are machine-readable: parse them independently of the current culture.
    double confidenceFilter = double.Parse(
        ConfigurationManager.AppSettings["textFilterConfidence"],
        System.Globalization.CultureInfo.InvariantCulture);

    List<Word> results;

    // OCR is serialised through lockObj; the measured time includes waiting for the lock.
    Stopwatch watch = Stopwatch.StartNew();
    lock (lockObj)
    {
        using (Tesseract tessocr = new Tesseract())
        {
            tessocr.Init(configPath, "eng", false);
            results = tessocr.DoOCR(bmp, Rectangle.Empty);
        }
    }
    watch.Stop();
    Debug.WriteLine("TextAnalysis.ProcessText OCR took {0}ms", watch.ElapsedMilliseconds);

    // A lower confidence number means greater certainty.
    foreach (var resultWord in results.Where(candidate => candidate.Confidence < confidenceFilter))
    {
        string text = resultWord.Text;
        foreach (var trigger in filters)
        {
            // Match case-insensitively through RegexOptions. Upper-casing the pattern itself would
            // corrupt escapes such as \d, \w, \s and \b by turning them into their negated forms.
            if (Regex.IsMatch(text, trigger.triggerString, RegexOptions.IgnoreCase | RegexOptions.CultureInvariant))
            {
                analysisResult.AddFault(trigger.userEmail, trigger.triggerString, text, (int)resultWord.Confidence);
            }
        }
    }

    return analysisResult;
}