/// <summary>
/// Demo entry point: loads the local test site, reads the page title and the first
/// column of the person table, then posts the "dataForm" form back to the server.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    // setup the browser
    ScrapingBrowser Browser = new ScrapingBrowser();
    Browser.AllowAutoRedirect = true; // Browser has many settings you can access in setup
    Browser.AllowMetaRedirect = true;

    // go to the home page
    WebPage PageResult = Browser.NavigateToPage(new Uri("http://localhost:51621/"));

    // get first piece of data, the page title
    HtmlNode TitleNode = PageResult.Html.CssSelect(".navbar-brand").First();
    string PageTitle = TitleNode.InnerText;

    // get a list of data from a table
    List<string> Names = new List<string>();
    var Table = PageResult.Html.CssSelect("#PersonTable").First();

    // SelectNodes returns null (not an empty collection) when the XPath matches
    // nothing, so an empty table must be guarded before iterating.
    var rows = Table.SelectNodes("tbody/tr");
    if (rows != null)
    {
        foreach (var row in rows)
        {
            var cells = row.SelectNodes("td[1]");
            if (cells == null)
            {
                continue; // row without a first data cell
            }

            foreach (var cell in cells)
            {
                Names.Add(cell.InnerText);
            }
        }
    }

    // find a form and send back data
    PageWebForm form = PageResult.FindFormById("dataForm");

    // assign values to the form fields
    form["UserName"] = "******";
    form["Gender"] = "M";
    form.Method = HttpVerb.Post;
    WebPage resultsPage = form.Submit();
}
/// <summary>
/// Fills the "form.nav-search" form found on <c>homePage</c> with the current search
/// name, submits it with GET and passes the resulting page to <c>setCondition</c>.
/// </summary>
/// <returns>Whatever <c>setCondition</c> returns for the search results page.</returns>
public WebPage getPage()
{
    // Locate the search form on the already-loaded home page.
    HtmlNode searchFormNode = homePage.Html.CssSelect("form.nav-search").FirstOrDefault();

    PageWebForm searchForm = new PageWebForm(searchFormNode, browser);
    searchForm["as_word"] = search.name;
    searchForm.Method = HttpVerb.Get;

    WebPage searchResults = searchForm.Submit();

    // The flag tells setCondition whether the search parameter is "ORD_PRICE".
    bool isOrdPrice = search.param.Equals("ORD_PRICE");
    return setCondition(searchResults, isOrdPrice);
}
/// <summary>
/// Sample scrape: searches Bing for "scrapysharp" through the home page search form,
/// collects the result links and follows the link whose text matches the blog title.
/// </summary>
public static void scrape()
{
    var scraper = new ScrapingBrowser();
    var bingHome = scraper.NavigateToPage(new Uri("http://www.bing.com/"));

    // Submit the search form with GET.
    var searchForm = bingHome.FindFormById("sb_form");
    searchForm["q"] = "scrapysharp";
    searchForm.Method = HttpVerb.Get;
    var searchResults = searchForm.Submit();

    // Materialise the result anchors.
    var anchors = searchResults.Html.CssSelect("div.sb_tlst h3 a");
    HtmlNode[] resultsLinks = anchors.ToArray();

    // Exactly one link with this text is expected; Single() throws otherwise.
    var blogLink = searchResults
        .FindLinks(By.Text("romcyber blog | Just another WordPress site"))
        .Single();
    WebPage blogPage = blogLink.Click();
}
/// <summary>
/// Opens the given delete page, posts the first form found on it and verifies that
/// the response reports a successful deletion.
/// </summary>
/// <param name="deleteUrl">Absolute URL of the image's delete page.</param>
/// <exception cref="CommandException">
/// Thrown when the response does not contain the success message.
/// </exception>
private async Task DeleteImage(string deleteUrl)
{
    var scraper = new ScrapingBrowser();
    WebPage deletePage = await scraper.NavigateToPageAsync(new Uri(deleteUrl));

    // The first <form> on the page is the one that gets posted.
    HtmlNode formNode = deletePage.Html.Descendants("form").First();
    var deleteForm = new PageWebForm(formNode, scraper);
    deleteForm.Method = HttpVerb.Post;

    WebPage response = deleteForm.Submit();

    bool deleted = response.Content.Contains("has been deleted successfully");
    if (deleted)
    {
        return;
    }

    throw new CommandException("Error: Deleting the image failed.");
}
/// <summary>
/// Logs in to the DMS site with the credentials from <c>Regsetting.DMS_Setting</c>.
/// The outcome is stored in <c>loggedin</c>; messages are appended to <c>result</c>
/// and written to <c>log</c>.
/// </summary>
/// <remarks>
/// After submitting the form, any <c>h3</c> element on the response page is treated
/// as a failed login, and its text (plus the first <c>ul</c>, if any) is reported.
/// </remarks>
private void Login()
{
    if (Regsetting.DMS_Setting.UserID.Length == 0 || Regsetting.DMS_Setting.Password.Length == 0)
    {
        result.Add("No UserID/Pasword, Can't Login.");
        log.WriteLine("No UserID/Pasword, Can't Login.");
        loggedin = false;
        return;
    }

    try
    {
        resultsPage = browser.NavigateToPage(new Uri(URLBase + "/DMS/index.jsp"));
        form = resultsPage.FindForm("loginActionForm");
        form["personcode"] = Regsetting.DMS_Setting.UserID;
        form["password"] = Regsetting.DMS_Setting.getPassword();
        form.Method = HttpVerb.Post;
        resultsPage = form.Submit();
        log.WriteLine("Login form submitted.");
#if (DEBUG)
        string cookie = resultsPage.RawRequest.Headers.FirstOrDefault(c => (c.Key == "Set-Cookie") || (c.Key == "Cookie")).Value;
        //cookie = cookie.Substring(cookie.IndexOf("="), cookie.Length);
        result.Add("[D]DMS Login with Session: " + cookie);
#endif
        // Run the selector once and reuse the matches.
        var valid = resultsPage.Html.CssSelect("h3").ToList();
        if (valid.Any())
        {
            result.Add(valid.First().InnerText?.Trim());

            // The detail list is optional: a missing <ul> must not be reported
            // as a connection failure by the catch block below.
            var details = resultsPage.Html.CssSelect("ul").FirstOrDefault();
            if (details != null)
            {
                result.Add(details.InnerText.Trim());
            }

            loggedin = false;
        }
        else
        {
            loggedin = true;
        }
    }
    catch (Exception ex)
    {
        // InnerException is often null; dereferencing it unconditionally would
        // throw a NullReferenceException out of this handler.
        string detail = ex.InnerException == null
            ? ex.Message
            : ex.Message + " -> " + ex.InnerException.Message;

        result.Add("Connection fail : " + detail);
        log.WriteLine("Login fail : " + detail);
        loggedin = false;
    }
}
// https://www.drf.com/live_odds/winodds/track/GG/USA/11/D
/// <summary>
/// Smoke test against Bing: runs a "scrapysharp" search via the home page form,
/// reads the result links and clicks through to the blog entry.
/// </summary>
public void TestBing()
{
    var scraper = new ScrapingBrowser();

    //set UseDefaultCookiesParser as false if a website returns invalid cookies format
    //scraper.UseDefaultCookiesParser = false;

    var startUri = new Uri("http://www.bing.com/");
    WebPage bingHome = scraper.NavigateToPage(startUri);

    // Fill in and submit the search box with GET.
    PageWebForm query = bingHome.FindFormById("sb_form");
    query["q"] = "scrapysharp";
    query.Method = HttpVerb.Get;
    WebPage hits = query.Submit();

    HtmlNode[] resultsLinks = hits.Html.CssSelect("div.sb_tlst h3 a").ToArray();

    // Single() requires exactly one link carrying this text.
    var matchingLinks = hits.FindLinks(By.Text("romcyber blog | Just another WordPress site"));
    WebPage blogPage = matchingLinks.Single().Click();
}
/// <summary>
/// Creates a browser, opens the pollitika.com home page and submits the login form.
/// </summary>
/// <returns>The browser instance that was used to submit the login form.</returns>
/// <remarks>
/// NOTE(review): the credentials are hard-coded in this method; consider moving
/// them to configuration.
/// </remarks>
public static ScrapingBrowser GetLoggedBrowser()
{
    // Follow both HTTP and meta-refresh redirects.
    var scraper = new ScrapingBrowser
    {
        AllowAutoRedirect = true,
        AllowMetaRedirect = true
    };

    // Load the home page, which hosts the login form.
    WebPage landingPage = scraper.NavigateToPage(new Uri("http://www.pollitika.com"));
    PageWebForm loginForm = landingPage.FindFormById("user-login-form");

    // Fill in the credentials and post them; the response page is not needed.
    loginForm["name"] = "Zvone Radikalni";
    loginForm["pass"] = "******";
    loginForm.Method = HttpVerb.Post;
    loginForm.Submit();

    return scraper;
}
/// <summary>
/// Logs in to pollitika.com through the login form, then downloads one article
/// page and prints its HTML to the console.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    // Browser that follows both HTTP and meta-refresh redirects.
    var scraper = new ScrapingBrowser
    {
        AllowAutoRedirect = true,
        AllowMetaRedirect = true
    };

    // Open the home page and locate the login form.
    WebPage currentPage = scraper.NavigateToPage(new Uri("http://www.pollitika.com"));
    PageWebForm loginForm = currentPage.FindFormById("user-login-form");

    // Post the credentials; the response page itself is not used.
    loginForm["name"] = "Zvone Radikalni";
    loginForm["pass"] = "******";
    loginForm.Method = HttpVerb.Post;
    loginForm.Submit();

    // Fetch the article with the same browser and dump its markup.
    var articleUri = new Uri("http://pollitika.com/hrvatsko-zdravstvo-i-sovjetska-automobilska-industrija");
    currentPage = scraper.NavigateToPage(articleUri);
    Console.WriteLine(currentPage.Html.InnerHtml);
}
/// <summary>
/// Loads the site's home page, posts its first form to the fixed action URL and
/// turns every link matched by <c>Constants.PharmGroupCssSelector</c> into a
/// <see cref="PharmGroup"/>.
/// </summary>
/// <returns>
/// One <see cref="PharmGroup"/> per matched link, materialised so that repeated
/// enumeration does not rebuild the objects.
/// </returns>
/// <remarks>
/// The previous version also issued a blocking <c>HttpClient</c> GET whose result
/// was discarded. That client shared no state with the scraping browser, so the
/// request was pure overhead and has been removed.
/// </remarks>
public IEnumerable<PharmGroup> ObtainPharmGroups()
{
    var browser = new ScrapingBrowser() { Encoding = Encoding.UTF8 };

    //set UseDefaultCookiesParser as false if a website returns invalid cookies format
    //browser.UseDefaultCookiesParser = false;

    WebPage homePage = browser.NavigateToPage(new Uri(Constants.SiteUrl));

    // The first form on the page is re-targeted at the fixed action URL below.
    var nodeForm = homePage.Html.CssSelect("form").First();
    PageWebForm form = new PageWebForm(nodeForm, browser);
    form.Method = HttpVerb.Post;
    // NOTE(review): the verbatim literal ends with a backslash — confirm the URL is intended.
    form.Action = @"http://mozdocs.kiev.ua/oops/?hash=865b7da1da7b99afedb34ab1dd3b1072\";
    WebPage resultsPage = form.Submit();

    HtmlNode[] pharmGroupLinks = resultsPage.Html.CssSelect(Constants.PharmGroupCssSelector).ToArray();

    // Single() throws if a link does not have exactly one href attribute.
    return pharmGroupLinks
        .Select(a => new PharmGroup
        {
            Id = new MongoDB.Bson.ObjectId(),
            Name = a.InnerText,
            Url = a.Attributes.Single(attr => attr.Name.Equals(Constants.HrefAttribute)).Value
        })
        .ToList();
}
/// <summary>
/// Queries the SSN active-producers registry for every matricula in a range and
/// appends one semicolon-separated row per matricula to an output file, writing
/// progress to a log file.
/// </summary>
/// <param name="args">
/// args[0]: output file path (appended to; a header row is written when the file
/// does not exist yet). args[1] and args[2]: first and last matricula, inclusive.
/// args[3] and args[4] (optional): delay in seconds and log file path, in either
/// order; the numeric one is taken as the delay. Defaults: delay 10, log "log.txt".
/// </param>
/// <returns>0 on success; 1 on invalid arguments or when a request fails.</returns>
/// <remarks>
/// NOTE(review): the output file is written with ASCII encoding, so non-ASCII
/// characters scraped from the page are replaced — confirm this is intended.
/// </remarks>
static int Main(string[] args)
{
    if (args.Length > 5 || args.Length < 3)
    {
        Console.WriteLine("Numero de parametros invalido");
        return 1;
    }

    int min = 0;
    int max = 0;
    bool rangoValido = int.TryParse(args[1], out min)
        && int.TryParse(args[2], out max)
        && min <= max;
    if (!rangoValido)
    {
        Console.WriteLine("Rango invalido");
        return 1;
    }

    // Resolve the optional delay / log path arguments (accepted in either order).
    string delay = "10";
    string logPath = "log.txt";
    int ignorado = 0;
    if (args.Length == 4)
    {
        if (int.TryParse(args[3], out ignorado))
        {
            delay = args[3];
        }
        else
        {
            logPath = args[3];
        }
    }
    else if (args.Length == 5)
    {
        if (int.TryParse(args[3], out ignorado))
        {
            delay = args[3];
            logPath = args[4];
        }
        else
        {
            delay = args[4];
            logPath = args[3];
        }
    }

    // Validate the delay up front: previously a non-numeric, negative or
    // overflowing value only failed after the first row had been written.
    int delaySeconds = 0;
    if (!int.TryParse(delay, out delaySeconds)
        || delaySeconds < 0
        || delaySeconds > int.MaxValue / 1000)
    {
        Console.WriteLine("Delay invalido");
        return 1;
    }

    // Characters stripped from every scraped value before it is written.
    const string patronLimpieza = @"\t|\n|\r| | |;";

    using (StreamWriter log = new StreamWriter(logPath, true))
    {
        // Must be checked before the writer below creates the file.
        bool existe = File.Exists(args[0]);

        using (StreamWriter fs = new StreamWriter(args[0], true, System.Text.Encoding.ASCII))
        {
            if (!existe)
            {
                string[] encabezado = new string[]
                {
                    "Matricula;", "Nombres;", "Documento;", "Cuit;", "Ramo;", "Domicilio;",
                    "Localidad;", "Provincia;", "Telefonos;", "Email;", "Cod. Postal;", "Info;"
                };
                fs.WriteLine(string.Concat(encabezado));
            }

            // Labels looked up on the result page, in output-column order.
            string[] listaTexto = new string[]
            {
                "Matrícula:", "Nombres:", "Documento:", "Cuit:", "Ramo:",
                "Domicilio:", "Localidad:", "Provincia:", "Teléfonos:", "Email:"
            };

            for (int i = min; i <= max; i++)
            {
                log.Write(DateTime.Now);

                ScrapingBrowser browser = new ScrapingBrowser();
                WebPage homePage;
                try
                {
                    homePage = browser.NavigateToPage(new Uri("http://www.ssn.gov.ar/storage/registros/productores/productoresactivosfiltro.asp"));
                }
                catch (Exception)
                {
                    log.WriteLine(" - NO SE PUDO CONECTAR CON LA PAGINA ");
                    return 1;
                }
                log.Write(" - CONEXION CON LA PAGINA: OK ");

                PageWebForm form = homePage.FindForm("form1");
                form["matricula"] = i.ToString();

                WebPage resultsPage;
                try
                {
                    resultsPage = form.Submit(new Uri("http://www.ssn.gov.ar/storage/registros/productores/productoresactivos.asp"), HttpVerb.Post);
                }
                catch (Exception)
                {
                    log.WriteLine(" - POST MATRICULA [" + i.ToString() + "] : FALLO ");
                    return 1;
                }
                log.Write(" - POST MATRICULA [" + i.ToString() + "] : OK ");

                var html = new HtmlDocument();
                html.LoadHtml(resultsPage.Content.ToString());

                // The page shows this message when the matricula does not exist.
                HtmlNode nodeTest = html.DocumentNode.SelectSingleNode("//font[contains(text(),'Sin Registros para los filtros aplicados')]");
                if (nodeTest == null)
                {
                    foreach (string str in listaTexto)
                    {
                        // SelectSingleNode returns null when nothing matches; write an
                        // empty column instead of crashing on a missing label or value.
                        HtmlNode tr = html.DocumentNode.SelectSingleNode(string.Format("//font[contains(text(),'{0}')]/ancestor::tr", str));
                        HtmlNode font = tr == null ? null : tr.SelectSingleNode("descendant::font[@color]");
                        string valor = font == null ? "" : Regex.Replace(font.InnerText, patronLimpieza, "");
                        fs.Write(valor);
                        fs.Write(";");
                    }

                    HtmlNode postal = html.DocumentNode.SelectSingleNode("//font[contains(text(),'Postal:')]");
                    string textoPostal = postal == null ? "" : Regex.Replace(postal.InnerText, patronLimpieza, "");

                    // The first 12 characters are skipped, as in the original code;
                    // shorter text yields an empty column instead of an exception.
                    string codigoPostal = textoPostal.Length >= 12 ? textoPostal.Substring(12).Trim() : "";
                    fs.Write(codigoPostal);
                    fs.Write(";");
                    fs.WriteLine("OK");

                    log.Write("- RESPUESTA: OK");
                    log.WriteLine("");
                }
                else
                {
                    log.Write("- RESPUESTA: NO ENCONTRADO");
                    log.WriteLine("");
                    fs.WriteLine(i.ToString() + ";;;;;;;;;;; No se encuentran datos");
                }

                // Pause between requests.
                Thread.Sleep(delaySeconds * 1000);
            }

            return 0;
        }
    }
}