/// <summary>
/// Click handler for the "test HTTP capture" button. Downloads the address typed into
/// <c>txtURL</c> through <c>Scrape.HttpCapture</c> and shows either the returned content or the
/// returned error message in <c>txtCaptureRURL</c> (mirrored into <c>txtSourceTextREC</c>).
/// </summary>
/// <param name="sender">The control that raised the event (not used).</param>
/// <param name="e">Event data (not used).</param>
private void btnTestHttpCaptureText_Click(object sender, EventArgs e)
{
    Scrape s = new Scrape();

    // Lock the inputs and clear the previous output before the request starts.
    this.btnTestHttpCaptureText.Enabled = false;
    this.toolStripStatusLabel1.Text = "Loading URL ...";
    this.groupBox1.Text = string.Empty;
    this.txtCaptureRURL.Text = string.Empty;
    this.txtSourceTextREC.Text = string.Empty;
    this.txtURL.Enabled = false;
    this.txtCaptureRURL.Enabled = false;
    this.Refresh();

    try
    {
        // NOTE(review): the meaning of the 2nd-4th HttpCapture arguments is not visible here;
        // they are passed through exactly as before (empty, empty, null).
        WebScrapeResponse r = s.HttpCapture(this.txtURL.Text, string.Empty, string.Empty, null);

        this.groupBox1.Text = string.Format("Done: ({0}) {1}", r.StatusCode, r.StatusText);
        this.toolStripStatusLabel1.Text = "Loading complete";

        // A null or blank error message means "no error": show the content. The previous
        // comparison against string.Empty treated a null message as an error and displayed nothing.
        this.txtCaptureRURL.Text = string.IsNullOrWhiteSpace(r.ErrorMessage) ? r.Content : r.ErrorMessage;
        this.txtSourceTextREC.Text = this.txtCaptureRURL.Text;
        this.tabControl1.TabPages["tabRegExChunk"].Focus();
    }
    catch (Exception ex)
    {
        this.groupBox1.Text = "Failed to retrieve URL";
        this.toolStripStatusLabel1.Text = "Loading failed";
        this.txtCaptureRURL.Text = DetailedException.WithUserContent(ref ex);
    }
    finally
    {
        // Always unlock the inputs, even if reporting the failure itself throws.
        this.btnTestHttpCaptureText.Enabled = true;
        this.txtURL.Enabled = true;
        this.txtCaptureRURL.Enabled = true;
    }
}
/// <summary>
/// Click handler for the "full example" button. Works in two phases:
/// 1. downloads the past-winning-numbers page for the year selected in <c>cbxYear</c>;
/// 2. splits the page into table rows and, for each row, extracts the draw date, the drawn
///    numbers and the payout link.
/// Extracted rows are added to <c>dgvFullExample</c> and written as tab-separated text
/// (header "Date / Numbers / PayoutInfo") to <c>txtFullExampleResult</c>. Progress and
/// per-row problems are logged to <c>lstFullExampleActivity</c>; when either phase fails the
/// error detail is written to <c>txtFullExampleResult</c> instead and the grid is hidden.
/// </summary>
/// <param name="sender">The control that raised the event (not used).</param>
/// <param name="e">Event data (not used).</param>
private void btnFullExample_Click(object sender, EventArgs e)
{
    // Passed as the third HttpCapture argument; presumably sent as a request header so the
    // site sees a desktop Chrome client - confirm against Scrape.HttpCapture.
    const string chromeUserAgent = "User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.116 Safari/537.36";

    Scrape s = new Scrape();
    StringBuilder result = new StringBuilder();
    result.AppendLine("Date\tNumbers\tPayoutInfo");

    // Reset the UI from any previous run.
    this.btnFullExample.Enabled = false;
    this.txtFullExampleResult.Text = string.Empty;
    this.lstFullExampleActivity.Items.Clear();
    this.dgvFullExample.Rows.Clear();
    this.tabControl4.TabPages["tabActivity"].Focus();
    this.Refresh();

    bool succeeded = true;
    string captured = string.Empty;

    // Phase 1: download the page.
    try
    {
        string sourceUrl = string.Format(
            "http://www.palottery.state.pa.us/Games/Print-Past-Winning-Numbers.aspx?id=12&year={0}&print=1",
            (string)this.cbxYear.Items[this.cbxYear.SelectedIndex]);

        this.lstFullExampleActivity.Items.Add("Loading content from URL ...");
        this.lstFullExampleActivity.Items.Add(sourceUrl);

        WebScrapeResponse r = s.HttpCapture(sourceUrl, string.Empty, chromeUserAgent, null);
        this.groupBox1.Text = string.Format("Done: ({0}) {1}", r.StatusCode, r.StatusText);

        if (!string.IsNullOrWhiteSpace(r.ErrorMessage))
        {
            throw new Exception("Unexpected error message from HttpCapture: " + r.ErrorMessage);
        }

        if (r.StatusCode != "200")
        {
            throw new Exception("Status code value was not 200. Can not continue");
        }

        this.lstFullExampleActivity.Items.Add("Loading complete");
        captured = r.Content;
    }
    catch (Exception err)
    {
        this.lstFullExampleActivity.Items.Add("Failed to retrieve URL: see result for error text");
        this.txtFullExampleResult.Text = DetailedException.WithUserContent(ref err);
        succeeded = false;
    }

    // Phase 2: extract the rows from the downloaded markup.
    if (succeeded)
    {
        this.lstFullExampleActivity.Items.Add("Extract informational rows...");
        try
        {
            StringCollection infoRows = s.RegExChunk(captured, @"<tr>\s+<td>\d\d/\d\d/\d\d\d\d</td><td>\s+\d[\d]?\s+ ", true, 0, "</tr>", true, 1, 0, 0);
            if (infoRows.Count == 0)
            {
                throw new Exception("No informational rows encountered.");
            }

            int rowIndex = 0;
            foreach (string infoRow in infoRows)
            {
                // Advance the counter for every row (including skipped ones) so that the
                // malformed-row message reports the real position instead of always 0.
                int currentRow = rowIndex++;

                StringCollection infoDraws = s.RegExChunk(infoRow, @"<td[>|\s]", true, 0, "</td>", true, 1, 0, 0);
                if (infoDraws.Count != 3)
                {
                    this.lstFullExampleActivity.Items.Add(string.Format("Can not read malformed row({0}): {1}", currentRow, infoRow));
                    continue;
                }

                string[] values = new string[3];

                // Cell 0: draw date.
                values[0] = s.RegExFind(infoDraws[0], @"([\d]?[\d]/){2}[\d]{4}", true, 0)[0];

                // Cell 1: drawn numbers - keep only the digits and the semi-colons, then split.
                string numberRaw = Regex.Replace(infoDraws[1], @"[^0-9;]", string.Empty);
                string[] n = numberRaw.Split(new char[] { ';' }, StringSplitOptions.RemoveEmptyEntries);
                if (n.Length == 7)
                {
                    // No leading tab here: the column separator is added once when the
                    // result line is assembled below.
                    values[1] = string.Format("{0}-{1}-{2}-{3}-{4} [{5}] x{6}", n[0], n[1], n[2], n[3], n[4], n[5], n[6]);
                }
                else
                {
                    // Report the raw text (formatting the array would print "System.String[]").
                    // The row is still emitted, with an empty Numbers column.
                    this.lstFullExampleActivity.Items.Add(string.Format("Expected 7 numbers, but got {0} in \"{1}\"", n.Length, numberRaw));
                }

                // Cell 2: link for payout stats.
                values[2] = "http://www.palottery.state.pa.us/Games" + s.RegExFind(infoDraws[2], "/Payouts.aspx[^\"]+", true, 0)[0];

                // Always write exactly three tab-separated columns so rows line up with the header.
                result.Append(values[0]).Append('\t').Append(values[1]).Append('\t').AppendLine(values[2]);
                this.dgvFullExample.Rows.Add(values);
            }

            this.txtFullExampleResult.Text = result.ToString();
            this.lstFullExampleActivity.Items.Add("Extraction successfully completed. See result tab");
        }
        catch (Exception err)
        {
            // This phase parses already-downloaded content, so report an extraction failure
            // rather than the copy-pasted "Failed to retrieve URL" text.
            this.lstFullExampleActivity.Items.Add("Failed to extract rows: see result for error text");
            this.txtFullExampleResult.Text = DetailedException.WithUserContent(ref err);
            succeeded = false;
        }
    }

    this.btnFullExample.Enabled = true;
    this.tabControl4.TabPages[succeeded ? "tabResult" : "tabActivity"].Focus();
    this.dgvFullExample.Visible = succeeded;
}