/// <summary>
/// Executes the scrape described by the current s_* variables: fetches s_url
/// with the configured request settings, publishes the response (body, final
/// URL, headers, cookies) into <c>vars</c>, and — for the "read" action —
/// applies the configured regex, storing capture groups as v1, v2, ...
/// </summary>
/// <returns>false if no URL is configured or the regex pattern is invalid; otherwise true.</returns>
bool doScrape()
{
    logDebug("\t scrape action: {0}", vars["s_action"]);

    // A scrape without a target URL cannot proceed.
    if (string.IsNullOrEmpty(vars["s_url"]))
    {
        logError("no scrape URL defined");
        return false;
    }

    // Build the web request from the s_* scrape variables.
    NaviXWebRequest webData = new NaviXWebRequest(vars["s_url"])
    {
        Action = vars["s_action"],
        Referer = vars["s_referer"],
        RequestCookies = vars["s_cookie"],
        Method = vars["s_method"],
        UserAgent = vars["s_agent"],
        PostData = vars["s_postdata"],
        RequestHeaders = headers
    };
    webData.GetWebData();

    // Publish the response body and the final (post-redirect) URL.
    vars["htmRaw"] = webData.Content;
    vars["geturl"] = webData.GetURL;

    // For the "geturl" action the match result (v1) is simply the final URL.
    if (vars["s_action"] == "geturl")
        vars["v1"] = webData.GetURL;

    // Expose every response header as a headers.<name> variable.
    logDebug("Response headers");
    foreach (string key in webData.ResponseHeaders.Keys)
    {
        string hKey = "headers." + key;
        string hVal = webData.ResponseHeaders[key];
        vars[hKey] = hVal;
        logDebug("\t {0}: {1}", hKey, hVal);
    }

    // Expose every response cookie as a cookies.<name> variable.
    logDebug("Response cookies");
    foreach (string key in webData.ResponseCookies.Keys)
    {
        string cKey = "cookies." + key;
        string cVal = webData.ResponseCookies[key];
        vars[cKey] = cVal;
        logDebug("\t {0}: {1}", cKey, cVal);
    }

    // "read" action with a pattern and a non-empty response: run the regex.
    if (vars["s_action"] == "read" && !string.IsNullOrEmpty(vars["regex"]) && !string.IsNullOrEmpty(vars["htmRaw"]))
    {
        logDebug("Scrape regex: {0}", vars["regex"]);
        Regex reg;
        try
        {
            reg = new Regex(vars["regex"]);
        }
        // Regex(string) throws ArgumentException for an invalid pattern;
        // catching the specific type avoids masking unrelated failures.
        catch (ArgumentException ex)
        {
            logError("error creating regex with pattern " + vars["regex"] + " - " + ex.Message);
            return false;
        }

        // Clear nomatch and v1..v11 so stale values from a previous scrape
        // never leak into this one.
        vars["nomatch"] = "";
        rep["nomatch"] = "";
        for (int i = 1; i < 12; i++)
        {
            string ke = "v" + i.ToString();
            vars[ke] = "";
            rep[ke] = "";
        }

        // Match against the scrape response.
        Match m = reg.Match(vars["htmRaw"]);
        if (m.Success)
        {
            logDebug("Scrape matches:");
            // Store each capture group as v1, v2, v3, etc.
            // Group.Value is never null (it is "" for a non-participating
            // group), so the original null fallback was dead code.
            for (int i = 1; i < m.Groups.Count; i++)
            {
                string val = m.Groups[i].Value;
                string key = "v" + i.ToString();
                vars[key] = val;
                rep[key] = val;
                logDebug("\t {0}={1}", key, val.Replace("\r\n", " "));
            }
        }
        else
        {
            // No match: signal the calling script via the nomatch flag.
            logDebug("Scrape regex: no match");
            vars["nomatch"] = "1";
            rep["nomatch"] = "1";
        }
    }

    // Clear the s_* scrape variables ready for the next scrape directive.
    vars.Reset(true);
    return true;
}
// Performs the scrape described by the current s_* variables and copies the
// results (response text, final URL, headers, cookies and regex captures)
// into the processor variables. Returns false on a missing URL or a bad
// regex pattern, true otherwise.
// NOTE(review): this method appears to duplicate an earlier doScrape() in
// this file — confirm both copies are intentional.
bool doScrape()
{
    logDebug("\t scrape action: {0}", vars["s_action"]);

    // Bail out early when no target URL has been configured.
    if (string.IsNullOrEmpty(vars["s_url"]))
    {
        logError("no scrape URL defined");
        return (false);
    }

    // Assemble and fire the request using the scrape settings.
    NaviXWebRequest request = new NaviXWebRequest(vars["s_url"])
    {
        Action = vars["s_action"],
        Referer = vars["s_referer"],
        RequestCookies = vars["s_cookie"],
        Method = vars["s_method"],
        UserAgent = vars["s_agent"],
        PostData = vars["s_postdata"],
        RequestHeaders = headers
    };
    request.GetWebData();

    vars["htmRaw"] = request.Content; // raw response body
    vars["geturl"] = request.GetURL;  // URL after any redirects

    // For the geturl action the first "match" is the final URL itself.
    if (vars["s_action"] == "geturl")
    {
        vars["v1"] = request.GetURL;
    }

    // Mirror every response header into a headers.<name> variable.
    logDebug("Response headers");
    foreach (string headerName in request.ResponseHeaders.Keys)
    {
        string headerVar = "headers." + headerName;
        string headerValue = request.ResponseHeaders[headerName];
        vars[headerVar] = headerValue;
        logDebug("\t {0}: {1}", headerVar, headerValue);
    }

    // Mirror every response cookie into a cookies.<name> variable.
    logDebug("Response cookies");
    foreach (string cookieName in request.ResponseCookies.Keys)
    {
        string cookieVar = "cookies." + cookieName;
        string cookieValue = request.ResponseCookies[cookieName];
        vars[cookieVar] = cookieValue;
        logDebug("\t {0}: {1}", cookieVar, cookieValue);
    }

    // Only run the regex when reading, a pattern exists and we got a body.
    bool shouldMatch = vars["s_action"] == "read"
        && !string.IsNullOrEmpty(vars["regex"])
        && !string.IsNullOrEmpty(vars["htmRaw"]);

    if (shouldMatch)
    {
        logDebug("Scrape regex: {0}", vars["regex"]);
        Regex pattern;
        try
        {
            pattern = new Regex(vars["regex"]);
        }
        catch (Exception ex)
        {
            logError("error creating regex with pattern " + vars["regex"] + " - " + ex.Message);
            return (false);
        }

        // Blank out nomatch and v1..v11 before matching.
        vars["nomatch"] = "";
        rep["nomatch"] = "";
        for (int groupIndex = 1; groupIndex < 12; groupIndex++)
        {
            string slot = "v" + groupIndex.ToString();
            vars[slot] = "";
            rep[slot] = "";
        }

        Match match = pattern.Match(vars["htmRaw"]);
        if (!match.Success)
        {
            // Flag the failed match for the calling script.
            logDebug("Scrape regex: no match");
            vars["nomatch"] = "1";
            rep["nomatch"] = "1";
        }
        else
        {
            logDebug("Scrape matches:");
            // Each capture group becomes v1, v2, v3, ...
            for (int groupIndex = 1; groupIndex < match.Groups.Count; groupIndex++)
            {
                string captured = match.Groups[groupIndex].Value ?? "";
                string slot = "v" + groupIndex.ToString();
                vars[slot] = captured;
                rep[slot] = captured;
                logDebug("\t {0}={1}", slot, captured.Replace("\r\n", " "));
            }
        }
    }

    // Clear the s_* variables so the next scrape starts clean.
    vars.Reset(true);
    return (true);
}