bool doScrape()
        {
            // Executes the scrape described by the s_* variables: fetches s_url
            // and, depending on s_action, publishes the response text, final URL,
            // response headers/cookies and regex capture groups back into the
            // shared variable store (vars/rep).
            // Returns false when no URL is configured or the regex pattern is
            // invalid; true otherwise.
            logDebug("\t scrape action: {0}", vars["s_action"]);
            //check if we have a url
            if (string.IsNullOrEmpty(vars["s_url"]))
            {
                logError("no scrape URL defined");
                return false;
            }
            //setup web request from the scrape vars
            NaviXWebRequest webData = new NaviXWebRequest(vars["s_url"])
            {
                Action = vars["s_action"],
                Referer = vars["s_referer"],
                RequestCookies = vars["s_cookie"],
                Method = vars["s_method"],
                UserAgent = vars["s_agent"],
                PostData = vars["s_postdata"],
                RequestHeaders = headers
            };
            //retrieve request
            webData.GetWebData();
            //store response text
            vars["htmRaw"] = webData.Content;
            //get final url of request (after any redirects)
            vars["geturl"] = webData.GetURL;
            //if scrape action is get url, set match as final url
            if (vars["s_action"] == "geturl")
                vars["v1"] = webData.GetURL;

            //Copy response headers into vars as "headers.<name>"
            logDebug("Response headers");
            foreach (string key in webData.ResponseHeaders.Keys)
            {
                string hKey = "headers." + key;
                string hVal = webData.ResponseHeaders[key];
                vars[hKey] = hVal;
                logDebug("\t {0}: {1}", hKey, hVal);
            }

            //Copy response cookies into vars as "cookies.<name>"
            logDebug("Response cookies");
            foreach (string key in webData.ResponseCookies.Keys)
            {
                string cKey = "cookies." + key;
                string cVal = webData.ResponseCookies[key];
                vars[cKey] = cVal;
                logDebug("\t {0}: {1}", cKey, cVal);
            }

            //if we're set to read and we have a response and a regex pattern
            if (vars["s_action"] == "read" && !string.IsNullOrEmpty(vars["regex"]) && !string.IsNullOrEmpty(vars["htmRaw"]))
            {
                //create regex
                logDebug("Scrape regex: {0}", vars["regex"]);
                Regex reg;
                try
                {
                    reg = new Regex(vars["regex"]);
                }
                catch (Exception ex)
                {
                    logError("error creating regex with pattern " + vars["regex"] + " - " + ex.Message);
                    return false;
                }
                //clear the match slots and no-match flag before matching
                //NOTE: only v1..v11 are reset; patterns with more than 11 capture
                //groups would rely on vars.Reset(true) below clearing the rest - verify
                vars["nomatch"] = "";
                rep["nomatch"] = "";
                for (int i = 1; i < 12; i++)
                {
                    string ke = "v" + i.ToString();
                    vars[ke] = "";
                    rep[ke] = "";
                }
                //match against scrape response
                Match m = reg.Match(vars["htmRaw"]);
                if (m.Success)
                {
                    logDebug("Scrape matches:");
                    for (int i = 1; i < m.Groups.Count; i++)
                    {
                        //create vars for each match group, v1,v2,v3 etc
                        //Group.Value never returns null (it is String.Empty for
                        //unmatched groups), so no null guard is needed here
                        string val = m.Groups[i].Value;
                        string key = "v" + i.ToString();
                        vars[key] = val;
                        rep[key] = val;
                        logDebug("\t {0}={1}", key, val.Replace("\r\n", " "));
                    }
                }
                else //no match
                {
                    logDebug("Scrape regex: no match");
                    vars["nomatch"] = "1";
                    rep["nomatch"] = "1";
                }
            }
            //reset scrape vars for next scrape
            vars.Reset(true);
            return true;
        }
// Example #2 (duplicate of the method above, from the original source listing)
// 0
        bool doScrape()
        {
            // Performs the scrape configured through the s_* variables and
            // publishes the results (response body, final URL, headers, cookies
            // and regex captures) back into the shared variable store.
            logDebug("\t scrape action: {0}", vars["s_action"]);

            // A target URL is mandatory.
            if (string.IsNullOrEmpty(vars["s_url"]))
            {
                logError("no scrape URL defined");
                return false;
            }

            // Build the web request from the scrape variables.
            NaviXWebRequest request = new NaviXWebRequest(vars["s_url"])
            {
                Action         = vars["s_action"],
                Referer        = vars["s_referer"],
                RequestCookies = vars["s_cookie"],
                Method         = vars["s_method"],
                UserAgent      = vars["s_agent"],
                PostData       = vars["s_postdata"],
                RequestHeaders = headers
            };

            request.GetWebData();

            // Publish the raw response and the final (post-redirect) URL.
            vars["htmRaw"] = request.Content;
            vars["geturl"] = request.GetURL;
            if (vars["s_action"] == "geturl")
            {
                // For a geturl action the final URL is also the first match.
                vars["v1"] = request.GetURL;
            }

            // Expose every response header as "headers.<name>".
            logDebug("Response headers");
            foreach (string name in request.ResponseHeaders.Keys)
            {
                string headerKey = "headers." + name;
                string headerValue = request.ResponseHeaders[name];
                vars[headerKey] = headerValue;
                logDebug("\t {0}: {1}", headerKey, headerValue);
            }

            // Expose every response cookie as "cookies.<name>".
            logDebug("Response cookies");
            foreach (string name in request.ResponseCookies.Keys)
            {
                string cookieKey = "cookies." + name;
                string cookieValue = request.ResponseCookies[name];
                vars[cookieKey] = cookieValue;
                logDebug("\t {0}: {1}", cookieKey, cookieValue);
            }

            // A "read" action additionally runs the configured regex over the body.
            if (vars["s_action"] == "read" && !string.IsNullOrEmpty(vars["regex"]) && !string.IsNullOrEmpty(vars["htmRaw"]))
            {
                logDebug("Scrape regex: {0}", vars["regex"]);
                Regex pattern;
                try
                {
                    pattern = new Regex(vars["regex"]);
                }
                catch (Exception ex)
                {
                    logError("error creating regex with pattern " + vars["regex"] + " - " + ex.Message);
                    return false;
                }

                // Clear the match slots (v1..v11) and the no-match flag up front.
                vars["nomatch"] = "";
                rep["nomatch"] = "";
                for (int slot = 1; slot < 12; slot++)
                {
                    string slotKey = "v" + slot.ToString();
                    vars[slotKey] = "";
                    rep[slotKey] = "";
                }

                Match match = pattern.Match(vars["htmRaw"]);
                if (!match.Success)
                {
                    // Signal "no match" so downstream logic can branch on it.
                    logDebug("Scrape regex: no match");
                    vars["nomatch"] = "1";
                    rep["nomatch"] = "1";
                }
                else
                {
                    // Store each capture group as v1, v2, v3, ...
                    logDebug("Scrape matches:");
                    for (int group = 1; group < match.Groups.Count; group++)
                    {
                        string value = match.Groups[group].Value ?? "";
                        string groupKey = "v" + group.ToString();
                        vars[groupKey] = value;
                        rep[groupKey] = value;
                        logDebug("\t {0}={1}", groupKey, value.Replace("\r\n", " "));
                    }
                }
            }

            // Clear the scrape variables ready for the next scrape.
            vars.Reset(true);
            return true;
        }