/// <summary>
/// Evaluates the element children of an OR group and reports whether any of them matches.
/// </summary>
/// <param name="orNode">The group node whose children are evaluated.</param>
/// <param name="dom">The document reader the rules are evaluated against.</param>
/// <returns>True as soon as one child matches; false when none does.</returns>
private bool ProcessOrRules(XmlNode orNode, DOMReader dom)
{
    foreach (XmlNode child in orNode.ChildNodes)
    {
        // Only element children carry rules or nested groups.
        if (child.NodeType != XmlNodeType.Element)
        {
            continue;
        }

        // "Rule" elements are validated directly; anything else is a nested logic group.
        bool matched = child.Name == "Rule"
            ? ValidateRule(new MarketShareRule(child), dom)
            : ProcessLogicNode(child, dom);

        if (matched)
        {
            // Short circuit: one match satisfies an OR group.
            return true;
        }
    }

    return false;
}
/// <summary>
/// Evaluates the children of a product node's "Rules" element and reports whether any of them matches.
/// </summary>
/// <param name="productNode">The product node that contains a "Rules" child element.</param>
/// <param name="dom">The document reader the rules are evaluated against.</param>
/// <returns>
/// True as soon as one child of "Rules" matches; false when none matches or when the
/// product has no "Rules" element.
/// </returns>
private bool ProcessRuleNode(XmlNode productNode, DOMReader dom)
{
    XmlNode rulesNode = productNode.SelectSingleNode("Rules");
    if (rulesNode == null)
    {
        // A product without a Rules element has nothing to evaluate; treat it as
        // "no match" instead of failing with a NullReferenceException.
        return false;
    }

    foreach (XmlNode andOrNode in rulesNode.ChildNodes)
    {
        if (andOrNode.NodeType != XmlNodeType.Element)
        {
            continue;
        }

        if (andOrNode.Name != "Rule")
        {
            // Nested logic group.
            if (ProcessLogicNode(andOrNode, dom))
            {
                return true;
            }
        }
        else if (ValidateRule(new MarketShareRule(andOrNode), dom))
        {
            // Just in case there is only one rule directly under Rules, with no AND/OR wrapper.
            return true;
        }
    }

    return false;
}
/// <summary>
/// Checks for the existence of an admin login page.
/// </summary>
/// <param name="dom">Domain to check, w/out the www.</param>
/// <param name="rule">Rule that contains the admin page value to append to the domain.
/// Rule.Value is the admin page name that is appended to the domain.
/// Rule.Property is used as a case-insensitive regular expression.</param>
/// <returns>
/// True when the admin page request returns HTTP 200 and the pattern matches the inner HTML of the
/// "html" node of <c>dom.Document</c>; false otherwise, including when any exception is thrown.
/// </returns>
public bool Process(DOMReader dom, MarketShareRule rule)
{
    try
    {
        HttpWebRequest request = (HttpWebRequest)HttpWebRequest.Create("http://www." + dom.Domain + "/" + rule.Value);
        request.UserAgent = "User-Agent Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0)";
        request.AllowAutoRedirect = true;
        request.Method = "GET";
        request.Timeout = 5000;

        // Dispose the response so the underlying connection is released.
        using (HttpWebResponse resp = (HttpWebResponse)request.GetResponse())
        {
            if (resp.StatusCode != HttpStatusCode.OK)
            {
                return false;
            }

            Regex r = new Regex(rule.Property, RegexOptions.IgnoreCase);

            // NOTE(review): the pattern is matched against dom.Document, not against the body of
            // the admin page response fetched above - confirm this is the intended source.
            HtmlAgilityPack.HtmlNode source = dom.Document.DocumentNode.SelectSingleNode("html");
            if (source != null)
            {
                return r.IsMatch(source.InnerHtml);
            }
        }

        return false;
    }
    catch (Exception)
    {
        // Best effort: any failure is treated as "no match".
        //todo: log
    }

    return false;
}
/// <summary>
/// Loads the page for a domain and returns the name of the shopping cart product whose rules match it.
/// </summary>
/// <param name="domain">The domain to load and evaluate.</param>
/// <returns>
/// The "name" attribute of the last Product node (in document order) whose rules match, or
/// "none" when the page has no usable text or no product matches.
/// </returns>
public string ShoppingCart(string domain)
{
    DOMReader dom = new DOMReader(domain, _maxPageByteSize);
    string retVal = "none";

    // Nothing to evaluate when the page could not be read or contains no text.
    if (dom.Document.DocumentNode == null || String.IsNullOrWhiteSpace(dom.Document.DocumentNode.InnerText))
    {
        return retVal;
    }

    // Single sequential pass. The previous body also ran a timed Parallel.ForEach over the same
    // products; the sequential pass overwrote its result whenever it found a match, so that pass
    // only duplicated the work and wrote retVal from several threads without synchronisation.
    foreach (XmlNode productNode in _shoppingcartRulesXML.SelectNodes("//Product[@name]"))
    {
        if (ProcessRuleNode(productNode, dom))
        {
            // NOTE(review): the loop does not stop at the first match, so the last matching
            // product wins - confirm whether first-match was intended.
            retVal = productNode.Attributes["name"].Value;
        }
    }

    return retVal;
}
/// <summary>
/// Gets the cookies for the domain and determines if there is a rule match.
/// </summary>
/// <remarks>
/// The HTTP request is only issued when both <c>dom.RequestCookies</c> and
/// <c>dom.ResponseCookies</c> are null; the cookies it yields are stored on <paramref name="dom"/>.
/// </remarks>
/// <param name="dom">The DocumentReader object for the domain</param>
/// <param name="rule">The rule to match; Rule.Value is looked for inside each cookie name.</param>
/// <returns>True if a cookie is found whose name contains the rule value; false otherwise,
/// including when an exception is thrown.</returns>
public bool Process(DOMReader dom, MarketShareRule rule)
{
    try
    {
        if (dom.RequestCookies == null && dom.ResponseCookies == null)
        {
            //get cookies and look for a match
            HttpWebRequest request = (HttpWebRequest)HttpWebRequest.Create("http://www." + dom.Domain);
            CookieContainer cookieJar = new CookieContainer();
            request.UserAgent = "User-Agent Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0)";
            request.CookieContainer = cookieJar;
            request.AllowAutoRedirect = true;
            request.Method = "GET";
            request.Timeout = 5000;

            // Dispose the response so the underlying connection is released.
            using (HttpWebResponse resp = (HttpWebResponse)request.GetResponse())
            {
                dom.RequestCookies = cookieJar.GetCookies(request.RequestUri);
                dom.ResponseCookies = cookieJar.GetCookies(resp.ResponseUri);
            }
        }

        if (dom.RequestCookies != null)
        {
            foreach (Cookie c1 in dom.RequestCookies)
            {
                if (c1.Name.Contains(rule.Value))
                {
                    return true;
                }
            }
        }

        if (dom.ResponseCookies != null)
        {
            foreach (Cookie c in dom.ResponseCookies)
            {
                if (c.Name.Contains(rule.Value))
                {
                    return true;
                }
            }
        }
    }
    catch (Exception e)
    {
        // Best effort: failures are treated as "no match". 404s and timeouts are excluded
        // from the (not yet implemented) logging.
        if (!e.Message.Contains("404") && !e.Message.Contains("timed out"))
        {
            //todo: log
            //Utility.WriteToLogFile(String.Format("SmallBiz_NoCookieInfo_{0:M_d_yyyy}", DateTime.Today) + ".log", string.Format("Domain: {0}", dom.Domain));
        }
    }

    return false;
}
/// <summary>
/// Picks the processor registered for the rule's type and runs the rule through it.
/// </summary>
/// <param name="rule">The rule to evaluate; its Type, lower-cased, is the processor key.</param>
/// <param name="dom">The document reader the rule is evaluated against.</param>
/// <returns>The result of the selected processor's Process call.</returns>
private bool ValidateRule(MarketShareRule rule, DOMReader dom)
{
    // Default to text, so that an unregistered type just regexes the whole page.
    IProcessor fallback = _processors["text"];

    string key = rule.Type.ToLower();
    IProcessor selected = _processors.ContainsKey(key) ? _processors[key] : fallback;

    return selected.Process(dom, rule);
}
/// <summary>
/// Checks the document's meta tag collection for the rule by delegating to
/// <c>dom.ExistsInCollection</c> with <c>dom.MetaTags</c>.
/// </summary>
/// <param name="dom">DocumentReader object for the domain</param>
/// <param name="rule">The rule to match</param>
/// <returns>
/// The result of the collection lookup, or false when the lookup throws (the exception is
/// logged as a warning).
/// </returns>
public bool Process(DOMReader dom, MarketShareRule rule)
{
    bool found = false;

    try
    {
        found = dom.ExistsInCollection(dom.MetaTags, rule);
    }
    catch (Exception ex)
    {
        string details = string.Format("Domain: {0}, {1}", dom.Domain, rule.ToString());
        ExceptionExtensions.LogWarning(ex, "API.MarketAnalysis.Metas.Process()", details);
    }

    return found;
}
/// <summary>
/// Determines whether the domain name itself contains the rule value.
/// </summary>
/// <param name="dom">DocumentReader object for the domain</param>
/// <param name="rule">The rule to match; Rule.Value is the text looked for in the domain.</param>
/// <returns>
/// True when <c>dom.Domain</c> contains the rule value; false otherwise, or when the check
/// throws (the exception is logged as a warning).
/// </returns>
public bool Process(DOMReader dom, MarketShareRule rule)
{
    bool found = false;

    try
    {
        found = dom.Domain.Contains(rule.Value);
    }
    catch (Exception ex)
    {
        string details = string.Format("Domain: {0}, {1}", dom.Domain, rule.ToString());
        ExceptionExtensions.LogWarning(ex, "API.MarketAnalysis.Urls.Process()", details);
    }

    return found;
}
/// <summary>
/// Evaluates the element children of an AND group and reports whether enough of them match.
/// </summary>
/// <param name="andNode">
/// The group node. An optional integer "mincount" attribute sets how many children must match;
/// when it is absent or not an integer, every element child must match.
/// </param>
/// <param name="dom">The document reader the rules are evaluated against.</param>
/// <returns>True when the number of matching children reaches the required minimum.</returns>
private bool ProcessAndRules(XmlNode andNode, DOMReader dom)
{
    // Only element children are evaluated below, so only they may count towards the default
    // minimum. Counting every child node (e.g. XML comments) would make the group impossible
    // to satisfy.
    int elementCount = 0;
    foreach (XmlNode child in andNode.ChildNodes)
    {
        if (child.NodeType == XmlNodeType.Element)
        {
            elementCount++;
        }
    }

    //if no min is specified, they all must be true
    int minCount = elementCount;

    //set min number of rules that must be true
    XmlAttribute minCountAttribute = andNode.Attributes["mincount"];
    if (minCountAttribute != null && !Int32.TryParse(minCountAttribute.Value, out minCount))
    {
        // TryParse zeroes its out argument on failure; restore the default.
        minCount = elementCount;
    }

    int matchCount = 0;

    //evaluate all childnodes
    foreach (XmlNode node in andNode.ChildNodes)
    {
        if (node.NodeType != XmlNodeType.Element)
        {
            continue;
        }

        // "Rule" elements are validated directly; anything else is a nested logic group.
        bool matched = node.Name != "Rule"
            ? ProcessLogicNode(node, dom)
            : ValidateRule(new MarketShareRule(node), dom);

        if (matched)
        {
            matchCount++;
        }

        // Stop as soon as the minimum is reached; remaining children cannot change the outcome.
        if (matchCount >= minCount)
        {
            break;
        }
    }

    return matchCount >= minCount;
}
/// <summary>
/// Uses RegEx to determine if the value exists in the text of the document source.
/// </summary>
/// <param name="dom">DocumentReader object for the domain</param>
/// <param name="rule">
/// The rule to match. Rule.Value is a case-insensitive regular expression in which the token
/// "{domain}" is replaced by "www." plus the domain; Rule.Property is the node path selected
/// from the document, whose inner HTML is searched.
/// </param>
/// <returns>
/// True if the pattern matches the selected node's inner HTML; false when the node is not found
/// or an exception is thrown (the exception is logged as a warning).
/// </returns>
public bool Process(DOMReader dom, MarketShareRule rule)
{
    try
    {
        // The domain is literal text inside a regex pattern: escape it so that its dots
        // (and any other metacharacters) match themselves rather than acting as wildcards.
        string pattern = rule.Value.Replace("{domain}", Regex.Escape("www." + dom.Domain));
        Regex r = new Regex(pattern, RegexOptions.IgnoreCase);

        HtmlAgilityPack.HtmlNode source = dom.Document.DocumentNode.SelectSingleNode(rule.Property);
        if (source != null)
        {
            return r.IsMatch(source.InnerHtml);
        }
    }
    catch (Exception e)
    {
        ExceptionExtensions.LogWarning(e, "API.MarketAnalysis.Text.Process()", string.Format("Domain: {0}, {1}", dom.Domain, rule.ToString()));
    }

    return false;
}
/// <summary>
/// Determines if the value exists in the Stylesheet tag collection of the document.
/// </summary>
/// <param name="dom">DocumentReader object for the domain</param>
/// <param name="rule">
/// The rule to match. Rule.Property names the attribute to inspect on each stylesheet tag;
/// Rule.Value is the text looked for (case-insensitively) inside that attribute's value.
/// </param>
/// <returns>
/// True if any stylesheet tag has the attribute and its value contains the rule value; false
/// otherwise, or when an exception is thrown (the exception is logged as a warning).
/// </returns>
public bool Process(DOMReader dom, MarketShareRule rule)
{
    try
    {
        foreach (var item in dom.Stylesheets)
        {
            if (!item.Attributes.Contains(rule.Property))
            {
                continue;
            }

            // Keep scanning when this stylesheet does not match: returning here on a
            // non-match would hide a match in any later stylesheet.
            string propValue = item.Attributes[rule.Property].Value;
            if (propValue.IndexOf(rule.Value, StringComparison.OrdinalIgnoreCase) >= 0)
            {
                return true;
            }
        }
    }
    catch (Exception e)
    {
        ExceptionExtensions.LogWarning(e, "API.MarketAnalysis.Stylesheets.Process()", string.Format("Domain: {0}, {1}", dom.Domain, rule.ToString()));
    }

    return false;
}
/// <summary>
/// Determines if any Generator tag carries the rule's attribute with a value containing the rule value.
/// </summary>
/// <param name="dom">DocumentReader object for the domain</param>
/// <param name="rule">
/// The rule to match. Rule.Property names the attribute to inspect; Rule.Value is the text
/// looked for in the lower-cased attribute value.
/// </param>
/// <returns>
/// True if a generator tag matches; false otherwise, or when an exception is thrown (the
/// exception is logged as a warning).
/// </returns>
public bool Process(DOMReader dom, MarketShareRule rule)
{
    try
    {
        foreach (HtmlAgilityPack.HtmlNode tag in dom.GeneratorTags)
        {
            var attribute = tag.Attributes[rule.Property];
            if (attribute == null)
            {
                // This tag does not have the attribute; look at the next one.
                continue;
            }

            string attributeValue = attribute.Value.ToLower();
            if (attributeValue.Contains(rule.Value.ToLower()))
            {
                return true;
            }
        }
    }
    catch (Exception ex)
    {
        string details = string.Format("Domain: {0}, {1}", dom.Domain, rule.ToString());
        ExceptionExtensions.LogWarning(ex, "API.MarketAnalysis.Generators.Process()", details);
    }

    return false;
}
/// <summary>
/// Returns the name of the first shopping cart product whose rules match an already-loaded document.
/// </summary>
/// <param name="domain">The domain the document belongs to.</param>
/// <param name="document">The parsed document to evaluate.</param>
/// <param name="response">The response the document came from; when not null its cookies are
/// made available to the rules as response cookies.</param>
/// <param name="isRoot">Passed through unchanged to the rule evaluation.</param>
/// <returns>
/// The "name" attribute of the first matching Product node, or "None" when the document has no
/// root node or no product matches.
/// </returns>
public string ShoppingCart(string domain, HtmlDocument document, HttpWebResponse response, bool isRoot)
{
    const string noMatch = "None";

    DOMReader dom = new DOMReader(domain, document);
    if (response != null)
    {
        dom.ResponseCookies = response.Cookies;
    }

    if (dom.Document.DocumentNode == null)
    {
        return noMatch;
    }

    XmlNodeList products = _shoppingcartRulesXML.SelectNodes("//Product[@name]");
    foreach (XmlNode productNode in products)
    {
        bool matched = ProcessRuleNode(productNode, dom, isRoot);
        if (matched)
        {
            return productNode.Attributes["name"].Value;
        }
    }

    return noMatch;
}
/// <summary>
/// Dispatches a logic group node to the matching evaluator.
/// </summary>
/// <param name="andOrNode">The group node; a node named "And" is evaluated as an AND group,
/// any other name as an OR group.</param>
/// <param name="dom">The document reader the rules are evaluated against.</param>
/// <returns>The result of the selected group evaluation.</returns>
private bool ProcessLogicNode(XmlNode andOrNode, DOMReader dom)
{
    if (andOrNode.Name == "And")
    {
        return ProcessAndRules(andOrNode, dom);
    }

    return ProcessOrRules(andOrNode, dom);
}