Example #1
0
        /// <summary>
        /// Evaluates the element children of an OR node and reports whether any of them matches.
        /// </summary>
        /// <param name="orNode">Node whose element children are evaluated in document order.</param>
        /// <param name="dom">Reader passed through to the rule/logic evaluation.</param>
        /// <returns>True as soon as one child matches; false when none does.</returns>
        private bool ProcessOrRules(XmlNode orNode, DOMReader dom)
        {
            foreach (XmlNode child in orNode.ChildNodes)
            {
                // Only element nodes take part in the evaluation.
                if (child.NodeType != XmlNodeType.Element)
                {
                    continue;
                }

                // "Rule" elements are validated directly; any other element is a nested logic node.
                bool matched = child.Name == "Rule"
                    ? ValidateRule(new MarketShareRule(child), dom)
                    : ProcessLogicNode(child, dom);

                if (matched)
                {
                    // Short circuit: one match is enough for an OR.
                    return true;
                }
            }

            return false;
        }
Example #2
0
        /// <summary>
        /// Evaluates the children of a product's "Rules" element and reports whether any of them matches.
        /// </summary>
        /// <param name="productNode">Product node expected to contain a "Rules" child element.</param>
        /// <param name="dom">Reader passed through to the rule/logic evaluation.</param>
        /// <returns>
        /// True as soon as one child of "Rules" matches; false when none does or when the
        /// product has no "Rules" element.
        /// </returns>
        private bool ProcessRuleNode(XmlNode productNode, DOMReader dom)
        {
            XmlNode rulesNode = productNode.SelectSingleNode("Rules");

            // SelectSingleNode returns null when the product has no "Rules" child;
            // treat that as "no match" instead of failing with a NullReferenceException.
            if (rulesNode == null)
            {
                return(false);
            }

            foreach (XmlNode andOrNode in rulesNode.ChildNodes)
            {
                if (andOrNode.NodeType == XmlNodeType.Element)
                {
                    if (andOrNode.Name != "Rule")
                    {
                        // Any non-"Rule" element is handled as a nested logic node.
                        if (ProcessLogicNode(andOrNode, dom))
                        {
                            return(true);
                        }
                    }
                    else
                    {
                        //just in case there is only 1 rule in rules. No AND/OR
                        if (ValidateRule(new MarketShareRule(andOrNode), dom))
                        {
                            return(true);
                        }
                    }
                }
            }

            return(false);
        }
Example #3
0
        /// <summary>
        /// Checks for the existence of an admin login page.
        /// </summary>
        /// <param name="dom">Domain to check, w/out the www.</param>
        /// <param name="rule">Rule that contains the admin page value to append to the domain.
        /// Rule.Value is the admin page name that is appended to the domain.
        /// Rule.Property is the regular expression (case-insensitive) used for the match.</param>
        /// <returns>
        /// True when the admin page request answers 200 OK and the pattern matches;
        /// false otherwise, including when any exception occurs.
        /// </returns>
        public bool Process(DOMReader dom, MarketShareRule rule)
        {
            try
            {
                HttpWebRequest request = (HttpWebRequest)HttpWebRequest.Create("http://www." + dom.Domain + "/" + rule.Value);

                // NOTE(review): the literal starts with "User-Agent<TAB>", which looks like a pasted
                // header name; left unchanged because servers may respond differently - confirm.
                request.UserAgent         = "User-Agent	Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0)";
                request.AllowAutoRedirect = true;
                request.Method            = "GET";
                request.Timeout           = 5000;

                // Dispose the response so the underlying connection is released;
                // only the status code is read from it.
                using (HttpWebResponse resp = (HttpWebResponse)request.GetResponse())
                {
                    if (resp.StatusCode != HttpStatusCode.OK)
                    {
                        return(false);
                    }
                }

                Regex r = new Regex(rule.Property, RegexOptions.IgnoreCase);

                // NOTE(review): the pattern is matched against the already-loaded dom.Document,
                // not against the body of the admin page response - confirm this is intended.
                HtmlAgilityPack.HtmlNode source = dom.Document.DocumentNode.SelectSingleNode("html");

                if (!Object.Equals(null, source))
                {
                    return(r.Match(source.InnerHtml).Success);
                }

                return(false);
            }
            catch (Exception)
            {
                // Best effort: any failure (network error, timeout, bad pattern) counts as "no match".
                //todo: log
            }

            return(false);
        }
Example #4
0
        /// <summary>
        /// Loads the document for a domain and evaluates the shopping cart product rules against it.
        /// The rules are evaluated twice: once with Parallel.ForEach and once sequentially, each pass
        /// timed with a Stopwatch.
        /// </summary>
        /// <param name="domain">Domain passed to the DOMReader constructor.</param>
        /// <returns>
        /// The "name" attribute of a matching Product node, or "none" when nothing matches or the
        /// document has no usable text.
        /// </returns>
        public string ShoppingCart(string domain)
        {
            // Earlier sequential implementation, kept commented out by the original author.
            //DOMReader dom = new DOMReader(domain, _maxPageByteSize);

            //if (!Object.Equals(null, dom.Document.DocumentNode) && !String.IsNullOrWhiteSpace(dom.Document.DocumentNode.InnerText))
            //{
            //    foreach (XmlNode productNode in _shoppingcartRulesXML.SelectNodes("//Product[@name]"))
            //    {
            //        if (ProcessRuleNode(productNode, dom, _builderCancellation.Token))
            //        {
            //            return productNode.Attributes["name"].Value;
            //        }
            //    }
            //}
            DOMReader dom    = new DOMReader(domain, _maxPageByteSize);
            string    retVal = "none";

            // Only evaluate rules when the document node exists and has non-whitespace text.
            if (!Object.Equals(null, dom.Document.DocumentNode) && !String.IsNullOrWhiteSpace(dom.Document.DocumentNode.InnerText))
            {
                System.Diagnostics.Stopwatch sw = new System.Diagnostics.Stopwatch();

                // Pass 1: parallel evaluation (at most 8 concurrent iterations).
                sw.Restart();
                var nodeList = new List <XmlNode>(_shoppingcartRulesXML.SelectNodes("//Product[@name]").Cast <XmlNode>());

                Parallel.ForEach(nodeList, new ParallelOptions {
                    MaxDegreeOfParallelism = 8
                }, (productNode, loopState) =>
                {
                    if (ProcessRuleNode(productNode, dom, loopState))
                    {
                        //dom = null;
                        // NOTE(review): retVal is a captured local written without synchronization;
                        // more than one iteration may assign it before Stop() takes effect.
                        loopState.Stop();
                        retVal = productNode.Attributes["name"].Value;
                    }
                });
                sw.Stop();
                // Elapsed time of the parallel pass; not read afterwards.
                TimeSpan old = sw.Elapsed;


                // Pass 2: sequential evaluation over the same Product nodes.
                // There is no break, so every match overwrites retVal and the last match wins.
                // NOTE(review): this also overwrites the result of the parallel pass - confirm
                // whether both passes are still needed or this is leftover timing code.
                sw.Restart();
                foreach (XmlNode productNode in _shoppingcartRulesXML.SelectNodes("//Product[@name]"))
                {
                    if (ProcessRuleNode(productNode, dom))
                    {
                        //dom = null;
                        retVal = productNode.Attributes["name"].Value;
                    }
                }
                sw.Stop();
                // Elapsed time of the sequential pass; not read afterwards.
                TimeSpan newt = sw.Elapsed;
            }

            dom = null;
            return(retVal);
        }
Example #5
0
        /// <summary>
        /// Gets the cookies for the domain and determines if there is a rule match
        /// </summary>
        /// <param name="dom">The DocumentReader object for the domain. Its RequestCookies and
        /// ResponseCookies are populated here when both are still null.</param>
        /// <param name="rule">The rule to match; Rule.Value is searched for in each cookie name.</param>
        /// <returns>True if a cookie is found matching the rule; false otherwise, including when
        /// an exception occurs.</returns>
        public bool Process(DOMReader dom, MarketShareRule rule)
        {
            try
            {
                // Only hit the network when no cookies have been collected for this reader yet.
                if (Object.Equals(null, dom.RequestCookies) && Object.Equals(null, dom.ResponseCookies))
                {
                    //get cookies and look for a match
                    HttpWebRequest request = (HttpWebRequest)HttpWebRequest.Create("http://www." + dom.Domain);

                    CookieContainer cookieJar = new CookieContainer();
                    request.UserAgent         = "User-Agent	Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0)";
                    request.CookieContainer   = cookieJar;
                    request.AllowAutoRedirect = true;
                    request.Method            = "GET";
                    request.Timeout           = 5000;

                    // Dispose the response so the underlying connection is released once the
                    // cookies have been read from the container.
                    using (HttpWebResponse resp = (HttpWebResponse)request.GetResponse())
                    {
                        dom.RequestCookies  = cookieJar.GetCookies(request.RequestUri);
                        dom.ResponseCookies = cookieJar.GetCookies(resp.ResponseUri);
                    }
                }

                if (dom.RequestCookies != null)
                {
                    foreach (Cookie c1 in dom.RequestCookies)
                    {
                        if (c1.Name.Contains(rule.Value))
                        {
                            return(true);
                        }
                    }
                }

                if (dom.ResponseCookies != null)
                {
                    foreach (Cookie c in dom.ResponseCookies)
                    {
                        if (c.Name.Contains(rule.Value))
                        {
                            return(true);
                        }
                    }
                }
            }
            catch (Exception e)
            {
                // Best effort: failures count as "no match". 404s and timeouts are deliberately
                // excluded from the (not yet implemented) logging below.
                if (!e.Message.Contains("404") && !e.Message.Contains("timed out"))
                {
                    //todo: log
                    //Utility.WriteToLogFile(String.Format("SmallBiz_NoCookieInfo_{0:M_d_yyyy}", DateTime.Today) + ".log", string.Format("Domain: {0}", dom.Domain));
                }
            }

            return(false);
        }
Example #6
0
        /// <summary>
        /// Runs a rule through the processor registered for its type.
        /// </summary>
        /// <param name="rule">Rule to evaluate; its Type (lower-cased) selects the processor.</param>
        /// <param name="dom">Reader handed to the selected processor.</param>
        /// <returns>The result of the selected processor's Process call.</returns>
        private bool ValidateRule(MarketShareRule rule, DOMReader dom)
        {
            // The "text" processor is the fallback, so unknown types just regex the whole page.
            IProcessor fallback = _processors["text"];
            string     typeKey  = rule.Type.ToLower();

            IProcessor selected = _processors.ContainsKey(typeKey) ? _processors[typeKey] : fallback;

            return selected.Process(dom, rule);
        }
Example #7
0
 /// <summary>
 /// Checks the document's meta tag collection for the rule.
 /// </summary>
 /// <param name="dom">DocumentReader object for the domain; supplies MetaTags and the lookup.</param>
 /// <param name="rule">The rule to match</param>
 /// <returns>The result of dom.ExistsInCollection for the meta tags; false when an exception is logged.</returns>
 public bool Process(DOMReader dom, MarketShareRule rule)
 {
     bool found = false;

     try
     {
         found = dom.ExistsInCollection(dom.MetaTags, rule);
     }
     catch (Exception ex)
     {
         // Failures are logged as warnings and reported as "no match".
         string context = string.Format("Domain: {0}, {1}", dom.Domain, rule.ToString());
         ExceptionExtensions.LogWarning(ex, "API.MarketAnalysis.Metas.Process()", context);
     }

     return found;
 }
Example #8
0
        /// <summary>
        /// Checks whether the domain contains the rule value.
        /// </summary>
        /// <param name="dom">DocumentReader object for the domain; its Domain is searched.</param>
        /// <param name="rule">The rule to match; Rule.Value is the text looked for.</param>
        /// <returns>True if the domain contains the rule value; false otherwise or when an exception is logged.</returns>
        public bool Process(DOMReader dom, MarketShareRule rule)
        {
            bool contains = false;

            try
            {
                contains = dom.Domain.Contains(rule.Value);
            }
            catch (Exception ex)
            {
                // Failures are logged as warnings and reported as "no match".
                string context = string.Format("Domain: {0}, {1}", dom.Domain, rule.ToString());
                ExceptionExtensions.LogWarning(ex, "API.MarketAnalysis.Urls.Process()", context);
            }

            return contains;
        }
Example #9
0
        /// <summary>
        /// Evaluates the element children of an AND node and reports whether enough of them match.
        /// </summary>
        /// <param name="andNode">Node whose element children are evaluated in document order.
        /// An optional integer "mincount" attribute sets how many children must match.</param>
        /// <param name="dom">Reader passed through to the rule/logic evaluation.</param>
        /// <returns>
        /// True when the number of matching children reaches the required minimum. Without a
        /// parsable "mincount", every element child must match.
        /// </returns>
        private bool ProcessAndRules(XmlNode andNode, DOMReader dom)
        {
            // Only element children are evaluated below, so only they may count towards the
            // default minimum. ChildNodes.Count also includes comments and other non-element
            // nodes, which made an AND group containing a comment impossible to satisfy.
            int elementCount = 0;

            foreach (XmlNode child in andNode.ChildNodes)
            {
                if (child.NodeType == XmlNodeType.Element)
                {
                    elementCount++;
                }
            }

            int matchCount = 0;
            //if no min is specified, they all must be true
            int minCount = elementCount;

            //set min number of rules that must be true
            if (!Object.Equals(null, andNode.Attributes["mincount"]))
            {
                int parsedMin;

                // An unparsable mincount falls back to "all children must match".
                if (Int32.TryParse(andNode.Attributes["mincount"].Value, out parsedMin))
                {
                    minCount = parsedMin;
                }
            }

            //evaluate all childnodes
            foreach (XmlNode node in andNode.ChildNodes)
            {
                if (node.NodeType == XmlNodeType.Element)
                {
                    if (node.Name != "Rule")
                    {
                        // Any non-"Rule" element is handled as a nested logic node.
                        if (ProcessLogicNode(node, dom))
                        {
                            matchCount++;
                        }
                    }
                    else
                    {
                        if (ValidateRule(new MarketShareRule(node), dom))
                        {
                            matchCount++;
                        }
                    }

                    // Stop early once the required number of matches is reached.
                    if (matchCount >= minCount)
                    {
                        break;
                    }
                }
            }

            return(matchCount >= minCount);
        }
Example #10
0
        /// <summary>
        /// Runs a case-insensitive regular expression against the inner HTML of a node of the document.
        /// </summary>
        /// <param name="dom">DocumentReader object for the domain</param>
        /// <param name="rule">The rule to match. Rule.Value is the pattern (with "{domain}" replaced by
        /// "www." plus the domain); Rule.Property selects the node to search.</param>
        /// <returns>True if the selected node exists and the pattern matches its inner HTML;
        /// false otherwise or when an exception is logged.</returns>
        public bool Process(DOMReader dom, MarketShareRule rule)
        {
            try
            {
                string pattern = rule.Value.Replace("{domain}", "www." + dom.Domain);
                Regex  matcher = new Regex(pattern, RegexOptions.IgnoreCase);

                HtmlAgilityPack.HtmlNode target = dom.Document.DocumentNode.SelectSingleNode(rule.Property);

                // A missing node simply means there is nothing to match against.
                if (!Object.ReferenceEquals(target, null) && matcher.IsMatch(target.InnerHtml))
                {
                    return true;
                }
            }
            catch (Exception ex)
            {
                // Failures are logged as warnings and reported as "no match".
                string context = string.Format("Domain: {0}, {1}", dom.Domain, rule.ToString());
                ExceptionExtensions.LogWarning(ex, "API.MarketAnalysis.Text.Process()", context);
            }

            return false;
        }
Example #11
0
 /// <summary>
 /// Determines if the value exists in the Stylesheet tag collection of the document
 /// </summary>
 /// <param name="dom">DocumentReader object for the domain</param>
 /// <param name="rule">The rule to match. Rule.Property names the attribute to inspect;
 /// Rule.Value is the text looked for in that attribute (compared lower-cased).</param>
 /// <returns>True if the rule value is found in any stylesheet tag carrying the attribute;
 /// false otherwise or when an exception is logged.</returns>
 public bool Process(DOMReader dom, MarketShareRule rule)
 {
     try
     {
         foreach (var item in dom.Stylesheets)
         {
             if (item.Attributes.Contains(rule.Property))
             {
                 string propValue = item.Attributes[rule.Property].Value.ToLower();

                 // Keep scanning when this stylesheet does not match: returning here
                 // unconditionally meant only the first stylesheet with the attribute
                 // was ever checked.
                 if (propValue.Contains(rule.Value.ToLower()))
                 {
                     return(true);
                 }
             }
         }
         return(false);
     }
     catch (Exception e)
     {
         ExceptionExtensions.LogWarning(e, "API.MarketAnalysis.Stylesheets.Process()", string.Format("Domain: {0}, {1}", dom.Domain, rule.ToString()));
     }
     return(false);
 }
Example #12
0
        /// <summary>
        /// Looks for the rule value inside an attribute of the document's generator tags.
        /// </summary>
        /// <param name="dom">DocumentReader object for the domain; supplies GeneratorTags.</param>
        /// <param name="rule">The rule to match. Rule.Property names the attribute to inspect;
        /// Rule.Value is the text looked for in that attribute (compared lower-cased).</param>
        /// <returns>True if any generator tag's attribute contains the rule value;
        /// false otherwise or when an exception is logged.</returns>
        public bool Process(DOMReader dom, MarketShareRule rule)
        {
            try
            {
                foreach (HtmlAgilityPack.HtmlNode tag in dom.GeneratorTags)
                {
                    var attribute = tag.Attributes[rule.Property];

                    // Tags without the requested attribute cannot match.
                    if (Object.ReferenceEquals(attribute, null))
                    {
                        continue;
                    }

                    string attributeValue = attribute.Value.ToLower();

                    if (attributeValue.Contains(rule.Value.ToLower()))
                    {
                        return true;
                    }
                }
            }
            catch (Exception ex)
            {
                // Failures are logged as warnings and reported as "no match".
                string context = string.Format("Domain: {0}, {1}", dom.Domain, rule.ToString());
                ExceptionExtensions.LogWarning(ex, "API.MarketAnalysis.Generators.Process()", context);
            }

            return false;
        }
Example #13
0
        /// <summary>
        /// Evaluates the shopping cart product rules against an already-loaded document.
        /// </summary>
        /// <param name="domain">Domain passed to the DOMReader constructor.</param>
        /// <param name="document">Document passed to the DOMReader constructor.</param>
        /// <param name="response">Optional response; when not null its cookies are stored on the reader.</param>
        /// <param name="isRoot">Flag passed through to the rule evaluation.</param>
        /// <returns>The "name" attribute of the first matching Product node, or "None".</returns>
        public string ShoppingCart(string domain, HtmlDocument document, HttpWebResponse response, bool isRoot)
        {
            DOMReader reader = new DOMReader(domain, document);

            if (response != null)
            {
                reader.ResponseCookies = response.Cookies;
            }

            // Without a document node there is nothing to evaluate the rules against.
            if (Object.ReferenceEquals(reader.Document.DocumentNode, null))
            {
                return "None";
            }

            foreach (XmlNode product in _shoppingcartRulesXML.SelectNodes("//Product[@name]"))
            {
                if (!ProcessRuleNode(product, reader, isRoot))
                {
                    continue;
                }

                // First matching product wins.
                return product.Attributes["name"].Value;
            }

            return "None";
        }
Example #14
0
 /// <summary>
 /// Dispatches a logic node to the AND or OR evaluation based on its element name.
 /// </summary>
 /// <param name="andOrNode">Logic node; "And" selects AND evaluation, any other name selects OR.</param>
 /// <param name="dom">Reader passed through to the evaluation.</param>
 /// <returns>The result of the selected evaluation.</returns>
 private bool ProcessLogicNode(XmlNode andOrNode, DOMReader dom)
 {
     if (andOrNode.Name == "And")
     {
         return ProcessAndRules(andOrNode, dom);
     }

     return ProcessOrRules(andOrNode, dom);
 }