Beispiel #1
0
        /// <summary>
        /// Downloads the robots.txt of the host that serves <paramref name="uri"/> and decides
        /// whether this crawler may fetch the URI.
        /// </summary>
        /// <remarks>
        /// Only the first group addressed to "*" or "Yggdrasil" is evaluated; parsing stops when
        /// the next group begins. Directive names and the agent name are compared
        /// case-insensitively, and each line is split at its first colon only, so paths that
        /// themselves contain ':' are kept intact.
        /// </remarks>
        /// <param name="uri">Absolute URI that is about to be crawled.</param>
        /// <returns>
        /// <c>true</c> when robots.txt cannot be fetched (a <see cref="WebException"/> is raised)
        /// or holds no matching group; otherwise the verdict of <c>AgentPrivileges.IsAllowed</c>
        /// for the collected rules.
        /// </returns>
        private bool CrawlAllowed(Uri uri)
        {
            // Keep the URI's own scheme and port: the rules below are stored as absolute URLs
            // and later checked against uri.ToString(), so the prefixes have to agree.
            string host = uri.GetLeftPart(UriPartial.Authority);

            WebRequest webRequest = WebRequest.Create(host + "/robots.txt");
            WebResponse response;

            try
            {
                response = webRequest.GetResponse();
            }
            catch (WebException)
            {
                // Best effort: a missing or unreachable robots.txt imposes no restrictions.
                return true;
            }

            using (response)
            using (Stream content = response.GetResponseStream())
            using (StreamReader reader = new StreamReader(content))
            {
                AgentPrivileges privileges = null;

                // True once the matched group has produced at least one Allow/Disallow line.
                bool groupHasRules = false;

                string line;
                while ((line = reader.ReadLine()) != null)
                {
                    // Everything after '#' is a comment.
                    int commentStart = line.IndexOf('#');
                    if (commentStart >= 0)
                    {
                        line = line.Substring(0, commentStart);
                    }

                    // Split at the FIRST colon only; blank or malformed lines have none.
                    int separator = line.IndexOf(':');
                    if (separator < 0)
                    {
                        continue;
                    }

                    string field = line.Substring(0, separator).Trim();
                    string value = line.Substring(separator + 1).Trim();

                    if (field.Equals("User-agent", StringComparison.OrdinalIgnoreCase))
                    {
                        if (privileges != null)
                        {
                            if (groupHasRules)
                            {
                                // A User-agent line after rules starts the next group.
                                break;
                            }

                            // Still inside the agent list of the group that already matched.
                            continue;
                        }

                        if ((value == "*") || value.Equals("Yggdrasil", StringComparison.OrdinalIgnoreCase))
                        {
                            privileges = new AgentPrivileges();
                        }
                    }
                    else if (privileges == null)
                    {
                        // Rules of groups that do not address this crawler are skipped.
                        continue;
                    }
                    else if (field.Equals("Disallow", StringComparison.OrdinalIgnoreCase))
                    {
                        groupHasRules = true;

                        if (value.Length == 0)
                        {
                            // An empty Disallow places no restriction on the host.
                            privileges.AddAllowedURL(host + value);
                        }
                        else
                        {
                            privileges.AddBlockedURL(host + value);
                        }
                    }
                    else if (field.Equals("Allow", StringComparison.OrdinalIgnoreCase))
                    {
                        groupHasRules = true;
                        privileges.AddAllowedURL(host + value);
                    }
                }

                // No matching group means nothing is forbidden.
                return (privileges == null) || privileges.IsAllowed(uri.ToString());
            }
        }
Beispiel #2
0
        /// <summary>
        /// Downloads the robots.txt of the host that serves <paramref name="uri"/> and decides
        /// whether this crawler may fetch the URI.
        /// </summary>
        /// <remarks>
        /// Only the first group addressed to "*" or "Yggdrasil" is evaluated; parsing stops when
        /// the next group begins. Directive names and the agent name are compared
        /// case-insensitively, and each line is split at its first colon only, so paths that
        /// themselves contain ':' are kept intact.
        /// </remarks>
        /// <param name="uri">Absolute URI that is about to be crawled.</param>
        /// <returns>
        /// <c>true</c> when robots.txt cannot be fetched (a <see cref="WebException"/> is raised)
        /// or holds no matching group; otherwise the verdict of <c>AgentPrivileges.IsAllowed</c>
        /// for the collected rules.
        /// </returns>
        private bool CrawlAllowed(Uri uri)
        {
            // Keep the URI's own scheme and port: the rules below are stored as absolute URLs
            // and later checked against uri.ToString(), so the prefixes have to agree.
            string host = uri.GetLeftPart(UriPartial.Authority);

            WebRequest webRequest = WebRequest.Create(host + "/robots.txt");
            WebResponse response;

            try
            {
                response = webRequest.GetResponse();
            }
            catch (WebException)
            {
                // Best effort: a missing or unreachable robots.txt imposes no restrictions.
                return true;
            }

            using (response)
            using (Stream content = response.GetResponseStream())
            using (StreamReader reader = new StreamReader(content))
            {
                AgentPrivileges privileges = null;

                // True once the matched group has produced at least one Allow/Disallow line.
                bool groupHasRules = false;

                string line;
                while ((line = reader.ReadLine()) != null)
                {
                    // Everything after '#' is a comment.
                    int commentStart = line.IndexOf('#');
                    if (commentStart >= 0)
                    {
                        line = line.Substring(0, commentStart);
                    }

                    // Split at the FIRST colon only; blank or malformed lines have none.
                    int separator = line.IndexOf(':');
                    if (separator < 0)
                    {
                        continue;
                    }

                    string field = line.Substring(0, separator).Trim();
                    string value = line.Substring(separator + 1).Trim();

                    if (field.Equals("User-agent", StringComparison.OrdinalIgnoreCase))
                    {
                        if (privileges != null)
                        {
                            if (groupHasRules)
                            {
                                // A User-agent line after rules starts the next group.
                                break;
                            }

                            // Still inside the agent list of the group that already matched.
                            continue;
                        }

                        if ((value == "*") || value.Equals("Yggdrasil", StringComparison.OrdinalIgnoreCase))
                        {
                            privileges = new AgentPrivileges();
                        }
                    }
                    else if (privileges == null)
                    {
                        // Rules of groups that do not address this crawler are skipped.
                        continue;
                    }
                    else if (field.Equals("Disallow", StringComparison.OrdinalIgnoreCase))
                    {
                        groupHasRules = true;

                        if (value.Length == 0)
                        {
                            // An empty Disallow places no restriction on the host.
                            privileges.AddAllowedURL(host + value);
                        }
                        else
                        {
                            privileges.AddBlockedURL(host + value);
                        }
                    }
                    else if (field.Equals("Allow", StringComparison.OrdinalIgnoreCase))
                    {
                        groupHasRules = true;
                        privileges.AddAllowedURL(host + value);
                    }
                }

                // No matching group means nothing is forbidden.
                return (privileges == null) || privileges.IsAllowed(uri.ToString());
            }
        }