/// <summary>
/// Downloads robots.txt from the site that serves <paramref name="uri"/> and decides
/// whether this crawler may fetch <paramref name="uri"/>.
/// </summary>
/// <remarks>
/// Only the first group whose User-agent is "*" or "Yggdrasil" is used; parsing stops at the
/// next User-agent line once such a group has started. When robots.txt cannot be retrieved
/// (the request throws <see cref="WebException"/>) or no group applies, crawling is allowed.
/// </remarks>
/// <param name="uri">Absolute URI that is about to be crawled.</param>
/// <returns>
/// <c>true</c> when no applicable rules were found or the request failed; otherwise the
/// result of <c>AgentPrivileges.IsAllowed</c> for <paramref name="uri"/>.
/// </returns>
private bool CrawlAllowed(Uri uri)
{
    // Use the URI's own scheme and authority (host plus non-default port). The rules are
    // later compared against uri.ToString(), so a hard-coded "http://" prefix would never
    // line up with https or custom-port URIs, and would fetch the wrong robots.txt.
    string host = uri.GetLeftPart(UriPartial.Authority);
    WebRequest webRequest = WebRequest.Create(host + "/robots.txt");

    WebResponse response;
    try
    {
        response = webRequest.GetResponse();
    }
    catch (WebException)
    {
        // Deliberate best-effort: an unreachable or missing robots.txt means "no restrictions".
        return true;
    }

    using (response)
    using (Stream content = response.GetResponseStream())
    using (StreamReader reader = new StreamReader(content))
    {
        AgentPrivileges privileges = null;
        string line;

        while ((line = reader.ReadLine()) != null)
        {
            // Drop any trailing comment.
            line = line.Split('#')[0];

            // Split on the FIRST colon only, so values that contain ':' stay intact.
            // Lines without a colon (blank, comment-only, malformed) are skipped instead
            // of causing an IndexOutOfRangeException.
            int separator = line.IndexOf(':');
            if (separator < 0)
            {
                continue;
            }

            string field = line.Substring(0, separator).Trim();
            string value = line.Substring(separator + 1).Trim();

            // Field names are matched case-insensitively ("User-Agent", "disallow", ...).
            if (field.Equals("User-agent", StringComparison.OrdinalIgnoreCase))
            {
                if (privileges != null)
                {
                    // A matching group was already being read; the next group ends it.
                    break;
                }

                if ((value == "*") || value.Equals("Yggdrasil", StringComparison.OrdinalIgnoreCase))
                {
                    privileges = new AgentPrivileges();
                }
            }
            else if (privileges == null)
            {
                // Rule lines belonging to a group that does not apply to this crawler.
                continue;
            }
            else if (field.Equals("Disallow", StringComparison.OrdinalIgnoreCase))
            {
                if (value.Length == 0)
                {
                    // An empty Disallow blocks nothing; registered as an allow entry for the site root.
                    privileges.AddAllowedURL(host + value);
                }
                else
                {
                    privileges.AddBlockedURL(host + value);
                }
            }
            else if (field.Equals("Allow", StringComparison.OrdinalIgnoreCase))
            {
                privileges.AddAllowedURL(host + value);
            }
        }

        if (privileges != null)
        {
            return privileges.IsAllowed(uri.ToString());
        }
    }

    return true;
}
/// <summary>
/// Consults the robots.txt of the site hosting <paramref name="uri"/> to determine whether
/// the crawler is permitted to request <paramref name="uri"/>.
/// </summary>
/// <remarks>
/// The first group addressed to "*" or "Yggdrasil" supplies the rules; reading stops when a
/// further User-agent line follows that group. A failed robots.txt request
/// (<see cref="WebException"/>) or a file without an applicable group permits crawling.
/// </remarks>
/// <param name="uri">Absolute URI the crawler wants to visit.</param>
/// <returns>
/// <c>true</c> if the request failed or no applicable group exists; otherwise whatever
/// <c>AgentPrivileges.IsAllowed</c> reports for <paramref name="uri"/>.
/// </returns>
private bool CrawlAllowed(Uri uri)
{
    // Scheme + host (+ non-default port) of the URI being checked. The original hard-coded
    // "http://", so rule URLs could never line up with uri.ToString() for https or
    // custom-port sites, and the wrong robots.txt was requested.
    string host = uri.GetLeftPart(UriPartial.Authority);
    WebRequest webRequest = WebRequest.Create(String.Format("{0}/robots.txt", host));

    WebResponse response;
    try
    {
        response = webRequest.GetResponse();
    }
    catch (WebException)
    {
        // Best-effort by design: if robots.txt cannot be fetched, nothing is restricted.
        return true;
    }

    using (response)
    {
        using (Stream content = response.GetResponseStream())
        {
            using (StreamReader reader = new StreamReader(content))
            {
                AgentPrivileges privileges = null;
                string line;

                while ((line = reader.ReadLine()) != null)
                {
                    // Strip a trailing comment before looking at the directive.
                    line = line.Split('#')[0];
                    if (String.IsNullOrWhiteSpace(line))
                    {
                        continue;
                    }

                    // Limit the split to two parts so a ':' inside the value is preserved.
                    string[] lineSplit = line.Split(new[] { ':' }, 2);
                    if (lineSplit.Length < 2)
                    {
                        // No colon at all: malformed directive, ignore it rather than
                        // indexing past the end of the array.
                        continue;
                    }

                    string directive = lineSplit[0].Trim();
                    string argument = lineSplit[1].Trim();

                    // Directive names are compared without regard to case or padding.
                    bool isUserAgent = String.Equals(directive, "User-agent", StringComparison.OrdinalIgnoreCase);
                    bool isDisallow = String.Equals(directive, "Disallow", StringComparison.OrdinalIgnoreCase);
                    bool isAllow = String.Equals(directive, "Allow", StringComparison.OrdinalIgnoreCase);

                    if (isUserAgent)
                    {
                        if (privileges != null)
                        {
                            // The group that applies to us has ended.
                            break;
                        }

                        bool appliesToUs = (argument == "*")
                            || String.Equals(argument, "Yggdrasil", StringComparison.OrdinalIgnoreCase);
                        if (appliesToUs)
                        {
                            privileges = new AgentPrivileges();
                        }
                    }
                    else if ((privileges != null) && isDisallow)
                    {
                        if (argument.Length == 0)
                        {
                            // Empty Disallow restricts nothing; recorded as an allow entry for the site root.
                            privileges.AddAllowedURL(host + argument);
                        }
                        else
                        {
                            privileges.AddBlockedURL(host + argument);
                        }
                    }
                    else if ((privileges != null) && isAllow)
                    {
                        privileges.AddAllowedURL(host + argument);
                    }
                }

                if (privileges != null)
                {
                    return privileges.IsAllowed(uri.ToString());
                }
            }
        }
    }

    return true;
}