/// <summary>
/// Parses the raw text of a robots.txt file into a <see cref="Robot"/> object.
/// </summary>
/// <param name="robotTxt">Full text content of the robots.txt file.</param>
/// <param name="urlTargetTest">URL the robots.txt was fetched from; used (lower-cased,
/// with the "/robots.txt" suffix stripped) as the robot's Domain.</param>
/// <returns>A populated <see cref="Robot"/> instance.</returns>
/// <exception cref="Exception">Wraps any failure raised while matching or parsing the file.</exception>
public static Robot ParseRobotTxtToRobotObject(string robotTxt, string urlTargetTest)
{
    Robot robot = new Robot();
    robot.Domain = urlTargetTest.ToLower().Replace("/robots.txt", string.Empty);

    try
    {
        // robots.txt directives are case-insensitive (RFC 9309), so match ignoring case;
        // the downstream Parse* helpers already lower-case each line before stripping the prefix.
        var matchUserAgent = GetRegexMatches(robotTxt, "User-agent: (.+)", RegexOptions.IgnoreCase);
        var matchDisallow = GetRegexMatches(robotTxt, "Disallow: (.+)", RegexOptions.IgnoreCase);
        var matchAllow = GetRegexMatches(robotTxt, "Allow: (.+)", RegexOptions.IgnoreCase);
        var matchComments = GetRegexMatches(robotTxt, "# (.+)", RegexOptions.None);
        var matchSiteMap = GetRegexMatches(robotTxt, "Sitemap: (.+)", RegexOptions.IgnoreCase);

        ParseUserAgentData(matchUserAgent, robot);
        ParseDisallowData(matchDisallow, robot);
        ParseAllowData(matchAllow, robot);
        ParseCommentsData(matchComments, robot);
        ParseSiteMapData(matchSiteMap, robot);
    }
    catch (Exception ex)
    {
        // NOTE: the original dead "robot = null" before the throw was removed —
        // the value could never be observed by callers.
        throw new Exception("An error occurred while trying to parse the Robot object", ex);
    }

    return robot;
}
/// <summary>
/// Copies every "User-agent" match into <paramref name="robot"/>.UserAgent,
/// lower-cased and with the "user-agent: " prefix removed.
/// </summary>
/// <param name="matchUserAgent">Regex matches for "User-agent: (.+)" lines; may be null or empty.</param>
/// <param name="robot">Robot instance whose UserAgent list is (re)created and filled.</param>
private static void ParseUserAgentData(MatchCollection matchUserAgent, Robot robot)
{
    // Nothing to record when the regex found no user-agent lines.
    if (matchUserAgent == null || matchUserAgent.Count == 0)
        return;

    robot.UserAgent = new List<string>();
    for (int i = 0; i < matchUserAgent.Count; i++)
    {
        string rawLine = matchUserAgent[i].ToString();
        // Trim the trailing CR/LF captured by (.+), lower-case, then drop the directive prefix.
        robot.UserAgent.Add(rawLine.TrimEnd('\r', '\n').ToLower().Replace("user-agent: ", string.Empty));
    }
}
/// <summary>
/// Copies every "Disallow" match into <paramref name="robot"/>.Disallows,
/// lower-cased and with the "disallow: " prefix removed.
/// </summary>
/// <param name="matchDisallow">Regex matches for "Disallow: (.+)" lines; may be null or empty.</param>
/// <param name="robot">Robot instance whose Disallows list is (re)created and filled.</param>
private static void ParseDisallowData(MatchCollection matchDisallow, Robot robot)
{
    // Nothing to record when the regex found no disallow lines.
    if (matchDisallow == null || matchDisallow.Count == 0)
        return;

    robot.Disallows = new List<string>();
    for (int i = 0; i < matchDisallow.Count; i++)
    {
        string rawLine = matchDisallow[i].ToString();
        // Trim the trailing CR/LF captured by (.+), lower-case, then drop the directive prefix.
        robot.Disallows.Add(rawLine.TrimEnd('\r', '\n').ToLower().Replace("disallow: ", string.Empty));
    }
}
/// <summary>
/// Copies every "# comment" match into <paramref name="robot"/>.Comments,
/// lower-cased and with the "# " prefix removed.
/// </summary>
/// <param name="matchComments">Regex matches for "# (.+)" lines; may be null or empty.</param>
/// <param name="robot">Robot instance whose Comments list is (re)created and filled.</param>
private static void ParseCommentsData(MatchCollection matchComments, Robot robot)
{
    // Nothing to record when the regex found no comment lines.
    if (matchComments == null || matchComments.Count == 0)
        return;

    robot.Comments = new List<string>();
    for (int i = 0; i < matchComments.Count; i++)
    {
        string rawLine = matchComments[i].ToString();
        // Lower-case, trim the trailing CR/LF captured by (.+), then drop the "# " marker.
        robot.Comments.Add(rawLine.ToLower().TrimEnd('\r', '\n').Replace("# ", string.Empty));
    }
}
/// <summary>
/// Copies every "Sitemap" match into <paramref name="robot"/>.SiteMap,
/// lower-cased and with the "sitemap: " prefix removed.
/// </summary>
/// <param name="matchSiteMap">Regex matches for "Sitemap: (.+)" lines; may be null or empty.</param>
/// <param name="robot">Robot instance whose SiteMap list is (re)created and filled.</param>
private static void ParseSiteMapData(MatchCollection matchSiteMap, Robot robot)
{
    if (matchSiteMap != null && matchSiteMap.Count > 0)
    {
        robot.SiteMap = new List<string>();
        foreach (var siteMap in matchSiteMap)
            // BUG FIX: the original appended to robot.Comments (copy-paste error), leaving
            // SiteMap permanently empty and throwing NullReferenceException whenever a
            // sitemap line existed but no comments had been parsed. Append to SiteMap.
            robot.SiteMap.Add(siteMap.ToString().TrimEnd('\r', '\n').ToLower().Replace("sitemap: ", string.Empty));
    }
}
/// <summary>
/// Handles a successful HTTP response for the target's robots.txt: prints the file,
/// parses it into the shared <c>robot</c> field, then either lists the 'disallow'
/// entries and asks to attack, or reports none and finishes.
/// </summary>
/// <param name="response">HTTP response whose body is the robots.txt content.</param>
private static void ProcessResponse(HttpWebResponse response)
{
    Util.Util.ChangeConsoleColorToGreen();
    Console.WriteLine("TARGET FOUND :)");
    Console.WriteLine();
    Util.Util.ChangeConsoleColorToDefault();

    Console.WriteLine("Getting Robots.txt...");
    Console.WriteLine();

    string robotsFileContent = Util.Util.ParseResponseStreamToText(response);

    Util.Util.ChangeConsoleColorToGreen();
    Console.WriteLine("VOILÀ!");
    Console.WriteLine();
    Util.Util.ChangeConsoleColorToDefault();

    // Echo the raw file between delimiters so the user can inspect it.
    Console.WriteLine("Begin of file");
    Console.WriteLine("-------------------------------------------");
    Console.WriteLine();
    Console.WriteLine(robotsFileContent);
    Console.WriteLine();
    Console.WriteLine("-------------------------------------------");
    Console.WriteLine("End of file");
    Console.WriteLine();

    robot = Util.Util.ParseRobotTxtToRobotObject(robotsFileContent, _urlTarget);

    // Guard clause: no 'disallow' entries means there is nothing to probe.
    if (robot.Disallows == null || robot.Disallows.Count == 0)
    {
        Console.WriteLine("There is no 'disallow' directories on the target.");
        Console.WriteLine();
        FinishExecution();
        return;
    }

    Console.WriteLine("Listing 'disallow' directories...");
    Console.WriteLine();
    foreach (var disallowDirectory in robot.Disallows)
        Console.WriteLine(disallowDirectory);
    Console.WriteLine();

    AskForAttack();
}