public void ParseRobotInstructionSouthTest()
{
    const string line = "S 5";
    var instruction = new RobotInstruction(line);
    Assert.AreEqual(CardinalDirection.South, instruction.CardinalDirection);
    Assert.AreEqual(5, instruction.Steps);
}

public void ParseRobotInstructionWestTest()
{
    const string line = "W 20";
    var instruction = new RobotInstruction(line);
    Assert.AreEqual(CardinalDirection.West, instruction.CardinalDirection);
    Assert.AreEqual(20, instruction.Steps);
}

public void ParseRobotInstructionNorthTest()
{
    const string line = "N 21231320";
    var instruction = new RobotInstruction(line);
    Assert.AreEqual(CardinalDirection.North, instruction.CardinalDirection);
    Assert.AreEqual(21231320, instruction.Steps);
}
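These parsing tests only pin down the public surface of this RobotInstruction: a constructor that takes a line such as "S 5", plus CardinalDirection and Steps properties. The class itself is not part of this listing; a minimal sketch that would satisfy the three tests, with every implementation detail assumed, might look like this:

using System;

// Hypothetical sketch; the project's actual RobotInstruction is not shown in this listing.
// Assumes a "<N|E|S|W> <steps>" line format, e.g. "S 5" or "N 21231320".
public enum CardinalDirection { North, East, South, West }

public class RobotInstruction
{
    public CardinalDirection CardinalDirection { get; private set; }
    public int Steps { get; private set; }

    public RobotInstruction(string line)
    {
        string[] parts = line.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
        switch (parts[0])
        {
            case "N": CardinalDirection = CardinalDirection.North; break;
            case "E": CardinalDirection = CardinalDirection.East; break;
            case "S": CardinalDirection = CardinalDirection.South; break;
            case "W": CardinalDirection = CardinalDirection.West; break;
            default: throw new ArgumentException("Unknown direction: " + parts[0]);
        }
        Steps = int.Parse(parts[1]);
    }
}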
public void MoveNorthThenReturnTest()
{
    var robotCleaner = new RobotCleaner.RobotCleaner(0, 0);
    var robotInstructionNorth = new RobotInstruction("N 100");
    var robotInstructionSouth = new RobotInstruction("S 100");
    robotCleaner.ExecuteInstructions(new List<RobotInstruction>() { robotInstructionNorth, robotInstructionSouth });
    Assert.AreEqual(101, robotCleaner.GetCleanedLocationsCount());
}

public void MoveWestThenReturnTest()
{
    var robotCleaner = new RobotCleaner.RobotCleaner(0, 0);
    var robotInstructionWest = new RobotInstruction("W 55");
    var robotInstructionEast = new RobotInstruction("E 55");
    robotCleaner.ExecuteInstructions(new List<RobotInstruction>() { robotInstructionWest, robotInstructionEast });
    Assert.AreEqual(56, robotCleaner.GetCleanedLocationsCount());
}
public void MoveSouthTest()
{
    var robotCleaner = new RobotCleaner.RobotCleaner(0, 0);
    var robotInstruction = new RobotInstruction("S 100");
    robotCleaner.ExecuteInstructions(new List<RobotInstruction>() { robotInstruction });
    var robotPosition = robotCleaner.GetCurrentLocation();
    Assert.AreEqual(0, robotPosition.X);
    Assert.AreEqual(-100, robotPosition.Y);
}
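The 101 and 56 expectations in the return-trip tests imply that the cleaner counts unique visited cells, including the start cell, rather than steps taken. A minimal sketch of that bookkeeping, assuming the member names used in the tests and reusing the CardinalDirection/Steps shape from the parsing sketch above, could be:

using System.Collections.Generic;

// Hypothetical sketch; method names come from the tests, everything else is assumed.
public class RobotCleaner
{
    private int _x, _y;
    private readonly HashSet<(int X, int Y)> _cleaned = new HashSet<(int X, int Y)>();

    public RobotCleaner(int x, int y)
    {
        _x = x;
        _y = y;
        _cleaned.Add((_x, _y)); // the starting cell counts as cleaned
    }

    public void ExecuteInstructions(IEnumerable<RobotInstruction> instructions)
    {
        foreach (var instruction in instructions)
        {
            for (int i = 0; i < instruction.Steps; i++)
            {
                switch (instruction.CardinalDirection)
                {
                    case CardinalDirection.North: _y++; break;
                    case CardinalDirection.South: _y--; break;
                    case CardinalDirection.East: _x++; break;
                    case CardinalDirection.West: _x--; break;
                }
                _cleaned.Add((_x, _y)); // revisited cells are only counted once
            }
        }
    }

    public int GetCleanedLocationsCount() => _cleaned.Count;

    public (int X, int Y) GetCurrentLocation() => (_x, _y);
}

A set keyed on the coordinate pair makes the revisit deduplication automatic: walking 100 cells north and 100 back cleans exactly 101 distinct cells.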
public void NotLostTest_InputGrid2_2_Robot1_0_W_Perform_F_Expected0_0_W_NotLost()
{
    // Arrange
    var grid = new MarsGrid(2, 2);
    var robot = new MarsRobot(1, 0, RobotDirection.West);
    var instructions = new RobotInstruction[] { RobotInstruction.Forward };

    // Act
    var result = robot.PerformInstructions(grid, instructions);

    // Assert
    Assert.Multiple(() =>
    {
        Assert.AreEqual((0, 0), (result.endX, result.endY));
        Assert.AreEqual(RobotDirection.West, result.direction);
        Assert.IsFalse(result.lost);
    });
}

public void LostTest_InputGrid2_2_Robot2_2_N_Perform_F_Expected2_2_N_Lost()
{
    // Arrange
    var grid = new MarsGrid(2, 2);
    var robot = new MarsRobot(2, 2, RobotDirection.North);
    var instructions = new RobotInstruction[] { RobotInstruction.Forward };

    // Act
    var result = robot.PerformInstructions(grid, instructions);

    // Assert
    Assert.Multiple(() =>
    {
        Assert.AreEqual((2, 2), (result.endX, result.endY));
        Assert.AreEqual(RobotDirection.North, result.direction);
        Assert.IsTrue(result.lost);
    });
}

public void ScentTest_InputFirstRobotLostSecondRepeatsRouteButHasDifferentLastDirection_ExpectedBothLost()
{
    // Arrange
    var grid = new MarsGrid(2, 2);
    var robot = new MarsRobot(2, 2, RobotDirection.North);
    var robot2 = new MarsRobot(2, 2, RobotDirection.North);
    var instructions = new RobotInstruction[] { RobotInstruction.Forward };
    var instructions2 = new RobotInstruction[] { RobotInstruction.TurnRight, RobotInstruction.Forward };

    // Act
    var result1 = robot.PerformInstructions(grid, instructions);
    var result2 = robot2.PerformInstructions(grid, instructions2);

    // Assert
    Assert.Multiple(() =>
    {
        Assert.AreEqual((2, 2), (result1.endX, result1.endY));
        Assert.AreEqual((2, 2), (result2.endX, result2.endY));
        Assert.IsTrue(result1.lost);
        Assert.IsTrue(result2.lost);
    });
}
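The scent test only passes if a robot lost while moving north off (2, 2) does not protect a later robot that leaves the same cell facing east, so the scent here appears to be keyed on position and direction rather than position alone (the classic Mars robot statement keys it on position only). A rough sketch of grid-side bookkeeping consistent with these three tests, with all names other than MarsGrid and RobotDirection assumed, might be:

using System.Collections.Generic;

// Hypothetical sketch of direction-aware scent tracking; only the behaviour implied
// by the tests above is modelled, the real MarsGrid/MarsRobot are not shown here.
public enum RobotDirection { North, East, South, West }

public class MarsGrid
{
    public int MaxX { get; }
    public int MaxY { get; }
    private readonly HashSet<(int X, int Y, RobotDirection Direction)> _scents =
        new HashSet<(int X, int Y, RobotDirection Direction)>();

    public MarsGrid(int maxX, int maxY)
    {
        MaxX = maxX;
        MaxY = maxY;
    }

    public bool IsOffGrid(int x, int y) => x < 0 || y < 0 || x > MaxX || y > MaxY;

    // Scent left when a robot was lost stepping off (x, y) while facing direction.
    public void LeaveScent(int x, int y, RobotDirection direction) => _scents.Add((x, y, direction));

    public bool HasScent(int x, int y, RobotDirection direction) => _scents.Contains((x, y, direction));
}

Under this reading, the robot's forward move would check HasScent before stepping off the edge and call LeaveScent when it is lost; whether the real MarsRobot does exactly this cannot be seen from the tests alone.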
private async Task Initialize()
{
    try
    {
        var robotsUri = new Uri("http://{0}/robots.txt".FormatWith(this.m_StartPageUri.Host));
        var robots = await this.m_WebDownloader.DownloadAsync(new CrawlStep(robotsUri, 0), null, DownloadMethod.GET).ConfigureAwait(false);
        if (robots == null || robots.StatusCode != HttpStatusCode.OK)
        {
            return;
        }

        string fileContents;
        using (var stream = new StreamReader(robots.GetResponse(), Encoding.ASCII))
        {
            fileContents = stream.ReadToEnd();
        }

        var fileLines = fileContents.Split(Environment.NewLine.ToCharArray(), StringSplitOptions.RemoveEmptyEntries);
        var rulesApply = false;
        var rules = new List<string>();
        foreach (var line in fileLines)
        {
            var ri = new RobotInstruction(line);
            if (!ri.Instruction.IsNullOrEmpty())
            {
                switch (ri.Instruction[0])
                {
                    case '#': // then comment - ignore
                        break;
                    case 'u': // User-Agent
                        if ((ri.UrlOrAgent.IndexOf("*") >= 0) || (ri.UrlOrAgent.IndexOf(this.m_WebDownloader.UserAgent) >= 0))
                        {
                            // these rules apply
                            rulesApply = true;
                        }
                        else
                        {
                            rulesApply = false;
                        }
                        break;
                    case 'd': // Disallow
                        if (rulesApply)
                        {
                            rules.Add(ri.UrlOrAgent.ToUpperInvariant());
                        }
                        break;
                    case 'a': // Allow
                        break;
                    default: // empty/unknown/error
                        break;
                }
            }
        }

        this.m_DenyUrls = rules.ToArray();
    }
    catch (Exception)
    {
    }
}
public RobotsTxt(Uri startPageUri, string userAgent)
{
    _UserAgent = userAgent;
    _Server = startPageUri.Host;
    System.Net.HttpWebRequest req = (System.Net.HttpWebRequest)System.Net.WebRequest.Create("http://" + startPageUri.Authority + "/robots.txt");
    try
    {
        System.Net.HttpWebResponse webresponse = (System.Net.HttpWebResponse)req.GetResponse();
        using (System.IO.StreamReader stream = new System.IO.StreamReader(webresponse.GetResponseStream(), Encoding.ASCII))
        {
            _FileContents = stream.ReadToEnd();
        }
        // stream.Close();
        //ProgressEvent(this, new ProgressEventArgs(1, "robots.txt file loaded from " + server + "robots.txt"));
        string[] fileLines = _FileContents.Split(new string[] { Environment.NewLine }, StringSplitOptions.RemoveEmptyEntries);
        bool rulesApply = false;
        foreach (string line in fileLines)
        {
            RobotInstruction ri = new RobotInstruction(line);
            switch (ri.Instruction[0])
            {
                case '#': // then comment - ignore
                    break;
                case 'u': // User-Agent
                    if ((ri.UrlOrAgent.IndexOf("*") >= 0) || (ri.UrlOrAgent.IndexOf(_UserAgent) >= 0))
                    {
                        // these rules apply
                        rulesApply = true;
                        Console.WriteLine(ri.UrlOrAgent + " " + rulesApply);
                    }
                    else
                    {
                        rulesApply = false;
                    }
                    break;
                case 'd': // Disallow
                    if (rulesApply)
                    {
                        _DenyUrls.Add(ri.UrlOrAgent.ToLower());
                        Console.WriteLine("D " + ri.UrlOrAgent);
                    }
                    else
                    {
                        Console.WriteLine("D " + ri.UrlOrAgent + " is for another user-agent");
                    }
                    break;
                case 'a': // Allow
                    Console.WriteLine("A" + ri.UrlOrAgent);
                    break;
                default: // empty/unknown/error
                    Console.WriteLine("# Unrecognised robots.txt entry [" + line + "]");
                    break;
            }
        }
    }
    catch (System.Net.WebException)
    {
        _FileContents = String.Empty;
        //ProgressEvent(this, new ProgressEventArgs(1, "No robots.txt file found at " + server));
    }
}
public RobotsTxt(Uri startPageUri, string userAgent)
{
    _UserAgent = userAgent;
    _Server = startPageUri.Host;
    try
    {
        System.Net.WebProxy proxyObject = null;
        if (Preferences.UseProxy)
        {
            // [v6] stephenlane80 suggested proxy code
            proxyObject = new System.Net.WebProxy(Preferences.ProxyUrl, true);
            proxyObject.Credentials = System.Net.CredentialCache.DefaultCredentials;
        }
        System.Net.HttpWebRequest req = (System.Net.HttpWebRequest)System.Net.WebRequest.Create("http://" + startPageUri.Authority + "/robots.txt");
        if (Preferences.UseProxy)
        {
            req.Proxy = proxyObject; // [v6] stephenlane80
        }
        System.Net.HttpWebResponse webresponse = (System.Net.HttpWebResponse)req.GetResponse();
        if (webresponse.StatusCode != System.Net.HttpStatusCode.OK)
        {
            Console.WriteLine("ROBOTS.TXT request returned HttpStatus " + webresponse.StatusCode.ToString());
            _FileContents = String.Empty;
            return;
        }
        using (System.IO.StreamReader stream = new System.IO.StreamReader(webresponse.GetResponseStream(), Encoding.ASCII))
        {
            _FileContents = stream.ReadToEnd();
        }
        // stream.Close();
        //ProgressEvent(this, new ProgressEventArgs(1, "robots.txt file loaded from " + server + "robots.txt"));
        // [v6] fix by maaguirr (Matt) to read Unix-based ROBOTS.TXT files
        string[] fileLines = _FileContents.Split(Environment.NewLine.ToCharArray(), StringSplitOptions.RemoveEmptyEntries);
        bool rulesApply = false;
        foreach (string line in fileLines)
        {
            if (line.Trim() != "")
            {
                RobotInstruction ri = new RobotInstruction(line);
                switch (ri.Instruction[0])
                {
                    case '#': // then comment - ignore
                        break;
                    case 'u': // User-Agent
                        if ((ri.UrlOrAgent.IndexOf("*") >= 0) || (ri.UrlOrAgent.IndexOf(_UserAgent) >= 0))
                        {
                            // these rules apply
                            rulesApply = true;
                            Console.WriteLine(ri.UrlOrAgent + " " + rulesApply);
                        }
                        else
                        {
                            rulesApply = false;
                        }
                        break;
                    case 'd': // Disallow
                        if (rulesApply)
                        {
                            _DenyUrls.Add(ri.UrlOrAgent.ToLower());
                            Console.WriteLine("D " + ri.UrlOrAgent);
                        }
                        else
                        {
                            Console.WriteLine("D " + ri.UrlOrAgent + " is for another user-agent");
                        }
                        break;
                    case 'a': // Allow
                        Console.WriteLine("A" + ri.UrlOrAgent);
                        break;
                    default: // empty/unknown/error
                        Console.WriteLine("# Unrecognised robots.txt entry [" + line + "]");
                        break;
                }
            }
        }
    }
    catch (System.Net.WebException)
    {
        _FileContents = String.Empty;
        //ProgressEvent(this, new ProgressEventArgs(1, "No robots.txt file found at " + server));
    }
    catch (System.Security.SecurityException)
    {
        _FileContents = String.Empty;
        //ProgressEvent(this, new ProgressEventArgs(1, "Could not load ROBOTS.TXT file from " + server));
    }
}
private void Initialize()
{
    try
    {
        Uri robotsUri = new Uri("http://{0}/robots.txt".FormatWith(m_StartPageUri.Host));
        PropertyBag robots = m_WebDownloader.Download(new CrawlStep(robotsUri, 0), DownloadMethod.Get);
        if (robots.StatusCode != HttpStatusCode.OK)
        {
            return;
        }

        string fileContents;
        using (StreamReader stream = new StreamReader(robots.GetResponseStream(), Encoding.ASCII))
        {
            fileContents = stream.ReadToEnd();
        }

        string[] fileLines = fileContents.Split(Environment.NewLine.ToCharArray(), StringSplitOptions.RemoveEmptyEntries);
        bool rulesApply = false;
        List<string> rules = new List<string>();
        foreach (string line in fileLines)
        {
            RobotInstruction ri = new RobotInstruction(line);
            if (!ri.Instruction.IsNullOrEmpty())
            {
                switch (ri.Instruction[0])
                {
                    case '#': // then comment - ignore
                        break;
                    case 'u': // User-Agent
                        if ((ri.UrlOrAgent.IndexOf("*") >= 0) || (ri.UrlOrAgent.IndexOf(m_WebDownloader.UserAgent) >= 0))
                        {
                            // these rules apply
                            rulesApply = true;
                        }
                        else
                        {
                            rulesApply = false;
                        }
                        break;
                    case 'd': // Disallow
                        if (rulesApply)
                        {
                            rules.Add(ri.UrlOrAgent.ToUpperInvariant());
                        }
                        break;
                    case 'a': // Allow
                        break;
                    default: // empty/unknown/error
                        break;
                }
            }
        }

        m_DenyUrls = rules.ToArray();
    }
    catch (WebException)
    {
    }
    catch (SecurityException)
    {
    }
}
private List<string> DownloadRobots(string host)
{
    List<string> deniedUrls = new List<string>();
    Uri resolvedUri = new Uri(string.Format("http://{0}/", host));
    using (ManualResetEvent done = new ManualResetEvent(false))
    {
        try
        {
            WebDownloader web = new WebDownloader(
                string.Format("http://{0}/robots.txt", resolvedUri.Host).ToUri(),
                null,
                ea =>
                {
                    if (!ea.Stream.IsNull())
                    {
                        using (StreamReader sr = new StreamReader(ea.Stream))
                        {
                            bool rulesApply = false;
                            while (sr.Peek() >= 0)
                            {
                                string instructionLine = sr.ReadLine().ToUpperInvariant();
                                if (!instructionLine.IsNullOrEmpty())
                                {
                                    RobotInstruction ri = new RobotInstruction(instructionLine);
                                    int commentPosition = instructionLine.IndexOf("#");
                                    if (commentPosition > -1)
                                        instructionLine = instructionLine.Substring(0, commentPosition);
                                    if (instructionLine.Length > 0)
                                    {
                                        if (instructionLine.StartsWith("U"))
                                        {
                                            // User-agent: *
                                            int colonPosition = instructionLine.IndexOf(":");
                                            instructionLine = instructionLine.Substring(colonPosition + 1).Trim();
                                            if ((instructionLine.StartsWith("*") == true) || ((ri.UrlOrAgent.IndexOf(user_agent) >= 0)))
                                                rulesApply = true;
                                            else
                                                rulesApply = false;
                                        }
                                        else if (instructionLine.StartsWith("D"))
                                        {
                                            // Disallow: /
                                            // Disallow: /cgi-bin
                                            if (rulesApply)
                                            {
                                                int colonPosition = instructionLine.IndexOf(":");
                                                instructionLine = instructionLine.Substring(colonPosition + 1).Trim();
                                                Uri possibleDenyUri;
                                                if (Uri.TryCreate(resolvedUri, instructionLine, out possibleDenyUri))
                                                {
                                                    if (!deniedUrls.Contains(possibleDenyUri.AbsoluteUri.ToUpperInvariant()))
                                                        deniedUrls.Add(possibleDenyUri.AbsoluteUri.ToUpperInvariant());
                                                }
                                            }
                                        }
                                    }
                                }
                            }
                            sr.Close();
                        }
                    }
                    done.Set();
                });
            web.Download();
            done.WaitOne();
        }
        catch
        {
            // Do nothing for now
        }
        cache.AddOrUpdate(host, deniedUrls, (s, l) => { return l; });
        return deniedUrls;
    }
}
public Robots(Uri startPageUri, string userAgent, bool debug = false)
{
    _UserAgent = userAgent;
    _Server = startPageUri.Host;
    try
    {
        System.Net.HttpWebRequest req = (System.Net.HttpWebRequest)System.Net.WebRequest.Create("http://" + startPageUri.Authority + "/robots.txt");
        req.UserAgent = Browser.UserAgent;
        req.Accept = "text/plain";
        System.Net.HttpWebResponse webresponse = (System.Net.HttpWebResponse)req.GetResponse();
        using (System.IO.StreamReader stream = new System.IO.StreamReader(webresponse.GetResponseStream(), Encoding.ASCII))
        {
            _FileContents = stream.ReadToEnd();
        }
        string[] fileLines = _FileContents.Split(new string[] { "\r\n" }, StringSplitOptions.RemoveEmptyEntries);
        if (fileLines.Length < 2)
        {
            fileLines = _FileContents.Split(new string[] { "\n" }, StringSplitOptions.RemoveEmptyEntries);
        }
        bool rulesApply = false;
        foreach (string line in fileLines)
        {
            RobotInstruction ri = new RobotInstruction(line);
            if (ri.Instruction.Length < 1)
            {
                continue;
            }
            switch (ri.Instruction.TrimStart()[0]) // trim as leading whitespace before comments is valid http://www.robotstxt.org/orig.html
            {
                case '#': // then comment - ignore
                    break;
                case 'u': // User-Agent
                    if ((ri.UrlOrAgent == "*") || (ri.UrlOrAgent.IndexOf(_UserAgent) >= 0))
                    {
                        // these rules apply
                        rulesApply = true;
                        if (debug) { Console.WriteLine(ri.UrlOrAgent + " " + rulesApply); }
                    }
                    else
                    {
                        rulesApply = false;
                    }
                    break;
                case 'd': // Disallow
                    if (rulesApply)
                    {
                        _DenyUrls.Add(ri.UrlOrAgent.ToLower());
                        if (debug) { Console.WriteLine("D " + ri.UrlOrAgent); }
                    }
                    else
                    {
                        if (debug) { Console.WriteLine("D " + ri.UrlOrAgent + " is for another user-agent"); }
                    }
                    break;
                case 'a': // Allow
                    if (debug) { Console.WriteLine("A" + ri.UrlOrAgent); }
                    break;
                case 'c': // Crawl-delay
                    if (rulesApply)
                    {
                        if (debug) { Console.WriteLine("C " + ri.UrlOrAgent); }
                        _crawlDelay = Math.Abs(Convert.ToInt32(ri.UrlOrAgent));
                    }
                    break;
                default: // empty/unknown/error
                    Console.WriteLine("Unrecognised robots.txt entry [" + line + "]");
                    break;
            }
        }
    }
    catch (System.Net.WebException)
    {
        _FileContents = String.Empty;
    }
    catch (System.Exception ex)
    {
        Console.WriteLine("Robots exception");
        Console.WriteLine("Will continue, but will be extra cautious as it could be our fault");
        Console.WriteLine("Attempted URL was " + "http://" + startPageUri.Authority + "/robots.txt");
        Console.WriteLine(ex.Message);
        _crawlDelay = 20;
        _FileContents = String.Empty;
    }
}
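All of the crawler snippets above drive their switch statements from ri.Instruction[0] and read the value from ri.UrlOrAgent, which suggests a RobotInstruction that lower-cases the directive keyword and splits each robots.txt line at its first colon. That class is not included in this listing; a minimal sketch consistent with how these callers use it could be:

using System;

// Hypothetical sketch; the crawlers above only rely on the Instruction and UrlOrAgent properties.
public class RobotInstruction
{
    // Directive keyword, lower-cased (e.g. "user-agent", "disallow", "allow", or a "#" comment).
    public string Instruction { get; private set; }

    // Whatever follows the first ':' - a user-agent token, a URL path, or a crawl-delay value.
    public string UrlOrAgent { get; private set; }

    public RobotInstruction(string line)
    {
        Instruction = string.Empty;
        UrlOrAgent = string.Empty;
        if (string.IsNullOrEmpty(line))
        {
            return;
        }

        int colonPosition = line.IndexOf(':');
        if (colonPosition < 0)
        {
            // No colon: treat the whole line as the instruction (covers "#" comment lines).
            Instruction = line.Trim().ToLower();
            return;
        }

        Instruction = line.Substring(0, colonPosition).Trim().ToLower();
        UrlOrAgent = line.Substring(colonPosition + 1).Trim();
    }
}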