/// <summary>
/// Fetches the robots.txt resource for a domain and parses it line by line into a
/// <see cref="RobotsTxt"/>: user-agent groups with their allow/disallow rules, plus sitemap entries.
/// </summary>
/// <param name="domainUrl">Domain URL; handed to the <see cref="RobotsTxt"/> constructor.</param>
/// <returns>
/// The populated <see cref="RobotsTxt"/>, or <c>null</c> when <c>GetStream</c> returns no reader
/// for the robots.txt URI.
/// </returns>
public RobotsTxt Parse(string domainUrl)
{
    RobotsTxt robotTxt = new RobotsTxt(domainUrl);
    Uri uri = new Uri(robotTxt.Domain, ROBOT_TXT);

    // Dispose the reader on every exit path (normal return, early return, exception).
    // A using statement accepts a null resource, so the null check can live inside it.
    using (var reader = GetStream(uri))
    {
        if (reader == null)
        {
            return null;
        }

        // Rules apply to the most recently seen User-agent group; null until the first one.
        Politeness currentPoliteness = null;

        string line;
        while ((line = reader.ReadLine()) != null)
        {
            var agentMatch = agentRegex.Match(line);
            if (agentMatch.Success)
            {
                string agent = agentMatch.Groups[1].Value;
                if (robotTxt.HasAgent(agent))
                {
                    // Agent seen before: keep appending rules to its existing entry.
                    currentPoliteness = robotTxt.GetPoliteness(agent);
                }
                else
                {
                    currentPoliteness = new Politeness();
                    robotTxt.PutPoliteness(agent, currentPoliteness);
                }
            }
            else if (currentPoliteness != null)
            {
                // Allow/Disallow lines that appear before any User-agent line are ignored.
                // Disallow is tested first, as in the original ordering.
                var disallowMatch = disallowRegex.Match(line);
                if (disallowMatch.Success)
                {
                    currentPoliteness.Disallows.Add(disallowMatch.Groups[1].Value);
                }
                else
                {
                    var allowMatch = allowRegex.Match(line);
                    if (allowMatch.Success)
                    {
                        currentPoliteness.Allows.Add(allowMatch.Groups[1].Value);
                    }
                }
            }

            // Sitemap directives are not tied to a user-agent group, so they are recorded
            // regardless of whether a User-agent line has been seen yet.
            var sitemapMatch = sitemapRegex.Match(line);
            if (sitemapMatch.Success)
            {
                robotTxt.Sitemaps.Add(sitemapMatch.Groups[1].Value);
            }
        }

        return robotTxt;
    }
}
/// <summary>
/// Stores <paramref name="politeness"/> under <paramref name="agent"/> by adding the pair
/// to <c>_politenesses</c>.
/// </summary>
/// <param name="agent">User-agent name used as the key.</param>
/// <param name="politeness">Rule set to associate with that agent.</param>
/// <remarks>
/// NOTE(review): if <c>_politenesses</c> is a <c>Dictionary</c>, <c>Add</c> throws on a key that is
/// already present — confirm callers check for an existing agent first.
/// </remarks>
public void PutPoliteness(string agent, Politeness politeness)
{
    _politenesses.Add(
        agent,
        politeness);
}