Exemplo n.º 1
0
        /// <summary>
        /// Reads the robots.txt resource of the given domain line by line and collects
        /// the per-agent allow/disallow rules and the declared sitemaps.
        /// </summary>
        /// <param name="domainUrl">
        /// Domain URL, handed to the <see cref="RobotsTxt"/> constructor; the robots.txt
        /// address is built from the resulting <c>Domain</c> and <c>ROBOT_TXT</c>.
        /// </param>
        /// <returns>
        /// The populated <see cref="RobotsTxt"/>, or <c>null</c> when <c>GetStream</c>
        /// does not return a reader.
        /// </returns>
        public RobotsTxt Parse(string domainUrl)
        {
            RobotsTxt robotTxt = new RobotsTxt(domainUrl);
            Uri uri = new Uri(robotTxt.Domain, ROBOT_TXT);

            var reader = GetStream(uri);
            if (reader == null)
                return null;

            // The reader is owned by this method: release it (and whatever it wraps)
            // even if parsing throws.
            using (reader)
            {
                // Rule set of the most recently seen user-agent line; rules that appear
                // before any user-agent line have no owner and are ignored.
                Politeness currentPoliteness = null;
                string line;

                while ((line = reader.ReadLine()) != null)
                {
                    var agentMatch = agentRegex.Match(line);

                    if (agentMatch.Success)
                    {
                        string agent = agentMatch.Groups[1].Value;

                        if (robotTxt.HasAgent(agent))
                        {
                            // Agent declared earlier in the file: keep extending its rules.
                            currentPoliteness = robotTxt.GetPoliteness(agent);
                        }
                        else
                        {
                            currentPoliteness = new Politeness();
                            robotTxt.PutPoliteness(agent, currentPoliteness);
                        }
                    }
                    else if (currentPoliteness != null)
                    {
                        // Disallow takes precedence when a line matches both patterns;
                        // the allow pattern is only evaluated when disallow did not match.
                        var disallowMatch = disallowRegex.Match(line);

                        if (disallowMatch.Success)
                        {
                            currentPoliteness.Disallows.Add(disallowMatch.Groups[1].Value);
                        }
                        else
                        {
                            var allowMatch = allowRegex.Match(line);

                            if (allowMatch.Success)
                                currentPoliteness.Allows.Add(allowMatch.Groups[1].Value);
                        }
                    }

                    // Sitemap directives are not tied to a user-agent group, so they are
                    // recorded wherever they appear, including before the first agent line.
                    var sitemapMatch = sitemapRegex.Match(line);

                    if (sitemapMatch.Success)
                        robotTxt.Sitemaps.Add(sitemapMatch.Groups[1].Value);
                }
            }

            return robotTxt;
        }
Exemplo n.º 2
0
 /// <summary>
 /// Registers the rule set for a user agent by adding it to the internal
 /// agent-to-politeness collection.
 /// </summary>
 /// <param name="agent">User-agent name used as the key.</param>
 /// <param name="politeness">Rule set stored under <paramref name="agent"/>.</param>
 /// <remarks>
 /// NOTE(review): this delegates to <c>Add</c>, so if the backing collection is a
 /// dictionary a duplicate agent would presumably throw; confirm against the field's type.
 /// </remarks>
 public void PutPoliteness(string agent, Politeness politeness)
     => _politenesses.Add(agent, politeness);