Beispiel #1
0
        /// <summary>
        /// Builds a <see cref="Sitemap"/> from a parsed robots.txt line.
        /// </summary>
        /// <param name="line">The parsed line whose <c>Value</c> holds the sitemap location text.</param>
        /// <returns>
        /// A sitemap whose <c>Value</c> is the raw text of the line. <c>Url</c> is assigned only
        /// when that text is a well-formed absolute URI; otherwise it is left untouched.
        /// </returns>
        internal static Sitemap FromLine(Line line)
        {
            Sitemap s = new Sitemap {
                Value = line.Value
            };

            // An invalid (or missing) URI is an expected condition in hand-written robots.txt
            // files, so parse with TryCreate instead of relying on a thrown UriFormatException.
            // This also tolerates a null value, which "new Uri(null)" would reject with an
            // ArgumentNullException that the previous catch block did not cover.
            Uri parsedUrl;
            if (Uri.TryCreate(line.Value, UriKind.Absolute, out parsedUrl))
            {
                s.Url = parsedUrl;
            }

            return s;
        }
Beispiel #2
0
        /// <summary>
        /// Walks the raw robots.txt lines and fills the rule and sitemap collections.
        /// </summary>
        /// <remarks>
        /// Each line is wrapped in a <c>Line</c> and dispatched on its type:
        /// user-agent lines set the agent that following rules are attributed to, sitemap
        /// lines are collected, and access / crawl-delay rules are stored together with a
        /// running sequence number. A rule that appears before any user-agent line, or a line
        /// of unknown type, marks the file as <c>Malformed</c>. Comment lines are skipped.
        /// </remarks>
        /// <param name="lines">The individual lines of the robots.txt content.</param>
        private void readLines(string[] lines)
        {
            globalAccessRules = new List<AccessRule>();
            specificAccessRules = new List<AccessRule>();
            crawlDelayRules = new List<CrawlDelayRule>();
            Sitemaps = new List<Sitemap>();

            string currentAgent = String.Empty;
            int ruleSequence = 0;

            foreach (string rawLine in lines)
            {
                var entry = new Line(rawLine);
                var kind = entry.Type;

                if (kind == LineType.UserAgent)
                {
                    // Rules that follow are attributed to this agent.
                    currentAgent = entry.Value;
                }
                else if (kind == LineType.Sitemap)
                {
                    Sitemaps.Add(Sitemap.FromLine(entry));
                }
                else if (kind == LineType.Unknown)
                {
                    Malformed = true;
                }
                else if (kind == LineType.AccessRule || kind == LineType.CrawlDelayRule)
                {
                    // A rule with no preceding user-agent declaration is dropped and flagged.
                    if (String.IsNullOrEmpty(currentAgent))
                    {
                        Malformed = true;
                        continue;
                    }

                    if (kind == LineType.CrawlDelayRule)
                    {
                        crawlDelayRules.Add(new CrawlDelayRule(currentAgent, entry, ++ruleSequence));
                    }
                    else
                    {
                        var rule = new AccessRule(currentAgent, entry, ++ruleSequence);

                        if (rule.For.Equals("*"))
                        {
                            globalAccessRules.Add(rule);
                        }
                        else
                        {
                            specificAccessRules.Add(rule);
                        }

                        // An empty "Disallow:" path disallows nothing, so only a disallow
                        // rule with a non-empty path counts as an actual restriction.
                        bool restrictsNothing = rule.Allowed || String.IsNullOrEmpty(rule.Path);
                        if (!restrictsNothing)
                        {
                            IsAnyPathDisallowed = true;
                        }
                    }

                    HasRules = true;
                }
                // Comment lines (and any other line type) are intentionally ignored.
            }
        }