/// <summary>
/// Classifies every input line and rebuilds the access-rule, crawl-delay and sitemap
/// collections from scratch, raising the status flags as it goes.
/// </summary>
/// <param name="lines">Raw lines to process, in order. Each one is wrapped in a <c>Line</c>.</param>
/// <remarks>
/// The flags (<c>Malformed</c>, <c>HasRules</c>, <c>IsAnyPathDisallowed</c>,
/// <c>HaveNoAllowRules</c>) are only ever set to <c>true</c> here; this method never
/// resets them to <c>false</c>.
/// </remarks>
private void ReadLines(string[] lines)
{
    globalAccessRules = new List<AccessRule>();
    specificAccessRules = new List<AccessRule>();
    crawlDelayRules = new List<CrawlDelayRule>();
    Sitemaps = new List<Sitemap>();

    // The most recently seen user-agent value; rules that follow are attributed to it.
    string userAgent = String.Empty;

    // Running counter handed to each rule as it is created (pre-incremented, so the first rule gets 1).
    int ruleCount = 0;

    foreach (var robotsLine in lines.Select(line => new Line(line)))
    {
        switch (robotsLine.Type)
        {
            case LineType.Comment:
                //ignore the comments
                break;

            case LineType.UserAgent:
                userAgent = robotsLine.Value;
                break;

            case LineType.Sitemap:
                // Build the sitemap once and reuse the instance; only add it when one was produced.
                var sitemap = Sitemap.FromLine(robotsLine);
                if (sitemap != null)
                {
                    Sitemaps.Add(sitemap);
                }
                break;

            case LineType.AccessRule:
            case LineType.CrawlDelayRule:
                //if there's a rule without user-agent declaration, ignore it
                if (String.IsNullOrEmpty(userAgent))
                {
                    this.Malformed = true;
                    break;
                }

                if (robotsLine.Type == LineType.AccessRule)
                {
                    var accessRule = new AccessRule(userAgent, robotsLine, ++ruleCount);

                    // Rules addressed to "*" are kept apart from rules for a named user-agent.
                    if (accessRule.For.Equals("*"))
                    {
                        this.globalAccessRules.Add(accessRule);
                    }
                    else
                    {
                        this.specificAccessRules.Add(accessRule);
                    }

                    if (!accessRule.Allowed && !String.IsNullOrEmpty(accessRule.Path))
                    {
                        // We say !String.IsNullOrEmpty(x.Path) because the rule "Disallow: " means nothing is disallowed.
                        this.IsAnyPathDisallowed = true;
                    }
                }
                else
                {
                    this.crawlDelayRules.Add(new CrawlDelayRule(userAgent, robotsLine, ++ruleCount));
                }

                this.HasRules = true;
                break;

            default:
                // LineType.Unknown and any other unhandled line type both mark the input as malformed.
                this.Malformed = true;
                break;
        }
    }

    // HaveNoAllowRules is raised when at least one collected access rule (global or specific)
    // is a disallow with a non-empty path.
    // NOTE(review): the flag name reads oddly against this condition - confirm intended meaning.
    bool anyEffectiveDisallow = this.globalAccessRules
        .Concat(this.specificAccessRules)
        .Any(rule => !rule.Allowed && !String.IsNullOrEmpty(rule.Path));

    if (anyEffectiveDisallow)
    {
        this.HaveNoAllowRules = true;
    }
}
/// <summary>
/// Classifies every input line and rebuilds the access-rule, crawl-delay and sitemap
/// collections from scratch, raising the status flags as it goes.
/// </summary>
/// <param name="lines">Raw lines to process, in order. Each one is wrapped in a <c>Line</c>.</param>
/// <remarks>
/// The flags (<c>Malformed</c>, <c>HasRules</c>, <c>IsAnyPathDisallowed</c>) are only ever
/// set to <c>true</c> here; this method never resets them to <c>false</c>.
/// </remarks>
private void readLines(string[] lines)
{
    globalAccessRules = new List<AccessRule>();
    specificAccessRules = new List<AccessRule>();
    crawlDelayRules = new List<CrawlDelayRule>();
    Sitemaps = new List<Sitemap>();

    // The most recently seen user-agent value; rules that follow are attributed to it.
    string userAgent = String.Empty;

    // Running counter handed to each rule as it is created (pre-incremented, so the first rule gets 1).
    int ruleCount = 0;

    foreach (string rawLine in lines)
    {
        var robotsLine = new Line(rawLine);

        switch (robotsLine.Type)
        {
            case LineType.Comment:
                //ignore the comments
                break;

            case LineType.UserAgent:
                userAgent = robotsLine.Value;
                break;

            case LineType.Sitemap:
                // Only add a sitemap when one was actually produced, so the list never holds null entries.
                var sitemap = Sitemap.FromLine(robotsLine);
                if (sitemap != null)
                {
                    Sitemaps.Add(sitemap);
                }
                break;

            case LineType.AccessRule:
            case LineType.CrawlDelayRule:
                //if there's a rule without user-agent declaration, ignore it
                if (String.IsNullOrEmpty(userAgent))
                {
                    Malformed = true;
                    break;
                }

                if (robotsLine.Type == LineType.AccessRule)
                {
                    var accessRule = new AccessRule(userAgent, robotsLine, ++ruleCount);

                    // Rules addressed to "*" are kept apart from rules for a named user-agent.
                    if (accessRule.For.Equals("*"))
                    {
                        globalAccessRules.Add(accessRule);
                    }
                    else
                    {
                        specificAccessRules.Add(accessRule);
                    }

                    if (!accessRule.Allowed && !String.IsNullOrEmpty(accessRule.Path))
                    {
                        // We say !String.IsNullOrEmpty(x.Path) because the rule "Disallow: " means nothing is disallowed.
                        IsAnyPathDisallowed = true;
                    }
                }
                else
                {
                    crawlDelayRules.Add(new CrawlDelayRule(userAgent, robotsLine, ++ruleCount));
                }

                HasRules = true;
                break;

            case LineType.Unknown:
                Malformed = true;
                break;
        }
    }
}