/// <summary>
/// Classifies every raw robots.txt line and rebuilds the rule and sitemap collections from them.
/// </summary>
/// <remarks>
/// The four collections (global access rules, agent-specific access rules, crawl-delay rules and
/// sitemaps) are replaced with fresh lists on every call. The <c>Malformed</c>, <c>HasRules</c> and
/// <c>IsAnyPathDisallowed</c> flags are only ever switched to <c>true</c> here; they are never reset.
/// Each rule is attributed to the value of the most recently seen user-agent line.
/// </remarks>
/// <param name="lines">The lines to process, in file order.</param>
private void readLines(string[] lines)
{
    globalAccessRules = new List<AccessRule>();
    specificAccessRules = new List<AccessRule>();
    crawlDelayRules = new List<CrawlDelayRule>();
    Sitemaps = new List<Sitemap>();

    // Value of the latest user-agent line; stays empty until one is encountered.
    string currentAgent = String.Empty;
    // Running counter handed to each rule so it knows its position among all accepted rules.
    int order = 0;

    foreach (string rawLine in lines)
    {
        var parsed = new Line(rawLine);

        switch (parsed.Type)
        {
            case LineType.Comment:
                // Comments carry no directives.
                break;

            case LineType.UserAgent:
                currentAgent = parsed.Value;
                break;

            case LineType.Sitemap:
                Sitemaps.Add(Sitemap.FromLine(parsed));
                break;

            case LineType.Unknown:
                Malformed = true;
                break;

            case LineType.AccessRule:
            case LineType.CrawlDelayRule:
                // A rule that is not preceded by a user-agent declaration is dropped,
                // and the file is flagged as malformed.
                if (String.IsNullOrEmpty(currentAgent))
                {
                    Malformed = true;
                    break;
                }

                if (parsed.Type == LineType.AccessRule)
                {
                    var rule = new AccessRule(currentAgent, parsed, ++order);

                    // Rules declared for "*" are kept apart from agent-specific ones.
                    if (rule.For.Equals("*"))
                    {
                        globalAccessRules.Add(rule);
                    }
                    else
                    {
                        specificAccessRules.Add(rule);
                    }

                    // An empty path is excluded on purpose: the rule "Disallow: " means
                    // nothing is disallowed.
                    if (!(rule.Allowed || String.IsNullOrEmpty(rule.Path)))
                    {
                        IsAnyPathDisallowed = true;
                    }
                }
                else
                {
                    crawlDelayRules.Add(new CrawlDelayRule(currentAgent, parsed, ++order));
                }

                HasRules = true;
                break;
        }
    }
}
/// <summary>
/// Walks the given robots.txt lines in order and fills the access-rule, crawl-delay and sitemap lists.
/// </summary>
/// <remarks>
/// All four lists are re-created at the start of the call. A rule line is attached to whatever
/// user-agent value was read last; a rule seen while that value is still null or empty is skipped
/// and sets <c>Malformed</c>, as does any line of unknown type. <c>HasRules</c> becomes true as soon
/// as one rule is accepted, and <c>IsAnyPathDisallowed</c> becomes true when an accepted access rule
/// is not an allow rule and has a non-empty path.
/// </remarks>
/// <param name="lines">Raw lines of the robots.txt content, one entry per line.</param>
private void readLines( string[] lines )
{
    globalAccessRules = new List<AccessRule>();
    specificAccessRules = new List<AccessRule>();
    crawlDelayRules = new List<CrawlDelayRule>();
    Sitemaps = new List<Sitemap>();

    string agent = String.Empty;
    int sequence = 0;

    for ( int index = 0; index < lines.Length; index++ )
    {
        var entry = new Line( lines[index] );
        var kind = entry.Type;

        if ( kind == LineType.Comment )
        {
            // Nothing to record for a comment.
            continue;
        }

        if ( kind == LineType.UserAgent )
        {
            // Later rules belong to this agent until another user-agent line replaces it.
            agent = entry.Value;
            continue;
        }

        if ( kind == LineType.Sitemap )
        {
            Sitemaps.Add( Sitemap.FromLine( entry ) );
            continue;
        }

        if ( kind == LineType.Unknown )
        {
            Malformed = true;
            continue;
        }

        bool isAccessRule = kind == LineType.AccessRule;
        if ( !isAccessRule && kind != LineType.CrawlDelayRule )
        {
            // Any other line type has no effect.
            continue;
        }

        if ( String.IsNullOrEmpty( agent ) )
        {
            // Rule without a preceding user-agent declaration: ignore it, but remember the file is malformed.
            Malformed = true;
            continue;
        }

        if ( !isAccessRule )
        {
            crawlDelayRules.Add( new CrawlDelayRule( agent, entry, ++sequence ) );
        }
        else
        {
            var accessRule = new AccessRule( agent, entry, ++sequence );

            // "*" rules go to the global list, everything else to the agent-specific list.
            if ( accessRule.For.Equals( "*" ) )
            {
                globalAccessRules.Add( accessRule );
            }
            else
            {
                specificAccessRules.Add( accessRule );
            }

            if ( !accessRule.Allowed )
            {
                // The path must be non-empty because the rule "Disallow: " means nothing is disallowed.
                if ( !String.IsNullOrEmpty( accessRule.Path ) )
                {
                    IsAnyPathDisallowed = true;
                }
            }
        }

        HasRules = true;
    }
}