Beispiel #1
0
        /// <summary>
        /// Rebuilds the rule and sitemap collections from the given robots.txt lines.
        /// Each line is wrapped in a <c>Line</c> and handled according to its <c>Type</c>.
        /// </summary>
        /// <param name="lines">The raw lines to process, in file order.</param>
        private void readLines(string[] lines)
        {
            // Start from empty collections every time this method runs.
            globalAccessRules   = new List<AccessRule>();
            specificAccessRules = new List<AccessRule>();
            crawlDelayRules     = new List<CrawlDelayRule>();
            Sitemaps            = new List<Sitemap>();

            // Value of the most recent user-agent line; rules are attributed to it.
            string currentAgent = String.Empty;

            // Running count of accepted rules, passed to each rule's constructor.
            int rulesSeen = 0;

            foreach (string rawLine in lines)
            {
                var parsed = new Line(rawLine);
                switch (parsed.Type)
                {
                case LineType.Comment:
                    // Comments carry no information for us.
                    break;

                case LineType.UserAgent:
                    currentAgent = parsed.Value;
                    break;

                case LineType.Sitemap:
                    Sitemaps.Add(Sitemap.FromLine(parsed));
                    break;

                case LineType.Unknown:
                    Malformed = true;
                    break;

                case LineType.AccessRule:
                case LineType.CrawlDelayRule:
                    // A rule that shows up before any user-agent declaration is ignored,
                    // but the input is flagged as malformed.
                    if (String.IsNullOrEmpty(currentAgent))
                    {
                        Malformed = true;
                        break;
                    }

                    rulesSeen++;
                    if (parsed.Type == LineType.CrawlDelayRule)
                    {
                        crawlDelayRules.Add(new CrawlDelayRule(currentAgent, parsed, rulesSeen));
                    }
                    else
                    {
                        var rule = new AccessRule(currentAgent, parsed, rulesSeen);

                        // Rules addressed to "*" are kept apart from agent-specific ones.
                        if (rule.For.Equals("*"))
                        {
                            globalAccessRules.Add(rule);
                        }
                        else
                        {
                            specificAccessRules.Add(rule);
                        }

                        // An empty path is excluded here because the rule "Disallow: "
                        // means nothing is disallowed.
                        if (!(rule.Allowed || String.IsNullOrEmpty(rule.Path)))
                        {
                            IsAnyPathDisallowed = true;
                        }
                    }

                    HasRules = true;
                    break;
                }
            }
        }
Beispiel #2
0
        /// <summary>
        /// Walks the supplied robots.txt lines once and refills the access-rule,
        /// crawl-delay and sitemap collections, setting the Malformed, HasRules and
        /// IsAnyPathDisallowed members along the way.
        /// </summary>
        /// <param name="lines">The raw lines to process, in file order.</param>
        private void readLines( string[] lines )
        {
            // Fresh collections on every call.
            globalAccessRules = new List<AccessRule>();
            specificAccessRules = new List<AccessRule>();
            crawlDelayRules = new List<CrawlDelayRule>();
            Sitemaps = new List<Sitemap>();

            // Last user-agent value seen; empty until the first user-agent line.
            string activeAgent = String.Empty;

            // Incremented for every accepted rule and handed to its constructor.
            int counter = 0;

            foreach ( string text in lines )
            {
                var entry = new Line( text );
                var kind = entry.Type;

                // Comments are skipped outright.
                if ( kind == LineType.Comment )
                {
                    continue;
                }

                if ( kind == LineType.UserAgent )
                {
                    activeAgent = entry.Value;
                    continue;
                }

                if ( kind == LineType.Sitemap )
                {
                    Sitemaps.Add( Sitemap.FromLine( entry ) );
                    continue;
                }

                if ( kind == LineType.Unknown )
                {
                    Malformed = true;
                    continue;
                }

                // Anything that is not a rule at this point is left untouched.
                if ( kind != LineType.AccessRule && kind != LineType.CrawlDelayRule )
                {
                    continue;
                }

                // A rule with no preceding user-agent declaration is dropped and
                // marks the input as malformed.
                if ( String.IsNullOrEmpty( activeAgent ) )
                {
                    Malformed = true;
                    continue;
                }

                if ( kind == LineType.AccessRule )
                {
                    var rule = new AccessRule( activeAgent, entry, ++counter );

                    // "*" rules go to the global list, everything else to the specific list.
                    if ( !rule.For.Equals( "*" ) )
                    {
                        specificAccessRules.Add( rule );
                    }
                    else
                    {
                        globalAccessRules.Add( rule );
                    }

                    // The empty-path check exists because the rule "Disallow: " means
                    // nothing is disallowed.
                    bool disallowsSomething = !rule.Allowed && !String.IsNullOrEmpty( rule.Path );
                    if ( disallowsSomething )
                    {
                        IsAnyPathDisallowed = true;
                    }
                }
                else
                {
                    crawlDelayRules.Add( new CrawlDelayRule( activeAgent, entry, ++counter ) );
                }

                HasRules = true;
            }
        }