示例#1
0
        /// <summary>
        /// Decides whether <paramref name="url"/> may be fetched according to the
        /// robots.txt rules of the site it belongs to.
        /// </summary>
        /// <remarks>
        /// Rule sets are looked up in <c>CACHE</c>. On a miss, <c>/robots.txt</c> is
        /// requested relative to <paramref name="url"/> and the outcome is cached:
        /// a 200 response is parsed, a 403 yields <c>FORBID_ALL_RULES</c> unless
        /// <c>ALLOW_FORBIDDEN</c> is set, and anything else yields <c>EMPTY_RULES</c>.
        /// The cache key is scheme + authority (host plus any non-default port),
        /// because robots.txt is scoped to that combination rather than to the bare
        /// host name.
        /// </remarks>
        /// <param name="url">Absolute URL to check.</param>
        /// <returns>What the site's rule set reports for the URL's path.</returns>
        /// <exception cref="System.ArgumentNullException"><paramref name="url"/> is null.</exception>
        public static bool IsAllowed(System.Uri url)
        {
            if (url == null)
            {
                throw new System.ArgumentNullException("url");
            }

            // Key on scheme://host[:port] so that http/https and alternate ports
            // of the same host do not share (and overwrite) each other's rules.
            System.String site = url.Scheme + "://" + url.Authority;

            RobotRuleSet robotRules = (RobotRuleSet)CACHE[site];

            if (robotRules == null)
            {
                // cache miss: fetch /robots.txt from the same site as the URL
                //UPGRADE_TODO: Class 'java.net.URL' was converted to a 'System.Uri' which does not throw an exception if a URL specifies an unknown protocol. 'ms-help://MS.VSCC.2003/commoner/redir/redirect.htm?keyword="jlca1132_3"'
                HttpResponseMgr response = new HttpResponseMgr(new System.Uri(url, "/robots.txt"));

                if (response.Code == 200)
                {
                    // found rules: parse them
                    robotRules = new RobotRulesParser().ParseRules(response.Content);
                }
                else if ((response.Code == 403) && (!ALLOW_FORBIDDEN))
                {
                    // access to robots.txt is forbidden: use forbid all
                    robotRules = FORBID_ALL_RULES;
                }
                else
                {
                    // any other status: use default rules
                    robotRules = EMPTY_RULES;
                }

                CACHE[site] = robotRules;                 // cache rules for this site
            }

            // check rules against the path; an empty path means the site root
            System.String path = url.AbsolutePath;
            if ((path == null) || (path.Length == 0))
            {
                path = "/";
            }

            return robotRules.IsAllowed(path);
        }
				/// <summary>
				/// Creates an entry pairing a path prefix with an allow/disallow flag.
				/// </summary>
				/// <param name="enclosingInstance">Rule set this entry is attached to.</param>
				/// <param name="prefix">Path prefix stored in the entry.</param>
				/// <param name="allowed">Flag stored alongside the prefix.</param>
				internal RobotsEntry(RobotRuleSet enclosingInstance, System.String prefix, bool allowed)
				{
					// Record the owning rule set first, then the entry's own data.
					InitBlock(enclosingInstance);

					m_strPrefix = prefix;
					m_bAllowed = allowed;
				}
				/// <summary>
				/// Stores the reference to the rule set that owns this entry.
				/// </summary>
				/// <param name="enclosingInstance">Owning rule set.</param>
				private void InitBlock(RobotRuleSet enclosingInstance)
				{
					m_enclosingInstance = enclosingInstance;
				}
		/// <summary>
		/// Parses raw robots.txt content and returns the <see cref="RobotRuleSet"/>
		/// built from the stanza that best matches this robot.
		/// </summary>
		/// <remarks>
		/// Each <c>User-agent:</c> line is split into agent names, which are looked up
		/// (lower-cased) in <c>m_robotNames</c> to obtain an integer precedence; the
		/// stanza with the numerically smallest precedence wins. <c>Disallow:</c> and
		/// <c>Allow:</c> lines add prefixes to the current stanza, and an empty value
		/// clears the prefixes collected so far. Directive names are matched
		/// case-insensitively using the invariant culture so the result does not
		/// depend on the thread's current culture.
		/// </remarks>
		/// <param name="robotContent">Raw robots.txt bytes; may be null.</param>
		/// <returns>
		/// The best matching rule set, or <c>EMPTY_RULES</c> when
		/// <paramref name="robotContent"/> is null or no stanza names a known agent.
		/// </returns>
		internal virtual RobotRuleSet ParseRules(byte[] robotContent)
		{
			if (robotContent == null)
			{
				return EMPTY_RULES;
			}

			// Directive keywords are ASCII; upper-casing with the invariant culture
			// avoids culture-specific mappings (e.g. Turkish i -> İ) breaking the match.
			System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;

			System.String content = new System.String(SupportMisc.ToCharArray(robotContent));

			Tokenizer lineParser = new Tokenizer(content, "\n\r");

			RobotRuleSet bestRulesSoFar = null;
			int bestPrecedenceSoFar = NO_PRECEDENCE;

			RobotRuleSet currentRules = new RobotRuleSet();
			int currentPrecedence = NO_PRECEDENCE;

			bool addRules = false; // in stanza for our robot
			bool doneAgents = false; // detect multiple agent lines

			while (lineParser.HasMoreTokens())
			{
				System.String line = lineParser.NextToken();

				// trim out comments and whitespace
				int hashPos = line.IndexOf('#');
				if (hashPos >= 0)
				{
					line = line.Substring(0, hashPos);
				}
				line = line.Trim();

				if ((line.Length >= 11) && line.Substring(0, 11).ToUpper(invariant).Equals("USER-AGENT:"))
				{
					if (doneAgents)
					{
						// A rule line was seen since the last agent line, so this starts a
						// new stanza: bank the finished one if it beats the best so far.
						if (currentPrecedence < bestPrecedenceSoFar)
						{
							bestPrecedenceSoFar = currentPrecedence;
							bestRulesSoFar = currentRules;
							currentPrecedence = NO_PRECEDENCE;
							currentRules = new RobotRuleSet();
						}
						addRules = false;
					}
					doneAgents = false;

					System.String agentNames = line.Substring(line.IndexOf(':') + 1);
					agentNames = agentNames.Trim();
					Tokenizer agentTokenizer = new Tokenizer(agentNames);

					while (agentTokenizer.HasMoreTokens())
					{
						// for each agent listed, see if it's us:
						// NOTE(review): ToLower() is culture-sensitive; kept unchanged so the
						// lookup matches however the m_robotNames keys were built - confirm.
						System.String agentName = agentTokenizer.NextToken().ToLower();

						object obInt = m_robotNames[agentName];

						if (obInt != null)
						{
							int precedence = (Int32)obInt;
							if ((precedence < currentPrecedence) && (precedence < bestPrecedenceSoFar))
							{
								currentPrecedence = precedence;
							}
						}
					}

					if (currentPrecedence < bestPrecedenceSoFar)
					{
						addRules = true;
					}
				}
				else if ((line.Length >= 9) && line.Substring(0, 9).ToUpper(invariant).Equals("DISALLOW:"))
				{
					doneAgents = true;
					System.String path = line.Substring(line.IndexOf(':') + 1);
					path = path.Trim();
					try
					{
						path = HttpUtility.UrlDecode(path, System.Text.Encoding.GetEncoding(CHARACTER_ENCODING.ToLower()));
					}
					catch (System.Exception)
					{
						// best effort: keep the undecoded path and carry on
						//LOG.warning("error parsing robots rules- can't decode path: " + path);
						Trace.WriteLine("error parsing robots rules- can't decode path: " + path);
					}

					if (path.Length == 0)
					{
						// "empty rule"
						if (addRules)
						{
							currentRules.ClearPrefixes();
						}
					}
					else
					{
						// rule with path
						if (addRules)
						{
							currentRules.AddPrefix(path, false);
						}
					}
				}
				else if ((line.Length >= 6) && line.Substring(0, 6).ToUpper(invariant).Equals("ALLOW:"))
				{
					doneAgents = true;
					// NOTE(review): unlike Disallow, the Allow path is not URL-decoded;
					// left as is because the matching side is not visible here - confirm.
					System.String path = line.Substring(line.IndexOf(':') + 1);
					path = path.Trim();

					if (path.Length == 0)
					{
						// "empty rule"- treat same as empty disallow
						if (addRules)
						{
							currentRules.ClearPrefixes();
						}
					}
					else
					{
						// rule with path
						if (addRules)
						{
							currentRules.AddPrefix(path, true);
						}
					}
				}
			}

			// the last stanza is never followed by another agent line; bank it here
			if (currentPrecedence < bestPrecedenceSoFar)
			{
				bestPrecedenceSoFar = currentPrecedence;
				bestRulesSoFar = currentRules;
			}

			if (bestPrecedenceSoFar == NO_PRECEDENCE)
			{
				return EMPTY_RULES;
			}
			return bestRulesSoFar;
		}
示例#5
0
 /// <summary>
 /// Creates an entry pairing a path prefix with an allow/disallow flag.
 /// </summary>
 /// <param name="enclosingInstance">Rule set this entry is attached to.</param>
 /// <param name="prefix">Path prefix stored in the entry.</param>
 /// <param name="allowed">Flag stored alongside the prefix.</param>
 internal RobotsEntry(RobotRuleSet enclosingInstance, System.String prefix, bool allowed)
 {
     // Record the owning rule set first, then the entry's own data.
     InitBlock(enclosingInstance);

     m_strPrefix = prefix;
     m_bAllowed = allowed;
 }
示例#6
0
 /// <summary>
 /// Stores the reference to the rule set that owns this entry.
 /// </summary>
 /// <param name="enclosingInstance">Owning rule set.</param>
 private void InitBlock(RobotRuleSet enclosingInstance)
 {
     m_enclosingInstance = enclosingInstance;
 }
示例#7
0
        /// <summary>
        /// Parses raw robots.txt content and returns the <see cref="RobotRuleSet"/>
        /// built from the stanza that best matches this robot.
        /// </summary>
        /// <remarks>
        /// Each <c>User-agent:</c> line is split into agent names, which are looked up
        /// (lower-cased) in <c>m_robotNames</c> to obtain an integer precedence; the
        /// stanza with the numerically smallest precedence wins. <c>Disallow:</c> and
        /// <c>Allow:</c> lines add prefixes to the current stanza, and an empty value
        /// clears the prefixes collected so far. Directive names are matched
        /// case-insensitively using the invariant culture so the result does not
        /// depend on the thread's current culture.
        /// </remarks>
        /// <param name="robotContent">Raw robots.txt bytes; may be null.</param>
        /// <returns>
        /// The best matching rule set, or <c>EMPTY_RULES</c> when
        /// <paramref name="robotContent"/> is null or no stanza names a known agent.
        /// </returns>
        internal virtual RobotRuleSet ParseRules(byte[] robotContent)
        {
            if (robotContent == null)
            {
                return EMPTY_RULES;
            }

            // Directive keywords are ASCII; upper-casing with the invariant culture
            // avoids culture-specific mappings (e.g. Turkish i -> İ) breaking the match.
            System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;

            System.String content = new System.String(SupportMisc.ToCharArray(robotContent));

            Tokenizer lineParser = new Tokenizer(content, "\n\r");

            RobotRuleSet bestRulesSoFar      = null;
            int          bestPrecedenceSoFar = NO_PRECEDENCE;

            RobotRuleSet currentRules      = new RobotRuleSet();
            int          currentPrecedence = NO_PRECEDENCE;

            bool addRules   = false;           // in stanza for our robot
            bool doneAgents = false;           // detect multiple agent lines

            while (lineParser.HasMoreTokens())
            {
                System.String line = lineParser.NextToken();

                // trim out comments and whitespace
                int hashPos = line.IndexOf('#');
                if (hashPos >= 0)
                {
                    line = line.Substring(0, hashPos);
                }
                line = line.Trim();

                if ((line.Length >= 11) && line.Substring(0, 11).ToUpper(invariant).Equals("USER-AGENT:"))
                {
                    if (doneAgents)
                    {
                        // A rule line was seen since the last agent line, so this starts a
                        // new stanza: bank the finished one if it beats the best so far.
                        if (currentPrecedence < bestPrecedenceSoFar)
                        {
                            bestPrecedenceSoFar = currentPrecedence;
                            bestRulesSoFar      = currentRules;
                            currentPrecedence   = NO_PRECEDENCE;
                            currentRules        = new RobotRuleSet();
                        }
                        addRules = false;
                    }
                    doneAgents = false;

                    System.String agentNames = line.Substring(line.IndexOf(':') + 1);
                    agentNames = agentNames.Trim();
                    Tokenizer agentTokenizer = new Tokenizer(agentNames);

                    while (agentTokenizer.HasMoreTokens())
                    {
                        // for each agent listed, see if it's us:
                        // NOTE(review): ToLower() is culture-sensitive; kept unchanged so the
                        // lookup matches however the m_robotNames keys were built - confirm.
                        System.String agentName = agentTokenizer.NextToken().ToLower();

                        object obInt = m_robotNames[agentName];

                        if (obInt != null)
                        {
                            int precedence = (Int32)obInt;
                            if ((precedence < currentPrecedence) && (precedence < bestPrecedenceSoFar))
                            {
                                currentPrecedence = precedence;
                            }
                        }
                    }

                    if (currentPrecedence < bestPrecedenceSoFar)
                    {
                        addRules = true;
                    }
                }
                else if ((line.Length >= 9) && line.Substring(0, 9).ToUpper(invariant).Equals("DISALLOW:"))
                {
                    doneAgents = true;
                    System.String path = line.Substring(line.IndexOf(':') + 1);
                    path = path.Trim();
                    try
                    {
                        path = HttpUtility.UrlDecode(path, System.Text.Encoding.GetEncoding(CHARACTER_ENCODING.ToLower()));
                    }
                    catch (System.Exception)
                    {
                        // best effort: keep the undecoded path and carry on
                        //LOG.warning("error parsing robots rules- can't decode path: " + path);
                        Trace.WriteLine("error parsing robots rules- can't decode path: " + path);
                    }

                    if (path.Length == 0)
                    {
                        // "empty rule"
                        if (addRules)
                        {
                            currentRules.ClearPrefixes();
                        }
                    }
                    else
                    {
                        // rule with path
                        if (addRules)
                        {
                            currentRules.AddPrefix(path, false);
                        }
                    }
                }
                else if ((line.Length >= 6) && line.Substring(0, 6).ToUpper(invariant).Equals("ALLOW:"))
                {
                    doneAgents = true;
                    // NOTE(review): unlike Disallow, the Allow path is not URL-decoded;
                    // left as is because the matching side is not visible here - confirm.
                    System.String path = line.Substring(line.IndexOf(':') + 1);
                    path = path.Trim();

                    if (path.Length == 0)
                    {
                        // "empty rule"- treat same as empty disallow
                        if (addRules)
                        {
                            currentRules.ClearPrefixes();
                        }
                    }
                    else
                    {
                        // rule with path
                        if (addRules)
                        {
                            currentRules.AddPrefix(path, true);
                        }
                    }
                }
            }

            // the last stanza is never followed by another agent line; bank it here
            if (currentPrecedence < bestPrecedenceSoFar)
            {
                bestPrecedenceSoFar = currentPrecedence;
                bestRulesSoFar      = currentRules;
            }

            if (bestPrecedenceSoFar == NO_PRECEDENCE)
            {
                return EMPTY_RULES;
            }
            return bestRulesSoFar;
        }