/// <summary>
/// Returns a <see cref="RobotRuleSet"/> object which encapsulates the
/// rules parsed from the supplied <code>robotContent</code>.
/// </summary>
/// <remarks>
/// The content is split into lines on <code>\n</code> / <code>\r</code>. Everything
/// after a <code>#</code> on a line is discarded and the remainder is trimmed.
/// Three directives are recognised, case-insensitively, by prefix:
/// <code>User-agent:</code>, <code>Disallow:</code> and <code>Allow:</code>; any other
/// line is ignored.
///
/// Each agent name on a <code>User-agent:</code> line is looked up in
/// <code>m_robotNames</code> to obtain a precedence. A numerically smaller
/// precedence wins: rules are only collected for a stanza whose precedence is
/// smaller than the best one seen so far, and the rule set of the winning
/// stanza is what gets returned.
/// </remarks>
/// <param name="robotContent">Raw bytes of the robots file; may be <code>null</code>.</param>
/// <returns>
/// The rule set of the best matching stanza, or <code>EMPTY_RULES</code> when
/// <code>robotContent</code> is <code>null</code> or no stanza matched any
/// name in <code>m_robotNames</code>.
/// </returns>
internal virtual RobotRuleSet ParseRules(byte[] robotContent)
{
    if (robotContent == null)
    {
        return EMPTY_RULES;
    }

    System.String content = new System.String(SupportMisc.ToCharArray(robotContent));
    Tokenizer lineParser = new Tokenizer(content, "\n\r");

    RobotRuleSet bestRulesSoFar = null;
    int bestPrecedenceSoFar = NO_PRECEDENCE;

    RobotRuleSet currentRules = new RobotRuleSet();
    int currentPrecedence = NO_PRECEDENCE;

    bool addRules = false;   // in stanza for our robot
    bool doneAgents = false; // detect multiple agent lines

    while (lineParser.HasMoreTokens())
    {
        System.String line = lineParser.NextToken();

        // Trim out comments and whitespace. The char overload of IndexOf is an
        // ordinal search, so the result does not depend on the current culture.
        int hashPos = line.IndexOf('#');
        if (hashPos >= 0)
        {
            line = line.Substring(0, hashPos);
        }
        line = line.Trim();

        // Directive prefixes are matched with an ordinal, case-insensitive
        // comparison. Upper-casing both sides with the current culture breaks
        // under e.g. Turkish casing rules, where 'i' does not upper-case to 'I'.
        if (line.StartsWith("User-agent:", System.StringComparison.OrdinalIgnoreCase))
        {
            if (doneAgents)
            {
                // A rule line was seen since the previous agent line, so this
                // agent line opens a new stanza: bank the finished one if it
                // beats the best so far, and start collecting afresh.
                if (currentPrecedence < bestPrecedenceSoFar)
                {
                    bestPrecedenceSoFar = currentPrecedence;
                    bestRulesSoFar = currentRules;
                    currentPrecedence = NO_PRECEDENCE;
                    currentRules = new RobotRuleSet();
                }
                addRules = false;
            }
            doneAgents = false;

            System.String agentNames = line.Substring(line.IndexOf(':') + 1);
            agentNames = agentNames.Trim();
            Tokenizer agentTokenizer = new Tokenizer(agentNames);

            while (agentTokenizer.HasMoreTokens())
            {
                // for each agent listed, see if it's us:
                // NOTE(review): ToLower() is culture-sensitive. It is kept as-is
                // because the keys of m_robotNames are populated outside this
                // method and must be normalised the same way - confirm and
                // switch both sides to ToLowerInvariant() together.
                System.String agentName = agentTokenizer.NextToken().ToLower();

                object obInt = m_robotNames[agentName];

                if (obInt != null)
                {
                    int precedence = (int) obInt;
                    if ((precedence < currentPrecedence) && (precedence < bestPrecedenceSoFar))
                    {
                        currentPrecedence = precedence;
                    }
                }
            }

            if (currentPrecedence < bestPrecedenceSoFar)
            {
                addRules = true;
            }
        }
        else if (line.StartsWith("Disallow:", System.StringComparison.OrdinalIgnoreCase))
        {
            doneAgents = true;
            System.String path = line.Substring(line.IndexOf(':') + 1);
            path = path.Trim();
            try
            {
                // Encoding names are matched case-insensitively by GetEncoding;
                // the invariant lower-casing keeps the lookup culture-independent.
                path = HttpUtility.UrlDecode(path, System.Text.Encoding.GetEncoding(CHARACTER_ENCODING.ToLowerInvariant()));
            }
            catch (System.Exception)
            {
                // Best effort: keep the undecoded path and carry on parsing.
                //LOG.warning("error parsing robots rules- can't decode path: " + path);
                Trace.WriteLine("error parsing robots rules- can't decode path: " + path);
            }

            if (path.Length == 0)
            {
                // "empty rule"
                if (addRules)
                {
                    currentRules.ClearPrefixes();
                }
            }
            else
            {
                // rule with path
                if (addRules)
                {
                    currentRules.AddPrefix(path, false);
                }
            }
        }
        else if (line.StartsWith("Allow:", System.StringComparison.OrdinalIgnoreCase))
        {
            doneAgents = true;
            // Unlike Disallow paths, Allow paths are stored without URL-decoding.
            System.String path = line.Substring(line.IndexOf(':') + 1);
            path = path.Trim();

            if (path.Length == 0)
            {
                // "empty rule"- treat same as empty disallow
                if (addRules)
                {
                    currentRules.ClearPrefixes();
                }
            }
            else
            {
                // rule with path
                if (addRules)
                {
                    currentRules.AddPrefix(path, true);
                }
            }
        }
    }

    // The last stanza has no following agent line to close it, so bank it here.
    if (currentPrecedence < bestPrecedenceSoFar)
    {
        bestPrecedenceSoFar = currentPrecedence;
        bestRulesSoFar = currentRules;
    }

    if (bestPrecedenceSoFar == NO_PRECEDENCE)
    {
        return EMPTY_RULES;
    }
    return bestRulesSoFar;
}
/// <summary>
/// Looks up the <code>name</code> property and splits its value into an
/// array of strings. Tokens are delimited by commas or whitespace
/// (space, tab, newline, carriage return, form feed).
/// </summary>
/// <param name="name">Name of the property to look up.</param>
/// <returns>
/// The tokens of the property value in order of appearance, or
/// <code>null</code> when the property lookup yields <code>null</code>.
/// </returns>
public virtual System.String[] GetStrings(System.String name)
{
    string rawValue = GetPoperty(name);
    if (rawValue != null)
    {
        Tokenizer splitter = new Tokenizer(rawValue, ", \t\n\r\f");
        System.Collections.IList tokens = new System.Collections.ArrayList();

        // Drain the tokenizer into the list.
        for (; splitter.HasMoreTokens(); )
        {
            tokens.Add(splitter.NextToken());
        }

        // Hand the helper a correctly sized target array to fill.
        string[] target = new string[tokens.Count];
        return (string[]) Support.ICollectionSupport.ToArray(tokens, target);
    }

    // No such property.
    return null;
}