Exemple #1
0
        /// <summary>
        /// Collects the <c>value</c> attribute of every enabled e-mail header element
        /// found in <paramref name="source"/>.
        /// </summary>
        /// <param name="source">Parsed HTML document that is searched by element id.</param>
        /// <param name="salesForce">
        /// Supplies <c>emailHeaderIdentities</c>: pairs of element id and a flag telling
        /// whether that element should be read.
        /// </param>
        /// <returns>
        /// A dictionary keyed by element id whose records carry the element's <c>value</c>
        /// attribute as <c>emailAddress</c>. Ids whose flag is false, whose element is not
        /// present in the document, or whose element has no <c>value</c> attribute are omitted.
        /// </returns>
        private static Dictionary<string, EmailRecord> getNameValueByElementType(
            HtmlAgilityPack.HtmlDocument source,
            SalesForce salesForce
            )
        {
            Dictionary<string, EmailRecord> output = new Dictionary<string, EmailRecord>();

            foreach (KeyValuePair<string, bool> emailItem in salesForce.emailHeaderIdentities)
            {
                if (!emailItem.Value)
                {
                    continue;
                }

                // GetElementbyId yields null when the id is absent from the document;
                // skip such ids instead of failing with a NullReferenceException.
                HtmlAgilityPack.HtmlNode editNode = source.GetElementbyId(emailItem.Key);
                if (editNode == null)
                {
                    continue;
                }

                if (!editNode.Attributes.Any(x => x.Name == "value"))
                {
                    continue;
                }

                HtmlAgilityPack.HtmlAttribute attribute = editNode.Attributes["value"];
                if (attribute == null)
                {
                    continue;
                }

                EmailRecord record = new EmailRecord();
                record.emailAddress = attribute.Value;

                output.Add(emailItem.Key, record);
            }

            return output;
        }
        /// <summary>
        /// Creates a wrapper around the given attribute.
        /// </summary>
        /// <param name="wrappedAttribute">The attribute to wrap. May not be null.</param>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="wrappedAttribute"/> is null.
        /// </exception>
        public HtmlAttributeWrapper(HtmlAttribute wrappedAttribute)
        {
            if (wrappedAttribute != null)
            {
                _wrappedAttribute = wrappedAttribute;
                return;
            }

            throw new ArgumentNullException("wrappedAttribute");
        }
Exemple #3
0
		/// <summary>
		/// Produces a new attribute that carries the same name and value as this one.
		/// </summary>
		/// <returns>A new attribute constructed with this attribute's owner document.</returns>
		public HtmlAttribute Clone()
		{
			// Name is copied before Value, matching the order of the assignments it replaces.
			return new HtmlAttribute(_ownerdocument)
			{
				Name = Name,
				Value = Value
			};
		}
 /// <summary>
 /// Rewrites the attribute value as an absolute URL when it is a well-formed
 /// relative URI that does not start with "http:" or "https:".
 /// </summary>
 /// <param name="attr">Attribute whose value is inspected and possibly replaced.</param>
 /// <param name="baseUrl">Base URL the relative value is combined with.</param>
 private static void MakeAbsolute(HtmlAttribute attr, Uri baseUrl) {
     string candidate = attr.Value;

     bool startsWithHttpScheme =
         candidate.StartsWith("http:", StringComparison.OrdinalIgnoreCase)
         || candidate.StartsWith("https:", StringComparison.OrdinalIgnoreCase);
     if (startsWithHttpScheme) {
         return;
     }

     if (!Uri.IsWellFormedUriString(candidate, UriKind.Relative)) {
         return;
     }

     Uri absolute = new Uri(baseUrl, candidate);
     attr.Value = absolute.ToString();
 }
        /// <summary>
        /// Derives a model name from an attribute value by removing the
        /// "/usedcar/{make}__" fragment and the ".html" extension.
        /// </summary>
        /// <param name="make">Make inserted into the fragment that is removed.</param>
        /// <param name="htmlAttribute">Attribute holding the value to reduce; may be null.</param>
        /// <returns>The reduced value, or an empty string when the attribute is null.</returns>
        private string ExtractModel(string make, HtmlAttribute htmlAttribute)
        {
            if (htmlAttribute == null)
            {
                return string.Empty;
            }

            string makeFragment = string.Format(@"/usedcar/{0}__", make);
            string withoutFragment = htmlAttribute.Value.Replace(makeFragment, string.Empty);
            return withoutFragment.Replace(".html", string.Empty);
        }
Exemple #6
0
        /// <summary>
        /// Reads the <c>href</c> attribute of the element selected by
        /// <c>salesForce.elementOne</c> and stores it in
        /// <c>salesForce.endpointEditSearchFieldTemplate</c>.
        /// </summary>
        /// <param name="source">Parsed HTML document that is queried.</param>
        /// <param name="salesForce">
        /// Supplies the selector (<c>elementOne</c>) and receives the extracted
        /// <c>href</c> value.
        /// </param>
        /// <exception cref="System.InvalidOperationException">
        /// No element matches the selector, or the matched element has no <c>href</c> attribute.
        /// </exception>
        private static void getNameValueByElementTypeVoid(
            HtmlAgilityPack.HtmlDocument source,
            SalesForce salesForce
            )
        {
            var document = source.DocumentNode;

            HtmlAgilityPack.HtmlNode editNode = document.QuerySelector(salesForce.elementOne);
            if (editNode == null)
            {
                // Fail with a descriptive error instead of a bare NullReferenceException.
                throw new System.InvalidOperationException(
                    "No element matches the selector '" + salesForce.elementOne + "'.");
            }

            HtmlAgilityPack.HtmlAttribute attribute = editNode.Attributes["href"];
            if (attribute == null)
            {
                throw new System.InvalidOperationException(
                    "The element matching '" + salesForce.elementOne + "' has no href attribute.");
            }

            salesForce.endpointEditSearchFieldTemplate = attribute.Value;
        }
        /// <summary>
        /// Puts the given attribute at the front of the collection.
        /// </summary>
        /// <param name="newAttribute">The attribute to insert. May not be null.</param>
        /// <returns>The attribute that was inserted.</returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="newAttribute"/> is null.
        /// </exception>
        public HtmlAttribute Prepend(HtmlAttribute newAttribute)
        {
            if (newAttribute == null)
                throw new ArgumentNullException("newAttribute");

            // Register the attribute by name, hand it this collection's owner node,
            // then place it at position 0 of the ordered list.
            _hashitems[newAttribute.Name] = newAttribute;
            newAttribute._ownernode = _ownernode;
            _items.Insert(0, newAttribute);

            // Flag both change markers on the owner node.
            _ownernode._innerchanged = true;
            _ownernode._outerchanged = true;

            return newAttribute;
        }
Exemple #8
0
        /// <summary>
        /// Removes script-related keywords from an attribute value and makes the value
        /// safe by HTML encoding and HTML entity escaping.
        /// </summary>
        /// <remarks>
        /// Keyword removal runs only when <c>CleanAttributes</c> is set; entity escaping
        /// runs only when <c>EncodeHtmlEntities</c> is set. The attribute is modified in place.
        /// </remarks>
        /// <param name="attribute">Attribute whose value is checked and cleaned.</param>
        private void CleanAttributeValues(HapHtmlAttribute attribute)
        {
            if (CleanAttributes)
            {
                string name = attribute.Name;
                string cleaned = HttpUtility.HtmlEncode(attribute.Value);

                // The patterns tolerate whitespace between the letters of each keyword.
                cleaned = Regex.Replace(cleaned, @"\s*j\s*a\s*v\s*a\s*s\s*c\s*r\s*i\s*p\s*t\s*", "", RegexOptions.IgnoreCase);
                cleaned = Regex.Replace(cleaned, @"\s*s\s*c\s*r\s*i\s*p\s*t\s*", "", RegexOptions.IgnoreCase);

                // Ordinal comparison: attribute names are identifiers, not culture-dependent text.
                if (string.Equals(name, "style", System.StringComparison.OrdinalIgnoreCase))
                {
                    cleaned = Regex.Replace(cleaned, @"\s*e\s*x\s*p\s*r\s*e\s*s\s*s\s*i\s*o\s*n\s*", "", RegexOptions.IgnoreCase);
                    cleaned = Regex.Replace(cleaned, @"\s*b\s*e\s*h\s*a\s*v\s*i\s*o\s*r\s*", "", RegexOptions.IgnoreCase);
                }

                if (string.Equals(name, "href", System.StringComparison.OrdinalIgnoreCase)
                    || string.Equals(name, "src", System.StringComparison.OrdinalIgnoreCase))
                {
                    cleaned = Regex.Replace(cleaned, @"\s*m\s*o\s*c\s*h\s*a\s*", "", RegexOptions.IgnoreCase);
                }

                attribute.Value = cleaned;
            }

            // HtmlEntity Escape
            if (EncodeHtmlEntities)
            {
                // Ensure no double encoding goes on - reverse the ones done by the CreoleParser.
                // The double quote is U+0022, so its entity is &#x22; (&#x32; is the digit '2').
                string value = attribute.Value;
                value = value.Replace("&#x22;", "\"");
                value = value.Replace("&#x3C;", "<");
                value = value.Replace("&#x3E;", ">");
                value = value.Replace("&#x26;", "&");
                value = value.Replace("&#x27;", "'");

                StringBuilder sbAttributeValue = new StringBuilder();
                foreach (char c in value)
                {
                    sbAttributeValue.Append(EncodeCharacterToHtmlEntityEscape(c));
                }

                attribute.Value = sbAttributeValue.ToString();
            }
        }
 /// <summary>
 /// Loads the page the attribute value points to and builds the image URL from
 /// the <c>src</c> attribute of the element whose id is <c>frontCover</c>.
 /// </summary>
 /// <param name="htmlAttribute">Attribute whose value is the URL of the page to load.</param>
 /// <returns>
 /// <c>BaseUrl</c> followed by the element's <c>src</c> value, or null when the
 /// attribute is null, the page cannot be loaded, or the element or its
 /// <c>src</c> attribute is missing.
 /// </returns>
 private string RetrieveImageUrl(HtmlAttribute htmlAttribute)
 {
     if (htmlAttribute == null)
     {
         return null;
     }

     try
     {
         var htmlDocument = _htmlWeb.Load(htmlAttribute.Value);
         if (htmlDocument == null || htmlDocument.DocumentNode == null)
         {
             return null;
         }

         var node = htmlDocument.DocumentNode.SelectSingleNode("//*[@id = 'frontCover']");
         if (node == null)
         {
             return null;
         }

         // Check for the attribute explicitly rather than relying on a swallowed
         // NullReferenceException when it is absent.
         var sourceAttribute = node.Attributes["src"];
         if (sourceAttribute == null)
         {
             return null;
         }

         return string.Format("{0}{1}", BaseUrl, sourceAttribute.Value);
     }
     catch (Exception)
     {
         // Best effort: any failure while loading or querying the page yields null.
         return null;
     }
 }
Exemple #10
0
        /// <summary>
        /// Returns the value of <paramref name="x"/>, or an empty string when
        /// <paramref name="x"/> is null.
        /// </summary>
        /// <param name="x">Attribute to read; may be null.</param>
        /// <returns>The attribute value, or "" for a null attribute.</returns>
        private string _value(HtmlAttribute x)
        {
            return x != null ? x.Value : "";
        }
Exemple #11
0
 /// <summary>
 /// Asserts that the attribute's value equals <paramref name="expectedValue"/>.
 /// </summary>
 /// <param name="a">Attribute under test.</param>
 /// <param name="expectedValue">Value the attribute is expected to have.</param>
 public static void WithValue(this HtmlAttribute a, string expectedValue)
 {
     var actualValue = a.Value;

     Assert.AreEqual(expectedValue, actualValue);
 }
        /// <summary>
        /// Walks <c>Text</c> one character at a time, dispatching on <c>_state</c> and
        /// calling the Push* helpers at node, node-name, attribute-name and
        /// attribute-value boundaries.
        /// </summary>
        /// <remarks>
        /// Each iteration reads <c>Text[_index]</c> into <c>_c</c> and then calls
        /// <c>IncrementPosition()</c>. The index arithmetic below (<c>_index - 1</c> for the
        /// position of <c>_c</c>, <c>Text[_index]</c> for the following character) presumably
        /// relies on that call advancing <c>_index</c> by one - confirm against
        /// <c>IncrementPosition</c>.
        /// Whenever <c>PushNodeEnd</c> returns false the loop is terminated by setting
        /// <c>_index</c> to <c>Text.Length</c>.
        /// </remarks>
        private void Parse()
        {
            // Quote character that opened the current quoted attribute value.
            int lastquote = 0;
            if (OptionComputeChecksum)
            {
                _crc32 = new Crc32();
            }

            // Reset the per-parse bookkeeping.
            Lastnodes = new Dictionary<string, HtmlNode>();
            _c = 0;
            _fullcomment = false;
            _parseerrors = new List<HtmlParseError>();
            _line = 1;
            _lineposition = 1;
            _maxlineposition = 1;

            _state = ParseState.Text;
            _oldstate = _state;
            _documentnode._innerlength = Text.Length;
            _documentnode._outerlength = Text.Length;
            _remainderOffset = Text.Length;

            // Parsing starts in a text node located at offset 0.
            _lastparentnode = _documentnode;
            _currentnode = CreateNode(HtmlNodeType.Text, 0);
            _currentattribute = null;

            _index = 0;
            PushNodeStart(HtmlNodeType.Text, 0);
            while (_index < Text.Length)
            {
                _c = Text[_index];
                IncrementPosition();

                // In most states NewCheck() is consulted first and, when it returns true,
                // the rest of the state's handling is skipped for this character.
                // NOTE(review): NewCheck is not visible here; presumably it detects the
                // start of a new tag/comment and switches _state itself - confirm.
                switch (_state)
                {
                    case ParseState.Text:
                        if (NewCheck())
                            continue;
                        break;

                    // Decide between a closing tag ('/') and any other tag.
                    case ParseState.WhichTag:
                        if (NewCheck())
                            continue;
                        if (_c == '/')
                        {
                            PushNodeNameStart(false, _index);
                        }
                        else
                        {
                            // The name starts at the current character, so step back to let
                            // the Tag state see it again (assumes DecrementPosition undoes
                            // IncrementPosition - confirm).
                            PushNodeNameStart(true, _index - 1);
                            DecrementPosition();
                        }
                        _state = ParseState.Tag;
                        break;

                    // Inside a tag name: whitespace, '/' or '>' ends the name.
                    case ParseState.Tag:
                        if (NewCheck())
                            continue;
                        if (IsWhiteSpace(_c))
                        {
                            PushNodeNameEnd(_index - 1);
                            // PushNodeNameEnd may have changed the state; if so, leave it alone.
                            if (_state != ParseState.Tag)
                                continue;
                            _state = ParseState.BetweenAttributes;
                            continue;
                        }
                        if (_c == '/')
                        {
                            PushNodeNameEnd(_index - 1);
                            if (_state != ParseState.Tag)
                                continue;
                            _state = ParseState.EmptyTag;
                            continue;
                        }
                        if (_c == '>')
                        {
                            PushNodeNameEnd(_index - 1);
                            if (_state != ParseState.Tag)
                                continue;
                            if (!PushNodeEnd(_index, false))
                            {
                                // stop parsing
                                _index = Text.Length;
                                break;
                            }
                            if (_state != ParseState.Tag)
                                continue;
                            _state = ParseState.Text;
                            PushNodeStart(HtmlNodeType.Text, _index);
                        }
                        break;

                    // After the tag name or after an attribute: skip whitespace, then either
                    // close the tag or start a new attribute name.
                    case ParseState.BetweenAttributes:
                        if (NewCheck())
                            continue;

                        if (IsWhiteSpace(_c))
                            continue;

                        if ((_c == '/') || (_c == '?'))
                        {
                            _state = ParseState.EmptyTag;
                            continue;
                        }

                        if (_c == '>')
                        {
                            if (!PushNodeEnd(_index, false))
                            {
                                // stop parsing
                                _index = Text.Length;
                                break;
                            }

                            if (_state != ParseState.BetweenAttributes)
                                continue;
                            _state = ParseState.Text;
                            PushNodeStart(HtmlNodeType.Text, _index);
                            continue;
                        }

                        PushAttributeNameStart(_index - 1);
                        _state = ParseState.AttributeName;
                        break;

                    // A '/' or '?' was seen inside a tag: '>' closes the node (PushNodeEnd is
                    // called with true here), anything else returns to BetweenAttributes.
                    case ParseState.EmptyTag:
                        if (NewCheck())
                            continue;

                        if (_c == '>')
                        {
                            if (!PushNodeEnd(_index, true))
                            {
                                // stop parsing
                                _index = Text.Length;
                                break;
                            }

                            if (_state != ParseState.EmptyTag)
                                continue;
                            _state = ParseState.Text;
                            PushNodeStart(HtmlNodeType.Text, _index);
                            continue;
                        }
                        _state = ParseState.BetweenAttributes;
                        break;

                    // Inside an attribute name: whitespace, '=' or '>' ends the name.
                    case ParseState.AttributeName:
                        if (NewCheck())
                            continue;

                        if (IsWhiteSpace(_c))
                        {
                            PushAttributeNameEnd(_index - 1);
                            _state = ParseState.AttributeBeforeEquals;
                            continue;
                        }
                        if (_c == '=')
                        {
                            PushAttributeNameEnd(_index - 1);
                            _state = ParseState.AttributeAfterEquals;
                            continue;
                        }
                        if (_c == '>')
                        {
                            PushAttributeNameEnd(_index - 1);
                            if (!PushNodeEnd(_index, false))
                            {
                                // stop parsing
                                _index = Text.Length;
                                break;
                            }
                            if (_state != ParseState.AttributeName)
                                continue;
                            _state = ParseState.Text;
                            PushNodeStart(HtmlNodeType.Text, _index);
                            continue;
                        }
                        break;

                    // Whitespace followed the attribute name; waiting for '=', '>' or the
                    // first character of another attribute.
                    case ParseState.AttributeBeforeEquals:
                        if (NewCheck())
                            continue;

                        if (IsWhiteSpace(_c))
                            continue;
                        if (_c == '>')
                        {
                            if (!PushNodeEnd(_index, false))
                            {
                                // stop parsing
                                _index = Text.Length;
                                break;
                            }
                            if (_state != ParseState.AttributeBeforeEquals)
                                continue;
                            _state = ParseState.Text;
                            PushNodeStart(HtmlNodeType.Text, _index);
                            continue;
                        }
                        if (_c == '=')
                        {
                            _state = ParseState.AttributeAfterEquals;
                            continue;
                        }
                        // no equals, no whitespace, it's a new attribute starting
                        _state = ParseState.BetweenAttributes;
                        DecrementPosition();
                        break;

                    // An '=' was seen: the value is either quoted (the quote character is
                    // remembered in lastquote) or unquoted.
                    case ParseState.AttributeAfterEquals:
                        if (NewCheck())
                            continue;

                        if (IsWhiteSpace(_c))
                            continue;

                        if ((_c == '\'') || (_c == '"'))
                        {
                            _state = ParseState.QuotedAttributeValue;
                            PushAttributeValueStart(_index, _c);
                            lastquote = _c;
                            continue;
                        }
                        if (_c == '>')
                        {
                            if (!PushNodeEnd(_index, false))
                            {
                                // stop parsing
                                _index = Text.Length;
                                break;
                            }
                            if (_state != ParseState.AttributeAfterEquals)
                                continue;
                            _state = ParseState.Text;
                            PushNodeStart(HtmlNodeType.Text, _index);
                            continue;
                        }
                        PushAttributeValueStart(_index - 1);
                        _state = ParseState.AttributeValue;
                        break;

                    // Unquoted attribute value: ends at whitespace or '>'.
                    case ParseState.AttributeValue:
                        if (NewCheck())
                            continue;

                        if (IsWhiteSpace(_c))
                        {
                            PushAttributeValueEnd(_index - 1);
                            _state = ParseState.BetweenAttributes;
                            continue;
                        }

                        if (_c == '>')
                        {
                            PushAttributeValueEnd(_index - 1);
                            if (!PushNodeEnd(_index, false))
                            {
                                // stop parsing
                                _index = Text.Length;
                                break;
                            }
                            if (_state != ParseState.AttributeValue)
                                continue;
                            _state = ParseState.Text;
                            PushNodeStart(HtmlNodeType.Text, _index);
                            continue;
                        }
                        break;

                    // Quoted attribute value: ends at the same quote character that opened it.
                    // NewCheck() is not consulted in this state.
                    case ParseState.QuotedAttributeValue:
                        if (_c == lastquote)
                        {
                            PushAttributeValueEnd(_index - 1);
                            _state = ParseState.BetweenAttributes;
                            continue;
                        }
                        if (_c == '<')
                        {
                            if (_index < Text.Length)
                            {
                                // '<' followed by '%': switch to ServerSideCode and remember
                                // the current state in _oldstate.
                                if (Text[_index] == '%')
                                {
                                    _oldstate = _state;
                                    _state = ParseState.ServerSideCode;
                                    continue;
                                }
                            }
                        }
                        break;

                    // Inside a comment: only '>' can end it.
                    case ParseState.Comment:
                        if (_c == '>')
                        {
                            if (_fullcomment)
                            {
                                // With _fullcomment set, the '>' only ends the comment when
                                // Text[_index - 2] and Text[_index - 3] are both '-'.
                                if ((Text[_index - 2] != '-') ||
                                    (Text[_index - 3] != '-'))
                                {
                                    continue;
                                }
                            }
                            if (!PushNodeEnd(_index, false))
                            {
                                // stop parsing
                                _index = Text.Length;
                                break;
                            }
                            _state = ParseState.Text;
                            PushNodeStart(HtmlNodeType.Text, _index);
                            continue;
                        }
                        break;

                    // Inside server-side code: '%' followed by '>' ends the block and the
                    // state to resume is derived from _oldstate.
                    case ParseState.ServerSideCode:
                        if (_c == '%')
                        {
                            if (_index < Text.Length)
                            {
                                if (Text[_index] == '>')
                                {
                                    switch (_oldstate)
                                    {
                                        case ParseState.AttributeAfterEquals:
                                            _state = ParseState.AttributeValue;
                                            break;

                                        case ParseState.BetweenAttributes:
                                            PushAttributeNameEnd(_index + 1);
                                            _state = ParseState.BetweenAttributes;
                                            break;

                                        default:
                                            _state = _oldstate;
                                            break;
                                    }
                                    // Step over the '>' that was only peeked at.
                                    IncrementPosition();
                                }
                            }
                        }
                        break;

                    case ParseState.PcData:
                        // look for </tag + 1 char

                        // check buffer end
                        if ((_currentnode._namelength + 3) <= (Text.Length - (_index - 1)))
                        {
                            // Case-insensitive match of "</" + current node name at _index - 1.
                            if (string.Compare(Text.Substring(_index - 1, _currentnode._namelength + 2),
                                               "</" + _currentnode.Name, StringComparison.OrdinalIgnoreCase) == 0)
                            {
                                // The character right after the name must be '>' or whitespace.
                                int c = Text[_index - 1 + 2 + _currentnode.Name.Length];
                                if ((c == '>') || (IsWhiteSpace(c)))
                                {
                                    // add the script as a text node
                                    HtmlNode script = CreateNode(HtmlNodeType.Text,
                                                                 _currentnode._outerstartindex +
                                                                 _currentnode._outerlength);
                                    script._outerlength = _index - 1 - script._outerstartindex;
                                    _currentnode.AppendChild(script);

                                    // Start the closing tag; its name begins after "</".
                                    PushNodeStart(HtmlNodeType.Element, _index - 1);
                                    PushNodeNameStart(false, _index - 1 + 2);
                                    _state = ParseState.Tag;
                                    IncrementPosition();
                                }
                            }
                        }
                        break;
                }
            }

            // finish the current work
            if (_currentnode._namestartindex > 0)
            {
                PushNodeNameEnd(_index);
            }
            PushNodeEnd(_index, false);

            // we don't need this anymore
            Lastnodes.Clear();
        }
 /// <summary>
 /// Returns the attribute's value, substituting an empty string for a null attribute.
 /// </summary>
 /// <param name="node">Attribute to read; may be null.</param>
 /// <returns>The attribute value, or <c>String.Empty</c> when <paramref name="node"/> is null.</returns>
 private string htmlValue(HtmlAttribute node)
 {
     if (node != null)
     {
         return node.Value;
     }

     return String.Empty;
 }
 /// <summary>
 /// Creates an adapter that stores the given attribute.
 /// </summary>
 /// <param name="attribute">The attribute to adapt; stored as-is, without a null check.</param>
 internal HtmlAttributeAdapter(AP.HtmlAttribute attribute)
 {
     this._attribute = attribute;
 }
 /// <summary>
 /// Creates a new current attribute and records where its name starts.
 /// </summary>
 /// <param name="index">
 /// Offset stored as both the name start index and the stream position of the attribute.
 /// </param>
 private void PushAttributeNameStart(int index)
 {
   var started = CreateAttribute();
   _currentattribute = started;

   // Position information comes from the parser's current line counters.
   started._namestartindex = index;
   started.Line = _line;
   started._lineposition = _lineposition;
   started._streamposition = index;
 }
		/// <summary>
		/// Writes one attribute, preceded by a single space, to <paramref name="outText"/>.
		/// </summary>
		/// <remarks>
		/// With <c>OptionOutputAsXml</c> the attribute's XML name is written with its
		/// HTML-encoded XML value in double quotes. Otherwise the raw value is written;
		/// a name of the form <c>&lt;% ... %&gt;</c> is written alone, and with
		/// <c>OptionOutputOptimizeAttributeValues</c> the quotes are omitted when the value
		/// contains no line feed, carriage return, tab or space.
		/// With <c>OptionOutputUpperCase</c> the name is upper-cased using the invariant
		/// culture, so the output does not depend on the current thread culture (under
		/// Turkish culture rules, for instance, "i" would not become "I").
		/// </remarks>
		/// <param name="outText">Writer that receives the attribute text.</param>
		/// <param name="att">Attribute to write.</param>
		internal void WriteAttribute(TextWriter outText, HtmlAttribute att)
		{
			string name;

			if (_ownerdocument.OptionOutputAsXml)
			{
				name = _ownerdocument.OptionOutputUpperCase
					? att.XmlName.ToUpperInvariant()
					: att.XmlName;

				outText.Write(" " + name + "=\"" + HtmlDocument.HtmlEncode(att.XmlValue) + "\"");
				return;
			}

			string rawName = att.Name;
			name = _ownerdocument.OptionOutputUpperCase
				? rawName.ToUpperInvariant()
				: rawName;

			// A server-side block used as an attribute ("<% ... %>") has no value part.
			if (rawName.Length >= 4)
			{
				if ((rawName[0] == '<') && (rawName[1] == '%') &&
					(rawName[rawName.Length - 1] == '>') && (rawName[rawName.Length - 2] == '%'))
				{
					outText.Write(" " + name);
					return;
				}
			}

			// Quotes may only be dropped when optimisation is on and the value has no
			// LF, CR, tab or space in it.
			bool quoteValue = true;
			if (_ownerdocument.OptionOutputOptimizeAttributeValues)
			{
				quoteValue = att.Value.IndexOfAny(new Char[] { (char)10, (char)13, (char)9, ' ' }) >= 0;
			}

			if (quoteValue)
			{
				outText.Write(" " + name + "=\"" + att.Value + "\"");
			}
			else
			{
				outText.Write(" " + name + "=" + att.Value);
			}
		}
 /// <summary>
 /// Deletes the given attribute from the collection.
 /// </summary>
 /// <param name="attribute">The attribute to remove. May not be null.</param>
 /// <exception cref="ArgumentNullException"><paramref name="attribute"/> is null.</exception>
 /// <exception cref="IndexOutOfRangeException">
 /// <c>GetAttributeIndex</c> returned -1 for the attribute.
 /// </exception>
 public void Remove(HtmlAttribute attribute)
 {
     if (attribute == null)
         throw new ArgumentNullException("attribute");

     int position = GetAttributeIndex(attribute);
     if (position != -1)
     {
         RemoveAt(position);
         return;
     }

     throw new IndexOutOfRangeException();
 }
 /// <summary>
 /// Replaces the attribute's value with the result of passing it through <c>ProcessUrl</c>.
 /// </summary>
 /// <param name="imgSrcAttribute">Attribute whose value is rewritten in place.</param>
 private void ProcessAttrubute(HtmlAttribute imgSrcAttribute)
 {
     imgSrcAttribute.Value = ProcessUrl(imgSrcAttribute.Value);
 }
        /// <summary>
        /// Removes script-related keywords from an attribute value and makes the value
        /// safe by HTML encoding and HTML entity escaping.
        /// </summary>
        /// <remarks>
        /// Keyword removal runs only when <c>CleanAttributes</c> is set; entity escaping
        /// runs only when <c>EncodeHtmlEntities</c> is set. The attribute is modified in place.
        /// </remarks>
        /// <param name="attribute">Attribute whose value is checked and cleaned.</param>
        private void CleanAttributeValues(HapHtmlAttribute attribute)
        {
            if (CleanAttributes)
            {
                string attributeName = attribute.Name;
                string sanitized = HttpUtility.HtmlEncode(attribute.Value);

                // The patterns tolerate whitespace between the letters of each keyword.
                sanitized = Regex.Replace(sanitized, @"\s*j\s*a\s*v\s*a\s*s\s*c\s*r\s*i\s*p\s*t\s*", "", RegexOptions.IgnoreCase);
                sanitized = Regex.Replace(sanitized, @"\s*s\s*c\s*r\s*i\s*p\s*t\s*", "", RegexOptions.IgnoreCase);

                // Ordinal comparison: attribute names are identifiers, not culture-dependent text.
                if (string.Equals(attributeName, "style", System.StringComparison.OrdinalIgnoreCase))
                {
                    sanitized = Regex.Replace(sanitized, @"\s*e\s*x\s*p\s*r\s*e\s*s\s*s\s*i\s*o\s*n\s*", "", RegexOptions.IgnoreCase);
                    sanitized = Regex.Replace(sanitized, @"\s*b\s*e\s*h\s*a\s*v\s*i\s*o\s*r\s*", "", RegexOptions.IgnoreCase);
                }

                if (string.Equals(attributeName, "href", System.StringComparison.OrdinalIgnoreCase)
                    || string.Equals(attributeName, "src", System.StringComparison.OrdinalIgnoreCase))
                {
                    sanitized = Regex.Replace(sanitized, @"\s*m\s*o\s*c\s*h\s*a\s*", "", RegexOptions.IgnoreCase);
                }

                attribute.Value = sanitized;
            }

            // HtmlEntity Escape
            if (EncodeHtmlEntities)
            {
                // Ensure no double encoding goes on - reverse the ones done by the CreoleParser.
                // The double quote is U+0022, so its entity is &#x22; (&#x32; is the digit '2').
                string value = attribute.Value;
                value = value.Replace("&#x22;", "\"");
                value = value.Replace("&#x3C;", "<");
                value = value.Replace("&#x3E;", ">");
                value = value.Replace("&#x26;", "&");
                value = value.Replace("&#x27;", "'");

                StringBuilder sbAttributeValue = new StringBuilder();
                foreach (char c in value)
                {
                    sbAttributeValue.Append(EncodeCharacterToHtmlEntityEscape(c));
                }

                attribute.Value = sbAttributeValue.ToString();
            }
        }
Exemple #20
0
        /// <summary>
        /// Builds a new attribute from a name and a value and adds it at the end of the collection.
        /// </summary>
        /// <param name="name">Name of the attribute to create.</param>
        /// <param name="value">Value of the attribute to create.</param>
        /// <returns>The result of appending the newly created attribute.</returns>
        public HtmlAttribute Append(string name, string value)
        {
            // The attribute is created through the owner node's document.
            return Append(_ownernode._ownerdocument.CreateAttribute(name, value));
        }
 /// <summary>
 /// Creates a facade that stores the given attribute.
 /// </summary>
 /// <param name="attrib">The attribute to expose; stored as-is, without a null check.</param>
 public HtmlAttributeFacade(HtmlAttribute attrib)
 {
     this._attrib = attrib;
 }
Exemple #22
0
        /// <summary>
        /// Removes script-related keywords from an attribute value and makes the value
        /// safe by HTML encoding and HTML entity escaping. The attribute is modified in place.
        /// </summary>
        /// <param name="attribute">Attribute whose value is checked and cleaned.</param>
        private static void CleanAttributeValues(HtmlAttribute attribute)
        {
            string name = attribute.Name;
            string cleaned = HttpUtility.HtmlEncode(attribute.Value);

            // The patterns tolerate whitespace between the letters of each keyword.
            cleaned = Regex.Replace(cleaned, @"\s*j\s*a\s*v\s*a\s*s\s*c\s*r\s*i\s*p\s*t\s*", "", RegexOptions.IgnoreCase);
            cleaned = Regex.Replace(cleaned, @"\s*s\s*c\s*r\s*i\s*p\s*t\s*", "", RegexOptions.IgnoreCase);

            // Ordinal comparison: attribute names are identifiers, not culture-dependent text.
            if (string.Equals(name, "style", System.StringComparison.OrdinalIgnoreCase))
            {
                cleaned = Regex.Replace(cleaned, @"\s*e\s*x\s*p\s*r\s*e\s*s\s*s\s*i\s*o\s*n\s*", "", RegexOptions.IgnoreCase);
                cleaned = Regex.Replace(cleaned, @"\s*b\s*e\s*h\s*a\s*v\s*i\s*o\s*r\s*", "", RegexOptions.IgnoreCase);
            }

            if (string.Equals(name, "href", System.StringComparison.OrdinalIgnoreCase)
                || string.Equals(name, "src", System.StringComparison.OrdinalIgnoreCase))
            {
                // NOTE(review): a check that blanked values not starting with "http://"
                // was sketched here as commented-out code but was never enabled.
                cleaned = Regex.Replace(cleaned, @"\s*m\s*o\s*c\s*h\s*a\s*", "", RegexOptions.IgnoreCase);
            }

            // HtmlEntity Escape
            StringBuilder sbAttributeValue = new StringBuilder();
            foreach (char c in cleaned)
            {
                sbAttributeValue.Append(EncodeCharacterToHtmlEntityEscape(c));
            }

            attribute.Value = sbAttributeValue.ToString();
        }
        /// <summary>
        /// Parses the value of a <c>@prefix</c> attribute - a whitespace separated list of
        /// <c>prefix: uri</c> mappings - and registers each mapping in the namespace map
        /// of <paramref name="evalContext"/>.
        /// </summary>
        /// <remarks>
        /// Whitespace before a prefix (including the whitespace separating one mapping from
        /// the next, and trailing whitespace) is skipped and never becomes part of the prefix
        /// name. Parsing stops with a warning when no colon follows a prefix or when no URI
        /// follows the colon; mappings registered before that point are kept.
        /// </remarks>
        /// <param name="context">Parser context (not used by this method).</param>
        /// <param name="evalContext">Evaluation context whose namespace map is updated.</param>
        /// <param name="attr">The <c>@prefix</c> attribute to parse.</param>
        /// <param name="baseUri">Base URI against which each mapped URI is resolved.</param>
        /// <param name="hiddenPrefixes">
        /// Receives prefixes that were already present in the namespace map. When null, a
        /// dictionary is created locally; since the parameter is passed by value the caller
        /// does not see it.
        /// </param>
        /// <param name="inScopePrefixes">Receives every prefix declared by the attribute.</param>
        private void ParsePrefixAttribute(RdfAParserContext context, RdfAEvaluationContext evalContext, HtmlAttribute attr, String baseUri, Dictionary<string,Uri> hiddenPrefixes, List<String> inScopePrefixes)
        {
            //Do nothing if the @prefix attribute is empty
            if (attr.Value.Equals(String.Empty)) return;

            String content = attr.Value;
            int position = 0;

            while (true)
            {
                //Skip whitespace in front of the prefix so it is not absorbed into the prefix name
                while (position < content.Length && Char.IsWhiteSpace(content[position]))
                {
                    position++;
                }
                //Nothing (more) to parse
                if (position == content.Length) return;

                //Grab a Prefix - characters up to the next colon
                int colon = content.IndexOf(':', position);
                if (colon < 0)
                {
                    this.OnWarning("Aborted parsing a prefix attribute since failed to find a prefix of the form prefix: from the following content: " + content.Substring(position));
                    return;
                }
                String prefix = content.Substring(position, colon - position);

                //Discard the colon and the whitespace after it
                position = colon + 1;
                while (position < content.Length && Char.IsWhiteSpace(content[position]))
                {
                    position++;
                }
                if (position == content.Length)
                {
                    this.OnWarning("Aborted parsing a prefix attribute since reached the end of the attribute without finding a URI to go with the prefix '" + prefix + ":'");
                    return;
                }

                //Grab the URI - characters up to the next whitespace or end of string
                int uriStart = position;
                while (position < content.Length && !Char.IsWhiteSpace(content[position]))
                {
                    position++;
                }
                String uriData = content.Substring(uriStart, position - uriStart);

                //Now resolve the URI and apply it
                String uri = Tools.ResolveUri(uriData, baseUri);
                if (!(uri.EndsWith("/") || uri.EndsWith("#"))) uri += "#";
                if (evalContext.NamespaceMap.HasNamespace(prefix))
                {
                    if (hiddenPrefixes == null) hiddenPrefixes = new Dictionary<string, Uri>();
                    hiddenPrefixes.Add(prefix, new Uri(uri));
                }
                evalContext.NamespaceMap.AddNamespace(prefix, new Uri(uri));
                inScopePrefixes.Add(prefix);
            }
        }
 /// <summary>
 /// Walks <c>Text</c> one character at a time and drives the <c>_state</c> state machine,
 /// reporting node, node-name and attribute boundaries through the Push* helpers.
 /// </summary>
 /// <remarks>
 /// Character codes compared against <c>_c</c> below:
 /// 34 = '"', 37 = '%', 39 = '\'', 45 = '-', 47 = '/', 60 = '&lt;', 61 = '=', 62 = '&gt;', 63 = '?'.
 /// NOTE(review): IncrementPosition(), DecrementPosition() and NewCheck() are not visible here.
 /// The code reads the current character at <c>_index</c> before IncrementPosition() and later
 /// refers to it as <c>_index - 1</c>, which suggests IncrementPosition() advances <c>_index</c> — confirm.
 /// </remarks>
 private void Parse()
 {
   // Remembers the opening quote character (39 or 34) of the attribute value currently being read.
   int num = 0;
   if (this.OptionComputeChecksum)
     this._crc32 = new Crc32();
   // Reset all per-parse bookkeeping before scanning.
   this.Lastnodes = new Dictionary<string, HtmlNode>();
   this._c = 0;
   this._fullcomment = false;
   this._parseerrors = new List<HtmlParseError>();
   this._line = 1;
   this._lineposition = 1;
   this._maxlineposition = 1;
   this._state = HtmlDocument.ParseState.Text;
   this._oldstate = this._state;
   // The document node initially spans the whole input.
   this._documentnode._innerlength = this.Text.Length;
   this._documentnode._outerlength = this.Text.Length;
   this._remainderOffset = this.Text.Length;
   this._lastparentnode = this._documentnode;
   this._currentnode = this.CreateNode(HtmlNodeType.Text, 0);
   this._currentattribute = (HtmlAttribute) null;
   this._index = 0;
   // Parsing starts inside a text node at offset 0.
   this.PushNodeStart(HtmlNodeType.Text, 0);
   while (this._index < this.Text.Length)
   {
     this._c = (int) this.Text[this._index];
     this.IncrementPosition();
     switch (this._state)
     {
       // Plain text: only NewCheck() is consulted; both outcomes move on to the next character.
       case HtmlDocument.ParseState.Text:
         if (!this.NewCheck())
           continue;
         continue;
       // Just entered a tag: '/' starts the name at _index, anything else starts it one character back.
       // NOTE(review): the bool passed to PushNodeNameStart presumably distinguishes start tags (true)
       // from closing tags (false) — confirm against its definition.
       case HtmlDocument.ParseState.WhichTag:
         if (!this.NewCheck())
         {
           if (this._c == 47)
           {
             this.PushNodeNameStart(false, this._index);
           }
           else
           {
             this.PushNodeNameStart(true, this._index - 1);
             this.DecrementPosition();
           }
           this._state = HtmlDocument.ParseState.Tag;
           continue;
         }
         continue;
       // Reading a tag name: whitespace, '/' or '>' ends the name.
       // The state is re-checked after each Push* call because those helpers may change _state.
       case HtmlDocument.ParseState.Tag:
         if (!this.NewCheck())
         {
           if (HtmlDocument.IsWhiteSpace(this._c))
           {
             this.PushNodeNameEnd(this._index - 1);
             if (this._state == HtmlDocument.ParseState.Tag)
             {
               this._state = HtmlDocument.ParseState.BetweenAttributes;
               continue;
             }
             continue;
           }
           if (this._c == 47)
           {
             this.PushNodeNameEnd(this._index - 1);
             if (this._state == HtmlDocument.ParseState.Tag)
             {
               this._state = HtmlDocument.ParseState.EmptyTag;
               continue;
             }
             continue;
           }
           if (this._c == 62)
           {
             this.PushNodeNameEnd(this._index - 1);
             if (this._state == HtmlDocument.ParseState.Tag)
             {
               if (!this.PushNodeEnd(this._index, false))
               {
                 // PushNodeEnd returned false: jump to end of input, which terminates the loop.
                 this._index = this.Text.Length;
                 continue;
               }
               if (this._state == HtmlDocument.ParseState.Tag)
               {
                 this._state = HtmlDocument.ParseState.Text;
                 this.PushNodeStart(HtmlNodeType.Text, this._index);
                 continue;
               }
               continue;
             }
             continue;
           }
           continue;
         }
         continue;
       // Inside a tag, between attributes: whitespace is skipped; '/' or '?' switches to EmptyTag;
       // '>' closes the tag; any other character begins an attribute name.
       case HtmlDocument.ParseState.BetweenAttributes:
         if (!this.NewCheck() && !HtmlDocument.IsWhiteSpace(this._c))
         {
           if (this._c == 47 || this._c == 63)
           {
             this._state = HtmlDocument.ParseState.EmptyTag;
             continue;
           }
           if (this._c == 62)
           {
             if (!this.PushNodeEnd(this._index, false))
             {
               this._index = this.Text.Length;
               continue;
             }
             if (this._state == HtmlDocument.ParseState.BetweenAttributes)
             {
               this._state = HtmlDocument.ParseState.Text;
               this.PushNodeStart(HtmlNodeType.Text, this._index);
               continue;
             }
             continue;
           }
           this.PushAttributeNameStart(this._index - 1);
           this._state = HtmlDocument.ParseState.AttributeName;
           continue;
         }
         continue;
       // After '/' or '?': '>' ends the node (PushNodeEnd is called with true here);
       // any other character falls back to BetweenAttributes.
       case HtmlDocument.ParseState.EmptyTag:
         if (!this.NewCheck())
         {
           if (this._c == 62)
           {
             if (!this.PushNodeEnd(this._index, true))
             {
               this._index = this.Text.Length;
               continue;
             }
             if (this._state == HtmlDocument.ParseState.EmptyTag)
             {
               this._state = HtmlDocument.ParseState.Text;
               this.PushNodeStart(HtmlNodeType.Text, this._index);
               continue;
             }
             continue;
           }
           this._state = HtmlDocument.ParseState.BetweenAttributes;
           continue;
         }
         continue;
       // Reading an attribute name: whitespace, '=' or '>' ends the name.
       case HtmlDocument.ParseState.AttributeName:
         if (!this.NewCheck())
         {
           if (HtmlDocument.IsWhiteSpace(this._c))
           {
             this.PushAttributeNameEnd(this._index - 1);
             this._state = HtmlDocument.ParseState.AttributeBeforeEquals;
             continue;
           }
           if (this._c == 61)
           {
             this.PushAttributeNameEnd(this._index - 1);
             this._state = HtmlDocument.ParseState.AttributeAfterEquals;
             continue;
           }
           if (this._c == 62)
           {
             this.PushAttributeNameEnd(this._index - 1);
             if (!this.PushNodeEnd(this._index, false))
             {
               this._index = this.Text.Length;
               continue;
             }
             if (this._state == HtmlDocument.ParseState.AttributeName)
             {
               this._state = HtmlDocument.ParseState.Text;
               this.PushNodeStart(HtmlNodeType.Text, this._index);
               continue;
             }
             continue;
           }
           continue;
         }
         continue;
       // Whitespace after an attribute name: '>' closes the tag, '=' introduces a value,
       // anything else is handed back to BetweenAttributes after DecrementPosition().
       case HtmlDocument.ParseState.AttributeBeforeEquals:
         if (!this.NewCheck() && !HtmlDocument.IsWhiteSpace(this._c))
         {
           if (this._c == 62)
           {
             if (!this.PushNodeEnd(this._index, false))
             {
               this._index = this.Text.Length;
               continue;
             }
             if (this._state == HtmlDocument.ParseState.AttributeBeforeEquals)
             {
               this._state = HtmlDocument.ParseState.Text;
               this.PushNodeStart(HtmlNodeType.Text, this._index);
               continue;
             }
             continue;
           }
           if (this._c == 61)
           {
             this._state = HtmlDocument.ParseState.AttributeAfterEquals;
             continue;
           }
           this._state = HtmlDocument.ParseState.BetweenAttributes;
           this.DecrementPosition();
           continue;
         }
         continue;
       // After '=': a quote character starts a quoted value (the quote is remembered in num),
       // '>' closes the tag, any other non-whitespace character starts an unquoted value.
       case HtmlDocument.ParseState.AttributeAfterEquals:
         if (!this.NewCheck() && !HtmlDocument.IsWhiteSpace(this._c))
         {
           if (this._c == 39 || this._c == 34)
           {
             this._state = HtmlDocument.ParseState.QuotedAttributeValue;
             this.PushAttributeValueStart(this._index, this._c);
             num = this._c;
             continue;
           }
           if (this._c == 62)
           {
             if (!this.PushNodeEnd(this._index, false))
             {
               this._index = this.Text.Length;
               continue;
             }
             if (this._state == HtmlDocument.ParseState.AttributeAfterEquals)
             {
               this._state = HtmlDocument.ParseState.Text;
               this.PushNodeStart(HtmlNodeType.Text, this._index);
               continue;
             }
             continue;
           }
           this.PushAttributeValueStart(this._index - 1);
           this._state = HtmlDocument.ParseState.AttributeValue;
           continue;
         }
         continue;
       // Unquoted attribute value: ends at whitespace or '>'.
       case HtmlDocument.ParseState.AttributeValue:
         if (!this.NewCheck())
         {
           if (HtmlDocument.IsWhiteSpace(this._c))
           {
             this.PushAttributeValueEnd(this._index - 1);
             this._state = HtmlDocument.ParseState.BetweenAttributes;
             continue;
           }
           if (this._c == 62)
           {
             this.PushAttributeValueEnd(this._index - 1);
             if (!this.PushNodeEnd(this._index, false))
             {
               this._index = this.Text.Length;
               continue;
             }
             if (this._state == HtmlDocument.ParseState.AttributeValue)
             {
               this._state = HtmlDocument.ParseState.Text;
               this.PushNodeStart(HtmlNodeType.Text, this._index);
               continue;
             }
             continue;
           }
           continue;
         }
         continue;
       // Comment: ends at '>'. When _fullcomment is set, the characters at _index - 2 and _index - 3
       // must both be '-' (45) as well.
       case HtmlDocument.ParseState.Comment:
         if (this._c == 62 && (!this._fullcomment || (int) this.Text[this._index - 2] == 45 && (int) this.Text[this._index - 3] == 45))
         {
           if (!this.PushNodeEnd(this._index, false))
           {
             this._index = this.Text.Length;
             continue;
           }
           this._state = HtmlDocument.ParseState.Text;
           this.PushNodeStart(HtmlNodeType.Text, this._index);
           continue;
         }
         continue;
       // Quoted attribute value: ends at the remembered quote character; a '<' followed by '%'
       // switches to ServerSideCode and records the state to return to in _oldstate.
       case HtmlDocument.ParseState.QuotedAttributeValue:
         if (this._c == num)
         {
           this.PushAttributeValueEnd(this._index - 1);
           this._state = HtmlDocument.ParseState.BetweenAttributes;
           continue;
         }
         if (this._c == 60 && this._index < this.Text.Length && (int) this.Text[this._index] == 37)
         {
           this._oldstate = this._state;
           this._state = HtmlDocument.ParseState.ServerSideCode;
           continue;
         }
         continue;
       // Server-side code: ends at '%' followed by '>'; the state to resume depends on _oldstate.
       case HtmlDocument.ParseState.ServerSideCode:
         if (this._c == 37 && this._index < this.Text.Length && (int) this.Text[this._index] == 62)
         {
           switch (this._oldstate)
           {
             case HtmlDocument.ParseState.BetweenAttributes:
               this.PushAttributeNameEnd(this._index + 1);
               this._state = HtmlDocument.ParseState.BetweenAttributes;
               break;
             case HtmlDocument.ParseState.AttributeAfterEquals:
               this._state = HtmlDocument.ParseState.AttributeValue;
               break;
             default:
               this._state = this._oldstate;
               break;
           }
           // Extra position increment after the "%>" match (presumably to skip the '>' — confirm).
           this.IncrementPosition();
           continue;
         }
         continue;
       // PcData: look for "</" + current node name (case-insensitive) followed by '>' or whitespace.
       // On a match, the content up to here becomes a text child and tag parsing resumes.
       case HtmlDocument.ParseState.PcData:
         if (this._currentnode._namelength + 3 <= this.Text.Length - (this._index - 1) && string.Compare(this.Text.Substring(this._index - 1, this._currentnode._namelength + 2), "</" + this._currentnode.Name, StringComparison.OrdinalIgnoreCase) == 0)
         {
           // Character immediately after "</name".
           int c = (int) this.Text[this._index - 1 + 2 + this._currentnode.Name.Length];
           if (c == 62 || HtmlDocument.IsWhiteSpace(c))
           {
             HtmlNode node = this.CreateNode(HtmlNodeType.Text, this._currentnode._outerstartindex + this._currentnode._outerlength);
             node._outerlength = this._index - 1 - node._outerstartindex;
             this._currentnode.AppendChild(node);
             this.PushNodeStart(HtmlNodeType.Element, this._index - 1);
             this.PushNodeNameStart(false, this._index - 1 + 2);
             this._state = HtmlDocument.ParseState.Tag;
             this.IncrementPosition();
             continue;
           }
           continue;
         }
         continue;
       default:
         continue;
     }
   }
   // End of input: close a node name that was started (name start index > 0), then close the current node.
   if (this._currentnode._namestartindex > 0)
     this.PushNodeNameEnd(this._index);
   this.PushNodeEnd(this._index, false);
   this.Lastnodes.Clear();
 }
 /// <summary>
 /// Applies a vocab attribute to the evaluation context: an empty value replaces the local
 /// vocabulary with a fresh <c>TermMappings</c> built from the parser's default vocabulary,
 /// any other value is stored as the local vocabulary's URI.
 /// </summary>
 /// <param name="context">Parser context that supplies the default vocabulary.</param>
 /// <param name="evalContext">Evaluation context whose local vocabulary is updated.</param>
 /// <param name="attr">The vocab attribute being processed.</param>
 private void ParseVocabAttribute(RdfAParserContext context, RdfAEvaluationContext evalContext, HtmlAttribute attr)
 {
     String vocabUri = attr.Value;

     if (!vocabUri.Equals(String.Empty))
     {
         evalContext.LocalVocabulary.VocabularyUri = vocabUri;
         return;
     }

     //Empty value - reset the local vocabulary
     evalContext.LocalVocabulary = new TermMappings(context.DefaultVocabulary);
 }
Exemple #26
0
        /// <summary>
        /// Finds the last attribute of the current node that has a value, makes it the current
        /// attribute, drops every attribute after it, and then either closes its value at
        /// <paramref name="index"/> or re-opens it as a quoted value.
        /// </summary>
        /// <param name="index">Position passed to <c>PushAttributeValueEnd</c> when closing.</param>
        /// <param name="close">
        /// <c>true</c> to end the value and switch to <c>BetweenAttributes</c>;
        /// <c>false</c> to zero the value length and switch to <c>QuotedAttributeValue</c>.
        /// </param>
        /// <returns><c>false</c> when no attribute with a value exists; otherwise <c>true</c>.</returns>
        private bool ExtendLastAttributeValue(int index, bool close)
        {
            System.Diagnostics.Debug.Assert(_currentnode != null && _currentnode.Attributes.Count > 0);

            var attributes = _currentnode.Attributes;

            // Scan from the end for the first attribute that carries a value: either a non-zero
            // value length, or a value start located past the end of the name.
            int target = attributes.Count - 1;
            while (target >= 0)
            {
                var current = attributes[target];
                bool hasValue = current._valuelength > 0
                    || current._valuestartindex > (current._namestartindex + current._namelength);
                if (hasValue)
                    break;
                target--;
            }

            if (target < 0)
                return false;   // nothing to extend

            // The found attribute becomes the current one; everything after it is discarded.
            _currentattribute = attributes[target];
            for (int last = attributes.Count - 1; last > target; last--)
                attributes.RemoveAt(last);

            if (!close)
            {
                _currentattribute._valuelength = 0;
                _state = ParseState.QuotedAttributeValue;
                return true;
            }

            PushAttributeValueEnd(index);
            _state = ParseState.BetweenAttributes;
            return true;
        }
 /// <summary>
 ///     Construct a HTML element attribute wrapper object.
 /// </summary>
 /// <param name="attribute">
 ///     The HTML Agility Pack attribute stored in this wrapper. It is stored as given;
 ///     no null check is performed here.
 /// </param>
 protected HtmlAttributeBase(HtmlAgilityPack.HtmlAttribute attribute)
 {
     this.attribute = attribute;
 }
 /// <summary>
 /// Returns the position of the given attribute inside <c>_items</c>.
 /// </summary>
 /// <param name="attribute">The attribute to look for; must not be null.</param>
 /// <returns>The zero-based index of the first entry equal (via <c>==</c>) to the attribute, or -1 if none.</returns>
 /// <exception cref="ArgumentNullException"><paramref name="attribute"/> is null.</exception>
 internal int GetAttributeIndex(HtmlAttribute attribute)
 {
     if (attribute == null)
         throw new ArgumentNullException("attribute");

     int position = 0;
     while (position < _items.Count)
     {
         HtmlAttribute candidate = (HtmlAttribute)_items[position];
         if (candidate == attribute)
             return position;
         position++;
     }

     return -1;
 }
Exemple #29
0
        /// <summary>
        /// Decides whether an attribute on a node refers to a static resource.
        /// </summary>
        /// <remarks>
        /// A reference counts as static when the node is a <c>link</c>, <c>script</c> or <c>img</c>
        /// element, when the value starts with <c>#</c> or the <c>javascript:</c> scheme, or when
        /// the value ends with one of the known static file extensions.
        /// All comparisons are ordinal and case-insensitive, so the result does not depend on the
        /// current culture, and a null attribute value is treated as an empty string.
        /// </remarks>
        /// <param name="node">The element that owns the attribute.</param>
        /// <param name="attr">The attribute whose value is inspected.</param>
        /// <returns><c>true</c> if the reference is considered static; otherwise <c>false</c>.</returns>
        private bool IsStaticResource(HtmlNode node, HtmlAttribute attr)
        {
            var nodeName = node.Name;
            if (string.Equals(nodeName, "link", StringComparison.OrdinalIgnoreCase)
                || string.Equals(nodeName, "script", StringComparison.OrdinalIgnoreCase)
                || string.Equals(nodeName, "img", StringComparison.OrdinalIgnoreCase))
            {
                return true;
            }

            // Use the null-safe copy for every check; the original dereferenced attr.Value directly here.
            var attValue = attr.Value ?? "";
            if (attValue.StartsWith("#", StringComparison.Ordinal)
                || attValue.StartsWith("javascript:", StringComparison.OrdinalIgnoreCase))
            {
                return true;
            }

            if (attValue.EndsWith(".js", StringComparison.OrdinalIgnoreCase)
                || attValue.EndsWith(".css", StringComparison.OrdinalIgnoreCase)
                || attValue.EndsWith(".png", StringComparison.OrdinalIgnoreCase)
                || attValue.EndsWith(".jpg", StringComparison.OrdinalIgnoreCase)
                || attValue.EndsWith(".jpeg", StringComparison.OrdinalIgnoreCase)
                || attValue.EndsWith(".ico", StringComparison.OrdinalIgnoreCase)
                || attValue.EndsWith(".gif", StringComparison.OrdinalIgnoreCase))
            {
                return true;
            }

            return false;
        }
Exemple #30
0
 /// <summary>
 /// Wraps an HTML Agility Pack attribute in an <c>HtmlAttributeAdapter</c>.
 /// </summary>
 /// <param name="attribute">The attribute to wrap.</param>
 /// <returns>The newly created adapter, exposed as <c>IHtmlAttribute</c>.</returns>
 public static IHtmlAttribute AsAttribute(this AP.HtmlAttribute attribute)
 {
     IHtmlAttribute adapter = new HtmlAttributeAdapter(attribute);
     return adapter;
 }
Exemple #31
0
        /// <summary>
        /// Decides whether an attribute on a node refers to a static resource.
        /// </summary>
        /// <remarks>
        /// <c>link</c>, <c>script</c> and <c>img</c> elements are always static. Otherwise the file
        /// extension of the attribute value decides. A query string or fragment (<c>?</c>/<c>#</c>)
        /// is removed before the extension is extracted, so <c>app.js?v=2</c> is recognised as
        /// <c>.js</c>. A null or empty value, or one that <c>Path.GetExtension</c> rejects,
        /// is reported as not static instead of throwing.
        /// </remarks>
        /// <param name="node">The element that owns the attribute.</param>
        /// <param name="attr">The attribute whose value is inspected.</param>
        /// <returns><c>true</c> if the reference is considered static; otherwise <c>false</c>.</returns>
        private bool IsStaticResource(HtmlNode node, HtmlAttribute attr)
        {
            var nodeName = node.Name;
            if (string.Equals(nodeName, "link", StringComparison.OrdinalIgnoreCase)
                || string.Equals(nodeName, "script", StringComparison.OrdinalIgnoreCase)
                || string.Equals(nodeName, "img", StringComparison.OrdinalIgnoreCase))
            {
                return true;
            }

            var path = attr.Value;
            if (string.IsNullOrEmpty(path))
            {
                return false;
            }

            // Drop the query string / fragment so they do not become part of the extension.
            int cut = path.IndexOfAny(new[] { '?', '#' });
            if (cut >= 0)
            {
                path = path.Substring(0, cut);
            }

            string extension;
            try
            {
                extension = Path.GetExtension(path);
            }
            catch (ArgumentException)
            {
                // Some runtimes reject characters that are illegal in file-system paths.
                return false;
            }

            return string.Equals(extension, ".js", StringComparison.OrdinalIgnoreCase)
                || string.Equals(extension, ".css", StringComparison.OrdinalIgnoreCase)
                || string.Equals(extension, ".png", StringComparison.OrdinalIgnoreCase)
                || string.Equals(extension, ".jpg", StringComparison.OrdinalIgnoreCase)
                || string.Equals(extension, ".jpeg", StringComparison.OrdinalIgnoreCase)
                || string.Equals(extension, ".ico", StringComparison.OrdinalIgnoreCase)
                || string.Equals(extension, ".gif", StringComparison.OrdinalIgnoreCase);
        }
        /// <summary>
        /// Processes a profile attribute: each space-separated profile URI is either recognised as
        /// the XHTML vocabulary or loaded as a graph and queried for namespace and term mappings,
        /// which are added to the evaluation context's local vocabulary.
        /// </summary>
        /// <param name="context">Parser context (not used by this method body).</param>
        /// <param name="evalContext">Evaluation context whose local vocabulary receives the mappings.</param>
        /// <param name="attr">The profile attribute.</param>
        /// <returns>
        /// <c>true</c> when every profile was processed; <c>false</c> as soon as one profile cannot
        /// be loaded or processing it throws (a warning is raised in both cases).
        /// </returns>
        private bool ParseProfileAttribute(RdfAParserContext context, RdfAEvaluationContext evalContext, HtmlAttribute attr)
        {
            String[] profiles = attr.Value.Contains(" ")
                ? attr.Value.Split(' ')
                : new String[] { attr.Value };

            String prefixQuery = "PREFIX rdfa: <" + RdfANamespace + "> SELECT SAMPLE(?prefix) AS ?NamespacePrefix SAMPLE(?uri) AS ?NamespaceURI WHERE { ?s rdfa:prefix ?prefix ; rdfa:uri ?uri } GROUP BY ?s HAVING (COUNT(?prefix) = 1 && COUNT(?uri) = 1)";
            String termQuery = "PREFIX rdfa: <" + RdfANamespace + "> SELECT SAMPLE(?term) AS ?Term SAMPLE(?uri) AS ?URI WHERE {?s rdfa:term ?term ; rdfa:uri ?uri } GROUP BY ?s HAVING (COUNT(?term) = 1 && COUNT(?uri) = 1)";

            foreach (String profileUri in profiles)
            {
                try
                {
                    Graph profileGraph = new Graph();

                    //The XHTML Vocabulary is fixed, with or without the final character of its namespace
                    bool isXHtmlVocab = profileUri.Equals(XHtmlVocabNamespace)
                        || profileUri.Equals(XHtmlVocabNamespace.Substring(0, XHtmlVocabNamespace.Length-1));
                    if (isXHtmlVocab)
                    {
                        evalContext.LocalVocabulary.Merge(new XHtmlRdfAVocabulary());
                        continue;
                    }

                    try
                    {
                        UriLoader.Load(profileGraph, new Uri(profileUri));
                    }
                    catch
                    {
                        //A profile that cannot be retrieved makes the whole attribute fail
                        this.OnWarning("Unable to retrieve a Profile document which the library could parse from the URI '" + profileUri + "'");
                        return false;
                    }

                    //Namespace Mappings - only rows where both values are literals are used
                    SparqlResultSet namespaceRows = profileGraph.ExecuteQuery(prefixQuery) as SparqlResultSet;
                    if (namespaceRows != null)
                    {
                        foreach (SparqlResult row in namespaceRows.Results)
                        {
                            INode prefixNode = row["NamespacePrefix"];
                            INode namespaceNode = row["NamespaceURI"];
                            if (prefixNode.NodeType != NodeType.Literal || namespaceNode.NodeType != NodeType.Literal)
                            {
                                continue;
                            }

                            evalContext.LocalVocabulary.AddNamespace(
                                ((ILiteralNode)prefixNode).Value.ToLower(),
                                ((ILiteralNode)namespaceNode).Value);
                        }
                    }

                    //Term Mappings - only rows where both values are literals are used
                    SparqlResultSet termRows = profileGraph.ExecuteQuery(termQuery) as SparqlResultSet;
                    if (termRows != null)
                    {
                        foreach (SparqlResult row in termRows.Results)
                        {
                            INode termNode = row["Term"];
                            INode termUriNode = row["URI"];
                            if (termNode.NodeType != NodeType.Literal || termUriNode.NodeType != NodeType.Literal)
                            {
                                continue;
                            }

                            evalContext.LocalVocabulary.AddTerm(
                                ((ILiteralNode)termNode).Value,
                                ((ILiteralNode)termUriNode).Value);
                        }
                    }
                }
                catch
                {
                    //Any other failure while handling this profile: warn and report failure
                    this.OnWarning("Ignoring the value '" + profileUri + "' since this is not a valid URI or a profile document was not successfully retrieved and parsed from this URI");
                    return false;
                }
            }

            return true;
        }
 /// <summary>
 /// Derives a make name from an attribute value by removing the <c>/usedcar/</c> and
 /// <c>/index.html</c> parts and upper-casing what is left.
 /// </summary>
 /// <param name="htmlAttribute">The attribute to read; may be null.</param>
 /// <returns>The upper-cased remainder, or an empty string when the attribute is null.</returns>
 private static string ExtractMake(HtmlAttribute htmlAttribute)
 {
     if (htmlAttribute == null)
         return string.Empty;

     string make = htmlAttribute.Value;
     make = make.Replace(@"/usedcar/", string.Empty);
     make = make.Replace(@"/index.html", string.Empty);
     return make.ToUpper();
 }
Exemple #34
0
        /// <summary>
        /// Writes one attribute (preceded by a space) to <paramref name="outText"/>, honouring the
        /// owner document's output options.
        /// </summary>
        /// <remarks>
        /// XML output: the name is the original name, or the XML name (optionally upper-cased);
        /// a name starting with '-' or a digit gets a '_' prefix, and the XML value is HTML-encoded
        /// and always quoted.
        /// HTML output: a name of the form <c>&lt;%...%&gt;</c> is written alone without a value;
        /// with attribute-value optimisation enabled, a value containing no LF, CR, TAB or space
        /// is written without quotes.
        /// </remarks>
        /// <param name="outText">Destination writer.</param>
        /// <param name="att">Attribute to serialise.</param>
        internal void WriteAttribute(TextWriter outText, HtmlAttribute att)
        {
            string quote;
            if (att.QuoteType == AttributeValueQuote.DoubleQuote)
                quote = "\"";
            else
                quote = "'";

            string name;

            if (_ownerdocument.OptionOutputAsXml)
            {
                if (_ownerdocument.OptionOutputOriginalCase)
                    name = att.OriginalName;
                else if (_ownerdocument.OptionOutputUpperCase)
                    name = att.XmlName.ToUpper();
                else
                    name = att.XmlName;

                if (!string.IsNullOrEmpty(name))
                {
                    char first = name[0];
                    bool invalidXmlStart = first == '-' || (first >= '0' && first <= '9');

                    // An XML attribute name may not begin like this; repair it with a leading '_'.
                    if (invalidXmlStart)
                        name = "_" + name;
                }

                outText.Write(" " + name + "=" + quote + HtmlDocument.HtmlEncode(att.XmlValue) + quote);
                return;
            }

            string rawName = att.Name;
            name = _ownerdocument.OptionOutputUpperCase ? rawName.ToUpper() : rawName;

            // A name that is itself a <% ... %> block is emitted as-is, with no "=value" part.
            int nameLength = rawName.Length;
            bool isServerSideBlock = nameLength >= 4
                && rawName[0] == '<' && rawName[1] == '%'
                && rawName[nameLength - 1] == '>' && rawName[nameLength - 2] == '%';
            if (isServerSideBlock)
            {
                outText.Write(" " + name);
                return;
            }

            string value = att.Value;
            bool writeUnquoted = _ownerdocument.OptionOutputOptimizeAttributeValues
                && value.IndexOfAny(new[] { '\n', '\r', '\t', ' ' }) < 0;

            if (writeUnquoted)
                outText.Write(" " + name + "=" + value);
            else
                outText.Write(" " + name + "=" + quote + value + quote);
        }
 /// <summary>
 /// Produces a new attribute belonging to the same owner document and carrying
 /// this attribute's name and value.
 /// </summary>
 /// <returns>The newly created copy.</returns>
 public HtmlAttribute Clone()
 {
     HtmlAttribute copy = new HtmlAttribute(_ownerdocument);
     copy.Name = Name;
     copy.Value = Value;
     return copy;
 }
        /// <summary>
        /// Writes one attribute (preceded by a space) to <paramref name="outText"/>, honouring the
        /// owner document's output options.
        /// </summary>
        /// <remarks>
        /// XML output: the XML name (optionally upper-cased) is computed first and then replaced by
        /// the original name when original-case output is on; the XML value is HTML-encoded and quoted.
        /// HTML output: a name of the form <c>&lt;%...%&gt;</c> is written alone without a value;
        /// with attribute-value optimisation enabled, a value containing no LF, CR, TAB or space
        /// is written without quotes.
        /// </remarks>
        /// <param name="outText">Destination writer.</param>
        /// <param name="att">Attribute to serialise.</param>
        internal void WriteAttribute(TextWriter outText, HtmlAttribute att)
        {
            string quote;
            if (att.QuoteType == AttributeValueQuote.DoubleQuote)
                quote = "\"";
            else
                quote = "'";

            string name;

            if (_ownerdocument.OptionOutputAsXml)
            {
                if (_ownerdocument.OptionOutputUpperCase)
                    name = att.XmlName.ToUpper();
                else
                    name = att.XmlName;

                // Original-case output takes precedence over the name computed above.
                if (_ownerdocument.OptionOutputOriginalCase)
                    name = att.OriginalName;

                outText.Write(" " + name + "=" + quote + HtmlDocument.HtmlEncode(att.XmlValue) + quote);
                return;
            }

            string rawName = att.Name;
            if (_ownerdocument.OptionOutputUpperCase)
                name = rawName.ToUpper();
            else
                name = rawName;

            // A name that is itself a <% ... %> block is emitted as-is, with no "=value" part.
            int nameLength = rawName.Length;
            if (nameLength >= 4
                && rawName[0] == '<' && rawName[1] == '%'
                && rawName[nameLength - 1] == '>' && rawName[nameLength - 2] == '%')
            {
                outText.Write(" " + name);
                return;
            }

            string value = att.Value;
            if (_ownerdocument.OptionOutputOptimizeAttributeValues
                && value.IndexOfAny(new[] { '\n', '\r', '\t', ' ' }) < 0)
            {
                outText.Write(" " + name + "=" + value);
            }
            else
            {
                outText.Write(" " + name + "=" + quote + value + quote);
            }
        }
 /// <summary>
 /// Creates an adapter that keeps a reference to the given HTML Agility Pack attribute.
 /// </summary>
 /// <param name="attribute">The attribute to adapt; stored as given, without a null check.</param>
 internal HtmlAttributeAdapter(AP.HtmlAttribute attribute)
 {
   this._attribute = attribute;
 }
 /// <summary>
 /// Starts a new current attribute and records where its name begins.
 /// </summary>
 /// <param name="index">
 /// Offset stored as both the name start index and the stream position of the new attribute.
 /// </param>
 private void PushAttributeNameStart(int index)
 {
     var started = CreateAttribute();
     _currentattribute = started;

     // Name start and stream position share the same offset; line info comes from the parser state.
     started._namestartindex = index;
     started.Line = _line;
     started._lineposition = _lineposition;
     started._streamposition = index;
 }
Exemple #39
0
 /// <summary>
 /// Asserts (via <c>ShouldContain</c>) that the attribute's value contains the expected text.
 /// </summary>
 /// <param name="a">The attribute under test.</param>
 /// <param name="expectedSubstring">Text passed to the <c>ShouldContain</c> assertion.</param>
 /// <returns>The same attribute, so that further assertions can be chained.</returns>
 public static HtmlAttribute ValueShouldContain(this HtmlAttribute a, string expectedSubstring)
 {
     string actualValue = a.Value;
     actualValue.ShouldContain(expectedSubstring);
     return a;
 }