/// <summary>
/// Collects the "value" attribute of every enabled e-mail header element found in the document.
/// </summary>
/// <param name="source">Parsed HTML document; elements are looked up by id.</param>
/// <param name="salesForce">Supplies <c>emailHeaderIdentities</c>, a set of element ids paired with an enabled flag.</param>
/// <returns>
/// A dictionary keyed by element id. Ids that are disabled, not present in the document,
/// or whose element has no "value" attribute are omitted.
/// </returns>
private static Dictionary<string, EmailRecord> getNameValueByElementType(
    HtmlAgilityPack.HtmlDocument source,
    SalesForce salesForce
    )
{
    Dictionary<string, EmailRecord> output = new Dictionary<string, EmailRecord>();

    foreach (KeyValuePair<string, bool> emailItem in salesForce.emailHeaderIdentities)
    {
        if (!emailItem.Value)
        {
            continue;
        }

        HtmlAgilityPack.HtmlNode editNode = source.GetElementbyId(emailItem.Key);
        if (editNode == null)
        {
            // No element carries this id; skip it instead of dereferencing null.
            continue;
        }

        // Any() stops at the first match; the previous ToList().Count(..) >= 1 copied and scanned everything.
        if (!editNode.Attributes.Any(x => x.Name == "value"))
        {
            continue;
        }

        HtmlAgilityPack.HtmlAttribute attribute = editNode.Attributes["value"];
        EmailRecord record = new EmailRecord();
        record.emailAddress = attribute.Value;
        output.Add(emailItem.Key, record);
    }

    return output;
}
/// <summary>
/// Builds a wrapper around an existing attribute.
/// </summary>
/// <param name="wrappedAttribute">The attribute to wrap; must not be null.</param>
/// <exception cref="ArgumentNullException">Raised when <paramref name="wrappedAttribute"/> is null.</exception>
public HtmlAttributeWrapper(HtmlAttribute wrappedAttribute)
{
    if (wrappedAttribute == null)
    {
        throw new ArgumentNullException("wrappedAttribute");
    }

    this._wrappedAttribute = wrappedAttribute;
}
/// <summary>
/// Produces a copy of this attribute.
/// </summary>
/// <returns>A new attribute, created for the same owner document, carrying this attribute's name and value.</returns>
public HtmlAttribute Clone()
{
    return new HtmlAttribute(_ownerdocument)
    {
        Name = Name,
        Value = Value
    };
}
/// <summary>
/// Rewrites a relative URL stored in an attribute into an absolute URL.
/// </summary>
/// <param name="attr">Attribute holding the URL. Left untouched when it is null or has no value.</param>
/// <param name="baseUrl">Base address that a relative value is resolved against.</param>
/// <remarks>
/// Values starting with "http:" or "https:" (any casing), and values that are not
/// well-formed relative URI strings, are left unchanged.
/// </remarks>
private static void MakeAbsolute(HtmlAttribute attr, Uri baseUrl)
{
    if (attr == null)
    {
        return;
    }

    var url = attr.Value;
    if (url == null)
    {
        // Nothing to resolve; previously this path threw a NullReferenceException.
        return;
    }

    bool hasHttpScheme =
        url.StartsWith("http:", StringComparison.OrdinalIgnoreCase) ||
        url.StartsWith("https:", StringComparison.OrdinalIgnoreCase);

    if (!hasHttpScheme && Uri.IsWellFormedUriString(url, UriKind.Relative))
    {
        attr.Value = new Uri(baseUrl, url).ToString();
    }
}
/// <summary>
/// Derives a model name from a link attribute by stripping the "/usedcar/{make}__" prefix
/// text and the ".html" text from its value.
/// </summary>
/// <param name="make">Make name that is part of the prefix to strip.</param>
/// <param name="htmlAttribute">Attribute whose value holds the link; may be null.</param>
/// <returns>The stripped value, or an empty string when <paramref name="htmlAttribute"/> is null.</returns>
private string ExtractModel(string make, HtmlAttribute htmlAttribute)
{
    if (htmlAttribute == null)
    {
        return string.Empty;
    }

    var makePrefix = string.Format(@"/usedcar/{0}__", make);
    var withoutPrefix = htmlAttribute.Value.Replace(makePrefix, string.Empty);
    return withoutPrefix.Replace(".html", string.Empty);
}
/// <summary>
/// Reads the "href" attribute of the element selected by <c>salesForce.elementOne</c> and stores
/// its value in <c>salesForce.endpointEditSearchFieldTemplate</c>.
/// </summary>
/// <param name="source">Parsed HTML document to query.</param>
/// <param name="salesForce">Supplies the selector (<c>elementOne</c>) and receives the extracted href.</param>
/// <remarks>
/// When the selector matches nothing, or the matched element has no "href" attribute, the template
/// is left unchanged instead of failing with a NullReferenceException.
/// </remarks>
private static void getNameValueByElementTypeVoid(
    HtmlAgilityPack.HtmlDocument source,
    SalesForce salesForce
    )
{
    var document = source.DocumentNode;

    HtmlAgilityPack.HtmlNode editNode = document.QuerySelector(salesForce.elementOne);
    if (editNode == null)
    {
        // NOTE(review): assumes QuerySelector returns null when nothing matches - confirm against the selector library in use.
        return;
    }

    HtmlAgilityPack.HtmlAttribute attribute = editNode.Attributes["href"];
    if (attribute == null)
    {
        return;
    }

    salesForce.endpointEditSearchFieldTemplate = attribute.Value;
}
/// <summary>
/// Places the given attribute at the front of the collection.
/// </summary>
/// <param name="newAttribute">The attribute to insert; must not be null.</param>
/// <returns>The attribute that was inserted.</returns>
/// <exception cref="ArgumentNullException">Raised when <paramref name="newAttribute"/> is null.</exception>
public HtmlAttribute Prepend(HtmlAttribute newAttribute)
{
    if (newAttribute != null)
    {
        // Register by name, attach to the owning node, then insert at position 0.
        this._hashitems[newAttribute.Name] = newAttribute;
        newAttribute._ownernode = this._ownernode;
        this._items.Insert(0, newAttribute);

        // Flag the owning node's inner and outer content as changed.
        this._ownernode._innerchanged = true;
        this._ownernode._outerchanged = true;
        return newAttribute;
    }

    throw new ArgumentNullException("newAttribute");
}
/// <summary>
/// Removes vulnerable keywords from an attribute value and makes the value safe through HTML encoding
/// and HTML character escaping.
/// </summary>
/// <param name="attribute">Attribute whose value is checked and cleaned in place.</param>
/// <remarks>
/// Keyword stripping runs only when <c>CleanAttributes</c> is set; entity escaping runs only when
/// <c>EncodeHtmlEntities</c> is set.
/// </remarks>
private void CleanAttributeValues(HapHtmlAttribute attribute)
{
    if (CleanAttributes)
    {
        attribute.Value = HttpUtility.HtmlEncode(attribute.Value);

        attribute.Value = Regex.Replace(attribute.Value, @"\s*j\s*a\s*v\s*a\s*s\s*c\s*r\s*i\s*p\s*t\s*", "", RegexOptions.IgnoreCase);
        attribute.Value = Regex.Replace(attribute.Value, @"\s*s\s*c\s*r\s*i\s*p\s*t\s*", "", RegexOptions.IgnoreCase);

        if (attribute.Name.ToLower() == "style")
        {
            attribute.Value = Regex.Replace(attribute.Value, @"\s*e\s*x\s*p\s*r\s*e\s*s\s*s\s*i\s*o\s*n\s*", "", RegexOptions.IgnoreCase);
            attribute.Value = Regex.Replace(attribute.Value, @"\s*b\s*e\s*h\s*a\s*v\s*i\s*o\s*r\s*", "", RegexOptions.IgnoreCase);
        }

        if (attribute.Name.ToLower() == "href" || attribute.Name.ToLower() == "src")
        {
            attribute.Value = Regex.Replace(attribute.Value, @"\s*m\s*o\s*c\s*h\s*a\s*", "", RegexOptions.IgnoreCase);
        }
    }

    // HtmlEntity Escape
    if (EncodeHtmlEntities)
    {
        // Ensure no double encoding goes on - reverse the entities that are already present
        // before every character is escaped again below.
        // The previous replacements could not do this: one replaced every digit "2" with a quote
        // and the others replaced a character with itself.
        // NOTE(review): the entity spellings below are assumed to be the ones produced upstream - confirm.
        string value = attribute.Value;
        value = value.Replace("&quot;", "\"");
        value = value.Replace("&lt;", "<");
        value = value.Replace("&gt;", ">");
        value = value.Replace("&#39;", "'");
        // "&amp;" goes last so that text such as "&amp;lt;" is decoded once only.
        value = value.Replace("&amp;", "&");

        StringBuilder sbAttributeValue = new StringBuilder();
        foreach (char c in value)
        {
            sbAttributeValue.Append(EncodeCharacterToHtmlEntityEscape(c));
        }
        attribute.Value = sbAttributeValue.ToString();
    }
}
/// <summary>
/// Loads the page referenced by the attribute and builds the image URL from the "src" attribute
/// of the element whose id is "frontCover".
/// </summary>
/// <param name="htmlAttribute">Attribute whose value is the URL of the page to load.</param>
/// <returns>
/// <c>BaseUrl</c> followed by the element's "src" value; null when the document node or the element
/// is missing, or when any exception is raised along the way.
/// </returns>
private string RetrieveImageUrl(HtmlAttribute htmlAttribute)
{
    try
    {
        var pageUrl = htmlAttribute.Value;
        var page = _htmlWeb.Load(pageUrl);

        if (page.DocumentNode == null)
        {
            return null;
        }

        var coverNode = page.DocumentNode.SelectSingleNode("//*[@id = 'frontCover']");
        if (coverNode == null)
        {
            return null;
        }

        return string.Format("{0}{1}", BaseUrl, coverNode.Attributes["src"].Value);
    }
    catch (Exception)
    {
        // Best effort: every failure is reported to the caller as "no image".
        return null;
    }
}
/// <summary>
/// Returns the value of an attribute, treating a missing attribute as an empty string.
/// </summary>
/// <param name="x">Attribute to read; may be null.</param>
/// <returns>An empty string when <paramref name="x"/> is null, otherwise its value.</returns>
private string _value(HtmlAttribute x)
{
    return x == null ? "" : x.Value;
}
/// <summary>
/// Asserts that the attribute's value equals the expected value.
/// </summary>
/// <param name="a">Attribute under test.</param>
/// <param name="expectedValue">Value the attribute is expected to have.</param>
public static void WithValue(this HtmlAttribute a, string expectedValue)
{
    string actualValue = a.Value;
    Assert.AreEqual(expectedValue, actualValue);
}
/// <summary>
/// Runs the character-by-character state machine over <c>Text</c>, driving the
/// Push* callbacks that record node, node-name, attribute-name and attribute-value boundaries.
/// </summary>
/// <remarks>
/// All parser state is reset first. Inside the loop, <c>continue</c> moves on to the next character
/// immediately, while <c>break</c> only leaves the <c>switch</c>. Where a branch sets
/// <c>_index = Text.Length</c> before <c>break</c>, the loop condition fails next and parsing stops.
/// Several branches re-check <c>_state</c> after a Push* call and bail out if it no longer holds the
/// state they started in.
/// </remarks>
private void Parse()
{
    // Quote character that opened the current quoted attribute value.
    int lastquote = 0;
    if (OptionComputeChecksum)
    {
        _crc32 = new Crc32();
    }

    // Reset parser state.
    Lastnodes = new Dictionary<string, HtmlNode>();
    _c = 0;
    _fullcomment = false;
    _parseerrors = new List<HtmlParseError>();
    _line = 1;
    _lineposition = 1;
    _maxlineposition = 1;

    _state = ParseState.Text;
    _oldstate = _state;
    _documentnode._innerlength = Text.Length;
    _documentnode._outerlength = Text.Length;
    _remainderOffset = Text.Length;

    _lastparentnode = _documentnode;
    _currentnode = CreateNode(HtmlNodeType.Text, 0);
    _currentattribute = null;

    _index = 0;
    PushNodeStart(HtmlNodeType.Text, 0);
    while (_index < Text.Length)
    {
        _c = Text[_index];
        // NOTE(review): IncrementPosition presumably advances _index, so below _index refers to the
        // character after _c and _index - 1 to _c itself - confirm against IncrementPosition.
        IncrementPosition();

        switch (_state)
        {
            case ParseState.Text:
                if (NewCheck())
                    continue;
                break;

            case ParseState.WhichTag:
                if (NewCheck())
                    continue;
                if (_c == '/')
                {
                    PushNodeNameStart(false, _index);
                }
                else
                {
                    PushNodeNameStart(true, _index - 1);
                    DecrementPosition();
                }
                _state = ParseState.Tag;
                break;

            case ParseState.Tag:
                if (NewCheck())
                    continue;
                if (IsWhiteSpace(_c))
                {
                    PushNodeNameEnd(_index - 1);
                    if (_state != ParseState.Tag)
                        continue;
                    _state = ParseState.BetweenAttributes;
                    continue;
                }
                if (_c == '/')
                {
                    PushNodeNameEnd(_index - 1);
                    if (_state != ParseState.Tag)
                        continue;
                    _state = ParseState.EmptyTag;
                    continue;
                }
                if (_c == '>')
                {
                    PushNodeNameEnd(_index - 1);
                    if (_state != ParseState.Tag)
                        continue;
                    if (!PushNodeEnd(_index, false))
                    {
                        // stop parsing
                        _index = Text.Length;
                        break;
                    }
                    if (_state != ParseState.Tag)
                        continue;
                    _state = ParseState.Text;
                    PushNodeStart(HtmlNodeType.Text, _index);
                }
                break;

            case ParseState.BetweenAttributes:
                if (NewCheck())
                    continue;

                if (IsWhiteSpace(_c))
                    continue;

                if ((_c == '/') || (_c == '?'))
                {
                    _state = ParseState.EmptyTag;
                    continue;
                }

                if (_c == '>')
                {
                    if (!PushNodeEnd(_index, false))
                    {
                        // stop parsing
                        _index = Text.Length;
                        break;
                    }

                    if (_state != ParseState.BetweenAttributes)
                        continue;
                    _state = ParseState.Text;
                    PushNodeStart(HtmlNodeType.Text, _index);
                    continue;
                }

                // Any other character starts an attribute name.
                PushAttributeNameStart(_index - 1);
                _state = ParseState.AttributeName;
                break;

            case ParseState.EmptyTag:
                if (NewCheck())
                    continue;

                if (_c == '>')
                {
                    if (!PushNodeEnd(_index, true))
                    {
                        // stop parsing
                        _index = Text.Length;
                        break;
                    }

                    if (_state != ParseState.EmptyTag)
                        continue;
                    _state = ParseState.Text;
                    PushNodeStart(HtmlNodeType.Text, _index);
                    continue;
                }
                _state = ParseState.BetweenAttributes;
                break;

            case ParseState.AttributeName:
                if (NewCheck())
                    continue;

                if (IsWhiteSpace(_c))
                {
                    PushAttributeNameEnd(_index - 1);
                    _state = ParseState.AttributeBeforeEquals;
                    continue;
                }
                if (_c == '=')
                {
                    PushAttributeNameEnd(_index - 1);
                    _state = ParseState.AttributeAfterEquals;
                    continue;
                }
                if (_c == '>')
                {
                    PushAttributeNameEnd(_index - 1);
                    if (!PushNodeEnd(_index, false))
                    {
                        // stop parsing
                        _index = Text.Length;
                        break;
                    }
                    if (_state != ParseState.AttributeName)
                        continue;
                    _state = ParseState.Text;
                    PushNodeStart(HtmlNodeType.Text, _index);
                    continue;
                }
                break;

            case ParseState.AttributeBeforeEquals:
                if (NewCheck())
                    continue;

                if (IsWhiteSpace(_c))
                    continue;
                if (_c == '>')
                {
                    if (!PushNodeEnd(_index, false))
                    {
                        // stop parsing
                        _index = Text.Length;
                        break;
                    }
                    if (_state != ParseState.AttributeBeforeEquals)
                        continue;
                    _state = ParseState.Text;
                    PushNodeStart(HtmlNodeType.Text, _index);
                    continue;
                }
                if (_c == '=')
                {
                    _state = ParseState.AttributeAfterEquals;
                    continue;
                }
                // no equals, no whitespace, it's a new attribute starting
                _state = ParseState.BetweenAttributes;
                DecrementPosition();
                break;

            case ParseState.AttributeAfterEquals:
                if (NewCheck())
                    continue;

                if (IsWhiteSpace(_c))
                    continue;

                if ((_c == '\'') || (_c == '"'))
                {
                    _state = ParseState.QuotedAttributeValue;
                    PushAttributeValueStart(_index, _c);
                    // Remember which quote opened the value; QuotedAttributeValue ends on the same one.
                    lastquote = _c;
                    continue;
                }
                if (_c == '>')
                {
                    if (!PushNodeEnd(_index, false))
                    {
                        // stop parsing
                        _index = Text.Length;
                        break;
                    }
                    if (_state != ParseState.AttributeAfterEquals)
                        continue;
                    _state = ParseState.Text;
                    PushNodeStart(HtmlNodeType.Text, _index);
                    continue;
                }
                // Unquoted value.
                PushAttributeValueStart(_index - 1);
                _state = ParseState.AttributeValue;
                break;

            case ParseState.AttributeValue:
                if (NewCheck())
                    continue;

                if (IsWhiteSpace(_c))
                {
                    PushAttributeValueEnd(_index - 1);
                    _state = ParseState.BetweenAttributes;
                    continue;
                }

                if (_c == '>')
                {
                    PushAttributeValueEnd(_index - 1);
                    if (!PushNodeEnd(_index, false))
                    {
                        // stop parsing
                        _index = Text.Length;
                        break;
                    }
                    if (_state != ParseState.AttributeValue)
                        continue;
                    _state = ParseState.Text;
                    PushNodeStart(HtmlNodeType.Text, _index);
                    continue;
                }
                break;

            case ParseState.QuotedAttributeValue:
                // Unlike the states above, NewCheck() is not called here.
                if (_c == lastquote)
                {
                    PushAttributeValueEnd(_index - 1);
                    _state = ParseState.BetweenAttributes;
                    continue;
                }
                if (_c == '<')
                {
                    if (_index < Text.Length)
                    {
                        // "<%" inside a quoted value switches to the server-side code state.
                        if (Text[_index] == '%')
                        {
                            _oldstate = _state;
                            _state = ParseState.ServerSideCode;
                            continue;
                        }
                    }
                }
                break;

            case ParseState.Comment:
                if (_c == '>')
                {
                    if (_fullcomment)
                    {
                        // With _fullcomment set, '>' only ends the comment when the two characters before it are "--".
                        if ((Text[_index - 2] != '-') ||
                            (Text[_index - 3] != '-'))
                        {
                            continue;
                        }
                    }
                    if (!PushNodeEnd(_index, false))
                    {
                        // stop parsing
                        _index = Text.Length;
                        break;
                    }
                    _state = ParseState.Text;
                    PushNodeStart(HtmlNodeType.Text, _index);
                    continue;
                }
                break;

            case ParseState.ServerSideCode:
                if (_c == '%')
                {
                    if (_index < Text.Length)
                    {
                        // "%>" closes the server-side block; pick the state to resume in from _oldstate.
                        if (Text[_index] == '>')
                        {
                            switch (_oldstate)
                            {
                                case ParseState.AttributeAfterEquals:
                                    _state = ParseState.AttributeValue;
                                    break;

                                case ParseState.BetweenAttributes:
                                    PushAttributeNameEnd(_index + 1);
                                    _state = ParseState.BetweenAttributes;
                                    break;

                                default:
                                    _state = _oldstate;
                                    break;
                            }
                            IncrementPosition();
                        }
                    }
                }
                break;

            case ParseState.PcData:
                // look for </tag + 1 char

                // check buffer end
                if ((_currentnode._namelength + 3) <= (Text.Length - (_index - 1)))
                {
                    if (string.Compare(Text.Substring(_index - 1, _currentnode._namelength + 2),
                                       "</" + _currentnode.Name, StringComparison.OrdinalIgnoreCase) == 0)
                    {
                        // Character right after "</name"; it must be '>' or whitespace to count as the closing tag.
                        int c = Text[_index - 1 + 2 + _currentnode.Name.Length];
                        if ((c == '>') || (IsWhiteSpace(c)))
                        {
                            // add the script as a text node
                            HtmlNode script = CreateNode(HtmlNodeType.Text,
                                                         _currentnode._outerstartindex +
                                                         _currentnode._outerlength);
                            script._outerlength = _index - 1 - script._outerstartindex;
                            _currentnode.AppendChild(script);

                            PushNodeStart(HtmlNodeType.Element, _index - 1);
                            PushNodeNameStart(false, _index - 1 + 2);
                            _state = ParseState.Tag;
                            IncrementPosition();
                        }
                    }
                }
                break;
        }
    }

    // finish the current work
    if (_currentnode._namestartindex > 0)
    {
        PushNodeNameEnd(_index);
    }
    PushNodeEnd(_index, false);

    // we don't need this anymore
    Lastnodes.Clear();
}
/// <summary>
/// Reads an attribute's value, mapping a missing attribute to an empty string.
/// </summary>
/// <param name="node">Attribute to read; may be null.</param>
/// <returns><c>String.Empty</c> when <paramref name="node"/> is null, otherwise its value.</returns>
private string htmlValue(HtmlAttribute node)
{
    return node == null ? String.Empty : node.Value;
}
/// <summary>
/// Creates an adapter over the given attribute.
/// </summary>
/// <param name="attribute">The attribute to adapt. It is stored as given; no null check is made.</param>
internal HtmlAttributeAdapter(AP.HtmlAttribute attribute)
{
    this._attribute = attribute;
}
/// <summary>
/// Starts a new current attribute and records where its name begins.
/// </summary>
/// <param name="index">Position stored as both the name start index and the stream position.</param>
private void PushAttributeNameStart(int index)
{
    HtmlAttribute started = CreateAttribute();
    _currentattribute = started;

    started._namestartindex = index;
    started.Line = _line;
    started._lineposition = _lineposition;
    started._streamposition = index;
}
/// <summary>
/// Writes one attribute to the output, preceded by a space.
/// </summary>
/// <param name="outText">Writer that receives the text.</param>
/// <param name="att">Attribute to write.</param>
/// <remarks>
/// In XML output mode the XML name and the HTML-encoded XML value are written in double quotes.
/// Otherwise a name of the form &lt;% ... %&gt; is written alone, and the value is written unquoted
/// only when value optimisation is on and the value holds no LF, CR, TAB or space.
/// </remarks>
internal void WriteAttribute(TextWriter outText, HtmlAttribute att)
{
    if (_ownerdocument.OptionOutputAsXml)
    {
        string xmlName = _ownerdocument.OptionOutputUpperCase ? att.XmlName.ToUpper() : att.XmlName;
        outText.Write(" " + xmlName + "=\"" + HtmlDocument.HtmlEncode(att.XmlValue) + "\"");
        return;
    }

    string name = _ownerdocument.OptionOutputUpperCase ? att.Name.ToUpper() : att.Name;

    // A name shaped like <% ... %> is emitted as-is, with no value.
    bool isServerSideBlock =
        att.Name.Length >= 4 &&
        (att.Name[0] == '<') && (att.Name[1] == '%') &&
        (att.Name[att.Name.Length-1] == '>') && (att.Name[att.Name.Length-2] == '%');
    if (isServerSideBlock)
    {
        outText.Write(" " + name);
        return;
    }

    bool quoted =
        !_ownerdocument.OptionOutputOptimizeAttributeValues ||
        att.Value.IndexOfAny(new Char[]{(char)10, (char)13, (char)9, ' '}) >= 0;

    if (quoted)
    {
        outText.Write(" " + name + "=\"" + att.Value + "\"");
    }
    else
    {
        outText.Write(" " + name + "=" + att.Value);
    }
}
/// <summary>
/// Takes the given attribute out of the list.
/// </summary>
/// <param name="attribute">The attribute to remove; must not be null.</param>
/// <exception cref="ArgumentNullException">Raised when <paramref name="attribute"/> is null.</exception>
/// <exception cref="IndexOutOfRangeException">Raised when the attribute is not in the list.</exception>
public void Remove(HtmlAttribute attribute)
{
    if (attribute == null)
    {
        throw new ArgumentNullException("attribute");
    }

    int position = GetAttributeIndex(attribute);
    if (position != -1)
    {
        RemoveAt(position);
        return;
    }

    throw new IndexOutOfRangeException();
}
/// <summary>
/// Replaces the attribute's value with the result of passing it through <c>ProcessUrl</c>.
/// </summary>
/// <param name="imgSrcAttribute">Attribute whose value is rewritten in place.</param>
private void ProcessAttrubute(HtmlAttribute imgSrcAttribute)
{
    imgSrcAttribute.Value = ProcessUrl(imgSrcAttribute.Value);
}
/// <summary>
/// Builds a new attribute from a name and a value and adds it at the end of the collection.
/// </summary>
/// <param name="name">Name of the attribute to create.</param>
/// <param name="value">Value of the attribute to create.</param>
/// <returns>The attribute returned by the single-argument <c>Append</c> overload.</returns>
public HtmlAttribute Append(string name, string value)
{
    return Append(_ownernode._ownerdocument.CreateAttribute(name, value));
}
/// <summary>
/// Creates a facade over the given attribute.
/// </summary>
/// <param name="attrib">The attribute to expose. It is stored as given; no null check is made.</param>
public HtmlAttributeFacade(HtmlAttribute attrib)
{
    this._attrib = attrib;
}
/// <summary>
/// Strips vulnerable keywords from an attribute value, then HTML-encodes and entity-escapes it.
/// </summary>
/// <param name="attribute">Attribute whose value is checked and cleaned in place.</param>
private static void CleanAttributeValues(HtmlAttribute attribute)
{
    const RegexOptions ignoreCase = RegexOptions.IgnoreCase;

    string cleaned = HttpUtility.HtmlEncode(attribute.Value);
    attribute.Value = cleaned;

    // Keywords are matched even when whitespace is interleaved between their letters.
    cleaned = Regex.Replace(cleaned, @"\s*j\s*a\s*v\s*a\s*s\s*c\s*r\s*i\s*p\s*t\s*", "", ignoreCase);
    cleaned = Regex.Replace(cleaned, @"\s*s\s*c\s*r\s*i\s*p\s*t\s*", "", ignoreCase);
    attribute.Value = cleaned;

    if (attribute.Name.ToLower() == "style")
    {
        cleaned = Regex.Replace(attribute.Value, @"\s*e\s*x\s*p\s*r\s*e\s*s\s*s\s*i\s*o\s*n\s*", "", ignoreCase);
        cleaned = Regex.Replace(cleaned, @"\s*b\s*e\s*h\s*a\s*v\s*i\s*o\s*r\s*", "", ignoreCase);
        attribute.Value = cleaned;
    }

    if (attribute.Name.ToLower() == "href" || attribute.Name.ToLower() == "src")
    {
        attribute.Value = Regex.Replace(attribute.Value, @"\s*m\s*o\s*c\s*h\s*a\s*", "", ignoreCase);
    }

    // HtmlEntity Escape: every character goes through the entity escaper.
    StringBuilder escaped = new StringBuilder();
    foreach (char ch in attribute.Value.ToCharArray())
    {
        escaped.Append(EncodeCharacterToHtmlEntityEscape(ch));
    }
    attribute.Value = escaped.ToString();
}
/// <summary>
/// Parses a @prefix attribute, a whitespace-separated list of <c>prefix: URI</c> mappings, and
/// registers each mapping in the evaluation context's namespace map.
/// </summary>
/// <param name="context">Parser context (not used by this method).</param>
/// <param name="evalContext">Evaluation context whose <c>NamespaceMap</c> receives the mappings.</param>
/// <param name="attr">The @prefix attribute.</param>
/// <param name="baseUri">Base URI that each mapped URI is resolved against.</param>
/// <param name="hiddenPrefixes">
/// Receives an entry for each prefix already present in the namespace map. If null is passed, a
/// dictionary is created locally and is not visible to the caller.
/// </param>
/// <param name="inScopePrefixes">Receives every prefix that was registered.</param>
/// <remarks>
/// Whitespace before a prefix, including the whitespace between two mappings and trailing
/// whitespace, is skipped. Previously it was absorbed into the next prefix, and trailing whitespace
/// produced a spurious warning. A value that ends right after the colon now produces a warning
/// instead of turning the end-of-input sentinel into a URI.
/// </remarks>
private void ParsePrefixAttribute(RdfAParserContext context, RdfAEvaluationContext evalContext, HtmlAttribute attr, String baseUri, Dictionary<string,Uri> hiddenPrefixes, List<String> inScopePrefixes)
{
    //Do nothing if the @prefix attribute is empty
    if (attr.Value.Equals(String.Empty)) return;

    using (StringReader reader = new StringReader(attr.Value))
    {
        char next;
        bool canExit = false;

        do
        {
            StringBuilder prefixData = new StringBuilder();
            StringBuilder uriData = new StringBuilder();

            //Skip any whitespace that separates this mapping from the previous one
            while (reader.Peek() != -1 && Char.IsWhiteSpace((char)reader.Peek()))
            {
                reader.Read();
            }
            //Only whitespace was left so there are no further mappings
            if (reader.Peek() == -1) return;

            //Grab a Prefix - characters up to the next colon
            next = (char)reader.Peek();
            while (next != ':')
            {
                //Add the Character and discard it
                prefixData.Append(next);
                reader.Read();
                if (reader.Peek() == -1)
                {
                    this.OnWarning("Aborted parsing a prefix attribute since failed to find a prefix of the form prefix: from the following content: " + prefixData.ToString());
                    return;
                }
                else
                {
                    next = (char)reader.Peek();
                }
            }

            //Discard the colon
            reader.Read();

            //The attribute must not end directly after the colon
            if (reader.Peek() == -1)
            {
                this.OnWarning("Aborted parsing a prefix attribute since reached the end of the attribute without finding a URI to go with the prefix '" + prefixData.ToString() + ":'");
                return;
            }

            //Discard the whitespace
            next = (char)reader.Peek();
            while (Char.IsWhiteSpace(next))
            {
                reader.Read();
                if (reader.Peek() == -1)
                {
                    this.OnWarning("Aborted parsing a prefix attribute since reached the end of the attribute without finding a URI to go with the prefix '" + prefixData.ToString() + ":'");
                    return;
                }
                else
                {
                    next = (char)reader.Peek();
                }
            }

            //Grab the URI - characters up to the next whitespace or end of string
            next = (char)reader.Peek();
            while (!Char.IsWhiteSpace(next))
            {
                uriData.Append(next);
                reader.Read();
                if (reader.Peek() == -1)
                {
                    //End of string so will exit after this
                    canExit = true;
                    break;
                }
                else
                {
                    next = (char)reader.Peek();
                }
            }

            //Now resolve the URI and apply it
            String uri = Tools.ResolveUri(uriData.ToString(), baseUri);
            if (!(uri.EndsWith("/") || uri.EndsWith("#"))) uri += "#";
            String prefix = prefixData.ToString();
            if (evalContext.NamespaceMap.HasNamespace(prefix))
            {
                if (hiddenPrefixes == null) hiddenPrefixes = new Dictionary<string, Uri>();
                hiddenPrefixes.Add(prefix, new Uri(uri));
            }
            evalContext.NamespaceMap.AddNamespace(prefix, new Uri(uri));
            inScopePrefixes.Add(prefix);
        } while (!canExit);
    }
}
/// <summary>
/// Runs the character-by-character state machine over <c>Text</c>, driving the
/// Push* callbacks that record node, node-name, attribute-name and attribute-value boundaries.
/// </summary>
/// <remarks>
/// Characters are compared by numeric code: 34 is '"', 37 '%', 39 '\'', 45 '-', 47 '/', 60 '&lt;',
/// 61 '=', 62 '&gt;', 63 '?'. Every path through the <c>switch</c> ends in <c>continue</c>. Where a
/// branch sets <c>_index</c> to <c>Text.Length</c> first, the loop condition fails next and parsing
/// stops. Several branches re-check <c>_state</c> after a Push* call and act only if it still holds
/// the state they started in.
/// </remarks>
private void Parse()
{
    // Code of the quote character that opened the current quoted attribute value.
    int num = 0;
    if (this.OptionComputeChecksum)
        this._crc32 = new Crc32();
    // Reset parser state.
    this.Lastnodes = new Dictionary<string, HtmlNode>();
    this._c = 0;
    this._fullcomment = false;
    this._parseerrors = new List<HtmlParseError>();
    this._line = 1;
    this._lineposition = 1;
    this._maxlineposition = 1;
    this._state = HtmlDocument.ParseState.Text;
    this._oldstate = this._state;
    this._documentnode._innerlength = this.Text.Length;
    this._documentnode._outerlength = this.Text.Length;
    this._remainderOffset = this.Text.Length;
    this._lastparentnode = this._documentnode;
    this._currentnode = this.CreateNode(HtmlNodeType.Text, 0);
    this._currentattribute = (HtmlAttribute) null;
    this._index = 0;
    this.PushNodeStart(HtmlNodeType.Text, 0);
    while (this._index < this.Text.Length)
    {
        this._c = (int) this.Text[this._index];
        // NOTE(review): IncrementPosition presumably advances _index, so below _index refers to the
        // character after _c and _index - 1 to _c itself - confirm against IncrementPosition.
        this.IncrementPosition();
        switch (this._state)
        {
            case HtmlDocument.ParseState.Text:
                // Both outcomes continue; NewCheck() is called for whatever it does internally.
                if (!this.NewCheck())
                    continue;
                continue;
            case HtmlDocument.ParseState.WhichTag:
                if (!this.NewCheck())
                {
                    if (this._c == 47)
                    {
                        this.PushNodeNameStart(false, this._index);
                    }
                    else
                    {
                        this.PushNodeNameStart(true, this._index - 1);
                        this.DecrementPosition();
                    }
                    this._state = HtmlDocument.ParseState.Tag;
                    continue;
                }
                continue;
            case HtmlDocument.ParseState.Tag:
                if (!this.NewCheck())
                {
                    if (HtmlDocument.IsWhiteSpace(this._c))
                    {
                        this.PushNodeNameEnd(this._index - 1);
                        if (this._state == HtmlDocument.ParseState.Tag)
                        {
                            this._state = HtmlDocument.ParseState.BetweenAttributes;
                            continue;
                        }
                        continue;
                    }
                    if (this._c == 47)
                    {
                        this.PushNodeNameEnd(this._index - 1);
                        if (this._state == HtmlDocument.ParseState.Tag)
                        {
                            this._state = HtmlDocument.ParseState.EmptyTag;
                            continue;
                        }
                        continue;
                    }
                    if (this._c == 62)
                    {
                        this.PushNodeNameEnd(this._index - 1);
                        if (this._state == HtmlDocument.ParseState.Tag)
                        {
                            if (!this.PushNodeEnd(this._index, false))
                            {
                                // stop parsing
                                this._index = this.Text.Length;
                                continue;
                            }
                            if (this._state == HtmlDocument.ParseState.Tag)
                            {
                                this._state = HtmlDocument.ParseState.Text;
                                this.PushNodeStart(HtmlNodeType.Text, this._index);
                                continue;
                            }
                            continue;
                        }
                        continue;
                    }
                    continue;
                }
                continue;
            case HtmlDocument.ParseState.BetweenAttributes:
                if (!this.NewCheck() && !HtmlDocument.IsWhiteSpace(this._c))
                {
                    if (this._c == 47 || this._c == 63)
                    {
                        this._state = HtmlDocument.ParseState.EmptyTag;
                        continue;
                    }
                    if (this._c == 62)
                    {
                        if (!this.PushNodeEnd(this._index, false))
                        {
                            // stop parsing
                            this._index = this.Text.Length;
                            continue;
                        }
                        if (this._state == HtmlDocument.ParseState.BetweenAttributes)
                        {
                            this._state = HtmlDocument.ParseState.Text;
                            this.PushNodeStart(HtmlNodeType.Text, this._index);
                            continue;
                        }
                        continue;
                    }
                    // Any other character starts an attribute name.
                    this.PushAttributeNameStart(this._index - 1);
                    this._state = HtmlDocument.ParseState.AttributeName;
                    continue;
                }
                continue;
            case HtmlDocument.ParseState.EmptyTag:
                if (!this.NewCheck())
                {
                    if (this._c == 62)
                    {
                        if (!this.PushNodeEnd(this._index, true))
                        {
                            // stop parsing
                            this._index = this.Text.Length;
                            continue;
                        }
                        if (this._state == HtmlDocument.ParseState.EmptyTag)
                        {
                            this._state = HtmlDocument.ParseState.Text;
                            this.PushNodeStart(HtmlNodeType.Text, this._index);
                            continue;
                        }
                        continue;
                    }
                    this._state = HtmlDocument.ParseState.BetweenAttributes;
                    continue;
                }
                continue;
            case HtmlDocument.ParseState.AttributeName:
                if (!this.NewCheck())
                {
                    if (HtmlDocument.IsWhiteSpace(this._c))
                    {
                        this.PushAttributeNameEnd(this._index - 1);
                        this._state = HtmlDocument.ParseState.AttributeBeforeEquals;
                        continue;
                    }
                    if (this._c == 61)
                    {
                        this.PushAttributeNameEnd(this._index - 1);
                        this._state = HtmlDocument.ParseState.AttributeAfterEquals;
                        continue;
                    }
                    if (this._c == 62)
                    {
                        this.PushAttributeNameEnd(this._index - 1);
                        if (!this.PushNodeEnd(this._index, false))
                        {
                            // stop parsing
                            this._index = this.Text.Length;
                            continue;
                        }
                        if (this._state == HtmlDocument.ParseState.AttributeName)
                        {
                            this._state = HtmlDocument.ParseState.Text;
                            this.PushNodeStart(HtmlNodeType.Text, this._index);
                            continue;
                        }
                        continue;
                    }
                    continue;
                }
                continue;
            case HtmlDocument.ParseState.AttributeBeforeEquals:
                if (!this.NewCheck() && !HtmlDocument.IsWhiteSpace(this._c))
                {
                    if (this._c == 62)
                    {
                        if (!this.PushNodeEnd(this._index, false))
                        {
                            // stop parsing
                            this._index = this.Text.Length;
                            continue;
                        }
                        if (this._state == HtmlDocument.ParseState.AttributeBeforeEquals)
                        {
                            this._state = HtmlDocument.ParseState.Text;
                            this.PushNodeStart(HtmlNodeType.Text, this._index);
                            continue;
                        }
                        continue;
                    }
                    if (this._c == 61)
                    {
                        this._state = HtmlDocument.ParseState.AttributeAfterEquals;
                        continue;
                    }
                    // Neither '=' nor whitespace: go back one position and treat it as a new attribute.
                    this._state = HtmlDocument.ParseState.BetweenAttributes;
                    this.DecrementPosition();
                    continue;
                }
                continue;
            case HtmlDocument.ParseState.AttributeAfterEquals:
                if (!this.NewCheck() && !HtmlDocument.IsWhiteSpace(this._c))
                {
                    if (this._c == 39 || this._c == 34)
                    {
                        this._state = HtmlDocument.ParseState.QuotedAttributeValue;
                        this.PushAttributeValueStart(this._index, this._c);
                        // Remember which quote opened the value; QuotedAttributeValue ends on the same one.
                        num = this._c;
                        continue;
                    }
                    if (this._c == 62)
                    {
                        if (!this.PushNodeEnd(this._index, false))
                        {
                            // stop parsing
                            this._index = this.Text.Length;
                            continue;
                        }
                        if (this._state == HtmlDocument.ParseState.AttributeAfterEquals)
                        {
                            this._state = HtmlDocument.ParseState.Text;
                            this.PushNodeStart(HtmlNodeType.Text, this._index);
                            continue;
                        }
                        continue;
                    }
                    // Unquoted value.
                    this.PushAttributeValueStart(this._index - 1);
                    this._state = HtmlDocument.ParseState.AttributeValue;
                    continue;
                }
                continue;
            case HtmlDocument.ParseState.AttributeValue:
                if (!this.NewCheck())
                {
                    if (HtmlDocument.IsWhiteSpace(this._c))
                    {
                        this.PushAttributeValueEnd(this._index - 1);
                        this._state = HtmlDocument.ParseState.BetweenAttributes;
                        continue;
                    }
                    if (this._c == 62)
                    {
                        this.PushAttributeValueEnd(this._index - 1);
                        if (!this.PushNodeEnd(this._index, false))
                        {
                            // stop parsing
                            this._index = this.Text.Length;
                            continue;
                        }
                        if (this._state == HtmlDocument.ParseState.AttributeValue)
                        {
                            this._state = HtmlDocument.ParseState.Text;
                            this.PushNodeStart(HtmlNodeType.Text, this._index);
                            continue;
                        }
                        continue;
                    }
                    continue;
                }
                continue;
            case HtmlDocument.ParseState.Comment:
                // '>' ends the comment; with _fullcomment set, only when the two characters before it are "--".
                if (this._c == 62 && (!this._fullcomment || (int) this.Text[this._index - 2] == 45 && (int) this.Text[this._index - 3] == 45))
                {
                    if (!this.PushNodeEnd(this._index, false))
                    {
                        // stop parsing
                        this._index = this.Text.Length;
                        continue;
                    }
                    this._state = HtmlDocument.ParseState.Text;
                    this.PushNodeStart(HtmlNodeType.Text, this._index);
                    continue;
                }
                continue;
            case HtmlDocument.ParseState.QuotedAttributeValue:
                // Unlike the states above, NewCheck() is not called here.
                if (this._c == num)
                {
                    this.PushAttributeValueEnd(this._index - 1);
                    this._state = HtmlDocument.ParseState.BetweenAttributes;
                    continue;
                }
                // "<%" inside a quoted value switches to the server-side code state.
                if (this._c == 60 && this._index < this.Text.Length && (int) this.Text[this._index] == 37)
                {
                    this._oldstate = this._state;
                    this._state = HtmlDocument.ParseState.ServerSideCode;
                    continue;
                }
                continue;
            case HtmlDocument.ParseState.ServerSideCode:
                // "%>" closes the server-side block; pick the state to resume in from _oldstate.
                if (this._c == 37 && this._index < this.Text.Length && (int) this.Text[this._index] == 62)
                {
                    switch (this._oldstate)
                    {
                        case HtmlDocument.ParseState.BetweenAttributes:
                            this.PushAttributeNameEnd(this._index + 1);
                            this._state = HtmlDocument.ParseState.BetweenAttributes;
                            break;
                        case HtmlDocument.ParseState.AttributeAfterEquals:
                            this._state = HtmlDocument.ParseState.AttributeValue;
                            break;
                        default:
                            this._state = this._oldstate;
                            break;
                    }
                    this.IncrementPosition();
                    continue;
                }
                continue;
            case HtmlDocument.ParseState.PcData:
                // Look for "</" + current node name, after checking that enough text remains for it plus one character.
                if (this._currentnode._namelength + 3 <= this.Text.Length - (this._index - 1) && string.Compare(this.Text.Substring(this._index - 1, this._currentnode._namelength + 2), "</" + this._currentnode.Name, StringComparison.OrdinalIgnoreCase) == 0)
                {
                    // Character right after "</name"; it must be '>' or whitespace to count as the closing tag.
                    int c = (int) this.Text[this._index - 1 + 2 + this._currentnode.Name.Length];
                    if (c == 62 || HtmlDocument.IsWhiteSpace(c))
                    {
                        // Add the content seen so far as a text child of the current node.
                        HtmlNode node = this.CreateNode(HtmlNodeType.Text, this._currentnode._outerstartindex + this._currentnode._outerlength);
                        node._outerlength = this._index - 1 - node._outerstartindex;
                        this._currentnode.AppendChild(node);
                        this.PushNodeStart(HtmlNodeType.Element, this._index - 1);
                        this.PushNodeNameStart(false, this._index - 1 + 2);
                        this._state = HtmlDocument.ParseState.Tag;
                        this.IncrementPosition();
                        continue;
                    }
                    continue;
                }
                continue;
            default:
                continue;
        }
    }
    // Finish whatever node was still open when the text ran out.
    if (this._currentnode._namestartindex > 0)
        this.PushNodeNameEnd(this._index);
    this.PushNodeEnd(this._index, false);
    this.Lastnodes.Clear();
}
/// <summary>
/// Applies a @vocab attribute to the evaluation context.
/// </summary>
/// <param name="context">Parser context; supplies the default vocabulary used on reset.</param>
/// <param name="evalContext">Evaluation context whose local vocabulary is updated.</param>
/// <param name="attr">The @vocab attribute.</param>
private void ParseVocabAttribute(RdfAParserContext context, RdfAEvaluationContext evalContext, HtmlAttribute attr)
{
    if (attr.Value.Length != 0)
    {
        evalContext.LocalVocabulary.VocabularyUri = attr.Value;
        return;
    }

    //An empty value resets the Local Vocabulary to one built from the default vocabulary
    evalContext.LocalVocabulary = new TermMappings(context.DefaultVocabulary);
}
/// <summary>
/// Makes the last attribute of the current node that has a value the current attribute again,
/// removing every attribute that follows it.
/// </summary>
/// <param name="index">Position passed to <c>PushAttributeValueEnd</c> when <paramref name="close"/> is true.</param>
/// <param name="close">
/// True to end the value at <paramref name="index"/> and move to the BetweenAttributes state;
/// false to reset the value length to 0 and move to the QuotedAttributeValue state.
/// </param>
/// <returns>False when no attribute with a value was found, otherwise true.</returns>
private bool ExtendLastAttributeValue(int index, bool close)
{
    System.Diagnostics.Debug.Assert(_currentnode != null && _currentnode.Attributes.Count > 0);

    var attributes = _currentnode.Attributes;

    // Walk backwards to the last attribute that has a value specified.
    int target = attributes.Count - 1;
    while (target >= 0)
    {
        var probe = attributes[target];
        bool hasValue =
            probe._valuelength > 0 ||
            probe._valuestartindex > (probe._namestartindex + probe._namelength);
        if (hasValue)
        {
            break;
        }
        target--;
    }

    if (target < 0)
    {
        return false;   // nothing to extend
    }

    // The found attribute becomes current again; everything after it is dropped.
    _currentattribute = attributes[target];
    while (attributes.Count - 1 > target)
    {
        attributes.RemoveAt(attributes.Count - 1);
    }

    if (!close)
    {
        _currentattribute._valuelength = 0;
        _state = ParseState.QuotedAttributeValue;
        return true;
    }

    PushAttributeValueEnd(index);
    _state = ParseState.BetweenAttributes;
    return true;
}
/// <summary>
/// Initialises the wrapper with the HTML element attribute it wraps.
/// </summary>
/// <param name="attribute">The attribute to wrap. It is stored as given; no null check is made.</param>
protected HtmlAttributeBase(HtmlAgilityPack.HtmlAttribute attribute)
{
    this.attribute = attribute;
}
/// <summary>
/// Finds the position of an attribute in the collection using the <c>==</c> operator.
/// </summary>
/// <param name="attribute">The attribute to look for; must not be null.</param>
/// <returns>The zero-based position of the first match, or -1 when there is none.</returns>
/// <exception cref="ArgumentNullException">Raised when <paramref name="attribute"/> is null.</exception>
internal int GetAttributeIndex(HtmlAttribute attribute)
{
    if (attribute == null)
    {
        throw new ArgumentNullException("attribute");
    }

    int position = 0;
    while (position < _items.Count)
    {
        if (((HtmlAttribute)_items[position]) == attribute)
        {
            return position;
        }
        position++;
    }

    return -1;
}
/// <summary>
/// Decides whether a node/attribute pair refers to a static resource.
/// </summary>
/// <param name="node">Node that owns the attribute.</param>
/// <param name="attr">Attribute holding the reference; its value may be null.</param>
/// <returns>
/// True for link, script and img nodes; for values starting with "#" or "javascript:"; and for
/// values ending in .js, .css, .png, .jpg, .jpeg, .ico or .gif. All comparisons ignore case.
/// False otherwise, including when the value is null.
/// </returns>
private bool IsStaticResource(HtmlNode node, HtmlAttribute attr)
{
    var nodeName = node.Name;
    if (string.Equals(nodeName, "link", StringComparison.OrdinalIgnoreCase) ||
        string.Equals(nodeName, "script", StringComparison.OrdinalIgnoreCase) ||
        string.Equals(nodeName, "img", StringComparison.OrdinalIgnoreCase))
    {
        return true;
    }

    // Use the null-safe copy for every check. Previously the raw value was dereferenced here,
    // which threw on null, and "javascript:" was matched case-sensitively.
    var attValue = attr.Value ?? "";

    if (attValue.StartsWith("#", StringComparison.Ordinal) ||
        attValue.StartsWith("javascript:", StringComparison.OrdinalIgnoreCase))
    {
        return true;
    }

    if (attValue.EndsWith(".js", StringComparison.OrdinalIgnoreCase) ||
        attValue.EndsWith(".css", StringComparison.OrdinalIgnoreCase) ||
        attValue.EndsWith(".png", StringComparison.OrdinalIgnoreCase) ||
        attValue.EndsWith(".jpg", StringComparison.OrdinalIgnoreCase) ||
        attValue.EndsWith(".jpeg", StringComparison.OrdinalIgnoreCase) ||
        attValue.EndsWith(".ico", StringComparison.OrdinalIgnoreCase) ||
        attValue.EndsWith(".gif", StringComparison.OrdinalIgnoreCase))
    {
        return true;
    }

    return false;
}
/// <summary>
/// Wraps an attribute in an adapter exposed as <c>IHtmlAttribute</c>.
/// </summary>
/// <param name="attribute">The attribute to wrap.</param>
/// <returns>A new adapter around <paramref name="attribute"/>.</returns>
public static IHtmlAttribute AsAttribute(this AP.HtmlAttribute attribute)
{
    IHtmlAttribute adapted = new HtmlAttributeAdapter(attribute);
    return adapted;
}
/// <summary>
/// Decides whether a node/attribute pair refers to a static resource, based on the node name or
/// on the file extension of the attribute value.
/// </summary>
/// <param name="node">Node that owns the attribute.</param>
/// <param name="attr">Attribute holding the reference; its value may be null.</param>
/// <returns>
/// True for link, script and img nodes, and for values whose path ends in .js, .css, .png, .jpg,
/// .jpeg, .ico or .gif (ignoring case). False otherwise, including when the value is null, empty,
/// or not usable as a path.
/// </returns>
/// <remarks>
/// The query string and fragment are removed before the extension is read, so "a.js?v=1" counts as
/// static while "page.aspx?file=x.png" does not.
/// </remarks>
private bool IsStaticResource(HtmlNode node, HtmlAttribute attr)
{
    var nodeName = node.Name.ToLower();
    if (nodeName == "link" || nodeName == "script" || nodeName == "img")
    {
        return true;
    }

    var path = attr.Value;
    if (string.IsNullOrEmpty(path))
    {
        return false;
    }

    // Only the path part can carry a file extension.
    int cut = path.IndexOfAny(new char[] { '?', '#' });
    if (cut >= 0)
    {
        path = path.Substring(0, cut);
    }

    string extension;
    try
    {
        extension = Path.GetExtension(path);
    }
    catch (ArgumentException)
    {
        // The value contains characters that are invalid in a path, so it has no usable extension.
        return false;
    }

    if (string.IsNullOrEmpty(extension))
    {
        return false;
    }

    return string.Equals(extension, ".js", StringComparison.OrdinalIgnoreCase)
        || string.Equals(extension, ".css", StringComparison.OrdinalIgnoreCase)
        || string.Equals(extension, ".png", StringComparison.OrdinalIgnoreCase)
        || string.Equals(extension, ".jpg", StringComparison.OrdinalIgnoreCase)
        || string.Equals(extension, ".jpeg", StringComparison.OrdinalIgnoreCase)
        || string.Equals(extension, ".ico", StringComparison.OrdinalIgnoreCase)
        || string.Equals(extension, ".gif", StringComparison.OrdinalIgnoreCase);
}
/// <summary>
/// Processes a @profile attribute: each listed profile is either recognised as the fixed XHTML
/// vocabulary or loaded as a graph and queried for namespace and term mappings, which are added to
/// the local vocabulary.
/// </summary>
/// <param name="context">Parser context (not used by this method).</param>
/// <param name="evalContext">Evaluation context whose local vocabulary receives the mappings.</param>
/// <param name="attr">The @profile attribute holding a whitespace-separated list of profile URIs.</param>
/// <returns>
/// False as soon as one profile cannot be retrieved or processed (a warning is raised); true when
/// every profile was processed.
/// </returns>
/// <remarks>
/// The value is split on spaces, tabs and line breaks, and empty entries are discarded. Previously
/// it was split on single spaces only, so repeated spaces or other whitespace produced tokens that
/// were not valid URIs and made the method return false.
/// </remarks>
private bool ParseProfileAttribute(RdfAParserContext context, RdfAEvaluationContext evalContext, HtmlAttribute attr)
{
    String[] profiles = attr.Value.Split(new char[] { ' ', '\t', '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries);

    String prefixQuery = "PREFIX rdfa: <" + RdfANamespace + "> SELECT SAMPLE(?prefix) AS ?NamespacePrefix SAMPLE(?uri) AS ?NamespaceURI WHERE { ?s rdfa:prefix ?prefix ; rdfa:uri ?uri } GROUP BY ?s HAVING (COUNT(?prefix) = 1 && COUNT(?uri) = 1)";
    String termQuery = "PREFIX rdfa: <" + RdfANamespace + "> SELECT SAMPLE(?term) AS ?Term SAMPLE(?uri) AS ?URI WHERE {?s rdfa:term ?term ; rdfa:uri ?uri } GROUP BY ?s HAVING (COUNT(?term) = 1 && COUNT(?uri) = 1)";

    foreach (String profile in profiles)
    {
        try
        {
            Graph g = new Graph();

            if (profile.Equals(XHtmlVocabNamespace) || profile.Equals(XHtmlVocabNamespace.Substring(0, XHtmlVocabNamespace.Length-1)))
            {
                //XHTML Vocabulary is a fixed vocabulary
                evalContext.LocalVocabulary.Merge(new XHtmlRdfAVocabulary());
            }
            else
            {
                try
                {
                    UriLoader.Load(g, new Uri(profile));
                }
                catch
                {
                    //If we fail then we return false which indicates that the DOM subtree is ignored
                    this.OnWarning("Unable to retrieve a Profile document which the library could parse from the URI '" + profile + "'");
                    return false;
                }

                //Namespace Mappings
                Object results = g.ExecuteQuery(prefixQuery);
                if (results is SparqlResultSet)
                {
                    SparqlResultSet rset = (SparqlResultSet)results;
                    foreach (SparqlResult r in rset.Results)
                    {
                        INode prefixNode = r["NamespacePrefix"];
                        INode nsNode = r["NamespaceURI"];
                        //Only literal/literal pairs are usable as a mapping
                        if (prefixNode.NodeType == NodeType.Literal && nsNode.NodeType == NodeType.Literal)
                        {
                            String prefix = ((ILiteralNode)prefixNode).Value.ToLower();
                            String ns = ((ILiteralNode)nsNode).Value;
                            evalContext.LocalVocabulary.AddNamespace(prefix, ns);
                        }
                    }
                }

                //Term Mappings
                results = g.ExecuteQuery(termQuery);
                if (results is SparqlResultSet)
                {
                    SparqlResultSet rset = (SparqlResultSet)results;
                    foreach (SparqlResult r in rset.Results)
                    {
                        INode termNode = r["Term"];
                        INode uriNode = r["URI"];
                        if (termNode.NodeType == NodeType.Literal && uriNode.NodeType == NodeType.Literal)
                        {
                            String term = ((ILiteralNode)termNode).Value;
                            String uri = ((ILiteralNode)uriNode).Value;
                            evalContext.LocalVocabulary.AddTerm(term, uri);
                        }
                    }
                }
            }
        }
        catch
        {
            //Any other failure is reported as a warning and stops processing with a false result
            this.OnWarning("Ignoring the value '" + profile + "' since this is not a valid URI or a profile document was not successfully retrieved and parsed from this URI");
            return false;
        }
    }

    return true;
}
/// <summary>
/// Derives an upper-cased make name from a link attribute by stripping the "/usedcar/" and
/// "/index.html" text from its value.
/// </summary>
/// <param name="htmlAttribute">Attribute whose value holds the link; may be null.</param>
/// <returns>The stripped, upper-cased value, or an empty string when <paramref name="htmlAttribute"/> is null.</returns>
private static string ExtractMake(HtmlAttribute htmlAttribute)
{
    if (htmlAttribute == null)
    {
        return string.Empty;
    }

    string withoutPrefix = htmlAttribute.Value.Replace(@"/usedcar/", string.Empty);
    string withoutSuffix = withoutPrefix.Replace(@"/index.html", string.Empty);
    return withoutSuffix.ToUpper();
}
/// <summary>
/// Writes one attribute to the output, preceded by a space, using the attribute's own quote style.
/// </summary>
/// <param name="outText">Writer that receives the text.</param>
/// <param name="att">Attribute to write.</param>
/// <remarks>
/// In XML output mode the name is the original name, or the XML name (optionally upper-cased). A
/// name starting with '-' or a digit gets '_' prepended, and the HTML-encoded XML value is written
/// in quotes. Otherwise a name of the form &lt;% ... %&gt; is written alone, and the value is
/// written unquoted only when value optimisation is on and the value holds no LF, CR, TAB or space.
/// </remarks>
internal void WriteAttribute(TextWriter outText, HtmlAttribute att)
{
    string quote = att.QuoteType == AttributeValueQuote.DoubleQuote ? "\"" : "'";

    if (_ownerdocument.OptionOutputAsXml)
    {
        string xmlName;
        if (_ownerdocument.OptionOutputOriginalCase)
        {
            xmlName = att.OriginalName;
        }
        else if (_ownerdocument.OptionOutputUpperCase)
        {
            xmlName = att.XmlName.ToUpper();
        }
        else
        {
            xmlName = att.XmlName;
        }

        if (!string.IsNullOrEmpty(xmlName))
        {
            char lead = xmlName[0];
            bool invalidStart = lead == '-' || (lead >= '0' && lead <= '9');
            if (invalidStart)
            {
                // An XML attribute name may not start like this; repair it by prepending '_'.
                xmlName = '_' + xmlName;
            }
        }

        outText.Write(" " + xmlName + "=" + quote + HtmlDocument.HtmlEncode(att.XmlValue) + quote);
        return;
    }

    string name = _ownerdocument.OptionOutputUpperCase ? att.Name.ToUpper() : att.Name;

    // A name shaped like <% ... %> is emitted as-is, with no value.
    bool isServerSideBlock =
        att.Name.Length >= 4 &&
        (att.Name[0] == '<') && (att.Name[1] == '%') &&
        (att.Name[att.Name.Length - 1] == '>') && (att.Name[att.Name.Length - 2] == '%');
    if (isServerSideBlock)
    {
        outText.Write(" " + name);
        return;
    }

    bool quoted =
        !_ownerdocument.OptionOutputOptimizeAttributeValues ||
        att.Value.IndexOfAny(new Char[] {(char) 10, (char) 13, (char) 9, ' '}) >= 0;

    if (quoted)
    {
        outText.Write(" " + name + "=" + quote + att.Value + quote);
    }
    else
    {
        outText.Write(" " + name + "=" + att.Value);
    }
}
/// <summary>
/// Produces a copy of this attribute.
/// </summary>
/// <returns>A new attribute, created for the same owner document, carrying this attribute's name and value.</returns>
public HtmlAttribute Clone()
{
    HtmlAttribute copy = new HtmlAttribute(_ownerdocument);
    copy.Name = Name;
    copy.Value = Value;
    return copy;
}
/// <summary>
/// Writes one attribute to the output, preceded by a space, using the attribute's own quote style.
/// </summary>
/// <param name="outText">Writer that receives the text.</param>
/// <param name="att">Attribute to write.</param>
/// <remarks>
/// In XML output mode the XML name (optionally upper-cased) is used unless the original case is
/// requested, and the HTML-encoded XML value is written in quotes. Otherwise a name of the form
/// &lt;% ... %&gt; is written alone, and the value is written unquoted only when value optimisation
/// is on and the value holds no LF, CR, TAB or space.
/// </remarks>
internal void WriteAttribute(TextWriter outText, HtmlAttribute att)
{
    string quote = att.QuoteType == AttributeValueQuote.DoubleQuote ? "\"" : "'";

    if (_ownerdocument.OptionOutputAsXml)
    {
        string xmlName;
        if (_ownerdocument.OptionOutputUpperCase)
        {
            xmlName = att.XmlName.ToUpper();
        }
        else
        {
            xmlName = att.XmlName;
        }

        // The original-case option overrides the name chosen above.
        if (_ownerdocument.OptionOutputOriginalCase)
        {
            xmlName = att.OriginalName;
        }

        outText.Write(" " + xmlName + "=" + quote + HtmlDocument.HtmlEncode(att.XmlValue) + quote);
        return;
    }

    string name;
    if (_ownerdocument.OptionOutputUpperCase)
    {
        name = att.Name.ToUpper();
    }
    else
    {
        name = att.Name;
    }

    // A name shaped like <% ... %> is emitted as-is, with no value.
    if (att.Name.Length >= 4 &&
        (att.Name[0] == '<') && (att.Name[1] == '%') &&
        (att.Name[att.Name.Length - 1] == '>') && (att.Name[att.Name.Length - 2] == '%'))
    {
        outText.Write(" " + name);
        return;
    }

    bool unquoted =
        _ownerdocument.OptionOutputOptimizeAttributeValues &&
        att.Value.IndexOfAny(new[] {(char) 10, (char) 13, (char) 9, ' '}) < 0;

    if (unquoted)
    {
        outText.Write(" " + name + "=" + att.Value);
    }
    else
    {
        outText.Write(" " + name + "=" + quote + att.Value + quote);
    }
}
/// <summary>
/// Creates an adapter over the given attribute.
/// </summary>
/// <param name="attribute">The attribute to adapt. It is stored as given; no null check is made.</param>
internal HtmlAttributeAdapter(AP.HtmlAttribute attribute)
{
    this._attribute = attribute;
}
/// <summary>
/// Starts a new current attribute and records where its name begins.
/// </summary>
/// <param name="index">Position stored as both the name start index and the stream position.</param>
private void PushAttributeNameStart(int index)
{
    HtmlAttribute fresh = this.CreateAttribute();
    this._currentattribute = fresh;

    fresh._namestartindex = index;
    fresh.Line = this._line;
    fresh._lineposition = this._lineposition;
    fresh._streamposition = index;
}
/// <summary>
/// Asserts that the attribute's value contains the given substring.
/// </summary>
/// <param name="a">Attribute under test.</param>
/// <param name="expectedSubstring">Text expected to occur in the attribute's value.</param>
/// <returns>The same attribute, so further checks can be chained.</returns>
public static HtmlAttribute ValueShouldContain(this HtmlAttribute a, string expectedSubstring)
{
    string actualValue = a.Value;
    actualValue.ShouldContain(expectedSubstring);
    return a;
}