/// <summary>
/// Creates an <c>HtmlNode</c> from an <c>IHTMLDOMNode</c>, recursively converting its child nodes.
/// </summary>
/// <param name="node">The DOM node to convert.</param>
/// <returns>
/// The converted node. Text nodes are returned as-is; comment nodes are renamed to
/// <c>comment</c> and given a single HTML-encoded <c>#text</c> child; every other node
/// receives its specified attributes and its converted children.
/// </returns>
protected HtmlNode LoadHtmlNode(IHTMLDOMNode node)
{
    // Guard against a null nodeValue the same way the attribute loop below does,
    // instead of letting ToString() throw.
    object rawValue = node.nodeValue;
    string initialValue = rawValue != null ? rawValue.ToString() : "";

    var nn = new HtmlNode(node.nodeName, initialValue);
    if (nn.TagName == "#text")
    {
        return nn;
    }

    if (nn.TagName == "#comment")
    {
        // Represent the comment as a "comment" element wrapping its encoded text.
        nn.TagName = "comment";
        string v = System.Web.HttpUtility.HtmlEncode(nn.Value);
        nn.Children.Add(new HtmlNode("#text", v));
        nn.Value = v;
        return nn;
    }

    // Copy only the attributes that are flagged as specified.
    IHTMLAttributeCollection attrs = node.attributes;
    if (attrs != null)
    {
        foreach (IHTMLDOMAttribute at in attrs)
        {
            if (at.specified)
            {
                string nodeValue = "";
                if (at.nodeValue != null)
                {
                    nodeValue = at.nodeValue.ToString();
                }
                nn.Attrs.Add(new HtmlAttr { Key = at.nodeName, Value = nodeValue });
            }
        }
    }

    var col = node.childNodes as IHTMLDOMChildrenCollection;
    if (col != null)
    {
        foreach (IHTMLDOMNode nd in col)
        {
            HtmlNode el = LoadHtmlNode(nd);
            el.Parent = nn;
            nn.Children.Add(el);
        }

        // The element's own value mirrors its first child when that child is text.
        if (nn.Children.Count > 0 && nn.Children[0].TagName == "#text")
        {
            nn.Value = nn.Children[0].Value;
        }

        // NOTE(review): comment children are renamed to "comment" by the recursive call
        // above, so this comparison against "#comment" does not match them as written.
        // Confirm whether "comment" was intended before changing it.
        if (nn.Children.Count > 0 && nn.Children[0].TagName == "#comment")
        {
            nn.Value = nn.Children[0].Value;
        }
    }

    return nn;
}
/// <summary>
/// Creates a name-value collection of available attributes.
/// </summary>
/// <remarks>
/// An attribute is included only when <c>name=</c> appears in the element's outer HTML
/// and its value is non-empty. Attribute names that do not contain "-" are also recorded
/// in <c>_localPattern</c> (once each). When the element has inner text, it is exposed
/// under the pseudo-attribute name "Text".
/// </remarks>
/// <param name="element">element to evaluate</param>
/// <returns>NameValueCollection of available attributes actually listed in the HTML</returns>
public NameValueCollection GetAvailableAttributes(IHTMLElement element)
{
    var nvcAvailableAttributes = new NameValueCollection();
    IHTMLAttributeCollection elementAttributes = ((IHTMLDOMNode)element).attributes;

    // A missing attribute collection is treated as "no attributes" rather than a crash.
    if (elementAttributes != null)
    {
        // The markup does not change while we enumerate, so read it once instead of
        // re-reading it for every attribute.
        string outerHtml = element.outerHTML ?? string.Empty;

        foreach (IHTMLDOMAttribute attribute in elementAttributes)
        {
            string attributeName = attribute.nodeName;

            // only get the attributes that are really part of the HTML
            if (!outerHtml.Contains(attributeName + "="))
            {
                continue;
            }

            string value = attribute.nodeValue != null ? attribute.nodeValue.ToString() : null;
            if (string.IsNullOrEmpty(value))
            {
                continue;
            }

            nvcAvailableAttributes.Add(attributeName, value);
            if (!_localPattern.Contains(attributeName) && !attributeName.Contains("-"))
            {
                _localPattern.Add(attributeName);
            }
        }
    }

    if (element.innerText != null)
    {
        // Apply the same duplicate check used for attribute names so repeated calls
        // do not keep appending "Text" to the pattern list.
        if (!_localPattern.Contains("Text"))
        {
            _localPattern.Add("Text");
        }
        nvcAvailableAttributes.Add("Text", element.innerText);
    }

    return nvcAvailableAttributes;
}
/// <summary>
/// Gets the attributes that were explicitly specified on an element.
/// </summary>
/// <remarks>
/// The attributes are read directly from the MSHTML interfaces that HtmlElement hides,
/// so the result may include attributes that cannot be obtained through HtmlElement.
/// Use ExtractUsableAttributes when only attributes obtainable through HtmlElement are wanted.
/// </remarks>
/// <param name="sourceElement">The element whose attributes should be extracted.</param>
/// <returns>
/// Attribute name/value pairs. Names are upper-cased with the invariant culture; an
/// attribute without a value is reported with an empty string.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="sourceElement"/> is null.</exception>
public static Dictionary<string, string> ExtractAttributes(HtmlElement sourceElement)
{
    if (sourceElement == null)
    {
        // The single-string constructor takes the parameter name, not the message,
        // so pass both explicitly.
        throw new ArgumentNullException("sourceElement", "nullが指定されています");
    }

    Dictionary<string, string> extractedAttributes = new Dictionary<string, string>();

    if (sourceElement.TagName == "!" || sourceElement.TagName == "?")
    {
        // The attributes collection cannot be retrieved for these tags and an
        // exception would be thrown, so they are handled separately.
        return extractedAttributes;
    }

    IHTMLElement2 domElement = (IHTMLElement2)sourceElement.DomElement;
    IHTMLDOMNode node = (IHTMLDOMNode)domElement;
    IHTMLAttributeCollection attributes = (IHTMLAttributeCollection)node.attributes;
    if (attributes == null)
    {
        return extractedAttributes;
    }

    foreach (IHTMLDOMAttribute attribute in attributes)
    {
        if (!attribute.specified)
        {
            continue;
        }

        // A specified attribute may still carry no value; do not call ToString() on null.
        object rawValue = attribute.nodeValue;
        string value = rawValue != null ? rawValue.ToString() : string.Empty;

        // Invariant casing keeps the keys stable regardless of the current culture.
        extractedAttributes.Add(attribute.nodeName.ToUpperInvariant(), value);
    }

    return extractedAttributes;
}
/// <summary>
/// We don't want to prune elements that have class, style, id, or event attributes.
/// It seems like if the author went through the trouble to put these attributes
/// on, we shouldn't trim. (Maybe we should even keep any element with any attributes?)
/// </summary>
/// <param name="node">The DOM node whose attributes are inspected.</param>
/// <returns>
/// <c>true</c> when any specified attribute is named class/className/style/id
/// (case-insensitive) or starts with "on"; otherwise <c>false</c>.
/// </returns>
private static bool HasInterestingAttributes(IHTMLDOMNode node)
{
    IHTMLAttributeCollection attrs = node.attributes as IHTMLAttributeCollection;
    if (attrs == null)
    {
        return false;
    }

    foreach (IHTMLDOMAttribute attr in attrs)
    {
        if (!attr.specified)
        {
            continue;
        }

        string attrName = attr.nodeName as string;
        if (attrName == null)
        {
            continue;
        }

        switch (attrName.ToUpperInvariant())
        {
            case "CLASSNAME":
            case "CLASS":
            case "STYLE":
            case "ID":
                return true;
        }

        // Only stop early on a match. Returning the StartsWith result unconditionally
        // would end the scan at the first specified attribute and miss interesting
        // attributes that come later.
        if (attrName.StartsWith("on", StringComparison.OrdinalIgnoreCase))
        {
            return true;
        }
    }

    return false;
}
/// <summary>
/// Stores an enumerator over the attribute collection of <c>node</c> in
/// <c>attributeEnumerator</c>. Nothing is assigned when the collection is null.
/// </summary>
private void InitialiseAttributeEnumerator()
{
    IHTMLAttributeCollection nodeAttributes = (IHTMLAttributeCollection)node.attributes;
    if (nodeAttributes == null)
    {
        return;
    }

    attributeEnumerator = nodeAttributes.GetEnumerator();
}
/// <summary>
/// Fills <c>attributeMap</c> with the recordable attributes of an element, plus the
/// "uiname" and "innertext" pseudo-attributes when they are non-empty and short enough.
/// </summary>
/// <param name="htmlElem">The element whose attributes are collected.</param>
private void BuildAttributeDictionary(IHTMLElement htmlElem)
{
    IHTMLDOMNode htmlNode = (IHTMLDOMNode)htmlElem;
    IHTMLAttributeCollection attrCollection = (IHTMLAttributeCollection)htmlNode.attributes;

    // Treat a missing collection as empty; the collection is not modified in the loop,
    // so its length is read once.
    int attrCount = (attrCollection != null) ? attrCollection.length : 0;

    for (int i = 0; i < attrCount; ++i)
    {
        Object crntIndex = i;
        IHTMLDOMAttribute crntAttribute = (IHTMLDOMAttribute)attrCollection.item(ref crntIndex);
        if (crntAttribute == null)
        {
            continue;
        }

        String rawName = crntAttribute.nodeName as String;
        if (rawName == null)
        {
            continue;
        }

        // Invariant lower-casing: culture-sensitive casing can turn "ID" into a string
        // that no longer equals "id" (e.g. under Turkish culture rules).
        String nodeName = rawName.ToLowerInvariant();
        if (nodeName == CatStudioConstants.HOOKED_BY_REC_ATTR)
        {
            continue;
        }

        switch (nodeName)
        {
            case "src":
            case "href":
            case "id":
            case "name":
            case "class":
            case "alt":
            case "title":
            case "action":
            case "for":
            case "value":
                // Only string-valued attributes are recorded.
                String nodeValue = crntAttribute.nodeValue as String;
                if (nodeValue != null)
                {
                    this.attributeMap.Add(nodeName, nodeValue);
                }
                break;
        }
    }

    // Add "uiName" pseudo-attribute to dictionary.
    IElement twbstElem = this.browser.core.AttachToNativeElement(htmlElem);
    String textAttr = twbstElem.uiName;
    if (textAttr != null)
    {
        // Remove blanks from start/end of the text.
        textAttr = textAttr.Trim();
    }

    // Skip too long texts or empty strings.
    if (!String.IsNullOrEmpty(textAttr) && (textAttr.Length <= CatStudioConstants.MAX_TEXT_ATTR_LEN_TO_RECORD))
    {
        this.attributeMap.Add("uiname", textAttr);
    }

    // Add innerText for Watir recorder.
    String innerText = htmlElem.innerText;
    if (!String.IsNullOrEmpty(innerText) && (innerText.Length <= CatStudioConstants.MAX_TEXT_ATTR_LEN_TO_RECORD))
    {
        this.attributeMap.Add("innertext", innerText);
    }
}
/// <summary>
/// Positions the attribute cursor on the first attribute of <c>CurrentNode</c>.
/// </summary>
/// <returns>
/// <c>true</c> when a first attribute exists; <c>false</c> when the node's attribute
/// collection is null or the attribute enumerator has no first item (in which case
/// <c>attrs</c> is reset to null).
/// </returns>
public override bool MoveToFirstAttribute()
{
    IHTMLAttributeCollection nodeAttributes = (IHTMLAttributeCollection)CurrentNode.attributes;
    if (nodeAttributes == null)
    {
        return false;
    }

    attrs = new AttributeNodes(CurrentNode);
    if (attrs.MoveNext())
    {
        return true;
    }

    // No attribute to stand on: drop the enumerator again.
    attrs = null;
    return false;
}
/// <summary>
/// Wraps the given MSHTML attribute collection.
/// </summary>
/// <param name="collection">The attribute collection stored in <c>_collection</c>.</param>
internal HtmlAttributeCollection(IHTMLAttributeCollection collection)
{
    this._collection = collection;
}
/// <summary>
/// Utility for properly printing the start tag for an element.
/// This utility takes care of including/suppressing attributes and namespaces properly.
/// </summary>
/// <remarks>
/// Only attributes flagged as specified are written. The raw value is requested through
/// <c>IHTMLElement.getAttribute(name, 2)</c> first; when that yields no string, the DOM
/// attribute value and a few attribute-specific fallbacks are tried. An attribute that
/// still has no value is written minimized-expanded (value = name), except for "id",
/// which is omitted.
/// </remarks>
/// <param name="writer">The writer that receives the start element and its attributes.</param>
/// <param name="element">The element whose start tag is written.</param>
private static void printElementStart(HtmlWriter writer, IHTMLElement element)
{
    string tagName = element.tagName;

    // If there is no tag name, this is mostly an artificial tag reported by mshtml,
    // and not really present in the markup
    // (e.g HTMLTableCaptionClass)
    if (string.IsNullOrEmpty(tagName))
    {
        return;
    }

    //XHTML tags are all lowercase
    tagName = tagName.ToLower(CultureInfo.InvariantCulture);

    //this is a standard HTML tag, so just write it out.
    writer.WriteStartElement(tagName);

    // Both casts are "as" casts, so either may legitimately yield null.
    IHTMLDOMNode node = element as IHTMLDOMNode;
    IHTMLAttributeCollection attrs = (node != null) ? node.attributes as IHTMLAttributeCollection : null;
    if (attrs == null)
    {
        return;
    }

    foreach (IHTMLDOMAttribute attr in attrs)
    {
        string attrName = attr.nodeName as string;

        // A nameless attribute cannot be written; skipping it here also avoids
        // dereferencing a null name below.
        if (!attr.specified || attrName == null)
        {
            continue;
        }

        string attrNameLower = attrName.ToLower(CultureInfo.InvariantCulture);

        //get the raw attribute value (so that IE doesn't try to expand out paths in the value).
        string attrValue = element.getAttribute(attrName, 2) as string;
        if (attrValue == null)
        {
            //IE won't return some attributes (like class) using IHTMLElement.getAttribute(),
            //so if the value is null, try to get the value directly from the DOM Attribute.
            //Note: we can't use the DOM value by default, because IE will rewrite the value
            //to contain a fully-qualified path on some attributes (like src and href).
            attrValue = attr.nodeValue as string;
            if (attrValue == null)
            {
                if ((attrNameLower == "hspace" || attrNameLower == "vspace") && attr.nodeValue is int)
                {
                    attrValue = ((int)attr.nodeValue).ToString(CultureInfo.InvariantCulture);
                }
                else if (attrNameLower == "style")
                {
                    //Avoid bug: Images that are resized with the editor insert a STYLE attribute.
                    //IE won't return the style attribute using the standard API, so we have to grab
                    //it from the style object
                    attrValue = element.style.cssText;
                }
                else if (attrNameLower == "colspan")
                {
                    // Only table cells expose colSpan; leave the value unset otherwise.
                    IHTMLTableCell colSpanCell = element as IHTMLTableCell;
                    if (colSpanCell != null)
                    {
                        attrValue = colSpanCell.colSpan.ToString(CultureInfo.InvariantCulture);
                    }
                }
                else if (attrNameLower == "rowspan")
                {
                    // Only table cells expose rowSpan; leave the value unset otherwise.
                    IHTMLTableCell rowSpanCell = element as IHTMLTableCell;
                    if (rowSpanCell != null)
                    {
                        attrValue = rowSpanCell.rowSpan.ToString(CultureInfo.InvariantCulture);
                    }
                }
                else if (attrNameLower == "align" && attr.nodeValue is int)
                {
                    // This is not documented anywhere. Just discovered the values empirically on IE7 (Vista).
                    switch ((int)attr.nodeValue)
                    {
                        case 1:
                            attrValue = "left";
                            break;
                        case 2:
                            attrValue = "center";
                            break;
                        case 3:
                            attrValue = "right";
                            break;
                        case 4:
                            attrValue = "texttop";
                            break;
                        case 5:
                            attrValue = "absmiddle";
                            break;
                        case 6:
                            attrValue = "baseline";
                            break;
                        case 7:
                            attrValue = "absbottom";
                            break;
                        case 8:
                            attrValue = "bottom";
                            break;
                        case 9:
                            attrValue = "middle";
                            break;
                        case 10:
                            attrValue = "top";
                            break;
                    }
                }
            }

            // Report only when every fallback failed, i.e. the value is still missing.
            Debug.WriteLineIf(attrValue == null && attrName != "id", String.Format(CultureInfo.InvariantCulture, "{0}.{1} attribute value not retreived", tagName, attrName), element.outerHTML);
        }

        // Minimized attributes are not allowed, according
        // to section 4.5 of XHTML 1.0 specification.
        // TODO: Deal with simple values that are not strings
        if (attrValue == null && attrNameLower != "id")
        {
            attrValue = attrName;
        }

        if (attrValue != null)
        {
            //write out this attribute.
            writer.WriteAttributeString(attrName, attrValue);
        }
    }
}
/// <summary>
/// Recursively walks an HTML DOM node and feeds its contents into the current page
/// through the supplied TextBlockBuilder.
/// </summary>
/// <remarks>When this routine returns there may still be residual text in the
/// returned builder. The caller is responsible for checking this and adding it to the
/// page if present.</remarks>
/// <param name="node">The HTML DOM node to recursively walk.</param>
/// <param name="tbBuilder">The TextBlockBuilder to put the text into.</param>
/// <returns>The builder to continue with; it is a new instance whenever the text
/// collected so far was flushed to the page.</returns>
private TextBlockBuilder ParseDomNode(IHTMLDOMNode node, TextBlockBuilder tbBuilder)
{
    TagType kind = GetTagType(node.nodeName);
    bool isHeading = kind == TagType.H1 || kind == TagType.H2 || kind == TagType.H3
                     || kind == TagType.H4 || kind == TagType.H5 || kind == TagType.H6;

    // ---- actions performed before the children are visited ----
    if (isHeading)
    {
        FlushTextToBlock(m_CurrentPage, tbBuilder, m_MainBodyTextAttr);
        tbBuilder = new TextBlockBuilder(GetNextObjId(), m_CharMapper);

        // A heading at or above the configured level starts a new page,
        // unless the current page is still empty.
        if (GetHeadingLevel(kind) <= GetHeadingLevel(m_eNewPageHeadingFilter)
            && m_CurrentPage.Children.Count > 0)
        {
            finalizePage(m_CurrentPage);
            m_CurrentPage = createPage();
            addBookPage(m_CurrentPage);
        }

        m_HeadingNodePageId[node] = m_CurrentPage.ID;
        m_TextObjectIdHeadingNode[tbBuilder.TextObjectId] = node;
        tbBuilder.Append(TagId.FontSize, GetHeadingFontSize(kind));
    }
    else
    {
        switch (kind)
        {
            case TagType.IMG:
                // Pending text has to be written out before the image is added.
                if (tbBuilder.HasText)
                {
                    tbBuilder.Append(TagId.EOL);
                    FlushTextToBlock(m_CurrentPage, tbBuilder, m_MainBodyTextAttr);
                    tbBuilder = new TextBlockBuilder(GetNextObjId(), m_CharMapper);
                }

                // NOTE(review): each lookup below dereferences the returned attribute
                // and its value without a null check, and the sizes go through
                // ushort.Parse as-is — confirm inputs always carry numeric width/height.
                IHTMLAttributeCollection imageAttributes = (IHTMLAttributeCollection)node.attributes;
                object attributeKey = "src";
                string imageSource = ((IHTMLDOMAttribute)imageAttributes.item(ref attributeKey)).nodeValue.ToString();
                attributeKey = "height";
                string heightText = ((IHTMLDOMAttribute)imageAttributes.item(ref attributeKey)).nodeValue.ToString();
                attributeKey = "width";
                string widthText = ((IHTMLDOMAttribute)imageAttributes.item(ref attributeKey)).nodeValue.ToString();

                ushort imageWidth = ushort.Parse(widthText);
                ushort imageHeight = ushort.Parse(heightText);
                addPageImage(m_CurrentPage, imageSource, imageWidth, imageHeight);
                break;

            case TagType.text:
                AppendTextToBlock((string)node.nodeValue, tbBuilder);
                break;

            case TagType.I:
                tbBuilder.Append(TagId.ItalicBegin);
                break;

            case TagType.B:
                tbBuilder.Append(TagId.FontWeight, LegacyBBeB.k_BoldFontWeight);
                break;

            case TagType.SUP:
                tbBuilder.Append(TagId.BeginSup);
                break;

            case TagType.SUB:
                tbBuilder.Append(TagId.BeginSub);
                break;
        }
    }

    // ---- children ----
    if (node.hasChildNodes())
    {
        IHTMLDOMChildrenCollection children = (IHTMLDOMChildrenCollection)node.childNodes;
        foreach (IHTMLDOMNode childNode in children)
        {
            tbBuilder = ParseDomNode(childNode, tbBuilder);
        }
    }

    // ---- actions performed after the children were visited ----
    if (isHeading)
    {
        tbBuilder.Append(TagId.FontSize, LegacyBBeB.DefaultFontSize);
        FlushTextToBlock(m_CurrentPage, tbBuilder, m_MainBodyTextAttr);
        tbBuilder = new TextBlockBuilder(GetNextObjId(), m_CharMapper);
        return tbBuilder;
    }

    switch (kind)
    {
        case TagType.I:
            tbBuilder.Append(TagId.ItalicEnd);
            break;

        case TagType.B:
            tbBuilder.Append(TagId.FontWeight, LegacyBBeB.k_NormalFontWeight);
            break;

        case TagType.SUP:
            tbBuilder.Append(TagId.EndSup);
            break;

        case TagType.SUB:
            tbBuilder.Append(TagId.EndSub);
            break;

        case TagType.P:
            // A paragraph ends with two end-of-line tags.
            tbBuilder.Append(TagId.EOL);
            tbBuilder.Append(TagId.EOL);
            break;

        case TagType.BR:
            tbBuilder.Append(TagId.EOL);
            break;
    }

    return tbBuilder;
}
/// <summary>
/// Captures the attribute collection of the adapter's node through both the
/// <c>IHTMLAttributeCollection</c> and <c>IHTMLAttributeCollection2</c> interfaces.
/// Either field is null when the collection does not support that interface.
/// </summary>
/// <param name="element">The adapter whose node supplies the attributes.</param>
public AttributeCollection( ElementAdapter element )
{
    var sourceNode = element._node;

    this._attributes = sourceNode.attributes as IHTMLAttributeCollection;
    this._attributes2 = sourceNode.attributes as IHTMLAttributeCollection2;
}