Exemple #1
0
        /// <summary>
        /// Builds an <c>HtmlNode</c> tree from an MSHTML <c>IHTMLDOMNode</c>, recursing into its children.
        /// </summary>
        /// <remarks>
        /// Text nodes are returned as-is. Comment nodes are renamed to <c>comment</c>; their value is
        /// HTML-encoded and additionally exposed as a single <c>#text</c> child. For every other node
        /// the explicitly specified attributes and all child nodes are copied, and when the first child
        /// is a text or comment node its value also becomes the value of the new node.
        /// </remarks>
        /// <param name="node">DOM node to convert.</param>
        /// <returns>The converted node, including its attributes and descendants.</returns>
        protected HtmlNode LoadHtmlNode(IHTMLDOMNode node)
        {
            // A node may carry no value at all; treat that as an empty string instead of
            // dereferencing null.
            object rawValue = node.nodeValue;
            var nn = new HtmlNode(node.nodeName, rawValue != null ? rawValue.ToString() : "");

            if (nn.TagName == "#text")
            {
                return(nn);
            }
            if (nn.TagName == "#comment")
            {
                // append comment tag.
                nn.TagName = "comment";
                string v = System.Web.HttpUtility.HtmlEncode(nn.Value);
                var commentText = new HtmlNode("#text", v);
                // Link the synthetic child to its parent, as is done for regular children below.
                commentText.Parent = nn;
                nn.Children.Add(commentText);
                nn.Value = v;
                return(nn);
            }

            // append attributes
            IHTMLAttributeCollection attrs = node.attributes;

            if (attrs != null)
            {
                foreach (IHTMLDOMAttribute at in attrs)
                {
                    if (!at.specified)
                    {
                        continue;
                    }

                    object attrValue = at.nodeValue;
                    nn.Attrs.Add(new HtmlAttr {
                        Key = at.nodeName, Value = attrValue != null ? attrValue.ToString() : ""
                    });
                }
            }

            var col = node.childNodes as IHTMLDOMChildrenCollection;

            if (col != null)
            {
                foreach (IHTMLDOMNode nd in col)
                {
                    HtmlNode el = LoadHtmlNode(nd);
                    el.Parent = nn;
                    nn.Children.Add(el);
                }

                if (nn.Children.Count > 0)
                {
                    HtmlNode first = nn.Children[0];
                    // Converted comment nodes are tagged "comment" (see above), never "#comment",
                    // so that is the name that has to be matched here.
                    if (first.TagName == "#text" || first.TagName == "comment")
                    {
                        nn.Value = first.Value;
                    }
                }
            }
            return(nn);
        }
        /// <summary>
        /// Creates a name-value collection of the attributes that are actually written in the element's HTML.
        /// </summary>
        /// <remarks>
        /// An attribute is included only when the text <c>name=</c> occurs in the element's outer HTML
        /// (a plain substring test) and its value is non-empty. Every included attribute name that does
        /// not contain a hyphen is also registered in <c>_localPattern</c>. When the element has inner
        /// text, it is added under the pseudo-attribute <c>Text</c>.
        /// </remarks>
        /// <param name="element">element to evaluate</param>
        /// <returns>NameValueCollection of available attributes actually listed in the HTML</returns>
        public NameValueCollection GetAvailableAttributes(IHTMLElement element)
        {
            var nvcAvailableAttributes = new NameValueCollection();
            IHTMLAttributeCollection elementAttributes = ((IHTMLDOMNode)element).attributes;

            if (elementAttributes != null)
            {
                // The markup does not change while enumerating, so fetch it once instead of once
                // per attribute.
                string outerHtml = element.outerHTML ?? "";

                foreach (IHTMLDOMAttribute attribute in elementAttributes)
                {
                    string name = attribute.nodeName;

                    // only get the attributes that are really part of the HTML
                    if (!outerHtml.Contains(name + "="))
                    {
                        continue;
                    }

                    object rawValue = attribute.nodeValue;
                    string value = rawValue != null ? rawValue.ToString() : null;

                    if (string.IsNullOrEmpty(value))
                    {
                        continue;
                    }

                    nvcAvailableAttributes.Add(name, value);
                    if (!_localPattern.Contains(name) && !name.Contains("-"))
                    {
                        _localPattern.Add(name);
                    }
                }
            }

            string innerText = element.innerText;

            if (innerText != null)
            {
                // Register the pseudo-attribute only once, like every other pattern name.
                if (!_localPattern.Contains("Text"))
                {
                    _localPattern.Add("Text");
                }
                nvcAvailableAttributes.Add("Text", innerText);
            }

            return(nvcAvailableAttributes);
        }
        /// <summary>
        /// Gets the attributes that are explicitly specified on an element.
        /// </summary>
        /// <remarks>
        /// The attributes are read directly from the MSHTML interfaces that HtmlElement hides, so the
        /// result may include attributes that cannot be retrieved through HtmlElement itself.
        /// Use ExtractUsableAttributes when only attributes retrievable through HtmlElement are wanted.
        /// </remarks>
        /// <param name="sourceElement">Element whose attributes are to be extracted.</param>
        /// <returns>
        /// Attribute name / value pairs. Names are upper-cased with the invariant culture; an attribute
        /// without a value is reported with an empty string.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="sourceElement"/> is null.</exception>
        public static Dictionary <string, string> ExtractAttributes(HtmlElement sourceElement)
        {
            if (sourceElement == null)
            {
                // The single-string constructor takes the parameter NAME, not the message,
                // so both are passed explicitly.
                throw new ArgumentNullException("sourceElement", "nullが指定されています");
            }

            Dictionary <string, string> extractedAttributes = new Dictionary <string, string>();

            if (sourceElement.TagName == "!" || sourceElement.TagName == "?")
            {
                // The attributes collection cannot be obtained for these tags (an exception is
                // raised), so they are handled separately and yield no attributes.
                return(extractedAttributes);
            }

            IHTMLElement2            domElement = (IHTMLElement2)sourceElement.DomElement;
            IHTMLDOMNode             node       = (IHTMLDOMNode)domElement;
            IHTMLAttributeCollection attributes = (IHTMLAttributeCollection)node.attributes;

            if (attributes == null)
            {
                return(extractedAttributes);
            }

            foreach (IHTMLDOMAttribute attribute in attributes)
            {
                if (!attribute.specified)
                {
                    continue;
                }

                // A specified attribute may still have no value; do not dereference null.
                object rawValue = attribute.nodeValue;
                string value    = rawValue != null ? rawValue.ToString() : "";

                // Invariant casing keeps the keys stable regardless of the current culture.
                extractedAttributes.Add(attribute.nodeName.ToUpperInvariant(), value);
            }

            return(extractedAttributes);
        }
Exemple #4
0
        /// <summary>
        /// We don't want to prune elements that have class, style, id, or event attributes.
        /// It seems like if the author went through the trouble to put these attributes
        /// on, we shouldn't trim.  (Maybe we should even keep any element with any attributes?)
        /// </summary>
        /// <param name="node">DOM node whose attributes are inspected.</param>
        /// <returns>
        /// true when any explicitly specified attribute is named CLASSNAME, CLASS, STYLE or ID, or has
        /// a name starting with "on" (case-insensitive); otherwise false.
        /// </returns>
        private static bool HasInterestingAttributes(IHTMLDOMNode node)
        {
            IHTMLAttributeCollection attrs = node.attributes as IHTMLAttributeCollection;

            if (attrs == null)
            {
                return(false);
            }

            foreach (IHTMLDOMAttribute attr in attrs)
            {
                if (!attr.specified)
                {
                    continue;
                }

                string attrName = attr.nodeName as string;
                if (attrName == null)
                {
                    continue;
                }

                switch (attrName.ToUpperInvariant())
                {
                case "CLASSNAME":
                case "CLASS":
                case "STYLE":
                case "ID":
                    return(true);
                }

                // Only a match ends the search; an uninteresting attribute must not stop the
                // remaining attributes from being examined.
                if (attrName.StartsWith("on", StringComparison.OrdinalIgnoreCase))
                {
                    return(true);
                }
            }
            return(false);
        }
Exemple #5
0
        /// <summary>
        /// Obtains an enumerator over the attribute collection of <c>node</c> and stores it in
        /// <c>attributeEnumerator</c>. The field is left untouched when the node exposes no collection.
        /// </summary>
        private void InitialiseAttributeEnumerator()
        {
            var nodeAttributes = (IHTMLAttributeCollection)node.attributes;

            if (nodeAttributes == null)
            {
                return;
            }

            attributeEnumerator = nodeAttributes.GetEnumerator();
        }
Exemple #6
0
        /// <summary>
        /// Fills <c>attributeMap</c> with the recordable attributes of an element.
        /// </summary>
        /// <remarks>
        /// Only src, href, id, name, class, alt, title, action, for and value are copied, and only when
        /// their value is a string; the attribute named by <c>CatStudioConstants.HOOKED_BY_REC_ATTR</c>
        /// is always skipped. Two pseudo-attributes are appended afterwards: "uiname" (trimmed UI name
        /// of the element) and "innertext", each only when non-empty and no longer than
        /// <c>CatStudioConstants.MAX_TEXT_ATTR_LEN_TO_RECORD</c>.
        /// </remarks>
        /// <param name="htmlElem">Element whose attributes are recorded.</param>
        private void BuildAttributeDictionary(IHTMLElement htmlElem)
        {
            IHTMLDOMNode             htmlNode       = (IHTMLDOMNode)htmlElem;
            IHTMLAttributeCollection attrCollection = (IHTMLAttributeCollection)htmlNode.attributes;

            if (attrCollection != null)
            {
                for (int i = 0; i < attrCollection.length; ++i)
                {
                    Object            crntIndex     = i;
                    IHTMLDOMAttribute crntAttribute = (IHTMLDOMAttribute)attrCollection.item(ref crntIndex);

                    if (crntAttribute == null)
                    {
                        continue;
                    }

                    String rawName = (String)(crntAttribute.nodeName);
                    if (rawName == null)
                    {
                        continue;
                    }

                    // Invariant casing: a culture-sensitive ToLower() maps "ID" to a dotless-i
                    // spelling under e.g. Turkish culture, which would never match "id" below.
                    String nodeName = rawName.ToLowerInvariant();
                    if (nodeName == CatStudioConstants.HOOKED_BY_REC_ATTR)
                    {
                        continue;
                    }

                    switch (nodeName)
                    {
                    case "src":
                    case "href":
                    case "id":
                    case "name":
                    case "class":
                    case "alt":
                    case "title":
                    case "action":
                    case "for":
                    case "value":
                        String nodeValue = crntAttribute.nodeValue as String;
                        if (nodeValue != null)
                        {
                            this.attributeMap.Add(nodeName, nodeValue);
                        }
                        break;
                    }
                }
            }

            // Add "uiName" pseudo-attribute to dictionary.
            IElement twbstElem = this.browser.core.AttachToNativeElement(htmlElem);
            String   uiName    = twbstElem.uiName;
            String   textAttr  = (uiName != null) ? uiName.Trim() : null; // Remove blanks from start/end of the text.

            // Skip too long texts or empty strings.
            if (!String.IsNullOrEmpty(textAttr) && (textAttr.Length <= CatStudioConstants.MAX_TEXT_ATTR_LEN_TO_RECORD))
            {
                this.attributeMap.Add("uiname", textAttr);
            }

            // Add innerText for Watir recorder.
            String innerText = htmlElem.innerText;

            if (!String.IsNullOrEmpty(innerText) && (innerText.Length <= CatStudioConstants.MAX_TEXT_ATTR_LEN_TO_RECORD))
            {
                this.attributeMap.Add("innertext", innerText);
            }
        }
        /// <summary>
        /// Positions the attribute cursor on the first attribute of <c>CurrentNode</c>.
        /// </summary>
        /// <returns>
        /// true when a first attribute exists; false when the node has no attribute collection or the
        /// attribute cursor cannot advance, in which case <c>attrs</c> is reset to null if it had been
        /// created.
        /// </returns>
        public override bool MoveToFirstAttribute()
        {
            var nodeAttributes = (IHTMLAttributeCollection)CurrentNode.attributes;

            if (nodeAttributes != null)
            {
                attrs = new AttributeNodes(CurrentNode);
                if (attrs.MoveNext())
                {
                    return(true);
                }

                // Nothing to enumerate: drop the cursor again.
                attrs = null;
            }

            return(false);
        }
Exemple #8
0
 /// <summary>
 /// Wraps the given MSHTML attribute collection; the reference is stored as-is.
 /// </summary>
 /// <param name="collection">Attribute collection to wrap.</param>
 internal HtmlAttributeCollection(IHTMLAttributeCollection collection)
 {
     this._collection = collection;
 }
Exemple #9
0
        /// <summary>
        /// Utility for properly printing the start tag for an element.
        /// This utility takes care of including/suppressing attributes and namespaces properly.
        /// </summary>
        /// <remarks>
        /// Elements without a tag name are skipped entirely. The tag name is written lower-cased.
        /// Only explicitly specified attributes are written. The raw value from
        /// <c>getAttribute(name, 2)</c> is preferred; when that is not a string, the DOM attribute
        /// value and a set of per-attribute fallbacks are tried. An attribute that still has no value
        /// is written as <c>name="name"</c>, except <c>id</c>, which is omitted.
        /// </remarks>
        /// <param name="writer">Writer that receives the start element and its attributes.</param>
        /// <param name="element">Element whose start tag is written.</param>
        private static void printElementStart(HtmlWriter writer, IHTMLElement element)
        {
            string tagName = element.tagName;

            // If there is no tag name, this is mostly an artificial tag reported by mshtml,
            // and not really present in the markup
            // (e.g HTMLTableCaptionClass)
            if (string.IsNullOrEmpty(tagName))
            {
                return;
            }

            //XHTML tags are all lowercase
            tagName = tagName.ToLower(CultureInfo.InvariantCulture);
            //this is a standard HTML tag, so just write it out.
            writer.WriteStartElement(tagName);

            IHTMLDOMNode             node  = element as IHTMLDOMNode;
            IHTMLAttributeCollection attrs = (node != null) ? node.attributes as IHTMLAttributeCollection : null;

            if (attrs == null)
            {
                return;
            }

            foreach (IHTMLDOMAttribute attr in attrs)
            {
                if (!attr.specified)
                {
                    continue;
                }

                string attrName = attr.nodeName as string;
                if (attrName == null)
                {
                    // Without a name there is nothing that could be written.
                    continue;
                }

                string attrNameLower = attrName.ToLower(CultureInfo.InvariantCulture);

                //get the raw attribute value (so that IE doesn't try to expand out paths in the value).
                string attrValue = element.getAttribute(attrName, 2) as string;
                if (attrValue == null)
                {
                    //IE won't return some attributes (like class) using IHTMLElement.getAttribute(),
                    //so if the value is null, try to get the value directly from the DOM Attribute.
                    //Note: we can't use the DOM value by default, because IE will rewrite the value
                    //to contain a fully-qualified path on some attributes (like src and href).
                    attrValue = getFallbackAttributeValue(element, attr, attrNameLower);

                    // Report only attributes whose value could NOT be obtained.
                    Debug.WriteLineIf(attrValue == null && attrNameLower != "id", String.Format(CultureInfo.InvariantCulture, "{0}.{1} attribute value not retreived", tagName, attrName), element.outerHTML);
                }

                // Minimized attributes are not allowed, according
                // to section 4.5 of XHTML 1.0 specification.
                // TODO: Deal with simple values that are not strings
                if (attrValue == null && attrNameLower != "id")
                {
                    attrValue = attrName;
                }

                if (attrValue != null)
                {
                    //write out this attribute.
                    writer.WriteAttributeString(attrName, attrValue);
                }
            }
        }

        /// <summary>
        /// Resolves an attribute value from the DOM attribute node, applying the per-attribute
        /// special cases for values that are not exposed as strings.
        /// </summary>
        /// <returns>The value as text, or null when it cannot be determined.</returns>
        private static string getFallbackAttributeValue(IHTMLElement element, IHTMLDOMAttribute attr, string attrNameLower)
        {
            object rawValue = attr.nodeValue;
            string text     = rawValue as string;

            if (text != null)
            {
                return(text);
            }

            IHTMLTableCell cell;

            switch (attrNameLower)
            {
            case "hspace":
            case "vspace":
                return(rawValue is int ? ((int)rawValue).ToString(CultureInfo.InvariantCulture) : null);

            case "style":
                //Avoid bug: Images that are resized with the editor insert a STYLE attribute.
                //IE won't return the style attribute using the standard API, so we have to grab
                //it from the style object
                return(element.style.cssText);

            case "colspan":
                // Only table cells expose a span; anything else yields no value instead of a crash.
                cell = element as IHTMLTableCell;
                return(cell != null ? cell.colSpan.ToString(CultureInfo.InvariantCulture) : null);

            case "rowspan":
                cell = element as IHTMLTableCell;
                return(cell != null ? cell.rowSpan.ToString(CultureInfo.InvariantCulture) : null);

            case "align":
                return(rawValue is int ? getAlignName((int)rawValue) : null);

            default:
                return(null);
            }
        }

        /// <summary>
        /// Maps the numeric align code reported by the DOM attribute to its HTML keyword.
        /// </summary>
        /// <returns>The keyword, or null for an unknown code.</returns>
        private static string getAlignName(int alignCode)
        {
            // This is not documented anywhere. Just discovered the values empirically on IE7 (Vista).
            switch (alignCode)
            {
            case 1:  return("left");
            case 2:  return("center");
            case 3:  return("right");
            case 4:  return("texttop");
            case 5:  return("absmiddle");
            case 6:  return("baseline");
            case 7:  return("absbottom");
            case 8:  return("bottom");
            case 9:  return("middle");
            case 10: return("top");
            default: return(null);
            }
        }
Exemple #10
0
        /// <summary>
        /// Walk the supplied HTML DOM node (recursively) and add its contents into the
        /// current page (<c>m_CurrentPage</c>) using the supplied TextBlockBuilder.
        /// </summary>
        /// <remarks>When this routine is done there may be some residual text still in
        /// the returned builder. The caller is responsible for checking this and adding it to the
        /// page if present.</remarks>
        /// <param name="node">The HTML DOM node to recursively walk.</param>
        /// <param name="tbBuilder">The TextBlockBuilder to put the text into.</param>
        /// <returns>
        /// The builder to continue with. This can be a different instance from
        /// <paramref name="tbBuilder"/>, because images and headings flush the current text and
        /// start a new builder.
        /// </returns>
        private TextBlockBuilder ParseDomNode(IHTMLDOMNode node, TextBlockBuilder tbBuilder)
        {
            TagType tagType = GetTagType(node.nodeName);

            // Phase 1: work that has to happen BEFORE the children are visited (opening tags,
            // images, heading set-up).
            switch (tagType)
            {
            case TagType.IMG:

                // Before we add the image, see if we need to write the text object first
                if (tbBuilder.HasText)
                {
                    // Yes it has
                    tbBuilder.Append(TagId.EOL);
                    FlushTextToBlock(m_CurrentPage, tbBuilder, m_MainBodyTextAttr);
                    tbBuilder = new TextBlockBuilder(GetNextObjId(), m_CharMapper);
                }

                // NOTE(review): each item()/nodeValue result below is dereferenced without a null
                // check, and ushort.Parse throws on anything that is not a plain number in range
                // (e.g. "50%"). This presumably relies on every <img> carrying numeric
                // src/height/width values - confirm against the expected input.
                IHTMLAttributeCollection attribs = (IHTMLAttributeCollection)node.attributes;
                object name = "src";
                string src  = ((IHTMLDOMAttribute)attribs.item(ref name)).nodeValue.ToString();
                name = "height";
                string height = ((IHTMLDOMAttribute)attribs.item(ref name)).nodeValue.ToString();
                name = "width";
                string width = ((IHTMLDOMAttribute)attribs.item(ref name)).nodeValue.ToString();

                addPageImage(m_CurrentPage, src, ushort.Parse(width), ushort.Parse(height));
                break;

            case TagType.text:
                AppendTextToBlock((string)node.nodeValue, tbBuilder);
                break;

            case TagType.I:
                tbBuilder.Append(TagId.ItalicBegin);
                break;

            case TagType.B:
                tbBuilder.Append(TagId.FontWeight, LegacyBBeB.k_BoldFontWeight);
                break;

            case TagType.SUP:
                tbBuilder.Append(TagId.BeginSup);
                break;

            case TagType.SUB:
                tbBuilder.Append(TagId.BeginSub);
                break;

            case TagType.H1:
            case TagType.H2:
            case TagType.H3:
            case TagType.H4:
            case TagType.H5:
            case TagType.H6:

                // A heading always starts its own text block: flush what has been collected so far.
                FlushTextToBlock(m_CurrentPage, tbBuilder, m_MainBodyTextAttr);
                tbBuilder = new TextBlockBuilder(GetNextObjId(), m_CharMapper);

                // Headings at or above the configured filter level also start a new page.
                if (GetHeadingLevel(tagType) <= GetHeadingLevel(m_eNewPageHeadingFilter))
                {
                    if (m_CurrentPage.Children.Count > 0)                               // If current page not empty
                    {
                        // Start a new page
                        finalizePage(m_CurrentPage);

                        m_CurrentPage = createPage();

                        addBookPage(m_CurrentPage);
                    }
                }

                // Record which page and which text object this heading node ended up in.
                m_HeadingNodePageId[node] = m_CurrentPage.ID;
                m_TextObjectIdHeadingNode[tbBuilder.TextObjectId] = node;

                tbBuilder.Append(TagId.FontSize, GetHeadingFontSize(tagType));
                break;
            }

            // Phase 2: recurse into the children. Each call may hand back a replacement builder,
            // so the returned instance is carried forward to the next child.
            if (node.hasChildNodes())
            {
                IHTMLDOMChildrenCollection childNodes = (IHTMLDOMChildrenCollection)node.childNodes;
                foreach (IHTMLDOMNode child in childNodes)
                {
                    tbBuilder = ParseDomNode(child, tbBuilder);
                }
            }

            // Phase 3: work that has to happen AFTER the children (closing tags, line breaks).
            switch (tagType)
            {
            case TagType.I:
                tbBuilder.Append(TagId.ItalicEnd);
                break;

            case TagType.B:
                tbBuilder.Append(TagId.FontWeight, LegacyBBeB.k_NormalFontWeight);
                break;

            case TagType.SUP:
                tbBuilder.Append(TagId.EndSup);
                break;

            case TagType.SUB:
                tbBuilder.Append(TagId.EndSub);
                break;

            case TagType.P:
                // A paragraph is terminated by two end-of-line tags.
                tbBuilder.Append(TagId.EOL);
                tbBuilder.Append(TagId.EOL);
                break;

            case TagType.H1:
            case TagType.H2:
            case TagType.H3:
            case TagType.H4:
            case TagType.H5:
            case TagType.H6:
                // Restore the default font size, then close the heading's text block and start
                // a new builder for whatever follows.
                tbBuilder.Append(TagId.FontSize, LegacyBBeB.DefaultFontSize);
                FlushTextToBlock(m_CurrentPage, tbBuilder, m_MainBodyTextAttr);
                tbBuilder = new TextBlockBuilder(GetNextObjId(), m_CharMapper);
                break;

            case TagType.BR:
                tbBuilder.Append(TagId.EOL);
                break;
            }

            return(tbBuilder);
        }
Exemple #11
0
 /// <summary>
 /// Captures the attribute collection of the adapter's underlying node through both the
 /// <c>IHTMLAttributeCollection</c> and <c>IHTMLAttributeCollection2</c> interfaces. A field is
 /// null when the node's collection cannot be cast to the corresponding interface.
 /// </summary>
 /// <param name="element">Adapter whose node supplies the attributes.</param>
 public AttributeCollection( ElementAdapter element )
 {
   var domNode = element._node;

   _attributes  = domNode.attributes as IHTMLAttributeCollection;
   _attributes2 = domNode.attributes as IHTMLAttributeCollection2;
 }