/// <summary>
/// Recursively walks <paramref name="nodes"/> and adds a report item for every img
/// element that lacks an alt, height or width attribute (codes 1001, 1002 and 1003).
/// </summary>
/// <param name="doc">Document being checked; its HRef is passed to every report item.</param>
/// <param name="nodes">Nodes to inspect; child nodes of each element are visited recursively.</param>
private void CheckImageAlt(CHtmlDocument doc, CHtmlNodeCollection nodes)
        {
            foreach (CHtmlNode node in nodes)
            {
                CHtmlElement element = node as CHtmlElement;
                if (element == null)
                {
                    continue;
                }

                // OrdinalIgnoreCase already ignores case; lower-casing first was redundant and
                // culture-sensitive (e.g. "IMG" does not lower-case to "img" under a Turkish culture).
                if ("img".Equals(element.Name, StringComparison.OrdinalIgnoreCase))
                {
                    if (!element.Attributes.HasAttribute("alt"))
                    {
                        // TODO: write the reporter module.
                        AddReportItem(doc.HRef, element.HTML, "[1001] img태그에 Alt가 없습니다.");
                    }

                    if (!element.Attributes.HasAttribute("height"))
                    {
                        AddReportItem(doc.HRef, element.HTML, "[1002] img태그에 Height속성이 없습니다.");
                    }

                    if (!element.Attributes.HasAttribute("width"))
                    {
                        AddReportItem(doc.HRef, element.HTML, "[1003] img태그에 Width속성이 없습니다.");
                    }
                }

                if (element.Nodes.Count > 0)
                {
                    CheckImageAlt(doc, element.Nodes);
                }
            }
        }
        /////////////////////////////////////////////////////////////////////////////////
        /// <summary>
        /// This constructs a new HTML element with the specified tag name.
        /// The name is stored trimmed and lower-cased.
        /// </summary>
        /// <param name="name">
        /// Tag name. A debug assertion requires it to be non-null and to pass the
        /// CHtmlUtil.ExistWhiteSpaceChar check.
        /// </param>
        public CHtmlElement(string name)
        {
            System.Diagnostics.Debug.Assert(name != null && CHtmlUtil.ExistWhiteSpaceChar(name) == false);

            m_nodes = new CHtmlNodeCollection(this);

            // Tag names are compared against lower-case literals elsewhere, so the casing must not
            // depend on the current culture (under a Turkish culture "IMG".ToLower() is not "img").
            m_name = name.Trim().ToLowerInvariant();
        }
        /////////////////////////////////////////////////////////////////////////////////
        /// <summary>
        /// Appends to <paramref name="result"/> every node of this collection whose NodeName
        /// equals <paramref name="name"/> after the name has been trimmed and lower-cased.
        /// A matching node is added before any of its descendants.
        /// </summary>
        /// <param name="result">Collection that receives the matching nodes.</param>
        /// <param name="name">Node name to look for.</param>
        /// <param name="searchChildren">True to also search the child nodes of every element, recursively.</param>
        public void FindByName(CHtmlNodeCollection result, string name, bool searchChildren)
        {
            System.Diagnostics.Debug.Assert(result != null);
            System.Diagnostics.Debug.Assert(name != null);

            // Culture-invariant so that e.g. "IMG" always normalizes to "img".
            name = name.Trim().ToLowerInvariant();

            for (int index = 0, count = m_nodeList.Count; index < count; ++index)
            {
                CHtmlNode node = m_nodeList[index];
                if (node.NodeName.Equals(name))
                {
                    result.Add(node);
                }

                if (searchChildren == true && node is CHtmlElement)
                {
                    ((CHtmlElement)node).Nodes.FindByName(result, name, searchChildren);
                }
            }
        }
        /////////////////////////////////////////////////////////////////////////////////
        /// <summary>
        /// Replaces the current node list with the result of parsing <paramref name="html"/>,
        /// determines the charset if none is set yet, and takes the document base from the
        /// href attribute of the last base element found.
        /// </summary>
        /// <param name="html">HTML text to parse; must not be null.</param>
        public virtual void LoadHtml(string html)
        {
            System.Diagnostics.Debug.Assert(html != null);

            m_nodeList.Clear();
            m_parser.Parse(html, m_nodeList);

            // An already known charset is kept; otherwise detect it and fall back to Encoding.Unicode.
            if (m_charset == null)
            {
                m_charset = DetectCharset(m_nodeList) ?? Encoding.Unicode;
            }

            CHtmlNodeCollection baseNodes = m_nodeList.FindByName("base", true);
            if (baseNodes == null || baseNodes.Count == 0)
            {
                return;
            }

            // Only the last base element is considered.
            CHtmlElement lastBase = (CHtmlElement)baseNodes[baseNodes.Count - 1];
            CHtmlAttribute hrefAttribute = lastBase.Attributes["href"];
            if (hrefAttribute != null)
            {
                this.docBase = hrefAttribute.Value;
            }
        }
Example #5
0
        ///////////////////////////////////////////////////////////////////////////////
        /// <summary>
        /// Copy constructor. Copies the name of <paramref name="obj"/> and adds a clone of
        /// each of its child nodes and attributes to the new element.
        /// </summary>
        /// <param name="obj">Element to copy; must not be null.</param>
        public CHtmlElement(CHtmlElement obj) : base(obj)
        {
            System.Diagnostics.Debug.Assert(obj != null);

            obj.AssertValid();

            m_name  = obj.m_name;
            m_nodes = new CHtmlNodeCollection(this);

            // Clone the child nodes, reserving the capacity up front.
            int childCount = obj.m_nodes.Count;
            m_nodes.Capacity = childCount;
            for (int childIndex = 0; childIndex < childCount; childIndex++)
            {
                CHtmlNode childCopy = (CHtmlNode)obj.m_nodes[childIndex].Clone();
                m_nodes.Add(childCopy);
            }

            // Clone the attributes the same way.
            int attributeCount = obj.m_attributes.Count;
            m_attributes.Capacity = attributeCount;
            for (int attributeIndex = 0; attributeIndex < attributeCount; attributeIndex++)
            {
                CHtmlAttribute attributeCopy = (CHtmlAttribute)obj.m_attributes[attributeIndex].Clone();
                m_attributes.Add(attributeCopy);
            }
        }
        /// <summary>
        /// Recursively collects into <paramref name="links"/> every element that carries a
        /// link attribute: href on a, link and frame elements, src on script elements.
        /// </summary>
        /// <param name="parentNodes">Nodes to scan; null or empty collections are ignored.</param>
        /// <param name="links">Collection that receives the matching elements. It is only added to, never reassigned.</param>
        private void FindLink(CHtmlNodeCollection parentNodes, ref CHtmlNodeCollection links)
        {
            if (parentNodes == null || parentNodes.Count == 0) return;

            foreach(CHtmlNode node in parentNodes)
            {
                // "as" yields null for null and non-element nodes alike, so one check covers both.
                CHtmlElement element = node as CHtmlElement;
                if(element == null) continue;

                CHtmlAttribute attr = null;

                // Culture-invariant lower-casing so the literal case labels match in every culture.
                switch(element.Name.Trim().ToLowerInvariant())
                {
                    case "a":
                    case "link":
                    case "frame":
                        // NOTE(review): frame elements normally use "src" rather than "href" - confirm intent.
                        attr = element.Attributes["href"];
                        break;
                    case "script":
                        attr = element.Attributes["src"];
                        break;
                }

                if(attr != null)
                {
                    links.Add(node);
                }

                // The guard at the top of this method handles null or empty child collections.
                FindLink(element.Nodes, ref links);
            }
        }
        /////////////////////////////////////////////////////////////////////////////////
        /// <summary>
        /// Looks for a charset declaration in the content attribute of the meta elements
        /// directly under html/head and returns the matching encoding.
        /// </summary>
        /// <param name="nodes">Top-level nodes of the parsed document.</param>
        /// <returns>
        /// The encoding of the first charset declaration that Encoding.GetEncoding accepts,
        /// or null when there is none.
        /// </returns>
        private Encoding DetectCharset(CHtmlNodeCollection nodes)
        {
            const string charsetKey = "charset";

            CHtmlNodeCollection metaNodes = new CHtmlNodeCollection();
            CHtmlElement node = nodes["html"] as CHtmlElement;
            if(node != null) node = node.Nodes["head"] as CHtmlElement;
            if(node != null) node.Nodes.FindByNameAttribute(metaNodes, "meta", "content", false);

            for(int nodeIndex = 0, count = metaNodes.Count; nodeIndex < count; ++nodeIndex)
            {
                CHtmlElement metaElement = metaNodes[nodeIndex] as CHtmlElement;
                if(metaElement == null) continue;

                CHtmlAttributeCollection attributes = metaElement.Attributes.FindByName("content");
                for(int attributeIndex = 0, attributeCount = attributes.Count; attributeIndex < attributeCount; ++attributeIndex)
                {
                    string value = attributes[attributeIndex].Value;
                    if(string.IsNullOrEmpty(value)) continue;

                    // Ordinal and case-insensitive: "CHARSET=..." is as valid as "charset=...".
                    int index = value.IndexOf(charsetKey, StringComparison.OrdinalIgnoreCase);
                    if(index == -1) continue;

                    // Skip the blanks and '=' that follow the keyword.
                    int startIndex = index + charsetKey.Length;
                    while(startIndex < value.Length && CHtmlUtil.EqualesOfAnyChar(value[startIndex], " =")) ++startIndex;

                    // The charset name ends at a blank, at a ';' separator or at the end of the value.
                    int endIndex = startIndex;
                    while(endIndex < value.Length && !CHtmlUtil.EqualesOfAnyChar(value[endIndex], " ;")) ++endIndex;

                    if(endIndex > startIndex)
                    {
                        string charset = value.Substring(startIndex, endIndex - startIndex);
                        try
                        {
                            // The first usable declaration wins; stop scanning altogether.
                            return Encoding.GetEncoding(charset);
                        }
                        catch(Exception)
                        {
                            // Deliberate best effort: an unknown charset name is skipped
                            // and the search continues with the next candidate.
                        }
                    }
                }
            }

            return null;
        }
        /////////////////////////////////////////////////////////////////////////////////
        /// <summary>
        /// Appends to <paramref name="result"/> every node of this collection whose NodeName
        /// equals <paramref name="name"/> after the name has been trimmed and lower-cased.
        /// A matching node is added before any of its descendants.
        /// </summary>
        /// <param name="result">Collection that receives the matching nodes.</param>
        /// <param name="name">Node name to look for.</param>
        /// <param name="searchChildren">True to also search the child nodes of every element, recursively.</param>
        public void FindByName(CHtmlNodeCollection result, string name, bool searchChildren)
        {
            System.Diagnostics.Debug.Assert(result != null);
            System.Diagnostics.Debug.Assert(name != null);

            // Culture-invariant so that e.g. "IMG" always normalizes to "img".
            name = name.Trim().ToLowerInvariant();

            for(int index = 0, count = m_nodeList.Count; index < count; ++index)
            {
                CHtmlNode node = m_nodeList[index];
                if(node.NodeName.Equals(name))
                    result.Add(node);

                if(searchChildren == true && node is CHtmlElement)
                    ((CHtmlElement)node).Nodes.FindByName(result, name, searchChildren);
            }
        }
        /////////////////////////////////////////////////////////////////////////////////
        /// <summary>
        /// Creates a CHtmlElement from the tag-name token at m_tokens[visitor], resolves its
        /// attributes and adds the element to the current nesting level (m_currentLevel).
        /// Elements in the "empty tag" list and self-closed tags are closed immediately;
        /// any other element becomes the new current level and is recorded in m_lastOpenNodes.
        /// </summary>
        /// <param name="visitor">
        /// Index into m_tokens. On entry it must point at a TagName token; on exit it points
        /// just past the token that ends the tag.
        /// </param>
        /// <param name="nodeIDCount">Running node counter; it is incremented and the new value becomes the element's NodeID.</param>
        private void ResolveElement(ref int visitor, ref int nodeIDCount)
        {
            System.Diagnostics.Debug.Assert(m_tokens[visitor].Type == Token.TokenType.TagName);

            CHtmlElement element = new CHtmlElement(m_tokens[visitor].Content);
            element.m_close = false;
            ++nodeIDCount;
            element.NodeID = nodeIDCount;

            // Step over the tag name and let ResolveAttribute consume the attribute tokens.
            ++visitor;
            ResolveAttribute(ref visitor, element.Attributes);

            System.Diagnostics.Debug.Assert(m_tokens[visitor].Type == Token.TokenType.TagEnd || m_tokens[visitor].Type == Token.TokenType.TagCloseEnd);

            if(m_tokens[visitor].Type == Token.TokenType.TagEnd) // <tag>
            {
                switch(element.Name)
                {
                    // Empty tag
                    case "area":
                    case "bgsound":
                    case "base":
                    case "br":
                    case "basefont":
                    case "col":
                    case "embed":
                    case "frame":
                    case "hr":
                    case "img":
                    case "isindex":
                    case "input":
                    case "keygen":
                    case "link":
                    case "meta":
                    case "nextid":
                    case "option":
                    case "param":
                    case "sound":
                    case "spacer":
                    case "wbr":
                        // These elements are closed right away and never become the current level.
                        element.TerminatedType = CHtmlElement.EndTagType.NonTerminated;
                        CloseElement(element);

                        if(ElementCreatedEvent != null) ElementCreatedEvent(element);
                        m_currentLevel.Add(element);
                        break;

                    default:
                        {
                            // Chain to the previously recorded element of the same name, then make
                            // this element the latest open one for that name.
                            element.m_previousWithSameNameNode = (CHtmlElement)m_lastOpenNodes[element.Name];
                            m_lastOpenNodes[element.Name] = element;

                            if(ElementCreatedEvent != null) ElementCreatedEvent(element);
                            m_currentLevel.Add(element);
                            // Subsequent nodes are added as children of this element.
                            m_currentLevel = element.Nodes;
                        }
                        break;
                }
            }
            else // <tag/>
            {
                element.TerminatedType = CHtmlElement.EndTagType.Terminated;
                CloseElement(element);

                if(ElementCreatedEvent != null) ElementCreatedEvent(element);
                m_currentLevel.Add(element);
            }

            // Step over the TagEnd / TagCloseEnd token.
            ++visitor;
        }
        /////////////////////////////////////////////////////////////////////////////////
        /// <summary>
        /// Parses a string containing HTML into a newly created node collection.
        /// </summary>
        /// <param name="html">The HTML to be parsed; must not be null.</param>
        /// <returns>A new collection holding the nodes produced by the parse.</returns>
        public CHtmlNodeCollection Parse(string html)
        {
            System.Diagnostics.Debug.Assert(html != null);

            CHtmlNodeCollection parsedNodes = new CHtmlNodeCollection();
            Parse(html, parsedNodes);

            return parsedNodes;
        }
        /////////////////////////////////////////////////////////////////////////////////
        /// <summary>
        /// Parses a string containing HTML and adds the resulting nodes to
        /// <paramref name="result"/>.
        /// </summary>
        /// <param name="html">The HTML to be parsed; must not be null.</param>
        /// <param name="result">Collection that receives the parsed nodes; must not be null.</param>
        public void Parse(string html, CHtmlNodeCollection result)
        {
            System.Diagnostics.Debug.Assert(html != null);
            System.Diagnostics.Debug.Assert(result != null);

            m_input = html;
            m_result = result;
            m_currentLevel = m_result;

            try
            {
                ParseTokens();
                ResolveTokens();
            }
            finally
            {
                // Always drop the per-parse state, even when parsing throws, so that a failed
                // parse cannot leak stale open elements or references into the next call.
                m_result = null;
                m_input = null;
                m_currentLevel = null;
                m_lastOpenNodes.Clear();
            }
        }
        /////////////////////////////////////////////////////////////////////////////////
        /// <summary>
        /// Appends every node of this collection, each followed by all of its descendants,
        /// to <paramref name="result"/>.
        /// </summary>
        /// <param name="result">Collection that receives the nodes; must not be null.</param>
        public void GetDescendent(CHtmlNodeCollection result)
        {
            System.Diagnostics.Debug.Assert(result != null);

            int total = m_nodeList.Count;
            for(int position = 0; position < total; position++)
            {
                CHtmlNode current = m_nodeList[position];
                result.Add(current);

                // Only elements have child nodes to descend into.
                CHtmlElement currentElement = current as CHtmlElement;
                if(currentElement != null)
                {
                    currentElement.Nodes.GetDescendent(result);
                }
            }
        }
        /////////////////////////////////////////////////////////////////////////////////
        /// <summary>
        /// Collects every node of this collection together with all descendants.
        /// </summary>
        /// <returns>A new collection (initial capacity 64) holding the collected nodes.</returns>
        public CHtmlNodeCollection GetDescendent()
        {
            CHtmlNodeCollection descendants = new CHtmlNodeCollection(64);

            GetDescendent(descendants);
            return descendants;
        }
        /////////////////////////////////////////////////////////////////////////////////
        /// <summary>
        /// Appends to <paramref name="result"/> every node named <paramref name="name"/> that
        /// has an attribute <paramref name="attributeName"/> whose value equals
        /// <paramref name="attributeValue"/>. Both names are trimmed and lower-cased first;
        /// the attribute value is compared as given.
        /// </summary>
        /// <param name="result">Collection that receives the matching nodes.</param>
        /// <param name="name">Node name to look for. A null name matches nothing.</param>
        /// <param name="attributeName">Name of the attribute to test; must not be null.</param>
        /// <param name="attributeValue">Value the attribute must have.</param>
        /// <param name="searchChildren">True to also search the child nodes of every element, recursively.</param>
        public void FindByNameAttributeValue(CHtmlNodeCollection result, string name, string attributeName, string attributeValue, bool searchChildren)
        {
            System.Diagnostics.Debug.Assert(result != null);
            System.Diagnostics.Debug.Assert(attributeName != null);

            // Normalize the node name like the FindByName overloads do, so that "META" and
            // " meta " find the same nodes as "meta". Null is left alone and matches nothing.
            if(name != null) name = name.Trim().ToLowerInvariant();
            attributeName = attributeName.Trim().ToLowerInvariant();

            for(int index = 0, count = m_nodeList.Count; index < count; ++index)
            {
                CHtmlNode node = m_nodeList[index];

                if(node.NodeName.Equals(name) && node is IHtmlNodeHasAttribute)
                {
                    CHtmlAttribute attribute = ((IHtmlNodeHasAttribute)node).Attributes[attributeName];
                    if(attribute != null && attribute.Value == attributeValue)
                        result.Add(node);
                }

                if(searchChildren && node is CHtmlElement)
                    ((CHtmlElement)node).Nodes.FindByNameAttributeValue(result, name, attributeName, attributeValue, searchChildren);
            }
        }
        /////////////////////////////////////////////////////////////////////////////////
        /// <summary>
        /// Appends to <paramref name="result"/> every node whose NodeName equals
        /// <paramref name="name"/> (trimmed and lower-cased), searching at most
        /// <paramref name="depth"/> levels deep.
        /// </summary>
        /// <param name="result">Collection that receives the matching nodes.</param>
        /// <param name="name">Node name to look for.</param>
        /// <param name="depth">
        /// Number of levels to search: 0 searches nothing, 1 searches only this collection,
        /// and each further level also descends into the child nodes of elements.
        /// </param>
        public void FindByName(CHtmlNodeCollection result, string name, int depth)
        {
            System.Diagnostics.Debug.Assert(result != null);
            System.Diagnostics.Debug.Assert(name != null);
            System.Diagnostics.Debug.Assert(depth >= 0);

            if(depth <= 0) return;

            // Culture-invariant so that e.g. "IMG" always normalizes to "img".
            name = name.Trim().ToLowerInvariant();

            for(int index = 0, count = m_nodeList.Count; index < count; ++index)
            {
                CHtmlNode node = m_nodeList[index];
                if(node.NodeName.Equals(name))
                    result.Add(node);

                if(depth > 1 && node is CHtmlElement)
                    ((CHtmlElement)node).Nodes.FindByName(result, name, depth - 1);
            }
        }
        /////////////////////////////////////////////////////////////////////////////////
        /// <summary>
        /// Handles an end tag: closes the element recorded in m_lastOpenNodes under the tag's
        /// name and makes that element's parent level the current level. An end tag without a
        /// recorded open element is ignored.
        /// </summary>
        /// <param name="visitor">
        /// Index into m_tokens. On entry it must point at a TagCloseBegin token; on exit it
        /// points just past the TagEnd token of the end tag.
        /// </param>
        private void ResolveEndTag(ref int visitor)
        {
            System.Diagnostics.Debug.Assert(m_tokens[visitor].Type == Token.TokenType.TagCloseBegin);
            ++visitor;
            System.Diagnostics.Debug.Assert(m_tokens[visitor].Type == Token.TokenType.TagName);

            // m_lastOpenNodes is keyed by element.Name, which the CHtmlElement(string) constructor
            // stores lower-cased. Lower-case the end-tag name as well so that "</B>" finds the
            // element opened by "<b>". This is a no-op if the tokenizer already lower-cases names.
            string tagName = m_tokens[visitor].Content.ToLowerInvariant();
            CHtmlElement openElement = (CHtmlElement)m_lastOpenNodes[tagName];

            // "000<b>111<a>222</b>333</a>444" will be transformed into "000<b>111<a>222</a></b>333444".
            // The end tag "</a> will be ignored.

            if(openElement != null) // If open tag is not found, we ignore the end tag
            {
                openElement.TerminatedType = CHtmlElement.EndTagType.ExplicitlyTerminated;
                CloseElement(openElement);

                // Continue at the level that contains the element just closed.
                if(openElement.Parent != null)
                    m_currentLevel = openElement.Parent.Nodes;
                else m_currentLevel = m_result;
            }

            ++visitor;
            System.Diagnostics.Debug.Assert(m_tokens[visitor].Type == Token.TokenType.TagEnd);

            ++visitor;
        }
        /////////////////////////////////////////////////////////////////////////////////
        #region

        /////////////////////////////////////////////////////////////////////////////////
        /// <summary>
        /// Looks for a charset declaration in the content attribute of the meta elements
        /// directly under html/head and returns the matching encoding.
        /// </summary>
        /// <param name="nodes">Top-level nodes of the parsed document.</param>
        /// <returns>
        /// The encoding of the first charset declaration that Encoding.GetEncoding accepts,
        /// or null when there is none.
        /// </returns>
        private Encoding DetectCharset(CHtmlNodeCollection nodes)
        {
            const string charsetKey = "charset";

            CHtmlNodeCollection metaNodes = new CHtmlNodeCollection();
            CHtmlElement        node      = nodes["html"] as CHtmlElement;

            if (node != null)
            {
                node = node.Nodes["head"] as CHtmlElement;
            }
            if (node != null)
            {
                node.Nodes.FindByNameAttribute(metaNodes, "meta", "content", false);
            }

            for (int nodeIndex = 0, count = metaNodes.Count; nodeIndex < count; ++nodeIndex)
            {
                CHtmlElement metaElement = metaNodes[nodeIndex] as CHtmlElement;
                if (metaElement == null)
                {
                    continue;
                }

                CHtmlAttributeCollection attributes = metaElement.Attributes.FindByName("content");
                for (int attributeIndex = 0, attributeCount = attributes.Count; attributeIndex < attributeCount; ++attributeIndex)
                {
                    string value = attributes[attributeIndex].Value;
                    if (string.IsNullOrEmpty(value))
                    {
                        continue;
                    }

                    // Ordinal and case-insensitive: "CHARSET=..." is as valid as "charset=...".
                    int index = value.IndexOf(charsetKey, StringComparison.OrdinalIgnoreCase);
                    if (index == -1)
                    {
                        continue;
                    }

                    // Skip the blanks and '=' that follow the keyword.
                    int startIndex = index + charsetKey.Length;
                    while (startIndex < value.Length && CHtmlUtil.EqualesOfAnyChar(value[startIndex], " ="))
                    {
                        ++startIndex;
                    }

                    // The charset name ends at a blank, at a ';' separator or at the end of the value.
                    int endIndex = startIndex;
                    while (endIndex < value.Length && !CHtmlUtil.EqualesOfAnyChar(value[endIndex], " ;"))
                    {
                        ++endIndex;
                    }

                    if (endIndex > startIndex)
                    {
                        string charset = value.Substring(startIndex, endIndex - startIndex);
                        try
                        {
                            // The first usable declaration wins; stop scanning altogether.
                            return Encoding.GetEncoding(charset);
                        }
                        catch (Exception)
                        {
                            // Deliberate best effort: an unknown charset name is skipped
                            // and the search continues with the next candidate.
                        }
                    }
                }
            }

            return null;
        }
        /////////////////////////////////////////////////////////////////////////////////
        /// <summary>
        /// Closes still-recorded open elements whose end tag is optional. For every name in
        /// <paramref name="targetElements"/> the element recorded in m_lastOpenNodes is closed
        /// and the current level moves to its parent, unless an unclosed element named in
        /// <paramref name="boundElements"/> was created after it.
        /// </summary>
        /// <param name="targetElements">Names of the elements to close; must not be null.</param>
        /// <param name="boundElements">Names of the elements that block the closing; must not be null.</param>
        private void ResolveEndTagOptionalElement(string[] targetElements, string[] boundElements)
        {
            System.Diagnostics.Debug.Assert(targetElements != null);
            System.Diagnostics.Debug.Assert(boundElements != null);

            for(int targetIndex = 0, targetCount = targetElements.Length; targetIndex < targetCount; ++targetIndex)
            {
                // Find open element
                CHtmlElement openElement = (CHtmlElement)m_lastOpenNodes[targetElements[targetIndex]];
                if(openElement == null) continue;

                bool fixNestedElement = true;

                // Find bound element
                for(int index = 0, count = boundElements.Length; index < count; ++index)
                {
                    CHtmlElement boundElement = (CHtmlElement)m_lastOpenNodes[boundElements[index]];
                    if(boundElement != null &&
                       boundElement.m_close == false && boundElement.NodeID > openElement.NodeID)
                    {
                        // An unclosed bound element was created after the target (larger NodeID),
                        // so the target element must stay open.
                        fixNestedElement = false;
                        break;
                    }
                }

                if(fixNestedElement)
                {
                    CloseElement(openElement);

                    // Continue at the level that contains the element just closed.
                    if(openElement.Parent != null)
                        m_currentLevel = openElement.Parent.Nodes;
                    else m_currentLevel = m_result;
                }
            }
        }
        ///////////////////////////////////////////////////////////////////////////////
        /// <summary>
        /// Copy constructor. Copies the name of <paramref name="obj"/> and adds a clone of
        /// each of its child nodes and attributes to the new element.
        /// </summary>
        /// <param name="obj">Element to copy; must not be null.</param>
        public CHtmlElement(CHtmlElement obj)
            : base(obj)
        {
            System.Diagnostics.Debug.Assert(obj != null);

            obj.AssertValid();

            m_name = obj.m_name;
            m_nodes = new CHtmlNodeCollection(this);

            // Clone the child nodes, reserving the capacity up front.
            int childCount = obj.m_nodes.Count;
            m_nodes.Capacity = childCount;
            for(int childIndex = 0; childIndex < childCount; childIndex++)
            {
                CHtmlNode childCopy = (CHtmlNode)obj.m_nodes[childIndex].Clone();
                m_nodes.Add(childCopy);
            }

            // Clone the attributes the same way.
            int attributeCount = obj.m_attributes.Count;
            m_attributes.Capacity = attributeCount;
            for(int attributeIndex = 0; attributeIndex < attributeCount; attributeIndex++)
            {
                CHtmlAttribute attributeCopy = (CHtmlAttribute)obj.m_attributes[attributeIndex].Clone();
                m_attributes.Add(attributeCopy);
            }
        }
        /////////////////////////////////////////////////////////////////////////////////
        /// <summary>
        /// This will search through this collection of nodes for all elements with the
        /// specified name. If you want to search the subnodes recursively, you should
        /// pass True as the parameter in searchChildren. This search is guaranteed to
        /// return nodes in the order in which they are found in the document.
        /// </summary>
        /// <param name="name">The name of the element to find; it is trimmed and lower-cased before the search.</param>
        /// <param name="searchChildren">True if you want to search sub-nodes, False to
        /// only search this collection.</param>
        /// <returns>A new collection of all the nodes that match.</returns>
        public CHtmlNodeCollection FindByName(string name, bool searchChildren)
        {
            System.Diagnostics.Debug.Assert(name != null);

            // Culture-invariant so that e.g. "IMG" always normalizes to "img".
            name = name.Trim().ToLowerInvariant();

            CHtmlNodeCollection result = new CHtmlNodeCollection(64);
            FindByName(result, name, searchChildren);

            return result;
        }