Exemple #1
0
        /// <summary>
        /// Handles the html tag that starts at <paramref name="tagIdx"/>: finds its terminating '&gt;',
        /// extracts the tag name and attributes, and updates the current box to match.<br/>
        /// When the tag parser overload returns true (presumably meaning a closing tag - confirm against that overload)
        /// and the tag is not a single tag, the current box is replaced by the result of the parent lookup.
        /// Otherwise, when a tag name was parsed, a new box is created under the current box and,
        /// unless the tag is single, that new box becomes the current box.
        /// </summary>
        /// <param name="source">the html source to parse</param>
        /// <param name="tagIdx">the index in the source where the html tag starts</param>
        /// <param name="curBox">the current box in html tree parsing, may be replaced by this call</param>
        /// <returns>
        /// the index of the '&gt;' that ends the tag; <paramref name="tagIdx"/> + 1 when no tag name could be parsed;
        /// or the non-positive search result unchanged when no '&gt;' was found
        /// </returns>
        private static int ParseHtmlTag(string source, int tagIdx, ref CssBox curBox)
        {
            int closeIdx = source.IndexOf('>', tagIdx + 1);
            if (closeIdx <= 0)
            {
                // no terminating '>' to work with, hand the raw search result back to the caller
                return closeIdx;
            }

            // a '/' right before '>' marks a self closed tag; it is excluded from the parsed span
            bool selfClosed = source[closeIdx - 1] == '/';
            int tagLength = closeIdx - tagIdx + 1;
            if (selfClosed)
            {
                tagLength--;
            }

            string tagName;
            Dictionary<string, string> tagAttributes;
            bool parsedFlag = ParseHtmlTag(source, tagIdx, tagLength, out tagName, out tagAttributes);

            if (parsedFlag)
            {
                if (!HtmlUtils.IsSingleTag(tagName) && curBox.ParentBox != null)
                {
                    // go one level up: continue from the parent found for this tag
                    curBox = DomUtils.FindParent(curBox.ParentBox, tagName, curBox);
                }

                return closeIdx;
            }

            if (string.IsNullOrEmpty(tagName))
            {
                // nothing usable was parsed, step just past the tag start
                return tagIdx + 1;
            }

            bool isSingle = HtmlUtils.IsSingleTag(tagName) || selfClosed;
            var createdBox = CssBox.CreateBox(new HtmlTag(tagName, isSingle, tagAttributes), curBox);
            if (!isSingle)
            {
                // go one level down: the new box becomes the current box
                curBox = createdBox;
            }

            return closeIdx;
        }
Exemple #2
0
        /// <summary>
        /// Parses the document into a tree of boxes.<br/>
        /// Html comments are removed first, then every tag matched by the html tag regex is handled in order:
        /// non-blank text found before a tag becomes a text box under the current box, a closing tag moves the
        /// current box up via the parent lookup, a single tag adds a box without changing the current box,
        /// and any other tag adds a box that becomes the current box.
        /// </summary>
        /// <param name="document">the html source to parse</param>
        /// <returns>
        /// the root box of the parsed tree; when no root was established while handling the tags,
        /// a new block box holding the whole document as text
        /// </returns>
        public static CssBox ParseDocument(string document)
        {
            document = RemoveHtmlComments(document);

            CssBox root = null;
            CssBox curBox = null;
            int lastEnd = -1;

            foreach (Match tagmatch in RegexParserUtils.Match(RegexParserUtils.HtmlTag, document))
            {
                // the text between the end of the previous tag and the start of this one
                string text = String.Empty;
                if (tagmatch.Index > 0)
                {
                    text = document.Substring(lastEnd + 1, tagmatch.Index - lastEnd - 1);
                }

                bool blankText = text.Trim().Length == 0;
                if (!blankText)
                {
                    if (curBox == null)
                    {
                        // text appears with no current box, start a block to hold it
                        curBox = CssBox.CreateBlock();
                        root = curBox;
                    }

                    CssBox.CreateBox(curBox).Text = text;
                }

                var tag = ParseHtmlTag(tagmatch.Value);

                if (tag.IsClosing)
                {
                    // a tag whose only content is whitespace keeps a single space as its text
                    if (blankText && curBox != null && curBox.Boxes.Count == 0 && text.Length > 0)
                    {
                        CssBox.CreateBox(curBox).Text = " ";
                    }

                    // go one level up: continue from the parent found for this tag
                    curBox = DomUtils.FindParent(root, tag.Name, curBox);
                }
                else
                {
                    bool descend = !tag.IsSingle;
                    var tagBox = new CssBox(curBox, tag);
                    if (descend)
                    {
                        // go one level down: the new box becomes the current box
                        curBox = tagBox;
                    }
                }

                if (root == null && curBox != null)
                {
                    // the first box that becomes current is used as the root and displayed as block
                    root = curBox;
                    root.Display = CssConstants.Block;
                }

                lastEnd = tagmatch.Index + tagmatch.Length - 1;
            }

            if (root == null)
            {
                // no root was established, wrap the whole document as text in a new block
                root = CssBox.CreateBlock();
                CssBox.CreateBox(root).Text = document;
                return root;
            }

            if (lastEnd < document.Length)
            {
                // non-blank text after the last tag is attached directly to the root
                string trailingText = document.Substring(lastEnd + 1);
                if (trailingText.Trim().Length > 0)
                {
                    CssBox.CreateBox(root).Text = trailingText;
                }
            }

            return root;
        }