示例#1
0
        /// <summary>
        /// Parses the source html to css boxes tree structure.
        /// </summary>
        /// <remarks>
        /// The source is scanned for '&lt;' characters. The text preceding each one is handed to
        /// <c>AddTextBox</c>, "&lt;!...&gt;" elements (comments, declarations) are skipped and every
        /// other tag is handed to <c>ParseHtmlTag</c>. Non-whitespace text remaining after the last
        /// parsed element is attached to a new box created under the root.
        /// </remarks>
        /// <param name="source">the html source to parse (must not be null)</param>
        /// <returns>the root block box of the parsed tree</returns>
        public static CssBox ParseDocument(string source)
        {
            var root   = CssBox.CreateBlock();
            var curBox = root;

            int endIdx   = 0;
            int startIdx = 0;

            // startIdx is bounded by the source length so that an end index past the end of the
            // source (endIdx comes from ParseHtmlTag, which is not visible here) can never make
            // IndexOf throw; startIdx == source.Length would not find a tag anyway
            while (startIdx >= 0 && startIdx < source.Length)
            {
                var tagIdx = source.IndexOf('<', startIdx);
                if (tagIdx >= 0)
                {
                    // add the html text as anon css box to the structure
                    AddTextBox(source, startIdx, tagIdx, ref curBox);

                    if (tagIdx + 1 >= source.Length)
                    {
                        // a '<' that is the very last character cannot open a tag and there is
                        // nothing after it to inspect: stop scanning and let it be handled as
                        // trailing text below instead of indexing past the end of the source
                        endIdx = tagIdx;
                        break;
                    }

                    if (source[tagIdx + 1] == '!')
                    {
                        // skip the html elements that are not rendered (<!-- bla -->) (<!doctype bla>)
                        var closeIdx = source.IndexOf(">", tagIdx + 2);

                        // when there is no closing '>' only the "<!" prefix itself is skipped
                        endIdx = closeIdx > 0 ? closeIdx + 1 : tagIdx + 2;
                    }
                    else
                    {
                        // parse element tag to css box structure
                        endIdx = ParseHtmlTag(source, tagIdx, ref curBox) + 1;
                    }
                }

                // continue after the handled element, or stop when no more tags were found
                startIdx = tagIdx > -1 && endIdx > 0 ? endIdx : -1;
            }

            // handle pieces of html without proper structure
            if (endIdx < source.Length)
            {
                // there is text after the end of last element
                var endText = new SubString(source, endIdx, source.Length - endIdx);
                if (!endText.IsEmptyOrWhitespace())
                {
                    var abox = CssBox.CreateBox(root);
                    abox.Text = endText;
                }
            }

            return root;
        }
示例#2
0
        /// <summary>
        /// Parses the source html to css boxes tree structure.
        /// </summary>
        /// <remarks>
        /// The source is scanned for '&lt;' characters. The text preceding each one is handed to
        /// <c>AddTextBox</c>, comments ("&lt;!- ... --&gt;") and other "&lt;!...&gt;" elements are
        /// skipped and every other tag is handed to <c>ParseHtmlTag</c>. When the current box after
        /// parsing a tag is a style element, everything up to the next "&lt;/style&gt;"
        /// (case-insensitive) is added as a single text run without being scanned for tags; if no
        /// closing style tag exists, parsing stops there and the rest of the source is not added.
        /// Otherwise non-whitespace text remaining after the last parsed element is attached to a
        /// new box created under the root.
        /// </remarks>
        /// <param name="source">the html source to parse (must not be null)</param>
        /// <returns>the root block box of the parsed tree</returns>
        public static CssBox ParseDocument(string source)
        {
            var root   = CssBox.CreateBlock();
            var curBox = root;

            int endIdx   = 0;
            int startIdx = 0;

            // startIdx is bounded by the source length so that an end index past the end of the
            // source (endIdx comes from ParseHtmlTag, which is not visible here) can never make
            // IndexOf throw; startIdx == source.Length would not find a tag anyway
            while (startIdx >= 0 && startIdx < source.Length)
            {
                var tagIdx = source.IndexOf('<', startIdx);
                if (tagIdx >= 0)
                {
                    // add the html text as anon css box to the structure
                    AddTextBox(source, startIdx, tagIdx, ref curBox);

                    if (tagIdx + 1 >= source.Length)
                    {
                        // a '<' that is the very last character cannot open a tag and there is
                        // nothing after it to inspect: stop scanning and let it be handled as
                        // trailing text below instead of indexing past the end of the source
                        endIdx = tagIdx;
                        break;
                    }

                    if (source[tagIdx + 1] == '!')
                    {
                        // the source may end right after "<!", so check the bounds before
                        // looking at the character that tells a comment from a declaration
                        if (tagIdx + 2 < source.Length && source[tagIdx + 2] == '-')
                        {
                            // skip the html comment elements (<!-- bla -->)
                            var closeIdx = source.IndexOf("-->", tagIdx + 2);

                            // when the comment is never closed only the "<!" prefix is skipped
                            endIdx = closeIdx > 0 ? closeIdx + 3 : tagIdx + 2;
                        }
                        else
                        {
                            // skip the other html elements that are not rendered (<!doctype bla>)
                            var closeIdx = source.IndexOf(">", tagIdx + 2);

                            // when there is no closing '>' only the "<!" prefix is skipped
                            endIdx = closeIdx > 0 ? closeIdx + 1 : tagIdx + 2;
                        }
                    }
                    else
                    {
                        // parse element tag to css box structure
                        endIdx = ParseHtmlTag(source, tagIdx, ref curBox) + 1;

                        if (curBox.HtmlTag != null &&
                            curBox.HtmlTag.Name.Equals(HtmlConstants.Style, StringComparison.OrdinalIgnoreCase))
                        {
                            // the content of a style element is taken as one raw text run up to
                            // the closing tag, so '<' characters inside it are not parsed as tags
                            var styleStartIdx = endIdx;
                            endIdx = source.IndexOf("</style>", endIdx, StringComparison.OrdinalIgnoreCase);
                            if (endIdx > -1)
                            {
                                AddTextBox(source, styleStartIdx, endIdx, ref curBox);
                            }
                            // otherwise endIdx stays -1: the loop ends and no trailing text is added
                        }
                    }
                }

                // continue after the handled element, or stop when no more tags were found
                startIdx = tagIdx > -1 && endIdx > 0 ? endIdx : -1;
            }

            // handle pieces of html without proper structure
            if (endIdx > -1 && endIdx < source.Length)
            {
                // there is text after the end of last element
                var endText = new SubString(source, endIdx, source.Length - endIdx);
                if (!endText.IsEmptyOrWhitespace())
                {
                    var abox = CssBox.CreateBox(root);
                    abox.Text = endText;
                }
            }

            return root;
        }