/// <summary>
/// Parses the source html to css boxes tree structure.
/// </summary>
/// <remarks>
/// The source is scanned left to right for '&lt;' characters. Text found before each
/// one is handed to <c>AddTextBox</c>; markup starting with "&lt;!" is skipped up to the
/// next '&gt;'; anything else is handed to <c>ParseHtmlTag</c>. Non-whitespace text
/// remaining after the last processed position is attached to the root box.
/// A '&lt;' that is the very last character of the source is treated as text.
/// </remarks>
/// <param name="source">the html source to parse</param>
/// <returns>the root block box of the created css boxes tree</returns>
public static CssBox ParseDocument(string source)
{
    var root = CssBox.CreateBlock();
    var curBox = root;
    int endIdx = 0;
    int startIdx = 0;
    while (startIdx >= 0)
    {
        var tagIdx = source.IndexOf('<', startIdx);
        if (tagIdx >= 0)
        {
            // add the html text as anon css box to the structure
            AddTextBox(source, startIdx, tagIdx, ref curBox);

            if (tagIdx + 1 >= source.Length)
            {
                // dangling '<' at the very end of the source: there is no character
                // after it to inspect, so leave it for the trailing-text handling below
                endIdx = tagIdx;
                break;
            }

            if (source[tagIdx + 1] == '!')
            {
                // skip comments and declarations (<!-- bla -->) (<!doctype bla>)
                startIdx = source.IndexOf(">", tagIdx + 2);

                // when no closing '>' exists, resume right after the "<!" marker
                endIdx = startIdx > 0 ? startIdx + 1 : tagIdx + 2;
            }
            else
            {
                // parse element tag to css box structure; the returned index + 1 is
                // used as the position to resume scanning from
                endIdx = ParseHtmlTag(source, tagIdx, ref curBox) + 1;
            }
        }

        // stop when no more tags were found or the resume position is not positive
        startIdx = tagIdx > -1 && endIdx > 0 ? endIdx : -1;
    }

    // handle pieces of html without proper structure
    if (endIdx < source.Length)
    {
        // there is text after the end of last element
        var endText = new SubString(source, endIdx, source.Length - endIdx);
        if (!endText.IsEmptyOrWhitespace())
        {
            var abox = CssBox.CreateBox(root);
            abox.Text = endText;
        }
    }

    return root;
}
/// <summary>
/// Parses the source html to css boxes tree structure.
/// </summary>
/// <remarks>
/// The source is scanned left to right for '&lt;' characters. Text found before each
/// one is handed to <c>AddTextBox</c>. Comments ("&lt;!-- ... --&gt;") are skipped up to
/// the closing "--&gt;", other "&lt;!" declarations up to the next '&gt;', and anything
/// else is handed to <c>ParseHtmlTag</c>. When the current box ends up being a style
/// tag, everything up to the next "&lt;/style&gt;" is added as a single text box.
/// Non-whitespace text remaining after the last processed position is attached to
/// the root box. A '&lt;' that is the very last character of the source is treated as text.
/// </remarks>
/// <param name="source">the html source to parse</param>
/// <returns>the root block box of the created css boxes tree</returns>
public static CssBox ParseDocument(string source)
{
    var root = CssBox.CreateBlock();
    var curBox = root;
    int endIdx = 0;
    int startIdx = 0;
    while (startIdx >= 0)
    {
        var tagIdx = source.IndexOf('<', startIdx);
        if (tagIdx >= 0)
        {
            // add the html text as anon css box to the structure
            AddTextBox(source, startIdx, tagIdx, ref curBox);

            if (tagIdx + 1 >= source.Length)
            {
                // dangling '<' at the very end of the source: there is no character
                // after it to inspect, so leave it for the trailing-text handling below
                endIdx = tagIdx;
                break;
            }

            if (source[tagIdx + 1] == '!')
            {
                // the length guard keeps a source ending with "<!" from reading past the end
                if (tagIdx + 2 < source.Length && source[tagIdx + 2] == '-')
                {
                    // skip the html comment elements (<!-- bla -->)
                    startIdx = source.IndexOf("-->", tagIdx + 2);

                    // unterminated comment: resume right after the "<!" marker
                    endIdx = startIdx > 0 ? startIdx + 3 : tagIdx + 2;
                }
                else
                {
                    // skip other declarations (<!doctype bla>)
                    startIdx = source.IndexOf(">", tagIdx + 2);

                    // no closing '>': resume right after the "<!" marker
                    endIdx = startIdx > 0 ? startIdx + 1 : tagIdx + 2;
                }
            }
            else
            {
                // parse element tag to css box structure; the returned index + 1 is
                // used as the position to resume scanning from
                endIdx = ParseHtmlTag(source, tagIdx, ref curBox) + 1;

                if (curBox.HtmlTag != null && curBox.HtmlTag.Name.Equals(HtmlConstants.Style, StringComparison.OrdinalIgnoreCase))
                {
                    // style content is not html: take everything up to the closing tag
                    // as one text box instead of scanning it for '<'
                    var styleStartIdx = endIdx;
                    endIdx = source.IndexOf("</style>", endIdx, StringComparison.OrdinalIgnoreCase);
                    if (endIdx > -1)
                    {
                        AddTextBox(source, styleStartIdx, endIdx, ref curBox);
                    }

                    // when the closing tag is missing endIdx is -1, which ends the loop
                    // and also disables the trailing-text handling below
                }
            }
        }

        // stop when no more tags were found or the resume position is not positive
        startIdx = tagIdx > -1 && endIdx > 0 ? endIdx : -1;
    }

    // handle pieces of html without proper structure
    if (endIdx > -1 && endIdx < source.Length)
    {
        // there is text after the end of last element
        var endText = new SubString(source, endIdx, source.Length - endIdx);
        if (!endText.IsEmptyOrWhitespace())
        {
            var abox = CssBox.CreateBox(root);
            abox.Text = endText;
        }
    }

    return root;
}