/// <summary>
/// Parse the given stylesheet for <c>@media</c> CSS blocks.<br/>
/// These blocks are added under each specific media type named by the at-rule they are found in.
/// </summary>
/// <param name="cssData">the CSS data to fill with parsed CSS objects</param>
/// <param name="stylesheet">the stylesheet to parse</param>
private static void ParseMediaStyleBlocks(CssData cssData, string stylesheet)
{
    int startIdx = 0;
    string atrule;
    while ((atrule = RegexParserUtils.GetCssAtRules(stylesheet, ref startIdx)) != null)
    {
        // Only @media rules are processed here
        if (!atrule.StartsWith("@media", StringComparison.InvariantCultureIgnoreCase))
        {
            continue;
        }

        // Extract the "@media ... {" header; anything but exactly one match is ignored
        MatchCollection types = RegexParserUtils.Match(RegexParserUtils.CssMediaTypes, atrule);
        if (types.Count != 1)
        {
            continue;
        }

        string line = types[0].Value;
        if (!line.StartsWith("@media", StringComparison.InvariantCultureIgnoreCase) || !line.EndsWith("{", StringComparison.Ordinal))
        {
            continue;
        }

        // Drop the leading "@media" (6 chars) and the trailing "{" (1 char), then split the rest.
        // A media list is comma separated ("screen, print"), so the comma must be treated as a
        // separator as well, otherwise the first type would be stored as "screen,".
        string[] media = line.Substring(6, line.Length - 7)
                             .Split(new[] { ' ', ',' }, StringSplitOptions.RemoveEmptyEntries);

        // The blocks inside the at-rule do not depend on the media type, so match them only once
        MatchCollection insideBlocks = null;

        foreach (string t in media)
        {
            // Trim removes line breaks/tabs that may surround a type
            string mediaType = t.Trim();
            if (String.IsNullOrEmpty(mediaType))
            {
                continue;
            }

            if (insideBlocks == null)
            {
                insideBlocks = RegexParserUtils.Match(RegexParserUtils.CssBlocks, atrule);
            }

            // Feed every inner block to the style sheet under the current media type
            foreach (Match insideBlock in insideBlocks)
            {
                FeedStyleBlock(cssData, insideBlock.Value, mediaType);
            }
        }
    }
}
/// <summary>
/// Parses the given html source into a hierarchy of boxes.
/// </summary>
/// <param name="document">the html source to parse</param>
/// <returns>the root box of the parsed hierarchy</returns>
public static CssBox ParseDocument(string document)
{
    document = RemoveHtmlComments(document);

    CssBox root = null;
    CssBox current = null;
    int prevTagEnd = -1;

    foreach (Match tagMatch in RegexParserUtils.Match(RegexParserUtils.HtmlTag, document))
    {
        // Text lying between the end of the previous tag and the start of this one
        string between = String.Empty;
        if (tagMatch.Index > 0)
        {
            between = document.Substring(prevTagEnd + 1, tagMatch.Index - prevTagEnd - 1);
        }

        bool blankText = between.Trim().Length == 0;
        if (!blankText)
        {
            // Text found before any box exists: start the tree with a block box
            if (current == null)
            {
                current = CssBox.CreateBlock();
                root = current;
            }

            var textBox = CssBox.CreateBox(current);
            textBox.Text = between;
        }

        var tag = ParseHtmlTag(tagMatch.Value);
        if (!tag.IsClosing)
        {
            // A box is created under the current box for both single and opening tags;
            // only an opening tag moves the current box one level down
            bool single = tag.IsSingle;
            var child = new CssBox(current, tag);
            if (!single)
            {
                current = child;
            }
        }
        else
        {
            // Tag whose only content is whitespace: keep a single space as its text
            if (blankText && current != null && current.Boxes.Count == 0 && between.Length > 0)
            {
                var spaceBox = CssBox.CreateBox(current);
                spaceBox.Text = " ";
            }

            // Look up the box to continue from after this closing tag
            current = DomUtils.FindParent(root, tag.Name, current);
        }

        // The first box that becomes current while there is no root yet is the root
        if (root == null && current != null)
        {
            root = current;
            root.Display = CssConstants.Block;
        }

        prevTagEnd = tagMatch.Index + tagMatch.Length - 1;
    }

    // No root was established: wrap the whole source as text in a block box
    if (root == null)
    {
        root = CssBox.CreateBlock();
        var wholeBox = CssBox.CreateBox(root);
        wholeBox.Text = document;
        return(root);
    }

    // Non-blank text after the last tag is attached to the root box
    if (prevTagEnd < document.Length)
    {
        string trailing = document.Substring(prevTagEnd + 1);
        if (trailing.Trim().Length > 0)
        {
            var trailingBox = CssBox.CreateBox(root);
            trailingBox.Text = trailing;
        }
    }

    return(root);
}
/// <summary>
/// Parse raw html tag source to <seealso cref="HtmlTag"/> object.<br/>
/// Extract attributes found on the tag.
/// </summary>
/// <remarks>
/// The tag name and the names of value-less attributes are lower-cased (culture invariant);
/// names of attributes that carry a value are kept as written. When an attribute appears
/// more than once, the first occurrence wins.
/// </remarks>
/// <param name="source">the html tag to parse, including the surrounding angle brackets</param>
/// <returns>the parsed tag with its name, attributes and closing flag</returns>
private static HtmlTag ParseHtmlTag(string source)
{
    // Strip the leading '<' and the trailing '>' (or "/>" when the tag is self closed)
    int bracketChars = source.Length > 2 && source[source.Length - 2] == '/' ? 3 : 2;
    source = source.Substring(1, source.Length - bracketChars);

    // The tag name is everything up to the first space
    int spaceIndex = source.IndexOf(' ');
    string tagName = spaceIndex < 0 ? source : source.Substring(0, spaceIndex);

    // A leading '/' marks an end tag
    bool isClosing = false;
    if (tagName.StartsWith("/", StringComparison.Ordinal))
    {
        isClosing = true;
        tagName = tagName.Substring(1);
    }

    // Invariant lower-casing: tag names must not depend on the current culture (e.g. Turkish 'I')
    tagName = tagName.ToLowerInvariant();

    // Extract attributes
    var attributes = new Dictionary<string, string>();
    var atts = RegexParserUtils.Match(RegexParserUtils.HmlTagAttributes, source);
    foreach (Match att in atts)
    {
        string raw = att.Value;
        int equalsIndex = raw.IndexOf('=');

        if (equalsIndex < 0)
        {
            // Value-less attribute: the existence check must use the same (lower-cased) key
            // that is stored, otherwise a repeated upper-case attribute throws on Add
            string flagName = raw.ToLowerInvariant();
            if (!attributes.ContainsKey(flagName))
            {
                attributes.Add(flagName, string.Empty);
            }
            continue;
        }

        // Split at the first '=' into name and value
        string attname = raw.Substring(0, equalsIndex).Trim();
        string attvalue = raw.Substring(equalsIndex + 1).Trim();

        // Remove matching surrounding quotes; ">= 2" so that an empty quoted value ("") becomes empty
        if (attvalue.Length >= 2)
        {
            char first = attvalue[0];
            char last = attvalue[attvalue.Length - 1];
            if ((first == '"' || first == '\'') && first == last)
            {
                attvalue = attvalue.Substring(1, attvalue.Length - 2);
            }
        }

        if (!attributes.ContainsKey(attname))
        {
            attributes.Add(attname, attvalue);
        }
    }

    return(new HtmlTag(tagName, attributes, isClosing));
}