/// <summary>
/// Parse a complex font property value that contains multiple css properties into specific css properties.
/// </summary>
/// <remarks>
/// The font-size[/line-height] token is located first. Whatever precedes it is searched for
/// font-style, font-variant and font-weight; whatever follows it is taken as the font-family.
/// When no font-size token is found nothing is added to <paramref name="properties"/>.
/// </remarks>
/// <param name="propValue">the value of the property to parse to specific values</param>
/// <param name="properties">the properties collection to add the specific properties to</param>
private static void ParseFontProperty(string propValue, Dictionary<string, string> properties)
{
    int mustBePos;
    string mustBe = RegexParserUtils.Search(RegexParserUtils.CssFontSizeAndLineHeight, propValue, out mustBePos);

    if (string.IsNullOrEmpty(mustBe))
    {
        // Check for: caption | icon | menu | message-box | small-caption | status-bar
        //TODO: Interpret font values of: caption | icon | menu | message-box | small-caption | status-bar
        return;
    }

    // Keep the untrimmed length: mustBePos is relative to the untrimmed match, so the
    // family must start after the whole match. Using the trimmed length here would start
    // the right side inside the size token whenever the match has leading whitespace.
    // NOTE(review): assumes mustBePos is the index in propValue where the returned text begins.
    int matchLength = mustBe.Length;
    mustBe = mustBe.Trim();

    //Check for style||variant||weight on the left
    string leftSide = propValue.Substring(0, mustBePos);
    string fontStyle = RegexParserUtils.Search(RegexParserUtils.CssFontStyle, leftSide);
    string fontVariant = RegexParserUtils.Search(RegexParserUtils.CssFontVariant, leftSide);
    string fontWeight = RegexParserUtils.Search(RegexParserUtils.CssFontWeight, leftSide);

    //Check for family on the right
    string rightSide = propValue.Substring(mustBePos + matchLength);
    string fontFamily = rightSide.Trim(); //Parser.Search(Parser.CssFontFamily, rightSide); //TODO: Would this be right?

    //Check for font-size and line-height ("size/line-height"); a trailing slash with nothing after it is not split
    string fontSize = mustBe;
    string lineHeight = string.Empty;
    int slashPos = mustBe.IndexOf('/');
    if (slashPos >= 0 && slashPos + 1 < mustBe.Length)
    {
        fontSize = mustBe.Substring(0, slashPos);
        lineHeight = mustBe.Substring(slashPos + 1);
    }

    // Only the parts actually present in the shorthand are written
    if (!string.IsNullOrEmpty(fontFamily))
    {
        properties["font-family"] = ParseFontFamilyProperty(fontFamily);
    }
    if (!string.IsNullOrEmpty(fontStyle))
    {
        properties["font-style"] = fontStyle;
    }
    if (!string.IsNullOrEmpty(fontVariant))
    {
        properties["font-variant"] = fontVariant;
    }
    if (!string.IsNullOrEmpty(fontWeight))
    {
        properties["font-weight"] = fontWeight;
    }
    if (!string.IsNullOrEmpty(fontSize))
    {
        properties["font-size"] = fontSize;
    }
    if (!string.IsNullOrEmpty(lineHeight))
    {
        properties["line-height"] = lineHeight;
    }
}
/// <summary>
/// Parse given stylesheet for media CSS blocks<br/>
/// These blocks are added under the specific media block they are found.
/// </summary>
/// <remarks>
/// Only <c>@media</c> at-rules are handled. The text between <c>@media</c> and the opening
/// brace is split on whitespace and commas; every resulting token is used as a media name
/// and each block inside the at-rule is fed to the style sheet once per token.
/// </remarks>
/// <param name="cssData">the CSS data to fill with parsed CSS objects</param>
/// <param name="stylesheet">the stylesheet to parse</param>
private static void ParseMediaStyleBlocks(CssData cssData, string stylesheet)
{
    int startIdx = 0;
    string atrule;
    while ((atrule = RegexParserUtils.GetCssAtRules(stylesheet, ref startIdx)) != null)
    {
        //Just process @media rules
        if (!atrule.StartsWith("@media", StringComparison.OrdinalIgnoreCase))
        {
            continue;
        }

        //Extract specified media types
        MatchCollection types = RegexParserUtils.Match(RegexParserUtils.CssMediaTypes, atrule);
        if (types.Count != 1)
        {
            continue;
        }

        // Ordinal checks guarantee the header really starts with the 6 chars of "@media" and
        // ends with the single '{' that the fixed Substring offsets below rely on.
        string line = types[0].Value;
        if (!line.StartsWith("@media", StringComparison.OrdinalIgnoreCase) || !line.EndsWith("{", StringComparison.Ordinal))
        {
            continue;
        }

        //Get specified media types in the at-rule; lists may be comma separated ("screen, print")
        string[] media = line.Substring(6, line.Length - 7)
            .Split(new[] { ' ', ',', '\t', '\r', '\n', '\f' }, StringSplitOptions.RemoveEmptyEntries);

        //Blocks inside the at-rule are the same for every media type: match them once, on first use
        MatchCollection insideBlocks = null;

        //Scan media types
        foreach (string t in media)
        {
            string mediaType = t.Trim();
            if (mediaType.Length == 0)
            {
                continue;
            }

            if (insideBlocks == null)
            {
                insideBlocks = RegexParserUtils.Match(RegexParserUtils.CssBlocks, atrule);
            }

            //Scan blocks and feed them to the style sheet
            foreach (Match insideBlock in insideBlocks)
            {
                FeedStyleBlock(cssData, insideBlock.Value, mediaType);
            }
        }
    }
}
/// <summary>
/// Split a shorthand border value into its width, style and color parts and store each part that was found.
/// </summary>
/// <remarks>
/// With a non-null <paramref name="direction"/> the parts are written directly under
/// <c>"border" + direction + "-width" / "-style" / "-color"</c>. With a null direction each part
/// is handed to the corresponding border width/style/color parser instead.
/// </remarks>
/// <param name="propValue">the value of the property to parse to specific values</param>
/// <param name="direction">the left, top, right or bottom direction of the border to parse</param>
/// <param name="properties">the properties collection to add the specific properties to</param>
private static void ParseBorderProperty(string propValue, string direction, Dictionary<string, string> properties)
{
    string width = RegexParserUtils.Search(RegexParserUtils.CssBorderWidth, propValue);
    string style = RegexParserUtils.Search(RegexParserUtils.CssBorderStyle, propValue);
    string color = RegexParserUtils.Search(RegexParserUtils.CssColors, propValue);

    // No direction: delegate each found part to its dedicated parser
    if (direction == null)
    {
        if (width != null)
        {
            ParseBorderWidthProperty(width, properties);
        }
        if (style != null)
        {
            ParseBorderStyleProperty(style, properties);
        }
        if (color != null)
        {
            ParseBorderColorProperty(color, properties);
        }
        return;
    }

    // Single direction: write the found parts under the direction specific keys
    string keyPrefix = "border" + direction;
    if (width != null)
    {
        properties[keyPrefix + "-width"] = width;
    }
    if (style != null)
    {
        properties[keyPrefix + "-style"] = style;
    }
    if (color != null)
    {
        properties[keyPrefix + "-color"] = color;
    }
}
/// <summary>
/// Parses the document into a tree of <see cref="CssBox"/> and returns its root.
/// </summary>
/// <remarks>
/// Html comments are removed first. The document is then walked tag by tag: non-blank text
/// between tags becomes a child box of the current box, opening tags create a box and descend
/// into it, single tags create a box without descending, and closing tags move the current
/// box to the result of <c>DomUtils.FindParent</c>.
/// </remarks>
/// <param name="document">the html source to parse</param>
/// <returns>the root box; a block box holding the whole document as text when no box was created from tags</returns>
public static CssBox ParseDocument(string document)
{
    document = RemoveHtmlComments(document);

    CssBox root = null;
    CssBox current = null;
    int prevTagEnd = -1; // index of the last char of the previously handled tag

    foreach (Match tagMatch in RegexParserUtils.Match(RegexParserUtils.HtmlTag, document))
    {
        // Text located between the previous tag and this one
        string between = String.Empty;
        if (tagMatch.Index > 0)
        {
            int textStart = prevTagEnd + 1;
            between = document.Substring(textStart, tagMatch.Index - textStart);
        }

        bool blank = between.Trim().Length == 0;
        if (!blank)
        {
            if (current == null)
            {
                current = CssBox.CreateBlock();
                root = current;
            }
            CssBox.CreateBox(current).Text = between;
        }

        var tag = ParseHtmlTag(tagMatch.Value);
        if (tag.IsClosing)
        {
            // handle tags that have no content but whitespace
            if (blank && current != null && current.Boxes.Count == 0 && between.Length > 0)
            {
                CssBox.CreateBox(current).Text = " ";
            }

            // need to find the parent tag to go one level up
            current = DomUtils.FindParent(root, tag.Name, current);
        }
        else
        {
            bool single = tag.IsSingle;
            var created = new CssBox(current, tag);
            if (!single)
            {
                // go one level down, make the new box the current box
                current = created;
            }
            // for a single tag the current box is not changed
        }

        // The first box that becomes current is promoted to root and displayed as a block
        if (root == null && current != null)
        {
            root = current;
            root.Display = CssConstants.Block;
        }

        prevTagEnd = tagMatch.Index + tagMatch.Length - 1;
    }

    // No root was established: wrap the whole document as text in a block
    if (root == null)
    {
        root = CssBox.CreateBlock();
        CssBox.CreateBox(root).Text = document;
        return (root);
    }

    // Non-blank text after the last tag is attached to the root
    if (prevTagEnd < document.Length)
    {
        string tail = document.Substring(prevTagEnd + 1);
        if (tail.Trim().Length != 0)
        {
            CssBox.CreateBox(root).Text = tail;
        }
    }

    return (root);
}
/// <summary>
/// Parse raw html tag source to <seealso cref="HtmlTag"/> object.<br/>
/// Extract attributes found on the tag.
/// </summary>
/// <remarks>
/// The tag name is lower-cased and a leading '/' marks the tag as closing. Attributes without
/// a value are stored lower-cased with an empty value; for <c>name=value</c> attributes one pair
/// of surrounding single or double quotes is removed from the value. The first occurrence of an
/// attribute name wins.
/// </remarks>
/// <param name="source">the html tag to parse, including its angle brackets</param>
/// <returns>the parsed tag with its name, attributes and closing flag</returns>
private static HtmlTag ParseHtmlTag(string source)
{
    // Drop the leading '<' and the trailing '>' (or "/>" when the tag is self closed)
    bool selfClosed = source.Length > 2 && source[source.Length - 2] == '/';
    source = source.Substring(1, source.Length - (selfClosed ? 3 : 2));

    //Extract tag name: it ends at the first whitespace (space, tab or line break), if any
    int nameEnd = source.IndexOfAny(new[] { ' ', '\t', '\r', '\n' });
    string tagName = nameEnd < 0 ? source : source.Substring(0, nameEnd);

    //Check if is end tag
    bool isClosing = false;
    if (tagName.StartsWith("/", StringComparison.Ordinal))
    {
        isClosing = true;
        tagName = tagName.Substring(1);
    }

    // Invariant lower-casing: tag names must not depend on the current culture (e.g. Turkish 'I')
    tagName = tagName.ToLowerInvariant();

    //Extract attributes
    var attributes = new Dictionary<string, string>();
    foreach (Match att in RegexParserUtils.Match(RegexParserUtils.HmlTagAttributes, source))
    {
        string raw = att.Value;
        int equalsPos = raw.IndexOf('=');

        if (equalsPos < 0)
        {
            // Valueless attribute: test the same (lower-cased) key that is added, otherwise
            // "checked CHECKED" passes the check and then throws on the duplicate key.
            string flagName = raw.ToLowerInvariant();
            if (!attributes.ContainsKey(flagName))
            {
                attributes.Add(flagName, string.Empty);
            }
            continue;
        }

        //Extract attribute and value
        // NOTE(review): names of valued attributes keep their original casing, unlike valueless ones - confirm callers expect that.
        string attname = raw.Substring(0, equalsPos).Trim();
        string attvalue = raw.Substring(equalsPos + 1).Trim();

        // Strip one pair of matching quotes; >= 2 so that an empty quoted value ("" or '') becomes empty
        if (attvalue.Length >= 2)
        {
            char first = attvalue[0];
            char last = attvalue[attvalue.Length - 1];
            if ((first == '"' && last == '"') || (first == '\'' && last == '\''))
            {
                attvalue = attvalue.Substring(1, attvalue.Length - 2);
            }
        }

        if (!attributes.ContainsKey(attname))
        {
            attributes.Add(attname, attvalue);
        }
    }

    return (new HtmlTag(tagName, attributes, isClosing));
}