/// <summary>
/// Parses an AOML string into open, close and content nodes
/// </summary>
/// <param name="aoml">The aoml string to be parsed</param>
/// <returns>
/// The nodes in the order they occur in <paramref name="aoml"/>: text between
/// tags is added through AddContent, tags become open or close nodes.
/// </returns>
/// <exception cref="ArgumentNullException">
/// Thrown when <paramref name="aoml"/> is null.
/// </exception>
public NodeCollection Parse(string aoml)
{
    // Fail with a descriptive exception instead of a NullReferenceException
    // further down; this matches the validation done by Sanitize and Balance.
    if (aoml == null)
    {
        throw new ArgumentNullException("aoml");
    }

    NodeCollection nodes = new NodeCollection();

    // Create regular expression instance (built once, then reused).
    // NOTE(review): the pattern depends on Mode, but it is only built on the
    // first call. If Mode can change afterwards the cached pattern will be
    // stale - confirm Mode is fixed for the lifetime of the parser.
    if (this.regex == null)
    {
        this.regex = new Regex(
            "[<]\\s*(?<closer>[/]?)\\s*" +
            "(?<name>[a-z]+)" +
            "(\\s+((?<attribute>[a-z_\\-]+)\\s*" +
            "(=\\s*(" +
            // Unquoted values are not accepted in strict mode
            (Mode == ParserMode.Strict ? "" : "(?<value>[^\\s<>'\"]+)|") +
            "[\"](?<value>[^\"]*)[\"]|" +
            // Compatibility mode tolerates a missing closing single quote
            (Mode == ParserMode.Compatibility ? "['](?<value>[^'>]*)[']?)|" : "['](?<value>[^']*)['])|") +
            // Empty value capture for attributes without an explicit value
            "(?<value>)" +
            ")?)\\s*)*" +
            "\\s*(?<closed>[/]?)[>]",
            RegexOptions.IgnoreCase | RegexOptions.Compiled | RegexOptions.Multiline);
    }

    // Some setup
    int regexCloser = this.regex.GroupNumberFromName("closer");
    int regexName = this.regex.GroupNumberFromName("name");
    int regexAttribute = this.regex.GroupNumberFromName("attribute");
    int regexValue = this.regex.GroupNumberFromName("value");
    int regexClosed = this.regex.GroupNumberFromName("closed");

    // Convert to unix line endings
    aoml = aoml.Replace("\r\n", "\n");

    // Parse AOML
    MatchCollection matches = this.regex.Matches(aoml);
    int offset = 0;
    foreach (Match match in matches)
    {
        // Everything between the previous tag and this one is content
        if (match.Index > offset)
        {
            string content = aoml.Substring(offset, match.Index - offset);
            AddContent(nodes, content, content);
        }
        offset = match.Index + match.Length;

        // Extract tag
        string raw = match.Groups[0].Value;
        string name = match.Groups[regexName].Value;
        if (string.IsNullOrEmpty(name))
        {
            // No name, let's assume it's just text.
            // The pattern requires at least one letter for the name, so this
            // is purely defensive.
            AddContent(nodes, raw, raw);
            continue;
        }

        bool closer = !string.IsNullOrEmpty(match.Groups[regexCloser].Value);
        bool closed = !string.IsNullOrEmpty(match.Groups[regexClosed].Value);
        if (closer)
        {
            // Closing tag
            nodes.Add(new CloseNode(name, raw));
        }
        else
        {
            // Opening tag
            OpenNode node = new OpenNode(name, raw, closed);

            // Add attributes; attribute and value captures are paired by index
            CaptureCollection attributeCaptures = match.Groups[regexAttribute].Captures;
            CaptureCollection valueCaptures = match.Groups[regexValue].Captures;
            for (int i = 0; i < attributeCaptures.Count; i++)
            {
                node.AddAttribute(attributeCaptures[i].Value, valueCaptures[i].Value);
            }
            nodes.Add(node);
        }
    }

    // Remainder is content
    if (offset < aoml.Length)
    {
        string content = aoml.Substring(offset);
        AddContent(nodes, content, content);
    }

    return nodes;
}
/// <summary>
/// Sanitizes the stream of nodes into exclusively valid AOML.
/// This process involves changing and removing nodes.
/// </summary>
/// <param name="nodes">The node stream to sanitize in place</param>
/// <exception cref="ArgumentNullException">
/// Thrown when <paramref name="nodes"/> is null.
/// </exception>
public void Sanitize(NodeCollection nodes)
{
    if (nodes == null)
    {
        // Name the offending parameter so the failure is self-explanatory
        throw new ArgumentNullException("nodes");
    }

    // Loop through all nodes
    nodes.Reset();
    Node node;
    while ((node = nodes.Next()) != null)
    {
        // Content nodes are always kept as they are
        if (node.Type == NodeType.Content)
        {
            continue;
        }

        // Gather info
        OpenNode openNode = null;
        CloseNode closeNode = null;
        string name = null;
        if (node.Type == NodeType.Open)
        {
            openNode = (OpenNode)node;
            name = openNode.Name;

            // Drop every attribute that is not in validAttributes.
            // The index only advances past attributes that are kept, because
            // a removal shifts the following attributes down.
            int index = 0;
            while (index < openNode.Count)
            {
                string attr = openNode.GetAttributeName(index);
                if (validAttributes.Contains(attr))
                {
                    index++;
                }
                else
                {
                    openNode.RemoveAttribute(attr);
                }
            }
        }
        else if (node.Type == NodeType.Close)
        {
            closeNode = (CloseNode)node;
            name = closeNode.Name;
        }

        // Singular elements
        if (singularElements.Contains(name))
        {
            // Singular elements don't have closing nodes
            if (closeNode != null)
            {
                nodes.Remove(closeNode);
            }
            // Singular elements are always self-closing
            if (openNode != null)
            {
                openNode.Closed = true;
            }
            continue;
        }

        // Inline and block elements
        if (inlineElements.Contains(name) || blockElements.Contains(name))
        {
            // Let's not use self-closing elements here: expand a
            // self-closing tag into an explicit open/close pair
            if (openNode != null && openNode.Closed)
            {
                openNode.Closed = false;
                nodes.InsertAfter(openNode, new CloseNode(name, ""));
            }
            continue;
        }

        // Unknown element: either keep its raw text as content or replace it
        // with empty content, depending on InvalidElementsToContent
        string replacement = this.InvalidElementsToContent ? node.Raw : "";
        this.ReplaceContent(nodes, node, replacement, replacement);
    }

    // And we're done!
    nodes.Reset();
}
/// <summary>
/// Balances out the stream of nodes to form a valid tree.
/// This process will always try to resolve conflicts by introducing extra inline elements over block elements.
/// </summary>
/// <param name="nodes">The node stream to balance in place</param>
/// <exception cref="ArgumentNullException">
/// Thrown when <paramref name="nodes"/> is null.
/// </exception>
/// <exception cref="InvalidOperationException">
/// Thrown when a close node has no matching open node during the second pass.
/// </exception>
public void Balance(NodeCollection nodes)
{
    if (nodes == null)
    {
        // Name the offending parameter so the failure is self-explanatory
        throw new ArgumentNullException("nodes");
    }

    // Pass 1: fill in missing elements.
    // Index 0 of the list is the most recently opened element.
    List<string> nameStack = new List<string>();
    nodes.Reset();
    Node node = null;
    while ((node = nodes.Next()) != null)
    {
        if (node.Type == NodeType.Open)
        {
            OpenNode opened = (OpenNode)node;
            // Self-closing nodes never need a partner
            if (!opened.Closed)
            {
                nameStack.Insert(0, opened.Name);
            }
        }
        else if (node.Type == NodeType.Close)
        {
            CloseNode closing = (CloseNode)node;
            // Remove reports whether a matching name was pending, which saves
            // a separate Contains scan
            if (!nameStack.Remove(closing.Name))
            {
                // Found CloseNode without OpenNode
                // Fix by inserting an OpenNode before it
                nodes.InsertBefore(closing, new OpenNode(closing.Name, "", false));
            }
        }
    }
    while (nameStack.Count > 0)
    {
        // Found OpenNode without CloseNode
        // Fix by adding a CloseNode at the end of the stream
        string name = nameStack[0];
        nameStack.RemoveAt(0);
        nodes.Add(new CloseNode(name, ""));
    }

    // Pass 2: resolve tree structure.
    // Index 0 of the list is the innermost open element.
    List<OpenNode> nodeStack = new List<OpenNode>();
    nodes.Reset();
    while ((node = nodes.Next()) != null)
    {
        if (node.Type == NodeType.Open)
        {
            OpenNode opened = (OpenNode)node;
            if (!opened.Closed)
            {
                nodeStack.Insert(0, opened);
            }
        }
        else if (node.Type == NodeType.Close)
        {
            // A bit of setup
            CloseNode closeNode = (CloseNode)node;
            bool block = blockElements.Contains(closeNode.Name);

            // Find matching OpenNode (innermost first)
            OpenNode openNode = null;
            foreach (OpenNode candidate in nodeStack)
            {
                if (candidate.Name == closeNode.Name)
                {
                    openNode = candidate;
                    break;
                }
            }
            if (openNode == null)
            {
                throw new InvalidOperationException("Unable to find matching OpenNode to CloseNode");
            }

            // Handle incorrect balance: walk over every element that was
            // opened after the matching OpenNode and is still open
            int offset = 0;
            while (nodeStack[offset].Name != closeNode.Name)
            {
                OpenNode intervening = nodeStack[offset];
                if (block)
                {
                    // Closing a block element: close the intervening element
                    // before the close node and insert a clone of its open
                    // node after it. The element leaves the stack, so offset
                    // already points at the next candidate.
                    nodes.InsertBefore(node, new CloseNode(intervening.Name, ""));
                    nodes.InsertAfter(node, intervening.Clone());
                    nodeStack.RemoveAt(offset);
                }
                else
                {
                    // Closing a non-block element: split it around the
                    // intervening open node by inserting a clone of the close
                    // node before it and a clone of the matching open node
                    // after it. The intervening element stays on the stack.
                    nodes.InsertBefore(intervening, closeNode.Clone());
                    nodes.InsertAfter(intervening, openNode.Clone());
                    offset++;
                }
            }

            // All fixed: the element at offset is the one being closed
            nodeStack.RemoveAt(offset);
        }
    }

    // And we're done!
    nodes.Reset();
}
/// <summary>
/// Parses an AOML string into a tree of elements.
/// The string is run through the parser's Parse, Sanitize and Balance steps
/// and the resulting nodes are then turned into elements.
/// </summary>
/// <param name="aoml">The aoml string to be parsed</param>
/// <returns>
/// Null when no elements were produced, the element itself when exactly one
/// top-level element was produced, otherwise a container holding all
/// top-level elements.
/// </returns>
/// <exception cref="ArgumentException">
/// Thrown on an unexpected tag name, or on a closing tag that does not match
/// the element currently open.
/// </exception>
/// <exception cref="InvalidOperationException">
/// Thrown when a node is neither content, an opening tag nor a closing tag.
/// </exception>
public Element Parse(string aoml)
{
    ContainerElement root = new ContainerElement();

    // Parse AOML
    NodeCollection nodes = this.Parser.Parse(aoml);
    this.Parser.Sanitize(nodes);
    this.Parser.Balance(nodes);

    // Transform AOML into a DOM tree; the top of the stack is the element
    // that receives new children
    Stack<Element> ancestors = new Stack<Element>();
    ancestors.Push(root);

    foreach (Node node in nodes)
    {
        if (node.Type == NodeType.Content)
        {
            // Content doesn't introduce a new level,
            // it simply becomes a child at the current depth
            ContentNode text = (ContentNode)node;
            Element textElement = new TextElement(Web.UnescapeHtml(text.Value));
            ancestors.Peek().Children.Add(textElement);
        }
        else if (node.Type == NodeType.Open)
        {
            OpenNode tag = (OpenNode)node;
            Element created;
            // Whether the element may receive children of its own
            bool nests = true;

            switch (tag.Name)
            {
                case "br":
                    // Singular linebreak element
                    created = new BreakElement();
                    nests = false;
                    break;

                case "img":
                    // Singular image element; skipped when it can't be created
                    created = this.CreateImageElement(tag);
                    if (created == null)
                    {
                        continue;
                    }
                    nests = false;
                    break;

                case "font":
                    created = this.CreateFontElement(tag);
                    break;

                case "a":
                    created = this.CreateLinkElement(tag);
                    break;

                case "u":
                    created = new UnderlineElement();
                    break;

                case "i":
                    created = new ItalicElement();
                    break;

                case "center":
                case "left":
                case "right":
                case "div":
                    // The tag name or the align attribute decides the
                    // alignment; center is tested first, then left, then right
                    Alignment alignment = Alignment.Inherit;
                    if (tag.Name == "center" || tag.GetAttribute("align") == "center")
                    {
                        alignment = Alignment.Center;
                    }
                    else if (tag.Name == "left" || tag.GetAttribute("align") == "left")
                    {
                        alignment = Alignment.Left;
                    }
                    else if (tag.Name == "right" || tag.GetAttribute("align") == "right")
                    {
                        alignment = Alignment.Right;
                    }
                    created = new AlignElement(alignment);
                    break;

                default:
                    throw new ArgumentException("Unexpected tag: " + tag.Name);
            }

            // Attach to the current parent and, unless self-closed or
            // singular, descend into the new element
            ancestors.Peek().Children.Add(created);
            if (nests && !tag.Closed)
            {
                ancestors.Push(created);
            }
        }
        else if (node.Type == NodeType.Close)
        {
            // Closing a node means consuming an element from the stack
            // But first check whether the node matches the element on the stack
            CloseNode closing = (CloseNode)node;
            Element top = ancestors.Peek();
            bool matches;

            switch (closing.Name)
            {
                case "br":
                    throw new ArgumentException("Unexpected 'br' closing tag");

                case "img":
                    throw new ArgumentException("Unexpected 'img' closing tag");

                case "font":
                    matches = top.Type == ElementType.Color || top.Type == ElementType.Container;
                    break;

                case "a":
                    matches = top.Type == ElementType.Link || top.Type == ElementType.Container;
                    break;

                case "u":
                    matches = top.Type == ElementType.Underline;
                    break;

                case "i":
                    matches = top.Type == ElementType.Italic;
                    break;

                case "div":
                case "center":
                case "left":
                case "right":
                    matches = top.Type == ElementType.Align;
                    break;

                default:
                    throw new ArgumentException("Unexpected tag: " + closing.Name);
            }

            if (!matches)
            {
                throw new ArgumentException("Unexpected '" + closing.Name + "' closing tag");
            }

            // The root container itself can never be closed
            if (ancestors.Count <= 1)
            {
                throw new ArgumentException("Unexpected closing tag");
            }

            // Go 1 step back down in the tree
            ancestors.Pop();
        }
        else
        {
            throw new InvalidOperationException("Unexpected node type");
        }
    }

    // Let's report our progress
    int topLevelCount = root.Children.Count;
    if (topLevelCount == 0)
    {
        return null;
    }
    if (topLevelCount == 1)
    {
        // A single top-level element is returned without its container
        return root.Children.ToArray()[0];
    }
    return root;
}