/// <summary>
/// Recognizes a '#'-prefixed (ATX style) heading line. The line is only treated
/// as a heading when the character stream is in the
/// <c>MarkdownStates.ParagraphBreak</c> state.
/// </summary>
/// <param name="trimmedLine">
/// The line to examine. Presumably already stripped of leading whitespace by the
/// caller (only index 0 is tested for '#') - confirm against the call site.
/// </param>
/// <returns>
/// <c>true</c> when the line starts with '#' in the paragraph-break state, even if
/// no heading text follows the marker; otherwise <c>false</c>.
/// </returns>
private bool CheckHeading(string trimmedLine)
{
    // An empty (or missing) line can never be a heading. This guard also protects
    // the index access below, which used to throw on an empty string.
    if (string.IsNullOrEmpty(trimmedLine))
    {
        return false;
    }

    if (trimmedLine[0] != '#' || _characterStream.State != MarkdownStates.ParagraphBreak)
    {
        return false;
    }

    // Count the run of leading '#' characters. The level is capped at 6, so a
    // seventh '#' is left in place and becomes part of the heading text.
    var level = 1;
    while (level < 6 && trimmedLine.Length > level && trimmedLine[level] == '#')
    {
        level++;
    }

    // Skip the whitespace that separates the marker from the heading text.
    var start = level;
    while (start < trimmedLine.Length && char.IsWhiteSpace(trimmedLine[start]))
    {
        start++;
    }

    // A marker with nothing after it is still consumed as a heading line,
    // but no heading element is emitted for it.
    if (start < trimmedLine.Length)
    {
        var heading = new HeadingElement { Name = new String('#', level), Level = level };
        PushElement(heading);
        ParseText(trimmedLine.Substring(start));
        EndSection();
    }

    return true;
}
/// <summary>
/// Recognizes an underline made of two or more '-' characters directly below
/// paragraph text. When the current element is a paragraph it is replaced with a
/// level 2 heading that takes over the paragraph's children.
/// </summary>
/// <param name="rawLine">The line to examine; every character must be '-'.</param>
/// <returns>
/// <c>true</c> when the line is a dash underline and the stream is in the
/// <c>MarkdownStates.Paragraph</c> state; otherwise <c>false</c>.
/// </returns>
private bool CheckSingleUnderline(string rawLine)
{
    var isDashRun = rawLine.Length > 1 && rawLine.All(ch => ch == '-');
    if (!isDashRun || _characterStream.State != MarkdownStates.Paragraph)
    {
        return false;
    }

    // Promote the paragraph collected so far to a heading. The underline text
    // itself is stored as the heading's name.
    var paragraph = CurrentElement as ParagraphElement;
    if (paragraph != null)
    {
        CurrentElement = new HeadingElement
        {
            Name = rawLine,
            Level = 2,
            Children = paragraph.Children
        };
    }

    // The section is closed whether or not the current element was a paragraph.
    EndSection();
    return true;
}
/// <summary>
/// Parses an opening tag like '&lt;div&gt;'. Starts with the input stream
/// pointing to the opening &lt; character.
/// </summary>
/// <param name="currentElement">
/// The element that will become the parent of the element created for this tag.
/// When <c>null</c> the tag is still consumed from the stream but no element is created.
/// </param>
/// <returns>
/// The newly created child element; <paramref name="currentElement"/> unchanged when
/// the tag turns out to be a markup declaration ('!') or an end tag ('/'), or when
/// <paramref name="currentElement"/> is <c>null</c>; <c>null</c> when the stream ends
/// immediately after the opening character or when <c>Begin</c> rejects the new element.
/// </returns>
private Element ProcessTagOpenState(Element currentElement)
{
    if (!_characterStream.MoveNext())
    {
        // The input ended right after the opening character.
        _document.ConformanceLevel *= 0.8f;
        return null;
    }

    // Markup declarations and end tags are handled by other states.
    switch (_characterStream.Current)
    {
        case '!':
            _characterStream.State = HtmlStates.MarkupDeclarationOpen;
            return currentElement;
        case '/':
            _characterStream.State = HtmlStates.EndTagOpen;
            return currentElement;
    }

    var startPosition = _characterStream.CurrentPosition;
    string tagName = null;
    var selfClosing = false;

    using (var nameBuffer = _stringBuilderFactory.Create(_maximumNameLength))
    {
        char? terminator;
        _characterStream.State = HtmlStates.TagName;

        if (_stringParser.TakeUntil(nameBuffer, _maximumNameLength, c => char.IsWhiteSpace(c) || c == '>' || c == '/', out terminator))
        {
            // Tag names are matched case-insensitively. Use the invariant culture so
            // that the result does not depend on the machine's locale.
            tagName = nameBuffer.ToString().ToLowerInvariant();

            if (!terminator.HasValue || terminator == '/')
            {
                _characterStream.State = HtmlStates.SelfClosingStartTag;
                if (_stringParser.Peek() == '>')
                {
                    _stringParser.TakeOne();
                }
                else
                {
                    // A '/' (or the end of input) that is not followed by '>'.
                    _document.ConformanceLevel *= 0.9f;
                }
            }
            else if (terminator == '>')
            {
                // No attributes. Names listed in _voidElements are treated as self closing.
                _characterStream.State = _voidElements.Contains(tagName)
                    ? HtmlStates.SelfClosingStartTag
                    : HtmlStates.Data;
            }
            else
            {
                // Whitespace after the name means that attributes may follow.
                _characterStream.State = HtmlStates.AttributeName;
                selfClosing = _voidElements.Contains(tagName);
            }
        }
        else
        {
            // The name could not be read normally (presumably no terminator was found
            // within the maximum name length - confirm against TakeUntil). Try to
            // recover by checking whether the text starts with a known element name.
            _document.ConformanceLevel *= 0.9f;
            nameBuffer.Clear();
            _stringParser.Take(nameBuffer, _maximumNameLength);
            var candidateText = nameBuffer.ToString().ToLowerInvariant();
            _characterStream.State = HtmlStates.Data;

            // NOTE(review): the first matching prefix wins, so the result depends on
            // the order of _allElements (e.g. "b" versus "body") - confirm the ordering.
            foreach (var elementName in _allElements)
            {
                if (candidateText.StartsWith(elementName, System.StringComparison.Ordinal))
                {
                    // Rewind and consume only the characters that form the tag name.
                    nameBuffer.Clear();
                    _characterStream.Reset(startPosition);
                    _stringParser.Take(nameBuffer, elementName.Length);

                    // The text re-read from the stream has its original casing, so it
                    // must be normalized the same way as in the regular path above.
                    // Otherwise upper case tags never match the lookups below.
                    tagName = nameBuffer.ToString().ToLowerInvariant();
                    _characterStream.State = HtmlStates.AttributeName;
                    selfClosing = _voidElements.Contains(tagName);
                    break;
                }
            }

            if (tagName == null)
            {
                _document.ConformanceLevel *= 0.6f;
            }
        }
    }

    var attributes = _characterStream.State == HtmlStates.AttributeName
        ? ParseAttributes(selfClosing)
        : null;

    if (currentElement != null)
    {
        var parentElement = currentElement;

        switch (tagName)
        {
            case "html":
            case "body":
            case "form":
            case "header":
            case "footer":
                // These elements are not parsed and contain no details. They are included
                // in the output only as containers for their children.
                currentElement = new UnsupportedElement { Attributes = attributes, SuppressOutput = false };
                break;

            case "p":
            case "li":
                // These elements are treated as paragraphs in other markup formats, for
                // example in markdown they will have a blank line above them to create a
                // paragraph break.
                currentElement = new ParagraphElement { Attributes = attributes };
                break;

            case "blockquote":
                currentElement = new ParagraphElement
                {
                    Attributes = attributes,
                    Styles = new Dictionary<string, string>
                    {
                        { "margin-top", "10px" },
                        { "margin-bottom", "10px" },
                        { "margin-left", "50px" },
                        { "padding-left", "15px" },
                        { "border-left", "3px solid #ccc" }
                    }
                };
                break;

            case "div":
                // Divs are tricky because some people use them to group elements with
                // similar style and other people use them instead of paragraphs. Since
                // divs are by default block elements it makes more sense in most cases to
                // treat them like paragraphs unless they have paragraphs or other divs
                // within them.
                currentElement = new ContainerElement { ContainerType = ContainerTypes.Division, Attributes = attributes };
                break;

            case "span":
                // These elements are treated as inline text. For example in markdown
                // these are rendered without an extra blank line and are therefore
                // rendered as part of the prior paragraph.
                currentElement = new SpanElement { Attributes = attributes };
                break;

            case "a":
                // Anchor tags are a special case: without an href there is nothing to link to.
                if (attributes != null && attributes.ContainsKey("href"))
                {
                    currentElement = new AnchorElement { LinkAddress = attributes["href"] };
                }
                else
                {
                    currentElement = new UnsupportedElement { Attributes = attributes };
                }
                break;

            case "iframe":
            case "img":
                // Image tags are a special case: without a src there is nothing to show.
                if (attributes != null && attributes.ContainsKey("src"))
                {
                    var alt = attributes.ContainsKey("alt") ? attributes["alt"] : null;
                    currentElement = new ImageElement { LinkAddress = attributes["src"], AltText = alt };
                }
                else
                {
                    currentElement = new UnsupportedElement { Attributes = attributes };
                }
                break;

            case "h1":
                currentElement = new HeadingElement { Level = 1 };
                break;
            case "h2":
                currentElement = new HeadingElement { Level = 2 };
                break;
            case "h3":
                currentElement = new HeadingElement { Level = 3 };
                break;
            case "h4":
                currentElement = new HeadingElement { Level = 4 };
                break;
            case "h5":
                currentElement = new HeadingElement { Level = 5 };
                break;
            case "h6":
                currentElement = new HeadingElement { Level = 6 };
                break;

            case "strong":
            case "b":
                // Bold is represented as an inline style
                currentElement = new FormattedElement
                {
                    ElementType = ElementTypes.InlineText,
                    Styles = new Dictionary<string, string> { { "font-weight", "bold" } }
                };
                break;

            case "cite":
            case "q":
            case "i":
            case "em":
                // Italic is represented as an inline style
                currentElement = new FormattedElement
                {
                    ElementType = ElementTypes.InlineText,
                    Styles = new Dictionary<string, string> { { "font-style", "italic" } }
                };
                break;

            case "u":
                // Underline is represented as an inline style
                currentElement = new FormattedElement
                {
                    ElementType = ElementTypes.InlineText,
                    Styles = new Dictionary<string, string> { { "text-decoration", "underline" } }
                };
                break;

            case "small":
                // Small is represented as an inline style
                currentElement = new FormattedElement
                {
                    ElementType = ElementTypes.InlineText,
                    Styles = new Dictionary<string, string> { { "font-size", "smaller" } }
                };
                break;

            case "sup":
                // Superscript is represented as an inline style
                currentElement = new FormattedElement
                {
                    ElementType = ElementTypes.InlineText,
                    Styles = new Dictionary<string, string> { { "vertical-align", "super" }, { "font-size", "smaller" } }
                };
                break;

            case "sub":
                // Subscript is represented as an inline style
                currentElement = new FormattedElement
                {
                    ElementType = ElementTypes.InlineText,
                    Styles = new Dictionary<string, string> { { "vertical-align", "sub" }, { "font-size", "smaller" } }
                };
                break;

            case "br":
                currentElement = new BreakElement { BreakType = BreakTypes.LineBreak };
                break;
            case "hr":
                currentElement = new BreakElement { BreakType = BreakTypes.HorizontalRule };
                break;

            case "ul":
                currentElement = new ContainerElement { ContainerType = ContainerTypes.BulletList, Attributes = attributes };
                break;
            case "ol":
                currentElement = new ContainerElement { ContainerType = ContainerTypes.NumberedList, Attributes = attributes };
                break;
            case "table":
                currentElement = new ContainerElement { ContainerType = ContainerTypes.Table, Attributes = attributes };
                break;
            case "tr":
                currentElement = new ContainerElement { ContainerType = ContainerTypes.TableDataRow, Attributes = attributes };
                break;
            case "th":
                currentElement = new ContainerElement { ContainerType = ContainerTypes.TableHeaderRow, Attributes = attributes };
                break;
            case "td":
                currentElement = new ContainerElement { ContainerType = ContainerTypes.TableDataCell, Attributes = attributes };
                break;

            default:
                // All other elements (including an unrecognized, null tag name) are
                // created as unsupported elements. They are still parsed so that we
                // know where they end and where the next valid element begins.
                currentElement = new UnsupportedElement { Attributes = attributes };
                break;
        }

        // Move the class and style attributes onto elements that support styling.
        var styleElement = currentElement as IStyleElement;
        if (styleElement != null && attributes != null)
        {
            if (attributes.ContainsKey("class"))
            {
                styleElement.ClassNames = attributes["class"];
                attributes.Remove("class");
            }

            if (attributes.ContainsKey("style"))
            {
                if (styleElement.Styles == null)
                {
                    styleElement.Styles = new Dictionary<string, string>();
                }

                var declarations = attributes["style"]
                    .Split(';')
                    .Select(s => s.Trim())
                    .Where(s => s.Length > 0);

                foreach (var declaration in declarations)
                {
                    // A usable declaration has text on both sides of the colon.
                    var colonPos = declaration.IndexOf(':');
                    if (colonPos > 0 && colonPos < declaration.Length - 1)
                    {
                        var styleName = declaration.Substring(0, colonPos).Trim().ToLowerInvariant();
                        var styleValue = declaration.Substring(colonPos + 1).Trim().ToLowerInvariant();

                        // Styles that were preset for the tag above take precedence
                        // over the inline style attribute.
                        if (!styleElement.Styles.ContainsKey(styleName))
                        {
                            styleElement.Styles[styleName] = styleValue;
                        }
                    }
                    else
                    {
                        _document.ConformanceLevel *= 0.9f;
                    }
                }

                attributes.Remove("style");
            }
        }

        // Link the new element into the document tree.
        currentElement.Name = tagName;
        currentElement.Parent = parentElement;

        if (parentElement.SuppressOutput)
        {
            currentElement.SuppressOutput = true;
        }

        if (parentElement.Children == null)
        {
            parentElement.Children = new List<IDocumentElement>();
        }
        parentElement.Children.Add(currentElement);

        if (!Begin(currentElement))
        {
            return null;
        }
    }

    return currentElement;
}