/// <summary>
        /// Recognises a heading line that starts with one to six '#' characters.
        /// The line is only treated as a heading when the character stream is in
        /// the <c>ParagraphBreak</c> state.
        /// </summary>
        /// <param name="trimmedLine">The line to examine. A null or empty line is
        /// never a heading.</param>
        /// <returns>True when the line was consumed as a heading line, even if it
        /// had no text after the '#' markers; otherwise false.</returns>
        private bool CheckHeading(string trimmedLine)
        {
            // Guard against empty input: indexing [0] below would otherwise throw.
            if (string.IsNullOrEmpty(trimmedLine) ||
                trimmedLine[0] != '#' ||
                _characterStream.State != MarkdownStates.ParagraphBreak)
            {
                return false;
            }

            // Count the leading '#' characters, capped at six levels.
            var level = 1;
            while (level < 6 && level < trimmedLine.Length && trimmedLine[level] == '#')
            {
                level++;
            }

            // Skip the whitespace that separates the markers from the heading text.
            var start = level;
            while (start < trimmedLine.Length && char.IsWhiteSpace(trimmedLine[start]))
            {
                start++;
            }

            // Only emit a heading element when there is text after the markers;
            // the line is still reported as handled either way.
            if (start < trimmedLine.Length)
            {
                var heading = new HeadingElement
                {
                    Name = new String('#', level),
                    Level = level
                };
                PushElement(heading);
                ParseText(trimmedLine.Substring(start));
                EndSection();
            }

            return true;
        }
 /// <summary>
 /// Recognises a line made up only of '-' characters (at least two) that
 /// follows paragraph text. When the current element is a paragraph it is
 /// replaced by a level 2 heading that takes over the paragraph's children.
 /// </summary>
 /// <param name="rawLine">The line to examine.</param>
 /// <returns>True when the line was consumed as an underline; otherwise false.</returns>
 private bool CheckSingleUnderline(string rawLine)
 {
     if (rawLine.Length <= 1 ||
         !rawLine.All(c => c == '-') ||
         _characterStream.State != MarkdownStates.Paragraph)
     {
         return false;
     }

     var paragraph = CurrentElement as ParagraphElement;
     if (paragraph != null)
     {
         // Promote the paragraph to a heading, keeping its parsed content.
         CurrentElement = new HeadingElement
         {
             Name = rawLine,
             Level = 2,
             Children = paragraph.Children
         };
     }

     // The section is closed whether or not the current element was a paragraph.
     EndSection();
     return true;
 }
예제 #3
0
        /// <summary>
        /// Parses an opening tag like '&lt;div>'. Starts with the input stream
        /// pointing to the opening &lt; character.
        /// </summary>
        /// <param name="currentElement">The element that was being parsed when the tag
        /// was encountered. It becomes the parent of the element created for the tag.
        /// When null, the tag name and attributes are still read but no element is
        /// created.</param>
        /// <returns>
        /// The element created for the tag; the unchanged <paramref name="currentElement"/>
        /// when the tag turns out to be a markup declaration ('!') or an end tag ('/');
        /// or null when the stream could not be advanced past the '&lt;' or when
        /// <c>Begin</c> rejects the new element.
        /// </returns>
        private Element ProcessTagOpenState(Element currentElement)
        {
            if (!_characterStream.MoveNext())
            {
                // Could not advance past the '<'
                _document.ConformanceLevel *= 0.8f;
                return null;
            }

            // '<!' and '</' are handled by other states; just switch state here
            switch (_characterStream.Current)
            {
                case '!':
                    _characterStream.State = HtmlStates.MarkupDeclarationOpen;
                    return currentElement;

                case '/':
                    _characterStream.State = HtmlStates.EndTagOpen;
                    return currentElement;
            }

            // Remembered so that the recovery path below can re-read the tag name
            var startPosition = _characterStream.CurrentPosition;

            string tagName = null;
            var selfClosing = false;

            using (var nameBuffer = _stringBuilderFactory.Create(_maximumNameLength))
            {
                char? terminator;
                _characterStream.State = HtmlStates.TagName;
                if (_stringParser.TakeUntil(nameBuffer, _maximumNameLength, c => char.IsWhiteSpace(c) || c == '>' || c == '/', out terminator))
                {
                    // Tag names are compared in lower case. The invariant culture is used so
                    // that the result does not depend on the machine's locale.
                    tagName = nameBuffer.ToString().ToLowerInvariant();
                    if (!terminator.HasValue || terminator == '/')
                    {
                        _characterStream.State = HtmlStates.SelfClosingStartTag;
                        if (_stringParser.Peek() == '>')
                        {
                            _stringParser.TakeOne();
                        }
                        else
                        {
                            // A '/' that is not followed by '>' is malformed
                            _document.ConformanceLevel *= 0.9f;
                        }
                    }
                    else if (terminator == '>')
                    {
                        _characterStream.State = _voidElements.Contains(tagName)
                            ? HtmlStates.SelfClosingStartTag
                            : HtmlStates.Data;
                    }
                    else
                    {
                        // Terminated by whitespace, so attributes may follow
                        _characterStream.State = HtmlStates.AttributeName;
                        selfClosing = _voidElements.Contains(tagName);
                    }
                }
                else
                {
                    // No terminator was found. Try to recover by checking whether the
                    // text starts with the name of a known element.
                    _document.ConformanceLevel *= 0.9f;
                    nameBuffer.Clear();
                    _stringParser.Take(nameBuffer, _maximumNameLength);
                    var buffer = nameBuffer.ToString().ToLowerInvariant();
                    _characterStream.State = HtmlStates.Data;
                    foreach (var name in _allElements)
                    {
                        if (buffer.StartsWith(name))
                        {
                            nameBuffer.Clear();
                            _characterStream.Reset(startPosition);
                            _stringParser.Take(nameBuffer, name.Length);

                            // The match above was made against lower-cased text, so the
                            // name re-read from the stream must be lower-cased too or it
                            // will not match the void element list or the switch below.
                            tagName = nameBuffer.ToString().ToLowerInvariant();
                            _characterStream.State = HtmlStates.AttributeName;
                            selfClosing = _voidElements.Contains(tagName);
                            break;
                        }
                    }
                    if (tagName == null)
                    {
                        _document.ConformanceLevel *= 0.6f;
                    }
                }
            }

            var attributes = _characterStream.State == HtmlStates.AttributeName ? ParseAttributes(selfClosing) : null;

            if (currentElement != null)
            {
                var parentElement = currentElement;

                switch (tagName)
                {
                    case "html":
                    case "body":
                    case "form":
                    case "header":
                    case "footer":
                        // These elements are not parsed and contain no details. They are included in the output
                        // only as containers for their children
                        currentElement = new UnsupportedElement
                        {
                            Attributes = attributes,
                            SuppressOutput = false
                        };
                        break;

                    case "p":
                    case "li":
                        // These elements are treated as paragraphs in other markup formats, for
                        // example in markdown they will have a blank line above them to create a
                        // paragraph break.
                        currentElement = new ParagraphElement
                        {
                            Attributes = attributes
                        };
                        break;

                    case "blockquote":
                        // Block quotes are paragraphs with an indented, left-bordered style
                        currentElement = new ParagraphElement
                        {
                            Attributes = attributes,
                            Styles = new Dictionary<string, string>
                            {
                                { "margin-top", "10px" },
                                { "margin-bottom", "10px" },
                                { "margin-left", "50px" },
                                { "padding-left", "15px" },
                                { "border-left", "3px solid #ccc" }
                            }
                        };
                        break;

                    case "div":
                        // Divs are tricky because some people use them to group elements with similar
                        // style and other people use them instead of paragraphs. Since divs are by
                        // default block elements it makes more sense in most cases to treat them like
                        // paragraphs unless they have paragraphs or other divs within them.
                        currentElement = new ContainerElement
                        {
                            ContainerType = ContainerTypes.Division,
                            Attributes = attributes
                        };
                        break;

                    case "span":
                        // These elements are treated as inline text. For example in markdown
                        // these are rendered without an extra blank line and are therefore rendered
                        // as part of the prior paragraph
                        currentElement = new SpanElement
                        {
                            Attributes = attributes
                        };
                        break;

                    case "a":
                        // Anchor tags are a special case: only anchors with an href are links
                        if (attributes != null && attributes.ContainsKey("href"))
                        {
                            currentElement = new AnchorElement
                            {
                                LinkAddress = attributes["href"]
                            };
                        }
                        else
                        {
                            currentElement = new UnsupportedElement
                            {
                                Attributes = attributes
                            };
                        }
                        break;

                    case "iframe":
                    case "img":
                        // Image tags are a special case: only those with a src are images
                        if (attributes != null && attributes.ContainsKey("src"))
                        {
                            var alt = attributes.ContainsKey("alt") ? attributes["alt"] : null;
                            currentElement = new ImageElement
                            {
                                LinkAddress = attributes["src"],
                                AltText = alt
                            };
                        }
                        else
                        {
                            currentElement = new UnsupportedElement
                            {
                                Attributes = attributes
                            };
                        }
                        break;

                    case "h1":
                    case "h2":
                    case "h3":
                    case "h4":
                    case "h5":
                    case "h6":
                        // The heading level is the digit that follows the 'h'
                        currentElement = new HeadingElement
                        {
                            Level = tagName[1] - '0'
                        };
                        break;

                    case "strong":
                    case "b":
                        // Bold is represented as an inline style
                        currentElement = new FormattedElement
                        {
                            ElementType = ElementTypes.InlineText,
                            Styles = new Dictionary<string, string>
                            {
                                { "font-weight", "bold" }
                            }
                        };
                        break;

                    case "cite":
                    case "q":
                    case "i":
                    case "em":
                        // Italic is represented as an inline style
                        currentElement = new FormattedElement
                        {
                            ElementType = ElementTypes.InlineText,
                            Styles = new Dictionary<string, string>
                            {
                                { "font-style", "italic" }
                            }
                        };
                        break;

                    case "u":
                        // Underline is represented as an inline style
                        currentElement = new FormattedElement
                        {
                            ElementType = ElementTypes.InlineText,
                            Styles = new Dictionary<string, string>
                            {
                                { "text-decoration", "underline" }
                            }
                        };
                        break;

                    case "small":
                        // Small is represented as an inline style
                        currentElement = new FormattedElement
                        {
                            ElementType = ElementTypes.InlineText,
                            Styles = new Dictionary<string, string>
                            {
                                { "font-size", "smaller" }
                            }
                        };
                        break;

                    case "sup":
                        // Superscript is represented as an inline style
                        currentElement = new FormattedElement
                        {
                            ElementType = ElementTypes.InlineText,
                            Styles = new Dictionary<string, string>
                            {
                                { "vertical-align", "super" },
                                { "font-size", "smaller" }
                            }
                        };
                        break;

                    case "sub":
                        // Subscript is represented as an inline style
                        currentElement = new FormattedElement
                        {
                            ElementType = ElementTypes.InlineText,
                            Styles = new Dictionary<string, string>
                            {
                                { "vertical-align", "sub" },
                                { "font-size", "smaller" }
                            }
                        };
                        break;

                    case "br":
                        currentElement = new BreakElement
                        {
                            BreakType = BreakTypes.LineBreak
                        };
                        break;

                    case "hr":
                        currentElement = new BreakElement
                        {
                            BreakType = BreakTypes.HorizontalRule
                        };
                        break;

                    case "ul":
                        currentElement = new ContainerElement
                        {
                            ContainerType = ContainerTypes.BulletList,
                            Attributes = attributes
                        };
                        break;

                    case "ol":
                        currentElement = new ContainerElement
                        {
                            ContainerType = ContainerTypes.NumberedList,
                            Attributes = attributes
                        };
                        break;

                    case "table":
                        currentElement = new ContainerElement
                        {
                            ContainerType = ContainerTypes.Table,
                            Attributes = attributes
                        };
                        break;

                    case "tr":
                        currentElement = new ContainerElement
                        {
                            ContainerType = ContainerTypes.TableDataRow,
                            Attributes = attributes
                        };
                        break;

                    case "th":
                        currentElement = new ContainerElement
                        {
                            ContainerType = ContainerTypes.TableHeaderRow,
                            Attributes = attributes
                        };
                        break;

                    case "td":
                        currentElement = new ContainerElement
                        {
                            ContainerType = ContainerTypes.TableDataCell,
                            Attributes = attributes
                        };
                        break;

                    default:
                        // All other elements will be excluded from the output document, but will
                        // be parsed just so that we know where they end and the next valid element
                        // begins. A tag whose name could not be determined (null) also lands here.
                        currentElement = new UnsupportedElement
                        {
                            Attributes = attributes
                        };
                        break;
                }

                // Move the 'class' and 'style' attributes onto elements that support styling
                var styleElement = currentElement as IStyleElement;
                if (styleElement != null && attributes != null)
                {
                    if (attributes.ContainsKey("class"))
                    {
                        styleElement.ClassNames = attributes["class"];
                        attributes.Remove("class");
                    }
                    if (attributes.ContainsKey("style"))
                    {
                        if (styleElement.Styles == null)
                        {
                            styleElement.Styles = new Dictionary<string, string>();
                        }
                        var styles = attributes["style"].Split(';').Select(s => s.Trim()).Where(s => s.Length > 0);
                        foreach (var style in styles)
                        {
                            // Each declaration must look like 'name:value' with both parts present
                            var colonPos = style.IndexOf(':');
                            if (colonPos > 0 && colonPos < style.Length - 1)
                            {
                                var name = style.Substring(0, colonPos).Trim().ToLowerInvariant();
                                var value = style.Substring(colonPos + 1).Trim().ToLowerInvariant();

                                // Styles already set for the tag itself take precedence over
                                // the inline style attribute, as does the first of any duplicates
                                if (!styleElement.Styles.ContainsKey(name))
                                {
                                    styleElement.Styles[name] = value;
                                }
                            }
                            else
                            {
                                _document.ConformanceLevel *= 0.9f;
                            }
                        }
                        attributes.Remove("style");
                    }
                }

                currentElement.Name = tagName;
                currentElement.Parent = parentElement;

                // Children of a suppressed element are suppressed too
                if (parentElement.SuppressOutput)
                {
                    currentElement.SuppressOutput = true;
                }

                if (parentElement.Children == null)
                {
                    parentElement.Children = new List<IDocumentElement>();
                }
                parentElement.Children.Add(currentElement);

                if (!Begin(currentElement))
                {
                    return null;
                }
            }

            return currentElement;
        }