//
        // methods
        //

        #region public IPDFComponent GetComponent(IContentParser parser, string name)

        /// <summary>
        /// Creates a component for the given tag name by delegating to the inner factory registered for that name.
        /// The factory used for the previous request is remembered, so a repeated request for the same name
        /// skips the dictionary lookup.
        /// </summary>
        /// <param name="parser">The HTML content parser that is requesting the component.</param>
        /// <param name="name">The tag name to create a component for.</param>
        /// <param name="type">Set to the component type reported by the inner factory, or
        /// <c>HtmlComponentType.Unknown</c> when no factory is registered for the name.</param>
        /// <returns>The component produced by the matching inner factory, or the result of
        /// <c>GetUnknownComponent</c> when the name is not registered.</returns>
        public IPDFComponent GetComponent(IHtmlContentParser parser, string name, out HtmlComponentType type)
        {
            IPDFComponent created;
            bool sameAsLast = (null != _last) && (_lastName == name);

            if (sameAsLast)
            {
                // Same tag as the previous call - reuse the remembered factory.
                created = _last.GetComponent(parser, name, out type);
            }
            else
            {
                IParserComponentFactory factory;

                if (_knowntags.TryGetValue(name, out factory))
                {
                    // Remember this factory for the next request.
                    _last     = factory;
                    _lastName = name;
                    created   = factory.GetComponent(parser, name, out type);
                }
                else
                {
                    // Not a registered tag - clear the remembered factory and fall back.
                    _last     = null;
                    _lastName = null;
                    type      = HtmlComponentType.Unknown;
                    created   = GetUnknownComponent(parser, name);
                }
            }

            // Record the originating tag name on full Component instances.
            Component tagged = created as Component;
            if (null != tagged)
            {
                tagged.Tag = name;
            }

            return created;
        }
 /// <summary>
 /// Creates a new instance of the HTMLParserResult struct and marks it as valid.
 /// </summary>
 /// <param name="parsed">The component that was parsed (stored as given).</param>
 /// <param name="type">The HTML component type associated with the result.</param>
 /// <param name="value">The string value associated with the result.</param>
 /// <param name="offsetStart">The offset stored as the start of the result.</param>
 /// <param name="offsetEnd">The offset stored as the end of the result.</param>
 /// <param name="start">The flag stored as the result's is-start marker.</param>
 public HTMLParserResult(IPDFComponent parsed, HtmlComponentType type, string value, int offsetStart, int offsetEnd, bool start)
 {
     // What was parsed
     this._parsed  = parsed;
     this._type    = type;
     this._value   = value;

     // Where it was parsed from
     this._start   = offsetStart;
     this._end     = offsetEnd;
     this._isStart = start;

     // Results built through this constructor are always flagged valid
     this._valid   = true;
 }
        /// <summary>
        /// Checks whether the source, at its current offset, begins a Comment, CDATA, DocType or
        /// Processing Instruction section. When it does, the source offset is advanced past the opening marker.
        /// </summary>
        /// <param name="type">Set to the kind of section found, or <c>HtmlComponentType.None</c> if none matched.</param>
        /// <param name="terminator">Set to the closing marker string for the section found, or an empty string if none matched.</param>
        /// <returns>True if one of the non-HTML sections starts at the current offset, otherwise false.</returns>
        protected bool IsNonHtmlTag(out HtmlComponentType type, out string terminator)
        {
            // Opening markers, per the original notes:
            //   CData                  <![CDATA[ ... content ... ]]>
            //   Comment                <!-- ... content ... -->
            //   Processing Instruction <? .... instruction ... ?>
            //   Doc Type               <!DOCTYPE ... type ... >

            bool              matched   = false;
            int               skip      = 0;
            HtmlComponentType foundType = HtmlComponentType.None;
            string            foundEnd  = string.Empty;

            // Need at least one more character after the current one to test anything.
            if (this.Source.Offset + 1 < this.Source.Length)
            {
                // Comment, DocType and CData all share the same second character, so test that first.
                if (this.Source.Peek(1) == HTMLCommentSecondChar)
                {
                    if (this.Source.Offset + HTMLCommentStart.Length < this.Source.Length
                        && this.Source.Substring(HTMLCommentStart.Length) == HTMLCommentStart)
                    {
                        matched   = true;
                        skip      = HTMLCommentStart.Length;
                        foundType = HtmlComponentType.Comment;
                        foundEnd  = HTMLCommentEnd;
                    }
                    else if (this.Source.Offset + HTMLCDATAStart.Length < this.Source.Length
                             && this.Source.Substring(HTMLCDATAStart.Length) == HTMLCDATAStart)
                    {
                        matched   = true;
                        skip      = HTMLCDATAStart.Length;
                        foundType = HtmlComponentType.CData;
                        foundEnd  = HTMLCDATAEnd;
                    }
                    else if (this.Source.Offset + HTMLDocTypeStart.Length < this.Source.Length
                             && this.Source.Substring(HTMLDocTypeStart.Length) == HTMLDocTypeStart)
                    {
                        matched   = true;
                        skip      = HTMLDocTypeStart.Length;
                        foundType = HtmlComponentType.DocType;
                        foundEnd  = HTMLDocTypeEnd;
                    }
                }
                else if (this.Source.Offset + HTMLProcessingInstructionStart.Length < this.Source.Length
                         && this.Source.Substring(HTMLProcessingInstructionStart.Length) == HTMLProcessingInstructionStart)
                {
                    matched   = true;
                    skip      = HTMLProcessingInstructionStart.Length;
                    foundType = HtmlComponentType.ProcessingInstruction;
                    foundEnd  = HTMLProcessingInstructionEnd;
                }
            }

            if (matched)
            {
                // Step over the opening marker so the caller starts at the section content.
                this.Source.Offset += skip;
            }

            type       = foundType;
            terminator = foundEnd;
            return matched;
        }
        /// <summary>
        /// Looks up the parser setting that controls whether content of the given component type is skipped.
        /// </summary>
        /// <param name="type">The component type to check.</param>
        /// <returns>The matching Skip* setting for DocType, Comment, ProcessingInstruction, CData and Unknown;
        /// false for every other type.</returns>
        protected virtual bool ShouldSkipHtmlType(HtmlComponentType type)
        {
            if (type == HtmlComponentType.DocType)
            {
                return this.Parser.Settings.SkipDocType;
            }

            if (type == HtmlComponentType.Comment)
            {
                return this.Parser.Settings.SkipComments;
            }

            if (type == HtmlComponentType.ProcessingInstruction)
            {
                return this.Parser.Settings.SkipProcessingInstructions;
            }

            if (type == HtmlComponentType.CData)
            {
                return this.Parser.Settings.SkipCData;
            }

            if (type == HtmlComponentType.Unknown)
            {
                return this.Parser.Settings.SkipUnknownTags;
            }

            // Any other type has no skip setting and is never skipped.
            return false;
        }
        /// <summary>
        /// Based on the current source, reads in a single HTML tag and all its attributes.
        /// For an opening tag the component is requested from the owner's component factory; for a closing tag
        /// the matching entry is removed from the parsed path and its component returned.
        /// Null is returned for an orphan closing tag, a skipped tag, or a closing marker at the end of the source.
        /// </summary>
        /// <param name="name">The actual characters read as the tag name</param>
        /// <param name="type">Set to the component type reported by the factory (opening tag) or stored on the popped
        /// path entry (closing tag); HtmlComponentType.None when null is returned from one of the early exits</param>
        /// <param name="autoend">Set to true if this tag / component should always end after it has begun (does not contain other components / tags)</param>
        /// <param name="isEndMarker">Set to true if this is the end marker of another tag (&lt;/span&gt; etc)</param>
        /// <returns>The parsed component or null</returns>
        private IPDFComponent GetCurrentTag(out string name, out HtmlComponentType type, out bool autoend, out bool isEndMarker)
        {
            autoend     = false;
            isEndMarker = false;
            name        = string.Empty;
            // Step onto the next character - presumably moving past the tag's opening character (TODO confirm against caller).
            this.Source.MoveNext();
            char cur = this.Source.Current;

            if (cur == HTMLEndMarker) //We are parsing a closing tag and want the inner text
            {
                if (this.Source.MoveNext())
                {
                    cur         = this.Source.Current;
                    isEndMarker = true;
                }
                else
                {
                    // Nothing follows the end marker, so there is no tag to read.
                    type = HtmlComponentType.None;
                    return(null);
                }
            }

            this.Buffer.Clear();

            // Accumulate the tag name until an end marker, close tag character or whitespace character is reached.
            while (!this.Source.EOS)
            {
                cur = this.Source.Current;
                if (cur == HTMLEndMarker || cur == HTMLCloseTag || cur == HTMLWhiteSpace)
                {
                    break;
                }
                else
                {
                    this.Buffer.Append(cur);
                }

                this.Source.MoveNext();
            }

            name = this.Buffer.ToString();

            if (isEndMarker)
            {
                // Closing tag: find the entry with this name in the path of currently parsed tags.
                if (this.ParsedPath.Count > 0)
                {
                    int index = this.ParsedPath.IndexOf(name);

                    if (index == this.ParsedPath.Count - 1)
                    {
                        //Last one so just pop it
                        HTMLParserResult prev = this.ParsedPath.Pop();
                        type = prev.Type;
                        return(prev.Parsed);
                    }
                    else if (index > -1)
                    {
                        // The name was found but not at the last index: pop the path down to that tag,
                        // queue the popped results on TagsToClose and return the first one taken back off that queue.
                        HTMLParserResult[] tags = this.ParsedPath.PopToTag(name);
                        this.TagsToClose.PushRange(tags);
                        HTMLParserResult prev = this.TagsToClose.Pop();
                        type = prev.Type;
                        return(prev.Parsed);
                    }
                }
                //Orphan end marker
                type = HtmlComponentType.None;
                return(null);
            }
            else if (IsSkippedTag(name))
            {
                // Skipped tag: build its closing tag text from the marker constants plus the name
                // and move the source to the end of it.
                string endTag = HTMLStartTag.ToString() + HTMLEndMarker.ToString() + name + HTMLCloseTag.ToString();
                MoveToEnd(endTag);
                type = HtmlComponentType.None;
                return(null);
            }

            // Opening tag: ask the owner's factory for a component matching the name.
            IPDFComponent parsed = this._owner.ComponentFactory.GetComponent(this.Parser, name, out type);

            if (!this.Source.EOS)
            {
                if (cur == HTMLWhiteSpace)
                {
                    // The name was terminated by whitespace, so attributes may follow.
                    ReadAttributes(parsed, name);
                }
                else
                {
                    // The name was terminated by an end marker or close tag character - move past any run of them.
                    while (cur == HTMLEndMarker || cur == HTMLCloseTag)
                    {
                        if (!this.Source.MoveNext())
                        {
                            break;
                        }
                        cur = this.Source.Current;
                    }

                    if (!this.Source.EOS)
                    {
                        this.Source.MovePrev(); //extra character for looking past marker removed
                    }
                }
            }



            // A component that the factory does not treat as a container ends immediately after it begins.
            if (null != parsed)
            {
                autoend = !this._owner.ComponentFactory.IsContainerComponent(this.Parser, parsed, name);
            }
            else
            {
                autoend = false;
            }

            return(parsed);
        }
Beispiel #6
0
        /// <summary>
        /// Creates a new HTMLBody with the fixed ID "html-body" and reports the Body component type.
        /// The parser and name arguments are not used.
        /// </summary>
        public override IPDFComponent GetComponent(IHtmlContentParser parser, string name, out HtmlComponentType type)
        {
            type = HtmlComponentType.Body;

            return new HTMLBody
            {
                ID = "html-body"
            };
        }
Beispiel #7
0
 /// <summary>
 /// Creates a new HTMLAnchor and reports the Link component type.
 /// The parser and name arguments are not used.
 /// </summary>
 public override IPDFComponent GetComponent(IHtmlContentParser parser, string name, out HtmlComponentType type)
 {
     type = HtmlComponentType.Link;

     HTMLAnchor anchor = new HTMLAnchor();
     return anchor;
 }
Beispiel #8
0
 /// <summary>
 /// Creates a new HTMLHorizontalRule and reports the HorizontalLine component type.
 /// The parser and name arguments are not used.
 /// </summary>
 public override IPDFComponent GetComponent(IHtmlContentParser parser, string name, out HtmlComponentType type)
 {
     type = HtmlComponentType.HorizontalLine;

     HTMLHorizontalRule rule = new HTMLHorizontalRule();
     return rule;
 }
Beispiel #9
0
        /// <summary>
        /// Creates the span component that corresponds to an inline tag name and reports the Span component type.
        /// </summary>
        /// <param name="parser">The requesting parser (not used by this factory).</param>
        /// <param name="name">The tag name; matched case-insensitively.</param>
        /// <param name="type">Always set to <c>HtmlComponentType.Span</c>.</param>
        /// <returns>A bold span for "b"/"strong", an italic span for "i"/"em", an underlined span for "u",
        /// a font span for "font", and a plain HTMLSpan for any other name.</returns>
        public override IPDFComponent GetComponent(IHtmlContentParser parser, string name, out HtmlComponentType type)
        {
            SpanBase span;

            type = HtmlComponentType.Span;

            // Tag names are identifiers, not linguistic text, so fold case with the invariant culture.
            // A culture-sensitive ToLower() maps "I" to a dotless i under Turkish cultures, which would
            // stop an upper-case <I> tag from matching the "i" case below.
            switch (name.ToLowerInvariant())
            {
            case "b":
            case "strong":
                span = new HTMLBoldSpan();
                break;

            case "i":
            case "em":
                span = new HTMLItalicSpan();
                break;

            case "u":
                span = new HTMLUnderlinedSpan();
                break;

            case "font":
                span = new HTMLFontSpan();
                break;

            default:
                span = new HTMLSpan();
                break;
            }
            return(span);
        }
Beispiel #10
0
 /// <summary>
 /// Abstract method that inheritors implement to create and return specific types of components.
 /// </summary>
 /// <param name="parser">The HTML content parser that is requesting the component.</param>
 /// <param name="name">The tag name the component is being requested for.</param>
 /// <param name="type">Must be set by the implementation to the HTML component type it is reporting for the tag.</param>
 /// <returns>The component created for the tag name. NOTE(review): whether null is an acceptable return
 /// value is not stated here - confirm against the callers.</returns>
 public abstract IPDFComponent GetComponent(IHtmlContentParser parser, string name, out HtmlComponentType type);
Beispiel #11
0
 /// <summary>
 /// Creates a new HTMLPreformatted and reports the Preformatted component type.
 /// The parser and name arguments are not used.
 /// </summary>
 public override IPDFComponent GetComponent(IHtmlContentParser parser, string name, out HtmlComponentType type)
 {
     type = HtmlComponentType.Preformatted;

     HTMLPreformatted pre = new HTMLPreformatted();
     return pre;
 }
Beispiel #12
0
 /// <summary>
 /// Creates a new HTMLTableHeaderCell and reports the TableCell component type.
 /// The parser and name arguments are not used.
 /// </summary>
 public override IPDFComponent GetComponent(IHtmlContentParser parser, string name, out HtmlComponentType type)
 {
     type = HtmlComponentType.TableCell;

     HTMLTableHeaderCell headerCell = new HTMLTableHeaderCell();
     return headerCell;
 }
Beispiel #13
0
 /// <summary>
 /// Creates a new HTMLParagraph and reports the Panel component type.
 /// The parser and name arguments are not used.
 /// </summary>
 public override IPDFComponent GetComponent(IHtmlContentParser parser, string name, out HtmlComponentType type)
 {
     type = HtmlComponentType.Panel;

     HTMLParagraph paragraph = new HTMLParagraph();
     return paragraph;
 }
Beispiel #14
0
        /// <summary>
        /// Creates the heading component that corresponds to a heading tag name and reports the Heading component type.
        /// </summary>
        /// <param name="parser">The requesting parser (not used by this factory).</param>
        /// <param name="name">The tag name; matched case-insensitively against "h1" to "h6".</param>
        /// <param name="type">Always set to <c>HtmlComponentType.Heading</c>, even when null is returned.</param>
        /// <returns>A new HTMLHead1 to HTMLHead6 instance for the matching name, or null for any other name.</returns>
        public override IPDFComponent GetComponent(IHtmlContentParser parser, string name, out HtmlComponentType type)
        {
            type = HtmlComponentType.Heading;
            HeadingBase head;

            // Tag names are identifiers, not linguistic text, so fold case with the invariant culture
            // instead of the current-culture ToLower(), whose result can vary with the thread's culture.
            switch (name.ToLowerInvariant())
            {
            case "h1":
                head = new HTMLHead1();
                break;

            case "h2":
                head = new HTMLHead2();
                break;

            case "h3":
                head = new HTMLHead3();
                break;

            case "h4":
                head = new HTMLHead4();
                break;

            case "h5":
                head = new HTMLHead5();
                break;

            case "h6":
                head = new HTMLHead6();
                break;

            default:
                // Not a heading level this factory knows about.
                head = null;
                break;
            }
            return(head);
        }
Beispiel #15
0
 /// <summary>
 /// Creates a new HTMLListDefinitionItem and reports the ListItem component type.
 /// The parser and name arguments are not used.
 /// </summary>
 public override IPDFComponent GetComponent(IHtmlContentParser parser, string name, out HtmlComponentType type)
 {
     type = HtmlComponentType.ListItem;

     HTMLListDefinitionItem item = new HTMLListDefinitionItem();
     return item;
 }
Beispiel #16
0
 /// <summary>
 /// Creates a new HTMLListOrdered and reports the List component type.
 /// The parser and name arguments are not used.
 /// </summary>
 public override IPDFComponent GetComponent(IHtmlContentParser parser, string name, out HtmlComponentType type)
 {
     type = HtmlComponentType.List;

     HTMLListOrdered list = new HTMLListOrdered();
     return list;
 }
Beispiel #17
0
 /// <summary>
 /// Creates a new HTMLImage and reports the Image component type.
 /// The parser and name arguments are not used.
 /// </summary>
 public override IPDFComponent GetComponent(IHtmlContentParser parser, string name, out HtmlComponentType type)
 {
     type = HtmlComponentType.Image;

     HTMLImage image = new HTMLImage();
     return image;
 }
Beispiel #18
0
 /// <summary>
 /// Creates the panel component for a block-level tag name and reports the Panel component type.
 /// </summary>
 /// <param name="parser">The requesting parser (not used by this factory).</param>
 /// <param name="name">The tag name; compared ordinally, ignoring case.</param>
 /// <param name="type">Always set to <c>HtmlComponentType.Panel</c>.</param>
 /// <returns>An HTMLBlockQuote, HTMLFieldSet or HTMLLegend for the matching name, otherwise an HTMLDiv.</returns>
 public override IPDFComponent GetComponent(IHtmlContentParser parser, string name, out HtmlComponentType type)
 {
     type = HtmlComponentType.Panel;

     if (String.Equals(name, "blockquote", StringComparison.OrdinalIgnoreCase))
     {
         return new HTMLBlockQuote();
     }

     if (String.Equals(name, "fieldset", StringComparison.OrdinalIgnoreCase))
     {
         return new HTMLFieldSet();
     }

     if (String.Equals(name, "legend", StringComparison.OrdinalIgnoreCase))
     {
         return new HTMLLegend();
     }

     // Every other tag handled by this factory becomes a plain div.
     return new HTMLDiv();
 }