//
// methods
//

/// <summary>
/// Resolves the component factory registered for a tag name and asks it to create the component.
/// The most recently used factory is remembered, so a run of identical tag names skips the lookup.
/// </summary>
/// <param name="parser">The HTML content parser requesting the component; passed through to the inner factory.</param>
/// <param name="name">The tag name to create a component for.</param>
/// <param name="type">Set by the inner factory, or to HtmlComponentType.Unknown when no factory is registered for the name.</param>
/// <returns>
/// The component created by the matching inner factory, or the result of GetUnknownComponent
/// when the name has no registered factory.
/// </returns>
public IPDFComponent GetComponent(IHtmlContentParser parser, string name, out HtmlComponentType type)
{
    IPDFComponent created = null;
    IParserComponentFactory factory;

    bool sameAsPrevious = (null != _last) && (_lastName == name);

    if (sameAsPrevious)
    {
        // Same tag as the previous request - reuse the remembered factory.
        created = _last.GetComponent(parser, name, out type);
    }
    else if (_knowntags.TryGetValue(name, out factory))
    {
        // Remember the factory before using it, so the next identical name hits the fast path.
        _last = factory;
        _lastName = name;
        created = factory.GetComponent(parser, name, out type);
    }
    else
    {
        // Nothing registered for this name: forget the remembered factory and fall back.
        _last = null;
        _lastName = null;
        type = HtmlComponentType.Unknown;
        created = GetUnknownComponent(parser, name);
    }

    // Record the originating tag name on the result when it is a Component.
    Component tagged = created as Component;
    if (tagged != null)
    {
        tagged.Tag = name;
    }

    return created;
}
/// <summary>
/// Initialises a parser result from the parsed component, its type, its value and its offsets,
/// and marks the result as valid.
/// </summary>
/// <param name="parsed">The component that was parsed.</param>
/// <param name="type">The component type recorded for this result.</param>
/// <param name="value">The value recorded for this result.</param>
/// <param name="offsetStart">The start offset recorded for this result.</param>
/// <param name="offsetEnd">The end offset recorded for this result.</param>
/// <param name="start">Recorded as the is-start flag of this result.</param>
public HTMLParserResult(IPDFComponent parsed, HtmlComponentType type, string value, int offsetStart, int offsetEnd, bool start)
{
    // What was parsed
    this._parsed = parsed;
    this._type = type;
    this._value = value;

    // Where it was found
    this._start = offsetStart;
    this._end = offsetEnd;
    this._isStart = start;

    // Results built through this constructor are always flagged valid
    this._valid = true;
}
/// <summary>
/// Checks whether the source, at its current offset, opens a comment, CDATA section,
/// DOCTYPE declaration or processing instruction. On a match the source offset is advanced
/// by the length of the opening marker.
/// </summary>
/// <param name="type">Set to the kind of section matched, or HtmlComponentType.None when nothing matched.</param>
/// <param name="terminator">Set to the closing marker for the matched section, or an empty string when nothing matched.</param>
/// <returns>True when one of the four openers was matched, otherwise false.</returns>
protected bool IsNonHtmlTag(out HtmlComponentType type, out string terminator)
{
    // Recognised forms:
    //   Comment                 <!-- ... content ... -->
    //   CDATA                   <![CDATA[ ... content ... ]]>
    //   Doc type                <!DOCTYPE ... type ... >
    //   Processing instruction  <? ... instruction ... ?>

    bool hasFollowingChar = this.Source.Offset + 1 < this.Source.Length;

    if (hasFollowingChar)
    {
        // Cheap test first: comment, CDATA and doc type all share the same second character.
        if (this.Source.Peek(1) == HTMLCommentSecondChar)
        {
            if (this.SourceStartsWithMarker(HTMLCommentStart))
            {
                this.Source.Offset += HTMLCommentStart.Length;
                type = HtmlComponentType.Comment;
                terminator = HTMLCommentEnd;
                return true;
            }

            if (this.SourceStartsWithMarker(HTMLCDATAStart))
            {
                this.Source.Offset += HTMLCDATAStart.Length;
                type = HtmlComponentType.CData;
                terminator = HTMLCDATAEnd;
                return true;
            }

            if (this.SourceStartsWithMarker(HTMLDocTypeStart))
            {
                this.Source.Offset += HTMLDocTypeStart.Length;
                type = HtmlComponentType.DocType;
                terminator = HTMLDocTypeEnd;
                return true;
            }
        }
        else if (this.SourceStartsWithMarker(HTMLProcessingInstructionStart))
        {
            this.Source.Offset += HTMLProcessingInstructionStart.Length;
            type = HtmlComponentType.ProcessingInstruction;
            terminator = HTMLProcessingInstructionEnd;
            return true;
        }
    }

    // Not one of the special sections.
    type = HtmlComponentType.None;
    terminator = string.Empty;
    return false;
}

/// <summary>
/// Returns true when enough source remains beyond the marker length and the next
/// marker-length substring of the source equals the marker.
/// </summary>
/// <param name="marker">The opening marker to test for.</param>
private bool SourceStartsWithMarker(string marker)
{
    // presumably Source.Substring(n) reads n characters starting at the current offset - confirm
    return this.Source.Offset + marker.Length < this.Source.Length
        && this.Source.Substring(marker.Length) == marker;
}
/// <summary>
/// Looks up the parser setting that says whether components of the given type are skipped.
/// </summary>
/// <param name="type">The component type to check.</param>
/// <returns>
/// The matching Skip* setting for DocType, Comment, ProcessingInstruction, CData and Unknown;
/// false for every other type.
/// </returns>
protected virtual bool ShouldSkipHtmlType(HtmlComponentType type)
{
    // The settings are only read on a matching branch, never for other types.
    if (type == HtmlComponentType.DocType)
    {
        return this.Parser.Settings.SkipDocType;
    }

    if (type == HtmlComponentType.Comment)
    {
        return this.Parser.Settings.SkipComments;
    }

    if (type == HtmlComponentType.ProcessingInstruction)
    {
        return this.Parser.Settings.SkipProcessingInstructions;
    }

    if (type == HtmlComponentType.CData)
    {
        return this.Parser.Settings.SkipCData;
    }

    if (type == HtmlComponentType.Unknown)
    {
        return this.Parser.Settings.SkipUnknownTags;
    }

    return false;
}
/// <summary>
/// Advances the source by one position and reads a single tag name from it. Depending on what
/// was read this then either resolves a closing tag against the parsed path, skips over a
/// skipped tag, or asks the owner's component factory for a component and reads its attributes.
/// </summary>
/// <param name="name">Set to the characters read as the tag name (empty if the source ends straight after an end marker).</param>
/// <param name="type">
/// Set by the component factory for opening tags, to the popped result's type for matched
/// closing tags, and to HtmlComponentType.None for orphan closing tags, skipped tags, or when
/// the source ends straight after the end marker.
/// </param>
/// <param name="autoend">
/// Set to true only when a component was returned by the factory and the factory reports that it
/// is not a container component; false in every other case.
/// </param>
/// <param name="isEndMarker">Set to true if this is the end marker of another tag (&lt;/span&gt; etc).</param>
/// <returns>
/// The component from the factory for an opening tag (may be null); the Parsed value of the popped
/// result for a matched closing tag; null for orphan closing tags and skipped tags.
/// </returns>
private IPDFComponent GetCurrentTag(out string name, out HtmlComponentType type, out bool autoend, out bool isEndMarker)
{
    autoend = false;
    isEndMarker = false;
    name = string.Empty;

    // Step onto the first character of the tag content.
    // NOTE(review): presumably the source was left on the tag opening character by the caller - confirm.
    this.Source.MoveNext();
    char cur = this.Source.Current;

    if (cur == HTMLEndMarker) // We are parsing a closing tag and want the inner text
    {
        if (this.Source.MoveNext())
        {
            cur = this.Source.Current;
            isEndMarker = true;
        }
        else
        {
            // Source ended straight after the end marker - nothing to read.
            type = HtmlComponentType.None;
            return(null);
        }
    }

    this.Buffer.Clear();

    // Accumulate the tag name until an end marker, close tag or whitespace character is hit,
    // or the source runs out. The terminating character is NOT consumed.
    // NOTE(review): only the single HTMLWhiteSpace value terminates the name here; if that is a
    // plain space then a tab or newline after the name would become part of the name - confirm.
    while (!this.Source.EOS)
    {
        cur = this.Source.Current;
        if (cur == HTMLEndMarker || cur == HTMLCloseTag || cur == HTMLWhiteSpace)
        {
            break;
        }
        else
        {
            this.Buffer.Append(cur);
        }
        this.Source.MoveNext();
    }

    name = this.Buffer.ToString();

    if (isEndMarker)
    {
        if (this.ParsedPath.Count > 0)
        {
            int index = this.ParsedPath.IndexOf(name);
            if (index == this.ParsedPath.Count - 1)
            {
                // Last one so just pop it
                HTMLParserResult prev = this.ParsedPath.Pop();
                type = prev.Type;
                return(prev.Parsed);
            }
            else if (index > -1)
            {
                // The name was found, but not at the last index: pop everything down to that tag,
                // queue the popped results on TagsToClose, and return the first one popped from
                // that queue.
                HTMLParserResult[] tags = this.ParsedPath.PopToTag(name);
                this.TagsToClose.PushRange(tags);
                HTMLParserResult prev = this.TagsToClose.Pop();
                type = prev.Type;
                return(prev.Parsed);
            }
        }
        // Orphan end marker
        type = HtmlComponentType.None;
        return(null);
    }
    else if (IsSkippedTag(name))
    {
        // Build the matching closing tag text from the marker characters and the name,
        // and hand it to MoveToEnd.
        string endTag = HTMLStartTag.ToString() + HTMLEndMarker.ToString() + name + HTMLCloseTag.ToString();
        MoveToEnd(endTag);
        type = HtmlComponentType.None;
        return(null);
    }

    // Opening tag: let the owner's factory decide what (if anything) this tag becomes.
    IPDFComponent parsed = this._owner.ComponentFactory.GetComponent(this.Parser, name, out type);

    if (!this.Source.EOS)
    {
        if (cur == HTMLWhiteSpace)
        {
            // The name was terminated by whitespace, so attributes may follow.
            // Note that parsed can be null at this point and is passed on as-is.
            ReadAttributes(parsed, name);
        }
        else
        {
            // No attributes: move past any run of end marker / close tag characters.
            while (cur == HTMLEndMarker || cur == HTMLCloseTag)
            {
                if (!this.Source.MoveNext())
                {
                    break;
                }
                cur = this.Source.Current;
            }
            if (!this.Source.EOS)
            {
                this.Source.MovePrev(); // extra character for looking past marker removed
            }
        }
    }

    if (null != parsed)
    {
        // A component that is not a container cannot hold content, so it ends as soon as it begins.
        autoend = !this._owner.ComponentFactory.IsContainerComponent(this.Parser, parsed, name);
    }
    else
    {
        autoend = false;
    }
    return(parsed);
}
/// <summary>
/// Creates a new HTMLBody with the fixed ID "html-body" and reports it as a Body component.
/// The parser and name arguments are not used.
/// </summary>
/// <param name="parser">The requesting parser (unused).</param>
/// <param name="name">The tag name (unused).</param>
/// <param name="type">Always set to HtmlComponentType.Body.</param>
/// <returns>The new HTMLBody instance.</returns>
public override IPDFComponent GetComponent(IHtmlContentParser parser, string name, out HtmlComponentType type)
{
    type = HtmlComponentType.Body;

    return new HTMLBody()
    {
        ID = "html-body"
    };
}
/// <summary>
/// Creates a new HTMLAnchor and reports it as a Link component.
/// The parser and name arguments are not used.
/// </summary>
/// <param name="parser">The requesting parser (unused).</param>
/// <param name="name">The tag name (unused).</param>
/// <param name="type">Always set to HtmlComponentType.Link.</param>
/// <returns>The new HTMLAnchor instance.</returns>
public override IPDFComponent GetComponent(IHtmlContentParser parser, string name, out HtmlComponentType type)
{
    type = HtmlComponentType.Link;

    HTMLAnchor anchor = new HTMLAnchor();
    return anchor;
}
/// <summary>
/// Creates a new HTMLHorizontalRule and reports it as a HorizontalLine component.
/// The parser and name arguments are not used.
/// </summary>
/// <param name="parser">The requesting parser (unused).</param>
/// <param name="name">The tag name (unused).</param>
/// <param name="type">Always set to HtmlComponentType.HorizontalLine.</param>
/// <returns>The new HTMLHorizontalRule instance.</returns>
public override IPDFComponent GetComponent(IHtmlContentParser parser, string name, out HtmlComponentType type)
{
    type = HtmlComponentType.HorizontalLine;

    HTMLHorizontalRule rule = new HTMLHorizontalRule();
    return rule;
}
/// <summary>
/// Creates the span component that matches an inline formatting tag name and reports it as a
/// Span component. The parser argument is not used.
/// </summary>
/// <param name="parser">The requesting parser (unused).</param>
/// <param name="name">
/// The tag name, matched case-insensitively: "b"/"strong" give a bold span, "i"/"em" an italic
/// span, "u" an underlined span and "font" a font span. Any other name gives a plain HTMLSpan.
/// </param>
/// <param name="type">Always set to HtmlComponentType.Span.</param>
/// <returns>The new span instance for the tag name.</returns>
public override IPDFComponent GetComponent(IHtmlContentParser parser, string name, out HtmlComponentType type)
{
    SpanBase span;
    type = HtmlComponentType.Span;

    // Tag names are protocol identifiers, not linguistic text, so they must be lower-cased with
    // the invariant culture. The culture-sensitive ToLower() maps "I" to a dotless "ı" under
    // Turkish/Azeri cultures, which would make an upper-case I tag miss the italic case below.
    switch (name.ToLowerInvariant())
    {
        case "b":
        case "strong":
            span = new HTMLBoldSpan();
            break;

        case "i":
        case "em":
            span = new HTMLItalicSpan();
            break;

        case "u":
            span = new HTMLUnderlinedSpan();
            break;

        case "font":
            span = new HTMLFontSpan();
            break;

        default:
            span = new HTMLSpan();
            break;
    }

    return span;
}
/// <summary>
/// Abstract method that inheritors implement to create and return specific types of components.
/// </summary>
/// <param name="parser">The HTML content parser that is requesting the component.</param>
/// <param name="name">The name the component is being requested for.</param>
/// <param name="type">Out parameter that each implementation must assign: the HtmlComponentType of the returned component.</param>
/// <returns>
/// The component created by the implementation.
/// NOTE(review): whether implementations may return null is not stated here - confirm per implementation.
/// </returns>
public abstract IPDFComponent GetComponent(IHtmlContentParser parser, string name, out HtmlComponentType type);
/// <summary>
/// Creates a new HTMLPreformatted and reports it as a Preformatted component.
/// The parser and name arguments are not used.
/// </summary>
/// <param name="parser">The requesting parser (unused).</param>
/// <param name="name">The tag name (unused).</param>
/// <param name="type">Always set to HtmlComponentType.Preformatted.</param>
/// <returns>The new HTMLPreformatted instance.</returns>
public override IPDFComponent GetComponent(IHtmlContentParser parser, string name, out HtmlComponentType type)
{
    type = HtmlComponentType.Preformatted;

    HTMLPreformatted preformatted = new HTMLPreformatted();
    return preformatted;
}
/// <summary>
/// Creates a new HTMLTableHeaderCell and reports it as a TableCell component.
/// The parser and name arguments are not used.
/// </summary>
/// <param name="parser">The requesting parser (unused).</param>
/// <param name="name">The tag name (unused).</param>
/// <param name="type">Always set to HtmlComponentType.TableCell.</param>
/// <returns>The new HTMLTableHeaderCell instance.</returns>
public override IPDFComponent GetComponent(IHtmlContentParser parser, string name, out HtmlComponentType type)
{
    type = HtmlComponentType.TableCell;

    HTMLTableHeaderCell headerCell = new HTMLTableHeaderCell();
    return headerCell;
}
/// <summary>
/// Creates a new HTMLParagraph and reports it as a Panel component.
/// The parser and name arguments are not used.
/// </summary>
/// <param name="parser">The requesting parser (unused).</param>
/// <param name="name">The tag name (unused).</param>
/// <param name="type">Always set to HtmlComponentType.Panel.</param>
/// <returns>The new HTMLParagraph instance.</returns>
public override IPDFComponent GetComponent(IHtmlContentParser parser, string name, out HtmlComponentType type)
{
    type = HtmlComponentType.Panel;

    HTMLParagraph paragraph = new HTMLParagraph();
    return paragraph;
}
/// <summary>
/// Creates the heading component that matches a heading tag name ("h1" to "h6", matched
/// case-insensitively) and reports it as a Heading component. The parser argument is not used.
/// </summary>
/// <param name="parser">The requesting parser (unused).</param>
/// <param name="name">The heading tag name.</param>
/// <param name="type">Always set to HtmlComponentType.Heading, even when null is returned.</param>
/// <returns>
/// A new HTMLHead1 to HTMLHead6 instance for "h1" to "h6"; null for any other name.
/// </returns>
public override IPDFComponent GetComponent(IHtmlContentParser parser, string name, out HtmlComponentType type)
{
    type = HtmlComponentType.Heading;
    HeadingBase head;

    // Tag names are identifiers rather than linguistic text, so normalise the casing with the
    // invariant culture instead of the current thread culture.
    switch (name.ToLowerInvariant())
    {
        case "h1":
            head = new HTMLHead1();
            break;

        case "h2":
            head = new HTMLHead2();
            break;

        case "h3":
            head = new HTMLHead3();
            break;

        case "h4":
            head = new HTMLHead4();
            break;

        case "h5":
            head = new HTMLHead5();
            break;

        case "h6":
            head = new HTMLHead6();
            break;

        default:
            // Not a heading level this factory knows about.
            head = null;
            break;
    }

    return head;
}
/// <summary>
/// Creates a new HTMLListDefinitionItem and reports it as a ListItem component.
/// The parser and name arguments are not used.
/// </summary>
/// <param name="parser">The requesting parser (unused).</param>
/// <param name="name">The tag name (unused).</param>
/// <param name="type">Always set to HtmlComponentType.ListItem.</param>
/// <returns>The new HTMLListDefinitionItem instance.</returns>
public override IPDFComponent GetComponent(IHtmlContentParser parser, string name, out HtmlComponentType type)
{
    type = HtmlComponentType.ListItem;

    HTMLListDefinitionItem definitionItem = new HTMLListDefinitionItem();
    return definitionItem;
}
/// <summary>
/// Creates a new HTMLListOrdered and reports it as a List component.
/// The parser and name arguments are not used.
/// </summary>
/// <param name="parser">The requesting parser (unused).</param>
/// <param name="name">The tag name (unused).</param>
/// <param name="type">Always set to HtmlComponentType.List.</param>
/// <returns>The new HTMLListOrdered instance.</returns>
public override IPDFComponent GetComponent(IHtmlContentParser parser, string name, out HtmlComponentType type)
{
    type = HtmlComponentType.List;

    HTMLListOrdered orderedList = new HTMLListOrdered();
    return orderedList;
}
/// <summary>
/// Creates a new HTMLImage and reports it as an Image component.
/// The parser and name arguments are not used.
/// </summary>
/// <param name="parser">The requesting parser (unused).</param>
/// <param name="name">The tag name (unused).</param>
/// <param name="type">Always set to HtmlComponentType.Image.</param>
/// <returns>The new HTMLImage instance.</returns>
public override IPDFComponent GetComponent(IHtmlContentParser parser, string name, out HtmlComponentType type)
{
    type = HtmlComponentType.Image;

    HTMLImage image = new HTMLImage();
    return image;
}
/// <summary>
/// Creates the block-level component that matches the tag name and reports it as a Panel
/// component. The parser argument is not used.
/// </summary>
/// <param name="parser">The requesting parser (unused).</param>
/// <param name="name">
/// The tag name, compared ordinally and case-insensitively: "blockquote", "fieldset" and
/// "legend" have dedicated components; any other name gives an HTMLDiv.
/// </param>
/// <param name="type">Always set to HtmlComponentType.Panel.</param>
/// <returns>The new component for the tag name.</returns>
public override IPDFComponent GetComponent(IHtmlContentParser parser, string name, out HtmlComponentType type)
{
    type = HtmlComponentType.Panel;

    const StringComparison ignoreCase = StringComparison.OrdinalIgnoreCase;
    IPDFComponent panel;

    if (String.Equals(name, "blockquote", ignoreCase))
    {
        panel = new HTMLBlockQuote();
    }
    else if (String.Equals(name, "fieldset", ignoreCase))
    {
        panel = new HTMLFieldSet();
    }
    else if (String.Equals(name, "legend", ignoreCase))
    {
        panel = new HTMLLegend();
    }
    else
    {
        // Anything else handled by this factory is a plain div.
        panel = new HTMLDiv();
    }

    return panel;
}