/// <summary>
/// Writes an XHTML document type declaration for the document under
/// <paramref name="root"/>, creating the DOCTYPE node when none exists, and
/// calls FixHtmlNameSpace on the root with XHTML_NAMESPACE.
/// </summary>
/// <param name="root">
/// Root node of the document. A newly created DOCTYPE node is linked in as
/// the first entry of its content list.
/// </param>
/// <returns>
/// <c>true</c> when the doctype option is Omit (an existing DOCTYPE node is
/// discarded and nothing is written); <c>false</c> once a declaration has
/// been written.
/// </returns>
public virtual bool SetXhtmlDocType(Node root)
{
    const string StrictFpi = "-//W3C//DTD XHTML 1.0 Strict//EN";
    const string TransitionalFpi = "-//W3C//DTD XHTML 1.0 Transitional//EN";
    const string FramesetFpi = "-//W3C//DTD XHTML 1.0 Frameset//EN";

    /* defaults used when no branch below selects an identifier */
    string fpi = " ";
    string sysid = "";

    Node doctype = root.FindDocType();

    if (Options.DocType == TidyNet.DocType.Omit)
    {
        if (doctype != null)
        {
            Node.DiscardElement(doctype);
        }

        return true;
    }

    if (Options.DocType == TidyNet.DocType.Auto)
    {
        /* see what flavor of XHTML this document matches */
        if ((this.versions & HtmlVersion.Html40Strict) != 0)
        {
            /* use XHTML strict */
            fpi = StrictFpi;
            sysid = voyager_strict;
        }
        else if ((this.versions & HtmlVersion.Loose) != 0)
        {
            fpi = TransitionalFpi;
            sysid = voyager_loose;
        }
        else if ((this.versions & HtmlVersion.Frames) != 0)
        {
            /* use XHTML frames */
            fpi = FramesetFpi;
            sysid = voyager_frameset;
        }
        else
        {
            /* lets assume XHTML transitional */
            fpi = TransitionalFpi;
            sysid = voyager_loose;
        }
    }
    else if (Options.DocType == TidyNet.DocType.Strict)
    {
        fpi = StrictFpi;
        sysid = voyager_strict;
    }
    else if (Options.DocType == TidyNet.DocType.Loose)
    {
        fpi = TransitionalFpi;
        sysid = voyager_loose;
    }

    FixHtmlNameSpace(root, XHTML_NAMESPACE);

    if (doctype == null)
    {
        /* no DOCTYPE yet: create one and link it in front of the existing content */
        doctype = NewNode(Node.DocTypeTag, this.lexbuf, 0, 0);
        doctype.Next = root.Content;
        doctype.Parent = root;
        doctype.Prev = null;
        root.Content = doctype;
    }

    if (Options.DocType == TidyNet.DocType.User && Options.DocTypeStr != null)
    {
        /* a user-supplied public identifier replaces whatever was chosen; no system id */
        fpi = Options.DocTypeStr;
        sysid = "";
    }

    /* the declaration text is appended to the lexer buffer starting here */
    this.txtstart = this.lexsize;
    this.txtend = this.lexsize;

    /* add public identifier */
    AddStringLiteral("html PUBLIC ");

    /*
     * check if the fpi is quoted or not; the length test keeps an empty
     * user-supplied string from raising IndexOutOfRangeException
     */
    bool alreadyQuoted = fpi.Length > 0 && fpi[0] == '"';

    if (alreadyQuoted)
    {
        AddStringLiteral(fpi);
    }
    else
    {
        AddStringLiteral("\"");
        AddStringLiteral(fpi);
        AddStringLiteral("\"");
    }

    /* NOTE(review): the "+ 6" presumably budgets for indentation plus the two quotes - confirm */
    if (sysid.Length + 6 >= this.Options.WrapLen)
    {
        AddStringLiteral("\n\"");
    }
    else
    {
        AddStringLiteral("\n \"");
    }

    /* add system identifier */
    AddStringLiteral(sysid);
    AddStringLiteral("\"");

    this.txtend = this.lexsize;

    doctype.Start = this.txtstart;
    doctype.End = this.txtend;

    return false;
}
/// <summary>
/// Fixes up the document type declaration if it is missing or does not
/// match the configured doctype option.
/// </summary>
/// <remarks>
/// Issues a MALFORMED_DOCTYPE warning when the lexer flagged a bad doctype.
/// Nothing is changed when XML output is requested. For XML tags or Voyager
/// documents the DOCTYPE node is discarded and FixHtmlNameSpace is called
/// with the profile of the matching W3CVersion entry instead of writing a
/// declaration.
/// </remarks>
/// <param name="root">
/// Root node of the document. A newly created DOCTYPE node is linked in as
/// the first entry of its content list.
/// </param>
/// <returns>
/// <c>false</c> when the HTML version is unknown (either the declared one in
/// Auto mode, or the guessed one); otherwise <c>true</c>.
/// </returns>
public virtual bool FixDocType(Node root)
{
    int i;

    if (this.badDoctype)
    {
        Report.Warning(this, null, null, Report.MALFORMED_DOCTYPE);
    }

    if (Options.XmlOut)
    {
        return true;
    }

    /* named doctypeNode to keep it apart from the this.doctype version field */
    Node doctypeNode = root.FindDocType();
    HtmlVersion guessed = HtmlVersion.Html40Strict;

    if (Options.DocType == TidyNet.DocType.Omit)
    {
        if (doctypeNode != null)
        {
            Node.DiscardElement(doctypeNode);
        }

        return true;
    }

    if (Options.DocType == TidyNet.DocType.Strict)
    {
        /* guarded like every other discard here: the document may have no DOCTYPE */
        if (doctypeNode != null)
        {
            Node.DiscardElement(doctypeNode);
        }

        doctypeNode = null;
        guessed = HtmlVersion.Html40Strict;
    }
    else if (Options.DocType == TidyNet.DocType.Loose)
    {
        if (doctypeNode != null)
        {
            Node.DiscardElement(doctypeNode);
        }

        doctypeNode = null;
        guessed = HtmlVersion.Html40Loose;
    }
    else if (Options.DocType == TidyNet.DocType.Auto)
    {
        if (doctypeNode != null)
        {
            switch (this.doctype)
            {
                case HtmlVersion.Unknown:
                    return false;

                case HtmlVersion.Html20:
                case HtmlVersion.Html32:
                case HtmlVersion.Html40Strict:
                case HtmlVersion.Html40Loose:
                case HtmlVersion.Frames:
                    /* keep the existing declaration if the content is valid for it */
                    if ((this.versions & this.doctype) != 0)
                    {
                        return true;
                    }

                    break; /* to replace old version by new */
            }

            /* INCONSISTENT_VERSION warning is now issued by ApparentVersion() */
        }

        /* choose new doctype */
        guessed = GetHtmlVersion();
    }

    if (guessed == HtmlVersion.Unknown)
    {
        return false;
    }

    /* for XML use the Voyager system identifier */
    if (this.Options.XmlOut || this.Options.XmlTags || this.isvoyager)
    {
        if (doctypeNode != null)
        {
            Node.DiscardElement(doctypeNode);
        }

        for (i = 0; i < W3CVersion.Length; ++i)
        {
            if (guessed == W3CVersion[i].Version)
            {
                FixHtmlNameSpace(root, W3CVersion[i].Profile);
                break;
            }
        }

        return true;
    }

    if (doctypeNode == null)
    {
        /* no DOCTYPE (or it was discarded above): create one in front of the content */
        doctypeNode = NewNode(Node.DocTypeTag, this.lexbuf, 0, 0);
        doctypeNode.Next = root.Content;
        doctypeNode.Parent = root;
        doctypeNode.Prev = null;
        root.Content = doctypeNode;
    }

    /* the declaration text is appended to the lexer buffer starting here */
    this.txtstart = this.lexsize;
    this.txtend = this.lexsize;

    /* use the appropriate public identifier */
    AddStringLiteral("html PUBLIC ");

    if (Options.DocType == TidyNet.DocType.User && Options.DocTypeStr != null)
    {
        /* the user-supplied string is emitted exactly as given */
        AddStringLiteral(Options.DocTypeStr);
    }
    else if (guessed == HtmlVersion.Html20)
    {
        AddStringLiteral("\"-//IETF//DTD HTML 2.0//EN\"");
    }
    else
    {
        AddStringLiteral("\"-//W3C//DTD ");

        for (i = 0; i < W3CVersion.Length; ++i)
        {
            if (guessed == W3CVersion[i].Version)
            {
                AddStringLiteral(W3CVersion[i].Name);
                break;
            }
        }

        AddStringLiteral("//EN\"");
    }

    this.txtend = this.lexsize;

    doctypeNode.Start = this.txtstart;
    doctypeNode.End = this.txtend;

    return true;
}
/// <summary> Internal routine that actually does the parsing. The caller
/// can pass either an input stream or a file name. If both are passed,
/// the file name is preferred.
/// </summary>
/// <param name="input">
/// Stream to read the markup from. Ignored when <paramref name="file"/> is
/// given; standard input is used when both are null. A stream supplied by
/// the caller is never closed here.
/// </param>
/// <param name="file">
/// Path of a file to read. The stream opened for it is closed by this
/// method, including when parsing fails.
/// </param>
/// <param name="Output">
/// Stream the tidied document is printed to. Nothing is printed when it is
/// null, when errors were recorded, or when BurstSlides is set.
/// </param>
/// <param name="messages">Collection handed to the lexer for its messages.</param>
/// <returns>
/// The parsed document tree, or <c>null</c> when a node integrity check fails.
/// </returns>
internal Node ParseInternal(Stream input, string file, Stream Output, TidyMessageCollection messages)
{
    Lexer lexer;
    Node document = null;
    Node doctype;
    Out o = new OutImpl(); /* normal output stream */
    PPrint pprint;

    /* ensure config is self-consistent */
    _options.Adjust();

    /* only a stream opened here from a file name is ours to close */
    bool ownsInput = (file != null);

    if (ownsInput)
    {
        input = new FileStream(file, FileMode.Open, FileAccess.Read);
    }
    else if (input == null)
    {
        input = Console.OpenStandardInput();
    }

    if (input == null)
    {
        return document;
    }

    try
    {
        lexer = new Lexer(new ClsStreamInImpl(input, _options.CharEncoding, _options.TabSize), _options);
        lexer.messages = messages;

        /*
         * store pointer to lexer in input stream
         * to allow character encoding errors to be
         * reported
         */
        lexer.input.Lexer = lexer;

        /* Tidy doesn't alter the doctype for generic XML docs */
        if (_options.XmlTags)
        {
            document = ParserImpl.parseXMLDocument(lexer);
        }
        else
        {
            document = ParserImpl.parseDocument(lexer);

            if (!document.CheckNodeIntegrity())
            {
                Report.BadTree(lexer);
                return null;
            }

            Clean cleaner = new Clean(_options.tt);

            /* simplifies <b><b> ... </b> ...</b> etc. */
            cleaner.NestedEmphasis(document);

            /* cleans up <dir>indented text</dir> etc. */
            cleaner.List2BQ(document);
            cleaner.BQ2Div(document);

            /* replaces i by em and b by strong */
            if (_options.LogicalEmphasis)
            {
                cleaner.EmFromI(document);
            }

            if (_options.Word2000 && cleaner.IsWord2000(document, _options.tt))
            {
                /* prune Word2000's <![if ...]> ... <![endif]> */
                cleaner.DropSections(lexer, document);

                /* drop style & class attributes and empty p, span elements */
                cleaner.CleanWord2000(lexer, document);
            }

            /* replaces presentational markup by style rules */
            if (_options.MakeClean || _options.DropFontTags)
            {
                cleaner.CleanTree(lexer, document);
            }

            if (!document.CheckNodeIntegrity())
            {
                Report.BadTree(lexer);
                return null;
            }

            /* remember the doctype as found, before it is fixed up, for the version report */
            doctype = document.FindDocType();

            if (document.Content != null)
            {
                if (_options.Xhtml)
                {
                    lexer.SetXhtmlDocType(document);
                }
                else
                {
                    lexer.FixDocType(document);
                }

                if (_options.TidyMark)
                {
                    lexer.AddGenerator(document);
                }
            }

            /* ensure presence of initial <?XML version="1.0"?> */
            if (_options.XmlOut && _options.XmlPi)
            {
                lexer.FixXmlPI(document);
            }

            if (document.Content != null)
            {
                Report.ReportVersion(lexer, doctype);
                Report.ReportNumWarnings(lexer);
            }
        }
    }
    finally
    {
        /*
         * Close the input stream, but only if we created it. Running this in
         * a finally block also covers the early returns above and any
         * exception raised while parsing.
         */
        if (ownsInput)
        {
            try
            {
                input.Close();
            }
            catch (IOException)
            {
                /* best effort: a failure to close must not hide the parse result */
            }
        }
    }

    if (lexer.messages.Errors > 0)
    {
        Report.NeedsAuthorIntervention(lexer);
    }

    o.State = StreamIn.FSM_ASCII;
    o.Encoding = _options.CharEncoding;

    if (lexer.messages.Errors == 0)
    {
        if (_options.BurstSlides)
        {
            /*
             * remove doctype to avoid potential clash with
             * markup introduced when bursting into slides
             */
            /* discard the document type */
            doctype = document.FindDocType();

            if (doctype != null)
            {
                Node.DiscardElement(doctype);
            }

            /* slides use transitional features */
            lexer.versions |= HtmlVersion.Html40Loose;

            /* and patch up doctype to match */
            if (_options.Xhtml)
            {
                lexer.SetXhtmlDocType(document);
            }
            else
            {
                lexer.FixDocType(document);
            }

            /* find the body element which may be implicit */
            Node body = document.FindBody(_options.tt);

            if (body != null)
            {
                pprint = new PPrint(_options);
                Report.ReportNumberOfSlides(lexer, pprint.CountSlides(body));
                pprint.CreateSlides(lexer, document);
            }
            else
            {
                Report.MissingBody(lexer);
            }
        }
        else if (Output != null)
        {
            pprint = new PPrint(_options);
            o.Output = Output;

            if (_options.XmlTags)
            {
                pprint.PrintXmlTree(o, (short)0, 0, lexer, document);
            }
            else
            {
                pprint.PrintTree(o, (short)0, 0, lexer, document);
            }

            pprint.FlushLine(o, 0);
        }
    }

    Report.ErrorSummary(lexer);

    return document;
}