/// <summary>
		/// Rewrites the document's DOCTYPE declaration as an XHTML 1.0 doctype
		/// (or as the user-supplied string), creating the doctype node when the
		/// tree does not have one, and applies the XHTML namespace to the document.
		/// </summary>
		/// <param name="root">Root node of the parsed document tree.</param>
		/// <returns>
		/// <c>true</c> when <c>Options.DocType</c> is <c>Omit</c> (any existing
		/// doctype node is discarded and nothing is written); <c>false</c> once
		/// the doctype text has been written into the lexer buffer.
		/// </returns>
		public virtual bool SetXhtmlDocType(Node root)
		{
			const string strictFpi = "-//W3C//DTD XHTML 1.0 Strict//EN";
			const string transitionalFpi = "-//W3C//DTD XHTML 1.0 Transitional//EN";
			const string framesetFpi = "-//W3C//DTD XHTML 1.0 Frameset//EN";

			/* defaults used when no branch below selects an identifier */
			string fpi = " ";
			string sysid = "";
			Node doctype = root.FindDocType();

			if (Options.DocType == TidyNet.DocType.Omit)
			{
				if (doctype != null)
				{
					Node.DiscardElement(doctype);
				}
				return true;
			}

			if (Options.DocType == TidyNet.DocType.Auto)
			{
				/* see what flavor of XHTML this document matches */
				if ((this.versions & HtmlVersion.Html40Strict) != 0)
				{
					/* use XHTML strict */
					fpi = strictFpi;
					sysid = voyager_strict;
				}
				else if ((this.versions & HtmlVersion.Loose) != 0)
				{
					/* use XHTML transitional */
					fpi = transitionalFpi;
					sysid = voyager_loose;
				}
				else if ((this.versions & HtmlVersion.Frames) != 0)
				{
					/* use XHTML frames */
					fpi = framesetFpi;
					sysid = voyager_frameset;
				}
				else
				{
					/* lets assume XHTML transitional */
					fpi = transitionalFpi;
					sysid = voyager_loose;
				}
			}
			else if (Options.DocType == TidyNet.DocType.Strict)
			{
				fpi = strictFpi;
				sysid = voyager_strict;
			}
			else if (Options.DocType == TidyNet.DocType.Loose)
			{
				fpi = transitionalFpi;
				sysid = voyager_loose;
			}

			FixHtmlNameSpace(root, XHTML_NAMESPACE);

			if (doctype == null)
			{
				/* no doctype in the tree: insert a new one as the root's first child */
				doctype = NewNode(Node.DocTypeTag, this.lexbuf, 0, 0);
				doctype.Next = root.Content;
				doctype.Parent = root;
				doctype.Prev = null;
				root.Content = doctype;
			}

			if (Options.DocType == TidyNet.DocType.User && Options.DocTypeStr != null)
			{
				/* a user-supplied identifier replaces the fpi and clears the system identifier */
				fpi = Options.DocTypeStr;
				sysid = "";
			}

			/* the doctype text is appended at the current end of the lexer buffer */
			this.txtstart = this.lexsize;
			this.txtend = this.lexsize;

			/* add public identifier */
			AddStringLiteral("html PUBLIC ");

			/*
			 * check if the fpi is quoted or not; an empty user string has no
			 * first character to inspect, so it is treated as unquoted rather
			 * than indexing past the end of the string
			 */
			if (fpi.Length > 0 && fpi[0] == '"')
			{
				AddStringLiteral(fpi);
			}
			else
			{
				AddStringLiteral("\"");
				AddStringLiteral(fpi);
				AddStringLiteral("\"");
			}

			/* drop the indent before the system identifier when it would not fit the wrap length */
			if (sysid.Length + 6 >= this.Options.WrapLen)
			{
				AddStringLiteral("\n\"");
			}
			else
			{
				AddStringLiteral("\n    \"");
			}

			/* add system identifier */
			AddStringLiteral(sysid);
			AddStringLiteral("\"");

			this.txtend = this.lexsize;

			doctype.Start = this.txtstart;
			doctype.End = this.txtend;

			return false;
		}
		/// <summary>
		/// Fixes up the document's DOCTYPE: keeps an existing doctype that is
		/// consistent with the markup, otherwise replaces or creates it using
		/// the configured or guessed HTML version.
		/// </summary>
		/// <param name="root">Root node of the parsed document tree.</param>
		/// <returns>
		/// <c>false</c> when, in <c>Auto</c> mode, the existing doctype's version
		/// is <c>Unknown</c>, or when no HTML version could be guessed;
		/// <c>true</c> in every other case (including XML output and
		/// <c>Omit</c>, where no doctype text is written).
		/// </returns>
		public virtual bool FixDocType(Node root)
		{
			/* named doctypeNode to keep it apart from the this.doctype version field */
			Node doctypeNode;
			HtmlVersion guessed = HtmlVersion.Html40Strict;
			int i;

			if (this.badDoctype)
			{
				Report.Warning(this, null, null, Report.MALFORMED_DOCTYPE);
			}

			if (Options.XmlOut)
			{
				return true;
			}

			doctypeNode = root.FindDocType();

			if (Options.DocType == TidyNet.DocType.Omit)
			{
				if (doctypeNode != null)
				{
					Node.DiscardElement(doctypeNode);
				}
				return true;
			}

			if (Options.DocType == TidyNet.DocType.Strict)
			{
				/* guard against a missing doctype, as every other discard site does */
				if (doctypeNode != null)
				{
					Node.DiscardElement(doctypeNode);
				}
				doctypeNode = null;
				guessed = HtmlVersion.Html40Strict;
			}
			else if (Options.DocType == TidyNet.DocType.Loose)
			{
				if (doctypeNode != null)
				{
					Node.DiscardElement(doctypeNode);
				}
				doctypeNode = null;
				guessed = HtmlVersion.Html40Loose;
			}
			else if (Options.DocType == TidyNet.DocType.Auto)
			{
				if (doctypeNode != null)
				{
					switch (this.doctype)
					{
					case HtmlVersion.Unknown:
						return false;

					case HtmlVersion.Html20:
					case HtmlVersion.Html32:
					case HtmlVersion.Html40Strict:
					case HtmlVersion.Html40Loose:
					case HtmlVersion.Frames:
						/* keep the declared doctype when the markup is still valid for it */
						if ((this.versions & this.doctype) != 0)
						{
							return true;
						}
						break; /* to replace old version by new */
					}

					/* INCONSISTENT_VERSION warning is now issued by ApparentVersion() */
				}

				/* choose new doctype */
				guessed = GetHtmlVersion();
			}

			if (guessed == HtmlVersion.Unknown)
			{
				return false;
			}

			/* for XML use the Voyager system identifier */
			if (this.Options.XmlOut || this.Options.XmlTags || this.isvoyager)
			{
				if (doctypeNode != null)
				{
					Node.DiscardElement(doctypeNode);
				}

				/* apply the namespace profile of the first table entry matching the guessed version */
				for (i = 0; i < W3CVersion.Length; ++i)
				{
					if (guessed == W3CVersion[i].Version)
					{
						FixHtmlNameSpace(root, W3CVersion[i].Profile);
						break;
					}
				}

				return true;
			}

			if (doctypeNode == null)
			{
				/* no doctype in the tree: insert a new one as the root's first child */
				doctypeNode = NewNode(Node.DocTypeTag, this.lexbuf, 0, 0);
				doctypeNode.Next = root.Content;
				doctypeNode.Parent = root;
				doctypeNode.Prev = null;
				root.Content = doctypeNode;
			}

			/* the doctype text is appended at the current end of the lexer buffer */
			this.txtstart = this.lexsize;
			this.txtend = this.lexsize;

			/* use the appropriate public identifier */
			AddStringLiteral("html PUBLIC ");

			if (Options.DocType == TidyNet.DocType.User && Options.DocTypeStr != null)
			{
				AddStringLiteral(Options.DocTypeStr);
			}
			else if (guessed == HtmlVersion.Html20)
			{
				AddStringLiteral("\"-//IETF//DTD HTML 2.0//EN\"");
			}
			else
			{
				AddStringLiteral("\"-//W3C//DTD ");

				for (i = 0; i < W3CVersion.Length; ++i)
				{
					if (guessed == W3CVersion[i].Version)
					{
						AddStringLiteral(W3CVersion[i].Name);
						break;
					}
				}

				AddStringLiteral("//EN\"");
			}

			this.txtend = this.lexsize;

			doctypeNode.Start = this.txtstart;
			doctypeNode.End = this.txtend;

			return true;
		}
Example #3
0
        /// <summary> Internal routine that actually does the parsing.  The caller
        /// can pass either an input stream or a file name.  If both are passed,
        /// the file name is preferred.  When neither is passed, standard input
        /// is read.
        /// </summary>
        /// <param name="input">Stream to read markup from; ignored when <paramref name="file"/> is given.</param>
        /// <param name="file">Path of a file to read, or <c>null</c>.</param>
        /// <param name="Output">Stream the cleaned document is printed to, or <c>null</c> for no output.</param>
        /// <param name="messages">Collection that receives the lexer's messages.</param>
        /// <returns>
        /// The parsed document tree, or <c>null</c> when the tree fails its
        /// integrity check.
        /// </returns>
        internal Node ParseInternal(Stream input, string file, Stream Output, TidyMessageCollection messages)
        {
            Lexer  lexer;
            Node   document = null;
            Node   doctype;
            Out    o = new OutImpl();          /* normal output stream */
            PPrint pprint;
            bool   ownsInput = false;          /* true only for the stream opened here */

            /* ensure config is self-consistent */
            _options.Adjust();

            if (file != null)
            {
                input     = new FileStream(file, FileMode.Open, FileAccess.Read);
                ownsInput = true;
            }
            else if (input == null)
            {
                input = Console.OpenStandardInput();
            }

            try
            {
                if (input != null)
                {
                    lexer          = new Lexer(new ClsStreamInImpl(input, _options.CharEncoding, _options.TabSize), _options);
                    lexer.messages = messages;

                    /*
                     * store pointer to lexer in input stream
                     * to allow character encoding errors to be
                     * reported
                     */
                    lexer.input.Lexer = lexer;

                    /* Tidy doesn't alter the doctype for generic XML docs */
                    if (_options.XmlTags)
                    {
                        document = ParserImpl.parseXMLDocument(lexer);
                    }
                    else
                    {
                        document = ParserImpl.parseDocument(lexer);

                        if (!document.CheckNodeIntegrity())
                        {
                            Report.BadTree(lexer);
                            return null;
                        }

                        Clean cleaner = new Clean(_options.tt);

                        /* simplifies <b><b> ... </b> ...</b> etc. */
                        cleaner.NestedEmphasis(document);

                        /* cleans up <dir>indented text</dir> etc. */
                        cleaner.List2BQ(document);
                        cleaner.BQ2Div(document);

                        /* replaces i by em and b by strong */
                        if (_options.LogicalEmphasis)
                        {
                            cleaner.EmFromI(document);
                        }

                        if (_options.Word2000 && cleaner.IsWord2000(document, _options.tt))
                        {
                            /* prune Word2000's <![if ...]> ... <![endif]> */
                            cleaner.DropSections(lexer, document);

                            /* drop style & class attributes and empty p, span elements */
                            cleaner.CleanWord2000(lexer, document);
                        }

                        /* replaces presentational markup by style rules */
                        if (_options.MakeClean || _options.DropFontTags)
                        {
                            cleaner.CleanTree(lexer, document);
                        }

                        if (!document.CheckNodeIntegrity())
                        {
                            Report.BadTree(lexer);
                            return null;
                        }

                        /* captured before the doctype fix-up below; this is the node passed to ReportVersion */
                        doctype = document.FindDocType();

                        if (document.Content != null)
                        {
                            if (_options.Xhtml)
                            {
                                lexer.SetXhtmlDocType(document);
                            }
                            else
                            {
                                lexer.FixDocType(document);
                            }

                            if (_options.TidyMark)
                            {
                                lexer.AddGenerator(document);
                            }
                        }

                        /* ensure presence of initial <?XML version="1.0"?> */
                        if (_options.XmlOut && _options.XmlPi)
                        {
                            lexer.FixXmlPI(document);
                        }

                        if (document.Content != null)
                        {
                            Report.ReportVersion(lexer, doctype);
                            Report.ReportNumWarnings(lexer);
                        }
                    }

                    if (lexer.messages.Errors > 0)
                    {
                        Report.NeedsAuthorIntervention(lexer);
                    }

                    o.State    = StreamIn.FSM_ASCII;
                    o.Encoding = _options.CharEncoding;

                    if (lexer.messages.Errors == 0)
                    {
                        if (_options.BurstSlides)
                        {
                            /*
                             * remove doctype to avoid potential clash with
                             * markup introduced when bursting into slides
                             */
                            /* discard the document type */
                            doctype = document.FindDocType();

                            if (doctype != null)
                            {
                                Node.DiscardElement(doctype);
                            }

                            /* slides use transitional features */
                            lexer.versions |= HtmlVersion.Html40Loose;

                            /* and patch up doctype to match */
                            if (_options.Xhtml)
                            {
                                lexer.SetXhtmlDocType(document);
                            }
                            else
                            {
                                lexer.FixDocType(document);
                            }

                            /* find the body element which may be implicit */
                            Node body = document.FindBody(_options.tt);

                            if (body != null)
                            {
                                pprint = new PPrint(_options);
                                Report.ReportNumberOfSlides(lexer, pprint.CountSlides(body));
                                pprint.CreateSlides(lexer, document);
                            }
                            else
                            {
                                Report.MissingBody(lexer);
                            }
                        }
                        else if (Output != null)
                        {
                            pprint   = new PPrint(_options);
                            o.Output = Output;

                            if (_options.XmlTags)
                            {
                                pprint.PrintXmlTree(o, (short)0, 0, lexer, document);
                            }
                            else
                            {
                                pprint.PrintTree(o, (short)0, 0, lexer, document);
                            }

                            pprint.FlushLine(o, 0);
                        }
                    }

                    Report.ErrorSummary(lexer);
                }

                return document;
            }
            finally
            {
                /*
                 * Close the input stream, but only if we created it.  Doing
                 * this in a finally block also releases the file handle on
                 * the early "bad tree" returns and when parsing throws.
                 */
                if (ownsInput)
                {
                    try
                    {
                        input.Close();
                    }
                    catch (IOException)
                    {
                        /* best effort: a failure to close must not mask the parse result */
                    }
                }
            }
        }