		/*
		modes for GetToken()
		
		MixedContent      -- for elements which don't accept PCDATA
		Preformatted      -- white space preserved as is
		IgnoreMarkup      -- for CDATA elements such as script and style
		IgnoreWhitespace  -- leading white space in content is discarded
		*/
		public virtual Node GetToken(short mode)
		{
			short map;
			int c = 0;
			int lastc;
			int badcomment = 0;
			MutableBoolean isempty = new MutableBoolean();
			AttVal attributes;
			
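			/* a token previously pushed back onto the lexer is normally returned first */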
			if (pushed)
			{
				/* duplicate inlines in preference to pushed text nodes when appropriate */
				if (token.Type != Node.TextNode || (insert == - 1 && inode == null))
				{
					pushed = false;
					return token;
				}
			}
			
			/* at start of block elements, unclosed inline
			elements are inserted into the token stream */
			
			if (insert != - 1 || inode != null)
			{
				return InsertedToken();
			}
			
			lines = input.curline;
			columns = input.curcol;
			waswhite = false;
			
			txtstart = lexsize;
			txtend = lexsize;
			
			while (true)
			{
				c = input.ReadChar();
				if (c == StreamIn.EndOfStream)
				{
					break;
				}

				if (insertspace && mode != IgnoreWhitespace)
				{
					AddCharToLexer(' ');
					waswhite = true;
					insertspace = false;
				}
				
				/* normalize line endings: treat \r\n and bare \r as \n */
				
				if (c == '\r')
				{
					c = input.ReadChar();
					
					if (c != '\n')
					{
						input.UngetChar(c);
					}
					
					c = '\n';
				}
				
				AddCharToLexer(c);
				
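				/*
					the lexer is a simple state machine: each case below handles one
					lexical state, accumulating characters in lexbuf until a complete
					token can be returned
					*/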
				switch (state)
				{
				case LEX_CONTENT: 
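					/* plain text: watch for '<', collapse white space and expand entities */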
					map = MAP((char) c);
						
					/*
						Discard white space if appropriate. It's cheaper
						to do this here rather than in parser methods
						for elements that don't have mixed content.
						*/
					if (((map & WHITE) != 0) && (mode == IgnoreWhitespace) && lexsize == txtstart + 1)
					{
						--lexsize;
						waswhite = false;
						lines = input.curline;
						columns = input.curcol;
						continue;
					}
						
					if (c == '<')
					{
						state = LEX_GT;
						continue;
					}
						
					if ((map & WHITE) != 0)
					{
						/* was previous char white? */
						if (waswhite)
						{
							if (mode != Preformatted && mode != IgnoreMarkup)
							{
								--lexsize;
								lines = input.curline;
								columns = input.curcol;
							}
						}
							/* prev char wasn't white */
						else
						{
							waswhite = true;
							lastc = c;
								
							if (mode != Preformatted && mode != IgnoreMarkup && c != ' ')
							{
								ChangeChar((byte) ' ');
							}
						}
							
						continue;
					}
					else if (c == '&' && mode != IgnoreMarkup)
					{
						ParseEntity(mode);
					}
						
					/* this is needed to avoid trimming trailing whitespace */
					if (mode == IgnoreWhitespace)
						mode = MixedContent;
						
					waswhite = false;
					continue;
					
					
				case LEX_GT: 
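					/* just seen '<': decide what kind of markup, if any, follows */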
					if (c == '/')
					{
						c = input.ReadChar();
						if (c == StreamIn.EndOfStream)
						{
							input.UngetChar(c);
							continue;
						}
							
						AddCharToLexer(c);
						map = MAP((char) c);
							
						if ((map & LETTER) != 0)
						{
							lexsize -= 3;
							txtend = lexsize;
							input.UngetChar(c);
							state = LEX_ENDTAG;
							lexbuf[lexsize] = (byte) '\x0000'; /* debug */
							input.curcol -= 2;
								
							/* if some text before the </ return it now */
							if (txtend > txtstart)
							{
								/* trim space char before end tag */
								if (mode == IgnoreWhitespace && lexbuf[lexsize - 1] == (byte) ' ')
								{
									lexsize -= 1;
									txtend = lexsize;
								}
									
								token = NewNode(Node.TextNode, lexbuf, txtstart, txtend);
								return token;
							}
								
							continue; /* no text so keep going */
						}
							
						/* otherwise treat as CDATA */
						waswhite = false;
						state = LEX_CONTENT;
						continue;
					}
						
					if (mode == IgnoreMarkup)
					{
						/* otherwise treat as CDATA */
						waswhite = false;
						state = LEX_CONTENT;
						continue;
					}
						
					/*
						look out for comments, doctypes or marked sections;
						this isn't quite right, but it's getting there ...
						*/
					if (c == '!')
					{
						c = input.ReadChar();
						if (c == '-')
						{
							c = input.ReadChar();
							if (c == '-')
							{
								state = LEX_COMMENT; /* comment */
								lexsize -= 2;
								txtend = lexsize;
									
								/* if some text before < return it now */
								if (txtend > txtstart)
								{
									token = NewNode(Node.TextNode, lexbuf, txtstart, txtend);
									return token;
								}
									
								txtstart = lexsize;
								continue;
							}
								
							Report.Warning(this, null, null, Report.MALFORMED_COMMENT);
						}
						else if (c == 'd' || c == 'D')
						{
							state = LEX_DOCTYPE; /* doctype */
							lexsize -= 2;
							txtend = lexsize;
							mode = IgnoreWhitespace;
								
							/* skip until white space or '>' */
								
							for (; ; )
							{
								c = input.ReadChar();
									
								if (c == StreamIn.EndOfStream || c == '>')
								{
									input.UngetChar(c);
									break;
								}
									
								map = MAP((char) c);
								if ((map & WHITE) == 0)
								{
									continue;
								}
									
								/* and skip to end of whitespace */
									
								for (; ; )
								{
									c = input.ReadChar();
										
									if (c == StreamIn.EndOfStream || c == '>')
									{
										input.UngetChar(c);
										break;
									}
										
									map = MAP((char) c);
										
									if ((map & WHITE) != 0)
									{
										continue;
									}
										
									input.UngetChar(c);
									break;
								}
									
								break;
							}
								
							/* if some text before < return it now */
							if (txtend > txtstart)
							{
								token = NewNode(Node.TextNode, lexbuf, txtstart, txtend);
								return token;
							}
								
							txtstart = lexsize;
							continue;
						}
						else if (c == '[')
						{
							/* Word 2000 embeds <![if ...]> ... <![endif]> sequences */
							lexsize -= 2;
							state = LEX_SECTION;
							txtend = lexsize;
								
							/* if some text before < return it now */
							if (txtend > txtstart)
							{
								token = NewNode(Node.TextNode, lexbuf, txtstart, txtend);
								return token;
							}
								
							txtstart = lexsize;
							continue;
						}
							
						/* otherwise swallow chars up to and including next '>' */
						while (true)
						{
							c = input.ReadChar();
							if (c == '>')
							{
								break;
							}
							if (c == StreamIn.EndOfStream)
							{
								input.UngetChar(c);
								break;
							}
						}
							
						lexsize -= 2;
						lexbuf[lexsize] = (byte) '\x0000';
						state = LEX_CONTENT;
						continue;
					}
						
					/*
						processing instructions
						*/
						
					if (c == '?')
					{
						lexsize -= 2;
						state = LEX_PROCINSTR;
						txtend = lexsize;
							
						/* if some text before < return it now */
						if (txtend > txtstart)
						{
							token = NewNode(Node.TextNode, lexbuf, txtstart, txtend);
							return token;
						}
							
						txtstart = lexsize;
						continue;
					}
						
					/* Microsoft ASP, e.g. <% ... server-code ... %> */
					if (c == '%')
					{
						lexsize -= 2;
						state = LEX_ASP;
						txtend = lexsize;
							
						/* if some text before < return it now */
						if (txtend > txtstart)
						{
							token = NewNode(Node.TextNode, lexbuf, txtstart, txtend);
							return token;
						}
							
						txtstart = lexsize;
						continue;
					}
						
					/* Netscape's JSTE, e.g. <# ... server-code ... #> */
					if (c == '#')
					{
						lexsize -= 2;
						state = LEX_JSTE;
						txtend = lexsize;
							
						/* if some text before < return it now */
						if (txtend > txtstart)
						{
							token = NewNode(Node.TextNode, lexbuf, txtstart, txtend);
							return token;
						}
							
						txtstart = lexsize;
						continue;
					}
						
					map = MAP((char) c);
						
					/* check for start tag */
					if ((map & LETTER) != 0)
					{
						input.UngetChar(c); /* push back letter */
						lexsize -= 2; /* discard "<" + letter */
						txtend = lexsize;
						state = LEX_STARTTAG; /* ready to read tag name */
							
						/* if some text before < return it now */
						if (txtend > txtstart)
						{
							token = NewNode(Node.TextNode, lexbuf, txtstart, txtend);
							return token;
						}
							
						continue; /* no text so keep going */
					}
						
					/* otherwise treat as CDATA */
					state = LEX_CONTENT;
					waswhite = false;
					continue;
					
					
				case LEX_ENDTAG: 
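					/* the letter after "</" has been re-read and appended; parse the tag name and skip to '>' */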
					txtstart = lexsize - 1;
					input.curcol += 2;
					c = ParseTagName();
					token = NewNode(Node.EndTag, lexbuf, txtstart, txtend, GetString(lexbuf, txtstart, txtend - txtstart));
					lexsize = txtstart;
					txtend = txtstart;
						
					/* skip to '>' */
					while (c != '>')
					{
						c = input.ReadChar();
						if (c == StreamIn.EndOfStream)
						{
							break;
						}
					}
						
					if (c == StreamIn.EndOfStream)
					{
						input.UngetChar(c);
						continue;
					}
						
					state = LEX_CONTENT;
					waswhite = false;
					return token; /* the endtag token */
					
					
				case LEX_STARTTAG: 
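					/* the letter after '<' has been re-read and appended; parse the tag name and any attributes */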
					txtstart = lexsize - 1; /* set txtstart to first letter */
					c = ParseTagName();
					isempty.Val = false;
					attributes = null;
					token = NewNode((isempty.Val ? Node.StartEndTag : Node.StartTag), lexbuf, txtstart, txtend, GetString(lexbuf, txtstart, txtend - txtstart));
						
					/* parse attributes, consuming closing ">" */
					if (c != '>')
					{
						if (c == '/')
						{
							input.UngetChar(c);
						}
							
						attributes = ParseAttrs(isempty);
					}
						
					if (isempty.Val)
					{
						token.Type = Node.StartEndTag;
					}
						
					token.Attributes = attributes;
					lexsize = txtstart;
					txtend = txtstart;
						
					/* swallow newline following start tag */
					/* special check needed for CRLF sequence */
					/* this doesn't apply to empty elements */
						
					if (ExpectsContent(token) || token.Tag == Options.tt.TagBr)
					{
						c = input.ReadChar();
						if (c == '\r')
						{
							c = input.ReadChar();
								
							if (c != '\n')
							{
								input.UngetChar(c);
							}
						}
						else if (c != '\n' && c != '\f')
						{
							input.UngetChar(c);
						}
							
						waswhite = true; /* to swallow leading whitespace */
					}
					else
					{
						waswhite = false;
					}
						
					state = LEX_CONTENT;
						
					if (token.Tag == null)
					{
						Report.Error(this, null, token, Report.UNKNOWN_ELEMENT);
					}
					else if (!Options.XmlTags)
					{
						versions &= token.Tag.Versions;
							
						if ((token.Tag.Versions & HtmlVersion.Proprietary) != 0)
						{
							if (!Options.MakeClean && (token.Tag == Options.tt.TagNobr || token.Tag == Options.tt.TagWbr))
							{
								Report.Warning(this, null, token, Report.PROPRIETARY_ELEMENT);
							}
						}
							
						if (token.Tag.CheckAttribs != null)
						{
							token.CheckUniqueAttributes(this);
							token.Tag.CheckAttribs.Check(this, this.token);
						}
						else
						{
							token.CheckAttributes(this);
						}
					}
					return token; /* return start tag */

				case LEX_COMMENT: 
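					/* seen "<!--" so far; look for the terminating "-->" */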
					if (c != '-')
					{
						continue;
					}
						
					c = input.ReadChar();
					AddCharToLexer(c);
					if (c != '-')
					{
						continue;
					}
						
					while (true)
					{
						c = input.ReadChar();
							
						if (c == '>')
						{
							if (badcomment != 0)
							{
								Report.Warning(this, null, null, Report.MALFORMED_COMMENT);
							}
								
							txtend = lexsize - 2; // AQ 8Jul2000
							lexbuf[lexsize] = (byte) '\x0000';
							state = LEX_CONTENT;
							waswhite = false;
							token = NewNode(Node.CommentTag, lexbuf, txtstart, txtend);
								
							/* now look for a line break */
								
							c = input.ReadChar();
								
							if (c == '\r')
							{
								c = input.ReadChar();
									
								if (c != '\n')
								{
									token.Linebreak = true;
								}
							}
								
							if (c == '\n')
							{
								token.Linebreak = true;
							}
							else
							{
								input.UngetChar(c);
							}
								
							return token;
						}
							
						/* note position of first such error in the comment */
						if (badcomment == 0)
						{
							lines = input.curline;
							columns = input.curcol - 3;
						}
							
						badcomment++;
						if (Options.FixComments)
						{
							lexbuf[lexsize - 2] = (byte) '=';
						}
							
						AddCharToLexer(c);
							
						/* if '-' then look for '>' to end the comment */
						if (c != '-')
						{
							break;
						}
					}
						
					/* otherwise continue to look for --> */
					lexbuf[lexsize - 2] = (byte) '=';
					continue;
					
					
				case LEX_DOCTYPE: 
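					/* collapse white space inside the doctype and wait for the closing '>' */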
					map = MAP((char) c);
						
					if ((map & WHITE) != 0)
					{
						if (waswhite)
						{
							lexsize -= 1;
						}
							
						waswhite = true;
					}
					else
					{
						waswhite = false;
					}
						
					if (c != '>')
					{
						continue;
					}
						
					lexsize -= 1;
					txtend = lexsize;
					lexbuf[lexsize] = (byte) '\x0000';
					state = LEX_CONTENT;
					waswhite = false;
					token = NewNode(Node.DocTypeTag, lexbuf, txtstart, txtend);
					/* make a note of the version named by the doctype */
					doctype = FindGivenVersion(token);
					return token;
					
					
				case LEX_PROCINSTR: 
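					/* processing instruction: "<?php" is handed off to LEX_PHP; otherwise scan for the terminating '>' */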
						
					if (lexsize - txtstart == 3)
					{
						if ((GetString(lexbuf, txtstart, 3)).Equals("php"))
						{
							state = LEX_PHP;
							continue;
						}
					}
						
					if (Options.XmlPIs)
					{
						/* insist on ?> as terminator */
						if (c != '?')
						{
							continue;
						}
							
						/* now look for '>' */
						c = input.ReadChar();
							
						if (c == StreamIn.EndOfStream)
						{
							Report.Warning(this, null, null, Report.UNEXPECTED_END_OF_FILE);
							input.UngetChar(c);
							continue;
						}
							
						AddCharToLexer(c);
					}
						
					if (c != '>')
					{
						continue;
					}
						
					lexsize -= 1;
					txtend = lexsize;
					lexbuf[lexsize] = (byte) '\x0000';
					state = LEX_CONTENT;
					waswhite = false;
					token = NewNode(Node.ProcInsTag, lexbuf, txtstart, txtend);
					return token;
					
					
				case LEX_ASP: 
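					/* inside an ASP block; scan for the terminating "%>" */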
					if (c != '%')
					{
						continue;
					}
						
					/* now look for '>' */
					c = input.ReadChar();

					if (c != '>')
					{
						input.UngetChar(c);
						continue;
					}
						
					lexsize -= 1;
					txtend = lexsize;
					lexbuf[lexsize] = (byte) '\x0000';
					state = LEX_CONTENT;
					waswhite = false;
					token = NewNode(Node.AspTag, lexbuf, txtstart, txtend);
					return token;

				case LEX_JSTE: 
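					/* inside a JSTE block; scan for the terminating "#>" */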
					if (c != '#')
					{
						continue;
					}
	
					/* now look for '>' */
					c = input.ReadChar();
					if (c != '>')
					{
						input.UngetChar(c);
						continue;
					}
						
					lexsize -= 1;
					txtend = lexsize;
					lexbuf[lexsize] = (byte) '\x0000';
					state = LEX_CONTENT;
					waswhite = false;
					token = NewNode(Node.JsteTag, lexbuf, txtstart, txtend);
					return token;

				case LEX_PHP: 
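					/* inside a PHP block; scan for the terminating "?>" */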
					if (c != '?')
					{
						continue;
					}
						
					/* now look for '>' */
					c = input.ReadChar();
					if (c != '>')
					{
						input.UngetChar(c);
						continue;
					}
						
					lexsize -= 1;
					txtend = lexsize;
					lexbuf[lexsize] = (byte) '\x0000';
					state = LEX_CONTENT;
					waswhite = false;
					token = NewNode(Node.PhpTag, lexbuf, txtstart, txtend);
					return token;
					
					
				case LEX_SECTION: 
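					/* inside a marked section; "<![CDATA[" switches to LEX_CDATA, otherwise scan for "]>" */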
					if (c == '[')
					{
						if (lexsize == (txtstart + 6) && (GetString(lexbuf, txtstart, 6)).Equals("CDATA["))
						{
							state = LEX_CDATA;
							lexsize -= 6;
							continue;
						}
					}
						
					if (c != ']')
					{
						continue;
					}

					/* now look for '>' */
					c = input.ReadChar();
					if (c != '>')
					{
						input.UngetChar(c);
						continue;
					}
						
					lexsize -= 1;
					txtend = lexsize;
					lexbuf[lexsize] = (byte) '\x0000';
					state = LEX_CONTENT;
					waswhite = false;
					token = NewNode(Node.SectionTag, lexbuf, txtstart, txtend);
					return token;

				case LEX_CDATA: 
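					/* inside a CDATA section; scan for the terminating "]]>" */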
					if (c != ']')
					{
						continue;
					}

					/* now look for ']' */
					c = input.ReadChar();
					if (c != ']')
					{
						input.UngetChar(c);
						continue;
					}
						
					/* now look for '>' */
					c = input.ReadChar();
					if (c != '>')
					{
						input.UngetChar(c);
						continue;
					}
						
					lexsize -= 1;
					txtend = lexsize;
					lexbuf[lexsize] = (byte) '\x0000';
					state = LEX_CONTENT;
					waswhite = false;
					token = NewNode(Node.CDATATag, lexbuf, txtstart, txtend);
					return token;
				}
			}
			
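			/* end of stream: return any pending text or unterminated comment as a final token */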
			if (state == LEX_CONTENT)
			{
				/* text string */
				txtend = lexsize;
				if (txtend > txtstart)
				{
					input.UngetChar(c);
					if (lexbuf[lexsize - 1] == (byte) ' ')
					{
						lexsize -= 1;
						txtend = lexsize;
					}
					
					token = NewNode(Node.TextNode, lexbuf, txtstart, txtend);
					return token;
				}
			}
			else if (state == LEX_COMMENT)
			{
				/* comment */
				if (c == StreamIn.EndOfStream)
				{
					Report.Warning(this, null, null, Report.MALFORMED_COMMENT);
				}
				
				txtend = lexsize;
				lexbuf[lexsize] = (byte) '\x0000';
				state = LEX_CONTENT;
				waswhite = false;
				token = NewNode(Node.CommentTag, lexbuf, txtstart, txtend);
				return token;
			}
			
			return null;
		}