コード例 #1
0
		/*
		1010  A
		1011  B
		1100  C
		1101  D
		1110  E
		1111  F
		*/
		
		/*
		Decodes the UTF-8 sequence whose lead byte is str[start] and stores the
		resulting code point in ch.Val.
		
		Returns one less than the number of bytes consumed, so a caller whose
		loop already steps by one can simply add the result to its index.
		
		A byte that is not a recognised multi-byte lead (this includes a stray
		continuation byte) is stored unchanged as a one-byte character.
		Continuation bytes are not checked for the 10XX XXXX form.
		If the buffer ends before the sequence is complete, ch.Val is set to
		U+FFFD and only the bytes that are actually present count as consumed.
		*/
		public static int GetUTF8(byte[] str, int start, MutableInteger ch)
		{
			int c, n, i, bytes;
			
			c = str[start]; // byte is unsigned in C#, so this is already 0..255
			
			if ((c & 0xE0) == 0xC0)
			{
				/* 110X XXXX  two bytes */
				n = c & 31;
				bytes = 2;
			}
			else if ((c & 0xF0) == 0xE0)
			{
				/* 1110 XXXX  three bytes */
				n = c & 15;
				bytes = 3;
			}
			else if ((c & 0xF8) == 0xF0)
			{
				/* 1111 0XXX  four bytes */
				n = c & 7;
				bytes = 4;
			}
			else if ((c & 0xFC) == 0xF8)
			{
				/* 1111 10XX  five bytes */
				n = c & 3;
				bytes = 5;
			}
			else if ((c & 0xFE) == 0xFC)
			{
				/* 1111 110X  six bytes */
				n = c & 1;
				bytes = 6;
			}
			else
			{
				/* 0XXX XXXX one byte (or an unrecognised lead byte) */
				ch.Val = c;
				return 0;
			}
			
			/* successor bytes should have the form 10XX XXXX */
			for (i = 1; i < bytes; ++i)
			{
				if (start + i >= str.Length)
				{
					/* truncated sequence: never read past the end of the buffer */
					ch.Val = 0xFFFD;
					return i - 1;
				}
				
				n = (n << 6) | (str[start + i] & 0x3F);
			}
			
			ch.Val = n;
			return bytes - 1;
		}
コード例 #2
0
        /*
        Adds informational messages to lexer.messages describing the document:
        first, when a doctype node is supplied, the text found between the
        first and second double-quote characters of that node ("doctype_given"),
        then the HTML version name reported by the lexer ("report_version"),
        falling back to "HTML proprietary" when the lexer returns null.
        */
        public static void ReportVersion(Lexer lexer, Node doctype)
        {
            string         vers = lexer.HtmlVersionName();
            MutableInteger cc   = new MutableInteger();

            if (doctype != null)
            {
                StringBuilder docTypeStr = new StringBuilder();
                int           state      = 0; /* number of '"' characters seen so far */

                for (int i = doctype.Start; i < doctype.End; ++i)
                {
                    /* mask so the value is 0..255 whatever the element signedness */
                    int c = doctype.Textarray[i] & 0xFF;

                    /*
                    look for UTF-8 multibyte character; the lead byte is always
                    above 0x7F (a "c < 0" test can never fire on unsigned bytes)
                    */
                    if (c > 0x7F)
                    {
                        i += PPrint.GetUTF8(doctype.Textarray, i, cc);
                        c  = cc.Val;
                    }

                    if (c == '"')
                    {
                        ++state;
                    }
                    else if (state == 1)
                    {
                        /* NOTE: the cast keeps only the low 16 bits of code points above U+FFFF */
                        docTypeStr.Append((char)c);
                    }
                }

                lexer.messages.Add(new TidyMessage(lexer, String.Format(GetMessage("doctype_given"), docTypeStr), MessageLevel.Info));
            }

            lexer.messages.Add(new TidyMessage(lexer, String.Format(GetMessage("report_version"), (vers ?? "HTML proprietary")), MessageLevel.Info));
        }
コード例 #3
0
ファイル: Report.cs プロジェクト: bgarrels/betterpoeditor
        /*
        Adds informational messages to lexer.messages describing the document:
        first, when a doctype node is supplied, the text found between the
        first and second double-quote characters of that node ("doctype_given"),
        then the HTML version name reported by the lexer ("report_version"),
        falling back to "HTML proprietary" when the lexer returns null.
        */
        public static void ReportVersion(Lexer lexer, Node doctype)
        {
            string vers = lexer.HtmlVersionName();
            MutableInteger cc = new MutableInteger();

            if (doctype != null)
            {
                StringBuilder docTypeStr = new StringBuilder();
                int state = 0; /* number of '"' characters seen so far */

                for (int i = doctype.Start; i < doctype.End; ++i)
                {
                    /* mask so the value is 0..255 whatever the element signedness */
                    int c = doctype.Textarray[i] & 0xFF;

                    /*
                    look for UTF-8 multibyte character; the lead byte is always
                    above 0x7F (a "c < 0" test can never fire on unsigned bytes)
                    */
                    if (c > 0x7F)
                    {
                        i += PPrint.GetUTF8(doctype.Textarray, i, cc);
                        c = cc.Val;
                    }

                    if (c == '"')
                    {
                        ++state;
                    }
                    else if (state == 1)
                    {
                        /* NOTE: the cast keeps only the low 16 bits of code points above U+FFFF */
                        docTypeStr.Append((char)c);
                    }
                }

                lexer.messages.Add(new TidyMessage(lexer, String.Format(GetMessage("doctype_given"), docTypeStr), MessageLevel.Info));
            }

            lexer.messages.Add(new TidyMessage(lexer, String.Format(GetMessage("report_version"), (vers ?? "HTML proprietary")), MessageLevel.Info));
        }
コード例 #4
0
ファイル: Clean.cs プロジェクト: AlfieJ/TidyNet
        /*
        Map non-breaking spaces (U+00A0) to regular spaces in every text node
        of the sibling list that starts at node, visiting child content first.
        Each text node is decoded and re-encoded in place inside its Textarray,
        and its End is moved to the end of the rewritten text.
        */
        private void NormalizeSpaces(Lexer lexer, Node node)
        {
            while (node != null)
            {
                if (node.Content != null)
                {
                    NormalizeSpaces(lexer, node.Content);
                }

                if (node.Type == Node.TextNode)
                {
                    int i;
                    MutableInteger c = new MutableInteger();
                    int p = node.Start; /* write position; i is the read position */

                    for (i = node.Start; i < node.End; ++i)
                    {
                        c.Val = (int) node.Textarray[i];

                        /* look for UTF-8 multibyte character */
                        if (c.Val > 0x7F)
                        {
                            i += PPrint.GetUTF8(node.Textarray, i, c);
                        }

                        if (c.Val == 160)
                        {
                            c.Val = ' ';
                        }

                        p = PPrint.PutUTF8(node.Textarray, p, c.Val);
                    }

                    /*
                    U+00A0 occupies two bytes in UTF-8 but its replacement only
                    one, so the rewritten text can be shorter than the original.
                    Without this the node would still cover the stale bytes left
                    behind the write position.
                    */
                    node.End = p;
                }

                node = node.Next;
            }
        }
コード例 #5
0
ファイル: PPrint.cs プロジェクト: bgarrels/betterpoeditor
        /*
        The line buffer is uint not char so we can
        hold Unicode values unencoded. The translation
        to UTF-8 is deferred to the outc routine called
        to flush the line buffer.
        */
        /*
        Sends the bytes textarray[start .. end-1] to the printer one character
        at a time: UTF-8 multibyte sequences are decoded first, a newline
        flushes the current line, and anything else goes to PrintChar with the
        supplied mode. The wrap check runs before each character is read.
        */
        private void PrintText(Out fout, int mode, int indent, byte[] textarray, int start, int end)
        {
            MutableInteger decoded = new MutableInteger();
            int pos = start;

            while (pos < end)
            {
                if (indent + linelen >= _options.WrapLen)
                {
                    WrapLine(fout, indent);
                }

                int codePoint = textarray[pos] & 0xFF;

                /* a value above 0x7F starts a UTF-8 multibyte character */
                if (codePoint > 0x7F)
                {
                    pos += GetUTF8(textarray, pos, decoded);
                    codePoint = decoded.Val;
                }

                if (codePoint == '\n')
                {
                    FlushLine(fout, indent);
                }
                else
                {
                    PrintChar(codePoint, mode);
                }

                pos++;
            }
        }
コード例 #6
0
ファイル: PPrint.cs プロジェクト: bgarrels/betterpoeditor
        /*
        Emits "=" followed by the attribute value enclosed in its delimiter.

        fout      - output passed through to the line wrapping/flushing helpers
        indent    - current indent, added to linelen for every wrap decision
        val       - attribute value; when null only "=" and the two delimiters
                    are emitted
        delim     - quote character to use; 0 selects '"'
        wrappable - when true the value is printed in NORMAL mode and may be
                    wrapped at spaces, otherwise PREFORMATTED mode is used

        Occurrences of the delimiter inside the value are always written as an
        entity; the other quote character is written as an entity only when
        _options.QuoteMarks is set.
        */
        private void PrintAttrValue(Out fout, int indent, string val, int delim, bool wrappable)
        {
            int c;
            MutableInteger ci = new MutableInteger();
            bool wasinstring = false;
            byte[] valueChars = null;
            int i;
            int mode = (wrappable?(int) (NORMAL | ATTRIBVALUE):(int) (PREFORMATTED | ATTRIBVALUE));

            if (val != null)
            {
                valueChars = Lexer.GetBytes(val);
            }

            /* look for ASP, Tango or PHP instructions for computed attribute value */
            if (valueChars != null && valueChars.Length >= 5 && valueChars[0] == '<')
            {
                /* the bytes are copied into a char array so the first five can be compared as a string */
                char[] tmpChar;
                tmpChar = new char[valueChars.Length];
                valueChars.CopyTo(tmpChar, 0);
                if (valueChars[1] == '%' || valueChars[1] == '@' || (new string(tmpChar, 0, 5)).Equals("<?php"))
                    mode |= CDATA;
            }

            if (delim == 0)
            {
                delim = '"';
            }

            AddC('=', linelen++);

            /* don't wrap after "=" for xml documents */
            if (!_options.XmlOut)
            {
                if (indent + linelen < _options.WrapLen)
                {
                    wraphere = linelen;
                }

                if (indent + linelen >= _options.WrapLen)
                {
                    WrapLine(fout, indent);
                }

                /* re-evaluated because WrapLine above may have changed linelen (presumably; confirm against WrapLine) */
                if (indent + linelen < _options.WrapLen)
                {
                    wraphere = linelen;
                }
                else
                {
                    CondFlushLine(fout, indent);
                }
            }

            /* opening delimiter */
            AddC(delim, linelen++);

            if (val != null)
            {
                InString = false;

                i = 0;
                while (i < valueChars.Length)
                {
                    c = ((int) valueChars[i]) & 0xFF; // Convert to unsigned.

                    /* remember a space as a possible wrap point, together with the string state at that point */
                    if (wrappable && c == ' ' && indent + linelen < _options.WrapLen)
                    {
                        wraphere = linelen;
                        wasinstring = InString;
                    }

                    if (wrappable && wraphere > 0 && indent + linelen >= _options.WrapLen)
                        WrapAttrVal(fout, indent, wasinstring);

                    /* the delimiter itself can never appear literally inside the value */
                    if (c == delim)
                    {
                        string entity;

                        entity = (c == '"'?"&quot;":"&#39;");

                        for (int j = 0; j < entity.Length; j++)
                        {
                            AddC(entity[j], linelen++);
                        }

                        ++i;
                        continue;
                    }
                    else if (c == '"')
                    {
                        /* reached only when delim is not '"' */
                        if (_options.QuoteMarks)
                        {
                            AddC('&', linelen++);
                            AddC('q', linelen++);
                            AddC('u', linelen++);
                            AddC('o', linelen++);
                            AddC('t', linelen++);
                            AddC(';', linelen++);
                        }
                        else
                        {
                            AddC('"', linelen++);
                        }

                        /* a double quote inside a single-quoted value toggles the in-string state */
                        if (delim == '\'')
                        {
                            InString = !InString;
                        }

                        ++i;
                        continue;
                    }
                    else if (c == '\'')
                    {
                        /* reached only when delim is not '\'' */
                        if (_options.QuoteMarks)
                        {
                            AddC('&', linelen++);
                            AddC('#', linelen++);
                            AddC('3', linelen++);
                            AddC('9', linelen++);
                            AddC(';', linelen++);
                        }
                        else
                        {
                            AddC('\'', linelen++);
                        }

                        /* a single quote inside a double-quoted value toggles the in-string state */
                        if (delim == '"')
                        {
                            InString = !InString;
                        }

                        ++i;
                        continue;
                    }

                    /* look for UTF-8 multibyte character */
                    if (c > 0x7F)
                    {
                        /* GetUTF8 returns the number of extra bytes; the ++i below accounts for the lead byte */
                        i += GetUTF8(valueChars, i, ci);
                        c = ci.Val;
                    }

                    ++i;

                    if (c == '\n')
                    {
                        FlushLine(fout, indent);
                        continue;
                    }

                    PrintChar(c, mode);
                }
            }

            /* closing delimiter; the in-string state never outlives the value */
            InString = false;
            AddC(delim, linelen++);
        }
コード例 #7
0
		/* swallows closing '>' */
		
		/*
		Reads attributes until the end of input, or until ParseAttribute returns
		null without having produced an ASP or PHP node.
		
		Every accepted item becomes a new AttVal that receives the list built so
		far as its first constructor argument and then becomes the new list, so
		the returned AttVal is the one created last (null when none was created).
		Attributes whose name fails IsValidAttrName are reported with
		BAD_ATTRIBUTE_VALUE and are not added to the list.
		*/
		public virtual AttVal ParseAttrs(MutableBoolean isempty)
		{
			MutableInteger quote = new MutableInteger();
			MutableObject aspNode = new MutableObject();
			MutableObject phpNode = new MutableObject();
			AttVal head = null;
			
			while (!EndOfInput())
			{
				string attrName = ParseAttribute(isempty, aspNode, phpNode);
				
				if (attrName == null)
				{
					if (aspNode.Object != null)
					{
						/* attributes created by ASP markup */
						head = new AttVal(head, null, (Node) aspNode.Object, null, '\x0000', null, null);
					}
					else if (phpNode.Object != null)
					{
						/* attributes created by PHP markup */
						head = new AttVal(head, null, null, (Node) phpNode.Object, '\x0000', null, null);
					}
					else
					{
						/* neither a name nor server markup: stop */
						break;
					}
					
					continue;
				}
				
				string attrValue = ParseValue(attrName, false, isempty, quote);
				
				/* attrName cannot be null here, so only its validity needs checking */
				if (IsValidAttrName(attrName))
				{
					AttVal entry = new AttVal(head, null, null, null, quote.Val, attrName, attrValue);
					entry.Dict = AttributeTable.DefaultAttributeTable.FindAttribute(entry);
					head = entry;
				}
				else
				{
					/* constructed as in the original and then dropped; it is never linked into the list */
					AttVal rejected = new AttVal(null, null, null, null, 0, attrName, attrValue);
					Report.AttrError(this, token, attrValue, Report.BAD_ATTRIBUTE_VALUE);
				}
			}
			
			return head;
		}
コード例 #8
0
		/* values start with "=" or " = " etc. */
		/* doesn't consume the ">" at end of start tag */
		
		/*
		Reads the value part of an attribute from the input stream.
		
		name     - attribute name; consulted for the URL and script special cases
		foldCase - when true, characters that MAP flags as UPPERCASE are shifted
		           to lower case before being stored
		isempty  - set to true when an unquoted value of a non-URL attribute is
		           terminated by "/>"
		pdelim   - receives the quote character that delimited the value, '"'
		           when the value was not quoted, or the result of
		           ParseServerInstruction() when the value starts with '<'
		
		Returns null when no '=' follows the name, when a value starting with
		'<' produced no text, or when an unquoted value is empty; otherwise the
		text obtained from GetString for the collected characters.
		The characters are collected in lexbuf and lexsize is restored afterwards.
		*/
		public virtual string ParseValue(string name, bool foldCase, MutableBoolean isempty, MutableInteger pdelim)
		{
			int len = 0;
			int start;
			short map;
			bool seen_gt = false;
			bool munge = true;
			int c = 0;
			int lastc, delim, quotewarning;
			string val;
			
			delim = 0;
			pdelim.Val = (int) '"';
			
			/*
			Henry Zrepa reports that some folk are using the
			embed element with script attributes where newlines
			are significant and must be preserved
			*/
			if (Options.LiteralAttribs)
				munge = false;
			
			/* skip white space before the '=' */
			
			for (; ; )
			{
				c = input.ReadChar();
				
				if (c == StreamIn.EndOfStream)
				{
					/* NOTE(review): EndOfStream is pushed back here and again by the "c != '='" branch below; confirm UngetChar tolerates that */
					input.UngetChar(c);
					break;
				}
				
				map = MAP((char) c);
				
				if ((map & WHITE) == 0)
				{
					break;
				}
			}
			
			/*
			c should be '=' if there is a value
			other legal possibilities are white
			space, '/' and '>'
			*/
			
			if (c != '=')
			{
				input.UngetChar(c);
				return null;
			}
			
			/* skip white space after '=' */
			
			for (; ; )
			{
				c = input.ReadChar();
				if (c == StreamIn.EndOfStream)
				{
					input.UngetChar(c);
					break;
				}
				
				map = MAP((char) c);
				
				if ((map & WHITE) == 0)
					break;
			}
			
			/* check for quote marks */
			
			if (c == '"' || c == '\'')
				delim = c;
			else if (c == '<')
			{
				/* value starts with '<': collect it via ParseServerInstruction, then hand back the text and release the buffer space */
				start = lexsize;
				AddCharToLexer(c);
				pdelim.Val = ParseServerInstruction();
				len = lexsize - start;
				lexsize = start;
				return (len > 0?GetString(lexbuf, start, len):null);
			}
			else
			{
				/* unquoted value: its first character is read again by the loop below */
				input.UngetChar(c);
			}
			
			/*
			and read the value string
			check for quote mark if needed
			*/
			
			quotewarning = 0;
			start = lexsize;
			c = '\x0000';
			
			for (; ; )
			{
				lastc = c; /* track last character */
				c = input.ReadChar();
				
				if (c == StreamIn.EndOfStream)
				{
					Report.AttrError(this, token, null, Report.UNEXPECTED_END_OF_FILE);
					input.UngetChar(c);
					break;
				}
				
				/* unquoted value */
				if (delim == (char) 0)
				{
					if (c == '>')
					{
						input.UngetChar(c);
						break;
					}
					
					/* a quote inside an unquoted value ends it; the quote is not pushed back */
					if (c == '"' || c == '\'')
					{
						Report.AttrError(this, token, null, Report.UNEXPECTED_QUOTEMARK);
						break;
					}
					
					/* NOTE(review): the character tested is '<' but the code reported is UNEXPECTED_GT; confirm this is intended */
					if (c == '<')
					{
						/* in.UngetChar(c); */
						Report.AttrError(this, token, null, Report.UNEXPECTED_GT);
						/* break; */
					}
					
					/*
					For cases like <br clear=all/> need to avoid treating /> as
					part of the attribute value, however care is needed to avoid
					so treating <a href=http://www.acme.com/> in this way, which
					would map the <a> tag to <a href="http://www.acme.com"/>
					*/
					if (c == '/')
					{
						/* peek ahead in case of /> */
						c = input.ReadChar();
						if (c == '>' && !AttributeTable.DefaultAttributeTable.IsUrl(name))
						{
							isempty.Val = true;
							input.UngetChar(c);
							break;
						}
						
						/* unget peeked char */
						input.UngetChar(c);
						c = '/';
					}
				}
					/* delim is '\'' or '"' */
				else
				{
					/* the closing quote is consumed and not stored */
					if (c == delim)
					{
						break;
					}
					
					/* treat CRLF, CR and LF as single line break */
					
					if (c == '\r')
					{
						c = input.ReadChar();
						if (c != '\n')
						{
							input.UngetChar(c);
						}
						
						c = '\n';
					}
					
					/* count characters that hint at a missing closing quote; evaluated after the loop */
					if (c == '\n' || c == '<' || c == '>')
						++quotewarning;
					
					if (c == '>')
						seen_gt = true;
				}
				
				/* entities: store the '&' and let ParseEntity handle what follows */
				if (c == '&')
				{
					AddCharToLexer(c);
					ParseEntity((short) 0);
					continue;
				}
				
				/*
				kludge for JavaScript attribute values
				with line continuations in string literals
				*/
				if (c == '\\')
				{
					c = input.ReadChar();
					
					/* backslash + newline: the backslash is dropped and c stays '\n'; otherwise the peeked char is pushed back */
					if (c != '\n')
					{
						input.UngetChar(c);
						c = '\\';
					}
				}
				
				map = MAP((char) c);
				
				if ((map & WHITE) != 0)
				{
					/* white space ends an unquoted value (the character is not pushed back) */
					if (delim == (char) 0)
						break;
					
					/* inside quotes: collapse runs of white space to one space unless munging is disabled */
					if (munge)
					{
						c = ' ';
						
						if (lastc == ' ')
							continue;
					}
				}
				else if (foldCase && (map & UPPERCASE) != 0)
					c += (int) ('a' - 'A');
				
				AddCharToLexer(c);
			}
			
			if (quotewarning > 10 && seen_gt && munge)
			{
				/*
				there is almost certainly a missing trailing quote mark
				as we have seen too many newlines, < or > characters.
				
				an exception is made for Javascript attributes and the
				javascript URL scheme which may legitimately include < and >
				*/
				/* 11 is the length of "javascript:" */
				if (!AttributeTable.DefaultAttributeTable.IsScript(name) && !(AttributeTable.DefaultAttributeTable.IsUrl(name) && (GetString(lexbuf, start, 11)).Equals("javascript:")))
					Report.Error(this, null, null, Report.SUSPECTED_MISSING_QUOTE);
			}
			
			/* measure the collected text, then give the buffer space back */
			len = lexsize - start;
			lexsize = start;
			
			/* a quoted value is returned even when it is empty */
			if (len > 0 || delim != 0)
			{
				val = GetString(lexbuf, start, len);
			}
			else
			{
				val = null;
			}
			
			/* note delimiter if given */
			if (delim != 0)
				pdelim.Val = delim;
			else
				pdelim.Val = (int) '"';
			
			return val;
		}