/// <summary> Print the contents of the tag.
		/// The result is a prefix of the form <c>Tag (start,end): </c> (or
		/// <c>End (start,end): </c> for an end tag) followed by the tag text.
		/// When prefix plus text would exceed 80 characters the text is cut short
		/// and "..." is appended.
		/// </summary>
		/// <returns> A string describing the tag. For text that looks like HTML use #toHtml().
		/// </returns>
		public override System.String ToString()
		{
			System.String text = GetText();
			System.Text.StringBuilder ret = new System.Text.StringBuilder(20 + text.Length);
			System.String type = IsEndTag() ? "End" : "Tag";
			Cursor start = new Cursor(Page, StartPosition);
			Cursor end = new Cursor(Page, EndPosition);

			ret.Append(type);
			ret.Append(" (");
			ret.Append(start);
			ret.Append(",");
			ret.Append(end);
			ret.Append("): ");
			if (80 < ret.Length + text.Length)
			{
				// Leave room for the three character "..." marker. The prefix length
				// depends on how the cursors render, so clamp at zero: a prefix longer
				// than 77 characters must not produce a negative length.
				int keep = System.Math.Max(0, 77 - ret.Length);
				ret.Append(text, 0, keep);
				ret.Append("...");
			}
			else
				ret.Append(text);

			return (ret.ToString());
		}
		/// <summary> Rewind the lexer so that parsing starts over from the top.
		/// The page is reset first and the cursor is then replaced by a fresh one
		/// at offset zero, so that the next call to <code>nextNode()</code>
		/// returns the first lexeme on the page.
		/// </summary>
		public virtual void Reset()
		{
			this.Page.Reset();
			this.Cursor = new Cursor(this.Page, 0);
		}
		/// <summary> Fetch the object at the given index.</summary>
		/// <param name="index">The item number to get.
		/// </param>
		/// <param name="reuse">An object obtained from an earlier fetch that the
		/// caller no longer needs, or null. When it is supplied it is overwritten
		/// with the requested position and handed back instead of allocating a
		/// new object; it must be a Cursor.
		/// </param>
		/// <returns> The Ordered object at that index.
		/// </returns>
		public virtual IOrdered Fetch(int index, IOrdered reuse)
		{
			// nothing to recycle: build a new cursor
			if (null == reuse)
				return (new Cursor(Page, mIndices[index]));

			// recycle the caller's cursor in place
			Cursor recycled = (Cursor) reuse;
			recycled.mPosition = mIndices[index];
			recycled.mPage = Page; // redundant
			return (recycled);
		}
		/// <summary> Add an element to the list.
		/// The empty list and a position at or beyond the last stored position
		/// are handled directly; only a position below the last stored one
		/// needs a binary search.
		/// </summary>
		/// <param name="cursor">The element to add.
		/// </param>
		/// <returns> The position at which the element was inserted or
		/// the index of the existing element if it is a duplicate.
		/// </returns>
		public virtual int Add(Cursor cursor)
		{
			int offset = cursor.Position;

			// first element ever
			if (0 == mCount)
			{
				InsertElementAt(offset, 0);
				return (0);
			}

			int tail = mIndices[mCount - 1];

			// same as the current last element: nothing to insert
			if (offset == tail)
				return (mCount - 1);

			// beyond the current last element: append
			if (offset > tail)
			{
				int appended = mCount;
				InsertElementAt(offset, appended);
				return (appended);
			}

			// somewhere in the middle: find where it goes
			int slot = SortImpl.Bsearch(this, cursor);
			bool duplicate = (slot < Size()) && (offset == mIndices[slot]);

			// insert, but not twice
			if (!duplicate)
				InsertElementAt(offset, slot);

			return (slot);
		}
		/// <summary> Get the line number for a cursor.
		/// The position is looked up among the stored line end positions by
		/// binary search.
		/// </summary>
		/// <param name="cursor">The character offset into the page.
		/// </param>
		/// <returns> The line number the character is in.
		/// </returns>
		public virtual int Row(Cursor cursor)
		{
			int line = SortImpl.Bsearch(this, cursor);

			// A position that exactly matches a stored line end is really the
			// first character of the following line, so step one line forward.
			bool atBoundary = (line < mCount) && (cursor.Position == mIndices[line]);
			if (atBoundary)
				line++;

			return (line);
		}
		/// <summary> Get the column number for a cursor.
		/// The column is the distance from the stored position that precedes
		/// the cursor's row, or from offset zero when the cursor is on row zero.
		/// </summary>
		/// <param name="cursor">The character offset into the page.
		/// </param>
		/// <returns> The character offset into the line this cursor is on.
		/// </returns>
		public virtual int Column(Cursor cursor)
		{
			int line = Row(cursor);
			int lineStart = (0 == line) ? 0 : this.ElementAt(line - 1);

			return (cursor.Position - lineStart);
		}
		/// <summary> Express this string node as a printable string.
		/// This is suitable for display in a debugger or output to a printout.
		/// Tab, newline and carriage return are replaced by their equivalent
		/// escape sequence and the result is cut off with "..." once it reaches
		/// about 80 characters. When the node has no cached text the characters
		/// are read from the page between the start and end positions.
		/// </summary>
		/// <returns> A string representation of the string node.
		/// </returns>
		public override System.String ToString()
		{
			int startpos = StartPosition;
			int endpos = EndPosition;
			System.Text.StringBuilder ret = new System.Text.StringBuilder(endpos - startpos + 20);

			if (null == mText)
			{
				Cursor start = new Cursor(Page, startpos);
				Cursor end = new Cursor(Page, endpos);
				ret.Append("Txt (");
				ret.Append(start);
				ret.Append(",");
				ret.Append(end);
				ret.Append("): ");
				while (start.Position < endpos)
				{
					try
					{
						AppendDisplayEscaped(ret, mPage.GetCharacter(start));
					}
					catch (ParserException)
					{
						// Not really expected, and we're only doing ToString, so the
						// error itself is ignored. The read may have failed without
						// advancing the cursor, though, so stop here instead of
						// retrying the same position forever.
						break;
					}
					if (77 <= ret.Length)
					{
						ret.Append("...");
						break;
					}
				}
			}
			else
			{
				ret.Append("Txt (");
				ret.Append(startpos);
				ret.Append(",");
				ret.Append(endpos);
				ret.Append("): ");
				for (int i = 0; i < mText.Length; i++)
				{
					AppendDisplayEscaped(ret, mText[i]);
					if (77 <= ret.Length)
					{
						ret.Append("...");
						break;
					}
				}
			}

			return (ret.ToString());
		}

		/// <summary> Append one character to a buffer in printable form.
		/// Tab, newline and carriage return are written as the two character
		/// sequences \t, \n and \r; every other character is appended unchanged.
		/// </summary>
		/// <param name="buffer">The buffer to append to.
		/// </param>
		/// <param name="c">The character to append.
		/// </param>
		private static void AppendDisplayEscaped(System.Text.StringBuilder buffer, char c)
		{
			switch (c)
			{
				case '\t':
					buffer.Append("\\t");
					break;

				case '\n':
					buffer.Append("\\n");
					break;

				case '\r':
					buffer.Append("\\r");
					break;

				default:
					buffer.Append(c);
					break;
			}
		}
		/// <summary> Decode script encoded by the Microsoft obfuscator.
		/// Characters are read from the page one at a time and fed through a
		/// state machine: text is copied through unchanged until the leader
		/// sequence is matched, then a six character length field, the prefix,
		/// the encoded body (where '@' introduces an escape), a six character
		/// checksum field and the trailer are consumed in that order. The length
		/// and checksum fields are converted with DecodeBase64.
		/// </summary>
		/// <param name="page">The source for encoded text.
		/// </param>
		/// <param name="cursor">The position at which to start decoding.
		/// This is advanced to the end of the encoded text.
		/// </param>
		/// <returns> The plaintext.
		/// </returns>
		/// <exception cref="ParserException">If an error is discovered while decoding:
		/// the input ends in the middle of an encoded section, the length is
		/// negative, an unexpected character appears in the prefix, body, escape
		/// or trailer, or the checksum does not match.
		/// </exception>
		public static System.String Decode(Page page, Cursor cursor)
		{
			int state;
			int substate_initial;
			int substate_length;
			int substate_prefix;
			int substate_checksum;
			int substate_final;
			long checksum;
			long length;
			char[] buffer;
			buffer = new char[6];
			int index;
			char character;
			int input_character;
			bool found;
			System.Text.StringBuilder ret;
			
			ret = new System.Text.StringBuilder(1024);
			
			state = STATE_INITIAL;
			substate_initial = 0;
			substate_length = 0;
			substate_prefix = 0;
			substate_checksum = 0;
			substate_final = 0;
			length = 0L;
			checksum = 0L;
			index = 0;
			while (STATE_DONE != state)
			{
				input_character = page.GetCharacter(cursor);
				character = (char) input_character;
				if (Page.EOF == input_character)
				{
					// end of input is only legal between encoded sections
					if ((STATE_INITIAL != state) || (0 != substate_initial) || (0 != substate_length) || (0 != substate_prefix) || (0 != substate_checksum) || (0 != substate_final))
						throw new ParserException("illegal state for exit");
					state = STATE_DONE;
				}
				else
					switch (state)
					{
						
						case STATE_INITIAL: 
							// plain text: pass characters through while watching for the leader
							if (character == mLeader[substate_initial])
							{
								substate_initial++;
								if (substate_initial == mLeader.Length)
								{
									substate_initial = 0;
									state = STATE_LENGTH;
								}
							}
							else
							{
								// oops, flush: the partially matched leader was ordinary
								// text after all, so emit exactly the characters matched
								// so far, in order, followed by the current character
								for (int k = 0; k < substate_initial; k++)
									ret.Append(mLeader[k]);
								substate_initial = 0;
								ret.Append(character);
							}
							break;
						
						
						case STATE_LENGTH: 
							// collect the length field, then decode it
							buffer[substate_length] = character;
							substate_length++;
							if (substate_length >= buffer.Length)
							{
								length = DecodeBase64(buffer);
								if (0 > length)
									throw new ParserException("illegal length: " + length);
								substate_length = 0;
								state = STATE_PREFIX;
							}
							break;
						
						
						case STATE_PREFIX: 
							// the prefix must match character for character
							if (character == mPrefix[substate_prefix])
								substate_prefix++;
							else
								throw new ParserException("illegal character encountered: " + (int) character + " ('" + character + "')");
							if (substate_prefix >= mPrefix.Length)
							{
								substate_prefix = 0;
								state = STATE_DECODE;
							}
							break;
						
						
						case STATE_DECODE: 
							if ('@' == character)
								state = STATE_ESCAPE;
							else
							{
								if (input_character < 0x80)
								{
									// map the character onto a lookup table column:
									// tab becomes 0, characters from space upwards become 1, 2, ...
									if (input_character == '\t')
										input_character = 0;
									else if (input_character >= ' ')
										input_character -= (' ' - 1);
									else
										throw new ParserException("illegal encoded character: " + input_character + " ('" + character + "')");
									// the table row is chosen by the position within the body, modulo 64
									char ch = mLookupTable[mEncodingIndex[index % 64]][input_character];
									ret.Append(ch);
									checksum += ch;
									index++;
								}
								else
									// characters at or above 0x80 are copied through and not checksummed
									ret.Append(character);
							}
							// every input character of the body, including '@', counts against the length
							length--;
							if (0 == length)
							{
								index = 0;
								state = STATE_CHECKSUM;
							}
							break;
						
						
						case STATE_ESCAPE: 
							// translate the character following '@' via the escape tables
							found = false;
							for (int i = 0; i < mEscapes.Length; i++)
								if (character == mEscapes[i])
								{
									found = true;
									character = mEscaped[i];
								}
							if (!found)
								throw new ParserException("unexpected escape character: " + (int) character + " ('" + character + "')");
							ret.Append(character);
							checksum += character;
							index++;
							state = STATE_DECODE;
							length--;
							if (0 == length)
							{
								index = 0;
								state = STATE_CHECKSUM;
							}
							break;
						
						
						case STATE_CHECKSUM: 
							// collect the checksum field and compare it with the running sum
							buffer[substate_checksum] = character;
							substate_checksum++;
							if (substate_checksum >= buffer.Length)
							{
								long check = DecodeBase64(buffer);
								if (check != checksum)
									throw new ParserException("incorrect checksum, expected " + check + ", calculated " + checksum);
								checksum = 0;
								substate_checksum = 0;
								state = STATE_FINAL;
							}
							break;
						
						
						case STATE_FINAL: 
							// the trailer must match character for character
							if (character == mTrailer[substate_final])
								substate_final++;
							else
								throw new ParserException("illegal character encountered: " + (int) character + " ('" + character + "')");
							if (substate_final >= mTrailer.Length)
							{
								substate_final = 0;
								// LAST_STATE is defined outside this method and decides
								// what happens after a complete encoded section
								state = LAST_STATE;
							}
							break;
						
						default: 
							throw new ParserException("invalid state: " + state);
						
					}
			}
			
			return (ret.ToString());
		}
Esempio n. 9
0
		/// <summary> Get the column number for a cursor.
		/// The computation is delegated to the page's line index.
		/// </summary>
		/// <param name="cursor">The character offset into the page.
		/// </param>
		/// <returns> The character offset into the line this cursor is on.
		/// </returns>
		public virtual int Column(Cursor cursor)
		{
			int column = mIndex.Column(cursor);

			return (column);
		}
Esempio n. 10
0
		/// <summary> Get the text line the position of the cursor lies on.
		/// If no source has been set up yet, the page content is fetched first
		/// using the stored connection.
		/// </summary>
		/// <param name="cursor">The position to calculate for.
		/// </param>
		/// <returns> The contents of the URL or file corresponding to the line number
		/// containing the cursor position. For the last line, which has no
		/// recorded end yet, the text runs up to the current source offset.
		/// </returns>
		public virtual System.String GetLine(Cursor cursor)
		{
			int line;
			int size;
			int start;
			int end;

			if (mSource == null)
			{
				this.GetPageContent(this.mConnection, false);
			}

			line = Row(cursor);
			size = mIndex.Size();

			// The index holds the offset just past each end of line, i.e. the
			// start of the following line. Line zero therefore starts at offset
			// zero and line n starts at entry n - 1, the same rule the index
			// uses to compute a column.
			if (0 == line)
				start = 0;
			else
				start = mIndex.ElementAt(line - 1);

			// The line ends where the next one starts. The last line has no
			// entry of its own, so use what has been read from the source so far.
			if (line < size)
				end = mIndex.ElementAt(line);
			else
				end = mSource.Offset();

			return (GetText(start, end));
		}
Esempio n. 11
0
		/// <summary> Get the line number for a cursor.
		/// The computation is delegated to the page's line index.
		/// </summary>
		/// <param name="cursor">The character offset into the page.
		/// </param>
		/// <returns> The line number the character is in.
		/// </returns>
		public virtual int Row(Cursor cursor)
		{
			int line = mIndex.Row(cursor);

			return (line);
		}
Esempio n. 12
0
		/// <summary> Read the character at the given cursor position.
		/// The cursor position can be only behind or equal to the
		/// current source position.
		/// Returns end of lines (EOL) as \n, by converting \r and \r\n to \n,
		/// and updates the end-of-line index accordingly.
		/// Advances the cursor position by one (or two in the \r\n case).
		/// </summary>
		/// <param name="cursor">The position to read at.
		/// </param>
		/// <returns> The character at that position, and modifies the cursor to
		/// prepare for the next read. If the source is exhausted <c>EOF</c> is
		/// returned and the cursor is left where it was.
		/// </returns>
		/// <exception cref="ParserException">If an IOException on the underlying source
		/// occurs, or an attempt is made to read characters in the future (the
		/// cursor position is ahead of the underlying stream)
		/// </exception>
		public virtual char GetCharacter(Cursor cursor)
		{
			int i;
			char ret;
			
			i = cursor.Position;
			if (mSource.Offset() < i)
				// the cursor is ahead of what has been read so far;
				// hmmm, we could skip ahead, but then what about the EOL index
				throw new ParserException("Attempt to read future characters from source " + i + " > " + mSource.Offset());
			else if (mSource.Offset() == i)
				// the cursor is at the read frontier: pull a new character from the source
				try
				{
					i = mSource.Read();
					if (Source.EOF == i)
						// nothing left; note that the cursor is not advanced here
						ret = EOF;
					else
					{
						ret = (char) i;
						cursor.Advance();
					}
				}
				catch (System.IO.IOException ioe)
				{
					throw new ParserException("Problem reading a character at position " + cursor.Position, ioe);
				}
			else
			{
				// historic read: the character was already consumed from the
				// source, so fetch it by position
				try
				{
					ret = mSource.GetCharacter(i);
				}
				catch (System.IO.IOException ioe)
				{
					throw new ParserException("Can't read a character at position " + i, ioe);
				}
				cursor.Advance();
			}
			
			// handle \r: report it as \n and swallow a directly following \n
			if ('\r' == ret)
			{
				// switch to single character EOL
				ret = '\n';
				
				// check for a \n in the next position
				if (mSource.Offset() == cursor.Position)
					// the next character has not been read yet: read it, and
					// push it back if it turns out not to be a \n
					try
					{
						i = mSource.Read();
						if (Source.EOF == i)
						{
							// do nothing: the \r was the last character
						}
						else if ('\n' == (char) i)
							// \r\n pair: step over the \n as well
							cursor.Advance();
						else
							try
							{
								mSource.Unread();
							}
							catch (System.IO.IOException ioe)
							{
								throw new ParserException("Can't unread a character at position " + cursor.Position, ioe);
							}
					}
					catch (System.IO.IOException ioe)
					{
						throw new ParserException("Problem reading a character at position " + cursor.Position, ioe);
					}
				else
					// the next character was read earlier: peek at it by position
					try
					{
						if ('\n' == mSource.GetCharacter(cursor.Position))
							cursor.Advance();
					}
					catch (System.IO.IOException ioe)
					{
						throw new ParserException("can't read a character at position " + cursor.Position, ioe);
					}
			}
			if ('\n' == ret)
				// update the EOL index in any case; the cursor has already been
				// advanced, so the recorded position is the one after the line end
				mIndex.Add(cursor);
			
			return (ret);
		}
Esempio n. 13
0
		/// <summary> Deserialize the page.
		/// For details see <code>writeObject()</code>.
		/// A leading boolean tells whether the page was read from a URL. If so,
		/// the saved offset and URL string are read, the default fields are
		/// restored, and that many characters are read again from the start so
		/// the page is back at the saved offset. Otherwise only the URL string
		/// and the default fields are restored.
		/// </summary>
		/// <param name="in_Renamed">The serialization data to decode.
		/// </param>
		/// <param name="context">The streaming context, handed on to the default
		/// field reader.
		/// </param>
		/// <exception cref="System.IO.IOException">If re-reading the page up to the
		/// saved offset fails; the causing ParserException is attached as the
		/// inner exception.
		/// </exception>
		protected Page(System.Runtime.Serialization.SerializationInfo in_Renamed, System.Runtime.Serialization.StreamingContext context)
		{
			bool fromurl;
			int offset;
			System.String href;
			Cursor cursor;

			fromurl = in_Renamed.GetBoolean("Winista.Text.Htmlparser.Lex.Pagedata1");
			if (fromurl)
			{
				offset = in_Renamed.GetInt32("Winista.Text.Htmlparser.Lex.Pagedata2");
				href = ((System.String) in_Renamed.GetValue("Winista.Text.Htmlparser.Lex.Pagedata3", typeof(System.String)));
				Support.SupportMisc.DefaultReadObject(in_Renamed, context, this);
				if (null != Url)
				{
					// The connection is not re-opened here (that call is disabled);
					// the URL is still parsed, so a malformed one fails at this
					// point just as it did before.
					System.Uri url = new System.Uri(Url);
				}
				// replay the characters up to the saved offset
				cursor = new Cursor(this, 0);
				try
				{
					for (int i = 0; i < offset; i++)
						GetCharacter(cursor);
				}
				catch (ParserException pe)
				{
					// keep the original failure reachable through InnerException
					throw new System.IO.IOException(pe.Message, pe);
				}
				Url = href;
			}
			else
			{
				href = ((System.String) in_Renamed.GetValue("Winista.Text.Htmlparser.Lex.Pagedata4", typeof(System.String)));
				Support.SupportMisc.DefaultReadObject(in_Renamed, context, this);
				Url = href;
			}
		}
Esempio n. 14
0
		/// <summary> Advance the cursor through a JIS escape sequence.
		/// Characters are consumed until the three character sequence
		/// ESC (0x1b), '(' and 'J' has been read, or the page runs out.
		/// </summary>
		/// <param name="cursor">A cursor positioned within the escape sequence.
		/// </param>
		/// <exception cref="ParserException">If a problem occurs reading from the source.
		/// </exception>
		protected internal virtual void ScanJIS(Cursor cursor)
		{
			// number of characters of the terminating sequence matched so far
			int matched = 0;

			while (true)
			{
				char current = mPage.GetCharacter(cursor);
				if (Page.EOF == current)
					return;

				if (0 == matched)
				{
					// waiting for the escape character
					if (0x1b == current)
						matched = 1;
				}
				else if (1 == matched)
				{
					// escape seen, '(' must follow directly
					matched = ('(' == current) ? 2 : 0;
				}
				else if (2 == matched)
				{
					// escape and '(' seen, 'J' completes the sequence
					if ('J' == current)
						return;
					matched = 0;
				}
				else
					throw new System.SystemException("state " + matched);
			}
		}
		/// <summary> Remove an element from the list.
		/// Nothing happens when no stored position equals the cursor's position.
		/// </summary>
		/// <param name="cursor">The element to remove.
		/// </param>
		public virtual void  Remove(Cursor cursor)
		{
			// locate the slot where the position is, or would be
			int slot = SortImpl.Bsearch(this, cursor);

			// past the end: not present
			if (slot >= Size())
				return;

			// a different position occupies the slot: not present
			if (cursor.Position != mIndices[slot])
				return;

			RemoveElementAt(slot);
		}
Esempio n. 16
0
		/// <summary> Creates a new instance of a Lexer.
		/// The lexer starts with a cursor at offset zero of the page and uses
		/// itself as its node factory.
		/// </summary>
		/// <param name="page">The page with HTML text.
		/// </param>
		public Lexer(Page page)
		{
			this.Page = page;
			this.Cursor = new Cursor(page, 0);
			this.NodeFactory = this;
		}
		/// <summary> Creates an instance bound to a lexer and a feedback object,
		/// with its own cursor at offset zero of the lexer's page.
		/// </summary>
		/// <param name="lexer">The lexer to store; its page is used for the cursor.
		/// </param>
		/// <param name="fb">The feedback object to store.
		/// </param>
		public IteratorImpl(Lexer lexer, IParserFeedBack fb)
		{
			this.mLexer = lexer;
			this.mFeedback = fb;
			this.mCursor = new Cursor(lexer.Page, 0);
		}