/// <summary> Create an empty index.</summary>
		/// <param name="page">The page associated with this index.
		/// </param>
		public PageIndex(Page page)
		{
			// NOTE(review): the backing array is sized from mIncrement as it stands *before*
			// mIncrement is assigned at the end of this constructor, so the initial capacity
			// depends on the field's declared initial value (not visible here) -- confirm
			// that this ordering is intended.
			int initialCapacity = mIncrement;

			mPage = page;
			mIndices = new int[initialCapacity];

			// The index starts out with no valid elements.
			mCount = 0;

			// Later growth uses twice the starting increment.
			mIncrement = mStartIncrement * 2;
		}
		//
		// provide same constructors as super class
		//
		
		private void Init()
		{
			// Every offset is reset to the same negative sentinel and the page reference is cleared.
			const int unset = -1;

			mPage = null;
			mNameStart = unset;
			mNameEnd = unset;
			mValueStart = unset;
			mValueEnd = unset;
		}
		/// <summary> Create an abstract node with the page positions given.
		/// Remember the page and start and end cursor positions.
		/// </summary>
		/// <param name="page">The page this tag was read from.
		/// </param>
		/// <param name="start">The starting offset of this node within the page.
		/// </param>
		/// <param name="end">The ending offset of this node within the page.
		/// </param>
		public AbstractNode(Page page, int start, int end)
		{
			// Record where this node lives: the page and its begin/end offsets.
			this.mPage = page;
			this.nodeBegin = start;
			this.nodeEnd = end;

			// A freshly constructed node has neither a parent nor children.
			this.parent = null;
			this.children = null;
		}
		/// <summary> Create an attribute.</summary>
		/// <param name="page">The page containing the attribute.
		/// </param>
		/// <param name="name_start">The starting offset of the name within the page.
		/// If this is negative, the name is considered null.
		/// </param>
		/// <param name="name_end">The ending offset of the name within the page.
		/// </param>
		/// <param name="value_start">The starting offset of the value within the page.
		/// If this is negative, the value is considered null.
		/// </param>
		/// <param name="value_end">The ending offset of the value within the page.
		/// </param>
		/// <param name="quote">The quote, if any, surrounding the value of the attribute,
		/// (i.e. ' or "), or zero if none.
		/// </param>
		public PageAttribute(Page page, int name_start, int name_end, int value_start, int value_end, char quote)
		{
			// Store the page and the four offsets delimiting the name and the value.
			this.mPage = page;
			this.mNameStart = name_start;
			this.mNameEnd = name_end;
			this.mValueStart = value_start;
			this.mValueEnd = value_end;

			// The textual parts are explicitly set to null; only the quote character
			// is taken from the caller.
			SetName(null);
			SetAssignment(null);
			SetValue(null);
			SetQuote(quote);
		}
		/// <summary> Create a tag with the location and attributes provided</summary>
		/// <param name="page">The page this tag was read from.
		/// </param>
		/// <param name="start">The starting offset of this node within the page.
		/// </param>
		/// <param name="end">The ending offset of this node within the page.
		/// </param>
		/// <param name="attributes">The list of attributes that were parsed in this tag.
		/// </param>
		/// <seealso cref="Attribute">
		/// </seealso>
		public TagNode(Page page, int start, int end, System.Collections.ArrayList attributes)
			: base(page, start, end)
		{
			mScanner = mDefaultScanner;
			mAttributes = attributes;

			// When no attributes were supplied, derive the tag name from the
			// first entry of Ids, falling back to the empty string.
			bool hasAttributes = (mAttributes != null) && (mAttributes.Count != 0);
			if (!hasAttributes)
			{
				System.String[] ids = Ids;
				bool hasId = (ids != null) && (ids.Length != 0);

				TagName = hasId ? ids[0] : ""; // make sure it's not null
			}
		}
		/// <summary> Create a new remark node.</summary>
		/// <param name="page">The page the node is on.
		/// </param>
		/// <param name="start">The beginning position of the remark.
		/// </param>
		/// <param name="end">The ending position of the remark.
		/// </param>
		/// <returns> A remark node comprising the indicated characters from the page.
		/// </returns>
		public virtual IRemark CreateRemarkNode(Page page, int start, int end)
		{
			try
			{
				// Preferred path: clone the registered remark prototype and position it on the page.
				IRemark clone = (IRemark) (RemarkPrototype.Clone());
				clone.Page = page;
				clone.StartPosition = start;
				clone.EndPosition = end;
				return clone;
			}
			catch
			{
				// Any failure while cloning or configuring falls back to a plain RemarkNode.
				return new RemarkNode(page, start, end);
			}
		}
		/// <summary> Create a new string node.</summary>
		/// <param name="page">The page the node is on.
		/// </param>
		/// <param name="start">The beginning position of the string.
		/// </param>
		/// <param name="end">The ending position of the string.
		/// </param>
		/// <returns> A text node comprising the indicated characters from the page.
		/// </returns>
		public virtual IText CreateStringNode(Page page, int start, int end)
		{
			try
			{
				// Preferred path: clone the registered text prototype and position it on the page.
				IText clone = (IText) (TextPrototype.Clone());
				clone.Page = page;
				clone.StartPosition = start;
				clone.EndPosition = end;
				return clone;
			}
			catch
			{
				// Any failure while cloning or configuring falls back to a plain TextNode.
				return new TextNode(page, start, end);
			}
		}
		/// <summary> Creates a new instance of a Lexer.</summary>
		/// <param name="page">The page with HTML text.
		/// </param>
		public Lexer(Page page)
		{
			// Attach the page, then place the cursor at offset zero of that page.
			this.Page = page;
			this.Cursor = new Cursor(page, 0);

			// The lexer serves as its own node factory.
			this.NodeFactory = this;
		}
		/// <summary> Create a new tag node.
		/// Note that the attributes vector contains at least one element,
		/// which is the tag name (standalone attribute) at position zero.
		/// This can be used to decide which type of node to create, or
		/// gate other processing that may be appropriate.
		/// </summary>
		/// <param name="page">The page the node is on.
		/// </param>
		/// <param name="start">The beginning position of the tag.
		/// </param>
		/// <param name="end">The ending position of the tag.
		/// </param>
		/// <param name="attributes">The attributes contained in this tag.
		/// </param>
		/// <returns> The created Tag node.
		/// </returns>
		public virtual ITag CreateTagNode(Page page, int start, int end, System.Collections.ArrayList attributes)
		{
			// This factory always produces a plain TagNode carrying the given attributes.
			ITag node = new TagNode(page, start, end, attributes);
			return node;
		}
// Example #10
// 0
		/// <summary> Create a new remark node.</summary>
		/// <param name="page">The page the node is on.
		/// </param>
		/// <param name="start">The beginning position of the remark.
		/// </param>
		/// <param name="end">The ending position of the remark.
		/// </param>
		/// <returns> The created Remark node.
		/// </returns>
		public virtual IRemark CreateRemarkNode(Page page, int start, int end)
		{
			// This factory always produces a plain RemarkNode for the given span.
			IRemark node = new RemarkNode(page, start, end);
			return node;
		}
// Example #11
// 0
		//
		// NodeFactory interface
		//
		
		/// <summary> Create a new string node.</summary>
		/// <param name="page">The page the node is on.
		/// </param>
		/// <param name="start">The beginning position of the string.
		/// </param>
		/// <param name="end">The ending position of the string.
		/// </param>
		/// <returns> The created Text node.
		/// </returns>
		public virtual IText CreateStringNode(Page page, int start, int end)
		{
			// This factory always produces a plain TextNode for the given span.
			IText node = new TextNode(page, start, end);
			return node;
		}
		/// <summary> Creates an end tag with the same name as the given tag.</summary>
		/// <param name="tag">The tag to end.
		/// </param>
		/// <param name="lexer">The object containing the node factory.
		/// </param>
		/// <param name="page">The page the tag is on (virtually).
		/// </param>
		/// <param name="position">The offset into the page at which the tag is to
		/// be anchored.
		/// </param>
		/// <returns> An end tag with the name '"/" + tag.RawTagName' and a start
		/// and end position at the given position. The fact these positions are
		/// equal may be used to distinguish it as a virtual tag later on.
		/// </returns>
		protected internal virtual ITag CreateVirtualEndTag(ITag tag, Lexer lexer, Page page, int position)
		{
			// The end tag is named after the raw name of the tag being closed, prefixed with a slash.
			System.String endTagName = "/" + tag.RawTagName;

			// Its attribute list holds a single entry: the name attribute, with a null value.
			System.Collections.ArrayList attributes = System.Collections.ArrayList.Synchronized(new System.Collections.ArrayList(10));
			attributes.Add(new TagAttribute(endTagName, (System.String) null));

			// Start and end are both 'position'; the lexer's node factory builds the node.
			return lexer.NodeFactory.CreateTagNode(page, position, position, attributes);
		}
		/// <summary> Create an index with the elements given.</summary>
		/// <param name="page">The page associated with this index.
		/// </param>
		/// <param name="cursors">The initial elements of the index.
		/// NOTE: The list must be sorted in ascending order.
		/// </param>
		public PageIndex(Page page, int[] cursors)
		{
			mPage = page;

			// The caller's array is adopted directly (no copy is made), and every
			// one of its elements is counted as valid.
			mIndices = cursors;
			mCount = mIndices.Length;
		}
		/// <summary> Constructor takes in the page and beginning and ending posns.</summary>
		/// <param name="page">The page this string is on.
		/// </param>
		/// <param name="start">The beginning position of the string.
		/// </param>
		/// <param name="end">The ending position of the string.
		/// </param>
		public TextNode(Page page, int start, int end)
			: base(page, start, end)
		{
			// Only the page span is recorded (by the base constructor); mText starts out null.
			this.mText = null;
		}
		/// <summary> Create a new tag node.
		/// Note that the attributes vector contains at least one element,
		/// which is the tag name (standalone attribute) at position zero.
		/// This can be used to decide which type of node to create, or
		/// gate other processing that may be appropriate.
		/// </summary>
		/// <param name="page">The page the node is on.
		/// </param>
		/// <param name="start">The beginning position of the tag.
		/// </param>
		/// <param name="end">The ending position of the tag.
		/// </param>
		/// <param name="attributes">The attributes contained in this tag.
		/// </param>
		/// <returns> A tag node comprising the indicated characters from the page.
		/// </returns>
		public virtual ITag CreateTagNode(Page page, int start, int end, System.Collections.ArrayList attributes)
		{
			ITag node = null;

			// The first attribute, when there is one, supplies the tag name used for the lookup.
			System.String tagId = null;
			if (attributes.Count != 0)
				tagId = ((TagAttribute) attributes[0]).GetName();

			if (tagId != null)
			{
				try
				{
					tagId = tagId.ToUpper(new System.Globalization.CultureInfo("en"));

					// Names starting with "/" (end tags) are never looked up in mBlastocyst.
					if (!tagId.StartsWith("/"))
					{
						// Drop a trailing "/" before the lookup.
						if (tagId.EndsWith("/"))
							tagId = tagId.Substring(0, tagId.Length - 1);

						ITag registered = (ITag) mBlastocyst[tagId];
						if (registered != null)
						{
							node = (ITag) registered.Clone();
							node.Page = page;
							node.StartPosition = start;
							node.EndPosition = end;
							node.AttributesEx = attributes;
						}
					}
				}
				catch
				{
					// default to creating a generic one
				}
			}

			if (node == null)
			{
				// No specific node was produced: clone the generic tag prototype,
				// or construct a TagNode directly if that fails.
				try
				{
					node = (ITag) TagPrototype.Clone();
					node.Page = page;
					node.StartPosition = start;
					node.EndPosition = end;
					node.AttributesEx = attributes;
				}
				catch
				{
					node = new TagNode(page, start, end, attributes);
				}
			}

			return node;
		}
		/// <summary> Construct a <code>Cursor</code> from the page and position given.</summary>
		/// <param name="page">The page this cursor is on.
		/// </param>
		/// <param name="offset">The character offset within the page.
		/// </param>
		public Cursor(Page page, int offset)
		{
			// A cursor is simply a (page, character offset) pair.
			this.mPosition = offset;
			this.mPage = page;
		}
		/// <summary> Decode script encoded by the Microsoft obfuscator.</summary>
		/// <param name="page">The source for encoded text.
		/// </param>
		/// <param name="cursor">The position at which to start decoding.
		/// This is advanced to the end of the encoded text.
		/// </param>
		/// <returns> The plaintext.
		/// </returns>
		/// <exception cref="ParserException">If an error is discovered while decoding.
		/// </exception>
		public static System.String Decode(Page page, Cursor cursor)
		{
			// The decoder is a state machine driven one input character at a time.
			// 'state' selects the phase; each substate_* counter tracks progress
			// within the multi-character phase of the same name.
			int state;
			int substate_initial;
			int substate_length;
			int substate_prefix;
			int substate_checksum;
			int substate_final;
			long checksum;
			long length;
			char[] buffer;
			// Scratch space for the two six-character fields (length and checksum)
			// that are handed to DecodeBase64.
			buffer = new char[6];
			int index;
			char character;
			int input_character;
			bool found;
			System.Text.StringBuilder ret;
			
			ret = new System.Text.StringBuilder(1024);
			
			state = STATE_INITIAL;
			substate_initial = 0;
			substate_length = 0;
			substate_prefix = 0;
			substate_checksum = 0;
			substate_final = 0;
			length = 0L;
			checksum = 0L;
			index = 0;
			while (STATE_DONE != state)
			{
				input_character = page.GetCharacter(cursor);
				character = (char) input_character;
				if (Page.EOF == input_character)
				{
					// End of input is only legal while idle in the initial state,
					// i.e. not part-way through any encoded section.
					if ((STATE_INITIAL != state) || (0 != substate_initial) || (0 != substate_length) || (0 != substate_prefix) || (0 != substate_checksum) || (0 != substate_final))
						throw new ParserException("illegal state for exit");
					state = STATE_DONE;
				}
				else
					switch (state)
					{
						
						case STATE_INITIAL: 
							// Look for the leader sequence; anything else is copied through.
							if (character == mLeader[substate_initial])
							{
								substate_initial++;
								if (substate_initial == mLeader.Length)
								{
									substate_initial = 0;
									state = STATE_LENGTH;
								}
							}
							else
							{
								// oops, flush
								// Emit the leader characters matched so far, in order, then the
								// character that broke the match. (Previously the index was
								// advanced twice per iteration, which skipped every other
								// leader character and could index past the end of mLeader.)
								for (int k = 0; k < substate_initial; k++)
									ret.Append(mLeader[k]);
								substate_initial = 0;
								ret.Append(character);
							}
							break;
						
						
						case STATE_LENGTH: 
							// Collect buffer.Length characters and decode them as the payload length.
							buffer[substate_length] = character;
							substate_length++;
							if (substate_length >= buffer.Length)
							{
								length = DecodeBase64(buffer);
								if (0 > length)
									throw new ParserException("illegal length: " + length);
								substate_length = 0;
								state = STATE_PREFIX;
							}
							break;
						
						
						case STATE_PREFIX: 
							// The prefix sequence must match exactly.
							if (character == mPrefix[substate_prefix])
								substate_prefix++;
							else
								throw new ParserException("illegal character encountered: " + (int) character + " ('" + character + "')");
							if (substate_prefix >= mPrefix.Length)
							{
								substate_prefix = 0;
								state = STATE_DECODE;
							}
							break;
						
						
						case STATE_DECODE: 
							if ('@' == character)
								// '@' introduces an escape; the next character is handled in STATE_ESCAPE.
								state = STATE_ESCAPE;
							else
							{
								if (input_character < 0x80)
								{
									// Map the character to a lookup-table column:
									// tab -> 0, characters from space upward -> (c - ' ' + 1).
									if (input_character == '\t')
										input_character = 0;
									else if (input_character >= ' ')
										input_character -= (' ' - 1);
									else
										throw new ParserException("illegal encoded character: " + input_character + " ('" + character + "')");
									// The table row is chosen by the position of the character
									// within the payload, cycling every 64 characters.
									char ch = mLookupTable[mEncodingIndex[index % 64]][input_character];
									ret.Append(ch);
									checksum += ch;
									index++;
								}
								else
									// Characters at or above 0x80 are passed through untranslated
									// and do not contribute to the checksum or the index.
									ret.Append(character);
							}
							// Every consumed input character counts against the declared length.
							length--;
							if (0 == length)
							{
								index = 0;
								state = STATE_CHECKSUM;
							}
							break;
						
						
						case STATE_ESCAPE: 
							// Translate the escaped character via the parallel mEscapes/mEscaped arrays.
							found = false;
							for (int i = 0; i < mEscapes.Length; i++)
								if (character == mEscapes[i])
								{
									found = true;
									character = mEscaped[i];
								}
							if (!found)
								throw new ParserException("unexpected escape character: " + (int) character + " ('" + character + "')");
							ret.Append(character);
							checksum += character;
							index++;
							state = STATE_DECODE;
							length--;
							if (0 == length)
							{
								index = 0;
								state = STATE_CHECKSUM;
							}
							break;
						
						
						case STATE_CHECKSUM: 
							// Collect buffer.Length characters and compare the decoded value
							// with the running sum of the decoded output characters.
							buffer[substate_checksum] = character;
							substate_checksum++;
							if (substate_checksum >= buffer.Length)
							{
								long check = DecodeBase64(buffer);
								if (check != checksum)
									throw new ParserException("incorrect checksum, expected " + check + ", calculated " + checksum);
								checksum = 0;
								substate_checksum = 0;
								state = STATE_FINAL;
							}
							break;
						
						
						case STATE_FINAL: 
							// The trailer sequence must match exactly; afterwards the machine
							// moves to whatever LAST_STATE is set to.
							if (character == mTrailer[substate_final])
								substate_final++;
							else
								throw new ParserException("illegal character encountered: " + (int) character + " ('" + character + "')");
							if (substate_final >= mTrailer.Length)
							{
								substate_final = 0;
								state = LAST_STATE;
							}
							break;
						
						default: 
							throw new ParserException("invalid state: " + state);
						
					}
			}
			
			return (ret.ToString());
		}