/// <summary>Builds an index that initially holds no elements.</summary>
/// <param name="page">The page this index is associated with.</param>
public PageIndex(Page page)
{
    // The backing array is sized from the increment as it stands on entry;
    // only afterwards is the increment reset to twice the starting value.
    int[] storage = new int[mIncrement];

    mPage = page;
    mIndices = storage;
    mCount = 0;
    mIncrement = mStartIncrement * 2;
}
//
// provide same constructors as super class
//

/// <summary>
/// Puts the page-related state into its "unset" form: no page, and all four
/// name/value offsets at -1.
/// </summary>
private void Init()
{
    mNameStart = mNameEnd = -1;
    mValueStart = mValueEnd = -1;
    mPage = null;
}
/// <summary>
/// Builds a node that covers a span of a page.
/// The page and the begin and end offsets are recorded; the node starts out
/// with neither a parent nor children.
/// </summary>
/// <param name="page">The page this node was read from.</param>
/// <param name="start">Offset within the page where the node begins.</param>
/// <param name="end">Offset within the page where the node ends.</param>
public AbstractNode(Page page, int start, int end)
{
    // Not yet linked into any tree.
    parent = null;
    children = null;

    // Location of the node.
    mPage = page;
    nodeBegin = start;
    nodeEnd = end;
}
/// <summary>Builds an attribute described by offsets into a page.</summary>
/// <param name="page">The page that contains the attribute.</param>
/// <param name="name_start">Offset within the page where the name begins.
/// A negative value means the name is considered null.</param>
/// <param name="name_end">Offset within the page where the name ends.</param>
/// <param name="value_start">Offset within the page where the value begins.
/// A negative value means the value is considered null.</param>
/// <param name="value_end">Offset within the page where the value ends.</param>
/// <param name="quote">The quote character surrounding the value
/// (i.e. ' or "), or zero when there is none.</param>
public PageAttribute(Page page, int name_start, int name_end, int value_start, int value_end, char quote)
{
    // Remember where the pieces live on the page.
    mPage = page;
    mValueStart = value_start;
    mValueEnd = value_end;
    mNameStart = name_start;
    mNameEnd = name_end;

    // No explicit strings are supplied for name, assignment or value;
    // only the quote is passed through.
    SetName(null);
    SetAssignment(null);
    SetValue(null);
    SetQuote(quote);
}
/// <summary>Builds a tag from a page location and a list of parsed attributes.</summary>
/// <param name="page">The page this tag was read from.</param>
/// <param name="start">Offset within the page where the tag begins.</param>
/// <param name="end">Offset within the page where the tag ends.</param>
/// <param name="attributes">The attributes parsed for this tag. When this is
/// null or empty, the tag name is taken from the first entry of <c>Ids</c>,
/// or set to the empty string if there are no ids.</param>
/// <seealso cref="Attribute">
/// </seealso>
public TagNode(Page page, int start, int end, System.Collections.ArrayList attributes)
    : base(page, start, end)
{
    mScanner = mDefaultScanner;
    mAttributes = attributes;

    bool hasAttributes = (mAttributes != null) && (mAttributes.Count != 0);
    if (hasAttributes)
    {
        return;
    }

    // Nothing was parsed: fall back to the first known id so that the
    // tag name is never left null.
    System.String[] ids = Ids;
    bool hasIds = (ids != null) && (ids.Length != 0);
    TagName = hasIds ? ids[0] : "";
}
/// <summary>Produces a remark node for a span of a page.</summary>
/// <remarks>
/// The node is a clone of <c>RemarkPrototype</c> with its page and positions
/// filled in. If cloning or configuring the clone throws, a plain
/// <c>RemarkNode</c> is returned instead.
/// </remarks>
/// <param name="page">The page the node is on.</param>
/// <param name="start">Offset where the remark begins.</param>
/// <param name="end">Offset where the remark ends.</param>
/// <returns>A remark node comprising the indicated characters from the page.</returns>
public virtual IRemark CreateRemarkNode(Page page, int start, int end)
{
    try
    {
        IRemark clone = (IRemark) RemarkPrototype.Clone();
        clone.Page = page;
        clone.StartPosition = start;
        clone.EndPosition = end;
        return clone;
    }
    catch
    {
        // Best effort: any failure above yields the built-in node type.
        return new RemarkNode(page, start, end);
    }
}
/// <summary>Produces a text node for a span of a page.</summary>
/// <remarks>
/// The node is a clone of <c>TextPrototype</c> with its page and positions
/// filled in. If cloning or configuring the clone throws, a plain
/// <c>TextNode</c> is returned instead.
/// </remarks>
/// <param name="page">The page the node is on.</param>
/// <param name="start">Offset where the string begins.</param>
/// <param name="end">Offset where the string ends.</param>
/// <returns>A text node comprising the indicated characters from the page.</returns>
public virtual IText CreateStringNode(Page page, int start, int end)
{
    try
    {
        IText clone = (IText) TextPrototype.Clone();
        clone.Page = page;
        clone.StartPosition = start;
        clone.EndPosition = end;
        return clone;
    }
    catch
    {
        // Best effort: any failure above yields the built-in node type.
        return new TextNode(page, start, end);
    }
}
/// <summary>
/// Builds a lexer over the given page, with its cursor at offset zero and
/// the lexer itself acting as the node factory.
/// </summary>
/// <param name="page">The page with HTML text.</param>
public Lexer(Page page)
{
    Page = page;

    Cursor origin = new Cursor(page, 0);
    Cursor = origin;

    NodeFactory = this;
}
/// <summary>
/// Produces a tag node for a span of a page.
/// The attributes list is expected to contain at least one element: the tag
/// name (a standalone attribute) at position zero. Overrides can use it to
/// decide which type of node to create, or to gate other processing.
/// </summary>
/// <param name="page">The page the node is on.</param>
/// <param name="start">Offset where the tag begins.</param>
/// <param name="end">Offset where the tag ends.</param>
/// <param name="attributes">The attributes contained in this tag.</param>
/// <returns>A newly constructed <c>TagNode</c>.</returns>
public virtual ITag CreateTagNode(Page page, int start, int end, System.Collections.ArrayList attributes)
{
    ITag node = new TagNode(page, start, end, attributes);
    return node;
}
/// <summary>Produces a remark node for a span of a page.</summary>
/// <param name="page">The page the node is on.</param>
/// <param name="start">Offset where the remark begins.</param>
/// <param name="end">Offset where the remark ends.</param>
/// <returns>A newly constructed <c>RemarkNode</c>.</returns>
public virtual IRemark CreateRemarkNode(Page page, int start, int end)
{
    IRemark node = new RemarkNode(page, start, end);
    return node;
}
//
// NodeFactory interface
//

/// <summary>Produces a text node for a span of a page.</summary>
/// <param name="page">The page the node is on.</param>
/// <param name="start">Offset where the string begins.</param>
/// <param name="end">Offset where the string ends.</param>
/// <returns>A newly constructed <c>TextNode</c>.</returns>
public virtual IText CreateStringNode(Page page, int start, int end)
{
    IText node = new TextNode(page, start, end);
    return node;
}
/// <summary>Builds an end tag whose name matches the given tag.</summary>
/// <param name="tag">The tag to end.</param>
/// <param name="lexer">The object containing the node factory used to build the tag.</param>
/// <param name="page">The page the tag is on (virtually).</param>
/// <param name="position">The offset into the page at which the tag is to be anchored.</param>
/// <returns>
/// An end tag named "/" followed by the raw name of <paramref name="tag"/>,
/// whose start and end positions are both <paramref name="position"/>.
/// The fact that these positions are equal may be used to recognise it as a
/// virtual tag later on.
/// </returns>
protected internal virtual ITag CreateVirtualEndTag(ITag tag, Lexer lexer, Page page, int position)
{
    System.String closingName = "/" + tag.RawTagName;

    // The single attribute is the tag name itself, with no value.
    System.Collections.ArrayList attributes =
        System.Collections.ArrayList.Synchronized(new System.Collections.ArrayList(10));
    attributes.Add(new TagAttribute(closingName, (System.String) null));

    return lexer.NodeFactory.CreateTagNode(page, position, position, attributes);
}
/// <summary>Builds an index that starts out with the given elements.</summary>
/// <param name="page">The page this index is associated with.</param>
/// <param name="cursors">The initial elements of the index. The array is
/// stored as-is (not copied) and every element counts as in use.
/// NOTE: The list must be sorted in ascending order.</param>
public PageIndex(Page page, int[] cursors)
{
    int elementCount = cursors.Length;

    mPage = page;
    mIndices = cursors;
    mCount = elementCount;
}
/// <summary>
/// Builds a text node from a page and the offsets of the text within it.
/// No explicit text is stored at construction time.
/// </summary>
/// <param name="page">The page this string is on.</param>
/// <param name="start">Offset where the string begins.</param>
/// <param name="end">Offset where the string ends.</param>
public TextNode(Page page, int start, int end)
    : base(page, start, end)
{
    mText = null;
}
/// <summary>
/// Create a new tag node.
/// The attributes list normally contains at least one element: the tag name
/// (a standalone attribute) at position zero. That name, upper-cased and
/// with any trailing "/" removed, is looked up in the registered prototypes;
/// a match is cloned and positioned on the page. End tags (names starting
/// with "/"), unknown names, and any failure while preparing the specific
/// node all fall back to a generic tag node.
/// </summary>
/// <param name="page">The page the node is on.</param>
/// <param name="start">The beginning position of the tag.</param>
/// <param name="end">The ending position of the tag.</param>
/// <param name="attributes">The attributes contained in this tag. A null or
/// empty list produces a generic tag node.</param>
/// <returns>A tag node comprising the indicated characters from the page.</returns>
public virtual ITag CreateTagNode(Page page, int start, int end, System.Collections.ArrayList attributes)
{
    ITag ret = null;

    // Tolerate a missing list the same way an empty one is tolerated.
    if ((null != attributes) && (0 != attributes.Count))
    {
        TagAttribute attribute = (TagAttribute) attributes[0];
        System.String id = attribute.GetName();
        if (null != id)
        {
            try
            {
                // Tag names are identifiers, not linguistic text: use the
                // shared invariant culture and ordinal character tests
                // rather than building a culture object on every call.
                id = id.ToUpper(System.Globalization.CultureInfo.InvariantCulture);
                bool isEndTag = (0 != id.Length) && ('/' == id[0]);
                if (!isEndTag)
                {
                    if ((0 != id.Length) && ('/' == id[id.Length - 1]))
                        id = id.Substring(0, id.Length - 1);
                    ITag prototype = (ITag) mBlastocyst[id];
                    if (null != prototype)
                    {
                        // Configure a local first so that a failure part way
                        // through never leaks a half-initialised node.
                        ITag specific = (ITag) prototype.Clone();
                        specific.Page = page;
                        specific.StartPosition = start;
                        specific.EndPosition = end;
                        specific.AttributesEx = attributes;
                        ret = specific;
                    }
                }
            }
            catch
            {
                // default to creating a generic one
            }
        }
    }

    if (null == ret)
    {
        // generate a generic node
        try
        {
            ITag generic = (ITag) TagPrototype.Clone();
            generic.Page = page;
            generic.StartPosition = start;
            generic.EndPosition = end;
            generic.AttributesEx = attributes;
            ret = generic;
        }
        catch
        {
            ret = new TagNode(page, start, end, attributes);
        }
    }

    return (ret);
}
/// <summary>Builds a <c>Cursor</c> located at a given offset of a page.</summary>
/// <param name="page">The page this cursor is on.</param>
/// <param name="offset">The character offset within the page.</param>
public Cursor(Page page, int offset)
{
    mPosition = offset;
    mPage = page;
}
/// <summary>
/// Decode script encoded by the Microsoft obfuscator.
/// Characters are read from the page one at a time and run through a state
/// machine: text outside an encoded block is copied through unchanged; an
/// encoded block is recognised by the leader sequence, followed by a
/// base-64 length, the prefix sequence, the encoded characters, a base-64
/// checksum and the trailer sequence.
/// </summary>
/// <param name="page">The source for encoded text.</param>
/// <param name="cursor">The position at which to start decoding.
/// This is advanced to the end of the encoded text.</param>
/// <returns>The plaintext.</returns>
/// <exception cref="ParserException">If an error is discovered while decoding:
/// end of input in the middle of an encoded block, a negative length, an
/// unexpected character in the prefix or trailer, an unknown escape, an
/// illegal encoded character or a checksum mismatch.</exception>
public static System.String Decode(Page page, Cursor cursor)
{
    int state;
    int substate_initial;
    int substate_length;
    int substate_prefix;
    int substate_checksum;
    int substate_final;
    long checksum;
    long length;
    char[] buffer;
    int index;
    int input_character;
    bool found;
    System.Text.StringBuilder ret;

    // Scratch space shared by the base-64 length and checksum fields.
    buffer = new char[6];
    ret = new System.Text.StringBuilder(1024);

    state = STATE_INITIAL;
    substate_initial = 0;
    substate_length = 0;
    substate_prefix = 0;
    substate_checksum = 0;
    substate_final = 0;
    length = 0L;
    checksum = 0L;
    index = 0;
    while (STATE_DONE != state)
    {
        input_character = page.GetCharacter(cursor);
        if (Page.EOF == input_character)
        {
            // End of input is only acceptable between encoded blocks.
            if ((STATE_INITIAL != state) || (0 != substate_initial) || (0 != substate_length) || (0 != substate_prefix) || (0 != substate_checksum) || (0 != substate_final))
                throw new ParserException("illegal state for exit");
            state = STATE_DONE;
        }
        else
        {
            // Convert only after the end-of-input test so the sentinel
            // value is never narrowed to a char.
            char character = (char) input_character;
            switch (state)
            {
                case STATE_INITIAL:
                    if (character == mLeader[substate_initial])
                    {
                        substate_initial++;
                        if (substate_initial == mLeader.Length)
                        {
                            substate_initial = 0;
                            state = STATE_LENGTH;
                        }
                    }
                    else
                    {
                        // oops, flush: emit exactly the leader characters
                        // matched so far, in order, then the current one.
                        for (int k = 0; k < substate_initial; k++)
                            ret.Append(mLeader[k]);
                        substate_initial = 0;
                        ret.Append(character);
                    }
                    break;

                case STATE_LENGTH:
                    buffer[substate_length] = character;
                    substate_length++;
                    if (substate_length >= buffer.Length)
                    {
                        length = DecodeBase64(buffer);
                        if (0 > length)
                            throw new ParserException("illegal length: " + length);
                        substate_length = 0;
                        state = STATE_PREFIX;
                    }
                    break;

                case STATE_PREFIX:
                    if (character == mPrefix[substate_prefix])
                        substate_prefix++;
                    else
                        throw new ParserException("illegal character encountered: " + (int) character + " ('" + character + "')");
                    if (substate_prefix >= mPrefix.Length)
                    {
                        substate_prefix = 0;
                        state = STATE_DECODE;
                    }
                    break;

                case STATE_DECODE:
                    if ('@' == character)
                        state = STATE_ESCAPE;
                    else
                    {
                        if (input_character < 0x80)
                        {
                            // Map the character onto a lookup-table column:
                            // tab goes to 0, space and above shift down.
                            if (input_character == '\t')
                                input_character = 0;
                            else if (input_character >= ' ')
                                input_character -= (' ' - 1);
                            else
                                throw new ParserException("illegal encoded character: " + input_character + " ('" + character + "')");
                            char ch = mLookupTable[mEncodingIndex[index % 64]][input_character];
                            ret.Append(ch);
                            checksum += ch;
                            index++;
                        }
                        else
                            ret.Append(character);
                    }
                    length--;
                    if (0 == length)
                    {
                        index = 0;
                        state = STATE_CHECKSUM;
                    }
                    break;

                case STATE_ESCAPE:
                    found = false;
                    for (int i = 0; i < mEscapes.Length; i++)
                        if (character == mEscapes[i])
                        {
                            found = true;
                            character = mEscaped[i];
                        }
                    if (!found)
                        throw new ParserException("unexpected escape character: " + (int) character + " ('" + character + "')");
                    ret.Append(character);
                    checksum += character;
                    index++;
                    state = STATE_DECODE;
                    length--;
                    if (0 == length)
                    {
                        index = 0;
                        state = STATE_CHECKSUM;
                    }
                    break;

                case STATE_CHECKSUM:
                    buffer[substate_checksum] = character;
                    substate_checksum++;
                    if (substate_checksum >= buffer.Length)
                    {
                        long check = DecodeBase64(buffer);
                        if (check != checksum)
                            throw new ParserException("incorrect checksum, expected " + check + ", calculated " + checksum);
                        checksum = 0;
                        substate_checksum = 0;
                        state = STATE_FINAL;
                    }
                    break;

                case STATE_FINAL:
                    if (character == mTrailer[substate_final])
                        substate_final++;
                    else
                        throw new ParserException("illegal character encountered: " + (int) character + " ('" + character + "')");
                    if (substate_final >= mTrailer.Length)
                    {
                        substate_final = 0;
                        // LAST_STATE decides what follows a completed block.
                        state = LAST_STATE;
                    }
                    break;

                default:
                    throw new ParserException("invalid state: " + state);
            }
        }
    }

    return (ret.ToString());
}