/// <summary>
/// Writes a VTD index with three levels of location cache to a stream.
/// This function is called within VTDGen and VTDNav's writeIndex.
/// The output consists of an 8-byte header, 16 zeroed reserved bytes, the
/// document bytes (zero-padded to a multiple of 8 bytes), the VTD records
/// and the L1, L2 and L3 buffers, each section preceded by its 64-bit count.
/// </summary>
/// <param name="version">Stored in header byte 0.</param>
/// <param name="encodingType">Stored, truncated to one byte, in header byte 1.</param>
/// <param name="ns">Selects the namespace variant of the flag byte (header byte 2).</param>
/// <param name="byteOrder">Not read by this method; the byte-order flag is taken from BitConverter.IsLittleEndian.</param>
/// <param name="nestDepth">Stored, truncated to one byte, in header byte 3.</param>
/// <param name="LCLevel">Must be 3.</param>
/// <param name="rootIndex">Only the low 16 bits are stored, in the second header word.</param>
/// <param name="xmlDoc">Byte array holding the document; must not be null.</param>
/// <param name="docOffset">Offset of the document inside <paramref name="xmlDoc"/>.</param>
/// <param name="docLen">Length of the document in bytes; must be positive.</param>
/// <param name="vtdBuffer">VTD records; must not be null or empty.</param>
/// <param name="l1Buffer">Level-1 buffer; must not be null.</param>
/// <param name="l2Buffer">Level-2 buffer; must not be null.</param>
/// <param name="l3Buffer">Level-3 buffer; must not be null.</param>
/// <param name="os">Destination stream. Once writing has started the stream is
/// closed before this method returns, whether it succeeds or throws.</param>
/// <exception cref="System.ArgumentException">A required argument is null,
/// docLen is not positive, or LCLevel is not 3.</exception>
/// <exception cref="IndexWriteException">vtdBuffer contains no records.</exception>
public static void writeIndex_L3(byte version, int encodingType, bool ns, bool byteOrder, int nestDepth, int LCLevel, int rootIndex, byte[] xmlDoc, int docOffset, int docLen, FastLongBuffer vtdBuffer, FastLongBuffer l1Buffer, FastLongBuffer l2Buffer, FastIntBuffer l3Buffer, System.IO.Stream os)
{
    if (xmlDoc == null || docLen <= 0 || vtdBuffer == null || l1Buffer == null || l2Buffer == null || l3Buffer == null || LCLevel != 3)
    {
        throw new System.ArgumentException("Invalid argument for writeIndex ");
    }
    if (vtdBuffer.size_Renamed_Field == 0)
    {
        throw new IndexWriteException("VTDBuffer can't be zero length");
    }

    // The using block closes the writer (and with it the stream) even when a
    // write fails part-way, instead of only on the success path.
    using (System.IO.BinaryWriter dos = new System.IO.BinaryWriter(os))
    {
        int i;

        // first 4 bytes: version, encoding, flag byte, nesting depth
        byte[] ba = new byte[4];
        ba[0] = (byte)version;
        ba[1] = (byte)encodingType;
        if (BitConverter.IsLittleEndian == false)
        {
            ba[2] = (byte)(ns ? 0xe0 : 0xa0); // big endian
        }
        else
        {
            ba[2] = (byte)(ns ? 0xc0 : 0x80);
        }
        ba[3] = (byte)nestDepth;
        dos.Write(ba);

        // second 4 bytes: 0, 4, then the low 16 bits of rootIndex (high byte first)
        ba[0] = 0;
        ba[1] = 4;
        ba[2] = (byte)((rootIndex & 0xff00) >> 8);
        ba[3] = (byte)(rootIndex & 0xff);
        dos.Write(ba);

        // 16 reserved bytes (four 32-bit words) set to zero
        ba[1] = ba[2] = ba[3] = 0;
        dos.Write(ba);
        dos.Write(ba);
        dos.Write(ba);
        dos.Write(ba);

        // write XML doc in bytes, preceded by its length
        dos.Write((long)docLen);
        dos.Write(xmlDoc, docOffset, docLen);

        // zero padding to make it an integer multiple of 64 bits
        if ((docLen & 0x07) != 0)
        {
            int t = (((docLen >> 3) + 1) << 3) - docLen;
            for (; t > 0; t--)
            {
                dos.Write((System.Byte)0);
            }
        }

        // write VTD
        dos.Write((long)vtdBuffer.size_Renamed_Field);
        if (docOffset != 0)
        {
            // NOTE(review): adjust() is defined elsewhere; presumably it shifts
            // each record's offset so it is relative to the written document.
            for (i = 0; i < vtdBuffer.size_Renamed_Field; i++)
            {
                dos.Write(adjust(vtdBuffer.longAt(i), -docOffset));
            }
        }
        else
        {
            for (i = 0; i < vtdBuffer.size_Renamed_Field; i++)
            {
                dos.Write(vtdBuffer.longAt(i));
            }
        }

        // write L1
        dos.Write((long)l1Buffer.size_Renamed_Field);
        for (i = 0; i < l1Buffer.size_Renamed_Field; i++)
        {
            dos.Write(l1Buffer.longAt(i));
        }

        // write L2
        dos.Write((long)l2Buffer.size_Renamed_Field);
        for (i = 0; i < l2Buffer.size_Renamed_Field; i++)
        {
            dos.Write(l2Buffer.longAt(i));
        }

        // write L3
        dos.Write((long)l3Buffer.size_Renamed_Field);
        for (i = 0; i < l3Buffer.size_Renamed_Field; i++)
        {
            dos.Write(l3Buffer.intAt(i));
        }

        // pad with one zero int if the number of L3 entries is odd
        if ((l3Buffer.size_Renamed_Field & 1) != 0)
        {
            dos.Write(0);
        }
    }
}
// length of the XML document (in bytes)

/// <summary>
/// Initialize the VTD navigation object that works with five location
/// cache buffers (l1 to l5).
/// </summary>
/// <param name="RootIndex">Stored as the root index; must not be negative.</param>
/// <param name="enc">Encoding identifier, stored as-is.</param>
/// <param name="NS">Namespace awareness flag; also selects the token offset mask.</param>
/// <param name="depth">Must not be negative; the nesting level becomes depth + 1.</param>
/// <param name="x">Document bytes; must not be null.</param>
/// <param name="vtd">VTD records; must not be null.</param>
/// <param name="l1">Level-1 buffer; must not be null.</param>
/// <param name="l2">Level-2 buffer; must not be null.</param>
/// <param name="l3">Level-3 buffer; must not be null.</param>
/// <param name="l4">Level-4 buffer.</param>
/// <param name="l5">Level-5 buffer.</param>
/// <param name="so">Starting offset of the document (in bytes); must not be negative.</param>
/// <param name="length">Length of the document (in bytes); must not be negative.</param>
/// <exception cref="System.ArgumentException">A checked buffer is null or a
/// checked number is negative.</exception>
protected internal VTDNav_L5(
    int RootIndex,
    int enc,
    bool NS,
    int depth,
    IByteBuffer x,
    FastLongBuffer vtd,
    FastLongBuffer l1,
    FastLongBuffer l2,
    FastLongBuffer l3,
    FastLongBuffer l4,
    FastIntBuffer l5,
    int so,
    int length)
{
    // NOTE(review): l4 and l5 are stored below without a null check.
    bool bufferMissing = l1 == null || l2 == null || l3 == null || vtd == null || x == null;
    bool numberNegative = depth < 0 || RootIndex < 0 || so < 0 || length < 0;
    if (bufferMissing || numberNegative)
    {
        throw new System.ArgumentException();
    }

    // document and index buffers
    count = 0;
    XMLDoc = x;
    vtdBuffer = vtd;
    l1Buffer = l1;
    l2Buffer = l2;
    l3Buffer = l3;
    l4Buffer = l4;
    l5Buffer = l5;

    encoding = enc;
    rootIndex = RootIndex;
    nestingLevel = depth + 1;

    // Without namespace awareness the offset mask is 31 bits wide (this
    // allows xml size to be 2GB); with it the mask is 30 bits wide.
    ns = NS;
    MASK_TOKEN_OFFSET = NS ? 0x000000003fffffffL : 0x000000007fffffffL;

    // this variable will only change value during XPath eval
    atTerminal = false;

    // Context object: entry 0 is the depth value (zero, because the root is
    // singular); every other entry starts out as -1.
    this.context = new int[nestingLevel];
    context[0] = 0;
    int level = 1;
    while (level < nestingLevel)
    {
        context[level] = -1;
        level++;
    }

    int stackWidth = nestingLevel + 15;
    contextStack = new ContextBuffer(10, stackWidth);
    contextStack2 = new ContextBuffer(10, stackWidth);
    stackTemp = new int[stackWidth];

    // initial state of LC variables
    l1index = -1;
    l2index = -1;
    l3index = -1;
    l4index = -1;
    l5index = -1;
    l2lower = -1;
    l3lower = -1;
    l4lower = -1;
    l5lower = -1;
    l2upper = -1;
    l3upper = -1;
    l4upper = -1;
    l5upper = -1;

    docOffset = so;
    docLen = length;
    vtdSize = vtd.size_Renamed_Field;

    // cached name lookups start out empty
    name = null;
    nameIndex = -1;
    localName = null;
    localNameIndex = -1;

    fib = new FastIntBuffer(5); // page size is 32 ints
    shallowDepth = false;
    maxLCDepthPlusOne = 6;
}
/// <summary>
/// Clear internal states so VTDGen can process the next file.
/// The VTD and location cache buffer references are dropped only when the
/// buffer-reuse flag (br) is not set.
/// </summary>
public void clear()
{
    if (!br)
    {
        VTDBuffer = null;
        l1Buffer = null;
        l2Buffer = null;
        l3Buffer = null;
        _l3Buffer = null;
        _l4Buffer = null;
        _l5Buffer = null;
    }

    XMLDoc = null;

    // parsing position
    temp_offset = 0;
    offset = 0;

    // bookkeeping of the last depth and location cache indices
    last_l4_index = 0;
    last_l3_index = 0;
    last_l2_index = 0;
    last_l1_index = 0;
    last_depth = 0;

    rootIndex = 0;
    depth = -1;
    increment = 1;
    BOM_detected = false;
    must_utf_8 = false;
    ch_temp = 0;
    ch = 0;

    r = new UTF8Reader(this);

    // empty the namespace buffers without releasing them
    nsBuffer1.size_Renamed_Field = 0;
    nsBuffer2.size_Renamed_Field = 0;
    nsBuffer3.size_Renamed_Field = 0;
    currentElementRecord = 0;
}
/// <summary> Initialize the VTD navigation object.</summary>
/// <param name="RootIndex">Stored as the root index; must not be negative.</param>
/// <param name="enc">Encoding identifier, stored as-is.</param>
/// <param name="NS">Namespace awareness flag; also selects the token offset mask.</param>
/// <param name="depth">Must not be negative; the nesting level becomes depth + 1.</param>
/// <param name="x">Document bytes; must not be null.</param>
/// <param name="vtd">VTD records; must not be null.</param>
/// <param name="l1">Level-1 buffer; must not be null.</param>
/// <param name="l2">Level-2 buffer; must not be null.</param>
/// <param name="l3">Level-3 buffer; must not be null.</param>
/// <param name="so">Starting offset of the document (in bytes); must not be negative.</param>
/// <param name="length">Length of the document (in bytes); must not be negative.</param>
/// <exception cref="System.ArgumentException">A buffer is null or a number is negative.</exception>
protected internal VTDNav(int RootIndex, int enc, bool NS, int depth, IByteBuffer x, FastLongBuffer vtd, FastLongBuffer l1, FastLongBuffer l2, FastIntBuffer l3, int so, int length)
{
    // reject missing buffers first, then negative numbers
    if (l1 == null || l2 == null || l3 == null || vtd == null || x == null)
    {
        throw new System.ArgumentException();
    }
    if (depth < 0 || RootIndex < 0 || so < 0 || length < 0)
    {
        throw new System.ArgumentException();
    }

    count = 0;

    // buffers and document
    vtdBuffer = vtd;
    l1Buffer = l1;
    l2Buffer = l2;
    l3Buffer = l3;
    XMLDoc = x;

    encoding = enc;
    rootIndex = RootIndex;
    nestingLevel = depth + 1;

    // Without namespace awareness the offset mask is 31 bits wide (this
    // allows xml size to be 2GB); with it the mask is 30 bits wide.
    ns = NS;
    MASK_TOKEN_OFFSET = NS ? 0x000000003fffffff : 0x000000007fffffff;

    // this variable will only change value during XPath eval
    atTerminal = false;

    // Context object: the depth value is the first entry (zero, because the
    // root is singular); all remaining entries start at -1.
    context = new int[nestingLevel];
    context[0] = 0;
    for (int slot = nestingLevel - 1; slot >= 1; slot--)
    {
        context[slot] = -1;
    }

    int stackWidth = nestingLevel + 9;
    contextStack = new ContextBuffer(10, stackWidth);
    contextStack2 = new ContextBuffer(10, stackWidth);
    stackTemp = new int[stackWidth];

    // initial state of LC variables
    l1index = -1;
    l2index = -1;
    l3index = -1;
    l2lower = -1;
    l3lower = -1;
    l2upper = -1;
    l3upper = -1;

    docOffset = so;
    docLen = length;
    vtdSize = vtd.size_Renamed_Field;

    // cached name lookups start out empty
    name = null;
    nameIndex = -1;
    localName = null;
    localNameIndex = -1;

    fib = new FastIntBuffer(5);
    shallowDepth = true;
}
/// <summary>
/// The buffer-reuse version of setDoc: sets the document container together
/// with the offset and length of the document inside it, marks the instance
/// as being in buffer-reuse mode (br) and resets the parser state.
/// Initial buffer sizes are picked from the document length.
/// </summary>
/// <param name="ba">byte[] holding the document</param>
/// <param name="os">int offset of the document (in byte)</param>
/// <param name="len">int length of the document (in byte)</param>
public void setDoc_BR(byte[] ba, int os, int len)
{
    br = true;

    // document location
    XMLDoc = ba;
    offset = os;
    docOffset = os;
    docLen = len;
    endOffset = os + len;

    // parser state
    temp_offset = 0;
    depth = -1;
    increment = 1;
    BOM_detected = false;
    must_utf_8 = false;
    ch_temp = 0;
    ch = 0;
    last_depth = 0;
    last_l3_index = 0;
    last_l2_index = 0;
    last_l1_index = 0;

    currentElementRecord = 0;
    nsBuffer1.size_Renamed_Field = 0;
    nsBuffer2.size_Renamed_Field = 0;
    nsBuffer3.size_Renamed_Field = 0;
    r = new UTF8Reader(this);

    // NOTE(review): despite the method name, new buffers are allocated on
    // every call below; confirm whether existing buffers were meant to be
    // kept and merely emptied.
    int vtdBits;
    if (shallowDepth)
    {
        // three levels of location cache
        int bits1, bits2, bits3;
        if (docLen <= 1024)
        {
            vtdBits = 6;
            bits1 = 5; bits2 = 5; bits3 = 5;
        }
        else if (docLen <= 4096)
        {
            vtdBits = 7;
            bits1 = 6; bits2 = 6; bits3 = 6;
        }
        else if (docLen <= 1024 * 16)
        {
            vtdBits = 8;
            bits1 = 7; bits2 = 7; bits3 = 7;
        }
        else
        {
            // larger documents share the same location cache sizes
            bits1 = 7; bits2 = 9; bits3 = 11;
            if (docLen <= 1024 * 16 * 4)
            {
                vtdBits = 11;
            }
            else if (docLen <= 1024 * 256)
            {
                vtdBits = 12;
            }
            else
            {
                vtdBits = 15;
            }
        }

        VTDBuffer = new FastLongBuffer(vtdBits, len >> (vtdBits + 1));
        l1Buffer = new FastLongBuffer(bits1);
        l2Buffer = new FastLongBuffer(bits2);
        l3Buffer = new FastIntBuffer(bits3);
    }
    else
    {
        // five levels of location cache
        int bits1, bits2, bits3, bits4, bits5;
        if (docLen <= 1024)
        {
            vtdBits = 6;
            bits1 = 5; bits2 = 5; bits3 = 5; bits4 = 5; bits5 = 5;
        }
        else if (docLen <= 4096)
        {
            vtdBits = 7;
            bits1 = 6; bits2 = 6; bits3 = 6; bits4 = 6; bits5 = 6;
        }
        else if (docLen <= 1024 * 16)
        {
            vtdBits = 8;
            bits1 = 7; bits2 = 7; bits3 = 7; bits4 = 7; bits5 = 7;
        }
        else if (docLen <= 1024 * 16 * 4)
        {
            vtdBits = 11;
            bits1 = 7; bits2 = 8; bits3 = 8; bits4 = 8; bits5 = 8;
        }
        else if (docLen <= 1024 * 256)
        {
            vtdBits = 12;
            bits1 = 8; bits2 = 9; bits3 = 9; bits4 = 9; bits5 = 9;
        }
        else
        {
            vtdBits = 15;
            bits1 = 7; bits2 = 9; bits3 = 11; bits4 = 11; bits5 = 11;
        }

        VTDBuffer = new FastLongBuffer(vtdBits, len >> (vtdBits + 1));
        l1Buffer = new FastLongBuffer(bits1);
        l2Buffer = new FastLongBuffer(bits2);
        _l3Buffer = new FastLongBuffer(bits3);
        _l4Buffer = new FastLongBuffer(bits4);
        _l5Buffer = new FastIntBuffer(bits5);
    }
}
private bool ws; // whether to preserve whitespace, default to false
#endregion Fields

#region Constructors
/// <summary>
/// VTDGen constructor method. Allocates the fixed-size working arrays and
/// the namespace buffers, and puts every flag into its initial state.
/// </summary>
public VTDGen()
{
    // fixed-size working arrays
    tag_stack = new long[TAG_STACK_SIZE];
    attr_name_array = new long[ATTR_NAME_ARRAY_SIZE];
    prefixed_attr_name_array = new long[ATTR_NAME_ARRAY_SIZE];
    prefix_URL_array = new int[ATTR_NAME_ARRAY_SIZE];

    // namespace buffers
    nsBuffer1 = new FastIntBuffer(4);
    nsBuffer2 = new FastLongBuffer(4);
    nsBuffer3 = new FastLongBuffer(4);

    // exception instance created once up front
    e = new EOFException("permature EOF reached, XML document incomplete");

    // initial settings
    VTDDepth = 0;
    LcDepth = 3;
    currentElementRecord = 0;
    br = false;
    ws = false;
    singleByteEncoding = true;
    shallowDepth = true;
}
/// <summary>
/// Clear internal states so VTDGen can process the next file.
/// The VTD and location cache buffer references are dropped only when the
/// buffer-reuse flag (br) is not set.
/// </summary>
public void clear()
{
    if (!br)
    {
        VTDBuffer = null;
        l1Buffer = null;
        l2Buffer = null;
        l3Buffer = null;
    }

    XMLDoc = null;

    // parsing position
    temp_offset = 0;
    offset = 0;

    // bookkeeping of the last depth and location cache indices
    last_l2_index = 0;
    last_l1_index = 0;
    last_depth = 0;

    rootIndex = 0;
    depth = -1;
    increment = 1;
    BOM_detected = false;
    must_utf_8 = false;
    ch_temp = 0;
    ch = 0;
}
/// <summary>
/// Set the XMLDoc container. Also set the offset and len of the document
/// with respect to the container, reset the parser state and allocate new
/// VTD and location cache buffers. Buffer-reuse mode (br) is switched off.
/// </summary>
/// <param name="ba">byte[] holding the document</param>
/// <param name="os">int offset of the document (in byte)</param>
/// <param name="len">int length of the document (in byte)</param>
public void setDoc(byte[] ba, int os, int len)
{
    br = false;

    // document location
    XMLDoc = ba;
    offset = os;
    docOffset = os;
    docLen = len;
    endOffset = os + len;

    // parser state
    temp_offset = 0;
    depth = -1;
    increment = 1;
    BOM_detected = false;
    must_utf_8 = false;
    ch_temp = 0;
    ch = 0;
    last_depth = 0;
    last_l3_index = 0;
    last_l2_index = 0;
    last_l1_index = 0;

    // First constructor argument of the VTD buffer, chosen from the
    // document length (largest documents first).
    int vtdBits;
    if (docLen > 1024 * 256)
    {
        vtdBits = 15;
    }
    else if (docLen > 1024 * 16 * 4)
    {
        vtdBits = 12;
    }
    else if (docLen > 4096 * 2)
    {
        vtdBits = 10;
    }
    else if (docLen > 1024)
    {
        vtdBits = 9;
    }
    else
    {
        vtdBits = 7; // the floor
    }

    VTDBuffer = new FastLongBuffer(vtdBits, len >> (vtdBits + 1));
    l1Buffer = new FastLongBuffer(7);
    l2Buffer = new FastLongBuffer(9);
    l3Buffer = new FastIntBuffer(11);
}
/// <summary>
/// Writes a VTD index with three levels of location cache to a stream.
/// This function is called within VTDGen and VTDNav's writeIndex.
/// The output consists of an 8-byte header, 16 zeroed reserved bytes, the
/// document bytes (zero-padded to a multiple of 8 bytes), the VTD records
/// and the L1, L2 and L3 buffers, each section preceded by its 64-bit count.
/// </summary>
/// <param name="version">Stored in header byte 0.</param>
/// <param name="encodingType">Stored, truncated to one byte, in header byte 1.</param>
/// <param name="ns">Selects the namespace variant of the flag byte (header byte 2).</param>
/// <param name="byteOrder">Not read by this method; the byte-order flag is taken from BitConverter.IsLittleEndian.</param>
/// <param name="nestDepth">Stored, truncated to one byte, in header byte 3.</param>
/// <param name="LCLevel">Must be 3.</param>
/// <param name="rootIndex">Only the low 16 bits are stored, in the second header word.</param>
/// <param name="xmlDoc">Byte array holding the document; must not be null.</param>
/// <param name="docOffset">Offset of the document inside <paramref name="xmlDoc"/>.</param>
/// <param name="docLen">Length of the document in bytes; must be positive.</param>
/// <param name="vtdBuffer">VTD records; must not be null or empty.</param>
/// <param name="l1Buffer">Level-1 buffer; must not be null.</param>
/// <param name="l2Buffer">Level-2 buffer; must not be null.</param>
/// <param name="l3Buffer">Level-3 buffer; must not be null.</param>
/// <param name="os">Destination stream. Once writing has started the stream is
/// closed before this method returns, whether it succeeds or throws.</param>
/// <exception cref="System.ArgumentException">A required argument is null,
/// docLen is not positive, or LCLevel is not 3.</exception>
/// <exception cref="IndexWriteException">vtdBuffer contains no records.</exception>
public static void writeIndex_L3(byte version, int encodingType, bool ns, bool byteOrder, int nestDepth, int LCLevel, int rootIndex, byte[] xmlDoc, int docOffset, int docLen, FastLongBuffer vtdBuffer, FastLongBuffer l1Buffer, FastLongBuffer l2Buffer, FastIntBuffer l3Buffer, System.IO.Stream os)
{
    if (xmlDoc == null || docLen <= 0 || vtdBuffer == null || l1Buffer == null || l2Buffer == null || l3Buffer == null || LCLevel != 3)
    {
        throw new System.ArgumentException("Invalid argument for writeIndex ");
    }
    if (vtdBuffer.size_Renamed_Field == 0)
    {
        throw new IndexWriteException("VTDBuffer can't be zero length");
    }

    // The using block closes the writer (and with it the stream) even when a
    // write fails part-way, instead of only on the success path.
    using (System.IO.BinaryWriter dos = new System.IO.BinaryWriter(os))
    {
        int i;

        // first 4 bytes: version, encoding, flag byte, nesting depth
        byte[] ba = new byte[4];
        ba[0] = (byte)version;
        ba[1] = (byte)encodingType;
        if (BitConverter.IsLittleEndian == false)
        {
            ba[2] = (byte)(ns ? 0xe0 : 0xa0); // big endian
        }
        else
        {
            ba[2] = (byte)(ns ? 0xc0 : 0x80);
        }
        ba[3] = (byte)nestDepth;
        dos.Write(ba);

        // second 4 bytes: 0, 4, then the low 16 bits of rootIndex (high byte first)
        ba[0] = 0;
        ba[1] = 4;
        ba[2] = (byte)((rootIndex & 0xff00) >> 8);
        ba[3] = (byte)(rootIndex & 0xff);
        dos.Write(ba);

        // 16 reserved bytes (four 32-bit words) set to zero
        ba[1] = ba[2] = ba[3] = 0;
        dos.Write(ba);
        dos.Write(ba);
        dos.Write(ba);
        dos.Write(ba);

        // write XML doc in bytes, preceded by its length
        dos.Write((long)docLen);
        dos.Write(xmlDoc, docOffset, docLen);

        // zero padding to make it an integer multiple of 64 bits
        if ((docLen & 0x07) != 0)
        {
            int t = (((docLen >> 3) + 1) << 3) - docLen;
            for (; t > 0; t--)
            {
                dos.Write((System.Byte)0);
            }
        }

        // write VTD
        dos.Write((long)vtdBuffer.size_Renamed_Field);
        if (docOffset != 0)
        {
            // NOTE(review): adjust() is defined elsewhere; presumably it shifts
            // each record's offset so it is relative to the written document.
            for (i = 0; i < vtdBuffer.size_Renamed_Field; i++)
            {
                dos.Write(adjust(vtdBuffer.longAt(i), -docOffset));
            }
        }
        else
        {
            for (i = 0; i < vtdBuffer.size_Renamed_Field; i++)
            {
                dos.Write(vtdBuffer.longAt(i));
            }
        }

        // write L1
        dos.Write((long)l1Buffer.size_Renamed_Field);
        for (i = 0; i < l1Buffer.size_Renamed_Field; i++)
        {
            dos.Write(l1Buffer.longAt(i));
        }

        // write L2
        dos.Write((long)l2Buffer.size_Renamed_Field);
        for (i = 0; i < l2Buffer.size_Renamed_Field; i++)
        {
            dos.Write(l2Buffer.longAt(i));
        }

        // write L3
        dos.Write((long)l3Buffer.size_Renamed_Field);
        for (i = 0; i < l3Buffer.size_Renamed_Field; i++)
        {
            dos.Write(l3Buffer.intAt(i));
        }

        // pad with one zero int if the number of L3 entries is odd
        if ((l3Buffer.size_Renamed_Field & 1) != 0)
        {
            dos.Write(0);
        }
    }
}
/// <summary>
/// Attach master document to this instance of XMLModifier. Creates new
/// working buffers and hash tables, then resolves the character set name
/// and Encoding object that match the master document's encoding.
/// </summary>
/// <param name="masterDocument">The document to attach; must not be null.</param>
/// <exception cref="System.ArgumentException">masterDocument is null.</exception>
/// <exception cref="ModifyException">The document's encoding has no entry in
/// the table below.</exception>
public void bind(VTDNav masterDocument)
{
    if (masterDocument == null)
    {
        throw new System.ArgumentException("MasterDocument can't be null");
    }

    md = masterDocument;
    flb = new FastLongBuffer();
    fob = new FastObjectBuffer();

    // both hash tables use the width derived from the number of VTD records
    int hashWidth = intHash.determineHashWidth(md.vtdSize);
    insertHash = new intHash(hashWidth);
    deleteHash = new intHash(hashWidth);

    // determine encoding charset string here
    encoding = md.getEncoding();
    string charSetName;
    switch (encoding)
    {
        case VTDNav.FORMAT_ASCII:
            charSetName = "ascii";
            break;
        case VTDNav.FORMAT_ISO_8859_1:
            charSetName = "iso-8859-1";
            break;
        case VTDNav.FORMAT_UTF8:
            charSetName = "utf-8";
            break;
        case VTDNav.FORMAT_UTF_16BE:
            charSetName = "utf-16be";
            break;
        case VTDNav.FORMAT_UTF_16LE:
            charSetName = "utf-16le";
            break;
        case VTDNav.FORMAT_ISO_8859_2:
            charSetName = "iso-8859-2";
            break;
        case VTDNav.FORMAT_ISO_8859_3:
            charSetName = "iso-8859-3";
            break;
        case VTDNav.FORMAT_ISO_8859_4:
            charSetName = "iso-8859-4";
            break;
        case VTDNav.FORMAT_ISO_8859_5:
            charSetName = "iso-8859-5";
            break;
        case VTDNav.FORMAT_ISO_8859_6:
            charSetName = "iso-8859-6";
            break;
        case VTDNav.FORMAT_ISO_8859_7:
            charSetName = "iso-8859-7";
            break;
        case VTDNav.FORMAT_ISO_8859_8:
            charSetName = "iso-8859-8";
            break;
        case VTDNav.FORMAT_ISO_8859_9:
            charSetName = "iso-8859-9";
            break;
        case VTDNav.FORMAT_ISO_8859_10:
            charSetName = "iso-8859-10";
            break;
        case VTDNav.FORMAT_ISO_8859_11:
            charSetName = "iso-8859-11";
            break;
        case VTDNav.FORMAT_ISO_8859_12:
            charSetName = "iso-8859-12";
            break;
        case VTDNav.FORMAT_ISO_8859_13:
            charSetName = "iso-8859-13";
            break;
        case VTDNav.FORMAT_ISO_8859_14:
            charSetName = "iso-8859-14";
            break;
        case VTDNav.FORMAT_ISO_8859_15:
            charSetName = "iso-8859-15";
            break;
        case VTDNav.FORMAT_WIN_1250:
            charSetName = "windows-1250";
            break;
        case VTDNav.FORMAT_WIN_1251:
            charSetName = "windows-1251";
            break;
        case VTDNav.FORMAT_WIN_1252:
            charSetName = "windows-1252";
            break;
        case VTDNav.FORMAT_WIN_1253:
            charSetName = "windows-1253";
            break;
        case VTDNav.FORMAT_WIN_1254:
            charSetName = "windows-1254";
            break;
        case VTDNav.FORMAT_WIN_1255:
            charSetName = "windows-1255";
            break;
        case VTDNav.FORMAT_WIN_1256:
            charSetName = "windows-1256";
            break;
        case VTDNav.FORMAT_WIN_1257:
            charSetName = "windows-1257";
            break;
        case VTDNav.FORMAT_WIN_1258:
            charSetName = "windows-1258";
            break;
        default:
            throw new ModifyException("Master document encoding not yet supported by XML modifier");
    }

    charSet = charSetName;
    eg = System.Text.Encoding.GetEncoding(charSet);
}
/// <summary>
/// Set the XMLDoc container. Also set the offset and len of the document
/// with respect to the container, reset the parser state and allocate new
/// VTD and location cache buffers whose initial sizes depend on the
/// document length. Buffer-reuse mode (br) is switched off.
/// </summary>
/// <param name="ba">byte[] holding the document</param>
/// <param name="os">int offset of the document (in byte)</param>
/// <param name="len">int length of the document (in byte)</param>
public void setDoc(byte[] ba, int os, int len)
{
    br = false;

    // document location
    XMLDoc = ba;
    offset = os;
    docOffset = os;
    docLen = len;
    endOffset = os + len;

    // parser state
    temp_offset = 0;
    depth = -1;
    increment = 1;
    BOM_detected = false;
    must_utf_8 = false;
    ch_temp = 0;
    ch = 0;
    last_depth = 0;
    last_l3_index = 0;
    last_l2_index = 0;
    last_l1_index = 0;

    // Constructor arguments for the buffers, chosen from the document
    // length (largest documents first).
    int vtdBits;
    int bits1 = 7, bits2 = 9, bits3 = 11;
    if (docLen > (1 << 30))
    {
        vtdBits = 23;
        bits1 = 16; bits2 = 16; bits3 = 16;
    }
    else if (docLen > (1 << 26))
    {
        vtdBits = 19;
        bits1 = 13; bits2 = 13; bits3 = 13;
    }
    else if (docLen > 1024 * 256)
    {
        vtdBits = 15;
        bits1 = 12; bits2 = 12; bits3 = 12;
    }
    else if (docLen > 1024 * 16 * 4)
    {
        vtdBits = 12;
    }
    else if (docLen > 1024 * 16)
    {
        vtdBits = 11;
    }
    else if (docLen > 4096)
    {
        vtdBits = 8;
        bits1 = 7; bits2 = 7; bits3 = 7;
    }
    else if (docLen > 1024)
    {
        vtdBits = 7;
        bits1 = 6; bits2 = 6; bits3 = 6;
    }
    else
    {
        vtdBits = 6; // the floor
        bits1 = 5; bits2 = 5; bits3 = 5;
    }

    VTDBuffer = new FastLongBuffer(vtdBits, len >> (vtdBits + 1));
    l1Buffer = new FastLongBuffer(bits1);
    l2Buffer = new FastLongBuffer(bits2);
    l3Buffer = new FastIntBuffer(bits3);
}