/**
 * Handle the top level object write.
 *
 * This default method is used for all types except structs, which are the
 * typical case. VectorizedRowBatch assumes the top level object is a
 * struct, so for every non-struct root type the single logical column is
 * stored in batch.cols[0] and forwarded to the normal column writer.
 * (StructTreeWriter overrides this to fan out over all child columns.)
 *
 * @param batch the batch to write from
 * @param offset the row to start on
 * @param length the number of rows to write
 * @throws IOException
 */
internal virtual void writeRootBatch(VectorizedRowBatch batch, int offset, int length)
{
    writeBatch(batch.cols[0], offset, length);
}
/**
 * Append a full batch of rows to the file being written.
 *
 * When row indexes are enabled, the batch is fed to the tree writer in
 * chunks that never cross a rowIndexStride boundary, so each index entry
 * describes exactly rowIndexStride rows. Without indexes the whole batch
 * is written in one call. The memory manager is notified of the added
 * rows either way so it can trigger stripe flushes.
 *
 * @param batch the rows to append
 */
public void addRowBatch(VectorizedRowBatch batch)
{
    if (!buildIndex)
    {
        // No index: a single write covers the whole batch.
        rowsInStripe += batch.size;
        treeWriter.writeRootBatch(batch, 0, batch.size);
    }
    else
    {
        // Chunk the writes so index entries line up with rowIndexStride.
        for (int posn = 0; posn < batch.size; )
        {
            // Never write past the end of the batch or the current stride.
            int chunkSize = Math.Min(batch.size - posn, rowIndexStride - rowsInIndex);
            treeWriter.writeRootBatch(batch, posn, chunkSize);
            posn += chunkSize;
            rowsInIndex += chunkSize;
            rowsInStripe += chunkSize;
            if (rowsInIndex >= rowIndexStride)
            {
                createRowIndexEntry();
            }
        }
    }
    memoryManager.addedRow(batch.size);
}
/**
 * Write the root batch for a struct root type.
 *
 * Each top-level field of the struct occupies its own column in the
 * batch, so the write fans out: column k of the batch goes to child
 * writer k. Statistics for the root column are bumped by the row count.
 *
 * @param batch the batch to write from
 * @param offset the row to start on
 * @param length the number of rows to write
 * @throws IOException
 */
internal override void writeRootBatch(VectorizedRowBatch batch, int offset, int length)
{
    // Update the statistics for the root column.
    indexStatistics.increment(length);
    // NOTE: the root column is assumed non-nullable here, so isPresent
    // is deliberately left untouched.
    int fieldCount = childrenWriters.Length;
    for (int field = 0; field < fieldCount; ++field)
    {
        childrenWriters[field].writeBatch(batch.cols[field], offset, length);
    }
}
/**
 * Append one pseudo-random "big row" to the batch, deriving every field
 * of the row from entry i of the pre-generated value arrays. The two
 * inner structs reuse the low and high halves of the same long value.
 *
 * @param batch the batch to append to (batch.size is advanced by one)
 * @param intValues source of integer-typed fields
 * @param doubleValues source of float/double fields
 * @param stringValues source of string fields
 * @param byteValues source of binary fields
 * @param words vocabulary for the second inner struct's string
 * @param i index into the value arrays for this row
 */
private void appendRandomRow(VectorizedRowBatch batch, long[] intValues, double[] doubleValues,
    string[] stringValues, byte[][] byteValues, string[] words, int i)
{
    long bits = intValues[i];
    InnerStruct lowHalf = new InnerStruct((int)bits, stringValues[i]);
    InnerStruct highHalf = new InnerStruct((int)(bits >> 32), words[i % words.Length] + "-x");
    setBigRow(batch, batch.size++,
        (bits & 1) == 0,
        (byte)bits,
        (short)bits,
        (int)bits,
        bits,
        (float)doubleValues[i],
        doubleValues[i],
        byteValues[i],
        stringValues[i],
        new MiddleStruct(lowHalf, highHalf),
        MakeList(),
        MakeMap(lowHalf, highHalf));
}
/**
 * Populate one row of a three-column (timestamp, union, decimal) batch.
 *
 * Column 0 holds the timestamp as nanoseconds; column 1 is a union whose
 * tag selects either a long (tag 0, from i) or a string (tag 1, from s);
 * column 2 holds the decimal. Any null argument marks the corresponding
 * column (or union field) as null.
 *
 * @param batch the batch to fill
 * @param rowId the row being set
 * @param ts timestamp value, or null
 * @param tag union branch selector (0 or 1), or null for a null union
 * @param i long payload used when tag == 0, or null
 * @param s string payload used when tag == 1, or null
 * @param dec decimal value, or null
 */
private static void setUnion(VectorizedRowBatch batch, int rowId,
    Timestamp? ts, int? tag, int? i, string s, HiveDecimal dec)
{
    UnionColumnVector union = (UnionColumnVector)batch.cols[1];

    // Column 0: timestamp stored as raw nanoseconds.
    if (ts.HasValue)
    {
        ((LongColumnVector)batch.cols[0]).vector[rowId] = ts.Value.Nanoseconds;
    }
    else
    {
        batch.cols[0].isNull[rowId] = true;
        batch.cols[0].noNulls = false;
    }

    // Column 1: the union itself.
    if (!tag.HasValue)
    {
        batch.cols[1].isNull[rowId] = true;
        batch.cols[1].noNulls = false;
    }
    else
    {
        int which = tag.Value;
        union.tags[rowId] = which;
        ColumnVector field = union.fields[which];
        switch (which)
        {
            case 0:
                // Long branch of the union.
                if (i.HasValue)
                {
                    ((LongColumnVector)field).vector[rowId] = i.Value;
                }
                else
                {
                    field.isNull[rowId] = true;
                    field.noNulls = false;
                }
                break;
            case 1:
                // String branch of the union.
                if (s != null)
                {
                    ((BytesColumnVector)field).setVal(rowId, s.getBytes());
                }
                else
                {
                    field.isNull[rowId] = true;
                    field.noNulls = false;
                }
                break;
            default:
                throw new ArgumentException("Bad tag " + tag);
        }
    }

    // Column 2: decimal.
    if (dec != null)
    {
        ((DecimalColumnVector)batch.cols[2]).vector[rowId] = dec;
    }
    else
    {
        batch.cols[2].isNull[rowId] = true;
        batch.cols[2].noNulls = false;
    }
}
/**
 * Fill one row of a twelve-column "big row" batch.
 *
 * Columns 0-6 are scalar numerics (bool as 0/1, byte, short, int, long,
 * float, double); 7 is binary, 8 is string (both nullable); 9-11 are a
 * struct, a list, and a map delegated to their respective setters.
 *
 * @param batch the batch whose row is being written
 * @param rowId the row to set
 * @param b1..m2 per-column values; b3 and s2 may be null
 */
private static void setBigRow(VectorizedRowBatch batch, int rowId,
    bool b1, byte b2, short s1, int i1, long l1, float f1, double d1,
    byte[] b3, string s2, MiddleStruct m1, List<InnerStruct> l2,
    Dictionary<string, InnerStruct> m2)
{
    ColumnVector[] cols = batch.cols;

    // Scalar numeric columns.
    ((LongColumnVector)cols[0]).vector[rowId] = b1 ? 1 : 0;
    ((LongColumnVector)cols[1]).vector[rowId] = b2;
    ((LongColumnVector)cols[2]).vector[rowId] = s1;
    ((LongColumnVector)cols[3]).vector[rowId] = i1;
    ((LongColumnVector)cols[4]).vector[rowId] = l1;
    ((DoubleColumnVector)cols[5]).vector[rowId] = f1;
    ((DoubleColumnVector)cols[6]).vector[rowId] = d1;

    // Binary column (nullable).
    if (b3 == null)
    {
        cols[7].isNull[rowId] = true;
        cols[7].noNulls = false;
    }
    else
    {
        ((BytesColumnVector)cols[7]).setVal(rowId, b3, 0, b3.Length);
    }

    // String column (nullable).
    if (s2 == null)
    {
        cols[8].isNull[rowId] = true;
        cols[8].noNulls = false;
    }
    else
    {
        ((BytesColumnVector)cols[8]).setVal(rowId, s2.getBytes());
    }

    // Compound columns.
    setMiddleStruct((StructColumnVector)cols[9], rowId, m1);
    setInnerList((ListColumnVector)cols[10], rowId, l2);
    setInnerMap((MapColumnVector)cols[11], rowId, m2);
}
/**
 * Round-trips a column of timestamps (various sub-second precisions,
 * ORC file format V_0_11) through a writer and reader, then verifies
 * the nanosecond component of every value, the final row count, and the
 * column-inclusion metadata for a single-column schema.
 */
public void testTimestamp()
{
    ObjectInspector inspector = ObjectInspectorFactory.getReflectionObjectInspector(typeof(Timestamp));
    TypeDescription schema = TypeDescription.createTimestamp();
    List<Timestamp> tslist = new List<Timestamp>();
    using (Stream file = File.OpenWrite(TestFilePath))
    using (Writer writer = OrcFile.createWriter(TestFilePath, file, OrcFile.writerOptions(conf)
        .setSchema(schema)
        .stripeSize(100000)
        .bufferSize(10000)
        .version(OrcFile.Version.V_0_11)))
    {
        // Values chosen to exercise micro- and nanosecond precision,
        // trailing zeros, and a value with no fractional part at all.
        tslist.Add(Timestamp.Parse("2037-01-01 00:00:00.000999"));
        tslist.Add(Timestamp.Parse("2003-01-01 00:00:00.000000222"));
        tslist.Add(Timestamp.Parse("1999-01-01 00:00:00.999999999"));
        tslist.Add(Timestamp.Parse("1995-01-01 00:00:00.688888888"));
        tslist.Add(Timestamp.Parse("2002-01-01 00:00:00.1"));
        tslist.Add(Timestamp.Parse("2010-03-02 00:00:00.000009001"));
        tslist.Add(Timestamp.Parse("2005-01-01 00:00:00.000002229"));
        tslist.Add(Timestamp.Parse("2006-01-01 00:00:00.900203003"));
        tslist.Add(Timestamp.Parse("2003-01-01 00:00:00.800000007"));
        tslist.Add(Timestamp.Parse("1996-08-02 00:00:00.723100809"));
        tslist.Add(Timestamp.Parse("1998-11-02 00:00:00.857340643"));
        tslist.Add(Timestamp.Parse("2008-10-02 00:00:00"));
        // One single-column batch: timestamps are carried as raw
        // nanoseconds in a LongColumnVector.
        VectorizedRowBatch batch = new VectorizedRowBatch(1, 1024);
        LongColumnVector vec = new LongColumnVector(1024);
        batch.cols[0] = vec;
        batch.reset();
        batch.size = tslist.Count;
        for (int i = 0; i < tslist.Count; ++i)
        {
            Timestamp ts = tslist[i];
            vec.vector[i] = ts.Nanoseconds;
        }
        writer.addRowBatch(batch);
        // Capture the writer's view of the schema before disposing it;
        // the assertions below run after the writer is closed.
        schema = writer.getSchema();
    }
    Reader reader = OrcFile.createReader(TestFilePath, OrcFile.readerOptions(conf));
    using (RecordReader rows = reader.rows())
    {
        int idx = 0;
        while (rows.hasNext())
        {
            object row = rows.next();
            // Only the nanos component is compared here, not the whole
            // timestamp value.
            Assert.Equal(tslist[idx++].getNanos(), ((Timestamp)row).getNanos());
        }
        Assert.Equal(tslist.Count, rows.getRowNumber());
        // A primitive (non-struct) root has no child columns, so the
        // maximum column id is 0 and only the root is included.
        Assert.Equal(0, schema.getMaximumId());
        bool[] expected = new bool[] { false };
        bool[] included = OrcUtils.includeColumns("", schema);
        Assert.Equal(expected,
            included);
    }
}
/**
 * Create an empty VectorizedRowBatch matching this type description.
 *
 * A struct root gets one column per child type; any other root type is
 * wrapped in a single-column batch whose column is this type itself.
 * The batch is reset before being returned.
 *
 * @return a fresh, empty batch of DEFAULT_SIZE capacity
 */
public VectorizedRowBatch createRowBatch()
{
    bool structRoot = category == Category.STRUCT;
    int columnCount = structRoot ? children.Count : 1;
    VectorizedRowBatch batch = new VectorizedRowBatch(columnCount, VectorizedRowBatch.DEFAULT_SIZE);
    if (structRoot)
    {
        // One column per top-level struct field.
        for (int c = 0; c < batch.cols.Length; ++c)
        {
            batch.cols[c] = children[c].createColumn();
        }
    }
    else
    {
        batch.cols[0] = createColumn();
    }
    batch.reset();
    return batch;
}