Example #1
 /**
  * Handle the top level object write.
  *
  * This default method is used for all types except structs, which are the
  * typical case. VectorizedRowBatch assumes the top level object is a
  * struct, so we use the first column for all other types.
  * @param batch the batch to write from
  * @param offset the row to start on
  * @param length the number of rows to write
  * @throws IOException
  */
 internal virtual void writeRootBatch(VectorizedRowBatch batch, int offset, int length)
 {
     writeBatch(batch.cols[0], offset, length);
 }
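A minimal sketch of the non-struct case described in the comment above; the LongColumnVector and the 1024-row capacity are assumptions borrowed from Example #7, not part of this snippet.

 // For a non-struct root the batch still carries a cols array, and the default
 // writeRootBatch simply forwards cols[0] to writeBatch.
 VectorizedRowBatch batch = new VectorizedRowBatch(1, 1024);
 batch.cols[0] = new LongColumnVector(1024);
 batch.reset();
 // treeWriter.writeRootBatch(batch, 0, batch.size) is then equivalent to
 // writeBatch(batch.cols[0], 0, batch.size) for this schema.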
Example #2
 public void addRowBatch(VectorizedRowBatch batch)
 {
     if (buildIndex)
     {
         // Batch the writes up to the rowIndexStride so that we can get the
         // right size indexes.
         int posn = 0;
         while (posn < batch.size)
         {
             int chunkSize = Math.Min(batch.size - posn,
                 rowIndexStride - rowsInIndex);
             treeWriter.writeRootBatch(batch, posn, chunkSize);
             posn += chunkSize;
             rowsInIndex += chunkSize;
             rowsInStripe += chunkSize;
             if (rowsInIndex >= rowIndexStride)
             {
                 createRowIndexEntry();
             }
         }
     }
     else
     {
         rowsInStripe += batch.size;
         treeWriter.writeRootBatch(batch, 0, batch.size);
     }
     memoryManager.addedRow(batch.size);
 }
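A caller-side sketch of how addRowBatch is typically driven. The single-column timestamp schema, conf, and TestFilePath are assumptions carried over from Example #7; the writer options are pared down to the minimum needed for the sketch.

 TypeDescription schema = TypeDescription.createTimestamp();
 using (Stream file = File.OpenWrite(TestFilePath))
 using (Writer writer = OrcFile.createWriter(TestFilePath, file,
     OrcFile.writerOptions(conf).setSchema(schema)))
 {
     VectorizedRowBatch batch = new VectorizedRowBatch(1, VectorizedRowBatch.DEFAULT_SIZE);
     LongColumnVector vec = new LongColumnVector(VectorizedRowBatch.DEFAULT_SIZE);
     batch.cols[0] = vec;
     batch.reset();
     for (long row = 0; row < 100000; ++row)
     {
         // Hand a full batch to the writer and reuse it; addRowBatch splits the
         // rows across row index strides as shown above.
         if (batch.size == VectorizedRowBatch.DEFAULT_SIZE)
         {
             writer.addRowBatch(batch);
             batch.reset();
         }
         vec.vector[batch.size++] = row;   // placeholder nanosecond values
     }
     if (batch.size > 0)
     {
         writer.addRowBatch(batch);
     }
 }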
Example #3
 internal override void writeRootBatch(VectorizedRowBatch batch, int offset, int length)
 {
     // update the statistics for the root column
     indexStatistics.increment(length);
     // I'm assuming that the root column isn't nullable so that I don't need
     // to update isPresent.
     for (int i = 0; i < childrenWriters.Length; ++i)
     {
         childrenWriters[i].writeBatch(batch.cols[i], offset, length);
     }
 }
Example #4
 private void appendRandomRow(VectorizedRowBatch batch,
                              long[] intValues, double[] doubleValues,
                              string[] stringValues,
                              byte[][] byteValues,
                              string[] words, int i)
 {
     // Build the two nested struct values used for the middle/list/map columns.
     InnerStruct inner = new InnerStruct((int)intValues[i], stringValues[i]);
     InnerStruct inner2 = new InnerStruct((int)(intValues[i] >> 32),
         words[i % words.Length] + "-x");
     // Append a full row at the end of the batch; batch.size is post-incremented
     // to claim the new row slot.
     setBigRow(batch, batch.size++, (intValues[i] & 1) == 0, (byte)intValues[i],
         (short)intValues[i], (int)intValues[i], intValues[i],
         (float)doubleValues[i], doubleValues[i], byteValues[i], stringValues[i],
         new MiddleStruct(inner, inner2), MakeList(), MakeMap(inner, inner2));
 }
Example #5
 private static void setUnion(VectorizedRowBatch batch, int rowId,
                              Timestamp? ts, int? tag, int? i, string s,
                              HiveDecimal dec)
 {
     // Column layout: col 0 = timestamp stored as nanoseconds in a LongColumnVector,
     // col 1 = union of long (tag 0) or string (tag 1), col 2 = decimal.
     UnionColumnVector union = (UnionColumnVector)batch.cols[1];
     if (ts != null)
     {
         ((LongColumnVector)batch.cols[0]).vector[rowId] = ts.Value.Nanoseconds;
     }
     else
     {
         batch.cols[0].isNull[rowId] = true;
         batch.cols[0].noNulls = false;
     }
     if (tag != null)
     {
         union.tags[rowId] = tag.Value;
         if (tag == 0)
         {
             if (i != null)
             {
                 ((LongColumnVector)union.fields[tag.Value]).vector[rowId] = i.Value;
             }
             else
             {
                 union.fields[tag.Value].isNull[rowId] = true;
                 union.fields[tag.Value].noNulls = false;
             }
         }
         else if (tag == 1)
         {
             if (s != null)
             {
                 ((BytesColumnVector)union.fields[tag.Value]).setVal(rowId, s.getBytes());
             }
             else
             {
                 union.fields[tag.Value].isNull[rowId] = true;
                 union.fields[tag.Value].noNulls = false;
             }
         }
         else
         {
             throw new ArgumentException("Bad tag " + tag);
         }
     }
     else
     {
         batch.cols[1].isNull[rowId] = true;
         batch.cols[1].noNulls = false;
     }
     if (dec != null)
     {
         ((DecimalColumnVector)batch.cols[2]).vector[rowId] = dec;
     }
     else
     {
         batch.cols[2].isNull[rowId] = true;
         batch.cols[2].noNulls = false;
     }
 }
Example #6
 private static void setBigRow(VectorizedRowBatch batch, int rowId,
                               bool b1, byte b2, short s1,
                               int i1, long l1, float f1,
                               double d1, byte[] b3, string s2,
                               MiddleStruct m1, List<InnerStruct> l2,
                               Dictionary<string, InnerStruct> m2)
 {
     // Columns 0-8 hold the scalar fields; a null argument marks isNull for that
     // row and clears the column's noNulls flag.
     ((LongColumnVector)batch.cols[0]).vector[rowId] = b1 ? 1 : 0;
     ((LongColumnVector)batch.cols[1]).vector[rowId] = b2;
     ((LongColumnVector)batch.cols[2]).vector[rowId] = s1;
     ((LongColumnVector)batch.cols[3]).vector[rowId] = i1;
     ((LongColumnVector)batch.cols[4]).vector[rowId] = l1;
     ((DoubleColumnVector)batch.cols[5]).vector[rowId] = f1;
     ((DoubleColumnVector)batch.cols[6]).vector[rowId] = d1;
     if (b3 != null)
     {
         ((BytesColumnVector)batch.cols[7]).setVal(rowId, b3, 0, b3.Length);
     }
     else
     {
         batch.cols[7].isNull[rowId] = true;
         batch.cols[7].noNulls = false;
     }
     if (s2 != null)
     {
         ((BytesColumnVector)batch.cols[8]).setVal(rowId, s2.getBytes());
     }
     else
     {
         batch.cols[8].isNull[rowId] = true;
         batch.cols[8].noNulls = false;
     }
     // Columns 9-11 hold the nested struct, list, and map fields.
     setMiddleStruct((StructColumnVector)batch.cols[9], rowId, m1);
     setInnerList((ListColumnVector)batch.cols[10], rowId, l2);
     setInnerMap((MapColumnVector)batch.cols[11], rowId, m2);
 }
Example #7
        public void testTimestamp()
        {
            ObjectInspector inspector = ObjectInspectorFactory.getReflectionObjectInspector(typeof(Timestamp));
            TypeDescription schema = TypeDescription.createTimestamp();
            List<Timestamp> tslist = new List<Timestamp>();

            using (Stream file = File.OpenWrite(TestFilePath))
            using (Writer writer = OrcFile.createWriter(TestFilePath, file, OrcFile.writerOptions(conf)
                .setSchema(schema)
                .stripeSize(100000)
                .bufferSize(10000)
                .version(OrcFile.Version.V_0_11)))
            {
                tslist.Add(Timestamp.Parse("2037-01-01 00:00:00.000999"));
                tslist.Add(Timestamp.Parse("2003-01-01 00:00:00.000000222"));
                tslist.Add(Timestamp.Parse("1999-01-01 00:00:00.999999999"));
                tslist.Add(Timestamp.Parse("1995-01-01 00:00:00.688888888"));
                tslist.Add(Timestamp.Parse("2002-01-01 00:00:00.1"));
                tslist.Add(Timestamp.Parse("2010-03-02 00:00:00.000009001"));
                tslist.Add(Timestamp.Parse("2005-01-01 00:00:00.000002229"));
                tslist.Add(Timestamp.Parse("2006-01-01 00:00:00.900203003"));
                tslist.Add(Timestamp.Parse("2003-01-01 00:00:00.800000007"));
                tslist.Add(Timestamp.Parse("1996-08-02 00:00:00.723100809"));
                tslist.Add(Timestamp.Parse("1998-11-02 00:00:00.857340643"));
                tslist.Add(Timestamp.Parse("2008-10-02 00:00:00"));

                // One timestamp column; the values are stored as nanoseconds in a
                // LongColumnVector.
                VectorizedRowBatch batch = new VectorizedRowBatch(1, 1024);
                LongColumnVector vec = new LongColumnVector(1024);
                batch.cols[0] = vec;
                batch.reset();
                batch.size = tslist.Count;
                for (int i = 0; i < tslist.Count; ++i)
                {
                    Timestamp ts = tslist[i];
                    vec.vector[i] = ts.Nanoseconds;
                }
                writer.addRowBatch(batch);
                schema = writer.getSchema();
            }

            Reader reader = OrcFile.createReader(TestFilePath,
                OrcFile.readerOptions(conf));
            using (RecordReader rows = reader.rows())
            {
                int idx = 0;
                while (rows.hasNext())
                {
                    object row = rows.next();
                    Assert.Equal(tslist[idx++].getNanos(), ((Timestamp)row).getNanos());
                }
                Assert.Equal(tslist.Count, rows.getRowNumber());
                Assert.Equal(0, schema.getMaximumId());
                bool[] expected = new bool[] { false };
                bool[] included = OrcUtils.includeColumns("", schema);
                Assert.Equal(expected, included);
            }
        }
Example #8
 public VectorizedRowBatch createRowBatch()
 {
     VectorizedRowBatch result;
     if (category == Category.STRUCT)
     {
         // Struct root: one column vector per top-level field.
         result = new VectorizedRowBatch(children.Count,
             VectorizedRowBatch.DEFAULT_SIZE);
         for (int i = 0; i < result.cols.Length; ++i)
         {
             result.cols[i] = children[i].createColumn();
         }
     }
     else
     {
         // Any other root type: a single column, matching writeRootBatch in Example #1.
         result = new VectorizedRowBatch(1, VectorizedRowBatch.DEFAULT_SIZE);
         result.cols[0] = createColumn();
     }
     result.reset();
     return result;
 }
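Example #8 reads like a TypeDescription method (it uses children, category and createColumn), so it can presumably replace the manual batch construction seen in Example #7; a brief sketch under that assumption, with bigRowSchema as a hypothetical struct TypeDescription.

 // Struct root: one column vector per top-level field, already reset.
 VectorizedRowBatch structBatch = bigRowSchema.createRowBatch();
 // Non-struct root: a single column, matching the default writeRootBatch in Example #1.
 VectorizedRowBatch tsBatch = TypeDescription.createTimestamp().createRowBatch();
 // Rows are then filled (e.g. with appendRandomRow or setBigRow from Examples #4
 // and #6) and handed to Writer.addRowBatch as sketched after Example #2.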