/// <summary>
/// Finishes the stripe currently being accumulated and appends it to the output stream.
/// The stripe is laid out as: one row-index stream per column, then the data streams of
/// every column, then the serialized and compressed stripe footer. Stripe-level statistics
/// and the stripe's offset/lengths are recorded, the stripe's rows are added to the
/// file row count, and all column writers are reset.
/// </summary>
private void CompleteStripe()
{
    var footer = new StripeFooter();
    var statsForStripe = new StripeStatistics();

    // Pass 1: flush every column and register its encoding in the footer.
    foreach (var column in _columnWriters)
    {
        var columnWriter = column.ColumnWriter;
        columnWriter.FlushBuffers();

        // DictionaryLength is only used by StringWriter; any other writer contributes 0.
        var asStringWriter = columnWriter as StringWriter;
        var dictionaryLength = asStringWriter?.DictionaryLength ?? 0;
        footer.AddColumn(columnWriter.ColumnEncoding, dictionaryLength);
    }

    // The stripe begins at the current stream position; all lengths below are relative to it.
    var stripeStart = (ulong)_outputStream.Position;
    var info = new StripeInformation
    {
        Offset = stripeStart,
        NumberOfRows = (ulong)_rowsInStripe
    };

    // Pass 2: write one row-index stream per column and gather the statistics.
    foreach (var column in _columnWriters)
    {
        var columnWriter = column.ColumnWriter;

        var rowIndex = _bufferFactory.CreateBuffer(StreamKind.RowIndex);
        columnWriter.Statistics.WriteToBuffer(rowIndex);
        rowIndex.CopyTo(_outputStream);
        footer.AddDataStream(columnWriter.ColumnId, rowIndex);

        // Each statistics entry is folded into both the stripe-level summary
        // and the column's running file-level summary.
        var stripeLevel = new ColumnStatistics();
        foreach (var entry in columnWriter.Statistics)
        {
            entry.FillColumnStatistics(stripeLevel);
            entry.FillColumnStatistics(column.FileStatistics);
        }
        statsForStripe.ColStats.Add(stripeLevel);
    }
    _stripeStats.Add(statsForStripe);

    // Everything written since the stripe start is index data.
    var indexLength = (ulong)_outputStream.Position - stripeStart;
    info.IndexLength = indexLength;

    // Pass 3: write the data streams, skipping buffers that do not have to be included.
    foreach (var column in _columnWriters)
    {
        var columnWriter = column.ColumnWriter;
        foreach (var stream in columnWriter.Buffers)
        {
            if (stream.MustBeIncluded)
            {
                stream.CopyTo(_outputStream);
                footer.AddDataStream(columnWriter.ColumnId, stream);
            }
        }
    }

    // Data length is whatever was written after the index section.
    info.DataLength = (ulong)_outputStream.Position - indexLength - stripeStart;

    // Footer: its length is reported back by the buffer factory.
    long writtenFooterLength;
    _bufferFactory.SerializeAndCompressTo(_outputStream, footer, out writtenFooterLength);
    info.FooterLength = (ulong)writtenFooterLength;

    _stripeInformations.Add(info);

    // Roll the stripe's rows into the file total and start a fresh stripe.
    _rowsInFile += _rowsInStripe;
    _rowsInStripe = 0;
    foreach (var column in _columnWriters)
    {
        column.ColumnWriter.Reset();
    }
}
/// <summary>
/// Writes 20000 rows whose second field is null in some row groups and stripes and
/// non-null in others, then reads the file back and checks the value counts and the
/// hasNull flag of the file-level statistics and of the first four stripes' statistics.
/// </summary>
public void testHasNull()
{
    ObjectInspector inspector = ObjectInspectorFactory.getReflectionObjectInspector(typeof(SimpleStruct));

    // File.Create starts from an empty file. File.OpenWrite would keep the tail of any
    // longer file already present at TestFilePath, leaving stale bytes after the new data.
    using (Stream file = File.Create(TestFilePath))
    using (Writer writer = OrcFile.createWriter(TestFilePath, file, OrcFile.writerOptions(conf)
        .inspector(inspector)
        .rowIndexStride(1000)
        .stripeSize(10000)
        .bufferSize(10000)))
    {
        // STRIPE 1
        // RG1
        for (int i = 0; i < 1000; i++)
        {
            writer.addRow(new SimpleStruct(bytes(1, 2, 3), "RG1"));
        }
        // RG2
        for (int i = 0; i < 1000; i++)
        {
            writer.addRow(new SimpleStruct(bytes(1, 2, 3), null));
        }
        // RG3
        for (int i = 0; i < 1000; i++)
        {
            writer.addRow(new SimpleStruct(bytes(1, 2, 3), "RG3"));
        }
        // RG4
        for (int i = 0; i < 1000; i++)
        {
            writer.addRow(new SimpleStruct(bytes(1, 2, 3), null));
        }
        // RG5
        for (int i = 0; i < 1000; i++)
        {
            writer.addRow(new SimpleStruct(bytes(1, 2, 3), null));
        }
        // STRIPE 2
        for (int i = 0; i < 5000; i++)
        {
            writer.addRow(new SimpleStruct(bytes(1, 2, 3), null));
        }
        // STRIPE 3
        for (int i = 0; i < 5000; i++)
        {
            writer.addRow(new SimpleStruct(bytes(1, 2, 3), "STRIPE-3"));
        }
        // STRIPE 4
        for (int i = 0; i < 5000; i++)
        {
            writer.addRow(new SimpleStruct(bytes(1, 2, 3), null));
        }
    }

    Reader reader = OrcFile.createReader(TestFilePath, OrcFile.readerOptions(conf));

    // check the file level stats
    // Column 2 has 7000 non-null values: 2000 in stripe 1 (RG1, RG3) plus 5000 in stripe 3.
    ColumnStatistics[] stats = reader.getStatistics();
    Assert.Equal(20000, stats[0].getNumberOfValues());
    Assert.Equal(20000, stats[1].getNumberOfValues());
    Assert.Equal(7000, stats[2].getNumberOfValues());
    Assert.False(stats[0].hasNull());
    Assert.False(stats[1].hasNull());
    Assert.True(stats[2].hasNull());

    // check the stripe level stats
    List<StripeStatistics> stripeStats = reader.getStripeStatistics();

    // Expected hasNull of column 2 for stripes 1..4; only stripe 3 was written without nulls.
    bool[] column2HasNull = { true, true, false, true };
    for (int stripe = 0; stripe < column2HasNull.Length; stripe++)
    {
        var columns = stripeStats[stripe].getColumnStatistics();
        string label = "stripe " + (stripe + 1);

        Assert.False(columns[0].hasNull(), label + ": column 0 must not report nulls");
        Assert.False(columns[1].hasNull(), label + ": column 1 must not report nulls");
        if (column2HasNull[stripe])
        {
            Assert.True(columns[2].hasNull(), label + ": column 2 must report nulls");
        }
        else
        {
            Assert.False(columns[2].hasNull(), label + ": column 2 must not report nulls");
        }
    }

#if false
    // Test file dump
    TextWriter origOut = System.Console.Out;
    string outputFilename = "orc-file-has-null.out";
    FileOutputStream myOut = new FileOutputStream(workDir + File.separator + outputFilename);

    // replace stdout and run command
    System.Console.SetOut(new StreamWriter(myOut));
    FileDump.main(new String[] { testFilePath.toString(), "--rowindex=2" });
    System.Console.Out.Flush();
    System.SetOut(origOut);

    TestFileDump.checkOutput(outputFilename, workDir + File.separator + outputFilename);
#endif
}