Esempio n. 1
0
        /// <summary>
        /// Finishes the stripe that is currently being accumulated: flushes every column
        /// writer, writes the index buffers, the data buffers and the stripe footer to the
        /// output stream, records the stripe's statistics and layout information, and finally
        /// resets the row counter and the column writers.
        /// </summary>
        private void CompleteStripe()
        {
            var footer              = new StripeFooter();
            var perStripeStatistics = new StripeStatistics();

            // Pass 1: flush each column and register its encoding with the footer.
            foreach (var column in _columnWriters)
            {
                var columnWriter = column.ColumnWriter;
                columnWriter.FlushBuffers();

                // Only a StringWriter carries a dictionary length; every other writer contributes 0.
                var dictionaryLength = (columnWriter as StringWriter)?.DictionaryLength ?? 0;
                footer.AddColumn(columnWriter.ColumnEncoding, dictionaryLength);
            }

            // The stripe starts at the current position of the output stream.
            var layout = new StripeInformation();
            layout.Offset       = (ulong)_outputStream.Position;
            layout.NumberOfRows = (ulong)_rowsInStripe;

            // Pass 2: emit one row-index buffer per column and gather summary statistics.
            foreach (var column in _columnWriters)
            {
                var columnWriter = column.ColumnWriter;

                // Serialize the column's statistics into an index buffer and write it out.
                var rowIndex = _bufferFactory.CreateBuffer(StreamKind.RowIndex);
                columnWriter.Statistics.WriteToBuffer(rowIndex);
                rowIndex.CopyTo(_outputStream);

                // Register the index buffer with the footer.
                footer.AddDataStream(columnWriter.ColumnId, rowIndex);

                // Each statistics entry is accumulated twice: into the summary for this
                // stripe and into the column's FileStatistics.
                var stripeLevel = new ColumnStatistics();
                foreach (var entry in columnWriter.Statistics)
                {
                    entry.FillColumnStatistics(stripeLevel);
                    entry.FillColumnStatistics(column.FileStatistics);
                }
                perStripeStatistics.ColStats.Add(stripeLevel);
            }
            _stripeStats.Add(perStripeStatistics);

            // Everything written since the stripe offset so far is index data.
            var indexEnd = (ulong)_outputStream.Position;
            layout.IndexLength = indexEnd - layout.Offset;

            // Pass 3: write the data buffers, skipping those that need not be included.
            foreach (var column in _columnWriters)
            {
                var columnWriter = column.ColumnWriter;
                foreach (var stream in columnWriter.Buffers)
                {
                    if (stream.MustBeIncluded)
                    {
                        stream.CopyTo(_outputStream);
                        footer.AddDataStream(columnWriter.ColumnId, stream);
                    }
                }
            }

            // Data length is whatever was written after the index section.
            var dataEnd = (ulong)_outputStream.Position;
            layout.DataLength = dataEnd - layout.IndexLength - layout.Offset;

            // Stripe footer: serialized last; its length is reported through the out parameter.
            long serializedFooterLength;
            _bufferFactory.SerializeAndCompressTo(_outputStream, footer, out serializedFooterLength);
            layout.FooterLength = (ulong)serializedFooterLength;

            _stripeInformations.Add(layout);

            // Roll the stripe's row count into the file total and start a fresh stripe.
            _rowsInFile  += _rowsInStripe;
            _rowsInStripe = 0;
            foreach (var column in _columnWriters)
            {
                column.ColumnWriter.Reset();
            }
        }
        /// <summary>
        /// Writes 20000 rows whose string field is null in some row groups and stripes, reads
        /// the file back, and checks that the file-level and stripe-level column statistics
        /// report the expected value counts and "has null" flags.
        /// </summary>
        public void testHasNull()
        {
            ObjectInspector inspector = ObjectInspectorFactory.getReflectionObjectInspector(typeof(SimpleStruct));

            // Intended layout: stripe 1 consists of five row groups of 1000 rows each, and
            // stripes 2-4 hold 5000 rows each. Every entry below is the string field value
            // used for one whole row group / stripe; null leaves the field null.
            const int rowsPerRowGroup = 1000;
            const int rowsPerLaterStripe = 5000;
            string[] stripe1RowGroupValues = { "RG1", null, "RG3", null, null };
            string[] laterStripeValues = { null, "STRIPE-3", null };

            // File.Create truncates an existing file. File.OpenWrite does not, so a longer
            // file left over from an earlier run would keep stale bytes after the new data.
            using (Stream file = File.Create(TestFilePath))
                using (Writer writer = OrcFile.createWriter(TestFilePath, file, OrcFile.writerOptions(conf)
                                                            .inspector(inspector)
                                                            .rowIndexStride(1000)
                                                            .stripeSize(10000)
                                                            .bufferSize(10000)))
                {
                    // STRIPE 1 (RG1..RG5)
                    foreach (string value in stripe1RowGroupValues)
                    {
                        for (int i = 0; i < rowsPerRowGroup; i++)
                        {
                            writer.addRow(new SimpleStruct(bytes(1, 2, 3), value));
                        }
                    }

                    // STRIPE 2..4
                    foreach (string value in laterStripeValues)
                    {
                        for (int i = 0; i < rowsPerLaterStripe; i++)
                        {
                            writer.addRow(new SimpleStruct(bytes(1, 2, 3), value));
                        }
                    }
                }

            Reader reader = OrcFile.createReader(TestFilePath, OrcFile.readerOptions(conf));

            // check the file level stats
            // NOTE(review): indices 0/1/2 presumably map to the struct, its bytes field and
            // its string field - confirm against SimpleStruct.
            ColumnStatistics[] stats = reader.getStatistics();
            Assert.Equal(20000, stats[0].getNumberOfValues());
            Assert.Equal(20000, stats[1].getNumberOfValues());
            // Non-null strings written above: 2 row groups x 1000 + 1 stripe x 5000.
            Assert.Equal(7000, stats[2].getNumberOfValues());
            Assert.False(stats[0].hasNull());
            Assert.False(stats[1].hasNull());
            Assert.True(stats[2].hasNull());

            // check the stripe level stats
            List <StripeStatistics> stripeStats = reader.getStripeStatistics();

            // Expected hasNull() of column 2 for stripes 1..4; only stripe 3 was written
            // without any null string.
            bool[] stringColumnHasNull = { true, true, false, true };
            for (int stripe = 0; stripe < stringColumnHasNull.Length; stripe++)
            {
                string           label = "stripe " + (stripe + 1);
                StripeStatistics ss    = stripeStats[stripe];
                ColumnStatistics cs1   = ss.getColumnStatistics()[0];
                ColumnStatistics cs2   = ss.getColumnStatistics()[1];
                ColumnStatistics cs3   = ss.getColumnStatistics()[2];

                Assert.False(cs1.hasNull(), label + ", column 0 must not report nulls");
                Assert.False(cs2.hasNull(), label + ", column 1 must not report nulls");
                if (stringColumnHasNull[stripe])
                {
                    Assert.True(cs3.hasNull(), label + ", column 2 must report nulls");
                }
                else
                {
                    Assert.False(cs3.hasNull(), label + ", column 2 must not report nulls");
                }
            }

            // The file-dump comparison below is compiled out and kept unchanged.
#if false
            // Test file dump
            TextWriter       origOut        = System.Console.Out;
            string           outputFilename = "orc-file-has-null.out";
            FileOutputStream myOut          = new FileOutputStream(workDir + File.separator + outputFilename);

            // replace stdout and run command
            System.Console.SetOut(new StreamWriter(myOut));
            FileDump.main(new String[] { testFilePath.toString(), "--rowindex=2" });
            System.Console.Out.Flush();
            System.SetOut(origOut);

            TestFileDump.checkOutput(outputFilename, workDir + File.separator + outputFilename);
#endif
        }