/// <summary>
/// Writes 5120 ints as ten descending runs (512 down to 1) to an uncompressed ORC 0.12 file
/// using the COMPRESSION encoding strategy, then checks the DATA stream length that
/// <c>FileDump</c> prints for column 0.
/// </summary>
public void testFixedDeltaOneDescending()
{
    ObjectInspector inspector = ObjectInspectorFactory.getReflectionObjectInspector(typeof(int));

    // File.Create truncates an existing file. File.OpenWrite would keep stale trailing bytes
    // if a longer file had previously been written to the same path.
    using (Stream file = File.Create(TestFilePath))
    using (Writer w = OrcFile.createWriter(TestFilePath, file, OrcFile.writerOptions(conf)
        .compress(CompressionKind.NONE)
        .inspector(inspector)
        .rowIndexStride(0)
        .encodingStrategy(OrcFile.EncodingStrategy.COMPRESSION)
        .version(OrcFile.Version.V_0_12)))
    {
        for (int i = 0; i < 5120; ++i)
        {
            // 512, 511, ..., 1, then the pattern repeats.
            w.addRow(512 - (i % 512));
        }
    }

    using (CaptureStdoutToMemory capture = new CaptureStdoutToMemory())
    {
        FileDump.Main(TestFilePath);

        // 10 runs of 512 elements. Each run has 2 bytes header, 2 byte base (base = 512, zigzag + varint)
        // and 1 byte delta (delta = 1). In total, 5 bytes per run.
        Assert.Contains("Stream: column 0 section DATA start: 3 length 50", capture.Text);
    }
}
/// <summary>
/// Writes one large value (10000000) followed by 511 small pseudo-random values (fixed seed 123)
/// to an uncompressed ORC 0.12 file using the COMPRESSION encoding strategy, then checks the
/// DATA stream length that <c>FileDump</c> prints for column 0.
/// </summary>
public void testPatchedBase()
{
    ObjectInspector inspector = ObjectInspectorFactory.getReflectionObjectInspector(typeof(int));

    // File.Create truncates an existing file. File.OpenWrite would keep stale trailing bytes
    // if a longer file had previously been written to the same path.
    using (Stream file = File.Create(TestFilePath))
    using (Writer w = OrcFile.createWriter(TestFilePath, file, OrcFile.writerOptions(conf)
        .compress(CompressionKind.NONE)
        .inspector(inspector)
        .rowIndexStride(0)
        .encodingStrategy(OrcFile.EncodingStrategy.COMPRESSION)
        .version(OrcFile.Version.V_0_12)))
    {
        // Fixed seed keeps the generated sequence, and therefore the encoded size, reproducible.
        Random rand = new Random(123);

        w.addRow(10000000);
        for (int i = 0; i < 511; ++i)
        {
            w.addRow(rand.Next(i + 1));
        }
    }

    using (CaptureStdoutToMemory capture = new CaptureStdoutToMemory())
    {
        FileDump.Main(TestFilePath);

        // use PATCHED_BASE encoding
        Assert.Contains("Stream: column 0 section DATA start: 3 length 583", capture.Text);
    }
}
/// <summary>
/// Writes the sequence 0, 0, 1, 2, ..., 510 to an uncompressed ORC 0.12 file using the
/// COMPRESSION encoding strategy, then checks the DATA stream length that <c>FileDump</c>
/// prints for column 0.
/// </summary>
public void testDeltaUnknownSign()
{
    ObjectInspector inspector = ObjectInspectorFactory.getReflectionObjectInspector(typeof(int));

    // File.Create truncates an existing file. File.OpenWrite would keep stale trailing bytes
    // if a longer file had previously been written to the same path.
    using (Stream file = File.Create(TestFilePath))
    using (Writer w = OrcFile.createWriter(TestFilePath, file, OrcFile.writerOptions(conf)
        .compress(CompressionKind.NONE)
        .inspector(inspector)
        .rowIndexStride(0)
        .encodingStrategy(OrcFile.EncodingStrategy.COMPRESSION)
        .version(OrcFile.Version.V_0_12)))
    {
        // The extra leading zero makes the first delta 0 before the sequence starts rising.
        w.addRow(0);
        for (int i = 0; i < 511; ++i)
        {
            w.addRow(i);
        }
    }

    using (CaptureStdoutToMemory capture = new CaptureStdoutToMemory())
    {
        FileDump.Main(TestFilePath);

        // monotonicity will be undetermined for this sequence 0,0,1,2,3,...510. Hence DIRECT encoding
        // will be used. 2 bytes for header and 640 bytes for data (512 values with fixed bit of 10 bits
        // each, 5120/8 = 640). Total bytes 642
        Assert.Contains("Stream: column 0 section DATA start: 3 length 642", capture.Text);
    }
}
/// <summary>
/// Writes the value 10 five times to an uncompressed ORC 0.12 file using the COMPRESSION
/// encoding strategy, then checks the DATA stream length that <c>FileDump</c> prints for
/// column 0.
/// </summary>
public void testShortRepeat()
{
    ObjectInspector inspector = ObjectInspectorFactory.getReflectionObjectInspector(typeof(int));

    // File.Create truncates an existing file. File.OpenWrite would keep stale trailing bytes
    // if a longer file had previously been written to the same path.
    using (Stream file = File.Create(TestFilePath))
    using (Writer w = OrcFile.createWriter(TestFilePath, file, OrcFile.writerOptions(conf)
        .compress(CompressionKind.NONE)
        .inspector(inspector)
        .rowIndexStride(0)
        .encodingStrategy(OrcFile.EncodingStrategy.COMPRESSION)
        .version(OrcFile.Version.V_0_12)))
    {
        for (int i = 0; i < 5; ++i)
        {
            w.addRow(10);
        }
    }

    using (CaptureStdoutToMemory capture = new CaptureStdoutToMemory())
    {
        FileDump.Main(TestFilePath);

        // 1 byte header + 1 byte value
        Assert.Contains("Stream: column 0 section DATA start: 3 length 2", capture.Text);
    }
}
/// <summary>
/// Writes two <c>AllTypesRecord</c> rows to an uncompressed ORC file, runs <c>FileDump</c> with
/// the <c>-d</c> option and compares each non-empty output line with the expected JSON text.
/// </summary>
public void testDataDump()
{
    // File.Create truncates an existing file. File.OpenWrite would keep stale trailing bytes
    // if a longer file had previously been written to the same path.
    using (Stream file = File.Create(TestFilePath))
    {
        OrcFile.WriterOptions options = new OrcFile.WriterOptions(new Properties(), conf);
        options.inspector(ObjectInspectorFactory.getReflectionObjectInspector(typeof(AllTypesRecord)));
        options.stripeSize(100000);
        options.compress(CompressionKind.NONE);
        options.bufferSize(10000);
        options.rowIndexStride(1000);

        using (Writer writer = OrcFile.createWriter(TestFilePath, file, options))
        {
            // The same dictionary instance is reused for both rows; it is cleared in between.
            Dictionary<string, string> m = new Dictionary<string, string>(2);

            m.Add("k1", "v1");
            writer.addRow(new AllTypesRecord(
                true,
                (sbyte)10,
                (short)100,
                1000,
                10000L,
                4.0f,
                20.0,
                HiveDecimal.Parse("4.2222"),
                new Timestamp(1416967764000L),
                new Date(1416967764000L),
                "string",
                m,
                new List<int> { 100, 200 },
                new AllTypesRecord.Struct(10, "foo")));

            m.Clear();
            m.Add("k3", "v3");
            writer.addRow(new AllTypesRecord(
                false,
                (sbyte)20,
                (short)200,
                2000,
                20000L,
                8.0f,
                40.0,
                HiveDecimal.Parse("2.2222"),
                new Timestamp(1416967364000L),
                new Date(1411967764000L),
                "abcd",
                m,
                new List<int> { 200, 300 },
                new AllTypesRecord.Struct(20, "bar")));
        }
    }

    string[] lines;
    using (CaptureStdoutToMemory capture = new CaptureStdoutToMemory())
    {
        FileDump.Main(TestFilePath, "-d");
        lines = capture.Text.Split(new[] { '\n' }, StringSplitOptions.RemoveEmptyEntries);
    }

    // One dumped line per row written above.
    Assert.Equal(2, lines.Length);

    // Don't be fooled by the big space in the middle, this line is quite long
    // NOTE(review): the comment above refers to a long run of spaces inside the expected JSON,
    // but the "c" values below contain a single space and the second literal is split by a line
    // break. The literals may have been whitespace-collapsed; confirm them against the real
    // dump output.
    Assert.Equal(
        "{\"b\":true,\"bt\":10,\"s\":100,\"i\":1000,\"l\":10000,\"f\":4,\"d\":20,\"de\":\"4.2222\",\"t\":\"2014-11-25 18:09:24\",\"dt\":\"2014-11-25\",\"str\":\"string\",\"c\":\"hello \",\"vc\":\"hello\",\"m\":[{\"_key\":\"k1\",\"_value\":\"v1\"}],\"a\":[100,200],\"st\":{\"i\":10,\"s\":\"foo\"}}",
        lines[0]);
    Assert.Equal(
        "{\"b\":false,\"bt\":20,\"s\":200,\"i\":2000,\"l\":20000,\"f\":8,\"d\":40,\"de\":\"2.2222\",\"t\":\"2014-11-25 18:02:44\",\"dt\":\"2014-09-28\",\"str\":\"abcd\",\"c\":\"world 
\",\"vc\":\"world\",\"m\":[{\"_key\":\"k3\",\"_value\":\"v3\"}],\"a\":[200,300],\"st\":{\"i\":20,\"s\":\"bar\"}}",
        lines[1]);
}
/// <summary>
/// Writes two <c>AllTypesRecord</c> rows to an uncompressed ORC file, runs <c>FileDump</c> with
/// the <c>-d</c> option and compares each non-empty output line with the expected JSON text.
/// </summary>
public void testDataDump()
{
    // File.Create truncates an existing file. File.OpenWrite would keep stale trailing bytes
    // if a longer file had previously been written to the same path.
    using (Stream file = File.Create(TestFilePath))
    {
        OrcFile.WriterOptions options = new OrcFile.WriterOptions(new Properties(), conf);
        options.inspector(ObjectInspectorFactory.getReflectionObjectInspector(typeof(AllTypesRecord)));
        options.stripeSize(100000);
        options.compress(CompressionKind.NONE);
        options.bufferSize(10000);
        options.rowIndexStride(1000);

        using (Writer writer = OrcFile.createWriter(TestFilePath, file, options))
        {
            // The same dictionary instance is reused for both rows; it is cleared in between.
            Dictionary<string, string> m = new Dictionary<string, string>(2);

            m.Add("k1", "v1");
            writer.addRow(new AllTypesRecord(
                true,
                (sbyte)10,
                (short)100,
                1000,
                10000L,
                4.0f,
                20.0,
                HiveDecimal.Parse("4.2222"),
                new Timestamp(1416967764000L),
                new Date(1416967764000L),
                "string",
                m,
                new List<int> { 100, 200 },
                new AllTypesRecord.Struct(10, "foo")));

            m.Clear();
            m.Add("k3", "v3");
            writer.addRow(new AllTypesRecord(
                false,
                (sbyte)20,
                (short)200,
                2000,
                20000L,
                8.0f,
                40.0,
                HiveDecimal.Parse("2.2222"),
                new Timestamp(1416967364000L),
                new Date(1411967764000L),
                "abcd",
                m,
                new List<int> { 200, 300 },
                new AllTypesRecord.Struct(20, "bar")));
        }
    }

    string[] lines;
    using (CaptureStdoutToMemory capture = new CaptureStdoutToMemory())
    {
        FileDump.Main(TestFilePath, "-d");
        lines = capture.Text.Split(new[] { '\n' }, StringSplitOptions.RemoveEmptyEntries);
    }

    // One dumped line per row written above.
    Assert.Equal(2, lines.Length);

    // Don't be fooled by the big space in the middle, this line is quite long
    // NOTE(review): the comment above refers to a long run of spaces inside the expected JSON,
    // but the "c" values below contain a single space and the second literal is split by a line
    // break. The literals may have been whitespace-collapsed; confirm them against the real
    // dump output.
    Assert.Equal(
        "{\"b\":true,\"bt\":10,\"s\":100,\"i\":1000,\"l\":10000,\"f\":4,\"d\":20,\"de\":\"4.2222\",\"t\":\"2014-11-25 18:09:24\",\"dt\":\"2014-11-25\",\"str\":\"string\",\"c\":\"hello \",\"vc\":\"hello\",\"m\":[{\"_key\":\"k1\",\"_value\":\"v1\"}],\"a\":[100,200],\"st\":{\"i\":10,\"s\":\"foo\"}}",
        lines[0]);
    Assert.Equal(
        "{\"b\":false,\"bt\":20,\"s\":200,\"i\":2000,\"l\":20000,\"f\":8,\"d\":40,\"de\":\"2.2222\",\"t\":\"2014-11-25 18:02:44\",\"dt\":\"2014-09-28\",\"str\":\"abcd\",\"c\":\"world 
\",\"vc\":\"world\",\"m\":[{\"_key\":\"k3\",\"_value\":\"v3\"}],\"a\":[200,300],\"st\":{\"i\":20,\"s\":\"bar\"}}",
        lines[1]);
}