/// <summary>
/// Encodes <paramref name="test"/> with IntegerRunLengthEncodingV2Writer, decodes the bytes
/// again with IntegerRunLengthEncodingV2Reader, and asserts the decoded values equal the input.
/// </summary>
/// <param name="test">Values to round-trip.</param>
/// <param name="isSigned">Passed to both the writer and the reader.</param>
/// <param name="aligned">Passed to the writer.</param>
/// <param name="expectedEncodeLength">When supplied, the exact number of bytes the writer must produce.</param>
void TestRoundTrip(long[] test, bool isSigned, bool aligned, int? expectedEncodeLength = null)
{
    var encoded = new MemoryStream();
    new IntegerRunLengthEncodingV2Writer(encoded).Write(test, isSigned, aligned);

    //Only verify the encoded size when the caller pinned one
    if (expectedEncodeLength != null)
    {
        Assert.Equal(expectedEncodeLength.Value, encoded.Length);
    }

    //Rewind and decode everything that was written
    encoded.Position = 0;
    var decoded = new IntegerRunLengthEncodingV2Reader(encoded, isSigned).Read().ToArray();

    //The reader must have consumed every byte of the written stream
    Assert.Equal(encoded.Length, encoded.Position);

    //Compare the values element by element
    Assert.Equal(test.Length, decoded.Length);
    var index = 0;
    foreach (var expectedValue in test)
    {
        Assert.Equal(expectedValue, decoded[index]);
        index++;
    }
}
/// <summary>
/// Adds one block of decimal values: records statistics, writes the present bits (nullable
/// columns only), the rescaled whole parts via VarIntWriter to the data buffer, and the scales
/// via IntegerRunLengthEncodingV2Writer to the secondary buffer.
/// </summary>
/// <param name="values">The values of the block; entries may be null only when the column is nullable.</param>
public void AddBlock(IList<decimal?> values)
{
    var blockStats = new DecimalWriterStatistics();
    Statistics.Add(blockStats);
    foreach (var buffer in Buffers)
    {
        buffer.AnnotatePosition(blockStats, 0);
    }

    var wholeParts = new List<long>(values.Count);
    var scales = new List<long>(values.Count);

    if (!_isNullable)
    {
        //Non-nullable column: every entry is expected to carry a value
        foreach (var item in values)
        {
            blockStats.AddValue(item);
            var converted = item.Value.ToLongAndScale().Rescale(_scale, truncateIfNecessary: false);
            wholeParts.Add(converted.Item1);
            scales.Add(converted.Item2);
        }
    }
    else
    {
        var presence = new List<bool>(values.Count);
        foreach (var item in values)
        {
            blockStats.AddValue(item);
            if (item.HasValue)
            {
                var converted = item.Value.ToLongAndScale().Rescale(_scale, truncateIfNecessary: false);
                wholeParts.Add(converted.Item1);
                scales.Add(converted.Item2);
            }
            presence.Add(item.HasValue);
        }

        new BitWriter(_presentBuffer).Write(presence);

        //Once a null has been seen the present stream is flagged as required
        if (blockStats.HasNull)
        {
            _presentBuffer.MustBeIncluded = true;
        }
    }

    new VarIntWriter(_dataBuffer).Write(wholeParts);
    new IntegerRunLengthEncodingV2Writer(_secondaryBuffer).Write(scales, true, _shouldAlignEncodedValues);
}
/// <summary>
/// Adds one block of date values: each value is converted to the whole number of days between
/// _unixEpoch and the value, recorded in the statistics, and written to the data buffer with
/// IntegerRunLengthEncodingV2Writer. Nullable columns also get their present bits written.
/// </summary>
/// <param name="values">The values of the block; entries may be null only when the column is nullable.</param>
public void AddBlock(IList<DateTime?> values)
{
    var blockStats = new DateWriterStatistics();
    Statistics.Add(blockStats);
    if (_isNullable)
    {
        _presentBuffer.AnnotatePosition(blockStats, rleValuesToConsume: 0, bitsToConsume: 0);
    }
    _dataBuffer.AnnotatePosition(blockStats, rleValuesToConsume: 0);

    var dayNumbers = new List<long>(values.Count);

    if (!_isNullable)
    {
        //Non-nullable column: every entry is expected to carry a value
        foreach (var item in values)
        {
            var days = (int)(item.Value - _unixEpoch).TotalDays;
            blockStats.AddValue(days);
            dayNumbers.Add(days);
        }
    }
    else
    {
        var presence = new List<bool>(values.Count);
        foreach (var item in values)
        {
            if (item.HasValue)
            {
                var days = (int)(item.Value - _unixEpoch).TotalDays;
                blockStats.AddValue(days);
                presence.Add(true);
                dayNumbers.Add(days);
            }
            else
            {
                blockStats.AddValue(null);
                presence.Add(false);
            }
        }

        new BitWriter(_presentBuffer).Write(presence);

        //Once a null has been seen the present stream is flagged as required
        if (blockStats.HasNull)
        {
            _presentBuffer.MustBeIncluded = true;
        }
    }

    new IntegerRunLengthEncodingV2Writer(_dataBuffer).Write(dayNumbers, true, _shouldAlignEncodedValues);
}
/// <summary>
/// Encodes <paramref name="input"/> with IntegerRunLengthEncodingV2Writer and asserts that the
/// produced bytes are exactly <paramref name="expected"/>.
/// </summary>
/// <param name="expected">The bytes the writer must produce.</param>
/// <param name="input">Values to encode.</param>
/// <param name="isSigned">Passed to the writer.</param>
/// <param name="aligned">Passed to the writer.</param>
void TestWrite(byte[] expected, long[] input, bool isSigned, bool aligned)
{
    var output = new MemoryStream();
    new IntegerRunLengthEncodingV2Writer(output).Write(input, isSigned, aligned);
    var actual = output.ToArray();

    //Lengths first, so a size mismatch is reported before any byte comparison
    Assert.Equal(expected.Length, actual.Length);

    var position = 0;
    foreach (var expectedByte in expected)
    {
        Assert.Equal(expectedByte, actual[position]);
        position++;
    }
}
/// <summary>
/// Adds one block of integer values: records statistics, writes the present bits (nullable
/// columns only), and writes the non-null values to the data buffer with
/// IntegerRunLengthEncodingV2Writer.
/// </summary>
/// <param name="values">The values of the block; entries may be null only when the column is nullable.</param>
public void AddBlock(IList<long?> values)
{
    var blockStats = new LongWriterStatistics();
    Statistics.Add(blockStats);
    if (_isNullable)
    {
        _presentBuffer.AnnotatePosition(blockStats, rleValuesToConsume: 0, bitsToConsume: 0);
    }
    _dataBuffer.AnnotatePosition(blockStats, rleValuesToConsume: 0);

    var numbers = new List<long>(values.Count);

    if (!_isNullable)
    {
        //Non-nullable column: every entry is expected to carry a value
        foreach (var item in values)
        {
            blockStats.AddValue(item);
            numbers.Add(item.Value);
        }
    }
    else
    {
        var presence = new List<bool>(values.Count);
        foreach (var item in values)
        {
            blockStats.AddValue(item);
            if (item.HasValue)
            {
                numbers.Add(item.Value);
            }
            presence.Add(item.HasValue);
        }

        new BitWriter(_presentBuffer).Write(presence);

        //A null occurred. Make sure to write this stream
        if (blockStats.HasNull)
        {
            _presentBuffer.MustBeIncluded = true;
        }
    }

    new IntegerRunLengthEncodingV2Writer(_dataBuffer).Write(numbers, true, _shouldAlignEncodedValues);
}
/// <summary>
/// Adds one block of binary values: records statistics, writes one present bit per entry,
/// appends the bytes of every non-null entry to the data buffer, and writes the lengths of the
/// non-null entries to the length buffer with IntegerRunLengthEncodingV2Writer.
/// </summary>
/// <param name="values">The values of the block; individual entries may be null.</param>
public void AddBlock(IList<byte[]> values)
{
    var stats = new BinaryWriterStatistics();
    Statistics.Add(stats);
    foreach (var buffer in Buffers)
    {
        buffer.AnnotatePosition(stats, 0);
    }

    var bytesList = new List<byte[]>(values.Count);
    var presentList = new List<bool>(values.Count);
    var lengthList = new List<long>(values.Count);
    foreach (var bytes in values)
    {
        stats.AddValue(bytes);

        //Test the element, not the containing list: a null entry has no data and no length,
        //and dereferencing it for its Length would throw
        if (bytes != null)
        {
            bytesList.Add(bytes);
            lengthList.Add(bytes.Length);
        }
        presentList.Add(bytes != null);
    }

    var presentEncoder = new BitWriter(_presentBuffer);
    presentEncoder.Write(presentList);

    //Once a null has been seen the present stream is flagged as required
    if (stats.HasNull)
    {
        _presentBuffer.MustBeIncluded = true;
    }

    foreach (var bytes in bytesList)
    {
        _dataBuffer.Write(bytes, 0, bytes.Length);
    }

    var lengthEncoder = new IntegerRunLengthEncodingV2Writer(_lengthBuffer);
    lengthEncoder.Write(lengthList, false, _shouldAlignLengths);
}
/// <summary>
/// Adds one block of timestamp values: each value is split by GetValues into seconds and a
/// fraction, the milliseconds value it reports is recorded in the statistics, the seconds go
/// to the data buffer (signed) and the fractions to the secondary buffer (unsigned), both via
/// IntegerRunLengthEncodingV2Writer. Nullable columns also get their present bits written.
/// </summary>
/// <param name="values">The values of the block; entries may be null only when the column is nullable.</param>
public void AddBlock(IList<DateTime?> values)
{
    var blockStats = new TimestampWriterStatistics();
    Statistics.Add(blockStats);
    if (_isNullable)
    {
        _presentBuffer.AnnotatePosition(blockStats, rleValuesToConsume: 0, bitsToConsume: 0);
    }
    _dataBuffer.AnnotatePosition(blockStats, rleValuesToConsume: 0);
    _secondaryBuffer.AnnotatePosition(blockStats, rleValuesToConsume: 0);

    var secondParts = new List<long>(values.Count);
    var fractionParts = new List<long>(values.Count);

    if (!_isNullable)
    {
        //Non-nullable column: every entry is expected to carry a value
        foreach (var item in values)
        {
            long millis;
            long fractionPart;
            var secondPart = GetValues(item.Value, out millis, out fractionPart);
            blockStats.AddValue(millis);
            secondParts.Add(secondPart);
            fractionParts.Add(fractionPart);
        }
    }
    else
    {
        var presence = new List<bool>(values.Count);
        foreach (var item in values)
        {
            if (item.HasValue)
            {
                long millis;
                long fractionPart;
                var secondPart = GetValues(item.Value, out millis, out fractionPart);
                blockStats.AddValue(millis);
                presence.Add(true);
                secondParts.Add(secondPart);
                fractionParts.Add(fractionPart);
            }
            else
            {
                blockStats.AddValue(null);
                presence.Add(false);
            }
        }

        new BitWriter(_presentBuffer).Write(presence);

        //Once a null has been seen the present stream is flagged as required
        if (blockStats.HasNull)
        {
            _presentBuffer.MustBeIncluded = true;
        }
    }

    new IntegerRunLengthEncodingV2Writer(_dataBuffer).Write(secondParts, true, _shouldAlignEncodedValues);
    new IntegerRunLengthEncodingV2Writer(_secondaryBuffer).Write(fractionParts, false, _shouldAlignEncodedValues);
}
/// <summary>
/// Writes the accumulated dictionary-encoded data: the dictionary strings (sorted ordinally,
/// UTF-8 encoded) to the dictionary data buffer, their byte lengths to the length buffer, and
/// then, per stride of _strideLength values, the present bits and the dictionary ids of the
/// non-null values. A new statistics entry is started at the beginning of every stride.
/// </summary>
private void WriteDictionaryEncodedData()
{
    //Sort the dictionary and give every entry its sorted position as Id
    var sortedDictionary = new List<string>();
    var i = 0;
    foreach (var dictEntry in _unsortedDictionary.OrderBy(d => d.Key, StringComparer.Ordinal))
    {
        sortedDictionary.Add(dictEntry.Key);
        dictEntry.Value.Id = i++;
    }

    //Write the dictionary
    var dictionaryLengthList = new List<long>();
    foreach (var dictEntry in sortedDictionary)
    {
        var bytes = Encoding.UTF8.GetBytes(dictEntry);
        dictionaryLengthList.Add(bytes.Length);                 //Save the length
        _dictionaryDataBuffer.Write(bytes, 0, bytes.Length);    //Write to the buffer
    }

    //Write the dictionary lengths
    var dictionaryLengthEncoder = new IntegerRunLengthEncodingV2Writer(_lengthBuffer);
    dictionaryLengthEncoder.Write(dictionaryLengthList, false, _shouldAlignLengths);

    //Write the lookup values
    var presentList = new List<bool>(_dictionaryLookupValues.Count);
    var presentEncoder = new BitWriter(_presentBuffer);
    var lookupList = new List<long>(_dictionaryLookupValues.Count);
    var lookupEncoder = new IntegerRunLengthEncodingV2Writer(_dataBuffer);
    var hasNull = false;
    var strideCount = 0;
    StringWriterStatistics stats = null;
    foreach (var value in _dictionaryLookupValues)
    {
        if (stats == null)
        {
            //First value of a stride: start its statistics and record the buffer positions
            stats = new StringWriterStatistics();
            Statistics.Add(stats);
            foreach (var buffer in Buffers)
            {
                buffer.AnnotatePosition(stats, 0);
            }
        }

        //Look up the string value for this Id so we can notate statistics.
        //A null entry stands for a null value and has no Id to look up.
        var stringValue = value != null ? sortedDictionary[value.Id] : null;
        stats.AddValue(stringValue);

        presentList.Add(value != null);
        if (value != null)
        {
            lookupList.Add(value.Id);
        }
        else
        {
            hasNull = true;
        }

        if (++strideCount == _strideLength)     //If it's time for new statistics
        {
            //Flush to the buffers
            presentEncoder.Write(presentList);
            presentList.Clear();
            if (hasNull)
            {
                _presentBuffer.MustBeIncluded = true;
            }
            lookupEncoder.Write(lookupList, false, _shouldAlignDictionaryLookup);
            lookupList.Clear();
            strideCount = 0;
            stats = null;
        }
    }

    //Flush the final, partially filled stride. The lists and encoders are local to this
    //method, so anything still pending here would otherwise never reach the buffers.
    if (strideCount > 0)
    {
        presentEncoder.Write(presentList);
        if (hasNull)
        {
            _presentBuffer.MustBeIncluded = true;
        }
        lookupEncoder.Write(lookupList, false, _shouldAlignDictionaryLookup);
    }
}
/// <summary>
/// Adds one block of string values. With DirectV2 encoding the block is written immediately:
/// statistics, present bits, UTF-8 bytes of the non-null strings, and their byte lengths.
/// With DictionaryV2 encoding the values are only collected into the dictionary and the lookup
/// list; nothing is written to the buffers here.
/// </summary>
/// <param name="values">The values of the block; individual entries may be null.</param>
/// <exception cref="ArgumentException">The column encoding is neither DirectV2 nor DictionaryV2.</exception>
public void AddBlock(IList<string> values)
{
    EnsureEncodingKindIsSet(values);

    var encoding = ColumnEncoding;
    if (encoding != ColumnEncodingKind.DirectV2 && encoding != ColumnEncodingKind.DictionaryV2)
    {
        throw new ArgumentException();
    }

    if (encoding == ColumnEncodingKind.DictionaryV2)
    {
        foreach (var text in values)
        {
            //A null string is remembered as a null lookup entry
            DictionaryEntry entry = null;
            if (text != null && !_unsortedDictionary.TryGetValue(text, out entry))
            {
                entry = new DictionaryEntry();
                _unsortedDictionary.Add(text, entry);
            }
            _dictionaryLookupValues.Add(entry);
        }
        return;
    }

    //DirectV2: write the block straight to the buffers
    var blockStats = new StringWriterStatistics();
    Statistics.Add(blockStats);
    foreach (var buffer in Buffers)
    {
        buffer.AnnotatePosition(blockStats, 0);     //Our implementation always ends the RLE at the stride
    }

    var encodedStrings = new List<byte[]>(values.Count);
    var presence = new List<bool>(values.Count);
    var lengths = new List<long>(values.Count);
    foreach (var text in values)
    {
        blockStats.AddValue(text);
        var isPresent = text != null;
        if (isPresent)
        {
            var utf8 = Encoding.UTF8.GetBytes(text);
            encodedStrings.Add(utf8);
            lengths.Add(utf8.Length);
        }
        presence.Add(isPresent);
    }

    new BitWriter(_presentBuffer).Write(presence);

    //Once a null has been seen the present stream is flagged as required
    if (blockStats.HasNull)
    {
        _presentBuffer.MustBeIncluded = true;
    }

    foreach (var utf8 in encodedStrings)
    {
        _dataBuffer.Write(utf8, 0, utf8.Length);
    }

    new IntegerRunLengthEncodingV2Writer(_lengthBuffer).Write(lengths, false, _shouldAlignLengths);
}