/// <summary>
/// Inflates this block's zipped payload into <paramref name="fullBuffer"/> at
/// <paramref name="offset"/>. For <see cref="ZipType.TranspositionDeflate"/> blocks the
/// byte matrix is un-transposed after inflation; otherwise the data is inflated straight
/// into the destination.
/// </summary>
/// <param name="fullBuffer">Destination; must have UncompressedDataLength bytes free at <paramref name="offset"/>.</param>
/// <param name="offset">Write position inside <paramref name="fullBuffer"/>.</param>
/// <returns>The number of bytes written, i.e. the block's uncompressed data length.</returns>
public override int CopyTo(byte[] fullBuffer, int offset)
{
    using var time = Mdf4File.Metrics.ExtractAndTranspose.Measure((long)Data.UncompressedDataLength);

    if (Data.ZipType == ZipType.TranspositionDeflate)
    {
        // The payload was stored as a transposed (rows x columns) byte matrix before deflation;
        // rows is the zip parameter, columns the number of complete matrix rows in the output.
        var rows = Data.ZipParameter;
        var columns = (int)(Data.UncompressedDataLength / Data.ZipParameter);

        var transposedData = MdfBufferPool.Rent(Data.UncompressedDataLength);
        try
        {
            var compressedData = Reader.GetRawPointer(ZippedDataOffset);

            // unmanaged libdeflate is ~ factor 2 faster than the managed DeflateStream version
            LibDeflateDecompress.Decompress(compressedData, Data.CompressedDataLength, transposedData, Data.UncompressedDataLength);

            // NOTE: parallelizing this un-transpose (Parallel.For over the rows) was tried and
            // removed — the overhead outweighed the speedup for the block sizes seen so far.
            // Re-evaluate if files with 2/4/8MB blocks show up.
            unsafe
            {
                fixed (byte* bufferStart = fullBuffer)
                {
                    var b = bufferStart + offset;
                    for (var c = 0; c < columns; c++)
                    {
                        for (var r = 0; r < rows; r++)
                        {
                            var transposedIndex = columns * r + c;
                            *b++ = transposedData[transposedIndex];
                        }
                    }
                }
            }

            // The tail that does not fill a complete matrix row is stored untransposed
            // right after the matrix; copy it through verbatim.
            var transposedBytes = columns * rows;
            var rem = (uint)(Data.UncompressedDataLength - (ulong)transposedBytes);
            if (rem > 0)
            {
                var remStart = offset + transposedBytes;
                Unsafe.CopyBlock(ref fullBuffer[remStart], ref transposedData[transposedBytes], rem);
            }
        }
        finally
        {
            // Always hand the pooled scratch buffer back, even if decompression throws —
            // the previous version leaked it from the pool on any exception.
            MdfBufferPool.Return(transposedData);
        }
    }
    else
    {
        var compressedData = Reader.GetRawPointer(ZippedDataOffset);
        var destBuffer = fullBuffer.AsSpan(offset);
        LibDeflateDecompress.Decompress(compressedData, Data.CompressedDataLength, destBuffer, Data.UncompressedDataLength);
    }

    return (int)Data.UncompressedDataLength;
}
/// <summary>
/// Creates sample buffers for the given channels and fills them from the channel group's
/// data blocks. The requested range [sampleOffset, sampleOffset + sampleCnt) is widened to
/// whole blocks; <c>SampleOffset</c>/<c>SampleCount</c> expose what was actually loaded.
/// </summary>
/// <param name="chanz">Channels to sample; all are assumed to share one channel group (only the first one's group is used).</param>
/// <param name="sampleOffset">Index of the first requested sample.</param>
/// <param name="sampleCnt">Number of requested samples.</param>
internal Mdf4Sampler(IEnumerable<Mdf4Channel> chanz, ulong sampleOffset, ulong sampleCnt)
{
    var channels = chanz.ToArray();
    var src = channels.First().ChannelGroup;
    var recLen = src.RecordLength;

    // Translate the sample range into a byte range within the record stream.
    var sampleToRecordFirst = sampleOffset * recLen;
    var sampleToRecordLast = (sampleOffset + sampleCnt) * recLen;
    var blis = src.BlockLoadingInfos;

    //TODO: switch to binary search....
    // NOTE(review): FindIndex returns the FIRST element with BytePosition <= target; if
    // BytePosition ascends across blocks this always yields index 0, so firstMapIndex never
    // narrows the range — FindLastIndex looks intended here. Confirm against BlockLoadingInfos.
    var firstMapIndex = Array.FindIndex(blis, 0, map => map.BytePosition <= (long) sampleToRecordFirst);
    firstMapIndex = firstMapIndex == -1 ? 0 : firstMapIndex;
    var lastMapIndex = Array.FindIndex(blis, firstMapIndex, map => map.BytePosition >= (long) sampleToRecordLast);
    lastMapIndex = lastMapIndex == -1 ? blis.Length - 1 : lastMapIndex;

    // Report the block-aligned range that is actually loaded (empty if there are no blocks).
    if (lastMapIndex >= 0)
    {
        SampleCount = blis[lastMapIndex].SampleIndex + blis[lastMapIndex].SampleCount - blis[firstMapIndex].SampleIndex;
        SampleOffset = blis[firstMapIndex].SampleIndex;
    }
    else
    {
        SampleCount = 0;
        SampleOffset = 0;
    }

    // One backing buffer per channel, plus the externally visible views over them.
    var buffers = channels.Select(k => k.CreateBuffer(SampleCount)).ToArray();
    Buffers = buffers.Select(k => k.CreateView<Mdf4Channel>()).ToArray();

    using var _ = Mdf4File.Metrics.SampleReading.Measure(SampleCount, SampleCount * recLen);

    // MDF4 allows records to be spread across multiple blocks.
    // "Why is that so?", you might ask. "This is stupid!", you might say. I might agree.
    // Consider this example:
    // Blocks  -> [.......][......][............][...........]
    // Records -> AAABBBC  CCDDDE  EEFFFGGGHHHJ  JJKKKLLLMMM
    // This implementation will process the aligned stuff fast (parallel) and sync up
    // on the 'gaps' as the last step.
    // ( | = parallel, -> sequential)
    // : AAABBB | DDD | FFFGGGHHH | KKKLLLMMM / sync / CCC -> EEE -> JJJ
#if PARALLEL
    Parallel.For(firstMapIndex, lastMapIndex + 1, i =>
#else
    for (var i = firstMapIndex; i <= lastMapIndex; ++i)
#endif
    {
        var bli = blis[i];
        var blk = bli.Block;

        // allocate 'a little bit more' as we always read 8 bytes
        var recordBuffer = MdfBufferPool.Rent(blk.ByteLength + 8);
        blk.CopyTo(recordBuffer, 0);
        bli.CopyGaps(recordBuffer, src.GapBuffer);

        //TODO: find better metric -.-
        // Heuristic: only fan out per-block work when there is "enough" of it.
        var threadMetric = bli.SampleCount * channels.Length;
        var threadCount = (int) Math.Ceiling(threadMetric / 100000.0);

#if PARALLEL
        //NORMAL VERSION
        if (threadCount <= 1)
        {
#endif
            // Single-threaded path: update every channel buffer over the block's full sample span.
            var byteOffset = (ulong) bli.Alignment.LeftByteOffset;
            var sampleStart = (ulong) bli.SampleIndex;
            var sampleCount = (uint) bli.SampleCount;
            for (var cIndex = 0; cIndex < channels.Length; cIndex++)
            {
                var buffer = buffers[cIndex];
                buffer.Update(recordBuffer, byteOffset, sampleStart, sampleCount);
            }
#if PARALLEL
        }
        // THREADED VERSION
        else
        {
            var numThreads = threadCount;
            var split = bli.SampleCount / numThreads;
            var rest = bli.SampleCount % numThreads;
            var byteOffset = bli.Alignment.LeftByteOffset;
            // NOTE(review): every partition processes 'split + rest' samples, so adjacent
            // partitions overlap by 'rest' — presumably only the last partition should carry
            // the remainder. Also the inner lambda parameter 'i' collides with the outer
            // Parallel.For parameter 'i' (CS0136) when PARALLEL is defined — confirm this
            // branch actually compiles.
            Parallel.For(0, numThreads, i =>
            {
                var sampleStart = (ulong) (bli.SampleIndex + i * split);
                var sampleCount = (uint) (split + rest);
                for (var cIndex = 0; cIndex < channels.Length; cIndex++)
                {
                    var buffer = buffers[cIndex];
                    buffer.Update(recordBuffer, (ulong) byteOffset, sampleStart, sampleCount);
                }
            });
        }
#endif
        MdfBufferPool.Return(recordBuffer);
    }