public ParallelState(Cursor curs, out RowSet rows, int cthd)
{
    Contracts.AssertValue(curs);
    Contracts.Assert(cthd > 0);

    _curs = curs;
    _reader = _curs._reader;

    // Why cthd + 3? We need one block per worker thread, two blocks for the
    // blocking collection, and one more for the block currently being dished
    // out by the cursor.
    _blockCount = cthd + 3;

    _rows = rows = _curs._parser.CreateRowSet(_curs._stats,
        checked(_blockCount * BlockSize), _curs._active);

    _waiterReading = new OrderedWaiter(firstCleared: false);
    _waiterPublish = new OrderedWaiter(firstCleared: false);

    // The size limit here ensures that worker threads are never writing to
    // a range that is being served up by the cursor.
    _queue = new BlockingCollection<RowBatch>(2);

    _threads = new Thread[cthd];
    _threadsRunning = cthd;

    for (int tid = 0; tid < _threads.Length; tid++)
    {
        var thd = _threads[tid] = Utils.CreateBackgroundThread(ThreadProc);
        thd.Start(tid);
    }
}
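// The constructor above wires up a bounded producer/consumer pipeline: worker
// threads fill row batches into a BlockingCollection whose capacity (2) keeps
// producers from racing ahead of the consuming cursor. Below is a minimal,
// self-contained sketch of that pattern using only System.Collections.Concurrent
// and System.Threading; the names (BoundedPipelineSketch, Batch, RunWorkers)
// are hypothetical illustrations, not part of this codebase.
internal static class BoundedPipelineSketch
{
    private sealed class Batch
    {
        public int WorkerId;
        public int Sequence;
    }

    public static void RunWorkers(int workerCount)
    {
        // Capacity 2 mirrors _queue above: producers block in Add once two
        // batches are pending, bounding how far they can outrun the consumer.
        var queue = new BlockingCollection<Batch>(2);
        var threads = new Thread[workerCount];
        int remaining = workerCount;

        for (int tid = 0; tid < threads.Length; tid++)
        {
            threads[tid] = new Thread(id =>
            {
                for (int seq = 0; seq < 5; seq++)
                    queue.Add(new Batch { WorkerId = (int)id, Sequence = seq });
                // The last worker to finish signals that no more batches are coming.
                if (Interlocked.Decrement(ref remaining) == 0)
                    queue.CompleteAdding();
            }) { IsBackground = true };
            threads[tid].Start(tid);
        }

        // The consumer (playing the cursor's role) drains until all producers finish.
        foreach (Batch b in queue.GetConsumingEnumerable())
            Console.WriteLine($"worker {b.WorkerId} batch {b.Sequence}");
    }
}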
public void SaveData(Stream stream, IDataView data, params int[] colIndices)
{
    _host.CheckValue(stream, nameof(stream));
    _host.CheckValue(data, nameof(data));
    _host.CheckValueOrNull(colIndices);
    _host.CheckParam(stream.CanWrite, nameof(stream), "cannot save to non-writable stream");
    _host.CheckParam(stream.CanSeek, nameof(stream), "cannot save to non-seekable stream");
    _host.CheckParam(stream.Position == 0, nameof(stream), "stream must be positioned at head of stream");

    using (IChannel ch = _host.Start("Saving"))
    using (ExceptionMarshaller exMarshaller = new ExceptionMarshaller())
    {
        var toWrite = new BlockingCollection<Block>(16);
        var toCompress = new BlockingCollection<Block>(16);
        var activeColumns = GetActiveColumns(data.Schema, colIndices);
        int rowsPerBlock = RowsPerBlockHeuristic(data, activeColumns);
        ch.Assert(rowsPerBlock > 0);
        Stopwatch sw = new Stopwatch();

        // Set up the compression and write workers that consume the input information first.
        Task compressionTask = null;
        if (activeColumns.Length > 0)
        {
            OrderedWaiter waiter = _deterministicBlockOrder ? new OrderedWaiter() : null;
            Thread[] compressionThreads = new Thread[Environment.ProcessorCount];
            for (int i = 0; i < compressionThreads.Length; ++i)
            {
                compressionThreads[i] = Utils.CreateBackgroundThread(
                    () => CompressionWorker(toCompress, toWrite, activeColumns.Length, waiter, exMarshaller));
                compressionThreads[i].Start();
            }
            compressionTask = new Task(() =>
            {
                foreach (Thread t in compressionThreads)
                    t.Join();
            });
            compressionTask.Start();
        }

        // While there is an advantage to putting the IO into a separate thread, there is not an
        // advantage to having more than one worker.
        Thread writeThread = Utils.CreateBackgroundThread(
            () => WriteWorker(stream, toWrite, activeColumns, data.Schema, rowsPerBlock, _host, exMarshaller));
        writeThread.Start();
        sw.Start();

        // REVIEW: For now the fetch worker just works in the main thread. If it's
        // a fairly large view, though, it may be advantageous to consider breaking up the
        // fetch/write operations on the pipes, somehow.
        // Despite running in the main thread for now, the fetch worker follows the same
        // pattern of utilizing exMarshaller.
        using (var pch = _silent ? null : _host.StartProgressChannel("BinarySaver"))
        {
            FetchWorker(toCompress, data, activeColumns, rowsPerBlock, sw, ch, pch, exMarshaller);
        }

        _host.Assert(compressionTask != null || toCompress.IsCompleted);
        if (compressionTask != null)
            compressionTask.Wait();
        toWrite.CompleteAdding();

        writeThread.Join();
        exMarshaller.ThrowIfSet(ch);
        if (!_silent)
            ch.Info("Wrote {0} rows across {1} columns in {2}", _rowCount, activeColumns.Length, sw.Elapsed);
        // Disposing the exception marshaller disposes its internal cancellation token source,
        // which sets the cancellation token, so one way or another those threads are being
        // cancelled, even if an exception is thrown in the main body of this function.
    }
}
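// SaveData relies on the ExceptionMarshaller to surface worker-thread failures
// on the main thread: the first exception recorded cancels a shared token
// (unblocking any worker stuck on a bounded queue) and is rethrown after the
// joins. Below is a minimal sketch of that idea; SimpleExceptionMarshaller is
// a hypothetical stand-in, not the actual ML.NET type, which also integrates
// with IChannel.
internal sealed class SimpleExceptionMarshaller : IDisposable
{
    private readonly CancellationTokenSource _cts = new CancellationTokenSource();
    private Exception _ex;

    public CancellationToken Token => _cts.Token;

    // Record the first failure and cancel so blocked workers wake up promptly.
    public void Set(string component, Exception ex)
    {
        Interlocked.CompareExchange(ref _ex, ex, null);
        _cts.Cancel();
    }

    // Called on the main thread once all workers have been joined.
    public void ThrowIfSet()
    {
        if (_ex != null)
            throw new InvalidOperationException("Worker thread failed", _ex);
    }

    public void Dispose()
    {
        // Mirrors the closing comment in SaveData: disposal cancels the token,
        // so workers are unblocked even if the main body threw.
        _cts.Cancel();
        _cts.Dispose();
    }
}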
private void CompressionWorker(BlockingCollection<Block> toCompress, BlockingCollection<Block> toWrite,
    int columns, OrderedWaiter waiter, ExceptionMarshaller exMarshaller)
{
    Contracts.AssertValue(exMarshaller);
    try
    {
        _host.AssertValue(toCompress);
        _host.AssertValue(toWrite);
        _host.Assert(columns > 0);
        _host.Assert(_deterministicBlockOrder == (waiter != null));

        foreach (Block block in toCompress.GetConsumingEnumerable(exMarshaller.Token))
        {
            // Compress the block's payload into a pooled stream, returning the
            // uncompressed buffer to the pool once it has been consumed.
            MemoryStream compressed = _memPool.Get();
            int uncompLength;
            using (Stream stream = _compression.CompressStream(compressed))
            {
                MemoryStream uncompressed = block.BlockData;
                uncompLength = (int)uncompressed.Length;
                ArraySegment<byte> buffer;
                bool tmp = uncompressed.TryGetBuffer(out buffer);
                Contracts.Assert(tmp);
                stream.Write(buffer.Array, buffer.Offset, buffer.Count);
                _memPool.Return(ref uncompressed);
            }
            // In deterministic mode, wait for this block's turn so compressed
            // blocks are handed to the writer in a fixed (block, column) order.
            if (_deterministicBlockOrder)
                waiter.Wait((long)columns * block.BlockIndex + block.ColumnIndex, exMarshaller.Token);
            toWrite.Add(new Block(compressed, block.ColumnIndex, block.BlockIndex, uncompLength), exMarshaller.Token);
            if (_deterministicBlockOrder)
                waiter.Increment();
        }
    }
    catch (Exception ex)
    {
        exMarshaller.Set("compressing", ex);
    }
}
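// In deterministic mode, the OrderedWaiter serializes only the hand-off to
// toWrite, so compression itself still runs in parallel while blocks reach the
// writer in a fixed order. Below is a minimal sketch of such an ordered gate
// built on Monitor; OrderedGate is a hypothetical name, and the real
// OrderedWaiter additionally supports cancellation tokens. A worker at logical
// position p calls Wait(p), performs its ordered side effect, then Increment().
internal sealed class OrderedGate
{
    private readonly object _lock = new object();
    private long _current;

    // Block until it is the caller's turn, i.e. all earlier positions are done.
    public void Wait(long position)
    {
        lock (_lock)
        {
            while (_current < position)
                Monitor.Wait(_lock);
        }
    }

    // Mark the current position complete and release the next waiter in line.
    public void Increment()
    {
        lock (_lock)
        {
            _current++;
            Monitor.PulseAll(_lock);
        }
    }
}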