Example No. 1
                public ParallelState(Cursor curs, out RowSet rows, int cthd)
                {
                    Contracts.AssertValue(curs);
                    Contracts.Assert(cthd > 0);

                    _curs   = curs;
                    _reader = _curs._reader;

                    // Why cthd + 3? We need two blocks for the blocking collection, and one
                    // more for the block currently being dished out by the cursor.
                    _blockCount = cthd + 3;

                    // Size the row set to cover all the blocks, so the worker threads and the
                    // cursor never operate on the same rows at the same time.
                    _rows = rows = _curs._parser.CreateRowSet(_curs._stats,
                                                              checked(_blockCount * BlockSize), _curs._active);

                    _waiterReading = new OrderedWaiter(firstCleared: false);
                    _waiterPublish = new OrderedWaiter(firstCleared: false);

                    // The size limit here ensures that worker threads are never writing to
                    // a range that is being served up by the cursor.
                    _queue = new BlockingCollection<RowBatch>(2);

                    _threads        = new Thread[cthd];
                    _threadsRunning = cthd;

                    for (int tid = 0; tid < _threads.Length; tid++)
                    {
                        var thd = _threads[tid] = Utils.CreateBackgroundThread(ThreadProc);
                        thd.Start(tid);
                    }
                }
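The constructor above caps the queue at two batches so the parsing threads can never outrun the cursor. Below is a minimal, self-contained sketch of that bounded producer/consumer hand-off, assuming only the BCL; the RowBatch payload, the batch count, and the thread wiring are hypothetical stand-ins for the real cursor machinery.

using System;
using System.Collections.Concurrent;
using System.Threading;

internal sealed class BatchPipelineSketch
{
    private sealed class RowBatch
    {
        public int Id;
    }

    public static void Main()
    {
        // Bounded to 2, as in the constructor above: producers block once two
        // batches are queued, so they can never race ahead of the consumer.
        var queue = new BlockingCollection<RowBatch>(2);

        var workers = new Thread[Environment.ProcessorCount];
        int next = -1;
        for (int tid = 0; tid < workers.Length; tid++)
        {
            workers[tid] = new Thread(() =>
            {
                int id;
                while ((id = Interlocked.Increment(ref next)) < 20)
                    queue.Add(new RowBatch { Id = id });
            });
            workers[tid].IsBackground = true;
            workers[tid].Start();
        }

        // A small helper thread joins the workers and then marks the queue
        // complete, so the consuming loop below terminates.
        var completer = new Thread(() =>
        {
            foreach (var w in workers)
                w.Join();
            queue.CompleteAdding();
        });
        completer.IsBackground = true;
        completer.Start();

        // The "cursor" side: drain batches as they become available.
        foreach (var batch in queue.GetConsumingEnumerable())
            Console.WriteLine("Consumed batch " + batch.Id);
    }
}

Because the collection is bounded, Add blocks whenever two batches are already queued, which is exactly how the real cursor keeps worker threads from writing into a range it is still serving.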
Example No. 2
        public void SaveData(Stream stream, IDataView data, params int[] colIndices)
        {
            _host.CheckValue(stream, nameof(stream));
            _host.CheckValue(data, nameof(data));
            _host.CheckValueOrNull(colIndices);
            _host.CheckParam(stream.CanWrite, nameof(stream), "cannot save to non-writable stream");
            _host.CheckParam(stream.CanSeek, nameof(stream), "cannot save to non-seekable stream");
            _host.CheckParam(stream.Position == 0, nameof(stream), "stream must be positioned at head of stream");

            using (IChannel ch = _host.Start("Saving"))
                using (ExceptionMarshaller exMarshaller = new ExceptionMarshaller())
                {
                    var toWrite       = new BlockingCollection<Block>(16);
                    var toCompress    = new BlockingCollection<Block>(16);
                    var activeColumns = GetActiveColumns(data.Schema, colIndices);
                    int rowsPerBlock  = RowsPerBlockHeuristic(data, activeColumns);
                    ch.Assert(rowsPerBlock > 0);
                    Stopwatch sw = new Stopwatch();

                    // Set up the compression and write workers that consume the input information first.
                    Task compressionTask = null;
                    if (activeColumns.Length > 0)
                    {
                        OrderedWaiter waiter             = _deterministicBlockOrder ? new OrderedWaiter() : null;
                        Thread[]      compressionThreads = new Thread[Environment.ProcessorCount];
                        for (int i = 0; i < compressionThreads.Length; ++i)
                        {
                            compressionThreads[i] = Utils.CreateBackgroundThread(
                                () => CompressionWorker(toCompress, toWrite, activeColumns.Length, waiter, exMarshaller));
                            compressionThreads[i].Start();
                        }
                        compressionTask = new Task(() =>
                        {
                            foreach (Thread t in compressionThreads)
                            {
                                t.Join();
                            }
                        });
                        compressionTask.Start();
                    }

                    // While there is an advantage to putting the IO into a separate thread, there is not an
                    // advantage to having more than one worker.
                    Thread writeThread = Utils.CreateBackgroundThread(
                        () => WriteWorker(stream, toWrite, activeColumns, data.Schema, rowsPerBlock, _host, exMarshaller));
                    writeThread.Start();
                    sw.Start();

                    // REVIEW: For now the fetch worker just works in the main thread. If it's
                    // a fairly large view, though, it may be advantageous to consider breaking up the
                    // fetch/write operations on the pipes, somehow.
                    // Despite running in the main thread for now, the fetch worker follows the same
                    // pattern of utilizing exMarshaller.
                    using (var pch = _silent ? null : _host.StartProgressChannel("BinarySaver"))
                    {
                        FetchWorker(toCompress, data, activeColumns, rowsPerBlock, sw, ch, pch, exMarshaller);
                    }

                    _host.Assert(compressionTask != null || toCompress.IsCompleted);
                    if (compressionTask != null)
                    {
                        compressionTask.Wait();
                    }
                    toWrite.CompleteAdding();

                    writeThread.Join();
                    exMarshaller.ThrowIfSet(ch);
                    if (!_silent)
                    {
                        ch.Info("Wrote {0} rows across {1} columns in {2}", _rowCount, activeColumns.Length, sw.Elapsed);
                    }
                    // When we dispose the exception marshaller, this will set the cancellation token when we internally
                    // dispose the cancellation token source, so one way or another those threads are being cancelled, even
                    // if an exception is thrown in the main body of this function.
                }
        }
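SaveData stages the work as fetch -> compress -> write, with two bounded BlockingCollections as the hand-off points and a single writer, since extra IO workers do not help. The sketch below shows that shape with a hypothetical Block payload and GZipStream standing in for the saver's codec; unlike the real code it does not route exceptions through an ExceptionMarshaller or enforce block order with an OrderedWaiter, so compressed blocks may reach the writer out of order.

using System;
using System.Collections.Concurrent;
using System.IO;
using System.IO.Compression;
using System.Threading.Tasks;

internal static class SaverPipelineSketch
{
    private sealed class Block
    {
        public int Index;
        public byte[] Data;
    }

    public static void Main()
    {
        var toCompress = new BlockingCollection<Block>(16);
        var toWrite = new BlockingCollection<Block>(16);

        // Compression stage: one task per processor, each draining toCompress.
        var compressors = new Task[Environment.ProcessorCount];
        for (int i = 0; i < compressors.Length; i++)
        {
            compressors[i] = Task.Run(() =>
            {
                foreach (var block in toCompress.GetConsumingEnumerable())
                {
                    var compressed = new MemoryStream();
                    using (var gz = new GZipStream(compressed, CompressionLevel.Fastest, leaveOpen: true))
                        gz.Write(block.Data, 0, block.Data.Length);
                    toWrite.Add(new Block { Index = block.Index, Data = compressed.ToArray() });
                }
            });
        }

        // Single write stage, mirroring the single writeThread above.
        var writer = Task.Run(() =>
        {
            long written = 0;
            foreach (var block in toWrite.GetConsumingEnumerable())
                written += block.Data.Length;
            Console.WriteLine("Wrote " + written + " compressed bytes");
        });

        // The fetch stage runs on the main thread, just as in SaveData.
        for (int i = 0; i < 100; i++)
            toCompress.Add(new Block { Index = i, Data = new byte[4096] });
        toCompress.CompleteAdding();

        // Shut the pipeline down in order: compressors first, then the writer.
        Task.WaitAll(compressors);
        toWrite.CompleteAdding();
        writer.Wait();
    }
}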
Example No. 3
        private void CompressionWorker(BlockingCollection<Block> toCompress, BlockingCollection<Block> toWrite, int columns, OrderedWaiter waiter,
                                       ExceptionMarshaller exMarshaller)
        {
            Contracts.AssertValue(exMarshaller);
            try
            {
                _host.AssertValue(toCompress);
                _host.AssertValue(toWrite);
                _host.Assert(columns > 0);
                _host.Assert(_deterministicBlockOrder == (waiter != null));

                foreach (Block block in toCompress.GetConsumingEnumerable(exMarshaller.Token))
                {
                    MemoryStream compressed = _memPool.Get();
                    int          uncompLength;
                    using (Stream stream = _compression.CompressStream(compressed))
                    {
                        MemoryStream uncompressed = block.BlockData;
                        uncompLength = (int)uncompressed.Length;
                        ArraySegment<byte> buffer;
                        bool tmp = uncompressed.TryGetBuffer(out buffer);
                        Contracts.Assert(tmp);
                        stream.Write(buffer.Array, buffer.Offset, buffer.Count);
                        _memPool.Return(ref uncompressed);
                    }
                    if (_deterministicBlockOrder)
                    {
                        waiter.Wait((long)columns * block.BlockIndex + block.ColumnIndex, exMarshaller.Token);
                    }
                    toWrite.Add(new Block(compressed, block.ColumnIndex, block.BlockIndex, uncompLength), exMarshaller.Token);
                    if (_deterministicBlockOrder)
                    {
                        waiter.Increment();
                    }
                }
            }
            catch (Exception ex)
            {
                exMarshaller.Set("compressing", ex);
            }
        }
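When deterministic block order is requested, each compression worker computes the ticket columns * BlockIndex + ColumnIndex, waits until every earlier ticket has been released, publishes its block to toWrite, and then releases the next ticket. OrderedWaiter is internal to ML.NET, so the TicketGate below is only a simplified stand-in written to show that ordering discipline.

using System;
using System.Threading;
using System.Threading.Tasks;

internal sealed class TicketGate
{
    private readonly object _lock = new object();
    private long _current; // next ticket allowed to proceed

    public void Wait(long ticket)
    {
        lock (_lock)
        {
            // Block until all earlier tickets have been released.
            while (_current != ticket)
                Monitor.Wait(_lock);
        }
    }

    public void Increment()
    {
        lock (_lock)
        {
            // Release the next ticket and wake any waiters to re-check.
            _current++;
            Monitor.PulseAll(_lock);
        }
    }

    public static void Main()
    {
        const int columns = 3;
        const int blocks = 4;
        var gate = new TicketGate();
        var tasks = new Task[columns * blocks];

        // Tickets are started in a scrambled order, but the gate forces the
        // publish step to happen in strictly increasing ticket order.
        for (int blockIndex = blocks - 1; blockIndex >= 0; blockIndex--)
        {
            for (int columnIndex = 0; columnIndex < columns; columnIndex++)
            {
                long ticket = (long)columns * blockIndex + columnIndex;
                tasks[(int)ticket] = Task.Run(() =>
                {
                    gate.Wait(ticket);
                    Console.WriteLine("Publishing block {0}, column {1}", ticket / columns, ticket % columns);
                    gate.Increment();
                });
            }
        }
        Task.WaitAll(tasks);
    }
}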