コード例 #1
0
        /// <summary>
        /// Runs the given commands over every Arrow record batch read from
        /// <paramref name="inputStream"/> and writes the resulting record batches to
        /// <paramref name="outputStream"/> using an <see cref="ArrowStreamWriter"/>.
        /// </summary>
        /// <param name="inputStream">Stream the input record batches are read from.</param>
        /// <param name="outputStream">
        /// Stream the results are written to. This method does not close it.
        /// </param>
        /// <param name="commands">Commands used to create the command runner.</param>
        /// <returns>
        /// Statistics whose <c>NumEntriesProcessed</c> is the summed length of all
        /// record batches written.
        /// </returns>
        private CommandExecutorStat ExecuteDataFrameSqlCommand(
            Stream inputStream,
            Stream outputStream,
            SqlCommand[] commands)
        {
            var stat = new CommandExecutorStat();
            ICommandRunner commandRunner = CreateCommandRunner(commands);

            SerDe.Write(outputStream, (int)SpecialLengths.START_ARROW_STREAM);

            // Created lazily: the writer needs the schema of the first result batch.
            ArrowStreamWriter writer = null;

            try
            {
                foreach (RecordBatch input in GetInputIterator(inputStream))
                {
                    FxDataFrame dataFrame = FxDataFrame.FromArrowRecordBatch(input);

                    // NOTE(review): the array is sized from the record batch while the copy
                    // loop is bounded by the data frame's column count; this assumes the two
                    // counts agree - confirm for nested (struct) columns.
                    var inputColumns = new DataFrameColumn[input.ColumnCount];
                    for (int i = 0; i < dataFrame.Columns.Count; ++i)
                    {
                        inputColumns[i] = dataFrame.Columns[i];
                    }

                    DataFrameColumn[] results = commandRunner.Run(inputColumns);

                    var resultDataFrame = new FxDataFrame(results);
                    IEnumerable<RecordBatch> recordBatches =
                        resultDataFrame.ToArrowRecordBatches();

                    foreach (RecordBatch result in recordBatches)
                    {
                        stat.NumEntriesProcessed += result.Length;

                        if (writer == null)
                        {
                            writer = new ArrowStreamWriter(
                                outputStream,
                                result.Schema,
                                leaveOpen: true);
                        }

                        // TODO: Remove sync-over-async once WriteRecordBatch exists.
                        writer.WriteRecordBatchAsync(result).GetAwaiter().GetResult();
                    }
                }

                // A zero follows the last batch; presumably the end-of-stream marker the
                // reader expects - confirm against the consumer.
                SerDe.Write(outputStream, 0);
            }
            finally
            {
                // Dispose even when the runner or a write throws. The writer was created
                // with leaveOpen: true, so outputStream is expected to stay usable.
                writer?.Dispose();
            }

            return stat;
        }
コード例 #2
0
        /// <summary>
        /// Applies a single grouped-map worker function to every Arrow record batch read
        /// from <paramref name="inputStream"/> and writes the resulting record batches to
        /// <paramref name="outputStream"/> using an <see cref="ArrowStreamWriter"/>.
        /// </summary>
        /// <param name="inputStream">Stream the input record batches are read from.</param>
        /// <param name="outputStream">
        /// Stream the results are written to. This method does not close it.
        /// </param>
        /// <param name="commands">
        /// Commands to execute; only the first element is used, and a debug assertion
        /// checks that exactly one is supplied.
        /// </param>
        /// <returns>
        /// Statistics whose <c>NumEntriesProcessed</c> is the summed length of all
        /// record batches written.
        /// </returns>
        private CommandExecutorStat ExecuteDataFrameGroupedMapCommand(
            Stream inputStream,
            Stream outputStream,
            SqlCommand[] commands)
        {
            Debug.Assert(commands.Length == 1,
                         "Grouped Map UDFs do not support combining multiple UDFs.");

            var stat = new CommandExecutorStat();
            var worker = (DataFrameGroupedMapWorkerFunction)commands[0].WorkerFunction;

            SerDe.Write(outputStream, (int)SpecialLengths.START_ARROW_STREAM);

            // Created lazily: the writer needs the schema of the first result batch.
            ArrowStreamWriter writer = null;

            try
            {
                foreach (RecordBatch input in GetInputIterator(inputStream))
                {
                    FxDataFrame dataFrame = FxDataFrame.FromArrowRecordBatch(input);
                    FxDataFrame resultDataFrame = worker.Func(dataFrame);
                    IEnumerable<RecordBatch> recordBatches =
                        resultDataFrame.ToArrowRecordBatches();

                    foreach (RecordBatch result in recordBatches)
                    {
                        stat.NumEntriesProcessed += result.Length;

                        if (writer == null)
                        {
                            writer = new ArrowStreamWriter(
                                outputStream,
                                result.Schema,
                                leaveOpen: true);
                        }

                        // TODO: Remove sync-over-async once WriteRecordBatch exists.
                        writer.WriteRecordBatchAsync(result).GetAwaiter().GetResult();
                    }
                }

                // A zero follows the last batch; presumably the end-of-stream marker the
                // reader expects - confirm against the consumer.
                SerDe.Write(outputStream, 0);
            }
            finally
            {
                // Dispose even when the worker function or a write throws. The writer was
                // created with leaveOpen: true, so outputStream is expected to stay usable.
                writer?.Dispose();
            }

            return stat;
        }
コード例 #3
0
        /// <summary>
        /// Applies a single grouped-map worker function to every Arrow record batch read
        /// from <paramref name="inputStream"/> and writes the resulting record batches to
        /// <paramref name="outputStream"/> using an <see cref="ArrowStreamWriter"/>
        /// configured with the options returned by <c>ArrowIpcOptions()</c>.
        /// </summary>
        /// <param name="inputStream">Stream the input record batches are read from.</param>
        /// <param name="outputStream">
        /// Stream the results are written to. This method does not close it.
        /// </param>
        /// <param name="commands">
        /// Commands to execute; only the first element is used, and a debug assertion
        /// checks that exactly one is supplied.
        /// </param>
        /// <returns>
        /// Statistics whose <c>NumEntriesProcessed</c> is the summed length of all
        /// record batches written.
        /// </returns>
        private CommandExecutorStat ExecuteDataFrameGroupedMapCommand(
            Stream inputStream,
            Stream outputStream,
            SqlCommand[] commands)
        {
            Debug.Assert(commands.Length == 1,
                         "Grouped Map UDFs do not support combining multiple UDFs.");

            var stat = new CommandExecutorStat();
            var worker = (DataFrameGroupedMapWorkerFunction)commands[0].WorkerFunction;

            SerDe.Write(outputStream, (int)SpecialLengths.START_ARROW_STREAM);

            IpcOptions ipcOptions = ArrowIpcOptions();

            // Created lazily: the writer needs the schema of the first result batch.
            ArrowStreamWriter writer = null;

            try
            {
                foreach (RecordBatch input in GetInputIterator(inputStream))
                {
                    FxDataFrame dataFrame = FxDataFrame.FromArrowRecordBatch(input);
                    FxDataFrame resultDataFrame = worker.Func(dataFrame);

                    IEnumerable<RecordBatch> recordBatches =
                        resultDataFrame.ToArrowRecordBatches();

                    foreach (RecordBatch batch in recordBatches)
                    {
                        // The batch that is counted and written is whatever
                        // WrapColumnsInStructIfApplicable returns for the raw batch.
                        RecordBatch final = WrapColumnsInStructIfApplicable(batch);
                        stat.NumEntriesProcessed += final.Length;

                        if (writer == null)
                        {
                            writer = new ArrowStreamWriter(
                                outputStream,
                                final.Schema,
                                leaveOpen: true,
                                ipcOptions);
                        }

                        writer.WriteRecordBatch(final);
                    }
                }

                // Runs only after every batch was written successfully, and before the
                // writer is disposed, matching the original ordering.
                WriteEnd(outputStream, ipcOptions);
            }
            finally
            {
                // Dispose even when the worker function or a write throws. The writer was
                // created with leaveOpen: true, so outputStream is expected to stay usable.
                writer?.Dispose();
            }

            return stat;
        }