/// <summary>
/// Runs the given commands over every Arrow record batch read from
/// <paramref name="inputStream"/> and writes the resulting record batches to
/// <paramref name="outputStream"/>.
/// </summary>
/// <remarks>
/// The output is framed as: the <c>START_ARROW_STREAM</c> special length, the
/// result batches (if any), then an integer <c>0</c>.
/// </remarks>
/// <param name="inputStream">Stream the input record batches are read from.</param>
/// <param name="outputStream">Stream the framed result batches are written to.</param>
/// <param name="commands">Commands used to create the command runner.</param>
/// <returns>
/// Statistics whose <c>NumEntriesProcessed</c> is increased by the length of
/// every result batch written.
/// </returns>
private CommandExecutorStat ExecuteDataFrameSqlCommand(
    Stream inputStream,
    Stream outputStream,
    SqlCommand[] commands)
{
    var stat = new CommandExecutorStat();
    ICommandRunner commandRunner = CreateCommandRunner(commands);

    SerDe.Write(outputStream, (int)SpecialLengths.START_ARROW_STREAM);

    // Created lazily: the writer needs the schema of the first result batch.
    ArrowStreamWriter writer = null;
    try
    {
        foreach (RecordBatch input in GetInputIterator(inputStream))
        {
            FxDataFrame dataFrame = FxDataFrame.FromArrowRecordBatch(input);

            // Copy the data frame's columns into the array form the runner expects.
            var inputColumns = new DataFrameColumn[input.ColumnCount];
            for (int i = 0; i < dataFrame.Columns.Count; ++i)
            {
                inputColumns[i] = dataFrame.Columns[i];
            }

            DataFrameColumn[] results = commandRunner.Run(inputColumns);
            var resultDataFrame = new FxDataFrame(results);
            IEnumerable<RecordBatch> recordBatches = resultDataFrame.ToArrowRecordBatches();

            foreach (RecordBatch result in recordBatches)
            {
                stat.NumEntriesProcessed += result.Length;

                if (writer == null)
                {
                    // leaveOpen: the output stream is owned by the caller.
                    writer = new ArrowStreamWriter(outputStream, result.Schema, leaveOpen: true);
                }

                // TODO: Remove sync-over-async once WriteRecordBatch exists.
                writer.WriteRecordBatchAsync(result).GetAwaiter().GetResult();
            }
        }

        SerDe.Write(outputStream, 0);
    }
    finally
    {
        // Dispose the writer even when processing throws part-way through,
        // so it is not leaked on the failure path.
        writer?.Dispose();
    }

    return stat;
}
/// <summary>
/// Applies a single grouped-map worker function to every Arrow record batch
/// read from <paramref name="inputStream"/> and writes the resulting record
/// batches to <paramref name="outputStream"/>.
/// </summary>
/// <remarks>
/// The output is framed as: the <c>START_ARROW_STREAM</c> special length, the
/// result batches (if any), then an integer <c>0</c>.
/// </remarks>
/// <param name="inputStream">Stream the input record batches are read from.</param>
/// <param name="outputStream">Stream the framed result batches are written to.</param>
/// <param name="commands">
/// Exactly one command whose worker function is a
/// <c>DataFrameGroupedMapWorkerFunction</c>; only the first element is used.
/// </param>
/// <returns>
/// Statistics whose <c>NumEntriesProcessed</c> is increased by the length of
/// every result batch written.
/// </returns>
private CommandExecutorStat ExecuteDataFrameGroupedMapCommand(
    Stream inputStream,
    Stream outputStream,
    SqlCommand[] commands)
{
    Debug.Assert(commands.Length == 1,
        "Grouped Map UDFs do not support combining multiple UDFs.");

    var stat = new CommandExecutorStat();
    var worker = (DataFrameGroupedMapWorkerFunction)commands[0].WorkerFunction;

    SerDe.Write(outputStream, (int)SpecialLengths.START_ARROW_STREAM);

    // Created lazily: the writer needs the schema of the first result batch.
    ArrowStreamWriter writer = null;
    try
    {
        foreach (RecordBatch input in GetInputIterator(inputStream))
        {
            FxDataFrame dataFrame = FxDataFrame.FromArrowRecordBatch(input);
            FxDataFrame resultDataFrame = worker.Func(dataFrame);
            IEnumerable<RecordBatch> recordBatches = resultDataFrame.ToArrowRecordBatches();

            foreach (RecordBatch result in recordBatches)
            {
                stat.NumEntriesProcessed += result.Length;

                if (writer == null)
                {
                    // leaveOpen: the output stream is owned by the caller.
                    writer = new ArrowStreamWriter(outputStream, result.Schema, leaveOpen: true);
                }

                // TODO: Remove sync-over-async once WriteRecordBatch exists.
                writer.WriteRecordBatchAsync(result).GetAwaiter().GetResult();
            }
        }

        SerDe.Write(outputStream, 0);
    }
    finally
    {
        // Dispose the writer even when the worker function or a write throws,
        // so it is not leaked on the failure path.
        writer?.Dispose();
    }

    return stat;
}
/// <summary>
/// Applies a single grouped-map worker function to every Arrow record batch
/// read from <paramref name="inputStream"/> and writes the resulting record
/// batches to <paramref name="outputStream"/> using the IPC options returned
/// by <c>ArrowIpcOptions()</c>.
/// </summary>
/// <remarks>
/// The output starts with the <c>START_ARROW_STREAM</c> special length, is
/// followed by the result batches (if any), and is terminated through
/// <c>WriteEnd</c>. Each result batch is passed through
/// <c>WrapColumnsInStructIfApplicable</c> before it is counted and written.
/// </remarks>
/// <param name="inputStream">Stream the input record batches are read from.</param>
/// <param name="outputStream">Stream the framed result batches are written to.</param>
/// <param name="commands">
/// Exactly one command whose worker function is a
/// <c>DataFrameGroupedMapWorkerFunction</c>; only the first element is used.
/// </param>
/// <returns>
/// Statistics whose <c>NumEntriesProcessed</c> is increased by the length of
/// every batch written.
/// </returns>
private CommandExecutorStat ExecuteDataFrameGroupedMapCommand(
    Stream inputStream,
    Stream outputStream,
    SqlCommand[] commands)
{
    Debug.Assert(commands.Length == 1,
        "Grouped Map UDFs do not support combining multiple UDFs.");

    var stat = new CommandExecutorStat();
    var worker = (DataFrameGroupedMapWorkerFunction)commands[0].WorkerFunction;

    SerDe.Write(outputStream, (int)SpecialLengths.START_ARROW_STREAM);

    // The same options are used for the writer and for the end-of-stream framing.
    IpcOptions ipcOptions = ArrowIpcOptions();

    // Created lazily: the writer needs the schema of the first batch to write.
    ArrowStreamWriter writer = null;
    try
    {
        foreach (RecordBatch input in GetInputIterator(inputStream))
        {
            FxDataFrame dataFrame = FxDataFrame.FromArrowRecordBatch(input);
            FxDataFrame resultDataFrame = worker.Func(dataFrame);
            IEnumerable<RecordBatch> recordBatches = resultDataFrame.ToArrowRecordBatches();

            foreach (RecordBatch batch in recordBatches)
            {
                RecordBatch wrapped = WrapColumnsInStructIfApplicable(batch);
                stat.NumEntriesProcessed += wrapped.Length;

                if (writer == null)
                {
                    // leaveOpen: the output stream is owned by the caller.
                    writer = new ArrowStreamWriter(
                        outputStream,
                        wrapped.Schema,
                        leaveOpen: true,
                        ipcOptions);
                }

                writer.WriteRecordBatch(wrapped);
            }
        }

        WriteEnd(outputStream, ipcOptions);
    }
    finally
    {
        // Dispose the writer even when the worker function or a write throws,
        // so it is not leaked on the failure path.
        writer?.Dispose();
    }

    return stat;
}