/// <summary>
/// Reads an ORC file from <paramref name="input"/> stripe by stripe and yields one row
/// per stripe record, populated into <paramref name="output"/>.
/// </summary>
/// <param name="output">
/// Updatable row whose schema selects the columns to read. The same instance is
/// overwritten for every record and handed out via <c>AsReadOnly()</c>.
/// </param>
/// <param name="input">Stream containing the ORC data; must be seekable.</param>
/// <returns>A lazily evaluated sequence with one row per record in each stripe.</returns>
/// <exception cref="ArgumentOutOfRangeException">
/// Thrown at call time (not on first enumeration) when <paramref name="input"/> is not seekable.
/// </exception>
private static IEnumerable<IRow> ExtractInternal(IUpdatableRow output, Stream input)
{
    // The check lives in this non-iterator wrapper on purpose: inside a method that
    // uses 'yield return' it would not run until the caller starts enumerating.
    if (!input.CanSeek)
    {
        throw new ArgumentOutOfRangeException(nameof(input), "Input stream must be seekable for ORC reader. Enable the hack to copy to a Memory Stream or to a non-Persisted Memory Mapped file. The hack is the default setting.");
    }

    return ExtractInternalIterator(output, input);
}

/// <summary>
/// Iterator body of <see cref="ExtractInternal"/>; expects <paramref name="input"/>
/// to have been validated as seekable by the caller.
/// </summary>
private static IEnumerable<IRow> ExtractInternalIterator(IUpdatableRow output, Stream input)
{
    // The file tail is disposed when enumeration completes or the enumerator is disposed.
    using (var fileTail = new FileTail(input))
    {
        var stripes = fileTail.GetStripeCollection();

        // Only columns present in both the output schema and the ORC file are read.
        var columnsToRead = GetIntersectedColumnMetadata(output.Schema, fileTail).ToArray();

        foreach (var stripe in stripes)
        {
            var extractedColumns = ReadStripe(stripe, columnsToRead).ToArray();

            // Convert the row count once per stripe instead of on every loop test.
            int rowCount = (int)stripe.NumRows;

            for (int rowIndex = 0; rowIndex < rowCount; rowIndex++)
            {
                foreach (var col in extractedColumns)
                {
                    var outputColumn = col.Item1.USqlProjectionColumnIndex;

                    // Fall back to the projection column's default when there is no
                    // column data (Item2 is null) or the stored value itself is null.
                    var value = col.Item2?.GetValue(rowIndex) ?? col.Item1.USqlProjectionColumn.DefaultValue;
                    output.Set(outputColumn, value);
                }

                yield return output.AsReadOnly();
            }
        }
    }
}
/// <summary>
/// Yields projection metadata for every column of <paramref name="usqlSchema"/> whose
/// name also appears among the field names of the ORC file's first (index 0) type entry.
/// </summary>
/// <param name="usqlSchema">Schema whose columns are matched by name against the ORC file.</param>
/// <param name="orcFileTail">File tail whose footer supplies the ORC type list.</param>
/// <returns>
/// One <c>ProjectedColumnMetadata</c> per matching column, in the order the columns
/// appear in <paramref name="usqlSchema"/>. Columns with no match are skipped.
/// </returns>
private static IEnumerable<ProjectedColumnMetadata> GetIntersectedColumnMetadata(ISchema usqlSchema, FileTail orcFileTail)
{
    var orcTypes = orcFileTail.Footer.Types;
    var orcColumnNames = orcTypes[0].FieldNames;

    // Map each field name to the index of its type entry. Entry 0 is skipped, so the
    // n-th field name (0-based) pairs with type entry n + 1. ToDictionary throws on
    // duplicate names.
    var orcTypeIndexByName = orcTypes
        .Skip(1)
        .Select((ct, i) => new { OrcColumnName = orcColumnNames[i], OrcTypeIndex = i + 1 })
        .ToDictionary(x => x.OrcColumnName, x => x.OrcTypeIndex);

    for (int i = 0; i < usqlSchema.Count; i++)
    {
        var uSqlColumn = usqlSchema[i];

        // Single lookup instead of ContainsKey followed by the indexer.
        int orcTypeIndex;
        if (!orcTypeIndexByName.TryGetValue(uSqlColumn.Name, out orcTypeIndex))
        {
            continue;
        }

        yield return new ProjectedColumnMetadata
        {
            ColumnTypeKind = orcTypes[orcTypeIndex].Kind,
            OrcColumnIndex = (uint)orcTypeIndex,
            OrcColumnName = orcColumnNames[orcTypeIndex - 1],
            USqlProjectionColumn = uSqlColumn,
            USqlProjectionColumnIndex = i
        };
    }
}