/// <summary>
/// Re-application constructor: creates a transform over <paramref name="newInput"/> that declares
/// the same output-name / source-name column pairs as an existing <paramref name="transform"/>.
/// </summary>
/// <param name="env">Host environment, forwarded to the base constructor.</param>
/// <param name="name">Registration name, forwarded to the base constructor.</param>
/// <param name="transform">The transform whose column infos are replicated. Its source columns are
/// identified by the names they carry in that transform's own source schema.</param>
/// <param name="newInput">The data view the new transform is applied to.</param>
/// <param name="checkType">Optional (may be null) type checker handed to the bindings factory.</param>
private protected OneToOneTransformBase(IHostEnvironment env, string name, OneToOneTransformBase transform,
    IDataView newInput, Func<ColumnType, string> checkType)
    : base(env, name, newInput)
{
    Host.CheckValueOrNull(checkType);
    InputTranspose = Source as ITransposeDataView;

    // Source indices of the old transform are meaningless against the new input, so every
    // info is turned back into a by-name column request and re-bound against newInput's schema.
    OneToOneColumn[] map =
        (from info in transform.Infos
         select new ColumnTmp
         {
             Name = info.Name,
             Source = transform.Source.Schema[info.Source].Name,
         }).ToArray();

    _bindings = Bindings.Create(this, map, newInput.Schema, InputTransposeSchema, checkType);
    Infos = _bindings.Infos;
    Metadata = new MetadataDispatcher(Infos.Length);
}
/// <summary>
/// Checks that the slot cursor of column <paramref name="viewCol"/> in <paramref name="trans"/>
/// yields exactly the values obtained by naively transposing the same column of
/// <paramref name="view"/>.
/// </summary>
/// <typeparam name="T">The raw item type expected for the slot cursor.</typeparam>
/// <param name="view">The reference (row-wise) data view.</param>
/// <param name="viewCol">Index of the column to check; the same index is used on both views.</param>
/// <param name="trans">The transposed view under test.</param>
private static void TransposeCheckHelper<T>(IDataView view, int viewCol, ITransposeDataView trans)
{
    int col = viewCol;
    var slotType = trans.TransposeSchema.GetSlotType(col);
    var transColType = trans.Schema.GetColumnType(col);

    // Name and type of the column must agree between the two views.
    Assert.Equal(view.Schema.GetColumnName(viewCol), trans.Schema.GetColumnName(col));
    var viewColType = view.Schema.GetColumnType(viewCol);
    // Unfortunately can't use equals because column type equality is a simple reference comparison. :P
    Assert.Equal(viewColType, transColType);

    // A slot holds one value per row of the original view.
    Assert.Equal(DataViewUtils.ComputeRowCount(view), (long)slotType.VectorSize);

    string desc = string.Format("Column {0} named '{1}'", col, trans.Schema.GetColumnName(col));
    Assert.True(slotType.ItemType.RawType == typeof(T), $"{desc} had wrong type for slot cursor");
    Assert.True(slotType.IsVector, $"{desc} expected to be vector but is not");
    Assert.True(slotType.VectorSize > 0, $"{desc} expected to be known sized vector but is not");
    Assert.True(transColType.ValueCount != 0, $"{desc} expected to have fixed size, but does not");

    int slotLength = slotType.VectorSize;
    T[] expected = NaiveTranspose<T>(view, viewCol);
    var actual = new T[slotLength * transColType.ValueCount];
    Contracts.Assert(actual.Length == expected.Length);

    using (var slotCursor = trans.GetSlotCursor(col))
    {
        var fetch = slotCursor.GetGetter<T>();
        var slotBuffer = default(VBuffer<T>);
        int written = 0;
        while (slotCursor.MoveNext())
        {
            Assert.True(written < actual.Length, $"{desc} slot cursor went further than it should have");
            fetch(ref slotBuffer);
            Assert.True(slotLength == slotBuffer.Length, $"{desc} slot cursor yielded vector with unexpected length");
            // Slots are laid out back to back in the flat result array.
            slotBuffer.CopyTo(actual, written);
            written += slotLength;
        }
        Assert.True(transColType.ValueCount == written / slotLength, $"{desc} slot cursor yielded fewer than expected values");
    }

    // Element-wise comparison against the naive transpose.
    for (int index = 0; index < actual.Length; ++index)
    {
        Assert.Equal(expected[index], actual[index]);
    }
}
/// <summary>
/// Builds the column bindings for a one-to-one transform: validates every requested column,
/// resolves its source column in <paramref name="inputSchema"/>, optionally type-checks it, and
/// records the source column's slot type when a transposed input is available.
/// </summary>
/// <param name="parent">The owning transform; its host is used for all checks and exceptions.</param>
/// <param name="column">The requested columns; must be non-empty.</param>
/// <param name="inputSchema">Schema of the input data, used to resolve source column names.</param>
/// <param name="transposedInput">The input as a transposed view, or null if it is not transposable.</param>
/// <param name="testType">Optional type checker. Returns null when the source type is acceptable,
/// otherwise a reason that is included in the thrown error.</param>
/// <returns>The bindings describing one output column per entry of <paramref name="column"/>.</returns>
public static Bindings Create(OneToOneTransformBase parent, OneToOneColumn[] column, DataViewSchema inputSchema,
    ITransposeDataView transposedInput, Func<DataViewType, string> testType)
{
    Contracts.AssertValue(parent);
    var host = parent.Host;
    host.CheckUserArg(Utils.Size(column) > 0, nameof(column));
    host.AssertValue(inputSchema);
    host.AssertValueOrNull(transposedInput);
    host.AssertValueOrNull(testType);

    var names = new string[column.Length];
    var infos = new ColInfo[column.Length];
    for (int i = 0; i < names.Length; i++)
    {
        var item = column[i];
        host.CheckUserArg(item.TrySanitize(), nameof(OneToOneColumn.Name), "Invalid new column name");
        names[i] = item.Name;

        int colSrc;
        if (!inputSchema.TryGetColumnIndex(item.Source, out colSrc))
        {
            throw host.ExceptUserArg(nameof(OneToOneColumn.Source), "Source column '{0}' not found", item.Source);
        }

        var type = inputSchema[colSrc].Type;
        if (testType != null)
        {
            string reason = testType(type);
            if (reason != null)
            {
                throw host.ExceptUserArg(nameof(OneToOneColumn.Source), InvalidTypeErrorFormat, item.Source, type, reason);
            }
        }

        // The slot type must be looked up by the index of the SOURCE column in the input, not by
        // the position of this entry in the requested-columns array: the two only coincide by
        // accident, and using the latter can pick another column's slot type or go out of range.
        var slotType = transposedInput?.GetSlotType(colSrc);
        infos[i] = new ColInfo(names[i], colSrc, type, slotType as VectorDataViewType);
    }

    return new Bindings(parent, infos, inputSchema, true, names);
}
/// <summary>
/// Wraps a non-empty chain of transforms. The exposed view is the last transform of the chain,
/// and the loader is whatever the first transform's source is, viewed as a legacy data loader.
/// </summary>
/// <param name="host">The host used for assertions.</param>
/// <param name="transforms">The transform chain; must be non-empty.</param>
private LegacyCompositeDataLoader(IHost host, TransformEx[] transforms)
{
    Contracts.AssertValue(host, "host");
    _host = host;
    _host.AssertNonEmpty(transforms);

    int lastIndex = transforms.Length - 1;
    View = transforms[lastIndex].Transform;
    _tview = View as ITransposeDataView;

    var chainSource = transforms[0].Transform.Source as ILegacyDataLoader;

#if DEBUG
    // Assert that the transforms array is consistent: first one starts with loader,
    // they are chained together, the loader is not a composite.
    for (int prev = 0; prev + 1 < transforms.Length; prev++)
    {
        _host.Assert(transforms[prev + 1].Transform.Source == transforms[prev].Transform, "Transforms are not linked");
    }

    _host.AssertValue(chainSource, "loader", "Transform chain doesn't start with a loader");
    _host.Assert(!(chainSource is LegacyCompositeDataLoader), "Can't have composite source loader");
#endif

    _loader = chainSource;
    _transforms = transforms;
}
/// <summary>
/// Writes a textual description of <paramref name="schema"/> to <paramref name="writer"/>: the
/// column count, then one line per column with its name and type (suffixed with " (T)" when the
/// transposed view reports a slot type for it), followed by either its metadata or its slot names
/// depending on <paramref name="args"/>.
/// </summary>
/// <param name="writer">Destination for the output.</param>
/// <param name="args">Options selecting what is shown (metadata values/types, slot names, verbosity).</param>
/// <param name="schema">The schema to describe.</param>
/// <param name="transposeDataView">Transposed view of the data, or null when not available.</param>
private static void PrintSchema(TextWriter writer, Arguments args, Schema schema, ITransposeDataView transposeDataView)
{
    Contracts.AssertValue(writer);
    Contracts.AssertValue(args);
    Contracts.AssertValue(schema);
    Contracts.AssertValueOrNull(transposeDataView);

#if !CORECLR
    if (args.ShowJson)
    {
        writer.WriteLine("Json Schema not supported.");
        return;
    }
#endif

    int columnCount = schema.Count;
    var output = new IndentedTextWriter(writer, " ");
    output.WriteLine("{0} columns:", columnCount);

    using (output.Nest())
    {
        // Reused across columns so the slot-names buffer is not reallocated each time.
        var slotNames = default(VBuffer<ReadOnlyMemory<char>>);

        for (int col = 0; col < columnCount; col++)
        {
            var colName = schema[col].Name;
            var colType = schema[col].Type;
            var transposedSlotType = transposeDataView?.GetSlotType(col);
            string transposedMarker = transposedSlotType == null ? "" : " (T)";
            output.WriteLine("{0}: {1}{2}", colName, colType, transposedMarker);

            // Metadata display takes precedence over slot-name display.
            bool showValues = args.ShowMetadataValues;
            if (showValues || args.ShowMetadataTypes)
            {
                ShowMetadata(output, schema, col, showValues);
                continue;
            }

            // Slot names are only printed on request and only for known-size vectors.
            if (!args.ShowSlots || !colType.IsKnownSizeVector())
            {
                continue;
            }

            ColumnType slotNamesType = schema[col].Metadata.Schema.GetColumnOrNull(MetadataUtils.Kinds.SlotNames)?.Type;
            if (slotNamesType == null)
            {
                continue;
            }

            bool wellTyped = slotNamesType.GetVectorSize() == colType.GetVectorSize()
                && slotNamesType.GetItemType() is TextType;
            if (!wellTyped)
            {
                Contracts.Assert(false, "Unexpected slot names type");
                continue;
            }

            schema[col].Metadata.GetValue(MetadataUtils.Kinds.SlotNames, ref slotNames);
            if (slotNames.Length != colType.GetVectorSize())
            {
                Contracts.Assert(false, "Unexpected length of slot names vector");
                continue;
            }

            using (output.Nest())
            {
                // In verbose mode every slot is listed; otherwise only the non-empty names.
                bool listAll = args.Verbose ?? false;
                foreach (var slot in slotNames.Items(all: listAll))
                {
                    if (!listAll && slot.Value.IsEmpty)
                    {
                        continue;
                    }

                    output.WriteLine("{0}:{1}", slot.Key, slot.Value);
                }
            }
        }
    }
}
/// <summary>
/// Writes <paramref name="data"/>, restricted to <paramref name="cols"/>, to
/// <paramref name="stream"/> in the transposed file layout: a header, one sub-data-view holding
/// the schema/metadata (and the row-wise data when <c>_writeRowData</c> is set), one sub-data-view
/// per column holding its slots, an offset/length table, and a tail signature. The header is
/// written last, over a placeholder reserved at the start of the stream.
/// </summary>
/// <param name="ch">Channel used for assertions and progress messages.</param>
/// <param name="stream">Destination stream; asserted to be seekable.</param>
/// <param name="data">The transposed data to save.</param>
/// <param name="cols">Indices of the columns to save; asserted to be non-empty.</param>
private void SaveTransposedData(IChannel ch, Stream stream, ITransposeDataView data, int[] cols)
{
    _host.AssertValue(ch);
    ch.AssertValue(stream);
    ch.AssertValue(data);
    ch.AssertNonEmpty(cols);
    ch.Assert(stream.CanSeek);

    // Initialize what we can in the header, though we will not be writing out things in the
    // header until we have confidence that things were written out correctly.
    TransposeLoader.Header header = default(TransposeLoader.Header);
    header.Signature = TransposeLoader.Header.SignatureValue;
    header.Version = TransposeLoader.Header.WriterVersion;
    header.CompatibleVersion = TransposeLoader.Header.WriterVersion;

    // The row count is taken from the slot type of the first requested column.
    VectorType slotType = data.TransposeSchema.GetSlotType(cols[0]);
    ch.AssertValue(slotType);
    header.RowCount = slotType.ValueCount;
    header.ColumnCount = cols.Length;

    // We keep track of the offsets of the start of each sub-IDV, for use in writing out the
    // offsets/length table later.
    var offsets = new List<long>();

    // First write a bunch of zeros at the head, as a placeholder for the header that
    // will go there assuming we can successfully load it. We'll keep this array around
    // for the real marshalling and writing of the header bytes structure.
    var headerBytes = new byte[TransposeLoader.Header.HeaderSize];
    stream.Write(headerBytes, 0, headerBytes.Length);
    offsets.Add(stream.Position);

    // This is a convenient delegate to write out an IDV substream, then save the offsets
    // where writing stopped to the offsets list.
    Action<string, IDataView> writeSubIdv = (description, subView) =>
    {
        using (var substream = new SubsetStream(stream))
        {
            int[] allColumns = Utils.GetIdentityPermutation(subView.Schema.ColumnCount);
            _internalSaver.SaveData(substream, subView, allColumns);
            substream.Seek(0, SeekOrigin.End);
            ch.Info("Wrote {0} data view in {1} bytes", description, substream.Length);
        }
        offsets.Add(stream.Position);
    };

    // First write out the no-row data, limited to these columns.
    var chooseArgs = new ChooseColumnsByIndexTransform.Arguments() { Index = cols };
    IDataView subdata = new ChooseColumnsByIndexTransform(_host, chooseArgs, data);

    string firstViewDescription;
    if (_writeRowData)
    {
        // If we want the "dual mode" row-wise and slot-wise file, don't filter out anything.
        firstViewDescription = "row-wise data, schema, and metadata";
    }
    else
    {
        // Keep zero rows, so only the schema and metadata get written.
        subdata = SkipTakeFilter.Create(_host, new SkipTakeFilter.TakeArguments() { Count = 0 }, subdata);
        firstViewDescription = "schema and metadata";
    }
    writeSubIdv(firstViewDescription, subdata);

    // Then one sub-IDV per requested column, holding that column's slots.
    for (int i = 0; i < cols.Length; i++)
    {
        int col = cols[i];
        writeSubIdv(data.Schema.GetColumnName(col), new TransposerUtils.SlotDataView(_host, data, col));
    }

    // Wrote out the dataview. Write out the table offset.
    using (var writer = new BinaryWriter(stream, Encoding.UTF8, leaveOpen: true))
    {
        // Format of the table is offset, length, both as 8-byte integers.
        // As it happens we wrote things out as adjacent sub-IDVs, so the
        // length can be derived from the offsets. The first will be the
        // start of the first sub-IDV, and all subsequent entries will be
        // the start/end of the current/next sub-IDV, respectively, so a total
        // of cols.Length + 2 entries.
        ch.Assert(offsets.Count == cols.Length + 2);
        ch.Assert(offsets[offsets.Count - 1] == stream.Position);
        header.SubIdvTableOffset = stream.Position;

        for (int entry = 0; entry + 1 < offsets.Count; ++entry)
        {
            // 8-byte int for offsets, 8-byte int for length.
            long start = offsets[entry];
            long length = offsets[entry + 1] - start;
            writer.Write(start);
            writer.Write(length);
        }

        header.TailOffset = stream.Position;
        writer.Write(TransposeLoader.Header.TailSignatureValue);

        // Now we are confident that things will work, so write it out.
        unsafe
        {
            Marshal.Copy(new IntPtr(&header), headerBytes, 0, Marshal.SizeOf(typeof(Header)));
        }
        writer.Seek(0, SeekOrigin.Begin);
        writer.Write(headerBytes);
    }
}