Пример #1
0
        /// <summary>
        /// Creates a one-to-one transform over <paramref name="newInput"/> that reuses the
        /// output/source column pairs of an existing <paramref name="transform"/>.
        /// </summary>
        /// <param name="env">Host environment, forwarded to the base constructor.</param>
        /// <param name="name">Registration name, forwarded to the base constructor.</param>
        /// <param name="transform">Transform whose <c>Infos</c> supply the column pairs to re-create.</param>
        /// <param name="newInput">Data view whose schema the new bindings are created against.</param>
        /// <param name="checkType">Optional type validator handed to <c>Bindings.Create</c>; may be null.</param>
        private protected OneToOneTransformBase(IHostEnvironment env, string name, OneToOneTransformBase transform,
                                                IDataView newInput, Func <ColumnType, string> checkType)
            : base(env, name, newInput)
        {
            Host.CheckValueOrNull(checkType);
            InputTranspose = Source as ITransposeDataView;

            // Each existing info refers to its source column through the old transform's input
            // schema; recover that column's name so the pair can be re-bound against the new input.
            var renamedPairs =
                from info in transform.Infos
                select new ColumnTmp
                {
                    Name   = info.Name,
                    Source = transform.Source.Schema[info.Source].Name,
                };
            OneToOneColumn[] columns = renamedPairs.ToArray();

            _bindings = Bindings.Create(this, columns, newInput.Schema, InputTransposeSchema, checkType);
            Infos     = _bindings.Infos;
            Metadata  = new MetadataDispatcher(Infos.Length);
        }
Пример #2
0
        /// <summary>
        /// Checks that column <paramref name="viewCol"/> of the transposed view <paramref name="trans"/>
        /// matches the same column of the row-wise <paramref name="view"/>: identical name and column
        /// type, a slot type whose length equals the row count, and slot cursor contents equal to the
        /// result of <c>NaiveTranspose</c> on the row-wise view.
        /// </summary>
        /// <typeparam name="T">Expected raw item type of the slot cursor values.</typeparam>
        private static void TransposeCheckHelper <T>(IDataView view, int viewCol, ITransposeDataView trans)
        {
            var slotType       = trans.TransposeSchema.GetSlotType(viewCol);
            var transColType   = trans.Schema.GetColumnType(viewCol);
            var transColName   = trans.Schema.GetColumnName(viewCol);

            Assert.Equal(view.Schema.GetColumnName(viewCol), transColName);
            var viewColType = view.Schema.GetColumnType(viewCol);

            // NOTE(review): an earlier remark here said equality could not be used because column
            // type equality is a plain reference comparison, yet the check below is Assert.Equal.
            // Confirm which of the two is the intended statement.
            Assert.Equal(viewColType, transColType);
            Assert.Equal(DataViewUtils.ComputeRowCount(view), (long)slotType.VectorSize);
            string desc = $"Column {viewCol} named '{transColName}'";

            Assert.True(typeof(T) == slotType.ItemType.RawType, $"{desc} had wrong type for slot cursor");
            Assert.True(slotType.IsVector, $"{desc} expected to be vector but is not");
            Assert.True(slotType.VectorSize > 0, $"{desc} expected to be known sized vector but is not");
            Assert.True(transColType.ValueCount != 0, $"{desc} expected to have fixed size, but does not");
            int slotLength = slotType.VectorSize;

            // Reference values from the row-wise view, and a buffer of the same size that is filled
            // slot by slot from the transposed view.
            T[] expected = NaiveTranspose <T>(view, viewCol);
            T[] actual   = new T[slotLength * transColType.ValueCount];
            Contracts.Assert(actual.Length == expected.Length);
            using (var slotCursor = trans.GetSlotCursor(viewCol))
            {
                var         fetch   = slotCursor.GetGetter <T>();
                VBuffer <T> slot    = default(VBuffer <T>);
                int         written = 0;
                while (slotCursor.MoveNext())
                {
                    Assert.True(written < actual.Length, $"{desc} slot cursor went further than it should have");
                    fetch(ref slot);
                    Assert.True(slotLength == slot.Length, $"{desc} slot cursor yielded vector with unexpected length");
                    slot.CopyTo(actual, written);
                    written += slotLength;
                }
                Assert.True(transColType.ValueCount == written / slotLength, $"{desc} slot cursor yielded fewer than expected values");
            }

            int index = 0;
            while (index < actual.Length)
            {
                Assert.Equal(expected[index], actual[index]);
                ++index;
            }
        }
Пример #3
0
            /// <summary>
            /// Builds the bindings for a set of one-to-one column mappings against an input schema.
            /// </summary>
            /// <param name="parent">Owning transform; its host is used for all checks and exceptions.</param>
            /// <param name="column">Requested output/source column pairs; must be non-empty.</param>
            /// <param name="inputSchema">Schema in which each source column name is resolved.</param>
            /// <param name="transposedInput">Transposed view of the input, or null if there is none.</param>
            /// <param name="testType">
            /// Optional validator for the source column type. A non-null return value is treated as
            /// the reason the type is unacceptable and results in a user-argument exception.
            /// </param>
            /// <returns>Bindings holding one <c>ColInfo</c> per requested column.</returns>
            public static Bindings Create(OneToOneTransformBase parent, OneToOneColumn[] column, DataViewSchema inputSchema,
                                          ITransposeDataView transposedInput, Func <DataViewType, string> testType)
            {
                Contracts.AssertValue(parent);
                var host = parent.Host;

                host.CheckUserArg(Utils.Size(column) > 0, nameof(column));
                host.AssertValue(inputSchema);
                host.AssertValueOrNull(transposedInput);
                host.AssertValueOrNull(testType);

                var names = new string[column.Length];
                var infos = new ColInfo[column.Length];

                for (int i = 0; i < names.Length; i++)
                {
                    var item = column[i];
                    host.CheckUserArg(item.TrySanitize(), nameof(OneToOneColumn.Name), "Invalid new column name");
                    names[i] = item.Name;

                    int colSrc;
                    if (!inputSchema.TryGetColumnIndex(item.Source, out colSrc))
                    {
                        throw host.ExceptUserArg(nameof(OneToOneColumn.Source), "Source column '{0}' not found", item.Source);
                    }

                    var type = inputSchema[colSrc].Type;
                    if (testType != null)
                    {
                        string reason = testType(type);
                        if (reason != null)
                        {
                            throw host.ExceptUserArg(nameof(OneToOneColumn.Source), InvalidTypeErrorFormat, item.Source, type, reason);
                        }
                    }

                    // The slot type describes the source column, so it has to be looked up by the
                    // source column's index in the input schema (colSrc). The loop counter i only
                    // identifies the position in the requested-column array and would fetch the
                    // slot type of an unrelated input column (or an out-of-range one).
                    var slotType = transposedInput?.GetSlotType(colSrc);
                    infos[i] = new ColInfo(names[i], colSrc, type, slotType as VectorDataViewType);
                }

                return new Bindings(parent, infos, inputSchema, true, names);
            }
        /// <summary>
        /// Wraps a non-empty chain of transforms. The exposed view is the last transform in the
        /// chain, and the loader is taken from the source of the first transform.
        /// </summary>
        /// <param name="host">Host used for assertions; stored for later use.</param>
        /// <param name="transforms">Transform chain, ordered from the loader outwards.</param>
        private LegacyCompositeDataLoader(IHost host, TransformEx[] transforms)
        {
            Contracts.AssertValue(host, nameof(host));
            _host = host;
            _host.AssertNonEmpty(transforms);

            int lastIndex = transforms.Length - 1;
            View = transforms[lastIndex].Transform;
            _tview = View as ITransposeDataView;

            var chainSource = transforms[0].Transform.Source;
            var srcLoader = chainSource as ILegacyDataLoader;

#if DEBUG
            // Debug-only consistency checks: every transform must consume the one before it, and the
            // chain must begin at a loader that is not itself a composite loader.
            for (int next = 1; next < transforms.Length; next++)
            {
                var previous = transforms[next - 1].Transform;
                _host.Assert(transforms[next].Transform.Source == previous, "Transforms are not linked");
            }

            _host.AssertValue(srcLoader, "loader", "Transform chain doesn't start with a loader");
            _host.Assert(!(srcLoader is LegacyCompositeDataLoader), "Can't have composite source loader");
#endif

            _loader = srcLoader;
            _transforms = transforms;
        }
Пример #5
0
        /// <summary>
        /// Writes a human-readable listing of <paramref name="schema"/> to <paramref name="writer"/>:
        /// one line per column (name, type and a " (T)" marker when the transposed view reports a slot
        /// type for it), followed either by its metadata or, on request, by its slot names.
        /// </summary>
        /// <param name="writer">Destination for the listing.</param>
        /// <param name="args">Options selecting metadata, slot and verbosity output.</param>
        /// <param name="schema">Schema to describe.</param>
        /// <param name="transposeDataView">Transposed view used for the " (T)" marker; may be null.</param>
        private static void PrintSchema(TextWriter writer, Arguments args, Schema schema, ITransposeDataView transposeDataView)
        {
            Contracts.AssertValue(writer);
            Contracts.AssertValue(args);
            Contracts.AssertValue(schema);
            Contracts.AssertValueOrNull(transposeDataView);
#if !CORECLR
            if (args.ShowJson)
            {
                writer.WriteLine("Json Schema not supported.");
                return;
            }
#endif
            int columnCount = schema.Count;

            var output = new IndentedTextWriter(writer, "  ");
            output.WriteLine("{0} columns:", columnCount);
            using (output.Nest())
            {
                // Reused across columns as the destination buffer for slot names.
                var slotNames = default(VBuffer <ReadOnlyMemory <char> >);
                for (int col = 0; col < columnCount; col++)
                {
                    var column     = schema[col];
                    var type       = column.Type;
                    var slotType   = transposeDataView?.GetSlotType(col);
                    string tMarker = slotType == null ? "" : " (T)";
                    output.WriteLine("{0}: {1}{2}", column.Name, type, tMarker);

                    // When metadata output is requested it replaces the slot listing for this column.
                    bool showValues = args.ShowMetadataValues;
                    if (showValues || args.ShowMetadataTypes)
                    {
                        ShowMetadata(output, schema, col, showValues);
                        continue;
                    }

                    // Slot names are printed only on request and only for vectors of known size.
                    if (!args.ShowSlots || !type.IsKnownSizeVector())
                    {
                        continue;
                    }

                    ColumnType slotNamesType = column.Metadata.Schema.GetColumnOrNull(MetadataUtils.Kinds.SlotNames)?.Type;
                    if (slotNamesType == null)
                    {
                        continue;
                    }

                    bool wellFormed = slotNamesType.GetVectorSize() == type.GetVectorSize() && slotNamesType.GetItemType() is TextType;
                    if (!wellFormed)
                    {
                        Contracts.Assert(false, "Unexpected slot names type");
                        continue;
                    }

                    column.Metadata.GetValue(MetadataUtils.Kinds.SlotNames, ref slotNames);
                    if (slotNames.Length != type.GetVectorSize())
                    {
                        Contracts.Assert(false, "Unexpected length of slot names vector");
                        continue;
                    }

                    using (output.Nest())
                    {
                        bool verbose = args.Verbose ?? false;
                        foreach (var kvp in slotNames.Items(all: verbose))
                        {
                            // Outside verbose mode, slots with an empty name are left out.
                            if (!verbose && kvp.Value.IsEmpty)
                            {
                                continue;
                            }
                            output.WriteLine("{0}:{1}", kvp.Key, kvp.Value);
                        }
                    }
                }
            }
        }
Пример #6
0
        /// <summary>
        /// Writes <paramref name="data"/>, restricted to <paramref name="cols"/>, to
        /// <paramref name="stream"/> in the transposed file layout: a header, one sub-data-view with
        /// the schema and metadata (plus the row-wise data when <c>_writeRowData</c> is set), one
        /// sub-data-view per column holding its slot data, a table of offset/length pairs, and a
        /// tail signature. The header is written last, once everything else has succeeded.
        /// </summary>
        /// <param name="ch">Channel used for assertions and informational messages.</param>
        /// <param name="stream">Destination stream; must be seekable.</param>
        /// <param name="data">Transposed data view to save.</param>
        /// <param name="cols">Non-empty set of column indices of <paramref name="data"/> to save.</param>
        private void SaveTransposedData(IChannel ch, Stream stream, ITransposeDataView data, int[] cols)
        {
            _host.AssertValue(ch);
            ch.AssertValue(stream);
            ch.AssertValue(data);
            ch.AssertNonEmpty(cols);
            ch.Assert(stream.CanSeek);

            // Initialize what we can in the header, though we will not be writing out things in the
            // header until we have confidence that things were written out correctly.
            TransposeLoader.Header header = default(TransposeLoader.Header);
            header.Signature         = TransposeLoader.Header.SignatureValue;
            header.Version           = TransposeLoader.Header.WriterVersion;
            header.CompatibleVersion = TransposeLoader.Header.WriterVersion;

            // The row count is read from the slot type of the first saved column.
            VectorType slotType = data.TransposeSchema.GetSlotType(cols[0]);

            ch.AssertValue(slotType);
            header.RowCount    = slotType.ValueCount;
            header.ColumnCount = cols.Length;

            // We keep track of the offsets of the start of each sub-IDV, for use in writing out the
            // offsets/length table later.
            List <long> offsets = new List <long>();

            // First write a bunch of zeros at the head, as a placeholder for the header that
            // will go there once the rest of the file has been written. We keep this array around
            // for the real marshalling and writing of the header bytes structure.
            byte[] headerBytes = new byte[TransposeLoader.Header.HeaderSize];
            stream.Write(headerBytes, 0, headerBytes.Length);
            offsets.Add(stream.Position);

            // This is a convenient delegate to write out an IDV substream, then save the offsets
            // where writing stopped to the offsets list.
            Action <string, IDataView> viewAction =
                (name, view) =>
            {
                using (var substream = new SubsetStream(stream))
                {
                    _internalSaver.SaveData(substream, view, Utils.GetIdentityPermutation(view.Schema.ColumnCount));
                    substream.Seek(0, SeekOrigin.End);
                    ch.Info("Wrote {0} data view in {1} bytes", name, substream.Length);
                }
                offsets.Add(stream.Position);
            };

            // First write out the no-row data, limited to these columns.
            IDataView subdata = new ChooseColumnsByIndexTransform(_host,
                                                                  new ChooseColumnsByIndexTransform.Arguments()
            {
                Index = cols
            }, data);

            // If we want the "dual mode" row-wise and slot-wise file, don't filter out anything.
            if (!_writeRowData)
            {
                subdata = SkipTakeFilter.Create(_host, new SkipTakeFilter.TakeArguments()
                {
                    Count = 0
                }, subdata);
            }

            string msg = _writeRowData ? "row-wise data, schema, and metadata" : "schema and metadata";

            viewAction(msg, subdata);
            foreach (var col in cols)
            {
                viewAction(data.Schema.GetColumnName(col), new TransposerUtils.SlotDataView(_host, data, col));
            }

            // Wrote out the dataview. Write out the table offset.
            using (var writer = new BinaryWriter(stream, Encoding.UTF8, leaveOpen: true))
            {
                // Format of the table is offset, length, both as 8-byte integers.
                // As it happens we wrote things out as adjacent sub-IDVs, so the
                // length can be derived from the offsets. The first will be the
                // start of the first sub-IDV, and all subsequent entries will be
                // the start/end of the current/next sub-IDV, respectively, so a total
                // of cols.Length + 2 entries.
                ch.Assert(offsets.Count == cols.Length + 2);
                ch.Assert(offsets[offsets.Count - 1] == stream.Position);
                header.SubIdvTableOffset = stream.Position;
                for (int c = 1; c < offsets.Count; ++c)
                {
                    // 8-byte int for offsets, 8-byte int for length.
                    writer.Write(offsets[c - 1]);
                    writer.Write(offsets[c] - offsets[c - 1]);
                }
                header.TailOffset = stream.Position;
                writer.Write(TransposeLoader.Header.TailSignatureValue);

                // Now we are confident that things will work, so write it out.
                unsafe
                {
                    // Size the copy by the type of the struct actually being copied. An unqualified
                    // "Header" may bind to a different type in scope, in which case the byte count
                    // would not describe the memory behind &header.
                    Marshal.Copy(new IntPtr(&header), headerBytes, 0, Marshal.SizeOf(typeof(TransposeLoader.Header)));
                }
                writer.Seek(0, SeekOrigin.Begin);
                writer.Write(headerBytes);
            }
        }