Example #1
0
        /// <summary>
        /// Returns a cached view of at most the first <paramref name="count"/> rows of <paramref name="data"/>.
        /// </summary>
        /// <param name="data">The source data view; must be non-null.</param>
        /// <param name="count">Maximum number of rows to keep.</param>
        /// <returns>A cached data view limited to <paramref name="count"/> rows.</returns>
        public static IDataView Take(this IDataView data, int count)
        {
            Contracts.CheckValue(data, nameof(data));
            // REVIEW: This should take an env as a parameter, not create one.
            var env  = new MLContext();
            var take = SkipTakeFilter.Create(env, new SkipTakeFilter.TakeArguments {
                Count = count
            }, data);

            // Cache the *filtered* view. The previous code cached the unfiltered
            // input and discarded `take`, so `count` was silently ignored.
            return(new CacheDataView(env, take, Enumerable.Range(0, take.Schema.Count).ToArray()));
        }
        /// <summary>
        /// Returns a cached view of at most the first <paramref name="count"/> rows of <paramref name="data"/>.
        /// </summary>
        /// <param name="data">The source data view; must be non-null.</param>
        /// <param name="count">Maximum number of rows to keep.</param>
        public static IDataView Take(this IDataView data, int count)
        {
            Contracts.CheckValue(data, nameof(data));
            // REVIEW: This should take an env as a parameter, not create one.
            var env = new TlcEnvironment(0);
            var takeArgs = new SkipTakeFilter.TakeArguments { Count = count };
            var limited = SkipTakeFilter.Create(env, takeArgs, data);
            return CacheCore(limited, env);
        }
Example #3
0
        /// <summary>
        /// Entry point that applies a <see cref="SkipTakeFilter"/> to the input data.
        /// </summary>
        /// <param name="env">Host environment; must be non-null.</param>
        /// <param name="input">Filter options, including the input data view.</param>
        /// <returns>The transform model together with the filtered output data.</returns>
        public static CommonOutputs.TransformOutput SkipAndTakeFilter(IHostEnvironment env, SkipTakeFilter.Options input)
        {
            Contracts.CheckValue(env, nameof(env));
            var host = env.Register("SkipTakeFilter");
            host.CheckValue(input, nameof(input));
            EntryPointUtils.CheckInputArgs(host, input);

            var xf    = SkipTakeFilter.Create(host, input, input.Data);
            var model = new TransformModelImpl(env, xf, input.Data);
            return new CommonOutputs.TransformOutput { Model = model, OutputData = xf };
        }
        /// <summary>
        /// Preview an effect of the <paramref name="estimator"/> on a given <paramref name="data"/>.
        /// </summary>
        /// <param name="estimator">The estimator whose effect we are previewing.</param>
        /// <param name="data">The data view to use for the preview.</param>
        /// <param name="maxRows">Maximum number of rows to show in the preview.</param>
        /// <param name="maxTrainingRows">Maximum number of rows used to fit the estimator.</param>
        public static DataDebuggerPreview Preview(this IEstimator<ITransformer> estimator, IDataView data, int maxRows = DataDebuggerPreview.Defaults.MaxRows,
                                                  int maxTrainingRows = DataDebuggerPreview.Defaults.MaxRows)
        {
            Contracts.CheckValue(estimator, nameof(estimator));
            Contracts.CheckValue(data, nameof(data));
            Contracts.CheckParam(maxRows >= 0, nameof(maxRows));
            Contracts.CheckParam(maxTrainingRows >= 0, nameof(maxTrainingRows));

            var env       = new LocalEnvironment(conc: 1);
            // Fit on at most maxTrainingRows rows so the preview stays cheap;
            // the fitted transformer is then applied to the full input.
            var trainData = SkipTakeFilter.Create(env, new SkipTakeFilter.TakeOptions {
                Count = maxTrainingRows
            }, data);

            return(new DataDebuggerPreview(estimator.Fit(trainData).Transform(data), maxRows));
        }
Example #5
0
        /// <summary>
        /// Writes <paramref name="data"/> to <paramref name="stream"/> in the transposed binary
        /// format read by <c>TransposeLoader</c>: a zeroed header placeholder, then one sub-IDV
        /// per saved entity (a schema/metadata view first, then one slot view per column), then
        /// an offset/length table and tail signature, and finally the real header marshalled
        /// back over the placeholder at offset zero.
        /// </summary>
        /// <param name="ch">Channel for logging; must be non-null.</param>
        /// <param name="stream">Destination stream; must be seekable.</param>
        /// <param name="data">Source view exposing transposed (slot-wise) column access.</param>
        /// <param name="cols">Non-empty set of column indices to save.</param>
        private void SaveTransposedData(IChannel ch, Stream stream, ITransposeDataView data, int[] cols)
        {
            _host.AssertValue(ch);
            ch.AssertValue(stream);
            ch.AssertValue(data);
            ch.AssertNonEmpty(cols);
            // The header is rewritten at position 0 only after every payload was
            // written successfully, so the stream must support seeking.
            ch.Assert(stream.CanSeek);

            // Initialize what we can in the header, though we will not be writing out things in the
            // header until we have confidence that things were written out correctly.
            TransposeLoader.Header header = default(TransposeLoader.Header);
            header.Signature         = TransposeLoader.Header.SignatureValue;
            header.Version           = TransposeLoader.Header.WriterVersion;
            header.CompatibleVersion = TransposeLoader.Header.WriterVersion;
            // Row count is derived from the slot type of the first saved column;
            // presumably all saved columns agree on this — TODO confirm upstream validation.
            VectorType slotType = data.TransposeSchema.GetSlotType(cols[0]);

            ch.AssertValue(slotType);
            header.RowCount    = slotType.ValueCount;
            header.ColumnCount = cols.Length;

            // We keep track of the offsets of the start of each sub-IDV, for use in writing out the
            // offsets/length table later.
            List <long> offsets = new List <long>();

            // First write a bunch of zeros at the head, as a placeholder for the header that
            // will go there assuming we can successfully load it. We'll keep this array around
            // for the real marshalling and writing of the header bytes structure.
            byte[] headerBytes = new byte[TransposeLoader.Header.HeaderSize];
            stream.Write(headerBytes, 0, headerBytes.Length);
            offsets.Add(stream.Position);

            // This is a convenient delegate to write out an IDV substream, then save the offsets
            // where writing stopped to the offsets list.
            Action <string, IDataView> viewAction =
                (name, view) =>
            {
                using (var substream = new SubsetStream(stream))
                {
                    _internalSaver.SaveData(substream, view, Utils.GetIdentityPermutation(view.Schema.ColumnCount));
                    // Advance past the substream's payload so the outer stream's
                    // position lands at the end of this sub-IDV before recording it.
                    substream.Seek(0, SeekOrigin.End);
                    ch.Info("Wrote {0} data view in {1} bytes", name, substream.Length);
                }
                offsets.Add(stream.Position);
            };

            // First write out the no-row data, limited to these columns.
            IDataView subdata = new ChooseColumnsByIndexTransform(_host,
                                                                  new ChooseColumnsByIndexTransform.Arguments()
            {
                Index = cols
            }, data);

            // If we want the "dual mode" row-wise and slot-wise file, don't filter out anything.
            if (!_writeRowData)
            {
                // Count = 0 strips all rows, leaving only schema and metadata in this sub-IDV.
                subdata = SkipTakeFilter.Create(_host, new SkipTakeFilter.TakeArguments()
                {
                    Count = 0
                }, subdata);
            }

            string msg = _writeRowData ? "row-wise data, schema, and metadata" : "schema and metadata";

            viewAction(msg, subdata);
            // One slot-wise sub-IDV per saved column, in the caller's column order.
            foreach (var col in cols)
            {
                viewAction(data.Schema.GetColumnName(col), new TransposerUtils.SlotDataView(_host, data, col));
            }

            // Wrote out the dataview. Write out the table offset.
            using (var writer = new BinaryWriter(stream, Encoding.UTF8, leaveOpen: true))
            {
                // Format of the table is offset, length, both as 8-byte integers.
                // As it happens we wrote things out as adjacent sub-IDVs, so the
                // length can be derived from the offsets. The first will be the
                // start of the first sub-IDV, and all subsequent entries will be
                // the start/end of the current/next sub-IDV, respectively, so a total
                // of cols.Length + 2 entries.
                ch.Assert(offsets.Count == cols.Length + 2);
                ch.Assert(offsets[offsets.Count - 1] == stream.Position);
                header.SubIdvTableOffset = stream.Position;
                for (int c = 1; c < offsets.Count; ++c)
                {
                    // 8-byte int for offsets, 8-byte int for length.
                    writer.Write(offsets[c - 1]);
                    writer.Write(offsets[c] - offsets[c - 1]);
                }
                header.TailOffset = stream.Position;
                writer.Write(TransposeLoader.Header.TailSignatureValue);

                // Now we are confident that things will work, so write it out.
                // Marshal the header struct's raw bytes into the placeholder buffer,
                // then overwrite the zeroed placeholder at the start of the stream.
                unsafe
                {
                    Marshal.Copy(new IntPtr(&header), headerBytes, 0, Marshal.SizeOf(typeof(Header)));
                }
                writer.Seek(0, SeekOrigin.Begin);
                writer.Write(headerBytes);
            }
        }
Example #6
0
        /// <summary>
        /// Core of the command: loads the data, optionally restricts it to the requested
        /// columns and row count, then writes the savable columns to the console channel
        /// using the configured (or default text) saver.
        /// </summary>
        /// <param name="ch">Channel used for user-facing output and diagnostics; must be non-null.</param>
        private void RunCore(IChannel ch)
        {
            Host.AssertValue(ch);
            IDataView data = CreateAndSaveLoader();

            // Optional column projection: ImplOptions.Columns is a comma-separated list.
            if (!string.IsNullOrWhiteSpace(ImplOptions.Columns))
            {
                var keepColumns = ImplOptions.Columns
                                  .Split(new char[] { ',' }, StringSplitOptions.RemoveEmptyEntries).ToArray();
                if (Utils.Size(keepColumns) > 0)
                {
                    data = ColumnSelectingTransformer.CreateKeep(Host, data, keepColumns);
                }
            }

            IDataSaver saver;

            // Use the user-specified saver if provided; otherwise fall back to a text saver.
            if (ImplOptions.Saver != null)
            {
                saver = ImplOptions.Saver.CreateComponent(Host);
            }
            else
            {
                saver = new TextSaver(Host, new TextSaver.Arguments()
                {
                    Dense = ImplOptions.Dense
                });
            }
            var cols = new List <int>();

            // Collect the indices of columns the saver can actually write, skipping
            // hidden columns unless explicitly requested.
            for (int i = 0; i < data.Schema.Count; i++)
            {
                if (!ImplOptions.KeepHidden && data.Schema[i].IsHidden)
                {
                    continue;
                }
                var type = data.Schema[i].Type;
                if (saver.IsColumnSavable(type))
                {
                    cols.Add(i);
                }
                else
                {
                    ch.Info(MessageSensitivity.Schema, "The column '{0}' will not be written as it has unsavable column type.", data.Schema[i].Name);
                }
            }
            Host.NotSensitive().Check(cols.Count > 0, "No valid columns to save");

            // Send the first N lines to console.
            if (ImplOptions.Rows > 0)
            {
                var args = new SkipTakeFilter.TakeOptions()
                {
                    Count = ImplOptions.Rows
                };
                data = SkipTakeFilter.Create(Host, args, data);
            }
            var textSaver = saver as TextSaver;

            // If it is a text saver, utilize a special utility for this purpose.
            if (textSaver != null)
            {
                textSaver.WriteData(data, true, cols.ToArray());
            }
            else
            {
                // Non-text savers write binary; buffer to memory, then dump the bytes
                // back to the channel as text.
                using (MemoryStream mem = new MemoryStream())
                {
                    // SubsetStream shields the memory stream from being closed by the saver.
                    using (Stream wrapStream = new SubsetStream(mem))
                        saver.SaveData(wrapStream, data, cols.ToArray());
                    mem.Seek(0, SeekOrigin.Begin);
                    using (StreamReader reader = new StreamReader(mem))
                    {
                        string result = reader.ReadToEnd();
                        ch.Info(MessageSensitivity.UserData | MessageSensitivity.Schema, result);
                    }
                }
            }
        }