public static IDataView Take(this IDataView data, int count)
{
    Contracts.CheckValue(data, nameof(data));
    // REVIEW: This should take an env as a parameter, not create one.
    var env = new MLContext();
    var take = SkipTakeFilter.Create(env, new SkipTakeFilter.TakeArguments { Count = count }, data);
    // Cache the take-filtered view so downstream cursors don't re-run the filter.
    return new CacheDataView(env, take, Enumerable.Range(0, data.Schema.Count).ToArray());
}
public static IDataView Take(this IDataView data, int count)
{
    Contracts.CheckValue(data, nameof(data));
    // REVIEW: This should take an env as a parameter, not create one.
    var env = new TlcEnvironment(0);
    var take = SkipTakeFilter.Create(env, new SkipTakeFilter.TakeArguments { Count = count }, data);
    return CacheCore(take, env);
}
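A minimal usage sketch of the Take extension above, assuming the extension is in scope alongside ML.NET's public MLContext APIs; the DataPoint row type and the TakeUsageSketch wrapper are illustrative, not part of the original source.

using System;
using System.Linq;
using Microsoft.ML;

internal static class TakeUsageSketch
{
    // Illustrative row type, not from the original source.
    private sealed class DataPoint
    {
        public float Value { get; set; }
    }

    public static void Run()
    {
        var mlContext = new MLContext();
        IDataView data = mlContext.Data.LoadFromEnumerable(
            Enumerable.Range(0, 100).Select(i => new DataPoint { Value = i }));

        // Keep only the first ten rows; downstream consumers see a 10-row view.
        IDataView first10 = data.Take(10);

        foreach (var row in mlContext.Data.CreateEnumerable<DataPoint>(first10, reuseRowObject: false))
            Console.WriteLine(row.Value);
    }
}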
public static CommonOutputs.TransformOutput SkipAndTakeFilter(IHostEnvironment env, SkipTakeFilter.Options input)
{
    Contracts.CheckValue(env, nameof(env));
    var host = env.Register("SkipTakeFilter");
    host.CheckValue(input, nameof(input));
    EntryPointUtils.CheckInputArgs(host, input);

    var xf = SkipTakeFilter.Create(host, input, input.Data);
    return new CommonOutputs.TransformOutput
    {
        Model = new TransformModelImpl(env, xf, input.Data),
        OutputData = xf
    };
}
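A hedged sketch of invoking this entry point directly. It assumes internal access to SkipTakeFilter and that SkipTakeFilter.Options carries the Data input plus nullable Skip/Take row counts; those field names are assumptions here, not confirmed by the snippet above.

// Hedged sketch: the Skip/Take/Data member names on SkipTakeFilter.Options
// are assumptions, not confirmed by the entry point above.
public static IDataView SkipFiveTakeTen(IHostEnvironment env, IDataView input)
{
    var output = SkipAndTakeFilter(env, new SkipTakeFilter.Options
    {
        Data = input,
        Skip = 5,   // drop the first five rows
        Take = 10   // then keep the next ten
    });
    return output.OutputData;
}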
/// <summary>
/// Preview the effect of the <paramref name="estimator"/> on a given <paramref name="data"/>.
/// </summary>
/// <param name="estimator">The estimator whose effect we are previewing.</param>
/// <param name="data">The data view to use for the preview.</param>
/// <param name="maxRows">Maximum number of rows to show in the preview.</param>
/// <param name="maxTrainingRows">Maximum number of rows to use when fitting the estimator.</param>
public static DataDebuggerPreview Preview(this IEstimator<ITransformer> estimator, IDataView data,
    int maxRows = DataDebuggerPreview.Defaults.MaxRows,
    int maxTrainingRows = DataDebuggerPreview.Defaults.MaxRows)
{
    Contracts.CheckValue(estimator, nameof(estimator));
    Contracts.CheckValue(data, nameof(data));
    Contracts.CheckParam(maxRows >= 0, nameof(maxRows));
    Contracts.CheckParam(maxTrainingRows >= 0, nameof(maxTrainingRows));

    var env = new LocalEnvironment(conc: 1);
    var trainData = SkipTakeFilter.Create(env, new SkipTakeFilter.TakeOptions { Count = maxTrainingRows }, data);
    return new DataDebuggerPreview(estimator.Fit(trainData).Transform(data), maxRows);
}
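A short usage sketch of Preview. NormalizeMinMax and the DataPoint type (from the earlier sketch) stand in for any estimator and data; only the Preview call itself comes from the source above, and the ColumnView iteration assumes DataDebuggerPreview's public column/value accessors.

// Illustrative pipeline and data; the Preview signature matches the method above.
var mlContext = new MLContext();
IDataView data = mlContext.Data.LoadFromEnumerable(new[]
{
    new DataPoint { Value = 1f },
    new DataPoint { Value = 5f },
    new DataPoint { Value = 10f },
});
var pipeline = mlContext.Transforms.NormalizeMinMax("Value");

// Fit on at most 100 rows, then materialize up to 5 rows of the transformed data.
DataDebuggerPreview preview = pipeline.Preview(data, maxRows: 5, maxTrainingRows: 100);
foreach (var column in preview.ColumnView)
    Console.WriteLine($"{column.Column.Name}: {column.Values.Length} value(s) shown");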
private void SaveTransposedData(IChannel ch, Stream stream, ITransposeDataView data, int[] cols)
{
    _host.AssertValue(ch);
    ch.AssertValue(stream);
    ch.AssertValue(data);
    ch.AssertNonEmpty(cols);
    ch.Assert(stream.CanSeek);

    // Initialize what we can in the header, though we will not be writing out things in the
    // header until we have confidence that things were written out correctly.
    TransposeLoader.Header header = default(TransposeLoader.Header);
    header.Signature = TransposeLoader.Header.SignatureValue;
    header.Version = TransposeLoader.Header.WriterVersion;
    header.CompatibleVersion = TransposeLoader.Header.WriterVersion;
    VectorType slotType = data.TransposeSchema.GetSlotType(cols[0]);
    ch.AssertValue(slotType);
    header.RowCount = slotType.ValueCount;
    header.ColumnCount = cols.Length;

    // We keep track of the offsets of the start of each sub-IDV, for use in writing out the
    // offsets/length table later.
    List<long> offsets = new List<long>();

    // First write a bunch of zeros at the head, as a placeholder for the header that
    // will go there assuming we can successfully load it. We'll keep this array around
    // for the real marshalling and writing of the header bytes structure.
    byte[] headerBytes = new byte[TransposeLoader.Header.HeaderSize];
    stream.Write(headerBytes, 0, headerBytes.Length);
    offsets.Add(stream.Position);

    // This is a convenient delegate to write out an IDV substream, then save the offsets
    // where writing stopped to the offsets list.
    Action<string, IDataView> viewAction = (name, view) =>
    {
        using (var substream = new SubsetStream(stream))
        {
            _internalSaver.SaveData(substream, view, Utils.GetIdentityPermutation(view.Schema.ColumnCount));
            substream.Seek(0, SeekOrigin.End);
            ch.Info("Wrote {0} data view in {1} bytes", name, substream.Length);
        }
        offsets.Add(stream.Position);
    };

    // First write out the no-row data, limited to these columns.
    IDataView subdata = new ChooseColumnsByIndexTransform(_host,
        new ChooseColumnsByIndexTransform.Arguments() { Index = cols }, data);
    // If we want the "dual mode" row-wise and slot-wise file, don't filter out anything.
    if (!_writeRowData)
        subdata = SkipTakeFilter.Create(_host, new SkipTakeFilter.TakeArguments() { Count = 0 }, subdata);

    string msg = _writeRowData ? "row-wise data, schema, and metadata" : "schema and metadata";
    viewAction(msg, subdata);
    foreach (var col in cols)
        viewAction(data.Schema.GetColumnName(col), new TransposerUtils.SlotDataView(_host, data, col));

    // Wrote out the dataview. Write out the table offset.
    using (var writer = new BinaryWriter(stream, Encoding.UTF8, leaveOpen: true))
    {
        // Format of the table is offset, length, both as 8-byte integers.
        // As it happens we wrote things out as adjacent sub-IDVs, so the
        // length can be derived from the offsets. The first will be the
        // start of the first sub-IDV, and all subsequent entries will be
        // the start/end of the current/next sub-IDV, respectively, so a total
        // of cols.Length + 2 entries.
        ch.Assert(offsets.Count == cols.Length + 2);
        ch.Assert(offsets[offsets.Count - 1] == stream.Position);
        header.SubIdvTableOffset = stream.Position;
        for (int c = 1; c < offsets.Count; ++c)
        {
            // 8-byte int for offsets, 8-byte int for length.
            writer.Write(offsets[c - 1]);
            writer.Write(offsets[c] - offsets[c - 1]);
        }
        header.TailOffset = stream.Position;
        writer.Write(TransposeLoader.Header.TailSignatureValue);

        // Now we are confident that things will work, so write it out.
        unsafe
        {
            Marshal.Copy(new IntPtr(&header), headerBytes, 0, Marshal.SizeOf(typeof(Header)));
        }
        writer.Seek(0, SeekOrigin.Begin);
        writer.Write(headerBytes);
    }
}
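Because the header is only written after every sub-IDV succeeds, a reader can trust the offset table it points to. Below is a hedged sketch of decoding that table, mirroring the writer's layout; it assumes the TransposeLoader.Header has already been unmarshalled from the front of the stream, and the local names are illustrative.

// Hedged reader sketch for the sub-IDV offset table written above.
// There are ColumnCount + 1 (offset, length) pairs: one for the no-row
// schema/metadata sub-IDV, then one per transposed column.
using (var reader = new BinaryReader(stream, Encoding.UTF8, leaveOpen: true))
{
    stream.Seek(header.SubIdvTableOffset, SeekOrigin.Begin);
    var table = new (long Offset, long Length)[header.ColumnCount + 1];
    for (int i = 0; i < table.Length; i++)
        table[i] = (reader.ReadInt64(), reader.ReadInt64());
    // table[0] locates the schema/metadata sub-IDV; table[1 + c] locates column c.
}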
private void RunCore(IChannel ch)
{
    Host.AssertValue(ch);

    IDataView data = CreateAndSaveLoader();

    if (!string.IsNullOrWhiteSpace(ImplOptions.Columns))
    {
        var keepColumns = ImplOptions.Columns
            .Split(new char[] { ',' }, StringSplitOptions.RemoveEmptyEntries).ToArray();
        if (Utils.Size(keepColumns) > 0)
            data = ColumnSelectingTransformer.CreateKeep(Host, data, keepColumns);
    }

    IDataSaver saver;
    if (ImplOptions.Saver != null)
        saver = ImplOptions.Saver.CreateComponent(Host);
    else
        saver = new TextSaver(Host, new TextSaver.Arguments() { Dense = ImplOptions.Dense });

    var cols = new List<int>();
    for (int i = 0; i < data.Schema.Count; i++)
    {
        if (!ImplOptions.KeepHidden && data.Schema[i].IsHidden)
            continue;
        var type = data.Schema[i].Type;
        if (saver.IsColumnSavable(type))
            cols.Add(i);
        else
            ch.Info(MessageSensitivity.Schema, "The column '{0}' will not be written as it has an unsavable column type.", data.Schema[i].Name);
    }
    Host.NotSensitive().Check(cols.Count > 0, "No valid columns to save");

    // Send the first N lines to console.
    if (ImplOptions.Rows > 0)
    {
        var args = new SkipTakeFilter.TakeOptions() { Count = ImplOptions.Rows };
        data = SkipTakeFilter.Create(Host, args, data);
    }

    var textSaver = saver as TextSaver;
    // If it is a text saver, utilize a special utility for this purpose.
    if (textSaver != null)
        textSaver.WriteData(data, true, cols.ToArray());
    else
    {
        using (MemoryStream mem = new MemoryStream())
        {
            using (Stream wrapStream = new SubsetStream(mem))
                saver.SaveData(wrapStream, data, cols.ToArray());
            mem.Seek(0, SeekOrigin.Begin);
            using (StreamReader reader = new StreamReader(mem))
            {
                string result = reader.ReadToEnd();
                ch.Info(MessageSensitivity.UserData | MessageSensitivity.Schema, result);
            }
        }
    }
}
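For comparison, roughly the same "show the first N rows as text" behavior is available through ML.NET's public API. A hedged sketch follows; the output path and row count are illustrative, not from the command above.

using System.IO;
using Microsoft.ML;

internal static class ShowDataSketch
{
    // Hedged sketch: TakeRows + SaveAsText approximate the internal command above.
    public static void ShowFirstRows(MLContext mlContext, IDataView data, long rows = 10)
    {
        IDataView firstRows = mlContext.Data.TakeRows(data, rows);
        using (var stream = File.Create("preview.tsv")) // illustrative path
            mlContext.Data.SaveAsText(firstRows, stream, separatorChar: '\t', headerRow: true, schema: false);
    }
}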