/// <summary>
/// Explicit <see cref="IEvaluator"/> implementation. Re-binds the scored data to the roles produced by
/// <c>GetInputColumnRoles</c> and forwards the result to <c>Evaluator.GetPerInstanceMetrics</c>.
/// </summary>
/// <param name="scoredData">The scored data; asserted to be non-null.</param>
/// <returns>The transform returned by <c>Evaluator.GetPerInstanceMetrics</c>.</returns>
IDataTransform IEvaluator.GetPerInstanceMetrics(RoleMappedData scoredData)
{
    Host.AssertValue(scoredData);

    var scoredSchema = scoredData.Schema;
    var remapped = new RoleMappedData(scoredData.Data, GetInputColumnRoles(scoredSchema));
    return Evaluator.GetPerInstanceMetrics(remapped);
}
// The shuffling is even uniformly IID!! :)
/// <summary>Always reports <c>true</c>.</summary>
public bool CanShuffle => true;

/// <summary>
/// Wraps a single <see cref="DataViewRow"/>.
/// </summary>
/// <param name="env">Environment used to register the "OneRowDataView" host; asserted non-null.</param>
/// <param name="row">The row to wrap; asserted non-null, with every schema column active.</param>
public OneRowDataView(IHostEnvironment env, DataViewRow row)
{
    Contracts.AssertValue(env);
    _host = env.Register("OneRowDataView");

    _host.AssertValue(row);
    // Every column in the row's schema must be active on the supplied row.
    _host.Assert(Enumerable.Range(0, row.Schema.Count).All(index => row.IsColumnActive(row.Schema[index])));

    _row = row;
}
/// <summary>
/// Looks up the type of column <paramref name="col"/> in <paramref name="schema"/> and passes it to
/// <c>T.PfaTypeOrNullForColumnType</c>.
/// </summary>
/// <param name="schema">The schema to query; asserted non-null.</param>
/// <param name="col">Column index; asserted to lie in <c>[0, schema.ColumnCount)</c>.</param>
/// <returns>The token returned by <c>T.PfaTypeOrNullForColumnType</c> for the column's type.</returns>
private JToken PfaTypeOrNullForColumn(ISchema schema, int col)
{
    _host.AssertValue(schema);
    _host.Assert(0 <= col && col < schema.ColumnCount);

    ColumnType columnType = schema.GetColumnType(col);
    return T.PfaTypeOrNullForColumnType(columnType);
}
/// <summary>
/// Looks up the type of column <paramref name="col"/> in <paramref name="schema"/> and passes it to
/// <c>T.PfaTypeOrNullForColumnType</c>.
/// </summary>
/// <param name="schema">The schema to query; asserted non-null.</param>
/// <param name="col">Column index; asserted to lie in <c>[0, schema.Count)</c>.</param>
/// <returns>The token returned by <c>T.PfaTypeOrNullForColumnType</c> for the column's type.</returns>
private JToken PfaTypeOrNullForColumn(DataViewSchema schema, int col)
{
    _host.AssertValue(schema);
    _host.Assert(0 <= col && col < schema.Count);

    DataViewType columnType = schema[col].Type;
    return T.PfaTypeOrNullForColumnType(columnType);
}
/// <summary>
/// Executes the node: binds the remaining inputs, invokes the entry point method by reflection,
/// publishes its results, and records the elapsed time in <c>RunTime</c> before setting <c>IsFinished</c>.
/// </summary>
public void Run()
{
    _host.Assert(CanStart());
    var timer = Stopwatch.StartNew();

    // Set all remaining inputs.
    foreach (var binding in _inputBindingMap)
    {
        bool wasSet = _inputBuilder.TrySetValue(binding.Key, BuildParameterValue(binding.Value));
        _host.Assert(wasSet);
    }
    _host.Assert(_inputBuilder.GetMissingValues().Length == 0);

    object output;
    if (!IsMacro)
    {
        output = _entryPoint.Method.Invoke(null, new object[] { _host, _inputBuilder.GetInstance() });

        // Push every produced value that has a mapped target variable into the context.
        foreach (var produced in _outputHelper.ExtractValues(output))
        {
            if (!_outputMap.TryGetValue(produced.Key, out string target))
            {
                continue;
            }

            bool found = _context.TryGetVariable(target, out EntryPointVariable variable);
            _host.Assert(found);
            variable.SetValue(produced.Value);
        }
    }
    else
    {
        // Macros additionally receive this node and hand back the nodes they expand into.
        output = _entryPoint.Method.Invoke(null, new object[] { _host, _inputBuilder.GetInstance(), this });
        var expansion = (CommonOutputs.MacroOutput)output;
        _host.AssertValue(expansion);
        _macroNodes = expansion.Nodes;
    }

    timer.Stop();
    RunTime = timer.Elapsed;
    IsFinished = true;
}
/// <summary>
/// Initializes the row: registers a "Row" host, stores the schema, and builds one getter per column
/// for which <paramref name="predicate"/> returns true (the other getter slots are null).
/// </summary>
/// <param name="env">Environment used to register the host; asserted non-null.</param>
/// <param name="schema">Row schema; its column count must match <paramref name="schemaDef"/> and <paramref name="peeks"/>.</param>
/// <param name="schemaDef">Per-column schema definition passed to <c>CreateGetter</c>.</param>
/// <param name="peeks">Per-column peek delegates passed to <c>CreateGetter</c>.</param>
/// <param name="predicate">Selects the column indices that get a getter.</param>
public InputRowBase(IHostEnvironment env, Schema schema, InternalSchemaDefinition schemaDef, Delegate[] peeks, Func<int, bool> predicate)
{
    Contracts.AssertValue(env);
    Host = env.Register("Row");

    Host.AssertValue(schema);
    Host.AssertValue(schemaDef);
    Host.AssertValue(peeks);
    Host.AssertValue(predicate);
    Host.Assert(schema.ColumnCount == schemaDef.Columns.Length);
    Host.Assert(schema.ColumnCount == peeks.Length);

    _colCount = schema.ColumnCount;
    Schema = schema;

    _getters = new Delegate[_colCount];
    for (int column = 0; column < _colCount; column++)
    {
        if (predicate(column))
        {
            _getters[column] = CreateGetter(schema.GetColumnType(column), schemaDef.Columns[column], peeks[column]);
        }
        else
        {
            _getters[column] = null;
        }
    }
}
/// <summary>
/// Sends a trainer telemetry message built from <paramref name="sub"/>, but only when
/// <c>sub.IsGood()</c> holds; otherwise nothing is sent.
/// </summary>
/// <param name="pipe">The pipe to send on; asserted non-null.</param>
/// <param name="sub">The sub-component to report; the assert permits null.</param>
protected void SendTelemetryComponent(IPipe<TelemetryMessage> pipe, SubComponent sub)
{
    Host.AssertValue(pipe);
    Host.AssertValueOrNull(sub);

    // NOTE(review): sub may be null here, so IsGood() is presumably a null-tolerant extension method - confirm.
    if (!sub.IsGood())
    {
        return;
    }

    var message = TelemetryMessage.CreateTrainer(sub.Kind, sub.SubComponentSettings);
    pipe.Send(message);
}
// REVIEW: The converting getter invokes a type conversion delegate on every call, so it's inherently slower
// than the 'direct' getter. We don't have good indication of this to the user, and the selection
// of affected types is pretty arbitrary (signed integers and bools, but not uints and floats).
/// <summary>
/// Builds a <c>ValueGetter</c> that peeks a <typeparamref name="TSrc"/> array from the current row object
/// and writes each element, converted by <paramref name="convert"/>, into the destination buffer.
/// </summary>
/// <param name="peekDel">Expected to be a <c>Peek&lt;TRow, TSrc[]&gt;</c>; asserted non-null after the cast.</param>
/// <param name="convert">Element-wise conversion applied on every getter call.</param>
/// <returns>The getter, typed as <see cref="Delegate"/>.</returns>
private Delegate CreateConvertingArrayGetterDelegate<TSrc, TDst>(Delegate peekDel, Func<TSrc, TDst> convert)
{
    var typedPeek = peekDel as Peek<TRow, TSrc[]>;
    Host.AssertValue(typedPeek);

    // Scratch array captured by the closure and handed to the peek on every call.
    TSrc[] source = default;

    ValueGetter<VBuffer<TDst>> getter = (ref VBuffer<TDst> dst) =>
    {
        typedPeek(GetCurrentRowObject(), Position, ref source);
        var count = Utils.Size(source);

        // Keep the destination's value array when it is large enough; otherwise allocate a new one.
        var values = Utils.Size(dst.Values) < count ? new TDst[count] : dst.Values;
        dst = new VBuffer<TDst>(count, values, dst.Indices);

        for (int i = 0; i < count; i++)
        {
            dst.Values[i] = convert(source[i]);
        }
    };
    return getter;
}
private static ILegacyDataLoader CreateCore(IHost host, ILegacyDataLoader srcLoader, KeyValuePair <string, IComponentFactory <IDataView, IDataTransform> >[] transformArgs) { Contracts.AssertValue(host, "host"); host.AssertValue(srcLoader, "srcLoader"); host.AssertValueOrNull(transformArgs); if (Utils.Size(transformArgs) == 0) { return(srcLoader); }
/// <summary>
/// Returns a slot cursor over column <paramref name="col"/>, delegating to <c>_tview</c>.
/// </summary>
/// <param name="col">Column index; must lie in <c>[0, Schema.ColumnCount)</c> and have a slot type in <c>TransposeSchema</c>.</param>
/// <returns>The cursor returned by <c>_tview.GetSlotCursor</c>.</returns>
public ISlotCursor GetSlotCursor(int col)
{
    _host.CheckParam(0 <= col && col < Schema.ColumnCount, nameof(col));

    // No transpose schema, or no slot type for this column, means the column cannot be read slot-wise.
    bool untransposable = TransposeSchema?.GetSlotType(col) == null;
    if (untransposable)
    {
        throw _host.ExceptParam(nameof(col), "Bad call to GetSlotCursor on untransposable column '{0}'", Schema.GetColumnName(col));
    }

    _host.AssertValue(_tview);
    return _tview.GetSlotCursor(col);
}
/// <summary>
/// Deserializing constructor: loads the "Ensemble" sub-model from <paramref name="ctx"/> and caches
/// its leaf count as computed by <c>CountLeaves</c>.
/// </summary>
/// <param name="env">Environment used to register the host; checked non-null.</param>
/// <param name="ctx">Load context to read from; asserted non-null.</param>
public TreeEnsembleFeaturizerBindableMapper(IHostEnvironment env, ModelLoadContext ctx)
{
    Contracts.CheckValue(env, nameof(env));
    _host = env.Register(LoaderSignature);
    _host.AssertValue(ctx);

    // *** Binary format ***
    // ensemble

    ctx.LoadModel<FastTreePredictionWrapper, SignatureLoadModel>(env, out _ensemble, "Ensemble");
    _totalLeafCount = CountLeaves(_ensemble);
}
/// <summary>
/// Replaces the label column with a boolean column that is true exactly when
/// <paramref name="equalsTarget"/> accepts the original label value.
/// </summary>
/// <remarks>
/// When <c>Args.ImputeMissingLabelsAsNegative</c> is false, the data is first passed through an
/// <c>NAFilter</c> on the label column and the mapper is built on top of that filtered view.
/// Previously the filtered view was constructed but then discarded, so the filter never took effect.
/// </remarks>
/// <param name="type">Type of the label column; its raw type must be <typeparamref name="T"/>.</param>
/// <param name="equalsTarget">Predicate identifying the target label value.</param>
/// <param name="data">Role-mapped data; its schema must have a label column.</param>
/// <returns>A view whose label column has been mapped to <c>BooleanDataViewType</c>.</returns>
private protected IDataView MapLabelsCore<T>(DataViewType type, InPredicate<T> equalsTarget, RoleMappedData data)
{
    Host.AssertValue(type);
    Host.Assert(type.RawType == typeof(T));
    Host.AssertValue(equalsTarget);
    Host.AssertValue(data);
    Host.Assert(data.Schema.Label.HasValue);

    var label = data.Schema.Label.Value;

    IDataView dataView = data.Data;
    if (!Args.ImputeMissingLabelsAsNegative)
    {
        // NOTE(review): the 'false' argument is presumably the filter's complement flag - confirm against NAFilter.
        dataView = new NAFilter(Host, data.Data, false, label.Name);
    }

    // Map over dataView (not data.Data) so that the optional NA filter above is actually applied.
    return LambdaColumnMapper.Create(Host, "Label mapper", dataView, label.Name, label.Name, type, BooleanDataViewType.Instance,
        (in T src, ref bool dst) => dst = equalsTarget(in src));
}
/// <summary>
/// Deserializing constructor: reads one int from <paramref name="ctx"/> and rejects the model unless
/// it equals <c>sizeof(Single)</c>.
/// </summary>
/// <param name="env">Environment used to register the host; asserted non-null.</param>
/// <param name="ctx">Load context to read from; asserted non-null.</param>
private Voting(IHostEnvironment env, ModelLoadContext ctx)
{
    Contracts.AssertValue(env);
    _host = env.Register(LoaderSignature);
    _host.AssertValue(ctx);

    // *** Binary format ***
    // int: sizeof(Single)

    int floatSize = ctx.Reader.ReadInt32();
    _host.CheckDecode(floatSize == sizeof(Single));
}
/// <summary>
/// Deserializing constructor: restores the complement flag, the random state, the shuffle flag and
/// the pool size from <paramref name="ctx"/>, in that order.
/// </summary>
/// <param name="host">Host forwarded to the base constructor.</param>
/// <param name="ctx">Load context to read from; asserted non-null.</param>
/// <param name="input">Input view forwarded to the base constructor; asserted non-null.</param>
private BootstrapSampleTransform(IHost host, ModelLoadContext ctx, IDataView input)
    : base(host, input)
{
    host.AssertValue(ctx);
    host.AssertValue(input);

    // *** Binary format ***
    // byte: is the complement sample, that is, an out-of-bag sample
    // uint: seed0
    // uint: seed1
    // uint: seed2
    // uint: seed3
    // byte: input source should be shuffled
    // int: size of the output pool

    var reader = ctx.Reader;
    _complement = reader.ReadBoolByte();
    _state = TauswortheHybrid.State.Load(reader);
    _shuffleInput = reader.ReadBoolByte();
    _poolSize = reader.ReadInt32();

    // A negative pool size marks the stream as corrupt.
    Host.CheckDecode(_poolSize >= 0);
}
/// <summary>
/// Tells whether <paramref name="col"/> carries categorical-slot-range metadata of the expected shape.
/// </summary>
/// <param name="col">The column to inspect; asserted non-null.</param>
/// <returns>
/// True when the <c>CategoricalSlotRanges</c> metadata column exists, is of kind <c>Vector</c> and has
/// item type <c>NumberType.I4</c>; false otherwise.
/// </returns>
private bool HasCategoricals(SchemaShape.Column col)
{
    _host.AssertValue(col);

    if (col.Metadata.TryFindColumn(MetadataUtils.Kinds.CategoricalSlotRanges, out var slotRanges))
    {
        // The indices must be ints and of a definite size vector type. (Definite because
        // metadata has only one value anyway.)
        return slotRanges.Kind == SchemaShape.Column.VectorKind.Vector && slotRanges.ItemType == NumberType.I4;
    }

    return false;
}
/// <summary>
/// Snapshots the builder's columns and names into arrays and builds the schema from them.
/// </summary>
/// <param name="env">Environment used to register the "ArrayDataView" host; asserted non-null.</param>
/// <param name="builder">Source of columns and names; its name and column counts must match.</param>
/// <param name="rowCount">Number of rows; asserted non-negative.</param>
public DataView(IHostEnvironment env, ArrayDataViewBuilder builder, int rowCount)
{
    Contracts.AssertValue(env, "env");
    _host = env.Register("ArrayDataView");

    _host.AssertValue(builder);
    _host.Assert(rowCount >= 0);
    _host.Assert(builder._names.Count == builder._columns.Count);

    _columns = builder._columns.ToArray();

    var columnTypes = _columns.Select(column => column.Type).ToArray();
    var columnNames = builder._names.ToArray();
    _schema = new SchemaImpl(_host, columnTypes, columnNames, builder);

    _rowCount = rowCount;
}
/// <summary>
/// Writes the version info and then the ensemble, as the "Ensemble" sub-model, to <paramref name="ctx"/>.
/// </summary>
/// <param name="ctx">Save context to write to; checked non-null.</param>
public void Save(ModelSaveContext ctx)
{
    _host.CheckValue(ctx, nameof(ctx));
    ctx.CheckAtModel();
    ctx.SetVersionInfo(GetVersionInfo());

    // *** Binary format ***
    // ensemble

    _host.AssertValue(_ensemble);
    ctx.SaveModel(_ensemble, "Ensemble");
}
/// <summary>
/// Creates a <c>ValueGetter&lt;VBuffer&lt;float&gt;&gt;</c> that feeds the input column's value through
/// <paramref name="state"/>.
/// </summary>
/// <param name="input">Row supplying the source column; asserted non-null.</param>
/// <param name="state">State whose processing routine (buffered or not) is invoked per call.</param>
/// <returns>The getter, typed as <see cref="Delegate"/>.</returns>
private Delegate MakeGetter(DataViewRow input, ForecastingStateBase state)
{
    _host.AssertValue(input);

    var sourceGetter = input.GetGetter<TInput>(input.Schema[_inputColumnIndex]);

    // A positive window size selects the buffered routine; otherwise the unbuffered one is used.
    ProcessData process;
    if (_parent.WindowSize > 0)
    {
        process = state.Process;
    }
    else
    {
        process = state.ProcessWithoutBuffer;
    }

    ValueGetter<VBuffer<float>> getter = (ref VBuffer<float> dst) =>
    {
        TInput value = default;
        if (!_dontFetchSrcValue)
        {
            sourceGetter(ref value);
            process(ref value, ref dst);
        }
        else
        {
            // The source is not read in this mode; the default value goes straight to TransformCore.
            state.TransformCore(ref value, null, 0, ref dst);
        }
    };
    return getter;
}
/// <summary>
/// Create a filter transform
/// </summary>
/// <remarks>
/// <paramref name="env"/>, <paramref name="source"/> and <paramref name="filterFunc"/> are required and
/// are validated with contract checks rather than debug-only asserts, so a null argument is rejected
/// here instead of failing later with a null dereference.
/// </remarks>
/// <param name="env">The host environment</param>
/// <param name="source">The dataview upon which we construct the transform</param>
/// <param name="filterFunc">The function by which we transform source to destination columns and decide whether
/// to keep the row.</param>
/// <param name="initStateAction">The function that is called once per cursor to initialize state. Can be null.</param>
/// <param name="inputSchemaDefinition">The schema definition overrides for <typeparamref name="TSrc"/></param>
/// <param name="outputSchemaDefinition">The schema definition overrides for <typeparamref name="TDst"/></param>
public StatefulFilterTransform(IHostEnvironment env, IDataView source, Func<TSrc, TDst, TState, bool> filterFunc,
    Action<TState> initStateAction, SchemaDefinition inputSchemaDefinition = null, SchemaDefinition outputSchemaDefinition = null)
{
    // env is dereferenced on the next line, so it must be validated before anything else.
    Contracts.CheckValue(env, nameof(env));
    _host = env.Register(RegistrationName);

    _host.CheckValue(source, nameof(source));
    _host.CheckValue(filterFunc, nameof(filterFunc));
    _host.AssertValueOrNull(initStateAction);
    _host.AssertValueOrNull(inputSchemaDefinition);
    _host.AssertValueOrNull(outputSchemaDefinition);

    _source = source;
    _filterFunc = filterFunc;
    _initStateAction = initStateAction;
    _inputSchemaDefinition = inputSchemaDefinition;
    _typedSource = TypedCursorable<TSrc>.Create(_host, Source, false, inputSchemaDefinition);

    // The output columns come from TDst (with optional overrides) and are appended to the source schema.
    var outSchema = InternalSchemaDefinition.Create(typeof(TDst), outputSchemaDefinition);
    _addedSchema = outSchema;
    _bindings = new ColumnBindings(Source.Schema, DataViewConstructionUtils.GetSchemaColumns(outSchema));
}
/// <summary>
/// Opens a cursor on the source for the columns that <c>GetDependencies</c> reports as needed and
/// wraps it in a <c>RowCursor</c> exposing the requested output columns.
/// </summary>
/// <param name="needCol">Predicate over output column indices; asserted non-null.</param>
/// <param name="rand">Optional random source passed on to the source cursor; may be null.</param>
/// <returns>A new <c>RowCursor</c> over the mapper's output.</returns>
public IRowCursor GetRowCursor(Func<int, bool> needCol, IRandom rand = null)
{
    _host.AssertValue(needCol, nameof(needCol));
    _host.AssertValueOrNull(rand);

    // Build out the active state for the input
    var sourcePredicate = GetDependencies(needCol);
    var sourceCursor = Source.GetRowCursor(sourcePredicate, rand);

    // Build the active state for the output
    var activeOutputs = Utils.BuildArray(_mapper.Schema.ColumnCount, needCol);

    return new RowCursor(_host, _mapper, sourceCursor, activeOutputs);
}
/// <summary>
/// Sends a trainer telemetry message describing <paramref name="factory"/>. Factories implementing
/// <c>ICommandLineComponentFactory</c> are reported by name and settings string; anything else
/// (including null) is reported as "Unknown".
/// </summary>
/// <param name="pipe">The pipe to send on; asserted non-null.</param>
/// <param name="factory">The component factory to report; may be null.</param>
protected void SendTelemetryComponent(IPipe<TelemetryMessage> pipe, IComponentFactory factory)
{
    Host.AssertValue(pipe);
    Host.AssertValueOrNull(factory);

    if (!(factory is ICommandLineComponentFactory described))
    {
        pipe.Send(TelemetryMessage.CreateTrainer("Unknown", "Non-ICommandLineComponentFactory object"));
        return;
    }

    pipe.Send(TelemetryMessage.CreateTrainer(described.Name, described.GetSettingsString()));
}
/// <summary>
/// Registers the host under <paramref name="name"/>, validates the validation proportion and
/// captures it together with the base predictor factory from <paramref name="args"/>.
/// </summary>
/// <param name="env">Environment used to register the host; asserted non-null.</param>
/// <param name="name">Registration name; asserted non-whitespace.</param>
/// <param name="args">Arguments; <c>ValidationDatasetProportion</c> must be in <c>[0, 1)</c> and the predictor factory non-null.</param>
private protected BaseStacking(IHostEnvironment env, string name, ArgumentsBase args)
{
    Contracts.AssertValue(env);
    env.AssertNonWhiteSpace(name);

    Host = env.Register(name);
    Host.AssertValue(args, "args");

    bool proportionInRange = 0 <= args.ValidationDatasetProportion && args.ValidationDatasetProportion < 1;
    Host.CheckUserArg(proportionInRange, nameof(args.ValidationDatasetProportion),
        "The validation proportion for stacking should be greater than or equal to 0 and less than 1");
    ValidationDatasetProportion = args.ValidationDatasetProportion;

    BasePredictorType = args.GetPredictorFactory();
    Host.CheckValue(BasePredictorType, nameof(BasePredictorType));
}
/// <summary>
/// Serializes the model: the graph definition as the "TFModel" binary stream, followed by the
/// number of input columns, each input column name, and the output column name.
/// </summary>
/// <param name="ctx">Save context to write to; checked non-null.</param>
public void Save(ModelSaveContext ctx)
{
    _host.CheckValue(ctx, nameof(ctx));
    ctx.CheckAtModel();
    ctx.SetVersionInfo(GetVersionInfo());

    // Copy the graph definition into a managed array and release the buffer right away, so it is
    // not left undisposed and the write callback below does not depend on the buffer's lifetime.
    byte[] graphDef;
    using (var buffer = new TFBuffer())
    {
        _session.Graph.ToGraphDef(buffer);
        graphDef = buffer.ToArray();
    }

    ctx.SaveBinaryStream("TFModel", w =>
    {
        w.WriteByteArray(graphDef);
    });

    Contracts.AssertNonEmpty(_inputColNames);
    ctx.Writer.Write(_inputColNames.Length);
    foreach (var colName in _inputColNames)
    {
        ctx.SaveNonEmptyString(colName);
    }

    ctx.SaveNonEmptyString(_outputColName);
}
/// <summary>
/// Captures the saver's formatting settings and allocates the working buffers for writing.
/// </summary>
/// <param name="parent">Owning saver; supplies the host, density flag, separator and header option.</param>
/// <param name="writer">Destination writer; asserted non-null.</param>
/// <param name="pipes">Per-column value writers; asserted non-null.</param>
/// <param name="hasHeader">Header request; only honored when the parent also outputs a header.</param>
public State(TextSaver parent, TextWriter writer, ValueWriter[] pipes, bool hasHeader)
{
    Contracts.AssertValue(parent);
    Contracts.AssertValue(parent._host);
    _host = parent._host;
    _host.AssertValue(writer);
    _host.AssertValue(pipes);

    // Settings inherited from the parent saver.
    _dense = parent._forceDense;
    _sepChar = parent._sepChar;
    _sepStr = parent._sepStr;

    _writer = writer;
    _pipes = pipes;
    _hasHeader = hasHeader && parent._outputHeader;

    // Column maps hold one more entry than there are pipes.
    int mapSize = _pipes.Length + 1;
    _mpcoldst = new int[mapSize];
    _mpcolslot = new int[mapSize];

    // Initial capacities of the character and slot buffers.
    _rgch = new char[1024];
    _mpslotdst = new int[128];
    _mpslotichLim = new int[128];
}
/// <summary>
/// Replaces the label column with a float column: 1 when <paramref name="equalsTarget"/> accepts the
/// original value, otherwise 0 - or NaN for missing values when missing labels are not imputed as
/// negative and an is-NA predicate exists for <paramref name="type"/>.
/// </summary>
/// <param name="type">Type of the label column; its raw type must be <typeparamref name="T"/>.</param>
/// <param name="equalsTarget">Predicate identifying the target label value.</param>
/// <param name="data">Role-mapped data; its schema must have a label column.</param>
/// <returns>A view whose label column has been mapped to <c>NumberType.Float</c>.</returns>
private protected IDataView MapLabelsCore<T>(ColumnType type, InPredicate<T> equalsTarget, RoleMappedData data)
{
    Host.AssertValue(type);
    Host.Assert(type.RawType == typeof(T));
    Host.AssertValue(equalsTarget);
    Host.AssertValue(data);
    Host.Assert(data.Schema.Label.HasValue);

    var labelColumn = data.Schema.Label.Value;

    InPredicate<T> isNA;
    if (Args.ImputeMissingLabelsAsNegative || !Conversions.Instance.TryGetIsNAPredicate(type, out isNA))
    {
        // Two-valued mapping: everything that is not the target becomes the default float.
        return LambdaColumnMapper.Create(Host, "Label mapper", data.Data, labelColumn.Name, labelColumn.Name, type, NumberType.Float,
            (in T src, ref float dst) => dst = equalsTarget(in src) ? 1 : default(float));
    }

    // Three-valued mapping: missing source values that are not the target are surfaced as NaN.
    return LambdaColumnMapper.Create(Host, "Label mapper", data.Data, labelColumn.Name, labelColumn.Name, type, NumberType.Float,
        (in T src, ref float dst) => dst = equalsTarget(in src) ? 1 : (isNA(in src) ? float.NaN : default(float)));
}
/// <summary>
/// Trains the predictor created by <c>args.Filter</c> on <paramref name="input"/> and copies its
/// feature weights into <paramref name="scores"/>.
/// </summary>
/// <param name="host">Host used for channels and component creation; asserted non-null.</param>
/// <param name="input">Training data; asserted non-null.</param>
/// <param name="args">Arguments; exactly one of <c>Threshold</c> and <c>NumSlotsToKeep</c> must be set.</param>
/// <param name="scores">Receives the feature weights of the trained predictor.</param>
private static void TrainCore(IHost host, IDataView input, Arguments args, ref VBuffer<Single> scores)
{
    Contracts.AssertValue(host);
    host.AssertValue(args);
    host.AssertValue(input);
    host.Assert(args.Threshold.HasValue != args.NumSlotsToKeep.HasValue);

    using (var ch = host.Start("Train"))
    {
        ch.Trace("Constructing trainer");
        ITrainer trainer = args.Filter.CreateComponent(host);

        IDataView view = input;
        // Column roles are resolved against the schema of the view before any normalizer is added.
        ISchema schema = view.Schema;
        var labelCol = TrainUtils.MatchNameOrDefaultOrNull(ch, schema, nameof(args.LabelColumn), args.LabelColumn, DefaultColumnNames.Label);
        var featureCol = TrainUtils.MatchNameOrDefaultOrNull(ch, schema, nameof(args.FeatureColumn), args.FeatureColumn, DefaultColumnNames.Features);
        var groupCol = TrainUtils.MatchNameOrDefaultOrNull(ch, schema, nameof(args.GroupColumn), args.GroupColumn, DefaultColumnNames.GroupId);
        var weightCol = TrainUtils.MatchNameOrDefaultOrNull(ch, schema, nameof(args.WeightColumn), args.WeightColumn, DefaultColumnNames.Weight);
        var nameCol = TrainUtils.MatchNameOrDefaultOrNull(ch, schema, nameof(args.NameColumn), args.NameColumn, DefaultColumnNames.Name);

        TrainUtils.AddNormalizerIfNeeded(host, ch, trainer, ref view, featureCol, args.NormalizeFeatures);

        ch.Trace("Binding columns");
        var customCols = TrainUtils.CheckAndGenerateCustomColumns(ch, args.CustomColumn);
        var trainingData = new RoleMappedData(view, labelCol, featureCol, groupCol, weightCol, nameCol, customCols);

        var predictor = TrainUtils.Train(host, ch, trainingData, trainer, null, null, 0, args.CacheData);

        // The trained predictor is expected to expose feature weights.
        var weighted = predictor as IPredictorWithFeatureWeights<Single>;
        Contracts.AssertValue(weighted);
        weighted.GetFeatureWeights(ref scores);

        ch.Done();
    }
}
/// <summary>
/// Yields a single <c>Batch</c> of train and test data. When <c>ValidationDatasetProportion</c> is
/// positive the data is split on a generated temporary column; otherwise both sets are the full data.
/// </summary>
/// <param name="rand">Random source that seeds the split column; asserted non-null.</param>
/// <returns>A lazily evaluated sequence containing exactly one batch.</returns>
public IEnumerable<Batch> GetBatches(IRandom rand)
{
    Host.Assert(Data != null, "Must call Initialize first!");
    Host.AssertValue(rand);

    using (var ch = Host.Start("Getting batches"))
    {
        RoleMappedData trainSet;
        RoleMappedData testSet;

        // Split the data, if needed.
        if (ValidationDatasetProportion > 0)
        {
            // Split the data into train and test sets.
            string splitColumn = Data.Data.Schema.GetTempColumnName();
            var generatorArgs = new GenerateNumberTransform.Arguments
            {
                Column = new[] { new GenerateNumberTransform.Column() { Name = splitColumn } },
                Seed = (uint)rand.Next()
            };
            var withSplitColumn = new GenerateNumberTransform(Host, generatorArgs, Data.Data);

            // Both filters use the same Max on the split column; the train filter sets Complement.
            var testView = new RangeFilter(Host,
                new RangeFilter.Arguments() { Column = splitColumn, Max = ValidationDatasetProportion },
                withSplitColumn);
            var trainView = new RangeFilter(Host,
                new RangeFilter.Arguments() { Column = splitColumn, Max = ValidationDatasetProportion, Complement = true },
                withSplitColumn);

            testSet = new RoleMappedData(testView, Data.Schema.GetColumnRoleNames());
            trainSet = new RoleMappedData(trainView, Data.Schema.GetColumnRoleNames());
        }
        else
        {
            testSet = Data;
            trainSet = Data;
        }

        if (BatchSize > 0)
        {
            // REVIEW: How should we carve the data into batches?
            ch.Warning("Batch support is temporarily disabled");
        }

        yield return new Batch(trainSet, testSet);
        ch.Done();
    }
}
/// <summary>
/// Creates a <c>ValueGetter&lt;VBuffer&lt;double&gt;&gt;</c> that reads the input column and runs it
/// through the processing routine of <paramref name="state"/>.
/// </summary>
/// <param name="input">Row supplying the source column; asserted non-null.</param>
/// <param name="state">State whose processing routine (buffered or not) is invoked per call.</param>
/// <returns>The getter, typed as <see cref="Delegate"/>.</returns>
private Delegate MakeGetter(IRow input, TState state)
{
    _host.AssertValue(input);

    var sourceGetter = input.GetGetter<TInput>(_inputColumnIndex);

    // A positive window size selects the buffered routine; otherwise the unbuffered one is used.
    ProcessData process;
    if (_parent.WindowSize > 0)
    {
        process = state.Process;
    }
    else
    {
        process = state.ProcessWithoutBuffer;
    }

    ValueGetter<VBuffer<double>> getter = (ref VBuffer<double> dst) =>
    {
        TInput value = default;
        sourceGetter(ref value);
        process(ref value, ref dst);
    };
    return getter;
}
/// <summary>
/// Deserializing constructor: verifies the stored float size and restores the <c>Normalize</c> flag.
/// </summary>
/// <param name="env">Environment used to register the host; asserted non-null.</param>
/// <param name="name">Registration name; asserted non-whitespace.</param>
/// <param name="ctx">Load context to read from; asserted non-null.</param>
internal BaseMultiCombiner(IHostEnvironment env, string name, ModelLoadContext ctx)
{
    Contracts.AssertValue(env);
    env.AssertNonWhiteSpace(name);
    Host = env.Register(name);
    Host.AssertValue(ctx);

    // *** Binary format ***
    // int: sizeof(Single)
    // bool: _normalize

    int floatSize = ctx.Reader.ReadInt32();
    Host.CheckDecode(floatSize == sizeof(Single));

    Normalize = ctx.Reader.ReadBoolByte();
}
/// <summary>
/// Projects <paramref name="lookup"/> down to two columns named "Term" and "Value" and serializes
/// that view with a <c>BinarySaver</c> into an in-memory byte array.
/// </summary>
/// <param name="host">Host used for checks and component construction; asserted non-null.</param>
/// <param name="lookup">The data view holding the term/value pairs; asserted non-null.</param>
/// <param name="termColumn">Name of the term column; it must exist and contain text.</param>
/// <param name="valueColumn">Name of the value column; it must exist.</param>
/// <returns>The serialized bytes of the two-column view.</returns>
private static byte[] GetBytesFromDataView(IHost host, IDataView lookup, string termColumn, string valueColumn)
{
    Contracts.AssertValue(host);
    host.AssertValue(lookup);
    host.AssertNonEmpty(termColumn);
    host.AssertNonEmpty(valueColumn);

    var schema = lookup.Schema;

    if (!schema.TryGetColumnIndex(termColumn, out int colTerm))
    {
        throw host.ExceptUserArg(nameof(Arguments.TermColumn), "column not found: '{0}'", termColumn);
    }

    // Only the existence of the value column matters here; its index is not used.
    if (!schema.TryGetColumnIndex(valueColumn, out int _))
    {
        throw host.ExceptUserArg(nameof(Arguments.ValueColumn), "column not found: '{0}'", valueColumn);
    }

    // REVIEW: Should we allow term to be a vector of text (each term in the vector
    // would map to the same value)?
    var typeTerm = schema.GetColumnType(colTerm);
    host.CheckUserArg(typeTerm.IsText, nameof(Arguments.TermColumn), "term column must contain text");

    var args = new ChooseColumnsTransform.Arguments();
    args.Column = new[]
    {
        new ChooseColumnsTransform.Column { Name = "Term", Source = termColumn },
        new ChooseColumnsTransform.Column { Name = "Value", Source = valueColumn },
    };
    var view = new ChooseColumnsTransform(host, args, lookup);

    var saver = new BinarySaver(host, new BinarySaver.Arguments());
    using (var strm = new MemoryStream())
    {
        saver.SaveData(strm, view, 0, 1);
        return strm.ToArray();
    }
}