/// <summary>
/// Explicit <see cref="IEvaluator"/> implementation. Re-wraps the scored data using the column roles
/// returned by <c>GetInputColumnRoles</c> and forwards the request to <c>Evaluator</c>.
/// </summary>
/// <param name="scoredData">The scored data whose per-instance metrics are requested.</param>
/// <returns>The transform returned by <c>Evaluator.GetPerInstanceMetrics</c>.</returns>
IDataTransform IEvaluator.GetPerInstanceMetrics(RoleMappedData scoredData)
        {
            Host.AssertValue(scoredData);

            var scoredSchema = scoredData.Schema;
            var scoredView = scoredData.Data;

            // The roles are recomputed from the schema rather than taken from scoredData as-is.
            var inputRoles = GetInputColumnRoles(scoredSchema);
            var remapped = new RoleMappedData(scoredView, inputRoles);

            return Evaluator.GetPerInstanceMetrics(remapped);
        }
            /// <summary>
            /// Always true. (Original author's note: the shuffling is even uniformly IID.)
            /// </summary>
            public bool CanShuffle
            {
                get { return true; }
            }

            /// <summary>
            /// Creates a view over <paramref name="row"/>, registering a host named "OneRowDataView".
            /// Asserts that every column of the row's schema is active.
            /// </summary>
            /// <param name="env">The host environment used to register the host.</param>
            /// <param name="row">The row to wrap.</param>
            public OneRowDataView(IHostEnvironment env, DataViewRow row)
            {
                Contracts.AssertValue(env);
                _host = env.Register("OneRowDataView");
                _host.AssertValue(row);

                // No column index in [0, Count) may refer to an inactive column.
                _host.Assert(!Enumerable.Range(0, row.Schema.Count).Any(idx => !row.IsColumnActive(row.Schema[idx])));

                _row = row;
            }
        /// <summary>
        /// Fetches the type of column <paramref name="col"/> from <paramref name="schema"/> and returns the
        /// result of <c>T.PfaTypeOrNullForColumnType</c> for it.
        /// </summary>
        /// <param name="schema">The schema to look the column up in.</param>
        /// <param name="col">The column index; asserted to lie in <c>[0, schema.ColumnCount)</c>.</param>
        private JToken PfaTypeOrNullForColumn(ISchema schema, int col)
        {
            _host.AssertValue(schema);
            _host.Assert(col >= 0 && schema.ColumnCount > col);

            ColumnType columnType = schema.GetColumnType(col);
            return T.PfaTypeOrNullForColumnType(columnType);
        }
示例#4
0
        /// <summary>
        /// Reads the type of column <paramref name="col"/> from <paramref name="schema"/> and returns the
        /// result of <c>T.PfaTypeOrNullForColumnType</c> for it.
        /// </summary>
        /// <param name="schema">The schema to look the column up in.</param>
        /// <param name="col">The column index; asserted to lie in <c>[0, schema.Count)</c>.</param>
        private JToken PfaTypeOrNullForColumn(DataViewSchema schema, int col)
        {
            _host.AssertValue(schema);
            _host.Assert(col >= 0 && schema.Count > col);

            DataViewType columnType = schema[col].Type;
            return T.PfaTypeOrNullForColumnType(columnType);
        }
示例#5
0
        /// <summary>
        /// Runs the node: sets the remaining bound inputs, invokes the entry point method through reflection,
        /// stores either the macro's nodes or the extracted output values, and records the elapsed time.
        /// </summary>
        public void Run()
        {
            _host.Assert(CanStart());

            var timer = new Stopwatch();
            timer.Start();

            // Set all remaining inputs.
            foreach (var binding in _inputBindingMap)
            {
                bool wasSet = _inputBuilder.TrySetValue(binding.Key, BuildParameterValue(binding.Value));
                _host.Assert(wasSet);
            }

            _host.Assert(_inputBuilder.GetMissingValues().Length == 0);

            if (!IsMacro)
            {
                // Non-macro entry point: invoked with (host, input); its output values are pushed into
                // the variables named by the output map.
                object result = _entryPoint.Method.Invoke(null, new object[] { _host, _inputBuilder.GetInstance() });
                foreach (var produced in _outputHelper.ExtractValues(result))
                {
                    string variableName;
                    if (!_outputMap.TryGetValue(produced.Key, out variableName))
                    {
                        // Outputs without a mapping are ignored.
                        continue;
                    }

                    EntryPointVariable variable;
                    bool found = _context.TryGetVariable(variableName, out variable);
                    _host.Assert(found);
                    variable.SetValue(produced.Value);
                }
            }
            else
            {
                // Macro entry point: this node is passed as a third argument, and the returned
                // MacroOutput supplies the nodes stored in _macroNodes.
                object result = _entryPoint.Method.Invoke(null, new object[] { _host, _inputBuilder.GetInstance(), this });
                var macroOutput = (CommonOutputs.MacroOutput)result;
                _host.AssertValue(macroOutput);
                _macroNodes = macroOutput.Nodes;
            }

            timer.Stop();
            RunTime = timer.Elapsed;
            IsFinished = true;
        }
示例#6
0
            /// <summary>
            /// Creates a row over <paramref name="schema"/>. A getter is created for every column index accepted
            /// by <paramref name="predicate"/>; the getter slots of all other columns stay null.
            /// </summary>
            /// <param name="env">The host environment used to register a host named "Row".</param>
            /// <param name="schema">The row schema.</param>
            /// <param name="schemaDef">The schema definition; asserted to have one column per schema column.</param>
            /// <param name="peeks">The peek delegates; asserted to have one entry per schema column.</param>
            /// <param name="predicate">Decides, per column index, whether a getter is created.</param>
            public InputRowBase(IHostEnvironment env, Schema schema, InternalSchemaDefinition schemaDef, Delegate[] peeks, Func<int, bool> predicate)
            {
                Contracts.AssertValue(env);
                Host = env.Register("Row");
                Host.AssertValue(schema);
                Host.AssertValue(schemaDef);
                Host.AssertValue(peeks);
                Host.AssertValue(predicate);
                Host.Assert(schema.ColumnCount == schemaDef.Columns.Length);
                Host.Assert(schema.ColumnCount == peeks.Length);

                _colCount = schema.ColumnCount;
                Schema = schema;
                _getters = new Delegate[_colCount];

                for (int col = 0; col < _colCount; col++)
                {
                    if (!predicate(col))
                    {
                        // Slots of a freshly allocated array are already null.
                        continue;
                    }

                    _getters[col] = CreateGetter(schema.GetColumnType(col), schemaDef.Columns[col], peeks[col]);
                }
            }
示例#7
0
            /// <summary>
            /// Sends a trainer telemetry message built from <paramref name="sub"/>'s kind and settings,
            /// but only when <c>sub.IsGood()</c> returns true.
            /// </summary>
            /// <param name="pipe">The pipe the message is sent on.</param>
            /// <param name="sub">The sub-component to report; asserted as value-or-null.</param>
            protected void SendTelemetryComponent(IPipe<TelemetryMessage> pipe, SubComponent sub)
            {
                Host.AssertValue(pipe);
                Host.AssertValueOrNull(sub);

                // NOTE(review): sub may be null at this point; presumably IsGood() is an extension
                // method that tolerates a null receiver - confirm.
                if (!sub.IsGood())
                {
                    return;
                }

                pipe.Send(TelemetryMessage.CreateTrainer(sub.Kind, sub.SubComponentSettings));
            }
示例#8
0
            // REVIEW: The converting getter invokes a type conversion delegate on every call, so it's inherently slower
            // than the 'direct' getter. We don't have good indication of this to the user, and the selection
            // of affected types is pretty arbitrary (signed integers and bools, but not uints and floats).
            /// <summary>
            /// Creates a getter that peeks a <typeparamref name="TSrc"/> array from the current row object and
            /// fills the destination buffer with each element passed through <paramref name="convert"/>.
            /// </summary>
            /// <param name="peekDel">The peek delegate; asserted to be a <c>Peek&lt;TRow, TSrc[]&gt;</c>.</param>
            /// <param name="convert">The per-element conversion.</param>
            /// <returns>A <c>ValueGetter&lt;VBuffer&lt;TDst&gt;&gt;</c> returned as a <see cref="Delegate"/>.</returns>
            private Delegate CreateConvertingArrayGetterDelegate<TSrc, TDst>(Delegate peekDel, Func<TSrc, TDst> convert)
            {
                var typedPeek = peekDel as Peek<TRow, TSrc[]>;
                Host.AssertValue(typedPeek);

                // Captured by the getter below, so the same source array variable is reused across calls.
                TSrc[] source = default;

                ValueGetter<VBuffer<TDst>> getter = (ref VBuffer<TDst> dst) =>
                {
                    typedPeek(GetCurrentRowObject(), Position, ref source);
                    int length = Utils.Size(source);

                    // Keep the destination's existing value array unless it is too small.
                    TDst[] values = dst.Values;
                    if (Utils.Size(values) < length)
                    {
                        values = new TDst[length];
                    }

                    dst = new VBuffer<TDst>(length, values, dst.Indices);
                    for (int i = 0; i < length; i++)
                    {
                        dst.Values[i] = convert(source[i]);
                    }
                };

                return getter;
            }
        private static ILegacyDataLoader CreateCore(IHost host, ILegacyDataLoader srcLoader,
                                                    KeyValuePair <string, IComponentFactory <IDataView, IDataTransform> >[] transformArgs)
        {
            Contracts.AssertValue(host, "host");
            host.AssertValue(srcLoader, "srcLoader");
            host.AssertValueOrNull(transformArgs);

            if (Utils.Size(transformArgs) == 0)
            {
                return(srcLoader);
            }
 /// <summary>
 /// Returns a slot cursor over column <paramref name="col"/>, delegating to <c>_tview</c>.
 /// </summary>
 /// <param name="col">The column index; checked to lie in <c>[0, Schema.ColumnCount)</c>.</param>
 /// <remarks>
 /// Throws the exception built by <c>_host.ExceptParam</c> when <c>TransposeSchema</c> is null or
 /// reports no slot type for the column.
 /// </remarks>
 public ISlotCursor GetSlotCursor(int col)
 {
     _host.CheckParam(0 <= col && col < Schema.ColumnCount, nameof(col));

     bool hasSlotType = TransposeSchema?.GetSlotType(col) != null;
     if (!hasSlotType)
     {
         throw _host.ExceptParam(nameof(col), "Bad call to GetSlotCursor on untransposable column '{0}'",
                                 Schema.GetColumnName(col));
     }

     _host.AssertValue(_tview);
     return _tview.GetSlotCursor(col);
 }
示例#11
0
        /// <summary>
        /// Deserializing constructor: loads the sub-model named "Ensemble" from <paramref name="ctx"/> and
        /// stores the leaf count computed by <c>CountLeaves</c>.
        /// </summary>
        /// <param name="env">The host environment; checked for null.</param>
        /// <param name="ctx">The model load context to read from.</param>
        public TreeEnsembleFeaturizerBindableMapper(IHostEnvironment env, ModelLoadContext ctx)
        {
            Contracts.CheckValue(env, nameof(env));
            _host = env.Register(LoaderSignature);
            _host.AssertValue(ctx);

            // *** Binary format ***
            // ensemble

            FastTreePredictionWrapper loadedEnsemble;
            ctx.LoadModel<FastTreePredictionWrapper, SignatureLoadModel>(env, out loadedEnsemble, "Ensemble");

            _ensemble = loadedEnsemble;
            _totalLeafCount = CountLeaves(loadedEnsemble);
        }
示例#12
0
        /// <summary>
        /// Creates a view in which the label column is replaced by a boolean column whose value is the result
        /// of <paramref name="equalsTarget"/> on the original label.
        /// </summary>
        /// <typeparam name="T">The raw type of the label column; asserted to match <paramref name="type"/>.</typeparam>
        /// <param name="type">The type of the source label column.</param>
        /// <param name="equalsTarget">Predicate that decides whether a label value is the target class.</param>
        /// <param name="data">The role-mapped data; its schema is asserted to have a label column.</param>
        /// <returns>The view produced by <c>LambdaColumnMapper.Create</c>.</returns>
        /// <remarks>
        /// When <c>Args.ImputeMissingLabelsAsNegative</c> is false, the input is first wrapped in an
        /// <c>NAFilter</c> on the label column (presumably dropping rows with a missing label - confirm against
        /// NAFilter), and the label mapper is applied on top of that filtered view.
        /// </remarks>
        private protected IDataView MapLabelsCore<T>(DataViewType type, InPredicate<T> equalsTarget, RoleMappedData data)
        {
            Host.AssertValue(type);
            Host.Assert(type.RawType == typeof(T));
            Host.AssertValue(equalsTarget);
            Host.AssertValue(data);
            Host.Assert(data.Schema.Label.HasValue);

            var label = data.Schema.Label.Value;
            IDataView dataView = data.Data;

            if (!Args.ImputeMissingLabelsAsNegative)
            {
                dataView = new NAFilter(Host, data.Data, false, label.Name);
            }

            // Map over dataView, not data.Data: the previous code passed data.Data here, which silently
            // discarded the NA filter built above.
            return LambdaColumnMapper.Create(Host, "Label mapper", dataView,
                                             label.Name, label.Name, type, BooleanDataViewType.Instance,
                                             (in T src, ref bool dst) =>
                                             dst = equalsTarget(in src));
        }
示例#13
0
        /// <summary>
        /// Deserializing constructor: reads one int from <paramref name="ctx"/> and verifies that it equals
        /// the size of a single-precision float.
        /// </summary>
        /// <param name="env">The host environment used to register the host.</param>
        /// <param name="ctx">The model load context to read from.</param>
        private Voting(IHostEnvironment env, ModelLoadContext ctx)
        {
            Contracts.AssertValue(env);
            _host = env.Register(LoaderSignature);
            _host.AssertValue(ctx);

            // *** Binary format ***
            // int: sizeof(Single)
            int storedFloatSize = ctx.Reader.ReadInt32();
            _host.CheckDecode(storedFloatSize == sizeof(float));
        }
        /// <summary>
        /// Deserializing constructor: reads the complement flag, the random state, the shuffle flag and the
        /// pool size from <paramref name="ctx"/>, in that order, and checks that the pool size is non-negative.
        /// </summary>
        /// <param name="host">The host, passed to the base constructor.</param>
        /// <param name="ctx">The model load context to read from.</param>
        /// <param name="input">The input data view, passed to the base constructor.</param>
        private BootstrapSampleTransform(IHost host, ModelLoadContext ctx, IDataView input)
            : base(host, input)
        {
            host.AssertValue(ctx);
            host.AssertValue(input);

            // *** Binary format ***
            // byte: is the complement sample, that is, an out-of-bag sample
            // uint: seed0
            // uint: seed1
            // uint: seed2
            // uint: seed3
            // byte: input source should be shuffled
            // int: size of the output pool size

            // The reads below must stay in exactly this order to match the format above.
            var reader = ctx.Reader;
            var isComplement = reader.ReadBoolByte();
            var randomState = TauswortheHybrid.State.Load(reader);
            var shuffleInput = reader.ReadBoolByte();
            var poolSize = reader.ReadInt32();

            _complement = isComplement;
            _state = randomState;
            _shuffleInput = shuffleInput;
            _poolSize = poolSize;
            Host.CheckDecode(_poolSize >= 0);
        }
示例#15
0
 /// <summary>
 /// Returns true when <paramref name="col"/> has a categorical-slot-ranges metadata column that is a
 /// vector of <c>NumberType.I4</c> items.
 /// </summary>
 /// <param name="col">The column whose metadata is inspected.</param>
 private bool HasCategoricals(SchemaShape.Column col)
 {
     _host.AssertValue(col);

     bool hasSlotRanges = col.Metadata.TryFindColumn(MetadataUtils.Kinds.CategoricalSlotRanges, out var slotRanges);

     // The indices must be ints and of a definite size vector type. (Definite because
     // metadata has only one value anyway.)
     return hasSlotRanges
            && slotRanges.Kind == SchemaShape.Column.VectorKind.Vector
            && slotRanges.ItemType == NumberType.I4;
 }
示例#16
0
            /// <summary>
            /// Snapshots the builder's columns and names into arrays, builds the schema from them, and stores
            /// the row count.
            /// </summary>
            /// <param name="env">The host environment used to register a host named "ArrayDataView".</param>
            /// <param name="builder">The builder supplying columns and names; both are asserted to have equal counts.</param>
            /// <param name="rowCount">The number of rows; asserted to be non-negative.</param>
            public DataView(IHostEnvironment env, ArrayDataViewBuilder builder, int rowCount)
            {
                Contracts.AssertValue(env, "env");
                _host = env.Register("ArrayDataView");

                _host.AssertValue(builder);
                _host.Assert(rowCount >= 0);
                _host.Assert(builder._names.Count == builder._columns.Count);

                _columns = builder._columns.ToArray();

                var columnTypes = _columns.Select(column => column.Type).ToArray();
                var columnNames = builder._names.ToArray();
                _schema = new SchemaImpl(_host, columnTypes, columnNames, builder);

                _rowCount = rowCount;
            }
示例#17
0
        /// <summary>
        /// Saves this object to <paramref name="ctx"/>: sets the version info, then writes the ensemble as the
        /// sub-model named "Ensemble".
        /// </summary>
        /// <param name="ctx">The model save context; checked for null.</param>
        public void Save(ModelSaveContext ctx)
        {
            _host.CheckValue(ctx, nameof(ctx));

            ctx.CheckAtModel();
            ctx.SetVersionInfo(GetVersionInfo());

            // *** Binary format ***
            // ensemble
            _host.AssertValue(_ensemble);
            ctx.SaveModel(_ensemble, "Ensemble");
        }
            /// <summary>
            /// Builds the output getter for <paramref name="input"/>. The getter either calls
            /// <c>state.TransformCore</c> directly (when <c>_dontFetchSrcValue</c> is set) or fetches the source
            /// value and hands it to the processing routine selected from <paramref name="state"/>.
            /// </summary>
            /// <param name="input">The input row the source getter is taken from.</param>
            /// <param name="state">The state whose processing routines are invoked.</param>
            /// <returns>A <c>ValueGetter&lt;VBuffer&lt;float&gt;&gt;</c> returned as a <see cref="Delegate"/>.</returns>
            private Delegate MakeGetter(DataViewRow input, ForecastingStateBase state)
            {
                _host.AssertValue(input);
                var sourceGetter = input.GetGetter<TInput>(input.Schema[_inputColumnIndex]);

                // The routine is chosen once, when the getter is built: Process when the parent's window
                // size is positive, ProcessWithoutBuffer otherwise.
                ProcessData process;
                if (_parent.WindowSize > 0)
                {
                    process = state.Process;
                }
                else
                {
                    process = state.ProcessWithoutBuffer;
                }

                ValueGetter<VBuffer<float>> getter = (ref VBuffer<float> dst) =>
                {
                    TInput value = default;
                    if (!_dontFetchSrcValue)
                    {
                        sourceGetter(ref value);
                        process(ref value, ref dst);
                    }
                    else
                    {
                        // The source getter is skipped; TransformCore receives the default value.
                        state.TransformCore(ref value, null, 0, ref dst);
                    }
                };

                return getter;
            }
示例#19
0
        /// <summary>
        /// Create a filter transform
        /// </summary>
        /// <param name="env">The host environment</param>
        /// <param name="source">The dataview upon which we construct the transform</param>
        /// <param name="filterFunc">The function by which we transform source to destination columns and decide whether
        /// to keep the row.</param>
        /// <param name="initStateAction">The function that is called once per cursor to initialize state. Can be null.</param>
        /// <param name="inputSchemaDefinition">The schema definition overrides for <typeparamref name="TSrc"/></param>
        /// <param name="outputSchemaDefinition">The schema definition overrides for <typeparamref name="TDst"/></param>
        /// <exception cref="ArgumentNullException"><paramref name="env"/> is null.</exception>
        public StatefulFilterTransform(IHostEnvironment env, IDataView source, Func<TSrc, TDst, TState, bool> filterFunc,
                                       Action<TState> initStateAction,
                                       SchemaDefinition inputSchemaDefinition = null, SchemaDefinition outputSchemaDefinition = null)
        {
            // This is a public entry point and env is dereferenced on the next statement, so validate it
            // explicitly instead of failing with a NullReferenceException.
            if (env == null)
            {
                throw new ArgumentNullException(nameof(env));
            }

            _host = env.Register(RegistrationName);
            _host.AssertValue(source, "source");
            _host.AssertValue(filterFunc, "filterFunc");
            _host.AssertValueOrNull(initStateAction);
            _host.AssertValueOrNull(inputSchemaDefinition);
            _host.AssertValueOrNull(outputSchemaDefinition);

            _source = source;
            _filterFunc = filterFunc;
            _initStateAction = initStateAction;
            _inputSchemaDefinition = inputSchemaDefinition;
            _typedSource = TypedCursorable<TSrc>.Create(_host, Source, false, inputSchemaDefinition);

            // The added output columns come from TDst, with any caller-supplied overrides applied.
            var outSchema = InternalSchemaDefinition.Create(typeof(TDst), outputSchemaDefinition);

            _addedSchema = outSchema;
            _bindings = new ColumnBindings(Source.Schema, DataViewConstructionUtils.GetSchemaColumns(outSchema));
        }
            /// <summary>
            /// Opens a cursor on the source for the columns this mapper depends on and wraps it in a
            /// <c>RowCursor</c> together with the active flags for the output columns.
            /// </summary>
            /// <param name="needCol">Predicate selecting the requested output columns.</param>
            /// <param name="rand">Optional random source passed to the source cursor.</param>
            public IRowCursor GetRowCursor(Func<int, bool> needCol, IRandom rand = null)
            {
                _host.AssertValue(needCol, nameof(needCol));
                _host.AssertValueOrNull(rand);

                // Input side: translate the requested outputs into the input columns they depend on.
                var sourceCursor = Source.GetRowCursor(GetDependencies(needCol), rand);

                // Output side: one flag per mapper schema column.
                var activeOutputs = Utils.BuildArray(_mapper.Schema.ColumnCount, needCol);

                return new RowCursor(_host, _mapper, sourceCursor, activeOutputs);
            }
            /// <summary>
            /// Sends a trainer telemetry message for <paramref name="factory"/>. Command-line factories report
            /// their name and settings string; anything else (including null) is reported as "Unknown".
            /// </summary>
            /// <param name="pipe">The pipe the message is sent on.</param>
            /// <param name="factory">The component factory to report; may be null.</param>
            protected void SendTelemetryComponent(IPipe<TelemetryMessage> pipe, IComponentFactory factory)
            {
                Host.AssertValue(pipe);
                Host.AssertValueOrNull(factory);

                var commandLineFactory = factory as ICommandLineComponentFactory;
                if (commandLineFactory == null)
                {
                    pipe.Send(TelemetryMessage.CreateTrainer("Unknown", "Non-ICommandLineComponentFactory object"));
                    return;
                }

                pipe.Send(TelemetryMessage.CreateTrainer(commandLineFactory.Name, commandLineFactory.GetSettingsString()));
            }
示例#22
0
        /// <summary>
        /// Registers a host under <paramref name="name"/>, validates that the validation proportion lies in
        /// <c>[0, 1)</c>, and stores the proportion and the base predictor factory from <paramref name="args"/>.
        /// </summary>
        /// <param name="env">The host environment.</param>
        /// <param name="name">The registration name; asserted to be non-whitespace.</param>
        /// <param name="args">The arguments to read the settings from.</param>
        private protected BaseStacking(IHostEnvironment env, string name, ArgumentsBase args)
        {
            Contracts.AssertValue(env);
            env.AssertNonWhiteSpace(name);
            Host = env.Register(name);
            Host.AssertValue(args, "args");

            bool proportionInRange = 0 <= args.ValidationDatasetProportion && args.ValidationDatasetProportion < 1;
            Host.CheckUserArg(proportionInRange,
                              nameof(args.ValidationDatasetProportion),
                              "The validation proportion for stacking should be greater than or equal to 0 and less than 1");

            ValidationDatasetProportion = args.ValidationDatasetProportion;

            // The factory is obtained from args and must not be null.
            BasePredictorType = args.GetPredictorFactory();
            Host.CheckValue(BasePredictorType, nameof(BasePredictorType));
        }
示例#23
0
            /// <summary>
            /// Saves this object to <paramref name="ctx"/>: the session's graph definition goes into the binary
            /// stream "TFModel", followed by the input column count, each input column name, and the output
            /// column name.
            /// </summary>
            /// <param name="ctx">The model save context to write to.</param>
            public void Save(ModelSaveContext ctx)
            {
                _host.AssertValue(ctx);
                ctx.CheckAtModel();
                ctx.SetVersionInfo(GetVersionInfo());

                // NOTE(review): TFBuffer may be disposable; it is not disposed here - confirm whether it should be.
                var graphDef = new TFBuffer();
                _session.Graph.ToGraphDef(graphDef);

                ctx.SaveBinaryStream("TFModel", writer =>
                {
                    writer.WriteByteArray(graphDef.ToArray());
                });

                Contracts.AssertNonEmpty(_inputColNames);
                ctx.Writer.Write(_inputColNames.Length);
                foreach (var inputName in _inputColNames)
                {
                    ctx.SaveNonEmptyString(inputName);
                }

                ctx.SaveNonEmptyString(_outputColName);
            }
示例#24
0
            /// <summary>
            /// Captures the writer, the pipes and the formatting settings of <paramref name="parent"/>, and
            /// allocates the working buffers.
            /// </summary>
            /// <param name="parent">The owning saver; its host and settings are copied.</param>
            /// <param name="writer">The text writer to write to.</param>
            /// <param name="pipes">The per-column value writers.</param>
            /// <param name="hasHeader">Whether a header is available; a header is only kept when the parent also outputs one.</param>
            public State(TextSaver parent, TextWriter writer, ValueWriter[] pipes, bool hasHeader)
            {
                Contracts.AssertValue(parent);
                Contracts.AssertValue(parent._host);
                _host = parent._host;
                _host.AssertValue(writer);
                _host.AssertValue(pipes);

                // Settings copied from the parent saver.
                _dense = parent._forceDense;
                _sepChar = parent._sepChar;
                _sepStr = parent._sepStr;

                _writer = writer;
                _pipes = pipes;
                _hasHeader = hasHeader && parent._outputHeader;

                // Both column maps have one more entry than there are pipes.
                int columnMapLength = _pipes.Length + 1;
                _mpcoldst = new int[columnMapLength];
                _mpcolslot = new int[columnMapLength];

                // Fixed initial capacities for the character and slot buffers.
                _rgch = new char[1024];
                _mpslotdst = new int[128];
                _mpslotichLim = new int[128];
            }
        /// <summary>
        /// Creates a view in which the label column is replaced by a float column: 1 where
        /// <paramref name="equalsTarget"/> accepts the label and 0 otherwise. When missing labels are not
        /// imputed as negative and an is-NA predicate exists for <paramref name="type"/>, labels that fail
        /// <paramref name="equalsTarget"/> but satisfy that predicate map to NaN instead of 0.
        /// </summary>
        /// <typeparam name="T">The raw type of the label column; asserted to match <paramref name="type"/>.</typeparam>
        /// <param name="type">The type of the source label column.</param>
        /// <param name="equalsTarget">Predicate that decides whether a label value is the target class.</param>
        /// <param name="data">The role-mapped data; its schema is asserted to have a label column.</param>
        private protected IDataView MapLabelsCore<T>(ColumnType type, InPredicate<T> equalsTarget, RoleMappedData data)
        {
            Host.AssertValue(type);
            Host.Assert(type.RawType == typeof(T));
            Host.AssertValue(equalsTarget);
            Host.AssertValue(data);
            Host.Assert(data.Schema.Label.HasValue);

            var labelColumn = data.Schema.Label.Value;

            InPredicate<T> isMissing;
            if (Args.ImputeMissingLabelsAsNegative || !Conversions.Instance.TryGetIsNAPredicate(type, out isMissing))
            {
                // Plain binary mapping: every non-target label becomes 0.
                return LambdaColumnMapper.Create(Host, "Label mapper", data.Data,
                    labelColumn.Name, labelColumn.Name, type, NumberType.Float,
                    (in T src, ref float dst) =>
                    {
                        if (equalsTarget(in src))
                        {
                            dst = 1;
                        }
                        else
                        {
                            dst = default(float);
                        }
                    });
            }

            // Missing-aware mapping: missing labels that are not the target are propagated as NaN.
            return LambdaColumnMapper.Create(Host, "Label mapper", data.Data,
                labelColumn.Name, labelColumn.Name, type, NumberType.Float,
                (in T src, ref float dst) =>
                {
                    if (equalsTarget(in src))
                    {
                        dst = 1;
                    }
                    else if (isMissing(in src))
                    {
                        dst = float.NaN;
                    }
                    else
                    {
                        dst = default(float);
                    }
                });
        }
示例#26
0
        /// <summary>
        /// Trains the trainer created from <c>args.Filter</c> on <paramref name="input"/> and writes the
        /// resulting predictor's feature weights into <paramref name="scores"/>.
        /// </summary>
        /// <param name="host">The host used for the channel, the trainer and training.</param>
        /// <param name="input">The training data.</param>
        /// <param name="args">The arguments; exactly one of Threshold and NumSlotsToKeep is asserted to be set.</param>
        /// <param name="scores">Receives the feature weights.</param>
        private static void TrainCore(IHost host, IDataView input, Arguments args, ref VBuffer<Single> scores)
        {
            Contracts.AssertValue(host);
            host.AssertValue(args);
            host.AssertValue(input);
            host.Assert(args.Threshold.HasValue != args.NumSlotsToKeep.HasValue);

            using (var ch = host.Start("Train"))
            {
                ch.Trace("Constructing trainer");
                ITrainer trainer = args.Filter.CreateComponent(host);

                IDataView trainingView = input;
                ISchema inputSchema = trainingView.Schema;

                // Resolve the role columns against the input schema, before any normalizer is added.
                var labelCol = TrainUtils.MatchNameOrDefaultOrNull(ch, inputSchema, nameof(args.LabelColumn), args.LabelColumn, DefaultColumnNames.Label);
                var featureCol = TrainUtils.MatchNameOrDefaultOrNull(ch, inputSchema, nameof(args.FeatureColumn), args.FeatureColumn, DefaultColumnNames.Features);
                var groupCol = TrainUtils.MatchNameOrDefaultOrNull(ch, inputSchema, nameof(args.GroupColumn), args.GroupColumn, DefaultColumnNames.GroupId);
                var weightCol = TrainUtils.MatchNameOrDefaultOrNull(ch, inputSchema, nameof(args.WeightColumn), args.WeightColumn, DefaultColumnNames.Weight);
                var nameCol = TrainUtils.MatchNameOrDefaultOrNull(ch, inputSchema, nameof(args.NameColumn), args.NameColumn, DefaultColumnNames.Name);

                // trainingView is passed by ref, so this call may replace it.
                TrainUtils.AddNormalizerIfNeeded(host, ch, trainer, ref trainingView, featureCol, args.NormalizeFeatures);

                ch.Trace("Binding columns");

                var customColumns = TrainUtils.CheckAndGenerateCustomColumns(ch, args.CustomColumn);
                var trainingData = new RoleMappedData(trainingView, labelCol, featureCol, groupCol, weightCol, nameCol, customColumns);

                var predictor = TrainUtils.Train(host, ch, trainingData, trainer, null,
                                                 null, 0, args.CacheData);

                // The predictor is expected to expose feature weights; this is only asserted.
                var weighted = predictor as IPredictorWithFeatureWeights<Single>;
                Contracts.AssertValue(weighted);
                weighted.GetFeatureWeights(ref scores);

                ch.Done();
            }
        }
        /// <summary>
        /// Yields a single <c>Batch</c> holding a training set and a test set derived from <c>Data</c>.
        /// </summary>
        /// <param name="rand">Random source used to seed the generated split column.</param>
        /// <remarks>
        /// When <c>ValidationDatasetProportion</c> is greater than zero, a generated-number column is added and
        /// two range filters over it (one the complement of the other) produce the test and training views.
        /// Otherwise both sets are <c>Data</c> itself. A positive <c>BatchSize</c> only produces a warning.
        /// </remarks>
        public IEnumerable<Batch> GetBatches(IRandom rand)
        {
            Host.Assert(Data != null, "Must call Initialize first!");
            Host.AssertValue(rand);

            using (var ch = Host.Start("Getting batches"))
            {
                RoleMappedData trainSet;
                RoleMappedData testSet;

                if (ValidationDatasetProportion > 0)
                {
                    // Split the data into train and test sets using a temporary generated column.
                    string splitColumn = Data.Data.Schema.GetTempColumnName();
                    var generatorArgs = new GenerateNumberTransform.Arguments
                    {
                        Column = new[] { new GenerateNumberTransform.Column() { Name = splitColumn } },
                        Seed = (uint)rand.Next()
                    };
                    var numbered = new GenerateNumberTransform(Host, generatorArgs, Data.Data);

                    var testArgs = new RangeFilter.Arguments()
                    {
                        Column = splitColumn, Max = ValidationDatasetProportion
                    };
                    var testView = new RangeFilter(Host, testArgs, numbered);

                    // Same range, complemented: the two filters select disjoint parts of the data.
                    var trainArgs = new RangeFilter.Arguments()
                    {
                        Column = splitColumn, Max = ValidationDatasetProportion, Complement = true
                    };
                    var trainView = new RangeFilter(Host, trainArgs, numbered);

                    testSet = new RoleMappedData(testView, Data.Schema.GetColumnRoleNames());
                    trainSet = new RoleMappedData(trainView, Data.Schema.GetColumnRoleNames());
                }
                else
                {
                    // No split requested: train and test on the same data.
                    trainSet = Data;
                    testSet = trainSet;
                }

                if (BatchSize > 0)
                {
                    // REVIEW: How should we carve the data into batches?
                    ch.Warning("Batch support is temporarily disabled");
                }

                yield return new Batch(trainSet, testSet);

                ch.Done();
            }
        }
            /// <summary>
            /// Builds the output getter for <paramref name="input"/>: it fetches the source value and hands it
            /// to the processing routine selected from <paramref name="state"/>.
            /// </summary>
            /// <param name="input">The input row the source getter is taken from.</param>
            /// <param name="state">The state whose processing routine is invoked.</param>
            /// <returns>A <c>ValueGetter&lt;VBuffer&lt;double&gt;&gt;</c> returned as a <see cref="Delegate"/>.</returns>
            private Delegate MakeGetter(IRow input, TState state)
            {
                _host.AssertValue(input);
                var sourceGetter = input.GetGetter<TInput>(_inputColumnIndex);

                // The routine is chosen once, when the getter is built: Process when the parent's window
                // size is positive, ProcessWithoutBuffer otherwise.
                ProcessData process;
                if (_parent.WindowSize > 0)
                {
                    process = state.Process;
                }
                else
                {
                    process = state.ProcessWithoutBuffer;
                }

                ValueGetter<VBuffer<double>> getter = (ref VBuffer<double> dst) =>
                {
                    TInput value = default;
                    sourceGetter(ref value);
                    process(ref value, ref dst);
                };

                return getter;
            }
        /// <summary>
        /// Deserializing constructor: verifies the stored float size and reads the normalize flag from
        /// <paramref name="ctx"/>.
        /// </summary>
        /// <param name="env">The host environment.</param>
        /// <param name="name">The registration name; asserted to be non-whitespace.</param>
        /// <param name="ctx">The model load context to read from.</param>
        internal BaseMultiCombiner(IHostEnvironment env, string name, ModelLoadContext ctx)
        {
            Contracts.AssertValue(env);
            env.AssertNonWhiteSpace(name);
            Host = env.Register(name);
            Host.AssertValue(ctx);

            // *** Binary format ***
            // int: sizeof(Single)
            // bool: _normalize
            var reader = ctx.Reader;

            int storedFloatSize = reader.ReadInt32();
            Host.CheckDecode(storedFloatSize == sizeof(float));

            Normalize = reader.ReadBoolByte();
        }
        /// <summary>
        /// Projects <paramref name="lookup"/> down to two columns named "Term" and "Value" and returns the
        /// bytes produced by saving that view with a <c>BinarySaver</c> into a memory stream.
        /// </summary>
        /// <param name="host">The host used for checks and for the transform and saver.</param>
        /// <param name="lookup">The data view containing the term and value columns.</param>
        /// <param name="termColumn">Name of the term column; must exist and have a text type.</param>
        /// <param name="valueColumn">Name of the value column; must exist.</param>
        /// <returns>The serialized bytes of the two-column view.</returns>
        private static byte[] GetBytesFromDataView(IHost host, IDataView lookup, string termColumn, string valueColumn)
        {
            Contracts.AssertValue(host);
            host.AssertValue(lookup);
            host.AssertNonEmpty(termColumn);
            host.AssertNonEmpty(valueColumn);

            var lookupSchema = lookup.Schema;

            int termIndex;
            if (!lookupSchema.TryGetColumnIndex(termColumn, out termIndex))
            {
                throw host.ExceptUserArg(nameof(Arguments.TermColumn), "column not found: '{0}'", termColumn);
            }

            int valueIndex;
            if (!lookupSchema.TryGetColumnIndex(valueColumn, out valueIndex))
            {
                throw host.ExceptUserArg(nameof(Arguments.ValueColumn), "column not found: '{0}'", valueColumn);
            }

            // REVIEW: Should we allow term to be a vector of text (each term in the vector
            // would map to the same value)?
            var termType = lookupSchema.GetColumnType(termIndex);
            host.CheckUserArg(termType.IsText, nameof(Arguments.TermColumn), "term column must contain text");

            // NOTE(review): the value column's type is fetched but never used in this method.
            var valueType = lookupSchema.GetColumnType(valueIndex);

            var chooseArgs = new ChooseColumnsTransform.Arguments
            {
                Column = new[]
                {
                    new ChooseColumnsTransform.Column { Name = "Term", Source = termColumn },
                    new ChooseColumnsTransform.Column { Name = "Value", Source = valueColumn },
                }
            };
            var twoColumnView = new ChooseColumnsTransform(host, chooseArgs, lookup);

            var saver = new BinarySaver(host, new BinarySaver.Arguments());

            using (var stream = new MemoryStream())
            {
                // 0 and 1 are presumably the indices of the "Term" and "Value" columns - confirm against SaveData.
                saver.SaveData(stream, twoColumnView, 0, 1);
                return stream.ToArray();
            }
        }