/// <summary>
/// Applies <paramref name="mapper"/> to <paramref name="input"/> and records every
/// <see cref="StatefulRow"/> produced along the way in <paramref name="rows"/>.
/// A <see cref="CompositeRowToRowMapper"/> with inner mappers is unrolled: each inner mapper is
/// applied in order through a recursive call.
/// </summary>
/// <param name="input">The row handed to the (first) mapper.</param>
/// <param name="mapper">The mapper to apply; may be a <see cref="CompositeRowToRowMapper"/>.</param>
/// <param name="active">Predicate telling which columns are required to be active.</param>
/// <param name="rows">Accumulator that receives each stateful row encountered.</param>
/// <param name="disposer">
/// The combined disposer of all rows created, or <c>null</c> when no mapper supplied one.
/// </param>
/// <returns>The row produced by the last mapper applied.</returns>
internal Row GetStatefulRows(Row input, IRowToRowMapper mapper, Func<int, bool> active, List<StatefulRow> rows, out Action disposer)
{
    Contracts.CheckValue(input, nameof(input));
    // Fail with a descriptive argument error instead of a NullReferenceException at mapper.GetRow.
    Contracts.CheckValue(mapper, nameof(mapper));
    Contracts.CheckValue(active, nameof(active));

    disposer = null;
    IRowToRowMapper[] innerMappers = new IRowToRowMapper[0];
    if (mapper is CompositeRowToRowMapper compositeMapper)
    {
        innerMappers = compositeMapper.InnerMappers;
    }

    if (innerMappers.Length == 0)
    {
        // Leaf case: every column the caller wants active must already be active on the input.
        for (int c = 0; c < input.Schema.ColumnCount; ++c)
        {
            bool wantsActive = active(c);
            bool isActive = input.IsColumnActive(c);
            if (wantsActive && !isActive)
            {
                throw Contracts.ExceptParam(nameof(input), $"Mapper required column '{input.Schema.GetColumnName(c)}' active but it was not.");
            }
        }

        var row = mapper.GetRow(input, active, out disposer);
        if (row is StatefulRow statefulRow)
        {
            rows.Add(statefulRow);
        }

        return row;
    }

    // For each of the inner mappers, we will be calling their GetRow method, but to do so we need to know
    // what we need from them. The last one uses the caller's predicate; each earlier one is
    // computed from the dependencies of the next one in the chain.
    var deps = new Func<int, bool>[innerMappers.Length];
    deps[deps.Length - 1] = active;
    for (int i = deps.Length - 1; i >= 1; --i)
    {
        deps[i - 1] = innerMappers[i].GetDependencies(deps[i]);
    }

    Row result = input;
    for (int i = 0; i < innerMappers.Length; ++i)
    {
        Action localDisp;
        result = GetStatefulRows(result, innerMappers[i], deps[i], rows, out localDisp);

        // NOTE(review): when the recursive call takes the leaf path it has already added a stateful
        // result to 'rows', so this adds the same instance a second time. Confirm whether consumers
        // of 'rows' rely on that before de-duplicating.
        if (result is StatefulRow statefulResult)
        {
            rows.Add(statefulResult);
        }

        if (localDisp != null)
        {
            // We want the last disposer to be called first, so the order of the addition here is important.
            disposer = disposer == null ? localDisp : localDisp + disposer;
        }
    }

    return result;
}
/// <summary>
/// Convenience overload: validates <paramref name="data"/> and forwards to the
/// <c>GetOptGroupGetter</c> overload that takes the schema of <paramref name="data"/>.
/// </summary>
/// <param name="row">The row to obtain the getter from.</param>
/// <param name="data">The role-mapped data whose schema is used; must not be null.</param>
/// <returns>Whatever the schema-based overload returns for <paramref name="row"/>.</returns>
public static ValueGetter<ulong> GetOptGroupGetter(this Row row, RoleMappedData data)
{
    Contracts.CheckValue(data, nameof(data));

    var roleSchema = data.Schema;
    return GetOptGroupGetter(row, roleSchema);
}
// REVIEW: It would be nice to support propagation of select metadata.
/// <summary>
/// Builds a data view that maps column <paramref name="src"/> of <paramref name="input"/> to a new
/// column <paramref name="dst"/> through <paramref name="mapper"/>. When the actual type of the
/// source column differs from <paramref name="typeSrc"/>, a standard conversion is looked up and
/// applied in front of the mapper. The result is wrapped in an <see cref="OpaqueDataView"/>.
/// </summary>
/// <param name="env">The host environment.</param>
/// <param name="name">Name passed to the implementation; must be non-empty.</param>
/// <param name="input">The input data view.</param>
/// <param name="src">Name of the source column; must exist in the input schema.</param>
/// <param name="dst">Name of the destination column.</param>
/// <param name="typeSrc">Input type of the mapper; its raw type must be <typeparamref name="TSrc"/>.</param>
/// <param name="typeDst">Output type of the mapper; its raw type must be <typeparamref name="TDst"/>.</param>
/// <param name="mapper">The value mapper to apply.</param>
/// <param name="keyValueGetter">Optional key-value getter; only allowed when the item type of <paramref name="typeDst"/> is a <see cref="KeyType"/>.</param>
/// <param name="slotNamesGetter">Optional slot-names getter; only allowed when <paramref name="typeDst"/> is a known-size vector.</param>
public static IDataView Create<TSrc, TDst>(IHostEnvironment env, string name, IDataView input,
    string src, string dst, DataViewType typeSrc, DataViewType typeDst, ValueMapper<TSrc, TDst> mapper,
    ValueGetter<VBuffer<ReadOnlyMemory<char>>> keyValueGetter = null,
    ValueGetter<VBuffer<ReadOnlyMemory<char>>> slotNamesGetter = null)
{
    Contracts.CheckValue(env, nameof(env));
    env.CheckNonEmpty(name, nameof(name));
    env.CheckValue(input, nameof(input));
    env.CheckNonEmpty(src, nameof(src));
    env.CheckNonEmpty(dst, nameof(dst));
    env.CheckValue(typeSrc, nameof(typeSrc));
    env.CheckValue(typeDst, nameof(typeDst));
    env.CheckValue(mapper, nameof(mapper));
    env.Check(keyValueGetter == null || typeDst.GetItemType() is KeyType);
    env.Check(slotNamesGetter == null || typeDst.IsKnownSizeVector());

    // The declared column types have to line up with the mapper's generic arguments.
    if (typeSrc.RawType != typeof(TSrc))
    {
        throw env.ExceptParam(nameof(mapper),
            "The source column type '{0}' doesn't match the input type of the mapper", typeSrc);
    }

    if (typeDst.RawType != typeof(TDst))
    {
        throw env.ExceptParam(nameof(mapper),
            "The destination column type '{0}' doesn't match the output type of the mapper", typeDst);
    }

    if (!input.Schema.TryGetColumnIndex(src, out int sourceIndex))
    {
        throw env.ExceptParam(nameof(src), "The input data doesn't have a column named '{0}'", src);
    }

    var actualType = input.Schema[sourceIndex].Type;

    // REVIEW: Ideally this should support vector-type conversion. It currently doesn't.
    // Start from "no conversion needed" and only look one up when the types actually differ.
    bool isIdentity = true;
    Delegate converter = null;
    if (!actualType.SameSizeAndItemType(typeSrc))
    {
        bool convertible = Conversions.Instance.TryGetStandardConversion(actualType, typeSrc, out converter, out isIdentity);
        if (!convertible)
        {
            throw env.ExceptParam(nameof(mapper),
                "The type of column '{0}', '{1}', cannot be converted to the input type of the mapper '{2}'",
                src, actualType, typeSrc);
        }
    }

    var column = new Column(src, dst);
    IDataView implementation;
    if (isIdentity)
    {
        implementation = new Impl<TSrc, TDst, TDst>(env, name, input, column, typeDst, mapper,
            keyValueGetter: keyValueGetter, slotNamesGetter: slotNamesGetter);
    }
    else
    {
        // The original column's raw type is only known at run time, so CreateImpl is closed over
        // (original raw type, TSrc, TDst) via reflection. The <int, int, int> instantiation below
        // merely serves to get hold of the generic method definition.
        Func<IHostEnvironment, string, IDataView, Column, DataViewType, ValueMapper<int, int>, ValueMapper<int, int>,
            ValueGetter<VBuffer<ReadOnlyMemory<char>>>, ValueGetter<VBuffer<ReadOnlyMemory<char>>>,
            Impl<int, int, int>> del = CreateImpl<int, int, int>;
        var openMethod = del.GetMethodInfo().GetGenericMethodDefinition();
        var closedMethod = openMethod.MakeGenericMethod(actualType.RawType, typeof(TSrc), typeof(TDst));
        var invokeArgs = new object[] { env, name, input, column, typeDst, converter, mapper, keyValueGetter, slotNamesGetter };
        implementation = (IDataView)closedMethod.Invoke(null, invokeArgs);
    }

    return new OpaqueDataView(implementation);
}
/// <summary>
/// Creates a trainer from an arguments object by delegating to <c>CreateCore</c> with the
/// <see cref="SignatureTrainer"/> signature.
/// </summary>
/// <param name="env">The host environment; must not be null.</param>
/// <param name="arguments">The arguments object passed through to <c>CreateCore</c>.</param>
/// <param name="loadName">Receives the load name reported by <c>CreateCore</c>.</param>
internal static ITrainer CreateTrainer<TArgs>(this IHostEnvironment env, TArgs arguments, out string loadName)
    where TArgs : class, new()
{
    Contracts.CheckValue(env, nameof(env));

    ITrainer trainer = CreateCore<ITrainer, TArgs, SignatureTrainer>(env, arguments, out loadName);
    return trainer;
}
/// <summary>
/// Get the getter for the feature column, assuming it is a vector of float.
/// Validates <paramref name="data"/> and forwards to the overload taking its schema.
/// </summary>
/// <param name="row">The row to obtain the getter from.</param>
/// <param name="data">The role-mapped data whose schema is used; must not be null.</param>
public static ValueGetter<VBuffer<float>> GetFeatureFloatVectorGetter(this Row row, RoleMappedData data)
{
    Contracts.CheckValue(data, nameof(data));

    var roleSchema = data.Schema;
    return GetFeatureFloatVectorGetter(row, roleSchema);
}
/// <summary>
/// Creates a data saver from the 'LoadName{settings}' string.
/// </summary>
/// <param name="env">The host environment; must not be null.</param>
/// <param name="settings">The 'LoadName{settings}' string handed to <c>CreateCore</c>.</param>
public static IDataSaver CreateSaver(this IHostEnvironment env, string settings)
{
    Contracts.CheckValue(env, nameof(env));

    Type signature = typeof(SignatureDataSaver);
    return CreateCore<IDataSaver>(env, signature, settings);
}
/// <summary>
/// Creates an evaluator from a settings string by delegating to <c>CreateCore</c> with the
/// <see cref="SignatureEvaluator"/> signature.
/// </summary>
/// <param name="env">The host environment; must not be null.</param>
/// <param name="settings">The settings string; must not be null or white space.</param>
internal static IEvaluator CreateEvaluator(this IHostEnvironment env, string settings)
{
    Contracts.CheckValue(env, nameof(env));
    env.CheckNonWhiteSpace(settings, nameof(settings));

    Type signature = typeof(SignatureEvaluator);
    return CreateCore<IEvaluator>(env, signature, settings);
}
/// <summary>
/// Run a TensorFlow model provided through <paramref name="tensorFlowModel"/> on the input column and extract one output column.
/// The inputs and outputs are matched to TensorFlow graph nodes by name.
/// </summary>
/// <param name="input">The input column; must not be null.</param>
/// <param name="tensorFlowModel">The already-loaded TensorFlow model; must not be null.</param>
/// <returns>The output column.</returns>
public static Vector<float> ApplyTensorFlowGraph(this Vector<float> input, TensorFlowModelInfo tensorFlowModel)
{
    Contracts.CheckValue(input, nameof(input));
    Contracts.CheckValue(tensorFlowModel, nameof(tensorFlowModel));

    Vector<float> output = new OutColumn(input, tensorFlowModel);
    return output;
}
/// <summary>
/// Composes the word-bag transform from three stages applied in sequence: a concat over
/// multi-source columns, a word tokenizer, and an n-gram extractor.
/// </summary>
/// <param name="env">The host environment.</param>
/// <param name="options">The transform options; at least one column must be specified.</param>
/// <param name="input">The input data view.</param>
/// <returns>The transform returned by <c>NgramExtractorTransform.Create</c>.</returns>
internal static IDataTransform Create(IHostEnvironment env, Options options, IDataView input)
{
    Contracts.CheckValue(env, nameof(env));
    var h = env.Register(RegistrationName);
    h.CheckValue(options, nameof(options));
    h.CheckValue(input, nameof(input));
    h.CheckUserArg(Utils.Size(options.Columns) > 0, nameof(options.Columns), "Columns must be specified");

    // Compose the WordBagTransform from a tokenize transform,
    // followed by a NgramExtractionTransform.
    // Since WordBagTransform is a many-to-one column transform, for each
    // WordBagTransform.Column with multiple sources, we first apply a ConcatTransform.

    // REVIEW: In order to not get ngrams that cross between vector slots, we need to
    // enable tokenize transforms to insert a special token between slots.

    // REVIEW: In order to make it possible to output separate bags for different columns
    // using the same dictionary, we need to find a way to make ConcatTransform remember the boundaries.

    int columnCount = options.Columns.Length;
    var tokenizeColumns = new WordTokenizingEstimator.ColumnOptions[columnCount];
    var extractorArgs = new NgramExtractorTransform.Options()
    {
        MaxNumTerms = options.MaxNumTerms,
        NgramLength = options.NgramLength,
        SkipLength = options.SkipLength,
        AllLengths = options.AllLengths,
        Weighting = options.Weighting,
        Columns = new NgramExtractorTransform.Column[columnCount]
    };

    for (int index = 0; index < columnCount; index++)
    {
        var column = options.Columns[index];
        h.CheckUserArg(!string.IsNullOrWhiteSpace(column.Name), nameof(column.Name));
        h.CheckUserArg(Utils.Size(column.Source) > 0, nameof(column.Source));
        h.CheckUserArg(column.Source.All(src => !string.IsNullOrWhiteSpace(src)), nameof(column.Source));

        // With several sources the tokenizer reads the column named column.Name (the concat step
        // below is handed the same options.Columns); with a single source it reads that source.
        string tokenizerSource;
        if (column.Source.Length > 1)
        {
            tokenizerSource = column.Name;
        }
        else
        {
            tokenizerSource = column.Source[0];
        }

        tokenizeColumns[index] = new WordTokenizingEstimator.ColumnOptions(column.Name, tokenizerSource);

        // The n-gram extractor works in place on the tokenized column.
        extractorArgs.Columns[index] = new NgramExtractorTransform.Column()
        {
            Name = column.Name,
            Source = column.Name,
            MaxNumTerms = column.MaxNumTerms,
            NgramLength = column.NgramLength,
            SkipLength = column.SkipLength,
            Weighting = column.Weighting,
            AllLengths = column.AllLengths
        };
    }

    IDataView concatenated = NgramExtractionUtils.ApplyConcatOnSources(h, options.Columns, input);
    var tokenizer = new WordTokenizingEstimator(env, tokenizeColumns);
    IDataView tokenized = tokenizer.Fit(concatenated).Transform(concatenated);
    return NgramExtractorTransform.Create(h, extractorArgs, tokenized);
}
/// <include file='doc.xml' path='doc/members/member[@name="WordEmbeddings"]/*' />
/// <param name="input">Vector of tokenized text.</param>
/// <param name="customModelFile">The custom word embedding model file.</param>
/// <returns>The output column built from <paramref name="input"/> and <paramref name="customModelFile"/>.</returns>
public static Vector<float> WordEmbeddings(this VarVector<string> input, string customModelFile)
{
    Contracts.CheckValue(input, nameof(input));

    Vector<float> embeddings = new OutColumn(input, customModelFile);
    return embeddings;
}
// REVIEW: this method only covers one use case of using TensorFlow models: consuming one
// input and producing one output of floats.
// We could consider selectively adding some more extensions to enable common scenarios.
/// <summary>
/// Load the TensorFlow model from <paramref name="modelFile"/> and run it on the input column and extract one output column.
/// The inputs and outputs are matched to TensorFlow graph nodes by name.
/// </summary>
/// <param name="input">The input column; must not be null.</param>
/// <param name="modelFile">The model file; must be a non-empty string.</param>
/// <returns>The output column.</returns>
public static Vector<float> ApplyTensorFlowGraph(this Vector<float> input, string modelFile)
{
    Contracts.CheckValue(input, nameof(input));
    Contracts.CheckNonEmpty(modelFile, nameof(modelFile));

    Vector<float> output = new OutColumn(input, modelFile);
    return output;
}
/// <include file='doc.xml' path='doc/members/member[@name="WordEmbeddings"]/*' />
/// <param name="input">Vector of tokenized text.</param>
/// <param name="modelKind">The pretrained word embedding model.</param>
/// <returns>The output column built from <paramref name="input"/> and <paramref name="modelKind"/>.</returns>
public static Vector<float> WordEmbeddings(this VarVector<string> input,
    WordEmbeddingsTransform.PretrainedModelKind modelKind = WordEmbeddingsTransform.PretrainedModelKind.Sswe)
{
    Contracts.CheckValue(input, nameof(input));

    Vector<float> embeddings = new OutColumn(input, modelKind);
    return embeddings;
}
/// <summary>
/// Returns the closed <c>VBuffer&lt;T&gt;</c> type whose type argument is the raw type of
/// <paramref name="itemType"/>.
/// </summary>
/// <param name="itemType">The primitive item type; must not be null.</param>
private static Type GetRawType(PrimitiveType itemType)
{
    Contracts.CheckValue(itemType, nameof(itemType));

    Type openBufferType = typeof(VBuffer<>);
    return openBufferType.MakeGenericType(itemType.RawType);
}
/// <summary>
/// Returns true iff the given type is valid for a <see cref="KeyType"/>. The valid ones are
/// <see cref="byte"/>, <see cref="ushort"/>, <see cref="uint"/>, and <see cref="ulong"/>, that is, the unsigned integer types.
/// </summary>
/// <param name="type">The type to test; must not be null.</param>
public static bool IsValidDataType(Type type)
{
    Contracts.CheckValue(type, nameof(type));

    if (type == typeof(byte))
    {
        return true;
    }

    if (type == typeof(ushort))
    {
        return true;
    }

    if (type == typeof(uint))
    {
        return true;
    }

    return type == typeof(ulong);
}
/// <summary>
/// Returns the feature selection scores for each slot of each column.
/// </summary>
/// <param name="env">The host environment.</param>
/// <param name="input">The input dataview.</param>
/// <param name="columns">The columns for which to compute the feature selection scores. Must be non-empty; variable-length vector columns are rejected.</param>
/// <param name="colSizes">Outputs an array containing the vector sizes of the input columns</param>
/// <returns>One array of counts per requested column, in the order of <paramref name="columns"/>.</returns>
public static long[][] Train(IHostEnvironment env, IDataView input, string[] columns, out int[] colSizes)
{
    Contracts.CheckValue(env, nameof(env));
    env.CheckValue(input, nameof(input));
    env.CheckParam(Utils.Size(columns) > 0, nameof(columns));

    var schema = input.Schema;
    int columnCount = columns.Length;

    // Resolve every requested column up front and mark it as needed by the cursor.
    var isInputActive = new bool[schema.ColumnCount];
    var sourceIndices = new int[columnCount];
    var sourceTypes = new ColumnType[columnCount];
    colSizes = new int[columnCount];
    for (int i = 0; i < columnCount; i++)
    {
        string columnName = columns[i];
        if (!schema.TryGetColumnIndex(columnName, out int sourceIndex))
        {
            throw env.ExceptUserArg(nameof(CountFeatureSelectingTransformer.Arguments.Column), "Source column '{0}' not found", columnName);
        }

        var sourceType = schema.GetColumnType(sourceIndex);
        if (sourceType.IsVector && !sourceType.IsKnownSizeVector)
        {
            throw env.ExceptUserArg(nameof(CountFeatureSelectingTransformer.Arguments.Column), "Variable length column '{0}' is not allowed", columnName);
        }

        isInputActive[sourceIndex] = true;
        sourceIndices[i] = sourceIndex;
        sourceTypes[i] = sourceType;
        colSizes[i] = sourceType.ValueCount;
    }

    var aggregators = new CountAggregator[columnCount];
    long rowsProcessed = 0;
    double totalRows = input.GetRowCount() ?? double.NaN;
    using (var progress = env.StartProgressChannel("Aggregating counts"))
    using (var cursor = input.GetRowCursor(col => isInputActive[col]))
    {
        var header = new ProgressHeader(new[] { "rows" });
        progress.SetHeader(header, e => { e.SetProgress(0, rowsProcessed, totalRows); });

        // One aggregator per column, picked by whether the column is a vector or a single value.
        for (int i = 0; i < columnCount; i++)
        {
            var type = sourceTypes[i];
            int source = sourceIndices[i];
            if (type.IsVector)
            {
                aggregators[i] = GetVecAggregator(cursor, type, source);
            }
            else
            {
                aggregators[i] = GetOneAggregator(cursor, type, source);
            }
        }

        while (cursor.MoveNext())
        {
            foreach (var aggregator in aggregators)
            {
                aggregator.ProcessValue();
            }

            rowsProcessed++;
        }

        progress.Checkpoint(rowsProcessed);
    }

    var scores = new long[columnCount][];
    for (int i = 0; i < columnCount; i++)
    {
        scores[i] = aggregators[i].Count;
    }

    return scores;
}
/// <summary>
/// Builds the n-gram extraction pipeline over <paramref name="input"/>. Columns whose item type is
/// text are first converted to keys by a term transform (and optionally have missing values
/// dropped); the n-gram estimator is then fitted and applied to all requested columns.
/// </summary>
/// <param name="env">The host environment.</param>
/// <param name="options">The transform options; at least one column must be specified.</param>
/// <param name="input">The input data view.</param>
/// <param name="termLoaderArgs">Optional term-loading arguments used to configure the term transform.</param>
/// <returns>
/// The transformed data as an <see cref="IDataTransform"/>; the result of an <c>as</c> cast, so it
/// is null if the transformed view is not an <see cref="IDataTransform"/>.
/// </returns>
internal static IDataTransform Create(IHostEnvironment env, Options options, IDataView input, TermLoaderArguments termLoaderArgs = null)
{
    Contracts.CheckValue(env, nameof(env));
    var h = env.Register(LoaderSignature);
    h.CheckValue(options, nameof(options));
    h.CheckValue(input, nameof(input));
    h.CheckUserArg(Utils.Size(options.Columns) > 0, nameof(options.Columns), "Columns must be specified");

    IDataView view = input;
    int columnCount = options.Columns.Length;

    // Find the columns whose item type is text; only those go through the term transform.
    var textColumns = new List<Column>();
    var isTextColumn = new bool[columnCount];
    for (int i = 0; i < columnCount; i++)
    {
        var candidate = options.Columns[i];
        h.CheckNonWhiteSpace(candidate.Name, nameof(candidate.Name));
        h.CheckNonWhiteSpace(candidate.Source, nameof(candidate.Source));

        bool found = input.Schema.TryGetColumnIndex(candidate.Source, out int sourceIndex);
        if (found && input.Schema[sourceIndex].Type.GetItemType() is TextDataViewType)
        {
            textColumns.Add(candidate);
            isTextColumn[i] = true;
        }
    }

    // If the column types of args.column are text, apply term transform to convert them to keys.
    // Otherwise, skip term transform and apply ngram transform directly.
    // This logic allows NgramExtractorTransform to handle both text and key input columns.
    // Note: ngram transform handles the validation of the types natively (in case the types
    // of args.column are not text nor keys).
    if (textColumns.Count > 0)
    {
        ValueToKeyMappingTransformer.Options termOptions = null;
        string[] dropMissingFrom = null;
        if (termLoaderArgs == null)
        {
            termOptions = new ValueToKeyMappingTransformer.Options()
            {
                MaxNumTerms = Utils.Size(options.MaxNumTerms) > 0 ? options.MaxNumTerms[0] : NgramExtractingEstimator.Defaults.MaxNumTerms,
                Columns = new ValueToKeyMappingTransformer.Column[textColumns.Count]
            };
        }
        else
        {
            termOptions = new ValueToKeyMappingTransformer.Options()
            {
                MaxNumTerms = int.MaxValue,
                Term = termLoaderArgs.Term,
                Terms = termLoaderArgs.Terms,
                DataFile = termLoaderArgs.DataFile,
                Loader = termLoaderArgs.Loader,
                TermsColumn = termLoaderArgs.TermsColumn,
                Sort = termLoaderArgs.Sort,
                Columns = new ValueToKeyMappingTransformer.Column[textColumns.Count]
            };

            if (termLoaderArgs.DropUnknowns)
            {
                dropMissingFrom = new string[textColumns.Count];
            }
        }

        for (int i = 0; i < textColumns.Count; i++)
        {
            var textColumn = textColumns[i];
            termOptions.Columns[i] = new ValueToKeyMappingTransformer.Column()
            {
                Name = textColumn.Name,
                Source = textColumn.Source,
                MaxNumTerms = Utils.Size(textColumn.MaxNumTerms) > 0 ? textColumn.MaxNumTerms[0] : default(int?)
            };

            if (dropMissingFrom != null)
            {
                dropMissingFrom[i] = textColumn.Name;
            }
        }

        view = ValueToKeyMappingTransformer.Create(h, termOptions, view);
        if (dropMissingFrom != null)
        {
            var dropPairs = dropMissingFrom.Select(x => (x, x)).ToArray();
            view = new MissingValueDroppingTransformer(h, dropPairs).Transform(view);
        }
    }

    var ngramColumns = new NgramExtractingEstimator.ColumnOptions[columnCount];
    for (int i = 0; i < columnCount; i++)
    {
        var column = options.Columns[i];

        // Text columns were rewritten under column.Name by the term transform above, so the
        // n-gram stage reads from there; other columns are read from their original source.
        ngramColumns[i] = new NgramExtractingEstimator.ColumnOptions(column.Name,
            column.NgramLength ?? options.NgramLength,
            column.SkipLength ?? options.SkipLength,
            column.AllLengths ?? options.AllLengths,
            column.Weighting ?? options.Weighting,
            column.MaxNumTerms ?? options.MaxNumTerms,
            isTextColumn[i] ? column.Name : column.Source);
    }

    var estimator = new NgramExtractingEstimator(env, ngramColumns);
    var fitted = estimator.Fit(view);
    return fitted.Transform(view) as IDataTransform;
}
/// <summary>
/// Creates a data saver from the arguments object.
/// </summary>
/// <param name="env">The host environment; must not be null.</param>
/// <param name="arguments">The arguments object passed through to <c>CreateCore</c>.</param>
public static IDataSaver CreateSaver<TArgs>(this IHostEnvironment env, TArgs arguments)
    where TArgs : class, new()
{
    Contracts.CheckValue(env, nameof(env));

    IDataSaver saver = CreateCore<IDataSaver, TArgs, SignatureDataSaver>(env, arguments);
    return saver;
}
/// <summary>
/// Creates the tree featurizer transform. When a trained model file is given it is loaded and
/// bound to the input through a scorer; otherwise the configured trainer is run through a
/// train-and-score transform.
/// </summary>
/// <param name="env">The host environment.</param>
/// <param name="args">The arguments; must name a trained model file or a trainer, and a feature column.</param>
/// <param name="input">The input data view.</param>
/// <returns>The resulting data transform.</returns>
public static IDataTransform Create(IHostEnvironment env, Arguments args, IDataView input)
{
    Contracts.CheckValue(env, nameof(env));
    var host = env.Register("Tree Featurizer Transform");

    host.CheckValue(args, nameof(args));
    host.CheckValue(input, nameof(input));
    host.CheckUserArg(!string.IsNullOrWhiteSpace(args.TrainedModelFile) || args.Trainer.IsGood(), nameof(args.TrainedModelFile),
        "Please specify either a trainer or an input model file.");
    host.CheckUserArg(!string.IsNullOrEmpty(args.FeatureColumn), nameof(args.FeatureColumn), "Transform needs an input features column");

    IDataTransform xf;
    using (var ch = host.Start("Create Tree Ensemble Scorer"))
    {
        var scorerArgs = new TreeEnsembleFeaturizerBindableMapper.Arguments() { Suffix = args.Suffix };
        if (!string.IsNullOrWhiteSpace(args.TrainedModelFile))
        {
            if (args.Trainer.IsGood())
            {
                ch.Warning("Both an input model and a trainer were specified. Using the model file.");
            }

            ch.Trace("Loading model");
            IPredictor predictor;
            using (Stream strm = new FileStream(args.TrainedModelFile, FileMode.Open, FileAccess.Read))
            using (var rep = RepositoryReader.Open(strm, ch))
            {
                ModelLoadContext.LoadModel<IPredictor, SignatureLoadModel>(host, out predictor, rep, ModelFileUtils.DirPredictor);
            }

            ch.Trace("Creating scorer");
            var data = TrainAndScoreTransform.CreateDataFromArgs(ch, input, args);

            // Make sure that the given predictor has the correct number of input features.
            if (predictor is CalibratedPredictorBase calibrated)
            {
                predictor = calibrated.SubPredictor;
            }

            // Predictor should be a FastTreePredictionWrapper, which implements IValueMapper, so this should
            // be non-null.
            var vm = predictor as IValueMapper;
            ch.CheckUserArg(vm != null, nameof(args.TrainedModelFile), "Predictor in model file does not have compatible type");
            if (vm.InputType.VectorSize != data.Schema.Feature.Type.VectorSize)
            {
                throw ch.ExceptUserArg(nameof(args.TrainedModelFile),
                    "Predictor in model file expects {0} features, but data has {1} features",
                    vm.InputType.VectorSize, data.Schema.Feature.Type.VectorSize);
            }

            var bindable = new TreeEnsembleFeaturizerBindableMapper(env, scorerArgs, predictor);
            var bound = bindable.Bind(env, data.Schema);
            xf = new GenericScorer(env, scorerArgs, input, bound, data.Schema);
        }
        else
        {
            ch.Assert(args.Trainer.IsGood());

            ch.Trace("Creating TrainAndScoreTransform");

            string scorerSettings = CmdParser.GetSettings(ch, scorerArgs, new TreeEnsembleFeaturizerBindableMapper.Arguments());
            var scorer = new SubComponent<IDataScorerTransform, SignatureDataScorer>(
                TreeEnsembleFeaturizerBindableMapper.LoadNameShort, scorerSettings);

            var trainScoreArgs = new TrainAndScoreTransform.Arguments();
            args.CopyTo(trainScoreArgs);
            trainScoreArgs.Trainer = new SubComponent<ITrainer, SignatureTrainer>(args.Trainer.Kind, args.Trainer.Settings);

            var labelInput = AppendLabelTransform(host, ch, input, trainScoreArgs.LabelColumn, args.LabelPermutationSeed);
            trainScoreArgs.Scorer = scorer;
            var scoreXf = TrainAndScoreTransform.Create(host, trainScoreArgs, labelInput);

            // Assign instead of returning from inside the using block, so that ch.Done() below runs
            // on this path as well as on the model-file path.
            if (input == labelInput)
            {
                xf = scoreXf;
            }
            else
            {
                xf = (IDataTransform)ApplyTransformUtils.ApplyAllTransformsToData(host, scoreXf, input, labelInput);
            }
        }

        ch.Done();
    }

    return xf;
}
/// <summary>
/// Create a new data view which is obtained by appending all columns of all the source data views.
/// If the data views are of different length, the resulting data view will have the length equal to the
/// length of the shortest source.
/// </summary>
/// <param name="env">The host environment to use.</param>
/// <param name="sources">A non-empty collection of data views to zip together.</param>
/// <returns>The resulting data view.</returns>
public static IDataView Zip(this IHostEnvironment env, IEnumerable<IDataView> sources)
{
    Contracts.CheckValue(env, nameof(env));
    env.CheckValue(sources, nameof(sources));

    IDataView zipped = ZipDataView.Create(env, sources);
    return zipped;
}
/// <summary>
/// Initializes the estimator: a hashing estimator over all columns, followed (depending on each
/// column's output kind) by a key-to-vector and/or key-to-binary-vector estimator.
/// </summary>
/// <param name="env">The host environment; must not be null.</param>
/// <param name="columns">Descriptions of the columns to encode; must not be null.</param>
public OneHotHashEncodingEstimator(IHostEnvironment env, params ColumnInfo[] columns)
{
    Contracts.CheckValue(env, nameof(env));
    Contracts.CheckValue(columns, nameof(columns));

    // Register under this estimator's own name (previously registered as ValueToKeyMappingEstimator).
    _host = env.Register(nameof(OneHotHashEncodingEstimator));
    _hash = new HashingEstimator(_host, columns.Select(x => x.HashInfo).ToArray());
    using (var ch = _host.Start(nameof(OneHotHashEncodingEstimator)))
    {
        var binaryCols = new List<(string input, string output)>();
        var cols = new List<(string input, string output, bool bag)>();
        for (int i = 0; i < columns.Length; i++)
        {
            var column = columns[i];
            OneHotEncodingTransformer.OutputKind kind = column.OutputKind;

            // The follow-up estimators operate in place on the hashed output column.
            switch (kind)
            {
                case OneHotEncodingTransformer.OutputKind.Key:
                    // Key output needs no conversion beyond the hashing itself.
                    continue;
                case OneHotEncodingTransformer.OutputKind.Bin:
                    if ((column.HashInfo.InvertHash) != 0)
                    {
                        ch.Warning("Invert hashing is being used with binary encoding.");
                    }

                    binaryCols.Add((column.HashInfo.Output, column.HashInfo.Output));
                    break;
                case OneHotEncodingTransformer.OutputKind.Ind:
                    cols.Add((column.HashInfo.Output, column.HashInfo.Output, false));
                    break;
                case OneHotEncodingTransformer.OutputKind.Bag:
                    cols.Add((column.HashInfo.Output, column.HashInfo.Output, true));
                    break;
                default:
                    throw _host.ExceptUserArg(nameof(column.OutputKind));
            }
        }

        IEstimator<ITransformer> toBinVector = null;
        IEstimator<ITransformer> toVector = null;
        if (binaryCols.Count > 0)
        {
            toBinVector = new KeyToBinaryVectorMappingEstimator(_host,
                binaryCols.Select(x => new KeyToBinaryVectorTransform.ColumnInfo(x.input, x.output)).ToArray());
        }

        if (cols.Count > 0)
        {
            toVector = new KeyToVectorMappingEstimator(_host,
                cols.Select(x => new KeyToVectorTransform.ColumnInfo(x.input, x.output, x.bag)).ToArray());
        }

        // Chain both when both exist; otherwise use whichever one exists (null when neither does).
        if (toBinVector != null && toVector != null)
        {
            _toSomething = toVector.Append(toBinVector);
        }
        else if (toBinVector != null)
        {
            _toSomething = toBinVector;
        }
        else
        {
            _toSomething = toVector;
        }
    }
}
/// <summary>
/// Loads a predictor from the model stream. Returns null iff there's no predictor.
/// </summary>
/// <param name="env">The host environment to use; must not be null.</param>
/// <param name="modelStream">The model stream; must not be null.</param>
public static IPredictor LoadPredictorOrNull(this IHostEnvironment env, Stream modelStream)
{
    // Validate env here, like the other IHostEnvironment extension methods do, rather than relying
    // on the callee to reject a null environment.
    Contracts.CheckValue(env, nameof(env));
    Contracts.CheckValue(modelStream, nameof(modelStream));
    return ModelFileUtils.LoadPredictorOrNull(env, modelStream);
}
/// <summary>
/// Schedules a one-hot hash encoding of the categorical <paramref name="input"/>. This method only
/// validates <paramref name="input"/> and packages the remaining options into a <c>Config</c>
/// carried by the returned column.
/// </summary>
/// <param name="input">Incoming data.</param>
/// <param name="outputKind">Specify the output type of indicator array: array or binary encoded data.</param>
/// <param name="hashBits">Amount of bits to use for hashing.</param>
/// <param name="seed">Seed value used for hashing.</param>
/// <param name="ordered">Whether the position of each term should be included in the hash.</param>
/// <param name="invertHash">Limit the number of keys used to generate the slot name to this many. 0 means no invert hashing, -1 means no limit.</param>
/// <returns>The output column.</returns>
public static Vector<float> OneHotHashEncoding(this Vector<string> input, OneHotHashVectorOutputKind outputKind = DefOut,
    int hashBits = DefHashBits, uint seed = DefSeed, bool ordered = DefOrdered, int invertHash = DefInvertHash)
{
    Contracts.CheckValue(input, nameof(input));

    var config = new Config(outputKind, hashBits, seed, ordered, invertHash);
    return new ImplVector<string>(input, config);
}
/// <summary>
/// Creates a trainer from a settings string by delegating to <c>CreateCore</c> with the
/// <see cref="SignatureTrainer"/> signature.
/// </summary>
/// <param name="env">The host environment; must not be null.</param>
/// <param name="settings">The settings string handed to <c>CreateCore</c>.</param>
/// <param name="loadName">Receives the load name reported by <c>CreateCore</c>.</param>
internal static ITrainer CreateTrainer(this IHostEnvironment env, string settings, out string loadName)
{
    Contracts.CheckValue(env, nameof(env));

    Type signature = typeof(SignatureTrainer);
    return CreateCore<ITrainer>(env, signature, settings, out loadName);
}
/// <summary>
/// Builds the <see cref="ModelArgs"/> describing a column: its name, the tensor data type mapped
/// from the column's raw kind, and its dimensions with a leading batch dimension of 1.
/// </summary>
/// <param name="type">The column type; for vector types the item type's raw kind is used.</param>
/// <param name="colName">The column name; must be non-empty.</param>
/// <param name="dims">
/// Optional explicit dimensions (without the batch dimension). When null, the dimensions are
/// derived from <paramref name="type"/>. The list passed in is not modified.
/// </param>
/// <param name="dimsParams">Optional flags passed through alongside <paramref name="dims"/>; only used when <paramref name="dims"/> is given.</param>
/// <returns>The model arguments for the column.</returns>
public static ModelArgs GetModelArgs(ColumnType type, string colName,
    List<long> dims = null, List<bool> dimsParams = null)
{
    Contracts.CheckValue(type, nameof(type));
    Contracts.CheckNonEmpty(colName, nameof(colName));

    TensorProto.Types.DataType dataType = TensorProto.Types.DataType.Undefined;
    DataKind rawKind;
    if (type is VectorType vectorType)
    {
        rawKind = vectorType.ItemType.RawKind;
    }
    else if (type is KeyType keyType)
    {
        rawKind = keyType.RawKind;
    }
    else
    {
        rawKind = type.RawKind;
    }

    switch (rawKind)
    {
        case DataKind.BL:
            // NOTE(review): booleans are exported as Float — confirm this is intentional.
            dataType = TensorProto.Types.DataType.Float;
            break;
        case DataKind.TX:
            dataType = TensorProto.Types.DataType.String;
            break;
        case DataKind.I1:
            dataType = TensorProto.Types.DataType.Int8;
            break;
        case DataKind.U1:
            dataType = TensorProto.Types.DataType.Uint8;
            break;
        case DataKind.I2:
            dataType = TensorProto.Types.DataType.Int16;
            break;
        case DataKind.U2:
            dataType = TensorProto.Types.DataType.Uint16;
            break;
        case DataKind.I4:
            dataType = TensorProto.Types.DataType.Int32;
            break;
        case DataKind.U4:
            // NOTE(review): U4 maps to Int64 rather than an unsigned 32-bit type — confirm this is intentional.
            dataType = TensorProto.Types.DataType.Int64;
            break;
        case DataKind.I8:
            dataType = TensorProto.Types.DataType.Int64;
            break;
        case DataKind.U8:
            dataType = TensorProto.Types.DataType.Uint64;
            break;
        case DataKind.R4:
            dataType = TensorProto.Types.DataType.Float;
            break;
        case DataKind.R8:
            dataType = TensorProto.Types.DataType.Double;
            break;
        default:
            string msg = "Unsupported type: DataKind " + rawKind.ToString();
            Contracts.Check(false, msg);
            break;
    }

    string name = colName;
    List<long> dimsLocal;
    List<bool> dimsParamLocal = null;
    if (dims != null)
    {
        // Copy the caller's list: the batch dimension is inserted below, and inserting into
        // 'dims' itself would silently change the caller's list on every call.
        dimsLocal = new List<long>(dims);
        dimsParamLocal = dimsParams;
    }
    else
    {
        dimsLocal = new List<long>();
        if (type.ValueCount == 0) //Unknown size.
        {
            dimsLocal.Add(1);
            dimsParamLocal = new List<bool>() { false, true }; //false for batch size, true for dims.
        }
        else if (type.ValueCount == 1)
        {
            dimsLocal.Add(1);
        }
        else if (type.ValueCount > 1)
        {
            var vec = (VectorType)type;
            for (int i = 0; i < vec.Dimensions.Length; i++)
            {
                dimsLocal.Add(vec.Dimensions[i]);
            }
        }
    }

    //batch size.
    dimsLocal.Insert(0, 1);

    return new ModelArgs(name, dataType, dimsLocal, dimsParamLocal);
}
/// <summary>
/// Convenience overload: validates <paramref name="data"/> and forwards to the
/// <c>GetOptWeightFloatGetter</c> overload that takes the schema of <paramref name="data"/>.
/// </summary>
/// <param name="row">The row to obtain the getter from.</param>
/// <param name="data">The role-mapped data whose schema is used; must not be null.</param>
/// <returns>Whatever the schema-based overload returns for <paramref name="row"/>.</returns>
public static ValueGetter<float> GetOptWeightFloatGetter(this Row row, RoleMappedData data)
{
    Contracts.CheckValue(data, nameof(data));

    var roleSchema = data.Schema;
    return GetOptWeightFloatGetter(row, roleSchema);
}
/// <summary>
/// Saves this object to <paramref name="ctx"/>: stamps the context with this type's version
/// information, then lets the base class write its state.
/// </summary>
/// <param name="ctx">The save context; must not be null.</param>
public override void Save(ModelSaveContext ctx)
{
    Contracts.CheckValue(ctx, nameof(ctx));

    var versionInfo = GetVersionInfo();
    ctx.SetVersionInfo(versionInfo);
    base.Save(ctx);
}
// Factory method for SignatureDataTransform.
/// <summary>
/// Loads the transforms stored in <c>args.ModelFile</c> whose tags match the tag selection in
/// <paramref name="args"/> and applies them on top of <paramref name="input"/>.
/// Tags are compared case-insensitively using the invariant culture.
/// </summary>
/// <param name="env">The host environment.</param>
/// <param name="args">The arguments; <c>ModelFile</c> must point at an existing file.</param>
/// <param name="input">The data view the loaded transforms are applied to.</param>
/// <returns>The last transform in the loaded chain. Throws when no transform matched.</returns>
private static IDataTransform Create(IHostEnvironment env, Arguments args, IDataView input)
{
    Contracts.CheckValue(env, nameof(env));
    var h = env.Register("LoadTransform");
    h.CheckValue(args, nameof(args));
    h.CheckValue(input, nameof(input));
    h.CheckUserArg(File.Exists(args.ModelFile), nameof(args.ModelFile), "File does not exist");

    IDataView currentView;

    // If there are no 'tag' parameters, we load everything, regardless of 'comp'.
    bool complement = args.Complement || Utils.Size(args.Tags) == 0;

    // Tags are normalized with ToLowerInvariant (not the culture-sensitive ToLower) so that
    // matching does not depend on the current thread culture.
    var allTags = new HashSet<string>();
    for (int i = 0; i < Utils.Size(args.Tags); i++)
    {
        var curList = args.Tags[i];
        if (string.IsNullOrWhiteSpace(curList))
        {
            continue;
        }

        foreach (var tag in curList.Split(','))
        {
            if (!string.IsNullOrWhiteSpace(tag))
            {
                allTags.Add(tag.ToLowerInvariant());
            }
        }
    }

    // Accept a transform when its tag is listed, or, in complement mode, when it is not.
    Func<string, bool> predicate =
        tag =>
        {
            bool found = allTags.Contains(tag.ToLowerInvariant());
            return found == !complement;
        };

    using (var file = h.OpenInputFile(args.ModelFile))
    using (var strm = file.OpenReadStream())
    using (var rep = RepositoryReader.Open(strm, h))
    using (var pipeLoaderEntry = rep.OpenEntry(ModelFileUtils.DirDataLoaderModel, ModelLoadContext.ModelStreamName))
    using (var ctx = new ModelLoadContext(rep, pipeLoaderEntry, ModelFileUtils.DirDataLoaderModel))
    {
        currentView = LegacyCompositeDataLoader.LoadSelectedTransforms(ctx, input, h, predicate);

        if (currentView == input)
        {
            // REVIEW: we are required to return an IDataTransform. Therefore, if we don't introduce a new transform
            // on top of 'input', we must throw (since input may not be a data transform).
            // We could of course introduce a 'no-op transform', or we could lift the requirement to always return an IDataTransform
            // associated with SignatureDataTransform.

            var criteria = string.Format(
                complement
                    ? "transforms that don't have tags from the list: '{0}'"
                    : "transforms that have tags from the list: '{0}'",
                string.Join(",", allTags));
            throw h.ExceptUserArg(nameof(args.Tags), "No transforms were found that match the search criteria ({0})", criteria);
        }
    }

    h.Assert(currentView is IDataTransform);
    return (IDataTransform)currentView;
}
/// <summary>
/// Creates a <see cref="SchemaBindableQuantileRegressionPredictor"/> from a model load context,
/// after checking that the context is positioned at a model with this type's version information.
/// </summary>
/// <param name="env">The host environment, passed through to the constructor.</param>
/// <param name="ctx">The load context; must not be null.</param>
public static SchemaBindableQuantileRegressionPredictor Create(IHostEnvironment env, ModelLoadContext ctx)
{
    Contracts.CheckValue(ctx, nameof(ctx));

    var expectedVersion = GetVersionInfo();
    ctx.CheckAtModel(expectedVersion);
    return new SchemaBindableQuantileRegressionPredictor(env, ctx);
}
/// <summary>
/// Loads all transforms from the <paramref name="ctx"/> that pass the <paramref name="isTransformTagAccepted"/> test,
/// applies them sequentially to the <paramref name="srcView"/>, and returns the resulting data view.
/// If there are no transforms in <paramref name="ctx"/> that are accepted, returns the original <paramref name="srcView"/>.
///
/// The difference from the <c>Create</c> method above is that:
/// - it doesn't wrap the results into a loader, just returns the last transform in the chain.
/// - it accepts <see cref="IDataView"/> as input, not necessarily a loader.
/// - it throws away the tag information.
/// - it doesn't throw if the context is not representing a <see cref="CompositeDataLoader"/>: in this case it's assumed that no transforms
///   meet the test, and the <paramref name="srcView"/> is returned.
/// Essentially, this is a helper method for the LoadTransform class.
/// </summary>
/// <param name="ctx">The model load context, positioned at the start of the model.</param>
/// <param name="srcView">The data view the accepted transforms are applied to.</param>
/// <param name="env">The host environment.</param>
/// <param name="isTransformTagAccepted">Predicate deciding, by tag, whether a transform is loaded. Models without tags present the empty string.</param>
public static IDataView LoadSelectedTransforms(ModelLoadContext ctx, IDataView srcView, IHostEnvironment env, Func<string, bool> isTransformTagAccepted)
{
    Contracts.CheckValue(env, nameof(env));
    var host = env.Register(RegistrationName);

    host.CheckValue(ctx, nameof(ctx));
    host.Check(ctx.Reader.BaseStream.Position == ctx.FpMin + ctx.Header.FpModel);
    var expectedVersion = GetVersionInfo();

    // A different model signature means this is not a composite loader: nothing to load.
    if (ctx.Header.ModelSignature != expectedVersion.ModelSignature)
    {
        using (var ch = host.Start("ModelCheck"))
        {
            ch.Info("The data model doesn't contain transforms.");
        }

        return srcView;
    }

    ModelHeader.CheckVersionInfo(ref ctx.Header, expectedVersion);

    host.CheckValue(srcView, nameof(srcView));
    host.CheckValue(isTransformTagAccepted, nameof(isTransformTagAccepted));

    // *** Binary format ***
    // int: sizeof(Float)
    // int: number of transforms
    // foreach transform: (starting from version VersionAddedTags)
    //     string: tag
    //     string: args string

    int floatSize = ctx.Reader.ReadInt32();
    host.CheckDecode(floatSize == sizeof(Float));

    int transformCount = ctx.Reader.ReadInt32();
    host.CheckDecode(transformCount >= 0);

    bool hasTags = ctx.Header.ModelVerReadable >= VersionAddedTags;
    var currentView = srcView;
    for (int index = 0; index < transformCount; index++)
    {
        string tag = "";
        if (hasTags)
        {
            tag = ctx.LoadNonEmptyString();
            ctx.LoadStringOrNull(); // ignore the args string
        }

        // The tag and args strings are always consumed above; only accepted transforms are loaded.
        if (isTransformTagAccepted(tag))
        {
            string transformDir = string.Format(TransformDirTemplate, index);
            ctx.LoadModel<IDataTransform, SignatureLoadDataTransform>(env, out IDataTransform loaded, transformDir, currentView);
            currentView = loaded;
        }
    }

    return currentView;
}
/// <summary>
/// Presents a <see cref="Schema.Metadata"/> as a <see cref="Row"/>.
/// </summary>
/// <param name="metadata">The metadata to wrap; must not be null.</param>
/// <returns>A row that wraps the input metadata.</returns>
public static Row MetadataAsRow(Schema.Metadata metadata)
{
    Contracts.CheckValue(metadata, nameof(metadata));

    Row wrapper = new MetadataRow(metadata);
    return wrapper;
}