/// <summary>
/// Builds a featurization transformer from already-trained tree ensemble parameters and the schema
/// the transformer is first bound to.
/// </summary>
/// <param name="env">Host environment; checked with <c>Contracts.CheckRef</c> before a host is registered on it.</param>
/// <param name="inputSchema">Schema used to validate the feature column and to bind the scorer.</param>
/// <param name="featureColumn">Feature column; its <c>Index</c> is used to look the column up in <paramref name="inputSchema"/>.</param>
/// <param name="modelParameters">Tree ensemble parameters handed to the base class and to the bindable mapper.</param>
/// <param name="treesColumnName">Output column name forwarded to the scorer arguments as <c>TreesColumnName</c>.</param>
/// <param name="leavesColumnName">Output column name forwarded to the scorer arguments as <c>LeavesColumnName</c>.</param>
/// <param name="pathsColumnName">Output column name forwarded to the scorer arguments as <c>PathsColumnName</c>.</param>
internal TreeEnsembleFeaturizationTransformer(IHostEnvironment env, DataViewSchema inputSchema, DataViewSchema.Column featureColumn,
    TreeEnsembleModelParameters modelParameters, string treesColumnName, string leavesColumnName, string pathsColumnName)
    : base(Contracts.CheckRef(env, nameof(env)).Register(nameof(TreeEnsembleFeaturizationTransformer)), modelParameters, inputSchema)
{
    // A fitted transformer may later be applied to IDataViews whose schemas differ from inputSchema,
    // so only a detached copy of the feature column is retained.
    _featureDetachedColumn = new DataViewSchema.DetachedColumn(featureColumn);

    // The column sitting at featureColumn.Index inside inputSchema has to agree with the detached
    // copy stored above; the compatibility check enforces that.
    CheckFeatureColumnCompatibility(inputSchema[featureColumn.Index]);

    // The output column names are remembered so the transformer can be written to a file later.
    _treesColumnName = treesColumnName;
    _leavesColumnName = leavesColumnName;
    _pathsColumnName = pathsColumnName;

    // The underlying scorer learns the output column names through this argument object.
    _scorerArgs = new TreeEnsembleFeaturizerBindableMapper.Arguments
    {
        TreesColumnName = treesColumnName,
        LeavesColumnName = leavesColumnName,
        PathsColumnName = pathsColumnName
    };

    // The bindable mapper carries the core computation; it can be attached to any IDataView to
    // produce a transformed IDataView.
    BindableMapper = new TreeEnsembleFeaturizerBindableMapper(env, _scorerArgs, modelParameters);

    // Assemble the scorer over an empty view of the input schema.
    var featureRoleSchema = MakeFeatureRoleMappedSchema(inputSchema);
    var emptyInput = new EmptyDataView(Host, inputSchema);
    var boundMapper = BindableMapper.Bind(Host, featureRoleSchema);
    Scorer = new GenericScorer(Host, _scorerArgs, emptyInput, boundMapper, featureRoleSchema);
}
/// <summary>
/// Restores a featurization transformer from a model load context.
/// </summary>
/// <param name="host">Host environment; checked with <c>Contracts.CheckRef</c> before a host is registered on it.</param>
/// <param name="ctx">Load context positioned at this transformer's data; the base class reads its own part first.</param>
private TreeEnsembleFeaturizationTransformer(IHostEnvironment host, ModelLoadContext ctx)
    : base(Contracts.CheckRef(host, nameof(host)).Register(nameof(TreeEnsembleFeaturizationTransformer)), ctx)
{
    // *** Binary format ***
    // <base info>
    // string: name of the feature column.
    // string: name of the column holding the trees' prediction values.
    // string: name of the column holding the trees' leaves.
    // string: name of the column holding the trees' paths.

    // Read the stored fields back in the order listed above.
    string storedFeatureName = ctx.LoadString();
    _featureDetachedColumn = new DataViewSchema.DetachedColumn(TrainSchema[storedFeatureName]);
    _treesColumnName = ctx.LoadStringOrNull();
    _leavesColumnName = ctx.LoadStringOrNull();
    _pathsColumnName = ctx.LoadStringOrNull();

    // The scorer arguments name the output columns of this transformer.
    _scorerArgs = new TreeEnsembleFeaturizerBindableMapper.Arguments
    {
        TreesColumnName = _treesColumnName,
        LeavesColumnName = _leavesColumnName,
        PathsColumnName = _pathsColumnName
    };

    // The bindable mapper carries the core computation; it can be attached to any IDataView to
    // produce a transformed IDataView.
    BindableMapper = new TreeEnsembleFeaturizerBindableMapper(host, _scorerArgs, Model);

    // Assemble the scorer over an empty view of the training schema.
    var featureRoleSchema = MakeFeatureRoleMappedSchema(TrainSchema);
    var emptyInput = new EmptyDataView(Host, TrainSchema);
    var boundMapper = BindableMapper.Bind(Host, featureRoleSchema);
    Scorer = new GenericScorer(Host, _scorerArgs, emptyInput, boundMapper, featureRoleSchema);
}
/// <summary>
/// Describes the two output columns, <c>L1</c> and <c>L2</c>. Both are declared as
/// <c>NumberDataViewType.Double</c> and are created with a null annotations argument.
/// </summary>
/// <returns>A two-element array with the columns stored at positions <c>L1Col</c> and <c>L2Col</c>.</returns>
private protected override DataViewSchema.DetachedColumn[] GetOutputColumnsCore()
{
    var columns = new DataViewSchema.DetachedColumn[2];

    columns[L1Col] = new DataViewSchema.DetachedColumn(L1, NumberDataViewType.Double, null);
    columns[L2Col] = new DataViewSchema.DetachedColumn(L2, NumberDataViewType.Double, null);

    return columns;
}
/// <summary>
/// Describes one output column per entry of the parent's <c>Outputs</c>, pairing each name with the
/// type found at the same position in the parent's <c>OutputTypes</c>. No annotations are attached.
/// </summary>
/// <returns>An array with as many column descriptions as the parent has outputs.</returns>
protected override DataViewSchema.DetachedColumn[] GetOutputColumnsCore()
{
    var columns = new DataViewSchema.DetachedColumn[_parent.Outputs.Length];

    int position = 0;
    while (position < _parent.Outputs.Length)
    {
        columns[position] = new DataViewSchema.DetachedColumn(
            _parent.Outputs[position],
            _parent.OutputTypes[position],
            null);
        position++;
    }

    return columns;
}
/// <summary>
/// Describes the single output column: a vector of <c>NumberDataViewType.Double</c> whose length is
/// the parent's <c>OutputLength</c>, named after the parent's <c>OutputColumnName</c>, with slot
/// names supplied through <c>GetSlotNames</c>.
/// </summary>
/// <returns>A one-element array holding that column description.</returns>
public DataViewSchema.DetachedColumn[] GetOutputColumns()
{
    // Register the slot-name getter first so the annotations are complete when they are materialized.
    var annotationBuilder = new DataViewSchema.Annotations.Builder();
    annotationBuilder.AddSlotNames(_parent.OutputLength, GetSlotNames);

    var columns = new DataViewSchema.DetachedColumn[1];
    columns[0] = new DataViewSchema.DetachedColumn(
        _parent.OutputColumnName,
        new VectorType(NumberDataViewType.Double, _parent.OutputLength),
        annotationBuilder.ToAnnotations());

    return columns;
}
/// <summary>
/// Describes the five output columns. The label and score columns carry the stored label/score
/// type and metadata; the <c>L1</c>, <c>L2</c> and <c>Dist</c> columns are
/// <c>NumberDataViewType.Double</c> and are created with a null annotations argument.
/// </summary>
/// <returns>A five-element array with each column stored at its designated position.</returns>
private protected override DataViewSchema.DetachedColumn[] GetOutputColumnsCore()
{
    var columns = new DataViewSchema.DetachedColumn[5];

    // Columns whose type and metadata were captured elsewhere in this class.
    columns[LabelOutput] = new DataViewSchema.DetachedColumn(LabelCol, _labelType, _labelMetadata);
    columns[ScoreOutput] = new DataViewSchema.DetachedColumn(ScoreCol, _scoreType, _scoreMetadata);

    // Double-typed columns created without annotations.
    columns[L1Output] = new DataViewSchema.DetachedColumn(L1, NumberDataViewType.Double, null);
    columns[L2Output] = new DataViewSchema.DetachedColumn(L2, NumberDataViewType.Double, null);
    columns[DistCol] = new DataViewSchema.DetachedColumn(Dist, NumberDataViewType.Double, null);

    return columns;
}
/// <summary>
/// Describes one output column per column pair of the parent. Each output takes its name from the
/// pair, its type from <c>_types</c>, and inherits only the <c>KeyValues</c> and
/// <c>IsNormalized</c> metadata kinds from the input column it maps back to.
/// </summary>
/// <returns>An array with one column description per column pair.</returns>
protected override DataViewSchema.DetachedColumn[] GetOutputColumnsCore()
{
    var columns = new DataViewSchema.DetachedColumn[_parent.ColumnPairs.Length];

    for (int pair = 0; pair < _parent.ColumnPairs.Length; pair++)
    {
        var metadata = new MetadataBuilder();

        // Copy over just the two metadata kinds accepted by the filter below.
        metadata.Add(
            InputSchema[ColMapNewToOld[pair]].Metadata,
            kind => kind == MetadataUtils.Kinds.KeyValues || kind == MetadataUtils.Kinds.IsNormalized);

        columns[pair] = new DataViewSchema.DetachedColumn(
            _parent.ColumnPairs[pair].outputColumnName,
            _types[pair],
            metadata.GetMetadata());
    }

    return columns;
}
/// <summary>
/// Describes one output column per column pair of the parent. Every output shares the single
/// <c>_type</c> and receives whatever annotations <c>AddMetadata</c> adds for its index.
/// </summary>
/// <returns>An array with one column description per column pair.</returns>
protected override DataViewSchema.DetachedColumn[] GetOutputColumnsCore()
{
    var columns = new DataViewSchema.DetachedColumn[_parent.ColumnPairs.Length];

    for (int pair = 0; pair < _parent.ColumnPairs.Length; pair++)
    {
        // A fresh builder per column, filled in by AddMetadata.
        var annotations = new DataViewSchema.Annotations.Builder();
        AddMetadata(pair, annotations);

        columns[pair] = new DataViewSchema.DetachedColumn(
            _parent.ColumnPairs[pair].outputColumnName,
            _type,
            annotations.ToAnnotations());
    }

    return columns;
}
/// <summary>
/// Describes one output column per column pair of the parent. Each output takes its name from the
/// pair, its type from <c>_types</c>, and inherits only the <c>SlotNames</c> annotation from the
/// input column it maps back to.
/// </summary>
/// <returns>An array with one column description per column pair.</returns>
protected override DataViewSchema.DetachedColumn[] GetOutputColumnsCore()
{
    var columns = new DataViewSchema.DetachedColumn[_parent.ColumnPairs.Length];

    for (int pair = 0; pair < _parent.ColumnPairs.Length; pair++)
    {
        var annotations = new DataViewSchema.Annotations.Builder();

        // Only the slot-names annotation passes the filter.
        annotations.Add(
            InputSchema[ColMapNewToOld[pair]].Annotations,
            kind => kind == AnnotationUtils.Kinds.SlotNames);

        columns[pair] = new DataViewSchema.DetachedColumn(
            _parent.ColumnPairs[pair].outputColumnName,
            _types[pair],
            annotations.ToAnnotations());
    }

    return columns;
}
/// <summary>
/// Describes one output column per column pair of the parent, using the pair's output name and
/// the matching entry of <c>_types</c>. The columns are created with a null annotations argument.
/// </summary>
/// <returns>An array with one column description per column pair.</returns>
protected override DataViewSchema.DetachedColumn[] GetOutputColumnsCore()
{
    var columns = new DataViewSchema.DetachedColumn[_parent.ColumnPairs.Length];

    for (int pair = 0; pair < _parent.ColumnPairs.Length; pair++)
    {
        // The input column's index is looked up only to assert that it is non-negative;
        // the index itself is not used when building the output description.
        InputSchema.TryGetColumnIndex(_parent.ColumnPairs[pair].inputColumnName, out int inputIndex);
        Host.Assert(inputIndex >= 0);

        columns[pair] = new DataViewSchema.DetachedColumn(
            _parent.ColumnPairs[pair].outputColumnName,
            _types[pair],
            null);
    }

    return columns;
}
/// <summary>
/// Describes one output column per column pair of the parent. A column configured for
/// principal component analysis with a positive rank is typed as a <c>Single</c> vector of that
/// rank; every other column keeps its source type. No annotations are attached.
/// </summary>
/// <remarks>
/// For PCA, the transform equation is y=U^Tx, where "^T" denotes matrix transpose, x is a 1-D vector
/// (i.e., the input column), and U=[u_1, ..., u_PcaNum] is an n-by-PcaNum matrix. The symbol u_k is the
/// eigenvector with the k-th largest associated eigenvalue of (1/m)*\sum_{i=1}^m x_ix_i^T, where x_i is
/// the whitened column at the i-th row and the training data has m rows.
/// For ZCA, the transform equation is y = US^{-1/2}U^Tx, where U=[u_1, ..., u_n] (all eigenvectors are
/// retained) and S is a diagonal matrix whose i-th diagonal element is the eigenvalue associated with u_i.
/// The first U^Tx rotates x to another linear space (whose bases are u_1, ..., u_n), then S^{-1/2} is
/// applied to ensure unit variance, and finally the scaled result is rotated back to the original space
/// using U (note that UU^T is the identity matrix, so U is the inverse rotation of U^T).
/// </remarks>
/// <returns>An array with one column description per column pair.</returns>
protected override DataViewSchema.DetachedColumn[] GetOutputColumnsCore()
{
    var columns = new DataViewSchema.DetachedColumn[_parent.ColumnPairs.Length];

    for (int pair = 0; pair < _parent.ColumnPairs.Length; pair++)
    {
        // The input column's index is looked up only to assert that it is non-negative.
        InputSchema.TryGetColumnIndex(_parent.ColumnPairs[pair].inputColumnName, out int inputIndex);
        Host.Assert(inputIndex >= 0);

        var options = _parent._columns[pair];

        DataViewType outputType;
        if (options.Kind == WhiteningKind.PrincipalComponentAnalysis && options.Rank > 0)
        {
            // PCA with an explicit rank yields a vector of exactly that many components.
            outputType = new VectorDataViewType(NumberDataViewType.Single, options.Rank);
        }
        else
        {
            outputType = _srcTypes[pair];
        }

        columns[pair] = new DataViewSchema.DetachedColumn(
            _parent.ColumnPairs[pair].outputColumnName,
            outputType,
            null);
    }

    return columns;
}
/// <summary>
/// Registers a type-conversion estimator for the given columns and then rewrites the matching
/// entries of <c>_schema</c> so they carry the converted type.
/// </summary>
/// <param name="convertColumnIndex">Indices into <c>_schema</c> of the columns to convert.</param>
/// <param name="convertedType">Target data kind for the conversion.</param>
private void ConvertColumnsToType(List<int> convertColumnIndex, DataKind convertedType)
{
    AddEstimatorAndKeepOldNames(
        pairs => _mlContext.Transforms.Conversion.ConvertType(pairs, convertedType),
        convertColumnIndex);

    // Mirror the conversion in the tracked schema: the name and annotations stay, the type changes.
    foreach (int index in convertColumnIndex)
    {
        var existing = _schema[index];
        _schema[index] = new DataViewSchema.DetachedColumn(
            existing.Name,
            MLIndex.DataKindToDataViewType[convertedType],
            existing.Annotations);
    }
}
/// <summary>
/// Describes one output column per entry of the parent's <c>Outputs</c>, pairing each with the type
/// found at the same position in the parent's <c>OutputTypes</c> and with the annotations added by
/// <c>AddSlotNames</c>.
/// </summary>
/// <remarks>
/// An ONNX output name that ends with <c>.output</c> is exposed under the name with that trailing
/// suffix removed; any other name is used unchanged. Only the trailing occurrence is removed, so
/// <c>layer.outputs.output</c> becomes <c>layer.outputs</c>. Replacing every occurrence of the suffix
/// would instead mangle that name into <c>layers</c>.
/// </remarks>
/// <returns>An array with as many column descriptions as the parent has outputs.</returns>
protected override DataViewSchema.DetachedColumn[] GetOutputColumnsCore()
{
    const string stdSuffix = ".output";

    var info = new DataViewSchema.DetachedColumn[_parent.Outputs.Length];
    for (int i = 0; i < _parent.Outputs.Length; i++)
    {
        var onnxOutputName = _parent.Outputs[i];

        // Column names are identifiers, so the suffix test is ordinal rather than culture-sensitive,
        // and only the trailing suffix is cut off.
        // NOTE(review): any other code that derives a column name from an ONNX output name should
        // apply the same rule - confirm.
        var columnName = onnxOutputName.EndsWith(stdSuffix, System.StringComparison.Ordinal)
            ? onnxOutputName.Substring(0, onnxOutputName.Length - stdSuffix.Length)
            : onnxOutputName;

        var builder = new DataViewSchema.Annotations.Builder();
        AddSlotNames(columnName, builder);

        info[i] = new DataViewSchema.DetachedColumn(columnName, _parent.OutputTypes[i], builder.ToAnnotations());
    }

    return info;
}
/// <summary>
/// Describes the three output columns: the cluster id, the sorted clusters, and the sorted
/// cluster scores. The two sorted columns carry slot names produced by
/// <c>CreateSlotNamesGetter</c> with the prefixes "Cluster" and "Score" respectively; the cluster
/// id column is created with a null annotations argument.
/// </summary>
/// <returns>A three-element array with each column stored at its designated position.</returns>
private protected override DataViewSchema.DetachedColumn[] GetOutputColumnsCore()
{
    var columns = new DataViewSchema.DetachedColumn[3];
    columns[ClusterIdCol] = new DataViewSchema.DetachedColumn(ClusterId, _types[ClusterIdCol], null);

    // Both slot-name annotations have as many entries as a text vector sized by the cluster count.
    var slotNamesType = new VectorType(TextDataViewType.Instance, _numClusters);
    var clusterNames = new DataViewSchema.Annotations.Builder();
    int slotCount = slotNamesType.GetVectorSize();
    clusterNames.AddSlotNames(slotCount, CreateSlotNamesGetter(_numClusters, "Cluster"));

    var scoreNames = new DataViewSchema.Annotations.Builder();
    scoreNames.AddSlotNames(slotCount, CreateSlotNamesGetter(_numClusters, "Score"));

    columns[SortedClusterCol] = new DataViewSchema.DetachedColumn(
        SortedClusters,
        _types[SortedClusterCol],
        clusterNames.ToAnnotations());
    columns[SortedClusterScoreCol] = new DataViewSchema.DetachedColumn(
        SortedClusterScores,
        _types[SortedClusterScoreCol],
        scoreNames.ToAnnotations());

    return columns;
}