/// <summary>
/// Builds the output schema of a sequence predictor. The schema holds exactly one column, which is always
/// named <see cref="MetadataUtils.Const.ScoreValueKind.PredictedLabel"/>.
/// </summary>
/// <param name="scoreType">Type of the score column produced by the sequence predictor.</param>
/// <param name="scoreColumnKindValue">Value stored in the column's metadata under the key
/// <see cref="MetadataUtils.Kinds.ScoreColumnKind"/>.</param>
/// <param name="keyNames">Tags of all possible output values. Sequence predictors usually generate integer
/// outputs; for example, output 0 could be mapped to "Sell" and 1 to "Buy" when predicting a stock trend.
/// When the buffer's length is zero, no key-value metadata is attached.</param>
/// <returns>The <see cref="DataViewSchema"/> of the sequence predictor's output.</returns>
public static DataViewSchema CreateSequencePredictionSchema(DataViewType scoreType, string scoreColumnKindValue, VBuffer<ReadOnlyMemory<char>> keyNames = default)
{
    Contracts.CheckValue(scoreType, nameof(scoreType));
    Contracts.CheckValue(scoreColumnKindValue, nameof(scoreColumnKindValue));

    var annotations = new MetadataBuilder();

    // Key names of the predicted keys come first, but only when the caller supplied some.
    bool hasKeyNames = keyNames.Length > 0;
    if (hasKeyNames)
    {
        annotations.AddKeyValues(
            keyNames.Length,
            TextDataViewType.Instance,
            (ref VBuffer<ReadOnlyMemory<char>> value) => { value = keyNames; });
    }

    // The column kind is chosen by the caller.
    annotations.Add(
        MetadataUtils.Kinds.ScoreColumnKind,
        TextDataViewType.Instance,
        (ref ReadOnlyMemory<char> value) => { value = scoreColumnKindValue.AsMemory(); });

    // The value kind is fixed: this column carries the predicted label.
    annotations.Add(
        MetadataUtils.Kinds.ScoreValueKind,
        TextDataViewType.Instance,
        (ref ReadOnlyMemory<char> value) => { value = MetadataUtils.Const.ScoreValueKind.PredictedLabel.AsMemory(); });

    // Assemble the single-column schema.
    var result = new SchemaBuilder();
    result.AddColumn(MetadataUtils.Const.ScoreValueKind.PredictedLabel, scoreType, annotations.GetMetadata());
    return result.GetSchema();
}
/// <summary>
/// Builds a two-column <see cref="DataViewSchema"/> for a binary classifier. Column 0 is the score column and
/// column 1 is the probability column. For a linear support vector machine, for example, the score is the inner
/// product of the linear coefficients and the input feature vector, and a calibrator converts that score into
/// a probability.
/// </summary>
/// <param name="scoreColumnName">Name given to the score column.</param>
/// <param name="probabilityColumnName">Name given to the probability column.</param>
/// <returns>The <see cref="DataViewSchema"/> of the binary classifier's output.</returns>
public static DataViewSchema CreateBinaryClassificationSchema(string scoreColumnName = MetadataUtils.Const.ScoreValueKind.Score,
    string probabilityColumnName = MetadataUtils.Const.ScoreValueKind.Probability)
{
    // Reuse the single-column factory for the score column; only its first (and only) column is needed.
    var scoreColumn = Create(NumberDataViewType.Single, MetadataUtils.Const.ScoreColumnKind.BinaryClassification, scoreColumnName)[0];

    // Metadata of the probability column: flagged as normalized, same column kind as the score column,
    // and a value kind identifying it as a probability.
    var probabilityAnnotations = new MetadataBuilder();
    probabilityAnnotations.Add(
        MetadataUtils.Kinds.IsNormalized,
        BooleanDataViewType.Instance,
        (ref bool value) => value = true);
    probabilityAnnotations.Add(
        MetadataUtils.Kinds.ScoreColumnKind,
        TextDataViewType.Instance,
        (ref ReadOnlyMemory<char> value) => value = MetadataUtils.Const.ScoreColumnKind.BinaryClassification.AsMemory());
    probabilityAnnotations.Add(
        MetadataUtils.Kinds.ScoreValueKind,
        TextDataViewType.Instance,
        (ref ReadOnlyMemory<char> value) => value = MetadataUtils.Const.ScoreValueKind.Probability.AsMemory());

    // Score first, probability second.
    var result = new SchemaBuilder();
    result.AddColumn(scoreColumn.Name, scoreColumn.Type, scoreColumn.Metadata);
    result.AddColumn(probabilityColumnName, NumberDataViewType.Single, probabilityAnnotations.GetMetadata());
    return result.GetSchema();
}
/// <summary>
/// Produces a copy of <paramref name="partialSchema"/> in which the score column additionally carries
/// label names as metadata. All other columns keep their metadata unchanged.
/// </summary>
/// <param name="partialSchema">Schema to copy columns from.</param>
/// <param name="scoreColumnIndex">Index of the column that receives the label names.</param>
/// <param name="labelNameType">Type of the label-name metadata value.</param>
/// <param name="labelNameGetter">Getter that produces the label names.</param>
/// <param name="labelNameKind">Metadata kind under which the label names are stored. Any existing metadata
/// of this kind on the score column is replaced.</param>
/// <returns>A new schema with the same columns, in the same order.</returns>
private DataViewSchema DecorateOutputSchema(DataViewSchema partialSchema, int scoreColumnIndex, VectorType labelNameType,
    ValueGetter<VBuffer<T>> labelNameGetter, string labelNameKind)
{
    var schemaBuilder = new SchemaBuilder();

    // Columns are re-added one by one so their order matches the schema of the mapper that computes the score column.
    for (int col = 0; col < partialSchema.Count; ++col)
    {
        var column = partialSchema[col];
        var columnMetadata = new MetadataBuilder();

        if (col != scoreColumnIndex)
        {
            // Only the score column is affected by this transform; everything else is copied verbatim.
            columnMetadata.Add(column.Metadata, selector: kind => true);
        }
        else
        {
            // Keep every existing entry except one of the label-name kind, then attach the label names.
            columnMetadata.Add(column.Metadata, selector: kind => kind != labelNameKind);
            columnMetadata.Add(labelNameKind, labelNameType, labelNameGetter);
        }

        // Metadata is read-only, so a fresh column is created rather than appending to the existing one.
        schemaBuilder.AddColumn(column.Name, column.Type, columnMetadata.GetMetadata());
    }

    return schemaBuilder.GetSchema();
}
/// <summary>
/// This is very similar to <see cref="Create(DataViewType, string, string)"/> but adds one extra metadata field
/// (slot names, one per quantile) to the only score column.
/// </summary>
/// <param name="scoreType">Output element's type of the quantile regressor. It must be a
/// <see cref="PrimitiveDataViewType"/>; the score column itself is a vector of this type with one slot per quantile.</param>
/// <param name="quantiles">Quantiles used in the quantile regressor. Must not be null.</param>
/// <returns><see cref="DataViewSchema"/> of the quantile regressor's output.</returns>
public static DataViewSchema CreateQuantileRegressionSchema(DataViewType scoreType, double[] quantiles)
{
    Contracts.CheckValue(scoreType, nameof(scoreType));

    // Cast once; a null result means the caller passed a non-primitive type.
    var itemType = scoreType as PrimitiveDataViewType;
    Contracts.CheckValue(itemType, nameof(scoreType));

    // This is a public entry point, so the array is validated with a check rather than an assert:
    // a null array must be reported as an argument error instead of failing later at quantiles.Length.
    Contracts.CheckValue(quantiles, nameof(quantiles));

    // Create a schema using the standard function. The produced schema is then extended with one metadata field.
    var partialSchema = Create(new VectorType(itemType, quantiles.Length), MetadataUtils.Const.ScoreColumnKind.QuantileRegression);
    var scoreColumn = partialSchema[0];

    var metadataBuilder = new MetadataBuilder();

    // Add the extra metadata: one slot name per quantile.
    metadataBuilder.AddSlotNames(quantiles.Length, (ref VBuffer<ReadOnlyMemory<char>> value) =>
    {
        var bufferEditor = VBufferEditor.Create(ref value, quantiles.Length);
        for (int i = 0; i < quantiles.Length; ++i)
        {
            // NOTE(review): this formatting uses the current culture, so the decimal separator in the
            // slot name may vary between machines - confirm whether invariant formatting is wanted.
            bufferEditor.Values[i] = string.Format("Quantile-{0}", quantiles[i]).AsMemory();
        }
        value = bufferEditor.Commit();
    });

    // Copy the default metadata from the partial schema.
    metadataBuilder.Add(scoreColumn.Metadata, (string kind) => true);

    // Build a schema consisting of a single column. Compared with the partial schema, the only difference
    // is the additional metadata field.
    var schemaBuilder = new SchemaBuilder();
    schemaBuilder.AddColumn(scoreColumn.Name, scoreColumn.Type, metadataBuilder.GetMetadata());
    return schemaBuilder.GetSchema();
}
/// <summary>
/// Builds a <see cref="DataViewSchema"/> made of a single score column.
/// </summary>
/// <param name="scoreType">Type of the score column.</param>
/// <param name="scoreColumnKindValue">Kind of the score column, stored in the column's metadata under
/// <see cref="MetadataUtils.Kinds.ScoreColumnKind"/>. Must be non-empty.</param>
/// <param name="scoreColumnName">Name of the score column in the generated <see cref="DataViewSchema"/>.</param>
/// <returns>A <see cref="DataViewSchema"/> containing exactly one column.</returns>
public static DataViewSchema Create(DataViewType scoreType, string scoreColumnKindValue, string scoreColumnName = MetadataUtils.Const.ScoreValueKind.Score)
{
    Contracts.CheckValue(scoreType, nameof(scoreType));
    Contracts.CheckNonEmpty(scoreColumnKindValue, nameof(scoreColumnKindValue));

    var annotations = new MetadataBuilder();

    // Caller-controlled entry: the score column kind.
    annotations.Add(
        MetadataUtils.Kinds.ScoreColumnKind,
        TextDataViewType.Instance,
        (ref ReadOnlyMemory<char> value) => value = scoreColumnKindValue.AsMemory());

    // Constant entry: the value kind is always "score".
    annotations.Add(
        MetadataUtils.Kinds.ScoreValueKind,
        TextDataViewType.Instance,
        (ref ReadOnlyMemory<char> value) => value = MetadataUtils.Const.ScoreValueKind.Score.AsMemory());

    // Wrap the one column into a schema.
    var result = new SchemaBuilder();
    result.AddColumn(scoreColumnName, scoreType, annotations.GetMetadata());
    return result.GetSchema();
}
/// <summary>
/// Builds a <see cref="DataViewSchema"/> that mirrors the columns exposed by <paramref name="inputBindings"/>:
/// same names, same types, and one metadata entry per metadata kind the bindings report for each column.
/// </summary>
/// <param name="inputBindings">Bindings to read column names, types and metadata from. Must not be null.</param>
/// <returns>The schema assembled from the bindings.</returns>
private static DataViewSchema CreateSchema(ColumnBindingsBase inputBindings)
{
    Contracts.CheckValue(inputBindings, nameof(inputBindings));

    var schemaBuilder = new SchemaBuilder();
    int columnCount = inputBindings.ColumnCount;
    for (int col = 0; col < columnCount; col++)
    {
        var columnMetadata = new MetadataBuilder();
        foreach (var entry in inputBindings.GetMetadataTypes(col))
        {
            var kind = entry.Key;
            var type = entry.Value;

            // The getter is obtained through MarshalInvoke, dispatching on the metadata value's raw type.
            // NOTE(review): the <int> type argument looks like a placeholder that MarshalInvoke substitutes - confirm.
            var getter = Utils.MarshalInvoke(GetMetadataGetterDelegate<int>, type.RawType, inputBindings, col, kind);
            columnMetadata.Add(kind, type, getter);
        }

        schemaBuilder.AddColumn(inputBindings.GetColumnName(col), inputBindings.GetColumnType(col), columnMetadata.GetMetadata());
    }

    return schemaBuilder.GetSchema();
}
/// <summary>
/// Converts an <see cref="ISchema"/> into a <see cref="Schema"/>. If the input already is a
/// <see cref="Schema"/>, that same instance is returned; otherwise a new schema is built with the same
/// column names, types and metadata entries.
/// </summary>
/// <param name="inputSchema">Schema to convert. Must not be null.</param>
/// <returns>The input itself when it is a <see cref="Schema"/>, otherwise a newly built equivalent.</returns>
internal static Schema Create(ISchema inputSchema)
{
    Contracts.CheckValue(inputSchema, nameof(inputSchema));

    // Nothing to convert when the input is already the target type.
    if (inputSchema is Schema existing)
    {
        return existing;
    }

    var schemaBuilder = new SchemaBuilder();
    int columnCount = inputSchema.ColumnCount;
    for (int col = 0; col < columnCount; col++)
    {
        var columnMetadata = new MetadataBuilder();
        foreach (var entry in inputSchema.GetMetadataTypes(col))
        {
            var kind = entry.Key;
            var type = entry.Value;

            // The getter is obtained through MarshalInvoke, dispatching on the metadata value's raw type.
            // NOTE(review): the <int> type argument looks like a placeholder that MarshalInvoke substitutes - confirm.
            var getter = Utils.MarshalInvoke(GetMetadataGetterDelegate<int>, type.RawType, inputSchema, col, kind);
            columnMetadata.Add(kind, type, getter);
        }

        schemaBuilder.AddColumn(inputSchema.GetColumnName(col), inputSchema.GetColumnType(col), columnMetadata.GetMetadata());
    }

    return schemaBuilder.GetSchema();
}
/// <summary>
/// Binds the featurizer mapper to a role-mapped schema and builds the three-column output schema
/// (tree values, leaf indicators, path indicators).
/// </summary>
/// <param name="ectx">Exception context used for the assertions in this constructor.</param>
/// <param name="owner">The bindable mapper that owns the tree ensemble and supplies slot names.</param>
/// <param name="schema">Input role-mapped schema; it must have a feature column.</param>
public BoundMapper(IExceptionContext ectx, TreeEnsembleFeaturizerBindableMapper owner, RoleMappedSchema schema)
{
    Contracts.AssertValue(ectx);
    ectx.AssertValue(owner);
    ectx.AssertValue(schema);
    ectx.Assert(schema.Feature.HasValue);

    _ectx = ectx;
    _owner = owner;
    InputRoleMappedSchema = schema;

    int treeCount = owner._ensemble.TrainedEnsemble.NumTrees;
    int leafCount = owner._totalLeafCount;

    // One slot per tree: the output of each tree on a given example.
    var treeValueType = new VectorType(NumberType.Float, treeCount);

    // One slot per leaf in the whole ensemble: an indicator vector marking the leaf the example
    // ends up in, for every tree.
    var leafIdType = new VectorType(NumberType.Float, leafCount);

    // One slot per internal node in the whole ensemble: an indicator vector marking the nodes on the
    // example's path in every tree.
    // In a binary tree, #internal + #leaf = 2 * #internal + 1 (every node except the root is a child of an
    // internal node), hence #internal = #leaf - 1 per tree. Summed over the ensemble, the number of internal
    // nodes is #leaf - #trees.
    var pathIdType = new VectorType(NumberType.Float, leafCount - treeCount);

    // Slot names for the tree values.
    var treeMetadata = new MetadataBuilder();
    treeMetadata.Add(
        MetadataUtils.Kinds.SlotNames,
        MetadataUtils.GetNamesType(treeValueType.Size),
        (ValueGetter<VBuffer<ReadOnlyMemory<char>>>)owner.GetTreeSlotNames);

    // Slot names for the leaf IDs; the column is also flagged as normalized.
    var leafMetadata = new MetadataBuilder();
    leafMetadata.Add(
        MetadataUtils.Kinds.SlotNames,
        MetadataUtils.GetNamesType(leafIdType.Size),
        (ValueGetter<VBuffer<ReadOnlyMemory<char>>>)owner.GetLeafSlotNames);
    leafMetadata.Add(MetadataUtils.Kinds.IsNormalized, BoolType.Instance, (ref bool value) => { value = true; });

    // Slot names for the path IDs; the column is also flagged as normalized.
    var pathMetadata = new MetadataBuilder();
    pathMetadata.Add(
        MetadataUtils.Kinds.SlotNames,
        MetadataUtils.GetNamesType(pathIdType.Size),
        (ValueGetter<VBuffer<ReadOnlyMemory<char>>>)owner.GetPathSlotNames);
    pathMetadata.Add(MetadataUtils.Kinds.IsNormalized, BoolType.Instance, (ref bool value) => { value = true; });

    // Column order matters: trees, then leaves, then paths (verified by the assertions below).
    var outputBuilder = new SchemaBuilder();
    outputBuilder.AddColumn(OutputColumnNames.Trees, treeValueType, treeMetadata.GetMetadata());
    outputBuilder.AddColumn(OutputColumnNames.Leaves, leafIdType, leafMetadata.GetMetadata());
    outputBuilder.AddColumn(OutputColumnNames.Paths, pathIdType, pathMetadata.GetMetadata());
    OutputSchema = outputBuilder.GetSchema();

    // Tree values must be the first output column.
    Contracts.Assert(OutputSchema[OutputColumnNames.Trees].Index == TreeValuesColumnId);
    // Leaf IDs must be the second output column.
    Contracts.Assert(OutputSchema[OutputColumnNames.Leaves].Index == LeafIdsColumnId);
    // Path IDs must be the third output column.
    Contracts.Assert(OutputSchema[OutputColumnNames.Paths].Index == PathIdsColumnId);
}
/// <summary>
/// Registers key-values metadata on <paramref name="builder"/> under
/// <see cref="MetadataUtils.Kinds.KeyValues"/>.
/// </summary>
/// <typeparam name="TValue">The value type of the key values.</typeparam>
/// <param name="builder">The MetadataBuilder that receives the key values.</param>
/// <param name="size">The size of the key-values vector.</param>
/// <param name="valueType">The item type of the key values. Its raw type must match <typeparamref name="TValue"/>.</param>
/// <param name="getter">The getter delegate that produces the key values.</param>
public static void AddKeyValues<TValue>(this MetadataBuilder builder, int size, PrimitiveDataViewType valueType, ValueGetter<VBuffer<TValue>> getter)
{
    // The metadata value is a vector of the given item type and size.
    var keyValuesType = new VectorType(valueType, size);
    builder.Add(MetadataUtils.Kinds.KeyValues, keyValuesType, getter);
}
/// <summary>
/// Registers slot-names metadata on <paramref name="builder"/> under
/// <see cref="MetadataUtils.Kinds.SlotNames"/>.
/// </summary>
/// <param name="builder">The MetadataBuilder that receives the slot names.</param>
/// <param name="size">The size of the slot-names vector.</param>
/// <param name="getter">The getter delegate that produces the slot names.</param>
public static void AddSlotNames(this MetadataBuilder builder, int size, ValueGetter<VBuffer<ReadOnlyMemory<char>>> getter)
{
    // Slot names are stored as a text vector of the given size.
    var slotNamesType = new VectorType(TextDataViewType.Instance, size);
    builder.Add(MetadataUtils.Kinds.SlotNames, slotNamesType, getter);
}