/// <summary>
/// Build the two-column <see cref="DataViewSchema"/> emitted by a binary classifier. Column 0 is the score column
/// and column 1 is the probability column. For example, for a linear support vector machine the score is the inner
/// product of the linear coefficients and the input feature vector, and the probability is obtained from the score
/// using a calibrator.
/// </summary>
/// <param name="scoreColumnName">Name given to the score column.</param>
/// <param name="probabilityColumnName">Name given to the probability column.</param>
/// <returns>The <see cref="DataViewSchema"/> of a binary classifier's output.</returns>
public static DataViewSchema CreateBinaryClassificationSchema(string scoreColumnName = MetadataUtils.Const.ScoreValueKind.Score,
    string probabilityColumnName = MetadataUtils.Const.ScoreValueKind.Probability)
{
    // The single-column factory already knows how to describe a score column; borrow that column as-is.
    var scoreColumn = Create(NumberDataViewType.Single, MetadataUtils.Const.ScoreColumnKind.BinaryClassification, scoreColumnName)[0];

    // The probability column carries three metadata fields: it is normalized, it belongs to a binary
    // classification scorer, and its value kind is "probability".
    var probabilityMetadata = new DataViewSchema.Metadata.Builder();
    probabilityMetadata.Add(MetadataUtils.Kinds.IsNormalized, BooleanDataViewType.Instance,
        (ref bool isNormalized) => isNormalized = true);
    probabilityMetadata.Add(MetadataUtils.Kinds.ScoreColumnKind, TextDataViewType.Instance,
        (ref ReadOnlyMemory<char> columnKind) => columnKind = MetadataUtils.Const.ScoreColumnKind.BinaryClassification.AsMemory());
    probabilityMetadata.Add(MetadataUtils.Kinds.ScoreValueKind, TextDataViewType.Instance,
        (ref ReadOnlyMemory<char> valueKind) => valueKind = MetadataUtils.Const.ScoreValueKind.Probability.AsMemory());

    // Score first, probability second.
    var outputSchema = new DataViewSchema.Builder();
    outputSchema.AddColumn(scoreColumn.Name, scoreColumn.Type, scoreColumn.Metadata);
    outputSchema.AddColumn(probabilityColumnName, NumberDataViewType.Single, probabilityMetadata.ToMetadata());
    return outputSchema.ToSchema();
}
/// <summary>
/// Produce a copy of <paramref name="partialSchema"/> in which the score column additionally carries the label
/// names as a metadata field. All other columns are copied with their metadata untouched.
/// </summary>
/// <param name="partialSchema">Schema to copy.</param>
/// <param name="scoreColumnIndex">Index, in <paramref name="partialSchema"/>, of the score column.</param>
/// <param name="labelNameType">Type of the label-name metadata field.</param>
/// <param name="labelNameGetter">Getter producing the label names.</param>
/// <param name="labelNameKind">Metadata kind under which the label names are stored.</param>
/// <returns>The decorated schema, with the same column order as <paramref name="partialSchema"/>.</returns>
private DataViewSchema DecorateOutputSchema(DataViewSchema partialSchema, int scoreColumnIndex, VectorType labelNameType,
    ValueGetter<VBuffer<T>> labelNameGetter, string labelNameKind)
{
    var schemaBuilder = new DataViewSchema.Builder();

    // Columns are re-added one by one, in order, so that indices match the schema of the mapper that
    // computes the score column.
    for (int columnIndex = 0; columnIndex < partialSchema.Count; columnIndex++)
    {
        var column = partialSchema[columnIndex];
        var metadataBuilder = new DataViewSchema.Metadata.Builder();

        if (columnIndex != scoreColumnIndex)
        {
            // Not the score column: keep every existing metadata field.
            metadataBuilder.Add(column.Metadata, selector: kind => true);
        }
        else
        {
            // Score column: keep everything except a pre-existing field of the same kind, then add label names.
            metadataBuilder.Add(column.Metadata, selector: kind => kind != labelNameKind);
            metadataBuilder.Add(labelNameKind, labelNameType, labelNameGetter);
        }

        // Metadata is read-only, so a new column is created rather than amending the existing one.
        schemaBuilder.AddColumn(column.Name, column.Type, metadataBuilder.ToMetadata());
    }

    return schemaBuilder.ToSchema();
}
/// <summary>
/// Build the schema of a sequence predictor's output. The schema has a single column, which is always named
/// <see cref="MetadataUtils.Const.ScoreValueKind.PredictedLabel"/>.
/// </summary>
/// <param name="scoreType">Type of the score column produced by the sequence predictor.</param>
/// <param name="scoreColumnKindValue">Metadata value of the score column; it is stored under the key
/// <see cref="MetadataUtils.Kinds.ScoreColumnKind"/>.</param>
/// <param name="keyNames">Sequence predictors usually generate integer outputs. This argument gives the tags of all
/// possible output values. For example, output integer 0 could be mapped to "Sell" and 1 to "Buy" when predicting
/// a stock trend. When empty, no key-value metadata is added.</param>
/// <returns>The <see cref="DataViewSchema"/> of a sequence predictor's output.</returns>
public static DataViewSchema CreateSequencePredictionSchema(DataViewType scoreType, string scoreColumnKindValue,
    VBuffer<ReadOnlyMemory<char>> keyNames = default)
{
    Contracts.CheckValue(scoreType, nameof(scoreType));
    Contracts.CheckValue(scoreColumnKindValue, nameof(scoreColumnKindValue));

    var predictedLabelMetadata = new DataViewSchema.Metadata.Builder();

    // Key names of the predicted keys come first, but only when the caller supplied some.
    int keyCount = keyNames.Length;
    if (keyCount > 0)
    {
        predictedLabelMetadata.AddKeyValues(keyCount, TextDataViewType.Instance,
            (ref VBuffer<ReadOnlyMemory<char>> names) => { names = keyNames; });
    }

    // Then the two score-describing fields: the caller-supplied column kind and the fixed value kind.
    predictedLabelMetadata.Add(MetadataUtils.Kinds.ScoreColumnKind, TextDataViewType.Instance,
        (ref ReadOnlyMemory<char> columnKind) => { columnKind = scoreColumnKindValue.AsMemory(); });
    predictedLabelMetadata.Add(MetadataUtils.Kinds.ScoreValueKind, TextDataViewType.Instance,
        (ref ReadOnlyMemory<char> valueKind) => { valueKind = MetadataUtils.Const.ScoreValueKind.PredictedLabel.AsMemory(); });

    // The output schema consists of that single column.
    var outputSchema = new DataViewSchema.Builder();
    outputSchema.AddColumn(MetadataUtils.Const.ScoreValueKind.PredictedLabel, scoreType, predictedLabelMetadata.ToMetadata());
    return outputSchema.ToSchema();
}
/// <summary>
/// Derive the output schema of a <see cref="GroupTransform"/> from its input schema.
/// </summary>
/// <param name="sourceSchema">Input schema.</param>
/// <returns>The output schema: the group columns first, followed by the aggregated (keep) columns.</returns>
private DataViewSchema BuildOutputSchema(DataViewSchema sourceSchema)
{
    var outputSchema = new DataViewSchema.Builder();

    // Group(-key) columns partition the input rows: rows sharing a key value are merged into one output row.
    // These columns are passed through with their type and metadata unchanged.
    foreach (var keyColumnName in _groupColumns)
    {
        var keyColumn = sourceSchema[keyColumnName];
        outputSchema.AddColumn(keyColumnName, keyColumn.Type, keyColumn.Metadata);
    }

    // Aggregated (aka keep) columns become vectors of the source column's primitive type.
    foreach (var keepColumnName in _keepColumns)
    {
        var keepColumn = sourceSchema[keepColumnName];

        // Only the IsNormalized and KeyValues metadata fields are carried over.
        var keptMetadata = new DataViewSchema.Metadata.Builder();
        keptMetadata.Add(keepColumn.Metadata,
            kind => kind == MetadataUtils.Kinds.IsNormalized || kind == MetadataUtils.Kinds.KeyValues);

        // The source column must be primitive; the cast yields null otherwise and the check below throws.
        var aggregatedValueType = keepColumn.Type as PrimitiveDataViewType;
        _ectx.CheckValue(aggregatedValueType, nameof(aggregatedValueType),
            "Columns being aggregated must be primitive types such as string, float, or integer");

        outputSchema.AddColumn(keepColumnName, new VectorType(aggregatedValueType), keptMetadata.ToMetadata());
    }

    return outputSchema.ToSchema();
}
/// <summary>
/// This is very similar to <see cref="Create(DataViewType, string, string)"/> but adds one extra metadata field
/// (slot names, one per quantile) to the only score column.
/// </summary>
/// <param name="scoreType">Output element's type of quantile regressor. It must be a <see cref="PrimitiveDataViewType"/>;
/// the score column itself is a vector of this type with one slot per quantile.</param>
/// <param name="quantiles">Quantiles used in quantile regressor. Must not be null.</param>
/// <returns><see cref="DataViewSchema"/> of quantile regressor's output.</returns>
public static DataViewSchema CreateQuantileRegressionSchema(DataViewType scoreType, double[] quantiles)
{
    Contracts.CheckValue(scoreType, nameof(scoreType));
    // The element type has to be primitive; the cast yields null otherwise and the check throws.
    var itemType = scoreType as PrimitiveDataViewType;
    Contracts.CheckValue(itemType, nameof(scoreType));
    // Validate with CheckValue, like the other argument of this public method, so that a null array is
    // reported as an invalid argument instead of failing later on 'quantiles.Length'.
    Contracts.CheckValue(quantiles, nameof(quantiles));

    // Create a schema using standard function. The produced schema will be modified by adding one metadata column.
    var partialSchema = Create(new VectorType(itemType, quantiles.Length), MetadataUtils.Const.ScoreColumnKind.QuantileRegression);
    var scoreColumn = partialSchema[0];

    var metadataBuilder = new DataViewSchema.Metadata.Builder();

    // Add the extra metadata: the i-th slot is named after the i-th quantile.
    // NOTE(review): string.Format uses the current culture here, so the decimal separator in the slot names
    // presumably follows the machine's culture — confirm whether that is intended.
    metadataBuilder.AddSlotNames(quantiles.Length, (ref VBuffer<ReadOnlyMemory<char>> value) =>
    {
        var bufferEditor = VBufferEditor.Create(ref value, quantiles.Length);
        for (int i = 0; i < quantiles.Length; ++i)
        {
            bufferEditor.Values[i] = string.Format("Quantile-{0}", quantiles[i]).AsMemory();
        }
        value = bufferEditor.Commit();
    });

    // Copy default metadata from the partial schema.
    metadataBuilder.Add(scoreColumn.Metadata, (string kind) => true);

    // Build a schema consisting of a single column. Comparing with partial schema, the only difference is a metadata field.
    var schemaBuilder = new DataViewSchema.Builder();
    schemaBuilder.AddColumn(scoreColumn.Name, scoreColumn.Type, metadataBuilder.ToMetadata());
    return schemaBuilder.ToSchema();
}
/// <summary>
/// Build a <see cref="DataViewSchema"/> from a <see cref="SchemaShape"/>. Every column of the shape becomes a column
/// of the schema, and every metadata entry of a shape column becomes a metadata field whose getter is obtained from
/// <c>GetDefaultVectorGetter</c> (vector-typed metadata) or <c>GetDefaultGetter</c> (everything else).
/// </summary>
/// <param name="shape">The shape to convert.</param>
/// <returns>A schema with one column per column of <paramref name="shape"/>, in the same order.</returns>
public static DataViewSchema Create(SchemaShape shape)
{
    var schemaBuilder = new DataViewSchema.Builder();

    for (int columnIndex = 0; columnIndex < shape.Count; columnIndex++)
    {
        var shapeColumn = shape[columnIndex];
        var shapeMetadata = shapeColumn.Metadata;
        var metadataBuilder = new DataViewSchema.Metadata.Builder();

        for (int metadataIndex = 0; metadataIndex < shapeMetadata.Count; metadataIndex++)
        {
            var metadataColumn = shapeMetadata[metadataIndex];
            var metadataType = MakeColumnType(metadataColumn);

            // The getter factories are generic; MarshalInvoke re-targets them from the placeholder <int>
            // to the actual raw type (the item's raw type for vectors).
            Delegate getter;
            if (metadataType is VectorType vectorType)
            {
                getter = Utils.MarshalInvoke(GetDefaultVectorGetter<int>, vectorType.ItemType.RawType);
            }
            else
            {
                getter = Utils.MarshalInvoke(GetDefaultGetter<int>, metadataType.RawType);
            }

            metadataBuilder.Add(metadataColumn.Name, metadataType, getter);
        }

        schemaBuilder.AddColumn(shapeColumn.Name, MakeColumnType(shapeColumn), metadataBuilder.ToMetadata());
    }

    return schemaBuilder.ToSchema();
}
/// <summary>
/// Build a <see cref="DataViewSchema"/> made of exactly one score column.
/// </summary>
/// <param name="scoreType">Type of the score column.</param>
/// <param name="scoreColumnKindValue">Kind of the score column; stored in the column's metadata under
/// <see cref="MetadataUtils.Kinds.ScoreColumnKind"/>. Must be non-empty.</param>
/// <param name="scoreColumnName">Name of the score column in the generated <see cref="DataViewSchema"/>.</param>
/// <returns>A <see cref="DataViewSchema"/> containing only that column.</returns>
public static DataViewSchema Create(DataViewType scoreType, string scoreColumnKindValue,
    string scoreColumnName = MetadataUtils.Const.ScoreValueKind.Score)
{
    Contracts.CheckValue(scoreType, nameof(scoreType));
    Contracts.CheckNonEmpty(scoreColumnKindValue, nameof(scoreColumnKindValue));

    // The column has two metadata fields: the caller chooses the column kind, the value kind is always "score".
    var scoreMetadata = new DataViewSchema.Metadata.Builder();
    scoreMetadata.Add(MetadataUtils.Kinds.ScoreColumnKind, TextDataViewType.Instance,
        (ref ReadOnlyMemory<char> columnKind) => columnKind = scoreColumnKindValue.AsMemory());
    scoreMetadata.Add(MetadataUtils.Kinds.ScoreValueKind, TextDataViewType.Instance,
        (ref ReadOnlyMemory<char> valueKind) => valueKind = MetadataUtils.Const.ScoreValueKind.Score.AsMemory());

    var outputSchema = new DataViewSchema.Builder();
    outputSchema.AddColumn(scoreColumnName, scoreType, scoreMetadata.ToMetadata());
    return outputSchema.ToSchema();
}
/// <summary>
/// Populate <paramref name="builder"/> with the metadata of the <paramref name="iinfo"/>-th output column:
/// the slot names of the corresponding input column, plus key values served by <c>GetKeyValues</c>.
/// </summary>
/// <param name="iinfo">Index into the parent's column pairs.</param>
/// <param name="builder">Metadata builder that receives the fields.</param>
private void AddMetadata(int iinfo, DataViewSchema.Metadata.Builder builder)
{
    // Only the slot names are carried over from the input column.
    var sourceColumnName = _parent.ColumnPairs[iinfo].inputColumnName;
    builder.Add(InputSchema[sourceColumnName].Metadata, kind => kind == MetadataUtils.Kinds.SlotNames);

    // Key values: a text vector of CharsCount entries.
    builder.AddKeyValues(CharsCount, TextDataViewType.Instance,
        (ref VBuffer<ReadOnlyMemory<char>> keyValues) => { GetKeyValues(iinfo, ref keyValues); });
}
/// <summary>
/// Describe the output columns of this mapper: one per column pair of the parent, each typed by the matching
/// entry of <c>_types</c> and carrying only the KeyValues and IsNormalized metadata of its input column.
/// </summary>
/// <returns>The detached output columns, in column-pair order.</returns>
protected override DataViewSchema.DetachedColumn[] GetOutputColumnsCore()
{
    var columnPairs = _parent.ColumnPairs;
    var outputColumns = new DataViewSchema.DetachedColumn[columnPairs.Length];

    for (int col = 0; col < columnPairs.Length; col++)
    {
        // ColMapNewToOld gives the input column backing the col-th output column.
        var inputMetadata = InputSchema[ColMapNewToOld[col]].Metadata;

        var keptMetadata = new DataViewSchema.Metadata.Builder();
        keptMetadata.Add(inputMetadata,
            kind => kind == MetadataUtils.Kinds.KeyValues || kind == MetadataUtils.Kinds.IsNormalized);

        outputColumns[col] = new DataViewSchema.DetachedColumn(columnPairs[col].outputColumnName, _types[col], keptMetadata.ToMetadata());
    }

    return outputColumns;
}
/// <summary>
/// Describe the output columns of this mapper: one per column pair of the parent, each typed by the matching
/// entry of <c>_types</c> and carrying only the SlotNames metadata of its input column.
/// </summary>
/// <returns>The detached output columns, in column-pair order.</returns>
protected override DataViewSchema.DetachedColumn[] GetOutputColumnsCore()
{
    var columnPairs = _parent.ColumnPairs;
    var outputColumns = new DataViewSchema.DetachedColumn[columnPairs.Length];

    for (int col = 0; col < columnPairs.Length; col++)
    {
        // ColMapNewToOld gives the input column backing the col-th output column.
        var inputMetadata = InputSchema[ColMapNewToOld[col]].Metadata;

        var slotNamesOnly = new DataViewSchema.Metadata.Builder();
        slotNamesOnly.Add(inputMetadata, kind => kind == MetadataUtils.Kinds.SlotNames);

        outputColumns[col] = new DataViewSchema.DetachedColumn(columnPairs[col].outputColumnName, _types[col], slotNamesOnly.ToMetadata());
    }

    return outputColumns;
}
/// <summary>
/// Bind the pivot columns against <paramref name="inputSchema"/> and compute the output schema of the ungroup
/// operation. Non-pivot columns are copied unchanged; pivot columns keep their name and position but take the
/// item type of the input column and a filtered subset of its metadata.
/// </summary>
/// <param name="ectx">Exception context used for assertions; may be null.</param>
/// <param name="inputSchema">Schema of the input data.</param>
/// <param name="mode">Ungroup mode, stored in <c>Mode</c>.</param>
/// <param name="pivotColumns">Names of the pivot columns; must be non-empty.</param>
public UngroupBinding(IExceptionContext ectx, DataViewSchema inputSchema, UngroupMode mode, string[] pivotColumns)
{
    Contracts.AssertValueOrNull(ectx);
    _ectx = ectx;
    _ectx.AssertValue(inputSchema);
    _ectx.AssertNonEmpty(pivotColumns);

    // Per the original note, storing the input schema is also what makes InputColumnCount valid.
    _inputSchema = inputSchema;
    Mode = mode;
    Bind(_ectx, inputSchema, pivotColumns, out _infos);

    // _pivotIndex maps an input column index to its position in _infos, or -1 for non-pivot columns.
    _pivotIndex = Utils.CreateArray(InputColumnCount, -1);
    for (int infoIndex = 0; infoIndex < _infos.Length; infoIndex++)
    {
        int inputIndex = _infos[infoIndex].Index;
        _ectx.Assert(_pivotIndex[inputIndex] == -1);
        _pivotIndex[inputIndex] = infoIndex;
    }

    // Walk the input columns in order so that every output column keeps the index of its input column.
    var outputSchema = new DataViewSchema.Builder();
    for (int col = 0; col < InputColumnCount; col++)
    {
        var inputColumn = inputSchema[col];

        if (_pivotIndex[col] < 0)
        {
            // Not a pivot column: plain copy.
            outputSchema.AddColumn(inputColumn.Name, inputColumn.Type, inputColumn.Metadata);
            continue;
        }

        // Pivot column: keep only the metadata that ShouldPreserveMetadata accepts.
        var pivotMetadata = new DataViewSchema.Metadata.Builder();
        pivotMetadata.Add(inputColumn.Metadata, kind => ShouldPreserveMetadata(kind));

        // To see why the output type is the item type, consider the input row
        //   Age UserID
        //   18  {"Amy", "Willy"}
        // where "Age" and "UserID" are column names. With "UserID" as the only pivot column, ungroup may produce
        //   Age UserID
        //   18  "Amy"
        //   18  "Willy"
        // so the output "UserID" column has the element type of the input "UserID" column.
        outputSchema.AddColumn(inputColumn.Name, inputColumn.Type.GetItemType(), pivotMetadata.ToMetadata());
    }

    OutputSchema = outputSchema.ToSchema();
}
/// <summary>
/// Create a binding exposing <paramref name="collection"/> as a one-column schema. The column is named after the
/// feature role, is a vector of <see cref="NumberDataViewType.Single"/> with one slot per entry of the collection,
/// and has slot-name metadata served by <c>GetSlotNames</c>.
/// </summary>
/// <param name="collection">The feature names; must not be null.</param>
public FeatureNameCollectionBinding(FeatureNameCollection collection)
{
    Contracts.CheckValue(collection, nameof(collection));
    _collection = collection;

    // Both the feature vector and its slot-name vector have one entry per feature name.
    int featureCount = collection.Count;
    _colType = new VectorType(NumberDataViewType.Single, featureCount);
    _slotNamesType = new VectorType(TextDataViewType.Instance, featureCount);

    var featureMetadata = new DataViewSchema.Metadata.Builder();
    featureMetadata.Add(MetadataUtils.Kinds.SlotNames, _slotNamesType,
        (ref VBuffer<ReadOnlyMemory<char>> names) => GetSlotNames(0, ref names));

    var schema = new DataViewSchema.Builder();
    schema.AddColumn(RoleMappedSchema.ColumnRole.Feature.Value, _colType, featureMetadata.ToMetadata());
    FeatureNameCollectionSchema = schema.ToSchema();
}
/// <summary>
/// Materialize the columns described by <paramref name="inputBindings"/> as a <see cref="DataViewSchema"/>.
/// Each column keeps its name and type, and every metadata kind reported by the bindings becomes a metadata
/// field backed by a getter obtained from <c>GetMetadataGetterDelegate</c>.
/// </summary>
/// <param name="inputBindings">The bindings to convert; must not be null.</param>
/// <returns>A schema with one column per bound column, in the same order.</returns>
private static DataViewSchema CreateSchema(ColumnBindingsBase inputBindings)
{
    Contracts.CheckValue(inputBindings, nameof(inputBindings));

    var schemaBuilder = new DataViewSchema.Builder();
    for (int col = 0; col < inputBindings.ColumnCount; col++)
    {
        var metadataBuilder = new DataViewSchema.Metadata.Builder();
        foreach (var kindAndType in inputBindings.GetMetadataTypes(col))
        {
            var kind = kindAndType.Key;
            var type = kindAndType.Value;

            // The getter factory is generic; MarshalInvoke re-targets it from the placeholder <int>
            // to the metadata's raw type.
            var getter = Utils.MarshalInvoke(GetMetadataGetterDelegate<int>, type.RawType, inputBindings, col, kind);
            metadataBuilder.Add(kind, type, getter);
        }

        schemaBuilder.AddColumn(inputBindings.GetColumnName(col), inputBindings.GetColumnType(col), metadataBuilder.ToMetadata());
    }

    return schemaBuilder.ToSchema();
}
void SimpleTest() { var metadataBuilder = new DataViewSchema.Metadata.Builder(); metadataBuilder.Add("M", NumberDataViewType.Single, (ref float v) => v = 484f); var schemaBuilder = new DataViewSchema.Builder(); schemaBuilder.AddColumn("A", new VectorType(NumberDataViewType.Single, 94)); schemaBuilder.AddColumn("B", new KeyType(typeof(uint), 17)); schemaBuilder.AddColumn("C", NumberDataViewType.Int32, metadataBuilder.ToMetadata()); var shape = SchemaShape.Create(schemaBuilder.ToSchema()); var fakeSchema = FakeSchemaFactory.Create(shape); var columnA = fakeSchema[0]; var columnB = fakeSchema[1]; var columnC = fakeSchema[2]; Assert.Equal("A", columnA.Name); Assert.Equal(NumberDataViewType.Single, columnA.Type.GetItemType()); Assert.Equal(10, columnA.Type.GetValueCount()); Assert.Equal("B", columnB.Name); Assert.Equal(InternalDataKind.U4, columnB.Type.GetRawKind()); Assert.Equal(10u, columnB.Type.GetKeyCount()); Assert.Equal("C", columnC.Name); Assert.Equal(NumberDataViewType.Int32, columnC.Type); var metaC = columnC.Metadata; Assert.Single(metaC.Schema); float mValue = -1; metaC.GetValue("M", ref mValue); Assert.Equal(default, mValue);
/// <summary>
/// Register key-values metadata on <paramref name="builder"/>, under the kind
/// <see cref="MetadataUtils.Kinds.KeyValues"/>.
/// </summary>
/// <typeparam name="TValue">The value type of key values.</typeparam>
/// <param name="builder">The <see cref="DataViewSchema.Metadata.Builder"/> that receives the key values.</param>
/// <param name="size">The length of the key-values vector.</param>
/// <param name="valueType">The item type of the key values. Its raw type must match <typeparamref name="TValue"/>.</param>
/// <param name="getter">The getter delegate producing the key values.</param>
public static void AddKeyValues<TValue>(this DataViewSchema.Metadata.Builder builder, int size,
    PrimitiveDataViewType valueType, ValueGetter<VBuffer<TValue>> getter)
{
    // Key values are stored as a vector of 'size' items of the given primitive type.
    var keyValuesType = new VectorType(valueType, size);
    builder.Add(MetadataUtils.Kinds.KeyValues, keyValuesType, getter);
}
/// <summary>
/// Register slot-names metadata on <paramref name="builder"/>, under the kind
/// <see cref="MetadataUtils.Kinds.SlotNames"/>.
/// </summary>
/// <param name="builder">The <see cref="DataViewSchema.Metadata.Builder"/> that receives the slot names.</param>
/// <param name="size">The length of the slot-names vector.</param>
/// <param name="getter">The getter delegate producing the slot names.</param>
public static void AddSlotNames(this DataViewSchema.Metadata.Builder builder, int size,
    ValueGetter<VBuffer<ReadOnlyMemory<char>>> getter)
{
    // Slot names are stored as a text vector of 'size' items.
    var slotNamesType = new VectorType(TextDataViewType.Instance, size);
    builder.Add(MetadataUtils.Kinds.SlotNames, slotNamesType, getter);
}
/// <summary>
/// Hash <paramref name="val"/> through a <c>HashingTransformer</c> as a scalar, as a dense vector and as a sparse
/// vector, in both unordered and ordered mode, and compare the results with the expected hashes.
/// </summary>
/// <typeparam name="T">Raw type of the value being hashed.</typeparam>
/// <param name="val">The value to hash.</param>
/// <param name="type">Data view type matching <typeparamref name="T"/>.</param>
/// <param name="expected">Expected hash in unordered mode.</param>
/// <param name="expectedOrdered">Expected hash in ordered mode at position 0 (and for the scalar).</param>
/// <param name="expectedOrdered3">Expected hash in ordered mode at position 3.</param>
private void HashTestCore<T>(T val, PrimitiveDataViewType type, uint expected, uint expectedOrdered, uint expectedOrdered3)
{
    const int bits = 10;

    // ---- Scalar input: a single metadata value "Foo" exposed as a row. ----
    var metadataBuilder = new DataViewSchema.Metadata.Builder();
    metadataBuilder.AddPrimitiveValue("Foo", type, val);
    var inputRow = MetadataUtils.MetadataAsRow(metadataBuilder.ToMetadata());

    // Scalar, unordered.
    var columnInfo = new HashingEstimator.ColumnInfo("Bar", "Foo", hashBits: bits);
    var transformer = new HashingTransformer(Env, new[] { columnInfo });
    var rowMapper = ((ITransformer)transformer).GetRowToRowMapper(inputRow.Schema);
    rowMapper.OutputSchema.TryGetColumnIndex("Bar", out int hashedColumn);
    var outputRow = rowMapper.GetRow(inputRow, c => c == hashedColumn);
    var scalarGetter = outputRow.GetGetter<uint>(hashedColumn);
    uint scalarHash = 0;
    scalarGetter(ref scalarHash);
    Assert.Equal(expected, scalarHash);

    // Scalar, ordered.
    columnInfo = new HashingEstimator.ColumnInfo("Bar", "Foo", hashBits: bits, ordered: true);
    transformer = new HashingTransformer(Env, new[] { columnInfo });
    rowMapper = ((ITransformer)transformer).GetRowToRowMapper(inputRow.Schema);
    rowMapper.OutputSchema.TryGetColumnIndex("Bar", out hashedColumn);
    outputRow = rowMapper.GetRow(inputRow, c => c == hashedColumn);
    scalarGetter = outputRow.GetGetter<uint>(hashedColumn);
    scalarGetter(ref scalarHash);
    Assert.Equal(expectedOrdered, scalarHash);

    // ---- Dense vector input: hashing must be consistent with the scalar case, at least in the first ----
    // ---- position and, in the unordered case, in every position.                                    ----
    const int vecLen = 5;
    var denseVec = new VBuffer<T>(vecLen, Utils.CreateArray(vecLen, val));
    metadataBuilder = new DataViewSchema.Metadata.Builder();
    metadataBuilder.Add("Foo", new VectorType(type, vecLen), (ref VBuffer<T> dst) => denseVec.CopyTo(ref dst));
    inputRow = MetadataUtils.MetadataAsRow(metadataBuilder.ToMetadata());

    // Dense, unordered.
    columnInfo = new HashingEstimator.ColumnInfo("Bar", "Foo", hashBits: bits, ordered: false);
    transformer = new HashingTransformer(Env, new[] { columnInfo });
    rowMapper = ((ITransformer)transformer).GetRowToRowMapper(inputRow.Schema);
    rowMapper.OutputSchema.TryGetColumnIndex("Bar", out hashedColumn);
    outputRow = rowMapper.GetRow(inputRow, c => c == hashedColumn);
    var vectorGetter = outputRow.GetGetter<VBuffer<uint>>(hashedColumn);
    VBuffer<uint> vectorHash = default;
    vectorGetter(ref vectorHash);

    Assert.Equal(vecLen, vectorHash.Length);
    // Every slot holds the same value, so every slot must hash to the unordered scalar hash.
    Assert.All(vectorHash.DenseValues(), v => Assert.Equal(expected, v));

    // Dense, ordered.
    columnInfo = new HashingEstimator.ColumnInfo("Bar", "Foo", hashBits: bits, ordered: true);
    transformer = new HashingTransformer(Env, new[] { columnInfo });
    rowMapper = ((ITransformer)transformer).GetRowToRowMapper(inputRow.Schema);
    rowMapper.OutputSchema.TryGetColumnIndex("Bar", out hashedColumn);
    outputRow = rowMapper.GetRow(inputRow, c => c == hashedColumn);
    vectorGetter = outputRow.GetGetter<VBuffer<uint>>(hashedColumn);
    vectorGetter(ref vectorHash);

    Assert.Equal(vecLen, vectorHash.Length);
    Assert.Equal(expectedOrdered, vectorHash.GetItemOrDefault(0));
    Assert.Equal(expectedOrdered3, vectorHash.GetItemOrDefault(3));
    Assert.All(vectorHash.DenseValues(), v => Assert.True((v == 0) == (expectedOrdered == 0)));

    // ---- Sparse vector input: length 10 with explicit values at indices 0, 3 and 7. ----
    var sparseVec = new VBuffer<T>(10, 3, Utils.CreateArray(3, val), new[] { 0, 3, 7 });
    metadataBuilder = new DataViewSchema.Metadata.Builder();
    metadataBuilder.Add("Foo", new VectorType(type, vecLen), (ref VBuffer<T> dst) => sparseVec.CopyTo(ref dst));
    inputRow = MetadataUtils.MetadataAsRow(metadataBuilder.ToMetadata());

    // Sparse, unordered.
    columnInfo = new HashingEstimator.ColumnInfo("Bar", "Foo", hashBits: bits, ordered: false);
    transformer = new HashingTransformer(Env, new[] { columnInfo });
    rowMapper = ((ITransformer)transformer).GetRowToRowMapper(inputRow.Schema);
    rowMapper.OutputSchema.TryGetColumnIndex("Bar", out hashedColumn);
    outputRow = rowMapper.GetRow(inputRow, c => c == hashedColumn);
    vectorGetter = outputRow.GetGetter<VBuffer<uint>>(hashedColumn);
    vectorGetter(ref vectorHash);

    Assert.Equal(10, vectorHash.Length);
    Assert.Equal(expected, vectorHash.GetItemOrDefault(0));
    Assert.Equal(expected, vectorHash.GetItemOrDefault(3));
    Assert.Equal(expected, vectorHash.GetItemOrDefault(7));

    // Sparse, ordered.
    columnInfo = new HashingEstimator.ColumnInfo("Bar", "Foo", hashBits: bits, ordered: true);
    transformer = new HashingTransformer(Env, new[] { columnInfo });
    rowMapper = ((ITransformer)transformer).GetRowToRowMapper(inputRow.Schema);
    rowMapper.OutputSchema.TryGetColumnIndex("Bar", out hashedColumn);
    outputRow = rowMapper.GetRow(inputRow, c => c == hashedColumn);
    vectorGetter = outputRow.GetGetter<VBuffer<uint>>(hashedColumn);
    vectorGetter(ref vectorHash);

    Assert.Equal(10, vectorHash.Length);
    Assert.Equal(expectedOrdered, vectorHash.GetItemOrDefault(0));
    Assert.Equal(expectedOrdered3, vectorHash.GetItemOrDefault(3));
}
/// <summary>
/// Build a <see cref="DataViewSchema"/> describing the operators of a TensorFlow graph. Each retained operator
/// becomes one column, named after the operator and typed after its first output. Operators are skipped when
/// they do not match <paramref name="opType"/>, have no outputs, or have an output type with no ML.NET equivalent.
/// </summary>
/// <param name="ectx">Exception context. It is not used by this method.</param>
/// <param name="graph">The TensorFlow graph whose operators are enumerated.</param>
/// <param name="opType">When not null, only operators of exactly this type are included.</param>
/// <returns>The schema with one column per retained operator, in enumeration order.</returns>
internal static DataViewSchema GetModelSchema(IExceptionContext ectx, TFGraph graph, string opType = null)
{
    var schemaBuilder = new DataViewSchema.Builder();
    foreach (var op in graph)
    {
        if (opType != null && opType != op.OpType)
        {
            continue;
        }

        // Operators which have NumOutputs <= 0 need to be filtered out before output 0 is touched at all.
        // The 'GetTensorShape' method crashes TensorFlow runtime
        // (https://github.com/dotnet/machinelearning/issues/2156) when the operator has no outputs, and
        // reading the type of output 0 is not meaningful for such an operator either.
        if (op.NumOutputs <= 0)
        {
            continue;
        }

        var tfType = op[0].OutputType;
        // Determine element type in Tensorflow tensor. For example, a vector of floats may get NumberType.R4 here.
        var mlType = Tf2MlNetTypeOrNull(tfType);

        // If the type is not supported in ML.NET then we cannot represent it as a column in an Schema.
        // We also cannot output it with a TensorFlowTransform, so we skip it.
        if (mlType == null)
        {
            continue;
        }

        // Construct the final ML.NET type of a Tensorflow variable. The default is a vector of unknown size;
        // a sized vector is used only when every dimension after the first is positive. A non-positive first
        // dimension is dropped from the shape in that case.
        var tensorShape = graph.GetTensorShape(op[0]).ToIntArray();
        var columnType = new VectorType(mlType);
        if (!(Utils.Size(tensorShape) == 1 && tensorShape[0] <= 0) &&
            (Utils.Size(tensorShape) > 0 && tensorShape.Skip(1).All(x => x > 0)))
        {
            columnType = new VectorType(mlType, tensorShape[0] > 0 ? tensorShape : tensorShape.Skip(1).ToArray());
        }

        // There can be at most two metadata fields.
        //  1. The first field always presents. Its value is this operator's type. For example,
        //     if an output is produced by an "Softmax" operator, the value of this field should be "Softmax".
        //  2. The second field stores operators whose outputs are consumed by this operator. In other words,
        //     these values are names of some upstream operators which should be evaluated before executing
        //     the current operator. It's possible that one operator doesn't need any input, so this field
        //     can be missing.
        var metadataBuilder = new DataViewSchema.Metadata.Builder();

        // Create the first metadata field.
        metadataBuilder.Add(TensorflowOperatorTypeKind, TextDataViewType.Instance,
            (ref ReadOnlyMemory<char> value) => value = op.OpType.AsMemory());

        if (op.NumInputs > 0)
        {
            // Put upstream operators' names to an array (type: VBuffer) of string (type: ReadOnlyMemory<char>).
            VBuffer<ReadOnlyMemory<char>> upstreamOperatorNames = default;
            var bufferEditor = VBufferEditor.Create(ref upstreamOperatorNames, op.NumInputs);
            for (int i = 0; i < op.NumInputs; ++i)
            {
                bufferEditor.Values[i] = op.GetInput(i).Operation.Name.AsMemory();
            }
            upstreamOperatorNames = bufferEditor.Commit(); // Used in metadata's getter.

            // Create the second metadata field.
            metadataBuilder.Add(TensorflowUpstreamOperatorsKind, new VectorType(TextDataViewType.Instance, op.NumInputs),
                (ref VBuffer<ReadOnlyMemory<char>> value) => { upstreamOperatorNames.CopyTo(ref value); });
        }

        schemaBuilder.AddColumn(op.Name, columnType, metadataBuilder.ToMetadata());
    }
    return schemaBuilder.ToSchema();
}
/// <summary>
/// Bind the tree-ensemble featurizer to <paramref name="schema"/> and build its three-column output schema:
/// tree output values, leaf indicators and path indicators, in that order.
/// </summary>
/// <param name="ectx">Exception context used for assertions.</param>
/// <param name="owner">The bindable mapper that owns the ensemble and supplies the slot-name getters.</param>
/// <param name="schema">Role-mapped input schema; it must have a feature column.</param>
public BoundMapper(IExceptionContext ectx, TreeEnsembleFeaturizerBindableMapper owner, RoleMappedSchema schema)
{
    Contracts.AssertValue(ectx);
    ectx.AssertValue(owner);
    ectx.AssertValue(schema);
    ectx.Assert(schema.Feature.HasValue);

    _ectx = ectx;
    _owner = owner;
    InputRoleMappedSchema = schema;

    var treeCount = owner._ensemble.TrainedEnsemble.NumTrees;
    var leafCount = owner._totalLeafCount;

    // Trees: one slot per tree, holding that tree's output on the example.
    var treeValueType = new VectorType(NumberDataViewType.Single, treeCount);

    // Leaves: an indicator vector over all leaves of the ensemble, marking the leaf reached in each tree.
    var leafIdType = new VectorType(NumberDataViewType.Single, leafCount);

    // Paths: an indicator vector over all internal nodes of the ensemble, marking the nodes on the example's
    // path in each tree. In a binary tree, #internal + #leaf = 2 * #internal + 1 (every node except the root is
    // a child of an internal node), hence #internal = #leaf - 1 per tree and #leaf - #trees for the ensemble.
    var pathIdType = new VectorType(NumberDataViewType.Single, leafCount - treeCount);

    // Tree values carry slot names only.
    var treeMetadata = new DataViewSchema.Metadata.Builder();
    treeMetadata.Add(MetadataUtils.Kinds.SlotNames, MetadataUtils.GetNamesType(treeValueType.Size),
        (ValueGetter<VBuffer<ReadOnlyMemory<char>>>)owner.GetTreeSlotNames);

    // Leaf indicators carry slot names and are flagged as normalized.
    var leafMetadata = new DataViewSchema.Metadata.Builder();
    leafMetadata.Add(MetadataUtils.Kinds.SlotNames, MetadataUtils.GetNamesType(leafIdType.Size),
        (ValueGetter<VBuffer<ReadOnlyMemory<char>>>)owner.GetLeafSlotNames);
    leafMetadata.Add(MetadataUtils.Kinds.IsNormalized, BooleanDataViewType.Instance,
        (ref bool isNormalized) => { isNormalized = true; });

    // Path indicators carry slot names and are flagged as normalized.
    var pathMetadata = new DataViewSchema.Metadata.Builder();
    pathMetadata.Add(MetadataUtils.Kinds.SlotNames, MetadataUtils.GetNamesType(pathIdType.Size),
        (ValueGetter<VBuffer<ReadOnlyMemory<char>>>)owner.GetPathSlotNames);
    pathMetadata.Add(MetadataUtils.Kinds.IsNormalized, BooleanDataViewType.Instance,
        (ref bool isNormalized) => { isNormalized = true; });

    // Assemble the output schema; the order of the AddColumn calls fixes the column indices asserted below.
    var outputSchema = new DataViewSchema.Builder();
    outputSchema.AddColumn(OutputColumnNames.Trees, treeValueType, treeMetadata.ToMetadata());
    outputSchema.AddColumn(OutputColumnNames.Leaves, leafIdType, leafMetadata.ToMetadata());
    outputSchema.AddColumn(OutputColumnNames.Paths, pathIdType, pathMetadata.ToMetadata());
    OutputSchema = outputSchema.ToSchema();

    // Tree values first, leaf IDs second, path IDs third.
    Contracts.Assert(OutputSchema[OutputColumnNames.Trees].Index == TreeValuesColumnId);
    Contracts.Assert(OutputSchema[OutputColumnNames.Leaves].Index == LeafIdsColumnId);
    Contracts.Assert(OutputSchema[OutputColumnNames.Paths].Index == PathIdsColumnId);
}