Example #1
0
        /// <summary>
        /// This is very similar to <see cref="Create(DataViewType, string, string)"/> but adds one extra metadata field to the only score column.
        /// </summary>
        /// <param name="scoreType">Output element's type of quantile regressor. Note that a quantile regressor can produce an array of <see cref="PrimitiveDataViewType"/>.</param>
        /// <param name="quantiles">Quantiles used in quantile regressor.</param>
        /// <returns><see cref="DataViewSchema"/> of quantile regressor's output.</returns>
        public static DataViewSchema CreateQuantileRegressionSchema(DataViewType scoreType, double[] quantiles)
        {
            Contracts.CheckValue(scoreType, nameof(scoreType));
            Contracts.CheckValue(scoreType as PrimitiveDataViewType, nameof(scoreType));
            Contracts.AssertValue(quantiles);

            // Create a schema using standard function. The produced schema will be modified by adding one metadata column.
            var partialSchema = Create(new VectorType(scoreType as PrimitiveDataViewType, quantiles.Length), MetadataUtils.Const.ScoreColumnKind.QuantileRegression);

            var metadataBuilder = new MetadataBuilder();

            // Add the extra metadata.
            metadataBuilder.AddSlotNames(quantiles.Length, (ref VBuffer <ReadOnlyMemory <char> > value) =>
            {
                var bufferEditor = VBufferEditor.Create(ref value, quantiles.Length);
                for (int i = 0; i < quantiles.Length; ++i)
                {
                    bufferEditor.Values[i] = string.Format("Quantile-{0}", quantiles[i]).AsMemory();
                }
                value = bufferEditor.Commit();
            });
            // Copy default metadata from the partial schema.
            metadataBuilder.Add(partialSchema[0].Metadata, (string kind) => true);

            // Build a schema consisting of a single column. Comparing with partial schema, the only difference is a metadata field.
            var schemaBuilder = new SchemaBuilder();

            schemaBuilder.AddColumn(partialSchema[0].Name, partialSchema[0].Type, metadataBuilder.GetMetadata());

            return(schemaBuilder.GetSchema());
        }
            public DataView(IHostEnvironment env, ArrayDataViewBuilder builder, int rowCount)
            {
                Contracts.AssertValue(env, "env");
                _host = env.Register("ArrayDataView");

                _host.AssertValue(builder);
                _host.Assert(rowCount >= 0);
                _host.Assert(builder._names.Count == builder._columns.Count);
                _columns = builder._columns.ToArray();

                var schemaBuilder = new SchemaBuilder();

                for (int i = 0; i < _columns.Length; i++)
                {
                    var meta = new MetadataBuilder();

                    if (builder._getSlotNames.TryGetValue(builder._names[i], out var slotNamesGetter))
                    {
                        meta.AddSlotNames(_columns[i].Type.GetVectorSize(), slotNamesGetter);
                    }

                    if (builder._getKeyValues.TryGetValue(builder._names[i], out var keyValueGetter))
                    {
                        meta.AddKeyValues(_columns[i].Type.GetKeyCount(), TextType.Instance, keyValueGetter);
                    }
                    schemaBuilder.AddColumn(builder._names[i], _columns[i].Type, meta.GetMetadata());
                }

                _schema   = schemaBuilder.GetSchema();
                _rowCount = rowCount;
            }
Example #3
0
        /// <summary>
        /// Create a <see cref="Schema"/> with two columns for binary classifier. The first column, indexed by 0, is the score column.
        /// The second column is the probability column. For example, for linear support vector machine, score column stands for the inner product
        /// of linear coefficients and the input feature vector and we convert score column to probability column using a calibrator.
        /// </summary>
        /// <param name="scoreColumnName">Column name of score column</param>
        /// <param name="probabilityColumnName">Column name of probability column</param>
        /// <returns><see cref="Schema"/> of binary classifier's output.</returns>
        public static Schema CreateBinaryClassificationSchema(string scoreColumnName       = MetadataUtils.Const.ScoreValueKind.Score,
                                                              string probabilityColumnName = MetadataUtils.Const.ScoreValueKind.Probability)
        {
            // Schema of Score column. We are going to extend it by adding a Probability column.
            var partialSchema = Create(NumberType.Float, MetadataUtils.Const.ScoreColumnKind.BinaryClassification, scoreColumnName);

            var schemaBuilder = new SchemaBuilder();

            // Copy Score column from partialSchema.
            schemaBuilder.AddColumn(partialSchema[0].Name, partialSchema[0].Type, partialSchema[0].Metadata);

            // Create Probability column's metadata.
            var probabilityMetadataBuilder = new MetadataBuilder();

            probabilityMetadataBuilder.Add(MetadataUtils.Kinds.IsNormalized, BoolType.Instance, (ref bool value) => { value = true; });
            probabilityMetadataBuilder.Add(MetadataUtils.Kinds.ScoreColumnKind, TextType.Instance,
                                           (ref ReadOnlyMemory <char> value) => { value = MetadataUtils.Const.ScoreColumnKind.BinaryClassification.AsMemory(); });
            probabilityMetadataBuilder.Add(MetadataUtils.Kinds.ScoreValueKind, TextType.Instance,
                                           (ref ReadOnlyMemory <char> value) => { value = MetadataUtils.Const.ScoreValueKind.Probability.AsMemory(); });

            // Add probability column.
            schemaBuilder.AddColumn(probabilityColumnName, NumberType.Float, probabilityMetadataBuilder.GetMetadata());

            return(schemaBuilder.GetSchema());
        }
Example #4
0
        /// <summary>
        /// Return a <see cref="DataViewSchema"/> which contains a single score column.
        /// </summary>
        /// <param name="scoreType">The type of the score column.</param>
        /// <param name="scoreColumnKindValue">The kind of the score column. It's the value of <see cref="MetadataUtils.Kinds.ScoreColumnKind"/> in the score column's metadata.</param>
        /// <param name="scoreColumnName">The score column's name in the generated <see cref="DataViewSchema"/>.</param>
        /// <returns><see cref="DataViewSchema"/> which contains only one column.</returns>
        public static DataViewSchema Create(DataViewType scoreType, string scoreColumnKindValue, string scoreColumnName = MetadataUtils.Const.ScoreValueKind.Score)
        {
            Contracts.CheckValue(scoreType, nameof(scoreType));
            Contracts.CheckNonEmpty(scoreColumnKindValue, nameof(scoreColumnKindValue));

            // Two metadata fields. One can set up by caller of this function while the other one is a constant.
            var metadataBuilder = new MetadataBuilder();

            metadataBuilder.Add(MetadataUtils.Kinds.ScoreColumnKind, TextDataViewType.Instance,
                                (ref ReadOnlyMemory <char> value) => { value = scoreColumnKindValue.AsMemory(); });
            metadataBuilder.Add(MetadataUtils.Kinds.ScoreValueKind, TextDataViewType.Instance,
                                (ref ReadOnlyMemory <char> value) => { value = MetadataUtils.Const.ScoreValueKind.Score.AsMemory(); });

            // Build a schema consisting of a single column.
            var schemaBuilder = new SchemaBuilder();

            schemaBuilder.AddColumn(scoreColumnName, scoreType, metadataBuilder.GetMetadata());

            return(schemaBuilder.GetSchema());
        }
Example #5
0
        private static DataViewSchema CreateSchema(ColumnBindingsBase inputBindings)
        {
            Contracts.CheckValue(inputBindings, nameof(inputBindings));

            var builder = new SchemaBuilder();

            for (int i = 0; i < inputBindings.ColumnCount; i++)
            {
                var meta = new MetadataBuilder();
                foreach (var kvp in inputBindings.GetMetadataTypes(i))
                {
                    var getter = Utils.MarshalInvoke(GetMetadataGetterDelegate <int>, kvp.Value.RawType, inputBindings, i, kvp.Key);
                    meta.Add(kvp.Key, kvp.Value, getter);
                }
                builder.AddColumn(inputBindings.GetColumnName(i), inputBindings.GetColumnType(i), meta.GetMetadata());
            }

            return(builder.GetSchema());
        }
            public BoundMapper(IExceptionContext ectx, TreeEnsembleFeaturizerBindableMapper owner,
                               RoleMappedSchema schema)
            {
                Contracts.AssertValue(ectx);
                ectx.AssertValue(owner);
                ectx.AssertValue(schema);
                ectx.Assert(schema.Feature.HasValue);

                _ectx = ectx;

                _owner = owner;
                InputRoleMappedSchema = schema;

                // A vector containing the output of each tree on a given example.
                var treeValueType = new VectorType(NumberType.Float, owner._ensemble.TrainedEnsemble.NumTrees);
                // An indicator vector with length = the total number of leaves in the ensemble, indicating which leaf the example
                // ends up in all the trees in the ensemble.
                var leafIdType = new VectorType(NumberType.Float, owner._totalLeafCount);
                // An indicator vector with length = the total number of nodes in the ensemble, indicating the nodes on
                // the paths of the example in all the trees in the ensemble.
                // The total number of nodes in a binary tree is equal to the number of internal nodes + the number of leaf nodes,
                // and it is also equal to the number of children of internal nodes (which is 2 * the number of internal nodes)
                // plus one (since the root node is not a child of any node). So we have #internal + #leaf = 2*(#internal) + 1,
                // which means that #internal = #leaf - 1.
                // Therefore, the number of internal nodes in the ensemble is #leaf - #trees.
                var pathIdType = new VectorType(NumberType.Float, owner._totalLeafCount - owner._ensemble.TrainedEnsemble.NumTrees);

                // Start creating output schema with types derived above.
                var schemaBuilder = new SchemaBuilder();

                // Metadata of tree values.
                var treeIdMetadataBuilder = new MetadataBuilder();

                treeIdMetadataBuilder.Add(MetadataUtils.Kinds.SlotNames, MetadataUtils.GetNamesType(treeValueType.Size),
                                          (ValueGetter <VBuffer <ReadOnlyMemory <char> > >)owner.GetTreeSlotNames);
                // Add the column of trees' output values
                schemaBuilder.AddColumn(OutputColumnNames.Trees, treeValueType, treeIdMetadataBuilder.GetMetadata());

                // Metadata of leaf IDs.
                var leafIdMetadataBuilder = new MetadataBuilder();

                leafIdMetadataBuilder.Add(MetadataUtils.Kinds.SlotNames, MetadataUtils.GetNamesType(leafIdType.Size),
                                          (ValueGetter <VBuffer <ReadOnlyMemory <char> > >)owner.GetLeafSlotNames);
                leafIdMetadataBuilder.Add(MetadataUtils.Kinds.IsNormalized, BoolType.Instance, (ref bool value) => value = true);
                // Add the column of leaves' IDs where the input example reaches.
                schemaBuilder.AddColumn(OutputColumnNames.Leaves, leafIdType, leafIdMetadataBuilder.GetMetadata());

                // Metadata of path IDs.
                var pathIdMetadataBuilder = new MetadataBuilder();

                pathIdMetadataBuilder.Add(MetadataUtils.Kinds.SlotNames, MetadataUtils.GetNamesType(pathIdType.Size),
                                          (ValueGetter <VBuffer <ReadOnlyMemory <char> > >)owner.GetPathSlotNames);
                pathIdMetadataBuilder.Add(MetadataUtils.Kinds.IsNormalized, BoolType.Instance, (ref bool value) => value = true);
                // Add the column of encoded paths which the input example passes.
                schemaBuilder.AddColumn(OutputColumnNames.Paths, pathIdType, pathIdMetadataBuilder.GetMetadata());

                OutputSchema = schemaBuilder.GetSchema();

                // Tree values must be the first output column.
                Contracts.Assert(OutputSchema[OutputColumnNames.Trees].Index == TreeValuesColumnId);
                // leaf IDs must be the second output column.
                Contracts.Assert(OutputSchema[OutputColumnNames.Leaves].Index == LeafIdsColumnId);
                // Path IDs must be the third output column.
                Contracts.Assert(OutputSchema[OutputColumnNames.Paths].Index == PathIdsColumnId);
            }
Example #7
0
        /// <summary>
        /// This function returns a schema for sequence predictor's output. Its output column is always called <see cref="MetadataUtils.Const.ScoreValueKind.PredictedLabel"/>.
        /// </summary>
        /// <param name="scoreType">Score column's type produced by sequence predictor.</param>
        /// <param name="scoreColumnKindValue">A metadata value of score column. It's the value associated with key
        /// <see cref="MetadataUtils.Kinds.ScoreColumnKind"/>.</param>
        /// <param name="keyNames">Sequence predictor usually generates integer outputs. This field tells the tags of all possible output values.
        /// For example, output integer 0 cound be mapped to "Sell" and 0 to "Buy" when predicting stock trend.</param>
        /// <returns><see cref="DataViewSchema"/> of sequence predictor's output.</returns>
        public static DataViewSchema CreateSequencePredictionSchema(DataViewType scoreType, string scoreColumnKindValue, VBuffer <ReadOnlyMemory <char> > keyNames = default)
        {
            Contracts.CheckValue(scoreType, nameof(scoreType));
            Contracts.CheckValue(scoreColumnKindValue, nameof(scoreColumnKindValue));

            var metadataBuilder = new MetadataBuilder();

            // Add metadata columns including their getters. We starts with key names of predicted keys if they exist.
            if (keyNames.Length > 0)
            {
                metadataBuilder.AddKeyValues(keyNames.Length, TextDataViewType.Instance,
                                             (ref VBuffer <ReadOnlyMemory <char> > value) => value = keyNames);
            }
            metadataBuilder.Add(MetadataUtils.Kinds.ScoreColumnKind, TextDataViewType.Instance,
                                (ref ReadOnlyMemory <char> value) => value = scoreColumnKindValue.AsMemory());
            metadataBuilder.Add(MetadataUtils.Kinds.ScoreValueKind, TextDataViewType.Instance,
                                (ref ReadOnlyMemory <char> value) => value = MetadataUtils.Const.ScoreValueKind.PredictedLabel.AsMemory());

            // Build a schema consisting of a single column.
            var schemaBuilder = new SchemaBuilder();

            schemaBuilder.AddColumn(MetadataUtils.Const.ScoreValueKind.PredictedLabel, scoreType, metadataBuilder.GetMetadata());

            return(schemaBuilder.GetSchema());
        }
                /// <summary>
                /// Append label names to score column as its metadata.
                /// </summary>
                private DataViewSchema DecorateOutputSchema(DataViewSchema partialSchema, int scoreColumnIndex, VectorType labelNameType,
                                                            ValueGetter <VBuffer <T> > labelNameGetter, string labelNameKind)
                {
                    var builder = new SchemaBuilder();

                    // Sequentially add columns so that the order of them is not changed comparing with the schema in the mapper
                    // that computes score column.
                    for (int i = 0; i < partialSchema.Count; ++i)
                    {
                        var meta = new MetadataBuilder();
                        if (i == scoreColumnIndex)
                        {
                            // Add label names for score column.
                            meta.Add(partialSchema[i].Metadata, selector: s => s != labelNameKind);
                            meta.Add(labelNameKind, labelNameType, labelNameGetter);
                        }
                        else
                        {
                            // Copy all existing metadata because this transform only affects score column.
                            meta.Add(partialSchema[i].Metadata, selector: s => true);
                        }
                        // Instead of appending extra metadata to the existing score column, we create new one because
                        // metadata is read-only.
                        builder.AddColumn(partialSchema[i].Name, partialSchema[i].Type, meta.GetMetadata());
                    }
                    return(builder.GetSchema());
                }
Example #9
0
            public RowMapper(IHostEnvironment env, BindableMapper parent, RoleMappedSchema schema)
            {
                Contracts.AssertValue(env);
                _env = env;
                _env.AssertValue(schema);
                _env.AssertValue(parent);
                _env.Assert(schema.Feature.HasValue);
                _parent = parent;
                InputRoleMappedSchema = schema;
                var genericMapper = parent.GenericMapper.Bind(_env, schema);

                _genericRowMapper = genericMapper as ISchemaBoundRowMapper;
                var featureSize = FeatureColumn.Type.GetVectorSize();

                if (parent.Stringify)
                {
                    var builder = new SchemaBuilder();
                    builder.AddColumn(DefaultColumnNames.FeatureContributions, TextType.Instance, null);
                    _outputSchema = builder.GetSchema();
                    if (FeatureColumn.HasSlotNames(featureSize))
                    {
                        FeatureColumn.Metadata.GetValue(MetadataUtils.Kinds.SlotNames, ref _slotNames);
                    }
                    else
                    {
                        _slotNames = VBufferUtils.CreateEmpty <ReadOnlyMemory <char> >(featureSize);
                    }
                }
                else
                {
                    var metadataBuilder = new MetadataBuilder();
                    if (InputSchema[FeatureColumn.Index].HasSlotNames(featureSize))
                    {
                        metadataBuilder.AddSlotNames(featureSize, (ref VBuffer <ReadOnlyMemory <char> > value) =>
                                                     FeatureColumn.Metadata.GetValue(MetadataUtils.Kinds.SlotNames, ref value));
                    }

                    var schemaBuilder           = new SchemaBuilder();
                    var featureContributionType = new VectorType(NumberType.R4, FeatureColumn.Type as VectorType);
                    schemaBuilder.AddColumn(DefaultColumnNames.FeatureContributions, featureContributionType, metadataBuilder.GetMetadata());
                    _outputSchema = schemaBuilder.GetSchema();
                }

                _outputGenericSchema = _genericRowMapper.OutputSchema;
                OutputSchema         = new ZipBinding(new Schema[] { _outputGenericSchema, _outputSchema, }).OutputSchema;
            }
Example #10
0
        internal static Schema Create(ISchema inputSchema)
        {
            Contracts.CheckValue(inputSchema, nameof(inputSchema));

            if (inputSchema is Schema s)
            {
                return(s);
            }

            var builder = new SchemaBuilder();

            for (int i = 0; i < inputSchema.ColumnCount; i++)
            {
                var meta = new MetadataBuilder();
                foreach (var kvp in inputSchema.GetMetadataTypes(i))
                {
                    var getter = Utils.MarshalInvoke(GetMetadataGetterDelegate <int>, kvp.Value.RawType, inputSchema, i, kvp.Key);
                    meta.Add(kvp.Key, kvp.Value, getter);
                }
                builder.AddColumn(inputSchema.GetColumnName(i), inputSchema.GetColumnType(i), meta.GetMetadata());
            }

            return(builder.GetSchema());
        }