コード例 #1
0
        /// <summary>
        /// Create a <see cref="DataViewSchema"/> with two columns for binary classifier. The first column, indexed by 0, is the score column.
        /// The second column is the probability column. For example, for linear support vector machine, score column stands for the inner product
        /// of linear coefficients and the input feature vector and we convert score column to probability column using a calibrator.
        /// </summary>
        /// <param name="scoreColumnName">Column name of score column</param>
        /// <param name="probabilityColumnName">Column name of probability column</param>
        /// <returns><see cref="DataViewSchema"/> of binary classifier's output.</returns>
        public static DataViewSchema CreateBinaryClassificationSchema(string scoreColumnName       = MetadataUtils.Const.ScoreValueKind.Score,
                                                                      string probabilityColumnName = MetadataUtils.Const.ScoreValueKind.Probability)
        {
            // Schema of Score column. We are going to extend it by adding a Probability column.
            var partialSchema = Create(NumberDataViewType.Single, MetadataUtils.Const.ScoreColumnKind.BinaryClassification, scoreColumnName);

            var schemaBuilder = new DataViewSchema.Builder();

            // Copy Score column from partialSchema.
            schemaBuilder.AddColumn(partialSchema[0].Name, partialSchema[0].Type, partialSchema[0].Metadata);

            // Create Probability column's metadata.
            var probabilityMetadataBuilder = new DataViewSchema.Metadata.Builder();

            probabilityMetadataBuilder.Add(MetadataUtils.Kinds.IsNormalized, BooleanDataViewType.Instance, (ref bool value) => { value = true; });
            probabilityMetadataBuilder.Add(MetadataUtils.Kinds.ScoreColumnKind, TextDataViewType.Instance,
                                           (ref ReadOnlyMemory <char> value) => { value = MetadataUtils.Const.ScoreColumnKind.BinaryClassification.AsMemory(); });
            probabilityMetadataBuilder.Add(MetadataUtils.Kinds.ScoreValueKind, TextDataViewType.Instance,
                                           (ref ReadOnlyMemory <char> value) => { value = MetadataUtils.Const.ScoreValueKind.Probability.AsMemory(); });

            // Add probability column.
            schemaBuilder.AddColumn(probabilityColumnName, NumberDataViewType.Single, probabilityMetadataBuilder.ToMetadata());

            return(schemaBuilder.ToSchema());
        }
コード例 #2
0
                /// <summary>
                /// Append label names to score column as its metadata.
                /// </summary>
                private DataViewSchema DecorateOutputSchema(DataViewSchema partialSchema, int scoreColumnIndex, VectorType labelNameType,
                                                            ValueGetter <VBuffer <T> > labelNameGetter, string labelNameKind)
                {
                    var builder = new DataViewSchema.Builder();

                    // Sequentially add columns so that the order of them is not changed comparing with the schema in the mapper
                    // that computes score column.
                    for (int i = 0; i < partialSchema.Count; ++i)
                    {
                        var meta = new DataViewSchema.Metadata.Builder();
                        if (i == scoreColumnIndex)
                        {
                            // Add label names for score column.
                            meta.Add(partialSchema[i].Metadata, selector: s => s != labelNameKind);
                            meta.Add(labelNameKind, labelNameType, labelNameGetter);
                        }
                        else
                        {
                            // Copy all existing metadata because this transform only affects score column.
                            meta.Add(partialSchema[i].Metadata, selector: s => true);
                        }
                        // Instead of appending extra metadata to the existing score column, we create new one because
                        // metadata is read-only.
                        builder.AddColumn(partialSchema[i].Name, partialSchema[i].Type, meta.ToMetadata());
                    }
                    return(builder.ToSchema());
                }
コード例 #3
0
        /// <summary>
        /// This function returns a schema for sequence predictor's output. Its output column is always called <see cref="MetadataUtils.Const.ScoreValueKind.PredictedLabel"/>.
        /// </summary>
        /// <param name="scoreType">Score column's type produced by sequence predictor.</param>
        /// <param name="scoreColumnKindValue">A metadata value of score column. It's the value associated with key
        /// <see cref="MetadataUtils.Kinds.ScoreColumnKind"/>.</param>
        /// <param name="keyNames">Sequence predictor usually generates integer outputs. This field tells the tags of all possible output values.
        /// For example, output integer 0 cound be mapped to "Sell" and 0 to "Buy" when predicting stock trend.</param>
        /// <returns><see cref="DataViewSchema"/> of sequence predictor's output.</returns>
        public static DataViewSchema CreateSequencePredictionSchema(DataViewType scoreType, string scoreColumnKindValue, VBuffer <ReadOnlyMemory <char> > keyNames = default)
        {
            Contracts.CheckValue(scoreType, nameof(scoreType));
            Contracts.CheckValue(scoreColumnKindValue, nameof(scoreColumnKindValue));

            var metadataBuilder = new DataViewSchema.Metadata.Builder();

            // Add metadata columns including their getters. We starts with key names of predicted keys if they exist.
            if (keyNames.Length > 0)
            {
                metadataBuilder.AddKeyValues(keyNames.Length, TextDataViewType.Instance,
                                             (ref VBuffer <ReadOnlyMemory <char> > value) => value = keyNames);
            }
            metadataBuilder.Add(MetadataUtils.Kinds.ScoreColumnKind, TextDataViewType.Instance,
                                (ref ReadOnlyMemory <char> value) => value = scoreColumnKindValue.AsMemory());
            metadataBuilder.Add(MetadataUtils.Kinds.ScoreValueKind, TextDataViewType.Instance,
                                (ref ReadOnlyMemory <char> value) => value = MetadataUtils.Const.ScoreValueKind.PredictedLabel.AsMemory());

            // Build a schema consisting of a single column.
            var schemaBuilder = new DataViewSchema.Builder();

            schemaBuilder.AddColumn(MetadataUtils.Const.ScoreValueKind.PredictedLabel, scoreType, metadataBuilder.ToMetadata());

            return(schemaBuilder.ToSchema());
        }
コード例 #4
0
            /// <summary>
            /// Compute the output schema of a <see cref="GroupTransform"/> given a input schema.
            /// </summary>
            /// <param name="sourceSchema">Input schema.</param>
            /// <returns>The associated output schema produced by <see cref="GroupTransform"/>.</returns>
            private DataViewSchema BuildOutputSchema(DataViewSchema sourceSchema)
            {
                // Create schema build. We will sequentially add group columns and then aggregated columns.
                var schemaBuilder = new DataViewSchema.Builder();

                // Handle group(-key) columns. Those columns are used as keys to partition rows in the input data; specifically,
                // rows with the same key value will be merged into one row in the output data.
                foreach (var groupKeyColumnName in _groupColumns)
                {
                    schemaBuilder.AddColumn(groupKeyColumnName, sourceSchema[groupKeyColumnName].Type, sourceSchema[groupKeyColumnName].Metadata);
                }

                // Handle aggregated (aka keep) columns.
                foreach (var groupValueColumnName in _keepColumns)
                {
                    // Prepare column's metadata.
                    var metadataBuilder = new DataViewSchema.Metadata.Builder();
                    metadataBuilder.Add(sourceSchema[groupValueColumnName].Metadata,
                                        s => s == MetadataUtils.Kinds.IsNormalized || s == MetadataUtils.Kinds.KeyValues);

                    // Prepare column's type.
                    var aggregatedValueType = sourceSchema[groupValueColumnName].Type as PrimitiveDataViewType;
                    _ectx.CheckValue(aggregatedValueType, nameof(aggregatedValueType), "Columns being aggregated must be primitive types such as string, float, or integer");
                    var aggregatedResultType = new VectorType(aggregatedValueType);

                    // Add column into output schema.
                    schemaBuilder.AddColumn(groupValueColumnName, aggregatedResultType, metadataBuilder.ToMetadata());
                }

                return(schemaBuilder.ToSchema());
            }
コード例 #5
0
        /// <summary>
        /// This is very similar to <see cref="Create(DataViewType, string, string)"/> but adds one extra metadata field to the only score column.
        /// </summary>
        /// <param name="scoreType">Output element's type of quantile regressor. Note that a quantile regressor can produce an array of <see cref="PrimitiveDataViewType"/>.</param>
        /// <param name="quantiles">Quantiles used in quantile regressor.</param>
        /// <returns><see cref="DataViewSchema"/> of quantile regressor's output.</returns>
        public static DataViewSchema CreateQuantileRegressionSchema(DataViewType scoreType, double[] quantiles)
        {
            Contracts.CheckValue(scoreType, nameof(scoreType));
            Contracts.CheckValue(scoreType as PrimitiveDataViewType, nameof(scoreType));
            Contracts.AssertValue(quantiles);

            // Create a schema using standard function. The produced schema will be modified by adding one metadata column.
            var partialSchema = Create(new VectorType(scoreType as PrimitiveDataViewType, quantiles.Length), MetadataUtils.Const.ScoreColumnKind.QuantileRegression);

            var metadataBuilder = new DataViewSchema.Metadata.Builder();

            // Add the extra metadata.
            metadataBuilder.AddSlotNames(quantiles.Length, (ref VBuffer <ReadOnlyMemory <char> > value) =>
            {
                var bufferEditor = VBufferEditor.Create(ref value, quantiles.Length);
                for (int i = 0; i < quantiles.Length; ++i)
                {
                    bufferEditor.Values[i] = string.Format("Quantile-{0}", quantiles[i]).AsMemory();
                }
                value = bufferEditor.Commit();
            });
            // Copy default metadata from the partial schema.
            metadataBuilder.Add(partialSchema[0].Metadata, (string kind) => true);

            // Build a schema consisting of a single column. Comparing with partial schema, the only difference is a metadata field.
            var schemaBuilder = new DataViewSchema.Builder();

            schemaBuilder.AddColumn(partialSchema[0].Name, partialSchema[0].Type, metadataBuilder.ToMetadata());

            return(schemaBuilder.ToSchema());
        }
コード例 #6
0
        public static DataViewSchema Create(SchemaShape shape)
        {
            var builder = new DataViewSchema.Builder();

            for (int i = 0; i < shape.Count; ++i)
            {
                var metaBuilder     = new DataViewSchema.Metadata.Builder();
                var partialMetadata = shape[i].Metadata;
                for (int j = 0; j < partialMetadata.Count; ++j)
                {
                    var      metaColumnType = MakeColumnType(partialMetadata[j]);
                    Delegate del;
                    if (metaColumnType is VectorType vectorType)
                    {
                        del = Utils.MarshalInvoke(GetDefaultVectorGetter <int>, vectorType.ItemType.RawType);
                    }
                    else
                    {
                        del = Utils.MarshalInvoke(GetDefaultGetter <int>, metaColumnType.RawType);
                    }
                    metaBuilder.Add(partialMetadata[j].Name, metaColumnType, del);
                }
                builder.AddColumn(shape[i].Name, MakeColumnType(shape[i]), metaBuilder.ToMetadata());
            }
            return(builder.ToSchema());
        }
コード例 #7
0
        /// <summary>
        /// Return a <see cref="DataViewSchema"/> which contains a single score column.
        /// </summary>
        /// <param name="scoreType">The type of the score column.</param>
        /// <param name="scoreColumnKindValue">The kind of the score column. It's the value of <see cref="MetadataUtils.Kinds.ScoreColumnKind"/> in the score column's metadata.</param>
        /// <param name="scoreColumnName">The score column's name in the generated <see cref="DataViewSchema"/>.</param>
        /// <returns><see cref="DataViewSchema"/> which contains only one column.</returns>
        public static DataViewSchema Create(DataViewType scoreType, string scoreColumnKindValue, string scoreColumnName = MetadataUtils.Const.ScoreValueKind.Score)
        {
            Contracts.CheckValue(scoreType, nameof(scoreType));
            Contracts.CheckNonEmpty(scoreColumnKindValue, nameof(scoreColumnKindValue));

            // Two metadata fields. One can set up by caller of this function while the other one is a constant.
            var metadataBuilder = new DataViewSchema.Metadata.Builder();

            metadataBuilder.Add(MetadataUtils.Kinds.ScoreColumnKind, TextDataViewType.Instance,
                                (ref ReadOnlyMemory <char> value) => { value = scoreColumnKindValue.AsMemory(); });
            metadataBuilder.Add(MetadataUtils.Kinds.ScoreValueKind, TextDataViewType.Instance,
                                (ref ReadOnlyMemory <char> value) => { value = MetadataUtils.Const.ScoreValueKind.Score.AsMemory(); });

            // Build a schema consisting of a single column.
            var schemaBuilder = new DataViewSchema.Builder();

            schemaBuilder.AddColumn(scoreColumnName, scoreType, metadataBuilder.ToMetadata());

            return(schemaBuilder.ToSchema());
        }
コード例 #8
0
            private void AddMetadata(int iinfo, DataViewSchema.Metadata.Builder builder)
            {
                builder.Add(InputSchema[_parent.ColumnPairs[iinfo].inputColumnName].Metadata, name => name == MetadataUtils.Kinds.SlotNames);
                ValueGetter <VBuffer <ReadOnlyMemory <char> > > getter =
                    (ref VBuffer <ReadOnlyMemory <char> > dst) =>
                {
                    GetKeyValues(iinfo, ref dst);
                };

                builder.AddKeyValues(CharsCount, TextDataViewType.Instance, getter);
            }
コード例 #9
0
            protected override DataViewSchema.DetachedColumn[] GetOutputColumnsCore()
            {
                var result = new DataViewSchema.DetachedColumn[_parent.ColumnPairs.Length];

                for (int i = 0; i < _parent.ColumnPairs.Length; i++)
                {
                    var builder = new DataViewSchema.Metadata.Builder();
                    builder.Add(InputSchema[ColMapNewToOld[i]].Metadata, x => x == MetadataUtils.Kinds.KeyValues || x == MetadataUtils.Kinds.IsNormalized);
                    result[i] = new DataViewSchema.DetachedColumn(_parent.ColumnPairs[i].outputColumnName, _types[i], builder.ToMetadata());
                }
                return(result);
            }
コード例 #10
0
            protected override DataViewSchema.DetachedColumn[] GetOutputColumnsCore()
            {
                var result = new DataViewSchema.DetachedColumn[_parent.ColumnPairs.Length];

                for (int i = 0; i < _parent.ColumnPairs.Length; i++)
                {
                    var meta = new DataViewSchema.Metadata.Builder();
                    meta.Add(InputSchema[ColMapNewToOld[i]].Metadata, name => name == MetadataUtils.Kinds.SlotNames);
                    result[i] = new DataViewSchema.DetachedColumn(_parent.ColumnPairs[i].outputColumnName, _types[i], meta.ToMetadata());
                }
                return(result);
            }
コード例 #11
0
            public UngroupBinding(IExceptionContext ectx, DataViewSchema inputSchema, UngroupMode mode, string[] pivotColumns)
            {
                Contracts.AssertValueOrNull(ectx);
                _ectx = ectx;
                _ectx.AssertValue(inputSchema);
                _ectx.AssertNonEmpty(pivotColumns);

                _inputSchema = inputSchema; // This also makes InputColumnCount valid.
                Mode         = mode;

                Bind(_ectx, inputSchema, pivotColumns, out _infos);

                _pivotIndex = Utils.CreateArray(InputColumnCount, -1);
                for (int i = 0; i < _infos.Length; i++)
                {
                    var info = _infos[i];
                    _ectx.Assert(_pivotIndex[info.Index] == -1);
                    _pivotIndex[info.Index] = i;
                }

                var schemaBuilder = new DataViewSchema.Builder();

                // Iterate through input columns. Input columns which are not pivot columns will be copied to output schema with the same column index unchanged.
                // Input columns which are pivot columns would also be copied but with different data types and different metadata.
                for (int i = 0; i < InputColumnCount; ++i)
                {
                    if (_pivotIndex[i] < 0)
                    {
                        // i-th input column is not a pivot column. Let's do a naive copy.
                        schemaBuilder.AddColumn(inputSchema[i].Name, inputSchema[i].Type, inputSchema[i].Metadata);
                    }
                    else
                    {
                        // i-th input column is a pivot column. Let's calculate proper type and metadata for it.
                        var metadataBuilder = new DataViewSchema.Metadata.Builder();
                        metadataBuilder.Add(inputSchema[i].Metadata, metadataName => ShouldPreserveMetadata(metadataName));
                        // To explain the output type of pivot columns, let's consider a row
                        //   Age UserID
                        //   18  {"Amy", "Willy"}
                        // where "Age" and "UserID" are column names and 18/{"Amy", "Willy"} is "Age"/"UserID" column in this example row.
                        // If the only pivot column is "UserID", the ungroup may produce
                        //   Age UserID
                        //   18  "Amy"
                        //   18  "Willy"
                        // One can see that "UserID" column (in output data) has a type identical to the element's type of the "UserID" column in input data.
                        schemaBuilder.AddColumn(inputSchema[i].Name, inputSchema[i].Type.GetItemType(), metadataBuilder.ToMetadata());
                    }
                }
                OutputSchema = schemaBuilder.ToSchema();
            }
コード例 #12
0
            public FeatureNameCollectionBinding(FeatureNameCollection collection)
            {
                Contracts.CheckValue(collection, nameof(collection));

                _collection    = collection;
                _colType       = new VectorType(NumberDataViewType.Single, collection.Count);
                _slotNamesType = new VectorType(TextDataViewType.Instance, collection.Count);

                var metadataBuilder = new DataViewSchema.Metadata.Builder();

                metadataBuilder.Add(MetadataUtils.Kinds.SlotNames, _slotNamesType,
                                    (ref VBuffer <ReadOnlyMemory <char> > slotNames) => { GetSlotNames(0, ref slotNames); });
                var schemaBuilder = new DataViewSchema.Builder();

                schemaBuilder.AddColumn(RoleMappedSchema.ColumnRole.Feature.Value, _colType, metadataBuilder.ToMetadata());
                FeatureNameCollectionSchema = schemaBuilder.ToSchema();
            }
コード例 #13
0
        private static DataViewSchema CreateSchema(ColumnBindingsBase inputBindings)
        {
            Contracts.CheckValue(inputBindings, nameof(inputBindings));

            var builder = new DataViewSchema.Builder();

            for (int i = 0; i < inputBindings.ColumnCount; i++)
            {
                var meta = new DataViewSchema.Metadata.Builder();
                foreach (var kvp in inputBindings.GetMetadataTypes(i))
                {
                    var getter = Utils.MarshalInvoke(GetMetadataGetterDelegate <int>, kvp.Value.RawType, inputBindings, i, kvp.Key);
                    meta.Add(kvp.Key, kvp.Value, getter);
                }
                builder.AddColumn(inputBindings.GetColumnName(i), inputBindings.GetColumnType(i), meta.ToMetadata());
            }

            return(builder.ToSchema());
        }
コード例 #14
0
        void SimpleTest()
        {
            var metadataBuilder = new DataViewSchema.Metadata.Builder();

            metadataBuilder.Add("M", NumberDataViewType.Single, (ref float v) => v = 484f);
            var schemaBuilder = new DataViewSchema.Builder();

            schemaBuilder.AddColumn("A", new VectorType(NumberDataViewType.Single, 94));
            schemaBuilder.AddColumn("B", new KeyType(typeof(uint), 17));
            schemaBuilder.AddColumn("C", NumberDataViewType.Int32, metadataBuilder.ToMetadata());

            var shape = SchemaShape.Create(schemaBuilder.ToSchema());

            var fakeSchema = FakeSchemaFactory.Create(shape);

            var columnA = fakeSchema[0];
            var columnB = fakeSchema[1];
            var columnC = fakeSchema[2];

            Assert.Equal("A", columnA.Name);
            Assert.Equal(NumberDataViewType.Single, columnA.Type.GetItemType());
            Assert.Equal(10, columnA.Type.GetValueCount());

            Assert.Equal("B", columnB.Name);
            Assert.Equal(InternalDataKind.U4, columnB.Type.GetRawKind());
            Assert.Equal(10u, columnB.Type.GetKeyCount());

            Assert.Equal("C", columnC.Name);
            Assert.Equal(NumberDataViewType.Int32, columnC.Type);

            var metaC = columnC.Metadata;

            Assert.Single(metaC.Schema);

            float mValue = -1;

            metaC.GetValue("M", ref mValue);
            Assert.Equal(default, mValue);
コード例 #15
0
 /// <summary>
 /// Add key values metadata.
 /// </summary>
 /// <typeparam name="TValue">The value type of key values.</typeparam>
 /// <param name="builder">The <see cref="DataViewSchema.Metadata.Builder"/> to which to add the key values.</param>
 /// <param name="size">The size of key values vector.</param>
 /// <param name="valueType">The value type of key values. Its raw type must match <typeparamref name="TValue"/>.</param>
 /// <param name="getter">The getter delegate for the key values.</param>
 public static void AddKeyValues <TValue>(this DataViewSchema.Metadata.Builder builder, int size, PrimitiveDataViewType valueType, ValueGetter <VBuffer <TValue> > getter)
 => builder.Add(MetadataUtils.Kinds.KeyValues, new VectorType(valueType, size), getter);
コード例 #16
0
 /// <summary>
 /// Add slot names metadata.
 /// </summary>
 /// <param name="builder">The <see cref="DataViewSchema.Metadata.Builder"/> to which to add the slot names.</param>
 /// <param name="size">The size of the slot names vector.</param>
 /// <param name="getter">The getter delegate for the slot names.</param>
 public static void AddSlotNames(this DataViewSchema.Metadata.Builder builder, int size, ValueGetter <VBuffer <ReadOnlyMemory <char> > > getter)
 => builder.Add(MetadataUtils.Kinds.SlotNames, new VectorType(TextDataViewType.Instance, size), getter);
コード例 #17
0
        private void HashTestCore <T>(T val, PrimitiveDataViewType type, uint expected, uint expectedOrdered, uint expectedOrdered3)
        {
            const int bits = 10;

            var builder = new DataViewSchema.Metadata.Builder();

            builder.AddPrimitiveValue("Foo", type, val);
            var inRow = MetadataUtils.MetadataAsRow(builder.ToMetadata());

            // First do an unordered hash.
            var info   = new HashingEstimator.ColumnInfo("Bar", "Foo", hashBits: bits);
            var xf     = new HashingTransformer(Env, new[] { info });
            var mapper = ((ITransformer)xf).GetRowToRowMapper(inRow.Schema);

            mapper.OutputSchema.TryGetColumnIndex("Bar", out int outCol);
            var outRow = mapper.GetRow(inRow, c => c == outCol);

            var  getter = outRow.GetGetter <uint>(outCol);
            uint result = 0;

            getter(ref result);
            Assert.Equal(expected, result);

            // Next do an ordered hash.
            info   = new HashingEstimator.ColumnInfo("Bar", "Foo", hashBits: bits, ordered: true);
            xf     = new HashingTransformer(Env, new[] { info });
            mapper = ((ITransformer)xf).GetRowToRowMapper(inRow.Schema);
            mapper.OutputSchema.TryGetColumnIndex("Bar", out outCol);
            outRow = mapper.GetRow(inRow, c => c == outCol);

            getter = outRow.GetGetter <uint>(outCol);
            getter(ref result);
            Assert.Equal(expectedOrdered, result);

            // Next build up a vector to make sure that hashing is consistent between scalar values
            // at least in the first position, and in the unordered case, the last position.
            const int vecLen   = 5;
            var       denseVec = new VBuffer <T>(vecLen, Utils.CreateArray(vecLen, val));

            builder = new DataViewSchema.Metadata.Builder();
            builder.Add("Foo", new VectorType(type, vecLen), (ref VBuffer <T> dst) => denseVec.CopyTo(ref dst));
            inRow = MetadataUtils.MetadataAsRow(builder.ToMetadata());

            info   = new HashingEstimator.ColumnInfo("Bar", "Foo", hashBits: bits, ordered: false);
            xf     = new HashingTransformer(Env, new[] { info });
            mapper = ((ITransformer)xf).GetRowToRowMapper(inRow.Schema);
            mapper.OutputSchema.TryGetColumnIndex("Bar", out outCol);
            outRow = mapper.GetRow(inRow, c => c == outCol);

            var            vecGetter = outRow.GetGetter <VBuffer <uint> >(outCol);
            VBuffer <uint> vecResult = default;

            vecGetter(ref vecResult);

            Assert.Equal(vecLen, vecResult.Length);
            // They all should equal this in this case.
            Assert.All(vecResult.DenseValues(), v => Assert.Equal(expected, v));

            // Now do ordered with the dense vector.
            info   = new HashingEstimator.ColumnInfo("Bar", "Foo", hashBits: bits, ordered: true);
            xf     = new HashingTransformer(Env, new[] { info });
            mapper = ((ITransformer)xf).GetRowToRowMapper(inRow.Schema);
            mapper.OutputSchema.TryGetColumnIndex("Bar", out outCol);
            outRow    = mapper.GetRow(inRow, c => c == outCol);
            vecGetter = outRow.GetGetter <VBuffer <uint> >(outCol);
            vecGetter(ref vecResult);

            Assert.Equal(vecLen, vecResult.Length);
            Assert.Equal(expectedOrdered, vecResult.GetItemOrDefault(0));
            Assert.Equal(expectedOrdered3, vecResult.GetItemOrDefault(3));
            Assert.All(vecResult.DenseValues(), v => Assert.True((v == 0) == (expectedOrdered == 0)));

            // Let's now do a sparse vector.
            var sparseVec = new VBuffer <T>(10, 3, Utils.CreateArray(3, val), new[] { 0, 3, 7 });

            builder = new DataViewSchema.Metadata.Builder();
            builder.Add("Foo", new VectorType(type, vecLen), (ref VBuffer <T> dst) => sparseVec.CopyTo(ref dst));
            inRow = MetadataUtils.MetadataAsRow(builder.ToMetadata());

            info   = new HashingEstimator.ColumnInfo("Bar", "Foo", hashBits: bits, ordered: false);
            xf     = new HashingTransformer(Env, new[] { info });
            mapper = ((ITransformer)xf).GetRowToRowMapper(inRow.Schema);
            mapper.OutputSchema.TryGetColumnIndex("Bar", out outCol);
            outRow    = mapper.GetRow(inRow, c => c == outCol);
            vecGetter = outRow.GetGetter <VBuffer <uint> >(outCol);
            vecGetter(ref vecResult);

            Assert.Equal(10, vecResult.Length);
            Assert.Equal(expected, vecResult.GetItemOrDefault(0));
            Assert.Equal(expected, vecResult.GetItemOrDefault(3));
            Assert.Equal(expected, vecResult.GetItemOrDefault(7));

            info   = new HashingEstimator.ColumnInfo("Bar", "Foo", hashBits: bits, ordered: true);
            xf     = new HashingTransformer(Env, new[] { info });
            mapper = ((ITransformer)xf).GetRowToRowMapper(inRow.Schema);
            mapper.OutputSchema.TryGetColumnIndex("Bar", out outCol);
            outRow    = mapper.GetRow(inRow, c => c == outCol);
            vecGetter = outRow.GetGetter <VBuffer <uint> >(outCol);
            vecGetter(ref vecResult);

            Assert.Equal(10, vecResult.Length);
            Assert.Equal(expectedOrdered, vecResult.GetItemOrDefault(0));
            Assert.Equal(expectedOrdered3, vecResult.GetItemOrDefault(3));
        }
コード例 #18
0
        internal static DataViewSchema GetModelSchema(IExceptionContext ectx, TFGraph graph, string opType = null)
        {
            var schemaBuilder = new DataViewSchema.Builder();

            foreach (var op in graph)
            {
                if (opType != null && opType != op.OpType)
                {
                    continue;
                }

                var tfType = op[0].OutputType;
                // Determine element type in Tensorflow tensor. For example, a vector of floats may get NumberType.R4 here.
                var mlType = Tf2MlNetTypeOrNull(tfType);

                // If the type is not supported in ML.NET then we cannot represent it as a column in an Schema.
                // We also cannot output it with a TensorFlowTransform, so we skip it.
                // Furthermore, operators which have NumOutputs <= 0 needs to be filtered.
                // The 'GetTensorShape' method crashes TensorFlow runtime
                // (https://github.com/dotnet/machinelearning/issues/2156) when the operator has no outputs.
                if (mlType == null || op.NumOutputs <= 0)
                {
                    continue;
                }

                // Construct the final ML.NET type of a Tensorflow variable.
                var tensorShape = graph.GetTensorShape(op[0]).ToIntArray();
                var columnType  = new VectorType(mlType);
                if (!(Utils.Size(tensorShape) == 1 && tensorShape[0] <= 0) &&
                    (Utils.Size(tensorShape) > 0 && tensorShape.Skip(1).All(x => x > 0)))
                {
                    columnType = new VectorType(mlType, tensorShape[0] > 0 ? tensorShape : tensorShape.Skip(1).ToArray());
                }

                // There can be at most two metadata fields.
                //  1. The first field always presents. Its value is this operator's type. For example,
                //     if an output is produced by an "Softmax" operator, the value of this field should be "Softmax".
                //  2. The second field stores operators whose outputs are consumed by this operator. In other words,
                //     these values are names of some upstream operators which should be evaluated before executing
                //     the current operator. It's possible that one operator doesn't need any input, so this field
                //     can be missing.
                var metadataBuilder = new DataViewSchema.Metadata.Builder();
                // Create the first metadata field.
                metadataBuilder.Add(TensorflowOperatorTypeKind, TextDataViewType.Instance, (ref ReadOnlyMemory <char> value) => value = op.OpType.AsMemory());
                if (op.NumInputs > 0)
                {
                    // Put upstream operators' names to an array (type: VBuffer) of string (type: ReadOnlyMemory<char>).
                    VBuffer <ReadOnlyMemory <char> > upstreamOperatorNames = default;
                    var bufferEditor = VBufferEditor.Create(ref upstreamOperatorNames, op.NumInputs);
                    for (int i = 0; i < op.NumInputs; ++i)
                    {
                        bufferEditor.Values[i] = op.GetInput(i).Operation.Name.AsMemory();
                    }
                    upstreamOperatorNames = bufferEditor.Commit(); // Used in metadata's getter.

                    // Create the second metadata field.
                    metadataBuilder.Add(TensorflowUpstreamOperatorsKind, new VectorType(TextDataViewType.Instance, op.NumInputs),
                                        (ref VBuffer <ReadOnlyMemory <char> > value) => { upstreamOperatorNames.CopyTo(ref value); });
                }

                schemaBuilder.AddColumn(op.Name, columnType, metadataBuilder.ToMetadata());
            }
            return(schemaBuilder.ToSchema());
        }
コード例 #19
0
            public BoundMapper(IExceptionContext ectx, TreeEnsembleFeaturizerBindableMapper owner,
                               RoleMappedSchema schema)
            {
                Contracts.AssertValue(ectx);
                ectx.AssertValue(owner);
                ectx.AssertValue(schema);
                ectx.Assert(schema.Feature.HasValue);

                _ectx = ectx;

                _owner = owner;
                InputRoleMappedSchema = schema;

                // A vector containing the output of each tree on a given example.
                var treeValueType = new VectorType(NumberDataViewType.Single, owner._ensemble.TrainedEnsemble.NumTrees);
                // An indicator vector with length = the total number of leaves in the ensemble, indicating which leaf the example
                // ends up in all the trees in the ensemble.
                var leafIdType = new VectorType(NumberDataViewType.Single, owner._totalLeafCount);
                // An indicator vector with length = the total number of nodes in the ensemble, indicating the nodes on
                // the paths of the example in all the trees in the ensemble.
                // The total number of nodes in a binary tree is equal to the number of internal nodes + the number of leaf nodes,
                // and it is also equal to the number of children of internal nodes (which is 2 * the number of internal nodes)
                // plus one (since the root node is not a child of any node). So we have #internal + #leaf = 2*(#internal) + 1,
                // which means that #internal = #leaf - 1.
                // Therefore, the number of internal nodes in the ensemble is #leaf - #trees.
                var pathIdType = new VectorType(NumberDataViewType.Single, owner._totalLeafCount - owner._ensemble.TrainedEnsemble.NumTrees);

                // Start creating output schema with types derived above.
                var schemaBuilder = new DataViewSchema.Builder();

                // Metadata of tree values.
                var treeIdMetadataBuilder = new DataViewSchema.Metadata.Builder();

                treeIdMetadataBuilder.Add(MetadataUtils.Kinds.SlotNames, MetadataUtils.GetNamesType(treeValueType.Size),
                                          (ValueGetter <VBuffer <ReadOnlyMemory <char> > >)owner.GetTreeSlotNames);
                // Add the column of trees' output values
                schemaBuilder.AddColumn(OutputColumnNames.Trees, treeValueType, treeIdMetadataBuilder.ToMetadata());

                // Metadata of leaf IDs.
                var leafIdMetadataBuilder = new DataViewSchema.Metadata.Builder();

                leafIdMetadataBuilder.Add(MetadataUtils.Kinds.SlotNames, MetadataUtils.GetNamesType(leafIdType.Size),
                                          (ValueGetter <VBuffer <ReadOnlyMemory <char> > >)owner.GetLeafSlotNames);
                leafIdMetadataBuilder.Add(MetadataUtils.Kinds.IsNormalized, BooleanDataViewType.Instance, (ref bool value) => value = true);
                // Add the column of leaves' IDs where the input example reaches.
                schemaBuilder.AddColumn(OutputColumnNames.Leaves, leafIdType, leafIdMetadataBuilder.ToMetadata());

                // Metadata of path IDs.
                var pathIdMetadataBuilder = new DataViewSchema.Metadata.Builder();

                pathIdMetadataBuilder.Add(MetadataUtils.Kinds.SlotNames, MetadataUtils.GetNamesType(pathIdType.Size),
                                          (ValueGetter <VBuffer <ReadOnlyMemory <char> > >)owner.GetPathSlotNames);
                pathIdMetadataBuilder.Add(MetadataUtils.Kinds.IsNormalized, BooleanDataViewType.Instance, (ref bool value) => value = true);
                // Add the column of encoded paths which the input example passes.
                schemaBuilder.AddColumn(OutputColumnNames.Paths, pathIdType, pathIdMetadataBuilder.ToMetadata());

                OutputSchema = schemaBuilder.ToSchema();

                // Tree values must be the first output column.
                Contracts.Assert(OutputSchema[OutputColumnNames.Trees].Index == TreeValuesColumnId);
                // leaf IDs must be the second output column.
                Contracts.Assert(OutputSchema[OutputColumnNames.Leaves].Index == LeafIdsColumnId);
                // Path IDs must be the third output column.
                Contracts.Assert(OutputSchema[OutputColumnNames.Paths].Index == PathIdsColumnId);
            }