public DataView(IHostEnvironment env, ArrayDataViewBuilder builder, int rowCount)
            {
                Contracts.AssertValue(env, "env");
                _host = env.Register("ArrayDataView");

                _host.AssertValue(builder);
                _host.Assert(rowCount >= 0);
                _host.Assert(builder._names.Count == builder._columns.Count);
                _columns = builder._columns.ToArray();

                var schemaBuilder = new DataViewSchema.Builder();

                for (int i = 0; i < _columns.Length; i++)
                {
                    var meta = new DataViewSchema.Annotations.Builder();

                    if (builder._getSlotNames.TryGetValue(builder._names[i], out var slotNamesGetter))
                    {
                        meta.AddSlotNames(_columns[i].Type.GetVectorSize(), slotNamesGetter);
                    }

                    if (builder._getKeyValues.TryGetValue(builder._names[i], out var keyValueGetter))
                    {
                        meta.AddKeyValues(_columns[i].Type.GetKeyCountAsInt32(_host), TextDataViewType.Instance, keyValueGetter);
                    }
                    schemaBuilder.AddColumn(builder._names[i], _columns[i].Type, meta.ToAnnotations());
                }

                _schema   = schemaBuilder.ToSchema();
                _rowCount = rowCount;
            }
        public static DataViewSchema Create(SchemaShape shape)
        {
            var builder = new DataViewSchema.Builder();

            for (int i = 0; i < shape.Count; ++i)
            {
                var metaBuilder        = new DataViewSchema.Annotations.Builder();
                var partialAnnotations = shape[i].Annotations;
                for (int j = 0; j < partialAnnotations.Count; ++j)
                {
                    var      metaColumnType = MakeColumnType(partialAnnotations[j]);
                    Delegate del;
                    if (metaColumnType is VectorType vectorType)
                    {
                        del = Utils.MarshalInvoke(GetDefaultVectorGetter <int>, vectorType.ItemType.RawType);
                    }
                    else
                    {
                        del = Utils.MarshalInvoke(GetDefaultGetter <int>, metaColumnType.RawType);
                    }
                    metaBuilder.Add(partialAnnotations[j].Name, metaColumnType, del);
                }
                builder.AddColumn(shape[i].Name, MakeColumnType(shape[i]), metaBuilder.ToAnnotations());
            }
            return(builder.ToSchema());
        }
 private static DataViewSchema.Annotations KeyValueMetadataFromMetadata<T>(DataViewSchema.Annotations meta, DataViewSchema.Column metaCol)
 {
     Contracts.AssertValue(meta);
     Contracts.Assert(metaCol.Type.RawType == typeof(T));
     var builder = new DataViewSchema.Annotations.Builder();
     builder.Add(AnnotationUtils.Kinds.KeyValues, metaCol.Type, meta.GetGetter<T>(metaCol));
     return builder.ToAnnotations();
 }
        /// <summary>
        /// Return a <see cref="DataViewSchema"/> which contains a single score column.
        /// </summary>
        /// <param name="scoreType">The type of the score column.</param>
        /// <param name="scoreColumnKindValue">The kind of the score column. It's the value of <see cref="AnnotationUtils.Kinds.ScoreColumnKind"/> in the score column's metadata.</param>
        /// <param name="scoreColumnName">The score column's name in the generated <see cref="DataViewSchema"/>.</param>
        /// <returns><see cref="DataViewSchema"/> which contains only one column.</returns>
        public static DataViewSchema Create(DataViewType scoreType, string scoreColumnKindValue, string scoreColumnName = AnnotationUtils.Const.ScoreValueKind.Score)
        {
            Contracts.CheckValue(scoreType, nameof(scoreType));
            Contracts.CheckNonEmpty(scoreColumnKindValue, nameof(scoreColumnKindValue));

            // Two metadata fields. One can set up by caller of this function while the other one is a constant.
            var metadataBuilder = new DataViewSchema.Annotations.Builder();

            metadataBuilder.Add(AnnotationUtils.Kinds.ScoreColumnKind, TextDataViewType.Instance,
                                (ref ReadOnlyMemory <char> value) => { value = scoreColumnKindValue.AsMemory(); });
            metadataBuilder.Add(AnnotationUtils.Kinds.ScoreValueKind, TextDataViewType.Instance,
                                (ref ReadOnlyMemory <char> value) => { value = AnnotationUtils.Const.ScoreValueKind.Score.AsMemory(); });

            // Build a schema consisting of a single column.
            var schemaBuilder = new DataViewSchema.Builder();

            schemaBuilder.AddColumn(scoreColumnName, scoreType, metadataBuilder.ToAnnotations());

            return(schemaBuilder.ToSchema());
        }
        void SimpleTest()
        {
            var metadataBuilder = new DataViewSchema.Annotations.Builder();

            metadataBuilder.Add("M", NumberDataViewType.Single, (ref float v) => v = 484f);
            var schemaBuilder = new DataViewSchema.Builder();

            schemaBuilder.AddColumn("A", new VectorDataViewType(NumberDataViewType.Single, 94));
            schemaBuilder.AddColumn("B", new KeyDataViewType(typeof(uint), 17));
            schemaBuilder.AddColumn("C", NumberDataViewType.Int32, metadataBuilder.ToAnnotations());

            var shape = SchemaShape.Create(schemaBuilder.ToSchema());

            var fakeSchema = FakeSchemaFactory.Create(shape);

            var columnA = fakeSchema[0];
            var columnB = fakeSchema[1];
            var columnC = fakeSchema[2];

            Assert.Equal("A", columnA.Name);
            Assert.Equal(NumberDataViewType.Single, columnA.Type.GetItemType());
            Assert.Equal(10, columnA.Type.GetValueCount());

            Assert.Equal("B", columnB.Name);
            Assert.Equal(InternalDataKind.U4, columnB.Type.GetRawKind());
            Assert.Equal(10u, columnB.Type.GetKeyCount());

            Assert.Equal("C", columnC.Name);
            Assert.Equal(NumberDataViewType.Int32, columnC.Type);

            var metaC = columnC.Annotations;

            Assert.Single(metaC.Schema);

            float mValue = -1;

            metaC.GetValue("M", ref mValue);
            Assert.Equal(default, mValue);
        /// <summary>
        /// Create a <see cref="DataViewSchema"/> with two columns for binary classifier. The first column, indexed by 0, is the score column.
        /// The second column is the probability column. For example, for linear support vector machine, score column stands for the inner product
        /// of linear coefficients and the input feature vector and we convert score column to probability column using a calibrator.
        /// </summary>
        /// <param name="scoreColumnName">Column name of score column</param>
        /// <param name="probabilityColumnName">Column name of probability column</param>
        /// <returns><see cref="DataViewSchema"/> of binary classifier's output.</returns>
        public static DataViewSchema CreateBinaryClassificationSchema(string scoreColumnName       = AnnotationUtils.Const.ScoreValueKind.Score,
                                                                      string probabilityColumnName = AnnotationUtils.Const.ScoreValueKind.Probability)
        {
            // Schema of Score column. We are going to extend it by adding a Probability column.
            var partialSchema = Create(NumberDataViewType.Single, AnnotationUtils.Const.ScoreColumnKind.BinaryClassification, scoreColumnName);

            var schemaBuilder = new DataViewSchema.Builder();

            // Copy Score column from partialSchema.
            schemaBuilder.AddColumn(partialSchema[0].Name, partialSchema[0].Type, partialSchema[0].Annotations);

            // Create Probability column's metadata.
            var probabilityMetadataBuilder = new DataViewSchema.Annotations.Builder();

            probabilityMetadataBuilder.Add(AnnotationUtils.Kinds.IsNormalized, BooleanDataViewType.Instance, (ref bool value) => { value = true; });
            probabilityMetadataBuilder.Add(AnnotationUtils.Kinds.ScoreColumnKind, TextDataViewType.Instance,
                                           (ref ReadOnlyMemory <char> value) => { value = AnnotationUtils.Const.ScoreColumnKind.BinaryClassification.AsMemory(); });
            probabilityMetadataBuilder.Add(AnnotationUtils.Kinds.ScoreValueKind, TextDataViewType.Instance,
                                           (ref ReadOnlyMemory <char> value) => { value = AnnotationUtils.Const.ScoreValueKind.Probability.AsMemory(); });

            // Add probability column.
            schemaBuilder.AddColumn(probabilityColumnName, NumberDataViewType.Single, probabilityMetadataBuilder.ToAnnotations());

            return(schemaBuilder.ToSchema());
        }
        /// <summary>
        /// This function returns a schema for sequence predictor's output. Its output column is always called <see cref="AnnotationUtils.Const.ScoreValueKind.PredictedLabel"/>.
        /// </summary>
        /// <param name="scoreType">Score column's type produced by sequence predictor.</param>
        /// <param name="scoreColumnKindValue">A metadata value of score column. It's the value associated with key
        /// <see cref="AnnotationUtils.Kinds.ScoreColumnKind"/>.</param>
        /// <param name="keyNames">Sequence predictor usually generates integer outputs. This field tells the tags of all possible output values.
        /// For example, output integer 0 could be mapped to "Sell" and 0 to "Buy" when predicting stock trend.</param>
        /// <returns><see cref="DataViewSchema"/> of sequence predictor's output.</returns>
        public static DataViewSchema CreateSequencePredictionSchema(DataViewType scoreType, string scoreColumnKindValue, VBuffer <ReadOnlyMemory <char> > keyNames = default)
        {
            Contracts.CheckValue(scoreType, nameof(scoreType));
            Contracts.CheckValue(scoreColumnKindValue, nameof(scoreColumnKindValue));

            var metadataBuilder = new DataViewSchema.Annotations.Builder();

            // Add metadata columns including their getters. We starts with key names of predicted keys if they exist.
            if (keyNames.Length > 0)
            {
                metadataBuilder.AddKeyValues(keyNames.Length, TextDataViewType.Instance,
                                             (ref VBuffer <ReadOnlyMemory <char> > value) => value = keyNames);
            }
            metadataBuilder.Add(AnnotationUtils.Kinds.ScoreColumnKind, TextDataViewType.Instance,
                                (ref ReadOnlyMemory <char> value) => value = scoreColumnKindValue.AsMemory());
            metadataBuilder.Add(AnnotationUtils.Kinds.ScoreValueKind, TextDataViewType.Instance,
                                (ref ReadOnlyMemory <char> value) => value = AnnotationUtils.Const.ScoreValueKind.PredictedLabel.AsMemory());

            // Build a schema consisting of a single column.
            var schemaBuilder = new DataViewSchema.Builder();

            schemaBuilder.AddColumn(AnnotationUtils.Const.ScoreValueKind.PredictedLabel, scoreType, metadataBuilder.ToAnnotations());

            return(schemaBuilder.ToSchema());
        }
Beispiel #8
0
            protected override DataViewSchema.DetachedColumn[] GetOutputColumnsCore()
            {
                var result = new DataViewSchema.DetachedColumn[_parent.ColumnPairs.Length];

                for (int i = 0; i < _parent.ColumnPairs.Length; i++)
                {
                    var builder = new DataViewSchema.Annotations.Builder();
                    builder.Add(InputSchema[ColMapNewToOld[i]].Annotations, x => x == AnnotationUtils.Kinds.KeyValues || x == AnnotationUtils.Kinds.IsNormalized);
                    result[i] = new DataViewSchema.DetachedColumn(_parent.ColumnPairs[i].outputColumnName, _types[i], builder.ToAnnotations());
                }
                return(result);
            }
Beispiel #9
0
        private static DataViewSchema CreateSchema(ColumnBindingsBase inputBindings)
        {
            Contracts.CheckValue(inputBindings, nameof(inputBindings));

            var builder = new DataViewSchema.Builder();

            for (int i = 0; i < inputBindings.ColumnCount; i++)
            {
                var meta = new DataViewSchema.Annotations.Builder();
                foreach (var kvp in inputBindings.GetAnnotationTypes(i))
                {
                    var getter = Utils.MarshalInvoke(GetAnnotationGetterDelegate <int>, kvp.Value.RawType, inputBindings, i, kvp.Key);
                    meta.Add(kvp.Key, kvp.Value, getter);
                }
                builder.AddColumn(inputBindings.GetColumnName(i), inputBindings.GetColumnType(i), meta.ToAnnotations());
            }

            return(builder.ToSchema());
        }
Beispiel #10
0
            public UngroupBinding(IExceptionContext ectx, DataViewSchema inputSchema, UngroupMode mode, string[] pivotColumns)
            {
                Contracts.AssertValueOrNull(ectx);
                _ectx = ectx;
                _ectx.AssertValue(inputSchema);
                _ectx.AssertNonEmpty(pivotColumns);

                _inputSchema = inputSchema; // This also makes InputColumnCount valid.
                Mode         = mode;

                Bind(_ectx, inputSchema, pivotColumns, out _infos);

                _pivotIndex = Utils.CreateArray(InputColumnCount, -1);
                for (int i = 0; i < _infos.Length; i++)
                {
                    var info = _infos[i];
                    _ectx.Assert(_pivotIndex[info.Index] == -1);
                    _pivotIndex[info.Index] = i;
                }

                var schemaBuilder = new DataViewSchema.Builder();

                // Iterate through input columns. Input columns which are not pivot columns will be copied to output schema with the same column index unchanged.
                // Input columns which are pivot columns would also be copied but with different data types and different metadata.
                for (int i = 0; i < InputColumnCount; ++i)
                {
                    if (_pivotIndex[i] < 0)
                    {
                        // i-th input column is not a pivot column. Let's do a naive copy.
                        schemaBuilder.AddColumn(inputSchema[i].Name, inputSchema[i].Type, inputSchema[i].Annotations);
                    }
                    else
                    {
                        // i-th input column is a pivot column. Let's calculate proper type and metadata for it.
                        var metadataBuilder = new DataViewSchema.Annotations.Builder();
                        metadataBuilder.Add(inputSchema[i].Annotations, metadataName => ShouldPreserveMetadata(metadataName));
                        // To explain the output type of pivot columns, let's consider a row
                        //   Age UserID
                        //   18  {"Amy", "Willy"}
                        // where "Age" and "UserID" are column names and 18/{"Amy", "Willy"} is "Age"/"UserID" column in this example row.
                        // If the only pivot column is "UserID", the ungroup may produce
                        //   Age UserID
                        //   18  "Amy"
                        //   18  "Willy"
                        // One can see that "UserID" column (in output data) has a type identical to the element's type of the "UserID" column in input data.
                        schemaBuilder.AddColumn(inputSchema[i].Name, inputSchema[i].Type.GetItemType(), metadataBuilder.ToAnnotations());
                    }
                }
                OutputSchema = schemaBuilder.ToSchema();
            }
            protected override DataViewSchema.DetachedColumn[] GetOutputColumnsCore()
            {
                var stdSuffix = ".output";
                var info      = new DataViewSchema.DetachedColumn[_parent.Outputs.Length];

                for (int i = 0; i < _parent.Outputs.Length; i++)
                {
                    var onnxOutputName = _parent.Outputs[i];
                    var columnName     = onnxOutputName.EndsWith(stdSuffix) ? onnxOutputName.Replace(stdSuffix, "") : onnxOutputName;

                    var builder = new DataViewSchema.Annotations.Builder();
                    AddSlotNames(columnName, builder);

                    info[i] = new DataViewSchema.DetachedColumn(columnName, _parent.OutputTypes[i], builder.ToAnnotations());
                }
                return(info);
            }
Beispiel #12
0
        private protected override DataViewSchema.DetachedColumn[] GetOutputColumnsCore()
        {
            var infos = new DataViewSchema.DetachedColumn[3];

            infos[ClusterIdCol] = new DataViewSchema.DetachedColumn(ClusterId, _types[ClusterIdCol], null);

            var slotNamesType = new VectorType(TextDataViewType.Instance, _numClusters);

            var sortedClusters = new DataViewSchema.Annotations.Builder();
            int vectorSize     = slotNamesType.GetVectorSize();

            sortedClusters.AddSlotNames(vectorSize, CreateSlotNamesGetter(_numClusters, "Cluster"));

            var builder = new DataViewSchema.Annotations.Builder();

            builder.AddSlotNames(vectorSize, CreateSlotNamesGetter(_numClusters, "Score"));

            infos[SortedClusterCol]      = new DataViewSchema.DetachedColumn(SortedClusters, _types[SortedClusterCol], sortedClusters.ToAnnotations());
            infos[SortedClusterScoreCol] = new DataViewSchema.DetachedColumn(SortedClusterScores, _types[SortedClusterScoreCol], builder.ToAnnotations());
            return(infos);
        }
            protected override DataViewSchema.DetachedColumn[] GetOutputColumnsCore()
            {
                var builder    = new DataViewSchema.Annotations.Builder();
                var annotation = InputSchema[_scoreColIndex].Annotations;
                var schema     = annotation.Schema;

                // We only propagate this training column metadata if it looks like it's all there, and all correct.
                if (schema.GetColumnOrNull(AnnotationUtils.Kinds.ScoreColumnSetId) is DataViewSchema.Column setIdCol &&
                    setIdCol.Type is KeyDataViewType setIdType && setIdType.RawType == typeof(uint) &&
                    schema.GetColumnOrNull(AnnotationUtils.Kinds.ScoreColumnKind) is DataViewSchema.Column kindCol &&
                    kindCol.Type is TextDataViewType &&
                    schema.GetColumnOrNull(AnnotationUtils.Kinds.ScoreValueKind) is DataViewSchema.Column valueKindCol &&
                    valueKindCol.Type is TextDataViewType)
                {
                    builder.Add(setIdCol.Name, setIdType, annotation.GetGetter <uint>(setIdCol));
                    // Now, this next one I'm a little less sure about. It is entirely reasonable for someone to, say,
                    // try to calibrate the result of a regression or ranker training, or something else. But should we
                    // just pass through this class just like that? Having throught through the alternatives I view this
                    // as the least harmful thing we could be doing, but it is something to consider I may be wrong
                    // about if it proves that it ever causes problems to, say, have something identified as a probability
                    // column but be marked as being a regression task, or what have you.
                    builder.Add(kindCol.Name, kindCol.Type, annotation.GetGetter <ReadOnlyMemory <char> >(kindCol));
                    builder.Add(valueKindCol.Name, valueKindCol.Type, annotation.GetGetter <ReadOnlyMemory <char> >(valueKindCol));
                }
                // Probabilities are always considered normalized.
                builder.Add(AnnotationUtils.Kinds.IsNormalized, BooleanDataViewType.Instance, (ref bool value) => value = true);

                return(new[]
                {
                    new DataViewSchema.DetachedColumn(DefaultColumnNames.Probability, NumberDataViewType.Single, builder.ToAnnotations())
                });
            }
Beispiel #14
0
        /// <summary>
        /// Manufacture an instance of <see cref="DataViewSchema"/> out of any <see cref="ISchema"/>.
        /// </summary>
        public static DataViewSchema Create(ISchema inputSchema)
        {
            Contracts.CheckValue(inputSchema, nameof(inputSchema));

            var builder = new DataViewSchema.Builder();

            for (int i = 0; i < inputSchema.ColumnCount; i++)
            {
                var meta = new DataViewSchema.Annotations.Builder();
                foreach (var kvp in inputSchema.GetMetadataTypes(i))
                {
                    var getter = Utils.MarshalInvoke(GetMetadataGetterDelegate <int>, kvp.Value.RawType, inputSchema, i, kvp.Key);
                    meta.Add(kvp.Key, kvp.Value, getter);
                }
                builder.AddColumn(inputSchema.GetColumnName(i), inputSchema.GetColumnType(i), meta.ToAnnotations());
            }

            return(builder.ToSchema());
        }
Beispiel #15
0
            /// <summary>
            /// Compute the output schema of a <see cref="GroupTransform"/> given a input schema.
            /// </summary>
            /// <param name="sourceSchema">Input schema.</param>
            /// <returns>The associated output schema produced by <see cref="GroupTransform"/>.</returns>
            private DataViewSchema BuildOutputSchema(DataViewSchema sourceSchema)
            {
                // Create schema build. We will sequentially add group columns and then aggregated columns.
                var schemaBuilder = new DataViewSchema.Builder();

                // Handle group(-key) columns. Those columns are used as keys to partition rows in the input data; specifically,
                // rows with the same key value will be merged into one row in the output data.
                foreach (var groupKeyColumnName in _groupColumns)
                {
                    schemaBuilder.AddColumn(groupKeyColumnName, sourceSchema[groupKeyColumnName].Type, sourceSchema[groupKeyColumnName].Annotations);
                }

                // Handle aggregated (aka keep) columns.
                foreach (var groupValueColumnName in _keepColumns)
                {
                    // Prepare column's metadata.
                    var metadataBuilder = new DataViewSchema.Annotations.Builder();
                    metadataBuilder.Add(sourceSchema[groupValueColumnName].Annotations,
                                        s => s == AnnotationUtils.Kinds.IsNormalized || s == AnnotationUtils.Kinds.KeyValues);

                    // Prepare column's type.
                    var aggregatedValueType = sourceSchema[groupValueColumnName].Type as PrimitiveDataViewType;
                    _ectx.CheckValue(aggregatedValueType, nameof(aggregatedValueType), "Columns being aggregated must be primitive types such as string, float, or integer");
                    var aggregatedResultType = new VectorType(aggregatedValueType);

                    // Add column into output schema.
                    schemaBuilder.AddColumn(groupValueColumnName, aggregatedResultType, metadataBuilder.ToAnnotations());
                }

                return(schemaBuilder.ToSchema());
            }