public void PurposeInferenceHiddenColumnsTest()
        {
            var mlContext = new MLContext();

            // Start from a minimal two-column view: a Boolean label and a Single feature.
            var builder = new DataViewSchema.Builder();
            builder.AddColumn(DefaultColumnNames.Label, BooleanDataViewType.Instance);
            builder.AddColumn(DefaultColumnNames.Features, NumberDataViewType.Single);
            IDataView rawData = DataViewTestFixture.BuildDummyDataView(builder.ToSchema());

            // Normalizing 'Features' in place leaves two columns named 'Features' in the
            // resulting view; the earlier one gets marked as 'Hidden'.
            var fittedNormalizer = mlContext.Transforms
                                            .NormalizeMinMax(DefaultColumnNames.Features)
                                            .Fit(rawData);
            IDataView normalizedData = fittedNormalizer.Transform(rawData);

            var purposes = PurposeInference.InferPurposes(mlContext, normalizedData, new ColumnInformation());

            // One purpose is expected per physical column, the hidden one included.
            Assert.Equal(3, purposes.Count());
            Assert.Equal(ColumnPurpose.Label, purposes[0].Purpose);
            // The hidden (pre-normalization) 'Features' column must be ignored.
            Assert.Equal(ColumnPurpose.Ignore, purposes[1].Purpose);
            // The visible (normalized) 'Features' column is a numeric feature.
            Assert.Equal(ColumnPurpose.NumericFeature, purposes[2].Purpose);
        }
        /// <summary>
        /// Builds the two-column <see cref="DataViewSchema"/> emitted by a binary classifier: column 0 is the raw score and
        /// column 1 is the probability. For a linear support vector machine, for instance, the score is the inner product of the
        /// linear coefficients with the input feature vector, and a calibrator turns that score into the probability.
        /// </summary>
        /// <param name="scoreColumnName">Name given to the score column.</param>
        /// <param name="probabilityColumnName">Name given to the probability column.</param>
        /// <returns>The <see cref="DataViewSchema"/> describing a binary classifier's output.</returns>
        public static DataViewSchema CreateBinaryClassificationSchema(string scoreColumnName       = AnnotationUtils.Const.ScoreValueKind.Score,
                                                                      string probabilityColumnName = AnnotationUtils.Const.ScoreValueKind.Probability)
        {
            // The single-column score schema comes from the generic factory; its only column is reused as-is below.
            var scoreOnlySchema = Create(NumberDataViewType.Single, AnnotationUtils.Const.ScoreColumnKind.BinaryClassification, scoreColumnName);
            var scoreColumn     = scoreOnlySchema[0];

            var result = new DataViewSchema.Builder();
            result.AddColumn(scoreColumn.Name, scoreColumn.Type, scoreColumn.Annotations);

            // Annotations of the probability column: normalized flag, score column kind and score value kind.
            var probabilityAnnotations = new DataViewSchema.Annotations.Builder();
            probabilityAnnotations.Add(
                AnnotationUtils.Kinds.IsNormalized,
                BooleanDataViewType.Instance,
                (ref bool value) => value = true);
            probabilityAnnotations.Add(
                AnnotationUtils.Kinds.ScoreColumnKind,
                TextDataViewType.Instance,
                (ref ReadOnlyMemory<char> value) => value = AnnotationUtils.Const.ScoreColumnKind.BinaryClassification.AsMemory());
            probabilityAnnotations.Add(
                AnnotationUtils.Kinds.ScoreValueKind,
                TextDataViewType.Instance,
                (ref ReadOnlyMemory<char> value) => value = AnnotationUtils.Const.ScoreValueKind.Probability.AsMemory());

            // The probability column goes after the score column.
            result.AddColumn(probabilityColumnName, NumberDataViewType.Single, probabilityAnnotations.ToAnnotations());

            return result.ToSchema();
        }
Exemplo n.º 3
0
            /// <summary>
            /// Derives the schema a <see cref="GroupTransform"/> produces from the schema it consumes.
            /// </summary>
            /// <param name="sourceSchema">Schema of the input data.</param>
            /// <returns>Output schema: the group-key columns first, followed by the aggregated columns.</returns>
            private DataViewSchema BuildOutputSchema(DataViewSchema sourceSchema)
            {
                var outputBuilder = new DataViewSchema.Builder();

                // Group-key columns partition the input rows: rows sharing a key value collapse into one output row.
                // Their type and annotations carry over unchanged.
                foreach (var keyName in _groupColumns)
                {
                    var keyColumn = sourceSchema[keyName];
                    outputBuilder.AddColumn(keyName, keyColumn.Type, keyColumn.Annotations);
                }

                // Aggregated (aka keep) columns become vectors of their original primitive type.
                foreach (var keepName in _keepColumns)
                {
                    var keepColumn = sourceSchema[keepName];

                    // Only the IsNormalized and KeyValues annotations are carried over.
                    var annotations = new DataViewSchema.Annotations.Builder();
                    annotations.Add(
                        keepColumn.Annotations,
                        kind => kind == AnnotationUtils.Kinds.IsNormalized || kind == AnnotationUtils.Kinds.KeyValues);

                    // The source type must be primitive so that it can serve as the vector's item type.
                    var aggregatedValueType = keepColumn.Type as PrimitiveDataViewType;
                    _ectx.CheckValue(aggregatedValueType, nameof(aggregatedValueType), "Columns being aggregated must be primitive types such as string, float, or integer");

                    outputBuilder.AddColumn(keepName, new VectorType(aggregatedValueType), annotations.ToAnnotations());
                }

                return outputBuilder.ToSchema();
            }
Exemplo n.º 4
0
        public void SelectColumns_FeatureColumns_OnlyThem()
        {
            // Arrange: a two-text-column schema and two rows matching it.
            var builder = new DataViewSchema.Builder();
            builder.AddColumn("Test1", TextDataViewType.Instance);
            builder.AddColumn("Test2", TextDataViewType.Instance);
            var inputSchema = builder.ToSchema();

            var rows = new[]
            {
                new { Test1 = "Hii", Test2 = "Xii" },
                new { Test1 = "GGG", Test2 = "kkk" }
            };
            var inputData = _mlContext.Data.LoadFromEnumerable(rows, inputSchema);

            // Act: keep only "Test1".
            var pipeline      = new PipelineBuilder(_mlContext, inputSchema).SelectColumns("Test1");
            var resultSchema  = pipeline.TransformData(inputData).Schema;
            var resultColumns = resultSchema.Select(column => column.Name);

            // Assert: exactly one column survives and it is the selected one.
            Assert.Single(resultSchema);
            Assert.Contains("Test1", resultColumns);
        }
Exemplo n.º 5
0
            /// <summary>
            /// Wraps <paramref name="data"/> and exposes a fixed two-column schema:
            /// a Boolean "Label" column followed by a text "Text" column.
            /// </summary>
            /// <param name="data">The objects backing this view; stored as-is.</param>
            public InputObjectDataView(IEnumerable<InputObject> data)
            {
                var schemaBuilder = new DataViewSchema.Builder();
                schemaBuilder.AddColumn("Label", BooleanDataViewType.Instance);
                schemaBuilder.AddColumn("Text", TextDataViewType.Instance);

                _data  = data;
                Schema = schemaBuilder.ToSchema();
            }
        public void ValidateColumnNotContainedInData()
        {
            // The data only has 'Features' and 'Label' columns...
            var builder = new DataViewSchema.Builder();
            builder.AddColumn(DefaultColumnNames.Features, NumberDataViewType.Single);
            builder.AddColumn(DefaultColumnNames.Label, NumberDataViewType.Single);
            var data = DataViewTestFixture.BuildDummyDataView(builder.ToSchema());

            // ...while the column information names a categorical column the data does not have.
            var columnInformation = new ColumnInformation();
            columnInformation.CategoricalColumnNames.Add("Categorical");

            Action validate = () => ColumnInferenceValidationUtil.ValidateSpecifiedColumnsExist(columnInformation, data);

            Assert.Throws<ArgumentException>(validate);
        }
        public void ValidateEmptyTrainingDataThrows()
        {
            // Build a training view with a valid schema but without the dummy row.
            var builder = new DataViewSchema.Builder();
            builder.AddColumn("Number", NumberDataViewType.Single);
            builder.AddColumn(DefaultColumnNames.Label, NumberDataViewType.Single);
            var emptyTrainingData = DataViewTestFixture.BuildDummyDataView(builder.ToSchema(), createDummyRow: false);

            Action validate = () => UserInputValidationUtil.ValidateExperimentExecuteArgs(
                emptyTrainingData, new ColumnInformation(), null, TaskKind.Regression);

            var error = Assert.Throws<ArgumentException>(validate);
            Assert.StartsWith("Training data has 0 rows", error.Message);
        }
        public void ValidateFeaturesColInvalidType()
        {
            // The 'Features' column is deliberately declared as Double.
            var builder = new DataViewSchema.Builder();
            builder.AddColumn(DefaultColumnNames.Features, NumberDataViewType.Double);
            builder.AddColumn(DefaultColumnNames.Label, NumberDataViewType.Single);
            var data = DataViewTestFixture.BuildDummyDataView(builder.ToSchema());

            Action validate = () => UserInputValidationUtil.ValidateExperimentExecuteArgs(
                data, new ColumnInformation(), null, TaskKind.Regression);

            var error = Assert.Throws<ArgumentException>(validate);
            Assert.StartsWith("Features column must be of data type Single", error.Message);
        }
        public void ValidateProhibitedFeatureColumnType()
        {
            // A UInt64 column sits next to the label in the data handed to validation.
            var builder = new DataViewSchema.Builder();
            builder.AddColumn("UInt64", NumberDataViewType.UInt64);
            builder.AddColumn(DefaultColumnNames.Label, NumberDataViewType.Single);
            var data = DataViewTestFixture.BuildDummyDataView(builder.ToSchema());

            Action validate = () => UserInputValidationUtil.ValidateExperimentExecuteArgs(
                data, new ColumnInformation(), null, TaskKind.Regression);

            var error = Assert.Throws<ArgumentException>(validate);
            Assert.StartsWith("Only supported feature column types are Boolean, Single, and String. Please change the feature column UInt64 of type UInt64 to one of the supported types.", error.Message);
        }
Exemplo n.º 10
0
            /// <summary>
            /// Binds the pivot columns against <paramref name="inputSchema"/> and computes <see cref="OutputSchema"/>.
            /// </summary>
            /// <param name="ectx">Exception context used for assertions; may be null.</param>
            /// <param name="inputSchema">Schema of the data being ungrouped.</param>
            /// <param name="mode">Ungroup mode, stored in <see cref="Mode"/>.</param>
            /// <param name="pivotColumns">Names of the columns to pivot; must be non-empty.</param>
            public UngroupBinding(IExceptionContext ectx, DataViewSchema inputSchema, UngroupMode mode, string[] pivotColumns)
            {
                Contracts.AssertValueOrNull(ectx);
                _ectx = ectx;
                _ectx.AssertValue(inputSchema);
                _ectx.AssertNonEmpty(pivotColumns);

                // Assigning the input schema is what makes InputColumnCount valid, so it must come first.
                _inputSchema = inputSchema;
                Mode         = mode;

                Bind(_ectx, inputSchema, pivotColumns, out _infos);

                // Reverse lookup from input column index to position in _infos; -1 marks a non-pivot column.
                _pivotIndex = Utils.CreateArray(InputColumnCount, -1);
                for (int infoIndex = 0; infoIndex < _infos.Length; infoIndex++)
                {
                    var inputIndex = _infos[infoIndex].Index;
                    _ectx.Assert(_pivotIndex[inputIndex] == -1);
                    _pivotIndex[inputIndex] = infoIndex;
                }

                // Every input column is emitted at the same index in the output schema. Non-pivot columns are
                // copied verbatim; pivot columns change type and keep only part of their annotations.
                var outputBuilder = new DataViewSchema.Builder();
                for (int col = 0; col < InputColumnCount; ++col)
                {
                    var inputColumn = inputSchema[col];

                    if (_pivotIndex[col] >= 0)
                    {
                        // Pivot column. Consider the row
                        //   Age UserID
                        //   18  {"Amy", "Willy"}
                        // With "UserID" as the only pivot column, ungrouping may yield
                        //   Age UserID
                        //   18  "Amy"
                        //   18  "Willy"
                        // so the output column takes the item type of the input column.
                        var annotations = new DataViewSchema.Annotations.Builder();
                        annotations.Add(inputColumn.Annotations, kind => ShouldPreserveMetadata(kind));
                        outputBuilder.AddColumn(inputColumn.Name, inputColumn.Type.GetItemType(), annotations.ToAnnotations());
                    }
                    else
                    {
                        // Non-pivot column: plain copy.
                        outputBuilder.AddColumn(inputColumn.Name, inputColumn.Type, inputColumn.Annotations);
                    }
                }

                OutputSchema = outputBuilder.ToSchema();
            }
Exemplo n.º 11
0
        /// <summary>
        /// Classifies every input column and builds the output schema.
        /// <para/>
        /// Columns whose name is in <c>_columnNames</c> are recorded in <c>_bindings.vectorToInputMap</c>; the column named
        /// <paramref name="lengthColumnName"/> is recorded in <c>_bindings.lengthColumn</c>; every other column is copied to
        /// the output schema (name and type only) and recorded in <c>_bindings.outputToInputMap</c>. When at least one column
        /// is to be vectorized, a single vector column is appended after the copied columns, named after the first such column.
        /// </summary>
        /// <param name="inputSchema">Schema of the incoming data.</param>
        /// <param name="lengthColumnName">Name of the length column; it does not appear in the output schema.</param>
        /// <returns>The output schema.</returns>
        private DataViewSchema ProcessInputSchema(DataViewSchema inputSchema, string lengthColumnName)
        {
            var builder = new DataViewSchema.Builder();

            for (int i = 0; i < inputSchema.Count; i++)
            {
                var name = inputSchema[i].Name;

                if (_columnNames.Contains(name))
                {
                    _bindings.vectorToInputMap.Add(i);
                }
                else if (name == lengthColumnName)
                {
                    _bindings.lengthColumn = i;
                }
                else
                {
                    builder.AddColumn(name, inputSchema[i].Type);
                    _bindings.outputToInputMap.Add(i);
                }
            }

            if (_bindings.vectorToInputMap.Count > 0)
            {
                var firstColumn = inputSchema[_bindings.vectorToInputMap[0]];

                // The vector's item type must be primitive. Report a non-primitive column explicitly
                // instead of failing later with a null dereference.
                var type = firstColumn.Type as PrimitiveDataViewType;
                if (type == null)
                {
                    throw Contracts.Except("Input column '{0}' to vectorize must be of a primitive type. Found {1}.",
                                           firstColumn.Name,
                                           firstColumn.Type);
                }

                for (int i = 1; i < _bindings.vectorToInputMap.Count; i++)
                {
                    var nextColumn = inputSchema[_bindings.vectorToInputMap[i]];
                    var nextType   = nextColumn.Type as PrimitiveDataViewType;
                    if (nextType == null)
                    {
                        throw Contracts.Except("Input column '{0}' to vectorize must be of a primitive type. Found {1}.",
                                               nextColumn.Name,
                                               nextColumn.Type);
                    }

                    if (!nextType.Equals(type))
                    {
                        throw Contracts.Except("Input data types of the columns to vectorize must " +
                                               "all be of the same type. Found {0} and {1}.",
                                               type.ToString(),
                                               nextType.ToString());
                    }
                }

                // NOTE(review): size 0 presumably denotes a vector of unknown/variable length - confirm.
                var outputColumnType = new VectorDataViewType(type, 0);
                builder.AddColumn(firstColumn.Name, outputColumnType);

                // The vector column is appended right after all the copied columns.
                _bindings.outputColumn = _bindings.outputToInputMap.Count;
            }

            return builder.ToSchema();
        }
Exemplo n.º 12
0
        /// <summary>
        /// Wraps an XPView and exposes a two-column schema: a Boolean column named
        /// <paramref name="BoolProperty"/> followed by a text column named <paramref name="TextProperty"/>.
        /// </summary>
        /// <param name="data">The view backing this data view; stored as-is.</param>
        /// <param name="TextProperty">Name used for the text column.</param>
        /// <param name="BoolProperty">Name used for the Boolean column.</param>
        public XpoInputObjectDataView(DevExpress.Xpo.XPView data, string TextProperty, string BoolProperty)
        {
            _data = data;
            this.TextProperty = TextProperty;
            this.BoolProperty = BoolProperty;

            var schemaBuilder = new DataViewSchema.Builder();
            schemaBuilder.AddColumn(BoolProperty, BooleanDataViewType.Instance);
            schemaBuilder.AddColumn(TextProperty, TextDataViewType.Instance);
            Schema = schemaBuilder.ToSchema();
        }
Exemplo n.º 13
0
        /// <summary>
        /// Materializes a <see cref="DataViewSchema"/> from a <see cref="SchemaShape"/>. Each shape column becomes a schema
        /// column, and each of its annotations is given a default getter matching the annotation's type.
        /// </summary>
        /// <param name="shape">The shape to convert.</param>
        /// <returns>A schema with one column per shape column, in the same order.</returns>
        public static DataViewSchema Create(SchemaShape shape)
        {
            var schemaBuilder = new DataViewSchema.Builder();

            for (int col = 0; col < shape.Count; ++col)
            {
                var annotationsBuilder = new DataViewSchema.Annotations.Builder();
                var shapeAnnotations   = shape[col].Annotations;

                for (int ann = 0; ann < shapeAnnotations.Count; ++ann)
                {
                    var annotationType = MakeColumnType(shapeAnnotations[ann]);

                    // Vector annotations are dispatched on their item type; all others on their own raw type.
                    Delegate defaultGetter;
                    if (annotationType is VectorDataViewType vectorAnnotationType)
                    {
                        defaultGetter = Utils.MarshalInvoke(_getDefaultVectorGetterMethodInfo, vectorAnnotationType.ItemType.RawType);
                    }
                    else
                    {
                        defaultGetter = Utils.MarshalInvoke(_getDefaultGetterMethodInfo, annotationType.RawType);
                    }

                    annotationsBuilder.Add(shapeAnnotations[ann].Name, annotationType, defaultGetter);
                }

                schemaBuilder.AddColumn(shape[col].Name, MakeColumnType(shape[col]), annotationsBuilder.ToAnnotations());
            }

            return schemaBuilder.ToSchema();
        }
Exemplo n.º 14
0
        /// <summary>
        /// Materializes a <see cref="DataViewSchema"/> from a <see cref="SchemaShape"/>. Each shape column becomes a schema
        /// column, and each of its metadata entries is given a default getter matching the entry's type.
        /// </summary>
        /// <param name="shape">The shape to convert.</param>
        /// <returns>A schema with one column per shape column, in the same order.</returns>
        public static DataViewSchema Create(SchemaShape shape)
        {
            var schemaBuilder = new DataViewSchema.Builder();

            for (int col = 0; col < shape.Count; ++col)
            {
                var metadataBuilder = new DataViewSchema.Metadata.Builder();
                var shapeMetadata   = shape[col].Metadata;

                for (int entry = 0; entry < shapeMetadata.Count; ++entry)
                {
                    var entryType = MakeColumnType(shapeMetadata[entry]);

                    // Vector entries are dispatched on their item type; all others on their own raw type.
                    Delegate defaultGetter;
                    if (entryType is VectorType vectorEntryType)
                    {
                        defaultGetter = Utils.MarshalInvoke(GetDefaultVectorGetter<int>, vectorEntryType.ItemType.RawType);
                    }
                    else
                    {
                        defaultGetter = Utils.MarshalInvoke(GetDefaultGetter<int>, entryType.RawType);
                    }

                    metadataBuilder.Add(shapeMetadata[entry].Name, entryType, defaultGetter);
                }

                schemaBuilder.AddColumn(shape[col].Name, MakeColumnType(shape[col]), metadataBuilder.ToMetadata());
            }

            return schemaBuilder.ToSchema();
        }
        /// <summary>
        /// Builds the single-column schema of a sequence predictor's output. The column is always named
        /// <see cref="AnnotationUtils.Const.ScoreValueKind.PredictedLabel"/>.
        /// </summary>
        /// <param name="scoreType">Type of the score column produced by the sequence predictor.</param>
        /// <param name="scoreColumnKindValue">Value stored in the score column's annotations under the key
        /// <see cref="AnnotationUtils.Kinds.ScoreColumnKind"/>.</param>
        /// <param name="keyNames">Sequence predictors usually produce integer outputs; this gives the tag of every possible
        /// output value. For example, output integer 0 could be mapped to "Sell" and 1 to "Buy" when predicting stock trend.
        /// Key values are only attached when this buffer is non-empty.</param>
        /// <returns>The <see cref="DataViewSchema"/> of a sequence predictor's output.</returns>
        public static DataViewSchema CreateSequencePredictionSchema(DataViewType scoreType, string scoreColumnKindValue, VBuffer<ReadOnlyMemory<char>> keyNames = default)
        {
            Contracts.CheckValue(scoreType, nameof(scoreType));
            Contracts.CheckValue(scoreColumnKindValue, nameof(scoreColumnKindValue));

            var annotations = new DataViewSchema.Annotations.Builder();

            // Key names of the predicted keys come first, when there are any.
            bool hasKeyNames = keyNames.Length > 0;
            if (hasKeyNames)
            {
                annotations.AddKeyValues(
                    keyNames.Length,
                    TextDataViewType.Instance,
                    (ref VBuffer<ReadOnlyMemory<char>> value) => { value = keyNames; });
            }

            // Then the kind of score column and the kind of score value.
            annotations.Add(
                AnnotationUtils.Kinds.ScoreColumnKind,
                TextDataViewType.Instance,
                (ref ReadOnlyMemory<char> value) => { value = scoreColumnKindValue.AsMemory(); });
            annotations.Add(
                AnnotationUtils.Kinds.ScoreValueKind,
                TextDataViewType.Instance,
                (ref ReadOnlyMemory<char> value) => { value = AnnotationUtils.Const.ScoreValueKind.PredictedLabel.AsMemory(); });

            // The schema holds exactly one column.
            var result = new DataViewSchema.Builder();
            result.AddColumn(AnnotationUtils.Const.ScoreValueKind.PredictedLabel, scoreType, annotations.ToAnnotations());
            return result.ToSchema();
        }
        /// <summary>
        /// This is very similar to <see cref="Create(DataViewType, string, string)"/> but adds one extra metadata field
        /// (slot names, one per quantile) to the only score column.
        /// </summary>
        /// <param name="scoreType">Output element's type of quantile regressor. It must be a <see cref="PrimitiveDataViewType"/>;
        /// the score column itself is a vector of that type with one slot per quantile.</param>
        /// <param name="quantiles">Quantiles used in quantile regressor. Must not be null.</param>
        /// <returns><see cref="DataViewSchema"/> of quantile regressor's output.</returns>
        public static DataViewSchema CreateQuantileRegressionSchema(DataViewType scoreType, double[] quantiles)
        {
            Contracts.CheckValue(scoreType, nameof(scoreType));

            // Cast once and validate the result; a non-primitive score type yields null here.
            var itemType = scoreType as PrimitiveDataViewType;
            Contracts.CheckValue(itemType, nameof(scoreType));

            // This is a public entry point and quantiles is dereferenced below, so validate it with a
            // check rather than an assert.
            Contracts.CheckValue(quantiles, nameof(quantiles));

            // Create a schema using standard function. The produced schema will be modified by adding one metadata column.
            var partialSchema = Create(new VectorDataViewType(itemType, quantiles.Length), AnnotationUtils.Const.ScoreColumnKind.QuantileRegression);

            var metadataBuilder = new DataViewSchema.Annotations.Builder();

            // Add the extra metadata: slot i is named after quantile i.
            metadataBuilder.AddSlotNames(quantiles.Length, (ref VBuffer<ReadOnlyMemory<char>> value) =>
            {
                var bufferEditor = VBufferEditor.Create(ref value, quantiles.Length);
                for (int i = 0; i < quantiles.Length; ++i)
                {
                    // NOTE(review): string.Format without a provider uses the current culture, so the
                    // decimal separator in the slot name may vary by machine - confirm this is intended.
                    bufferEditor.Values[i] = string.Format("Quantile-{0}", quantiles[i]).AsMemory();
                }
                value = bufferEditor.Commit();
            });
            // Copy default metadata from the partial schema.
            metadataBuilder.Add(partialSchema[0].Annotations, (string kind) => true);

            // Build a schema consisting of a single column. Comparing with partial schema, the only difference is a metadata field.
            var schemaBuilder = new DataViewSchema.Builder();

            schemaBuilder.AddColumn(partialSchema[0].Name, partialSchema[0].Type, metadataBuilder.ToAnnotations());

            return schemaBuilder.ToSchema();
        }
                /// <summary>
                /// Rebuilds <paramref name="partialSchema"/> with label names attached to the score column's annotations.
                /// </summary>
                /// <param name="partialSchema">Schema to decorate; column order is kept.</param>
                /// <param name="scoreColumnIndex">Index of the score column within <paramref name="partialSchema"/>.</param>
                /// <param name="labelNameType">Type of the label-name annotation.</param>
                /// <param name="labelNameGetter">Getter producing the label names.</param>
                /// <param name="labelNameKind">Annotation kind under which the label names are stored.</param>
                /// <returns>A new schema; annotations are read-only, so columns are re-created rather than modified.</returns>
                private DataViewSchema DecorateOutputSchema(DataViewSchema partialSchema, int scoreColumnIndex, VectorDataViewType labelNameType,
                                                            ValueGetter<VBuffer<T>> labelNameGetter, string labelNameKind)
                {
                    var decorated = new DataViewSchema.Builder();

                    // Columns are added in their original order so indices match the mapper that computes the score column.
                    for (int col = 0; col < partialSchema.Count; ++col)
                    {
                        var column      = partialSchema[col];
                        var annotations = new DataViewSchema.Annotations.Builder();

                        if (col != scoreColumnIndex)
                        {
                            // Only the score column is affected; everything else keeps all its annotations.
                            annotations.Add(column.Annotations, selector: kind => true);
                        }
                        else
                        {
                            // Score column: keep every annotation except an existing one of the label-name kind,
                            // then add the label names under that kind.
                            annotations.Add(column.Annotations, selector: kind => kind != labelNameKind);
                            annotations.Add(labelNameKind, labelNameType, labelNameGetter);
                        }

                        decorated.AddColumn(column.Name, column.Type, annotations.ToAnnotations());
                    }

                    return decorated.ToSchema();
                }
            /// <summary>
            /// Snapshots the columns of <paramref name="builder"/> and builds the matching schema, attaching slot names
            /// and key values to each column for which the builder holds a getter.
            /// </summary>
            /// <param name="env">Host environment; a host named "ArrayDataView" is registered on it.</param>
            /// <param name="builder">Builder supplying column names, columns and annotation getters.</param>
            /// <param name="rowCount">Number of rows in the view; asserted to be non-negative.</param>
            public DataView(IHostEnvironment env, ArrayDataViewBuilder builder, int rowCount)
            {
                Contracts.AssertValue(env, "env");
                _host = env.Register("ArrayDataView");

                _host.AssertValue(builder);
                _host.Assert(rowCount >= 0);
                _host.Assert(builder._names.Count == builder._columns.Count);
                _columns = builder._columns.ToArray();

                var schema = new DataViewSchema.Builder();
                for (int col = 0; col < _columns.Length; col++)
                {
                    var columnName  = builder._names[col];
                    var columnType  = _columns[col].Type;
                    var annotations = new DataViewSchema.Metadata.Builder();

                    // Slot names, when a getter was registered under this column's name.
                    if (builder._getSlotNames.TryGetValue(columnName, out var slotNamesGetter))
                    {
                        annotations.AddSlotNames(columnType.GetVectorSize(), slotNamesGetter);
                    }

                    // Key values, when a getter was registered under this column's name.
                    if (builder._getKeyValues.TryGetValue(columnName, out var keyValueGetter))
                    {
                        annotations.AddKeyValues(columnType.GetKeyCountAsInt32(_host), TextDataViewType.Instance, keyValueGetter);
                    }

                    schema.AddColumn(columnName, columnType, annotations.ToMetadata());
                }

                _schema   = schema.ToSchema();
                _rowCount = rowCount;
            }
Exemplo n.º 19
0
            /// <summary>
            /// Creates a row whose schema has a single column named "Foo" of the given type, with no annotations.
            /// </summary>
            /// <param name="type">Type of the single column.</param>
            /// <param name="getter">Getter stored for the column's value.</param>
            private RowImpl(DataViewType type, Delegate getter)
            {
                _getter = getter;

                var schemaBuilder = new DataViewSchema.Builder();
                schemaBuilder.AddColumn("Foo", type, null);
                Schema = schemaBuilder.ToSchema();
            }
Exemplo n.º 20
0
        // The output schema is the input schema followed by one extra Boolean column named
        // TimeSeriesImputerEstimator.IsRowImputedColumnName.
        public DataViewSchema GetOutputSchema(DataViewSchema inputSchema)
        {
            var schemaBuilder = new DataViewSchema.Builder();

            // Copy the input columns straight into the builder. No name-keyed dictionary is built here:
            // it was never read, and keying by name throws when a schema contains several columns
            // with the same name (e.g. hidden columns).
            schemaBuilder.AddColumns(inputSchema.AsEnumerable());
            schemaBuilder.AddColumn(TimeSeriesImputerEstimator.IsRowImputedColumnName, BooleanDataViewType.Instance);

            return schemaBuilder.ToSchema();
        }
Exemplo n.º 21
0
                /// <summary>
                /// Produces a <see cref="Metadata"/> row holding what this <see cref="Builder"/> currently contains.
                /// </summary>
                /// <returns>A new <see cref="Metadata"/> with one column and one getter per item, in item order.</returns>
                public Metadata ToMetadata()
                {
                    // Getters are collected in the same order as the columns added below.
                    var getters = _items.Select(entry => entry.Getter).ToArray();

                    var schemaBuilder = new DataViewSchema.Builder();
                    foreach (var entry in _items)
                    {
                        schemaBuilder.AddColumn(entry.Name, entry.Type, entry.Metadata);
                    }

                    return new Metadata(schemaBuilder.ToSchema(), getters);
                }
Exemplo n.º 22
0
        public void ValidateTextColumnNotText()
        {
            const string TextPurposeColName = "TextColumn";

            // All three columns are Single, including the one that will be declared as text.
            var builder = new DataViewSchema.Builder();
            builder.AddColumn(DefaultColumnNames.Features, NumberDataViewType.Single);
            builder.AddColumn(DefaultColumnNames.Label, NumberDataViewType.Single);
            builder.AddColumn(TextPurposeColName, NumberDataViewType.Single);
            var data = DataViewTestFixture.BuildDummyDataView(builder.ToSchema());

            // Declare the Single column as a text column.
            var columnInformation = new ColumnInformation();
            columnInformation.TextColumnNames.Add(TextPurposeColName);

            Action validate = () => UserInputValidationUtil.ValidateExperimentExecuteArgs(
                data, columnInformation, null, TaskKind.Regression);

            var error = Assert.Throws<ArgumentException>(validate);
            Assert.Equal("Provided text column 'TextColumn' was of type Single, but only type String is allowed.", error.Message);
        }
Exemplo n.º 23
0
                /// <summary>
                /// Produces an <see cref="Annotations"/> row holding what this <see cref="Builder"/> currently contains.
                /// </summary>
                /// <returns>A new <see cref="Annotations"/> with one column and one getter per item, in item order.</returns>
                public Annotations ToAnnotations()
                {
                    // Getters are collected in the same order as the columns added below.
                    var getters = _items.Select(entry => entry.Getter).ToArray();

                    var schemaBuilder = new DataViewSchema.Builder();
                    foreach (var entry in _items)
                    {
                        schemaBuilder.AddColumn(entry.Name, entry.Type, entry.Annotations);
                    }

                    return new Annotations(schemaBuilder.ToSchema(), getters);
                }
Exemplo n.º 24
0
        /// <summary>
        /// Builds a schema with one column per member reported by <c>RecordTypeRegister.GetRecordInfo</c> for
        /// <paramref name="type"/>, using each member's name and data view type.
        /// </summary>
        /// <param name="type">The record type to describe.</param>
        /// <returns>The resulting schema, with columns in the order the members are enumerated.</returns>
        private static DataViewSchema CreateSchema(Type type)
        {
            var schemaBuilder = new DataViewSchema.Builder();

            foreach (var recordMember in RecordTypeRegister.GetRecordInfo(type))
            {
                schemaBuilder.AddColumn(recordMember.Name, recordMember.DataViewType);
            }

            return schemaBuilder.ToSchema();
        }
Exemplo n.º 25
0
        /// <summary>
        /// Wraps a sequence of name-to-value dictionaries. The schema has one Single column per key of the
        /// first dictionary in <paramref name="data"/>.
        /// </summary>
        /// <param name="data">Rows backing this view; stored as-is. The first element is read here to derive
        /// the column names, so the sequence must contain at least one element.</param>
        public FloatsDataView(IEnumerable<IDictionary<string, float>> data)
        {
            _data = data;

            // Column names come from the first row only.
            var columnNames   = data.First().Keys;
            var schemaBuilder = new DataViewSchema.Builder();
            foreach (var columnName in columnNames)
            {
                schemaBuilder.AddColumn(columnName, NumberDataViewType.Single);
            }

            Schema = schemaBuilder.ToSchema();
        }
Exemplo n.º 26
0
        public void ValidateEmptyValidationDataThrows()
        {
            // Training data: built from two Single columns, each given the value 0f.
            var trainingBuilder = new ArrayDataViewBuilder(new MLContext());
            trainingBuilder.AddColumn("Number", NumberDataViewType.Single, 0f);
            trainingBuilder.AddColumn(DefaultColumnNames.Label, NumberDataViewType.Single, 0f);
            var trainingData = trainingBuilder.GetDataView();

            // Validation data: same two columns, but built without the dummy row.
            var validationSchemaBuilder = new DataViewSchema.Builder();
            validationSchemaBuilder.AddColumn("Number", NumberDataViewType.Single);
            validationSchemaBuilder.AddColumn(DefaultColumnNames.Label, NumberDataViewType.Single);
            var emptyValidationData = DataViewTestFixture.BuildDummyDataView(validationSchemaBuilder.ToSchema(), createDummyRow: false);

            Action validate = () => UserInputValidationUtil.ValidateExperimentExecuteArgs(
                trainingData, new ColumnInformation(), emptyValidationData, TaskKind.Regression);

            var error = Assert.Throws<ArgumentException>(validate);
            Assert.StartsWith("Validation data has 0 rows", error.Message);
        }
Exemplo n.º 27
0
            /// <summary>
            /// Builds the output schema with one column per entry of <c>Infos</c>, in order, using each
            /// entry's name and column type.
            /// </summary>
            /// <returns>The resulting schema.</returns>
            private DataViewSchema ComputeOutputSchema()
            {
                var result = new DataViewSchema.Builder();

                // The position of an entry in Infos is the index of the corresponding loaded column.
                foreach (var loadedColumn in Infos)
                {
                    result.AddColumn(loadedColumn.Name, loadedColumn.ColType);
                }

                return result.ToSchema();
            }
        /// <summary>
        /// Builds a three-column schema (vector "A", key "B", Int32 "C" with one annotation "M"),
        /// converts it to a <see cref="SchemaShape"/>, asks <c>FakeSchemaFactory</c> for a schema
        /// created from that shape, and asserts on the names, types and annotation of the result.
        /// </summary>
        void SimpleTest()
        {
            // Annotation "M" whose getter always yields 484f.
            var metadataBuilder = new DataViewSchema.Annotations.Builder();

            metadataBuilder.Add("M", NumberDataViewType.Single, (ref float v) => v = 484f);
            var schemaBuilder = new DataViewSchema.Builder();

            schemaBuilder.AddColumn("A", new VectorDataViewType(NumberDataViewType.Single, 94));
            schemaBuilder.AddColumn("B", new KeyDataViewType(typeof(uint), 17));
            schemaBuilder.AddColumn("C", NumberDataViewType.Int32, metadataBuilder.ToAnnotations());

            // Round-trip: schema -> shape -> fake schema.
            var shape = SchemaShape.Create(schemaBuilder.ToSchema());

            var fakeSchema = FakeSchemaFactory.Create(shape);

            var columnA = fakeSchema[0];
            var columnB = fakeSchema[1];
            var columnC = fakeSchema[2];

            Assert.Equal("A", columnA.Name);
            Assert.Equal(NumberDataViewType.Single, columnA.Type.GetItemType());
            // NOTE(review): the expected count is 10 although the vector was declared with size 94;
            // presumably the fake schema substitutes a fixed placeholder size - confirm against FakeSchemaFactory.
            Assert.Equal(10, columnA.Type.GetValueCount());

            Assert.Equal("B", columnB.Name);
            Assert.Equal(InternalDataKind.U4, columnB.Type.GetRawKind());
            // NOTE(review): likewise 10 is expected here although the key type was declared with count 17.
            Assert.Equal(10u, columnB.Type.GetKeyCount());

            Assert.Equal("C", columnC.Name);
            Assert.Equal(NumberDataViewType.Int32, columnC.Type);

            var metaC = columnC.Annotations;

            // Exactly one annotation is expected on column "C".
            Assert.Single(metaC.Schema);

            // Start from a sentinel so the assertion below shows that GetValue overwrote it.
            float mValue = -1;

            metaC.GetValue("M", ref mValue);
            // NOTE(review): the default value is expected rather than 484f; presumably the fake
            // annotation getter does not reproduce the original value - confirm.
            Assert.Equal(default, mValue);
Exemplo n.º 29
0
        /// <summary>
        /// Gets a data view over a collection of <see cref="FrameRecord{TData}"/> objects.
        /// <para/>
        /// This method does not enumerate <paramref name="records"/> itself; it only builds the
        /// column getters and the schema, then hands the sequence to the data view that wraps it.
        /// </summary>
        /// <typeparam name="T">The type of the records.</typeparam>
        /// <param name="records">A collection of records to be used as the basis for the data view.</param>
        /// <param name="dataViewTypeResolver">Resolver queried for the data view type of <typeparamref name="T"/>, whose columns define the getters and the schema.</param>
        /// <returns>The data view for the given enumerable.</returns>
        public static IDataView AsDataView <T>(this IEnumerable <T> records, IDataViewTypeResolver dataViewTypeResolver)
        {
            var recordType    = dataViewTypeResolver.GetDataViewType <T>();
            var getterBuilder = new DataViewGetters.Builder();
            var schemaBuilder = new DataViewSchema.Builder();

            // Each column contributes one getter and one schema entry, in the same order.
            foreach (var columnDefinition in recordType.GetColumns())
            {
                getterBuilder.AddColumn(columnDefinition);
                schemaBuilder.AddColumn(columnDefinition.Name, columnDefinition.DataViewType);
            }

            var columnGetters = getterBuilder.ToGetters();
            var viewSchema    = schemaBuilder.ToSchema();
            return new DataView <T>(records, columnGetters, viewSchema);
        }
            /// <summary>
            /// Creates a row mapper that binds the parent's generic mapper to <paramref name="schema"/>
            /// and exposes an output schema zipping the generic mapper's output schema with a single
            /// <c>DefaultColumnNames.FeatureContributions</c> column.
            /// </summary>
            /// <param name="env">Host environment; used for the assertions here and passed to the bind call.</param>
            /// <param name="parent">Bindable mapper supplying the generic mapper and the <c>Stringify</c> flag.</param>
            /// <param name="schema">Role-mapped input schema; asserted to have a feature column.</param>
            public RowMapper(IHostEnvironment env, BindableMapper parent, RoleMappedSchema schema)
            {
                Contracts.AssertValue(env);
                _env = env;
                _env.AssertValue(schema);
                _env.AssertValue(parent);
                _env.Assert(schema.Feature.HasValue);

                _parent = parent;
                InputRoleMappedSchema = schema;
                _genericRowMapper = parent.GenericMapper.Bind(_env, schema) as ISchemaBoundRowMapper;

                var slotCount = FeatureColumn.Type.GetVectorSize();

                if (!parent.Stringify)
                {
                    // Vector output: a Single vector column, carrying slot names when the input has them.
                    var annotations = new DataViewSchema.Annotations.Builder();
                    if (InputSchema[FeatureColumn.Index].HasSlotNames(slotCount))
                    {
                        annotations.AddSlotNames(slotCount, (ref VBuffer <ReadOnlyMemory <char> > names) =>
                                                 FeatureColumn.Annotations.GetValue(AnnotationUtils.Kinds.SlotNames, ref names));
                    }

                    var vectorSchemaBuilder = new DataViewSchema.Builder();
                    var contributionType    = new VectorType(NumberDataViewType.Single, FeatureColumn.Type as VectorType);
                    vectorSchemaBuilder.AddColumn(DefaultColumnNames.FeatureContributions, contributionType, annotations.ToAnnotations());
                    _outputSchema = vectorSchemaBuilder.ToSchema();
                }
                else
                {
                    // Text output: a single text column without annotations.
                    var textSchemaBuilder = new DataViewSchema.Builder();
                    textSchemaBuilder.AddColumn(DefaultColumnNames.FeatureContributions, TextDataViewType.Instance, null);
                    _outputSchema = textSchemaBuilder.ToSchema();

                    // Keep the slot names at hand; fall back to an empty buffer of the feature size.
                    if (FeatureColumn.HasSlotNames(slotCount))
                    {
                        FeatureColumn.Annotations.GetValue(AnnotationUtils.Kinds.SlotNames, ref _slotNames);
                    }
                    else
                    {
                        _slotNames = VBufferUtils.CreateEmpty <ReadOnlyMemory <char> >(slotCount);
                    }
                }

                _outputGenericSchema = _genericRowMapper.OutputSchema;

                var zippedBinding = new ZipBinding(new DataViewSchema[] { _outputGenericSchema, _outputSchema, });
                OutputSchema = zippedBinding.OutputSchema;
            }