/// <summary>
/// Checks that purpose inference assigns <see cref="ColumnPurpose.Ignore"/> to a hidden column
/// while still classifying the visible column of the same name.
/// </summary>
public void PurposeInferenceHiddenColumnsTest()
{
    var mlContext = new MLContext();

    // Start from a two-column data view: a Boolean label and a Single-typed 'Features' column.
    var builder = new DataViewSchema.Builder();
    builder.AddColumn(DefaultColumnNames.Label, BooleanDataViewType.Instance);
    builder.AddColumn(DefaultColumnNames.Features, NumberDataViewType.Single);
    IDataView data = DataViewTestFixture.BuildDummyDataView(builder.ToSchema());

    // Normalizing 'Features' leaves two columns named 'Features' in the data view;
    // the first one gets marked as 'Hidden'.
    data = mlContext.Transforms
        .NormalizeMinMax(DefaultColumnNames.Features)
        .Fit(data)
        .Transform(data);

    var inferred = PurposeInference.InferPurposes(mlContext, data, new ColumnInformation());

    Assert.Equal(3, inferred.Count());
    Assert.Equal(ColumnPurpose.Label, inferred[0].Purpose);
    // The hidden 'Features' column must be ignored.
    Assert.Equal(ColumnPurpose.Ignore, inferred[1].Purpose);
    // The visible 'Features' column is a numeric feature.
    Assert.Equal(ColumnPurpose.NumericFeature, inferred[2].Purpose);
}
/// <summary>
/// Builds the two-column <see cref="DataViewSchema"/> of a binary classifier's output.
/// Column 0 is the score column and column 1 is the probability column. For a linear support vector machine,
/// for example, the score is the inner product of the linear coefficients and the input feature vector, and
/// a calibrator converts that score into a probability.
/// </summary>
/// <param name="scoreColumnName">Name given to the score column.</param>
/// <param name="probabilityColumnName">Name given to the probability column.</param>
/// <returns>The <see cref="DataViewSchema"/> of a binary classifier's output.</returns>
public static DataViewSchema CreateBinaryClassificationSchema(string scoreColumnName = AnnotationUtils.Const.ScoreValueKind.Score,
    string probabilityColumnName = AnnotationUtils.Const.ScoreValueKind.Probability)
{
    // The standard factory yields a schema holding only the score column; it is copied and extended below.
    var scoreSchema = Create(NumberDataViewType.Single, AnnotationUtils.Const.ScoreColumnKind.BinaryClassification, scoreColumnName);
    var scoreColumn = scoreSchema[0];

    // Annotations attached to the probability column.
    var probabilityAnnotations = new DataViewSchema.Annotations.Builder();
    probabilityAnnotations.Add(AnnotationUtils.Kinds.IsNormalized, BooleanDataViewType.Instance,
        (ref bool value) => value = true);
    probabilityAnnotations.Add(AnnotationUtils.Kinds.ScoreColumnKind, TextDataViewType.Instance,
        (ref ReadOnlyMemory<char> value) => value = AnnotationUtils.Const.ScoreColumnKind.BinaryClassification.AsMemory());
    probabilityAnnotations.Add(AnnotationUtils.Kinds.ScoreValueKind, TextDataViewType.Instance,
        (ref ReadOnlyMemory<char> value) => value = AnnotationUtils.Const.ScoreValueKind.Probability.AsMemory());

    // Score column first, probability column second.
    var result = new DataViewSchema.Builder();
    result.AddColumn(scoreColumn.Name, scoreColumn.Type, scoreColumn.Annotations);
    result.AddColumn(probabilityColumnName, NumberDataViewType.Single, probabilityAnnotations.ToAnnotations());
    return result.ToSchema();
}
/// <summary>
/// Computes the output schema of a <see cref="GroupTransform"/> for an input schema.
/// </summary>
/// <param name="sourceSchema">Input schema.</param>
/// <returns>The output schema produced by <see cref="GroupTransform"/>: group columns first, then aggregated columns.</returns>
private DataViewSchema BuildOutputSchema(DataViewSchema sourceSchema)
{
    var outputBuilder = new DataViewSchema.Builder();

    // Group(-key) columns partition the input rows: rows sharing a key value are merged into one output row.
    // They are copied with their type and annotations unchanged.
    foreach (var keyName in _groupColumns)
    {
        var keyColumn = sourceSchema[keyName];
        outputBuilder.AddColumn(keyName, keyColumn.Type, keyColumn.Annotations);
    }

    // Aggregated (aka keep) columns become vectors of their original primitive type.
    foreach (var keepName in _keepColumns)
    {
        // Only the IsNormalized and KeyValues annotations are carried over.
        var annotations = new DataViewSchema.Annotations.Builder();
        annotations.Add(sourceSchema[keepName].Annotations,
            kind => kind == AnnotationUtils.Kinds.IsNormalized || kind == AnnotationUtils.Kinds.KeyValues);

        var aggregatedValueType = sourceSchema[keepName].Type as PrimitiveDataViewType;
        _ectx.CheckValue(aggregatedValueType, nameof(aggregatedValueType), "Columns being aggregated must be primitive types such as string, float, or integer");

        outputBuilder.AddColumn(keepName, new VectorType(aggregatedValueType), annotations.ToAnnotations());
    }

    return outputBuilder.ToSchema();
}
/// <summary>
/// Selecting a single column through the pipeline builder must leave only that column in the output schema.
/// </summary>
public void SelectColumns_FeatureColumns_OnlyThem()
{
    // Two text columns, of which only "Test1" is selected below.
    var builder = new DataViewSchema.Builder();
    builder.AddColumn("Test1", TextDataViewType.Instance);
    builder.AddColumn("Test2", TextDataViewType.Instance);
    var inputSchema = builder.ToSchema();

    var rows = new[]
    {
        new { Test1 = "Hii", Test2 = "Xii" },
        new { Test1 = "GGG", Test2 = "kkk" }
    };
    var input = _mlContext.Data.LoadFromEnumerable(rows, inputSchema);

    var pipeline = new PipelineBuilder(_mlContext, inputSchema).SelectColumns("Test1");
    var outputSchema = pipeline.TransformData(input).Schema;

    Assert.Single(outputSchema);
    Assert.Contains("Test1", outputSchema.Select(column => column.Name));
}
/// <summary>
/// Wraps a sequence of <see cref="InputObject"/> and exposes it with a fixed two-column schema:
/// a Boolean "Label" column followed by a text "Text" column.
/// </summary>
/// <param name="data">The objects backing this data view; stored as-is.</param>
public InputObjectDataView(IEnumerable<InputObject> data)
{
    var schemaBuilder = new DataViewSchema.Builder();
    schemaBuilder.AddColumn("Label", BooleanDataViewType.Instance);
    schemaBuilder.AddColumn("Text", TextDataViewType.Instance);

    Schema = schemaBuilder.ToSchema();
    _data = data;
}
/// <summary>
/// A column named in <see cref="ColumnInformation"/> that is absent from the data must be rejected.
/// </summary>
public void ValidateColumnNotContainedInData()
{
    // The data only has 'Features' and 'Label' columns.
    var builder = new DataViewSchema.Builder();
    builder.AddColumn(DefaultColumnNames.Features, NumberDataViewType.Single);
    builder.AddColumn(DefaultColumnNames.Label, NumberDataViewType.Single);
    var data = DataViewTestFixture.BuildDummyDataView(builder.ToSchema());

    // "Categorical" is not one of the columns added above.
    var columnInformation = new ColumnInformation();
    columnInformation.CategoricalColumnNames.Add("Categorical");

    Assert.Throws<ArgumentException>(
        () => ColumnInferenceValidationUtil.ValidateSpecifiedColumnsExist(columnInformation, data));
}
/// <summary>
/// Training data built without any rows must be rejected with a "0 rows" message.
/// </summary>
public void ValidateEmptyTrainingDataThrows()
{
    var builder = new DataViewSchema.Builder();
    builder.AddColumn("Number", NumberDataViewType.Single);
    builder.AddColumn(DefaultColumnNames.Label, NumberDataViewType.Single);

    // createDummyRow: false asks the fixture not to add the dummy row.
    var emptyTrainingData = DataViewTestFixture.BuildDummyDataView(builder.ToSchema(), createDummyRow: false);

    var thrown = Assert.Throws<ArgumentException>(
        () => UserInputValidationUtil.ValidateExperimentExecuteArgs(emptyTrainingData, new ColumnInformation(), null, TaskKind.Regression));

    Assert.StartsWith("Training data has 0 rows", thrown.Message);
}
/// <summary>
/// A 'Features' column declared as Double must be rejected: the validator requires type Single.
/// </summary>
public void ValidateFeaturesColInvalidType()
{
    var builder = new DataViewSchema.Builder();
    // Deliberately Double rather than Single.
    builder.AddColumn(DefaultColumnNames.Features, NumberDataViewType.Double);
    builder.AddColumn(DefaultColumnNames.Label, NumberDataViewType.Single);
    var data = DataViewTestFixture.BuildDummyDataView(builder.ToSchema());

    var thrown = Assert.Throws<ArgumentException>(
        () => UserInputValidationUtil.ValidateExperimentExecuteArgs(data, new ColumnInformation(), null, TaskKind.Regression));

    Assert.StartsWith("Features column must be of data type Single", thrown.Message);
}
/// <summary>
/// A feature column of type UInt64 must be rejected with a message listing the supported types.
/// </summary>
public void ValidateProhibitedFeatureColumnType()
{
    var builder = new DataViewSchema.Builder();
    // The column is named after its (unsupported) type.
    builder.AddColumn("UInt64", NumberDataViewType.UInt64);
    builder.AddColumn(DefaultColumnNames.Label, NumberDataViewType.Single);
    var data = DataViewTestFixture.BuildDummyDataView(builder.ToSchema());

    var thrown = Assert.Throws<ArgumentException>(
        () => UserInputValidationUtil.ValidateExperimentExecuteArgs(data, new ColumnInformation(), null, TaskKind.Regression));

    Assert.StartsWith("Only supported feature column types are Boolean, Single, and String. Please change the feature column UInt64 of type UInt64 to one of the supported types.", thrown.Message);
}
/// <summary>
/// Binds the pivot columns against the input schema and computes the output schema of the ungroup operation.
/// </summary>
/// <param name="ectx">Exception context used for assertions; may be null.</param>
/// <param name="inputSchema">Schema of the input data.</param>
/// <param name="mode">Ungroup mode, stored in <c>Mode</c>.</param>
/// <param name="pivotColumns">Names of the columns to ungroup; asserted to be non-empty.</param>
public UngroupBinding(IExceptionContext ectx, DataViewSchema inputSchema, UngroupMode mode, string[] pivotColumns)
{
    Contracts.AssertValueOrNull(ectx);
    _ectx = ectx;
    _ectx.AssertValue(inputSchema);
    _ectx.AssertNonEmpty(pivotColumns);

    // Storing the input schema is also what makes InputColumnCount valid.
    _inputSchema = inputSchema;
    Mode = mode;
    Bind(_ectx, inputSchema, pivotColumns, out _infos);

    // Map every input column index to its position in _infos; -1 marks a column that is not a pivot.
    _pivotIndex = Utils.CreateArray(InputColumnCount, -1);
    for (int pivot = 0; pivot < _infos.Length; pivot++)
    {
        var pivotInfo = _infos[pivot];
        _ectx.Assert(_pivotIndex[pivotInfo.Index] == -1);
        _pivotIndex[pivotInfo.Index] = pivot;
    }

    // Every input column keeps its index in the output schema. Non-pivot columns are copied verbatim;
    // pivot columns are copied with a different type and filtered metadata.
    var outputBuilder = new DataViewSchema.Builder();
    for (int col = 0; col < InputColumnCount; ++col)
    {
        var inputColumn = inputSchema[col];
        if (_pivotIndex[col] >= 0)
        {
            // Pivot column: keep only the metadata that ShouldPreserveMetadata accepts.
            var pivotMetadata = new DataViewSchema.Annotations.Builder();
            pivotMetadata.Add(inputColumn.Annotations, kind => ShouldPreserveMetadata(kind));

            // The output type is the item type of the input column. Consider the row
            //   Age UserID
            //   18  {"Amy", "Willy"}
            // With "UserID" as the only pivot column, ungrouping may produce
            //   Age UserID
            //   18  "Amy"
            //   18  "Willy"
            // so the output "UserID" column has the element type of the input "UserID" column.
            outputBuilder.AddColumn(inputColumn.Name, inputColumn.Type.GetItemType(), pivotMetadata.ToAnnotations());
        }
        else
        {
            // Not a pivot column: plain copy.
            outputBuilder.AddColumn(inputColumn.Name, inputColumn.Type, inputColumn.Annotations);
        }
    }

    OutputSchema = outputBuilder.ToSchema();
}
/// <summary>
/// Builds the output schema from <paramref name="inputSchema"/> and records the column mappings in <c>_bindings</c>.
/// Columns whose name is in <c>_columnNames</c> are collected into <c>_bindings.vectorToInputMap</c>, the column
/// named <paramref name="lengthColumnName"/> is recorded as <c>_bindings.lengthColumn</c>, and every other column
/// is passed through to the output and recorded in <c>_bindings.outputToInputMap</c>. When at least one column
/// is collected, a single vector column named after the first collected column is appended to the output.
/// </summary>
/// <param name="inputSchema">Schema of the input data.</param>
/// <param name="lengthColumnName">Name of the length column; it is not copied to the output schema.</param>
/// <returns>The output schema.</returns>
/// <remarks>
/// Throws (via <c>Contracts.Except</c>) when a column to vectorize is not of a primitive type, or when the
/// columns to vectorize do not all share the same type.
/// </remarks>
private DataViewSchema ProcessInputSchema(DataViewSchema inputSchema, string lengthColumnName)
{
    var builder = new DataViewSchema.Builder();
    for (int i = 0; i < inputSchema.Count; i++)
    {
        var name = inputSchema[i].Name;
        if (_columnNames.Contains(name))
        {
            _bindings.vectorToInputMap.Add(i);
        }
        else if (name == lengthColumnName)
        {
            _bindings.lengthColumn = i;
        }
        else
        {
            // Pass-through column: copied without annotations.
            builder.AddColumn(name, inputSchema[i].Type);
            _bindings.outputToInputMap.Add(i);
        }
    }

    if (_bindings.vectorToInputMap.Count > 0)
    {
        var firstColumn = inputSchema[_bindings.vectorToInputMap[0]];
        var type = firstColumn.Type as PrimitiveDataViewType;
        // The 'as' cast yields null for non-primitive columns; fail with a clear message instead of
        // dereferencing null further down.
        if (type == null)
        {
            throw Contracts.Except("Input data type of column '{0}' to vectorize must be a primitive type. Found {1}.",
                firstColumn.Name, firstColumn.Type.ToString());
        }

        for (int i = 1; i < _bindings.vectorToInputMap.Count; i++)
        {
            var nextColumn = inputSchema[_bindings.vectorToInputMap[i]];
            var nextType = nextColumn.Type as PrimitiveDataViewType;
            if (nextType == null)
            {
                throw Contracts.Except("Input data type of column '{0}' to vectorize must be a primitive type. Found {1}.",
                    nextColumn.Name, nextColumn.Type.ToString());
            }

            if (!nextType.Equals(type))
            {
                throw Contracts.Except("Input data types of the columns to vectorize must " +
                                       "all be of the same type. Found {0} and {1}.", type.ToString(), nextType.ToString());
            }
        }

        var outputColumnType = new VectorDataViewType(type, 0);
        var outputColumnName = firstColumn.Name;
        builder.AddColumn(outputColumnName, outputColumnType);

        // The vector column is appended after all pass-through columns, so its index equals their count.
        _bindings.outputColumn = _bindings.outputToInputMap.Count;
    }

    return builder.ToSchema();
}
/// <summary>
/// Wraps an <see cref="DevExpress.Xpo.XPView"/> and exposes it with a two-column schema: a Boolean column named
/// after <paramref name="BoolProperty"/> followed by a text column named after <paramref name="TextProperty"/>.
/// </summary>
/// <param name="data">The view backing this data view; stored as-is.</param>
/// <param name="TextProperty">Name used for the text column.</param>
/// <param name="BoolProperty">Name used for the Boolean column.</param>
public XpoInputObjectDataView(DevExpress.Xpo.XPView data, string TextProperty, string BoolProperty)
{
    _data = data;
    this.TextProperty = TextProperty;
    this.BoolProperty = BoolProperty;

    // Column order matters: the Boolean column comes first, the text column second.
    var schemaBuilder = new DataViewSchema.Builder();
    schemaBuilder.AddColumn(BoolProperty, BooleanDataViewType.Instance);
    schemaBuilder.AddColumn(TextProperty, TextDataViewType.Instance);
    Schema = schemaBuilder.ToSchema();
}
/// <summary>
/// Builds a <see cref="DataViewSchema"/> with one column per column of <paramref name="shape"/>.
/// Each annotation of a shape column becomes an annotation on the schema column, backed by a default getter
/// chosen according to whether the annotation's type is a vector type.
/// </summary>
/// <param name="shape">The schema shape to convert.</param>
/// <returns>The schema built from <paramref name="shape"/>.</returns>
public static DataViewSchema Create(SchemaShape shape)
{
    var schemaBuilder = new DataViewSchema.Builder();

    for (int col = 0; col < shape.Count; ++col)
    {
        var annotationsBuilder = new DataViewSchema.Annotations.Builder();
        var shapeAnnotations = shape[col].Annotations;

        for (int ann = 0; ann < shapeAnnotations.Count; ++ann)
        {
            var annotationType = MakeColumnType(shapeAnnotations[ann]);

            // Vector-typed annotations get the vector getter for their item type; everything else gets the scalar getter.
            Delegate getter;
            if (annotationType is VectorDataViewType vectorType)
            {
                getter = Utils.MarshalInvoke(_getDefaultVectorGetterMethodInfo, vectorType.ItemType.RawType);
            }
            else
            {
                getter = Utils.MarshalInvoke(_getDefaultGetterMethodInfo, annotationType.RawType);
            }

            annotationsBuilder.Add(shapeAnnotations[ann].Name, annotationType, getter);
        }

        schemaBuilder.AddColumn(shape[col].Name, MakeColumnType(shape[col]), annotationsBuilder.ToAnnotations());
    }

    return schemaBuilder.ToSchema();
}
/// <summary>
/// Builds a <see cref="DataViewSchema"/> with one column per column of <paramref name="shape"/>.
/// Each metadata entry of a shape column becomes metadata on the schema column, backed by a default getter
/// chosen according to whether the metadata's type is a vector type.
/// </summary>
/// <param name="shape">The schema shape to convert.</param>
/// <returns>The schema built from <paramref name="shape"/>.</returns>
public static DataViewSchema Create(SchemaShape shape)
{
    var schemaBuilder = new DataViewSchema.Builder();

    for (int col = 0; col < shape.Count; ++col)
    {
        var metadataBuilder = new DataViewSchema.Metadata.Builder();
        var shapeMetadata = shape[col].Metadata;

        for (int meta = 0; meta < shapeMetadata.Count; ++meta)
        {
            var metadataType = MakeColumnType(shapeMetadata[meta]);

            // Vector-typed metadata gets the vector getter for its item type; everything else gets the scalar getter.
            Delegate getter;
            if (metadataType is VectorType vectorType)
            {
                getter = Utils.MarshalInvoke(GetDefaultVectorGetter<int>, vectorType.ItemType.RawType);
            }
            else
            {
                getter = Utils.MarshalInvoke(GetDefaultGetter<int>, metadataType.RawType);
            }

            metadataBuilder.Add(shapeMetadata[meta].Name, metadataType, getter);
        }

        schemaBuilder.AddColumn(shape[col].Name, MakeColumnType(shape[col]), metadataBuilder.ToMetadata());
    }

    return schemaBuilder.ToSchema();
}
/// <summary>
/// Builds the single-column schema of a sequence predictor's output. The output column is always named
/// <see cref="AnnotationUtils.Const.ScoreValueKind.PredictedLabel"/>.
/// </summary>
/// <param name="scoreType">Type of the score column produced by the sequence predictor.</param>
/// <param name="scoreColumnKindValue">Metadata value of the score column, stored under the key
/// <see cref="AnnotationUtils.Kinds.ScoreColumnKind"/>.</param>
/// <param name="keyNames">Sequence predictors usually generate integer outputs; this gives the tag of each possible
/// output value. For example, output 0 could be mapped to "Sell" and 1 to "Buy" when predicting a stock trend.
/// Key names are only attached when this buffer is non-empty.</param>
/// <returns>The <see cref="DataViewSchema"/> of a sequence predictor's output.</returns>
public static DataViewSchema CreateSequencePredictionSchema(DataViewType scoreType, string scoreColumnKindValue,
    VBuffer<ReadOnlyMemory<char>> keyNames = default)
{
    Contracts.CheckValue(scoreType, nameof(scoreType));
    Contracts.CheckValue(scoreColumnKindValue, nameof(scoreColumnKindValue));

    var annotations = new DataViewSchema.Annotations.Builder();

    // Key names of the predicted keys come first, when there are any.
    if (keyNames.Length > 0)
    {
        annotations.AddKeyValues(keyNames.Length, TextDataViewType.Instance,
            (ref VBuffer<ReadOnlyMemory<char>> value) => { value = keyNames; });
    }

    annotations.Add(AnnotationUtils.Kinds.ScoreColumnKind, TextDataViewType.Instance,
        (ref ReadOnlyMemory<char> value) => { value = scoreColumnKindValue.AsMemory(); });
    annotations.Add(AnnotationUtils.Kinds.ScoreValueKind, TextDataViewType.Instance,
        (ref ReadOnlyMemory<char> value) => { value = AnnotationUtils.Const.ScoreValueKind.PredictedLabel.AsMemory(); });

    // The resulting schema consists of exactly one column.
    var result = new DataViewSchema.Builder();
    result.AddColumn(AnnotationUtils.Const.ScoreValueKind.PredictedLabel, scoreType, annotations.ToAnnotations());
    return result.ToSchema();
}
/// <summary>
/// Builds the single-column schema of a quantile regressor's output. It matches what
/// <see cref="Create(DataViewType, string, string)"/> produces, plus slot names on the score column.
/// </summary>
/// <param name="scoreType">Element type of the quantile regressor's output; must be a <see cref="PrimitiveDataViewType"/>.
/// The score column itself is a vector of this type with one slot per quantile.</param>
/// <param name="quantiles">Quantiles used by the quantile regressor.</param>
/// <returns>The <see cref="DataViewSchema"/> of a quantile regressor's output.</returns>
public static DataViewSchema CreateQuantileRegressionSchema(DataViewType scoreType, double[] quantiles)
{
    Contracts.CheckValue(scoreType, nameof(scoreType));
    var primitiveScoreType = scoreType as PrimitiveDataViewType;
    Contracts.CheckValue(primitiveScoreType, nameof(scoreType));
    Contracts.AssertValue(quantiles);

    // Start from the standard score schema; its only column is re-created below with extra annotations.
    var baseSchema = Create(new VectorDataViewType(primitiveScoreType, quantiles.Length), AnnotationUtils.Const.ScoreColumnKind.QuantileRegression);
    var baseColumn = baseSchema[0];

    var annotations = new DataViewSchema.Annotations.Builder();

    // Slot names: one "Quantile-<value>" entry per quantile.
    annotations.AddSlotNames(quantiles.Length, (ref VBuffer<ReadOnlyMemory<char>> value) =>
    {
        var editor = VBufferEditor.Create(ref value, quantiles.Length);
        int slot = 0;
        while (slot < quantiles.Length)
        {
            editor.Values[slot] = string.Format("Quantile-{0}", quantiles[slot]).AsMemory();
            ++slot;
        }
        value = editor.Commit();
    });

    // Then every annotation the standard schema already had.
    annotations.Add(baseColumn.Annotations, kind => true);

    var result = new DataViewSchema.Builder();
    result.AddColumn(baseColumn.Name, baseColumn.Type, annotations.ToAnnotations());
    return result.ToSchema();
}
/// <summary>
/// Rebuilds <paramref name="partialSchema"/> with label names attached to the score column as an annotation.
/// </summary>
/// <param name="partialSchema">Schema to copy; column order is preserved.</param>
/// <param name="scoreColumnIndex">Index of the score column in <paramref name="partialSchema"/>.</param>
/// <param name="labelNameType">Type of the label-name annotation.</param>
/// <param name="labelNameGetter">Getter producing the label names.</param>
/// <param name="labelNameKind">Annotation kind under which the label names are stored.</param>
/// <returns>A new schema equal to <paramref name="partialSchema"/> except for the score column's annotations.</returns>
private DataViewSchema DecorateOutputSchema(DataViewSchema partialSchema, int scoreColumnIndex, VectorDataViewType labelNameType,
    ValueGetter<VBuffer<T>> labelNameGetter, string labelNameKind)
{
    var decorated = new DataViewSchema.Builder();

    // Columns are added sequentially so their order matches the input schema.
    for (int col = 0; col < partialSchema.Count; ++col)
    {
        var source = partialSchema[col];
        var annotations = new DataViewSchema.Annotations.Builder();

        if (col != scoreColumnIndex)
        {
            // Only the score column is affected; everything else keeps all of its annotations.
            annotations.Add(source.Annotations, selector: kind => true);
        }
        else
        {
            // Drop any existing annotation of the same kind, then add the label names.
            annotations.Add(source.Annotations, selector: kind => kind != labelNameKind);
            annotations.Add(labelNameKind, labelNameType, labelNameGetter);
        }

        // Annotations are read-only, so each column is re-created rather than amended in place.
        decorated.AddColumn(source.Name, source.Type, annotations.ToAnnotations());
    }

    return decorated.ToSchema();
}
/// <summary>
/// Creates the data view from the columns accumulated in <paramref name="builder"/>, attaching slot-name and
/// key-value metadata to each column for which the builder holds a getter.
/// </summary>
/// <param name="env">Host environment; a host named "ArrayDataView" is registered on it.</param>
/// <param name="builder">Builder supplying column names, columns and metadata getters.</param>
/// <param name="rowCount">Number of rows; asserted to be non-negative.</param>
public DataView(IHostEnvironment env, ArrayDataViewBuilder builder, int rowCount)
{
    Contracts.AssertValue(env, nameof(env));
    _host = env.Register("ArrayDataView");

    _host.AssertValue(builder);
    _host.Assert(rowCount >= 0);
    _host.Assert(builder._names.Count == builder._columns.Count);

    _rowCount = rowCount;
    _columns = builder._columns.ToArray();

    var schemaBuilder = new DataViewSchema.Builder();
    for (int col = 0; col < _columns.Length; col++)
    {
        var columnName = builder._names[col];
        var columnType = _columns[col].Type;
        var metadata = new DataViewSchema.Metadata.Builder();

        // Slot names and key values are looked up by column name and are both optional.
        if (builder._getSlotNames.TryGetValue(columnName, out var slotNamesGetter))
        {
            metadata.AddSlotNames(columnType.GetVectorSize(), slotNamesGetter);
        }

        if (builder._getKeyValues.TryGetValue(columnName, out var keyValueGetter))
        {
            metadata.AddKeyValues(columnType.GetKeyCountAsInt32(_host), TextDataViewType.Instance, keyValueGetter);
        }

        schemaBuilder.AddColumn(columnName, columnType, metadata.ToMetadata());
    }

    _schema = schemaBuilder.ToSchema();
}
/// <summary>
/// Creates a row whose schema has a single column named "Foo" of the given type, without annotations.
/// </summary>
/// <param name="type">Type of the single column.</param>
/// <param name="getter">Getter delegate stored for later use.</param>
private RowImpl(DataViewType type, Delegate getter)
{
    _getter = getter;

    var schemaBuilder = new DataViewSchema.Builder();
    schemaBuilder.AddColumn("Foo", type, null);
    Schema = schemaBuilder.ToSchema();
}
/// <summary>
/// Computes the output schema: every column of <paramref name="inputSchema"/>, in order, followed by one
/// additional Boolean column named <see cref="TimeSeriesImputerEstimator.IsRowImputedColumnName"/>.
/// </summary>
/// <param name="inputSchema">Schema of the input data.</param>
/// <returns>The input schema extended with the Boolean "is row imputed" column.</returns>
public DataViewSchema GetOutputSchema(DataViewSchema inputSchema)
{
    // No name-keyed dictionary is built here: it was never read, and keying columns by name throws
    // for schemas that contain several columns with the same name (e.g. hidden columns).
    var schemaBuilder = new DataViewSchema.Builder();
    schemaBuilder.AddColumns(inputSchema.AsEnumerable());
    schemaBuilder.AddColumn(TimeSeriesImputerEstimator.IsRowImputedColumnName, BooleanDataViewType.Instance);
    return schemaBuilder.ToSchema();
}
/// <summary>
/// Produces a <see cref="Metadata"/> row holding the current contents of this <see cref="Builder"/>:
/// one schema column and one getter per item, in item order.
/// </summary>
public Metadata ToMetadata()
{
    var getters = _items.Select(item => item.Getter).ToArray();

    var schemaBuilder = new DataViewSchema.Builder();
    foreach (var entry in _items)
    {
        schemaBuilder.AddColumn(entry.Name, entry.Type, entry.Metadata);
    }

    return new Metadata(schemaBuilder.ToSchema(), getters);
}
/// <summary>
/// A column declared as a text column in <see cref="ColumnInformation"/> but typed Single must be rejected.
/// </summary>
public void ValidateTextColumnNotText()
{
    const string TextPurposeColName = "TextColumn";

    var builder = new DataViewSchema.Builder();
    builder.AddColumn(DefaultColumnNames.Features, NumberDataViewType.Single);
    builder.AddColumn(DefaultColumnNames.Label, NumberDataViewType.Single);
    // The column flagged as text below is deliberately numeric.
    builder.AddColumn(TextPurposeColName, NumberDataViewType.Single);
    var data = DataViewTestFixture.BuildDummyDataView(builder.ToSchema());

    var columnInformation = new ColumnInformation();
    columnInformation.TextColumnNames.Add(TextPurposeColName);

    var thrown = Assert.Throws<ArgumentException>(
        () => UserInputValidationUtil.ValidateExperimentExecuteArgs(data, columnInformation, null, TaskKind.Regression));

    Assert.Equal("Provided text column 'TextColumn' was of type Single, but only type String is allowed.", thrown.Message);
}
/// <summary>
/// Produces an <see cref="Annotations"/> row holding the current contents of this <see cref="Builder"/>:
/// one schema column and one getter per item, in item order.
/// </summary>
public Annotations ToAnnotations()
{
    var getters = _items.Select(item => item.Getter).ToArray();

    var schemaBuilder = new DataViewSchema.Builder();
    foreach (var entry in _items)
    {
        schemaBuilder.AddColumn(entry.Name, entry.Type, entry.Annotations);
    }

    return new Annotations(schemaBuilder.ToSchema(), getters);
}
/// <summary>
/// Builds a schema with one column per member reported by <c>RecordTypeRegister.GetRecordInfo</c> for
/// <paramref name="type"/>, using each member's name and data view type.
/// </summary>
/// <param name="type">The record type to describe.</param>
/// <returns>The schema for <paramref name="type"/>.</returns>
private static DataViewSchema CreateSchema(Type type)
{
    var schemaBuilder = new DataViewSchema.Builder();

    foreach (var recordMember in RecordTypeRegister.GetRecordInfo(type))
    {
        schemaBuilder.AddColumn(recordMember.Name, recordMember.DataViewType);
    }

    return schemaBuilder.ToSchema();
}
/// <summary>
/// Wraps a sequence of name-to-value dictionaries. The schema gets one Single-typed column per key of the
/// FIRST dictionary in <paramref name="data"/>; later dictionaries are not inspected here.
/// </summary>
/// <param name="data">The rows backing this data view; stored as-is. Must contain at least one element,
/// because the first element is read to derive the column names.</param>
public FloatsDataView(IEnumerable<IDictionary<string, float>> data)
{
    _data = data;

    var schemaBuilder = new DataViewSchema.Builder();
    var firstRow = data.First();
    foreach (var columnName in firstRow.Keys)
    {
        schemaBuilder.AddColumn(columnName, NumberDataViewType.Single);
    }

    Schema = schemaBuilder.ToSchema();
}
/// <summary>
/// Validation data built without any rows must be rejected with a "0 rows" message, even when the training data has rows.
/// </summary>
public void ValidateEmptyValidationDataThrows()
{
    // Training data: one value per column.
    var trainingBuilder = new ArrayDataViewBuilder(new MLContext());
    trainingBuilder.AddColumn("Number", NumberDataViewType.Single, 0f);
    trainingBuilder.AddColumn(DefaultColumnNames.Label, NumberDataViewType.Single, 0f);
    var trainingData = trainingBuilder.GetDataView();

    // Validation data: same columns, but the fixture is asked not to add the dummy row.
    var validationSchemaBuilder = new DataViewSchema.Builder();
    validationSchemaBuilder.AddColumn("Number", NumberDataViewType.Single);
    validationSchemaBuilder.AddColumn(DefaultColumnNames.Label, NumberDataViewType.Single);
    var validationData = DataViewTestFixture.BuildDummyDataView(validationSchemaBuilder.ToSchema(), createDummyRow: false);

    var thrown = Assert.Throws<ArgumentException>(
        () => UserInputValidationUtil.ValidateExperimentExecuteArgs(trainingData, new ColumnInformation(), validationData, TaskKind.Regression));

    Assert.StartsWith("Validation data has 0 rows", thrown.Message);
}
/// <summary>
/// Builds the output schema with one column per entry of <c>Infos</c>, in order, using each entry's
/// name and column type.
/// </summary>
/// <returns>The output schema.</returns>
private DataViewSchema ComputeOutputSchema()
{
    var outputBuilder = new DataViewSchema.Builder();

    // The i-th entry of Infos describes the i-th loaded column.
    foreach (var columnInfo in Infos)
    {
        outputBuilder.AddColumn(columnInfo.Name, columnInfo.ColType);
    }

    return outputBuilder.ToSchema();
}
void SimpleTest() { var metadataBuilder = new DataViewSchema.Annotations.Builder(); metadataBuilder.Add("M", NumberDataViewType.Single, (ref float v) => v = 484f); var schemaBuilder = new DataViewSchema.Builder(); schemaBuilder.AddColumn("A", new VectorDataViewType(NumberDataViewType.Single, 94)); schemaBuilder.AddColumn("B", new KeyDataViewType(typeof(uint), 17)); schemaBuilder.AddColumn("C", NumberDataViewType.Int32, metadataBuilder.ToAnnotations()); var shape = SchemaShape.Create(schemaBuilder.ToSchema()); var fakeSchema = FakeSchemaFactory.Create(shape); var columnA = fakeSchema[0]; var columnB = fakeSchema[1]; var columnC = fakeSchema[2]; Assert.Equal("A", columnA.Name); Assert.Equal(NumberDataViewType.Single, columnA.Type.GetItemType()); Assert.Equal(10, columnA.Type.GetValueCount()); Assert.Equal("B", columnB.Name); Assert.Equal(InternalDataKind.U4, columnB.Type.GetRawKind()); Assert.Equal(10u, columnB.Type.GetKeyCount()); Assert.Equal("C", columnC.Name); Assert.Equal(NumberDataViewType.Int32, columnC.Type); var metaC = columnC.Annotations; Assert.Single(metaC.Schema); float mValue = -1; metaC.GetValue("M", ref mValue); Assert.Equal(default, mValue);
/// <summary>
/// Wraps a collection of records in a data view.
/// <para/>
/// <paramref name="records"/> is not enumerated here; it is handed to the data view as-is, together with
/// the getters and the schema derived from the columns that the resolver reports for <typeparamref name="T"/>.
/// </summary>
/// <typeparam name="T">The type of the records.</typeparam>
/// <param name="records">A collection of records to be used as the basis for the data view.</param>
/// <param name="dataViewTypeResolver">Resolver that supplies the column descriptions for <typeparamref name="T"/>.</param>
/// <returns>The data view for the given enumerable.</returns>
public static IDataView AsDataView<T>(this IEnumerable<T> records, IDataViewTypeResolver dataViewTypeResolver)
{
    var recordColumns = dataViewTypeResolver.GetDataViewType<T>().GetColumns();

    var gettersBuilder = new DataViewGetters.Builder();
    var schemaBuilder = new DataViewSchema.Builder();

    // Getters and schema are built in lock-step so that both list the columns in the same order.
    foreach (var recordColumn in recordColumns)
    {
        gettersBuilder.AddColumn(recordColumn);
        schemaBuilder.AddColumn(recordColumn.Name, recordColumn.DataViewType);
    }

    return new DataView<T>(records, gettersBuilder.ToGetters(), schemaBuilder.ToSchema());
}
/// <summary>
/// Creates the row mapper: binds the parent's generic mapper to <paramref name="schema"/> and prepares the
/// schema of the feature-contributions column. The final output schema is the generic mapper's output schema
/// combined with that column through a <c>ZipBinding</c>.
/// </summary>
/// <param name="env">Host environment used for assertions and binding.</param>
/// <param name="parent">The bindable mapper that owns this row mapper.</param>
/// <param name="schema">Role-mapped input schema; asserted to have a feature column.</param>
public RowMapper(IHostEnvironment env, BindableMapper parent, RoleMappedSchema schema)
{
    Contracts.AssertValue(env);
    _env = env;
    _env.AssertValue(schema);
    _env.AssertValue(parent);
    _env.Assert(schema.Feature.HasValue);

    _parent = parent;
    InputRoleMappedSchema = schema;
    _genericRowMapper = parent.GenericMapper.Bind(_env, schema) as ISchemaBoundRowMapper;

    var featureSize = FeatureColumn.Type.GetVectorSize();
    var contributionsSchemaBuilder = new DataViewSchema.Builder();

    if (!parent.Stringify)
    {
        // Numeric output: a Single vector shaped after the feature column, carrying the feature slot names when present.
        var contributionsAnnotations = new DataViewSchema.Annotations.Builder();
        if (InputSchema[FeatureColumn.Index].HasSlotNames(featureSize))
        {
            contributionsAnnotations.AddSlotNames(featureSize,
                (ref VBuffer<ReadOnlyMemory<char>> value) => FeatureColumn.Annotations.GetValue(AnnotationUtils.Kinds.SlotNames, ref value));
        }

        var contributionsType = new VectorType(NumberDataViewType.Single, FeatureColumn.Type as VectorType);
        contributionsSchemaBuilder.AddColumn(DefaultColumnNames.FeatureContributions, contributionsType, contributionsAnnotations.ToAnnotations());
        _outputSchema = contributionsSchemaBuilder.ToSchema();
    }
    else
    {
        // Text output: a single text column without annotations.
        contributionsSchemaBuilder.AddColumn(DefaultColumnNames.FeatureContributions, TextDataViewType.Instance, null);
        _outputSchema = contributionsSchemaBuilder.ToSchema();

        // Cache the feature slot names, or an empty buffer of the feature size when there are none.
        if (FeatureColumn.HasSlotNames(featureSize))
        {
            FeatureColumn.Annotations.GetValue(AnnotationUtils.Kinds.SlotNames, ref _slotNames);
        }
        else
        {
            _slotNames = VBufferUtils.CreateEmpty<ReadOnlyMemory<char>>(featureSize);
        }
    }

    _outputGenericSchema = _genericRowMapper.OutputSchema;
    var zippedSchemas = new DataViewSchema[] { _outputGenericSchema, _outputSchema, };
    OutputSchema = new ZipBinding(zippedSchemas).OutputSchema;
}