/// <summary>
/// Builds a <see cref="DataViewSchema"/> with one column per column exposed by <paramref name="inputBindings"/>.
/// Each output column takes its name, type and annotations from the bindings.
/// </summary>
/// <param name="inputBindings">Bindings that supply column names, column types and annotation types.</param>
/// <returns>The schema produced from the bindings, with columns in binding index order.</returns>
private static DataViewSchema CreateSchema(ColumnBindingsBase inputBindings)
{
    Contracts.CheckValue(inputBindings, nameof(inputBindings));

    var schemaBuilder = new DataViewSchema.Builder();
    int column = 0;
    while (column < inputBindings.ColumnCount)
    {
        // Collect every annotation the bindings declare for this column.
        var annotations = new DataViewSchema.Annotations.Builder();
        foreach (var annotation in inputBindings.GetAnnotationTypes(column))
        {
            var annotationName = annotation.Key;
            var annotationType = annotation.Value;

            // The getter delegate is created for the annotation's raw type via MarshalInvoke.
            var getter = Utils.MarshalInvoke(GetAnnotationGetterDelegate<int>, annotationType.RawType, inputBindings, column, annotationName);
            annotations.Add(annotationName, annotationType, getter);
        }

        var columnName = inputBindings.GetColumnName(column);
        var columnType = inputBindings.GetColumnType(column);
        schemaBuilder.AddColumn(columnName, columnType, annotations.ToAnnotations());

        column++;
    }

    return schemaBuilder.ToSchema();
}
/// <summary>
/// Verifies that declaring a Single-typed column as a text column is rejected with an
/// <see cref="ArgumentException"/> carrying the expected message.
/// </summary>
public void ValidateTextColumnNotText()
{
    const string TextPurposeColName = "TextColumn";

    // Arrange: three Single columns, the last of which is (wrongly) declared as a text column.
    var builder = new DataViewSchema.Builder();
    var singleColumnNames = new[] { DefaultColumnNames.Features, DefaultColumnNames.Label, TextPurposeColName };
    foreach (var columnName in singleColumnNames)
    {
        builder.AddColumn(columnName, NumberDataViewType.Single);
    }

    var data = DataViewTestFixture.BuildDummyDataView(builder.ToSchema());

    var columns = new ColumnInformation();
    columns.TextColumnNames.Add(TextPurposeColName);

    // Act
    var thrown = Assert.Throws<ArgumentException>(
        () => UserInputValidationUtil.ValidateExperimentExecuteArgs(data, columns, null, TaskKind.Regression));

    // Assert
    Assert.Equal("Provided text column 'TextColumn' was of type Single, but only type String is allowed.", thrown.Message);
}
/// <summary>
/// Creates a <see cref="DataViewSchema"/> made of exactly one score column.
/// </summary>
/// <param name="scoreType">Type assigned to the score column.</param>
/// <param name="scoreColumnKindValue">Value stored under <see cref="AnnotationUtils.Kinds.ScoreColumnKind"/>
/// in the score column's annotations.</param>
/// <param name="scoreColumnName">Name given to the score column in the returned <see cref="DataViewSchema"/>.</param>
/// <returns>A <see cref="DataViewSchema"/> containing only the score column.</returns>
public static DataViewSchema Create(DataViewType scoreType, string scoreColumnKindValue, string scoreColumnName = AnnotationUtils.Const.ScoreValueKind.Score)
{
    Contracts.CheckValue(scoreType, nameof(scoreType));
    Contracts.CheckNonEmpty(scoreColumnKindValue, nameof(scoreColumnKindValue));

    // The column carries two text annotations: the score column kind, which the caller chooses,
    // and the score value kind, which is always the constant "Score" kind.
    var annotations = new DataViewSchema.Annotations.Builder();
    annotations.Add(
        AnnotationUtils.Kinds.ScoreColumnKind,
        TextDataViewType.Instance,
        (ref ReadOnlyMemory<char> dst) => dst = scoreColumnKindValue.AsMemory());
    annotations.Add(
        AnnotationUtils.Kinds.ScoreValueKind,
        TextDataViewType.Instance,
        (ref ReadOnlyMemory<char> dst) => dst = AnnotationUtils.Const.ScoreValueKind.Score.AsMemory());

    // The resulting schema holds just that one annotated column.
    var schema = new DataViewSchema.Builder();
    schema.AddColumn(scoreColumnName, scoreType, annotations.ToAnnotations());
    return schema.ToSchema();
}
/// <summary>
/// After <see cref="_sourceSchema"/> and <see cref="_sources"/> are set, pick up selected columns from
/// <see cref="_sourceSchema"/> to create <see cref="OutputSchema"/>.
/// Note that <see cref="_sources"/> tells us what columns in <see cref="_sourceSchema"/> are put into
/// <see cref="OutputSchema"/>: the input column indexed by <c>_sources[i]</c> becomes the i-th output column.
/// </summary>
/// <returns>A schema holding a copy (name, type, annotations) of every selected input column, in selection order.</returns>
/// <remarks>
/// Fails through <c>Contracts.Check</c> when a selected index is negative or not smaller than the number of
/// input columns.
/// </remarks>
private DataViewSchema ComputeOutputSchema()
{
    var schemaBuilder = new DataViewSchema.Builder();
    for (int i = 0; i < _sources.Length; ++i)
    {
        // selectedIndex is a column index into the input schema.
        var selectedIndex = _sources[i];

        // The dropped/kept columns are determined by user-specified arguments, so we throw if a bad
        // configuration is provided. Both bounds are verified, and the message is only formatted when
        // the index is actually invalid so that valid columns do not pay for the string allocation.
        if (selectedIndex < 0 || selectedIndex >= _sourceSchema.Count)
        {
            string message = string.Format("Column index {0} invalid for input with {1} columns", selectedIndex, _sourceSchema.Count);
            Contracts.Check(false, message);
        }

        // Copy the selected column into the output schema.
        var selectedColumn = _sourceSchema[selectedIndex];
        schemaBuilder.AddColumn(selectedColumn.Name, selectedColumn.Type, selectedColumn.Annotations);
    }

    return schemaBuilder.ToSchema();
}
/// <summary>
/// Creates a binding over several source schemas: records the running column totals per source
/// and exposes the concatenation of all source columns as <c>OutputSchema</c>.
/// </summary>
/// <param name="sources">The source schemas, in the order their columns appear in the output. Asserted to be non-empty.</param>
public ZipBinding(DataViewSchema[] sources)
{
    Contracts.AssertNonEmpty(sources);

    _sources = sources;

    // Entry k is the total number of columns in the first k sources; entry 0 keeps the array default of zero.
    _cumulativeColCounts = new int[_sources.Length + 1];

    var combined = new DataViewSchema.Builder();
    int index = 0;
    foreach (var source in sources)
    {
        _cumulativeColCounts[index + 1] = _cumulativeColCounts[index] + source.Count;
        combined.AddColumns(source);
        index++;
    }

    OutputSchema = combined.ToSchema();
}
/// <summary>
/// Verifies that validation data built without any row is rejected with an
/// <see cref="ArgumentException"/> whose message starts with "Validation data has 0 rows".
/// </summary>
public void ValidateEmptyValidationDataThrows()
{
    // Arrange: training data with one value per column.
    var trainBuilder = new ArrayDataViewBuilder(new MLContext());
    trainBuilder.AddColumn("Number", NumberDataViewType.Single, 0f);
    trainBuilder.AddColumn(DefaultColumnNames.Label, NumberDataViewType.Single, 0f);
    var train = trainBuilder.GetDataView();

    // Arrange: validation data with the same two columns, built with createDummyRow set to false.
    var validationSchemaBuilder = new DataViewSchema.Builder();
    foreach (var columnName in new[] { "Number", DefaultColumnNames.Label })
    {
        validationSchemaBuilder.AddColumn(columnName, NumberDataViewType.Single);
    }

    var validation = DataViewTestFixture.BuildDummyDataView(validationSchemaBuilder.ToSchema(), createDummyRow: false);

    // Act
    var thrown = Assert.Throws<ArgumentException>(
        () => UserInputValidationUtil.ValidateExperimentExecuteArgs(train, new ColumnInformation(), validation, TaskKind.Regression));

    // Assert
    Assert.StartsWith("Validation data has 0 rows", thrown.Message);
}
// Round-trips a three-column schema through SchemaShape.Create and FakeSchemaFactory.Create and checks the result.
//
// Input schema:
//   "A" - vector of Single declared with size 94
//   "B" - key type over uint declared with count 17
//   "C" - Int32, carrying one annotation "M" whose getter yields 484f
//
// Expectations asserted on the fake schema:
//   - column names "A", "B", "C" are kept, as are A's item type (Single), B's raw kind (U4) and C's type (Int32);
//   - A's value count and B's key count are both 10 rather than the declared 94 and 17;
//   - C has exactly one annotation, and reading "M" overwrites the -1 sentinel with the default value
//     rather than the 484f supplied to the original getter.
// NOTE(review): this suggests FakeSchemaFactory substitutes placeholder sizes and values - confirm against its implementation.
void SimpleTest() { var metadataBuilder = new DataViewSchema.Annotations.Builder(); metadataBuilder.Add("M", NumberDataViewType.Single, (ref float v) => v = 484f); var schemaBuilder = new DataViewSchema.Builder(); schemaBuilder.AddColumn("A", new VectorDataViewType(NumberDataViewType.Single, 94)); schemaBuilder.AddColumn("B", new KeyDataViewType(typeof(uint), 17)); schemaBuilder.AddColumn("C", NumberDataViewType.Int32, metadataBuilder.ToAnnotations()); var shape = SchemaShape.Create(schemaBuilder.ToSchema()); var fakeSchema = FakeSchemaFactory.Create(shape); var columnA = fakeSchema[0]; var columnB = fakeSchema[1]; var columnC = fakeSchema[2]; Assert.Equal("A", columnA.Name); Assert.Equal(NumberDataViewType.Single, columnA.Type.GetItemType()); Assert.Equal(10, columnA.Type.GetValueCount()); Assert.Equal("B", columnB.Name); Assert.Equal(InternalDataKind.U4, columnB.Type.GetRawKind()); Assert.Equal(10u, columnB.Type.GetKeyCount()); Assert.Equal("C", columnC.Name); Assert.Equal(NumberDataViewType.Int32, columnC.Type); var metaC = columnC.Annotations; Assert.Single(metaC.Schema); float mValue = -1; metaC.GetValue("M", ref mValue); Assert.Equal(default, mValue);
/// <summary>
/// Binds the tree-ensemble featurizer to a role-mapped schema and builds the output schema.
/// The output schema contains up to three vector columns (tree values, leaf indicators, path indicators);
/// a column is only emitted when its name argument is not null.
/// </summary>
/// <param name="ectx">Exception context used for the assertions; stored on the instance.</param>
/// <param name="owner">The bindable mapper that supplies the ensemble, the total leaf count and the slot-name getters.</param>
/// <param name="schema">Role-mapped input schema; asserted to have a feature column.</param>
/// <param name="treesColumnName">Name of the tree-values output column, or null to omit it.</param>
/// <param name="leavesColumnName">Name of the leaf-indicator output column, or null to omit it.</param>
/// <param name="pathsColumnName">Name of the path-indicator output column, or null to omit it.</param>
public BoundMapper(IExceptionContext ectx, TreeEnsembleFeaturizerBindableMapper owner, RoleMappedSchema schema,
    string treesColumnName, string leavesColumnName, string pathsColumnName)
{
    Contracts.AssertValue(ectx);
    ectx.AssertValue(owner);
    ectx.AssertValue(schema);
    ectx.Assert(schema.Feature.HasValue);

    _ectx = ectx;
    _owner = owner;
    InputRoleMappedSchema = schema;

    var treeCount = owner._ensemble.TrainedEnsemble.NumTrees;
    var leafCount = owner._totalLeafCount;

    // One slot per tree: the output of each tree on a given example.
    var treeValueType = new VectorDataViewType(NumberDataViewType.Single, treeCount);

    // One slot per leaf in the whole ensemble: an indicator of the leaf the example lands in, for every tree.
    var leafIdType = new VectorDataViewType(NumberDataViewType.Single, leafCount);

    // One slot per internal node in the whole ensemble: an indicator of the nodes on the example's paths.
    // For a single binary tree, #internal + #leaf = 2 * #internal + 1 (every node except the root is a child
    // of an internal node), hence #internal = #leaf - 1. Summed over the ensemble this gives #leaf - #trees.
    var pathIdType = new VectorDataViewType(NumberDataViewType.Single, leafCount - treeCount);

    // Assemble the output schema from the types derived above.
    var outputBuilder = new DataViewSchema.Builder();

    _treesColumnName = treesColumnName;
    if (treesColumnName != null)
    {
        // Tree values only carry slot names.
        ValueGetter<VBuffer<ReadOnlyMemory<char>>> treeSlotNames = owner.GetTreeSlotNames;
        var treeAnnotations = new DataViewSchema.Annotations.Builder();
        treeAnnotations.Add(AnnotationUtils.Kinds.SlotNames, AnnotationUtils.GetNamesType(treeValueType.Size), treeSlotNames);

        outputBuilder.AddColumn(treesColumnName, treeValueType, treeAnnotations.ToAnnotations());
    }

    _leavesColumnName = leavesColumnName;
    if (leavesColumnName != null)
    {
        // Leaf indicators carry slot names and are flagged as normalized.
        ValueGetter<VBuffer<ReadOnlyMemory<char>>> leafSlotNames = owner.GetLeafSlotNames;
        var leafAnnotations = new DataViewSchema.Annotations.Builder();
        leafAnnotations.Add(AnnotationUtils.Kinds.SlotNames, AnnotationUtils.GetNamesType(leafIdType.Size), leafSlotNames);
        leafAnnotations.Add(AnnotationUtils.Kinds.IsNormalized, BooleanDataViewType.Instance, (ref bool value) => value = true);

        outputBuilder.AddColumn(leavesColumnName, leafIdType, leafAnnotations.ToAnnotations());
    }

    _pathsColumnName = pathsColumnName;
    if (pathsColumnName != null)
    {
        // Path indicators carry slot names and are flagged as normalized.
        ValueGetter<VBuffer<ReadOnlyMemory<char>>> pathSlotNames = owner.GetPathSlotNames;
        var pathAnnotations = new DataViewSchema.Annotations.Builder();
        pathAnnotations.Add(AnnotationUtils.Kinds.SlotNames, AnnotationUtils.GetNamesType(pathIdType.Size), pathSlotNames);
        pathAnnotations.Add(AnnotationUtils.Kinds.IsNormalized, BooleanDataViewType.Instance, (ref bool value) => value = true);

        outputBuilder.AddColumn(pathsColumnName, pathIdType, pathAnnotations.ToAnnotations());
    }

    OutputSchema = outputBuilder.ToSchema();
}
/// <summary>
/// Builds a <see cref="DataViewSchema"/> describing the operators of a TensorFlow graph, one column per
/// operator whose first output has an element type representable in ML.NET.
/// </summary>
/// <param name="ectx">Exception context. It is not used by this method body.</param>
/// <param name="graph">The graph whose operators are enumerated.</param>
/// <param name="opType">When not null, only operators whose <c>OpType</c> equals this value are included.</param>
/// <returns>
/// A schema in which each column is named after an operator, typed as a vector of the operator's element type,
/// and annotated with the operator type and, when the operator has inputs, the names of its upstream operators.
/// </returns>
internal static DataViewSchema GetModelSchema(IExceptionContext ectx, Graph graph, string opType = null)
{
    var schemaBuilder = new DataViewSchema.Builder();
    foreach (Operation op in graph)
    {
        if (opType != null && opType != op.OpType)
        {
            continue;
        }

        // Operators which have NumOutputs <= 0 need to be filtered out before anything touches output 0:
        // querying the shape of a missing output crashes the TensorFlow runtime
        // (https://github.com/dotnet/machinelearning/issues/2156), and asking for the type of output 0
        // on such an operator addresses an output that does not exist either.
        if (op.NumOutputs <= 0)
        {
            continue;
        }

        // Determine element type in the TensorFlow tensor. For example, a vector of floats may get NumberType.R4 here.
        var tfType = op.OutputType(0);
        var mlType = Tf2MlNetTypeOrNull(tfType);

        // If the type is not supported in ML.NET then we cannot represent it as a column in a schema.
        // We also cannot output it with a TensorFlowTransform, so we skip it.
        if (mlType == null)
        {
            continue;
        }

        // Construct the final ML.NET type of a TensorFlow variable. The default is a vector of unknown size;
        // explicit dimensions are used only when the shape is non-empty, is not a single non-positive dimension,
        // and every dimension after the first is positive. A non-positive first dimension is dropped.
        var tensorShape = op.output.TensorShape.dims;
        int rank = Utils.Size(tensorShape);
        bool isSingleUnknownDimension = rank == 1 && tensorShape[0] <= 0;
        bool trailingDimensionsKnown = rank > 0 && tensorShape.Skip(1).All(x => x > 0);

        var columnType = new VectorDataViewType(mlType);
        if (!isSingleUnknownDimension && trailingDimensionsKnown)
        {
            columnType = new VectorDataViewType(mlType, tensorShape[0] > 0 ? tensorShape : tensorShape.Skip(1).ToArray());
        }

        // There can be at most two metadata fields.
        //  1. The first field is always present. Its value is this operator's type. For example,
        //     if an output is produced by a "Softmax" operator, the value of this field should be "Softmax".
        //  2. The second field stores operators whose outputs are consumed by this operator. In other words,
        //     these values are names of some upstream operators which should be evaluated before executing
        //     the current operator. It's possible that one operator doesn't need any input, so this field
        //     can be missing.
        var metadataBuilder = new DataViewSchema.Annotations.Builder();

        // Create the first metadata field.
        metadataBuilder.Add(TensorflowOperatorTypeKind, TextDataViewType.Instance,
            (ref ReadOnlyMemory<char> value) => value = op.OpType.AsMemory());

        if (op.NumInputs > 0)
        {
            // Put upstream operators' names into an array (type: VBuffer) of strings (type: ReadOnlyMemory<char>).
            VBuffer<ReadOnlyMemory<char>> upstreamOperatorNames = default;
            var bufferEditor = VBufferEditor.Create(ref upstreamOperatorNames, op.NumInputs);
            for (int i = 0; i < op.NumInputs; ++i)
            {
                bufferEditor.Values[i] = op.inputs[i].op.name.AsMemory();
            }

            upstreamOperatorNames = bufferEditor.Commit(); // Used in the metadata's getter.

            // Create the second metadata field.
            metadataBuilder.Add(TensorflowUpstreamOperatorsKind,
                new VectorDataViewType(TextDataViewType.Instance, op.NumInputs),
                (ref VBuffer<ReadOnlyMemory<char>> value) => { upstreamOperatorNames.CopyTo(ref value); });
        }

        schemaBuilder.AddColumn(op.name, columnType, metadataBuilder.ToAnnotations());
    }

    return schemaBuilder.ToSchema();
}
/// <summary>
/// Trains a Poisson regression on the RGB mapping CSV, scores a ramp of 100 synthetic B0 values,
/// then saves the model to "test.bin" and exports it to "test.onnx".
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
static void Main(string[] args)
{
    var context = new MLContext();
    var trainingSet = context.Data.LoadFromTextFile<RawColorData>(@"D:\Axodox\Documents\rgbMapping.csv", ',');

    // Each raw channel is converted to Single under an "N"-prefixed name.
    var channelConversions = new[]
    {
        new InputOutputColumnPair("NR0", "R0"),
        new InputOutputColumnPair("NG0", "G0"),
        new InputOutputColumnPair("NB0", "B0"),
        new InputOutputColumnPair("NR1", "R1"),
        new InputOutputColumnPair("NG1", "G1"),
        new InputOutputColumnPair("NB1", "B1")
    };

    // Convert, divide each channel by 255, use NB1 as the label and the three "0" channels as features.
    var trainingPipeline = context.Transforms.Conversion.ConvertType(channelConversions, DataKind.Single)
        .Append(context.Transforms.Expression("NR0", "NR0 => NR0 / 255", "NR0"))
        .Append(context.Transforms.Expression("NG0", "NG0 => NG0 / 255", "NG0"))
        .Append(context.Transforms.Expression("NB0", "NB0 => NB0 / 255", "NB0"))
        .Append(context.Transforms.Expression("NR1", "NR1 => NR1 / 255", "NR1"))
        .Append(context.Transforms.Expression("NG1", "NG1 => NG1 / 255", "NG1"))
        .Append(context.Transforms.Expression("NB1", "NB1 => NB1 / 255", "NB1"))
        .Append(context.Transforms.CopyColumns("Label", "NB1"))
        .Append(context.Transforms.Concatenate("Features", "NR0", "NG0", "NB0"))
        .Append(context.Transforms.SelectColumns("Label", "Features"))
        .Append(context.Regression.Trainers.LbfgsPoissonRegression());

    var model = trainingPipeline.Fit(trainingSet);

    // Evaluate on the training set itself; the metrics are not used further in this method.
    var scoredTraining = model.Transform(trainingSet);
    var metrics = context.Regression.Evaluate(scoredTraining);

    // Score a ramp of B0 values spread over the byte range; the scores are not used further in this method.
    const int sampleCount = 100;
    var samples = Enumerable.Range(0, sampleCount)
        .Select(i => new RawColorData() { B0 = (byte)(i / (float)sampleCount * 255f) })
        .ToArray();
    var sampleView = context.Data.LoadFromEnumerable(samples);
    var scoredSamples = model.Transform(sampleView);
    var valuesOut = scoredSamples.GetColumn<float>("Score").ToArray();

    // Save the model together with an input schema of three byte channels.
    var inputSchemaBuilder = new DataViewSchema.Builder();
    foreach (var channel in new[] { "R0", "G0", "B0" })
    {
        inputSchemaBuilder.AddColumn(channel, NumberDataViewType.Byte);
    }

    var inputSchema = inputSchemaBuilder.ToSchema();
    context.Model.Save(model, inputSchema, "test.bin");

    // Export to ONNX, using a single default RawColorInput row as the example input.
    var exportView = context.Data.LoadFromEnumerable(new[] { new RawColorInput() });
    using (var onnxFile = new FileStream("test.onnx", FileMode.Create, FileAccess.Write))
    {
        OnnxExportExtensions.ConvertToOnnx(context.Model, model, exportView, onnxFile);
        onnxFile.Flush();
    }
}
/// <summary>
/// Creates a matrix from row-major <paramref name="data"/>. The schema is inferred from the values of the
/// first row: <see cref="float"/> maps to Single, <see cref="DateTime"/> to DateTime and <see cref="string"/>
/// to Text. Rows are shallow-copied, and values in text columns are stored as <see cref="ReadOnlyMemory{T}"/>
/// of <see cref="char"/> instead of <see cref="string"/>.
/// </summary>
/// <param name="data">Rows of cell values. Must contain at least one row.</param>
/// <param name="columnNames">Column names, one per column, in column order.</param>
/// <exception cref="ArgumentNullException"><paramref name="data"/> or <paramref name="columnNames"/> is null.</exception>
/// <exception cref="ArgumentException">
/// <paramref name="data"/> is empty; a row is null; the first row has fewer values than there are column names;
/// a row has more values than there are column names; a first-row value is null or of an unsupported type;
/// or a value in a text column is not a <see cref="string"/>.
/// </exception>
public DynamicMatrix(object[][] data, IEnumerable<string> columnNames)
{
    // Check arguments
    if (data is null)
    {
        throw new ArgumentNullException(nameof(data));
    }

    if (data.Length == 0)
    {
        throw new ArgumentException("Input data must contains at least 1 row");
    }

    if (columnNames is null)
    {
        throw new ArgumentNullException(nameof(columnNames));
    }

    var collectedColumnNames = columnNames.ToArray();
    var columnCount = collectedColumnNames.Length;

    // The first row drives type inference, so it must provide a value for every column.
    var firstRow = data[0];
    if (firstRow is null)
    {
        throw new ArgumentException("Row 0 is null");
    }

    if (firstRow.Length < columnCount)
    {
        throw new ArgumentException($"Row 0 has {firstRow.Length} values but {columnCount} column names were given");
    }

    // Determine data type of each column from the first row
    var builder = new DataViewSchema.Builder();
    var isTextColumn = new bool[columnCount];
    for (var i = 0; i < columnCount; i++)
    {
        var columnName = collectedColumnNames[i];
        var firstValue = firstRow[i];

        DataViewType type;
        if (firstValue is null)
        {
            // Without a value there is nothing to infer from; report it instead of failing on GetType() below.
            throw new ArgumentException($"Unsupported null value detected in the first row for column '{columnName}'");
        }
        else if (firstValue is float)
        {
            type = NumberDataViewType.Single;
        }
        else if (firstValue is DateTime)
        {
            type = DateTimeDataViewType.Instance;
        }
        else if (firstValue is string)
        {
            type = TextDataViewType.Instance; // This is not for String but for ReadOnlyMemory<char>
            isTextColumn[i] = true;
        }
        else
        {
            throw new ArgumentException($"Unsupported type of value detected: {firstValue.GetType()}");
        }

        builder.AddColumn(columnName, type);
    }

    Schema = builder.ToSchema();

    // Reference all values ensuring its type
    var rows = new object[data.Length][];
    for (var i = 0; i < data.Length; i++)
    {
        if (data[i] is null)
        {
            throw new ArgumentException($"Row {i} is null");
        }

        // Shallow copy this row so that we can safely swap its elements
        var row = data[i].ToArray();
        if (row.Length > columnCount)
        {
            throw new ArgumentException($"Row {i} has {row.Length} values but only {columnCount} columns are defined");
        }

        for (var j = 0; j < row.Length; j++)
        {
            if (isTextColumn[j])
            {
                if (!(row[j] is string text))
                {
                    throw new ArgumentException($"Value at row {i}, column '{collectedColumnNames[j]}' must be a string");
                }

                // Strings are immutable, so the memory can reference the string directly without copying it.
                row[j] = text.AsMemory();
            }

            //TODO: We should check type consistency here for other data types
        }

        rows[i] = row;
    }

    _data = rows;
}