/// <summary>
/// Builds a six-column in-memory data view, writes it out with <see cref="TransposeSaver"/>,
/// reads it back with <see cref="TransposeLoader"/>, and compares the reloaded data against the
/// original, first as a whole view and then column by column.
/// </summary>
public void TransposerSaverLoaderTest()
{
    const int rowCount = 1000;

    // Fixed seed so that every run generates the same data.
    var rng = new Random(1);

    // All columns draw from the same Random instance, so they are generated strictly in
    // A..F order to keep the produced values stable.

    // A: sparse-ish vector column, exercises splitting.
    var columnA = GenerateHelper(rowCount, 0.1, rng, () => rng.Next(), 50, 5, 10, 15);
    // Put one vector with null value/index arrays in the middle, to cover the null VBuffer case.
    columnA[rowCount / 2] = new VBuffer<int>(50, 0, null, null);

    // B: dense-ish vector column, exercises splitting.
    var columnB = GenerateHelper(rowCount, 0.8, rng, rng.NextDouble, 50, 0, 25, 49);

    // C: a column that is simply carried along, nothing special is done with it.
    var columnC = GenerateHelper(rowCount, 0.1, rng, () => (short)1, 30, 3, 10, 24);

    // D: a vector column small enough that it should not need to be split.
    var columnD = GenerateHelper(rowCount, 0.1, rng, rng.NextDouble, 3, 1);

    // E: sparse scalar column.
    var columnE = GenerateHelper(rowCount, 0.1, rng, () => (uint)rng.Next(int.MinValue, int.MaxValue));

    // F: dense-ish scalar column.
    var columnF = GenerateHelper(rowCount, 0.8, rng, () => rng.Next());

    var viewBuilder = new ArrayDataViewBuilder(Env);
    viewBuilder.AddColumn("A", NumberDataViewType.Int32, columnA);
    viewBuilder.AddColumn("B", NumberDataViewType.Double, columnB);
    viewBuilder.AddColumn("C", NumberDataViewType.Int16, columnC);
    viewBuilder.AddColumn("D", NumberDataViewType.Double, columnD);
    viewBuilder.AddColumn("E", NumberDataViewType.UInt32, columnE);
    viewBuilder.AddColumn("F", NumberDataViewType.Int32, columnF);
    IDataView original = viewBuilder.GetDataView();

    // Save every column, in schema order, into memory and keep the raw bytes as the load source.
    IMultiStreamSource savedSource;
    using (var buffer = new MemoryStream())
    {
        var allColumns = Utils.GetIdentityPermutation(original.Schema.Count);
        var saver = new TransposeSaver(Env, new TransposeSaver.Arguments());
        saver.SaveData(buffer, original, allColumns);
        savedSource = new BytesStreamSource(buffer.ToArray());
    }

    var reloaded = new TransposeLoader(Env, new TransposeLoader.Arguments(), savedSource);

    // Viewed as a plain IDataView, the reloaded data must yield the same values.
    CheckSameValues(original, reloaded);

    // Then run the per-column transpose check, using each column's item type.
    TransposeCheckHelper<int>(original, 0, reloaded);    // A
    TransposeCheckHelper<double>(original, 1, reloaded); // B
    TransposeCheckHelper<short>(original, 2, reloaded);  // C
    TransposeCheckHelper<double>(original, 3, reloaded); // D
    TransposeCheckHelper<uint>(original, 4, reloaded);   // E
    TransposeCheckHelper<int>(original, 5, reloaded);    // F

    Done();
}
/// <summary>
/// Reads a two-row, space-separated text source through the statically typed
/// <c>TextLoader</c> reader, checks the resulting schema and row values, and then appends an
/// estimator that re-maps column names to check the implicit copy-columns behaviour.
/// </summary>
public void SimpleTextLoaderCopyColumnsTest()
{
    var env = new ConsoleEnvironment(0, verbose: true);

    const string data = "0 hello 3.14159 -0 2\n"
        + "1 1 2 4 15";
    var dataSource = new BytesStreamSource(data);

    // The numeric features range is open-ended (null upper bound). When fit against the data
    // it should resolve to an upper bound of 4, i.e. a vector of length 3.
    var reader = TextLoader.CreateReader(env, ctx => (
            label: ctx.LoadBool(0),
            text: ctx.LoadText(1),
            numericFeatures: ctx.LoadFloat(2, null)),
        dataSource, separator: ' ');

    // The type-safe wrapper over `IDataView` is currently only good as an input to the `Fit`
    // functions of the other statically typed wrappers, so all inspection below is done on
    // the underlying dynamic `IDataView`.
    var loadedView = reader.Read(dataSource).AsDynamic;
    var loadedSchema = loadedView.Schema;

    // Every identifier in the tuple should have produced a column of that name...
    CheckSchemaHasColumn(loadedSchema, "label", out int labelCol);
    CheckSchemaHasColumn(loadedSchema, "text", out int textCol);
    CheckSchemaHasColumn(loadedSchema, "numericFeatures", out int featuresCol);

    // ...and each of those columns should have the expected type.
    Assert.Equal(BoolType.Instance, loadedSchema.GetColumnType(labelCol));
    Assert.Equal(TextType.Instance, loadedSchema.GetColumnType(textCol));
    Assert.Equal(new VectorType(NumberType.R4, 3), loadedSchema.GetColumnType(featuresCol));

    // Now walk the rows and compare the actual values.
    using (var cursor = loadedView.GetRowCursor(col => true))
    {
        var readText = cursor.GetGetter<ReadOnlyMemory<char>>(textCol);
        var readFeatures = cursor.GetGetter<VBuffer<float>>(featuresCol);
        var readLabel = cursor.GetGetter<bool>(labelCol);

        bool actualLabel = default;
        ReadOnlyMemory<char> actualText = default;
        VBuffer<float> actualFeatures = default;

        // Fetches the three columns of the cursor's current row and compares them with the
        // expected label, text and three feature values.
        void AssertCurrentRow(bool expectedLabel, string expectedText, float f0, float f1, float f2)
        {
            readLabel(ref actualLabel);
            readText(ref actualText);
            readFeatures(ref actualFeatures);

            Assert.True(expectedText.AsSpan().SequenceEqual(actualText.Span));
            Assert.Equal(expectedLabel, actualLabel);

            float[] expectedFeatures = { f0, f1, f2 };
            Assert.Equal(expectedFeatures.Length, actualFeatures.Length);
            for (int slot = 0; slot < expectedFeatures.Length; slot++)
            {
                Assert.Equal(expectedFeatures[slot], actualFeatures.GetItemOrDefault(slot));
            }
        }

        Assert.True(cursor.MoveNext(), "Could not move even to first row");
        AssertCurrentRow(false, "hello", 3.14159f, -0f, 2f);

        Assert.True(cursor.MoveNext(), "Could not move to second row");
        AssertCurrentRow(true, "1", 2f, 4f, 15f);

        Assert.False(cursor.MoveNext(), "Moved to third row, but there should have been only two");
    }

    // Shuffle the names around: "text" now receives the old label column and "label" the old
    // numeric features. This exercises the implicit use of copy columns.
    var renamer = reader.MakeNewEstimator()
        .Append(r => (text: r.label, label: r.numericFeatures));
    var renamingReader = reader.Append(renamer);
    var renamedView = renamingReader.Fit(dataSource).Read(dataSource);
    var renamedSchema = renamedView.AsDynamic.Schema;

    // Both output names must be present...
    CheckSchemaHasColumn(renamedSchema, "label", out int renamedLabelCol);
    CheckSchemaHasColumn(renamedSchema, "text", out int renamedTextCol);

    // ...and must carry the types of the columns they were copied from.
    Assert.Equal(BoolType.Instance, renamedSchema.GetColumnType(renamedTextCol));
    Assert.Equal(new VectorType(NumberType.R4, 3), renamedSchema.GetColumnType(renamedLabelCol));
}