Beispiel #1
0
        public void TransposerSaverLoaderTest()
        {
            const int            rowCount = 1000;
            Random               rgen     = new Random(1);
            ArrayDataViewBuilder builder  = new ArrayDataViewBuilder(Env);

            // A is to check the splitting of a sparse-ish column.
            var dataA = GenerateHelper(rowCount, 0.1, rgen, () => (int)rgen.Next(), 50, 5, 10, 15);

            dataA[rowCount / 2] = new VBuffer <int>(50, 0, null, null); // Coverage for the null vbuffer case.
            builder.AddColumn("A", NumberDataViewType.Int32, dataA);
            // B is to check the splitting of a dense-ish column.
            builder.AddColumn("B", NumberDataViewType.Double, GenerateHelper(rowCount, 0.8, rgen, rgen.NextDouble, 50, 0, 25, 49));
            // C is to just have some column we do nothing with.
            builder.AddColumn("C", NumberDataViewType.Int16, GenerateHelper(rowCount, 0.1, rgen, () => (short)1, 30, 3, 10, 24));
            // D is to check some column we don't have to split because it's sufficiently small.
            builder.AddColumn("D", NumberDataViewType.Double, GenerateHelper(rowCount, 0.1, rgen, rgen.NextDouble, 3, 1));
            // E is to check a sparse scalar column.
            builder.AddColumn("E", NumberDataViewType.UInt32, GenerateHelper(rowCount, 0.1, rgen, () => (uint)rgen.Next(int.MinValue, int.MaxValue)));
            // F is to check a dense-ish scalar column.
            builder.AddColumn("F", NumberDataViewType.Int32, GenerateHelper(rowCount, 0.8, rgen, () => (int)rgen.Next()));

            IDataView view = builder.GetDataView();

            IMultiStreamSource src;

            using (MemoryStream mem = new MemoryStream())
            {
                TransposeSaver saver = new TransposeSaver(Env, new TransposeSaver.Arguments());
                saver.SaveData(mem, view, Utils.GetIdentityPermutation(view.Schema.Count));
                src = new BytesStreamSource(mem.ToArray());
            }
            TransposeLoader loader = new TransposeLoader(Env, new TransposeLoader.Arguments(), src);

            // First check whether this as an IDataView yields the same values.
            CheckSameValues(view, loader);

            TransposeCheckHelper <int>(view, 0, loader);    // A
            TransposeCheckHelper <Double>(view, 1, loader); // B
            TransposeCheckHelper <short>(view, 2, loader);  // C
            TransposeCheckHelper <Double>(view, 3, loader); // D
            TransposeCheckHelper <uint>(view, 4, loader);   // E
            TransposeCheckHelper <int>(view, 5, loader);    // F

            Done();
        }
Beispiel #2
0
        public void SimpleTextLoaderCopyColumnsTest()
        {
            var env = new ConsoleEnvironment(0, verbose: true);

            const string data = "0 hello 3.14159 -0 2\n"
                                + "1 1 2 4 15";
            var dataSource = new BytesStreamSource(data);

            var text = TextLoader.CreateReader(env, ctx => (
                                                   label: ctx.LoadBool(0),
                                                   text: ctx.LoadText(1),
                                                   numericFeatures: ctx.LoadFloat(2, null)), // If fit correctly, this ought to be equivalent to max of 4, that is, length of 3.
                                               dataSource, separator: ' ');

            // While we have a type-safe wrapper for `IDataView` it is utterly useless except as an input to the `Fit` functions
            // of the other statically typed wrappers. We perhaps ought to make it useful in its own right, but perhaps not now.
            // For now, just operate over the actual `IDataView`.
            var textData = text.Read(dataSource).AsDynamic;

            var schema = textData.Schema;

            // First verify that the columns are there. There ought to be at least one column corresponding to the identifiers in the tuple.
            CheckSchemaHasColumn(schema, "label", out int labelIdx);
            CheckSchemaHasColumn(schema, "text", out int textIdx);
            CheckSchemaHasColumn(schema, "numericFeatures", out int numericFeaturesIdx);
            // Next verify they have the expected types.
            Assert.Equal(BoolType.Instance, schema.GetColumnType(labelIdx));
            Assert.Equal(TextType.Instance, schema.GetColumnType(textIdx));
            Assert.Equal(new VectorType(NumberType.R4, 3), schema.GetColumnType(numericFeaturesIdx));
            // Next actually inspect the data.
            using (var cursor = textData.GetRowCursor(c => true))
            {
                var textGetter                = cursor.GetGetter <ReadOnlyMemory <char> >(textIdx);
                var numericFeaturesGetter     = cursor.GetGetter <VBuffer <float> >(numericFeaturesIdx);
                ReadOnlyMemory <char> textVal = default;
                var             labelGetter   = cursor.GetGetter <bool>(labelIdx);
                bool            labelVal      = default;
                VBuffer <float> numVal        = default;

                void CheckValuesSame(bool bl, string tx, float v0, float v1, float v2)
                {
                    labelGetter(ref labelVal);
                    textGetter(ref textVal);
                    numericFeaturesGetter(ref numVal);
                    Assert.True(tx.AsSpan().SequenceEqual(textVal.Span));
                    Assert.Equal((bool)bl, labelVal);
                    Assert.Equal(3, numVal.Length);
                    Assert.Equal(v0, numVal.GetItemOrDefault(0));
                    Assert.Equal(v1, numVal.GetItemOrDefault(1));
                    Assert.Equal(v2, numVal.GetItemOrDefault(2));
                }

                Assert.True(cursor.MoveNext(), "Could not move even to first row");
                CheckValuesSame(false, "hello", 3.14159f, -0f, 2f);
                Assert.True(cursor.MoveNext(), "Could not move to second row");
                CheckValuesSame(true, "1", 2f, 4f, 15f);
                Assert.False(cursor.MoveNext(), "Moved to third row, but there should have been only two");
            }

            // The next step where we shuffle the names around a little bit is one where we are
            // testing out the implicit usage of copy columns.

            var est         = text.MakeNewEstimator().Append(r => (text: r.label, label: r.numericFeatures));
            var newText     = text.Append(est);
            var newTextData = newText.Fit(dataSource).Read(dataSource);

            schema = newTextData.AsDynamic.Schema;
            // First verify that the columns are there. There ought to be at least one column corresponding to the identifiers in the tuple.
            CheckSchemaHasColumn(schema, "label", out labelIdx);
            CheckSchemaHasColumn(schema, "text", out textIdx);
            // Next verify they have the expected types.
            Assert.Equal(BoolType.Instance, schema.GetColumnType(textIdx));
            Assert.Equal(new VectorType(NumberType.R4, 3), schema.GetColumnType(labelIdx));
        }