Exemple #1
0
            /// <summary>
            /// Validates the input column and builds the output schema of the polynomial transform.
            /// </summary>
            /// <param name="host">Environment used to register the "PolynomialState" host.</param>
            /// <param name="input">Data view holding the source column.</param>
            /// <param name="args">Transform arguments; only the first entry of <c>columns</c> is used here.</param>
            /// <param name="multiplication">Function stored to multiply two values of type TInput.</param>
            public PolynomialState(IHostEnvironment host, IDataView input, Arguments args, Func<TInput, TInput, TInput> multiplication)
            {
                // Validate up front so that callers get an argument error rather than
                // a NullReferenceException / IndexOutOfRangeException further down.
                Contracts.CheckValue(host, "host");
                _host = host.Register("PolynomialState");
                _host.CheckValue(input, "input");
                _host.CheckValue(args, "args");
                if (args.columns == null || args.columns.Length == 0)
                {
                    throw _host.ExceptParam("columns", "At least one column must be specified.");
                }

                _input          = input;
                _args           = args;
                _multiplication = multiplication;

                var column = _args.columns[0];
                var schema = input.Schema;

                using (var ch = _host.Start("PolynomialState"))
                {
                    if (!schema.TryGetColumnIndex(column.Source, out _inputCol))
                    {
                        throw _host.ExceptParam("inputColumn", "Column '{0}' not found in schema.", column.Source);
                    }

                    var type = schema.GetColumnType(_inputCol);
                    if (!type.IsVector())
                    {
                        throw _host.Except("Input column type must be a vector.");
                    }

                    var vectorType = type.AsVector();
                    int dim = vectorType.DimCount();
                    if (dim > 1)
                    {
                        throw _host.Except("Input column type must be a vector of one dimension.");
                    }

                    // A size of 0 is propagated unchanged to the output type
                    // (presumably it denotes a vector of unknown length - confirm).
                    int inputSize  = dim > 0 ? vectorType.GetDim(0) : 0;
                    int outputSize = inputSize > 0 ? TotalCumulated[_args.degree](inputSize) : 0;

                    // Trace the input size (not the dimension count) against the output size.
                    ch.Trace("PolynomialTransform {0}->{1}.", inputSize, outputSize);

                    // We extend the input schema with one column. The new column is a vector
                    // with the same item type as the input column.
                    _schema = Schema.Create(new ExtendedSchema(schema,
                                                               new[] { column.Name },
                                                               new[] { new VectorType(vectorType.ItemType(), outputSize) }));
                }
            }
Exemple #2
0
            /// <summary>
            /// Builds the getter which computes the polynomial features of the input column.
            /// </summary>
            /// <returns>
            /// A getter which reads the current input vector through <c>_inputGetter</c> and fills
            /// its argument with the products of the input values enumerated by
            /// <c>EnumeratePosition</c>. A dense input produces a dense output,
            /// any other input produces a sparse output.
            /// </returns>
            private ValueGetter<VBuffer<TInput>> PolynomialBuilder()
            {
                // VBuffer<TInput> is the internal representation of a vector.
                // It can be dense (TInput[]) or sparse.
                // If there are n features, we can expect sum(i=1, d) n^i / i! polynomial features.
                // The buffers below are captured by the returned getter and reused at every call.
                VBuffer<TInput> features = new VBuffer<TInput>();
                int degree  = _args.degree;
                var values  = new List<TInput>();
                var indices = new List<int>();

                // Scratch storage for one combination of indices (at most three of them).
                int[] tempIndices = new int[3];

                // Maps a combination of input indices to the index of the corresponding polynomial feature.
                Func<IEnumerable<int>, int, int> computeIndex = (IEnumerable<int> sparseIndices, int nbFeatures) =>
                {
                    int nb = 0;
                    foreach (var i in sparseIndices)
                    {
                        // Without this check, a combination longer than the scratch array fails with
                        // an IndexOutOfRangeException before the switch below can report the problem.
                        if (nb >= tempIndices.Length)
                        {
                            throw Contracts.ExceptNotSupp("Level should be in [1, 3].");
                        }
                        tempIndices[nb] = i;
                        nb += 1;
                    }

                    switch (nb)
                    {
                    case 1:
                        // Single features keep their own index.
                        return tempIndices[0];

                    case 2:
                    {
                        // Pairs come after the Total[1](nbFeatures) single features.
                        int d1 = Total[1](nbFeatures);
                        int d2 = Total[2](nbFeatures);
                        return d1 + (d2 - Total[2](nbFeatures - tempIndices[0])) + (tempIndices[1] - tempIndices[0]);
                    }

                    case 3:
                    {
                        // Triples come after the single features and the pairs.
                        int d1 = Total[1](nbFeatures);
                        int d2 = Total[2](nbFeatures);
                        int d3 = Total[3](nbFeatures);
                        int afterFirst  = Total[2](nbFeatures - tempIndices[0]);
                        int afterSecond = Total[2](nbFeatures - tempIndices[1]);
                        return d1 + d2 +
                               afterFirst - afterSecond + tempIndices[2] - tempIndices[1] +   // part with N^2
                               d3 - Total[3](nbFeatures - tempIndices[0]);                    // part with N^3
                    }

                    default:
                        throw Contracts.ExceptNotSupp("Level should be in [1, 3].");
                    }
                };

                return (ref VBuffer<TInput> polyfeat) =>
                {
                    _inputGetter(ref features);

                    if (features.IsDense)
                    {
                        // Dense case: one product per enumerated combination, stored in enumeration order.
                        var poly = EnumeratePosition(features.Count, degree)
                                   .Select(pos => pos.Select(p => features.Values[p]).Aggregate((a, b) => _multiplication(a, b)))
                                   .ToArray();
                        polyfeat = new VBuffer<TInput>(poly.Length, poly);
                    }
                    else
                    {
                        values.Clear();
                        indices.Clear();

                        // Sparse case: positions enumerate the stored values only,
                        // features.Indices gives the matching feature indices.
                        foreach (var pos in EnumeratePosition(features.Count, degree))
                        {
                            values.Add(pos.Select(p => features.Values[p]).Aggregate((a, b) => _multiplication(a, b)));
                            indices.Add(computeIndex(pos.Select(p => features.Indices[p]), features.Length));
#if (DEBUG)
                            // The output indices must be strictly increasing.
                            if (indices.Count > 1)
                            {
                                if (indices[indices.Count - 1] <= indices[indices.Count - 2])
                                {
                                    throw Contracts.Except("Inconsistency");
                                }
                            }
#endif
                        }

                        // Logical length of the output vector, computed with the same degree
                        // as the one given to EnumeratePosition.
                        int total = TotalCumulated[degree](features.Length);
                        polyfeat = new VBuffer<TInput>(total, values.Count, values.ToArray(), indices.ToArray());
                    }
                };
            }