private static double[,] PrepareInputData(IDataset ds, IEnumerable <string> allowedInputs, IEnumerable <int> rows)
        {
            var doubleVariables     = allowedInputs.Where(ds.VariableHasType <double>);
            var factorVariableNames = allowedInputs.Where(ds.VariableHasType <string>);
            var factorVariables     = ds.GetFactorVariableValues(factorVariableNames, Enumerable.Range(0, ds.Rows)); // must consider all factor values (in train and test set)

            double[,] binaryMatrix    = ds.ToArray(factorVariables, rows);
            double[,] doubleVarMatrix = ds.ToArray(doubleVariables, rows);
            var x = binaryMatrix.HorzCat(doubleVarMatrix);

            return(x.Transpose());
        }
Ejemplo n.º 2
0
        private static IndexedDataTable <double> CoefficientGraph(double[,] coeff, double[] lambda, IEnumerable <string> allowedVars, IDataset ds, bool showOnlyRelevantBasisFuncs = true)
        {
            var coeffTable = new IndexedDataTable <double>("Coefficients", "The paths of standarized coefficient values over different lambda values");

            coeffTable.VisualProperties.YAxisMaximumAuto = false;
            coeffTable.VisualProperties.YAxisMinimumAuto = false;
            coeffTable.VisualProperties.XAxisMaximumAuto = false;
            coeffTable.VisualProperties.XAxisMinimumAuto = false;

            coeffTable.VisualProperties.XAxisLogScale    = true;
            coeffTable.VisualProperties.XAxisTitle       = "Lambda";
            coeffTable.VisualProperties.YAxisTitle       = "Coefficients";
            coeffTable.VisualProperties.SecondYAxisTitle = "Number of variables";

            var nLambdas         = lambda.Length;
            var nCoeff           = coeff.GetLength(1);
            var dataRows         = new IndexedDataRow <double> [nCoeff];
            var numNonZeroCoeffs = new int[nLambdas];

            var doubleVariables          = allowedVars.Where(ds.VariableHasType <double>);
            var factorVariableNames      = allowedVars.Where(ds.VariableHasType <string>);
            var factorVariablesAndValues = ds.GetFactorVariableValues(factorVariableNames, Enumerable.Range(0, ds.Rows)); //must consider all factor values (in train and test set)

            for (int i = 0; i < coeff.GetLength(0); i++)
            {
                for (int j = 0; j < coeff.GetLength(1); j++)
                {
                    if (!coeff[i, j].IsAlmost(0.0))
                    {
                        numNonZeroCoeffs[i]++;
                    }
                }
            }

            {
                int i = 0;
                foreach (var factorVariableAndValues in factorVariablesAndValues)
                {
                    foreach (var factorValue in factorVariableAndValues.Value)
                    {
                        double sigma = ds.GetStringValues(factorVariableAndValues.Key)
                                       .Select(s => s == factorValue ? 1.0 : 0.0)
                                       .StandardDeviation(); // calc std dev of binary indicator
                        var path = Enumerable.Range(0, nLambdas).Select(r => Tuple.Create(lambda[r], coeff[r, i] * sigma)).ToArray();
                        dataRows[i] = new IndexedDataRow <double>(factorVariableAndValues.Key + "=" + factorValue, factorVariableAndValues.Key + "=" + factorValue, path);
                        i++;
                    }
                }

                foreach (var doubleVariable in doubleVariables)
                {
                    double sigma = ds.GetDoubleValues(doubleVariable).StandardDeviation();
                    var    path  = Enumerable.Range(0, nLambdas).Select(r => Tuple.Create(lambda[r], coeff[r, i] * sigma)).ToArray();
                    dataRows[i] = new IndexedDataRow <double>(doubleVariable, doubleVariable, path);
                    i++;
                }

                // add to coeffTable by total weight (larger area under the curve => more important);
                foreach (var r in dataRows.OrderByDescending(r => r.Values.Select(t => t.Item2).Sum(x => Math.Abs(x))))
                {
                    coeffTable.Rows.Add(r);
                }
            }

            if (lambda.Length > 2)
            {
                coeffTable.VisualProperties.XAxisMinimumFixedValue = Math.Pow(10, Math.Floor(Math.Log10(lambda.Last())));
                coeffTable.VisualProperties.XAxisMaximumFixedValue = Math.Pow(10, Math.Ceiling(Math.Log10(lambda.Skip(1).First())));
            }

            coeffTable.Rows.Add(new IndexedDataRow <double>("Number of variables", "The number of non-zero coefficients for each step in the path", lambda.Zip(numNonZeroCoeffs, (l, v) => Tuple.Create(l, (double)v))));
            coeffTable.Rows["Number of variables"].VisualProperties.ChartType   = DataRowVisualProperties.DataRowChartType.Points;
            coeffTable.Rows["Number of variables"].VisualProperties.SecondYAxis = true;

            return(coeffTable);
        }