private static double[,] PrepareInputData(IDataset ds, IEnumerable <string> allowedInputs, IEnumerable <int> rows) { var doubleVariables = allowedInputs.Where(ds.VariableHasType <double>); var factorVariableNames = allowedInputs.Where(ds.VariableHasType <string>); var factorVariables = ds.GetFactorVariableValues(factorVariableNames, Enumerable.Range(0, ds.Rows)); // must consider all factor values (in train and test set) double[,] binaryMatrix = ds.ToArray(factorVariables, rows); double[,] doubleVarMatrix = ds.ToArray(doubleVariables, rows); var x = binaryMatrix.HorzCat(doubleVarMatrix); return(x.Transpose()); }
private static IndexedDataTable <double> CoefficientGraph(double[,] coeff, double[] lambda, IEnumerable <string> allowedVars, IDataset ds, bool showOnlyRelevantBasisFuncs = true) { var coeffTable = new IndexedDataTable <double>("Coefficients", "The paths of standarized coefficient values over different lambda values"); coeffTable.VisualProperties.YAxisMaximumAuto = false; coeffTable.VisualProperties.YAxisMinimumAuto = false; coeffTable.VisualProperties.XAxisMaximumAuto = false; coeffTable.VisualProperties.XAxisMinimumAuto = false; coeffTable.VisualProperties.XAxisLogScale = true; coeffTable.VisualProperties.XAxisTitle = "Lambda"; coeffTable.VisualProperties.YAxisTitle = "Coefficients"; coeffTable.VisualProperties.SecondYAxisTitle = "Number of variables"; var nLambdas = lambda.Length; var nCoeff = coeff.GetLength(1); var dataRows = new IndexedDataRow <double> [nCoeff]; var numNonZeroCoeffs = new int[nLambdas]; var doubleVariables = allowedVars.Where(ds.VariableHasType <double>); var factorVariableNames = allowedVars.Where(ds.VariableHasType <string>); var factorVariablesAndValues = ds.GetFactorVariableValues(factorVariableNames, Enumerable.Range(0, ds.Rows)); //must consider all factor values (in train and test set) for (int i = 0; i < coeff.GetLength(0); i++) { for (int j = 0; j < coeff.GetLength(1); j++) { if (!coeff[i, j].IsAlmost(0.0)) { numNonZeroCoeffs[i]++; } } } { int i = 0; foreach (var factorVariableAndValues in factorVariablesAndValues) { foreach (var factorValue in factorVariableAndValues.Value) { double sigma = ds.GetStringValues(factorVariableAndValues.Key) .Select(s => s == factorValue ? 1.0 : 0.0) .StandardDeviation(); // calc std dev of binary indicator var path = Enumerable.Range(0, nLambdas).Select(r => Tuple.Create(lambda[r], coeff[r, i] * sigma)).ToArray(); dataRows[i] = new IndexedDataRow <double>(factorVariableAndValues.Key + "=" + factorValue, factorVariableAndValues.Key + "=" + factorValue, path); i++; } } foreach (var doubleVariable in doubleVariables) { double sigma = ds.GetDoubleValues(doubleVariable).StandardDeviation(); var path = Enumerable.Range(0, nLambdas).Select(r => Tuple.Create(lambda[r], coeff[r, i] * sigma)).ToArray(); dataRows[i] = new IndexedDataRow <double>(doubleVariable, doubleVariable, path); i++; } // add to coeffTable by total weight (larger area under the curve => more important); foreach (var r in dataRows.OrderByDescending(r => r.Values.Select(t => t.Item2).Sum(x => Math.Abs(x)))) { coeffTable.Rows.Add(r); } } if (lambda.Length > 2) { coeffTable.VisualProperties.XAxisMinimumFixedValue = Math.Pow(10, Math.Floor(Math.Log10(lambda.Last()))); coeffTable.VisualProperties.XAxisMaximumFixedValue = Math.Pow(10, Math.Ceiling(Math.Log10(lambda.Skip(1).First()))); } coeffTable.Rows.Add(new IndexedDataRow <double>("Number of variables", "The number of non-zero coefficients for each step in the path", lambda.Zip(numNonZeroCoeffs, (l, v) => Tuple.Create(l, (double)v)))); coeffTable.Rows["Number of variables"].VisualProperties.ChartType = DataRowVisualProperties.DataRowChartType.Points; coeffTable.Rows["Number of variables"].VisualProperties.SecondYAxis = true; return(coeffTable); }