/// <summary>
/// Replaces the main columns of <paramref name="mdata"/> by one aggregated column per
/// distinct value found in the category row at <paramref name="groupColInd"/>.
/// </summary>
/// <remarks>
/// For each matrix row and each group, the values of the group's columns that are neither
/// NaN nor infinite are collected. When at least <paramref name="validVals"/> such values
/// exist, the new cell is <paramref name="func"/> applied to them and its imputation flag is
/// <c>ArrayUtils.Or</c> of the collected flags; otherwise the cell is NaN and not imputed.
/// All quality values are set to NaN. Afterwards the grouping category row is removed and
/// the remaining category rows and numeric rows are rewritten through
/// <c>AverageCategoryRow</c> / <c>AverageNumericRow</c>.
/// </remarks>
/// <param name="groupColInd">Index of the category row that defines the groups.</param>
/// <param name="validVals">Minimum number of finite values needed to produce a result.</param>
/// <param name="mdata">Matrix that is modified in place.</param>
/// <param name="func">Aggregation applied to the finite values of one group in one row.</param>
private static void FillMatrixDontKeep(int groupColInd, int validVals, IMatrixData mdata, Func<IList<double>, double> func) {
    string[][] grouping = mdata.GetCategoryRowAt(groupColInd);
    string[] groupNames = ArrayUtils.UniqueValuesPreserveOrder(grouping);
    // Presumably one array of main-column indices per group name, in groupNames order.
    int[][] memberCols = PerseusPluginUtils.GetMainColIndices(grouping, groupNames);
    int rowCount = mdata.RowCount;
    int groupCount = groupNames.Length;
    double[,] aggregated = new double[rowCount, groupCount];
    double[,] quality = new double[rowCount, groupCount];
    bool[,] imputed = new bool[rowCount, groupCount];
    for (int row = 0; row < rowCount; row++) {
        for (int g = 0; g < groupCount; g++) {
            List<double> finite = new List<double>();
            List<bool> flags = new List<bool>();
            foreach (int col in memberCols[g]) {
                double cell = mdata.Values.Get(row, col);
                if (double.IsNaN(cell) || double.IsInfinity(cell)) {
                    continue;
                }
                finite.Add(cell);
                // The imputation flag is only consulted for values that are actually used.
                flags.Add(mdata.IsImputed[row, col]);
            }
            quality[row, g] = double.NaN;
            if (finite.Count < validVals) {
                // Too few usable values: leave the cell empty and unflagged.
                aggregated[row, g] = double.NaN;
                imputed[row, g] = false;
                continue;
            }
            aggregated[row, g] = func(finite);
            imputed[row, g] = ArrayUtils.Or(flags);
        }
    }
    // The order of the following mutations is kept as-is: the column metadata and data are
    // swapped first, then the grouping row is dropped, then the remaining annotation rows
    // are reduced to the new column layout.
    mdata.ColumnNames = new List<string>(groupNames);
    mdata.ColumnDescriptions = GetEmpty(groupNames);
    mdata.Values.Set(aggregated);
    mdata.Quality.Set(quality);
    mdata.IsImputed.Set(imputed);
    mdata.RemoveCategoryRowAt(groupColInd);
    for (int c = 0; c < mdata.CategoryRowCount; c++) {
        mdata.SetCategoryRowAt(AverageCategoryRow(mdata.GetCategoryRowAt(c), memberCols), c);
    }
    for (int n = 0; n < mdata.NumericRows.Count; n++) {
        mdata.NumericRows[n] = AverageNumericRow(mdata.NumericRows[n], memberCols);
    }
}
/// <summary>
/// Adds one numeric column per group holding the spread of the group's finite values in
/// each row of <paramref name="mdata"/>.
/// </summary>
/// <remarks>
/// Groups are the distinct values of the category row at <paramref name="groupColInd"/>.
/// Values that are NaN or infinite are ignored. If fewer than <paramref name="validVals"/>
/// values remain, the result for that row and group is NaN. The added columns are always
/// named <c>"stddev " + group name</c>, including when <paramref name="varInd"/> selects
/// the value divided by the square root of the count.
/// </remarks>
/// <param name="groupColInd">Index of the category row that defines the groups.</param>
/// <param name="validVals">Minimum number of finite values needed to produce a result.</param>
/// <param name="mdata">Matrix that receives the new numeric columns.</param>
/// <param name="varInd">
/// 0 stores <c>ArrayUtils.StandardDeviation</c> of the values; any other value stores that
/// result divided by <c>Math.Sqrt(count)</c>.
/// </param>
private static void AddStandardDeviation(int groupColInd, int validVals, IMatrixData mdata, int varInd) {
    string[][] grouping = mdata.GetCategoryRowAt(groupColInd);
    string[] groupNames = ArrayUtils.UniqueValuesPreserveOrder(grouping);
    // Presumably one array of main-column indices per group name, in groupNames order.
    int[][] memberCols = PerseusPluginUtils.GetMainColIndices(grouping, groupNames);
    int groupCount = groupNames.Length;
    double[][] spreadByGroup = new double[groupCount][];
    for (int g = 0; g < groupCount; g++) {
        spreadByGroup[g] = new double[mdata.RowCount];
    }
    for (int row = 0; row < mdata.RowCount; row++) {
        for (int g = 0; g < groupCount; g++) {
            List<double> finite = new List<double>();
            foreach (int col in memberCols[g]) {
                double cell = mdata.Values.Get(row, col);
                if (double.IsNaN(cell) || double.IsInfinity(cell)) {
                    continue;
                }
                finite.Add(cell);
            }
            if (finite.Count < validVals) {
                spreadByGroup[g][row] = double.NaN;
                continue;
            }
            double deviation = ArrayUtils.StandardDeviation(finite);
            spreadByGroup[g][row] = varInd == 0 ? deviation : deviation / Math.Sqrt(finite.Count);
        }
    }
    for (int g = 0; g < groupCount; g++) {
        string label = "stddev " + groupNames[g];
        mdata.AddNumericColumn(label, label, spreadByGroup[g]);
    }
}
/// <summary>
/// Adds one numeric column per group to <paramref name="mdata"/> containing
/// <paramref name="func"/> applied to the group's finite values in each row, leaving the
/// existing main columns untouched.
/// </summary>
/// <remarks>
/// Groups are the distinct values of the category row at <paramref name="groupColInd"/>.
/// Values that are NaN or infinite are ignored. If fewer than <paramref name="validVals"/>
/// values remain, the result for that row and group is NaN. Each new column uses the group
/// name for both string arguments of <c>AddNumericColumn</c>.
/// </remarks>
/// <param name="groupColInd">Index of the category row that defines the groups.</param>
/// <param name="validVals">Minimum number of finite values needed to produce a result.</param>
/// <param name="mdata">Matrix that receives the new numeric columns.</param>
/// <param name="func">Aggregation applied to the finite values of one group in one row.</param>
private static void FillMatrixKeep(int groupColInd, int validVals, IMatrixData mdata, Func<IList<double>, double> func) {
    string[][] grouping = mdata.GetCategoryRowAt(groupColInd);
    string[] groupNames = ArrayUtils.UniqueValuesPreserveOrder(grouping);
    // Presumably one array of main-column indices per group name, in groupNames order.
    int[][] memberCols = PerseusPluginUtils.GetMainColIndices(grouping, groupNames);
    int groupCount = groupNames.Length;
    double[][] resultByGroup = new double[groupCount][];
    for (int g = 0; g < groupCount; g++) {
        resultByGroup[g] = new double[mdata.RowCount];
    }
    for (int row = 0; row < mdata.RowCount; row++) {
        for (int g = 0; g < groupCount; g++) {
            List<double> finite = new List<double>();
            foreach (int col in memberCols[g]) {
                double cell = mdata.Values.Get(row, col);
                if (double.IsNaN(cell) || double.IsInfinity(cell)) {
                    continue;
                }
                finite.Add(cell);
            }
            // Only aggregate when enough usable values are present; otherwise mark as missing.
            resultByGroup[g][row] = finite.Count >= validVals ? func(finite) : double.NaN;
        }
    }
    for (int g = 0; g < groupCount; g++) {
        mdata.AddNumericColumn(groupNames[g], groupNames[g], resultByGroup[g]);
    }
}
/// <summary>
/// Computes the selected row-wise statistics and appends them to <paramref name="mdata"/>
/// as numeric columns.
/// </summary>
/// <remarks>
/// The "Expression column selection" parameter picks the input columns: value 1 uses the
/// indices from its "Columns" sub-parameter, any other value uses all columns
/// (<c>ArrayUtils.ConsecutiveInts(mdata.ColumnCount)</c>). Value 2 additionally computes
/// every statistic per group, where groups are the distinct values of the category row
/// chosen by the "Group" sub-parameter. The "Calculate" parameter lists the indices into
/// <c>procs</c> (a member defined outside this method) that should be evaluated. For each
/// entry, <c>Item1</c> is used as the column name, <c>Item2</c> is the function applied to
/// the finite values of a row, and <c>Item3</c> is passed as the second argument of
/// <c>AddNumericColumn</c> (presumably the description). NaN and infinite values are
/// dropped before a statistic is evaluated. Per-group columns are named
/// <c>Item1 + " " + group name</c>.
/// </remarks>
/// <param name="mdata">Matrix that is read and that receives the new numeric columns.</param>
/// <param name="param">Parameter set holding the column selection and the statistics to calculate.</param>
/// <param name="supplTables">Not used by this method.</param>
/// <param name="documents">Not used by this method.</param>
/// <param name="processInfo">Not used by this method.</param>
public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables, ref IDocumentData[] documents, ProcessInfo processInfo) {
    ParameterWithSubParams<int> selection = param.GetParamWithSubParams<int>("Expression column selection");
    int selectionMode = selection.Value;
    bool perGroup = selectionMode == 2;
    string[] groupNames = null;
    int[][] groupCols = null;
    if (perGroup) {
        int groupRowInd = selection.GetSubParameters().GetParam<int>("Group").Value;
        string[][] grouping = mdata.GetCategoryRowAt(groupRowInd);
        groupNames = ArrayUtils.UniqueValuesPreserveOrder(grouping);
        groupCols = PerseusPluginUtils.GetMainColIndices(grouping, groupNames);
    }
    int[] usedCols;
    if (selectionMode == 1) {
        usedCols = selection.GetSubParameters().GetParam<int[]>("Columns").Value;
    } else {
        usedCols = ArrayUtils.ConsecutiveInts(mdata.ColumnCount);
    }
    HashSet<int> requested = ArrayUtils.ToHashSet(param.GetParam<int[]>("Calculate").Value);
    int procCount = procs.Length;
    int rowCount = mdata.RowCount;
    bool[] selected = new bool[procCount];
    double[][] overall = new double[procCount][];
    double[][][] byGroup = null;
    if (perGroup) {
        byGroup = new double[procCount][][];
    }
    // Allocate result buffers; only requested statistics get per-row storage.
    for (int p = 0; p < procCount; p++) {
        if (perGroup) {
            byGroup[p] = new double[groupNames.Length][];
        }
        selected[p] = requested.Contains(p);
        if (!selected[p]) {
            continue;
        }
        overall[p] = new double[rowCount];
        if (perGroup) {
            for (int g = 0; g < groupNames.Length; g++) {
                byGroup[p][g] = new double[rowCount];
            }
        }
    }
    for (int row = 0; row < rowCount; row++) {
        List<double> finiteAll = new List<double>();
        foreach (int col in usedCols) {
            double cell = mdata.Values.Get(row, col);
            if (double.IsNaN(cell) || double.IsInfinity(cell)) {
                continue;
            }
            finiteAll.Add(cell);
        }
        // Statistics are evaluated in procs order on the same list instance.
        for (int p = 0; p < procCount; p++) {
            if (selected[p]) {
                overall[p][row] = procs[p].Item2(finiteAll);
            }
        }
        if (!perGroup) {
            continue;
        }
        List<double>[] finiteByGroup = new List<double>[groupNames.Length];
        for (int g = 0; g < groupCols.Length; g++) {
            List<double> bucket = new List<double>();
            foreach (int col in groupCols[g]) {
                double cell = mdata.Values.Get(row, col);
                if (double.IsNaN(cell) || double.IsInfinity(cell)) {
                    continue;
                }
                bucket.Add(cell);
            }
            finiteByGroup[g] = bucket;
        }
        for (int p = 0; p < procCount; p++) {
            if (!selected[p]) {
                continue;
            }
            for (int g = 0; g < groupNames.Length; g++) {
                byGroup[p][g][row] = procs[p].Item2(finiteByGroup[g]);
            }
        }
    }
    // Emit columns: the overall statistic first, followed by its per-group variants.
    for (int p = 0; p < procCount; p++) {
        if (!selected[p]) {
            continue;
        }
        mdata.AddNumericColumn(procs[p].Item1, procs[p].Item3, overall[p]);
        if (!perGroup) {
            continue;
        }
        for (int g = 0; g < groupNames.Length; g++) {
            mdata.AddNumericColumn(procs[p].Item1 + " " + groupNames[g], procs[p].Item3, byGroup[p][g]);
        }
    }
}