/// <summary>
/// Replaces the selected terms of one categorical row with a single new term.
/// </summary>
/// <remarks>
/// Reads the "Row" parameter (index of the categorical row), its "Values" sub-parameter
/// (indices into the values returned by <c>mdata.GetCategoryRowValuesAt</c>) and the
/// "New term" parameter. If validation fails, a message is stored in
/// <c>processInfo.ErrString</c> and the method returns without touching <paramref name="mdata"/>.
/// Entries in which at least one term was replaced are de-duplicated and sorted; entries
/// without a selected term are passed through unchanged.
/// </remarks>
/// <param name="mdata">Matrix whose categorical row is rewritten through <c>SetCategoryRowAt</c>.</param>
/// <param name="param">Parameter set providing "Row", "Values" and "New term".</param>
/// <param name="supplTables">Not used by this method.</param>
/// <param name="documents">Not used by this method.</param>
/// <param name="processInfo">Receives the error message when the input is invalid.</param>
public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables, ref IDocumentData[] documents, ProcessInfo processInfo) {
    ParameterWithSubParams<int> p = param.GetParamWithSubParams<int>("Row");
    int colInd = p.Value;
    if (colInd < 0) {
        processInfo.ErrString = "No categorical rows available.";
        return;
    }
    Parameter<int[]> mcp = p.GetSubParameters().GetParam<int[]>("Values");
    int[] inds = mcp.Value;
    // NOTE(review): the message asks for two terms but only an empty selection is rejected.
    // Left as is, because a single selected term works as a plain rename - confirm intent.
    if (inds.Length < 1) {
        processInfo.ErrString = "Please select at least two terms for merging.";
        return;
    }
    string newTerm = param.GetParam<string>("New term").Value;
    if (newTerm.Length == 0) {
        processInfo.ErrString = "Please specify a new term.";
        return;
    }
    // Fetch the term list once instead of once per selected index.
    string[] allTerms = mdata.GetCategoryRowValuesAt(colInd);
    HashSet<string> selected = new HashSet<string>();
    foreach (int ind in inds) {
        selected.Add(allTerms[ind]);
    }
    string[][] cats = mdata.GetCategoryRowAt(colInd);
    string[][] newCat = new string[cats.Length][];
    for (int i = 0; i < cats.Length; i++) {
        string[] terms = cats[i];
        bool changed = false;
        foreach (string term in terms) {
            if (selected.Contains(term)) {
                changed = true;
                break;
            }
        }
        if (!changed) {
            newCat[i] = terms;
            continue;
        }
        // Build a fresh array instead of editing the one handed out by mdata, and collapse
        // duplicates: two merged terms in the same entry must yield the new term only once.
        HashSet<string> merged = new HashSet<string>();
        foreach (string term in terms) {
            merged.Add(selected.Contains(term) ? newTerm : term);
        }
        string[] result = new string[merged.Count];
        merged.CopyTo(result);
        Array.Sort(result);
        newCat[i] = result;
    }
    mdata.SetCategoryRowAt(newCat, colInd);
}
/// <summary>
/// Converts the selected string columns into main columns and removes them from the string columns.
/// </summary>
/// <remarks>
/// Each string is parsed with <c>Parser.TryDouble</c>; entries that fail to parse become
/// <c>double.NaN</c>. The new columns are appended after the existing main columns with
/// quality <c>double.NaN</c> and imputation flag <c>false</c>. Every category, numeric and
/// string row is passed through the matching <c>Extend...Row</c> helper together with the
/// number of added columns.
/// </remarks>
/// <param name="colInds">Indices of the string columns to convert.</param>
/// <param name="mdata">Matrix that is modified in place.</param>
private static void StringToExpression(IList<int> colInds, IMatrixData mdata) {
    // Indices of the string columns that stay string columns.
    int[] inds = ArrayUtils.Complement(colInds, mdata.StringColumnCount);
    string[] names = ArrayUtils.SubArray(mdata.StringColumnNames, colInds);
    string[] descriptions = ArrayUtils.SubArray(mdata.StringColumnDescriptions, colInds);
    string[][] str = ArrayUtils.SubArray(mdata.StringColumns, colInds);
    double[][] newEx = new double[str.Length][];
    for (int j = 0; j < str.Length; j++) {
        newEx[j] = new double[str[j].Length];
        for (int i = 0; i < newEx[j].Length; i++) {
            bool success = Parser.TryDouble(str[j][i], out double f);
            newEx[j][i] = success ? f : double.NaN;
        }
    }
    double[,] newExp = new double[mdata.RowCount, mdata.ColumnCount + str.Length];
    double[,] newQual = new double[mdata.RowCount, mdata.ColumnCount + str.Length];
    bool[,] newIsImputed = new bool[mdata.RowCount, mdata.ColumnCount + str.Length];
    for (int i = 0; i < mdata.RowCount; i++) {
        // Copy the existing main columns.
        for (int j = 0; j < mdata.ColumnCount; j++) {
            newExp[i, j] = mdata.Values.Get(i, j);
            newQual[i, j] = mdata.Quality.Get(i, j);
            newIsImputed[i, j] = mdata.IsImputed[i, j];
        }
        // Append the converted columns.
        for (int j = 0; j < newEx.Length; j++) {
            newExp[i, j + mdata.ColumnCount] = newEx[j][i];
            newQual[i, j + mdata.ColumnCount] = double.NaN;
            newIsImputed[i, j + mdata.ColumnCount] = false;
        }
    }
    mdata.Values.Set(newExp);
    mdata.Quality.Set(newQual);
    mdata.IsImputed.Set(newIsImputed);
    mdata.ColumnNames.AddRange(names);
    mdata.ColumnDescriptions.AddRange(descriptions);
    mdata.StringColumns = ArrayUtils.SubList(mdata.StringColumns, inds);
    mdata.StringColumnNames = ArrayUtils.SubList(mdata.StringColumnNames, inds);
    // The remaining string-column descriptions belong to the string columns. Assigning them
    // to ColumnDescriptions would overwrite the main column descriptions extended above.
    mdata.StringColumnDescriptions = ArrayUtils.SubList(mdata.StringColumnDescriptions, inds);
    for (int i = 0; i < mdata.CategoryRowCount; i++) {
        mdata.SetCategoryRowAt(ExtendCategoryRow(mdata.GetCategoryRowAt(i), str.Length), i);
    }
    for (int i = 0; i < mdata.NumericRows.Count; i++) {
        mdata.NumericRows[i] = ExtendNumericRow(mdata.NumericRows[i], str.Length);
    }
    for (int i = 0; i < mdata.StringRows.Count; i++) {
        mdata.StringRows[i] = ExtendStringRow(mdata.StringRows[i], str.Length);
    }
}
/// <summary>
/// Replaces the main columns by one column per group of the given categorical row.
/// </summary>
/// <remarks>
/// For every row and group, the finite (neither NaN nor infinite) values of the group's
/// columns are collected. If at least <paramref name="validVals"/> of them exist, the cell
/// becomes <c>func</c> applied to them and is flagged as imputed when <c>ArrayUtils.Or</c>
/// of the contributing imputation flags is true; otherwise the cell is <c>double.NaN</c>
/// and not imputed. Quality is set to <c>double.NaN</c> everywhere. The grouping row is
/// removed afterwards and the remaining category and numeric rows are passed through
/// <c>AverageCategoryRow</c> / <c>AverageNumericRow</c>.
/// </remarks>
/// <param name="groupColInd">Index of the categorical row that defines the groups.</param>
/// <param name="validVals">Minimum number of finite values required per cell.</param>
/// <param name="mdata">Matrix that is modified in place.</param>
/// <param name="func">Function combining the finite values of a group into one number.</param>
private static void FillMatrixDontKeep(int groupColInd, int validVals, IMatrixData mdata, Func<IList<double>, double> func) {
    string[][] grouping = mdata.GetCategoryRowAt(groupColInd);
    string[] groups = ArrayUtils.UniqueValuesPreserveOrder(grouping);
    int[][] members = PerseusPluginUtils.GetMainColIndices(grouping, groups);
    int rowCount = mdata.RowCount;
    int groupCount = groups.Length;
    double[,] combined = new double[rowCount, groupCount];
    double[,] quality = new double[rowCount, groupCount];
    bool[,] imputed = new bool[rowCount, groupCount];
    for (int row = 0; row < rowCount; row++) {
        for (int g = 0; g < groupCount; g++) {
            List<double> finite = new List<double>();
            List<bool> flags = new List<bool>();
            foreach (int col in members[g]) {
                double v = mdata.Values.Get(row, col);
                if (double.IsNaN(v) || double.IsInfinity(v)) {
                    continue;
                }
                finite.Add(v);
                flags.Add(mdata.IsImputed[row, col]);
            }
            double cell = double.NaN;
            bool cellImputed = false;
            // Cells with too few usable values keep NaN / not imputed.
            if (finite.Count >= validVals) {
                cell = func(finite);
                cellImputed = ArrayUtils.Or(flags);
            }
            combined[row, g] = cell;
            quality[row, g] = double.NaN;
            imputed[row, g] = cellImputed;
        }
    }
    mdata.ColumnNames = new List<string>(groups);
    mdata.ColumnDescriptions = GetEmpty(groups);
    mdata.Values.Set(combined);
    mdata.Quality.Set(quality);
    mdata.IsImputed.Set(imputed);
    mdata.RemoveCategoryRowAt(groupColInd);
    for (int r = 0; r < mdata.CategoryRowCount; r++) {
        string[][] collapsed = AverageCategoryRow(mdata.GetCategoryRowAt(r), members);
        mdata.SetCategoryRowAt(collapsed, r);
    }
    for (int r = 0; r < mdata.NumericRows.Count; r++) {
        mdata.NumericRows[r] = AverageNumericRow(mdata.NumericRows[r], members);
    }
}
/// <summary>
/// Moves the selected numeric columns into the main columns.
/// </summary>
/// <remarks>
/// The numeric values are cast to <c>float</c> and appended after the existing main columns
/// with quality <c>float.NaN</c> and imputation flag <c>false</c>. The selected columns are
/// then dropped from the numeric columns, and every category and numeric row is passed
/// through <c>ExtendCategoryRow</c> / <c>ExtendNumericRow</c> with the number of added columns.
/// </remarks>
/// <param name="colInds">Indices of the numeric columns to convert.</param>
/// <param name="mdata">Matrix that is modified in place.</param>
private static void NumericToExpression(IList<int> colInds, IMatrixData mdata) {
    // Indices of the numeric columns that stay numeric columns.
    int[] remaining = ArrayUtils.Complement(colInds, mdata.NumericColumnCount);
    string[] movedNames = ArrayUtils.SubArray(mdata.NumericColumnNames, colInds);
    string[] movedDescriptions = ArrayUtils.SubArray(mdata.NumericColumnDescriptions, colInds);
    double[][] moved = ArrayUtils.SubArray(mdata.NumericColumns, colInds);
    int added = moved.Length;
    float[][] converted = new float[added][];
    for (int c = 0; c < added; c++) {
        double[] source = moved[c];
        float[] target = new float[source.Length];
        for (int r = 0; r < target.Length; r++) {
            target[r] = (float) source[r];
        }
        converted[c] = target;
    }
    float[,] values = new float[mdata.RowCount, mdata.ColumnCount + added];
    float[,] quality = new float[mdata.RowCount, mdata.ColumnCount + added];
    bool[,] imputed = new bool[mdata.RowCount, mdata.ColumnCount + added];
    for (int r = 0; r < mdata.RowCount; r++) {
        // Existing main columns first ...
        for (int c = 0; c < mdata.ColumnCount; c++) {
            values[r, c] = mdata.Values[r, c];
            quality[r, c] = mdata.Quality[r, c];
            imputed[r, c] = mdata.IsImputed[r, c];
        }
        // ... then the converted columns behind them.
        for (int c = 0; c < converted.Length; c++) {
            int dest = c + mdata.ColumnCount;
            values[r, dest] = converted[c][r];
            quality[r, dest] = float.NaN;
            imputed[r, dest] = false;
        }
    }
    mdata.Values.Set(values);
    mdata.Quality.Set(quality);
    mdata.IsImputed.Set(imputed);
    mdata.ColumnNames.AddRange(movedNames);
    mdata.ColumnDescriptions.AddRange(movedDescriptions);
    mdata.NumericColumns = ArrayUtils.SubList(mdata.NumericColumns, remaining);
    mdata.NumericColumnNames = ArrayUtils.SubList(mdata.NumericColumnNames, remaining);
    mdata.NumericColumnDescriptions = ArrayUtils.SubList(mdata.NumericColumnDescriptions, remaining);
    for (int k = 0; k < mdata.CategoryRowCount; k++) {
        mdata.SetCategoryRowAt(ExtendCategoryRow(mdata.GetCategoryRowAt(k), added), k);
    }
    for (int k = 0; k < mdata.NumericRows.Count; k++) {
        mdata.NumericRows[k] = ExtendNumericRow(mdata.NumericRows[k], added);
    }
}
/// <summary>
/// Overwrites one categorical row with values taken from per-column string parameters.
/// </summary>
/// <remarks>
/// The "Category row" parameter selects the row. Its sub-parameters are looked up by
/// expression column name; each value is split at ';' into the terms for that column,
/// and an empty value yields an empty term list.
/// </remarks>
/// <param name="mdata">Matrix whose categorical row is replaced.</param>
/// <param name="param">Parameter set containing "Category row" and its sub-parameters.</param>
private static void ProcessDataEdit(IMatrixData mdata, Parameters param) {
    SingleChoiceWithSubParams rowChoice = param.GetSingleChoiceWithSubParams("Category row");
    int groupColInd = rowChoice.Value;
    Parameters perColumn = rowChoice.GetSubParameters();
    string[][] edited = new string[mdata.ExpressionColumnCount][];
    for (int col = 0; col < mdata.ExpressionColumnCount; col++) {
        string columnName = mdata.ExpressionColumnNames[col];
        string text = perColumn.GetStringParam(columnName).Value;
        if (text.Length > 0) {
            edited[col] = text.Split(';');
        } else {
            edited[col] = new string[0];
        }
    }
    mdata.SetCategoryRowAt(edited, groupColInd);
}
/// <summary>
/// Divides every expression column by the column at <paramref name="index"/> and removes that column.
/// </summary>
/// <remarks>
/// Infinite quotients are replaced by <c>float.NaN</c>. A resulting cell is flagged as imputed
/// when either the numerator or the divisor cell was imputed. The divisor column is also
/// removed from the expression column names and descriptions and from every category and
/// numeric row.
/// NOTE(review): quality values are not touched by this method - confirm whether they
/// need to be shrunk as well.
/// </remarks>
/// <param name="data">Matrix that is modified in place.</param>
/// <param name="index">Index of the expression column used as divisor.</param>
public static void DivideByColumn(IMatrixData data, int index) {
    int rowCount = data.RowCount;
    int colCount = data.ExpressionColumnCount;
    float[,] quotients = new float[rowCount, colCount - 1];
    bool[,] imputed = new bool[rowCount, colCount - 1];
    for (int row = 0; row < rowCount; row++) {
        for (int col = 0; col < colCount; col++) {
            if (col == index) {
                continue;
            }
            // Columns behind the divisor move one slot to the left.
            int dest = col < index ? col : col - 1;
            quotients[row, dest] = data[row, col] / data[row, index];
            if (float.IsInfinity(quotients[row, dest])) {
                quotients[row, dest] = float.NaN;
            }
            imputed[row, dest] = data.IsImputed[row, col] || data.IsImputed[row, index];
        }
    }
    data.ExpressionValues = quotients;
    data.IsImputed = imputed;
    data.ExpressionColumnNames.RemoveAt(index);
    data.ExpressionColumnDescriptions.RemoveAt(index);
    for (int r = 0; r < data.CategoryRowCount; r++) {
        data.SetCategoryRowAt(ArrayUtils.RemoveAtIndex(data.GetCategoryRowAt(r), index), r);
    }
    for (int r = 0; r < data.NumericRowCount; r++) {
        data.NumericRows[r] = ArrayUtils.RemoveAtIndex(data.NumericRows[r], index);
    }
}
/// <summary>
/// Replaces the selected terms of one categorical row with a single new term.
/// </summary>
/// <remarks>
/// Reads the "Row" single-choice parameter (index of the categorical row), its "Values"
/// multi-choice sub-parameter (indices into the values returned by
/// <c>mdata.GetCategoryRowValuesAt</c>) and the "New term" string parameter. If validation
/// fails, a message is stored in <c>processInfo.ErrString</c> and the method returns without
/// touching <paramref name="mdata"/>. Entries in which at least one term was replaced are
/// de-duplicated and sorted; entries without a selected term are passed through unchanged.
/// </remarks>
/// <param name="mdata">Matrix whose categorical row is rewritten through <c>SetCategoryRowAt</c>.</param>
/// <param name="param">Parameter set providing "Row", "Values" and "New term".</param>
/// <param name="supplTables">Not used by this method.</param>
/// <param name="documents">Not used by this method.</param>
/// <param name="processInfo">Receives the error message when the input is invalid.</param>
public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables, ref IDocumentData[] documents, ProcessInfo processInfo) {
    SingleChoiceWithSubParams p = param.GetSingleChoiceWithSubParams("Row");
    int colInd = p.Value;
    if (colInd < 0) {
        processInfo.ErrString = "No categorical rows available.";
        return;
    }
    MultiChoiceParam mcp = p.GetSubParameters().GetMultiChoiceParam("Values");
    int[] inds = mcp.Value;
    // NOTE(review): the message asks for two terms but only an empty selection is rejected.
    // Left as is, because a single selected term works as a plain rename - confirm intent.
    if (inds.Length < 1) {
        processInfo.ErrString = "Please select at least two terms for merging.";
        return;
    }
    string newTerm = param.GetStringParam("New term").Value;
    if (newTerm.Length == 0) {
        processInfo.ErrString = "Please specify a new term.";
        return;
    }
    // Fetch the term list once instead of once per selected index.
    string[] allTerms = mdata.GetCategoryRowValuesAt(colInd);
    HashSet<string> selected = new HashSet<string>();
    foreach (int ind in inds) {
        selected.Add(allTerms[ind]);
    }
    string[][] cats = mdata.GetCategoryRowAt(colInd);
    string[][] newCat = new string[cats.Length][];
    for (int i = 0; i < cats.Length; i++) {
        string[] terms = cats[i];
        bool changed = false;
        foreach (string term in terms) {
            if (selected.Contains(term)) {
                changed = true;
                break;
            }
        }
        if (!changed) {
            newCat[i] = terms;
            continue;
        }
        // Build a fresh array instead of editing the one handed out by mdata, and collapse
        // duplicates: two merged terms in the same entry must yield the new term only once.
        HashSet<string> merged = new HashSet<string>();
        foreach (string term in terms) {
            merged.Add(selected.Contains(term) ? newTerm : term);
        }
        string[] result = new string[merged.Count];
        merged.CopyTo(result);
        Array.Sort(result);
        newCat[i] = result;
    }
    mdata.SetCategoryRowAt(newCat, colInd);
}
/// <summary>
/// Replaces the expression columns by one column per group of the given categorical row.
/// </summary>
/// <remarks>
/// For every row and group, the finite (neither NaN nor infinite) values of the group's
/// columns are collected. If at least <paramref name="validVals"/> of them exist, the cell
/// becomes <c>func</c> applied to them (cast to <c>float</c>) and is flagged as imputed when
/// <c>ArrayUtils.Or</c> of the contributing imputation flags is true; otherwise the cell is
/// <c>float.NaN</c> and not imputed. Quality is set to <c>float.NaN</c> everywhere. The
/// grouping row is removed afterwards and the remaining category and numeric rows are passed
/// through <c>AverageCategoryRow</c> / <c>AverageNumericRow</c>.
/// </remarks>
/// <param name="groupColInd">Index of the categorical row that defines the groups.</param>
/// <param name="validVals">Minimum number of finite values required per cell.</param>
/// <param name="mdata">Matrix that is modified in place.</param>
/// <param name="func">Function combining the finite values of a group into one number.</param>
private static void FillMatrixDontKeep(int groupColInd, int validVals, IMatrixData mdata, Func<IList<double>, double> func) {
    string[][] grouping = mdata.GetCategoryRowAt(groupColInd);
    string[] groups = ArrayUtils.UniqueValuesPreserveOrder(grouping);
    int[][] members = PerseusPluginUtils.GetExpressionColIndices(grouping, groups);
    int rowCount = mdata.RowCount;
    int groupCount = groups.Length;
    float[,] combined = new float[rowCount, groupCount];
    float[,] quality = new float[rowCount, groupCount];
    bool[,] imputed = new bool[rowCount, groupCount];
    for (int row = 0; row < rowCount; row++) {
        for (int g = 0; g < groupCount; g++) {
            List<double> finite = new List<double>();
            List<bool> flags = new List<bool>();
            foreach (int col in members[g]) {
                double v = mdata[row, col];
                if (double.IsNaN(v) || double.IsInfinity(v)) {
                    continue;
                }
                finite.Add(v);
                flags.Add(mdata.IsImputed[row, col]);
            }
            float cell = float.NaN;
            bool cellImputed = false;
            // Cells with too few usable values keep NaN / not imputed.
            if (finite.Count >= validVals) {
                cell = (float) func(finite);
                cellImputed = ArrayUtils.Or(flags);
            }
            combined[row, g] = cell;
            quality[row, g] = float.NaN;
            imputed[row, g] = cellImputed;
        }
    }
    mdata.ExpressionColumnNames = new List<string>(groups);
    mdata.ExpressionColumnDescriptions = GetEmpty(groups);
    mdata.ExpressionValues = combined;
    mdata.QualityValues = quality;
    mdata.IsImputed = imputed;
    mdata.RemoveCategoryRowAt(groupColInd);
    for (int r = 0; r < mdata.CategoryRowCount; r++) {
        string[][] collapsed = AverageCategoryRow(mdata.GetCategoryRowAt(r), members);
        mdata.SetCategoryRowAt(collapsed, r);
    }
    for (int r = 0; r < mdata.NumericRows.Count; r++) {
        mdata.NumericRows[r] = AverageNumericRow(mdata.NumericRows[r], members);
    }
}
/// <summary>
/// Converts the selected string columns into expression columns and removes them from the string columns.
/// </summary>
/// <remarks>
/// Each string is parsed as <c>float</c> using the invariant culture, so the result does not
/// depend on the regional settings of the machine; entries that fail to parse become
/// <c>float.NaN</c>. The new columns are appended after the existing expression columns with
/// quality <c>float.NaN</c> and imputation flag <c>false</c>. Every category and numeric row is
/// passed through <c>ExtendCategoryRow</c> / <c>ExtendNumericRow</c> with the number of added columns.
/// </remarks>
/// <param name="colInds">Indices of the string columns to convert.</param>
/// <param name="mdata">Matrix that is modified in place.</param>
private static void StringToExpression(IList<int> colInds, IMatrixData mdata) {
    // Indices of the string columns that stay string columns.
    int[] inds = ArrayUtils.Complement(colInds, mdata.StringColumnCount);
    string[] names = ArrayUtils.SubArray(mdata.StringColumnNames, colInds);
    string[] descriptions = ArrayUtils.SubArray(mdata.StringColumnDescriptions, colInds);
    string[][] str = ArrayUtils.SubArray(mdata.StringColumns, colInds);
    float[][] newEx = new float[str.Length][];
    for (int j = 0; j < str.Length; j++) {
        newEx[j] = new float[str[j].Length];
        for (int i = 0; i < newEx[j].Length; i++) {
            float f;
            // Same number styles as the two-argument TryParse overload, but with a fixed
            // culture: the current culture would make the parsed data locale-dependent.
            bool success = float.TryParse(str[j][i],
                System.Globalization.NumberStyles.Float | System.Globalization.NumberStyles.AllowThousands,
                System.Globalization.CultureInfo.InvariantCulture, out f);
            newEx[j][i] = success ? f : float.NaN;
        }
    }
    float[,] newExp = new float[mdata.RowCount, mdata.ExpressionColumnCount + str.Length];
    float[,] newQual = new float[mdata.RowCount, mdata.ExpressionColumnCount + str.Length];
    bool[,] newIsImputed = new bool[mdata.RowCount, mdata.ExpressionColumnCount + str.Length];
    for (int i = 0; i < mdata.RowCount; i++) {
        // Copy the existing expression columns.
        for (int j = 0; j < mdata.ExpressionColumnCount; j++) {
            newExp[i, j] = mdata[i, j];
            newQual[i, j] = mdata.QualityValues[i, j];
            newIsImputed[i, j] = mdata.IsImputed[i, j];
        }
        // Append the converted columns.
        for (int j = 0; j < newEx.Length; j++) {
            newExp[i, j + mdata.ExpressionColumnCount] = newEx[j][i];
            newQual[i, j + mdata.ExpressionColumnCount] = float.NaN;
            newIsImputed[i, j + mdata.ExpressionColumnCount] = false;
        }
    }
    mdata.ExpressionValues = newExp;
    mdata.QualityValues = newQual;
    mdata.IsImputed = newIsImputed;
    mdata.ExpressionColumnNames.AddRange(names);
    mdata.ExpressionColumnDescriptions.AddRange(descriptions);
    mdata.StringColumns = ArrayUtils.SubList(mdata.StringColumns, inds);
    mdata.StringColumnNames = ArrayUtils.SubList(mdata.StringColumnNames, inds);
    mdata.StringColumnDescriptions = ArrayUtils.SubList(mdata.StringColumnDescriptions, inds);
    for (int i = 0; i < mdata.CategoryRowCount; i++) {
        mdata.SetCategoryRowAt(ExtendCategoryRow(mdata.GetCategoryRowAt(i), str.Length), i);
    }
    for (int i = 0; i < mdata.NumericRows.Count; i++) {
        mdata.NumericRows[i] = ExtendNumericRow(mdata.NumericRows[i], str.Length);
    }
}