/// <summary>
/// Returns a copy of <paramref name="matrixData1"/> extended with columns pulled from
/// <paramref name="matrixData2"/>. Rows are paired through the semicolon-separated
/// identifiers found in one string column of each matrix ("Matching column 1" /
/// "Matching column 2"); a row of the first matrix is linked to every row of the
/// second matrix that shares at least one identifier with it.
/// </summary>
/// <param name="matrixData1">Base matrix; it is copied, and the copy receives the new columns.</param>
/// <param name="matrixData2">Matrix the additional columns are read from.</param>
/// <param name="parameters">Supplies the matching columns, the averaging choices and the column selections.</param>
/// <param name="processInfo">Not used by this method.</param>
/// <returns>The extended copy, with <c>Origin</c> set to "Combination".</returns>
public IMatrixData CombineData(IMatrixData matrixData1, IMatrixData matrixData2, Parameters parameters, ProcessInfo processInfo) {
    bool addIndicator = parameters.GetBoolParam("Indicator").Value;
    int matchColumn2 = parameters.GetSingleChoiceParam("Matching column 2").Value;
    Average averageExpression = GetAveraging(parameters.GetSingleChoiceParam("Combine expression values").Value);
    Average averageNumerical = GetAveraging(parameters.GetSingleChoiceParam("Combine numerical values").Value);

    // Split the identifier cells of the second matrix into distinct ids per row.
    string[] rawIds2 = matrixData2.StringColumns[matchColumn2];
    string[][] ids2 = new string[rawIds2.Length][];
    for (int row = 0; row < rawIds2.Length; row++) {
        string trimmed = rawIds2[row].Trim();
        string[] parts = trimmed.Length == 0 ? new string[0] : trimmed.Split(';');
        ids2[row] = ArrayUtils.UniqueValues(parts);
    }

    // Reverse lookup: identifier -> rows of the second matrix that carry it.
    Dictionary<string, List<int>> rowsById = new Dictionary<string, List<int>>();
    for (int row = 0; row < ids2.Length; row++) {
        foreach (string id in ids2[row]) {
            List<int> rows;
            if (!rowsById.TryGetValue(id, out rows)) {
                rows = new List<int>();
                rowsById.Add(id, rows);
            }
            rows.Add(row);
        }
    }

    // Same splitting for the first matrix.
    int matchColumn1 = parameters.GetSingleChoiceParam("Matching column 1").Value;
    string[] rawIds1 = matrixData1.StringColumns[matchColumn1];
    string[][] ids1 = new string[rawIds1.Length][];
    for (int row = 0; row < rawIds1.Length; row++) {
        string trimmed = rawIds1[row].Trim();
        string[] parts = trimmed.Length == 0 ? new string[0] : trimmed.Split(';');
        ids1[row] = ArrayUtils.UniqueValues(parts);
    }

    // indexMap[r] lists the rows of the second matrix linked to row r of the first one;
    // indicatorCol[r] is {"+"} when that list is non-empty and empty otherwise.
    int[][] indexMap = new int[ids1.Length][];
    string[][] indicatorCol = new string[ids1.Length][];
    for (int row = 0; row < indexMap.Length; row++) {
        List<int> matches = new List<int>();
        foreach (string id in ids1[row]) {
            List<int> rows;
            if (rowsById.TryGetValue(id, out rows)) {
                matches.AddRange(rows);
            }
        }
        indexMap[row] = ArrayUtils.UniqueValues(matches.ToArray());
        if (indexMap[row].Length > 0) {
            indicatorCol[row] = new[]{"+"};
        } else {
            indicatorCol[row] = new string[0];
        }
    }

    IMatrixData result = matrixData1.Copy();
    SetAnnotationRows(result, matrixData1, matrixData2);
    if (addIndicator) {
        result.AddCategoryColumn(matrixData2.Name, "", indicatorCol);
    }

    // Expression columns: combine values, quality values and imputation flags.
    {
        int[] exCols = parameters.GetMultiChoiceParam("Expression columns").Value;
        int rowCount = matrixData1.RowCount;
        float[,] newExColumns = new float[rowCount, exCols.Length];
        float[,] newQuality = new float[rowCount, exCols.Length];
        bool[,] newIsImputed = new bool[rowCount, exCols.Length];
        string[] newExColNames = new string[exCols.Length];
        float[,] oldEx = matrixData2.ExpressionValues;
        float[,] oldQual = matrixData2.QualityValues;
        bool[,] oldImp = matrixData2.IsImputed;
        for (int c = 0; c < exCols.Length; c++) {
            int sourceCol = exCols[c];
            newExColNames[c] = matrixData2.ExpressionColumnNames[sourceCol];
            for (int row = 0; row < rowCount; row++) {
                List<double> values = new List<double>();
                List<double> qualities = new List<double>();
                List<bool> imputed = new List<bool>();
                foreach (int ind in indexMap[row]) {
                    double v = oldEx[ind, sourceCol];
                    // Quality and imputation flag are only considered for usable expression values.
                    if (double.IsNaN(v) || double.IsInfinity(v)) {
                        continue;
                    }
                    values.Add(v);
                    double qv = oldQual[ind, sourceCol];
                    if (!(double.IsNaN(qv) || double.IsInfinity(qv))) {
                        qualities.Add(qv);
                    }
                    imputed.Add(oldImp[ind, sourceCol]);
                }
                if (values.Count == 0) {
                    newExColumns[row, c] = float.NaN;
                } else {
                    newExColumns[row, c] = (float)averageExpression(values.ToArray());
                }
                // Quality values are combined with the same averaging function as the expression values.
                if (qualities.Count == 0) {
                    newQuality[row, c] = float.NaN;
                } else {
                    newQuality[row, c] = (float)averageExpression(qualities.ToArray());
                }
                newIsImputed[row, c] = imputed.Count != 0 && AvImp(imputed.ToArray());
            }
        }
        MakeNewNames(newExColNames, result.ExpressionColumnNames);
        AddExpressionColumns(result, newExColNames, newExColumns, newQuality, newIsImputed);
    }

    // Numerical columns: combine the non-NaN values of the linked rows.
    {
        int[] numCols = parameters.GetMultiChoiceParam("Numerical columns").Value;
        double[][] newNumericalColumns = new double[numCols.Length][];
        string[] newNumColNames = new string[numCols.Length];
        for (int c = 0; c < numCols.Length; c++) {
            double[] source = matrixData2.NumericColumns[numCols[c]];
            newNumColNames[c] = matrixData2.NumericColumnNames[numCols[c]];
            int rowCount = matrixData1.RowCount;
            double[] target = new double[rowCount];
            newNumericalColumns[c] = target;
            for (int row = 0; row < rowCount; row++) {
                List<double> values = new List<double>();
                foreach (int ind in indexMap[row]) {
                    double v = source[ind];
                    if (double.IsNaN(v)) {
                        continue;
                    }
                    values.Add(v);
                }
                if (values.Count == 0) {
                    target[row] = double.NaN;
                } else {
                    target[row] = averageNumerical(values.ToArray());
                }
            }
        }
        for (int c = 0; c < numCols.Length; c++) {
            result.AddNumericColumn(newNumColNames[c], "", newNumericalColumns[c]);
        }
    }

    // Categorical columns: concatenate the non-empty entries of the linked rows and de-duplicate.
    {
        int[] catCols = parameters.GetMultiChoiceParam("Categorical columns").Value;
        string[][][] newCatColumns = new string[catCols.Length][][];
        string[] newCatColNames = new string[catCols.Length];
        for (int c = 0; c < catCols.Length; c++) {
            string[][] source = matrixData2.CategoryColumns[catCols[c]];
            newCatColNames[c] = matrixData2.CategoryColumnNames[catCols[c]];
            int rowCount = matrixData1.RowCount;
            string[][] target = new string[rowCount][];
            newCatColumns[c] = target;
            for (int row = 0; row < rowCount; row++) {
                List<string[]> values = new List<string[]>();
                foreach (int ind in indexMap[row]) {
                    string[] entry = source[ind];
                    if (entry.Length > 0) {
                        values.Add(entry);
                    }
                }
                if (values.Count == 0) {
                    target[row] = new string[0];
                } else {
                    target[row] = ArrayUtils.UniqueValues(ArrayUtils.Concat(values.ToArray()));
                }
            }
        }
        for (int c = 0; c < catCols.Length; c++) {
            result.AddCategoryColumn(newCatColNames[c], "", newCatColumns[c]);
        }
    }

    // String columns: join the non-empty entries of the linked rows with ';'.
    {
        int[] stringCols = parameters.GetMultiChoiceParam("String columns").Value;
        string[][] newStringColumns = new string[stringCols.Length][];
        string[] newStringColNames = new string[stringCols.Length];
        for (int c = 0; c < stringCols.Length; c++) {
            string[] source = matrixData2.StringColumns[stringCols[c]];
            newStringColNames[c] = matrixData2.StringColumnNames[stringCols[c]];
            int rowCount = matrixData1.RowCount;
            string[] target = new string[rowCount];
            newStringColumns[c] = target;
            for (int row = 0; row < rowCount; row++) {
                List<string> values = new List<string>();
                foreach (int ind in indexMap[row]) {
                    string entry = source[ind];
                    if (entry.Length > 0) {
                        values.Add(entry);
                    }
                }
                if (values.Count == 0) {
                    target[row] = "";
                } else {
                    target[row] = StringUtils.Concat(";", values.ToArray());
                }
            }
        }
        for (int c = 0; c < stringCols.Length; c++) {
            result.AddStringColumn(newStringColNames[c], "", newStringColumns[c]);
        }
    }

    result.Origin = "Combination";
    return result;
}
/// <summary>
/// Collapses rows of <paramref name="mdata"/> into groups derived from the selected "ID column"
/// and replaces every expression, numeric, category, string and multi-numeric column by a
/// column holding one combined entry per group.
/// </summary>
/// <param name="mdata">Matrix that is modified in place.</param>
/// <param name="param">Supplies the ID column, the averaging types and the "Keep rows without ID" flag.</param>
/// <param name="supplTables">Not used by this method.</param>
/// <param name="documents">Not used by this method.</param>
/// <param name="processInfo">Not used by this method.</param>
public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables, ref IDocumentData[] documents, ProcessInfo processInfo) {
    bool keepRowsWithoutId = param.GetBoolParam("Keep rows without ID").Value;
    AverageType expressionAverage = GetAverageType(param.GetSingleChoiceParam("Average type for expression columns").Value);
    string[] rawIds = mdata.StringColumns[param.GetSingleChoiceParam("ID column").Value];
    string[][] ids = SplitIds(rawIds);

    int[] present;
    int[] absent;
    GetPresentAbsentIndices(ids, out present, out absent);
    ids = ArrayUtils.SubArray(ids, present);

    // Every row reported as present starts out as its own single-row group.
    int[][] groups = new int[present.Length][];
    for (int g = 0; g < groups.Length; g++) {
        groups[g] = new[]{present[g]};
    }
    ClusterRows(ref groups, ref ids);
    if (keepRowsWithoutId) {
        groups = ProlongRowInds(groups, absent);
    }

    int groupCount = groups.Length;
    int expressionColumnCount = mdata.ExpressionColumnCount;

    // Expression values.
    float[,] combinedExpression = new float[groupCount, expressionColumnCount];
    for (int col = 0; col < expressionColumnCount; col++) {
        float[] column = mdata.GetExpressionColumn(col);
        for (int g = 0; g < groupCount; g++) {
            float[] members = ArrayUtils.SubArray(column, groups[g]);
            combinedExpression[g, col] = Average(members, expressionAverage);
        }
    }
    mdata.ExpressionValues = combinedExpression;

    // Numeric columns: each one has its own averaging parameter, looked up by column name.
    for (int col = 0; col < mdata.NumericColumnCount; col++) {
        string columnName = mdata.NumericColumnNames[col];
        AverageType columnAverage = GetAverageType(param.GetSingleChoiceParam("Average type for " + columnName).Value);
        double[] column = mdata.NumericColumns[col];
        double[] combined = new double[groupCount];
        for (int g = 0; g < groupCount; g++) {
            double[] members = ArrayUtils.SubArray(column, groups[g]);
            combined[g] = Average(members, columnAverage);
        }
        mdata.NumericColumns[col] = combined;
    }

    // Category columns.
    for (int col = 0; col < mdata.CategoryColumnCount; col++) {
        string[][] column = mdata.GetCategoryColumnAt(col);
        string[][] combined = new string[groupCount][];
        for (int g = 0; g < groupCount; g++) {
            string[][] members = ArrayUtils.SubArray(column, groups[g]);
            combined[g] = Average(members);
        }
        mdata.SetCategoryColumnAt(combined, col);
    }

    // String columns.
    for (int col = 0; col < mdata.StringColumnCount; col++) {
        string[] column = mdata.StringColumns[col];
        string[] combined = new string[groupCount];
        for (int g = 0; g < groupCount; g++) {
            string[] members = ArrayUtils.SubArray(column, groups[g]);
            combined[g] = Average(members);
        }
        mdata.StringColumns[col] = combined;
    }

    // Multi-numeric columns.
    for (int col = 0; col < mdata.MultiNumericColumnCount; col++) {
        double[][] column = mdata.MultiNumericColumns[col];
        double[][] combined = new double[groupCount][];
        for (int g = 0; g < groupCount; g++) {
            double[][] members = ArrayUtils.SubArray(column, groups[g]);
            combined[g] = Average(members);
        }
        mdata.MultiNumericColumns[col] = combined;
    }
}
/// <summary>
/// For every selected score column, orders the rows by score, passes the reordered indicator
/// flags to <c>CalcCurve</c>, and finally replaces the content of <paramref name="data"/> with
/// the collected curve columns (all other column lists passed to <c>SetData</c> are empty).
/// </summary>
/// <param name="data">Matrix that is read and then overwritten through <c>SetData</c>.</param>
/// <param name="param">Supplies the indicator definition, the score columns and the quantities to display.</param>
/// <param name="supplTables">Not used by this method.</param>
/// <param name="documents">Not used by this method.</param>
/// <param name="processInfo">Receives an error message when no score column or no display quantity is selected.</param>
public void ProcessData(IMatrixData data, Parameters param, ref IMatrixData[] supplTables, ref IDocumentData[] documents, ProcessInfo processInfo) {
    bool falseAreIndicated = param.GetSingleChoiceParam("Indicated are").Value == 0;
    int categoryColumn = param.GetSingleChoiceParam("In column").Value;
    string indicatorWord = param.GetStringParam("Indicator").Value;

    int[] scoreColumns = param.GetMultiChoiceParam("Scores").Value;
    if (scoreColumns.Length == 0) {
        processInfo.ErrString = "Please specify at least one column with scores.";
        return;
    }

    bool largeIsGood = param.GetBoolParam("Large values are good").Value;
    int[] showColumns = param.GetMultiChoiceParam("Display quantity").Value;
    if (showColumns.Length == 0) {
        processInfo.ErrString = "Please select at least one quantity to display";
        return;
    }

    bool[] indicatorFlags = GetIndicatorColumn(falseAreIndicated, categoryColumn, indicatorWord, data);
    List<string> curveNames = new List<string>();
    List<float[]> curveColumns = new List<float[]>();
    foreach (int scoreColumn in scoreColumns) {
        // Indices below NumericColumnCount address numeric columns; the rest address expression columns.
        double[] scores;
        string scoreName;
        if (scoreColumn < data.NumericColumnCount) {
            scores = data.NumericColumns[scoreColumn];
            scoreName = data.NumericColumnNames[scoreColumn];
        } else {
            scores = ArrayUtils.ToDoubles(data.GetExpressionColumn(scoreColumn - data.NumericColumnCount));
            scoreName = data.ExpressionColumnNames[scoreColumn - data.NumericColumnCount];
        }
        int[] rowOrder = GetOrder(scores, largeIsGood);
        bool[] orderedFlags = ArrayUtils.SubArray(indicatorFlags, rowOrder);
        CalcCurve(orderedFlags, showColumns, scoreName, curveColumns, curveNames);
    }

    float[,] curveMatrix = ToMatrix(curveColumns);
    data.SetData(
        data.Name,
        curveNames,
        curveMatrix,
        new List<string>(),
        new List<string[]>(),
        new List<string>(),
        new List<string[][]>(),
        new List<string>(),
        new List<double[]>(),
        new List<string>(),
        new List<double[][]>());
}
/// <summary>
/// Compares every entry of the selected column against the "Value" parameter using the relation
/// chosen in "Remove if", collects the indices of the rows for which the relation holds
/// (NaN entries are collected only when "Keep NaN" is set), and hands those indices to
/// <c>PerseusPluginUtils.FilterRows</c>.
/// </summary>
/// <param name="mdata">Matrix to filter.</param>
/// <param name="param">Supplies the column, the threshold, the relation and the NaN handling.</param>
/// <param name="supplTables">Not used by this method.</param>
/// <param name="processInfo">Not used by this method.</param>
/// <exception cref="Exception">The "Remove if" index is outside 0..5 and a non-NaN value is encountered.</exception>
public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables, ProcessInfo processInfo) {
    int colInd = param.GetSingleChoiceParam("Column").Value;
    double threshold = param.GetDoubleParam("Value").Value;
    int ruleInd = param.GetSingleChoiceParam("Remove if").Value;
    bool keepNan = param.GetBoolParam("Keep NaN").Value;

    // Indices below NumericColumnCount address numeric columns; the rest address expression columns.
    double[] columnValues;
    if (colInd < mdata.NumericColumnCount) {
        columnValues = mdata.NumericColumns[colInd];
    } else {
        columnValues = ArrayUtils.ToDoubles(mdata.GetExpressionColumn(colInd - mdata.NumericColumnCount));
    }

    List<int> selectedRows = new List<int>();
    for (int row = 0; row < columnValues.Length; row++) {
        double current = columnValues[row];
        bool selected;
        if (double.IsNaN(current)) {
            selected = keepNan;
        } else if (ruleInd == 0) {
            selected = current > threshold;
        } else if (ruleInd == 1) {
            selected = current >= threshold;
        } else if (ruleInd == 2) {
            selected = current != threshold;
        } else if (ruleInd == 3) {
            selected = current == threshold;
        } else if (ruleInd == 4) {
            selected = current <= threshold;
        } else if (ruleInd == 5) {
            selected = current < threshold;
        } else {
            throw new Exception("Never get here.");
        }
        if (selected) {
            selectedRows.Add(row);
        }
    }
    PerseusPluginUtils.FilterRows(mdata, param, selectedRows.ToArray());
}
/// <summary>
/// Tests every entry of the selected string column against the search string via <c>Matches</c>
/// and passes the indices of the selected rows to <c>PerseusPluginUtils.FilterRows</c>.
/// When "Mode" is 0 the non-matching rows are selected; otherwise the matching rows are.
/// </summary>
/// <param name="mdata">Matrix to filter.</param>
/// <param name="param">Supplies the column, the search string, the mode and the matching options.</param>
/// <param name="supplTables">Not used by this method.</param>
/// <param name="documents">Not used by this method.</param>
/// <param name="processInfo">Receives an error message when the search string is null or empty.</param>
public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables, ref IDocumentData[] documents, ProcessInfo processInfo) {
    int colInd = param.GetSingleChoiceParam("Column").Value;
    string searchString = param.GetStringParam("Search string").Value;
    if (string.IsNullOrEmpty(searchString)) {
        processInfo.ErrString = "Please provide a search string";
        return;
    }

    bool selectNonMatching = param.GetSingleChoiceParam("Mode").Value == 0;
    bool matchCase = param.GetBoolParam("Match case").Value;
    bool matchWholeWord = param.GetBoolParam("Match whole word").Value;

    string[] cells = mdata.StringColumns[colInd];
    List<int> selectedRows = new List<int>();
    for (int row = 0; row < cells.Length; row++) {
        bool isMatch = Matches(cells[row], searchString, matchCase, matchWholeWord);
        // A row is selected exactly when its match state differs from the "select non-matching" flag.
        if (isMatch != selectNonMatching) {
            selectedRows.Add(row);
        }
    }
    PerseusPluginUtils.FilterRows(mdata, param, selectedRows.ToArray());
}
/// <summary>
/// Reorders the rows of <paramref name="mdata"/> according to the order computed by
/// <c>ArrayUtils.Order</c> on the selected column, reverted when "Descending" is set.
/// </summary>
/// <param name="mdata">Matrix whose rows are reordered through <c>ExtractExpressionRows</c>.</param>
/// <param name="param">Supplies the column index and the "Descending" flag.</param>
/// <param name="supplTables">Not used by this method.</param>
/// <param name="documents">Not used by this method.</param>
/// <param name="processInfo">Not used by this method.</param>
public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables, ref IDocumentData[] documents, ProcessInfo processInfo) {
    int columnIndex = param.GetSingleChoiceParam("Column").Value;
    bool descending = param.GetBoolParam("Descending").Value;

    // Indices below ExpressionColumnCount address expression columns; the rest address numeric columns.
    int[] rowOrder;
    if (columnIndex < mdata.ExpressionColumnCount) {
        float[] expressionValues = mdata.GetExpressionColumn(columnIndex);
        rowOrder = ArrayUtils.Order(expressionValues);
    } else {
        double[] numericValues = mdata.NumericColumns[columnIndex - mdata.ExpressionColumnCount];
        rowOrder = ArrayUtils.Order(numericValues);
    }

    if (descending) {
        ArrayUtils.Revert(rowOrder);
    }
    mdata.ExtractExpressionRows(rowOrder);
}
/// <summary>
/// Selects an aggregation function from the "Average type" parameter (0 median, 1 mean, 2 sum,
/// 3 geometric mean) and delegates to <c>FillMatrixKeep</c> or <c>FillMatrixDontKeep</c>
/// depending on "Keep original data". When "Add standard deviation" is set,
/// <c>AddStandardDeviation</c> is called first.
/// </summary>
/// <param name="mdata">Matrix that is passed on to the helper methods.</param>
/// <param name="param">Supplies the average type, grouping, minimum valid values and the two flags.</param>
/// <param name="supplTables">Not used by this method.</param>
/// <param name="documents">Not used by this method.</param>
/// <param name="processInfo">Receives an error message when the matrix has no category rows.</param>
/// <exception cref="Exception">The "Average type" index is outside 0..3.</exception>
public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables, ref IDocumentData[] documents, ProcessInfo processInfo) {
    int averageType = param.GetSingleChoiceParam("Average type").Value;
    if (mdata.CategoryRowCount == 0) {
        processInfo.ErrString = "No category rows were loaded.";
        return;
    }

    int groupingIndex = param.GetSingleChoiceParam("Grouping").Value;
    int minValidValues = param.GetIntParam("Min. valid values per group").Value;
    bool keepOriginal = param.GetBoolParam("Keep original data").Value;
    bool addStandardDeviation = param.GetBoolParam("Add standard deviation").Value;

    // Resolve the aggregation before touching the matrix so an invalid index fails without side effects.
    Func<IList<double>, double> aggregate;
    switch (averageType) {
        case 0:
            aggregate = ArrayUtils.Median;
            break;
        case 1:
            aggregate = ArrayUtils.Mean;
            break;
        case 2:
            aggregate = ArrayUtils.Sum;
            break;
        case 3:
            aggregate = ArrayUtils.GeometricMean;
            break;
        default:
            throw new Exception("Never get here.");
    }

    if (addStandardDeviation) {
        AddStandardDeviation(groupingIndex, minValidValues, mdata);
    }

    if (!keepOriginal) {
        FillMatrixDontKeep(groupingIndex, minValidValues, mdata, aggregate);
    } else {
        FillMatrixKeep(groupingIndex, minValidValues, mdata, aggregate);
    }
}
/// <summary>
/// Adds a category column that marks with "+" every row in which at least one of the selected
/// category or string columns contains one of the search terms (compared after lower-casing
/// both sides). With "Inverse" set, the rows without any hit are marked instead.
/// </summary>
/// <param name="mdata">Matrix that is searched and that receives the new category column.</param>
/// <param name="param">Supplies the new column name, the columns to search, the search terms and the "Inverse" flag.</param>
/// <param name="supplTables">Not used by this method.</param>
/// <param name="documents">Not used by this method.</param>
/// <param name="processInfo">Receives an error message when no search term is given.</param>
public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables, ref IDocumentData[] documents, ProcessInfo processInfo) {
    string newColumnName = param.GetStringParam("Name of new column").Value;
    int[] selectedColumns = param.GetMultiChoiceParam("Categories").Value;
    bool inverse = param.GetBoolParam("Inverse").Value;

    int[] catCols;
    int[] stringCols;
    Split(selectedColumns, out catCols, out stringCols, mdata.CategoryColumnCount);

    string[] rawTerms = param.GetMultiStringParam("Search terms").Value;
    if (rawTerms.Length == 0) {
        processInfo.ErrString = "Please specify one or more search terms.";
        return;
    }
    // Fall back to the first search term when no column name was supplied.
    if (string.IsNullOrEmpty(newColumnName)) {
        newColumnName = rawTerms[0];
    }

    string[] terms = new string[rawTerms.Length];
    for (int t = 0; t < terms.Length; t++) {
        terms[t] = rawTerms[t].ToLower().Trim();
    }

    bool[] hit = new bool[mdata.RowCount];

    // Category columns: a row is a hit when any of its entries contains any term.
    foreach (int col in catCols) {
        string[][] column = mdata.GetCategoryColumnAt(col);
        for (int row = 0; row < column.Length; row++) {
            foreach (string entry in column[row]) {
                string lowered = entry.ToLower();
                if (terms.Any(term => lowered.Contains(term))) {
                    hit[row] = true;
                }
            }
        }
    }

    // String columns: a row is a hit when its cell contains any term.
    foreach (int col in stringCols) {
        string[] column = mdata.StringColumns[col];
        for (int row = 0; row < column.Length; row++) {
            string lowered = column[row].ToLower();
            if (terms.Any(term => lowered.Contains(term))) {
                hit[row] = true;
            }
        }
    }

    string[][] marks = new string[hit.Length][];
    for (int row = 0; row < marks.Length; row++) {
        bool marked = inverse ? !hit[row] : hit[row];
        if (marked) {
            marks[row] = new[]{"+"};
        } else {
            marks[row] = new string[0];
        }
    }
    mdata.AddCategoryColumn(newColumnName, "", marks);
}