/// <summary>
/// Reads the "Matrix access" choice and passes the matrix on to <c>Rank1</c>.
/// </summary>
public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
    ref IDocumentData[] documents, ProcessInfo processInfo) {
    // Choice index 0 is forwarded as rows == true, any other index as false.
    int accessChoice = param.GetSingleChoiceParam("Matrix access").Value;
    Rank1(accessChoice == 0, mdata);
}
/// <summary>
/// Computes curve columns for every selected score column and replaces the matrix
/// content with them. Sets <c>processInfo.ErrString</c> and returns without touching
/// the matrix when no score column or no display quantity is selected.
/// </summary>
public void ProcessData(IMatrixData data, Parameters param, ref IMatrixData[] supplTables,
    ref IDocumentData[] documents, ProcessInfo processInfo) {
    bool falseAreIndicated = param.GetSingleChoiceParam("Indicated are").Value == 0;
    int categoryIndex = param.GetSingleChoiceParam("In column").Value;
    string indicatorWord = param.GetStringParam("Indicator").Value;
    int[] scoreColumns = param.GetMultiChoiceParam("Scores").Value;
    if (scoreColumns.Length == 0) {
        processInfo.ErrString = "Please specify at least one column with scores.";
        return;
    }
    bool largeIsGood = param.GetBoolParam("Large values are good").Value;
    int[] showColumns = param.GetMultiChoiceParam("Display quantity").Value;
    if (showColumns.Length == 0) {
        processInfo.ErrString = "Please select at least one quantity to display";
        return;
    }
    bool[] indicators = GetIndicatorColumn(falseAreIndicated, categoryIndex, indicatorWord, data);
    List<string> curveNames = new List<string>();
    List<float[]> curveColumns = new List<float[]>();
    for (int k = 0; k < scoreColumns.Length; k++) {
        int column = scoreColumns[k];
        int numericCount = data.NumericColumnCount;
        double[] scores;
        string title;
        // Indices below the numeric column count address numeric columns,
        // the remaining ones address expression columns.
        if (column < numericCount) {
            scores = data.NumericColumns[column];
            title = data.NumericColumnNames[column];
        } else {
            int expressionIndex = column - numericCount;
            scores = ArrayUtils.ToDoubles(data.GetExpressionColumn(expressionIndex));
            title = data.ExpressionColumnNames[expressionIndex];
        }
        int[] ranking = GetOrder(scores, largeIsGood);
        CalcCurve(ArrayUtils.SubArray(indicators, ranking), showColumns, title, curveColumns, curveNames);
    }
    float[,] curveMatrix = ToMatrix(curveColumns);
    // All annotation lists are passed empty: only the curve columns remain in the matrix.
    data.SetData(data.Name, curveNames, curveMatrix,
        new List<string>(), new List<string[]>(),
        new List<string>(), new List<string[][]>(),
        new List<string>(), new List<double[]>(),
        new List<string>(), new List<double[][]>());
}
/// <summary>
/// Returns true when the "Filter mode" choice has index 0.
/// </summary>
private static bool GetReduceMatrix(Parameters parameters) {
    int filterMode = parameters.GetSingleChoiceParam("Filter mode").Value;
    return filterMode == 0;
}
/// <summary>
/// Divides the matrix by the "Control column". Indices below the expression column
/// count go to <c>DivideByColumn</c>; larger indices are shifted by that count and
/// go to <c>DivideByColumnNum</c>.
/// </summary>
public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
    ProcessInfo processInfo) {
    int selected = param.GetSingleChoiceParam("Control column").Value;
    int expressionCount = mdata.ExpressionColumnCount;
    if (selected >= expressionCount) {
        DivideByColumnNum(mdata, selected - expressionCount);
        return;
    }
    DivideByColumn(mdata, selected);
}
/// <summary>
/// Overwrites name and description of the numeric row chosen by "Numerical row"
/// with the values of "New name" and "New description".
/// </summary>
private static void ProcessDataRename(IMatrixData mdata, Parameters param) {
    int rowIndex = param.GetSingleChoiceParam("Numerical row").Value;
    string replacementName = param.GetStringParam("New name").Value;
    string replacementDescription = param.GetStringParam("New description").Value;
    // All parameters are read before the first write, as in the original ordering.
    mdata.NumericRowNames[rowIndex] = replacementName;
    mdata.NumericRowDescriptions[rowIndex] = replacementDescription;
}
/// <summary>
/// Subtracts a reference profile from every row. The reference is the column-wise
/// median of the mean-centered expression rows whose "Indicator column" entry
/// contains the "Value" term; columns without any finite value get NaN.
/// </summary>
public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
    ProcessInfo processInfo) {
    string[][] indicator = mdata.CategoryColumns[param.GetSingleChoiceParam("Indicator column").Value];
    string term = param.GetStringParam("Value").Value;

    // Phase 1: collect the rows carrying the term.
    List<int> matchingRows = new List<int>();
    for (int row = 0; row < indicator.Length; row++) {
        if (Contains(indicator[row], term)) {
            matchingRows.Add(row);
        }
    }

    // Phase 2: fetch those rows and center each on its own mean.
    float[][] centered = new float[matchingRows.Count][];
    for (int p = 0; p < centered.Length; p++) {
        float[] profile = mdata.GetExpressionRow(matchingRows[p]);
        centered[p] = profile;
        float mean = (float) ArrayUtils.Mean(profile);
        for (int c = 0; c < profile.Length; c++) {
            profile[c] -= mean;
        }
    }

    // Phase 3: column-wise median over the finite centered values.
    float[] reference = new float[mdata.ExpressionColumnCount];
    for (int c = 0; c < reference.Length; c++) {
        List<float> finite = new List<float>();
        foreach (float[] profile in centered) {
            float v = profile[c];
            if (!float.IsNaN(v) && !float.IsInfinity(v)) {
                finite.Add(v);
            }
        }
        if (finite.Count > 0) {
            reference[c] = ArrayUtils.Median(finite);
        } else {
            reference[c] = float.NaN;
        }
    }

    // Phase 4: subtract the reference from every row of the matrix.
    for (int row = 0; row < mdata.RowCount; row++) {
        for (int c = 0; c < mdata.ExpressionColumnCount; c++) {
            mdata[row, c] -= reference[c];
        }
    }
}
/// <summary>
/// Replaces missing values using the "Width" and "Down shift" parameters. "Mode"
/// index 0 selects the per-column variant, any other index the whole-matrix variant.
/// </summary>
public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
    ProcessInfo processInfo) {
    double width = param.GetDoubleParam("Width").Value;
    double downShift = param.GetDoubleParam("Down shift").Value;
    int mode = param.GetSingleChoiceParam("Mode").Value;
    if (mode != 0) {
        ReplaceMissingsByGaussianWholeMatrix(width, downShift, mdata);
        return;
    }
    ReplaceMissingsByGaussianByColumn(width, downShift, mdata);
}
/// <summary>
/// Transposes the matrix. New column names come from the string column chosen by
/// "New column names" (made unique through <c>GetNextAvailableName</c>), or are
/// generated as "Column1", "Column2", ... when the choice index is negative.
/// The former expression column names end up in a string column called "Name".
/// </summary>
public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
    ref IDocumentData[] documents, ProcessInfo processInfo) {
    int nameCol = param.GetSingleChoiceParam("New column names").Value;
    List<string> colNames = new List<string>();
    if (nameCol < 0) {
        // One generated name per current row; numbering starts at 1.
        for (int number = 1; number <= mdata.RowCount; number++) {
            colNames.Add("Column" + number);
        }
    } else {
        HashSet<string> used = new HashSet<string>();
        string[] candidates = mdata.StringColumns[nameCol];
        for (int i = 0; i < candidates.Length; i++) {
            string unique = GetNextAvailableName(candidates[i], used);
            used.Add(unique);
            colNames.Add(unique);
        }
    }
    List<string> rowNames = mdata.ExpressionColumnNames;
    // Argument order is positional and must stay exactly as below.
    mdata.SetData(
        mdata.Name,
        mdata.Description,
        colNames,
        colNames,
        ArrayUtils.Transpose(mdata.ExpressionValues),
        ArrayUtils.Transpose(mdata.IsImputed),
        ArrayUtils.Transpose(mdata.QualityValues),
        mdata.QualityName,
        mdata.QualityBiggerIsBetter,
        new List<string>(new[]{"Name"}),
        new List<string>(new[]{"Name"}),
        new List<string[]>(new[]{rowNames.ToArray()}),
        mdata.CategoryRowNames,
        mdata.CategoryRowDescriptions,
        GetCategoryRows(mdata),
        mdata.NumericRowNames,
        mdata.NumericRowDescriptions,
        mdata.NumericRows,
        new List<string>(),
        new List<string>(),
        new List<double[][]>(),
        mdata.CategoryColumnNames,
        mdata.CategoryColumnDescriptions,
        GetCategoryColumns(mdata),
        mdata.NumericColumnNames,
        mdata.NumericColumnDescriptions,
        mdata.NumericColumns);
}
/// <summary>
/// Filters the matrix by number of valid values. "Mode" index 0 runs the plain
/// filter (<c>ValidValueFilter1</c>); any other index runs the group-wise filter
/// (<c>ValidValueFilterGroup</c>) on the category row chosen by the "Grouping"
/// sub-parameter, passing <c>modeInd == 2</c> as its flag argument.
/// </summary>
/// <remarks>
/// Group-wise filtering sets <c>processInfo.ErrString</c> and returns without
/// filtering when the matrix has no category rows or when "Matrix access" does not
/// select rows.
/// </remarks>
public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
    ProcessInfo processInfo) {
    bool rows = param.GetSingleChoiceParam("Matrix access").Value == 0;
    bool atLeast = param.GetSingleChoiceParam("Side").Value == 0;
    int numValids = param.GetIntParam("Number of valid values").Value;
    SingleChoiceWithSubParams modeParam = param.GetSingleChoiceWithSubParams("Mode");
    int modeInd = modeParam.Value;

    if (modeInd == 0) {
        ValidValueFilter1(rows, numValids, mdata, param, atLeast);
        return;
    }

    // Everything below is group-wise filtering; the checks keep their original order.
    if (mdata.CategoryRowNames.Count == 0) {
        processInfo.ErrString = "No grouping is defined.";
        return;
    }
    if (!rows) {
        // Message spelling corrected ("appled" -> "applied").
        processInfo.ErrString = "Group-wise filtering can only be applied to rows.";
        return;
    }
    int gind = modeParam.GetSubParameters().GetSingleChoiceParam("Grouping").Value;
    string[][] groupCol = mdata.CategoryRows[gind];
    ValidValueFilterGroup(numValids, mdata, param, modeInd == 2, groupCol, atLeast);
}
/// <summary>
/// Filters rows by whether their entry in the chosen category column contains a
/// term equal to "Find". With "Mode" index 0 matching rows are dropped; otherwise
/// only matching rows are kept.
/// </summary>
public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
    ProcessInfo processInfo) {
    int columnIndex = param.GetSingleChoiceParam("Column").Value;
    string term = param.GetStringParam("Find").Value;
    bool dropMatches = param.GetSingleChoiceParam("Mode").Value == 0;
    string[][] categories = mdata.CategoryColumns[columnIndex];
    List<int> keptRows = new List<int>();
    for (int row = 0; row < categories.Length; row++) {
        bool found = false;
        foreach (string entry in categories[row]) {
            if (entry.Equals(term)) {
                found = true;
                break;
            }
        }
        // A row survives exactly when "found" and "dropMatches" disagree.
        if (found != dropMatches) {
            keptRows.Add(row);
        }
    }
    mdata.ExtractExpressionRows(keptRows.ToArray());
}
/// <summary>
/// Builds the quality matrix from numeric columns: for each expression column the
/// parameter named after that column selects the numeric column whose values are
/// copied (cast to float). Also sets <c>QualityBiggerIsBetter</c> to false.
/// </summary>
public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
    ref IDocumentData[] documents, ProcessInfo processInfo) {
    int rowCount = mdata.RowCount;
    int columnCount = mdata.ExpressionColumnCount;
    float[,] quality = new float[rowCount, columnCount];
    for (int col = 0; col < columnCount; col++) {
        string columnName = mdata.ExpressionColumnNames[col];
        int sourceIndex = param.GetSingleChoiceParam(columnName).Value;
        double[] source = mdata.NumericColumns[sourceIndex];
        for (int row = 0; row < rowCount; row++) {
            quality[row, col] = (float) source[row];
        }
    }
    mdata.QualityValues = quality;
    mdata.QualityBiggerIsBetter = false;
}
/// <summary>
/// Reorders the rows by the values of the chosen column. Indices below the
/// expression column count address expression columns, larger ones numeric columns.
/// The order from <c>ArrayUtils.Order</c> is reverted when "Descending" is set.
/// </summary>
public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
    ref IDocumentData[] documents, ProcessInfo processInfo) {
    int ind = param.GetSingleChoiceParam("Column").Value;
    bool descending = param.GetBoolParam("Descending").Value;
    int[] order;
    if (ind < mdata.ExpressionColumnCount) {
        float[] expressionValues = mdata.GetExpressionColumn(ind);
        order = ArrayUtils.Order(expressionValues);
    } else {
        double[] numericValues = mdata.NumericColumns[ind - mdata.ExpressionColumnCount];
        order = ArrayUtils.Order(numericValues);
    }
    if (descending) {
        ArrayUtils.Revert(order);
    }
    mdata.ExtractExpressionRows(order);
}
/// <summary>
/// Divides the matrix, row- or column-wise depending on "Matrix access" (index 0 =
/// rows), by the statistic chosen in "Divide by what": 0 = mean, 1 = median,
/// 2 = most frequent value.
/// </summary>
/// <exception cref="Exception">The choice index is not 0, 1 or 2.</exception>
public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
    ProcessInfo processInfo) {
    bool rows = param.GetSingleChoiceParam("Matrix access").Value == 0;
    int what = param.GetSingleChoiceParam("Divide by what").Value;
    // Exactly one division is performed. An additional unconditional division by the
    // mean ahead of this switch divided the data twice and destroyed vectors whose
    // mean is zero or NaN, whichever statistic had been selected.
    switch (what) {
        case 0:
            DivideImpl(rows, ArrayUtils.Mean, mdata);
            break;
        case 1:
            DivideImpl(rows, ArrayUtils.Median, mdata);
            break;
        case 2:
            DivideImpl(rows, ArrayUtils.MostFrequentValue, mdata);
            break;
        default:
            throw new Exception("Never get here.");
    }
}
/// <summary>
/// Keeps the rows for which <c>JudgeIfKept</c> accepts the entry of the chosen
/// category column given the "Amino acids" string. Aborts with an error message,
/// leaving the matrix unfiltered, as soon as an entry holds a term whose length
/// is not exactly one character.
/// </summary>
public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
    ref IDocumentData[] documents, ProcessInfo processInfo) {
    int colIndex = param.GetSingleChoiceParam("Column with second last AA").Value;
    string aas = param.GetStringParam("Amino acids").Value;
    string[][] col = mdata.GetCategoryColumnAt(colIndex);
    List<int> keptRows = new List<int>();
    for (int row = 0; row < mdata.RowCount; row++) {
        string[] entries = col[row];
        foreach (string entry in entries) {
            if (entry.Length == 1) {
                continue;
            }
            processInfo.ErrString = "Some of the entries in column " + mdata.CategoryColumnNames[colIndex] +
                                    " do not contain amino acids";
            return;
        }
        if (JudgeIfKept(aas, entries)) {
            keptRows.Add(row);
        }
    }
    mdata.ExtractExpressionRows(keptRows.ToArray());
}
/// <summary>
/// Collapses rows of the matrix into groups derived from the chosen "ID column" and
/// replaces every column type (expression, numeric, category, string, multi-numeric)
/// with one aggregated value per group, computed by the <c>Average</c> overloads.
/// </summary>
/// <remarks>
/// The grouping itself is produced by <c>SplitIds</c>, <c>GetPresentAbsentIndices</c>
/// and <c>ClusterRows</c>, which are defined elsewhere. NOTE(review): presumably rows
/// sharing an ID are merged and "absent" are rows without any ID - confirm against
/// those helpers.
/// </remarks>
public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables, ref IDocumentData[] documents, ProcessInfo processInfo) {
    bool keepEmpty = param.GetBoolParam("Keep rows without ID").Value;
    AverageType atype = GetAverageType(param.GetSingleChoiceParam("Average type for expression columns").Value);
    string[] ids2 = mdata.StringColumns[param.GetSingleChoiceParam("ID column").Value];
    string[][] ids = SplitIds(ids2);
    int[] present;
    int[] absent;
    GetPresentAbsentIndices(ids, out present, out absent);
    // Only the rows reported as "present" take part in the clustering.
    ids = ArrayUtils.SubArray(ids, present);
    // Start with one single-row group per present row.
    int[][] rowInds = new int[present.Length][];
    for (int i = 0; i < rowInds.Length; i++){
        rowInds[i] = new[]{present[i]};
    }
    // Both arrays are passed by ref, so ClusterRows may replace them.
    ClusterRows(ref rowInds, ref ids);
    if (keepEmpty){
        rowInds = ProlongRowInds(rowInds, absent);
    }
    // From here on rowInds[k] lists the original row indices feeding output row k.
    int nrows = rowInds.Length;
    int ncols = mdata.ExpressionColumnCount;
    float[,] expVals = new float[nrows,ncols];
    // Expression columns: aggregated with the globally selected average type.
    for (int j = 0; j < ncols; j++){
        float[] c = mdata.GetExpressionColumn(j);
        for (int i = 0; i < nrows; i++){
            float[] d = ArrayUtils.SubArray(c, rowInds[i]);
            expVals[i, j] = Average(d, atype);
        }
    }
    // The original expression columns are read above before being replaced here.
    mdata.ExpressionValues = expVals;
    // Numeric columns: each has its own "Average type for <column name>" parameter.
    for (int i = 0; i < mdata.NumericColumnCount; i++){
        string name = mdata.NumericColumnNames[i];
        AverageType atype1 = GetAverageType(param.GetSingleChoiceParam("Average type for " + name).Value);
        double[] c = mdata.NumericColumns[i];
        double[] newCol = new double[nrows];
        for (int k = 0; k < nrows; k++){
            double[] d = ArrayUtils.SubArray(c, rowInds[k]);
            newCol[k] = Average(d, atype1);
        }
        mdata.NumericColumns[i] = newCol;
    }
    // Category columns: combined by the string[][] overload of Average.
    for (int i = 0; i < mdata.CategoryColumnCount; i++){
        string[][] c = mdata.GetCategoryColumnAt(i);
        string[][] newCol = new string[nrows][];
        for (int k = 0; k < nrows; k++){
            string[][] d = ArrayUtils.SubArray(c, rowInds[k]);
            newCol[k] = Average(d);
        }
        mdata.SetCategoryColumnAt(newCol,i);
    }
    // String columns: combined by the string[] overload of Average.
    for (int i = 0; i < mdata.StringColumnCount; i++){
        string[] c = mdata.StringColumns[i];
        string[] newCol = new string[nrows];
        for (int k = 0; k < nrows; k++){
            string[] d = ArrayUtils.SubArray(c, rowInds[k]);
            newCol[k] = Average(d);
        }
        mdata.StringColumns[i] = newCol;
    }
    // Multi-numeric columns: combined by the double[][] overload of Average.
    for (int i = 0; i < mdata.MultiNumericColumnCount; i++){
        double[][] c = mdata.MultiNumericColumns[i];
        double[][] newCol = new double[nrows][];
        for (int k = 0; k < nrows; k++){
            double[][] d = ArrayUtils.SubArray(c, rowInds[k]);
            newCol[k] = Average(d);
        }
        mdata.MultiNumericColumns[i] = newCol;
    }
}
/// <summary>
/// Transposes the expression values. The string column chosen by "New column names"
/// supplies the new column names, and the former expression column names are stored
/// in a string column called "Name". All other annotations are passed empty.
/// </summary>
public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
    ProcessInfo processInfo) {
    int nameCol = param.GetSingleChoiceParam("New column names").Value;
    float[,] transposed = ArrayUtils.Transpose(mdata.ExpressionValues);
    List<string> newColumnNames = new List<string>(mdata.StringColumns[nameCol]);
    List<string> formerColumnNames = mdata.ExpressionColumnNames;
    List<string> stringColumnNames = new List<string>(new[]{"Name"});
    List<string[]> stringColumns = new List<string[]>(new[]{formerColumnNames.ToArray()});
    mdata.SetData(mdata.Name, newColumnNames, transposed, stringColumnNames, stringColumns,
        new List<string>(), new List<string[][]>(),
        new List<string>(), new List<double[]>(),
        new List<string>(), new List<double[][]>());
}
/// <summary>
/// For every selected (x, y) column pair, estimates a point density on a grid with
/// "Number of points" cells per axis and adds two numeric columns: the density at
/// each row's grid cell and the matching value from <c>CalcExcludedPercentage</c>.
/// Rows with NaN in either coordinate get NaN in both new columns.
/// </summary>
/// <remarks>
/// "Distribution type" 1 or 3 applies <c>MakeConditional1</c>, and 2 or 3 applies
/// <c>MakeConditional2</c> (in that order), before the grid is divided by its maximum.
/// </remarks>
public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
    ref IDocumentData[] documents, ProcessInfo processInfo) {
    int[] colIndx = param.GetMultiChoiceParam("x").Value;
    int[] colIndy = param.GetMultiChoiceParam("y").Value;
    if (colIndx.Length == 0) {
        processInfo.ErrString = "Please select some columns";
        return;
    }
    if (colIndx.Length != colIndy.Length) {
        processInfo.ErrString =
            "Please select the same number of columns in the boxes for the first and second columns.";
        return;
    }
    int typeInd = param.GetSingleChoiceParam("Distribution type").Value;
    int points = param.GetIntParam("Number of points").Value;
    bool conditionFirst = typeInd == 1 || typeInd == 3;
    bool conditionSecond = typeInd == 2 || typeInd == 3;

    for (int pair = 0; pair < colIndx.Length; pair++) {
        float[] rawX = GetColumn(mdata, colIndx[pair]);
        float[] rawY = GetColumn(mdata, colIndy[pair]);
        float[] validX;
        float[] validY;
        NumUtils.GetValidPairs(rawX, rawY, out validX, out validY);
        double xmin;
        double xmax;
        double ymin;
        double ymax;
        DensityEstimation.CalcRanges(validX, validY, out xmin, out xmax, out ymin, out ymax);
        float[,] density = DensityEstimation.GetValuesOnGrid(validX, xmin, (xmax - xmin)/points, points, validY,
            ymin, (ymax - ymin)/points, points);
        if (conditionFirst) {
            MakeConditional1(density);
        }
        if (conditionSecond) {
            MakeConditional2(density);
        }
        DensityEstimation.DivideByMaximum(density);

        // Grid coordinates; the arithmetic form is kept so the rounding is unchanged.
        double[] gridX = new double[points];
        double[] gridY = new double[points];
        for (int g = 0; g < points; g++) {
            gridX[g] = xmin + g*(xmax - xmin)/points;
            gridY[g] = ymin + g*(ymax - ymin)/points;
        }
        float[,] excluded = CalcExcludedPercentage(density);

        // Look up each original row (not just the valid pairs) on the grid.
        int rowCount = rawX.Length;
        double[] densityAtRow = new double[rowCount];
        double[] excludedAtRow = new double[rowCount];
        for (int row = 0; row < rowCount; row++) {
            double px = rawX[row];
            double py = rawY[row];
            if (double.IsNaN(px) || double.IsNaN(py)) {
                densityAtRow[row] = double.NaN;
                excludedAtRow[row] = double.NaN;
                continue;
            }
            int gx = ArrayUtils.ClosestIndex(gridX, px);
            int gy = ArrayUtils.ClosestIndex(gridY, py);
            densityAtRow[row] = density[gx, gy];
            excludedAtRow[row] = excluded[gx, gy];
        }

        string xname = GetColumnName(mdata, colIndx[pair]);
        string yname = GetColumnName(mdata, colIndy[pair]);
        mdata.AddNumericColumn("Density_" + xname + "_" + yname,
            "Density of data points in the plane spanned by the columns " + xname + " and " + yname + ".",
            densityAtRow);
        mdata.AddNumericColumn("Excluded fraction_" + xname + "_" + yname,
            "Percentage of points with a point density smaller than at this point in the plane spanned by the columns " +
            xname + " and " + yname + ".", excludedAtRow);
    }
}
/// <summary>
/// Computes "Significance B" for every ratio column paired with its intensity column
/// and adds, per ratio column, a numeric column with the values returned by
/// <c>CalcSignificanceB</c> and a category column with the significance calls.
/// </summary>
/// <exception cref="Exception">
/// The "Side" index is not 0, 1 or 2, or the truncation is neither p-value nor
/// Benjamini-Hochberg (thrown inside the loop, after the values were computed).
/// </exception>
public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
    ref IDocumentData[] documents, ProcessInfo processInfo) {
    int[] rcols = param.GetMultiChoiceParam("Ratio columns").Value;
    int[] icols = param.GetMultiChoiceParam("Intensity columns").Value;
    if (rcols.Length == 0) {
        processInfo.ErrString = "Please specify some ratio columns.";
        return;
    }
    if (rcols.Length != icols.Length) {
        processInfo.ErrString = "The number of ratio and intensity columns have to be equal.";
        return;
    }

    int truncIndex = param.GetSingleChoiceParam("Use for truncation").Value;
    TestTruncation truncation;
    if (truncIndex == 0) {
        truncation = TestTruncation.Pvalue;
    } else if (truncIndex == 1) {
        truncation = TestTruncation.BenjaminiHochberg;
    } else {
        truncation = TestTruncation.PermutationBased;
    }
    double threshold = param.GetDoubleParam("Threshold value").Value;

    int sideInd = param.GetSingleChoiceParam("Side").Value;
    TestSide side;
    if (sideInd == 0) {
        side = TestSide.Both;
    } else if (sideInd == 1) {
        side = TestSide.Left;
    } else if (sideInd == 2) {
        side = TestSide.Right;
    } else {
        throw new Exception("Never get here.");
    }

    for (int i = 0; i < rcols.Length; i++) {
        int ratioIndex = rcols[i];
        int intensityIndex = icols[i];
        float[] ratios = mdata.GetExpressionColumn(ratioIndex);
        // Intensity indices beyond the expression columns address numeric columns.
        float[] intensities;
        if (intensityIndex < mdata.ExpressionColumnCount) {
            intensities = mdata.GetExpressionColumn(intensityIndex);
        } else {
            intensities = ArrayUtils.ToFloats(mdata.NumericColumns[intensityIndex - mdata.ExpressionColumnCount]);
        }
        double[] pvals = CalcSignificanceB(ratios, intensities, side);

        string[][] significant;
        if (truncation == TestTruncation.Pvalue) {
            significant = PerseusPluginUtils.CalcPvalueSignificance(pvals, threshold);
        } else if (truncation == TestTruncation.BenjaminiHochberg) {
            significant = PerseusPluginUtils.CalcBenjaminiHochbergFdr(pvals, threshold);
        } else {
            throw new Exception("Never get here.");
        }

        string ratioName = mdata.ExpressionColumnNames[ratioIndex];
        mdata.AddNumericColumn(ratioName + " Significance B", "", pvals);
        mdata.AddCategoryColumn(ratioName + " B significant", "", significant);
    }
}
/// <summary>
/// Reads the "Matrix access" choice and passes the matrix on to <c>UnitVectors</c>.
/// </summary>
public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
    ProcessInfo processInfo) {
    // Choice index 0 is forwarded as rows == true, any other index as false.
    int accessChoice = param.GetSingleChoiceParam("Matrix access").Value;
    UnitVectors(accessChoice == 0, mdata);
}
/// <summary>
/// Removes the category row selected by the "Category row" parameter.
/// </summary>
private static void ProcessDataDelete(IMatrixData mdata, Parameters param) {
    SingleChoiceParam rowChoice = param.GetSingleChoiceParam("Category row");
    mdata.RemoveCategoryRowAt(rowChoice.Value);
}
/// <summary>
/// Computes "Significance A" for every selected expression column and adds, per
/// column, a numeric column with the values returned by <c>CalcSignificanceA</c>
/// and a category column with the significance calls.
/// </summary>
/// <exception cref="Exception">
/// The "Side" index is not 0, 1 or 2, or the truncation is neither p-value nor
/// Benjamini-Hochberg (thrown inside the loop, after the values were computed).
/// </exception>
public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
    ref IDocumentData[] documents, ProcessInfo processInfo) {
    int[] cols = param.GetMultiChoiceParam("Columns").Value;

    int truncIndex = param.GetSingleChoiceParam("Use for truncation").Value;
    TestTruncation truncation;
    if (truncIndex == 0) {
        truncation = TestTruncation.Pvalue;
    } else if (truncIndex == 1) {
        truncation = TestTruncation.BenjaminiHochberg;
    } else {
        truncation = TestTruncation.PermutationBased;
    }
    double threshold = param.GetDoubleParam("Threshold value").Value;

    int sideInd = param.GetSingleChoiceParam("Side").Value;
    TestSide side;
    if (sideInd == 0) {
        side = TestSide.Both;
    } else if (sideInd == 1) {
        side = TestSide.Left;
    } else if (sideInd == 2) {
        side = TestSide.Right;
    } else {
        throw new Exception("Never get here.");
    }

    for (int k = 0; k < cols.Length; k++) {
        int column = cols[k];
        float[] ratios = mdata.GetExpressionColumn(column);
        double[] pvals = CalcSignificanceA(ratios, side);

        string[][] significant;
        if (truncation == TestTruncation.Pvalue) {
            significant = PerseusPluginUtils.CalcPvalueSignificance(pvals, threshold);
        } else if (truncation == TestTruncation.BenjaminiHochberg) {
            significant = PerseusPluginUtils.CalcBenjaminiHochbergFdr(pvals, threshold);
        } else {
            throw new Exception("Never get here.");
        }

        string columnName = mdata.ExpressionColumnNames[column];
        mdata.AddNumericColumn(columnName + " Significance A", "", pvals);
        mdata.AddCategoryColumn(columnName + " A significant", "", significant);
    }
}
/// <summary>
/// Adds a category column that combines, row by row, the terms of the two chosen
/// category columns through <c>CombineTerms</c>. The new column is named
/// "&lt;first&gt;_&lt;second&gt;". Reports an error when fewer than two category
/// columns exist.
/// </summary>
public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
    ref IDocumentData[] documents, ProcessInfo processInfo) {
    if (mdata.CategoryColumnCount < 2) {
        processInfo.ErrString = "There are less than two categorical columns available.";
        return;
    }
    int firstIndex = param.GetSingleChoiceParam("First column").Value;
    int secondIndex = param.GetSingleChoiceParam("Second column").Value;
    string[][] first = mdata.GetCategoryColumnAt(firstIndex);
    string[][] second = mdata.GetCategoryColumnAt(secondIndex);
    int rowCount = first.Length;
    string[][] combined = new string[rowCount][];
    for (int row = 0; row < rowCount; row++) {
        combined[row] = CombineTerms(first[row], second[row]);
    }
    string combinedName = mdata.CategoryColumnNames[firstIndex] + "_" + mdata.CategoryColumnNames[secondIndex];
    mdata.AddCategoryColumn(combinedName, "", combined);
}
/// <summary>
/// Deletes the numeric row selected by "Numerical row": its values, its name and
/// its description are removed at the same index, in that order.
/// </summary>
private static void ProcessDataDelete(IMatrixData mdata, Parameters param) {
    SingleChoiceParam rowChoice = param.GetSingleChoiceParam("Numerical row");
    int rowIndex = rowChoice.Value;
    // The three parallel lists must stay in sync.
    mdata.NumericRows.RemoveAt(rowIndex);
    mdata.NumericRowNames.RemoveAt(rowIndex);
    mdata.NumericRowDescriptions.RemoveAt(rowIndex);
}
/// <summary>
/// Filters rows by matching the chosen string column against "Search string" via
/// <c>Matches</c>. With "Mode" index 0 matching rows are removed; otherwise only
/// matching rows are kept. An empty search string is reported as an error.
/// </summary>
public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
    ref IDocumentData[] documents, ProcessInfo processInfo) {
    int colInd = param.GetSingleChoiceParam("Column").Value;
    string searchString = param.GetStringParam("Search string").Value;
    if (string.IsNullOrEmpty(searchString)) {
        processInfo.ErrString = "Please provide a search string";
        return;
    }
    bool removeMatches = param.GetSingleChoiceParam("Mode").Value == 0;
    bool matchCase = param.GetBoolParam("Match case").Value;
    bool matchWholeWord = param.GetBoolParam("Match whole word").Value;
    string[] texts = mdata.StringColumns[colInd];
    List<int> keptRows = new List<int>();
    for (int row = 0; row < texts.Length; row++) {
        bool isMatch = Matches(texts[row], searchString, matchCase, matchWholeWord);
        // A row survives exactly when "isMatch" and "removeMatches" disagree.
        if (isMatch != removeMatches) {
            keptRows.Add(row);
        }
    }
    PerseusPluginUtils.FilterRows(mdata, param, keptRows.ToArray());
}
/// <summary>
/// Subtracts the statistic selected by "Subtract what" from the matrix. In row mode
/// the "Grouping" sub-parameter (minus one) may name a category row; with a
/// non-negative result the subtraction is done per group via <c>SubtractGroups</c>,
/// otherwise <c>SubtractValues</c> is used.
/// </summary>
/// <remarks>
/// Group-wise subtraction is rejected with an error when any entry of the category
/// row holds more than one term.
/// </remarks>
public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
    ref IDocumentData[] documents, ProcessInfo processInfo) {
    SingleChoiceWithSubParams access = param.GetSingleChoiceWithSubParams("Matrix access");
    bool rows = access.Value == 0;
    // The sub-parameter is only read in row mode; -1 means "no grouping".
    int groupInd = rows ? access.GetSubParameters().GetSingleChoiceParam("Grouping").Value - 1 : -1;
    int what = param.GetSingleChoiceParam("Subtract what").Value;
    if (groupInd < 0) {
        SubtractValues(rows, GetFunc(what), mdata, processInfo.NumThreads);
        return;
    }
    string[][] catRow = mdata.GetCategoryRowAt(groupInd);
    for (int i = 0; i < catRow.Length; i++) {
        if (catRow[i].Length > 1) {
            processInfo.ErrString = "The groups are overlapping.";
            return;
        }
    }
    SubtractGroups(mdata, catRow, GetFunc(what));
}
/// <summary>
/// Estimates per-protein copy numbers from the selected intensity columns and adds
/// the requested output columns (copy number, concentration, mass and mole
/// abundance, rank, relative rank), a "Histones" category column and, when output
/// option 6 is selected and "Averaging mode" is not 3, per-sample annotation rows.
/// </summary>
/// <remarks>
/// Steps, in order:
/// 1. Collect the intensity columns (expression or numeric) and derive sample names.
/// 2. "Averaging mode" 3: replace all columns by their row-wise median.
/// 3. If "Logarithmized": exponentiate the values with the selected base.
/// 4. Read molecular masses (scaled by 1000 when their median is below 250) and the
///    optional detectability correction factor.
/// 5. Compute one intensity-to-copies factor per column according to "Scaling mode"
///    (0 = total protein amount, 1 = histone based, otherwise 1), optionally averaged
///    over all columns ("Averaging mode" 1) or within groups ("Averaging mode" 2).
/// 6. Derive the output quantities per column.
///
/// All arrays taken from the matrix that are modified here are private copies, so
/// existing matrix columns are left untouched. Zero or NaN normalization factors are
/// replaced by 1, while rows with NaN intensity or NaN molecular mass are skipped.
/// </remarks>
public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
    ref IDocumentData[] documents, ProcessInfo processInfo) {
    int[] outputColumns = param.GetMultiChoiceParam("Output").Value;
    int proteinIdColumnInd = param.GetSingleChoiceParam("Protein IDs").Value;
    string[] proteinIds = mdata.StringColumns[proteinIdColumnInd];
    int[] intensityCols = param.GetMultiChoiceParam("Intensities").Value;
    if (intensityCols.Length == 0) {
        processInfo.ErrString = "Please select at least one column containing protein intensities.";
        return;
    }
    SingleChoiceWithSubParams averagingParam = param.GetSingleChoiceWithSubParams("Averaging mode");
    int averagingMode = averagingParam.Value;

    // variable to hold all intensity values
    List<double[]> columns = new List<double[]>();
    string[] sampleNames = new string[intensityCols.Length];
    // Strips an optional "Intensity " / "LFQ intensity " style prefix from the column name.
    Regex prefixPattern = new Regex(@"^(?:(?:LFQ )?[Ii]ntensity )?(.*)$");
    for (int col = 0; col < intensityCols.Length; col++) {
        int source = intensityCols[col];
        double[] values;
        string rawName;
        if (source < mdata.ExpressionColumnCount) {
            values = ArrayUtils.ToDoubles(mdata.GetExpressionColumn(source));
            rawName = mdata.ExpressionColumnNames[source];
        } else {
            int numericIndex = source - mdata.ExpressionColumnCount;
            // Copy: the values may be exponentiated in place below and the matrix
            // column itself must not change.
            values = (double[]) mdata.NumericColumns[numericIndex].Clone();
            rawName = mdata.NumericColumnNames[numericIndex];
        }
        sampleNames[col] = prefixPattern.Match(rawName).Groups[1].Value;
        columns.Add(values);
    }

    // average over columns if this option is selected
    if (averagingMode == 3) {
        double[] column = new double[mdata.RowCount];
        for (int row = 0; row < mdata.RowCount; row++) {
            double[] values = new double[intensityCols.Length];
            for (int col = 0; col < intensityCols.Length; col++) {
                values[col] = columns[col][row];
            }
            column[row] = ArrayUtils.Median(ExtractValidValues(values, false));
        }
        // delete the original list of columns
        columns = new List<double[]>{column};
        sampleNames = new[]{""};
    }

    // revert logarithm if necessary
    if (param.GetBoolWithSubParams("Logarithmized").Value) {
        double[] logBases = new[]{2, Math.E, 10};
        double logBase =
            logBases[param.GetBoolWithSubParams("Logarithmized").GetSubParameters().GetSingleChoiceParam("log base").Value];
        foreach (double[] t in columns) {
            for (int row = 0; row < mdata.RowCount; row++) {
                if (t[row] == 0) {
                    // NOTE(review): only the error string is set; processing continues.
                    processInfo.ErrString = "Are the columns really logarithmized?\nThey contain zeroes!";
                }
                t[row] = Math.Pow(logBase, t[row]);
            }
        }
    }

    // Private copy, so the unit conversion below does not alter the matrix column.
    double[] mw = (double[]) mdata.NumericColumns[param.GetSingleChoiceParam("Molecular masses").Value].Clone();
    // detect whether the molecular masses are given in Da or kDa
    if (ArrayUtils.Median(mw) < 250) // likely kDa
    {
        for (int i = 0; i < mw.Length; i++) {
            mw[i] *= 1000;
        }
    }

    double[] detectabilitySource = mw;
    if (param.GetBoolWithSubParams("Detectability correction").Value) {
        detectabilitySource = mdata.NumericColumns[
            param.GetBoolWithSubParams("Detectability correction")
                .GetSubParameters()
                .GetSingleChoiceParam("Correction factor")
                .Value];
    }
    // Private copy: replacing entries must neither change the matrix nor turn a
    // missing molecular mass (NaN in mw) into a usable value.
    double[] detectabilityNormFactor = (double[]) detectabilitySource.Clone();
    // the normalization factor needs to be nonzero for all proteins
    // check and replace with 1 for all relevant cases
    for (int row = 0; row < mdata.RowCount; row++) {
        // double.IsNaN is required here: a comparison with double.NaN is always false.
        if (detectabilityNormFactor[row] == 0 || double.IsNaN(detectabilityNormFactor[row])) {
            detectabilityNormFactor[row] = 1;
        }
    }

    // detect the organism
    Organism organism = DetectOrganism(proteinIds);
    // c value the amount of DNA per cell, see: http://en.wikipedia.org/wiki/C-value
    double cValue = (organism.genomeSize*basePairWeight)/avogadro;
    // find the histones
    int[] histoneRows = FindHistones(proteinIds, organism);
    // Set for constant-time membership tests inside the row loops.
    HashSet<int> histoneRowSet = new HashSet<int>(histoneRows);
    // write a categorical column indicating the histones
    string[][] histoneCol = new string[mdata.RowCount][];
    for (int row = 0; row < mdata.RowCount; row++) {
        histoneCol[row] = histoneRowSet.Contains(row) ? new[]{"+"} : new[]{""};
    }
    mdata.AddCategoryColumn("Histones", "", histoneCol);

    // initialize the variables for the annotation rows
    double[] totalProteinRow = new double[mdata.ExpressionColumnCount];
    double[] totalMoleculesRow = new double[mdata.ExpressionColumnCount];
    string[][] organismRow = new string[mdata.ExpressionColumnCount][];
    double[] histoneMassRow = new double[mdata.ExpressionColumnCount];
    double[] ploidyRow = new double[mdata.ExpressionColumnCount];
    double[] cellVolumeRow = new double[mdata.ExpressionColumnCount];
    double[] normalizationFactors = new double[columns.Count];

    // calculate normalization factors for each column
    for (int col = 0; col < columns.Count; col++) {
        double[] column = columns[col];
        // normalization factor to go from intensities to copies,
        // needs to be determined either using the total protein or the histone scaling approach
        double factor;
        switch (param.GetSingleChoiceWithSubParams("Scaling mode").Value) {
            case 0: // total protein amount
                double mwWeightedNormalizedSummedIntensities = 0;
                for (int row = 0; row < mdata.RowCount; row++) {
                    if (!double.IsNaN(column[row]) && !double.IsNaN(mw[row])) {
                        mwWeightedNormalizedSummedIntensities += (column[row]/detectabilityNormFactor[row])*mw[row];
                    }
                }
                factor = (param.GetSingleChoiceWithSubParams("Scaling mode")
                    .GetSubParameters()
                    .GetDoubleParam("Protein amount per cell [pg]")
                    .Value*1e-12*avogadro)/mwWeightedNormalizedSummedIntensities;
                break;
            case 1: // histone mode
                double mwWeightedNormalizedSummedHistoneIntensities = 0;
                foreach (int row in histoneRows) {
                    if (!double.IsNaN(column[row]) && !double.IsNaN(mw[row])) {
                        mwWeightedNormalizedSummedHistoneIntensities += (column[row]/detectabilityNormFactor[row])*mw[row];
                    }
                }
                double ploidy =
                    param.GetSingleChoiceWithSubParams("Scaling mode").GetSubParameters().GetDoubleParam("Ploidy").Value;
                factor = (cValue*ploidy*avogadro)/mwWeightedNormalizedSummedHistoneIntensities;
                break;
            default:
                factor = 1;
                break;
        }
        normalizationFactors[col] = factor;
    }

    // check averaging mode
    if (averagingMode == 1) // same factor for all
    {
        double factor = ArrayUtils.Mean(normalizationFactors);
        for (int i = 0; i < normalizationFactors.Length; i++) {
            normalizationFactors[i] = factor;
        }
    }
    if (averagingMode == 2) // same factor in each group
    {
        int groupingIndex = averagingParam.GetSubParameters().GetSingleChoiceParam("Grouping").Value;
        if (groupingIndex == -1) {
            processInfo.ErrString = "No grouping selected.";
            return;
        }
        string[][] groupNames = mdata.GetCategoryRowAt(groupingIndex);
        string[] uniqueGroupNames = Unique(groupNames);
        int[] grouping = new int[columns.Count];
        for (int i = 0; i < columns.Count; i++) {
            // Columns without a usable group get a key beyond the range of real group
            // indices, so they can never be merged with a group by accident.
            grouping[i] = uniqueGroupNames.Length + i;
            if (intensityCols[i] >= mdata.ExpressionColumnCount) {
                // Numeric annotation columns cannot be grouped
                continue;
            }
            // The category row is indexed by expression column, not by the position
            // of the column within the selection.
            string[] groupsOfColumn = groupNames[intensityCols[i]];
            if (groupsOfColumn.Length > 0 && ArrayUtils.Contains(uniqueGroupNames, groupsOfColumn[0])) {
                grouping[i] = ArrayUtils.IndexOf(uniqueGroupNames, groupsOfColumn[0]);
            }
        }
        Dictionary<int, List<double>> factors = new Dictionary<int, List<double>>();
        for (int i = 0; i < columns.Count; i++) {
            List<double> bucket;
            if (!factors.TryGetValue(grouping[i], out bucket)) {
                bucket = new List<double>();
                factors.Add(grouping[i], bucket);
            }
            bucket.Add(normalizationFactors[i]);
        }
        double[] averagedNormalizationFactors = new double[columns.Count];
        for (int i = 0; i < columns.Count; i++) {
            averagedNormalizationFactors[i] = ArrayUtils.Mean(factors[grouping[i]]);
        }
        normalizationFactors = averagedNormalizationFactors;
    }

    // loop over all selected columns and calculate copy numbers
    for (int col = 0; col < columns.Count; col++) {
        string sampleName = sampleNames[col];
        double[] column = columns[col];
        double factor = normalizationFactors[col];
        double[] copyNumbers = new double[mdata.RowCount];
        double[] concentrations = new double[mdata.RowCount]; // femtoliters
        double[] massFraction = new double[mdata.RowCount];
        double[] moleFraction = new double[mdata.RowCount];
        double totalProtein = 0; // picograms
        double histoneMass = 0; // picograms
        double totalMolecules = 0;
        for (int row = 0; row < mdata.RowCount; row++) {
            if (!double.IsNaN(column[row]) && !double.IsNaN(mw[row])) {
                copyNumbers[row] = (column[row]/detectabilityNormFactor[row])*factor;
                totalMolecules += copyNumbers[row];
                totalProtein += (copyNumbers[row]*mw[row]*1e12)/avogadro; // picograms
                if (histoneRowSet.Contains(row)) {
                    histoneMass += (copyNumbers[row]*mw[row]*1e12)/avogadro; // picograms
                }
            }
        }
        double totalVolume = (totalProtein/(param.GetDoubleParam("Total cellular protein concentration [g/l]").Value))*
                             1000;
        // femtoliters
        for (int row = 0; row < mdata.RowCount; row++) {
            if (!double.IsNaN(column[row]) && !double.IsNaN(mw[row])) {
                concentrations[row] = ((copyNumbers[row]/(totalVolume*1e-15))/avogadro)*1e9; // nanomolar
                massFraction[row] = (((copyNumbers[row]*mw[row]*1e12)/avogadro)/totalProtein)*1e6; // ppm
                moleFraction[row] = (copyNumbers[row]/totalMolecules)*1e6; // ppm
            }
        }
        string suffix = (sampleName == "") ? "" : " " + sampleName;
        if (ArrayUtils.Contains(outputColumns, 0)) {
            mdata.AddNumericColumn("Copy number" + suffix, "", copyNumbers);
        }
        if (ArrayUtils.Contains(outputColumns, 1)) {
            mdata.AddNumericColumn("Concentration [nM]" + suffix, "", concentrations);
        }
        if (ArrayUtils.Contains(outputColumns, 2)) {
            mdata.AddNumericColumn("Abundance (mass/total mass) [*10^-6]" + suffix, "", massFraction);
        }
        if (ArrayUtils.Contains(outputColumns, 3)) {
            mdata.AddNumericColumn("Abundance (molecules/total molecules) [*10^-6]" + suffix, "", moleFraction);
        }
        double[] rank = ArrayUtils.Rank(copyNumbers);
        double[] relativeRank = new double[mdata.RowCount];
        double validRanks = mdata.RowCount;
        for (int row = 0; row < mdata.RowCount; row++) {
            // remove rank for protein with no copy number information
            if (double.IsNaN((copyNumbers[row])) || copyNumbers[row] == 0) {
                rank[row] = double.NaN;
                validRanks--; // do not consider as valid
            }
            // invert ranking, so that rank 0 is the most abundant protein
            // (NaN entries stay NaN under this subtraction)
            rank[row] = mdata.RowCount - rank[row];
        }
        for (int row = 0; row < mdata.RowCount; row++) {
            relativeRank[row] = rank[row]/validRanks;
        }
        if (ArrayUtils.Contains(outputColumns, 4)) {
            mdata.AddNumericColumn("Copy number rank" + suffix, "", rank);
        }
        if (ArrayUtils.Contains(outputColumns, 5)) {
            mdata.AddNumericColumn("Relative copy number rank" + suffix, "", relativeRank);
        }
        // Annotation rows only exist for expression columns and only when the
        // columns were not collapsed into a single median column.
        if (intensityCols[col] < mdata.ExpressionColumnCount && averagingMode != 3) {
            totalProteinRow[intensityCols[col]] = Math.Round(totalProtein, 2);
            totalMoleculesRow[intensityCols[col]] = Math.Round(totalMolecules, 0);
            organismRow[intensityCols[col]] = new string[]{organism.name};
            histoneMassRow[intensityCols[col]] = Math.Round(histoneMass, 4);
            ploidyRow[intensityCols[col]] = Math.Round((histoneMass*1e-12)/cValue, 2);
            cellVolumeRow[intensityCols[col]] = Math.Round(totalVolume, 2); // femtoliters
        }
    }
    if (averagingMode != 3 && ArrayUtils.Contains(outputColumns, 6)) {
        mdata.AddNumericRow("Total protein [pg/cell]", "", totalProteinRow);
        mdata.AddNumericRow("Total molecules per cell", "", totalMoleculesRow);
        mdata.AddCategoryRow("Organism", "", organismRow);
        mdata.AddNumericRow("Histone mass [pg/cell]", "", histoneMassRow);
        mdata.AddNumericRow("Ploidy", "", ploidyRow);
        mdata.AddNumericRow("Cell volume [fl]", "", cellVolumeRow);
    }
}
// Reads the "Matrix access", "Minimum" and "Maximum" parameters and forwards them,
// together with the matrix, to MapToInterval1.
//   mdata       - matrix handed on to MapToInterval1
//   param       - parameter set the three values are read from
//   supplTables - not used by this method
//   processInfo - not used by this method
public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables, ProcessInfo processInfo) {
	// Choice index 0 of "Matrix access" turns the row flag on; any other index turns it off.
	int accessChoice = param.GetSingleChoiceParam("Matrix access").Value;
	bool useRows = accessChoice == 0;
	double lowerBound = param.GetDoubleParam("Minimum").Value;
	double upperBound = param.GetDoubleParam("Maximum").Value;
	MapToInterval1(useRows, mdata, lowerBound, upperBound);
}
// Compares every entry of one numeric or expression column with a reference value and
// hands the indices of the rows that pass the comparison to PerseusPluginUtils.FilterRows.
//   mdata       - matrix whose column is inspected and which is passed on to FilterRows
//   param       - supplies "Column", "Value", "Remove if" and "Keep NaN"; also forwarded to FilterRows
//   supplTables - not used by this method
//   processInfo - not used by this method
// Throws Exception("Never get here.") when a non-NaN entry is met and the "Remove if"
// index is outside 0..5.
// NOTE(review): the parameter is labelled "Remove if", yet the indices below select the rows
// that are collected as valid; the option labels are not visible here - confirm that each
// label is the complement of the comparison applied for its index.
public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables, ProcessInfo processInfo) {
	int columnIndex = param.GetSingleChoiceParam("Column").Value;
	double threshold = param.GetDoubleParam("Value").Value;
	int rule = param.GetSingleChoiceParam("Remove if").Value;
	bool retainNaN = param.GetBoolParam("Keep NaN").Value;
	// Indices below NumericColumnCount address numeric columns; higher ones address
	// expression columns, offset by NumericColumnCount.
	double[] column;
	if (columnIndex < mdata.NumericColumnCount) {
		column = mdata.NumericColumns[columnIndex];
	} else {
		column = ArrayUtils.ToDoubles(mdata.GetExpressionColumn(columnIndex - mdata.NumericColumnCount));
	}
	List<int> passingRows = new List<int>();
	for (int row = 0; row < column.Length; row++) {
		double current = column[row];
		bool passes;
		if (double.IsNaN(current)) {
			// NaN entries bypass the comparison entirely; "Keep NaN" alone decides.
			passes = retainNaN;
		} else if (rule == 0) {
			passes = current > threshold;
		} else if (rule == 1) {
			passes = current >= threshold;
		} else if (rule == 2) {
			passes = current != threshold;
		} else if (rule == 3) {
			passes = current == threshold;
		} else if (rule == 4) {
			passes = current <= threshold;
		} else if (rule == 5) {
			passes = current < threshold;
		} else {
			throw new Exception("Never get here.");
		}
		if (passes) {
			passingRows.Add(row);
		}
	}
	PerseusPluginUtils.FilterRows(mdata, param, passingRows.ToArray());
}
// Returns a copy of matrixData1 extended with columns taken from matrixData2.
//
// Rows are matched through two string columns ("Matching column 1" in matrixData1,
// "Matching column 2" in matrixData2). Each cell is trimmed, split at ';' and de-duplicated;
// a row of matrixData1 matches every row of matrixData2 that shares at least one id.
// For each selected column of matrixData2 the values of all matching rows are merged into
// one value per row of matrixData1:
//   - expression columns: finite values are combined with the "Combine expression values"
//     averaging; quality values are combined with the same delegate; the imputed flags are
//     combined by AvImp
//   - numerical columns: non-NaN values are combined with the "Combine numerical values" averaging
//   - categorical columns: unique union of all non-empty entries
//   - string columns: non-empty entries joined with ';'
// Rows without a match receive NaN / an empty array / an empty string / false respectively.
//
//   matrixData1 - base matrix; copied, never modified here
//   matrixData2 - matrix the additional columns are read from
//   parameters  - supplies the matching columns, the averaging choices and the column selections
//   processInfo - not used by this method
// Returns the extended copy with Origin set to "Combination".
public IMatrixData CombineData(IMatrixData matrixData1, IMatrixData matrixData2, Parameters parameters, ProcessInfo processInfo) {
	bool indicator = parameters.GetBoolParam("Indicator").Value;
	int otherCol = parameters.GetSingleChoiceParam("Matching column 2").Value;
	Average avExpression = GetAveraging(parameters.GetSingleChoiceParam("Combine expression values").Value);
	Average avNumerical = GetAveraging(parameters.GetSingleChoiceParam("Combine numerical values").Value);

	// Ids of every row of the second matrix.
	string[] q = matrixData2.StringColumns[otherCol];
	string[][] w = new string[q.Length][];
	for (int i = 0; i < q.Length; i++) {
		string r = q[i].Trim();
		w[i] = ArrayUtils.UniqueValues(r.Length == 0 ? new string[0] : r.Split(';'));
	}

	// id -> indices of the rows of the second matrix that carry it.
	Dictionary<string, List<int>> id2Cols = new Dictionary<string, List<int>>();
	for (int i = 0; i < w.Length; i++) {
		foreach (string s in w[i]) {
			List<int> rowsForId;
			// Single lookup instead of ContainsKey followed by the indexer.
			if (!id2Cols.TryGetValue(s, out rowsForId)) {
				rowsForId = new List<int>();
				id2Cols.Add(s, rowsForId);
			}
			rowsForId.Add(i);
		}
	}

	// Ids of every row of the first matrix.
	int pgCol = parameters.GetSingleChoiceParam("Matching column 1").Value;
	string[] d = matrixData1.StringColumns[pgCol];
	string[][] x = new string[d.Length][];
	for (int i = 0; i < d.Length; i++) {
		string r = d[i].Trim();
		x[i] = ArrayUtils.UniqueValues(r.Length == 0 ? new string[0] : r.Split(';'));
	}

	// indexMap[i] = unique rows of the second matrix matching row i of the first matrix.
	int[][] indexMap = new int[x.Length][];
	string[][] indicatorCol = new string[x.Length][];
	for (int i = 0; i < indexMap.Length; i++) {
		List<int> matches = new List<int>();
		foreach (string s in x[i]) {
			List<int> rowsForId;
			if (id2Cols.TryGetValue(s, out rowsForId)) {
				matches.AddRange(rowsForId);
			}
		}
		indexMap[i] = ArrayUtils.UniqueValues(matches.ToArray());
		// "+" marks rows that found at least one partner.
		indicatorCol[i] = indexMap[i].Length > 0 ? new[]{"+"} : new string[0];
	}

	IMatrixData result = matrixData1.Copy();
	SetAnnotationRows(result, matrixData1, matrixData2);
	if (indicator) {
		result.AddCategoryColumn(matrixData2.Name, "", indicatorCol);
	}

	// Expression columns (values, quality, imputation flags).
	{
		int[] exCols = parameters.GetMultiChoiceParam("Expression columns").Value;
		float[,] newExColumns = new float[matrixData1.RowCount, exCols.Length];
		float[,] newQuality = new float[matrixData1.RowCount, exCols.Length];
		bool[,] newIsImputed = new bool[matrixData1.RowCount, exCols.Length];
		string[] newExColNames = new string[exCols.Length];
		float[,] oldEx = matrixData2.ExpressionValues;
		float[,] oldQual = matrixData2.QualityValues;
		bool[,] oldImp = matrixData2.IsImputed;
		for (int i = 0; i < exCols.Length; i++) {
			int srcCol = exCols[i];
			newExColNames[i] = matrixData2.ExpressionColumnNames[srcCol];
			for (int j = 0; j < matrixData1.RowCount; j++) {
				int[] inds = indexMap[j];
				List<double> values = new List<double>();
				List<double> qual = new List<double>();
				List<bool> imp = new List<bool>();
				foreach (int ind in inds) {
					double v = oldEx[ind, srcCol];
					// Quality and imputation state only count for rows whose expression value is finite.
					if (!double.IsNaN(v) && !double.IsInfinity(v)) {
						values.Add(v);
						double qx = oldQual[ind, srcCol];
						if (!double.IsNaN(qx) && !double.IsInfinity(qx)) {
							qual.Add(qx);
						}
						imp.Add(oldImp[ind, srcCol]);
					}
				}
				newExColumns[j, i] = values.Count == 0 ? float.NaN : (float) avExpression(values.ToArray());
				newQuality[j, i] = qual.Count == 0 ? float.NaN : (float) avExpression(qual.ToArray());
				newIsImputed[j, i] = imp.Count != 0 && AvImp(imp.ToArray());
			}
		}
		MakeNewNames(newExColNames, result.ExpressionColumnNames);
		AddExpressionColumns(result, newExColNames, newExColumns, newQuality, newIsImputed);
	}

	// Numerical columns.
	{
		int[] numCols = parameters.GetMultiChoiceParam("Numerical columns").Value;
		double[][] newNumericalColumns = new double[numCols.Length][];
		string[] newNumColNames = new string[numCols.Length];
		for (int i = 0; i < numCols.Length; i++) {
			int srcCol = numCols[i];
			double[] oldCol = matrixData2.NumericColumns[srcCol];
			newNumColNames[i] = matrixData2.NumericColumnNames[srcCol];
			newNumericalColumns[i] = new double[matrixData1.RowCount];
			for (int j = 0; j < matrixData1.RowCount; j++) {
				int[] inds = indexMap[j];
				List<double> values = new List<double>();
				foreach (int ind in inds) {
					double v = oldCol[ind];
					if (!double.IsNaN(v)) {
						values.Add(v);
					}
				}
				newNumericalColumns[i][j] = values.Count == 0 ? double.NaN : avNumerical(values.ToArray());
			}
		}
		for (int i = 0; i < numCols.Length; i++) {
			result.AddNumericColumn(newNumColNames[i], "", newNumericalColumns[i]);
		}
	}

	// Categorical columns.
	{
		int[] catCols = parameters.GetMultiChoiceParam("Categorical columns").Value;
		string[][][] newCatColumns = new string[catCols.Length][][];
		string[] newCatColNames = new string[catCols.Length];
		for (int i = 0; i < catCols.Length; i++) {
			int srcCol = catCols[i];
			string[][] oldCol = matrixData2.CategoryColumns[srcCol];
			newCatColNames[i] = matrixData2.CategoryColumnNames[srcCol];
			newCatColumns[i] = new string[matrixData1.RowCount][];
			for (int j = 0; j < matrixData1.RowCount; j++) {
				int[] inds = indexMap[j];
				List<string[]> values = new List<string[]>();
				foreach (int ind in inds) {
					string[] v = oldCol[ind];
					if (v.Length > 0) {
						values.Add(v);
					}
				}
				newCatColumns[i][j] = values.Count == 0
					? new string[0]
					: ArrayUtils.UniqueValues(ArrayUtils.Concat(values.ToArray()));
			}
		}
		for (int i = 0; i < catCols.Length; i++) {
			result.AddCategoryColumn(newCatColNames[i], "", newCatColumns[i]);
		}
	}

	// String columns.
	{
		int[] stringCols = parameters.GetMultiChoiceParam("String columns").Value;
		string[][] newStringColumns = new string[stringCols.Length][];
		string[] newStringColNames = new string[stringCols.Length];
		for (int i = 0; i < stringCols.Length; i++) {
			int srcCol = stringCols[i];
			string[] oldCol = matrixData2.StringColumns[srcCol];
			newStringColNames[i] = matrixData2.StringColumnNames[srcCol];
			newStringColumns[i] = new string[matrixData1.RowCount];
			for (int j = 0; j < matrixData1.RowCount; j++) {
				int[] inds = indexMap[j];
				List<string> values = new List<string>();
				foreach (int ind in inds) {
					string v = oldCol[ind];
					if (v.Length > 0) {
						values.Add(v);
					}
				}
				newStringColumns[i][j] = values.Count == 0 ? "" : StringUtils.Concat(";", values.ToArray());
			}
		}
		for (int i = 0; i < stringCols.Length; i++) {
			result.AddStringColumn(newStringColNames[i], "", newStringColumns[i]);
		}
	}

	result.Origin = "Combination";
	return result;
}
// Optionally builds a per-row selection mask from one category column, then counts
// categories via CountCategories and passes the outcome to CreateMatrixData.
//   mdata       - matrix that is read and handed on to both helpers
//   param       - supplies "Min. count", "Selection", "Value" and "Categories"
//   supplTables - not used by this method
//   documents   - not used by this method
//   processInfo - not used by this method
public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables, ref IDocumentData[] documents, ProcessInfo processInfo) {
	int minimumCount = param.GetIntParam("Min. count").Value;
	int selectionColumn = param.GetSingleChoiceParam("Selection").Value;
	string wanted = param.GetStringParam("Value").Value;
	int[] categoryIndices = param.GetMultiChoiceParam("Categories").Value;
	// The mask stays null when the "Selection" index does not address a category column.
	bool[] selection = null;
	if (selectionColumn < mdata.CategoryColumnCount) {
		selection = new bool[mdata.RowCount];
		string[][] column = mdata.GetCategoryColumnAt(selectionColumn);
		for (int row = 0; row < selection.Length; row++) {
			string[] terms = column[row];
			if (terms == null) {
				continue;
			}
			// A row is selected as soon as one of its terms equals the requested value.
			foreach (string term in terms) {
				if (term.Equals(wanted)) {
					selection[row] = true;
					break;
				}
			}
		}
	}
	CountingResult counts = CountCategories(mdata, selection, selectionColumn, categoryIndices);
	CreateMatrixData(counts, mdata, minimumCount, selection);
}