/// <summary>
/// Reads the "Value" parameter, narrows it to <c>float</c> and hands it, together with the
/// matrix, to <c>ReplaceMissingsByVal</c>.
/// </summary>
/// <param name="mdata">Matrix passed on to <c>ReplaceMissingsByVal</c>.</param>
/// <param name="param">Parameter set; must contain the double parameter "Value".</param>
/// <param name="supplTables">Not used by this method.</param>
/// <param name="processInfo">Not used by this method.</param>
public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables, ProcessInfo processInfo) {
    double requested = param.GetDoubleParam("Value").Value;
    // The helper takes a float, so the double parameter is narrowed explicitly.
    float replacement = (float) requested;
    ReplaceMissingsByVal(replacement, mdata);
}
/// <summary>
/// Reads the access mode and the interval bounds from the parameters and forwards them to
/// <c>MapToInterval1</c>.
/// </summary>
/// <param name="mdata">Matrix passed on to <c>MapToInterval1</c>.</param>
/// <param name="param">
/// Parameter set; must contain "Matrix access" (single choice), "Minimum" and "Maximum" (doubles).
/// </param>
/// <param name="supplTables">Not used by this method.</param>
/// <param name="processInfo">Not used by this method.</param>
public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables, ProcessInfo processInfo) {
    // Choice index 0 of "Matrix access" is forwarded as rows == true.
    int accessChoice = param.GetSingleChoiceParam("Matrix access").Value;
    bool useRows = accessChoice == 0;
    double lowerBound = param.GetDoubleParam("Minimum").Value;
    double upperBound = param.GetDoubleParam("Maximum").Value;
    MapToInterval1(useRows, mdata, lowerBound, upperBound);
}
/// <summary>
/// For every selected expression column, computes p-values with <c>CalcSignificanceA</c>,
/// derives a significance category from them and appends both as new columns to the matrix.
/// </summary>
/// <param name="mdata">Matrix that is read from and that receives the new columns.</param>
/// <param name="param">
/// Parameter set; must contain "Columns", "Use for truncation", "Threshold value" and "Side".
/// </param>
/// <param name="supplTables">Not used by this method.</param>
/// <param name="documents">Not used by this method.</param>
/// <param name="processInfo">Not used by this method.</param>
/// <exception cref="Exception">
/// Thrown when "Side" is not 0, 1 or 2, or when the truncation mode is neither p-value nor
/// Benjamini-Hochberg (i.e. any "Use for truncation" index above 1).
/// </exception>
public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables, ref IDocumentData[] documents, ProcessInfo processInfo) {
    int[] selectedColumns = param.GetMultiChoiceParam("Columns").Value;

    // Index 0 -> p-value, index 1 -> Benjamini-Hochberg, anything else -> permutation based.
    int truncationChoice = param.GetSingleChoiceParam("Use for truncation").Value;
    TestTruncation truncation;
    if (truncationChoice == 0) {
        truncation = TestTruncation.Pvalue;
    } else if (truncationChoice == 1) {
        truncation = TestTruncation.BenjaminiHochberg;
    } else {
        truncation = TestTruncation.PermutationBased;
    }

    double threshold = param.GetDoubleParam("Threshold value").Value;

    int sideChoice = param.GetSingleChoiceParam("Side").Value;
    TestSide side;
    if (sideChoice == 0) {
        side = TestSide.Both;
    } else if (sideChoice == 1) {
        side = TestSide.Left;
    } else if (sideChoice == 2) {
        side = TestSide.Right;
    } else {
        throw new Exception("Never get here.");
    }

    for (int k = 0; k < selectedColumns.Length; k++) {
        int column = selectedColumns[k];
        float[] ratios = mdata.GetExpressionColumn(column);
        double[] pValues = CalcSignificanceA(ratios, side);

        // Only the two non-permutation truncation modes are handled here.
        string[][] significance;
        if (truncation == TestTruncation.Pvalue) {
            significance = PerseusPluginUtils.CalcPvalueSignificance(pValues, threshold);
        } else if (truncation == TestTruncation.BenjaminiHochberg) {
            significance = PerseusPluginUtils.CalcBenjaminiHochbergFdr(pValues, threshold);
        } else {
            throw new Exception("Never get here.");
        }

        string columnName = mdata.ExpressionColumnNames[column];
        mdata.AddNumericColumn(columnName + " Significance A", "", pValues);
        mdata.AddCategoryColumn(columnName + " A significant", "", significance);
    }
}
/// <summary>
/// Builds a numeric annotation row with one entry per expression column and adds it to the matrix.
/// The value for each column is read from the double parameter named after that column.
/// </summary>
/// <param name="mdata">Matrix that supplies the column names and receives the new row.</param>
/// <param name="param">
/// Parameter set; must contain "Row name" and one double parameter per expression column name.
/// </param>
private static void ProcessDataCreate(IMatrixData mdata, Parameters param) {
    string rowName = param.GetStringParam("Row name").Value;
    double[] rowValues = new double[mdata.ExpressionColumnCount];
    for (int column = 0; column < mdata.ExpressionColumnCount; column++) {
        // Each expression column has a parameter carrying the same name as the column.
        rowValues[column] = param.GetDoubleParam(mdata.ExpressionColumnNames[column]).Value;
    }
    // The row name is passed for both the name and the second string argument.
    mdata.AddNumericRow(rowName, rowName, rowValues);
}
/// <summary>
/// Reads "Width", "Down shift" and "Mode" and dispatches to the per-column or the whole-matrix
/// Gaussian replacement helper.
/// </summary>
/// <param name="mdata">Matrix passed on to the selected helper.</param>
/// <param name="param">
/// Parameter set; must contain "Width" and "Down shift" (doubles) and "Mode" (single choice).
/// </param>
/// <param name="supplTables">Not used by this method.</param>
/// <param name="processInfo">Not used by this method.</param>
public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables, ProcessInfo processInfo) {
    double width = param.GetDoubleParam("Width").Value;
    double downShift = param.GetDoubleParam("Down shift").Value;
    int mode = param.GetSingleChoiceParam("Mode").Value;

    // Any mode other than 0 is handled on the whole matrix at once.
    if (mode != 0) {
        ReplaceMissingsByGaussianWholeMatrix(width, downShift, mdata);
        return;
    }
    ReplaceMissingsByGaussianByColumn(width, downShift, mdata);
}
/// <summary>
/// Sets every expression value to NaN whose quality value is on the wrong side of the
/// "Threshold" parameter. Which side is wrong depends on <c>mdata.QualityBiggerIsBetter</c>.
/// </summary>
/// <param name="mdata">Matrix whose expression values are overwritten in place.</param>
/// <param name="param">Parameter set; must contain the double parameter "Threshold".</param>
/// <param name="supplTables">Not used by this method.</param>
/// <param name="documents">Not used by this method.</param>
/// <param name="processInfo">Receives an error string when the matrix has no quality data.</param>
public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables, ref IDocumentData[] documents, ProcessInfo processInfo) {
    if (!mdata.HasQuality) {
        processInfo.ErrString = "No quality data loaded.";
        return;
    }
    double threshold = param.GetDoubleParam("Threshold").Value;
    for (int row = 0; row < mdata.RowCount; row++) {
        for (int column = 0; column < mdata.ExpressionColumnCount; column++) {
            float quality = mdata.QualityValues[row, column];
            // Strict comparisons: a quality exactly at the threshold is kept, and a NaN quality
            // fails both comparisons, so that cell is left untouched as well.
            bool failsThreshold = mdata.QualityBiggerIsBetter
                ? quality < threshold
                : quality > threshold;
            if (failsThreshold) {
                mdata[row, column] = float.NaN;
            }
        }
    }
}
/// <summary>
/// Compares every value of the chosen column against the "Value" parameter using the relation
/// selected by "Remove if", collects the indices of the rows for which the relation holds and
/// passes them to <c>PerseusPluginUtils.FilterRows</c>.
/// </summary>
/// <remarks>
/// Column indices below <c>mdata.NumericColumnCount</c> address numeric columns; higher indices
/// address expression columns (offset by the numeric column count). NaN entries are not compared;
/// they are collected exactly when "Keep NaN" is set.
/// </remarks>
/// <param name="mdata">Matrix that supplies the column and is passed to the row filter.</param>
/// <param name="param">
/// Parameter set; must contain "Column", "Value", "Remove if" and "Keep NaN". It is also
/// forwarded unchanged to <c>FilterRows</c>.
/// </param>
/// <param name="supplTables">Not used by this method.</param>
/// <param name="processInfo">Not used by this method.</param>
/// <exception cref="Exception">
/// Thrown when a non-NaN value is encountered and "Remove if" is outside 0..5.
/// </exception>
public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables, ProcessInfo processInfo) {
    int columnIndex = param.GetSingleChoiceParam("Column").Value;
    double reference = param.GetDoubleParam("Value").Value;
    int rule = param.GetSingleChoiceParam("Remove if").Value;
    bool retainNan = param.GetBoolParam("Keep NaN").Value;

    double[] data;
    if (columnIndex < mdata.NumericColumnCount) {
        data = mdata.NumericColumns[columnIndex];
    } else {
        data = ArrayUtils.ToDoubles(mdata.GetExpressionColumn(columnIndex - mdata.NumericColumnCount));
    }

    List<int> selectedRows = new List<int>();
    for (int row = 0; row < data.Length; row++) {
        double current = data[row];
        bool selected;
        if (double.IsNaN(current)) {
            selected = retainNan;
        } else if (rule == 0) {
            selected = current > reference;
        } else if (rule == 1) {
            selected = current >= reference;
        } else if (rule == 2) {
            selected = current != reference;
        } else if (rule == 3) {
            selected = current == reference;
        } else if (rule == 4) {
            selected = current <= reference;
        } else if (rule == 5) {
            selected = current < reference;
        } else {
            throw new Exception("Never get here.");
        }
        if (!selected) {
            continue;
        }
        selectedRows.Add(row);
    }
    PerseusPluginUtils.FilterRows(mdata, param, selectedRows.ToArray());
}
/// <summary>
/// Converts protein intensities into copy numbers, concentrations, mass/mole fractions and
/// ranks, and appends the selected outputs to the matrix as numeric columns and annotation rows.
/// </summary>
/// <remarks>
/// Steps, in order:
/// <list type="number">
/// <item>collect the selected intensity columns (expression or numeric) and derive sample names;</item>
/// <item>optionally collapse all columns into one by the per-row median (averaging mode 3);</item>
/// <item>optionally undo a logarithm;</item>
/// <item>load molecular masses (values with a median below 250 are treated as kDa and multiplied
/// by 1000) and the detectability normalization factor;</item>
/// <item>detect organism and histone rows, add a "Histones" category column;</item>
/// <item>compute a scaling factor per column (total protein, histone, or 1), optionally averaged
/// over all columns (mode 1) or per group (mode 2);</item>
/// <item>compute and emit the outputs selected in "Output".</item>
/// </list>
/// NOTE(review): several arrays obtained from <c>mdata.NumericColumns</c> are modified in place
/// (mass unit conversion, zero/NaN replacement in the normalization factor, de-logarithmizing of
/// numeric intensity columns). This is unchanged from the previous implementation; confirm that
/// mutating the matrix's own columns is intended.
/// </remarks>
/// <param name="mdata">Matrix that is read from and that receives the new columns and rows.</param>
/// <param name="param">Parameter set holding all options referenced by name in the body.</param>
/// <param name="supplTables">Not used by this method.</param>
/// <param name="documents">Not used by this method.</param>
/// <param name="processInfo">
/// Receives an error string when no intensity column is selected, when group averaging is
/// requested without a grouping, or when a supposedly logarithmized column contains a zero.
/// </param>
public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables, ref IDocumentData[] documents, ProcessInfo processInfo) {
    int[] outputColumns = param.GetMultiChoiceParam("Output").Value;
    int proteinIdColumnInd = param.GetSingleChoiceParam("Protein IDs").Value;
    string[] proteinIds = mdata.StringColumns[proteinIdColumnInd];
    int[] intensityCols = param.GetMultiChoiceParam("Intensities").Value;
    if (intensityCols.Length == 0) {
        processInfo.ErrString = "Please select at least one column containing protein intensities.";
        return;
    }

    // variable to hold all intensity values
    List<double[]> columns = new List<double[]>();
    string[] sampleNames = new string[intensityCols.Length];
    // Strips an optional leading "LFQ intensity " / "Intensity " / "intensity " from a column name.
    // Built once instead of once per column.
    Regex sampleNamePattern = new Regex(@"^(?:(?:LFQ )?[Ii]ntensity )?(.*)$");
    for (int col = 0; col < intensityCols.Length; col++) {
        double[] values;
        // Indices below ExpressionColumnCount address expression columns, the rest numeric columns.
        if (intensityCols[col] < mdata.ExpressionColumnCount) {
            values = ArrayUtils.ToDoubles(mdata.GetExpressionColumn(intensityCols[col]));
            sampleNames[col] = mdata.ExpressionColumnNames[intensityCols[col]];
        } else {
            values = mdata.NumericColumns[intensityCols[col] - mdata.ExpressionColumnCount];
            sampleNames[col] = mdata.NumericColumnNames[intensityCols[col] - mdata.ExpressionColumnCount];
        }
        sampleNames[col] = sampleNamePattern.Match(sampleNames[col]).Groups[1].Value;
        columns.Add(values);
    }

    // 1: same factor for all columns, 2: same factor within each group, 3: median over all columns.
    int averagingMode = param.GetSingleChoiceWithSubParams("Averaging mode").Value;

    // average over columns if this option is selected
    if (averagingMode == 3) {
        double[] column = new double[mdata.RowCount];
        for (int row = 0; row < mdata.RowCount; row++) {
            double[] values = new double[intensityCols.Length];
            for (int col = 0; col < intensityCols.Length; col++) {
                values[col] = columns[col][row];
            }
            column[row] = ArrayUtils.Median(ExtractValidValues(values, false));
        }
        // replace the original list of columns by the single averaged one (empty sample name)
        columns = new List<double[]> {column};
        sampleNames = new[] {""};
    }

    // revert logarithm if necessary
    if (param.GetBoolWithSubParams("Logarithmized").Value) {
        double[] logBases = new double[] {2, Math.E, 10};
        double logBase =
            logBases[param.GetBoolWithSubParams("Logarithmized").GetSubParameters().GetSingleChoiceParam("log base").Value];
        foreach (double[] t in columns) {
            for (int row = 0; row < mdata.RowCount; row++) {
                // NOTE(review): this only records an error string; processing continues and the
                // value is still exponentiated. Confirm whether an early return was intended.
                if (t[row] == 0) {
                    processInfo.ErrString = "Are the columns really logarithmized?\nThey contain zeroes!";
                }
                t[row] = Math.Pow(logBase, t[row]);
            }
        }
    }

    double[] mw = mdata.NumericColumns[param.GetSingleChoiceParam("Molecular masses").Value];
    // detect whether the molecular masses are given in Da or kDa
    if (ArrayUtils.Median(mw) < 250) {
        // likely kDa
        for (int i = 0; i < mw.Length; i++) {
            mw[i] *= 1000;
        }
    }

    // Without detectability correction the masses themselves serve as normalization factor
    // (same array instance, not a copy).
    double[] detectabilityNormFactor = mw;
    if (param.GetBoolWithSubParams("Detectability correction").Value) {
        detectabilityNormFactor =
            mdata.NumericColumns[
                param.GetBoolWithSubParams("Detectability correction")
                    .GetSubParameters()
                    .GetSingleChoiceParam("Correction factor")
                    .Value];
    }

    // the normalization factor needs to be nonzero for all proteins
    // check and replace with 1 for all relevant cases
    //
    // NaN has to be detected with double.IsNaN: "x == double.NaN" is always false, so NaN
    // correction factors used to slip through and turned the column sums below into NaN.
    // When the factor array is the mass array itself, NaN entries are deliberately left alone:
    // those rows are skipped by the !double.IsNaN(mw[row]) checks further down, and overwriting
    // them would silently give proteins of unknown mass a mass of 1.
    bool factorIsMassArray = object.ReferenceEquals(detectabilityNormFactor, mw);
    for (int row = 0; row < mdata.RowCount; row++) {
        double current = detectabilityNormFactor[row];
        if (current == 0 || (!factorIsMassArray && double.IsNaN(current))) {
            detectabilityNormFactor[row] = 1;
        }
    }

    // detect the organism
    Organism organism = DetectOrganism(proteinIds);
    // c value the amount of DNA per cell, see: http://en.wikipedia.org/wiki/C-value
    double cValue = (organism.genomeSize*basePairWeight)/avogadro;
    // find the histones
    int[] histoneRows = FindHistones(proteinIds, organism);

    // write a categorical column indicating the histones
    string[][] histoneCol = new string[mdata.RowCount][];
    for (int row = 0; row < mdata.RowCount; row++) {
        histoneCol[row] = ArrayUtils.Contains(histoneRows, row) ? new[] {"+"} : new[] {""};
    }
    mdata.AddCategoryColumn("Histones", "", histoneCol);

    // initialize the variables for the annotation rows
    double[] totalProteinRow = new double[mdata.ExpressionColumnCount];
    double[] totalMoleculesRow = new double[mdata.ExpressionColumnCount];
    string[][] organismRow = new string[mdata.ExpressionColumnCount][];
    double[] histoneMassRow = new double[mdata.ExpressionColumnCount];
    double[] ploidyRow = new double[mdata.ExpressionColumnCount];
    double[] cellVolumeRow = new double[mdata.ExpressionColumnCount];
    double[] normalizationFactors = new double[columns.Count];

    // calculate normalization factors for each column
    int scalingMode = param.GetSingleChoiceWithSubParams("Scaling mode").Value;
    for (int col = 0; col < columns.Count; col++) {
        double[] column = columns[col];
        // normalization factor to go from intensities to copies,
        // needs to be determined either using the total protein or the histone scaling approach
        double factor;
        switch (scalingMode) {
            case 0: {
                // total protein amount
                double mwWeightedNormalizedSummedIntensities = 0;
                for (int row = 0; row < mdata.RowCount; row++) {
                    if (!double.IsNaN(column[row]) && !double.IsNaN(mw[row])) {
                        mwWeightedNormalizedSummedIntensities += (column[row]/detectabilityNormFactor[row])*mw[row];
                    }
                }
                factor =
                    (param.GetSingleChoiceWithSubParams("Scaling mode")
                        .GetSubParameters()
                        .GetDoubleParam("Protein amount per cell [pg]")
                        .Value*1e-12*avogadro)/mwWeightedNormalizedSummedIntensities;
                break;
            }
            case 1: {
                // histone mode
                double mwWeightedNormalizedSummedHistoneIntensities = 0;
                foreach (int row in histoneRows) {
                    if (!double.IsNaN(column[row]) && !double.IsNaN(mw[row])) {
                        mwWeightedNormalizedSummedHistoneIntensities += (column[row]/detectabilityNormFactor[row])*mw[row];
                    }
                }
                double ploidy =
                    param.GetSingleChoiceWithSubParams("Scaling mode").GetSubParameters().GetDoubleParam("Ploidy").Value;
                factor = (cValue*ploidy*avogadro)/mwWeightedNormalizedSummedHistoneIntensities;
                break;
            }
            default:
                factor = 1;
                break;
        }
        normalizationFactors[col] = factor;
    }

    // check averaging mode
    if (averagingMode == 1) {
        // same factor for all
        double factor = ArrayUtils.Mean(normalizationFactors);
        for (int i = 0; i < normalizationFactors.Length; i++) {
            normalizationFactors[i] = factor;
        }
    }
    if (averagingMode == 2) {
        // same factor in each group
        int groupingIndex =
            param.GetSingleChoiceWithSubParams("Averaging mode").GetSubParameters().GetSingleChoiceParam("Grouping").Value;
        if (groupingIndex == -1) {
            processInfo.ErrString = "No grouping selected.";
            return;
        }
        string[][] groupNames = mdata.GetCategoryRowAt(groupingIndex);
        string[] uniqueGroupNames = Unique(groupNames);
        int[] grouping = new int[columns.Count];
        // NOTE(review): groupNames is indexed with the position in the selection (i), not with
        // intensityCols[i], and the fallback key i shares its number space with the group
        // indices. Both are kept as they were; verify against the intended grouping semantics.
        for (int i = 0; i < columns.Count; i++) {
            if (intensityCols[i] >= mdata.ExpressionColumnCount) {
                // Numeric annotation columns cannot be grouped
                grouping[i] = i;
                continue;
            }
            if (ArrayUtils.Contains(uniqueGroupNames, groupNames[i][0])) {
                grouping[i] = ArrayUtils.IndexOf(uniqueGroupNames, groupNames[i][0]);
                continue;
            }
            grouping[i] = i;
        }
        // collect the factors per group key
        Dictionary<int, List<double>> factors = new Dictionary<int, List<double>>();
        for (int i = 0; i < columns.Count; i++) {
            List<double> bucket;
            if (!factors.TryGetValue(grouping[i], out bucket)) {
                bucket = new List<double>();
                factors.Add(grouping[i], bucket);
            }
            bucket.Add(normalizationFactors[i]);
        }
        // every column receives the mean factor of its group
        double[] averagedNormalizationFactors = new double[columns.Count];
        for (int i = 0; i < columns.Count; i++) {
            List<double> groupFactors = factors[grouping[i]];
            averagedNormalizationFactors[i] = ArrayUtils.Mean(groupFactors);
        }
        normalizationFactors = averagedNormalizationFactors;
    }

    // loop over all selected columns and calculate copy numbers
    for (int col = 0; col < columns.Count; col++) {
        string sampleName = sampleNames[col];
        double[] column = columns[col];
        double factor = normalizationFactors[col];
        double[] copyNumbers = new double[mdata.RowCount];
        double[] concentrations = new double[mdata.RowCount]; // nanomolar
        double[] massFraction = new double[mdata.RowCount]; // ppm
        double[] moleFraction = new double[mdata.RowCount]; // ppm
        double totalProtein = 0; // picograms
        double histoneMass = 0; // picograms
        double totalMolecules = 0;
        for (int row = 0; row < mdata.RowCount; row++) {
            if (!double.IsNaN(column[row]) && !double.IsNaN(mw[row])) {
                copyNumbers[row] = (column[row]/detectabilityNormFactor[row])*factor;
                totalMolecules += copyNumbers[row];
                totalProtein += (copyNumbers[row]*mw[row]*1e12)/avogadro; // picograms
                if (ArrayUtils.Contains(histoneRows, row)) {
                    histoneMass += (copyNumbers[row]*mw[row]*1e12)/avogadro; // picograms
                }
            }
        }
        double totalVolume = (totalProtein/(param.GetDoubleParam("Total cellular protein concentration [g/l]").Value))*1000;
        // femtoliters
        for (int row = 0; row < mdata.RowCount; row++) {
            if (!double.IsNaN(column[row]) && !double.IsNaN(mw[row])) {
                concentrations[row] = ((copyNumbers[row]/(totalVolume*1e-15))/avogadro)*1e9; // nanomolar
                massFraction[row] = (((copyNumbers[row]*mw[row]*1e12)/avogadro)/totalProtein)*1e6; // ppm
                moleFraction[row] = (copyNumbers[row]/totalMolecules)*1e6; // ppm
            }
        }

        string suffix = (sampleName == "") ? "" : " " + sampleName;
        if (ArrayUtils.Contains(outputColumns, 0)) {
            mdata.AddNumericColumn("Copy number" + suffix, "", copyNumbers);
        }
        if (ArrayUtils.Contains(outputColumns, 1)) {
            mdata.AddNumericColumn("Concentration [nM]" + suffix, "", concentrations);
        }
        if (ArrayUtils.Contains(outputColumns, 2)) {
            mdata.AddNumericColumn("Abundance (mass/total mass) [*10^-6]" + suffix, "", massFraction);
        }
        if (ArrayUtils.Contains(outputColumns, 3)) {
            mdata.AddNumericColumn("Abundance (molecules/total molecules) [*10^-6]" + suffix, "", moleFraction);
        }

        double[] rank = ArrayUtils.Rank(copyNumbers);
        double[] relativeRank = new double[mdata.RowCount];
        double validRanks = mdata.RowCount;
        for (int row = 0; row < mdata.RowCount; row++) {
            // remove rank for protein with no copy number information
            if (double.IsNaN(copyNumbers[row]) || copyNumbers[row] == 0) {
                rank[row] = double.NaN;
                validRanks--; // do not consider as valid
            }
            // invert ranking, so that rank 0 is the most abundant protein
            // (a NaN rank stays NaN under the subtraction)
            rank[row] = mdata.RowCount - rank[row];
        }
        for (int row = 0; row < mdata.RowCount; row++) {
            relativeRank[row] = rank[row]/validRanks;
        }
        if (ArrayUtils.Contains(outputColumns, 4)) {
            mdata.AddNumericColumn("Copy number rank" + suffix, "", rank);
        }
        if (ArrayUtils.Contains(outputColumns, 5)) {
            mdata.AddNumericColumn("Relative copy number rank" + suffix, "", relativeRank);
        }

        // Annotation rows only exist for expression columns and only when the columns were not
        // collapsed into a single median column.
        if (intensityCols[col] < mdata.ExpressionColumnCount && averagingMode != 3) {
            totalProteinRow[intensityCols[col]] = Math.Round(totalProtein, 2);
            totalMoleculesRow[intensityCols[col]] = Math.Round(totalMolecules, 0);
            organismRow[intensityCols[col]] = new string[] {organism.name};
            histoneMassRow[intensityCols[col]] = Math.Round(histoneMass, 4);
            ploidyRow[intensityCols[col]] = Math.Round((histoneMass*1e-12)/cValue, 2);
            cellVolumeRow[intensityCols[col]] = Math.Round(totalVolume, 2); // femtoliters
        }
    }

    if (averagingMode != 3 && ArrayUtils.Contains(outputColumns, 6)) {
        mdata.AddNumericRow("Total protein [pg/cell]", "", totalProteinRow);
        mdata.AddNumericRow("Total molecules per cell", "", totalMoleculesRow);
        mdata.AddCategoryRow("Organism", "", organismRow);
        mdata.AddNumericRow("Histone mass [pg/cell]", "", histoneMassRow);
        mdata.AddNumericRow("Ploidy", "", ploidyRow);
        mdata.AddNumericRow("Cell volume [fl]", "", cellVolumeRow);
    }
}
/// <summary>
/// For every ratio/intensity column pair, computes p-values with <c>CalcSignificanceB</c>,
/// derives a significance category from them and appends both as new columns to the matrix.
/// </summary>
/// <remarks>
/// Ratio columns are always expression columns. Intensity indices below
/// <c>mdata.ExpressionColumnCount</c> address expression columns; higher indices address numeric
/// columns, which are converted to floats.
/// </remarks>
/// <param name="mdata">Matrix that is read from and that receives the new columns.</param>
/// <param name="param">
/// Parameter set; must contain "Ratio columns", "Intensity columns", "Use for truncation",
/// "Threshold value" and "Side".
/// </param>
/// <param name="supplTables">Not used by this method.</param>
/// <param name="documents">Not used by this method.</param>
/// <param name="processInfo">
/// Receives an error string when no ratio column is selected or when the numbers of ratio and
/// intensity columns differ.
/// </param>
/// <exception cref="Exception">
/// Thrown when "Side" is not 0, 1 or 2, or when the truncation mode is neither p-value nor
/// Benjamini-Hochberg (i.e. any "Use for truncation" index above 1).
/// </exception>
public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables, ref IDocumentData[] documents, ProcessInfo processInfo) {
    int[] ratioColumns = param.GetMultiChoiceParam("Ratio columns").Value;
    int[] intensityColumns = param.GetMultiChoiceParam("Intensity columns").Value;
    if (ratioColumns.Length == 0) {
        processInfo.ErrString = "Please specify some ratio columns.";
        return;
    }
    if (ratioColumns.Length != intensityColumns.Length) {
        processInfo.ErrString = "The number of ratio and intensity columns have to be equal.";
        return;
    }

    // Index 0 -> p-value, index 1 -> Benjamini-Hochberg, anything else -> permutation based.
    int truncationChoice = param.GetSingleChoiceParam("Use for truncation").Value;
    TestTruncation truncation;
    if (truncationChoice == 0) {
        truncation = TestTruncation.Pvalue;
    } else if (truncationChoice == 1) {
        truncation = TestTruncation.BenjaminiHochberg;
    } else {
        truncation = TestTruncation.PermutationBased;
    }

    double threshold = param.GetDoubleParam("Threshold value").Value;

    int sideChoice = param.GetSingleChoiceParam("Side").Value;
    TestSide side;
    if (sideChoice == 0) {
        side = TestSide.Both;
    } else if (sideChoice == 1) {
        side = TestSide.Left;
    } else if (sideChoice == 2) {
        side = TestSide.Right;
    } else {
        throw new Exception("Never get here.");
    }

    for (int pair = 0; pair < ratioColumns.Length; pair++) {
        int ratioColumn = ratioColumns[pair];
        int intensityColumn = intensityColumns[pair];

        float[] ratios = mdata.GetExpressionColumn(ratioColumn);
        float[] intensities;
        if (intensityColumn < mdata.ExpressionColumnCount) {
            intensities = mdata.GetExpressionColumn(intensityColumn);
        } else {
            intensities = ArrayUtils.ToFloats(mdata.NumericColumns[intensityColumn - mdata.ExpressionColumnCount]);
        }
        double[] pValues = CalcSignificanceB(ratios, intensities, side);

        // Only the two non-permutation truncation modes are handled here.
        string[][] significance;
        if (truncation == TestTruncation.Pvalue) {
            significance = PerseusPluginUtils.CalcPvalueSignificance(pValues, threshold);
        } else if (truncation == TestTruncation.BenjaminiHochberg) {
            significance = PerseusPluginUtils.CalcBenjaminiHochbergFdr(pValues, threshold);
        } else {
            throw new Exception("Never get here.");
        }

        string ratioName = mdata.ExpressionColumnNames[ratioColumn];
        mdata.AddNumericColumn(ratioName + " Significance B", "", pValues);
        mdata.AddCategoryColumn(ratioName + " B significant", "", significance);
    }
}
/// <summary>
/// Subtracts the "shift" parameter (narrowed to <c>float</c>) from every expression value of the
/// matrix, in place.
/// </summary>
/// <param name="mdata">Matrix whose expression values are modified.</param>
/// <param name="param">Parameter set; must contain the double parameter "shift".</param>
/// <param name="supplTables">Not used by this method.</param>
/// <param name="documents">Not used by this method.</param>
/// <param name="processInfo">Not used by this method.</param>
public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables, ref IDocumentData[] documents, ProcessInfo processInfo) {
    double requestedShift = param.GetDoubleParam("shift").Value;
    // Narrow once; every cell is reduced by the same float amount.
    float offset = (float) requestedShift;
    for (int row = 0; row < mdata.RowCount; row++) {
        for (int column = 0; column < mdata.ExpressionColumnCount; column++) {
            mdata[row, column] = mdata[row, column] - offset;
        }
    }
}
/// <summary>
/// Adds a value drawn from <c>Random2.NextGaussian(0, std)</c> to every expression value of the
/// matrix, in place, where <c>std</c> is the "Standard deviation" parameter.
/// </summary>
/// <param name="mdata">Matrix whose expression values are modified.</param>
/// <param name="param">Parameter set; must contain the double parameter "Standard deviation".</param>
/// <param name="supplTables">Not used by this method.</param>
/// <param name="documents">Not used by this method.</param>
/// <param name="processInfo">Not used by this method.</param>
public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables, ref IDocumentData[] documents, ProcessInfo processInfo) {
    Random2 generator = new Random2();
    double standardDeviation = param.GetDoubleParam("Standard deviation").Value;
    // Cells are visited row by row; one draw per cell, in that order.
    for (int row = 0; row < mdata.RowCount; row++) {
        for (int column = 0; column < mdata.ExpressionColumnCount; column++) {
            float noise = (float) generator.NextGaussian(0, standardDeviation);
            mdata[row, column] = mdata[row, column] + noise;
        }
    }
}