/// <summary>
/// Reads the "Value" parameter and hands it, narrowed to single precision,
/// to <c>ReplaceMissingsByVal</c> together with the matrix.
/// </summary>
/// <param name="mdata">Matrix passed on to <c>ReplaceMissingsByVal</c>.</param>
/// <param name="param">Parameter set; must provide the double parameter "Value".</param>
/// <param name="supplTables">Not used by this method.</param>
/// <param name="processInfo">Not used by this method.</param>
public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables, ProcessInfo processInfo)
 {
     // The parameter is stored as a double; the helper expects a float.
     double replacement = param.GetDoubleParam("Value").Value;
     ReplaceMissingsByVal((float) replacement, mdata);
 }
예제 #2
0
 /// <summary>
 /// Reads the access mode and the interval bounds from the parameters and
 /// forwards them to <c>MapToInterval1</c>.
 /// </summary>
 /// <param name="mdata">Matrix passed on to <c>MapToInterval1</c>.</param>
 /// <param name="param">
 /// Parameter set; must provide the single-choice parameter "Matrix access" and
 /// the double parameters "Minimum" and "Maximum".
 /// </param>
 /// <param name="supplTables">Not used by this method.</param>
 /// <param name="processInfo">Not used by this method.</param>
 public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables, ProcessInfo processInfo)
 {
     // Choice index 0 selects row-wise access; every other index selects the alternative.
     int accessChoice = param.GetSingleChoiceParam("Matrix access").Value;
     double lowerBound = param.GetDoubleParam("Minimum").Value;
     double upperBound = param.GetDoubleParam("Maximum").Value;
     MapToInterval1(accessChoice == 0, mdata, lowerBound, upperBound);
 }
예제 #3
0
        /// <summary>
        /// For every selected expression column, computes p-values with
        /// <c>CalcSignificanceA</c>, derives a significance category from them and
        /// appends both as new columns ("... Significance A" and "... A significant").
        /// </summary>
        /// <param name="mdata">Matrix that is read from and that receives the new columns.</param>
        /// <param name="param">
        /// Parameter set; must provide "Columns", "Use for truncation",
        /// "Threshold value" and "Side".
        /// </param>
        /// <param name="supplTables">Not used by this method.</param>
        /// <param name="documents">Not used by this method.</param>
        /// <param name="processInfo">Not used by this method.</param>
        /// <exception cref="Exception">
        /// Thrown when the "Side" index is not 0, 1 or 2, or when a column is processed
        /// while the truncation mode is neither p-value nor Benjamini-Hochberg.
        /// </exception>
        public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
			ref IDocumentData[] documents, ProcessInfo processInfo)
        {
            int[] selectedColumns = param.GetMultiChoiceParam("Columns").Value;
            // Index 0 -> p-value, 1 -> Benjamini-Hochberg, anything else -> permutation based.
            int truncationChoice = param.GetSingleChoiceParam("Use for truncation").Value;
            TestTruncation truncation;
            if (truncationChoice == 0){
                truncation = TestTruncation.Pvalue;
            } else if (truncationChoice == 1){
                truncation = TestTruncation.BenjaminiHochberg;
            } else{
                truncation = TestTruncation.PermutationBased;
            }
            double threshold = param.GetDoubleParam("Threshold value").Value;
            int sideChoice = param.GetSingleChoiceParam("Side").Value;
            TestSide side;
            if (sideChoice == 0){
                side = TestSide.Both;
            } else if (sideChoice == 1){
                side = TestSide.Left;
            } else if (sideChoice == 2){
                side = TestSide.Right;
            } else{
                throw new Exception("Never get here.");
            }
            for (int k = 0; k < selectedColumns.Length; k++){
                int columnIndex = selectedColumns[k];
                float[] ratios = mdata.GetExpressionColumn(columnIndex);
                double[] pvals = CalcSignificanceA(ratios, side);
                // The permutation-based mode has no implementation here and ends up in the throw.
                string[][] significant;
                if (truncation == TestTruncation.Pvalue){
                    significant = PerseusPluginUtils.CalcPvalueSignificance(pvals, threshold);
                } else if (truncation == TestTruncation.BenjaminiHochberg){
                    significant = PerseusPluginUtils.CalcBenjaminiHochbergFdr(pvals, threshold);
                } else{
                    throw new Exception("Never get here.");
                }
                string columnName = mdata.ExpressionColumnNames[columnIndex];
                mdata.AddNumericColumn(columnName + " Significance A", "", pvals);
                mdata.AddCategoryColumn(columnName + " A significant", "", significant);
            }
        }
 /// <summary>
 /// Adds a numeric row to the matrix whose entries are taken from double
 /// parameters named after the expression columns.
 /// </summary>
 /// <param name="mdata">Matrix that supplies the expression column names and receives the new row.</param>
 /// <param name="param">
 /// Parameter set; must provide the string parameter "Row name" and one double
 /// parameter per expression column, keyed by that column's name.
 /// </param>
 private static void ProcessDataCreate(IMatrixData mdata, Parameters param)
 {
     string rowName = param.GetStringParam("Row name").Value;
     int columnCount = mdata.ExpressionColumnCount;
     double[] rowValues = new double[columnCount];
     for (int col = 0; col < columnCount; col++){
         // Each expression column has a parameter of the same name holding its value.
         rowValues[col] = param.GetDoubleParam(mdata.ExpressionColumnNames[col]).Value;
     }
     // The row name is passed for both the name and the second string argument.
     mdata.AddNumericRow(rowName, rowName, rowValues);
 }
 /// <summary>
 /// Reads "Width", "Down shift" and "Mode" and dispatches to either the
 /// per-column or the whole-matrix Gaussian replacement helper.
 /// </summary>
 /// <param name="mdata">Matrix passed on to the selected helper.</param>
 /// <param name="param">
 /// Parameter set; must provide the double parameters "Width" and "Down shift"
 /// and the single-choice parameter "Mode".
 /// </param>
 /// <param name="supplTables">Not used by this method.</param>
 /// <param name="processInfo">Not used by this method.</param>
 public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables, ProcessInfo processInfo)
 {
     double gaussWidth = param.GetDoubleParam("Width").Value;
     double downShift = param.GetDoubleParam("Down shift").Value;
     int modeChoice = param.GetSingleChoiceParam("Mode").Value;
     // Mode index 0 treats each column separately; any other index uses the whole matrix.
     if (modeChoice != 0){
         ReplaceMissingsByGaussianWholeMatrix(gaussWidth, downShift, mdata);
         return;
     }
     ReplaceMissingsByGaussianByColumn(gaussWidth, downShift, mdata);
 }
예제 #6
0
        /// <summary>
        /// Sets every expression value to NaN whose quality value lies on the wrong
        /// side of the "Threshold" parameter. Reports an error and does nothing when
        /// the matrix carries no quality data.
        /// </summary>
        /// <param name="mdata">Matrix whose quality values are inspected and whose entries are overwritten.</param>
        /// <param name="param">Parameter set; must provide the double parameter "Threshold".</param>
        /// <param name="supplTables">Not used by this method.</param>
        /// <param name="documents">Not used by this method.</param>
        /// <param name="processInfo">Receives the error message when no quality data is present.</param>
        public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
			ref IDocumentData[] documents, ProcessInfo processInfo)
        {
            if (!mdata.HasQuality){
                processInfo.ErrString = "No quality data loaded.";
                return;
            }
            double cutoff = param.GetDoubleParam("Threshold").Value;
            for (int row = 0; row < mdata.RowCount; row++){
                for (int col = 0; col < mdata.ExpressionColumnCount; col++){
                    float quality = mdata.QualityValues[row, col];
                    // When bigger is better, values below the cutoff fail; otherwise values above it fail.
                    // A value equal to the cutoff (or NaN) never fails.
                    bool failsQuality = mdata.QualityBiggerIsBetter ? quality < cutoff : quality > cutoff;
                    if (failsQuality){
                        mdata[row, col] = float.NaN;
                    }
                }
            }
        }
 /// <summary>
 /// Compares every entry of one numeric or expression column against a reference
 /// value and passes the indices of the rows that satisfy the selected relation
 /// to <c>PerseusPluginUtils.FilterRows</c>.
 /// </summary>
 /// <param name="mdata">Matrix that supplies the column and is handed to <c>FilterRows</c>.</param>
 /// <param name="param">
 /// Parameter set; must provide "Column", "Value", "Remove if" and "Keep NaN".
 /// It is also forwarded to <c>FilterRows</c>.
 /// </param>
 /// <param name="supplTables">Not used by this method.</param>
 /// <param name="processInfo">Not used by this method.</param>
 /// <exception cref="Exception">
 /// Thrown when a non-NaN entry is evaluated and the "Remove if" index is outside 0..5.
 /// </exception>
 public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables, ProcessInfo processInfo)
 {
     int columnChoice = param.GetSingleChoiceParam("Column").Value;
     double reference = param.GetDoubleParam("Value").Value;
     int rule = param.GetSingleChoiceParam("Remove if").Value;
     bool retainNan = param.GetBoolParam("Keep NaN").Value;
     // The choice list holds the numeric columns first, followed by the expression columns.
     double[] columnValues;
     if (columnChoice < mdata.NumericColumnCount){
         columnValues = mdata.NumericColumns[columnChoice];
     } else{
         columnValues = ArrayUtils.ToDoubles(mdata.GetExpressionColumn(columnChoice - mdata.NumericColumnCount));
     }
     List<int> passingRows = new List<int>();
     for (int row = 0; row < columnValues.Length; row++){
         double current = columnValues[row];
         bool passes;
         if (!double.IsNaN(current)){
             // Rule indices: 0 '>', 1 '>=', 2 '!=', 3 '==', 4 '<=', 5 '<'.
             if (rule == 0){
                 passes = current > reference;
             } else if (rule == 1){
                 passes = current >= reference;
             } else if (rule == 2){
                 passes = current != reference;
             } else if (rule == 3){
                 passes = current == reference;
             } else if (rule == 4){
                 passes = current <= reference;
             } else if (rule == 5){
                 passes = current < reference;
             } else{
                 throw new Exception("Never get here.");
             }
         } else{
             // NaN entries cannot be compared; the "Keep NaN" flag decides for them.
             passes = retainNan;
         }
         if (passes){
             passingRows.Add(row);
         }
     }
     PerseusPluginUtils.FilterRows(mdata, param, passingRows.ToArray());
 }
예제 #8
0
        /// <summary>
        /// Converts the selected intensity columns into per-protein copy numbers,
        /// concentrations, mass/mole fractions and ranks, and appends the requested
        /// results to <paramref name="mdata"/> as numeric columns and annotation rows.
        /// </summary>
        /// <remarks>
        /// The intensity-to-copy-number factor is determined per column according to
        /// the "Scaling mode" parameter (0: total protein amount, 1: histone based,
        /// otherwise 1) and optionally averaged according to "Averaging mode"
        /// (1: one factor for all columns, 2: one factor per group, 3: the columns
        /// themselves are collapsed to their row-wise median beforehand).
        /// </remarks>
        /// <param name="mdata">Matrix that supplies the input columns and receives the results.</param>
        /// <param name="param">Parameter set holding all user choices read below.</param>
        /// <param name="supplTables">Not used by this method.</param>
        /// <param name="documents">Not used by this method.</param>
        /// <param name="processInfo">
        /// Receives an error message when no intensity column is selected, when no
        /// grouping is selected in group-averaging mode, or when supposedly
        /// logarithmized data contains zeroes (processing continues in the last case).
        /// </param>
        public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
			ref IDocumentData[] documents, ProcessInfo processInfo)
        {
            int[] outputColumns = param.GetMultiChoiceParam("Output").Value;
            int proteinIdColumnInd = param.GetSingleChoiceParam("Protein IDs").Value;
            string[] proteinIds = mdata.StringColumns[proteinIdColumnInd];
            int[] intensityCols = param.GetMultiChoiceParam("Intensities").Value;
            if (intensityCols.Length == 0){
                processInfo.ErrString = "Please select at least one column containing protein intensities.";
                return;
            }
            // variable to hold all intensity values
            List<double[]> columns = new List<double[]>();
            string[] sampleNames = new string[intensityCols.Length];
            for (int col = 0; col < intensityCols.Length; col++){
                double[] values;
                // the choice list holds the expression columns first, followed by the numeric columns
                if (intensityCols[col] < mdata.ExpressionColumnCount){
                    values = ArrayUtils.ToDoubles(mdata.GetExpressionColumn(intensityCols[col]));
                    sampleNames[col] = mdata.ExpressionColumnNames[intensityCols[col]];
                } else{
                    values = mdata.NumericColumns[intensityCols[col] - mdata.ExpressionColumnCount];
                    sampleNames[col] = mdata.NumericColumnNames[intensityCols[col] - mdata.ExpressionColumnCount];
                }
                // strip a leading "Intensity "/"intensity " or "LFQ intensity " prefix to obtain the sample name
                sampleNames[col] = new Regex(@"^(?:(?:LFQ )?[Ii]ntensity )?(.*)$").Match(sampleNames[col]).Groups[1].Value;
                columns.Add(values);
            }
            // average over columns if this option is selected
            if (param.GetSingleChoiceWithSubParams("Averaging mode").Value == 3){
                double[] column = new double[mdata.RowCount];
                for (int row = 0; row < mdata.RowCount; row++){
                    double[] values = new double[intensityCols.Length];
                    for (int col = 0; col < intensityCols.Length; col++){
                        values[col] = columns[col][row];
                    }
                    column[row] = ArrayUtils.Median(ExtractValidValues(values, false));
                }
                // delete the original list of columns
                columns = new List<double[]>{column};
                sampleNames = new[]{""};
            }
            // revert logarithm if necessary
            // NOTE(review): for numeric annotation columns 'columns' appears to hold the arrays
            // returned by mdata.NumericColumns, so this writes into them in place - confirm intended.
            if (param.GetBoolWithSubParams("Logarithmized").Value){
                double[] logBases = new[]{2, Math.E, 10};
                double logBase =
                    logBases[param.GetBoolWithSubParams("Logarithmized").GetSubParameters().GetSingleChoiceParam("log base").Value];
                foreach (double[] t in columns){
                    for (int row = 0; row < mdata.RowCount; row++){
                        if (t[row] == 0){
                            // only a warning: the value is still exponentiated below
                            processInfo.ErrString = "Are the columns really logarithmized?\nThey contain zeroes!";
                        }
                        t[row] = Math.Pow(logBase, t[row]);
                    }
                }
            }
            double[] mw = mdata.NumericColumns[param.GetSingleChoiceParam("Molecular masses").Value];
            // detect whether the molecular masses are given in Da or kDa
            // NOTE(review): the scaling writes into the array obtained from mdata.NumericColumns - confirm intended.
            if (ArrayUtils.Median(mw) < 250) // likely kDa
            {
                for (int i = 0; i < mw.Length; i++){
                    mw[i] *= 1000;
                }
            }
            // by default the intensities are normalized by the molecular mass,
            // optionally by a user-selected correction factor column instead
            double[] rawNormFactor = mw;
            if (param.GetBoolWithSubParams("Detectability correction").Value){
                rawNormFactor =
                    mdata.NumericColumns[
                        param.GetBoolWithSubParams("Detectability correction")
                             .GetSubParameters()
                             .GetSingleChoiceParam("Correction factor")
                             .Value];
            }
            // the normalization factor needs to be a usable, nonzero number for all proteins:
            // replace zero and NaN with 1. NaN has to be detected with double.IsNaN because
            // a comparison with double.NaN is never true. The cleaned factors go into a
            // separate array so that 'mw' keeps its NaN entries (rows without a known mass are
            // skipped further below) and the source column is not modified.
            double[] detectabilityNormFactor = new double[mdata.RowCount];
            for (int row = 0; row < mdata.RowCount; row++){
                double raw = rawNormFactor[row];
                detectabilityNormFactor[row] = (raw == 0 || double.IsNaN(raw)) ? 1 : raw;
            }
            // detect the organism
            Organism organism = DetectOrganism(proteinIds);
            // c value the amount of DNA per cell, see: http://en.wikipedia.org/wiki/C-value
            double cValue = (organism.genomeSize*basePairWeight)/avogadro;
            // find the histones
            int[] histoneRows = FindHistones(proteinIds, organism);
            // write a categorical column indicating the histones
            string[][] histoneCol = new string[mdata.RowCount][];
            for (int row = 0; row < mdata.RowCount; row++){
                histoneCol[row] = (ArrayUtils.Contains(histoneRows, row)) ? new[]{"+"} : new[]{""};
            }
            mdata.AddCategoryColumn("Histones", "", histoneCol);
            // initialize the variables for the annotation rows
            double[] totalProteinRow = new double[mdata.ExpressionColumnCount];
            double[] totalMoleculesRow = new double[mdata.ExpressionColumnCount];
            string[][] organismRow = new string[mdata.ExpressionColumnCount][];
            double[] histoneMassRow = new double[mdata.ExpressionColumnCount];
            double[] ploidyRow = new double[mdata.ExpressionColumnCount];
            double[] cellVolumeRow = new double[mdata.ExpressionColumnCount];
            double[] normalizationFactors = new double[columns.Count];
            // calculate normalization factors for each column
            for (int col = 0; col < columns.Count; col++){
                double[] column = columns[col];
                // normalization factor to go from intensities to copies,
                // needs to be determined either using the total protein or the histone scaling approach
                double factor;
                switch (param.GetSingleChoiceWithSubParams("Scaling mode").Value){
                    case 0: // total protein amount
                        double mwWeightedNormalizedSummedIntensities = 0;
                        for (int row = 0; row < mdata.RowCount; row++){
                            if (!double.IsNaN(column[row]) && !double.IsNaN(mw[row])){
                                mwWeightedNormalizedSummedIntensities += (column[row]/detectabilityNormFactor[row])*mw[row];
                            }
                        }
                        factor =
                            (param.GetSingleChoiceWithSubParams("Scaling mode")
                                  .GetSubParameters()
                                  .GetDoubleParam("Protein amount per cell [pg]")
                                  .Value*1e-12*avogadro)/mwWeightedNormalizedSummedIntensities;
                        break;
                    case 1: // histone mode
                        double mwWeightedNormalizedSummedHistoneIntensities = 0;
                        foreach (int row in histoneRows){
                            if (!double.IsNaN(column[row]) && !double.IsNaN(mw[row])){
                                mwWeightedNormalizedSummedHistoneIntensities += (column[row]/detectabilityNormFactor[row])*mw[row];
                            }
                        }
                        double ploidy =
                            param.GetSingleChoiceWithSubParams("Scaling mode").GetSubParameters().GetDoubleParam("Ploidy").Value;
                        factor = (cValue*ploidy*avogadro)/mwWeightedNormalizedSummedHistoneIntensities;
                        break;
                    default:
                        factor = 1;
                        break;
                }
                normalizationFactors[col] = factor;
            }
            // check averaging mode
            if (param.GetSingleChoiceWithSubParams("Averaging mode").Value == 1) // same factor for all
            {
                double factor = ArrayUtils.Mean(normalizationFactors);
                for (int i = 0; i < normalizationFactors.Length; i++){
                    normalizationFactors[i] = factor;
                }
            }
            if (param.GetSingleChoiceWithSubParams("Averaging mode").Value == 2) // same factor in each group
            {
                if (
                    param.GetSingleChoiceWithSubParams("Averaging mode").GetSubParameters().GetSingleChoiceParam("Grouping").Value ==
                        -1){
                    processInfo.ErrString = "No grouping selected.";
                    return;
                }
                string[][] groupNames =
                    mdata.GetCategoryRowAt(
                        param.GetSingleChoiceWithSubParams("Averaging mode").GetSubParameters().GetSingleChoiceParam("Grouping").Value);
                string[] uniqueGroupNames = Unique(groupNames);
                int[] grouping = new int[columns.Count];
                for (int i = 0; i < columns.Count; i++){
                    if (intensityCols[i] >= mdata.ExpressionColumnCount){ // Numeric annotation columns cannot be grouped
                        grouping[i] = i;
                        continue;
                    }
                    // NOTE(review): groupNames is indexed with the position in the selection (i),
                    // not with the expression column index intensityCols[i] - confirm this is intended.
                    if (ArrayUtils.Contains(uniqueGroupNames, groupNames[i][0])){
                        grouping[i] = ArrayUtils.IndexOf(uniqueGroupNames, groupNames[i][0]);
                        continue;
                    }
                    grouping[i] = i;
                }
                // collect the factors of all columns that share a group id
                Dictionary<int, List<double>> factors = new Dictionary<int, List<double>>();
                for (int i = 0; i < columns.Count; i++){
                    if (factors.ContainsKey(grouping[i])){
                        factors[grouping[i]].Add(normalizationFactors[i]);
                    } else{
                        factors.Add(grouping[i], new List<double>{normalizationFactors[i]});
                    }
                }
                // every column gets the mean factor of its group
                double[] averagedNormalizationFactors = new double[columns.Count];
                for (int i = 0; i < columns.Count; i++){
                    List<double> factor;
                    factors.TryGetValue(grouping[i], out factor);
                    averagedNormalizationFactors[i] = ArrayUtils.Mean(factor);
                }
                normalizationFactors = averagedNormalizationFactors;
            }
            // loop over all selected columns and calculate copy numbers
            for (int col = 0; col < columns.Count; col++){
                string sampleName = sampleNames[col];
                double[] column = columns[col];
                double factor = normalizationFactors[col];
                double[] copyNumbers = new double[mdata.RowCount];
                double[] concentrations = new double[mdata.RowCount]; // femtoliters
                double[] massFraction = new double[mdata.RowCount];
                double[] moleFraction = new double[mdata.RowCount];
                double totalProtein = 0; // picograms
                double histoneMass = 0; // picograms
                double totalMolecules = 0;
                for (int row = 0; row < mdata.RowCount; row++){
                    // rows without intensity or without molecular mass keep the default value 0
                    if (!double.IsNaN(column[row]) && !double.IsNaN(mw[row])){
                        copyNumbers[row] = (column[row]/detectabilityNormFactor[row])*factor;
                        totalMolecules += copyNumbers[row];
                        totalProtein += (copyNumbers[row]*mw[row]*1e12)/avogadro; // picograms
                        if (ArrayUtils.Contains(histoneRows, row)){
                            histoneMass += (copyNumbers[row]*mw[row]*1e12)/avogadro; // picograms
                        }
                    }
                }
                double totalVolume = (totalProtein/(param.GetDoubleParam("Total cellular protein concentration [g/l]").Value))*1000;
                // femtoliters
                for (int row = 0; row < mdata.RowCount; row++){
                    if (!double.IsNaN(column[row]) && !double.IsNaN(mw[row])){
                        concentrations[row] = ((copyNumbers[row]/(totalVolume*1e-15))/avogadro)*1e9; // nanomolar
                        massFraction[row] = (((copyNumbers[row]*mw[row]*1e12)/avogadro)/totalProtein)*1e6; // ppm
                        moleFraction[row] = (copyNumbers[row]/totalMolecules)*1e6; // ppm
                    }
                }
                string suffix = (sampleName == "") ? "" : " " + sampleName;
                // the indices in 'outputColumns' select which results are written
                if (ArrayUtils.Contains(outputColumns, 0)){
                    mdata.AddNumericColumn("Copy number" + suffix, "", copyNumbers);
                }
                if (ArrayUtils.Contains(outputColumns, 1)){
                    mdata.AddNumericColumn("Concentration [nM]" + suffix, "", concentrations);
                }
                if (ArrayUtils.Contains(outputColumns, 2)){
                    mdata.AddNumericColumn("Abundance (mass/total mass) [*10^-6]" + suffix, "", massFraction);
                }
                if (ArrayUtils.Contains(outputColumns, 3)){
                    mdata.AddNumericColumn("Abundance (molecules/total molecules) [*10^-6]" + suffix, "", moleFraction);
                }
                double[] rank = ArrayUtils.Rank(copyNumbers);
                double[] relativeRank = new double[mdata.RowCount];
                double validRanks = mdata.RowCount;
                for (int row = 0; row < mdata.RowCount; row++){
                    // remove rank for protein with no copy number information
                    if (double.IsNaN((copyNumbers[row])) || copyNumbers[row] == 0){
                        rank[row] = double.NaN;
                        validRanks--; // do not consider as valid
                    }
                    // invert ranking, so that rank 0 is the most abundant protein
                    // (a NaN rank stays NaN under the subtraction)
                    rank[row] = mdata.RowCount - rank[row];
                }
                for (int row = 0; row < mdata.RowCount; row++){
                    relativeRank[row] = rank[row]/validRanks;
                }
                if (ArrayUtils.Contains(outputColumns, 4)){
                    mdata.AddNumericColumn("Copy number rank" + suffix, "", rank);
                }
                if (ArrayUtils.Contains(outputColumns, 5)){
                    mdata.AddNumericColumn("Relative copy number rank" + suffix, "", relativeRank);
                }
                // per-sample summaries can only be attached to expression columns,
                // and not when the columns were collapsed into a single median column
                if (intensityCols[col] < mdata.ExpressionColumnCount &&
                    param.GetSingleChoiceWithSubParams("Averaging mode").Value != 3){
                    totalProteinRow[intensityCols[col]] = Math.Round(totalProtein, 2);
                    totalMoleculesRow[intensityCols[col]] = Math.Round(totalMolecules, 0);
                    organismRow[intensityCols[col]] = new string[]{organism.name};
                    histoneMassRow[intensityCols[col]] = Math.Round(histoneMass, 4);
                    ploidyRow[intensityCols[col]] = Math.Round((histoneMass*1e-12)/cValue, 2);
                    cellVolumeRow[intensityCols[col]] = Math.Round(totalVolume, 2); // femtoliters
                }
            }
            if (param.GetSingleChoiceWithSubParams("Averaging mode").Value != 3 && ArrayUtils.Contains(outputColumns, 6)){
                mdata.AddNumericRow("Total protein [pg/cell]", "", totalProteinRow);
                mdata.AddNumericRow("Total molecules per cell", "", totalMoleculesRow);
                mdata.AddCategoryRow("Organism", "", organismRow);
                mdata.AddNumericRow("Histone mass [pg/cell]", "", histoneMassRow);
                mdata.AddNumericRow("Ploidy", "", ploidyRow);
                mdata.AddNumericRow("Cell volume [fl]", "", cellVolumeRow);
            }
        }
예제 #9
0
        /// <summary>
        /// For every pair of ratio and intensity column, computes p-values with
        /// <c>CalcSignificanceB</c>, derives a significance category from them and
        /// appends both as new columns ("... Significance B" and "... B significant").
        /// </summary>
        /// <param name="mdata">Matrix that is read from and that receives the new columns.</param>
        /// <param name="param">
        /// Parameter set; must provide "Ratio columns", "Intensity columns",
        /// "Use for truncation", "Threshold value" and "Side".
        /// </param>
        /// <param name="supplTables">Not used by this method.</param>
        /// <param name="documents">Not used by this method.</param>
        /// <param name="processInfo">
        /// Receives an error message when no ratio column is selected or when the
        /// numbers of ratio and intensity columns differ.
        /// </param>
        /// <exception cref="Exception">
        /// Thrown when the "Side" index is not 0, 1 or 2, or when a column pair is processed
        /// while the truncation mode is neither p-value nor Benjamini-Hochberg.
        /// </exception>
        public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
			ref IDocumentData[] documents, ProcessInfo processInfo)
        {
            int[] ratioColumns = param.GetMultiChoiceParam("Ratio columns").Value;
            int[] intensityColumns = param.GetMultiChoiceParam("Intensity columns").Value;
            if (ratioColumns.Length == 0){
                processInfo.ErrString = "Please specify some ratio columns.";
                return;
            }
            if (ratioColumns.Length != intensityColumns.Length){
                processInfo.ErrString = "The number of ratio and intensity columns have to be equal.";
                return;
            }
            // Index 0 -> p-value, 1 -> Benjamini-Hochberg, anything else -> permutation based.
            int truncationChoice = param.GetSingleChoiceParam("Use for truncation").Value;
            TestTruncation truncation;
            if (truncationChoice == 0){
                truncation = TestTruncation.Pvalue;
            } else if (truncationChoice == 1){
                truncation = TestTruncation.BenjaminiHochberg;
            } else{
                truncation = TestTruncation.PermutationBased;
            }
            double threshold = param.GetDoubleParam("Threshold value").Value;
            int sideChoice = param.GetSingleChoiceParam("Side").Value;
            TestSide side;
            if (sideChoice == 0){
                side = TestSide.Both;
            } else if (sideChoice == 1){
                side = TestSide.Left;
            } else if (sideChoice == 2){
                side = TestSide.Right;
            } else{
                throw new Exception("Never get here.");
            }
            for (int pair = 0; pair < ratioColumns.Length; pair++){
                int ratioIndex = ratioColumns[pair];
                int intensityIndex = intensityColumns[pair];
                float[] ratios = mdata.GetExpressionColumn(ratioIndex);
                // Intensity choices list the expression columns first, followed by the numeric columns.
                float[] intensities;
                if (intensityIndex < mdata.ExpressionColumnCount){
                    intensities = mdata.GetExpressionColumn(intensityIndex);
                } else{
                    intensities = ArrayUtils.ToFloats(mdata.NumericColumns[intensityIndex - mdata.ExpressionColumnCount]);
                }
                double[] pvals = CalcSignificanceB(ratios, intensities, side);
                // The permutation-based mode has no implementation here and ends up in the throw.
                string[][] significant;
                if (truncation == TestTruncation.Pvalue){
                    significant = PerseusPluginUtils.CalcPvalueSignificance(pvals, threshold);
                } else if (truncation == TestTruncation.BenjaminiHochberg){
                    significant = PerseusPluginUtils.CalcBenjaminiHochbergFdr(pvals, threshold);
                } else{
                    throw new Exception("Never get here.");
                }
                string ratioName = mdata.ExpressionColumnNames[ratioIndex];
                mdata.AddNumericColumn(ratioName + " Significance B", "", pvals);
                mdata.AddCategoryColumn(ratioName + " B significant", "", significant);
            }
        }
예제 #10
0
        /// <summary>
        /// Subtracts the "shift" parameter from every expression value of the matrix.
        /// </summary>
        /// <param name="mdata">Matrix whose expression values are modified in place.</param>
        /// <param name="param">Parameter set; must provide the double parameter "shift".</param>
        /// <param name="supplTables">Not used by this method.</param>
        /// <param name="documents">Not used by this method.</param>
        /// <param name="processInfo">Not used by this method.</param>
        public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
			ref IDocumentData[] documents, ProcessInfo processInfo)
        {
            // The matrix stores floats, so the offset is narrowed once up front.
            float offset = (float) param.GetDoubleParam("shift").Value;
            for (int row = 0; row < mdata.RowCount; row++){
                for (int col = 0; col < mdata.ExpressionColumnCount; col++){
                    mdata[row, col] = mdata[row, col] - offset;
                }
            }
        }
예제 #11
0
        /// <summary>
        /// Adds a value drawn from <c>Random2.NextGaussian(0, std)</c> to every
        /// expression value of the matrix, where <c>std</c> is the
        /// "Standard deviation" parameter.
        /// </summary>
        /// <param name="mdata">Matrix whose expression values are modified in place.</param>
        /// <param name="param">Parameter set; must provide the double parameter "Standard deviation".</param>
        /// <param name="supplTables">Not used by this method.</param>
        /// <param name="documents">Not used by this method.</param>
        /// <param name="processInfo">Not used by this method.</param>
        public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
			ref IDocumentData[] documents, ProcessInfo processInfo)
        {
            // One generator is created per call and shared by all cells.
            Random2 generator = new Random2();
            double sigma = param.GetDoubleParam("Standard deviation").Value;
            for (int row = 0; row < mdata.RowCount; row++){
                for (int col = 0; col < mdata.ExpressionColumnCount; col++){
                    // The sample is narrowed to float before it is added to the stored value.
                    mdata[row, col] = mdata[row, col] + (float) generator.NextGaussian(0, sigma);
                }
            }
        }