Ejemplo n.º 1
0
        /// <summary>
        /// Runs <c>Zscore</c> (ungrouped) or <c>ZscoreGroups</c> (grouped by a category row) on the
        /// matrix and, when requested, stores the returned means and standard deviations as
        /// numeric annotations.
        /// </summary>
        /// <param name="mdata">Matrix that is processed and that receives the report annotations.</param>
        /// <param name="param">Supplies "Matrix access" (with sub-parameter "Grouping"),
        /// "Report mean and std. dev." and "Use median".</param>
        /// <param name="supplTables">Not referenced by this method.</param>
        /// <param name="documents">Not referenced by this method.</param>
        /// <param name="processInfo">Provides the thread count; its ErrString is set when an entry
        /// of the selected category row carries more than one value.</param>
        public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
                                ref IDocumentData[] documents, ProcessInfo processInfo)
        {
            ParameterWithSubParams <int> matrixAccess = param.GetParamWithSubParams <int>("Matrix access");
            bool byRows = matrixAccess.Value == 0;
            // "Grouping" is only read for row access. Its value is shifted down by one, so a value
            // of 0 becomes -1 and takes the same ungrouped path as column access.
            int groupIndex = byRows
                ? matrixAccess.GetSubParameters().GetParam <int>("Grouping").Value - 1
                : -1;
            bool addReport = param.GetParam <bool>("Report mean and std. dev.").Value;
            bool useMedian = param.GetParam <bool>("Use median").Value;

            if (groupIndex >= 0)
            {
                string[][] groupRow = mdata.GetCategoryRowAt(groupIndex);
                // Every entry may carry at most one group value.
                for (int c = 0; c < groupRow.Length; c++)
                {
                    if (groupRow[c].Length > 1)
                    {
                        processInfo.ErrString = "The groups are overlapping.";
                        return;
                    }
                }
                string[] groupNames = ArrayUtils.UniqueValuesPreserveOrder(groupRow);
                ZscoreGroups(mdata, groupRow, processInfo.NumThreads, addReport, useMedian, groupNames,
                             out double[][] groupMeans, out double[][] groupStddevs);
                if (!addReport)
                {
                    return;
                }
                for (int g = 0; g < groupNames.Length; g++)
                {
                    mdata.AddNumericColumn("Mean " + groupNames[g], "Mean", groupMeans[g]);
                    mdata.AddNumericColumn("Std. dev. " + groupNames[g], "Std. dev.", groupStddevs[g]);
                }
                return;
            }

            Zscore(byRows, mdata, processInfo.NumThreads, addReport, useMedian,
                   out double[] means, out double[] stddevs);
            if (!addReport)
            {
                return;
            }
            // Row access reports as numeric columns, column access as numeric rows.
            if (byRows)
            {
                mdata.AddNumericColumn("Mean", "Mean", means);
                mdata.AddNumericColumn("Std. dev.", "Std. dev.", stddevs);
            }
            else
            {
                mdata.AddNumericRow("Mean", "Mean", means);
                mdata.AddNumericRow("Std. dev.", "Std. dev.", stddevs);
            }
        }
Ejemplo n.º 2
0
        /// <summary>
        /// For every selected x/y column pair, calls
        /// <c>DensityEstimation.CalcDensitiesAtData</c> and appends the two returned arrays as
        /// numeric columns ("Density_..." and "Excluded fraction_...").
        /// </summary>
        /// <param name="mdata">Matrix the columns are read from and the results are added to.</param>
        /// <param name="param">Supplies "x", "y", "Distribution type" and "Number of points".</param>
        /// <param name="supplTables">Not referenced by this method.</param>
        /// <param name="documents">Not referenced by this method.</param>
        /// <param name="processInfo">Receives an error text when no columns are selected or the
        /// two selections differ in length.</param>
        public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
                                ref IDocumentData[] documents, ProcessInfo processInfo)
        {
            int[] xColumns = param.GetParam <int[]>("x").Value;
            int[] yColumns = param.GetParam <int[]>("y").Value;
            if (xColumns.Length == 0)
            {
                processInfo.ErrString = "Please select some columns";
                return;
            }
            if (xColumns.Length != yColumns.Length)
            {
                processInfo.ErrString =
                    "Please select the same number of columns in the boxes for the first and second columns.";
                return;
            }
            int typeInd = param.GetParam <int>("Distribution type").Value;
            int points  = param.GetParam <int>("Number of points").Value;

            // The estimation type depends only on the parameter value, so it is resolved once.
            // Any value other than 1, 2 or 3 keeps the joint distribution.
            DensityEstimationType type;
            if (typeInd == 1)
            {
                type = DensityEstimationType.DivideByX;
            }
            else if (typeInd == 2)
            {
                type = DensityEstimationType.DivideByY;
            }
            else if (typeInd == 3)
            {
                type = DensityEstimationType.DivideByXY;
            }
            else
            {
                type = DensityEstimationType.JointDistribution;
            }

            for (int pair = 0; pair < xColumns.Length; pair++)
            {
                int xInd = xColumns[pair];
                int yInd = yColumns[pair];
                double[] xvals = GetColumn(mdata, xInd);
                double[] yvals = GetColumn(mdata, yInd);
                (double[] dvals, double[] pvals) = DensityEstimation.CalcDensitiesAtData(xvals, yvals, points, type);
                string xname = GetColumnName(mdata, xInd);
                string yname = GetColumnName(mdata, yInd);
                string pairSuffix = xname + "_" + yname;
                string pairText   = xname + " and " + yname + ".";
                mdata.AddNumericColumn("Density_" + pairSuffix,
                                       "Density of data points in the plane spanned by the columns " + pairText,
                                       dvals);
                mdata.AddNumericColumn("Excluded fraction_" + pairSuffix,
                                       "Percentage of points with a point density smaller than at this point in the plane spanned by the columns " +
                                       pairText, pvals);
            }
        }
        /// <summary>
        /// Lets <c>ProcessDataAddAnnotation</c> build annotation columns for the base identifiers
        /// and appends the resulting category, text and numeric columns to the matrix.
        /// </summary>
        /// <param name="mdata">Matrix whose row count is passed on and which receives the new columns.</param>
        /// <param name="para">Parameters forwarded to <c>GetBaseIds</c> and <c>ProcessDataAddAnnotation</c>.</param>
        /// <param name="supplTables">Not referenced by this method.</param>
        /// <param name="documents">Not referenced by this method.</param>
        /// <param name="processInfo">Forwarded to <c>ProcessDataAddAnnotation</c>.</param>
        public void ProcessData(IMatrixData mdata, Parameters para, ref IMatrixData[] supplTables,
                                ref IDocumentData[] documents, ProcessInfo processInfo)
        {
            string[] baseIds = GetBaseIds(para, mdata);

            // Nothing is added when the helper reports failure.
            if (!ProcessDataAddAnnotation(mdata.RowCount, para, baseIds, processInfo,
                                          out string[] annotationNames, out int[] catColInds,
                                          out int[] textColInds, out int[] numColInds,
                                          out string[][][] catCols, out string[][] textCols,
                                          out double[][] numCols))
            {
                return;
            }

            // Each *ColInds array maps a produced column to its entry in annotationNames.
            for (int c = 0; c < catCols.Length; c++)
            {
                mdata.AddCategoryColumn(annotationNames[catColInds[c]], "", catCols[c]);
            }
            for (int t = 0; t < textCols.Length; t++)
            {
                mdata.AddStringColumn(annotationNames[textColInds[t]], "", textCols[t]);
            }
            for (int n = 0; n < numCols.Length; n++)
            {
                mdata.AddNumericColumn(annotationNames[numColInds[n]], "", numCols[n]);
            }
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Collapses a four-row matrix on its "id" text column via <c>UniqueRows</c> and checks
        /// the two remaining rows for every column kind.
        /// </summary>
        public void SmallTest()
        {
            double[,] mainValues =
            {
                { 0, 4 },
                { 1, 5 },
                { 2, 6 },
                { 3, 7 }
            };
            IMatrixData mdata = PerseusFactory.CreateMatrixData(mainValues);

            // Rows 1-3 share the id "b".
            mdata.AddStringColumn("id", "", new [] { "a", "b", "b", "b" });
            mdata.AddStringColumn("str", "", new [] { "a;b", "b;c", "c;d", "d;e" });
            string[][] categories =
            {
                new[] { "a", "b" },
                new[] { "b", "c" },
                new[] { "c", "d" },
                new[] { "d", "e" }
            };
            mdata.AddCategoryColumn("cat", "", categories);
            // NOTE(review): five values for a four-row matrix; kept exactly as in the original test.
            mdata.AddNumericColumn("num", "", new double[] { 0, 1, 2, 3, 4 });
            double[][] multiNumeric =
            {
                new [] { 0, 4d },
                new [] { 1, 5d },
                new [] { 2, 6d },
                new [] { 3, 7d }
            };
            mdata.AddMultiNumericColumn("mnum", "", multiNumeric);

            mdata.UniqueRows(mdata.StringColumns[0], ArrayUtils.Median, UniqueRows.Union, UniqueRows.CatUnion, UniqueRows.MultiNumUnion);

            Assert.AreEqual(2, mdata.RowCount);

            int[] expectedMain0 = { 0, 2 };
            int[] expectedMain1 = { 4, 6 };
            CollectionAssert.AreEqual(expectedMain0, mdata.Values.GetColumn(0));
            CollectionAssert.AreEqual(expectedMain1, mdata.Values.GetColumn(1));

            string[] expectedText = { "a;b", "b;c;d;e" };
            CollectionAssert.AreEqual(expectedText, mdata.GetStringColumn("str"));

            string[][] expectedCategories =
            {
                new [] { "a", "b" },
                new [] { "b", "c", "d", "e" }
            };
            CollectionAssert.AreEqual(expectedCategories, mdata.GetCategoryColumnAt(0));

            int[] expectedNumeric = { 0, 2 };
            CollectionAssert.AreEqual(expectedNumeric, mdata.NumericColumns[0]);

            double[][] expectedMultiNumeric =
            {
                new [] { 0d, 4 },
                new [] { 1d, 5, 2, 6, 3, 7 }
            };
            CollectionAssert.AreEqual(expectedMultiNumeric, mdata.MultiNumericColumns[0]);
        }
Ejemplo n.º 5
0
 /// <summary>
 /// Adds the entries of <paramref name="results"/> to the matrix as numeric columns named
 /// "&lt;pair1&gt;_vs_&lt;pair2&gt;_&lt;key&gt;", adds a "-log10" companion column for p-value-like
 /// entries, and finally appends the "Valid" (DESeq2 only) and "Significant" category columns.
 /// </summary>
 /// <param name="results">Column key mapped to the textual values of that column.</param>
 /// <param name="mdata">Matrix that receives the new columns.</param>
 /// <param name="pair1">First name used in the column prefix.</param>
 /// <param name="pair2">Second name used in the column prefix.</param>
 /// <param name="validCol">Category column added as "..._Valid" when method is "DESeq2".</param>
 /// <param name="sigCol">Category column added as "..._Significant".</param>
 /// <param name="method">"DESeq2" or "EdgeR"; selects which keys count as p-value-like.</param>
 /// <param name="replicate">When false, the "LR" entry is not added as a column.</param>
 /// <exception cref="FormatException">Thrown by <c>Double.Parse</c> when a value of an added
 /// column is not a valid number.</exception>
 public void ImportResult(Dictionary <string, string[]> results, IMatrixData mdata,
                          string pair1, string pair2, string[][] validCol, string[][] sigCol, string method,
                          bool replicate)
 {
     // Both values are loop-invariant, so they are computed once.
     string prefix = pair1 + "_vs_" + pair2 + "_";
     // Stand-in score for a p-value of exactly 0, whose logarithm would be infinite.
     double zeroPValueScore = Math.Log10(1 / Double.MaxValue) * -1;

     foreach (KeyValuePair <string, string[]> entry in results)
     {
         string   key       = entry.Key;
         string[] rawValues = entry.Value;

         // "LR" is only imported when replicates are available.
         bool skipRawColumn = key == "LR" && !replicate;
         if (!skipRawColumn)
         {
             string columnName = prefix + key;
             mdata.AddNumericColumn(columnName, columnName, Array.ConvertAll(rawValues, Double.Parse));
         }

         bool isPValueLike = ((key == "p-value" || key == "padj") && method == "DESeq2") ||
                             ((key == "p-value" || key == "FDR") && method == "EdgeR");
         if (!isPValueLike)
         {
             continue;
         }

         // The scratch array is only allocated for entries that actually need it.
         double[] negLog10 = new double[rawValues.Length];
         for (int i = 0; i < rawValues.Length; i++)
         {
             // TryParse leaves p at 0 for unparseable text, which is then scored like a zero
             // p-value. Double.Parse above already throws for such text on these keys.
             double.TryParse(rawValues[i], out double p);
             negLog10[i] = p == 0 ? zeroPValueScore : Math.Log10(p) * -1;
         }
         string logColumnName = prefix + "-log10" + key;
         mdata.AddNumericColumn(logColumnName, logColumnName, negLog10);
     }

     if (method == "DESeq2")
     {
         string validName = prefix + "Valid";
         mdata.AddCategoryColumn(validName, validName, validCol);
     }
     string significantName = prefix + "Significant";
     mdata.AddCategoryColumn(significantName, significantName, sigCol);
 }
Ejemplo n.º 6
0
 /// <summary>
 /// Copies the given main columns into numeric annotation columns (same name and description)
 /// and then calls <c>ExtractColumns</c> with the complement of <paramref name="colInds"/>.
 /// </summary>
 /// <param name="colInds">Indices of the main columns to convert.</param>
 /// <param name="mdata">Matrix that is modified in place.</param>
 private static void ExpressionToNumeric(IList <int> colInds, IMatrixData mdata)
 {
     // Computed before any column is added; presumably the main-column indices not listed
     // in colInds (verify against ArrayUtils.Complement).
     int[] keptMainColumns = ArrayUtils.Complement(colInds, mdata.ColumnCount);

     for (int k = 0; k < colInds.Count; k++)
     {
         int      source  = colInds[k];
         double[] numbers = ArrayUtils.ToDoubles(mdata.Values.GetColumn(source));
         mdata.AddNumericColumn(mdata.ColumnNames[source], mdata.ColumnDescriptions[source], numbers);
     }

     mdata.ExtractColumns(keptMainColumns);
 }
        /// <summary>
        /// Builds the fixtures used by the tests: the <c>peptides</c> table (run through
        /// <c>ExpandMultiNumeric</c>), the <c>proteinMain</c> table, the <c>expand</c> table,
        /// the <c>matching</c> processor and its <c>parameters</c>.
        /// </summary>
        public void TestInitialize()
        {
            // Peptide table: one row whose protein-group text column holds two ids ("13;21").
            float[,] peptideMainValues = { { 9.0f } };
            peptides = PerseusFactory.CreateMatrixData(peptideMainValues, new List <string> { "pep_MS/MS Count" });
            peptides.AddNumericColumn("pep_Intensity", "", new [] { 0.0 });
            peptides.AddStringColumn("pep_id", "", new [] { "35" });
            peptides.AddStringColumn("pep_Protein group IDs", "", new [] { "13;21" });
            peptides.Quality.Init(1, 1);
            peptides.Quality.Set(0, 0, 1);

            // Run ExpandMultiNumeric with text column index 1 selected.
            var expander         = new ExpandMultiNumeric();
            var expandError      = string.Empty;
            var expandParameters = expander.GetParameters(peptides, ref expandError);
            expandParameters.GetParam <int[]>("Text columns").Value = new[] { 1 };
            IMatrixData[]   noTables    = null;
            IDocumentData[] noDocuments = null;
            expander.ProcessData(peptides, expandParameters, ref noTables, ref noDocuments, CreateProcessInfo());

            // Protein table: two rows with ids "13" and "21".
            float[,] proteinMainValues =
            {
                { 166250000.0f },
                { 8346000.0f }
            };
            proteinMain = PerseusFactory.CreateMatrixData(proteinMainValues, new List <string> { "prot_LFQ intensity" });
            proteinMain.Name = "protein main";
            proteinMain.AddStringColumn("prot_id", "", new [] { "13", "21" });
            proteinMain.AddStringColumn("prot_gene name", "", new [] { "geneA", "geneB" });

            // Hand-built table with one row per protein group id.
            float[,] expandMainValues =
            {
                { 9.0f },
                { 9.0f }
            };
            expand = PerseusFactory.CreateMatrixData(expandMainValues, new List <string> { "pep_MS/MS Count" });
            expand.Name = "expand";
            expand.AddNumericColumn("pep_Intensity", "", new [] { 0.0, 0.0 });
            expand.AddStringColumn("pep_id", "", new [] { "35", "35" });
            expand.AddStringColumn("pep_Protein group IDs", "", new [] { "13", "21" });

            // Matching processor and its default parameters for the pair (expand, proteinMain).
            matching = new MatchingRowsByName();
            var matchingError = string.Empty;
            parameters = matching.GetParameters(new[] { expand, proteinMain }, ref matchingError);
        }
Ejemplo n.º 8
0
        /// <summary>
        /// For each selected main column, calls <c>CalcSignificanceA</c> and appends the p-values
        /// ("&lt;column&gt; Significance A") together with a category column
        /// ("&lt;column&gt; A significant") derived from the chosen truncation.
        /// </summary>
        /// <param name="mdata">Matrix the columns are read from and the results are added to.</param>
        /// <param name="param">Supplies "Columns", "Use for truncation", "Threshold value" and "Side".</param>
        /// <param name="supplTables">Not referenced by this method.</param>
        /// <param name="documents">Not referenced by this method.</param>
        /// <param name="processInfo">Not referenced by this method.</param>
        /// <exception cref="Exception">Thrown for a "Side" value outside 0..2, and inside the
        /// column loop for any truncation other than p-value or Benjamini-Hochberg.</exception>
        public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
                                ref IDocumentData[] documents, ProcessInfo processInfo)
        {
            int[] cols       = param.GetParam <int[]>("Columns").Value;
            int   truncIndex = param.GetParam <int>("Use for truncation").Value;

            TestTruncation truncation;
            if (truncIndex == 0)
            {
                truncation = TestTruncation.Pvalue;
            }
            else if (truncIndex == 1)
            {
                truncation = TestTruncation.BenjaminiHochberg;
            }
            else
            {
                truncation = TestTruncation.PermutationBased;
            }

            double threshold = param.GetParam <double>("Threshold value").Value;
            int    sideInd   = param.GetParam <int>("Side").Value;

            TestSide side;
            if (sideInd == 0)
            {
                side = TestSide.Both;
            }
            else if (sideInd == 1)
            {
                side = TestSide.Left;
            }
            else if (sideInd == 2)
            {
                side = TestSide.Right;
            }
            else
            {
                throw new Exception("Never get here.");
            }

            foreach (int col in cols)
            {
                BaseVector columnValues = mdata.Values.GetColumn(col);
                double[]   pvals        = CalcSignificanceA(columnValues, side);

                string[][] significant;
                if (truncation == TestTruncation.Pvalue)
                {
                    significant = PerseusPluginUtils.CalcPvalueSignificance(pvals, threshold);
                }
                else if (truncation == TestTruncation.BenjaminiHochberg)
                {
                    // The FDR values returned through the out parameter are not used here.
                    double[] unusedFdrs;
                    significant = PerseusPluginUtils.CalcBenjaminiHochbergFdr(pvals, threshold, pvals.Length, out unusedFdrs);
                }
                else
                {
                    // Reached for TestTruncation.PermutationBased.
                    throw new Exception("Never get here.");
                }

                string columnName = mdata.ColumnNames[col];
                mdata.AddNumericColumn(columnName + " Significance A", "", pvals);
                mdata.AddCategoryColumn(columnName + " A significant", "", significant);
            }
        }
Ejemplo n.º 9
0
 /// <summary>
 /// Duplicates the selected main, numeric, multi-numeric, categorical and text columns.
 /// Each copy gets a name from <c>StringUtils.GetNextAvailableName</c> so that it does not
 /// clash with the names already present for that column kind.
 /// </summary>
 /// <param name="data">Matrix that is modified in place.</param>
 /// <param name="param">Supplies "Main columns", "Numerical columns", "Multi-numerical columns",
 /// "Categorical columns" and "Text columns".</param>
 /// <param name="supplTables">Not referenced by this method.</param>
 /// <param name="documents">Not referenced by this method.</param>
 /// <param name="processInfo">Not referenced by this method.</param>
 public void ProcessData(IMatrixData data, Parameters param, ref IMatrixData[] supplTables,
                         ref IDocumentData[] documents, ProcessInfo processInfo)
 {
     int[] exColInds       = param.GetParam <int[]>("Main columns").Value;
     int[] numColInds      = param.GetParam <int[]>("Numerical columns").Value;
     int[] multiNumColInds = param.GetParam <int[]>("Multi-numerical columns").Value;
     int[] catColInds      = param.GetParam <int[]>("Categorical columns").Value;
     int[] textColInds     = param.GetParam <int[]>("Text columns").Value;

     if (exColInds.Length > 0)
     {
         // Re-extract all existing main columns followed by the selected ones; the copies
         // therefore start at index ncol and are renamed afterwards.
         int ncol = data.ColumnCount;
         data.ExtractColumns(ArrayUtils.Concat(ArrayUtils.ConsecutiveInts(data.ColumnCount), exColInds));
         HashSet <string> taken = new HashSet <string>(data.ColumnNames);
         for (int i = 0; i < exColInds.Length; i++)
         {
             string s = StringUtils.GetNextAvailableName(data.ColumnNames[ncol + i], taken);
             data.ColumnNames[ncol + i] = s;
             taken.Add(s);
         }
     }

     // For the annotation columns the name set is built once per column kind and kept
     // current via Add, instead of being rebuilt on every iteration.
     HashSet <string> takenNumeric = new HashSet <string>(data.NumericColumnNames);
     foreach (int ind in numColInds)
     {
         string s = StringUtils.GetNextAvailableName(data.NumericColumnNames[ind], takenNumeric);
         data.AddNumericColumn(s, data.NumericColumnDescriptions[ind], (double[])data.NumericColumns[ind].Clone());
         takenNumeric.Add(s);
     }

     HashSet <string> takenMultiNumeric = new HashSet <string>(data.MultiNumericColumnNames);
     foreach (int ind in multiNumColInds)
     {
         string s = StringUtils.GetNextAvailableName(data.MultiNumericColumnNames[ind], takenMultiNumeric);
         // Array.Clone on a jagged array is shallow, so the per-row arrays are copied as well;
         // otherwise the duplicate would share its cells with the source column.
         double[][] source = data.MultiNumericColumns[ind];
         double[][] copy   = new double[source.Length][];
         for (int row = 0; row < source.Length; row++)
         {
             copy[row] = source[row] == null ? null : (double[])source[row].Clone();
         }
         data.AddMultiNumericColumn(s, data.MultiNumericColumnDescriptions[ind], copy);
         takenMultiNumeric.Add(s);
     }

     HashSet <string> takenCategory = new HashSet <string>(data.CategoryColumnNames);
     foreach (int ind in catColInds)
     {
         string s = StringUtils.GetNextAvailableName(data.CategoryColumnNames[ind], takenCategory);
         data.AddCategoryColumn(s, data.CategoryColumnDescriptions[ind], data.GetCategoryColumnAt(ind));
         takenCategory.Add(s);
     }

     HashSet <string> takenText = new HashSet <string>(data.StringColumnNames);
     foreach (int ind in textColInds)
     {
         string s = StringUtils.GetNextAvailableName(data.StringColumnNames[ind], takenText);
         // The description must come from the text columns; the main-column descriptions
         // are indexed differently and may be shorter than the text-column list.
         data.AddStringColumn(s, data.StringColumnDescriptions[ind], (string[])data.StringColumns[ind].Clone());
         takenText.Add(s);
     }
 }
Ejemplo n.º 10
0
        /// <summary>
        /// Round trip: writes a matrix with every annotation kind via
        /// <c>PerseusUtils.WriteMatrix</c>, reads the text back with
        /// <c>PerseusUtils.ReadMatrix</c> and checks the counts plus one text cell.
        /// </summary>
        public void WriteMatrixTest()
        {
            // Main data: 2 rows x 3 columns.
            double[,] mainValues = { { 1, 2, 3 }, { 3, 4, 5 } };
            List <string> mainNames = new List <string> { "col1", "col2", "col3" };
            IMatrixData written = PerseusFactory.CreateMatrixData(mainValues, mainNames);

            // Annotation rows.
            written.AddCategoryRow("catrow", "this is catrow", new[] { new[] { "cat1" }, new[] { "cat1", "cat2" }, new[] { "cat2" } });
            written.AddNumericRow("numrow", "this is numrow", new[] { -1.0, 1, 2 });

            // Annotation columns.
            written.AddStringColumn("strcol1", "this is stringcol1", new[] { "1", "2" });
            written.AddStringColumn("strcol2", "", new[] { "", "hallo" });
            written.AddNumericColumn("numcol", "", new[] { 1.0, 2.0 });
            written.AddMultiNumericColumn("multnumcol", "this is multnumcol", new[] { new[] { -2.0, 2.0 }, new double[] {} });
            written.AddCategoryColumn("catcol", "", new[] { new[] { "cat1", "cat1.1" }, new[] { "cat2", "cat1" } });

            // Serialize to an in-memory buffer; the writer is flushed before the bytes are read.
            string serialized;
            using (MemoryStream buffer = new MemoryStream())
            {
                using (StreamWriter writer = new StreamWriter(buffer))
                {
                    PerseusUtils.WriteMatrix(written, writer);
                    writer.Flush();
                    serialized = Encoding.UTF8.GetString(buffer.ToArray());
                }
            }

            // Read the text back into a fresh matrix.
            IMatrixData reread = PerseusFactory.CreateMatrixData();
            PerseusUtils.ReadMatrix(reread, new ProcessInfo(new Settings(), status => { }, progress => { }, 1),
                                    () => new StreamReader(new MemoryStream(Encoding.UTF8.GetBytes(serialized))),
                                    "matrix1", '\t');

            Assert.AreEqual(2, reread.RowCount);
            Assert.AreEqual(3, reread.ColumnCount);

            Assert.AreEqual(2, reread.StringColumnCount);
            Assert.AreEqual(1, reread.NumericColumnCount);
            Assert.AreEqual(1, reread.CategoryColumnCount);
            Assert.AreEqual(1, reread.MultiNumericColumnCount);

            int strcol2Index = reread.StringColumnNames.FindIndex(col => col.Equals("strcol2"));
            Assert.AreEqual("hallo", reread.StringColumns[strcol2Index][1]);

            Assert.AreEqual(1, reread.CategoryRowCount);
            Assert.AreEqual(1, reread.NumericRowCount);
        }
Ejemplo n.º 11
0
 /// <summary>
 /// Adds one numeric column per group of the given category row, holding for every matrix row
 /// the standard deviation of that group's finite main values.
 /// </summary>
 /// <param name="groupColInd">Index of the category row that defines the groups.</param>
 /// <param name="validVals">Minimum number of finite values required; below it the cell is NaN.</param>
 /// <param name="mdata">Matrix that is read and that receives the new columns.</param>
 /// <param name="varInd">0 stores the standard deviation as is; any other value divides it by
 /// the square root of the number of values. The column is named "stddev ..." in both cases.</param>
 private static void AddStandardDeviation(int groupColInd, int validVals, IMatrixData mdata, int varInd)
 {
     string[][] grouping = mdata.GetCategoryRowAt(groupColInd);
     string[]   groups   = ArrayUtils.UniqueValuesPreserveOrder(grouping);
     int[][]    members  = PerseusPluginUtils.GetMainColIndices(grouping, groups);

     int        rowCount = mdata.RowCount;
     double[][] result   = new double[groups.Length][];
     for (int g = 0; g < result.Length; g++)
     {
         result[g] = new double[rowCount];
     }

     for (int row = 0; row < rowCount; row++)
     {
         for (int g = 0; g < groups.Length; g++)
         {
             // Collect the finite values of this group in this row.
             List <double> finite = new List <double>();
             foreach (int col in members[g])
             {
                 double v = mdata.Values.Get(row, col);
                 if (double.IsNaN(v) || double.IsInfinity(v))
                 {
                     continue;
                 }
                 finite.Add(v);
             }

             double spread = double.NaN;
             if (finite.Count >= validVals)
             {
                 spread = ArrayUtils.StandardDeviation(finite);
                 if (varInd != 0)
                 {
                     spread /= Math.Sqrt(finite.Count);
                 }
             }
             result[g][row] = spread;
         }
     }

     for (int g = 0; g < groups.Length; g++)
     {
         string title = "stddev " + groups[g];
         mdata.AddNumericColumn(title, title, result[g]);
     }
 }
Ejemplo n.º 12
0
 /// <summary>
 /// Applies each selected operation to every cell of the selected multi-numeric columns and
 /// appends the results as numeric columns named "&lt;column&gt;_&lt;operation name&gt;".
 /// </summary>
 /// <param name="mdata">Matrix the multi-numeric columns are read from and the results are added to.</param>
 /// <param name="param1">Supplies "Columns" and "Operation".</param>
 /// <param name="supplTables">Not referenced by this method.</param>
 /// <param name="documents">Not referenced by this method.</param>
 /// <param name="processInfo">Not referenced by this method.</param>
 public void ProcessData(IMatrixData mdata, Parameters param1, ref IMatrixData[] supplTables,
                         ref IDocumentData[] documents, ProcessInfo processInfo)
 {
     int[] cols = param1.GetParam <int[]>("Columns").Value;
     int[] ops  = param1.GetParam <int[]>("Operation").Value;

     for (int o = 0; o < ops.Length; o++)
     {
         int op = ops[o];

         // First compute all results for this operation ...
         double[][] reduced = new double[cols.Length][];
         for (int c = 0; c < cols.Length; c++)
         {
             double[][] cells  = mdata.MultiNumericColumns[cols[c]];
             double[]   column = new double[cells.Length];
             reduced[c] = column;
             for (int row = 0; row < column.Length; row++)
             {
                 column[row] = operations[op](cells[row]);
             }
         }

         // ... then append them, so nothing is added if an operation fails part-way.
         for (int c = 0; c < cols.Length; c++)
         {
             mdata.AddNumericColumn(mdata.MultiNumericColumnNames[cols[c]] + "_" + names[op], "", reduced[c]);
         }
     }
 }
Ejemplo n.º 13
0
 /// <summary>
 /// Adds one numeric column per group of the given category row, holding for every matrix row
 /// the result of <paramref name="func"/> applied to that group's finite main values.
 /// The main columns themselves are not touched here.
 /// </summary>
 /// <param name="groupColInd">Index of the category row that defines the groups.</param>
 /// <param name="validVals">Minimum number of finite values required; below it the cell is NaN.</param>
 /// <param name="mdata">Matrix that is read and that receives the new columns.</param>
 /// <param name="func">Aggregation applied to the finite values of a group.</param>
 private static void FillMatrixKeep(int groupColInd, int validVals, IMatrixData mdata, Func <IList <double>, double> func)
 {
     string[][] grouping = mdata.GetCategoryRowAt(groupColInd);
     string[]   groups   = ArrayUtils.UniqueValuesPreserveOrder(grouping);
     int[][]    members  = PerseusPluginUtils.GetMainColIndices(grouping, groups);

     int        rowCount = mdata.RowCount;
     double[][] result   = new double[groups.Length][];
     for (int g = 0; g < result.Length; g++)
     {
         result[g] = new double[rowCount];
     }

     for (int row = 0; row < rowCount; row++)
     {
         for (int g = 0; g < groups.Length; g++)
         {
             // Collect the finite values of this group in this row.
             List <double> finite = new List <double>();
             foreach (int col in members[g])
             {
                 double v = mdata.Values.Get(row, col);
                 if (double.IsNaN(v) || double.IsInfinity(v))
                 {
                     continue;
                 }
                 finite.Add(v);
             }
             result[g][row] = finite.Count >= validVals ? func(finite) : double.NaN;
         }
     }

     // Name and description of each new column are the group name itself.
     for (int g = 0; g < groups.Length; g++)
     {
         mdata.AddNumericColumn(groups[g], groups[g], result[g]);
     }
 }
Ejemplo n.º 14
0
 /// <summary>
 /// For each selected modification, parses the known sites via
 /// <c>PhosphoSitePlusParser.ParseKnownMods</c>, collects the distinct upper-cased sequence
 /// windows per accession and stores <c>CountSites</c> for every row in a
 /// "&lt;modification&gt; count" numeric column. A "Known modifications" category column
 /// lists, per row, the sorted modifications whose count is positive.
 /// </summary>
 /// <param name="mdata">Matrix whose text column is read and which receives the new columns.</param>
 /// <param name="param">Supplies "Modifications" and "Uniprot column".</param>
 /// <param name="supplTables">Not referenced by this method.</param>
 /// <param name="documents">Not referenced by this method.</param>
 /// <param name="processInfo">Receives "File does not exist." when no file name is available
 /// for a modification; nothing is added to the matrix in that case.</param>
 public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
                         ref IDocumentData[] documents, ProcessInfo processInfo)
 {
     string[] mods = param.GetParam <int[]>("Modifications").StringValue.Split(new[] { ';' },
                                                                               StringSplitOptions.RemoveEmptyEntries);
     string[]   up    = mdata.StringColumns[param.GetParam <int>("Uniprot column").Value];
     // Split each cell into its ';'-separated identifiers; an empty cell has none.
     string[][] uprot = new string[up.Length][];
     for (int i = 0; i < up.Length; i++)
     {
         uprot[i] = up[i].Length > 0 ? up[i].Split(';') : new string[0];
     }
     double[][] c = new double[mods.Length][];
     for (int index = 0; index < mods.Length; index++)
     {
         string filename = PhosphoSitePlusParser.GetFilenameForMod(mods[index]);
         if (filename == null)
         {
             processInfo.ErrString = "File does not exist.";
             return;
         }
         // Only the sequence windows and accessions are used; the other outputs are ignored.
         PhosphoSitePlusParser.ParseKnownMods(filename, out string[] seqWins, out string[] accs, out string[] pubmedLtp, out string[] pubmedMs2, out string[] cstMs2, out string[] species);

         // Accession -> distinct sequence windows.
         Dictionary <string, HashSet <string> > counts = new Dictionary <string, HashSet <string> >();
         for (int i = 0; i < accs.Length; i++)
         {
             if (!counts.TryGetValue(accs[i], out HashSet <string> windows))
             {
                 windows = new HashSet <string>();
                 counts.Add(accs[i], windows);
             }
             // Invariant casing keeps the set of distinct windows independent of the
             // machine's culture (e.g. Turkish dotted/dotless i).
             windows.Add(seqWins[i].ToUpperInvariant());
         }
         c[index] = new double[up.Length];
         for (int i = 0; i < up.Length; i++)
         {
             c[index][i] = CountSites(uprot[i], counts);
         }
     }
     string[][] catCol = new string[up.Length][];
     for (int i = 0; i < catCol.Length; i++)
     {
         List <string> present = new List <string>();
         for (int j = 0; j < mods.Length; j++)
         {
             if (c[j][i] > 0)
             {
                 present.Add(mods[j]);
             }
         }
         present.Sort();
         catCol[i] = present.ToArray();
     }
     mdata.AddCategoryColumn("Known modifications", "Known modifications", catCol);
     for (int i = 0; i < mods.Length; i++)
     {
         mdata.AddNumericColumn(mods[i] + " count", mods[i] + " count", c[i]);
     }
 }
Ejemplo n.º 15
0
        /// <summary>
        /// For each selected expression column, calls <c>CalcSignificanceA</c> and appends the
        /// p-values ("&lt;column&gt; Significance A") together with a category column
        /// ("&lt;column&gt; A significant") derived from the chosen truncation.
        /// </summary>
        /// <param name="mdata">Matrix the columns are read from and the results are added to.</param>
        /// <param name="param">Supplies "Columns", "Use for truncation", "Threshold value" and "Side".</param>
        /// <param name="supplTables">Not referenced by this method.</param>
        /// <param name="documents">Not referenced by this method.</param>
        /// <param name="processInfo">Not referenced by this method.</param>
        /// <exception cref="Exception">Thrown for a "Side" value outside 0..2, and inside the
        /// column loop for any truncation other than p-value or Benjamini-Hochberg.</exception>
        public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
            ref IDocumentData[] documents, ProcessInfo processInfo)
        {
            int[] cols = param.GetMultiChoiceParam("Columns").Value;
            int truncIndex = param.GetSingleChoiceParam("Use for truncation").Value;
            TestTruncation truncation;
            if (truncIndex == 0){
                truncation = TestTruncation.Pvalue;
            } else if (truncIndex == 1){
                truncation = TestTruncation.BenjaminiHochberg;
            } else{
                truncation = TestTruncation.PermutationBased;
            }
            double threshold = param.GetDoubleParam("Threshold value").Value;
            int sideInd = param.GetSingleChoiceParam("Side").Value;
            TestSide side;
            if (sideInd == 0){
                side = TestSide.Both;
            } else if (sideInd == 1){
                side = TestSide.Left;
            } else if (sideInd == 2){
                side = TestSide.Right;
            } else{
                throw new Exception("Never get here.");
            }
            foreach (int col in cols){
                float[] columnValues = mdata.GetExpressionColumn(col);
                double[] pvals = CalcSignificanceA(columnValues, side);
                string[][] significant;
                if (truncation == TestTruncation.Pvalue){
                    significant = PerseusPluginUtils.CalcPvalueSignificance(pvals, threshold);
                } else if (truncation == TestTruncation.BenjaminiHochberg){
                    significant = PerseusPluginUtils.CalcBenjaminiHochbergFdr(pvals, threshold);
                } else{
                    // Reached for TestTruncation.PermutationBased.
                    throw new Exception("Never get here.");
                }
                string columnName = mdata.ExpressionColumnNames[col];
                mdata.AddNumericColumn(columnName + " Significance A", "", pvals);
                mdata.AddCategoryColumn(columnName + " A significant", "", significant);
            }
        }
Ejemplo n.º 16
0
        public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
			ref IDocumentData[] documents, ProcessInfo processInfo)
        {
            int proteinIdColumnInd = param.GetParam<int>("Protein IDs").Value;
            string[][] proteinIds = new string[mdata.RowCount][];
            string[][] leadingIds = new string[mdata.RowCount][];
            List<string> allIds = new List<string>();
            for (int row = 0; row < mdata.RowCount; row++){
                proteinIds[row] = mdata.StringColumns[proteinIdColumnInd][row].Split(';');
                leadingIds[row] = new[]{proteinIds[row][0]};
                allIds.AddRange(proteinIds[row]);
            }
            string fastaFilePath = param.GetParam<string>("Fasta file").Value;
            Fasta fasta = new Fasta();
            fasta.ParseFile(fastaFilePath, processInfo);
            // Text annotations
            processInfo.Status("Adding fasta header annotations.");
            int[] selection =
                param.GetParamWithSubParams<int>("Fasta header annotations").GetSubParameters().GetParam<int[]>("Annotations").Value;
            string[][] idsToBeAnnotated = (param.GetParamWithSubParams<int>("Fasta header annotations").Value == 0)
                ? proteinIds
                : leadingIds;
            ProteinSequence[][] fastaEntries = new ProteinSequence[mdata.RowCount][];
            for (int row = 0; row < mdata.RowCount; row++){
                List<ProteinSequence> rowEntries = new List<ProteinSequence>();
                foreach (string id in idsToBeAnnotated[row]){
                    ProteinSequence entry = fasta.GetEntry(id);
                    if (entry == null){
                        continue;
                    }
                    rowEntries.Add(entry);
                }
                fastaEntries[row] = rowEntries.ToArray();
            }
            if (ArrayUtils.Contains(selection, 0)){ // Entry name
                string[] annotationColumn = new string[mdata.RowCount];
                for (int row = 0; row < mdata.RowCount; row++){
                    List<string> rowAnnotations = new List<string>();
                    foreach (ProteinSequence entry in fastaEntries[row]){
                        string entryName = entry.EntryName;
                        if (entryName != null && !ArrayUtils.Contains(rowAnnotations, entryName)){
                            rowAnnotations.Add(entryName);
                        }
                    }
                    annotationColumn[row] = string.Join(";", rowAnnotations.ToArray());
                }
                mdata.AddStringColumn("Entry name", "", annotationColumn);
            }
            if (ArrayUtils.Contains(selection, 1)){ // Gene name
                string[] annotationColumn = new string[mdata.RowCount];
                for (int row = 0; row < mdata.RowCount; row++){
                    List<string> rowAnnotations = new List<string>();
                    foreach (ProteinSequence entry in fastaEntries[row]){
                        string geneName = entry.GeneName;
                        if (geneName != null && !ArrayUtils.Contains(rowAnnotations, geneName)){
                            rowAnnotations.Add(geneName);
                        }
                    }
                    annotationColumn[row] = string.Join(";", rowAnnotations.ToArray());
                }
                mdata.AddStringColumn("Gene name", "", annotationColumn);
            }
            if (ArrayUtils.Contains(selection, 2)){
                // Verbose protein name, i.e. all protein names annotated in all fasta headers, including the
                //'Isoform x of...' prefixes and '(Fragment)' suffixes
                string[] annotationColumn = new string[mdata.RowCount];
                for (int row = 0; row < mdata.RowCount; row++){
                    List<string> rowAnnotations = new List<string>();
                    foreach (ProteinSequence entry in fastaEntries[row]){
                        string proteinName = entry.ProteinName;
                        if (proteinName != null && !ArrayUtils.Contains(rowAnnotations, proteinName)){
                            rowAnnotations.Add(proteinName);
                        }
                    }
                    annotationColumn[row] = string.Join(";", rowAnnotations.ToArray());
                }
                mdata.AddStringColumn("Protein name (verbose)", "", annotationColumn);
            }
            if (ArrayUtils.Contains(selection, 3)){ // Consensus protein name
                string[] annotationColumn = new string[mdata.RowCount];
                for (int row = 0; row < mdata.RowCount; row++){
                    List<string> rowAnnotations = new List<string>();
                    foreach (ProteinSequence entry in fastaEntries[row]){
                        string proteinName = entry.ConsensusProteinName;
                        if (proteinName != null && !ArrayUtils.Contains(rowAnnotations, proteinName)){
                            rowAnnotations.Add(proteinName);
                        }
                    }
                    annotationColumn[row] = String.Join(";", rowAnnotations.ToArray());
                }
                mdata.AddStringColumn("Protein name", "", annotationColumn);
            }
            if (ArrayUtils.Contains(selection, 4)){ // Species
                string[] annotationColumn = new string[mdata.RowCount];
                for (int row = 0; row < mdata.RowCount; row++){
                    List<string> rowAnnotations = new List<string>();
                    foreach (ProteinSequence entry in fastaEntries[row]){
                        string speciesName = entry.Species;
                        if (speciesName != null && !ArrayUtils.Contains(rowAnnotations, speciesName)){
                            rowAnnotations.Add(speciesName);
                        }
                    }
                    annotationColumn[row] = String.Join(";", rowAnnotations.ToArray());
                }
                mdata.AddStringColumn("Species", "", annotationColumn);
            }
            // Numeric annotations
            processInfo.Status("Adding numeric annotations.");
            selection =
                param.GetParamWithSubParams<int>("Numeric annotations").GetSubParameters().GetParam<int[]>("Annotations").Value;
            bool annotateLeadingId = (param.GetParamWithSubParams<int>("Numeric annotations").Value == 1);
            if (ArrayUtils.Contains(selection, 0)){ // Sequence length
                double[] annotationColumn = new double[mdata.RowCount];
                for (int row = 0; row < mdata.RowCount; row++){
                    List<double> rowAnnotations = new List<double>();
                    foreach (ProteinSequence entry in fastaEntries[row]){
                        double sequenceLength = entry.GetSequence().Length;
                        rowAnnotations.Add(sequenceLength);
                        if (annotateLeadingId && rowAnnotations.Count > 0){
                            break;
                        }
                    }
                    annotationColumn[row] = ArrayUtils.Median(rowAnnotations.ToArray());
                }
                mdata.AddNumericColumn("Sequence length", "", annotationColumn);
            }
            if (ArrayUtils.Contains(selection, 1)){ // Monoisotopic molecular mass
                double[] annotationColumn = new double[mdata.RowCount];
                for (int row = 0; row < mdata.RowCount; row++){
                    List<double> rowAnnotations = new List<double>();
                    foreach (ProteinSequence entry in fastaEntries[row]){
                        double monoisotopicMass = entry.GetMonoisotopicMolecularMass();
                        rowAnnotations.Add(monoisotopicMass);
                        if (annotateLeadingId && rowAnnotations.Count > 0){
                            break;
                        }
                    }
                    annotationColumn[row] = ArrayUtils.Median(rowAnnotations.ToArray());
                }
                mdata.AddNumericColumn("Monoisotopic molecular mass", "", annotationColumn);
            }
            if (ArrayUtils.Contains(selection, 2)){ // Average molecular mass
                double[] annotationColumn = new double[mdata.RowCount];
                for (int row = 0; row < mdata.RowCount; row++){
                    List<double> rowAnnotations = new List<double>();
                    foreach (ProteinSequence entry in fastaEntries[row]){
                        double averageMass = entry.GetAverageMolecularMass();
                        rowAnnotations.Add(averageMass);
                        if (annotateLeadingId && rowAnnotations.Count > 0){
                            break;
                        }
                    }
                    annotationColumn[row] = ArrayUtils.Median(rowAnnotations.ToArray());
                }
                mdata.AddNumericColumn("Average molecular mass", "", annotationColumn);
            }
            // Theoretical peptides
            processInfo.Status("Calculating theoretical peptides.");
            annotateLeadingId = (param.GetParamWithSubParams<int>("Calculate theoretical peptides").Value == 1);
            Protease[] proteases = ArrayUtils.SubArray(Constants.defaultProteases,
                param.GetParamWithSubParams<int>("Calculate theoretical peptides").GetSubParameters().GetParam<int[]>("Proteases")
                    .Value);
            double minLength =
                param.GetParamWithSubParams<int>("Calculate theoretical peptides").GetSubParameters().GetParam<double>(
                    "Min. peptide length").Value;
            double maxLength =
                param.GetParamWithSubParams<int>("Calculate theoretical peptides").GetSubParameters().GetParam<double>(
                    "Max. peptide length").Value;
            bool displayPeptideSequences = annotateLeadingId &&
                                            param.GetParamWithSubParams<int>("Calculate theoretical peptides").GetSubParameters().GetParam<bool>(
                                                "Show sequences").Value;
            foreach (Protease protease in proteases){
                double[] annotationColumn = new double[mdata.RowCount];
                string[] peptideColumn = new string[mdata.RowCount];
                for (int row = 0; row < mdata.RowCount; row++){
                    List<double> rowAnnotations = new List<double>();
                    List<string> rowPeptides = new List<string>();
                    foreach (ProteinSequence entry in fastaEntries[row]){
                        double nTheoreticalPeptides = entry.GetNumberOfTheoreticalPeptides(protease, (int) minLength, (int) maxLength);
                        rowAnnotations.Add(nTheoreticalPeptides);
                        if (displayPeptideSequences){
                            rowPeptides.AddRange(entry.GetTheoreticalPeptideSequences(protease, (int) minLength, (int) maxLength));
                        }
                        if (annotateLeadingId && rowAnnotations.Count > 0){
                            break;
                        }
                    }
                    annotationColumn[row] = ArrayUtils.Median(rowAnnotations.ToArray());
                    peptideColumn[row] = String.Join(";", rowPeptides);
                }
                mdata.AddNumericColumn(
                    "Number of theoretical peptides (" + protease.name + ", " + minLength + "-" + maxLength + ")", "", annotationColumn);
                if (displayPeptideSequences){
                    mdata.AddStringColumn(
                        "Theoretical peptide sequences (" + protease.name + ", " + minLength + "-" + maxLength + ")", "", peptideColumn);
                }
            }
            // Sequence features
            processInfo.Status("Counting sequence features.");
            annotateLeadingId = (param.GetParamWithSubParams<int>("Count sequence features").Value == 1);
            bool normalizeBySequenceLength =
                param.GetParamWithSubParams<int>("Count sequence features").GetSubParameters().GetParam<bool>(
                    "Normalize by sequence length").Value;
            if (param.GetParamWithSubParams<int>("Count sequence features").GetSubParameters().GetParam<string>("Regex").Value !=
                ""){
                Regex regex;
                try{
                    regex =
                        new Regex(
                            param.GetParamWithSubParams<int>("Count sequence features").GetSubParameters().GetParam<string>("Regex").Value);
                } catch (ArgumentException){
                    processInfo.ErrString = "The regular expression you provided has invalid syntax.";
                    return;
                }
                double[] sequenceFeatureColumn = new double[mdata.RowCount];
                for (int row = 0; row < mdata.RowCount; row++){
                    List<double> featureCount = new List<double>();
                    foreach (ProteinSequence entry in fastaEntries[row]){
                        double nFeatures = regex.Matches(entry.GetSequence()).Count;
                        featureCount.Add(normalizeBySequenceLength ? nFeatures/entry.GetLength() : nFeatures);
                        if (annotateLeadingId){
                            break;
                        }
                    }
                    sequenceFeatureColumn[row] = ArrayUtils.Median(featureCount.ToArray());
                }
                mdata.AddNumericColumn(
                    (normalizeBySequenceLength ? "Normalized feature count (" : "Feature count (") + regex + ")", "",
                    sequenceFeatureColumn);
            }
            processInfo.Status("Done.");
        }
Ejemplo n.º 17
0
 /// <summary>
 /// Appends one numeric column per group to <paramref name="mdata"/>. For every matrix row the
 /// finite values (neither NaN nor infinite) of the main columns belonging to a group are
 /// collected and reduced with <paramref name="func"/>.
 /// </summary>
 /// <param name="groupColInd">Index of the category row that defines the groups.</param>
 /// <param name="validVals">Minimum number of finite values a group needs in a row; with fewer, the cell is NaN.</param>
 /// <param name="mdata">Matrix that is read from and that receives the new numeric columns.</param>
 /// <param name="func">Reduction applied to the finite values of one group in one row.</param>
 private static void FillMatrixKeep(int groupColInd, int validVals, IMatrixData mdata, Func<IList<double>, double> func)
 {
     string[][] categories = mdata.GetCategoryRowAt(groupColInd);
     string[] names = ArrayUtils.UniqueValuesPreserveOrder(categories);
     int[][] memberCols = PerseusPluginUtils.GetExpressionColIndices(categories, names);
     double[][] summaries = new double[names.Length][];
     for (int g = 0; g < summaries.Length; g++){
         summaries[g] = new double[mdata.RowCount];
     }
     // Rows form the outer loop so that func is invoked in row-major order.
     for (int r = 0; r < mdata.RowCount; r++){
         for (int g = 0; g < names.Length; g++){
             List<double> finite = new List<double>();
             foreach (int c in memberCols[g]){
                 double cell = mdata[r, c];
                 if (double.IsNaN(cell) || double.IsInfinity(cell)){
                     continue;
                 }
                 finite.Add(cell);
             }
             // The result is deliberately passed through single precision before being stored.
             float reduced = finite.Count >= validVals ? (float) func(finite) : float.NaN;
             summaries[g][r] = reduced;
         }
     }
     for (int g = 0; g < names.Length; g++){
         mdata.AddNumericColumn(names[g], names[g], summaries[g]);
     }
 }
Ejemplo n.º 18
0
        /// <summary>
        /// Calculates Significance B p-values for every selected ratio column, pairing it with the
        /// intensity column at the same position of the selection, and appends the results to
        /// <paramref name="mdata"/>.
        /// </summary>
        /// <remarks>
        /// For each ratio column two columns are added: a numeric column named after the ratio column
        /// with the suffix " Significance B" holding the p-values, and a category column with the
        /// suffix " B significant" holding the outcome of the selected truncation. Input problems
        /// (no ratio columns, differing selection sizes) are reported through
        /// <c>processInfo.ErrString</c> and leave the matrix untouched.
        /// </remarks>
        /// <param name="mdata">Matrix providing the ratio/intensity columns and receiving the new columns.</param>
        /// <param name="param">Parameters "Ratio columns", "Intensity columns", "Use for truncation", "Threshold value" and "Side".</param>
        /// <param name="supplTables">Not used by this method.</param>
        /// <param name="documents">Not used by this method.</param>
        /// <param name="processInfo">Receives the error message when the column selection is invalid.</param>
        /// <exception cref="ArgumentOutOfRangeException">The "Side" parameter is not 0, 1 or 2.</exception>
        /// <exception cref="NotSupportedException">The selected truncation is neither p-value nor Benjamini-Hochberg based.</exception>
        public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
                                ref IDocumentData[] documents, ProcessInfo processInfo)
        {
            int[] rcols = param.GetParam <int[]>("Ratio columns").Value;
            int[] icols = param.GetParam <int[]>("Intensity columns").Value;
            if (rcols.Length == 0)
            {
                processInfo.ErrString = "Please specify some ratio columns.";
                return;
            }
            if (rcols.Length != icols.Length)
            {
                processInfo.ErrString = "The number of ratio and intensity columns have to be equal.";
                return;
            }
            int            truncIndex = param.GetParam <int>("Use for truncation").Value;
            TestTruncation truncation = truncIndex == 0
                                ? TestTruncation.Pvalue
                                : (truncIndex == 1 ? TestTruncation.BenjaminiHochberg : TestTruncation.PermutationBased);
            double   threshold = param.GetParam <double>("Threshold value").Value;
            int      sideInd   = param.GetParam <int>("Side").Value;
            TestSide side;

            switch (sideInd)
            {
            case 0:
                side = TestSide.Both;
                break;

            case 1:
                side = TestSide.Left;
                break;

            case 2:
                side = TestSide.Right;
                break;

            default:
                throw new ArgumentOutOfRangeException(nameof(param), sideInd, "Unknown value for the 'Side' parameter.");
            }
            // Reject unsupported truncation modes up front instead of failing in the middle of the
            // per-column loop after p-values have already been computed.
            if (truncation != TestTruncation.Pvalue && truncation != TestTruncation.BenjaminiHochberg)
            {
                throw new NotSupportedException(
                    "Truncation mode '" + truncation + "' is not supported for Significance B.");
            }
            for (int i = 0; i < rcols.Length; i++)
            {
                BaseVector r = mdata.Values.GetColumn(rcols[i]);
                // Intensity indices past the main columns address the numeric annotation columns.
                BaseVector intens = icols[i] < mdata.ColumnCount
                                        ? mdata.Values.GetColumn(icols[i])
                                        : new DoubleArrayVector(mdata.NumericColumns[icols[i] - mdata.ColumnCount]);
                double[]   pvals = CalcSignificanceB(r, intens, side);
                string[][] fdr;
                if (truncation == TestTruncation.Pvalue)
                {
                    fdr = PerseusPluginUtils.CalcPvalueSignificance(pvals, threshold);
                }
                else
                {
                    // The per-row FDR values returned through the out parameter are not needed here.
                    fdr = PerseusPluginUtils.CalcBenjaminiHochbergFdr(pvals, threshold, out _);
                }
                mdata.AddNumericColumn(mdata.ColumnNames[rcols[i]] + " Significance B", "", pvals);
                mdata.AddCategoryColumn(mdata.ColumnNames[rcols[i]] + " B significant", "", fdr);
            }
        }
Ejemplo n.º 19
0
        /// <summary>
        /// For every selected pair of x/y columns, evaluates a point density on a square grid and
        /// appends two numeric columns to <paramref name="mdata"/>: the (maximum-normalized) density
        /// at each row's grid cell and the value derived from it by <c>CalcExcludedPercentage</c>.
        /// Rows where either coordinate is NaN receive NaN in both new columns.
        /// </summary>
        /// <param name="mdata">Matrix providing the coordinate columns and receiving the new columns.</param>
        /// <param name="param">Parameters "x", "y", "Distribution type" and "Number of points".</param>
        /// <param name="supplTables">Not used by this method.</param>
        /// <param name="documents">Not used by this method.</param>
        /// <param name="processInfo">Receives the error message when the column selection is invalid.</param>
        public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
                                ref IDocumentData[] documents, ProcessInfo processInfo)
        {
            int[] xCols = param.GetParam <int[]>("x").Value;
            int[] yCols = param.GetParam <int[]>("y").Value;
            if (xCols.Length == 0)
            {
                processInfo.ErrString = "Please select some columns";
                return;
            }
            if (xCols.Length != yCols.Length)
            {
                processInfo.ErrString = "Please select the same number of columns in the boxes for the first and second columns.";
                return;
            }
            int distributionType = param.GetParam <int>("Distribution type").Value;
            int gridSize         = param.GetParam <int>("Number of points").Value;

            for (int pair = 0; pair < xCols.Length; pair++)
            {
                float[] xAll = GetColumn(mdata, xCols[pair]);
                float[] yAll = GetColumn(mdata, yCols[pair]);

                // Only pairs accepted by GetValidPairs contribute to the density estimate.
                float[] xValid;
                float[] yValid;
                NumUtils.GetValidPairs(xAll, yAll, out xValid, out yValid);
                double xmin, xmax, ymin, ymax;
                DensityEstimation.CalcRanges(xValid, yValid, out xmin, out xmax, out ymin, out ymax);
                float[,] density = DensityEstimation.GetValuesOnGrid(
                    xValid, xmin, (xmax - xmin) / gridSize, gridSize,
                    yValid, ymin, (ymax - ymin) / gridSize, gridSize);

                // Optional conversion selected by the "Distribution type" parameter.
                switch (distributionType)
                {
                case 1:
                    MakeConditional1(density);
                    break;

                case 2:
                    MakeConditional2(density);
                    break;

                case 3:
                    MakeConditional3(density);
                    break;
                }
                DensityEstimation.DivideByMaximum(density);

                // Grid coordinates along both axes, used to look up the cell of each data point.
                double[] xGrid = new double[gridSize];
                double[] yGrid = new double[gridSize];
                for (int g = 0; g < gridSize; g++)
                {
                    xGrid[g] = xmin + g * (xmax - xmin) / gridSize;
                    yGrid[g] = ymin + g * (ymax - ymin) / gridSize;
                }
                float[,] excluded = CalcExcludedPercentage(density);

                int      rowCount        = xAll.Length;
                double[] densityAtRow    = new double[rowCount];
                double[] excludedAtRow   = new double[rowCount];
                for (int row = 0; row < rowCount; row++)
                {
                    double px = xAll[row];
                    double py = yAll[row];
                    if (double.IsNaN(px) || double.IsNaN(py))
                    {
                        densityAtRow[row]  = double.NaN;
                        excludedAtRow[row] = double.NaN;
                        continue;
                    }
                    int xi = ArrayUtils.ClosestIndex(xGrid, px);
                    int yi = ArrayUtils.ClosestIndex(yGrid, py);
                    densityAtRow[row]  = density[xi, yi];
                    excludedAtRow[row] = excluded[xi, yi];
                }

                string xname  = GetColumnName(mdata, xCols[pair]);
                string yname  = GetColumnName(mdata, yCols[pair]);
                string suffix = xname + "_" + yname;
                string plane  = xname + " and " + yname + ".";
                mdata.AddNumericColumn("Density_" + suffix,
                                       "Density of data points in the plane spanned by the columns " + plane, densityAtRow);
                mdata.AddNumericColumn("Excluded fraction_" + suffix,
                                       "Percentage of points with a point density smaller than at this point in the plane spanned by the columns " + plane,
                                       excludedAtRow);
            }
        }
Ejemplo n.º 20
0
        /// <summary>
        /// Reads the comma-separated file "results.csv" (resolved against the current working
        /// directory) and appends its contents to <paramref name="mdata"/>.
        /// </summary>
        /// <remarks>
        /// The first non-blank line is treated as a header; every following non-blank line fills the
        /// next matrix row. Fields 1-6 of each line are stored as baseMean, log2FoldChange, lfcSE,
        /// stat, p-value and padj. "NA" fields mark the row as not valid ("-") and are replaced by
        /// "1" (fields 3, 5, 6) or "0" (fields 2, 4). Added columns, all prefixed with
        /// "pair1_vs_pair2_": one numeric column per statistic, "-log10" columns for p-value and
        /// padj, and the category columns "Valid" and "Significant".
        /// Numbers are parsed with the invariant culture so that '.' is always the decimal
        /// separator, independent of the machine's regional settings.
        /// </remarks>
        /// <param name="mdata">Matrix that receives the new columns; its row count sizes all buffers.</param>
        /// <param name="pair1">First group name, used in the column-name prefix.</param>
        /// <param name="pair2">Second group name, used in the column-name prefix.</param>
        /// <param name="fdrValid">Forwarded to <c>CheckSignificant</c>.</param>
        /// <param name="pValid">Forwarded to <c>CheckSignificant</c>.</param>
        /// <param name="lfcValid">Forwarded to <c>CheckSignificant</c>.</param>
        public void ExtractDESeq2Results(IMatrixData mdata, string pair1, string pair2,
                                         ParameterWithSubParams <bool> fdrValid, ParameterWithSubParams <bool> pValid,
                                         ParameterWithSubParams <bool> lfcValid)
        {
            int    rowCount = mdata.Values.RowCount;
            string prefix   = pair1 + "_vs_" + pair2 + "_";

            string[][] validCol = new string[rowCount][];
            string[][] sigCol   = new string[rowCount][];
            Dictionary <string, string[]> results = new Dictionary <string, string[]>
            {
                { "baseMean", new string[rowCount] },
                { "log2FoldChange", new string[rowCount] },
                { "lfcSE", new string[rowCount] },
                { "stat", new string[rowCount] },
                { "p-value", new string[rowCount] },
                { "padj", new string[rowCount] }
            };

            // The using block guarantees the file handle is released even if parsing throws.
            using (StreamReader reader = new StreamReader(File.OpenRead("results.csv")))
            {
                bool   headerSeen = false;
                int    row        = 0;
                string line;
                while ((line = reader.ReadLine()) != null)
                {
                    // Blank lines carry no data and must not consume a matrix row.
                    if (String.IsNullOrWhiteSpace(line))
                    {
                        continue;
                    }
                    if (!headerSeen)
                    {
                        headerSeen = true;
                        continue;
                    }
                    string[] info = line.Replace("\"", "").Split(',');
                    validCol[row] = new string[] { "+" };
                    sigCol[row]   = new string[] { "Not Valid" };
                    for (int v = 0; v < info.Length; v++)
                    {
                        if (info[v] != "NA")
                        {
                            continue;
                        }
                        if (v == 3 || v == 5 || v == 6)
                        {
                            info[v] = "1";
                        }
                        else if (v == 2 || v == 4)
                        {
                            info[v] = "0";
                        }
                        validCol[row][0] = "-";
                    }
                    if (validCol[row][0] == "+")
                    {
                        // CheckSignificant expects the 1-based data line number.
                        CheckSignificant(sigCol, info, fdrValid, pValid, lfcValid, row + 1);
                    }
                    results["baseMean"][row]       = info[1];
                    results["log2FoldChange"][row] = info[2];
                    results["lfcSE"][row]          = info[3];
                    results["stat"][row]           = info[4];
                    results["p-value"][row]        = info[5];
                    results["padj"][row]           = info[6];
                    row++;
                }
            }

            System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;
            foreach (KeyValuePair <string, string[]> entry in results)
            {
                double[] values = Array.ConvertAll(entry.Value, s => double.Parse(s, invariant));
                bool     isPValue = entry.Key == "p-value" || entry.Key == "padj";
                double[] negLog   = null;
                if (isPValue)
                {
                    negLog = new double[values.Length];
                    for (int i = 0; i < values.Length; i++)
                    {
                        double p = values[i];
                        // A p-value of exactly zero would give +infinity; cap it at a large finite value.
                        negLog[i] = p == 0
                                        ? Math.Log10(1 / Double.MaxValue) * -1
                                        : Math.Log10(p) * -1;
                    }
                }
                mdata.AddNumericColumn(prefix + entry.Key, prefix + entry.Key, values);
                if (isPValue)
                {
                    mdata.AddNumericColumn(prefix + "-log10" + entry.Key, prefix + "-log10" + entry.Key, negLog);
                }
            }
            mdata.AddCategoryColumn(prefix + "Valid", prefix + "Valid", validCol);
            mdata.AddCategoryColumn(prefix + "Significant", prefix + "Significant", sigCol);
        }
Ejemplo n.º 21
0
        public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
                                ref IDocumentData[] documents, ProcessInfo processInfo)
        {
            int[] outputColumns      = param.GetParam <int[]>("Output").Value;
            int   proteinIdColumnInd = param.GetParam <int>("Protein IDs").Value;

            string[] proteinIds    = mdata.StringColumns[proteinIdColumnInd];
            int[]    intensityCols = param.GetParam <int[]>("Intensities").Value;
            if (intensityCols.Length == 0)
            {
                processInfo.ErrString = "Please select at least one column containing protein intensities.";
                return;
            }
            // variable to hold all intensity values
            List <double[]> columns = new List <double[]>();

            string[] inputNames  = new string[intensityCols.Length];
            string[] sampleNames = new string[intensityCols.Length];
            for (int col = 0; col < intensityCols.Length; col++)
            {
                double[] values;
                if (intensityCols[col] < mdata.ColumnCount)
                {
                    values          = ArrayUtils.ToDoubles(mdata.Values.GetColumn(intensityCols[col]));
                    inputNames[col] = mdata.ColumnNames[intensityCols[col]];
                }
                else
                {
                    values          = mdata.NumericColumns[intensityCols[col] - mdata.ColumnCount];
                    inputNames[col] = mdata.NumericColumnNames[intensityCols[col] - mdata.ColumnCount];
                }
                sampleNames[col] = new Regex(@"^(?:(?:LFQ )?[Ii]ntensity )?(.*)$").Match(inputNames[col]).Groups[1].Value;
                columns.Add(values);
            }
            // average over columns if this option is selected
            if (param.GetParamWithSubParams <int>("Averaging mode").Value == 3)
            {
                double[] column = new double[mdata.RowCount];
                for (int row = 0; row < mdata.RowCount; row++)
                {
                    double[] values = new double[intensityCols.Length];
                    for (int col = 0; col < intensityCols.Length; col++)
                    {
                        values[col] = columns[col][row];
                    }
                    column[row] = ArrayUtils.Median(ExtractValidValues(values, false));
                }
                // delete the original list of columns
                columns = new List <double[]> {
                    column
                };
                sampleNames = new[] { "" };
            }
            // revert logarithm if necessary
            if (param.GetParamWithSubParams <bool>("Logarithmized").Value)
            {
                double[] logBases = new[] { 2, Math.E, 10 };
                double   logBase  =
                    logBases[param.GetParamWithSubParams <bool>("Logarithmized").GetSubParameters().GetParam <int>("log base").Value];
                foreach (double[] t in columns)
                {
                    for (int row = 0; row < mdata.RowCount; row++)
                    {
                        if (t[row] == 0)
                        {
                            processInfo.ErrString = "Are the columns really logarithmized?\nThey contain zeroes!";
                        }
                        t[row] = Math.Pow(logBase, t[row]);
                    }
                }
            }
            double[] mw = mdata.NumericColumns[param.GetParam <int>("Molecular masses").Value];
            // define whether the molecular masses are given in Da or kDa
            if (ArrayUtils.Median(mw) < 250)             // most likely kDa
            {
                for (int i = 0; i < mw.Length; i++)
                {
                    mw[i] *= 1000;
                }
            }
            double[] detectabilityNormFactor = mw;
            if (param.GetParamWithSubParams <bool>("Detectability correction").Value)
            {
                detectabilityNormFactor =
                    mdata.NumericColumns[
                        param.GetParamWithSubParams <bool>("Detectability correction").GetSubParameters().GetParam <int>("Correction factor")
                        .Value];
            }
            // the normalization factor needs to be nonzero for all proteins
            // check and replace with 1 for all relevant cases
            for (int row = 0; row < mdata.RowCount; row++)
            {
                if (detectabilityNormFactor[row] == 0 || double.IsNaN(detectabilityNormFactor[row]))
                {
                    detectabilityNormFactor[row] = 1;
                }
            }
            // detect the organism
            Organism organism = DetectOrganism(proteinIds);
            // c value the amount of DNA per haploid genome, see: http://en.wikipedia.org/wiki/C-value
            double cValue = organism.genomeSize * basePairWeight / avogadro;

            // find the histones
            int[] histoneRows = FindHistones(proteinIds, organism);
            // write a categorical column indicating the histones
            string[][] histoneCol = new string[mdata.RowCount][];
            for (int row = 0; row < mdata.RowCount; row++)
            {
                histoneCol[row] = ArrayUtils.Contains(histoneRows, row) ? new[] { "+" } : new string[0];
            }
            mdata.AddCategoryColumn("Histones", "", histoneCol);

            // initialize the variables for the annotation rows
            string[]   sampleNameRow     = new string[mdata.ColumnCount];
            string[]   inputNameRow      = new string[mdata.ColumnCount];
            double[]   totalProteinRow   = new double[mdata.ColumnCount];
            double[]   totalMoleculesRow = new double[mdata.ColumnCount];
            string[][] organismRow       = new string[mdata.ColumnCount][];
            // populate the organismRow variable with empty strings as defaults (not null, which may cause errors when writing the annotations in the end.)
            for (int i = 0; i < organismRow.Length; i++)
            {
                organismRow[i] = new[] { "N/A" };
            }
            double[] histoneMassRow       = new double[mdata.ColumnCount];
            double[] ploidyRow            = new double[mdata.ColumnCount];
            double[] cellVolumeRow        = new double[mdata.ColumnCount];
            double[] normalizationFactors = new double[columns.Count];
            // calculate normalization factors for each column
            for (int col = 0; col < columns.Count; col++)
            {
                string   sampleName = sampleNames[col];
                double[] column     = columns[col];
                // normalization factor to go from intensities to copies,
                // needs to be determined either using the total protein or the histone scaling approach
                double factor;
                switch (param.GetParamWithSubParams <int>("Scaling mode").Value)
                {
                case 0:                         // total protein amount
                    double mwWeightedNormalizedSummedIntensities = 0;
                    for (int row = 0; row < mdata.RowCount; row++)
                    {
                        if (!double.IsNaN(column[row]) && !double.IsNaN(mw[row]))
                        {
                            mwWeightedNormalizedSummedIntensities += column[row] / detectabilityNormFactor[row] * mw[row];
                        }
                    }
                    factor =
                        param.GetParamWithSubParams <int>("Scaling mode").GetSubParameters().GetParam <double>(
                            "Protein amount per cell [pg]").Value *1e-12 * avogadro / mwWeightedNormalizedSummedIntensities;
                    break;

                case 1:                         // histone mode
                    double mwWeightedNormalizedSummedHistoneIntensities = 0;
                    foreach (int row in histoneRows)
                    {
                        if (!double.IsNaN(column[row]) && !double.IsNaN(mw[row]))
                        {
                            mwWeightedNormalizedSummedHistoneIntensities += column[row] / detectabilityNormFactor[row] * mw[row];
                        }
                    }
                    double ploidy =
                        param.GetParamWithSubParams <int>("Scaling mode").GetSubParameters().GetParam <double>("Ploidy").Value;
                    factor = cValue * ploidy * avogadro / mwWeightedNormalizedSummedHistoneIntensities;
                    break;

                default:
                    factor = 1;
                    break;
                }
                normalizationFactors[col] = factor;
            }
            // check averaging mode
            if (param.GetParamWithSubParams <int>("Averaging mode").Value == 1)            // same factor for all
            {
                double factor = ArrayUtils.Mean(normalizationFactors);
                for (int i = 0; i < normalizationFactors.Length; i++)
                {
                    normalizationFactors[i] = factor;
                }
            }
            if (param.GetParamWithSubParams <int>("Averaging mode").Value == 2)            // same factor in each group
            {
                if (param.GetParamWithSubParams <int>("Averaging mode").GetSubParameters().GetParam <int>("Grouping").Value == -1)
                {
                    processInfo.ErrString = "No grouping selected.";
                    return;
                }
                string[][] groupNames =
                    mdata.GetCategoryRowAt(
                        param.GetParamWithSubParams <int>("Averaging mode").GetSubParameters().GetParam <int>("Grouping").Value);
                string[] uniqueGroupNames = Unique(groupNames);
                int[]    grouping         = new int[columns.Count];
                for (int i = 0; i < columns.Count; i++)
                {
                    if (intensityCols[i] >= mdata.ColumnCount)                      // Numeric annotation columns cannot be grouped
                    {
                        grouping[i] = i;
                        continue;
                    }
                    if (ArrayUtils.Contains(uniqueGroupNames, groupNames[i][0]))
                    {
                        grouping[i] = ArrayUtils.IndexOf(uniqueGroupNames, groupNames[i][0]);
                        continue;
                    }
                    grouping[i] = i;
                }
                Dictionary <int, List <double> > factors = new Dictionary <int, List <double> >();
                for (int i = 0; i < columns.Count; i++)
                {
                    if (factors.ContainsKey(grouping[i]))
                    {
                        factors[grouping[i]].Add(normalizationFactors[i]);
                    }
                    else
                    {
                        factors.Add(grouping[i], new List <double> {
                            normalizationFactors[i]
                        });
                    }
                }
                double[] averagedNormalizationFactors = new double[columns.Count];
                for (int i = 0; i < columns.Count; i++)
                {
                    List <double> factor;
                    factors.TryGetValue(grouping[i], out factor);
                    averagedNormalizationFactors[i] = ArrayUtils.Mean(factor);
                }
                normalizationFactors = averagedNormalizationFactors;
            }
            // loop over all selected columns and calculate copy numbers
            for (int col = 0; col < columns.Count; col++)
            {
                string   sampleName     = sampleNames[col];
                double[] column         = columns[col];
                double   factor         = normalizationFactors[col];
                double[] copyNumbers    = new double[mdata.RowCount];
                double[] concentrations = new double[mdata.RowCount];                 // femtoliters
                double[] massFraction   = new double[mdata.RowCount];
                double[] moleFraction   = new double[mdata.RowCount];
                double   totalProtein   = 0;            // picograms
                double   histoneMass    = 0;            // picograms
                double   totalMolecules = 0;
                for (int row = 0; row < mdata.RowCount; row++)
                {
                    if (!double.IsNaN(column[row]) && !double.IsNaN(mw[row]))
                    {
                        copyNumbers[row] = column[row] / detectabilityNormFactor[row] * factor;
                        totalMolecules  += copyNumbers[row];
                        totalProtein    += copyNumbers[row] * mw[row] * 1e12 / avogadro;                // picograms
                        if (ArrayUtils.Contains(histoneRows, row))
                        {
                            histoneMass += copyNumbers[row] * mw[row] * 1e12 / avogadro;                       // picograms
                        }
                    }
                }
                double totalVolume = totalProtein / param.GetParam <double>("Total cellular protein concentration [g/l]").Value *
                                     1000;
                // femtoliters
                for (int row = 0; row < mdata.RowCount; row++)
                {
                    if (!double.IsNaN(column[row]) && !double.IsNaN(mw[row]))
                    {
                        concentrations[row] = copyNumbers[row] / (totalVolume * 1e-15) / avogadro * 1e9;         // nanomolar
                        massFraction[row]   = copyNumbers[row] * mw[row] * 1e12 / avogadro / totalProtein * 1e6; // ppm
                        moleFraction[row]   = copyNumbers[row] / totalMolecules * 1e6;                           // ppm
                    }
                }
                string suffix = sampleName == "" ? "" : " " + sampleName;
                if (ArrayUtils.Contains(outputColumns, 0))
                {
                    mdata.AddNumericColumn("Copy number" + suffix, "", copyNumbers);
                }
                if (ArrayUtils.Contains(outputColumns, 1))
                {
                    mdata.AddNumericColumn("Concentration [nM]" + suffix, "", concentrations);
                }
                if (ArrayUtils.Contains(outputColumns, 2))
                {
                    mdata.AddNumericColumn("Abundance (mass/total mass) [*10^-6]" + suffix, "", massFraction);
                }
                if (ArrayUtils.Contains(outputColumns, 3))
                {
                    mdata.AddNumericColumn("Abundance (molecules/total molecules) [*10^-6]" + suffix, "", moleFraction);
                }
                double[] rank         = ArrayUtils.Rank(copyNumbers);
                double[] relativeRank = new double[mdata.RowCount];
                double   validRanks   = mdata.RowCount;
                for (int row = 0; row < mdata.RowCount; row++)
                {
                    // remove rank for protein with no copy number information
                    if (double.IsNaN(copyNumbers[row]) || copyNumbers[row] == 0)
                    {
                        rank[row] = double.NaN;
                        validRanks--;                         // do not consider as valid
                    }
                    // invert ranking, so that rank 0 is the most abundant protein
                    rank[row] = mdata.RowCount - rank[row];
                }
                for (int row = 0; row < mdata.RowCount; row++)
                {
                    relativeRank[row] = rank[row] / validRanks;
                }
                if (ArrayUtils.Contains(outputColumns, 4))
                {
                    mdata.AddNumericColumn("Copy number rank" + suffix, "", rank);
                }
                if (ArrayUtils.Contains(outputColumns, 5))
                {
                    mdata.AddNumericColumn("Relative copy number rank" + suffix, "", relativeRank);
                }
                if (intensityCols[col] < mdata.ColumnCount && param.GetParamWithSubParams <int>("Averaging mode").Value != 3)
                {
                    inputNameRow[intensityCols[col]]      = inputNames[col];
                    sampleNameRow[intensityCols[col]]     = sampleNames[col];
                    totalProteinRow[intensityCols[col]]   = Math.Round(totalProtein, 2);
                    totalMoleculesRow[intensityCols[col]] = Math.Round(totalMolecules, 0);
                    organismRow[intensityCols[col]]       = new[] { organism.name };
                    histoneMassRow[intensityCols[col]]    = Math.Round(histoneMass, 4);
                    ploidyRow[intensityCols[col]]         = Math.Round(histoneMass * 1e-12 / cValue, 2);
                    cellVolumeRow[intensityCols[col]]     = Math.Round(totalVolume, 2);                 // femtoliters
                }
            }

            // Summary annotation row
            if (param.GetParamWithSubParams <int>("Averaging mode").Value != 3 && ArrayUtils.Contains(outputColumns, 6))
            {
                mdata.AddNumericRow("Total protein [pg/cell]", "", totalProteinRow);
                mdata.AddNumericRow("Total molecules per cell", "", totalMoleculesRow);
                mdata.AddCategoryRow("Organism", "", organismRow);
                mdata.AddNumericRow("Histone mass [pg/cell]", "", histoneMassRow);
                mdata.AddNumericRow("Ploidy", "", ploidyRow);
                mdata.AddNumericRow("Cell volume [fl]", "", cellVolumeRow);
            }

            // Summary matrix
            if (param.GetParamWithSubParams <int>("Averaging mode").Value != 3 && ArrayUtils.Contains(outputColumns, 7))
            {
                supplTables = new IMatrixData[1];
                IMatrixData supplTab = PerseusFactory.CreateMatrixData();
                supplTab.ColumnNames = new List <string>();
                supplTab.Values.Init(totalProteinRow.Length, 0);
                supplTab.SetAnnotationColumns(new List <string> {
                    "Sample", "Input Column"
                },
                                              new List <string[]>()
                {
                    sampleNameRow, inputNameRow
                }, new List <string>()
                {
                    "Organism"
                },
                                              new List <string[][]>()
                {
                    organismRow
                },
                                              new List <string>()
                {
                    "Total protein [pg/cell]",
                    "Total molecules per cell",
                    "Histone mass [pg/cell]",
                    "Ploidy",
                    "Cell volume [fl]"
                },
                                              new List <double[]>()
                {
                    totalProteinRow, totalMoleculesRow, histoneMassRow, ploidyRow, cellVolumeRow
                },
                                              new List <string>(), new List <double[][]>());
                supplTables[0] = supplTab;
            }
        }
        /// <summary>
        /// Evaluates the selected summary quantities for every row of <paramref name="mdata"/> and appends
        /// each result as a numeric column. When the "Expression column selection" parameter has the value 2,
        /// every quantity is additionally evaluated per group of the chosen category row.
        /// </summary>
        /// <param name="mdata">Matrix that is read row by row and receives the new numeric columns.</param>
        /// <param name="param">Supplies "Expression column selection" (with "Group" / "Columns") and "Calculate".</param>
        /// <param name="supplTables">Not touched by this method.</param>
        /// <param name="documents">Not touched by this method.</param>
        /// <param name="processInfo">Not touched by this method.</param>
        public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
            ref IDocumentData[] documents, ProcessInfo processInfo)
        {
            SingleChoiceWithSubParams selection = param.GetSingleChoiceWithSubParams("Expression column selection");
            bool perGroup = selection.Value == 2;
            string[] groupLabels = null;
            int[][] groupColumnIndices = null;
            if (perGroup)
            {
                int categoryRowIndex = selection.GetSubParameters().GetSingleChoiceParam("Group").Value;
                string[][] categoryRow = mdata.GetCategoryRowAt(categoryRowIndex);
                groupLabels = ArrayUtils.UniqueValuesPreserveOrder(categoryRow);
                groupColumnIndices = PerseusPluginUtils.GetExpressionColIndices(categoryRow, groupLabels);
            }

            // Value 1 means an explicit column list; every other value uses all expression columns.
            int[] selectedColumns;
            if (selection.Value == 1)
            {
                selectedColumns = selection.GetSubParameters().GetMultiChoiceParam("Columns").Value;
            }
            else
            {
                selectedColumns = ArrayUtils.ConsecutiveInts(mdata.ExpressionColumnCount);
            }
            HashSet<int> requested = ArrayUtils.ToHashSet(param.GetMultiChoiceParam("Calculate").Value);

            // One slot per entry of procs; only the requested entries get result buffers.
            int quantityCount = procs.Length;
            bool[] enabled = new bool[quantityCount];
            double[][] overall = new double[quantityCount][];
            double[][][] byGroup = perGroup ? new double[quantityCount][][] : null;
            for (int q = 0; q < quantityCount; q++)
            {
                if (perGroup)
                {
                    byGroup[q] = new double[groupLabels.Length][];
                }
                enabled[q] = requested.Contains(q);
                if (!enabled[q])
                {
                    continue;
                }
                overall[q] = new double[mdata.RowCount];
                if (perGroup)
                {
                    for (int g = 0; g < groupLabels.Length; g++)
                    {
                        byGroup[q][g] = new double[mdata.RowCount];
                    }
                }
            }

            for (int row = 0; row < mdata.RowCount; row++)
            {
                // Only finite values take part in the calculation.
                List<double> finite = new List<double>();
                foreach (int c in selectedColumns)
                {
                    double value = mdata[row, c];
                    if (double.IsNaN(value) || double.IsInfinity(value))
                    {
                        continue;
                    }
                    finite.Add(value);
                }
                for (int q = 0; q < quantityCount; q++)
                {
                    if (enabled[q])
                    {
                        overall[q][row] = procs[q].Item2(finite);
                    }
                }
                if (!perGroup)
                {
                    continue;
                }

                List<double>[] finitePerGroup = new List<double>[groupLabels.Length];
                for (int g = 0; g < groupColumnIndices.Length; g++)
                {
                    finitePerGroup[g] = new List<double>();
                    foreach (int c in groupColumnIndices[g])
                    {
                        double value = mdata[row, c];
                        if (double.IsNaN(value) || double.IsInfinity(value))
                        {
                            continue;
                        }
                        finitePerGroup[g].Add(value);
                    }
                }
                for (int q = 0; q < quantityCount; q++)
                {
                    if (!enabled[q])
                    {
                        continue;
                    }
                    for (int g = 0; g < groupLabels.Length; g++)
                    {
                        byGroup[q][g][row] = procs[q].Item2(finitePerGroup[g]);
                    }
                }
            }

            // Emit the overall column first, then one column per group, for each requested quantity.
            for (int q = 0; q < quantityCount; q++)
            {
                if (!enabled[q])
                {
                    continue;
                }
                mdata.AddNumericColumn(procs[q].Item1, procs[q].Item3, overall[q]);
                if (perGroup)
                {
                    for (int g = 0; g < groupLabels.Length; g++)
                    {
                        mdata.AddNumericColumn(procs[q].Item1 + " " + groupLabels[g], procs[q].Item3, byGroup[q][g]);
                    }
                }
            }
        }
Ejemplo n.º 23
0
        /// <summary>
        /// Converts protein intensities into estimated copy numbers and derived quantities (concentration,
        /// mass fraction, mole fraction, rank, relative rank) and appends the selected ones to
        /// <paramref name="mdata"/> as numeric columns. Optionally appends per-sample summary annotation rows.
        /// </summary>
        /// <remarks>
        /// Steps: collect the selected intensity columns, optionally collapse them to their row-wise median,
        /// optionally revert a logarithm, derive one normalization factor per column (total-protein or
        /// histone scaling), optionally average these factors (over all columns or within groups), and
        /// finally compute the output quantities per column.
        /// NOTE(review): the kDa-to-Da conversion multiplies the molecular mass column of the matrix in place,
        /// and de-logarithmizing writes into numeric annotation columns in place; confirm this is intended.
        /// </remarks>
        /// <param name="mdata">Matrix providing intensities, protein IDs and molecular masses; receives the results.</param>
        /// <param name="param">Plugin parameters ("Output", "Protein IDs", "Intensities", "Averaging mode",
        /// "Logarithmized", "Molecular masses", "Detectability correction", "Scaling mode",
        /// "Total cellular protein concentration [g/l]").</param>
        /// <param name="supplTables">Not touched by this method.</param>
        /// <param name="documents">Not touched by this method.</param>
        /// <param name="processInfo">Receives an error message in <c>ErrString</c> when the input is unusable.</param>
        public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
            ref IDocumentData[] documents, ProcessInfo processInfo)
        {
            int[] outputColumns = param.GetMultiChoiceParam("Output").Value;
            int proteinIdColumnInd = param.GetSingleChoiceParam("Protein IDs").Value;
            string[] proteinIds = mdata.StringColumns[proteinIdColumnInd];
            int[] intensityCols = param.GetMultiChoiceParam("Intensities").Value;
            if (intensityCols.Length == 0){
                processInfo.ErrString = "Please select at least one column containing protein intensities.";
                return;
            }
            // variable to hold all intensity values
            List<double[]> columns = new List<double[]>();
            string[] sampleNames = new string[intensityCols.Length];
            // strips an optional leading "Intensity " / "intensity " / "LFQ intensity " prefix from a column name
            Regex intensityPrefix = new Regex(@"^(?:(?:LFQ )?[Ii]ntensity )?(.*)$");
            for (int col = 0; col < intensityCols.Length; col++){
                double[] values;
                // indices beyond the expression columns refer to numeric annotation columns
                if (intensityCols[col] < mdata.ExpressionColumnCount){
                    values = ArrayUtils.ToDoubles(mdata.GetExpressionColumn(intensityCols[col]));
                    sampleNames[col] = mdata.ExpressionColumnNames[intensityCols[col]];
                } else{
                    values = mdata.NumericColumns[intensityCols[col] - mdata.ExpressionColumnCount];
                    sampleNames[col] = mdata.NumericColumnNames[intensityCols[col] - mdata.ExpressionColumnCount];
                }
                sampleNames[col] = intensityPrefix.Match(sampleNames[col]).Groups[1].Value;
                columns.Add(values);
            }
            int averagingMode = param.GetSingleChoiceWithSubParams("Averaging mode").Value;
            // average over columns if this option is selected
            if (averagingMode == 3){
                double[] column = new double[mdata.RowCount];
                for (int row = 0; row < mdata.RowCount; row++){
                    double[] values = new double[intensityCols.Length];
                    for (int col = 0; col < intensityCols.Length; col++){
                        values[col] = columns[col][row];
                    }
                    column[row] = ArrayUtils.Median(ExtractValidValues(values, false));
                }
                // delete the original list of columns
                columns = new List<double[]>{column};
                sampleNames = new[]{""};
            }
            // revert logarithm if necessary
            if (param.GetBoolWithSubParams("Logarithmized").Value){
                double[] logBases = new[]{2, Math.E, 10};
                double logBase =
                    logBases[param.GetBoolWithSubParams("Logarithmized").GetSubParameters().GetSingleChoiceParam("log base").Value];
                foreach (double[] t in columns){
                    for (int row = 0; row < mdata.RowCount; row++){
                        // the message is recorded, but processing deliberately continues as before
                        if (t[row] == 0){
                            processInfo.ErrString = "Are the columns really logarithmized?\nThey contain zeroes!";
                        }
                        t[row] = Math.Pow(logBase, t[row]);
                    }
                }
            }
            double[] mw = mdata.NumericColumns[param.GetSingleChoiceParam("Molecular masses").Value];
            // detect whether the molecular masses are given in Da or kDa
            if (ArrayUtils.Median(mw) < 250) // likely kDa
            {
                for (int i = 0; i < mw.Length; i++){
                    mw[i] *= 1000;
                }
            }
            double[] detectabilityNormFactor = mw;
            if (param.GetBoolWithSubParams("Detectability correction").Value){
                detectabilityNormFactor =
                    mdata.NumericColumns[
                        param.GetBoolWithSubParams("Detectability correction")
                             .GetSubParameters()
                             .GetSingleChoiceParam("Correction factor")
                             .Value];
            }
            // The normalization factor needs to be a usable nonzero number for all proteins:
            // replace zero and NaN entries with 1.
            // Work on a copy: without a correction column the factor array IS the molecular mass array, and
            // overwriting a NaN mass with 1 would make the NaN-mass checks below accept proteins of unknown mass.
            detectabilityNormFactor = (double[]) detectabilityNormFactor.Clone();
            for (int row = 0; row < mdata.RowCount; row++){
                // NaN never compares equal to anything, so it has to be tested with double.IsNaN
                if (detectabilityNormFactor[row] == 0 || double.IsNaN(detectabilityNormFactor[row])){
                    detectabilityNormFactor[row] = 1;
                }
            }
            // detect the organism
            Organism organism = DetectOrganism(proteinIds);
            // c value the amount of DNA per cell, see: http://en.wikipedia.org/wiki/C-value
            double cValue = (organism.genomeSize*basePairWeight)/avogadro;
            // find the histones
            int[] histoneRows = FindHistones(proteinIds, organism);
            // write a categorical column indicating the histones
            string[][] histoneCol = new string[mdata.RowCount][];
            for (int row = 0; row < mdata.RowCount; row++){
                histoneCol[row] = (ArrayUtils.Contains(histoneRows, row)) ? new[]{"+"} : new[]{""};
            }
            mdata.AddCategoryColumn("Histones", "", histoneCol);
            // initialize the variables for the annotation rows
            double[] totalProteinRow = new double[mdata.ExpressionColumnCount];
            double[] totalMoleculesRow = new double[mdata.ExpressionColumnCount];
            string[][] organismRow = new string[mdata.ExpressionColumnCount][];
            double[] histoneMassRow = new double[mdata.ExpressionColumnCount];
            double[] ploidyRow = new double[mdata.ExpressionColumnCount];
            double[] cellVolumeRow = new double[mdata.ExpressionColumnCount];
            double[] normalizationFactors = new double[columns.Count];
            // calculate normalization factors for each column
            for (int col = 0; col < columns.Count; col++){
                double[] column = columns[col];
                // normalization factor to go from intensities to copies,
                // needs to be determined either using the total protein or the histone scaling approach
                double factor;
                switch (param.GetSingleChoiceWithSubParams("Scaling mode").Value){
                    case 0: // total protein amount
                        double mwWeightedNormalizedSummedIntensities = 0;
                        for (int row = 0; row < mdata.RowCount; row++){
                            if (!double.IsNaN(column[row]) && !double.IsNaN(mw[row])){
                                mwWeightedNormalizedSummedIntensities += (column[row]/detectabilityNormFactor[row])*mw[row];
                            }
                        }
                        factor =
                            (param.GetSingleChoiceWithSubParams("Scaling mode")
                                  .GetSubParameters()
                                  .GetDoubleParam("Protein amount per cell [pg]")
                                  .Value*1e-12*avogadro)/mwWeightedNormalizedSummedIntensities;
                        break;
                    case 1: // histone mode
                        double mwWeightedNormalizedSummedHistoneIntensities = 0;
                        foreach (int row in histoneRows){
                            if (!double.IsNaN(column[row]) && !double.IsNaN(mw[row])){
                                mwWeightedNormalizedSummedHistoneIntensities += (column[row]/detectabilityNormFactor[row])*mw[row];
                            }
                        }
                        double ploidy =
                            param.GetSingleChoiceWithSubParams("Scaling mode").GetSubParameters().GetDoubleParam("Ploidy").Value;
                        factor = (cValue*ploidy*avogadro)/mwWeightedNormalizedSummedHistoneIntensities;
                        break;
                    default:
                        factor = 1;
                        break;
                }
                normalizationFactors[col] = factor;
            }
            // check averaging mode
            if (averagingMode == 1) // same factor for all
            {
                double factor = ArrayUtils.Mean(normalizationFactors);
                for (int i = 0; i < normalizationFactors.Length; i++){
                    normalizationFactors[i] = factor;
                }
            }
            if (averagingMode == 2) // same factor in each group
            {
                if (
                    param.GetSingleChoiceWithSubParams("Averaging mode").GetSubParameters().GetSingleChoiceParam("Grouping").Value ==
                        -1){
                    processInfo.ErrString = "No grouping selected.";
                    return;
                }
                string[][] groupNames =
                    mdata.GetCategoryRowAt(
                        param.GetSingleChoiceWithSubParams("Averaging mode").GetSubParameters().GetSingleChoiceParam("Grouping").Value);
                string[] uniqueGroupNames = Unique(groupNames);
                int[] grouping = new int[columns.Count];
                for (int i = 0; i < columns.Count; i++){
                    // Default: a key of its own. It is offset by the number of groups so that it can never
                    // coincide with a real group index and silently merge an ungrouped column into a group.
                    grouping[i] = uniqueGroupNames.Length + i;
                    if (intensityCols[i] >= mdata.ExpressionColumnCount){ // Numeric annotation columns cannot be grouped
                        continue;
                    }
                    // The category row is indexed by matrix column, not by position in the selection.
                    string[] columnGroups = groupNames[intensityCols[i]];
                    if (columnGroups.Length == 0){ // column without a group assignment stays on its own
                        continue;
                    }
                    if (ArrayUtils.Contains(uniqueGroupNames, columnGroups[0])){
                        grouping[i] = ArrayUtils.IndexOf(uniqueGroupNames, columnGroups[0]);
                    }
                }
                // collect the factors of all columns sharing a grouping key
                Dictionary<int, List<double>> factors = new Dictionary<int, List<double>>();
                for (int i = 0; i < columns.Count; i++){
                    List<double> groupFactors;
                    if (!factors.TryGetValue(grouping[i], out groupFactors)){
                        groupFactors = new List<double>();
                        factors.Add(grouping[i], groupFactors);
                    }
                    groupFactors.Add(normalizationFactors[i]);
                }
                double[] averagedNormalizationFactors = new double[columns.Count];
                for (int i = 0; i < columns.Count; i++){
                    averagedNormalizationFactors[i] = ArrayUtils.Mean(factors[grouping[i]]);
                }
                normalizationFactors = averagedNormalizationFactors;
            }
            // loop over all selected columns and calculate copy numbers
            for (int col = 0; col < columns.Count; col++){
                string sampleName = sampleNames[col];
                double[] column = columns[col];
                double factor = normalizationFactors[col];
                double[] copyNumbers = new double[mdata.RowCount];
                double[] concentrations = new double[mdata.RowCount]; // nanomolar
                double[] massFraction = new double[mdata.RowCount];
                double[] moleFraction = new double[mdata.RowCount];
                double totalProtein = 0; // picograms
                double histoneMass = 0; // picograms
                double totalMolecules = 0;
                for (int row = 0; row < mdata.RowCount; row++){
                    if (!double.IsNaN(column[row]) && !double.IsNaN(mw[row])){
                        copyNumbers[row] = (column[row]/detectabilityNormFactor[row])*factor;
                        totalMolecules += copyNumbers[row];
                        totalProtein += (copyNumbers[row]*mw[row]*1e12)/avogadro; // picograms
                        if (ArrayUtils.Contains(histoneRows, row)){
                            histoneMass += (copyNumbers[row]*mw[row]*1e12)/avogadro; // picograms
                        }
                    }
                }
                double totalVolume = (totalProtein/(param.GetDoubleParam("Total cellular protein concentration [g/l]").Value))*1000;
                // femtoliters
                for (int row = 0; row < mdata.RowCount; row++){
                    if (!double.IsNaN(column[row]) && !double.IsNaN(mw[row])){
                        concentrations[row] = ((copyNumbers[row]/(totalVolume*1e-15))/avogadro)*1e9; // nanomolar
                        massFraction[row] = (((copyNumbers[row]*mw[row]*1e12)/avogadro)/totalProtein)*1e6; // ppm
                        moleFraction[row] = (copyNumbers[row]/totalMolecules)*1e6; // ppm
                    }
                }
                string suffix = (sampleName == "") ? "" : " " + sampleName;
                if (ArrayUtils.Contains(outputColumns, 0)){
                    mdata.AddNumericColumn("Copy number" + suffix, "", copyNumbers);
                }
                if (ArrayUtils.Contains(outputColumns, 1)){
                    mdata.AddNumericColumn("Concentration [nM]" + suffix, "", concentrations);
                }
                if (ArrayUtils.Contains(outputColumns, 2)){
                    mdata.AddNumericColumn("Abundance (mass/total mass) [*10^-6]" + suffix, "", massFraction);
                }
                if (ArrayUtils.Contains(outputColumns, 3)){
                    mdata.AddNumericColumn("Abundance (molecules/total molecules) [*10^-6]" + suffix, "", moleFraction);
                }
                double[] rank = ArrayUtils.Rank(copyNumbers);
                double[] relativeRank = new double[mdata.RowCount];
                double validRanks = mdata.RowCount;
                for (int row = 0; row < mdata.RowCount; row++){
                    // remove rank for protein with no copy number information
                    if (double.IsNaN((copyNumbers[row])) || copyNumbers[row] == 0){
                        rank[row] = double.NaN;
                        validRanks--; // do not consider as valid
                    }
                    // invert ranking, so that rank 0 is the most abundant protein
                    // (a NaN rank stays NaN under the subtraction)
                    rank[row] = mdata.RowCount - rank[row];
                }
                for (int row = 0; row < mdata.RowCount; row++){
                    relativeRank[row] = rank[row]/validRanks;
                }
                if (ArrayUtils.Contains(outputColumns, 4)){
                    mdata.AddNumericColumn("Copy number rank" + suffix, "", rank);
                }
                if (ArrayUtils.Contains(outputColumns, 5)){
                    mdata.AddNumericColumn("Relative copy number rank" + suffix, "", relativeRank);
                }
                // summary values exist only per expression column and only when columns were not collapsed
                if (intensityCols[col] < mdata.ExpressionColumnCount && averagingMode != 3){
                    totalProteinRow[intensityCols[col]] = Math.Round(totalProtein, 2);
                    totalMoleculesRow[intensityCols[col]] = Math.Round(totalMolecules, 0);
                    organismRow[intensityCols[col]] = new string[]{organism.name};
                    histoneMassRow[intensityCols[col]] = Math.Round(histoneMass, 4);
                    ploidyRow[intensityCols[col]] = Math.Round((histoneMass*1e-12)/cValue, 2);
                    cellVolumeRow[intensityCols[col]] = Math.Round(totalVolume, 2); // femtoliters
                }
            }
            if (averagingMode != 3 && ArrayUtils.Contains(outputColumns, 6)){
                mdata.AddNumericRow("Total protein [pg/cell]", "", totalProteinRow);
                mdata.AddNumericRow("Total molecules per cell", "", totalMoleculesRow);
                mdata.AddCategoryRow("Organism", "", organismRow);
                mdata.AddNumericRow("Histone mass [pg/cell]", "", histoneMassRow);
                mdata.AddNumericRow("Ploidy", "", ploidyRow);
                mdata.AddNumericRow("Cell volume [fl]", "", cellVolumeRow);
            }
        }
        /// <summary>
        /// For every selected pair of columns, estimates a density on a square grid and appends two numeric
        /// columns to <paramref name="mdata"/>: the grid value at each data point ("Density_...") and the
        /// corresponding value returned by <c>CalcExcludedPercentage</c> ("Excluded fraction_...").
        /// </summary>
        /// <param name="mdata">Matrix supplying the column pairs and receiving the result columns.</param>
        /// <param name="param">Supplies "x", "y", "Distribution type" and "Number of points".</param>
        /// <param name="supplTables">Not touched by this method.</param>
        /// <param name="documents">Not touched by this method.</param>
        /// <param name="processInfo">Receives an error message when the column selection is unusable.</param>
        public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
            ref IDocumentData[] documents, ProcessInfo processInfo)
        {
            int[] xColumns = param.GetParam<int[]>("x").Value;
            int[] yColumns = param.GetParam<int[]>("y").Value;
            if (xColumns.Length == 0)
            {
                processInfo.ErrString = "Please select some columns";
                return;
            }
            if (xColumns.Length != yColumns.Length)
            {
                processInfo.ErrString = "Please select the same number of columns in the boxes for the first and second columns.";
                return;
            }
            int distributionType = param.GetParam<int>("Distribution type").Value;
            int gridSize = param.GetParam<int>("Number of points").Value;

            for (int pair = 0; pair < xColumns.Length; pair++)
            {
                float[] rawX = GetColumn(mdata, xColumns[pair]);
                float[] rawY = GetColumn(mdata, yColumns[pair]);
                float[] validX;
                float[] validY;
                GetValidPairs(rawX, rawY, out validX, out validY);

                double minX;
                double maxX;
                double minY;
                double maxY;
                DensityEstimation.CalcRanges(validX, validY, out minX, out maxX, out minY, out maxY);
                double stepX = (maxX - minX)/gridSize;
                double stepY = (maxY - minY)/gridSize;
                float[,] density = DensityEstimation.GetValuesOnGrid(validX, minX, stepX, gridSize, validY, minY, stepY,
                    gridSize);

                // Types 1 to 3 apply one of the conditional transformations; any other type leaves the grid as is.
                switch (distributionType)
                {
                    case 1:
                        MakeConditional1(density);
                        break;
                    case 2:
                        MakeConditional2(density);
                        break;
                    case 3:
                        MakeConditional3(density);
                        break;
                }
                DensityEstimation.DivideByMaximum(density);

                // Coordinates of the grid nodes along both axes.
                double[] gridX = new double[gridSize];
                double[] gridY = new double[gridSize];
                for (int node = 0; node < gridSize; node++)
                {
                    gridX[node] = minX + node*(maxX - minX)/gridSize;
                    gridY[node] = minY + node*(maxY - minY)/gridSize;
                }

                float[,] excluded = CalcExcludedPercentage(density);
                int pointCount = rawX.Length;
                double[] densityAtPoint = new double[pointCount];
                double[] excludedAtPoint = new double[pointCount];
                for (int p = 0; p < pointCount; p++)
                {
                    double px = rawX[p];
                    double py = rawY[p];
                    if (double.IsNaN(px) || double.IsNaN(py))
                    {
                        // Points with a missing coordinate get no density.
                        densityAtPoint[p] = double.NaN;
                        excludedAtPoint[p] = double.NaN;
                        continue;
                    }
                    int ix = ArrayUtils.ClosestIndex(gridX, px);
                    int iy = ArrayUtils.ClosestIndex(gridY, py);
                    densityAtPoint[p] = density[ix, iy];
                    excludedAtPoint[p] = excluded[ix, iy];
                }

                string nameX = GetColumnName(mdata, xColumns[pair]);
                string nameY = GetColumnName(mdata, yColumns[pair]);
                string pairLabel = nameX + "_" + nameY;
                string planeLabel = nameX + " and " + nameY + ".";
                mdata.AddNumericColumn("Density_" + pairLabel,
                    "Density of data points in the plane spanned by the columns " + planeLabel, densityAtPoint);
                mdata.AddNumericColumn("Excluded fraction_" + pairLabel,
                    "Percentage of points with a point density smaller than at this point in the plane spanned by the columns " +
                    planeLabel, excludedAtPoint);
            }
        }
Ejemplo n.º 25
0
 /// <summary>
 /// Copies the selected expression columns into new numeric columns (same name and description)
 /// and then keeps only the expression columns that were not selected.
 /// </summary>
 /// <param name="colInds">Indices of the expression columns to convert.</param>
 /// <param name="mdata">Matrix that is modified in place.</param>
 private static void ExpressionToNumeric(IList<int> colInds, IMatrixData mdata)
 {
     // colInds index expression columns and the complement is handed to ExtractExpressionColumns,
     // so it has to be taken relative to the number of expression columns. Using the number of
     // numeric columns here would drop or mis-index the remaining expression columns.
     int[] remainingInds = ArrayUtils.Complement(colInds, mdata.ExpressionColumnCount);
     foreach (int colInd in colInds){
         double[] converted = ArrayUtils.ToDoubles(mdata.GetExpressionColumn(colInd));
         mdata.AddNumericColumn(mdata.ExpressionColumnNames[colInd], mdata.ExpressionColumnDescriptions[colInd],
             converted);
     }
     mdata.ExtractExpressionColumns(remainingInds);
 }
 /// <summary>
 /// For each pair of columns chosen in "Column 1" / "Column 2", evaluates a density on a
 /// "Number of points" x "Number of points" grid via <c>DensityEstimation</c> and appends two
 /// numeric columns holding, per row, the grid value closest to that row's (x, y) point: the
 /// normalised density and the value produced by <c>CalcExcludedPercentage</c>.
 /// Rows where either coordinate is NaN receive NaN in both new columns.
 /// </summary>
 /// <param name="mdata">Matrix that is read and extended in place.</param>
 /// <param name="param">Plugin parameters ("Column 1", "Column 2", "Number of points").</param>
 /// <param name="supplTables">Not used by this method.</param>
 /// <param name="processInfo">Receives an error string when the column selection is invalid.</param>
 public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables, ProcessInfo processInfo)
 {
     int[] firstCols = param.GetMultiChoiceParam("Column 1").Value;
     int[] secondCols = param.GetMultiChoiceParam("Column 2").Value;
     if (firstCols.Length == 0){
         processInfo.ErrString = "Please select some columns";
         return;
     }
     if (firstCols.Length != secondCols.Length){
         processInfo.ErrString = "Please select the same number of columns in the boxes for the first and second columns.";
         return;
     }
     int gridSize = param.GetIntParam("Number of points").Value;
     for (int pair = 0; pair < firstCols.Length; pair++){
         float[] xRaw = GetColumn(mdata, firstCols[pair]);
         float[] yRaw = GetColumn(mdata, secondCols[pair]);
         // Only rows where both coordinates are valid take part in the density estimation.
         float[] xValid;
         float[] yValid;
         NumUtils.GetValidPairs(xRaw, yRaw, out xValid, out yValid);
         double xLow;
         double xHigh;
         double yLow;
         double yHigh;
         DensityEstimation.CalcRanges(xValid, yValid, out xLow, out xHigh, out yLow, out yHigh);
         double xStep = (xHigh - xLow)/gridSize;
         double yStep = (yHigh - yLow)/gridSize;
         float[,] density = DensityEstimation.GetValuesOnGrid(xValid, xLow, xStep, gridSize, yValid, yLow, yStep,
             gridSize);
         DensityEstimation.DivideByMaximum(density);
         // Coordinates of the grid nodes along both axes, used for the nearest-node lookup below.
         double[] xGrid = new double[gridSize];
         double[] yGrid = new double[gridSize];
         for (int node = 0; node < gridSize; node++){
             xGrid[node] = xLow + node*(xHigh - xLow)/gridSize;
             yGrid[node] = yLow + node*(yHigh - yLow)/gridSize;
         }
         float[,] excluded = CalcExcludedPercentage(density);
         double[] densityColumn = new double[xRaw.Length];
         double[] excludedColumn = new double[xRaw.Length];
         for (int row = 0; row < densityColumn.Length; row++){
             double x = xRaw[row];
             double y = yRaw[row];
             if (double.IsNaN(x) || double.IsNaN(y)){
                 densityColumn[row] = double.NaN;
                 excludedColumn[row] = double.NaN;
                 continue;
             }
             int xNode = ArrayUtils.ClosestIndex(xGrid, x);
             int yNode = ArrayUtils.ClosestIndex(yGrid, y);
             densityColumn[row] = density[xNode, yNode];
             excludedColumn[row] = excluded[xNode, yNode];
         }
         string xname = GetColumnName(mdata, firstCols[pair]);
         string yname = GetColumnName(mdata, secondCols[pair]);
         string nameSuffix = xname + "_" + yname;
         string planeText = xname + " and " + yname + ".";
         mdata.AddNumericColumn("Density_" + nameSuffix,
             "Density of data points in the plane spanned by the columns " + planeText, densityColumn);
         mdata.AddNumericColumn("Excluded fraction_" + nameSuffix,
             "Percentage of points with a point density smaller than at this point in the plane spanned by the columns " +
             planeText, excludedColumn);
     }
 }
        /// <summary>
        /// Applies every selected operation to every selected multi-numeric column, reducing each
        /// cell (an array of numbers) to a single number, and appends the results as numeric
        /// columns named "&lt;column name&gt;_&lt;operation name&gt;".
        /// </summary>
        /// <param name="mdata">Matrix that is read and extended in place.</param>
        /// <param name="param1">Plugin parameters ("Columns", "Operation").</param>
        /// <param name="supplTables">Not used by this method.</param>
        /// <param name="documents">Not used by this method.</param>
        /// <param name="processInfo">Not used by this method.</param>
        public void ProcessData(IMatrixData mdata, Parameters param1, ref IMatrixData[] supplTables,
            ref IDocumentData[] documents, ProcessInfo processInfo)
        {
            int[] selectedCols = param1.GetMultiChoiceParam("Columns").Value;
            int[] selectedOps = param1.GetMultiChoiceParam("Operation").Value;
            foreach (int op in selectedOps){
                // First reduce all selected columns for this operation ...
                double[][] reduced = new double[selectedCols.Length][];
                for (int c = 0; c < selectedCols.Length; c++){
                    double[][] cells = mdata.MultiNumericColumns[selectedCols[c]];
                    double[] summary = new double[cells.Length];
                    reduced[c] = summary;
                    for (int row = 0; row < summary.Length; row++){
                        summary[row] = operations[op](cells[row]);
                    }
                }
                // ... then append them, so nothing is added if a reduction fails part-way.
                for (int c = 0; c < selectedCols.Length; c++){
                    string columnName = mdata.MultiNumericColumnNames[selectedCols[c]] + "_" + names[op];
                    mdata.AddNumericColumn(columnName, "", reduced[c]);
                }
            }
        }
Ejemplo n.º 28
0
        /// <summary>
        /// For each selected ratio column (paired with an intensity column), computes p-values via
        /// <c>CalcSignificanceB</c> and appends a numeric "... Significance B" column plus a
        /// category "... B significant" column derived from the chosen truncation and threshold.
        /// </summary>
        /// <param name="mdata">Matrix that is read and extended in place.</param>
        /// <param name="param">Plugin parameters ("Ratio columns", "Intensity columns",
        /// "Use for truncation", "Threshold value", "Side").</param>
        /// <param name="supplTables">Not used by this method.</param>
        /// <param name="documents">Not used by this method.</param>
        /// <param name="processInfo">Receives an error string when the column selection is invalid.</param>
        /// <exception cref="Exception">Thrown for a "Side" index other than 0-2, or for a truncation
        /// mode other than p-value / Benjamini-Hochberg.</exception>
        public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
            ref IDocumentData[] documents, ProcessInfo processInfo)
        {
            int[] ratioCols = param.GetMultiChoiceParam("Ratio columns").Value;
            int[] intensityCols = param.GetMultiChoiceParam("Intensity columns").Value;
            if (ratioCols.Length == 0){
                processInfo.ErrString = "Please specify some ratio columns.";
                return;
            }
            if (ratioCols.Length != intensityCols.Length){
                processInfo.ErrString = "The number of ratio and intensity columns have to be equal.";
                return;
            }
            TestTruncation truncation;
            switch (param.GetSingleChoiceParam("Use for truncation").Value){
                case 0:
                    truncation = TestTruncation.Pvalue;
                    break;
                case 1:
                    truncation = TestTruncation.BenjaminiHochberg;
                    break;
                default:
                    truncation = TestTruncation.PermutationBased;
                    break;
            }
            double threshold = param.GetDoubleParam("Threshold value").Value;
            int sideChoice = param.GetSingleChoiceParam("Side").Value;
            TestSide side;
            if (sideChoice == 0){
                side = TestSide.Both;
            } else if (sideChoice == 1){
                side = TestSide.Left;
            } else if (sideChoice == 2){
                side = TestSide.Right;
            } else{
                throw new Exception("Never get here.");
            }
            for (int pair = 0; pair < ratioCols.Length; pair++){
                float[] ratios = mdata.GetExpressionColumn(ratioCols[pair]);
                // Intensity indices beyond the expression columns refer to numeric columns.
                float[] intensities;
                int intensityInd = intensityCols[pair];
                if (intensityInd < mdata.ExpressionColumnCount){
                    intensities = mdata.GetExpressionColumn(intensityInd);
                } else{
                    intensities = ArrayUtils.ToFloats(mdata.NumericColumns[intensityInd - mdata.ExpressionColumnCount]);
                }
                double[] pvals = CalcSignificanceB(ratios, intensities, side);
                string[][] significant;
                if (truncation == TestTruncation.Pvalue){
                    significant = PerseusPluginUtils.CalcPvalueSignificance(pvals, threshold);
                } else if (truncation == TestTruncation.BenjaminiHochberg){
                    significant = PerseusPluginUtils.CalcBenjaminiHochbergFdr(pvals, threshold);
                } else{
                    throw new Exception("Never get here.");
                }
                mdata.AddNumericColumn(mdata.ExpressionColumnNames[ratioCols[pair]] + " Significance B", "", pvals);
                mdata.AddCategoryColumn(mdata.ExpressionColumnNames[ratioCols[pair]] + " B significant", "", significant);
            }
        }
Ejemplo n.º 29
0
        /// <summary>
        /// Annotates each row with information looked up in a fasta file for the protein IDs found
        /// in the selected string column (IDs are separated by ';').
        /// Depending on the parameters it adds: fasta header text columns, numeric columns
        /// (sequence length, masses), theoretical peptide counts/sequences per protease, and a
        /// count of matches of a user-supplied regular expression in the sequences.
        /// </summary>
        /// <param name="mdata">Matrix that is read and extended in place.</param>
        /// <param name="param">Plugin parameters.</param>
        /// <param name="supplTables">Not used by this method.</param>
        /// <param name="documents">Not used by this method.</param>
        /// <param name="processInfo">Receives status messages and, for an invalid regular
        /// expression, an error string.</param>
        public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
                                ref IDocumentData[] documents, ProcessInfo processInfo)
        {
            // Validate the user-supplied regular expression up front, so that an invalid pattern is
            // reported before the fasta file is parsed and before any column has been added.
            ParameterWithSubParams<int> featureParam = param.GetParamWithSubParams<int>("Count sequence features");
            string pattern = featureParam.GetSubParameters().GetParam<string>("Regex").Value;
            Regex regex = null;
            if (pattern != "")
            {
                try
                {
                    regex = new Regex(pattern);
                }
                catch (ArgumentException)
                {
                    processInfo.ErrString = "The regular expression you provided has invalid syntax.";
                    return;
                }
            }

            int proteinIdColumnInd = param.GetParam<int>("Protein IDs").Value;
            string[][] proteinIds = new string[mdata.RowCount][];
            string[][] leadingIds = new string[mdata.RowCount][];
            for (int row = 0; row < mdata.RowCount; row++)
            {
                proteinIds[row] = mdata.StringColumns[proteinIdColumnInd][row].Split(';');
                leadingIds[row] = new[] { proteinIds[row][0] };
            }
            string fastaFilePath = param.GetParam<string>("Fasta file").Value;
            Fasta fasta = new Fasta();
            fasta.ParseFile(fastaFilePath, processInfo);

            // Text annotations
            processInfo.Status("Adding fasta header annotations.");
            ParameterWithSubParams<int> headerParam = param.GetParamWithSubParams<int>("Fasta header annotations");
            int[] selection = headerParam.GetSubParameters().GetParam<int[]>("Annotations").Value;
            string[][] idsToBeAnnotated = headerParam.Value == 0 ? proteinIds : leadingIds;
            // Resolve every ID once; IDs without a fasta entry are silently skipped.
            ProteinSequence[][] fastaEntries = new ProteinSequence[mdata.RowCount][];
            for (int row = 0; row < mdata.RowCount; row++)
            {
                List<ProteinSequence> rowEntries = new List<ProteinSequence>();
                foreach (string id in idsToBeAnnotated[row])
                {
                    ProteinSequence found = fasta.GetEntry(id);
                    if (found != null)
                    {
                        rowEntries.Add(found);
                    }
                }
                fastaEntries[row] = rowEntries.ToArray();
            }
            if (ArrayUtils.Contains(selection, 0))
            {
                AddJoinedDistinctColumn(mdata, fastaEntries, "Entry name", p => p.EntryName);
            }
            if (ArrayUtils.Contains(selection, 1))
            {
                AddJoinedDistinctColumn(mdata, fastaEntries, "Gene name", p => p.GeneName);
            }
            if (ArrayUtils.Contains(selection, 2))
            {
                // Verbose protein name, i.e. all protein names annotated in all fasta headers, including the
                // 'Isoform x of...' prefixes and '(Fragment)' suffixes
                AddJoinedDistinctColumn(mdata, fastaEntries, "Protein name (verbose)", p => p.ProteinName);
            }
            if (ArrayUtils.Contains(selection, 3))
            {
                AddJoinedDistinctColumn(mdata, fastaEntries, "Protein name", p => p.ConsensusProteinName);
            }
            if (ArrayUtils.Contains(selection, 4))
            {
                AddJoinedDistinctColumn(mdata, fastaEntries, "Species", p => p.Species);
            }

            // Numeric annotations
            processInfo.Status("Adding numeric annotations.");
            ParameterWithSubParams<int> numericParam = param.GetParamWithSubParams<int>("Numeric annotations");
            selection = numericParam.GetSubParameters().GetParam<int[]>("Annotations").Value;
            bool annotateLeadingId = numericParam.Value == 1;
            if (ArrayUtils.Contains(selection, 0))
            {
                mdata.AddNumericColumn("Sequence length", "",
                    MedianPerRow(fastaEntries, annotateLeadingId, p => p.GetSequence().Length));
            }
            if (ArrayUtils.Contains(selection, 1))
            {
                mdata.AddNumericColumn("Monoisotopic molecular mass", "",
                    MedianPerRow(fastaEntries, annotateLeadingId, p => p.GetMonoisotopicMolecularMass()));
            }
            if (ArrayUtils.Contains(selection, 2))
            {
                mdata.AddNumericColumn("Average molecular mass", "",
                    MedianPerRow(fastaEntries, annotateLeadingId, p => p.GetAverageMolecularMass()));
            }

            // Theoretical peptides
            processInfo.Status("Calculating theoretical peptides.");
            ParameterWithSubParams<int> peptideParam = param.GetParamWithSubParams<int>("Calculate theoretical peptides");
            annotateLeadingId = peptideParam.Value == 1;
            Protease[] proteases = ArrayUtils.SubArray(Constants.defaultProteases,
                peptideParam.GetSubParameters().GetParam<int[]>("Proteases").Value);
            double minLength = peptideParam.GetSubParameters().GetParam<double>("Min. peptide length").Value;
            double maxLength = peptideParam.GetSubParameters().GetParam<double>("Max. peptide length").Value;
            // Sequences are only listed in leading-ID mode (one entry per row).
            bool displayPeptideSequences = annotateLeadingId &&
                                           peptideParam.GetSubParameters().GetParam<bool>("Show sequences").Value;
            foreach (Protease protease in proteases)
            {
                double[] annotationColumn = new double[mdata.RowCount];
                string[] peptideColumn = new string[mdata.RowCount];
                for (int row = 0; row < mdata.RowCount; row++)
                {
                    List<double> rowAnnotations = new List<double>();
                    List<string> rowPeptides = new List<string>();
                    foreach (ProteinSequence sequence in fastaEntries[row])
                    {
                        double nTheoreticalPeptides =
                            sequence.GetNumberOfTheoreticalPeptides(protease, (int) minLength, (int) maxLength);
                        rowAnnotations.Add(nTheoreticalPeptides);
                        if (displayPeptideSequences)
                        {
                            rowPeptides.AddRange(
                                sequence.GetTheoreticalPeptideSequences(protease, (int) minLength, (int) maxLength));
                        }
                        if (annotateLeadingId)
                        {
                            break;
                        }
                    }
                    annotationColumn[row] = ArrayUtils.Median(rowAnnotations.ToArray());
                    peptideColumn[row] = String.Join(";", rowPeptides);
                }
                mdata.AddNumericColumn(
                    "Number of theoretical peptides (" + protease.name + ", " + minLength + "-" + maxLength + ")", "",
                    annotationColumn);
                if (displayPeptideSequences)
                {
                    mdata.AddStringColumn(
                        "Theoretical peptide sequences (" + protease.name + ", " + minLength + "-" + maxLength + ")", "",
                        peptideColumn);
                }
            }

            // Sequence features
            processInfo.Status("Counting sequence features.");
            annotateLeadingId = featureParam.Value == 1;
            bool normalizeBySequenceLength =
                featureParam.GetSubParameters().GetParam<bool>("Normalize by sequence length").Value;
            if (regex != null)
            {
                double[] sequenceFeatureColumn = MedianPerRow(fastaEntries, annotateLeadingId, p =>
                {
                    double nFeatures = regex.Matches(p.GetSequence()).Count;
                    return normalizeBySequenceLength ? nFeatures / p.GetLength() : nFeatures;
                });
                mdata.AddNumericColumn(
                    (normalizeBySequenceLength ? "Normalized feature count (" : "Feature count (") + regex + ")", "",
                    sequenceFeatureColumn);
            }
            processInfo.Status("Done.");
        }

        /// <summary>
        /// Adds a string column in which each row holds the distinct non-null values that
        /// <paramref name="getText"/> yields for that row's fasta entries, in first-seen order,
        /// joined by ';'.
        /// </summary>
        private static void AddJoinedDistinctColumn(IMatrixData mdata, ProteinSequence[][] fastaEntries, string columnName,
                                                    Func<ProteinSequence, string> getText)
        {
            string[] annotationColumn = new string[fastaEntries.Length];
            for (int row = 0; row < fastaEntries.Length; row++)
            {
                List<string> rowAnnotations = new List<string>();
                foreach (ProteinSequence sequence in fastaEntries[row])
                {
                    string text = getText(sequence);
                    if (text != null && !ArrayUtils.Contains(rowAnnotations, text))
                    {
                        rowAnnotations.Add(text);
                    }
                }
                annotationColumn[row] = string.Join(";", rowAnnotations.ToArray());
            }
            mdata.AddStringColumn(columnName, "", annotationColumn);
        }

        /// <summary>
        /// Returns, per row, <c>ArrayUtils.Median</c> of the values that <paramref name="getValue"/>
        /// yields for that row's fasta entries. With <paramref name="firstEntryOnly"/> set, only the
        /// first entry of each row is evaluated.
        /// </summary>
        private static double[] MedianPerRow(ProteinSequence[][] fastaEntries, bool firstEntryOnly,
                                             Func<ProteinSequence, double> getValue)
        {
            double[] medians = new double[fastaEntries.Length];
            for (int row = 0; row < fastaEntries.Length; row++)
            {
                List<double> rowValues = new List<double>();
                foreach (ProteinSequence sequence in fastaEntries[row])
                {
                    rowValues.Add(getValue(sequence));
                    if (firstEntryOnly)
                    {
                        break;
                    }
                }
                medians[row] = ArrayUtils.Median(rowValues.ToArray());
            }
            return medians;
        }
Ejemplo n.º 30
0
        /// <summary>
        /// Computes the row-wise summaries selected in "Calculate" (one per entry of <c>procs</c>)
        /// over the finite values of the chosen main columns and appends each as a numeric column.
        /// When "Expression column selection" is 2 (grouped), the same summaries are additionally
        /// computed per group of the chosen category row and appended as
        /// "&lt;summary name&gt; &lt;group name&gt;".
        /// </summary>
        /// <param name="mdata">Matrix that is read and extended in place.</param>
        /// <param name="param">Plugin parameters ("Expression column selection", "Calculate").</param>
        /// <param name="supplTables">Not used by this method.</param>
        /// <param name="documents">Not used by this method.</param>
        /// <param name="processInfo">Not used by this method.</param>
        public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
                                ref IDocumentData[] documents, ProcessInfo processInfo)
        {
            ParameterWithSubParams<int> selectionParam = param.GetParamWithSubParams<int>("Expression column selection");
            bool byGroup = selectionParam.Value == 2;

            string[] groupNames = null;
            int[][] groupColumns = null;
            if (byGroup)
            {
                int groupRowInd = selectionParam.GetSubParameters().GetParam<int>("Group").Value;
                string[][] groupRow = mdata.GetCategoryRowAt(groupRowInd);
                groupNames = ArrayUtils.UniqueValuesPreserveOrder(groupRow);
                groupColumns = PerseusPluginUtils.GetMainColIndices(groupRow, groupNames);
            }
            int[] useCols;
            if (selectionParam.Value == 1)
            {
                useCols = selectionParam.GetSubParameters().GetParam<int[]>("Columns").Value;
            }
            else
            {
                useCols = ArrayUtils.ConsecutiveInts(mdata.ColumnCount);
            }
            HashSet<int> requested = ArrayUtils.ToHashSet(param.GetParam<int[]>("Calculate").Value);

            // Allocate result storage only for the summaries that were requested.
            bool[] selected = new bool[procs.Length];
            double[][] overall = new double[procs.Length][];
            double[][][] perGroup = byGroup ? new double[procs.Length][][] : null;
            for (int s = 0; s < selected.Length; s++)
            {
                if (byGroup)
                {
                    perGroup[s] = new double[groupNames.Length][];
                }
                selected[s] = requested.Contains(s);
                if (!selected[s])
                {
                    continue;
                }
                overall[s] = new double[mdata.RowCount];
                if (byGroup)
                {
                    for (int g = 0; g < groupNames.Length; g++)
                    {
                        perGroup[s][g] = new double[mdata.RowCount];
                    }
                }
            }

            for (int row = 0; row < mdata.RowCount; row++)
            {
                // NaN and infinite values are left out of every summary.
                List<double> finite = new List<double>();
                foreach (int col in useCols)
                {
                    double x = mdata.Values.Get(row, col);
                    if (double.IsNaN(x) || double.IsInfinity(x))
                    {
                        continue;
                    }
                    finite.Add(x);
                }
                for (int s = 0; s < selected.Length; s++)
                {
                    if (selected[s])
                    {
                        overall[s][row] = procs[s].Item2(finite);
                    }
                }
                if (!byGroup)
                {
                    continue;
                }
                List<double>[] finiteByGroup = new List<double>[groupNames.Length];
                for (int g = 0; g < groupColumns.Length; g++)
                {
                    finiteByGroup[g] = new List<double>();
                    foreach (int col in groupColumns[g])
                    {
                        double x = mdata.Values.Get(row, col);
                        if (double.IsNaN(x) || double.IsInfinity(x))
                        {
                            continue;
                        }
                        finiteByGroup[g].Add(x);
                    }
                }
                for (int s = 0; s < selected.Length; s++)
                {
                    if (!selected[s])
                    {
                        continue;
                    }
                    for (int g = 0; g < groupNames.Length; g++)
                    {
                        perGroup[s][g][row] = procs[s].Item2(finiteByGroup[g]);
                    }
                }
            }

            for (int s = 0; s < selected.Length; s++)
            {
                if (!selected[s])
                {
                    continue;
                }
                mdata.AddNumericColumn(procs[s].Item1, procs[s].Item3, overall[s]);
                if (byGroup)
                {
                    for (int g = 0; g < groupNames.Length; g++)
                    {
                        mdata.AddNumericColumn(procs[s].Item1 + " " + groupNames[g], procs[s].Item3, perGroup[s][g]);
                    }
                }
            }
        }
        /// <summary>
        /// For every selected modification, loads the known sites through
        /// <c>PhosphoSitePlusParser</c>, collects the distinct sequence windows per accession and
        /// stores, per row, the value <c>CountSites</c> returns for the row's ';'-separated
        /// identifiers. Adds one "&lt;modification&gt; count" numeric column per modification and a
        /// "Known modifications" category column listing the modifications with a count above zero.
        /// </summary>
        /// <param name="mdata">Matrix that is read and extended in place.</param>
        /// <param name="param">Plugin parameters ("Modifications", "Uniprot column").</param>
        /// <param name="supplTables">Not used by this method.</param>
        /// <param name="documents">Not used by this method.</param>
        /// <param name="processInfo">Receives an error string when no file is available for a
        /// modification; in that case no column is added.</param>
        public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
            ref IDocumentData[] documents, ProcessInfo processInfo)
        {
            string[] mods = param.GetParam<int[]>("Modifications").StringValue.Split(new[]{';'},
                StringSplitOptions.RemoveEmptyEntries);
            string[] up = mdata.StringColumns[param.GetParam<int>("Uniprot column").Value];
            string[][] uprot = new string[up.Length][];
            for (int i = 0; i < up.Length; i++){
                uprot[i] = up[i].Length > 0 ? up[i].Split(';') : new string[0];
            }
            double[][] c = new double[mods.Length][];
            for (int index = 0; index < mods.Length; index++){
                string mod = mods[index];
                string filename = PhosphoSitePlusParser.GetFilenameForMod(mod);
                if (filename == null){
                    processInfo.ErrString = "File does not exist.";
                    return;
                }
                // Only the sequence windows and accessions are used below; the remaining outputs
                // are required by the parser's signature.
                string[] seqWins;
                string[] accs;
                string[] pubmedLtp;
                string[] pubmedMs2;
                string[] cstMs2;
                string[] species;
                PhosphoSitePlusParser.ParseKnownMods(filename, out seqWins, out accs, out pubmedLtp, out pubmedMs2, out cstMs2, out species);
                Dictionary<string, HashSet<string>> counts = new Dictionary<string, HashSet<string>>();
                for (int i = 0; i < accs.Length; i++){
                    HashSet<string> windows;
                    if (!counts.TryGetValue(accs[i], out windows)){
                        windows = new HashSet<string>();
                        counts.Add(accs[i], windows);
                    }
                    // Sequence windows are data, not UI text: normalise case independently of the
                    // current culture so that de-duplication is the same on every machine.
                    windows.Add(seqWins[i].ToUpperInvariant());
                }
                c[index] = new double[up.Length];
                for (int i = 0; i < up.Length; i++){
                    c[index][i] = CountSites(uprot[i], counts);
                }
            }
            string[][] catCol = new string[up.Length][];
            for (int i = 0; i < catCol.Length; i++){
                List<string> present = new List<string>();
                for (int j = 0; j < mods.Length; j++){
                    if (c[j][i] > 0){
                        present.Add(mods[j]);
                    }
                }
                present.Sort();
                catCol[i] = present.ToArray();
            }
            mdata.AddCategoryColumn("Known modifications", "Known modifications", catCol);
            for (int i = 0; i < mods.Length; i++){
                mdata.AddNumericColumn(mods[i] + " count", mods[i] + " count", c[i]);
            }
        }