Exemplo n.º 1
0
        /// <summary>
        /// Applies the z-score step to <paramref name="mdata"/>, either over the whole matrix
        /// (via <c>Zscore</c>) or per group of a category row (via <c>ZscoreGroups</c>), and
        /// optionally attaches the resulting means and standard deviations as annotations.
        /// </summary>
        /// <param name="mdata">Matrix that is processed and that receives the optional report annotations.</param>
        /// <param name="param">Supplies "Matrix access" (with its "Grouping" sub-parameter),
        /// "Report mean and std. dev." and "Use median".</param>
        /// <param name="supplTables">Not used by this method.</param>
        /// <param name="documents">Not used by this method.</param>
        /// <param name="processInfo">Provides the thread count; its <c>ErrString</c> is set when a
        /// column of the grouping row carries more than one category.</param>
        public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
                                ref IDocumentData[] documents, ProcessInfo processInfo)
        {
            ParameterWithSubParams <int> matrixAccess = param.GetParamWithSubParams <int>("Matrix access");
            bool byRows = matrixAccess.Value == 0;
            // "Grouping" is only consulted in row mode. The selected value is shifted by one,
            // so a selection of 0 yields -1, which is treated as "no grouping" below.
            int groupIndex = byRows
                ? matrixAccess.GetSubParameters().GetParam <int>("Grouping").Value - 1
                : -1;
            bool reportStats = param.GetParam <bool>("Report mean and std. dev.").Value;
            bool useMedian = param.GetParam <bool>("Use median").Value;

            if (groupIndex >= 0)
            {
                string[][] groupRow = mdata.GetCategoryRowAt(groupIndex);
                // Each column may belong to at most one group.
                for (int col = 0; col < groupRow.Length; col++)
                {
                    if (groupRow[col].Length > 1)
                    {
                        processInfo.ErrString = "The groups are overlapping.";
                        return;
                    }
                }
                string[] groupNames = ArrayUtils.UniqueValuesPreserveOrder(groupRow);
                ZscoreGroups(mdata, groupRow, processInfo.NumThreads, reportStats, useMedian, groupNames,
                             out double[][] groupMeans, out double[][] groupStddevs);
                if (!reportStats)
                {
                    return;
                }
                // One mean column and one standard-deviation column per group.
                for (int g = 0; g < groupNames.Length; g++)
                {
                    string groupName = groupNames[g];
                    mdata.AddNumericColumn("Mean " + groupName, "Mean", groupMeans[g]);
                    mdata.AddNumericColumn("Std. dev. " + groupName, "Std. dev.", groupStddevs[g]);
                }
                return;
            }

            Zscore(byRows, mdata, processInfo.NumThreads, reportStats, useMedian,
                   out double[] means, out double[] stddevs);
            if (!reportStats)
            {
                return;
            }
            // Row mode reports the statistics as numeric columns, column mode as numeric rows.
            if (byRows)
            {
                mdata.AddNumericColumn("Mean", "Mean", means);
                mdata.AddNumericColumn("Std. dev.", "Std. dev.", stddevs);
            }
            else
            {
                mdata.AddNumericRow("Mean", "Mean", means);
                mdata.AddNumericRow("Std. dev.", "Std. dev.", stddevs);
            }
        }
Exemplo n.º 2
0
        /// <summary>
        /// Round-trip test: builds a small matrix with every kind of annotation row and column,
        /// writes it with <c>PerseusUtils.WriteMatrix</c>, reads the text back with
        /// <c>PerseusUtils.ReadMatrix</c> and checks the dimensions, the annotation counts and one
        /// string cell of the result.
        /// </summary>
        public void WriteMatrixTest()
        {
            // Main data: two rows, three columns.
            double[,] mainValues = { { 1, 2, 3 }, { 3, 4, 5 } };
            List <string> mainColumnNames = new List <string> { "col1", "col2", "col3" };
            IMatrixData written = PerseusFactory.CreateMatrixData(mainValues, mainColumnNames);

            // Annotation rows (one entry per main column).
            string[][] categoryRow = { new[] { "cat1" }, new[] { "cat1", "cat2" }, new[] { "cat2" } };
            double[] numericRow = { -1.0, 1, 2 };
            written.AddCategoryRow("catrow", "this is catrow", categoryRow);
            written.AddNumericRow("numrow", "this is numrow", numericRow);

            // Annotation columns (one entry per matrix row).
            string[] stringColumn1 = { "1", "2" };
            string[] stringColumn2 = { "", "hallo" };
            double[] numericColumn = { 1.0, 2.0 };
            double[][] multiNumericColumn = { new[] { -2.0, 2.0 }, new double[] {} };
            string[][] categoryColumn = { new[] { "cat1", "cat1.1" }, new[] { "cat2", "cat1" } };
            written.AddStringColumn("strcol1", "this is stringcol1", stringColumn1);
            written.AddStringColumn("strcol2", "", stringColumn2);
            written.AddNumericColumn("numcol", "", numericColumn);
            written.AddMultiNumericColumn("multnumcol", "this is multnumcol", multiNumericColumn);
            written.AddCategoryColumn("catcol", "", categoryColumn);

            // Serialize the matrix into an in-memory buffer and decode it as UTF-8 text.
            string serialized;
            using (MemoryStream buffer = new MemoryStream())
            {
                using (StreamWriter writer = new StreamWriter(buffer))
                {
                    PerseusUtils.WriteMatrix(written, writer);
                    writer.Flush();
                    byte[] rawBytes = buffer.ToArray();
                    serialized = Encoding.UTF8.GetString(rawBytes);
                }
            }

            // Parse the text back into a fresh matrix; the factory hands out a new reader on each call.
            IMatrixData readBack = PerseusFactory.CreateMatrixData();
            ProcessInfo info = new ProcessInfo(new Settings(), status => { }, progress => { }, 1);
            PerseusUtils.ReadMatrix(readBack, info,
                                    () => new StreamReader(new MemoryStream(Encoding.UTF8.GetBytes(serialized))),
                                    "matrix1", '\t');

            // Main matrix dimensions.
            Assert.AreEqual(2, readBack.RowCount);
            Assert.AreEqual(3, readBack.ColumnCount);

            // Annotation column counts.
            Assert.AreEqual(2, readBack.StringColumnCount);
            Assert.AreEqual(1, readBack.NumericColumnCount);
            Assert.AreEqual(1, readBack.CategoryColumnCount);
            Assert.AreEqual(1, readBack.MultiNumericColumnCount);

            // Content of the second string column survives the round trip.
            int strcol2Index = readBack.StringColumnNames.FindIndex(col => col.Equals("strcol2"));
            Assert.AreEqual("hallo", readBack.StringColumns[strcol2Index][1]);

            // Annotation row counts.
            Assert.AreEqual(1, readBack.CategoryRowCount);
            Assert.AreEqual(1, readBack.NumericRowCount);
        }
Exemplo n.º 3
0
        /// <summary>
        /// Reads the data lines from <paramref name="reader"/> and fills <paramref name="matrixData"/>
        /// with the main values, the four kinds of annotation columns, the column descriptions and
        /// the annotation rows. The reader is closed after the last line has been consumed.
        /// </summary>
        /// <param name="colNames">Names of all columns of the input; indexed by the <c>*ColIndices</c> lists.</param>
        /// <param name="colDescriptions">Descriptions of all columns, or <c>null</c> to leave descriptions untouched.</param>
        /// <param name="mainColIndices">Indices of the fields that go into the main matrix.</param>
        /// <param name="catColIndices">Indices of the fields read as category columns (';'-separated terms).</param>
        /// <param name="numColIndices">Indices of the fields read as numeric columns.</param>
        /// <param name="textColIndices">Indices of the fields read as string columns.</param>
        /// <param name="multiNumColIndices">Indices of the fields read as multi-numeric columns (';'-separated numbers).</param>
        /// <param name="origin">Assigned to both <c>Name</c> and <c>Origin</c> of the matrix.</param>
        /// <param name="matrixData">Matrix that receives all parsed content.</param>
        /// <param name="annotationRows">Annotation rows, split into category and numeric rows by <c>SplitAnnotRows</c>.</param>
        /// <param name="progress">Called once per line read with a percentage, and with 0 at the end.</param>
        /// <param name="status">Called with "Reading data" at the start and "" at the end.</param>
        /// <param name="separator">Field separator passed on to the line-splitting helpers.</param>
        /// <param name="reader">Source of the lines; its first line is discarded before parsing starts.</param>
        /// <param name="auxReader">Optional reader used only to detect whether quality/imputation matrices are present.</param>
        /// <param name="nrows">Number of rows allocated for every value array.</param>
        /// <param name="shortenExpressionNames">When true, common substrings are removed from the main column names.</param>
        /// <param name="filters">Row filters passed on to <c>IsValidLine</c>.</param>
        private static void LoadMatrixData(IList<string> colNames, IList<string> colDescriptions, IList<int> mainColIndices,
            IList<int> catColIndices, IList<int> numColIndices, IList<int> textColIndices, IList<int> multiNumColIndices,
            string origin, IMatrixData matrixData, IDictionary<string, string[]> annotationRows, Action<int> progress,
            Action<string> status, char separator, TextReader reader, StreamReader auxReader, int nrows,
            bool shortenExpressionNames, List<Tuple<Relation[], int[], bool>> filters)
        {
            Dictionary<string, string[]> catAnnotatRows;
            Dictionary<string, string[]> numAnnotatRows;
            status("Reading data");
            SplitAnnotRows(annotationRows, out catAnnotatRows, out numAnnotatRows);
            // Pre-allocate one array of nrows entries per annotation column.
            List<string[][]> categoryAnnotation = new List<string[][]>();
            for (int i = 0; i < catColIndices.Count; i++){
                categoryAnnotation.Add(new string[nrows][]);
            }
            List<double[]> numericAnnotation = new List<double[]>();
            for (int i = 0; i < numColIndices.Count; i++){
                numericAnnotation.Add(new double[nrows]);
            }
            List<double[][]> multiNumericAnnotation = new List<double[][]>();
            for (int i = 0; i < multiNumColIndices.Count; i++){
                multiNumericAnnotation.Add(new double[nrows][]);
            }
            List<string[]> stringAnnotation = new List<string[]>();
            for (int i = 0; i < textColIndices.Count; i++){
                stringAnnotation.Add(new string[nrows]);
            }
            float[,] mainValues = new float[nrows, mainColIndices.Count];
            float[,] qualityValues = null;
            bool[,] isImputedValues = null;
            // Quality and imputation matrices are only allocated when the auxiliary reader reports them.
            bool hasAddtlMatrices = auxReader != null && GetHasAddtlMatrices(auxReader, mainColIndices, separator);
            if (hasAddtlMatrices){
                qualityValues = new float[nrows, mainColIndices.Count];
                isImputedValues = new bool[nrows, mainColIndices.Count];
            }
            // Discard the first line (presumably the column header -- confirm against the caller).
            reader.ReadLine();
            int count = 0;
            string line;
            while ((line = reader.ReadLine()) != null){
                progress(100*(count + 1)/nrows);
                if (TabSep.IsCommentLine(line, commentPrefix, commentPrefixExceptions)){
                    continue;
                }
                string[] w;
                if (!IsValidLine(line, separator, filters, out w, hasAddtlMatrices)){
                    continue;
                }
                // NOTE(review): only some of the writes below are guarded against count >= nrows;
                // the "field missing" branches and the ParseExp branch index with count unguarded.
                // Main matrix: missing or unparseable fields become NaN.
                for (int i = 0; i < mainColIndices.Count; i++){
                    if (mainColIndices[i] >= w.Length){
                        mainValues[count, i] = float.NaN;
                    } else{
                        string s = StringUtils.RemoveWhitespace(w[mainColIndices[i]]);
                        if (hasAddtlMatrices){
                            ParseExp(s, out mainValues[count, i], out isImputedValues[count, i], out qualityValues[count, i]);
                        } else{
                            if (count < mainValues.GetLength(0)){
                                bool success = float.TryParse(s, out mainValues[count, i]);
                                if (!success){
                                    mainValues[count, i] = float.NaN;
                                }
                            }
                        }
                    }
                }
                // Numeric columns: missing or unparseable fields become NaN.
                for (int i = 0; i < numColIndices.Count; i++){
                    if (numColIndices[i] >= w.Length){
                        numericAnnotation[i][count] = double.NaN;
                    } else{
                        double q;
                        bool success = double.TryParse(w[numColIndices[i]].Trim(), out q);
                        if (numericAnnotation[i].Length > count){
                            numericAnnotation[i][count] = success ? q : double.NaN;
                        }
                    }
                }
                // Multi-numeric columns: strip one pair of surrounding quotes, then split on ';'.
                for (int i = 0; i < multiNumColIndices.Count; i++){
                    if (multiNumColIndices[i] >= w.Length){
                        multiNumericAnnotation[i][count] = new double[0];
                    } else{
                        string q = w[multiNumColIndices[i]].Trim();
                        if (q.Length >= 2 && q[0] == '\"' && q[q.Length - 1] == '\"'){
                            q = q.Substring(1, q.Length - 2);
                        }
                        if (q.Length >= 2 && q[0] == '\'' && q[q.Length - 1] == '\''){
                            q = q.Substring(1, q.Length - 2);
                        }
                        string[] ww = q.Length == 0 ? new string[0] : q.Split(';');
                        multiNumericAnnotation[i][count] = new double[ww.Length];
                        for (int j = 0; j < ww.Length; j++){
                            double q1;
                            bool success = double.TryParse(ww[j], out q1);
                            multiNumericAnnotation[i][count][j] = success ? q1 : double.NaN;
                        }
                    }
                }
                // Category columns: strip quotes, split on ';', drop empty terms and sort the rest.
                for (int i = 0; i < catColIndices.Count; i++){
                    if (catColIndices[i] >= w.Length){
                        categoryAnnotation[i][count] = new string[0];
                    } else{
                        string q = w[catColIndices[i]].Trim();
                        if (q.Length >= 2 && q[0] == '\"' && q[q.Length - 1] == '\"'){
                            q = q.Substring(1, q.Length - 2);
                        }
                        if (q.Length >= 2 && q[0] == '\'' && q[q.Length - 1] == '\''){
                            q = q.Substring(1, q.Length - 2);
                        }
                        string[] ww = q.Length == 0 ? new string[0] : q.Split(';');
                        List<int> valids = new List<int>();
                        for (int j = 0; j < ww.Length; j++){
                            ww[j] = ww[j].Trim();
                            if (ww[j].Length > 0){
                                valids.Add(j);
                            }
                        }
                        ww = ArrayUtils.SubArray(ww, valids);
                        Array.Sort(ww);
                        if (categoryAnnotation[i].Length > count){
                            categoryAnnotation[i][count] = ww;
                        }
                    }
                }
                // String columns: missing fields become the empty string.
                for (int i = 0; i < textColIndices.Count; i++){
                    if (textColIndices[i] >= w.Length){
                        stringAnnotation[i][count] = "";
                    } else{
                        string q = w[textColIndices[i]].Trim();
                        if (stringAnnotation[i].Length > count){
                            stringAnnotation[i][count] = RemoveSplitWhitespace(RemoveQuotes(q));
                        }
                    }
                }
                count++;
            }
            reader.Close();
            string[] columnNames = ArrayUtils.SubArray(colNames, mainColIndices);
            if (shortenExpressionNames){
                columnNames = StringUtils.RemoveCommonSubstrings(columnNames, true);
            }
            string[] catColnames = ArrayUtils.SubArray(colNames, catColIndices);
            string[] numColnames = ArrayUtils.SubArray(colNames, numColIndices);
            string[] multiNumColnames = ArrayUtils.SubArray(colNames, multiNumColIndices);
            string[] textColnames = ArrayUtils.SubArray(colNames, textColIndices);
            matrixData.Name = origin;
            matrixData.ColumnNames = RemoveQuotes(columnNames);
            matrixData.Values.Set(mainValues);
            if (hasAddtlMatrices){
                matrixData.Quality.Set(qualityValues);
                matrixData.IsImputed.Set(isImputedValues);
            } else{
                // No additional matrices in the input: install default-initialized ones of matching size.
                matrixData.Quality.Set(new float[mainValues.GetLength(0), mainValues.GetLength(1)]);
                matrixData.IsImputed.Set(new bool[mainValues.GetLength(0), mainValues.GetLength(1)]);
            }
            matrixData.SetAnnotationColumns(RemoveQuotes(textColnames), stringAnnotation, RemoveQuotes(catColnames),
                categoryAnnotation, RemoveQuotes(numColnames), numericAnnotation, RemoveQuotes(multiNumColnames),
                multiNumericAnnotation);
            if (colDescriptions != null){
                string[] columnDesc = ArrayUtils.SubArray(colDescriptions, mainColIndices);
                string[] catColDesc = ArrayUtils.SubArray(colDescriptions, catColIndices);
                string[] numColDesc = ArrayUtils.SubArray(colDescriptions, numColIndices);
                string[] multiNumColDesc = ArrayUtils.SubArray(colDescriptions, multiNumColIndices);
                string[] textColDesc = ArrayUtils.SubArray(colDescriptions, textColIndices);
                matrixData.ColumnDescriptions = new List<string>(columnDesc);
                matrixData.NumericColumnDescriptions = new List<string>(numColDesc);
                matrixData.CategoryColumnDescriptions = new List<string>(catColDesc);
                matrixData.StringColumnDescriptions = new List<string>(textColDesc);
                matrixData.MultiNumericColumnDescriptions = new List<string>(multiNumColDesc);
            }
            // Category annotation rows: one ';'-separated, trimmed, sorted term list per main column.
            foreach (string key in catAnnotatRows.Keys){
                string name = key;
                string[] svals = ArrayUtils.SubArray(catAnnotatRows[key], mainColIndices);
                string[][] cat = new string[svals.Length][];
                for (int i = 0; i < cat.Length; i++){
                    string s = svals[i].Trim();
                    cat[i] = s.Length > 0 ? s.Split(';') : new string[0];
                    List<int> valids = new List<int>();
                    for (int j = 0; j < cat[i].Length; j++){
                        cat[i][j] = cat[i][j].Trim();
                        if (cat[i][j].Length > 0){
                            valids.Add(j);
                        }
                    }
                    cat[i] = ArrayUtils.SubArray(cat[i], valids);
                    Array.Sort(cat[i]);
                }
                matrixData.AddCategoryRow(name, name, cat);
            }
            // Numeric annotation rows: one value per main column, NaN where the text is not a number.
            foreach (string key in numAnnotatRows.Keys){
                string name = key;
                string[] svals = ArrayUtils.SubArray(numAnnotatRows[key], mainColIndices);
                double[] num = new double[svals.Length];
                for (int i = 0; i < num.Length; i++){
                    string s = svals[i].Trim();
                    // double.TryParse writes 0 to its out argument on failure, so NaN has to be
                    // assigned after a failed parse; pre-assigning it would be overwritten with 0.
                    if (!double.TryParse(s, out num[i])){
                        num[i] = double.NaN;
                    }
                }
                matrixData.AddNumericRow(name, name, num);
            }
            matrixData.Origin = origin;
            progress(0);
            status("");
        }
 /// <summary>
 /// Adds a numeric annotation row to <paramref name="mdata"/>. The row name comes from the
 /// "Row name" parameter, and the value for each expression column comes from the double
 /// parameter that carries that column's name.
 /// </summary>
 private static void ProcessDataCreate(IMatrixData mdata, Parameters param)
 {
     string rowName = param.GetStringParam("Row name").Value;
     int columnCount = mdata.ExpressionColumnCount;
     double[] rowValues = new double[columnCount];
     int col = 0;
     while (col < columnCount){
         // Each expression column has a double parameter registered under its own name.
         rowValues[col] = param.GetDoubleParam(mdata.ExpressionColumnNames[col]).Value;
         col++;
     }
     // The row name doubles as the row description.
     mdata.AddNumericRow(rowName, rowName, rowValues);
 }
Exemplo n.º 5
0
        /// <summary>
        /// Reads the data lines from <paramref name="reader"/> and fills <paramref name="matrixData"/>
        /// with the main values, the four kinds of annotation columns, the column descriptions and
        /// the annotation rows. The reader is closed after the last line has been consumed.
        /// </summary>
        /// <param name="colNames">Names of all columns of the input; indexed by the <c>*ColIndices</c> lists.</param>
        /// <param name="colDescriptions">Descriptions of all columns, or <c>null</c> to leave descriptions untouched.</param>
        /// <param name="mainColIndices">Indices of the fields that go into the main matrix.</param>
        /// <param name="catColIndices">Indices of the fields read as category columns (';'-separated terms).</param>
        /// <param name="numColIndices">Indices of the fields read as numeric columns.</param>
        /// <param name="textColIndices">Indices of the fields read as string columns.</param>
        /// <param name="multiNumColIndices">Indices of the fields read as multi-numeric columns (';'-separated numbers).</param>
        /// <param name="origin">Assigned to both <c>Name</c> and <c>Origin</c> of the matrix.</param>
        /// <param name="matrixData">Matrix that receives all parsed content.</param>
        /// <param name="annotationRows">Annotation rows, split into category and numeric rows by <c>SplitAnnotRows</c>.</param>
        /// <param name="progress">Called once per line read with a percentage, and with 0 at the end.</param>
        /// <param name="status">Called with "Reading data" at the start and "" at the end.</param>
        /// <param name="separator">Field separator passed on to the line-splitting helpers.</param>
        /// <param name="reader">Source of the lines; its first line is discarded before parsing starts.</param>
        /// <param name="auxReader">Optional reader used only to detect whether quality/imputation matrices are present.</param>
        /// <param name="nrows">Number of rows allocated for every value array.</param>
        /// <param name="shortenExpressionNames">When true, common substrings are removed from the main column names.</param>
        /// <param name="filters">Row filters passed on to <c>IsValidLine</c>.</param>
        private static void LoadMatrixData(IList <string> colNames, IList <string> colDescriptions, IList <int> mainColIndices,
                                           IList <int> catColIndices, IList <int> numColIndices, IList <int> textColIndices, IList <int> multiNumColIndices,
                                           string origin, IMatrixData matrixData, IDictionary <string, string[]> annotationRows, Action <int> progress,
                                           Action <string> status, char separator, TextReader reader, StreamReader auxReader, int nrows,
                                           bool shortenExpressionNames, List <Tuple <Relation[], int[], bool> > filters)
        {
            Dictionary <string, string[]> catAnnotatRows;
            Dictionary <string, string[]> numAnnotatRows;

            status("Reading data");
            SplitAnnotRows(annotationRows, out catAnnotatRows, out numAnnotatRows);
            // Pre-allocate one array of nrows entries per annotation column.
            List <string[][]> categoryAnnotation = new List <string[][]>();

            for (int i = 0; i < catColIndices.Count; i++)
            {
                categoryAnnotation.Add(new string[nrows][]);
            }
            List <double[]> numericAnnotation = new List <double[]>();

            for (int i = 0; i < numColIndices.Count; i++)
            {
                numericAnnotation.Add(new double[nrows]);
            }
            List <double[][]> multiNumericAnnotation = new List <double[][]>();

            for (int i = 0; i < multiNumColIndices.Count; i++)
            {
                multiNumericAnnotation.Add(new double[nrows][]);
            }
            List <string[]> stringAnnotation = new List <string[]>();

            for (int i = 0; i < textColIndices.Count; i++)
            {
                stringAnnotation.Add(new string[nrows]);
            }
            float[,] mainValues     = new float[nrows, mainColIndices.Count];
            float[,] qualityValues  = null;
            bool[,] isImputedValues = null;
            // Quality and imputation matrices are only allocated when the auxiliary reader reports them.
            bool hasAddtlMatrices = auxReader != null && GetHasAddtlMatrices(auxReader, mainColIndices, separator);

            if (hasAddtlMatrices)
            {
                qualityValues   = new float[nrows, mainColIndices.Count];
                isImputedValues = new bool[nrows, mainColIndices.Count];
            }
            // Discard the first line (presumably the column header -- confirm against the caller).
            reader.ReadLine();
            int    count = 0;
            string line;

            while ((line = reader.ReadLine()) != null)
            {
                progress(100 * (count + 1) / nrows);
                if (TabSep.IsCommentLine(line, commentPrefix, commentPrefixExceptions))
                {
                    continue;
                }
                string[] w;
                if (!IsValidLine(line, separator, filters, out w, hasAddtlMatrices))
                {
                    continue;
                }
                // NOTE(review): only some of the writes below are guarded against count >= nrows;
                // the "field missing" branches and the ParseExp branch index with count unguarded.
                // Main matrix: missing or unparseable fields become NaN.
                for (int i = 0; i < mainColIndices.Count; i++)
                {
                    if (mainColIndices[i] >= w.Length)
                    {
                        mainValues[count, i] = float.NaN;
                    }
                    else
                    {
                        string s = StringUtils.RemoveWhitespace(w[mainColIndices[i]]);
                        if (hasAddtlMatrices)
                        {
                            ParseExp(s, out mainValues[count, i], out isImputedValues[count, i], out qualityValues[count, i]);
                        }
                        else
                        {
                            if (count < mainValues.GetLength(0))
                            {
                                bool success = float.TryParse(s, out mainValues[count, i]);
                                if (!success)
                                {
                                    mainValues[count, i] = float.NaN;
                                }
                            }
                        }
                    }
                }
                // Numeric columns: missing or unparseable fields become NaN.
                for (int i = 0; i < numColIndices.Count; i++)
                {
                    if (numColIndices[i] >= w.Length)
                    {
                        numericAnnotation[i][count] = double.NaN;
                    }
                    else
                    {
                        double q;
                        bool   success = double.TryParse(w[numColIndices[i]].Trim(), out q);
                        if (numericAnnotation[i].Length > count)
                        {
                            numericAnnotation[i][count] = success ? q : double.NaN;
                        }
                    }
                }
                // Multi-numeric columns: strip one pair of surrounding quotes, then split on ';'.
                for (int i = 0; i < multiNumColIndices.Count; i++)
                {
                    if (multiNumColIndices[i] >= w.Length)
                    {
                        multiNumericAnnotation[i][count] = new double[0];
                    }
                    else
                    {
                        string q = w[multiNumColIndices[i]].Trim();
                        if (q.Length >= 2 && q[0] == '\"' && q[q.Length - 1] == '\"')
                        {
                            q = q.Substring(1, q.Length - 2);
                        }
                        if (q.Length >= 2 && q[0] == '\'' && q[q.Length - 1] == '\'')
                        {
                            q = q.Substring(1, q.Length - 2);
                        }
                        string[] ww = q.Length == 0 ? new string[0] : q.Split(';');
                        multiNumericAnnotation[i][count] = new double[ww.Length];
                        for (int j = 0; j < ww.Length; j++)
                        {
                            double q1;
                            bool   success = double.TryParse(ww[j], out q1);
                            multiNumericAnnotation[i][count][j] = success ? q1 : double.NaN;
                        }
                    }
                }
                // Category columns: strip quotes, split on ';', drop empty terms and sort the rest.
                for (int i = 0; i < catColIndices.Count; i++)
                {
                    if (catColIndices[i] >= w.Length)
                    {
                        categoryAnnotation[i][count] = new string[0];
                    }
                    else
                    {
                        string q = w[catColIndices[i]].Trim();
                        if (q.Length >= 2 && q[0] == '\"' && q[q.Length - 1] == '\"')
                        {
                            q = q.Substring(1, q.Length - 2);
                        }
                        if (q.Length >= 2 && q[0] == '\'' && q[q.Length - 1] == '\'')
                        {
                            q = q.Substring(1, q.Length - 2);
                        }
                        string[]   ww     = q.Length == 0 ? new string[0] : q.Split(';');
                        List <int> valids = new List <int>();
                        for (int j = 0; j < ww.Length; j++)
                        {
                            ww[j] = ww[j].Trim();
                            if (ww[j].Length > 0)
                            {
                                valids.Add(j);
                            }
                        }
                        ww = ArrayUtils.SubArray(ww, valids);
                        Array.Sort(ww);
                        if (categoryAnnotation[i].Length > count)
                        {
                            categoryAnnotation[i][count] = ww;
                        }
                    }
                }
                // String columns: missing fields become the empty string.
                for (int i = 0; i < textColIndices.Count; i++)
                {
                    if (textColIndices[i] >= w.Length)
                    {
                        stringAnnotation[i][count] = "";
                    }
                    else
                    {
                        string q = w[textColIndices[i]].Trim();
                        if (stringAnnotation[i].Length > count)
                        {
                            stringAnnotation[i][count] = RemoveSplitWhitespace(RemoveQuotes(q));
                        }
                    }
                }
                count++;
            }
            reader.Close();
            string[] columnNames = ArrayUtils.SubArray(colNames, mainColIndices);
            if (shortenExpressionNames)
            {
                columnNames = StringUtils.RemoveCommonSubstrings(columnNames, true);
            }
            string[] catColnames      = ArrayUtils.SubArray(colNames, catColIndices);
            string[] numColnames      = ArrayUtils.SubArray(colNames, numColIndices);
            string[] multiNumColnames = ArrayUtils.SubArray(colNames, multiNumColIndices);
            string[] textColnames     = ArrayUtils.SubArray(colNames, textColIndices);
            matrixData.Name        = origin;
            matrixData.ColumnNames = RemoveQuotes(columnNames);
            matrixData.Values.Set(mainValues);
            if (hasAddtlMatrices)
            {
                matrixData.Quality.Set(qualityValues);
                matrixData.IsImputed.Set(isImputedValues);
            }
            else
            {
                // No additional matrices in the input: install default-initialized ones of matching size.
                matrixData.Quality.Set(new float[mainValues.GetLength(0), mainValues.GetLength(1)]);
                matrixData.IsImputed.Set(new bool[mainValues.GetLength(0), mainValues.GetLength(1)]);
            }
            matrixData.SetAnnotationColumns(RemoveQuotes(textColnames), stringAnnotation, RemoveQuotes(catColnames),
                                            categoryAnnotation, RemoveQuotes(numColnames), numericAnnotation, RemoveQuotes(multiNumColnames),
                                            multiNumericAnnotation);
            if (colDescriptions != null)
            {
                string[] columnDesc      = ArrayUtils.SubArray(colDescriptions, mainColIndices);
                string[] catColDesc      = ArrayUtils.SubArray(colDescriptions, catColIndices);
                string[] numColDesc      = ArrayUtils.SubArray(colDescriptions, numColIndices);
                string[] multiNumColDesc = ArrayUtils.SubArray(colDescriptions, multiNumColIndices);
                string[] textColDesc     = ArrayUtils.SubArray(colDescriptions, textColIndices);
                matrixData.ColumnDescriptions             = new List <string>(columnDesc);
                matrixData.NumericColumnDescriptions      = new List <string>(numColDesc);
                matrixData.CategoryColumnDescriptions     = new List <string>(catColDesc);
                matrixData.StringColumnDescriptions       = new List <string>(textColDesc);
                matrixData.MultiNumericColumnDescriptions = new List <string>(multiNumColDesc);
            }
            // Category annotation rows: one ';'-separated, trimmed, sorted term list per main column.
            foreach (string key in catAnnotatRows.Keys)
            {
                string     name  = key;
                string[]   svals = ArrayUtils.SubArray(catAnnotatRows[key], mainColIndices);
                string[][] cat   = new string[svals.Length][];
                for (int i = 0; i < cat.Length; i++)
                {
                    string s = svals[i].Trim();
                    cat[i] = s.Length > 0 ? s.Split(';') : new string[0];
                    List <int> valids = new List <int>();
                    for (int j = 0; j < cat[i].Length; j++)
                    {
                        cat[i][j] = cat[i][j].Trim();
                        if (cat[i][j].Length > 0)
                        {
                            valids.Add(j);
                        }
                    }
                    cat[i] = ArrayUtils.SubArray(cat[i], valids);
                    Array.Sort(cat[i]);
                }
                matrixData.AddCategoryRow(name, name, cat);
            }
            // Numeric annotation rows: one value per main column, NaN where the text is not a number.
            foreach (string key in numAnnotatRows.Keys)
            {
                string   name  = key;
                string[] svals = ArrayUtils.SubArray(numAnnotatRows[key], mainColIndices);
                double[] num   = new double[svals.Length];
                for (int i = 0; i < num.Length; i++)
                {
                    string s = svals[i].Trim();
                    // double.TryParse writes 0 to its out argument on failure, so NaN has to be
                    // assigned after a failed parse; pre-assigning it would be overwritten with 0.
                    if (!double.TryParse(s, out num[i]))
                    {
                        num[i] = double.NaN;
                    }
                }
                matrixData.AddNumericRow(name, name, num);
            }
            matrixData.Origin = origin;
            progress(0);
            status("");
        }
Exemplo n.º 6
0
        public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
                                ref IDocumentData[] documents, ProcessInfo processInfo)
        {
            int[] outputColumns      = param.GetParam <int[]>("Output").Value;
            int   proteinIdColumnInd = param.GetParam <int>("Protein IDs").Value;

            string[] proteinIds    = mdata.StringColumns[proteinIdColumnInd];
            int[]    intensityCols = param.GetParam <int[]>("Intensities").Value;
            if (intensityCols.Length == 0)
            {
                processInfo.ErrString = "Please select at least one column containing protein intensities.";
                return;
            }
            // variable to hold all intensity values
            List <double[]> columns = new List <double[]>();

            string[] inputNames  = new string[intensityCols.Length];
            string[] sampleNames = new string[intensityCols.Length];
            for (int col = 0; col < intensityCols.Length; col++)
            {
                double[] values;
                if (intensityCols[col] < mdata.ColumnCount)
                {
                    values          = ArrayUtils.ToDoubles(mdata.Values.GetColumn(intensityCols[col]));
                    inputNames[col] = mdata.ColumnNames[intensityCols[col]];
                }
                else
                {
                    values          = mdata.NumericColumns[intensityCols[col] - mdata.ColumnCount];
                    inputNames[col] = mdata.NumericColumnNames[intensityCols[col] - mdata.ColumnCount];
                }
                sampleNames[col] = new Regex(@"^(?:(?:LFQ )?[Ii]ntensity )?(.*)$").Match(inputNames[col]).Groups[1].Value;
                columns.Add(values);
            }
            // average over columns if this option is selected
            if (param.GetParamWithSubParams <int>("Averaging mode").Value == 3)
            {
                double[] column = new double[mdata.RowCount];
                for (int row = 0; row < mdata.RowCount; row++)
                {
                    double[] values = new double[intensityCols.Length];
                    for (int col = 0; col < intensityCols.Length; col++)
                    {
                        values[col] = columns[col][row];
                    }
                    column[row] = ArrayUtils.Median(ExtractValidValues(values, false));
                }
                // delete the original list of columns
                columns = new List <double[]> {
                    column
                };
                sampleNames = new[] { "" };
            }
            // revert logarithm if necessary
            if (param.GetParamWithSubParams <bool>("Logarithmized").Value)
            {
                double[] logBases = new[] { 2, Math.E, 10 };
                double   logBase  =
                    logBases[param.GetParamWithSubParams <bool>("Logarithmized").GetSubParameters().GetParam <int>("log base").Value];
                foreach (double[] t in columns)
                {
                    for (int row = 0; row < mdata.RowCount; row++)
                    {
                        if (t[row] == 0)
                        {
                            processInfo.ErrString = "Are the columns really logarithmized?\nThey contain zeroes!";
                        }
                        t[row] = Math.Pow(logBase, t[row]);
                    }
                }
            }
            double[] mw = mdata.NumericColumns[param.GetParam <int>("Molecular masses").Value];
            // define whether the molecular masses are given in Da or kDa
            if (ArrayUtils.Median(mw) < 250)             // most likely kDa
            {
                for (int i = 0; i < mw.Length; i++)
                {
                    mw[i] *= 1000;
                }
            }
            double[] detectabilityNormFactor = mw;
            if (param.GetParamWithSubParams <bool>("Detectability correction").Value)
            {
                detectabilityNormFactor =
                    mdata.NumericColumns[
                        param.GetParamWithSubParams <bool>("Detectability correction").GetSubParameters().GetParam <int>("Correction factor")
                        .Value];
            }
            // the normalization factor needs to be nonzero for all proteins
            // check and replace with 1 for all relevant cases
            for (int row = 0; row < mdata.RowCount; row++)
            {
                if (detectabilityNormFactor[row] == 0 || double.IsNaN(detectabilityNormFactor[row]))
                {
                    detectabilityNormFactor[row] = 1;
                }
            }
            // detect the organism
            Organism organism = DetectOrganism(proteinIds);
            // c value the amount of DNA per haploid genome, see: http://en.wikipedia.org/wiki/C-value
            double cValue = organism.genomeSize * basePairWeight / avogadro;

            // find the histones
            int[] histoneRows = FindHistones(proteinIds, organism);
            // write a categorical column indicating the histones
            string[][] histoneCol = new string[mdata.RowCount][];
            for (int row = 0; row < mdata.RowCount; row++)
            {
                histoneCol[row] = ArrayUtils.Contains(histoneRows, row) ? new[] { "+" } : new string[0];
            }
            mdata.AddCategoryColumn("Histones", "", histoneCol);

            // initialize the variables for the annotation rows
            string[]   sampleNameRow     = new string[mdata.ColumnCount];
            string[]   inputNameRow      = new string[mdata.ColumnCount];
            double[]   totalProteinRow   = new double[mdata.ColumnCount];
            double[]   totalMoleculesRow = new double[mdata.ColumnCount];
            string[][] organismRow       = new string[mdata.ColumnCount][];
            // populate the organismRow variable with empty strings as defaults (not null, which may cause errors when writing the annotations in the end.)
            for (int i = 0; i < organismRow.Length; i++)
            {
                organismRow[i] = new[] { "N/A" };
            }
            double[] histoneMassRow       = new double[mdata.ColumnCount];
            double[] ploidyRow            = new double[mdata.ColumnCount];
            double[] cellVolumeRow        = new double[mdata.ColumnCount];
            double[] normalizationFactors = new double[columns.Count];
            // calculate normalization factors for each column
            for (int col = 0; col < columns.Count; col++)
            {
                string   sampleName = sampleNames[col];
                double[] column     = columns[col];
                // normalization factor to go from intensities to copies,
                // needs to be determined either using the total protein or the histone scaling approach
                double factor;
                switch (param.GetParamWithSubParams <int>("Scaling mode").Value)
                {
                case 0:                         // total protein amount
                    double mwWeightedNormalizedSummedIntensities = 0;
                    for (int row = 0; row < mdata.RowCount; row++)
                    {
                        if (!double.IsNaN(column[row]) && !double.IsNaN(mw[row]))
                        {
                            mwWeightedNormalizedSummedIntensities += column[row] / detectabilityNormFactor[row] * mw[row];
                        }
                    }
                    factor =
                        param.GetParamWithSubParams <int>("Scaling mode").GetSubParameters().GetParam <double>(
                            "Protein amount per cell [pg]").Value *1e-12 * avogadro / mwWeightedNormalizedSummedIntensities;
                    break;

                case 1:                         // histone mode
                    double mwWeightedNormalizedSummedHistoneIntensities = 0;
                    foreach (int row in histoneRows)
                    {
                        if (!double.IsNaN(column[row]) && !double.IsNaN(mw[row]))
                        {
                            mwWeightedNormalizedSummedHistoneIntensities += column[row] / detectabilityNormFactor[row] * mw[row];
                        }
                    }
                    double ploidy =
                        param.GetParamWithSubParams <int>("Scaling mode").GetSubParameters().GetParam <double>("Ploidy").Value;
                    factor = cValue * ploidy * avogadro / mwWeightedNormalizedSummedHistoneIntensities;
                    break;

                default:
                    factor = 1;
                    break;
                }
                normalizationFactors[col] = factor;
            }
            // check averaging mode
            if (param.GetParamWithSubParams <int>("Averaging mode").Value == 1)            // same factor for all
            {
                double factor = ArrayUtils.Mean(normalizationFactors);
                for (int i = 0; i < normalizationFactors.Length; i++)
                {
                    normalizationFactors[i] = factor;
                }
            }
            if (param.GetParamWithSubParams <int>("Averaging mode").Value == 2)            // same factor in each group
            {
                if (param.GetParamWithSubParams <int>("Averaging mode").GetSubParameters().GetParam <int>("Grouping").Value == -1)
                {
                    processInfo.ErrString = "No grouping selected.";
                    return;
                }
                string[][] groupNames =
                    mdata.GetCategoryRowAt(
                        param.GetParamWithSubParams <int>("Averaging mode").GetSubParameters().GetParam <int>("Grouping").Value);
                string[] uniqueGroupNames = Unique(groupNames);
                int[]    grouping         = new int[columns.Count];
                for (int i = 0; i < columns.Count; i++)
                {
                    if (intensityCols[i] >= mdata.ColumnCount)                      // Numeric annotation columns cannot be grouped
                    {
                        grouping[i] = i;
                        continue;
                    }
                    if (ArrayUtils.Contains(uniqueGroupNames, groupNames[i][0]))
                    {
                        grouping[i] = ArrayUtils.IndexOf(uniqueGroupNames, groupNames[i][0]);
                        continue;
                    }
                    grouping[i] = i;
                }
                Dictionary <int, List <double> > factors = new Dictionary <int, List <double> >();
                for (int i = 0; i < columns.Count; i++)
                {
                    if (factors.ContainsKey(grouping[i]))
                    {
                        factors[grouping[i]].Add(normalizationFactors[i]);
                    }
                    else
                    {
                        factors.Add(grouping[i], new List <double> {
                            normalizationFactors[i]
                        });
                    }
                }
                double[] averagedNormalizationFactors = new double[columns.Count];
                for (int i = 0; i < columns.Count; i++)
                {
                    List <double> factor;
                    factors.TryGetValue(grouping[i], out factor);
                    averagedNormalizationFactors[i] = ArrayUtils.Mean(factor);
                }
                normalizationFactors = averagedNormalizationFactors;
            }
            // loop over all selected columns and calculate copy numbers
            for (int col = 0; col < columns.Count; col++)
            {
                string   sampleName     = sampleNames[col];
                double[] column         = columns[col];
                double   factor         = normalizationFactors[col];
                double[] copyNumbers    = new double[mdata.RowCount];
                double[] concentrations = new double[mdata.RowCount];                 // femtoliters
                double[] massFraction   = new double[mdata.RowCount];
                double[] moleFraction   = new double[mdata.RowCount];
                double   totalProtein   = 0;            // picograms
                double   histoneMass    = 0;            // picograms
                double   totalMolecules = 0;
                for (int row = 0; row < mdata.RowCount; row++)
                {
                    if (!double.IsNaN(column[row]) && !double.IsNaN(mw[row]))
                    {
                        copyNumbers[row] = column[row] / detectabilityNormFactor[row] * factor;
                        totalMolecules  += copyNumbers[row];
                        totalProtein    += copyNumbers[row] * mw[row] * 1e12 / avogadro;                // picograms
                        if (ArrayUtils.Contains(histoneRows, row))
                        {
                            histoneMass += copyNumbers[row] * mw[row] * 1e12 / avogadro;                       // picograms
                        }
                    }
                }
                double totalVolume = totalProtein / param.GetParam <double>("Total cellular protein concentration [g/l]").Value *
                                     1000;
                // femtoliters
                for (int row = 0; row < mdata.RowCount; row++)
                {
                    if (!double.IsNaN(column[row]) && !double.IsNaN(mw[row]))
                    {
                        concentrations[row] = copyNumbers[row] / (totalVolume * 1e-15) / avogadro * 1e9;         // nanomolar
                        massFraction[row]   = copyNumbers[row] * mw[row] * 1e12 / avogadro / totalProtein * 1e6; // ppm
                        moleFraction[row]   = copyNumbers[row] / totalMolecules * 1e6;                           // ppm
                    }
                }
                string suffix = sampleName == "" ? "" : " " + sampleName;
                if (ArrayUtils.Contains(outputColumns, 0))
                {
                    mdata.AddNumericColumn("Copy number" + suffix, "", copyNumbers);
                }
                if (ArrayUtils.Contains(outputColumns, 1))
                {
                    mdata.AddNumericColumn("Concentration [nM]" + suffix, "", concentrations);
                }
                if (ArrayUtils.Contains(outputColumns, 2))
                {
                    mdata.AddNumericColumn("Abundance (mass/total mass) [*10^-6]" + suffix, "", massFraction);
                }
                if (ArrayUtils.Contains(outputColumns, 3))
                {
                    mdata.AddNumericColumn("Abundance (molecules/total molecules) [*10^-6]" + suffix, "", moleFraction);
                }
                double[] rank         = ArrayUtils.Rank(copyNumbers);
                double[] relativeRank = new double[mdata.RowCount];
                double   validRanks   = mdata.RowCount;
                for (int row = 0; row < mdata.RowCount; row++)
                {
                    // remove rank for protein with no copy number information
                    if (double.IsNaN(copyNumbers[row]) || copyNumbers[row] == 0)
                    {
                        rank[row] = double.NaN;
                        validRanks--;                         // do not consider as valid
                    }
                    // invert ranking, so that rank 0 is the most abundant protein
                    rank[row] = mdata.RowCount - rank[row];
                }
                for (int row = 0; row < mdata.RowCount; row++)
                {
                    relativeRank[row] = rank[row] / validRanks;
                }
                if (ArrayUtils.Contains(outputColumns, 4))
                {
                    mdata.AddNumericColumn("Copy number rank" + suffix, "", rank);
                }
                if (ArrayUtils.Contains(outputColumns, 5))
                {
                    mdata.AddNumericColumn("Relative copy number rank" + suffix, "", relativeRank);
                }
                if (intensityCols[col] < mdata.ColumnCount && param.GetParamWithSubParams <int>("Averaging mode").Value != 3)
                {
                    inputNameRow[intensityCols[col]]      = inputNames[col];
                    sampleNameRow[intensityCols[col]]     = sampleNames[col];
                    totalProteinRow[intensityCols[col]]   = Math.Round(totalProtein, 2);
                    totalMoleculesRow[intensityCols[col]] = Math.Round(totalMolecules, 0);
                    organismRow[intensityCols[col]]       = new[] { organism.name };
                    histoneMassRow[intensityCols[col]]    = Math.Round(histoneMass, 4);
                    ploidyRow[intensityCols[col]]         = Math.Round(histoneMass * 1e-12 / cValue, 2);
                    cellVolumeRow[intensityCols[col]]     = Math.Round(totalVolume, 2);                 // femtoliters
                }
            }

            // Summary annotation row
            if (param.GetParamWithSubParams <int>("Averaging mode").Value != 3 && ArrayUtils.Contains(outputColumns, 6))
            {
                mdata.AddNumericRow("Total protein [pg/cell]", "", totalProteinRow);
                mdata.AddNumericRow("Total molecules per cell", "", totalMoleculesRow);
                mdata.AddCategoryRow("Organism", "", organismRow);
                mdata.AddNumericRow("Histone mass [pg/cell]", "", histoneMassRow);
                mdata.AddNumericRow("Ploidy", "", ploidyRow);
                mdata.AddNumericRow("Cell volume [fl]", "", cellVolumeRow);
            }

            // Summary matrix
            if (param.GetParamWithSubParams <int>("Averaging mode").Value != 3 && ArrayUtils.Contains(outputColumns, 7))
            {
                supplTables = new IMatrixData[1];
                IMatrixData supplTab = PerseusFactory.CreateMatrixData();
                supplTab.ColumnNames = new List <string>();
                supplTab.Values.Init(totalProteinRow.Length, 0);
                supplTab.SetAnnotationColumns(new List <string> {
                    "Sample", "Input Column"
                },
                                              new List <string[]>()
                {
                    sampleNameRow, inputNameRow
                }, new List <string>()
                {
                    "Organism"
                },
                                              new List <string[][]>()
                {
                    organismRow
                },
                                              new List <string>()
                {
                    "Total protein [pg/cell]",
                    "Total molecules per cell",
                    "Histone mass [pg/cell]",
                    "Ploidy",
                    "Cell volume [fl]"
                },
                                              new List <double[]>()
                {
                    totalProteinRow, totalMoleculesRow, histoneMassRow, ploidyRow, cellVolumeRow
                },
                                              new List <string>(), new List <double[][]>());
                supplTables[0] = supplTab;
            }
        }
Exemplo n.º 7
0
        /// <summary>
        /// Converts the selected protein intensity columns into copy numbers, concentrations,
        /// mass/mole fractions and ranks, and appends the requested results to <paramref name="mdata"/>.
        /// </summary>
        /// <remarks>
        /// Steps: collect the selected intensity columns, optionally collapse them to their row-wise
        /// median (averaging mode 3), optionally revert the logarithm, compute one normalization factor
        /// per column (total protein or histone scaling), optionally average the factors over all columns
        /// or per group, and finally derive the per-protein quantities and per-sample summary rows.
        /// The molecular mass column is scaled in place when it appears to be given in kDa.
        /// </remarks>
        /// <param name="mdata">Matrix that is read from and extended with a "Histones" category column,
        /// the selected numeric output columns and, if requested, the summary annotation rows.</param>
        /// <param name="param">User parameters; read by the names used in the body.</param>
        /// <param name="supplTables">Not assigned by this method.</param>
        /// <param name="documents">Not assigned by this method.</param>
        /// <param name="processInfo">Receives <c>ErrString</c> when no intensity column or no grouping
        /// is selected, or when a supposedly logarithmized column contains zeroes.</param>
        public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
            ref IDocumentData[] documents, ProcessInfo processInfo)
        {
            int[] outputColumns = param.GetMultiChoiceParam("Output").Value;
            int proteinIdColumnInd = param.GetSingleChoiceParam("Protein IDs").Value;
            string[] proteinIds = mdata.StringColumns[proteinIdColumnInd];
            int[] intensityCols = param.GetMultiChoiceParam("Intensities").Value;
            if (intensityCols.Length == 0){
                processInfo.ErrString = "Please select at least one column containing protein intensities.";
                return;
            }
            // variable to hold all intensity values
            List<double[]> columns = new List<double[]>();
            string[] sampleNames = new string[intensityCols.Length];
            // strips an optional "LFQ intensity " / "Intensity " prefix from the column name
            Regex sampleNameRegex = new Regex(@"^(?:(?:LFQ )?[Ii]ntensity )?(.*)$");
            for (int col = 0; col < intensityCols.Length; col++){
                double[] values;
                // indices beyond the expression columns address the numeric annotation columns
                if (intensityCols[col] < mdata.ExpressionColumnCount){
                    values = ArrayUtils.ToDoubles(mdata.GetExpressionColumn(intensityCols[col]));
                    sampleNames[col] = mdata.ExpressionColumnNames[intensityCols[col]];
                } else{
                    values = mdata.NumericColumns[intensityCols[col] - mdata.ExpressionColumnCount];
                    sampleNames[col] = mdata.NumericColumnNames[intensityCols[col] - mdata.ExpressionColumnCount];
                }
                sampleNames[col] = sampleNameRegex.Match(sampleNames[col]).Groups[1].Value;
                columns.Add(values);
            }
            int averagingMode = param.GetSingleChoiceWithSubParams("Averaging mode").Value;
            // average over columns if this option is selected
            if (averagingMode == 3){
                double[] column = new double[mdata.RowCount];
                for (int row = 0; row < mdata.RowCount; row++){
                    double[] values = new double[intensityCols.Length];
                    for (int col = 0; col < intensityCols.Length; col++){
                        values[col] = columns[col][row];
                    }
                    column[row] = ArrayUtils.Median(ExtractValidValues(values, false));
                }
                // delete the original list of columns
                columns = new List<double[]>{column};
                sampleNames = new[]{""};
            }
            // revert logarithm if necessary
            if (param.GetBoolWithSubParams("Logarithmized").Value){
                double[] logBases = new[]{2, Math.E, 10};
                double logBase =
                    logBases[param.GetBoolWithSubParams("Logarithmized").GetSubParameters().GetSingleChoiceParam("log base").Value];
                foreach (double[] t in columns){
                    for (int row = 0; row < mdata.RowCount; row++){
                        // the message is recorded but processing deliberately continues
                        if (t[row] == 0){
                            processInfo.ErrString = "Are the columns really logarithmized?\nThey contain zeroes!";
                        }
                        t[row] = Math.Pow(logBase, t[row]);
                    }
                }
            }
            double[] mw = mdata.NumericColumns[param.GetSingleChoiceParam("Molecular masses").Value];
            // detect whether the molecular masses are given in Da or kDa
            if (ArrayUtils.Median(mw) < 250) // likely kDa
            {
                for (int i = 0; i < mw.Length; i++){
                    mw[i] *= 1000;
                }
            }
            double[] detectabilityNormFactor = mw;
            if (param.GetBoolWithSubParams("Detectability correction").Value){
                detectabilityNormFactor =
                    mdata.NumericColumns[
                        param.GetBoolWithSubParams("Detectability correction")
                             .GetSubParameters()
                             .GetSingleChoiceParam("Correction factor")
                             .Value];
            }
            // the normalization factor needs to be nonzero for all proteins
            // check and replace with 1 for all relevant cases.
            // NaN has to be tested with double.IsNaN: a comparison "== double.NaN" is always false.
            // Without detectability correction this array is the molecular mass column itself,
            // so the replacement then applies to the masses as well.
            for (int row = 0; row < mdata.RowCount; row++){
                if (detectabilityNormFactor[row] == 0 || double.IsNaN(detectabilityNormFactor[row])){
                    detectabilityNormFactor[row] = 1;
                }
            }
            // detect the organism
            Organism organism = DetectOrganism(proteinIds);
            // c value the amount of DNA per cell, see: http://en.wikipedia.org/wiki/C-value
            double cValue = (organism.genomeSize*basePairWeight)/avogadro;
            // find the histones
            int[] histoneRows = FindHistones(proteinIds, organism);
            // write a categorical column indicating the histones
            string[][] histoneCol = new string[mdata.RowCount][];
            for (int row = 0; row < mdata.RowCount; row++){
                histoneCol[row] = (ArrayUtils.Contains(histoneRows, row)) ? new[]{"+"} : new[]{""};
            }
            mdata.AddCategoryColumn("Histones", "", histoneCol);
            // initialize the variables for the annotation rows
            double[] totalProteinRow = new double[mdata.ExpressionColumnCount];
            double[] totalMoleculesRow = new double[mdata.ExpressionColumnCount];
            string[][] organismRow = new string[mdata.ExpressionColumnCount][];
            // give every column a non-null default; only the selected expression columns are overwritten below
            for (int i = 0; i < organismRow.Length; i++){
                organismRow[i] = new[]{"N/A"};
            }
            double[] histoneMassRow = new double[mdata.ExpressionColumnCount];
            double[] ploidyRow = new double[mdata.ExpressionColumnCount];
            double[] cellVolumeRow = new double[mdata.ExpressionColumnCount];
            double[] normalizationFactors = new double[columns.Count];
            int scalingMode = param.GetSingleChoiceWithSubParams("Scaling mode").Value;
            // calculate normalization factors for each column
            for (int col = 0; col < columns.Count; col++){
                double[] column = columns[col];
                // normalization factor to go from intensities to copies,
                // needs to be determined either using the total protein or the histone scaling approach
                double factor;
                switch (scalingMode){
                    case 0: // total protein amount
                        double mwWeightedNormalizedSummedIntensities = 0;
                        for (int row = 0; row < mdata.RowCount; row++){
                            if (!double.IsNaN(column[row]) && !double.IsNaN(mw[row])){
                                mwWeightedNormalizedSummedIntensities += (column[row]/detectabilityNormFactor[row])*mw[row];
                            }
                        }
                        factor =
                            (param.GetSingleChoiceWithSubParams("Scaling mode")
                                  .GetSubParameters()
                                  .GetDoubleParam("Protein amount per cell [pg]")
                                  .Value*1e-12*avogadro)/mwWeightedNormalizedSummedIntensities;
                        break;
                    case 1: // histone mode
                        double mwWeightedNormalizedSummedHistoneIntensities = 0;
                        foreach (int row in histoneRows){
                            if (!double.IsNaN(column[row]) && !double.IsNaN(mw[row])){
                                mwWeightedNormalizedSummedHistoneIntensities += (column[row]/detectabilityNormFactor[row])*mw[row];
                            }
                        }
                        double ploidy =
                            param.GetSingleChoiceWithSubParams("Scaling mode").GetSubParameters().GetDoubleParam("Ploidy").Value;
                        factor = (cValue*ploidy*avogadro)/mwWeightedNormalizedSummedHistoneIntensities;
                        break;
                    default:
                        factor = 1;
                        break;
                }
                normalizationFactors[col] = factor;
            }
            // check averaging mode
            if (averagingMode == 1) // same factor for all
            {
                double factor = ArrayUtils.Mean(normalizationFactors);
                for (int i = 0; i < normalizationFactors.Length; i++){
                    normalizationFactors[i] = factor;
                }
            }
            if (averagingMode == 2) // same factor in each group
            {
                int groupingInd =
                    param.GetSingleChoiceWithSubParams("Averaging mode").GetSubParameters().GetSingleChoiceParam("Grouping").Value;
                if (groupingInd == -1){
                    processInfo.ErrString = "No grouping selected.";
                    return;
                }
                string[][] groupNames = mdata.GetCategoryRowAt(groupingInd);
                string[] uniqueGroupNames = Unique(groupNames);
                int[] grouping = new int[columns.Count];
                for (int i = 0; i < columns.Count; i++){
                    // default: a private group id outside the range of the named groups,
                    // so that an ungrouped column can never be merged with a named group
                    grouping[i] = uniqueGroupNames.Length + i;
                    if (intensityCols[i] >= mdata.ExpressionColumnCount){ // Numeric annotation columns cannot be grouped
                        continue;
                    }
                    // the category row has one entry per expression column, so it is addressed
                    // by the expression column index and not by the position in the selection
                    string[] names = groupNames[intensityCols[i]];
                    if (names == null || names.Length == 0){
                        continue;
                    }
                    if (ArrayUtils.Contains(uniqueGroupNames, names[0])){
                        grouping[i] = ArrayUtils.IndexOf(uniqueGroupNames, names[0]);
                    }
                }
                // collect the factors of every group
                Dictionary<int, List<double>> factors = new Dictionary<int, List<double>>();
                for (int i = 0; i < columns.Count; i++){
                    List<double> groupFactors;
                    if (!factors.TryGetValue(grouping[i], out groupFactors)){
                        groupFactors = new List<double>();
                        factors.Add(grouping[i], groupFactors);
                    }
                    groupFactors.Add(normalizationFactors[i]);
                }
                // every column receives the mean factor of its group
                double[] averagedNormalizationFactors = new double[columns.Count];
                for (int i = 0; i < columns.Count; i++){
                    averagedNormalizationFactors[i] = ArrayUtils.Mean(factors[grouping[i]]);
                }
                normalizationFactors = averagedNormalizationFactors;
            }
            double proteinConcentration = param.GetDoubleParam("Total cellular protein concentration [g/l]").Value;
            // loop over all selected columns and calculate copy numbers
            for (int col = 0; col < columns.Count; col++){
                string sampleName = sampleNames[col];
                double[] column = columns[col];
                double factor = normalizationFactors[col];
                double[] copyNumbers = new double[mdata.RowCount];
                double[] concentrations = new double[mdata.RowCount]; // nanomolar
                double[] massFraction = new double[mdata.RowCount];
                double[] moleFraction = new double[mdata.RowCount];
                double totalProtein = 0; // picograms
                double histoneMass = 0; // picograms
                double totalMolecules = 0;
                for (int row = 0; row < mdata.RowCount; row++){
                    if (!double.IsNaN(column[row]) && !double.IsNaN(mw[row])){
                        copyNumbers[row] = (column[row]/detectabilityNormFactor[row])*factor;
                        totalMolecules += copyNumbers[row];
                        double proteinMass = (copyNumbers[row]*mw[row]*1e12)/avogadro; // picograms
                        totalProtein += proteinMass;
                        if (ArrayUtils.Contains(histoneRows, row)){
                            histoneMass += proteinMass;
                        }
                    }
                }
                double totalVolume = (totalProtein/proteinConcentration)*1000;
                // femtoliters
                for (int row = 0; row < mdata.RowCount; row++){
                    if (!double.IsNaN(column[row]) && !double.IsNaN(mw[row])){
                        concentrations[row] = ((copyNumbers[row]/(totalVolume*1e-15))/avogadro)*1e9; // nanomolar
                        massFraction[row] = (((copyNumbers[row]*mw[row]*1e12)/avogadro)/totalProtein)*1e6; // ppm
                        moleFraction[row] = (copyNumbers[row]/totalMolecules)*1e6; // ppm
                    }
                }
                string suffix = (sampleName == "") ? "" : " " + sampleName;
                if (ArrayUtils.Contains(outputColumns, 0)){
                    mdata.AddNumericColumn("Copy number" + suffix, "", copyNumbers);
                }
                if (ArrayUtils.Contains(outputColumns, 1)){
                    mdata.AddNumericColumn("Concentration [nM]" + suffix, "", concentrations);
                }
                if (ArrayUtils.Contains(outputColumns, 2)){
                    mdata.AddNumericColumn("Abundance (mass/total mass) [*10^-6]" + suffix, "", massFraction);
                }
                if (ArrayUtils.Contains(outputColumns, 3)){
                    mdata.AddNumericColumn("Abundance (molecules/total molecules) [*10^-6]" + suffix, "", moleFraction);
                }
                double[] rank = ArrayUtils.Rank(copyNumbers);
                double[] relativeRank = new double[mdata.RowCount];
                double validRanks = mdata.RowCount;
                for (int row = 0; row < mdata.RowCount; row++){
                    // remove rank for protein with no copy number information
                    if (double.IsNaN(copyNumbers[row]) || copyNumbers[row] == 0){
                        rank[row] = double.NaN;
                        validRanks--; // do not consider as valid
                    }
                    // invert ranking, so that rank 0 is the most abundant protein
                    // (a NaN rank stays NaN under the subtraction)
                    rank[row] = mdata.RowCount - rank[row];
                }
                for (int row = 0; row < mdata.RowCount; row++){
                    relativeRank[row] = rank[row]/validRanks;
                }
                if (ArrayUtils.Contains(outputColumns, 4)){
                    mdata.AddNumericColumn("Copy number rank" + suffix, "", rank);
                }
                if (ArrayUtils.Contains(outputColumns, 5)){
                    mdata.AddNumericColumn("Relative copy number rank" + suffix, "", relativeRank);
                }
                // per-sample summary values exist only for expression columns that were not averaged away
                if (intensityCols[col] < mdata.ExpressionColumnCount && averagingMode != 3){
                    int exprCol = intensityCols[col];
                    totalProteinRow[exprCol] = Math.Round(totalProtein, 2);
                    totalMoleculesRow[exprCol] = Math.Round(totalMolecules, 0);
                    organismRow[exprCol] = new string[]{organism.name};
                    histoneMassRow[exprCol] = Math.Round(histoneMass, 4);
                    ploidyRow[exprCol] = Math.Round((histoneMass*1e-12)/cValue, 2);
                    cellVolumeRow[exprCol] = Math.Round(totalVolume, 2); // femtoliters
                }
            }
            // summary annotation rows
            if (averagingMode != 3 && ArrayUtils.Contains(outputColumns, 6)){
                mdata.AddNumericRow("Total protein [pg/cell]", "", totalProteinRow);
                mdata.AddNumericRow("Total molecules per cell", "", totalMoleculesRow);
                mdata.AddCategoryRow("Organism", "", organismRow);
                mdata.AddNumericRow("Histone mass [pg/cell]", "", histoneMassRow);
                mdata.AddNumericRow("Ploidy", "", ploidyRow);
                mdata.AddNumericRow("Cell volume [fl]", "", cellVolumeRow);
            }
        }
        /// <summary>
        /// Reads a tab-separated file and fills <paramref name="matrixData"/> with its expression values,
        /// its categorical, numerical, multi-numerical and text annotation columns, and the supplied
        /// annotation rows.
        /// </summary>
        /// <remarks>
        /// The first line of the file is skipped and lines recognized by <c>TabSep.IsCommentLine</c> are
        /// ignored. Cells that are missing (line has too few fields) or cannot be parsed as numbers are
        /// stored as NaN; missing categorical/multi-numerical cells become empty arrays and missing text
        /// cells become empty strings. Number parsing uses the current culture, as before.
        /// </remarks>
        /// <param name="colNames">Names of all columns in the file, indexed by file column.</param>
        /// <param name="colDescriptions">Descriptions of all columns, indexed by file column; may be null,
        /// in which case no descriptions are set.</param>
        /// <param name="expressionColIndices">File column indices that are loaded as expression values.</param>
        /// <param name="catColIndices">File column indices loaded as categorical columns (';'-separated terms).</param>
        /// <param name="numColIndices">File column indices loaded as numerical columns.</param>
        /// <param name="textColIndices">File column indices loaded as text columns.</param>
        /// <param name="multiNumColIndices">File column indices loaded as multi-numerical columns
        /// (';'-separated numbers).</param>
        /// <param name="filename">Path of the file to read; also stored as the matrix name and origin.</param>
        /// <param name="matrixData">Matrix that receives the loaded data.</param>
        /// <param name="annotationRows">Annotation rows keyed by name; they are divided into categorical and
        /// numerical rows by <c>SplitAnnotRows</c>.</param>
        /// <param name="progress">Callback receiving a percentage computed from the number of rows read.</param>
        /// <param name="status">Callback receiving status messages.</param>
        private static void LoadData(IList<string> colNames, IList<string> colDescriptions, IList<int> expressionColIndices,
			IList<int> catColIndices, IList<int> numColIndices, IList<int> textColIndices, IList<int> multiNumColIndices,
			string filename, IMatrixData matrixData, IDictionary<string, string[]> annotationRows, Action<int> progress,
			Action<string> status)
        {
            Dictionary<string, string[]> catAnnotatRows;
            Dictionary<string, string[]> numAnnotatRows;
            status("Reading data");
            SplitAnnotRows(annotationRows, out catAnnotatRows, out numAnnotatRows);
            int nrows = TabSep.GetRowCount(filename, 0, commentPrefix, commentPrefixExceptions);
            float[,] expressionValues = new float[nrows,expressionColIndices.Count];
            // One pre-sized storage array per requested annotation column.
            List<string[][]> categoryAnnotation = new List<string[][]>();
            for (int i = 0; i < catColIndices.Count; i++){
                categoryAnnotation.Add(new string[nrows][]);
            }
            List<double[]> numericAnnotation = new List<double[]>();
            for (int i = 0; i < numColIndices.Count; i++){
                numericAnnotation.Add(new double[nrows]);
            }
            List<double[][]> multiNumericAnnotation = new List<double[][]>();
            for (int i = 0; i < multiNumColIndices.Count; i++){
                multiNumericAnnotation.Add(new double[nrows][]);
            }
            List<string[]> stringAnnotation = new List<string[]>();
            for (int i = 0; i < textColIndices.Count; i++){
                stringAnnotation.Add(new string[nrows]);
            }
            // The using block guarantees the file handle is released even if parsing throws.
            using (StreamReader reader = new StreamReader(filename)){
                // Skip the first line of the file.
                reader.ReadLine();
                int count = 0;
                string line;
                while ((line = reader.ReadLine()) != null){
                    // Guard against a file without data rows that still contains further lines.
                    if (nrows > 0){
                        progress((100*(count + 1))/nrows);
                    }
                    if (TabSep.IsCommentLine(line, commentPrefix, commentPrefixExceptions)){
                        continue;
                    }
                    string[] w = line.Split('\t');
                    for (int i = 0; i < expressionColIndices.Count; i++){
                        if (expressionColIndices[i] >= w.Length){
                            expressionValues[count, i] = float.NaN;
                        } else{
                            string s = StringUtils.RemoveWhitespace(w[expressionColIndices[i]]);
                            bool success = float.TryParse(s, out expressionValues[count, i]);
                            if (!success){
                                expressionValues[count, i] = float.NaN;
                            }
                        }
                    }
                    for (int i = 0; i < multiNumColIndices.Count; i++){
                        if (multiNumColIndices[i] >= w.Length){
                            multiNumericAnnotation[i][count] = new double[0];
                        } else{
                            string q = w[multiNumColIndices[i]].Trim();
                            // Strip one layer of enclosing double quotes, then one layer of single quotes.
                            if (q.Length >= 2 && q[0] == '\"' && q[q.Length - 1] == '\"'){
                                q = q.Substring(1, q.Length - 2);
                            }
                            if (q.Length >= 2 && q[0] == '\'' && q[q.Length - 1] == '\''){
                                q = q.Substring(1, q.Length - 2);
                            }
                            string[] ww = q.Length == 0 ? new string[0] : q.Split(';');
                            multiNumericAnnotation[i][count] = new double[ww.Length];
                            for (int j = 0; j < ww.Length; j++){
                                double q1;
                                bool success = double.TryParse(ww[j], out q1);
                                multiNumericAnnotation[i][count][j] = success ? q1 : double.NaN;
                            }
                        }
                    }
                    for (int i = 0; i < catColIndices.Count; i++){
                        if (catColIndices[i] >= w.Length){
                            categoryAnnotation[i][count] = new string[0];
                        } else{
                            string q = w[catColIndices[i]].Trim();
                            // Strip one layer of enclosing double quotes, then one layer of single quotes.
                            if (q.Length >= 2 && q[0] == '\"' && q[q.Length - 1] == '\"'){
                                q = q.Substring(1, q.Length - 2);
                            }
                            if (q.Length >= 2 && q[0] == '\'' && q[q.Length - 1] == '\''){
                                q = q.Substring(1, q.Length - 2);
                            }
                            string[] ww = q.Length == 0 ? new string[0] : q.Split(';');
                            // Terms of a categorical cell are stored in sorted order.
                            Array.Sort(ww);
                            categoryAnnotation[i][count] = ww;
                        }
                    }
                    for (int i = 0; i < numColIndices.Count; i++){
                        if (numColIndices[i] >= w.Length){
                            numericAnnotation[i][count] = double.NaN;
                        } else{
                            double q;
                            bool success = double.TryParse(w[numColIndices[i]].Trim(), out q);
                            numericAnnotation[i][count] = success ? q : double.NaN;
                        }
                    }
                    for (int i = 0; i < textColIndices.Count; i++){
                        if (textColIndices[i] >= w.Length){
                            stringAnnotation[i][count] = "";
                        } else{
                            string q = w[textColIndices[i]].Trim();
                            stringAnnotation[i][count] = RemoveSplitWhitespace(RemoveQuotes(q));
                        }
                    }
                    count++;
                }
            }
            string[] columnNames = ArrayUtils.SubArray(colNames, expressionColIndices);
            string[] catColnames = ArrayUtils.SubArray(colNames, catColIndices);
            string[] numColnames = ArrayUtils.SubArray(colNames, numColIndices);
            string[] multiNumColnames = ArrayUtils.SubArray(colNames, multiNumColIndices);
            string[] textColnames = ArrayUtils.SubArray(colNames, textColIndices);
            matrixData.SetData(filename, RemoveQuotes(columnNames), expressionValues, RemoveQuotes(textColnames),
                stringAnnotation, RemoveQuotes(catColnames), categoryAnnotation, RemoveQuotes(numColnames), numericAnnotation,
                RemoveQuotes(multiNumColnames), multiNumericAnnotation);
            if (colDescriptions != null){
                string[] columnDesc = ArrayUtils.SubArray(colDescriptions, expressionColIndices);
                string[] catColDesc = ArrayUtils.SubArray(colDescriptions, catColIndices);
                string[] numColDesc = ArrayUtils.SubArray(colDescriptions, numColIndices);
                string[] multiNumColDesc = ArrayUtils.SubArray(colDescriptions, multiNumColIndices);
                string[] textColDesc = ArrayUtils.SubArray(colDescriptions, textColIndices);
                matrixData.ExpressionColumnDescriptions = new List<string>(columnDesc);
                matrixData.NumericColumnDescriptions = new List<string>(numColDesc);
                matrixData.CategoryColumnDescriptions = new List<string>(catColDesc);
                matrixData.StringColumnDescriptions = new List<string>(textColDesc);
                matrixData.MultiNumericColumnDescriptions = new List<string>(multiNumColDesc);
            }
            // Annotation rows are restricted to the expression columns before being attached.
            foreach (string key in ArrayUtils.GetKeys(catAnnotatRows)){
                string name = key;
                string[] svals = ArrayUtils.SubArray(catAnnotatRows[key], expressionColIndices);
                string[][] cat = new string[svals.Length][];
                for (int i = 0; i < cat.Length; i++){
                    string s = svals[i].Trim();
                    cat[i] = s.Length > 0 ? s.Split(';') : new string[0];
                }
                matrixData.AddCategoryRow(name, name, cat);
            }
            foreach (string key in ArrayUtils.GetKeys(numAnnotatRows)){
                string name = key;
                string[] svals = ArrayUtils.SubArray(numAnnotatRows[key], expressionColIndices);
                double[] num = new double[svals.Length];
                for (int i = 0; i < num.Length; i++){
                    string s = svals[i].Trim();
                    // TryParse writes 0 to its out argument on failure, so NaN has to be assigned
                    // explicitly when the cell is not a valid number.
                    double parsed;
                    num[i] = double.TryParse(s, out parsed) ? parsed : double.NaN;
                }
                matrixData.AddNumericRow(name, name, num);
            }
            matrixData.Origin = filename;
            status("");
        }