Esempio n. 1
0
        /// <summary>
        /// Loads the file named by the "File" parameter as a hex dump. The bytes are cut into
        /// lines of 16; every line is rendered once through <c>ToHex</c> and once through
        /// <c>ToChar</c> and stored in the string columns "Hex" and "Char".
        /// </summary>
        /// <param name="mdata">Matrix that receives one row per line and no main columns.</param>
        /// <param name="parameters">Must contain the string parameter "File".</param>
        /// <param name="supplTables">Not used by this method.</param>
        /// <param name="documents">Not used by this method.</param>
        /// <param name="processInfo">Not used by this method.</param>
        public void LoadData(IMatrixData mdata, Parameters parameters, ref IMatrixData[] supplTables,
                             ref IDocumentData[] documents, ProcessInfo processInfo)
        {
            string filename = parameters.GetParam<string>("File").Value;
            byte[] x;

            // 'using' releases the file handle even when reading throws.
            using (BinaryReader reader = FileUtils.GetBinaryReader(filename))
            {
                x = reader.ReadBytes((int)reader.BaseStream.Length);
            }
            const int nb = 16;
            int fullLines = x.Length / nb;
            List<string> hexLines = new List<string>();
            List<string> charLines = new List<string>();

            for (int i = 0; i < fullLines; i++)
            {
                byte[] y = ArrayUtils.SubArray(x, i * nb, (i + 1) * nb);
                hexLines.Add(ToHex(y));
                charLines.Add(ToChar(y));
            }
            // Emit the trailing partial line only when bytes are left over. The previous test
            // (x.Length / nb > 0) dropped the content of files shorter than one line and added
            // an empty line to files whose size is an exact multiple of the line width.
            if (x.Length % nb > 0)
            {
                byte[] y = ArrayUtils.SubArray(x, fullLines * nb, x.Length);
                hexLines.Add(ToHex(y));
                charLines.Add(ToChar(y));
            }
            mdata.Values.Init(hexLines.Count, 0);
            mdata.SetAnnotationColumns(new List<string>(new[] { "Hex", "Char" }), new List<string>(new[] { "Hex", "Char" }),
                                       new List<string[]>(new[] { hexLines.ToArray(), charLines.ToArray() }), new List<string>(), new List<string>(),
                                       new List<string[][]>(), new List<string>(), new List<string>(), new List<double[]>(), new List<string>(),
                                       new List<string>(), new List<double[][]>());
        }
Esempio n. 2
0
        /// <summary>
        /// Gene enrichment analysis output for both Basic PECA and PECA-N. Reads the tab-separated
        /// file "Goterms.txt" from <paramref name="workingDir"/> into a new matrix whose columns
        /// are all loaded as string columns, then hands every column whose name contains none of
        /// "id", "name" or "members" to <c>StringToNumerical</c>.
        /// </summary>
        /// <param name="mdata">Only used as a factory for the returned instance.</param>
        /// <param name="workingDir">Directory that contains "Goterms.txt".</param>
        /// <param name="option">0 names the result "GSA_Degradation", 1 "GSA_Synthesis", anything else "GSA".</param>
        /// <returns>The newly created matrix.</returns>
        public static IMatrixData GetGOEnr(IMatrixData mdata, string workingDir, int option)
        {
            const char tab = '\t';
            string goTermsPath = Path.Combine(workingDir, @".\Goterms.txt");

            string title = "GSA";
            switch (option)
            {
                case 0:
                    title += "_Degradation";
                    break;
                case 1:
                    title += "_Synthesis";
                    break;
            }

            IMatrixData result = (IMatrixData)mdata.CreateNewInstance();
            result.Clear();
            result.Name = title;
            result.AltName = title;

            string[] header = TabSep.GetColumnNames(goTermsPath, 0, PerseusUtils.commentPrefix,
                                                    PerseusUtils.commentPrefixExceptions, null, tab);
            string[][] content = TabSep.GetColumns(header, goTermsPath, 0, PerseusUtils.commentPrefix,
                                                   PerseusUtils.commentPrefixExceptions, tab);
            int rowTotal = TabSep.GetRowCount(goTermsPath);

            result.Values.Init(rowTotal, 0);

            // Everything is loaded as text first; only string columns are populated here.
            List<string> stringNames = new List<string>(header);
            List<string> stringDescriptions = new List<string>(header);
            List<string[]> stringData = new List<string[]>(content);
            result.SetAnnotationColumns(stringNames, stringDescriptions, stringData,
                                        new List<string>(), new List<string>(), new List<string[][]>(),
                                        new List<string>(), new List<string>(), new List<double[]>(),
                                        new List<string>(), new List<string>(), new List<double[][]>());

            // The pattern succeeds only for names that contain none of the listed words;
            // those columns are the ones converted to numeric.
            Regex nonTextColumn = new Regex(@"^((?!id|name|members).)*$");
            List<int> toConvert = new List<int>();
            int index = 0;
            foreach (string columnName in header)
            {
                if (nonTextColumn.IsMatch(columnName))
                {
                    toConvert.Add(index);
                }
                index++;
            }
            StringToNumerical(toConvert, result);
            return result;
        }
Esempio n. 3
0
        /// <summary>
        /// Transposes the matrix in place. The main values (and the imputation/quality matrices
        /// when present) are transposed, the former main column names become a string column
        /// "Name", the former category and numeric rows become category and numeric columns, and
        /// the former annotation columns become annotation rows. The new main column names come
        /// from the string column selected by "New column names" (made unique), or are generated
        /// as "Column1", "Column2", ... when that parameter is negative.
        /// </summary>
        /// <param name="mdata">Matrix that is modified in place.</param>
        /// <param name="param">Must contain the int parameter "New column names".</param>
        /// <param name="supplTables">Not used by this method.</param>
        /// <param name="documents">Not used by this method.</param>
        /// <param name="processInfo">Not used by this method.</param>
        public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
                                ref IDocumentData[] documents, ProcessInfo processInfo)
        {
            int nameColumnIndex = param.GetParam<int>("New column names").Value;
            List<string> newColumnNames = new List<string>();

            if (nameColumnIndex < 0)
            {
                // No name column chosen: number the new columns starting at 1.
                for (int number = 1; number <= mdata.RowCount; number++)
                {
                    newColumnNames.Add("Column" + number);
                }
            }
            else
            {
                // Names taken from a string column must be made unique.
                HashSet<string> used = new HashSet<string>();
                foreach (string candidate in mdata.StringColumns[nameColumnIndex])
                {
                    string unique = StringUtils.GetNextAvailableName(candidate, used);
                    used.Add(unique);
                    newColumnNames.Add(unique);
                }
            }
            List<string> formerColumnNames = mdata.ColumnNames;

            mdata.Values = mdata.Values.Transpose();
            if (mdata.IsImputed != null)
            {
                mdata.IsImputed = mdata.IsImputed.Transpose();
            }
            if (mdata.Quality != null)
            {
                mdata.Quality = mdata.Quality.Transpose();
            }

            // Capture the current annotation columns before SetAnnotationColumns replaces them.
            List<string> oldStringNames = mdata.StringColumnNames;
            List<string> oldCategoryNames = mdata.CategoryColumnNames;
            List<string> oldNumericNames = mdata.NumericColumnNames;
            List<string> oldMultiNumericNames = mdata.MultiNumericColumnNames;
            List<string> oldStringDescriptions = mdata.StringColumnDescriptions;
            List<string> oldCategoryDescriptions = mdata.CategoryColumnDescriptions;
            List<string> oldNumericDescriptions = mdata.NumericColumnDescriptions;
            List<string> oldMultiNumericDescriptions = mdata.MultiNumericColumnDescriptions;
            List<string[]> oldStringData = mdata.StringColumns;
            List<string[][]> oldCategoryData = GetCategoryColumns(mdata);
            List<double[]> oldNumericData = mdata.NumericColumns;
            List<double[][]> oldMultiNumericData = mdata.MultiNumericColumns;

            List<string> nameColumn = new List<string>(new[] { "Name" });
            List<string> nameDescription = new List<string>(new[] { "Name" });
            List<string[]> nameData = new List<string[]>(new[] { formerColumnNames.ToArray() });
            mdata.SetAnnotationColumns(nameColumn, nameDescription, nameData,
                                       mdata.CategoryRowNames, mdata.CategoryRowDescriptions, GetCategoryRows(mdata),
                                       mdata.NumericRowNames, mdata.NumericRowDescriptions, mdata.NumericRows,
                                       new List<string>(), new List<string>(), new List<double[][]>());
            mdata.ColumnNames = newColumnNames;
            mdata.SetAnnotationRows(oldStringNames, oldStringDescriptions, oldStringData,
                                    oldCategoryNames, oldCategoryDescriptions, oldCategoryData,
                                    oldNumericNames, oldNumericDescriptions, oldNumericData,
                                    oldMultiNumericNames, oldMultiNumericDescriptions, oldMultiNumericData);
        }
        /// <summary>
        /// Replaces the content of <paramref name="data"/> with a table named "Count": one row for
        /// every entry of <paramref name="result"/> whose total count is at least
        /// <paramref name="minCount"/>, with category columns "Type" and "Name" and numeric
        /// columns "Count" and "Percentage of total". When <paramref name="selection"/> is not
        /// null, the columns "Selection count" and "Selection percentage" are added as well.
        /// </summary>
        /// <param name="result">Source of the per-entry types, names and counts.</param>
        /// <param name="data">Matrix to overwrite; its row count before clearing is the percentage base.</param>
        /// <param name="minCount">Entries with a smaller total count are left out.</param>
        /// <param name="selection">Only tested for null to decide whether selection columns are written.</param>
        private static void CreateMatrixData(CountingResult result, IMatrixData data, int minCount, IEnumerable selection)
        {
            bool withSelection = selection != null;
            List<string[]> types = new List<string[]>();
            List<string[]> names = new List<string[]>();
            List<double> totals = new List<double>();
            List<double> totalPercentages = new List<double>();
            List<double> selectedCounts = new List<double>();
            List<double> selectedPercentages = new List<double>();

            for (int entry = 0; entry < result.Count; entry++)
            {
                int total = result.GetTotalCountAt(entry);
                if (total >= minCount)
                {
                    types.Add(new[] { result.GetType1At(entry) });
                    names.Add(new[] { result.GetName1At(entry) });
                    totals.Add(total);
                    // Percentage of all rows, rounded to two decimals.
                    totalPercentages.Add(Math.Round(10000.0 * total / data.RowCount) / 100.0);
                    if (withSelection)
                    {
                        int selected = result.GetSelectCountAt(entry);
                        selectedCounts.Add(selected);
                        // Percentage of this entry's total, rounded to one decimal.
                        selectedPercentages.Add(Math.Round(1000.0 * selected / total) / 10.0);
                    }
                }
            }

            // The result has rows but no main columns.
            double[,] emptyMain = new double[types.Count, 0];

            List<string[][]> categoryData = new List<string[][]>();
            categoryData.Add(types.ToArray());
            categoryData.Add(names.ToArray());
            List<string> categoryNames = new List<string>(new[] { "Type", "Name" });

            List<double[]> numericData = new List<double[]>();
            numericData.Add(totals.ToArray());
            numericData.Add(totalPercentages.ToArray());
            List<string> numericNames = new List<string>(new[] { "Count", "Percentage of total" });
            if (withSelection)
            {
                numericData.Add(selectedCounts.ToArray());
                numericData.Add(selectedPercentages.ToArray());
                numericNames.AddRange(new[] { "Selection count", "Selection percentage" });
            }

            data.Clear();
            data.Name = "Count";
            data.ColumnNames = new List<string>();
            data.Values.Set(emptyMain);
            data.SetAnnotationColumns(new List<string>(), new List<string[]>(), categoryNames, categoryData,
                                      numericNames, numericData, new List<string>(), new List<double[][]>());
        }
Esempio n. 5
0
        /// <summary>
        /// Replaces the matrix with a table named "Summary". For every column index in "Columns"
        /// the statistics selected in "Calculate" (indices into <c>SummaryStatisticsRows.procs</c>)
        /// are evaluated; the statistic names become the main column names and the selected
        /// column names are stored in the string column "Columns". Category and numeric rows are
        /// carried over as columns through <c>TransformCategories</c> and <c>TransformNumeric</c>.
        /// </summary>
        /// <param name="mdata">Matrix that is read and then overwritten.</param>
        /// <param name="param">Must contain the int[] parameters "Columns" and "Calculate".</param>
        /// <param name="supplTables">Not used by this method.</param>
        /// <param name="documents">Not used by this method.</param>
        /// <param name="processInfo">Not used by this method.</param>
        public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
                                ref IDocumentData[] documents, ProcessInfo processInfo)
        {
            int[] cols = param.GetParam<int[]>("Columns").Value;
            HashSet<int> requested = ArrayUtils.ToHashSet(param.GetParam<int[]>("Calculate").Value);

            // Indices of the requested statistics, in ascending order.
            List<int> active = new List<int>();
            for (int p = 0; p < SummaryStatisticsRows.procs.Length; p++)
            {
                if (requested.Contains(p))
                {
                    active.Add(p);
                }
            }

            // One result vector per active statistic, one entry per selected column.
            List<double[]> ex = new List<double[]>();
            foreach (int unused in active)
            {
                ex.Add(new double[cols.Length]);
            }
            for (int c = 0; c < cols.Length; c++)
            {
                double[] vals = GetColumn(cols[c], mdata);
                for (int k = 0; k < active.Count; k++)
                {
                    ex[k][c] = SummaryStatisticsRows.procs[active[k]].Item2(vals);
                }
            }
            List<string> names = new List<string>();
            foreach (int p in active)
            {
                names.Add(SummaryStatisticsRows.procs[p].Item1);
            }

            // Everything that depends on the old content has to be read before Clear().
            double[,] exVals = GetExVals(ex);
            string[] colNames = GetColNames(mdata, cols);
            var categoryRowNames = mdata.CategoryRowNames;
            var transformedCategories = TransformCategories(mdata, cols, mdata.ColumnCount);
            var numericRowNames = mdata.NumericRowNames;
            var transformedNumeric = TransformNumeric(mdata.NumericRows, cols, mdata.ColumnCount);

            mdata.Clear();
            mdata.Name = "Summary";
            mdata.ColumnNames = new List<string>(names);
            mdata.ColumnDescriptions = new List<string>(names);
            mdata.Values.Set(exVals);
            List<string> stringNames = new List<string>(new[] { "Columns" });
            List<string[]> stringData = new List<string[]>(new[] { colNames });
            mdata.SetAnnotationColumns(stringNames, stringData, categoryRowNames, transformedCategories,
                                       numericRowNames, transformedNumeric, new List<string>(), new List<double[][]>());
        }
Esempio n. 6
0
        /// <summary>
        /// Loads a delimited text file into <paramref name="mdata"/>, storing every column as a
        /// string column whose description equals its name. No main columns are created.
        /// </summary>
        /// <param name="mdata">Matrix that receives the data; its origin is set to the file name.</param>
        /// <param name="filename">Path of the file to read.</param>
        /// <param name="csv">True to split on commas, false to split on tabs.</param>
        private static void LoadSplit(IMatrixData mdata, string filename, bool csv)
        {
            char delimiter;
            if (csv)
            {
                delimiter = ',';
            }
            else
            {
                delimiter = '\t';
            }

            string[] header = TabSep.GetColumnNames(filename, 0, PerseusUtils.commentPrefix,
                                                    PerseusUtils.commentPrefixExceptions, null, delimiter);
            string[][] content = TabSep.GetColumns(header, filename, 0, PerseusUtils.commentPrefix,
                                                   PerseusUtils.commentPrefixExceptions, delimiter);
            int rowTotal = TabSep.GetRowCount(filename);

            mdata.Values.Init(rowTotal, 0);

            List<string> stringNames = new List<string>(header);
            List<string> stringDescriptions = new List<string>(header);
            List<string[]> stringData = new List<string[]>(content);
            // Only the string columns are populated; all other annotation kinds stay empty.
            mdata.SetAnnotationColumns(stringNames, stringDescriptions, stringData,
                                       new List<string>(), new List<string>(), new List<string[][]>(),
                                       new List<string>(), new List<string>(), new List<double[]>(),
                                       new List<string>(), new List<string>(), new List<double[][]>());
            mdata.Origin = filename;
        }
Esempio n. 7
0
        /// <summary>
        /// Loads a text file without splitting it into columns: every line of the file becomes
        /// one row of a single string column named "All data". No main columns are created.
        /// </summary>
        /// <param name="mdata">Matrix that receives the data; its origin is set to the file name.</param>
        /// <param name="filename">Path of the file to read.</param>
        private static void LoadNoSplit(IMatrixData mdata, string filename)
        {
            List<string> lines = new List<string>();

            // 'using' closes the reader even when ReadLine throws part-way through the file.
            using (StreamReader reader = FileUtils.GetReader(filename))
            {
                string line;
                while ((line = reader.ReadLine()) != null)
                {
                    lines.Add(line);
                }
            }
            mdata.Values.Init(lines.Count, 0);
            mdata.SetAnnotationColumns(new List<string>(new[] { "All data" }), new List<string>(new[] { "Complete file in one text column." }),
                                       new List<string[]>(new[] { lines.ToArray() }), new List<string>(), new List<string>(), new List<string[][]>(),
                                       new List<string>(), new List<string>(), new List<double[]>(), new List<string>(), new List<string>(),
                                       new List<double[][]>());
            mdata.Origin = filename;
        }
Esempio n. 8
0
        /// <summary>
        /// Loads the output of fdr.exe (so only applicable to PECA CORE and N) into
        /// <paramref name="mdata"/>. Modified from the LoadSplit function in
        /// PerseusPluginLib/Load/UnstructuredTxtUpload.cs.
        /// The file is read tab-separated with every column as a string column; afterwards the
        /// expression columns are passed to <c>StringToExpression</c> and the remaining columns
        /// to <c>StringToNumerical</c>.
        /// </summary>
        /// <param name="mdata">Matrix that is cleared and refilled; it is named "PECA Analysis".</param>
        /// <param name="param">Not used by this method.</param>
        /// <param name="dataParam">Must contain the int[] parameter named by <paramref name="expSeries1"/>.</param>
        /// <param name="filename">Path of the output file; its first line is rewritten via <c>ReplaceFirstLine</c>.</param>
        /// <param name="geneName">Name passed to <c>ReplaceFirstLine</c> for the gene name column.</param>
        /// <param name="expSeries1">Name of the parameter whose length gives the number of columns per series.</param>
        /// <param name="numOfSeries">Number of expression series in the file.</param>
        public static void GetOutput(IMatrixData mdata, Parameters param, Parameters dataParam, string filename, string geneName, string expSeries1 = "Expression Series 1", int numOfSeries = 2)
        {
            const char separator = '\t';

            // The gene name column name is not included in the file, so the first line is replaced.
            ReplaceFirstLine(filename, geneName);

            string[] colNames = TabSep.GetColumnNames(filename, 0, PerseusUtils.commentPrefix,
                                                      PerseusUtils.commentPrefixExceptions, null, separator);
            string[][] cols = TabSep.GetColumns(colNames, filename, 0, PerseusUtils.commentPrefix,
                                                PerseusUtils.commentPrefixExceptions, separator);
            int nrows = TabSep.GetRowCount(filename);

            mdata.Clear();
            mdata.Name = "PECA Analysis";
            mdata.Values.Init(nrows, 0);
            mdata.SetAnnotationColumns(new List<string>(colNames), new List<string>(colNames), new List<string[]>(cols), new List<string>(),
                                       new List<string>(), new List<string[][]>(), new List<string>(), new List<string>(), new List<double[]>(),
                                       new List<string>(), new List<string>(), new List<double[][]>());

            // Be careful with changes of the number of time points in the future.
            int numOfExpCols = numOfSeries * dataParam.GetParam<int[]>(expSeries1).Value.Length;

            // The file is structured so that the expression columns come before the numeric ones,
            // and the first column is guaranteed to be the name column. The numeric columns are
            // converted first, before the expression columns.
            // Enumerable.Range throws ArgumentOutOfRangeException if the file has fewer columns
            // than the expression series require (negative count).
            int[] expList = Enumerable.Range(1, numOfExpCols).ToArray();
            int[] numericList = Enumerable.Range(numOfExpCols + 1, colNames.Length - numOfExpCols - 1).ToArray();

            StringToNumerical(numericList, mdata);
            StringToExpression(expList, mdata);
        }
        /// <summary>
        /// Replaces the matrix with the columns that <c>CalcCurve</c> produces for each selected
        /// score column. For every score column the rows are ordered through <c>GetOrder</c>,
        /// the indicator flags from <c>GetIndicatorColumn</c> are rearranged in that order and
        /// passed to <c>CalcCurve</c>. All annotation columns are emptied afterwards.
        /// Sets <c>processInfo.ErrString</c> and returns without changes when no score column or
        /// no display quantity is selected.
        /// </summary>
        /// <param name="data">Matrix that is read and then overwritten.</param>
        /// <param name="param">Parameters "Indicated are", "In column", "Indicator", "Scores",
        /// "Large values are good" and "Display quantity".</param>
        /// <param name="supplTables">Not used by this method.</param>
        /// <param name="documents">Not used by this method.</param>
        /// <param name="processInfo">Receives the error message on invalid input.</param>
        public void ProcessData(IMatrixData data, Parameters param, ref IMatrixData[] supplTables,
                                ref IDocumentData[] documents, ProcessInfo processInfo)
        {
            int indicatedChoice = param.GetParam<int>("Indicated are").Value;
            bool falseAreIndicated = indicatedChoice == 0;
            int catCol = param.GetParam<int>("In column").Value;
            string word = param.GetParam<string>("Indicator").Value;

            int[] scoreColumns = param.GetParam<int[]>("Scores").Value;
            if (scoreColumns.Length == 0)
            {
                processInfo.ErrString = "Please specify at least one column with scores.";
                return;
            }
            bool largeIsGood = param.GetParam<bool>("Large values are good").Value;

            int[] showColumns = param.GetParam<int[]>("Display quantity").Value;
            if (showColumns.Length == 0)
            {
                processInfo.ErrString = "Please select at least one quantity to display";
                return;
            }

            bool[] indicators = GetIndicatorColumn(falseAreIndicated, catCol, word, data);
            List<string> curveNames = new List<string>();
            List<float[]> curves = new List<float[]>();

            for (int s = 0; s < scoreColumns.Length; s++)
            {
                int scoreColumn = scoreColumns[s];
                double[] scores;
                string label;

                // Score indices address the numeric columns first, then the main columns.
                if (scoreColumn < data.NumericColumnCount)
                {
                    scores = data.NumericColumns[scoreColumn];
                    label = data.NumericColumnNames[scoreColumn];
                }
                else
                {
                    scores = ArrayUtils.ToDoubles(data.Values.GetColumn(scoreColumn - data.NumericColumnCount));
                    label = data.ColumnNames[scoreColumn - data.NumericColumnCount];
                }
                int[] ranking = GetOrder(scores, largeIsGood);
                bool[] rankedIndicators = ArrayUtils.SubArray(indicators, ranking);
                CalcCurve(rankedIndicators, showColumns, label, curves, curveNames);
            }

            float[,] curveMatrix = ToMatrix(curves);
            data.ColumnNames = curveNames;
            data.Values.Set(curveMatrix);
            data.SetAnnotationColumns(new List<string>(), new List<string[]>(),
                                      new List<string>(), new List<string[][]>(),
                                      new List<string>(), new List<double[]>(),
                                      new List<string>(), new List<double[][]>());
        }
Esempio n. 10
0
        /// <summary>
        /// Loads the tab-separated file of the organism selected by the "Organism" parameter
        /// (an index into <c>GetOrganismFiles()</c>) as a matrix named "Gene list" in which every
        /// file column is a string column and there are no main columns.
        /// </summary>
        /// <param name="matrixData">Matrix that receives the data.</param>
        /// <param name="parameters">Must contain the int parameter "Organism".</param>
        /// <param name="supplTables">Not used by this method.</param>
        /// <param name="documents">Not used by this method.</param>
        /// <param name="processInfo">Not used by this method.</param>
        public void LoadData(IMatrixData matrixData, Parameters parameters, ref IMatrixData[] supplTables,
                             ref IDocumentData[] documents, ProcessInfo processInfo)
        {
            const char tab = '\t';
            int organismIndex = parameters.GetParam<int>("Organism").Value;
            string organismFile = GetOrganismFiles()[organismIndex];

            List<string> header = new List<string>(TabSep.GetColumnNames(organismFile, tab));
            List<string[]> columns = new List<string[]>();
            for (int c = 0; c < header.Count; c++)
            {
                columns.Add(TabSep.GetColumn(header[c], organismFile, tab));
            }

            matrixData.Name = "Gene list";
            matrixData.ColumnNames = new List<string>();
            // The row count is taken from the first column of the file.
            int rowTotal = columns[0].Length;
            matrixData.Values.Init(rowTotal, 0);
            matrixData.SetAnnotationColumns(header, columns,
                                            new List<string>(), new List<string[][]>(),
                                            new List<string>(), new List<double[]>(),
                                            new List<string>(), new List<double[][]>());
            matrixData.Origin = "Gene list";
        }
        /// <summary>
        /// An auxiliary method for testing the action of regular expressions.
        /// Limited to a single column, which should be sufficient for this purpose.
        /// Multiple rows are allowed to test the effect of one regex on several strings.
        /// </summary>
        /// <param name="regexStr">Value of the "Regular expression" parameter.</param>
        /// <param name="stringsInit">Content of the single string column before processing.</param>
        /// <param name="stringsExpect">Expected content of that column after processing, row by row.</param>
        private static void TestRegex(string regexStr, string[] stringsInit, string[] stringsExpect)
        {
            const string name = "Test";

            IMatrixData[]   supplTables       = null;
            IDocumentData[] documents         = null;
            List<string>    stringColumnNames = new List<string> {
                "Column Name"
            };
            List<string[]> stringColumnsInit = new List<string[]> {
                stringsInit
            };
            List<string[]> stringColumnsExpect = new List<string[]> {
                stringsExpect
            };
            Parameters param =
                new Parameters(new Parameter[] {
                new MultiChoiceParam("Columns", new[] { 0 })
                {
                    Values = stringColumnNames
                },
                new StringParam("Regular expression", regexStr), new BoolParam("Keep original columns", false),
                new BoolParam("Strings separated by semicolons are independent", false)
            });
            IMatrixData mdata = PerseusFactory.CreateNewMatrixData();

            mdata.Clear();
            mdata.Name = name;
            mdata.SetAnnotationColumns(stringColumnNames, stringColumnsInit, mdata.CategoryColumnNames, new List<string[][]>(),
                                       mdata.NumericColumnNames, mdata.NumericColumns, mdata.MultiNumericColumnNames, mdata.MultiNumericColumns);
            var ptc = new ProcessTextColumns();

            ptc.ProcessData(mdata, param, ref supplTables, ref documents, null);
            const bool ignoreCase = false;

            for (int rowInd = 0; rowInd < stringColumnsInit[0].Length; rowInd++)
            {
                // Assert.AreEqual takes the expected value first; the processed column is the
                // actual value. With the arguments swapped, failure messages were misleading.
                string expected = stringColumnsExpect[0][rowInd];
                string actual   = mdata.StringColumns[0][rowInd];
                Assert.AreEqual(expected, actual, ignoreCase);
            }
        }
Esempio n. 12
0
        /// <summary>
        /// An auxiliary method for testing the action of regular expressions.
        /// Limited to a single column, which should be sufficient for this purpose.
        /// Multiple rows are allowed to test the effect of one regex on several strings.
        /// The comparison ignores case.
        /// </summary>
        /// <param name="regexStr">Value of the "Regular expression" parameter.</param>
        /// <param name="stringsInit">Content of the single string column before processing.</param>
        /// <param name="stringsExpect">Expected content of that column after processing, row by row.</param>
        private static void TestRegex(string regexStr, string[] stringsInit, string[] stringsExpect)
        {
            const string name = "Test";

            IMatrixData[]   supplTables       = null;
            IDocumentData[] documents         = null;
            List<string>    stringColumnNames = new List<string> {
                "Column Name"
            };
            List<string[]> stringColumnsInit = new List<string[]> {
                stringsInit
            };
            List<string[]> stringColumnsExpect = new List<string[]> {
                stringsExpect
            };
            ProcessTextColumns ptc   = new ProcessTextColumns();
            IMatrixData        mdata = PerseusFactory.CreateMatrixData();

            mdata.Clear();
            mdata.Name = name;
            mdata.SetAnnotationColumns(stringColumnNames, stringColumnsInit, mdata.CategoryColumnNames, new List<string[][]>(),
                                       mdata.NumericColumnNames, mdata.NumericColumns, mdata.MultiNumericColumnNames, mdata.MultiNumericColumns);
            string     errorStr = string.Empty;
            Parameters param    = ptc.GetParameters(mdata, ref errorStr);

            param.GetParam<int[]>("Columns").Value              = new[] { 0 };
            param.GetParam<string>("Regular expression").Value  = regexStr;
            param.GetParam<bool>("Keep original columns").Value = false;
            param.GetParam<bool>("Strings separated by semicolons are independent").Value = false;
            ptc.ProcessData(mdata, param, ref supplTables, ref documents, null);
            for (int rowInd = 0; rowInd < stringColumnsInit[0].Length; rowInd++)
            {
                // The caller-supplied strings are the expectation and the processed column is
                // the actual result; the two locals were previously labelled the other way
                // round, which made failure messages misleading.
                string expected = stringColumnsExpect[0][rowInd];
                string actual   = mdata.StringColumns[0][rowInd];
                StringAssert.AreEqualIgnoringCase(expected, actual);
            }
        }
Esempio n. 13
0
        private static void LoadMatrixData(IList<string> colNames, IList<string> colDescriptions, IList<int> mainColIndices,
			IList<int> catColIndices, IList<int> numColIndices, IList<int> textColIndices, IList<int> multiNumColIndices,
			string origin, IMatrixData matrixData, IDictionary<string, string[]> annotationRows, Action<int> progress,
			Action<string> status, char separator, TextReader reader, StreamReader auxReader, int nrows,
			bool shortenExpressionNames, List<Tuple<Relation[], int[], bool>> filters)
        {
            Dictionary<string, string[]> catAnnotatRows;
            Dictionary<string, string[]> numAnnotatRows;
            status("Reading data");
            SplitAnnotRows(annotationRows, out catAnnotatRows, out numAnnotatRows);
            List<string[][]> categoryAnnotation = new List<string[][]>();
            for (int i = 0; i < catColIndices.Count; i++){
                categoryAnnotation.Add(new string[nrows][]);
            }
            List<double[]> numericAnnotation = new List<double[]>();
            for (int i = 0; i < numColIndices.Count; i++){
                numericAnnotation.Add(new double[nrows]);
            }
            List<double[][]> multiNumericAnnotation = new List<double[][]>();
            for (int i = 0; i < multiNumColIndices.Count; i++){
                multiNumericAnnotation.Add(new double[nrows][]);
            }
            List<string[]> stringAnnotation = new List<string[]>();
            for (int i = 0; i < textColIndices.Count; i++){
                stringAnnotation.Add(new string[nrows]);
            }
            float[,] mainValues = new float[nrows, mainColIndices.Count];
            float[,] qualityValues = null;
            bool[,] isImputedValues = null;
            bool hasAddtlMatrices = auxReader != null && GetHasAddtlMatrices(auxReader, mainColIndices, separator);
            if (hasAddtlMatrices){
                qualityValues = new float[nrows, mainColIndices.Count];
                isImputedValues = new bool[nrows, mainColIndices.Count];
            }
            reader.ReadLine();
            int count = 0;
            string line;
            while ((line = reader.ReadLine()) != null){
                progress(100*(count + 1)/nrows);
                if (TabSep.IsCommentLine(line, commentPrefix, commentPrefixExceptions)){
                    continue;
                }
                string[] w;
                if (!IsValidLine(line, separator, filters, out w, hasAddtlMatrices)){
                    continue;
                }
                for (int i = 0; i < mainColIndices.Count; i++){
                    if (mainColIndices[i] >= w.Length){
                        mainValues[count, i] = float.NaN;
                    } else{
                        string s = StringUtils.RemoveWhitespace(w[mainColIndices[i]]);
                        if (hasAddtlMatrices){
                            ParseExp(s, out mainValues[count, i], out isImputedValues[count, i], out qualityValues[count, i]);
                        } else{
                            if (count < mainValues.GetLength(0)){
                                bool success = float.TryParse(s, out mainValues[count, i]);
                                if (!success){
                                    mainValues[count, i] = float.NaN;
                                }
                            }
                        }
                    }
                }
                for (int i = 0; i < numColIndices.Count; i++){
                    if (numColIndices[i] >= w.Length){
                        numericAnnotation[i][count] = double.NaN;
                    } else{
                        double q;
                        bool success = double.TryParse(w[numColIndices[i]].Trim(), out q);
                        if (numericAnnotation[i].Length > count){
                            numericAnnotation[i][count] = success ? q : double.NaN;
                        }
                    }
                }
                for (int i = 0; i < multiNumColIndices.Count; i++){
                    if (multiNumColIndices[i] >= w.Length){
                        multiNumericAnnotation[i][count] = new double[0];
                    } else{
                        string q = w[multiNumColIndices[i]].Trim();
                        if (q.Length >= 2 && q[0] == '\"' && q[q.Length - 1] == '\"'){
                            q = q.Substring(1, q.Length - 2);
                        }
                        if (q.Length >= 2 && q[0] == '\'' && q[q.Length - 1] == '\''){
                            q = q.Substring(1, q.Length - 2);
                        }
                        string[] ww = q.Length == 0 ? new string[0] : q.Split(';');
                        multiNumericAnnotation[i][count] = new double[ww.Length];
                        for (int j = 0; j < ww.Length; j++){
                            double q1;
                            bool success = double.TryParse(ww[j], out q1);
                            multiNumericAnnotation[i][count][j] = success ? q1 : double.NaN;
                        }
                    }
                }
                for (int i = 0; i < catColIndices.Count; i++){
                    if (catColIndices[i] >= w.Length){
                        categoryAnnotation[i][count] = new string[0];
                    } else{
                        string q = w[catColIndices[i]].Trim();
                        if (q.Length >= 2 && q[0] == '\"' && q[q.Length - 1] == '\"'){
                            q = q.Substring(1, q.Length - 2);
                        }
                        if (q.Length >= 2 && q[0] == '\'' && q[q.Length - 1] == '\''){
                            q = q.Substring(1, q.Length - 2);
                        }
                        string[] ww = q.Length == 0 ? new string[0] : q.Split(';');
                        List<int> valids = new List<int>();
                        for (int j = 0; j < ww.Length; j++){
                            ww[j] = ww[j].Trim();
                            if (ww[j].Length > 0){
                                valids.Add(j);
                            }
                        }
                        ww = ArrayUtils.SubArray(ww, valids);
                        Array.Sort(ww);
                        if (categoryAnnotation[i].Length > count){
                            categoryAnnotation[i][count] = ww;
                        }
                    }
                }
                for (int i = 0; i < textColIndices.Count; i++){
                    if (textColIndices[i] >= w.Length){
                        stringAnnotation[i][count] = "";
                    } else{
                        string q = w[textColIndices[i]].Trim();
                        if (stringAnnotation[i].Length > count){
                            stringAnnotation[i][count] = RemoveSplitWhitespace(RemoveQuotes(q));
                        }
                    }
                }
                count++;
            }
            reader.Close();
            string[] columnNames = ArrayUtils.SubArray(colNames, mainColIndices);
            if (shortenExpressionNames){
                columnNames = StringUtils.RemoveCommonSubstrings(columnNames, true);
            }
            string[] catColnames = ArrayUtils.SubArray(colNames, catColIndices);
            string[] numColnames = ArrayUtils.SubArray(colNames, numColIndices);
            string[] multiNumColnames = ArrayUtils.SubArray(colNames, multiNumColIndices);
            string[] textColnames = ArrayUtils.SubArray(colNames, textColIndices);
            matrixData.Name = origin;
            matrixData.ColumnNames = RemoveQuotes(columnNames);
            matrixData.Values.Set(mainValues);
            if (hasAddtlMatrices){
                matrixData.Quality.Set(qualityValues);
                matrixData.IsImputed.Set(isImputedValues);
            } else{
                matrixData.Quality.Set(new float[mainValues.GetLength(0), mainValues.GetLength(1)]);
                matrixData.IsImputed.Set(new bool[mainValues.GetLength(0), mainValues.GetLength(1)]);
            }
            matrixData.SetAnnotationColumns(RemoveQuotes(textColnames), stringAnnotation, RemoveQuotes(catColnames),
                categoryAnnotation, RemoveQuotes(numColnames), numericAnnotation, RemoveQuotes(multiNumColnames),
                multiNumericAnnotation);
            if (colDescriptions != null){
                string[] columnDesc = ArrayUtils.SubArray(colDescriptions, mainColIndices);
                string[] catColDesc = ArrayUtils.SubArray(colDescriptions, catColIndices);
                string[] numColDesc = ArrayUtils.SubArray(colDescriptions, numColIndices);
                string[] multiNumColDesc = ArrayUtils.SubArray(colDescriptions, multiNumColIndices);
                string[] textColDesc = ArrayUtils.SubArray(colDescriptions, textColIndices);
                matrixData.ColumnDescriptions = new List<string>(columnDesc);
                matrixData.NumericColumnDescriptions = new List<string>(numColDesc);
                matrixData.CategoryColumnDescriptions = new List<string>(catColDesc);
                matrixData.StringColumnDescriptions = new List<string>(textColDesc);
                matrixData.MultiNumericColumnDescriptions = new List<string>(multiNumColDesc);
            }
            foreach (string key in catAnnotatRows.Keys){
                string name = key;
                string[] svals = ArrayUtils.SubArray(catAnnotatRows[key], mainColIndices);
                string[][] cat = new string[svals.Length][];
                for (int i = 0; i < cat.Length; i++){
                    string s = svals[i].Trim();
                    cat[i] = s.Length > 0 ? s.Split(';') : new string[0];
                    List<int> valids = new List<int>();
                    for (int j = 0; j < cat[i].Length; j++){
                        cat[i][j] = cat[i][j].Trim();
                        if (cat[i][j].Length > 0){
                            valids.Add(j);
                        }
                    }
                    cat[i] = ArrayUtils.SubArray(cat[i], valids);
                    Array.Sort(cat[i]);
                }
                matrixData.AddCategoryRow(name, name, cat);
            }
            foreach (string key in numAnnotatRows.Keys){
                string name = key;
                string[] svals = ArrayUtils.SubArray(numAnnotatRows[key], mainColIndices);
                double[] num = new double[svals.Length];
                for (int i = 0; i < num.Length; i++){
                    string s = svals[i].Trim();
                    num[i] = double.NaN;
                    double.TryParse(s, out num[i]);
                }
                matrixData.AddNumericRow(name, name, num);
            }
            matrixData.Origin = origin;
            progress(0);
            status("");
        }
Esempio n. 14
0
        /// <summary>
        /// Reads the data lines of a delimited text table from <paramref name="reader"/> and stores the
        /// parsed main matrix, annotation columns and annotation rows in <paramref name="matrixData"/>.
        /// </summary>
        /// <remarks>
        /// The first line of <paramref name="reader"/> is read and discarded before parsing starts
        /// (presumably the header line; confirm against the caller). Lines recognised by
        /// <c>TabSep.IsCommentLine</c> or rejected by <c>IsValidLine</c> are skipped and do not consume a row.
        /// The reader is closed before the method returns, also when parsing throws.
        /// </remarks>
        /// <param name="colNames">Names of all columns; the index lists below select entries from it.</param>
        /// <param name="colDescriptions">Descriptions of all columns, or null when there are none.</param>
        /// <param name="mainColIndices">Indices of the columns parsed into the main float matrix.</param>
        /// <param name="catColIndices">Indices of the columns parsed as ';'-separated categorical entries.</param>
        /// <param name="numColIndices">Indices of the columns parsed as single doubles.</param>
        /// <param name="textColIndices">Indices of the columns kept as text.</param>
        /// <param name="multiNumColIndices">Indices of the columns parsed as ';'-separated lists of doubles.</param>
        /// <param name="origin">Value assigned to both <c>Name</c> and <c>Origin</c> of the matrix.</param>
        /// <param name="matrixData">Matrix that receives the parsed content.</param>
        /// <param name="annotationRows">Annotation rows, divided by <c>SplitAnnotRows</c> into categorical and numeric ones.</param>
        /// <param name="progress">Callback receiving a percentage while reading; called with 0 at the end.</param>
        /// <param name="status">Callback receiving a status text; called with "" at the end.</param>
        /// <param name="separator">Column separator handed to <c>IsValidLine</c> and <c>GetHasAddtlMatrices</c>.</param>
        /// <param name="reader">Source of the table lines; closed by this method.</param>
        /// <param name="auxReader">Optional reader handed to <c>GetHasAddtlMatrices</c>; may be null.</param>
        /// <param name="nrows">Number of rows all value and annotation buffers are allocated for.</param>
        /// <param name="shortenExpressionNames">When true, main column names go through <c>StringUtils.RemoveCommonSubstrings</c>.</param>
        /// <param name="filters">Row filters handed to <c>IsValidLine</c>.</param>
        private static void LoadMatrixData(IList<string> colNames, IList<string> colDescriptions, IList<int> mainColIndices,
                                           IList<int> catColIndices, IList<int> numColIndices, IList<int> textColIndices, IList<int> multiNumColIndices,
                                           string origin, IMatrixData matrixData, IDictionary<string, string[]> annotationRows, Action<int> progress,
                                           Action<string> status, char separator, TextReader reader, StreamReader auxReader, int nrows,
                                           bool shortenExpressionNames, List<Tuple<Relation[], int[], bool>> filters)
        {
            Dictionary<string, string[]> catAnnotatRows;
            Dictionary<string, string[]> numAnnotatRows;

            status("Reading data");
            SplitAnnotRows(annotationRows, out catAnnotatRows, out numAnnotatRows);

            // One buffer with nrows entries per annotation column.
            List<string[][]> categoryAnnotation = new List<string[][]>();
            for (int i = 0; i < catColIndices.Count; i++)
            {
                categoryAnnotation.Add(new string[nrows][]);
            }
            List<double[]> numericAnnotation = new List<double[]>();
            for (int i = 0; i < numColIndices.Count; i++)
            {
                numericAnnotation.Add(new double[nrows]);
            }
            List<double[][]> multiNumericAnnotation = new List<double[][]>();
            for (int i = 0; i < multiNumColIndices.Count; i++)
            {
                multiNumericAnnotation.Add(new double[nrows][]);
            }
            List<string[]> stringAnnotation = new List<string[]>();
            for (int i = 0; i < textColIndices.Count; i++)
            {
                stringAnnotation.Add(new string[nrows]);
            }
            float[,] mainValues = new float[nrows, mainColIndices.Count];
            float[,] qualityValues = null;
            bool[,] isImputedValues = null;
            bool hasAddtlMatrices = auxReader != null && GetHasAddtlMatrices(auxReader, mainColIndices, separator);
            if (hasAddtlMatrices)
            {
                qualityValues = new float[nrows, mainColIndices.Count];
                isImputedValues = new bool[nrows, mainColIndices.Count];
            }

            // NOTE(review): only some branches below check count against the buffer length; the
            // remaining ones throw IndexOutOfRangeException if more than nrows valid lines are read.
            int count = 0;
            try
            {
                // Discard the first line.
                reader.ReadLine();
                string line;
                while ((line = reader.ReadLine()) != null)
                {
                    // nrows == 0 would divide by zero; 64-bit arithmetic keeps 100 * rows from overflowing.
                    if (nrows > 0)
                    {
                        progress((int)(100L * (count + 1) / nrows));
                    }
                    if (TabSep.IsCommentLine(line, commentPrefix, commentPrefixExceptions))
                    {
                        continue;
                    }
                    // IsValidLine hands back the split fields of the line in w.
                    string[] w;
                    if (!IsValidLine(line, separator, filters, out w, hasAddtlMatrices))
                    {
                        continue;
                    }
                    for (int i = 0; i < mainColIndices.Count; i++)
                    {
                        if (mainColIndices[i] >= w.Length)
                        {
                            // The line has fewer fields than expected: treat the value as missing.
                            mainValues[count, i] = float.NaN;
                        }
                        else
                        {
                            string s = StringUtils.RemoveWhitespace(w[mainColIndices[i]]);
                            if (hasAddtlMatrices)
                            {
                                ParseExp(s, out mainValues[count, i], out isImputedValues[count, i], out qualityValues[count, i]);
                            }
                            else
                            {
                                if (count < mainValues.GetLength(0))
                                {
                                    bool success = float.TryParse(s, out mainValues[count, i]);
                                    if (!success)
                                    {
                                        mainValues[count, i] = float.NaN;
                                    }
                                }
                            }
                        }
                    }
                    for (int i = 0; i < numColIndices.Count; i++)
                    {
                        if (numColIndices[i] >= w.Length)
                        {
                            numericAnnotation[i][count] = double.NaN;
                        }
                        else
                        {
                            double q;
                            bool success = double.TryParse(w[numColIndices[i]].Trim(), out q);
                            if (numericAnnotation[i].Length > count)
                            {
                                numericAnnotation[i][count] = success ? q : double.NaN;
                            }
                        }
                    }
                    for (int i = 0; i < multiNumColIndices.Count; i++)
                    {
                        if (multiNumColIndices[i] >= w.Length)
                        {
                            multiNumericAnnotation[i][count] = new double[0];
                        }
                        else
                        {
                            string q = w[multiNumColIndices[i]].Trim();
                            // Strip one enclosing pair of double quotes, then one pair of single quotes.
                            if (q.Length >= 2 && q[0] == '\"' && q[q.Length - 1] == '\"')
                            {
                                q = q.Substring(1, q.Length - 2);
                            }
                            if (q.Length >= 2 && q[0] == '\'' && q[q.Length - 1] == '\'')
                            {
                                q = q.Substring(1, q.Length - 2);
                            }
                            string[] ww = q.Length == 0 ? new string[0] : q.Split(';');
                            multiNumericAnnotation[i][count] = new double[ww.Length];
                            for (int j = 0; j < ww.Length; j++)
                            {
                                double q1;
                                bool success = double.TryParse(ww[j], out q1);
                                multiNumericAnnotation[i][count][j] = success ? q1 : double.NaN;
                            }
                        }
                    }
                    for (int i = 0; i < catColIndices.Count; i++)
                    {
                        if (catColIndices[i] >= w.Length)
                        {
                            categoryAnnotation[i][count] = new string[0];
                        }
                        else
                        {
                            string q = w[catColIndices[i]].Trim();
                            // Strip one enclosing pair of double quotes, then one pair of single quotes.
                            if (q.Length >= 2 && q[0] == '\"' && q[q.Length - 1] == '\"')
                            {
                                q = q.Substring(1, q.Length - 2);
                            }
                            if (q.Length >= 2 && q[0] == '\'' && q[q.Length - 1] == '\'')
                            {
                                q = q.Substring(1, q.Length - 2);
                            }
                            string[] ww = q.Length == 0 ? new string[0] : q.Split(';');
                            // Keep only the non-empty trimmed terms, in sorted order.
                            List<int> valids = new List<int>();
                            for (int j = 0; j < ww.Length; j++)
                            {
                                ww[j] = ww[j].Trim();
                                if (ww[j].Length > 0)
                                {
                                    valids.Add(j);
                                }
                            }
                            ww = ArrayUtils.SubArray(ww, valids);
                            Array.Sort(ww);
                            if (categoryAnnotation[i].Length > count)
                            {
                                categoryAnnotation[i][count] = ww;
                            }
                        }
                    }
                    for (int i = 0; i < textColIndices.Count; i++)
                    {
                        if (textColIndices[i] >= w.Length)
                        {
                            stringAnnotation[i][count] = "";
                        }
                        else
                        {
                            string q = w[textColIndices[i]].Trim();
                            if (stringAnnotation[i].Length > count)
                            {
                                stringAnnotation[i][count] = RemoveSplitWhitespace(RemoveQuotes(q));
                            }
                        }
                    }
                    count++;
                }
            }
            finally
            {
                // Release the reader even when a line fails to parse.
                reader.Close();
            }

            string[] columnNames = ArrayUtils.SubArray(colNames, mainColIndices);
            if (shortenExpressionNames)
            {
                columnNames = StringUtils.RemoveCommonSubstrings(columnNames, true);
            }
            string[] catColnames = ArrayUtils.SubArray(colNames, catColIndices);
            string[] numColnames = ArrayUtils.SubArray(colNames, numColIndices);
            string[] multiNumColnames = ArrayUtils.SubArray(colNames, multiNumColIndices);
            string[] textColnames = ArrayUtils.SubArray(colNames, textColIndices);
            matrixData.Name = origin;
            matrixData.ColumnNames = RemoveQuotes(columnNames);
            matrixData.Values.Set(mainValues);
            if (hasAddtlMatrices)
            {
                matrixData.Quality.Set(qualityValues);
                matrixData.IsImputed.Set(isImputedValues);
            }
            else
            {
                // No additional matrices in the file: use default-initialised ones of matching size.
                matrixData.Quality.Set(new float[mainValues.GetLength(0), mainValues.GetLength(1)]);
                matrixData.IsImputed.Set(new bool[mainValues.GetLength(0), mainValues.GetLength(1)]);
            }
            matrixData.SetAnnotationColumns(RemoveQuotes(textColnames), stringAnnotation, RemoveQuotes(catColnames),
                                            categoryAnnotation, RemoveQuotes(numColnames), numericAnnotation, RemoveQuotes(multiNumColnames),
                                            multiNumericAnnotation);
            if (colDescriptions != null)
            {
                string[] columnDesc = ArrayUtils.SubArray(colDescriptions, mainColIndices);
                string[] catColDesc = ArrayUtils.SubArray(colDescriptions, catColIndices);
                string[] numColDesc = ArrayUtils.SubArray(colDescriptions, numColIndices);
                string[] multiNumColDesc = ArrayUtils.SubArray(colDescriptions, multiNumColIndices);
                string[] textColDesc = ArrayUtils.SubArray(colDescriptions, textColIndices);
                matrixData.ColumnDescriptions = new List<string>(columnDesc);
                matrixData.NumericColumnDescriptions = new List<string>(numColDesc);
                matrixData.CategoryColumnDescriptions = new List<string>(catColDesc);
                matrixData.StringColumnDescriptions = new List<string>(textColDesc);
                matrixData.MultiNumericColumnDescriptions = new List<string>(multiNumColDesc);
            }
            foreach (string key in catAnnotatRows.Keys)
            {
                string name = key;
                string[] svals = ArrayUtils.SubArray(catAnnotatRows[key], mainColIndices);
                string[][] cat = new string[svals.Length][];
                for (int i = 0; i < cat.Length; i++)
                {
                    string s = svals[i].Trim();
                    cat[i] = s.Length > 0 ? s.Split(';') : new string[0];
                    // Keep only the non-empty trimmed terms, in sorted order.
                    List<int> valids = new List<int>();
                    for (int j = 0; j < cat[i].Length; j++)
                    {
                        cat[i][j] = cat[i][j].Trim();
                        if (cat[i][j].Length > 0)
                        {
                            valids.Add(j);
                        }
                    }
                    cat[i] = ArrayUtils.SubArray(cat[i], valids);
                    Array.Sort(cat[i]);
                }
                matrixData.AddCategoryRow(name, name, cat);
            }
            foreach (string key in numAnnotatRows.Keys)
            {
                string name = key;
                string[] svals = ArrayUtils.SubArray(numAnnotatRows[key], mainColIndices);
                double[] num = new double[svals.Length];
                for (int i = 0; i < num.Length; i++)
                {
                    string s = svals[i].Trim();
                    // TryParse writes 0 to its out argument on failure, so NaN has to be
                    // assigned explicitly for entries that are not numbers.
                    double parsed;
                    num[i] = double.TryParse(s, out parsed) ? parsed : double.NaN;
                }
                matrixData.AddNumericRow(name, name, num);
            }
            matrixData.Origin = origin;
            progress(0);
            status("");
        }
        /// <summary>
        /// Replaces the rows of <paramref name="mdata"/> by one summary row per category term.
        /// </summary>
        /// <remarks>
        /// For every term of the category columns selected by the "Categories" parameter that occurs in at
        /// least "Min. size" rows, each main column is summarised over the rows carrying the term. The
        /// "Average type" parameter selects the summary: 0 = median, 1 = mean, 2 = sum, 3 = standard
        /// deviation. NaN and infinite values are left out; a column without usable values yields NaN.
        /// Terms for which <c>LookupPreviousInd</c> reports an already stored profile share that row: the
        /// term is joined to the row name with ';' and its text annotation is merged into the same row.
        /// The result has a text column "Category" with the term names, and the former text columns are
        /// handed over as category columns whose entries are the ';'-split values of the contributing rows.
        /// </remarks>
        /// <param name="mdata">Matrix that is summarised in place.</param>
        /// <param name="param">Supplies "Categories", "Min. size" and "Average type".</param>
        /// <param name="supplTables">Not used by this method.</param>
        /// <param name="documents">Not used by this method.</param>
        /// <param name="processInfo">Not used by this method.</param>
        public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
                                ref IDocumentData[] documents, ProcessInfo processInfo)
        {
            int[] catIndices = param.GetParam<int[]>("Categories").Value;
            int minSize = param.GetParam<int>("Min. size").Value;
            int summaryInd = param.GetParam<int>("Average type").Value;
            SummaryType type;

            switch (summaryInd)
            {
            case 0:
                type = SummaryType.Median;
                break;

            case 1:
                type = SummaryType.Mean;
                break;

            case 2:
                type = SummaryType.Sum;
                break;

            case 3:
                type = SummaryType.StandardDeviation;
                break;

            default:
                throw new Exception("Never get here.");
            }
            // exVals, catNames and every stringAnnot[i] are kept in step: one entry per result row.
            List<double[]> exVals = new List<double[]>();
            List<string[]>[] stringAnnot = new List<string[]>[mdata.StringColumnCount];
            for (int i = 0; i < stringAnnot.Length; i++)
            {
                stringAnnot[i] = new List<string[]>();
            }
            List<string> catNames = new List<string>();

            foreach (string[][] cat in catIndices.Select(mdata.GetCategoryColumnAt))
            {
                foreach (string[] t in cat)
                {
                    Array.Sort(t);
                }
                string[] allVals = ArrayUtils.UniqueValues(ArrayUtils.Concat(cat));
                foreach (string val in allVals)
                {
                    int[] inds = GetIndices(cat, val);
                    if (inds.Length < minSize)
                    {
                        continue;
                    }
                    double[] expProfile = new double[mdata.ColumnCount];
                    for (int i = 0; i < expProfile.Length; i++)
                    {
                        List<double> vals = new List<double>();
                        foreach (int ind in inds)
                        {
                            double v = mdata.Values.Get(ind, i);
                            if (!double.IsNaN(v) && !double.IsInfinity(v))
                            {
                                vals.Add(v);
                            }
                        }
                        expProfile[i] = vals.Count > 0 ? Calc(vals, type) : double.NaN;
                    }
                    // -1 means that no stored row matches this profile.
                    int prevInd = LookupPreviousInd(exVals, expProfile);
                    if (prevInd == -1)
                    {
                        catNames.Add(val);
                        exVals.Add(expProfile);
                    }
                    else
                    {
                        catNames[prevInd] = StringUtils.Concat(";",
                                                               ArrayUtils.UniqueValues(ArrayUtils.Concat(catNames[prevInd].Split(';'), new[] { val })));
                    }
                    for (int i = 0; i < stringAnnot.Length; i++)
                    {
                        List<string> vals = new List<string>();
                        foreach (int ind in inds)
                        {
                            string v = mdata.StringColumns[i][ind];
                            if (v.Length > 0)
                            {
                                vals.AddRange(v.Split(';'));
                            }
                        }
                        if (prevInd == -1)
                        {
                            stringAnnot[i].Add(vals.ToArray());
                        }
                        else
                        {
                            // Merge into the existing row. Appending a new entry here would leave the
                            // annotation with more entries than there are result rows.
                            List<string> merged = new List<string>(stringAnnot[i][prevInd]);
                            merged.AddRange(vals);
                            stringAnnot[i][prevInd] = merged.ToArray();
                        }
                    }
                }
            }
            List<string> stringColumnNames = new List<string>(new[] { "Category" });
            List<string[]> stringAnn = new List<string[]> {
                catNames.ToArray()
            };
            // The former text columns are passed on as category columns.
            List<string> catColumnNames = mdata.StringColumnNames;
            List<string[][]> catAnn = new List<string[][]>();

            foreach (string[][] w in stringAnnot.Select(t => t.ToArray()))
            {
                foreach (string[] t1 in w)
                {
                    Array.Sort(t1);
                }
                catAnn.Add(w);
            }
            // Every profile has mdata.ColumnCount entries; taking the width from there instead of
            // exVals[0] keeps this valid when no term reached minSize and exVals is empty.
            double[,] expressionValues = new double[exVals.Count, mdata.ColumnCount];
            for (int i = 0; i < expressionValues.GetLength(0); i++)
            {
                for (int j = 0; j < expressionValues.GetLength(1); j++)
                {
                    expressionValues[i, j] = exVals[i][j];
                }
            }
            mdata.Values.Set(expressionValues);
            mdata.SetAnnotationColumns(stringColumnNames, stringAnn, catColumnNames, catAnn, new List<string>(),
                                       new List<double[]>(), new List<string>(), new List<double[][]>());
        }
Esempio n. 16
0
        /// <summary>
        /// Stacks groups of numbered main columns on top of each other, multiplying the row count by the
        /// number of suffixes found by <c>SortNumberedNames</c>.
        /// </summary>
        /// <remarks>
        /// The result has <c>RowCount * suffixes.Length</c> rows. Its main columns are the columns left
        /// over after <c>FilterExpressionColIndices</c>, followed by one column per prefix; for suffix
        /// <c>s</c> and prefix <c>p</c> the values come from source column <c>groupedCols[p, s]</c>.
        /// Quality and imputation flags are copied alongside the values. All annotation columns are
        /// repeated for every suffix, with numeric columns limited to those returned by
        /// <c>GetValidNumCols</c>. A category column "Multiplicity" holding the suffix and a text column
        /// "Unique identifier" holding "UID" plus the 1-based row number are appended.
        /// If <c>SortNumberedNames</c> reports an error it is stored in <c>processInfo.ErrString</c> and
        /// the matrix is left unchanged.
        /// </remarks>
        public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
                                ref IDocumentData[] documents, ProcessInfo processInfo)
        {
            List<string> sourceNames = mdata.ColumnNames;
            string problem = null;
            int[,] groupedCols = SortNumberedNames(sourceNames, mdata.ColumnDescriptions, maxInd, ref problem,
                                                   out string[] suffixes, out string[] prefixes, out List<string> prefixDescriptions);
            if (problem != null)
            {
                processInfo.ErrString = problem;
                return;
            }

            // Main columns that are not part of any numbered group.
            int[] plainCols = FilterExpressionColIndices(
                ArrayUtils.Complement(To1DArray(groupedCols), sourceNames.Count), mdata.ColumnNames, prefixes);
            int[] keptNumCols = GetValidNumCols(mdata.NumericColumnNames, prefixes);

            int sourceRows = mdata.RowCount;
            int totalRows = sourceRows * suffixes.Length;
            int plainCount = plainCols.Length;
            int totalCols = plainCount + prefixes.Length;

            double[,] values = new double[totalRows, totalCols];
            double[,] qualities = new double[totalRows, totalCols];
            bool[,] imputedFlags = new bool[totalRows, totalCols];

            int textCount = mdata.StringColumnCount;
            int categoryCount = mdata.CategoryColumnCount;
            int multiNumCount = mdata.MultiNumericColumnCount;

            List<double[]> numericOut = new List<double[]>();
            for (int c = 0; c < keptNumCols.Length; c++)
            {
                numericOut.Add(new double[totalRows]);
            }
            // One extra text column for the unique identifier.
            List<string[]> textOut = new List<string[]>();
            for (int c = 0; c <= textCount; c++)
            {
                textOut.Add(new string[totalRows]);
            }
            // One extra category column for the multiplicity.
            List<string[][]> categoryOut = new List<string[][]>();
            for (int c = 0; c <= categoryCount; c++)
            {
                categoryOut.Add(new string[totalRows][]);
            }
            List<double[][]> multiNumOut = new List<double[][]>();
            for (int c = 0; c < multiNumCount; c++)
            {
                multiNumOut.Add(new double[totalRows][]);
            }

            List<string> newNames = new List<string>();
            List<string> newDescriptions = new List<string>();
            foreach (int col in plainCols)
            {
                newNames.Add(sourceNames[col]);
                newDescriptions.Add(mdata.ColumnDescriptions[col]);
            }
            // Prefixes and their descriptions are paired up to the length of the shorter list.
            int pairCount = Math.Min(prefixes.Length, prefixDescriptions.Count);
            for (int p = 0; p < pairCount; p++)
            {
                newNames.Add(prefixes[p]);
                newDescriptions.Add(prefixDescriptions[p]);
            }

            // target runs through the output rows: all source rows for suffix 0, then suffix 1, ...
            int target = 0;
            for (int s = 0; s < suffixes.Length; s++)
            {
                for (int row = 0; row < sourceRows; row++, target++)
                {
                    for (int c = 0; c < plainCount; c++)
                    {
                        int src = plainCols[c];
                        values[target, c] = mdata.Values.Get(row, src);
                        qualities[target, c] = mdata.Quality.Get(row, src);
                        imputedFlags[target, c] = mdata.IsImputed[row, src];
                    }
                    for (int p = 0; p < prefixes.Length; p++)
                    {
                        int src = groupedCols[p, s];
                        int dst = plainCount + p;
                        values[target, dst] = mdata.Values.Get(row, src);
                        qualities[target, dst] = mdata.Quality.Get(row, src);
                        imputedFlags[target, dst] = mdata.IsImputed[row, src];
                    }
                    for (int c = 0; c < keptNumCols.Length; c++)
                    {
                        numericOut[c][target] = mdata.NumericColumns[keptNumCols[c]][row];
                    }
                    for (int c = 0; c < textCount; c++)
                    {
                        textOut[c][target] = mdata.StringColumns[c][row];
                    }
                    for (int c = 0; c < categoryCount; c++)
                    {
                        categoryOut[c][target] = mdata.GetCategoryColumnEntryAt(c, row);
                    }
                    for (int c = 0; c < multiNumCount; c++)
                    {
                        multiNumOut[c][target] = mdata.MultiNumericColumns[c][row];
                    }
                    categoryOut[categoryCount][target] = new[] { suffixes[s] };
                    textOut[textOut.Count - 1][target] = "UID" + (target + 1);
                }
            }

            string[] categoryNames = ArrayUtils.Concat(mdata.CategoryColumnNames, new[] { "Multiplicity" });
            mdata.ColumnNames = newNames;
            mdata.ColumnDescriptions = newDescriptions;
            mdata.Values.Set(values);
            mdata.Quality.Set(qualities);
            mdata.IsImputed.Set(imputedFlags);

            List<string> textNames = new List<string>(ArrayUtils.Concat(mdata.StringColumnNames, new[] { "Unique identifier" }));
            List<string> categoryNameList = new List<string>(categoryNames);
            var numericNames = ArrayUtils.SubList(mdata.NumericColumnNames, keptNumCols);
            var multiNumNames = mdata.MultiNumericColumnNames;
            mdata.SetAnnotationColumns(textNames, textOut, categoryNameList, categoryOut, numericNames,
                                       numericOut, multiNumNames, multiNumOut);
        }
        /// <summary>
        /// Expands every row into one row per entry of the selected multi-numeric and text columns.
        /// </summary>
        /// <remarks>
        /// The columns come from the "Multi-numeric columns" and "Text columns" parameters; both index
        /// arrays are sorted in place. If neither selects a column, an error is stored in
        /// <c>processInfo.ErrString</c> and nothing is changed. The number of entries per row is supplied by
        /// <c>GetEntryCount</c>, whose error (if any) is also reported through <c>ErrString</c>. A row
        /// with zero entries is kept as a single row with empty values in the selected columns.
        /// Main values, numeric and category columns and the unselected multi-numeric and text columns are
        /// repeated on each expanded row; a selected column contributes its j-th entry (text split on ';')
        /// to the j-th expanded row. Finally the selected multi-numeric columns are passed through
        /// <c>Transform</c> and appended to the numeric columns.
        /// </remarks>
        public void ProcessData(IMatrixData mdata, Parameters param1, ref IMatrixData[] supplTables,
                                ref IDocumentData[] documents, ProcessInfo processInfo)
        {
            int[] multiNumCols = param1.GetParam<int[]>("Multi-numeric columns").Value;
            Array.Sort(multiNumCols);
            int[] stringCols = param1.GetParam<int[]>("Text columns").Value;
            Array.Sort(stringCols);
            HashSet<int> expandedMultiNum = new HashSet<int>(multiNumCols);
            HashSet<int> expandedText = new HashSet<int>(stringCols);

            if (multiNumCols.Length == 0 && stringCols.Length == 0)
            {
                processInfo.ErrString = "Please select some columns.";
                return;
            }

            int newRowCount = GetNewRowCount(mdata, multiNumCols, stringCols);
            float[,] newValues = new float[newRowCount, mdata.ColumnCount];

            List<string[]> textOut = new List<string[]>();
            for (int c = 0; c < mdata.StringColumnCount; c++)
            {
                textOut.Add(new string[newRowCount]);
            }
            List<double[]> numericOut = new List<double[]>();
            for (int c = 0; c < mdata.NumericColumnCount; c++)
            {
                numericOut.Add(new double[newRowCount]);
            }
            List<string[][]> categoryOut = new List<string[][]>();
            for (int c = 0; c < mdata.CategoryColumnCount; c++)
            {
                categoryOut.Add(new string[newRowCount][]);
            }
            List<double[][]> multiNumOut = new List<double[][]>();
            for (int c = 0; c < mdata.MultiNumericColumnCount; c++)
            {
                multiNumOut.Add(new double[newRowCount][]);
            }

            // offset is the first output row belonging to the current source row.
            int offset = 0;
            for (int row = 0; row < mdata.RowCount; row++)
            {
                string failure;
                int copies = GetEntryCount(row, mdata, multiNumCols, stringCols, out failure);
                if (failure != null)
                {
                    processInfo.ErrString = failure;
                    return;
                }
                // A row without entries still occupies one output row.
                bool noEntries = copies == 0;
                if (copies < 1)
                {
                    copies = 1;
                }

                // Columns that are simply repeated on every copy of the row.
                for (int c = 0; c < mdata.ColumnCount; c++)
                {
                    float value = mdata.Values[row, c];
                    for (int j = 0; j < copies; j++)
                    {
                        newValues[offset + j, c] = value;
                    }
                }
                for (int c = 0; c < mdata.NumericColumnCount; c++)
                {
                    double value = mdata.NumericColumns[c][row];
                    for (int j = 0; j < copies; j++)
                    {
                        numericOut[c][offset + j] = value;
                    }
                }
                for (int c = 0; c < mdata.CategoryColumnCount; c++)
                {
                    for (int j = 0; j < copies; j++)
                    {
                        categoryOut[c][offset + j] = mdata.GetCategoryColumnEntryAt(c, row);
                    }
                }

                for (int c = 0; c < mdata.MultiNumericColumnCount; c++)
                {
                    if (!expandedMultiNum.Contains(c))
                    {
                        for (int j = 0; j < copies; j++)
                        {
                            multiNumOut[c][offset + j] = mdata.MultiNumericColumns[c][row];
                        }
                        continue;
                    }
                    if (noEntries)
                    {
                        multiNumOut[c][offset] = new double[0];
                        continue;
                    }
                    // One entry of the list per expanded row.
                    double[] entries = mdata.MultiNumericColumns[c][row];
                    for (int j = 0; j < copies; j++)
                    {
                        multiNumOut[c][offset + j] = new[] { entries[j] };
                    }
                }

                for (int c = 0; c < mdata.StringColumnCount; c++)
                {
                    if (!expandedText.Contains(c))
                    {
                        for (int j = 0; j < copies; j++)
                        {
                            textOut[c][offset + j] = mdata.StringColumns[c][row];
                        }
                        continue;
                    }
                    if (noEntries)
                    {
                        textOut[c][offset] = "";
                        continue;
                    }
                    // One ';'-separated piece per expanded row.
                    string[] pieces = mdata.StringColumns[c][row].Split(';');
                    for (int j = 0; j < copies; j++)
                    {
                        textOut[c][offset + j] = pieces[j];
                    }
                }
                offset += copies;
            }

            // Selected multi-numeric columns turn into numeric columns; the others stay multi-numeric.
            int[] untouchedMultiNum = ArrayUtils.Complement(multiNumCols, mdata.MultiNumericColumnCount);
            List<double[][]> selectedMultiNum = ArrayUtils.SubList(multiNumOut, multiNumCols);
            multiNumOut = ArrayUtils.SubList(multiNumOut, untouchedMultiNum);
            foreach (double[][] column in selectedMultiNum)
            {
                numericOut.Add(Transform(column));
            }

            // NOTE(review): self-assignment kept from the original; it only matters if the setter has side effects.
            mdata.ColumnNames = mdata.ColumnNames;
            mdata.Values.Set(newValues);

            var textNames = mdata.StringColumnNames;
            var categoryNames = mdata.CategoryColumnNames;
            List<string> numericNames = new List<string>(ArrayUtils.Concat(mdata.NumericColumnNames,
                                                                          ArrayUtils.SubList(mdata.MultiNumericColumnNames, multiNumCols)));
            List<string> multiNumNames = new List<string>(ArrayUtils.SubArray(mdata.MultiNumericColumnNames, untouchedMultiNum));
            mdata.SetAnnotationColumns(textNames, textOut, categoryNames, categoryOut, numericNames, numericOut,
                                       multiNumNames, multiNumOut);
        }
        /// <summary>
        /// Expands rows: for every source row, one output row is produced per entry of the
        /// selected multi-numeric columns / ';'-separated text columns (at least one row is
        /// always kept). All other values of the source row are repeated on each output row.
        /// The selected multi-numeric columns are afterwards passed through <c>Transform</c>
        /// and appended to the numeric columns.
        /// </summary>
        /// <param name="mdata">Matrix that is read and then overwritten with the expanded data.</param>
        /// <param name="param1">Provides the "Multi-numeric columns" and "Text columns" selections.</param>
        /// <param name="supplTables">Not used by this method.</param>
        /// <param name="documents">Not used by this method.</param>
        /// <param name="processInfo">Receives an error message when nothing is selected or a row is rejected.</param>
        public void ProcessData(IMatrixData mdata, Parameters param1, ref IMatrixData[] supplTables,
            ref IDocumentData[] documents, ProcessInfo processInfo)
        {
            int[] multiNumCols = param1.GetParam<int[]>("Multi-numeric columns").Value;
            Array.Sort(multiNumCols);
            int[] stringCols = param1.GetParam<int[]>("Text columns").Value;
            Array.Sort(stringCols);
            if (multiNumCols.Length == 0 && stringCols.Length == 0)
            {
                processInfo.ErrString = "Please select some columns.";
                return;
            }
            // Hash sets give constant-time "is this column expanded?" checks in the row loop.
            HashSet<int> expandedMultiNum = new HashSet<int>(multiNumCols);
            HashSet<int> expandedText = new HashSet<int>(stringCols);

            // Allocate the output containers with the final (expanded) row count.
            int newRowCount = GetNewRowCount(mdata, multiNumCols, stringCols);
            float[,] expVals = new float[newRowCount, mdata.ColumnCount];
            List<string[]> stringC = new List<string[]>();
            for (int c = 0; c < mdata.StringColumnCount; c++)
            {
                stringC.Add(new string[newRowCount]);
            }
            List<double[]> numC = new List<double[]>();
            for (int c = 0; c < mdata.NumericColumnCount; c++)
            {
                numC.Add(new double[newRowCount]);
            }
            List<string[][]> catC = new List<string[][]>();
            for (int c = 0; c < mdata.CategoryColumnCount; c++)
            {
                catC.Add(new string[newRowCount][]);
            }
            List<double[][]> multiNumC = new List<double[][]>();
            for (int c = 0; c < mdata.MultiNumericColumnCount; c++)
            {
                multiNumC.Add(new double[newRowCount][]);
            }

            int outRow = 0;
            for (int srcRow = 0; srcRow < mdata.RowCount; srcRow++)
            {
                string rowError;
                int copies = GetEntryCount(srcRow, mdata, multiNumCols, stringCols, out rowError);
                if (rowError != null)
                {
                    processInfo.ErrString = rowError;
                    return;
                }
                // A row without entries is still kept once, with empty values in the expanded columns.
                bool noEntries = copies == 0;
                if (copies < 1)
                {
                    copies = 1;
                }

                // Values that are simply repeated on every copy of the row.
                for (int rep = 0; rep < copies; rep++)
                {
                    int target = outRow + rep;
                    for (int c = 0; c < mdata.ColumnCount; c++)
                    {
                        expVals[target, c] = mdata.Values.Get(srcRow, c);
                    }
                    for (int c = 0; c < mdata.NumericColumnCount; c++)
                    {
                        numC[c][target] = mdata.NumericColumns[c][srcRow];
                    }
                    for (int c = 0; c < mdata.CategoryColumnCount; c++)
                    {
                        catC[c][target] = mdata.GetCategoryColumnEntryAt(c, srcRow);
                    }
                }

                // Multi-numeric columns: expanded ones get one value per copy, others are repeated.
                for (int c = 0; c < mdata.MultiNumericColumnCount; c++)
                {
                    if (!expandedMultiNum.Contains(c))
                    {
                        for (int rep = 0; rep < copies; rep++)
                        {
                            multiNumC[c][outRow + rep] = mdata.MultiNumericColumns[c][srcRow];
                        }
                        continue;
                    }
                    if (noEntries)
                    {
                        multiNumC[c][outRow] = new double[0];
                        continue;
                    }
                    double[] entries = mdata.MultiNumericColumns[c][srcRow];
                    for (int rep = 0; rep < copies; rep++)
                    {
                        multiNumC[c][outRow + rep] = new[] { entries[rep] };
                    }
                }

                // Text columns: expanded ones are split at ';', others are repeated.
                for (int c = 0; c < mdata.StringColumnCount; c++)
                {
                    if (!expandedText.Contains(c))
                    {
                        for (int rep = 0; rep < copies; rep++)
                        {
                            stringC[c][outRow + rep] = mdata.StringColumns[c][srcRow];
                        }
                        continue;
                    }
                    if (noEntries)
                    {
                        stringC[c][outRow] = "";
                        continue;
                    }
                    string[] parts = mdata.StringColumns[c][srcRow].Split(';');
                    for (int rep = 0; rep < copies; rep++)
                    {
                        stringC[c][outRow + rep] = parts[rep];
                    }
                }
                outRow += copies;
            }

            // The expanded multi-numeric columns leave the multi-numeric list and are
            // appended, after Transform, to the numeric columns.
            int[] keptMultiNum = ArrayUtils.Complement(multiNumCols, mdata.MultiNumericColumnCount);
            List<double[][]> movedToNumeric = ArrayUtils.SubList(multiNumC, multiNumCols);
            multiNumC = ArrayUtils.SubList(multiNumC, keptMultiNum);
            foreach (double[][] moved in movedToNumeric)
            {
                numC.Add(Transform(moved));
            }

            mdata.ColumnNames = mdata.ColumnNames;
            mdata.Values.Set(expVals);
            List<string> numericNames = new List<string>(ArrayUtils.Concat(mdata.NumericColumnNames,
                ArrayUtils.SubList(mdata.MultiNumericColumnNames, multiNumCols)));
            List<string> multiNumericNames =
                new List<string>(ArrayUtils.SubArray(mdata.MultiNumericColumnNames, keptMultiNum));
            mdata.SetAnnotationColumns(mdata.StringColumnNames, stringC, mdata.CategoryColumnNames, catC,
                numericNames, numC, multiNumericNames, multiNumC);
        }
Esempio n. 19
0
        /// <summary>
        /// Transposes the matrix in place: main values (and the imputation/quality matrices when
        /// present) are transposed, the former column names become a "Name" text column, the former
        /// annotation rows become annotation columns and the former annotation columns become
        /// annotation rows.
        /// </summary>
        /// <param name="mdata">Matrix that is transposed in place.</param>
        /// <param name="param">Provides "New column names": index of the text column supplying the
        /// new column names, or a negative value to generate "Column1", "Column2", ...</param>
        /// <param name="supplTables">Not used by this method.</param>
        /// <param name="documents">Not used by this method.</param>
        /// <param name="processInfo">Not used by this method.</param>
        public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
            ref IDocumentData[] documents, ProcessInfo processInfo)
        {
            int nameCol = param.GetParam<int>("New column names").Value;
            List<string> colNames = new List<string>();
            if (nameCol < 0)
            {
                // No name column chosen: generate one-based default names, one per current row.
                for (int number = 1; number <= mdata.RowCount; number++)
                {
                    colNames.Add("Column" + number);
                }
            }
            else
            {
                // Take the names from the chosen text column, making duplicates unique.
                HashSet<string> used = new HashSet<string>();
                foreach (string candidate in mdata.StringColumns[nameCol])
                {
                    string unique = StringUtils.GetNextAvailableName(candidate, used);
                    used.Add(unique);
                    colNames.Add(unique);
                }
            }

            // The current column names will become the single text column of the result.
            List<string> formerColumnNames = mdata.ColumnNames;
            mdata.Values = mdata.Values.Transpose();
            if (mdata.IsImputed != null)
            {
                mdata.IsImputed = mdata.IsImputed.Transpose();
            }
            if (mdata.Quality != null)
            {
                mdata.Quality = mdata.Quality.Transpose();
            }

            // Capture the annotation columns before they are overwritten below;
            // they are re-attached as annotation rows at the end.
            List<string> textNames = mdata.StringColumnNames;
            List<string> textDescriptions = mdata.StringColumnDescriptions;
            List<string[]> textData = mdata.StringColumns;
            List<string> catNames = mdata.CategoryColumnNames;
            List<string> catDescriptions = mdata.CategoryColumnDescriptions;
            List<string[][]> catData = GetCategoryColumns(mdata);
            List<string> numNames = mdata.NumericColumnNames;
            List<string> numDescriptions = mdata.NumericColumnDescriptions;
            List<double[]> numData = mdata.NumericColumns;
            List<string> multiNumNames = mdata.MultiNumericColumnNames;
            List<string> multiNumDescriptions = mdata.MultiNumericColumnDescriptions;
            List<double[][]> multiNumData = mdata.MultiNumericColumns;

            // Former annotation rows become the new annotation columns.
            mdata.SetAnnotationColumns(
                new List<string>(new[] { "Name" }),
                new List<string>(new[] { "Name" }),
                new List<string[]>(new[] { formerColumnNames.ToArray() }),
                mdata.CategoryRowNames, mdata.CategoryRowDescriptions, GetCategoryRows(mdata),
                mdata.NumericRowNames, mdata.NumericRowDescriptions, mdata.NumericRows,
                new List<string>(), new List<string>(), new List<double[][]>());
            mdata.ColumnNames = colNames;
            // Former annotation columns become the new annotation rows.
            mdata.SetAnnotationRows(
                textNames, textDescriptions, textData,
                catNames, catDescriptions, catData,
                numNames, numDescriptions, numData,
                multiNumNames, multiNumDescriptions, multiNumData);
        }
Esempio n. 20
0
        /// <summary>
        /// Converts protein intensities into per-cell quantities. For every selected intensity
        /// column (or for their row-wise median when averaging mode 3 is chosen) a normalization
        /// factor is derived either from the total protein amount per cell (scaling mode 0) or
        /// from the summed histone signal (scaling mode 1); other scaling modes use a factor of 1.
        /// Depending on the "Output" selection the method adds copy numbers, concentrations,
        /// mass/mole fractions and ranks as numeric columns, summary annotation rows, and a
        /// supplementary summary matrix. A "Histones" category column is always added.
        /// </summary>
        /// <remarks>
        /// Arrays taken from the numeric annotation columns are copied before they are modified
        /// (reverting the logarithm, kDa to Da scaling, replacing invalid normalization factors),
        /// so the columns stored in <paramref name="mdata"/> keep their original content.
        /// </remarks>
        /// <param name="mdata">Matrix that is read and to which the result columns and rows are added.</param>
        /// <param name="param">Plugin parameters (e.g. "Intensities", "Molecular masses", "Scaling mode",
        /// "Averaging mode", "Logarithmized", "Detectability correction", "Output").</param>
        /// <param name="supplTables">Set to a one-element array holding the summary matrix when output 7
        /// is selected and averaging mode is not 3.</param>
        /// <param name="documents">Not used by this method.</param>
        /// <param name="processInfo">Receives messages through <c>ErrString</c>.</param>
        public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
                                ref IDocumentData[] documents, ProcessInfo processInfo)
        {
            int[] outputColumns = param.GetParam<int[]>("Output").Value;
            int proteinIdColumnInd = param.GetParam<int>("Protein IDs").Value;

            string[] proteinIds = mdata.StringColumns[proteinIdColumnInd];
            int[] intensityCols = param.GetParam<int[]>("Intensities").Value;
            if (intensityCols.Length == 0)
            {
                processInfo.ErrString = "Please select at least one column containing protein intensities.";
                return;
            }
            // variable to hold all intensity values
            List<double[]> columns = new List<double[]>();

            string[] inputNames = new string[intensityCols.Length];
            string[] sampleNames = new string[intensityCols.Length];
            // strips an optional "LFQ intensity " / "Intensity " / "intensity " prefix from the column name
            Regex sampleNamePattern = new Regex(@"^(?:(?:LFQ )?[Ii]ntensity )?(.*)$");
            for (int col = 0; col < intensityCols.Length; col++)
            {
                double[] values;
                if (intensityCols[col] < mdata.ColumnCount)
                {
                    // indices below ColumnCount address main columns
                    values = ArrayUtils.ToDoubles(mdata.Values.GetColumn(intensityCols[col]));
                    inputNames[col] = mdata.ColumnNames[intensityCols[col]];
                }
                else
                {
                    // remaining indices address numeric annotation columns; work on a copy because
                    // reverting the logarithm below writes into the array
                    int numericInd = intensityCols[col] - mdata.ColumnCount;
                    values = (double[]) mdata.NumericColumns[numericInd].Clone();
                    inputNames[col] = mdata.NumericColumnNames[numericInd];
                }
                sampleNames[col] = sampleNamePattern.Match(inputNames[col]).Groups[1].Value;
                columns.Add(values);
            }
            int averagingMode = param.GetParamWithSubParams<int>("Averaging mode").Value;
            // average over columns if this option is selected
            if (averagingMode == 3)
            {
                double[] column = new double[mdata.RowCount];
                for (int row = 0; row < mdata.RowCount; row++)
                {
                    double[] values = new double[intensityCols.Length];
                    for (int col = 0; col < intensityCols.Length; col++)
                    {
                        values[col] = columns[col][row];
                    }
                    column[row] = ArrayUtils.Median(ExtractValidValues(values, false));
                }
                // replace the original list of columns by the single averaged column
                columns = new List<double[]> { column };
                sampleNames = new[] { "" };
            }
            // revert logarithm if necessary
            if (param.GetParamWithSubParams<bool>("Logarithmized").Value)
            {
                double[] logBases = new[] { 2, Math.E, 10 };
                double logBase =
                    logBases[param.GetParamWithSubParams<bool>("Logarithmized").GetSubParameters().GetParam<int>("log base").Value];
                foreach (double[] t in columns)
                {
                    for (int row = 0; row < mdata.RowCount; row++)
                    {
                        if (t[row] == 0)
                        {
                            // only records a message; processing deliberately continues
                            processInfo.ErrString = "Are the columns really logarithmized?\nThey contain zeroes!";
                        }
                        t[row] = Math.Pow(logBase, t[row]);
                    }
                }
            }
            // copy of the molecular masses, so that the unit conversion below does not alter the matrix
            double[] mw = (double[]) mdata.NumericColumns[param.GetParam<int>("Molecular masses").Value].Clone();
            // define whether the molecular masses are given in Da or kDa
            if (ArrayUtils.Median(mw) < 250) // most likely kDa
            {
                for (int i = 0; i < mw.Length; i++)
                {
                    mw[i] *= 1000;
                }
            }
            // The normalization factor must be an array of its own: it is patched below, and sharing
            // the array with mw would turn unknown masses (NaN) into 1 and defeat the NaN checks on mw.
            double[] detectabilityNormFactor;
            if (param.GetParamWithSubParams<bool>("Detectability correction").Value)
            {
                detectabilityNormFactor =
                    (double[]) mdata.NumericColumns[
                        param.GetParamWithSubParams<bool>("Detectability correction").GetSubParameters()
                            .GetParam<int>("Correction factor").Value].Clone();
            }
            else
            {
                detectabilityNormFactor = (double[]) mw.Clone();
            }
            // the normalization factor needs to be nonzero for all proteins
            // check and replace with 1 for all relevant cases
            for (int row = 0; row < mdata.RowCount; row++)
            {
                if (detectabilityNormFactor[row] == 0 || double.IsNaN(detectabilityNormFactor[row]))
                {
                    detectabilityNormFactor[row] = 1;
                }
            }
            // detect the organism
            Organism organism = DetectOrganism(proteinIds);
            // c value the amount of DNA per haploid genome, see: http://en.wikipedia.org/wiki/C-value
            double cValue = organism.genomeSize * basePairWeight / avogadro;

            // find the histones
            int[] histoneRows = FindHistones(proteinIds, organism);
            // write a categorical column indicating the histones
            string[][] histoneCol = new string[mdata.RowCount][];
            for (int row = 0; row < mdata.RowCount; row++)
            {
                histoneCol[row] = ArrayUtils.Contains(histoneRows, row) ? new[] { "+" } : new string[0];
            }
            mdata.AddCategoryColumn("Histones", "", histoneCol);

            // initialize the variables for the annotation rows (one entry per main column)
            string[] sampleNameRow = new string[mdata.ColumnCount];
            string[] inputNameRow = new string[mdata.ColumnCount];
            double[] totalProteinRow = new double[mdata.ColumnCount];
            double[] totalMoleculesRow = new double[mdata.ColumnCount];
            string[][] organismRow = new string[mdata.ColumnCount][];
            // populate the organismRow variable with "N/A" as default
            // (not null, which may cause errors when writing the annotations in the end.)
            for (int i = 0; i < organismRow.Length; i++)
            {
                organismRow[i] = new[] { "N/A" };
            }
            double[] histoneMassRow = new double[mdata.ColumnCount];
            double[] ploidyRow = new double[mdata.ColumnCount];
            double[] cellVolumeRow = new double[mdata.ColumnCount];
            double[] normalizationFactors = new double[columns.Count];
            int scalingMode = param.GetParamWithSubParams<int>("Scaling mode").Value;
            // calculate normalization factors for each column
            for (int col = 0; col < columns.Count; col++)
            {
                double[] column = columns[col];
                // normalization factor to go from intensities to copies,
                // needs to be determined either using the total protein or the histone scaling approach
                double factor;
                switch (scalingMode)
                {
                case 0: // total protein amount
                    double mwWeightedNormalizedSummedIntensities = 0;
                    for (int row = 0; row < mdata.RowCount; row++)
                    {
                        if (!double.IsNaN(column[row]) && !double.IsNaN(mw[row]))
                        {
                            mwWeightedNormalizedSummedIntensities += column[row] / detectabilityNormFactor[row] * mw[row];
                        }
                    }
                    factor =
                        param.GetParamWithSubParams<int>("Scaling mode").GetSubParameters().GetParam<double>(
                            "Protein amount per cell [pg]").Value * 1e-12 * avogadro / mwWeightedNormalizedSummedIntensities;
                    break;

                case 1: // histone mode
                    double mwWeightedNormalizedSummedHistoneIntensities = 0;
                    foreach (int row in histoneRows)
                    {
                        if (!double.IsNaN(column[row]) && !double.IsNaN(mw[row]))
                        {
                            mwWeightedNormalizedSummedHistoneIntensities += column[row] / detectabilityNormFactor[row] * mw[row];
                        }
                    }
                    double ploidy =
                        param.GetParamWithSubParams<int>("Scaling mode").GetSubParameters().GetParam<double>("Ploidy").Value;
                    factor = cValue * ploidy * avogadro / mwWeightedNormalizedSummedHistoneIntensities;
                    break;

                default:
                    factor = 1;
                    break;
                }
                normalizationFactors[col] = factor;
            }
            // check averaging mode
            if (averagingMode == 1) // same factor for all
            {
                double factor = ArrayUtils.Mean(normalizationFactors);
                for (int i = 0; i < normalizationFactors.Length; i++)
                {
                    normalizationFactors[i] = factor;
                }
            }
            if (averagingMode == 2) // same factor in each group
            {
                int groupingInd =
                    param.GetParamWithSubParams<int>("Averaging mode").GetSubParameters().GetParam<int>("Grouping").Value;
                if (groupingInd == -1)
                {
                    processInfo.ErrString = "No grouping selected.";
                    return;
                }
                string[][] groupNames = mdata.GetCategoryRowAt(groupingInd);
                string[] uniqueGroupNames = Unique(groupNames);
                int[] grouping = new int[columns.Count];
                for (int i = 0; i < columns.Count; i++)
                {
                    // The category row has one entry per main column, so it has to be addressed
                    // with the matrix column index, not with the position in the selection.
                    int matrixCol = intensityCols[i];
                    // key for a column forming its own group; offset so that it can never
                    // coincide with an index into uniqueGroupNames
                    int ownGroup = uniqueGroupNames.Length + i;
                    if (matrixCol >= mdata.ColumnCount) // Numeric annotation columns cannot be grouped
                    {
                        grouping[i] = ownGroup;
                        continue;
                    }
                    string[] groupsOfColumn = groupNames[matrixCol];
                    if (groupsOfColumn.Length > 0 && ArrayUtils.Contains(uniqueGroupNames, groupsOfColumn[0]))
                    {
                        grouping[i] = ArrayUtils.IndexOf(uniqueGroupNames, groupsOfColumn[0]);
                        continue;
                    }
                    // columns without a group assignment are not averaged with any other column
                    grouping[i] = ownGroup;
                }
                // collect the factors of each group
                Dictionary<int, List<double>> factors = new Dictionary<int, List<double>>();
                for (int i = 0; i < columns.Count; i++)
                {
                    List<double> groupFactors;
                    if (!factors.TryGetValue(grouping[i], out groupFactors))
                    {
                        groupFactors = new List<double>();
                        factors.Add(grouping[i], groupFactors);
                    }
                    groupFactors.Add(normalizationFactors[i]);
                }
                double[] averagedNormalizationFactors = new double[columns.Count];
                for (int i = 0; i < columns.Count; i++)
                {
                    averagedNormalizationFactors[i] = ArrayUtils.Mean(factors[grouping[i]]);
                }
                normalizationFactors = averagedNormalizationFactors;
            }
            double proteinConcentration = param.GetParam<double>("Total cellular protein concentration [g/l]").Value;
            // loop over all selected columns and calculate copy numbers
            for (int col = 0; col < columns.Count; col++)
            {
                string sampleName = sampleNames[col];
                double[] column = columns[col];
                double factor = normalizationFactors[col];
                double[] copyNumbers = new double[mdata.RowCount];
                double[] concentrations = new double[mdata.RowCount]; // nanomolar
                double[] massFraction = new double[mdata.RowCount]; // ppm
                double[] moleFraction = new double[mdata.RowCount]; // ppm
                double totalProtein = 0; // picograms
                double histoneMass = 0; // picograms
                double totalMolecules = 0;
                for (int row = 0; row < mdata.RowCount; row++)
                {
                    if (!double.IsNaN(column[row]) && !double.IsNaN(mw[row]))
                    {
                        copyNumbers[row] = column[row] / detectabilityNormFactor[row] * factor;
                        totalMolecules += copyNumbers[row];
                        totalProtein += copyNumbers[row] * mw[row] * 1e12 / avogadro; // picograms
                        if (ArrayUtils.Contains(histoneRows, row))
                        {
                            histoneMass += copyNumbers[row] * mw[row] * 1e12 / avogadro; // picograms
                        }
                    }
                }
                double totalVolume = totalProtein / proteinConcentration * 1000; // femtoliters
                for (int row = 0; row < mdata.RowCount; row++)
                {
                    if (!double.IsNaN(column[row]) && !double.IsNaN(mw[row]))
                    {
                        concentrations[row] = copyNumbers[row] / (totalVolume * 1e-15) / avogadro * 1e9; // nanomolar
                        massFraction[row] = copyNumbers[row] * mw[row] * 1e12 / avogadro / totalProtein * 1e6; // ppm
                        moleFraction[row] = copyNumbers[row] / totalMolecules * 1e6; // ppm
                    }
                }
                string suffix = sampleName == "" ? "" : " " + sampleName;
                if (ArrayUtils.Contains(outputColumns, 0))
                {
                    mdata.AddNumericColumn("Copy number" + suffix, "", copyNumbers);
                }
                if (ArrayUtils.Contains(outputColumns, 1))
                {
                    mdata.AddNumericColumn("Concentration [nM]" + suffix, "", concentrations);
                }
                if (ArrayUtils.Contains(outputColumns, 2))
                {
                    mdata.AddNumericColumn("Abundance (mass/total mass) [*10^-6]" + suffix, "", massFraction);
                }
                if (ArrayUtils.Contains(outputColumns, 3))
                {
                    mdata.AddNumericColumn("Abundance (molecules/total molecules) [*10^-6]" + suffix, "", moleFraction);
                }
                double[] rank = ArrayUtils.Rank(copyNumbers);
                double[] relativeRank = new double[mdata.RowCount];
                double validRanks = mdata.RowCount;
                for (int row = 0; row < mdata.RowCount; row++)
                {
                    // remove rank for protein with no copy number information
                    if (double.IsNaN(copyNumbers[row]) || copyNumbers[row] == 0)
                    {
                        rank[row] = double.NaN;
                        validRanks--; // do not consider as valid
                    }
                    // invert ranking, so that rank 0 is the most abundant protein
                    // (a NaN rank stays NaN under this subtraction)
                    rank[row] = mdata.RowCount - rank[row];
                }
                for (int row = 0; row < mdata.RowCount; row++)
                {
                    relativeRank[row] = rank[row] / validRanks;
                }
                if (ArrayUtils.Contains(outputColumns, 4))
                {
                    mdata.AddNumericColumn("Copy number rank" + suffix, "", rank);
                }
                if (ArrayUtils.Contains(outputColumns, 5))
                {
                    mdata.AddNumericColumn("Relative copy number rank" + suffix, "", relativeRank);
                }
                // summary values can only be attached to main columns, and not to the averaged column
                if (intensityCols[col] < mdata.ColumnCount && averagingMode != 3)
                {
                    int matrixCol = intensityCols[col];
                    inputNameRow[matrixCol] = inputNames[col];
                    sampleNameRow[matrixCol] = sampleNames[col];
                    totalProteinRow[matrixCol] = Math.Round(totalProtein, 2);
                    totalMoleculesRow[matrixCol] = Math.Round(totalMolecules, 0);
                    organismRow[matrixCol] = new[] { organism.name };
                    histoneMassRow[matrixCol] = Math.Round(histoneMass, 4);
                    ploidyRow[matrixCol] = Math.Round(histoneMass * 1e-12 / cValue, 2);
                    cellVolumeRow[matrixCol] = Math.Round(totalVolume, 2); // femtoliters
                }
            }

            // Summary annotation row
            if (averagingMode != 3 && ArrayUtils.Contains(outputColumns, 6))
            {
                mdata.AddNumericRow("Total protein [pg/cell]", "", totalProteinRow);
                mdata.AddNumericRow("Total molecules per cell", "", totalMoleculesRow);
                mdata.AddCategoryRow("Organism", "", organismRow);
                mdata.AddNumericRow("Histone mass [pg/cell]", "", histoneMassRow);
                mdata.AddNumericRow("Ploidy", "", ploidyRow);
                mdata.AddNumericRow("Cell volume [fl]", "", cellVolumeRow);
            }

            // Summary matrix
            if (averagingMode != 3 && ArrayUtils.Contains(outputColumns, 7))
            {
                supplTables = new IMatrixData[1];
                IMatrixData supplTab = PerseusFactory.CreateMatrixData();
                supplTab.ColumnNames = new List<string>();
                // one row per main column of the input matrix, no main columns
                supplTab.Values.Init(totalProteinRow.Length, 0);
                supplTab.SetAnnotationColumns(
                    new List<string> { "Sample", "Input Column" },
                    new List<string[]> { sampleNameRow, inputNameRow },
                    new List<string> { "Organism" },
                    new List<string[][]> { organismRow },
                    new List<string>
                    {
                        "Total protein [pg/cell]",
                        "Total molecules per cell",
                        "Histone mass [pg/cell]",
                        "Ploidy",
                        "Cell volume [fl]"
                    },
                    new List<double[]>
                    {
                        totalProteinRow, totalMoleculesRow, histoneMassRow, ploidyRow, cellVolumeRow
                    },
                    new List<string>(), new List<double[][]>());
                supplTables[0] = supplTab;
            }
        }
        /// <summary>
        /// Replaces the content of the matrix by curves computed from score columns. For every
        /// selected score column the rows are ordered by score, the indicator flags are arranged
        /// in that order and handed to <c>CalcCurve</c>, which appends the requested quantities
        /// as new columns. All annotation columns are removed from the result.
        /// </summary>
        /// <param name="data">Matrix that is read and then overwritten with the curve data.</param>
        /// <param name="param">Provides "Indicated are", "In column", "Indicator", "Scores",
        /// "Large values are good" and "Display quantity".</param>
        /// <param name="supplTables">Not used by this method.</param>
        /// <param name="documents">Not used by this method.</param>
        /// <param name="processInfo">Receives an error message when no score column or no quantity is selected.</param>
        public void ProcessData(IMatrixData data, Parameters param, ref IMatrixData[] supplTables,
            ref IDocumentData[] documents, ProcessInfo processInfo)
        {
            bool falseAreIndicated = param.GetParam<int>("Indicated are").Value == 0;
            int catCol = param.GetParam<int>("In column").Value;
            string word = param.GetParam<string>("Indicator").Value;
            int[] scoreColumns = param.GetParam<int[]>("Scores").Value;
            if (scoreColumns.Length == 0)
            {
                processInfo.ErrString = "Please specify at least one column with scores.";
                return;
            }
            bool largeIsGood = param.GetParam<bool>("Large values are good").Value;
            int[] showColumns = param.GetParam<int[]>("Display quantity").Value;
            if (showColumns.Length == 0)
            {
                processInfo.ErrString = "Please select at least one quantity to display";
                return;
            }

            bool[] indicator = GetIndicatorColumn(falseAreIndicated, catCol, word, data);
            List<string> curveNames = new List<string>();
            List<float[]> curves = new List<float[]>();
            foreach (int scoreColumn in scoreColumns)
            {
                // Indices below NumericColumnCount address numeric annotation columns,
                // the remaining ones address main columns.
                double[] scores;
                string scoreName;
                if (scoreColumn < data.NumericColumnCount)
                {
                    scores = data.NumericColumns[scoreColumn];
                    scoreName = data.NumericColumnNames[scoreColumn];
                }
                else
                {
                    scores = ArrayUtils.ToDoubles(data.Values.GetColumn(scoreColumn - data.NumericColumnCount));
                    scoreName = data.ColumnNames[scoreColumn - data.NumericColumnCount];
                }
                int[] order = GetOrder(scores, largeIsGood);
                bool[] orderedIndicator = ArrayUtils.SubArray(indicator, order);
                CalcCurve(orderedIndicator, showColumns, scoreName, curves, curveNames);
            }

            float[,] curveMatrix = ToMatrix(curves);
            data.ColumnNames = curveNames;
            data.Values.Set(curveMatrix);
            // The original annotation columns do not apply to the curve rows any more.
            data.SetAnnotationColumns(
                new List<string>(), new List<string[]>(),
                new List<string>(), new List<string[][]>(),
                new List<string>(), new List<double[]>(),
                new List<string>(), new List<double[][]>());
        }
Esempio n. 22
0
        /// <summary>
        /// Fills <paramref name="mdata"/> with a randomly generated matrix together with matching
        /// row/column annotations.
        /// </summary>
        /// <remarks>
        /// Every cell is set to <see cref="float.NaN"/> when <c>randy.NextDouble() * 100</c> falls below
        /// "Percentage of missing values"; otherwise it receives a <c>NextGaussian()</c> draw.
        /// The "Mode" parameter selects how rows are structured:
        /// <list type="bullet">
        /// <item><description>0: plain noise, no row grouping.</description></item>
        /// <item><description>1: each row is put into "Group1" or "Group2" by a coin flip; "Group1" rows get
        /// "Distance" added to their first column.</description></item>
        /// <item><description>2: "How many" centers are drawn with coordinates <c>NextDouble() * "Box size"</c>;
        /// each row is attached to one randomly chosen center and shifted by it.</description></item>
        /// </list>
        /// Any other mode value leaves the matrix at its zero-initialized state and adds no row grouping.
        /// The sequence of calls on the random generator is deliberately kept fixed (group/center choice first,
        /// then the cells of the row from left to right) so that, presumably, a given "Seed" reproduces the
        /// same matrix.
        /// </remarks>
        /// <param name="mdata">Matrix that receives the generated values, names and annotations.</param>
        /// <param name="param">Parameters holding "Number of rows", "Number of columns",
        /// "Percentage of missing values", "Number of groups", "Set seed" (sub-parameter "Seed") and "Mode"
        /// (sub-parameters "Distance", "Box size", "How many").</param>
        /// <param name="supplTables">Not used by this method.</param>
        /// <param name="documents">Not used by this method.</param>
        /// <param name="processInfo">Not used by this method.</param>
        public void LoadData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables, ref IDocumentData[] documents,
                             ProcessInfo processInfo)
        {
            int nrows = param.GetParam<int>("Number of rows").Value;
            int ncols = param.GetParam<int>("Number of columns").Value;
            int missingPerc = param.GetParam<int>("Percentage of missing values").Value;
            int ngroups = param.GetParam<int>("Number of groups").Value;
            ParameterWithSubParams<bool> setSeed = param.GetParamWithSubParams<bool>("Set seed");
            Random2 randy = setSeed.Value
                ? new Random2(setSeed.GetSubParameters().GetParam<int>("Seed").Value)
                : new Random2();

            // There cannot be more column groups than columns.
            ngroups = Math.Min(ngroups, ncols);
            float[,] m = new float[nrows, ncols];
            ParameterWithSubParams<int> x = param.GetParamWithSubParams<int>("Mode");
            Parameters subParams = x.GetSubParameters();
            List<string> catColNames = new List<string>();
            List<string[][]> catCols = new List<string[][]>();

            switch (x.Value)
            {
            case 0:
                // Pure noise.
                for (int i = 0; i < nrows; i++)
                {
                    for (int j = 0; j < ncols; j++)
                    {
                        m[i, j] = randy.NextDouble() * 100 < missingPerc ? float.NaN : (float)randy.NextGaussian();
                    }
                }
                break;

            case 1:
                // Two row groups that differ by a shift of the first column.
                float dist = (float)subParams.GetParam<double>("Distance").Value;
                string[][] col = new string[nrows][];
                for (int i = 0; i < nrows; i++)
                {
                    // Draw the group before the cells; the draw order defines the seeded output.
                    bool which = randy.NextDouble() < 0.5;
                    for (int j = 0; j < ncols; j++)
                    {
                        m[i, j] = randy.NextDouble() * 100 < missingPerc ? float.NaN : (float)randy.NextGaussian();
                    }
                    if (which)
                    {
                        // A matrix without columns has no first column to shift; indexing m[i, 0]
                        // unconditionally would throw IndexOutOfRangeException.
                        if (ncols > 0)
                        {
                            m[i, 0] += dist;
                        }
                        col[i] = new[] { "Group1" };
                    }
                    else
                    {
                        col[i] = new[] { "Group2" };
                    }
                }
                catColNames.Add("Grouping");
                catCols.Add(col);
                break;

            case 2:
                // Several clusters whose centers are drawn inside a box of edge length "Box size".
                double boxLen = subParams.GetParam<double>("Box size").Value;
                int howMany = subParams.GetParam<int>("How many").Value;
                string[][] col1 = new string[nrows][];
                float[,] centers = new float[howMany, ncols];
                for (int i = 0; i < howMany; i++)
                {
                    for (int j = 0; j < ncols; j++)
                    {
                        centers[i, j] = (float)(randy.NextDouble() * boxLen);
                    }
                }
                for (int i = 0; i < nrows; i++)
                {
                    // Index of the center this row is attached to.
                    int which = (int)(randy.NextDouble() * howMany);
                    for (int j = 0; j < ncols; j++)
                    {
                        m[i, j] = randy.NextDouble() * 100 < missingPerc
                            ? float.NaN
                            : (float)randy.NextGaussian() + centers[which, j];
                    }
                    col1[i] = new[] { "Group" + (which + 1) };
                }
                catColNames.Add("Grouping");
                catCols.Add(col1);
                break;
            }

            List<string> exprColumnNames = new List<string>();
            for (int i = 0; i < ncols; i++)
            {
                exprColumnNames.Add("Column " + (i + 1));
            }
            mdata.Name = "Random matrix";
            mdata.ColumnNames = exprColumnNames;
            mdata.Values.Set(m);
            mdata.Quality.Set(new float[nrows, ncols]);
            mdata.IsImputed.Set(new bool[nrows, ncols]);
            mdata.SetAnnotationColumns(new List<string>(), new List<string[]>(), catColNames, catCols, new List<string>(),
                                       new List<double[]>(), new List<string>(), new List<double[][]>());
            mdata.Origin = "Random matrix";

            // Row labels "Row 1", "Row 2", ...
            string[] names = new string[mdata.RowCount];
            for (int i = 0; i < names.Length; i++)
            {
                names[i] = "Row " + (i + 1);
            }
            mdata.AddStringColumn("Name", "Name", names);

            // Split the columns into ngroups contiguous, (almost) equally sized groups.
            string[][] grouping = new string[ncols][];
            for (int i = 0; i < ncols; i++)
            {
                // Multiply in 64 bit: i * ngroups can exceed int.MaxValue for very wide matrices,
                // which would silently wrap and produce negative/wrong group numbers.
                int ig = (int)((long)i * ngroups / ncols) + 1;
                grouping[i] = new[] { "Group" + ig };
            }
            mdata.AddCategoryRow("Grouping", "Grouping", grouping);
        }