Пример #1
0
        /// <summary>
        /// Reads the settings of a two-sample test from <paramref name="param"/> and returns them through
        /// the out parameters. Validation problems are reported by setting
        /// <see cref="ProcessInfo.ErrString"/>; in the early-return cases the remaining out parameters
        /// keep the neutral defaults assigned near the top of the method.
        /// </summary>
        /// <param name="isVolcano">When true the "Use for truncation" choice is not consulted and
        /// <paramref name="truncation"/> stays at <c>TestTruncation.PermutationBased</c>.</param>
        /// <param name="mdata">Matrix whose category row at <paramref name="groupRowInd"/> supplies the group names.</param>
        /// <param name="param">Parameter set to read from.</param>
        /// <param name="processInfo">Receives an error message in <c>ErrString</c> when validation fails.</param>
        /// <param name="groupRowInd">Value of the "Grouping" parameter.</param>
        /// <param name="firstGroupInds">Value of "First group (right)".</param>
        /// <param name="secondGroupInds">Second group indices; empty in complement mode, and one repeated
        /// index per first group in single-control mode.</param>
        /// <param name="secondGroupMode">Mode chosen in "Second groups mode".</param>
        /// <param name="paired">Value of "Paired"; always false in complement mode.</param>
        /// <param name="test1">One-sample test obtained from <paramref name="test"/>; only set when
        /// <paramref name="paired"/> is true, otherwise null.</param>
        /// <param name="allTests">Candidate tests; the value of the "Test" parameter indexes into this array.</param>
        /// <exception cref="Exception">Thrown when a choice parameter holds an index outside the handled cases.</exception>
        public static void ReadParams(bool isVolcano, IMatrixData mdata, Parameters param, ProcessInfo processInfo,
                                      out int groupRowInd, out int[] firstGroupInds, out int[] secondGroupInds, out SecondGroupMode secondGroupMode,
                                      out string[] groupNames, out bool logPval, out double threshold, out TestTruncation truncation,
                                      out int preserveGroupInd, out int nrand, out TwoSampleTest test, out TestSide side, out double s0,
                                      out bool filterValidValues, out int minNumValidValues, out int minNumValidValuesMode, out int minPercValidValues,
                                      out int minPercValidValuesMode, out bool calcCombinedScore, out CombinedScoreMode combinedScoreMode,
                                      out bool combinedScoreQvalue, out bool qval, out string suffix, out bool paired, out OneSampleTest test1,
                                      TwoSampleTest[] allTests)
        {
            ParameterWithSubParams <int> groupingParam = param.GetParamWithSubParams <int>("Grouping");

            groupRowInd = groupingParam.Value;
            Parameters subparam = groupingParam.GetSubParameters();

            firstGroupInds = subparam.GetParam <int[]>("First group (right)").Value;
            ParameterWithSubParams <int> p = subparam.GetParamWithSubParams <int>("Second groups mode");
            int modeInd = p.Value;

            // Complement mode (case 2) never reads "Paired", so it stays false there.
            paired = false;
            switch (modeInd)
            {
            case 0:
                // Every second group is specified explicitly.
                secondGroupMode = SecondGroupMode.SpecifiyAll;
                secondGroupInds = p.GetSubParameters().GetParam <int[]>("Second group (left)").Value;
                paired          = p.GetSubParameters().GetParam <bool>("Paired").Value;
                break;

            case 1:
                // One control group: the same index is repeated once per first group.
                secondGroupMode = SecondGroupMode.SingleControl;
                int ind = p.GetSubParameters().GetParam <int>("Second group (left)").Value;
                secondGroupInds = new int[firstGroupInds.Length];
                for (int i = 0; i < secondGroupInds.Length; i++)
                {
                    secondGroupInds[i] = ind;
                }
                paired = p.GetSubParameters().GetParam <bool>("Paired").Value;
                break;

            case 2:
                // Complement mode carries no explicit second group indices.
                secondGroupMode = SecondGroupMode.Complement;
                secondGroupInds = new int[0];
                break;

            default: throw new Exception("Never get here.");
            }
            // Assign a default to every remaining out parameter; C# requires all out parameters
            // to be assigned before the early returns in the validation below.
            groupNames       = null;
            logPval          = false;
            suffix           = "";
            threshold        = 0;
            // This default is also the effective truncation when isVolcano is true (see below).
            truncation       = TestTruncation.PermutationBased;
            preserveGroupInd = 0;
            nrand            = 0;
            test             = null;
            side             = TestSide.Both;
            s0 = 0;
            filterValidValues      = false;
            minNumValidValues      = 0;
            minNumValidValuesMode  = 0;
            minPercValidValues     = 0;
            minPercValidValuesMode = 0;
            calcCombinedScore      = false;
            combinedScoreMode      = CombinedScoreMode.Product;
            combinedScoreQvalue    = false;
            test1 = null;
            qval  = false;
            // Outside complement mode the groups are compared pairwise, so both lists must have equal length.
            if (secondGroupMode != SecondGroupMode.Complement && firstGroupInds.Length != secondGroupInds.Length)
            {
                processInfo.ErrString = "Please specify the same number of groups in the 'First group' and 'Second group' boxes.";
                return;
            }
            if (firstGroupInds.Length == 0)
            {
                processInfo.ErrString = "Please specify some groups.";
                return;
            }
            if (secondGroupMode != SecondGroupMode.Complement)
            {
                // Reject any pair that would compare a group with itself.
                for (int i = 0; i < firstGroupInds.Length; i++)
                {
                    if (firstGroupInds[i] == secondGroupInds[i])
                    {
                        processInfo.ErrString = "Groups to be compared in the test cannot be equal.";
                        return;
                    }
                }
            }
            // Distinct terms of the grouping row, in order of first appearance.
            groupNames = ArrayUtils.UniqueValuesPreserveOrder(ArrayUtils.Concat(mdata.GetCategoryRowAt(groupRowInd)));
            logPval    = param.GetParam <bool>("-Log10 p-value").Value;
            suffix     = param.GetParam <string>("Suffix").Value;
            ParameterWithSubParams <int> truncParam = param.GetParamWithSubParams <int>("Use for truncation");

            // The truncation choice is only honoured outside the volcano case; otherwise the
            // PermutationBased default assigned above remains in effect.
            if (!isVolcano)
            {
                int truncIndex = truncParam.Value;
                truncation = truncIndex == 0
                                        ? TestTruncation.Pvalue
                                        : (truncIndex == 1 ? TestTruncation.BenjaminiHochberg : TestTruncation.PermutationBased);
            }
            Parameters truncSubParams = truncParam.GetSubParameters();

            // The threshold is a p-value cut-off for p-value truncation and an FDR otherwise.
            threshold = truncation == TestTruncation.Pvalue
                                ? truncParam.GetSubParameters().GetParam <double>("Threshold p-value").Value
                                : truncParam.GetSubParameters().GetParam <double>("FDR").Value;
            ParameterWithSubParams <int> testParam = param.GetParamWithSubParams <int>("Test");
            int testInd = testParam.Value;
            ParameterWithSubParams <bool> validValParam = param.GetParamWithSubParams <bool>("Valid value filter");

            filterValidValues      = validValParam.Value;
            minNumValidValues      = 0;
            minNumValidValuesMode  = 0;
            minPercValidValues     = 0;
            minPercValidValuesMode = 0;
            // The valid-value limits are only read when the filter is switched on.
            if (filterValidValues)
            {
                Parameters px = validValParam.GetSubParameters();
                minNumValidValues      = px.GetParam <int>("Min. number of valid values").Value;
                minNumValidValuesMode  = px.GetParam <int>("Min. number mode").Value;
                minPercValidValues     = px.GetParam <int>("Min. percentage of valid values").Value;
                minPercValidValuesMode = px.GetParam <int>("Min. percentage mode").Value;
            }
            // Tests without sides keep index 0, which maps to TestSide.Both.
            int sideInd = 0;

            test = allTests[testInd];
            Parameters testSubParams = testParam.GetSubParameters();

            if (test.HasSides)
            {
                sideInd = testSubParams.GetParam <int>("Side").Value;
            }
            switch (sideInd)
            {
            case 0:
                side = TestSide.Both;
                break;

            case 1:
                side = TestSide.Left;
                break;

            case 2:
                side = TestSide.Right;
                break;

            default: throw new Exception("Never get here.");
            }
            // S0 is only read for tests that declare it; otherwise it stays 0.
            s0 = 0;
            if (test.HasS0)
            {
                s0 = testSubParams.GetParam <double>("S0").Value;
            }
            // Paired analysis uses the one-sample counterpart provided by the selected test.
            if (paired)
            {
                test1 = test.GetOneSampleTest();
            }
            ParameterWithSubParams <bool> combinedScoreParam = param.GetParamWithSubParams <bool>("Calculate combined score");

            calcCombinedScore = combinedScoreParam.Value;
            // The combined score is switched off unless there are at least two comparisons and
            // permutation-based truncation is in use.
            if (firstGroupInds.Length < 2 || truncation != TestTruncation.PermutationBased)
            {
                calcCombinedScore = false;
            }
            combinedScoreMode = CombinedScoreMode.Product;
            if (calcCombinedScore)
            {
                Parameters px = combinedScoreParam.GetSubParameters();
                combinedScoreQvalue = px.GetParam <bool>("Combined q-value").Value;
                int combinedScoreBestInd = px.GetParam <int>("Mode").Value;
                switch (combinedScoreBestInd)
                {
                case 0:
                    combinedScoreMode = CombinedScoreMode.Product;
                    break;

                case 1:
                    combinedScoreMode = CombinedScoreMode.Best;
                    break;

                default: throw new Exception("Never get here.");
                }
            }
            // "Report q-value" is only read when truncation is not p-value based.
            if (truncation != TestTruncation.Pvalue)
            {
                qval = truncSubParams.GetParam <bool>("Report q-value").Value;
            }
            // -1 is reported when no randomizations apply (truncation is not permutation-based).
            nrand = -1;
            if (truncation == TestTruncation.PermutationBased)
            {
                nrand            = truncSubParams.GetParam <int>("Number of randomizations").Value;
                // NOTE(review): the choice value is shifted by one, presumably because its first
                // entry means "no preserved grouping" (giving -1) - confirm against the parameter definition.
                preserveGroupInd = truncSubParams.GetParam <int>("Preserve grouping in randomizations").Value - 1;
            }
            // The two checks below do not return; if both apply, the second message overwrites the first.
            if (preserveGroupInd >= 0 && calcCombinedScore)
            {
                processInfo.ErrString = "Combination of preserved subgroups and combined score is not yet supported.";
            }
            if (paired && combinedScoreQvalue)
            {
                processInfo.ErrString = "Combined q-value is not supported for paired tests.";
            }
        }
        /// <summary>
        /// Filters the matrix columns by the terms found in one categorical row. Depending on the
        /// "Mode" parameter, the columns passed on to <c>PerseusPluginUtils.FilterColumns</c> are either
        /// those containing none of the selected terms (mode 0) or those containing at least one.
        /// </summary>
        /// <param name="mdata">Matrix to filter.</param>
        /// <param name="param">Supplies "Row" (with sub-parameter "Values") and "Mode".</param>
        /// <param name="supplTables">Not used by this method.</param>
        /// <param name="documents">Not used by this method.</param>
        /// <param name="processInfo">Receives an error message when no row or no term is selected.</param>
        public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
			ref IDocumentData[] documents, ProcessInfo processInfo)
        {
            SingleChoiceWithSubParams rowParam = param.GetSingleChoiceWithSubParams("Row");
            int rowIndex = rowParam.Value;
            if (rowIndex < 0){
                processInfo.ErrString = "No categorical rows available.";
                return;
            }
            int[] termIndices = rowParam.GetSubParameters().GetMultiChoiceParam("Values").Value;
            if (termIndices.Length == 0){
                processInfo.ErrString = "Please select at least one term for filtering.";
                return;
            }
            // Translate the selected indices into the actual term strings.
            HashSet<string> selectedTerms = new HashSet<string>();
            foreach (int termIndex in termIndices){
                selectedTerms.Add(mdata.GetCategoryRowValuesAt(rowIndex)[termIndex]);
            }
            bool remove = param.GetSingleChoiceParam("Mode").Value == 0;
            string[][] terms = mdata.GetCategoryRowAt(rowIndex);
            List<int> keptColumns = new List<int>();
            for (int col = 0; col < terms.Length; col++){
                bool hit = false;
                foreach (string term in terms[col]){
                    if (selectedTerms.Contains(term)){
                        hit = true;
                        break;
                    }
                }
                // Mode 0 keeps the columns without a hit, any other mode keeps the columns with one.
                if (hit != remove){
                    keptColumns.Add(col);
                }
            }
            PerseusPluginUtils.FilterColumns(mdata, param, keptColumns.ToArray());
        }
        /// <summary>
        /// Merges several terms of a categorical row into a single new term. Every cell of the
        /// selected row that contains one of the chosen terms has those terms replaced by the new
        /// term (kept only once per cell), after which the cell is sorted. Cells without any of the
        /// chosen terms are passed through unchanged.
        /// </summary>
        /// <param name="mdata">Matrix whose categorical row is rewritten.</param>
        /// <param name="param">Supplies "Row" (with sub-parameter "Values") and "New term".</param>
        /// <param name="supplTables">Not used by this method.</param>
        /// <param name="documents">Not used by this method.</param>
        /// <param name="processInfo">Receives an error message when the selection is invalid.</param>
        public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
			ref IDocumentData[] documents, ProcessInfo processInfo)
        {
            SingleChoiceWithSubParams p = param.GetSingleChoiceWithSubParams("Row");
            int colInd = p.Value;
            if (colInd < 0) {
                processInfo.ErrString = "No categorical rows available.";
                return;
            }
            MultiChoiceParam mcp = p.GetSubParameters().GetMultiChoiceParam("Values");
            int[] inds = mcp.Value;
            // The guard now agrees with its message: merging needs at least two terms.
            if (inds.Length < 2) {
                processInfo.ErrString = "Please select at least two terms for merging.";
                return;
            }
            string newTerm = param.GetStringParam("New term").Value;
            if (newTerm.Length == 0){
                processInfo.ErrString = "Please specify a new term.";
                return;
            }

            // Look the term list up once instead of once per selected index.
            string[] rowValues = mdata.GetCategoryRowValuesAt(colInd);
            HashSet<string> mergedTerms = new HashSet<string>();
            foreach (int ind in inds) {
                mergedTerms.Add(rowValues[ind]);
            }
            string[][] cats = mdata.GetCategoryRowAt(colInd);
            string[][] newCat = new string[cats.Length][];
            for (int i = 0; i < cats.Length; i++){
                string[] cell = cats[i];
                bool changed = false;
                bool hasNewTerm = false;
                List<string> rewritten = new List<string>(cell.Length);
                foreach (string term in cell){
                    string mapped = term;
                    if (mergedTerms.Contains(term)){
                        mapped = newTerm;
                        changed = true;
                    }
                    if (mapped == newTerm){
                        // Two merged terms in one cell (or a cell that already holds the new
                        // term) must not yield the new term more than once.
                        if (hasNewTerm){
                            continue;
                        }
                        hasNewTerm = true;
                    }
                    rewritten.Add(mapped);
                }
                if (!changed){
                    newCat[i] = cell;
                    continue;
                }
                // Build a fresh array so the array obtained from the matrix is not edited in place.
                string[] merged = rewritten.ToArray();
                Array.Sort(merged);
                newCat[i] = merged;
            }
            mdata.SetCategoryRowAt(newCat, colInd);
        }
        /// <summary>
        /// Appends one numeric column per selected entry of <c>procs</c>, computed row by row from the
        /// finite (non-NaN, non-infinite) values of the chosen expression columns. When the column
        /// selection is group based (choice 2), an additional column per group is appended for every
        /// selected entry.
        /// </summary>
        /// <param name="mdata">Matrix that is read and that receives the new numeric columns.</param>
        /// <param name="param">Supplies "Expression column selection" and "Calculate".</param>
        /// <param name="supplTables">Not used by this method.</param>
        /// <param name="documents">Not used by this method.</param>
        /// <param name="processInfo">Not used by this method.</param>
        public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
			ref IDocumentData[] documents, ProcessInfo processInfo)
        {
            SingleChoiceWithSubParams selection = param.GetSingleChoiceWithSubParams("Expression column selection");
            bool groups = selection.Value == 2;
            string[] groupNames = null;
            int[][] colIndsGroups = null;
            if (groups){
                int groupRowInd = selection.GetSubParameters().GetSingleChoiceParam("Group").Value;
                string[][] groupCol = mdata.GetCategoryRowAt(groupRowInd);
                groupNames = ArrayUtils.UniqueValuesPreserveOrder(groupCol);
                colIndsGroups = PerseusPluginUtils.GetExpressionColIndices(groupCol, groupNames);
            }
            // Choice 1 is an explicit column list; every other choice uses all expression columns.
            int[] useCols;
            if (selection.Value == 1){
                useCols = selection.GetSubParameters().GetMultiChoiceParam("Columns").Value;
            } else{
                useCols = ArrayUtils.ConsecutiveInts(mdata.ExpressionColumnCount);
            }
            HashSet<int> requested = ArrayUtils.ToHashSet(param.GetMultiChoiceParam("Calculate").Value);
            int procCount = procs.Length;
            bool[] include = new bool[procCount];
            double[][] columns = new double[procCount][];
            double[][][] columnsG = null;
            if (groups){
                columnsG = new double[procCount][][];
            }
            // Allocate result storage only for the requested calculations.
            for (int p = 0; p < procCount; p++){
                if (groups){
                    columnsG[p] = new double[groupNames.Length][];
                }
                include[p] = requested.Contains(p);
                if (!include[p]){
                    continue;
                }
                columns[p] = new double[mdata.RowCount];
                if (groups){
                    for (int g = 0; g < groupNames.Length; g++){
                        columnsG[p][g] = new double[mdata.RowCount];
                    }
                }
            }
            for (int row = 0; row < mdata.RowCount; row++){
                // Finite values of this row over the selected columns.
                List<double> finite = new List<double>();
                foreach (int col in useCols){
                    double value = mdata[row, col];
                    if (double.IsNaN(value) || double.IsInfinity(value)){
                        continue;
                    }
                    finite.Add(value);
                }
                for (int p = 0; p < procCount; p++){
                    if (include[p]){
                        columns[p][row] = procs[p].Item2(finite);
                    }
                }
                if (!groups){
                    continue;
                }
                // Same collection step, once per group.
                List<double>[] finiteByGroup = new List<double>[groupNames.Length];
                for (int g = 0; g < colIndsGroups.Length; g++){
                    finiteByGroup[g] = new List<double>();
                    for (int k = 0; k < colIndsGroups[g].Length; k++){
                        double value = mdata[row, colIndsGroups[g][k]];
                        if (double.IsNaN(value) || double.IsInfinity(value)){
                            continue;
                        }
                        finiteByGroup[g].Add(value);
                    }
                }
                for (int p = 0; p < procCount; p++){
                    if (!include[p]){
                        continue;
                    }
                    for (int g = 0; g < groupNames.Length; g++){
                        columnsG[p][g][row] = procs[p].Item2(finiteByGroup[g]);
                    }
                }
            }
            // Append the overall column first, then its per-group variants.
            for (int p = 0; p < procCount; p++){
                if (!include[p]){
                    continue;
                }
                mdata.AddNumericColumn(procs[p].Item1, procs[p].Item3, columns[p]);
                if (groups){
                    for (int g = 0; g < groupNames.Length; g++){
                        mdata.AddNumericColumn(procs[p].Item1 + " " + groupNames[g], procs[p].Item3, columnsG[p][g]);
                    }
                }
            }
        }
Пример #5
0
 /// <summary>
 /// Writes a matrix to a tab-separated text file. The file starts with the column names, followed by
 /// an optional <c>#!{Description}</c> line, a <c>#!{Type}</c> line, one <c>#!{N:name}</c> line per numeric
 /// row, one <c>#!{C:name}</c> line per category row, and finally one line per data row.
 /// </summary>
 /// <param name="data">Matrix to write.</param>
 /// <param name="filename">Path of the file to create.</param>
 /// <param name="addtlMatrices">If true, main-matrix numbers are written as triples <code>value;imputed;quality</code>.</param>
 public static void WriteMatrixToFile(IMatrixData data, string filename, bool addtlMatrices=false)
 {
     using (var output = new StreamWriter(filename))
     {
         // Header line: names of all columns, main columns first.
         List<string> cells = new List<string>();
         for (int c = 0; c < data.ColumnCount; c++)
         {
             cells.Add(data.ColumnNames[c]);
         }
         for (int c = 0; c < data.CategoryColumnCount; c++)
         {
             cells.Add(data.CategoryColumnNames[c]);
         }
         for (int c = 0; c < data.NumericColumnCount; c++)
         {
             cells.Add(data.NumericColumnNames[c]);
         }
         for (int c = 0; c < data.StringColumnCount; c++)
         {
             cells.Add(data.StringColumnNames[c]);
         }
         for (int c = 0; c < data.MultiNumericColumnCount; c++)
         {
             cells.Add(data.MultiNumericColumnNames[c]);
         }
         output.WriteLine(StringUtils.Concat("\t", cells));

         // Description line, only written when HasAnyDescription reports one.
         if (HasAnyDescription(data))
         {
             cells = new List<string>();
             for (int c = 0; c < data.ColumnCount; c++)
             {
                 cells.Add(data.ColumnDescriptions[c] ?? "");
             }
             for (int c = 0; c < data.CategoryColumnCount; c++)
             {
                 cells.Add(data.CategoryColumnDescriptions[c] ?? "");
             }
             for (int c = 0; c < data.NumericColumnCount; c++)
             {
                 cells.Add(data.NumericColumnDescriptions[c] ?? "");
             }
             for (int c = 0; c < data.StringColumnCount; c++)
             {
                 cells.Add(data.StringColumnDescriptions[c] ?? "");
             }
             for (int c = 0; c < data.MultiNumericColumnCount; c++)
             {
                 cells.Add(data.MultiNumericColumnDescriptions[c] ?? "");
             }
             output.WriteLine("#!{Description}" + StringUtils.Concat("\t", cells));
         }

         // Type line: one letter per column identifying its kind.
         cells = new List<string>();
         for (int c = 0; c < data.ColumnCount; c++)
         {
             cells.Add("E");
         }
         for (int c = 0; c < data.CategoryColumnCount; c++)
         {
             cells.Add("C");
         }
         for (int c = 0; c < data.NumericColumnCount; c++)
         {
             cells.Add("N");
         }
         for (int c = 0; c < data.StringColumnCount; c++)
         {
             cells.Add("T");
         }
         for (int c = 0; c < data.MultiNumericColumnCount; c++)
         {
             cells.Add("M");
         }
         output.WriteLine("#!{Type}" + StringUtils.Concat("\t", cells));

         // Numeric annotation rows: values under the main columns, empty cells elsewhere.
         for (int r = 0; r < data.NumericRowCount; r++)
         {
             cells = new List<string>();
             for (int c = 0; c < data.ColumnCount; c++)
             {
                 cells.Add("" + data.NumericRows[r][c]);
             }
             int emptyCells = data.CategoryColumnCount + data.NumericColumnCount + data.StringColumnCount +
                              data.MultiNumericColumnCount;
             for (int k = 0; k < emptyCells; k++)
             {
                 cells.Add("");
             }
             output.WriteLine("#!{N:" + data.NumericRowNames[r] + "}" + StringUtils.Concat("\t", cells));
         }

         // Category annotation rows: terms joined by ';', again padded with empty cells.
         for (int r = 0; r < data.CategoryRowCount; r++)
         {
             cells = new List<string>();
             for (int c = 0; c < data.ColumnCount; c++)
             {
                 string[] terms = data.GetCategoryRowAt(r)[c];
                 if (terms.Length == 0)
                 {
                     cells.Add("");
                 }
                 else
                 {
                     cells.Add(StringUtils.Concat(";", terms));
                 }
             }
             int emptyCells = data.CategoryColumnCount + data.NumericColumnCount + data.StringColumnCount +
                              data.MultiNumericColumnCount;
             for (int k = 0; k < emptyCells; k++)
             {
                 cells.Add("");
             }
             output.WriteLine("#!{C:" + data.CategoryRowNames[r] + "}" + StringUtils.Concat("\t", cells));
         }

         // Data rows.
         for (int r = 0; r < data.RowCount; r++)
         {
             cells = new List<string>();
             for (int c = 0; c < data.ColumnCount; c++)
             {
                 string cell = "" + data.Values.Get(r, c);
                 if (addtlMatrices)
                 {
                     cell = cell + (";" + data.IsImputed[r, c] + ";" + data.Quality.Get(r, c));
                 }
                 cells.Add(cell);
             }
             for (int c = 0; c < data.CategoryColumnCount; c++)
             {
                 // A missing entry is written like an empty one.
                 string[] terms = data.GetCategoryColumnEntryAt(c, r) ?? new string[0];
                 if (terms.Length > 0)
                 {
                     cells.Add(StringUtils.Concat(";", terms));
                 }
                 else
                 {
                     cells.Add("");
                 }
             }
             for (int c = 0; c < data.NumericColumnCount; c++)
             {
                 cells.Add("" + data.NumericColumns[c][r]);
             }
             for (int c = 0; c < data.StringColumnCount; c++)
             {
                 cells.Add(data.StringColumns[c][r]);
             }
             for (int c = 0; c < data.MultiNumericColumnCount; c++)
             {
                 double[] numbers = data.MultiNumericColumns[c][r];
                 if (numbers.Length > 0)
                 {
                     cells.Add(StringUtils.Concat(";", numbers));
                 }
                 else
                 {
                     cells.Add("");
                 }
             }
             output.WriteLine(StringUtils.Concat("\t", cells));
         }
     }
 }
        /// <summary>
        /// Column-wise value filter: reads the minimum number of values and the "Values should be"
        /// condition from <paramref name="param"/> and passes them to <c>NonzeroFilter1</c> with
        /// <c>rows</c> set to false. Group-wise modes (any "Mode" other than 0) are rejected with an
        /// error message.
        /// </summary>
        /// <param name="mdata">Matrix to filter.</param>
        /// <param name="param">Supplies "Min. number of values", "Mode" and "Values should be".</param>
        /// <param name="supplTables">Not used by this method.</param>
        /// <param name="documents">Not used by this method.</param>
        /// <param name="processInfo">Receives the error message when a group-wise mode is selected.</param>
        /// <exception cref="Exception">Thrown when "Values should be" holds an unhandled index.</exception>
        public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
			ref IDocumentData[] documents, ProcessInfo processInfo)
        {
            const bool rows = false;
            int minValids = param.GetIntParam("Min. number of values").Value;
            SingleChoiceWithSubParams modeParam = param.GetSingleChoiceWithSubParams("Mode");
            int modeInd = modeParam.Value;
            // Every non-zero mode ends here, so no group-wise branch is needed further down.
            if (modeInd != 0){
                processInfo.ErrString = mdata.CategoryRowNames.Count == 0
                    ? "No grouping is defined."
                    : "Group-wise filtering can only be applied to rows.";
                return;
            }
            SingleChoiceWithSubParams x = param.GetSingleChoiceWithSubParams("Values should be");
            Parameters subParams = x.GetSubParameters();
            int shouldBeIndex = x.Value;
            FilteringMode filterMode;
            // Thresholds stay NaN for conditions that do not use them.
            double threshold = double.NaN;
            double threshold2 = double.NaN;
            switch (shouldBeIndex){
                case 0:
                    filterMode = FilteringMode.Valid;
                    break;
                case 1:
                    filterMode = FilteringMode.GreaterThan;
                    threshold = subParams.GetDoubleParam("Minimum").Value;
                    break;
                case 2:
                    filterMode = FilteringMode.GreaterEqualThan;
                    threshold = subParams.GetDoubleParam("Minimum").Value;
                    break;
                case 3:
                    filterMode = FilteringMode.LessThan;
                    threshold = subParams.GetDoubleParam("Maximum").Value;
                    break;
                case 4:
                    filterMode = FilteringMode.LessEqualThan;
                    threshold = subParams.GetDoubleParam("Maximum").Value;
                    break;
                case 5:
                    filterMode = FilteringMode.Between;
                    threshold = subParams.GetDoubleParam("Minimum").Value;
                    threshold2 = subParams.GetDoubleParam("Maximum").Value;
                    break;
                case 6:
                    filterMode = FilteringMode.Outside;
                    threshold = subParams.GetDoubleParam("Minimum").Value;
                    threshold2 = subParams.GetDoubleParam("Maximum").Value;
                    break;
                default:
                    throw new Exception("Should not happen.");
            }
            NonzeroFilter1(rows, minValids, mdata, param, threshold, threshold2, filterMode);
        }
        /// <summary>
        /// Row-wise value filter. With "Mode" 0 the filter is applied through
        /// <c>PerseusPluginUtils.NonzeroFilter1</c>; any other mode filters group-wise through
        /// <c>NonzeroFilterGroup</c>, using the category row chosen in "Grouping".
        /// </summary>
        /// <param name="mdata">Matrix to filter.</param>
        /// <param name="param">Supplies the minimum-valid settings, "Mode" and the value condition.</param>
        /// <param name="supplTables">Not used by this method.</param>
        /// <param name="documents">Not used by this method.</param>
        /// <param name="processInfo">Receives an error message when a group-wise mode is chosen but the matrix has no category rows.</param>
        public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
			ref IDocumentData[] documents, ProcessInfo processInfo)
        {
            const bool rows = true;
            bool percentage;
            int minValids = PerseusPluginUtils.GetMinValids(param, out percentage);
            ParameterWithSubParams<int> modeParam = param.GetParamWithSubParams<int>("Mode");
            int modeInd = modeParam.Value;
            bool groupWise = modeInd != 0;
            if (groupWise && mdata.CategoryRowNames.Count == 0){
                processInfo.ErrString = "No grouping is defined.";
                return;
            }
            FilteringMode filterMode;
            double threshold;
            double threshold2;
            PerseusPluginUtils.ReadValuesShouldBeParams(param, out filterMode, out threshold, out threshold2);
            if (!groupWise){
                PerseusPluginUtils.NonzeroFilter1(rows, minValids, percentage, mdata, param, threshold, threshold2, filterMode);
                return;
            }
            int groupingIndex = modeParam.GetSubParameters().GetParam<int>("Grouping").Value;
            string[][] groupCol = mdata.GetCategoryRowAt(groupingIndex);
            NonzeroFilterGroup(minValids, percentage, mdata, param, modeInd == 2, threshold, threshold2, filterMode, groupCol);
        }
Пример #8
0
 /// <summary>
 /// Adds one numeric column per group of the given category row. For every matrix row the column
 /// holds <paramref name="func"/> applied to the group's finite values, or NaN when the group has
 /// fewer than <paramref name="validVals"/> finite values in that row.
 /// </summary>
 /// <param name="groupColInd">Index of the category row that defines the groups.</param>
 /// <param name="validVals">Minimum number of finite values required to compute a result.</param>
 /// <param name="mdata">Matrix that is read and that receives the new numeric columns.</param>
 /// <param name="func">Aggregation applied to the finite values of a group.</param>
 private static void FillMatrixKeep(int groupColInd, int validVals, IMatrixData mdata, Func<IList<double>, double> func)
 {
     string[][] groupCol = mdata.GetCategoryRowAt(groupColInd);
     string[] groupNames = ArrayUtils.UniqueValuesPreserveOrder(groupCol);
     int[][] colInds = PerseusPluginUtils.GetExpressionColIndices(groupCol, groupNames);
     int groupCount = groupNames.Length;
     double[][] aggregated = new double[groupCount][];
     for (int g = 0; g < aggregated.Length; g++){
         aggregated[g] = new double[mdata.RowCount];
     }
     for (int row = 0; row < mdata.RowCount; row++){
         for (int g = 0; g < groupCount; g++){
             List<double> finite = new List<double>();
             foreach (int col in colInds[g]){
                 double value = mdata[row, col];
                 if (double.IsNaN(value) || double.IsInfinity(value)){
                     continue;
                 }
                 finite.Add(value);
             }
             // The result passes through single precision before being stored as a double.
             float cell = finite.Count >= validVals ? (float) func(finite) : float.NaN;
             aggregated[g][row] = cell;
         }
     }
     for (int g = 0; g < groupCount; g++){
         string name = groupNames[g];
         mdata.AddNumericColumn(name, name, aggregated[g]);
     }
 }
 /// <summary>
 /// Rebuilds the category and numeric annotation rows of <paramref name="result"/> as the union of the
 /// rows of <paramref name="mdata1"/> and <paramref name="mdata2"/>. Each combined row spans the expression
 /// columns of the first matrix followed by those of the second; positions not covered by a source
 /// row are filled with an empty term list (category rows) or NaN (numeric rows).
 /// </summary>
 /// <param name="result">Matrix whose annotation rows are cleared and rebuilt.</param>
 /// <param name="mdata1">Source of the leading columns.</param>
 /// <param name="mdata2">Source of the trailing columns.</param>
 private static void SetAnnotationRows(IMatrixData result, IMatrixData mdata1, IMatrixData mdata2)
 {
     // Drop every annotation row the result currently holds.
     result.CategoryRowNames.Clear();
     result.CategoryRowDescriptions.Clear();
     result.ClearCategoryRows();
     result.NumericRowNames.Clear();
     result.NumericRowDescriptions.Clear();
     result.NumericRows.Clear();

     // Category rows.
     string[] catNames = ArrayUtils.UniqueValues(ArrayUtils.Concat(mdata1.CategoryRowNames, mdata2.CategoryRowNames));
     result.CategoryRowNames = new List<string>();
     string[] catDescriptions = new string[catNames.Length];
     for (int n = 0; n < catNames.Length; n++){
         catDescriptions[n] = GetDescription(catNames[n], mdata1.CategoryRowNames, mdata2.CategoryRowNames,
             mdata1.CategoryRowDescriptions, mdata2.CategoryRowDescriptions);
     }
     result.CategoryRowDescriptions = new List<string>();
     for (int n = 0; n < catNames.Length; n++){
         string name = catNames[n];
         string[][] combined = new string[mdata1.ExpressionColumnCount + mdata2.ExpressionColumnCount][];
         for (int col = 0; col < combined.Length; col++){
             combined[col] = new string[0];
         }
         int firstIndex = mdata1.CategoryRowNames.IndexOf(name);
         if (firstIndex >= 0){
             string[][] firstRow = mdata1.GetCategoryRowAt(firstIndex);
             for (int col = 0; col < firstRow.Length; col++){
                 combined[col] = firstRow[col];
             }
         }
         int secondIndex = mdata2.CategoryRowNames.IndexOf(name);
         if (secondIndex >= 0){
             string[][] secondRow = mdata2.GetCategoryRowAt(secondIndex);
             // Entries of the second matrix start after the columns of the first.
             for (int col = 0; col < secondRow.Length; col++){
                 combined[mdata1.ExpressionColumnCount + col] = secondRow[col];
             }
         }
         result.AddCategoryRow(name, catDescriptions[n], combined);
     }

     // Numeric rows.
     string[] numNames = ArrayUtils.UniqueValues(ArrayUtils.Concat(mdata1.NumericRowNames, mdata2.NumericRowNames));
     result.NumericRowNames = new List<string>(numNames);
     string[] numDescriptions = new string[numNames.Length];
     for (int n = 0; n < numNames.Length; n++){
         numDescriptions[n] = GetDescription(numNames[n], mdata1.NumericRowNames, mdata2.NumericRowNames,
             mdata1.NumericRowDescriptions, mdata2.NumericRowDescriptions);
     }
     result.NumericRowDescriptions = new List<string>(numDescriptions);
     for (int n = 0; n < numNames.Length; n++){
         string name = numNames[n];
         double[] combined = new double[mdata1.ExpressionColumnCount + mdata2.ExpressionColumnCount];
         for (int col = 0; col < combined.Length; col++){
             combined[col] = double.NaN;
         }
         int firstIndex = mdata1.NumericRowNames.IndexOf(name);
         if (firstIndex >= 0){
             double[] firstRow = mdata1.NumericRows[firstIndex];
             for (int col = 0; col < firstRow.Length; col++){
                 combined[col] = firstRow[col];
             }
         }
         int secondIndex = mdata2.NumericRowNames.IndexOf(name);
         if (secondIndex >= 0){
             double[] secondRow = mdata2.NumericRows[secondIndex];
             for (int col = 0; col < secondRow.Length; col++){
                 combined[mdata1.ExpressionColumnCount + col] = secondRow[col];
             }
         }
         result.NumericRows.Add(combined);
     }
 }
 /// <summary>
 /// Collects all category rows of a matrix into a list, in index order.
 /// </summary>
 /// <param name="mdata">Matrix to read the category rows from.</param>
 /// <returns>A new list with one entry per category row, as returned by <c>GetCategoryRowAt</c>.</returns>
 public static List<string[][]> GetCategoryRows(IMatrixData mdata)
 {
     List<string[][]> rows = new List<string[][]>();
     int next = 0;
     while (next < mdata.CategoryRowCount){
         rows.Add(mdata.GetCategoryRowAt(next));
         next++;
     }
     return rows;
 }
Пример #11
0
        /// <summary>
        /// Replaces the expression columns of <paramref name="mdata"/> by one column per group of the
        /// category row at <paramref name="groupColInd"/>, discarding the original columns.
        /// </summary>
        /// <param name="groupColInd">Index of the category row that defines the groups; the row is removed afterwards.</param>
        /// <param name="validVals">Minimum number of finite (non-NaN, non-infinite) values a group needs in a row
        /// for <paramref name="func"/> to be applied; otherwise the cell becomes NaN.</param>
        /// <param name="mdata">Matrix that is modified in place.</param>
        /// <param name="func">Aggregation applied to the finite values of one group in one row.</param>
        private static void FillMatrixDontKeep(int groupColInd, int validVals, IMatrixData mdata,
            Func<IList<double>, double> func)
        {
            string[][] grouping = mdata.GetCategoryRowAt(groupColInd);
            string[] groups = ArrayUtils.UniqueValuesPreserveOrder(grouping);
            int[][] members = PerseusPluginUtils.GetExpressionColIndices(grouping, groups);
            int rowCount = mdata.RowCount;
            int groupCount = groups.Length;
            float[,] collapsed = new float[rowCount, groupCount];
            float[,] quality = new float[rowCount, groupCount];
            bool[,] imputed = new bool[rowCount, groupCount];
            for (int row = 0; row < rowCount; row++){
                for (int g = 0; g < groupCount; g++){
                    // Gather the finite values of this group together with their imputation flags.
                    List<double> finite = new List<double>();
                    List<bool> finiteImputed = new List<bool>();
                    foreach (int col in members[g]){
                        double value = mdata[row, col];
                        if (double.IsNaN(value) || double.IsInfinity(value)){
                            continue;
                        }
                        finite.Add(value);
                        finiteImputed.Add(mdata.IsImputed[row, col]);
                    }
                    bool enough = finite.Count >= validVals;
                    collapsed[row, g] = enough ? (float) func(finite) : float.NaN;
                    // No quality information is carried over to the aggregated columns.
                    quality[row, g] = float.NaN;
                    // An aggregated cell counts as imputed when any contributing value was imputed.
                    imputed[row, g] = enough && ArrayUtils.Or(finiteImputed);
                }
            }
            mdata.ExpressionColumnNames = new List<string>(groups);
            mdata.ExpressionColumnDescriptions = GetEmpty(groups);
            mdata.ExpressionValues = collapsed;
            mdata.QualityValues = quality;
            mdata.IsImputed = imputed;
            // The grouping row is dropped; the remaining annotation rows are reduced to one entry per group.
            mdata.RemoveCategoryRowAt(groupColInd);
            for (int r = 0; r < mdata.CategoryRowCount; r++){
                mdata.SetCategoryRowAt(AverageCategoryRow(mdata.GetCategoryRowAt(r), members), r);
            }
            for (int r = 0; r < mdata.NumericRows.Count; r++){
                mdata.NumericRows[r] = AverageNumericRow(mdata.NumericRows[r], members);
            }
        }
 /// <summary>
 /// Collects the category rows at the given indices, in the order the indices are listed.
 /// </summary>
 /// <param name="mdata">Matrix whose category rows are read.</param>
 /// <param name="inds">Indices of the category rows to fetch.</param>
 /// <returns>A new list with one entry per element of <paramref name="inds"/>.</returns>
 public static List<string[][]> GetCategoryRows(IMatrixData mdata, IList<int> inds)
 {
     List<string[][]> selected = new List<string[][]>();
     for (int k = 0; k < inds.Count; k++){
         selected.Add(mdata.GetCategoryRowAt(inds[k]));
     }
     return selected;
 }
Пример #13
0
 /// <summary>
 /// Moves the selected string columns into the expression matrix, appending them after the
 /// existing expression columns. Entries that <c>float.TryParse</c> rejects become NaN.
 /// </summary>
 /// <param name="colInds">Indices of the string columns to convert.</param>
 /// <param name="mdata">Matrix that is modified in place.</param>
 private static void StringToExpression(IList<int> colInds, IMatrixData mdata)
 {
     int[] remaining = ArrayUtils.Complement(colInds, mdata.StringColumnCount);
     string[] movedNames = ArrayUtils.SubArray(mdata.StringColumnNames, colInds);
     string[] movedDescriptions = ArrayUtils.SubArray(mdata.StringColumnDescriptions, colInds);
     string[][] text = ArrayUtils.SubArray(mdata.StringColumns, colInds);
     int added = text.Length;
     // Parse every selected string column up front; unparsable entries are NaN.
     float[][] parsed = new float[added][];
     for (int c = 0; c < added; c++){
         string[] source = text[c];
         float[] target = new float[source.Length];
         for (int r = 0; r < target.Length; r++){
             float number;
             if (!float.TryParse(source[r], out number)){
                 number = float.NaN;
             }
             target[r] = number;
         }
         parsed[c] = target;
     }
     int rowCount = mdata.RowCount;
     int oldCount = mdata.ExpressionColumnCount;
     int newCount = oldCount + added;
     float[,] values = new float[rowCount, newCount];
     float[,] quality = new float[rowCount, newCount];
     bool[,] imputed = new bool[rowCount, newCount];
     for (int r = 0; r < rowCount; r++){
         for (int c = 0; c < newCount; c++){
             if (c < oldCount){
                 // Existing expression columns are copied unchanged.
                 values[r, c] = mdata[r, c];
                 quality[r, c] = mdata.QualityValues[r, c];
                 imputed[r, c] = mdata.IsImputed[r, c];
             } else{
                 // Converted columns carry no quality value and are never flagged as imputed.
                 values[r, c] = parsed[c - oldCount][r];
                 quality[r, c] = float.NaN;
                 imputed[r, c] = false;
             }
         }
     }
     mdata.ExpressionValues = values;
     mdata.QualityValues = quality;
     mdata.IsImputed = imputed;
     mdata.ExpressionColumnNames.AddRange(movedNames);
     mdata.ExpressionColumnDescriptions.AddRange(movedDescriptions);
     // Only the string columns that were not converted are kept.
     mdata.StringColumns = ArrayUtils.SubList(mdata.StringColumns, remaining);
     mdata.StringColumnNames = ArrayUtils.SubList(mdata.StringColumnNames, remaining);
     mdata.StringColumnDescriptions = ArrayUtils.SubList(mdata.StringColumnDescriptions, remaining);
     // Annotation rows are extended by one entry per added expression column.
     for (int r = 0; r < mdata.CategoryRowCount; r++){
         mdata.SetCategoryRowAt(ExtendCategoryRow(mdata.GetCategoryRowAt(r), added), r);
     }
     for (int r = 0; r < mdata.NumericRows.Count; r++){
         mdata.NumericRows[r] = ExtendNumericRow(mdata.NumericRows[r], added);
     }
 }
 /// <summary>
 /// Finds the expression columns whose entry in the category row named
 /// <paramref name="categoryName"/> contains at least one of the given values.
 /// </summary>
 /// <param name="data">Matrix whose category row is inspected.</param>
 /// <param name="categoryName">Name passed to <c>GetIndexOfCol</c> to locate the category row.</param>
 /// <param name="values">Terms to look for; a column matches as soon as one of its terms is contained.</param>
 /// <returns>Indices of the matching expression columns, in ascending order.</returns>
 public static int[] GetIndicesOfCol(IMatrixData data, string categoryName, HashSet<string> values)
 {
     int index = GetIndexOfCol(data, categoryName);
     List<int> result = new List<int>();
     int columnCount = data.ExpressionColumnCount;
     if (columnCount == 0){
         // Nothing to scan; as before, the category row is not fetched in this case.
         return result.ToArray();
     }
     // The category row does not change while scanning, so fetch it once instead of once per column.
     string[][] categoryRow = data.GetCategoryRowAt(index);
     for (int i = 0; i < columnCount; i++){
         foreach (string term in categoryRow[i]){
             if (values.Contains(term)){
                 result.Add(i);
                 break;
             }
         }
     }
     return result.ToArray();
 }
Пример #15
0
        /// <summary>
        /// Resolves the column indices of the two groups to compare, validates the optional
        /// preserved-subgroup configuration and then delegates to <c>TwoSamplesTest1</c>.
        /// On a validation failure <paramref name="err"/> is set, <paramref name="pvalsS0"/> is null
        /// and the method returns without running the test.
        /// </summary>
        /// <param name="indMap">Optional index mapping applied to both groups via <c>Transform</c>;
        /// a non-null value marks the run as randomized.</param>
        /// <param name="suffix">Receives the first group name, followed by "_" and the second group name
        /// unless the second group is the complement of the first.</param>
        private static void PerformSingleTest(string firstGroup, string secondGroup, SecondGroupMode secondGroupMode,
                                              IMatrixData mdata, int groupInd, ref string err, bool log, double threshold, TestTruncation truncation, int nrand,
                                              int preserveGroupInd, TwoSampleTest test, OneSampleTest test1, TestSide side, double s0, bool filterValidValues,
                                              int minNumValidValues, int minNumValidValuesMode, int minPercValidValues, int minPercValidValuesMode,
                                              out double[] pvalsS0, IDictionary <int, int> indMap, bool qval, string[] plotNames, string[] pvals1Name,
                                              string[] fdrs1Name, string[] diffs1Name, string[] statCol1Name, string[] significant1Name, double[][] pvals1,
                                              double[][] fdrs1, double[][] diffs1, double[][] statCol1, string[][][] significant1, int ind, List <string>[] sigCol,
                                              out string suffix, string mainSuffix, bool paired)
        {
            bool randomized = indMap != null;
            bool addQval = qval && truncation != TestTruncation.Pvalue;
            bool complement = secondGroupMode == SecondGroupMode.Complement;

            // Resolve the columns of both groups; in complement mode the second group is every other column.
            string[][] groupCol = mdata.GetCategoryRowAt(groupInd);
            string[] wanted = complement ? new[] { firstGroup } : new[] { firstGroup, secondGroup };
            int[][] mainInds = PerseusPluginUtils.GetMainColIndices(groupCol, wanted);
            int[] colInds1 = mainInds[0];
            int[] colInds2 = complement ? ArrayUtils.Complement(colInds1, mdata.ColumnCount) : mainInds[1];
            if (randomized)
            {
                Transform(colInds1, indMap);
                Transform(colInds2, indMap);
            }
            Array.Sort(colInds1);
            Array.Sort(colInds2);

            suffix = complement ? firstGroup : firstGroup + "_" + secondGroup;

            if (paired && colInds1.Length != colInds2.Length)
            {
                err = "Group sizes have to be equal for paired test.";
                pvalsS0 = null;
                return;
            }

            List <int[]> colIndsPreserve1 = null;
            List <int[]> colIndsPreserve2 = null;
            if (truncation == TestTruncation.PermutationBased && preserveGroupInd >= 0)
            {
                if (paired)
                {
                    err = "Preserved subgroups are not supported for paired tests.";
                    pvalsS0 = null;
                    return;
                }
                string[][] preserveGroupCol = mdata.GetCategoryRowAt(preserveGroupInd);
                string[] preserveNames = ArrayUtils.UniqueValuesPreserveOrder(ArrayUtils.Concat(preserveGroupCol));
                int[][] preserveInds = PerseusPluginUtils.GetMainColIndices(preserveGroupCol, preserveNames);
                int[] flattened = ArrayUtils.Concat(preserveInds);
                int[] flattenedUnique = ArrayUtils.UniqueValues(flattened);
                // Every sample may belong to at most one preserved subgroup ...
                if (flattened.Length != flattenedUnique.Length)
                {
                    err = "The grouping for randomizations is not unique";
                    pvalsS0 = null;
                    return;
                }
                // ... and the subgroups together must account for all samples of both groups.
                if (flattened.Length != colInds1.Length + colInds2.Length)
                {
                    err = "The grouping for randomizations is not valid because it does not cover all samples.";
                    pvalsS0 = null;
                    return;
                }
                colIndsPreserve1 = new List <int[]>();
                colIndsPreserve2 = new List <int[]>();
                foreach (int[] subgroup in preserveInds)
                {
                    // DetermineGroup result: 0 is treated as an error, 1 and 2 select the target list.
                    switch (DetermineGroup(colInds1, colInds2, subgroup))
                    {
                    case 0:
                        err = "The grouping for randomizations is not hierarchical with respect to the main grouping.";
                        pvalsS0 = null;
                        return;

                    case 1:
                        colIndsPreserve1.Add(subgroup);
                        break;

                    case 2:
                        colIndsPreserve2.Add(subgroup);
                        break;
                    }
                }
            }

            TwoSamplesTest1(colInds1, colInds2, truncation, threshold, test, test1, side, log, mdata, s0, nrand,
                            colIndsPreserve1, colIndsPreserve2, suffix, mainSuffix, filterValidValues, minNumValidValues, minNumValidValuesMode,
                            minPercValidValues, minPercValidValuesMode, out pvalsS0, randomized, addQval, plotNames, pvals1Name, fdrs1Name,
                            diffs1Name, statCol1Name, significant1Name, pvals1, fdrs1, diffs1, statCol1, significant1, ind, paired);

            if (significant1 == null)
            {
                return;
            }
            // Record this comparison's suffix for every row that has a non-empty significance entry.
            string[][] flags = significant1[ind];
            for (int row = 0; row < flags.Length; row++)
            {
                if (flags[row].Length > 0)
                {
                    sigCol[row].Add(suffix);
                }
            }
        }
        /// <summary>
        /// Adds one numeric column per selected entry of <c>procs</c>, computed row-wise over the
        /// finite (non-NaN, non-infinite) values of the chosen main columns. When the
        /// "Main column selection" parameter has value 2, an additional column per group of the
        /// selected "Group" category row is added for every selected entry.
        /// </summary>
        /// <param name="mdata">Matrix that receives the new numeric columns.</param>
        /// <param name="param">Parameters; reads "Main column selection" (with "Group" or "Columns") and "Calculate".</param>
        /// <param name="supplTables">Not used by this method.</param>
        /// <param name="documents">Not used by this method.</param>
        /// <param name="processInfo">Not used by this method.</param>
        public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
                                ref IDocumentData[] documents, ProcessInfo processInfo)
        {
            ParameterWithSubParams <int> selection = param.GetParamWithSubParams <int>("Main column selection");
            bool groups = selection.Value == 2;

            string[] groupNames = null;
            int[][] colIndsGroups = null;
            if (groups)
            {
                int groupRowInd = selection.GetSubParameters().GetParam <int>("Group").Value;
                string[][] groupCol = mdata.GetCategoryRowAt(groupRowInd);
                groupNames = ArrayUtils.UniqueValuesPreserveOrder(groupCol);
                colIndsGroups = PerseusPluginUtils.GetMainColIndices(groupCol, groupNames);
            }
            int[] useCols;
            if (selection.Value == 1)
            {
                useCols = selection.GetSubParameters().GetParam <int[]>("Columns").Value;
            }
            else
            {
                useCols = ArrayUtils.ConsecutiveInts(mdata.ColumnCount);
            }
            HashSet <int> chosen = ArrayUtils.ToHashSet(param.GetParam <int[]>("Calculate").Value);

            // Allocate the result buffers: one per selected quantity, plus one per group if requested.
            int procCount = procs.Length;
            int rowCount = mdata.RowCount;
            bool[] include = new bool[procCount];
            double[][] columns = new double[procCount][];
            double[][][] columnsG = groups ? new double[procCount][][] : null;
            for (int p = 0; p < procCount; p++)
            {
                if (groups)
                {
                    columnsG[p] = new double[groupNames.Length][];
                }
                include[p] = chosen.Contains(p);
                if (!include[p])
                {
                    continue;
                }
                columns[p] = new double[rowCount];
                if (groups)
                {
                    for (int g = 0; g < groupNames.Length; g++)
                    {
                        columnsG[p][g] = new double[rowCount];
                    }
                }
            }

            for (int row = 0; row < rowCount; row++)
            {
                // Finite values of the selected main columns in this row.
                List <double> all = new List <double>();
                foreach (int col in useCols)
                {
                    double x = mdata.Values.Get(row, col);
                    if (double.IsNaN(x) || double.IsInfinity(x))
                    {
                        continue;
                    }
                    all.Add(x);
                }
                for (int p = 0; p < procCount; p++)
                {
                    if (include[p])
                    {
                        columns[p][row] = procs[p].Item2(all);
                    }
                }
                if (!groups)
                {
                    continue;
                }
                // Same, but split by group.
                List <double>[] perGroup = new List <double> [groupNames.Length];
                for (int g = 0; g < colIndsGroups.Length; g++)
                {
                    perGroup[g] = new List <double>();
                    foreach (int col in colIndsGroups[g])
                    {
                        double x = mdata.Values.Get(row, col);
                        if (double.IsNaN(x) || double.IsInfinity(x))
                        {
                            continue;
                        }
                        perGroup[g].Add(x);
                    }
                }
                for (int p = 0; p < procCount; p++)
                {
                    if (!include[p])
                    {
                        continue;
                    }
                    for (int g = 0; g < groupNames.Length; g++)
                    {
                        columnsG[p][g][row] = procs[p].Item2(perGroup[g]);
                    }
                }
            }

            // Publish the results: the overall column first, then its per-group variants.
            for (int p = 0; p < procCount; p++)
            {
                if (!include[p])
                {
                    continue;
                }
                mdata.AddNumericColumn(procs[p].Item1, procs[p].Item3, columns[p]);
                if (groups)
                {
                    for (int g = 0; g < groupNames.Length; g++)
                    {
                        mdata.AddNumericColumn(procs[p].Item1 + " " + groupNames[g], procs[p].Item3, columnsG[p][g]);
                    }
                }
            }
        }
Пример #17
0
 /// <summary>
 /// Divides every expression column by the expression column at <paramref name="index"/> and
 /// removes that column from the matrix. Infinite quotients are replaced by NaN. A resulting
 /// cell is flagged as imputed when either the numerator or the divisor was imputed.
 /// </summary>
 /// <param name="data">Matrix that is modified in place.</param>
 /// <param name="index">Index of the expression column used as divisor.</param>
 /// <exception cref="System.ArgumentOutOfRangeException">
 /// <paramref name="index"/> is not a valid expression column index. Checked before anything
 /// is modified, so the matrix is never left partially updated.
 /// </exception>
 public static void DivideByColumn(IMatrixData data, int index)
 {
     int p = data.RowCount;
     int n = data.ExpressionColumnCount;
     if (index < 0 || index >= n){
         throw new System.ArgumentOutOfRangeException("index", index,
             "The divisor column index must refer to an existing expression column.");
     }
     // Quality values are only carried over when they have the same shape as the expression matrix.
     float[,] quality = data.QualityValues;
     bool hasQuality = quality != null && quality.GetLength(0) == p && quality.GetLength(1) == n;
     float[,] newEx = new float[p,n - 1];
     bool[,] newImp = new bool[p,n - 1];
     float[,] newQual = hasQuality ? new float[p,n - 1] : null;
     for (int i = 0; i < p; i++){
         float divisor = data[i, index];
         bool divisorImputed = data.IsImputed[i, index];
         // Position in the shrunken matrix; lags behind j by one once the divisor column is passed.
         int target = 0;
         for (int j = 0; j < n; j++){
             if (j == index){
                 continue;
             }
             newEx[i, target] = data[i, j]/divisor;
             if (float.IsInfinity(newEx[i, target])){
                 newEx[i, target] = float.NaN;
             }
             newImp[i, target] = data.IsImputed[i, j] || divisorImputed;
             if (hasQuality){
                 newQual[i, target] = quality[i, j];
             }
             target++;
         }
     }
     data.ExpressionValues = newEx;
     data.IsImputed = newImp;
     if (hasQuality){
         // Keep the quality matrix aligned with the expression matrix after dropping the divisor column.
         data.QualityValues = newQual;
     }
     data.ExpressionColumnNames.RemoveAt(index);
     data.ExpressionColumnDescriptions.RemoveAt(index);
     // Drop the divisor column's entry from every annotation row.
     for (int i = 0; i < data.CategoryRowCount; i++){
         data.SetCategoryRowAt(ArrayUtils.RemoveAtIndex(data.GetCategoryRowAt(i), index), i);
     }
     for (int i = 0; i < data.NumericRowCount; i++){
         data.NumericRows[i] = ArrayUtils.RemoveAtIndex(data.NumericRows[i], index);
     }
 }
Пример #18
0
 /// <summary>
 /// Returns all category rows of the matrix as a list, ordered by row index.
 /// </summary>
 /// <param name="mdata">Matrix whose category rows are read.</param>
 /// <returns>A new list with one entry per category row.</returns>
 private static List<string[][]> GetCategoryRows(IMatrixData mdata)
 {
     List<string[][]> rows = new List<string[][]>();
     int next = 0;
     while (next < mdata.CategoryRowCount){
         string[][] row = mdata.GetCategoryRowAt(next);
         rows.Add(row);
         next++;
     }
     return rows;
 }
 /// <summary>
 /// Builds one string parameter per expression column for editing the category row at
 /// <paramref name="ind"/>. Each parameter is named after its column and pre-filled with the
 /// column's current terms joined by ";".
 /// </summary>
 /// <param name="mdata">Matrix providing the column names and the category row.</param>
 /// <param name="ind">Index of the category row to edit.</param>
 /// <returns>A parameter set with one entry per expression column, in column order.</returns>
 public Parameters GetEditParameters(IMatrixData mdata, int ind)
 {
     List<Parameter> par = new List<Parameter>();
     int columnCount = mdata.ExpressionColumnCount;
     if (columnCount == 0){
         // No columns: as before, the category row is not fetched at all.
         return new Parameters(par);
     }
     // The category row is the same for every column, so fetch it once rather than per column.
     string[][] categoryRow = mdata.GetCategoryRowAt(ind);
     for (int i = 0; i < columnCount; i++){
         string t = mdata.ExpressionColumnNames[i];
         string help = "Specify a category value for the column '" + t + "'.";
         par.Add(new StringParam(t, StringUtils.Concat(";", categoryRow[i])){Help = help});
     }
     return new Parameters(par);
 }
Пример #20
0
        public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
                                ref IDocumentData[] documents, ProcessInfo processInfo)
        {
            int[] outputColumns      = param.GetParam <int[]>("Output").Value;
            int   proteinIdColumnInd = param.GetParam <int>("Protein IDs").Value;

            string[] proteinIds    = mdata.StringColumns[proteinIdColumnInd];
            int[]    intensityCols = param.GetParam <int[]>("Intensities").Value;
            if (intensityCols.Length == 0)
            {
                processInfo.ErrString = "Please select at least one column containing protein intensities.";
                return;
            }
            // variable to hold all intensity values
            List <double[]> columns = new List <double[]>();

            string[] inputNames  = new string[intensityCols.Length];
            string[] sampleNames = new string[intensityCols.Length];
            for (int col = 0; col < intensityCols.Length; col++)
            {
                double[] values;
                if (intensityCols[col] < mdata.ColumnCount)
                {
                    values          = ArrayUtils.ToDoubles(mdata.Values.GetColumn(intensityCols[col]));
                    inputNames[col] = mdata.ColumnNames[intensityCols[col]];
                }
                else
                {
                    values          = mdata.NumericColumns[intensityCols[col] - mdata.ColumnCount];
                    inputNames[col] = mdata.NumericColumnNames[intensityCols[col] - mdata.ColumnCount];
                }
                sampleNames[col] = new Regex(@"^(?:(?:LFQ )?[Ii]ntensity )?(.*)$").Match(inputNames[col]).Groups[1].Value;
                columns.Add(values);
            }
            // average over columns if this option is selected
            if (param.GetParamWithSubParams <int>("Averaging mode").Value == 3)
            {
                double[] column = new double[mdata.RowCount];
                for (int row = 0; row < mdata.RowCount; row++)
                {
                    double[] values = new double[intensityCols.Length];
                    for (int col = 0; col < intensityCols.Length; col++)
                    {
                        values[col] = columns[col][row];
                    }
                    column[row] = ArrayUtils.Median(ExtractValidValues(values, false));
                }
                // delete the original list of columns
                columns = new List <double[]> {
                    column
                };
                sampleNames = new[] { "" };
            }
            // revert logarithm if necessary
            if (param.GetParamWithSubParams <bool>("Logarithmized").Value)
            {
                double[] logBases = new[] { 2, Math.E, 10 };
                double   logBase  =
                    logBases[param.GetParamWithSubParams <bool>("Logarithmized").GetSubParameters().GetParam <int>("log base").Value];
                foreach (double[] t in columns)
                {
                    for (int row = 0; row < mdata.RowCount; row++)
                    {
                        if (t[row] == 0)
                        {
                            processInfo.ErrString = "Are the columns really logarithmized?\nThey contain zeroes!";
                        }
                        t[row] = Math.Pow(logBase, t[row]);
                    }
                }
            }
            double[] mw = mdata.NumericColumns[param.GetParam <int>("Molecular masses").Value];
            // define whether the molecular masses are given in Da or kDa
            if (ArrayUtils.Median(mw) < 250)             // most likely kDa
            {
                for (int i = 0; i < mw.Length; i++)
                {
                    mw[i] *= 1000;
                }
            }
            double[] detectabilityNormFactor = mw;
            if (param.GetParamWithSubParams <bool>("Detectability correction").Value)
            {
                detectabilityNormFactor =
                    mdata.NumericColumns[
                        param.GetParamWithSubParams <bool>("Detectability correction").GetSubParameters().GetParam <int>("Correction factor")
                        .Value];
            }
            // the normalization factor needs to be nonzero for all proteins
            // check and replace with 1 for all relevant cases
            for (int row = 0; row < mdata.RowCount; row++)
            {
                if (detectabilityNormFactor[row] == 0 || double.IsNaN(detectabilityNormFactor[row]))
                {
                    detectabilityNormFactor[row] = 1;
                }
            }
            // detect the organism
            Organism organism = DetectOrganism(proteinIds);
            // c value the amount of DNA per haploid genome, see: http://en.wikipedia.org/wiki/C-value
            double cValue = (organism.genomeSize * basePairWeight) / avogadro;

            // find the histones
            int[] histoneRows = FindHistones(proteinIds, organism);
            // write a categorical column indicating the histones
            string[][] histoneCol = new string[mdata.RowCount][];
            for (int row = 0; row < mdata.RowCount; row++)
            {
                histoneCol[row] = (ArrayUtils.Contains(histoneRows, row)) ? new[] { "+" } : new string[0];
            }
            mdata.AddCategoryColumn("Histones", "", histoneCol);

            // initialize the variables for the annotation rows
            string[]   sampleNameRow     = new string[mdata.ColumnCount];
            string[]   inputNameRow      = new string[mdata.ColumnCount];
            double[]   totalProteinRow   = new double[mdata.ColumnCount];
            double[]   totalMoleculesRow = new double[mdata.ColumnCount];
            string[][] organismRow       = new string[mdata.ColumnCount][];
            // populate the organismRow variable with empty strings as defaults (not null, which may cause errors when writing the annotations in the end.)
            for (int i = 0; i < organismRow.Length; i++)
            {
                organismRow[i] = new[] { "N/A" };
            }
            double[] histoneMassRow       = new double[mdata.ColumnCount];
            double[] ploidyRow            = new double[mdata.ColumnCount];
            double[] cellVolumeRow        = new double[mdata.ColumnCount];
            double[] normalizationFactors = new double[columns.Count];
            // calculate normalization factors for each column
            for (int col = 0; col < columns.Count; col++)
            {
                string   sampleName = sampleNames[col];
                double[] column     = columns[col];
                // normalization factor to go from intensities to copies,
                // needs to be determined either using the total protein or the histone scaling approach
                double factor;
                switch (param.GetParamWithSubParams <int>("Scaling mode").Value)
                {
                case 0:                         // total protein amount
                    double mwWeightedNormalizedSummedIntensities = 0;
                    for (int row = 0; row < mdata.RowCount; row++)
                    {
                        if (!double.IsNaN(column[row]) && !double.IsNaN(mw[row]))
                        {
                            mwWeightedNormalizedSummedIntensities += (column[row] / detectabilityNormFactor[row]) * mw[row];
                        }
                    }
                    factor =
                        param.GetParamWithSubParams <int>("Scaling mode").GetSubParameters().GetParam <double>(
                            "Protein amount per cell [pg]").Value *1e-12 * avogadro / mwWeightedNormalizedSummedIntensities;
                    break;

                case 1:                         // histone mode
                    double mwWeightedNormalizedSummedHistoneIntensities = 0;
                    foreach (int row in histoneRows)
                    {
                        if (!double.IsNaN(column[row]) && !double.IsNaN(mw[row]))
                        {
                            mwWeightedNormalizedSummedHistoneIntensities += (column[row] / detectabilityNormFactor[row]) * mw[row];
                        }
                    }
                    double ploidy =
                        param.GetParamWithSubParams <int>("Scaling mode").GetSubParameters().GetParam <double>("Ploidy").Value;
                    factor = (cValue * ploidy * avogadro) / mwWeightedNormalizedSummedHistoneIntensities;
                    break;

                default:
                    factor = 1;
                    break;
                }
                normalizationFactors[col] = factor;
            }
            // check averaging mode
            if (param.GetParamWithSubParams <int>("Averaging mode").Value == 1)            // same factor for all
            {
                double factor = ArrayUtils.Mean(normalizationFactors);
                for (int i = 0; i < normalizationFactors.Length; i++)
                {
                    normalizationFactors[i] = factor;
                }
            }
            if (param.GetParamWithSubParams <int>("Averaging mode").Value == 2)            // same factor in each group
            {
                if (param.GetParamWithSubParams <int>("Averaging mode").GetSubParameters().GetParam <int>("Grouping").Value == -1)
                {
                    processInfo.ErrString = "No grouping selected.";
                    return;
                }
                string[][] groupNames =
                    mdata.GetCategoryRowAt(
                        param.GetParamWithSubParams <int>("Averaging mode").GetSubParameters().GetParam <int>("Grouping").Value);
                string[] uniqueGroupNames = Unique(groupNames);
                int[]    grouping         = new int[columns.Count];
                for (int i = 0; i < columns.Count; i++)
                {
                    if (intensityCols[i] >= mdata.ColumnCount)                      // Numeric annotation columns cannot be grouped
                    {
                        grouping[i] = i;
                        continue;
                    }
                    if (ArrayUtils.Contains(uniqueGroupNames, groupNames[i][0]))
                    {
                        grouping[i] = ArrayUtils.IndexOf(uniqueGroupNames, groupNames[i][0]);
                        continue;
                    }
                    grouping[i] = i;
                }
                Dictionary <int, List <double> > factors = new Dictionary <int, List <double> >();
                for (int i = 0; i < columns.Count; i++)
                {
                    if (factors.ContainsKey(grouping[i]))
                    {
                        factors[grouping[i]].Add(normalizationFactors[i]);
                    }
                    else
                    {
                        factors.Add(grouping[i], new List <double> {
                            normalizationFactors[i]
                        });
                    }
                }
                double[] averagedNormalizationFactors = new double[columns.Count];
                for (int i = 0; i < columns.Count; i++)
                {
                    List <double> factor;
                    factors.TryGetValue(grouping[i], out factor);
                    averagedNormalizationFactors[i] = ArrayUtils.Mean(factor);
                }
                normalizationFactors = averagedNormalizationFactors;
            }
            // loop over all selected columns and calculate copy numbers
            for (int col = 0; col < columns.Count; col++)
            {
                string   sampleName     = sampleNames[col];
                double[] column         = columns[col];
                double   factor         = normalizationFactors[col];
                double[] copyNumbers    = new double[mdata.RowCount];
                double[] concentrations = new double[mdata.RowCount];                 // femtoliters
                double[] massFraction   = new double[mdata.RowCount];
                double[] moleFraction   = new double[mdata.RowCount];
                double   totalProtein   = 0;            // picograms
                double   histoneMass    = 0;            // picograms
                double   totalMolecules = 0;
                for (int row = 0; row < mdata.RowCount; row++)
                {
                    if (!double.IsNaN(column[row]) && !double.IsNaN(mw[row]))
                    {
                        copyNumbers[row] = (column[row] / detectabilityNormFactor[row]) * factor;
                        totalMolecules  += copyNumbers[row];
                        totalProtein    += (copyNumbers[row] * mw[row] * 1e12) / avogadro;                // picograms
                        if (ArrayUtils.Contains(histoneRows, row))
                        {
                            histoneMass += (copyNumbers[row] * mw[row] * 1e12) / avogadro;                       // picograms
                        }
                    }
                }
                double totalVolume = (totalProtein / (param.GetParam <double>("Total cellular protein concentration [g/l]").Value)) *
                                     1000;
                // femtoliters
                for (int row = 0; row < mdata.RowCount; row++)
                {
                    if (!double.IsNaN(column[row]) && !double.IsNaN(mw[row]))
                    {
                        concentrations[row] = ((copyNumbers[row] / (totalVolume * 1e-15)) / avogadro) * 1e9;           // nanomolar
                        massFraction[row]   = (((copyNumbers[row] * mw[row] * 1e12) / avogadro) / totalProtein) * 1e6; // ppm
                        moleFraction[row]   = (copyNumbers[row] / totalMolecules) * 1e6;                               // ppm
                    }
                }
                string suffix = (sampleName == "") ? "" : " " + sampleName;
                if (ArrayUtils.Contains(outputColumns, 0))
                {
                    mdata.AddNumericColumn("Copy number" + suffix, "", copyNumbers);
                }
                if (ArrayUtils.Contains(outputColumns, 1))
                {
                    mdata.AddNumericColumn("Concentration [nM]" + suffix, "", concentrations);
                }
                if (ArrayUtils.Contains(outputColumns, 2))
                {
                    mdata.AddNumericColumn("Abundance (mass/total mass) [*10^-6]" + suffix, "", massFraction);
                }
                if (ArrayUtils.Contains(outputColumns, 3))
                {
                    mdata.AddNumericColumn("Abundance (molecules/total molecules) [*10^-6]" + suffix, "", moleFraction);
                }
                double[] rank         = ArrayUtils.Rank(copyNumbers);
                double[] relativeRank = new double[mdata.RowCount];
                double   validRanks   = mdata.RowCount;
                for (int row = 0; row < mdata.RowCount; row++)
                {
                    // remove rank for protein with no copy number information
                    if (double.IsNaN((copyNumbers[row])) || copyNumbers[row] == 0)
                    {
                        rank[row] = double.NaN;
                        validRanks--;                         // do not consider as valid
                    }
                    // invert ranking, so that rank 0 is the most abundant protein
                    rank[row] = mdata.RowCount - rank[row];
                }
                for (int row = 0; row < mdata.RowCount; row++)
                {
                    relativeRank[row] = rank[row] / validRanks;
                }
                if (ArrayUtils.Contains(outputColumns, 4))
                {
                    mdata.AddNumericColumn("Copy number rank" + suffix, "", rank);
                }
                if (ArrayUtils.Contains(outputColumns, 5))
                {
                    mdata.AddNumericColumn("Relative copy number rank" + suffix, "", relativeRank);
                }
                if (intensityCols[col] < mdata.ColumnCount && param.GetParamWithSubParams <int>("Averaging mode").Value != 3)
                {
                    inputNameRow[intensityCols[col]]      = inputNames[col];
                    sampleNameRow[intensityCols[col]]     = sampleNames[col];
                    totalProteinRow[intensityCols[col]]   = Math.Round(totalProtein, 2);
                    totalMoleculesRow[intensityCols[col]] = Math.Round(totalMolecules, 0);
                    organismRow[intensityCols[col]]       = new[] { organism.name };
                    histoneMassRow[intensityCols[col]]    = Math.Round(histoneMass, 4);
                    ploidyRow[intensityCols[col]]         = Math.Round((histoneMass * 1e-12) / cValue, 2);
                    cellVolumeRow[intensityCols[col]]     = Math.Round(totalVolume, 2);                 // femtoliters
                }
            }

            // Summary annotation row
            if (param.GetParamWithSubParams <int>("Averaging mode").Value != 3 && ArrayUtils.Contains(outputColumns, 6))
            {
                mdata.AddNumericRow("Total protein [pg/cell]", "", totalProteinRow);
                mdata.AddNumericRow("Total molecules per cell", "", totalMoleculesRow);
                mdata.AddCategoryRow("Organism", "", organismRow);
                mdata.AddNumericRow("Histone mass [pg/cell]", "", histoneMassRow);
                mdata.AddNumericRow("Ploidy", "", ploidyRow);
                mdata.AddNumericRow("Cell volume [fl]", "", cellVolumeRow);
            }

            // Summary matrix
            if (param.GetParamWithSubParams <int>("Averaging mode").Value != 3 && ArrayUtils.Contains(outputColumns, 7))
            {
                supplTables = new IMatrixData[1];
                IMatrixData supplTab = PerseusFactory.CreateMatrixData();
                supplTab.ColumnNames = new List <string>();
                supplTab.Values.Init(totalProteinRow.Length, 0);
                supplTab.SetAnnotationColumns(new List <string> {
                    "Sample", "Input Column"
                },
                                              new List <string[]>()
                {
                    sampleNameRow, inputNameRow
                }, new List <string>()
                {
                    "Organism"
                },
                                              new List <string[][]>()
                {
                    organismRow
                },
                                              new List <string>()
                {
                    "Total protein [pg/cell]",
                    "Total molecules per cell",
                    "Histone mass [pg/cell]",
                    "Ploidy",
                    "Cell volume [fl]"
                },
                                              new List <double[]>()
                {
                    totalProteinRow, totalMoleculesRow, histoneMassRow, ploidyRow, cellVolumeRow
                },
                                              new List <string>(), new List <double[][]>());
                supplTables[0] = supplTab;
            }
        }
Пример #21
0
        /// <summary>
        /// Converts the selected protein intensity columns into copy numbers and derived quantities
        /// and appends the requested results to <paramref name="mdata"/>.
        /// </summary>
        /// <remarks>
        /// Processing steps:
        /// 1. collect the selected intensity columns (expression or numeric columns);
        /// 2. "Averaging mode" 3: collapse all columns into their per-row median;
        /// 3. undo the logarithm if the data are flagged as logarithmized;
        /// 4. compute one scaling factor per column ("Scaling mode" 0: total protein amount,
        ///    1: histone signal, otherwise 1);
        /// 5. "Averaging mode" 1: use the mean factor for all columns, 2: use the mean factor per group;
        /// 6. write the outputs selected in "Output" (0 copy number, 1 concentration, 2 mass fraction,
        ///    3 mole fraction, 4 rank, 5 relative rank, 6 summary annotation rows).
        /// A categorical "Histones" column is always added.
        /// </remarks>
        /// <param name="mdata">Matrix that is read and extended in place.</param>
        /// <param name="param">Parameter set holding the user's choices.</param>
        /// <param name="supplTables">Not used by this implementation.</param>
        /// <param name="documents">Not used by this implementation.</param>
        /// <param name="processInfo">Receives a message in <c>ErrString</c> when the input is not usable.</param>
        public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
            ref IDocumentData[] documents, ProcessInfo processInfo)
        {
            int[] outputColumns = param.GetMultiChoiceParam("Output").Value;
            int proteinIdColumnInd = param.GetSingleChoiceParam("Protein IDs").Value;
            string[] proteinIds = mdata.StringColumns[proteinIdColumnInd];
            int[] intensityCols = param.GetMultiChoiceParam("Intensities").Value;
            if (intensityCols.Length == 0){
                processInfo.ErrString = "Please select at least one column containing protein intensities.";
                return;
            }
            // variable to hold all intensity values
            List<double[]> columns = new List<double[]>();
            string[] sampleNames = new string[intensityCols.Length];
            // removes an optional leading "Intensity " / "intensity " / "LFQ intensity " prefix from a column name
            Regex sampleNamePattern = new Regex(@"^(?:(?:LFQ )?[Ii]ntensity )?(.*)$");
            for (int col = 0; col < intensityCols.Length; col++){
                double[] values;
                if (intensityCols[col] < mdata.ExpressionColumnCount){
                    values = ArrayUtils.ToDoubles(mdata.GetExpressionColumn(intensityCols[col]));
                    sampleNames[col] = mdata.ExpressionColumnNames[intensityCols[col]];
                } else{
                    // indices beyond the expression columns address the numeric columns
                    values = mdata.NumericColumns[intensityCols[col] - mdata.ExpressionColumnCount];
                    sampleNames[col] = mdata.NumericColumnNames[intensityCols[col] - mdata.ExpressionColumnCount];
                }
                sampleNames[col] = sampleNamePattern.Match(sampleNames[col]).Groups[1].Value;
                columns.Add(values);
            }
            int averagingMode = param.GetSingleChoiceWithSubParams("Averaging mode").Value;
            // average over columns if this option is selected
            if (averagingMode == 3){
                double[] column = new double[mdata.RowCount];
                for (int row = 0; row < mdata.RowCount; row++){
                    double[] values = new double[intensityCols.Length];
                    for (int col = 0; col < intensityCols.Length; col++){
                        values[col] = columns[col][row];
                    }
                    column[row] = ArrayUtils.Median(ExtractValidValues(values, false));
                }
                // replace the original list of columns by the single median column
                columns = new List<double[]>{column};
                sampleNames = new[]{""};
            }
            // revert logarithm if necessary
            if (param.GetBoolWithSubParams("Logarithmized").Value){
                double[] logBases = new[]{2, Math.E, 10};
                double logBase =
                    logBases[param.GetBoolWithSubParams("Logarithmized").GetSubParameters().GetSingleChoiceParam("log base").Value];
                foreach (double[] t in columns){
                    for (int row = 0; row < mdata.RowCount; row++){
                        if (t[row] == 0){
                            // the message is recorded, processing deliberately continues as before
                            processInfo.ErrString = "Are the columns really logarithmized?\nThey contain zeroes!";
                        }
                        t[row] = Math.Pow(logBase, t[row]);
                    }
                }
            }
            // NOTE(review): mw is the array handed out by mdata.NumericColumns; the kDa -> Da rescaling below
            // is done in place and therefore presumably changes the matrix column as well - confirm this is wanted.
            double[] mw = mdata.NumericColumns[param.GetSingleChoiceParam("Molecular masses").Value];
            // detect whether the molecular masses are given in Da or kDa
            if (ArrayUtils.Median(mw) < 250) // likely kDa
            {
                for (int i = 0; i < mw.Length; i++){
                    mw[i] *= 1000;
                }
            }
            double[] detectabilitySource = mw;
            if (param.GetBoolWithSubParams("Detectability correction").Value){
                detectabilitySource =
                    mdata.NumericColumns[
                        param.GetBoolWithSubParams("Detectability correction")
                             .GetSubParameters()
                             .GetSingleChoiceParam("Correction factor")
                             .Value];
            }
            // Work on a copy so that sanitizing the factors neither rewrites the molecular masses
            // (the default factor is the mass itself) nor the selected correction column.
            double[] detectabilityNormFactor = (double[]) detectabilitySource.Clone();
            // the normalization factor needs to be nonzero for all proteins
            // check and replace with 1 for all relevant cases
            for (int row = 0; row < mdata.RowCount; row++){
                // NaN never compares equal to anything (not even itself), hence double.IsNaN
                if (detectabilityNormFactor[row] == 0 || double.IsNaN(detectabilityNormFactor[row])){
                    detectabilityNormFactor[row] = 1;
                }
            }
            // detect the organism
            Organism organism = DetectOrganism(proteinIds);
            // c value the amount of DNA per cell, see: http://en.wikipedia.org/wiki/C-value
            double cValue = (organism.genomeSize*basePairWeight)/avogadro;
            // find the histones
            int[] histoneRows = FindHistones(proteinIds, organism);
            // row mask of the histones, replaces a linear search per row
            bool[] isHistone = new bool[mdata.RowCount];
            foreach (int row in histoneRows){
                isHistone[row] = true;
            }
            // write a categorical column indicating the histones
            string[][] histoneCol = new string[mdata.RowCount][];
            for (int row = 0; row < mdata.RowCount; row++){
                histoneCol[row] = isHistone[row] ? new[]{"+"} : new[]{""};
            }
            mdata.AddCategoryColumn("Histones", "", histoneCol);
            // initialize the variables for the annotation rows
            double[] totalProteinRow = new double[mdata.ExpressionColumnCount];
            double[] totalMoleculesRow = new double[mdata.ExpressionColumnCount];
            string[][] organismRow = new string[mdata.ExpressionColumnCount][];
            double[] histoneMassRow = new double[mdata.ExpressionColumnCount];
            double[] ploidyRow = new double[mdata.ExpressionColumnCount];
            double[] cellVolumeRow = new double[mdata.ExpressionColumnCount];
            double[] normalizationFactors = new double[columns.Count];
            int scalingMode = param.GetSingleChoiceWithSubParams("Scaling mode").Value;
            // calculate normalization factors for each column
            for (int col = 0; col < columns.Count; col++){
                double[] column = columns[col];
                // normalization factor to go from intensities to copies,
                // needs to be determined either using the total protein or the histone scaling approach
                double factor;
                switch (scalingMode){
                    case 0: // total protein amount
                        double mwWeightedNormalizedSummedIntensities = 0;
                        for (int row = 0; row < mdata.RowCount; row++){
                            if (!double.IsNaN(column[row]) && !double.IsNaN(mw[row])){
                                mwWeightedNormalizedSummedIntensities += (column[row]/detectabilityNormFactor[row])*mw[row];
                            }
                        }
                        factor =
                            (param.GetSingleChoiceWithSubParams("Scaling mode")
                                  .GetSubParameters()
                                  .GetDoubleParam("Protein amount per cell [pg]")
                                  .Value*1e-12*avogadro)/mwWeightedNormalizedSummedIntensities;
                        break;
                    case 1: // histone mode
                        double mwWeightedNormalizedSummedHistoneIntensities = 0;
                        foreach (int row in histoneRows){
                            if (!double.IsNaN(column[row]) && !double.IsNaN(mw[row])){
                                mwWeightedNormalizedSummedHistoneIntensities += (column[row]/detectabilityNormFactor[row])*mw[row];
                            }
                        }
                        double ploidy =
                            param.GetSingleChoiceWithSubParams("Scaling mode").GetSubParameters().GetDoubleParam("Ploidy").Value;
                        factor = (cValue*ploidy*avogadro)/mwWeightedNormalizedSummedHistoneIntensities;
                        break;
                    default:
                        factor = 1;
                        break;
                }
                normalizationFactors[col] = factor;
            }
            // check averaging mode
            if (averagingMode == 1) // same factor for all
            {
                double factor = ArrayUtils.Mean(normalizationFactors);
                for (int i = 0; i < normalizationFactors.Length; i++){
                    normalizationFactors[i] = factor;
                }
            }
            if (averagingMode == 2) // same factor in each group
            {
                int groupingInd =
                    param.GetSingleChoiceWithSubParams("Averaging mode").GetSubParameters().GetSingleChoiceParam("Grouping").Value;
                if (groupingInd == -1){
                    processInfo.ErrString = "No grouping selected.";
                    return;
                }
                string[][] groupNames = mdata.GetCategoryRowAt(groupingInd);
                string[] uniqueGroupNames = Unique(groupNames);
                int[] grouping = new int[columns.Count];
                for (int i = 0; i < columns.Count; i++){
                    // Default: the column forms its own group. Negative keys cannot collide with the
                    // (non-negative) indices into uniqueGroupNames used for real groups.
                    grouping[i] = -(i + 1);
                    if (intensityCols[i] >= mdata.ExpressionColumnCount){ // Numeric annotation columns cannot be grouped
                        continue;
                    }
                    // the category row is indexed by matrix column, not by position in the selection
                    string[] groupsOfColumn = groupNames[intensityCols[i]];
                    if (groupsOfColumn.Length == 0){ // column without group assignment
                        continue;
                    }
                    if (ArrayUtils.Contains(uniqueGroupNames, groupsOfColumn[0])){
                        grouping[i] = ArrayUtils.IndexOf(uniqueGroupNames, groupsOfColumn[0]);
                    }
                }
                // collect the factors of all columns sharing a group key
                Dictionary<int, List<double>> factors = new Dictionary<int, List<double>>();
                for (int i = 0; i < columns.Count; i++){
                    List<double> groupFactors;
                    if (!factors.TryGetValue(grouping[i], out groupFactors)){
                        groupFactors = new List<double>();
                        factors.Add(grouping[i], groupFactors);
                    }
                    groupFactors.Add(normalizationFactors[i]);
                }
                double[] averagedNormalizationFactors = new double[columns.Count];
                for (int i = 0; i < columns.Count; i++){
                    averagedNormalizationFactors[i] = ArrayUtils.Mean(factors[grouping[i]]);
                }
                normalizationFactors = averagedNormalizationFactors;
            }
            double proteinConcentration = param.GetDoubleParam("Total cellular protein concentration [g/l]").Value;
            // loop over all selected columns and calculate copy numbers
            for (int col = 0; col < columns.Count; col++){
                string sampleName = sampleNames[col];
                double[] column = columns[col];
                double factor = normalizationFactors[col];
                double[] copyNumbers = new double[mdata.RowCount];
                double[] concentrations = new double[mdata.RowCount]; // nanomolar
                double[] massFraction = new double[mdata.RowCount]; // ppm
                double[] moleFraction = new double[mdata.RowCount]; // ppm
                double totalProtein = 0; // picograms
                double histoneMass = 0; // picograms
                double totalMolecules = 0;
                for (int row = 0; row < mdata.RowCount; row++){
                    if (!double.IsNaN(column[row]) && !double.IsNaN(mw[row])){
                        copyNumbers[row] = (column[row]/detectabilityNormFactor[row])*factor;
                        totalMolecules += copyNumbers[row];
                        double proteinMass = (copyNumbers[row]*mw[row]*1e12)/avogadro; // picograms
                        totalProtein += proteinMass;
                        if (isHistone[row]){
                            histoneMass += proteinMass;
                        }
                    }
                }
                double totalVolume = (totalProtein/proteinConcentration)*1000; // femtoliters
                for (int row = 0; row < mdata.RowCount; row++){
                    if (!double.IsNaN(column[row]) && !double.IsNaN(mw[row])){
                        concentrations[row] = ((copyNumbers[row]/(totalVolume*1e-15))/avogadro)*1e9; // nanomolar
                        massFraction[row] = (((copyNumbers[row]*mw[row]*1e12)/avogadro)/totalProtein)*1e6; // ppm
                        moleFraction[row] = (copyNumbers[row]/totalMolecules)*1e6; // ppm
                    }
                }
                string suffix = (sampleName == "") ? "" : " " + sampleName;
                if (ArrayUtils.Contains(outputColumns, 0)){
                    mdata.AddNumericColumn("Copy number" + suffix, "", copyNumbers);
                }
                if (ArrayUtils.Contains(outputColumns, 1)){
                    mdata.AddNumericColumn("Concentration [nM]" + suffix, "", concentrations);
                }
                if (ArrayUtils.Contains(outputColumns, 2)){
                    mdata.AddNumericColumn("Abundance (mass/total mass) [*10^-6]" + suffix, "", massFraction);
                }
                if (ArrayUtils.Contains(outputColumns, 3)){
                    mdata.AddNumericColumn("Abundance (molecules/total molecules) [*10^-6]" + suffix, "", moleFraction);
                }
                double[] rank = ArrayUtils.Rank(copyNumbers);
                double[] relativeRank = new double[mdata.RowCount];
                double validRanks = mdata.RowCount;
                for (int row = 0; row < mdata.RowCount; row++){
                    // remove rank for protein with no copy number information
                    if (double.IsNaN((copyNumbers[row])) || copyNumbers[row] == 0){
                        rank[row] = double.NaN;
                        validRanks--; // do not consider as valid
                    }
                    // invert ranking, so that rank 0 is the most abundant protein
                    // (a NaN rank stays NaN through the subtraction)
                    rank[row] = mdata.RowCount - rank[row];
                }
                for (int row = 0; row < mdata.RowCount; row++){
                    relativeRank[row] = rank[row]/validRanks;
                }
                if (ArrayUtils.Contains(outputColumns, 4)){
                    mdata.AddNumericColumn("Copy number rank" + suffix, "", rank);
                }
                if (ArrayUtils.Contains(outputColumns, 5)){
                    mdata.AddNumericColumn("Relative copy number rank" + suffix, "", relativeRank);
                }
                // summary values exist only for expression columns and not for the median-of-columns mode
                if (intensityCols[col] < mdata.ExpressionColumnCount && averagingMode != 3){
                    totalProteinRow[intensityCols[col]] = Math.Round(totalProtein, 2);
                    totalMoleculesRow[intensityCols[col]] = Math.Round(totalMolecules, 0);
                    organismRow[intensityCols[col]] = new string[]{organism.name};
                    histoneMassRow[intensityCols[col]] = Math.Round(histoneMass, 4);
                    ploidyRow[intensityCols[col]] = Math.Round((histoneMass*1e-12)/cValue, 2);
                    cellVolumeRow[intensityCols[col]] = Math.Round(totalVolume, 2); // femtoliters
                }
            }
            if (averagingMode != 3 && ArrayUtils.Contains(outputColumns, 6)){
                mdata.AddNumericRow("Total protein [pg/cell]", "", totalProteinRow);
                mdata.AddNumericRow("Total molecules per cell", "", totalMoleculesRow);
                mdata.AddCategoryRow("Organism", "", organismRow);
                mdata.AddNumericRow("Histone mass [pg/cell]", "", histoneMassRow);
                mdata.AddNumericRow("Ploidy", "", ploidyRow);
                mdata.AddNumericRow("Cell volume [fl]", "", cellVolumeRow);
            }
        }
Пример #22
0
        /// <summary>
        /// Validates the group-related parameters, collects the column names belonging to every
        /// selected group and hands them to <c>ExtractValues</c>.
        /// </summary>
        /// <remarks>
        /// The method returns without calling <c>ExtractValues</c> as soon as one of the validation
        /// helpers (<c>CheckGroup</c>, <c>CheckGroupIDsValid</c>, <c>ImportMinAmount</c>) rejects the
        /// input; those helpers receive <paramref name="processInfo"/>, presumably to report the reason.
        /// </remarks>
        /// <param name="mdata">Matrix whose category row and column names are read.</param>
        /// <param name="param">Parameter set holding the user's choices.</param>
        /// <param name="supplTables">Not used by this implementation.</param>
        /// <param name="documents">Not used by this implementation.</param>
        /// <param name="processInfo">Passed on to the validation helpers.</param>
        public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
                                ref IDocumentData[] documents, ProcessInfo processInfo)
        {
            ParameterWithSubParams <int> minValidType = param.GetParamWithSubParams <int>("Min. expression value");

            if (!CheckGroup(minValidType, processInfo))
            {
                return;
            }
            Parameter <int> m = param.GetParam <int>("Mode");
            ParameterWithSubParams <int> va = param.GetParamWithSubParams <int>("Min. valid samples in a group");

            if (!CheckGroup(va, processInfo))
            {
                return;
            }
            ParameterWithSubParams <int> p = param.GetParamWithSubParams <int>("Group");

            if (!CheckGroup(p, processInfo))
            {
                return;
            }
            int colInd = p.Value;

            string[] groupids = ExtractGroup(mdata, p, processInfo, colInd);

            // the helper returns true when the group ids are NOT usable
            if (CheckGroupIDsValid(groupids, processInfo, m.Value))
            {
                return;
            }
            HashSet <string> selectedGroups = new HashSet <string>(groupids);

            // map every selected group name to the names of the columns carrying that group
            string[][] cats = mdata.GetCategoryRowAt(colInd);
            Dictionary <string, List <string> > samples = new Dictionary <string, List <string> >();

            for (int i = 0; i < cats.Length; i++)
            {
                foreach (string groupName in cats[i])
                {
                    if (!selectedGroups.Contains(groupName))
                    {
                        continue;
                    }
                    List <string> columnNames;
                    if (!samples.TryGetValue(groupName, out columnNames))
                    {
                        columnNames = new List <string>();
                        samples.Add(groupName, columnNames);
                    }
                    columnNames.Add(mdata.ColumnNames[i]);
                }
            }
            Dictionary <string, int> minValidAmount = new Dictionary <string, int>();

            if (!ImportMinAmount(va, samples, minValidAmount, processInfo, m.Value))
            {
                return;
            }
            ExtractValues(mdata, samples, minValidType, minValidAmount, m.Value);
        }
 /// <summary>
 /// Applies the per-row <c>TransformCategories</c> overload to every category row of
 /// <paramref name="mdata"/> and returns the results in row order.
 /// </summary>
 /// <param name="mdata">Matrix providing the category rows.</param>
 /// <param name="cols">Passed through unchanged to the per-row overload.</param>
 /// <param name="n">Passed through unchanged to the per-row overload.</param>
 /// <returns>One transformed category row per category row of the matrix.</returns>
 private static List<string[][]> TransformCategories(IMatrixData mdata, IList<int> cols, int n)
 {
     List<string[][]> transformedRows = new List<string[][]>();
     int rowIndex = 0;
     while (rowIndex < mdata.CategoryRowCount){
         string[][] sourceRow = mdata.GetCategoryRowAt(rowIndex);
         string[][] transformedRow = TransformCategories(sourceRow, cols, n);
         transformedRows.Add(transformedRow);
         rowIndex++;
     }
     return transformedRows;
 }
Пример #24
0
        /// <summary>
        /// Z-scores the matrix, either as a whole (per row or per column) or separately within the
        /// groups of a category row, and optionally reports the means and standard deviations used.
        /// </summary>
        /// <remarks>
        /// "Matrix access" value 0 selects row-wise processing. Only in that case the "Grouping"
        /// sub-parameter is read; its value minus one is the category row index, so value 0 means
        /// "no grouping". Overlapping groups (a column with more than one group) are rejected.
        /// </remarks>
        /// <param name="mdata">Matrix that is processed and extended in place.</param>
        /// <param name="param">Parameter set holding the user's choices.</param>
        /// <param name="supplTables">Not used by this implementation.</param>
        /// <param name="documents">Not used by this implementation.</param>
        /// <param name="processInfo">Supplies the thread count and receives the error message.</param>
        public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
                                ref IDocumentData[] documents, ProcessInfo processInfo)
        {
            ParameterWithSubParams <int> accessParam = param.GetParamWithSubParams <int>("Matrix access");
            bool byRows = accessParam.Value == 0;
            int groupingIndex = byRows
                ? accessParam.GetSubParameters().GetParam <int>("Grouping").Value - 1
                : -1;
            bool reportStats = param.GetParam <bool>("Report mean and std. dev.").Value;
            bool useMedian = param.GetParam <bool>("Use median").Value;

            if (groupingIndex >= 0)
            {
                // grouped mode: every column may belong to at most one group
                string[][] categoryRow = mdata.GetCategoryRowAt(groupingIndex);
                for (int k = 0; k < categoryRow.Length; k++)
                {
                    if (categoryRow[k].Length > 1)
                    {
                        processInfo.ErrString = "The groups are overlapping.";
                        return;
                    }
                }
                string[] groupLabels = ArrayUtils.UniqueValuesPreserveOrder(categoryRow);
                double[][] groupMeans;
                double[][] groupStddevs;
                ZscoreGroups(mdata, categoryRow, processInfo.NumThreads, reportStats, useMedian, groupLabels,
                             out groupMeans, out groupStddevs);
                if (!reportStats)
                {
                    return;
                }
                for (int g = 0; g < groupLabels.Length; g++)
                {
                    mdata.AddNumericColumn("Mean " + groupLabels[g], "Mean", groupMeans[g]);
                    mdata.AddNumericColumn("Std. dev. " + groupLabels[g], "Std. dev.", groupStddevs[g]);
                }
                return;
            }

            // ungrouped mode
            double[] allMeans;
            double[] allStddevs;
            Zscore(byRows, mdata, processInfo.NumThreads, reportStats, useMedian, out allMeans, out allStddevs);
            if (!reportStats)
            {
                return;
            }
            if (byRows)
            {
                // one value per row -> stored as numeric columns
                mdata.AddNumericColumn("Mean", "Mean", allMeans);
                mdata.AddNumericColumn("Std. dev.", "Std. dev.", allStddevs);
            }
            else
            {
                // one value per column -> stored as numeric rows
                mdata.AddNumericRow("Mean", "Mean", allMeans);
                mdata.AddNumericRow("Std. dev.", "Std. dev.", allStddevs);
            }
        }
Пример #25
0
        /// <summary>
        /// Subtracts the quantity selected in "Subtract what" from the matrix, either over whole
        /// rows/columns or separately within the groups of a category row.
        /// </summary>
        /// <remarks>
        /// "Matrix access" value 0 selects row-wise processing; only then the "Grouping" sub-parameter
        /// is read, and its value minus one is the category row index (value 0 means "no grouping").
        /// Overlapping groups (a column with more than one group) are rejected.
        /// </remarks>
        /// <param name="mdata">Matrix that is processed in place.</param>
        /// <param name="param">Parameter set holding the user's choices.</param>
        /// <param name="supplTables">Not used by this implementation.</param>
        /// <param name="documents">Not used by this implementation.</param>
        /// <param name="processInfo">Supplies the thread count and receives the error message.</param>
        public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
            ref IDocumentData[] documents, ProcessInfo processInfo)
        {
            SingleChoiceWithSubParams accessParam = param.GetSingleChoiceWithSubParams("Matrix access");
            bool byRows = accessParam.Value == 0;
            int groupingIndex = -1;
            if (byRows){
                groupingIndex = accessParam.GetSubParameters().GetSingleChoiceParam("Grouping").Value - 1;
            }
            int subtractWhat = param.GetSingleChoiceParam("Subtract what").Value;
            if (groupingIndex < 0){
                // no grouping: operate on complete rows or columns
                SubtractValues(byRows, GetFunc(subtractWhat), mdata, processInfo.NumThreads);
                return;
            }
            string[][] categoryRow = mdata.GetCategoryRowAt(groupingIndex);
            for (int k = 0; k < categoryRow.Length; k++){
                if (categoryRow[k].Length > 1){
                    processInfo.ErrString = "The groups are overlapping.";
                    return;
                }
            }
            SubtractGroups(mdata, categoryRow, GetFunc(subtractWhat));
        }
 /// <summary>
 /// Writes <paramref name="data"/> as a tab-separated text file to the path given by the
 /// "File name" parameter.
 /// </summary>
 /// <remarks>
 /// Layout, in this order: one line with the column names, an optional <c>#!{Description}</c> line,
 /// a <c>#!{Type}</c> line (E, C, N, T, M per column), one <c>#!{N:name}</c> line per numeric row,
 /// one <c>#!{C:name}</c> line per category row, then one line per matrix row. Columns are always
 /// ordered expression, category, numeric, string, multi-numeric. Annotation rows only have
 /// values for the expression columns; all other cells of those lines are empty.
 /// Double quotes are removed from the data lines.
 /// </remarks>
 /// <param name="parameters">Parameter set providing the target file name.</param>
 /// <param name="data">Matrix to export.</param>
 /// <param name="processInfo">Receives the exception message if the file cannot be opened.</param>
 public void Export(Parameters parameters, IMatrixData data, ProcessInfo processInfo)
 {
     string filename = parameters.GetFileParam("File name").Value;
     StreamWriter writer;
     try{
         writer = new StreamWriter(filename);
     } catch (Exception e){
         processInfo.ErrString = e.Message;
         return;
     }
     // 'using' closes the file even when one of the writes below throws
     using (writer){
         // number of columns that follow the expression columns on every line
         int annotationColumnCount = data.CategoryColumnCount + data.NumericColumnCount + data.StringColumnCount +
                                     data.MultiNumericColumnCount;
         // header line: column names
         List<string> words = new List<string>();
         for (int i = 0; i < data.ExpressionColumnCount; i++){
             words.Add(Trunc(data.ExpressionColumnNames[i]));
         }
         for (int i = 0; i < data.CategoryColumnCount; i++){
             words.Add(Trunc(data.CategoryColumnNames[i]));
         }
         for (int i = 0; i < data.NumericColumnCount; i++){
             words.Add(Trunc(data.NumericColumnNames[i]));
         }
         for (int i = 0; i < data.StringColumnCount; i++){
             words.Add(Trunc(data.StringColumnNames[i]));
         }
         for (int i = 0; i < data.MultiNumericColumnCount; i++){
             words.Add(Trunc(data.MultiNumericColumnNames[i]));
         }
         writer.WriteLine(StringUtils.Concat("\t", words));
         // optional line: column descriptions (missing descriptions become empty cells)
         if (HasAnyDescription(data)){
             words = new List<string>();
             for (int i = 0; i < data.ExpressionColumnCount; i++){
                 words.Add(Trunc(data.ExpressionColumnDescriptions[i] ?? ""));
             }
             for (int i = 0; i < data.CategoryColumnCount; i++){
                 words.Add(Trunc(data.CategoryColumnDescriptions[i] ?? ""));
             }
             for (int i = 0; i < data.NumericColumnCount; i++){
                 words.Add(Trunc(data.NumericColumnDescriptions[i] ?? ""));
             }
             for (int i = 0; i < data.StringColumnCount; i++){
                 words.Add(Trunc(data.StringColumnDescriptions[i] ?? ""));
             }
             for (int i = 0; i < data.MultiNumericColumnCount; i++){
                 words.Add(Trunc(data.MultiNumericColumnDescriptions[i] ?? ""));
             }
             writer.WriteLine("#!{Description}" + StringUtils.Concat("\t", words));
         }
         // type line: one letter per column
         words = new List<string>();
         for (int i = 0; i < data.ExpressionColumnCount; i++){
             words.Add("E");
         }
         for (int i = 0; i < data.CategoryColumnCount; i++){
             words.Add("C");
         }
         for (int i = 0; i < data.NumericColumnCount; i++){
             words.Add("N");
         }
         for (int i = 0; i < data.StringColumnCount; i++){
             words.Add("T");
         }
         for (int i = 0; i < data.MultiNumericColumnCount; i++){
             words.Add("M");
         }
         writer.WriteLine("#!{Type}" + StringUtils.Concat("\t", words));
         // numeric annotation rows
         for (int i = 0; i < data.NumericRowCount; i++){
             words = new List<string>();
             for (int j = 0; j < data.ExpressionColumnCount; j++){
                 words.Add("" + data.NumericRows[i][j]);
             }
             for (int j = 0; j < annotationColumnCount; j++){
                 words.Add("");
             }
             writer.WriteLine("#!{N:" + data.NumericRowNames[i] + "}" + StringUtils.Concat("\t", words));
         }
         // category annotation rows
         for (int i = 0; i < data.CategoryRowCount; i++){
             // fetch the row once instead of once per column
             string[][] categoryRow = data.GetCategoryRowAt(i);
             words = new List<string>();
             for (int j = 0; j < data.ExpressionColumnCount; j++){
                 string[] s = categoryRow[j];
                 words.Add(s.Length == 0 ? "" : StringUtils.Concat(";", s));
             }
             for (int j = 0; j < annotationColumnCount; j++){
                 words.Add("");
             }
             writer.WriteLine("#!{C:" + data.CategoryRowNames[i] + "}" + StringUtils.Concat("\t", words));
         }
         // fetch every category column once instead of once per matrix row
         string[][][] categoryColumns = new string[data.CategoryColumnCount][][];
         for (int i = 0; i < categoryColumns.Length; i++){
             categoryColumns[i] = data.GetCategoryColumnAt(i);
         }
         // data lines
         for (int j = 0; j < data.RowCount; j++){
             words = new List<string>();
             for (int i = 0; i < data.ExpressionColumnCount; i++){
                 words.Add(Trunc("" + data[j, i]));
             }
             for (int i = 0; i < categoryColumns.Length; i++){
                 string[] q = categoryColumns[i][j] ?? new string[0];
                 words.Add(Trunc((q.Length > 0 ? StringUtils.Concat(";", q) : "")));
             }
             for (int i = 0; i < data.NumericColumnCount; i++){
                 words.Add(Trunc("" + data.NumericColumns[i][j]));
             }
             for (int i = 0; i < data.StringColumnCount; i++){
                 words.Add(Trunc(data.StringColumns[i][j]));
             }
             for (int i = 0; i < data.MultiNumericColumnCount; i++){
                 double[] q = data.MultiNumericColumns[i][j];
                 words.Add(Trunc((q.Length > 0 ? StringUtils.Concat(";", q) : "")));
             }
             string line = StringUtils.Concat("\t", words);
             // quotes would confuse readers of the tab-separated format
             line = line.Replace("\"", "");
             writer.WriteLine(line);
         }
     }
 }