Esempio n. 1
0
        /// <summary>
        /// Gene enrichment analysis output loader, used for both Basic PECA and PECA-N.
        /// Reads the tab-separated file <c>Goterms.txt</c> located under <paramref name="workingDir"/>
        /// into a freshly created matrix that has no main columns.
        /// </summary>
        /// <param name="mdata">Only used as a factory: the result is created via <c>CreateNewInstance</c>.</param>
        /// <param name="workingDir">Directory that contains <c>Goterms.txt</c>.</param>
        /// <param name="option">
        /// 0 names the matrix "GSA_Degradation", 1 names it "GSA_Synthesis";
        /// any other value leaves the name as "GSA".
        /// </param>
        /// <returns>The new matrix holding the file's columns.</returns>
        public static IMatrixData GetGOEnr(IMatrixData mdata, string workingDir, int option)//, out string errString)
        {
            const char separator = '\t';
            string filename = Path.Combine(workingDir, @".\Goterms.txt");

            string name = "GSA";
            switch (option)
            {
                case 0:
                    name += "_Degradation";
                    break;
                case 1:
                    name += "_Synthesis";
                    break;
            }

            IMatrixData result = (IMatrixData)mdata.CreateNewInstance();
            result.Clear();
            result.Name    = name;
            result.AltName = name;

            string[] header = TabSep.GetColumnNames(filename, 0, PerseusUtils.commentPrefix,
                                                    PerseusUtils.commentPrefixExceptions, null, separator);
            string[][] columns = TabSep.GetColumns(header, filename, 0, PerseusUtils.commentPrefix,
                                                   PerseusUtils.commentPrefixExceptions, separator);
            int rowCount = TabSep.GetRowCount(filename);

            // Every column is first loaded as a string column; the matrix gets rowCount rows and zero main columns.
            result.Values.Init(rowCount, 0);
            result.SetAnnotationColumns(new List<string>(header), new List<string>(header), new List<string[]>(columns),
                                        new List<string>(), new List<string>(), new List<string[][]>(),
                                        new List<string>(), new List<string>(), new List<double[]>(),
                                        new List<string>(), new List<string>(), new List<double[][]>());

            // The pattern only matches names that contain none of "id", "name" or "members" (case-sensitive).
            // Those columns are the ones handed to StringToNumerical for conversion.
            Regex numericName = new Regex(@"^((?!id|name|members).)*$");
            List<int> numericIndices = new List<int>();
            int index = 0;
            foreach (string columnName in header)
            {
                if (numericName.IsMatch(columnName))
                {
                    numericIndices.Add(index);
                }
                index++;
            }
            StringToNumerical(numericIndices, result);
            return result;
        }
        /// <summary>
        /// Replaces the content of <paramref name="data"/> with a "Count" table built from
        /// <paramref name="result"/>: one row per entry whose total count is at least
        /// <paramref name="minCount"/>, with categorical columns "Type" and "Name" and numeric columns
        /// "Count" and "Percentage of total". When <paramref name="selection"/> is not null, the
        /// numeric columns "Selection count" and "Selection percentage" are appended.
        /// </summary>
        /// <param name="result">Source of the per-entry type, name and counts.</param>
        /// <param name="data">Matrix to overwrite; its row count before clearing is the percentage denominator.</param>
        /// <param name="minCount">Entries with a smaller total count are skipped.</param>
        /// <param name="selection">Only tested for null to decide whether selection columns are emitted.</param>
        private static void CreateMatrixData(CountingResult result, IMatrixData data, int minCount, IEnumerable selection)
        {
            bool hasSelection = selection != null;

            var types            = new List<string[]>();
            var names            = new List<string[]>();
            var totals           = new List<double>();
            var totalPercentages = new List<double>();
            var selected         = new List<double>();
            var selectedPercent  = new List<double>();

            for (int entry = 0; entry < result.Count; entry++)
            {
                int total = result.GetTotalCountAt(entry);
                if (total >= minCount)
                {
                    types.Add(new[] { result.GetType1At(entry) });
                    names.Add(new[] { result.GetName1At(entry) });
                    totals.Add(total);
                    // Percentage of all rows, rounded to two decimals.
                    totalPercentages.Add(Math.Round(10000.0 * total / data.RowCount) / 100.0);
                    if (hasSelection)
                    {
                        int inSelection = result.GetSelectCountAt(entry);
                        selected.Add(inSelection);
                        // Share of this entry that lies in the selection, rounded to one decimal.
                        selectedPercent.Add(Math.Round(1000.0 * inSelection / total) / 10.0);
                    }
                }
            }

            var categoryNames   = new List<string>(new[] { "Type", "Name" });
            var categoryColumns = new List<string[][]> { types.ToArray(), names.ToArray() };

            var numericNames   = new List<string>(new[] { "Count", "Percentage of total" });
            var numericColumns = new List<double[]> { totals.ToArray(), totalPercentages.ToArray() };
            if (hasSelection)
            {
                numericNames.AddRange(new[] { "Selection count", "Selection percentage" });
                numericColumns.Add(selected.ToArray());
                numericColumns.Add(selectedPercent.ToArray());
            }

            // The resulting matrix has one row per kept entry and no main columns.
            double[,] emptyValues = new double[types.Count, 0];

            data.Clear();
            data.Name        = "Count";
            data.ColumnNames = new List<string>();
            data.Values.Set(emptyValues);
            data.SetAnnotationColumns(new List<string>(), new List<string[]>(), categoryNames, categoryColumns,
                                      numericNames, numericColumns, new List<string>(), new List<double[][]>());
        }
Esempio n. 3
0
        /// <summary>
        /// Replaces <paramref name="mdata"/> with a "Summary" matrix. For every column chosen in the
        /// "Columns" parameter, each statistic chosen in the "Calculate" parameter (an index into
        /// <c>SummaryStatisticsRows.procs</c>) is evaluated. The statistic names become the main column
        /// names and the source column names are stored in a string column called "Columns".
        /// </summary>
        /// <param name="mdata">Input matrix; overwritten with the summary.</param>
        /// <param name="param">Supplies the "Columns" and "Calculate" selections.</param>
        /// <param name="supplTables">Not used by this method.</param>
        /// <param name="documents">Not used by this method.</param>
        /// <param name="processInfo">Not used by this method.</param>
        public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
                                ref IDocumentData[] documents, ProcessInfo processInfo)
        {
            int[]        cols      = param.GetParam<int[]>("Columns").Value;
            HashSet<int> requested = ArrayUtils.ToHashSet(param.GetParam<int[]>("Calculate").Value);

            // Indices of the requested statistics, in ascending order; out-of-range requests are ignored.
            List<int> active = new List<int>();
            for (int p = 0; p < SummaryStatisticsRows.procs.Length; p++)
            {
                if (requested.Contains(p))
                {
                    active.Add(p);
                }
            }

            // One result array (length = number of selected columns) and one name per active statistic.
            List<double[]> ex    = new List<double[]>();
            List<string>   names = new List<string>();
            foreach (int p in active)
            {
                ex.Add(new double[cols.Length]);
                names.Add(SummaryStatisticsRows.procs[p].Item1);
            }

            for (int c = 0; c < cols.Length; c++)
            {
                double[] vals = GetColumn(cols[c], mdata);
                for (int k = 0; k < active.Count; k++)
                {
                    ex[k][c] = SummaryStatisticsRows.procs[active[k]].Item2(vals);
                }
            }

            // Everything derived from the old matrix must be captured before it is cleared.
            double[,] exVals = GetExVals(ex);
            string[] colNames              = GetColNames(mdata, cols);
            var      categoryRowNames      = mdata.CategoryRowNames;
            var      transformedCategories = TransformCategories(mdata, cols, mdata.ColumnCount);
            var      numericRowNames       = mdata.NumericRowNames;
            var      transformedNumeric    = TransformNumeric(mdata.NumericRows, cols, mdata.ColumnCount);

            mdata.Clear();
            mdata.Name               = "Summary";
            mdata.ColumnNames        = new List<string>(names);
            mdata.ColumnDescriptions = new List<string>(names);
            mdata.Values.Set(exVals);
            mdata.SetAnnotationColumns(new List<string>(new[] { "Columns" }), new List<string[]>(new[] { colNames }),
                                       categoryRowNames, transformedCategories, numericRowNames, transformedNumeric,
                                       new List<string>(), new List<double[][]>());
        }
        /// <summary>
        /// Runs an external script on the matrix: writes <paramref name="mdata"/> to a temp file,
        /// invokes the interpreter given by the <c>InterpreterLabel</c> parameter on the code file,
        /// and reads the main output and the supplementary tables back from temp files.
        /// </summary>
        /// <param name="mdata">Input matrix; cleared and replaced by the script output on success.</param>
        /// <param name="param">Supplies the interpreter path, the code file and extra command line arguments.</param>
        /// <param name="supplTables">
        /// Supplementary tables filled from the script output. When null, an array of
        /// <c>NumSupplTables</c> empty matrices is created.
        /// </param>
        /// <param name="documents">Not used by this method.</param>
        /// <param name="processInfo">
        /// Receives the error text in <c>ErrString</c> when the interpreter is not specified,
        /// the code file is missing, or the process exits with a non-zero code.
        /// </param>
        public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables, ref IDocumentData[] documents,
                                ProcessInfo processInfo)
        {
            var remoteExe = param.GetParam<string>(InterpreterLabel).Value;

            if (string.IsNullOrWhiteSpace(remoteExe))
            {
                processInfo.ErrString = Resources.RemoteExeNotSpecified;
                return;
            }
            // Validate the code file before any temp file is created, so a failed check leaves nothing behind.
            if (!TryGetCodeFile(param, out string codeFile))
            {
                processInfo.ErrString = $"Code file '{codeFile}' was not found";
                return;
            }
            if (supplTables == null)
            {
                supplTables = Enumerable.Range(0, NumSupplTables).Select(i => PerseusFactory.CreateMatrixData()).ToArray();
            }

            // Path.GetTempFileName creates the files on disk; they are removed in the finally block below.
            var inFile    = Path.GetTempFileName();
            var outFile   = Path.GetTempFileName();
            var suppFiles = supplTables.Select(i => Path.GetTempFileName()).ToArray();

            try
            {
                PerseusUtils.WriteMatrixToFile(mdata, inFile, AdditionalMatrices);
                var commandLineArguments = GetCommandLineArguments(param);
                var args = $"{codeFile} {commandLineArguments} {inFile} {outFile} {string.Join(" ", suppFiles)}";

                Debug.WriteLine($"executing > {remoteExe} {args}");
                if (Utils.RunProcess(remoteExe, args, processInfo.Status, out string processInfoErrString) != 0)
                {
                    processInfo.ErrString = processInfoErrString;
                    return;
                }
                mdata.Clear();
                PerseusUtils.ReadMatrixFromFile(mdata, processInfo, outFile, '\t');
                for (int i = 0; i < NumSupplTables; i++)
                {
                    PerseusUtils.ReadMatrixFromFile(supplTables[i], processInfo, suppFiles[i], '\t');
                }
            }
            finally
            {
                // Best-effort cleanup: a file that cannot be deleted must not hide the real outcome.
                foreach (var tempFile in new[] { inFile, outFile }.Concat(suppFiles))
                {
                    try
                    {
                        System.IO.File.Delete(tempFile);
                    }
                    catch (System.IO.IOException e)
                    {
                        Debug.WriteLine($"could not delete temp file '{tempFile}': {e.Message}");
                    }
                    catch (System.UnauthorizedAccessException e)
                    {
                        Debug.WriteLine($"could not delete temp file '{tempFile}': {e.Message}");
                    }
                }
            }
        }
Esempio n. 5
0
        //this function is modified from PerseusPluginLib/Load/UnstructuredTxtUpload.cs LoadSplit function
        //obtains the output from fdr.exe (so only applicable to PECA CORE and N)
        /// <summary>
        /// Loads the tab-separated result file <paramref name="filename"/> into <paramref name="mdata"/>,
        /// replacing its previous content with a matrix named "PECA Analysis".
        /// The first line of the file is rewritten with <paramref name="geneName"/> before it is read.
        /// </summary>
        /// <param name="mdata">Matrix that is cleared and refilled from the file.</param>
        /// <param name="param">Not used by this method.</param>
        /// <param name="dataParam">Supplies the int[] selection named <paramref name="expSeries1"/>.</param>
        /// <param name="filename">Path of the file to load; its first line is modified in place.</param>
        /// <param name="geneName">Header text for the gene name column, which the file itself lacks.</param>
        /// <param name="expSeries1">Name of the parameter in <paramref name="dataParam"/> whose length is the number of columns per series.</param>
        /// <param name="numOfSeries">Number of expression series in the file.</param>
        /// <exception cref="System.ArgumentOutOfRangeException">
        /// The file has fewer columns than one name column plus the expected expression columns.
        /// </exception>
        public static void GetOutput(IMatrixData mdata, Parameters param, Parameters dataParam, string filename, string geneName, string expSeries1 = "Expression Series 1", int numOfSeries = 2)
        {
            char separator = '\t';

            //gene name column name is not included in the file so need to replace it
            ReplaceFirstLine(filename, geneName);

            string[] colNames = TabSep.GetColumnNames(filename, 0, PerseusUtils.commentPrefix,
                                                      PerseusUtils.commentPrefixExceptions, null, separator);

            string[][] cols = TabSep.GetColumns(colNames, filename, 0, PerseusUtils.commentPrefix,
                                                PerseusUtils.commentPrefixExceptions, separator);
            int nrows = TabSep.GetRowCount(filename);

            //be careful with changes of Number of time points in the future
            int numOfExpCols = numOfSeries * dataParam.GetParam<int[]>(expSeries1).Value.Length;

            //file format is structured so that expressions columns are before numeric ones
            //first column guaranteed to be the name column
            int numOfNumericCols = colNames.Length - numOfExpCols - 1;

            // Check the layout before mdata is cleared, so a malformed file does not destroy the input matrix.
            if (numOfExpCols < 0 || numOfNumericCols < 0)
            {
                throw new System.ArgumentOutOfRangeException("numOfSeries",
                    "File '" + filename + "' has " + colNames.Length + " columns, which is fewer than the name column plus "
                    + numOfExpCols + " expression columns.");
            }

            int[] expList     = Enumerable.Range(1, numOfExpCols).ToArray();
            int[] numericList = Enumerable.Range(numOfExpCols + 1, numOfNumericCols).ToArray();

            mdata.Clear();
            mdata.Name = "PECA Analysis";
            // Everything is loaded as string columns first; the matrix has nrows rows and no main columns yet.
            mdata.Values.Init(nrows, 0);
            mdata.SetAnnotationColumns(new List<string>(colNames), new List<string>(colNames), new List<string[]>(cols), new List<string>(),
                                       new List<string>(), new List<string[][]>(), new List<string>(), new List<string>(), new List<double[]>(),
                                       new List<string>(), new List<string>(), new List<double[][]>());

            //so convert the numeric ones before expression columns
            StringToNumerical(numericList, mdata);
            StringToExpression(expList, mdata);
        }
        /// <summary>
        /// An auxiliary method for testing the action of regular expressions.
        /// Limited to a single column, which should be sufficient for this purpose.
        /// Multiple rows are allowed to test the effect of one regex on several strings.
        /// </summary>
        /// <param name="regexStr">Value for the "Regular expression" parameter.</param>
        /// <param name="stringsInit">Initial content of the single string column, one entry per row.</param>
        /// <param name="stringsExpect">Expected content of that column after processing, row by row.</param>
        private static void TestRegex(string regexStr, string[] stringsInit, string[] stringsExpect)
        {
            const string name = "Test";

            IMatrixData[]   supplTables       = null;
            IDocumentData[] documents         = null;
            List<string>    stringColumnNames = new List<string> {
                "Column Name"
            };
            List<string[]> stringColumnsInit = new List<string[]> {
                stringsInit
            };
            List<string[]> stringColumnsExpect = new List<string[]> {
                stringsExpect
            };
            Parameters param =
                new Parameters(new Parameter[] {
                new MultiChoiceParam("Columns", new[] { 0 })
                {
                    Values = stringColumnNames
                },
                new StringParam("Regular expression", regexStr), new BoolParam("Keep original columns", false),
                new BoolParam("Strings separated by semicolons are independent", false)
            });
            IMatrixData mdata = PerseusFactory.CreateNewMatrixData();

            mdata.Clear();
            mdata.Name = name;
            mdata.SetAnnotationColumns(stringColumnNames, stringColumnsInit, mdata.CategoryColumnNames, new List<string[][]>(),
                                       mdata.NumericColumnNames, mdata.NumericColumns, mdata.MultiNumericColumnNames, mdata.MultiNumericColumns);
            var ptc = new ProcessTextColumns();

            ptc.ProcessData(mdata, param, ref supplTables, ref documents, null);
            const bool ignoreCase = false;

            for (int rowInd = 0; rowInd < stringColumnsInit[0].Length; rowInd++)
            {
                // Expected value goes first and the processed value second, so failure messages label them correctly.
                Assert.AreEqual(stringColumnsExpect[0][rowInd], mdata.StringColumns[0][rowInd], ignoreCase);
            }
        }
Esempio n. 7
0
        /// <summary>
        /// An auxiliary method for testing the action of regular expressions.
        /// Limited to a single column, which should be sufficient for this purpose.
        /// Multiple rows are allowed to test the effect of one regex on several strings.
        /// </summary>
        /// <param name="regexStr">Value for the "Regular expression" parameter.</param>
        /// <param name="stringsInit">Initial content of the single string column, one entry per row.</param>
        /// <param name="stringsExpect">Expected content of that column after processing, compared ignoring case.</param>
        private static void TestRegex(string regexStr, string[] stringsInit, string[] stringsExpect)
        {
            const string name = "Test";

            IMatrixData[]   supplTables       = null;
            IDocumentData[] documents         = null;
            List<string>    stringColumnNames = new List<string> {
                "Column Name"
            };
            List<string[]> stringColumnsInit = new List<string[]> {
                stringsInit
            };
            List<string[]> stringColumnsExpect = new List<string[]> {
                stringsExpect
            };
            ProcessTextColumns ptc   = new ProcessTextColumns();
            IMatrixData        mdata = PerseusFactory.CreateMatrixData();

            mdata.Clear();
            mdata.Name = name;
            mdata.SetAnnotationColumns(stringColumnNames, stringColumnsInit, mdata.CategoryColumnNames, new List<string[][]>(),
                                       mdata.NumericColumnNames, mdata.NumericColumns, mdata.MultiNumericColumnNames, mdata.MultiNumericColumns);
            string     errorStr = string.Empty;
            // Start from the activity's own parameter set and override only the values under test.
            Parameters param    = ptc.GetParameters(mdata, ref errorStr);

            param.GetParam<int[]>("Columns").Value              = new[] { 0 };
            param.GetParam<string>("Regular expression").Value  = regexStr;
            param.GetParam<bool>("Keep original columns").Value = false;
            param.GetParam<bool>("Strings separated by semicolons are independent").Value = false;
            ptc.ProcessData(mdata, param, ref supplTables, ref documents, null);
            for (int rowInd = 0; rowInd < stringColumnsInit[0].Length; rowInd++)
            {
                // "expected" is the caller-supplied value and "actual" is what processing produced.
                string expected = stringColumnsExpect[0][rowInd];
                string actual   = mdata.StringColumns[0][rowInd];
                StringAssert.AreEqualIgnoringCase(expected, actual);
            }
        }
        /// <summary>
        /// Runs an external script on the matrix: writes <paramref name="mdata"/> and
        /// <paramref name="param"/> to temp files, invokes the interpreter given by the
        /// <c>InterpreterLabel</c> parameter on the code file, and reads the result back.
        /// </summary>
        /// <param name="mdata">Input matrix; cleared and replaced by the script output on success.</param>
        /// <param name="param">Supplies the interpreter path and code file; also serialized to a file passed to the script.</param>
        /// <param name="supplTables">Not used by this method.</param>
        /// <param name="documents">Not used by this method.</param>
        /// <param name="processInfo">Receives the process error text in <c>ErrString</c> when the exit code is non-zero.</param>
        public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables, ref IDocumentData[] documents,
                                ProcessInfo processInfo)
        {
            var remoteExe = param.GetParam<string>(InterpreterLabel).Value;

            // Path.GetTempFileName creates the files on disk; they are removed in the finally block below.
            var inFile    = Path.GetTempFileName();
            var paramFile = Path.GetTempFileName();
            var outFile   = Path.GetTempFileName();

            try
            {
                PerseusUtils.WriteMatrixToFile(mdata, inFile, false);
                param.ToFile(paramFile);
                var codeFile = GetCodeFile(param);
                var args     = $"{codeFile} {paramFile} {inFile} {outFile}";

                Debug.WriteLine($"executing > {remoteExe} {args}");
                if (Utils.RunProcess(remoteExe, args, processInfo.Status, out string processInfoErrString) != 0)
                {
                    processInfo.ErrString = processInfoErrString;
                    return;
                }
                mdata.Clear();
                PerseusUtils.ReadMatrixFromFile(mdata, processInfo, outFile, '\t');
            }
            finally
            {
                // Best-effort cleanup: a file that cannot be deleted must not hide the real outcome.
                foreach (var tempFile in new[] { inFile, paramFile, outFile })
                {
                    try
                    {
                        System.IO.File.Delete(tempFile);
                    }
                    catch (System.IO.IOException e)
                    {
                        Debug.WriteLine($"could not delete temp file '{tempFile}': {e.Message}");
                    }
                    catch (System.UnauthorizedAccessException e)
                    {
                        Debug.WriteLine($"could not delete temp file '{tempFile}': {e.Message}");
                    }
                }
            }
        }