/// <summary>
/// Reads the tab-separated file named by the "Input file" parameter and adds one numeric row to
/// <paramref name="mdata"/> for every file column other than the 'Name' column.
/// </summary>
/// <param name="mdata">Data whose column names are looked up in the file's 'Name' column.</param>
/// <param name="param">Parameters providing the "Input file" path.</param>
/// <returns>An error message, or null when all rows were added.</returns>
private static string ProcessDataReadFromFile(IDataWithAnnotationRows mdata, Parameters param)
        {
            string filename = param.GetParam<string>("Input file").Value;

            string[] headers;
            try
            {
                headers = TabSep.GetColumnNames(filename, '\t');
            }
            catch (Exception)
            {
                return "Could not open file " + filename + ". It maybe open in another program.";
            }

            int nameIndex = GetNameIndex(headers);
            if (nameIndex < 0)
            {
                return "Error: the file has to contain a column called 'Name'.";
            }
            if (headers.Length < 2)
            {
                return "Error: the file does not contain a numerical column.";
            }

            string[] names = TabSep.GetColumn(headers[nameIndex], filename, '\t');
            // presumably maps every entry of the 'Name' column to its index in that column
            Dictionary<string, int> indexOfName = ArrayUtils.InverseMap(names);

            for (int c = 0; c < headers.Length; c++)
            {
                if (c != nameIndex)
                {
                    string rowName = headers[c];
                    string[] rawValues = TabSep.GetColumn(rowName, filename, '\t');
                    double[] values = new double[mdata.ColumnCount];
                    for (int k = 0; k < values.Length; k++)
                    {
                        // NaN marks columns missing from the file, blank cells and unparsable cells.
                        double parsed = double.NaN;
                        int fileRow;
                        if (indexOfName.TryGetValue(mdata.ColumnNames[k], out fileRow))
                        {
                            string text = (rawValues[fileRow] ?? "").Trim();
                            if (text.Length == 0 || !Parser.TryDouble(text, out parsed))
                            {
                                parsed = double.NaN;
                            }
                        }
                        values[k] = parsed;
                    }
                    mdata.AddNumericRow(rowName, rowName, values);
                }
            }
            return null;
        }
        /// <summary>
        /// Reads the 'Name' column of the tab-separated file named by the "Input file" parameter
        /// and shows every entry in its own message box.
        /// </summary>
        /// <param name="mdata">Not used by this method.</param>
        /// <param name="param">Parameters providing the "Input file" path.</param>
        /// <returns>An error message, or null when the column was read.</returns>
        private static string ProcessDataReadFromFile(IDataWithAnnotationRows mdata, Parameters param)
        {
            string filename = param.GetParam<string>("Input file").Value;

            string[] headers;
            try
            {
                headers = TabSep.GetColumnNames(filename, '\t');
            }
            catch (Exception)
            {
                return "Could not open file " + filename + ". It maybe open in another program.";
            }

            int nameIndex = GetNameIndex(headers);
            if (nameIndex < 0)
            {
                return "Error: the file has to contain a column called 'Name'.";
            }
            if (headers.Length < 2)
            {
                return "Error: the file does not contain a grouping column.";
            }

            // NOTE(review): one modal dialog per entry looks like leftover debugging output - confirm.
            foreach (string entry in TabSep.GetColumn(headers[nameIndex], filename, '\t'))
            {
                MessageBox.Show(entry.ToString());
            }
            return null;
        }
        /// <summary>
        /// Reads the 'Name' column of the tab-separated file named by the "Input file" parameter and
        /// shows its length next to the column count of <paramref name="mdata"/> in message boxes.
        /// </summary>
        /// <param name="mdata">Matrix whose column count is displayed.</param>
        /// <param name="param">Parameters providing the "Input file" path.</param>
        /// <returns>An error message, or null when the file was read.</returns>
        private static string ProcessDataReadFromFileList(IMatrixData mdata, Parameters param)
        {
            Parameter<string> fp = param.GetParam<string>("Input file");
            string filename = fp.Value;

            string[] colNames;
            try
            {
                colNames = TabSep.GetColumnNames(filename, '\t');
            }
            catch (Exception)
            {
                return "Could not open file " + filename + ". It maybe open in another program.";
            }

            int nameIndex = GetNameIndex(colNames);
            // Without this guard a file lacking a 'Name' column would fail on colNames[nameIndex]
            // with an index-out-of-range exception instead of returning an error message.
            if (nameIndex < 0)
            {
                return "Error: the file has to contain a column called 'Name'.";
            }

            string[] nameCol = TabSep.GetColumn(colNames[nameIndex], filename, '\t');

            // NOTE(review): the message boxes below look like leftover debugging output - confirm
            // before removing them.
            MessageBox.Show(nameCol.Length.ToString());
            MessageBox.Show(mdata.ColumnCount.ToString());
            for (int i = 0; i < mdata.ColumnCount; i++)
            {
                MessageBox.Show(nameCol.Length.ToString());
                MessageBox.Show(mdata.ColumnCount.ToString());

                // Renaming the matrix columns from the file (mdata.ColumnNames[i] = nameCol[i], with an
                // error return when the two lengths differ) was commented out in the original code.
            }

            return null;
        }
        // Gene enrichment analysis for both Basic PECA and PECA-N.
        // Loads "Goterms.txt" from workingDir into a new matrix instance created from mdata and returns it.
        // option 0 names the result "GSA_Degradation", option 1 "GSA_Synthesis"; any other value keeps "GSA".
        public static IMatrixData GetGOEnr(IMatrixData mdata, string workingDir, int option)
        {
            const char tab = '\t';
            string goFile = Path.Combine(workingDir, @".\Goterms.txt");

            IMatrixData result = (IMatrixData)mdata.CreateNewInstance();

            string title = "GSA";
            switch (option)
            {
                case 0:
                    title += "_Degradation";
                    break;
                case 1:
                    title += "_Synthesis";
                    break;
            }

            result.Clear();
            result.Name = title;
            result.AltName = title;

            string[] headers = TabSep.GetColumnNames(goFile, 0, PerseusUtils.commentPrefix,
                                                     PerseusUtils.commentPrefixExceptions, null, tab);
            string[][] data = TabSep.GetColumns(headers, goFile, 0, PerseusUtils.commentPrefix,
                                                PerseusUtils.commentPrefixExceptions, tab);
            int rowCount = TabSep.GetRowCount(goFile);

            // Every file column is first stored as an annotation column; the remaining arguments are empty lists.
            result.Values.Init(rowCount, 0);
            result.SetAnnotationColumns(
                new List<string>(headers), new List<string>(headers), new List<string[]>(data),
                new List<string>(), new List<string>(), new List<string[][]>(),
                new List<string>(), new List<string>(), new List<double[]>(),
                new List<string>(), new List<string>(), new List<double[][]>());

            // The pattern only matches headers that contain none of "id", "name" or "members";
            // the indices of those headers are handed to StringToNumerical.
            Regex convertible = new Regex(@"^((?!id|name|members).)*$");
            List<int> numericList = new List<int>();
            int position = 0;
            foreach (string header in headers)
            {
                if (convertible.Match(header).Success)
                {
                    numericList.Add(position);
                }
                position++;
            }

            StringToNumerical(numericList, result);
            return result;
        }
        /// <summary>
        /// Reads the tab-separated file named by the "Input file" parameter and adds one category row
        /// to <paramref name="mdata"/> for every file column other than the 'Name' column.
        /// Each cell is split on ';' into trimmed, sorted category terms.
        /// </summary>
        /// <param name="mdata">Matrix whose column names are looked up in the file's 'Name' column.</param>
        /// <param name="param">Parameters providing the "Input file" path.</param>
        /// <returns>An error message, or null when all rows were added.</returns>
        private static string ProcessDataReadFromFile(IMatrixData mdata, Parameters param)
        {
            Parameter<string> fp = param.GetParam<string>("Input file");
            string filename = fp.Value;

            string[] colNames;
            try
            {
                colNames = TabSep.GetColumnNames(filename, '\t');
            }
            catch (Exception)
            {
                // Same handling as the other readers of this parameter: report instead of throwing.
                return "Could not open file " + filename + ". It maybe open in another program.";
            }

            int nameIndex = GetNameIndex(colNames);
            if (nameIndex < 0)
            {
                return "Error: the file has to contain a column called 'Name'.";
            }
            if (colNames.Length < 2)
            {
                return "Error: the file does not contain a grouping column.";
            }

            string[] nameCol = TabSep.GetColumn(colNames[nameIndex], filename, '\t');
            // presumably maps every entry of the 'Name' column to its index in that column
            Dictionary<string, int> map = ArrayUtils.InverseMap(nameCol);

            for (int i = 0; i < colNames.Length; i++)
            {
                if (i == nameIndex)
                {
                    continue;
                }
                string groupName = colNames[i];
                string[] groupCol = TabSep.GetColumn(groupName, filename, '\t');
                string[][] newCol = new string[mdata.ColumnCount][];
                for (int j = 0; j < newCol.Length; j++)
                {
                    int ind;
                    if (!map.TryGetValue(mdata.ColumnNames[j], out ind))
                    {
                        // Matrix column is not listed in the file: no categories.
                        newCol[j] = new string[0];
                        continue;
                    }
                    string group = (groupCol[ind] ?? "").Trim();
                    if (group.Length == 0)
                    {
                        newCol[j] = new string[0];
                        continue;
                    }
                    string[] w = group.Split(';');
                    // Trim before sorting: sorting the untrimmed terms orders them by their
                    // leading whitespace (e.g. "a; b;c" would end up as b, a, c).
                    for (int k = 0; k < w.Length; k++)
                    {
                        w[k] = w[k].Trim();
                    }
                    Array.Sort(w);
                    newCol[j] = w;
                }
                mdata.AddCategoryRow(groupName, groupName, newCol);
            }
            return null;
        }
Exemple #6
0
        /// <summary>
        /// Reads the 'Name' column of the tab-separated file named by the "Input file" parameter.
        /// The column values are not used any further by this method.
        /// </summary>
        /// <param name="mdata">Not used by this method.</param>
        /// <param name="param">Parameters providing the "Input file" path.</param>
        /// <returns>An error message, or null when the file was read.</returns>
        private static string ProcessDataReadFromFileList(IMatrixData mdata, Parameters param)
        {
            Parameter<string> fp = param.GetParam<string>("Input file");
            string filename = fp.Value;

            string[] colNames;
            try
            {
                colNames = TabSep.GetColumnNames(filename, '\t');
            }
            catch (Exception)
            {
                return "Could not open file " + filename + ". It maybe open in another program.";
            }

            int nameIndex = GetNameIndex(colNames);
            // Without this guard a file lacking a 'Name' column would fail on colNames[nameIndex]
            // with an index-out-of-range exception instead of returning an error message.
            if (nameIndex < 0)
            {
                return "Error: the file has to contain a column called 'Name'.";
            }

            // The column is still read, as before, although nothing consumes it yet.
            string[] nameCol = TabSep.GetColumn(colNames[nameIndex], filename, '\t');
            return null;
        }
Exemple #7
0
        /// <summary>
        /// Loads every column of a delimited text file into <paramref name="mdata"/> through
        /// SetAnnotationColumns, leaving the main value matrix with zero columns.
        /// </summary>
        /// <param name="mdata">Matrix to fill.</param>
        /// <param name="filename">File to read; also stored as the matrix origin.</param>
        /// <param name="csv">True to split on commas, false to split on tabs.</param>
        private static void LoadSplit(IMatrixData mdata, string filename, bool csv)
        {
            char delimiter;
            if (csv)
            {
                delimiter = ',';
            }
            else
            {
                delimiter = '\t';
            }

            string[] headers = TabSep.GetColumnNames(filename, 0, PerseusUtils.commentPrefix,
                                                     PerseusUtils.commentPrefixExceptions, null, delimiter);
            string[][] data = TabSep.GetColumns(headers, filename, 0, PerseusUtils.commentPrefix,
                                                PerseusUtils.commentPrefixExceptions, delimiter);
            int rowCount = TabSep.GetRowCount(filename);

            mdata.Values.Init(rowCount, 0);

            // The headers are passed twice and the file data once; all remaining arguments are empty.
            // NOTE(review): the four argument triples presumably are (names, descriptions, data) for
            // text, category, numeric and multi-numeric columns - confirm against IMatrixData.
            List<string> names = new List<string>(headers);
            List<string> descriptions = new List<string>(headers);
            List<string[]> values = new List<string[]>(data);
            mdata.SetAnnotationColumns(
                names, descriptions, values,
                new List<string>(), new List<string>(), new List<string[][]>(),
                new List<string>(), new List<string>(), new List<double[]>(),
                new List<string>(), new List<string>(), new List<double[][]>());

            mdata.Origin = filename;
        }
Exemple #8
0
        /// <summary>
        /// Writes a small space-separated table to a gzip-compressed temporary file and checks that
        /// TabSep.GetColumnNames returns its header names.
        /// </summary>
        public void TestGetColumnNamesFromGzippedFile()
        {
            var lines = new[]
            {
                "Col_1 Col_2",
                "a b",
            };
            // Path.GetTempFileName() already creates an empty file on disk; the ".gz" path is a
            // second file. Both are tracked so the test does not leave them behind.
            var placeholderFile = Path.GetTempFileName();
            var tmpFile = placeholderFile + ".gz";

            try
            {
                using (var memory = new MemoryStream(Encoding.UTF8.GetBytes(string.Join("\n", lines))))
                using (var outFile = File.Create(tmpFile))
                using (var gzip = new GZipStream(outFile, CompressionMode.Compress))
                {
                    memory.CopyTo(gzip);
                }

                var columnNames = TabSep.GetColumnNames(tmpFile, ' ');

                CollectionAssert.AreEqual(new[] { "Col_1", "Col_2" }, columnNames);
            }
            finally
            {
                foreach (var path in new[] { tmpFile, placeholderFile })
                {
                    try
                    {
                        File.Delete(path);
                    }
                    catch (IOException)
                    {
                        // Best-effort cleanup: a file that is still locked must not hide the test result.
                    }
                }
            }
        }
        // This function is modified from the LoadSplit function in PerseusPluginLib/Load/UnstructuredTxtUpload.cs.
        // It obtains the output from fdr.exe (so it is only applicable to PECA CORE and N) and loads it
        // into mdata, replacing the previous content of mdata.
        //   mdata       - matrix that is cleared and refilled from the file
        //   param       - not used by this method
        //   dataParam   - supplies the int[] parameter named by expSeries1
        //   filename    - tab-separated output file; its first line is rewritten via ReplaceFirstLine
        //   geneName    - passed to ReplaceFirstLine as the gene name column header
        //   expSeries1  - name of the parameter whose array length is the number of columns per series
        //   numOfSeries - number of expression series in the file
        public static void GetOutput(IMatrixData mdata, Parameters param, Parameters dataParam, string filename, string geneName, string expSeries1 = "Expression Series 1", int numOfSeries = 2)
        {
            char separator = '\t';

            // The gene name column header is not included in the file, so it has to be put in first.
            ReplaceFirstLine(filename, geneName);

            string[] colNames = TabSep.GetColumnNames(filename, 0, PerseusUtils.commentPrefix,
                                                      PerseusUtils.commentPrefixExceptions, null, separator);

            string[][] cols = TabSep.GetColumns(colNames, filename, 0, PerseusUtils.commentPrefix,
                                                PerseusUtils.commentPrefixExceptions, separator);
            int nrows = TabSep.GetRowCount(filename);

            mdata.Clear();
            mdata.Name = "PECA Analysis";
            mdata.Values.Init(nrows, 0);
            mdata.SetAnnotationColumns(new List<string>(colNames), new List<string>(colNames), new List<string[]>(cols), new List<string>(),
                                       new List<string>(), new List<string[][]>(), new List<string>(), new List<string>(), new List<double[]>(),
                                       new List<string>(), new List<string>(), new List<double[][]>());

            // Be careful with changes of the number of time points in the future.
            int numOfExpCols = numOfSeries * dataParam.GetParam<int[]>(expSeries1).Value.Length;

            // The file is structured so that the expression columns come before the numeric ones,
            // so the numeric ones are converted before the expression columns.

            // The first column is guaranteed to be the name column.
            int[] expList = Enumerable.Range(1, numOfExpCols).ToArray();
            int[] numericList = Enumerable.Range(numOfExpCols + 1, colNames.Length - numOfExpCols - 1).ToArray();

            StringToNumerical(numericList, mdata);
            StringToExpression(expList, mdata);
        }
        /// <summary>
        /// Fills <paramref name="matrixData"/> with the tab-separated file selected by the "Organism"
        /// parameter: every file column is passed to SetAnnotationColumns and the main value matrix
        /// gets zero columns.
        /// </summary>
        /// <param name="matrixData">Matrix to fill; named "Gene list".</param>
        /// <param name="parameters">Supplies the "Organism" index into GetOrganismFiles().</param>
        /// <param name="supplTables">Not touched by this method.</param>
        /// <param name="documents">Not touched by this method.</param>
        /// <param name="processInfo">Not used by this method.</param>
        public void LoadData(IMatrixData matrixData, Parameters parameters, ref IMatrixData[] supplTables,
                             ref IDocumentData[] documents, ProcessInfo processInfo)
        {
            int organismIndex = parameters.GetParam<int>("Organism").Value;
            string filename = GetOrganismFiles()[organismIndex];

            List<string> headers = new List<string>(TabSep.GetColumnNames(filename, '\t'));
            List<string[]> columns = new List<string[]>();
            for (int i = 0; i < headers.Count; i++)
            {
                columns.Add(TabSep.GetColumn(headers[i], filename, '\t'));
            }

            matrixData.Name = "Gene list";
            matrixData.ColumnNames = new List<string>();
            // The row count is taken from the first file column.
            matrixData.Values.Init(columns[0].Length, 0);
            matrixData.SetAnnotationColumns(
                headers, columns,
                new List<string>(), new List<string[][]>(),
                new List<string>(), new List<double[]>(),
                new List<string>(), new List<double[][]>());
            matrixData.Origin = "Gene list";
        }
        /// <summary>
        /// Shows <paramref name="filename"/> in the text box, reads the file's column names and
        /// annotation rows, and initialises the column selector with them. If the file has a "Type"
        /// annotation row it drives the selection; otherwise a heuristic selection is used.
        /// Problems opening the file are reported in a message box.
        /// </summary>
        /// <param name="filename">Path of the file to inspect.</param>
        internal void UpdateFile(string filename)
        {
            textBox1.Text = filename;
            // Gzipped CSV files are comma-separated as well, matching how LoadData picks its separator.
            // Ordinal comparison keeps the extension check independent of the current culture.
            bool csv = filename.EndsWith(".csv", StringComparison.OrdinalIgnoreCase) ||
                       filename.EndsWith(".csv.gz", StringComparison.OrdinalIgnoreCase);
            char separator = csv ? ',' : '\t';

            string[] colNames;
            Dictionary<string, string[]> annotationRows = new Dictionary<string, string[]>();

            try
            {
                colNames = TabSep.GetColumnNames(filename, PerseusUtils.commentPrefix, PerseusUtils.commentPrefixExceptions,
                                                 annotationRows, separator);
            }
            catch (Exception)
            {
                MessageBox.Show("Could not open the file '" + filename +
                                "'. It is probably opened by another program.");
                return;
            }

            // A "Type" annotation row, when present, is taken out and used for the exact selection below.
            string[] colTypes;
            if (annotationRows.TryGetValue("Type", out colTypes))
            {
                annotationRows.Remove("Type");
            }
            else
            {
                colTypes = null;
            }

            string msg = TabSep.CanOpen(filename);
            if (msg != null)
            {
                MessageBox.Show(msg);
                return;
            }

            multiListSelectorControl1.Init(colNames);
            if (colTypes != null)
            {
                FormUtils.SelectExact(colNames, colTypes, multiListSelectorControl1);
            }
            else
            {
                FormUtils.SelectHeuristic(colNames, multiListSelectorControl1);
            }
        }
        /// <summary>
        /// Loads the file selected in the "File" parameter into <paramref name="mdata"/> using the
        /// column assignments and filters stored in that parameter. Failures are reported through
        /// <paramref name="processInfo"/>.ErrString and end the method early.
        /// </summary>
        /// <param name="mdata">Matrix to fill.</param>
        /// <param name="parameters">Must contain a "File" parameter of type PerseusLoadMatrixParam.</param>
        /// <param name="supplTables">Not touched by this method.</param>
        /// <param name="documents">Not touched by this method.</param>
        /// <param name="processInfo">Receives error messages; also passed on to the loader.</param>
        public void LoadData(IMatrixData mdata, Parameters parameters, ref IMatrixData[] supplTables,
                             ref IDocumentData[] documents, ProcessInfo processInfo)
        {
            PerseusLoadMatrixParam par = (PerseusLoadMatrixParam)parameters.GetParam("File");
            string filename = par.Filename;

            if (string.IsNullOrEmpty(filename))
            {
                processInfo.ErrString = "Please specify a filename";
                return;
            }
            if (!File.Exists(filename))
            {
                processInfo.ErrString = "File '" + filename + "' does not exist.";
                return;
            }

            // Ordinal comparison keeps the extension check independent of the current culture.
            bool csv = filename.EndsWith(".csv", StringComparison.OrdinalIgnoreCase) ||
                       filename.EndsWith(".csv.gz", StringComparison.OrdinalIgnoreCase);
            char separator = csv ? ',' : '\t';

            string[] colNames;
            Dictionary<string, string[]> annotationRows = new Dictionary<string, string[]>();

            try
            {
                colNames = TabSep.GetColumnNames(filename, PerseusUtils.commentPrefix, PerseusUtils.commentPrefixExceptions,
                                                 annotationRows, separator);
            }
            catch (Exception)
            {
                processInfo.ErrString = "Could not open the file '" + filename + "'. It is probably opened in another program.";
                return;
            }
            string origin = filename;

            int[] eInds = par.MainColumnIndices;
            int[] nInds = par.NumericalColumnIndices;
            int[] cInds = par.CategoryColumnIndices;
            int[] tInds = par.TextColumnIndices;
            int[] mInds = par.MultiNumericalColumnIndices;
            List<Tuple<Relation[], int[], bool>> filters = new List<Tuple<Relation[], int[], bool>>();
            string errString;

            // Filters on main columns are collected first, then those on numerical columns;
            // the first filter that reports an error aborts the load.
            foreach (Parameters p in par.MainFilterParameters)
            {
                PerseusUtils.AddFilter(filters, p, eInds, out errString);
                if (errString != null)
                {
                    processInfo.ErrString = errString;
                    return;
                }
            }
            foreach (Parameters p in par.NumericalFilterParameters)
            {
                PerseusUtils.AddFilter(filters, p, nInds, out errString);
                if (errString != null)
                {
                    processInfo.ErrString = errString;
                    return;
                }
            }

            int nrows = GetRowCount(filename, eInds, filters, separator);

            // "using" closes both readers even when opening the second one or loading the data throws.
            using (StreamReader reader = FileUtils.GetReader(filename))
            using (StreamReader auxReader = FileUtils.GetReader(filename))
            {
                PerseusUtils.LoadMatrixData(annotationRows, eInds, cInds, nInds, tInds, mInds, processInfo, colNames, mdata, reader,
                                            auxReader, nrows, origin, separator, par.ShortenExpressionColumnNames, filters);
            }

            // NOTE(review): forced collection kept from the original; presumably meant to release
            // temporary buffers of large loads - confirm it is still wanted.
            GC.Collect();
        }