/// <summary>
/// Reads the name column of the tab-separated file selected through the "Input file" parameter
/// and displays its length next to the matrix column count in message boxes.
/// </summary>
/// <param name="mdata">Matrix whose <c>ColumnCount</c> is displayed and iterated over.</param>
/// <param name="param">Parameter set that holds the "Input file" path.</param>
/// <returns><c>null</c> on success, otherwise an error message.</returns>
private static string ProcessDataReadFromFileList(IMatrixData mdata, Parameters param)
        {
            Parameter <string> fp       = param.GetParam <string>("Input file");
            string             filename = fp.Value;

            string[] colNames;
            try
            {
                colNames = TabSep.GetColumnNames(filename, '\t');
            }
            catch (Exception)
            {
                return("Could not open file " + filename + ". It maybe open in another program.");
            }
            int nameIndex = GetNameIndex(colNames);

            // The sibling readers treat a negative index as "column not found"; without this guard
            // colNames[nameIndex] below would throw instead of reporting the problem.
            if (nameIndex < 0)
            {
                return("Error: the file has to contain a column called 'Name'.");
            }

            string[] nameCol = TabSep.GetColumn(colNames[nameIndex], filename, '\t');
            // NOTE(review): the dialogs below look like leftover diagnostics - confirm before removing.
            MessageBox.Show(nameCol.Length.ToString());
            MessageBox.Show(mdata.ColumnCount.ToString());
            for (int i = 0; i < mdata.ColumnCount; i++)
            {
                MessageBox.Show(nameCol.Length.ToString());
                MessageBox.Show(mdata.ColumnCount.ToString());

                // Disabled logic kept for reference: return "ERROR" when nameCol.Length differs from
                // mdata.ColumnCount, otherwise assign mdata.ColumnNames[i] = nameCol[i].
            }

            return(null);
        }
        /// <summary>
        /// Validates the tab-separated file given by the "Input file" parameter and shows every
        /// entry of its name column in a message box.
        /// </summary>
        /// <returns><c>null</c> on success, otherwise an error message.</returns>
        private static string ProcessDataReadFromFile(IDataWithAnnotationRows mdata, Parameters param)
        {
            string filename = param.GetParam<string>("Input file").Value;

            string[] headers;
            try
            {
                headers = TabSep.GetColumnNames(filename, '\t');
            }
            catch (Exception)
            {
                return "Could not open file " + filename + ". It maybe open in another program.";
            }

            int nameColumn = GetNameIndex(headers);
            if (nameColumn < 0)
            {
                return "Error: the file has to contain a column called 'Name'.";
            }
            if (headers.Length < 2)
            {
                return "Error: the file does not contain a grouping column.";
            }

            foreach (string entry in TabSep.GetColumn(headers[nameColumn], filename, '\t'))
            {
                MessageBox.Show(entry.ToString());
            }
            return null;
        }
        /// <summary>
        /// Inspects the first non-comment data line that has enough fields and reports whether the
        /// value in the first expression column contains a ';'.
        /// </summary>
        /// <param name="reader">Reader positioned at the start of the data; its first line is skipped.
        /// The reader is closed before returning unless <paramref name="expressionColIndices"/> is empty,
        /// in which case it is neither read nor closed.</param>
        /// <param name="expressionColIndices">Expression column indices; only the first one is used.</param>
        /// <param name="separator">Field separator passed to <c>SplitLine</c>.</param>
        /// <returns><c>true</c> if the inspected value contains ';', otherwise <c>false</c>.</returns>
        public static bool GetHasAddtlMatrices(StreamReader reader, IList <int> expressionColIndices, char separator)
        {
            if (expressionColIndices.Count == 0)
            {
                return(false);
            }
            int expressionColIndex = expressionColIndices[0];

            // try/finally so the reader is also closed when reading or splitting throws.
            try
            {
                reader.ReadLine();
                string line;
                while ((line = reader.ReadLine()) != null)
                {
                    if (TabSep.IsCommentLine(line, commentPrefix, commentPrefixExceptions))
                    {
                        continue;
                    }
                    string[] w = SplitLine(line, separator);
                    if (expressionColIndex < w.Length)
                    {
                        // Only the first line that actually has the column decides the result.
                        string s = StringUtils.RemoveWhitespace(w[expressionColIndex]);
                        return(s.Contains(";"));
                    }
                }
                return(false);
            }
            finally
            {
                reader.Close();
            }
        }
        /// <summary>
        /// Adds one numeric annotation row per non-name column of the tab-separated "Input file".
        /// Values are matched to matrix columns by name; missing, blank or unparsable entries
        /// become <c>double.NaN</c>.
        /// </summary>
        /// <returns><c>null</c> on success, otherwise an error message.</returns>
        private static string ProcessDataReadFromFile(IDataWithAnnotationRows mdata, Parameters param)
        {
            string filename = param.GetParam<string>("Input file").Value;

            string[] headers;
            try
            {
                headers = TabSep.GetColumnNames(filename, '\t');
            }
            catch (Exception)
            {
                return "Could not open file " + filename + ". It maybe open in another program.";
            }

            int nameColumn = GetNameIndex(headers);
            if (nameColumn < 0)
            {
                return "Error: the file has to contain a column called 'Name'.";
            }
            if (headers.Length < 2)
            {
                return "Error: the file does not contain a numerical column.";
            }

            string[] names = TabSep.GetColumn(headers[nameColumn], filename, '\t');
            Dictionary<string, int> rowOfName = ArrayUtils.InverseMap(names);

            for (int c = 0; c < headers.Length; c++)
            {
                if (c == nameColumn)
                {
                    continue;
                }
                string rowName = headers[c];
                string[] rawValues = TabSep.GetColumn(rowName, filename, '\t');
                double[] values = new double[mdata.ColumnCount];
                for (int j = 0; j < values.Length; j++)
                {
                    // Empty text stands for "no usable entry" and ends up as NaN below.
                    int fileRow;
                    string text = map_TryGet(rowOfName, mdata.ColumnNames[j], out fileRow)
                        ? (rawValues[fileRow] ?? "").Trim()
                        : "";
                    double parsed;
                    values[j] = text.Length > 0 && Parser.TryDouble(text, out parsed) ? parsed : double.NaN;
                }
                mdata.AddNumericRow(rowName, rowName, values);
            }
            return null;
        }

        /// <summary>Single-lookup wrapper around <c>Dictionary.TryGetValue</c>.</summary>
        private static bool map_TryGet(Dictionary<string, int> map, string key, out int value)
        {
            return map.TryGetValue(key, out value);
        }
        /// <summary>
        /// Adds one category annotation row per non-name column of the tab-separated "Input file".
        /// Each file value is split on ';', trimmed and sorted; matrix columns without a matching
        /// name or with a blank value get an empty category list.
        /// </summary>
        /// <param name="mdata">Matrix that receives the category rows.</param>
        /// <param name="param">Parameter set that holds the "Input file" path.</param>
        /// <returns><c>null</c> on success, otherwise an error message.</returns>
        private static string ProcessDataReadFromFile(IMatrixData mdata, Parameters param)
        {
            Parameter <string> fp       = param.GetParam <string>("Input file");
            string             filename = fp.Value;

            // Same failure handling as the sibling readers: report an unreadable file as an
            // error string instead of letting the exception escape.
            string[] colNames;
            try
            {
                colNames = TabSep.GetColumnNames(filename, '\t');
            }
            catch (Exception)
            {
                return("Could not open file " + filename + ". It maybe open in another program.");
            }
            int nameIndex = GetNameIndex(colNames);

            if (nameIndex < 0)
            {
                return("Error: the file has to contain a column called 'Name'.");
            }
            if (colNames.Length < 2)
            {
                return("Error: the file does not contain a grouping column.");
            }
            string[] nameCol             = TabSep.GetColumn(colNames[nameIndex], filename, '\t');
            Dictionary <string, int> map = ArrayUtils.InverseMap(nameCol);

            for (int i = 0; i < colNames.Length; i++)
            {
                if (i == nameIndex)
                {
                    continue;
                }
                string     groupName = colNames[i];
                string[]   groupCol  = TabSep.GetColumn(groupName, filename, '\t');
                string[][] newCol    = new string[mdata.ColumnCount][];
                for (int j = 0; j < newCol.Length; j++)
                {
                    string colName = mdata.ColumnNames[j];
                    int    ind;
                    if (!map.TryGetValue(colName, out ind))
                    {
                        newCol[j] = new string[0];
                        continue;
                    }
                    string group = groupCol[ind] ?? "";
                    group = group.Trim();
                    if (string.IsNullOrEmpty(group))
                    {
                        newCol[j] = new string[0];
                    }
                    else
                    {
                        string[] w = group.Split(';');
                        // Trim first, then sort: sorting untrimmed entries would order them by
                        // their leading whitespace and leave the stored list unsorted.
                        for (int k = 0; k < w.Length; k++)
                        {
                            w[k] = w[k].Trim();
                        }
                        Array.Sort(w);
                        newCol[j] = w;
                    }
                }
                mdata.AddCategoryRow(groupName, groupName, newCol);
            }
            return(null);
        }
        /// <summary>
        /// Gene enrichment analysis output for both Basic PECA and PECA-N: loads "Goterms.txt" from
        /// the working directory into a new matrix as text columns, then converts every column whose
        /// name matches the numeric pattern via <c>StringToNumerical</c>.
        /// </summary>
        /// <param name="mdata">Matrix used only to create the new instance.</param>
        /// <param name="workingDir">Directory that contains Goterms.txt.</param>
        /// <param name="option">0 appends "_Degradation" to the matrix name, 1 appends "_Synthesis";
        /// any other value leaves the name as "GSA".</param>
        /// <returns>The newly created and filled matrix.</returns>
        public static IMatrixData GetGOEnr(IMatrixData mdata, string workingDir, int option)
        {
            char separator = '\t';
            string filename = Path.Combine(workingDir, @".\Goterms.txt");
            IMatrixData result = (IMatrixData)mdata.CreateNewInstance();

            const string baseName = "GSA";
            string name;
            switch (option)
            {
                case 0:
                    name = baseName + "_Degradation";
                    break;
                case 1:
                    name = baseName + "_Synthesis";
                    break;
                default:
                    name = baseName;
                    break;
            }

            result.Clear();
            result.Name    = name;
            result.AltName = name;

            string[] colNames = TabSep.GetColumnNames(filename, 0, PerseusUtils.commentPrefix,
                                                      PerseusUtils.commentPrefixExceptions, null, separator);
            string[][] cols = TabSep.GetColumns(colNames, filename, 0, PerseusUtils.commentPrefix,
                                                PerseusUtils.commentPrefixExceptions, separator);
            int rowCount = TabSep.GetRowCount(filename);

            result.Values.Init(rowCount, 0);

            // Every file column goes in as a text column first; all other annotation kinds stay empty.
            List<string>   textNames = new List<string>(colNames);
            List<string>   textDescs = new List<string>(colNames);
            List<string[]> textData  = new List<string[]>(cols);
            result.SetAnnotationColumns(textNames, textDescs, textData,
                                        new List<string>(), new List<string>(), new List<string[][]>(),
                                        new List<string>(), new List<string>(), new List<double[]>(),
                                        new List<string>(), new List<string>(), new List<double[][]>());

            // Columns whose name contains none of "id", "name", "members" are converted to numeric.
            Regex numericReg = new Regex(@"^((?!id|name|members).)*$");
            List<int> numericList = new List<int>();
            int index = 0;
            foreach (string colName in colNames)
            {
                if (numericReg.IsMatch(colName))
                {
                    numericList.Add(index);
                }
                index++;
            }
            StringToNumerical(numericList, result);
            return result;
        }
 /// <summary>
 /// Reads the kinase-substrate columns (sequence window, substrate accession, kinase name,
 /// kinase accession, kinase organism) from a tab-separated file.
 /// </summary>
 public static void ParseKinaseSubstrate(string filename, out string[] seqWins, out string[] subAccs,
                                         out string[] kinases, out string[] kinAccs, out string[] species)
 {
     // NOTE(review): the integer argument is interpreted by TabSep.GetColumn - presumably the
     // number of leading lines to skip; confirm against TabSep.
     const int  offset = 3;
     const char tab    = '\t';

     seqWins = TabSep.GetColumn("SITE_+/-7_AA", filename, offset, tab);
     subAccs = TabSep.GetColumn("SUB_ACC_ID", filename, offset, tab);
     kinases = TabSep.GetColumn("KINASE", filename, offset, tab);
     kinAccs = TabSep.GetColumn("KIN_ACC_ID", filename, offset, tab);
     species = TabSep.GetColumn("KIN_ORGANISM", filename, offset, tab);
 }
 /// <summary>
 /// Reads the known-modification columns (sequence window, accession, the LT_LIT / MS_LIT /
 /// MS_CST reference columns and organism) from a tab-separated file.
 /// </summary>
 public static void ParseKnownMods(string filename, out string[] seqWins, out string[] accs, out string[] pubmedLtp,
                                   out string[] pubmedMs2, out string[] cstMs2, out string[] species)
 {
     // NOTE(review): the integer argument is interpreted by TabSep.GetColumn - presumably the
     // number of leading lines to skip; confirm against TabSep.
     const int  offset = 3;
     const char tab    = '\t';

     seqWins   = TabSep.GetColumn("SITE_+/-7_AA", filename, offset, tab);
     accs      = TabSep.GetColumn("ACC_ID", filename, offset, tab);
     pubmedLtp = TabSep.GetColumn("LT_LIT", filename, offset, tab);
     pubmedMs2 = TabSep.GetColumn("MS_LIT", filename, offset, tab);
     cstMs2    = TabSep.GetColumn("MS_CST", filename, offset, tab);
     species   = TabSep.GetColumn("ORGANISM", filename, offset, tab);
 }
        /// <summary>
        /// Returns the distinct values of the "KIN_ORGANISM" column of the kinase-substrate file.
        /// </summary>
        /// <returns>The unique organism names, or <c>null</c> when no kinase-substrate file is available.</returns>
        public static string[] GetAllKinaseSubstrateOrganisms()
        {
            string kinaseSubstrateFile = GetKinaseSubstrateFile();

            return kinaseSubstrateFile == null
                ? null
                : ArrayUtils.UniqueValues(TabSep.GetColumn("KIN_ORGANISM", kinaseSubstrateFile, 3, '\t'));
        }
 /// <summary>
 /// Reads the regulatory-site columns (sequence window, accession, the ON_* annotation columns,
 /// notes and organism) from a tab-separated file.
 /// </summary>
 public static void ParseRegulatorySites(string filename, out string[] seqWins, out string[] accs,
                                         out string[] function, out string[] process, out string[] protInteract, out string[] otherInteract,
                                         out string[] notes, out string[] species)
 {
     // NOTE(review): the integer argument is interpreted by TabSep.GetColumn - presumably the
     // number of leading lines to skip; confirm against TabSep.
     const int  offset = 3;
     const char tab    = '\t';

     seqWins       = TabSep.GetColumn("SITE_+/-7_AA", filename, offset, tab);
     accs          = TabSep.GetColumn("ACC_ID", filename, offset, tab);
     function      = TabSep.GetColumn("ON_FUNCTION", filename, offset, tab);
     process       = TabSep.GetColumn("ON_PROCESS", filename, offset, tab);
     protInteract  = TabSep.GetColumn("ON_PROT_INTERACT", filename, offset, tab);
     otherInteract = TabSep.GetColumn("ON_OTHER_INTERACT", filename, offset, tab);
     notes         = TabSep.GetColumn("NOTES", filename, offset, tab);
     species       = TabSep.GetColumn("ORGANISM", filename, offset, tab);
 }
Exemple #11
0
        /// <summary>
        /// Opens the tab-separated file given by the "Input file" parameter and reads its name column.
        /// The column content is currently not used any further.
        /// </summary>
        /// <param name="mdata">Not used by this method.</param>
        /// <param name="param">Parameter set that holds the "Input file" path.</param>
        /// <returns><c>null</c> on success, otherwise an error message.</returns>
        private static string ProcessDataReadFromFileList(IMatrixData mdata, Parameters param)
        {
            Parameter <string> fp       = param.GetParam <string>("Input file");
            string             filename = fp.Value;

            string[] colNames;
            try{
                colNames = TabSep.GetColumnNames(filename, '\t');
            } catch (Exception) {
                return("Could not open file " + filename + ". It maybe open in another program.");
            }
            int nameIndex = GetNameIndex(colNames);

            // The sibling readers treat a negative index as "column not found"; without this guard
            // colNames[nameIndex] below would throw instead of reporting the problem.
            if (nameIndex < 0)
            {
                return("Error: the file has to contain a column called 'Name'.");
            }

            // Result is unused; the call is kept because it still reads the file.
            string[] nameCol = TabSep.GetColumn(colNames[nameIndex], filename, '\t');
            return(null);
        }
Exemple #12
0
        /// <summary>
        /// Loads every column of a delimited text file into <paramref name="mdata"/> as text
        /// annotation columns, with an empty main matrix.
        /// </summary>
        /// <param name="mdata">Matrix to fill.</param>
        /// <param name="filename">File to read; also stored as the matrix origin.</param>
        /// <param name="csv"><c>true</c> to split on ',', <c>false</c> to split on tab.</param>
        private static void LoadSplit(IMatrixData mdata, string filename, bool csv)
        {
            char separator;
            if (csv)
            {
                separator = ',';
            }
            else
            {
                separator = '\t';
            }

            string[] headers = TabSep.GetColumnNames(filename, 0, PerseusUtils.commentPrefix,
                                                     PerseusUtils.commentPrefixExceptions, null, separator);
            string[][] columns = TabSep.GetColumns(headers, filename, 0, PerseusUtils.commentPrefix,
                                                   PerseusUtils.commentPrefixExceptions, separator);
            int rowCount = TabSep.GetRowCount(filename);

            mdata.Values.Init(rowCount, 0);

            // Header names double as descriptions; all non-text annotation kinds stay empty.
            List<string>   textNames = new List<string>(headers);
            List<string>   textDescs = new List<string>(headers);
            List<string[]> textData  = new List<string[]>(columns);
            mdata.SetAnnotationColumns(textNames, textDescs, textData,
                                       new List<string>(), new List<string>(), new List<string[][]>(),
                                       new List<string>(), new List<string>(), new List<double[]>(),
                                       new List<string>(), new List<string>(), new List<double[][]>());
            mdata.Origin = filename;
        }
Exemple #13
0
        /// <summary>
        /// Writes a small space-separated table to a gzip-compressed temp file and checks that
        /// <c>TabSep.GetColumnNames</c> returns its header names.
        /// </summary>
        public void TestGetColumnNamesFromGzippedFile()
        {
            var lines = new[]
            {
                "Col_1 Col_2",
                "a b",
            };
            // Path.GetTempFileName() already creates an empty file on disk; the ".gz" path is a
            // second file. Track both so neither is left behind after the test.
            var placeholder = Path.GetTempFileName();
            var tmpFile     = placeholder + ".gz";

            try
            {
                using (var memory = new MemoryStream(Encoding.UTF8.GetBytes(string.Join("\n", lines))))
                    using (var outFile = File.Create(tmpFile))
                        using (var gzip = new GZipStream(outFile, CompressionMode.Compress))
                        {
                            memory.CopyTo(gzip);
                        }
                var columnNames = TabSep.GetColumnNames(tmpFile, ' ');

                CollectionAssert.AreEqual(new [] { "Col_1", "Col_2" }, columnNames);
            }
            finally
            {
                // File.Delete does not throw when the file does not exist.
                File.Delete(tmpFile);
                File.Delete(placeholder);
            }
        }
Exemple #14
0
        /// <summary>
        /// Loads the output of fdr.exe (so only applicable to PECA CORE and N) into
        /// <paramref name="mdata"/>. Modified from the LoadSplit function in
        /// PerseusPluginLib/Load/UnstructuredTxtUpload.cs.
        /// </summary>
        /// <param name="mdata">Matrix that is cleared and refilled from the file.</param>
        /// <param name="param">Not used by this method.</param>
        /// <param name="dataParam">Parameter set holding the expression series selection.</param>
        /// <param name="filename">Tab-separated output file; its first line is rewritten in place.</param>
        /// <param name="geneName">Passed to <c>ReplaceFirstLine</c> to supply the gene-name column header.</param>
        /// <param name="expSeries1">Name of the int[] parameter whose length gives the columns per series.</param>
        /// <param name="numOfSeries">Number of expression series in the file.</param>
        public static void GetOutput(IMatrixData mdata, Parameters param, Parameters dataParam, string filename, string geneName, string expSeries1 = "Expression Series 1", int numOfSeries = 2)
        {
            char separator = '\t';

            // The gene name column header is not included in the file, so it has to be written in.
            ReplaceFirstLine(filename, geneName);

            string[] colNames = TabSep.GetColumnNames(filename, 0, PerseusUtils.commentPrefix,
                                                      PerseusUtils.commentPrefixExceptions, null, separator);

            string[][] cols = TabSep.GetColumns(colNames, filename, 0, PerseusUtils.commentPrefix,
                                                PerseusUtils.commentPrefixExceptions, separator);
            int nrows = TabSep.GetRowCount(filename);

            mdata.Clear();
            mdata.Name = "PECA Analysis";
            mdata.Values.Init(nrows, 0);
            mdata.SetAnnotationColumns(new List <string>(colNames), new List <string>(colNames), new List <string[]>(cols), new List <string>(),
                                       new List <string>(), new List <string[][]>(), new List <string>(), new List <string>(), new List <double[]>(),
                                       new List <string>(), new List <string>(), new List <double[][]>());

            // Be careful with changes of the number of time points in the future.
            int numOfExpCols = numOfSeries * dataParam.GetParam <int[]>(expSeries1).Value.Length;

            // The file is structured so that expression columns come before the numeric ones,
            // so the numeric ones are converted before the expression columns.
            // The first column is guaranteed to be the name column.
            int[] expList     = Enumerable.Range(1, numOfExpCols).ToArray();
            int[] numericList = Enumerable.Range(numOfExpCols + 1, colNames.Length - numOfExpCols - 1).ToArray();

            StringToNumerical(numericList, mdata);
            StringToExpression(expList, mdata);
        }
        /// <summary>
        /// Fills <paramref name="matrixData"/> with the gene list of the organism selected by the
        /// "Organism" parameter: every file column becomes a text annotation column and the main
        /// matrix has no columns.
        /// </summary>
        public void LoadData(IMatrixData matrixData, Parameters parameters, ref IMatrixData[] supplTables,
                             ref IDocumentData[] documents, ProcessInfo processInfo)
        {
            int    organismIndex = parameters.GetParam<int>("Organism").Value;
            string filename      = GetOrganismFiles()[organismIndex];

            string[]       headers        = TabSep.GetColumnNames(filename, '\t');
            List<string>   stringColnames = new List<string>(headers);
            List<string[]> stringCols     = new List<string[]>();
            for (int i = 0; i < stringColnames.Count; i++)
            {
                stringCols.Add(TabSep.GetColumn(stringColnames[i], filename, '\t'));
            }

            const string label = "Gene list";
            matrixData.Name        = label;
            matrixData.ColumnNames = new List<string>();
            // Row count is taken from the first file column.
            matrixData.Values.Init(stringCols[0].Length, 0);
            matrixData.SetAnnotationColumns(stringColnames, stringCols,
                                            new List<string>(), new List<string[][]>(),
                                            new List<string>(), new List<double[]>(),
                                            new List<string>(), new List<double[][]>());
            matrixData.Origin = label;
        }
        /// <summary>
        /// Shows <paramref name="filename"/> in the text box, reads its column names and annotation
        /// rows, and initialises the column selector - using the file's "Type" annotation row when
        /// present, otherwise a heuristic pre-selection. Problems are reported in a message box.
        /// </summary>
        internal void UpdateFile(string filename)
        {
            textBox1.Text = filename;
            char separator = filename.ToLower().EndsWith(".csv") ? ',' : '\t';

            Dictionary<string, string[]> annotationRows = new Dictionary<string, string[]>();
            string[] colNames;
            try
            {
                colNames = TabSep.GetColumnNames(filename, PerseusUtils.commentPrefix, PerseusUtils.commentPrefixExceptions,
                                                 annotationRows, separator);
            }
            catch (Exception)
            {
                MessageBox.Show("Could not open the file '" + filename + "'. It is probably opened by another program.");
                return;
            }

            // The "Type" row, when present, is taken out of the annotation rows.
            string[] colTypes;
            if (annotationRows.TryGetValue("Type", out colTypes))
            {
                annotationRows.Remove("Type");
            }

            string problem = TabSep.CanOpen(filename);
            if (problem != null)
            {
                MessageBox.Show(problem);
                return;
            }

            multiListSelectorControl1.Init(colNames);
            if (colTypes == null)
            {
                FormUtils.SelectHeuristic(colNames, multiListSelectorControl1);
            }
            else
            {
                FormUtils.SelectExact(colNames, colTypes, multiListSelectorControl1);
            }
        }
        /// <summary>
        /// Counts the data lines of <paramref name="reader"/> that are not comment lines and pass
        /// <c>IsValidLine</c> for the given filters. The reader is rewound first and its first line
        /// is skipped.
        /// </summary>
        /// <param name="reader">Reader over the file; must wrap a seekable stream.</param>
        /// <param name="auxReader">Optional second reader handed to <c>GetHasAddtlMatrices</c>; may be <c>null</c>.</param>
        /// <param name="mainColIndices">Indices of the main columns.</param>
        /// <param name="filters">Row filters forwarded to <c>IsValidLine</c>.</param>
        /// <param name="separator">Field separator.</param>
        /// <returns>The number of valid data lines.</returns>
        public static int GetRowCount(StreamReader reader, StreamReader auxReader, int[] mainColIndices,
                                      List <Tuple <Relation[], int[], bool> > filters, char separator)
        {
            reader.BaseStream.Seek(0, SeekOrigin.Begin);
            // After repositioning the underlying stream the reader's internal buffer is stale and
            // must be dropped, otherwise previously buffered text is returned first.
            reader.DiscardBufferedData();
            reader.ReadLine();
            int    count            = 0;
            bool   hasAddtlMatrices = auxReader != null && GetHasAddtlMatrices(auxReader, mainColIndices, separator);
            string line;

            while ((line = reader.ReadLine()) != null)
            {
                // Skip comment lines one at a time so that a file ending in comment lines never
                // hands a null line to IsCommentLine or IsValidLine.
                if (TabSep.IsCommentLine(line, commentPrefix, commentPrefixExceptions))
                {
                    continue;
                }
                if (IsValidLine(line, separator, filters, hasAddtlMatrices))
                {
                    count++;
                }
            }
            return(count);
        }
        /// <summary>
        /// Reads a delimited text table from <paramref name="reader"/> into <paramref name="matrixData"/>:
        /// main values (plus quality / imputation matrices when present), category, numeric,
        /// multi-numeric and text annotation columns, and the annotation rows.
        /// </summary>
        /// <param name="colNames">Names of all file columns.</param>
        /// <param name="colDescriptions">Descriptions of all file columns, or <c>null</c> for none.</param>
        /// <param name="mainColIndices">File column indices that form the main matrix.</param>
        /// <param name="catColIndices">File column indices read as category columns.</param>
        /// <param name="numColIndices">File column indices read as numeric columns.</param>
        /// <param name="textColIndices">File column indices read as text columns.</param>
        /// <param name="multiNumColIndices">File column indices read as multi-numeric columns.</param>
        /// <param name="origin">Stored as both name and origin of the matrix.</param>
        /// <param name="matrixData">Matrix to fill.</param>
        /// <param name="annotationRows">Annotation rows keyed by name; split into categorical and numerical by <c>SplitAnnotRows</c>.</param>
        /// <param name="progress">Progress callback receiving a percentage.</param>
        /// <param name="status">Status text callback.</param>
        /// <param name="separator">Field separator.</param>
        /// <param name="reader">Reader over the file; its first line is skipped and it is closed after reading.</param>
        /// <param name="auxReader">Optional second reader handed to <c>GetHasAddtlMatrices</c>; may be <c>null</c>.</param>
        /// <param name="nrows">Number of data rows to allocate.</param>
        /// <param name="shortenExpressionNames">Whether common substrings are removed from the main column names.</param>
        /// <param name="filters">Row filters forwarded to <c>IsValidLine</c>.</param>
        private static void LoadMatrixData(IList <string> colNames, IList <string> colDescriptions, IList <int> mainColIndices,
                                           IList <int> catColIndices, IList <int> numColIndices, IList <int> textColIndices, IList <int> multiNumColIndices,
                                           string origin, IMatrixData matrixData, IDictionary <string, string[]> annotationRows, Action <int> progress,
                                           Action <string> status, char separator, TextReader reader, StreamReader auxReader, int nrows,
                                           bool shortenExpressionNames, List <Tuple <Relation[], int[], bool> > filters)
        {
            Dictionary <string, string[]> catAnnotatRows;
            Dictionary <string, string[]> numAnnotatRows;

            status("Reading data");
            SplitAnnotRows(annotationRows, out catAnnotatRows, out numAnnotatRows);

            // Pre-allocate one container per annotation column, each sized for nrows rows.
            List <string[][]> categoryAnnotation = new List <string[][]>();

            for (int i = 0; i < catColIndices.Count; i++)
            {
                categoryAnnotation.Add(new string[nrows][]);
            }
            List <double[]> numericAnnotation = new List <double[]>();

            for (int i = 0; i < numColIndices.Count; i++)
            {
                numericAnnotation.Add(new double[nrows]);
            }
            List <double[][]> multiNumericAnnotation = new List <double[][]>();

            for (int i = 0; i < multiNumColIndices.Count; i++)
            {
                multiNumericAnnotation.Add(new double[nrows][]);
            }
            List <string[]> stringAnnotation = new List <string[]>();

            for (int i = 0; i < textColIndices.Count; i++)
            {
                stringAnnotation.Add(new string[nrows]);
            }
            float[,] mainValues     = new float[nrows, mainColIndices.Count];
            float[,] qualityValues  = null;
            bool[,] isImputedValues = null;
            bool hasAddtlMatrices = auxReader != null && GetHasAddtlMatrices(auxReader, mainColIndices, separator);

            if (hasAddtlMatrices)
            {
                qualityValues   = new float[nrows, mainColIndices.Count];
                isImputedValues = new bool[nrows, mainColIndices.Count];
            }
            // Skip the first line of the file.
            reader.ReadLine();
            int    count = 0;
            string line;

            while ((line = reader.ReadLine()) != null)
            {
                // Guard the percentage computation: with nrows == 0 the integer division would throw.
                if (nrows > 0)
                {
                    progress(100 * (count + 1) / nrows);
                }
                if (TabSep.IsCommentLine(line, commentPrefix, commentPrefixExceptions))
                {
                    continue;
                }
                string[] w;
                if (!IsValidLine(line, separator, filters, out w, hasAddtlMatrices))
                {
                    continue;
                }
                // Main matrix values; fields missing from a short line become NaN.
                for (int i = 0; i < mainColIndices.Count; i++)
                {
                    if (mainColIndices[i] >= w.Length)
                    {
                        mainValues[count, i] = float.NaN;
                    }
                    else
                    {
                        string s = StringUtils.RemoveWhitespace(w[mainColIndices[i]]);
                        if (hasAddtlMatrices)
                        {
                            ParseExp(s, out mainValues[count, i], out isImputedValues[count, i], out qualityValues[count, i]);
                        }
                        else
                        {
                            if (count < mainValues.GetLength(0))
                            {
                                bool success = float.TryParse(s, out mainValues[count, i]);
                                if (!success)
                                {
                                    mainValues[count, i] = float.NaN;
                                }
                            }
                        }
                    }
                }
                // Numeric annotation columns; unparsable or missing fields become NaN.
                for (int i = 0; i < numColIndices.Count; i++)
                {
                    if (numColIndices[i] >= w.Length)
                    {
                        numericAnnotation[i][count] = double.NaN;
                    }
                    else
                    {
                        double q;
                        bool   success = double.TryParse(w[numColIndices[i]].Trim(), out q);
                        if (numericAnnotation[i].Length > count)
                        {
                            numericAnnotation[i][count] = success ? q : double.NaN;
                        }
                    }
                }
                // Multi-numeric columns: one pair of surrounding quotes is stripped, then the
                // value is split on ';' and each part parsed.
                for (int i = 0; i < multiNumColIndices.Count; i++)
                {
                    if (multiNumColIndices[i] >= w.Length)
                    {
                        multiNumericAnnotation[i][count] = new double[0];
                    }
                    else
                    {
                        string q = w[multiNumColIndices[i]].Trim();
                        if (q.Length >= 2 && q[0] == '\"' && q[q.Length - 1] == '\"')
                        {
                            q = q.Substring(1, q.Length - 2);
                        }
                        if (q.Length >= 2 && q[0] == '\'' && q[q.Length - 1] == '\'')
                        {
                            q = q.Substring(1, q.Length - 2);
                        }
                        string[] ww = q.Length == 0 ? new string[0] : q.Split(';');
                        multiNumericAnnotation[i][count] = new double[ww.Length];
                        for (int j = 0; j < ww.Length; j++)
                        {
                            double q1;
                            bool   success = double.TryParse(ww[j], out q1);
                            multiNumericAnnotation[i][count][j] = success ? q1 : double.NaN;
                        }
                    }
                }
                // Category columns: quotes stripped, split on ';', entries trimmed, empty
                // entries dropped, remainder sorted.
                for (int i = 0; i < catColIndices.Count; i++)
                {
                    if (catColIndices[i] >= w.Length)
                    {
                        categoryAnnotation[i][count] = new string[0];
                    }
                    else
                    {
                        string q = w[catColIndices[i]].Trim();
                        if (q.Length >= 2 && q[0] == '\"' && q[q.Length - 1] == '\"')
                        {
                            q = q.Substring(1, q.Length - 2);
                        }
                        if (q.Length >= 2 && q[0] == '\'' && q[q.Length - 1] == '\'')
                        {
                            q = q.Substring(1, q.Length - 2);
                        }
                        string[]   ww     = q.Length == 0 ? new string[0] : q.Split(';');
                        List <int> valids = new List <int>();
                        for (int j = 0; j < ww.Length; j++)
                        {
                            ww[j] = ww[j].Trim();
                            if (ww[j].Length > 0)
                            {
                                valids.Add(j);
                            }
                        }
                        ww = ArrayUtils.SubArray(ww, valids);
                        Array.Sort(ww);
                        if (categoryAnnotation[i].Length > count)
                        {
                            categoryAnnotation[i][count] = ww;
                        }
                    }
                }
                // Text columns; fields missing from a short line become the empty string.
                for (int i = 0; i < textColIndices.Count; i++)
                {
                    if (textColIndices[i] >= w.Length)
                    {
                        stringAnnotation[i][count] = "";
                    }
                    else
                    {
                        string q = w[textColIndices[i]].Trim();
                        if (stringAnnotation[i].Length > count)
                        {
                            stringAnnotation[i][count] = RemoveSplitWhitespace(RemoveQuotes(q));
                        }
                    }
                }
                count++;
            }
            reader.Close();
            string[] columnNames = ArrayUtils.SubArray(colNames, mainColIndices);
            if (shortenExpressionNames)
            {
                columnNames = StringUtils.RemoveCommonSubstrings(columnNames, true);
            }
            string[] catColnames      = ArrayUtils.SubArray(colNames, catColIndices);
            string[] numColnames      = ArrayUtils.SubArray(colNames, numColIndices);
            string[] multiNumColnames = ArrayUtils.SubArray(colNames, multiNumColIndices);
            string[] textColnames     = ArrayUtils.SubArray(colNames, textColIndices);
            matrixData.Name        = origin;
            matrixData.ColumnNames = RemoveQuotes(columnNames);
            matrixData.Values.Set(mainValues);
            if (hasAddtlMatrices)
            {
                matrixData.Quality.Set(qualityValues);
                matrixData.IsImputed.Set(isImputedValues);
            }
            else
            {
                // No additional matrices in the file: install default-initialised ones of matching size.
                matrixData.Quality.Set(new float[mainValues.GetLength(0), mainValues.GetLength(1)]);
                matrixData.IsImputed.Set(new bool[mainValues.GetLength(0), mainValues.GetLength(1)]);
            }
            matrixData.SetAnnotationColumns(RemoveQuotes(textColnames), stringAnnotation, RemoveQuotes(catColnames),
                                            categoryAnnotation, RemoveQuotes(numColnames), numericAnnotation, RemoveQuotes(multiNumColnames),
                                            multiNumericAnnotation);
            if (colDescriptions != null)
            {
                string[] columnDesc      = ArrayUtils.SubArray(colDescriptions, mainColIndices);
                string[] catColDesc      = ArrayUtils.SubArray(colDescriptions, catColIndices);
                string[] numColDesc      = ArrayUtils.SubArray(colDescriptions, numColIndices);
                string[] multiNumColDesc = ArrayUtils.SubArray(colDescriptions, multiNumColIndices);
                string[] textColDesc     = ArrayUtils.SubArray(colDescriptions, textColIndices);
                matrixData.ColumnDescriptions             = new List <string>(columnDesc);
                matrixData.NumericColumnDescriptions      = new List <string>(numColDesc);
                matrixData.CategoryColumnDescriptions     = new List <string>(catColDesc);
                matrixData.StringColumnDescriptions       = new List <string>(textColDesc);
                matrixData.MultiNumericColumnDescriptions = new List <string>(multiNumColDesc);
            }
            // Categorical annotation rows, restricted to the main columns.
            foreach (string key in catAnnotatRows.Keys)
            {
                string     name  = key;
                string[]   svals = ArrayUtils.SubArray(catAnnotatRows[key], mainColIndices);
                string[][] cat   = new string[svals.Length][];
                for (int i = 0; i < cat.Length; i++)
                {
                    string s = svals[i].Trim();
                    cat[i] = s.Length > 0 ? s.Split(';') : new string[0];
                    List <int> valids = new List <int>();
                    for (int j = 0; j < cat[i].Length; j++)
                    {
                        cat[i][j] = cat[i][j].Trim();
                        if (cat[i][j].Length > 0)
                        {
                            valids.Add(j);
                        }
                    }
                    cat[i] = ArrayUtils.SubArray(cat[i], valids);
                    Array.Sort(cat[i]);
                }
                matrixData.AddCategoryRow(name, name, cat);
            }
            // Numerical annotation rows, restricted to the main columns.
            foreach (string key in numAnnotatRows.Keys)
            {
                string   name  = key;
                string[] svals = ArrayUtils.SubArray(numAnnotatRows[key], mainColIndices);
                double[] num   = new double[svals.Length];
                for (int i = 0; i < num.Length; i++)
                {
                    string s = svals[i].Trim();
                    // double.TryParse writes 0 to its out argument on failure, so NaN has to be
                    // assigned after a failed parse rather than before the call.
                    if (!double.TryParse(s, out num[i]))
                    {
                        num[i] = double.NaN;
                    }
                }
                matrixData.AddNumericRow(name, name, num);
            }
            matrixData.Origin = origin;
            progress(0);
            status("");
        }
        /// <summary>
        /// Loads the file selected in the "File" parameter into <paramref name="mdata"/>.
        /// </summary>
        /// <remarks>
        /// Files whose name ends in ".csv" or ".csv.gz" (case-insensitive) are read with ',' as the
        /// separator; every other file is read with a tab separator. Problems are not thrown for a
        /// missing filename, a non-existent file, a file whose column names cannot be read, or a filter
        /// that cannot be built: they are reported through <c>processInfo.ErrString</c> and the method
        /// returns without loading anything.
        /// </remarks>
        /// <param name="mdata">Matrix that receives the loaded data.</param>
        /// <param name="parameters">Parameter set; the "File" parameter must be a <c>PerseusLoadMatrixParam</c>.</param>
        /// <param name="supplTables">Not read or modified by this method.</param>
        /// <param name="documents">Not read or modified by this method.</param>
        /// <param name="processInfo">Receives error messages and is passed on to the matrix loader.</param>
        public void LoadData(IMatrixData mdata, Parameters parameters, ref IMatrixData[] supplTables,
                             ref IDocumentData[] documents, ProcessInfo processInfo)
        {
            PerseusLoadMatrixParam par = (PerseusLoadMatrixParam)parameters.GetParam("File");
            string filename = par.Filename;

            if (string.IsNullOrEmpty(filename))
            {
                processInfo.ErrString = "Please specify a filename";
                return;
            }
            if (!File.Exists(filename))
            {
                processInfo.ErrString = "File '" + filename + "' does not exist.";
                return;
            }

            // Ordinal, case-insensitive comparison: the extension check must not depend on the
            // current culture's casing or collation rules.
            bool csv = filename.EndsWith(".csv", StringComparison.OrdinalIgnoreCase) ||
                       filename.EndsWith(".csv.gz", StringComparison.OrdinalIgnoreCase);
            char separator = csv ? ',' : '\t';

            string[] colNames;
            Dictionary<string, string[]> annotationRows = new Dictionary<string, string[]>();
            try
            {
                colNames = TabSep.GetColumnNames(filename, PerseusUtils.commentPrefix, PerseusUtils.commentPrefixExceptions,
                                                 annotationRows, separator);
            }
            catch (Exception)
            {
                processInfo.ErrString = "Could not open the file '" + filename + "'. It is probably opened in another program.";
                return;
            }
            string origin = filename;

            int[] eInds = par.MainColumnIndices;
            int[] nInds = par.NumericalColumnIndices;
            int[] cInds = par.CategoryColumnIndices;
            int[] tInds = par.TextColumnIndices;
            int[] mInds = par.MultiNumericalColumnIndices;

            // Collect the filters configured for the main and the numerical columns; the first
            // filter that reports an error aborts the load.
            List<Tuple<Relation[], int[], bool>> filters = new List<Tuple<Relation[], int[], bool>>();
            string errString;
            foreach (Parameters p in par.MainFilterParameters)
            {
                PerseusUtils.AddFilter(filters, p, eInds, out errString);
                if (errString != null)
                {
                    processInfo.ErrString = errString;
                    return;
                }
            }
            foreach (Parameters p in par.NumericalFilterParameters)
            {
                PerseusUtils.AddFilter(filters, p, nInds, out errString);
                if (errString != null)
                {
                    processInfo.ErrString = errString;
                    return;
                }
            }

            int nrows = GetRowCount(filename, eInds, filters, separator);

            // Both readers are opened on the same file and handed to the loader. The using
            // statements guarantee the file handles are released even if the loader throws.
            using (StreamReader reader = FileUtils.GetReader(filename))
            using (StreamReader auxReader = FileUtils.GetReader(filename))
            {
                PerseusUtils.LoadMatrixData(annotationRows, eInds, cInds, nInds, tInds, mInds, processInfo, colNames, mdata, reader,
                                            auxReader, nrows, origin, separator, par.ShortenExpressionColumnNames, filters);
            }

            // NOTE(review): explicit collection kept from the original implementation, presumably to
            // release temporary buffers allocated while loading - confirm it is still needed.
            GC.Collect();
        }