Esempio n. 1
0
        /// <summary>
        /// Inspects the first usable data line of <paramref name="reader"/> and reports whether the entry
        /// in the first expression column contains a ';' after whitespace removal.
        /// </summary>
        /// <param name="reader">Source to inspect. It is closed before this method returns, except when
        /// <paramref name="expressionColIndices"/> is empty, in which case it is left untouched.</param>
        /// <param name="expressionColIndices">Column indices of the expression columns; only the first
        /// entry is examined.</param>
        /// <param name="separator">Column separator handed to <c>SplitLine</c>.</param>
        /// <returns>
        /// <c>true</c> if the first non-comment line that has enough columns holds a ';' in the first
        /// expression column; <c>false</c> otherwise, including when no such line exists.
        /// </returns>
        public static bool GetHasAddtlMatrices(StreamReader reader, IList <int> expressionColIndices, char separator)
        {
            if (expressionColIndices.Count == 0)
            {
                // Nothing to inspect. The reader is deliberately not consumed or closed on this path.
                return(false);
            }
            int  expressionColIndex = expressionColIndices[0];
            bool hasAddtl           = false;

            try
            {
                // The first line is skipped unconditionally (presumably the header row).
                reader.ReadLine();
                string line;
                while ((line = reader.ReadLine()) != null)
                {
                    if (TabSep.IsCommentLine(line, commentPrefix, commentPrefixExceptions))
                    {
                        continue;
                    }
                    string[] w = SplitLine(line, separator);
                    if (expressionColIndex < w.Length)
                    {
                        // Only the first line that actually has the column is decisive.
                        string s = StringUtils.RemoveWhitespace(w[expressionColIndex]);
                        hasAddtl = s.Contains(";");
                        break;
                    }
                }
            }
            finally
            {
                // Release the reader even when reading or splitting throws.
                reader.Close();
            }
            return(hasAddtl);
        }
Esempio n. 2
0
        /// <summary>
        /// Rewinds <paramref name="reader"/> to the start of its stream, skips the first line and counts
        /// the remaining non-comment lines that <c>IsValidLine</c> accepts.
        /// </summary>
        /// <param name="reader">Reader over a seekable stream. It is rewound and read to the end, and is
        /// not closed by this method.</param>
        /// <param name="auxReader">Optional second reader handed to <see cref="GetHasAddtlMatrices"/>;
        /// may be <c>null</c>, in which case no additional matrices are assumed.</param>
        /// <param name="mainColIndices">Indices of the main columns, forwarded to
        /// <see cref="GetHasAddtlMatrices"/>.</param>
        /// <param name="filters">Row filters forwarded to <c>IsValidLine</c>.</param>
        /// <param name="separator">Column separator.</param>
        /// <returns>The number of lines accepted by <c>IsValidLine</c>.</returns>
        public static int GetRowCount(StreamReader reader, StreamReader auxReader, int[] mainColIndices,
                                      List <Tuple <Relation[], int[], bool> > filters, char separator)
        {
            reader.BaseStream.Seek(0, SeekOrigin.Begin);
            // Seeking the base stream does not reset the reader's internal buffer; without this call
            // a reader that was already used would keep serving stale buffered characters.
            reader.DiscardBufferedData();
            // The first line is skipped unconditionally (presumably the header row).
            reader.ReadLine();
            int    count            = 0;
            bool   hasAddtlMatrices = auxReader != null && GetHasAddtlMatrices(auxReader, mainColIndices, separator);
            string line;

            while ((line = reader.ReadLine()) != null)
            {
                // Skip comment lines one at a time so that a comment at the very end of the input
                // terminates the loop instead of feeding a null line to the helpers.
                if (TabSep.IsCommentLine(line, commentPrefix, commentPrefixExceptions))
                {
                    continue;
                }
                if (IsValidLine(line, separator, filters, hasAddtlMatrices))
                {
                    count++;
                }
            }
            return(count);
        }
Esempio n. 3
0
        /// <summary>
        /// Reads a separated text table from <paramref name="reader"/> and stores its main values,
        /// annotation columns, annotation rows, names and descriptions in <paramref name="matrixData"/>.
        /// </summary>
        /// <param name="colNames">Names of all columns of the table; the index lists select from it.</param>
        /// <param name="colDescriptions">Descriptions of all columns, or <c>null</c> to leave the
        /// description properties of <paramref name="matrixData"/> untouched.</param>
        /// <param name="mainColIndices">Indices of the columns parsed into the main value matrix.</param>
        /// <param name="catColIndices">Indices of columns parsed as ';'-separated category terms.</param>
        /// <param name="numColIndices">Indices of columns parsed as a single number.</param>
        /// <param name="textColIndices">Indices of columns stored as text.</param>
        /// <param name="multiNumColIndices">Indices of columns parsed as ';'-separated numbers.</param>
        /// <param name="origin">Assigned to both <c>Name</c> and <c>Origin</c> of the matrix.</param>
        /// <param name="matrixData">The matrix object that receives the loaded data.</param>
        /// <param name="annotationRows">Annotation rows keyed by name; divided into category and numeric
        /// rows by <c>SplitAnnotRows</c>.</param>
        /// <param name="progress">Receives a percentage while rows are read and 0 when loading ends.</param>
        /// <param name="status">Receives "Reading data" at the start and an empty string at the end.</param>
        /// <param name="separator">Column separator.</param>
        /// <param name="reader">Source of the table. The first line is skipped; the reader is closed by
        /// this method, also when reading fails.</param>
        /// <param name="auxReader">Optional reader handed to <see cref="GetHasAddtlMatrices"/>; may be
        /// <c>null</c>.</param>
        /// <param name="nrows">Number of rows to allocate. Input lines beyond this capacity are ignored.</param>
        /// <param name="shortenExpressionNames">When true, <c>StringUtils.RemoveCommonSubstrings</c> is
        /// applied to the main column names.</param>
        /// <param name="filters">Row filters forwarded to <c>IsValidLine</c>.</param>
        private static void LoadMatrixData(IList <string> colNames, IList <string> colDescriptions, IList <int> mainColIndices,
                                           IList <int> catColIndices, IList <int> numColIndices, IList <int> textColIndices, IList <int> multiNumColIndices,
                                           string origin, IMatrixData matrixData, IDictionary <string, string[]> annotationRows, Action <int> progress,
                                           Action <string> status, char separator, TextReader reader, StreamReader auxReader, int nrows,
                                           bool shortenExpressionNames, List <Tuple <Relation[], int[], bool> > filters)
        {
            Dictionary <string, string[]> catAnnotatRows;
            Dictionary <string, string[]> numAnnotatRows;

            status("Reading data");
            SplitAnnotRows(annotationRows, out catAnnotatRows, out numAnnotatRows);

            // Pre-allocate one per-row container for every annotation column.
            List <string[][]> categoryAnnotation = new List <string[][]>();

            for (int i = 0; i < catColIndices.Count; i++)
            {
                categoryAnnotation.Add(new string[nrows][]);
            }
            List <double[]> numericAnnotation = new List <double[]>();

            for (int i = 0; i < numColIndices.Count; i++)
            {
                numericAnnotation.Add(new double[nrows]);
            }
            List <double[][]> multiNumericAnnotation = new List <double[][]>();

            for (int i = 0; i < multiNumColIndices.Count; i++)
            {
                multiNumericAnnotation.Add(new double[nrows][]);
            }
            List <string[]> stringAnnotation = new List <string[]>();

            for (int i = 0; i < textColIndices.Count; i++)
            {
                stringAnnotation.Add(new string[nrows]);
            }
            float[,] mainValues     = new float[nrows, mainColIndices.Count];
            float[,] qualityValues  = null;
            bool[,] isImputedValues = null;
            bool hasAddtlMatrices = auxReader != null && GetHasAddtlMatrices(auxReader, mainColIndices, separator);

            if (hasAddtlMatrices)
            {
                qualityValues   = new float[nrows, mainColIndices.Count];
                isImputedValues = new bool[nrows, mainColIndices.Count];
            }
            try
            {
                // The first line is skipped unconditionally (presumably the header row).
                reader.ReadLine();
                int    count = 0;
                string line;

                while ((line = reader.ReadLine()) != null)
                {
                    if (count >= nrows)
                    {
                        // Every pre-allocated row is filled; further lines cannot be stored. Stopping here
                        // applies one rule to all column types and also avoids dividing by nrows == 0.
                        break;
                    }
                    // Computed in 64 bit so that very large row counts cannot overflow.
                    progress((int)(100L * (count + 1) / nrows));
                    if (TabSep.IsCommentLine(line, commentPrefix, commentPrefixExceptions))
                    {
                        continue;
                    }
                    string[] w;
                    if (!IsValidLine(line, separator, filters, out w, hasAddtlMatrices))
                    {
                        continue;
                    }
                    // Main values: a missing or unparsable cell becomes NaN.
                    for (int i = 0; i < mainColIndices.Count; i++)
                    {
                        if (mainColIndices[i] >= w.Length)
                        {
                            mainValues[count, i] = float.NaN;
                        }
                        else
                        {
                            string s = StringUtils.RemoveWhitespace(w[mainColIndices[i]]);
                            if (hasAddtlMatrices)
                            {
                                ParseExp(s, out mainValues[count, i], out isImputedValues[count, i], out qualityValues[count, i]);
                            }
                            else
                            {
                                bool success = float.TryParse(s, out mainValues[count, i]);
                                if (!success)
                                {
                                    mainValues[count, i] = float.NaN;
                                }
                            }
                        }
                    }
                    // Numeric columns: a missing or unparsable cell becomes NaN.
                    for (int i = 0; i < numColIndices.Count; i++)
                    {
                        if (numColIndices[i] >= w.Length)
                        {
                            numericAnnotation[i][count] = double.NaN;
                        }
                        else
                        {
                            double q;
                            bool   success = double.TryParse(w[numColIndices[i]].Trim(), out q);
                            numericAnnotation[i][count] = success ? q : double.NaN;
                        }
                    }
                    // Multi-numeric columns: optional enclosing quotes are stripped, then split on ';'.
                    for (int i = 0; i < multiNumColIndices.Count; i++)
                    {
                        if (multiNumColIndices[i] >= w.Length)
                        {
                            multiNumericAnnotation[i][count] = new double[0];
                        }
                        else
                        {
                            string q = w[multiNumColIndices[i]].Trim();
                            if (q.Length >= 2 && q[0] == '\"' && q[q.Length - 1] == '\"')
                            {
                                q = q.Substring(1, q.Length - 2);
                            }
                            if (q.Length >= 2 && q[0] == '\'' && q[q.Length - 1] == '\'')
                            {
                                q = q.Substring(1, q.Length - 2);
                            }
                            string[] ww = q.Length == 0 ? new string[0] : q.Split(';');
                            multiNumericAnnotation[i][count] = new double[ww.Length];
                            for (int j = 0; j < ww.Length; j++)
                            {
                                double q1;
                                bool   success = double.TryParse(ww[j], out q1);
                                multiNumericAnnotation[i][count][j] = success ? q1 : double.NaN;
                            }
                        }
                    }
                    // Category columns: strip quotes, split on ';', trim, drop empty terms, sort.
                    for (int i = 0; i < catColIndices.Count; i++)
                    {
                        if (catColIndices[i] >= w.Length)
                        {
                            categoryAnnotation[i][count] = new string[0];
                        }
                        else
                        {
                            string q = w[catColIndices[i]].Trim();
                            if (q.Length >= 2 && q[0] == '\"' && q[q.Length - 1] == '\"')
                            {
                                q = q.Substring(1, q.Length - 2);
                            }
                            if (q.Length >= 2 && q[0] == '\'' && q[q.Length - 1] == '\'')
                            {
                                q = q.Substring(1, q.Length - 2);
                            }
                            string[]   ww     = q.Length == 0 ? new string[0] : q.Split(';');
                            List <int> valids = new List <int>();
                            for (int j = 0; j < ww.Length; j++)
                            {
                                ww[j] = ww[j].Trim();
                                if (ww[j].Length > 0)
                                {
                                    valids.Add(j);
                                }
                            }
                            ww = ArrayUtils.SubArray(ww, valids);
                            Array.Sort(ww);
                            categoryAnnotation[i][count] = ww;
                        }
                    }
                    // Text columns: a missing cell becomes the empty string.
                    for (int i = 0; i < textColIndices.Count; i++)
                    {
                        if (textColIndices[i] >= w.Length)
                        {
                            stringAnnotation[i][count] = "";
                        }
                        else
                        {
                            string q = w[textColIndices[i]].Trim();
                            stringAnnotation[i][count] = RemoveSplitWhitespace(RemoveQuotes(q));
                        }
                    }
                    count++;
                }
            }
            finally
            {
                // Release the reader even when parsing throws.
                reader.Close();
            }
            string[] columnNames = ArrayUtils.SubArray(colNames, mainColIndices);
            if (shortenExpressionNames)
            {
                columnNames = StringUtils.RemoveCommonSubstrings(columnNames, true);
            }
            string[] catColnames      = ArrayUtils.SubArray(colNames, catColIndices);
            string[] numColnames      = ArrayUtils.SubArray(colNames, numColIndices);
            string[] multiNumColnames = ArrayUtils.SubArray(colNames, multiNumColIndices);
            string[] textColnames     = ArrayUtils.SubArray(colNames, textColIndices);
            matrixData.Name        = origin;
            matrixData.ColumnNames = RemoveQuotes(columnNames);
            matrixData.Values.Set(mainValues);
            if (hasAddtlMatrices)
            {
                matrixData.Quality.Set(qualityValues);
                matrixData.IsImputed.Set(isImputedValues);
            }
            else
            {
                // No additional matrices in the input: supply default-initialized ones of matching shape.
                matrixData.Quality.Set(new float[mainValues.GetLength(0), mainValues.GetLength(1)]);
                matrixData.IsImputed.Set(new bool[mainValues.GetLength(0), mainValues.GetLength(1)]);
            }
            matrixData.SetAnnotationColumns(RemoveQuotes(textColnames), stringAnnotation, RemoveQuotes(catColnames),
                                            categoryAnnotation, RemoveQuotes(numColnames), numericAnnotation, RemoveQuotes(multiNumColnames),
                                            multiNumericAnnotation);
            if (colDescriptions != null)
            {
                string[] columnDesc      = ArrayUtils.SubArray(colDescriptions, mainColIndices);
                string[] catColDesc      = ArrayUtils.SubArray(colDescriptions, catColIndices);
                string[] numColDesc      = ArrayUtils.SubArray(colDescriptions, numColIndices);
                string[] multiNumColDesc = ArrayUtils.SubArray(colDescriptions, multiNumColIndices);
                string[] textColDesc     = ArrayUtils.SubArray(colDescriptions, textColIndices);
                matrixData.ColumnDescriptions             = new List <string>(columnDesc);
                matrixData.NumericColumnDescriptions      = new List <string>(numColDesc);
                matrixData.CategoryColumnDescriptions     = new List <string>(catColDesc);
                matrixData.StringColumnDescriptions       = new List <string>(textColDesc);
                matrixData.MultiNumericColumnDescriptions = new List <string>(multiNumColDesc);
            }
            // Category annotation rows: one sorted, trimmed term list per main column.
            foreach (string key in catAnnotatRows.Keys)
            {
                string     name  = key;
                string[]   svals = ArrayUtils.SubArray(catAnnotatRows[key], mainColIndices);
                string[][] cat   = new string[svals.Length][];
                for (int i = 0; i < cat.Length; i++)
                {
                    string s = svals[i].Trim();
                    cat[i] = s.Length > 0 ? s.Split(';') : new string[0];
                    List <int> valids = new List <int>();
                    for (int j = 0; j < cat[i].Length; j++)
                    {
                        cat[i][j] = cat[i][j].Trim();
                        if (cat[i][j].Length > 0)
                        {
                            valids.Add(j);
                        }
                    }
                    cat[i] = ArrayUtils.SubArray(cat[i], valids);
                    Array.Sort(cat[i]);
                }
                matrixData.AddCategoryRow(name, name, cat);
            }
            // Numeric annotation rows: one value per main column.
            foreach (string key in numAnnotatRows.Keys)
            {
                string   name  = key;
                string[] svals = ArrayUtils.SubArray(numAnnotatRows[key], mainColIndices);
                double[] num   = new double[svals.Length];
                for (int i = 0; i < num.Length; i++)
                {
                    string s = svals[i].Trim();
                    // TryParse writes 0 to its out argument on failure, so NaN has to be chosen
                    // explicitly from the return value.
                    double parsed;
                    num[i] = double.TryParse(s, out parsed) ? parsed : double.NaN;
                }
                matrixData.AddNumericRow(name, name, num);
            }
            matrixData.Origin = origin;
            progress(0);
            status("");
        }