/// <summary>
/// Looks at the first usable data line from <paramref name="reader"/> and reports whether the cell in the
/// first expression column contains a ';' character.
/// </summary>
/// <param name="reader">
/// Source of lines. The first line read is discarded (presumably the header row; confirm with callers).
/// The reader is closed before this method returns, except when <paramref name="expressionColIndices"/>
/// is empty, in which case it is not touched at all.
/// </param>
/// <param name="expressionColIndices">Expression column indices; only the first entry is examined.</param>
/// <param name="separator">Column separator handed to <c>SplitLine</c>.</param>
/// <returns>
/// <c>true</c> if the examined cell (after whitespace removal) contains ';'; <c>false</c> otherwise,
/// including when no line is wide enough to contain the column.
/// </returns>
public static bool GetHasAddtlMatrices(StreamReader reader, IList<int> expressionColIndices, char separator)
{
    if (expressionColIndices.Count == 0)
    {
        // Nothing to inspect. The reader is left open and unread on this path, as it always was.
        return false;
    }
    int expressionColIndex = expressionColIndices[0];
    bool hasAddtl = false;
    try
    {
        // Discard the first line before looking for data.
        reader.ReadLine();
        string line;
        while ((line = reader.ReadLine()) != null)
        {
            if (TabSep.IsCommentLine(line, commentPrefix, commentPrefixExceptions))
            {
                continue;
            }
            string[] w = SplitLine(line, separator);
            if (expressionColIndex < w.Length)
            {
                // Only the first line that is wide enough decides the result.
                string s = StringUtils.RemoveWhitespace(w[expressionColIndex]);
                hasAddtl = s.Contains(";");
                break;
            }
            // Line too short to hold the expression column: try the next one.
        }
    }
    finally
    {
        // Close even if reading or splitting throws, so the reader is not leaked.
        reader.Close();
    }
    return hasAddtl;
}
/// <summary>
/// Rewinds <paramref name="reader"/> to the start of its stream, discards the first line and counts the
/// remaining non-comment lines that <c>IsValidLine</c> accepts.
/// </summary>
/// <param name="reader">Reader over a seekable stream. It is rewound but not closed by this method.</param>
/// <param name="auxReader">
/// Optional second reader passed to <c>GetHasAddtlMatrices</c>; may be <c>null</c>, in which case no
/// additional matrices are assumed.
/// </param>
/// <param name="mainColIndices">Indices of the main (expression) columns.</param>
/// <param name="filters">Row filters forwarded unchanged to <c>IsValidLine</c>.</param>
/// <param name="separator">Column separator.</param>
/// <returns>The number of lines accepted by <c>IsValidLine</c>.</returns>
public static int GetRowCount(StreamReader reader, StreamReader auxReader, int[] mainColIndices,
    List<Tuple<Relation[], int[], bool>> filters, char separator)
{
    reader.BaseStream.Seek(0, SeekOrigin.Begin);
    // Seeking the underlying stream does not reset the StreamReader's internal buffer; without this call
    // a reader that has already been read from would keep returning stale buffered characters.
    reader.DiscardBufferedData();
    // Discard the first line.
    reader.ReadLine();
    int count = 0;
    bool hasAddtlMatrices = auxReader != null && GetHasAddtlMatrices(auxReader, mainColIndices, separator);
    string line;
    while ((line = reader.ReadLine()) != null)
    {
        // Skip comment lines via the outer loop so that its end-of-file check also covers a file that
        // ends with a comment line (previously a null line could reach IsCommentLine/IsValidLine).
        if (TabSep.IsCommentLine(line, commentPrefix, commentPrefixExceptions))
        {
            continue;
        }
        if (IsValidLine(line, separator, filters, hasAddtlMatrices))
        {
            count++;
        }
    }
    return count;
}
/// <summary>
/// Reads the data lines from <paramref name="reader"/>, fills the main value matrix and the four kinds of
/// annotation columns (categorical, numeric, multi-numeric, text), and stores everything, together with
/// annotation rows and optional column descriptions, in <paramref name="matrixData"/>.
/// </summary>
/// <param name="colNames">Names of all columns in the file; sub-selected by the index lists below.</param>
/// <param name="colDescriptions">Descriptions of all columns, or <c>null</c> to leave descriptions unset.</param>
/// <param name="mainColIndices">Indices of the columns that form the main value matrix.</param>
/// <param name="catColIndices">Indices of categorical annotation columns (';'-separated terms).</param>
/// <param name="numColIndices">Indices of numeric annotation columns.</param>
/// <param name="textColIndices">Indices of text annotation columns.</param>
/// <param name="multiNumColIndices">Indices of multi-numeric annotation columns (';'-separated numbers).</param>
/// <param name="origin">Assigned to both <c>matrixData.Name</c> and <c>matrixData.Origin</c>.</param>
/// <param name="matrixData">Target object that receives the loaded data.</param>
/// <param name="annotationRows">Annotation rows; split into categorical and numeric by <c>SplitAnnotRows</c>.</param>
/// <param name="progress">Progress callback, called with a percentage per line read and with 0 at the end.</param>
/// <param name="status">Status text callback.</param>
/// <param name="separator">Column separator.</param>
/// <param name="reader">Main data reader; its first line is discarded. Closed after the last line is read.</param>
/// <param name="auxReader">Optional reader handed to <c>GetHasAddtlMatrices</c>; may be <c>null</c>.</param>
/// <param name="nrows">
/// Number of rows to allocate. Presumably the number of valid lines as computed by <c>GetRowCount</c>
/// (confirm with callers).
/// </param>
/// <param name="shortenExpressionNames">If set, common substrings are removed from the main column names.</param>
/// <param name="filters">Row filters forwarded unchanged to <c>IsValidLine</c>.</param>
private static void LoadMatrixData(IList<string> colNames, IList<string> colDescriptions,
    IList<int> mainColIndices, IList<int> catColIndices, IList<int> numColIndices, IList<int> textColIndices,
    IList<int> multiNumColIndices, string origin, IMatrixData matrixData,
    IDictionary<string, string[]> annotationRows, Action<int> progress, Action<string> status, char separator,
    TextReader reader, StreamReader auxReader, int nrows, bool shortenExpressionNames,
    List<Tuple<Relation[], int[], bool>> filters)
{
    Dictionary<string, string[]> catAnnotatRows;
    Dictionary<string, string[]> numAnnotatRows;
    status("Reading data");
    SplitAnnotRows(annotationRows, out catAnnotatRows, out numAnnotatRows);

    // Pre-allocate one array of length nrows per annotation column.
    List<string[][]> categoryAnnotation = new List<string[][]>();
    for (int i = 0; i < catColIndices.Count; i++)
    {
        categoryAnnotation.Add(new string[nrows][]);
    }
    List<double[]> numericAnnotation = new List<double[]>();
    for (int i = 0; i < numColIndices.Count; i++)
    {
        numericAnnotation.Add(new double[nrows]);
    }
    List<double[][]> multiNumericAnnotation = new List<double[][]>();
    for (int i = 0; i < multiNumColIndices.Count; i++)
    {
        multiNumericAnnotation.Add(new double[nrows][]);
    }
    List<string[]> stringAnnotation = new List<string[]>();
    for (int i = 0; i < textColIndices.Count; i++)
    {
        stringAnnotation.Add(new string[nrows]);
    }

    float[,] mainValues = new float[nrows, mainColIndices.Count];
    float[,] qualityValues = null;
    bool[,] isImputedValues = null;
    bool hasAddtlMatrices = auxReader != null && GetHasAddtlMatrices(auxReader, mainColIndices, separator);
    if (hasAddtlMatrices)
    {
        // Quality and imputation matrices are only allocated when the cells carry that extra information.
        qualityValues = new float[nrows, mainColIndices.Count];
        isImputedValues = new bool[nrows, mainColIndices.Count];
    }

    // Discard the first line.
    reader.ReadLine();
    int count = 0;
    string line;
    while ((line = reader.ReadLine()) != null)
    {
        // nrows is 0 when no row is expected (e.g. everything filtered out); skip the percentage then
        // instead of dividing by zero.
        if (nrows > 0)
        {
            progress(100 * (count + 1) / nrows);
        }
        if (TabSep.IsCommentLine(line, commentPrefix, commentPrefixExceptions))
        {
            continue;
        }
        string[] w;
        if (!IsValidLine(line, separator, filters, out w, hasAddtlMatrices))
        {
            continue;
        }

        // Main matrix values; columns missing from a short line become NaN.
        for (int i = 0; i < mainColIndices.Count; i++)
        {
            if (mainColIndices[i] >= w.Length)
            {
                mainValues[count, i] = float.NaN;
            }
            else
            {
                string s = StringUtils.RemoveWhitespace(w[mainColIndices[i]]);
                if (hasAddtlMatrices)
                {
                    ParseExp(s, out mainValues[count, i], out isImputedValues[count, i],
                        out qualityValues[count, i]);
                }
                else
                {
                    // Only this branch tolerates more valid lines than nrows; the others would throw.
                    if (count < mainValues.GetLength(0))
                    {
                        bool success = float.TryParse(s, out mainValues[count, i]);
                        if (!success)
                        {
                            mainValues[count, i] = float.NaN;
                        }
                    }
                }
            }
        }

        // Numeric annotation columns; unparseable or missing cells become NaN.
        for (int i = 0; i < numColIndices.Count; i++)
        {
            if (numColIndices[i] >= w.Length)
            {
                numericAnnotation[i][count] = double.NaN;
            }
            else
            {
                double q;
                bool success = double.TryParse(w[numColIndices[i]].Trim(), out q);
                if (numericAnnotation[i].Length > count)
                {
                    numericAnnotation[i][count] = success ? q : double.NaN;
                }
            }
        }

        // Multi-numeric annotation columns: strip one pair of enclosing quotes, then split on ';'.
        for (int i = 0; i < multiNumColIndices.Count; i++)
        {
            if (multiNumColIndices[i] >= w.Length)
            {
                multiNumericAnnotation[i][count] = new double[0];
            }
            else
            {
                string q = w[multiNumColIndices[i]].Trim();
                if (q.Length >= 2 && q[0] == '\"' && q[q.Length - 1] == '\"')
                {
                    q = q.Substring(1, q.Length - 2);
                }
                if (q.Length >= 2 && q[0] == '\'' && q[q.Length - 1] == '\'')
                {
                    q = q.Substring(1, q.Length - 2);
                }
                string[] ww = q.Length == 0 ? new string[0] : q.Split(';');
                multiNumericAnnotation[i][count] = new double[ww.Length];
                for (int j = 0; j < ww.Length; j++)
                {
                    double q1;
                    bool success = double.TryParse(ww[j], out q1);
                    multiNumericAnnotation[i][count][j] = success ? q1 : double.NaN;
                }
            }
        }

        // Categorical annotation columns: unquote, split on ';', drop empty terms, sort.
        for (int i = 0; i < catColIndices.Count; i++)
        {
            if (catColIndices[i] >= w.Length)
            {
                categoryAnnotation[i][count] = new string[0];
            }
            else
            {
                string q = w[catColIndices[i]].Trim();
                if (q.Length >= 2 && q[0] == '\"' && q[q.Length - 1] == '\"')
                {
                    q = q.Substring(1, q.Length - 2);
                }
                if (q.Length >= 2 && q[0] == '\'' && q[q.Length - 1] == '\'')
                {
                    q = q.Substring(1, q.Length - 2);
                }
                string[] ww = q.Length == 0 ? new string[0] : q.Split(';');
                List<int> valids = new List<int>();
                for (int j = 0; j < ww.Length; j++)
                {
                    ww[j] = ww[j].Trim();
                    if (ww[j].Length > 0)
                    {
                        valids.Add(j);
                    }
                }
                ww = ArrayUtils.SubArray(ww, valids);
                Array.Sort(ww);
                if (categoryAnnotation[i].Length > count)
                {
                    categoryAnnotation[i][count] = ww;
                }
            }
        }

        // Text annotation columns; missing cells become the empty string.
        for (int i = 0; i < textColIndices.Count; i++)
        {
            if (textColIndices[i] >= w.Length)
            {
                stringAnnotation[i][count] = "";
            }
            else
            {
                string q = w[textColIndices[i]].Trim();
                if (stringAnnotation[i].Length > count)
                {
                    stringAnnotation[i][count] = RemoveSplitWhitespace(RemoveQuotes(q));
                }
            }
        }
        count++;
    }
    reader.Close();

    // Column names for each group.
    string[] columnNames = ArrayUtils.SubArray(colNames, mainColIndices);
    if (shortenExpressionNames)
    {
        columnNames = StringUtils.RemoveCommonSubstrings(columnNames, true);
    }
    string[] catColnames = ArrayUtils.SubArray(colNames, catColIndices);
    string[] numColnames = ArrayUtils.SubArray(colNames, numColIndices);
    string[] multiNumColnames = ArrayUtils.SubArray(colNames, multiNumColIndices);
    string[] textColnames = ArrayUtils.SubArray(colNames, textColIndices);

    matrixData.Name = origin;
    matrixData.ColumnNames = RemoveQuotes(columnNames);
    matrixData.Values.Set(mainValues);
    if (hasAddtlMatrices)
    {
        matrixData.Quality.Set(qualityValues);
        matrixData.IsImputed.Set(isImputedValues);
    }
    else
    {
        // No extra information in the file: install default-initialized matrices of matching shape.
        matrixData.Quality.Set(new float[mainValues.GetLength(0), mainValues.GetLength(1)]);
        matrixData.IsImputed.Set(new bool[mainValues.GetLength(0), mainValues.GetLength(1)]);
    }
    matrixData.SetAnnotationColumns(RemoveQuotes(textColnames), stringAnnotation, RemoveQuotes(catColnames),
        categoryAnnotation, RemoveQuotes(numColnames), numericAnnotation, RemoveQuotes(multiNumColnames),
        multiNumericAnnotation);

    if (colDescriptions != null)
    {
        string[] columnDesc = ArrayUtils.SubArray(colDescriptions, mainColIndices);
        string[] catColDesc = ArrayUtils.SubArray(colDescriptions, catColIndices);
        string[] numColDesc = ArrayUtils.SubArray(colDescriptions, numColIndices);
        string[] multiNumColDesc = ArrayUtils.SubArray(colDescriptions, multiNumColIndices);
        string[] textColDesc = ArrayUtils.SubArray(colDescriptions, textColIndices);
        matrixData.ColumnDescriptions = new List<string>(columnDesc);
        matrixData.NumericColumnDescriptions = new List<string>(numColDesc);
        matrixData.CategoryColumnDescriptions = new List<string>(catColDesc);
        matrixData.StringColumnDescriptions = new List<string>(textColDesc);
        matrixData.MultiNumericColumnDescriptions = new List<string>(multiNumColDesc);
    }

    // Categorical annotation rows: one ';'-separated, trimmed, sorted term list per main column.
    foreach (string key in catAnnotatRows.Keys)
    {
        string name = key;
        string[] svals = ArrayUtils.SubArray(catAnnotatRows[key], mainColIndices);
        string[][] cat = new string[svals.Length][];
        for (int i = 0; i < cat.Length; i++)
        {
            string s = svals[i].Trim();
            cat[i] = s.Length > 0 ? s.Split(';') : new string[0];
            List<int> valids = new List<int>();
            for (int j = 0; j < cat[i].Length; j++)
            {
                cat[i][j] = cat[i][j].Trim();
                if (cat[i][j].Length > 0)
                {
                    valids.Add(j);
                }
            }
            cat[i] = ArrayUtils.SubArray(cat[i], valids);
            Array.Sort(cat[i]);
        }
        matrixData.AddCategoryRow(name, name, cat);
    }

    // Numeric annotation rows: one value per main column, NaN where the cell cannot be parsed.
    foreach (string key in numAnnotatRows.Keys)
    {
        string name = key;
        string[] svals = ArrayUtils.SubArray(numAnnotatRows[key], mainColIndices);
        double[] num = new double[svals.Length];
        for (int i = 0; i < num.Length; i++)
        {
            string s = svals[i].Trim();
            // TryParse writes 0 to its out argument on failure, so NaN has to be assigned afterwards
            // rather than before the call.
            double parsed;
            num[i] = double.TryParse(s, out parsed) ? parsed : double.NaN;
        }
        matrixData.AddNumericRow(name, name, num);
    }

    matrixData.Origin = origin;
    progress(0);
    status("");
}