/// <summary>
/// Create a DenseMatrix from a file in RFile format.
/// </summary>
/// <remarks>
/// The first line is the header of column names. If the first data row has the same number of
/// fields as the header, the header's first field is taken to be a label for the row-name column
/// and is dropped. Every data row is a row name followed by one value per column.
/// Reading stops at the first empty line or at end of file.
/// The file is opened twice: once to count lines (an upper bound on the number of data rows)
/// and once, through the comment-stripping reader, to parse the content.
/// Note that <paramref name="result"/> is assigned before parsing starts, so when this method
/// returns false it refers to a partially initialized matrix that must not be used.
/// </remarks>
/// <param name="rFileName">a file in RFile format with delimited columns</param>
/// <param name="missingValue">The special value that represents 'missing'</param>
/// <param name="separatorArray">An array of character delimiters</param>
/// <param name="parallelOptions">A ParallelOptions instance that configures the multithreaded behavior of this operation. (Not referenced by this implementation.)</param>
/// <param name="result">The DenseMatrix created</param>
/// <param name="errorMsg">If the file is not parsable, an error message about the problem.</param>
/// <returns>True if the file is parsable; otherwise false</returns>
public static bool TryParseRFileWithDefaultMissing(string rFileName, TValue missingValue, char[] separatorArray, ParallelOptions parallelOptions, out Matrix<TRowKey, TColKey, TValue> result, out string errorMsg)
{
    errorMsg = "";
    var matrix = new DenseMatrix<TRowKey, TColKey, TValue>();
    result = matrix;
    matrix._missingValue = missingValue;

    // Every line except the header could be a data row. This is only an upper bound: the parse
    // below reads a comment-stripped stream and stops at the first empty line.
    int rowCount = FileUtils.ReadEachLine(rFileName).Count() - 1;

    using (TextReader textReader = FileUtils.OpenTextStripComments(rFileName))
    {
        string firstLine = textReader.ReadLine();
        if (null == firstLine)
        {
            errorMsg = "Expect file to have first line. ";
            return false;
        }
        Debug.Assert(rowCount >= 0); // real assert

        int rowCapacity = rowCount < 0 ? 0 : rowCount;
        List<string> unparsedRowNames = new List<string>(rowCapacity);
        List<string> unparsedColNames = firstLine.Split(separatorArray).ToList();

        // Allocated once the first data row has settled the real number of columns, so the
        // array never carries a spare column for a header that labels the row-name column.
        TValue[,] values = null;

        string line;
        int rowIndex = -1;
        while (!string.IsNullOrEmpty(line = textReader.ReadLine()))
        {
            ++rowIndex;

            // Split always yields at least one element, so fields[0] is always present.
            string[] fields = line.Split(separatorArray);
            string rowKey = fields[0];
            unparsedRowNames.Add(rowKey);

            // If the first data row has the same length as the header row, then the header row must
            // contain a name for the column of row names. Remove it and proceed.
            if (rowIndex == 0 && fields.Length == unparsedColNames.Count)
            {
                unparsedColNames.RemoveAt(0);
            }

            if (fields.Length != unparsedColNames.Count + 1)
            {
                errorMsg = string.Format("Line has {0} fields instead of the expected {1} fields (file={2}, rowKey={3}, rowIndex={4})", fields.Length, unparsedColNames.Count + 1, rFileName, rowKey, rowIndex);
                return false;
            }

            if (values == null)
            {
                values = new TValue[rowCapacity, unparsedColNames.Count];
            }

            // The two passes over the file disagree (e.g. it changed in between); report the
            // problem instead of letting the array write throw.
            if (rowIndex >= rowCapacity)
            {
                errorMsg = string.Format("File has more data rows than the {0} expected from its line count (file={1}, rowKey={2}, rowIndex={3})", rowCapacity, rFileName, rowKey, rowIndex);
                return false;
            }

            for (int colIndex = 0; colIndex < unparsedColNames.Count; ++colIndex)
            {
                TValue r;
                if (!Parser.TryParse<TValue>(fields[colIndex + 1], out r))
                {
                    errorMsg = string.Format("Unable to parse {0} because field {1} cannot be parsed into an instance of type {2}", rFileName, fields[colIndex + 1], typeof(TValue));
                    return false;
                }
                values[rowIndex, colIndex] = r;
            }
        }

        int rowsRead = rowIndex + 1;
        if (values == null)
        {
            // Header only: no data rows were read.
            values = new TValue[0, unparsedColNames.Count];
        }
        else if (rowsRead < values.GetLength(0))
        {
            // Fewer rows were parsed than lines were counted (comment lines, trailing blank
            // lines). Shrink so the array's row dimension matches the row keys. Both arrays have
            // the same column count, so the leading elements can be block-copied.
            TValue[,] trimmed = new TValue[rowsRead, unparsedColNames.Count];
            System.Array.Copy(values, trimmed, trimmed.Length);
            values = trimmed;
        }
        matrix.ValueArray = values;

        IList<TRowKey> rowKeys;
        if (!Parser.TryParseAll<TRowKey>(unparsedRowNames, out rowKeys))
        {
            errorMsg = string.Format("Unable to parse {0} because row names cannot be parsed into an instance of type {1}", rFileName, typeof(TRowKey));
            return false;
        }

        IList<TColKey> colKeys;
        if (!Parser.TryParseAll<TColKey>(unparsedColNames, out colKeys))
        {
            errorMsg = string.Format("Unable to parse {0} because col names cannot be parsed into an instance of type {1}", rFileName, typeof(TColKey));
            return false;
        }

        matrix._rowKeys = new ReadOnlyCollection<TRowKey>(rowKeys);
        matrix._colKeys = new ReadOnlyCollection<TColKey>(colKeys);
    }

    // In the case of sparse files, many of the row keys will be the same and so we return false.
    // The file name is appended by plain concatenation: it must not pass through string.Format
    // as part of the format string, because a '{' or '}' in the path would throw.
    if (matrix._rowKeys.Count != matrix._rowKeys.Distinct().Count())
    {
        errorMsg = "Some rows have the same values as other (look for blank rows). " + rFileName;
        return false;
    }

    // Duplicate column names would make the index dictionary below throw; report them instead.
    if (matrix._colKeys.Count != matrix._colKeys.Distinct().Count())
    {
        errorMsg = "Some columns have the same names as other. " + rFileName;
        return false;
    }

    matrix._indexOfRowKey = matrix.RowKeys.Select((key, index) => new { key, index }).ToDictionary(pair => pair.key, pair => pair.index);
    matrix._indexOfColKey = matrix.ColKeys.Select((key, index) => new { key, index }).ToDictionary(pair => pair.key, pair => pair.index);
    return true;
}