Exemplo n.º 1
0
        /// <summary>
        /// Tries to create a DenseMatrix from a file in RFile format: a header line of column names
        /// followed by one line per row, each starting with the row name and followed by one value per column.
        /// The header may optionally start with an extra name for the column of row names; this is detected
        /// by comparing the field count of the header with that of the first data row.
        /// Reading stops at the end of the file or at the first empty line, whichever comes first.
        /// </summary>
        /// <param name="rFileName">a file in RFile format with delimited columns</param>
        /// <param name="missingValue">The special value that represents 'missing'; stored on the created matrix.</param>
        /// <param name="separatorArray">An array of character delimiters used to split the header and every data line.</param>
        /// <param name="parallelOptions">A ParallelOptions instance that configures the multithreaded behavior of this operation. Not used by this implementation; parsing is sequential.</param>
        /// <param name="result">The DenseMatrix created. It is assigned before parsing starts, so when the method returns false it refers to an incompletely initialized matrix that must not be used.</param>
        /// <param name="errorMsg">If the file is not parsable, an error message about the problem; otherwise the empty string.</param>
        /// <returns>True if the file is parsable; otherwise false</returns>
        public static bool TryParseRFileWithDefaultMissing(string rFileName, TValue missingValue,
                                                           char[] separatorArray, ParallelOptions parallelOptions, out Matrix<TRowKey, TColKey, TValue> result, out string errorMsg)
        {
            errorMsg = "";
            var matrix = new DenseMatrix<TRowKey, TColKey, TValue>();

            // The out parameter refers to the matrix under construction from the start, including on failure paths.
            result = matrix;
            matrix._missingValue = missingValue;

            List<string> unparsedRowNames = new List<string>();
            List<string> unparsedColNames;

            // Rows are collected here and copied into the 2-D array only once the true row and
            // column counts are known, so the array dimensions always match the key collections.
            List<TValue[]> parsedRows = new List<TValue[]>();

            using (TextReader textReader = FileUtils.OpenTextStripComments(rFileName))
            {
                string firstLine = textReader.ReadLine();
                if (null == firstLine)
                {
                    errorMsg = "Expect file to have first line. ";
                    return false;
                }

                unparsedColNames = firstLine.Split(separatorArray).ToList();

                string line;
                while (!string.IsNullOrEmpty(line = textReader.ReadLine()))
                {
                    int rowIndex = parsedRows.Count;

                    // string.Split always yields at least one element, so fields[0] is safe.
                    string[] fields = line.Split(separatorArray);
                    string rowKey = fields[0];

                    // If the first data row has the same length as the header row, then the header row must
                    // contain a name for the column of row names. Remove it and proceed.
                    if (rowIndex == 0 && fields.Length == unparsedColNames.Count)
                    {
                        unparsedColNames.RemoveAt(0);
                    }

                    if (fields.Length != unparsedColNames.Count + 1)
                    {
                        errorMsg = string.Format("Line has {0} fields instead of the expected {1} fields (file={2}, rowKey={3}, rowIndex={4})", fields.Length, unparsedColNames.Count + 1, rFileName, rowKey, rowIndex);
                        return false;
                    }

                    TValue[] rowValues = new TValue[unparsedColNames.Count];
                    for (int colIndex = 0; colIndex < rowValues.Length; ++colIndex)
                    {
                        // Field 0 is the row name, so the value for column colIndex is at colIndex + 1.
                        TValue parsedValue;
                        if (!Parser.TryParse<TValue>(fields[colIndex + 1], out parsedValue))
                        {
                            errorMsg = string.Format("Unable to parse {0} because field {1} cannot be parsed into an instance of type {2}", rFileName, fields[colIndex + 1], typeof(TValue));
                            return false;
                        }
                        rowValues[colIndex] = parsedValue;
                    }

                    unparsedRowNames.Add(rowKey);
                    parsedRows.Add(rowValues);
                }
            }

            IList<TRowKey> rowKeys;
            if (!Parser.TryParseAll<TRowKey>(unparsedRowNames, out rowKeys))
            {
                errorMsg = string.Format("Unable to parse {0} because row names cannot be parsed into an instance of type {1}", rFileName, typeof(TRowKey));
                return false;
            }
            IList<TColKey> colKeys;
            if (!Parser.TryParseAll<TColKey>(unparsedColNames, out colKeys))
            {
                errorMsg = string.Format("Unable to parse {0} because col names cannot be parsed into an instance of type {1}", rFileName, typeof(TColKey));
                return false;
            }
            matrix._rowKeys = new ReadOnlyCollection<TRowKey>(rowKeys);
            matrix._colKeys = new ReadOnlyCollection<TColKey>(colKeys);

            // In the case of sparse files, many of the row keys will be the same and so we return false.
            // The file name is concatenated rather than passed through string.Format so that braces
            // in the path cannot be misread as format items.
            if (matrix._rowKeys.Count != matrix._rowKeys.Distinct().Count())
            {
                errorMsg = "Some rows have the same values as other (look for blank rows). " + rFileName;
                return false;
            }

            // Duplicate column keys would make ToDictionary below throw; report them as a parse failure instead.
            if (matrix._colKeys.Count != matrix._colKeys.Distinct().Count())
            {
                errorMsg = "Some columns have the same name as others. " + rFileName;
                return false;
            }

            // Build the value array with exactly one row per row key and one column per column key.
            int columnCount = unparsedColNames.Count;
            TValue[,] values = new TValue[parsedRows.Count, columnCount];
            for (int rowIndex = 0; rowIndex < parsedRows.Count; ++rowIndex)
            {
                TValue[] rowValues = parsedRows[rowIndex];
                for (int colIndex = 0; colIndex < columnCount; ++colIndex)
                {
                    values[rowIndex, colIndex] = rowValues[colIndex];
                }
            }
            matrix.ValueArray = values;

            matrix._indexOfRowKey = matrix.RowKeys.Select((key, index) => new { key, index }).ToDictionary(pair => pair.key, pair => pair.index);
            matrix._indexOfColKey = matrix.ColKeys.Select((key, index) => new { key, index }).ToDictionary(pair => pair.key, pair => pair.index);

            return true;
        }