/// <summary>
/// Initializes this instance from a dense struct file: validates the first line, reads the
/// column-key header, then walks the file row by row recording, for each row key, the file
/// position immediately after the key's trailing tab.
/// </summary>
/// <param name="denseStructFileName">Path of the dense struct file to index.</param>
/// <param name="parallelOptions">Not used; present so that this method has the same signature as other, similar methods.</param>
/// <param name="fileAccess">Access used to open the file for the initial check and stored in <c>FileAccess</c>. Defaults to 'Read'.</param>
/// <param name="fileShare">Sharing mode used to open the file for the initial check and stored in <c>FileShare</c>. Defaults to 'Read'.</param>
/// <exception cref="MatrixFormatException">
/// The header line is missing, its first field is not "var", a row key appears more than once,
/// or the computed end of a row lies beyond the end of the stream.
/// </exception>
protected void GetInstanceFromDenseStructFileNameInternal(string denseStructFileName, ParallelOptions parallelOptions, FileAccess fileAccess = FileAccess.Read, FileShare fileShare = FileShare.Read)
{
    // parallelOptions is not currently used, but it is needed so that this method has the same signature as other, similar methods.
    lock (this)
    {
        // Quick sanity pass with a throw-away reader: the file must have data and must not start with a comment.
        using (FileStream fileStream = File.Open(denseStructFileName, FileMode.Open, fileAccess, fileShare))
        {
            using (TextReader textReader = new StreamReader(fileStream))
            {
                string firstLineOrNull = textReader.ReadLine();
                Helper.CheckCondition(null != firstLineOrNull, Properties.Resource.ExpectedFileToHaveData, denseStructFileName);
                Helper.CheckCondition(!firstLineOrNull.StartsWith(FileUtils.CommentHeader, StringComparison.Ordinal), Properties.Resource.ExpectedNoCommentsInRowKeysAnsiFiles, denseStructFileName);
            }
        }

        RowKeyToFilePosition = new Dictionary<string, long>();
        DenseStructFileName = denseStructFileName;
        FileAccess = fileAccess;
        FileShare = fileShare;

        string colKeysLineOrNull = ThreadLocalTextReader.ReadLine();

        // This check must come before the line is dereferenced; otherwise a missing header
        // surfaces as a NullReferenceException instead of a MatrixFormatException.
        if (null == colKeysLineOrNull)
        {
            throw new MatrixFormatException("Surprised by empty file. " + denseStructFileName);
        }

        // The first row starts right after the header line.
        // !!!const assuming 2 char newlines
        // NOTE(review): the character count is used as a byte offset, which presumably relies on a one-byte-per-char encoding - confirm.
        long position = colKeysLineOrNull.Length + 2;

        string[] varAndColKeys = colKeysLineOrNull.Split('\t');
        if (!varAndColKeys[0].Equals("var"))
        {
            throw new MatrixFormatException("Expect first row's first value to be 'var'");
        }

        ColSerialNumbers = new SerialNumbers<string>(varAndColKeys.Skip(1));
        _rowKeys = new List<string>();

        CounterWithMessages counterWithMessages = new CounterWithMessages("Reading data file to find location of rows, #{0}", 10000, null);
        while (true)
        {
            counterWithMessages.Increment();
            ThreadLocalStream.Position = position;

            // Read the row key one byte at a time, up to (not including) the tab that ends it.
            StringBuilder sb = new StringBuilder();
            bool reachedEndOfStream = false;
            while (true)
            {
                int i = ThreadLocalStream.ReadByte();
                if (-1 == i)
                {
                    reachedEndOfStream = true;
                    break;
                }
                if ('\t' == (char)i)
                {
                    break;
                }
                sb.Append((char)i);
            }

            // End of stream while looking for a key ends the scan; any partial key read so far is dropped.
            if (reachedEndOfStream)
            {
                break;
            }

            string rowKey = sb.ToString();
            if (RowKeyToFilePosition.ContainsKey(rowKey))
            {
                throw new MatrixFormatException(string.Format(CultureInfo.InvariantCulture, "The rowkey {0} appears more than once", rowKey));
            }

            _rowKeys.Add(rowKey);

            // Skip the key and its tab; the recorded position is where the row's values begin.
            position += rowKey.Length + 1;
            RowKeyToFilePosition.Add(rowKey, position);

            // Skip the fixed-width values and the line ending. !!!assumes two char newlines
            position += ColCount * BytesPerValue + 2;
            if (position > ThreadLocalStream.Length)
            {
                throw new MatrixFormatException("File seems too short");
            }
        }

        _indexOfRowKey = RowKeys.Select((key, index) => new { key, index }).ToDictionary(keyAndIndex => keyAndIndex.key, keyAndIndex => keyAndIndex.index);
    }
}
/// <summary>
/// Builds a frame from the given row and column keys and allocates the value grid.
/// </summary>
/// <param name="rowKeys">Keys to register in <c>RowKeys</c>.</param>
/// <param name="colKeys">Keys to register in <c>ColKeys</c>.</param>
public QDataFrameLite(IEnumerable<TRowKey> rowKeys, IEnumerable<TColKey> colKeys)
{
    // ForEach is an extension declared elsewhere; presumably it hands each element and its
    // zero-based position to the callback - confirm against its definition.
    rowKeys.ForEach((rowKey, rowIndex) => RowKeys.Add(rowKey, rowIndex));
    colKeys.ForEach((colKey, colIndex) => ColKeys.Add(colKey, colIndex));

    // Size the grid from the key collections as they stand after registration.
    var rowTotal = RowKeys.Count;
    var colTotal = ColKeys.Count;
    Data = new TVal[rowTotal, colTotal];
}
/// <summary>
/// Initializes this instance from a file in RowKeys format: a tab-separated header whose first
/// field is "rowKey", a line naming the dense struct file (resolved relative to the rowKeys
/// file's directory), then one "rowKey&lt;TAB&gt;position" line per row. Afterwards a few values are
/// read back from the dense file as a check.
/// </summary>
/// <param name="rowKeysStructFileName">The rowKeys file.</param>
/// <param name="parallelOptions">Not used; present so that this method has the same signature as other, similar methods.</param>
/// <param name="fileAccess">Stored in <c>FileAccess</c>. Defaults to 'Read'.</param>
/// <param name="fileShare">Stored in <c>FileShare</c>. Defaults to 'Read'.</param>
/// <exception cref="MatrixFormatException">
/// The header line or the dense-file-name line is missing, the header's first field is not
/// "rowKey", or a row line does not have exactly two tab-separated fields.
/// </exception>
protected void GetInstanceFromRowKeysStructFileNameInternal(string rowKeysStructFileName, ParallelOptions parallelOptions, FileAccess fileAccess = FileAccess.Read, FileShare fileShare = FileShare.Read)
{
    // parallelOptions is not currently used, but it is needed so that this method has the same signature as other, similar methods.
    lock (this)
    {
        string firstLineOrNull = FileUtils.ReadLine(rowKeysStructFileName);
        Helper.CheckCondition(null != firstLineOrNull, Properties.Resource.ExpectedFileToHaveData, rowKeysStructFileName);
        Helper.CheckCondition(!firstLineOrNull.StartsWith(FileUtils.CommentHeader, StringComparison.Ordinal), Properties.Resource.ExpectedNoCommentsInRowKeysAnsiFiles, rowKeysStructFileName);

        RowKeyToFilePosition = new Dictionary<string, long>();
        FileAccess = fileAccess;
        FileShare = fileShare;

        using (TextReader textReader = File.OpenText(rowKeysStructFileName))
        {
            string colKeysLineOrNull = textReader.ReadLine();

            // This check must come before the line is dereferenced; otherwise a missing header
            // surfaces as a NullReferenceException instead of a MatrixFormatException.
            if (null == colKeysLineOrNull)
            {
                throw new MatrixFormatException("Surprised by empty file. " + rowKeysStructFileName);
            }

            string[] varAndColKeys = colKeysLineOrNull.Split('\t');
            if (!varAndColKeys[0].Equals("rowKey"))
            {
                throw new MatrixFormatException("Expect first row's first value to be 'rowKey'"); //!!!rowKey
            }

            ColSerialNumbers = new SerialNumbers<string>(varAndColKeys.Skip(1));
            _rowKeys = new List<string>();

            //!!!not really thread-safe
            string denseStructFileNameInFile = textReader.ReadLine();
            if (null == denseStructFileNameInFile)
            {
                // Without this, Path.Combine would reject the null with an ArgumentNullException
                // that says nothing about the file being truncated.
                throw new MatrixFormatException("Expect second line to name the dense struct file. " + rowKeysStructFileName);
            }
            DenseStructFileName = Path.Combine(Path.GetDirectoryName(rowKeysStructFileName), denseStructFileNameInFile);

            CounterWithMessages counterWithMessages = new CounterWithMessages("Reading rowKey file to find location of rows, #{0}", 10000, null);
            string line = null;
            while (null != (line = textReader.ReadLine()))
            {
                counterWithMessages.Increment();
                string[] rowKeyAndPosition = line.Split('\t');
                if (rowKeyAndPosition.Length != 2)
                {
                    throw new MatrixFormatException("Expect rows to have two columns");
                }

                string rowKey = rowKeyAndPosition[0];
                // File offsets are machine-readable data, so parse them independently of the user's culture.
                long position = long.Parse(rowKeyAndPosition[1], CultureInfo.InvariantCulture);
                _rowKeys.Add(rowKey);
                RowKeyToFilePosition.Add(rowKey, position);
            }
        }
        Console.WriteLine("all lines read from file [{0}]", rowKeysStructFileName);

        _indexOfRowKey = RowKeys.Select((key, index) => new { key, index }).ToDictionary(keyAndIndex => keyAndIndex.key, keyAndIndex => keyAndIndex.index);
        Console.WriteLine("Dictionary created. Now testing values");

        // Test that values can really be read from the data file.
        if (RowCount > 0 && ColCount > 0)
        {
            GetValueOrMissing(0, 0);

            // Probe the last value of the last row directly through the stream.
            string lastRowKey = RowKeys[RowCount - 1];
            string lastColKey = ColKeys[ColCount - 1];
            int colIndex = ColSerialNumbers.GetOld(lastColKey);

            //!!!kludge - try up to 10 times to get a good value
            for (int attempt = 0; attempt < 10; ++attempt)
            {
                // NOTE(review): the reset to 0 before the real seek is kept from the original;
                // presumably it forces the stream to discard buffered data - confirm.
                ThreadLocalStream.Position = 0;
                ThreadLocalStream.Position = RowKeyToFilePosition[lastRowKey] + colIndex * BytesPerValue;

                byte[] byteArray = new byte[BytesPerValue]; // C# will init to 0's
                int bytesRead = ThreadLocalStream.Read(byteArray, 0, BytesPerValue);
                if ((int)byteArray[0] != 0)
                {
                    break;
                }

                // A leading 0 byte is treated as "not readable yet": wait 10 seconds, then
                // insist the read was at least complete before trying again.
                Thread.Sleep(10000);
                Helper.CheckCondition(bytesRead == BytesPerValue, "Expected to read all the bytes of a value");
            }

            GetValueOrMissing(RowCount / 2, ColCount / 2);
        }
    }
}
//!!!similar to GetInstanceFromDenseStructFileNameInternal
/// <summary>
/// Get an instance from a file in a RowKeys format: a tab-separated header whose first field is
/// "rowKey", a line naming the dense struct file (resolved relative to the rowKeys file's
/// directory), then one "rowKey&lt;TAB&gt;position" line per row.
/// </summary>
/// <param name="rowKeysStructFileName">The rowKeys file</param>
/// <param name="parallelOptions">A ParallelOptions instance that configures the multithreaded behavior of this operation. Not currently used.</param>
/// <param name="fileAccess">A FileAccess value that specifies the operations that can be performed on the file. Defaults to 'Read'</param>
/// <param name="fileShare">A FileShare value specifying the type of access other threads have to the file. Defaults to 'Read'</param>
/// <param name="verbose">When true, a CounterWithMessages is created and incremented once per row line read; when false, no counter is created. Defaults to true.</param>
/// <exception cref="MatrixFormatException">
/// The header line or the dense-file-name line is missing, the header's first field is not
/// "rowKey", or a row line does not have exactly two tab-separated fields.
/// </exception>
protected void GetInstanceFromRowKeysStructFileNameInternal(string rowKeysStructFileName, ParallelOptions parallelOptions, FileAccess fileAccess = FileAccess.Read, FileShare fileShare = FileShare.Read, bool verbose = true)
{
    // parallelOptions is not currently used, but it is needed so that this method has the same signature as other, similar methods.
    lock (this)
    {
        string firstLineOrNull = FileUtils.ReadLine(rowKeysStructFileName);
        Helper.CheckCondition(null != firstLineOrNull, () => string.Format(CultureInfo.InvariantCulture, Properties.Resource.ExpectedFileToHaveData, rowKeysStructFileName));
        Helper.CheckCondition(!firstLineOrNull.StartsWith(FileUtils.CommentHeader, StringComparison.Ordinal), Properties.Resource.ExpectedNoCommentsInRowKeysAnsiFiles, rowKeysStructFileName);

        RowKeyToFilePosition = new Dictionary<string, long>();
        FileAccess = fileAccess;
        FileShare = fileShare;

        using (TextReader textReader = FileUtils.OpenTextStripComments(rowKeysStructFileName))
        {
            string colKeysLineOrNull = textReader.ReadLine();
            if (null == colKeysLineOrNull)
            {
                throw new MatrixFormatException("Surprised by empty file. " + rowKeysStructFileName);
            }

            string[] varAndColKeys = colKeysLineOrNull.Split('\t');
            if (!varAndColKeys[0].Equals("rowKey"))
            {
                throw new MatrixFormatException("Expect first row's first value to be 'rowKey'"); //!!!rowKey
            }

            ColSerialNumbers = new SerialNumbers<string>(varAndColKeys.Skip(1));
            _rowKeys = new List<string>();

            //!!!not really thread-safe
            string denseStructFileNameInFile = textReader.ReadLine();
            if (null == denseStructFileNameInFile)
            {
                // Without this, Path.Combine would reject the null with an ArgumentNullException
                // that says nothing about the file being truncated.
                throw new MatrixFormatException("Expect second line to name the dense struct file. " + rowKeysStructFileName);
            }
            DenseStructFileName = Path.Combine(Path.GetDirectoryName(rowKeysStructFileName), denseStructFileNameInFile);

            CounterWithMessages counterWithMessages = verbose ? new CounterWithMessages("Reading rowKey file to find location of rows, #{0}", 10000, null) : null;
            string line = null;
            while (null != (line = textReader.ReadLine()))
            {
                if (verbose)
                {
                    counterWithMessages.Increment();
                }

                string[] rowKeyAndPosition = line.Split('\t');
                if (rowKeyAndPosition.Length != 2)
                {
                    throw new MatrixFormatException("Expect rows to have two columns");
                }

                string rowKey = rowKeyAndPosition[0];
                // File offsets are machine-readable data, so parse them independently of the user's culture.
                long position = long.Parse(rowKeyAndPosition[1], CultureInfo.InvariantCulture);
                _rowKeys.Add(rowKey);
                RowKeyToFilePosition.Add(rowKey, position);
            }
        }

        _indexOfRowKey = RowKeys.Select((key, index) => new { key, index }).ToDictionary(keyAndIndex => keyAndIndex.key, keyAndIndex => keyAndIndex.index);

        ValueTester(rowKeysStructFileName);
    }
}