Пример #1
0
        /// <summary>
        /// Initializes this instance from a dense-structure file: validates the first line, reads the
        /// column keys from the header row, then scans the file once to record the byte position at
        /// which each row's values start.
        /// </summary>
        /// <param name="denseStructFileName">The dense-structure file to index.</param>
        /// <param name="parallelOptions">Not currently used; kept so that this method has the same signature as other, similar methods.</param>
        /// <param name="fileAccess">Used to open the file for the initial check and stored in <c>FileAccess</c>. Defaults to 'Read'.</param>
        /// <param name="fileShare">Used to open the file for the initial check and stored in <c>FileShare</c>. Defaults to 'Read'.</param>
        /// <exception cref="MatrixFormatException">
        /// The header row is missing or does not start with 'var', a row key appears more than once,
        /// or the file is shorter than the computed row layout requires.
        /// </exception>
        protected void GetInstanceFromDenseStructFileNameInternal(string denseStructFileName, ParallelOptions parallelOptions, FileAccess fileAccess = FileAccess.Read, FileShare fileShare = FileShare.Read)
        {
            lock (this)
            {
                // Check up front that the file has data and does not start with a comment line.
                using (FileStream fileStream = File.Open(denseStructFileName, FileMode.Open, fileAccess, fileShare))
                using (TextReader textReader = new StreamReader(fileStream))
                {
                    string firstLineOrNull = textReader.ReadLine();
                    Helper.CheckCondition(null != firstLineOrNull, Properties.Resource.ExpectedFileToHaveData, denseStructFileName);
                    Helper.CheckCondition(!firstLineOrNull.StartsWith(FileUtils.CommentHeader, StringComparison.Ordinal), Properties.Resource.ExpectedNoCommentsInRowKeysAnsiFiles, denseStructFileName);
                }

                RowKeyToFilePosition = new Dictionary <string, long>();
                DenseStructFileName  = denseStructFileName;
                FileAccess           = fileAccess;
                FileShare            = fileShare;

                // NOTE(review): ThreadLocalTextReader/ThreadLocalStream presumably open DenseStructFileName
                // using the FileAccess/FileShare values assigned above - confirm against their definitions.
                string colKeysLineOrNull = ThreadLocalTextReader.ReadLine();

                // The null check must come before the line is used; previously it came after the
                // dereference, so an empty read raised NullReferenceException instead of this error.
                if (null == colKeysLineOrNull)
                {
                    throw new MatrixFormatException("Surprised by empty file. " + denseStructFileName);
                }

                string[] varAndColKeys = colKeysLineOrNull.Split('\t');
                if (!varAndColKeys[0].Equals("var", StringComparison.Ordinal))
                {
                    throw new MatrixFormatException("Expect first row's first value to be 'var'");
                }
                ColSerialNumbers = new SerialNumbers <string>(varAndColKeys.Skip(1));
                _rowKeys         = new List <string>();

                // Byte offset of the first data row. Assumes two-character newlines, and that the
                // header's character count equals its byte count (one byte per character).
                long position = colKeysLineOrNull.Length + 2;

                CounterWithMessages counterWithMessages = new CounterWithMessages("Reading data file to find location of rows, #{0}", 10000, null);

                bool reachedEndOfFile = false;
                while (!reachedEndOfFile)
                {
                    counterWithMessages.Increment();
                    ThreadLocalStream.Position = position;

                    // Read the row key one byte at a time, up to (not including) the first tab.
                    StringBuilder rowKeyBuilder = new StringBuilder();
                    while (true)
                    {
                        int nextByte = ThreadLocalStream.ReadByte();
                        if (-1 == nextByte)
                        {
                            // End of stream: any partially read key is discarded, as before.
                            reachedEndOfFile = true;
                            break;
                        }
                        if ('\t' == (char)nextByte)
                        {
                            break;
                        }
                        rowKeyBuilder.Append((char)nextByte);
                    }

                    if (reachedEndOfFile)
                    {
                        break;
                    }

                    string rowKey = rowKeyBuilder.ToString();
                    if (RowKeyToFilePosition.ContainsKey(rowKey))
                    {
                        throw new MatrixFormatException(string.Format(CultureInfo.InvariantCulture, "The rowkey {0} appears more than once", rowKey));
                    }

                    _rowKeys.Add(rowKey);

                    // Skip the key and its tab: the recorded position is where the row's values begin.
                    position += rowKey.Length + 1;
                    RowKeyToFilePosition.Add(rowKey, position);

                    // Skip the row's values and the newline (assumes two-character newlines).
                    position += ColCount * BytesPerValue + 2;
                    if (position > ThreadLocalStream.Length)
                    {
                        throw new MatrixFormatException("File seems too short");
                    }
                }

                _indexOfRowKey = RowKeys.Select((key, index) => new { key, index }).ToDictionary(keyAndIndex => keyAndIndex.key, keyAndIndex => keyAndIndex.index);
            }
        }
Пример #2
0
 /// <summary>
 /// Builds a frame from the given row and column keys and allocates its value table.
 /// </summary>
 /// <param name="rowKeys">Keys to register in <c>RowKeys</c>, each paired with the index supplied by <c>ForEach</c>.</param>
 /// <param name="colKeys">Keys to register in <c>ColKeys</c>, each paired with the index supplied by <c>ForEach</c>.</param>
 public QDataFrameLite(IEnumerable <TRowKey> rowKeys, IEnumerable <TColKey> colKeys)
 {
     // NOTE(review): the index passed by ForEach is presumably the zero-based position
     // of the key in the sequence - confirm against the ForEach extension.
     rowKeys.ForEach((rowKey, rowIndex) => RowKeys.Add(rowKey, rowIndex));
     colKeys.ForEach((colKey, colIndex) => ColKeys.Add(colKey, colIndex));

     // Size the table from the number of keys actually registered.
     var rowTotal = RowKeys.Count;
     var colTotal = ColKeys.Count;
     Data = new TVal[rowTotal, colTotal];
 }
Пример #3
0
        /// <summary>
        /// Initializes this instance from a file in RowKeys format: reads the column keys from the
        /// header row, the dense-structure file name from the second row, and a row key plus file
        /// position from every following row. It then reads a few values from the data file to check
        /// that it can really be read.
        /// </summary>
        /// <param name="rowKeysStructFileName">The rowKeys file.</param>
        /// <param name="parallelOptions">Not currently used; kept so that this method has the same signature as other, similar methods.</param>
        /// <param name="fileAccess">Stored in <c>FileAccess</c>. Defaults to 'Read'.</param>
        /// <param name="fileShare">Stored in <c>FileShare</c>. Defaults to 'Read'.</param>
        /// <exception cref="MatrixFormatException">
        /// The header row is missing or does not start with 'rowKey', the row naming the dense-structure
        /// file is missing, or a data row does not have exactly two tab-separated columns.
        /// </exception>
        protected void GetInstanceFromRowKeysStructFileNameInternal(string rowKeysStructFileName, ParallelOptions parallelOptions, FileAccess fileAccess = FileAccess.Read, FileShare fileShare = FileShare.Read)
        {
            lock (this)
            {
                // Check up front that the file has data and does not start with a comment line.
                string firstLineOrNull = FileUtils.ReadLine(rowKeysStructFileName);
                Helper.CheckCondition(null != firstLineOrNull, Properties.Resource.ExpectedFileToHaveData, rowKeysStructFileName);
                Helper.CheckCondition(!firstLineOrNull.StartsWith(FileUtils.CommentHeader, StringComparison.Ordinal), Properties.Resource.ExpectedNoCommentsInRowKeysAnsiFiles, rowKeysStructFileName);

                RowKeyToFilePosition = new Dictionary <string, long>();
                FileAccess           = fileAccess;
                FileShare            = fileShare;

                using (TextReader textReader = File.OpenText(rowKeysStructFileName))
                {
                    string colKeysLineOrNull = textReader.ReadLine();

                    // The null check must come before the line is used; previously it came after the
                    // Split call, so an empty file raised NullReferenceException instead of this error.
                    if (null == colKeysLineOrNull)
                    {
                        throw new MatrixFormatException("Surprised by empty file. " + rowKeysStructFileName);
                    }

                    string[] varAndColKeys = colKeysLineOrNull.Split('\t');
                    if (!varAndColKeys[0].Equals("rowKey", StringComparison.Ordinal))
                    {
                        throw new MatrixFormatException("Expect first row's first value to be 'rowKey'"); //!!!rowKey
                    }
                    ColSerialNumbers = new SerialNumbers <string>(varAndColKeys.Skip(1));
                    _rowKeys         = new List <string>();

                    //!!!not really thread-safe
                    // The second row names the dense-structure file, relative to the rowKeys file's directory.
                    string denseStructFileNameInFile = textReader.ReadLine();
                    if (null == denseStructFileNameInFile)
                    {
                        // Without this guard a header-only file surfaced as an ArgumentNullException from Path.Combine.
                        throw new MatrixFormatException("Expect second row to name the dense struct file. " + rowKeysStructFileName);
                    }
                    DenseStructFileName = Path.Combine(Path.GetDirectoryName(rowKeysStructFileName), denseStructFileNameInFile);

                    CounterWithMessages counterWithMessages = new CounterWithMessages("Reading rowKey file to find location of rows, #{0}", 10000, null);

                    // Every remaining row is "<rowKey>\t<position>".
                    string line = null;
                    while (null != (line = textReader.ReadLine()))
                    {
                        counterWithMessages.Increment();
                        string[] rowKeyAndPosition = line.Split('\t');
                        if (rowKeyAndPosition.Length != 2)
                        {
                            throw new MatrixFormatException("Expect rows to have two columns");
                        }
                        string rowKey   = rowKeyAndPosition[0];
                        long   position = long.Parse(rowKeyAndPosition[1], CultureInfo.CurrentCulture);
                        _rowKeys.Add(rowKey);
                        RowKeyToFilePosition.Add(rowKey, position);
                    }
                }
                Console.WriteLine("all lines read from file [{0}]", rowKeysStructFileName);

                _indexOfRowKey = RowKeys.Select((key, index) => new { key, index }).ToDictionary(keyAndIndex => keyAndIndex.key, keyAndIndex => keyAndIndex.index);
                Console.WriteLine("Dictionary created. Now testing values");

                // Test that values can really be read from the data file.
                if (RowCount > 0 && ColCount > 0)
                {
                    GetValueOrMissing(0, 0);

                    string lastRowKey   = RowKeys[RowCount - 1];
                    string lastColKey   = ColKeys[ColCount - 1];
                    int    lastColIndex = ColSerialNumbers.GetOld(lastColKey);

                    //!!!kludge - try up to 10 times to get a good value
                    for (int attempt = 0; attempt < 10; ++attempt)
                    {
                        ThreadLocalStream.Position = 0;
                        ThreadLocalStream.Position = RowKeyToFilePosition[lastRowKey] + lastColIndex * BytesPerValue;
                        byte[] byteArray = new byte[BytesPerValue]; //C# will init to 0's
                        int bytesRead = ThreadLocalStream.Read(byteArray, 0, BytesPerValue);

                        // A non-zero first byte is treated as a good read.
                        if ((int)byteArray[0] != 0)
                        {
                            break;
                        }

                        // Fail on a short read before waiting; sleeping first only delayed the same failure.
                        Helper.CheckCondition(bytesRead == BytesPerValue, "Expected to read all the bytes of a value");

                        // The first byte was 0: wait 10 seconds before trying again.
                        Thread.Sleep(10000);
                    }

                    GetValueOrMissing(RowCount / 2, ColCount / 2);
                }
            }
        }
        //!!!similar to GetInstanceFromDenseStructFileNameInternal


        /// <summary>
        /// Get an instance from a file in RowKeys format: reads the column keys from the header row,
        /// the dense-structure file name from the second row, and a row key plus file position from
        /// every following row, then runs <c>ValueTester</c>.
        /// </summary>
        /// <param name="rowKeysStructFileName">The rowKeys file</param>
        /// <param name="parallelOptions">A ParallelOptions instance that configures the multithreaded behavior of this operation. Not currently used; kept so that this method has the same signature as other, similar methods.</param>
        /// <param name="fileAccess">A FileAccess value that specifies the operations that can be performed on the file. Defaults to 'Read'</param>
        /// <param name="fileShare">A FileShare value specifying the type of access other threads have to the file. Defaults to 'Read'</param>
        /// <param name="verbose">When true (the default), a CounterWithMessages is created and incremented once per row read from the rowKeys file; when false, no counter is created.</param>
        /// <exception cref="MatrixFormatException">
        /// The header row is missing or does not start with 'rowKey', the row naming the dense-structure
        /// file is missing, or a data row does not have exactly two tab-separated columns.
        /// </exception>
        protected void GetInstanceFromRowKeysStructFileNameInternal(string rowKeysStructFileName, ParallelOptions parallelOptions, FileAccess fileAccess = FileAccess.Read, FileShare fileShare = FileShare.Read, bool verbose = true)
        {
            lock (this)
            {
                // Check up front that the file has data and does not start with a comment line.
                string firstLineOrNull = FileUtils.ReadLine(rowKeysStructFileName);
                Helper.CheckCondition(null != firstLineOrNull, () => string.Format(CultureInfo.InvariantCulture, Properties.Resource.ExpectedFileToHaveData, rowKeysStructFileName));
                Helper.CheckCondition(!firstLineOrNull.StartsWith(FileUtils.CommentHeader, StringComparison.Ordinal), Properties.Resource.ExpectedNoCommentsInRowKeysAnsiFiles, rowKeysStructFileName);

                RowKeyToFilePosition = new Dictionary <string, long>();
                FileAccess           = fileAccess;
                FileShare            = fileShare;

                using (TextReader textReader = FileUtils.OpenTextStripComments(rowKeysStructFileName))
                {
                    string colKeysLineOrNull = textReader.ReadLine();
                    if (null == colKeysLineOrNull)
                    {
                        throw new MatrixFormatException("Surprised by empty file. " + rowKeysStructFileName);
                    }

                    string[] varAndColKeys = colKeysLineOrNull.Split('\t');
                    if (!varAndColKeys[0].Equals("rowKey", StringComparison.Ordinal))
                    {
                        throw new MatrixFormatException("Expect first row's first value to be 'rowKey'"); //!!!rowKey
                    }

                    ColSerialNumbers = new SerialNumbers <string>(varAndColKeys.Skip(1));
                    _rowKeys         = new List <string>();

                    //!!!not really thread-safe
                    // The second row names the dense-structure file, relative to the rowKeys file's directory.
                    string denseStructFileNameInFile = textReader.ReadLine();
                    if (null == denseStructFileNameInFile)
                    {
                        // Without this guard a header-only file surfaced as an ArgumentNullException from Path.Combine.
                        throw new MatrixFormatException("Expect second row to name the dense struct file. " + rowKeysStructFileName);
                    }
                    DenseStructFileName = Path.Combine(Path.GetDirectoryName(rowKeysStructFileName), denseStructFileNameInFile);

                    // Progress reporting is optional; the counter only exists when verbose is set.
                    CounterWithMessages counterWithMessages = verbose ? new CounterWithMessages("Reading rowKey file to find location of rows, #{0}", 10000, null) : null;

                    // Every remaining row is "<rowKey>\t<position>".
                    string line = null;
                    while (null != (line = textReader.ReadLine()))
                    {
                        if (verbose)
                        {
                            counterWithMessages.Increment();
                        }
                        string[] rowKeyAndPosition = line.Split('\t');
                        if (rowKeyAndPosition.Length != 2)
                        {
                            throw new MatrixFormatException("Expect rows to have two columns");
                        }
                        string rowKey   = rowKeyAndPosition[0];
                        long   position = long.Parse(rowKeyAndPosition[1], CultureInfo.CurrentCulture);
                        _rowKeys.Add(rowKey);
                        RowKeyToFilePosition.Add(rowKey, position);
                    }
                }

                _indexOfRowKey = RowKeys.Select((key, index) => new { key, index }).ToDictionary(keyAndIndex => keyAndIndex.key, keyAndIndex => keyAndIndex.index);
                ValueTester(rowKeysStructFileName);
            }
        }