public void CompressedAppendBug4()
{
    string fileName = GetBinFileName();
    if (!AllowCreate)
        return;

    using (var f = new BinCompressedSeriesFile<_CmplxIdx, _4Flds_ComplxIdx>(fileName))
    {
        f.UniqueIndexes = false;
        f.InitializeNewFile();

        f.AppendData(Data<_4Flds_ComplxIdx>(10, 20000));
        TestUtils.CollectionAssertEqual(
            Data<_4Flds_ComplxIdx>(10, 20000),
            f.Stream(new _CmplxIdx { a = 0 }, new _CmplxIdx { a = 50000 }),
            "#1");

        for (int ix = 0; ix < 5000; ix++)
            f.AppendData(Data<_4Flds_ComplxIdx>(20000 + 5 * ix, 20000 + 5 * (ix + 1)));
    }
}
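// A possible follow-up assertion for the test above (a sketch, not part of the original test):
// after the 5000 incremental appends, the file should stream back as one contiguous range
// 10..45000 (20000 + 5 * 5000). It reuses the Data/Stream/TestUtils helpers already used by
// CompressedAppendBug4; the helper name VerifyCompressedAppendBug4 is illustrative only and
// would be called just before the using block closes.
private void VerifyCompressedAppendBug4(BinCompressedSeriesFile<_CmplxIdx, _4Flds_ComplxIdx> f)
{
    TestUtils.CollectionAssertEqual(
        Data<_4Flds_ComplxIdx>(10, 20000 + 5 * 5000),
        f.Stream(new _CmplxIdx { a = 0 }, new _CmplxIdx { a = 50000 }),
        "#2");
}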
public void Run()
{
    // Create filenames, deleting existing files if they exist
    string filename = CreateFilename();

    //
    // Set up sample data.
    // Note that we might get minor rounding errors when storing.
    // The Equals implementation of the Item accounts for that.
    //
    const int itemCount = 10000;
    IEnumerable<ArraySegment<Item>> data = Utils.GenerateData(0, itemCount, i => new Item(i));

    // Create new BinCompressedSeriesFile file that stores a sequence of Item structs
    // The file is indexed by a long value inside Item marked with the [Index] attribute.
    // See the Item struct declaration
    using (var bf = new BinCompressedSeriesFile<long, Item>(filename))
    {
        // Automatically pick the constructor that would set all the public fields in the struct
        var cmpxFld = ((ComplexField) bf.RootField);
        cmpxFld.PopulateFields(ComplexField.Mode.Constructor | ComplexField.Mode.Fields);

        Console.WriteLine("Serialized Fields:\n {0}\n", string.Join(Environment.NewLine + " ", cmpxFld.Fields));
        Console.WriteLine("Deserialized with constructor:\n {0}\n", cmpxFld.Constructor);

        bf.InitializeNewFile(); // Finish new file initialization and create an empty file

        bf.AppendData(data);

        //
        // Verify that the created files are identical (use the default bitwise value type Equals)
        //
        if (!bf.Stream().SequenceEqual(data.Stream()))
            throw new BinaryFileException("File does not have the right data");

        Console.WriteLine("File {0} created with {1,10:#,#} bytes", filename, bf.BaseStream.Length);
    }

    //
    // Check that the settings are stored ok in the file and can be re-initialized on open
    //
    using (var bf1 = (IWritableFeed<long, Item>) BinaryFile.Open(filename))
    {
        if (!bf1.Stream().SequenceEqual(data.Stream()))
            throw new BinaryFileException("File does not have the right data on the second check");
    }

    // cleanup
    CreateFilename();
}
private static void CreateSampleFile(string filename)
{
    if (File.Exists(filename))
        File.Delete(filename);

    // Create and populate sample file
    // See DemoBinCompressedSeriesFile for more info
    using (var bf = new BinCompressedSeriesFile<long, ItemLngDbl>(filename))
    {
        var root = (ComplexField) bf.RootField;
        ((ScaledDeltaFloatField) root["Value"].Field).Multiplier = 100;

        bf.InitializeNewFile();
        bf.AppendData(Utils.GenerateData(3, 10, i => new ItemLngDbl(i, i / 100.0)));
    }
}
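// A minimal read-back sketch for the sample file created above, assuming the same
// open/stream pattern used by the other demos in these samples; the method name
// PrintSampleFile is illustrative and not part of the original sources.
private static void PrintSampleFile(string filename)
{
    // Open the existing file and stream its ItemLngDbl values one at a time
    using (var bf = (IWritableFeed<long, ItemLngDbl>) BinaryFile.Open(filename))
    {
        Console.WriteLine("FirstIndex = {0}, LastIndex = {1}", bf.FirstIndex, bf.LastIndex);
        foreach (ItemLngDbl val in bf.Stream())
            Console.WriteLine(val);
    }
}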
public void Run()
{
    // Create filenames, deleting existing files if they exist
    string filename = CreateFilename();

    //
    // Set up sample data.
    // Note that we might get minor rounding errors when storing.
    // The Equals implementation of the ReadonlyItemLngDbl accounts for that.
    //
    const int itemCount = 10000;
    IEnumerable<ArraySegment<ReadonlyItemLngDbl>> data = Utils.GenerateData(
        0, itemCount, i => new ReadonlyItemLngDbl(i, (i / 100.0) * 65.0));

    // Create new BinCompressedSeriesFile file that stores a sequence of ReadonlyItemLngDbl structs
    // The file is indexed by a long value inside ReadonlyItemLngDbl marked with the [Index] attribute.
    // Here we provide a custom field factory that will analyze each field as it is being created,
    // and may choose to supply a custom field or null to use the default.
    // The name is automatically generated, starting with "root" for the TVal, with each
    // subfield appended afterwards, separated by a dot.
    // Alternatively, ReadonlyItemLngDbl.SequenceNum can be marked with [Field(typeof(IncrementalIndex))]
    // For complex types, the [Field] attribute can also be set on the type itself.
    using (var bf = new BinCompressedSeriesFile<long, ReadonlyItemLngDbl>(filename))
    {
        // When a new instance of BinCompressedSeriesFile is created,
        // RootField will be pre-populated with default configuration objects.
        // Some fields, such as doubles, require additional configuration before the file can be initialized.
        var root = (ReadonlyItemLngDblField) bf.RootField;

        // Index is always increasing
        var seq = (ScaledDeltaIntField) root.SequenceNumField;
        seq.DeltaType = DeltaType.Positive;

        // This double will contain values with no more than 2 digits after the decimal point.
        // Before serializing, multiply the value by 100 to convert to long.
        var val1 = (ScaledDeltaFloatField) root.ValueField;
        val1.Multiplier = 100;

        bf.UniqueIndexes = true; // enforce index uniqueness - each index is +1
        bf.InitializeNewFile();  // Finish new file initialization and create an empty file

        bf.AppendData(data);

        //
        // Verify that the created files are identical (use the default bitwise value type Equals)
        //
        if (!bf.Stream().SequenceEqual(data.Stream()))
            throw new BinaryFileException("File does not have the right data");

        Console.WriteLine("File {0} created with {1,10:#,#} bytes", filename, bf.BaseStream.Length);
    }

    //
    // Check that the settings are stored ok in the file and can be re-initialized on open
    //
    using (var bf1 = (IWritableFeed<long, ReadonlyItemLngDbl>) BinaryFile.Open(filename))
    {
        if (!bf1.Stream().SequenceEqual(data.Stream()))
            throw new BinaryFileException("File does not have the right data on the second check");
    }

    // cleanup
    CreateFilename();
}
public void Run()
{
    // Create filenames, deleting existing files if they exist
    string filename1 = CreateFilename(1);
    string filename2 = CreateFilename(2);
    string filename3 = CreateFilename(3);
    string filename4 = CreateFilename(4);

    var sw1 = new Stopwatch();
    var sw2 = new Stopwatch();
    var sw3 = new Stopwatch();
    var sw4 = new Stopwatch();

    //
    // Set up sample data so that the delta between the index's long values is 1
    // and the delta between values is 0.65 => 65 with the multiplier, which is bigger than
    // would fit into a 7 bit signed integer, but would fit into a 7 bit unsigned one
    //
    const int itemCount = 500000;
    IEnumerable<ArraySegment<ItemLngDbl>> data = Utils.GenerateData(
        0, itemCount, i => new ItemLngDbl(i, Math.Round((i / 100.0) * 65.0, 2)));

    // Create new BinCompressedSeriesFile files that store a sequence of ItemLngDbl structs
    // The file is indexed by a long value inside ItemLngDbl marked with the [Index] attribute.
    // Here we provide a custom field factory that will analyze each field as it is being created,
    // and may choose to supply a custom field or null to use the default.
    // The name is automatically generated, starting with "root" for the TVal, with each
    // subfield appended afterwards, separated by a dot.
    // Alternatively, ItemLngDbl.SequenceNum can be marked with [Field(typeof(IncrementalIndex))]
    // For complex types, the [Field] attribute can also be set on the type itself.
    using (var bf1 = new BinCompressedSeriesFile<long, ItemLngDbl>(
        filename1,
        fieldFactory:
            (store, type, name) =>
            type == typeof(long) && name == "root.SequenceNum"
                // For the long field named "SequenceNum" provide custom IncrementalIndex field serializer
                ? new IncrementalIndex(store, type, name)
                : null))
    using (var bf2 = new BinCompressedSeriesFile<long, ItemLngDbl>(filename2))
    using (var bf3 = new BinCompressedSeriesFile<long, ItemLngDbl>(filename3))
    using (var bf4 = new BinSeriesFile<long, ItemLngDbl>(filename4))
    {
        //
        // Configure bf1 to be the most compression optimized:
        //  * use custom incremental field serializer IncrementalIndex
        //  * use positive-only DeltaType for the value (it always increases in this test)
        //
        // When a new instance of BinCompressedSeriesFile is created,
        // RootField will be pre-populated with default configuration objects.
        // Some fields, such as doubles, require additional configuration before the file can be initialized.
        //
        var root1 = (ComplexField) bf1.RootField;

        // This double will contain values with no more than 2 digits after the decimal point.
        // Before serializing, multiply the value by 100 to convert to long.
        // The next value will always be the same as or larger than the previous one
        var val1 = (ScaledDeltaFloatField) root1["Value"].Field;
        val1.Multiplier = 100;
        val1.DeltaType = DeltaType.Positive;

        bf1.UniqueIndexes = true; // enforce index uniqueness - each index is +1
        bf1.InitializeNewFile();  // Finish new file initialization and create an empty file

        //
        // Initialize bf2 the same as bf1, but without the custom serializer
        //
        var val2 = (ScaledDeltaFloatField) ((ComplexField) bf2.RootField)["Value"].Field;
        val2.Multiplier = 100;
        val2.DeltaType = DeltaType.Positive;
        bf2.UniqueIndexes = true;
        bf2.InitializeNewFile();

        //
        // Initialize bf3 in an identical fashion to bf2, but without the positive-only delta type.
        //
        var val3 = ((ScaledDeltaFloatField) ((ComplexField) bf3.RootField)["Value"].Field);
        val3.Multiplier = 100;
        bf3.UniqueIndexes = true;
        bf3.InitializeNewFile();

        //
        // Initialize the fourth, uncompressed file without any extra parameters.
        //
        bf4.UniqueIndexes = true;
        bf4.InitializeNewFile();

        //
        // Append the same data to all files, measuring how long it takes.
        // Please note that the timing is not very accurate here, and will give different results depending on the order
        //
        sw4.Start();
        bf4.AppendData(data);
        sw4.Stop();

        sw3.Start();
        bf3.AppendData(data);
        sw3.Stop();

        sw2.Start();
        bf2.AppendData(data);
        sw2.Stop();

        sw1.Start();
        bf1.AppendData(data);
        sw1.Stop();

        //
        // Verify that the created files are identical (use the default bitwise value type Equals)
        //
        if (!bf1.Stream().SequenceEqual(bf2.Stream()))
            throw new BinaryFileException("File #1 != #2");
        if (!bf1.Stream().SequenceEqual(bf3.Stream()))
            throw new BinaryFileException("File #1 != #3");
        if (!bf1.Stream().SequenceEqual(bf4.Stream()))
            throw new BinaryFileException("File #1 != #4");

        //
        // Print file sizes to see if there was any benefit
        //
        Console.WriteLine("Finished creating files with {0:#,#} items:\n", itemCount);
        Console.WriteLine(
            "{2,40}: {0,10:#,#} bytes in {1}", bf1.BaseStream.Length, sw1.Elapsed,
            "DeltaType.Positive and Calculated index");
        Console.WriteLine(
            "{2,40}: {0,10:#,#} bytes in {1}", bf2.BaseStream.Length, sw2.Elapsed, "DeltaType.Positive");
        Console.WriteLine(
            "{2,40}: {0,10:#,#} bytes in {1}", bf3.BaseStream.Length, sw3.Elapsed, "No optimizations");
        Console.WriteLine("{2,40}: {0,10:#,#} bytes in {1}", bf4.BaseStream.Length, sw4.Elapsed, "Uncompressed");
        Console.WriteLine();
    }

    //
    // Check that the settings are stored ok in the file and can be re-initialized on open
    //
    using (var bf1 = (IWritableFeed<long, ItemLngDbl>) BinaryFile.Open(filename1))
    using (var bf2 = (IWritableFeed<long, ItemLngDbl>) BinaryFile.Open(filename2))
    {
        if (!bf1.Stream().SequenceEqual(bf2.Stream()))
            throw new BinaryFileException("File #1 != #2");
    }

    // cleanup
    CreateFilename(1);
    CreateFilename(2);
    CreateFilename(3);
    CreateFilename(4);
}
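// The comments in the demo above mention an alternative to passing a fieldFactory delegate:
// marking the index field with a [Field] attribute directly on the struct. A sketch of what
// that could look like; ItemLngDblWithAttr is a hypothetical struct mirroring the ItemLngDbl
// layout implied by the demos, and [Index] / [Field(typeof(IncrementalIndex))] are the
// attributes the demo comments refer to.
public struct ItemLngDblWithAttr
{
    [Index]
    [Field(typeof(IncrementalIndex))]
    public long SequenceNum;

    public double Value;

    public ItemLngDblWithAttr(long sequenceNum, double value)
    {
        SequenceNum = sequenceNum;
        Value = value;
    }
}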
public void Run()
{
    string filename = GetType().Name + ".bts";
    if (File.Exists(filename))
        File.Delete(filename);

    // Create new BinCompressedSeriesFile file that stores a sequence of ItemLngDbl structs
    // The file is indexed by a long value inside ItemLngDbl marked with the [Index] attribute.
    using (var bf = new BinCompressedSeriesFile<long, ItemLngDbl>(filename))
    {
        //
        // Initialize new file parameters and create it
        //
        bf.UniqueIndexes = true; // enforce index uniqueness
        bf.Tag = "Sample Data";  // optionally provide a tag to store in the file header

        //
        // Configure value storage. This is the only difference with using BinSeriesFile.
        //
        // When a new instance of BinCompressedSeriesFile is created,
        // RootField will be pre-populated with default configuration objects.
        // Some fields, such as doubles, require additional configuration before the file can be initialized.
        //
        var root = (ComplexField) bf.RootField;

        // This double will contain values with no more than 2 digits after the decimal point.
        // Before serializing, multiply the value by 100 to convert to long.
        ((ScaledDeltaFloatField) root["Value"].Field).Multiplier = 100;

        bf.InitializeNewFile(); // Finish new file initialization and create an empty file

        //
        // Set up data generator to generate 10 items starting with index 3
        //
        IEnumerable<ArraySegment<ItemLngDbl>> data = Utils.GenerateData(3, 10, i => new ItemLngDbl(i, i / 100.0));

        //
        // Append data to the file
        //
        bf.AppendData(data);

        //
        // Read all data and print it using Stream() - one value at a time.
        // This method is slower than StreamSegments(), but easier to use for simple one-value iteration.
        //
        Console.WriteLine(" ** Content of file {0} after the first append", filename);
        Console.WriteLine("FirstIndex = {0}, LastIndex = {1}", bf.FirstIndex, bf.LastIndex);
        foreach (ItemLngDbl val in bf.Stream())
            Console.WriteLine(val);
    }

    // Re-open the file, allowing data modifications
    // IWritableFeed<,> interface is better as it will work with non-compressed files as well
    using (var bf = (IWritableFeed<long, ItemLngDbl>) BinaryFile.Open(filename, true))
    {
        // Append a few more items with a different ItemLngDbl.Value to tell them apart
        IEnumerable<ArraySegment<ItemLngDbl>> data = Utils.GenerateData(10, 10, i => new ItemLngDbl(i, i / 25.0));

        // New data indexes will overlap with existing ones, so allow truncating old data
        bf.AppendData(data, true);

        // Print values
        Console.WriteLine("\n ** Content of file {0} after the second append", filename);
        Console.WriteLine("FirstIndex = {0}, LastIndex = {1}", bf.FirstIndex, bf.LastIndex);
        foreach (ItemLngDbl val in bf.Stream())
            Console.WriteLine(val);
    }

    // Re-open the file once more to run queries (a file can be opened for reading in parallel, but only one writer at a time)
    // The IWritableFeed<,> interface is used here as well; it works with non-compressed files too
    using (var bf = (IWritableFeed<long, ItemLngDbl>) BinaryFile.Open(filename, true))
    {
        // Show first item with index >= 5
        Console.WriteLine(
            "\nFirst item on or after index 5 is {0}\n",
            bf.Stream(5, maxItemCount: 1).First());

        // Show last item with index < 7 (iterate backwards)
        Console.WriteLine(
            "Last item before index 7 is {0}\n",
            bf.Stream(7, inReverse: true, maxItemCount: 1).First());

        // Average of values for indexes >= 4 and < 8
        Console.WriteLine(
            "Average of values for indexes >= 4 and < 8 is {0}\n",
            bf.Stream(4, 8).Average(i => i.Value));

        // Sum of the first 3 values with index less than 18 and going backwards
        Console.WriteLine(
            "Sum of the first 3 values with index less than 18 and going backwards is {0}\n",
            bf.Stream(18, maxItemCount: 3, inReverse: true).Sum(i => i.Value));
    }

    // cleanup
    File.Delete(filename);
}
public void Run()
{
    string filename1 = GetType().Name + "1.bts";
    if (File.Exists(filename1)) File.Delete(filename1);
    string filename2 = GetType().Name + "2.bts";
    if (File.Exists(filename2)) File.Delete(filename2);
    string filename3 = GetType().Name + "3.bts";
    if (File.Exists(filename3)) File.Delete(filename3);

    // Create new BinCompressedSeriesFile file that stores a sequence of ItemLngDblDbl structs
    // The file is indexed by a long value inside ItemLngDblDbl marked with the [Index] attribute.
    // For comparison's sake, also create identical but non-state-linked compressed and uncompressed files.
    using (var bf1 = new BinCompressedSeriesFile<long, ItemLngDblDbl>(filename1))
    using (var bf2 = new BinCompressedSeriesFile<long, ItemLngDblDbl>(filename2))
    using (var bf3 = new BinSeriesFile<long, ItemLngDblDbl>(filename3))
    {
        //
        // Configure value storage. This is the only difference with using BinSeriesFile.
        //
        // When a new instance of BinCompressedSeriesFile is created,
        // RootField will be pre-populated with default configuration objects.
        // Some fields, such as doubles, require additional configuration before the file can be initialized.
        //
        var root = (ComplexField) bf1.RootField;
        var fld1 = (ScaledDeltaFloatField) root["Value1"].Field;
        var fld2 = (ScaledDeltaFloatField) root["Value2"].Field;

        // These doubles will contain values with no more than 2 digits after the decimal point.
        // Before serializing, multiply the value by 100 to convert to long.
        fld1.Multiplier = 100;
        fld2.Multiplier = 100;

        // ** IMPORTANT: Set the second field's state name the same as the first field's, linking them together
        fld2.StateName = fld1.StateName;

        bf1.InitializeNewFile(); // Finish new file initialization and create an empty file

        //
        // Set up data generator to generate items with closely related value1 and value2
        //
        IEnumerable<ArraySegment<ItemLngDblDbl>> data = Utils.GenerateData(
            1, 10000, i => new ItemLngDblDbl(i, i * 10, i * 10 + Math.Round(1 / (1.0 + i % 100), 2)));

        //
        // Append data to the file
        //
        bf1.AppendData(data);

        //
        // Initialize the second file in an identical fashion, without linking the states, and append the same data
        //
        var root2 = (ComplexField) bf2.RootField;
        ((ScaledDeltaFloatField) root2["Value1"].Field).Multiplier = 100;
        ((ScaledDeltaFloatField) root2["Value2"].Field).Multiplier = 100;
        bf2.InitializeNewFile();
        bf2.AppendData(data);

        //
        // Initialize the third, uncompressed file and append the same data.
        //
        bf3.InitializeNewFile();
        bf3.AppendData(data);

        //
        // Print file sizes to see if there was any benefit
        //
        Console.WriteLine("      Shared: {0,10:#,#} bytes", bf1.BaseStream.Length);
        Console.WriteLine("   NonShared: {0,10:#,#} bytes", bf2.BaseStream.Length);
        Console.WriteLine("Uncompressed: {0,10:#,#} bytes", bf3.BaseStream.Length);
        Console.WriteLine();

        if (!bf1.Stream().SequenceEqual(bf2.Stream()))
            throw new BinaryFileException("File #1 != #2");
        if (!bf1.Stream().SequenceEqual(bf3.Stream()))
            throw new BinaryFileException("File #1 != #3");
    }

    //
    // Check that the settings are stored ok in the file and can be re-initialized on open
    //
    using (var bf1 = (IWritableFeed<long, ItemLngDblDbl>) BinaryFile.Open(filename1))
    using (var bf2 = (IWritableFeed<long, ItemLngDblDbl>) BinaryFile.Open(filename2))
    {
        if (!bf1.Stream().SequenceEqual(bf2.Stream()))
            throw new BinaryFileException("File #1 != #2");
    }

    // cleanup
    File.Delete(filename1);
    File.Delete(filename2);
    File.Delete(filename3);
}
static void Main(string[] args) {
    string sourcedirectory = string.Empty;
    string dbdirectory = Directory.GetCurrentDirectory();
    string fileextension = "asc";

    for (int i = 0; i < args.Length; i++) {
        switch (args[i]) {
            case "-s": sourcedirectory = args[i + 1]; break;
            case "-o": dbdirectory = args[i + 1]; break;
        }
    }

    if (string.IsNullOrEmpty(sourcedirectory) || string.IsNullOrEmpty(dbdirectory)) {
        Console.WriteLine("USAGE: TimeSeriesDB.exe -s sourcedirectory [-o output directory]");
        return;
    }

    Console.WriteLine(DateTime.Now.ToString("yyyy-MM-dd HH-mm-ss.fff"));

    SortedList<string, List<string>> FilesSortedByMarket = new SortedList<string, List<string>>();
    foreach (string file in System.IO.Directory.GetFiles(sourcedirectory, "*." + fileextension, SearchOption.AllDirectories)) {
        string fname = Path.GetFileNameWithoutExtension(file);
        string[] fnameelems = fname.Split(new char[] { '_' });
        string market = fnameelems[0];
        if (FilesSortedByMarket.ContainsKey(market)) {
            FilesSortedByMarket[market].Add(file);
        } else {
            FilesSortedByMarket.Add(market, new List<string>() { file });
        }
    }

    // get quote files by market and process them to build a stream of best bid-offer
    foreach (string markets in FilesSortedByMarket.Keys) {
        #region Process Ticker Files

        if (NameToMarketId.ContainsKey(markets)) {
            if (MarketName != markets) {
                MarketName = markets;
                MarketId = NameToMarketId[MarketName];
                Console.WriteLine("Processing {0}", MarketName);
                Console.WriteLine("\tQuotes");
            }

            List<string> files = FilesSortedByMarket[MarketName];

            // loop over all quote files for the market and insert each quote into sortedquotedata
            #region Process Ticker Quote Files

            Parallel.ForEach(
                files.Where(x => x.Contains("_Q")),
                new ParallelOptions() { MaxDegreeOfParallelism = Environment.ProcessorCount * 2 },
                quotedatafile => {
                    Console.WriteLine("\t\t{0}", quotedatafile);

                    // list that contains the sorted time series of trades and quotes for all markets
                    var sortedalltimeseries = CreateList(new { TimeSeriesRecord = (TSRecord)null });

                    string tradedatafile = quotedatafile.Replace("X_Q", "X_T");
                    if (!File.Exists(tradedatafile)) {
                        Console.WriteLine("\t\t\tTrade data file does not exist: {0}", tradedatafile);
                        return;
                    }

                    Dictionary<DateTime, uint> markettodt = new Dictionary<DateTime, uint>();
                    var sortedquotedata = CreateList(new { Key = (TSDateTime)null, Value = (QuoteData)null });
                    // new[] { new { Key = (TSDateTime)null, Value = (QuoteData)null } }.ToList();

                    using (StreamReader sr = new StreamReader(quotedatafile)) {
                        String line = null;
                        while ((line = sr.ReadLine()) != null) {
                            QuoteData qd = new QuoteData(line);
                            if (qd.Dt.TimeOfDay >= WhenTimeIsBefore && qd.Dt.TimeOfDay <= WhenTimeIsAfter
                                && qd.Bid != 0.0 && qd.Ask != 0.0 && qd.BidSz != 0 && qd.AskSz != 0
                                && qd.Bid <= qd.Ask && ExchangeInclusionList.Contains(qd.Exch)) {
                                uint seqno = 1;
                                if (markettodt.ContainsKey(qd.Dt)) {
                                    seqno = markettodt[qd.Dt];
                                    markettodt[qd.Dt] = seqno + 1;
                                } else {
                                    markettodt[qd.Dt] = 1;
                                }
                                TSDateTime tsdt = new TSDateTime(qd.Dt, MarketId, seqno); //GetSeqNo(qd.Dt, MarketId));
                                sortedquotedata.Add(new { Key = new TSDateTime(qd.Dt, MarketId, seqno), Value = qd });
                            }
                        }
                    }

                    // List<QuoteData> quotedata = ParseQuoteData(item, sortedquotedata);
                    Console.WriteLine("\t\t\tSorting quotes for {0}", quotedatafile);
                    sortedquotedata.Sort((p1, p2) => p1.Key.Timestamp.CompareTo(p2.Key.Timestamp));
                    markettodt.Clear();

                    #region Build Inside Quotes For Ticker

                    // we have sorted quotes for a market; now build the stream of best bid-offer
                    var sortedbiddata = CreateList(new { Exch = string.Empty, Price = 0.0, Size = (uint)0 });
                    var sortedaskdata = CreateList(new { Exch = string.Empty, Price = 0.0, Size = (uint)0 });
                    var sortedquotes = CreateList(
                        new { Dt = (ulong)0, Exch = string.Empty, Bid = 0.0, BidSz = (uint)0, Ask = 0.0, AskSz = (uint)0 });

                    string prevbidexch = string.Empty;
                    string prevaskexch = string.Empty;
                    double prevbidprice = 0.0;
                    double prevaskprice = 0.0;
                    uint prevbidsize = 0;
                    uint prevasksize = 0;
                    DateTime prevdt = DateTime.Now;

                    // walk sortedquotedata to compute the inside market
                    // insert inside market records into sortedquotes
                    Console.WriteLine("\t\t\tBuilding inside market for {0}", quotedatafile);
                    foreach (var qd in sortedquotedata) {
                        bool newbiddata = true;
                        bool newaskdata = true;

                        var mqqs = sortedbiddata.FirstOrDefault(x => x.Exch == qd.Value.BidExch);
                        if (mqqs == null) {
                            sortedbiddata.Add(new { Exch = qd.Value.BidExch, Price = qd.Value.Bid, Size = qd.Value.BidSz });
                        } else if (mqqs != null && (mqqs.Price != qd.Value.Bid || mqqs.Size != qd.Value.BidSz)) {
                            sortedbiddata.Remove(mqqs);
                            sortedbiddata.Add(new { Exch = qd.Value.BidExch, Price = qd.Value.Bid, Size = qd.Value.BidSz });
                        } else {
                            newbiddata = false;
                        }

                        mqqs = sortedaskdata.FirstOrDefault(x => x.Exch == qd.Value.AskExch);
                        if (mqqs == null) {
                            sortedaskdata.Add(new { Exch = qd.Value.AskExch, Price = qd.Value.Ask, Size = qd.Value.AskSz });
                        } else if (mqqs != null && (mqqs.Price != qd.Value.Ask || mqqs.Size != qd.Value.AskSz)) {
                            sortedaskdata.Remove(mqqs);
                            sortedaskdata.Add(new { Exch = qd.Value.AskExch, Price = qd.Value.Ask, Size = qd.Value.AskSz });
                        } else {
                            newaskdata = false;
                        }

                        if (newbiddata) {
                            sortedbiddata = sortedbiddata.OrderByDescending(x => x.Price).ThenByDescending(y => y.Size).ToList();
                        }
                        if (newaskdata) {
                            sortedaskdata = sortedaskdata.OrderBy(x => x.Price).ThenByDescending(y => y.Size).ToList();
                        }

                        if (((prevbidprice != sortedbiddata[0].Price) || (prevbidsize != sortedbiddata[0].Size)
                             || (prevaskprice != sortedaskdata[0].Price) || (prevasksize != sortedaskdata[0].Size)
                             // || (prevbidexch != sortedbiddata[0].Exch) || (prevaskexch != sortedaskdata[0].Exch)
                            ) && (prevdt != qd.Value.Dt)) {
                            sortedquotes.Add(new {
                                Dt = (ulong)new TSDateTime(qd.Key.Dt, qd.Key.MarketId, 0).Timestamp,
                                Exch = sortedbiddata[0].Exch,
                                Bid = (double)sortedbiddata[0].Price,
                                BidSz = (uint)sortedbiddata[0].Size,
                                Ask = (double)sortedaskdata[0].Price,
                                AskSz = (uint)sortedaskdata[0].Size
                            });
                            // Console.WriteLine(string.Format("{0} {1}:{2}:{3} {4}:{5}:{6}", qd.Value.Dt.Ticks, sortedbiddata[0].Exch, sortedbiddata[0].Price, sortedbiddata[0].Size, sortedaskdata[0].Exch, sortedaskdata[0].Price, sortedaskdata[0].Size));
                            prevbidexch = sortedbiddata[0].Exch;
                            prevbidprice = sortedbiddata[0].Price;
                            prevbidsize = sortedbiddata[0].Size;
                            prevaskexch = sortedaskdata[0].Exch;
                            prevaskprice = sortedaskdata[0].Price;
                            prevasksize = sortedaskdata[0].Size;
                            prevdt = qd.Value.Dt;
                        }
                    }

                    Console.WriteLine("\t\t\tSorting inside market for {0}", quotedatafile);
                    sortedquotes.Sort((p1, p2) => p1.Dt.CompareTo(p2.Dt));
                    sortedbiddata.Clear();
                    sortedaskdata.Clear();
                    sortedquotedata.Clear();

                    #endregion

                    #region Process Ticker Trades Files

                    var sortedtrades = CreateList(new { Key = (TSDateTime)null, Value = (TradeData)null });
                    if (sortedquotes.Count > 0) {
                        using (StreamReader sr = new StreamReader(tradedatafile)) {
                            String line = null;
                            while ((line = sr.ReadLine()) != null) {
                                try {
                                    TradeData td = new TradeData(line);
                                    uint seqno = 1;
                                    if (markettodt.ContainsKey(td.Dt)) {
                                        seqno = markettodt[td.Dt];
                                        markettodt[td.Dt] = seqno + 1;
                                    } else {
                                        markettodt[td.Dt] = 1;
                                    }
                                    if (td.Dt.TimeOfDay >= WhenTimeIsBefore && td.Dt.TimeOfDay <= WhenTimeIsAfter
                                        && td.Price > 0.0 && td.Volume > 0 && ExchangeInclusionList.Contains(td.Exch)) {
                                        sortedtrades.Add(new { Key = new TSDateTime(td.Dt, TimeSeriesDB.MarketId, seqno), Value = td });
                                    }
                                } catch {}
                            }
                        }
                        // need to process all trades files and create a sorted list of trades
                        Console.WriteLine("\t\t{0}", tradedatafile);
                    }

                    #endregion

                    Console.WriteLine("\tSorting trades");
                    sortedtrades.Sort((p1, p2) => p1.Key.Timestamp.CompareTo(p2.Key.Timestamp));

                    #region Build Interleaved Timeseries

                    var sortedtimeseries = CreateList(new { TimeSeriesRecord = (TSRecord)null });
                    ulong timestamp_msecs = 0;
                    ulong timestamp_quote = 0;

                    // loop over sortedtrades and create a time series record for each trade
                    Console.WriteLine("\tBuilding interleaved timeseries");
                    foreach (var x in sortedtrades) {
                        if (10 * ((ulong)x.Key.Dt.Ticks) > timestamp_msecs) {
                            // find the nearest quote by timestamp
                            int index = BinarySearchForMatch(sortedquotes, (y) => { return (y.Dt.CompareTo(x.Key.Timestamp)); });
                            int idx = index == 0 ? 0 : index - 1;
                            var quote = sortedquotes[idx];
                            timestamp_quote = quote.Dt;
                            timestamp_msecs = 10 * ((ulong)x.Key.Dt.Ticks);
                        }
                        sortedtimeseries.Add(new {
                            TimeSeriesRecord =
                                new TSRecord(x.Key.Timestamp, x.Value.Exch, x.Value.Price, x.Value.Volume) { QuoteIdx = timestamp_quote }
                        });
                    }
                    sortedtrades.Clear();

                    foreach (var x in sortedquotes) {
                        sortedtimeseries.Add(new { TimeSeriesRecord = new TSRecord(x.Dt, x.Exch, x.Bid, x.BidSz, x.Ask, x.AskSz) });
                    }
                    sortedquotes.Clear();

                    Console.WriteLine("\tSorting timeseries");
                    sortedtimeseries.Sort((p1, p2) => p1.TimeSeriesRecord.Idx.CompareTo(p2.TimeSeriesRecord.Idx));

                    #endregion

                    Console.WriteLine("\tAdding {0} timeseries to master timeseries", TimeSeriesDB.MarketName);
                    sortedalltimeseries.AddRange(sortedtimeseries);

                    Console.WriteLine("\tSorting master timeseries");
                    sortedalltimeseries.Sort((p1, p2) => p1.TimeSeriesRecord.Idx.CompareTo(p2.TimeSeriesRecord.Idx));

                    #region Write To Timeseries DB

                    if (sortedalltimeseries.Count > 0) {
                        string filename = string.Format(
                            @"{0}\{1}", dbdirectory,
                            Path.GetFileName(quotedatafile).Replace("_Q", "_TSDB").Replace(fileextension, "dts"));
                        //new TSDateTime(sortedalltimeseries[0].TimeSeriesRecord.Idx).Dt.ToString("yyyyMMddHHmmssfff"), new TSDateTime(sortedalltimeseries[sortedalltimeseries.Count - 1].TimeSeriesRecord.Idx).Dt.ToString("yyyyMMddHHmmssfff"));

                        if (File.Exists(filename)) {
                            File.Delete(filename);
                        }

                        using (var file = new BinCompressedSeriesFile<ulong, TSRecord>(filename)) {
                            var root = (ComplexField)file.RootField;
                            ((ScaledDeltaFloatField)root["Bid"].Field).Multiplier = 1000;
                            ((ScaledDeltaFloatField)root["Ask"].Field).Multiplier = 1000;
                            file.UniqueIndexes = false; // allow duplicate indexes
                            file.InitializeNewFile();   // create file and write header

                            List<TSRecord> tsrlist = new List<TSRecord>();
                            foreach (var tsr in sortedalltimeseries) {
                                tsrlist.Add(tsr.TimeSeriesRecord);
                            }
                            ArraySegment<TSRecord> arr = new ArraySegment<TSRecord>(tsrlist.ToArray());
                            file.AppendData(new ArraySegment<TSRecord>[] { arr });
                        }
                    }

                    #endregion
                });

            #endregion
        }

        #endregion

        // merge files together

        #region merge per day per tick ts files into a single per tick file

        string mergedfilename = string.Format(@"{0}\{1}", dbdirectory, markets + ".dts");
        //new TSDateTime(sortedalltimeseries[0].TimeSeriesRecord.Idx).Dt.ToString("yyyyMMddHHmmssfff"), new TSDateTime(sortedalltimeseries[sortedalltimeseries.Count - 1].TimeSeriesRecord.Idx).Dt.ToString("yyyyMMddHHmmssfff"));
        if (File.Exists(mergedfilename)) {
            File.Delete(mergedfilename);
        }

        List<string> filestobemerged = Directory.GetFiles(dbdirectory, markets + "*.dts").ToList();
        do {
            var pairs = filestobemerged.Where((x, i) => i % 2 == 0)
                .Zip(filestobemerged.Where((x, i) => i % 2 == 1), (second, first) => new[] { first, second })
                .ToList();

            Parallel.ForEach(
                pairs,
                new ParallelOptions() { MaxDegreeOfParallelism = Environment.ProcessorCount * 2 },
                pair => {
                    string mergedfile = Path.GetTempFileName();
                    File.Copy(mergedfile, mergedfile = dbdirectory + "\\" + Path.GetFileName(mergedfile));
                    File.Delete(mergedfile);

                    string mergefile1 = pair[0];
                    string mergefile2 = pair[1];

                    List<TSDBEnumerator> sortedtsdbenumerators = new List<TSDBEnumerator>();
                    List<TSDBEnumerator> TSDBEnumerators = pair.Select(
                        x => new TSDBEnumerator(
                            new TSStreamer(x),
                            new TSDateTime(DateTime.MinValue, 0, 0).Timestamp,
                            new TSDateTime(DateTime.MaxValue, 999, 99).Timestamp)).ToList();
                    //.TimeSeriesDB.Stream(new TSDateTime(DateTime.MinValue, 0, 0).Timestamp, new TSDateTime(DateTime.MaxValue, 999, 99).Timestamp).GetEnumerator())).ToList();

                    using (var file = new BinCompressedSeriesFile<ulong, TSRecord>(mergedfile)) {
                        List<TSRecord> tsrlist = new List<TSRecord>();
                        var root = (ComplexField)file.RootField;
                        ((ScaledDeltaFloatField)root["Bid"].Field).Multiplier = 1000;
                        ((ScaledDeltaFloatField)root["Ask"].Field).Multiplier = 1000;
                        file.UniqueIndexes = false; // allow duplicate indexes
                        file.InitializeNewFile();   // create file and write header

                        sortedtsdbenumerators.AddRange(TSDBEnumerators.Select(x => x));
                        do {
                            foreach (var tsdbenumerator in TSDBEnumerators.Where(x => x.GetNext == true && x.TSEnumerator != null)) {
                                tsdbenumerator.GetNext = false;
                                if (true == tsdbenumerator.TSEnumerator.MoveNext()) {
                                    tsdbenumerator.TSRecord = tsdbenumerator.TSEnumerator.Current;
                                } else {
                                    tsdbenumerator.Dispose();
                                    tsdbenumerator.TSEnumerator = null;
                                    tsdbenumerator.TSRecord = null;
                                    sortedtsdbenumerators.Remove(tsdbenumerator);
                                }
                            }
                            if (sortedtsdbenumerators.Count > 0) {
                                sortedtsdbenumerators.Sort((item1, item2) => item1.TSRecord.Idx.CompareTo(item2.TSRecord.Idx));
                                sortedtsdbenumerators[0].GetNext = true;
                                tsrlist.Add(sortedtsdbenumerators[0].TSRecord);
                            }
                            if (tsrlist.Count == 10000000) {
                                ArraySegment<TSRecord> arr = new ArraySegment<TSRecord>(tsrlist.ToArray());
                                file.AppendData(new ArraySegment<TSRecord>[] { arr });
                                tsrlist.Clear();
                            }
                        } while (sortedtsdbenumerators.Count > 0);

                        if (tsrlist.Count > 0) {
                            ArraySegment<TSRecord> arr = new ArraySegment<TSRecord>(tsrlist.ToArray());
                            file.AppendData(new ArraySegment<TSRecord>[] { arr });
                        }
                    }

                    filestobemerged.Remove(mergefile1);
                    if (mergefile1.Contains(".tmp")) {
                        File.Delete(mergefile1);
                    }
                    filestobemerged.Remove(mergefile2);
                    if (mergefile2.Contains(".tmp")) {
                        File.Delete(mergefile2);
                    }
                    filestobemerged.Add(mergedfile);
                });
        } while (filestobemerged.Count > 1);

        if (filestobemerged.Count == 1) {
            File.Move(filestobemerged[0], dbdirectory + "\\" + markets + ".dts");
        }

        #endregion
    }

    Console.WriteLine(DateTime.Now.ToString("yyyy-MM-dd HH-mm-ss.fff"));
}