/// <summary>
/// Give the reference date and source directory, convert the algoseek options data into n-resolutions LEAN format.
/// </summary>
/// <remarks>
/// Works in two phases. First, every file in the remote directory matching the remote mask is
/// extracted sequentially into the source directory (up to three attempts per file). Second, the
/// extracted files are processed in parallel: ticks are routed to per-symbol processors, which are
/// handed to <c>WriteToDisk</c> each time a tick crosses into a new 10 minute window. Each extracted
/// file is deleted once it has been processed.
/// </remarks>
/// <param name="symbolFilter">HashSet of symbols as string to process. *Only used for testing*</param>
/// <exception cref="NotImplementedException">A compressed file could not be extracted after three attempts.</exception>
public void Convert(HashSet<string> symbolFilter = null)
{
    const int maxDecompressAttempts = 3;
    const long progressLogInterval = 1000000L;

    //Get the list of all the files, then for each file open a separate streamer.
    var compressedRawDatafiles = Directory.EnumerateFiles(_remote, _remoteMask).Select(f => new FileInfo(f)).ToList();
    var rawDatafiles = new List<FileInfo>();

    Log.Trace("AlgoSeekOptionsConverter.Convert(): Loading {0} AlgoSeekOptionsReader for {1} ", compressedRawDatafiles.Count, _referenceDate);

    //Initialize parameters
    var totalLinesProcessed = 0L;
    var totalFiles = compressedRawDatafiles.Count;
    var totalFilesProcessed = 0;

    // Phase 1: extract every compressed file, one at a time.
    foreach (var compressedRawDatafile in compressedRawDatafiles)
    {
        var timer = DateTime.UtcNow;
        var rawDataFile = new FileInfo(Path.Combine(_source, compressedRawDatafile.Name.Replace(".bz2", "")));

        var decompressSuccessful = false;
        for (var attempt = 1; attempt <= maxDecompressAttempts && !decompressSuccessful; attempt++)
        {
            // The first attempt is logged without the "attempt x of y" suffix.
            var attemptSuffix = attempt == 1 ? string.Empty : $" attempt {attempt} of {maxDecompressAttempts}";
            Log.Trace($"AlgoSeekOptionsConverter.Convert(): Extracting {compressedRawDatafile.Name}{attemptSuffix}.");
            decompressSuccessful = DecompressOpraFile(compressedRawDatafile, rawDataFile);
        }

        if (!decompressSuccessful)
        {
            Log.Error($"Error decompressing {compressedRawDatafile}. Process Stop.");
            // The exception type is kept for backwards compatibility with existing callers;
            // only a descriptive message has been added.
            throw new NotImplementedException(
                $"Unable to decompress {compressedRawDatafile.FullName} after {maxDecompressAttempts} attempts.");
        }

        Log.Trace($"AlgoSeekOptionsConverter.Convert(): Extraction successful in {DateTime.UtcNow - timer:g}.");
        rawDatafiles.Add(rawDataFile);
    }

    // Reference point for the throughput figure in the progress log. It is set once, before the
    // workers start, so no worker ever writes to this captured variable. UTC is used so the
    // elapsed time is not affected by local clock changes.
    var start = DateTime.UtcNow;

    // Phase 2: process each file massively in parallel.
    Parallel.ForEach(rawDatafiles, parallelOptionsProcessing, rawDataFile =>
    {
        Log.Trace("Source File :" + rawDataFile.Name);

        // setting up local processors and the flush event
        var processors = new Processors();
        var waitForFlush = new ManualResetEvent(true);

        using (var reader = new AlgoSeekOptionsReader(rawDataFile.FullName, _referenceDate, symbolFilter))
        {
            var flushStep = TimeSpan.FromMinutes(10);

            if (reader.Current != null) // reader contains the data
            {
                var previousFlush = reader.Current.Time.RoundDown(flushStep);

                do
                {
                    // Direct cast: an unexpected data type fails here with a clear
                    // InvalidCastException instead of a NullReferenceException further down.
                    var tick = (Tick)reader.Current;

                    // When the tick crosses into the next flush window, hand the accumulated
                    // processors to WriteToDisk and start over with an empty set.
                    if (tick.Time.RoundDown(flushStep) > previousFlush)
                    {
                        previousFlush = WriteToDisk(processors, waitForFlush, tick.Time, flushStep);
                        processors = new Processors();
                    }

                    //Add or create the consolidator-flush mechanism for symbol:
                    List<AlgoSeekOptionsProcessor> symbolProcessors;
                    if (!processors.TryGetValue(tick.Symbol, out symbolProcessors))
                    {
                        // The list order must match the numeric values of TickType because the
                        // list is indexed by (int)tick.TickType below.
                        symbolProcessors = new List<AlgoSeekOptionsProcessor>(3)
                        {
                            new AlgoSeekOptionsProcessor(tick.Symbol, _referenceDate, TickType.Trade, _resolution, _destination),
                            new AlgoSeekOptionsProcessor(tick.Symbol, _referenceDate, TickType.Quote, _resolution, _destination),
                            new AlgoSeekOptionsProcessor(tick.Symbol, _referenceDate, TickType.OpenInterest, _resolution, _destination)
                        };

                        processors[tick.Symbol] = symbolProcessors;
                    }

                    // Pass current tick into processor: enum 0 = trade; 1 = quote, 2 = oi
                    symbolProcessors[(int)tick.TickType].Process(tick);

                    // Use the value returned by the atomic increment rather than re-reading the
                    // shared counter, so the logged figure is exactly the milestone that was hit.
                    var linesSoFar = Interlocked.Increment(ref totalLinesProcessed);
                    if (linesSoFar % progressLogInterval == 0)
                    {
                        long memoryInUseMb;
                        using (var currentProcess = Process.GetCurrentProcess())
                        {
                            memoryInUseMb = currentProcess.WorkingSet64 / (1024 * 1024);
                        }

                        Log.Trace(
                            "AlgoSeekOptionsConverter.Convert(): Processed {0,3}M ticks( {1}k / sec); Memory in use: {2} MB; Total progress: {3}%",
                            Math.Round(linesSoFar / 1000000m, 2),
                            Math.Round(linesSoFar / 1000L / (DateTime.UtcNow - start).TotalSeconds),
                            memoryInUseMb,
                            100 * totalFilesProcessed / totalFiles);
                    }
                }
                while (reader.MoveNext());

                Log.Trace("AlgoSeekOptionsConverter.Convert(): Performing final flush to disk... ");
                Flush(processors, DateTime.MaxValue, true);
                WriteToDisk(processors, waitForFlush, DateTime.MaxValue, flushStep, true);
            }

            Log.Trace("AlgoSeekOptionsConverter.Convert(): Cleaning up extracted options file {0}", rawDataFile.FullName);
        }

        // The reader has been disposed by now, so the extracted file can be removed.
        rawDataFile.Delete();

        Log.Trace("AlgoSeekOptionsConverter.Convert(): Finished processing file: " + rawDataFile);
        Interlocked.Increment(ref totalFilesProcessed);
    });
}
/// <summary>
/// Give the reference date and source directory, convert the algoseek options data into n-resolutions LEAN format.
/// </summary>
/// <remarks>
/// Every <c>*.bz2</c> file in the remote directory is handled by a parallel worker: the file is
/// extracted with 7-Zip into the source directory (unless the extracted file already exists), then
/// its ticks are routed to per-symbol processors, which are handed to <c>WriteToDisk</c> each time a
/// tick crosses into a new flush window. The window length is randomised per file (15 to 20 minutes).
/// An exception raised while handling a file is logged and does not stop the other files.
/// </remarks>
public void Convert()
{
    const long progressLogInterval = 1000000L;

    //Get the list of all the files, then for each file open a separate streamer.
    // Materialised once: the directory query is deferred, and enumerating it separately for each
    // count and for the loop would re-scan the directory and could give inconsistent results.
    var files = Directory.EnumerateFiles(_remote, "*.bz2").ToList();
    Log.Trace("AlgoSeekOptionsConverter.Convert(): Loading {0} AlgoSeekOptionsReader for {1} ", files.Count, _referenceDate);

    //Initialize parameters
    var totalLinesProcessed = 0L;
    var totalFiles = files.Count;
    var totalFilesProcessed = 0;
    var start = DateTime.MinValue;

    var zipper = OS.IsWindows ? "C:/Program Files/7-Zip/7z.exe" : "7z";
    var random = new Random((int)DateTime.Now.Ticks);

    // Guards the state shared between workers: the Random instance (System.Random is not
    // thread-safe) and the lazily initialised start time.
    var sharedStateLock = new object();

    //Extract each file massively in parallel.
    Parallel.ForEach(files, parallelOptionsProcessing, file =>
    {
        try
        {
            Log.Trace("Remote File :" + file);

            var csvFile = Path.Combine(_source, Path.GetFileName(file).Replace(".bz2", ""));

            Log.Trace("Source File :" + csvFile);

            if (!File.Exists(csvFile))
            {
                Log.Trace("AlgoSeekOptionsConverter.Convert(): Extracting " + file);
                // NOTE(review): the paths are passed unquoted; presumably neither path contains
                // spaces - confirm against RunZipper before quoting them here.
                var cmdArgs = " e " + file + " -o" + _source;
                RunZipper(zipper, cmdArgs);
            }

            // setting up local processors and the flush event
            var processors = new Processors();
            var waitForFlush = new ManualResetEvent(true);

            // Dispose the reader when the file is done, also when processing throws.
            using (var reader = new AlgoSeekOptionsReader(csvFile, _referenceDate))
            {
                double flushJitterMinutes;
                lock (sharedStateLock)
                {
                    // The first worker to get here fixes the reference point for the throughput
                    // figure. UTC is used so elapsed time is not affected by local clock changes.
                    if (start == DateTime.MinValue)
                    {
                        start = DateTime.UtcNow;
                    }

                    flushJitterMinutes = random.NextDouble() * 5;
                }

                // Each file gets its own flush window length of 15 to 20 minutes.
                var flushStep = TimeSpan.FromMinutes(15 + flushJitterMinutes);

                if (reader.Current != null) // reader contains the data
                {
                    var previousFlush = reader.Current.Time.RoundDown(flushStep);

                    do
                    {
                        // Direct cast: an unexpected data type fails here with a clear
                        // InvalidCastException instead of a NullReferenceException further down.
                        var tick = (Tick)reader.Current;

                        // When the tick crosses into the next flush window, hand the accumulated
                        // processors to WriteToDisk and start over with an empty set.
                        if (tick.Time.RoundDown(flushStep) > previousFlush)
                        {
                            previousFlush = WriteToDisk(processors, waitForFlush, tick.Time, flushStep);
                            processors = new Processors();
                        }

                        //Add or create the consolidator-flush mechanism for symbol:
                        List<AlgoSeekOptionsProcessor> symbolProcessors;
                        if (!processors.TryGetValue(tick.Symbol, out symbolProcessors))
                        {
                            // The list order must match the numeric values of TickType because
                            // the list is indexed by (int)tick.TickType below.
                            symbolProcessors = new List<AlgoSeekOptionsProcessor>(3)
                            {
                                new AlgoSeekOptionsProcessor(tick.Symbol, _referenceDate, TickType.Trade, _resolution, _destination),
                                new AlgoSeekOptionsProcessor(tick.Symbol, _referenceDate, TickType.Quote, _resolution, _destination),
                                new AlgoSeekOptionsProcessor(tick.Symbol, _referenceDate, TickType.OpenInterest, _resolution, _destination)
                            };

                            processors[tick.Symbol] = symbolProcessors;
                        }

                        // Pass current tick into processor: enum 0 = trade; 1 = quote, 2 = oi
                        symbolProcessors[(int)tick.TickType].Process(tick);

                        // Use the value returned by the atomic increment rather than re-reading
                        // the shared counter, so the logged figure is exactly the milestone hit.
                        var linesSoFar = Interlocked.Increment(ref totalLinesProcessed);
                        if (linesSoFar % progressLogInterval == 0)
                        {
                            long memoryInUseMb;
                            using (var currentProcess = Process.GetCurrentProcess())
                            {
                                memoryInUseMb = currentProcess.WorkingSet64 / (1024 * 1024);
                            }

                            Log.Trace("AlgoSeekOptionsConverter.Convert(): Processed {0,3}M ticks( {1}k / sec); Memory in use: {2} MB; Total progress: {3}%",
                                Math.Round(linesSoFar / 1000000m, 2),
                                Math.Round(linesSoFar / 1000L / (DateTime.UtcNow - start).TotalSeconds),
                                memoryInUseMb,
                                100 * totalFilesProcessed / totalFiles);
                        }
                    }
                    while (reader.MoveNext());

                    Log.Trace("AlgoSeekOptionsConverter.Convert(): Performing final flush to disk... ");
                    Flush(processors, DateTime.MaxValue, true);
                    WriteToDisk(processors, waitForFlush, DateTime.MaxValue, flushStep, true);
                }
            }

            Log.Trace("AlgoSeekOptionsConverter.Convert(): Finished processing file: " + file);
            Interlocked.Increment(ref totalFilesProcessed);
        }
        catch (Exception err)
        {
            // Best effort: a failure in one file is logged and the remaining files continue.
            Log.Error("Exception caught! File: {0} Err: {1} Source {2} Stack {3}", file, err.Message, err.Source, err.StackTrace);
        }
    });
}