/// <summary>
/// Given the reference date and source directory, convert the algoseek data into n-resolutions LEAN format.
/// Extracts each remote archive with 7-Zip, streams its ticks through per-symbol processors for
/// trade/quote/open-interest at every configured resolution, and flushes the results to disk.
/// </summary>
public void Convert()
{
    // Get the list of all the files, excluding options data. Materialize the query once:
    // the original deferred IEnumerable was re-enumerated by both Count() calls and the
    // Parallel.ForEach below, re-scanning the remote directory each time.
    var files = Directory.EnumerateFiles(_remote, _remoteMask)
        .Where(x => Path.GetFileNameWithoutExtension(x).IndexOf("option", StringComparison.OrdinalIgnoreCase) == -1)
        .ToList();

    Log.Trace("AlgoSeekFuturesConverter.Convert(): Loading {0} AlgoSeekFuturesReader for {1} ", files.Count, _referenceDate);

    // Initialize parameters
    var totalLinesProcessed = 0L;
    var totalFiles = files.Count;
    var totalFilesProcessed = 0;
    var start = DateTime.MinValue;
    var zipper = OS.IsWindows ? "C:/Program Files/7-Zip/7z.exe" : "7z";
    var symbolMultipliers = LoadSymbolMultipliers();

    // Extract each file massively in parallel.
    Parallel.ForEach(files, parallelOptions, file =>
    {
        try
        {
            Log.Trace("Remote File :" + file);
            var csvFile = Path.Combine(_source, Path.GetFileNameWithoutExtension(file));
            Log.Trace("Source File :" + csvFile);

            if (!File.Exists(csvFile))
            {
                Log.Trace("AlgoSeekFuturesConverter.Convert(): Extracting " + file);
                // Quote both paths: the archive name and destination may contain spaces,
                // which would otherwise split into multiple 7z arguments.
                var psi = new ProcessStartInfo(zipper, " e \"" + file + "\" -o\"" + _source + "\"")
                {
                    CreateNoWindow = true,
                    WindowStyle = ProcessWindowStyle.Hidden,
                    UseShellExecute = false,
                    RedirectStandardOutput = true
                };

                // Dispose the process handle once extraction completes.
                using (var process = new Process())
                {
                    process.StartInfo = psi;
                    process.Start();

                    // Drain stdout so the child cannot block on a full pipe buffer.
                    while (!process.StandardOutput.EndOfStream)
                    {
                        process.StandardOutput.ReadLine();
                    }

                    if (!process.WaitForExit(execTimeout * 1000))
                    {
                        Log.Error("7Zip timed out: " + file);
                    }
                    else if (process.ExitCode > 0)
                    {
                        Log.Error("7Zip Exited Unsuccessfully: " + file);
                    }
                }
            }

            // setting up local processors
            var processors = new Processors();
            var reader = new AlgoSeekFuturesReader(csvFile, symbolMultipliers);

            // First reader to come online stamps the start time used for the ticks/sec log below.
            if (start == DateTime.MinValue)
            {
                start = DateTime.Now;
            }

            if (reader.Current != null) // reader contains the data
            {
                do
                {
                    var tick = reader.Current as Tick;

                    // Add or create the consolidator-flush mechanism for symbol:
                    List<List<AlgoSeekFuturesProcessor>> symbolProcessors;
                    if (!processors.TryGetValue(tick.Symbol, out symbolProcessors))
                    {
                        symbolProcessors = new List<List<AlgoSeekFuturesProcessor>>(3)
                        {
                            { _resolutions.Select(x => new AlgoSeekFuturesProcessor(tick.Symbol, _referenceDate, TickType.Trade, x, _destination)).ToList() },
                            { _resolutions.Select(x => new AlgoSeekFuturesProcessor(tick.Symbol, _referenceDate, TickType.Quote, x, _destination)).ToList() },
                            { _resolutions.Select(x => new AlgoSeekFuturesProcessor(tick.Symbol, _referenceDate, TickType.OpenInterest, x, _destination)).ToList() }
                        };
                        processors[tick.Symbol] = symbolProcessors;
                    }

                    // Pass current tick into processor: enum 0 = trade; 1 = quote, 2 = oi
                    foreach (var processor in symbolProcessors[(int)tick.TickType])
                    {
                        processor.Process(tick);
                    }

                    // Progress report every million ticks across all parallel workers.
                    if (Interlocked.Increment(ref totalLinesProcessed) % 1000000m == 0)
                    {
                        var pro = (double)processors.Values.SelectMany(p => p.SelectMany(x => x)).Count();
                        var symbols = (double)processors.Keys.Count();
                        Log.Trace("AlgoSeekFuturesConverter.Convert(): Processed {0,3}M ticks( {1}k / sec); Memory in use: {2} MB; Total progress: {3}%, Processor per symbol {4}",
                            Math.Round(totalLinesProcessed / 1000000m, 2),
                            Math.Round(totalLinesProcessed / 1000L / (DateTime.Now - start).TotalSeconds),
                            Process.GetCurrentProcess().WorkingSet64 / (1024 * 1024),
                            100 * totalFilesProcessed / totalFiles,
                            pro / symbols);
                    }
                }
                while (reader.MoveNext());

                Log.Trace("AlgoSeekFuturesConverter.Convert(): Performing final flush to disk... ");
                Flush(processors, DateTime.MaxValue, true);
            }

            // Release the per-file processors eagerly; each file can hold a large working set.
            processors = null;
            GC.Collect();
            GC.WaitForPendingFinalizers();

            Log.Trace("AlgoSeekFuturesConverter.Convert(): Finished processing file: " + file);
            Interlocked.Increment(ref totalFilesProcessed);
        }
        catch (Exception err)
        {
            Log.Error("Exception caught! File: {0} Err: {1} Source {2} Stack {3}", file, err.Message, err.Source, err.StackTrace);
        }
    });
}
/// <summary>
/// Give the reference date and source directory, convert the algoseek data into n-resolutions LEAN format.
/// Copies the remote raw archives to the local temp folder, extracts each with 7-Zip, then streams
/// every tick through per-symbol processors (trade/quote/open-interest at each resolution) and
/// flushes the results to disk.
/// </summary>
public void Convert()
{
    Log.Trace("AlgoSeekFuturesConverter.Convert(): Copying remote raw data files locally.");

    // Get the list of available raw files, copy from its remote location to a local folder
    // and then for each file open a separate streamer. Futures-only: "option" files are skipped.
    var files = GetFilesInRawFolder()
        .Where(f => (f.Extension == ".gz" || f.Extension == ".bz2") && !f.Name.Contains("option"))
        .Select(remote => remote.CopyTo(Path.Combine(Path.GetTempPath(), remote.Name), true))
        .ToList();

    Log.Trace("AlgoSeekFuturesConverter.Convert(): Loading {0} AlgoSeekFuturesReader for {1} ", files.Count(), _referenceDate);

    // Initialize parameters
    var totalLinesProcessed = 0L;
    var totalFiles = files.Count();
    var totalFilesProcessed = 0;
    // Stamped by whichever worker creates the first reader; used for the ticks/sec log below.
    // NOTE(review): written from parallel lambdas without synchronization — benign for a
    // progress metric, but worth confirming this is intentional.
    var start = DateTime.MinValue;
    var symbolMultipliers = LoadSymbolMultipliers();

    // Extract each file massively in parallel.
    Parallel.ForEach(files, file =>
    {
        try
        {
            Log.Trace("Remote File :" + file);
            var csvFile = Path.Combine(_source.FullName, Path.GetFileNameWithoutExtension(file.Name));
            Log.Trace("Source File :" + csvFile);

            if (!File.Exists(csvFile))
            {
                // create the directory first or else 7z will fail
                var csvFileInfo = new FileInfo(csvFile);
                Directory.CreateDirectory(csvFileInfo.DirectoryName);

                Log.Trace("AlgoSeekFuturesConverter.Convert(): Extracting " + file);

                // Never time out extracting an archive; they can be pretty big
                // and take a while to extract depending on the computer running this application
                Compression.Extract7ZipArchive(file.FullName, _source.FullName, -1);
            }

            // setting up local processors
            var processors = new Processors();
            var reader = new AlgoSeekFuturesReader(csvFile, symbolMultipliers, _symbolFilter);

            if (start == DateTime.MinValue)
            {
                start = DateTime.Now;
            }

            if (reader.Current != null) // reader contains the data
            {
                do
                {
                    var tick = reader.Current as Tick;

                    // Add or create the consolidator-flush mechanism for symbol:
                    // one processor list per tick type, each holding one processor per resolution.
                    List<List<AlgoSeekFuturesProcessor>> symbolProcessors;
                    if (!processors.TryGetValue(tick.Symbol, out symbolProcessors))
                    {
                        symbolProcessors = new List<List<AlgoSeekFuturesProcessor>>(3)
                        {
                            { _resolutions.Select(x => new AlgoSeekFuturesProcessor(tick.Symbol, _referenceDate, TickType.Trade, x, _destination)).ToList() },
                            { _resolutions.Select(x => new AlgoSeekFuturesProcessor(tick.Symbol, _referenceDate, TickType.Quote, x, _destination)).ToList() },
                            { _resolutions.Select(x => new AlgoSeekFuturesProcessor(tick.Symbol, _referenceDate, TickType.OpenInterest, x, _destination)).ToList() }
                        };
                        processors[tick.Symbol] = symbolProcessors;
                    }

                    // Pass current tick into processor: enum 0 = trade; 1 = quote, 2 = oi
                    foreach (var processor in symbolProcessors[(int)tick.TickType])
                    {
                        processor.Process(tick);
                    }

                    // Progress report every million ticks across all parallel workers.
                    if (Interlocked.Increment(ref totalLinesProcessed) % 1000000m == 0)
                    {
                        var pro = (double)processors.Values.SelectMany(p => p.SelectMany(x => x)).Count();
                        var symbols = (double)processors.Keys.Count();
                        Log.Trace("AlgoSeekFuturesConverter.Convert(): Processed {0,3}M ticks( {1}k / sec); Memory in use: {2} MB; Total progress: {3}%, Processor per symbol {4}",
                            Math.Round(totalLinesProcessed / 1000000m, 2),
                            Math.Round(totalLinesProcessed / 1000L / (DateTime.Now - start).TotalSeconds),
                            Process.GetCurrentProcess().WorkingSet64 / (1024 * 1024),
                            100 * totalFilesProcessed / totalFiles,
                            pro / symbols);
                    }
                }
                while (reader.MoveNext());

                Log.Trace("AlgoSeekFuturesConverter.Convert(): Performing final flush to disk... ");
                Flush(processors, DateTime.MaxValue, true);
            }

            // Release the per-file processors eagerly; each file can hold a large working set.
            processors = null;
            GC.Collect();
            GC.WaitForPendingFinalizers();

            Log.Trace("AlgoSeekFuturesConverter.Convert(): Finished processing file: " + file);
            Interlocked.Increment(ref totalFilesProcessed);
        }
        catch (Exception err)
        {
            Log.Error("Exception caught! File: {0} Err: {1} Source {2} Stack {3}", file, err.Message, err.Source, err.StackTrace);
        }
    });
}