Beispiel #1
0
        /// <summary>
        /// Give the reference date and source directory, convert the algoseek options data into n-resolutions LEAN format.
        /// </summary>
        /// <remarks>
        /// Works in two phases. First, every file in the remote directory that matches the remote mask is
        /// decompressed, one after another, into the source directory (up to three attempts per file).
        /// Then the extracted files are processed in parallel; each extracted file is deleted once it has
        /// been read to the end.
        /// </remarks>
        /// <param name="symbolFilter">HashSet of symbols as string to process. *Only used for testing*</param>
        /// <exception cref="NotImplementedException">
        /// Thrown when a file still cannot be decompressed after the last attempt. The exception type is
        /// kept as-is for backward compatibility with existing callers.
        /// </exception>
        public void Convert(HashSet<string> symbolFilter = null)
        {
            // Maximum number of times a single archive is handed to DecompressOpraFile.
            const int maxDecompressAttempts = 3;
            // Progress is logged every time the global tick counter hits a multiple of this value.
            const long progressLogInterval = 1000000L;

            //Get the list of all the files, then for each file open a separate streamer.
            var compressedRawDatafiles = Directory.EnumerateFiles(_remote, _remoteMask).Select(f => new FileInfo(f)).ToList();
            var rawDatafiles           = new List<FileInfo>();

            Log.Trace("AlgoSeekOptionsConverter.Convert(): Loading {0} AlgoSeekOptionsReader for {1} ", compressedRawDatafiles.Count, _referenceDate);

            //Initialize parameters
            var totalLinesProcessed = 0L;
            var totalFiles          = compressedRawDatafiles.Count;
            var totalFilesProcessed = 0;

            // Phase 1: sequential extraction with a bounded number of retries per file.
            foreach (var compressedRawDatafile in compressedRawDatafiles)
            {
                var timer                = DateTime.UtcNow;
                var rawDataFile          = new FileInfo(Path.Combine(_source, compressedRawDatafile.Name.Replace(".bz2", "")));
                var decompressSuccessful = false;

                for (var attempt = 1; attempt <= maxDecompressAttempts && !decompressSuccessful; attempt++)
                {
                    // The first attempt is logged without the "attempt x of y" suffix.
                    var attemptText = attempt == 1 ? string.Empty : $" attempt {attempt} of {maxDecompressAttempts}";
                    Log.Trace($"AlgoSeekOptionsConverter.Convert(): Extracting {compressedRawDatafile.Name}{attemptText}.");
                    decompressSuccessful = DecompressOpraFile(compressedRawDatafile, rawDataFile);
                }

                if (!decompressSuccessful)
                {
                    var message = $"Error decompressing {compressedRawDatafile}. Process Stop.";
                    Log.Error(message);
                    // Same exception type as before so existing handlers keep working; now carries the reason.
                    throw new NotImplementedException(message);
                }

                Log.Trace($"AlgoSeekOptionsConverter.Convert(): Extraction successful in {DateTime.UtcNow - timer:g}.");
                rawDatafiles.Add(rawDataFile);
            }

            // Reference point for the throughput figure in the progress log. It is set once, before the
            // workers start, so that no worker has to write a shared variable; UTC avoids clock jumps
            // caused by daylight-saving changes.
            var start = DateTime.UtcNow;

            // Phase 2: process each extracted file massively in parallel.
            Parallel.ForEach(rawDatafiles, parallelOptionsProcessing, rawDataFile =>
            {
                Log.Trace("Source File :" + rawDataFile.Name);

                // setting up local processors and the flush event
                var processors   = new Processors();
                // NOTE(review): the event is handed to WriteToDisk and is intentionally not disposed here;
                // confirm that WriteToDisk no longer references it after returning before adding a Dispose.
                var waitForFlush = new ManualResetEvent(true);

                using (var reader = new AlgoSeekOptionsReader(rawDataFile.FullName, _referenceDate, symbolFilter))
                {
                    var flushStep = TimeSpan.FromMinutes(10);
                    if (reader.Current != null) // reader contains the data
                    {
                        var previousFlush = reader.Current.Time.RoundDown(flushStep);
                        do
                        {
                            // Direct cast: an element that is not a Tick fails here with a clear cast
                            // error instead of a NullReferenceException on the next line.
                            var tick = (Tick)reader.Current;

                            // When the tick falls into a later flushStep-sized window than the previous
                            // flush, write what has been collected and start with fresh processors.
                            if (tick.Time.RoundDown(flushStep) > previousFlush)
                            {
                                previousFlush = WriteToDisk(processors, waitForFlush, tick.Time, flushStep);
                                processors    = new Processors();
                            }

                            //Add or create the consolidator-flush mechanism for symbol:
                            List<AlgoSeekOptionsProcessor> symbolProcessors;
                            if (!processors.TryGetValue(tick.Symbol, out symbolProcessors))
                            {
                                symbolProcessors = new List<AlgoSeekOptionsProcessor>(3)
                                {
                                    new AlgoSeekOptionsProcessor(tick.Symbol, _referenceDate, TickType.Trade, _resolution, _destination),
                                    new AlgoSeekOptionsProcessor(tick.Symbol, _referenceDate, TickType.Quote, _resolution, _destination),
                                    new AlgoSeekOptionsProcessor(tick.Symbol, _referenceDate, TickType.OpenInterest, _resolution, _destination)
                                };
                                processors[tick.Symbol] = symbolProcessors;
                            }

                            // Pass current tick into processor: the list is indexed by the numeric value
                            // of the tick type (0 = trade; 1 = quote; 2 = oi).
                            symbolProcessors[(int)tick.TickType].Process(tick);

                            // Use the value returned by the atomic increment so the logged count is the
                            // one that actually hit the interval, not a later unsynchronized re-read.
                            var linesSoFar = Interlocked.Increment(ref totalLinesProcessed);
                            if (linesSoFar % progressLogInterval == 0)
                            {
                                Log.Trace(
                                    "AlgoSeekOptionsConverter.Convert(): Processed {0,3}M ticks( {1}k / sec); Memory in use: {2} MB; Total progress: {3}%",
                                    Math.Round(linesSoFar / 1000000m, 2),
                                    Math.Round(linesSoFar / 1000L / (DateTime.UtcNow - start).TotalSeconds),
                                    Process.GetCurrentProcess().WorkingSet64 / (1024 * 1024),
                                    100 * totalFilesProcessed / totalFiles);
                            }
                        } while (reader.MoveNext());

                        Log.Trace("AlgoSeekOptionsConverter.Convert(): Performing final flush to disk... ");
                        Flush(processors, DateTime.MaxValue, true);
                        WriteToDisk(processors, waitForFlush, DateTime.MaxValue, flushStep, true);
                    }
                    Log.Trace("AlgoSeekOptionsConverter.Convert(): Cleaning up extracted options file {0}", rawDataFile.FullName);
                }

                // The reader has been disposed above, so the extracted file can be removed.
                rawDataFile.Delete();
                Log.Trace("AlgoSeekOptionsConverter.Convert(): Finished processing file: " + rawDataFile);
                Interlocked.Increment(ref totalFilesProcessed);
            });
        }
Beispiel #2
0
        /// <summary>
        /// Give the reference date and source directory, convert the algoseek options data into n-resolutions LEAN format.
        /// </summary>
        /// <remarks>
        /// Every "*.bz2" file found in the remote directory is handled by a parallel worker: it is extracted
        /// with 7-Zip into the source directory (skipped when the extracted file already exists), read tick
        /// by tick into per-symbol processors, and written out in time windows. An exception raised while
        /// handling one file is logged and does not stop the remaining files.
        /// </remarks>
        public void Convert()
        {
            // Progress is logged every time the global tick counter hits a multiple of this value.
            const long progressLogInterval = 1000000L;

            //Get the list of all the files, then for each file open a separate streamer.
            // Directory.EnumerateFiles is lazy; materialize it once so the directory is scanned a single
            // time and the logged/used totals always agree with the set of files actually processed.
            var files = Directory.EnumerateFiles(_remote, "*.bz2").ToList();

            Log.Trace("AlgoSeekOptionsConverter.Convert(): Loading {0} AlgoSeekOptionsReader for {1} ", files.Count, _referenceDate);

            //Initialize parameters
            var totalLinesProcessed = 0L;
            var totalFiles          = files.Count;
            var totalFilesProcessed = 0;
            var start = DateTime.MinValue;

            var zipper = OS.IsWindows ? "C:/Program Files/7-Zip/7z.exe" : "7z";
            var random = new Random((int)DateTime.Now.Ticks);
            // System.Random is not thread-safe; all workers share one instance, so access is serialized.
            var randomLock = new object();

            //Extract each file massively in parallel.
            Parallel.ForEach(files, parallelOptionsProcessing, file =>
            {
                try
                {
                    Log.Trace("Remote File :" + file);

                    var csvFile = Path.Combine(_source, Path.GetFileName(file).Replace(".bz2", ""));

                    Log.Trace("Source File :" + csvFile);

                    if (!File.Exists(csvFile))
                    {
                        Log.Trace("AlgoSeekOptionsConverter.Convert(): Extracting " + file);

                        // NOTE(review): the paths are not quoted, so a remote or source path containing
                        // spaces would presumably be split by 7-Zip — confirm how RunZipper passes the
                        // arguments before changing this string.
                        var cmdArgs = " e " + file + " -o" + _source;
                        RunZipper(zipper, cmdArgs);
                    }

                    // setting up local processors and the flush event
                    var processors   = new Processors();
                    // NOTE(review): the event is handed to WriteToDisk and is intentionally not disposed
                    // here; confirm WriteToDisk no longer references it after returning before adding a Dispose.
                    var waitForFlush = new ManualResetEvent(true);

                    // Dispose the reader when done, matching the other Convert overload.
                    using (var reader = new AlgoSeekOptionsReader(csvFile, _referenceDate))
                    {
                        // The first worker(s) to get here stamp the start time used for the throughput
                        // figure in the progress log. UTC avoids jumps caused by daylight-saving changes.
                        if (start == DateTime.MinValue)
                        {
                            start = DateTime.UtcNow;
                        }

                        // Each file gets its own flush window of 15 to 20 minutes.
                        double jitterMinutes;
                        lock (randomLock)
                        {
                            jitterMinutes = random.NextDouble() * 5;
                        }
                        var flushStep = TimeSpan.FromMinutes(15 + jitterMinutes);

                        if (reader.Current != null) // reader contains the data
                        {
                            var previousFlush = reader.Current.Time.RoundDown(flushStep);

                            do
                            {
                                // Direct cast: an element that is not a Tick fails here with a clear cast
                                // error instead of a NullReferenceException on the next line.
                                var tick = (Tick)reader.Current;

                                // When the tick falls into a later flushStep-sized window than the previous
                                // flush, write what has been collected and start with fresh processors.
                                if (tick.Time.RoundDown(flushStep) > previousFlush)
                                {
                                    previousFlush = WriteToDisk(processors, waitForFlush, tick.Time, flushStep);
                                    processors    = new Processors();
                                }

                                //Add or create the consolidator-flush mechanism for symbol:
                                List<AlgoSeekOptionsProcessor> symbolProcessors;
                                if (!processors.TryGetValue(tick.Symbol, out symbolProcessors))
                                {
                                    symbolProcessors = new List<AlgoSeekOptionsProcessor>(3)
                                    {
                                        new AlgoSeekOptionsProcessor(tick.Symbol, _referenceDate, TickType.Trade, _resolution, _destination),
                                        new AlgoSeekOptionsProcessor(tick.Symbol, _referenceDate, TickType.Quote, _resolution, _destination),
                                        new AlgoSeekOptionsProcessor(tick.Symbol, _referenceDate, TickType.OpenInterest, _resolution, _destination)
                                    };

                                    processors[tick.Symbol] = symbolProcessors;
                                }

                                // Pass current tick into processor: the list is indexed by the numeric
                                // value of the tick type (0 = trade; 1 = quote; 2 = oi).
                                symbolProcessors[(int)tick.TickType].Process(tick);

                                // Use the value returned by the atomic increment so the logged count is the
                                // one that actually hit the interval, not a later unsynchronized re-read.
                                var linesSoFar = Interlocked.Increment(ref totalLinesProcessed);
                                if (linesSoFar % progressLogInterval == 0)
                                {
                                    Log.Trace(
                                        "AlgoSeekOptionsConverter.Convert(): Processed {0,3}M ticks( {1}k / sec); Memory in use: {2} MB; Total progress: {3}%",
                                        Math.Round(linesSoFar / 1000000m, 2),
                                        Math.Round(linesSoFar / 1000L / (DateTime.UtcNow - start).TotalSeconds),
                                        Process.GetCurrentProcess().WorkingSet64 / (1024 * 1024),
                                        100 * totalFilesProcessed / totalFiles);
                                }
                            } while (reader.MoveNext());

                            Log.Trace("AlgoSeekOptionsConverter.Convert(): Performing final flush to disk... ");
                            Flush(processors, DateTime.MaxValue, true);
                            WriteToDisk(processors, waitForFlush, DateTime.MaxValue, flushStep, true);
                        }
                    }

                    Log.Trace("AlgoSeekOptionsConverter.Convert(): Finished processing file: " + file);
                    Interlocked.Increment(ref totalFilesProcessed);
                }
                catch (Exception err)
                {
                    // Best effort per file: log the failure and let the other workers continue.
                    Log.Error("Exception caught! File: {0} Err: {1} Source {2} Stack {3}", file, err.Message, err.Source, err.StackTrace);
                }
            });
        }