/// <summary>
/// Converts a single raw CoinAPI file into tick data plus second and minute consolidated data.
/// </summary>
/// <param name="coinapiDataReader">The coinapi data reader.</param>
/// <param name="file">The raw data file to convert.</param>
private void ProcessEntry(CoinApiDataReader coinapiDataReader, FileInfo file)
{
    var entry = coinapiDataReader.GetCoinApiEntryData(file, _processingDate);

    // Data is only converted for supported exchanges.
    var marketIsSupported = SupportedMarkets.Contains(entry.Symbol.ID.Market);
    if (!marketIsSupported)
    {
        return;
    }

    // The ticks are walked twice (tick writer, then aggregation), so materialize them once, sorted by time.
    var orderedTicks = coinapiDataReader.ProcessCoinApiEntry(entry, file)
        .OrderBy(t => t.Time)
        .ToList();

    var tickWriter = new LeanDataWriter(Resolution.Tick, entry.Symbol, _destinationFolder.FullName, entry.TickType);
    tickWriter.Write(orderedTicks);

    Log.Trace($"CoinApiDataConverter(): Starting consolidation for {entry.Symbol.Value} {entry.TickType}");

    // One aggregator per output resolution; the aggregator type follows the tick type.
    var aggregators = entry.TickType == TickType.Trade
        ? new List<TickAggregator>
        {
            new TradeTickAggregator(Resolution.Second),
            new TradeTickAggregator(Resolution.Minute)
        }
        : new List<TickAggregator>
        {
            new QuoteTickAggregator(Resolution.Second),
            new QuoteTickAggregator(Resolution.Minute)
        };

    // When CoinAPI loses connectivity to the exchange, they indicate
    // it in the data by providing a value of `-1` for bid/ask price.
    // Such ticks stay in the tick data, but are left out of the consolidated data.
    foreach (var tick in orderedTicks.Where(t => !t.Suspicious))
    {
        foreach (var aggregator in aggregators)
        {
            aggregator.Update(tick);
        }
    }

    foreach (var aggregator in aggregators)
    {
        var barWriter = new LeanDataWriter(aggregator.Resolution, entry.Symbol, _destinationFolder.FullName, entry.TickType);
        barWriter.Write(aggregator.Flush());
    }
}
/// <summary>
/// Converts a single raw CoinAPI file into tick data plus second and minute consolidated data.
/// </summary>
/// <param name="coinapiDataReader">The coinapi data reader.</param>
/// <param name="file">The raw data file to convert.</param>
private void ProcessEntry(CoinApiDataReader coinapiDataReader, FileInfo file)
{
    var entry = coinapiDataReader.GetCoinApiEntryData(file, _processingDate, _market);

    // Data is only converted for supported exchanges.
    var marketIsSupported = SupportedMarkets.Contains(entry.Symbol.ID.Market);
    if (!marketIsSupported)
    {
        return;
    }

    // The ticks are walked twice (tick writer, then aggregation), so materialize them once.
    var allTicks = coinapiDataReader.ProcessCoinApiEntry(entry, file).ToList();

    var tickWriter = new LeanDataWriter(Resolution.Tick, entry.Symbol, _destinationFolder.FullName, entry.TickType);
    tickWriter.Write(allTicks);

    Log.Trace($"CoinApiDataConverter(): Starting consolidation for {entry.Symbol.Value} {entry.TickType}");

    // One aggregator per output resolution; the aggregator type follows the tick type.
    var isTrade = entry.TickType == TickType.Trade;
    var aggregators = new List<TickAggregator>
    {
        isTrade
            ? (TickAggregator) new TradeTickAggregator(Resolution.Second)
            : new QuoteTickAggregator(Resolution.Second),
        isTrade
            ? (TickAggregator) new TradeTickAggregator(Resolution.Minute)
            : new QuoteTickAggregator(Resolution.Minute)
    };

    // Feed every tick to every aggregator, in file order.
    foreach (var tick in allTicks)
    {
        aggregators.ForEach(aggregator => aggregator.Update(tick));
    }

    foreach (var aggregator in aggregators)
    {
        var barWriter = new LeanDataWriter(aggregator.Resolution, entry.Symbol, _destinationFolder.FullName, entry.TickType);
        barWriter.Write(aggregator.Flush());
    }
}
/// <summary>
/// Converts the raw CoinAPI trade and quote files found for the processing date.
/// </summary>
/// <returns>
/// <c>true</c> when every selected file was converted without error;
/// <c>false</c> when the conversion of at least one file threw.
/// </returns>
public bool Run()
{
    var stopwatch = Stopwatch.StartNew();

    var symbolMapper = new CoinApiSymbolMapper();
    var success = true;

    var tradesFolder = new DirectoryInfo(
        Path.Combine(
            _rawDataFolder.FullName,
            "trades",
            _processingDate.ToStringInvariant(DateFormat.EightCharacter)));

    var quotesFolder = new DirectoryInfo(
        Path.Combine(
            _rawDataFolder.FullName,
            "quotes",
            _processingDate.ToStringInvariant(DateFormat.EightCharacter)));

    var apiDataReader = new CoinApiDataReader(symbolMapper);

    // There were cases of files with an extra suffix, following pattern:
    // <TickType>-<ID>-<Exchange>_SPOT_<BaseCurrency>_<QuoteCurrency>_<ExtraSuffix>.csv.gz
    // Those cases should be ignored for SPOT prices, hence the check for exactly four '_' separated parts.
    var candidates = tradesFolder.EnumerateFiles("*.gz")
        .Concat(quotesFolder.EnumerateFiles("*.gz"))
        .Where(f => f.Name.Contains("SPOT"))
        .Where(f => f.Name.Split('_').Length == 4)
        .ToList();

    // Keep only the first file per (grandparent folder name + symbol ID) key. Given the folders built
    // above, the grandparent folder name is "trades" or "quotes". This prevents us from processing the
    // same ticker twice in case we're given two raw data files for the same symbol.
    // Related: https://github.com/QuantConnect/Lean/pull/3262
    //
    // The keys are computed eagerly, one candidate at a time, so that a file whose entry data cannot be
    // read is skipped here. Computing them lazily while Parallel.ForEach enumerates its source would let
    // the exception escape the per-file try/catch below and abort the whole run.
    var seenKeys = new HashSet<string>();
    var filesToProcess = new List<FileInfo>();
    foreach (var candidate in candidates)
    {
        try
        {
            var key = candidate.Directory.Parent.Name + apiDataReader.GetCoinApiEntryData(candidate, _processingDate).Symbol.ID;
            if (seenKeys.Add(key))
            {
                // Separate list from the HashSet to preserve the ordering of the candidates
                filesToProcess.Add(candidate);
            }
        }
        catch (Exception err)
        {
            // Only the message is logged to keep the console output free of repeated stack traces
            Log.Error($"CoinApiDataConverter(): Skipping source file {candidate.Name}: {err.Message}");
        }
    }

    Parallel.ForEach(filesToProcess, (file, loopState) =>
        {
            Log.Trace($"CoinApiDataConverter(): Starting data conversion from source file: {file.Name}...");
            try
            {
                ProcessEntry(apiDataReader, file);
            }
            catch (Exception e)
            {
                Log.Error(e, $"CoinApiDataConverter(): Error processing entry: {file.Name}");
                success = false;
                // Ask the loop to stop scheduling further files once one conversion has failed
                loopState.Break();
            }
        }
    );

    Log.Trace($"CoinApiDataConverter(): Finished in {stopwatch.Elapsed}");
    return success;
}
/// <summary>
/// CoinAPI data converter entry point.
/// </summary>
/// <param name="sourceDirectory">The source directory where all CoinAPI raw files are stored.</param>
/// <exception cref="ArgumentException">Source folder does not exist.</exception>
/// <remarks>
/// Every <c>*.tar</c> archive directly inside <paramref name="sourceDirectory"/> is opened and each
/// non-directory entry is handed to <c>ProcessEntry</c>. A failure while processing one entry is logged
/// and does not stop the remaining entries from being processed.
/// </remarks>
public static void CoinApiDataConverter(string sourceDirectory)
{
    var folderPath = new DirectoryInfo(sourceDirectory);
    if (!folderPath.Exists)
    {
        throw new ArgumentException($"CoinApiDataConverter(): Source folder not found: {folderPath.FullName}");
    }

    var stopwatch = Stopwatch.StartNew();

    var coinapiDataReader = new CoinApiDataReader();

    foreach (var tarFile in folderPath.EnumerateFiles("*.tar"))
    {
        Log.Trace($"CoinApiDataConverter(): Starting data conversion from source file: {tarFile.Name}...");

        // The archive is only ever read. Request read-only access explicitly: the two-argument
        // FileStream constructor defaults to ReadWrite, which fails on read-only files.
        using (var stream = new FileStream(tarFile.FullName, FileMode.Open, FileAccess.Read, FileShare.Read))
        using (var tar = new TarInputStream(stream))
        {
            TarEntry entry;
            while ((entry = tar.GetNextEntry()) != null)
            {
                if (entry.IsDirectory)
                {
                    continue;
                }

                try
                {
                    ProcessEntry(coinapiDataReader, tar, entry);
                }
                catch (Exception e)
                {
                    // Best effort: log and move on to the next entry in the archive
                    Log.Error(e, $"CoinApiDataConverter(): Error processing entry: {entry.Name}");
                }
            }
        }
    }

    Log.Trace($"CoinApiDataConverter(): Finished in {stopwatch.Elapsed}");
}
/// <summary>
/// Converts the raw CoinAPI trade and quote files found for the processing date.
/// </summary>
/// <returns>
/// <c>true</c> when every selected file was converted without error;
/// <c>false</c> when the conversion of at least one file threw.
/// </returns>
public bool Run()
{
    var stopwatch = Stopwatch.StartNew();

    var symbolMapper = new CoinApiSymbolMapper();
    var success = true;

    // Raw files live under <raw>/trades/<yyyyMMdd> and <raw>/quotes/<yyyyMMdd>
    var dateFolderName = _processingDate.ToStringInvariant(DateFormat.EightCharacter);
    var tradesFolder = new DirectoryInfo(Path.Combine(_rawDataFolder.FullName, "trades", dateFolderName));
    var quotesFolder = new DirectoryInfo(Path.Combine(_rawDataFolder.FullName, "quotes", dateFolderName));

    var apiDataReader = new CoinApiDataReader(symbolMapper);

    // There were cases of files with an extra suffix, following pattern:
    // <TickType>-<ID>-<Exchange>_SPOT_<BaseCurrency>_<QuoteCurrency>_<ExtraSuffix>.csv.gz
    // Those cases should be ignored for SPOT prices, hence the check for exactly four '_' separated parts.
    var candidateFiles = tradesFolder.EnumerateFiles("*.gz")
        .Concat(quotesFolder.EnumerateFiles("*.gz"))
        .Where(f => f.Name.Contains("SPOT") && f.Name.Split('_').Length == 4)
        .ToList();

    // Keep only the first file per (grandparent folder name + symbol ID) key, so the same ticker is not
    // processed twice when two raw data files exist for the same symbol.
    // Related: https://github.com/QuantConnect/Lean/pull/3262
    var seenKeys = new HashSet<string>();
    var filesToProcess = new List<FileInfo>();
    foreach (var candidate in candidateFiles)
    {
        string key;
        try
        {
            key = candidate.Directory.Parent.Name + apiDataReader.GetCoinApiEntryData(candidate, _processingDate).Symbol.ID;
        }
        catch (Exception err)
        {
            // Most likely the exchange isn't supported. Only the exception message is logged
            // to avoid excessive stack trace spamming in console output
            Log.Error(err.Message);
            continue;
        }

        // A list is kept next to the HashSet to preserve the ordering of viable candidates
        if (seenKeys.Add(key))
        {
            filesToProcess.Add(candidate);
        }
    }

    Parallel.ForEach(filesToProcess, (file, loopState) =>
        {
            Log.Trace($"CoinApiDataConverter(): Starting data conversion from source file: {file.Name}...");

            try
            {
                ProcessEntry(apiDataReader, file);
            }
            catch (Exception e)
            {
                Log.Error(e, $"CoinApiDataConverter(): Error processing entry: {file.Name}");
                success = false;
                loopState.Break();
            }
        }
    );

    Log.Trace($"CoinApiDataConverter(): Finished in {stopwatch.Elapsed}");
    return success;
}
/// <summary>
/// Converts a single raw CoinAPI file into tick data plus second and minute consolidated data,
/// also picking up ticks for the processing date that were stored in the previous day's file.
/// </summary>
/// <param name="coinapiDataReader">The coinapi data reader.</param>
/// <param name="file">The raw data file to convert.</param>
private void ProcessEntry(CoinApiDataReader coinapiDataReader, FileInfo file)
{
    var entryData = coinapiDataReader.GetCoinApiEntryData(file, _processingDate);

    // Data is only converted for supported exchanges.
    var marketIsSupported = SupportedMarkets.Contains(entryData.Symbol.ID.Market);
    if (!marketIsSupported)
    {
        return;
    }

    var tickData = coinapiDataReader.ProcessCoinApiEntry(entryData, file);

    // In some cases the first data points of '_processingDate' are included in the previous date's file,
    // so that file is read as well and most of it is dropped below, just to keep those midnight ticks.
    var previousDate = _processingDate.AddDays(-1);
    var currentDateText = _processingDate.ToStringInvariant(DateFormat.EightCharacter);
    var previousDateText = previousDate.ToStringInvariant(DateFormat.EightCharacter);
    var previousFile = new FileInfo(file.FullName.Replace(currentDateText, previousDateText));

    if (!previousFile.Exists)
    {
        Log.Error($"CoinApiDataConverter(): yesterdays data file not found '{previousFile.FullName}'");
    }
    else
    {
        var previousEntryData = coinapiDataReader.GetCoinApiEntryData(previousFile, previousDate);
        tickData = tickData.Concat(coinapiDataReader.ProcessCoinApiEntry(previousEntryData, previousFile));
    }

    // The ticks are walked twice (tick writer, then aggregation), so materialize them once:
    // only ticks dated on the processing date, sorted by time.
    var orderedTicks = tickData
        .Where(tick => tick.Time.Date == _processingDate)
        .OrderBy(tick => tick.Time)
        .ToList();

    var tickWriter = new LeanDataWriter(Resolution.Tick, entryData.Symbol, _destinationFolder.FullName, entryData.TickType);
    tickWriter.Write(orderedTicks);

    Log.Trace($"CoinApiDataConverter(): Starting consolidation for {entryData.Symbol.Value} {entryData.TickType}");

    // One aggregator per output resolution; the aggregator type follows the tick type.
    var aggregators = entryData.TickType == TickType.Trade
        ? new List<TickAggregator>
        {
            new TradeTickAggregator(Resolution.Second),
            new TradeTickAggregator(Resolution.Minute)
        }
        : new List<TickAggregator>
        {
            new QuoteTickAggregator(Resolution.Second),
            new QuoteTickAggregator(Resolution.Minute)
        };

    foreach (var tick in orderedTicks)
    {
        // When CoinAPI loses connectivity to the exchange, they indicate
        // it in the data by providing a value of `-1` for bid/ask price.
        // Such ticks stay in the tick data, but are left out of the consolidated data.
        if (!tick.Suspicious)
        {
            foreach (var aggregator in aggregators)
            {
                aggregator.Update(tick);
            }
        }
    }

    foreach (var aggregator in aggregators)
    {
        var barWriter = new LeanDataWriter(aggregator.Resolution, entryData.Symbol, _destinationFolder.FullName, entryData.TickType);
        barWriter.Write(aggregator.Flush());
    }
}