private string LoadFutureData(Symbol future, TickType tickType, Resolution res)
{
    var dataType = LeanData.GetDataType(res, tickType);
    var config = new SubscriptionDataConfig(dataType, future, res,
        TimeZones.NewYork, TimeZones.NewYork, false, false, false, false, tickType);

    var date = _fromDate;
    var sb = new StringBuilder();
    while (date <= _toDate)
    {
        var leanDataReader = new LeanDataReader(config, future, res, date, _dataDirectory);

        foreach (var bar in leanDataReader.Parse())
        {
            // Write the base data type back to a CSV line
            sb.AppendLine(LeanData.GenerateLine(bar, SecurityType.Future, res));
        }
        date = date.AddDays(1);
    }
    var csv = sb.ToString();
    return csv;
}
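For orientation, here is a minimal, self-contained sketch (not from the source) of the serialization this loader round-trips: LeanData.GenerateLine emits the same CSV line format that LeanDataReader.Parse reads back. The method name, symbol, prices, and timestamps below are illustrative.

using System;
using QuantConnect;
using QuantConnect.Data.Market;
using QuantConnect.Util;

private static void PrintSampleFutureLine()
{
    // Illustrative contract and bar; values are made up
    var es = Symbol.CreateFuture("ES", Market.CME, new DateTime(2021, 3, 19));
    var bar = new TradeBar(new DateTime(2021, 1, 4, 9, 30, 0), es, 3700m, 3705m, 3698m, 3702m, 1500m);

    // For minute resolution the generated line is keyed by milliseconds since midnight
    Console.WriteLine(LeanData.GenerateLine(bar, SecurityType.Future, Resolution.Minute));
}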
private void SaveMinuteOrSecondOrTick(
    List<Symbol> symbols,
    DateTime startTimeUtc,
    DateTime endTimeUtc,
    Symbol canonicalSymbol,
    IReadOnlyDictionary<Symbol, List<IGrouping<DateTime, BaseData>>> historyBySymbol)
{
    var date = startTimeUtc;
    while (date <= endTimeUtc)
    {
        var zipFileName = Path.Combine(
            Globals.DataFolder,
            LeanData.GenerateRelativeZipFilePath(canonicalSymbol, date, _resolution, _tickType));

        var folder = Path.GetDirectoryName(zipFileName);
        if (!Directory.Exists(folder))
        {
            Directory.CreateDirectory(folder);
        }

        if (File.Exists(zipFileName))
        {
            File.Delete(zipFileName);
        }

        using (var zip = new ZipFile(zipFileName))
        {
            foreach (var symbol in symbols)
            {
                var zipEntryName = LeanData.GenerateZipEntryName(symbol, date, _resolution, _tickType);

                foreach (var group in historyBySymbol[symbol])
                {
                    if (group.Key == date)
                    {
                        var sb = new StringBuilder();
                        foreach (var row in group)
                        {
                            var line = LeanData.GenerateLine(row, _securityType, _resolution);
                            sb.AppendLine(line);
                        }
                        zip.AddEntry(zipEntryName, sb.ToString());
                        break;
                    }
                }
            }

            if (zip.Count > 0)
            {
                zip.Save();
            }
        }

        date = date.AddDays(1);
    }
}
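The zip handling above relies on DotNetZip's ZipFile. A minimal sketch of that write pattern in isolation; the file path, entry name, and contents are illustrative:

using Ionic.Zip;

// Create (or open) a zip, add a text entry, and persist it; this mirrors the
// per-day write above without the LEAN-specific path and entry naming
using (var zip = new ZipFile("/tmp/example.zip")) // illustrative path
{
    zip.AddEntry("20210104_trade.csv", "540000,3700,3705,3698,3702,1500"); // illustrative entry and content
    if (zip.Count > 0)
    {
        zip.Save();
    }
}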
/// <summary>
/// Output a list of base data objects as CSV lines in a single string.
/// </summary>
/// <param name="processor">The processor whose queued data is serialized</param>
/// <returns>The CSV contents for the processor's queue</returns>
private string FileBuilder(AlgoSeekOptionsProcessor processor)
{
    var sb = new StringBuilder();
    foreach (var data in processor.Queue)
    {
        sb.AppendLine(LeanData.GenerateLine(data, SecurityType.Option, processor.Resolution));
    }
    return sb.ToString();
}
public void GenerateLine(LeanDataLineTestParameters parameters)
{
    var line = LeanData.GenerateLine(parameters.Data, parameters.SecurityType, parameters.Resolution);
    Assert.AreEqual(parameters.ExpectedLine, line);

    if (parameters.Config.Type == typeof(QuoteBar))
    {
        Assert.AreEqual(11, line.Split(',').Length);
    }
    if (parameters.Config.Type == typeof(TradeBar))
    {
        Assert.AreEqual(6, line.Split(',').Length);
    }
}
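The column counts asserted here follow from the LEAN line format: a quote line carries the time plus bid OHLC and size plus ask OHLC and size, while a trade line carries the time plus OHLC and volume. A standalone sketch, assuming NUnit as in the test above; the symbol and prices are illustrative:

using System;
using NUnit.Framework;
using QuantConnect;
using QuantConnect.Data.Market;
using QuantConnect.Util;

[Test]
public void MinuteQuoteBarLineHasElevenColumns()
{
    var symbol = Symbol.Create("EURUSD", SecurityType.Forex, Market.Oanda);
    var bid = new Bar(1.100m, 1.110m, 1.090m, 1.105m);
    var ask = new Bar(1.101m, 1.111m, 1.091m, 1.106m);
    var quoteBar = new QuoteBar(new DateTime(2021, 1, 4, 9, 30, 0), symbol, bid, 100m, ask, 100m);

    var line = LeanData.GenerateLine(quoteBar, SecurityType.Forex, Resolution.Minute);

    // time, bid O/H/L/C, bid size, ask O/H/L/C, ask size => 11 fields
    Assert.AreEqual(11, line.Split(',').Length);
}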
/// <summary>
/// Invoked for each piece of data from the source file
/// </summary>
/// <param name="data">The data to be processed</param>
public void Process(BaseData data)
{
    Writer writer;
    if (!_writers.TryGetValue(data.Symbol, out writer))
    {
        writer = CreateTextWriter(data);
        _writers[data.Symbol] = writer;
    }

    // Flush every so often
    if (++writer.ProcessCount % TicksPerFlush == 0)
    {
        writer.TextWriter.Flush();
    }

    var line = LeanData.GenerateLine(data, data.Symbol.ID.SecurityType, _resolution);
    writer.TextWriter.WriteLine(line);
}
/// <summary>
/// Write out the data in LEAN format (minute, second or tick resolutions)
/// </summary>
/// <param name="source">IEnumerable source of the data: sorted from oldest to newest.</param>
/// <remarks>This function overwrites existing data files</remarks>
private void WriteMinuteOrSecondOrTick(IEnumerable<BaseData> source)
{
    var sb = new StringBuilder();
    var lastTime = new DateTime();
    int i = 0;

    // Loop through all the data and write to file as we go
    foreach (var data in source)
    {
        i++;

        // Ensure the data is sorted
        if (data.Time < lastTime)
        {
            throw new Exception("The data must be pre-sorted from oldest to newest");
        }

        // Based on the security type and resolution, write the data to the zip file
        if (lastTime != DateTime.MinValue && data.Time.Date > lastTime.Date)
        {
            // Write and clear the file contents
            var outputFile = GetZipOutputFileName(_dataDirectory, lastTime);
            WriteFile(outputFile, sb.ToString(), lastTime);
            sb.Clear();
        }

        lastTime = data.Time;

        // Build the line and append it to the file
        sb.Append(LeanData.GenerateLine(data, _securityType, _resolution) + Environment.NewLine);
    }

    // Write the last file
    if (sb.Length > 0)
    {
        var outputFile = GetZipOutputFileName(_dataDirectory, lastTime);
        WriteFile(outputFile, sb.ToString(), lastTime);
    }
}
/// <summary>
/// Write the in-memory queues to disk.
/// </summary>
/// <param name="frontierTime">Current foremost tick time</param>
/// <param name="finalFlush">Indicates whether this is the final push to disk at the end of the data</param>
public void FlushBuffer(DateTime frontierTime, bool finalFlush)
{
    // Force the consolidation if time has passed the bar
    _consolidator.Scan(frontierTime);

    // If this is the final flush, dump the working bar and close the writer
    if (finalFlush)
    {
        if (_consolidator.WorkingData != null)
        {
            _streamWriter.WriteLine(LeanData.GenerateLine(_consolidator.WorkingData, SecurityType.Future, Resolution));
        }

        _streamWriter.Flush();
        _streamWriter.Close();
        _streamWriter = null;

        Interlocked.Add(ref _curFileCount, -1);
        if (_curFileCount % 1000 == 0)
        {
            Log.Trace("Closed some files: {0}", _curFileCount);
        }
    }
}
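The Scan call above force-closes any working bar whose period has elapsed at the frontier time. A minimal sketch of that consolidate-then-serialize pattern in isolation; the times are illustrative and the tick feed is elided:

using System;
using QuantConnect;
using QuantConnect.Data.Consolidators;
using QuantConnect.Util;

var consolidator = new TickConsolidator(TimeSpan.FromMinutes(1));
consolidator.DataConsolidated += (sender, bar) =>
{
    // Serialize each completed minute bar the same way FlushBuffer does
    Console.WriteLine(LeanData.GenerateLine(bar, SecurityType.Future, Resolution.Minute));
};

// ... feed ticks in time order: consolidator.Update(tick);

// Force-close the working bar once the frontier time passes its end,
// mirroring the _consolidator.Scan(frontierTime) call above
consolidator.Scan(new DateTime(2021, 1, 4, 9, 31, 0));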
/// <summary>
/// Write out the data in LEAN format (daily or hour resolutions)
/// </summary>
/// <param name="source">IEnumerable source of the data: sorted from oldest to newest.</param>
/// <remarks>This function performs a merge (insert/append/overwrite) with the existing LEAN zip file</remarks>
private void WriteDailyOrHour(IEnumerable<BaseData> source)
{
    var sb = new StringBuilder();
    var lastTime = new DateTime();

    // Determine the file path; for daily/hour data the file name does not depend on the date
    var outputFile = GetZipOutputFileName(_dataDirectory, lastTime);

    // Load new data rows into a SortedDictionary for easy merge/update
    var newRows = new SortedDictionary<DateTime, string>(
        source.ToDictionary(x => x.Time, x => LeanData.GenerateLine(x, _securityType, _resolution)));
    SortedDictionary<DateTime, string> rows;

    if (File.Exists(outputFile))
    {
        // If the file exists, load the existing data and merge; new rows win on timestamp collisions
        rows = LoadHourlyOrDailyFile(outputFile);
        foreach (var kvp in newRows)
        {
            rows[kvp.Key] = kvp.Value;
        }
    }
    else
    {
        // No existing file, just use the new data
        rows = newRows;
    }

    // Loop through the SortedDictionary and build the file contents
    foreach (var kvp in rows)
    {
        sb.Append(kvp.Value + Environment.NewLine);
    }

    // Write the file contents
    if (sb.Length > 0)
    {
        WriteFile(outputFile, sb.ToString(), lastTime);
    }
}
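The merge above reduces to "new rows win on timestamp collisions". A tiny self-contained illustration of that SortedDictionary semantics, with made-up keys and values:

using System;
using System.Collections.Generic;

var existing = new SortedDictionary<DateTime, string>
{
    { new DateTime(2021, 1, 4), "old line" },
    { new DateTime(2021, 1, 5), "untouched line" }
};
var incoming = new SortedDictionary<DateTime, string>
{
    { new DateTime(2021, 1, 4), "new line" },      // same key: overwrites
    { new DateTime(2021, 1, 6), "appended line" }  // new key: inserted in order
};

foreach (var kvp in incoming)
{
    existing[kvp.Key] = kvp.Value; // the indexer performs insert-or-update
}
// existing now enumerates in date order: "new line", "untouched line", "appended line"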
private void SaveDailyOrHour(
    List<Symbol> symbols,
    Symbol canonicalSymbol,
    IReadOnlyDictionary<Symbol, List<BaseData>> historyBySymbol)
{
    var zipFileName = Path.Combine(
        _dataDirectory,
        LeanData.GenerateRelativeZipFilePath(canonicalSymbol, DateTime.MinValue, _resolution, _tickType));

    var folder = Path.GetDirectoryName(zipFileName);
    if (!Directory.Exists(folder))
    {
        Directory.CreateDirectory(folder);
    }

    using (var zip = new ZipFile(zipFileName))
    {
        foreach (var symbol in symbols)
        {
            // Load new data rows into a SortedDictionary for easy merge/update
            var newRows = new SortedDictionary<DateTime, string>(historyBySymbol[symbol]
                .ToDictionary(x => x.Time, x => LeanData.GenerateLine(x, _securityType, _resolution)));

            var rows = new SortedDictionary<DateTime, string>();
            var zipEntryName = LeanData.GenerateZipEntryName(symbol, DateTime.MinValue, _resolution, _tickType);

            if (zip.ContainsEntry(zipEntryName))
            {
                // If the entry exists, load the existing data and perform the merge
                using (var stream = new MemoryStream())
                {
                    zip[zipEntryName].Extract(stream);
                    stream.Seek(0, SeekOrigin.Begin);

                    using (var reader = new StreamReader(stream))
                    {
                        string line;
                        while ((line = reader.ReadLine()) != null)
                        {
                            var time = Parse.DateTimeExact(line.Substring(0, DateFormat.TwelveCharacter.Length), DateFormat.TwelveCharacter);
                            rows[time] = line;
                        }
                    }
                }

                foreach (var kvp in newRows)
                {
                    rows[kvp.Key] = kvp.Value;
                }
            }
            else
            {
                // No existing entry, just use the new data
                rows = newRows;
            }

            // Loop through the SortedDictionary and build the zip entry contents
            var sb = new StringBuilder();
            foreach (var kvp in rows)
            {
                sb.AppendLine(kvp.Value);
            }

            // Write the zip entry
            if (sb.Length > 0)
            {
                if (zip.ContainsEntry(zipEntryName))
                {
                    zip.RemoveEntry(zipEntryName);
                }

                zip.AddEntry(zipEntryName, sb.ToString());
            }
        }

        if (zip.Count > 0)
        {
            zip.Save();
        }
    }
}
private void ConvertMinuteFuturesData(Symbol canonical, TickType tickType, Resolution outputResolution, Resolution inputResolution = Resolution.Minute)
{
    var timeSpans = new Dictionary<Resolution, TimeSpan>()
    {
        { Resolution.Daily, TimeSpan.FromHours(24) },
        { Resolution.Hour, TimeSpan.FromHours(1) }
    };

    var timeSpan = timeSpans[outputResolution];

    var tickTypeConsolidatorMap = new Dictionary<TickType, Func<IDataConsolidator>>()
    {
        { TickType.Quote, () => new QuoteBarConsolidator(timeSpan) },
        { TickType.OpenInterest, () => new OpenInterestConsolidator(timeSpan) },
        { TickType.Trade, () => new TradeBarConsolidator(timeSpan) }
    };

    var consolidators = new Dictionary<string, IDataConsolidator>();
    var configs = new Dictionary<string, SubscriptionDataConfig>();
    var outputFiles = new Dictionary<string, StringBuilder>();
    var futures = new Dictionary<string, Symbol>();

    var date = _fromDate;
    while (date <= _toDate)
    {
        var futureChain = LoadFutureChain(canonical, date, tickType, inputResolution);

        foreach (var future in futureChain)
        {
            if (!futures.ContainsKey(future.Value))
            {
                futures[future.Value] = future;

                var config = new SubscriptionDataConfig(LeanData.GetDataType(outputResolution, tickType),
                    future, inputResolution, TimeZones.NewYork, TimeZones.NewYork,
                    false, false, false, false, tickType);
                configs[future.Value] = config;

                consolidators[future.Value] = tickTypeConsolidatorMap[tickType].Invoke();

                var sb = new StringBuilder();
                outputFiles[future.Value] = sb;

                consolidators[future.Value].DataConsolidated += (sender, bar) =>
                {
                    sb.Append(LeanData.GenerateLine(bar, SecurityType.Future, outputResolution) + Environment.NewLine);
                };
            }

            var leanDataReader = new LeanDataReader(configs[future.Value], future, inputResolution, date, _dataDirectory);
            var consolidator = consolidators[future.Value];

            foreach (var bar in leanDataReader.Parse())
            {
                consolidator.Update(bar);
            }
        }
        date = date.AddDays(1);
    }

    // Force the consolidators to emit their working bars before writing the results
    foreach (var consolidator in consolidators.Values)
    {
        consolidator.Scan(date);
    }

    var zip = LeanData.GenerateRelativeZipFilePath(canonical, _fromDate, outputResolution, tickType);
    var zipPath = Path.Combine(_dataDirectory, zip);
    var fi = new FileInfo(zipPath);
    if (!fi.Directory.Exists)
    {
        fi.Directory.Create();
    }

    foreach (var future in futures.Values)
    {
        var zipEntry = LeanData.GenerateZipEntryName(future, _fromDate, outputResolution, tickType);
        var sb = outputFiles[future.Value];

        // Uncomment to write zip files
        //QuantConnect.Compression.ZipCreateAppendData(zipPath, zipEntry, sb.ToString());
        Assert.IsTrue(sb.Length > 0);
    }
}
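For reference, a short sketch of the path helpers this conversion uses, with an illustrative canonical symbol and contract; the exact relative layout is defined by LEAN's data folder conventions:

using System;
using QuantConnect;
using QuantConnect.Util;

var canonical = Symbol.Create("ES", SecurityType.Future, Market.CME); // canonical future symbol
var contract = Symbol.CreateFuture("ES", Market.CME, new DateTime(2021, 3, 19));
var date = new DateTime(2021, 1, 4);

// Relative zip path under the data folder for the canonical symbol
var zipPath = LeanData.GenerateRelativeZipFilePath(canonical, date, Resolution.Hour, TickType.Trade);

// Entry name for one contract inside that zip
var entryName = LeanData.GenerateZipEntryName(contract, date, Resolution.Hour, TickType.Trade);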
public void GenerateLine(LeanDataLineTestParameters parameters)
{
    var line = LeanData.GenerateLine(parameters.Data, parameters.SecurityType, parameters.Resolution);
    Assert.AreEqual(parameters.ExpectedLine, line);
}
/// <summary>
/// Given the constructor parameters, write out the data in LEAN format.
/// </summary>
/// <param name="source">IEnumerable source of the data: sorted from oldest to newest.</param>
public void Write(IEnumerable<BaseData> source)
{
    var lastTime = DateTime.MinValue;
    var outputFile = string.Empty;
    var currentFileData = new List<(DateTime, string)>();
    var writeTasks = new Queue<Task>();

    foreach (var data in source)
    {
        // Ensure the data is sorted as a safety check
        if (data.Time < lastTime)
        {
            throw new Exception("The data must be pre-sorted from oldest to newest");
        }

        // Update our output file only on date change, because we know there are
        // no data zips smaller than a day; this saves time
        if (data.Time.Date != lastTime.Date)
        {
            // Get the latest file name; if it has changed, we have entered a new file, so write our current data
            var latestOutputFile = GetZipOutputFileName(_dataDirectory, data.Time);
            if (outputFile.IsNullOrEmpty() || outputFile != latestOutputFile)
            {
                if (!currentFileData.IsNullOrEmpty())
                {
                    // Launch a write task for the current file and data set
                    var file = outputFile;
                    var fileData = currentFileData;
                    writeTasks.Enqueue(Task.Run(() =>
                    {
                        WriteFile(file, fileData, data.Time);
                    }));
                }

                // Reset our buffer and store the new output file
                currentFileData = new List<(DateTime, string)>();
                outputFile = latestOutputFile;
            }
        }

        // Add the line to our current file buffer
        var line = LeanData.GenerateLine(data, _securityType, _resolution);
        currentFileData.Add((data.Time, line));

        // Update our time
        lastTime = data.Time;
    }

    // Finish off by processing the last file as well
    if (!currentFileData.IsNullOrEmpty())
    {
        writeTasks.Enqueue(Task.Run(() =>
        {
            WriteFile(outputFile, currentFileData, lastTime);
        }));
    }

    // Wait for all our write tasks to finish
    while (writeTasks.Count > 0)
    {
        var task = writeTasks.Dequeue();
        task.Wait();
    }
}
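If this Write method sits on LEAN's LeanDataWriter (as the fields suggest), usage would look roughly like the sketch below. The namespace and constructor signature are assumptions, and the data folder path and bars are illustrative:

using System;
using System.Collections.Generic;
using QuantConnect;
using QuantConnect.Data;
using QuantConnect.Data.Market;
using QuantConnect.ToolBox; // assumed namespace for LeanDataWriter

var spy = Symbol.Create("SPY", SecurityType.Equity, Market.USA);
var bars = new List<BaseData>
{
    // Pre-sorted oldest to newest, as Write requires
    new TradeBar(new DateTime(2021, 1, 4, 9, 30, 0), spy, 370.0m, 371.0m, 369.5m, 370.5m, 1000m),
    new TradeBar(new DateTime(2021, 1, 4, 9, 31, 0), spy, 370.5m, 372.0m, 370.0m, 371.0m, 1200m)
};

// Assumed constructor: (resolution, symbol, dataDirectory, tickType)
var writer = new LeanDataWriter(Resolution.Minute, spy, "/path/to/Data", TickType.Trade);
writer.Write(bars); // one zip per trading day, written via background tasks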
/// <summary>
/// Create a new AlgoSeekFuturesProcessor for enqueuing consolidated bars and flushing them to disk
/// </summary>
/// <param name="symbol">Symbol for the processor</param>
/// <param name="date">Reference date for the processor</param>
/// <param name="tickType">TradeBar or QuoteBar to generate</param>
/// <param name="resolution">Resolution to consolidate</param>
/// <param name="dataDirectory">Data directory for LEAN</param>
public AlgoSeekFuturesProcessor(Symbol symbol, DateTime date, TickType tickType, Resolution resolution, string dataDirectory)
{
    _symbol = Safe(symbol);
    _tickType = tickType;
    _referenceDate = date;
    _resolution = resolution;
    _dataDirectory = dataDirectory;

    // Setup the consolidator for the requested resolution
    if (resolution == Resolution.Tick)
    {
        _consolidator = new IdentityDataConsolidator<Tick>();
    }
    else
    {
        switch (tickType)
        {
            case TickType.Trade:
                _consolidator = new TickConsolidator(resolution.ToTimeSpan());
                break;
            case TickType.Quote:
                _consolidator = new TickQuoteBarConsolidator(resolution.ToTimeSpan());
                break;
            case TickType.OpenInterest:
                _consolidator = new OpenInterestConsolidator(resolution.ToTimeSpan());
                break;
        }
    }

    var path = ZipPath.Replace(".zip", string.Empty);
    Directory.CreateDirectory(path);

    var file = Path.Combine(path, EntryPath);

    try
    {
        _streamWriter = new LazyStreamWriter(file);
    }
    catch (Exception err)
    {
        // We are unable to open a new file because it is already open, due to a bug in the AlgoSeek data
        Log.Error("File: {0} Err: {1} Source: {2} Stack: {3}", file, err.Message, err.Source, err.StackTrace);
        var newRandomizedName = (file + "-" + Math.Abs(file.GetHashCode()).ToString()).Replace(".csv", string.Empty) + ".csv";

        // Store the information under a different (randomized) name
        Log.Trace("Changing name from {0} to {1}", file, newRandomizedName);
        _streamWriter = new LazyStreamWriter(newRandomizedName);
    }

    // On consolidating the bars, write the bar to the lazy stream writer to be flushed to disk later
    _consolidator.DataConsolidated += (sender, consolidated) =>
    {
        _streamWriter.WriteLine(LeanData.GenerateLine(consolidated, SecurityType.Future, Resolution));
    };

    Interlocked.Add(ref _curFileCount, 1);
    if (_curFileCount % 1000 == 0)
    {
        Log.Trace("Opened more files: {0}", _curFileCount);
    }
}
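A minimal wiring sketch based only on the constructor above and the FlushBuffer method shown earlier. The tick-feeding Process call is assumed to exist on the processor and is not shown in these snippets; the symbol, dates, and path are illustrative:

using System;
using QuantConnect;

var symbol = Symbol.CreateFuture("ES", Market.CME, new DateTime(2021, 3, 19));
var processor = new AlgoSeekFuturesProcessor(symbol, new DateTime(2021, 1, 4), TickType.Trade, Resolution.Minute, "/path/to/Data");

// ... processor.Process(tick); for each raw tick, in time order (assumed method)

// Final flush: forces out the working bar and closes the writer
processor.FlushBuffer(new DateTime(2021, 1, 5), finalFlush: true);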