public void TestAlgorithmManagerSpeed()
{
    // Benchmark harness: wire up the minimum set of handlers needed to drive
    // the algorithm manager loop against a mock data feed.
    var manager = new AlgorithmManager(false);
    var benchmarkAlgorithm = PerformanceBenchmarkAlgorithms.SingleSecurity_Second;
    var job = new BacktestNodePacket(1, 2, "3", null, 9m, $"{nameof(AlgorithmManagerTests)}.{nameof(TestAlgorithmManagerSpeed)}");
    var feed = new MockDataFeed();
    var transactionHandler = new BacktestingTransactionHandler();
    var resultHandler = new BacktestingResultHandler();
    var realTimeHandler = new BacktestingRealTimeHandler();
    var leanManager = new NullLeanManager();
    var alphaHandler = new NullAlphaHandler();
    var cancellationToken = new CancellationToken();

    // Initialization order matters: the algorithm first, then the handlers that reference it.
    benchmarkAlgorithm.Initialize();
    resultHandler.Initialize(job, new QuantConnect.Messaging.Messaging(), new Api.Api(), feed, new BacktestingSetupHandler(), transactionHandler);
    resultHandler.SetAlgorithm(benchmarkAlgorithm);
    transactionHandler.Initialize(benchmarkAlgorithm, new BacktestingBrokerage(benchmarkAlgorithm), resultHandler);
    feed.Initialize(benchmarkAlgorithm, job, resultHandler, null, null, null);

    Log.Trace("Starting algorithm manager loop to process " + feed.Count + " time slices");
    var timer = Stopwatch.StartNew();
    manager.Run(job, benchmarkAlgorithm, feed, transactionHandler, resultHandler, realTimeHandler, leanManager, alphaHandler, cancellationToken);
    timer.Stop();

    // Report throughput as thousands of time slices processed per second (KPS).
    var kiloSlices = feed.Count / 1000d;
    var elapsedSeconds = timer.Elapsed.TotalSeconds;
    Log.Trace("COUNT: " + feed.Count + " KPS: " + kiloSlices / elapsedSeconds);
}
public void DoesNotLeakMemory()
{
    // Build a single fine-fundamental subscription request for AAPL on one day.
    var symbol = Symbols.AAPL;
    var startDate = new DateTime(2014, 4, 30);
    var endDate = new DateTime(2014, 4, 30);
    var config = new SubscriptionDataConfig(typeof(FineFundamental), symbol, Resolution.Daily, TimeZones.NewYork, TimeZones.NewYork, false, false, false, false, TickType.Trade, false);
    var security = new Security(
        SecurityExchangeHours.AlwaysOpen(TimeZones.NewYork),
        config,
        new Cash(CashBook.AccountCurrency, 0, 1),
        SymbolProperties.GetDefault(CashBook.AccountCurrency));
    var request = new SubscriptionRequest(false, null, security, config, startDate, endDate);
    var dataProvider = new DefaultDataProvider();
    var enumeratorFactory = new FineFundamentalSubscriptionEnumeratorFactory(false);

    // Snapshot RAM usage with a freshly collected heap before exercising the factory.
    GC.Collect();
    var ramUsageBeforeLoop = OS.TotalPhysicalMemoryUsed;

    const int iterations = 1000;
    for (var iteration = 0; iteration < iterations; iteration++)
    {
        // Create, advance once, and dispose — repeated enumerator churn should not retain memory.
        using (var enumerator = enumeratorFactory.CreateEnumerator(request, dataProvider))
        {
            enumerator.MoveNext();
        }
    }

    GC.Collect();
    var ramUsageAfterLoop = OS.TotalPhysicalMemoryUsed;
    Log.Trace($"RAM usage - before: {ramUsageBeforeLoop} MB, after: {ramUsageAfterLoop} MB");

    // 1000 enumerator create/dispose cycles must not grow resident memory by 10 MB or more.
    Assert.IsTrue(ramUsageAfterLoop - ramUsageBeforeLoop < 10);
}
public void ReadsFineFundamental(FineFundamentalTestParameters parameters)
{
    var stopwatch = Stopwatch.StartNew();
    var rows = new List<FineFundamental>();
    var config = new SubscriptionDataConfig(typeof(FineFundamental), parameters.Symbol, Resolution.Daily, TimeZones.NewYork, TimeZones.NewYork, false, false, false, false, TickType.Trade, false);
    var security = new Security(
        SecurityExchangeHours.AlwaysOpen(TimeZones.NewYork),
        config,
        new Cash(Currencies.USD, 0, 1),
        SymbolProperties.GetDefault(Currencies.USD),
        ErrorCurrencyConverter.Instance,
        RegisteredSecurityDataTypesProvider.Null,
        new SecurityCache()
    );
    var request = new SubscriptionRequest(false, null, security, config, parameters.StartDate, parameters.EndDate);
    var fileProvider = new DefaultDataProvider();
    var factory = new FineFundamentalSubscriptionEnumeratorFactory(parameters.LiveMode);

    // BUG FIX: the enumerator was previously disposed only on the RowCount == 1 path;
    // the early 'return' below leaked it for every other parameter set. The 'using'
    // block guarantees disposal on all paths.
    using (var enumerator = factory.CreateEnumerator(request, fileProvider))
    {
        while (enumerator.MoveNext())
        {
            var current = enumerator.Current as FineFundamental;
            rows.Add(current);
        }
    }
    stopwatch.Stop();
    Log.Trace("Total rows: {0}, elapsed time: {1}", rows.Count, stopwatch.Elapsed);

    Assert.AreEqual(parameters.RowCount, rows.Count);

    // Detailed field assertions only apply to single-row parameter sets.
    if (parameters.RowCount != 1)
    {
        return;
    }

    var row = rows[0];
    Assert.AreEqual(parameters.CompanyShortName, row.CompanyReference.ShortName);
    Assert.AreEqual(parameters.Symbol, row.Symbol);
    Assert.IsTrue(row.CompanyReference.PrimarySymbol == parameters.Symbol.Value || row.CompanyReference.PrimarySymbol == null);
    Assert.IsTrue(row.SecurityReference.SecuritySymbol == parameters.Symbol.Value || row.SecurityReference.SecuritySymbol == null);
    Assert.AreEqual(parameters.Ebitda3M, row.FinancialStatements.IncomeStatement.EBITDA.ThreeMonths);
    Assert.AreEqual(parameters.Ebitda12M, row.FinancialStatements.IncomeStatement.EBITDA.TwelveMonths);
    Assert.AreEqual(parameters.Ebitda12M, (decimal)row.FinancialStatements.IncomeStatement.EBITDA);
    Assert.AreEqual(parameters.CostOfRevenue3M, row.FinancialStatements.IncomeStatement.CostOfRevenue.ThreeMonths);
    Assert.AreEqual(parameters.CostOfRevenue12M, row.FinancialStatements.IncomeStatement.CostOfRevenue.TwelveMonths);
    Assert.AreEqual(parameters.CostOfRevenue12M, (decimal)row.FinancialStatements.IncomeStatement.CostOfRevenue);
    Assert.AreEqual(parameters.EquityPerShareGrowth1Y, row.EarningRatios.EquityPerShareGrowth.OneYear);
    Assert.AreEqual(parameters.EquityPerShareGrowth1Y, (decimal)row.EarningRatios.EquityPerShareGrowth);
    Assert.AreEqual(parameters.PeRatio, row.ValuationRatios.PERatio);
    if (!parameters.FinancialHealthGrade.IsNullOrEmpty())
    {
        Assert.AreEqual(parameters.FinancialHealthGrade, row.AssetClassification.FinancialHealthGrade);
    }
}
public void TestAlgorithmManagerSpeed()
{
    // Benchmark algorithm and the manager under test.
    var benchmarkAlgorithm = PerformanceBenchmarkAlgorithms.SingleSecurity_Second;
    var manager = new AlgorithmManager(false);
    var job = new BacktestNodePacket(1, 2, "3", null, 9m, $"{nameof(AlgorithmManagerTests)}.{nameof(TestAlgorithmManagerSpeed)}");
    var feed = new MockDataFeed();

    // Build the data stack (security service -> universe selection -> data manager)
    // the algorithm's subscription manager will use.
    var marketHoursDatabase = MarketHoursDatabase.FromDataFolder();
    var symbolPropertiesDatabase = SymbolPropertiesDatabase.FromDataFolder();
    var dataPermissionManager = new DataPermissionManager();
    var securityService = new SecurityService(
        benchmarkAlgorithm.Portfolio.CashBook,
        marketHoursDatabase,
        symbolPropertiesDatabase,
        benchmarkAlgorithm,
        RegisteredSecurityDataTypesProvider.Null,
        new SecurityCacheProvider(benchmarkAlgorithm.Portfolio));
    var universeSelection = new UniverseSelection(benchmarkAlgorithm, securityService, dataPermissionManager, new DefaultDataProvider());
    var dataManager = new DataManager(
        feed,
        universeSelection,
        benchmarkAlgorithm,
        benchmarkAlgorithm.TimeKeeper,
        marketHoursDatabase,
        false,
        RegisteredSecurityDataTypesProvider.Null,
        dataPermissionManager);
    benchmarkAlgorithm.SubscriptionManager.SetDataManager(dataManager);

    var transactionHandler = new BacktestingTransactionHandler();
    var resultHandler = new BacktestingResultHandler();
    var realTimeHandler = new BacktestingRealTimeHandler();
    var leanManager = new NullLeanManager();
    var alphaHandler = new NullAlphaHandler();
    var cancellationToken = new CancellationToken();
    var nullSynchronizer = new NullSynchronizer(benchmarkAlgorithm);

    // Initialization order matters: the algorithm first, then the handlers that consume it.
    benchmarkAlgorithm.Initialize();
    benchmarkAlgorithm.PostInitialize();
    resultHandler.Initialize(job, new QuantConnect.Messaging.Messaging(), new Api.Api(), transactionHandler);
    resultHandler.SetAlgorithm(benchmarkAlgorithm, benchmarkAlgorithm.Portfolio.TotalPortfolioValue);
    transactionHandler.Initialize(benchmarkAlgorithm, new BacktestingBrokerage(benchmarkAlgorithm), resultHandler);
    feed.Initialize(benchmarkAlgorithm, job, resultHandler, null, null, null, dataManager, null, null);

    Log.Trace("Starting algorithm manager loop to process " + nullSynchronizer.Count + " time slices");
    var timer = Stopwatch.StartNew();
    manager.Run(job, benchmarkAlgorithm, nullSynchronizer, transactionHandler, resultHandler, realTimeHandler, leanManager, alphaHandler, cancellationToken);
    timer.Stop();

    // Shut the handlers down before computing throughput.
    realTimeHandler.Exit();
    resultHandler.Exit();
    var kiloSlices = nullSynchronizer.Count / 1000d;
    var elapsedSeconds = timer.Elapsed.TotalSeconds;
    Log.Trace("COUNT: " + nullSynchronizer.Count + " KPS: " + kiloSlices / elapsedSeconds);
}
/// <summary>
/// Reads the specified exclusions file into a new hash set.
/// Lines are trimmed, lines starting with '#' are treated as comments, and symbols
/// are compared case-insensitively. Returns an empty set if the file does not exist.
/// </summary>
/// <param name="exclusionsFile">Path to the exclusions file</param>
/// <returns>Case-insensitive set of excluded symbols; empty if the file is missing</returns>
public static HashSet<string> ReadExclusionsFile(string exclusionsFile)
{
    // Missing file is a normal condition: nothing is excluded.
    if (!File.Exists(exclusionsFile))
    {
        return new HashSet<string>();
    }

    // FIX: use an ordinal comparison for the '#' comment prefix — the previous
    // culture-sensitive StartsWith could misbehave under some locales (CA1310).
    // Also build the final set directly instead of allocating a throwaway set first.
    var excludedSymbols = File.ReadLines(exclusionsFile)
        .Select(x => x.Trim())
        .Where(x => !x.StartsWith("#", StringComparison.Ordinal));
    var exclusions = new HashSet<string>(excludedSymbols, StringComparer.InvariantCultureIgnoreCase);
    Log.Trace("CoarseGenerator.ReadExclusionsFile(): Loaded {0} symbols into the exclusion set", exclusions.Count);
    return exclusions;
}
/// <summary>
/// Populates the SID context for each security in the map file resolver,
/// skipping any whose most recent mapped symbol appears in the exclusion set.
/// </summary>
/// <param name="mapFileResolver">The map file resolver.</param>
/// <param name="exclusions">The exclusions.</param>
/// <returns>Lazily-yielded SID contexts, one per non-excluded map file.</returns>
private IEnumerable<SecurityIdentifierContext> PopulateSidContex(MapFileResolver mapFileResolver, HashSet<string> exclusions)
{
    Log.Trace($"CoarseUniverseGeneratorProgram.PopulateSidContex(): Generating SID context from QuantQuote's map files.");
    foreach (var resolvedMapFile in mapFileResolver)
    {
        // Exclusion is keyed on the security's latest mapped symbol.
        if (!exclusions.Contains(resolvedMapFile.Last().MappedSymbol))
        {
            yield return new SecurityIdentifierContext(resolvedMapFile, _market);
        }
    }
}
/// <summary>
/// Sends an update of the current optimization status to the user
/// </summary>
protected override void SendUpdate()
{
    // end handler will already log a nice message on end
    if (Status == OptimizationStatus.Ended || Status == OptimizationStatus.Aborted)
    {
        return;
    }

    var message = $"ConsoleLeanOptimizer.SendUpdate(): {GetCurrentEstimate()}";

    // Append the best-so-far backtest details when a solution exists.
    var bestBacktest = Strategy.Solution;
    if (bestBacktest != null)
    {
        message += $". Best id:'{bestBacktest.BacktestId}'. {OptimizationTarget}. Parameters ({bestBacktest.ParameterSet})";
    }
    Log.Trace(message);
}
/// <summary>
/// Sends an update of the current optimization status to the user
/// </summary>
protected override void SendUpdate()
{
    // end handler will already log a nice message on end
    if (Status == OptimizationStatus.Completed || Status == OptimizationStatus.Aborted)
    {
        return;
    }

    // Current estimate plus runtime statistics formatted as "Key:Value, Key:Value, ...".
    var runtimeStats = GetRuntimeStatistics();
    var formattedStats = string.Join(", ", runtimeStats.Select(pair => $"{pair.Key}:{pair.Value}"));
    var message = $"ConsoleLeanOptimizer.SendUpdate(): {GetCurrentEstimate()} {formattedStats}";

    // Append the best-so-far backtest details when a solution exists.
    var bestBacktest = Strategy.Solution;
    if (bestBacktest != null)
    {
        message += $". Best id:'{bestBacktest.BacktestId}'. {OptimizationTarget}. Parameters ({bestBacktest.ParameterSet})";
    }
    Log.Trace(message);
}
/// <summary>
/// This program generates the coarse files required by lean for universe selection.
/// Universe selection is planned to happen in two stages, the first stage, the 'coarse'
/// stage serves to cull the set using coarse filters, such as price, market, and dollar volume.
/// Later we'll support full fundamental data such as ratios and financial statements, and these
/// would be run AFTER the initial coarse filter
///
/// The files are generated from LEAN formatted daily trade bar equity files
/// </summary>
/// <param name="args">Unused argument</param>
public static void Main(string[] args)
{
    // read out the configuration file
    JToken jtoken;
    var config = JObject.Parse(File.ReadAllText("CoarseUniverseGenerator/config.json"));

    var ignoreMaplessSymbols = false;
    var updateMode = false;
    var updateTime = TimeSpan.Zero;
    DateTime? startDate = null;

    if (config.TryGetValue("update-mode", out jtoken))
    {
        updateMode = jtoken.Value<bool>();
        if (config.TryGetValue("update-time-of-day", out jtoken))
        {
            // FIX: parse machine-formatted config values with the invariant culture so the
            // generator behaves identically regardless of the host machine's locale (CA1305).
            updateTime = TimeSpan.Parse(jtoken.Value<string>(), CultureInfo.InvariantCulture);
        }
    }

    var dataDirectory = Globals.DataFolder;
    if (config.TryGetValue("data-directory", out jtoken))
    {
        dataDirectory = jtoken.Value<string>();
    }

    //Ignore symbols without a map file:
    // Typically these are nothing symbols (NASDAQ test symbols, or symbols listed for a few days who aren't actually ever traded).
    if (config.TryGetValue("ignore-mapless", out jtoken))
    {
        ignoreMaplessSymbols = jtoken.Value<bool>();
    }

    if (config.TryGetValue("coarse-universe-generator-start-date", out jtoken))
    {
        string startDateStr = jtoken.Value<string>();
        // FIX: a null provider meant the current culture; 'yyyyMMdd' is culture-neutral
        // machine data and must be parsed with the invariant culture.
        startDate = DateTime.ParseExact(startDateStr, "yyyyMMdd", CultureInfo.InvariantCulture);
        Log.Trace("Generating coarse data from {0}", startDate);
    }

    // Run once; in update mode, keep re-running each time the configured update time arrives.
    do
    {
        ProcessEquityDirectories(dataDirectory, ignoreMaplessSymbols, startDate);
    }
    while (WaitUntilTimeInUpdateMode(updateMode, updateTime));
}
/// <summary>
/// Checks whether fine fundamental data exists within a trailing month of <paramref name="date"/>,
/// taking ticker re-use and merger/acquisition ticker changes into account.
/// </summary>
/// <param name="date">The date to check fundamental data availability for.</param>
/// <param name="mapFile">The map file for the security; may be null.</param>
/// <param name="fineAvailableDates">Dates for which fine fundamental files exist in the security's current ticker folder.</param>
/// <param name="fineFundamentalFolder">The fine fundamental folder.</param>
/// <returns>True if fine fundamental data exists within the trailing month of <paramref name="date"/>.</returns>
private static bool CheckFundamentalData(DateTime date, MapFile mapFile, IEnumerable<DateTime> fineAvailableDates, DirectoryInfo fineFundamentalFolder)
{
    // Check if security has fine file within a trailing month for a date-ticker set.
    // There are tricky cases where a folder named by a ticker can have data for multiple securities.
    // e.g GOOG -> GOOGL (GOOG T1AZ164W5VTX) / GOOCV -> GOOG (GOOCV VP83T1ZUHROL) case.
    // The fine data in the 'fundamental/fine/goog' folder will be for 'GOOG T1AZ164W5VTX' up to the 2014-04-02 and for 'GOOCV VP83T1ZUHROL' afterward.
    // Therefore, before checking if the security has fundamental data for a date, we need to filter the fine files by the map's first date.
    var firstDate = mapFile?.FirstDate ?? DateTime.MinValue;
    var hasFundamentalDataForDate = fineAvailableDates.Where(d => d >= firstDate).Any(d => date.AddMonths(-1) <= d && d <= date);

    // The following section handles mergers and acquisitions cases.
    // e.g. YHOO -> AABA (YHOO R735QTJ8XC9X)
    // The dates right after the acquisition, valid fine fundamental data for AABA are still under the former ticker folder.
    // Therefore if no fine fundamental data is found in the 'fundamental/fine/aaba' folder, it searches into the 'yhoo' folder.
    if (mapFile != null && mapFile.Count() > 2 && !hasFundamentalDataForDate)
    {
        // Last ticker the security was mapped to strictly before 'date'.
        var previousTicker = mapFile.LastOrDefault(m => m.Date < date)?.MappedSymbol;
        if (previousTicker != null)
        {
            var previousTickerFineFundamentalFolder = Path.Combine(fineFundamentalFolder.FullName, previousTicker);
            if (Directory.Exists(previousTickerFineFundamentalFolder))
            {
                // Re-run the same trailing-month check against the former ticker's fine files,
                // still filtered by the map's first date.
                var previousTickerFineAvailableDates = Directory.GetFiles(previousTickerFineFundamentalFolder, "*.zip")
                    .Select(f => DateTime.ParseExact(Path.GetFileNameWithoutExtension(f), DateFormat.EightCharacter, CultureInfo.InvariantCulture))
                    .ToList();
                hasFundamentalDataForDate = previousTickerFineAvailableDates.Where(d => d >= firstDate).Any(d => date.AddMonths(-1) <= d && d <= date);
            }
            else
            {
                Log.Debug($"CoarseUniverseGeneratorProgram.CheckFundamentalData(): fine folder was not found at '{previousTickerFineFundamentalFolder}'");
            }
        }
    }
    return (hasFundamentalDataForDate);
}
/// <summary>
/// Iterates each daily file in the specified <paramref name="dailyFolder"/> and adds a line for each
/// day to the appropriate coarse file
/// </summary>
/// <param name="dailyFolder">The folder with daily data</param>
/// <param name="coarseFolder">The coarse output folder</param>
/// <param name="mapFileResolver">Resolver used to map tickers to map files</param>
/// <param name="exclusions">The symbols to be excluded from processing</param>
/// <param name="ignoreMapless">Ignore the symbols without a map file.</param>
/// <param name="startDate">The starting date for processing</param>
/// <param name="symbolResolver">Function used to provide symbol resolution. Default resolution uses the zip file name to resolve
/// the symbol, specify null for this behavior.</param>
/// <returns>A collection of the generated coarse files</returns>
public static ICollection<string> ProcessDailyFolder(string dailyFolder, string coarseFolder, MapFileResolver mapFileResolver, HashSet<string> exclusions, bool ignoreMapless, DateTime startDate, Func<string, string> symbolResolver = null)
{
    // Daily zip prices are stored scaled by 10000 (deci-cents); divide to get dollars.
    const decimal scaleFactor = 10000m;

    Log.Trace("Processing: {0}", dailyFolder);

    var start = DateTime.UtcNow;

    // load map files into memory
    var symbols = 0;
    var maplessCount = 0;
    var dates = new HashSet<DateTime>();

    // instead of opening/closing these constantly, open them once and dispose at the end (~3x speed improvement)
    var writers = new Dictionary<string, StreamWriter>();

    // The market name is taken from the daily folder's parent directory name.
    var dailyFolderDirectoryInfo = new DirectoryInfo(dailyFolder).Parent;
    if (dailyFolderDirectoryInfo == null)
    {
        throw new Exception("Unable to resolve market for daily folder: " + dailyFolder);
    }
    var market = dailyFolderDirectoryInfo.Name.ToLower();

    // Fine fundamental data lives next to the coarse folder, under 'fine'.
    var fundamentalDirectoryInfo = new DirectoryInfo(coarseFolder).Parent;
    if (fundamentalDirectoryInfo == null)
    {
        throw new Exception("Unable to resolve fundamental path for coarse folder: " + coarseFolder);
    }
    var fineFundamentalFolder = Path.Combine(fundamentalDirectoryInfo.FullName, "fine");

    var mapFileProvider = new LocalDiskMapFileProvider();
    var factorFileProvider = new LocalDiskFactorFileProvider(mapFileProvider);

    // open up each daily file to get the values and append to the daily coarse files
    foreach (var file in Directory.EnumerateFiles(dailyFolder))
    {
        try
        {
            var symbol = Path.GetFileNameWithoutExtension(file);
            if (symbol == null)
            {
                Log.Trace("CoarseGenerator.ProcessDailyFolder(): Unable to resolve symbol from file: {0}", file);
                continue;
            }

            if (symbolResolver != null)
            {
                symbol = symbolResolver(symbol);
            }

            symbol = symbol.ToUpper();

            if (exclusions.Contains(symbol))
            {
                Log.Trace("Excluded symbol: {0}", symbol);
                continue;
            }

            // check if symbol has any fine fundamental data
            var firstFineSymbolDate = DateTime.MaxValue;
            if (Directory.Exists(fineFundamentalFolder))
            {
                var fineSymbolFolder = Path.Combine(fineFundamentalFolder, symbol.ToLower());

                // NOTE(review): FirstOrDefault() returns null for an existing-but-empty folder,
                // making the .Length access below throw — confirm folders always contain files.
                var firstFineSymbolFileName = Directory.Exists(fineSymbolFolder) ? Directory.GetFiles(fineSymbolFolder).OrderBy(x => x).FirstOrDefault() : string.Empty;
                if (firstFineSymbolFileName.Length > 0)
                {
                    firstFineSymbolDate = DateTime.ParseExact(Path.GetFileNameWithoutExtension(firstFineSymbolFileName), "yyyyMMdd", CultureInfo.InvariantCulture);
                }
            }

            ZipFile zip;
            using (var reader = Compression.Unzip(file, out zip))
            {
                var checkedForMapFile = false;

                symbols++;
                string line;
                while ((line = reader.ReadLine()) != null)
                {
                    //20150625.csv
                    var csv = line.Split(',');
                    var date = DateTime.ParseExact(csv[0], DateFormat.TwelveCharacter, CultureInfo.InvariantCulture);

                    // spin past old data
                    if (date < startDate)
                    {
                        continue;
                    }

                    if (ignoreMapless && !checkedForMapFile)
                    {
                        checkedForMapFile = true;
                        if (!mapFileResolver.ResolveMapFile(symbol, date).Any())
                        {
                            // if the resolved map file has zero entries then it's a mapless symbol
                            maplessCount++;
                            break;
                        }
                    }

                    // NOTE(review): decimal.Parse/long.Parse here use the current culture —
                    // the data files are machine-formatted; confirm against the invariant-culture
                    // version of this method used elsewhere in this file.
                    var close = decimal.Parse(csv[4]) / scaleFactor;
                    var volume = long.Parse(csv[5]);
                    var dollarVolume = close * volume;

                    var coarseFile = Path.Combine(coarseFolder, date.ToString("yyyyMMdd") + ".csv");
                    dates.Add(date);

                    // try to resolve a map file and if found, regen the sid
                    var sid = SecurityIdentifier.GenerateEquity(SecurityIdentifier.DefaultDate, symbol, market);
                    var mapFile = mapFileResolver.ResolveMapFile(symbol, date);
                    if (!mapFile.IsNullOrEmpty())
                    {
                        // if available, us the permtick in the coarse files, because of this, we need
                        // to update the coarse files each time new map files are added/permticks change
                        sid = SecurityIdentifier.GenerateEquity(mapFile.FirstDate, mapFile.OrderBy(x => x.Date).First().MappedSymbol, market);
                    }

                    if (mapFile == null && ignoreMapless)
                    {
                        // if we're ignoring mapless files then we should always be able to resolve this
                        Log.Error(string.Format("CoarseGenerator.ProcessDailyFolder(): Unable to resolve map file for {0} as of {1}", symbol, date.ToShortDateString()));
                        continue;
                    }

                    // check if symbol has fine fundamental data for the current date
                    var hasFundamentalDataForDate = date >= firstFineSymbolDate;

                    // get price and split factors from factor files
                    var leanSymbol = new Symbol(sid, symbol);
                    var factorFile = factorFileProvider.Get(leanSymbol);
                    var factorFileRow = factorFile?.GetScalingFactors(date);
                    var priceFactor = factorFileRow?.PriceFactor ?? 1m;
                    var splitFactor = factorFileRow?.SplitFactor ?? 1m;

                    // sid,symbol,close,volume,dollar volume,has fundamental data,price factor,split factor
                    var coarseFileLine = $"{sid},{symbol},{close},{volume},{Math.Truncate(dollarVolume)},{hasFundamentalDataForDate},{priceFactor},{splitFactor}";

                    // Lazily open one writer per output date and keep it open for the whole run.
                    StreamWriter writer;
                    if (!writers.TryGetValue(coarseFile, out writer))
                    {
                        writer = new StreamWriter(new FileStream(coarseFile, FileMode.Create, FileAccess.Write, FileShare.Write));
                        writers[coarseFile] = writer;
                    }
                    writer.WriteLine(coarseFileLine);
                }
            }

            // Progress logging every 1000 symbols.
            if (symbols % 1000 == 0)
            {
                Log.Trace("CoarseGenerator.ProcessDailyFolder(): Completed processing {0} symbols. Current elapsed: {1} seconds", symbols, (DateTime.UtcNow - start).TotalSeconds.ToString("0.00"));
            }
        }
        catch (Exception err)
        {
            // log the error and continue with the process
            Log.Error(err.ToString());
        }
    }

    Log.Trace("CoarseGenerator.ProcessDailyFolder(): Saving {0} coarse files to disk", dates.Count);

    // dispose all the writers at the end of processing
    foreach (var writer in writers)
    {
        writer.Value.Dispose();
    }

    var stop = DateTime.UtcNow;

    Log.Trace("CoarseGenerator.ProcessDailyFolder(): Processed {0} symbols into {1} coarse files in {2} seconds", symbols, dates.Count, (stop - start).TotalSeconds.ToString("0.00"));
    Log.Trace("CoarseGenerator.ProcessDailyFolder(): Excluded {0} mapless symbols.", maplessCount);

    return (writers.Keys);
}
/// <summary>
/// Runs the coarse universe generation: reads daily data and fine fundamental folders for
/// every security resolved from the map files (in parallel), and writes one coarse file per date.
/// </summary>
/// <returns>True on success; false if the fine fundamental folder is missing or any unhandled error occurs.</returns>
public bool Run()
{
    var startTime = DateTime.UtcNow;
    var success = true;
    Log.Trace($"CoarseUniverseGeneratorProgram.ProcessDailyFolder(): Processing: {_dailyDataFolder.FullName}");

    // Counters shared across the parallel loop; updated via Interlocked.
    var symbolsProcessed = 0;
    var filesRead = 0;
    var dailyFilesNotFound = 0;
    var coarseFilesGenerated = 0;

    var mapFileResolver = _mapFileProvider.Get(_market);

    // Blacklisted tickers are skipped entirely (passed to PopulateSidContex as exclusions).
    var blackListedTickers = new HashSet<string>();
    if (_blackListedTickersFile.Exists)
    {
        blackListedTickers = File.ReadAllLines(_blackListedTickersFile.FullName).ToHashSet();
    }

    var marketFolder = _dailyDataFolder.Parent;
    var fineFundamentalFolder = new DirectoryInfo(Path.Combine(marketFolder.FullName, "fundamental", "fine"));
    if (!fineFundamentalFolder.Exists)
    {
        Log.Error($"CoarseUniverseGenerator.Run(): FAIL, Fine Fundamental folder not found at {fineFundamentalFolder}! ");
        return (false);
    }

    var securityIdentifierContexts = PopulateSidContex(mapFileResolver, blackListedTickers);

    // Caches daily bars per ticker and accumulates output coarse lines keyed by date.
    var dailyPricesByTicker = new ConcurrentDictionary<string, List<TradeBar>>();
    var outputCoarseContent = new ConcurrentDictionary<DateTime, List<string>>();

    var parallelOptions = new ParallelOptions { MaxDegreeOfParallelism = Environment.ProcessorCount / 2 };

    try
    {
        Parallel.ForEach(securityIdentifierContexts, parallelOptions, sidContext =>
        {
            var symbol = new Symbol(sidContext.SID, sidContext.LastTicker);
            var symbolCount = Interlocked.Increment(ref symbolsProcessed);
            Log.Debug($"CoarseUniverseGeneratorProgram.Run(): Processing {symbol}");
            var factorFile = _factorFileProvider.Get(symbol);

            // Populate dailyPricesByTicker with all daily data by ticker for all tickers of this security.
            foreach (var ticker in sidContext.Tickers)
            {
                var dailyFile = new FileInfo(Path.Combine(_dailyDataFolder.FullName, $"{ticker}.zip"));
                if (!dailyFile.Exists)
                {
                    Log.Error($"CoarseUniverseGeneratorProgram.Run(): {dailyFile} not found!");
                    Interlocked.Increment(ref dailyFilesNotFound);
                    continue;
                }

                // NOTE(review): ContainsKey-then-AddOrUpdate is not atomic — two threads sharing
                // a ticker could both parse the file; confirm tickers are unique per security.
                if (!dailyPricesByTicker.ContainsKey(ticker))
                {
                    dailyPricesByTicker.AddOrUpdate(ticker, ParseDailyFile(dailyFile));
                    Interlocked.Increment(ref filesRead);
                }
            }

            // Look for daily data for each ticker of the actual security.
            // Iterates map file rows newest-to-oldest; each row pair defines the ticker's active window.
            for (int mapFileRowIndex = sidContext.MapFileRows.Length - 1; mapFileRowIndex >= 1; mapFileRowIndex--)
            {
                var ticker = sidContext.MapFileRows[mapFileRowIndex].Item2.ToLowerInvariant();
                var endDate = sidContext.MapFileRows[mapFileRowIndex].Item1;
                var startDate = sidContext.MapFileRows[mapFileRowIndex - 1].Item1;
                List<TradeBar> tickerDailyData;
                if (!dailyPricesByTicker.TryGetValue(ticker, out tickerDailyData))
                {
                    Log.Error($"CoarseUniverseGeneratorProgram.Run(): Daily data for ticker {ticker.ToUpperInvariant()} not found!");
                    continue;
                }

                // Fine fundamental dates available for this ticker (empty if no folder).
                var tickerFineFundamentalFolder = Path.Combine(fineFundamentalFolder.FullName, ticker);
                var fineAvailableDates = Enumerable.Empty<DateTime>();
                if (Directory.Exists(tickerFineFundamentalFolder))
                {
                    fineAvailableDates = Directory.GetFiles(tickerFineFundamentalFolder, "*.zip")
                        .Select(f => DateTime.ParseExact(Path.GetFileNameWithoutExtension(f), DateFormat.EightCharacter, CultureInfo.InvariantCulture))
                        .ToList();
                }

                // Get daily data only for the time the ticker was
                foreach (var tradeBar in tickerDailyData.Where(tb => tb.Time >= startDate && tb.Time <= endDate))
                {
                    var coarseRow = GenerateFactorFileRow(ticker, sidContext, factorFile, tradeBar, fineAvailableDates, fineFundamentalFolder);

                    // Append under a per-list lock since multiple securities share the same date key.
                    outputCoarseContent.AddOrUpdate(tradeBar.Time,
                        new List<string> { coarseRow },
                        (time, list) =>
                        {
                            lock (list)
                            {
                                list.Add(coarseRow);
                                return (list);
                            }
                        });
                }
            }

            // Progress logging every 1000 symbols.
            if (symbolCount % 1000 == 0)
            {
                var elapsed = DateTime.UtcNow - startTime;
                Log.Trace($"CoarseUniverseGeneratorProgram.Run(): Processed {symbolCount} in {elapsed:g} at {symbolCount / elapsed.TotalMinutes:F2} symbols/minute ");
            }
        });

        _destinationFolder.Create();
        var startWriting = DateTime.UtcNow;

        // Write one sorted coarse file per date, in parallel.
        Parallel.ForEach(outputCoarseContent, coarseByDate =>
        {
            var filename = $"{coarseByDate.Key.ToString(DateFormat.EightCharacter, CultureInfo.InvariantCulture)}.csv";
            var filePath = Path.Combine(_destinationFolder.FullName, filename);
            // NOTE(review): "(unknown)" below looks like a garbled/missing filename placeholder —
            // presumably this was meant to log {filePath}; confirm against upstream history.
            Log.Debug($"CoarseUniverseGeneratorProgram.Run(): Saving (unknown) with {coarseByDate.Value.Count} entries.");
            File.WriteAllLines(filePath, coarseByDate.Value.OrderBy(cr => cr));
            var filesCount = Interlocked.Increment(ref coarseFilesGenerated);
            if (filesCount % 1000 == 0)
            {
                var elapsed = DateTime.UtcNow - startWriting;
                Log.Trace($"CoarseUniverseGeneratorProgram.Run(): Processed {filesCount} in {elapsed:g} at {filesCount / elapsed.TotalSeconds:F2} files/second ");
            }
        });

        Log.Trace($"\n\nTotal of {coarseFilesGenerated} coarse files generated in {DateTime.UtcNow - startTime:g}:\n" +
                  $"\t => {filesRead} daily data files read.\n");
    }
    catch (Exception e)
    {
        Log.Error(e, $"CoarseUniverseGeneratorProgram.Run(): FAILED!");
        success = false;
    }

    return (success);
}
/// <summary>
/// Iterates each daily file in the specified <paramref name="dailyFolder"/> and adds a line for each
/// day to the appropriate coarse file
/// </summary>
/// <param name="dailyFolder">The folder with daily data.</param>
/// <param name="coarseFolder">The coarse output folder.</param>
/// <param name="mapFileResolver">The map file resolver.</param>
/// <param name="factorFileProvider">The factor file provider.</param>
/// <param name="exclusions">The symbols to be excluded from processing.</param>
/// <param name="ignoreMapless">Ignore the symbols without a map file.</param>
/// <param name="symbolResolver">Function used to provide symbol resolution. Default resolution uses the zip file name to resolve
/// the symbol, specify null for this behavior.</param>
/// <returns>Collection with the names of the newly generated coarse files.</returns>
/// <exception cref="Exception">
/// Unable to resolve market for daily folder: " + dailyFolder
/// or
/// Unable to resolve fundamental path for coarse folder: " + coarseFolder
/// </exception>
public static ICollection<string> ProcessDailyFolder(string dailyFolder, string coarseFolder, MapFileResolver mapFileResolver, IFactorFileProvider factorFileProvider, HashSet<string> exclusions, bool ignoreMapless, Func<string, string> symbolResolver = null)
{
    // Daily zip prices are stored scaled by 10000 (deci-cents); divide to get dollars.
    const decimal scaleFactor = 10000m;

    Log.Trace("Processing: {0}", dailyFolder);

    var start = DateTime.UtcNow;

    // load map files into memory
    var symbols = 0;
    var maplessCount = 0;
    var dates = new HashSet<DateTime>();

    // instead of opening/closing these constantly, open them once and dispose at the end (~3x speed improvement)
    var writers = new Dictionary<string, StreamWriter>();

    // The market name is taken from the daily folder's parent directory name.
    var marketDirectoryInfo = new DirectoryInfo(dailyFolder).Parent;
    if (marketDirectoryInfo == null)
    {
        throw new Exception($"Unable to resolve market for daily folder: {dailyFolder}");
    }
    var market = marketDirectoryInfo.Name.ToLowerInvariant();

    var fundamentalDirectoryInfo = new DirectoryInfo(coarseFolder).Parent;
    if (fundamentalDirectoryInfo == null)
    {
        throw new Exception($"Unable to resolve fundamental path for coarse folder: {coarseFolder}");
    }
    var fineFundamentalFolder = Path.Combine(marketDirectoryInfo.FullName, "fundamental", "fine");

    // open up each daily file to get the values and append to the daily coarse files
    foreach (var file in Directory.EnumerateFiles(dailyFolder, "*.zip"))
    {
        try
        {
            var ticker = Path.GetFileNameWithoutExtension(file);

            // Fine fundamental dates available for this ticker (empty if no folder).
            // NOTE(review): ticker is used in Path.Combine before the null check below —
            // a null ticker would throw here first; confirm GetFileNameWithoutExtension can return null in this flow.
            var fineAvailableDates = Enumerable.Empty<DateTime>();
            var tickerFineFundamentalFolder = Path.Combine(fineFundamentalFolder, ticker);
            if (Directory.Exists(tickerFineFundamentalFolder))
            {
                fineAvailableDates = Directory.GetFiles(tickerFineFundamentalFolder, "*.zip")
                    .Select(f => DateTime.ParseExact(Path.GetFileNameWithoutExtension(f), DateFormat.EightCharacter, CultureInfo.InvariantCulture))
                    .ToList();
            }

            if (ticker == null)
            {
                Log.Trace("CoarseGenerator.ProcessDailyFolder(): Unable to resolve symbol from file: {0}", file);
                continue;
            }

            if (symbolResolver != null)
            {
                ticker = symbolResolver(ticker);
            }

            ticker = ticker.ToUpperInvariant();

            if (exclusions != null && exclusions.Contains(ticker))
            {
                Log.Trace("Excluded symbol: {0}", ticker);
                continue;
            }

            ZipFile zip;
            using (var reader = Compression.Unzip(file, out zip))
            {
                var checkedForMapFile = false;

                symbols++;
                string line;
                while ((line = reader.ReadLine()) != null)
                {
                    //20150625.csv
                    var csv = line.Split(',');
                    var date = DateTime.ParseExact(csv[0], DateFormat.TwelveCharacter, CultureInfo.InvariantCulture);

                    if (ignoreMapless && !checkedForMapFile)
                    {
                        checkedForMapFile = true;
                        if (!mapFileResolver.ResolveMapFile(ticker, date).Any())
                        {
                            // if the resolved map file has zero entries then it's a mapless symbol
                            maplessCount++;
                            break;
                        }
                    }

                    var close = Parse.Decimal(csv[4]) / scaleFactor;
                    var volume = Parse.Long(csv[5]);
                    var dollarVolume = close * volume;

                    var coarseFile = Path.Combine(coarseFolder, date.ToStringInvariant("yyyyMMdd") + ".csv");
                    dates.Add(date);

                    // try to resolve a map file and if found, regen the sid
                    var sid = SecurityIdentifier.GenerateEquity(SecurityIdentifier.DefaultDate, ticker, market);
                    var mapFile = mapFileResolver.ResolveMapFile(ticker, date);
                    if (!mapFile.IsNullOrEmpty())
                    {
                        // if available, us the permtick in the coarse files, because of this, we need
                        // to update the coarse files each time new map files are added/permticks change
                        sid = SecurityIdentifier.GenerateEquity(mapFile.FirstDate, mapFile.OrderBy(x => x.Date).First().MappedSymbol, market);
                    }

                    if (mapFile == null && ignoreMapless)
                    {
                        // if we're ignoring mapless files then we should always be able to resolve this
                        Log.Error($"CoarseGenerator.ProcessDailyFolder(): Unable to resolve map file for {ticker} as of {date.ToStringInvariant("d")}");
                        continue;
                    }

                    // get price and split factors from factor files
                    var symbol = new Symbol(sid, ticker);
                    var factorFile = factorFileProvider.Get(symbol);
                    var factorFileRow = factorFile?.GetScalingFactors(date);
                    var priceFactor = factorFileRow?.PriceFactor ?? 1m;
                    var splitFactor = factorFileRow?.SplitFactor ?? 1m;

                    // Check if security has fine file within a trailing month for a date-ticker set.
                    // There are tricky cases where a folder named by a ticker can have data for multiple securities.
                    // e.g GOOG -> GOOGL (GOOG T1AZ164W5VTX) / GOOCV -> GOOG (GOOCV VP83T1ZUHROL) case.
                    // The fine data in the 'fundamental/fine/goog' folder will be for 'GOOG T1AZ164W5VTX' up to the 2014-04-02 and for 'GOOCV VP83T1ZUHROL' afterward.
                    // Therefore, before checking if the security has fundamental data for a date, we need to filter the fine files by the map's first date.
                    var firstDate = mapFile?.FirstDate ?? DateTime.MinValue;
                    var hasFundamentalDataForDate = fineAvailableDates.Where(d => d >= firstDate).Any(d => date.AddMonths(-1) <= d && d <= date);

                    // The following section handles mergers and acquisitions cases.
                    // e.g. YHOO -> AABA (YHOO R735QTJ8XC9X)
                    // The dates right after the acquisition, valid fine fundamental data for AABA are still under the former ticker folder.
                    // Therefore if no fine fundamental data is found in the 'fundamental/fine/aaba' folder, it searches into the 'yhoo' folder.
                    if (mapFile != null && mapFile.Count() > 2 && !hasFundamentalDataForDate)
                    {
                        var previousTicker = mapFile.LastOrDefault(m => m.Date < date)?.MappedSymbol;
                        if (previousTicker != null)
                        {
                            var previousTickerFineFundamentalFolder = Path.Combine(fineFundamentalFolder, previousTicker);
                            if (Directory.Exists(previousTickerFineFundamentalFolder))
                            {
                                var previousTickerFineAvailableDates = Directory.GetFiles(previousTickerFineFundamentalFolder, "*.zip")
                                    .Select(f => DateTime.ParseExact(Path.GetFileNameWithoutExtension(f), DateFormat.EightCharacter, CultureInfo.InvariantCulture))
                                    .ToList();
                                hasFundamentalDataForDate = previousTickerFineAvailableDates.Where(d => d >= firstDate).Any(d => date.AddMonths(-1) <= d && d <= date);
                            }
                        }
                    }

                    // sid,symbol,close,volume,dollar volume,has fundamental data,price factor,split factor
                    var coarseFileLine = string.Format(
                        CultureInfo.InvariantCulture,
                        "{0},{1},{2},{3},{4},{5},{6},{7}",
                        sid,
                        ticker,
                        close,
                        volume,
                        Math.Truncate(dollarVolume),
                        hasFundamentalDataForDate,
                        priceFactor,
                        splitFactor);

                    // Lazily open one writer per output date and keep it open for the whole run.
                    StreamWriter writer;
                    if (!writers.TryGetValue(coarseFile, out writer))
                    {
                        writer = new StreamWriter(new FileStream(coarseFile, FileMode.Create, FileAccess.Write, FileShare.Write));
                        writers[coarseFile] = writer;
                    }
                    writer.WriteLine(coarseFileLine);
                }
            }

            // Progress logging every 1000 symbols.
            if (symbols % 1000 == 0)
            {
                Log.Trace($"CoarseGenerator.ProcessDailyFolder(): Completed processing {symbols} symbols. Current elapsed: {(DateTime.UtcNow - start).TotalSeconds.ToStringInvariant("0.00")} seconds");
            }
        }
        catch (Exception err)
        {
            // log the error and continue with the process
            Log.Error(err.ToString());
        }
    }

    Log.Trace("CoarseGenerator.ProcessDailyFolder(): Saving {0} coarse files to disk", dates.Count);

    // dispose all the writers at the end of processing
    foreach (var writer in writers)
    {
        writer.Value.Dispose();
    }

    var stop = DateTime.UtcNow;

    Log.Trace($"CoarseGenerator.ProcessDailyFolder(): Processed {symbols} symbols into {dates.Count} coarse files in {(stop - start).TotalSeconds.ToStringInvariant("0.00")} seconds");
    Log.Trace($"CoarseGenerator.ProcessDailyFolder(): Excluded {maplessCount} mapless symbols.");

    return (writers.Keys);
}
/// <summary>
/// Iterates each daily file in the specified <paramref name="dailyFolder"/> and adds a line for each
/// day to the appropriate coarse file
/// </summary>
/// <param name="dailyFolder">The folder with daily data</param>
/// <param name="coarseFolder">The coarse output folder</param>
/// <param name="mapFileResolver">Resolves map files so tickers can be translated to permticks</param>
/// <param name="exclusions">The symbols to be excluded from processing</param>
/// <param name="ignoreMapless">Ignore the symbols without a map file.</param>
/// <param name="symbolResolver">Function used to provide symbol resolution. Default resolution uses the zip file name to resolve
/// the symbol, specify null for this behavior.</param>
/// <returns>A collection of the generated coarse files</returns>
public static ICollection<string> ProcessDailyFolder(string dailyFolder, string coarseFolder, MapFileResolver mapFileResolver, HashSet<string> exclusions, bool ignoreMapless, Func<string, string> symbolResolver = null)
{
    // prices in the daily files are stored scaled by 10000; undo that when reading
    const decimal scaleFactor = 10000m;

    Log.Trace("Processing: {0}", dailyFolder);

    var stopwatch = Stopwatch.StartNew();

    // load map files into memory
    var symbols = 0;
    var maplessCount = 0;
    var dates = new HashSet<DateTime>();

    // instead of opening/closing these constantly, open them once and dispose at the end (~3x speed improvement)
    var writers = new Dictionary<string, StreamWriter>();

    // open up each daily file to get the values and append to the daily coarse files
    foreach (var file in Directory.EnumerateFiles(dailyFolder))
    {
        try
        {
            var symbol = Path.GetFileNameWithoutExtension(file);
            if (symbol == null)
            {
                Log.Trace("CoarseGenerator.ProcessDailyFolder(): Unable to resolve symbol from file: {0}", file);
                continue;
            }

            if (symbolResolver != null)
            {
                symbol = symbolResolver(symbol);
            }

            // tickers are culture-invariant identifiers: use invariant casing so e.g. a Turkish
            // default culture cannot map 'i' -> 'İ' and corrupt the symbol
            symbol = symbol.ToUpperInvariant();

            if (exclusions.Contains(symbol))
            {
                Log.Trace("Excluded symbol: {0}", symbol);
                continue;
            }

            ZipFile zip;
            using (var reader = Compression.Unzip(file, out zip))
            {
                // 30 period EMA constant
                const decimal k = 2m / (30 + 1);

                var seeded = false;
                var runningAverageVolume = 0m;

                var checkedForMapFile = false;

                symbols++;
                string line;
                while ((line = reader.ReadLine()) != null)
                {
                    //20150625.csv
                    var csv = line.Split(',');
                    var date = DateTime.ParseExact(csv[0], DateFormat.TwelveCharacter, CultureInfo.InvariantCulture);

                    if (ignoreMapless && !checkedForMapFile)
                    {
                        checkedForMapFile = true;
                        if (!mapFileResolver.ResolveMapFile(symbol, date).Any())
                        {
                            // if the resolved map file has zero entries then it's a mapless symbol
                            maplessCount++;
                            break;
                        }
                    }

                    // data files are machine-generated: parse with the invariant culture so locales
                    // that use ',' as a decimal separator cannot mis-read the values (CA1305)
                    var close = decimal.Parse(csv[4], CultureInfo.InvariantCulture) / scaleFactor;
                    var volume = long.Parse(csv[5], CultureInfo.InvariantCulture);

                    // compute the current volume EMA for dollar volume calculations
                    runningAverageVolume = seeded
                        ? volume * k + runningAverageVolume * (1 - k)
                        : volume;
                    seeded = true;

                    var dollarVolume = close * runningAverageVolume;

                    // invariant culture also pins the Gregorian calendar for the file name
                    var coarseFile = Path.Combine(coarseFolder, date.ToString("yyyyMMdd", CultureInfo.InvariantCulture) + ".csv");
                    dates.Add(date);

                    // try to resolve a map file and if found, use the permtick as the symbol
                    var sid = symbol;
                    var mapFile = mapFileResolver.ResolveMapFile(sid, date);
                    if (mapFile != null)
                    {
                        // if available, use the permtick in the coarse files, because of this, we need
                        // to update the coarse files each time new map files are added/permticks change
                        sid = mapFile.Permtick;
                    }

                    if (mapFile == null && ignoreMapless)
                    {
                        // if we're ignoring mapless files then we should always be able to resolve this
                        Log.Error(string.Format(CultureInfo.InvariantCulture, "CoarseGenerator.ProcessDailyFolder(): Unable to resolve map file for {0} as of {1}", symbol, date.ToShortDateString()));
                        continue;
                    }

                    // sid,symbol,close,volume,dollar volume
                    var coarseFileLine = sid + "," + symbol + "," + close + "," + volume + "," + Math.Truncate(dollarVolume);

                    StreamWriter writer;
                    if (!writers.TryGetValue(coarseFile, out writer))
                    {
                        // NOTE(review): FileMode.Append means re-running over an existing coarse folder
                        // appends duplicate lines; the newer variant of this method in this file uses
                        // FileMode.Create — confirm which behavior is intended before changing
                        writer = new StreamWriter(new FileStream(coarseFile, FileMode.Append, FileAccess.Write, FileShare.Write));
                        writers[coarseFile] = writer;
                    }
                    writer.WriteLine(coarseFileLine);
                }
            }

            if (symbols % 1000 == 0)
            {
                Log.Trace("CoarseGenerator.ProcessDailyFolder(): Completed processing {0} symbols. Current elapsed: {1} seconds", symbols, stopwatch.Elapsed.TotalSeconds.ToString("0.00", CultureInfo.InvariantCulture));
            }
        }
        catch (Exception err)
        {
            // log the error and continue with the process
            Log.Error(err.ToString());
        }
    }

    Log.Trace("CoarseGenerator.ProcessDailyFolder(): Saving {0} coarse files to disk", dates.Count);

    // dispose all the writers at the end of processing
    foreach (var writer in writers)
    {
        writer.Value.Dispose();
    }

    stopwatch.Stop();

    Log.Trace("CoarseGenerator.ProcessDailyFolder(): Processed {0} symbols into {1} coarse files in {2} seconds", symbols, dates.Count, stopwatch.Elapsed.TotalSeconds.ToString("0.00", CultureInfo.InvariantCulture));
    Log.Trace("CoarseGenerator.ProcessDailyFolder(): Excluded {0} mapless symbols.", maplessCount);

    return writers.Keys;
}