/// <summary>
/// Creates and disposes 1000 enumerators from a <see cref="BaseDataSubscriptionEnumeratorFactory"/>,
/// one per consecutive day starting at 1998-01-01, and asserts that the RAM usage reported by
/// <c>OS.TotalPhysicalMemoryUsed</c> grew by less than 10 (logged as MB) across the loop.
/// </summary>
public void DoesNotLeakMemory()
{
    const int iterations = 1000;

    var aapl = Symbols.AAPL;
    var subscriptionConfig = new SubscriptionDataConfig(
        typeof(TradeBar),
        aapl,
        Resolution.Daily,
        TimeZones.NewYork,
        TimeZones.NewYork,
        false,
        false,
        false,
        false,
        TickType.Trade,
        false);

    var exchangeHours = SecurityExchangeHours.AlwaysOpen(TimeZones.NewYork);
    var quoteCurrency = new Cash(CashBook.AccountCurrency, 0, 1);
    var symbolProperties = SymbolProperties.GetDefault(CashBook.AccountCurrency);
    var equity = new Security(exchangeHours, subscriptionConfig, quoteCurrency, symbolProperties, ErrorCurrencyConverter.Instance);

    var mapFiles = new LocalDiskMapFileProvider();
    var factorFiles = new LocalDiskFactorFileProvider(mapFiles);
    var resolver = mapFiles.Get(equity.Symbol.ID.Market);
    var dataProvider = new DefaultDataProvider();
    var enumeratorFactory = new BaseDataSubscriptionEnumeratorFactory(false, resolver, factorFiles);

    // take the baseline only after a collection so garbage from the setup above is not counted
    GC.Collect();
    var ramBefore = OS.TotalPhysicalMemoryUsed;

    var firstDay = new DateTime(1998, 1, 1);
    for (var dayOffset = 0; dayOffset < iterations; dayOffset++)
    {
        // each iteration requests a single day, advancing one calendar day per pass
        var day = firstDay.AddDays(dayOffset);
        var subscriptionRequest = new SubscriptionRequest(false, null, equity, subscriptionConfig, day, day);

        var enumerator = factory_CreateEnumerator(enumeratorFactory, subscriptionRequest, dataProvider);
        using (enumerator)
        {
            enumerator.MoveNext();
        }
    }

    GC.Collect();
    var ramAfter = OS.TotalPhysicalMemoryUsed;

    Log.Trace($"RAM usage - before: {ramBefore} MB, after: {ramAfter} MB");

    Assert.IsTrue(ramAfter - ramBefore < 10);
}

/// <summary>
/// Thin forwarding helper so the loop body above reads as acquire / use / dispose.
/// </summary>
private static IEnumerator<BaseData> factory_CreateEnumerator(BaseDataSubscriptionEnumeratorFactory factory, SubscriptionRequest request, DefaultDataProvider dataProvider)
{
    return factory.CreateEnumerator(request, dataProvider);
}
/// <summary>
/// Walks every file in the USA equity factor_files folder, asks a
/// <see cref="LocalDiskFactorFileProvider"/> for the matching symbol and prints
/// "TICKER: message" to the console for each file whose lookup throws.
/// </summary>
public void FindsFactorFilesWithErrors()
{
    var factorFileProvider = new LocalDiskFactorFileProvider();
    var folder = Path.Combine(Globals.DataFolder, "equity", QuantConnect.Market.USA, "factor_files");

    foreach (var path in Directory.EnumerateFiles(folder))
    {
        // the ticker is the upper-cased file name without its extension
        var baseName = Path.GetFileNameWithoutExtension(path);
        var ticker = baseName.ToUpper(CultureInfo.InvariantCulture);
        var equitySymbol = Symbol.Create(ticker, SecurityType.Equity, QuantConnect.Market.USA);

        try
        {
            factorFileProvider.Get(equitySymbol);
        }
        catch (Exception error)
        {
            // report and keep going so every broken file is listed in one run
            Console.WriteLine(string.Concat(ticker, ": ", error.Message));
        }
    }
}
/// <summary>
/// Runs a <see cref="FileSystemDataFeed"/> for the <c>BenchmarkTest</c> algorithm on a background task,
/// enumerates every time slice it produces and prints the slice count and elapsed time to the console.
/// A progress line is printed each time the slice month changes.
/// </summary>
public void TestsFileSystemDataFeedSpeed()
{
    var job = new BacktestNodePacket();
    var resultHandler = new BacktestingResultHandler();
    var mapFileProvider = new LocalDiskMapFileProvider();
    var factorFileProvider = new LocalDiskFactorFileProvider(mapFileProvider);
    var dataProvider = new DefaultDataProvider();

    var algorithm = new BenchmarkTest();
    var feed = new FileSystemDataFeed();

    feed.Initialize(algorithm, job, resultHandler, mapFileProvider, factorFileProvider, dataProvider);
    algorithm.Initialize();

    // the event is IDisposable; keep it alive for the whole method and release it on every exit path
    using (var feedThreadStarted = new ManualResetEvent(false))
    {
        Task.Factory.StartNew(() =>
        {
            feedThreadStarted.Set();
            feed.Run();
        });

        // do not start timing until the feed task has actually begun executing
        feedThreadStarted.WaitOne();

        var stopwatch = Stopwatch.StartNew();
        var lastMonth = -1;
        var count = 0;
        foreach (var timeSlice in feed)
        {
            if (timeSlice.Time.Month != lastMonth)
            {
                Console.WriteLine(DateTime.Now + " - Time: " + timeSlice.Time);
                lastMonth = timeSlice.Time.Month;
            }
            count++;
        }

        // stop the clock before any further console output so it measures feed enumeration only
        stopwatch.Stop();

        Console.WriteLine("Count: " + count);
        Console.WriteLine("Elapsed time: " + stopwatch.Elapsed);
    }
}
/// <summary>
/// Configures a one-day run (2014-06-05) with split-adjusted normalization, subscribes to
/// <c>Ticker</c> at minute resolution and loads its factor file from local disk providers.
/// </summary>
public override void Initialize()
{
    // start and end on the same day
    SetStartDate(2014, 6, 5);
    SetEndDate(2014, 6, 5);

    UniverseSettings.DataNormalizationMode = DataNormalizationMode.SplitAdjusted;

    var equity = AddEquity(Ticker, Resolution.Minute);
    _aapl = equity.Symbol;

    // the data provider type name comes from configuration, falling back to "DefaultDataProvider"
    var composer = Composer.Instance;
    var dataProviderTypeName = Config.Get("data-provider", "DefaultDataProvider");
    var dataProvider = composer.GetExportedValueByTypeName<IDataProvider>(dataProviderTypeName);

    var mapFiles = new LocalDiskMapFileProvider();
    mapFiles.Initialize(dataProvider);

    var factorFiles = new LocalDiskFactorFileProvider();
    factorFiles.Initialize(mapFiles, dataProvider);

    // 'as' leaves _factorFile null when the provider returns a different implementation
    var factorProvider = factorFiles.Get(_aapl);
    _factorFile = factorProvider as CorporateFactorProvider;
}
/// <summary>
/// Enumerates every time slice of a <see cref="FileSystemDataFeed"/> driving the
/// <c>SingleSecurity_Second</c> benchmark algorithm and prints throughput in thousands of
/// slices per second (KPS): once whenever the slice month changes and once at the end.
/// </summary>
public void TestsFileSystemDataFeedSpeed()
{
    var packet = new BacktestNodePacket();
    var results = new BacktestingResultHandler();
    var mapFiles = new LocalDiskMapFileProvider();
    var factorFiles = new LocalDiskFactorFileProvider(mapFiles);
    var files = new DefaultDataProvider();

    var algorithm = PerformanceBenchmarkAlgorithms.SingleSecurity_Second;
    var feed = new FileSystemDataFeed();

    // the data manager must be attached to the algorithm before the feed is initialized with it
    var universeSelection = new UniverseSelection(feed, algorithm);
    var dataManager = new DataManager(feed, universeSelection, algorithm.Settings, algorithm.TimeKeeper);
    algorithm.SubscriptionManager.SetDataManager(dataManager);

    feed.Initialize(algorithm, packet, results, mapFiles, factorFiles, files, dataManager);
    algorithm.Initialize();
    algorithm.PostInitialize();

    var sliceCount = 0;
    var timer = Stopwatch.StartNew();
    var currentMonth = algorithm.StartDate.Month;

    foreach (var slice in feed)
    {
        var sliceMonth = slice.Time.Month;
        if (sliceMonth != currentMonth)
        {
            // report the running rate each time the data crosses into a new month
            var seconds = timer.Elapsed.TotalSeconds;
            var thousandsOfSlices = sliceCount / 1000d;
            var kps = thousandsOfSlices / seconds;
            Console.WriteLine($"{DateTime.Now} - Time: {slice.Time}: KPS: {kps}");
            currentMonth = sliceMonth;
        }

        sliceCount++;
    }

    Console.WriteLine("Count: " + sliceCount);

    timer.Stop();
    feed.Exit();

    var totalElapsed = timer.Elapsed;
    var overallKps = sliceCount / 1000d / totalElapsed.TotalSeconds;
    Console.WriteLine($"Elapsed time: {totalElapsed} KPS: {overallKps}");
}
/// <summary>
/// Visits each sub-directory of the "equity" folder under <paramref name="dataDirectory"/> and
/// aggregates its daily data into coarse files, creating the "fundamental/coarse" output folder when missing.
/// </summary>
/// <remarks>
/// This is an iterator method: no directory is touched until the returned sequence is enumerated.
/// </remarks>
/// <param name="dataDirectory">The Lean /Data directory</param>
/// <param name="ignoreMaplessSymbols">Ignore symbols without a QuantQuote map file.</param>
/// <returns>The files returned by <c>ProcessDailyFolder</c> for each equity sub-directory, in enumeration order</returns>
public static IEnumerable<string> ProcessEquityDirectories(string dataDirectory, bool ignoreMaplessSymbols)
{
    var excludedSymbols = ReadExclusionsFile(ExclusionsFile);
    var equityRoot = Path.Combine(dataDirectory, "equity");

    foreach (var equitySubDirectory in Directory.EnumerateDirectories(equityRoot))
    {
        var coarsePath = Path.Combine(equitySubDirectory, "fundamental", "coarse");
        var dailyPath = Path.Combine(equitySubDirectory, "daily");
        var mapFilesPath = Path.Combine(equitySubDirectory, "map_files");

        // make sure the output location exists before any coarse file is written
        if (!Directory.Exists(coarsePath))
        {
            Directory.CreateDirectory(coarsePath);
        }

        var factorFiles = new LocalDiskFactorFileProvider();
        var mapFileResolver = MapFileResolver.Create(mapFilesPath);

        var generatedFiles = ProcessDailyFolder(dailyPath, coarsePath, mapFileResolver, factorFiles, excludedSymbols, ignoreMaplessSymbols);
        foreach (var generatedFile in generatedFiles)
        {
            yield return generatedFile;
        }
    }
}
/// <summary>
/// Iterates each daily file in the specified <paramref name="dailyFolder"/> and adds a line for each
/// day to the appropriate coarse file
/// </summary>
/// <remarks>
/// A failure while processing one daily file is logged and does not stop the remaining files.
/// All numeric and date values are parsed and written using the invariant culture so the generated
/// comma separated files do not depend on the culture of the machine running the generator.
/// </remarks>
/// <param name="dailyFolder">The folder with daily data</param>
/// <param name="coarseFolder">The coarse output folder</param>
/// <param name="mapFileResolver">Resolver used to look up the map file of each symbol as of each data date</param>
/// <param name="exclusions">The symbols to be excluded from processing</param>
/// <param name="ignoreMapless">Ignore the symbols without a map file.</param>
/// <param name="startDate">The starting date for processing</param>
/// <param name="symbolResolver">Function used to provide symbol resolution. Default resolution uses the zip file name to resolve
/// the symbol, specify null for this behavior.</param>
/// <returns>A collection of the generated coarse files</returns>
/// <exception cref="Exception">Thrown when the parent directory of <paramref name="dailyFolder"/> or
/// <paramref name="coarseFolder"/> cannot be resolved</exception>
public static ICollection<string> ProcessDailyFolder(string dailyFolder, string coarseFolder, MapFileResolver mapFileResolver, HashSet<string> exclusions, bool ignoreMapless, DateTime startDate, Func<string, string> symbolResolver = null)
{
    const decimal scaleFactor = 10000m;

    Log.Trace("Processing: {0}", dailyFolder);

    var start = DateTime.UtcNow;

    var symbols = 0;
    var maplessCount = 0;
    var dates = new HashSet<DateTime>();

    // instead of opening/closing these constantly, open them once and dispose at the end (~3x speed improvement)
    var writers = new Dictionary<string, StreamWriter>();

    // the market is taken from the name of the directory that contains the daily folder
    var dailyFolderDirectoryInfo = new DirectoryInfo(dailyFolder).Parent;
    if (dailyFolderDirectoryInfo == null)
    {
        throw new Exception("Unable to resolve market for daily folder: " + dailyFolder);
    }
    var market = dailyFolderDirectoryInfo.Name.ToLowerInvariant();

    // fine fundamental data is expected in a 'fine' folder next to the coarse folder
    var fundamentalDirectoryInfo = new DirectoryInfo(coarseFolder).Parent;
    if (fundamentalDirectoryInfo == null)
    {
        throw new Exception("Unable to resolve fundamental path for coarse folder: " + coarseFolder);
    }
    var fineFundamentalFolder = Path.Combine(fundamentalDirectoryInfo.FullName, "fine");

    var mapFileProvider = new LocalDiskMapFileProvider();
    var factorFileProvider = new LocalDiskFactorFileProvider(mapFileProvider);

    try
    {
        // open up each daily file to get the values and append to the daily coarse files
        foreach (var file in Directory.EnumerateFiles(dailyFolder))
        {
            try
            {
                var symbol = Path.GetFileNameWithoutExtension(file);
                if (symbol == null)
                {
                    Log.Trace("CoarseGenerator.ProcessDailyFolder(): Unable to resolve symbol from file: {0}", file);
                    continue;
                }

                if (symbolResolver != null)
                {
                    symbol = symbolResolver(symbol);
                }

                symbol = symbol.ToUpperInvariant();

                if (exclusions.Contains(symbol))
                {
                    Log.Trace("Excluded symbol: {0}", symbol);
                    continue;
                }

                // check if symbol has any fine fundamental data
                var firstFineSymbolDate = DateTime.MaxValue;
                if (Directory.Exists(fineFundamentalFolder))
                {
                    var fineSymbolFolder = Path.Combine(fineFundamentalFolder, symbol.ToLowerInvariant());

                    // FirstOrDefault yields null for an existing but empty folder, so test for null as well as empty
                    var firstFineSymbolFileName = Directory.Exists(fineSymbolFolder)
                        ? Directory.GetFiles(fineSymbolFolder).OrderBy(x => x).FirstOrDefault()
                        : null;

                    if (!string.IsNullOrEmpty(firstFineSymbolFileName))
                    {
                        firstFineSymbolDate = DateTime.ParseExact(Path.GetFileNameWithoutExtension(firstFineSymbolFileName), "yyyyMMdd", CultureInfo.InvariantCulture);
                    }
                }

                ZipFile zip;
                var reader = Compression.Unzip(file, out zip);

                // NOTE(review): the archive handed back through 'out' is assumed to be owned by this caller;
                // it is released after the reader (inner using is disposed first). Both may be null.
                using (zip)
                using (reader)
                {
                    var checkedForMapFile = false;

                    symbols++;
                    string line;
                    while ((line = reader.ReadLine()) != null)
                    {
                        //20150625.csv
                        var csv = line.Split(',');
                        var date = DateTime.ParseExact(csv[0], DateFormat.TwelveCharacter, CultureInfo.InvariantCulture);

                        // spin past old data
                        if (date < startDate)
                        {
                            continue;
                        }

                        if (ignoreMapless && !checkedForMapFile)
                        {
                            checkedForMapFile = true;
                            if (!mapFileResolver.ResolveMapFile(symbol, date).Any())
                            {
                                // if the resolved map file has zero entries then it's a mapless symbol
                                maplessCount++;
                                break;
                            }
                        }

                        // source values are machine generated, parse them independently of the current culture
                        var close = decimal.Parse(csv[4], CultureInfo.InvariantCulture) / scaleFactor;
                        var volume = long.Parse(csv[5], CultureInfo.InvariantCulture);

                        var dollarVolume = close * volume;

                        var coarseFile = Path.Combine(coarseFolder, date.ToString("yyyyMMdd", CultureInfo.InvariantCulture) + ".csv");
                        dates.Add(date);

                        // try to resolve a map file and if found, regen the sid
                        var sid = SecurityIdentifier.GenerateEquity(SecurityIdentifier.DefaultDate, symbol, market);
                        var mapFile = mapFileResolver.ResolveMapFile(symbol, date);
                        if (!mapFile.IsNullOrEmpty())
                        {
                            // if available, use the permtick in the coarse files, because of this, we need
                            // to update the coarse files each time new map files are added/permticks change
                            sid = SecurityIdentifier.GenerateEquity(mapFile.FirstDate, mapFile.OrderBy(x => x.Date).First().MappedSymbol, market);
                        }
                        if (mapFile == null && ignoreMapless)
                        {
                            // if we're ignoring mapless files then we should always be able to resolve this
                            Log.Error(string.Format("CoarseGenerator.ProcessDailyFolder(): Unable to resolve map file for {0} as of {1}", symbol, date.ToShortDateString()));
                            continue;
                        }

                        // check if symbol has fine fundamental data for the current date
                        var hasFundamentalDataForDate = date >= firstFineSymbolDate;

                        // get price and split factors from factor files, defaulting both to 1 when unavailable
                        var leanSymbol = new Symbol(sid, symbol);
                        var factorFile = factorFileProvider.Get(leanSymbol);
                        var factorFileRow = factorFile?.GetScalingFactors(date);
                        var priceFactor = factorFileRow?.PriceFactor ?? 1m;
                        var splitFactor = factorFileRow?.SplitFactor ?? 1m;

                        // sid,symbol,close,volume,dollar volume,has fundamental data,price factor,split factor
                        // formatted with the invariant culture so decimals never contain the ',' field separator
                        var coarseFileLine = string.Format(
                            CultureInfo.InvariantCulture,
                            "{0},{1},{2},{3},{4},{5},{6},{7}",
                            sid,
                            symbol,
                            close,
                            volume,
                            Math.Truncate(dollarVolume),
                            hasFundamentalDataForDate,
                            priceFactor,
                            splitFactor);

                        StreamWriter writer;
                        if (!writers.TryGetValue(coarseFile, out writer))
                        {
                            writer = new StreamWriter(new FileStream(coarseFile, FileMode.Create, FileAccess.Write, FileShare.Write));
                            writers[coarseFile] = writer;
                        }
                        writer.WriteLine(coarseFileLine);
                    }
                }

                if (symbols % 1000 == 0)
                {
                    Log.Trace("CoarseGenerator.ProcessDailyFolder(): Completed processing {0} symbols. Current elapsed: {1} seconds", symbols, (DateTime.UtcNow - start).TotalSeconds.ToString("0.00"));
                }
            }
            catch (Exception err)
            {
                // log the error and continue with the process
                Log.Error(err.ToString());
            }
        }

        Log.Trace("CoarseGenerator.ProcessDailyFolder(): Saving {0} coarse files to disk", dates.Count);
    }
    finally
    {
        // dispose all the writers at the end of processing, also when enumeration fails part way through
        foreach (var writer in writers.Values)
        {
            writer.Dispose();
        }
    }

    var stop = DateTime.UtcNow;

    Log.Trace("CoarseGenerator.ProcessDailyFolder(): Processed {0} symbols into {1} coarse files in {2} seconds", symbols, dates.Count, (stop - start).TotalSeconds.ToString("0.00"));
    Log.Trace("CoarseGenerator.ProcessDailyFolder(): Excluded {0} mapless symbols.", maplessCount);

    return writers.Keys;
}