/// <summary>
/// Selects the factor file for a subscription. The provider is only consulted for
/// non-custom equity data; in every other case, or when the provider returns null
/// or throws, an empty factor file keyed by the symbol's value is returned.
/// </summary>
/// <param name="config">The subscription configuration whose symbol is looked up</param>
/// <param name="factorFileProvider">The provider queried for the symbol's factor file</param>
/// <returns>The provider's factor file when available, otherwise an empty factor file</returns>
private static FactorFile GetFactorFileToUse(
    SubscriptionDataConfig config,
    IFactorFileProvider factorFileProvider)
{
    // fallback handed back whenever nothing better can be resolved
    var fallback = new FactorFile(config.Symbol.Value, new List<FactorFileRow>());

    var isEquityPriceData = !config.IsCustomData && config.SecurityType == SecurityType.Equity;
    if (!isEquityPriceData)
    {
        return fallback;
    }

    try
    {
        var resolved = factorFileProvider.Get(config.Symbol);
        if (resolved != null)
        {
            return resolved;
        }
    }
    catch (Exception err)
    {
        // best effort: a failing lookup is logged and the empty factor file is used
        Log.Error(err, "CorporateEventEnumeratorFactory.GetFactorFileToUse(): Factors File: " + config.Symbol.ID + ": ");
    }

    return fallback;
}
/// <summary>
/// Advances the enumerator to the next element of the collection.
/// </summary>
/// <remarks>
/// When the underlying enumerator produced a non-null data point, a factor file provider is set
/// and the normalization mode is not raw, the data point is normalized with the configuration's
/// price scale factor. The scale factor is re-read from the factor file once the data time
/// reaches the stored next refresh time.
/// </remarks>
/// <returns>
/// True if the enumerator was successfully advanced to the next element;
/// False if the enumerator has passed the end of the collection.
/// </returns>
public bool MoveNext()
{
    var advanced = _rawDataEnumerator.MoveNext();
    Current = _rawDataEnumerator.Current;

    var skipNormalization = !advanced
        || Current == null
        || _factorFileProvider == null
        || _config.DataNormalizationMode == DataNormalizationMode.Raw;
    if (skipNormalization)
    {
        return advanced;
    }

    if (Current.Time >= _nextTradableDate)
    {
        var currentDate = Current.Time.Date;

        _factorFile = _factorFileProvider.Get(_config.Symbol);
        _config.PriceScaleFactor = _factorFile.GetPriceScale(
            currentDate,
            _config.DataNormalizationMode,
            _config.ContractDepthOffset,
            _config.DataMappingMode);

        // update factor files every day
        var refreshAt = currentDate.AddDays(1);
        if (_liveMode)
        {
            // in live trading we add a offset to make sure new factor files are available
            refreshAt = refreshAt.Add(Time.LiveAuxiliaryDataOffset);
        }
        _nextTradableDate = refreshAt;
    }

    Current = Current.Normalize(_config.PriceScaleFactor, _config.DataNormalizationMode, _config.SumOfDividends);
    return advanced;
}
/// <summary>
/// Initializes this instance: stores the configuration and resolves the map file
/// and the factor file for the configuration's symbol.
/// </summary>
/// <param name="config">The <see cref="SubscriptionDataConfig"/></param>
/// <param name="factorFileProvider">The factor file provider to use</param>
/// <param name="mapFileProvider">The <see cref="Data.Auxiliary.MapFile"/> provider to use</param>
/// <param name="startTime">Start date for the data request (not read by this method)</param>
public void Initialize(
    SubscriptionDataConfig config,
    IFactorFileProvider factorFileProvider,
    IMapFileProvider mapFileProvider,
    DateTime startTime)
{
    _config = config;
    _mapFile = mapFileProvider.ResolveMapFile(_config);

    // the 'as' cast leaves the field null when the provider returns
    // anything other than a CorporateFactorProvider
    var resolvedFactorProvider = factorFileProvider.Get(_config.Symbol);
    _factorFile = resolvedFactorProvider as CorporateFactorProvider;
}
/// <summary>
/// Advances the enumerator to the next element of the collection.
/// </summary>
/// <remarks>
/// When the underlying enumerator produced a non-null data point, a factor file provider is set
/// and the normalization mode is not raw, the data point is normalized with the configuration's
/// price scale factor. The scale factor is re-read from the factor file whenever the data's date
/// is later than the last date it was read for.
/// </remarks>
/// <returns>
/// True if the enumerator was successfully advanced to the next element;
/// False if the enumerator has passed the end of the collection.
/// </returns>
public bool MoveNext()
{
    var advanced = _rawDataEnumerator.MoveNext();
    Current = _rawDataEnumerator.Current;

    var skipNormalization = !advanced
        || Current == null
        || _factorFileProvider == null
        || _config.DataNormalizationMode == DataNormalizationMode.Raw;
    if (skipNormalization)
    {
        return advanced;
    }

    var currentDate = Current.Time.Date;
    if (currentDate > _lastTradableDate)
    {
        // a new date was reached: reload the factor file and refresh the scale factor
        _factorFile = _factorFileProvider.Get(_config.Symbol);
        _lastTradableDate = currentDate;
        _config.PriceScaleFactor = _factorFile.GetPriceScale(
            currentDate,
            _config.DataNormalizationMode,
            _config.ContractDepthOffset,
            _config.DataMappingMode);
    }

    Current = Current.Normalize(_config.PriceScaleFactor, _config.DataNormalizationMode, _config.SumOfDividends);
    return advanced;
}
/// <summary>
/// Subscription data reader takes a subscription request, loads the type, accepts the data source and enumerate on the results.
/// </summary>
/// <param name="config">Subscription configuration object</param>
/// <param name="periodStart">Start date for the data request/backtest</param>
/// <param name="periodFinish">Finish date for the data request/backtest</param>
/// <param name="resultHandler">Result handler used to push error messages and perform sampling on skipped days</param>
/// <param name="mapFileResolver">Used for resolving the correct map files</param>
/// <param name="factorFileProvider">Used for getting factor files</param>
/// <param name="tradeableDates">Defines the dates for which we'll request data, in order, in the security's exchange time zone</param>
/// <param name="isLiveMode">True if we're in live mode, false otherwise</param>
/// <param name="includeAuxilliaryData">True if we want to emit aux data, false to only emit price data</param>
public SubscriptionDataReader(SubscriptionDataConfig config,
    DateTime periodStart,
    DateTime periodFinish,
    IResultHandler resultHandler,
    MapFileResolver mapFileResolver,
    IFactorFileProvider factorFileProvider,
    IEnumerable<DateTime> tradeableDates,
    bool isLiveMode,
    bool includeAuxilliaryData = true)
{
    // subscription configuration and the requested period
    _config = config;
    _auxiliaryData = new Queue<BaseData>();
    _periodStart = periodStart;
    _periodFinish = periodFinish;

    _isLiveMode = isLiveMode;
    _includeAuxilliaryData = includeAuxilliaryData;

    // activator used to instantiate the subscription's data type
    var objectActivator = ObjectActivator.GetActivator(config.Type);

    _resultHandler = resultHandler;
    _tradeableDates = tradeableDates.GetEnumerator();

    if (objectActivator == null)
    {
        // no activator is available: report it and mark the stream as finished
        _resultHandler.ErrorMessage("Custom data type '" + config.Type.Name + "' missing parameterless constructor E.g. public " + config.Type.Name + "() { }");
        _endOfStream = true;
        return;
    }

    // create an instance of the data type; the field stays null if it is not a BaseData
    _dataFactory = objectActivator.Invoke(new object[] {}) as BaseData;

    // quandl data needs its access token set once
    if (_dataFactory is Quandl && !Quandl.IsAuthCodeSet)
    {
        Quandl.SetAuthCode(Config.Get("quandl-auth-token"));
    }

    // start from empty auxiliary files, replaced below when real ones are resolved
    _factorFile = new FactorFile(config.Symbol.Value, new List<FactorFileRow>());
    _mapFile = new MapFile(config.Symbol.Value, new List<MapFileRow>());

    // load up the map and factor files for equities
    var isEquityPriceData = !config.IsCustomData && config.SecurityType == SecurityType.Equity;
    if (isEquityPriceData)
    {
        try
        {
            // only take the resolved map file if it has data, otherwise keep the empty one
            var resolvedMapFile = mapFileResolver.ResolveMapFile(config.Symbol.ID.Symbol, config.Symbol.ID.Date);
            if (resolvedMapFile.Any())
            {
                _mapFile = resolvedMapFile;
            }

            var resolvedFactorFile = factorFileProvider.Get(_config.Symbol);
            _hasScaleFactors = resolvedFactorFile != null;
            if (_hasScaleFactors)
            {
                _factorFile = resolvedFactorFile;
            }
        }
        catch (Exception err)
        {
            Log.Error(err, "Fetching Price/Map Factors: " + config.Symbol.ID + ": ");
        }
    }

    _subscriptionFactoryEnumerator = ResolveDataEnumerator(true);
}
/// <summary>
/// Initializes the <see cref="SubscriptionDataReader"/> instance
/// </summary>
/// <remarks>Should be called after all consumers of <see cref="NewTradableDate"/> event are set,
/// since it will produce events. Calling it again after a completed initialization is a no-op.</remarks>
public void Initialize()
{
    if (_initialized)
    {
        return;
    }

    // create the instance of the data type we'll be reading
    try
    {
        _dataFactory = _config.GetBaseDataInstance();
    }
    catch (ArgumentException exception)
    {
        OnInvalidConfigurationDetected(new InvalidConfigurationDetectedEventArgs(_config.Symbol, exception.Message));
        _endOfStream = true;
        return;
    }

    // set the access token for the data sources that need one, if not set already
    if (_dataFactory is Quandl && !Quandl.IsAuthCodeSet)
    {
        Quandl.SetAuthCode(Config.Get("quandl-auth-token"));
    }

    if (_dataFactory is TiingoPrice && !Tiingo.IsAuthCodeSet)
    {
        Tiingo.SetAuthCode(Config.Get("tiingo-auth-token"));
    }

    if (_dataFactory is USEnergyAPI && !USEnergyAPI.IsAuthCodeSet)
    {
        USEnergyAPI.SetAuthCode(Config.Get("us-energy-information-auth-token"));
    }

    if (_dataFactory is FredApi && !FredApi.IsAuthCodeSet)
    {
        FredApi.SetAuthCode(Config.Get("fred-auth-token"));
    }

    // start from empty auxiliary files, replaced below when real ones are resolved
    _factorFile = new FactorFile(_config.Symbol.Value, new List<FactorFileRow>());
    _mapFile = new MapFile(_config.Symbol.Value, new List<MapFileRow>());

    // map files are loaded whenever the data type requires mapping;
    // factor files only for non-custom, non-option data
    if (_dataFactory.RequiresMapping())
    {
        try
        {
            var mapFile = _mapFileResolver.ResolveMapFile(_config.Symbol, _config.Type);

            // only take the resolved map file if it has data, otherwise keep the empty one
            if (mapFile.Any())
            {
                _mapFile = mapFile;
            }

            if (!_config.IsCustomData && !_config.SecurityType.IsOption())
            {
                var factorFile = _factorFileProvider.Get(_config.Symbol);
                _hasScaleFactors = factorFile != null;
                if (_hasScaleFactors)
                {
                    _factorFile = factorFile;

                    // if factor file has minimum date, update start period if before minimum date
                    if (!_isLiveMode && _factorFile != null && _factorFile.FactorFileMinimumDate.HasValue)
                    {
                        var minimumDate = _factorFile.FactorFileMinimumDate.Value;
                        if (_periodStart < minimumDate)
                        {
                            _periodStart = minimumDate;

                            OnNumericalPrecisionLimited(
                                new NumericalPrecisionLimitedEventArgs(_config.Symbol,
                                    $"[{_config.Symbol.Value}, {minimumDate.ToShortDateString()}]"));
                        }
                    }
                }

                // the request cannot start before the first date of the resolved map file
                var firstMappedDate = mapFile.FirstDate;
                if (_periodStart < firstMappedDate)
                {
                    _periodStart = firstMappedDate;

                    OnStartDateLimited(
                        new StartDateLimitedEventArgs(_config.Symbol,
                            $"[{_config.Symbol.Value}," +
                            $" {firstMappedDate.ToString("yyyy-MM-dd", CultureInfo.InvariantCulture)}]"));
                }
            }
        }
        catch (Exception err)
        {
            Log.Error(err, "Fetching Price/Map Factors: " + _config.Symbol.ID + ": ");
        }
    }

    _delistingDate = _config.Symbol.GetDelistingDate(_mapFile);

    // adding a day so we stop at EOD
    _delistingDate = _delistingDate.AddDays(1);

    UpdateDataEnumerator(true);

    _initialized = true;
}
/// <summary>
/// Iterates each daily file in the specified <paramref name="dailyFolder"/> and adds a line for each
/// day to the appropriate coarse file
/// </summary>
/// <remarks>
/// All numeric/date parsing and the generated CSV lines use the invariant culture so the output
/// does not depend on the culture of the machine running the generator.
/// </remarks>
/// <param name="dailyFolder">The folder with daily data</param>
/// <param name="coarseFolder">The coarse output folder</param>
/// <param name="mapFileResolver">Resolver used to look up the map file of each symbol/date</param>
/// <param name="factorFileProvider">Provider used to look up price and split factors</param>
/// <param name="exclusions">The symbols to be excluded from processing, null excludes nothing</param>
/// <param name="ignoreMapless">Ignore the symbols without a map file.</param>
/// <param name="symbolResolver">Function used to provide symbol resolution. Default resolution uses the zip file name to resolve
/// the symbol, specify null for this behavior.</param>
/// <returns>A collection of the generated coarse files</returns>
/// <exception cref="Exception">Thrown when the parent directory of <paramref name="dailyFolder"/>
/// or of <paramref name="coarseFolder"/> cannot be resolved</exception>
public static ICollection<string> ProcessDailyFolder(string dailyFolder, string coarseFolder, MapFileResolver mapFileResolver, IFactorFileProvider factorFileProvider, HashSet<string> exclusions, bool ignoreMapless, Func<string, string> symbolResolver = null)
{
    const decimal scaleFactor = 10000m;

    Log.Trace("Processing: {0}", dailyFolder);

    var start = DateTime.UtcNow;

    var symbols = 0;
    var maplessCount = 0;
    var dates = new HashSet<DateTime>();

    // instead of opening/closing these constantly, open them once and dispose at the end (~3x speed improvement)
    var writers = new Dictionary<string, StreamWriter>();

    var dailyFolderDirectoryInfo = new DirectoryInfo(dailyFolder).Parent;
    if (dailyFolderDirectoryInfo == null)
    {
        throw new Exception("Unable to resolve market for daily folder: " + dailyFolder);
    }
    var market = dailyFolderDirectoryInfo.Name.ToLowerInvariant();

    var fundamentalDirectoryInfo = new DirectoryInfo(coarseFolder).Parent;
    if (fundamentalDirectoryInfo == null)
    {
        throw new Exception("Unable to resolve fundamental path for coarse folder: " + coarseFolder);
    }

    var fineFundamentalFolder = Path.Combine(Globals.DataFolder, SecurityType.Equity.ToLower(), Market.USA.ToLower(), "fundamental", "fine");

    try
    {
        // open up each daily file to get the values and append to the daily coarse files
        foreach (var file in Directory.EnumerateFiles(dailyFolder, "*.zip"))
        {
            try
            {
                var symbol = Path.GetFileNameWithoutExtension(file);
                if (symbol == null)
                {
                    Log.Trace("CoarseGenerator.ProcessDailyFolder(): Unable to resolve symbol from file: {0}", file);
                    continue;
                }

                if (symbolResolver != null)
                {
                    symbol = symbolResolver(symbol);
                }

                // tickers are identifiers, not linguistic text: use invariant casing
                symbol = symbol.ToUpperInvariant();

                if (exclusions != null && exclusions.Contains(symbol))
                {
                    Log.Trace("Excluded symbol: {0}", symbol);
                    continue;
                }

                // check if symbol has any fine fundamental data
                var firstFineSymbolDate = DateTime.MaxValue;
                if (Directory.Exists(fineFundamentalFolder))
                {
                    var fineSymbolFolder = Path.Combine(fineFundamentalFolder, symbol.ToLowerInvariant());

                    // FirstOrDefault yields null for an existing but empty folder
                    var firstFineSymbolFileName = Directory.Exists(fineSymbolFolder)
                        ? Directory.GetFiles(fineSymbolFolder).OrderBy(x => x).FirstOrDefault()
                        : string.Empty;
                    if (!string.IsNullOrEmpty(firstFineSymbolFileName))
                    {
                        firstFineSymbolDate = DateTime.ParseExact(Path.GetFileNameWithoutExtension(firstFineSymbolFileName), "yyyyMMdd", CultureInfo.InvariantCulture);
                    }
                }

                // NOTE(review): 'zip' is never disposed here - confirm whether disposing the reader releases it
                ZipFile zip;
                using (var reader = Compression.Unzip(file, out zip))
                {
                    var checkedForMapFile = false;

                    symbols++;
                    string line;
                    while ((line = reader.ReadLine()) != null)
                    {
                        //20150625.csv
                        var csv = line.Split(',');
                        var date = DateTime.ParseExact(csv[0], DateFormat.TwelveCharacter, CultureInfo.InvariantCulture);

                        // the mapless check only needs to run once per file, on its first line
                        if (ignoreMapless && !checkedForMapFile)
                        {
                            checkedForMapFile = true;
                            if (!mapFileResolver.ResolveMapFile(symbol, date).Any())
                            {
                                // if the resolved map file has zero entries then it's a mapless symbol
                                maplessCount++;
                                break;
                            }
                        }

                        var close = decimal.Parse(csv[4], CultureInfo.InvariantCulture) / scaleFactor;
                        var volume = long.Parse(csv[5], CultureInfo.InvariantCulture);

                        var dollarVolume = close * volume;

                        var coarseFile = Path.Combine(coarseFolder, date.ToString("yyyyMMdd", CultureInfo.InvariantCulture) + ".csv");
                        dates.Add(date);

                        // try to resolve a map file and if found, regen the sid
                        var sid = SecurityIdentifier.GenerateEquity(SecurityIdentifier.DefaultDate, symbol, market);
                        var mapFile = mapFileResolver.ResolveMapFile(symbol, date);
                        if (!mapFile.IsNullOrEmpty())
                        {
                            // if available, us the permtick in the coarse files, because of this, we need
                            // to update the coarse files each time new map files are added/permticks change
                            sid = SecurityIdentifier.GenerateEquity(mapFile.FirstDate, mapFile.OrderBy(x => x.Date).First().MappedSymbol, market);
                        }
                        if (mapFile == null && ignoreMapless)
                        {
                            // if we're ignoring mapless files then we should always be able to resolve this
                            Log.Error(string.Format("CoarseGenerator.ProcessDailyFolder(): Unable to resolve map file for {0} as of {1}", symbol, date.ToShortDateString()));
                            continue;
                        }

                        // check if symbol has fine fundamental data for the current date
                        var hasFundamentalDataForDate = date >= firstFineSymbolDate;

                        // get price and split factors from factor files
                        var leanSymbol = new Symbol(sid, symbol);
                        var factorFile = factorFileProvider.Get(leanSymbol);
                        var factorFileRow = factorFile?.GetScalingFactors(date);
                        var priceFactor = factorFileRow?.PriceFactor ?? 1m;
                        var splitFactor = factorFileRow?.SplitFactor ?? 1m;

                        // sid,symbol,close,volume,dollar volume,has fundamental data,price factor,split factor
                        var coarseFileLine = string.Format(
                            CultureInfo.InvariantCulture,
                            "{0},{1},{2},{3},{4},{5},{6},{7}",
                            sid,
                            symbol,
                            close,
                            volume,
                            Math.Truncate(dollarVolume),
                            hasFundamentalDataForDate,
                            priceFactor,
                            splitFactor);

                        StreamWriter writer;
                        if (!writers.TryGetValue(coarseFile, out writer))
                        {
                            writer = new StreamWriter(new FileStream(coarseFile, FileMode.Create, FileAccess.Write, FileShare.Write));
                            writers[coarseFile] = writer;
                        }
                        writer.WriteLine(coarseFileLine);
                    }
                }

                if (symbols % 1000 == 0)
                {
                    Log.Trace("CoarseGenerator.ProcessDailyFolder(): Completed processing {0} symbols. Current elapsed: {1} seconds", symbols, (DateTime.UtcNow - start).TotalSeconds.ToString("0.00"));
                }
            }
            catch (Exception err)
            {
                // log the error and continue with the process
                Log.Error(err.ToString());
            }
        }

        Log.Trace("CoarseGenerator.ProcessDailyFolder(): Saving {0} coarse files to disk", dates.Count);
    }
    finally
    {
        // dispose (and thereby flush) all the writers, even if enumerating the daily folder failed
        foreach (var writer in writers.Values)
        {
            writer.Dispose();
        }
    }

    var stop = DateTime.UtcNow;

    Log.Trace("CoarseGenerator.ProcessDailyFolder(): Processed {0} symbols into {1} coarse files in {2} seconds", symbols, dates.Count, (stop - start).TotalSeconds.ToString("0.00"));
    Log.Trace("CoarseGenerator.ProcessDailyFolder(): Excluded {0} mapless symbols.", maplessCount);

    return writers.Keys;
}
/// <summary>
/// Runs this instance: reads the daily data of every security resolved from the map files and
/// writes one coarse csv file per date into the destination folder.
/// </summary>
/// <returns>
/// True when generation completed; false when the market or fine fundamental folder could not
/// be found, or when an exception was thrown while processing (the exception is logged).
/// </returns>
public bool Run()
{
    var startTime = DateTime.UtcNow;
    var success = true;
    Log.Trace($"CoarseUniverseGeneratorProgram.ProcessDailyFolder(): Processing: {_dailyDataFolder.FullName}");

    var symbolsProcessed = 0;
    var filesRead = 0;
    var dailyFilesNotFound = 0;
    var coarseFilesGenerated = 0;

    var mapFileResolver = _mapFileProvider.Get(_market);

    var blackListedTickers = new HashSet<string>();
    if (_blackListedTickersFile.Exists)
    {
        blackListedTickers = File.ReadAllLines(_blackListedTickersFile.FullName).ToHashSet();
    }

    // DirectoryInfo.Parent is null for a root directory
    var marketFolder = _dailyDataFolder.Parent;
    if (marketFolder == null)
    {
        Log.Error($"CoarseUniverseGenerator.Run(): FAIL, unable to resolve the market folder from {_dailyDataFolder.FullName}! ");
        return false;
    }

    var fineFundamentalFolder = new DirectoryInfo(Path.Combine(marketFolder.FullName, "fundamental", "fine"));
    if (!fineFundamentalFolder.Exists)
    {
        Log.Error($"CoarseUniverseGenerator.Run(): FAIL, Fine Fundamental folder not found at {fineFundamentalFolder}! ");
        return false;
    }

    var securityIdentifierContexts = PopulateSidContex(mapFileResolver, blackListedTickers);
    var dailyPricesByTicker = new ConcurrentDictionary<string, List<TradeBar>>();
    var outputCoarseContent = new ConcurrentDictionary<DateTime, List<string>>();

    // use half of the processors, but never 0: ParallelOptions rejects a MaxDegreeOfParallelism
    // of 0 and 'ProcessorCount / 2' is 0 on a single core machine
    var parallelOptions = new ParallelOptions { MaxDegreeOfParallelism = Math.Max(1, Environment.ProcessorCount / 2) };

    try
    {
        Parallel.ForEach(securityIdentifierContexts, parallelOptions, sidContext =>
        {
            var symbol = new Symbol(sidContext.SID, sidContext.LastTicker);
            var symbolCount = Interlocked.Increment(ref symbolsProcessed);
            Log.Debug($"CoarseUniverseGeneratorProgram.Run(): Processing {symbol}");
            var factorFile = _factorFileProvider.Get(symbol);

            // Populate dailyPricesByTicker with all daily data by ticker for all tickers of this security.
            foreach (var ticker in sidContext.Tickers)
            {
                var dailyFile = new FileInfo(Path.Combine(_dailyDataFolder.FullName, $"{ticker}.zip"));
                if (!dailyFile.Exists)
                {
                    Log.Error($"CoarseUniverseGeneratorProgram.Run(): {dailyFile} not found!");
                    Interlocked.Increment(ref dailyFilesNotFound);
                    continue;
                }

                // TryAdd makes the check-then-add atomic, so a ticker shared by several securities
                // processed in parallel is only counted once in 'filesRead'
                if (!dailyPricesByTicker.ContainsKey(ticker) && dailyPricesByTicker.TryAdd(ticker, ParseDailyFile(dailyFile)))
                {
                    Interlocked.Increment(ref filesRead);
                }
            }

            // Look for daily data for each ticker of the actual security
            for (int mapFileRowIndex = sidContext.MapFileRows.Length - 1; mapFileRowIndex >= 1; mapFileRowIndex--)
            {
                var ticker = sidContext.MapFileRows[mapFileRowIndex].Item2.ToLowerInvariant();
                var endDate = sidContext.MapFileRows[mapFileRowIndex].Item1;
                var startDate = sidContext.MapFileRows[mapFileRowIndex - 1].Item1;

                List<TradeBar> tickerDailyData;
                if (!dailyPricesByTicker.TryGetValue(ticker, out tickerDailyData))
                {
                    Log.Error($"CoarseUniverseGeneratorProgram.Run(): Daily data for ticker {ticker.ToUpperInvariant()} not found!");
                    continue;
                }

                // dates of the fine fundamental files available for this ticker, if any
                var tickerFineFundamentalFolder = Path.Combine(fineFundamentalFolder.FullName, ticker);
                var fineAvailableDates = Enumerable.Empty<DateTime>();
                if (Directory.Exists(tickerFineFundamentalFolder))
                {
                    fineAvailableDates = Directory.GetFiles(tickerFineFundamentalFolder, "*.zip")
                        .Select(f => DateTime.ParseExact(Path.GetFileNameWithoutExtension(f), DateFormat.EightCharacter, CultureInfo.InvariantCulture))
                        .ToList();
                }

                // Get daily data only for the time the ticker was
                foreach (var tradeBar in tickerDailyData.Where(tb => tb.Time >= startDate && tb.Time <= endDate))
                {
                    var coarseRow = GenerateFactorFileRow(ticker, sidContext, factorFile, tradeBar, fineAvailableDates, fineFundamentalFolder);

                    // the per-date list is shared between threads, so additions are done under a lock
                    outputCoarseContent.AddOrUpdate(tradeBar.Time,
                        new List<string> { coarseRow },
                        (time, list) =>
                        {
                            lock (list)
                            {
                                list.Add(coarseRow);
                                return list;
                            }
                        });
                }
            }

            if (symbolCount % 1000 == 0)
            {
                var elapsed = DateTime.UtcNow - startTime;
                Log.Trace($"CoarseUniverseGeneratorProgram.Run(): Processed {symbolCount} in {elapsed:g} at {symbolCount / elapsed.TotalMinutes:F2} symbols/minute ");
            }
        });

        _destinationFolder.Create();
        var startWriting = DateTime.UtcNow;

        // write one csv file per date, rows sorted
        Parallel.ForEach(outputCoarseContent, coarseByDate =>
        {
            var filename = $"{coarseByDate.Key.ToString(DateFormat.EightCharacter, CultureInfo.InvariantCulture)}.csv";
            var filePath = Path.Combine(_destinationFolder.FullName, filename);
            Log.Debug($"CoarseUniverseGeneratorProgram.Run(): Saving {filename} with {coarseByDate.Value.Count} entries.");
            File.WriteAllLines(filePath, coarseByDate.Value.OrderBy(cr => cr));

            var filesCount = Interlocked.Increment(ref coarseFilesGenerated);
            if (filesCount % 1000 == 0)
            {
                var elapsed = DateTime.UtcNow - startWriting;
                Log.Trace($"CoarseUniverseGeneratorProgram.Run(): Processed {filesCount} in {elapsed:g} at {filesCount / elapsed.TotalSeconds:F2} files/second ");
            }
        });

        Log.Trace($"\n\nTotal of {coarseFilesGenerated} coarse files generated in {DateTime.UtcNow - startTime:g}:\n" +
                  $"\t => {filesRead} daily data files read.\n");
    }
    catch (Exception e)
    {
        Log.Error(e, $"CoarseUniverseGeneratorProgram.Run(): FAILED!");
        success = false;
    }

    return success;
}
/// <summary>
/// Iterates each daily file in the specified <paramref name="dailyFolder"/> and adds a line for each
/// day to the appropriate coarse file
/// </summary>
/// <param name="dailyFolder">The folder with daily data.</param>
/// <param name="coarseFolder">The coarse output folder.</param>
/// <param name="mapFileResolver">The map file resolver.</param>
/// <param name="factorFileProvider">The factor file provider.</param>
/// <param name="exclusions">The symbols to be excluded from processing, null excludes nothing.</param>
/// <param name="ignoreMapless">Ignore the symbols without a map file.</param>
/// <param name="symbolResolver">Function used to provide symbol resolution. Default resolution uses the zip file name to resolve
/// the symbol, specify null for this behavior.</param>
/// <returns>Collection with the names of the newly generated coarse files.</returns>
/// <exception cref="Exception">
/// Thrown when the parent directory of <paramref name="dailyFolder"/> (the market folder)
/// or the parent directory of <paramref name="coarseFolder"/> cannot be resolved.
/// </exception>
public static ICollection<string> ProcessDailyFolder(string dailyFolder, string coarseFolder, MapFileResolver mapFileResolver, IFactorFileProvider factorFileProvider,
    HashSet<string> exclusions, bool ignoreMapless, Func<string, string> symbolResolver = null)
{
    const decimal scaleFactor = 10000m;

    Log.Trace("Processing: {0}", dailyFolder);

    var start = DateTime.UtcNow;

    var symbols = 0;
    var maplessCount = 0;
    var dates = new HashSet<DateTime>();

    // instead of opening/closing these constantly, open them once and dispose at the end (~3x speed improvement)
    var writers = new Dictionary<string, StreamWriter>();

    var marketDirectoryInfo = new DirectoryInfo(dailyFolder).Parent;
    if (marketDirectoryInfo == null)
    {
        throw new Exception($"Unable to resolve market for daily folder: {dailyFolder}");
    }
    var market = marketDirectoryInfo.Name.ToLowerInvariant();

    var fundamentalDirectoryInfo = new DirectoryInfo(coarseFolder).Parent;
    if (fundamentalDirectoryInfo == null)
    {
        throw new Exception($"Unable to resolve fundamental path for coarse folder: {coarseFolder}");
    }

    var fineFundamentalFolder = Path.Combine(marketDirectoryInfo.FullName, "fundamental", "fine");

    try
    {
        // open up each daily file to get the values and append to the daily coarse files
        foreach (var file in Directory.EnumerateFiles(dailyFolder, "*.zip"))
        {
            try
            {
                // validate the file name before it is used to build any path
                var fileTicker = Path.GetFileNameWithoutExtension(file);
                if (fileTicker == null)
                {
                    Log.Trace("CoarseGenerator.ProcessDailyFolder(): Unable to resolve symbol from file: {0}", file);
                    continue;
                }

                var ticker = symbolResolver != null ? symbolResolver(fileTicker) : fileTicker;
                ticker = ticker.ToUpperInvariant();

                if (exclusions != null && exclusions.Contains(ticker))
                {
                    Log.Trace("Excluded symbol: {0}", ticker);
                    continue;
                }

                // dates of the fine files available for this ticker; the folder is looked up with the
                // raw file name (before symbol resolution and upper-casing). Done after the exclusion
                // check so excluded tickers don't cost a directory scan.
                var fineAvailableDates = Enumerable.Empty<DateTime>();
                var tickerFineFundamentalFolder = Path.Combine(fineFundamentalFolder, fileTicker);
                if (Directory.Exists(tickerFineFundamentalFolder))
                {
                    fineAvailableDates = Directory.GetFiles(tickerFineFundamentalFolder, "*.zip")
                        .Select(f => DateTime.ParseExact(Path.GetFileNameWithoutExtension(f), DateFormat.EightCharacter, CultureInfo.InvariantCulture))
                        .ToList();
                }

                ZipFile zip;
                using (var reader = Compression.Unzip(file, out zip))
                {
                    var checkedForMapFile = false;

                    symbols++;
                    string line;
                    while ((line = reader.ReadLine()) != null)
                    {
                        //20150625.csv
                        var csv = line.Split(',');
                        var date = DateTime.ParseExact(csv[0], DateFormat.TwelveCharacter, CultureInfo.InvariantCulture);

                        // the mapless check only needs to run once per file, on its first line
                        if (ignoreMapless && !checkedForMapFile)
                        {
                            checkedForMapFile = true;
                            if (!mapFileResolver.ResolveMapFile(ticker, date).Any())
                            {
                                // if the resolved map file has zero entries then it's a mapless symbol
                                maplessCount++;
                                break;
                            }
                        }

                        var close = Parse.Decimal(csv[4]) / scaleFactor;
                        var volume = Parse.Long(csv[5]);

                        var dollarVolume = close * volume;

                        var coarseFile = Path.Combine(coarseFolder, date.ToStringInvariant("yyyyMMdd") + ".csv");
                        dates.Add(date);

                        // try to resolve a map file and if found, regen the sid
                        var sid = SecurityIdentifier.GenerateEquity(SecurityIdentifier.DefaultDate, ticker, market);
                        var mapFile = mapFileResolver.ResolveMapFile(ticker, date);
                        if (!mapFile.IsNullOrEmpty())
                        {
                            // if available, us the permtick in the coarse files, because of this, we need
                            // to update the coarse files each time new map files are added/permticks change
                            sid = SecurityIdentifier.GenerateEquity(mapFile.FirstDate, mapFile.OrderBy(x => x.Date).First().MappedSymbol, market);
                        }

                        if (mapFile == null && ignoreMapless)
                        {
                            // if we're ignoring mapless files then we should always be able to resolve this
                            Log.Error($"CoarseGenerator.ProcessDailyFolder(): Unable to resolve map file for {ticker} as of {date.ToStringInvariant("d")}");
                            continue;
                        }

                        // get price and split factors from factor files
                        var symbol = new Symbol(sid, ticker);
                        var factorFile = factorFileProvider.Get(symbol);
                        var factorFileRow = factorFile?.GetScalingFactors(date);
                        var priceFactor = factorFileRow?.PriceFactor ?? 1m;
                        var splitFactor = factorFileRow?.SplitFactor ?? 1m;

                        // Check if security has fine file within a trailing month for a date-ticker set.
                        // There are tricky cases where a folder named by a ticker can have data for multiple securities.
                        // e.g  GOOG -> GOOGL (GOOG T1AZ164W5VTX) / GOOCV -> GOOG (GOOCV VP83T1ZUHROL) case.
                        // The fine data in the 'fundamental/fine/goog' folder will be for 'GOOG T1AZ164W5VTX' up to the 2014-04-02 and for 'GOOCV VP83T1ZUHROL' afterward.
                        // Therefore, before checking if the security has fundamental data for a date, we need to filter the fine files by the map's first date.
                        var firstDate = mapFile?.FirstDate ?? DateTime.MinValue;
                        var hasFundamentalDataForDate = fineAvailableDates.Where(d => d >= firstDate).Any(d => date.AddMonths(-1) <= d && d <= date);

                        // The following section handles mergers and acquisitions cases.
                        // e.g. YHOO -> AABA (YHOO R735QTJ8XC9X)
                        // The dates right after the acquisition, valid fine fundamental data for AABA are still under the former ticker folder.
                        // Therefore if no fine fundamental data is found in the 'fundamental/fine/aaba' folder, it searches into the 'yhoo' folder.
                        if (mapFile != null && mapFile.Count() > 2 && !hasFundamentalDataForDate)
                        {
                            var previousTicker = mapFile.LastOrDefault(m => m.Date < date)?.MappedSymbol;
                            if (previousTicker != null)
                            {
                                var previousTickerFineFundamentalFolder = Path.Combine(fineFundamentalFolder, previousTicker);
                                if (Directory.Exists(previousTickerFineFundamentalFolder))
                                {
                                    var previousTickerFineAvailableDates = Directory.GetFiles(previousTickerFineFundamentalFolder, "*.zip")
                                        .Select(f => DateTime.ParseExact(Path.GetFileNameWithoutExtension(f), DateFormat.EightCharacter, CultureInfo.InvariantCulture))
                                        .ToList();
                                    hasFundamentalDataForDate = previousTickerFineAvailableDates.Where(d => d >= firstDate).Any(d => date.AddMonths(-1) <= d && d <= date);
                                }
                            }
                        }

                        // sid,symbol,close,volume,dollar volume,has fundamental data,price factor,split factor
                        var coarseFileLine = string.Format(
                            CultureInfo.InvariantCulture,
                            "{0},{1},{2},{3},{4},{5},{6},{7}",
                            sid,
                            ticker,
                            close,
                            volume,
                            Math.Truncate(dollarVolume),
                            hasFundamentalDataForDate,
                            priceFactor,
                            splitFactor);

                        StreamWriter writer;
                        if (!writers.TryGetValue(coarseFile, out writer))
                        {
                            writer = new StreamWriter(new FileStream(coarseFile, FileMode.Create, FileAccess.Write, FileShare.Write));
                            writers[coarseFile] = writer;
                        }
                        writer.WriteLine(coarseFileLine);
                    }
                }

                if (symbols % 1000 == 0)
                {
                    Log.Trace($"CoarseGenerator.ProcessDailyFolder(): Completed processing {symbols} symbols. Current elapsed: {(DateTime.UtcNow - start).TotalSeconds.ToStringInvariant("0.00")} seconds");
                }
            }
            catch (Exception err)
            {
                // log the error and continue with the process
                Log.Error(err.ToString());
            }
        }

        Log.Trace("CoarseGenerator.ProcessDailyFolder(): Saving {0} coarse files to disk", dates.Count);
    }
    finally
    {
        // dispose (and thereby flush) all the writers, even if enumerating the daily folder failed
        foreach (var writer in writers.Values)
        {
            writer.Dispose();
        }
    }

    var stop = DateTime.UtcNow;

    Log.Trace($"CoarseGenerator.ProcessDailyFolder(): Processed {symbols} symbols into {dates.Count} coarse files in {(stop - start).TotalSeconds.ToStringInvariant("0.00")} seconds");
    Log.Trace($"CoarseGenerator.ProcessDailyFolder(): Excluded {maplessCount} mapless symbols.");

    return writers.Keys;
}
/// <summary>
/// Initializes the <see cref="SubscriptionDataReader"/> instance
/// </summary>
/// <remarks>Should be called after all consumers of <see cref="NewTradableDate"/> event are set,
/// since it will produce events. Calling it again after a completed initialization is a no-op.</remarks>
public void Initialize()
{
    if (_initialized)
    {
        return;
    }

    // create the instance of the data type we'll be reading
    try
    {
        _dataFactory = _config.Type.GetBaseDataInstance();
    }
    catch (ArgumentException exception)
    {
        OnInvalidConfigurationDetected(new InvalidConfigurationDetectedEventArgs(exception.Message));
        _endOfStream = true;
        return;
    }

    // set the access token for the data sources that need one, if not set already
    if (_dataFactory is Quandl && !Quandl.IsAuthCodeSet)
    {
        Quandl.SetAuthCode(Config.Get("quandl-auth-token"));
    }

    if (_dataFactory is TiingoDailyData && !Tiingo.IsAuthCodeSet)
    {
        Tiingo.SetAuthCode(Config.Get("tiingo-auth-token"));
    }

    if (_dataFactory is USEnergyInformation && !USEnergyInformation.IsAuthCodeSet)
    {
        USEnergyInformation.SetAuthCode(Config.Get("us-energy-information-auth-token"));
    }

    // start from empty auxiliary files, replaced below when real ones are resolved
    _factorFile = new FactorFile(_config.Symbol.Value, new List<FactorFileRow>());
    _mapFile = new MapFile(_config.Symbol.Value, new List<MapFileRow>());

    // map files are loaded whenever the ticker should be mapped;
    // factor files only for non-custom, non-option data
    if (_config.TickerShouldBeMapped())
    {
        try
        {
            var mapFile = _mapFileResolver.ResolveMapFile(_config.Symbol, _config.Type);

            // only take the resolved map file if it has data, otherwise keep the empty one
            if (mapFile.Any())
            {
                _mapFile = mapFile;
            }

            if (!_config.IsCustomData && _config.SecurityType != SecurityType.Option)
            {
                var factorFile = _factorFileProvider.Get(_config.Symbol);
                _hasScaleFactors = factorFile != null;
                if (_hasScaleFactors)
                {
                    _factorFile = factorFile;

                    // if factor file has minimum date, update start period if before minimum date
                    var hasMinimumDate = !_isLiveMode && _factorFile != null && _factorFile.FactorFileMinimumDate.HasValue;
                    if (hasMinimumDate && _periodStart < _factorFile.FactorFileMinimumDate.Value)
                    {
                        _periodStart = _factorFile.FactorFileMinimumDate.Value;

                        OnNumericalPrecisionLimited(
                            new NumericalPrecisionLimitedEventArgs(
                                $"Data for symbol {_config.Symbol.Value} has been limited due to numerical precision issues in the factor file. " +
                                $"The starting date has been set to {_factorFile.FactorFileMinimumDate.Value.ToShortDateString()}."));
                    }
                }
            }
        }
        catch (Exception err)
        {
            Log.Error(err, "Fetching Price/Map Factors: " + _config.Symbol.ID + ": ");
        }
    }

    // Estimate delisting date.
    var securityType = _config.Symbol.ID.SecurityType;
    if (securityType == SecurityType.Future)
    {
        _delistingDate = _config.Symbol.ID.Date;
    }
    else if (securityType == SecurityType.Option)
    {
        _delistingDate = OptionSymbol.GetLastDayOfTrading(_config.Symbol);
    }
    else
    {
        _delistingDate = _mapFile.DelistingDate;
    }

    // adding a day so we stop at EOD
    _delistingDate = _delistingDate.AddDays(1);

    _subscriptionFactoryEnumerator = ResolveDataEnumerator(true);

    _initialized = true;
}
/// <summary>
/// Creates a subscription data reader: stores the request, instantiates the configured data type,
/// resolves map/factor files where applicable, estimates the delisting date and resolves the
/// first data enumerator.
/// </summary>
/// <param name="config">Subscription configuration object</param>
/// <param name="periodStart">Start date for the data request/backtest</param>
/// <param name="periodFinish">Finish date for the data request/backtest</param>
/// <param name="resultHandler">Result handler that receives the error and debug messages raised here</param>
/// <param name="mapFileResolver">Used for resolving the correct map files</param>
/// <param name="factorFileProvider">Used for getting factor files</param>
/// <param name="dataProvider">Used for getting files not present on disk</param>
/// <param name="tradeableDates">Defines the dates for which we'll request data, in order, in the security's exchange time zone</param>
/// <param name="isLiveMode">True if we're in live mode, false otherwise</param>
/// <param name="dataCacheProvider">Used for caching files</param>
/// <param name="includeAuxilliaryData">True if we want to emit aux data, false to only emit price data</param>
public SubscriptionDataReader(
    SubscriptionDataConfig config,
    DateTime periodStart,
    DateTime periodFinish,
    IResultHandler resultHandler,
    MapFileResolver mapFileResolver,
    IFactorFileProvider factorFileProvider,
    IDataProvider dataProvider,
    IEnumerable<DateTime> tradeableDates,
    bool isLiveMode,
    IDataCacheProvider dataCacheProvider,
    bool includeAuxilliaryData = true)
{
    // Keep the request parameters.
    _config = config;
    _periodStart = periodStart;
    _periodFinish = periodFinish;
    _isLiveMode = isLiveMode;
    _includeAuxilliaryData = includeAuxilliaryData;
    _dataProvider = dataProvider;
    _dataCacheProvider = dataCacheProvider;
    _auxiliaryData = new Queue<BaseData>();

    // Create the dynamic type-activator for the configured data type.
    var activator = ObjectActivator.GetActivator(_config.Type);

    _resultHandler = resultHandler;
    _tradeableDates = tradeableDates.GetEnumerator();

    if (activator == null)
    {
        var typeName = _config.Type.Name;
        _resultHandler.ErrorMessage("Custom data type '" + typeName + "' missing parameterless constructor E.g. public " + typeName + "() { }");
        _endOfStream = true;
        return;
    }

    // Create an instance of the configured type to act as the data factory.
    _dataFactory = activator.Invoke(new object[] { _config.Type }) as BaseData;

    // Vendor specific data types need their access token set once.
    if (_dataFactory is Quandl && !Quandl.IsAuthCodeSet)
    {
        Quandl.SetAuthCode(Config.Get("quandl-auth-token"));
    }

    if (_dataFactory is TiingoDailyData && !Tiingo.IsAuthCodeSet)
    {
        Tiingo.SetAuthCode(Config.Get("tiingo-auth-token"));
    }

    // Start from empty files; they are replaced below only when real ones are resolved.
    _factorFile = new FactorFile(_config.Symbol.Value, new List<FactorFileRow>());
    _mapFile = new MapFile(_config.Symbol.Value, new List<MapFileRow>());

    if (!_config.IsCustomData)
    {
        if (_config.SecurityType == SecurityType.Equity)
        {
            // equities: load both the map file and the factor file
            try
            {
                // keep the empty map file defined above when the resolved one has no rows
                var equityMapFile = mapFileResolver.ResolveMapFile(_config.Symbol.ID.Symbol, _config.Symbol.ID.Date);
                if (equityMapFile.Any())
                {
                    _mapFile = equityMapFile;
                }

                var resolvedFactorFile = factorFileProvider.Get(_config.Symbol);
                _hasScaleFactors = resolvedFactorFile != null;
                if (_hasScaleFactors)
                {
                    _factorFile = resolvedFactorFile;

                    // outside live mode the request start is clamped to the factor file minimum date
                    if (!_isLiveMode)
                    {
                        var minimumDate = _factorFile.FactorFileMinimumDate;
                        if (minimumDate.HasValue && _periodStart < minimumDate.Value)
                        {
                            _periodStart = minimumDate.Value;

                            _resultHandler.DebugMessage(
                                string.Format(
                                    "Data for symbol {0} has been limited due to numerical precision issues in the factor file. The starting date has been set to {1}.",
                                    _config.Symbol.Value,
                                    minimumDate.Value.ToShortDateString()));
                        }
                    }
                }
            }
            catch (Exception err)
            {
                Log.Error(err, "Fetching Price/Map Factors: " + _config.Symbol.ID + ": ");
            }
        }
        else if (_config.SecurityType == SecurityType.Option)
        {
            // options: only the map file of the underlying is loaded
            try
            {
                var underlyingMapFile = mapFileResolver.ResolveMapFile(_config.Symbol.Underlying.ID.Symbol, _config.Symbol.Underlying.ID.Date);
                if (underlyingMapFile.Any())
                {
                    _mapFile = underlyingMapFile;
                }
            }
            catch (Exception err)
            {
                Log.Error(err, "Map Factors: " + _config.Symbol.ID + ": ");
            }
        }
    }

    // Estimate delisting date.
    var securityType = _config.Symbol.ID.SecurityType;
    if (securityType == SecurityType.Future)
    {
        _delistingDate = _config.Symbol.ID.Date;
    }
    else if (securityType == SecurityType.Option)
    {
        _delistingDate = OptionSymbol.GetLastDayOfTrading(_config.Symbol);
    }
    else
    {
        _delistingDate = _mapFile.DelistingDate;
    }

    _subscriptionFactoryEnumerator = ResolveDataEnumerator(true);
}
/// <summary>
/// Prepares this <see cref="SubscriptionDataReader"/> for enumeration: instantiates the configured
/// data type, sets any missing vendor auth codes, resolves map/factor files where applicable,
/// estimates the delisting date and resolves the first data enumerator. Calls after the first
/// successful one return immediately.
/// </summary>
public void Initialize()
{
    if (_initialized)
    {
        return;
    }

    // Create the dynamic type-activator for the configured data type; a missing activator
    // is reported through the invalid-configuration event and ends the stream.
    var activator = ObjectActivator.GetActivator(_config.Type);
    if (activator == null)
    {
        OnInvalidConfigurationDetected(
            new InvalidConfigurationDetectedEventArgs(
                $"Custom data type \'{_config.Type.Name}\' missing parameterless constructor " +
                $"E.g. public {_config.Type.Name}() {{ }}"));

        _endOfStream = true;
        return;
    }

    // Create an instance of the configured type to act as the data factory.
    _dataFactory = activator.Invoke(new object[] { _config.Type }) as BaseData;

    // Vendor specific data types need their access token set once.
    if (_dataFactory is Quandl && !Quandl.IsAuthCodeSet)
    {
        Quandl.SetAuthCode(Config.Get("quandl-auth-token"));
    }

    if (_dataFactory is TiingoDailyData && !Tiingo.IsAuthCodeSet)
    {
        Tiingo.SetAuthCode(Config.Get("tiingo-auth-token"));
    }

    // Start from empty files; they are replaced below only when real ones are resolved.
    _factorFile = new FactorFile(_config.Symbol.Value, new List<FactorFileRow>());
    _mapFile = new MapFile(_config.Symbol.Value, new List<MapFileRow>());

    if (!_config.IsCustomData)
    {
        if (_config.SecurityType == SecurityType.Equity)
        {
            // equities: load both the map file and the factor file
            try
            {
                // keep the empty map file defined above when the resolved one has no rows
                var equityMapFile = _mapFileResolver.ResolveMapFile(_config.Symbol.ID.Symbol, _config.Symbol.ID.Date);
                if (equityMapFile.Any())
                {
                    _mapFile = equityMapFile;
                }

                var resolvedFactorFile = _factorFileProvider.Get(_config.Symbol);
                _hasScaleFactors = resolvedFactorFile != null;
                if (_hasScaleFactors)
                {
                    _factorFile = resolvedFactorFile;

                    // outside live mode the request start is clamped to the factor file minimum date
                    if (!_isLiveMode)
                    {
                        var minimumDate = _factorFile.FactorFileMinimumDate;
                        if (minimumDate.HasValue && _periodStart < minimumDate.Value)
                        {
                            _periodStart = minimumDate.Value;

                            OnNumericalPrecisionLimited(
                                new NumericalPrecisionLimitedEventArgs(
                                    $"Data for symbol {_config.Symbol.Value} has been limited due to numerical precision issues in the factor file. " +
                                    $"The starting date has been set to {minimumDate.Value.ToShortDateString()}."));
                        }
                    }
                }
            }
            catch (Exception err)
            {
                Log.Error(err, "Fetching Price/Map Factors: " + _config.Symbol.ID + ": ");
            }
        }
        else if (_config.SecurityType == SecurityType.Option)
        {
            // options: only the map file of the underlying is loaded
            try
            {
                var underlyingMapFile = _mapFileResolver.ResolveMapFile(_config.Symbol.Underlying.ID.Symbol, _config.Symbol.Underlying.ID.Date);
                if (underlyingMapFile.Any())
                {
                    _mapFile = underlyingMapFile;
                }
            }
            catch (Exception err)
            {
                Log.Error(err, "Map Factors: " + _config.Symbol.ID + ": ");
            }
        }
    }

    // Estimate delisting date.
    var securityType = _config.Symbol.ID.SecurityType;
    if (securityType == SecurityType.Future)
    {
        _delistingDate = _config.Symbol.ID.Date;
    }
    else if (securityType == SecurityType.Option)
    {
        _delistingDate = OptionSymbol.GetLastDayOfTrading(_config.Symbol);
    }
    else
    {
        _delistingDate = _mapFile.DelistingDate;
    }

    _subscriptionFactoryEnumerator = ResolveDataEnumerator(true);
    _initialized = true;
}
/// <summary>
/// Checks that requesting the SPY factor file from <c>FactorFileProvider</c> yields a non-null result.
/// </summary>
public void RetrievesFromDisk()
{
    Assert.IsNotNull(FactorFileProvider.Get(Symbols.SPY));
}
/// <summary>
/// Builds a <see cref="Subscription"/> backed by a blocking <see cref="EnqueueableEnumerator{T}"/>
/// and queues a producer on the <see cref="WeightedWorkScheduler"/> that drains the supplied
/// enumerator into it in batches.
/// </summary>
/// <param name="request">The subscription data request</param>
/// <param name="enumerator">The data enumerator stack</param>
/// <param name="factorFileProvider">The factor file provider</param>
/// <param name="enablePriceScale">Enables price factoring; it only takes effect when the
/// subscription configuration also reports that prices should be scaled</param>
/// <returns>A new subscription instance ready to consume</returns>
public static Subscription CreateAndScheduleWorker(
    SubscriptionRequest request,
    IEnumerator<BaseData> enumerator,
    IFactorFileProvider factorFileProvider,
    bool enablePriceScale)
{
    var exchangeHours = request.Security.Exchange.Hours;
    var enqueueable = new EnqueueableEnumerator<SubscriptionData>(true);
    var offsetProvider = new TimeZoneOffsetProvider(
        request.Configuration.ExchangeTimeZone,
        request.StartTimeUtc,
        request.EndTimeUtc);
    var subscription = new Subscription(request, enqueueable, offsetProvider);
    var config = subscription.Configuration;

    // scaling requires both the caller's flag and the configuration's consent
    var scalePrices = enablePriceScale && config.PricesShouldBeScaled();
    var lastTradableDate = DateTime.MinValue;

    // Producer: returns true when it stopped because the batch is full (more work pending),
    // false when the subscription finished, the enumerator ended or an exception occurred.
    Func<int, bool> produce = (workBatchSize) =>
    {
        try
        {
            var produced = 0;
            while (enumerator.MoveNext())
            {
                // subscription has been removed, no need to continue enumerating
                if (enqueueable.HasFinished)
                {
                    enumerator.DisposeSafely();
                    return false;
                }

                var data = enumerator.Current;

                // Let the config filter decide whether this point is emitted;
                // this currently catches auxiliary data that we don't want to emit.
                if (data != null && !config.ShouldEmitData(data, request.IsUniverseSubscription))
                {
                    continue;
                }

                // For equities configured as "Raw" the adjusted values are precalculated as well.
                // The data is still emitted as raw, but if the config changes later on it can
                // emit adjusted data too.
                // See SubscriptionData.Create() and PrecalculatedSubscriptionData for more
                var requestMode = config.DataNormalizationMode;
                if (config.SecurityType == SecurityType.Equity && requestMode == DataNormalizationMode.Raw)
                {
                    requestMode = DataNormalizationMode.Adjusted;
                }

                // The price scale factor is refreshed when the date changes on a non fill forward bar,
                // or when it has not been initialized yet. Auxiliary data is ignored because it is not
                // scaled and the underlying price data could be fill forwarded.
                var refreshScale = scalePrices
                    && data != null
                    && data.Time.Date > lastTradableDate
                    && data.DataType != MarketDataType.Auxiliary
                    && (!data.IsFillForward || lastTradableDate == DateTime.MinValue);
                if (refreshScale)
                {
                    var factorFile = factorFileProvider.Get(request.Configuration.Symbol);
                    var currentDate = data.Time.Date;
                    lastTradableDate = currentDate;
                    request.Configuration.PriceScaleFactor = factorFile.GetPriceScale(
                        currentDate,
                        requestMode,
                        config.ContractDepthOffset,
                        config.DataMappingMode);
                }

                // drop the data into the back of the enqueueable
                enqueueable.Enqueue(
                    SubscriptionData.Create(
                        config,
                        exchangeHours,
                        subscription.OffsetProvider,
                        data,
                        requestMode,
                        scalePrices ? request.Configuration.PriceScaleFactor : null));

                // stop executing if added more data than the work batch size, we don't want to fill the ram
                if (++produced > workBatchSize)
                {
                    return true;
                }
            }
        }
        catch (Exception exception)
        {
            Log.Error(exception, $"Subscription worker task exception {request.Configuration}.");
        }

        // we made it here because MoveNext returned false or we exploded:
        // stop the enqueueable and dispose of the enumerator
        enqueueable.Stop();
        enumerator.DisposeSafely();
        return false;
    };

    WeightedWorkScheduler.Instance.QueueWork(
        config.Symbol,
        produce,
        // if the subscription finished we return 0, so the work is prioritized and gets removed
        () =>
        {
            if (enqueueable.HasFinished)
            {
                return 0;
            }

            // the reported weight is capped at the scheduler's maximum
            var pending = enqueueable.Count;
            if (pending > WeightedWorkScheduler.MaxWorkWeight)
            {
                return WeightedWorkScheduler.MaxWorkWeight;
            }

            return pending;
        });

    return subscription;
}