public void ResolvesStraightMapping()
{
    // A ticker that never changed should map back to itself for any date.
    var date = new DateTime(2015, 08, 23);
    var mapFile = _resolver.ResolveMapFile("SPY", date);
    Assert.IsNotNull(mapFile);
    Assert.AreEqual("SPY", mapFile.GetMappedSymbol(date));
}
/// <summary>
/// Resolves the map file for the subscription's symbol (or its underlying, when present)
/// and returns the ticker that was in effect on the given date.
/// </summary>
private string GetMappedSymbol(SubscriptionDataConfig config, DateTime date)
{
    // Derivatives resolve through their underlying equity's identifier.
    var sid = config.Symbol.HasUnderlying
        ? config.Symbol.Underlying.ID
        : config.Symbol.ID;
    var mapFile = _mapFileResolver.ResolveMapFile(sid.Symbol, sid.Date);
    // Falls back to the config's currently mapped symbol when no entry covers the date.
    return mapFile.GetMappedSymbol(date, config.MappedSymbol);
}
/// <summary>
/// Resolves the map file for the config's equity symbol, or for the underlying
/// equity of an equity option. Returns an empty map file when the security is
/// custom data, is not an equity/option, resolution fails, or the resolved file
/// contains no rows.
/// </summary>
/// <param name="config">Subscription configuration providing the symbol to resolve</param>
/// <param name="mapFileResolver">Resolver used to look up map files</param>
/// <returns>The resolved map file, or an empty one when unavailable</returns>
private static MapFile GetMapFileToUse(
    SubscriptionDataConfig config,
    MapFileResolver mapFileResolver)
{
    // default to an empty map file so callers always receive a usable instance
    var mapFileToUse = new MapFile(config.Symbol.Value, new List<MapFileRow>());

    // FIX: the equity and option branches were duplicated try/catch blocks that
    // differed only in which identifier they resolved; select the identifier first
    if (!config.IsCustomData &&
        (config.SecurityType == SecurityType.Equity || config.SecurityType == SecurityType.Option))
    {
        // options map through their underlying equity's identifier
        var sid = config.SecurityType == SecurityType.Option
            ? config.Symbol.Underlying.ID
            : config.Symbol.ID;

        try
        {
            var mapFile = mapFileResolver.ResolveMapFile(sid.Symbol, sid.Date);

            // only take the resolved map file if it has data, otherwise we'll use the empty one we defined above
            if (mapFile.Any())
            {
                mapFileToUse = mapFile;
            }
        }
        catch (Exception err)
        {
            Log.Error(err, "CorporateEventEnumeratorFactory.GetMapFileToUse():" +
                " Map File: " + config.Symbol.ID + ": ");
        }
    }

    return mapFileToUse;
}
/// <summary>
/// Returns the historically-correct ticker for the request's symbol on the given date.
/// Only equities and options (via their underlying) are subject to ticker remapping;
/// all other security types return the config's current mapped symbol unchanged.
/// </summary>
private string GetMappedSymbol(SubscriptionRequest request, DateTime date)
{
    var config = request.Configuration;
    var securityType = config.Symbol.ID.SecurityType;

    // guard: non-mappable security types pass straight through
    if (securityType != SecurityType.Option && securityType != SecurityType.Equity)
    {
        return config.MappedSymbol;
    }

    // options resolve through their underlying equity's identifier
    var sid = config.Symbol.HasUnderlying ? config.Symbol.Underlying.ID : config.Symbol.ID;
    return _mapFileResolver
        .ResolveMapFile(sid.Symbol, sid.Date)
        .GetMappedSymbol(date, config.MappedSymbol);
}
/// <summary>
/// Resolves the map file to use for the given subscription, covering equities,
/// options, and custom data types that opt into mapping. Returns an empty map
/// file when the ticker is not mapped, resolution throws, or the resolved file
/// has no rows.
/// </summary>
/// <param name="config">Subscription configuration providing symbol and data type</param>
/// <param name="mapFileResolver">Resolver used to look up map files</param>
/// <returns>The resolved map file, or an empty one when unavailable</returns>
private static MapFile GetMapFileToUse(
    SubscriptionDataConfig config,
    MapFileResolver mapFileResolver)
{
    // always have a usable (possibly empty) instance to hand back
    var emptyMapFile = new MapFile(config.Symbol.Value, new List<MapFileRow>());

    if (!config.TickerShouldBeMapped())
    {
        return emptyMapFile;
    }

    try
    {
        var resolved = mapFileResolver.ResolveMapFile(config.Symbol, config.Type);

        // only use the resolved file when it actually contains rows
        if (resolved.Any())
        {
            return resolved;
        }
    }
    catch (Exception err)
    {
        Log.Error(err, "CorporateEventEnumeratorFactory.GetMapFileToUse():" +
            " Map File: " + config.Symbol.ID + ": ");
    }

    return emptyMapFile;
}
/// <summary>
/// Converts a stream of raw PsychSignal CSV data to Lean alternative data format.
/// Note that you must flush after you're done converting to ensure that all data
/// gets written to disk. You can do that by calling <see cref="Dispose"/> once
/// you've finished processing.
///
/// Note: Assumes that it will be given data in ascending order by date
/// </summary>
/// <param name="stream">Stream of raw PsychSignal CSV data to process and convert</param>
public void Convert(Stream stream)
{
    if (_disposedValue)
    {
        throw new ObjectDisposedException("PsychSignalDataConverter has already been disposed");
    }

    var previousTicker = string.Empty;
    var currentLineCount = 0;

    using (var reader = new StreamReader(stream))
    {
        string line;
        while ((line = reader.ReadLine()) != null)
        {
            currentLineCount++;

            var csv = line.Split(',');
            var ticker = csv[1].ToLowerInvariant();
            DateTime timestamp;

            // header rows start with "SOURCE" in the first column
            if (csv[0] == "SOURCE")
            {
                Log.Trace($"PsychSignalDataConverter.Convert(): Skipping line {currentLineCount} - Line contains header information");
                continue;
            }

            if (!DateTime.TryParseExact(csv[2], @"yyyy-MM-dd\THH:mm:ss\Z", CultureInfo.InvariantCulture, DateTimeStyles.AdjustToUniversal, out timestamp))
            {
                Log.Trace($"PsychSignalDataConverter.Convert(): Skipping line {currentLineCount} - Failed to parse date properly");
                continue;
            }

            if (!_mapFileResolver.ResolveMapFile(ticker, timestamp).Any())
            {
                // Because all tickers are all clustered together, we can detect
                // duplicate messages and prevent ourselves from spamming the status log
                if (ticker != previousTicker)
                {
                    // FIX: log source said "PsychSignalDataDownloader"; corrected to match this class
                    Log.Trace($"PsychSignalDataConverter.Convert(): Skipping line {currentLineCount} - Could not resolve map file for ticker {ticker}");
                }

                previousTicker = ticker;
                continue;
            }

            TickerData handle;
            if (!_fileHandles.TryGetValue(ticker, out handle))
            {
                // one open handle per ticker; disposed when this converter is disposed
                handle = new TickerData(ticker, timestamp.Date, _destinationDirectory);
                _fileHandles[ticker] = handle;
            }

            handle.Append(timestamp, csv);
            previousTicker = ticker;
        }
    }
}
/// <summary>
/// Gets the ticker using map files. If we can't resolve a map file, or we can't
/// resolve a ticker within the map file, we return <see cref="string.Empty"/>.
/// (FIX: previous docs claimed null was returned; the code has always returned empty.)
/// </summary>
/// <param name="tradingEconomicsEarnings">TE Earnings data</param>
/// <returns>Mapped ticker, lower-cased, or an empty string on failure</returns>
private string GetMappedSymbol(TradingEconomicsEarnings tradingEconomicsEarnings)
{
    var ticker = tradingEconomicsEarnings.Symbol;
    var mapFile = _mapFileResolver.ResolveMapFile(ticker, tradingEconomicsEarnings.LastUpdate);
    if (!mapFile.Any())
    {
        Log.Error($"TradingEconomicsEarningsDownloader.GetMappedSymbol(): No mapfile found for ticker {ticker}");
        return string.Empty;
    }

    var symbol = mapFile.GetMappedSymbol(tradingEconomicsEarnings.LastUpdate);
    if (string.IsNullOrEmpty(symbol))
    {
        Log.Error($"TradingEconomicsEarningsDownloader.GetMappedSymbol(): No mapped symbol found for ticker {ticker}");
        return string.Empty;
    }

    // FIX: ToLowerInvariant() so ticker casing does not depend on the host culture
    // (e.g. the Turkish 'I' problem with plain ToLower())
    return symbol.ToLowerInvariant();
}
/// <summary>
/// Initializes the <see cref="SubscriptionDataReader"/> instance: builds the data
/// factory, wires auth tokens for known vendors, loads map/factor files for equities
/// and option underlyings, estimates the delisting date, and creates the enumerator.
/// Safe to call more than once; subsequent calls are no-ops.
/// </summary>
public void Initialize()
{
    if (_initialized)
    {
        return;
    }

    //Save the type of data we'll be getting from the source.

    //Create the dynamic type-activators:
    var objectActivator = ObjectActivator.GetActivator(_config.Type);

    if (objectActivator == null)
    {
        // a custom type without a parameterless constructor cannot be instantiated;
        // surface the configuration error and end the stream
        OnInvalidConfigurationDetected(
            new InvalidConfigurationDetectedEventArgs(
                $"Custom data type \'{_config.Type.Name}\' missing parameterless constructor " +
                $"E.g. public {_config.Type.Name}() {{ }}"));

        _endOfStream = true;
        return;
    }

    //Create an instance of the "Type":
    var userObj = objectActivator.Invoke(new object[] { _config.Type });

    _dataFactory = userObj as BaseData;

    //If its quandl set the access token in data factory:
    var quandl = _dataFactory as Quandl;
    if (quandl != null)
    {
        if (!Quandl.IsAuthCodeSet)
        {
            Quandl.SetAuthCode(Config.Get("quandl-auth-token"));
        }
    }

    // If Tiingo data, set the access token in data factory
    var tiingo = _dataFactory as TiingoDailyData;
    if (tiingo != null)
    {
        if (!Tiingo.IsAuthCodeSet)
        {
            Tiingo.SetAuthCode(Config.Get("tiingo-auth-token"));
        }
    }

    // start with empty factor/map files so later code can rely on non-null instances
    _factorFile = new FactorFile(_config.Symbol.Value, new List<FactorFileRow>());
    _mapFile = new MapFile(_config.Symbol.Value, new List<MapFileRow>());

    // load up the map and factor files for equities
    if (!_config.IsCustomData && _config.SecurityType == SecurityType.Equity)
    {
        try
        {
            var mapFile = _mapFileResolver.ResolveMapFile(_config.Symbol.ID.Symbol, _config.Symbol.ID.Date);

            // only take the resolved map file if it has data, otherwise we'll use the empty one we defined above
            if (mapFile.Any()) _mapFile = mapFile;

            var factorFile = _factorFileProvider.Get(_config.Symbol);
            _hasScaleFactors = factorFile != null;
            if (_hasScaleFactors)
            {
                _factorFile = factorFile;

                // if factor file has minimum date, update start period if before minimum date
                if (!_isLiveMode && _factorFile != null && _factorFile.FactorFileMinimumDate.HasValue)
                {
                    if (_periodStart < _factorFile.FactorFileMinimumDate.Value)
                    {
                        // data before the factor file's minimum date would suffer
                        // numerical precision problems, so clamp the start period
                        _periodStart = _factorFile.FactorFileMinimumDate.Value;

                        OnNumericalPrecisionLimited(
                            new NumericalPrecisionLimitedEventArgs(
                                $"Data for symbol {_config.Symbol.Value} has been limited due to numerical precision issues in the factor file. " +
                                $"The starting date has been set to {_factorFile.FactorFileMinimumDate.Value.ToShortDateString()}."));
                    }
                }
            }
        }
        catch (Exception err)
        {
            Log.Error(err, "Fetching Price/Map Factors: " + _config.Symbol.ID + ": ");
        }
    }

    // load up the map and factor files for underlying of equity option
    if (!_config.IsCustomData && _config.SecurityType == SecurityType.Option)
    {
        try
        {
            var mapFile = _mapFileResolver.ResolveMapFile(_config.Symbol.Underlying.ID.Symbol, _config.Symbol.Underlying.ID.Date);

            // only take the resolved map file if it has data, otherwise we'll use the empty one we defined above
            if (mapFile.Any()) _mapFile = mapFile;
        }
        catch (Exception err)
        {
            Log.Error(err, "Map Factors: " + _config.Symbol.ID + ": ");
        }
    }

    // Estimate delisting date.
    switch (_config.Symbol.ID.SecurityType)
    {
        case SecurityType.Future:
            // futures delist at contract expiry
            _delistingDate = _config.Symbol.ID.Date;
            break;
        case SecurityType.Option:
            _delistingDate = OptionSymbol.GetLastDayOfTrading(_config.Symbol);
            break;
        default:
            // equities (and everything else) delist per the map file
            _delistingDate = _mapFile.DelistingDate;
            break;
    }

    _subscriptionFactoryEnumerator = ResolveDataEnumerator(true);
    _initialized = true;
}
/// <summary>
/// Iterates each daily file in the specified <paramref name="dailyFolder"/> and adds a line for each
/// day to the appropriate coarse file
/// </summary>
/// <param name="dailyFolder">The folder with daily data.</param>
/// <param name="coarseFolder">The coarse output folder.</param>
/// <param name="mapFileResolver">The map file resolver.</param>
/// <param name="factorFileProvider">The factor file provider.</param>
/// <param name="exclusions">The symbols to be excluded from processing.</param>
/// <param name="ignoreMapless">Ignore the symbols without a map file.</param>
/// <param name="symbolResolver">Function used to provide symbol resolution. Default resolution uses the zip file name to resolve
/// the symbol, specify null for this behavior.</param>
/// <returns>Collection with the names of the newly generated coarse files.</returns>
/// <exception cref="Exception">
/// Unable to resolve market for daily folder: " + dailyFolder
/// or
/// Unable to resolve fundamental path for coarse folder: " + coarseFolder
/// </exception>
public static ICollection<string> ProcessDailyFolder(string dailyFolder, string coarseFolder, MapFileResolver mapFileResolver,
    IFactorFileProvider factorFileProvider, HashSet<string> exclusions, bool ignoreMapless, Func<string, string> symbolResolver = null)
{
    // daily prices are stored as integers scaled by 10000 (deci-cents)
    const decimal scaleFactor = 10000m;

    Log.Trace("Processing: {0}", dailyFolder);

    var start = DateTime.UtcNow;

    // load map files into memory

    var symbols = 0;
    var maplessCount = 0;
    var dates = new HashSet<DateTime>();

    // instead of opening/closing these constantly, open them once and dispose at the end (~3x speed improvement)
    var writers = new Dictionary<string, StreamWriter>();

    // the market name is the parent directory of the daily folder
    var marketDirectoryInfo = new DirectoryInfo(dailyFolder).Parent;
    if (marketDirectoryInfo == null)
    {
        throw new Exception($"Unable to resolve market for daily folder: {dailyFolder}");
    }
    var market = marketDirectoryInfo.Name.ToLowerInvariant();

    var fundamentalDirectoryInfo = new DirectoryInfo(coarseFolder).Parent;
    if (fundamentalDirectoryInfo == null)
    {
        throw new Exception($"Unable to resolve fundamental path for coarse folder: {coarseFolder}");
    }
    var fineFundamentalFolder = Path.Combine(marketDirectoryInfo.FullName, "fundamental", "fine");

    // open up each daily file to get the values and append to the daily coarse files
    foreach (var file in Directory.EnumerateFiles(dailyFolder, "*.zip"))
    {
        try
        {
            var ticker = Path.GetFileNameWithoutExtension(file);
            var fineAvailableDates = Enumerable.Empty<DateTime>();

            // collect the dates for which fine fundamental data exists for this ticker
            var tickerFineFundamentalFolder = Path.Combine(fineFundamentalFolder, ticker);
            if (Directory.Exists(tickerFineFundamentalFolder))
            {
                fineAvailableDates = Directory.GetFiles(tickerFineFundamentalFolder, "*.zip")
                    .Select(f => DateTime.ParseExact(Path.GetFileNameWithoutExtension(f), DateFormat.EightCharacter, CultureInfo.InvariantCulture))
                    .ToList();
            }

            if (ticker == null)
            {
                Log.Trace("CoarseGenerator.ProcessDailyFolder(): Unable to resolve symbol from file: {0}", file);
                continue;
            }

            if (symbolResolver != null)
            {
                ticker = symbolResolver(ticker);
            }

            ticker = ticker.ToUpperInvariant();

            if (exclusions != null && exclusions.Contains(ticker))
            {
                Log.Trace("Excluded symbol: {0}", ticker);
                continue;
            }

            ZipFile zip;
            using (var reader = Compression.Unzip(file, out zip))
            {
                var checkedForMapFile = false;

                symbols++;
                string line;
                while ((line = reader.ReadLine()) != null)
                {
                    //20150625.csv
                    var csv = line.Split(',');
                    var date = DateTime.ParseExact(csv[0], DateFormat.TwelveCharacter, CultureInfo.InvariantCulture);

                    // spot check the first row's date against the map files; if mapless
                    // and we're ignoring mapless symbols, skip the whole zip
                    if (ignoreMapless && !checkedForMapFile)
                    {
                        checkedForMapFile = true;
                        if (!mapFileResolver.ResolveMapFile(ticker, date).Any())
                        {
                            // if the resolved map file has zero entries then it's a mapless symbol
                            maplessCount++;
                            break;
                        }
                    }

                    var close = Parse.Decimal(csv[4]) / scaleFactor;
                    var volume = Parse.Long(csv[5]);

                    var dollarVolume = close * volume;

                    var coarseFile = Path.Combine(coarseFolder, date.ToStringInvariant("yyyyMMdd") + ".csv");
                    dates.Add(date);

                    // try to resolve a map file and if found, regen the sid
                    var sid = SecurityIdentifier.GenerateEquity(SecurityIdentifier.DefaultDate, ticker, market);
                    var mapFile = mapFileResolver.ResolveMapFile(ticker, date);
                    if (!mapFile.IsNullOrEmpty())
                    {
                        // if available, us the permtick in the coarse files, because of this, we need
                        // to update the coarse files each time new map files are added/permticks change
                        sid = SecurityIdentifier.GenerateEquity(mapFile.FirstDate, mapFile.OrderBy(x => x.Date).First().MappedSymbol, market);
                    }

                    if (mapFile == null && ignoreMapless)
                    {
                        // if we're ignoring mapless files then we should always be able to resolve this
                        Log.Error($"CoarseGenerator.ProcessDailyFolder(): Unable to resolve map file for {ticker} as of {date.ToStringInvariant("d")}");
                        continue;
                    }

                    // get price and split factors from factor files
                    var symbol = new Symbol(sid, ticker);
                    var factorFile = factorFileProvider.Get(symbol);
                    var factorFileRow = factorFile?.GetScalingFactors(date);
                    var priceFactor = factorFileRow?.PriceFactor ?? 1m;
                    var splitFactor = factorFileRow?.SplitFactor ?? 1m;

                    // Check if security has fine file within a trailing month for a date-ticker set.
                    // There are tricky cases where a folder named by a ticker can have data for multiple securities.
                    // e.g GOOG -> GOOGL (GOOG T1AZ164W5VTX) / GOOCV -> GOOG (GOOCV VP83T1ZUHROL) case.
                    // The fine data in the 'fundamental/fine/goog' folder will be for 'GOOG T1AZ164W5VTX' up to the 2014-04-02 and for 'GOOCV VP83T1ZUHROL' afterward.
                    // Therefore, date before checking if the security has fundamental data for a date, we need to filter the fine files the map's first date.
                    var firstDate = mapFile?.FirstDate ?? DateTime.MinValue;
                    var hasFundamentalDataForDate = fineAvailableDates.Where(d => d >= firstDate).Any(d => date.AddMonths(-1) <= d && d <= date);

                    // The following section handles mergers and acquisitions cases.
                    // e.g. YHOO -> AABA (YHOO R735QTJ8XC9X)
                    // The dates right after the acquisition, valid fine fundamental data for AABA are still under the former ticker folder.
                    // Therefore if no fine fundamental data is found in the 'fundamental/fine/aaba' folder, it searches into the 'yhoo' folder.
                    if (mapFile != null && mapFile.Count() > 2 && !hasFundamentalDataForDate)
                    {
                        var previousTicker = mapFile.LastOrDefault(m => m.Date < date)?.MappedSymbol;
                        if (previousTicker != null)
                        {
                            var previousTickerFineFundamentalFolder = Path.Combine(fineFundamentalFolder, previousTicker);
                            if (Directory.Exists(previousTickerFineFundamentalFolder))
                            {
                                var previousTickerFineAvailableDates = Directory.GetFiles(previousTickerFineFundamentalFolder, "*.zip")
                                    .Select(f => DateTime.ParseExact(Path.GetFileNameWithoutExtension(f), DateFormat.EightCharacter, CultureInfo.InvariantCulture))
                                    .ToList();
                                hasFundamentalDataForDate = previousTickerFineAvailableDates.Where(d => d >= firstDate).Any(d => date.AddMonths(-1) <= d && d <= date);
                            }
                        }
                    }

                    // sid,symbol,close,volume,dollar volume,has fundamental data,price factor,split factor
                    var coarseFileLine = string.Format(
                        CultureInfo.InvariantCulture,
                        "{0},{1},{2},{3},{4},{5},{6},{7}",
                        sid, ticker, close, volume, Math.Truncate(dollarVolume), hasFundamentalDataForDate, priceFactor, splitFactor);

                    StreamWriter writer;
                    if (!writers.TryGetValue(coarseFile, out writer))
                    {
                        writer = new StreamWriter(new FileStream(coarseFile, FileMode.Create, FileAccess.Write, FileShare.Write));
                        writers[coarseFile] = writer;
                    }
                    writer.WriteLine(coarseFileLine);
                }
            }

            if (symbols % 1000 == 0)
            {
                Log.Trace($"CoarseGenerator.ProcessDailyFolder(): Completed processing {symbols} symbols. Current elapsed: {(DateTime.UtcNow - start).TotalSeconds.ToStringInvariant("0.00")} seconds");
            }
        }
        catch (Exception err)
        {
            // log the error and continue with the process
            Log.Error(err.ToString());
        }
    }

    Log.Trace("CoarseGenerator.ProcessDailyFolder(): Saving {0} coarse files to disk", dates.Count);

    // dispose all the writers at the end of processing
    foreach (var writer in writers)
    {
        writer.Value.Dispose();
    }

    var stop = DateTime.UtcNow;

    Log.Trace($"CoarseGenerator.ProcessDailyFolder(): Processed {symbols} symbols into {dates.Count} coarse files in {(stop - start).TotalSeconds.ToStringInvariant("0.00")} seconds");
    Log.Trace($"CoarseGenerator.ProcessDailyFolder(): Excluded {maplessCount} mapless symbols.");

    return(writers.Keys);
}
/// <summary>
/// Initializes the <see cref="SubscriptionDataReader"/> instance
/// </summary>
/// <remarks>Should be called after all consumers of <see cref="NewTradableDate"/> event are set,
/// since it will produce events.</remarks>
public void Initialize()
{
    if (_initialized)
    {
        return;
    }

    //Save the type of data we'll be getting from the source.
    try
    {
        _dataFactory = _config.Type.GetBaseDataInstance();
    }
    catch (ArgumentException exception)
    {
        // the configured type cannot be instantiated; surface the error and end the stream
        OnInvalidConfigurationDetected(new InvalidConfigurationDetectedEventArgs(exception.Message));
        _endOfStream = true;
        return;
    }

    //If its quandl set the access token in data factory:
    var quandl = _dataFactory as Quandl;
    if (quandl != null)
    {
        if (!Quandl.IsAuthCodeSet)
        {
            Quandl.SetAuthCode(Config.Get("quandl-auth-token"));
        }
    }

    // If Tiingo data, set the access token in data factory
    var tiingo = _dataFactory as TiingoDailyData;
    if (tiingo != null)
    {
        if (!Tiingo.IsAuthCodeSet)
        {
            Tiingo.SetAuthCode(Config.Get("tiingo-auth-token"));
        }
    }

    // If USEnergyInformation data, set the access token in data factory
    var energyInformation = _dataFactory as USEnergyInformation;
    if (energyInformation != null)
    {
        if (!USEnergyInformation.IsAuthCodeSet)
        {
            USEnergyInformation.SetAuthCode(Config.Get("us-energy-information-auth-token"));
        }
    }

    // start with empty factor/map files so later code can rely on non-null instances
    _factorFile = new FactorFile(_config.Symbol.Value, new List<FactorFileRow>());
    _mapFile = new MapFile(_config.Symbol.Value, new List<MapFileRow>());

    // load up the map files for equities, options, and custom data if it supports it.
    // Only load up factor files for equities
    if (_config.TickerShouldBeMapped())
    {
        try
        {
            var mapFile = _mapFileResolver.ResolveMapFile(_config.Symbol, _config.Type);

            // only take the resolved map file if it has data, otherwise we'll use the empty one we defined above
            if (mapFile.Any()) _mapFile = mapFile;

            if (!_config.IsCustomData && _config.SecurityType != SecurityType.Option)
            {
                var factorFile = _factorFileProvider.Get(_config.Symbol);
                _hasScaleFactors = factorFile != null;
                if (_hasScaleFactors)
                {
                    _factorFile = factorFile;

                    // if factor file has minimum date, update start period if before minimum date
                    if (!_isLiveMode && _factorFile != null && _factorFile.FactorFileMinimumDate.HasValue)
                    {
                        if (_periodStart < _factorFile.FactorFileMinimumDate.Value)
                        {
                            // data before the factor file's minimum date would suffer
                            // numerical precision problems, so clamp the start period
                            _periodStart = _factorFile.FactorFileMinimumDate.Value;

                            OnNumericalPrecisionLimited(
                                new NumericalPrecisionLimitedEventArgs(
                                    $"Data for symbol {_config.Symbol.Value} has been limited due to numerical precision issues in the factor file. " +
                                    $"The starting date has been set to {_factorFile.FactorFileMinimumDate.Value.ToShortDateString()}."));
                        }
                    }
                }
            }
        }
        catch (Exception err)
        {
            Log.Error(err, "Fetching Price/Map Factors: " + _config.Symbol.ID + ": ");
        }
    }

    // Estimate delisting date.
    switch (_config.Symbol.ID.SecurityType)
    {
        case SecurityType.Future:
            // futures delist at contract expiry
            _delistingDate = _config.Symbol.ID.Date;
            break;
        case SecurityType.Option:
            _delistingDate = OptionSymbol.GetLastDayOfTrading(_config.Symbol);
            break;
        default:
            // equities (and everything else) delist per the map file
            _delistingDate = _mapFile.DelistingDate;
            break;
    }

    // adding a day so we stop at EOD
    _delistingDate = _delistingDate.AddDays(1);

    _subscriptionFactoryEnumerator = ResolveDataEnumerator(true);

    _initialized = true;
}
/// <summary>
/// Iterates each daily file in the specified <paramref name="dailyFolder"/> and appends a line
/// for each trading day to the appropriate coarse file, computing a 30-period EMA of volume
/// to derive dollar volume.
/// </summary>
/// <param name="dailyFolder">The folder with daily data (market is the parent directory's name).</param>
/// <param name="coarseFolder">The coarse output folder.</param>
/// <param name="mapFileResolver">The map file resolver.</param>
/// <param name="exclusions">The symbols to be excluded from processing.</param>
/// <param name="ignoreMapless">Ignore the symbols without a map file.</param>
/// <param name="startDate">Rows dated before this are skipped.</param>
/// <param name="symbolResolver">Optional function used to resolve the symbol from the file name; null uses the file name directly.</param>
/// <returns>Collection with the names of the newly generated coarse files.</returns>
/// <exception cref="Exception">Unable to resolve market for daily folder</exception>
public static ICollection<string> ProcessDailyFolder(string dailyFolder, string coarseFolder, MapFileResolver mapFileResolver,
    HashSet<string> exclusions, bool ignoreMapless, DateTime startDate, Func<string, string> symbolResolver = null)
{
    // daily prices are stored as integers scaled by 10000 (deci-cents)
    const decimal scaleFactor = 10000m;

    Log.Trace("Processing: {0}", dailyFolder);

    var start = DateTime.UtcNow;

    var symbols = 0;
    var maplessCount = 0;
    var dates = new HashSet<DateTime>();

    // open writers once and dispose at the end rather than per-line
    var writers = new Dictionary<string, StreamWriter>();

    var dailyFolderDirectoryInfo = new DirectoryInfo(dailyFolder).Parent;
    if (dailyFolderDirectoryInfo == null)
    {
        throw new Exception("Unable to resolve market for daily folder: " + dailyFolder);
    }
    // FIX: ToLowerInvariant (was ToLower) - market names are machine identifiers,
    // must not vary with the host culture
    var market = dailyFolderDirectoryInfo.Name.ToLowerInvariant();

    foreach (var file in Directory.EnumerateFiles(dailyFolder))
    {
        try
        {
            var symbol = Path.GetFileNameWithoutExtension(file);
            if (symbol == null)
            {
                Log.Trace("CoarseGenerator.ProcessDailyFolder(): Unable to resolve symbol from file: {0}", file);
                continue;
            }

            if (symbolResolver != null)
            {
                symbol = symbolResolver(symbol);
            }

            // FIX: ToUpperInvariant (was ToUpper) for culture-stable ticker normalization
            symbol = symbol.ToUpperInvariant();

            if (exclusions.Contains(symbol))
            {
                Log.Trace("Excluded symbol: {0}", symbol);
                continue;
            }

            ZipFile zip;
            using (var reader = Compression.Unzip(file, out zip))
            {
                // 30-period EMA smoothing constant for volume
                const decimal k = 2m / (30 + 1);
                var seeded = false;
                var runningAverageVolume = 0m;

                var checkedForMapFile = false;

                symbols++;
                string line;
                while ((line = reader.ReadLine()) != null)
                {
                    var csv = line.Split(',');
                    var date = DateTime.ParseExact(csv[0], DateFormat.TwelveCharacter, CultureInfo.InvariantCulture);
                    if (date < startDate)
                    {
                        continue;
                    }

                    // spot check the first row's date against the map files; if mapless
                    // and we're ignoring mapless symbols, skip the whole file
                    if (ignoreMapless && !checkedForMapFile)
                    {
                        checkedForMapFile = true;
                        if (!mapFileResolver.ResolveMapFile(symbol, date).Any())
                        {
                            maplessCount++;
                            break;
                        }
                    }

                    // FIX: parse with InvariantCulture - data files are machine-written,
                    // '.' is always the decimal separator regardless of host culture
                    var close = decimal.Parse(csv[4], CultureInfo.InvariantCulture) / scaleFactor;
                    var volume = long.Parse(csv[5], CultureInfo.InvariantCulture);

                    // EMA seed uses the first observed volume directly
                    runningAverageVolume = seeded ? volume * k + runningAverageVolume * (1 - k) : volume;
                    seeded = true;

                    var dollarVolume = close * runningAverageVolume;

                    // FIX: format date with InvariantCulture to guarantee stable file names
                    var coarseFile = Path.Combine(coarseFolder, date.ToString("yyyyMMdd", CultureInfo.InvariantCulture) + ".csv");
                    dates.Add(date);

                    // try to resolve a map file and if found, regen the sid using the permtick
                    var sid = SecurityIdentifier.GenerateEquity(SecurityIdentifier.DefaultDate, symbol, market);
                    var mapFile = mapFileResolver.ResolveMapFile(symbol, date);
                    if (!mapFile.IsNullOrEmpty())
                    {
                        sid = SecurityIdentifier.GenerateEquity(mapFile.FirstDate, mapFile.OrderBy(x => x.Date).First().MappedSymbol, market);
                    }

                    if (mapFile == null && ignoreMapless)
                    {
                        // if we're ignoring mapless files then we should always be able to resolve this
                        Log.Error(string.Format("CoarseGenerator.ProcessDailyFolder(): Unable to resolve map file for {0} as of {1}", symbol, date.ToShortDateString()));
                        continue;
                    }

                    // FIX: write numeric fields with InvariantCulture - plain string
                    // concatenation used the current culture's decimal separator, which
                    // could emit ',' and corrupt the CSV on non-US hosts
                    var coarseFileLine = string.Format(
                        CultureInfo.InvariantCulture,
                        "{0},{1},{2},{3},{4}",
                        sid, symbol, close, volume, Math.Truncate(dollarVolume));

                    StreamWriter writer;
                    if (!writers.TryGetValue(coarseFile, out writer))
                    {
                        writer = new StreamWriter(new FileStream(coarseFile, FileMode.Create, FileAccess.Write, FileShare.Write));
                        writers[coarseFile] = writer;
                    }
                    writer.WriteLine(coarseFileLine);
                }
            }

            if (symbols % 1000 == 0)
            {
                Log.Trace("CoarseGenerator.ProcessDailyFolder(): Completed processing {0} symbols. Current elapsed: {1} seconds",
                    symbols, (DateTime.UtcNow - start).TotalSeconds.ToString("0.00", CultureInfo.InvariantCulture));
            }
        }
        catch (Exception err)
        {
            // log and continue with the remaining files
            Log.Error(err.ToString());
        }
    }

    Log.Trace("CoarseGenerator.ProcessDailyFolder(): Saving {0} coarse files to disk", dates.Count);

    // dispose all the writers at the end of processing
    foreach (var writer in writers)
    {
        writer.Value.Dispose();
    }

    var stop = DateTime.UtcNow;

    Log.Trace("CoarseGenerator.ProcessDailyFolder(): Processed {0} symbols into {1} coarse files in {2} seconds",
        symbols, dates.Count, (stop - start).TotalSeconds.ToString("0.00", CultureInfo.InvariantCulture));
    Log.Trace("CoarseGenerator.ProcessDailyFolder(): Excluded {0} mapless symbols.", maplessCount);

    return writers.Keys;
}
/// <summary>
/// Processes the data
/// </summary>
/// <typeparam name="T"><see cref="SmartInsiderEvent"/> inherited instance</typeparam>
/// <param name="sourceFile">File to read data from</param>
/// <returns>Dictionary keyed by (map-file remapped) ticker that contains all the lines that appeared in the file</returns>
private Dictionary<string, List<T>> Process<T>(FileInfo sourceFile)
    where T : SmartInsiderEvent, new()
{
    // previousMarket/previousTicker are used only to de-duplicate consecutive error logs
    var previousMarket = string.Empty;
    var previousTicker = string.Empty;
    var lines = new Dictionary<string, List<T>>();
    var i = 0;

    Log.Trace($"SmartInsiderConverter.Process(): Processing file: {sourceFile.FullName}");

    foreach (var line in File.ReadLines(sourceFile.FullName))
    {
        i++;

        // First line is the header row, but make sure we don't encounter it anywhere else in the data
        if (line.StartsWith("\"TransactionID"))
        {
            Log.Trace($"SmartInsiderConverter.Process(): Header row on line {i}. Skipping...");
            continue;
        }

        try
        {
            // Yes, there are ONE HUNDRED total fields in this dataset.
            // However, we will only take the first 60 since the rest are reserved fields
            var tsv = line.Split('\t')
                .Take(60)
                .Select(x => x.Replace("\"", ""))
                .ToList();

            // If we have a null value on a non-nullable field, consider it invalid data
            if (string.IsNullOrWhiteSpace(tsv[2]))
            {
                Log.Trace($"SmartInsiderConverter.Process(): Null value encountered on non-nullable value on line {i}");
                continue;
            }

            // Remove in descending order to maintain index order
            // while we delete lower indexed values
            tsv.RemoveAt(46); // ShowOriginal
            tsv.RemoveAt(36); // PreviousClosePrice
            tsv.RemoveAt(14); // ShortCompanyName
            tsv.RemoveAt(7);  // CompanyPageURL

            var finalLine = string.Join("\t", tsv);

            var dataInstance = new T();
            dataInstance.FromRawData(finalLine);

            var ticker = dataInstance.TickerSymbol;

            // For now, only support US markets
            if (dataInstance.TickerCountry != "US")
            {
                // only log when the (market, ticker) pair changed to avoid spamming the log
                if (dataInstance.TickerCountry != previousMarket && ticker != previousTicker)
                {
                    Log.Error($"SmartInsiderConverter.Process(): Market {dataInstance.TickerCountry} is not supported at this time for ticker {ticker} on line {i}");
                }

                previousMarket = dataInstance.TickerCountry;
                previousTicker = ticker;
                continue;
            }

            var mapFile = _mapFileResolver.ResolveMapFile(ticker, dataInstance.LastUpdate);
            if (!mapFile.Any())
            {
                Log.Error($"SmartInsiderConverter.Process(): Failed to find mapfile for ticker {ticker} on {dataInstance.LastUpdate} on line {i}");

                previousMarket = dataInstance.TickerCountry;
                previousTicker = ticker;
                continue;
            }

            // remap the raw ticker to the ticker in effect on the event's LastUpdate date
            var newTicker = mapFile.GetMappedSymbol(dataInstance.LastUpdate);
            if (string.IsNullOrEmpty(newTicker))
            {
                Log.Error($"SmartInsiderConverter.Process(): Failed to resolve ticker for old ticker {ticker} on line {i}");

                previousMarket = dataInstance.TickerCountry;
                previousTicker = ticker;
                continue;
            }

            // Log any mapping events since this can be a point of failure
            if (ticker != newTicker)
            {
                Log.Trace($"SmartInsiderConverter.Process(): Mapped ticker from {ticker} to {newTicker}");
            }

            List<T> symbolLines;
            if (!lines.TryGetValue(newTicker, out symbolLines))
            {
                symbolLines = new List<T>();
                lines[newTicker] = symbolLines;
            }

            symbolLines.Add(dataInstance);

            previousMarket = dataInstance.TickerCountry;
            previousTicker = ticker;
        }
        catch (Exception e)
        {
            // log and continue with the next line; one bad row shouldn't abort the file
            Log.Error(e, $"SmartInsiderConverter.Process(): Error on line {i}");
        }
    }

    return(lines);
}
/// <summary>
/// Runs the instance of the object: downloads release data for every distinct
/// company from the Estimize API, remaps tickers through map files, and writes
/// the results to CSV per mapped ticker.
/// </summary>
/// <returns>True if process all downloads successfully</returns>
public override bool Run()
{
    var stopwatch = Stopwatch.StartNew();

    try
    {
        var companies = GetCompanies().Result.DistinctBy(x => x.Ticker).ToList();
        var count = companies.Count;
        var currentPercent = 0.05;
        var percent = 0.05;
        var i = 0;

        Log.Trace($"EstimizeReleaseDataDownloader.Run(): Start processing {count} companies");

        var tasks = new List<Task>();

        foreach (var company in companies)
        {
            try
            {
                // Makes sure we don't overrun Estimize rate limits accidentally
                IndexGate.WaitToProceed();
            }
            // This is super super rare, but it failures in RateGate (RG) can still happen nonetheless. Let's not
            // rely on RG operating successfully all the time so that if RG fails, our download process doesn't fail
            catch (ArgumentOutOfRangeException e)
            {
                Log.Error(e, $"EstimizeReleaseDataDownloader.Run(): RateGate failed. Sleeping for 110 milliseconds with Thread.Sleep()");
                Thread.Sleep(110);
            }

            // strip the "defunct" tag so the ticker can be used in the API endpoint
            var ticker = company.Ticker;
            if (ticker.IndexOf("defunct", StringComparison.OrdinalIgnoreCase) > 0)
            {
                var length = ticker.IndexOf('-');
                ticker = ticker.Substring(0, length).Trim();
            }

            Log.Trace($"EstimizeReleaseDataDownloader.Run(): Processing {ticker}");

            tasks.Add(
                HttpRequester($"/companies/{ticker}/releases")
                    .ContinueWith(
                        y =>
                        {
                            // NOTE(review): continuations may run concurrently, so this
                            // increment is not atomic - progress reporting is best-effort
                            i++;

                            if (y.IsFaulted)
                            {
                                Log.Error($"EstimizeReleaseDataDownloader.Run(): Failed to get data for {company}");
                                return;
                            }

                            var result = y.Result;
                            if (string.IsNullOrEmpty(result))
                            {
                                // We've already logged inside HttpRequester
                                return;
                            }

                            // Just like TradingEconomics, we only want the events that already occured
                            // instead of having "forecasts" that will change in the future taint our
                            // data and make backtests non-deterministic. We want to have
                            // consistency with our data in live trading historical requests as well
                            var releases = JsonConvert.DeserializeObject<List<EstimizeRelease>>(result)
                                .Where(x => x.Eps != null)
                                .GroupBy(x =>
                                {
                                    var releaseDate = x.ReleaseDate;

                                    try
                                    {
                                        var mapFile = _mapFileResolver.ResolveMapFile(ticker, releaseDate);

                                        var oldTicker = ticker;
                                        var newTicker = ticker;

                                        // Ensure we're writing to the correct historical ticker
                                        if (!mapFile.Any())
                                        {
                                            Log.Trace($"EstimizeReleaseDataDownloader.Run(): Failed to find map file for: {newTicker} - on: {releaseDate}");
                                            return string.Empty;
                                        }

                                        newTicker = mapFile.GetMappedSymbol(releaseDate);
                                        if (string.IsNullOrWhiteSpace(newTicker))
                                        {
                                            Log.Trace($"EstimizeReleaseDataDownloader.Run(): Failed to find mapping for null new ticker. Old ticker: {oldTicker} - on: {releaseDate}");
                                            return string.Empty;
                                        }

                                        if (oldTicker != newTicker)
                                        {
                                            Log.Trace($"EstimizeReleaseDataDownloader.Run(): Remapped from {oldTicker} to {newTicker} for {releaseDate}");
                                        }

                                        return newTicker;
                                    }
                                    // We get a failure inside the map file constructor rarely. It tries
                                    // to access the last element of an empty list. Maybe this is a bug?
                                    catch (InvalidOperationException e)
                                    {
                                        Log.Error(e, $"EstimizeReleaseDataDownloader.Run(): Failed to load map file for: {ticker} - on: {releaseDate}");
                                        return string.Empty;
                                    }
                                })
                                .Where(x => !string.IsNullOrEmpty(x.Key));

                            foreach (var kvp in releases)
                            {
                                var csvContents = kvp.Select(x =>
                                    $"{x.ReleaseDate.ToUniversalTime():yyyyMMdd HH:mm:ss},{x.Id},{x.FiscalYear},{x.FiscalQuarter},{x.Eps},{x.Revenue},{x.ConsensusEpsEstimate},{x.ConsensusRevenueEstimate},{x.WallStreetEpsEstimate},{x.WallStreetRevenueEstimate},{x.ConsensusWeightedEpsEstimate},{x.ConsensusWeightedRevenueEstimate}");
                                SaveContentToFile(_destinationFolder, kvp.Key, csvContents);
                            }

                            // FIX: was "i / count" - integer division always yields 0 until
                            // i == count, so progress was never reported; cast to double
                            var percentDone = (double)i / count;
                            if (percentDone >= currentPercent)
                            {
                                // FIX: log source said "EstimizeEstimateDataDownloader";
                                // corrected to the Release downloader for consistency
                                Log.Trace($"EstimizeReleaseDataDownloader.Run(): {percentDone:P2} complete");
                                currentPercent += percent;
                            }
                        }
                    )
            );
        }

        Task.WaitAll(tasks.ToArray());
    }
    catch (Exception e)
    {
        Log.Error(e);
        return false;
    }

    Log.Trace($"EstimizeReleaseDataDownloader.Run(): Finished in {stopwatch.Elapsed}");
    return true;
}
/// <summary>
/// Maps the configured symbol to the ticker in effect on the given date,
/// falling back to the config's current mapped symbol when no entry applies.
/// </summary>
private string GetMappedSymbol(SubscriptionDataConfig config, DateTime date)
{
    var mapFile = _mapFileResolver.ResolveMapFile(config.Symbol, config.Type);
    return mapFile.GetMappedSymbol(date, config.MappedSymbol);
}
/// <summary>
/// Runs the instance of the object. Downloads Estimize analyst estimates for each
/// company, remaps each estimate to the historical ticker via map files and writes
/// the results to per-ticker CSV files.
/// </summary>
/// <returns>True if process all downloads successfully</returns>
public override bool Run()
{
    var stopwatch = Stopwatch.StartNew();

    try
    {
        var companies = GetCompanies().Result.DistinctBy(x => x.Ticker).ToList();
        var count = companies.Count;
        // progress reporting threshold and step (fractions of the total company count)
        var currentPercent = 0.05;
        var percent = 0.05;
        // shared counter incremented by the download continuations below
        var i = 0;

        Log.Trace($"EstimizeEstimateDataDownloader.Run(): Start processing {count.ToStringInvariant()} companies");

        var tasks = new List<Task>();

        foreach (var company in companies)
        {
            // Include tickers that are "defunct".
            // Remove the tag because it cannot be part of the API endpoint.
            // This is separate from the NormalizeTicker(...) method since
            // we don't convert tickers with `-`s into the format we can successfully
            // index mapfiles with.
            var estimizeTicker = company.Ticker;
            string ticker;
            if (!TryNormalizeDefunctTicker(estimizeTicker, out ticker))
            {
                Log.Error($"EstimizeEstimateDataDownloader(): Defunct ticker {estimizeTicker} is unable to be parsed. Continuing...");
                continue;
            }

            if (_processTickers != null && !_processTickers.Contains(ticker, StringComparer.InvariantCultureIgnoreCase))
            {
                Log.Trace($"EstimizeEstimateDataDownloader.Run(): Skipping {ticker} since it is not in the list of predefined tickers");
                continue;
            }

            // Begin processing ticker with a normalized value
            Log.Trace($"EstimizeEstimateDataDownloader.Run(): Processing {ticker}");

            // Makes sure we don't overrun Estimize rate limits accidentally
            IndexGate.WaitToProceed();

            tasks.Add(
                HttpRequester($"/companies/{ticker}/estimates")
                    .ContinueWith(
                        y =>
                        {
                            // BUGFIX: continuations can execute concurrently on the thread pool,
                            // so a plain i++ could lose increments; use an atomic increment and
                            // keep the observed value for the progress computation below
                            var completed = System.Threading.Interlocked.Increment(ref i);

                            if (y.IsFaulted)
                            {
                                Log.Error($"EstimizeEstimateDataDownloader.Run(): Failed to get data for {company}");
                                return;
                            }

                            var result = y.Result;
                            if (string.IsNullOrEmpty(result))
                            {
                                // We've already logged inside HttpRequester
                                return;
                            }

                            var estimates = JsonConvert.DeserializeObject<List<EstimizeEstimate>>(result, JsonSerializerSettings)
                                .GroupBy(estimate =>
                                {
                                    var normalizedTicker = NormalizeTicker(ticker);
                                    var oldTicker = normalizedTicker;
                                    var newTicker = normalizedTicker;
                                    var createdAt = estimate.CreatedAt;

                                    try
                                    {
                                        var mapFile = _mapFileResolver.ResolveMapFile(normalizedTicker, createdAt);

                                        // Ensure we're writing to the correct historical ticker
                                        if (!mapFile.Any())
                                        {
                                            Log.Trace($"EstimizeEstimateDataDownloader.Run(): Failed to find map file for: {newTicker} - on: {createdAt}");
                                            return string.Empty;
                                        }

                                        newTicker = mapFile.GetMappedSymbol(createdAt);
                                        if (string.IsNullOrWhiteSpace(newTicker))
                                        {
                                            Log.Trace($"EstimizeEstimateDataDownloader.Run(): New ticker is null. Old ticker: {oldTicker} - on: {createdAt.ToStringInvariant()}");
                                            return string.Empty;
                                        }

                                        if (!string.Equals(oldTicker, newTicker, StringComparison.InvariantCultureIgnoreCase))
                                        {
                                            Log.Trace($"EstimizeEstimateDataDownloader.Run(): Remapping {oldTicker} to {newTicker}");
                                        }
                                    }
                                    // We get a failure inside the map file constructor rarely. It tries
                                    // to access the last element of an empty list. Maybe this is a bug?
                                    catch (InvalidOperationException e)
                                    {
                                        Log.Error(e, $"EstimizeEstimateDataDownloader.Run(): Failed to load map file for: {oldTicker} - on {createdAt}");
                                        return string.Empty;
                                    }

                                    return newTicker;
                                })
                                .Where(kvp => !string.IsNullOrEmpty(kvp.Key));

                            foreach (var kvp in estimates)
                            {
                                var csvContents = kvp.Select(x =>
                                    $"{x.CreatedAt.ToStringInvariant("yyyyMMdd HH:mm:ss")}," +
                                    $"{x.Id}," +
                                    $"{x.AnalystId}," +
                                    $"{x.UserName}," +
                                    $"{x.FiscalYear.ToStringInvariant()}," +
                                    $"{x.FiscalQuarter.ToStringInvariant()}," +
                                    $"{x.Eps.ToStringInvariant()}," +
                                    $"{x.Revenue.ToStringInvariant()}," +
                                    $"{x.Flagged.ToStringInvariant().ToLowerInvariant()}"
                                );
                                SaveContentToFile(_destinationFolder, kvp.Key, csvContents);
                            }

                            // BUGFIX: this was integer division (i / count), which is always 0 until
                            // the final task, so progress was never reported; use floating point
                            var percentageDone = (double)completed / count;
                            if (percentageDone >= currentPercent)
                            {
                                Log.Trace($"EstimizeEstimateDataDownloader.Run(): {percentageDone.ToStringInvariant("P2")} complete");
                                // NOTE(review): currentPercent is still updated without synchronization;
                                // a race here can at worst duplicate a progress log line
                                currentPercent += percent;
                            }
                        }
                    )
            );
        }

        Task.WaitAll(tasks.ToArray());
    }
    catch (Exception e)
    {
        Log.Error(e);
        return false;
    }

    Log.Trace($"EstimizeEstimateDataDownloader.Run(): Finished in {stopwatch.Elapsed.ToStringInvariant(null)}");
    return true;
}
/// <summary>
/// Iterates each daily file in the specified <paramref name="dailyFolder"/> and adds a line for each
/// day to the appropriate coarse file
/// </summary>
/// <param name="dailyFolder">The folder with daily data</param>
/// <param name="coarseFolder">The coarse output folder</param>
/// <param name="mapFileResolver">Resolves map files used to regenerate stable security identifiers</param>
/// <param name="exclusions">The symbols to be excluded from processing</param>
/// <param name="ignoreMapless">Ignore the symbols without a map file.</param>
/// <param name="startDate">The starting date for processing</param>
/// <param name="symbolResolver">Function used to provide symbol resolution. Default resolution uses the zip file name to resolve
/// the symbol, specify null for this behavior.</param>
/// <returns>A collection of the generated coarse files</returns>
public static ICollection<string> ProcessDailyFolder(string dailyFolder, string coarseFolder, MapFileResolver mapFileResolver, HashSet<string> exclusions, bool ignoreMapless, DateTime startDate, Func<string, string> symbolResolver = null)
{
    const decimal scaleFactor = 10000m;

    Log.Trace("Processing: {0}", dailyFolder);

    var start = DateTime.UtcNow;

    // load map files into memory
    var symbols = 0;
    var maplessCount = 0;
    var dates = new HashSet<DateTime>();

    // instead of opening/closing these constantly, open them once and dispose at the end (~3x speed improvement)
    var writers = new Dictionary<string, StreamWriter>();

    // the market is encoded as the parent directory of the daily folder (e.g. .../usa/daily)
    var dailyFolderDirectoryInfo = new DirectoryInfo(dailyFolder).Parent;
    if (dailyFolderDirectoryInfo == null)
    {
        throw new Exception("Unable to resolve market for daily folder: " + dailyFolder);
    }
    var market = dailyFolderDirectoryInfo.Name.ToLower();

    var fundamentalDirectoryInfo = new DirectoryInfo(coarseFolder).Parent;
    if (fundamentalDirectoryInfo == null)
    {
        throw new Exception("Unable to resolve fundamental path for coarse folder: " + coarseFolder);
    }
    var fineFundamentalFolder = Path.Combine(fundamentalDirectoryInfo.FullName, "fine");

    var mapFileProvider = new LocalDiskMapFileProvider();
    var factorFileProvider = new LocalDiskFactorFileProvider(mapFileProvider);

    // open up each daily file to get the values and append to the daily coarse files
    foreach (var file in Directory.EnumerateFiles(dailyFolder))
    {
        try
        {
            var symbol = Path.GetFileNameWithoutExtension(file);
            if (symbol == null)
            {
                Log.Trace("CoarseGenerator.ProcessDailyFolder(): Unable to resolve symbol from file: {0}", file);
                continue;
            }

            if (symbolResolver != null)
            {
                symbol = symbolResolver(symbol);
            }
            symbol = symbol.ToUpper();

            if (exclusions.Contains(symbol))
            {
                Log.Trace("Excluded symbol: {0}", symbol);
                continue;
            }

            // check if symbol has any fine fundamental data
            var firstFineSymbolDate = DateTime.MaxValue;
            if (Directory.Exists(fineFundamentalFolder))
            {
                var fineSymbolFolder = Path.Combine(fineFundamentalFolder, symbol.ToLower());

                // BUGFIX: FirstOrDefault() returns null when the fine folder exists but is empty,
                // which previously threw NullReferenceException on the .Length check below;
                // coalesce to the empty string instead
                var firstFineSymbolFileName = Directory.Exists(fineSymbolFolder)
                    ? (Directory.GetFiles(fineSymbolFolder).OrderBy(x => x).FirstOrDefault() ?? string.Empty)
                    : string.Empty;

                if (firstFineSymbolFileName.Length > 0)
                {
                    firstFineSymbolDate = DateTime.ParseExact(Path.GetFileNameWithoutExtension(firstFineSymbolFileName), "yyyyMMdd", CultureInfo.InvariantCulture);
                }
            }

            ZipFile zip;
            using (var reader = Compression.Unzip(file, out zip))
            {
                var checkedForMapFile = false;

                symbols++;
                string line;
                while ((line = reader.ReadLine()) != null)
                {
                    //20150625.csv
                    var csv = line.Split(',');
                    var date = DateTime.ParseExact(csv[0], DateFormat.TwelveCharacter, CultureInfo.InvariantCulture);

                    // spin past old data
                    if (date < startDate)
                    {
                        continue;
                    }

                    if (ignoreMapless && !checkedForMapFile)
                    {
                        checkedForMapFile = true;
                        if (!mapFileResolver.ResolveMapFile(symbol, date).Any())
                        {
                            // if the resolved map file has zero entries then it's a mapless symbol
                            maplessCount++;
                            break;
                        }
                    }

                    // BUGFIX: parse the machine-written zip entries with the invariant culture so
                    // output doesn't depend on the host machine's regional settings (CA1305)
                    var close = decimal.Parse(csv[4], CultureInfo.InvariantCulture) / scaleFactor;
                    var volume = long.Parse(csv[5], CultureInfo.InvariantCulture);
                    var dollarVolume = close * volume;

                    var coarseFile = Path.Combine(coarseFolder, date.ToString("yyyyMMdd", CultureInfo.InvariantCulture) + ".csv");
                    dates.Add(date);

                    // try to resolve a map file and if found, regen the sid
                    var sid = SecurityIdentifier.GenerateEquity(SecurityIdentifier.DefaultDate, symbol, market);
                    var mapFile = mapFileResolver.ResolveMapFile(symbol, date);
                    if (!mapFile.IsNullOrEmpty())
                    {
                        // if available, use the permtick in the coarse files, because of this, we need
                        // to update the coarse files each time new map files are added/permticks change
                        sid = SecurityIdentifier.GenerateEquity(mapFile.FirstDate, mapFile.OrderBy(x => x.Date).First().MappedSymbol, market);
                    }

                    if (mapFile == null && ignoreMapless)
                    {
                        // if we're ignoring mapless files then we should always be able to resolve this
                        Log.Error(string.Format("CoarseGenerator.ProcessDailyFolder(): Unable to resolve map file for {0} as of {1}", symbol, date.ToShortDateString()));
                        continue;
                    }

                    // check if symbol has fine fundamental data for the current date
                    var hasFundamentalDataForDate = date >= firstFineSymbolDate;

                    // get price and split factors from factor files
                    var leanSymbol = new Symbol(sid, symbol);
                    var factorFile = factorFileProvider.Get(leanSymbol);
                    var factorFileRow = factorFile?.GetScalingFactors(date);
                    var priceFactor = factorFileRow?.PriceFactor ?? 1m;
                    var splitFactor = factorFileRow?.SplitFactor ?? 1m;

                    // sid,symbol,close,volume,dollar volume,has fundamental data,price factor,split factor
                    // BUGFIX: format with the invariant culture so decimals never pick up a ','
                    // decimal separator that would corrupt the CSV on non-invariant locales
                    var coarseFileLine = FormattableString.Invariant(
                        $"{sid},{symbol},{close},{volume},{Math.Truncate(dollarVolume)},{hasFundamentalDataForDate},{priceFactor},{splitFactor}");

                    StreamWriter writer;
                    if (!writers.TryGetValue(coarseFile, out writer))
                    {
                        writer = new StreamWriter(new FileStream(coarseFile, FileMode.Create, FileAccess.Write, FileShare.Write));
                        writers[coarseFile] = writer;
                    }
                    writer.WriteLine(coarseFileLine);
                }
            }

            if (symbols % 1000 == 0)
            {
                Log.Trace("CoarseGenerator.ProcessDailyFolder(): Completed processing {0} symbols. Current elapsed: {1} seconds", symbols, (DateTime.UtcNow - start).TotalSeconds.ToString("0.00"));
            }
        }
        catch (Exception err)
        {
            // log the error and continue with the process
            Log.Error(err.ToString());
        }
    }

    Log.Trace("CoarseGenerator.ProcessDailyFolder(): Saving {0} coarse files to disk", dates.Count);

    // dispose all the writers at the end of processing
    foreach (var writer in writers)
    {
        writer.Value.Dispose();
    }

    var stop = DateTime.UtcNow;

    Log.Trace("CoarseGenerator.ProcessDailyFolder(): Processed {0} symbols into {1} coarse files in {2} seconds", symbols, dates.Count, (stop - start).TotalSeconds.ToString("0.00"));
    Log.Trace("CoarseGenerator.ProcessDailyFolder(): Excluded {0} mapless symbols.", maplessCount);

    return writers.Keys;
}
/// <summary>
/// Converts the data from raw format (*.nz.tar.gz) to json files consumable by LEAN
/// </summary>
/// <param name="processingDate">Date to process SEC filings for</param>
public void Process(DateTime processingDate)
{
    // Process data into dictionary of CIK -> List{T} of tickers
    foreach (var line in File.ReadLines(Path.Combine(RawSource, "cik-ticker-mappings.txt")))
    {
        var tickerCik = line.Split('\t');
        var ticker = tickerCik[0];
        // tickerCik[0] = symbol, tickerCik[1] = CIK
        // Note that SEC tickers come in lowercase, so we don't have to alter the ticker
        var cikFormatted = tickerCik[1].PadLeft(10, '0');

        List<string> symbol;
        if (!CikTicker.TryGetValue(cikFormatted, out symbol))
        {
            symbol = new List<string>();
            CikTicker[cikFormatted] = symbol;
        }

        // SEC data list contains a null value in the ticker.txt file
        if (!string.IsNullOrWhiteSpace(ticker))
        {
            symbol.Add(ticker);
        }
    }

    // Merge both data sources to a single CIK -> List{T} of tickers
    foreach (var line in File.ReadLines(Path.Combine(RawSource, "cik-ticker-mappings-rankandfile.txt")))
    {
        var tickerInfo = line.Split('|');
        var companyCik = tickerInfo[0].PadLeft(10, '0');
        var companyTicker = tickerInfo[1].ToLowerInvariant();

        List<string> symbol;
        if (!CikTicker.TryGetValue(companyCik, out symbol))
        {
            symbol = new List<string>();
            CikTicker[companyCik] = symbol;
        }

        // Add null check just in case data comes malformed
        if (!symbol.Contains(companyTicker) && !string.IsNullOrWhiteSpace(companyTicker))
        {
            symbol.Add(companyTicker);
        }
    }

    var formattedDate = processingDate.ToStringInvariant(DateFormat.EightCharacter);
    var remoteRawData = new FileInfo(Path.Combine(RawSource, $"{formattedDate}.nc.tar.gz"));
    if (!remoteRawData.Exists)
    {
        if (Holidays.Contains(processingDate) || USHoliday.Dates.Contains(processingDate))
        {
            Log.Trace("SECDataConverter.Process(): File is missing, but we expected it to be missing. Nothing to do.");
            return;
        }
        throw new Exception($"SECDataConverter.Process(): Raw data {remoteRawData} not found. No processing can be done.");
    }

    // Copy the raw data to a temp path on disk
    Log.Trace($"SECDataConverter.Process(): Copying raw data locally...");
    var tempPath = Path.Combine(Path.GetTempPath(), Guid.NewGuid().ToStringInvariant(null));
    var localRawData = remoteRawData.CopyTo(tempPath);

    Log.Trace($"SECDataConverter.Process(): Copied raw data from {remoteRawData.FullName} - to: {tempPath}");

    Log.Trace($"SECDataConverter.Process(): Start processing...");

    var ncFilesRead = 0;
    var startingTime = DateTime.Now;
    var loopStartingTime = startingTime;

    // For the meantime, let's only process .nc files, and deal with correction files later.
    Parallel.ForEach(
        Compression.UnTar(localRawData.OpenRead(), isTarGz: true).Where(kvp => kvp.Key.EndsWith(".nc")),
        // BUGFIX: ProcessorCount / 2 evaluates to 0 on a single-core host, and
        // MaxDegreeOfParallelism = 0 makes ParallelOptions throw; clamp to at least 1
        new ParallelOptions { MaxDegreeOfParallelism = Math.Max(1, Environment.ProcessorCount / 2) },
        rawReportFilePath =>
        {
            var factory = new SECReportFactory();
            var xmlText = new StringBuilder();

            // We need to escape any nested XML to ensure our deserialization happens smoothly
            var parsingText = false;

            // SEC data is line separated by UNIX style line endings. No need to worry about a carriage line here.
            foreach (var line in Encoding.UTF8.GetString(rawReportFilePath.Value).Split('\n'))
            {
                var newTextLine = line;
                var currentTagName = GetTagNameFromLine(newTextLine);

                // This tag is present rarely in SEC reports, but is unclosed without value when encountered.
                // Verified by searching with ripgrep for "CONFIRMING-COPY"
                //
                // Sometimes, ASSIGNED-SIC contains no value and causes errors. Check to make sure that when
                // we encounter that tag we check if it has a value.
                //
                // "Appearance of the <FLAWED> tag in
                // an EX-27 document header signals unreliable tagging within the
                // following document text stream; however, in the absence of a
                // <FLAWED> tag, tagging is still not guaranteed to be complete
                // because of allowance in the financial data specifications for
                // omitted tags when the submission also includes a financial data
                // schedule of article type CT."
                if (currentTagName == "CONFIRMING-COPY" || (currentTagName == "ASSIGNED-SIC" && !HasValue(line)) || currentTagName == "FLAWED")
                {
                    continue;
                }

                // Indicates that the form is a paper submission and that the current file has no contents
                if (currentTagName == "PAPER")
                {
                    continue;
                }

                // Don't encode the closing tag
                if (currentTagName == "/TEXT")
                {
                    parsingText = false;
                }

                // To ensure that we can serialize/deserialize data with hours, minutes, seconds
                if (currentTagName == "FILING-DATE" || currentTagName == "PERIOD" || currentTagName == "DATE-OF-FILING-CHANGE" || currentTagName == "DATE-CHANGED")
                {
                    newTextLine = $"{newTextLine.TrimEnd()} 00:00:00";
                }

                // Encode all contents inside tags to prevent errors in XML parsing.
                // The json deserializer will convert these values back to their original form
                if (!parsingText && HasValue(newTextLine))
                {
                    newTextLine = $"<{currentTagName}>{SecurityElement.Escape(GetTagValueFromLine(newTextLine))}</{currentTagName}>";
                }
                // Escape all contents inside TEXT tags
                else if (parsingText)
                {
                    newTextLine = SecurityElement.Escape(newTextLine);
                }

                // Don't encode the opening tag
                if (currentTagName == "TEXT")
                {
                    parsingText = true;
                }

                xmlText.AppendLine(newTextLine);
            }

            var counter = Interlocked.Increment(ref ncFilesRead);
            if (counter % 100 == 0)
            {
                var interval = DateTime.Now - loopStartingTime;
                Log.Trace($"SECDataConverter.Process(): {counter.ToStringInvariant()} nc files read at {(100 / interval.TotalMinutes).ToStringInvariant("N2")} files/min.");
                loopStartingTime = DateTime.Now;
            }

            ISECReport report;
            try
            {
                report = factory.CreateSECReport(xmlText.ToString());
            }
            // Ignore unsupported form types for now
            catch (DataException)
            {
                return;
            }
            catch (XmlException e)
            {
                Log.Error(e, $"SECDataConverter.Process(): Failed to parse XML from file: {rawReportFilePath.Key}");
                return;
            }
            catch (Exception e)
            {
                Log.Error(e, "SECDataConverter.Process(): Unknown error encountered");
                return;
            }

            // First filer listed in SEC report is usually the company listed on stock exchanges
            var companyCik = report.Report.Filers.First().CompanyData.Cik;

            // Some companies can operate under two tickers, but have the same CIK.
            // Don't bother continuing if we don't find any tickers for the given CIK
            List<string> tickers;
            if (!CikTicker.TryGetValue(companyCik, out tickers))
            {
                return;
            }

            if (!File.Exists(Path.Combine(RawSource, "indexes", $"{companyCik}.json")))
            {
                Log.Error($"SECDataConverter.Process(): {report.Report.FilingDate.ToStringInvariant("yyyy-MM-dd")}:{rawReportFilePath.Key} - Failed to find index file for ticker {tickers.FirstOrDefault()} with CIK: {companyCik}");
                return;
            }

            try
            {
                // The index file can potentially be corrupted
                GetPublicationDate(report, companyCik);
            }
            catch (Exception e)
            {
                Log.Error(e, $"SECDataConverter.Process(): {report.Report.FilingDate.ToStringInvariant("yyyy-MM-dd")}:{rawReportFilePath.Key} - Index file loading failed for ticker: {tickers.FirstOrDefault()} with CIK: {companyCik} even though it exists");
            }

            // Default to company CIK if no known ticker is found.
            // If the equity is not does not resolve to a map file or
            // it is not found in the map files, we skip writing it.
            foreach (var ticker in tickers)
            {
                var tickerMapFile = _mapFileResolver.ResolveMapFile(ticker, processingDate);
                if (!tickerMapFile.Any())
                {
                    Log.Trace($"SECDataConverter.Process(): {processingDate.ToStringInvariant()} - Failed to find map file for ticker: {ticker}");
                    continue;
                }

                // Map the current ticker to the ticker it was in the past using the map file system
                var mappedTicker = tickerMapFile.GetMappedSymbol(processingDate);

                // If no suitable date is found for the symbol in the map file, we skip writing the data
                if (string.IsNullOrEmpty(mappedTicker))
                {
                    Log.Trace($"SECDataConverter.Process(): {processingDate.ToStringInvariant()} - Failed to find mapped symbol for ticker: {ticker}");
                    continue;
                }

                var tickerReports = Reports.GetOrAdd(
                    mappedTicker,
                    _ => new ConcurrentDictionary<DateTime, List<ISECReport>>()
                );
                var reports = tickerReports.GetOrAdd(
                    report.Report.FilingDate.Date,
                    _ => new List<ISECReport>()
                );

                // BUGFIX: the List<ISECReport> obtained from GetOrAdd is shared between the
                // parallel workers; List<T>.Add is not thread-safe, so synchronize the mutation
                lock (reports)
                {
                    reports.Add(report);
                }
            }
        }
    );

    Log.Trace($"SECDataConverter.Process(): {ncFilesRead} nc files read finished in {(DateTime.Now - startingTime).ToStringInvariant("g")}.");

    Parallel.ForEach(
        Reports.Keys,
        ticker =>
        {
            List<ISECReport> reports;
            if (!Reports[ticker].TryRemove(processingDate, out reports))
            {
                return;
            }

            WriteReport(reports, ticker);
        }
    );

    // Delete the raw data we copied to the temp folder
    File.Delete(tempPath);
}
/// <summary>
/// Iterates each daily file in the specified <paramref name="dailyFolder"/> and adds a line for each
/// day to the appropriate coarse file
/// </summary>
/// <param name="dailyFolder">The folder with daily data</param>
/// <param name="coarseFolder">The coarse output folder</param>
/// <param name="mapFileResolver">Resolves map files used to substitute the permtick for the symbol</param>
/// <param name="exclusions">The symbols to be excluded from processing</param>
/// <param name="ignoreMapless">Ignore the symbols without a map file.</param>
/// <param name="symbolResolver">Function used to provide symbol resolution. Default resolution uses the zip file name to resolve
/// the symbol, specify null for this behavior.</param>
/// <returns>A collection of the generated coarse files</returns>
public static ICollection<string> ProcessDailyFolder(string dailyFolder, string coarseFolder, MapFileResolver mapFileResolver, HashSet<string> exclusions, bool ignoreMapless, Func<string, string> symbolResolver = null)
{
    const decimal scaleFactor = 10000m;

    Log.Trace("Processing: {0}", dailyFolder);

    var stopwatch = Stopwatch.StartNew();

    // load map files into memory
    var symbols = 0;
    var maplessCount = 0;
    var dates = new HashSet<DateTime>();

    // instead of opening/closing these constantly, open them once and dispose at the end (~3x speed improvement)
    var writers = new Dictionary<string, StreamWriter>();

    // open up each daily file to get the values and append to the daily coarse files
    foreach (var file in Directory.EnumerateFiles(dailyFolder))
    {
        try
        {
            var symbol = Path.GetFileNameWithoutExtension(file);
            if (symbol == null)
            {
                Log.Trace("CoarseGenerator.ProcessDailyFolder(): Unable to resolve symbol from file: {0}", file);
                continue;
            }

            if (symbolResolver != null)
            {
                symbol = symbolResolver(symbol);
            }
            symbol = symbol.ToUpper();

            if (exclusions.Contains(symbol))
            {
                Log.Trace("Excluded symbol: {0}", symbol);
                continue;
            }

            ZipFile zip;
            using (var reader = Compression.Unzip(file, out zip))
            {
                // 30 period EMA constant
                const decimal k = 2m / (30 + 1);

                var seeded = false;
                var runningAverageVolume = 0m;

                var checkedForMapFile = false;

                symbols++;
                string line;
                while ((line = reader.ReadLine()) != null)
                {
                    //20150625.csv
                    var csv = line.Split(',');
                    var date = DateTime.ParseExact(csv[0], DateFormat.TwelveCharacter, CultureInfo.InvariantCulture);

                    if (ignoreMapless && !checkedForMapFile)
                    {
                        checkedForMapFile = true;
                        if (!mapFileResolver.ResolveMapFile(symbol, date).Any())
                        {
                            // if the resolved map file has zero entries then it's a mapless symbol
                            maplessCount++;
                            break;
                        }
                    }

                    // BUGFIX: parse the machine-written zip entries with the invariant culture so
                    // behavior doesn't depend on the host machine's regional settings (CA1305)
                    var close = decimal.Parse(csv[4], CultureInfo.InvariantCulture) / scaleFactor;
                    var volume = long.Parse(csv[5], CultureInfo.InvariantCulture);

                    // compute the current volume EMA for dollar volume calculations
                    runningAverageVolume = seeded
                        ? volume * k + runningAverageVolume * (1 - k)
                        : volume;

                    seeded = true;

                    var dollarVolume = close * runningAverageVolume;

                    var coarseFile = Path.Combine(coarseFolder, date.ToString("yyyyMMdd", CultureInfo.InvariantCulture) + ".csv");
                    dates.Add(date);

                    // try to resolve a map file and if found, use the permtick as the symbol
                    var sid = symbol;
                    var mapFile = mapFileResolver.ResolveMapFile(sid, date);
                    if (mapFile != null)
                    {
                        // if available, use the permtick in the coarse files, because of this, we need
                        // to update the coarse files each time new map files are added/permticks change
                        sid = mapFile.Permtick;
                    }

                    if (mapFile == null && ignoreMapless)
                    {
                        // if we're ignoring mapless files then we should always be able to resolve this
                        Log.Error(string.Format("CoarseGenerator.ProcessDailyFolder(): Unable to resolve map file for {0} as of {1}", symbol, date.ToShortDateString()));
                        continue;
                    }

                    // sid,symbol,close,volume,dollar volume
                    // BUGFIX: format numeric values with the invariant culture so a ',' decimal
                    // separator can never corrupt the comma-delimited output
                    var coarseFileLine = sid + "," + symbol + "," +
                        close.ToString(CultureInfo.InvariantCulture) + "," +
                        volume.ToString(CultureInfo.InvariantCulture) + "," +
                        Math.Truncate(dollarVolume).ToString(CultureInfo.InvariantCulture);

                    StreamWriter writer;
                    if (!writers.TryGetValue(coarseFile, out writer))
                    {
                        writer = new StreamWriter(new FileStream(coarseFile, FileMode.Append, FileAccess.Write, FileShare.Write));
                        writers[coarseFile] = writer;
                    }
                    writer.WriteLine(coarseFileLine);
                }
            }

            if (symbols % 1000 == 0)
            {
                Log.Trace("CoarseGenerator.ProcessDailyFolder(): Completed processing {0} symbols. Current elapsed: {1} seconds", symbols, stopwatch.Elapsed.TotalSeconds.ToString("0.00"));
            }
        }
        catch (Exception err)
        {
            // log the error and continue with the process
            Log.Error(err.ToString());
        }
    }

    Log.Trace("CoarseGenerator.ProcessDailyFolder(): Saving {0} coarse files to disk", dates.Count);

    // dispose all the writers at the end of processing
    foreach (var writer in writers)
    {
        writer.Value.Dispose();
    }

    stopwatch.Stop();

    Log.Trace("CoarseGenerator.ProcessDailyFolder(): Processed {0} symbols into {1} coarse files in {2} seconds", symbols, dates.Count, stopwatch.Elapsed.TotalSeconds.ToString("0.00"));
    Log.Trace("CoarseGenerator.ProcessDailyFolder(): Excluded {0} mapless symbols.", maplessCount);

    return writers.Keys;
}
/// <summary>
/// Iterates each daily file in the specified <paramref name="dailyFolder"/> and adds a line for each
/// day to the appropriate coarse file
/// </summary>
/// <param name="dailyFolder">The folder with daily data</param>
/// <param name="coarseFolder">The coarse output folder</param>
/// <param name="mapFileResolver">Resolves map files used to substitute the permtick for the symbol</param>
/// <param name="exclusions">The symbols to be excluded from processing</param>
/// <param name="ignoreMapless">Ignore the symbols without a map file.</param>
/// <param name="symbolResolver">Function used to provide symbol resolution. Default resolution uses the zip file name to resolve
/// the symbol, specify null for this behavior.</param>
/// <returns>A collection of the generated coarse files</returns>
public static ICollection<string> ProcessDailyFolder(string dailyFolder, string coarseFolder, MapFileResolver mapFileResolver, HashSet<string> exclusions, bool ignoreMapless, Func<string, string> symbolResolver = null)
{
    const decimal scaleFactor = 10000m;

    Log.Trace("Processing: {0}", dailyFolder);

    var stopwatch = Stopwatch.StartNew();

    // load map files into memory
    var symbols = 0;
    var maplessCount = 0;
    var dates = new HashSet<DateTime>();

    // instead of opening/closing these constantly, open them once and dispose at the end (~3x speed improvement)
    var writers = new Dictionary<string, StreamWriter>();

    // open up each daily file to get the values and append to the daily coarse files
    foreach (var file in Directory.EnumerateFiles(dailyFolder))
    {
        try
        {
            var symbol = Path.GetFileNameWithoutExtension(file);
            if (symbol == null)
            {
                Log.Trace("CoarseGenerator.ProcessDailyFolder(): Unable to resolve symbol from file: {0}", file);
                continue;
            }

            if (symbolResolver != null)
            {
                symbol = symbolResolver(symbol);
            }
            symbol = symbol.ToUpper();

            if (exclusions.Contains(symbol))
            {
                Log.Trace("Excluded symbol: {0}", symbol);
                continue;
            }

            ZipFile zip;
            using (var reader = Compression.Unzip(file, out zip))
            {
                // 30 period EMA constant
                const decimal k = 2m / (30 + 1);

                var seeded = false;
                var runningAverageVolume = 0m;

                var checkedForMapFile = false;

                symbols++;
                string line;
                while ((line = reader.ReadLine()) != null)
                {
                    //20150625.csv
                    var csv = line.Split(',');
                    var date = DateTime.ParseExact(csv[0], DateFormat.TwelveCharacter, CultureInfo.InvariantCulture);

                    if (ignoreMapless && !checkedForMapFile)
                    {
                        checkedForMapFile = true;
                        if (!mapFileResolver.ResolveMapFile(symbol, date).Any())
                        {
                            // if the resolved map file has zero entries then it's a mapless symbol
                            maplessCount++;
                            break;
                        }
                    }

                    // BUGFIX: parse the machine-written zip entries with the invariant culture so
                    // behavior doesn't depend on the host machine's regional settings (CA1305)
                    var close = decimal.Parse(csv[4], CultureInfo.InvariantCulture) / scaleFactor;
                    var volume = long.Parse(csv[5], CultureInfo.InvariantCulture);

                    // compute the current volume EMA for dollar volume calculations
                    runningAverageVolume = seeded
                        ? volume * k + runningAverageVolume * (1 - k)
                        : volume;

                    seeded = true;

                    var dollarVolume = close * runningAverageVolume;

                    var coarseFile = Path.Combine(coarseFolder, date.ToString("yyyyMMdd", CultureInfo.InvariantCulture) + ".csv");
                    dates.Add(date);

                    // try to resolve a map file and if found, use the permtick as the symbol
                    var sid = symbol;
                    var mapFile = mapFileResolver.ResolveMapFile(sid, date);
                    if (mapFile != null)
                    {
                        // if available, use the permtick in the coarse files, because of this, we need
                        // to update the coarse files each time new map files are added/permticks change
                        sid = mapFile.Permtick;
                    }

                    if (mapFile == null && ignoreMapless)
                    {
                        // if we're ignoring mapless files then we should always be able to resolve this
                        Log.Error(string.Format("CoarseGenerator.ProcessDailyFolder(): Unable to resolve map file for {0} as of {1}", symbol, date.ToShortDateString()));
                        continue;
                    }

                    // sid,symbol,close,volume,dollar volume
                    // BUGFIX: format numeric values with the invariant culture so a ',' decimal
                    // separator can never corrupt the comma-delimited output
                    var coarseFileLine = sid + "," + symbol + "," +
                        close.ToString(CultureInfo.InvariantCulture) + "," +
                        volume.ToString(CultureInfo.InvariantCulture) + "," +
                        Math.Truncate(dollarVolume).ToString(CultureInfo.InvariantCulture);

                    StreamWriter writer;
                    if (!writers.TryGetValue(coarseFile, out writer))
                    {
                        writer = new StreamWriter(new FileStream(coarseFile, FileMode.Append, FileAccess.Write, FileShare.Write));
                        writers[coarseFile] = writer;
                    }
                    writer.WriteLine(coarseFileLine);
                }
            }

            if (symbols % 1000 == 0)
            {
                Log.Trace("CoarseGenerator.ProcessDailyFolder(): Completed processing {0} symbols. Current elapsed: {1} seconds", symbols, stopwatch.Elapsed.TotalSeconds.ToString("0.00"));
            }
        }
        catch (Exception err)
        {
            // log the error and continue with the process
            Log.Error(err.ToString());
        }
    }

    Log.Trace("CoarseGenerator.ProcessDailyFolder(): Saving {0} coarse files to disk", dates.Count);

    // dispose all the writers at the end of processing
    foreach (var writer in writers)
    {
        writer.Value.Dispose();
    }

    stopwatch.Stop();

    Log.Trace("CoarseGenerator.ProcessDailyFolder(): Processed {0} symbols into {1} coarse files in {2} seconds", symbols, dates.Count, stopwatch.Elapsed.TotalSeconds.ToString("0.00"));
    Log.Trace("CoarseGenerator.ProcessDailyFolder(): Excluded {0} mapless symbols.", maplessCount);

    return writers.Keys;
}
/// <summary>
/// Runs the instance of the object. Downloads Estimize earnings releases for each
/// company, remaps each release to the historical ticker via map files and writes
/// the results to per-ticker CSV files.
/// </summary>
/// <returns>True if process all downloads successfully</returns>
public override bool Run()
{
    var stopwatch = Stopwatch.StartNew();

    try
    {
        var companies = GetCompanies().Result.DistinctBy(x => x.Ticker).ToList();
        var count = companies.Count;
        // progress reporting threshold and step (fractions of the total company count)
        var currentPercent = 0.05;
        var percent = 0.05;
        // shared counter incremented by the download continuations below
        var i = 0;

        Log.Trace($"EstimizeReleaseDataDownloader.Run(): Start processing {count} companies");

        var tasks = new List<Task>();

        foreach (var company in companies)
        {
            // Makes sure we don't overrun Estimize rate limits accidentally
            IndexGate.WaitToProceed();

            // Include tickers that are "defunct".
            // Remove the tag because it cannot be part of the API endpoint.
            // This is separate from the NormalizeTicker(...) method since
            // we don't convert tickers with `-`s into the format we can successfully
            // index mapfiles with.
            var estimizeTicker = company.Ticker;
            string ticker;
            if (!TryNormalizeDefunctTicker(estimizeTicker, out ticker))
            {
                Log.Error($"EstimizeReleaseDataDownloader(): Defunct ticker {estimizeTicker} is unable to be parsed. Continuing...");
                continue;
            }

            // Begin processing ticker with a normalized value
            Log.Trace($"EstimizeReleaseDataDownloader.Run(): Processing {ticker}");

            tasks.Add(
                HttpRequester($"/companies/{ticker}/releases")
                    .ContinueWith(
                        y =>
                        {
                            // BUGFIX: continuations can execute concurrently on the thread pool,
                            // so a plain i++ could lose increments; use an atomic increment and
                            // keep the observed value for the progress computation below
                            var completed = System.Threading.Interlocked.Increment(ref i);

                            if (y.IsFaulted)
                            {
                                Log.Error($"EstimizeReleaseDataDownloader.Run(): Failed to get data for {company}");
                                return;
                            }

                            var result = y.Result;
                            if (string.IsNullOrEmpty(result))
                            {
                                // We've already logged inside HttpRequester
                                return;
                            }

                            // Just like TradingEconomics, we only want the events that already occured
                            // instead of having "forecasts" that will change in the future taint our
                            // data and make backtests non-deterministic. We want to have
                            // consistency with our data in live trading historical requests as well
                            var releases = JsonConvert.DeserializeObject<List<EstimizeRelease>>(result, JsonSerializerSettings)
                                .Where(x => x.Eps != null)
                                .GroupBy(x =>
                                {
                                    var normalizedTicker = NormalizeTicker(ticker);
                                    var releaseDate = x.ReleaseDate;

                                    try
                                    {
                                        var mapFile = _mapFileResolver.ResolveMapFile(normalizedTicker, releaseDate);
                                        var oldTicker = normalizedTicker;
                                        var newTicker = normalizedTicker;

                                        // Ensure we're writing to the correct historical ticker
                                        if (!mapFile.Any())
                                        {
                                            Log.Trace($"EstimizeReleaseDataDownloader.Run(): Failed to find map file for: {newTicker} - on: {releaseDate}");
                                            return string.Empty;
                                        }

                                        newTicker = mapFile.GetMappedSymbol(releaseDate);
                                        if (string.IsNullOrWhiteSpace(newTicker))
                                        {
                                            Log.Trace($"EstimizeReleaseDataDownloader.Run(): Failed to find mapping for null new ticker. Old ticker: {oldTicker} - on: {releaseDate}");
                                            return string.Empty;
                                        }

                                        if (oldTicker != newTicker)
                                        {
                                            Log.Trace($"EstimizeReleaseDataDownloader.Run(): Remapped from {oldTicker} to {newTicker} for {releaseDate}");
                                        }

                                        return newTicker;
                                    }
                                    // We get a failure inside the map file constructor rarely. It tries
                                    // to access the last element of an empty list. Maybe this is a bug?
                                    catch (InvalidOperationException e)
                                    {
                                        Log.Error(e, $"EstimizeReleaseDataDownloader.Run(): Failed to load map file for: {normalizedTicker} - on: {releaseDate}");
                                        return string.Empty;
                                    }
                                })
                                .Where(x => !string.IsNullOrEmpty(x.Key));

                            foreach (var kvp in releases)
                            {
                                var csvContents = kvp.Select(x =>
                                    $"{x.ReleaseDate.ToUniversalTime():yyyyMMdd HH:mm:ss},{x.Id},{x.FiscalYear},{x.FiscalQuarter},{x.Eps},{x.Revenue},{x.ConsensusEpsEstimate},{x.ConsensusRevenueEstimate},{x.WallStreetEpsEstimate},{x.WallStreetRevenueEstimate},{x.ConsensusWeightedEpsEstimate},{x.ConsensusWeightedRevenueEstimate}");
                                SaveContentToFile(_destinationFolder, kvp.Key, csvContents);
                            }

                            // BUGFIX: this was integer division (i / count), which is always 0 until
                            // the final task, so progress was never reported; use floating point.
                            // Also fixed the log prefix, which incorrectly named the Estimate downloader.
                            var percentDone = (double)completed / count;
                            if (percentDone >= currentPercent)
                            {
                                Log.Trace($"EstimizeReleaseDataDownloader.Run(): {percentDone:P2} complete");
                                // NOTE(review): currentPercent is still updated without synchronization;
                                // a race here can at worst duplicate a progress log line
                                currentPercent += percent;
                            }
                        }
                    )
            );
        }

        Task.WaitAll(tasks.ToArray());
    }
    catch (Exception e)
    {
        Log.Error(e);
        return false;
    }

    Log.Trace($"EstimizeReleaseDataDownloader.Run(): Finished in {stopwatch.Elapsed}");
    return true;
}
/// <summary>
/// Subscription data reader takes a subscription request, loads the type, accepts the data source and enumerate on the results.
/// </summary>
/// <param name="config">Subscription configuration object</param>
/// <param name="periodStart">Start date for the data request/backtest</param>
/// <param name="periodFinish">Finish date for the data request/backtest</param>
/// <param name="resultHandler">Result handler used to push error messages and perform sampling on skipped days</param>
/// <param name="mapFileResolver">Used for resolving the correct map files</param>
/// <param name="factorFileProvider">Used for getting factor files</param>
/// <param name="dataProvider">Used for getting files not present on disk</param>
/// <param name="dataCacheProvider">Used for caching files</param>
/// <param name="tradeableDates">Defines the dates for which we'll request data, in order, in the security's exchange time zone</param>
/// <param name="isLiveMode">True if we're in live mode, false otherwise</param>
/// <param name="includeAuxilliaryData">True if we want to emit aux data, false to only emit price data</param>
public SubscriptionDataReader(SubscriptionDataConfig config,
    DateTime periodStart,
    DateTime periodFinish,
    IResultHandler resultHandler,
    MapFileResolver mapFileResolver,
    IFactorFileProvider factorFileProvider,
    IDataProvider dataProvider,
    IEnumerable<DateTime> tradeableDates,
    bool isLiveMode,
    IDataCacheProvider dataCacheProvider,
    bool includeAuxilliaryData = true)
{
    //Save configuration of data-subscription:
    _config = config;
    _auxiliaryData = new Queue<BaseData>();

    //Save Start and End Dates:
    _periodStart = periodStart;
    _periodFinish = periodFinish;
    _dataProvider = dataProvider;
    _dataCacheProvider = dataCacheProvider;

    //Save access to securities
    _isLiveMode = isLiveMode;
    _includeAuxilliaryData = includeAuxilliaryData;

    //Save the type of data we'll be getting from the source.
    //Create the dynamic type-activators:
    var objectActivator = ObjectActivator.GetActivator(config.Type);

    _resultHandler = resultHandler;
    _tradeableDates = tradeableDates.GetEnumerator();
    if (objectActivator == null)
    {
        // Without a usable activator we cannot create data instances for this type;
        // surface the error and mark the stream finished so consumers stop immediately.
        _resultHandler.ErrorMessage("Custom data type '" + config.Type.Name + "' missing parameterless constructor E.g. public " + config.Type.Name + "() { }");
        _endOfStream = true;
        return;
    }

    //Create an instance of the "Type":
    // NOTE(review): this overload invokes the activator with config.Type as an argument, while
    // the overload without IDataCacheProvider passes an empty object[] — confirm which argument
    // list the activator actually expects; they should presumably agree.
    var userObj = objectActivator.Invoke(new object[] { config.Type });
    _dataFactory = userObj as BaseData;

    //If its quandl set the access token in data factory:
    var quandl = _dataFactory as Quandl;
    if (quandl != null)
    {
        if (!Quandl.IsAuthCodeSet)
        {
            Quandl.SetAuthCode(Config.Get("quandl-auth-token"));
        }
    }

    // If Tiingo data, set the access token in data factory
    var tiingo = _dataFactory as TiingoDailyData;
    if (tiingo != null)
    {
        if (!Tiingo.IsAuthCodeSet)
        {
            Tiingo.SetAuthCode(Config.Get("tiingo-auth-token"));
        }
    }

    // Start with empty factor/map files; they are only replaced below when real files resolve,
    // so the reader always has non-null instances to work with.
    _factorFile = new FactorFile(config.Symbol.Value, new List<FactorFileRow>());
    _mapFile = new MapFile(config.Symbol.Value, new List<MapFileRow>());

    // load up the map and factor files for equities
    if (!config.IsCustomData && config.SecurityType == SecurityType.Equity)
    {
        try
        {
            var mapFile = mapFileResolver.ResolveMapFile(config.Symbol.ID.Symbol, config.Symbol.ID.Date);

            // only take the resolved map file if it has data, otherwise we'll use the empty one we defined above
            if (mapFile.Any()) { _mapFile = mapFile; }

            var factorFile = factorFileProvider.Get(_config.Symbol);
            _hasScaleFactors = factorFile != null;
            if (_hasScaleFactors)
            {
                _factorFile = factorFile;

                // if factor file has minimum date, update start period if before minimum date
                if (!_isLiveMode && _factorFile != null && _factorFile.FactorFileMinimumDate.HasValue)
                {
                    if (_periodStart < _factorFile.FactorFileMinimumDate.Value)
                    {
                        // Clamp the start of the backtest to the factor file's minimum date and
                        // tell the user why their requested start date was not honored.
                        _periodStart = _factorFile.FactorFileMinimumDate.Value;

                        _resultHandler.DebugMessage(string.Format("Data for symbol {0} has been limited due to numerical precision issues in the factor file. The starting date has been set to {1}.", config.Symbol.Value, _factorFile.FactorFileMinimumDate.Value.ToShortDateString()));
                    }
                }
            }
        }
        catch (Exception err)
        {
            // best effort: a failed map/factor resolution leaves the empty defaults in place
            Log.Error(err, "Fetching Price/Map Factors: " + config.Symbol.ID + ": ");
        }
    }

    // load up the map and factor files for underlying of equity option
    if (!config.IsCustomData && config.SecurityType == SecurityType.Option)
    {
        try
        {
            var mapFile = mapFileResolver.ResolveMapFile(config.Symbol.Underlying.ID.Symbol, config.Symbol.Underlying.ID.Date);

            // only take the resolved map file if it has data, otherwise we'll use the empty one we defined above
            if (mapFile.Any()) { _mapFile = mapFile; }
        }
        catch (Exception err)
        {
            Log.Error(err, "Map Factors: " + config.Symbol.ID + ": ");
        }
    }

    // Estimate delisting date.
    switch (_config.Symbol.ID.SecurityType)
    {
        case SecurityType.Future:
            // futures simply stop trading at the contract's expiry encoded in the SID
            _delistingDate = _config.Symbol.ID.Date;
            break;
        case SecurityType.Option:
            _delistingDate = OptionSymbol.GetLastDayOfTrading(_config.Symbol);
            break;
        default:
            // equities (and everything else) use the map file's delisting date
            _delistingDate = _mapFile.DelistingDate;
            break;
    }

    _subscriptionFactoryEnumerator = ResolveDataEnumerator(true);
}
/// <summary>
/// Initializes the <see cref="SubscriptionDataReader"/> instance
/// </summary>
/// <remarks>Should be called after all consumers of <see cref="NewTradableDate"/> event are set,
/// since it will produce events.</remarks>
public void Initialize()
{
    // idempotent: repeated calls are no-ops once initialization succeeded
    if (_initialized)
    {
        return;
    }

    //Save the type of data we'll be getting from the source.
    try
    {
        _dataFactory = _config.GetBaseDataInstance();
    }
    catch (ArgumentException exception)
    {
        // the configured type could not be instantiated; notify listeners and end the stream
        OnInvalidConfigurationDetected(new InvalidConfigurationDetectedEventArgs(_config.Symbol, exception.Message));
        _endOfStream = true;
        return;
    }

    //If its quandl set the access token in data factory:
    var quandl = _dataFactory as Quandl;
    if (quandl != null)
    {
        if (!Quandl.IsAuthCodeSet)
        {
            Quandl.SetAuthCode(Config.Get("quandl-auth-token"));
        }
    }

    // If Tiingo data, set the access token in data factory
    var tiingo = _dataFactory as TiingoPrice;
    if (tiingo != null)
    {
        if (!Tiingo.IsAuthCodeSet)
        {
            Tiingo.SetAuthCode(Config.Get("tiingo-auth-token"));
        }
    }

    // If USEnergyAPI data, set the access token in data factory
    var energyInformation = _dataFactory as USEnergyAPI;
    if (energyInformation != null)
    {
        if (!USEnergyAPI.IsAuthCodeSet)
        {
            USEnergyAPI.SetAuthCode(Config.Get("us-energy-information-auth-token"));
        }
    }

    // If Fred data, set the access token in data factory
    var fred = _dataFactory as FredApi;
    if (fred != null)
    {
        if (!FredApi.IsAuthCodeSet)
        {
            FredApi.SetAuthCode(Config.Get("fred-auth-token"));
        }
    }

    // Start with empty factor/map files so the reader always holds non-null instances;
    // they are replaced below only when real files resolve with data.
    _factorFile = new FactorFile(_config.Symbol.Value, new List<FactorFileRow>());
    _mapFile = new MapFile(_config.Symbol.Value, new List<MapFileRow>());

    // load up the map files for equities, options, and custom data if it supports it.
    // Only load up factor files for equities
    if (_dataFactory.RequiresMapping())
    {
        try
        {
            var mapFile = _mapFileResolver.ResolveMapFile(_config.Symbol, _config.Type);

            // only take the resolved map file if it has data, otherwise we'll use the empty one we defined above
            if (mapFile.Any()) { _mapFile = mapFile; }

            if (!_config.IsCustomData && !_config.SecurityType.IsOption())
            {
                var factorFile = _factorFileProvider.Get(_config.Symbol);
                _hasScaleFactors = factorFile != null;
                if (_hasScaleFactors)
                {
                    _factorFile = factorFile;

                    // if factor file has minimum date, update start period if before minimum date
                    if (!_isLiveMode && _factorFile != null && _factorFile.FactorFileMinimumDate.HasValue)
                    {
                        if (_periodStart < _factorFile.FactorFileMinimumDate.Value)
                        {
                            // clamp the start date and raise an event so the user learns why
                            _periodStart = _factorFile.FactorFileMinimumDate.Value;

                            OnNumericalPrecisionLimited(new NumericalPrecisionLimitedEventArgs(_config.Symbol, $"[{_config.Symbol.Value}, {_factorFile.FactorFileMinimumDate.Value.ToShortDateString()}]"));
                        }
                    }
                }

                // never request data earlier than the map file's first known date
                if (_periodStart < mapFile.FirstDate)
                {
                    _periodStart = mapFile.FirstDate;

                    OnStartDateLimited(new StartDateLimitedEventArgs(_config.Symbol, $"[{_config.Symbol.Value}," + $" {mapFile.FirstDate.ToString("yyyy-MM-dd", CultureInfo.InvariantCulture)}]"));
                }
            }
        }
        catch (Exception err)
        {
            // best effort: a failed map/factor resolution leaves the empty defaults in place
            Log.Error(err, "Fetching Price/Map Factors: " + _config.Symbol.ID + ": ");
        }
    }

    _delistingDate = _config.Symbol.GetDelistingDate(_mapFile);

    // adding a day so we stop at EOD
    _delistingDate = _delistingDate.AddDays(1);

    UpdateDataEnumerator(true);

    _initialized = true;
}
/// <summary>
/// Runs the instance of the object: downloads Estimize estimates for every distinct
/// company ticker, remaps each estimate to its historical ticker via the map files,
/// and writes the results as CSV files to the destination folder.
/// </summary>
/// <returns>True if process all downloads successfully</returns>
public override bool Run()
{
    var stopwatch = Stopwatch.StartNew();

    try
    {
        var companies = GetCompanies().Result.DistinctBy(x => x.Ticker).ToList();
        var count = companies.Count;
        var currentPercent = 0.05;
        var percent = 0.05;
        // shared progress counter; continuations run concurrently, so it is only
        // ever mutated via Interlocked.Increment below
        var i = 0;

        Log.Trace($"EstimizeEstimateDataDownloader.Run(): Start processing {count} companies");

        var tasks = new List<Task>();

        foreach (var company in companies)
        {
            var ticker = company.Ticker;

            // Include tickers that are "defunct".
            // Remove the tag because it cannot be part of the API endpoint
            if (ticker.IndexOf("defunct", StringComparison.OrdinalIgnoreCase) > 0)
            {
                var length = ticker.IndexOf('-');
                ticker = ticker.Substring(0, length).Trim();
            }

            Log.Trace($"EstimizeEstimateDataDownloader.Run(): Processing {ticker}");

            try
            {
                // Makes sure we don't overrun Estimize rate limits accidentally
                IndexGate.WaitToProceed();
            }
            // This is super super rare, but it failures in RateGate (RG) can still happen nonetheless. Let's not
            // rely on RG operating successfully all the time so that if RG fails, our download process doesn't fail
            catch (ArgumentOutOfRangeException e)
            {
                Log.Error(e, $"EstimizeEstimateDataDownloader.Run(): RateGate failed. Sleeping for 110 milliseconds with Thread.Sleep()");
                Thread.Sleep(110);
            }

            tasks.Add(
                HttpRequester($"/companies/{ticker}/estimates")
                    .ContinueWith(
                        y =>
                        {
                            // BUGFIX: continuations execute concurrently, so a plain i++ is a data
                            // race; increment atomically and use the returned value for progress.
                            var processed = Interlocked.Increment(ref i);

                            if (y.IsFaulted)
                            {
                                Log.Error($"EstimizeEstimateDataDownloader.Run(): Failed to get data for {company}");
                                return;
                            }

                            var result = y.Result;
                            if (string.IsNullOrEmpty(result))
                            {
                                // We've already logged inside HttpRequester
                                return;
                            }

                            // Group estimates by their historical (map-file resolved) ticker so each
                            // group lands in the correct output file; empty keys mark failures.
                            var estimates = JsonConvert.DeserializeObject<List<EstimizeEstimate>>(result)
                                .GroupBy(estimate =>
                                {
                                    var oldTicker = ticker;
                                    var newTicker = ticker;
                                    var createdAt = estimate.CreatedAt;

                                    try
                                    {
                                        var mapFile = _mapFileResolver.ResolveMapFile(ticker, createdAt);

                                        // Ensure we're writing to the correct historical ticker
                                        if (!mapFile.Any())
                                        {
                                            Log.Trace($"EstimizeEstimateDataDownloader.Run(): Failed to find map file for: {newTicker} - on: {createdAt}");
                                            return string.Empty;
                                        }

                                        newTicker = mapFile.GetMappedSymbol(createdAt);
                                        if (string.IsNullOrWhiteSpace(newTicker))
                                        {
                                            Log.Trace($"EstimizeEstimateDataDownloader.Run(): New ticker is null. Old ticker: {oldTicker} - on: {createdAt}");
                                            return string.Empty;
                                        }

                                        if (oldTicker != newTicker)
                                        {
                                            // BUGFIX: corrected "Donwloader" typo in the log message
                                            Log.Trace($"EstimizeEstimateDataDownloader.Run(): Remapping {oldTicker} to {newTicker}");
                                        }
                                    }
                                    // We get a failure inside the map file constructor rarely. It tries
                                    // to access the last element of an empty list. Maybe this is a bug?
                                    catch (InvalidOperationException e)
                                    {
                                        Log.Error(e, $"EstimizeEstimateDataDownloader.Run(): Failed to load map file for: {oldTicker} - on {createdAt}");
                                        return string.Empty;
                                    }

                                    return newTicker;
                                })
                                .Where(kvp => !string.IsNullOrEmpty(kvp.Key));

                            foreach (var kvp in estimates)
                            {
                                var csvContents = kvp.Select(x =>
                                    $"{x.CreatedAt.ToUniversalTime():yyyyMMdd HH:mm:ss},{x.Id},{x.AnalystId},{x.UserName},{x.FiscalYear},{x.FiscalQuarter},{x.Eps},{x.Revenue},{x.Flagged.ToString().ToLower()}");

                                SaveContentToFile(_destinationFolder, kvp.Key, csvContents);
                            }

                            // BUGFIX: i / count was integer division, which is 0 until the very last
                            // company, so progress was never reported; divide as doubles instead.
                            var percentageDone = (double)processed / count;
                            if (percentageDone >= currentPercent)
                            {
                                Log.Trace($"EstimizeEstimateDataDownloader.Run(): {percentageDone:P2} complete");
                                currentPercent += percent;
                            }
                        }
                    )
            );
        }

        Task.WaitAll(tasks.ToArray());
    }
    catch (Exception e)
    {
        Log.Error(e);
        return false;
    }

    Log.Trace($"EstimizeEstimateDataDownloader.Run(): Finished in {stopwatch.Elapsed}");
    return true;
}
/// <summary>
/// Subscription data reader takes a subscription request, loads the type, accepts the data source and enumerate on the results.
/// </summary>
/// <param name="config">Subscription configuration object</param>
/// <param name="periodStart">Start date for the data request/backtest</param>
/// <param name="periodFinish">Finish date for the data request/backtest</param>
/// <param name="resultHandler">Result handler used to push error messages and perform sampling on skipped days</param>
/// <param name="mapFileResolver">Used for resolving the correct map files</param>
/// <param name="factorFileProvider">Used for getting factor files</param>
/// <param name="tradeableDates">Defines the dates for which we'll request data, in order, in the security's exchange time zone</param>
/// <param name="isLiveMode">True if we're in live mode, false otherwise</param>
/// <param name="includeAuxilliaryData">True if we want to emit aux data, false to only emit price data</param>
public SubscriptionDataReader(SubscriptionDataConfig config,
    DateTime periodStart,
    DateTime periodFinish,
    IResultHandler resultHandler,
    MapFileResolver mapFileResolver,
    IFactorFileProvider factorFileProvider,
    IEnumerable<DateTime> tradeableDates,
    bool isLiveMode,
    bool includeAuxilliaryData = true)
{
    //Save configuration of data-subscription:
    _config = config;
    _auxiliaryData = new Queue<BaseData>();

    //Save Start and End Dates:
    _periodStart = periodStart;
    _periodFinish = periodFinish;

    //Save access to securities
    _isLiveMode = isLiveMode;
    _includeAuxilliaryData = includeAuxilliaryData;

    //Save the type of data we'll be getting from the source.
    //Create the dynamic type-activators:
    var objectActivator = ObjectActivator.GetActivator(config.Type);

    _resultHandler = resultHandler;
    _tradeableDates = tradeableDates.GetEnumerator();
    if (objectActivator == null)
    {
        // Without a usable activator we cannot create data instances for this type;
        // surface the error and mark the stream finished so consumers stop immediately.
        _resultHandler.ErrorMessage("Custom data type '" + config.Type.Name + "' missing parameterless constructor E.g. public " + config.Type.Name + "() { }");
        _endOfStream = true;
        return;
    }

    //Create an instance of the "Type":
    // NOTE(review): this overload invokes the activator with an empty argument array, while the
    // overload taking IDataCacheProvider passes new object[] { config.Type } — confirm which
    // argument list the activator actually expects; they should presumably agree.
    var userObj = objectActivator.Invoke(new object[] {});
    _dataFactory = userObj as BaseData;

    //If its quandl set the access token in data factory:
    var quandl = _dataFactory as Quandl;
    if (quandl != null)
    {
        if (!Quandl.IsAuthCodeSet)
        {
            Quandl.SetAuthCode(Config.Get("quandl-auth-token"));
        }
    }

    // Start with empty factor/map files; they are only replaced below when real files resolve,
    // so the reader always has non-null instances to work with.
    _factorFile = new FactorFile(config.Symbol.Value, new List<FactorFileRow>());
    _mapFile = new MapFile(config.Symbol.Value, new List<MapFileRow>());

    // load up the map and factor files for equities
    if (!config.IsCustomData && config.SecurityType == SecurityType.Equity)
    {
        try
        {
            var mapFile = mapFileResolver.ResolveMapFile(config.Symbol.ID.Symbol, config.Symbol.ID.Date);

            // only take the resolved map file if it has data, otherwise we'll use the empty one we defined above
            if (mapFile.Any()) { _mapFile = mapFile; }

            var factorFile = factorFileProvider.Get(_config.Symbol);
            _hasScaleFactors = factorFile != null;
            if (_hasScaleFactors)
            {
                _factorFile = factorFile;
            }
        }
        catch (Exception err)
        {
            // best effort: a failed map/factor resolution leaves the empty defaults in place
            Log.Error(err, "Fetching Price/Map Factors: " + config.Symbol.ID + ": ");
        }
    }

    _subscriptionFactoryEnumerator = ResolveDataEnumerator(true);
}
/// <summary>
/// Iterates each daily file in the specified <paramref name="dailyFolder"/> and adds a line for each
/// day to the approriate coarse file
/// </summary>
/// <param name="dailyFolder">The folder with daily data</param>
/// <param name="coarseFolder">The coarse output folder</param>
/// <param name="mapFileResolver">Resolves map files used to regenerate SIDs and skip mapless symbols</param>
/// <param name="exclusions">The symbols to be excluded from processing</param>
/// <param name="ignoreMapless">Ignore the symbols without a map file.</param>
/// <param name="startDate">The starting date for processing</param>
/// <param name="symbolResolver">Function used to provide symbol resolution. Default resolution uses the zip file name to resolve
/// the symbol, specify null for this behavior.</param>
/// <returns>A collection of the generated coarse files</returns>
public static ICollection<string> ProcessDailyFolder(string dailyFolder, string coarseFolder, MapFileResolver mapFileResolver, HashSet<string> exclusions, bool ignoreMapless, DateTime startDate, Func<string, string> symbolResolver = null)
{
    // prices in the daily files are stored as ints scaled by 10,000
    const decimal scaleFactor = 10000m;

    Log.Trace("Processing: {0}", dailyFolder);

    var start = DateTime.UtcNow;

    // load map files into memory

    var symbols = 0;
    var maplessCount = 0;
    var dates = new HashSet<DateTime>();

    // instead of opening/closing these constantly, open them once and dispose at the end (~3x speed improvement)
    var writers = new Dictionary<string, StreamWriter>();

    var dailyFolderDirectoryInfo = new DirectoryInfo(dailyFolder).Parent;
    if (dailyFolderDirectoryInfo == null)
    {
        throw new Exception("Unable to resolve market for daily folder: " + dailyFolder);
    }
    var market = dailyFolderDirectoryInfo.Name.ToLowerInvariant();

    var fundamentalDirectoryInfo = new DirectoryInfo(coarseFolder).Parent;
    if (fundamentalDirectoryInfo == null)
    {
        throw new Exception("Unable to resolve fundamental path for coarse folder: " + coarseFolder);
    }
    var fineFundamentalFolder = Path.Combine(fundamentalDirectoryInfo.FullName, "fine");

    // open up each daily file to get the values and append to the daily coarse files
    foreach (var file in Directory.EnumerateFiles(dailyFolder))
    {
        try
        {
            var symbol = Path.GetFileNameWithoutExtension(file);
            if (symbol == null)
            {
                Log.Trace("CoarseGenerator.ProcessDailyFolder(): Unable to resolve symbol from file: {0}", file);
                continue;
            }

            if (symbolResolver != null)
            {
                symbol = symbolResolver(symbol);
            }

            // BUGFIX (CA1304): culture-invariant casing so ticker resolution does not depend on OS locale
            symbol = symbol.ToUpperInvariant();

            if (exclusions.Contains(symbol))
            {
                Log.Trace("Excluded symbol: {0}", symbol);
                continue;
            }

            // check if symbol has any fine fundamental data
            var firstFineSymbolDate = DateTime.MaxValue;
            if (Directory.Exists(fineFundamentalFolder))
            {
                var fineSymbolFolder = Path.Combine(fineFundamentalFolder, symbol.ToLowerInvariant());
                var firstFineSymbolFileName = Directory.Exists(fineSymbolFolder)
                    ? Directory.GetFiles(fineSymbolFolder).OrderBy(x => x).FirstOrDefault()
                    : string.Empty;

                // BUGFIX: FirstOrDefault() returns null for an existing-but-empty fine folder;
                // the original `firstFineSymbolFileName.Length > 0` threw NullReferenceException.
                if (!string.IsNullOrEmpty(firstFineSymbolFileName))
                {
                    firstFineSymbolDate = DateTime.ParseExact(Path.GetFileNameWithoutExtension(firstFineSymbolFileName), "yyyyMMdd", CultureInfo.InvariantCulture);
                }
            }

            ZipFile zip;
            using (var reader = Compression.Unzip(file, out zip))
            {
                // 30 period EMA constant
                const decimal k = 2m / (30 + 1);

                var seeded = false;
                var runningAverageVolume = 0m;

                var checkedForMapFile = false;

                symbols++;
                string line;
                while ((line = reader.ReadLine()) != null)
                {
                    //20150625.csv
                    var csv = line.Split(',');
                    var date = DateTime.ParseExact(csv[0], DateFormat.TwelveCharacter, CultureInfo.InvariantCulture);

                    // spin past old data
                    if (date < startDate) continue;

                    if (ignoreMapless && !checkedForMapFile)
                    {
                        checkedForMapFile = true;
                        if (!mapFileResolver.ResolveMapFile(symbol, date).Any())
                        {
                            // if the resolved map file has zero entries then it's a mapless symbol
                            maplessCount++;
                            break;
                        }
                    }

                    // BUGFIX (CA1305): parse with the invariant culture so the numbers are read the
                    // same way on every machine, regardless of the OS decimal separator
                    var close = decimal.Parse(csv[4], CultureInfo.InvariantCulture) / scaleFactor;
                    var volume = long.Parse(csv[5], CultureInfo.InvariantCulture);

                    // compute the current volume EMA for dollar volume calculations
                    runningAverageVolume = seeded ? volume * k + runningAverageVolume * (1 - k) : volume;
                    seeded = true;

                    var dollarVolume = close * runningAverageVolume;

                    var coarseFile = Path.Combine(coarseFolder, date.ToString("yyyyMMdd", CultureInfo.InvariantCulture) + ".csv");
                    dates.Add(date);

                    // try to resolve a map file and if found, regen the sid
                    var sid = SecurityIdentifier.GenerateEquity(SecurityIdentifier.DefaultDate, symbol, market);
                    var mapFile = mapFileResolver.ResolveMapFile(symbol, date);

                    // BUGFIX: the null guard originally ran only AFTER members were invoked on
                    // mapFile; check before use and skip the line as the original intended
                    if (mapFile == null && ignoreMapless)
                    {
                        // if we're ignoring mapless files then we should always be able to resolve this
                        Log.Error(string.Format("CoarseGenerator.ProcessDailyFolder(): Unable to resolve map file for {0} as of {1}", symbol, date.ToShortDateString()));
                        continue;
                    }

                    if (!mapFile.IsNullOrEmpty())
                    {
                        // if available, us the permtick in the coarse files, because of this, we need
                        // to update the coarse files each time new map files are added/permticks change
                        sid = SecurityIdentifier.GenerateEquity(mapFile.FirstDate, mapFile.OrderBy(x => x.Date).First().MappedSymbol, market);
                    }

                    // check if symbol has fine fundamental data for the current date
                    var hasFundamentalDataForDate = date >= firstFineSymbolDate;

                    // sid,symbol,close,volume,dollar volume,has fundamental data
                    var coarseFileLine = sid + "," + symbol + "," + close + "," + volume + "," + Math.Truncate(dollarVolume) + "," + hasFundamentalDataForDate;

                    StreamWriter writer;
                    if (!writers.TryGetValue(coarseFile, out writer))
                    {
                        writer = new StreamWriter(new FileStream(coarseFile, FileMode.Create, FileAccess.Write, FileShare.Write));
                        writers[coarseFile] = writer;
                    }
                    writer.WriteLine(coarseFileLine);
                }
            }

            if (symbols % 1000 == 0)
            {
                Log.Trace("CoarseGenerator.ProcessDailyFolder(): Completed processing {0} symbols. Current elapsed: {1} seconds", symbols, (DateTime.UtcNow - start).TotalSeconds.ToString("0.00"));
            }
        }
        catch (Exception err)
        {
            // log the error and continue with the process
            Log.Error(err.ToString());
        }
    }

    Log.Trace("CoarseGenerator.ProcessDailyFolder(): Saving {0} coarse files to disk", dates.Count);

    // dispose all the writers at the end of processing
    foreach (var writer in writers)
    {
        writer.Value.Dispose();
    }

    var stop = DateTime.UtcNow;

    Log.Trace("CoarseGenerator.ProcessDailyFolder(): Processed {0} symbols into {1} coarse files in {2} seconds", symbols, dates.Count, (stop - start).TotalSeconds.ToString("0.00"));
    Log.Trace("CoarseGenerator.ProcessDailyFolder(): Excluded {0} mapless symbols.", maplessCount);

    return writers.Keys;
}