/// <summary>
/// Creates a new <see cref="AuxiliaryDataEnumerator"/> that will hold the
/// corporate event providers
/// </summary>
/// <param name="rawDataEnumerator">The underlying raw data enumerator</param>
/// <param name="config">The <see cref="SubscriptionDataConfig"/></param>
/// <param name="factorFileProvider">Used for getting factor files</param>
/// <param name="tradableDayNotifier">Tradable dates provider</param>
/// <param name="mapFileResolver">Used for resolving the correct map files</param>
/// <param name="includeAuxiliaryData">True to emit auxiliary data</param>
/// <param name="startTime">Start date for the data request</param>
/// <returns>The new auxiliary data enumerator</returns>
public static IEnumerator<BaseData> CreateEnumerators(
    IEnumerator<BaseData> rawDataEnumerator,
    SubscriptionDataConfig config,
    IFactorFileProvider factorFileProvider,
    ITradableDatesNotifier tradableDayNotifier,
    MapFileResolver mapFileResolver,
    bool includeAuxiliaryData,
    DateTime startTime)
{
    // factor/map file resolution is deferred until first use
    var lazyFactorFile = new Lazy<FactorFile>(() => GetFactorFileToUse(config, factorFileProvider));
    var lazyMapFile = new Lazy<MapFile>(() => GetMapFileToUse(config, mapFileResolver));

    var eventProviders = new ITradableDateEventProvider[]
    {
        new MappingEventProvider(),
        new SplitEventProvider(),
        new DividendEventProvider(),
        new DelistingEventProvider()
    };

    var auxiliaryEnumerator = new AuxiliaryDataEnumerator(
        config,
        lazyFactorFile,
        lazyMapFile,
        eventProviders,
        tradableDayNotifier,
        includeAuxiliaryData,
        startTime);

    // wrap the raw data so prices are adjusted by the factor file before synchronization
    var scaledEnumerator = new PriceScaleFactorEnumerator(rawDataEnumerator, config, lazyFactorFile);

    return new SynchronizingEnumerator(scaledEnumerator, auxiliaryEnumerator);
}
/// <summary>
/// Creates a new <see cref="AuxiliaryDataEnumerator"/> that will hold the
/// corporate event providers
/// </summary>
/// <param name="config">The <see cref="SubscriptionDataConfig"/></param>
/// <param name="factorFileProvider">Used for getting factor files</param>
/// <param name="tradableDayNotifier">Tradable dates provider</param>
/// <param name="mapFileResolver">Used for resolving the correct map files</param>
/// <param name="includeAuxiliaryData">True to emit auxiliary data</param>
/// <returns>The new auxiliary data enumerator</returns>
public static IEnumerator<BaseData> CreateEnumerators(
    SubscriptionDataConfig config,
    IFactorFileProvider factorFileProvider,
    ITradableDatesNotifier tradableDayNotifier,
    MapFileResolver mapFileResolver,
    bool includeAuxiliaryData)
{
    // resolve the corporate action files eagerly for this overload
    var factorFile = GetFactorFileToUse(config, factorFileProvider);
    var mapFileToUse = GetMapFileToUse(config, mapFileResolver);

    var eventProviders = new ITradableDateEventProvider[]
    {
        new MappingEventProvider(),
        new SplitEventProvider(),
        new DividendEventProvider(),
        new DelistingEventProvider()
    };

    return new AuxiliaryDataEnumerator(
        config,
        factorFile,
        mapFileToUse,
        eventProviders,
        tradableDayNotifier,
        includeAuxiliaryData);
}
/// <summary>
/// Sets up the collections, databases and services required before a capacity run
/// </summary>
private void Initialize()
{
    Log.Trace("StrategyCapacity.Initialize(): Initializing...");

    Capacity = new List<ChartPoint>();
    _symbolData = new Dictionary<Symbol, SymbolData>();
    _mapFileCache = new Dictionary<Symbol, MapFile>();

    _securityManager = new SecurityManager(new TimeKeeper(DateTime.UtcNow, TimeZones.NewYork, TimeZones.Utc));
    _cashBook = new CashBook();

    _subscriptionManager = new SubscriptionManager();
    _subscriptionManager.SetDataManager(new StubDataManager());

    _mhdb = MarketHoursDatabase.FromDataFolder();
    _spdb = SymbolPropertiesDatabase.FromDataFolder();

    // resolve the configured map file provider and use the usa market's map files
    var mapFileProvider = Composer.Instance.GetExportedValueByTypeName<IMapFileProvider>(
        Config.Get("map-file-provider", "LocalDiskMapFileProvider"));
    _mapFileResolver = mapFileProvider.Get("usa");

    _securityService = new SecurityService(
        _cashBook,
        _mhdb,
        _spdb,
        new QCAlgorithm(),
        new RegisteredSecurityDataTypesProvider(),
        new SecurityCacheProvider(new ReportSecurityProvider()));
}
public void DoesNotEmitDataBeyondTradableDate(string data, bool shouldEmitSecondDataPoint, Resolution dataResolution)
{
    var startDate = new DateTime(2019, 12, 9);
    var endDate = new DateTime(2019, 12, 12);

    var mapFileProvider = new LocalDiskMapFileProvider();
    var resolver = new MapFileResolver(mapFileProvider.Get(Market.USA));
    // one tradable date per calendar day in [startDate, endDate]
    var tradableDates = LinqExtensions.Range(startDate, endDate, time => time + TimeSpan.FromDays(1));

    var config = new SubscriptionDataConfig(typeof(TradeBar), Symbols.SPY, dataResolution,
        TimeZones.NewYork, TimeZones.NewYork, false, false, false);

    var dataReader = new SubscriptionDataReader(
        config,
        startDate,
        endDate,
        resolver,
        new LocalDiskFactorFileProvider(mapFileProvider),
        tradableDates,
        false,
        new TestDataCacheProvider { Data = data });

    // the first point must always come through; the second depends on the tradable window
    Assert.IsTrue(dataReader.MoveNext());
    Assert.AreEqual(shouldEmitSecondDataPoint, dataReader.MoveNext());
}
/// <summary>
/// Subscription data reader takes a subscription request, loads the type, accepts the data source and enumerate on the results.
/// </summary>
/// <param name="config">Subscription configuration object</param>
/// <param name="periodStart">Start date for the data request/backtest</param>
/// <param name="periodFinish">Finish date for the data request/backtest</param>
/// <param name="mapFileResolver">Used for resolving the correct map files</param>
/// <param name="factorFileProvider">Used for getting factor files</param>
/// <param name="dataCacheProvider">Used for caching files</param>
/// <param name="tradeableDates">Defines the dates for which we'll request data, in order, in the security's data time zone</param>
/// <param name="isLiveMode">True if we're in live mode, false otherwise</param>
/// <param name="dataProvider">The data provider to use</param>
public SubscriptionDataReader(SubscriptionDataConfig config,
    DateTime periodStart,
    DateTime periodFinish,
    MapFileResolver mapFileResolver,
    IFactorFileProvider factorFileProvider,
    IEnumerable<DateTime> tradeableDates,
    bool isLiveMode,
    IDataCacheProvider dataCacheProvider,
    IDataProvider dataProvider)
{
    // subscription configuration and request window
    _config = config;
    _periodStart = periodStart;
    _periodFinish = periodFinish;

    // corporate-event, caching and raw data services
    _mapFileResolver = mapFileResolver;
    _factorFileProvider = factorFileProvider;
    _dataCacheProvider = dataCacheProvider;
    _dataProvider = dataProvider;

    _isLiveMode = isLiveMode;
    // keep the enumerator; dates are consumed one at a time as data is read
    _tradeableDates = tradeableDates.GetEnumerator();
}
/// <summary>
/// Initializes a new instance of the <see cref="BaseDataSubscriptionEnumeratorFactory"/> class
/// </summary>
/// <param name="isLiveMode">True for live mode, false otherwise</param>
/// <param name="mapFileResolver">Used for resolving the correct map files</param>
/// <param name="factorFileProvider">Used for getting factor files</param>
/// <param name="tradableDaysProvider">Function used to provide the tradable dates to be enumerator.
/// Specify null to default to <see cref="SubscriptionRequest.TradableDays"/></param>
public BaseDataSubscriptionEnumeratorFactory(bool isLiveMode, MapFileResolver mapFileResolver,
    IFactorFileProvider factorFileProvider,
    Func<SubscriptionRequest, IEnumerable<DateTime>> tradableDaysProvider = null)
{
    _isLiveMode = isLiveMode;
    _mapFileResolver = mapFileResolver;
    _factorFileProvider = factorFileProvider;
    // fall back to the request's own tradable days when no provider is supplied
    _tradableDaysProvider = tradableDaysProvider ?? (request => request.TradableDays);
}
/// <summary>
/// Resolves the map file for the given configuration, falling back to an empty map
/// file when the ticker should not be mapped, resolution fails or yields no entries
/// </summary>
/// <param name="config">The subscription configuration</param>
/// <param name="mapFileResolver">Used for resolving the correct map files</param>
/// <returns>The map file to use for the subscription</returns>
private static MapFile GetMapFileToUse(
    SubscriptionDataConfig config,
    MapFileResolver mapFileResolver)
{
    // default: an empty map file for the configured symbol
    var result = new MapFile(config.Symbol.Value, new List<MapFileRow>());

    // map and factor files only apply to equities, options, and custom data
    if (!config.TickerShouldBeMapped())
    {
        return result;
    }

    try
    {
        var resolved = mapFileResolver.ResolveMapFile(config.Symbol, config.Type);
        // only take the resolved map file if it has data, otherwise keep the empty one
        if (resolved.Any())
        {
            result = resolved;
        }
    }
    catch (Exception err)
    {
        Log.Error(err, "CorporateEventEnumeratorFactory.GetMapFileToUse():" + " Map File: " + config.Symbol.ID + ": ");
    }

    return result;
}
/// <summary>
/// Iterates over each equity directory and aggregates the data into the coarse file
/// </summary>
/// <param name="dataDirectory">The Lean /Data directory</param>
/// <param name="ignoreMaplessSymbols">Ignore symbols without a QuantQuote map file.</param>
/// <param name="startDate">Optional starting date; when null, resumes from the last processed date</param>
public static IEnumerable<string> ProcessEquityDirectories(string dataDirectory, bool ignoreMaplessSymbols, DateTime? startDate)
{
    var exclusions = ReadExclusionsFile(ExclusionsFile);
    var equityRoot = Path.Combine(dataDirectory, "equity");

    foreach (var marketDirectory in Directory.EnumerateDirectories(equityRoot))
    {
        var dailyFolder = Path.Combine(marketDirectory, "daily");
        var mapFileFolder = Path.Combine(marketDirectory, "map_files");
        var coarseFolder = Path.Combine(marketDirectory, "fundamental", "coarse");
        if (!Directory.Exists(coarseFolder))
        {
            Directory.CreateDirectory(coarseFolder);
        }

        // resume from the supplied date, or from where the previous run stopped
        var lastProcessedDate = startDate ?? GetLastProcessedDate(coarseFolder);
        var factorFileProvider = new LocalDiskFactorFileProvider();
        var files = ProcessDailyFolder(dailyFolder, coarseFolder, MapFileResolver.Create(mapFileFolder),
            factorFileProvider, exclusions, ignoreMaplessSymbols, lastProcessedDate);

        foreach (var file in files)
        {
            yield return file;
        }
    }
}
/// <summary>
/// Creates a new instance of <see cref="EstimizeReleaseDataDownloader"/>
/// </summary>
/// <param name="destinationFolder">The folder where the data will be saved</param>
public EstimizeReleaseDataDownloader(string destinationFolder)
{
    _destinationFolder = Path.Combine(destinationFolder, "release");

    // resolve the configured map file provider and use the usa market's map files
    var mapFileProviderTypeName = Config.Get("map-file-provider", "LocalDiskMapFileProvider");
    _mapFileResolver = Composer.Instance
        .GetExportedValueByTypeName<IMapFileProvider>(mapFileProviderTypeName)
        .Get(Market.USA);

    Directory.CreateDirectory(_destinationFolder);
}
/// <summary>
/// Public constructor creates CIK -> Ticker list from various sources
/// </summary>
/// <param name="rawSource">Source of raw data</param>
/// <param name="destination">Destination of formatted data</param>
public SECDataConverter(string rawSource, string destination)
{
    RawSource = rawSource;
    Destination = destination;

    // resolve the configured map file provider and use the usa market's map files
    var mapFileProviderTypeName = Config.Get("map-file-provider", "LocalDiskMapFileProvider");
    _mapFileResolver = Composer.Instance
        .GetExportedValueByTypeName<IMapFileProvider>(mapFileProviderTypeName)
        .Get(Market.USA);
}
/// <summary>
/// Initializes a new instance of the <see cref="OptionChainUniverseSubscriptionEnumeratorFactory"/> class
/// </summary>
/// <param name="enumeratorConfigurator">Function used to configure the sub-enumerators before sync (fill-forward/filter/ect...)</param>
/// <param name="mapFileResolver">Map file resolver</param>
/// <param name="factorFileProvider">Factor file provider</param>
public OptionChainUniverseSubscriptionEnumeratorFactory(
    Func<SubscriptionRequest, IEnumerator<BaseData>, IEnumerator<BaseData>> enumeratorConfigurator,
    MapFileResolver mapFileResolver,
    IFactorFileProvider factorFileProvider)
{
    // this factory is used for backtesting only
    _isLiveMode = false;
    _enumeratorConfigurator = enumeratorConfigurator;
    _mapFileResolver = mapFileResolver;
    _factorFileProvider = factorFileProvider;
}
/// <summary>
/// Creates a new instance of the earnings downloader writing into the "earnings" subfolder
/// </summary>
/// <param name="destinationFolder">The folder where the data will be saved</param>
public TradingEconomicsEarningsDownloader(string destinationFolder)
{
    // download window: from 1998 through now
    _fromDate = new DateTime(1998, 1, 1);
    _toDate = DateTime.Now;

    _destinationFolder = Path.Combine(destinationFolder, "earnings");

    // throttle requests to at most one per second
    _requestGate = new RateGate(1, TimeSpan.FromSeconds(1));
    _mapFileResolver = MapFileResolver.Create(Globals.DataFolder, Market.USA);

    Directory.CreateDirectory(_destinationFolder);
}
/// <summary>
/// Creates an instance of the converter
/// </summary>
/// <param name="sourceDirectory">Directory to read raw SmartInsider data from</param>
/// <param name="destinationDirectory">Directory to write processed data to</param>
public SmartInsiderConverter(DirectoryInfo sourceDirectory, DirectoryInfo destinationDirectory)
{
    _sourceDirectory = sourceDirectory;
    _destinationDirectory = destinationDirectory;

    // resolve the configured map file provider and use the usa market's map files
    var mapFileProviderTypeName = Config.Get("map-file-provider", "LocalDiskMapFileProvider");
    _mapFileResolver = Composer.Instance
        .GetExportedValueByTypeName<IMapFileProvider>(mapFileProviderTypeName)
        .Get(Market.USA);

    // output is split into intention and transaction events
    Directory.CreateDirectory(Path.Combine(_destinationDirectory.FullName, "intentions"));
    Directory.CreateDirectory(Path.Combine(_destinationDirectory.FullName, "transactions"));
}
/// <summary>
/// Creates a new instance of <see cref="EstimizeEstimateDataDownloader"/>
/// </summary>
/// <param name="destinationFolder">The folder where the data will be saved</param>
public EstimizeEstimateDataDownloader(string destinationFolder)
{
    _destinationFolder = Path.Combine(destinationFolder, "estimate");

    // resolve the configured map file provider and use the usa market's map files
    var mapFileProviderTypeName = Config.Get("map-file-provider", "LocalDiskMapFileProvider");
    _mapFileResolver = Composer.Instance
        .GetExportedValueByTypeName<IMapFileProvider>(mapFileProviderTypeName)
        .Get(Market.USA);

    // optional comma-separated whitelist of tickers to process; null means process all
    _processTickers = Config.Get("process-tickers", null)?.Split(",").ToHashSet();

    Directory.CreateDirectory(_destinationFolder);
}
/// <summary>
/// Public constructor creates CIK -> Ticker list from various sources
/// </summary>
/// <param name="rawSource">Source of raw data</param>
/// <param name="destination">Destination of formatted data</param>
public SECDataConverter(string rawSource, string destination)
{
    RawSource = rawSource;
    Destination = destination;

    var dataProvider = Composer.Instance.GetExportedValueByTypeName<IDataProvider>(
        Config.Get("data-provider", "QuantConnect.Lean.Engine.DataFeeds.DefaultDataProvider"));
    var mapFileProvider = Composer.Instance.GetExportedValueByTypeName<IMapFileProvider>(
        Config.Get("map-file-provider", "LocalDiskMapFileProvider"));

    // the map file provider needs the data provider before it can resolve map files
    mapFileProvider.Initialize(dataProvider);
    _mapFileResolver = mapFileProvider.Get(Market.USA);
}
/// <summary>
/// Converts psychsignal raw data into a format usable by Lean
/// </summary>
/// <param name="sourceDirectory">Directory to source our raw data from</param>
/// <param name="destinationDirectory">Directory to write formatted data to</param>
public PsychSignalDataConverter(string sourceDirectory, string destinationDirectory)
{
    _rawSourceDirectory = new DirectoryInfo(sourceDirectory);
    _destinationDirectory = new DirectoryInfo(destinationDirectory);
    // one open handle per ticker while converting
    _fileHandles = new Dictionary<string, TickerData>();

    // resolve the configured map file provider and use the usa market's map files
    var mapFileProviderTypeName = Config.Get("map-file-provider", "LocalDiskMapFileProvider");
    _mapFileResolver = Composer.Instance
        .GetExportedValueByTypeName<IMapFileProvider>(mapFileProviderTypeName)
        .Get(Market.USA);

    _destinationDirectory.Create();
}
/// <summary>
/// Creates a new <see cref="AuxiliaryDataEnumerator"/> that will hold the
/// corporate event providers
/// </summary>
/// <param name="rawDataEnumerator">The underlying raw data enumerator</param>
/// <param name="config">The <see cref="SubscriptionDataConfig"/></param>
/// <param name="factorFileProvider">Used for getting factor files</param>
/// <param name="tradableDayNotifier">Tradable dates provider</param>
/// <param name="mapFileResolver">Used for resolving the correct map files</param>
/// <param name="includeAuxiliaryData">True to emit auxiliary data</param>
/// <param name="startTime">Start date for the data request</param>
/// <param name="enablePriceScaling">Applies price factor</param>
/// <returns>The new auxiliary data enumerator</returns>
public static IEnumerator<BaseData> CreateEnumerators(
    IEnumerator<BaseData> rawDataEnumerator,
    SubscriptionDataConfig config,
    IFactorFileProvider factorFileProvider,
    ITradableDatesNotifier tradableDayNotifier,
    MapFileResolver mapFileResolver,
    bool includeAuxiliaryData,
    DateTime startTime,
    bool enablePriceScaling = true)
{
    var lazyFactorFile = new Lazy<FactorFile>(() => SubscriptionUtils.GetFactorFileToUse(config, factorFileProvider));
    var lazyMapFile = new Lazy<MapFile>(() => GetMapFileToUse(config, mapFileResolver));

    // select the corporate event providers that apply to this security type
    var securityType = config.Symbol.SecurityType;
    var tradableEventProviders = new List<ITradableDateEventProvider>();
    if (securityType == SecurityType.Equity)
    {
        tradableEventProviders.Add(new SplitEventProvider());
        tradableEventProviders.Add(new DividendEventProvider());
    }
    if (securityType == SecurityType.Equity
        || securityType == SecurityType.Base
        || securityType == SecurityType.Option)
    {
        tradableEventProviders.Add(new MappingEventProvider());
    }
    tradableEventProviders.Add(new DelistingEventProvider());

    var auxiliaryEnumerator = new AuxiliaryDataEnumerator(
        config,
        lazyFactorFile,
        lazyMapFile,
        tradableEventProviders.ToArray(),
        tradableDayNotifier,
        includeAuxiliaryData,
        startTime);

    // price scaling can be disabled for backtesting, where it is calculated directly in the
    // worker and the subscription extracts the data depending on the configured data mode
    var dataEnumerator = rawDataEnumerator;
    if (enablePriceScaling)
    {
        dataEnumerator = new PriceScaleFactorEnumerator(rawDataEnumerator, config, lazyFactorFile);
    }

    return new SynchronizingEnumerator(dataEnumerator, auxiliaryEnumerator);
}
/// <summary>
/// Resolves the map file to use for the given configuration, falling back to an
/// empty map file when resolution fails or yields no entries
/// </summary>
/// <param name="config">The subscription configuration</param>
/// <param name="mapFileResolver">Used for resolving the correct map files</param>
/// <returns>The resolved map file, or an empty one for the configured symbol</returns>
private static MapFile GetMapFileToUse(
    SubscriptionDataConfig config,
    MapFileResolver mapFileResolver)
{
    // default to an empty map file; used when resolution fails or does not apply
    var mapFileToUse = new MapFile(config.Symbol.Value, new List<MapFileRow>());

    if (!config.IsCustomData)
    {
        // the equity and option branches are mutually exclusive, so resolve at most one;
        // the shared try/resolve/log logic lives in ResolveMapFileOrDefault
        if (config.SecurityType == SecurityType.Equity)
        {
            // load up the map file for equities
            mapFileToUse = ResolveMapFileOrDefault(mapFileResolver,
                config.Symbol.ID.Symbol, config.Symbol.ID.Date, config, mapFileToUse);
        }
        else if (config.SecurityType == SecurityType.Option)
        {
            // load up the map file for the underlying of the equity option
            mapFileToUse = ResolveMapFileOrDefault(mapFileResolver,
                config.Symbol.Underlying.ID.Symbol, config.Symbol.Underlying.ID.Date, config, mapFileToUse);
        }
    }

    return mapFileToUse;
}

/// <summary>
/// Resolves a map file for the given ticker/date, returning <paramref name="fallback"/>
/// when resolution throws or produces a map file with no entries
/// </summary>
private static MapFile ResolveMapFileOrDefault(
    MapFileResolver mapFileResolver,
    string ticker,
    DateTime date,
    SubscriptionDataConfig config,
    MapFile fallback)
{
    try
    {
        var mapFile = mapFileResolver.ResolveMapFile(ticker, date);
        // only take the resolved map file if it has data, otherwise we'll use the empty fallback
        if (mapFile.Any())
        {
            return mapFile;
        }
    }
    catch (Exception err)
    {
        Log.Error(err, "CorporateEventEnumeratorFactory.GetMapFileToUse():" + " Map File: " + config.Symbol.ID + ": ");
    }
    return fallback;
}
/// <summary>
/// Populates the SID context for every map file known to the resolver.
/// </summary>
/// <param name="mapFileResolver">The map file resolver.</param>
/// <param name="exclusions">The exclusions.</param>
/// <returns>One <see cref="SecurityIdentifierContext"/> per non-excluded map file.</returns>
private IEnumerable<SecurityIdentifierContext> PopulateSidContex(MapFileResolver mapFileResolver, HashSet<string> exclusions)
{
    Log.Trace($"CoarseUniverseGeneratorProgram.PopulateSidContex(): Generating SID context from QuantQuote's map files.");

    foreach (var mapFile in mapFileResolver)
    {
        // NOTE(review): exclusions are keyed by the last row's mapped symbol; assumes map files are non-empty
        if (!exclusions.Contains(mapFile.Last().MappedSymbol))
        {
            yield return new SecurityIdentifierContext(mapFile, _market);
        }
    }
}
/// <summary>
/// Initializes a new instance of the <see cref="SubscriptionDataReaderSubscriptionEnumeratorFactory"/> class
/// </summary>
/// <param name="resultHandler">The result handler for the algorithm</param>
/// <param name="mapFileResolver">The map file resolver</param>
/// <param name="factorFileProvider">The factory file provider</param>
/// <param name="isLiveMode">True if running live algorithm, false otherwise</param>
/// <param name="includeAuxiliaryData">True to check for auxiliary data, false otherwise</param>
/// <param name="tradableDaysProvider">Function used to provide the tradable dates to be enumerator.
/// Specify null to default to <see cref="SubscriptionRequest.TradableDays"/></param>
public SubscriptionDataReaderSubscriptionEnumeratorFactory(IResultHandler resultHandler,
    MapFileResolver mapFileResolver,
    IFactorFileProvider factorFileProvider,
    bool isLiveMode,
    bool includeAuxiliaryData,
    Func<SubscriptionRequest, IEnumerable<DateTime>> tradableDaysProvider = null)
{
    _resultHandler = resultHandler;
    _isLiveMode = isLiveMode;
    _includeAuxiliaryData = includeAuxiliaryData;

    // corporate-event services
    _mapFileResolver = mapFileResolver;
    _factorFileProvider = factorFileProvider;

    // fall back to the request's own tradable days when no provider is given
    _tradableDaysProvider = tradableDaysProvider ?? (request => request.TradableDays);
}
/// <summary>
/// Initializes a new instance of the <see cref="SubscriptionDataReaderSubscriptionEnumeratorFactory"/> class
/// </summary>
/// <param name="resultHandler">The result handler for the algorithm</param>
/// <param name="mapFileResolver">The map file resolver</param>
/// <param name="factorFileProvider">The factory file provider</param>
/// <param name="isLiveMode">True if running live algorithm, false otherwise</param>
/// <param name="includeAuxiliaryData">True to check for auxiliary data, false otherwise</param>
/// <param name="tradableDaysProvider">Function used to provide the tradable dates to be enumerator.
/// Specify null to default to <see cref="SubscriptionRequest.TradableDays"/></param>
public SubscriptionDataReaderSubscriptionEnumeratorFactory(IResultHandler resultHandler,
    MapFileResolver mapFileResolver,
    IFactorFileProvider factorFileProvider,
    bool isLiveMode,
    bool includeAuxiliaryData,
    Func<SubscriptionRequest, IEnumerable<DateTime>> tradableDaysProvider = null)
{
    _resultHandler = resultHandler;
    _mapFileResolver = mapFileResolver;
    _factorFileProvider = factorFileProvider;
    _isLiveMode = isLiveMode;
    _includeAuxiliaryData = includeAuxiliaryData;
    // when the caller does not supply a provider, use the tradable days from the request itself
    _tradableDaysProvider = tradableDaysProvider ?? (request => request.TradableDays);
}
/// <summary>
/// Creates an enumerator for the specified security/configuration
/// </summary>
private IEnumerator<BaseData> CreateSubscriptionEnumerator(Security security,
    SubscriptionDataConfig config,
    DateTime localStartTime,
    DateTime localEndTime,
    MapFileResolver mapFileResolver,
    IEnumerable<DateTime> tradeableDates,
    bool useSubscriptionDataReader,
    bool aggregate)
{
    IEnumerator<BaseData> enumerator;
    if (useSubscriptionDataReader)
    {
        enumerator = new SubscriptionDataReader(config, localStartTime, localEndTime, _resultHandler,
            mapFileResolver, _factorFileProvider, tradeableDates, false);
    }
    else
    {
        // read each tradable date's source directly via the data type's own reader
        var sourceFactory = (BaseData)Activator.CreateInstance(config.Type);
        enumerator = tradeableDates
            .SelectMany(date =>
            {
                var source = sourceFactory.GetSource(config, date, false);
                var factory = SubscriptionDataSourceReader.ForSource(source, config, date, false);
                return factory.Read(source);
            })
            .GetEnumerator();
    }

    if (aggregate)
    {
        enumerator = new BaseDataCollectionAggregatorEnumerator(enumerator, config.Symbol);
    }

    // optionally apply fill forward logic, but never for tick data
    if (config.FillDataForward && config.Resolution != Resolution.Tick)
    {
        enumerator = new FillForwardEnumerator(enumerator, security.Exchange, _fillForwardResolution,
            security.IsExtendedMarketHours, localEndTime, config.Resolution.ToTimeSpan());
    }

    // optionally apply exchange/user filters
    if (config.IsFilteredSubscription)
    {
        enumerator = SubscriptionFilterEnumerator.WrapForDataFeed(_resultHandler, enumerator, security, localEndTime);
    }

    return enumerator;
}
/// <summary>
/// Iterates over each equity directory and aggregates the data into the coarse file
/// </summary>
/// <param name="dataDirectory">The Lean /Data directory</param>
/// <param name="ignoreMaplessSymbols">Ignore symbols without a QuantQuote map file.</param>
public static void ProcessEquityDirectories(string dataDirectory, bool ignoreMaplessSymbols)
{
    var exclusions = ReadExclusionsFile(ExclusionsFile);
    var equityRoot = Path.Combine(dataDirectory, "equity");

    foreach (var marketDirectory in Directory.EnumerateDirectories(equityRoot))
    {
        var dailyFolder = Path.Combine(marketDirectory, "daily");
        var mapFileFolder = Path.Combine(marketDirectory, "map_files");
        var coarseFolder = Path.Combine(marketDirectory, "fundamental", "coarse");
        if (!Directory.Exists(coarseFolder))
        {
            Directory.CreateDirectory(coarseFolder);
        }

        ProcessDailyFolder(dailyFolder, coarseFolder, MapFileResolver.Create(mapFileFolder), exclusions, ignoreMaplessSymbols);
    }
}
/// <summary>
/// Creates an enumerator for the specified security/configuration
/// </summary>
private IEnumerator<BaseData> CreateSubscriptionEnumerator(Security security,
    SubscriptionDataConfig config,
    DateTime localStartTime,
    DateTime localEndTime,
    MapFileResolver mapFileResolver,
    IEnumerable<DateTime> tradeableDates)
{
    IEnumerator<BaseData> enumerator = new SubscriptionDataReader(config, localStartTime, localEndTime,
        _resultHandler, mapFileResolver, _factorFileProvider, tradeableDates, false);

    // optionally apply fill forward logic, but never for tick data
    if (config.FillDataForward && config.Resolution != Resolution.Tick)
    {
        enumerator = new FillForwardEnumerator(enumerator, security.Exchange, _fillForwardResolution,
            security.IsExtendedMarketHours, localEndTime, config.Resolution.ToTimeSpan());
    }

    // optionally apply exchange/user filters
    if (config.IsFilteredSubscription)
    {
        enumerator = SubscriptionFilterEnumerator.WrapForDataFeed(_resultHandler, enumerator, security, localEndTime);
    }

    return enumerator;
}
/// <summary>
/// Iterates each daily file in the specified <paramref name="dailyFolder"/> and adds a line for each
/// day to the appropriate coarse file
/// </summary>
/// <param name="dailyFolder">The folder with daily data</param>
/// <param name="coarseFolder">The coarse output folder</param>
/// <param name="mapFileResolver">Used for resolving map files per symbol/date</param>
/// <param name="exclusions">The symbols to be excluded from processing</param>
/// <param name="ignoreMapless">Ignore the symbols without a map file.</param>
/// <param name="startDate">The starting date for processing</param>
/// <param name="symbolResolver">Function used to provide symbol resolution. Default resolution uses the zip file name to resolve
/// the symbol, specify null for this behavior.</param>
/// <returns>A collection of the generated coarse files</returns>
public static ICollection<string> ProcessDailyFolder(string dailyFolder, string coarseFolder, MapFileResolver mapFileResolver,
    HashSet<string> exclusions, bool ignoreMapless, DateTime startDate, Func<string, string> symbolResolver = null)
{
    // daily prices are stored as deci-cents; divide to get dollars
    const decimal scaleFactor = 10000m;

    Log.Trace("Processing: {0}", dailyFolder);

    var start = DateTime.UtcNow;

    var symbols = 0;
    var maplessCount = 0;
    var dates = new HashSet<DateTime>();

    // instead of opening/closing these constantly, open them once and dispose at the end (~3x speed improvement)
    var writers = new Dictionary<string, StreamWriter>();

    var dailyFolderDirectoryInfo = new DirectoryInfo(dailyFolder).Parent;
    if (dailyFolderDirectoryInfo == null)
    {
        throw new Exception("Unable to resolve market for daily folder: " + dailyFolder);
    }
    // invariant casing so the generated SIDs do not depend on the host locale
    var market = dailyFolderDirectoryInfo.Name.ToLowerInvariant();

    var fundamentalDirectoryInfo = new DirectoryInfo(coarseFolder).Parent;
    if (fundamentalDirectoryInfo == null)
    {
        throw new Exception("Unable to resolve fundamental path for coarse folder: " + coarseFolder);
    }
    var fineFundamentalFolder = Path.Combine(fundamentalDirectoryInfo.FullName, "fine");

    var mapFileProvider = new LocalDiskMapFileProvider();
    var factorFileProvider = new LocalDiskFactorFileProvider(mapFileProvider);

    // open up each daily file to get the values and append to the daily coarse files
    foreach (var file in Directory.EnumerateFiles(dailyFolder))
    {
        try
        {
            var symbol = Path.GetFileNameWithoutExtension(file);
            if (symbol == null)
            {
                Log.Trace("CoarseGenerator.ProcessDailyFolder(): Unable to resolve symbol from file: {0}", file);
                continue;
            }

            if (symbolResolver != null)
            {
                symbol = symbolResolver(symbol);
            }

            symbol = symbol.ToUpperInvariant();

            if (exclusions.Contains(symbol))
            {
                Log.Trace("Excluded symbol: {0}", symbol);
                continue;
            }

            // check if symbol has any fine fundamental data
            var firstFineSymbolDate = DateTime.MaxValue;
            if (Directory.Exists(fineFundamentalFolder))
            {
                var fineSymbolFolder = Path.Combine(fineFundamentalFolder, symbol.ToLowerInvariant());
                var firstFineSymbolFileName = Directory.Exists(fineSymbolFolder)
                    ? Directory.GetFiles(fineSymbolFolder).OrderBy(x => x).FirstOrDefault()
                    : string.Empty;
                // FirstOrDefault() returns null when the folder exists but is empty; guard against it
                if (!string.IsNullOrEmpty(firstFineSymbolFileName))
                {
                    firstFineSymbolDate = DateTime.ParseExact(Path.GetFileNameWithoutExtension(firstFineSymbolFileName), "yyyyMMdd", CultureInfo.InvariantCulture);
                }
            }

            ZipFile zip;
            using (var reader = Compression.Unzip(file, out zip))
            {
                var checkedForMapFile = false;

                symbols++;
                string line;
                while ((line = reader.ReadLine()) != null)
                {
                    //20150625.csv
                    var csv = line.Split(',');
                    var date = DateTime.ParseExact(csv[0], DateFormat.TwelveCharacter, CultureInfo.InvariantCulture);

                    // spin past old data
                    if (date < startDate)
                    {
                        continue;
                    }

                    if (ignoreMapless && !checkedForMapFile)
                    {
                        checkedForMapFile = true;
                        if (!mapFileResolver.ResolveMapFile(symbol, date).Any())
                        {
                            // if the resolved map file has zero entries then it's a mapless symbol
                            maplessCount++;
                            break;
                        }
                    }

                    // the daily files are machine generated; always parse with the invariant culture
                    var close = decimal.Parse(csv[4], CultureInfo.InvariantCulture) / scaleFactor;
                    var volume = long.Parse(csv[5], CultureInfo.InvariantCulture);
                    var dollarVolume = close * volume;

                    var coarseFile = Path.Combine(coarseFolder, date.ToString("yyyyMMdd", CultureInfo.InvariantCulture) + ".csv");
                    dates.Add(date);

                    // try to resolve a map file and if found, regen the sid
                    var sid = SecurityIdentifier.GenerateEquity(SecurityIdentifier.DefaultDate, symbol, market);
                    var mapFile = mapFileResolver.ResolveMapFile(symbol, date);
                    if (!mapFile.IsNullOrEmpty())
                    {
                        // if available, use the permtick in the coarse files, because of this, we need
                        // to update the coarse files each time new map files are added/permticks change
                        sid = SecurityIdentifier.GenerateEquity(mapFile.FirstDate, mapFile.OrderBy(x => x.Date).First().MappedSymbol, market);
                    }

                    if (mapFile == null && ignoreMapless)
                    {
                        // if we're ignoring mapless files then we should always be able to resolve this
                        Log.Error(string.Format("CoarseGenerator.ProcessDailyFolder(): Unable to resolve map file for {0} as of {1}", symbol, date.ToShortDateString()));
                        continue;
                    }

                    // check if symbol has fine fundamental data for the current date
                    var hasFundamentalDataForDate = date >= firstFineSymbolDate;

                    // get price and split factors from factor files
                    var leanSymbol = new Symbol(sid, symbol);
                    var factorFile = factorFileProvider.Get(leanSymbol);
                    var factorFileRow = factorFile?.GetScalingFactors(date);
                    var priceFactor = factorFileRow?.PriceFactor ?? 1m;
                    var splitFactor = factorFileRow?.SplitFactor ?? 1m;

                    // sid,symbol,close,volume,dollar volume,has fundamental data,price factor,split factor
                    // emit with the invariant culture so the coarse files don't depend on the host locale
                    var coarseFileLine = FormattableString.Invariant(
                        $"{sid},{symbol},{close},{volume},{Math.Truncate(dollarVolume)},{hasFundamentalDataForDate},{priceFactor},{splitFactor}");

                    StreamWriter writer;
                    if (!writers.TryGetValue(coarseFile, out writer))
                    {
                        writer = new StreamWriter(new FileStream(coarseFile, FileMode.Create, FileAccess.Write, FileShare.Write));
                        writers[coarseFile] = writer;
                    }
                    writer.WriteLine(coarseFileLine);
                }
            }

            if (symbols % 1000 == 0)
            {
                Log.Trace("CoarseGenerator.ProcessDailyFolder(): Completed processing {0} symbols. Current elapsed: {1} seconds", symbols, (DateTime.UtcNow - start).TotalSeconds.ToString("0.00"));
            }
        }
        catch (Exception err)
        {
            // log the error and continue with the process
            Log.Error(err.ToString());
        }
    }

    Log.Trace("CoarseGenerator.ProcessDailyFolder(): Saving {0} coarse files to disk", dates.Count);

    // dispose all the writers at the end of processing
    foreach (var writer in writers)
    {
        writer.Value.Dispose();
    }

    var stop = DateTime.UtcNow;

    Log.Trace("CoarseGenerator.ProcessDailyFolder(): Processed {0} symbols into {1} coarse files in {2} seconds", symbols, dates.Count, (stop - start).TotalSeconds.ToString("0.00"));
    Log.Trace("CoarseGenerator.ProcessDailyFolder(): Excluded {0} mapless symbols.", maplessCount);

    return writers.Keys;
}
/// <summary>
/// Iterates each daily file in the specified <paramref name="dailyFolder"/> and adds a line for each
/// day to the appropriate coarse file
/// </summary>
/// <param name="dailyFolder">The folder with daily data.</param>
/// <param name="coarseFolder">The coarse output folder.</param>
/// <param name="mapFileResolver">The map file resolver.</param>
/// <param name="factorFileProvider">The factor file provider.</param>
/// <param name="exclusions">The symbols to be excluded from processing.</param>
/// <param name="ignoreMapless">Ignore the symbols without a map file.</param>
/// <param name="symbolResolver">Function used to provide symbol resolution. Default resolution uses the zip file name to resolve
/// the symbol, specify null for this behavior.</param>
/// <returns>Collection with the names of the newly generated coarse files.</returns>
/// <exception cref="Exception">
/// Unable to resolve market for daily folder: " + dailyFolder
/// or
/// Unable to resolve fundamental path for coarse folder: " + coarseFolder
/// </exception>
public static ICollection<string> ProcessDailyFolder(string dailyFolder, string coarseFolder, MapFileResolver mapFileResolver, IFactorFileProvider factorFileProvider, HashSet<string> exclusions, bool ignoreMapless, Func<string, string> symbolResolver = null)
{
    // daily files store prices as scaled integers; divide by this to recover the decimal price
    const decimal scaleFactor = 10000m;

    Log.Trace("Processing: {0}", dailyFolder);

    var start = DateTime.UtcNow;

    // load map files into memory
    var symbols = 0;
    var maplessCount = 0;
    var dates = new HashSet<DateTime>();

    // instead of opening/closing these constantly, open them once and dispose at the end (~3x speed improvement)
    var writers = new Dictionary<string, StreamWriter>();

    // the market name is taken from the parent directory of the daily folder (e.g. .../usa/daily)
    var marketDirectoryInfo = new DirectoryInfo(dailyFolder).Parent;
    if (marketDirectoryInfo == null)
    {
        throw new Exception($"Unable to resolve market for daily folder: {dailyFolder}");
    }
    var market = marketDirectoryInfo.Name.ToLowerInvariant();

    var fundamentalDirectoryInfo = new DirectoryInfo(coarseFolder).Parent;
    if (fundamentalDirectoryInfo == null)
    {
        throw new Exception($"Unable to resolve fundamental path for coarse folder: {coarseFolder}");
    }

    // NOTE(review): the fine folder is resolved relative to the market directory, not
    // fundamentalDirectoryInfo resolved above — confirm this asymmetry is intentional
    var fineFundamentalFolder = Path.Combine(marketDirectoryInfo.FullName, "fundamental", "fine");

    // open up each daily file to get the values and append to the daily coarse files
    foreach (var file in Directory.EnumerateFiles(dailyFolder, "*.zip"))
    {
        try
        {
            var ticker = Path.GetFileNameWithoutExtension(file);

            // collect the dates for which this ticker has fine fundamental zip files
            var fineAvailableDates = Enumerable.Empty<DateTime>();
            var tickerFineFundamentalFolder = Path.Combine(fineFundamentalFolder, ticker);
            if (Directory.Exists(tickerFineFundamentalFolder))
            {
                fineAvailableDates = Directory.GetFiles(tickerFineFundamentalFolder, "*.zip")
                    .Select(f => DateTime.ParseExact(Path.GetFileNameWithoutExtension(f), DateFormat.EightCharacter, CultureInfo.InvariantCulture))
                    .ToList();
            }

            // NOTE(review): this null check runs after ticker was already used in Path.Combine above —
            // if GetFileNameWithoutExtension could actually return null here this would throw earlier
            if (ticker == null)
            {
                Log.Trace("CoarseGenerator.ProcessDailyFolder(): Unable to resolve symbol from file: {0}", file);
                continue;
            }

            if (symbolResolver != null)
            {
                ticker = symbolResolver(ticker);
            }

            ticker = ticker.ToUpperInvariant();

            if (exclusions != null && exclusions.Contains(ticker))
            {
                Log.Trace("Excluded symbol: {0}", ticker);
                continue;
            }

            ZipFile zip;
            using (var reader = Compression.Unzip(file, out zip))
            {
                var checkedForMapFile = false;

                symbols++;
                string line;
                while ((line = reader.ReadLine()) != null)
                {
                    //20150625.csv
                    var csv = line.Split(',');
                    var date = DateTime.ParseExact(csv[0], DateFormat.TwelveCharacter, CultureInfo.InvariantCulture);

                    // only check for a map file once per ticker, on the first data line
                    if (ignoreMapless && !checkedForMapFile)
                    {
                        checkedForMapFile = true;
                        if (!mapFileResolver.ResolveMapFile(ticker, date).Any())
                        {
                            // if the resolved map file has zero entries then it's a mapless symbol
                            maplessCount++;
                            break;
                        }
                    }

                    var close = Parse.Decimal(csv[4]) / scaleFactor;
                    var volume = Parse.Long(csv[5]);

                    var dollarVolume = close * volume;

                    var coarseFile = Path.Combine(coarseFolder, date.ToStringInvariant("yyyyMMdd") + ".csv");
                    dates.Add(date);

                    // try to resolve a map file and if found, regen the sid
                    var sid = SecurityIdentifier.GenerateEquity(SecurityIdentifier.DefaultDate, ticker, market);
                    var mapFile = mapFileResolver.ResolveMapFile(ticker, date);
                    if (!mapFile.IsNullOrEmpty())
                    {
                        // if available, use the permtick in the coarse files, because of this, we need
                        // to update the coarse files each time new map files are added/permticks change
                        sid = SecurityIdentifier.GenerateEquity(mapFile.FirstDate, mapFile.OrderBy(x => x.Date).First().MappedSymbol, market);
                    }

                    if (mapFile == null && ignoreMapless)
                    {
                        // if we're ignoring mapless files then we should always be able to resolve this
                        Log.Error($"CoarseGenerator.ProcessDailyFolder(): Unable to resolve map file for {ticker} as of {date.ToStringInvariant("d")}");
                        continue;
                    }

                    // get price and split factors from factor files
                    var symbol = new Symbol(sid, ticker);
                    var factorFile = factorFileProvider.Get(symbol);
                    var factorFileRow = factorFile?.GetScalingFactors(date);
                    var priceFactor = factorFileRow?.PriceFactor ?? 1m;
                    var splitFactor = factorFileRow?.SplitFactor ?? 1m;

                    // Check if security has fine file within a trailing month for a date-ticker set.
                    // There are tricky cases where a folder named by a ticker can have data for multiple securities.
                    // e.g GOOG -> GOOGL (GOOG T1AZ164W5VTX) / GOOCV -> GOOG (GOOCV VP83T1ZUHROL) case.
                    // The fine data in the 'fundamental/fine/goog' folder will be for 'GOOG T1AZ164W5VTX' up to the 2014-04-02 and for 'GOOCV VP83T1ZUHROL' afterward.
                    // Therefore, date before checking if the security has fundamental data for a date, we need to filter the fine files the map's first date.
                    var firstDate = mapFile?.FirstDate ?? DateTime.MinValue;
                    var hasFundamentalDataForDate = fineAvailableDates.Where(d => d >= firstDate).Any(d => date.AddMonths(-1) <= d && d <= date);

                    // The following section handles mergers and acquisitions cases.
                    // e.g. YHOO -> AABA (YHOO R735QTJ8XC9X)
                    // The dates right after the acquisition, valid fine fundamental data for AABA are still under the former ticker folder.
                    // Therefore if no fine fundamental data is found in the 'fundamental/fine/aaba' folder, it searches into the 'yhoo' folder.
                    if (mapFile != null && mapFile.Count() > 2 && !hasFundamentalDataForDate)
                    {
                        var previousTicker = mapFile.LastOrDefault(m => m.Date < date)?.MappedSymbol;
                        if (previousTicker != null)
                        {
                            var previousTickerFineFundamentalFolder = Path.Combine(fineFundamentalFolder, previousTicker);
                            if (Directory.Exists(previousTickerFineFundamentalFolder))
                            {
                                var previousTickerFineAvailableDates = Directory.GetFiles(previousTickerFineFundamentalFolder, "*.zip")
                                    .Select(f => DateTime.ParseExact(Path.GetFileNameWithoutExtension(f), DateFormat.EightCharacter, CultureInfo.InvariantCulture))
                                    .ToList();
                                hasFundamentalDataForDate = previousTickerFineAvailableDates.Where(d => d >= firstDate).Any(d => date.AddMonths(-1) <= d && d <= date);
                            }
                        }
                    }

                    // sid,symbol,close,volume,dollar volume,has fundamental data,price factor,split factor
                    var coarseFileLine = string.Format(
                        CultureInfo.InvariantCulture,
                        "{0},{1},{2},{3},{4},{5},{6},{7}",
                        sid,
                        ticker,
                        close,
                        volume,
                        Math.Truncate(dollarVolume),
                        hasFundamentalDataForDate,
                        priceFactor,
                        splitFactor);

                    StreamWriter writer;
                    if (!writers.TryGetValue(coarseFile, out writer))
                    {
                        // FileMode.Create: each run regenerates the coarse files from scratch
                        writer = new StreamWriter(new FileStream(coarseFile, FileMode.Create, FileAccess.Write, FileShare.Write));
                        writers[coarseFile] = writer;
                    }
                    writer.WriteLine(coarseFileLine);
                }
            }

            if (symbols % 1000 == 0)
            {
                Log.Trace($"CoarseGenerator.ProcessDailyFolder(): Completed processing {symbols} symbols. Current elapsed: {(DateTime.UtcNow - start).TotalSeconds.ToStringInvariant("0.00")} seconds");
            }
        }
        catch (Exception err)
        {
            // log the error and continue with the process
            Log.Error(err.ToString());
        }
    }

    Log.Trace("CoarseGenerator.ProcessDailyFolder(): Saving {0} coarse files to disk", dates.Count);

    // dispose all the writers at the end of processing
    foreach (var writer in writers)
    {
        writer.Value.Dispose();
    }

    var stop = DateTime.UtcNow;

    Log.Trace($"CoarseGenerator.ProcessDailyFolder(): Processed {symbols} symbols into {dates.Count} coarse files in {(stop - start).TotalSeconds.ToStringInvariant("0.00")} seconds");
    Log.Trace($"CoarseGenerator.ProcessDailyFolder(): Excluded {maplessCount} mapless symbols.");

    return (writers.Keys);
}
/// <summary>
/// Iterates each daily file in the specified <paramref name="dailyFolder"/> and adds a line for each
/// day to the appropriate coarse file
/// </summary>
/// <param name="dailyFolder">The folder with daily data</param>
/// <param name="coarseFolder">The coarse output folder</param>
/// <param name="mapFileResolver">Used for resolving the correct map files</param>
/// <param name="exclusions">The symbols to be excluded from processing</param>
/// <param name="ignoreMapless">Ignore the symbols without a map file.</param>
/// <param name="symbolResolver">Function used to provide symbol resolution. Default resolution uses the zip file name to resolve
/// the symbol, specify null for this behavior.</param>
/// <returns>A collection of the generated coarse files</returns>
public static ICollection<string> ProcessDailyFolder(string dailyFolder, string coarseFolder, MapFileResolver mapFileResolver, HashSet<string> exclusions, bool ignoreMapless, Func<string, string> symbolResolver = null)
{
    // daily files store prices as scaled integers; divide by this to recover the decimal price
    const decimal scaleFactor = 10000m;

    Log.Trace("Processing: {0}", dailyFolder);

    var stopwatch = Stopwatch.StartNew();

    // load map files into memory
    var symbols = 0;
    var maplessCount = 0;
    var dates = new HashSet<DateTime>();

    // instead of opening/closing these constantly, open them once and dispose at the end (~3x speed improvement)
    var writers = new Dictionary<string, StreamWriter>();

    // open up each daily file to get the values and append to the daily coarse files
    foreach (var file in Directory.EnumerateFiles(dailyFolder))
    {
        try
        {
            var symbol = Path.GetFileNameWithoutExtension(file);
            if (symbol == null)
            {
                Log.Trace("CoarseGenerator.ProcessDailyFolder(): Unable to resolve symbol from file: {0}", file);
                continue;
            }

            if (symbolResolver != null)
            {
                symbol = symbolResolver(symbol);
            }

            symbol = symbol.ToUpper();

            if (exclusions.Contains(symbol))
            {
                Log.Trace("Excluded symbol: {0}", symbol);
                continue;
            }

            ZipFile zip;
            using (var reader = Compression.Unzip(file, out zip))
            {
                // 30 period EMA constant
                const decimal k = 2m / (30 + 1);

                // EMA state: first observed volume seeds the running average
                var seeded = false;
                var runningAverageVolume = 0m;

                var checkedForMapFile = false;

                symbols++;
                string line;
                while ((line = reader.ReadLine()) != null)
                {
                    //20150625.csv
                    var csv = line.Split(',');
                    var date = DateTime.ParseExact(csv[0], DateFormat.TwelveCharacter, CultureInfo.InvariantCulture);

                    // only check for a map file once per symbol, on the first data line
                    if (ignoreMapless && !checkedForMapFile)
                    {
                        checkedForMapFile = true;
                        if (!mapFileResolver.ResolveMapFile(symbol, date).Any())
                        {
                            // if the resolved map file has zero entries then it's a mapless symbol
                            maplessCount++;
                            break;
                        }
                    }

                    var close = decimal.Parse(csv[4])/scaleFactor;
                    var volume = long.Parse(csv[5]);

                    // compute the current volume EMA for dollar volume calculations
                    runningAverageVolume = seeded
                        ? volume*k + runningAverageVolume*(1 - k)
                        : volume;

                    seeded = true;

                    var dollarVolume = close*runningAverageVolume;

                    var coarseFile = Path.Combine(coarseFolder, date.ToString("yyyyMMdd") + ".csv");
                    dates.Add(date);

                    // try to resolve a map file and if found, use the permtick as the symbol
                    var sid = symbol;
                    var mapFile = mapFileResolver.ResolveMapFile(sid, date);
                    if (mapFile != null)
                    {
                        // if available, use the permtick in the coarse files, because of this, we need
                        // to update the coarse files each time new map files are added/permticks change
                        sid = mapFile.Permtick;
                    }

                    if (mapFile == null && ignoreMapless)
                    {
                        // if we're ignoring mapless files then we should always be able to resolve this
                        Log.Error(string.Format("CoarseGenerator.ProcessDailyFolder(): Unable to resolve map file for {0} as of {1}", symbol, date.ToShortDateString()));
                        continue;
                    }

                    // sid,symbol,close,volume,dollar volume
                    var coarseFileLine = sid + "," + symbol + "," + close + "," + volume + "," + Math.Truncate(dollarVolume);

                    StreamWriter writer;
                    if (!writers.TryGetValue(coarseFile, out writer))
                    {
                        // FileMode.Append: lines accumulate across runs for this variant
                        writer = new StreamWriter(new FileStream(coarseFile, FileMode.Append, FileAccess.Write, FileShare.Write));
                        writers[coarseFile] = writer;
                    }
                    writer.WriteLine(coarseFileLine);
                }
            }

            if (symbols%1000 == 0)
            {
                Log.Trace("CoarseGenerator.ProcessDailyFolder(): Completed processing {0} symbols. Current elapsed: {1} seconds", symbols, stopwatch.Elapsed.TotalSeconds.ToString("0.00"));
            }
        }
        catch (Exception err)
        {
            // log the error and continue with the process
            Log.Error(err.ToString());
        }
    }

    Log.Trace("CoarseGenerator.ProcessDailyFolder(): Saving {0} coarse files to disk", dates.Count);

    // dispose all the writers at the end of processing
    foreach (var writer in writers)
    {
        writer.Value.Dispose();
    }

    stopwatch.Stop();

    Log.Trace("CoarseGenerator.ProcessDailyFolder(): Processed {0} symbols into {1} coarse files in {2} seconds", symbols, dates.Count, stopwatch.Elapsed.TotalSeconds.ToString("0.00"));
    Log.Trace("CoarseGenerator.ProcessDailyFolder(): Excluded {0} mapless symbols.", maplessCount);

    return writers.Keys;
}
/// <summary>
/// Creates the data enumerator for the specified security/configuration, optionally
/// wrapping it with fill forward and exchange/user filter enumerators
/// </summary>
private IEnumerator<BaseData> CreateSubscriptionEnumerator(Security security,
    SubscriptionDataConfig config,
    DateTime localStartTime,
    DateTime localEndTime,
    MapFileResolver mapFileResolver,
    IEnumerable<DateTime> tradeableDates,
    bool applySubscripterFilterEnumerator = true)
{
    // the raw reader is the base of the enumerator stack
    IEnumerator<BaseData> dataEnumerator = new SubscriptionDataReader(
        config,
        localStartTime,
        localEndTime,
        _resultHandler,
        mapFileResolver,
        _factorFileProvider,
        tradeableDates,
        false);

    // optionally apply fill forward logic, but never for tick data
    var fillForwardApplies = config.FillDataForward && config.Resolution != Resolution.Tick;
    if (fillForwardApplies)
    {
        dataEnumerator = new FillForwardEnumerator(
            dataEnumerator,
            security.Exchange,
            _fillForwardResolution,
            security.IsExtendedMarketHours,
            localEndTime,
            config.Resolution.ToTimeSpan());
    }

    // optionally apply exchange/user filters
    if (applySubscripterFilterEnumerator)
    {
        dataEnumerator = SubscriptionFilterEnumerator.WrapForDataFeed(_resultHandler, dataEnumerator, security, localEndTime);
    }

    return dataEnumerator;
}
/// <summary>
/// Iterates each daily file in the specified <paramref name="dailyFolder"/> and adds a line for each
/// day to the appropriate coarse file
/// </summary>
/// <param name="dailyFolder">The folder with daily data</param>
/// <param name="coarseFolder">The coarse output folder</param>
/// <param name="mapFileResolver">Used for resolving the correct map files</param>
/// <param name="exclusions">The symbols to be excluded from processing</param>
/// <param name="ignoreMapless">Ignore the symbols without a map file.</param>
/// <param name="symbolResolver">Function used to provide symbol resolution. Default resolution uses the zip file name to resolve
/// the symbol, specify null for this behavior.</param>
/// <returns>A collection of the generated coarse files</returns>
public static ICollection<string> ProcessDailyFolder(string dailyFolder, string coarseFolder, MapFileResolver mapFileResolver, HashSet<string> exclusions, bool ignoreMapless, Func<string, string> symbolResolver = null)
{
    // daily files store prices as scaled integers; divide by this to recover the decimal price
    const decimal scaleFactor = 10000m;

    Log.Trace("Processing: {0}", dailyFolder);

    var stopwatch = Stopwatch.StartNew();

    // load map files into memory
    var symbols = 0;
    var maplessCount = 0;
    var dates = new HashSet<DateTime>();

    // instead of opening/closing these constantly, open them once and dispose at the end (~3x speed improvement)
    var writers = new Dictionary<string, StreamWriter>();

    // open up each daily file to get the values and append to the daily coarse files
    foreach (var file in Directory.EnumerateFiles(dailyFolder))
    {
        try
        {
            var symbol = Path.GetFileNameWithoutExtension(file);
            if (symbol == null)
            {
                Log.Trace("CoarseGenerator.ProcessDailyFolder(): Unable to resolve symbol from file: {0}", file);
                continue;
            }

            if (symbolResolver != null)
            {
                symbol = symbolResolver(symbol);
            }

            symbol = symbol.ToUpper();

            if (exclusions.Contains(symbol))
            {
                Log.Trace("Excluded symbol: {0}", symbol);
                continue;
            }

            ZipFile zip;
            using (var reader = Compression.Unzip(file, out zip))
            {
                // 30 period EMA constant
                const decimal k = 2m / (30 + 1);

                // EMA state: the first volume observation seeds the running average
                var seeded = false;
                var runningAverageVolume = 0m;

                var checkedForMapFile = false;

                symbols++;
                string line;
                while ((line = reader.ReadLine()) != null)
                {
                    //20150625.csv
                    var csv = line.Split(',');
                    var date = DateTime.ParseExact(csv[0], DateFormat.TwelveCharacter, CultureInfo.InvariantCulture);

                    // the map file check only needs to happen once per symbol
                    if (ignoreMapless && !checkedForMapFile)
                    {
                        checkedForMapFile = true;
                        if (!mapFileResolver.ResolveMapFile(symbol, date).Any())
                        {
                            // if the resolved map file has zero entries then it's a mapless symbol
                            maplessCount++;
                            break;
                        }
                    }

                    var close = decimal.Parse(csv[4]) / scaleFactor;
                    var volume = long.Parse(csv[5]);

                    // compute the current volume EMA for dollar volume calculations
                    runningAverageVolume = seeded
                        ? volume * k + runningAverageVolume * (1 - k)
                        : volume;

                    seeded = true;

                    var dollarVolume = close * runningAverageVolume;

                    var coarseFile = Path.Combine(coarseFolder, date.ToString("yyyyMMdd") + ".csv");
                    dates.Add(date);

                    // try to resolve a map file and if found, use the permtick as the symbol
                    var sid = symbol;
                    var mapFile = mapFileResolver.ResolveMapFile(sid, date);
                    if (mapFile != null)
                    {
                        // if available, use the permtick in the coarse files, because of this, we need
                        // to update the coarse files each time new map files are added/permticks change
                        sid = mapFile.Permtick;
                    }

                    if (mapFile == null && ignoreMapless)
                    {
                        // if we're ignoring mapless files then we should always be able to resolve this
                        Log.Error(string.Format("CoarseGenerator.ProcessDailyFolder(): Unable to resolve map file for {0} as of {1}", symbol, date.ToShortDateString()));
                        continue;
                    }

                    // sid,symbol,close,volume,dollar volume
                    var coarseFileLine = sid + "," + symbol + "," + close + "," + volume + "," + Math.Truncate(dollarVolume);

                    StreamWriter writer;
                    if (!writers.TryGetValue(coarseFile, out writer))
                    {
                        // FileMode.Append: lines accumulate across runs for this variant
                        writer = new StreamWriter(new FileStream(coarseFile, FileMode.Append, FileAccess.Write, FileShare.Write));
                        writers[coarseFile] = writer;
                    }
                    writer.WriteLine(coarseFileLine);
                }
            }

            if (symbols % 1000 == 0)
            {
                Log.Trace("CoarseGenerator.ProcessDailyFolder(): Completed processing {0} symbols. Current elapsed: {1} seconds", symbols, stopwatch.Elapsed.TotalSeconds.ToString("0.00"));
            }
        }
        catch (Exception err)
        {
            // log the error and continue with the process
            Log.Error(err.ToString());
        }
    }

    Log.Trace("CoarseGenerator.ProcessDailyFolder(): Saving {0} coarse files to disk", dates.Count);

    // dispose all the writers at the end of processing
    foreach (var writer in writers)
    {
        writer.Value.Dispose();
    }

    stopwatch.Stop();

    Log.Trace("CoarseGenerator.ProcessDailyFolder(): Processed {0} symbols into {1} coarse files in {2} seconds", symbols, dates.Count, stopwatch.Elapsed.TotalSeconds.ToString("0.00"));
    Log.Trace("CoarseGenerator.ProcessDailyFolder(): Excluded {0} mapless symbols.", maplessCount);

    return (writers.Keys);
}
/// <summary>
/// Subscription data reader takes a subscription request, loads the type, accepts the data source and enumerate on the results.
/// </summary>
/// <param name="config">Subscription configuration object</param>
/// <param name="periodStart">Start date for the data request/backtest</param>
/// <param name="periodFinish">Finish date for the data request/backtest</param>
/// <param name="resultHandler">Result handler used to push error messages and perform sampling on skipped days</param>
/// <param name="mapFileResolver">Used for resolving the correct map files</param>
/// <param name="factorFileProvider">Used for getting factor files</param>
/// <param name="dataProvider">Used for getting files not present on disk</param>
/// <param name="dataCacheProvider">Used for caching files</param>
/// <param name="tradeableDates">Defines the dates for which we'll request data, in order, in the security's exchange time zone</param>
/// <param name="isLiveMode">True if we're in live mode, false otherwise</param>
/// <param name="includeAuxilliaryData">True if we want to emit aux data, false to only emit price data</param>
public SubscriptionDataReader(SubscriptionDataConfig config,
    DateTime periodStart,
    DateTime periodFinish,
    IResultHandler resultHandler,
    MapFileResolver mapFileResolver,
    IFactorFileProvider factorFileProvider,
    IDataProvider dataProvider,
    IEnumerable<DateTime> tradeableDates,
    bool isLiveMode,
    IDataCacheProvider dataCacheProvider,
    bool includeAuxilliaryData = true)
{
    //Save configuration of data-subscription:
    _config = config;

    _auxiliaryData = new Queue<BaseData>();

    //Save Start and End Dates:
    _periodStart = periodStart;
    _periodFinish = periodFinish;

    _dataProvider = dataProvider;
    _dataCacheProvider = dataCacheProvider;

    //Save access to securities
    _isLiveMode = isLiveMode;
    _includeAuxilliaryData = includeAuxilliaryData;

    //Save the type of data we'll be getting from the source.
    //Create the dynamic type-activators:
    var objectActivator = ObjectActivator.GetActivator(config.Type);

    _resultHandler = resultHandler;
    _tradeableDates = tradeableDates.GetEnumerator();
    if (objectActivator == null)
    {
        // without a parameterless constructor we cannot instantiate the data type;
        // flag end of stream so the enumerator never attempts to produce data
        _resultHandler.ErrorMessage("Custom data type '" + config.Type.Name + "' missing parameterless constructor E.g. public " + config.Type.Name + "() { }");
        _endOfStream = true;
        return;
    }

    //Create an instance of the "Type":
    var userObj = objectActivator.Invoke(new object[] { config.Type });
    _dataFactory = userObj as BaseData;

    //If its quandl set the access token in data factory:
    var quandl = _dataFactory as Quandl;
    if (quandl != null)
    {
        if (!Quandl.IsAuthCodeSet)
        {
            Quandl.SetAuthCode(Config.Get("quandl-auth-token"));
        }
    }

    // If Tiingo data, set the access token in data factory
    var tiingo = _dataFactory as TiingoDailyData;
    if (tiingo != null)
    {
        if (!Tiingo.IsAuthCodeSet)
        {
            Tiingo.SetAuthCode(Config.Get("tiingo-auth-token"));
        }
    }

    // start with empty factor/map files; they're replaced below when real ones resolve
    _factorFile = new FactorFile(config.Symbol.Value, new List<FactorFileRow>());
    _mapFile = new MapFile(config.Symbol.Value, new List<MapFileRow>());

    // load up the map and factor files for equities
    if (!config.IsCustomData && config.SecurityType == SecurityType.Equity)
    {
        try
        {
            var mapFile = mapFileResolver.ResolveMapFile(config.Symbol.ID.Symbol, config.Symbol.ID.Date);

            // only take the resolved map file if it has data, otherwise we'll use the empty one we defined above
            if (mapFile.Any())
            {
                _mapFile = mapFile;
            }

            var factorFile = factorFileProvider.Get(_config.Symbol);
            _hasScaleFactors = factorFile != null;
            if (_hasScaleFactors)
            {
                _factorFile = factorFile;

                // if factor file has minimum date, update start period if before minimum date
                if (!_isLiveMode && _factorFile != null && _factorFile.FactorFileMinimumDate.HasValue)
                {
                    if (_periodStart < _factorFile.FactorFileMinimumDate.Value)
                    {
                        _periodStart = _factorFile.FactorFileMinimumDate.Value;

                        _resultHandler.DebugMessage(
                            string.Format("Data for symbol {0} has been limited due to numerical precision issues in the factor file. The starting date has been set to {1}.",
                                config.Symbol.Value,
                                _factorFile.FactorFileMinimumDate.Value.ToShortDateString()));
                    }
                }
            }
        }
        catch (Exception err)
        {
            // map/factor resolution failure is non-fatal: we continue with the empty defaults
            Log.Error(err, "Fetching Price/Map Factors: " + config.Symbol.ID + ": ");
        }
    }

    // load up the map and factor files for underlying of equity option
    if (!config.IsCustomData && config.SecurityType == SecurityType.Option)
    {
        try
        {
            var mapFile = mapFileResolver.ResolveMapFile(config.Symbol.Underlying.ID.Symbol, config.Symbol.Underlying.ID.Date);

            // only take the resolved map file if it has data, otherwise we'll use the empty one we defined above
            if (mapFile.Any())
            {
                _mapFile = mapFile;
            }
        }
        catch (Exception err)
        {
            // non-fatal: fall back to the empty map file defined above
            Log.Error(err, "Map Factors: " + config.Symbol.ID + ": ");
        }
    }

    // Estimate delisting date.
    switch (_config.Symbol.ID.SecurityType)
    {
        case SecurityType.Future:
            // futures delist on their expiry date encoded in the symbol id
            _delistingDate = _config.Symbol.ID.Date;
            break;
        case SecurityType.Option:
            _delistingDate = OptionSymbol.GetLastDayOfTrading(_config.Symbol);
            break;
        default:
            _delistingDate = _mapFile.DelistingDate;
            break;
    }

    _subscriptionFactoryEnumerator = ResolveDataEnumerator(true);
}
/// <summary>
/// Iterates each daily file in the specified <paramref name="dailyFolder"/> and adds a line for each
/// day to the appropriate coarse file
/// </summary>
/// <param name="dailyFolder">The folder with daily data</param>
/// <param name="coarseFolder">The coarse output folder</param>
/// <param name="mapFileResolver">Used for resolving the correct map files</param>
/// <param name="exclusions">The symbols to be excluded from processing</param>
/// <param name="ignoreMapless">Ignore the symbols without a map file.</param>
/// <param name="startDate">The starting date for processing</param>
/// <param name="symbolResolver">Function used to provide symbol resolution. Default resolution uses the zip file name to resolve
/// the symbol, specify null for this behavior.</param>
/// <returns>A collection of the generated coarse files</returns>
public static ICollection<string> ProcessDailyFolder(string dailyFolder, string coarseFolder, MapFileResolver mapFileResolver, HashSet<string> exclusions, bool ignoreMapless, DateTime startDate, Func<string, string> symbolResolver = null)
{
    // daily files store prices as scaled integers; divide by this to recover the decimal price
    const decimal scaleFactor = 10000m;

    Log.Trace("Processing: {0}", dailyFolder);

    var start = DateTime.UtcNow;

    // load map files into memory
    var symbols = 0;
    var maplessCount = 0;
    var dates = new HashSet<DateTime>();

    // instead of opening/closing these constantly, open them once and dispose at the end (~3x speed improvement)
    var writers = new Dictionary<string, StreamWriter>();

    // the market name is taken from the parent directory of the daily folder (e.g. .../usa/daily)
    var dailyFolderDirectoryInfo = new DirectoryInfo(dailyFolder).Parent;
    if (dailyFolderDirectoryInfo == null)
    {
        throw new Exception("Unable to resolve market for daily folder: " + dailyFolder);
    }
    var market = dailyFolderDirectoryInfo.Name.ToLower();

    var fundamentalDirectoryInfo = new DirectoryInfo(coarseFolder).Parent;
    if (fundamentalDirectoryInfo == null)
    {
        throw new Exception("Unable to resolve fundamental path for coarse folder: " + coarseFolder);
    }
    var fineFundamentalFolder = Path.Combine(fundamentalDirectoryInfo.FullName, "fine");

    // open up each daily file to get the values and append to the daily coarse files
    foreach (var file in Directory.EnumerateFiles(dailyFolder))
    {
        try
        {
            var symbol = Path.GetFileNameWithoutExtension(file);
            if (symbol == null)
            {
                Log.Trace("CoarseGenerator.ProcessDailyFolder(): Unable to resolve symbol from file: {0}", file);
                continue;
            }

            if (symbolResolver != null)
            {
                symbol = symbolResolver(symbol);
            }

            symbol = symbol.ToUpper();

            if (exclusions.Contains(symbol))
            {
                Log.Trace("Excluded symbol: {0}", symbol);
                continue;
            }

            // check if symbol has any fine fundamental data: find the earliest fine file date
            var firstFineSymbolDate = DateTime.MaxValue;
            if (Directory.Exists(fineFundamentalFolder))
            {
                var fineSymbolFolder = Path.Combine(fineFundamentalFolder, symbol.ToLower());

                var firstFineSymbolFileName = Directory.Exists(fineSymbolFolder) ? Directory.GetFiles(fineSymbolFolder).OrderBy(x => x).FirstOrDefault() : string.Empty;
                if (firstFineSymbolFileName.Length > 0)
                {
                    firstFineSymbolDate = DateTime.ParseExact(Path.GetFileNameWithoutExtension(firstFineSymbolFileName), "yyyyMMdd", CultureInfo.InvariantCulture);
                }
            }

            ZipFile zip;
            using (var reader = Compression.Unzip(file, out zip))
            {
                // 30 period EMA constant
                const decimal k = 2m / (30 + 1);

                // EMA state: the first volume observation seeds the running average
                var seeded = false;
                var runningAverageVolume = 0m;

                var checkedForMapFile = false;

                symbols++;
                string line;
                while ((line = reader.ReadLine()) != null)
                {
                    //20150625.csv
                    var csv = line.Split(',');
                    var date = DateTime.ParseExact(csv[0], DateFormat.TwelveCharacter, CultureInfo.InvariantCulture);

                    // spin past old data
                    if (date < startDate) continue;

                    // the map file check only needs to happen once per symbol
                    if (ignoreMapless && !checkedForMapFile)
                    {
                        checkedForMapFile = true;
                        if (!mapFileResolver.ResolveMapFile(symbol, date).Any())
                        {
                            // if the resolved map file has zero entries then it's a mapless symbol
                            maplessCount++;
                            break;
                        }
                    }

                    var close = decimal.Parse(csv[4])/scaleFactor;
                    var volume = long.Parse(csv[5]);

                    // compute the current volume EMA for dollar volume calculations
                    runningAverageVolume = seeded
                        ? volume*k + runningAverageVolume*(1 - k)
                        : volume;

                    seeded = true;

                    var dollarVolume = close * runningAverageVolume;

                    var coarseFile = Path.Combine(coarseFolder, date.ToString("yyyyMMdd") + ".csv");
                    dates.Add(date);

                    // try to resolve a map file and if found, regen the sid
                    var sid = SecurityIdentifier.GenerateEquity(SecurityIdentifier.DefaultDate, symbol, market);
                    var mapFile = mapFileResolver.ResolveMapFile(symbol, date);
                    if (!mapFile.IsNullOrEmpty())
                    {
                        // if available, use the permtick in the coarse files, because of this, we need
                        // to update the coarse files each time new map files are added/permticks change
                        sid = SecurityIdentifier.GenerateEquity(mapFile.FirstDate, mapFile.OrderBy(x => x.Date).First().MappedSymbol, market);
                    }

                    if (mapFile == null && ignoreMapless)
                    {
                        // if we're ignoring mapless files then we should always be able to resolve this
                        Log.Error(string.Format("CoarseGenerator.ProcessDailyFolder(): Unable to resolve map file for {0} as of {1}", symbol, date.ToShortDateString()));
                        continue;
                    }

                    // check if symbol has fine fundamental data for the current date
                    var hasFundamentalDataForDate = date >= firstFineSymbolDate;

                    // sid,symbol,close,volume,dollar volume,has fundamental data
                    var coarseFileLine = sid + "," + symbol + "," + close + "," + volume + "," + Math.Truncate(dollarVolume) + "," + hasFundamentalDataForDate;

                    StreamWriter writer;
                    if (!writers.TryGetValue(coarseFile, out writer))
                    {
                        // FileMode.Create: each run regenerates the coarse files from scratch
                        writer = new StreamWriter(new FileStream(coarseFile, FileMode.Create, FileAccess.Write, FileShare.Write));
                        writers[coarseFile] = writer;
                    }
                    writer.WriteLine(coarseFileLine);
                }
            }

            if (symbols%1000 == 0)
            {
                Log.Trace("CoarseGenerator.ProcessDailyFolder(): Completed processing {0} symbols. Current elapsed: {1} seconds", symbols, (DateTime.UtcNow - start).TotalSeconds.ToString("0.00"));
            }
        }
        catch (Exception err)
        {
            // log the error and continue with the process
            Log.Error(err.ToString());
        }
    }

    Log.Trace("CoarseGenerator.ProcessDailyFolder(): Saving {0} coarse files to disk", dates.Count);

    // dispose all the writers at the end of processing
    foreach (var writer in writers)
    {
        writer.Value.Dispose();
    }

    var stop = DateTime.UtcNow;

    Log.Trace("CoarseGenerator.ProcessDailyFolder(): Processed {0} symbols into {1} coarse files in {2} seconds", symbols, dates.Count, (stop - start).TotalSeconds.ToString("0.00"));
    Log.Trace("CoarseGenerator.ProcessDailyFolder(): Excluded {0} mapless symbols.", maplessCount);

    return writers.Keys;
}
/// <summary>
/// Processes the daily zip files in <paramref name="dailyFolder"/> into per-date coarse
/// universe csv files written to <paramref name="coarseFolder"/>.
/// </summary>
/// <param name="dailyFolder">Directory containing one daily zip file per symbol; its parent directory name is used as the market</param>
/// <param name="coarseFolder">Output directory for the generated yyyyMMdd.csv coarse files</param>
/// <param name="mapFileResolver">Used for resolving the correct map files</param>
/// <param name="exclusions">Upper-cased symbols to skip entirely</param>
/// <param name="ignoreMapless">True to skip symbols that have no resolvable map file</param>
/// <param name="startDate">Rows dated before this value are ignored</param>
/// <param name="symbolResolver">Optional function mapping a file name to a symbol</param>
/// <returns>The collection of coarse file paths that were written</returns>
public static ICollection <string> ProcessDailyFolder(string dailyFolder, string coarseFolder, MapFileResolver mapFileResolver, HashSet <string> exclusions, bool ignoreMapless, DateTime startDate, Func <string, string> symbolResolver = null)
{
    // daily equity files store prices scaled by 10^4
    const decimal scaleFactor = 10000m;

    Log.Trace("Processing: {0}", dailyFolder);

    var start = DateTime.UtcNow;

    var symbols = 0;
    var maplessCount = 0;
    var dates = new HashSet<DateTime>();
    // open writers keyed by coarse file path so each date's file is created exactly once
    var writers = new Dictionary<string, StreamWriter>();

    var dailyFolderDirectoryInfo = new DirectoryInfo(dailyFolder).Parent;
    if (dailyFolderDirectoryInfo == null)
    {
        throw new Exception("Unable to resolve market for daily folder: " + dailyFolder);
    }
    var market = dailyFolderDirectoryInfo.Name.ToLower();

    foreach (var file in Directory.EnumerateFiles(dailyFolder))
    {
        try
        {
            var symbol = Path.GetFileNameWithoutExtension(file);
            if (symbol == null)
            {
                Log.Trace("CoarseGenerator.ProcessDailyFolder(): Unable to resolve symbol from file: {0}", file);
                continue;
            }

            if (symbolResolver != null)
            {
                symbol = symbolResolver(symbol);
            }
            symbol = symbol.ToUpper();

            if (exclusions.Contains(symbol))
            {
                Log.Trace("Excluded symbol: {0}", symbol);
                continue;
            }

            ZipFile zip;
            using (var reader = Compression.Unzip(file, out zip))
            {
                // 30-period exponential moving average smoothing constant for volume
                const decimal k = 2m / (30 + 1);
                var seeded = false;
                var runningAverageVolume = 0m;
                var checkedForMapFile = false;

                symbols++;
                string line;
                while ((line = reader.ReadLine()) != null)
                {
                    var csv = line.Split(',');
                    var date = DateTime.ParseExact(csv[0], DateFormat.TwelveCharacter, CultureInfo.InvariantCulture);
                    if (date < startDate)
                    {
                        continue;
                    }

                    // only check for a map file once per symbol, on the first in-range row
                    if (ignoreMapless && !checkedForMapFile)
                    {
                        checkedForMapFile = true;
                        if (!mapFileResolver.ResolveMapFile(symbol, date).Any())
                        {
                            maplessCount++;
                            break;
                        }
                    }

                    // BUG FIX: parse with the invariant culture — the data files always use '.'
                    // as the decimal separator regardless of the host locale (CA1305)
                    var close = decimal.Parse(csv[4], CultureInfo.InvariantCulture) / scaleFactor;
                    var volume = long.Parse(csv[5], CultureInfo.InvariantCulture);

                    runningAverageVolume = seeded ? volume * k + runningAverageVolume * (1 - k) : volume;
                    seeded = true;

                    var dollarVolume = close * runningAverageVolume;

                    var coarseFile = Path.Combine(coarseFolder, date.ToString("yyyyMMdd") + ".csv");
                    dates.Add(date);

                    // try to resolve a map file and if found, regenerate the sid from the first mapped symbol
                    var sid = SecurityIdentifier.GenerateEquity(SecurityIdentifier.DefaultDate, symbol, market);
                    var mapFile = mapFileResolver.ResolveMapFile(symbol, date);
                    if (!mapFile.IsNullOrEmpty())
                    {
                        sid = SecurityIdentifier.GenerateEquity(mapFile.FirstDate, mapFile.OrderBy(x => x.Date).First().MappedSymbol, market);
                    }

                    // NOTE(review): ResolveMapFile appears to return an empty map file rather than
                    // null (see the IsNullOrEmpty check above), which would make this branch
                    // unreachable — confirm whether this should test mapFile.IsNullOrEmpty() instead
                    if (mapFile == null && ignoreMapless)
                    {
                        Log.Error(string.Format("CoarseGenerator.ProcessDailyFolder(): Unable to resolve map file for {0} as of {1}", symbol, date.ToShortDateString()));
                        continue;
                    }

                    // sid,symbol,close,volume,dollar volume
                    // BUG FIX: format numeric fields with the invariant culture so the csv is not
                    // corrupted on hosts whose current culture uses ',' as the decimal separator
                    var coarseFileLine = sid + "," + symbol + "," +
                        close.ToString(CultureInfo.InvariantCulture) + "," +
                        volume.ToString(CultureInfo.InvariantCulture) + "," +
                        Math.Truncate(dollarVolume).ToString(CultureInfo.InvariantCulture);

                    StreamWriter writer;
                    if (!writers.TryGetValue(coarseFile, out writer))
                    {
                        writer = new StreamWriter(new FileStream(coarseFile, FileMode.Create, FileAccess.Write, FileShare.Write));
                        writers[coarseFile] = writer;
                    }
                    writer.WriteLine(coarseFileLine);
                }
            }

            if (symbols % 1000 == 0)
            {
                Log.Trace("CoarseGenerator.ProcessDailyFolder(): Completed processing {0} symbols. Current elapsed: {1} seconds", symbols, (DateTime.UtcNow - start).TotalSeconds.ToString("0.00"));
            }
        }
        catch (Exception err)
        {
            // log the error and continue with the next symbol file
            Log.Error(err.ToString());
        }
    }

    Log.Trace("CoarseGenerator.ProcessDailyFolder(): Saving {0} coarse files to disk", dates.Count);

    // dispose all the writers at the end of processing
    foreach (var writer in writers)
    {
        writer.Value.Dispose();
    }

    var stop = DateTime.UtcNow;

    Log.Trace("CoarseGenerator.ProcessDailyFolder(): Processed {0} symbols into {1} coarse files in {2} seconds", symbols, dates.Count, (stop - start).TotalSeconds.ToString("0.00"));
    Log.Trace("CoarseGenerator.ProcessDailyFolder(): Excluded {0} mapless symbols.", maplessCount);

    return(writers.Keys);
}
/// <summary>
/// Subscription data reader takes a subscription request, loads the type, accepts the data source and enumerate on the results.
/// </summary>
/// <param name="config">Subscription configuration object</param>
/// <param name="periodStart">Start date for the data request/backtest</param>
/// <param name="periodFinish">Finish date for the data request/backtest</param>
/// <param name="resultHandler">Result handler used to push error messages and perform sampling on skipped days</param>
/// <param name="mapFileResolver">Used for resolving the correct map files</param>
/// <param name="factorFileProvider">Used for getting factor files</param>
/// <param name="tradeableDates">Defines the dates for which we'll request data, in order, in the security's exchange time zone</param>
/// <param name="isLiveMode">True if we're in live mode, false otherwise</param>
/// <param name="includeAuxilliaryData">True if we want to emit aux data, false to only emit price data</param>
public SubscriptionDataReader(SubscriptionDataConfig config,
    DateTime periodStart,
    DateTime periodFinish,
    IResultHandler resultHandler,
    MapFileResolver mapFileResolver,
    IFactorFileProvider factorFileProvider,
    IEnumerable <DateTime> tradeableDates,
    bool isLiveMode,
    bool includeAuxilliaryData = true)
{
    //Save configuration of data-subscription:
    _config = config;

    _auxiliaryData = new Queue <BaseData>();

    //Save Start and End Dates:
    _periodStart = periodStart;
    _periodFinish = periodFinish;

    //Save access to securities
    _isLiveMode = isLiveMode;
    _includeAuxilliaryData = includeAuxilliaryData;

    //Save the type of data we'll be getting from the source.
    //Create the dynamic type-activators:
    var objectActivator = ObjectActivator.GetActivator(config.Type);

    _resultHandler = resultHandler;
    _tradeableDates = tradeableDates.GetEnumerator();

    // data types must expose a parameterless constructor so instances can be activated;
    // without one we surface the error and mark the stream finished rather than throwing
    if (objectActivator == null)
    {
        _resultHandler.ErrorMessage("Custom data type '" + config.Type.Name + "' missing parameterless constructor E.g. public " + config.Type.Name + "() { }");
        _endOfStream = true;
        return;
    }

    //Create an instance of the "Type":
    var userObj = objectActivator.Invoke(new object[] {});
    _dataFactory = userObj as BaseData;

    //If its quandl set the access token in data factory:
    var quandl = _dataFactory as Quandl;
    if (quandl != null)
    {
        if (!Quandl.IsAuthCodeSet)
        {
            // NOTE(review): pulls the token from global config — assumes "quandl-auth-token"
            // is configured whenever quandl subscriptions are requested; confirm with callers
            Quandl.SetAuthCode(Config.Get("quandl-auth-token"));
        }
    }

    // default to empty factor/map files so downstream code never observes null
    _factorFile = new FactorFile(config.Symbol.Value, new List <FactorFileRow>());
    _mapFile = new MapFile(config.Symbol.Value, new List <MapFileRow>());

    // load up the map and factor files for equities
    if (!config.IsCustomData && config.SecurityType == SecurityType.Equity)
    {
        try
        {
            var mapFile = mapFileResolver.ResolveMapFile(config.Symbol.ID.Symbol, config.Symbol.ID.Date);

            // only take the resolved map file if it has data, otherwise we'll use the empty one we defined above
            if (mapFile.Any())
            {
                _mapFile = mapFile;
            }

            var factorFile = factorFileProvider.Get(_config.Symbol);
            _hasScaleFactors = factorFile != null;
            if (_hasScaleFactors)
            {
                _factorFile = factorFile;
            }
        }
        catch (Exception err)
        {
            // non-fatal: log and fall back to the empty map/factor files created above
            Log.Error(err, "Fetching Price/Map Factors: " + config.Symbol.ID + ": ");
        }
    }

    // create the initial data enumerator (presumably for the first tradeable date —
    // see ResolveDataEnumerator for the exact semantics of the 'true' argument)
    _subscriptionFactoryEnumerator = ResolveDataEnumerator(true);
}
public void DoesNotEmitInvalidData()
{
    // request window covering two full trading sessions (Fri 2014-06-06 and Mon 2014-06-09)
    var requestStart = new DateTime(2014, 06, 06, 0, 0, 0);
    var requestEnd = new DateTime(2014, 06, 09, 20, 0, 0);

    var chainSymbol = Symbol.Create("AAPL", SecurityType.Option, Market.USA, "?AAPL");
    var usdCash = new Cash(Currencies.USD, 0, 1);
    var hours = MarketHoursDatabase.FromDataFolder().GetExchangeHours(Market.USA, chainSymbol, SecurityType.Option);

    var subscriptionConfig = new SubscriptionDataConfig(
        typeof(ZipEntryName),
        chainSymbol,
        Resolution.Minute,
        TimeZones.Utc,
        TimeZones.NewYork,
        true,
        false,
        false,
        false,
        TickType.Quote,
        false,
        DataNormalizationMode.Raw
    );

    var optionSecurity = new Option(
        chainSymbol,
        hours,
        usdCash,
        new OptionSymbolProperties(SymbolProperties.GetDefault(Currencies.USD)),
        ErrorCurrencyConverter.Instance,
        RegisteredSecurityDataTypesProvider.Null,
        new SecurityCache()
    );

    var ffResolution = Ref.CreateReadOnly(() => Resolution.Minute.ToTimeSpan());

    // wraps the raw underlying enumerator: aggregate into collections, then fill forward
    Func <SubscriptionRequest, IEnumerator <BaseData>, IEnumerator <BaseData> > wrapUnderlying = (req, source) =>
    {
        var aggregated = new BaseDataCollectionAggregatorEnumerator(source, req.Configuration.Symbol);
        return new FillForwardEnumerator(
            aggregated,
            optionSecurity.Exchange,
            ffResolution,
            false,
            requestEnd,
            Resolution.Minute.ToTimeSpan(),
            TimeZones.Utc,
            requestStart);
    };

    var enumeratorFactory = new OptionChainUniverseSubscriptionEnumeratorFactory(
        wrapUnderlying,
        MapFileResolver.Create(Globals.DataFolder, Market.USA),
        new LocalDiskFactorFileProvider(new LocalDiskMapFileProvider()));

    var universeRequest = new SubscriptionRequest(true, null, optionSecurity, subscriptionConfig, requestStart, requestEnd);
    var chainEnumerator = enumeratorFactory.CreateEnumerator(universeRequest, new DefaultDataProvider());

    // every emitted point must be a chain collection with an underlying and at least one contract
    var emitted = 0;
    foreach (var point in chainEnumerator.AsEnumerable())
    {
        emitted++;

        var chainData = point as OptionChainUniverseDataCollection;
        Assert.IsNotNull(chainData);
        Assert.IsNotNull(chainData.Underlying);
        Assert.AreNotEqual(0, chainData.Data.Count);
    }

    // 9:30 to 15:59 is 390 minutes per session; two sessions => 780 data points
    Assert.AreEqual(780, emitted);
}