Example #1
        public void ResolvesStraightMapping()
        {
            var spyMapFile = _resolver.ResolveMapFile("SPY", new DateTime(2015, 08, 23));

            Assert.IsNotNull(spyMapFile);
            Assert.AreEqual("SPY", spyMapFile.GetMappedSymbol(new DateTime(2015, 08, 23)));
        }
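The fixture setup for _resolver is not part of this example; the following is a minimal sketch that assumes the resolver is built from the local map files for the "usa" market, and that MapFileResolver.Create and Globals.DataFolder are the right construction inputs (both are assumptions, not shown in the original test).

        // Sketch only: the actual test initialization is not shown in this example.
        // MapFileResolver.Create(dataDirectory, market) and Globals.DataFolder are assumed inputs.
        private readonly MapFileResolver _resolver =
            MapFileResolver.Create(Globals.DataFolder, "usa");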
Example #2
        private string GetMappedSymbol(SubscriptionDataConfig config, DateTime date)
        {
            var mapFile = config.Symbol.HasUnderlying ?
                          _mapFileResolver.ResolveMapFile(config.Symbol.Underlying.ID.Symbol, config.Symbol.Underlying.ID.Date) :
                          _mapFileResolver.ResolveMapFile(config.Symbol.ID.Symbol, config.Symbol.ID.Date);

            return(mapFile.GetMappedSymbol(date, config.MappedSymbol));
        }
        private static MapFile GetMapFileToUse(
            SubscriptionDataConfig config,
            MapFileResolver mapFileResolver)
        {
            var mapFileToUse = new MapFile(config.Symbol.Value, new List <MapFileRow>());

            // load up the map and factor files for equities
            if (!config.IsCustomData && config.SecurityType == SecurityType.Equity)
            {
                try
                {
                    var mapFile = mapFileResolver.ResolveMapFile(
                        config.Symbol.ID.Symbol,
                        config.Symbol.ID.Date);

                    // only take the resolved map file if it has data, otherwise we'll use the empty one we defined above
                    if (mapFile.Any())
                    {
                        mapFileToUse = mapFile;
                    }
                }
                catch (Exception err)
                {
                    Log.Error(err, "CorporateEventEnumeratorFactory.GetMapFileToUse():" +
                              " Map File: " + config.Symbol.ID + ": ");
                }
            }

            // load up the map and factor files for underlying of equity option
            if (!config.IsCustomData && config.SecurityType == SecurityType.Option)
            {
                try
                {
                    var mapFile = mapFileResolver.ResolveMapFile(
                        config.Symbol.Underlying.ID.Symbol,
                        config.Symbol.Underlying.ID.Date);

                    // only take the resolved map file if it has data, otherwise we'll use the empty one we defined above
                    if (mapFile.Any())
                    {
                        mapFileToUse = mapFile;
                    }
                }
                catch (Exception err)
                {
                    Log.Error(err, "CorporateEventEnumeratorFactory.GetMapFileToUse():" +
                              " Map File: " + config.Symbol.ID + ": ");
                }
            }

            return(mapFileToUse);
        }
        private string GetMappedSymbol(SubscriptionRequest request, DateTime date)
        {
            var config = request.Configuration;

            if (config.Symbol.ID.SecurityType == SecurityType.Option ||
                config.Symbol.ID.SecurityType == SecurityType.Equity)
            {
                var mapFile = config.Symbol.HasUnderlying ?
                              _mapFileResolver.ResolveMapFile(config.Symbol.Underlying.ID.Symbol, config.Symbol.Underlying.ID.Date) :
                              _mapFileResolver.ResolveMapFile(config.Symbol.ID.Symbol, config.Symbol.ID.Date);

                return(mapFile.GetMappedSymbol(date, config.MappedSymbol));
            }
            return(config.MappedSymbol);
        }
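Both helpers above rely on MapFile.GetMappedSymbol falling back to the supplied default (the config's current MappedSymbol) when no row matches the date; a minimal sketch of that behavior, with an illustrative ticker and date:

            // An empty map file has no rows, so the second argument is returned unchanged.
            var emptyMapFile = new MapFile("SPY", new List<MapFileRow>());
            var mapped = emptyMapFile.GetMappedSymbol(new DateTime(2015, 08, 23), "SPY");   // "SPY"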
        private static MapFile GetMapFileToUse(
            SubscriptionDataConfig config,
            MapFileResolver mapFileResolver)
        {
            var mapFileToUse = new MapFile(config.Symbol.Value, new List<MapFileRow>());

            // load up the map and factor files for equities, options, and custom data
            if (config.TickerShouldBeMapped())
            {
                try
                {
                    var mapFile = mapFileResolver.ResolveMapFile(config.Symbol, config.Type);

                    // only take the resolved map file if it has data, otherwise we'll use the empty one we defined above
                    if (mapFile.Any())
                    {
                        mapFileToUse = mapFile;
                    }
                }
                catch (Exception err)
                {
                    Log.Error(err, "CorporateEventEnumeratorFactory.GetMapFileToUse():" +
                        " Map File: " + config.Symbol.ID + ": ");
                }
            }

            return mapFileToUse;
        }
Example #6
        /// <summary>
        /// Converts a specific file to Lean alternative data format. Note that you must flush
        /// after you're done converting a file to ensure that all data gets written to disk.
        /// You can do that by calling <see cref="Dispose"/> once you've finished processing
        ///
        /// Note: Assumes that it will be given files in ascending order by date
        /// </summary>
        /// <param name="sourceFilePath">File to process and convert</param>
        public void Convert(Stream stream)
        {
            if (_disposedValue)
            {
                throw new ObjectDisposedException("PsychSignalDataConverter has already been disposed");
            }

            var previousTicker   = string.Empty;
            var currentLineCount = 0;

            using (var reader = new StreamReader(stream))
            {
                string line;
                while ((line = reader.ReadLine()) != null)
                {
                    currentLineCount++;

                    var      csv    = line.Split(',');
                    var      ticker = csv[1].ToLowerInvariant();
                    DateTime timestamp;

                    if (csv[0] == "SOURCE")
                    {
                        Log.Trace($"PsychSignalDataConverter.Convert(): Skipping line {currentLineCount} - Line contains header information");
                        continue;
                    }
                    if (!DateTime.TryParseExact(csv[2], @"yyyy-MM-dd\THH:mm:ss\Z", CultureInfo.InvariantCulture, DateTimeStyles.AdjustToUniversal, out timestamp))
                    {
                        Log.Trace($"PsychSignalDataConverter.Convert(): Skipping line {currentLineCount} - Failed to parse date properly");
                        continue;
                    }
                    if (!_mapFileResolver.ResolveMapFile(ticker, timestamp).Any())
                    {
                        // Because all tickers are clustered together, we can detect
                        // duplicate messages and avoid spamming the status log
                        if (ticker != previousTicker)
                        {
                            Log.Trace($"PsychSignalDataDownloader.Convert(): Skipping line {currentLineCount} - Could not resolve map file for ticker {ticker}");
                        }
                        previousTicker = ticker;
                        continue;
                    }

                    TickerData handle;
                    if (!_fileHandles.TryGetValue(ticker, out handle))
                    {
                        handle = new TickerData(ticker, timestamp.Date, _destinationDirectory);
                        _fileHandles[ticker] = handle;
                    }

                    handle.Append(timestamp, csv);
                    previousTicker = ticker;
                }
            }
        }
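A hedged usage sketch for the converter: only the Convert method is shown above, so the constructor arguments below are assumptions (the snippet only implies a destination directory and a map file resolver exist as fields), and the paths are illustrative.

            // Sketch only: constructor arguments and paths are assumed, not taken from this example.
            // Disposing the converter flushes any open TickerData handles to disk, as the summary requires.
            using (var converter = new PsychSignalDataConverter(sourceDirectory, destinationDirectory))
            using (var stream = File.OpenRead(rawFilePath))
            {
                converter.Convert(stream);
            }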
Example #7
        /// <summary>
        /// Gets the ticker using map files. If the ticker is empty, we can't resolve a map file, or we can't
        /// resolve a ticker within a map file, we return an empty string
        /// </summary>
        /// <param name="tradingEconomicsEarnings">TE Earnings data</param>
        /// <returns>Lowercased mapped ticker, or an empty string if no mapping was found</returns>
        private string GetMappedSymbol(TradingEconomicsEarnings tradingEconomicsEarnings)
        {
            var ticker  = tradingEconomicsEarnings.Symbol;
            var mapFile = _mapFileResolver.ResolveMapFile(ticker, tradingEconomicsEarnings.LastUpdate);

            if (!mapFile.Any())
            {
                Log.Error($"TradingEconomicsEarningsDownloader.GetMappedSymbol(): No mapfile found for ticker {ticker}");
                return(string.Empty);
            }

            var symbol = mapFile.GetMappedSymbol(tradingEconomicsEarnings.LastUpdate);

            if (string.IsNullOrEmpty(symbol))
            {
                Log.Error($"TradingEconomicsEarningsDownloader.GetMappedSymbol(): No mapped symbol found for ticker {ticker}");
                return(string.Empty);
            }

            return(symbol.ToLower());
        }
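A hypothetical call site for the helper above, inside the download loop; the earnings instance is illustrative and the skip logic simply honors the empty-string convention described in the summary.

            // GetMappedSymbol returns string.Empty when no map file or mapping exists,
            // so the record can be skipped safely.
            var mappedTicker = GetMappedSymbol(tradingEconomicsEarnings);
            if (string.IsNullOrEmpty(mappedTicker))
            {
                continue;   // skip this record
            }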
Example #8
        /// <summary>
        /// Initializes the <see cref="SubscriptionDataReader"/> instance
        /// </summary>
        public void Initialize()
        {
            if (_initialized)
            {
                return;
            }

            //Save the type of data we'll be getting from the source.

            //Create the dynamic type-activators:
            var objectActivator = ObjectActivator.GetActivator(_config.Type);

            if (objectActivator == null)
            {
                OnInvalidConfigurationDetected(
                    new InvalidConfigurationDetectedEventArgs(
                        $"Custom data type \'{_config.Type.Name}\' missing parameterless constructor " +
                        $"E.g. public {_config.Type.Name}() {{ }}"));

                _endOfStream = true;
                return;
            }

            //Create an instance of the "Type":
            var userObj = objectActivator.Invoke(new object[] { _config.Type });

            _dataFactory = userObj as BaseData;

            // If it's Quandl data, set the access token in the data factory:
            var quandl = _dataFactory as Quandl;

            if (quandl != null)
            {
                if (!Quandl.IsAuthCodeSet)
                {
                    Quandl.SetAuthCode(Config.Get("quandl-auth-token"));
                }
            }

            // If Tiingo data, set the access token in data factory
            var tiingo = _dataFactory as TiingoDailyData;

            if (tiingo != null)
            {
                if (!Tiingo.IsAuthCodeSet)
                {
                    Tiingo.SetAuthCode(Config.Get("tiingo-auth-token"));
                }
            }

            _factorFile = new FactorFile(_config.Symbol.Value, new List <FactorFileRow>());
            _mapFile    = new MapFile(_config.Symbol.Value, new List <MapFileRow>());

            // load up the map and factor files for equities
            if (!_config.IsCustomData && _config.SecurityType == SecurityType.Equity)
            {
                try
                {
                    var mapFile = _mapFileResolver.ResolveMapFile(_config.Symbol.ID.Symbol, _config.Symbol.ID.Date);

                    // only take the resolved map file if it has data, otherwise we'll use the empty one we defined above
                    if (mapFile.Any())
                    {
                        _mapFile = mapFile;
                    }

                    var factorFile = _factorFileProvider.Get(_config.Symbol);
                    _hasScaleFactors = factorFile != null;
                    if (_hasScaleFactors)
                    {
                        _factorFile = factorFile;

                        // if factor file has minimum date, update start period if before minimum date
                        if (!_isLiveMode && _factorFile != null && _factorFile.FactorFileMinimumDate.HasValue)
                        {
                            if (_periodStart < _factorFile.FactorFileMinimumDate.Value)
                            {
                                _periodStart = _factorFile.FactorFileMinimumDate.Value;

                                OnNumericalPrecisionLimited(
                                    new NumericalPrecisionLimitedEventArgs(
                                        $"Data for symbol {_config.Symbol.Value} has been limited due to numerical precision issues in the factor file. " +
                                        $"The starting date has been set to {_factorFile.FactorFileMinimumDate.Value.ToShortDateString()}."));
                            }
                        }
                    }
                }
                catch (Exception err)
                {
                    Log.Error(err, "Fetching Price/Map Factors: " + _config.Symbol.ID + ": ");
                }
            }

            // load up the map and factor files for underlying of equity option
            if (!_config.IsCustomData && _config.SecurityType == SecurityType.Option)
            {
                try
                {
                    var mapFile = _mapFileResolver.ResolveMapFile(_config.Symbol.Underlying.ID.Symbol, _config.Symbol.Underlying.ID.Date);

                    // only take the resolved map file if it has data, otherwise we'll use the empty one we defined above
                    if (mapFile.Any())
                    {
                        _mapFile = mapFile;
                    }
                }
                catch (Exception err)
                {
                    Log.Error(err, "Map Factors: " + _config.Symbol.ID + ": ");
                }
            }

            // Estimate delisting date.
            switch (_config.Symbol.ID.SecurityType)
            {
            case SecurityType.Future:
                _delistingDate = _config.Symbol.ID.Date;
                break;

            case SecurityType.Option:
                _delistingDate = OptionSymbol.GetLastDayOfTrading(_config.Symbol);
                break;

            default:
                _delistingDate = _mapFile.DelistingDate;
                break;
            }

            _subscriptionFactoryEnumerator = ResolveDataEnumerator(true);

            _initialized = true;
        }
        /// <summary>
        /// Iterates each daily file in the specified <paramref name="dailyFolder"/> and adds a line for each
        /// day to the appropriate coarse file
        /// </summary>
        /// <param name="dailyFolder">The folder with daily data.</param>
        /// <param name="coarseFolder">The coarse output folder.</param>
        /// <param name="mapFileResolver">The map file resolver.</param>
        /// <param name="factorFileProvider">The factor file provider.</param>
        /// <param name="exclusions">The symbols to be excluded from processing.</param>
        /// <param name="ignoreMapless">Ignore the symbols without a map file.</param>
        /// <param name="symbolResolver">Function used to provide symbol resolution. Default resolution uses the zip file name to resolve
        /// the symbol, specify null for this behavior.</param>
        /// <returns>Collection with the names of the newly generated coarse files.</returns>
        /// <exception cref="Exception">
        /// Unable to resolve market for daily folder: " + dailyFolder
        /// or
        /// Unable to resolve fundamental path for coarse folder: " + coarseFolder
        /// </exception>
        public static ICollection <string> ProcessDailyFolder(string dailyFolder, string coarseFolder, MapFileResolver mapFileResolver, IFactorFileProvider factorFileProvider,
                                                              HashSet <string> exclusions, bool ignoreMapless, Func <string, string> symbolResolver = null)
        {
            const decimal scaleFactor = 10000m;

            Log.Trace("Processing: {0}", dailyFolder);

            var start = DateTime.UtcNow;

            // load map files into memory

            var symbols      = 0;
            var maplessCount = 0;
            var dates        = new HashSet <DateTime>();

            // instead of opening/closing these constantly, open them once and dispose at the end (~3x speed improvement)
            var writers = new Dictionary <string, StreamWriter>();

            var marketDirectoryInfo = new DirectoryInfo(dailyFolder).Parent;

            if (marketDirectoryInfo == null)
            {
                throw new Exception($"Unable to resolve market for daily folder: {dailyFolder}");
            }
            var market = marketDirectoryInfo.Name.ToLowerInvariant();

            var fundamentalDirectoryInfo = new DirectoryInfo(coarseFolder).Parent;

            if (fundamentalDirectoryInfo == null)
            {
                throw new Exception($"Unable to resolve fundamental path for coarse folder: {coarseFolder}");
            }
            var fineFundamentalFolder = Path.Combine(marketDirectoryInfo.FullName, "fundamental", "fine");

            // open up each daily file to get the values and append to the daily coarse files
            foreach (var file in Directory.EnumerateFiles(dailyFolder, "*.zip"))
            {
                try
                {
                    var ticker             = Path.GetFileNameWithoutExtension(file);
                    var fineAvailableDates = Enumerable.Empty <DateTime>();

                    var tickerFineFundamentalFolder = Path.Combine(fineFundamentalFolder, ticker);
                    if (Directory.Exists(tickerFineFundamentalFolder))
                    {
                        fineAvailableDates = Directory.GetFiles(tickerFineFundamentalFolder, "*.zip")
                                             .Select(f => DateTime.ParseExact(Path.GetFileNameWithoutExtension(f), DateFormat.EightCharacter, CultureInfo.InvariantCulture))
                                             .ToList();
                    }

                    if (ticker == null)
                    {
                        Log.Trace("CoarseGenerator.ProcessDailyFolder(): Unable to resolve symbol from file: {0}", file);
                        continue;
                    }

                    if (symbolResolver != null)
                    {
                        ticker = symbolResolver(ticker);
                    }

                    ticker = ticker.ToUpperInvariant();

                    if (exclusions != null && exclusions.Contains(ticker))
                    {
                        Log.Trace("Excluded symbol: {0}", ticker);
                        continue;
                    }

                    ZipFile zip;
                    using (var reader = Compression.Unzip(file, out zip))
                    {
                        var checkedForMapFile = false;

                        symbols++;
                        string line;
                        while ((line = reader.ReadLine()) != null)
                        {
                            //20150625.csv
                            var csv  = line.Split(',');
                            var date = DateTime.ParseExact(csv[0], DateFormat.TwelveCharacter, CultureInfo.InvariantCulture);

                            if (ignoreMapless && !checkedForMapFile)
                            {
                                checkedForMapFile = true;
                                if (!mapFileResolver.ResolveMapFile(ticker, date).Any())
                                {
                                    // if the resolved map file has zero entries then it's a mapless symbol
                                    maplessCount++;
                                    break;
                                }
                            }

                            var close  = Parse.Decimal(csv[4]) / scaleFactor;
                            var volume = Parse.Long(csv[5]);

                            var dollarVolume = close * volume;

                            var coarseFile = Path.Combine(coarseFolder, date.ToStringInvariant("yyyyMMdd") + ".csv");
                            dates.Add(date);

                            // try to resolve a map file and if found, regen the sid
                            var sid     = SecurityIdentifier.GenerateEquity(SecurityIdentifier.DefaultDate, ticker, market);
                            var mapFile = mapFileResolver.ResolveMapFile(ticker, date);
                            if (!mapFile.IsNullOrEmpty())
                            {
                                // if available, use the permtick in the coarse files; because of this, we need
                                // to update the coarse files each time new map files are added/permticks change
                                sid = SecurityIdentifier.GenerateEquity(mapFile.FirstDate, mapFile.OrderBy(x => x.Date).First().MappedSymbol, market);
                            }

                            if (mapFile == null && ignoreMapless)
                            {
                                // if we're ignoring mapless files then we should always be able to resolve this
                                Log.Error($"CoarseGenerator.ProcessDailyFolder(): Unable to resolve map file for {ticker} as of {date.ToStringInvariant("d")}");
                                continue;
                            }

                            // get price and split factors from factor files
                            var symbol        = new Symbol(sid, ticker);
                            var factorFile    = factorFileProvider.Get(symbol);
                            var factorFileRow = factorFile?.GetScalingFactors(date);
                            var priceFactor   = factorFileRow?.PriceFactor ?? 1m;
                            var splitFactor   = factorFileRow?.SplitFactor ?? 1m;


                            // Check if security has fine file within a trailing month for a date-ticker set.
                            // There are tricky cases where a folder named by a ticker can have data for multiple securities.
                            // e.g  GOOG -> GOOGL (GOOG T1AZ164W5VTX) / GOOCV -> GOOG (GOOCV VP83T1ZUHROL) case.
                            // The fine data in the 'fundamental/fine/goog' folder will be for 'GOOG T1AZ164W5VTX' up to the 2014-04-02 and for 'GOOCV VP83T1ZUHROL' afterward.
                            // Therefore, before checking whether the security has fundamental data for a date, we need to filter the fine files by the map's first date.
                            var firstDate = mapFile?.FirstDate ?? DateTime.MinValue;
                            var hasFundamentalDataForDate = fineAvailableDates.Where(d => d >= firstDate).Any(d => date.AddMonths(-1) <= d && d <= date);

                            // The following section handles mergers and acquisitions cases.
                            // e.g. YHOO -> AABA (YHOO R735QTJ8XC9X)
                            // For the dates right after the acquisition, valid fine fundamental data for AABA is still under the former ticker folder.
                            // Therefore, if no fine fundamental data is found in the 'fundamental/fine/aaba' folder, the 'yhoo' folder is searched instead.
                            if (mapFile != null && mapFile.Count() > 2 && !hasFundamentalDataForDate)
                            {
                                var previousTicker = mapFile.LastOrDefault(m => m.Date < date)?.MappedSymbol;
                                if (previousTicker != null)
                                {
                                    var previousTickerFineFundamentalFolder = Path.Combine(fineFundamentalFolder, previousTicker);
                                    if (Directory.Exists(previousTickerFineFundamentalFolder))
                                    {
                                        var previousTickerFineAvailableDates = Directory.GetFiles(previousTickerFineFundamentalFolder, "*.zip")
                                                                               .Select(f => DateTime.ParseExact(Path.GetFileNameWithoutExtension(f), DateFormat.EightCharacter, CultureInfo.InvariantCulture))
                                                                               .ToList();
                                        hasFundamentalDataForDate = previousTickerFineAvailableDates.Where(d => d >= firstDate).Any(d => date.AddMonths(-1) <= d && d <= date);
                                    }
                                }
                            }

                            // sid,symbol,close,volume,dollar volume,has fundamental data,price factor,split factor
                            var coarseFileLine = string.Format(
                                CultureInfo.InvariantCulture,
                                "{0},{1},{2},{3},{4},{5},{6},{7}",
                                sid,
                                ticker,
                                close,
                                volume,
                                Math.Truncate(dollarVolume),
                                hasFundamentalDataForDate,
                                priceFactor,
                                splitFactor);

                            StreamWriter writer;
                            if (!writers.TryGetValue(coarseFile, out writer))
                            {
                                writer = new StreamWriter(new FileStream(coarseFile, FileMode.Create, FileAccess.Write, FileShare.Write));
                                writers[coarseFile] = writer;
                            }
                            writer.WriteLine(coarseFileLine);
                        }
                    }

                    if (symbols % 1000 == 0)
                    {
                        Log.Trace($"CoarseGenerator.ProcessDailyFolder(): Completed processing {symbols} symbols. Current elapsed: {(DateTime.UtcNow - start).TotalSeconds.ToStringInvariant("0.00")} seconds");
                    }
                }
                catch (Exception err)
                {
                    // log the error and continue with the process
                    Log.Error(err.ToString());
                }
            }

            Log.Trace("CoarseGenerator.ProcessDailyFolder(): Saving {0} coarse files to disk", dates.Count);

            // dispose all the writers at the end of processing
            foreach (var writer in writers)
            {
                writer.Value.Dispose();
            }

            var stop = DateTime.UtcNow;

            Log.Trace($"CoarseGenerator.ProcessDailyFolder(): Processed {symbols} symbols into {dates.Count} coarse files in {(stop - start).TotalSeconds.ToStringInvariant("0.00")} seconds");
            Log.Trace($"CoarseGenerator.ProcessDailyFolder(): Excluded {maplessCount} mapless symbols.");

            return(writers.Keys);
        }
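A hedged invocation sketch; the provider construction mirrors Example #16 below, while the dataFolder variable, the folder layout, and the MapFileResolver.Create helper are assumptions rather than part of this example.

            // Illustrative only: paths assume the standard <data>/equity/usa layout.
            var mapFileProvider = new LocalDiskMapFileProvider();
            var factorFileProvider = new LocalDiskFactorFileProvider(mapFileProvider);
            var coarseFiles = ProcessDailyFolder(
                Path.Combine(dataFolder, "equity", "usa", "daily"),
                Path.Combine(dataFolder, "equity", "usa", "fundamental", "coarse"),
                MapFileResolver.Create(dataFolder, "usa"),   // assumed construction helper
                factorFileProvider,
                new HashSet<string>(),                       // no exclusions
                ignoreMapless: true);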
Example #10
        /// <summary>
        /// Initializes the <see cref="SubscriptionDataReader"/> instance
        /// </summary>
        /// <remarks>Should be called after all consumers of <see cref="NewTradableDate"/> event are set,
        /// since it will produce events.</remarks>
        public void Initialize()
        {
            if (_initialized)
            {
                return;
            }

            //Save the type of data we'll be getting from the source.
            try
            {
                _dataFactory = _config.Type.GetBaseDataInstance();
            }
            catch (ArgumentException exception)
            {
                OnInvalidConfigurationDetected(new InvalidConfigurationDetectedEventArgs(exception.Message));
                _endOfStream = true;
                return;
            }

            // If it's Quandl data, set the access token in the data factory:
            var quandl = _dataFactory as Quandl;

            if (quandl != null)
            {
                if (!Quandl.IsAuthCodeSet)
                {
                    Quandl.SetAuthCode(Config.Get("quandl-auth-token"));
                }
            }

            // If Tiingo data, set the access token in data factory
            var tiingo = _dataFactory as TiingoDailyData;

            if (tiingo != null)
            {
                if (!Tiingo.IsAuthCodeSet)
                {
                    Tiingo.SetAuthCode(Config.Get("tiingo-auth-token"));
                }
            }

            // If USEnergyInformation data, set the access token in data factory
            var energyInformation = _dataFactory as USEnergyInformation;

            if (energyInformation != null)
            {
                if (!USEnergyInformation.IsAuthCodeSet)
                {
                    USEnergyInformation.SetAuthCode(Config.Get("us-energy-information-auth-token"));
                }
            }

            _factorFile = new FactorFile(_config.Symbol.Value, new List <FactorFileRow>());
            _mapFile    = new MapFile(_config.Symbol.Value, new List <MapFileRow>());

            // load up the map files for equities, options, and custom data (when mapping is supported).
            // Only load up factor files for equities
            if (_config.TickerShouldBeMapped())
            {
                try
                {
                    var mapFile = _mapFileResolver.ResolveMapFile(_config.Symbol, _config.Type);

                    // only take the resolved map file if it has data, otherwise we'll use the empty one we defined above
                    if (mapFile.Any())
                    {
                        _mapFile = mapFile;
                    }

                    if (!_config.IsCustomData && _config.SecurityType != SecurityType.Option)
                    {
                        var factorFile = _factorFileProvider.Get(_config.Symbol);
                        _hasScaleFactors = factorFile != null;
                        if (_hasScaleFactors)
                        {
                            _factorFile = factorFile;

                            // if factor file has minimum date, update start period if before minimum date
                            if (!_isLiveMode && _factorFile != null && _factorFile.FactorFileMinimumDate.HasValue)
                            {
                                if (_periodStart < _factorFile.FactorFileMinimumDate.Value)
                                {
                                    _periodStart = _factorFile.FactorFileMinimumDate.Value;

                                    OnNumericalPrecisionLimited(
                                        new NumericalPrecisionLimitedEventArgs(
                                            $"Data for symbol {_config.Symbol.Value} has been limited due to numerical precision issues in the factor file. " +
                                            $"The starting date has been set to {_factorFile.FactorFileMinimumDate.Value.ToShortDateString()}."));
                                }
                            }
                        }
                    }
                }
                catch (Exception err)
                {
                    Log.Error(err, "Fetching Price/Map Factors: " + _config.Symbol.ID + ": ");
                }
            }

            // Estimate delisting date.
            switch (_config.Symbol.ID.SecurityType)
            {
            case SecurityType.Future:
                _delistingDate = _config.Symbol.ID.Date;
                break;

            case SecurityType.Option:
                _delistingDate = OptionSymbol.GetLastDayOfTrading(_config.Symbol);
                break;

            default:
                _delistingDate = _mapFile.DelistingDate;
                break;
            }
            // adding a day so we stop at EOD
            _delistingDate = _delistingDate.AddDays(1);

            _subscriptionFactoryEnumerator = ResolveDataEnumerator(true);

            _initialized = true;
        }
Example #11
        public static ICollection <string> ProcessDailyFolder(string dailyFolder, string coarseFolder, MapFileResolver mapFileResolver, HashSet <string> exclusions, bool ignoreMapless, DateTime startDate, Func <string, string> symbolResolver = null)
        {
            const decimal scaleFactor = 10000m;

            Log.Trace("Processing: {0}", dailyFolder);

            var start = DateTime.UtcNow;

            var symbols      = 0;
            var maplessCount = 0;
            var dates        = new HashSet <DateTime>();

            var writers = new Dictionary <string, StreamWriter>();

            var dailyFolderDirectoryInfo = new DirectoryInfo(dailyFolder).Parent;

            if (dailyFolderDirectoryInfo == null)
            {
                throw new Exception("Unable to resolve market for daily folder: " + dailyFolder);
            }
            var market = dailyFolderDirectoryInfo.Name.ToLower();

            foreach (var file in Directory.EnumerateFiles(dailyFolder))
            {
                try
                {
                    var symbol = Path.GetFileNameWithoutExtension(file);
                    if (symbol == null)
                    {
                        Log.Trace("CoarseGenerator.ProcessDailyFolder(): Unable to resolve symbol from file: {0}", file);
                        continue;
                    }

                    if (symbolResolver != null)
                    {
                        symbol = symbolResolver(symbol);
                    }

                    symbol = symbol.ToUpper();

                    if (exclusions.Contains(symbol))
                    {
                        Log.Trace("Excluded symbol: {0}", symbol);
                        continue;
                    }

                    ZipFile zip;
                    using (var reader = Compression.Unzip(file, out zip))
                    {
                        const decimal k = 2m / (30 + 1);

                        var seeded = false;
                        var runningAverageVolume = 0m;

                        var checkedForMapFile = false;

                        symbols++;
                        string line;
                        while ((line = reader.ReadLine()) != null)
                        {
                            var csv  = line.Split(',');
                            var date = DateTime.ParseExact(csv[0], DateFormat.TwelveCharacter, CultureInfo.InvariantCulture);

                            if (date < startDate)
                            {
                                continue;
                            }

                            if (ignoreMapless && !checkedForMapFile)
                            {
                                checkedForMapFile = true;
                                if (!mapFileResolver.ResolveMapFile(symbol, date).Any())
                                {
                                    maplessCount++;
                                    break;
                                }
                            }

                            var close  = decimal.Parse(csv[4]) / scaleFactor;
                            var volume = long.Parse(csv[5]);

                            runningAverageVolume = seeded
                                ? volume * k + runningAverageVolume * (1 - k)
                                : volume;

                            seeded = true;

                            var dollarVolume = close * runningAverageVolume;

                            var coarseFile = Path.Combine(coarseFolder, date.ToString("yyyyMMdd") + ".csv");
                            dates.Add(date);

                            var sid     = SecurityIdentifier.GenerateEquity(SecurityIdentifier.DefaultDate, symbol, market);
                            var mapFile = mapFileResolver.ResolveMapFile(symbol, date);
                            if (!mapFile.IsNullOrEmpty())
                            {
                                sid = SecurityIdentifier.GenerateEquity(mapFile.FirstDate, mapFile.OrderBy(x => x.Date).First().MappedSymbol, market);
                            }
                            if (mapFile == null && ignoreMapless)
                            {
                                Log.Error(string.Format("CoarseGenerator.ProcessDailyFolder(): Unable to resolve map file for {0} as of {1}", symbol, date.ToShortDateString()));
                                continue;
                            }

                            var coarseFileLine = sid + "," + symbol + "," + close + "," + volume + "," + Math.Truncate(dollarVolume);

                            StreamWriter writer;
                            if (!writers.TryGetValue(coarseFile, out writer))
                            {
                                writer = new StreamWriter(new FileStream(coarseFile, FileMode.Create, FileAccess.Write, FileShare.Write));
                                writers[coarseFile] = writer;
                            }
                            writer.WriteLine(coarseFileLine);
                        }
                    }

                    if (symbols % 1000 == 0)
                    {
                        Log.Trace("CoarseGenerator.ProcessDailyFolder(): Completed processing {0} symbols. Current elapsed: {1} seconds", symbols, (DateTime.UtcNow - start).TotalSeconds.ToString("0.00"));
                    }
                }
                catch (Exception err)
                {
                    Log.Error(err.ToString());
                }
            }

            Log.Trace("CoarseGenerator.ProcessDailyFolder(): Saving {0} coarse files to disk", dates.Count);

            foreach (var writer in writers)
            {
                writer.Value.Dispose();
            }

            var stop = DateTime.UtcNow;

            Log.Trace("CoarseGenerator.ProcessDailyFolder(): Processed {0} symbols into {1} coarse files in {2} seconds", symbols, dates.Count, (stop - start).TotalSeconds.ToString("0.00"));
            Log.Trace("CoarseGenerator.ProcessDailyFolder(): Excluded {0} mapless symbols.", maplessCount);

            return(writers.Keys);
        }
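Unlike Example #8, this version smooths volume with a 30-day exponential moving average (k = 2 / (30 + 1)) before computing dollar volume; a small sketch of the recurrence with made-up volumes:

            // ema_t = k * volume_t + (1 - k) * ema_(t-1), seeded with the first observation.
            const decimal k = 2m / (30 + 1);
            var runningAverageVolume = 0m;
            var seeded = false;
            foreach (var volume in new[] { 1000000m, 1200000m, 900000m })
            {
                runningAverageVolume = seeded ? volume * k + runningAverageVolume * (1 - k) : volume;
                seeded = true;
            }
            // dollar volume is then close * runningAverageVolume, exactly as in the loop above.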
Example #12
        /// <summary>
        /// Processes the data
        /// </summary>
        /// <typeparam name="T"><see cref="SmartInsiderEvent"/> inherited instance</typeparam>
        /// <param name="sourceFile">File to read data from</param>
        /// <returns>Dictionary keyed by ticker that contains all the lines that appeared in the file</returns>
        private Dictionary <string, List <T> > Process <T>(FileInfo sourceFile)
            where T : SmartInsiderEvent, new()
        {
            var previousMarket = string.Empty;
            var previousTicker = string.Empty;
            var lines          = new Dictionary <string, List <T> >();
            var i = 0;

            Log.Trace($"SmartInsiderConverter.Process(): Processing file: {sourceFile.FullName}");

            foreach (var line in File.ReadLines(sourceFile.FullName))
            {
                i++;

                // First line is the header row, but make sure we don't encounter it anywhere else in the data
                if (line.StartsWith("\"TransactionID"))
                {
                    Log.Trace($"SmartInsiderConverter.Process(): Header row on line {i}. Skipping...");
                    continue;
                }

                try
                {
                    // Yes, there are ONE HUNDRED total fields in this dataset.
                    // However, we will only take the first 60 since the rest are reserved fields
                    var tsv = line.Split('\t')
                              .Take(60)
                              .Select(x => x.Replace("\"", ""))
                              .ToList();

                    // If we have a null value on a non-nullable field, consider it invalid data
                    if (string.IsNullOrWhiteSpace(tsv[2]))
                    {
                        Log.Trace($"SmartInsiderConverter.Process(): Null value encountered on non-nullable value on line {i}");
                        continue;
                    }

                    // Remove in descending order to maintain index order
                    // while we delete lower indexed values
                    tsv.RemoveAt(46); // ShowOriginal
                    tsv.RemoveAt(36); // PreviousClosePrice
                    tsv.RemoveAt(14); // ShortCompanyName
                    tsv.RemoveAt(7);  // CompanyPageURL

                    var finalLine = string.Join("\t", tsv);

                    var dataInstance = new T();
                    dataInstance.FromRawData(finalLine);

                    var ticker = dataInstance.TickerSymbol;

                    // For now, only support US markets
                    if (dataInstance.TickerCountry != "US")
                    {
                        if (dataInstance.TickerCountry != previousMarket && ticker != previousTicker)
                        {
                            Log.Error($"SmartInsiderConverter.Process(): Market {dataInstance.TickerCountry} is not supported at this time for ticker {ticker} on line {i}");
                        }

                        previousMarket = dataInstance.TickerCountry;
                        previousTicker = ticker;

                        continue;
                    }

                    var mapFile = _mapFileResolver.ResolveMapFile(ticker, dataInstance.LastUpdate);
                    if (!mapFile.Any())
                    {
                        Log.Error($"SmartInsiderConverter.Process(): Failed to find mapfile for ticker {ticker} on {dataInstance.LastUpdate} on line {i}");

                        previousMarket = dataInstance.TickerCountry;
                        previousTicker = ticker;

                        continue;
                    }

                    var newTicker = mapFile.GetMappedSymbol(dataInstance.LastUpdate);
                    if (string.IsNullOrEmpty(newTicker))
                    {
                        Log.Error($"SmartInsiderConverter.Process(): Failed to resolve ticker for old ticker {ticker} on line {i}");

                        previousMarket = dataInstance.TickerCountry;
                        previousTicker = ticker;

                        continue;
                    }

                    // Log any mapping events since this can be a point of failure
                    if (ticker != newTicker)
                    {
                        Log.Trace($"SmartInsiderConverter.Process(): Mapped ticker from {ticker} to {newTicker}");
                    }

                    List <T> symbolLines;
                    if (!lines.TryGetValue(newTicker, out symbolLines))
                    {
                        symbolLines      = new List <T>();
                        lines[newTicker] = symbolLines;
                    }

                    symbolLines.Add(dataInstance);

                    previousMarket = dataInstance.TickerCountry;
                    previousTicker = ticker;
                }
                catch (Exception e)
                {
                    Log.Error(e, $"SmartInsiderConverter.Process(): Error on line {i}");
                }
            }

            return(lines);
        }
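A hedged call sketch for the helper above; SmartInsiderIntention is assumed to be one of the SmartInsiderEvent-derived types this converter handles, and the source path is illustrative.

            // Hypothetical invocation from inside the converter; sourcePath is illustrative.
            var linesByTicker = Process<SmartInsiderIntention>(new FileInfo(sourcePath));
            foreach (var kvp in linesByTicker)
            {
                Log.Trace($"SmartInsiderConverter: {kvp.Value.Count} rows resolved to ticker {kvp.Key}");
            }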
        /// <summary>
        /// Runs the instance of the object.
        /// </summary>
        /// <returns>True if process all downloads successfully</returns>
        public override bool Run()
        {
            var stopwatch = Stopwatch.StartNew();

            try
            {
                var companies      = GetCompanies().Result.DistinctBy(x => x.Ticker).ToList();
                var count          = companies.Count;
                var currentPercent = 0.05;
                var percent        = 0.05;
                var i = 0;

                Log.Trace($"EstimizeReleaseDataDownloader.Run(): Start processing {count} companies");

                var tasks = new List <Task>();

                foreach (var company in companies)
                {
                    try
                    {
                        // Makes sure we don't overrun Estimize rate limits accidentally
                        IndexGate.WaitToProceed();
                    }
                    // This is super super rare, but failures in RateGate (RG) can still happen nonetheless. Let's not
                    // rely on RG operating successfully all the time so that if RG fails, our download process doesn't fail
                    catch (ArgumentOutOfRangeException e)
                    {
                        Log.Error(e, $"EstimizeReleaseDataDownloader.Run(): RateGate failed. Sleeping for 110 milliseconds with Thread.Sleep()");
                        Thread.Sleep(110);
                    }

                    var ticker = company.Ticker;
                    if (ticker.IndexOf("defunct", StringComparison.OrdinalIgnoreCase) > 0)
                    {
                        var length = ticker.IndexOf('-');
                        ticker = ticker.Substring(0, length).Trim();
                    }

                    Log.Trace($"EstimizeReleaseDataDownloader.Run(): Processing {ticker}");

                    tasks.Add(
                        HttpRequester($"/companies/{ticker}/releases")
                        .ContinueWith(
                            y =>
                    {
                        i++;

                        if (y.IsFaulted)
                        {
                            Log.Error($"EstimizeReleaseDataDownloader.Run(): Failed to get data for {company}");
                            return;
                        }

                        var result = y.Result;
                        if (string.IsNullOrEmpty(result))
                        {
                            // We've already logged inside HttpRequester
                            return;
                        }

                        // Just like TradingEconomics, we only want events that have already occurred,
                        // instead of letting "forecasts" that will change in the future taint our
                        // data and make backtests non-deterministic. We also want consistency
                        // between our data and live-trading historical requests.
                        var releases = JsonConvert.DeserializeObject <List <EstimizeRelease> >(result)
                                       .Where(x => x.Eps != null)
                                       .GroupBy(x =>
                        {
                            var releaseDate = x.ReleaseDate;

                            try
                            {
                                var mapFile   = _mapFileResolver.ResolveMapFile(ticker, releaseDate);
                                var oldTicker = ticker;
                                var newTicker = ticker;

                                // Ensure we're writing to the correct historical ticker
                                if (!mapFile.Any())
                                {
                                    Log.Trace($"EstimizeReleaseDataDownloader.Run(): Failed to find map file for: {newTicker} - on: {releaseDate}");
                                    return(string.Empty);
                                }

                                newTicker = mapFile.GetMappedSymbol(releaseDate);
                                if (string.IsNullOrWhiteSpace(newTicker))
                                {
                                    Log.Trace($"EstimizeReleaseDataDownloader.Run(): Failed to find mapping for null new ticker. Old ticker: {oldTicker} - on: {releaseDate}");
                                    return(string.Empty);
                                }

                                if (oldTicker != newTicker)
                                {
                                    Log.Trace($"EstimizeReleaseDataDownloader.Run(): Remapped from {oldTicker} to {newTicker} for {releaseDate}");
                                }

                                return(newTicker);
                            }
                            // We get a failure inside the map file constructor rarely. It tries
                            // to access the last element of an empty list. Maybe this is a bug?
                            catch (InvalidOperationException e)
                            {
                                Log.Error(e, $"EstimizeReleaseDataDownloader.Run(): Failed to load map file for: {ticker} - on: {releaseDate}");
                                return(string.Empty);
                            }
                        })
                                       .Where(x => !string.IsNullOrEmpty(x.Key));

                        foreach (var kvp in releases)
                        {
                            var csvContents = kvp.Select(x => $"{x.ReleaseDate.ToUniversalTime():yyyyMMdd HH:mm:ss},{x.Id},{x.FiscalYear},{x.FiscalQuarter},{x.Eps},{x.Revenue},{x.ConsensusEpsEstimate},{x.ConsensusRevenueEstimate},{x.WallStreetEpsEstimate},{x.WallStreetRevenueEstimate},{x.ConsensusWeightedEpsEstimate},{x.ConsensusWeightedRevenueEstimate}");
                            SaveContentToFile(_destinationFolder, kvp.Key, csvContents);
                        }

                        var percentDone = (double)i / count;
                        if (percentDone >= currentPercent)
                        {
                            Log.Trace($"EstimizeEstimateDataDownloader.Run(): {percentDone:P2} complete");
                            currentPercent += percent;
                        }
                    }
                            )
                        );
                }

                Task.WaitAll(tasks.ToArray());
            }
            catch (Exception e)
            {
                Log.Error(e);
                return(false);
            }

            Log.Trace($"EstimizeReleaseDataDownloader.Run(): Finished in {stopwatch.Elapsed}");
            return(true);
        }
Example #14
 private string GetMappedSymbol(SubscriptionDataConfig config, DateTime date)
 {
     return(_mapFileResolver.ResolveMapFile(config.Symbol, config.Type)
            .GetMappedSymbol(date, config.MappedSymbol));
 }
        /// <summary>
        /// Runs the instance of the object.
        /// </summary>
        /// <returns>True if process all downloads successfully</returns>
        public override bool Run()
        {
            var stopwatch = Stopwatch.StartNew();

            try
            {
                var companies      = GetCompanies().Result.DistinctBy(x => x.Ticker).ToList();
                var count          = companies.Count;
                var currentPercent = 0.05;
                var percent        = 0.05;
                var i = 0;

                Log.Trace($"EstimizeEstimateDataDownloader.Run(): Start processing {count.ToStringInvariant()} companies");

                var tasks = new List <Task>();

                foreach (var company in companies)
                {
                    // Include tickers that are "defunct".
                    // Remove the tag because it cannot be part of the API endpoint.
                    // This is separate from the NormalizeTicker(...) method since
                    // we don't convert tickers with `-`s into the format we can successfully
                    // index mapfiles with.
                    var    estimizeTicker = company.Ticker;
                    string ticker;

                    if (!TryNormalizeDefunctTicker(estimizeTicker, out ticker))
                    {
                        Log.Error($"EstimizeEstimateDataDownloader(): Defunct ticker {estimizeTicker} is unable to be parsed. Continuing...");
                        continue;
                    }

                    if (_processTickers != null && !_processTickers.Contains(ticker, StringComparer.InvariantCultureIgnoreCase))
                    {
                        Log.Trace($"EstimizeEstimateDataDownloader.Run(): Skipping {ticker} since it is not in the list of predefined tickers");
                        continue;
                    }

                    // Begin processing ticker with a normalized value
                    Log.Trace($"EstimizeEstimateDataDownloader.Run(): Processing {ticker}");

                    // Makes sure we don't overrun Estimize rate limits accidentally
                    IndexGate.WaitToProceed();

                    tasks.Add(
                        HttpRequester($"/companies/{ticker}/estimates")
                        .ContinueWith(
                            y =>
                    {
                        i++;

                        if (y.IsFaulted)
                        {
                            Log.Error($"EstimizeEstimateDataDownloader.Run(): Failed to get data for {company}");
                            return;
                        }

                        var result = y.Result;
                        if (string.IsNullOrEmpty(result))
                        {
                            // We've already logged inside HttpRequester
                            return;
                        }

                        var estimates = JsonConvert.DeserializeObject <List <EstimizeEstimate> >(result, JsonSerializerSettings)
                                        .GroupBy(estimate =>
                        {
                            var normalizedTicker = NormalizeTicker(ticker);
                            var oldTicker        = normalizedTicker;
                            var newTicker        = normalizedTicker;
                            var createdAt        = estimate.CreatedAt;

                            try
                            {
                                var mapFile = _mapFileResolver.ResolveMapFile(normalizedTicker, createdAt);

                                // Ensure we're writing to the correct historical ticker
                                if (!mapFile.Any())
                                {
                                    Log.Trace($"EstimizeEstimateDataDownloader.Run(): Failed to find map file for: {newTicker} - on: {createdAt}");
                                    return(string.Empty);
                                }

                                newTicker = mapFile.GetMappedSymbol(createdAt);
                                if (string.IsNullOrWhiteSpace(newTicker))
                                {
                                    Log.Trace($"EstimizeEstimateDataDownloader.Run(): New ticker is null. Old ticker: {oldTicker} - on: {createdAt.ToStringInvariant()}");
                                    return(string.Empty);
                                }

                                if (!string.Equals(oldTicker, newTicker, StringComparison.InvariantCultureIgnoreCase))
                                {
                                    Log.Trace($"EstimizeEstimateDataDownloader.Run(): Remapping {oldTicker} to {newTicker}");
                                }
                            }
                            // We get a failure inside the map file constructor rarely. It tries
                            // to access the last element of an empty list. Maybe this is a bug?
                            catch (InvalidOperationException e)
                            {
                                Log.Error(e, $"EstimizeEstimateDataDownloader.Run(): Failed to load map file for: {oldTicker} - on {createdAt}");
                                return(string.Empty);
                            }

                            return(newTicker);
                        })
                                        .Where(kvp => !string.IsNullOrEmpty(kvp.Key));

                        foreach (var kvp in estimates)
                        {
                            var csvContents = kvp.Select(x =>
                                                         $"{x.CreatedAt.ToStringInvariant("yyyyMMdd HH:mm:ss")}," +
                                                         $"{x.Id}," +
                                                         $"{x.AnalystId}," +
                                                         $"{x.UserName}," +
                                                         $"{x.FiscalYear.ToStringInvariant()}," +
                                                         $"{x.FiscalQuarter.ToStringInvariant()}," +
                                                         $"{x.Eps.ToStringInvariant()}," +
                                                         $"{x.Revenue.ToStringInvariant()}," +
                                                         $"{x.Flagged.ToStringInvariant().ToLowerInvariant()}"
                                                         );
                            SaveContentToFile(_destinationFolder, kvp.Key, csvContents);
                        }

                        // use floating-point division so the progress percentage isn't truncated to zero
                        var percentageDone = i / (double)count;
                        if (percentageDone >= currentPercent)
                        {
                            Log.Trace($"EstimizeEstimateDataDownloader.Run(): {percentageDone.ToStringInvariant("P2")} complete");
                            currentPercent += percent;
                        }
                    }
                            )
                        );
                }

                Task.WaitAll(tasks.ToArray());
            }
            catch (Exception e)
            {
                Log.Error(e);
                return(false);
            }

            Log.Trace($"EstimizeEstimateDataDownloader.Run(): Finished in {stopwatch.Elapsed.ToStringInvariant(null)}");
            return(true);
        }
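The grouping key above reduces to one pattern: resolve a map file for the ticker as of the event date, then ask it for the ticker that traded on that date. Below is a minimal sketch of that pattern, using only the MapFileResolver and MapFile members already shown here; TryGetPointInTimeTicker is a hypothetical helper name and the usual System/System.Linq usings are assumed.

        private static bool TryGetPointInTimeTicker(MapFileResolver resolver, string ticker, DateTime date, out string mappedTicker)
        {
            mappedTicker = string.Empty;
            try
            {
                // resolve the map file that was in effect for this ticker on the given date
                var mapFile = resolver.ResolveMapFile(ticker, date);
                if (!mapFile.Any())
                {
                    return false;
                }

                // ask the map file which ticker the security traded under on that date
                mappedTicker = mapFile.GetMappedSymbol(date);
                return !string.IsNullOrWhiteSpace(mappedTicker);
            }
            catch (InvalidOperationException)
            {
                // the map file constructor can throw on empty row collections; treat it as unmapped
                return false;
            }
        }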
Ejemplo n.º 16
0
        /// <summary>
        /// Iterates each daily file in the specified <paramref name="dailyFolder"/> and adds a line for each
        /// day to the appropriate coarse file
        /// </summary>
        /// <param name="dailyFolder">The folder with daily data</param>
        /// <param name="coarseFolder">The coarse output folder</param>
        /// <param name="mapFileResolver"></param>
        /// <param name="exclusions">The symbols to be excluded from processing</param>
        /// <param name="ignoreMapless">Ignore the symbols without a map file.</param>
        /// <param name="startDate">The starting date for processing</param>
        /// <param name="symbolResolver">Function used to provide symbol resolution. Default resolution uses the zip file name to resolve
        /// the symbol, specify null for this behavior.</param>
        /// <returns>A collection of the generated coarse files</returns>
        public static ICollection <string> ProcessDailyFolder(string dailyFolder, string coarseFolder, MapFileResolver mapFileResolver, HashSet <string> exclusions, bool ignoreMapless, DateTime startDate, Func <string, string> symbolResolver = null)
        {
            const decimal scaleFactor = 10000m;

            Log.Trace("Processing: {0}", dailyFolder);

            var start = DateTime.UtcNow;

            // load map files into memory

            var symbols      = 0;
            var maplessCount = 0;
            var dates        = new HashSet <DateTime>();

            // instead of opening/closing these constantly, open them once and dispose at the end (~3x speed improvement)
            var writers = new Dictionary <string, StreamWriter>();

            var dailyFolderDirectoryInfo = new DirectoryInfo(dailyFolder).Parent;

            if (dailyFolderDirectoryInfo == null)
            {
                throw new Exception("Unable to resolve market for daily folder: " + dailyFolder);
            }
            var market = dailyFolderDirectoryInfo.Name.ToLower();

            var fundamentalDirectoryInfo = new DirectoryInfo(coarseFolder).Parent;

            if (fundamentalDirectoryInfo == null)
            {
                throw new Exception("Unable to resolve fundamental path for coarse folder: " + coarseFolder);
            }
            var fineFundamentalFolder = Path.Combine(fundamentalDirectoryInfo.FullName, "fine");

            var mapFileProvider    = new LocalDiskMapFileProvider();
            var factorFileProvider = new LocalDiskFactorFileProvider(mapFileProvider);

            // open up each daily file to get the values and append to the daily coarse files
            foreach (var file in Directory.EnumerateFiles(dailyFolder))
            {
                try
                {
                    var symbol = Path.GetFileNameWithoutExtension(file);
                    if (symbol == null)
                    {
                        Log.Trace("CoarseGenerator.ProcessDailyFolder(): Unable to resolve symbol from file: {0}", file);
                        continue;
                    }

                    if (symbolResolver != null)
                    {
                        symbol = symbolResolver(symbol);
                    }

                    symbol = symbol.ToUpper();

                    if (exclusions.Contains(symbol))
                    {
                        Log.Trace("Excluded symbol: {0}", symbol);
                        continue;
                    }

                    // check if symbol has any fine fundamental data
                    var firstFineSymbolDate = DateTime.MaxValue;
                    if (Directory.Exists(fineFundamentalFolder))
                    {
                        var fineSymbolFolder = Path.Combine(fineFundamentalFolder, symbol.ToLower());

                        var firstFineSymbolFileName = Directory.Exists(fineSymbolFolder) ? Directory.GetFiles(fineSymbolFolder).OrderBy(x => x).FirstOrDefault() : string.Empty;
                        if (firstFineSymbolFileName.Length > 0)
                        {
                            firstFineSymbolDate = DateTime.ParseExact(Path.GetFileNameWithoutExtension(firstFineSymbolFileName), "yyyyMMdd", CultureInfo.InvariantCulture);
                        }
                    }

                    ZipFile zip;
                    using (var reader = Compression.Unzip(file, out zip))
                    {
                        var checkedForMapFile = false;

                        symbols++;
                        string line;
                        while ((line = reader.ReadLine()) != null)
                        {
                            //20150625.csv
                            var csv  = line.Split(',');
                            var date = DateTime.ParseExact(csv[0], DateFormat.TwelveCharacter, CultureInfo.InvariantCulture);

                            // spin past old data
                            if (date < startDate)
                            {
                                continue;
                            }

                            if (ignoreMapless && !checkedForMapFile)
                            {
                                checkedForMapFile = true;
                                if (!mapFileResolver.ResolveMapFile(symbol, date).Any())
                                {
                                    // if the resolved map file has zero entries then it's a mapless symbol
                                    maplessCount++;
                                    break;
                                }
                            }

                            var close  = decimal.Parse(csv[4]) / scaleFactor;
                            var volume = long.Parse(csv[5]);

                            var dollarVolume = close * volume;

                            var coarseFile = Path.Combine(coarseFolder, date.ToString("yyyyMMdd") + ".csv");
                            dates.Add(date);

                            // try to resolve a map file and if found, regen the sid
                            var sid     = SecurityIdentifier.GenerateEquity(SecurityIdentifier.DefaultDate, symbol, market);
                            var mapFile = mapFileResolver.ResolveMapFile(symbol, date);
                            if (!mapFile.IsNullOrEmpty())
                            {
                                // if available, use the permtick in the coarse files; because of this, we need
                                // to update the coarse files each time new map files are added or permticks change
                                sid = SecurityIdentifier.GenerateEquity(mapFile.FirstDate, mapFile.OrderBy(x => x.Date).First().MappedSymbol, market);
                            }
                            if (mapFile == null && ignoreMapless)
                            {
                                // if we're ignoring mapless files then we should always be able to resolve this
                                Log.Error(string.Format("CoarseGenerator.ProcessDailyFolder(): Unable to resolve map file for {0} as of {1}", symbol, date.ToShortDateString()));
                                continue;
                            }

                            // check if symbol has fine fundamental data for the current date
                            var hasFundamentalDataForDate = date >= firstFineSymbolDate;

                            // get price and split factors from factor files
                            var leanSymbol    = new Symbol(sid, symbol);
                            var factorFile    = factorFileProvider.Get(leanSymbol);
                            var factorFileRow = factorFile?.GetScalingFactors(date);
                            var priceFactor   = factorFileRow?.PriceFactor ?? 1m;
                            var splitFactor   = factorFileRow?.SplitFactor ?? 1m;

                            // sid,symbol,close,volume,dollar volume,has fundamental data,price factor,split factor
                            var coarseFileLine = $"{sid},{symbol},{close},{volume},{Math.Truncate(dollarVolume)},{hasFundamentalDataForDate},{priceFactor},{splitFactor}";

                            StreamWriter writer;
                            if (!writers.TryGetValue(coarseFile, out writer))
                            {
                                writer = new StreamWriter(new FileStream(coarseFile, FileMode.Create, FileAccess.Write, FileShare.Write));
                                writers[coarseFile] = writer;
                            }
                            writer.WriteLine(coarseFileLine);
                        }
                    }

                    if (symbols % 1000 == 0)
                    {
                        Log.Trace("CoarseGenerator.ProcessDailyFolder(): Completed processing {0} symbols. Current elapsed: {1} seconds", symbols, (DateTime.UtcNow - start).TotalSeconds.ToString("0.00"));
                    }
                }
                catch (Exception err)
                {
                    // log the error and continue with the process
                    Log.Error(err.ToString());
                }
            }

            Log.Trace("CoarseGenerator.ProcessDailyFolder(): Saving {0} coarse files to disk", dates.Count);

            // dispose all the writers at the end of processing
            foreach (var writer in writers)
            {
                writer.Value.Dispose();
            }

            var stop = DateTime.UtcNow;

            Log.Trace("CoarseGenerator.ProcessDailyFolder(): Processed {0} symbols into {1} coarse files in {2} seconds", symbols, dates.Count, (stop - start).TotalSeconds.ToString("0.00"));
            Log.Trace("CoarseGenerator.ProcessDailyFolder(): Excluded {0} mapless symbols.", maplessCount);

            return(writers.Keys);
        }
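Each line written above follows the layout sid,symbol,close,volume,dollar volume,has fundamental data,price factor,split factor. The following is a minimal sketch of reading one of those lines back, assuming invariant-culture formatting; CoarseRow is a hypothetical container, not a LEAN type.

        private sealed class CoarseRow
        {
            public string Sid { get; set; }
            public string Symbol { get; set; }
            public decimal Close { get; set; }
            public long Volume { get; set; }
            public decimal DollarVolume { get; set; }
            public bool HasFundamentalData { get; set; }
            public decimal PriceFactor { get; set; }
            public decimal SplitFactor { get; set; }
        }

        private static CoarseRow ParseCoarseLine(string line)
        {
            // sid,symbol,close,volume,dollar volume,has fundamental data,price factor,split factor
            var csv = line.Split(',');
            return new CoarseRow
            {
                Sid = csv[0],
                Symbol = csv[1],
                Close = decimal.Parse(csv[2], CultureInfo.InvariantCulture),
                Volume = long.Parse(csv[3], CultureInfo.InvariantCulture),
                DollarVolume = decimal.Parse(csv[4], CultureInfo.InvariantCulture),
                HasFundamentalData = bool.Parse(csv[5]),
                PriceFactor = decimal.Parse(csv[6], CultureInfo.InvariantCulture),
                SplitFactor = decimal.Parse(csv[7], CultureInfo.InvariantCulture)
            };
        }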
Ejemplo n.º 17
0
        /// <summary>
        /// Converts the data from raw format (*.nz.tar.gz) to json files consumable by LEAN
        /// </summary>
        /// <param name="processingDate">Date to process SEC filings for</param>
        public void Process(DateTime processingDate)
        {
            // Process data into dictionary of CIK -> List{T} of tickers
            foreach (var line in File.ReadLines(Path.Combine(RawSource, "cik-ticker-mappings.txt")))
            {
                var tickerCik = line.Split('\t');
                var ticker    = tickerCik[0];
                // tickerCik[0] = symbol, tickerCik[1] = CIK
                // Note that SEC tickers come in lowercase, so we don't have to alter the ticker
                var cikFormatted = tickerCik[1].PadLeft(10, '0');

                List <string> symbol;
                if (!CikTicker.TryGetValue(cikFormatted, out symbol))
                {
                    symbol = new List <string>();
                    CikTicker[cikFormatted] = symbol;
                }

                // SEC data list contains a null value in the ticker.txt file
                if (!string.IsNullOrWhiteSpace(ticker))
                {
                    symbol.Add(ticker);
                }
            }

            // Merge both data sources to a single CIK -> List{T} of tickers
            foreach (var line in File.ReadLines(Path.Combine(RawSource, "cik-ticker-mappings-rankandfile.txt")))
            {
                var tickerInfo = line.Split('|');

                var companyCik    = tickerInfo[0].PadLeft(10, '0');
                var companyTicker = tickerInfo[1].ToLowerInvariant();

                List <string> symbol;
                if (!CikTicker.TryGetValue(companyCik, out symbol))
                {
                    symbol = new List <string>();
                    CikTicker[companyCik] = symbol;
                }
                // Skip duplicate tickers and empty values in case the data comes malformed
                if (!symbol.Contains(companyTicker) && !string.IsNullOrWhiteSpace(companyTicker))
                {
                    symbol.Add(companyTicker);
                }
            }

            var formattedDate = processingDate.ToStringInvariant(DateFormat.EightCharacter);
            var remoteRawData = new FileInfo(Path.Combine(RawSource, $"{formattedDate}.nc.tar.gz"));

            if (!remoteRawData.Exists)
            {
                if (Holidays.Contains(processingDate) || USHoliday.Dates.Contains(processingDate))
                {
                    Log.Trace("SECDataConverter.Process(): File is missing, but we expected it to be missing. Nothing to do.");
                    return;
                }
                throw new Exception($"SECDataConverter.Process(): Raw data {remoteRawData} not found. No processing can be done.");
            }

            // Copy the raw data to a temp path on disk
            Log.Trace($"SECDataConverter.Process(): Copying raw data locally...");
            var tempPath     = Path.Combine(Path.GetTempPath(), Guid.NewGuid().ToStringInvariant(null));
            var localRawData = remoteRawData.CopyTo(tempPath);

            Log.Trace($"SECDataConverter.Process(): Copied raw data from {remoteRawData.FullName} - to: {tempPath}");

            Log.Trace($"SECDataConverter.Process(): Start processing...");

            var ncFilesRead      = 0;
            var startingTime     = DateTime.Now;
            var loopStartingTime = startingTime;

            // For the time being, only process .nc files; correction files can be dealt with later.
            Parallel.ForEach(
                Compression.UnTar(localRawData.OpenRead(), isTarGz: true).Where(kvp => kvp.Key.EndsWith(".nc")),
                new ParallelOptions {
                MaxDegreeOfParallelism = Environment.ProcessorCount / 2
            },
                rawReportFilePath =>
            {
                var factory = new SECReportFactory();
                var xmlText = new StringBuilder();

                // We need to escape any nested XML to ensure our deserialization happens smoothly
                var parsingText = false;

                // SEC data uses UNIX-style line endings, so there are no carriage returns to worry about here.
                foreach (var line in Encoding.UTF8.GetString(rawReportFilePath.Value).Split('\n'))
                {
                    var newTextLine    = line;
                    var currentTagName = GetTagNameFromLine(newTextLine);

                    // This tag rarely appears in SEC reports; when it does, it is unclosed and carries no value.
                    // Verified by searching with ripgrep for "CONFIRMING-COPY"
                    //
                    // Sometimes, ASSIGNED-SIC contains no value and causes errors. Check to make sure that when
                    // we encounter that tag we check if it has a value.
                    //
                    // "Appearance of the <FLAWED> tag  in
                    //  an EX-27  document header signals unreliable tagging within  the
                    //  following  document text stream; however, in  the absence  of a
                    //  <FLAWED>  tag, tagging is still not guaranteed to  be complete
                    //  because of  allowance in the financial data specifications  for
                    //  omitted tags when the submission also includes a financial  data
                    //  schedule  of article type CT."
                    if (currentTagName == "CONFIRMING-COPY" || (currentTagName == "ASSIGNED-SIC" && !HasValue(line)) || currentTagName == "FLAWED")
                    {
                        continue;
                    }

                    // Indicates that the form is a paper submission and that the current file has no contents
                    if (currentTagName == "PAPER")
                    {
                        continue;
                    }

                    // Don't encode the closing tag
                    if (currentTagName == "/TEXT")
                    {
                        parsingText = false;
                    }

                    // To ensure that we can serialize/deserialize data with hours, minutes, seconds
                    if (currentTagName == "FILING-DATE" || currentTagName == "PERIOD" ||
                        currentTagName == "DATE-OF-FILING-CHANGE" || currentTagName == "DATE-CHANGED")
                    {
                        newTextLine = $"{newTextLine.TrimEnd()} 00:00:00";
                    }

                    // Encode all contents inside tags to prevent errors in XML parsing.
                    // The json deserializer will convert these values back to their original form
                    if (!parsingText && HasValue(newTextLine))
                    {
                        newTextLine =
                            $"<{currentTagName}>{SecurityElement.Escape(GetTagValueFromLine(newTextLine))}</{currentTagName}>";
                    }
                    // Escape all contents inside TEXT tags
                    else if (parsingText)
                    {
                        newTextLine = SecurityElement.Escape(newTextLine);
                    }

                    // Don't encode the opening tag
                    if (currentTagName == "TEXT")
                    {
                        parsingText = true;
                    }

                    xmlText.AppendLine(newTextLine);
                }

                var counter = Interlocked.Increment(ref ncFilesRead);
                if (counter % 100 == 0)
                {
                    var interval = DateTime.Now - loopStartingTime;
                    Log.Trace($"SECDataConverter.Process(): {counter.ToStringInvariant()} nc files read at {(100 / interval.TotalMinutes).ToStringInvariant("N2")} files/min.");
                    loopStartingTime = DateTime.Now;
                }

                ISECReport report;
                try
                {
                    report = factory.CreateSECReport(xmlText.ToString());
                }
                // Ignore unsupported form types for now
                catch (DataException)
                {
                    return;
                }
                catch (XmlException e)
                {
                    Log.Error(e, $"SECDataConverter.Process(): Failed to parse XML from file: {rawReportFilePath.Key}");
                    return;
                }
                catch (Exception e)
                {
                    Log.Error(e, "SECDataConverter.Process(): Unknown error encountered");
                    return;
                }

                // First filer listed in SEC report is usually the company listed on stock exchanges
                var companyCik = report.Report.Filers.First().CompanyData.Cik;

                // Some companies can operate under two tickers, but have the same CIK.
                // Don't bother continuing if we don't find any tickers for the given CIK
                List <string> tickers;
                if (!CikTicker.TryGetValue(companyCik, out tickers))
                {
                    return;
                }

                if (!File.Exists(Path.Combine(RawSource, "indexes", $"{companyCik}.json")))
                {
                    Log.Error($"SECDataConverter.Process(): {report.Report.FilingDate.ToStringInvariant("yyyy-MM-dd")}:{rawReportFilePath.Key} - Failed to find index file for ticker {tickers.FirstOrDefault()} with CIK: {companyCik}");
                    return;
                }

                try
                {
                    // The index file can potentially be corrupted
                    GetPublicationDate(report, companyCik);
                }
                catch (Exception e)
                {
                    Log.Error(e, $"SECDataConverter.Process(): {report.Report.FilingDate.ToStringInvariant("yyyy-MM-dd")}:{rawReportFilePath.Key} - Index file loading failed for ticker: {tickers.FirstOrDefault()} with CIK: {companyCik} even though it exists");
                }

                // Default to company CIK if no known ticker is found.
                // If the equity does not resolve to a map file, or is not
                // found in the map files, we skip writing it.
                foreach (var ticker in tickers)
                {
                    var tickerMapFile = _mapFileResolver.ResolveMapFile(ticker, processingDate);
                    if (!tickerMapFile.Any())
                    {
                        Log.Trace($"SECDataConverter.Process(): {processingDate.ToStringInvariant()} - Failed to find map file for ticker: {ticker}");
                        continue;
                    }

                    // Map the current ticker to the ticker it was in the past using the map file system
                    var mappedTicker = tickerMapFile.GetMappedSymbol(processingDate);

                    // If no suitable date is found for the symbol in the map file, we skip writing the data
                    if (string.IsNullOrEmpty(mappedTicker))
                    {
                        Log.Trace($"SECDataConverter.Process(): {processingDate.ToStringInvariant()} - Failed to find mapped symbol for ticker: {ticker}");
                        continue;
                    }

                    var tickerReports = Reports.GetOrAdd(
                        mappedTicker,
                        _ => new ConcurrentDictionary <DateTime, List <ISECReport> >()
                        );
                    var reports = tickerReports.GetOrAdd(
                        report.Report.FilingDate.Date,
                        _ => new List <ISECReport>()
                        );

                    reports.Add(report);
                }
            }
                );

            Log.Trace($"SECDataConverter.Process(): {ncFilesRead} nc files read finished in {(DateTime.Now - startingTime).ToStringInvariant("g")}.");

            Parallel.ForEach(
                Reports.Keys,
                ticker =>
            {
                List <ISECReport> reports;
                if (!Reports[ticker].TryRemove(processingDate, out reports))
                {
                    return;
                }

                WriteReport(reports, ticker);
            }
                );

            // Delete the raw data we copied to the temp folder
            File.Delete(tempPath);
        }
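GetTagNameFromLine, GetTagValueFromLine and HasValue are referenced above but not included in this snippet. The sketches below show plausible implementations, assuming SGML-style lines such as `<FILING-DATE>20190102`; treat them as assumptions rather than the converter's actual helpers.

        private static string GetTagNameFromLine(string line)
        {
            // "<FILING-DATE>20190102" -> "FILING-DATE"
            var start = line.IndexOf('<');
            if (start == -1)
            {
                return string.Empty;
            }
            var end = line.IndexOf('>', start + 1);
            return end == -1 ? string.Empty : line.Substring(start + 1, end - start - 1);
        }

        private static string GetTagValueFromLine(string line)
        {
            // "<FILING-DATE>20190102" -> "20190102"
            var end = line.IndexOf('>');
            return end == -1 ? string.Empty : line.Substring(end + 1);
        }

        private static bool HasValue(string line)
        {
            // a tag "has a value" when something non-blank follows the closing angle bracket
            return !string.IsNullOrWhiteSpace(GetTagValueFromLine(line));
        }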
Ejemplo n.º 18
0
        /// <summary>
        /// Iterates each daily file in the specified <paramref name="dailyFolder"/> and adds a line for each
        /// day to the appropriate coarse file
        /// </summary>
        /// <param name="dailyFolder">The folder with daily data</param>
        /// <param name="coarseFolder">The coarse output folder</param>
        /// <param name="mapFileResolver"></param>
        /// <param name="exclusions">The symbols to be excluded from processing</param>
        /// <param name="ignoreMapless">Ignore the symbols without a map file.</param>
        /// <param name="symbolResolver">Function used to provide symbol resolution. Default resolution uses the zip file name to resolve
        /// the symbol, specify null for this behavior.</param>
        /// <returns>A collection of the generated coarse files</returns>
        public static ICollection <string> ProcessDailyFolder(string dailyFolder, string coarseFolder, MapFileResolver mapFileResolver, HashSet <string> exclusions, bool ignoreMapless, Func <string, string> symbolResolver = null)
        {
            const decimal scaleFactor = 10000m;

            Log.Trace("Processing: {0}", dailyFolder);

            var stopwatch = Stopwatch.StartNew();

            // load map files into memory

            var symbols      = 0;
            var maplessCount = 0;
            var dates        = new HashSet <DateTime>();

            // instead of opening/closing these constantly, open them once and dispose at the end (~3x speed improvement)
            var writers = new Dictionary <string, StreamWriter>();

            // open up each daily file to get the values and append to the daily coarse files
            foreach (var file in Directory.EnumerateFiles(dailyFolder))
            {
                try
                {
                    var symbol = Path.GetFileNameWithoutExtension(file);
                    if (symbol == null)
                    {
                        Log.Trace("CoarseGenerator.ProcessDailyFolder(): Unable to resolve symbol from file: {0}", file);
                        continue;
                    }

                    if (symbolResolver != null)
                    {
                        symbol = symbolResolver(symbol);
                    }

                    symbol = symbol.ToUpper();

                    if (exclusions.Contains(symbol))
                    {
                        Log.Trace("Excluded symbol: {0}", symbol);
                        continue;
                    }

                    ZipFile zip;
                    using (var reader = Compression.Unzip(file, out zip))
                    {
                        // 30 period EMA constant
                        const decimal k = 2m / (30 + 1);

                        var seeded = false;
                        var runningAverageVolume = 0m;

                        var checkedForMapFile = false;

                        symbols++;
                        string line;
                        while ((line = reader.ReadLine()) != null)
                        {
                            //20150625.csv
                            var csv  = line.Split(',');
                            var date = DateTime.ParseExact(csv[0], DateFormat.TwelveCharacter, CultureInfo.InvariantCulture);

                            if (ignoreMapless && !checkedForMapFile)
                            {
                                checkedForMapFile = true;
                                if (!mapFileResolver.ResolveMapFile(symbol, date).Any())
                                {
                                    // if the resolved map file has zero entries then it's a mapless symbol
                                    maplessCount++;
                                    break;
                                }
                            }

                            var close  = decimal.Parse(csv[4]) / scaleFactor;
                            var volume = long.Parse(csv[5]);

                            // compute the current volume EMA for dollar volume calculations
                            runningAverageVolume = seeded
                                ? volume * k + runningAverageVolume * (1 - k)
                                : volume;

                            seeded = true;

                            var dollarVolume = close * runningAverageVolume;

                            var coarseFile = Path.Combine(coarseFolder, date.ToString("yyyyMMdd") + ".csv");
                            dates.Add(date);

                            // try to resolve a map file and if found, use the permtick as the symbol
                            var sid     = symbol;
                            var mapFile = mapFileResolver.ResolveMapFile(sid, date);
                            if (mapFile != null)
                            {
                                // if available, use the permtick in the coarse files; because of this, we need
                                // to update the coarse files each time new map files are added or permticks change
                                sid = mapFile.Permtick;
                            }
                            if (mapFile == null && ignoreMapless)
                            {
                                // if we're ignoring mapless files then we should always be able to resolve this
                                Log.Error(string.Format("CoarseGenerator.ProcessDailyFolder(): Unable to resolve map file for {0} as of {1}", symbol, date.ToShortDateString()));
                                continue;
                            }

                            // sid,symbol,close,volume,dollar volume
                            var coarseFileLine = sid + "," + symbol + "," + close + "," + volume + "," + Math.Truncate(dollarVolume);

                            StreamWriter writer;
                            if (!writers.TryGetValue(coarseFile, out writer))
                            {
                                writer = new StreamWriter(new FileStream(coarseFile, FileMode.Append, FileAccess.Write, FileShare.Write));
                                writers[coarseFile] = writer;
                            }
                            writer.WriteLine(coarseFileLine);
                        }
                    }

                    if (symbols % 1000 == 0)
                    {
                        Log.Trace("CoarseGenerator.ProcessDailyFolder(): Completed processing {0} symbols. Current elapsed: {1} seconds", symbols, stopwatch.Elapsed.TotalSeconds.ToString("0.00"));
                    }
                }
                catch (Exception err)
                {
                    // log the error and continue with the process
                    Log.Error(err.ToString());
                }
            }

            Log.Trace("CoarseGenerator.ProcessDailyFolder(): Saving {0} coarse files to disk", dates.Count);

            // dispose all the writers at the end of processing
            foreach (var writer in writers)
            {
                writer.Value.Dispose();
            }

            stopwatch.Stop();

            Log.Trace("CoarseGenerator.ProcessDailyFolder(): Processed {0} symbols into {1} coarse files in {2} seconds", symbols, dates.Count, stopwatch.Elapsed.TotalSeconds.ToString("0.00"));
            Log.Trace("CoarseGenerator.ProcessDailyFolder(): Excluded {0} mapless symbols.", maplessCount);

            return(writers.Keys);
        }
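Unlike the previous version, the dollar volume here is the close multiplied by a 30-period exponential moving average of volume rather than the raw daily volume. The following self-contained sketch shows the same smoothing recurrence with the constant k = 2 / (N + 1); it is illustrative only.

        private static IEnumerable<decimal> ExponentialMovingAverage(IEnumerable<long> volumes, int period = 30)
        {
            // standard EMA smoothing constant: k = 2 / (N + 1)
            var k = 2m / (period + 1);

            var seeded = false;
            var ema = 0m;
            foreach (var volume in volumes)
            {
                // the first observation seeds the average; afterwards blend the new volume into the running value
                ema = seeded ? volume * k + ema * (1 - k) : volume;
                seeded = true;
                yield return ema;
            }
        }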
Ejemplo n.º 19
0
        /// <summary>
        /// Iterates each daily file in the specified <paramref name="dailyFolder"/> and adds a line for each
        /// day to the appropriate coarse file
        /// </summary>
        /// <param name="dailyFolder">The folder with daily data</param>
        /// <param name="coarseFolder">The coarse output folder</param>
        /// <param name="mapFileResolver"></param>
        /// <param name="exclusions">The symbols to be excluded from processing</param>
        /// <param name="ignoreMapless">Ignore the symbols without a map file.</param>
        /// <param name="symbolResolver">Function used to provide symbol resolution. Default resolution uses the zip file name to resolve
        /// the symbol, specify null for this behavior.</param>
        /// <returns>A collection of the generated coarse files</returns>
        public static ICollection<string> ProcessDailyFolder(string dailyFolder, string coarseFolder, MapFileResolver mapFileResolver, HashSet<string> exclusions, bool ignoreMapless, Func<string, string> symbolResolver = null)
        {
            const decimal scaleFactor = 10000m;

            Log.Trace("Processing: {0}", dailyFolder);

            var stopwatch = Stopwatch.StartNew();

            // load map files into memory

            var symbols = 0;
            var maplessCount = 0;
            var dates = new HashSet<DateTime>();

            // instead of opening/closing these constantly, open them once and dispose at the end (~3x speed improvement)
            var writers = new Dictionary<string, StreamWriter>();

            // open up each daily file to get the values and append to the daily coarse files
            foreach (var file in Directory.EnumerateFiles(dailyFolder))
            {
                try
                {
                    var symbol = Path.GetFileNameWithoutExtension(file);
                    if (symbol == null)
                    {
                        Log.Trace("CoarseGenerator.ProcessDailyFolder(): Unable to resolve symbol from file: {0}", file);
                        continue;
                    }

                    if (symbolResolver != null)
                    {
                        symbol = symbolResolver(symbol);
                    }

                    symbol = symbol.ToUpper();

                    if (exclusions.Contains(symbol))
                    {
                        Log.Trace("Excluded symbol: {0}", symbol);
                        continue;
                    }

                    ZipFile zip;
                    using (var reader = Compression.Unzip(file, out zip))
                    {
                        // 30 period EMA constant
                        const decimal k = 2m / (30 + 1);

                        var seeded = false;
                        var runningAverageVolume = 0m;

                        var checkedForMapFile = false;

                        symbols++;
                        string line;
                        while ((line = reader.ReadLine()) != null)
                        {
                            //20150625.csv
                            var csv = line.Split(',');
                            var date = DateTime.ParseExact(csv[0], DateFormat.TwelveCharacter, CultureInfo.InvariantCulture);

                            if (ignoreMapless && !checkedForMapFile)
                            {
                                checkedForMapFile = true;
                                if (!mapFileResolver.ResolveMapFile(symbol, date).Any())
                                {
                                    // if the resolved map file has zero entries then it's a mapless symbol
                                    maplessCount++;
                                    break;
                                }
                            }

                            var close = decimal.Parse(csv[4])/scaleFactor;
                            var volume = long.Parse(csv[5]);

                            // compute the current volume EMA for dollar volume calculations
                            runningAverageVolume = seeded
                                ? volume*k + runningAverageVolume*(1 - k)
                                : volume;

                            seeded = true;

                            var dollarVolume = close*runningAverageVolume;

                            var coarseFile = Path.Combine(coarseFolder, date.ToString("yyyyMMdd") + ".csv");
                            dates.Add(date);

                            // try to resolve a map file and if found, use the permtick as the symbol
                            var sid = symbol;
                            var mapFile = mapFileResolver.ResolveMapFile(sid, date);
                            if (mapFile != null)
                            {
                                // if available, use the permtick in the coarse files; because of this, we need
                                // to update the coarse files each time new map files are added or permticks change
                                sid = mapFile.Permtick;
                            }
                            if (mapFile == null && ignoreMapless)
                            {
                                // if we're ignoring mapless files then we should always be able to resolve this
                                Log.Error(string.Format("CoarseGenerator.ProcessDailyFolder(): Unable to resolve map file for {0} as of {1}", symbol, date.ToShortDateString()));
                                continue;
                            }

                            // sid,symbol,close,volume,dollar volume
                            var coarseFileLine = sid + "," + symbol + "," + close + "," + volume + "," + Math.Truncate(dollarVolume);

                            StreamWriter writer;
                            if (!writers.TryGetValue(coarseFile, out writer))
                            {
                                writer = new StreamWriter(new FileStream(coarseFile, FileMode.Append, FileAccess.Write, FileShare.Write));
                                writers[coarseFile] = writer;
                            }
                            writer.WriteLine(coarseFileLine);
                        }
                    }

                    if (symbols%1000 == 0)
                    {
                        Log.Trace("CoarseGenerator.ProcessDailyFolder(): Completed processing {0} symbols. Current elapsed: {1} seconds", symbols, stopwatch.Elapsed.TotalSeconds.ToString("0.00"));
                    }
                }
                catch (Exception err)
                {
                    // log the error and continue with the process
                    Log.Error(err.ToString());
                }
            }

            Log.Trace("CoarseGenerator.ProcessDailyFolder(): Saving {0} coarse files to disk", dates.Count);

            // dispose all the writers at the end of processing
            foreach (var writer in writers)
            {
                writer.Value.Dispose();
            }

            stopwatch.Stop();

            Log.Trace("CoarseGenerator.ProcessDailyFolder(): Processed {0} symbols into {1} coarse files in {2} seconds", symbols, dates.Count, stopwatch.Elapsed.TotalSeconds.ToString("0.00"));
            Log.Trace("CoarseGenerator.ProcessDailyFolder(): Excluded {0} mapless symbols.", maplessCount);

            return writers.Keys;
        }
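For context, a call to this overload might look like the following. The paths and exclusions are placeholders, and MapFileResolver.Create(dataFolder, market) is assumed to be available as in other LEAN tooling; adjust to however map files are loaded in your setup.

        var mapFileResolver = MapFileResolver.Create("/data", "usa");
        var exclusions = new HashSet<string> { "TEST" };

        var coarseFiles = CoarseGenerator.ProcessDailyFolder(
            "/data/equity/usa/daily",                 // zipped daily bars, one file per symbol
            "/data/equity/usa/fundamental/coarse",    // output folder for the per-date coarse files
            mapFileResolver,
            exclusions,
            ignoreMapless: true);

        Log.Trace($"Generated {coarseFiles.Count} coarse files");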
        /// <summary>
        /// Runs the instance of the object.
        /// </summary>
        /// <returns>True if process all downloads successfully</returns>
        public override bool Run()
        {
            var stopwatch = Stopwatch.StartNew();

            try
            {
                var companies      = GetCompanies().Result.DistinctBy(x => x.Ticker).ToList();
                var count          = companies.Count;
                var currentPercent = 0.05;
                var percent        = 0.05;
                var i = 0;

                Log.Trace($"EstimizeReleaseDataDownloader.Run(): Start processing {count} companies");

                var tasks = new List <Task>();

                foreach (var company in companies)
                {
                    // Makes sure we don't overrun Estimize rate limits accidentally
                    IndexGate.WaitToProceed();

                    // Include tickers that are "defunct".
                    // Remove the tag because it cannot be part of the API endpoint.
                    // This is separate from the NormalizeTicker(...) method because here we
                    // do not convert tickers containing `-` into the format used to index
                    // the map files.
                    var    estimizeTicker = company.Ticker;
                    string ticker;

                    if (!TryNormalizeDefunctTicker(estimizeTicker, out ticker))
                    {
                        Log.Error($"EstimizeReleaseDataDownloader(): Defunct ticker {estimizeTicker} is unable to be parsed. Continuing...");
                        continue;
                    }

                    // Begin processing ticker with a normalized value
                    Log.Trace($"EstimizeReleaseDataDownloader.Run(): Processing {ticker}");

                    tasks.Add(
                        HttpRequester($"/companies/{ticker}/releases")
                        .ContinueWith(
                            y =>
                    {
                        i++;

                        if (y.IsFaulted)
                        {
                            Log.Error($"EstimizeReleaseDataDownloader.Run(): Failed to get data for {company}");
                            return;
                        }

                        var result = y.Result;
                        if (string.IsNullOrEmpty(result))
                        {
                            // We've already logged inside HttpRequester
                            return;
                        }

                        // Just like TradingEconomics, we only want events that have already occurred;
                        // "forecasts" that will change in the future would taint our data and make
                        // backtests non-deterministic. We also want our data to be consistent with
                        // historical requests made in live trading.
                        var releases = JsonConvert.DeserializeObject <List <EstimizeRelease> >(result, JsonSerializerSettings)
                                       .Where(x => x.Eps != null)
                                       .GroupBy(x =>
                        {
                            var normalizedTicker = NormalizeTicker(ticker);
                            var releaseDate      = x.ReleaseDate;

                            try
                            {
                                var mapFile   = _mapFileResolver.ResolveMapFile(normalizedTicker, releaseDate);
                                var oldTicker = normalizedTicker;
                                var newTicker = normalizedTicker;

                                // Ensure we're writing to the correct historical ticker
                                if (!mapFile.Any())
                                {
                                    Log.Trace($"EstimizeReleaseDataDownloader.Run(): Failed to find map file for: {newTicker} - on: {releaseDate}");
                                    return(string.Empty);
                                }

                                newTicker = mapFile.GetMappedSymbol(releaseDate);
                                if (string.IsNullOrWhiteSpace(newTicker))
                                {
                                    Log.Trace($"EstimizeReleaseDataDownloader.Run(): Failed to find mapping for null new ticker. Old ticker: {oldTicker} - on: {releaseDate}");
                                    return(string.Empty);
                                }

                                if (oldTicker != newTicker)
                                {
                                    Log.Trace($"EstimizeReleaseDataDownloader.Run(): Remapped from {oldTicker} to {newTicker} for {releaseDate}");
                                }

                                return(newTicker);
                            }
                            // Rarely, the map file constructor throws because it tries to
                            // access the last element of an empty list; this may be an upstream bug.
                            catch (InvalidOperationException e)
                            {
                                Log.Error(e, $"EstimizeReleaseDataDownloader.Run(): Failed to load map file for: {normalizedTicker} - on: {releaseDate}");
                                return(string.Empty);
                            }
                        })
                                       .Where(x => !string.IsNullOrEmpty(x.Key));

                        foreach (var kvp in releases)
                        {
                            var csvContents = kvp.Select(x => $"{x.ReleaseDate.ToUniversalTime():yyyyMMdd HH:mm:ss},{x.Id},{x.FiscalYear},{x.FiscalQuarter},{x.Eps},{x.Revenue},{x.ConsensusEpsEstimate},{x.ConsensusRevenueEstimate},{x.WallStreetEpsEstimate},{x.WallStreetRevenueEstimate},{x.ConsensusWeightedEpsEstimate},{x.ConsensusWeightedRevenueEstimate}");
                            SaveContentToFile(_destinationFolder, kvp.Key, csvContents);
                        }

                        // use floating-point division so the progress percentage isn't truncated to zero
                        var percentDone = i / (double)count;
                        if (percentDone >= currentPercent)
                        {
                            Log.Trace($"EstimizeEstimateDataDownloader.Run(): {percentDone:P2} complete");
                            currentPercent += percent;
                        }
                    }
                            )
                        );
                }

                Task.WaitAll(tasks.ToArray());
            }
            catch (Exception e)
            {
                Log.Error(e);
                return(false);
            }

            Log.Trace($"EstimizeReleaseDataDownloader.Run(): Finished in {stopwatch.Elapsed}");
            return(true);
        }
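TryNormalizeDefunctTicker is not shown in this snippet. Based on the inline defunct-ticker handling in the estimate downloader further below, a plausible sketch is the following; it is an assumption about the real method, not its actual implementation.

        private static bool TryNormalizeDefunctTicker(string ticker, out string normalizedTicker)
        {
            normalizedTicker = ticker;

            // tickers such as "FOO - DEFUNCT" keep only the symbol before the dash
            if (ticker.IndexOf("defunct", StringComparison.OrdinalIgnoreCase) > 0)
            {
                var dashIndex = ticker.IndexOf('-');
                if (dashIndex <= 0)
                {
                    // malformed defunct tag: no dash to split on
                    return false;
                }

                normalizedTicker = ticker.Substring(0, dashIndex).Trim();
            }

            return !string.IsNullOrWhiteSpace(normalizedTicker);
        }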
Ejemplo n.º 21
0
        /// <summary>
        /// Subscription data reader takes a subscription request, loads the type, accepts the data source and enumerate on the results.
        /// </summary>
        /// <param name="config">Subscription configuration object</param>
        /// <param name="periodStart">Start date for the data request/backtest</param>
        /// <param name="periodFinish">Finish date for the data request/backtest</param>
        /// <param name="resultHandler">Result handler used to push error messages and perform sampling on skipped days</param>
        /// <param name="mapFileResolver">Used for resolving the correct map files</param>
        /// <param name="factorFileProvider">Used for getting factor files</param>
        /// <param name="dataProvider">Used for getting files not present on disk</param>
        /// <param name="dataCacheProvider">Used for caching files</param>
        /// <param name="tradeableDates">Defines the dates for which we'll request data, in order, in the security's exchange time zone</param>
        /// <param name="isLiveMode">True if we're in live mode, false otherwise</param>
        /// <param name="includeAuxilliaryData">True if we want to emit aux data, false to only emit price data</param>
        public SubscriptionDataReader(SubscriptionDataConfig config,
                                      DateTime periodStart,
                                      DateTime periodFinish,
                                      IResultHandler resultHandler,
                                      MapFileResolver mapFileResolver,
                                      IFactorFileProvider factorFileProvider,
                                      IDataProvider dataProvider,
                                      IEnumerable <DateTime> tradeableDates,
                                      bool isLiveMode,
                                      IDataCacheProvider dataCacheProvider,
                                      bool includeAuxilliaryData = true)
        {
            //Save configuration of data-subscription:
            _config = config;

            _auxiliaryData = new Queue <BaseData>();

            //Save Start and End Dates:
            _periodStart       = periodStart;
            _periodFinish      = periodFinish;
            _dataProvider      = dataProvider;
            _dataCacheProvider = dataCacheProvider;

            //Save access to securities
            _isLiveMode            = isLiveMode;
            _includeAuxilliaryData = includeAuxilliaryData;

            //Save the type of data we'll be getting from the source.

            //Create the dynamic type-activators:
            var objectActivator = ObjectActivator.GetActivator(config.Type);

            _resultHandler  = resultHandler;
            _tradeableDates = tradeableDates.GetEnumerator();
            if (objectActivator == null)
            {
                _resultHandler.ErrorMessage("Custom data type '" + config.Type.Name + "' missing parameterless constructor E.g. public " + config.Type.Name + "() { }");
                _endOfStream = true;
                return;
            }

            //Create an instance of the "Type":
            var userObj = objectActivator.Invoke(new object[] { config.Type });

            _dataFactory = userObj as BaseData;

            // If it's Quandl, set the access token in the data factory:
            var quandl = _dataFactory as Quandl;

            if (quandl != null)
            {
                if (!Quandl.IsAuthCodeSet)
                {
                    Quandl.SetAuthCode(Config.Get("quandl-auth-token"));
                }
            }

            // If Tiingo data, set the access token in data factory
            var tiingo = _dataFactory as TiingoDailyData;

            if (tiingo != null)
            {
                if (!Tiingo.IsAuthCodeSet)
                {
                    Tiingo.SetAuthCode(Config.Get("tiingo-auth-token"));
                }
            }

            _factorFile = new FactorFile(config.Symbol.Value, new List <FactorFileRow>());
            _mapFile    = new MapFile(config.Symbol.Value, new List <MapFileRow>());

            // load up the map and factor files for equities
            if (!config.IsCustomData && config.SecurityType == SecurityType.Equity)
            {
                try
                {
                    var mapFile = mapFileResolver.ResolveMapFile(config.Symbol.ID.Symbol, config.Symbol.ID.Date);

                    // only take the resolved map file if it has data, otherwise we'll use the empty one we defined above
                    if (mapFile.Any())
                    {
                        _mapFile = mapFile;
                    }

                    var factorFile = factorFileProvider.Get(_config.Symbol);
                    _hasScaleFactors = factorFile != null;
                    if (_hasScaleFactors)
                    {
                        _factorFile = factorFile;

                        // if factor file has minimum date, update start period if before minimum date
                        if (!_isLiveMode && _factorFile != null && _factorFile.FactorFileMinimumDate.HasValue)
                        {
                            if (_periodStart < _factorFile.FactorFileMinimumDate.Value)
                            {
                                _periodStart = _factorFile.FactorFileMinimumDate.Value;

                                _resultHandler.DebugMessage(
                                    string.Format("Data for symbol {0} has been limited due to numerical precision issues in the factor file. The starting date has been set to {1}.",
                                                  config.Symbol.Value,
                                                  _factorFile.FactorFileMinimumDate.Value.ToShortDateString()));
                            }
                        }
                    }
                }
                catch (Exception err)
                {
                    Log.Error(err, "Fetching Price/Map Factors: " + config.Symbol.ID + ": ");
                }
            }

            // load up the map and factor files for underlying of equity option
            if (!config.IsCustomData && config.SecurityType == SecurityType.Option)
            {
                try
                {
                    var mapFile = mapFileResolver.ResolveMapFile(config.Symbol.Underlying.ID.Symbol, config.Symbol.Underlying.ID.Date);

                    // only take the resolved map file if it has data, otherwise we'll use the empty one we defined above
                    if (mapFile.Any())
                    {
                        _mapFile = mapFile;
                    }
                }
                catch (Exception err)
                {
                    Log.Error(err, "Map Factors: " + config.Symbol.ID + ": ");
                }
            }

            // Estimate delisting date.
            switch (_config.Symbol.ID.SecurityType)
            {
            case SecurityType.Future:
                _delistingDate = _config.Symbol.ID.Date;
                break;

            case SecurityType.Option:
                _delistingDate = OptionSymbol.GetLastDayOfTrading(_config.Symbol);
                break;

            default:
                _delistingDate = _mapFile.DelistingDate;
                break;
            }
            _subscriptionFactoryEnumerator = ResolveDataEnumerator(true);
        }
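The delisting-date switch at the end of the constructor can also be read as a small pure function. The sketch below is only a restatement of the three cases above, using the same members, and is not part of the original class.

        private static DateTime EstimateDelistingDate(Symbol symbol, MapFile mapFile)
        {
            switch (symbol.ID.SecurityType)
            {
            case SecurityType.Future:
                // futures stop trading at the expiry encoded in the identifier
                return symbol.ID.Date;

            case SecurityType.Option:
                // options stop trading on their last tradeable day, which can differ from expiry
                return OptionSymbol.GetLastDayOfTrading(symbol);

            default:
                // equities fall back to the delisting date recorded in the map file
                return mapFile.DelistingDate;
            }
        }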
Ejemplo n.º 22
0
        /// <summary>
        /// Initializes the <see cref="SubscriptionDataReader"/> instance
        /// </summary>
        /// <remarks>Should be called after all consumers of <see cref="NewTradableDate"/> event are set,
        /// since it will produce events.</remarks>
        public void Initialize()
        {
            if (_initialized)
            {
                return;
            }

            //Save the type of data we'll be getting from the source.
            try
            {
                _dataFactory = _config.GetBaseDataInstance();
            }
            catch (ArgumentException exception)
            {
                OnInvalidConfigurationDetected(new InvalidConfigurationDetectedEventArgs(_config.Symbol, exception.Message));
                _endOfStream = true;
                return;
            }

            // If it's Quandl, set the access token in the data factory:
            var quandl = _dataFactory as Quandl;

            if (quandl != null)
            {
                if (!Quandl.IsAuthCodeSet)
                {
                    Quandl.SetAuthCode(Config.Get("quandl-auth-token"));
                }
            }

            // If Tiingo data, set the access token in data factory
            var tiingo = _dataFactory as TiingoPrice;

            if (tiingo != null)
            {
                if (!Tiingo.IsAuthCodeSet)
                {
                    Tiingo.SetAuthCode(Config.Get("tiingo-auth-token"));
                }
            }

            // If USEnergyAPI data, set the access token in data factory
            var energyInformation = _dataFactory as USEnergyAPI;

            if (energyInformation != null)
            {
                if (!USEnergyAPI.IsAuthCodeSet)
                {
                    USEnergyAPI.SetAuthCode(Config.Get("us-energy-information-auth-token"));
                }
            }

            // If Fred data, set the access token in data factory
            var fred = _dataFactory as FredApi;

            if (fred != null)
            {
                if (!FredApi.IsAuthCodeSet)
                {
                    FredApi.SetAuthCode(Config.Get("fred-auth-token"));
                }
            }

            _factorFile = new FactorFile(_config.Symbol.Value, new List <FactorFileRow>());
            _mapFile    = new MapFile(_config.Symbol.Value, new List <MapFileRow>());

            // load up the map files for equities, options, and custom data if it supports it.
            // Only load up factor files for equities
            if (_dataFactory.RequiresMapping())
            {
                try
                {
                    var mapFile = _mapFileResolver.ResolveMapFile(_config.Symbol, _config.Type);

                    // only take the resolved map file if it has data, otherwise we'll use the empty one we defined above
                    if (mapFile.Any())
                    {
                        _mapFile = mapFile;
                    }

                    if (!_config.IsCustomData && !_config.SecurityType.IsOption())
                    {
                        var factorFile = _factorFileProvider.Get(_config.Symbol);
                        _hasScaleFactors = factorFile != null;
                        if (_hasScaleFactors)
                        {
                            _factorFile = factorFile;

                            // if factor file has minimum date, update start period if before minimum date
                            if (!_isLiveMode && _factorFile != null && _factorFile.FactorFileMinimumDate.HasValue)
                            {
                                if (_periodStart < _factorFile.FactorFileMinimumDate.Value)
                                {
                                    _periodStart = _factorFile.FactorFileMinimumDate.Value;

                                    OnNumericalPrecisionLimited(
                                        new NumericalPrecisionLimitedEventArgs(_config.Symbol,
                                                                               $"[{_config.Symbol.Value}, {_factorFile.FactorFileMinimumDate.Value.ToShortDateString()}]"));
                                }
                            }
                        }

                        if (_periodStart < mapFile.FirstDate)
                        {
                            _periodStart = mapFile.FirstDate;

                            OnStartDateLimited(
                                new StartDateLimitedEventArgs(_config.Symbol,
                                                              $"[{_config.Symbol.Value}," +
                                                              $" {mapFile.FirstDate.ToString("yyyy-MM-dd", CultureInfo.InvariantCulture)}]"));
                        }
                    }
                }
                catch (Exception err)
                {
                    Log.Error(err, "Fetching Price/Map Factors: " + _config.Symbol.ID + ": ");
                }
            }

            _delistingDate = _config.Symbol.GetDelistingDate(_mapFile);

            // adding a day so we stop at EOD
            _delistingDate = _delistingDate.AddDays(1);

            UpdateDataEnumerator(true);

            _initialized = true;
        }
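The initialization above repeats one pattern per data vendor: cast the data factory to the vendor type, then set a process-wide auth token only if it has not been set yet. The sketch below shows that pattern in isolation; ExampleVendorData and its members are hypothetical stand-ins for the real Quandl, TiingoPrice, USEnergyAPI and FredApi types.

using System;

public class ExampleVendorData
{
    private static string _authCode = string.Empty;

    // True once the vendor-wide auth code has been configured
    public static bool IsAuthCodeSet => !string.IsNullOrEmpty(_authCode);

    // Stores the auth code; callers guard with IsAuthCodeSet so it is only set once
    public static void SetAuthCode(string authCode)
    {
        if (!string.IsNullOrEmpty(authCode))
        {
            _authCode = authCode;
        }
    }
}

public static class AuthCodeGateExample
{
    public static void Main()
    {
        object dataFactory = new ExampleVendorData();

        // Same shape as the reader's initialization: cast, then set the token only if unset
        var vendor = dataFactory as ExampleVendorData;
        if (vendor != null && !ExampleVendorData.IsAuthCodeSet)
        {
            ExampleVendorData.SetAuthCode("example-auth-token"); // in Lean this would come from Config.Get(...)
        }

        Console.WriteLine(ExampleVendorData.IsAuthCodeSet); // True
    }
}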
Example No. 23
0
        /// <summary>
        /// Runs the instance of the object.
        /// </summary>
        /// <returns>True if all downloads were processed successfully</returns>
        public override bool Run()
        {
            var stopwatch = Stopwatch.StartNew();

            try
            {
                var companies      = GetCompanies().Result.DistinctBy(x => x.Ticker).ToList();
                var count          = companies.Count;
                var currentPercent = 0.05;
                var percent        = 0.05;
                var i = 0;

                Log.Trace($"EstimizeEstimateDataDownloader.Run(): Start processing {count} companies");

                var tasks = new List <Task>();

                foreach (var company in companies)
                {
                    var ticker = company.Ticker;

                    // Include tickers that are marked as "defunct", but strip the tag
                    // because it cannot be part of the API endpoint
                    if (ticker.IndexOf("defunct", StringComparison.OrdinalIgnoreCase) > 0)
                    {
                        var length = ticker.IndexOf('-');
                        ticker = ticker.Substring(0, length).Trim();
                    }

                    Log.Trace($"EstimizeEstimateDataDownloader.Run(): Processing {ticker}");

                    try
                    {
                        // Makes sure we don't overrun Estimize rate limits accidentally
                        IndexGate.WaitToProceed();
                    }
                    // This is very rare, but failures in RateGate (RG) can still happen. Don't rely on RG
                    // always operating successfully, so that a RateGate failure doesn't fail the whole download process
                    catch (ArgumentOutOfRangeException e)
                    {
                        Log.Error(e, $"EstimizeEstimateDataDownloader.Run(): RateGate failed. Sleeping for 110 milliseconds with Thread.Sleep()");
                        Thread.Sleep(110);
                    }

                    tasks.Add(
                        HttpRequester($"/companies/{ticker}/estimates")
                        .ContinueWith(
                            y =>
                    {
                        Interlocked.Increment(ref i); // tasks complete concurrently, so update the counter atomically

                        if (y.IsFaulted)
                        {
                            Log.Error($"EstimizeEstimateDataDownloader.Run(): Failed to get data for {company}");
                            return;
                        }

                        var result = y.Result;
                        if (string.IsNullOrEmpty(result))
                        {
                            // We've already logged inside HttpRequester
                            return;
                        }

                        var estimates = JsonConvert.DeserializeObject <List <EstimizeEstimate> >(result)
                                        .GroupBy(estimate =>
                        {
                            var oldTicker = ticker;
                            var newTicker = ticker;
                            var createdAt = estimate.CreatedAt;

                            try
                            {
                                var mapFile = _mapFileResolver.ResolveMapFile(ticker, createdAt);

                                // Ensure we're writing to the correct historical ticker
                                if (!mapFile.Any())
                                {
                                    Log.Trace($"EstimizeEstimateDataDownloader.Run(): Failed to find map file for: {newTicker} - on: {createdAt}");
                                    return(string.Empty);
                                }

                                newTicker = mapFile.GetMappedSymbol(createdAt);
                                if (string.IsNullOrWhiteSpace(newTicker))
                                {
                                    Log.Trace($"EstimizeEstimateDataDownloader.Run(): New ticker is null. Old ticker: {oldTicker} - on: {createdAt}");
                                    return(string.Empty);
                                }

                                if (oldTicker != newTicker)
                                {
                                    Log.Trace($"EstimizeEstimateDataDonwloader.Run(): Remapping {oldTicker} to {newTicker}");
                                }
                            }
                            // Rarely we get a failure inside the map file constructor: it tries
                            // to access the last element of an empty list (possibly an upstream bug)
                            catch (InvalidOperationException e)
                            {
                                Log.Error(e, $"EstimizeEstimateDataDownloader.Run(): Failed to load map file for: {oldTicker} - on {createdAt}");
                                return(string.Empty);
                            }

                            return(newTicker);
                        })
                                        .Where(kvp => !string.IsNullOrEmpty(kvp.Key));

                        foreach (var kvp in estimates)
                        {
                            var csvContents = kvp.Select(x => $"{x.CreatedAt.ToUniversalTime():yyyyMMdd HH:mm:ss},{x.Id},{x.AnalystId},{x.UserName},{x.FiscalYear},{x.FiscalQuarter},{x.Eps},{x.Revenue},{x.Flagged.ToString().ToLower()}");
                            SaveContentToFile(_destinationFolder, kvp.Key, csvContents);
                        }

                        var percentageDone = (double)i / count; // cast to avoid integer division truncating to zero
                        if (percentageDone >= currentPercent)
                        {
                            Log.Trace($"EstimizeEstimateDataDownloader.Run(): {percentageDone:P2} complete");
                            currentPercent += percent;
                        }
                    }
                            )
                        );
                }

                Task.WaitAll(tasks.ToArray());
            }
            catch (Exception e)
            {
                Log.Error(e);
                return(false);
            }

            Log.Trace($"EstimizeEstimateDataDownloader.Run(): Finished in {stopwatch.Elapsed}");
            return(true);
        }
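The grouping step above is the core of the downloader: each estimate is keyed by the ticker it traded under on its CreatedAt date, and groups whose ticker cannot be resolved are dropped. The following self-contained sketch mirrors that shape; the in-memory resolver and the Estimate record are hypothetical stand-ins for MapFileResolver.ResolveMapFile(...).GetMappedSymbol(...) and EstimizeEstimate.

using System;
using System.Collections.Generic;
using System.Linq;

public static class RemapGroupingSketch
{
    // Hypothetical record standing in for EstimizeEstimate: only the fields the grouping needs
    private class Estimate
    {
        public DateTime CreatedAt { get; set; }
        public decimal Eps { get; set; }
    }

    // Hypothetical resolver: maps a present-day ticker to the ticker it traded under on a given date.
    // Returns string.Empty when no mapping exists, mirroring the "skip this group" convention above.
    private static string ResolveHistoricalTicker(string ticker, DateTime date)
    {
        if (ticker == "META" && date < new DateTime(2022, 6, 9)) return "FB";
        return ticker;
    }

    public static void Main()
    {
        var estimates = new List<Estimate>
        {
            new Estimate { CreatedAt = new DateTime(2021, 1, 4), Eps = 3.1m },
            new Estimate { CreatedAt = new DateTime(2023, 1, 4), Eps = 2.2m }
        };

        // Group by the ticker each estimate traded under when it was created,
        // then drop groups whose ticker could not be resolved
        var grouped = estimates
            .GroupBy(e => ResolveHistoricalTicker("META", e.CreatedAt))
            .Where(g => !string.IsNullOrEmpty(g.Key));

        foreach (var g in grouped)
        {
            Console.WriteLine($"{g.Key}: {g.Count()} estimate(s)");
        }
        // FB: 1 estimate(s)
        // META: 1 estimate(s)
    }
}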
Example No. 24
0
        /// <summary>
        /// Subscription data reader takes a subscription request, loads the type, accepts the data source and enumerate on the results.
        /// </summary>
        /// <param name="config">Subscription configuration object</param>
        /// <param name="periodStart">Start date for the data request/backtest</param>
        /// <param name="periodFinish">Finish date for the data request/backtest</param>
        /// <param name="resultHandler">Result handler used to push error messages and perform sampling on skipped days</param>
        /// <param name="mapFileResolver">Used for resolving the correct map files</param>
        /// <param name="factorFileProvider">Used for getting factor files</param>
        /// <param name="tradeableDates">Defines the dates for which we'll request data, in order, in the security's exchange time zone</param>
        /// <param name="isLiveMode">True if we're in live mode, false otherwise</param>
        /// <param name="includeAuxilliaryData">True if we want to emit aux data, false to only emit price data</param>
        public SubscriptionDataReader(SubscriptionDataConfig config,
                                      DateTime periodStart,
                                      DateTime periodFinish,
                                      IResultHandler resultHandler,
                                      MapFileResolver mapFileResolver,
                                      IFactorFileProvider factorFileProvider,
                                      IEnumerable <DateTime> tradeableDates,
                                      bool isLiveMode,
                                      bool includeAuxilliaryData = true)
        {
            //Save configuration of data-subscription:
            _config = config;

            _auxiliaryData = new Queue <BaseData>();

            //Save Start and End Dates:
            _periodStart  = periodStart;
            _periodFinish = periodFinish;

            //Save access to securities
            _isLiveMode            = isLiveMode;
            _includeAuxilliaryData = includeAuxilliaryData;

            //Save the type of data we'll be getting from the source.

            //Create the dynamic type-activators:
            var objectActivator = ObjectActivator.GetActivator(config.Type);

            _resultHandler  = resultHandler;
            _tradeableDates = tradeableDates.GetEnumerator();
            if (objectActivator == null)
            {
                _resultHandler.ErrorMessage("Custom data type '" + config.Type.Name + "' missing parameterless constructor E.g. public " + config.Type.Name + "() { }");
                _endOfStream = true;
                return;
            }

            //Create an instance of the "Type":
            var userObj = objectActivator.Invoke(new object[] {});

            _dataFactory = userObj as BaseData;

            // If it's Quandl, set the access token in the data factory:
            var quandl = _dataFactory as Quandl;

            if (quandl != null)
            {
                if (!Quandl.IsAuthCodeSet)
                {
                    Quandl.SetAuthCode(Config.Get("quandl-auth-token"));
                }
            }

            _factorFile = new FactorFile(config.Symbol.Value, new List <FactorFileRow>());
            _mapFile    = new MapFile(config.Symbol.Value, new List <MapFileRow>());

            // load up the map and factor files for equities
            if (!config.IsCustomData && config.SecurityType == SecurityType.Equity)
            {
                try
                {
                    var mapFile = mapFileResolver.ResolveMapFile(config.Symbol.ID.Symbol, config.Symbol.ID.Date);

                    // only take the resolved map file if it has data, otherwise we'll use the empty one we defined above
                    if (mapFile.Any())
                    {
                        _mapFile = mapFile;
                    }

                    var factorFile = factorFileProvider.Get(_config.Symbol);
                    _hasScaleFactors = factorFile != null;
                    if (_hasScaleFactors)
                    {
                        _factorFile = factorFile;
                    }
                }
                catch (Exception err)
                {
                    Log.Error(err, "Fetching Price/Map Factors: " + config.Symbol.ID + ": ");
                }
            }

            _subscriptionFactoryEnumerator = ResolveDataEnumerator(true);
        }
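The constructor above never leaves _mapFile or _factorFile null: it starts from an empty placeholder and only swaps in the resolved file when it actually contains rows. A minimal sketch of that defaulting pattern, with a hypothetical FakeMapFile standing in for the real MapFile type, is shown below.

using System;
using System.Collections.Generic;
using System.Linq;

public static class EmptyFallbackSketch
{
    // Hypothetical stand-in for MapFile: just a symbol with a list of row dates
    private class FakeMapFile
    {
        public string Symbol { get; }
        public List<DateTime> Rows { get; }
        public FakeMapFile(string symbol, List<DateTime> rows) { Symbol = symbol; Rows = rows; }
    }

    // Hypothetical resolver: returns rows for SPY, an empty file for anything else
    private static FakeMapFile Resolve(string symbol)
    {
        return symbol == "SPY"
            ? new FakeMapFile(symbol, new List<DateTime> { new DateTime(1998, 1, 2) })
            : new FakeMapFile(symbol, new List<DateTime>());
    }

    public static void Main()
    {
        foreach (var symbol in new[] { "SPY", "UNKNOWN" })
        {
            // Start with an empty placeholder, as the constructor above does,
            // so downstream code never needs a null check
            var mapFileToUse = new FakeMapFile(symbol, new List<DateTime>());

            var resolved = Resolve(symbol);
            if (resolved.Rows.Any())
            {
                mapFileToUse = resolved; // only adopt the resolved file when it has rows
            }

            Console.WriteLine($"{mapFileToUse.Symbol}: {mapFileToUse.Rows.Count} row(s)");
        }
        // SPY: 1 row(s)
        // UNKNOWN: 0 row(s)
    }
}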
Example No. 25
0
        /// <summary>
        /// Iterates over each daily file in the specified <paramref name="dailyFolder"/> and adds a line for each
        /// day to the appropriate coarse file
        /// </summary>
        /// <param name="dailyFolder">The folder with daily data</param>
        /// <param name="coarseFolder">The coarse output folder</param>
        /// <param name="mapFileResolver"></param>
        /// <param name="exclusions">The symbols to be excluded from processing</param>
        /// <param name="ignoreMapless">Ignore the symbols without a map file.</param>
        /// <param name="startDate">The starting date for processing</param>
        /// <param name="symbolResolver">Function used to provide symbol resolution. Default resolution uses the zip file name to resolve
        /// the symbol, specify null for this behavior.</param>
        /// <returns>A collection of the generated coarse files</returns>
        public static ICollection<string> ProcessDailyFolder(string dailyFolder, string coarseFolder, MapFileResolver mapFileResolver, HashSet<string> exclusions, bool ignoreMapless, DateTime startDate, Func<string, string> symbolResolver = null)
        {
            const decimal scaleFactor = 10000m;

            Log.Trace("Processing: {0}", dailyFolder);

            var start = DateTime.UtcNow;

            // load map files into memory

            var symbols = 0;
            var maplessCount = 0;
            var dates = new HashSet<DateTime>();

            // instead of opening/closing these constantly, open them once and dispose at the end (~3x speed improvement)
            var writers = new Dictionary<string, StreamWriter>();

            var dailyFolderDirectoryInfo = new DirectoryInfo(dailyFolder).Parent;
            if (dailyFolderDirectoryInfo == null)
            {
                throw new Exception("Unable to resolve market for daily folder: " + dailyFolder);
            }
            var market = dailyFolderDirectoryInfo.Name.ToLower();

            var fundamentalDirectoryInfo = new DirectoryInfo(coarseFolder).Parent;
            if (fundamentalDirectoryInfo == null)
            {
                throw new Exception("Unable to resolve fundamental path for coarse folder: " + coarseFolder);
            }
            var fineFundamentalFolder = Path.Combine(fundamentalDirectoryInfo.FullName, "fine");

            // open up each daily file to get the values and append to the daily coarse files
            foreach (var file in Directory.EnumerateFiles(dailyFolder))
            {
                try
                {
                    var symbol = Path.GetFileNameWithoutExtension(file);
                    if (symbol == null)
                    {
                        Log.Trace("CoarseGenerator.ProcessDailyFolder(): Unable to resolve symbol from file: {0}", file);
                        continue;
                    }

                    if (symbolResolver != null)
                    {
                        symbol = symbolResolver(symbol);
                    }

                    symbol = symbol.ToUpper();

                    if (exclusions.Contains(symbol))
                    {
                        Log.Trace("Excluded symbol: {0}", symbol);
                        continue;
                    }

                    // check if symbol has any fine fundamental data
                    var firstFineSymbolDate = DateTime.MaxValue;
                    if (Directory.Exists(fineFundamentalFolder))
                    {
                        var fineSymbolFolder = Path.Combine(fineFundamentalFolder, symbol.ToLower());

                        var firstFineSymbolFileName = Directory.Exists(fineSymbolFolder) ? Directory.GetFiles(fineSymbolFolder).OrderBy(x => x).FirstOrDefault() : string.Empty;
                        if (firstFineSymbolFileName.Length > 0)
                        {
                            firstFineSymbolDate = DateTime.ParseExact(Path.GetFileNameWithoutExtension(firstFineSymbolFileName), "yyyyMMdd", CultureInfo.InvariantCulture);
                        }
                    }

                    ZipFile zip;
                    using (var reader = Compression.Unzip(file, out zip))
                    {
                        // 30 period EMA constant
                        const decimal k = 2m / (30 + 1);

                        var seeded = false;
                        var runningAverageVolume = 0m;

                        var checkedForMapFile = false;

                        symbols++;
                        string line;
                        while ((line = reader.ReadLine()) != null)
                        {
                            //20150625.csv
                            var csv = line.Split(',');
                            var date = DateTime.ParseExact(csv[0], DateFormat.TwelveCharacter, CultureInfo.InvariantCulture);
                            
                            // spin past old data
                            if (date < startDate) continue;

                            if (ignoreMapless && !checkedForMapFile)
                            {
                                checkedForMapFile = true;
                                if (!mapFileResolver.ResolveMapFile(symbol, date).Any())
                                {
                                    // if the resolved map file has zero entries then it's a mapless symbol
                                    maplessCount++;
                                    break;
                                }
                            }

                            var close = decimal.Parse(csv[4], CultureInfo.InvariantCulture) / scaleFactor;
                            var volume = long.Parse(csv[5], CultureInfo.InvariantCulture);

                            // compute the current volume EMA for dollar volume calculations
                            runningAverageVolume = seeded
                                ? volume * k + runningAverageVolume * (1 - k)
                                : volume;

                            seeded = true;

                            var dollarVolume = close * runningAverageVolume;

                            var coarseFile = Path.Combine(coarseFolder, date.ToString("yyyyMMdd") + ".csv");
                            dates.Add(date);

                            // try to resolve a map file and if found, regen the sid
                            var sid = SecurityIdentifier.GenerateEquity(SecurityIdentifier.DefaultDate, symbol, market);
                            var mapFile = mapFileResolver.ResolveMapFile(symbol, date);
                            if (!mapFile.IsNullOrEmpty())
                            {
                                // if available, use the permtick in the coarse files; because of this, we need
                                // to update the coarse files each time new map files are added or permticks change
                                sid = SecurityIdentifier.GenerateEquity(mapFile.FirstDate, mapFile.OrderBy(x => x.Date).First().MappedSymbol, market);
                            }
                            if (mapFile == null && ignoreMapless)
                            {
                                // if we're ignoring mapless files then we should always be able to resolve this
                                Log.Error(string.Format("CoarseGenerator.ProcessDailyFolder(): Unable to resolve map file for {0} as of {1}", symbol, date.ToShortDateString()));
                                continue;
                            }

                            // check if symbol has fine fundamental data for the current date
                            var hasFundamentalDataForDate = date >= firstFineSymbolDate;

                            // sid,symbol,close,volume,dollar volume,has fundamental data
                            var coarseFileLine = sid + "," + symbol + "," + close + "," + volume + "," + Math.Truncate(dollarVolume) + "," + hasFundamentalDataForDate;

                            StreamWriter writer;
                            if (!writers.TryGetValue(coarseFile, out writer))
                            {
                                writer = new StreamWriter(new FileStream(coarseFile, FileMode.Create, FileAccess.Write, FileShare.Write));
                                writers[coarseFile] = writer;
                            }
                            writer.WriteLine(coarseFileLine);
                        }
                    }

                    if (symbols % 1000 == 0)
                    {
                        Log.Trace("CoarseGenerator.ProcessDailyFolder(): Completed processing {0} symbols. Current elapsed: {1} seconds", symbols, (DateTime.UtcNow - start).TotalSeconds.ToString("0.00"));
                    }
                }
                catch (Exception err)
                {
                    // log the error and continue with the process
                    Log.Error(err.ToString());
                }
            }

            Log.Trace("CoarseGenerator.ProcessDailyFolder(): Saving {0} coarse files to disk", dates.Count);

            // dispose all the writers at the end of processing
            foreach (var writer in writers)
            {
                writer.Value.Dispose();
            }

            var stop = DateTime.UtcNow;

            Log.Trace("CoarseGenerator.ProcessDailyFolder(): Processed {0} symbols into {1} coarse files in {2} seconds", symbols, dates.Count, (stop - start).TotalSeconds.ToString("0.00"));
            Log.Trace("CoarseGenerator.ProcessDailyFolder(): Excluded {0} mapless symbols.", maplessCount);

            return writers.Keys;
        }
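The dollar volume written to each coarse line is the close price multiplied by a 30-period exponential moving average of volume, seeded with the first observation. The standalone sketch below reproduces that calculation with hypothetical close/volume values in place of the daily CSV data.

using System;

public static class DollarVolumeEmaSketch
{
    public static void Main()
    {
        // 30-period EMA smoothing constant, as in ProcessDailyFolder above
        const decimal k = 2m / (30 + 1);

        // Hypothetical daily closes and volumes; the real values come from the daily CSV files
        var closes  = new[] { 100.0m, 101.5m, 99.8m };
        var volumes = new long[] { 1000000, 1200000, 900000 };

        var seeded = false;
        var runningAverageVolume = 0m;

        for (var i = 0; i < closes.Length; i++)
        {
            // Seed with the first observation, then apply the standard EMA recurrence
            runningAverageVolume = seeded
                ? volumes[i] * k + runningAverageVolume * (1 - k)
                : volumes[i];
            seeded = true;

            var dollarVolume = closes[i] * runningAverageVolume;
            Console.WriteLine($"close={closes[i]} emaVolume={runningAverageVolume:F0} dollarVolume={Math.Truncate(dollarVolume)}");
        }
    }
}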