Ejemplo n.º 1
0
        public void TestAlgorithmManagerSpeed()
        {
            var algorithmManager = new AlgorithmManager(false);
            var algorithm        = PerformanceBenchmarkAlgorithms.SingleSecurity_Second;
            var job          = new BacktestNodePacket(1, 2, "3", null, 9m, $"{nameof(AlgorithmManagerTests)}.{nameof(TestAlgorithmManagerSpeed)}");
            var feed         = new MockDataFeed();
            var transactions = new BacktestingTransactionHandler();
            var results      = new BacktestingResultHandler();
            var realtime     = new BacktestingRealTimeHandler();
            var leanManager  = new NullLeanManager();
            var alphas       = new NullAlphaHandler();
            var token        = new CancellationToken();

            algorithm.Initialize();
            results.Initialize(job, new QuantConnect.Messaging.Messaging(), new Api.Api(), feed, new BacktestingSetupHandler(), transactions);
            results.SetAlgorithm(algorithm);
            transactions.Initialize(algorithm, new BacktestingBrokerage(algorithm), results);
            feed.Initialize(algorithm, job, results, null, null, null);

            Log.Trace("Starting algorithm manager loop to process " + feed.Count + " time slices");
            var sw = Stopwatch.StartNew();

            algorithmManager.Run(job, algorithm, feed, transactions, results, realtime, leanManager, alphas, token);
            sw.Stop();

            var thousands = feed.Count / 1000d;
            var seconds   = sw.Elapsed.TotalSeconds;

            Log.Trace("COUNT: " + feed.Count + "  KPS: " + thousands / seconds);
        }
Ejemplo n.º 2
0
        public void DoesNotLeakMemory()
        {
            var symbol    = Symbols.AAPL;
            var startDate = new DateTime(2014, 4, 30);
            var endDate   = new DateTime(2014, 4, 30);

            var config       = new SubscriptionDataConfig(typeof(FineFundamental), symbol, Resolution.Daily, TimeZones.NewYork, TimeZones.NewYork, false, false, false, false, TickType.Trade, false);
            var security     = new Security(SecurityExchangeHours.AlwaysOpen(TimeZones.NewYork), config, new Cash(CashBook.AccountCurrency, 0, 1), SymbolProperties.GetDefault(CashBook.AccountCurrency));
            var request      = new SubscriptionRequest(false, null, security, config, startDate, endDate);
            var fileProvider = new DefaultDataProvider();
            var factory      = new FineFundamentalSubscriptionEnumeratorFactory(false);

            GC.Collect();
            var ramUsageBeforeLoop = OS.TotalPhysicalMemoryUsed;

            const int iterations = 1000;

            for (var i = 0; i < iterations; i++)
            {
                using (var enumerator = factory.CreateEnumerator(request, fileProvider))
                {
                    enumerator.MoveNext();
                }
            }

            GC.Collect();
            var ramUsageAfterLoop = OS.TotalPhysicalMemoryUsed;

            Log.Trace($"RAM usage - before: {ramUsageBeforeLoop} MB, after: {ramUsageAfterLoop} MB");

            Assert.IsTrue(ramUsageAfterLoop - ramUsageBeforeLoop < 10);
        }
        public void ReadsFineFundamental(FineFundamentalTestParameters parameters)
        {
            var stopwatch = Stopwatch.StartNew();
            var rows      = new List <FineFundamental>();

            var config   = new SubscriptionDataConfig(typeof(FineFundamental), parameters.Symbol, Resolution.Daily, TimeZones.NewYork, TimeZones.NewYork, false, false, false, false, TickType.Trade, false);
            var security = new Security(
                SecurityExchangeHours.AlwaysOpen(TimeZones.NewYork),
                config,
                new Cash(Currencies.USD, 0, 1),
                SymbolProperties.GetDefault(Currencies.USD),
                ErrorCurrencyConverter.Instance,
                RegisteredSecurityDataTypesProvider.Null,
                new SecurityCache()
                );
            var request      = new SubscriptionRequest(false, null, security, config, parameters.StartDate, parameters.EndDate);
            var fileProvider = new DefaultDataProvider();

            var factory    = new FineFundamentalSubscriptionEnumeratorFactory(parameters.LiveMode);
            var enumerator = factory.CreateEnumerator(request, fileProvider);

            while (enumerator.MoveNext())
            {
                var current = enumerator.Current as FineFundamental;
                rows.Add(current);
            }

            stopwatch.Stop();
            Log.Trace("Total rows: {0}, elapsed time: {1}", rows.Count, stopwatch.Elapsed);

            Assert.AreEqual(parameters.RowCount, rows.Count);

            if (parameters.RowCount != 1)
            {
                return;
            }

            var row = rows[0];

            Assert.AreEqual(parameters.CompanyShortName, row.CompanyReference.ShortName);
            Assert.AreEqual(parameters.Symbol, row.Symbol);
            Assert.IsTrue(row.CompanyReference.PrimarySymbol == parameters.Symbol.Value || row.CompanyReference.PrimarySymbol == null);
            Assert.IsTrue(row.SecurityReference.SecuritySymbol == parameters.Symbol.Value || row.SecurityReference.SecuritySymbol == null);
            Assert.AreEqual(parameters.Ebitda3M, row.FinancialStatements.IncomeStatement.EBITDA.ThreeMonths);
            Assert.AreEqual(parameters.Ebitda12M, row.FinancialStatements.IncomeStatement.EBITDA.TwelveMonths);
            Assert.AreEqual(parameters.Ebitda12M, (decimal)row.FinancialStatements.IncomeStatement.EBITDA);
            Assert.AreEqual(parameters.CostOfRevenue3M, row.FinancialStatements.IncomeStatement.CostOfRevenue.ThreeMonths);
            Assert.AreEqual(parameters.CostOfRevenue12M, row.FinancialStatements.IncomeStatement.CostOfRevenue.TwelveMonths);
            Assert.AreEqual(parameters.CostOfRevenue12M, (decimal)row.FinancialStatements.IncomeStatement.CostOfRevenue);
            Assert.AreEqual(parameters.EquityPerShareGrowth1Y, row.EarningRatios.EquityPerShareGrowth.OneYear);
            Assert.AreEqual(parameters.EquityPerShareGrowth1Y, (decimal)row.EarningRatios.EquityPerShareGrowth);
            Assert.AreEqual(parameters.PeRatio, row.ValuationRatios.PERatio);
            if (!parameters.FinancialHealthGrade.IsNullOrEmpty())
            {
                Assert.AreEqual(parameters.FinancialHealthGrade, row.AssetClassification.FinancialHealthGrade);
            }
            enumerator.Dispose();
        }
Ejemplo n.º 4
0
        public void TestAlgorithmManagerSpeed()
        {
            var algorithm        = PerformanceBenchmarkAlgorithms.SingleSecurity_Second;
            var algorithmManager = new AlgorithmManager(false);
            var job  = new BacktestNodePacket(1, 2, "3", null, 9m, $"{nameof(AlgorithmManagerTests)}.{nameof(TestAlgorithmManagerSpeed)}");
            var feed = new MockDataFeed();
            var marketHoursDatabase      = MarketHoursDatabase.FromDataFolder();
            var symbolPropertiesDataBase = SymbolPropertiesDatabase.FromDataFolder();
            var dataPermissionManager    = new DataPermissionManager();
            var dataManager = new DataManager(feed,
                                              new UniverseSelection(
                                                  algorithm,
                                                  new SecurityService(algorithm.Portfolio.CashBook,
                                                                      marketHoursDatabase,
                                                                      symbolPropertiesDataBase,
                                                                      algorithm,
                                                                      RegisteredSecurityDataTypesProvider.Null,
                                                                      new SecurityCacheProvider(algorithm.Portfolio)),
                                                  dataPermissionManager,
                                                  new DefaultDataProvider()),
                                              algorithm,
                                              algorithm.TimeKeeper,
                                              marketHoursDatabase,
                                              false,
                                              RegisteredSecurityDataTypesProvider.Null,
                                              dataPermissionManager);

            algorithm.SubscriptionManager.SetDataManager(dataManager);
            var transactions     = new BacktestingTransactionHandler();
            var results          = new BacktestingResultHandler();
            var realtime         = new BacktestingRealTimeHandler();
            var leanManager      = new NullLeanManager();
            var alphas           = new NullAlphaHandler();
            var token            = new CancellationToken();
            var nullSynchronizer = new NullSynchronizer(algorithm);

            algorithm.Initialize();
            algorithm.PostInitialize();

            results.Initialize(job, new QuantConnect.Messaging.Messaging(), new Api.Api(), transactions);
            results.SetAlgorithm(algorithm, algorithm.Portfolio.TotalPortfolioValue);
            transactions.Initialize(algorithm, new BacktestingBrokerage(algorithm), results);
            feed.Initialize(algorithm, job, results, null, null, null, dataManager, null, null);

            Log.Trace("Starting algorithm manager loop to process " + nullSynchronizer.Count + " time slices");
            var sw = Stopwatch.StartNew();

            algorithmManager.Run(job, algorithm, nullSynchronizer, transactions, results, realtime, leanManager, alphas, token);
            sw.Stop();

            realtime.Exit();
            results.Exit();
            var thousands = nullSynchronizer.Count / 1000d;
            var seconds   = sw.Elapsed.TotalSeconds;

            Log.Trace("COUNT: " + nullSynchronizer.Count + "  KPS: " + thousands / seconds);
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Reads the specified exclusions file into a new hash set.
        /// Returns an empty set if the file does not exist
        /// </summary>
        public static HashSet <string> ReadExclusionsFile(string exclusionsFile)
        {
            var exclusions = new HashSet <string>();

            if (File.Exists(exclusionsFile))
            {
                var excludedSymbols = File.ReadLines(exclusionsFile).Select(x => x.Trim()).Where(x => !x.StartsWith("#"));
                exclusions = new HashSet <string>(excludedSymbols, StringComparer.InvariantCultureIgnoreCase);
                Log.Trace("CoarseGenerator.ReadExclusionsFile(): Loaded {0} symbols into the exclusion set", exclusions.Count);
            }
            return(exclusions);
        }
        /// <summary>
        /// Populates the sid contex.
        /// </summary>
        /// <param name="mapFileResolver">The map file resolver.</param>
        /// <param name="exclusions">The exclusions.</param>
        /// <returns></returns>
        private IEnumerable <SecurityIdentifierContext> PopulateSidContex(MapFileResolver mapFileResolver, HashSet <string> exclusions)
        {
            Log.Trace($"CoarseUniverseGeneratorProgram.PopulateSidContex(): Generating SID context from QuantQuote's map files.");
            foreach (var mapFile in mapFileResolver)
            {
                if (exclusions.Contains(mapFile.Last().MappedSymbol))
                {
                    continue;
                }

                yield return(new SecurityIdentifierContext(mapFile, _market));
            }
        }
Ejemplo n.º 7
0
 /// <summary>
 /// Sends an update of the current optimization status to the user
 /// </summary>
 protected override void SendUpdate()
 {
     // end handler will already log a nice message on end
     if (Status != OptimizationStatus.Ended && Status != OptimizationStatus.Aborted)
     {
         var currentEstimate     = GetCurrentEstimate();
         var message             = $"ConsoleLeanOptimizer.SendUpdate(): {currentEstimate}";
         var currentBestBacktest = Strategy.Solution;
         if (currentBestBacktest != null)
         {
             message += $". Best id:'{currentBestBacktest.BacktestId}'. {OptimizationTarget}. Parameters ({currentBestBacktest.ParameterSet})";
         }
         Log.Trace(message);
     }
 }
Ejemplo n.º 8
0
 /// <summary>
 /// Sends an update of the current optimization status to the user
 /// </summary>
 protected override void SendUpdate()
 {
     // end handler will already log a nice message on end
     if (Status != OptimizationStatus.Completed && Status != OptimizationStatus.Aborted)
     {
         var currentEstimate     = GetCurrentEstimate();
         var stats               = GetRuntimeStatistics();
         var message             = $"ConsoleLeanOptimizer.SendUpdate(): {currentEstimate} {string.Join(", ", stats.Select(pair => $"{pair.Key}:{pair.Value}"))}";
         var currentBestBacktest = Strategy.Solution;
         if (currentBestBacktest != null)
         {
             message += $". Best id:'{currentBestBacktest.BacktestId}'. {OptimizationTarget}. Parameters ({currentBestBacktest.ParameterSet})";
         }
         Log.Trace(message);
     }
 }
Ejemplo n.º 9
0
        /// <summary>
        /// This program generates the coarse files requires by lean for universe selection.
        /// Universe selection is planned to happen in two stages, the first stage, the 'coarse'
        /// stage serves to cull the set using coarse filters, such as price, market, and dollar volume.
        /// Later we'll support full fundamental data such as ratios and financial statements, and these
        /// would be run AFTER the initial coarse filter
        ///
        /// The files are generated from LEAN formatted daily trade bar equity files
        /// </summary>
        /// <param name="args">Unused argument</param>
        public static void Main(string[] args)
        {
            // read out the configuration file
            JToken jtoken;
            var    config = JObject.Parse(File.ReadAllText("CoarseUniverseGenerator/config.json"));

            var      ignoreMaplessSymbols = false;
            var      updateMode           = false;
            var      updateTime           = TimeSpan.Zero;
            DateTime?startDate            = null;

            if (config.TryGetValue("update-mode", out jtoken))
            {
                updateMode = jtoken.Value <bool>();
                if (config.TryGetValue("update-time-of-day", out jtoken))
                {
                    updateTime = TimeSpan.Parse(jtoken.Value <string>());
                }
            }

            var dataDirectory = Globals.DataFolder;

            if (config.TryGetValue("data-directory", out jtoken))
            {
                dataDirectory = jtoken.Value <string>();
            }

            //Ignore symbols without a map file:
            // Typically these are nothing symbols (NASDAQ test symbols, or symbols listed for a few days who aren't actually ever traded).
            if (config.TryGetValue("ignore-mapless", out jtoken))
            {
                ignoreMaplessSymbols = jtoken.Value <bool>();
            }

            if (config.TryGetValue("coarse-universe-generator-start-date", out jtoken))
            {
                string startDateStr = jtoken.Value <string>();
                startDate = DateTime.ParseExact(startDateStr, "yyyyMMdd", null);
                Log.Trace("Generating coarse data from {0}", startDate);
            }

            do
            {
                ProcessEquityDirectories(dataDirectory, ignoreMaplessSymbols, startDate);
            }while (WaitUntilTimeInUpdateMode(updateMode, updateTime));
        }
        /// <summary>
        /// Checks if there is fundamental data for
        /// </summary>
        /// <param name="ticker">The ticker.</param>
        /// <param name="date">The date.</param>
        /// <param name="mapFile">The map file.</param>
        /// <param name="fineAvailableDates"></param>
        /// <param name="fineFundamentalFolder">The fine fundamental folder.</param>
        /// <returns></returns>
        private static bool CheckFundamentalData(DateTime date, MapFile mapFile, IEnumerable <DateTime> fineAvailableDates, DirectoryInfo fineFundamentalFolder)
        {
            // Check if security has fine file within a trailing month for a date-ticker set.
            // There are tricky cases where a folder named by a ticker can have data for multiple securities.
            // e.g  GOOG -> GOOGL (GOOG T1AZ164W5VTX) / GOOCV -> GOOG (GOOCV VP83T1ZUHROL) case.
            // The fine data in the 'fundamental/fine/goog' folder will be for 'GOOG T1AZ164W5VTX' up to the 2014-04-02 and for 'GOOCV VP83T1ZUHROL' afterward.
            // Therefore, date before checking if the security has fundamental data for a date, we need to filter the fine files the map's first date.
            var firstDate = mapFile?.FirstDate ?? DateTime.MinValue;
            var hasFundamentalDataForDate = fineAvailableDates.Where(d => d >= firstDate).Any(d => date.AddMonths(-1) <= d && d <= date);

            // The following section handles mergers and acquisitions cases.
            // e.g. YHOO -> AABA (YHOO R735QTJ8XC9X)
            // The dates right after the acquisition, valid fine fundamental data for AABA are still under the former ticker folder.
            // Therefore if no fine fundamental data is found in the 'fundamental/fine/aaba' folder, it searches into the 'yhoo' folder.
            if (mapFile != null && mapFile.Count() > 2 && !hasFundamentalDataForDate)
            {
                var previousTicker = mapFile.LastOrDefault(m => m.Date < date)?.MappedSymbol;
                if (previousTicker != null)
                {
                    var previousTickerFineFundamentalFolder = Path.Combine(fineFundamentalFolder.FullName, previousTicker);
                    if (Directory.Exists(previousTickerFineFundamentalFolder))
                    {
                        var previousTickerFineAvailableDates = Directory.GetFiles(previousTickerFineFundamentalFolder, "*.zip")
                                                               .Select(f => DateTime.ParseExact(Path.GetFileNameWithoutExtension(f), DateFormat.EightCharacter, CultureInfo.InvariantCulture))
                                                               .ToList();
                        hasFundamentalDataForDate = previousTickerFineAvailableDates.Where(d => d >= firstDate).Any(d => date.AddMonths(-1) <= d && d <= date);
                    }
                    else
                    {
                        Log.Debug($"CoarseUniverseGeneratorProgram.CheckFundamentalData(): fine folder was not found at '{previousTickerFineFundamentalFolder}'");
                    }
                }
            }

            return(hasFundamentalDataForDate);
        }
Ejemplo n.º 11
0
        /// <summary>
        /// Iterates each daily file in the specified <paramref name="dailyFolder"/> and adds a line for each
        /// day to the approriate coarse file
        /// </summary>
        /// <param name="dailyFolder">The folder with daily data</param>
        /// <param name="coarseFolder">The coarse output folder</param>
        /// <param name="mapFileResolver"></param>
        /// <param name="exclusions">The symbols to be excluded from processing</param>
        /// <param name="ignoreMapless">Ignore the symbols without a map file.</param>
        /// <param name="startDate">The starting date for processing</param>
        /// <param name="symbolResolver">Function used to provide symbol resolution. Default resolution uses the zip file name to resolve
        /// the symbol, specify null for this behavior.</param>
        /// <returns>A collection of the generated coarse files</returns>
        public static ICollection <string> ProcessDailyFolder(string dailyFolder, string coarseFolder, MapFileResolver mapFileResolver, HashSet <string> exclusions, bool ignoreMapless, DateTime startDate, Func <string, string> symbolResolver = null)
        {
            const decimal scaleFactor = 10000m;

            Log.Trace("Processing: {0}", dailyFolder);

            var start = DateTime.UtcNow;

            // load map files into memory

            var symbols      = 0;
            var maplessCount = 0;
            var dates        = new HashSet <DateTime>();

            // instead of opening/closing these constantly, open them once and dispose at the end (~3x speed improvement)
            var writers = new Dictionary <string, StreamWriter>();

            var dailyFolderDirectoryInfo = new DirectoryInfo(dailyFolder).Parent;

            if (dailyFolderDirectoryInfo == null)
            {
                throw new Exception("Unable to resolve market for daily folder: " + dailyFolder);
            }
            var market = dailyFolderDirectoryInfo.Name.ToLower();

            var fundamentalDirectoryInfo = new DirectoryInfo(coarseFolder).Parent;

            if (fundamentalDirectoryInfo == null)
            {
                throw new Exception("Unable to resolve fundamental path for coarse folder: " + coarseFolder);
            }
            var fineFundamentalFolder = Path.Combine(fundamentalDirectoryInfo.FullName, "fine");

            var mapFileProvider    = new LocalDiskMapFileProvider();
            var factorFileProvider = new LocalDiskFactorFileProvider(mapFileProvider);

            // open up each daily file to get the values and append to the daily coarse files
            foreach (var file in Directory.EnumerateFiles(dailyFolder))
            {
                try
                {
                    var symbol = Path.GetFileNameWithoutExtension(file);
                    if (symbol == null)
                    {
                        Log.Trace("CoarseGenerator.ProcessDailyFolder(): Unable to resolve symbol from file: {0}", file);
                        continue;
                    }

                    if (symbolResolver != null)
                    {
                        symbol = symbolResolver(symbol);
                    }

                    symbol = symbol.ToUpper();

                    if (exclusions.Contains(symbol))
                    {
                        Log.Trace("Excluded symbol: {0}", symbol);
                        continue;
                    }

                    // check if symbol has any fine fundamental data
                    var firstFineSymbolDate = DateTime.MaxValue;
                    if (Directory.Exists(fineFundamentalFolder))
                    {
                        var fineSymbolFolder = Path.Combine(fineFundamentalFolder, symbol.ToLower());

                        var firstFineSymbolFileName = Directory.Exists(fineSymbolFolder) ? Directory.GetFiles(fineSymbolFolder).OrderBy(x => x).FirstOrDefault() : string.Empty;
                        if (firstFineSymbolFileName.Length > 0)
                        {
                            firstFineSymbolDate = DateTime.ParseExact(Path.GetFileNameWithoutExtension(firstFineSymbolFileName), "yyyyMMdd", CultureInfo.InvariantCulture);
                        }
                    }

                    ZipFile zip;
                    using (var reader = Compression.Unzip(file, out zip))
                    {
                        var checkedForMapFile = false;

                        symbols++;
                        string line;
                        while ((line = reader.ReadLine()) != null)
                        {
                            //20150625.csv
                            var csv  = line.Split(',');
                            var date = DateTime.ParseExact(csv[0], DateFormat.TwelveCharacter, CultureInfo.InvariantCulture);

                            // spin past old data
                            if (date < startDate)
                            {
                                continue;
                            }

                            if (ignoreMapless && !checkedForMapFile)
                            {
                                checkedForMapFile = true;
                                if (!mapFileResolver.ResolveMapFile(symbol, date).Any())
                                {
                                    // if the resolved map file has zero entries then it's a mapless symbol
                                    maplessCount++;
                                    break;
                                }
                            }

                            var close  = decimal.Parse(csv[4]) / scaleFactor;
                            var volume = long.Parse(csv[5]);

                            var dollarVolume = close * volume;

                            var coarseFile = Path.Combine(coarseFolder, date.ToString("yyyyMMdd") + ".csv");
                            dates.Add(date);

                            // try to resolve a map file and if found, regen the sid
                            var sid     = SecurityIdentifier.GenerateEquity(SecurityIdentifier.DefaultDate, symbol, market);
                            var mapFile = mapFileResolver.ResolveMapFile(symbol, date);
                            if (!mapFile.IsNullOrEmpty())
                            {
                                // if available, us the permtick in the coarse files, because of this, we need
                                // to update the coarse files each time new map files are added/permticks change
                                sid = SecurityIdentifier.GenerateEquity(mapFile.FirstDate, mapFile.OrderBy(x => x.Date).First().MappedSymbol, market);
                            }
                            if (mapFile == null && ignoreMapless)
                            {
                                // if we're ignoring mapless files then we should always be able to resolve this
                                Log.Error(string.Format("CoarseGenerator.ProcessDailyFolder(): Unable to resolve map file for {0} as of {1}", symbol, date.ToShortDateString()));
                                continue;
                            }

                            // check if symbol has fine fundamental data for the current date
                            var hasFundamentalDataForDate = date >= firstFineSymbolDate;

                            // get price and split factors from factor files
                            var leanSymbol    = new Symbol(sid, symbol);
                            var factorFile    = factorFileProvider.Get(leanSymbol);
                            var factorFileRow = factorFile?.GetScalingFactors(date);
                            var priceFactor   = factorFileRow?.PriceFactor ?? 1m;
                            var splitFactor   = factorFileRow?.SplitFactor ?? 1m;

                            // sid,symbol,close,volume,dollar volume,has fundamental data,price factor,split factor
                            var coarseFileLine = $"{sid},{symbol},{close},{volume},{Math.Truncate(dollarVolume)},{hasFundamentalDataForDate},{priceFactor},{splitFactor}";

                            StreamWriter writer;
                            if (!writers.TryGetValue(coarseFile, out writer))
                            {
                                writer = new StreamWriter(new FileStream(coarseFile, FileMode.Create, FileAccess.Write, FileShare.Write));
                                writers[coarseFile] = writer;
                            }
                            writer.WriteLine(coarseFileLine);
                        }
                    }

                    if (symbols % 1000 == 0)
                    {
                        Log.Trace("CoarseGenerator.ProcessDailyFolder(): Completed processing {0} symbols. Current elapsed: {1} seconds", symbols, (DateTime.UtcNow - start).TotalSeconds.ToString("0.00"));
                    }
                }
                catch (Exception err)
                {
                    // log the error and continue with the process
                    Log.Error(err.ToString());
                }
            }

            Log.Trace("CoarseGenerator.ProcessDailyFolder(): Saving {0} coarse files to disk", dates.Count);

            // dispose all the writers at the end of processing
            foreach (var writer in writers)
            {
                writer.Value.Dispose();
            }

            var stop = DateTime.UtcNow;

            Log.Trace("CoarseGenerator.ProcessDailyFolder(): Processed {0} symbols into {1} coarse files in {2} seconds", symbols, dates.Count, (stop - start).TotalSeconds.ToString("0.00"));
            Log.Trace("CoarseGenerator.ProcessDailyFolder(): Excluded {0} mapless symbols.", maplessCount);

            return(writers.Keys);
        }
        /// <summary>
        /// Runs this instance.
        /// </summary>
        /// <returns></returns>
        public bool Run()
        {
            var startTime = DateTime.UtcNow;
            var success   = true;

            Log.Trace($"CoarseUniverseGeneratorProgram.ProcessDailyFolder(): Processing: {_dailyDataFolder.FullName}");

            var symbolsProcessed     = 0;
            var filesRead            = 0;
            var dailyFilesNotFound   = 0;
            var coarseFilesGenerated = 0;

            var mapFileResolver = _mapFileProvider.Get(_market);

            var blackListedTickers = new HashSet <string>();

            if (_blackListedTickersFile.Exists)
            {
                blackListedTickers = File.ReadAllLines(_blackListedTickersFile.FullName).ToHashSet();
            }

            var marketFolder          = _dailyDataFolder.Parent;
            var fineFundamentalFolder = new DirectoryInfo(Path.Combine(marketFolder.FullName, "fundamental", "fine"));

            if (!fineFundamentalFolder.Exists)
            {
                Log.Error($"CoarseUniverseGenerator.Run(): FAIL, Fine Fundamental folder not found at {fineFundamentalFolder}! ");
                return(false);
            }

            var securityIdentifierContexts = PopulateSidContex(mapFileResolver, blackListedTickers);
            var dailyPricesByTicker        = new ConcurrentDictionary <string, List <TradeBar> >();
            var outputCoarseContent        = new ConcurrentDictionary <DateTime, List <string> >();

            var parallelOptions = new ParallelOptions {
                MaxDegreeOfParallelism = Environment.ProcessorCount / 2
            };

            try
            {
                Parallel.ForEach(securityIdentifierContexts, parallelOptions, sidContext =>
                {
                    var symbol      = new Symbol(sidContext.SID, sidContext.LastTicker);
                    var symbolCount = Interlocked.Increment(ref symbolsProcessed);
                    Log.Debug($"CoarseUniverseGeneratorProgram.Run(): Processing {symbol}");
                    var factorFile = _factorFileProvider.Get(symbol);

                    // Populate dailyPricesByTicker with all daily data by ticker for all tickers of this security.
                    foreach (var ticker in sidContext.Tickers)
                    {
                        var dailyFile = new FileInfo(Path.Combine(_dailyDataFolder.FullName, $"{ticker}.zip"));
                        if (!dailyFile.Exists)
                        {
                            Log.Error($"CoarseUniverseGeneratorProgram.Run(): {dailyFile} not found!");
                            Interlocked.Increment(ref dailyFilesNotFound);
                            continue;
                        }

                        if (!dailyPricesByTicker.ContainsKey(ticker))
                        {
                            dailyPricesByTicker.AddOrUpdate(ticker, ParseDailyFile(dailyFile));
                            Interlocked.Increment(ref filesRead);
                        }
                    }

                    // Look for daily data for each ticker of the actual security
                    for (int mapFileRowIndex = sidContext.MapFileRows.Length - 1; mapFileRowIndex >= 1; mapFileRowIndex--)
                    {
                        var ticker    = sidContext.MapFileRows[mapFileRowIndex].Item2.ToLowerInvariant();
                        var endDate   = sidContext.MapFileRows[mapFileRowIndex].Item1;
                        var startDate = sidContext.MapFileRows[mapFileRowIndex - 1].Item1;
                        List <TradeBar> tickerDailyData;
                        if (!dailyPricesByTicker.TryGetValue(ticker, out tickerDailyData))
                        {
                            Log.Error($"CoarseUniverseGeneratorProgram.Run(): Daily data for ticker {ticker.ToUpperInvariant()} not found!");
                            continue;
                        }

                        var tickerFineFundamentalFolder = Path.Combine(fineFundamentalFolder.FullName, ticker);
                        var fineAvailableDates          = Enumerable.Empty <DateTime>();
                        if (Directory.Exists(tickerFineFundamentalFolder))
                        {
                            fineAvailableDates = Directory.GetFiles(tickerFineFundamentalFolder, "*.zip")
                                                 .Select(f => DateTime.ParseExact(Path.GetFileNameWithoutExtension(f), DateFormat.EightCharacter, CultureInfo.InvariantCulture))
                                                 .ToList();
                        }

                        // Get daily data only for the time the ticker was
                        foreach (var tradeBar in tickerDailyData.Where(tb => tb.Time >= startDate && tb.Time <= endDate))
                        {
                            var coarseRow = GenerateFactorFileRow(ticker, sidContext, factorFile, tradeBar, fineAvailableDates, fineFundamentalFolder);

                            outputCoarseContent.AddOrUpdate(tradeBar.Time,
                                                            new List <string> {
                                coarseRow
                            },
                                                            (time, list) =>
                            {
                                lock (list)
                                {
                                    list.Add(coarseRow);
                                    return(list);
                                }
                            });
                        }
                    }

                    if (symbolCount % 1000 == 0)
                    {
                        var elapsed = DateTime.UtcNow - startTime;
                        Log.Trace($"CoarseUniverseGeneratorProgram.Run(): Processed {symbolCount} in {elapsed:g} at {symbolCount / elapsed.TotalMinutes:F2} symbols/minute ");
                    }
                });

                _destinationFolder.Create();
                var startWriting = DateTime.UtcNow;
                Parallel.ForEach(outputCoarseContent, coarseByDate =>
                {
                    var filename = $"{coarseByDate.Key.ToString(DateFormat.EightCharacter, CultureInfo.InvariantCulture)}.csv";
                    var filePath = Path.Combine(_destinationFolder.FullName, filename);
                    Log.Debug($"CoarseUniverseGeneratorProgram.Run(): Saving {filename} with {coarseByDate.Value.Count} entries.");
                    File.WriteAllLines(filePath, coarseByDate.Value.OrderBy(cr => cr));
                    var filesCount = Interlocked.Increment(ref coarseFilesGenerated);
                    if (filesCount % 1000 == 0)
                    {
                        var elapsed = DateTime.UtcNow - startWriting;
                        Log.Trace($"CoarseUniverseGeneratorProgram.Run(): Processed {filesCount} in {elapsed:g} at {filesCount / elapsed.TotalSeconds:F2} files/second ");
                    }
                });

                Log.Trace($"\n\nTotal of {coarseFilesGenerated} coarse files generated in {DateTime.UtcNow - startTime:g}:\n" +
                          $"\t => {filesRead} daily data files read.\n");
            }
            catch (Exception e)
            {
                Log.Error(e, $"CoarseUniverseGeneratorProgram.Run(): FAILED!");
                success = false;
            }

            return(success);
        }
        /// <summary>
        /// Iterates each daily file in the specified <paramref name="dailyFolder"/> and adds a line for each
        /// day to the appropriate coarse file
        /// </summary>
        /// <param name="dailyFolder">The folder with daily data.</param>
        /// <param name="coarseFolder">The coarse output folder.</param>
        /// <param name="mapFileResolver">The map file resolver.</param>
        /// <param name="factorFileProvider">The factor file provider.</param>
        /// <param name="exclusions">The symbols to be excluded from processing.</param>
        /// <param name="ignoreMapless">Ignore the symbols without a map file.</param>
        /// <param name="symbolResolver">Function used to provide symbol resolution. Default resolution uses the zip file name to resolve
        /// the symbol, specify null for this behavior.</param>
        /// <returns>Collection with the names of the newly generated coarse files.</returns>
        /// <exception cref="Exception">
        /// Unable to resolve market for daily folder: " + dailyFolder
        /// or
        /// Unable to resolve fundamental path for coarse folder: " + coarseFolder
        /// </exception>
        public static ICollection <string> ProcessDailyFolder(string dailyFolder, string coarseFolder, MapFileResolver mapFileResolver, IFactorFileProvider factorFileProvider,
                                                              HashSet <string> exclusions, bool ignoreMapless, Func <string, string> symbolResolver = null)
        {
            const decimal scaleFactor = 10000m;

            Log.Trace("Processing: {0}", dailyFolder);

            var start = DateTime.UtcNow;

            // load map files into memory

            var symbols      = 0;
            var maplessCount = 0;
            var dates        = new HashSet <DateTime>();

            // instead of opening/closing these constantly, open them once and dispose at the end (~3x speed improvement)
            var writers = new Dictionary <string, StreamWriter>();

            var marketDirectoryInfo = new DirectoryInfo(dailyFolder).Parent;

            if (marketDirectoryInfo == null)
            {
                throw new Exception($"Unable to resolve market for daily folder: {dailyFolder}");
            }
            var market = marketDirectoryInfo.Name.ToLowerInvariant();

            var fundamentalDirectoryInfo = new DirectoryInfo(coarseFolder).Parent;

            if (fundamentalDirectoryInfo == null)
            {
                throw new Exception($"Unable to resolve fundamental path for coarse folder: {coarseFolder}");
            }
            var fineFundamentalFolder = Path.Combine(marketDirectoryInfo.FullName, "fundamental", "fine");

            // open up each daily file to get the values and append to the daily coarse files
            foreach (var file in Directory.EnumerateFiles(dailyFolder, "*.zip"))
            {
                try
                {
                    var ticker             = Path.GetFileNameWithoutExtension(file);
                    var fineAvailableDates = Enumerable.Empty <DateTime>();

                    var tickerFineFundamentalFolder = Path.Combine(fineFundamentalFolder, ticker);
                    if (Directory.Exists(tickerFineFundamentalFolder))
                    {
                        fineAvailableDates = Directory.GetFiles(tickerFineFundamentalFolder, "*.zip")
                                             .Select(f => DateTime.ParseExact(Path.GetFileNameWithoutExtension(f), DateFormat.EightCharacter, CultureInfo.InvariantCulture))
                                             .ToList();
                    }

                    if (ticker == null)
                    {
                        Log.Trace("CoarseGenerator.ProcessDailyFolder(): Unable to resolve symbol from file: {0}", file);
                        continue;
                    }

                    if (symbolResolver != null)
                    {
                        ticker = symbolResolver(ticker);
                    }

                    ticker = ticker.ToUpperInvariant();

                    if (exclusions != null && exclusions.Contains(ticker))
                    {
                        Log.Trace("Excluded symbol: {0}", ticker);
                        continue;
                    }

                    ZipFile zip;
                    using (var reader = Compression.Unzip(file, out zip))
                    {
                        var checkedForMapFile = false;

                        symbols++;
                        string line;
                        while ((line = reader.ReadLine()) != null)
                        {
                            //20150625.csv
                            var csv  = line.Split(',');
                            var date = DateTime.ParseExact(csv[0], DateFormat.TwelveCharacter, CultureInfo.InvariantCulture);

                            if (ignoreMapless && !checkedForMapFile)
                            {
                                checkedForMapFile = true;
                                if (!mapFileResolver.ResolveMapFile(ticker, date).Any())
                                {
                                    // if the resolved map file has zero entries then it's a mapless symbol
                                    maplessCount++;
                                    break;
                                }
                            }

                            var close  = Parse.Decimal(csv[4]) / scaleFactor;
                            var volume = Parse.Long(csv[5]);

                            var dollarVolume = close * volume;

                            var coarseFile = Path.Combine(coarseFolder, date.ToStringInvariant("yyyyMMdd") + ".csv");
                            dates.Add(date);

                            // try to resolve a map file and if found, regen the sid
                            var sid     = SecurityIdentifier.GenerateEquity(SecurityIdentifier.DefaultDate, ticker, market);
                            var mapFile = mapFileResolver.ResolveMapFile(ticker, date);
                            if (!mapFile.IsNullOrEmpty())
                            {
                                // if available, us the permtick in the coarse files, because of this, we need
                                // to update the coarse files each time new map files are added/permticks change
                                sid = SecurityIdentifier.GenerateEquity(mapFile.FirstDate, mapFile.OrderBy(x => x.Date).First().MappedSymbol, market);
                            }

                            if (mapFile == null && ignoreMapless)
                            {
                                // if we're ignoring mapless files then we should always be able to resolve this
                                Log.Error($"CoarseGenerator.ProcessDailyFolder(): Unable to resolve map file for {ticker} as of {date.ToStringInvariant("d")}");
                                continue;
                            }

                            // get price and split factors from factor files
                            var symbol        = new Symbol(sid, ticker);
                            var factorFile    = factorFileProvider.Get(symbol);
                            var factorFileRow = factorFile?.GetScalingFactors(date);
                            var priceFactor   = factorFileRow?.PriceFactor ?? 1m;
                            var splitFactor   = factorFileRow?.SplitFactor ?? 1m;


                            // Check if security has fine file within a trailing month for a date-ticker set.
                            // There are tricky cases where a folder named by a ticker can have data for multiple securities.
                            // e.g  GOOG -> GOOGL (GOOG T1AZ164W5VTX) / GOOCV -> GOOG (GOOCV VP83T1ZUHROL) case.
                            // The fine data in the 'fundamental/fine/goog' folder will be for 'GOOG T1AZ164W5VTX' up to the 2014-04-02 and for 'GOOCV VP83T1ZUHROL' afterward.
                            // Therefore, date before checking if the security has fundamental data for a date, we need to filter the fine files the map's first date.
                            var firstDate = mapFile?.FirstDate ?? DateTime.MinValue;
                            var hasFundamentalDataForDate = fineAvailableDates.Where(d => d >= firstDate).Any(d => date.AddMonths(-1) <= d && d <= date);

                            // The following section handles mergers and acquisitions cases.
                            // e.g. YHOO -> AABA (YHOO R735QTJ8XC9X)
                            // The dates right after the acquisition, valid fine fundamental data for AABA are still under the former ticker folder.
                            // Therefore if no fine fundamental data is found in the 'fundamental/fine/aaba' folder, it searches into the 'yhoo' folder.
                            if (mapFile != null && mapFile.Count() > 2 && !hasFundamentalDataForDate)
                            {
                                var previousTicker = mapFile.LastOrDefault(m => m.Date < date)?.MappedSymbol;
                                if (previousTicker != null)
                                {
                                    var previousTickerFineFundamentalFolder = Path.Combine(fineFundamentalFolder, previousTicker);
                                    if (Directory.Exists(previousTickerFineFundamentalFolder))
                                    {
                                        var previousTickerFineAvailableDates = Directory.GetFiles(previousTickerFineFundamentalFolder, "*.zip")
                                                                               .Select(f => DateTime.ParseExact(Path.GetFileNameWithoutExtension(f), DateFormat.EightCharacter, CultureInfo.InvariantCulture))
                                                                               .ToList();
                                        hasFundamentalDataForDate = previousTickerFineAvailableDates.Where(d => d >= firstDate).Any(d => date.AddMonths(-1) <= d && d <= date);
                                    }
                                }
                            }

                            // sid,symbol,close,volume,dollar volume,has fundamental data,price factor,split factor
                            var coarseFileLine = string.Format(
                                CultureInfo.InvariantCulture,
                                "{0},{1},{2},{3},{4},{5},{6},{7}",
                                sid,
                                ticker,
                                close,
                                volume,
                                Math.Truncate(dollarVolume),
                                hasFundamentalDataForDate,
                                priceFactor,
                                splitFactor);

                            StreamWriter writer;
                            if (!writers.TryGetValue(coarseFile, out writer))
                            {
                                writer = new StreamWriter(new FileStream(coarseFile, FileMode.Create, FileAccess.Write, FileShare.Write));
                                writers[coarseFile] = writer;
                            }
                            writer.WriteLine(coarseFileLine);
                        }
                    }

                    if (symbols % 1000 == 0)
                    {
                        Log.Trace($"CoarseGenerator.ProcessDailyFolder(): Completed processing {symbols} symbols. Current elapsed: {(DateTime.UtcNow - start).TotalSeconds.ToStringInvariant("0.00")} seconds");
                    }
                }
                catch (Exception err)
                {
                    // log the error and continue with the process
                    Log.Error(err.ToString());
                }
            }

            Log.Trace("CoarseGenerator.ProcessDailyFolder(): Saving {0} coarse files to disk", dates.Count);

            // dispose all the writers at the end of processing
            foreach (var writer in writers)
            {
                writer.Value.Dispose();
            }

            var stop = DateTime.UtcNow;

            Log.Trace($"CoarseGenerator.ProcessDailyFolder(): Processed {symbols} symbols into {dates.Count} coarse files in {(stop - start).TotalSeconds.ToStringInvariant("0.00")} seconds");
            Log.Trace($"CoarseGenerator.ProcessDailyFolder(): Excluded {maplessCount} mapless symbols.");

            return(writers.Keys);
        }
Ejemplo n.º 14
0
        /// <summary>
        /// Iterates each daily file in the specified <paramref name="dailyFolder"/> and adds a line for each
        /// day to the approriate coarse file
        /// </summary>
        /// <param name="dailyFolder">The folder with daily data</param>
        /// <param name="coarseFolder">The coarse output folder</param>
        /// <param name="mapFileResolver"></param>
        /// <param name="exclusions">The symbols to be excluded from processing</param>
        /// <param name="ignoreMapless">Ignore the symbols without a map file.</param>
        /// <param name="symbolResolver">Function used to provide symbol resolution. Default resolution uses the zip file name to resolve
        /// the symbol, specify null for this behavior.</param>
        /// <returns>A collection of the generated coarse files</returns>
        public static ICollection <string> ProcessDailyFolder(string dailyFolder, string coarseFolder, MapFileResolver mapFileResolver, HashSet <string> exclusions, bool ignoreMapless, Func <string, string> symbolResolver = null)
        {
            const decimal scaleFactor = 10000m;

            Log.Trace("Processing: {0}", dailyFolder);

            var stopwatch = Stopwatch.StartNew();

            // load map files into memory

            var symbols      = 0;
            var maplessCount = 0;
            var dates        = new HashSet <DateTime>();

            // instead of opening/closing these constantly, open them once and dispose at the end (~3x speed improvement)
            var writers = new Dictionary <string, StreamWriter>();

            // open up each daily file to get the values and append to the daily coarse files
            foreach (var file in Directory.EnumerateFiles(dailyFolder))
            {
                try
                {
                    var symbol = Path.GetFileNameWithoutExtension(file);
                    if (symbol == null)
                    {
                        Log.Trace("CoarseGenerator.ProcessDailyFolder(): Unable to resolve symbol from file: {0}", file);
                        continue;
                    }

                    if (symbolResolver != null)
                    {
                        symbol = symbolResolver(symbol);
                    }

                    symbol = symbol.ToUpper();

                    if (exclusions.Contains(symbol))
                    {
                        Log.Trace("Excluded symbol: {0}", symbol);
                        continue;
                    }

                    ZipFile zip;
                    using (var reader = Compression.Unzip(file, out zip))
                    {
                        // 30 period EMA constant
                        const decimal k = 2m / (30 + 1);

                        var seeded = false;
                        var runningAverageVolume = 0m;

                        var checkedForMapFile = false;

                        symbols++;
                        string line;
                        while ((line = reader.ReadLine()) != null)
                        {
                            //20150625.csv
                            var csv  = line.Split(',');
                            var date = DateTime.ParseExact(csv[0], DateFormat.TwelveCharacter, CultureInfo.InvariantCulture);

                            if (ignoreMapless && !checkedForMapFile)
                            {
                                checkedForMapFile = true;
                                if (!mapFileResolver.ResolveMapFile(symbol, date).Any())
                                {
                                    // if the resolved map file has zero entries then it's a mapless symbol
                                    maplessCount++;
                                    break;
                                }
                            }

                            var close  = decimal.Parse(csv[4]) / scaleFactor;
                            var volume = long.Parse(csv[5]);

                            // compute the current volume EMA for dollar volume calculations
                            runningAverageVolume = seeded
                                ? volume * k + runningAverageVolume * (1 - k)
                                : volume;

                            seeded = true;

                            var dollarVolume = close * runningAverageVolume;

                            var coarseFile = Path.Combine(coarseFolder, date.ToString("yyyyMMdd") + ".csv");
                            dates.Add(date);

                            // try to resolve a map file and if found, use the permtick as the symbol
                            var sid     = symbol;
                            var mapFile = mapFileResolver.ResolveMapFile(sid, date);
                            if (mapFile != null)
                            {
                                // if available, us the permtick in the coarse files, because of this, we need
                                // to update the coarse files each time new map files are added/permticks change
                                sid = mapFile.Permtick;
                            }
                            if (mapFile == null && ignoreMapless)
                            {
                                // if we're ignoring mapless files then we should always be able to resolve this
                                Log.Error(string.Format("CoarseGenerator.ProcessDailyFolder(): Unable to resolve map file for {0} as of {1}", symbol, date.ToShortDateString()));
                                continue;
                            }

                            // sid,symbol,close,volume,dollar volume
                            var coarseFileLine = sid + "," + symbol + "," + close + "," + volume + "," + Math.Truncate(dollarVolume);

                            StreamWriter writer;
                            if (!writers.TryGetValue(coarseFile, out writer))
                            {
                                writer = new StreamWriter(new FileStream(coarseFile, FileMode.Append, FileAccess.Write, FileShare.Write));
                                writers[coarseFile] = writer;
                            }
                            writer.WriteLine(coarseFileLine);
                        }
                    }

                    if (symbols % 1000 == 0)
                    {
                        Log.Trace("CoarseGenerator.ProcessDailyFolder(): Completed processing {0} symbols. Current elapsed: {1} seconds", symbols, stopwatch.Elapsed.TotalSeconds.ToString("0.00"));
                    }
                }
                catch (Exception err)
                {
                    // log the error and continue with the process
                    Log.Error(err.ToString());
                }
            }

            Log.Trace("CoarseGenerator.ProcessDailyFolder(): Saving {0} coarse files to disk", dates.Count);

            // dispose all the writers at the end of processing
            foreach (var writer in writers)
            {
                writer.Value.Dispose();
            }

            stopwatch.Stop();

            Log.Trace("CoarseGenerator.ProcessDailyFolder(): Processed {0} symbols into {1} coarse files in {2} seconds", symbols, dates.Count, stopwatch.Elapsed.TotalSeconds.ToString("0.00"));
            Log.Trace("CoarseGenerator.ProcessDailyFolder(): Excluded {0} mapless symbols.", maplessCount);

            return(writers.Keys);
        }