/// <summary>
/// Computes metrics for the single model described by <paramref name="configFileForTest"/>
/// and afterwards deletes build\DocumentTopicAllocations.txt from the config file's directory.
/// </summary>
/// <param name="configFileForTest">Path of the model's test config file; its directory is used to locate the build folder.</param>
/// <param name="metricsType">Metrics to compute; Intr and Both switch on the perplexity flag handed to the tuner.</param>
private static void ComputeMetrics(string configFileForTest, ModelMetricTypes metricsType)
{
    // The perplexity flag is raised for every metrics type that includes the intrinsic part.
    bool needComputePerplexity = metricsType == ModelMetricTypes.Both || metricsType == ModelMetricTypes.Intr;

    // Compute metrics.
    new LDAParameterTuner("getmetrics", TestSampleName, configFileForTest, "", needComputePerplexity).Run();

    // Remove the topic allocation file once the run is over.
    // NOTE(review): presumably a by-product of the tuner run - confirm.
    string modelDirectory = Path.GetDirectoryName(configFileForTest);
    string allocationsFile = Path.Combine(modelDirectory, @"build\DocumentTopicAllocations.txt");
    ConsoleColor ignoredColor;
    FileManager.DeleteFile(allocationsFile, out ignoredColor);
}
/// <summary>
/// Tells whether the metrics output of the requested type is already present for a model.
/// </summary>
/// <param name="configFileForTest">Path of the model's test config file; the metrics files are looked up under its directory.</param>
/// <param name="testSampleName">Name of the test sample, used to build the perplexity file name.</param>
/// <param name="metricsType">Intr checks build\{testSampleName}.Perplexity.txt, Extr checks build\ExtrinsicMetrics.tsv, Both requires both.</param>
/// <returns>
/// True when the metrics file exists and is either locked or non-empty; false when it is
/// missing, empty, or when <paramref name="metricsType"/> is none of Intr, Extr or Both.
/// </returns>
public static bool HaveMetricsBeenComputed(string configFileForTest, string testSampleName, ModelMetricTypes metricsType)
{
    if (metricsType == ModelMetricTypes.Both)
    {
        // Both kinds must be present; the extrinsic check is skipped when the intrinsic one fails.
        bool intrinsicDone = HaveMetricsBeenComputed(configFileForTest, testSampleName, ModelMetricTypes.Intr);
        return intrinsicDone && HaveMetricsBeenComputed(configFileForTest, testSampleName, ModelMetricTypes.Extr);
    }

    // Get model directory
    string modelDir = Path.GetDirectoryName(configFileForTest);

    // Get metrics file path
    string metricsFile;
    if (metricsType == ModelMetricTypes.Intr)
    {
        metricsFile = Path.Combine(modelDir, string.Format(@"build\{0}.Perplexity.txt", testSampleName));
    }
    else if (metricsType == ModelMetricTypes.Extr)
    {
        metricsFile = Path.Combine(modelDir, @"build\ExtrinsicMetrics.tsv");
    }
    else
    {
        // No metrics file is associated with any other metrics type.
        return false;
    }

    if (!File.Exists(metricsFile))
    {
        return false;
    }

    // A file locked for writing counts as computed - important for multi-threading, to avoid
    // a potential conflict with another thread that might be computing metrics for the same model.
    // Otherwise the file must contain something.
    return FileManager.IsFileLocked(metricsFile) || FileManager.GetFileLength(metricsFile) > 0L;
}
/// <summary>
/// Program entry point: parses the command line, prepares the LDA config files and then runs
/// the work that belongs to the requested worker role.
/// </summary>
/// <param name="args">
/// args[0] is the worker role and args[1] the number of threads. For the Metrics role, args[2]
/// optionally names the metrics type (Both when omitted) and args[3], when equal to "all"
/// (case-insensitive), turns on RunAllMetrics.
/// </param>
/// <remarks>
/// Invalid arguments, or the absence of any test config file where one is required, end the
/// program with a status message and exit code 1 instead of exit code 0 or an unhandled exception.
/// </remarks>
public static void Main(string[] args)
{
    if (args.Length < 2)
    {
        System.Console.WriteLine("Usage: WrapperForFastLDATuning.exe <WorkerRole> <NumOfThreads> [MetricsType]");
        System.Console.WriteLine(DetailUsage);
        Environment.Exit(1);
    }

    if (!Enum.TryParse(args[0], true, out WorkerRole))
    {
        StatusMessage.Write("Unrecognized value for worker role. Exiting...");
        Environment.ExitCode = 1;
        return;
    }

    if (!Int32.TryParse(args[1], out NumOfThreads))
    {
        StatusMessage.Write("Invalid value for number of threads. Exiting...");
        Environment.ExitCode = 1;
        return;
    }

    if (WorkerRole == WorkerRoles.Metrics)
    {
        if (args.Length < 3)
        {
            // By default we compute both intrinsic and extrinsic metrics.
            MetricsType = ModelMetricTypes.Both;
        }
        else if (!Enum.TryParse(args[2], true, out MetricsType))
        {
            StatusMessage.Write("Unrecognized value for metrics type. Exiting...");
            Environment.ExitCode = 1;
            return;
        }

        if (args.Length > 3)
        {
            RunAllMetrics = args[3].Equals("all", StringComparison.OrdinalIgnoreCase);
        }
    }

    Initialize();

    // Load default LDA config as seed
    var defaultLDAConfig = LoadLDAConfig(DefaultModelConfigFile);

    // Get the folders of parameter range files and training config files.
    // Examples of folder structure: d:\ModelRepository\TuneLDAParameters\RangesOfParams
    //                               d:\ModelRepository\TuneLDAParameters\LDALearningConfigFiles
    string folderOfParamRangeFiles = Path.Combine(ModelRepositoryPath, ProjResourceSubFolder, RangeOfParamsSubFolder);
    string folderOfTrainingConfigFiles = Path.Combine(ModelRepositoryPath, ProjResourceSubFolder, LDALearningConfigsSubFolder);

    List<string> listOfLDAConfigFilesForFeaturization = new List<string>();
    List<string> listOfLDAConfigFilesForTest = new List<string>();

    // Generate multiple LDA configs for featurization, training and test.
    List<string> listOfLDAConfigFilesForTrain;
    if (NeedLoadLDAConfigFiles)
    {
        listOfLDAConfigFilesForTrain = LDAConfigFileGenerator.LoadLDAConfigFiles(ModelRepositoryPath, TrainingSampleName, defaultLDAConfig, SourceFolderOfLDAConfigFiles, folderOfTrainingConfigFiles, ref listOfLDAConfigFilesForFeaturization, ref listOfLDAConfigFilesForTest);
    }
    else if (NeedLoadLDAParameterTable)
    {
        listOfLDAConfigFilesForTrain = LDAConfigFileGenerator.LoadLDAParameterTable(ModelRepositoryPath, TrainingSampleName, defaultLDAConfig, LDAParameterTablePath, folderOfTrainingConfigFiles, ref listOfLDAConfigFilesForFeaturization, ref listOfLDAConfigFilesForTest);
    }
    else
    {
        listOfLDAConfigFilesForTrain = LDAConfigFileGenerator.GenerateLDAConfigFiles(ModelRepositoryPath, folderOfParamRangeFiles, TrainingSampleName, defaultLDAConfig, folderOfTrainingConfigFiles, ref listOfLDAConfigFilesForFeaturization, ref listOfLDAConfigFilesForTest);
    }

    // Several branches below need the first test config file; First() would throw on an
    // empty list, so the condition is checked up front and reported as a normal error.
    bool hasConfigFilesForTest = listOfLDAConfigFilesForTest != null && listOfLDAConfigFilesForTest.Count > 0;

    switch (WorkerRole)
    {
        case WorkerRoles.Training:
            FeaturizeSample(listOfLDAConfigFilesForFeaturization, TrainingSampleName, NumOfThreads);

            if (NeedCopyToRemote)
            {
                if (!hasConfigFilesForTest)
                {
                    StatusMessage.Write("No LDA config files for test are available, cannot copy vocabularies. Exiting...");
                    Environment.ExitCode = 1;
                    return;
                }

                CopyVocabularies(listOfLDAConfigFilesForTest.First(), RemoteModelRepositoryPath);

                // Start a thread that:
                // 1). monitors model directory;
                // 2). copies models to remote model repository when they are done;
                // 3). deletes them once copy is successful.
                Thread newThread = new Thread(Program.CopyModels);
                newThread.Start(NeedDeleteFromLocal);
            }

            TrainLDAModels(listOfLDAConfigFilesForTrain, listOfLDAConfigFilesForTest, NumOfThreads);

            if (NeedCopyToRemote)
            {
                WaitForCopyThread();
            }

            break;

        case WorkerRoles.Metrics:
            if (!hasConfigFilesForTest)
            {
                StatusMessage.Write("No LDA config files for test are available, cannot compute metrics. Exiting...");
                Environment.ExitCode = 1;
                return;
            }

            if (RunAllMetrics)
            {
                long numOfModelsMeasured = 0;
                ComputeMetrics(listOfLDAConfigFilesForTest, NumOfThreads, MetricsType, ref numOfModelsMeasured);
                break;
            }

            // Get common parent of individual model directories.
            string commonParentOfModelDirectories = FileManager.GetGrandparentOfFilePath(listOfLDAConfigFilesForTest.First());
            ComputeMetrics(commonParentOfModelDirectories, NumOfThreads, MetricsType);
            break;

        default:
            return;
    }

    StatusMessage.Write("Done!");
}
/// <summary>
/// Compute metrics for each config file within a group.
/// </summary>
/// <param name="groupOfConfigFiles">Config files that share one grouping key; the key is only used in the progress message.</param>
/// <param name="metricsType">Metrics type forwarded to the per-model computation.</param>
/// <param name="numOfThreadsPerGroup">Maximum degree of parallelism used inside this group (1 by default).</param>
private static void ComputeMetrics(IGrouping<object, string> groupOfConfigFiles, ModelMetricTypes metricsType, int numOfThreadsPerGroup = 1)
{
    long count = 0;
    int totalCount = groupOfConfigFiles.Count();

    Parallel.ForEach(groupOfConfigFiles, new ParallelOptions { MaxDegreeOfParallelism = numOfThreadsPerGroup }, configFile =>
    {
        // Take the number straight from the increment: reading the counter in a second step
        // lets another worker bump it in between, which produces duplicate or skipped numbers.
        long modelNumber = Interlocked.Increment(ref count);

        StatusMessage.Write(string.Format("Computing metrics for model #{0} of {1} within group {2},\r\n\tunder {3}\r\n",
            modelNumber,
            totalCount,
            groupOfConfigFiles.Key,
            Path.GetDirectoryName(configFile)));

        ComputeMetrics(configFile, metricsType);
    });
}
/// <summary>
/// Computes metrics for all given config files, grouped by their featurization parameters,
/// and adds the number of processed models to a running total.
/// </summary>
/// <param name="configFilesReady">Config files of the models to measure; nothing is done when it is null or empty.</param>
/// <param name="numOfThreads">Maximum degree of parallelism across groups; also divided among the groups for the work inside each group.</param>
/// <param name="metricsType">Metrics type forwarded to the per-model computation.</param>
/// <param name="totalNumOfModelsMeasured">Running total of measured models; increased by the size of every group processed here.</param>
private static void ComputeMetrics(List<string> configFilesReady, int numOfThreads, ModelMetricTypes metricsType, ref long totalNumOfModelsMeasured)
{
    // First().First() below would throw on an empty list.
    if (configFilesReady == null || configFilesReady.Count == 0)
    {
        return;
    }

    // A ref parameter cannot be captured by the lambda below, so the total is accumulated
    // in a local copy and published back to the caller once all groups are finished.
    long numOfModelsMeasured = Interlocked.Read(ref totalNumOfModelsMeasured);

    // Group LDAConfig files by featurization parameters, i.e. <min, max>.
    // The parameters are extracted once per file and reused for both key members.
    var groupsOfLDAConfigFiles = configFilesReady.GroupBy(f =>
    {
        var featurizationParameters = ExtractFeaturizationParameters(f);
        return new
        {
            featurizationParameters.MinWordDocumentFrequency,
            featurizationParameters.MaxRalativeWordDocumentFrequency
        };
    }).ToArray();

    // Compute metrics for very first item to generate DocumentVocabularies in case of need.
    ComputeMetrics(groupsOfLDAConfigFiles.First().First(), metricsType);

    int numOfThreadsPerGroup = (int)Math.Ceiling((decimal)numOfThreads / groupsOfLDAConfigFiles.Length);

    // Run multi-threading over different groups, then run with the per-group thread budget within each group.
    Parallel.ForEach(groupsOfLDAConfigFiles, new ParallelOptions { MaxDegreeOfParallelism = numOfThreads }, groupOfConfigFiles =>
    {
        long groupSize = groupOfConfigFiles.Count();
        ComputeMetrics(groupOfConfigFiles, metricsType, numOfThreadsPerGroup);

        // Report the value produced by this very addition rather than re-reading the counter.
        long measuredSoFar = Interlocked.Add(ref numOfModelsMeasured, groupSize);
        StatusMessage.Write(
            string.Format("Metrics of model #{0} computed.", measuredSoFar),
            ConsoleColor.Green);
    });

    // Hand the updated total back to the caller; without this the ref argument never changes.
    Interlocked.Exchange(ref totalNumOfModelsMeasured, Interlocked.Read(ref numOfModelsMeasured));
}
/// <summary>
/// Polls a directory tree for LDA config files and computes metrics for every model whose
/// files are ready and whose metrics are still missing. The method loops forever and never returns.
/// </summary>
/// <param name="commonParentOfModelDirs">Directory that is searched for "*.LDAConfig.json" files.</param>
/// <param name="numOfThreads">Number of threads forwarded to the metrics computation.</param>
/// <param name="metricsType">Metrics type that is checked for and computed.</param>
private static void ComputeMetrics(string commonParentOfModelDirs, int numOfThreads, ModelMetricTypes metricsType)
{
    // The waiting notice is throttled by wall-clock time. Counting loop passes is not reliable
    // because one pass takes far longer than the 1 ms sleep (directory search, status checks).
    TimeSpan waitingNoticeInterval = TimeSpan.FromHours(1);
    DateTime nextWaitingNoticeUtc = DateTime.MinValue; // the first pass always prints the notice

    long numOfModelsMeasured = 0;

    while (true)
    {
        // Find all LDAConfig files (.json)
        List<string> listOfLDAConfigFilesForTest = FileManager.SearchFileInDir(commonParentOfModelDirs, "*.LDAConfig.json");

        int numOfModelsDemanded;
        do
        {
            // Find the models that are ready and need to compute metrics.
            var configFilesReady = listOfLDAConfigFilesForTest.Where(configFile =>
                LDAModelStatusChecker.AreModelFilesReady(configFile)
                && !LDAModelStatusChecker.HaveMetricsBeenComputed(configFile, TestSampleName, metricsType)).ToList();

            if (configFilesReady.Count == 0)
            {
                // Nothing to do for the known models right now: go back and rescan the
                // directory. Looping here on the previously computed demand count would spin
                // forever if the remaining models get their metrics elsewhere or never become ready.
                break;
            }

            ComputeMetrics(configFilesReady, numOfThreads, metricsType, ref numOfModelsMeasured);
            Thread.Sleep(1);

            // Find the number of models that demand computation of metrics.
            numOfModelsDemanded = listOfLDAConfigFilesForTest.Count(configFile =>
                !LDAModelStatusChecker.HaveMetricsBeenComputed(configFile, TestSampleName, metricsType));
        } while (numOfModelsDemanded > 0);

        Thread.Sleep(1);

        // Display the message on the first pass and then at most once per hour.
        DateTime nowUtc = DateTime.UtcNow;
        if (nowUtc >= nextWaitingNoticeUtc)
        {
            StatusMessage.Write("Waiting for models to be ready...");
            nextWaitingNoticeUtc = nowUtc + waitingNoticeInterval;
        }
    }
}