Esempio n. 1
0
        /// <summary>
        /// Runs an <c>LDAParameterTuner</c> created with "getmetrics" as its first argument for a single
        /// config file, then deletes <c>build\DocumentTopicAllocations.txt</c> next to that config file.
        /// </summary>
        /// <param name="configFileForTest">Path of the config file handed to the tuner; its directory locates the file to delete.</param>
        /// <param name="metricsType">Requested metric type; Intr and Both switch on the flag passed as the tuner's last argument.</param>
        private static void ComputeMetrics(string configFileForTest, ModelMetricTypes metricsType)
        {
            // The last tuner argument is enabled for the intrinsic type and for "both".
            bool wantsPerplexity;
            switch (metricsType)
            {
            case ModelMetricTypes.Intr:
            case ModelMetricTypes.Both:
                wantsPerplexity = true;
                break;

            default:
                wantsPerplexity = false;
                break;
            }

            new LDAParameterTuner("getmetrics", TestSampleName, configFileForTest, "", wantsPerplexity).Run();

            // Remove the allocations file from the model's build folder once the run is over.
            string modelDirectory  = Path.GetDirectoryName(configFileForTest);
            string allocationsFile = Path.Combine(modelDirectory, @"build\DocumentTopicAllocations.txt");

            ConsoleColor ignoredColor;
            FileManager.DeleteFile(allocationsFile, out ignoredColor);
        }
Esempio n. 2
0
        /// <summary>
        /// Reports whether the metrics output of the requested type is already present for a model.
        /// </summary>
        /// <param name="configFileForTest">Config file of the model; the metrics files are looked up under its directory's <c>build</c> folder.</param>
        /// <param name="testSampleName">Sample name used to build the perplexity file name.</param>
        /// <param name="metricsType">Metric type to check; Both requires the Intr and the Extr outputs.</param>
        /// <returns>
        /// True when the metrics file exists and is either locked or non-empty; false otherwise.
        /// </returns>
        public static bool HaveMetricsBeenComputed(string configFileForTest, string testSampleName, ModelMetricTypes metricsType)
        {
            // "Both" is satisfied only when each individual metric type is; the extrinsic
            // check is skipped as soon as the intrinsic one fails.
            if (metricsType == ModelMetricTypes.Both)
            {
                bool intrinsicDone = HaveMetricsBeenComputed(configFileForTest, testSampleName, ModelMetricTypes.Intr);
                return intrinsicDone &&
                       HaveMetricsBeenComputed(configFileForTest, testSampleName, ModelMetricTypes.Extr);
            }

            string modelDirectory = Path.GetDirectoryName(configFileForTest);

            // Pick the output file that belongs to the requested metric type.
            string outputPath;
            switch (metricsType)
            {
            case ModelMetricTypes.Intr:
                outputPath = Path.Combine(modelDirectory, string.Format(@"build\{0}.Perplexity.txt", testSampleName));
                break;

            case ModelMetricTypes.Extr:
                outputPath = Path.Combine(modelDirectory, @"build\ExtrinsicMetrics.tsv");
                break;

            default:
                // Any other value leaves the path empty, which never exists on disk.
                outputPath = "";
                break;
            }

            if (File.Exists(outputPath))
            {
                // A locked file is reported as computed: this matters for multi-threading, where
                // another thread might be writing the metrics of the same model right now.
                // Otherwise the file counts only when it has content.
                return FileManager.IsFileLocked(outputPath) || FileManager.GetFileLength(outputPath) > 0L;
            }

            return false;
        }
Esempio n. 3
0
        /// <summary>
        /// Entry point: parses the command line, prepares the LDA config files and runs the requested worker role.
        /// </summary>
        /// <param name="args">
        /// args[0] is the worker role and args[1] the number of threads. For the metrics role, an optional
        /// args[2] selects the metrics type (both types when omitted) and an optional args[3] enables
        /// RunAllMetrics when it equals "all" (case-insensitive).
        /// </param>
        public static void Main(string[] args)
        {
            if (args.Length < 2)
            {
                System.Console.WriteLine("Usage: WrapperForFastLDATuning.exe <WorkerRole> <NumOfThreads> [MetricsType]");
                System.Console.WriteLine(DetailUsage);
                Environment.Exit(1);
            }

            if (!Enum.TryParse(args[0], true, out WorkerRole))
            {
                StatusMessage.Write("Unrecognized value for worker role. Exiting...");
                return;
            }

            if (!Int32.TryParse(args[1], out NumOfThreads))
            {
                StatusMessage.Write("Invalid value for number of threads. Exiting...");
                return;
            }

            if (WorkerRole == WorkerRoles.Metrics)
            {
                // Both metrics code paths hand NumOfThreads to ParallelOptions.MaxDegreeOfParallelism,
                // which only accepts -1 (no limit) or a positive value. Reject anything else up front
                // instead of failing with ArgumentOutOfRangeException once the models are ready.
                if (NumOfThreads == 0 || NumOfThreads < -1)
                {
                    StatusMessage.Write("Invalid value for number of threads. Exiting...");
                    return;
                }

                if (args.Length < 3)
                {
                    // By default we compute both intrinsic and extrinsic metrics.
                    MetricsType = ModelMetricTypes.Both;
                }
                else if (!Enum.TryParse(args[2], true, out MetricsType))
                {
                    StatusMessage.Write("Unrecognized value for metrics type. Exiting...");
                    return;
                }

                if (args.Length > 3)
                {
                    RunAllMetrics = args[3].Equals("all", StringComparison.OrdinalIgnoreCase);
                }
            }

            Initialize();

            // Load default LDA config as seed
            var defaultLDAConfig = LoadLDAConfig(DefaultModelConfigFile);

            // Get the folders of parameter range files and training config files.
            // Examples of folder structure: d:\ModelRepository\TuneLDAParameters\RangesOfParams
            //                               d:\ModelRepository\TuneLDAParameters\LDALearningConfigFiles
            string folderOfParamRangeFiles     = Path.Combine(ModelRepositoryPath, ProjResourceSubFolder, RangeOfParamsSubFolder);
            string folderOfTrainingConfigFiles = Path.Combine(ModelRepositoryPath, ProjResourceSubFolder, LDALearningConfigsSubFolder);

            List <string> listOfLDAConfigFilesForFeaturization = new List <string>();
            List <string> listOfLDAConfigFilesForTest          = new List <string>();

            // Generate multiple LDA configs for featurization, training and test.
            List <string> listOfLDAConfigFilesForTrain;

            if (NeedLoadLDAConfigFiles)
            {
                listOfLDAConfigFilesForTrain =
                    LDAConfigFileGenerator.LoadLDAConfigFiles(ModelRepositoryPath,
                                                              TrainingSampleName,
                                                              defaultLDAConfig,
                                                              SourceFolderOfLDAConfigFiles,
                                                              folderOfTrainingConfigFiles,
                                                              ref listOfLDAConfigFilesForFeaturization,
                                                              ref listOfLDAConfigFilesForTest);
            }
            else if (NeedLoadLDAParameterTable)
            {
                listOfLDAConfigFilesForTrain =
                    LDAConfigFileGenerator.LoadLDAParameterTable(ModelRepositoryPath,
                                                                 TrainingSampleName,
                                                                 defaultLDAConfig,
                                                                 LDAParameterTablePath,
                                                                 folderOfTrainingConfigFiles,
                                                                 ref listOfLDAConfigFilesForFeaturization,
                                                                 ref listOfLDAConfigFilesForTest);
            }
            else
            {
                listOfLDAConfigFilesForTrain =
                    LDAConfigFileGenerator.GenerateLDAConfigFiles(ModelRepositoryPath,
                                                                  folderOfParamRangeFiles,
                                                                  TrainingSampleName,
                                                                  defaultLDAConfig,
                                                                  folderOfTrainingConfigFiles,
                                                                  ref listOfLDAConfigFilesForFeaturization,
                                                                  ref listOfLDAConfigFilesForTest);
            }

            switch (WorkerRole)
            {
            case WorkerRoles.Training:
                FeaturizeSample(listOfLDAConfigFilesForFeaturization, TrainingSampleName, NumOfThreads);
                if (NeedCopyToRemote)
                {
                    // The first test config is needed to copy the vocabularies; report its absence
                    // instead of letting First() throw InvalidOperationException.
                    if (listOfLDAConfigFilesForTest == null || listOfLDAConfigFilesForTest.Count == 0)
                    {
                        StatusMessage.Write("No LDA config files for test are available; cannot copy vocabularies. Exiting...");
                        return;
                    }

                    CopyVocabularies(listOfLDAConfigFilesForTest.First(), RemoteModelRepositoryPath);

                    // Start a thread that:
                    // 1). monitors model directory;
                    // 2). copies models to remote model repository when they are done;
                    // 3). deletes them once copy is successful.
                    Thread newThread = new Thread(Program.CopyModels);
                    newThread.Start(NeedDeleteFromLocal);
                }
                TrainLDAModels(listOfLDAConfigFilesForTrain, listOfLDAConfigFilesForTest, NumOfThreads);
                if (NeedCopyToRemote)
                {
                    WaitForCopyThread();
                }
                break;

            case WorkerRoles.Metrics:
                // Both metrics paths below need at least one test config file: the "all" path takes
                // the first file of the first group, the other path derives the common parent
                // directory from the first file.
                if (listOfLDAConfigFilesForTest == null || listOfLDAConfigFilesForTest.Count == 0)
                {
                    StatusMessage.Write("No LDA config files for test are available; nothing to measure. Exiting...");
                    return;
                }

                if (RunAllMetrics)
                {
                    long numOfModelsMeasured = 0;
                    ComputeMetrics(listOfLDAConfigFilesForTest, NumOfThreads, MetricsType, ref numOfModelsMeasured);
                    break;
                }

                // Get common parent of individual model directories.
                string commonParentOfModelDirectories = FileManager.GetGrandparentOfFilePath(listOfLDAConfigFilesForTest.First());
                ComputeMetrics(commonParentOfModelDirectories, NumOfThreads, MetricsType);
                break;

            default:
                return;
            }

            StatusMessage.Write("Done!");
        }
Esempio n. 4
0
        /// <summary>
        /// Compute metrics for each config file within a group.
        /// </summary>
        /// <param name="groupOfConfigFiles">Config files that share one grouping key; the key is only used in the progress message.</param>
        /// <param name="metricsType">Metric type forwarded to the per-file overload.</param>
        /// <param name="numOfThreadsPerGroup">Value used as <c>ParallelOptions.MaxDegreeOfParallelism</c> for this group (1 by default).</param>
        private static void ComputeMetrics(IGrouping <object, string> groupOfConfigFiles, ModelMetricTypes metricsType, int numOfThreadsPerGroup = 1)
        {
            long count      = 0;
            int  totalCount = groupOfConfigFiles.Count();

            Parallel.ForEach(groupOfConfigFiles, new ParallelOptions {
                MaxDegreeOfParallelism = numOfThreadsPerGroup
            },
                             configFile =>
            {
                // Take the ordinal from the value Increment returns. Reading the counter in a
                // separate step lets another worker bump it in between, so two workers could
                // report the same model number.
                long ordinal = Interlocked.Increment(ref count);
                StatusMessage.Write(string.Format("Computing metrics for model #{0} of {1} within group {2},\r\n\tunder {3}\r\n",
                                                  ordinal,
                                                  totalCount,
                                                  groupOfConfigFiles.Key,
                                                  Path.GetDirectoryName(configFile)));

                ComputeMetrics(configFile, metricsType);
            });
        }
Esempio n. 5
0
        /// <summary>
        /// Computes metrics for a batch of config files: the files are grouped by their featurization
        /// parameters, the groups are processed in parallel, and the running total of measured models
        /// is updated.
        /// </summary>
        /// <param name="configFilesReady">Config files to measure. A null or empty list is a no-op.</param>
        /// <param name="numOfThreads">Degree of parallelism across groups; also split across groups to bound the parallelism inside each group.</param>
        /// <param name="metricsType">Metric type forwarded to the per-group and per-file overloads.</param>
        /// <param name="totalNumOfModelsMeasured">Running total of measured models; the size of every processed group is added to it.</param>
        private static void ComputeMetrics(List <string> configFilesReady, int numOfThreads, ModelMetricTypes metricsType, ref long totalNumOfModelsMeasured)
        {
            // Nothing to measure; also keeps First() below from throwing on an empty batch.
            if (configFilesReady == null || configFilesReady.Count == 0)
            {
                return;
            }

            // A ref parameter cannot be captured by a lambda, so the parallel loop works on a
            // local copy that is written back once the loop has finished.
            long numOfModelsMeasured = Interlocked.Read(ref totalNumOfModelsMeasured);

            // Group LDAConfig files by featurization parameters, i.e. <min, max>.
            var groupsOfLDAConfigFiles = configFilesReady.GroupBy(f =>
            {
                // Extract once per file and reuse the result for both key components.
                var featurization = ExtractFeaturizationParameters(f);
                return new
                {
                    featurization.MinWordDocumentFrequency,
                    featurization.MaxRalativeWordDocumentFrequency
                };
            }).ToArray();

            // Compute metrics for very first item to generate DocumentVocabularies in case of need.
            // NOTE: that item is still part of its group, so it is processed again in the loop below.
            ComputeMetrics(groupsOfLDAConfigFiles.First().First(), metricsType);

            // Split the thread budget across the groups. A non-positive value (e.g. -1, which
            // ParallelOptions treats as "no limit") is passed through unchanged, because dividing
            // it would round to 0, which ParallelOptions rejects.
            int numOfThreadsPerGroup = numOfThreads > 0
                ? (int)(Math.Ceiling((decimal)numOfThreads / groupsOfLDAConfigFiles.Length))
                : numOfThreads;

            // Run multi-threading over different groups; each group in turn uses up to
            // numOfThreadsPerGroup threads.
            Parallel.ForEach(groupsOfLDAConfigFiles, new ParallelOptions {
                MaxDegreeOfParallelism = numOfThreads
            },
                             groupOfConfigFiles =>
            {
                long groupSize = groupOfConfigFiles.Count();
                ComputeMetrics(groupOfConfigFiles, metricsType, numOfThreadsPerGroup);

                // Report the total returned by Add itself; a separate read could already
                // include the contribution of another group.
                long measuredSoFar = Interlocked.Add(ref numOfModelsMeasured, groupSize);
                StatusMessage.Write(
                    string.Format("Metrics of model #{0} computed.", measuredSoFar),
                    ConsoleColor.Green);
            });

            // Publish the updated total to the caller; without this the caller's counter never advances.
            Interlocked.Exchange(ref totalNumOfModelsMeasured, Interlocked.Read(ref numOfModelsMeasured));
        }
Esempio n. 6
0
        /// <summary>
        /// Endlessly polls a directory tree for LDA config files and computes metrics for every model
        /// that is ready and has not been measured yet. This method never returns.
        /// </summary>
        /// <param name="commonParentOfModelDirs">Directory searched for <c>*.LDAConfig.json</c> files.</param>
        /// <param name="numOfThreads">Thread budget forwarded to the batch overload.</param>
        /// <param name="metricsType">Metric type to compute and to check for.</param>
        private static void ComputeMetrics(string commonParentOfModelDirs, int numOfThreads, ModelMetricTypes metricsType)
        {
            // The "waiting" notice is throttled by wall-clock time. Counting loop passes does not
            // work: a pass also scans the directory and may compute metrics, so its duration is
            // not the 1 ms sleep.
            TimeSpan noticeInterval                     = TimeSpan.FromHours(1);
            System.Diagnostics.Stopwatch sinceLastNotice = null;
            long numOfModelsMeasured                    = 0;

            while (true)
            {
                int numOfModelsDemanded = 0;
                // Find all LDAConfig files (.json)
                List <string> listOfLDAConfigFilesForTest = FileManager.SearchFileInDir(commonParentOfModelDirs, "*.LDAConfig.json");
                do
                {
                    // Find the models that are ready and need to compute metrics.
                    var configFilesReady = listOfLDAConfigFilesForTest.Where(configFile =>
                                                                             LDAModelStatusChecker.AreModelFilesReady(configFile) &&
                                                                             !LDAModelStatusChecker.HaveMetricsBeenComputed(configFile, TestSampleName, metricsType)).ToList();

                    if (configFilesReady.Count == 0)
                    {
                        Thread.Sleep(1);
                        // In a do/while, continue jumps to the loop condition. With the counter
                        // still at its previous value this either leaves the inner loop (nothing
                        // was pending) or keeps waiting for the models that were pending.
                        continue;
                    }

                    ComputeMetrics(configFilesReady, numOfThreads, metricsType, ref numOfModelsMeasured);

                    Thread.Sleep(1);

                    // Find the number of models that demand computation of metrics.
                    numOfModelsDemanded = listOfLDAConfigFilesForTest.Count(configFile => !LDAModelStatusChecker.HaveMetricsBeenComputed(configFile, TestSampleName, metricsType));
                } while (numOfModelsDemanded > 0);

                Thread.Sleep(1);

                // Show the notice after the first pass and then at most once per hour.
                if (sinceLastNotice == null || sinceLastNotice.Elapsed >= noticeInterval)
                {
                    StatusMessage.Write("Waiting for models to be ready...");
                    sinceLastNotice = System.Diagnostics.Stopwatch.StartNew();
                }
            }
        }