/// <summary>
/// Compute metrics for a trained LDA model given its test config file.
/// </summary>
private static void ComputeMetrics(string configFileForTest, ModelMetricTypes metricsType)
{
    // Compute metrics. Perplexity is only needed for intrinsic (or both) metric types.
    bool needComputePerplexity = (metricsType == ModelMetricTypes.Intr || metricsType == ModelMetricTypes.Both);
    var tuner = new LDAParameterTuner("getmetrics", TestSampleName, configFileForTest, "", needComputePerplexity);
    tuner.Run();

    // Clean up the intermediate document-topic allocation file.
    ConsoleColor color;
    FileManager.DeleteFile(
        Path.Combine(Path.GetDirectoryName(configFileForTest), @"build\DocumentTopicAllocations.txt"),
        out color);
}
/// <summary> /// Compute corpus vocabulary, document vocabularies, and featurized documents /// for each combination of min/max(relative) word document frequency. /// </summary> /// <param name="listOfLDAConfigFilesForFeaturization">a list of LDAConfig files for featurization.</param> /// <param name="sampleName">the name of the sample to be featurized</param> private static void FeaturizeSample(List <string> listOfLDAConfigFilesForFeaturization, string sampleName, int numOfThreads) { var tuner = new LDAParameterTuner("featurizedocs", sampleName, listOfLDAConfigFilesForFeaturization.First()); tuner.Run(); Parallel.ForEach(listOfLDAConfigFilesForFeaturization.Skip(1), new ParallelOptions { MaxDegreeOfParallelism = numOfThreads * 2 / 3 }, config => { tuner = new LDAParameterTuner("featurizedocs", sampleName, config); tuner.Run(); }); }
/// <summary> /// Generate doc vector given a LDA config file. /// </summary> /// <param name="configFileForTest"></param> private static void GenerateDV(string configFileForTest) { // Run DvGen for training corpus. LDAConfig ldaConfig; try { ldaConfig = JsonConvert.DeserializeObject <LDAConfig>(File.ReadAllText(configFileForTest)); } catch (Exception) { throw; } string corpusPrefix = Path.Combine(ModelRepositoryPath, string.Format(@"Corpora\{0}\{1}", ldaConfig.Locale, ldaConfig.Corpus)); string sampleFileFullPath = Path.Combine(corpusPrefix, TrainingSampleName); var tuner = new LDAParameterTuner("generatedocvectors", sampleFileFullPath, configFileForTest); tuner.Run(); // Copy dv to model directory. string src = Path.Combine(corpusPrefix, string.Format("{0}.dv", TrainingSampleName), TrainingSampleName + "." + ldaConfig.modelName, "DocumentVectors.L1.dv"); string dest = ldaConfig.DocumentTopicAllocations; StatusMessage.Write(string.Format("Waiting DV file ready: {0}", src)); do { Thread.Sleep(5); } while (!File.Exists(src) || FileManager.IsFileLocked(src)); StatusMessage.Write(string.Format("DV file ready. Copying from {0} to {1}", src, dest)); try { File.Copy(src, dest, true); } catch (Exception) { throw; } StatusMessage.Write(string.Format("Successfully copied file\r\n<---{0}\r\n--->{1}", src, dest)); }
/// <summary>
/// Train an LDA model for each pair of train/test config files using multiple threads,
/// enqueueing each successfully trained model's test config for downstream processing.
/// </summary>
private static void TrainLDAModels(List<string> listOfLDAConfigFilesForTrain, List<string> listOfLDAConfigFilesForTest, int numOfThreads)
{
    // Generate a list of indexes for accessing the corresponding elements of
    // listOfLDAConfigFilesForTrain and listOfLDAConfigFilesForTest in lock step.
    List<int> indexes = Enumerable.Range(0, listOfLDAConfigFilesForTrain.Count).ToList();
    long totalNumber = listOfLDAConfigFilesForTrain.Count;
    long numOfModelsTrained = 0;

    // Spawn multiple threads for training.
    Parallel.ForEach(
        indexes,
        new ParallelOptions { MaxDegreeOfParallelism = numOfThreads },
        index =>
        {
            string configFileForTrain = listOfLDAConfigFilesForTrain[index];
            string configFileForTest = listOfLDAConfigFilesForTest[index];

            var tuner = new LDAParameterTuner("learnlda", TrainingSampleName, configFileForTrain, configFileForTest);
            int retVal = tuner.Run();
            if (retVal == 0)
            {
                // Push the test config into the queue once the model training is done.
                configFileQueue.Enqueue(configFileForTest);

                Interlocked.Increment(ref numOfModelsTrained);
                StatusMessage.Write(
                    string.Format("Model #{0} out of {1} has been trained.", Interlocked.Read(ref numOfModelsTrained), totalNumber),
                    ConsoleColor.Green);

                if (NeedDeleteConfig)
                {
                    ConsoleColor color;
                    var message = FileManager.DeleteFile(configFileForTrain, out color);
                    StatusMessage.Write(message);
                }
            }
            else
            {
                StatusMessage.Write("Learn LDA failed! config = " + configFileForTrain, ConsoleColor.Red);
            }
        });

    // Signal the copy thread that no more models will be generated.
    Interlocked.Decrement(ref flag);
}
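// Hedged sketch (assumption, not part of the original source): one possible shape for the
// copy/metrics consumer that TrainLDAModels signals via `flag`. It assumes configFileQueue is a
// ConcurrentQueue<string> and flag is an integer counter that producers decrement to zero when
// they finish; the real consumer in this tool may be organized differently.
private static void ConsumeTrainedModelsSketch(ModelMetricTypes metricsType)
{
    string configFileForTest;

    // Keep draining while producers are still running or items remain in the queue.
    while (Volatile.Read(ref flag) > 0 || !configFileQueue.IsEmpty)
    {
        if (configFileQueue.TryDequeue(out configFileForTest))
        {
            // Generate document vectors for the trained model, then score it.
            GenerateDV(configFileForTest);
            ComputeMetrics(configFileForTest, metricsType);
        }
        else
        {
            // Nothing ready yet; back off briefly before polling again.
            Thread.Sleep(100);
        }
    }
}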