예제 #1
0
파일: Program.cs 프로젝트: oswaldor/Plume
        /// <summary>
        /// Runs the "getmetrics" tuner command for the given test config file and then
        /// deletes <c>build\DocumentTopicAllocations.txt</c> located under the config file's directory.
        /// </summary>
        /// <param name="configFileForTest">path of the config file handed to the tuner.</param>
        /// <param name="metricsType">
        /// which metrics were requested; <c>Intr</c> and <c>Both</c> turn on the perplexity flag passed to the tuner.
        /// </param>
        private static void ComputeMetrics(string configFileForTest, ModelMetricTypes metricsType)
        {
            // The perplexity flag is set for exactly two of the metric types.
            bool perplexityRequested = metricsType == ModelMetricTypes.Intr
                                       || metricsType == ModelMetricTypes.Both;

            var metricsTuner = new LDAParameterTuner("getmetrics", TestSampleName, configFileForTest, "", perplexityRequested);
            metricsTuner.Run();

            // Remove the allocations file from the "build" folder next to the config file.
            string configDirectory = Path.GetDirectoryName(configFileForTest);
            string allocationsFile = Path.Combine(configDirectory, @"build\DocumentTopicAllocations.txt");

            // DeleteFile hands back a message and a color; neither is used here.
            ConsoleColor unusedColor;
            FileManager.DeleteFile(allocationsFile, out unusedColor);
        }
예제 #2
0
파일: Program.cs 프로젝트: oswaldor/Plume
        /// <summary>
        /// Compute corpus vocabulary, document vocabularies, and featurized documents
        /// for each combination of min/max(relative) word document frequency.
        /// </summary>
        /// <param name="listOfLDAConfigFilesForFeaturization">a list of LDAConfig files for featurization.</param>
        /// <param name="sampleName">the name of the sample to be featurized</param>
        /// <param name="numOfThreads">
        /// thread budget; two thirds of it (but never fewer than one thread) is used for the parallel part.
        /// </param>
        private static void FeaturizeSample(List<string> listOfLDAConfigFilesForFeaturization, string sampleName, int numOfThreads)
        {
            // The first config is processed on its own before any parallel work starts.
            // NOTE(review): presumably this produces output the remaining runs share - confirm.
            var firstTuner = new LDAParameterTuner("featurizedocs", sampleName, listOfLDAConfigFilesForFeaturization.First());

            firstTuner.Run();

            // Integer division yields 0 for numOfThreads == 1, and ParallelOptions rejects 0,
            // so clamp the degree of parallelism to at least one.
            int degreeOfParallelism = Math.Max(1, numOfThreads * 2 / 3);

            Parallel.ForEach(listOfLDAConfigFilesForFeaturization.Skip(1), new ParallelOptions {
                MaxDegreeOfParallelism = degreeOfParallelism
            }, config =>
            {
                // Each iteration owns its tuner. Sharing one captured variable across workers
                // lets a worker run a tuner that another worker has just assigned.
                var workerTuner = new LDAParameterTuner("featurizedocs", sampleName, config);
                workerTuner.Run();
            });
        }
예제 #3
0
파일: Program.cs 프로젝트: oswaldor/Plume
        /// <summary>
        /// Generate doc vector given a LDA config file, then copy the resulting
        /// DocumentVectors.L1.dv file to the location named by the config's DocumentTopicAllocations.
        /// </summary>
        /// <param name="configFileForTest">path of the JSON LDA config file to read and pass to the tuner.</param>
        /// <exception cref="InvalidOperationException">the config file deserializes to null.</exception>
        private static void GenerateDV(string configFileForTest)
        {
            // Run DvGen for training corpus.
            // Read/parse failures propagate to the caller unchanged.
            LDAConfig ldaConfig = JsonConvert.DeserializeObject<LDAConfig>(File.ReadAllText(configFileForTest));

            if (ldaConfig == null)
            {
                // An empty or "null" JSON document deserializes to null; fail with a clear message
                // instead of a NullReferenceException on the first property access below.
                throw new InvalidOperationException(string.Format("LDA config file could not be deserialized: {0}", configFileForTest));
            }

            string corpusPrefix       = Path.Combine(ModelRepositoryPath, string.Format(@"Corpora\{0}\{1}", ldaConfig.Locale, ldaConfig.Corpus));
            string sampleFileFullPath = Path.Combine(corpusPrefix, TrainingSampleName);
            var    tuner = new LDAParameterTuner("generatedocvectors", sampleFileFullPath, configFileForTest);

            tuner.Run();

            // Copy dv to model directory.
            string src = Path.Combine(corpusPrefix,
                                      string.Format("{0}.dv", TrainingSampleName),
                                      TrainingSampleName + "." + ldaConfig.modelName,
                                      "DocumentVectors.L1.dv");
            string dest = ldaConfig.DocumentTopicAllocations;

            StatusMessage.Write(string.Format("Waiting DV file ready: {0}", src));

            // Poll until the file exists and is no longer locked.
            // NOTE(review): there is no timeout, so this waits forever if the file never appears.
            do
            {
                Thread.Sleep(5);
            } while (!File.Exists(src) || FileManager.IsFileLocked(src));

            StatusMessage.Write(string.Format("DV file ready. Copying from {0} to {1}", src, dest));

            // Overwrite any existing destination file; copy failures propagate to the caller.
            File.Copy(src, dest, true);

            StatusMessage.Write(string.Format("Successfully copied file\r\n<---{0}\r\n--->{1}", src, dest));
        }
예제 #4
0
파일: Program.cs 프로젝트: oswaldor/Plume
        /// <summary>
        /// Runs the "learnlda" tuner command once per training config, in parallel.
        /// For every run that returns 0 the matching test config is enqueued on configFileQueue,
        /// and the training config is deleted when NeedDeleteConfig is set.
        /// After all runs finish, the shared flag is decremented.
        /// </summary>
        /// <param name="listOfLDAConfigFilesForTrain">training config files, one per model.</param>
        /// <param name="listOfLDAConfigFilesForTest">
        /// test config files, paired with the training configs by index; needs an entry for every training config.
        /// </param>
        /// <param name="numOfThreads">maximum degree of parallelism for the training runs.</param>
        private static void TrainLDAModels(List<string> listOfLDAConfigFilesForTrain, List<string> listOfLDAConfigFilesForTest, int numOfThreads)
        {
            // Generate a list of indexes
            // for accessing each element of listOfLDAConfigFilesForTrain and listOfLDAConfigFilesForTest
            List<int> indexes = Enumerable.Range(0, listOfLDAConfigFilesForTrain.Count).ToList();

            long totalNumber        = listOfLDAConfigFilesForTrain.Count;
            long numOfModelsTrained = 0;

            // Spawn multiple threads for training
            Parallel.ForEach(indexes, new ParallelOptions {
                MaxDegreeOfParallelism = numOfThreads
            }, index =>
            {
                string configFileForTrain = listOfLDAConfigFilesForTrain[index];
                string configFileForTest  = listOfLDAConfigFilesForTest[index];
                var tuner  = new LDAParameterTuner("learnlda", TrainingSampleName, configFileForTrain, configFileForTest);
                int retVal = tuner.Run();

                if (retVal == 0)
                {
                    // Push the configFileForTest into queue once the model training is done.
                    configFileQueue.Enqueue(configFileForTest);

                    // Use the value returned by the increment itself. A separate read afterwards can
                    // observe increments made by other workers and report a duplicate or skipped number.
                    long trainedSoFar = Interlocked.Increment(ref numOfModelsTrained);
                    StatusMessage.Write(string.Format("Model #{0} out of {1} has been trained.", trainedSoFar, totalNumber), ConsoleColor.Green);

                    if (NeedDeleteConfig)
                    {
                        // Only the returned message is reported; the color from DeleteFile is not used.
                        ConsoleColor color;
                        var message = FileManager.DeleteFile(configFileForTrain, out color);
                        StatusMessage.Write(message);
                    }
                }
                else
                {
                    StatusMessage.Write("Learn LDA failed! config = " + configFileForTrain, ConsoleColor.Red);
                }
            });

            // Tell the copy thread that no more models to be generated.
            Interlocked.Decrement(ref flag);
        }