Exemple #1
0
        /// <summary>
        /// Registers a model in the models database and returns a <see cref="ModelDatabase"/> handle for the
        /// database that holds that model's data.
        /// </summary>
        /// <remarks>
        /// Metrics are obtained through <c>ExtractModelMetrics</c>. When that yields <c>null</c>, nothing is registered,
        /// <paramref name="success"/> is left untouched and <c>null</c> is returned.
        /// </remarks>
        /// <param name="modelsDb">Models database that receives the new record; its <c>serverName</c> is reused for the returned handle.</param>
        /// <param name="ldaConfig">Configuration of the model being registered.</param>
        /// <param name="modelRepositoryPath">Repository path forwarded to <c>ExtractModelMetrics</c>.</param>
        /// <param name="success">Set to <c>true</c> once <c>modelsDb.AddModel</c> returns without throwing; never set to <c>false</c> here.</param>
        /// <returns>The model database handle, or <c>null</c> when no metrics are available.</returns>
        private static ModelDatabase AddModelParametersToModelsDb(ModelsDb modelsDb, LDAConfig ldaConfig, string modelRepositoryPath, ref bool success)
        {
            var modelMetrics = ExtractModelMetrics(ref ldaConfig, modelRepositoryPath);
            if (modelMetrics == null)
            {
                return null;
            }

            string createdDbName;
            int    createdModelId; // Demanded by AddModel's signature; not used afterwards.

            try
            {
                StatusMessage.Write("Adding metrics to Db: " + ldaConfig.ExtrinsicMetricsProcessed);
                modelsDb.AddModel(ldaConfig, modelMetrics, out createdDbName, out createdModelId);
                success = true;
            }
            catch (Exception failure)
            {
                // Log for the operator, then let the caller see the original exception.
                StatusMessage.Write("Could not add a record to the Topic models db:" + failure);
                throw;
            }

            var modelDatabase = new ModelDatabase(string.Empty, modelsDb.serverName, createdDbName, false);

            // The result of Open() is deliberately ignored: per the original note, a true
            // result only means the database had already been created.
            modelDatabase.Open();

            return modelDatabase;
        }
        /// <summary>
        /// Loads "SourceArtifacts", "TargetArtifacts" and "Config" from the workspace, executes an
        /// <c>LDAScript</c> built from them on an <c>REngine</c>, and stores the resulting similarity
        /// matrix in the workspace under "Similarities".
        /// </summary>
        public override void Compute()
        {
            var sourceArtifacts = (TLArtifactsCollection)Workspace.Load("SourceArtifacts");
            var targetArtifacts = (TLArtifactsCollection)Workspace.Load("TargetArtifacts");
            var ldaConfig       = (LDAConfig)Workspace.Load("Config");

            // The engine is created from the R script path carried by the configuration.
            var rEngine = new REngine(ldaConfig.RScriptPath);
            var script  = new LDAScript(sourceArtifacts, targetArtifacts, ldaConfig);

            Workspace.Store("Similarities", (TLSimilarityMatrix)rEngine.Execute(script));
        }
Exemple #3
0
        /// <summary>
        /// Points <paramref name="ldaConfig"/> at the given model repository and reads the metrics file
        /// identified by its <c>ExtrinsicMetricsProcessed</c> value.
        /// </summary>
        /// <param name="ldaConfig">Configuration to update; it is mutated but never reassigned here.</param>
        /// <param name="modelRepositoryPath">Value assigned to <c>ldaConfig.ModelRepositoryPath</c>.</param>
        /// <returns>Whatever <c>ReadMetrics</c> returns, or <c>null</c> when <paramref name="ldaConfig"/> is <c>null</c>.</returns>
        private static Dictionary <string, double> ExtractModelMetrics(ref LDAConfig ldaConfig, string modelRepositoryPath)
        {
            if (ldaConfig != null)
            {
                ldaConfig.ModelRepositoryPath = modelRepositoryPath;

                // Clearing the model directory resets all subdirectories (per the original note).
                ldaConfig.ModelDirectory = null;

                return ReadMetrics(ldaConfig.ExtrinsicMetricsProcessed);
            }

            return null;
        }
Exemple #4
0
        /// <summary>
        /// Fills the parameters of the model-statistics select command from <paramref name="ldaConfig"/> and
        /// <paramref name="metrics"/>, executes the command, and reads the <c>@dbName</c> and <c>@modelId</c>
        /// parameters back once execution has finished.
        /// </summary>
        /// <param name="ldaConfig">Source of the corpus, featurization, LDA and meta-data parameter values.</param>
        /// <param name="metrics">
        /// Metric values keyed by name. Each key is written to the parameter <c>"@" + key</c>, except
        /// <c>"goodtopics"</c>, which is written to <c>@goodTopicCount</c> truncated to an integer.
        /// </param>
        /// <param name="modelDbName">Receives the string form of the <c>@dbName</c> parameter after execution.</param>
        /// <param name="modelId">Receives the <c>@modelId</c> parameter after execution, parsed as an integer.</param>
        public void AddModel(LDAConfig ldaConfig, Dictionary <string, double> metrics, out string modelDbName, out int modelId)
        {
            var command    = this.modelStatistics.SelectCommand;
            var parameters = command.Parameters;

            // Corpus attributes
            parameters["@culture"].Value                          = "en-us";
            parameters["@corpus"].Value                           = ldaConfig.Corpus;
            parameters["@sample"].Value                           = ldaConfig.SampleName;
            parameters["@documentCount"].Value                    = ldaConfig.ModelStatistics.DocumentCount;
            parameters["@wordCount"].Value                        = ldaConfig.ModelStatistics.VocabularySize;
            parameters["@minWordDocumentFrequency"].Value         = ldaConfig.FeaturizationParameters.MinWordDocumentFrequency;
            parameters["@maxRelativeWordDocumentFrequency"].Value = ldaConfig.FeaturizationParameters.MaxRalativeWordDocumentFrequency;

            // Model attributes
            parameters["@topicCount"].Value = ldaConfig.LDAParameters.NumTopics;
            parameters["@alpha"].Value      = ldaConfig.LDAParameters.Alpha;
            parameters["@rho"].Value        = ldaConfig.LDAParameters.Rho;
            parameters["@minibatch"].Value  = ldaConfig.LDAParameters.Minibatch;
            parameters["@passes"].Value     = ldaConfig.LDAParameters.Passes;
            parameters["@initialT"].Value   = ldaConfig.LDAParameters.InitialT;
            parameters["@powerT"].Value     = ldaConfig.LDAParameters.PowerT;

            // Some meta data
            parameters["@modelName"].Value       = ldaConfig.modelName;
            parameters["@metricsFilePath"].Value = ldaConfig.ExtrinsicMetricsProcessed;

            // Metrics
            foreach (var metric in metrics)
            {
                if (metric.Key == "goodtopics")
                {
                    // The good-topic count is passed as an integer; the cast truncates.
                    parameters["@goodTopicCount"].Value = (int)metric.Value;
                }
                else
                {
                    parameters["@" + metric.Key].Value = metric.Value;
                }
            }

            // Execute. The connection is closed even when execution throws, and the reader is
            // disposed instead of being abandoned while still attached to the connection.
            command.Connection.Open();
            try
            {
                using (command.ExecuteReader())
                {
                    // No rows are consumed; only the parameters read below are of interest.
                }
            }
            finally
            {
                command.Connection.Close();
            }

            // Read only after the reader is closed: presumably these are output parameters,
            // which are populated at that point.
            modelDbName = parameters["@dbName"].Value.ToString();
            modelId     = int.Parse(parameters["@modelId"].Value.ToString());
        }
Exemple #5
0
        /// <summary>
        /// Writes one tab-separated row for a model (configuration values, then its metrics, then model name
        /// and metrics file path) to <paramref name="writer"/>, optionally preceded by a header row.
        /// </summary>
        /// <remarks>
        /// When <c>ExtractModelMetrics</c> yields <c>null</c>, nothing is written and
        /// <paramref name="success"/> is left untouched.
        /// </remarks>
        /// <param name="writer">Destination of the tab-separated output.</param>
        /// <param name="ldaConfig">Configuration whose values fill the fixed columns.</param>
        /// <param name="modelRepositoryPath">Repository path forwarded to <c>ExtractModelMetrics</c>.</param>
        /// <param name="success">Set to <c>true</c> after the row has been written.</param>
        /// <param name="needWriteTableHeader">When <c>true</c>, the column header line is written before the data row.</param>
        private static void AddModelParametersToExcel(StreamWriter writer, LDAConfig ldaConfig, string modelRepositoryPath, ref bool success, bool needWriteTableHeader)
        {
            var modelMetrics = ExtractModelMetrics(ref ldaConfig, modelRepositoryPath);
            if (modelMetrics == null)
            {
                return;
            }

            if (needWriteTableHeader)
            {
                // Header: fixed columns, one column per metric name, then the two trailing columns.
                StatusMessage.Write("Writing table header");
                writer.Write("Locale\tCorpus\tSample\tMin\tMax\tK\tAlpha\tRho\tMinibatch\tPasses\tInitialT\tPowerT");
                foreach (var entry in modelMetrics)
                {
                    writer.Write("\t{0}", entry.Key);
                }
                writer.WriteLine("\tmodelName\tmetricsFilePath");
            }

            StatusMessage.Write("Adding metrics to EXCEL: " + ldaConfig.ExtrinsicMetricsProcessed);

            // Fixed columns, in the same order as the header above.
            object[] fixedColumns =
            {
                ldaConfig.Locale,
                ldaConfig.Corpus,
                ldaConfig.SampleName,
                ldaConfig.FeaturizationParameters.MinWordDocumentFrequency,
                ldaConfig.FeaturizationParameters.MaxRalativeWordDocumentFrequency,
                ldaConfig.LDAParameters.NumTopics,
                ldaConfig.LDAParameters.Alpha,
                ldaConfig.LDAParameters.Rho,
                ldaConfig.LDAParameters.Minibatch,
                ldaConfig.LDAParameters.Passes,
                ldaConfig.LDAParameters.InitialT,
                ldaConfig.LDAParameters.PowerT
            };
            writer.Write("{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}\t{7}\t{8}\t{9}\t{10}\t{11}", fixedColumns);

            // Metric values are emitted in the dictionary's enumeration order, matching the header loop.
            foreach (var entry in modelMetrics)
            {
                writer.Write("\t{0}", entry.Value);
            }

            writer.WriteLine("\t{0}\t{1}", ldaConfig.modelName, ldaConfig.ExtrinsicMetricsProcessed);
            success = true;
        }
Exemple #6
0
        /// <summary>
        /// Determines the database name for a configuration: the value stored under
        /// <c>Options.DatabaseName</c> in <paramref name="parameters"/> when present, otherwise a name
        /// composed of the sample name and the two word-document-frequency settings.
        /// </summary>
        /// <param name="ldaConfig">Configuration supplying the sample name and featurization settings.</param>
        /// <param name="parameters">Options looked up for an explicit database name.</param>
        /// <returns>
        /// The database name, or an empty string (after writing a status message) when the sample name
        /// is null, empty or whitespace.
        /// </returns>
        private static string GetDatabaseName(LDAConfig ldaConfig, IDictionary <string, string> parameters)
        {
            if (string.IsNullOrWhiteSpace(ldaConfig.SampleName))
            {
                StatusMessage.Write("Sample name not specified.");
                return string.Empty;
            }

            // An explicitly supplied name wins over the generated one.
            string explicitName;
            if (parameters.TryGetValue(Options.DatabaseName, out explicitName))
            {
                return explicitName;
            }

            return string.Format(
                "{0}_{1}_{2}",
                ldaConfig.SampleName,
                ldaConfig.FeaturizationParameters.MinWordDocumentFrequency,
                ldaConfig.FeaturizationParameters.MaxRalativeWordDocumentFrequency);
        }
Exemple #7
0
 /// <summary>
 /// Creates a script from term-document matrices that have already been built.
 /// </summary>
 /// <param name="source">Source term-document matrix, stored as given</param>
 /// <param name="target">Target term-document matrix, stored as given</param>
 /// <param name="config">Configuration object, stored as given</param>
 public LDAScript(TermDocumentMatrix source, TermDocumentMatrix target, LDAConfig config)
 {
     this._source = source;
     this._target = target;
     this._config = config;
 }
Exemple #8
0
 /// <summary>
 /// Creates a script from artifact collections, wrapping each one in a new
 /// <c>TermDocumentMatrix</c>.
 /// </summary>
 /// <param name="source">Source artifacts, converted to a term-document matrix</param>
 /// <param name="target">Target artifacts, converted to a term-document matrix</param>
 /// <param name="config">Configuration object, stored as given</param>
 public LDAScript(TLArtifactsCollection source, TLArtifactsCollection target, LDAConfig config)
 {
     this._source = new TermDocumentMatrix(source);
     this._target = new TermDocumentMatrix(target);
     this._config = config;
 }
Exemple #9
0
 /// <summary>
 /// Creates the component with a fresh <c>LDAConfig</c>, which is kept in <c>_config</c> and also
 /// assigned to <c>Configuration</c>.
 /// </summary>
 /// <param name="log">Logger forwarded to the base constructor</param>
 public LDAComponent(ComponentLogger log)
     : base(log)
 {
     // Both members end up referring to the same configuration instance.
     Configuration = _config = new LDAConfig();
 }