/// <summary>
/// Extracts the model's metrics, records the model through <c>modelsDb.AddModel</c>, and returns a
/// <see cref="ModelDatabase"/> built from the database name that call reports.
/// </summary>
/// <param name="modelsDb">Models database that receives the new record; its <c>serverName</c> is reused for the returned database object.</param>
/// <param name="ldaConfig">Configuration of the model being registered.</param>
/// <param name="modelRepositoryPath">Repository path forwarded to <c>ExtractModelMetrics</c>.</param>
/// <param name="success">Set to <c>true</c> once <c>AddModel</c> returns without throwing; left untouched otherwise.</param>
/// <returns>The model database object, or <c>null</c> when no metrics could be extracted.</returns>
private static ModelDatabase AddModelParametersToModelsDb(ModelsDb modelsDb, LDAConfig ldaConfig, string modelRepositoryPath, ref bool success)
{
    var modelMetrics = ExtractModelMetrics(ref ldaConfig, modelRepositoryPath);
    if (modelMetrics == null)
    {
        return null;
    }

    string databaseName;
    int newModelId; // Required by AddModel's signature; not used afterwards.

    try
    {
        StatusMessage.Write("Adding metrics to Db: " + ldaConfig.ExtrinsicMetricsProcessed);
        modelsDb.AddModel(ldaConfig, modelMetrics, out databaseName, out newModelId);
        success = true;
    }
    catch (Exception ex)
    {
        // Log and let the caller decide how to react; the original stack trace is preserved.
        StatusMessage.Write("Could not add a record to the Topic models db:" + ex);
        throw;
    }

    var modelDatabase = new ModelDatabase("", modelsDb.serverName, databaseName, false);

    // The outcome of Open() is deliberately ignored. NOTE(review): the original comment suggests
    // a true result means the database had already been created - confirm against ModelDatabase.
    modelDatabase.Open();

    return modelDatabase;
}
/// <summary>
/// Loads the source/target artifacts and the LDA configuration from the workspace, runs the LDA
/// script through the R engine, and stores the resulting similarity matrix under "Similarities".
/// </summary>
public override void Compute()
{
    var sourceArtifacts = (TLArtifactsCollection)Workspace.Load("SourceArtifacts");
    var targetArtifacts = (TLArtifactsCollection)Workspace.Load("TargetArtifacts");
    var ldaConfig = (LDAConfig)Workspace.Load("Config");

    // The engine is created before the script, as in the original statement order.
    var rEngine = new REngine(ldaConfig.RScriptPath);
    var script = new LDAScript(sourceArtifacts, targetArtifacts, ldaConfig);

    var similarities = (TLSimilarityMatrix)rEngine.Execute(script);
    Workspace.Store("Similarities", similarities);
}
/// <summary>
/// Points <paramref name="ldaConfig"/> at the given model repository and reads the metrics from the
/// file named by its <c>ExtrinsicMetricsProcessed</c> property.
/// </summary>
/// <param name="ldaConfig">Configuration to update; its <c>ModelRepositoryPath</c> and <c>ModelDirectory</c> are overwritten.</param>
/// <param name="modelRepositoryPath">Value assigned to <c>ldaConfig.ModelRepositoryPath</c>.</param>
/// <returns>Whatever <c>ReadMetrics</c> returns, or <c>null</c> when <paramref name="ldaConfig"/> is <c>null</c>.</returns>
private static Dictionary<string, double> ExtractModelMetrics(ref LDAConfig ldaConfig, string modelRepositoryPath)
{
    if (ldaConfig != null)
    {
        ldaConfig.ModelRepositoryPath = modelRepositoryPath;

        // Per the original note, clearing ModelDirectory resets all subdirectories.
        ldaConfig.ModelDirectory = null;

        return ReadMetrics(ldaConfig.ExtrinsicMetricsProcessed);
    }

    return null;
}
/// <summary>
/// Fills the parameters of the <c>modelStatistics</c> select command from the configuration and
/// metrics, executes the command, and reports the database name and model id it produced.
/// </summary>
/// <param name="ldaConfig">Configuration supplying corpus, featurization and LDA parameter values.</param>
/// <param name="metrics">
/// Metric values keyed by name. The key "goodtopics" is stored as an integer in
/// <c>@goodTopicCount</c>; every other key is stored in the parameter named "@" + key.
/// </param>
/// <param name="modelDbName">Receives the string value of the <c>@dbName</c> parameter after execution.</param>
/// <param name="modelId">Receives the integer parsed from the <c>@modelId</c> parameter after execution.</param>
/// <exception cref="System.ArgumentNullException">
/// <paramref name="ldaConfig"/> or <paramref name="metrics"/> is <c>null</c>.
/// </exception>
public void AddModel(LDAConfig ldaConfig, Dictionary<string, double> metrics, out string modelDbName, out int modelId)
{
    // Fail before touching the shared command so a bad call cannot leave it half-populated.
    if (ldaConfig == null)
    {
        throw new System.ArgumentNullException("ldaConfig");
    }

    if (metrics == null)
    {
        throw new System.ArgumentNullException("metrics");
    }

    var command = this.modelStatistics.SelectCommand;
    var parameters = command.Parameters;

    // Corpus attributes
    parameters["@culture"].Value = "en-us";
    parameters["@corpus"].Value = ldaConfig.Corpus;
    parameters["@sample"].Value = ldaConfig.SampleName;
    parameters["@documentCount"].Value = ldaConfig.ModelStatistics.DocumentCount;
    parameters["@wordCount"].Value = ldaConfig.ModelStatistics.VocabularySize;
    parameters["@minWordDocumentFrequency"].Value = ldaConfig.FeaturizationParameters.MinWordDocumentFrequency;
    parameters["@maxRelativeWordDocumentFrequency"].Value = ldaConfig.FeaturizationParameters.MaxRalativeWordDocumentFrequency;

    // Model attributes
    parameters["@topicCount"].Value = ldaConfig.LDAParameters.NumTopics;
    parameters["@alpha"].Value = ldaConfig.LDAParameters.Alpha;
    parameters["@rho"].Value = ldaConfig.LDAParameters.Rho;
    parameters["@minibatch"].Value = ldaConfig.LDAParameters.Minibatch;
    parameters["@passes"].Value = ldaConfig.LDAParameters.Passes;
    parameters["@initialT"].Value = ldaConfig.LDAParameters.InitialT;
    parameters["@powerT"].Value = ldaConfig.LDAParameters.PowerT;

    // Some meta data
    parameters["@modelName"].Value = ldaConfig.modelName;
    parameters["@metricsFilePath"].Value = ldaConfig.ExtrinsicMetricsProcessed;

    // Metrics
    foreach (var metric in metrics)
    {
        if (metric.Key == "goodtopics")
        {
            parameters["@goodTopicCount"].Value = (int)metric.Value;
        }
        else
        {
            parameters["@" + metric.Key].Value = metric.Value;
        }
    }

    // Execute. The connection is closed even when execution throws.
    var connection = command.Connection;
    connection.Open();
    try
    {
        // The result rows are not needed, but the reader must still be disposed: it holds the
        // connection, and ADO.NET providers such as SqlClient only fill in output parameter
        // values once the reader is closed.
        // NOTE(review): assumes modelStatistics is an ADO.NET data adapter - confirm.
        using (command.ExecuteReader())
        {
        }
    }
    finally
    {
        connection.Close();
    }

    // @dbName and @modelId are read back after execution; presumably they are declared as
    // output parameters where the command is configured - verify there.
    modelDbName = parameters["@dbName"].Value.ToString();
    modelId = int.Parse(parameters["@modelId"].Value.ToString());
}
/// <summary>
/// Appends one tab-separated row describing the model (its parameters followed by its metrics) to
/// <paramref name="writer"/>, optionally preceded by a header row.
/// </summary>
/// <param name="writer">Destination the header and data rows are written to.</param>
/// <param name="ldaConfig">Configuration of the model being reported.</param>
/// <param name="modelRepositoryPath">Repository path forwarded to <c>ExtractModelMetrics</c>.</param>
/// <param name="success">Set to <c>true</c> after the data row has been written; left untouched when no metrics are available.</param>
/// <param name="needWriteTableHeader">When <c>true</c>, a header row is written before the data row.</param>
private static void AddModelParametersToExcel(StreamWriter writer, LDAConfig ldaConfig, string modelRepositoryPath, ref bool success, bool needWriteTableHeader)
{
    var modelMetrics = ExtractModelMetrics(ref ldaConfig, modelRepositoryPath);
    if (modelMetrics == null)
    {
        return;
    }

    if (needWriteTableHeader)
    {
        // Header: fixed columns, one column per metric key, then the two trailing meta columns.
        StatusMessage.Write("Writing table header");
        writer.Write("Locale\tCorpus\tSample\tMin\tMax\tK\tAlpha\tRho\tMinibatch\tPasses\tInitialT\tPowerT");
        foreach (var column in modelMetrics)
        {
            writer.Write("\t{0}", column.Key);
        }

        writer.WriteLine("\tmodelName\tmetricsFilePath");
    }

    StatusMessage.Write("Adding metrics to EXCEL: " + ldaConfig.ExtrinsicMetricsProcessed);

    // Data row, in the same column order as the header.
    writer.Write(
        "{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}\t{7}\t{8}\t{9}\t{10}\t{11}",
        ldaConfig.Locale,
        ldaConfig.Corpus,
        ldaConfig.SampleName,
        ldaConfig.FeaturizationParameters.MinWordDocumentFrequency,
        ldaConfig.FeaturizationParameters.MaxRalativeWordDocumentFrequency,
        ldaConfig.LDAParameters.NumTopics,
        ldaConfig.LDAParameters.Alpha,
        ldaConfig.LDAParameters.Rho,
        ldaConfig.LDAParameters.Minibatch,
        ldaConfig.LDAParameters.Passes,
        ldaConfig.LDAParameters.InitialT,
        ldaConfig.LDAParameters.PowerT);

    foreach (var cell in modelMetrics)
    {
        writer.Write("\t{0}", cell.Value);
    }

    writer.WriteLine("\t{0}\t{1}", ldaConfig.modelName, ldaConfig.ExtrinsicMetricsProcessed);

    success = true;
}
/// <summary>
/// Determines the database name: the value stored under <c>Options.DatabaseName</c> in
/// <paramref name="parameters"/> when present, otherwise a name composed from the sample name and
/// the two word-document-frequency featurization settings.
/// </summary>
/// <param name="ldaConfig">Configuration supplying the sample name and featurization parameters.</param>
/// <param name="parameters">Options looked up for an explicit database name.</param>
/// <returns>The database name, or an empty string when the sample name is null, empty or whitespace.</returns>
private static string GetDatabaseName(LDAConfig ldaConfig, IDictionary<string, string> parameters)
{
    if (string.IsNullOrWhiteSpace(ldaConfig.SampleName))
    {
        StatusMessage.Write("Sample name not specified.");
        return string.Empty;
    }

    // An explicitly supplied name wins over the generated one.
    string explicitName;
    if (parameters.TryGetValue(Options.DatabaseName, out explicitName))
    {
        return explicitName;
    }

    return string.Format(
        "{0}_{1}_{2}",
        ldaConfig.SampleName,
        ldaConfig.FeaturizationParameters.MinWordDocumentFrequency,
        ldaConfig.FeaturizationParameters.MaxRalativeWordDocumentFrequency);
}
/// <summary>
/// Creates the script from term-document matrices that have already been built.
/// </summary>
/// <param name="source">Term-document matrix of the source artifacts</param>
/// <param name="target">Term-document matrix of the target artifacts</param>
/// <param name="config">Configuration object</param>
public LDAScript(TermDocumentMatrix source, TermDocumentMatrix target, LDAConfig config)
{
    // The parameterless base constructor runs implicitly before these assignments.
    this._config = config;
    this._source = source;
    this._target = target;
}
/// <summary>
/// Creates the script from artifact collections, wrapping each one in a new
/// <see cref="TermDocumentMatrix"/>.
/// </summary>
/// <param name="source">Source artifacts</param>
/// <param name="target">Target artifacts</param>
/// <param name="config">Configuration object</param>
public LDAScript(TLArtifactsCollection source, TLArtifactsCollection target, LDAConfig config)
{
    // The parameterless base constructor runs implicitly before these assignments.
    this._config = config;

    // The source matrix is built before the target matrix, as in the original.
    this._source = new TermDocumentMatrix(source);
    this._target = new TermDocumentMatrix(target);
}
/// <summary>
/// Creates the component with a fresh <see cref="LDAConfig"/>, which is kept in <c>_config</c> and
/// also assigned to <c>Configuration</c>.
/// </summary>
/// <param name="log">Logger passed through to the base constructor.</param>
public LDAComponent(ComponentLogger log)
    : base(log)
{
    var ldaConfig = new LDAConfig();

    // Both members refer to the same configuration instance.
    _config = ldaConfig;
    Configuration = ldaConfig;
}