/// <summary>
/// Makes sure the corpus vocabulary is held in memory, loading it from
/// <c>corpusVocabularyFileName</c> the first time it is needed.
/// </summary>
/// <returns>
/// true if the vocabulary was already loaded or has been loaded by this call;
/// false if the vocabulary file could not be found.
/// </returns>
/// <exception cref="ObjectDisposedException">This instance has already been disposed.</exception>
private bool EnsureCorpusVocabulary()
{
    if (this.disposed)
    {
        // The single-string constructor interprets its argument as the object
        // name, so the explanatory text must go through the
        // (objectName, message) overload to end up as the exception message.
        throw new ObjectDisposedException(this.GetType().Name, "LDA Object has already been disposed.");
    }

    // Already loaded: nothing to do.
    if (this.corpusVocabulary != null)
    {
        return true;
    }

    if (!File.Exists(this.corpusVocabularyFileName))
    {
        StatusMessage.Write(string.Format("LDA.GetTopicAllocations: Error. Cannot find Corpus Vocabulary {0}", this.corpusVocabularyFileName));
        return false;
    }

    StatusMessage.Write("LDA.GetTopicAllocations: Loading Corpus Vocabulary");
    this.corpusVocabulary = CorpusVocabulary.NewInstance(File.ReadLines(this.corpusVocabularyFileName));
    return true;
}
/// <summary>
/// Initialize module and generate data files if not exist
/// </summary>
/// <remarks>
/// For each of the three derived data sets (topic words allocation, word
/// documents list map, topics info) a load is attempted first; the data is
/// built only when that load reports failure.
/// </remarks>
/// <param name="modelConfig">model config file</param>
/// <returns>true if successful</returns>
/// <exception cref="ArgumentNullException"><paramref name="modelConfig"/> is null.</exception>
public bool Initialize(LDAConfig modelConfig)
{
    if (modelConfig == null)
    {
        throw new ArgumentNullException("modelConfig");
    }

    // initialize vocabularies
    StatusMessage.Write("Loading corpus vocabulary file...");
    this.corpusVocabulary = CorpusVocabulary.Load(modelConfig.CorpusVocabulary);
    if (this.corpusVocabulary == null)
    {
        return false;
    }

    this.NumTopics = modelConfig.LDAParameters.NumTopics;
    this.NumWords = modelConfig.ModelStatistics.VocabularySize;
    this.documentTopicAllocationsFile = modelConfig.DocumentTopicAllocations;
    this.numDocs = modelConfig.ModelStatistics.DocumentCount;
    this.numPasses = modelConfig.LDAParameters.Passes;

    // Initialize array of topic metrics: Coherence, Specificity, Distinctiveness
    // NOTE(review): sizeof(MetricType) is the byte size of the type, not the
    // number of metrics. Presumably MetricType is an enum and the member count
    // was intended; left unchanged because other members may rely on the
    // current second dimension - confirm before changing.
    this.topicMetrics = new double[this.NumTopics, sizeof(MetricType)];

    // sanity check
    if (this.NumWords != this.corpusVocabulary.Count())
    {
        StatusMessage.Write("Number of vocabularies mismatch. Check your parameters.");
        return false;
    }

    // initialize topic words allocation
    this.wordTopicAllocationsFileName = modelConfig.WordTopicAllocations;
    string topicWordsAllocBinFile = this.wordTopicAllocationsFileName + BinFileExt;
    StatusMessage.Write("Loading topic words allocation bin file...");
    if (!this.LoadTopicWords(topicWordsAllocBinFile))
    {
        StatusMessage.Write("Generating topic words allocation bin file...");
        if (!this.BuildTopicWordsAllocation(this.wordTopicAllocationsFileName, topicWordsAllocBinFile))
        {
            return false;
        }
    }

    // initialize words documents list map
    // Path.Combine picks the platform separator and, when the configured path
    // has no directory part, keeps the result relative instead of producing a
    // rooted "\name" path.
    string wordDocsListMapFile = Path.Combine(
        Path.GetDirectoryName(modelConfig.FeaturizedDocuments) ?? string.Empty,
        WordDocsListMapFileName);
    StatusMessage.Write("Loading word documents list bin file...");
    if (!this.LoadWordDocsListMap(wordDocsListMapFile))
    {
        StatusMessage.Write("Generating word documents list bin file...");
        if (!this.BuildWordDocsListMap(modelConfig.FeaturizedDocuments, wordDocsListMapFile))
        {
            return false;
        }
    }

    // init topics info
    var topicsInfoFile = Path.Combine(
        Path.GetDirectoryName(modelConfig.DocumentTopicAllocations) ?? string.Empty,
        TopicsInfoFileName);
    if (!this.LoadTopicsInfo(topicsInfoFile))
    {
        if (!this.BuildTopicsInfo(topicsInfoFile))
        {
            return false;
        }
    }

    return true;
}