/// <summary>
/// Command-line entry point: reads the six positional arguments into a <c>Parameter</c>,
/// loads the corpus and runs LDA inference.
/// </summary>
/// <param name="args">
/// Exactly six values, in order: corpus path, alpha, beta, topic count,
/// inference iteration step, model export path.
/// </param>
static void Main(string[] args)
{
    if (args.Length != 6)
    {
        LogHelper.Log("Invalid arguments!");
        LogHelper.Log("Example) Core.exe [Corpus Path] [Alpha] [Beta] [Topic Count] [Inference Iteration Step] [Model Export Path]");
        return;
    }

    try
    {
        // Command-line numbers are machine-readable input: parse them with the invariant
        // culture so that "0.1" means 0.1 regardless of the regional settings of the machine
        // (under a comma-decimal culture the culture-sensitive overload reads it as 1).
        var invariant = System.Globalization.CultureInfo.InvariantCulture;

        var parameter = new Parameter
        {
            CorpusPath = args[0],
            Alpha = double.Parse(args[1], invariant),
            Beta = double.Parse(args[2], invariant),
            TopicCount = int.Parse(args[3], invariant),
            TotalIterationStep = int.Parse(args[4], invariant),
            ModelPath = args[5]
        };

        // parameter adjustment by topic count (currently disabled)
        //parameter.Alpha /= parameter.TopicCount;

        parameter.LoadCorpus();

        var lda = new LDA(parameter);
        lda.Inference();
    }
    catch (Exception ex)
    {
        // Any failure (malformed number, corpus loading, inference) is logged instead of
        // terminating the process with an unhandled exception.
        LogHelper.Log(ex);
    }

    // Wait for a key press before exiting. The invalid-arguments path above returns
    // before reaching this line.
    Console.ReadKey();
}
/// <summary>
/// Click handler for the load-model button: shows the model folder dialog and, when the
/// user confirms it, imports the parameter, the LDA model and the vocabulary from the
/// selected folder, then refreshes the topic grid view.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void buttonLoadModel_Click(object sender, EventArgs e)
{
    // Nothing to load unless the dialog was confirmed.
    if (modelFolderDialog.ShowDialog() != DialogResult.OK)
    {
        return;
    }

    var modelDirectory = modelFolderDialog.SelectedPath;

    // Import order is kept as-is: parameter, model, then vocabulary.
    Parameter = modelDirectory.Import<Parameter>();
    LDAModel = modelDirectory.Import<LDAModel>();
    ModelHelper.ImportVoca(modelDirectory);

    UpdateTopicGridView();
}
/// <summary>
/// Stores the given model and parameter, extracts the highest-valued words of every topic
/// from <c>model.Phi</c>, collects the ids of those words into a set, and resizes this
/// object (<c>Width</c> / <c>Height</c>) to fit a topic-by-word grid.
/// </summary>
/// <param name="model">Model whose <c>Phi</c> rows are ranked, one row per topic.</param>
/// <param name="parameter">Parameter object supplying <c>TopicCount</c>.</param>
/// <param name="topWordCount">How many top entries to keep per topic (default 5).</param>
public void UpdateTopicModel(LDAModel model, Parameter parameter, int topWordCount = 5)
{
    _ldaModel = model;
    _parameter = parameter;
    _topWordCount = topWordCount;

    var topicTotal = _parameter.TopicCount;

    // For each topic: pair every Phi value with its index, sort by value descending
    // and keep the first _topWordCount pairs (Item1 = index, Item2 = value).
    _topicTopWordDist = new List<List<Tuple<int, double>>>(topicTotal);
    for (var topic = 0; topic < topicTotal; topic++)
    {
        var ranked = model.Phi[topic]
            .Select((value, wordId) => new Tuple<int, double>(wordId, value))
            .OrderByDescending(pair => pair.Item2);

        _topicTopWordDist.Add(ranked.Take(_topWordCount).ToList());
    }

    // Union of the top-word ids over all topics, fed to the set in ascending order.
    var orderedWordIds = _topicTopWordDist
        .SelectMany(topWords => topWords.Select(pair => pair.Item1))
        .Distinct()
        .OrderBy(wordId => wordId);
    _topWordSet = new HashSet<int>(orderedWordIds);

    // Resize: grid origin at (160, 160), 30 units per cell, 50 units of trailing margin.
    const int originX = 160;
    const int originY = 160;
    const int cellSize = 30;
    const int margin = 50;

    var gridWidth = cellSize * (topicTotal - 1);
    var gridHeight = cellSize * (_topWordSet.Count - 1);

    Width = originX + gridWidth + margin;
    Height = originY + gridHeight + margin;
}
/// <summary>
/// Creates an LDA instance that keeps the given parameters and a new
/// <c>LDAModel</c> constructed from those same parameters.
/// </summary>
/// <param name="parameters">Parameter object stored by this instance and handed to the model.</param>
public LDA(Parameter parameters)
{
    _parameter = parameters;
    _ldaModel = new LDAModel(parameters);
}
// Topic assignment for each word in documents: | Documents | * | Word Count for each document |
public List<List<int>> Z;

#endregion Fields

#region Constructors

/// <summary>
/// Creates a model bound to <paramref name="parameter"/> and allocates its containers,
/// sized from the document list, the vocabulary count and the topic count.
/// </summary>
/// <param name="parameter">
/// Parameter object whose <c>DocumentList</c> and <c>TopicCount</c> determine the sizes.
/// </param>
public LDAModel(Parameter parameter)
{
    Parameter = parameter;

    #region initialize LDA model hyper parameters

    var documentTotal = Parameter.DocumentList.Count;
    var vocabularySize = WordManager.VocabularyCount;
    var topicTotal = Parameter.TopicCount;

    // Z: one inner list per document, holding one 0 entry per word of that document.
    Z = new List<List<int>>(documentTotal);
    foreach (var document in Parameter.DocumentList)
    {
        Z.Add(Enumerable.Repeat(0, document.Count).ToList());
    }

    // NW: int matrix created with (vocabulary count, topic count).
    NW = ModelHelper.InitializeMatrix<int>(vocabularySize, topicTotal);

    // ND: int matrix created with (document count, topic count).
    ND = ModelHelper.InitializeMatrix<int>(documentTotal, topicTotal);

    // NWCount: int list created with (topic count).
    NWCount = ModelHelper.InitializeList<int>(topicTotal);

    // Theta: double matrix created with (document count, topic count).
    Theta = ModelHelper.InitializeMatrix<double>(documentTotal, topicTotal);

    // Phi: double matrix created with (topic count, vocabulary count).
    Phi = ModelHelper.InitializeMatrix<double>(topicTotal, vocabularySize);

    #endregion
}