/// <summary>
/// Command-line entry point: copies the six positional arguments into a
/// <see cref="Parameter"/>, loads the corpus and runs LDA inference.
/// Any exception raised while parsing or running is passed to <c>LogHelper.Log</c>.
/// </summary>
/// <param name="args">
/// Exactly six values, in order: corpus path, alpha, beta, topic count,
/// inference iteration step, model export path.
/// </param>
static void Main(string[] args)
        {
            const int expectedArgumentCount = 6;

            // Wrong argument count: print the usage text and leave immediately
            // (no key press is awaited on this path).
            if (args.Length != expectedArgumentCount)
            {
                LogHelper.Log("Invalid arguments!");
                LogHelper.Log("Example) Core.exe [Corpus Path] [Alpha] [Beta] [Topic Count] [Inference Iteration Step] [Model Export Path]");

                return;
            }

            try
            {
                // Members are assigned in the same order as the positional arguments,
                // so the first malformed numeric argument is the one that throws.
                var parameter = new Parameter
                {
                    CorpusPath = args[0],
                    Alpha = double.Parse(args[1]),
                    Beta = double.Parse(args[2]),
                    TopicCount = int.Parse(args[3]),
                    TotalIterationStep = int.Parse(args[4]),
                    ModelPath = args[5]
                };

                // parameter adjustment by topic count (currently disabled)
                //parameter.Alpha /= parameter.TopicCount;

                parameter.LoadCorpus();
                new LDA(parameter).Inference();
            }
            catch (Exception ex)
            {
                LogHelper.Log(ex);
            }

            // Wait for a key press before the process ends.
            Console.ReadKey();
        }
        /// <summary>
        /// Click handler for the "load model" button: shows the model folder dialog and,
        /// when it is confirmed, imports the <c>Parameter</c>, the <c>LDAModel</c> and the
        /// vocabulary from the selected folder, then calls <c>UpdateTopicGridView</c>.
        /// </summary>
        /// <param name="sender">Event source (not used).</param>
        /// <param name="e">Event data (not used).</param>
        private void buttonLoadModel_Click(object sender, EventArgs e)
        {
            // Nothing to do unless the user confirmed the dialog.
            if (modelFolderDialog.ShowDialog() != DialogResult.OK)
            {
                return;
            }

            var modelFolder = modelFolderDialog.SelectedPath;

            Parameter = modelFolder.Import<Parameter>();
            LDAModel = modelFolder.Import<LDAModel>();
            ModelHelper.ImportVoca(modelFolder);

            UpdateTopicGridView();
        }
        /// <summary>
        /// Stores the given model and parameter, collects the highest-weighted words of
        /// every topic from <c>model.Phi</c>, builds the set of all such word ids, and
        /// resizes this object (<c>Width</c>/<c>Height</c>) from the topic count and the
        /// size of that set.
        /// </summary>
        /// <param name="model">Model whose <c>Phi</c> rows are ranked per topic.</param>
        /// <param name="parameter">Supplies <c>TopicCount</c>, the number of rows read from <c>Phi</c>.</param>
        /// <param name="topWordCount">How many top-weighted entries to keep per topic.</param>
        public void UpdateTopicModel(LDAModel model, Parameter parameter, int topWordCount = 5)
        {
            _ldaModel = model;
            _parameter = parameter;
            _topWordCount = topWordCount;

            var topicCount = _parameter.TopicCount;

            // Per topic: pair every Phi entry with its index, sort by value descending,
            // and keep the first _topWordCount pairs as (index, value) tuples.
            _topicTopWordDist = new List<List<Tuple<int, double>>>(topicCount);
            for (var topicId = 0; topicId < topicCount; topicId++)
            {
                var rankedWords = model.Phi[topicId]
                    .Select((weight, wordId) => new Tuple<int, double>(wordId, weight))
                    .OrderByDescending(pair => pair.Item2)
                    .Take(_topWordCount)
                    .ToList();

                _topicTopWordDist.Add(rankedWords);
            }

            // Union of the top word ids over all topics, inserted in ascending order.
            var sortedTopWordIds = _topicTopWordDist
                .SelectMany(topWords => topWords.Select(pair => pair.Item1))
                .Distinct()
                .OrderBy(wordId => wordId);
            _topWordSet = new HashSet<int>(sortedTopWordIds);

            // Resize: one cell step per topic horizontally and per top word vertically.
            // NOTE(review): the start offsets and trailing margin presumably leave room
            // for labels drawn elsewhere — confirm against the drawing code.
            const int startX = 160;
            const int startY = 160;
            const int cellLength = 30;
            const int margin = 50;

            var gridWidth = cellLength * (topicCount - 1);
            var gridHeight = cellLength * (_topWordSet.Count - 1);

            Width = startX + gridWidth + margin;
            Height = startY + gridHeight + margin;
        }
// Example #4
// 0
 /// <summary>
 /// Creates an LDA instance that keeps the given parameters and constructs a new
 /// <see cref="LDAModel"/> from them.
 /// </summary>
 /// <param name="parameters">Settings stored on this instance and handed to the <see cref="LDAModel"/> constructor.</param>
 public LDA(Parameter parameters)
 {
     _parameter = parameters;

     // The model is built from the same parameter object that was just stored.
     _ldaModel = new LDAModel(parameters);
 }
// Example #5
// 0
        // Topic assignment for each word in each document.
        // Layout: | Documents | * | Word count of each document |
        public List<List<int>> Z;

        #endregion Fields

        #region Constructors

        /// <summary>
        /// Stores the parameter object and allocates the model's containers, sized from
        /// the document list, <c>WordManager.VocabularyCount</c> and the topic count.
        /// </summary>
        /// <param name="parameter">Provides <c>DocumentList</c> and <c>TopicCount</c> used for sizing.</param>
        public LDAModel(Parameter parameter)
        {
            Parameter = parameter;

            var documents = Parameter.DocumentList;
            var documentTotal = documents.Count;
            var vocabularySize = WordManager.VocabularyCount;
            var topicTotal = Parameter.TopicCount;

            // Z: one inner list per document, holding a 0 for each of its document.Count entries.
            Z = new List<List<int>>(documentTotal);
            foreach (var document in documents)
            {
                Z.Add(Enumerable.Repeat(0, document.Count).ToList());
            }

            // Integer containers: NW is vocabulary x topic, ND is document x topic,
            // NWCount has one entry per topic.
            NW = ModelHelper.InitializeMatrix<int>(vocabularySize, topicTotal);
            ND = ModelHelper.InitializeMatrix<int>(documentTotal, topicTotal);
            NWCount = ModelHelper.InitializeList<int>(topicTotal);

            // Floating-point containers: Theta is document x topic, Phi is topic x vocabulary.
            Theta = ModelHelper.InitializeMatrix<double>(documentTotal, topicTotal);
            Phi = ModelHelper.InitializeMatrix<double>(topicTotal, vocabularySize);
        }