Exemple #1
0
        /// <summary>
        /// Hides the progress bar, shows the browser frame, factorises the model's
        /// normalised matrix into weight and feature matrices, records the largest
        /// weight, adds one clickable <c>Cluster</c> per feature to the tag cloud
        /// and finally passes <c>_resultList</c> to <c>_ShowResults</c>.
        /// </summary>
        /// <param name="model">
        /// Model that supplies the normalised matrix and the term text for each word index.
        /// </param>
        private void _UpdateUI(DocumentTermModel model)
        {
            progressBar.Visibility  = Visibility.Collapsed;
            browserFrame.Visibility = Visibility.Visible;

            NNMFMatrix normalised = model.GetNormalised();
            normalised.Factorise(FEATURE_COUNT, out _weightMatrix, out _featureMatrix);

            // Scan every cell of the weight matrix for its largest value.
            // The running maximum starts at zero, so the result is never negative.
            float heaviest = 0;
            for (int x = 0; x < _weightMatrix.ShapeX; x++)
            {
                for (int y = 0; y < _weightMatrix.ShapeY; y++)
                {
                    float weight = _weightMatrix[x, y];
                    if (weight > heaviest)
                    {
                        heaviest = weight;
                    }
                }
            }
            _topWeight = heaviest;

            // One cluster per feature, labelled with that feature's ranked terms
            // joined by ", ".
            for (int feature = 0; feature < FEATURE_COUNT; feature++)
            {
                List<int>     rankedTerms = _RankFeatureWordList(_featureMatrix, feature);
                StringBuilder label       = new StringBuilder();
                foreach (int termIndex in rankedTerms)
                {
                    string term = model.GetTerm(termIndex);
                    // Separator goes in only once the label already has content.
                    if (label.Length > 0)
                    {
                        label.Append(", ");
                    }
                    label.Append(term);
                }

                // The colour index wraps every 7 features.
                Cluster cluster = new Cluster(label.ToString(), _colourList[feature % 7], feature);
                cluster.Clicked += Cluster_Clicked;
                tagCloudContainer.Children.Add(cluster);
            }

            _ShowResults(_resultList);
        }
Exemple #2
0
        /// <summary>
        /// Builds a matrix indexed by (term, document) whose entries are the
        /// product of a term frequency and an inverse document frequency.
        /// </summary>
        /// <remarks>
        /// For each term recorded in a document the stored value is
        /// <c>(count / document.totalTermCount) * ln(documentCount / n)</c>, where
        /// <c>n</c> is the <c>Count</c> of the <c>_keywordDocumentOccurence</c> entry
        /// for that term (presumably the number of documents containing the term;
        /// confirm against where that collection is populated). The value is
        /// narrowed to <c>float</c> before being stored. Only (term, document)
        /// pairs present in a document's <c>termCount</c> are written.
        /// </remarks>
        /// <returns>
        /// A new <c>NNMFMatrix</c> constructed with the number of terms and the
        /// number of documents, in that order.
        /// </returns>
        public NNMFMatrix GetNormalised()
        {
            int        documentCount = _documentList.Count;
            int        termTotal     = _termToIndex.Count;
            NNMFMatrix matrix        = new NNMFMatrix(termTotal, documentCount);

            // Held as a double so the ratio inside the logarithm is not an integer division.
            double totalDocuments = documentCount;

            // Second matrix index: position of the current document in _documentList.
            int documentIndex = 0;
            foreach (Document document in _documentList)
            {
                double termsInDocument = document.totalTermCount;
                foreach (KeyValuePair<int, uint> entry in document.termCount)
                {
                    int    termIndex = entry.Key;
                    double tf        = entry.Value / termsInDocument;
                    double idf       = Math.Log(totalDocuments / _keywordDocumentOccurence[termIndex].Count);
                    matrix.Set(termIndex, documentIndex, (float)(idf * tf));
                }
                documentIndex++;
            }

            return matrix;
        }