예제 #1
0
        /// <summary>
        /// Builds <c>GetK</c> clusters by repeatedly running a two-way
        /// <c>KMeansClustering</c> over the documents that are still unassigned.
        /// </summary>
        /// <param name="docIds">Ids of the documents to cluster.</param>
        /// <param name="commonTerms">Forwarded unchanged to <c>GetClusters</c> and <c>LoadCommonTerms</c>.</param>
        /// <returns>The value produced by <c>LoadCommonTerms</c> for the filled cluster array.</returns>
        public override Cluster[] CalculateClusters(short[] docIds, int commonTerms)
        {
            Cluster[] result   = new Cluster[GetK];
            int       lastSlot = result.Length - 1;
            int       slot     = 0;

            // Every slot except the last one is produced by one two-way split.
            for (; slot < lastSlot; slot++)
            {
                Cluster[] halves = new KMeansClustering(GetIndex, 2, false).GetClusters(docIds, commonTerms);

                // NOTE(review): getDocs is not visible here; presumably it stores one half in
                // result[slot] and hands back the ids that still need splitting - confirm.
                docIds = getDocs(halves, result, slot);
            }

            // Whatever ids are left over form the final cluster.
            result[slot] = new Cluster();
            for (int i = 0; i < docIds.Length; i++)
            {
                result[slot].AddDoc(docIds[i]);
            }

            return LoadCommonTerms(result, commonTerms);
        }
예제 #2
0
        /// <summary>
        /// Runs the query from <c>tbxQuery</c> against the index, clusters the first
        /// <c>tbxDocs</c> results into <c>tbxKmeans</c> clusters and writes an HTML summary of
        /// every cluster (its common terms plus up to three documents) to <c>ltrResults</c>.
        /// </summary>
        /// <remarks>
        /// Terms, titles and URLs read back from the index are HTML-encoded before being placed
        /// in the markup: they are not produced by this method and must not be interpreted as
        /// markup or be able to terminate the single-quoted <c>href</c> attributes.
        /// </remarks>
        /// <exception cref="FormatException">One of the numeric text boxes does not hold a valid number.</exception>
        /// <exception cref="OverflowException">One of the numeric text boxes holds a value outside the target type's range.</exception>
        private void execCluster()
        {
            // Number of common terms requested from every clustering implementation.
            const int commonTermCount = 10;
            // Maximum number of documents listed under each cluster.
            const int topDocs = 3;

            float w = Convert.ToSingle(tbxPagerank.Text);

            d.Index            index    = new d.Index(getIndexDir());
            d.VSSearcher       searcher = new d.VSSearcher(index, w);
            d.ResultDocument[] results  = searcher.Search(tbxQuery.Text.Trim());

            if (results == null || results.Length == 0)
            {
                ltrResults.Text = "There were no results";
                return;
            }

            // Never try to cluster more documents than the search returned.
            int toCluster = Math.Min(Convert.ToInt32(tbxDocs.Text), results.Length);
            short[] docIds = new short[toCluster];
            for (int i = 0; i < toCluster; i++)
            {
                docIds[i] = results[i].DocId;
            }

            int k = Convert.ToInt32(tbxKmeans.Text);

            d.Cluster[] clusters;
            if (radKmeans.Checked)
            {
                clusters = new d.KMeansClustering(index, k, true).GetClusters(docIds, commonTermCount);
            }
            else if (radBuckshot.Checked)
            {
                // NOTE(review): this branch is identical to the fallback below, so selecting
                // "buckshot" currently runs bisecting clustering - confirm which class was intended.
                clusters = new d.BisectingClustering(index, k).GetClusters(docIds, commonTermCount);
            }
            else
            {
                clusters = new d.BisectingClustering(index, k).GetClusters(docIds, commonTermCount);
            }

            StringBuilder sb = new StringBuilder();

            sb.AppendFormat("<p style='border-bottom:solid 1px #999999;'>Your search returned <b>{0}</b> results. " +
                            " Displaying the top {1} documents clustered into at most {2} clusters:</p>", results.Length, toCluster, k);

            for (int i = 0; i < clusters.Length; i++)
            {
                sb.AppendFormat("<p style='margin-bottom:-10px;font-weight:bold;font-size:11pt;'>Cluster {0}", i + 1);
                sb.Append("<p>Common terms: ");

                IDictionaryEnumerator termEnum = clusters[i].CommonTermIds.GetEnumerator();
                while (termEnum.MoveNext())
                {
                    // Encode the term so index content cannot inject markup.
                    sb.AppendFormat("{0} ", System.Web.HttpUtility.HtmlEncode(index.GetTerm(Convert.ToInt32(termEnum.Key))));
                }

                sb.Append("<p>");

                IDictionaryEnumerator docEnum = clusters[i].DocIds.GetEnumerator();
                for (int shown = 0; shown < topDocs && docEnum.MoveNext(); shown++)
                {
                    short docId = Convert.ToInt16(docEnum.Key);

                    // The index stores "/" as "%%" in URLs; restore it, then encode the result so
                    // characters such as "&" or "'" are safe inside the quoted href and the link text.
                    string url   = System.Web.HttpUtility.HtmlEncode("http://" + index.GetURL(docId).Replace("%%", "/"));
                    string title = System.Web.HttpUtility.HtmlEncode(index.GetTitle(docId));

                    sb.AppendFormat("<p style='margin-bottom:-10px;'><a style='font-size:11pt;' href='{0}'>{1}</a>", url, title);
                    sb.AppendFormat("<p><a style='color:green;font-size:9pt' href='{0}'>{0}</a>", url);
                }

                sb.Append("<p style='border-bottom: solid 1px #999999;'>");
            }

            ltrResults.Text = sb.ToString();
        }
예제 #3
0
        /// <summary>
        /// Clusters the given documents with a <c>KMeansClustering</c> that is constructed from
        /// <c>GetIndex</c>, <c>GetK</c> and the result of <c>GetCentroids(docIds)</c>.
        /// </summary>
        /// <param name="docIds">Ids of the documents to cluster; also used to obtain the centroids.</param>
        /// <param name="commonTerms">Forwarded unchanged to <c>GetClusters</c>.</param>
        /// <returns>The clusters returned by <c>KMeansClustering.GetClusters</c>.</returns>
        public override Cluster[] CalculateClusters(short[] docIds, int commonTerms)
        {
            // NOTE(review): the centroids are presumably the starting points for k-means and the
            // trailing 'false' a constructor switch whose meaning is not visible here - confirm.
            return new KMeansClustering(GetIndex, GetK, GetCentroids(docIds), false)
                   .GetClusters(docIds, commonTerms);
        }