Пример #1
0
        /// <summary>
        /// Writes a plain-text report of the given clusters to the console: a header line per
        /// cluster, the cluster's common terms, and the title/URL of its first few documents.
        /// </summary>
        /// <param name="clusters">Clusters to report on, printed in array order (numbered from 1).</param>
        /// <param name="topDocs">Maximum number of documents listed under each cluster.</param>
        /// <param name="toCluster">Document ids handed to the <c>d.ClusteringAnalyzer</c> constructor.</param>
        private void displayResults(d.Cluster[] clusters, int topDocs, short[] toCluster)
        {
            d.ClusteringAnalyzer analyzer = new d.ClusteringAnalyzer(index, toCluster);
            int totalDocuments = 0;
            int clusterNumber = 0;

            foreach (d.Cluster cluster in clusters)
            {
                clusterNumber++;

                // Header: ordinal, document count, then the analyzer's intra-distance value.
                string header = "CLUSTER " + clusterNumber + " documents=" + cluster.DocumentCount;
                header += " intra-Similarity: " + analyzer.GetIntraDistance(cluster);
                Console.WriteLine(header);

                // All common terms on a single line, each preceded by a space.
                for (IDictionaryEnumerator termCursor = cluster.CommonTermIds.GetEnumerator(); termCursor.MoveNext();)
                {
                    int termId = Convert.ToInt32(termCursor.Key);
                    Console.Write(" " + index.GetTerm(termId));
                }

                Console.WriteLine("\n");

                // List at most topDocs documents; the limit is tested before the enumerator advances.
                IDictionaryEnumerator docCursor = cluster.DocIds.GetEnumerator();
                for (int shown = 0; shown < topDocs && docCursor.MoveNext(); shown++)
                {
                    short docId = Convert.ToInt16(docCursor.Key);
                    Console.WriteLine(" " + index.GetTitle(docId));
                    Console.WriteLine(" " + index.GetURL(docId));
                    Console.WriteLine();
                }

                totalDocuments += cluster.DocumentCount;
            }

            Console.WriteLine("total clustered documents: " + totalDocuments);
        }
Пример #2
0
        /// <summary>
        /// Runs the query from <c>tbxQuery</c> against the index, clusters the top-ranked results
        /// and renders the clusters as HTML into <c>ltrResults</c>.
        /// </summary>
        /// <remarks>
        /// The numeric inputs (<c>tbxPagerank</c>, <c>tbxDocs</c>, <c>tbxKmeans</c>) are parsed with
        /// <see cref="Convert"/>, so non-numeric text raises <see cref="FormatException"/>.
        /// Every value read from the index (terms, titles, URLs) is HTML-encoded before it is
        /// written into the markup, so indexed content cannot inject markup or script.
        /// </remarks>
        private void execCluster()
        {
            float w = Convert.ToSingle(tbxPagerank.Text);

            d.Index            index    = new d.Index(getIndexDir());
            d.VSSearcher       searcher = new d.VSSearcher(index, w);
            d.ResultDocument[] results  = searcher.Search(tbxQuery.Text.Trim());

            if (results == null || results.Length == 0)
            {
                ltrResults.Text = "There were no results";
                return;
            }

            // Never cluster more documents than the search actually returned.
            int toCluster = Math.Min(Convert.ToInt32(tbxDocs.Text), results.Length);
            short[] docIds = new short[toCluster];
            for (int i = 0; i < toCluster; i++)
            {
                docIds[i] = results[i].DocId;
            }

            int k = Convert.ToInt32(tbxKmeans.Text);

            // NOTE(review): the original had a separate radBuckshot branch that was identical to
            // the fallback (both built a BisectingClustering) - looks like a copy/paste. If a
            // Buckshot implementation exists, it should be selected here when radBuckshot is checked.
            d.Cluster[] clusters;
            if (radKmeans.Checked)
            {
                clusters = new d.KMeansClustering(index, k, true).GetClusters(docIds, 10);
            }
            else
            {
                clusters = new d.BisectingClustering(index, k).GetClusters(docIds, 10);
            }

            // Number of documents listed under each cluster.
            const int topDocs = 3;

            StringBuilder sb = new StringBuilder();

            sb.AppendFormat("<p style='border-bottom:solid 1px #999999;'>Your search returned <b>{0}</b> results. " +
                            " Displaying the top {1} documents clustered into at most {2} clusters:</p>", results.Length, toCluster, k);

            for (int i = 0; i < clusters.Length; i++)
            {
                sb.AppendFormat("<p style='margin-bottom:-10px;font-weight:bold;font-size:11pt;'>Cluster {0}", i + 1);
                sb.Append("<p>Common terms: ");
                IDictionaryEnumerator en = clusters[i].CommonTermIds.GetEnumerator();
                while (en.MoveNext())
                {
                    sb.AppendFormat("{0} ", encodeForHtml(index.GetTerm(Convert.ToInt32(en.Key))));
                }

                sb.Append("<p>");

                en = clusters[i].DocIds.GetEnumerator();
                int count = 0;
                while (count < topDocs && en.MoveNext())
                {
                    short docId = Convert.ToInt16(en.Key);

                    // "%%" in the value returned by GetURL is replaced with "/" to form the link.
                    // The encoded form is safe both inside the single-quoted href and as link text.
                    string url = encodeForHtml("http://" + index.GetURL(docId).Replace("%%", "/"));

                    sb.AppendFormat("<p style='margin-bottom:-10px;'><a style='font-size:11pt;' href='{0}'>{1}</a>", url, encodeForHtml(index.GetTitle(docId)));
                    sb.AppendFormat("<p><a style='color:green;font-size:9pt' href='{0}'>{0}</a>", url);
                    count++;
                }
                sb.Append("<p style='border-bottom: solid 1px #999999;'>");
            }

            ltrResults.Text = sb.ToString();
        }

        /// <summary>
        /// HTML-encodes a value for use as element text or inside a quoted attribute.
        /// </summary>
        /// <param name="value">Value to encode; <c>null</c> yields an empty string.</param>
        /// <returns>The encoded text, with single quotes always escaped.</returns>
        private static string encodeForHtml(object value)
        {
            string text = Convert.ToString(value);
            if (string.IsNullOrEmpty(text))
            {
                return string.Empty;
            }

            // The markup above uses single-quoted attributes, and older framework versions of
            // HtmlEncode leave "'" untouched, so escape it explicitly.
            return System.Web.HttpUtility.HtmlEncode(text).Replace("'", "&#39;");
        }