Example #1
0
        /**
         * Used to rank clusters against each other. Returns a negative number
         * when this cluster should be sorted first, a positive number when it
         * should be sorted after `other`, and zero when the two rank equally.
         *
         * A cluster ranks better when the average similarity between its
         * centroid and its own documents (as reported by analyzer.Similarity)
         * is higher.
         *
         * Clusters that hold a single document are pushed to the end. Two
         * single-document clusters compare as equal, so that a.CompareTo(b)
         * and b.CompareTo(a) never both claim to sort last; an asymmetric
         * comparison would violate the ordering contract sort routines rely on.
         */
        public int CompareTo(Cluster other, DocumentAnalyzer analyzer)
        {
            // TODO: Find better method, this one favors smaller clusters

            bool thisIsSingle = Documents.Count == 1;
            bool otherIsSingle = other.Documents.Count == 1;

            // Put clusters with only one article at the ends
            if (thisIsSingle && otherIsSingle)
            {
                // Neither single-document cluster outranks the other.
                return(0);
            }
            if (thisIsSingle)
            {
                return(1);
            }
            if (otherIsSingle)
            {
                return(-1);
            }

            double simA = AverageCentroidSimilarity(this, analyzer);
            double simB = AverageCentroidSimilarity(other, analyzer);

            // Higher average similarity sorts first, hence the inverted sign.
            if (simA > simB)
            {
                return(-1);
            }
            else if (simA < simB)
            {
                return(1);
            }
            else
            {
                return(0);
            }
        }

        /**
         * Averages analyzer.Similarity(cluster.Centroid, document) over every
         * document in the given cluster.
         */
        private static double AverageCentroidSimilarity(Cluster cluster, DocumentAnalyzer analyzer)
        {
            double total = 0;

            foreach (int i in cluster.Documents)
            {
                total += analyzer.Similarity(cluster.Centroid, i);
            }

            return(total / cluster.Documents.Count);
        }
Example #2
0
 /**
  * Creates a clusterer over the given articles. The article list is kept
  * as-is, and a DocumentAnalyzer is built from the same list.
  */
 public DocumentClusterer(List <NewsArticle> articles)
 {
     analyzer = new DocumentAnalyzer(articles);
     this.articles = articles;
 }