/**
 * Compares this cluster with another one for sorting purposes.
 *
 * Each cluster is scored by the average of analyzer.Similarity(Centroid, d)
 * over every entry d of its Documents. The cluster with the higher average
 * is considered better and sorts first.
 *
 * Clusters with fewer than two documents are treated as degenerate and are
 * pushed to the end: a degenerate cluster sorts after a regular one, and two
 * degenerate clusters compare as equal. Returning a tie in that case keeps
 * the comparison antisymmetric (a.CompareTo(b) is the negation of
 * b.CompareTo(a)), which sorting routines rely on. It also avoids averaging
 * over an empty cluster, which would divide zero by zero.
 *
 * other    - the cluster to compare against; must not be null.
 * analyzer - supplies the Similarity measure used for scoring.
 *
 * Returns -1 if this cluster should be placed before other, 1 if it should
 * be placed after, and 0 if the two rank equally.
 */
public int CompareTo(Cluster other, DocumentAnalyzer analyzer)
{
    // TODO: Find better method, this one favors smaller clusters

    // Put clusters with at most one article at the end.
    bool thisIsDegenerate = Documents.Count < 2;
    bool otherIsDegenerate = other.Documents.Count < 2;

    if (thisIsDegenerate && otherIsDegenerate)
    {
        // Neither cluster can be ranked above the other; report a tie so the
        // result does not depend on which side the comparison is called on.
        return 0;
    }

    if (thisIsDegenerate)
    {
        return 1;
    }

    if (otherIsDegenerate)
    {
        return -1;
    }

    double thisAverage = AverageSimilarityToCentroid(this, analyzer);
    double otherAverage = AverageSimilarityToCentroid(other, analyzer);

    // Higher average similarity ranks first, hence the inverted sign.
    if (thisAverage > otherAverage)
    {
        return -1;
    }

    if (thisAverage < otherAverage)
    {
        return 1;
    }

    return 0;
}

/**
 * Averages analyzer.Similarity(cluster.Centroid, d) over every entry d of
 * cluster.Documents. The caller must ensure the cluster is not empty.
 */
private static double AverageSimilarityToCentroid(Cluster cluster, DocumentAnalyzer analyzer)
{
    double total = 0;

    foreach (int document in cluster.Documents)
    {
        total += analyzer.Similarity(cluster.Centroid, document);
    }

    return total / cluster.Documents.Count;
}
/**
 * Creates a clusterer for the given articles.
 *
 * Keeps a reference to the supplied list (no copy is made) and builds a
 * DocumentAnalyzer from that same list.
 *
 * articles - the news articles this clusterer operates on.
 */
public DocumentClusterer(List<NewsArticle> articles)
{
    this.articles = articles;

    // The analyzer is constructed from the same list instance stored above.
    analyzer = new DocumentAnalyzer(articles);
}