/// <summary>
/// Builds one <see cref="CentroidCluster"/> for every immediate subdirectory of
/// <paramref name="folderPath"/>.
/// </summary>
/// <param name="folderPath">Directory whose top-level subdirectories are enumerated.</param>
/// <param name="idfThreshold">Forwarded unchanged to each <see cref="CentroidCluster"/> constructor.</param>
/// <param name="keepWords">Forwarded unchanged to each <see cref="CentroidCluster"/> constructor.</param>
/// <returns>
/// An array holding one cluster per subdirectory, in the order the directories were
/// returned by <c>Directory.GetDirectories</c>.
/// </returns>
public static CentroidCluster[] fromFolder(string folderPath, double idfThreshold, int keepWords)
{
    // Only direct children are considered; nested directories are not searched.
    string[] subDirs = Directory.GetDirectories(folderPath, "*", SearchOption.TopDirectoryOnly);
    CentroidCluster[] result = new CentroidCluster[subDirs.Length];

    for (int idx = 0; idx < subDirs.Length; idx++)
    {
        result[idx] = new CentroidCluster(subDirs[idx], idfThreshold, keepWords);
    }

    return result;
}
/// <summary>
/// Creates a <see cref="CentroidCluster"/> from each top-level subdirectory found
/// directly under <paramref name="folderPath"/>.
/// </summary>
/// <param name="folderPath">Parent directory; each of its immediate subdirectories becomes one cluster.</param>
/// <param name="idfThreshold">Passed through to every <see cref="CentroidCluster"/> constructor call.</param>
/// <param name="keepWords">Passed through to every <see cref="CentroidCluster"/> constructor call.</param>
/// <returns>The constructed clusters, one per subdirectory, in enumeration order.</returns>
public static CentroidCluster[] fromFolder(string folderPath, double idfThreshold, int keepWords)
{
    string[] directories = Directory.GetDirectories(folderPath, "*", SearchOption.TopDirectoryOnly);

    // The number of clusters is known up front, so fill a fixed-size array directly.
    CentroidCluster[] loaded = new CentroidCluster[directories.Length];
    int next = 0;
    foreach (string directory in directories)
    {
        loaded[next] = new CentroidCluster(directory, idfThreshold, keepWords);
        next++;
    }

    return loaded;
}
/// <summary>
/// Produces a summary of <paramref name="docs"/> by scoring every sentence and handing the
/// scored sentences to <c>SummaryUtil.SummarizeByCompressionRatio</c>.
/// </summary>
/// <remarks>
/// Each sentence weight is the weighted sum of three terms: a centroid score (C), a
/// positional score (P) and an overlap score with titles and first sentences (F).
/// A redundancy penalty is then applied repeatedly, regenerating the summary each round,
/// until two consecutive rounds yield the same summary text.
/// Side effects: overwrites <c>weight</c> on every sentence of every document, and loads
/// <c>centroidClusters</c> from <c>clustersDir</c> on first use.
/// </remarks>
/// <param name="docs">Collection of <c>Document</c> instances to summarize.</param>
/// <param name="compressionRatio">Forwarded unchanged to <c>SummaryUtil.SummarizeByCompressionRatio</c>.</param>
/// <returns>The first summary that equals the one produced in the preceding round.</returns>
public override string generateSummary(ArrayList docs, double compressionRatio)
{
    ArrayList titles = new ArrayList();
    ArrayList leadSentences = new ArrayList();
    ArrayList pool = new ArrayList();

    // Gather titles, each document's first sentence, and the flat list of all sentences.
    foreach (Document document in docs)
    {
        titles.Add(document.title);
        if (document.sentences.Count > 0)
        {
            leadSentences.Add(document.sentences[0]);
        }
        pool.AddRange(document.sentences);
    }

    // Clusters are loaded once and cached on the instance.
    if (centroidClusters == null)
    {
        centroidClusters = CentroidCluster.fromFolder(clustersDir, idfThreshold, keepWords);
    }

    int total = pool.Count;
    double[] centroidScores = new double[total];
    double[] positionScores = new double[total];
    double[] overlapScores = new double[total];
    double peakCentroid = double.MinValue;

    for (int s = 0; s < total; s++)
    {
        Sentence sentence = (Sentence)pool[s];

        // C: sum of the centroid values of the sentence's words.
        centroidScores[s] = 0;
        foreach (string token in sentence.words)
        {
            centroidScores[s] += getCentroidValue(centroidClusters, token);
        }
        if (centroidScores[s] > peakCentroid)
        {
            peakCentroid = centroidScores[s];
        }

        // F: for each word, its occurrences across all titles and first sentences,
        // multiplied by its count within the current sentence.
        overlapScores[s] = 0;
        foreach (string token in sentence.words)
        {
            int hits = 0;
            foreach (Sentence title in titles)
            {
                object inTitle = title.wordsCount[token];
                if (inTitle != null)
                {
                    hits += (int)inTitle;
                }
            }
            foreach (Sentence lead in leadSentences)
            {
                object inLead = lead.wordsCount[token];
                if (inLead != null)
                {
                    hits += (int)inLead;
                }
            }
            overlapScores[s] += (hits * ((int)sentence.wordsCount[token]));
        }
    }

    // P: within each document the factor counts down from count to 1, so earlier
    // sentences score higher; the value is scaled by the largest centroid score.
    int slot = 0;
    foreach (Document document in docs)
    {
        int count = document.sentences.Count;
        for (int remaining = count; remaining > 0; remaining--)
        {
            positionScores[slot++] = (remaining * peakCentroid) / count;
        }
    }

    // Combine the three terms into the initial weight and track the largest one.
    double topWeight = double.MinValue;
    for (int s = 0; s < total; s++)
    {
        double combined = (centroidWeight * centroidScores[s]) + (positionalWeight * positionScores[s]) + (firstSentenceWeight * overlapScores[s]);
        ((Sentence)pool[s]).weight = combined;
        if (combined > topWeight)
        {
            topWeight = combined;
        }
    }

    string previous = null;
    while (true)
    {
        // Penalize each sentence for redundancy against every sentence that precedes it
        // in the pool; the penalty is scaled by the current top weight.
        for (int earlier = 0; earlier < total; earlier++)
        {
            for (int later = earlier + 1; later < total; later++)
            {
                Sentence target = (Sentence)pool[later];
                double penalty = redundancyPenalty((Sentence)pool[earlier], target);
                target.weight -= (topWeight * penalty);
            }
        }

        // Recompute the top weight from the penalized values for the next round.
        topWeight = double.MinValue;
        foreach (Sentence candidate in pool)
        {
            if (candidate.weight > topWeight)
            {
                topWeight = candidate.weight;
            }
        }

        Sentence[] scored = (Sentence[])pool.ToArray(typeof(Sentence));
        string current = SummaryUtil.SummarizeByCompressionRatio(scored, compressionRatio);

        // Stop once the summary no longer changes between rounds.
        if (current.Equals(previous))
        {
            return current;
        }
        previous = current;
    }
}