/// <summary>
/// Builds one term-count table per cluster by summing the per-document term
/// counts of every document listed in that cluster's DocIds.
/// </summary>
/// <param name="clusters">Clusters whose DocIds keys identify the documents to aggregate.</param>
/// <returns>
/// An array parallel to <paramref name="clusters"/>: element i maps each term id
/// (boxed int key) found in cluster i to a TermCountItem carrying the summed count.
/// </returns>
private Hashtable[] collectAllTerms(Cluster[] clusters)
{
    Hashtable[] clusterTerms = new Hashtable[clusters.Length];

    // Collect all term counts for each cluster.
    for (int i = 0; i < clusters.Length; i++)
    {
        Hashtable terms = new Hashtable();
        clusterTerms[i] = terms;

        IDictionaryEnumerator en = clusters[i].DocIds.GetEnumerator();
        while (en.MoveNext())
        {
            // Convert.ToInt16 throws OverflowException for keys outside the short range.
            short docId = Convert.ToInt16(en.Key);

            // Get all terms + counts for the current document.
            DocTermItem[] docTerms = GetIndex.DocTerms(docId);
            foreach (DocTermItem dt in docTerms)
            {
                int termId = dt.TermId;
                short termCount = dt.TermCount;

                // Box the key once instead of once per Hashtable call.
                object termKey = termId;

                if (terms.Contains(termKey))
                {
                    // NOTE(review): the in-place update relies on TermCountItem being a
                    // reference type; with a struct the sum would be applied to a copy - confirm.
                    TermCountItem existingTC = (TermCountItem)terms[termKey];
                    existingTC.termCount = existingTC.termCount + termCount;
                }
                else
                {
                    // Allocate only when the term is seen for the first time in this cluster.
                    terms.Add(termKey, new TermCountItem(termId, termCount));
                }
            }
        }
    }

    return clusterTerms;
}
/// <summary>
/// Compares this item with another TermCountItem by termCount, in descending
/// order: the item with the larger termCount sorts first.
/// </summary>
/// <param name="o">The TermCountItem to compare against; may be null.</param>
/// <returns>
/// -1 when this item's termCount is larger than <paramref name="o"/>'s, 1 when it is
/// smaller or when <paramref name="o"/> is null, and 0 when the counts are equal.
/// </returns>
public int CompareTo(object o)
{
    // IComparable contract: any instance compares greater than a null reference.
    // Without this guard the field access below fails with NullReferenceException.
    if (o == null)
    {
        return 1;
    }

    // A non-TermCountItem argument still fails with InvalidCastException, as before.
    TermCountItem x = (TermCountItem)o;

    // Operands are deliberately "reversed" so that higher counts come first.
    if (x.termCount < termCount)
    {
        return -1;
    }
    if (x.termCount > termCount)
    {
        return 1;
    }
    return 0;
}
/// <summary>
/// Flattens the first <paramref name="clusterCount"/> term tables into arrays and
/// sorts each array with Array.Sort, i.e. using the elements' own comparison.
/// </summary>
/// <param name="clusterTerms">Per-cluster tables whose values are TermCountItem instances.</param>
/// <param name="clusterCount">Number of leading entries of <paramref name="clusterTerms"/> to process.</param>
/// <returns>A jagged array whose element i holds the sorted values of table i.</returns>
private TermCountItem[][] getSortedTermArrays(Hashtable[] clusterTerms, int clusterCount)
{
    TermCountItem[][] result = new TermCountItem[clusterCount][];

    for (int cluster = 0; cluster < clusterCount; cluster++)
    {
        Hashtable table = clusterTerms[cluster];

        // One slot per distinct term stored for this cluster.
        TermCountItem[] items = new TermCountItem[table.Count];
        result[cluster] = items;

        int next = 0;
        foreach (DictionaryEntry entry in table)
        {
            items[next] = (TermCountItem)entry.Value;
            next++;
        }

        Array.Sort(items);
    }

    return result;
}