示例#1
0
 /// <summary>
 /// Builds one term table per cluster: for every document id found among the keys of
 /// a cluster's DocIds, the document's term counts are fetched through
 /// GetIndex.DocTerms and accumulated per term id.
 /// </summary>
 /// <param name="clusters">Clusters to aggregate; element i produces table i of the result.</param>
 /// <returns>
 /// An array with the same length as <paramref name="clusters"/>. Each Hashtable maps a
 /// term id (int) to a TermCountItem holding the summed count of that term in the cluster.
 /// </returns>
 private Hashtable[] collectAllTerms(Cluster[] clusters)
 {
     Hashtable[] clusterTerms = new Hashtable[clusters.Length];
     for (int i = 0; i < clusterTerms.Length; i++)
     {
         Hashtable terms = new Hashtable();
         clusterTerms[i] = terms;

         IDictionaryEnumerator en = clusters[i].DocIds.GetEnumerator();
         while (en.MoveNext())
         {
             short         docId    = Convert.ToInt16(en.Key);
             DocTermItem[] docTerms = GetIndex.DocTerms(docId);      //all terms+counts for the current doc
             foreach (DocTermItem dt in docTerms)
             {
                 int   termId    = dt.TermId;
                 short termCount = dt.TermCount;

                 //One lookup instead of Contains + indexer: the Hashtable indexer
                 //returns null when the key is absent, and stored values are never null.
                 object existing = terms[termId];
                 if (existing == null)
                 {
                     //First occurrence of this term in the cluster; allocate only now
                     //so that repeated terms do not create throw-away items.
                     terms.Add(termId, new TermCountItem(termId, termCount));
                 }
                 else
                 {
                     //NOTE(review): this in-place update only sticks if TermCountItem
                     //is a reference type - confirm against its declaration.
                     TermCountItem existingTC = (TermCountItem)existing;
                     existingTC.termCount = existingTC.termCount + termCount;
                 }
             }
         }
     }
     return clusterTerms;
 }
示例#2
0
            /// <summary>
            /// Compares this instance's termCount with that of another TermCountItem.
            /// The result is inverted relative to natural numeric order, so an instance
            /// with a larger termCount compares as "less" than one with a smaller termCount.
            /// </summary>
            /// <param name="o">The TermCountItem to compare with, or null.</param>
            /// <returns>
            /// -1 when this instance's termCount is greater than that of <paramref name="o"/>,
            /// 1 when it is smaller or when <paramref name="o"/> is null, and 0 when both are equal.
            /// </returns>
            public int CompareTo(object o)
            {
                //IComparable contract: any instance compares greater than null.
                //Without this guard a null argument surfaced as a NullReferenceException.
                if (o == null)
                {
                    return 1;
                }

                //Direct cast kept on purpose: a non-TermCountItem argument still
                //raises InvalidCastException exactly as before.
                TermCountItem x = (TermCountItem)o;

                if (x.termCount < termCount)
                {
                    return -1;
                }
                else if (x.termCount > termCount)
                {
                    return 1;
                }
                else
                {
                    return 0;
                }
            }
示例#3
0
        /// <summary>
        /// Flattens each cluster's term table into an array of its values and sorts that
        /// array with Array.Sort, i.e. using the ordering the elements define themselves
        /// through IComparable.
        /// </summary>
        /// <param name="clusterTerms">Per-cluster tables whose values are TermCountItem instances.</param>
        /// <param name="clusterCount">Number of leading entries of <paramref name="clusterTerms"/> to process.</param>
        /// <returns>
        /// A jagged array with <paramref name="clusterCount"/> rows; row i holds the sorted
        /// values of clusterTerms[i].
        /// </returns>
        private TermCountItem[][] getSortedTermArrays(Hashtable[] clusterTerms, int clusterCount)
        {
            TermCountItem[][] result = new TermCountItem[clusterCount][];

            for (int cluster = 0; cluster < clusterCount; cluster++)
            {
                Hashtable table = clusterTerms[cluster];

                //one slot per term stored for this cluster
                TermCountItem[] items = new TermCountItem[table.Count];
                result[cluster] = items;

                int next = 0;
                foreach (DictionaryEntry entry in table)
                {
                    items[next] = (TermCountItem)entry.Value;
                    next++;
                }

                Array.Sort(items);
            }

            return result;
        }