Inheritance: PriorityQueue
Exemplo n.º 1
0
        public static TermInfo[] GetHighFreqTerms(Directory dir, 
            Hashtable junkWords,
            int numTerms,
            String[] fields)
        {
            if (dir == null || fields == null) return new TermInfo[0];

            IndexReader reader = IndexReader.Open(dir, true);
            TermInfoQueue tiq = new TermInfoQueue(numTerms);
            TermEnum terms = reader.Terms();

            int minFreq = 0;

            while (terms.Next())
            {
                String field = terms.Term().Field();

                if (fields != null && fields.Length > 0)
                {
                    bool skip = true;

                    for (int i = 0; i < fields.Length; i++)
                    {
                        if (field.Equals(fields[i]))
                        {
                            skip = false;
                            break;
                        }
                    }
                    if (skip) continue;
                }

                if (junkWords != null && junkWords[terms.Term().Text()] != null)
                    continue;

                if (terms.DocFreq() > minFreq)
                {
                    TermInfo top = (TermInfo)tiq.Add(new TermInfo(terms.Term(), terms.DocFreq()));
                    if (tiq.Size() >= numTerms) 		     // if tiq overfull
                    {
                        tiq.Pop();				     // remove lowest in tiq
                        minFreq = top.DocFreq; // reset minFreq
                    }
                }
            }

            TermInfo[] res = new TermInfo[tiq.Size()];

            for (int i = 0; i < res.Length; i++)
            {
                res[res.Length - i - 1] = (TermInfo)tiq.Pop();
            }

            reader.Close();

            return res;
        }
Exemplo n.º 2
0
        public static TermInfo[] GetHighFreqTerms(Directory dir,
                                                  Hashtable junkWords,
                                                  int numTerms,
                                                  String[] fields)
        {
            if (dir == null || fields == null)
            {
                return(new TermInfo[0]);
            }

            IndexReader   reader = IndexReader.Open(dir, true);
            TermInfoQueue tiq    = new TermInfoQueue(numTerms);
            TermEnum      terms  = reader.Terms();

            int minFreq = 0;

            while (terms.Next())
            {
                String field = terms.Term().Field();

                if (fields != null && fields.Length > 0)
                {
                    bool skip = true;

                    for (int i = 0; i < fields.Length; i++)
                    {
                        if (field.Equals(fields[i]))
                        {
                            skip = false;
                            break;
                        }
                    }
                    if (skip)
                    {
                        continue;
                    }
                }

                if (junkWords != null && junkWords[terms.Term().Text()] != null)
                {
                    continue;
                }

                if (terms.DocFreq() > minFreq)
                {
                    TermInfo top = (TermInfo)tiq.Add(new TermInfo(terms.Term(), terms.DocFreq()));
                    if (tiq.Size() >= numTerms)                        // if tiq overfull
                    {
                        tiq.Pop();                                     // remove lowest in tiq
                        minFreq = top.DocFreq;                         // reset minFreq
                    }
                }
            }

            TermInfo[] res = new TermInfo[tiq.Size()];

            for (int i = 0; i < res.Length; i++)
            {
                res[res.Length - i - 1] = (TermInfo)tiq.Pop();
            }

            reader.Close();

            return(res);
        }