Example #1
0
        public override Query Rewrite(IndexReader reader)
        {
            FilteredTermEnum enumerator   = GetEnum(reader);
            int            maxClauseCount = BooleanQuery.GetMaxClauseCount();
            ScoreTermQueue stQueue        = new ScoreTermQueue(maxClauseCount);

            try
            {
                do
                {
                    float minScore = 0.0f;
                    float score    = 0.0f;
                    Term  t        = enumerator.Term();
                    if (t != null)
                    {
                        score = enumerator.Difference();
                        // terms come in alphabetical order, therefore if queue is full and score
                        // not bigger than minScore, we can skip
                        if (stQueue.Size() < maxClauseCount || score > minScore)
                        {
                            stQueue.Insert(new ScoreTerm(t, score));
                            minScore = ((ScoreTerm)stQueue.Top()).score;                              // maintain minScore
                        }
                    }
                }while (enumerator.Next());
            }
            finally
            {
                enumerator.Close();
            }

            BooleanQuery query = new BooleanQuery(true);
            int          size  = stQueue.Size();

            for (int i = 0; i < size; i++)
            {
                ScoreTerm st = (ScoreTerm)stQueue.Pop();
                TermQuery tq = new TermQuery(st.term);              // found a match
                tq.SetBoost(GetBoost() * st.score);                 // set the boost
                query.Add(tq, BooleanClause.Occur.SHOULD);          // add to query
            }

            return(query);
        }
Example #2
0
        private void AddTerms(IndexReader reader, FieldVals f)
        {
            if (f.queryString == null)
            {
                return;
            }
            TokenStream   ts      = analyzer.TokenStream(f.fieldName, new System.IO.StringReader(f.queryString));
            TermAttribute termAtt = (TermAttribute)ts.AddAttribute(typeof(TermAttribute));

            int       corpusNumDocs            = reader.NumDocs();
            Term      internSavingTemplateTerm = new Term(f.fieldName); //optimization to avoid constructing new Term() objects
            Hashtable processedTerms           = new Hashtable();

            while (ts.IncrementToken())
            {
                String term = termAtt.Term();
                if (!processedTerms.Contains(term))
                {
                    processedTerms.Add(term, term);
                    ScoreTermQueue variantsQ = new ScoreTermQueue(MAX_VARIANTS_PER_TERM); //maxNum variants considered for any one term
                    float          minScore  = 0;
                    Term           startTerm = internSavingTemplateTerm.CreateTerm(term);
                    FuzzyTermEnum  fe        = new FuzzyTermEnum(reader, startTerm, f.minSimilarity, f.prefixLength);
                    TermEnum       origEnum  = reader.Terms(startTerm);
                    int            df        = 0;
                    if (startTerm.Equals(origEnum.Term()))
                    {
                        df = origEnum.DocFreq(); //store the df so all variants use same idf
                    }
                    int numVariants          = 0;
                    int totalVariantDocFreqs = 0;
                    do
                    {
                        Term possibleMatch = fe.Term();
                        if (possibleMatch != null)
                        {
                            numVariants++;
                            totalVariantDocFreqs += fe.DocFreq();
                            float score = fe.Difference();
                            if (variantsQ.Size() < MAX_VARIANTS_PER_TERM || score > minScore)
                            {
                                ScoreTerm st = new ScoreTerm(possibleMatch, score, startTerm);
                                variantsQ.Insert(st);
                                minScore = ((ScoreTerm)variantsQ.Top()).score; // maintain minScore
                            }
                        }
                    }while (fe.Next());
                    if (numVariants > 0)
                    {
                        int avgDf = totalVariantDocFreqs / numVariants;
                        if (df == 0)    //no direct match we can use as df for all variants
                        {
                            df = avgDf; //use avg df of all variants
                        }

                        // take the top variants (scored by edit distance) and reset the score
                        // to include an IDF factor then add to the global queue for ranking
                        // overall top query terms
                        int size = variantsQ.Size();
                        for (int i = 0; i < size; i++)
                        {
                            ScoreTerm st = (ScoreTerm)variantsQ.Pop();
                            st.score = (st.score * st.score) * sim.Idf(df, corpusNumDocs);
                            q.Insert(st);
                        }
                    }
                }
            }
        }
Example #3
0
		public override Query Rewrite(IndexReader reader)
		{
			FilteredTermEnum enumerator = GetEnum(reader);
			int maxClauseCount = BooleanQuery.GetMaxClauseCount();
			ScoreTermQueue stQueue = new ScoreTermQueue(maxClauseCount);
			
			try
			{
				do 
				{
					float minScore = 0.0f;
					float score = 0.0f;
					Term t = enumerator.Term();
					if (t != null)
					{
						score = enumerator.Difference();
						// terms come in alphabetical order, therefore if queue is full and score
						// not bigger than minScore, we can skip
						if (stQueue.Size() < maxClauseCount || score > minScore)
						{
							stQueue.Insert(new ScoreTerm(t, score));
							minScore = ((ScoreTerm) stQueue.Top()).score; // maintain minScore
						}
					}
				}
				while (enumerator.Next());
			}
			finally
			{
				enumerator.Close();
			}
			
			BooleanQuery query = new BooleanQuery(true);
			int size = stQueue.Size();
			for (int i = 0; i < size; i++)
			{
				ScoreTerm st = (ScoreTerm) stQueue.Pop();
				TermQuery tq = new TermQuery(st.term); // found a match
				tq.SetBoost(GetBoost() * st.score); // set the boost
				query.Add(tq, BooleanClause.Occur.SHOULD); // add to query
			}
			
			return query;
		}