예제 #1
0
파일: TermSpans.cs 프로젝트: carrie901/mono
		/// <summary>Creates a span enumerator bound to one term and its position stream.</summary>
		/// <param name="positions">Position enumerator stored on this instance.</param>
		/// <param name="term">Term stored on this instance.</param>
		public TermSpans(TermPositions positions, Term term)
		{
			// Start at -1 (presumably "not yet positioned on any document" -- confirm against the callers).
			doc = -1;
			this.term = term;
			this.positions = positions;
		}
예제 #2
0
            /// <summary>
            /// Rewrites <paramref name="query"/> into a <c>BooleanQuery</c> that holds one optional
            /// <c>TermQuery</c> clause for every non-null term delivered by the query's enumeration.
            /// Each clause is boosted by the query boost multiplied by the enumerator's <c>Difference()</c>.
            /// </summary>
            /// <param name="reader">Reader handed to <c>query.GetEnum</c>.</param>
            /// <param name="query">Multi-term query being rewritten; its term counter is incremented by the number of clauses added.</param>
            /// <returns>The assembled boolean query.</returns>
            public override Query Rewrite(IndexReader reader, MultiTermQuery query)
            {
                FilteredTermEnum termEnum = query.GetEnum(reader);
                BooleanQuery disjunction = new BooleanQuery(true);
                int clauseCount = 0;

                try
                {
                    bool hasMore = true;
                    while (hasMore)
                    {
                        Term current = termEnum.Term();
                        if (current != null)
                        {
                            // One SHOULD clause per matching term, weighted by how close the match is.
                            TermQuery clause = new TermQuery(current);
                            clause.SetBoost(query.GetBoost() * termEnum.Difference());
                            disjunction.Add(clause, BooleanClause.Occur.SHOULD);
                            clauseCount++;
                        }
                        hasMore = termEnum.Next();
                    }
                }
                finally
                {
                    // The enumerator is always released, even if building a clause fails.
                    termEnum.Close();
                }

                query.IncTotalNumberOfTerms(clauseCount);
                return disjunction;
            }
예제 #3
0
            /// <summary>
            /// Builds a per-document array of term texts for the field named by <paramref name="entryKey"/>.
            /// Slot <c>i</c> holds the text of the last enumerated term that contains document <c>i</c>;
            /// documents without a term in the field keep <c>null</c>.
            /// </summary>
            /// <param name="reader">Index reader supplying terms and postings.</param>
            /// <param name="entryKey">Cache key whose <c>field</c> names the field to load.</param>
            /// <returns>A <c>System.String[]</c> of length <c>reader.MaxDoc()</c>.</returns>
            protected internal override System.Object CreateValue(IndexReader reader, Entry entryKey)
            {
                System.String fieldName = StringHelper.Intern((System.String)entryKey.field);
                System.String[] textByDoc = new System.String[reader.MaxDoc()];
                TermDocs postings = reader.TermDocs();
                TermEnum fieldTerms = reader.Terms(new Term(fieldName));

                try
                {
                    bool advanced = true;
                    while (advanced)
                    {
                        Term current = fieldTerms.Term();
                        // Reference comparison: fieldName is interned above; term field names are
                        // presumably interned as well -- confirm against Term.
                        if (current == null || !System.Object.ReferenceEquals(current.Field(), fieldName))
                        {
                            break;
                        }

                        System.String text = current.Text();
                        postings.Seek(fieldTerms);
                        while (postings.Next())
                        {
                            textByDoc[postings.Doc()] = text;
                        }

                        advanced = fieldTerms.Next();
                    }
                }
                finally
                {
                    postings.Close();
                    fieldTerms.Close();
                }

                return textByDoc;
            }
예제 #4
0
 /// <summary>
 /// Advances to the next term accepted by <c>TermCompare</c>.
 /// </summary>
 /// <returns>
 /// <c>true</c> when an accepted term was found and stored as the current term; <c>false</c> when
 /// no underlying enumerator is set, <c>EndEnum()</c> reports the end, or the underlying
 /// enumerator is exhausted.
 /// </returns>
 public override bool Next()
 {
     // Nothing to iterate until an underlying enumerator has been assigned.
     if (actualEnum == null)
     {
         return false;
     }

     currentTerm = null;
     while (currentTerm == null)
     {
         if (EndEnum() || !actualEnum.Next())
         {
             return false;
         }

         Term candidate = actualEnum.Term();
         if (TermCompare(candidate))
         {
             currentTerm = candidate;
             return true;
         }
     }

     // Only reachable if something else assigned currentTerm while the loop was running.
     currentTerm = null;
     return false;
 }
예제 #5
0
        /// <summary>
        /// Creates an enumeration over the terms of <paramref name="reader"/> that share a prefix of
        /// length <paramref name="prefixLength"/> with <paramref name="term"/> and whose fuzzy
        /// similarity is &gt; <paramref name="minSimilarity"/>.
        /// <p/>
        /// Once the constructor returns, the enumeration already points at the first valid term,
        /// if such a term exists.
        /// </summary>
        /// <param name="reader">Delivers terms.</param>
        /// <param name="term">Pattern term.</param>
        /// <param name="minSimilarity">Minimum required similarity; must be in [0, 1).</param>
        /// <param name="prefixLength">Length of the required common prefix; must not be negative.
        /// Values longer than the term text are clamped to the text length.</param>
        /// <exception cref="System.ArgumentException">If <paramref name="minSimilarity"/> is &gt;= 1 or
        /// &lt; 0, or if <paramref name="prefixLength"/> is &lt; 0.</exception>
        public FuzzyTermEnum(IndexReader reader, Term term, float minSimilarity, int prefixLength) : base()
        {
            if (minSimilarity >= 1.0f)
            {
                throw new System.ArgumentException("minimumSimilarity cannot be greater than or equal to 1");
            }
            if (minSimilarity < 0.0f)
            {
                throw new System.ArgumentException("minimumSimilarity cannot be less than 0");
            }
            if (prefixLength < 0)
            {
                throw new System.ArgumentException("prefixLength cannot be less than 0");
            }

            this.minimumSimilarity = minSimilarity;
            this.scale_factor = 1.0f / (1.0f - minimumSimilarity);
            this.searchTerm = term;
            this.field = searchTerm.Field();

            // A prefix longer than the word simply means the whole word must match,
            // so the effective prefix length is capped at the word length.
            System.String fullText = searchTerm.Text();
            int effectivePrefixLength = System.Math.Min(prefixLength, fullText.Length);

            this.text = fullText.Substring(effectivePrefixLength);
            this.prefix = fullText.Substring(0, effectivePrefixLength);

            InitializeMaxDistances();
            this.d = InitDistanceArray();

            // Start the underlying enumeration at the first term carrying the fixed prefix.
            SetEnum(reader.Terms(new Term(searchTerm.Field(), prefix)));
        }
예제 #6
0
            /// <summary>
            /// Builds a per-document array of comparables for the field named by <paramref name="entryKey"/>,
            /// converting each term text with the <c>SortComparator</c> stored in the key's <c>custom</c> slot.
            /// </summary>
            /// <param name="reader">Index reader supplying terms and postings.</param>
            /// <param name="entryKey">Cache key carrying the field name and the comparator.</param>
            /// <returns>A <c>System.IComparable[]</c> of length <c>reader.MaxDoc()</c>.</returns>
            protected internal override System.Object CreateValue(IndexReader reader, Entry entryKey)
            {
                System.String fieldName = entryKey.field;
                SortComparator converter = (SortComparator)entryKey.custom;

                System.IComparable[] valueByDoc = new System.IComparable[reader.MaxDoc()];
                TermDocs postings = reader.TermDocs();
                TermEnum fieldTerms = reader.Terms(new Term(fieldName));

                try
                {
                    bool advanced = true;
                    while (advanced)
                    {
                        Term current = fieldTerms.Term();
                        // Field names are compared by reference; this assumes the key's field name
                        // is already interned -- TODO confirm where Entry is constructed.
                        if (current == null || !System.Object.ReferenceEquals(current.Field(), fieldName))
                        {
                            break;
                        }

                        System.IComparable converted = converter.GetComparable(current.Text());
                        postings.Seek(fieldTerms);
                        while (postings.Next())
                        {
                            valueByDoc[postings.Doc()] = converted;
                        }

                        advanced = fieldTerms.Next();
                    }
                }
                finally
                {
                    postings.Close();
                    fieldTerms.Close();
                }

                return valueByDoc;
            }
예제 #7
0
        /// <summary>
        /// Creates a new FuzzyQuery that matches terms with a similarity of at least
        /// <paramref name="minimumSimilarity"/> to <paramref name="term"/>. When
        /// <paramref name="prefixLength"/> is &gt; 0, a common prefix of that length is also required.
        /// </summary>
        /// <param name="term">The term to search for.</param>
        /// <param name="minimumSimilarity">A value in [0, 1) setting the required similarity between
        /// the query term and the matching terms. For example, with a value of <c>0.5</c> a term of the
        /// same length as the query term is considered similar if the edit distance between both
        /// terms is less than <c>length(term)*0.5</c>.</param>
        /// <param name="prefixLength">Length of the common (non-fuzzy) prefix.</param>
        /// <exception cref="System.ArgumentException">If <paramref name="minimumSimilarity"/> is &gt;= 1
        /// or &lt; 0, or if <paramref name="prefixLength"/> is &lt; 0.</exception>
        public FuzzyQuery(Term term, float minimumSimilarity, int prefixLength) : base(term)
        {
            // will be removed in 3.0
            this.term = term;

            if (minimumSimilarity >= 1.0f)
            {
                throw new System.ArgumentException("minimumSimilarity >= 1");
            }
            if (minimumSimilarity < 0.0f)
            {
                throw new System.ArgumentException("minimumSimilarity < 0");
            }
            if (prefixLength < 0)
            {
                throw new System.ArgumentException("prefixLength < 0");
            }

            // The flag is only raised when the term text is longer than 1 / (1 - minimumSimilarity).
            float lengthThreshold = 1.0f / (1.0f - minimumSimilarity);
            if (term.Text().Length > lengthThreshold)
            {
                this.termLongEnough = true;
            }

            this.prefixLength = prefixLength;
            this.minimumSimilarity = minimumSimilarity;
            rewriteMethod = SCORING_BOOLEAN_QUERY_REWRITE;
        }
예제 #8
0
            /// <summary>
            /// Rewrites <paramref name="query"/> to a constant-score query, choosing the form from how
            /// many terms and documents the enumeration touches. If either cutoff is reached while
            /// visiting terms, the whole query is wrapped in a <c>MultiTermQueryWrapperFilter</c>;
            /// if the enumeration ends first, the collected terms become a <c>BooleanQuery</c> wrapped
            /// in a <c>QueryWrapperFilter</c>.
            /// </summary>
            /// <param name="reader">Reader used for the enumeration, <c>MaxDoc()</c> and <c>DocFreq</c>.</param>
            /// <param name="query">Multi-term query being rewritten.</param>
            /// <returns>A <c>ConstantScoreQuery</c> carrying the boost of <paramref name="query"/>.</returns>
            public override Query Rewrite(IndexReader reader, MultiTermQuery query)
            {
                System.Collections.ArrayList collectedTerms = new System.Collections.ArrayList();
                int docCountCutoff = (int)((docCountPercent / 100.0) * reader.MaxDoc());
                int termCountLimit = System.Math.Min(BooleanQuery.GetMaxClauseCount(), termCountCutoff);
                int visitedDocs = 0;

                FilteredTermEnum termEnum = query.GetEnum(reader);

                try
                {
                    for (;;)
                    {
                        Term current = termEnum.Term();
                        if (current != null)
                        {
                            collectedTerms.Add(current);
                            // Looking up the doc frequency here should be cheap: the query/filter
                            // loads the same term info when it runs, and the terms dict has a cache.
                            visitedDocs += reader.DocFreq(current);
                        }

                        bool cutoffReached = collectedTerms.Count >= termCountLimit || visitedDocs >= docCountCutoff;
                        if (cutoffReached)
                        {
                            // Too many terms or documents -- fall back to a filter over the whole query.
                            Query filtered = new ConstantScoreQuery(new MultiTermQueryWrapperFilter(query));
                            filtered.SetBoost(query.GetBoost());
                            return filtered;
                        }

                        if (termEnum.Next())
                        {
                            continue;
                        }

                        // Enumeration finished below both cutoffs -- build the boolean query now.
                        BooleanQuery disjunction = new BooleanQuery(true);
                        foreach (Term pending in collectedTerms)
                        {
                            TermQuery clause = new TermQuery(pending);
                            disjunction.Add(clause, BooleanClause.Occur.SHOULD);
                        }

                        // Wrapping in a filter strips the scores.
                        Query constant = new ConstantScoreQuery(new QueryWrapperFilter(disjunction));
                        constant.SetBoost(query.GetBoost());
                        query.IncTotalNumberOfTerms(collectedTerms.Count);
                        return constant;
                    }
                }
                finally
                {
                    termEnum.Close();
                }
            }
예제 #9
0
            /// <summary>
            /// Builds a per-document <c>float[]</c> for the field named by <paramref name="entryKey"/>.
            /// Without a parser in the key, the request is delegated to <c>wrapper.GetFloats</c> with the
            /// default float parser, retrying with the numeric-utils float parser on a <c>FormatException</c>.
            /// </summary>
            /// <param name="reader">Index reader supplying terms and postings.</param>
            /// <param name="entryKey">Cache key carrying the field name and an optional <c>FloatParser</c>.</param>
            /// <returns>A <c>float[]</c> of length <c>reader.MaxDoc()</c> (all zeros when the field has no terms),
            /// or whatever <c>wrapper.GetFloats</c> returns on the delegated path.</returns>
            protected internal override System.Object CreateValue(IndexReader reader, Entry entryKey)
            {
                System.String fieldName = entryKey.field;
                FloatParser parser = (FloatParser)entryKey.custom;

                if (parser == null)
                {
                    try
                    {
                        return wrapper.GetFloats(reader, fieldName, Mono.Lucene.Net.Search.FieldCache_Fields.DEFAULT_FLOAT_PARSER);
                    }
                    catch (System.FormatException)
                    {
                        return wrapper.GetFloats(reader, fieldName, Mono.Lucene.Net.Search.FieldCache_Fields.NUMERIC_UTILS_FLOAT_PARSER);
                    }
                }

                float[] valueByDoc = null;
                TermDocs postings = reader.TermDocs();
                TermEnum fieldTerms = reader.Terms(new Term(fieldName));

                try
                {
                    bool advanced = true;
                    while (advanced)
                    {
                        Term current = fieldTerms.Term();
                        if (current == null || !System.Object.ReferenceEquals(current.Field(), fieldName))
                        {
                            break;
                        }

                        // Parse before allocating, so a parse failure on the first term allocates nothing here.
                        float parsed = parser.ParseFloat(current.Text());
                        if (valueByDoc == null)
                        {
                            valueByDoc = new float[reader.MaxDoc()];
                        }

                        postings.Seek(fieldTerms);
                        while (postings.Next())
                        {
                            valueByDoc[postings.Doc()] = parsed;
                        }

                        advanced = fieldTerms.Next();
                    }
                }
                catch (StopFillCacheException)
                {
                    // Deliberately ignored: the values filled so far are returned as-is.
                    // NOTE(review): the exception name suggests parsers throw it to end the fill early -- confirm.
                }
                finally
                {
                    postings.Close();
                    fieldTerms.Close();
                }

                // No values were filled: return an all-zero array.
                return valueByDoc ?? new float[reader.MaxDoc()];
            }
예제 #10
0
		/// <summary>
		/// Adds several alternative terms at the next position in the phrase; any of them may match.
		/// The position used is one past the last recorded position, or 0 when nothing was added yet.
		/// </summary>
		/// <param name="terms">The alternative terms for the new position.</param>
		/// <seealso cref="PhraseQuery.Add(Term)"/>
		public virtual void  Add(Term[] terms)
		{
			int nextPosition = positions.Count > 0
				? ((System.Int32) positions[positions.Count - 1]) + 1
				: 0;

			Add(terms, nextPosition);
		}
예제 #11
0
 /// <summary>
 /// Closes the enumeration to further activity, freeing resources: the underlying enumerator
 /// (if one is set) is closed, then both the enumerator and the current term are cleared.
 /// </summary>
 public override void  Close()
 {
     bool hasUnderlying = actualEnum != null;
     if (hasUnderlying)
     {
         actualEnum.Close();
     }

     // Cleared only after a successful close, so Next() returns false from here on.
     actualEnum  = null;
     currentTerm = null;
 }
예제 #12
0
        /// <summary>
        /// Decides whether <paramref name="term"/> lies inside the configured range.
        /// Without a collator, terms are compared ordinally and the enumeration is flagged as ended
        /// once a term passes the upper bound. With a collator, every term of the field is tested and
        /// the enumeration only ends when the field changes or the terms run out.
        /// </summary>
        /// <param name="term">Candidate term; may be <c>null</c>.</param>
        /// <returns><c>true</c> if the term is within the range bounds.</returns>
        public /*protected internal*/ override bool TermCompare(Term term)
        {
            // A null term or a term of another field ends the enumeration (field names are compared by reference).
            if (term == null || !System.Object.ReferenceEquals(term.Field(), field))
            {
                endEnum = true;
                return false;
            }

            System.String candidate = term.Text();

            if (collator == null)
            {
                // Ordinal ordering. The lower bound only needs checking when it is exclusive.
                if (!includeLower && lowerTermText != null && String.CompareOrdinal(candidate, lowerTermText) <= 0)
                {
                    return false;
                }

                if (upperTermText != null)
                {
                    int upperVsCandidate = String.CompareOrdinal(upperTermText, candidate);

                    // Beyond the upper term, or equal to it while the upper bound is exclusive: stop enumerating.
                    if (upperVsCandidate < 0 || (!includeUpper && upperVsCandidate == 0))
                    {
                        endEnum = true;
                        return false;
                    }
                }

                return true;
            }

            // Collator ordering: the bounds are tested for every term; the upper bound is only
            // evaluated once the lower bound has been satisfied.
            if (lowerTermText != null)
            {
                int candidateVsLower = collator.Compare(candidate, lowerTermText);
                bool aboveLower = includeLower ? candidateVsLower >= 0 : candidateVsLower > 0;
                if (!aboveLower)
                {
                    return false;
                }
            }

            if (upperTermText != null)
            {
                int candidateVsUpper = collator.Compare(candidate, upperTermText);
                bool belowUpper = includeUpper ? candidateVsUpper <= 0 : candidateVsUpper < 0;
                if (!belowUpper)
                {
                    return false;
                }
            }

            return true;
        }
예제 #13
0
 /// <summary>
 /// Accepts <paramref name="term"/> when it belongs to the prefix's field and its text starts
 /// with the prefix text; any other term flags the enumeration as ended.
 /// </summary>
 /// <param name="term">Candidate term.</param>
 /// <returns><c>true</c> if the term carries the prefix; otherwise <c>false</c>.</returns>
 public /*protected internal*/ override bool TermCompare(Term term)
 {
     // The prefix test must be ordinal: the single-argument StartsWith overload is culture-sensitive
     // and can accept or reject terms depending on the current thread culture.
     if ((System.Object)term.Field() == (System.Object)prefix.Field()
         && term.Text().StartsWith(prefix.Text(), System.StringComparison.Ordinal))
     {
         return(true);
     }
     endEnum = true;
     return(false);
 }
예제 #14
0
		/// <summary>
		/// Installs the underlying <c>TermEnum</c> (e.g. from a constructor) and positions this
		/// enumeration on the first matching term: the enumerator's current term if
		/// <c>TermCompare</c> accepts it, otherwise whatever <c>Next()</c> finds.
		/// </summary>
		/// <param name="actualEnum">The enumerator to wrap.</param>
		protected internal virtual void  SetEnum(TermEnum actualEnum)
		{
			this.actualEnum = actualEnum;

			Term first = actualEnum.Term();
			bool firstMatches = first != null && TermCompare(first);
			if (!firstMatches)
			{
				// The current term is missing or rejected -- scan forward for the first match.
				Next();
				return;
			}

			currentTerm = first;
		}
예제 #15
0
        /// <summary>Sums the document frequency of <paramref name="term"/> over every entry in <c>searchables</c>.</summary>
        /// <param name="term">The term to look up.</param>
        /// <returns>The total of the per-searchable document frequencies.</returns>
        public override int DocFreq(Term term)
        {
            int total = 0;
            int index = 0;

            while (index < searchables.Length)
            {
                total += searchables[index].DocFreq(term);
                index++;
            }

            return total;
        }
예제 #16
0
            /// <summary>
            /// Auto-detects the value type of the field named by <paramref name="entryKey"/> from its
            /// first term and returns the matching cache from <c>wrapper</c>: ints, then longs, then
            /// floats, and finally a string index. Each numeric attempt that fails falls through to
            /// the next candidate.
            /// </summary>
            /// <param name="reader">Index reader supplying the terms.</param>
            /// <param name="entryKey">Cache key whose <c>field</c> names the field to inspect.</param>
            /// <returns>The object returned by the selected <c>wrapper.GetXxx</c> call.</returns>
            /// <exception cref="System.SystemException">If the field has no terms or does not appear to be indexed.</exception>
            protected internal override System.Object CreateValue(IndexReader reader, Entry entryKey)
            {
                System.String field      = StringHelper.Intern((System.String)entryKey.field);
                TermEnum      enumerator = reader.Terms(new Term(field));

                try
                {
                    Term term = enumerator.Term();
                    if (term == null)
                    {
                        throw new System.SystemException("no terms in field " + field + " - cannot determine type");
                    }
                    // Field names are compared by reference (field is interned above).
                    if ((System.Object)term.Field() != (System.Object)field)
                    {
                        throw new System.SystemException("field \"" + field + "\" does not appear to be indexed");
                    }

                    System.String termtext = term.Text().Trim();
                    System.Object ret      = null;
                    bool          resolved = false;

                    // Numeric text that is out of range raises OverflowException rather than
                    // FormatException, so both must fall through to the next, wider candidate.
                    try
                    {
                        System.Int32.Parse(termtext);
                        ret      = wrapper.GetInts(reader, field);
                        resolved = true;
                    }
                    catch (System.FormatException)
                    {
                        // not an int -- try long next
                    }
                    catch (System.OverflowException)
                    {
                        // too large for an int -- try long next
                    }

                    if (!resolved)
                    {
                        try
                        {
                            System.Int64.Parse(termtext);
                            ret      = wrapper.GetLongs(reader, field);
                            resolved = true;
                        }
                        catch (System.FormatException)
                        {
                            // not a long -- try float next
                        }
                        catch (System.OverflowException)
                        {
                            // too large for a long -- try float next
                        }
                    }

                    if (!resolved)
                    {
                        try
                        {
                            SupportClass.Single.Parse(termtext);
                            ret      = wrapper.GetFloats(reader, field);
                            resolved = true;
                        }
                        catch (System.FormatException)
                        {
                            // not a float -- fall back to strings
                        }
                        catch (System.OverflowException)
                        {
                            // out of float range -- fall back to strings
                        }
                    }

                    if (!resolved)
                    {
                        ret = wrapper.GetStringIndex(reader, field);
                    }
                    return(ret);
                }
                finally
                {
                    enumerator.Close();
                }
            }
예제 #17
0
 /// <summary>
 /// Accepts <paramref name="term"/> when it is in the searched field, starts with the fixed
 /// prefix, and the similarity of its remaining text (as computed by <c>Similarity</c>) exceeds
 /// <c>minimumSimilarity</c>. A term outside the field or without the prefix flags the
 /// enumeration as ended.
 /// </summary>
 /// <param name="term">Candidate term.</param>
 /// <returns><c>true</c> if the term is similar enough; otherwise <c>false</c>.</returns>
 public /*protected internal*/ override bool TermCompare(Term term)
 {
     // Ordinal prefix test: the Substring below assumes the prefix occupies exactly prefix.Length
     // characters, which the culture-sensitive single-argument StartsWith does not guarantee.
     if ((System.Object)field == (System.Object)term.Field() && term.Text().StartsWith(prefix, System.StringComparison.Ordinal))
     {
         System.String target = term.Text().Substring(prefix.Length);
         this.similarity = Similarity(target);
         return(similarity > minimumSimilarity);
     }
     endEnum = true;
     return(false);
 }
예제 #18
0
        /// <summary>
        /// Appends a term to the end of the query phrase. Its position is the one immediately after
        /// the last recorded position, or 0 when the phrase is still empty.
        /// </summary>
        /// <param name="term">The term to append.</param>
        public virtual void  Add(Term term)
        {
            int nextPosition = positions.Count > 0
                ? ((System.Int32)positions[positions.Count - 1]) + 1
                : 0;

            Add(term, nextPosition);
        }
예제 #19
0
 /// <summary>
 /// Creates the weight for this phrase query. A phrase with exactly one term is delegated to an
 /// equivalent <c>TermQuery</c> carrying this query's boost; anything else gets a <c>PhraseWeight</c>.
 /// </summary>
 /// <param name="searcher">Searcher the weight is created for.</param>
 /// <returns>The weight to use for scoring.</returns>
 public override Weight CreateWeight(Searcher searcher)
 {
     if (terms.Count != 1)
     {
         return new PhraseWeight(this, searcher);
     }

     // Single-term shortcut.
     Query single = new TermQuery((Term)terms[0]);
     single.SetBoost(GetBoost());
     return single.CreateWeight(searcher);
 }
예제 #20
0
 /// <summary>
 /// Accepts <paramref name="term"/> when it is in the searched field, starts with the literal
 /// prefix <c>pre</c>, and <c>WildcardEquals</c> matches the pattern against the text from offset
 /// <c>preLen</c>. A term outside the field or without the prefix flags the enumeration as ended.
 /// </summary>
 /// <param name="term">Candidate term.</param>
 /// <returns><c>true</c> if the term matches the wildcard pattern; otherwise <c>false</c>.</returns>
 public /*protected internal*/ override bool TermCompare(Term term)
 {
     if ((System.Object)field == (System.Object)term.Field())
     {
         System.String searchText = term.Text();
         // Ordinal prefix test: the single-argument StartsWith is culture-sensitive, which could
         // accept a prefix that does not line up with the preLen offset used below
         // (preLen is presumably pre.Length -- confirm where it is assigned).
         if (searchText.StartsWith(pre, System.StringComparison.Ordinal))
         {
             return(WildcardEquals(text, 0, searchText, preLen));
         }
     }
     endEnum = true;
     return(false);
 }
예제 #21
0
        /// <summary>
        /// Computes a score factor for a simple term and returns an explanation for that factor.
        /// <p/>
        /// When <c>SupportedMethods.overridesTermIDF</c> is set, the factor comes from
        /// <c>Idf(term, searcher)</c>. Otherwise it is computed as
        /// <pre>
        /// Idf(searcher.DocFreq(term), searcher.MaxDoc());
        /// </pre>
        /// Note that <c>Searcher.MaxDoc()</c> is used instead of <c>IndexReader.NumDocs()</c> because it
        /// is proportional to <c>Searcher.DocFreq(Term)</c>, i.e., when one is inaccurate, so is the
        /// other, and in the same direction.
        /// </summary>
        /// <param name="term">The term in question.</param>
        /// <param name="searcher">The document collection being searched.</param>
        /// <returns>An <c>IDFExplanation</c> object that includes both an idf score factor and an
        /// explanation for the term.</returns>
        public virtual IDFExplanation IdfExplain(Term term, Searcher searcher)
        {
            if (!SupportedMethods.overridesTermIDF)
            {
                int docFreq = searcher.DocFreq(term);
                int maxDoc = searcher.MaxDoc();
                float computedIdf = Idf(docFreq, maxDoc);

                return new AnonymousClassIDFExplanation1(docFreq, maxDoc, computedIdf, this);
            }

            // The term-based Idf overload is flagged as overridden: use its value.
            float overriddenIdf = Idf(term, searcher);
            return new AnonymousClassIDFExplanation(overriddenIdf, this);
        }
예제 #22
0
		/// <summary>
		/// Accepts <paramref name="term"/> when it is in the searched field, starts with the literal
		/// prefix <c>pre</c>, and <c>WildcardEquals</c> matches the pattern against the text from offset
		/// <c>preLen</c>. A term outside the field or without the prefix flags the enumeration as ended.
		/// </summary>
		/// <param name="term">Candidate term.</param>
		/// <returns><c>true</c> if the term matches the wildcard pattern; otherwise <c>false</c>.</returns>
		public /*protected internal*/ override bool TermCompare(Term term)
		{
			if ((System.Object) field == (System.Object) term.Field())
			{
				System.String searchText = term.Text();
				// Ordinal prefix test: the single-argument StartsWith is culture-sensitive, which could
				// accept a prefix that does not line up with the preLen offset used below
				// (preLen is presumably pre.Length -- confirm where it is assigned).
				if (searchText.StartsWith(pre, System.StringComparison.Ordinal))
				{
					return WildcardEquals(text, 0, searchText, preLen);
				}
			}
			endEnum = true;
			return false;
		}
예제 #23
0
        /// <summary>
        /// Constructs a query selecting all terms between <paramref name="lowerTerm"/> and
        /// <paramref name="upperTerm"/>. At least one term must be given; a <c>null</c> term means
        /// there is no bound on that side. If both terms are given, they <b>must</b> be for the same field.
        /// <p/>
        /// If <paramref name="collator"/> is not null, it is used to decide whether index terms are
        /// within the range, rather than the Unicode code point order in which index terms are stored.
        /// <p/>
        /// <strong>WARNING:</strong> supplying a non-null <paramref name="collator"/> causes every
        /// single index Term in the referenced field to be examined. Depending on the number of index
        /// Terms in that field, the operation could be very slow.
        /// </summary>
        /// <param name="lowerTerm">The Term at the lower end of the range.</param>
        /// <param name="upperTerm">The Term at the upper end of the range.</param>
        /// <param name="inclusive">If true, both <paramref name="lowerTerm"/> and
        /// <paramref name="upperTerm"/> are themselves included in the range.</param>
        /// <param name="collator">The collator used to collate index Terms when deciding their
        /// membership in the range.</param>
        /// <exception cref="System.ArgumentException">If both terms are null, or if they name different fields.</exception>
        public RangeQuery(Term lowerTerm, Term upperTerm, bool inclusive, System.Globalization.CompareInfo collator)
        {
            bool hasLower = lowerTerm != null;
            bool hasUpper = upperTerm != null;

            if (!hasLower && !hasUpper)
            {
                throw new System.ArgumentException("At least one term must be non-null");
            }
            // Field names are compared by reference.
            if (hasLower && hasUpper && !System.Object.ReferenceEquals(lowerTerm.Field(), upperTerm.Field()))
            {
                throw new System.ArgumentException("Both terms must have the same field");
            }

            System.String rangeField = hasLower ? lowerTerm.Field() : upperTerm.Field();
            System.String lowerText = hasLower ? lowerTerm.Text() : null;
            System.String upperText = hasUpper ? upperTerm.Text() : null;

            // All work is delegated to a TermRangeQuery using the scoring boolean rewrite.
            delegate_Renamed = new TermRangeQuery(rangeField, lowerText, upperText, inclusive, inclusive, collator);
            delegate_Renamed.SetRewriteMethod(TermRangeQuery.SCORING_BOOLEAN_QUERY_REWRITE);
        }
예제 #24
0
            /// <summary>Returns the document frequency recorded for <paramref name="term"/> in <c>dfMap</c>.</summary>
            /// <param name="term">The term to look up.</param>
            /// <returns>The stored document frequency.</returns>
            /// <exception cref="System.ArgumentException">If no frequency is recorded for the term.</exception>
            public override int DocFreq(Term term)
            {
                // Test for a missing entry explicitly instead of unboxing null and catching the
                // resulting NullReferenceException.
                System.Object recorded = dfMap[term];
                if (recorded == null)
                {
                    throw new System.ArgumentException("df for term " + term.Text() + " not available");
                }
                return((System.Int32)recorded);
            }
예제 #25
0
        /// <summary>
        /// Installs the underlying <c>TermEnum</c> (e.g. from a constructor) and positions this
        /// enumeration on the first matching term: the enumerator's current term if
        /// <c>TermCompare</c> accepts it, otherwise whatever <c>Next()</c> finds.
        /// </summary>
        /// <param name="actualEnum">The enumerator to wrap.</param>
        protected internal virtual void  SetEnum(TermEnum actualEnum)
        {
            this.actualEnum = actualEnum;

            Term first = actualEnum.Term();
            bool firstMatches = first != null && TermCompare(first);
            if (!firstMatches)
            {
                // The current term is missing or rejected -- scan forward for the first match.
                Next();
                return;
            }

            currentTerm = first;
        }
예제 #26
0
        /// <summary>
        /// Guesses the sort type of a field from its first term: <c>SortField.INT</c> if the trimmed
        /// text parses as an Int32, else <c>SortField.LONG</c> for an Int64, else
        /// <c>SortField.FLOAT</c> for a single, else <c>SortField.STRING</c>.
        /// </summary>
        /// <param name="reader">Index reader supplying the terms.</param>
        /// <param name="fieldKey">Name of the field to inspect.</param>
        /// <returns>One of the <c>SortField</c> type constants listed above.</returns>
        /// <exception cref="System.SystemException">If the field has no terms or does not appear to be indexed.</exception>
        internal static int DetectFieldType(IndexReader reader, System.String fieldKey)
        {
            System.String fieldName = StringHelper.Intern(fieldKey);
            TermEnum fieldTerms = reader.Terms(new Term(fieldName));

            try
            {
                Term first = fieldTerms.Term();
                if (first == null)
                {
                    throw new System.SystemException("no terms in field " + fieldName + " - cannot determine sort type");
                }
                // Field names are compared by reference (fieldName is interned above).
                if (!System.Object.ReferenceEquals(first.Field(), fieldName))
                {
                    throw new System.SystemException("field \"" + fieldName + "\" does not appear to be indexed");
                }

                System.String sample = first.Text().Trim();

                // Try the narrowest numeric type first and widen step by step.
                int asInt32;
                if (System.Int32.TryParse(sample, out asInt32))
                {
                    return SortField.INT;
                }

                long asInt64;
                if (System.Int64.TryParse(sample, out asInt64))
                {
                    return SortField.LONG;
                }

                float asSingle;
                if (SupportClass.Single.TryParse(sample, out asSingle))
                {
                    return SortField.FLOAT;
                }

                return SortField.STRING;
            }
            finally
            {
                fieldTerms.Close();
            }
        }
예제 #27
0
        /// <summary>
        /// Computes an order-dependent hash over a term array: starting from 1, each element folds
        /// in as <c>31 * hash + elementHash</c>, where a <c>null</c> element contributes 0.
        /// </summary>
        /// <param name="termArray">Array to hash; may be <c>null</c>.</param>
        /// <returns>0 for a <c>null</c> array, otherwise the combined hash.</returns>
        private int ArraysHashCode(Term[] termArray)
        {
            if (termArray == null)
            {
                return 0;
            }

            int hash = 1;
            foreach (Term element in termArray)
            {
                int elementHash = element == null ? 0 : element.GetHashCode();
                hash = 31 * hash + elementHash;
            }

            return hash;
        }
예제 #28
0
        /// <summary>
        /// Adds a term to the end of the query phrase at an explicitly given relative position.
        /// This allows e.g. phrases with more than one term at the same position or phrases with
        /// gaps (e.g. in connection with stopwords).
        /// </summary>
        /// <param name="term">The term to add; the first term added fixes the field of the phrase.</param>
        /// <param name="position">Relative position of the term within the phrase.</param>
        /// <exception cref="System.ArgumentException">If the term's field differs from the field of the
        /// terms added before.</exception>
        public virtual void  Add(Term term, int position)
        {
            bool isFirstTerm = terms.Count == 0;
            if (isFirstTerm)
            {
                field = term.Field();
            }
            else if (!System.Object.ReferenceEquals(term.Field(), field))
            {
                // Field names are compared by reference.
                throw new System.ArgumentException("All phrase terms must be in the same field: " + term);
            }

            terms.Add(term);
            positions.Add((System.Int32)position);

            // Track the largest position seen so far.
            if (maxPosition < position)
            {
                maxPosition = position;
            }
        }
예제 #29
0
            /// <summary>
            /// Walks every term delivered by <paramref name="enumerator"/>, reads the documents of each
            /// term in batches, and passes every document id to <c>HandleDoc</c>. Afterwards the number
            /// of visited terms is added to the query's term counter.
            /// </summary>
            /// <param name="query">Query whose total term count is incremented.</param>
            /// <param name="reader">Reader supplying the <c>TermDocs</c> used for the postings.</param>
            /// <param name="enumerator">Term enumeration to walk; it is not closed here.</param>
            public virtual void Generate(MultiTermQuery query, IndexReader reader, TermEnum enumerator)
            {
                // Scratch buffers for batched reads; the frequencies are read but not used.
                int[] docBuffer = new int[32];
                int[] freqBuffer = new int[32];
                TermDocs postings = reader.TermDocs();

                try
                {
                    int visitedTerms = 0;
                    bool advanced = true;

                    while (advanced)
                    {
                        Term current = enumerator.Term();
                        if (current == null)
                        {
                            break;
                        }

                        visitedTerms++;
                        postings.Seek(current);

                        // Read keeps filling the buffers until it reports zero documents.
                        int filled;
                        while ((filled = postings.Read(docBuffer, freqBuffer)) != 0)
                        {
                            for (int slot = 0; slot < filled; slot++)
                            {
                                HandleDoc(docBuffer[slot]);
                            }
                        }

                        advanced = enumerator.Next();
                    }

                    query.IncTotalNumberOfTerms(visitedTerms);
                }
                finally
                {
                    postings.Close();
                }
            }
예제 #30
0
            /// <summary> Builds the per-document <c>short</c> array for the field named in
            /// <paramref name="entryKey"/>. When the entry carries no parser, the work is
            /// delegated to <c>wrapper.GetShorts</c> with the default short parser.
            /// </summary>
            /// <param name="reader">Reader whose terms and postings are scanned.</param>
            /// <param name="entryKey">Cache key providing the field name and an optional <c>ShortParser</c>.</param>
            /// <returns>A <c>short[]</c> of length <c>reader.MaxDoc()</c>, or the result of the
            /// delegated <c>GetShorts</c> call when no parser is set.</returns>
            protected internal override System.Object CreateValue(IndexReader reader, Entry entryKey)
            {
                System.String field  = entryKey.field;
                ShortParser   parser = (ShortParser)entryKey.custom;

                if (parser == null)
                {
                    return(wrapper.GetShorts(reader, field, Mono.Lucene.Net.Search.FieldCache_Fields.DEFAULT_SHORT_PARSER));
                }

                short[]  retArray = new short[reader.MaxDoc()];
                TermDocs termDocs = reader.TermDocs();
                TermEnum termEnum = null;

                try
                {
                    // Acquired inside the try so that termDocs is still closed if this call throws.
                    termEnum = reader.Terms(new Term(field));
                    do
                    {
                        Term term = termEnum.Term();
                        // Field names are compared by reference; stop once the enumeration leaves the field.
                        if (term == null || (System.Object)term.Field() != (System.Object)field)
                        {
                            break;
                        }
                        short termval = parser.ParseShort(term.Text());
                        termDocs.Seek(termEnum);
                        while (termDocs.Next())
                        {
                            retArray[termDocs.Doc()] = termval;
                        }
                    }while (termEnum.Next());
                }
                catch (StopFillCacheException)
                {
                    // Deliberately ignored: the values collected so far are returned.
                    // NOTE(review): presumably raised by the parser to end the scan early — confirm.
                }
                finally
                {
                    termDocs.Close();
                    if (termEnum != null)
                    {
                        termEnum.Close();
                    }
                }
                return(retArray);
            }
예제 #31
0
		/// <summary> Creates a FuzzyQuery for <paramref name="term"/> with the given minimum
		/// similarity and required common prefix length. The rewrite method is set to
		/// <c>SCORING_BOOLEAN_QUERY_REWRITE</c>.
		/// </summary>
		/// <param name="term">the term to search for
		/// </param>
		/// <param name="minimumSimilarity">required similarity; must be at least 0 and less than 1
		/// </param>
		/// <param name="prefixLength">length of the common (non-fuzzy) prefix; must not be negative
		/// </param>
		/// <exception cref="System.ArgumentException">if <paramref name="minimumSimilarity"/> is
		/// &gt;= 1 or &lt; 0, or if <paramref name="prefixLength"/> is &lt; 0
		/// </exception>
		public FuzzyQuery(Term term, float minimumSimilarity, int prefixLength):base(term)
		{
			// Carried over from the original source: "will be removed in 3.0".
			this.term = term;

			if (minimumSimilarity >= 1.0f)
			{
				throw new System.ArgumentException("minimumSimilarity >= 1");
			}
			if (minimumSimilarity < 0.0f)
			{
				throw new System.ArgumentException("minimumSimilarity < 0");
			}
			if (prefixLength < 0)
			{
				throw new System.ArgumentException("prefixLength < 0");
			}

			// The flag is only raised when the term text is longer than 1 / (1 - minimumSimilarity).
			float lengthThreshold = 1.0f / (1.0f - minimumSimilarity);
			if (term.Text().Length > lengthThreshold)
			{
				this.termLongEnough = true;
			}

			this.minimumSimilarity = minimumSimilarity;
			this.prefixLength = prefixLength;
			rewriteMethod = SCORING_BOOLEAN_QUERY_REWRITE;
		}
예제 #32
0
        /// <summary> Creates the weight for a query that is searched across several indexes.
        ///
        /// The method rewrites the query, extracts its terms, sums the document
        /// frequencies of those terms over all <c>searchables</c>, and builds the weight
        /// from a <c>CachedDfSource</c> holding the summed frequencies.
        /// </summary>
        /// <param name="original">Query to create the weight for.</param>
        /// <returns> weight of the rewritten query, computed with the aggregated document frequencies
        /// </returns>
        public /*protected internal*/ override Weight CreateWeight(Query original)
        {
            // Step 1: rewrite the query.
            Query rewritten = Rewrite(original);

            // Step 2: collect the terms of the rewritten query.
            System.Collections.Hashtable termSet = new System.Collections.Hashtable();
            rewritten.ExtractTerms(termSet);

            // Step 3: aggregate the document frequencies over all searchables.
            int termCount = termSet.Count;
            Term[] allTerms = new Term[termCount];
            int next = 0;
            foreach (System.Object key in termSet.Keys)
            {
                allTerms[next] = key as Term;
                next++;
            }

            int[] summedDfs = new int[termCount];
            for (int s = 0; s < searchables.Length; s++)
            {
                int[] partial = searchables[s].DocFreqs(allTerms);
                for (int t = 0; t < summedDfs.Length; t++)
                {
                    summedDfs[t] += partial[t];
                }
            }

            System.Collections.Hashtable dfByTerm = new System.Collections.Hashtable();
            for (int t = 0; t < allTerms.Length; t++)
            {
                dfByTerm[allTerms[t]] = summedDfs[t];
            }

            // Step 4: build the weight from the cached frequencies.
            CachedDfSource cachedSource = new CachedDfSource(dfByTerm, MaxDoc(), GetSimilarity());
            return rewritten.Weight(cachedSource);
        }
예제 #33
0
		/// <summary> Creates a new <c>WildcardTermEnum</c> for <paramref name="term"/>.
		/// The text before the first wildcard becomes the literal prefix, and the
		/// underlying enumeration is opened at that prefix via <c>SetEnum</c>.
		/// </summary>
		/// <param name="reader">Reader that supplies the terms.</param>
		/// <param name="term">Search term whose text may contain wildcard characters.</param>
		public WildcardTermEnum(IndexReader reader, Term term):base()
		{
			searchTerm = term;
			field = searchTerm.Field();
			System.String pattern = searchTerm.Text();

			int stringWildcardAt = pattern.IndexOf((System.Char) WILDCARD_STRING);
			int charWildcardAt = pattern.IndexOf((System.Char) WILDCARD_CHAR);

			// Position of the earliest wildcard of either kind, or -1 when there is none.
			int firstWildcard;
			if (stringWildcardAt == - 1)
			{
				firstWildcard = charWildcardAt;
			}
			else if (charWildcardAt >= 0)
			{
				firstWildcard = System.Math.Min(stringWildcardAt, charWildcardAt);
			}
			else
			{
				firstWildcard = stringWildcardAt;
			}

			if (firstWildcard != - 1)
			{
				pre = pattern.Substring(0, firstWildcard);
			}
			else
			{
				pre = "";
			}

			preLen = pre.Length;
			text = pattern.Substring(preLen);
			SetEnum(reader.Terms(new Term(searchTerm.Field(), pre)));
		}
예제 #34
0
        /// <summary> Creates a new <c>WildcardTermEnum</c> for <paramref name="term"/>.
        /// Everything in the term text ahead of the first wildcard is kept as the
        /// literal prefix, the remainder is stored as the pattern text, and the
        /// underlying enumeration is opened at the prefix via <c>SetEnum</c>.
        /// </summary>
        /// <param name="reader">Reader that supplies the terms.</param>
        /// <param name="term">Search term whose text may contain wildcard characters.</param>
        public WildcardTermEnum(IndexReader reader, Term term) : base()
        {
            searchTerm = term;
            field      = searchTerm.Field();
            System.String pattern = searchTerm.Text();

            // IndexOfAny yields the earliest occurrence of either wildcard, or -1 if neither occurs.
            System.Char[] wildcards = new System.Char[] { (System.Char)WILDCARD_STRING, (System.Char)WILDCARD_CHAR };
            int firstWildcard = pattern.IndexOfAny(wildcards);

            pre    = firstWildcard == -1 ? "" : pattern.Substring(0, firstWildcard);
            preLen = pre.Length;
            text   = pattern.Substring(preLen);

            Term startTerm = new Term(searchTerm.Field(), pre);
            SetEnum(reader.Terms(startTerm));
        }
예제 #35
0
		/// <summary>Advances to the next term accepted by <c>TermCompare</c>.</summary>
		/// <returns><c>true</c> if such a term was found and stored as the current term;
		/// <c>false</c> if the underlying enumerator is not initialized, <c>EndEnum()</c>
		/// reports the end, or the underlying enumerator is exhausted.</returns>
		public override bool Next()
		{
			if (actualEnum == null)
			{
				// the actual enumerator is not initialized!
				return false;
			}

			currentTerm = null;

			// EndEnum() is consulted before every advance of the underlying enumerator.
			while (!EndEnum() && actualEnum.Next())
			{
				Term candidate = actualEnum.Term();
				if (TermCompare(candidate))
				{
					currentTerm = candidate;
					return true;
				}
			}
			return false;
		}
예제 #36
0
		/// <summary>Creates a filter backed by a <c>PrefixQuery</c> built from <paramref name="prefix"/>.</summary>
		/// <param name="prefix">Prefix term passed to the wrapped <c>PrefixQuery</c>.</param>
		public PrefixFilter(Term prefix)
			: base(new PrefixQuery(prefix))
		{
		}
예제 #37
0
		/// <summary> Creates a <c>WildcardQuery</c> for <paramref name="t"/> and applies
		/// the configured multi-term rewrite method to it.</summary>
		/// <param name="t">wildcard term
		/// </param>
		/// <returns> the newly created WildcardQuery
		/// </returns>
		protected internal virtual Query NewWildcardQuery(Term t)
		{
			WildcardQuery wildcard = new WildcardQuery(t);
			wildcard.SetRewriteMethod(multiTermRewriteMethod);
			return wildcard;
		}
예제 #38
0
		/// <summary>Closes the underlying enumerator, if there is one, and clears the
		/// current term and the enumerator reference.</summary>
		public override void  Close()
		{
			if (actualEnum != null)
			{
				actualEnum.Close();
				actualEnum = null;
			}
			currentTerm = null;
		}
예제 #39
0
		/// <summary> Factory method for generating a query. Called when the parser
		/// parses an input term token that contains one or more wildcard
		/// characters (? and *), but is not a prefix term token (one
		/// that has just a single * character at the end).
		/// <p/>
		/// When both <paramref name="field"/> and <paramref name="termStr"/> are "*",
		/// a match-all-documents query is returned. Depending on
		/// <c>lowercaseExpandedTerms</c> the term text is lower-cased before the
		/// query is built; it is not passed through an analyzer here.
		/// <p/>
		/// Can be overridden by extending classes to provide custom handling for
		/// wildcard queries.
		/// </summary>
		/// <param name="field">Name of the field query will use.
		/// </param>
		/// <param name="termStr">Term token that contains one or more wild card
		/// characters (? or *), but is not simple prefix term
		/// </param>
		/// <returns> Resulting <c>Query</c> built for the term
		/// </returns>
		/// <exception cref="ParseException">thrown when the term starts with '*' or '?'
		/// while leading wildcards are not allowed; overriding methods may also throw
		/// it to disallow the query
		/// </exception>
		public /*protected internal*/ virtual Query GetWildcardQuery(System.String field, System.String termStr)
		{
			if ("*".Equals(field) && "*".Equals(termStr))
			{
				return NewMatchAllDocsQuery();
			}

			if (!allowLeadingWildcard)
			{
				// Ordinal comparison: this is a test of the literal first character, so
				// culture rules (e.g. ignorable characters) must not influence it.
				bool startsWithWildcard =
					termStr.StartsWith("*", System.StringComparison.Ordinal) ||
					termStr.StartsWith("?", System.StringComparison.Ordinal);
				if (startsWithWildcard)
				{
					throw new ParseException("'*' or '?' not allowed as first character in WildcardQuery");
				}
			}

			if (lowercaseExpandedTerms)
			{
				// NOTE(review): ToLower() uses the current culture; confirm whether that is intended.
				termStr = termStr.ToLower();
			}
			Term t = new Term(field, termStr);
			return NewWildcardQuery(t);
		}
예제 #40
0
파일: TermQuery.cs 프로젝트: carrie901/mono
		/// <summary>Constructs a query for the term <paramref name="t"/>.</summary>
		/// <param name="t">Term stored by this query.</param>
		public TermQuery(Term t)
		{
			this.term = t;
		}
예제 #41
0
		/// <summary> Creates a FuzzyQuery for <paramref name="term"/> by calling the
		/// three-argument constructor with <c>defaultMinSimilarity</c> and
		/// <c>defaultPrefixLength</c>.</summary>
		/// <param name="term">the term to search for</param>
		public FuzzyQuery(Term term)
			: this(term, defaultMinSimilarity, defaultPrefixLength)
		{
		}
예제 #42
0
			/// <summary>Returns the document frequency stored in <c>dfMap</c> for <paramref name="term"/>.</summary>
			/// <param name="term">Term to look up.</param>
			/// <returns>The stored document frequency.</returns>
			/// <exception cref="System.ArgumentException">No frequency is stored for the term.</exception>
			public override int DocFreq(Term term)
			{
				// Look the entry up explicitly rather than unboxing a possible null and
				// catching the resulting NullReferenceException, which would also hide
				// an unrelated NullReferenceException behind a misleading message.
				System.Object cached = dfMap[term];
				if (cached == null)
				{
					throw new System.ArgumentException("df for term " + term.Text() + " not available");
				}
				return (System.Int32) cached;
			}
예제 #43
0
 /// <summary> Accepts <paramref name="term"/> while it belongs to the enclosing
 /// instance's field (reference comparison) and its text does not sort, by ordinal
 /// comparison, after the current upper bound.
 /// NOTE(review): the previous comment also mentioned updating a term count and
 /// forwarding to the next sub-range when <c>false</c> is returned; neither happens
 /// inside this method — confirm against the caller.
 /// </summary>
 /// <param name="term">Term to test.</param>
 /// <returns><c>true</c> if the term is in the field and not past the current upper bound.</returns>
 public /*protected internal*/ override bool TermCompare(Term term)
 {
     bool sameField = (System.Object)term.Field() == (System.Object)Enclosing_Instance.field;
     if (!sameField)
     {
         return false;
     }
     return String.CompareOrdinal(term.Text(), currentUpperBound) <= 0;
 }
예제 #44
0
		/// <summary>Sums the document frequency of <paramref name="term"/> over all <c>searchables</c>.</summary>
		/// <param name="term">Term to look up.</param>
		/// <returns>Total of the values returned by each searchable's <c>DocFreq</c>.</returns>
		public override int DocFreq(Term term)
		{
			int total = 0;
			int position = 0;
			while (position < searchables.Length)
			{
				total += searchables[position].DocFreq(term);
				position++;
			}
			return total;
		}
예제 #45
0
		/// <summary>Creates a wildcard query for <paramref name="term"/> and records whether
		/// the term text contains a '*' or '?' character.</summary>
		/// <param name="term">Term holding the wildcard pattern.</param>
		public WildcardQuery(Term term):base(term)
		{
			// Carried over from the original source: "will be removed in 3.0".
			this.term = term;

			System.String pattern = term.Text();
			this.termContainsWildcard = pattern.IndexOfAny(new char[] { '*', '?' }) != - 1;
		}
예제 #46
0
		/// <summary> Creates a <c>PrefixQuery</c> for <paramref name="prefix"/> and applies
		/// the configured multi-term rewrite method to it.</summary>
		/// <param name="prefix">Prefix term
		/// </param>
		/// <returns> the newly created PrefixQuery
		/// </returns>
		protected internal virtual Query NewPrefixQuery(Term prefix)
		{
			PrefixQuery prefixQuery = new PrefixQuery(prefix);
			prefixQuery.SetRewriteMethod(multiTermRewriteMethod);
			return prefixQuery;
		}
예제 #47
0
		/// <summary> Creates a <c>FuzzyQuery</c> from the given arguments. No rewrite method
		/// is applied here.</summary>
		/// <param name="term">Term
		/// </param>
		/// <param name="minimumSimilarity">minimum similarity
		/// </param>
		/// <param name="prefixLength">prefix length
		/// </param>
		/// <returns> the newly created FuzzyQuery
		/// </returns>
		protected internal virtual Query NewFuzzyQuery(Term term, float minimumSimilarity, int prefixLength)
		{
			// FuzzyQuery doesn't yet allow constant score rewrite
			FuzzyQuery fuzzy = new FuzzyQuery(term, minimumSimilarity, prefixLength);
			return fuzzy;
		}
예제 #48
0
		/// <summary>Constructs a query that stores <paramref name="term"/> in its <c>term</c> field.</summary>
		/// <param name="term">Term kept by this query.</param>
		public MultiTermQuery(Term term)
		{
			this.term = term;
		}
예제 #49
0
			/// <summary>Looks up the document frequency of every term in <paramref name="terms"/>
			/// by calling <c>DocFreq</c> for each one.</summary>
			/// <param name="terms">Terms to look up.</param>
			/// <returns>Array of the same length as <paramref name="terms"/>, holding the
			/// frequency of each term at the matching index.</returns>
			public override int[] DocFreqs(Term[] terms)
			{
				int count = terms.Length;
				int[] frequencies = new int[count];
				for (int slot = 0; slot < count; slot++)
				{
					frequencies[slot] = DocFreq(terms[slot]);
				}
				return frequencies;
			}
예제 #50
0
		/// <summary> Creates a <c>TermQuery</c> for <paramref name="term"/>.</summary>
		/// <param name="term">term
		/// </param>
		/// <returns> the newly created TermQuery
		/// </returns>
		protected internal virtual Query NewTermQuery(Term term)
		{
			Query created = new TermQuery(term);
			return created;
		}
예제 #51
0
		/// <summary> Creates the weight for a query that is searched across several indexes.
		/// 
		/// The query is rewritten, its terms are extracted, the document frequencies
		/// of those terms are summed over all <c>searchables</c>, and the weight is
		/// created from a <c>CachedDfSource</c> that holds the summed frequencies.
		/// </summary>
		/// <param name="original">Query to create the weight for.</param>
		/// <returns> weight of the rewritten query, computed with the aggregated document frequencies
		/// </returns>
		public /*protected internal*/ override Weight CreateWeight(Query original)
		{
			// step 1: rewrite
			Query rewrittenQuery = Rewrite(original);

			// step 2: extract the terms
			System.Collections.Hashtable extracted = new System.Collections.Hashtable();
			rewrittenQuery.ExtractTerms(extracted);

			// step 3: copy the terms into an array and sum their frequencies
			Term[] termArray = new Term[extracted.Count];
			int filled = 0;
			for (System.Collections.IEnumerator keys = extracted.Keys.GetEnumerator(); keys.MoveNext(); filled++)
			{
				termArray[filled] = keys.Current as Term;
			}

			int[] dfTotals = new int[extracted.Count];
			for (int searcher = 0; searcher < searchables.Length; searcher++)
			{
				int[] dfPart = searchables[searcher].DocFreqs(termArray);
				for (int k = 0; k < dfTotals.Length; k++)
				{
					dfTotals[k] = dfTotals[k] + dfPart[k];
				}
			}

			System.Collections.Hashtable dfLookup = new System.Collections.Hashtable();
			for (int k = 0; k < termArray.Length; k++)
			{
				dfLookup[termArray[k]] = dfTotals[k];
			}

			// step 4: create the weight from the cached frequencies
			int docLimit = MaxDoc();
			CachedDfSource dfSource = new CachedDfSource(dfLookup, docLimit, GetSimilarity());
			return rewrittenQuery.Weight(dfSource);
		}
예제 #52
0
		/// <summary> Factory method for generating a fuzzy query. Called when the parser
		/// parses an input term token that has the fuzzy suffix (~) appended.
		/// The term text is lower-cased first when <c>lowercaseExpandedTerms</c> is set.
		/// </summary>
		/// <param name="field">Name of the field query will use.
		/// </param>
		/// <param name="termStr">Term token to use for building term for the query
		/// </param>
		/// <param name="minSimilarity">Minimum similarity handed to <c>NewFuzzyQuery</c>.
		/// </param>
		/// <returns> Resulting <c>Query</c> built for the term
		/// </returns>
		/// <exception cref="ParseException">throw in overridden method to disallow
		/// </exception>
		public /*protected internal*/ virtual Query GetFuzzyQuery(System.String field, System.String termStr, float minSimilarity)
		{
			System.String termText = lowercaseExpandedTerms ? termStr.ToLower() : termStr;
			return NewFuzzyQuery(new Term(field, termText), minSimilarity, fuzzyPrefixLength);
		}
예제 #53
0
 /// <summary>Construct a SpanTermQuery matching the named term's spans. </summary>
 /// <param name="term">Term stored in this query's <c>term</c> field.</param>
 public SpanTermQuery(Term term)
 {
     this.term = term;
 }
예제 #54
0
		/// <summary> Creates an enumeration over the terms of <paramref name="reader"/> for the
		/// pattern <paramref name="term"/>. The first <paramref name="prefixLength"/> characters
		/// of the term text (at most the whole text) become the required prefix, and the
		/// underlying enumeration is opened at that prefix via <c>SetEnum</c>.
		/// </summary>
		/// <param name="reader">Delivers terms.
		/// </param>
		/// <param name="term">Pattern term.
		/// </param>
		/// <param name="minSimilarity">Minimum required similarity; must be at least 0 and less than 1.
		/// </param>
		/// <param name="prefixLength">Length of required common prefix; must not be negative.
		/// </param>
		/// <exception cref="System.ArgumentException">if <paramref name="minSimilarity"/> is
		/// &gt;= 1 or &lt; 0, or if <paramref name="prefixLength"/> is &lt; 0
		/// </exception>
		public FuzzyTermEnum(IndexReader reader, Term term, float minSimilarity, int prefixLength):base()
		{
			if (minSimilarity >= 1.0f)
			{
				throw new System.ArgumentException("minimumSimilarity cannot be greater than or equal to 1");
			}
			if (minSimilarity < 0.0f)
			{
				throw new System.ArgumentException("minimumSimilarity cannot be less than 0");
			}
			if (prefixLength < 0)
			{
				throw new System.ArgumentException("prefixLength cannot be less than 0");
			}

			this.minimumSimilarity = minSimilarity;
			this.scale_factor = 1.0f / (1.0f - minimumSimilarity);
			this.searchTerm = term;
			this.field = searchTerm.Field();

			// The requested prefix may be longer than the term text; in that case the
			// whole text is the prefix and the fuzzy part is empty.
			System.String fullText = searchTerm.Text();
			int cut = System.Math.Min(prefixLength, fullText.Length);

			this.text = fullText.Substring(cut);
			this.prefix = fullText.Substring(0, cut);

			InitializeMaxDistances();
			this.d = InitDistanceArray();

			Term startTerm = new Term(searchTerm.Field(), prefix);
			SetEnum(reader.Terms(startTerm));
		}
예제 #55
0
			/// <summary>Pairs a term with its score.</summary>
			/// <param name="term">Term to store.</param>
			/// <param name="score">Score to store for the term.</param>
			public ScoreTerm(Term term, float score)
			{
				this.score = score;
				this.term = term;
			}
예제 #56
0
		/// <summary> Tests <paramref name="term"/> against the search pattern. A term in the
		/// searched field whose text starts with the required prefix has the rest of its
		/// text scored by <c>Similarity</c>; the score is stored in <c>similarity</c>.
		/// Any other term sets <c>endEnum</c>, marking the end of the enumeration.
		/// </summary>
		/// <param name="term">Term to test.</param>
		/// <returns><c>true</c> if the computed similarity is greater than the minimum similarity.</returns>
		public /*protected internal*/ override bool TermCompare(Term term)
		{
			// The prefix test must be ordinal: the Substring call below removes exactly
			// prefix.Length characters, which is only correct for a literal match.
			if ((System.Object) field == (System.Object) term.Field() && term.Text().StartsWith(prefix, System.StringComparison.Ordinal))
			{
				System.String target = term.Text().Substring(prefix.Length);
				this.similarity = Similarity(target);
				return (similarity > minimumSimilarity);
			}
			endEnum = true;
			return false;
		}
예제 #57
0
		/// <summary> Creates a FuzzyQuery for <paramref name="term"/> by calling the
		/// three-argument constructor with the given minimum similarity and
		/// <c>defaultPrefixLength</c>.</summary>
		/// <param name="term">the term to search for</param>
		/// <param name="minimumSimilarity">required similarity, passed on unchanged</param>
		public FuzzyQuery(Term term, float minimumSimilarity)
			: this(term, minimumSimilarity, defaultPrefixLength)
		{
		}
예제 #58
0
		/// <summary> Creates a FuzzyTermEnum by calling the four-argument constructor with
		/// <c>FuzzyQuery.defaultPrefixLength</c> as the prefix length.
		/// </summary>
		/// <param name="reader">Delivers terms.
		/// </param>
		/// <param name="term">Pattern term.
		/// </param>
		/// <param name="minSimilarity">Minimum required similarity, passed on unchanged.
		/// </param>
		public FuzzyTermEnum(IndexReader reader, Term term, float minSimilarity)
			: this(reader, term, minSimilarity, FuzzyQuery.defaultPrefixLength)
		{
		}
예제 #59
0
		/// <summary> Factory method for generating a prefix query. Called when the parser
		/// parses an input term token that uses prefix notation; that is, contains a
		/// single '*' wildcard character as its last character.
		/// <p/>
		/// Depending on <c>lowercaseExpandedTerms</c> the term text is lower-cased
		/// before the query is built; it is not passed through an analyzer here.
		/// <p/>
		/// Can be overridden by extending classes to provide custom handling for
		/// wild card queries.
		/// </summary>
		/// <param name="field">Name of the field query will use.
		/// </param>
		/// <param name="termStr">Term token to use for building term for the query
		/// (<b>without</b> trailing '*' character!)
		/// </param>
		/// <returns> Resulting <c>Query</c> built for the term
		/// </returns>
		/// <exception cref="ParseException">thrown when the term starts with '*' while
		/// leading wildcards are not allowed; overriding methods may also throw it to
		/// disallow the query
		/// </exception>
		public /*protected internal*/ virtual Query GetPrefixQuery(System.String field, System.String termStr)
		{
			// Ordinal comparison: this is a test of the literal first character, so
			// culture rules (e.g. ignorable characters) must not influence it.
			if (!allowLeadingWildcard && termStr.StartsWith("*", System.StringComparison.Ordinal))
			{
				throw new ParseException("'*' not allowed as first character in PrefixQuery");
			}
			if (lowercaseExpandedTerms)
			{
				// NOTE(review): ToLower() uses the current culture; confirm whether that is intended.
				termStr = termStr.ToLower();
			}
			Term t = new Term(field, termStr);
			return NewPrefixQuery(t);
		}
예제 #60
0
		/// <summary>Equality compare on the term.
		/// Abstract: each subclass supplies the actual test for <paramref name="term"/>.
		/// NOTE(review): presumably a <c>true</c> result means the enumeration accepts the term — confirm against callers.
		/// </summary>
		/// <param name="term">Term to test.</param>
		public /*protected internal*/ abstract bool TermCompare(Term term);