/// <summary>Wraps a positions enumerator for a single term; the document cursor starts before the first document.</summary>
public TermSpans(TermPositions positions, Term term)
{
    this.positions = positions;
    this.term = term;
    this.doc = -1; // "not yet positioned" sentinel
}
/// <summary>
/// Expands the multi-term query into a BooleanQuery of SHOULD clauses, one
/// TermQuery per term produced by the query's FilteredTermEnum. Each clause's
/// boost is the query boost scaled by the enum's per-term difference factor.
/// </summary>
/// <param name="reader">index to enumerate matching terms from</param>
/// <param name="query">the multi-term query being rewritten</param>
/// <returns>a scoring BooleanQuery over all matching terms</returns>
public override Query Rewrite(IndexReader reader, MultiTermQuery query)
{
    FilteredTermEnum enumerator = query.GetEnum(reader);
    // true => coord factor disabled: the expanded clauses are alternatives,
    // not user-written clauses
    BooleanQuery result = new BooleanQuery(true);
    int count = 0;
    try
    {
        // FilteredTermEnum is positioned on its first term before Next() is
        // called, hence do/while rather than while
        do
        {
            Term t = enumerator.Term();
            if (t != null)
            {
                TermQuery tq = new TermQuery(t); // found a match
                tq.SetBoost(query.GetBoost() * enumerator.Difference()); // set the boost
                result.Add(tq, BooleanClause.Occur.SHOULD); // add to query
                count++;
            }
        }
        while (enumerator.Next());
    }
    finally
    {
        enumerator.Close();
    }
    query.IncTotalNumberOfTerms(count);
    return (result);
}
/// <summary>
/// Builds the per-document string cache for the entry's field: walks every
/// term of the field and records the term's text for each document that
/// contains it. Documents with no term in the field are left null.
/// </summary>
/// <returns>a String[] indexed by document number (length = reader.MaxDoc())</returns>
protected internal override System.Object CreateValue(IndexReader reader, Entry entryKey)
{
    System.String field = StringHelper.Intern((System.String)entryKey.field);
    System.String[] retArray = new System.String[reader.MaxDoc()];
    TermDocs termDocs = reader.TermDocs();
    TermEnum termEnum = reader.Terms(new Term(field));
    try
    {
        do
        {
            Term term = termEnum.Term();
            // reference comparison is valid because field names are interned
            if (term == null || (System.Object)term.Field() != (System.Object)field)
            {
                break;
            }
            System.String termval = term.Text();
            termDocs.Seek(termEnum);
            while (termDocs.Next())
            {
                retArray[termDocs.Doc()] = termval;
            }
        }
        while (termEnum.Next());
    }
    finally
    {
        termDocs.Close();
        termEnum.Close();
    }
    return (retArray);
}
/// <summary>Increments the enumeration to the next element. True if one exists. </summary>
/// <remarks>
/// Advances the wrapped enum until TermCompare accepts a term (which becomes
/// currentTerm) or the enumeration ends. The original body's trailing
/// "currentTerm = null; return false;" after the while loop was unreachable
/// (every path inside the loop returns); that dead code is removed here.
/// </remarks>
public override bool Next()
{
    if (actualEnum == null)
    {
        return (false); // the actual enumerator is not initialized!
    }
    currentTerm = null;
    while (true)
    {
        if (EndEnum())
        {
            return (false); // subclass signalled the end of the range
        }
        if (!actualEnum.Next())
        {
            return (false); // underlying enum is exhausted
        }
        Term term = actualEnum.Term();
        if (TermCompare(term))
        {
            currentTerm = term;
            return (true);
        }
        // term rejected by the filter -- keep scanning
    }
}
/// <summary>
/// Enumerates all terms from <code>reader</code> that share a prefix of length
/// <code>prefixLength</code> with <code>term</code> and have fuzzy similarity
/// greater than <code>minSimilarity</code>. After construction the enumeration
/// is already positioned on the first valid term, if one exists.
/// </summary>
/// <param name="reader">Delivers terms.</param>
/// <param name="term">Pattern term.</param>
/// <param name="minSimilarity">Minimum required similarity for terms from the reader. Default value is 0.5f.</param>
/// <param name="prefixLength">Length of required common prefix. Default value is 0.</param>
/// <throws>  IOException </throws>
public FuzzyTermEnum(IndexReader reader, Term term, float minSimilarity, int prefixLength)
    : base()
{
    if (minSimilarity >= 1.0f)
    {
        throw new System.ArgumentException("minimumSimilarity cannot be greater than or equal to 1");
    }
    if (minSimilarity < 0.0f)
    {
        throw new System.ArgumentException("minimumSimilarity cannot be less than 0");
    }
    if (prefixLength < 0)
    {
        throw new System.ArgumentException("prefixLength cannot be less than 0");
    }

    this.minimumSimilarity = minSimilarity;
    this.scale_factor = 1.0f / (1.0f - minimumSimilarity);
    this.searchTerm = term;
    this.field = searchTerm.Field();

    // Clamp the prefix to the pattern length; a longer prefix simply means
    // the entire word must match exactly.
    System.String patternText = searchTerm.Text();
    int usablePrefix = System.Math.Min(prefixLength, patternText.Length);
    this.text = patternText.Substring(usablePrefix);
    this.prefix = patternText.Substring(0, usablePrefix);

    InitializeMaxDistances();
    this.d = InitDistanceArray();

    SetEnum(reader.Terms(new Term(searchTerm.Field(), prefix)));
}
/// <summary>
/// Builds the per-document comparable cache for the entry's field using the
/// entry's custom SortComparator: each term's text is converted once, and the
/// resulting IComparable is shared by every document containing that term.
/// </summary>
/// <returns>an IComparable[] indexed by document number (length = reader.MaxDoc())</returns>
protected internal override System.Object CreateValue(IndexReader reader, Entry entryKey)
{
    Entry entry = (Entry)entryKey;
    System.String field = entry.field;
    SortComparator comparator = (SortComparator)entry.custom;
    System.IComparable[] retArray = new System.IComparable[reader.MaxDoc()];
    TermDocs termDocs = reader.TermDocs();
    TermEnum termEnum = reader.Terms(new Term(field));
    try
    {
        do
        {
            Term term = termEnum.Term();
            // reference comparison is valid because field names are interned
            if (term == null || (System.Object)term.Field() != (System.Object)field)
            {
                break;
            }
            System.IComparable termval = comparator.GetComparable(term.Text());
            termDocs.Seek(termEnum);
            while (termDocs.Next())
            {
                retArray[termDocs.Doc()] = termval;
            }
        }
        while (termEnum.Next());
    }
    finally
    {
        termDocs.Close();
        termEnum.Close();
    }
    return (retArray);
}
/// <summary>
/// Create a new FuzzyQuery that will match terms with a similarity of at least
/// <code>minimumSimilarity</code> to <code>term</code>. If a
/// <code>prefixLength</code> &gt; 0 is specified, a common prefix of that
/// length is also required.
/// </summary>
/// <param name="term">the term to search for</param>
/// <param name="minimumSimilarity">a value between 0 and 1 setting the required
/// similarity between the query term and matching terms; e.g. for 0.5, a term
/// of the same length matches when the edit distance is below length(term)*0.5
/// </param>
/// <param name="prefixLength">length of common (non-fuzzy) prefix</param>
/// <throws> IllegalArgumentException if minimumSimilarity is &gt;= 1 or &lt; 0, or if prefixLength &lt; 0 </throws>
public FuzzyQuery(Term term, float minimumSimilarity, int prefixLength)
    : base(term)
{
    // will be removed in 3.0
    this.term = term;

    if (minimumSimilarity >= 1.0f)
        throw new System.ArgumentException("minimumSimilarity >= 1");
    if (minimumSimilarity < 0.0f)
        throw new System.ArgumentException("minimumSimilarity < 0");
    if (prefixLength < 0)
        throw new System.ArgumentException("prefixLength < 0");

    // Terms shorter than 1/(1-minSim) characters can never match fuzzily.
    if (term.Text().Length > 1.0f / (1.0f - minimumSimilarity))
        this.termLongEnough = true;

    this.minimumSimilarity = minimumSimilarity;
    this.prefixLength = prefixLength;
    rewriteMethod = SCORING_BOOLEAN_QUERY_REWRITE;
}
/// <summary>
/// Rewrites the multi-term query by visiting its terms while counting terms
/// and cumulative doc frequency. If either cutoff is crossed, returns a
/// constant-score filter-backed query; otherwise builds a constant-score
/// BooleanQuery over the collected terms.
/// </summary>
public override Query Rewrite(IndexReader reader, MultiTermQuery query)
{
    // Get the enum and start visiting terms.  If we
    // exhaust the enum before hitting either of the
    // cutoffs, we use ConstantBooleanQueryRewrite; else,
    // ConstantFilterRewrite:
    System.Collections.ArrayList pendingTerms = new System.Collections.ArrayList();
    // docCountPercent is a percentage of maxDoc; convert to an absolute count
    int docCountCutoff = (int)((docCountPercent / 100.0) * reader.MaxDoc());
    int termCountLimit = System.Math.Min(BooleanQuery.GetMaxClauseCount(), termCountCutoff);
    int docVisitCount = 0;
    FilteredTermEnum enumerator = query.GetEnum(reader);
    try
    {
        while (true)
        {
            Term t = enumerator.Term();
            if (t != null)
            {
                pendingTerms.Add(t);
                // Loading the TermInfo from the terms dict here
                // should not be costly, because 1) the
                // query/filter will load the TermInfo when it
                // runs, and 2) the terms dict has a cache:
                docVisitCount += reader.DocFreq(t);
            }
            if (pendingTerms.Count >= termCountLimit || docVisitCount >= docCountCutoff)
            {
                // Too many terms -- make a filter.
                Query result = new ConstantScoreQuery(new MultiTermQueryWrapperFilter(query));
                result.SetBoost(query.GetBoost());
                return (result);
            }
            else if (!enumerator.Next())
            {
                // Enumeration is done, and we hit a small
                // enough number of terms & docs -- just make a
                // BooleanQuery, now
                System.Collections.IEnumerator it = pendingTerms.GetEnumerator();
                BooleanQuery bq = new BooleanQuery(true);
                while (it.MoveNext())
                {
                    TermQuery tq = new TermQuery((Term)it.Current);
                    bq.Add(tq, BooleanClause.Occur.SHOULD);
                }
                // Strip scores
                Query result = new ConstantScoreQuery(new QueryWrapperFilter(bq));
                result.SetBoost(query.GetBoost());
                query.IncTotalNumberOfTerms(pendingTerms.Count);
                return (result);
            }
        }
    }
    finally
    {
        enumerator.Close();
    }
}
/// <summary>
/// Builds the per-document float cache for the entry's field. With no custom
/// parser, retries via the wrapper: first the plain-text float parser, then
/// (on FormatException) the NumericUtils-encoded parser. The array is lazily
/// allocated so fields with no parseable terms still return an all-zero array.
/// </summary>
/// <returns>a float[] indexed by document number (length = reader.MaxDoc())</returns>
protected internal override System.Object CreateValue(IndexReader reader, Entry entryKey)
{
    Entry entry = (Entry)entryKey;
    System.String field = entry.field;
    FloatParser parser = (FloatParser)entry.custom;
    if (parser == null)
    {
        try
        {
            return (wrapper.GetFloats(reader, field, Mono.Lucene.Net.Search.FieldCache_Fields.DEFAULT_FLOAT_PARSER));
        }
        catch (System.FormatException ne)
        {
            // field is not plain-text floats; assume NumericField encoding
            return (wrapper.GetFloats(reader, field, Mono.Lucene.Net.Search.FieldCache_Fields.NUMERIC_UTILS_FLOAT_PARSER));
        }
    }
    float[] retArray = null;
    TermDocs termDocs = reader.TermDocs();
    TermEnum termEnum = reader.Terms(new Term(field));
    try
    {
        do
        {
            Term term = termEnum.Term();
            // reference comparison is valid because field names are interned
            if (term == null || (System.Object)term.Field() != (System.Object)field)
            {
                break;
            }
            float termval = parser.ParseFloat(term.Text());
            if (retArray == null)
            {
                // late init
                retArray = new float[reader.MaxDoc()];
            }
            termDocs.Seek(termEnum);
            while (termDocs.Next())
            {
                retArray[termDocs.Doc()] = termval;
            }
        }
        while (termEnum.Next());
    }
    catch (StopFillCacheException stop)
    {
        // parser signalled "done filling" -- deliberate early termination
    }
    finally
    {
        termDocs.Close();
        termEnum.Close();
    }
    if (retArray == null)
    {
        // no values
        retArray = new float[reader.MaxDoc()];
    }
    return (retArray);
}
/// <summary>Add multiple terms at the next position in the phrase. Any of the terms may match.</summary>
/// <seealso cref="PhraseQuery.add(Term)"></seealso>
public virtual void Add(Term[] terms)
{
    // Next free position is one past the last recorded position (or 0).
    int nextPosition = 0;
    if (positions.Count > 0)
    {
        nextPosition = ((System.Int32) positions[positions.Count - 1]) + 1;
    }
    Add(terms, nextPosition);
}
/// <summary>Closes the enumeration to further activity, freeing resources. </summary>
public override void Close()
{
    if (actualEnum != null)
    {
        actualEnum.Close();
    }
    actualEnum = null;
    currentTerm = null;
}
/// <summary>
/// Decides whether a term falls inside the configured range. With no collator,
/// ordinal (code point) comparison is used and passing the upper bound ends
/// the enumeration; with a collator, every term of the field must be examined,
/// and only leaving the field ends the enumeration.
/// </summary>
public /*protected internal*/ override bool TermCompare(Term term)
{
    if (collator == null)
    {
        // Use Unicode code point ordering
        bool checkLower = false;
        if (!includeLower)
        {
            // make adjustments to set to exclusive
            checkLower = true;
        }
        if (term != null && (System.Object)term.Field() == (System.Object)field)
        {
            // interned comparison
            if (!checkLower || null == lowerTermText || String.CompareOrdinal(term.Text(), lowerTermText) > 0)
            {
                checkLower = false;
                if (upperTermText != null)
                {
                    int compare = String.CompareOrdinal(upperTermText, term.Text());
                    /*
                    * if beyond the upper term, or is exclusive and this is equal to
                    * the upper term, break out
                    */
                    if ((compare < 0) || (!includeUpper && compare == 0))
                    {
                        endEnum = true;
                        return (false);
                    }
                }
                return (true);
            }
        }
        else
        {
            // break -- left the field (or no term): enumeration is over
            endEnum = true;
            return (false);
        }
        return (false);
    }
    else
    {
        // Collated path: cannot rely on index term order, so both bounds are
        // tested for every term and endEnum is only set when the field ends.
        if (term != null && (System.Object)term.Field() == (System.Object)field)
        {
            // interned comparison
            if ((lowerTermText == null || (includeLower?collator.Compare(term.Text().ToString(), lowerTermText.ToString()) >= 0:collator.Compare(term.Text().ToString(), lowerTermText.ToString()) > 0)) && (upperTermText == null || (includeUpper?collator.Compare(term.Text().ToString(), upperTermText.ToString()) <= 0:collator.Compare(term.Text().ToString(), upperTermText.ToString()) < 0)))
            {
                return (true);
            }
            return (false);
        }
        endEnum = true;
        return (false);
    }
}
/// <summary>Accepts terms in the prefix's field whose text starts with the prefix text; any other term ends the enumeration.</summary>
public /*protected internal*/ override bool TermCompare(Term term)
{
    // interned field names allow reference comparison
    bool sameField = (System.Object) term.Field() == (System.Object) prefix.Field();
    if (sameField && term.Text().StartsWith(prefix.Text()))
    {
        return (true);
    }
    endEnum = true;
    return (false);
}
/// <summary>
/// Installs the actual TermEnum (e.g. from a ctor); the enumeration is
/// automatically positioned on the first matching term.
/// </summary>
protected internal virtual void SetEnum(TermEnum actualEnum)
{
    this.actualEnum = actualEnum;
    // Seed from the enum's current term; otherwise scan forward.
    Term first = actualEnum.Term();
    if (first == null || !TermCompare(first))
    {
        Next();
    }
    else
    {
        currentTerm = first;
    }
}
/// <summary>Returns the total document frequency for <paramref name="term"/>, summed across all sub-searchables.</summary>
public override int DocFreq(Term term)
{
    int total = 0;
    for (int idx = 0; idx < searchables.Length; idx++)
    {
        total += searchables[idx].DocFreq(term);
    }
    return (total);
}
/// <summary>
/// Auto-detects the value type of the entry's field by probing its first term:
/// int, then long, then float, falling back to a string index.
/// </summary>
/// <remarks>
/// Uses TryParse instead of Parse + catch(FormatException), matching
/// DetectFieldType. Besides avoiding exceptions for control flow, the old
/// catch chain missed OverflowException: Int32.Parse on a long-valued term
/// like "9999999999" threw OverflowException (not FormatException) and
/// escaped, crashing instead of falling through to the long parser.
/// </remarks>
/// <exception cref="System.SystemException">if the field has no terms or is not indexed</exception>
protected internal override System.Object CreateValue(IndexReader reader, Entry entryKey)
{
    System.String field = StringHelper.Intern((System.String)entryKey.field);
    TermEnum enumerator = reader.Terms(new Term(field));
    try
    {
        Term term = enumerator.Term();
        if (term == null)
        {
            throw new System.SystemException("no terms in field " + field + " - cannot determine type");
        }
        // reference comparison is valid because field names are interned
        if ((System.Object)term.Field() != (System.Object)field)
        {
            throw new System.SystemException("field \"" + field + "\" does not appear to be indexed");
        }
        System.String termtext = term.Text().Trim();
        int tmpI32;
        long tmpI64;
        float tmpF;
        System.Object ret;
        if (System.Int32.TryParse(termtext, out tmpI32))
        {
            ret = wrapper.GetInts(reader, field);
        }
        else if (System.Int64.TryParse(termtext, out tmpI64))
        {
            ret = wrapper.GetLongs(reader, field);
        }
        else if (SupportClass.Single.TryParse(termtext, out tmpF))
        {
            ret = wrapper.GetFloats(reader, field);
        }
        else
        {
            ret = wrapper.GetStringIndex(reader, field);
        }
        return (ret);
    }
    finally
    {
        enumerator.Close();
    }
}
/// <summary>
/// The termCompare method in FuzzyTermEnum uses Levenshtein distance to
/// calculate the distance between the given term and the comparing term.
/// </summary>
public /*protected internal*/ override bool TermCompare(Term term)
{
    // Candidate terms must be in the same (interned) field and carry the
    // required literal prefix.
    bool candidate = (System.Object) field == (System.Object) term.Field() && term.Text().StartsWith(prefix);
    if (!candidate)
    {
        endEnum = true;
        return (false);
    }
    System.String target = term.Text().Substring(prefix.Length);
    this.similarity = Similarity(target);
    return (similarity > minimumSimilarity);
}
/// <summary>
/// Adds a term to the end of the query phrase.
/// The relative position of the term is the one immediately after the last term added.
/// </summary>
public virtual void Add(Term term)
{
    int nextPosition = positions.Count == 0 ? 0 : ((System.Int32) positions[positions.Count - 1]) + 1;
    Add(term, nextPosition);
}
/// <summary>Builds the weight for this phrase; a one-term phrase degenerates to a boosted TermQuery's weight.</summary>
public override Weight CreateWeight(Searcher searcher)
{
    if (terms.Count != 1)
    {
        return (new PhraseWeight(this, searcher));
    }
    // optimize one-term case
    Query termQuery = new TermQuery((Term) terms[0]);
    termQuery.SetBoost(GetBoost());
    return (termQuery.CreateWeight(searcher));
}
/// <summary>
/// Matches terms of the target field that carry the literal (non-wildcard)
/// prefix and satisfy the wildcard pattern; any other term ends the enumeration.
/// </summary>
public /*protected internal*/ override bool TermCompare(Term term)
{
    // interned field names allow reference comparison
    bool sameField = (System.Object) field == (System.Object) term.Field();
    if (!sameField)
    {
        endEnum = true;
        return (false);
    }
    System.String candidate = term.Text();
    if (!candidate.StartsWith(pre))
    {
        endEnum = true;
        return (false);
    }
    return (WildcardEquals(text, 0, candidate, preLen));
}
/// <summary> Computes a score factor for a simple term and returns an explanation
/// for that score factor.
///
/// <p/>
/// The default implementation uses:
///
/// <pre>
/// idf(searcher.docFreq(term), searcher.maxDoc());
/// </pre>
///
/// Note that {@link Searcher#MaxDoc()} is used instead of
/// {@link Mono.Lucene.Net.Index.IndexReader#NumDocs()} because it is
/// proportional to {@link Searcher#DocFreq(Term)} , i.e., when one is
/// inaccurate, so is the other, and in the same direction.
///
/// </summary>
/// <param name="term">the term in question
/// </param>
/// <param name="searcher">the document collection being searched
/// </param>
/// <returns> an IDFExplain object that includes both an idf score factor
/// and an explanation for the term.
/// </returns>
/// <throws>  IOException </throws>
public virtual IDFExplanation IdfExplain(Term term, Searcher searcher)
{
    // Legacy path: a subclass overriding the deprecated term-based Idf gets
    // its value wrapped without a df/maxDocs breakdown.
    if (SupportedMethods.overridesTermIDF)
    {
        float idf = Idf(term, searcher);
        return (new AnonymousClassIDFExplanation(idf, this));
    }
    int df = searcher.DocFreq(term);
    int max = searcher.MaxDoc();
    float idf2 = Idf(df, max);
    return (new AnonymousClassIDFExplanation1(df, max, idf2, this));
}
/// <summary>
/// Matches terms of the target field that carry the literal (non-wildcard)
/// prefix and satisfy the wildcard pattern; any other term ends the enumeration.
/// </summary>
public /*protected internal*/ override bool TermCompare(Term term)
{
    // interned field names allow reference comparison
    if ((System.Object) field == (System.Object) term.Field())
    {
        System.String searchText = term.Text();
        if (searchText.StartsWith(pre))
        {
            // prefix matched; delegate the wildcard part to WildcardEquals
            return WildcardEquals(text, 0, searchText, preLen);
        }
    }
    endEnum = true;
    return false;
}
/// <summary>Constructs a query selecting all terms greater than
/// <code>lowerTerm</code> but less than <code>upperTerm</code>.
/// There must be at least one term and either term may be null,
/// in which case there is no bound on that side, but if there are
/// two terms, both terms <b>must</b> be for the same field.
/// <p/>
/// If <code>collator</code> is not null, it will be used to decide whether
/// index terms are within the given range, rather than using the Unicode code
/// point order in which index terms are stored.
/// <p/>
/// <strong>WARNING:</strong> Using this constructor and supplying a non-null
/// value in the <code>collator</code> parameter will cause every single
/// index Term in the Field referenced by lowerTerm and/or upperTerm to be
/// examined. Depending on the number of index Terms in this Field, the
/// operation could be very slow.
/// </summary>
/// <param name="lowerTerm">The Term at the lower end of the range</param>
/// <param name="upperTerm">The Term at the upper end of the range</param>
/// <param name="inclusive">If true, both <code>lowerTerm</code> and
/// <code>upperTerm</code> will themselves be included in the range.</param>
/// <param name="collator">The collator to use to collate index Terms, to determine
/// their membership in the range bounded by <code>lowerTerm</code> and
/// <code>upperTerm</code>.</param>
public RangeQuery(Term lowerTerm, Term upperTerm, bool inclusive, System.Globalization.CompareInfo collator)
{
    if (lowerTerm == null && upperTerm == null)
    {
        throw new System.ArgumentException("At least one term must be non-null");
    }
    // interned field names allow reference comparison
    if (lowerTerm != null && upperTerm != null && (System.Object) lowerTerm.Field() != (System.Object) upperTerm.Field())
    {
        throw new System.ArgumentException("Both terms must have the same field");
    }

    System.String rangeField = lowerTerm != null ? lowerTerm.Field() : upperTerm.Field();
    System.String lowerText = lowerTerm != null ? lowerTerm.Text() : null;
    System.String upperText = upperTerm != null ? upperTerm.Text() : null;

    delegate_Renamed = new TermRangeQuery(rangeField, lowerText, upperText, inclusive, inclusive, collator);
    delegate_Renamed.SetRewriteMethod(TermRangeQuery.SCORING_BOOLEAN_QUERY_REWRITE);
}
/// <summary>
/// Looks up the cached document frequency for <paramref name="term"/>.
/// </summary>
/// <exception cref="System.ArgumentException">if no df was cached for the term</exception>
public override int DocFreq(Term term)
{
    // The original caught NullReferenceException raised by unboxing a missing
    // Hashtable entry; check the entry for null directly instead of using an
    // exception for control flow. Same ArgumentException contract as before.
    System.Object cached = dfMap[term];
    if (cached == null)
    {
        throw new System.ArgumentException("df for term " + term.Text() + " not available");
    }
    return ((System.Int32) cached);
}
/// <summary> use this method to set the actual TermEnum (e.g. in ctor),
/// it will be automatically positioned on the first matching term.
/// </summary>
protected internal virtual void SetEnum(TermEnum actualEnum)
{
    this.actualEnum = actualEnum;
    // Find the first term that matches
    Term term = actualEnum.Term();
    if (term != null && TermCompare(term))
    {
        // enum's current term already matches -- no scan needed
        currentTerm = term;
    }
    else
    {
        // scan forward for the first accepted term
        Next();
    }
}
/// <summary>
/// Determines the sort type of a field by probing its first term's text:
/// tries int, then long, then float parsing, defaulting to string.
/// </summary>
/// <param name="reader">index containing the field</param>
/// <param name="fieldKey">name of the field to probe</param>
/// <returns>one of SortField.INT, LONG, FLOAT or STRING</returns>
/// <exception cref="System.SystemException">if the field has no terms or is not indexed</exception>
internal static int DetectFieldType(IndexReader reader, System.String fieldKey)
{
    System.String field = StringHelper.Intern(fieldKey);
    TermEnum enumerator = reader.Terms(new Term(field));
    try
    {
        Term term = enumerator.Term();
        if (term == null)
        {
            throw new System.SystemException("no terms in field " + field + " - cannot determine sort type");
        }
        int ret = 0;
        // reference comparison is valid because field names are interned
        if ((System.Object)term.Field() == (System.Object)field)
        {
            System.String termtext = term.Text().Trim();
            int tmpI32;
            long tmpI64;
            float tmpF;
            if (System.Int32.TryParse(termtext, out tmpI32))
            {
                ret = SortField.INT;
            }
            else if (System.Int64.TryParse(termtext, out tmpI64))
            {
                ret = SortField.LONG;
            }
            else if (SupportClass.Single.TryParse(termtext, out tmpF))
            {
                ret = SortField.FLOAT;
            }
            else
            {
                ret = SortField.STRING;
            }
        }
        else
        {
            throw new System.SystemException("field \"" + field + "\" does not appear to be indexed");
        }
        return (ret);
    }
    finally
    {
        enumerator.Close();
    }
}
/// <summary>Computes a Java-style 31-based hash over the term array; a null array hashes to 0, null elements contribute 0.</summary>
private int ArraysHashCode(Term[] termArray)
{
    if (termArray == null)
    {
        return (0);
    }
    int hash = 1;
    foreach (Term t in termArray)
    {
        hash = 31 * hash + (t == null ? 0 : t.GetHashCode());
    }
    return (hash);
}
/// <summary>
/// Adds a term to the end of the query phrase at an explicitly specified
/// position. This allows e.g. phrases with more than one term at the same
/// position or phrases with gaps (e.g. in connection with stopwords).
/// </summary>
/// <param name="term">the term to append</param>
/// <param name="position">explicit position of the term within the phrase</param>
public virtual void Add(Term term, int position)
{
    // First term fixes the phrase's field; later terms must agree with it
    // (interned field names allow reference comparison).
    if (terms.Count == 0)
    {
        field = term.Field();
    }
    else if ((System.Object) term.Field() != (System.Object) field)
    {
        throw new System.ArgumentException("All phrase terms must be in the same field: " + term);
    }

    terms.Add(term);
    positions.Add((System.Int32) position);
    maxPosition = position > maxPosition ? position : maxPosition;
}
/// <summary>
/// Visits every document matching any term of the given enumerator, invoking
/// HandleDoc for each document id (documents hit once per matching term).
/// Also records the number of visited terms on the query for statistics.
/// </summary>
public virtual void Generate(MultiTermQuery query, IndexReader reader, TermEnum enumerator)
{
    // reusable read buffers for bulk doc/freq retrieval
    int[] docs = new int[32];
    int[] freqs = new int[32];
    TermDocs termDocs = reader.TermDocs();
    try
    {
        int termCount = 0;
        do
        {
            Term term = enumerator.Term();
            if (term == null)
            {
                break;
            }
            termCount++;
            termDocs.Seek(term);
            while (true)
            {
                // bulk-read postings; Read returns 0 when exhausted
                int count = termDocs.Read(docs, freqs);
                if (count != 0)
                {
                    for (int i = 0; i < count; i++)
                    {
                        HandleDoc(docs[i]);
                    }
                }
                else
                {
                    break;
                }
            }
        }
        while (enumerator.Next());

        query.IncTotalNumberOfTerms(termCount); // {{Aroush-2.9}} is the use of 'temp' as is right?
    }
    finally
    {
        termDocs.Close();
    }
}
/// <summary>
/// Builds the per-document short cache for the entry's field. With no custom
/// parser, delegates to the wrapper with the default short parser; otherwise
/// walks the field's terms, parsing each and recording the value for every
/// document containing the term.
/// </summary>
/// <returns>a short[] indexed by document number (length = reader.MaxDoc())</returns>
protected internal override System.Object CreateValue(IndexReader reader, Entry entryKey)
{
    Entry entry = (Entry)entryKey;
    System.String field = entry.field;
    ShortParser parser = (ShortParser)entry.custom;
    if (parser == null)
    {
        return (wrapper.GetShorts(reader, field, Mono.Lucene.Net.Search.FieldCache_Fields.DEFAULT_SHORT_PARSER));
    }
    short[] retArray = new short[reader.MaxDoc()];
    TermDocs termDocs = reader.TermDocs();
    TermEnum termEnum = reader.Terms(new Term(field));
    try
    {
        do
        {
            Term term = termEnum.Term();
            // reference comparison is valid because field names are interned
            if (term == null || (System.Object)term.Field() != (System.Object)field)
            {
                break;
            }
            short termval = parser.ParseShort(term.Text());
            termDocs.Seek(termEnum);
            while (termDocs.Next())
            {
                retArray[termDocs.Doc()] = termval;
            }
        }
        while (termEnum.Next());
    }
    catch (StopFillCacheException stop)
    {
        // parser signalled "done filling" -- deliberate early termination
    }
    finally
    {
        termDocs.Close();
        termEnum.Close();
    }
    return (retArray);
}
/// <summary> Create a new FuzzyQuery that will match terms with a similarity
/// of at least <code>minimumSimilarity</code> to <code>term</code>.
/// If a <code>prefixLength</code> &gt; 0 is specified, a common prefix
/// of that length is also required.
///
/// </summary>
/// <param name="term">the term to search for
/// </param>
/// <param name="minimumSimilarity">a value between 0 and 1 to set the required similarity
/// between the query term and the matching terms. For example, for a
/// <code>minimumSimilarity</code> of <code>0.5</code> a term of the same length
/// as the query term is considered similar to the query term if the edit distance
/// between both terms is less than <code>length(term)*0.5</code>
/// </param>
/// <param name="prefixLength">length of common (non-fuzzy) prefix
/// </param>
/// <throws>  IllegalArgumentException if minimumSimilarity is &gt;= 1 or &lt; 0, or if prefixLength &lt; 0 </throws>
public FuzzyQuery(Term term, float minimumSimilarity, int prefixLength):base(term)
{
    // will be removed in 3.0
    this.term = term;

    if (minimumSimilarity >= 1.0f)
        throw new System.ArgumentException("minimumSimilarity >= 1");
    else if (minimumSimilarity < 0.0f)
        throw new System.ArgumentException("minimumSimilarity < 0");
    if (prefixLength < 0)
        throw new System.ArgumentException("prefixLength < 0");

    // terms shorter than 1/(1-minSim) characters can never match fuzzily
    if (term.Text().Length > 1.0f / (1.0f - minimumSimilarity))
    {
        this.termLongEnough = true;
    }

    this.minimumSimilarity = minimumSimilarity;
    this.prefixLength = prefixLength;
    rewriteMethod = SCORING_BOOLEAN_QUERY_REWRITE;
}
/// <summary> Create weight in multiple index scenario.
///
/// Distributed query processing is done in the following steps:
/// 1. rewrite query
/// 2. extract necessary terms
/// 3. collect dfs for these terms from the Searchables
/// 4. create query weight using aggregate dfs.
/// 5. distribute that weight to Searchables
/// 6. merge results
///
/// Steps 1-4 are done here, 5+6 in the search() methods
///
/// </summary>
/// <returns> rewritten queries
/// </returns>
public /*protected internal*/ override Weight CreateWeight(Query original)
{
    // step 1
    Query rewrittenQuery = Rewrite(original);

    // step 2
    System.Collections.Hashtable terms = new System.Collections.Hashtable();
    rewrittenQuery.ExtractTerms(terms);

    // step3
    Term[] allTermsArray = new Term[terms.Count];
    int index = 0;
    System.Collections.IEnumerator e = terms.Keys.GetEnumerator();
    while (e.MoveNext())
    {
        allTermsArray[index++] = e.Current as Term;
    }
    // sum each term's doc freq across all sub-searchables, keeping array order
    int[] aggregatedDfs = new int[terms.Count];
    for (int i = 0; i < searchables.Length; i++)
    {
        int[] dfs = searchables[i].DocFreqs(allTermsArray);
        for (int j = 0; j < aggregatedDfs.Length; j++)
        {
            aggregatedDfs[j] += dfs[j];
        }
    }

    System.Collections.Hashtable dfMap = new System.Collections.Hashtable();
    for (int i = 0; i < allTermsArray.Length; i++)
    {
        dfMap[allTermsArray[i]] = (System.Int32)aggregatedDfs[i];
    }

    // step4
    // the weight is computed against a fake searcher that serves the
    // aggregated dfs instead of any single index's local dfs
    int numDocs = MaxDoc();
    CachedDfSource cacheSim = new CachedDfSource(dfMap, numDocs, GetSimilarity());

    return (rewrittenQuery.Weight(cacheSim));
}
/// <summary> Creates a new <code>WildcardTermEnum</code>.
/// <p/>
/// After calling the constructor the enumeration is already pointing to the first
/// valid term if such a term exists.
/// </summary>
public WildcardTermEnum(IndexReader reader, Term term) : base()
{
    searchTerm = term;
    field = searchTerm.Field();
    System.String pattern = searchTerm.Text();

    // Locate the first wildcard character ('*' or '?'); everything before it
    // is a literal prefix the enumeration can seek to directly.
    int sidx = pattern.IndexOf((System.Char) WILDCARD_STRING);
    int cidx = pattern.IndexOf((System.Char) WILDCARD_CHAR);
    int firstWildcard;
    if (sidx == -1)
    {
        firstWildcard = cidx;
    }
    else if (cidx == -1)
    {
        firstWildcard = sidx;
    }
    else
    {
        firstWildcard = System.Math.Min(sidx, cidx);
    }

    pre = firstWildcard == -1 ? "" : pattern.Substring(0, firstWildcard);
    preLen = pre.Length;
    text = pattern.Substring(preLen);

    SetEnum(reader.Terms(new Term(searchTerm.Field(), pre)));
}
/// <summary> Creates a new <code>WildcardTermEnum</code>.
/// <p/>
/// After calling the constructor the enumeration is already pointing to the first
/// valid term if such a term exists.
/// </summary>
public WildcardTermEnum(IndexReader reader, Term term) : base()
{
    searchTerm = term;
    field = searchTerm.Field();
    System.String searchTermText = searchTerm.Text();

    // index of the first wildcard character, whichever of '*' / '?' comes first
    int sidx = searchTermText.IndexOf((System.Char)WILDCARD_STRING);
    int cidx = searchTermText.IndexOf((System.Char)WILDCARD_CHAR);
    int idx = sidx;
    if (idx == -1)
    {
        idx = cidx;
    }
    else if (cidx >= 0)
    {
        idx = System.Math.Min(idx, cidx);
    }

    // literal prefix before the first wildcard ("" if the pattern starts with one)
    pre = idx != -1?searchTerm.Text().Substring(0, (idx) - (0)):"";
    preLen = pre.Length;
    // remainder of the pattern, matched by WildcardEquals
    text = searchTermText.Substring(preLen);

    SetEnum(reader.Terms(new Term(searchTerm.Field(), pre)));
}
/// <summary>Increments the enumeration to the next element. True if one exists. </summary>
/// <remarks>
/// Advances the wrapped enum until TermCompare accepts a term (which becomes
/// currentTerm) or the enumeration ends. The original body's trailing
/// "currentTerm = null; return false;" after the while loop was unreachable
/// (every path inside the loop returns); that dead code is removed here.
/// </remarks>
public override bool Next()
{
    if (actualEnum == null)
        return false; // the actual enumerator is not initialized!
    currentTerm = null;
    while (true)
    {
        if (EndEnum())
            return false; // subclass signalled the end of the range
        if (!actualEnum.Next())
            return false; // underlying enum is exhausted
        Term term = actualEnum.Term();
        if (TermCompare(term))
        {
            currentTerm = term;
            return true;
        }
        // term rejected by the filter -- keep scanning
    }
}
/// <summary>Builds a filter restricting results to documents with terms starting with <paramref name="prefix"/>, by wrapping a PrefixQuery.</summary>
public PrefixFilter(Term prefix):base(new PrefixQuery(prefix))
{
}
/// <summary> Builds a new WildcardQuery instance</summary>
/// <param name="t">wildcard term
/// </param>
/// <returns> new WildcardQuery instance
/// </returns>
protected internal virtual Query NewWildcardQuery(Term t)
{
    WildcardQuery wildcard = new WildcardQuery(t);
    // honor the parser's configured multi-term rewrite method
    wildcard.SetRewriteMethod(multiTermRewriteMethod);
    return wildcard;
}
/// <summary>Closes the enumeration to further activity, freeing resources. </summary>
public override void Close()
{
    if (actualEnum != null)
        actualEnum.Close();
    // drop references so the enum cannot be advanced after closing
    currentTerm = null;
    actualEnum = null;
}
/// <summary> Factory method for generating a query. Called when parser
/// parses an input term token that contains one or more wildcard
/// characters (? and *), but is not a prefix term token (one
/// that has just a single * character at the end)
/// <p/>
/// Depending on settings, prefix term may be lower-cased
/// automatically. It will not go through the default Analyzer,
/// however, since normal Analyzers are unlikely to work properly
/// with wildcard templates.
/// <p/>
/// Can be overridden by extending classes, to provide custom handling for
/// wildcard queries, which may be necessary due to missing analyzer calls.
///
/// </summary>
/// <param name="field">Name of the field query will use.
/// </param>
/// <param name="termStr">Term token that contains one or more wild card
/// characters (? or *), but is not simple prefix term
///
/// </param>
/// <returns> Resulting {@link Query} built for the term
/// </returns>
/// <exception cref="ParseException">throw in overridden method to disallow
/// </exception>
public /*protected internal*/ virtual Query GetWildcardQuery(System.String field, System.String termStr)
{
    // "*:*" means match-all-documents
    if ("*".Equals(field))
    {
        if ("*".Equals(termStr))
            return NewMatchAllDocsQuery();
    }
    if (!allowLeadingWildcard && (termStr.StartsWith("*") || termStr.StartsWith("?")))
        throw new ParseException("'*' or '?' not allowed as first character in WildcardQuery");
    if (lowercaseExpandedTerms)
    {
        // NOTE(review): culture-sensitive ToLower() -- presumably intentional
        // for locale-aware matching, but behaves differently under e.g. the
        // Turkish culture; confirm before changing
        termStr = termStr.ToLower();
    }
    Term t = new Term(field, termStr);
    return NewWildcardQuery(t);
}
/// <summary>Constructs a query for the term <code>t</code>. </summary>
public TermQuery(Term t)
{
    term = t;
}
/// <summary> Calls {@link #FuzzyQuery(Term, float) FuzzyQuery(term, 0.5f, 0)}.</summary>
/// <remarks>Convenience overload delegating to the full constructor with the class defaults for similarity and prefix length.</remarks>
public FuzzyQuery(Term term):this(term, defaultMinSimilarity, defaultPrefixLength)
{
}
/// <summary>
/// Looks up the cached document frequency for <paramref name="term"/>.
/// </summary>
/// <exception cref="System.ArgumentException">if no df was cached for the term</exception>
public override int DocFreq(Term term)
{
    // The original caught NullReferenceException raised by unboxing a missing
    // Hashtable entry; check the entry for null directly instead of using an
    // exception for control flow. Same ArgumentException contract as before.
    System.Object cached = dfMap[term];
    if (cached == null)
    {
        throw new System.ArgumentException("df for term " + term.Text() + " not available");
    }
    return (System.Int32) cached;
}
/// <summary> Compares if current upper bound is reached,
/// this also updates the term count for statistics.
/// In contrast to {@link FilteredTermEnum}, a return value
/// of <code>false</code> ends iterating the current enum
/// and forwards to the next sub-range.
/// </summary>
//@Override
public /*protected internal*/ override bool TermCompare(Term term)
{
    // interned field names allow reference comparison
    bool sameField = (System.Object) term.Field() == (System.Object) Enclosing_Instance.field;
    return sameField && String.CompareOrdinal(term.Text(), currentUpperBound) <= 0;
}
/// <summary>Returns the total document frequency for <paramref name="term"/>, summed across all sub-searchables.</summary>
public override int DocFreq(Term term)
{
    int docFreq = 0;
    for (int i = 0; i < searchables.Length; i++)
        docFreq += searchables[i].DocFreq(term);
    return docFreq;
}
/// <summary>Constructs a wildcard query for <paramref name="term"/>, recording whether its text actually contains a wildcard character.</summary>
public WildcardQuery(Term term) : base(term)
{
    //will be removed in 3.0
    this.term = term;
    System.String termText = term.Text();
    this.termContainsWildcard = termText.IndexOf('*') != -1 || termText.IndexOf('?') != -1;
}
/// <summary> Builds a new PrefixQuery instance</summary>
/// <param name="prefix">Prefix term
/// </param>
/// <returns> new PrefixQuery instance
/// </returns>
protected internal virtual Query NewPrefixQuery(Term prefix)
{
    PrefixQuery result = new PrefixQuery(prefix);
    // honor the parser's configured multi-term rewrite method
    result.SetRewriteMethod(multiTermRewriteMethod);
    return result;
}
/// <summary> Builds a new FuzzyQuery instance</summary>
/// <param name="term">Term
/// </param>
/// <param name="minimumSimilarity">minimum similarity
/// </param>
/// <param name="prefixLength">prefix length
/// </param>
/// <returns> new FuzzyQuery Instance
/// </returns>
protected internal virtual Query NewFuzzyQuery(Term term, float minimumSimilarity, int prefixLength)
{
    // FuzzyQuery doesn't yet allow constant score rewrite
    FuzzyQuery result = new FuzzyQuery(term, minimumSimilarity, prefixLength);
    return result;
}
/// <summary>Constructs a query matching terms that cannot be represented with a single <paramref name="term"/>; subclasses supply the term enumeration.</summary>
public MultiTermQuery(Term term)
{
    this.term = term;
}
/// <summary>Computes the document frequency of each term via DocFreq, preserving input order.</summary>
public override int[] DocFreqs(Term[] terms)
{
    int[] freqs = new int[terms.Length];
    int slot = 0;
    foreach (Term t in terms)
    {
        freqs[slot++] = DocFreq(t);
    }
    return freqs;
}
/// <summary> Builds a new TermQuery instance</summary>
/// <param name="term">term
/// </param>
/// <returns> new TermQuery instance
/// </returns>
protected internal virtual Query NewTermQuery(Term term)
{
    TermQuery result = new TermQuery(term);
    return result;
}
/// <summary> Create weight in multiple index scenario.
///
/// Distributed query processing is done in the following steps:
/// 1. rewrite query
/// 2. extract necessary terms
/// 3. collect dfs for these terms from the Searchables
/// 4. create query weight using aggregate dfs.
/// 5. distribute that weight to Searchables
/// 6. merge results
///
/// Steps 1-4 are done here, 5+6 in the search() methods
///
/// </summary>
/// <returns> rewritten queries
/// </returns>
public /*protected internal*/ override Weight CreateWeight(Query original)
{
    // step 1
    Query rewrittenQuery = Rewrite(original);

    // step 2
    System.Collections.Hashtable terms = new System.Collections.Hashtable();
    rewrittenQuery.ExtractTerms(terms);

    // step3
    Term[] allTermsArray = new Term[terms.Count];
    int index = 0;
    System.Collections.IEnumerator e = terms.Keys.GetEnumerator();
    while (e.MoveNext())
        allTermsArray[index++] = e.Current as Term;
    // sum each term's doc freq across all sub-searchables, keeping array order
    int[] aggregatedDfs = new int[terms.Count];
    for (int i = 0; i < searchables.Length; i++)
    {
        int[] dfs = searchables[i].DocFreqs(allTermsArray);
        for (int j = 0; j < aggregatedDfs.Length; j++)
        {
            aggregatedDfs[j] += dfs[j];
        }
    }

    System.Collections.Hashtable dfMap = new System.Collections.Hashtable();
    for (int i = 0; i < allTermsArray.Length; i++)
    {
        dfMap[allTermsArray[i]] = (System.Int32) aggregatedDfs[i];
    }

    // step4
    // the weight is computed against a fake searcher that serves the
    // aggregated dfs instead of any single index's local dfs
    int numDocs = MaxDoc();
    CachedDfSource cacheSim = new CachedDfSource(dfMap, numDocs, GetSimilarity());

    return rewrittenQuery.Weight(cacheSim);
}
/// <summary> Factory method for generating a query (similar to
/// {@link #getWildcardQuery}). Called when parser parses
/// an input term token that has the fuzzy suffix (~) appended.
///
/// </summary>
/// <param name="field">Name of the field query will use.
/// </param>
/// <param name="termStr">Term token to use for building term for the query
///
/// </param>
/// <returns> Resulting {@link Query} built for the term
/// </returns>
/// <exception cref="ParseException">throw in overridden method to disallow
/// </exception>
public /*protected internal*/ virtual Query GetFuzzyQuery(System.String field, System.String termStr, float minSimilarity)
{
    // Fuzzy terms bypass the analyzer, so lower-casing (when enabled)
    // happens here instead.
    System.String text = termStr;
    if (lowercaseExpandedTerms)
    {
        text = text.ToLower();
    }
    return NewFuzzyQuery(new Term(field, text), minSimilarity, fuzzyPrefixLength);
}
/// <summary>Construct a SpanTermQuery matching the named term's spans. </summary>
public SpanTermQuery(Term term) { this.term = term; }
/// <summary> Constructor for enumeration of all terms from specified <code>reader</code> which share a prefix of
/// length <code>prefixLength</code> with <code>term</code> and which have a fuzzy similarity &gt;
/// <code>minSimilarity</code>.
/// <p/>
/// After calling the constructor the enumeration is already pointing to the first
/// valid term if such a term exists.
///
/// </summary>
/// <param name="reader">Delivers terms.
/// </param>
/// <param name="term">Pattern term.
/// </param>
/// <param name="minSimilarity">Minimum required similarity for terms from the reader. Default value is 0.5f.
/// </param>
/// <param name="prefixLength">Length of required common prefix. Default value is 0.
/// </param>
/// <throws> IOException </throws>
public FuzzyTermEnum(IndexReader reader, Term term, float minSimilarity, int prefixLength) : base()
{
    // Validate arguments up front; similarity must lie in [0, 1).
    if (minSimilarity >= 1.0f)
    {
        throw new System.ArgumentException("minimumSimilarity cannot be greater than or equal to 1");
    }
    else if (minSimilarity < 0.0f)
    {
        throw new System.ArgumentException("minimumSimilarity cannot be less than 0");
    }
    if (prefixLength < 0)
    {
        throw new System.ArgumentException("prefixLength cannot be less than 0");
    }

    this.minimumSimilarity = minSimilarity;
    this.scale_factor = 1.0f / (1.0f - minimumSimilarity);
    this.searchTerm = term;
    this.field = searchTerm.Field();

    // The prefix could be longer than the word. It's kind of silly though:
    // it means we must match the entire word, so clamp it to the term length.
    System.String termText = searchTerm.Text();
    int realPrefixLength = prefixLength > termText.Length ? termText.Length : prefixLength;
    this.text = termText.Substring(realPrefixLength);
    this.prefix = termText.Substring(0, realPrefixLength);

    InitializeMaxDistances();
    this.d = InitDistanceArray();

    // Position the enumeration at the first term sharing the required prefix.
    SetEnum(reader.Terms(new Term(searchTerm.Field(), prefix)));
}
/// <summary> Pairs a term with its score for ranking during rewrite.</summary>
public ScoreTerm(Term term, float score) { this.term = term; this.score = score; }
/// <summary> The termCompare method in FuzzyTermEnum uses Levenshtein distance to
/// calculate the distance between the given term and the comparing term.
/// Terms outside the target field, or missing the required prefix, end the
/// enumeration.
/// </summary>
public /*protected internal*/ override bool TermCompare(Term term)
{
    // Reference comparison is intentional here — field names are interned
    // (see StringHelper.Intern usage elsewhere in this codebase).
    bool candidate = (System.Object) field == (System.Object) term.Field()
        && term.Text().StartsWith(prefix);
    if (!candidate)
    {
        // Field or prefix mismatch: signal the enumeration to stop.
        endEnum = true;
        return false;
    }
    // Only the part after the shared prefix contributes to the edit distance.
    System.String target = term.Text().Substring(prefix.Length);
    this.similarity = Similarity(target);
    return similarity > minimumSimilarity;
}
/// <summary> Calls {@link #FuzzyQuery(Term, float) FuzzyQuery(term, minimumSimilarity, 0)};
/// delegates to the three-argument constructor with the default prefix length.</summary>
public FuzzyQuery(Term term, float minimumSimilarity):this(term, minimumSimilarity, defaultPrefixLength) { }
/// <summary> Creates a FuzzyTermEnum with the default prefix length
/// (<c>FuzzyQuery.defaultPrefixLength</c>).
/// <p/>
/// After calling the constructor the enumeration is already pointing to the first
/// valid term if such a term exists.
///
/// </summary>
/// <param name="reader">Delivers terms.
/// </param>
/// <param name="term">Pattern term.
/// </param>
/// <param name="minSimilarity">Minimum required similarity for terms from the reader.
/// </param>
/// <throws> IOException </throws>
/// <seealso cref="FuzzyTermEnum(IndexReader, Term, float, int)">
/// </seealso>
public FuzzyTermEnum(IndexReader reader, Term term, float minSimilarity):this(reader, term, minSimilarity, FuzzyQuery.defaultPrefixLength) { }
/// <summary> Factory method for generating a query (similar to
/// {@link #getWildcardQuery}). Called when parser parses an input term
/// token that uses prefix notation; that is, contains a single '*' wildcard
/// character as its last character. Since this is a special case
/// of generic wildcard term, and such a query can be optimized easily,
/// this usually results in a different query object.
/// <p/>
/// Depending on settings, a prefix term may be lower-cased
/// automatically. It will not go through the default Analyzer,
/// however, since normal Analyzers are unlikely to work properly
/// with wildcard templates.
/// <p/>
/// Can be overridden by extending classes, to provide custom handling for
/// wild card queries, which may be necessary due to missing analyzer calls.
///
/// </summary>
/// <param name="field">Name of the field query will use.
/// </param>
/// <param name="termStr">Term token to use for building term for the query
/// (<b>without</b> trailing '*' character!)
///
/// </param>
/// <returns> Resulting {@link Query} built for the term
/// </returns>
/// <exception cref="ParseException">throw in overridden method to disallow
/// </exception>
public /*protected internal*/ virtual Query GetPrefixQuery(System.String field, System.String termStr)
{
    // A leading '*' would make this a wildcard query, which the caller may
    // have disabled for performance reasons.
    if (!allowLeadingWildcard && termStr.StartsWith("*"))
    {
        throw new ParseException("'*' not allowed as first character in PrefixQuery");
    }
    System.String text = lowercaseExpandedTerms ? termStr.ToLower() : termStr;
    return NewPrefixQuery(new Term(field, text));
}
/// <summary>Equality compare on the term; implementations return true when
/// <paramref name="term"/> matches this enumeration's filter criteria, and may
/// set an end-of-enumeration flag when no further terms can match.</summary>
public /*protected internal*/ abstract bool TermCompare(Term term);