Пример #1
0
        /// <summary>
        /// Builds a term enumeration for the regular expression carried by <paramref name="term"/>.
        /// The leading run of letters and digits of the expression is used as a literal prefix
        /// to position the underlying term enumeration.
        /// </summary>
        /// <param name="reader">index reader whose terms are enumerated</param>
        /// <param name="term">term whose text is the expression and whose field is the field to scan</param>
        public RegexTermEnum(IndexReader reader, Term term)
            : base()
        {
            field = term.Field();
            System.String text = term.Text();

            pattern = new Pattern(text);

            // Count the leading letters/digits; scanning stops at the first other character.
            // NOTE(review): this treats the leading alphanumerics as a mandatory literal prefix;
            // expressions such as "ab*c" or "abc|xyz" may match terms without it - confirm.
            int prefixLength = 0;
            for (; prefixLength < text.Length; prefixLength++)
            {
                if (!System.Char.IsLetterOrDigit(text[prefixLength]))
                {
                    break;
                }
            }

            pre = text.Substring(0, prefixLength);

            SetEnum(reader.Terms(new Term(term.Field(), pre)));
        }
Пример #2
0
		/// <summary>Creates spans for <paramref name="term"/> backed by the given positions enumerator.</summary>
		/// <param name="positions">positions enumerator stored by this instance</param>
		/// <param name="term">the term stored by this instance</param>
		public TermSpans(TermPositions positions, Term term)
		{
			// The document number starts out as -1.
			doc = -1;
			this.term = term;
			this.positions = positions;
		}
 /// <summary>Returns the sum of <c>DocFreq(term)</c> over all sub-searchables, queried in order.</summary>
 /// <param name="term">the term to look up</param>
 public override int DocFreq(Term term)
 {
     int total = 0;
     foreach (var searchable in searchables)
     {
         total += searchable.DocFreq(term);
     }
     return total;
 }
 /// <summary>
 /// Closes the wrapped enumeration (when one is set) and then drops the references
 /// to both the current term and the wrapped enumeration.
 /// </summary>
 public override void Close()
 {
     if (actualEnum != null)
     {
         actualEnum.Close();
     }

     // Clear state only after the wrapped enumeration has been closed.
     currentTerm = null;
     actualEnum = null;
 }
Пример #5
0
 /// <summary>
 /// Reads one term entry from <paramref name="input"/>: the shared-prefix length, the suffix
 /// length, the suffix bytes and the field number. The suffix bytes are written into
 /// <c>Bytes</c> starting at the shared-prefix offset, and <c>Field</c> is refreshed when the
 /// field number differs from <c>CurrentFieldNumber</c>.
 /// </summary>
 /// <param name="input">input the entry is read from</param>
 /// <param name="fieldInfos">field metadata used to resolve the field number to a name</param>
 public void Read(IndexInput input, FieldInfos fieldInfos)
 {
     this.Term = null; // invalidate cache
     NewSuffixStart = input.ReadVInt();
     int length = input.ReadVInt();
     int totalLength = NewSuffixStart + length;
     Debug.Assert(totalLength <= ByteBlockPool.BYTE_BLOCK_SIZE - 2, "termLength=" + totalLength + ",resource=" + input);
     if (Bytes.Bytes.Length < totalLength)
     {
         Bytes.Grow(totalLength);
     }
     Bytes.Length = totalLength;
     // Only the suffix is read; bytes before NewSuffixStart are left as they were.
     input.ReadBytes(Bytes.Bytes, NewSuffixStart, length);
     int fieldNumber = input.ReadVInt();
     if (fieldNumber != CurrentFieldNumber)
     {
         CurrentFieldNumber = fieldNumber;
         // NOTE: too much sneakiness here, seriously this is a negative vint?!
         if (CurrentFieldNumber == -1)
         {
             Field = "";
         }
         else
         {
             Debug.Assert(fieldInfos.FieldInfo(CurrentFieldNumber) != null, CurrentFieldNumber.ToString());
             Field = String.Intern(fieldInfos.FieldInfo(CurrentFieldNumber).Name);
         }
     }
     else
     {
         // The conditional must be parenthesized: '+' and '==' bind tighter than '?:', so without
         // the parentheses the whole concatenation is compared with null and the context is lost.
         Debug.Assert(Field.Equals(fieldInfos.FieldInfo(fieldNumber).Name), "currentFieldNumber=" + CurrentFieldNumber + " field=" + Field + " vs " + (fieldInfos.FieldInfo(fieldNumber) == null ? "null" : fieldInfos.FieldInfo(fieldNumber).Name));
     }
 }
Пример #6
0
        /// <summary>
        /// Queries every <see cref="Searchable"/> for the document frequency of
        /// <paramref name="term"/> through <c>Parallel.For</c>, storing each answer in its own
        /// array slot, and returns the sum of all answers.
        /// </summary>
        public override int DocFreq(Term term)
        {
            int count = searchables.Length;
            int[] perSearchable = new int[count];

            Parallel.For(0, count, index =>
            {
                perSearchable[index] = searchables[index].DocFreq(term);
            });

            return perSearchable.Sum();
        }
Пример #7
0
 /// <summary>
 /// Creates a wildcard query for <paramref name="term"/> and records whether the term text
 /// contains a <c>*</c> or <c>?</c> character.
 /// </summary>
 /// <param name="term">the term holding the wildcard expression</param>
 public WildcardQuery(Term term)
     : base(term)
 {
     //will be removed in 3.0
     this.term = term;

     System.String text = term.Text();
     bool hasStar = text.IndexOf('*') != -1;
     this.termContainsWildcard = hasStar || text.IndexOf('?') != -1;
 }
Пример #8
0
		/// <summary>
		/// Opens the index in the "index" directory and deletes every document containing the
		/// term given on the command line in the "path" field, then reports how many were deleted.
		/// </summary>
		/// <param name="args">command line arguments; the first one is the term text</param>
		public static void  Main(System.String[] args)
		{
			if (args.Length == 0)
			{
				System.Console.Error.WriteLine("Usage: " + typeof(DeleteFiles) + " <unique_term>");
				System.Environment.Exit(1);
			}

			try
			{
				Directory indexDirectory = FSDirectory.Open("index");
				// The second argument is false: the reader must not be read-only because it deletes.
				IndexReader indexReader = IndexReader.Open(indexDirectory, false);

				Term pathTerm = new Term("path", args[0]);
				int deletedCount = indexReader.DeleteDocuments(pathTerm);

				System.Console.Out.WriteLine("deleted " + deletedCount + " documents containing " + pathTerm);

				// Documents can also be deleted one by one via their internal id
				// (the Java original sketches a loop over all ids calling reader.delete(i)).

				indexReader.Close();
				indexDirectory.Close();
			}
			catch (System.Exception e)
			{
				System.Console.Out.WriteLine(" caught a " + e.GetType() + "\n with message: " + e.Message);
			}
		}
		/// <summary>Creates spans for <paramref name="term"/> backed by the given positions enumerator.</summary>
		/// <param name="positions">positions enumerator stored by this instance</param>
		/// <param name="term">the term stored by this instance</param>
		public TermSpans(TermPositions positions, Term term)
		{
			// The internal document number starts out as -1.
			internalDoc = -1;
			this.term = term;
			this.internalPositions = positions;
		}
Пример #10
0
		/// <summary>
		/// Fills <c>terms</c> with <paramref name="nTerms"/> single-character terms ("A", "B", ...)
		/// in field "f" and writes <paramref name="nDocs"/> documents to a new index in
		/// <paramref name="dir"/>. Term <c>j</c> is added to a document when <c>r.Next(freq[j])</c>
		/// returns 0.
		/// </summary>
		/// <param name="nDocs">number of documents to add</param>
		/// <param name="nTerms">number of distinct terms to create</param>
		/// <param name="power">exponent applied when computing each term's <c>freq</c> value</param>
		/// <param name="dir">directory the index is created in</param>
		public virtual void  CreateRandomTerms(int nDocs, int nTerms, double power, Directory dir)
		{
			int[] freq = new int[nTerms];
			for (int t = 0; t < nTerms; t++)
			{
				// Earlier terms get the larger value, which makes them less frequent.
				int weight = (nTerms + 1) - t;
				freq[t] = (int) System.Math.Ceiling(System.Math.Pow(weight, power));
				terms[t] = new Term("f", System.Convert.ToString((char) ('A' + t)));
			}

			IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
			int docIndex = 0;
			while (docIndex < nDocs)
			{
				Document document = new Document();
				for (int t = 0; t < nTerms; t++)
				{
					if (r.Next(freq[t]) != 0)
					{
						continue;
					}
					document.Add(new Field("f", terms[t].Text(), Field.Store.NO, Field.Index.UN_TOKENIZED));
				}
				writer.AddDocument(document);
				docIndex++;
			}
			writer.Optimize();
			writer.Close();
		}
	    /// <summary> Appends a term to the end of the query phrase.
		/// Its position is one past the last recorded position, or 0 when no position
		/// has been recorded yet.
		/// </summary>
		/// <param name="term">the term to append</param>
		public virtual void  Add(Term term)
		{
			int nextPosition = positions.Count > 0
				? positions[positions.Count - 1] + 1
				: 0;

			Add(term, nextPosition);
		}
Пример #12
0
		/// <summary>Adds several alternative terms at the next position in the phrase; any of
		/// them may match. The position is one past the last recorded position, or 0 when
		/// no position has been recorded yet.
		/// </summary>
		/// <param name="terms">the alternative terms for the new position</param>
		/// <seealso cref="PhraseQuery.Add(Term)"/>
		public virtual void  Add(Term[] terms)
		{
			int nextPosition = positions.Count > 0
				? ((System.Int32) positions[positions.Count - 1]) + 1
				: 0;

			Add(terms, nextPosition);
		}
Пример #13
0
		/// <summary>
		/// Accepts <paramref name="term"/> when it is in the prefix's field and its text starts
		/// with the prefix text; otherwise marks the enumeration as finished.
		/// </summary>
		/// <param name="term">the candidate term</param>
		/// <returns><c>true</c> when the term matches the prefix</returns>
		public /*protected internal*/ override bool TermCompare(Term term)
		{
			// Field names are compared by reference, as in the original code
			// (presumably they are interned - confirm against Term).
			// The prefix test is ordinal: index terms are raw character sequences, and the
			// culture-sensitive default of StartsWith can give different answers per locale.
			if ((System.Object) term.Field() == (System.Object) prefix.Field() && term.Text().StartsWith(prefix.Text(), System.StringComparison.Ordinal))
			{
				return true;
			}
			endEnum = true;
			return false;
		}
Пример #14
0
 /// <summary>Returns <c>DocFreq</c> for each entry of <paramref name="terms"/>, in the same order.</summary>
 /// <param name="terms">the terms to look up</param>
 /// <returns>an array with one document frequency per input term</returns>
 public virtual int[] DocFreqs(Term[] terms)
 {
     int[] frequencies = new int[terms.Length];
     int slot = 0;
     foreach (Term term in terms)
     {
         frequencies[slot++] = DocFreq(term);
     }
     return frequencies;
 }
	    /// <summary>
		/// Accepts <paramref name="term"/> when it is in the prefix's field and its text starts
		/// with the prefix text; otherwise marks the enumeration as finished.
		/// </summary>
		/// <param name="term">the candidate term</param>
		/// <returns><c>true</c> when the term matches the prefix</returns>
		protected internal override bool TermCompare(Term term)
		{
			// Field names are compared by reference, as in the original code
			// (presumably they are interned - confirm against Term).
			// The prefix test is ordinal: index terms are raw character sequences, and the
			// culture-sensitive default of StartsWith can give different answers per locale.
			if ((System.Object) term.Field == (System.Object) prefix.Field && term.Text.StartsWith(prefix.Text, System.StringComparison.Ordinal))
			{
				return true;
			}
			endEnum = true;
			return false;
		}
		/// <summary> Installs the wrapped TermEnum (e.g. from a constructor). When its current
		/// term is non-null and accepted by <c>TermCompare</c> it becomes the current term;
		/// otherwise <c>Next()</c> is called to advance.
		/// </summary>
		/// <param name="actualEnum">the enumeration to wrap</param>
		protected internal virtual void  SetEnum(TermEnum actualEnum)
		{
			this.actualEnum = actualEnum;

			Term first = actualEnum.Term;
			if (first == null || !TermCompare(first))
			{
				// The initial term is missing or rejected: move forward.
				Next();
				return;
			}

			currentTerm = first;
		}
		/// <summary>
		/// Creates a wildcard query for <paramref name="term"/>. Records whether the term text
		/// contains <c>*</c> or <c>?</c>, and whether it is a plain prefix expression: no
		/// <c>?</c> and the first <c>*</c> is the last character.
		/// </summary>
		/// <param name="term">the term holding the wildcard expression</param>
		public WildcardQuery(Term term)
		{
			this.internalTerm = term;

			string text = term.Text;
			int firstStar = text.IndexOf('*');
			int firstQuestionMark = text.IndexOf('?');

			_termContainsWildcard = firstStar != -1 || firstQuestionMark != -1;
			_termIsPrefix = _termContainsWildcard
			                    && firstQuestionMark == -1
			                    && firstStar == text.Length - 1;
		}
Пример #18
0
 /// <summary>
 /// Creates the weight for this query. A phrase with exactly one term is delegated to a
 /// <c>TermQuery</c> carrying this query's boost; anything else gets a <c>PhraseWeight</c>.
 /// </summary>
 public override Weight CreateWeight(Searcher searcher, IState state)
 {
     if (terms.Count != 1)
     {
         return new PhraseWeight(this, searcher, state);
     }

     // Single-term shortcut.
     Query singleTermQuery = new TermQuery(terms[0]);
     singleTermQuery.Boost = Boost;
     return singleTermQuery.CreateWeight(searcher, state);
 }
Пример #19
0
        /// <summary>
        /// Starts one task per <see cref="Searchable"/> that computes its document frequency
        /// for <paramref name="term"/>, waits for all tasks to finish and returns the sum of
        /// their results.
        /// </summary>
        public override int DocFreq(Term term, IState state)
        {
            int count = searchables.Length;
            Task<int>[] pending = new Task<int>[count];

            for (int index = 0; index < count; index++)
            {
                // Copy to a local so that each lambda captures its own searchable.
                Searchable current = searchables[index];
                pending[index] = Task.Factory.StartNew(() => current.DocFreq(term, state));
            }

            Task.WaitAll(pending);
            return pending.Sum(finished => finished.Result);
        }
Пример #20
0
 /// <summary>
 /// Adds every term of every term array of this query to <paramref name="terms"/>, using the
 /// term as both key and value. A term that is already present is left untouched.
 /// </summary>
 /// <param name="terms">table that receives the terms</param>
 public override void  ExtractTerms(System.Collections.Hashtable terms)
 {
     foreach (Term[] termArray in termArrays)
     {
         foreach (Term candidate in termArray)
         {
             // Hashtable.Add throws ArgumentException for an existing key, so check for
             // presence first to get set-like "add if absent" behavior.
             if (!terms.ContainsKey(candidate))
             {
                 terms.Add(candidate, candidate);
             }
         }
     }
 }
        /// <summary>
        /// Builds a boolean query requiring one random term and prohibiting another, and passes
        /// it to <c>AssertSubsetOf</c> together with a plain query for the required term.
        /// </summary>
        public virtual void TestBooleanReqExclVersusTerm()
        {
            Term required = RandomTerm();
            Term excluded = RandomTerm();

            BooleanQuery requiredButNotExcluded = new BooleanQuery();
            requiredButNotExcluded.Add(new TermQuery(required), Occur.MUST);
            requiredButNotExcluded.Add(new TermQuery(excluded), Occur.MUST_NOT);

            TermQuery requiredOnly = new TermQuery(required);

            AssertSubsetOf(requiredButNotExcluded, requiredOnly);
        }
Пример #22
0
        /// <summary> Appends a term to the query phrase at an explicitly given relative position.
        /// This permits several terms at the same position as well as gaps in the phrase
        /// (e.g. where stopwords were removed). The first term added fixes the field of the
        /// phrase; later terms must use the same field.
        /// </summary>
        /// <param name="term">the term to append</param>
        /// <param name="position">relative position of the term within the phrase</param>
        /// <exception cref="System.ArgumentException">the term's field differs from the field of the terms already added</exception>
        public virtual void Add(Term term, int position)
        {
            bool isFirstTerm = terms.Count == 0;

            if (!isFirstTerm && term.Field() != field)
            {
                throw new System.ArgumentException("All phrase terms must be in the same field: " + term);
            }

            if (isFirstTerm)
            {
                field = term.Field();
            }

            terms.Add(term);
            positions.Add((System.Int32) position);
        }
Пример #23
0
        /// <summary>
        /// Expands this range into a <c>BooleanQuery</c> with one optional (SHOULD)
        /// <c>TermQuery</c> clause, carrying this query's boost, for every index term of the
        /// range's field that lies between the lower and upper term.
        /// </summary>
        /// <param name="reader">reader whose terms are enumerated, starting at the lower term</param>
        /// <returns>the boolean query built from the matching terms</returns>
        public override Query Rewrite(IndexReader reader)
        {
            BooleanQuery rewritten = new BooleanQuery(true);
            TermEnum     termEnum  = reader.Terms(lowerTerm);

            try
            {
                // For an exclusive range, terms not greater than the lower bound are skipped.
                bool skipLowerBound = !inclusive;

                System.String rangeField = GetField();

                do
                {
                    Term current = termEnum.Term();
                    if (current == null || current.Field() != rangeField)
                    {
                        // Ran out of terms, or moved past the field of the range.
                        break;
                    }

                    if (skipLowerBound && String.CompareOrdinal(current.Text(), lowerTerm.Text()) <= 0)
                    {
                        continue;
                    }
                    skipLowerBound = false;

                    if (upperTerm != null)
                    {
                        int upperVersusCurrent = String.CompareOrdinal(upperTerm.Text(), current.Text());

                        // Stop when beyond the upper term, or when the range is exclusive
                        // and the current term equals the upper term.
                        if (upperVersusCurrent < 0 || (!inclusive && upperVersusCurrent == 0))
                        {
                            break;
                        }
                    }

                    TermQuery match = new TermQuery(current);
                    match.SetBoost(GetBoost());
                    rewritten.Add(match, BooleanClause.Occur.SHOULD);
                }while (termEnum.Next());
            }
            finally
            {
                termEnum.Close();
            }
            return rewritten;
        }
Пример #24
0
 /// <summary>
 /// Creates the weight for this query. A phrase with exactly one term is delegated to a
 /// <c>TermQuery</c> carrying this query's boost; anything else gets a <c>PhraseWeight</c>.
 /// </summary>
 public override Weight CreateWeight(Searcher searcher)
 {
     if (terms.Count != 1)
     {
         return new PhraseWeight(this, searcher);
     }

     // Single-term shortcut.
     Query singleTermQuery = new TermQuery((Term)terms[0]);
     singleTermQuery.SetBoost(GetBoost());
     return singleTermQuery.CreateWeight(searcher);
 }
Пример #25
0
        /// <summary>
        /// Decides whether <paramref name="term"/> lies inside the configured range. Without a
        /// collator the bounds are checked with ordinal string comparison and the enumeration
        /// is ended as soon as a term passes the upper bound; with a collator both bounds are
        /// checked through <c>collator.Compare</c>. A null term or a term of another field
        /// ends the enumeration.
        /// </summary>
        /// <param name="term">the candidate term</param>
        /// <returns><c>true</c> when the term is within the range</returns>
        protected internal override bool TermCompare(Term term)
        {
            // Field names are compared by reference, exactly as in the original code.
            if (term == null || (object)term.Field != (object)field)
            {
                endEnum = true;
                return false;
            }

            if (collator == null)
            {
                // Ordinal ordering: reject terms at or below an exclusive lower bound
                // without ending the enumeration.
                if (!includeLower && lowerTermText != null && string.CompareOrdinal(term.Text, lowerTermText) <= 0)
                {
                    return false;
                }

                if (upperTermText != null)
                {
                    int upperVersusTerm = string.CompareOrdinal(upperTermText, term.Text);

                    // Beyond the upper bound, or equal to an exclusive upper bound: stop.
                    if (upperVersusTerm < 0 || (!includeUpper && upperVersusTerm == 0))
                    {
                        endEnum = true;
                        return false;
                    }
                }
                return true;
            }

            // Collated ordering: out-of-range terms are rejected without ending the enumeration.
            if (lowerTermText != null)
            {
                int versusLower = collator.Compare(term.Text.ToString(), lowerTermText.ToString());
                if (includeLower ? versusLower < 0 : versusLower <= 0)
                {
                    return false;
                }
            }

            if (upperTermText != null)
            {
                int versusUpper = collator.Compare(term.Text.ToString(), upperTermText.ToString());
                if (includeUpper ? versusUpper > 0 : versusUpper >= 0)
                {
                    return false;
                }
            }

            return true;
        }
	    /// <summary>
		/// Accepts <paramref name="term"/> when it is in this enumeration's field, starts with
		/// the literal prefix <c>pre</c> and the rest is accepted by <c>WildcardEquals</c>.
		/// A term of another field or without the prefix ends the enumeration.
		/// </summary>
		/// <param name="term">the candidate term</param>
		/// <returns><c>true</c> when the term matches the wildcard expression</returns>
		/*protected internal*/ protected internal override bool TermCompare(Term term)
		{
			if ((System.Object) field == (System.Object) term.Field)
			{
				System.String searchText = term.Text;
				// Ordinal prefix test: index terms are raw character sequences, and the
				// culture-sensitive default of StartsWith can give different answers per locale.
				if (searchText.StartsWith(pre, System.StringComparison.Ordinal))
				{
					return WildcardEquals(text, 0, searchText, preLen);
				}
			}
			endEnum = true;
			return false;
		}
Пример #27
0
 /// <summary>
 /// Accepts <paramref name="term"/> when it is in this enumeration's field, starts with
 /// the literal prefix <c>pre</c> and the rest is accepted by <c>WildcardEquals</c>.
 /// A term of another field or without the prefix ends the enumeration.
 /// </summary>
 /// <param name="term">the candidate term</param>
 /// <returns><c>true</c> when the term matches the wildcard expression</returns>
 protected internal override bool TermCompare(Term term)
 {
     if (field == term.Field())
     {
         System.String searchText = term.Text();
         // Ordinal prefix test: index terms are raw character sequences, and the
         // culture-sensitive default of StartsWith can give different answers per locale.
         if (searchText.StartsWith(pre, System.StringComparison.Ordinal))
         {
             return(WildcardEquals(text, 0, searchText, preLen));
         }
     }
     endEnum = true;
     return(false);
 }
Пример #28
0
        /// <summary> Computes a score factor for a single term and returns an explanation
        /// object for it.
        ///
        /// <p/>
        /// When <c>SupportedMethods.overridesTermIDF</c> is set, the factor comes from
        /// <c>Idf(term, searcher)</c>. Otherwise it is computed as
        ///
        /// <pre>
        /// Idf(searcher.DocFreq(term), searcher.MaxDoc());
        /// </pre>
        ///
        /// <c>Searcher.MaxDoc()</c> is used rather than <c>IndexReader.NumDocs()</c> because it
        /// is proportional to <c>Searcher.DocFreq(Term)</c>: when one is inaccurate, so is the
        /// other, and in the same direction.
        /// </summary>
        /// <param name="term">the term in question</param>
        /// <param name="searcher">the document collection being searched</param>
        /// <returns> an IDFExplanation object carrying the idf score factor and, in the default
        /// path, the document frequency and max doc it was derived from
        /// </returns>
        public virtual IDFExplanation IdfExplain(Term term, Searcher searcher)
        {
            if (!SupportedMethods.overridesTermIDF)
            {
                int docFreq = searcher.DocFreq(term);
                int maxDoc  = searcher.MaxDoc();

                return new AnonymousClassIDFExplanation1(docFreq, maxDoc, Idf(docFreq, maxDoc), this);
            }

            return new AnonymousClassIDFExplanation(Idf(term, searcher), this);
        }
Пример #29
0
        /// <summary> Creates a FuzzyQuery matching terms whose similarity to
        /// <paramref name="term"/> is at least <paramref name="minimumSimilarity"/>.
        /// When <paramref name="prefixLength"/> is greater than 0, a common prefix of that
        /// length is required as well.
        /// </summary>
        /// <param name="term">the term to search for</param>
        /// <param name="minimumSimilarity">a value between 0 and 1 giving the required similarity
        /// between the query term and matching terms. For example, with a value of
        /// <code>0.5</code> a term of the same length as the query term is considered similar
        /// if the edit distance between both terms is less than <code>length(term)*0.5</code>
        /// </param>
        /// <param name="prefixLength">length of the common (non-fuzzy) prefix</param>
        /// <exception cref="System.ArgumentException">
        /// <paramref name="minimumSimilarity"/> is &gt;= 1 or &lt; 0, or
        /// <paramref name="prefixLength"/> is &lt; 0
        /// </exception>
        public FuzzyQuery(Term term, float minimumSimilarity, int prefixLength)
            : base(term)
        {
            if (minimumSimilarity >= 1.0f)
            {
                throw new System.ArgumentException("minimumSimilarity >= 1");
            }
            if (minimumSimilarity < 0.0f)
            {
                throw new System.ArgumentException("minimumSimilarity < 0");
            }
            if (prefixLength < 0)
            {
                throw new System.ArgumentException("prefixLength < 0");
            }

            this.prefixLength = prefixLength;
            this.minimumSimilarity = minimumSimilarity;
        }
Пример #30
0
 /// <summary>
 /// Accepts <paramref name="term"/> when it is in this enumeration's field, starts with
 /// the literal prefix <c>pre</c> and the rest is accepted by <c>WildcardEquals</c>.
 /// A term of another field or without the prefix ends the enumeration.
 /// </summary>
 /// <param name="term">the candidate term</param>
 /// <returns><c>true</c> when the term matches the wildcard expression</returns>
 /*protected internal*/ protected internal override bool TermCompare(Term term)
 {
     if ((object)field == (object)term.Field)
     {
         string searchText = term.Text;
         // Ordinal prefix test: index terms are raw character sequences, and the
         // culture-sensitive default of StartsWith can give different answers per locale.
         if (searchText.StartsWith(pre, System.StringComparison.Ordinal))
         {
             return(WildcardEquals(text, 0, searchText, preLen));
         }
     }
     endEnum = true;
     return(false);
 }
Пример #31
0
		/// <summary>
		/// Accepts <paramref name="term"/> when it is in this enumeration's field, starts with
		/// the literal prefix <c>pre</c> and is matched by <c>pattern</c>. A term of another
		/// field or without the prefix ends the enumeration.
		/// </summary>
		/// <param name="term">the candidate term</param>
		/// <returns><c>true</c> when the pattern match succeeds</returns>
		protected internal override bool TermCompare(Term term)
		{
			if ((System.Object) field == (System.Object) term.Field())
			{
				System.String searchText = term.Text();
				// Ordinal prefix test: index terms are raw character sequences, and the
				// culture-sensitive default of StartsWith can give different answers per locale.
				if (searchText.StartsWith(pre, System.StringComparison.Ordinal))
				{
                    return pattern.Match(searchText).Success;
				}
			}
			endEnum = true;
			return false;
		}
Пример #32
0
 /// <summary>
 /// Accepts <paramref name="term"/> when it is in this enumeration's field, starts with
 /// the literal prefix <c>pre</c> and is matched by <c>pattern</c>. A term of another
 /// field or without the prefix ends the enumeration.
 /// </summary>
 /// <param name="term">the candidate term</param>
 /// <returns><c>true</c> when the pattern match succeeds</returns>
 protected internal override bool TermCompare(Term term)
 {
     if ((System.Object)field == (System.Object)term.Field())
     {
         System.String searchText = term.Text();
         // Ordinal prefix test: index terms are raw character sequences, and the
         // culture-sensitive default of StartsWith can give different answers per locale.
         if (searchText.StartsWith(pre, System.StringComparison.Ordinal))
         {
             return(pattern.Match(searchText).Success);
         }
     }
     endEnum = true;
     return(false);
 }
Пример #33
0
        /// <summary>
        /// Creates a <c>TermScorer</c> for the term "all", advances it to target 3 and expects
        /// the scorer to be positioned on document 5.
        /// </summary>
        public virtual void  TestSkipTo()
        {
            Term      allTerm = new Term(FIELD, "all");
            TermQuery query   = new TermQuery(allTerm);

            Weight weight = query.Weight(indexSearcher);

            TermScorer scorer = new TermScorer(weight, indexReader.TermDocs(allTerm), indexSearcher.GetSimilarity(), indexReader.Norms(FIELD));

            bool advanced = scorer.Advance(3) != DocIdSetIterator.NO_MORE_DOCS;
            Assert.IsTrue(advanced, "Didn't skip");

            // The scorer is expected to land on doc 5.
            bool onDocFive = scorer.DocID() == 5;
            Assert.IsTrue(onDocFive, "doc should be number 5");
        }
Пример #34
0
			/// <summary>Returns the document frequency stored in <c>dfMap</c> for <paramref name="term"/>.</summary>
			/// <param name="term">the term to look up</param>
			/// <exception cref="System.ArgumentException">the lookup raised a NullReferenceException</exception>
			public override int DocFreq(Term term)
			{
				try
				{
					return (System.Int32) dfMap[term];
				}
				catch (System.NullReferenceException)
				{
					// A NullReferenceException during the lookup/unboxing is reported as a missing entry.
					throw new System.ArgumentException("df for term " + term.Text() + " not available");
				}
			}
Пример #35
0
        /// <summary>
        /// Indexes 37 documents into a <c>MockFSDirectory</c>, then interleaves reads, document
        /// deletions and searches with calls to <c>tweakBufferSizes()</c>, checking that document
        /// frequencies and hit counts stay at the expected values throughout.
        /// </summary>
        public virtual void  TestSetBufferSize()
        {
            System.IO.FileInfo indexDir = new System.IO.FileInfo(System.IO.Path.Combine(SupportClass.AppSettings.Get("tempDir", ""), "testSetBufferSize"));
            MockFSDirectory    dir      = new MockFSDirectory(indexDir, NewRandom());

            try
            {
                IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
                writer.SetUseCompoundFile(false);
                for (int i = 0; i < 37; i++)
                {
                    Document doc = new Document();
                    doc.Add(new Field("content", "aaa bbb ccc ddd" + i, Field.Store.YES, Field.Index.ANALYZED));
                    doc.Add(new Field("id", "" + i, Field.Store.YES, Field.Index.ANALYZED));
                    writer.AddDocument(doc);
                }
                writer.Close();

                dir.allIndexInputs.Clear();

                IndexReader reader = IndexReader.Open(dir);
                Term        aaa    = new Term("content", "aaa");
                Term        bbb    = new Term("content", "bbb");
                Term        ccc    = new Term("content", "ccc");

                // DocFreq is expected to stay at 37 even after documents are deleted,
                // while the searches below are expected to return 35 hits.
                Assert.AreEqual(37, reader.DocFreq(ccc));
                reader.DeleteDocument(0);
                Assert.AreEqual(37, reader.DocFreq(aaa));
                dir.tweakBufferSizes();
                reader.DeleteDocument(4);
                // Expected value first, actual second, so a failure message reads correctly.
                Assert.AreEqual(37, reader.DocFreq(bbb));
                dir.tweakBufferSizes();

                IndexSearcher searcher = new IndexSearcher(reader);
                ScoreDoc[]    hits     = searcher.Search(new TermQuery(bbb), null, 1000).scoreDocs;
                dir.tweakBufferSizes();
                Assert.AreEqual(35, hits.Length);
                dir.tweakBufferSizes();
                hits = searcher.Search(new TermQuery(new Term("id", "33")), null, 1000).scoreDocs;
                dir.tweakBufferSizes();
                Assert.AreEqual(1, hits.Length);
                hits = searcher.Search(new TermQuery(aaa), null, 1000).scoreDocs;
                dir.tweakBufferSizes();
                Assert.AreEqual(35, hits.Length);
                searcher.Close();
                reader.Close();
            }
            finally
            {
                // Always remove the temporary index directory.
                _TestUtil.RmDir(indexDir);
            }
        }
Пример #36
0
 /// <summary>
 /// Returns a float per document for <paramref name="field"/>. A cached array is returned
 /// when <c>Lookup</c> finds one; otherwise the field's terms are enumerated, each term text
 /// is parsed as a float (0 when parsing fails) and assigned to every document containing
 /// that term, and the resulting array is stored in the cache.
 /// </summary>
 /// <param name="reader">reader supplying terms and documents</param>
 /// <param name="field">name of the field to read; interned before use</param>
 /// <param name="parser">passed to <c>Lookup</c> and <c>Store</c> together with reader and field</param>
 /// <returns>array with one entry per document of the reader</returns>
 /// <exception cref="System.SystemException">the term enumeration has no current term at all</exception>
 public virtual float[] GetFloats(IndexReader reader, System.String field, FloatParser parser)
 {
     field = String.Intern(field);
     System.Object ret = Lookup(reader, field, parser);
     if (ret == null)
     {
         float[] retArray = new float[reader.MaxDoc()];
         if (retArray.Length > 0)
         {
             TermDocs termDocs = reader.TermDocs();
             TermEnum termEnum = null;
             try
             {
                 // Acquired inside the try so that termDocs is released if this call throws.
                 termEnum = reader.Terms(new Term(field, ""));
                 if (termEnum.Term() == null)
                 {
                     throw new System.SystemException("no terms in field " + field);
                 }
                 do
                 {
                     Term term = termEnum.Term();
                     if (term.Field() != field)
                     {
                         // Moved past the requested field.
                         break;
                     }
                     float termval;
                     try
                     {
                         termval = SupportClass.Single.Parse(term.Text());
                     }
                     catch (Exception)
                     {
                         // Deliberate best effort: text that cannot be parsed counts as 0.
                         termval = 0;
                     }
                     termDocs.Seek(termEnum);
                     while (termDocs.Next())
                     {
                         retArray[termDocs.Doc()] = termval;
                     }
                 }while (termEnum.Next());
             }
             finally
             {
                 try
                 {
                     termDocs.Close();
                 }
                 finally
                 {
                     // Close the enumeration even when closing termDocs fails.
                     if (termEnum != null)
                     {
                         termEnum.Close();
                     }
                 }
             }
         }
         Store(reader, field, parser, retArray);
         return(retArray);
     }
     return((float[])ret);
 }
        /// <summary>
        /// Builds a phrase query of two random terms with the second term at position 2
        /// (leaving a hole) and passes it to <c>AssertSubsetOf</c> together with a boolean query
        /// requiring both terms.
        /// </summary>
        public virtual void TestExactPhraseVersusBooleanAndWithHoles()
        {
            Term first  = RandomTerm();
            Term second = RandomTerm();

            PhraseQuery phraseWithHole = new PhraseQuery();
            phraseWithHole.Add(first);
            phraseWithHole.Add(second, 2);

            BooleanQuery bothRequired = new BooleanQuery();
            bothRequired.Add(new TermQuery(first), Occur.MUST);
            bothRequired.Add(new TermQuery(second), Occur.MUST);

            AssertSubsetOf(phraseWithHole, bothRequired);
        }
        /// <summary>
        /// Builds a conjunction (both terms MUST) and a disjunction (both terms SHOULD) of the
        /// same two random terms and passes them to <c>AssertSubsetOf</c>, conjunction first.
        /// </summary>
        public virtual void TestBooleanAndVersusBooleanOr()
        {
            Term         t1 = RandomTerm();
            Term         t2 = RandomTerm();

            // The "And" side must use MUST clauses; with SHOULD on both sides the two queries
            // would be identical and the comparison would prove nothing.
            BooleanQuery q1 = new BooleanQuery();
            q1.Add(new TermQuery(t1), Occur.MUST);
            q1.Add(new TermQuery(t2), Occur.MUST);

            BooleanQuery q2 = new BooleanQuery();
            q2.Add(new TermQuery(t1), Occur.SHOULD);
            q2.Add(new TermQuery(t2), Occur.SHOULD);

            AssertSubsetOf(q1, q2);
        }
Пример #39
0
        /// <summary> Appends a term to the query phrase at an explicitly given relative position.
        /// This permits several terms at the same position as well as gaps in the phrase
        /// (e.g. where stopwords were removed). The first term added fixes the field of the
        /// phrase; later terms must use the same field.
        /// </summary>
        /// <param name="term">the term to append</param>
        /// <param name="position">relative position of the term within the phrase</param>
        /// <exception cref="System.ArgumentException">the term's field differs from the field of the terms already added</exception>
        public virtual void  Add(Term term, int position)
        {
            if (terms.Count > 0)
            {
                if (term.Field() != field)
                {
                    throw new System.ArgumentException("All phrase terms must be in the same field: " + term);
                }
            }
            else
            {
                // First term: it determines the field of the whole phrase.
                field = term.Field();
            }

            terms.Add(term);
            positions.Add((System.Int32)position);
        }
Пример #40
0
        /// <summary>
        /// Indexes two small documents with <c>SimpleSimilarity</c> and runs a term query, a
        /// boolean query and a phrase query (with and without slop) against them; the checks
        /// themselves live in the hit collectors passed to each search.
        /// </summary>
        public virtual void  TestSimilarity_()
        {
            RAMDirectory store  = new RAMDirectory();
            IndexWriter  writer = new IndexWriter(store, new SimpleAnalyzer(), true);

            writer.SetSimilarity(new SimpleSimilarity());

            Document d1 = new Document();

            d1.Add(Field.Text("Field", "a c"));

            Document d2 = new Document();

            d2.Add(Field.Text("Field", "a b c"));

            writer.AddDocument(d1);
            writer.AddDocument(d2);
            writer.Optimize();
            writer.Close();

            Searcher searcher = new IndexSearcher(store);

            try
            {
                searcher.SetSimilarity(new SimpleSimilarity());

                Term a = new Term("Field", "a");
                Term b = new Term("Field", "b");
                Term c = new Term("Field", "c");

                searcher.Search(new TermQuery(b), new AnonymousClassHitCollector(this));

                BooleanQuery bq = new BooleanQuery();

                bq.Add(new TermQuery(a), false, false);
                bq.Add(new TermQuery(b), false, false);
                searcher.Search(bq, new AnonymousClassHitCollector1(this));

                PhraseQuery pq = new PhraseQuery();

                pq.Add(a);
                pq.Add(c);
                searcher.Search(pq, new AnonymousClassHitCollector2(this));

                // Same phrase again, now allowing a slop of 2.
                pq.SetSlop(2);
                searcher.Search(pq, new AnonymousClassHitCollector3(this));
            }
            finally
            {
                // Release the searcher even when one of the collectors fails an assertion.
                searcher.Close();
            }
        }
Пример #41
0
            /// <summary>Returns the document frequency stored in <c>dfMap</c> for <paramref name="term"/>.</summary>
            /// <param name="term">the term to look up</param>
            /// <exception cref="System.ArgumentException">the lookup raised a NullReferenceException</exception>
            public override int DocFreq(Term term)
            {
                try
                {
                    return (System.Int32)dfMap[term];
                }
                catch (System.NullReferenceException)
                {
                    // A NullReferenceException during the lookup/unboxing is reported as a missing entry.
                    throw new System.ArgumentException("df for term " + term.Text() + " not available");
                }
            }
Пример #42
0
        /// <summary>Constructs a query selecting the terms between <code>lowerTerm</code> and
        /// <code>upperTerm</code>. At least one of the two must be given; a null term means
        /// there is no bound on that side. When both are given they <b>must</b> belong to the
        /// same field.
        /// <p/>
        /// If <code>collator</code> is not null, it is used to decide whether index terms are
        /// within the range, instead of the Unicode code point order in which index terms are
        /// stored.
        /// <p/>
        /// <strong>WARNING:</strong> Supplying a non-null <code>collator</code> causes every
        /// single index Term of the field referenced by lowerTerm and/or upperTerm to be
        /// examined. Depending on the number of index Terms in that field, the operation
        /// could be very slow.
        /// </summary>
        /// <param name="lowerTerm">The Term at the lower end of the range</param>
        /// <param name="upperTerm">The Term at the upper end of the range</param>
        /// <param name="inclusive">If true, both <code>lowerTerm</code> and
        /// <code>upperTerm</code> are themselves included in the range.
        /// </param>
        /// <param name="collator">The collator used to determine whether index Terms fall inside
        /// the range bounded by <code>lowerTerm</code> and <code>upperTerm</code>.
        /// </param>
        /// <exception cref="System.ArgumentException">both terms are null, or they belong to different fields</exception>
        public RangeQuery(Term lowerTerm, Term upperTerm, bool inclusive, System.Globalization.CompareInfo collator)
        {
            bool hasLower = lowerTerm != null;
            bool hasUpper = upperTerm != null;

            if (!hasLower && !hasUpper)
            {
                throw new System.ArgumentException("At least one term must be non-null");
            }
            // Field names are compared by reference, exactly as in the original code.
            if (hasLower && hasUpper && (System.Object)lowerTerm.Field() != (System.Object)upperTerm.Field())
            {
                throw new System.ArgumentException("Both terms must have the same field");
            }

            System.String rangeField = hasLower ? lowerTerm.Field() : upperTerm.Field();
            System.String lowerText  = hasLower ? lowerTerm.Text() : null;
            System.String upperText  = hasUpper ? upperTerm.Text() : null;

            delegate_Renamed = new TermRangeQuery(rangeField, lowerText, upperText, inclusive, inclusive, collator);
            delegate_Renamed.SetRewriteMethod(TermRangeQuery.SCORING_BOOLEAN_QUERY_REWRITE);
        }
        /// <summary>
        /// Builds a boolean disjunction and a <c>DisjunctionMaxQuery</c> (constructed with 0.5f)
        /// over the same two random terms and passes both to <c>AssertSameSet</c>.
        /// </summary>
        public virtual void TestDisjunctionSumVersusDisjunctionMax()
        {
            Term first  = RandomTerm();
            Term second = RandomTerm();

            BooleanQuery disjunctionSum = new BooleanQuery();
            disjunctionSum.Add(new TermQuery(first), Occur.SHOULD);
            disjunctionSum.Add(new TermQuery(second), Occur.SHOULD);

            DisjunctionMaxQuery disjunctionMax = new DisjunctionMaxQuery(0.5f);
            disjunctionMax.Add(new TermQuery(first));
            disjunctionMax.Add(new TermQuery(second));

            AssertSameSet(disjunctionSum, disjunctionMax);
        }
Пример #44
0
            /// <summary>
            /// Returns the document frequency stored in <c>dfMap</c> for the given term.
            /// </summary>
            /// <param name="term">Term whose document frequency is requested.</param>
            /// <param name="state">Not read by this implementation.</param>
            /// <returns>The value <c>dfMap</c> holds for <paramref name="term"/>.</returns>
            /// <exception cref="System.ArgumentException">
            /// Thrown when <c>dfMap</c> contains no entry for <paramref name="term"/>.
            /// </exception>
            public override int DocFreq(Term term, IState state)
            {
                int df;

                // A missing entry is an expected outcome, so probe with TryGetValue instead of
                // letting the indexer throw KeyNotFoundException and catching it.
                if (!dfMap.TryGetValue(term, out df))
                {
                    throw new System.ArgumentException("df for term " + term.Text + " not available");
                }

                return df;
            }
        /// <summary>
        /// Builds a two-term <c>PhraseQuery</c> whose second term is added at position 2, and a
        /// <c>MultiPhraseQuery</c> that offers an extra alternative term at that same position,
        /// then passes both to <c>AssertSubsetOf</c>.
        /// </summary>
        public virtual void TestExactPhraseVersusMultiPhraseWithHoles()
        {
            Term leading = RandomTerm();
            Term trailing = RandomTerm();

            // Exact phrase: first term at the default position, second term at position 2.
            PhraseQuery exactPhrase = new PhraseQuery();
            exactPhrase.Add(leading);
            exactPhrase.Add(trailing, 2);

            // Multi-phrase: same first term; position 2 accepts either the original second
            // term or one additional random term.
            Term alternative = RandomTerm();
            MultiPhraseQuery multiPhrase = new MultiPhraseQuery();
            multiPhrase.Add(leading);
            multiPhrase.Add(new Term[] { trailing, alternative }, 2);

            AssertSubsetOf(exactPhrase, multiPhrase);
        }
        /// <summary>
        /// Builds two identical two-term phrase queries (second term added at position 2),
        /// sets <c>Slop = 1</c> on the second one, and passes both to <c>AssertSubsetOf</c>.
        /// </summary>
        public virtual void TestPhraseVersusSloppyPhraseWithHoles()
        {
            Term leading = RandomTerm();
            Term trailing = RandomTerm();

            // Phrase with the default slop.
            PhraseQuery exactPhrase = new PhraseQuery();
            exactPhrase.Add(leading);
            exactPhrase.Add(trailing, 2);

            // Same phrase, but with a slop of 1.
            PhraseQuery sloppyPhrase = new PhraseQuery();
            sloppyPhrase.Add(leading);
            sloppyPhrase.Add(trailing, 2);
            sloppyPhrase.Slop = 1;

            AssertSubsetOf(exactPhrase, sloppyPhrase);
        }
Пример #47
0
        /// <summary>
        /// Installs the underlying <c>TermEnum</c> (for example from a constructor) and
        /// positions this enumeration on the first term accepted by <c>TermCompare</c>.
        /// </summary>
        /// <param name="actualEnum">The underlying term enumeration to wrap.</param>
        protected internal virtual void  SetEnum(TermEnum actualEnum)
        {
            this.actualEnum = actualEnum;

            Term candidate = actualEnum.Term;

            // If the enumeration's current term is missing or rejected, advance via Next().
            if (candidate == null || !TermCompare(candidate))
            {
                Next();
                return;
            }

            // The current term already matches, so it becomes the first result.
            currentTerm = candidate;
        }
Пример #48
0
        /// <summary>
        /// Builds a <c>TermScorer</c> for the term "all" and checks that it yields exactly two
        /// documents, each scoring 1.6931472f, before reporting <c>NO_MORE_DOCS</c>.
        /// </summary>
        public virtual void  TestNext()
        {
            const float expectedScore = 1.6931472f;
            const int expectedHits = 2;

            Term allTerm = new Term(FIELD, "all");
            Weight weight = new TermQuery(allTerm).Weight(indexSearcher);

            TermScorer scorer = new TermScorer(
                weight,
                indexReader.TermDocs(allTerm),
                indexSearcher.GetSimilarity(),
                indexReader.Norms(FIELD));

            // Each expected hit must be returned and must carry the expected score.
            for (int hit = 0; hit < expectedHits; hit++)
            {
                Assert.IsTrue(scorer.NextDoc() != DocIdSetIterator.NO_MORE_DOCS, "next did not return a doc");
                Assert.IsTrue(scorer.Score() == expectedScore, "score is not correct");
            }

            // After the expected hits the scorer must be exhausted.
            Assert.IsTrue(scorer.NextDoc() == DocIdSetIterator.NO_MORE_DOCS, "next returned a doc and it should not have");
        }
Пример #49
0
        /// <summary>
        /// Indexes two small documents ("a c" and "a b c") using <c>SimpleSimilarity</c>, then
        /// runs a term query, a boolean query, a phrase query and the same phrase query with
        /// slop 2, each with its own collector.
        /// NOTE(review): the collectors are defined elsewhere; presumably they hold the
        /// actual score assertions - confirm.
        /// </summary>
        public virtual void  TestSimilarity_Renamed()
        {
            // Build the index.
            RAMDirectory directory = new RAMDirectory();
            IndexWriter indexWriter = new IndexWriter(directory, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED, null);
            indexWriter.SetSimilarity(new SimpleSimilarity());

            Document docAC = new Document();
            docAC.Add(new Field("field", "a c", Field.Store.YES, Field.Index.ANALYZED));

            Document docABC = new Document();
            docABC.Add(new Field("field", "a b c", Field.Store.YES, Field.Index.ANALYZED));

            indexWriter.AddDocument(docAC, null);
            indexWriter.AddDocument(docABC, null);
            indexWriter.Optimize(null);
            indexWriter.Close();

            // Search it with the same similarity implementation.
            Searcher searcher = new IndexSearcher(directory, true, null);
            searcher.Similarity = new SimpleSimilarity();

            Term termA = new Term("field", "a");
            Term termB = new Term("field", "b");
            Term termC = new Term("field", "c");

            // Single term query.
            searcher.Search(new TermQuery(termB), new AnonymousClassCollector(this), null);

            // Boolean query with two optional clauses.
            BooleanQuery eitherAOrB = new BooleanQuery();
            eitherAOrB.Add(new TermQuery(termA), Occur.SHOULD);
            eitherAOrB.Add(new TermQuery(termB), Occur.SHOULD);
            searcher.Search(eitherAOrB, new AnonymousClassCollector1(this), null);

            // Phrase query "a c" with the default slop.
            PhraseQuery phraseAC = new PhraseQuery();
            phraseAC.Add(termA);
            phraseAC.Add(termC);
            searcher.Search(phraseAC, new AnonymousClassCollector2(this), null);

            // The same phrase query again, now with a slop of 2.
            phraseAC.Slop = 2;
            searcher.Search(phraseAC, new AnonymousClassCollector3(this), null);
        }
Пример #50
0
        /// <summary>
        /// Renders this query as text: an optional "field:" prefix, the term text, a '~',
        /// the minimum similarity and the boost suffix.
        /// </summary>
        /// <param name="field">
        /// Field name to compare against; the "field:" prefix is written only when the
        /// term's field differs from this value.
        /// </param>
        /// <returns>The textual form of the query.</returns>
        public override System.String ToString(System.String field)
        {
            Term queryTerm = GetTerm();
            System.String termField = queryTerm.Field();
            System.Text.StringBuilder rendered = new System.Text.StringBuilder();

            // Only qualify the term with its field when it is not the requested field.
            if (!termField.Equals(field))
            {
                rendered.Append(termField).Append(":");
            }

            rendered.Append(queryTerm.Text())
                    .Append('~')
                    .Append(SupportClass.Single.ToString(minimumSimilarity))
                    .Append(ToStringUtils.Boost(GetBoost()));

            return rendered.ToString();
        }
Пример #51
0
        /// <summary>
        /// Computes an order-sensitive hash code over an array of terms, starting from 1 and
        /// folding each element in as <c>31 * hash + elementHash</c>.
        /// </summary>
        /// <param name="termArray">Array to hash; may be null and may contain null elements.</param>
        /// <returns>
        /// 0 when <paramref name="termArray"/> is null; otherwise the combined hash, where a
        /// null element contributes 0.
        /// </returns>
        private int ArraysHashCode(Term[] termArray)
        {
            if (termArray == null)
            {
                return 0;
            }

            int hash = 1;

            // Hash mixing is expected to wrap around; "unchecked" keeps that true even when
            // the assembly is compiled with overflow checking enabled.
            unchecked
            {
                foreach (Term term in termArray)
                {
                    hash = 31 * hash + (term == null ? 0 : term.GetHashCode());
                }
            }

            return hash;
        }
Пример #52
0
        /// <summary>
        /// Guesses a sort type for a field by inspecting the first term the reader returns
        /// for it: INT if the trimmed text parses as Int32, else LONG if it parses as Int64,
        /// else FLOAT if <c>SupportClass.Single.TryParse</c> accepts it, else STRING.
        /// </summary>
        /// <param name="reader">Index reader whose terms are inspected.</param>
        /// <param name="fieldKey">Name of the field; it is interned before use.</param>
        /// <returns>One of the <c>SortField</c> type constants.</returns>
        /// <exception cref="System.SystemException">
        /// Thrown when the enumeration has no term at all, or when the first term belongs to
        /// a different field.
        /// </exception>
        internal static int DetectFieldType(IndexReader reader, System.String fieldKey)
        {
            System.String field = StringHelper.Intern(fieldKey);
            TermEnum enumerator = reader.Terms(new Term(field));

            try
            {
                Term firstTerm = enumerator.Term();
                if (firstTerm == null)
                {
                    throw new System.SystemException("no terms in field " + field + " - cannot determine sort type");
                }

                // Reference comparison against the interned field name.
                if ((System.Object)firstTerm.Field() != (System.Object)field)
                {
                    throw new System.SystemException("field \"" + field + "\" does not appear to be indexed");
                }

                System.String trimmedText = firstTerm.Text().Trim();
                int parsedInt;
                long parsedLong;
                float parsedFloat;

                // Try the narrowest numeric type first and widen on failure.
                if (System.Int32.TryParse(trimmedText, out parsedInt))
                {
                    return SortField.INT;
                }
                if (System.Int64.TryParse(trimmedText, out parsedLong))
                {
                    return SortField.LONG;
                }
                if (SupportClass.Single.TryParse(trimmedText, out parsedFloat))
                {
                    return SortField.FLOAT;
                }
                return SortField.STRING;
            }
            finally
            {
                // Always release the enumeration, including on the exception paths.
                enumerator.Close();
            }
        }
Пример #53
0
        /// <summary>
        /// Indexes two small documents ("a c" and "a b c") using <c>SimpleSimilarity</c>, then
        /// runs a term query, a boolean query, a phrase query and the same phrase query with
        /// slop 2, each with its own collector, and finally disposes the reader and directory.
        /// NOTE(review): the collectors are defined elsewhere; presumably they hold the
        /// actual score assertions - confirm.
        /// </summary>
        public virtual void TestSimilarity_Mem()
        {
            // Build the index.
            Directory directory = NewDirectory();
            RandomIndexWriter indexWriter = new RandomIndexWriter(
                Random(),
                directory,
                NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetSimilarity(new SimpleSimilarity()));

            Document docAC = new Document();
            docAC.Add(NewTextField("field", "a c", Field.Store.YES));

            Document docABC = new Document();
            docABC.Add(NewTextField("field", "a b c", Field.Store.YES));

            indexWriter.AddDocument(docAC);
            indexWriter.AddDocument(docABC);
            IndexReader indexReader = indexWriter.Reader;
            indexWriter.Dispose();

            // Search it with the same similarity implementation.
            IndexSearcher searcher = NewSearcher(indexReader);
            searcher.Similarity = new SimpleSimilarity();

            Term termA = new Term("field", "a");
            Term termB = new Term("field", "b");
            Term termC = new Term("field", "c");

            // Single term query.
            searcher.Search(new TermQuery(termB), new CollectorAnonymousInnerClassHelper(this));

            // Boolean query with two optional clauses.
            BooleanQuery eitherAOrB = new BooleanQuery();
            eitherAOrB.Add(new TermQuery(termA), BooleanClause.Occur.SHOULD);
            eitherAOrB.Add(new TermQuery(termB), BooleanClause.Occur.SHOULD);
            searcher.Search(eitherAOrB, new CollectorAnonymousInnerClassHelper2(this));

            // Phrase query "a c" with the default slop.
            PhraseQuery phraseAC = new PhraseQuery();
            phraseAC.Add(termA);
            phraseAC.Add(termC);
            searcher.Search(phraseAC, new CollectorAnonymousInnerClassHelper3(this));

            // The same phrase query again, now with a slop of 2.
            phraseAC.Slop = 2;
            searcher.Search(phraseAC, new CollectorAnonymousInnerClassHelper4(this));

            indexReader.Dispose();
            directory.Dispose();
        }
Пример #54
0
		/// <summary>
		/// Creates a FuzzyQuery matching terms whose similarity to <paramref name="term"/> is
		/// at least <paramref name="minimumSimilarity"/>. When <paramref name="prefixLength"/>
		/// is greater than 0, matching terms must also share a common prefix of that length.
		/// </summary>
		/// <param name="term">The term to search for.</param>
		/// <param name="minimumSimilarity">
		/// A value between 0 and 1 giving the required similarity between the query term and
		/// matching terms. For example, with a value of <c>0.5</c>, a term of the same length
		/// as the query term is considered similar if the edit distance between the two is
		/// less than <c>length(term) * 0.5</c>.
		/// </param>
		/// <param name="prefixLength">Length of the common (non-fuzzy) prefix.</param>
		/// <exception cref="System.ArgumentException">
		/// Thrown if <paramref name="minimumSimilarity"/> is &gt;= 1 or &lt; 0, or if
		/// <paramref name="prefixLength"/> is &lt; 0.
		/// </exception>
		public FuzzyQuery(Term term, float minimumSimilarity, int prefixLength):base(term)
		{
			// Reject out-of-range arguments before storing anything.
			if (minimumSimilarity >= 1.0f)
			{
				throw new System.ArgumentException("minimumSimilarity >= 1");
			}
			if (minimumSimilarity < 0.0f)
			{
				throw new System.ArgumentException("minimumSimilarity < 0");
			}
			if (prefixLength < 0)
			{
				throw new System.ArgumentException("prefixLength < 0");
			}

			// Note carried over from the original code: "will be removed in 3.0".
			this.term = term;

			// Flag terms whose length exceeds 1 / (1 - minimumSimilarity).
			if (term.Text().Length > 1.0f / (1.0f - minimumSimilarity))
			{
				this.termLongEnough = true;
			}

			this.minimumSimilarity = minimumSimilarity;
			this.prefixLength = prefixLength;
			rewriteMethod = SCORING_BOOLEAN_QUERY_REWRITE;
		}
		/// <summary>
		/// Creates a new <c>WildcardTermEnum</c>.
		/// <p/>
		/// Once the constructor returns, the enumeration already points at the first valid
		/// term, if such a term exists.
		/// </summary>
		/// <param name="reader">Index reader that supplies the terms.</param>
		/// <param name="term">Search term whose text may contain wildcard characters.</param>
		public WildcardTermEnum(IndexReader reader, Term term):base()
		{
			searchTerm = term;
			field = searchTerm.Field;
			System.String wildcardText = searchTerm.Text;

			int stringWildcardAt = wildcardText.IndexOf((System.Char) WILDCARD_STRING);
			int charWildcardAt = wildcardText.IndexOf((System.Char) WILDCARD_CHAR);

			// Position of the earliest wildcard of either kind, or -1 when there is none.
			int firstWildcardAt;
			if (stringWildcardAt == -1)
			{
				firstWildcardAt = charWildcardAt;
			}
			else if (charWildcardAt >= 0)
			{
				firstWildcardAt = System.Math.Min(stringWildcardAt, charWildcardAt);
			}
			else
			{
				firstWildcardAt = stringWildcardAt;
			}

			// Everything before the first wildcard is a literal prefix; with no wildcard
			// present the prefix is empty.
			if (firstWildcardAt != -1)
			{
				pre = wildcardText.Substring(0, firstWildcardAt);
			}
			else
			{
				pre = "";
			}

			preLen = pre.Length;
			text = wildcardText.Substring(preLen);

			// Start the underlying enumeration at the literal prefix.
			SetEnum(reader.Terms(new Term(searchTerm.Field, pre)));
		}
Пример #56
0
        /// <summary>
        /// Constructs a query selecting all terms greater than <code>lowerTerm</code> but
        /// less than <code>upperTerm</code>. At least one of the two terms must be given;
        /// a null term means there is no bound on that side. When both terms are given they
        /// <b>must</b> be for the same field.
        /// </summary>
        /// <param name="lowerTerm">Lower bound, or null for no lower bound.</param>
        /// <param name="upperTerm">Upper bound, or null for no upper bound.</param>
        /// <param name="inclusive">Stored as this query's inclusive flag.</param>
        /// <exception cref="System.ArgumentException">
        /// Thrown when both terms are null, or when both are given for different fields.
        /// </exception>
        public RangeQuery(Term lowerTerm, Term upperTerm, bool inclusive)
        {
            bool hasLower = lowerTerm != null;
            bool hasUpper = upperTerm != null;

            if (!hasLower && !hasUpper)
            {
                throw new System.ArgumentException("At least one term must be non-null");
            }
            if (hasLower && hasUpper && lowerTerm.Field() != upperTerm.Field())
            {
                throw new System.ArgumentException("Both terms must be for the same field");
            }

            // Without an explicit lower term, start from the empty text of the upper term's field.
            this.lowerTerm = hasLower ? lowerTerm : new Term(upperTerm.Field(), "");
            this.upperTerm = upperTerm;
            this.inclusive = inclusive;
        }
Пример #57
0
 /// <summary>
 /// Advances the enumeration to the next term accepted by <c>TermCompare</c>.
 /// </summary>
 /// <returns>
 /// True when a matching term was found and stored as the current term; false when the
 /// underlying enumerator is not set, <c>EndEnum()</c> reports the end, or the underlying
 /// enumerator has no more terms.
 /// </returns>
 public override bool Next()
 {
     // Nothing to iterate when the underlying enumerator was never initialized.
     if (actualEnum == null)
     {
         return false;
     }

     currentTerm = null;

     // Keep pulling terms until one matches, the end is signalled, or the source runs dry.
     while (!EndEnum() && actualEnum.Next())
     {
         Term candidate = actualEnum.Term();
         if (TermCompare(candidate))
         {
             currentTerm = candidate;
             return true;
         }
     }

     return false;
 }
Пример #58
0
		/// <summary>
		/// Computes a score factor for a simple term and returns an explanation for that
		/// score factor.
		///
		/// <p/>
		/// The default implementation uses:
		///
		/// <pre>
		/// idf(searcher.docFreq(term), searcher.maxDoc());
		/// </pre>
		///
		/// <c>Searcher.MaxDoc()</c> is used rather than <c>IndexReader.NumDocs()</c> because it
		/// is proportional to <c>Searcher.DocFreq(Term)</c>: when one is inaccurate, so is the
		/// other, and in the same direction.
		/// </summary>
		/// <param name="term">The term in question.</param>
		/// <param name="searcher">The document collection being searched.</param>
		/// <returns>
		/// An IDFExplain object that includes both an idf score factor and an explanation
		/// for the term.
		/// </returns>
		/// <exception cref="System.IO.IOException">Declared by the original documentation.</exception>
		public virtual IDFExplanation IdfExplain(Term term, Searcher searcher)
		{
			if (!SupportedMethods.overridesTermIDF)
			{
				// Default path: derive idf from the document frequency and MaxDoc.
				int docFreq = searcher.DocFreq(term);
				int maxDoc = searcher.MaxDoc();
				float computedIdf = Idf(docFreq, maxDoc);
				return new AnonymousClassIDFExplanation1(docFreq, maxDoc, computedIdf, this);
			}

			// SupportedMethods.overridesTermIDF is set: delegate to Idf(Term, Searcher).
			float termIdf = Idf(term, searcher);
			return new AnonymousClassIDFExplanation(termIdf, this);
		}
Пример #59
0
		/// <summary>
		/// Computes the idf factor for a term from the searcher's document frequency for that
		/// term and the searcher's <c>MaxDoc()</c>.
		/// </summary>
		/// <param name="term">The term in question.</param>
		/// <param name="searcher">The searcher queried for DocFreq and MaxDoc.</param>
		/// <returns>The result of <c>Idf(docFreq, maxDoc)</c>.</returns>
		public virtual float Idf(Term term, Searcher searcher)
		{
			int docFreq = searcher.DocFreq(term);
			int maxDoc = searcher.MaxDoc();
			return Idf(docFreq, maxDoc);
		}
Пример #60
0
		/// <summary>Builds a query for the single term <c>t</c>.</summary>
		/// <param name="t">The term this query stores.</param>
		public TermQuery(Term t)
		{
			this.term = t;
		}