Example #1
0
		/* Walk the directory hierarchy in uid order, while keeping the uid iterator
		 * from the existing index in sync.  Mismatches indicate one of: (a) old
		 * documents, to be deleted; (b) unchanged documents, to be left alone; or
		 * (c) new documents, to be indexed.
		 */

        /// <summary>
        /// Indexes <paramref name="file"/> through the single-argument <c>IndexDocs</c> overload.
        /// When <paramref name="create"/> is false, the existing index at <paramref name="index"/>
        /// is opened first and a term enumeration positioned at the start of the "uid" field is
        /// stored in <c>uidIter</c> for the duration of the walk; afterwards, while
        /// <c>deleting</c> is set, every remaining "uid" term is deleted from the reader.
        /// </summary>
        /// <param name="file">Passed unchanged to the single-argument <c>IndexDocs</c> overload.</param>
        /// <param name="index">Location of the existing index; only read when <paramref name="create"/> is false.</param>
        /// <param name="create">True to index without opening an existing index; false to update incrementally.</param>
        private static void IndexDocs(System.IO.DirectoryInfo file, System.IO.DirectoryInfo index, bool create)
        {
            if (create)
            {
                // don't have an existing index
                IndexDocs(file);
                return;
            }

            // incrementally update
            reader = IndexReader.Open(FSDirectory.Open(index), false); // open existing index
            try
            {
                uidIter = reader.Terms(new Term("uid", "")); // init uid iterator
                try
                {
                    IndexDocs(file);

                    if (deleting)
                    {
                        // delete rest of stale docs
                        while (uidIter.Term() != null && (System.Object) uidIter.Term().Field == (System.Object) "uid")
                        {
                            System.Console.Out.WriteLine("deleting " + HTMLDocument.Uid2url(uidIter.Term().Text));
                            reader.DeleteDocuments(uidIter.Term());
                            uidIter.Next();
                        }
                        deleting = false;
                    }
                }
                finally
                {
                    // Close the uid iterator even when the walk or a deletion throws.
                    uidIter.Close();
                }
            }
            finally
            {
                // Close the existing index on every path so it is not left open after a failure.
                // NOTE(review): Close() now also runs when the walk throws; confirm that it is
                // acceptable for deletions made up to that point to be flushed by the reader.
                reader.Close();
            }
        }
			/// <summary> Builds a per-document array holding, for each document, the comparable
			/// produced by <c>GetComparable</c> for the term it contains in the given field.
			/// Documents that match no term of the field keep a null slot.
			/// </summary>
			/// <param name="reader">    Supplies the array size (<c>MaxDoc()</c>) and the TermDocs used for lookups.
			/// </param>
			/// <param name="enumerator">Term enumeration, read from its current position onwards.
			/// </param>
			/// <param name="fieldname"> Field whose terms are collected; enumeration stops at the first term of another field.
			/// </param>
			/// <returns> Array indexed by document number; empty when the reader reports no documents.
			/// </returns>
			/// <throws>  SystemException If the enumerator has no current term. </throws>
			/// <throws>  IOException If an error occurs reading the index. </throws>
			public static System.IComparable[] FillCache(IndexReader reader, TermEnum enumerator, System.String fieldname)
			{
				System.String field = String.Intern(fieldname);
				System.IComparable[] valuesByDoc = new System.IComparable[reader.MaxDoc()];

				// Nothing to fill for an empty index.
				if (valuesByDoc.Length == 0)
				{
					return valuesByDoc;
				}

				TermDocs postings = reader.TermDocs();
				try
				{
					if (enumerator.Term() == null)
					{
						throw new System.SystemException("no terms in field " + field);
					}

					bool hasMore = true;
					while (hasMore)
					{
						Term current = enumerator.Term();
						if (current.Field() != field)
						{
							// walked past the last term of the requested field
							break;
						}

						System.IComparable comparable = GetComparable(current.Text());

						// record the same comparable for every document containing this term
						postings.Seek(enumerator);
						while (postings.Next())
						{
							valuesByDoc[postings.Doc()] = comparable;
						}

						hasMore = enumerator.Next();
					}
				}
				finally
				{
					postings.Close();
				}

				return valuesByDoc;
			}
        /// <summary> Returns a DocIdSet with documents that should be
        /// permitted in search results.
        /// </summary>
        /// <remarks>
        /// A bit is set for every document containing a term of <c>fieldName</c> that lies between
        /// <c>lowerTerm</c> and <c>upperTerm</c>. A null bound is treated as unbounded, and
        /// <c>includeLower</c> / <c>includeUpper</c> decide whether a term equal to the bound
        /// matches. Terms are compared with <c>collator</c> when one is set and with
        /// <c>String.CompareOrdinal</c> otherwise.
        /// </remarks>
        /// <param name="reader">Reader whose terms and postings are scanned.</param>
        /// <returns>A bit set created with <c>reader.MaxDoc()</c>; no bits are set when the
        /// enumeration has no current term.</returns>
        public override DocIdSet GetDocIdSet(IndexReader reader)
        {
            OpenBitSet bits = new OpenBitSet(reader.MaxDoc());

            // With a collator the scan must start at the beginning of the field, because
            // collation order need not match the order in which the index stores terms.
            TermEnum enumerator = (null != lowerTerm && collator == null ?
                                   reader.Terms(new Term(fieldName, lowerTerm)) :
                                   reader.Terms(new Term(fieldName)));

            try
            {
                if (enumerator.Term() == null)
                {
                    return bits;
                }

                TermDocs termDocs = reader.TermDocs();
                try
                {
                    if (collator != null)
                    {
                        do
                        {
                            Term term = enumerator.Term();
                            if (term == null || !term.Field().Equals(fieldName))
                            {
                                // The enumeration has left fieldName; stop here, exactly as the
                                // ordinal branch below does, instead of walking every remaining
                                // term only to reject it.
                                break;
                            }

                            System.String text = term.Text();
                            if ((lowerTerm == null ||
                                 (includeLower ? collator.Compare(text, lowerTerm) >= 0 : collator.Compare(text, lowerTerm) > 0)) &&
                                (upperTerm == null ||
                                 (includeUpper ? collator.Compare(text, upperTerm) <= 0 : collator.Compare(text, upperTerm) < 0)))
                            {
                                // term in range, lookup docs
                                termDocs.Seek(term);
                                while (termDocs.Next())
                                {
                                    bits.Set(termDocs.Doc());
                                }
                            }
                        }
                        while (enumerator.Next());
                    }
                    else // null collator; using Unicode code point ordering
                    {
                        // Only an exclusive lower bound needs checking; once one term has passed
                        // it, the flag is cleared and later terms skip the comparison.
                        bool checkLower = !includeLower;
                        do
                        {
                            Term term = enumerator.Term();
                            if (term == null || !term.Field().Equals(fieldName))
                            {
                                break;
                            }

                            if (!checkLower || null == lowerTerm || String.CompareOrdinal(term.Text(), lowerTerm) > 0)
                            {
                                checkLower = false;
                                if (upperTerm != null)
                                {
                                    int compare = String.CompareOrdinal(upperTerm, term.Text());

                                    /* if beyond the upper term, or is exclusive and
                                     * this is equal to the upper term, break out */
                                    if ((compare < 0) || (!includeUpper && compare == 0))
                                    {
                                        break;
                                    }
                                }

                                /* we have a good term, find the docs */
                                termDocs.Seek(term);
                                while (termDocs.Next())
                                {
                                    bits.Set(termDocs.Doc());
                                }
                            }
                        }
                        while (enumerator.Next());
                    }
                }
                finally
                {
                    termDocs.Close();
                }
            }
            finally
            {
                enumerator.Close();
            }

            return bits;
        }
            /// <summary>
            /// Builds the <c>StringIndex</c> for one field: an array of the field's term texts in
            /// enumeration order (slot 0 is a null entry for documents without a term) and a
            /// per-document array holding the slot of the term each document contains.
            /// </summary>
            /// <param name="reader">Reader whose terms and postings are scanned.</param>
            /// <param name="fieldKey">Field name; cast to <c>System.String</c> and interned.</param>
            /// <returns>The populated <c>StringIndex</c>.</returns>
            /// <exception cref="System.SystemException">The field has more terms than
            /// <c>reader.MaxDoc()</c>.</exception>
            protected internal override object CreateValue(IndexReader reader, object fieldKey)
            {
                System.String   field    = String.Intern(((System.String)fieldKey));
                int[]           retArray = new int[reader.MaxDoc()];
                System.String[] mterms   = new System.String[reader.MaxDoc() + 1];
                int             t        = 0; // current term number

                // an entry for documents that have no terms in this field
                // should a document with no terms be at top or bottom?
                // this puts them at the top - if it is changed, FieldDocSortedHitQueue
                // needs to change as well.
                mterms[t++] = null;

                // Nested try/finally: termDocs is released even if opening the enumeration
                // throws, and a failure closing one resource does not skip closing the other.
                TermDocs termDocs = reader.TermDocs();
                try
                {
                    TermEnum termEnum = reader.Terms(new Term(field));
                    try
                    {
                        do
                        {
                            Term term = termEnum.Term();

                            // reference comparison: field was interned above
                            if (term == null || (object)term.Field() != (object)field)
                            {
                                break;
                            }

                            // store term text
                            // we expect that there is at most one term per document
                            if (t >= mterms.Length)
                            {
                                throw new System.SystemException("there are more terms than " + "documents in field \"" + field + "\", but it's impossible to sort on " + "tokenized fields");
                            }
                            mterms[t] = term.Text();

                            termDocs.Seek(termEnum);
                            while (termDocs.Next())
                            {
                                retArray[termDocs.Doc()] = t;
                            }

                            t++;
                        }
                        while (termEnum.Next());
                    }
                    finally
                    {
                        termEnum.Close();
                    }
                }
                finally
                {
                    termDocs.Close();
                }

                // t is at least 1 here because of the leading null entry, so a field without
                // terms is trimmed to a single null entry by the same path as any other field.
                if (t < mterms.Length)
                {
                    // if there are less terms than documents,
                    // trim off the dead array space
                    System.String[] terms = new System.String[t];
                    Array.Copy(mterms, 0, terms, 0, t);
                    mterms = terms;
                }

                return new StringIndex(retArray, mterms);
            }
            /// <summary>
            /// Walks <paramref name="enumerator"/> from its current position, passes every
            /// document of every visited term to <c>HandleDoc</c>, and finally reports the number
            /// of visited terms to <paramref name="query"/>.
            /// </summary>
            /// <param name="query">Receives the visited-term count via <c>IncTotalNumberOfTerms</c>.</param>
            /// <param name="reader">Supplies the TermDocs used to read postings.</param>
            /// <param name="enumerator">Terms to visit; the walk ends at the first null term or when <c>Next()</c> returns false.</param>
            public virtual void Generate(MultiTermQuery query, IndexReader reader, TermEnum enumerator)
            {
                // postings are pulled in batches of up to 32 entries
                int[] docBuffer = new int[32];
                int[] freqBuffer = new int[32];

                TermDocs postings = reader.TermDocs();
                try
                {
                    int visitedTerms = 0;
                    bool hasMore = true;

                    while (hasMore)
                    {
                        Term current = enumerator.Term();
                        if (current == null)
                        {
                            break;
                        }

                        visitedTerms++;
                        postings.Seek(current);

                        // a batch size of zero means this term's postings are exhausted
                        int batchSize;
                        while ((batchSize = postings.Read(docBuffer, freqBuffer)) != 0)
                        {
                            for (int slot = 0; slot < batchSize; slot++)
                            {
                                HandleDoc(docBuffer[slot]);
                            }
                        }

                        hasMore = enumerator.Next();
                    }

                    // porting note carried over from the original ({{Aroush-2.9}}): the use of
                    // this call was questioned during the port and left as is
                    query.IncTotalNumberOfTerms(visitedTerms);
                }
                finally
                {
                    postings.Close();
                }
            }
Example #6
0
        /// <summary> Produces a BitArray in which a bit is true for every document that
        /// contains a term of <c>fieldName</c> inside the configured range, and false
        /// for all other documents. The array length is <c>reader.MaxDoc()</c> rounded
        /// up to the next multiple of 64.
        /// </summary>
        public override System.Collections.BitArray Bits(IndexReader reader)
        {
            // round the bit count up to a whole number of 64-bit words
            int maxDoc = reader.MaxDoc();
            int wordCount = maxDoc / 64;
            if (maxDoc % 64 != 0)
            {
                wordCount++;
            }
            System.Collections.BitArray bits = new System.Collections.BitArray(wordCount * 64);

            // start at the lower bound when there is one, otherwise at the first term of the field
            TermEnum enumerator;
            if (null != lowerTerm)
            {
                enumerator = reader.Terms(new Term(fieldName, lowerTerm));
            }
            else
            {
                enumerator = reader.Terms(new Term(fieldName, ""));
            }

            try
            {
                if (enumerator.Term() == null)
                {
                    return bits;
                }

                // an exclusive lower bound has to be tested until one term gets past it
                bool checkLower = !includeLower;

                TermDocs termDocs = reader.TermDocs();
                try
                {
                    do
                    {
                        Term current = enumerator.Term();
                        if (current == null || !current.Field().Equals(fieldName))
                        {
                            // ran out of terms for this field
                            break;
                        }

                        bool pastLowerBound = !checkLower || null == lowerTerm || String.CompareOrdinal(current.Text(), lowerTerm) > 0;
                        if (!pastLowerBound)
                        {
                            // still sitting on the excluded lower term; advance
                            continue;
                        }
                        checkLower = false;

                        if (upperTerm != null)
                        {
                            int compare = String.CompareOrdinal(upperTerm, current.Text());

                            // stop once beyond the upper term, or on the upper term itself
                            // when the upper bound is exclusive
                            if ((compare < 0) || (!includeUpper && compare == 0))
                            {
                                break;
                            }
                        }

                        // term is in range: flag every document that contains it
                        termDocs.Seek(current);
                        while (termDocs.Next())
                        {
                            bits.Set(termDocs.Doc(), true);
                        }
                    }
                    while (enumerator.Next());
                }
                finally
                {
                    termDocs.Close();
                }
            }
            finally
            {
                enumerator.Close();
            }

            return bits;
        }
Example #7
0
        /// <summary>
        /// Indexes seven short phrases, expands the prefixes "pi" and "blue" into term lists by
        /// enumerating the index, and checks the string form and hit counts of the resulting
        /// MultiPhraseQuery instances, including a slop of 1 and the rejection of terms from
        /// different fields.
        /// </summary>
        public virtual void  TestPhrasePrefix()
        {
            RAMDirectory indexStore = new RAMDirectory();
            IndexWriter  writer     = new IndexWriter(indexStore, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);

            Add("blueberry pie", writer);
            Add("blueberry strudel", writer);
            Add("blueberry pizza", writer);
            Add("blueberry chewing gum", writer);
            Add("bluebird pizza", writer);
            Add("bluebird foobar pizza", writer);
            Add("piccadilly circus", writer);
            writer.Optimize();
            writer.Close();

            IndexSearcher searcher = new IndexSearcher(indexStore);

            // search for "blueberry pi*":
            MultiPhraseQuery query1 = new MultiPhraseQuery();
            // search for "strawberry pi*":
            MultiPhraseQuery query2 = new MultiPhraseQuery();

            query1.Add(new Term("body", "blueberry"));
            query2.Add(new Term("body", "strawberry"));

            System.Collections.ArrayList termsWithPrefix = new System.Collections.ArrayList();
            IndexReader ir = IndexReader.Open(indexStore);

            // this TermEnum gives "piccadilly", "pie" and "pizza".
            System.String prefix = "pi";
            TermEnum      te     = ir.Terms(new Term("body", prefix));

            do
            {
                if (te.Term().Text().StartsWith(prefix))
                {
                    termsWithPrefix.Add(te.Term());
                }
            }while (te.Next());
            te.Close(); // release the enumeration before the variable is reused below

            query1.Add((Term[])termsWithPrefix.ToArray(typeof(Term)));
            Assert.AreEqual("body:\"blueberry (piccadilly pie pizza)\"", query1.ToString());
            query2.Add((Term[])termsWithPrefix.ToArray(typeof(Term)));
            Assert.AreEqual("body:\"strawberry (piccadilly pie pizza)\"", query2.ToString());

            ScoreDoc[] result;
            result = searcher.Search(query1, null, 1000).scoreDocs;
            Assert.AreEqual(2, result.Length);
            result = searcher.Search(query2, null, 1000).scoreDocs;
            Assert.AreEqual(0, result.Length);

            // search for "blue* pizza":
            MultiPhraseQuery query3 = new MultiPhraseQuery();

            termsWithPrefix.Clear();
            prefix = "blue";
            te     = ir.Terms(new Term("body", prefix));
            do
            {
                if (te.Term().Text().StartsWith(prefix))
                {
                    termsWithPrefix.Add(te.Term());
                }
            }while (te.Next());
            te.Close();
            query3.Add((Term[])termsWithPrefix.ToArray(typeof(Term)));
            query3.Add(new Term("body", "pizza"));

            result = searcher.Search(query3, null, 1000).scoreDocs;
            Assert.AreEqual(2, result.Length);             // blueberry pizza, bluebird pizza
            Assert.AreEqual("body:\"(blueberry bluebird) pizza\"", query3.ToString());

            // test slop:
            query3.SetSlop(1);
            result = searcher.Search(query3, null, 1000).scoreDocs;
            Assert.AreEqual(3, result.Length);             // blueberry pizza, bluebird pizza, bluebird foobar pizza

            MultiPhraseQuery query4 = new MultiPhraseQuery();

            try
            {
                query4.Add(new Term("field1", "foo"));
                query4.Add(new Term("field2", "foobar"));
                Assert.Fail();
            }
            catch (System.ArgumentException)
            {
                // okay, all terms must belong to the same field
            }

            // the reader was only needed for the term enumerations above
            ir.Close();
            searcher.Close();
            indexStore.Close();
        }
Example #8
0
            /// <summary>
            /// Builds the <c>StringIndex</c> for the field named by <c>entryKey.field</c>: an
            /// array of the field's term texts in enumeration order (slot 0 is a null entry for
            /// documents without a term) and a per-document array holding the slot of the term
            /// each document contains. Enumeration stops without error once the term array,
            /// sized <c>reader.MaxDoc() + 1</c>, is full.
            /// </summary>
            /// <param name="reader">Reader whose terms and postings are scanned.</param>
            /// <param name="entryKey">Cache entry whose <c>field</c> member names the field.</param>
            /// <returns>The populated <c>StringIndex</c>.</returns>
            protected internal override System.Object CreateValue(IndexReader reader, Entry entryKey)
            {
                System.String   field    = StringHelper.Intern((System.String)entryKey.field);
                int[]           retArray = new int[reader.MaxDoc()];
                System.String[] mterms   = new System.String[reader.MaxDoc() + 1];
                int             t        = 0; // current term number

                // an entry for documents that have no terms in this field
                // should a document with no terms be at top or bottom?
                // this puts them at the top - if it is changed, FieldDocSortedHitQueue
                // needs to change as well.
                mterms[t++] = null;

                // Nested try/finally: termDocs is released even if opening the enumeration
                // throws, and a failure closing one resource does not skip closing the other.
                TermDocs termDocs = reader.TermDocs();
                try
                {
                    TermEnum termEnum = reader.Terms(new Term(field));
                    try
                    {
                        do
                        {
                            Term term = termEnum.Term();

                            // stop at the end of the field, or when every slot is taken
                            if (term == null || term.Field() != field || t >= mterms.Length)
                            {
                                break;
                            }

                            // store term text
                            mterms[t] = term.Text();

                            termDocs.Seek(termEnum);
                            while (termDocs.Next())
                            {
                                retArray[termDocs.Doc()] = t;
                            }

                            t++;
                        }
                        while (termEnum.Next());
                    }
                    finally
                    {
                        termEnum.Close();
                    }
                }
                finally
                {
                    termDocs.Close();
                }

                // t is at least 1 here because of the leading null entry, so a field without
                // terms is trimmed to a single null entry by the same path as any other field.
                if (t < mterms.Length)
                {
                    // if there are less terms than documents,
                    // trim off the dead array space
                    System.String[] terms = new System.String[t];
                    Array.Copy(mterms, 0, terms, 0, t);
                    mterms = terms;
                }

                return new StringIndex(retArray, mterms);
            }
Example #9
0
        /// <summary>
        /// Expands this range into a <c>BooleanQuery</c> by passing every term of the field
        /// returned by <c>GetField()</c> that falls inside the range to <c>AddTermToQuery</c>.
        /// Terms are compared with <c>collator</c> when one is set and with
        /// <c>String.CompareOrdinal</c> otherwise; <c>inclusive</c> decides whether terms equal
        /// to a bound match.
        /// </summary>
        /// <param name="reader">Reader whose terms are enumerated.</param>
        /// <returns>The BooleanQuery holding whatever <c>AddTermToQuery</c> added for the in-range terms.</returns>
        public override Query Rewrite(IndexReader reader)
        {
            BooleanQuery query     = new BooleanQuery(true);
            string       testField = GetField();

            if (collator != null)
            {
                // Resolve the bound texts before opening the enumerator so nothing is held
                // open while they are computed.
                string lowerTermText = lowerTerm != null ? lowerTerm.Text() : null;
                string upperTermText = upperTerm != null ? upperTerm.Text() : null;

                // Collation order need not match index order, so the whole field is scanned
                // from its first term and every term is tested against both bounds.
                TermEnum enumerator = reader.Terms(new Term(testField, ""));
                try
                {
                    do
                    {
                        Term term = enumerator.Term();
                        if (term == null || term.Field() != testField)
                        {
                            // The enumeration has left testField; stop here, exactly as the
                            // ordinal branch below does, instead of walking every remaining
                            // term only to reject it.
                            break;
                        }

                        string text = term.Text();
                        if ((lowerTermText == null ||
                             (inclusive ? collator.Compare(text, lowerTermText) >= 0 : collator.Compare(text, lowerTermText) > 0))
                            &&
                            (upperTermText == null ||
                             (inclusive ? collator.Compare(text, upperTermText) <= 0 : collator.Compare(text, upperTermText) < 0)))
                        {
                            AddTermToQuery(term, query);
                        }
                    }
                    while (enumerator.Next());
                }
                finally
                {
                    enumerator.Close();
                }
            }
            else
            {
                TermEnum enumerator = reader.Terms(lowerTerm);

                try
                {
                    // Only an exclusive lower bound needs checking; once one term has passed
                    // it, the flag is cleared and later terms skip the comparison.
                    bool checkLower = !inclusive;

                    do
                    {
                        Term term = enumerator.Term();
                        if (term == null || term.Field() != testField)
                        {
                            break;
                        }

                        if (!checkLower || String.CompareOrdinal(term.Text(), lowerTerm.Text()) > 0)
                        {
                            checkLower = false;
                            if (upperTerm != null)
                            {
                                int compare = String.CompareOrdinal(upperTerm.Text(), term.Text());

                                /* if beyond the upper term, or is exclusive and
                                 * this is equal to the upper term, break out */
                                if ((compare < 0) || (!inclusive && compare == 0))
                                {
                                    break;
                                }
                            }
                            AddTermToQuery(term, query); // Found a match
                        }
                    }
                    while (enumerator.Next());
                }
                finally
                {
                    enumerator.Close();
                }
            }
            return query;
        }