Example #1
0
		/* Walk directory hierarchy in uid order, while keeping uid iterator from
		 * existing index in sync.  Mismatches indicate one of: (a) old documents to
		 * be deleted; (b) unchanged documents, to be left alone; or (c) new
		 * documents, to be indexed.
		 */

        /// <summary>
        /// Indexes the documents under <paramref name="file"/>, either from scratch or as an
        /// incremental update against the index stored in <paramref name="index"/>.
        /// </summary>
        /// <param name="file">root passed on to the single-argument <c>IndexDocs</c> overload</param>
        /// <param name="index">location of the existing index; only opened when
        /// <paramref name="create"/> is false</param>
        /// <param name="create">true to index without consulting an existing index; false to
        /// open the existing index, walk its "uid" terms and delete the stale ones</param>
        private static void IndexDocs(System.IO.DirectoryInfo file, System.IO.DirectoryInfo index, bool create)
        {
            if (create)
            {
                // don't have an existing index to reconcile against
                IndexDocs(file);
                return;
            }

            // incrementally update
            reader = IndexReader.Open(FSDirectory.Open(index), false); // open existing index
            try
            {
                uidIter = reader.Terms(new Term("uid", "")); // init uid iterator
                try
                {
                    IndexDocs(file);

                    if (deleting)
                    {
                        // delete rest of stale docs
                        // NOTE(review): the field test is a reference comparison, so it presumably
                        // relies on field names being interned - confirm against Term.
                        while (uidIter.Term() != null && (System.Object) uidIter.Term().Field == (System.Object) "uid")
                        {
                            System.Console.Out.WriteLine("deleting " + HTMLDocument.Uid2url(uidIter.Term().Text));
                            reader.DeleteDocuments(uidIter.Term());
                            uidIter.Next();
                        }
                        deleting = false;
                    }
                }
                finally
                {
                    // close uid iterator even when indexing or deleting throws
                    uidIter.Close();
                }
            }
            finally
            {
                // close existing index even when indexing or deleting throws
                reader.Close();
            }
        }
Example #2
0
        /// <summary> Builds the bit set for this filter: a bit is true for each document
        /// that should be permitted in search results, and false for each document
        /// that should not.
        /// </summary>
        /// <param name="reader">reader whose terms and postings are scanned</param>
        /// <returns>a bit array whose length is <c>reader.MaxDoc()</c> rounded up to a
        /// multiple of 64</returns>
        public override System.Collections.BitArray Bits(IndexReader reader)
        {
            // round the capacity up to a whole number of 64-bit words
            int maxDoc    = reader.MaxDoc();
            int wordCount = maxDoc / 64;
            if (maxDoc % 64 != 0)
            {
                wordCount++;
            }
            System.Collections.BitArray permitted = new System.Collections.BitArray(wordCount * 64);

            // start at the lower bound when there is one, otherwise at the first term of the field
            Term     startTerm = new Term(fieldName, null != lowerTerm ? lowerTerm : "");
            TermEnum termEnum  = reader.Terms(startTerm);

            try
            {
                if (termEnum.Term() == null)
                {
                    return permitted;
                }

                // an exclusive lower bound means terms up to and including lowerTerm are skipped
                bool skipLower = !includeLower;

                TermDocs postings = reader.TermDocs();
                try
                {
                    do
                    {
                        Term current = termEnum.Term();
                        if (current == null || !current.Field().Equals(fieldName))
                        {
                            // ran out of terms, or walked off the end of this field
                            break;
                        }

                        if (skipLower && null != lowerTerm && String.CompareOrdinal(current.Text(), lowerTerm) <= 0)
                        {
                            // still at or below the exclusive lower bound
                            continue;
                        }
                        skipLower = false;

                        if (upperTerm != null)
                        {
                            int compare = String.CompareOrdinal(upperTerm, current.Text());

                            // stop when past the upper term, or when sitting exactly on an
                            // exclusive upper term
                            if (compare < 0 || (compare == 0 && !includeUpper))
                            {
                                break;
                            }
                        }

                        // term is inside the range: mark every document that contains it
                        postings.Seek(termEnum.Term());
                        while (postings.Next())
                        {
                            permitted.Set(postings.Doc(), true);
                        }
                    }while (termEnum.Next());
                }
                finally
                {
                    postings.Close();
                }
            }
            finally
            {
                termEnum.Close();
            }

            return permitted;
        }
Example #3
0
            /// <summary>
            /// Builds the <c>StringIndex</c> for the field named by <paramref name="entryKey"/>:
            /// an array of the field's term texts in enumeration order plus, for every
            /// document, the position of its term in that array.
            /// </summary>
            /// <param name="reader">reader whose terms and postings are enumerated</param>
            /// <param name="entryKey">entry whose <c>field</c> member names the field to index</param>
            /// <returns>a <c>StringIndex</c> over the per-document positions and the term texts;
            /// slot 0 of the term array is <c>null</c> and is the position recorded for documents
            /// that have no term in the field</returns>
            /// <exception cref="System.IO.IOException">the field holds more terms than
            /// <c>reader.MaxDoc()</c> (the message attributes this to a tokenized field)</exception>
            protected internal override System.Object CreateValue(IndexReader reader, Entry entryKey)
            {
                System.String   field    = StringHelper.Intern((System.String)entryKey.field);
                int[]           retArray = new int[reader.MaxDoc()];
                System.String[] mterms   = new System.String[reader.MaxDoc() + 1];
                int             t        = 0; // current term number

                // an entry for documents that have no terms in this field
                // should a document with no terms be at top or bottom?
                // this puts them at the top - if it is changed, FieldDocSortedHitQueue
                // needs to change as well.
                mterms[t++] = null;

                // Each resource gets its own try/finally so that a failure while opening or
                // closing one of them cannot leak the other.
                TermDocs termDocs = reader.TermDocs();
                try
                {
                    TermEnum termEnum = reader.Terms(new Term(field));
                    try
                    {
                        do
                        {
                            Term term = termEnum.Term();

                            // reference comparison: field was interned above
                            if (term == null || (System.Object)term.Field() != (System.Object)field)
                            {
                                break;
                            }

                            // store term text
                            // we expect that there is at most one term per document
                            if (t >= mterms.Length)
                            {
                                //LUCENENET-388
                                throw new System.IO.IOException("there are more terms than " + "documents in field \"" + field + "\", but it's impossible to sort on " + "tokenized fields");
                            }
                            mterms[t] = term.Text();

                            termDocs.Seek(termEnum);
                            while (termDocs.Next())
                            {
                                retArray[termDocs.Doc()] = t;
                            }

                            t++;
                        }while (termEnum.Next());
                    }
                    finally
                    {
                        termEnum.Close();
                    }
                }
                finally
                {
                    termDocs.Close();
                }

                // t is at least 1 here (the null entry above), so with no terms the trim
                // below already leaves a single null entry.
                if (t < mterms.Length)
                {
                    // if there are less terms than documents,
                    // trim off the dead array space
                    System.String[] terms = new System.String[t];
                    Array.Copy(mterms, 0, terms, 0, t);
                    mterms = terms;
                }

                return new StringIndex(retArray, mterms);
            }
Example #4
0
        /// <summary>
        /// Returns the <c>StringIndex</c> for <paramref name="field"/>, building and storing it
        /// when <c>Lookup</c> has no value for this reader and field.
        /// </summary>
        /// <param name="reader">reader whose terms and postings are enumerated</param>
        /// <param name="field">name of the field to index; interned before use</param>
        /// <returns>a <c>StringIndex</c> over the per-document positions and the term texts;
        /// slot 0 of the term array is <c>null</c> and is the position recorded for documents
        /// that have no term in the field</returns>
        /// <exception cref="System.IO.IOException">the field holds more terms than
        /// <c>reader.MaxDoc()</c> (the message attributes this to a tokenized field).
        /// <c>IOException</c> derives from <c>SystemException</c>, so handlers written for
        /// the previously thrown <c>SystemException</c> still apply.</exception>
        public virtual StringIndex GetStringIndex(IndexReader reader, System.String field)
        {
            field = String.Intern(field);
            System.Object ret = Lookup(reader, field, Lucene.Net.Search.FieldCache_Fields.STRING_INDEX, null);
            if (ret != null)
            {
                return (StringIndex)ret;
            }

            int[]           retArray = new int[reader.MaxDoc()];
            System.String[] mterms   = new System.String[reader.MaxDoc() + 1];
            int             t        = 0; // current term number

            // an entry for documents that have no terms in this field
            // should a document with no terms be at top or bottom?
            // this puts them at the top - if it is changed, FieldDocSortedHitQueue
            // needs to change as well.
            mterms[t++] = null;

            // Each resource gets its own try/finally so that a failure while opening or
            // closing one of them cannot leak the other.
            TermDocs termDocs = reader.TermDocs();
            try
            {
                TermEnum termEnum = reader.Terms(new Term(field, ""));
                try
                {
                    do
                    {
                        Term term = termEnum.Term();
                        if (term == null || term.Field() != field)
                        {
                            break;
                        }

                        // store term text
                        // we expect that there is at most one term per document
                        if (t >= mterms.Length)
                        {
                            // same exception type as the sibling CreateValue (LUCENENET-388)
                            throw new System.IO.IOException("there are more terms than " + "documents in field \"" + field + "\", but it's impossible to sort on " + "tokenized fields");
                        }
                        mterms[t] = term.Text();

                        termDocs.Seek(termEnum);
                        while (termDocs.Next())
                        {
                            retArray[termDocs.Doc()] = t;
                        }

                        t++;
                    }while (termEnum.Next());
                }
                finally
                {
                    termEnum.Close();
                }
            }
            finally
            {
                termDocs.Close();
            }

            // t is at least 1 here (the null entry above), so with no terms the trim
            // below already leaves a single null entry.
            if (t < mterms.Length)
            {
                // if there are less terms than documents,
                // trim off the dead array space
                System.String[] terms = new System.String[t];
                Array.Copy(mterms, 0, terms, 0, t);
                mterms = terms;
            }

            StringIndex value_Renamed = new StringIndex(retArray, mterms);
            Store(reader, field, Lucene.Net.Search.FieldCache_Fields.STRING_INDEX, null, value_Renamed);
            return value_Renamed;
        }
            /// <summary>
            /// Builds the <c>StringIndex</c> for the field named by <paramref name="entryKey"/>:
            /// an array of the field's term texts in enumeration order plus, for every
            /// document, the position of its term in that array.
            /// </summary>
            /// <param name="reader">reader whose terms and postings are enumerated</param>
            /// <param name="entryKey">entry whose <c>field</c> member names the field to index</param>
            /// <returns>a <c>StringIndex</c> over the per-document positions and the term texts;
            /// slot 0 of the term array is <c>null</c> and is the position recorded for documents
            /// that have no term in the field. Collection stops silently once the term array
            /// (<c>reader.MaxDoc + 1</c> slots) is full.</returns>
            protected internal override object CreateValue(IndexReader reader, Entry entryKey)
            {
                string field = StringHelper.Intern(entryKey.field);

                int[]    retArray = new int[reader.MaxDoc];
                string[] mterms   = new string[reader.MaxDoc + 1];
                int      t        = 0; // current term number

                // an entry for documents that have no terms in this field
                // should a document with no terms be at top or bottom?
                // this puts them at the top - if it is changed, FieldDocSortedHitQueue
                // needs to change as well.
                mterms[t++] = null;

                // Each resource gets its own try/finally so that a failure while opening or
                // closing one of them cannot leak the other.
                TermDocs termDocs = reader.TermDocs();
                try
                {
                    TermEnum termEnum = reader.Terms(new Term(field));
                    try
                    {
                        do
                        {
                            Term term = termEnum.Term;

                            // stop at the end of the field, or when the term array is full
                            if (term == null || term.Field != field || t >= mterms.Length)
                            {
                                break;
                            }

                            // store term text
                            mterms[t] = term.Text;

                            termDocs.Seek(termEnum);
                            while (termDocs.Next())
                            {
                                retArray[termDocs.Doc] = t;
                            }

                            t++;
                        }while (termEnum.Next());
                    }
                    finally
                    {
                        termEnum.Close();
                    }
                }
                finally
                {
                    termDocs.Close();
                }

                // t is at least 1 here (the null entry above), so with no terms the trim
                // below already leaves a single null entry.
                if (t < mterms.Length)
                {
                    // if there are less terms than documents,
                    // trim off the dead array space
                    string[] terms = new string[t];
                    Array.Copy(mterms, 0, terms, 0, t);
                    mterms = terms;
                }

                return new StringIndex(retArray, mterms);
            }
Example #6
0
            /// <summary>
            /// Picks a sort representation for a field by inspecting the text of the first
            /// term the enumeration returns for it, then fetches the matching values from
            /// the enclosing cache instance.
            /// </summary>
            /// <param name="reader">reader whose terms are inspected and whose values are loaded</param>
            /// <param name="fieldKey">the field name, as a string</param>
            /// <returns>the result of <c>GetInts</c>, <c>GetLongs</c>, <c>GetFloats</c> or
            /// <c>GetStringIndex</c>, tried in that order</returns>
            /// <exception cref="System.SystemException">the enumeration yields no term, or its
            /// first term belongs to a different field</exception>
            protected internal override System.Object CreateValue(IndexReader reader, System.Object fieldKey)
            {
                System.String field    = String.Intern(((System.String)fieldKey));
                TermEnum      termEnum = reader.Terms(new Term(field, ""));

                try
                {
                    Term first = termEnum.Term();
                    if (first == null)
                    {
                        throw new System.SystemException("no terms in field " + field + " - cannot determine sort type");
                    }

                    // reference comparison: field was interned above
                    if ((System.Object)first.Field() != (System.Object)field)
                    {
                        throw new System.SystemException("field \"" + field + "\" does not appear to be indexed");
                    }

                    System.String sample   = first.Text().Trim();
                    System.Object detected = null;

                    try
                    {
                        int  asInt;
                        long asLong;

                        if (int.TryParse(sample, out asInt))
                        {
                            detected = Enclosing_Instance.GetInts(reader, field);
                        }
                        else if (long.TryParse(sample, out asLong))
                        {
                            detected = ((ExtendedFieldCacheImpl)Enclosing_Instance).GetLongs(reader, field);
                        }
                        else
                        {
                            try
                            {
                                // parsed only to find out whether the text is a float
                                SupportClass.Single.Parse(sample);
                                detected = Enclosing_Instance.GetFloats(reader, field);
                            }
                            catch (System.FormatException)
                            {
                                // not numeric at all: sort as strings
                                detected = Enclosing_Instance.GetStringIndex(reader, field);
                            }
                        }
                    }
                    catch (System.Exception)
                    {
                        // any failure while loading a numeric representation falls back to strings
                        detected = Enclosing_Instance.GetStringIndex(reader, field);
                    }

                    return detected;
                }
                finally
                {
                    termEnum.Close();
                }
            }
 /// <summary>Closes the enumeration to further activity, freeing resources.
 /// Calling it again after the enumeration has been closed is a no-op.  </summary>
 public override void  Close()
 {
     // actualEnum is nulled below, so guard against a second Close()
     if (actualEnum != null)
     {
         actualEnum.Close();
     }
     currentTerm = null;
     actualEnum  = null;
 }
Example #8
0
        /// <summary>
        /// Expands this range into a <c>BooleanQuery</c> by passing every term of the field
        /// that lies inside the range to <c>AddTermToQuery</c>.
        /// </summary>
        /// <remarks>
        /// With a collator, every term of the field is compared against the bounds using
        /// <c>collator.Compare</c>. Without one, enumeration starts at <c>lowerTerm</c>,
        /// compares ordinally and stops at the upper bound.
        /// </remarks>
        /// <param name="reader">reader whose terms are enumerated</param>
        /// <returns>the boolean query holding the matching terms</returns>
        public override Query Rewrite(IndexReader reader)
        {
            BooleanQuery query     = new BooleanQuery(true);
            string       testField = GetField();

            if (collator != null)
            {
                TermEnum enumerator    = reader.Terms(new Term(testField, ""));
                string   lowerTermText = lowerTerm != null?lowerTerm.Text() : null;
                string   upperTermText = upperTerm != null?upperTerm.Text() : null;

                try
                {
                    do
                    {
                        Term term = enumerator.Term();
                        if (term == null || term.Field() != testField)
                        {
                            // Past the last term of this field. The non-collator branch below
                            // already stops here; doing the same avoids walking the rest of
                            // the index. Relies on the enumeration being ordered by field,
                            // as that branch already assumes.
                            break;
                        }

                        string text = term.Text();

                        if (lowerTermText != null)
                        {
                            int lowerCompare = collator.Compare(text, lowerTermText);
                            if (inclusive ? lowerCompare < 0 : lowerCompare <= 0)
                            {
                                continue; // below the lower bound
                            }
                        }

                        if (upperTermText != null)
                        {
                            int upperCompare = collator.Compare(text, upperTermText);
                            if (inclusive ? upperCompare > 0 : upperCompare >= 0)
                            {
                                // above the upper bound; collation order may differ from
                                // enumeration order, so keep scanning the field
                                continue;
                            }
                        }

                        AddTermToQuery(term, query);
                    }while (enumerator.Next());
                }
                finally
                {
                    enumerator.Close();
                }
            }
            else
            {
                TermEnum enumerator = reader.Terms(lowerTerm);

                try
                {
                    // an exclusive range must skip terms up to and including lowerTerm
                    bool checkLower = !inclusive;

                    do
                    {
                        Term term = enumerator.Term();
                        if (term == null || term.Field() != testField)
                        {
                            break;
                        }

                        if (!checkLower || String.CompareOrdinal(term.Text(), lowerTerm.Text()) > 0)
                        {
                            checkLower = false;
                            if (upperTerm != null)
                            {
                                int compare = String.CompareOrdinal(upperTerm.Text(), term.Text());

                                /* if beyond the upper term, or is exclusive and
                                 * this is equal to the upper term, break out */
                                if ((compare < 0) || (!inclusive && compare == 0))
                                {
                                    break;
                                }
                            }
                            AddTermToQuery(term, query); // Found a match
                        }
                    }while (enumerator.Next());
                }
                finally
                {
                    enumerator.Close();
                }
            }
            return query;
        }