/// <summary>
/// Indexes the documents under <paramref name="file"/>, either from scratch or
/// incrementally against the existing index stored in <paramref name="index"/>.
/// </summary>
/// <remarks>
/// In incremental mode the directory hierarchy is walked in uid order while the
/// uid iterator from the existing index is kept in sync. Mismatches indicate one of:
/// (a) old documents to be deleted; (b) unchanged documents, to be left alone;
/// or (c) new documents, to be indexed.
/// </remarks>
/// <param name="file">Root directory handed to the single-argument <c>IndexDocs</c> overload.</param>
/// <param name="index">Directory of the existing index; only opened when <paramref name="create"/> is false.</param>
/// <param name="create">True to index without consulting an existing index; false to update incrementally.</param>
private static void IndexDocs(System.IO.DirectoryInfo file, System.IO.DirectoryInfo index, bool create)
{
    if (create)
    {
        // No existing index to reconcile against.
        IndexDocs(file);
        return;
    }

    // Incremental update: open the existing index and position the uid iterator.
    reader = IndexReader.Open(FSDirectory.Open(index), false);
    try
    {
        uidIter = reader.Terms(new Term("uid", ""));
        try
        {
            IndexDocs(file);

            // The 'deleting' flag is maintained outside this method (presumably by the
            // directory walk); when set, every remaining uid term is treated as stale.
            if (deleting)
            {
                // Value comparison instead of reference comparison, so the check does
                // not depend on how the field name happens to be interned.
                while (uidIter.Term() != null && uidIter.Term().Field == "uid")
                {
                    System.Console.Out.WriteLine("deleting " + HTMLDocument.Uid2url(uidIter.Term().Text));
                    reader.DeleteDocuments(uidIter.Term());
                    uidIter.Next();
                }
                deleting = false;
            }
        }
        finally
        {
            // Close the uid iterator even if indexing or deleting failed.
            uidIter.Close();
        }
    }
    finally
    {
        // Close the existing index even if an exception escaped above.
        reader.Close();
    }
}
/// <summary>
/// Builds a per-document array of comparables reflecting the natural order of
/// the term values in the given field.
/// </summary>
/// <param name="reader">Index whose documents are being described; sizes the result via <c>MaxDoc()</c>.</param>
/// <param name="enumerator">Term enumeration supplying the term values and used to seek the TermDocs.</param>
/// <param name="fieldname">Field whose terms are converted to comparables.</param>
/// <returns>
/// An array with one slot per document number; a slot holds the comparable of the
/// last enumerated term containing that document, or null if none did.
/// </returns>
/// <exception cref="System.SystemException">The enumerator has no current term.</exception>
public static System.IComparable[] FillCache(IndexReader reader, TermEnum enumerator, System.String fieldname)
{
    System.String internedField = String.Intern(fieldname);
    System.IComparable[] ordered = new System.IComparable[reader.MaxDoc()];

    // Empty index: nothing to fill, and the enumerator is not inspected.
    if (ordered.Length == 0)
    {
        return ordered;
    }

    TermDocs postings = reader.TermDocs();
    try
    {
        if (enumerator.Term() == null)
        {
            throw new System.SystemException("no terms in field " + internedField);
        }

        bool more = true;
        while (more)
        {
            Term current = enumerator.Term();

            // Stop as soon as the enumeration leaves the requested field.
            if (current.Field() != internedField)
            {
                break;
            }

            System.IComparable comparable = GetComparable(current.Text());
            postings.Seek(enumerator);
            while (postings.Next())
            {
                ordered[postings.Doc()] = comparable;
            }

            more = enumerator.Next();
        }
    }
    finally
    {
        postings.Close();
    }

    return ordered;
}
/// <summary>
/// Returns a DocIdSet with documents that should be permitted in search results:
/// those containing a term of <c>fieldName</c> that lies between <c>lowerTerm</c>
/// and <c>upperTerm</c> (either bound may be null, meaning open-ended).
/// </summary>
/// <remarks>
/// With a collator, every term of the field is tested with <c>collator.Compare</c>.
/// Without one, terms are compared by ordinal string comparison, the enumeration
/// starts at <c>lowerTerm</c>, and it stops once the upper bound is passed.
/// Both branches stop when the enumeration leaves <c>fieldName</c>; this relies on
/// the term enumeration being ordered by field first, then by text.
/// </remarks>
/// <param name="reader">Index to read terms and postings from.</param>
/// <returns>A bit set sized to <c>reader.MaxDoc()</c> with one bit set per matching document.</returns>
public override DocIdSet GetDocIdSet(IndexReader reader)
{
    OpenBitSet bits = new OpenBitSet(reader.MaxDoc());

    // Collation order need not match index order, so a collated scan starts at the
    // beginning of the field instead of at lowerTerm.
    TermEnum enumerator = (null != lowerTerm && collator == null
        ? reader.Terms(new Term(fieldName, lowerTerm))
        : reader.Terms(new Term(fieldName)));

    try
    {
        if (enumerator.Term() == null)
        {
            return bits;
        }

        TermDocs termDocs = reader.TermDocs();
        try
        {
            if (collator != null)
            {
                do
                {
                    Term term = enumerator.Term();

                    // Past the last term of this field: no later term can match,
                    // so stop instead of walking the rest of the index.
                    if (term == null || !term.Field().Equals(fieldName))
                    {
                        break;
                    }

                    bool aboveLower = lowerTerm == null
                        || (includeLower
                            ? collator.Compare(term.Text(), lowerTerm) >= 0
                            : collator.Compare(term.Text(), lowerTerm) > 0);

                    bool belowUpper = aboveLower
                        && (upperTerm == null
                            || (includeUpper
                                ? collator.Compare(term.Text(), upperTerm) <= 0
                                : collator.Compare(term.Text(), upperTerm) < 0));

                    if (belowUpper)
                    {
                        // term in range, lookup docs
                        termDocs.Seek(enumerator.Term());
                        while (termDocs.Next())
                        {
                            bits.Set(termDocs.Doc());
                        }
                    }
                }
                while (enumerator.Next());
            }
            else
            {
                // null collator; using ordinal (code unit) ordering.
                // The lower bound only needs checking until the first accepted term.
                bool checkLower = !includeLower;

                do
                {
                    Term term = enumerator.Term();
                    if (term == null || !term.Field().Equals(fieldName))
                    {
                        break;
                    }

                    if (!checkLower || null == lowerTerm || String.CompareOrdinal(term.Text(), lowerTerm) > 0)
                    {
                        checkLower = false;
                        if (upperTerm != null)
                        {
                            int compare = String.CompareOrdinal(upperTerm, term.Text());

                            // if beyond the upper term, or is exclusive and
                            // this is equal to the upper term, break out
                            if ((compare < 0) || (!includeUpper && compare == 0))
                            {
                                break;
                            }
                        }

                        // we have a good term, find the docs
                        termDocs.Seek(enumerator.Term());
                        while (termDocs.Next())
                        {
                            bits.Set(termDocs.Doc());
                        }
                    }
                }
                while (enumerator.Next());
            }
        }
        finally
        {
            termDocs.Close();
        }
    }
    finally
    {
        enumerator.Close();
    }

    return bits;
}
/// <summary>
/// Builds the <c>StringIndex</c> for a field: an array mapping each document
/// number to a term ordinal, plus the array of term texts indexed by ordinal.
/// </summary>
/// <remarks>
/// Ordinal 0 is reserved (null text) for documents that have no term in the field,
/// which puts such documents at the top; if that is changed,
/// FieldDocSortedHitQueue needs to change as well.
/// </remarks>
/// <param name="reader">Index to read terms and postings from.</param>
/// <param name="fieldKey">Field name, passed as a <see cref="System.String"/>.</param>
/// <returns>A <c>StringIndex</c> wrapping the ordinal array and the trimmed term array.</returns>
/// <exception cref="System.SystemException">The field has more terms than the index has documents.</exception>
protected internal override object CreateValue(IndexReader reader, object fieldKey)
{
    System.String field = String.Intern(((System.String)fieldKey));
    int[] retArray = new int[reader.MaxDoc()];
    System.String[] mterms = new System.String[reader.MaxDoc() + 1];

    int t = 0; // current term number

    // an entry for documents that have no terms in this field
    mterms[t++] = null;

    // Nested try/finally so each resource is released even if acquiring or
    // closing the other one throws.
    TermDocs termDocs = reader.TermDocs();
    try
    {
        TermEnum termEnum = reader.Terms(new Term(field));
        try
        {
            do
            {
                Term term = termEnum.Term();

                // Value comparison: correct whether or not both strings come from
                // the same intern pool.
                if (term == null || term.Field() != field)
                {
                    break;
                }

                // store term text
                // we expect that there is at most one term per document
                if (t >= mterms.Length)
                {
                    throw new System.SystemException("there are more terms than " + "documents in field \"" + field + "\", but it's impossible to sort on " + "tokenized fields");
                }
                mterms[t] = term.Text();

                termDocs.Seek(termEnum);
                while (termDocs.Next())
                {
                    retArray[termDocs.Doc()] = t;
                }

                t++;
            }
            while (termEnum.Next());
        }
        finally
        {
            termEnum.Close();
        }
    }
    finally
    {
        termDocs.Close();
    }

    // t is at least 1 here (the reserved null entry), so only trimming is needed:
    // if there are less terms than documents, trim off the dead array space.
    if (t < mterms.Length)
    {
        System.String[] terms = new System.String[t];
        Array.Copy(mterms, 0, terms, 0, t);
        mterms = terms;
    }

    StringIndex value_Renamed = new StringIndex(retArray, mterms);
    return value_Renamed;
}
/// <summary>
/// Walks the terms produced by <paramref name="enumerator"/>, passes every document
/// number read for each term to <c>HandleDoc</c>, and finally reports the number of
/// visited terms to <paramref name="query"/>.
/// </summary>
/// <param name="query">Query whose total term count is incremented after the walk.</param>
/// <param name="reader">Index supplying the <c>TermDocs</c> used to read postings.</param>
/// <param name="enumerator">Term enumeration to walk; the walk ends when its current term is null or it is exhausted.</param>
public virtual void Generate(MultiTermQuery query, IndexReader reader, TermEnum enumerator)
{
    // Postings are read in batches of up to 32 entries.
    int[] docBuffer = new int[32];
    int[] freqBuffer = new int[32];

    TermDocs termDocs = reader.TermDocs();
    try
    {
        int visitedTerms = 0;
        bool hasMore = true;

        while (hasMore)
        {
            Term current = enumerator.Term();
            if (current == null)
            {
                break;
            }

            visitedTerms++;
            termDocs.Seek(current);

            // Drain the postings of this term; a zero-length read means none are left.
            int filled;
            while ((filled = termDocs.Read(docBuffer, freqBuffer)) != 0)
            {
                for (int slot = 0; slot < filled; slot++)
                {
                    HandleDoc(docBuffer[slot]);
                }
            }

            hasMore = enumerator.Next();
        }

        query.IncTotalNumberOfTerms(visitedTerms);
    }
    finally
    {
        termDocs.Close();
    }
}
/// <summary>
/// Returns a bit array with true for documents which should be permitted in
/// search results, and false for those that should not.
/// </summary>
/// <remarks>
/// The array length is <c>reader.MaxDoc()</c> rounded up to a multiple of 64.
/// Terms are compared by ordinal string comparison; the scan starts at
/// <c>lowerTerm</c> (or at the start of the field when it is null) and stops when
/// it leaves <c>fieldName</c> or passes <c>upperTerm</c>.
/// </remarks>
/// <param name="reader">Index to read terms and postings from.</param>
/// <returns>The bit array described above.</returns>
public override System.Collections.BitArray Bits(IndexReader reader)
{
    // Round the document count up to a whole number of 64-bit words.
    int maxDoc = reader.MaxDoc();
    int wordCount = maxDoc / 64;
    if (maxDoc % 64 != 0)
    {
        wordCount++;
    }
    System.Collections.BitArray permitted = new System.Collections.BitArray(wordCount * 64);

    Term startTerm = null != lowerTerm ? new Term(fieldName, lowerTerm) : new Term(fieldName, "");
    TermEnum enumerator = reader.Terms(startTerm);

    try
    {
        if (enumerator.Term() == null)
        {
            return permitted;
        }

        // With an exclusive lower bound, terms are checked against it until the
        // first one is accepted.
        bool checkLower = !includeLower;

        TermDocs termDocs = reader.TermDocs();
        try
        {
            do
            {
                Term current = enumerator.Term();
                if (current == null || !current.Field().Equals(fieldName))
                {
                    break;
                }

                // Skip terms that do not lie strictly above an exclusive lower bound.
                if (checkLower && null != lowerTerm && String.CompareOrdinal(current.Text(), lowerTerm) <= 0)
                {
                    continue;
                }
                checkLower = false;

                if (upperTerm != null)
                {
                    int upperVsTerm = String.CompareOrdinal(upperTerm, current.Text());

                    // Beyond the upper term, or equal to it while it is exclusive.
                    if (upperVsTerm < 0 || (upperVsTerm == 0 && !includeUpper))
                    {
                        break;
                    }
                }

                // Good term: mark every document that contains it.
                termDocs.Seek(enumerator.Term());
                while (termDocs.Next())
                {
                    permitted.Set(termDocs.Doc(), true);
                }
            }
            while (enumerator.Next());
        }
        finally
        {
            termDocs.Close();
        }
    }
    finally
    {
        enumerator.Close();
    }

    return permitted;
}
/// <summary>
/// Checks <c>MultiPhraseQuery</c> when one phrase position is expanded to all
/// indexed terms sharing a prefix: query text, hit counts, slop, and the rejection
/// of terms from different fields.
/// </summary>
public virtual void TestPhrasePrefix()
{
    RAMDirectory indexStore = new RAMDirectory();
    IndexWriter writer = new IndexWriter(indexStore, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    Add("blueberry pie", writer);
    Add("blueberry strudel", writer);
    Add("blueberry pizza", writer);
    Add("blueberry chewing gum", writer);
    Add("bluebird pizza", writer);
    Add("bluebird foobar pizza", writer);
    Add("piccadilly circus", writer);
    writer.Optimize();
    writer.Close();

    IndexSearcher searcher = new IndexSearcher(indexStore);

    // search for "blueberry pi*":
    MultiPhraseQuery query1 = new MultiPhraseQuery();
    // search for "strawberry pi*":
    MultiPhraseQuery query2 = new MultiPhraseQuery();
    query1.Add(new Term("body", "blueberry"));
    query2.Add(new Term("body", "strawberry"));

    System.Collections.ArrayList termsWithPrefix = new System.Collections.ArrayList();
    IndexReader ir = IndexReader.Open(indexStore);

    // this TermEnum gives "piccadilly", "pie" and "pizza".
    System.String prefix = "pi";
    TermEnum te = ir.Terms(new Term("body", prefix));
    try
    {
        do
        {
            // Ordinal comparison: index terms are not culture-sensitive text.
            if (te.Term().Text().StartsWith(prefix, System.StringComparison.Ordinal))
            {
                termsWithPrefix.Add(te.Term());
            }
        }
        while (te.Next());
    }
    finally
    {
        te.Close();
    }

    query1.Add((Term[])termsWithPrefix.ToArray(typeof(Term)));
    Assert.AreEqual("body:\"blueberry (piccadilly pie pizza)\"", query1.ToString());
    query2.Add((Term[])termsWithPrefix.ToArray(typeof(Term)));
    Assert.AreEqual("body:\"strawberry (piccadilly pie pizza)\"", query2.ToString());

    ScoreDoc[] result;
    result = searcher.Search(query1, null, 1000).scoreDocs;
    Assert.AreEqual(2, result.Length);
    result = searcher.Search(query2, null, 1000).scoreDocs;
    Assert.AreEqual(0, result.Length);

    // search for "blue* pizza":
    MultiPhraseQuery query3 = new MultiPhraseQuery();
    termsWithPrefix.Clear();
    prefix = "blue";
    te = ir.Terms(new Term("body", prefix));
    try
    {
        do
        {
            if (te.Term().Text().StartsWith(prefix, System.StringComparison.Ordinal))
            {
                termsWithPrefix.Add(te.Term());
            }
        }
        while (te.Next());
    }
    finally
    {
        te.Close();
    }

    query3.Add((Term[])termsWithPrefix.ToArray(typeof(Term)));
    query3.Add(new Term("body", "pizza"));

    result = searcher.Search(query3, null, 1000).scoreDocs;
    Assert.AreEqual(2, result.Length); // blueberry pizza, bluebird pizza
    Assert.AreEqual("body:\"(blueberry bluebird) pizza\"", query3.ToString());

    // test slop:
    query3.SetSlop(1);
    result = searcher.Search(query3, null, 1000).scoreDocs;
    Assert.AreEqual(3, result.Length); // blueberry pizza, bluebird pizza, bluebird foobar pizza

    MultiPhraseQuery query4 = new MultiPhraseQuery();
    try
    {
        query4.Add(new Term("field1", "foo"));
        query4.Add(new Term("field2", "foobar"));
        Assert.Fail();
    }
    catch (System.ArgumentException)
    {
        // okay, all terms must belong to the same field
    }

    // Release the reader opened for term enumeration along with the searcher and store.
    ir.Close();
    searcher.Close();
    indexStore.Close();
}
/// <summary>
/// Builds the <c>StringIndex</c> for the field named by <paramref name="entryKey"/>:
/// an array mapping each document number to a term ordinal, plus the array of term
/// texts indexed by ordinal.
/// </summary>
/// <remarks>
/// Ordinal 0 is reserved (null text) for documents that have no term in the field,
/// which puts such documents at the top; if that is changed,
/// FieldDocSortedHitQueue needs to change as well. Enumeration stops silently once
/// the term array (document count + 1 entries) is full.
/// </remarks>
/// <param name="reader">Index to read terms and postings from.</param>
/// <param name="entryKey">Cache entry whose <c>field</c> member names the field.</param>
/// <returns>A <c>StringIndex</c> wrapping the ordinal array and the trimmed term array.</returns>
protected internal override System.Object CreateValue(IndexReader reader, Entry entryKey)
{
    System.String field = StringHelper.Intern((System.String)entryKey.field);
    int[] retArray = new int[reader.MaxDoc()];
    System.String[] mterms = new System.String[reader.MaxDoc() + 1];

    int t = 0; // current term number

    // an entry for documents that have no terms in this field
    mterms[t++] = null;

    // Nested try/finally so each resource is released even if acquiring or
    // closing the other one throws.
    TermDocs termDocs = reader.TermDocs();
    try
    {
        TermEnum termEnum = reader.Terms(new Term(field));
        try
        {
            do
            {
                Term term = termEnum.Term();
                if (term == null || term.Field() != field || t >= mterms.Length)
                {
                    break;
                }

                // store term text
                mterms[t] = term.Text();

                termDocs.Seek(termEnum);
                while (termDocs.Next())
                {
                    retArray[termDocs.Doc()] = t;
                }

                t++;
            }
            while (termEnum.Next());
        }
        finally
        {
            termEnum.Close();
        }
    }
    finally
    {
        termDocs.Close();
    }

    // t is at least 1 here (the reserved null entry), so only trimming is needed:
    // if there are less terms than documents, trim off the dead array space.
    if (t < mterms.Length)
    {
        System.String[] terms = new System.String[t];
        Array.Copy(mterms, 0, terms, 0, t);
        mterms = terms;
    }

    StringIndex value_Renamed = new StringIndex(retArray, mterms);
    return value_Renamed;
}
/// <summary>
/// Rewrites this range query into a <c>BooleanQuery</c> that receives, through
/// <c>AddTermToQuery</c>, every indexed term of the query's field lying between
/// <c>lowerTerm</c> and <c>upperTerm</c>.
/// </summary>
/// <remarks>
/// With a collator, every term of the field is tested with <c>collator.Compare</c>.
/// Without one, terms are compared by ordinal string comparison, the enumeration
/// starts at <c>lowerTerm</c> (or at the start of the field when it is null), and
/// it stops once the upper bound is passed. Both branches stop when the enumeration
/// leaves the field; this relies on the term enumeration being ordered by field
/// first, then by text.
/// </remarks>
/// <param name="reader">Index whose terms are enumerated.</param>
/// <returns>The populated <c>BooleanQuery</c> (constructed with <c>true</c>).</returns>
public override Query Rewrite(IndexReader reader)
{
    BooleanQuery query = new BooleanQuery(true);
    string testField = GetField();

    if (collator != null)
    {
        // Collation order need not match index order, so scan the whole field.
        TermEnum enumerator = reader.Terms(new Term(testField, ""));
        string lowerTermText = lowerTerm != null ? lowerTerm.Text() : null;
        string upperTermText = upperTerm != null ? upperTerm.Text() : null;
        try
        {
            do
            {
                Term term = enumerator.Term();

                // Past the last term of this field: no later term can match,
                // so stop instead of walking the rest of the index.
                if (term == null || term.Field() != testField)
                {
                    break;
                }

                bool aboveLower = lowerTermText == null
                    || (inclusive
                        ? collator.Compare(term.Text(), lowerTermText) >= 0
                        : collator.Compare(term.Text(), lowerTermText) > 0);

                bool belowUpper = aboveLower
                    && (upperTermText == null
                        || (inclusive
                            ? collator.Compare(term.Text(), upperTermText) <= 0
                            : collator.Compare(term.Text(), upperTermText) < 0));

                if (belowUpper)
                {
                    AddTermToQuery(term, query);
                }
            }
            while (enumerator.Next());
        }
        finally
        {
            enumerator.Close();
        }
    }
    else
    {
        // The collator branch treats lowerTerm as nullable; do the same here
        // instead of dereferencing it unconditionally.
        TermEnum enumerator = lowerTerm != null
            ? reader.Terms(lowerTerm)
            : reader.Terms(new Term(testField, ""));
        try
        {
            // With an exclusive range, terms are checked against the lower bound
            // until the first one is accepted.
            bool checkLower = !inclusive;

            do
            {
                Term term = enumerator.Term();
                if (term == null || term.Field() != testField)
                {
                    break;
                }

                if (!checkLower || lowerTerm == null || String.CompareOrdinal(term.Text(), lowerTerm.Text()) > 0)
                {
                    checkLower = false;
                    if (upperTerm != null)
                    {
                        int compare = String.CompareOrdinal(upperTerm.Text(), term.Text());

                        // if beyond the upper term, or is exclusive and
                        // this is equal to the upper term, break out
                        if ((compare < 0) || (!inclusive && compare == 0))
                        {
                            break;
                        }
                    }

                    AddTermToQuery(term, query); // Found a match
                }
            }
            while (enumerator.Next());
        }
        finally
        {
            enumerator.Close();
        }
    }

    return query;
}