// firstDocID is ignored since nextDoc() sets 'doc'
public /*protected internal*/ override bool Score(Collector c, int end, int firstDocID)
{
    c.SetScorer(this);
    while (doc < end)
    {
        // for docs in window
        c.Collect(doc); // collect score

        if (++pointer >= pointerMax)
        {
            pointerMax = termDocs.Read(docs, freqs); // refill buffers
            if (pointerMax != 0)
            {
                pointer = 0;
            }
            else
            {
                termDocs.Close();            // close stream
                doc = System.Int32.MaxValue; // set to sentinel value
                return false;
            }
        }
        doc = docs[pointer];
    }
    return true;
}
private void AssertTermDocsCount(System.String msg, IndexReader reader, Term term, int expected)
{
    TermDocs tdocs = null;
    try
    {
        tdocs = reader.TermDocs(term);
        Assert.IsNotNull(tdocs, msg + ", null TermDocs");
        int count = 0;
        while (tdocs.Next())
        {
            count++;
        }
        Assert.AreEqual(expected, count, msg + ", count mismatch");
    }
    finally
    {
        if (tdocs != null)
        {
            try
            {
                tdocs.Close();
            }
            catch (System.Exception)
            {
            }
        }
    }
}
protected virtual void Dispose(bool disposing)
{
    if (disposing)
    {
        in_Renamed.Close();
    }
}
public virtual void Close()
{
    if (termDocs != null)
    {
        termDocs.Close();
    }
}
protected internal override System.Object CreateValue(IndexReader reader, Entry entryKey)
{
    System.String field = StringHelper.Intern(entryKey.field);
    System.String[] retArray = new System.String[reader.MaxDoc];
    TermDocs termDocs = reader.TermDocs();
    TermEnum termEnum = reader.Terms(new Term(field));
    try
    {
        do
        {
            Term term = termEnum.Term;
            if (term == null || (System.Object) term.Field != (System.Object) field)
            {
                break;
            }
            System.String termval = term.Text;
            termDocs.Seek(termEnum);
            while (termDocs.Next())
            {
                retArray[termDocs.Doc] = termval;
            }
        }
        while (termEnum.Next());
    }
    finally
    {
        termDocs.Close();
        termEnum.Close();
    }
    return retArray;
}
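// CreateValue overrides like the one above are cache-miss callbacks; callers normally
// go through the public FieldCache entry points instead, which fill and cache the
// per-document array on the first request per reader/field. A hedged sketch of that
// entry point (the "title" field name and helper are assumptions for illustration):
public static string[] LoadTitleCache(IndexReader reader)
{
    return Lucene.Net.Search.FieldCache_Fields.DEFAULT.GetStrings(reader, "title");
}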
/// <summary>
/// Deletes all documents that have a given <code>term</code> indexed.
/// This is useful if one uses a document field to hold a unique ID string for
/// the document. Then to delete such a document, one merely constructs a
/// term with the appropriate field and the unique ID string as its text and
/// passes it to this method.
/// See <see cref="DeleteDocument(int)"/> for information about when this deletion
/// will become effective.
/// </summary>
/// <returns>the number of documents deleted</returns>
/// <throws>StaleReaderException if the index has changed since this reader was opened</throws>
/// <throws>CorruptIndexException if the index is corrupt</throws>
/// <throws>LockObtainFailedException if another writer has this index open
/// (<code>write.lock</code> could not be obtained)</throws>
/// <throws>IOException if there is a low-level IO error</throws>
public int DeleteDocuments(Term term)
{
    EnsureOpen();
    TermDocs docs = TermDocs(term);
    if (docs == null)
    {
        return 0;
    }
    int n = 0;
    try
    {
        while (docs.Next())
        {
            DeleteDocument(docs.Doc());
            n++;
        }
    }
    finally
    {
        docs.Close();
    }
    return n;
}
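// A minimal usage sketch of the delete-by-unique-ID pattern described in the summary
// above. The "id" field name and this helper are illustrative assumptions, not part of
// the source; IndexReader.Open(dir, false) gives a writable reader, and closing it
// flushes the pending deletes.
public static int DeleteById(Lucene.Net.Store.Directory dir, string id)
{
    IndexReader reader = IndexReader.Open(dir, false);
    try
    {
        // Normally deletes 0 or 1 documents when "id" is unique per document.
        return reader.DeleteDocuments(new Term("id", id));
    }
    finally
    {
        reader.Close();
    }
}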
public override DocIdSet GetDocIdSet(IndexReader reader)
{
    TermEnum enumerator = query.GetEnum(reader);
    try
    {
        // if current term in enum is null, the enum is empty -> shortcut
        if (enumerator.Term == null)
        {
            return DocIdSet.EMPTY_DOCIDSET;
        }

        // else fill into an OpenBitSet
        OpenBitSet bitSet = new OpenBitSet(reader.MaxDoc);
        int[] docs = new int[32];
        int[] freqs = new int[32];
        TermDocs termDocs = reader.TermDocs();
        try
        {
            int termCount = 0;
            do
            {
                Term term = enumerator.Term;
                if (term == null)
                {
                    break;
                }
                termCount++;
                termDocs.Seek(term);
                while (true)
                {
                    int count = termDocs.Read(docs, freqs);
                    if (count != 0)
                    {
                        for (int i = 0; i < count; i++)
                        {
                            bitSet.Set(docs[i]);
                        }
                    }
                    else
                    {
                        break;
                    }
                }
            }
            while (enumerator.Next());

            query.IncTotalNumberOfTerms(termCount); // {{Aroush-2.9}} is the use of 'temp' as is right?
        }
        finally
        {
            termDocs.Close();
        }
        return bitSet;
    }
    finally
    {
        enumerator.Close();
    }
}
protected internal override System.Object CreateValue(IndexReader reader, Entry entryKey)
{
    Entry entry = entryKey;
    System.String field = entry.field;
    FloatParser parser = (FloatParser) entry.custom;
    if (parser == null)
    {
        try
        {
            return wrapper.GetFloats(reader, field, Lucene.Net.Search.FieldCache_Fields.DEFAULT_FLOAT_PARSER);
        }
        catch (System.FormatException)
        {
            return wrapper.GetFloats(reader, field, Lucene.Net.Search.FieldCache_Fields.NUMERIC_UTILS_FLOAT_PARSER);
        }
    }
    float[] retArray = null;
    TermDocs termDocs = reader.TermDocs();
    TermEnum termEnum = reader.Terms(new Term(field));
    try
    {
        do
        {
            Term term = termEnum.Term;
            if (term == null || (System.Object) term.Field != (System.Object) field)
            {
                break;
            }
            float termval = parser.ParseFloat(term.Text);
            if (retArray == null)
            {
                // late init
                retArray = new float[reader.MaxDoc];
            }
            termDocs.Seek(termEnum);
            while (termDocs.Next())
            {
                retArray[termDocs.Doc] = termval;
            }
        }
        while (termEnum.Next());
    }
    catch (StopFillCacheException)
    {
    }
    finally
    {
        termDocs.Close();
        termEnum.Close();
    }
    if (retArray == null)
    {
        // no values
        retArray = new float[reader.MaxDoc];
    }
    return retArray;
}
public virtual void TestFilterIndexReader_Renamed()
{
    RAMDirectory directory = new MockRAMDirectory();
    IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);

    Document d1 = new Document();
    d1.Add(new Field("default", "one two", Field.Store.YES, Field.Index.ANALYZED));
    writer.AddDocument(d1);

    Document d2 = new Document();
    d2.Add(new Field("default", "one three", Field.Store.YES, Field.Index.ANALYZED));
    writer.AddDocument(d2);

    Document d3 = new Document();
    d3.Add(new Field("default", "two four", Field.Store.YES, Field.Index.ANALYZED));
    writer.AddDocument(d3);

    writer.Close();

    IndexReader reader = new TestReader(IndexReader.Open(directory));

    Assert.IsTrue(reader.IsOptimized());

    TermEnum terms = reader.Terms();
    while (terms.Next())
    {
        Assert.IsTrue(terms.Term().Text().IndexOf('e') != -1);
    }
    terms.Close();

    TermPositions positions = reader.TermPositions(new Term("default", "one"));
    while (positions.Next())
    {
        Assert.IsTrue((positions.Doc() % 2) == 1);
    }

    int NUM_DOCS = 3;

    TermDocs td = reader.TermDocs(null);
    for (int i = 0; i < NUM_DOCS; i++)
    {
        Assert.IsTrue(td.Next());
        Assert.AreEqual(i, td.Doc());
        Assert.AreEqual(1, td.Freq());
    }
    td.Close();
    reader.Close();
    directory.Close();
}
public static int Count(Term t, IndexReader r)
{
    int count = 0;
    TermDocs td = r.TermDocs(t, null);
    while (td.Next(null))
    {
        var d = td.Doc;
        count++;
    }
    td.Close();
    return count;
}
public static int Count(Term t, IndexReader r)
{
    int count = 0;
    TermDocs td = r.TermDocs(t);
    while (td.Next())
    {
        td.Doc();
        count++;
    }
    td.Close();
    return count;
}
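// For comparison, a cheaper but not strictly equivalent count, assuming the stock
// IndexReader API: DocFreq reads the figure straight from the term dictionary, but
// unlike the TermDocs loops above it may still include documents that have been
// deleted and not yet merged away.
public static int CountViaDocFreq(Term t, IndexReader r)
{
    return r.DocFreq(t);
}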
public virtual void TestMultiTermDocs()
{
    SqlServerDirectory.ProvisionDatabase(Connection, "test1", true);
    SqlServerDirectory.ProvisionDatabase(Connection, "test2", true);
    SqlServerDirectory.ProvisionDatabase(Connection, "test3", true);

    var ramDir1 = new SqlServerDirectory(Connection, new Options() { SchemaName = "test1" });
    AddDoc(ramDir1, "test foo", true);
    var ramDir2 = new SqlServerDirectory(Connection, new Options() { SchemaName = "test2" });
    AddDoc(ramDir2, "test blah", true);
    var ramDir3 = new SqlServerDirectory(Connection, new Options() { SchemaName = "test3" });
    AddDoc(ramDir3, "test wow", true);

    IndexReader[] readers1 = new[] { IndexReader.Open(ramDir1, false), IndexReader.Open(ramDir3, false) };
    IndexReader[] readers2 = new[] { IndexReader.Open(ramDir1, false), IndexReader.Open(ramDir2, false), IndexReader.Open(ramDir3, false) };
    MultiReader mr2 = new MultiReader(readers1);
    MultiReader mr3 = new MultiReader(readers2);

    // test mixing up TermDocs and TermEnums from different readers.
    TermDocs td2 = mr2.TermDocs();
    TermEnum te3 = mr3.Terms(new Term("body", "wow"));
    td2.Seek(te3);
    int ret = 0;

    // This should blow up if we forget to check that the TermEnum is from the same
    // reader as the TermDocs.
    while (td2.Next())
    {
        ret += td2.Doc;
    }
    td2.Close();
    te3.Close();

    // really a dummy assert to ensure that we got some docs and to ensure that
    // nothing is optimized out.
    Assert.IsTrue(ret > 0);
}
/// <summary>
/// Deletes all documents that match any of the specified terms.
/// </summary>
/// <param name="terms">Terms whose matching documents should be deleted</param>
/// <returns>The number of documents deleted</returns>
public int OptimizedDeleteDocuments(Term[] terms)
{
    int n = 0;
    lock (this)
    {
        if (directoryOwner)
        {
            AquireWriteLock();
        }
        foreach (Term term in terms)
        {
            TermDocs docs = TermDocs(term);
            if (docs == null)
            {
                continue;
            }
            try
            {
                while (docs.Next())
                {
                    DoDelete(docs.Doc());
                    hasChanges = true;
                    n++;
                }
            }
            finally
            {
                docs.Close();
            }
        }

        // Release the lock ASAP if there are no changes
        if (!hasChanges && writeLock != null)
        {
            writeLock.Release();
            writeLock = null;
        }
    }
    return n;
}
private void Remove(System.Type entity, object id, IDirectoryProvider provider)
{
    /*
     * Even with Lucene 2.1, using an IndexWriter to delete is not an option:
     * we can only delete by term, and the index doesn't have a term that
     * uniquely identifies the entry. See the logic below.
     */
    log.DebugFormat("remove from Lucene index: {0}#{1}", entity, id);
    DocumentBuilder builder = workspace.GetDocumentBuilder(entity);
    Term term = builder.GetTerm(id);
    IndexReader reader = workspace.GetIndexReader(provider, entity);
    TermDocs termDocs = null;
    try
    {
        // TODO is there a faster way?
        // TODO include TermDocs into the workspace?
        termDocs = reader.TermDocs(term);
        string entityName = TypeHelper.LuceneTypeName(entity);
        while (termDocs.Next())
        {
            int docIndex = termDocs.Doc;
            if (entityName.Equals(reader.Document(docIndex).Get(DocumentBuilder.CLASS_FIELDNAME)))
            {
                // remove only the ones of the right class
                // loop over all hits to remove every match (defensive code)
                reader.DeleteDocument(docIndex);
            }
        }
    }
    catch (Exception e)
    {
        throw new SearchException("Unable to remove from Lucene index: " + entity + "#" + id, e);
    }
    finally
    {
        if (termDocs != null)
        {
            try
            {
                termDocs.Close();
            }
            catch (IOException e)
            {
                log.Warn("Unable to close termDocs properly", e);
            }
        }
    }
}
public virtual void TestAllTermDocs()
{
    IndexReader reader = OpenReader();
    int NUM_DOCS = 2;

    TermDocs td = reader.TermDocs(null);
    for (int i = 0; i < NUM_DOCS; i++)
    {
        Assert.IsTrue(td.Next());
        Assert.AreEqual(i, td.Doc());
        Assert.AreEqual(1, td.Freq());
    }
    td.Close();
    reader.Close();
}
protected virtual void Dispose(bool disposing)
{
    if (isDisposed)
    {
        return;
    }

    if (disposing)
    {
        if (termDocs != null)
        {
            termDocs.Close();
        }
    }

    isDisposed = true;
}
/// <summary>
/// Gets the documents that contain the specified term.
/// </summary>
/// <param name="term"></param>
/// <returns></returns>
public IList<TermDoc> DocumentCount(Term term)
{
    TermDocs docs = open.Reader.TermDocs(term);
    List<TermDoc> list = new List<TermDoc>();
    while (docs.Next())
    {
        TermDoc doc2 = new TermDoc();
        doc2.Freq = docs.Freq();
        doc2.Doc = docs.Doc();
        doc2.Term = term;
        doc2.Norm = GetNorm(open.Reader, term.Field(), doc2.Doc);
        TermDoc item = doc2;
        list.Add(item);
    }
    docs.Close();
    return list;
}
protected internal override System.Object CreateValue(IndexReader reader, Entry entryKey)
{
    Entry entry = entryKey;
    System.String field = entry.field;
    ShortParser parser = (ShortParser) entry.custom;
    if (parser == null)
    {
        return wrapper.GetShorts(reader, field, Lucene.Net.Search.FieldCache_Fields.DEFAULT_SHORT_PARSER);
    }
    short[] retArray = new short[reader.MaxDoc];
    TermDocs termDocs = reader.TermDocs();
    TermEnum termEnum = reader.Terms(new Term(field));
    try
    {
        do
        {
            Term term = termEnum.Term;
            if (term == null || (System.Object) term.Field != (System.Object) field)
            {
                break;
            }
            short termval = parser.ParseShort(term.Text);
            termDocs.Seek(termEnum);
            while (termDocs.Next())
            {
                retArray[termDocs.Doc] = termval;
            }
        }
        while (termEnum.Next());
    }
    catch (StopFillCacheException)
    {
    }
    finally
    {
        termDocs.Close();
        termEnum.Close();
    }
    return retArray;
}
public virtual void TestAllTermDocs()
{
    Directory dir1 = GetDir1();
    Directory dir2 = GetDir2();
    ParallelReader pr = new ParallelReader();
    pr.Add(IndexReader.Open(dir1));
    pr.Add(IndexReader.Open(dir2));
    int NUM_DOCS = 2;

    TermDocs td = pr.TermDocs(null);
    for (int i = 0; i < NUM_DOCS; i++)
    {
        Assert.IsTrue(td.Next());
        Assert.AreEqual(i, td.Doc());
        Assert.AreEqual(1, td.Freq());
    }
    td.Close();
    pr.Close();
    dir1.Close();
    dir2.Close();
}
/// <summary>
/// Get the DocIdSet.
/// </summary>
/// <param name="reader">Applicable reader.</param>
/// <returns>The set.</returns>
public override DocIdSet GetDocIdSet(IndexReader reader)
{
    OpenBitSet result = new OpenBitSet(reader.MaxDoc);
    TermDocs td = reader.TermDocs();
    try
    {
        foreach (Term t in this.terms)
        {
            td.Seek(t);
            while (td.Next())
            {
                result.Set(td.Doc);
            }
        }
    }
    finally
    {
        td.Close();
    }
    return result;
}
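// A hedged usage sketch for a filter like the one above: GetDocIdSet is consulted by
// IndexSearcher.Search, so only documents whose ids are set in the returned OpenBitSet
// can become hits. The helper name and the "body"/"test" query are assumptions for
// illustration; 'filter' stands for an instance of whatever class hosts GetDocIdSet.
public static TopDocs SearchWithFilter(IndexReader reader, Filter filter)
{
    IndexSearcher searcher = new IndexSearcher(reader);
    try
    {
        // Any query works here; the filter just narrows the candidate documents.
        return searcher.Search(new TermQuery(new Term("body", "test")), filter, 10);
    }
    finally
    {
        searcher.Close();
    }
}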
public virtual void TestMultiTermDocs()
{
    RAMDirectory ramDir1 = new RAMDirectory();
    AddDoc(ramDir1, "test foo", true);
    RAMDirectory ramDir2 = new RAMDirectory();
    AddDoc(ramDir2, "test blah", true);
    RAMDirectory ramDir3 = new RAMDirectory();
    AddDoc(ramDir3, "test wow", true);

    IndexReader[] readers1 = new IndexReader[] { IndexReader.Open(ramDir1), IndexReader.Open(ramDir3) };
    IndexReader[] readers2 = new IndexReader[] { IndexReader.Open(ramDir1), IndexReader.Open(ramDir2), IndexReader.Open(ramDir3) };
    MultiReader mr2 = new MultiReader(readers1);
    MultiReader mr3 = new MultiReader(readers2);

    // test mixing up TermDocs and TermEnums from different readers.
    TermDocs td2 = mr2.TermDocs();
    TermEnum te3 = mr3.Terms(new Term("body", "wow"));
    td2.Seek(te3);
    int ret = 0;

    // This should blow up if we forget to check that the TermEnum is from the same
    // reader as the TermDocs.
    while (td2.Next())
    {
        ret += td2.Doc();
    }
    td2.Close();
    te3.Close();

    // really a dummy assert to ensure that we got some docs and to ensure that
    // nothing is optimized out.
    Assert.IsTrue(ret > 0);
}
////////////////////////////////////////////////////////////////

private static void ScoreHits(Dictionary<int, Hit> hits_by_id, IndexReader reader, ICollection term_list)
{
    LNS.Similarity similarity;
    similarity = LNS.Similarity.GetDefault();

    TermDocs term_docs = reader.TermDocs();
    Hit hit;

    foreach (Term term in term_list)
    {
        double idf;
        idf = similarity.Idf(reader.DocFreq(term), reader.MaxDoc());

        int hit_count;
        hit_count = hits_by_id.Count;

        term_docs.Seek(term);
        while (term_docs.Next() && hit_count > 0)
        {
            int id;
            id = term_docs.Doc();

            if (hits_by_id.TryGetValue(id, out hit))
            {
                double tf;
                tf = similarity.Tf(term_docs.Freq());
                hit.Score += tf * idf;
                --hit_count;
            }
        }
    }

    term_docs.Close();
}
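// For context: assuming the default Similarity has not been replaced,
// LNS.Similarity.GetDefault() returns a DefaultSimilarity, for which
//     Tf(freq)              = sqrt(freq)
//     Idf(docFreq, numDocs) = 1 + ln(numDocs / (docFreq + 1))
// so the loop above simply accumulates a plain tf-idf contribution into each
// matching hit's Score, one term at a time.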
/// <summary>
/// Deletes all documents containing the specified term, using the given reader.
/// </summary>
/// <param name="reader">The reader.</param>
/// <param name="term">The term.</param>
/// <returns>The number of documents deleted.</returns>
public int Delete(IndexReader reader, Term term)
{
    TermDocs docs = reader.TermDocs(term);
    if (docs == null)
    {
        return 0;
    }
    int num = 0;
    try
    {
        while (docs.Next())
        {
            reader.DeleteDocument(docs.Doc());
            num++;
        }
    }
    finally
    {
        docs.Close();
    }
    return num;
}
public void FlushUris()
{
    if (pending_uris == null)
    {
        return;
    }

    TermDocs term_docs = this.searcher.Reader.TermDocs();

    for (int i = 0; i < pending_uris.Count; i++)
    {
        Term term = new Term("Uri", (string) pending_uris[i]);
        term_docs.Seek(term);

        if (term_docs.Next())
        {
            this.Set(term_docs.Doc(), true);
        }
    }

    term_docs.Close();

    pending_uris = null;
}
public virtual void Close()
{
    termDocs.Close();
}
public virtual void Close()
{
    in_Renamed.Close();
}
// There are two ways we can determine the max_results
// most recent items:
//
// One is to instantiate Lucene documents for each of
// the document IDs in primary_matches.  This is a
// fairly expensive operation.
//
// The other is to walk through the list of all
// document IDs in descending time order.  This is
// a less expensive operation, but adds up over time
// on large data sets.
//
// We can walk about 2.5 docs for every Document we
// instantiate.  So what we'll do, if we have more
// matches than available hits, is walk (m * 1.25)
// docs to see if we can fill out the top 100 hits.
// If not, we'll fall back to creating documents
// for all of them.

private static ArrayList ScanRecentDocs(IndexReader primary_reader,
                                        IndexReader secondary_reader,
                                        BetterBitArray primary_matches,
                                        Dictionary<int, Hit> hits_by_id,
                                        int max_results,
                                        ref int total_number_of_matches,
                                        HitFilter hit_filter,
                                        string index_name)
{
    Stopwatch a = new Stopwatch();
    a.Start();

    TermDocs docs = primary_reader.TermDocs();
    TermEnum enumerator = primary_reader.Terms(new Term("InvertedTimestamp", String.Empty));
    ArrayList results = new ArrayList(max_results);
    int docs_found = 0;
    int docs_walked = 0;
    int hit_filter_removed = 0;
    int max_docs = (int) (primary_matches.TrueCount * 1.25);

    Term term;
    TermDocs secondary_term_docs = null;
    if (secondary_reader != null)
    {
        secondary_term_docs = secondary_reader.TermDocs();
    }

    do
    {
        term = enumerator.Term();

        if (term.Field() != "InvertedTimestamp")
        {
            break;
        }

        docs.Seek(enumerator);

        while (docs.Next() && docs_found < max_results && docs_walked < max_docs)
        {
            int doc_id = docs.Doc();

            if (primary_matches.Get(doc_id))
            {
                Document doc = primary_reader.Document(doc_id);
                Hit hit = CreateHit(doc, secondary_reader, secondary_term_docs);

                // If we have a HitFilter, apply it.
                if (hit_filter != null && !hit_filter(hit))
                {
                    if (Debug)
                    {
                        Log.Debug("Filtered out {0}", hit.Uri);
                    }
                    hit_filter_removed++;
                    continue;
                }
                hits_by_id[doc_id] = hit;
                // Add the result, last modified first
                results.Add(hit);
                docs_found++;
            }

            docs_walked++;
        }
    } while (enumerator.Next() && docs_found < max_results && docs_walked < max_docs);

    docs.Close();
    if (secondary_term_docs != null)
    {
        secondary_term_docs.Close();
    }

    // If we've found all the docs we can return in a subset!
    // Fantastic, we've probably short circuited a slow search.
    if (docs_found != max_results)
    {
        // Otherwise bad luck! Not all docs found.
        // Start afresh - this time traversing all results.
        results = null;
    }
    else
    {
        // Adjust total_number_of_matches. We need to do this to avoid scenarios like the following:
        // max_hits = 100. Matched 100 results. But hit filter removed 30. So 70 results will be returned.
        // We want to avoid saying "Showing top 70 of 100". Note that since we are not passing
        // every document in the index through the hit_filter, when we say "Showing top 100 of 1234", the
        // 1234 could actually be much less. But since max_hits was 100, that will not mislead the user.
        total_number_of_matches -= hit_filter_removed;
    }

    a.Stop();
    if (Debug)
    {
        Log.Debug(">>> {0}: Walked {1} items, populated an enum with {2} items in {3}", index_name, docs_walked, docs_found, a);

        if (docs_found == max_results)
        {
            Log.Debug(">>> {0}: Successfully short circuited timestamp ordering!", index_name);
        }
    }

    return results;
}
private static ArrayList FindRecentResults(IndexReader primary_reader,
                                           IndexReader secondary_reader,
                                           BetterBitArray primary_matches,
                                           Dictionary<int, Hit> hits_by_id,
                                           int max_results,
                                           ref int total_number_of_matches,
                                           HitFilter hit_filter,
                                           string index_name)
{
    Stopwatch b = new Stopwatch();
    b.Start();

    int count = 0;
    Document doc;

    ArrayList all_docs = null;
    TopScores top_docs = null;
    TermDocs term_docs = null;

    if (primary_matches.TrueCount > max_results)
    {
        top_docs = new TopScores(max_results);
    }
    else
    {
        all_docs = new ArrayList(primary_matches.TrueCount);
    }

    if (secondary_reader != null)
    {
        term_docs = secondary_reader.TermDocs();
    }

    for (int match_index = primary_matches.Count; ; match_index--)
    {
        // Walk across the matches backwards, since newer
        // documents are more likely to be at the end of
        // the index.
        match_index = primary_matches.GetPreviousTrueIndex(match_index);
        if (match_index < 0)
        {
            break;
        }

        count++;

        doc = primary_reader.Document(match_index, fields_timestamp_uri);

        // Check the timestamp --- if we have already reached our
        // limit, we might be able to reject it immediately.
        string timestamp_str;
        long timestamp_num = 0;

        timestamp_str = doc.Get("Timestamp");
        if (timestamp_str == null)
        {
            Logger.Log.Warn("No timestamp on {0}!", GetUriFromDocument(doc));
        }
        else
        {
            timestamp_num = Int64.Parse(doc.Get("Timestamp"));
            if (top_docs != null && !top_docs.WillAccept(timestamp_num))
            {
                continue;
            }
        }

        // Get the actual hit now.
        // doc was created with only 2 fields, so first get the complete lucene document for the primary document.
        // Also run our hit_filter now, if we have one. Since we insist on returning the max_results
        // most recent hits, any filtering should happen now and not later.
        Hit hit = CreateHit(primary_reader.Document(match_index), secondary_reader, term_docs);
        if (hit_filter != null && !hit_filter(hit))
        {
            if (Debug)
            {
                Log.Debug("Filtered out {0}", hit.Uri);
            }
            total_number_of_matches--;
            continue;
        }

        hits_by_id[match_index] = hit;

        // Add the document to the appropriate data structure.
        // We use the timestamp_num as the score, so high
        // scores correspond to more-recent timestamps.
        if (all_docs != null)
        {
            all_docs.Add(hit);
        }
        else
        {
            top_docs.Add(timestamp_num, hit);
        }
    }

    if (term_docs != null)
    {
        term_docs.Close();
    }

    b.Stop();

    if (Debug)
    {
        Log.Debug(">>> {0}: Instantiated and scanned {1} documents in {2}", index_name, count, b);
    }

    if (all_docs != null)
    {
        // Sort results before sending
        all_docs.Sort();
        return all_docs;
    }
    else
    {
        return top_docs.TopScoringObjects;
    }
}
private IndexerReceipt[] Flush_Unlocked(IndexerRequest request)
{
    ArrayList receipt_queue;
    receipt_queue = new ArrayList();

    IndexReader primary_reader, secondary_reader;
    primary_reader = IndexReader.Open(PrimaryStore);
    secondary_reader = IndexReader.Open(SecondaryStore);

    // Step #1: Make our first pass over the list of
    // indexables that make up our request.  For each add
    // or property change in the request, get the Lucene
    // documents so we can move forward any persistent
    // properties (for adds) or all old properties (for
    // property changes).
    //
    // Then, for each add or remove in the request,
    // delete the associated documents from the index.
    // Note that we previously cached added documents so
    // that we can move persistent properties forward.

    // parent_child_old_props is a double-nested hashtable (depth-2 tree)
    // indexed by the parent uri; it stores another hashtable indexed by
    // the (parent + child) document uris.
    // FIXME: a 2-level hashtable is a waste for any non-child document.
    // Replace this with a better data structure.
    Hashtable parent_child_old_props = UriFu.NewHashtable();

    TermDocs term_docs = secondary_reader.TermDocs();
    int delete_count = 0;

    IEnumerable request_indexables = request.Indexables;

    foreach (Indexable indexable in request_indexables)
    {
        string uri_str = UriFu.UriToEscapedString(indexable.Uri);
        Term term;

        // Store the necessary properties from old documents for re-addition
        if (indexable.Type == IndexableType.Add || indexable.Type == IndexableType.PropertyChange)
        {
            term = new Term("Uri", uri_str);
            term_docs.Seek(term);

            Hashtable this_parent_child_props = null;

            if (term_docs.Next())
            {
                this_parent_child_props = UriFu.NewHashtable();
                this_parent_child_props[indexable.Uri] = secondary_reader.Document(term_docs.Doc());
                parent_child_old_props[indexable.Uri] = this_parent_child_props;
            }

            term = new Term("ParentUri", uri_str);
            term_docs.Seek(term);

            while (term_docs.Next())
            {
                Document doc = secondary_reader.Document(term_docs.Doc());

                string child_uri_str = doc.Get("Uri");
                Uri child_uri = UriFu.EscapedStringToUri(child_uri_str);
                // Any valid lucene document *should* have a Uri, so no need to check for null.
                // Store the child documents too, to save the persistent properties
                // of child documents.
                this_parent_child_props[child_uri] = doc;
            }
        }

        // Now remove (non-remove indexables will be re-added in the next block)
        Logger.Log.Debug("-{0}", indexable.DisplayUri);

        int num_delete = 0;

        term = new Term("Uri", uri_str);
        // For property changes, only the secondary index is modified
        secondary_reader.DeleteDocuments(term);

        // Now remove from everywhere else (if asked to remove, or if asked to add,
        // in which case we first remove and then add).
        // So we also need to remove child documents.
        if (indexable.Type != IndexableType.PropertyChange)
        {
            num_delete = primary_reader.DeleteDocuments(term);

            // When we delete an indexable, also delete any children.
            // FIXME: Shouldn't we also delete any children of children, etc.?
            term = new Term("ParentUri", uri_str);
            num_delete += primary_reader.DeleteDocuments(term);
            secondary_reader.DeleteDocuments(term);
        }

        // If this is a strict removal (and not a deletion that
        // we are doing in anticipation of adding something back),
        // queue up a removed receipt.
        if (indexable.Type == IndexableType.Remove)
        {
            IndexerRemovedReceipt r;
            r = new IndexerRemovedReceipt(indexable.Id);
            r.NumRemoved = num_delete;
            receipt_queue.Add(r);
        }

        delete_count += num_delete;
    }

    term_docs.Close();

    if (HaveItemCount)
    {
        AdjustItemCount(-delete_count);
    }
    else
    {
        SetItemCount(primary_reader);
    }

    // We are now done with the readers, so we close them.
    // And also free them.  Somehow not freeing them is preventing them
    // from being GCed at all.
    primary_reader.Close();
    primary_reader = null;
    secondary_reader.Close();
    secondary_reader = null;

    // FIXME: If we crash at exactly this point, we are in
    // trouble.  Items will have been dropped from the index
    // without the proper replacements being added.  We can
    // hopefully fix this when we move to Lucene 2.1.

    // Step #2: Make another pass across our list of indexables
    // and write out any new documents.

    if (text_cache != null)
    {
        text_cache.BeginTransaction();
    }

    IndexWriter primary_writer, secondary_writer;
    // FIXME: A lock obtain time-out can happen here; if that happens,
    // an exception will be thrown and this method will break in the middle,
    // leaving IndexWriters unclosed! Same for any Lucene.Net-index
    // modification methods.
    primary_writer = new IndexWriter(PrimaryStore, IndexingAnalyzer, false);
    secondary_writer = null;

    foreach (Indexable indexable in request_indexables)
    {
        // If shutdown has been started, break here.
        // FIXME: Some more processing will continue, much of it
        // concerning receipts, but the daemon will ignore receipts
        // now anyway; what is the fastest way to stop from here?
        if (Shutdown.ShutdownRequested)
        {
            Log.Debug("Shutdown initiated. Breaking while flushing indexables.");
            break;
        }

        // Receipts for removes were generated in the
        // previous block.  Now we just have to remove
        // items from the text cache.
        if (indexable.Type == IndexableType.Remove)
        {
            if (text_cache != null)
            {
                text_cache.Delete(indexable.Uri);
            }
            continue;
        }

        IndexerAddedReceipt r;
        Hashtable prop_change_docs = (Hashtable) parent_child_old_props[indexable.Uri];

        if (indexable.Type == IndexableType.PropertyChange)
        {
            Logger.Log.Debug("+{0} (props only)", indexable.DisplayUri);

            r = new IndexerAddedReceipt(indexable.Id);
            r.PropertyChangesOnly = true;
            receipt_queue.Add(r);

            Document doc;
            if (prop_change_docs == null)
            {
                doc = null;
            }
            else
            {
                doc = (Document) prop_change_docs[indexable.Uri];
            }

            Document new_doc;
            new_doc = RewriteDocument(doc, indexable);

            // Write out the new document...
            if (secondary_writer == null)
            {
                secondary_writer = new IndexWriter(SecondaryStore, IndexingAnalyzer, false);
            }
            secondary_writer.AddDocument(new_doc);

            // Get child property change indexables...
            ArrayList prop_change_indexables;
            prop_change_indexables = GetChildPropertyChange(prop_change_docs, indexable);
            // ...and store them; no need to delete them first, since they were
            // already removed from the index.
            if (prop_change_indexables == null)
            {
                continue;
            }

            foreach (Indexable prop_change_indexable in prop_change_indexables)
            {
                Log.Debug("+{0} (props only, generated indexable)", prop_change_indexable.Uri);
                doc = (Document) prop_change_docs[prop_change_indexable.Uri];
                new_doc = RewriteDocument(doc, prop_change_indexable);
                secondary_writer.AddDocument(new_doc);
            }

            continue; // ...and proceed to the next Indexable
        }

        // If we reach this point we know we are dealing with an IndexableType.Add
        if (indexable.Type != IndexableType.Add)
        {
            throw new Exception("When I said it was an IndexableType.Add, I meant it!");
        }

        r = AddIndexableToIndex(indexable, primary_writer, ref secondary_writer, prop_change_docs);
        if (r != null)
        {
            receipt_queue.Add(r);
        }
    }

    if (text_cache != null)
    {
        text_cache.CommitTransaction();
    }

    if (Shutdown.ShutdownRequested)
    {
        foreach (DeferredInfo di in deferred_indexables)
        {
            di.Cleanup();
        }
        deferred_indexables.Clear();

        foreach (Indexable indexable in request_indexables)
        {
            indexable.Cleanup();
        }

        primary_writer.Close();
        if (secondary_writer != null)
        {
            secondary_writer.Close();
        }

        return null;
    }

    if (request.OptimizeIndex)
    {
        Stopwatch watch = new Stopwatch();
        Logger.Log.Debug("Optimizing {0}", IndexName);
        watch.Start();

        primary_writer.Optimize();
        if (secondary_writer == null)
        {
            secondary_writer = new IndexWriter(SecondaryStore, IndexingAnalyzer, false);
        }
        secondary_writer.Optimize();

        watch.Stop();
        Logger.Log.Debug("{0} optimized in {1}", IndexName, watch);
    }

    // Step #4: Close our writers and return the events to
    // indicate what has happened.
    primary_writer.Close();
    if (secondary_writer != null)
    {
        secondary_writer.Close();
    }

    // Send a single IndexerIndexablesReceipt if there were deferred indexables
    if (deferred_indexables.Count > 0)
    {
        Log.Debug("{0} indexables generated more indexables; asking daemon to schedule their indexing.", deferred_indexables.Count);
        IndexerIndexablesReceipt r = new IndexerIndexablesReceipt();
        receipt_queue.Add(r);
    }

    IndexerReceipt[] receipt_array;
    receipt_array = new IndexerReceipt[receipt_queue.Count];
    for (int i = 0; i < receipt_queue.Count; ++i)
    {
        receipt_array[i] = (IndexerReceipt) receipt_queue[i];
    }

    return receipt_array;
}
public virtual void testSkipTo(int indexDivisor)
{
    Directory dir = new RAMDirectory();

    IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);

    Term ta = new Term("content", "aaa");
    for (int i = 0; i < 10; i++)
    {
        AddDoc(writer, "aaa aaa aaa aaa");
    }

    Term tb = new Term("content", "bbb");
    for (int i = 0; i < 16; i++)
    {
        AddDoc(writer, "bbb bbb bbb bbb");
    }

    Term tc = new Term("content", "ccc");
    for (int i = 0; i < 50; i++)
    {
        AddDoc(writer, "ccc ccc ccc ccc");
    }

    // assure that we deal with a single segment
    writer.Optimize();
    writer.Close();

    IndexReader reader = IndexReader.Open(dir, null, true, indexDivisor);

    TermDocs tdocs = reader.TermDocs();

    // without optimization (assumption skipInterval == 16)

    // with next
    tdocs.Seek(ta);
    Assert.IsTrue(tdocs.Next());
    Assert.AreEqual(0, tdocs.Doc());
    Assert.AreEqual(4, tdocs.Freq());
    Assert.IsTrue(tdocs.Next());
    Assert.AreEqual(1, tdocs.Doc());
    Assert.AreEqual(4, tdocs.Freq());
    Assert.IsTrue(tdocs.SkipTo(0));
    Assert.AreEqual(2, tdocs.Doc());
    Assert.IsTrue(tdocs.SkipTo(4));
    Assert.AreEqual(4, tdocs.Doc());
    Assert.IsTrue(tdocs.SkipTo(9));
    Assert.AreEqual(9, tdocs.Doc());
    Assert.IsFalse(tdocs.SkipTo(10));

    // without next
    tdocs.Seek(ta);
    Assert.IsTrue(tdocs.SkipTo(0));
    Assert.AreEqual(0, tdocs.Doc());
    Assert.IsTrue(tdocs.SkipTo(4));
    Assert.AreEqual(4, tdocs.Doc());
    Assert.IsTrue(tdocs.SkipTo(9));
    Assert.AreEqual(9, tdocs.Doc());
    Assert.IsFalse(tdocs.SkipTo(10));

    // exactly skipInterval documents and therefore with optimization

    // with next
    tdocs.Seek(tb);
    Assert.IsTrue(tdocs.Next());
    Assert.AreEqual(10, tdocs.Doc());
    Assert.AreEqual(4, tdocs.Freq());
    Assert.IsTrue(tdocs.Next());
    Assert.AreEqual(11, tdocs.Doc());
    Assert.AreEqual(4, tdocs.Freq());
    Assert.IsTrue(tdocs.SkipTo(5));
    Assert.AreEqual(12, tdocs.Doc());
    Assert.IsTrue(tdocs.SkipTo(15));
    Assert.AreEqual(15, tdocs.Doc());
    Assert.IsTrue(tdocs.SkipTo(24));
    Assert.AreEqual(24, tdocs.Doc());
    Assert.IsTrue(tdocs.SkipTo(25));
    Assert.AreEqual(25, tdocs.Doc());
    Assert.IsFalse(tdocs.SkipTo(26));

    // without next
    tdocs.Seek(tb);
    Assert.IsTrue(tdocs.SkipTo(5));
    Assert.AreEqual(10, tdocs.Doc());
    Assert.IsTrue(tdocs.SkipTo(15));
    Assert.AreEqual(15, tdocs.Doc());
    Assert.IsTrue(tdocs.SkipTo(24));
    Assert.AreEqual(24, tdocs.Doc());
    Assert.IsTrue(tdocs.SkipTo(25));
    Assert.AreEqual(25, tdocs.Doc());
    Assert.IsFalse(tdocs.SkipTo(26));

    // much more than skipInterval documents and therefore with optimization

    // with next
    tdocs.Seek(tc);
    Assert.IsTrue(tdocs.Next());
    Assert.AreEqual(26, tdocs.Doc());
    Assert.AreEqual(4, tdocs.Freq());
    Assert.IsTrue(tdocs.Next());
    Assert.AreEqual(27, tdocs.Doc());
    Assert.AreEqual(4, tdocs.Freq());
    Assert.IsTrue(tdocs.SkipTo(5));
    Assert.AreEqual(28, tdocs.Doc());
    Assert.IsTrue(tdocs.SkipTo(40));
    Assert.AreEqual(40, tdocs.Doc());
    Assert.IsTrue(tdocs.SkipTo(57));
    Assert.AreEqual(57, tdocs.Doc());
    Assert.IsTrue(tdocs.SkipTo(74));
    Assert.AreEqual(74, tdocs.Doc());
    Assert.IsTrue(tdocs.SkipTo(75));
    Assert.AreEqual(75, tdocs.Doc());
    Assert.IsFalse(tdocs.SkipTo(76));

    // without next
    tdocs.Seek(tc);
    Assert.IsTrue(tdocs.SkipTo(5));
    Assert.AreEqual(26, tdocs.Doc());
    Assert.IsTrue(tdocs.SkipTo(40));
    Assert.AreEqual(40, tdocs.Doc());
    Assert.IsTrue(tdocs.SkipTo(57));
    Assert.AreEqual(57, tdocs.Doc());
    Assert.IsTrue(tdocs.SkipTo(74));
    Assert.AreEqual(74, tdocs.Doc());
    Assert.IsTrue(tdocs.SkipTo(75));
    Assert.AreEqual(75, tdocs.Doc());
    Assert.IsFalse(tdocs.SkipTo(76));

    tdocs.Close();
    reader.Close();
    dir.Close();
}