NOTE: if this method hits an OutOfMemoryException, you should immediately close (dispose) the writer; see the IndexWriter class documentation for details.
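A minimal sketch of that note, assuming a long-lived Lucene.Net 3.x writer held in a field; the _writer field and the DeleteById method are illustrative, not taken from any example below:

private IndexWriter _writer; // hypothetical long-lived writer shared by the application

public void DeleteById(string id)
{
    try
    {
        _writer.DeleteDocuments(new Term("Id", id));
        _writer.Commit();
    }
    catch (OutOfMemoryException)
    {
        // After an out-of-memory failure the writer's buffered state is suspect:
        // do not attempt another commit; dispose the writer and recreate it later.
        _writer.Dispose();
        throw;
    }
}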
//this method builds an index document from a FileToIndex object public void BuildIndex(FileToIndex file) { using (var analyzer = new Lucene.Net.Analysis.Ru.RussianAnalyzer(Version.LUCENE_30)) { using (IndexWriter idxw = new IndexWriter(_directory, analyzer, IndexWriter.MaxFieldLength.UNLIMITED)) { //delete any existing document with this Id var searchQuery = new TermQuery(new Term("Id", file.Id.ToString())); idxw.DeleteDocuments(searchQuery); //create the new document Document doc = new Document(); doc.Add(new Field("Id", file.Id.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED));//the analyzer splits strings into words, so the Id field is not analyzed doc.Add(new Field("Title", file.Title, Field.Store.YES, Field.Index.ANALYZED)); doc.Add(new Field("Description", file.Description, Field.Store.YES, Field.Index.ANALYZED)); doc.Add(new Field("Authors", file.Authors, Field.Store.YES, Field.Index.ANALYZED)); doc.Add(new Field("Text", file.Text, Field.Store.YES, Field.Index.ANALYZED)); doc.Add(new Field("Hashtags", file.Hashtags, Field.Store.YES, Field.Index.ANALYZED)); doc.Add(new Field("Discipline", file.Discipline, Field.Store.YES, Field.Index.ANALYZED)); //write the document to the index idxw.AddDocument(doc); //commit and optimize; the using block disposes the writer idxw.Commit(); idxw.Optimize(); } } }
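The delete-then-add sequence above is the manual form of IndexWriter.UpdateDocument(Term, Document), which deletes all documents containing the given term and then adds the new document in one call (the same API used in later examples). A minimal sketch under the same assumptions as the example, i.e. an open writer idxw and a FileToIndex object with the same fields:

// Equivalent update in a single call: delete by the Id term, then add the new document.
var idTerm = new Term("Id", file.Id.ToString());
var doc = new Document();
doc.Add(new Field("Id", file.Id.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED));
doc.Add(new Field("Title", file.Title, Field.Store.YES, Field.Index.ANALYZED));
// ... remaining fields as in the example above ...
idxw.UpdateDocument(idTerm, doc);
idxw.Commit();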
public void DeleteDocumentsFrom(DateTime fromDate) { string sFrom = DateTools.DateToString(fromDate, DateTools.Resolution.MINUTE); Lucene.Net.Search.TermRangeQuery queryFrom = new Lucene.Net.Search.TermRangeQuery("LastWriteTime", sFrom, null, false, false); writer.DeleteDocuments(queryFrom); }
public void Test_IndexReader_IsCurrent() { RAMDirectory ramDir = new RAMDirectory(); IndexWriter writer = new IndexWriter(ramDir, new KeywordAnalyzer(), true, new IndexWriter.MaxFieldLength(1000)); Field field = new Field("TEST", "mytest", Field.Store.YES, Field.Index.ANALYZED); Document doc = new Document(); doc.Add(field); writer.AddDocument(doc); IndexReader reader = writer.GetReader(); writer.DeleteDocuments(new Lucene.Net.Index.Term("TEST", "mytest")); Assert.IsFalse(reader.IsCurrent()); int resCount1 = new IndexSearcher(reader).Search(new TermQuery(new Term("TEST", "mytest")),100).TotalHits; Assert.AreEqual(1, resCount1); writer.Commit(); Assert.IsFalse(reader.IsCurrent()); int resCount2 = new IndexSearcher(reader).Search(new TermQuery(new Term("TEST", "mytest")),100).TotalHits; Assert.AreEqual(1, resCount2, "Reopen not invoked yet, resultCount must still be 1."); reader = reader.Reopen(); Assert.IsTrue(reader.IsCurrent()); int resCount3 = new IndexSearcher(reader).Search(new TermQuery(new Term("TEST", "mytest")), 100).TotalHits; Assert.AreEqual(0, resCount3, "After reopen, resultCount must be 0."); reader.Close(); writer.Dispose(); }
public virtual void TestBackToTheFuture() { Directory dir = NewDirectory(); IndexWriter iw = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, null)); Document doc = new Document(); doc.Add(NewStringField("foo", "bar", Field.Store.NO)); iw.AddDocument(doc); doc = new Document(); doc.Add(NewStringField("foo", "baz", Field.Store.NO)); iw.AddDocument(doc); DirectoryReader r1 = DirectoryReader.Open(iw, true); iw.DeleteDocuments(new Term("foo", "baz")); DirectoryReader r2 = DirectoryReader.Open(iw, true); FieldCache.DEFAULT.GetDocTermOrds(GetOnlySegmentReader(r2), "foo"); SortedSetDocValues v = FieldCache.DEFAULT.GetDocTermOrds(GetOnlySegmentReader(r1), "foo"); Assert.AreEqual(2, v.ValueCount); v.Document = 1; Assert.AreEqual(1, v.NextOrd()); iw.Dispose(); r1.Dispose(); r2.Dispose(); dir.Dispose(); }
private static RAMDirectory MakeEmptyIndex(int numDeletedDocs) { RAMDirectory d = new RAMDirectory(); IndexWriter w = new IndexWriter(d, new WhitespaceAnalyzer(), true, MaxFieldLength.LIMITED, null); for (int i = 0; i < numDeletedDocs; i++) { w.AddDocument(new Document(), null); } w.Commit(null); w.DeleteDocuments(null, new MatchAllDocsQuery()); w.Commit(null); if (0 < numDeletedDocs) { Assert.IsTrue(w.HasDeletions(null), "writer has no deletions"); } Assert.AreEqual(numDeletedDocs, w.MaxDoc(), "writer is missing some deleted docs"); Assert.AreEqual(0, w.NumDocs(null), "writer has non-deleted docs"); w.Close(); IndexReader r = IndexReader.Open((Directory)d, true, null); Assert.AreEqual(numDeletedDocs, r.NumDeletedDocs, "reader has wrong number of deleted docs"); r.Close(); return(d); }
private void AddItemToIndex(AbstactDocument doc, IndexWriter writer) { var query = new BooleanQuery(); query.Add(new TermQuery(new Term("Id", doc.Id.ToString())), Occur.MUST); query.Add(new TermQuery(new Term("Type", doc.Type)), Occur.MUST); writer.DeleteDocuments(query); writer.AddDocument(doc.Document); }
public void TestSplit() { string format = "{0:000000000}"; IndexWriter w; using (Directory dir = NewDirectory()) { using (w = new IndexWriter(dir, NewIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(Random(), MockTokenizer.WHITESPACE, false)) .SetOpenMode(IndexWriterConfig.OpenMode_e.CREATE).SetMergePolicy(NoMergePolicy.COMPOUND_FILES))) { for (int x = 0; x < 11; x++) { Document doc = CreateDocument(x, "1", 3, format); w.AddDocument(doc); if (x % 3 == 0) w.Commit(); } for (int x = 11; x < 20; x++) { Document doc = CreateDocument(x, "2", 3, format); w.AddDocument(doc); if (x % 3 == 0) w.Commit(); } } Term midTerm = new Term("id", string.Format(CultureInfo.InvariantCulture, format, 11)); CheckSplitting(dir, midTerm, 11, 9); // delete some documents using (w = new IndexWriter(dir, NewIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(Random(), MockTokenizer.WHITESPACE, false)) .SetOpenMode(IndexWriterConfig.OpenMode_e.APPEND).SetMergePolicy(NoMergePolicy.COMPOUND_FILES))) { w.DeleteDocuments(midTerm); w.DeleteDocuments(new Term("id", string.Format(CultureInfo.InvariantCulture, format, 2))); } CheckSplitting(dir, midTerm, 10, 8); } }
public void DeleteFromIndex(Term term) { Directory directory = FSDirectory.Open(new System.IO.DirectoryInfo(serverPath)); Analyzer analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_29); IndexWriter writer = new IndexWriter(directory, analyzer, false, IndexWriter.MaxFieldLength.UNLIMITED); writer.DeleteDocuments(term); writer.Commit(); writer.Close(); }
public static void ClearIndexRecord(int id) { var analyzer = new StandardAnalyzer(Version.LUCENE_29); using (var writer = new IndexWriter(Directory, analyzer, IndexWriter.MaxFieldLength.UNLIMITED)) { var searchQuery = new TermQuery(new Term("Id", Convert.ToString(id))); writer.DeleteDocuments(searchQuery); analyzer.Close(); } }
public void DeleteIndex(int id) { var analyzer = new StandardAnalyzer(Version.LUCENE_30); using (var writer = new IndexWriter(Directory, analyzer, IndexWriter.MaxFieldLength.UNLIMITED)) { var searchQuery = new TermQuery(new Term(SearchingFields.Id.ToString(), id.ToString())); writer.DeleteDocuments(searchQuery); analyzer.Close(); } }
public int FindChangedAndIndex() { var lastDateTimeFile = Path.Combine(path, "last.time"); var lastDateTime = DateTime.MinValue; try { if (File.Exists(lastDateTimeFile)) { lastDateTime = DateTime.Parse(File.ReadAllText(lastDateTimeFile)).ToUniversalTime(); } } catch (FormatException) { } catch (ArgumentNullException) { } var copyLastDateTime = lastDateTime; lastDateTime = DateTime.UtcNow; var texts = SelectTextsForIndex(copyLastDateTime, true); if (0 < texts.Count) { var directory = GetOrCreateDirectory(path); var analyzer = new AnalyzersProvider().GetAnalyzer(tenant.GetCulture().TwoLetterISOLanguageName); var create = directory.ListAll().Length == 0; var index = new IndexWriter(directory, analyzer, create, IndexWriter.MaxFieldLength.UNLIMITED); try { foreach (var t in texts) { var term = new Term("Id", t.Item1); if (string.IsNullOrEmpty(t.Item2)) { index.DeleteDocuments(term); } else { var doc = new Document(); doc.Add(new Field("Id", t.Item1, Field.Store.YES, Field.Index.NOT_ANALYZED, Field.TermVector.YES)); doc.Add(new Field("Text", t.Item2, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO)); index.UpdateDocument(term, doc); } } } finally { index.Optimize(); index.Commit(); index.Close(); } File.WriteAllText(lastDateTimeFile, lastDateTime.ToString("o")); } return texts.Count; }
public void RemoveIndex(Guid id) { var directory = FSDirectory.Open(_path); using ( var write = new IndexWriter(directory, _analyzer, !IndexReader.IndexExists(directory), IndexWriter.MaxFieldLength.UNLIMITED)) { write.DeleteDocuments(new Term("id", id.ToString())); } }
public static void ClearLuceneIndexRecord(int record_id) { // init lucene using (var analyzer = new RussianAnalyzer(Version.LUCENE_30)) using (var writer = new IndexWriter(LuceneConfig.Directory, analyzer, IndexWriter.MaxFieldLength.UNLIMITED)) { // remove older index entry var searchQuery = new TermQuery(new Term("Id", record_id.ToString())); writer.DeleteDocuments(searchQuery); } }
private void AddToIndex(Project project, IndexWriter writer) { var searchQuery = new TermQuery(new Term(SearchingFields.Id.ToString(), project.Id.ToString())); writer.DeleteDocuments(searchQuery); var doc = new Document(); doc.Add(new Field(SearchingFields.Id.ToString(), project.Id.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED)); doc.Add(new Field(SearchingFields.Name.ToString(), project.Title, Field.Store.YES, Field.Index.ANALYZED)); writer.AddDocument(doc); }
/// <summary> /// Clears the index record by id /// </summary> /// <param name="recordId"></param> public static void ClearLuceneIndexRecord(int recordId) { //var analyzer = new StandardAnalyzer(Version.LUCENE_30); var analyzer = SearcherBase.GetAnalyzer(); using (var writer = new IndexWriter(SearcherBase.Directory, analyzer, IndexWriter.MaxFieldLength.UNLIMITED)) { var searchQuery = new TermQuery(new Term("Id", recordId.ToString())); writer.DeleteDocuments(searchQuery); analyzer.Close(); } }
/// <summary> /// Deletes the manual from the index. /// </summary> /// <param name="manualId">The manual identifier.</param> public void DeleteManualFromIndex(int manualId) { var analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30); using (var writer = new IndexWriter(directoryTemp, analyzer, IndexWriter.MaxFieldLength.UNLIMITED)) { var query = new TermQuery(new Term("Id", manualId.ToString())); writer.DeleteDocuments(query); writer.Optimize(); analyzer.Close(); } }
/// <summary> /// Clears an item from the index. /// </summary> /// <param name="id">The identifier.</param> /// <param name="luceneVersion">The lucene version.</param> /// <param name="fsDirectory">The fs directory.</param> /// <param name="maxFieldLength">Maximum length of the field.</param> public void ClearIndex(string id, Version luceneVersion, FSDirectory fsDirectory, IndexWriter.MaxFieldLength maxFieldLength) { var analyzer = new StandardAnalyzer(luceneVersion); using (var indexWriter = new IndexWriter(fsDirectory, analyzer, maxFieldLength)) { var searchQuery = new TermQuery(new Term("Key", id)); indexWriter.DeleteDocuments(searchQuery); analyzer.Close(); } }
private static void _addToLuceneIndex(SampleData sampleData, IndexWriter writer) { var searchQuery = new TermQuery(new Term("Id", sampleData.Id.ToString())); writer.DeleteDocuments(searchQuery); var doc = new Document(); doc.Add(new Field("Id", sampleData.Id.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED)); doc.Add(new Field("Name", sampleData.Name, Field.Store.YES, Field.Index.ANALYZED)); doc.Add(new Field("Description", sampleData.Description, Field.Store.YES, Field.Index.ANALYZED)); writer.AddDocument(doc); }
protected virtual Task <bool> UpdateIndex(IEnumerable <TLuceneEntity> articles, Lucene.Net.Store.Directory directory) { if (articles.Count() <= 0) { return(Task.FromResult(false)); } var propertyInfo = typeof(TLuceneEntity).GetProperties().FirstOrDefault(p => p.CustomAttributes.Any(ca => ca.AttributeType == typeof(LuceneKeyAttribute))); string keyName = propertyInfo?.Name.ToString().ToLower(); if (string.IsNullOrEmpty(keyName)) { return(Task.FromResult(true)); } IndexWriter writer = new Lucene.Net.Index.IndexWriter(directory, new Lucene.Net.Index.IndexWriterConfig(Lucene.Net.Util.LuceneVersion.LUCENE_48, _analyzer)); var tempQuery = new PhraseQuery(); foreach (var item in articles) { string value = propertyInfo.GetValue(item).ToString(); tempQuery.Add(new Term(keyName, value)); } var boolQuery = new BooleanQuery(); boolQuery.Add(tempQuery, Occur.MUST); var queryParser = new QueryParser(LuceneVersion.LUCENE_48, keyName, _analyzer); var query = queryParser.Parse(boolQuery.ToString()); writer.DeleteDocuments(query); try { CreateDocument(articles, writer); } catch { Lucene.Net.Index.IndexWriter.Unlock(directory); throw; } finally { writer.Dispose(); } return(Task.FromResult(true)); }
/// <summary> /// Adds the PointDataSummary to the lucene index specified by the IndexWriter /// </summary> /// <param name="indexWriter"></param> /// <param name="summary"></param> public void Index(IndexWriter indexWriter, PointDataSummary summary) { // Delete the current document if it exists already indexWriter.DeleteDocuments(new Lucene.Net.Index.Term("point_id", summary.Id.ToString())); // Create Lucene document and add the indexed fields. Document doc = new Document(); doc.Add(new Field("point_id", summary.Id.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED, Field.TermVector.NO)); doc.Add(new Field("Name", summary.Name, Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.NO)); doc.Add(new Field("Tags", summary.Tag, Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.NO)); doc.Add(new Field("Description", summary.Description, Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.NO)); doc.Add(new Field("LayerId", summary.LayerId, Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.NO)); indexWriter.AddDocument(doc); }
private static void _addToLuceneIndex(CardCriterion cardCriterion, IndexWriter writer) { //Add Tikafields from CardCriteriaRepository maybe here later to avoid RAM-haevy traffic? // remove older index entry TermQuery searchQuery = new TermQuery(new Term("EditionId", Convert.ToString(cardCriterion.EditionId))); writer.DeleteDocuments(searchQuery); // add new index entry Document doc = new Document(); // add lucene fields mapped to db fields doc.Add(new Field("EditionId", Convert.ToString(cardCriterion.EditionId), Field.Store.YES, Field.Index.NOT_ANALYZED)); doc.Add(new Field("CardId", Convert.ToString(cardCriterion.CardId), Field.Store.YES, Field.Index.NOT_ANALYZED)); //doc.Add(new NumericField("CardId").SetIntValue(cardCriterion.CardId)); doc.Add(new Field("CardName", cardCriterion.CardName, Field.Store.YES, Field.Index.ANALYZED)); //doc.Add(new Field("CardName", cardCriterion.CardName, Field.Store.NO, // Field.Index.ANALYZED)); doc.Add(new Field("CardKind", cardCriterion.CardKind, Field.Store.YES, Field.Index.NOT_ANALYZED)); //Changed from analyzed doc.Add(new Field("FileExt", cardCriterion.FileExt, Field.Store.YES, Field.Index.NOT_ANALYZED)); doc.Add(new Field("CardEdition", cardCriterion.CardEdition, Field.Store.YES, Field.Index.ANALYZED)); doc.Add(new Field("CardAdoptionKindSubject", cardCriterion.CardAdoptionKindSubject, Field.Store.YES, Field.Index.NOT_ANALYZED)); //Changed from analyzed doc.Add(new Field("CardAdoptionSubject", cardCriterion.CardAdoptionSubject, Field.Store.YES, Field.Index.NOT_ANALYZED)); //Changed from analyzed doc.Add(new Field("Territory", cardCriterion.Territory, Field.Store.YES, Field.Index.NOT_ANALYZED)); //doc.Add(new Field("CardEdition", cardCriterion.CardEdition, Field.Store.NO, // Field.Index.ANALYZED)); doc.Add(new Field("CardAdoptionNumber", cardCriterion.CardAdoptionNumber, Field.Store.YES, Field.Index.NOT_ANALYZED)); doc.Add(new Field("CardAdoptionDate", DateTools.DateToString(cardCriterion.CardAdoptionDate, DateTools.Resolution.DAY), Field.Store.YES, Field.Index.NOT_ANALYZED)); #region Index Metadata //doc.Add(new Field("ContentType", cardCriterion.ContentType, Field.Store.YES, // Field.Index.ANALYZED)); No need to index contentype so far //foreach (var kvp in cardCriterion.ContentMetadataDict) //{ // doc.Add(new Field(kvp.Key, kvp.Value, Field.Store.YES, Field.Index.NOT_ANALYZED)); //} //Add later if metadata indexing is needed #endregion // add entry to index writer.AddDocument(doc); }
private void AddToIndex(Lecturer lecturer, IndexWriter writer) { var searchQuery = new TermQuery(new Term(SearchingFields.Id.ToString(), lecturer.Id.ToString())); writer.DeleteDocuments(searchQuery); var doc = new Document(); doc.Add(new Field(SearchingFields.Id.ToString(), lecturer.Id.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED)); doc.Add(new Field(SearchingFields.FirstName.ToString(), lecturer.FirstName, Field.Store.YES, Field.Index.ANALYZED)); doc.Add(new Field(SearchingFields.MiddleName.ToString(), lecturer.MiddleName, Field.Store.YES, Field.Index.ANALYZED)); doc.Add(new Field(SearchingFields.LastName.ToString(), lecturer.LastName, Field.Store.YES, Field.Index.ANALYZED)); doc.Add(new Field(SearchingFields.Name.ToString(), lecturer.User != null ? lecturer.User.UserName : "", Field.Store.YES, Field.Index.ANALYZED)); writer.AddDocument(doc); }
public static void ClearLuceneIndexRecord(int record_id) { // init lucene var analyzer = new StandardAnalyzer(Version.LUCENE_30); using (var writer = new IndexWriter(_directory, analyzer, IndexWriter.MaxFieldLength.UNLIMITED)) { // remove older index entry var searchQuery = new TermQuery(new Term("Id", record_id.ToString())); writer.DeleteDocuments(searchQuery); // close handles analyzer.Close(); writer.Dispose(); } }
private static void _addToLuceneIndex(LuceneBookModel bookData, IndexWriter writer) { // remove older index entry var searchQuery = new TermQuery(new Term("PostId", bookData.PostId.ToString(CultureInfo.InvariantCulture))); writer.DeleteDocuments(searchQuery); // add new index entry var bookDocument = new Document(); // add lucene fields mapped to db fields bookDocument.Add(new Field("PostId", bookData.PostId.ToString(CultureInfo.InvariantCulture), Field.Store.YES, Field.Index.NOT_ANALYZED)); if (bookData.Author != null) { bookDocument.Add(new Field("Author", bookData.Author, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); } if (bookData.Title != null) { bookDocument.Add(new Field("Title", bookData.Title, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); } if (bookData.Name != null) { bookDocument.Add(new Field("Name", bookData.Name, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS) { Boost = 3 }); } if (bookData.Publisher != null) { bookDocument.Add(new Field("Publisher", bookData.Publisher, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); } if (bookData.ISBN != null) { bookDocument.Add(new Field("ISBN", bookData.ISBN, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); } if (bookData.Description != null) { bookDocument.Add(new Field("Description", bookData.Description, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); } // add entry to index writer.AddDocument(bookDocument); }
// test using a sparse index (with deleted docs). The DocIdSet should be not cacheable, as it uses TermDocs if the range contains 0 public void TestSparseIndex() { RAMDirectory dir = new RAMDirectory(); IndexWriter writer = new IndexWriter(dir, new SimpleAnalyzer(), T, IndexWriter.MaxFieldLength.LIMITED, null); for (int d = -20; d <= 20; d++) { Document doc = new Document(); doc.Add(new Field("id", d.ToString(), Field.Store.NO, Field.Index.NOT_ANALYZED)); doc.Add(new Field("body", "body", Field.Store.NO, Field.Index.NOT_ANALYZED)); writer.AddDocument(doc, null); } writer.Optimize(null); writer.DeleteDocuments(null, new Term("id", "0")); writer.Close(); IndexReader reader = IndexReader.Open((Directory)dir, true, null); IndexSearcher Search = new IndexSearcher(reader); Assert.True(reader.HasDeletions); ScoreDoc[] result; Query q = new TermQuery(new Term("body", "body")); FieldCacheRangeFilter <sbyte?> fcrf; result = Search.Search(q, fcrf = FieldCacheRangeFilter.NewByteRange("id", -20, 20, T, T), 100, null).ScoreDocs; Assert.False(fcrf.GetDocIdSet(reader.GetSequentialSubReaders()[0], null).IsCacheable, "DocIdSet must be not cacheable"); Assert.AreEqual(40, result.Length, "find all"); result = Search.Search(q, fcrf = FieldCacheRangeFilter.NewByteRange("id", 0, 20, T, T), 100, null).ScoreDocs; Assert.False(fcrf.GetDocIdSet(reader.GetSequentialSubReaders()[0], null).IsCacheable, "DocIdSet must be not cacheable"); Assert.AreEqual(20, result.Length, "find all"); result = Search.Search(q, fcrf = FieldCacheRangeFilter.NewByteRange("id", -20, 0, T, T), 100, null).ScoreDocs; Assert.False(fcrf.GetDocIdSet(reader.GetSequentialSubReaders()[0], null).IsCacheable, "DocIdSet must be not cacheable"); Assert.AreEqual(20, result.Length, "find all"); result = Search.Search(q, fcrf = FieldCacheRangeFilter.NewByteRange("id", 10, 20, T, T), 100, null).ScoreDocs; Assert.True(fcrf.GetDocIdSet(reader.GetSequentialSubReaders()[0], null).IsCacheable, "DocIdSet must be not cacheable"); Assert.AreEqual(11, result.Length, "find all"); result = Search.Search(q, fcrf = FieldCacheRangeFilter.NewByteRange("id", -20, -10, T, T), 100, null).ScoreDocs; Assert.True(fcrf.GetDocIdSet(reader.GetSequentialSubReaders()[0], null).IsCacheable, "DocIdSet must be not cacheable"); Assert.AreEqual(11, result.Length, "find all"); }
private static void _addToLuceneIndex(SampleData sampleData, IndexWriter writer) { // remove older index entry var searchQuery = new TermQuery(new Term("Id", sampleData.Id.ToString())); writer.DeleteDocuments(searchQuery); // add new index entry var doc = new Document(); // add lucene fields mapped to db fields doc.Add(new Field("Id", sampleData.Id.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED)); doc.Add(new Field("Name", sampleData.Name, Field.Store.YES, Field.Index.ANALYZED)); doc.Add(new Field("Description", sampleData.Description, Field.Store.YES, Field.Index.ANALYZED)); // add entry to index writer.AddDocument(doc); }
static void ProcessCatalogIndex(IndexWriter indexWriter, JObject catalogIndex, string baseAddress) { indexWriter.DeleteDocuments(new Term("@type", Schema.DataTypes.CatalogInfastructure.AbsoluteUri)); Document doc = new Document(); Add(doc, "@type", Schema.DataTypes.CatalogInfastructure.AbsoluteUri, Field.Store.YES, Field.Index.NOT_ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS); Add(doc, "Visibility", "Public", Field.Store.YES, Field.Index.NOT_ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS); if (catalogIndex != null) { IEnumerable<string> storagePaths = GetCatalogStoragePaths(catalogIndex); AddStoragePaths(doc, storagePaths, baseAddress); } indexWriter.AddDocument(doc); }
public void NearRealTimeTestTeste() { var diretorio = new RAMDirectory(); const int quantidadeItensEsperados = 10; using (var escritorIndice = new IndexWriter(diretorio, LuceneUtil.ObterAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED)) { for (int i = 0; i < quantidadeItensEsperados; i++) { var documento = new Document(); documento.Add(new Field("id", i.ToString(), Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS)); documento.Add(new Field("text", "aaa", Field.Store.NO, Field.Index.ANALYZED)); escritorIndice.AddDocument(documento); } var leitorIndice = escritorIndice.GetReader(); var pesquisa = new IndexSearcher(leitorIndice); var consulta = new TermQuery(new Term("text", "aaa")); var resultado = pesquisa.Search(consulta, 1); var mensagemErro = string.Format("Resultado não encontrou {0} itens que se encaixavam", quantidadeItensEsperados); Assert.AreEqual(quantidadeItensEsperados, resultado.TotalHits, mensagemErro); var outroDocumento = new Document(); escritorIndice.DeleteDocuments(new Term("id", "7")); outroDocumento.Add(new Field("id", "11", Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS)); outroDocumento.Add(new Field("text", "bbb", Field.Store.NO, Field.Index.ANALYZED)); escritorIndice.AddDocument(outroDocumento); var novoLeitor = leitorIndice.Reopen(); Assert.AreNotSame(novoLeitor, leitorIndice); leitorIndice.Close(); pesquisa = new IndexSearcher(novoLeitor); resultado = pesquisa.Search(consulta, 10); Assert.AreEqual(9, resultado.TotalHits, string.Format("Não encontrou {0} como quantidade esperada.", quantidadeItensEsperados - 1)); consulta = new TermQuery(new Term("text", "bbb")); resultado = pesquisa.Search(consulta, 1); Assert.AreEqual(1, resultado.TotalHits); novoLeitor.Close(); } }
private static void _addToLuceneIndex(ContentEntity content, IndexWriter writer) { // remove older index entry var searchQuery = new TermQuery(new Term("Id", content.Id.ToString())); writer.DeleteDocuments(searchQuery); // add new index entry var doc = new Document(); bool commentsAreEmpty = content.Comments == null || !content.Comments.Any(); // add lucene fields mapped to db fields doc.Add(new Field("Id", content.Id.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED)); doc.Add(new Field("Name", content.Name, Field.Store.YES, Field.Index.ANALYZED)); doc.Add(new Field("Description", content.Description, Field.Store.YES, Field.Index.ANALYZED)); doc.Add(new Field("Text", commentsAreEmpty ? string.Empty : content.Comments.Select(x => x.Text).Aggregate((x, y) => (x + y)), Field.Store.YES, Field.Index.ANALYZED)); // add entry to index writer.AddDocument(doc); }
protected override Task<bool> OnProcessBatch(CollectorHttpClient client, IEnumerable<JToken> items, JToken context, DateTime commitTimeStamp, CancellationToken cancellationToken) { PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper(new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30)); analyzer.AddAnalyzer("Id", new IdentifierKeywordAnalyzer()); int i = 0; using (IndexWriter writer = new IndexWriter(_directory, analyzer, false, IndexWriter.MaxFieldLength.UNLIMITED)) { foreach (JObject item in items) { i++; string id = item["nuget:id"].ToString(); string version = item["nuget:version"].ToString(); BooleanQuery query = new BooleanQuery(); query.Add(new BooleanClause(new TermQuery(new Term("Id", id.ToLowerInvariant())), Occur.MUST)); query.Add(new BooleanClause(new TermQuery(new Term("Version", version)), Occur.MUST)); writer.DeleteDocuments(query); Document doc = new Document(); doc.Add(new Field("Id", item["nuget:id"].ToString(), Field.Store.YES, Field.Index.ANALYZED)); doc.Add(new Field("Version", item["nuget:version"].ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED)); writer.AddDocument(doc); } string trace = Guid.NewGuid().ToString(); writer.Commit(new Dictionary<string, string> { { "commitTimeStamp", commitTimeStamp.ToString("O") }, { "trace", trace } }); Trace.TraceInformation("COMMIT {0} documents, index contains {1} documents, commitTimeStamp {2}, trace: {3}", i, writer.NumDocs(), commitTimeStamp.ToString("O"), trace); } return Task.FromResult(true); }
public virtual void TestSimpleCase() { string[] keywords = new string[] { "1", "2" }; string[] unindexed = new string[] { "Netherlands", "Italy" }; string[] unstored = new string[] { "Amsterdam has lots of bridges", "Venice has lots of canals" }; string[] text = new string[] { "Amsterdam", "Venice" }; Directory dir = NewDirectory(); IndexWriter modifier = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random(), MockTokenizer.WHITESPACE, false)).SetMaxBufferedDeleteTerms(1)); FieldType custom1 = new FieldType(); custom1.Stored = true; for (int i = 0; i < keywords.Length; i++) { Document doc = new Document(); doc.Add(NewStringField("id", keywords[i], Field.Store.YES)); doc.Add(NewField("country", unindexed[i], custom1)); doc.Add(NewTextField("contents", unstored[i], Field.Store.NO)); doc.Add(NewTextField("city", text[i], Field.Store.YES)); modifier.AddDocument(doc); } modifier.ForceMerge(1); modifier.Commit(); Term term = new Term("city", "Amsterdam"); int hitCount = GetHitCount(dir, term); Assert.AreEqual(1, hitCount); if (VERBOSE) { Console.WriteLine("\nTEST: now delete by term=" + term); } modifier.DeleteDocuments(term); modifier.Commit(); if (VERBOSE) { Console.WriteLine("\nTEST: now getHitCount"); } hitCount = GetHitCount(dir, term); Assert.AreEqual(0, hitCount); modifier.Dispose(); dir.Dispose(); }
public void CreateIndex(Guid id, string table, string content) { //Directory is the abstract class for where the index files are stored; its two subclasses are FSDirectory (files on disk) and RAMDirectory (in memory) var directory = FSDirectory.Open(_path); //the third parameter (bool create) controls index creation: if true, the newly created index overwrites the existing index files; if false, the existing index is updated rather than recreated using ( var write = new IndexWriter(directory, _analyzer, !IndexReader.IndexExists(directory), IndexWriter.MaxFieldLength.UNLIMITED)) { write.DeleteDocuments(new Term("id", id.ToString())); var document = new Document(); document.Add(new Field("id", id.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED)); document.Add(new Field("table", table, Field.Store.YES, Field.Index.ANALYZED)); document.Add(new Field("content", content, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); write.AddDocument(document); } }
public override void SetUp() { base.SetUp(); dir = NewDirectory(); using (IndexWriter w = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NoMergePolicy.COMPOUND_FILES))) { Document doc; for (int i = 0; i < NUM_DOCS; i++) { doc = new Document(); doc.Add(NewStringField("id", i + "", Field.Store.YES)); doc.Add(NewTextField("f", i + " " + i, Field.Store.YES)); w.AddDocument(doc); if (i % 3 == 0) w.Commit(); } w.Commit(); w.DeleteDocuments(new Term("id", "" + (NUM_DOCS - 1))); } input = DirectoryReader.Open(dir); }
public virtual void TestForceMergeDeletes() { Directory dir = NewDirectory(); IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())); TieredMergePolicy tmp = NewTieredMergePolicy(); conf.SetMergePolicy(tmp); conf.SetMaxBufferedDocs(4); tmp.MaxMergeAtOnce = 100; tmp.SegmentsPerTier = 100; tmp.ForceMergeDeletesPctAllowed = 30.0; IndexWriter w = new IndexWriter(dir, conf); for (int i = 0; i < 80; i++) { Document doc = new Document(); doc.Add(NewTextField("content", "aaa " + (i % 4), Field.Store.NO)); w.AddDocument(doc); } Assert.AreEqual(80, w.MaxDoc); Assert.AreEqual(80, w.NumDocs()); if (VERBOSE) { Console.WriteLine("\nTEST: delete docs"); } w.DeleteDocuments(new Term("content", "0")); w.ForceMergeDeletes(); Assert.AreEqual(80, w.MaxDoc); Assert.AreEqual(60, w.NumDocs()); if (VERBOSE) { Console.WriteLine("\nTEST: forceMergeDeletes2"); } ((TieredMergePolicy)w.Config.MergePolicy).ForceMergeDeletesPctAllowed = 10.0; w.ForceMergeDeletes(); Assert.AreEqual(60, w.NumDocs()); Assert.AreEqual(60, w.MaxDoc); w.Dispose(); dir.Dispose(); }
public void End(bool shouldClose) { if (!_is_started) { return; } if (!shouldClose) { return; } //build 2del file list if (!_job_status.Cancelled) { TermEnum term_enum = _index_reader.Terms(); Term path_term = new Term("path"); int nb_terms = 0; while (term_enum.SkipTo(path_term)) //skip to new term equal or *ABOVE* "path:" !!! { Term term = term_enum.Term(); if (term.Field() != path_term.Field()) { break; } if (!File.Exists(term.Text())) { _del_file_list.Add(term.Text()); } if (_job_status.Cancelled) { break; } nb_terms++; } term_enum.Close(); Logger.Log.Info("update: deletion: {0} analyzed terms, found {1} vanished files.", nb_terms, _del_file_list.Count); } _index_searcher.Close(); _index_reader.Close(); //--- deleting deprecated if ((_del_file_list.Count > 0) && (!_job_status.Cancelled)) { Stopwatch watch = new Stopwatch(); watch.Start(); int num_file = 0; int nb_files = _del_file_list.Count; IndexWriter writer = new IndexWriter(_index_path, _default_analyzer, false); foreach (string path in _del_file_list) { if (((num_file++) % 101) == 1) { int progress = ((((num_file++) + 1)) * 100) / nb_files; _job_status.Progress = progress; _job_status.Description = String.Format("upd: removing (from index) file {0}/{1} - {2}", num_file, _del_file_list.Count, StringFu.TimeSpanToString(new TimeSpan((long)(watch.ElapsedMilliseconds) * 10000))); } if (_job_status.Cancelled) { break; } writer.DeleteDocuments(new Term("path", path)); } writer.Commit(); writer.Close(); watch.Stop(); } //adding new files if ((_add_file_list.Count > 0) && (!_job_status.Cancelled)) { Stopwatch watch = new Stopwatch(); watch.Start(); IndexWriter writer = null; try { writer = new IndexWriter(_index_path, _default_analyzer, false, new IndexWriter.MaxFieldLength(IndexWriter.DEFAULT_MAX_FIELD_LENGTH)); int num_file = 0; int nb_files = _add_file_list.Count; foreach (BasicFileInfo fi in _add_file_list) { if (((num_file++) % 101) == 1) { int progress = ((((num_file++) + 1)) * 100) / nb_files; _job_status.Progress = progress; _job_status.Description = String.Format("upd: indexing new file {0}/{1} - {2}", num_file, _add_file_list.Count, StringFu.TimeSpanToString(new TimeSpan((long)(watch.ElapsedMilliseconds) * 10000))); } if (_job_status.Cancelled) { break; } writer.AddDocument(_doc_factory.CreateFromPath(fi.FilePath, fi.LastModification)); if (num_file % 20 == 0) { writer.Commit(); } } writer.Commit(); } catch (System.Exception ex) { Log.Error(ex); } finally { if (writer != null) { writer.Close(); writer = null; } } watch.Stop(); } //updating modified files if ((_upd_file_list.Count > 0) && (!_job_status.Cancelled)) { Stopwatch watch = new Stopwatch(); watch.Start(); int num_file = 0; int nb_files = _upd_file_list.Count; IndexWriter writer = null; try { writer = new IndexWriter(_index_path, _default_analyzer, false, new IndexWriter.MaxFieldLength(IndexWriter.DEFAULT_MAX_FIELD_LENGTH)); foreach (BasicFileInfo fi in _upd_file_list) { if (((num_file++) % 101) == 1) { int progress = ((((num_file++) + 1)) * 100) / nb_files; _job_status.Progress = progress; _job_status.Description = String.Format("upd: modified file {0}/{1} - {2}", num_file, _upd_file_list.Count, StringFu.TimeSpanToString(new TimeSpan((long)(watch.ElapsedMilliseconds) * 10000))); } if (_job_status.Cancelled) { break; } writer.UpdateDocument(new Term("path", fi.FilePath), _doc_factory.CreateFromPath(fi.FilePath, fi.LastModification)); } writer.Commit(); //LittleBeagle.Properties.Settings.Default.NbIndexedFiles = num_file; } catch (System.Exception ex) { Log.Error(ex); } 
finally { if (writer != null) { writer.Close(); writer = null; } } watch.Stop(); } }
public virtual void TestCommitOnCloseAbort() { Directory dir = NewDirectory(); IndexWriter writer = new IndexWriter(dir, (IndexWriterConfig)NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMaxBufferedDocs(10)); for (int i = 0; i < 14; i++) { AddDoc(writer); } writer.Dispose(); Term searchTerm = new Term("content", "aaa"); IndexReader reader = DirectoryReader.Open(dir); IndexSearcher searcher = NewSearcher(reader); ScoreDoc[] hits = searcher.Search(new TermQuery(searchTerm), null, 1000).ScoreDocs; Assert.AreEqual(14, hits.Length, "first number of hits"); reader.Dispose(); writer = new IndexWriter(dir, (IndexWriterConfig)NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetOpenMode(OpenMode.APPEND).SetMaxBufferedDocs(10)); for (int j = 0; j < 17; j++) { AddDoc(writer); } // Delete all docs: writer.DeleteDocuments(searchTerm); reader = DirectoryReader.Open(dir); searcher = NewSearcher(reader); hits = searcher.Search(new TermQuery(searchTerm), null, 1000).ScoreDocs; Assert.AreEqual(14, hits.Length, "reader incorrectly sees changes from writer"); reader.Dispose(); // Now, close the writer: writer.Rollback(); TestIndexWriter.AssertNoUnreferencedFiles(dir, "unreferenced files remain after rollback()"); reader = DirectoryReader.Open(dir); searcher = NewSearcher(reader); hits = searcher.Search(new TermQuery(searchTerm), null, 1000).ScoreDocs; Assert.AreEqual(14, hits.Length, "saw changes after writer.abort"); reader.Dispose(); // Now make sure we can re-open the index, add docs, // and all is good: writer = new IndexWriter(dir, (IndexWriterConfig)NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetOpenMode(OpenMode.APPEND).SetMaxBufferedDocs(10)); // On abort, writer in fact may write to the same // segments_N file: if (dir is MockDirectoryWrapper) { ((MockDirectoryWrapper)dir).PreventDoubleWrite = false; } for (int i = 0; i < 12; i++) { for (int j = 0; j < 17; j++) { AddDoc(writer); } IndexReader r = DirectoryReader.Open(dir); searcher = NewSearcher(r); hits = searcher.Search(new TermQuery(searchTerm), null, 1000).ScoreDocs; Assert.AreEqual(14, hits.Length, "reader incorrectly sees changes from writer"); r.Dispose(); } writer.Dispose(); IndexReader ir = DirectoryReader.Open(dir); searcher = NewSearcher(ir); hits = searcher.Search(new TermQuery(searchTerm), null, 1000).ScoreDocs; Assert.AreEqual(218, hits.Length, "didn't see changes after close"); ir.Dispose(); dir.Dispose(); }
public override void Run() { DirectoryReader currentReader = null; Random random = LuceneTestCase.Random; try { Document doc = new Document(); doc.Add(new TextField("id", "1", Field.Store.NO)); Writer.AddDocument(doc); Holder.Reader = currentReader = Writer.GetReader(true); Term term = new Term("id"); for (int i = 0; i < NumOps && !Holder.Stop; i++) { float nextOp = (float)random.NextDouble(); if (nextOp < 0.3) { term.Set("id", new BytesRef("1")); Writer.UpdateDocument(term, doc); } else if (nextOp < 0.5) { Writer.AddDocument(doc); } else { term.Set("id", new BytesRef("1")); Writer.DeleteDocuments(term); } if (Holder.Reader != currentReader) { Holder.Reader = currentReader; if (Countdown) { Countdown = false; Latch.Signal(); } } if (random.NextBoolean()) { Writer.Commit(); DirectoryReader newReader = DirectoryReader.OpenIfChanged(currentReader); if (newReader != null) { currentReader.DecRef(); currentReader = newReader; } if (currentReader.NumDocs == 0) { Writer.AddDocument(doc); } } } } catch (Exception e) { Failed = e; } finally { Holder.Reader = null; if (Countdown) { Latch.Signal(); } if (currentReader != null) { try { currentReader.DecRef(); } #pragma warning disable 168 catch (IOException e) #pragma warning restore 168 { } } } if (VERBOSE) { Console.WriteLine("writer stopped - forced by reader: " + Holder.Stop); } }
/// <summary> Make sure if modifier tries to commit but hits disk full that modifier /// remains consistent and usable. Similar to TestIndexReader.testDiskFull(). /// </summary> private void TestOperationsOnDiskFull(bool updates) { bool debug = false; Term searchTerm = new Term("content", "aaa"); int START_COUNT = 157; int END_COUNT = 144; for (int pass = 0; pass < 2; pass++) { bool autoCommit = (0 == pass); // First build up a starting index: MockRAMDirectory startDir = new MockRAMDirectory(); IndexWriter writer = new IndexWriter(startDir, autoCommit, new WhitespaceAnalyzer(), true); for (int i = 0; i < 157; i++) { Document d = new Document(); d.Add(new Field("id", System.Convert.ToString(i), Field.Store.YES, Field.Index.NOT_ANALYZED)); d.Add(new Field("content", "aaa " + i, Field.Store.NO, Field.Index.ANALYZED)); writer.AddDocument(d); } writer.Close(); long diskUsage = startDir.SizeInBytes(); long diskFree = diskUsage + 10; System.IO.IOException err = null; bool done = false; // Iterate w/ ever increasing free disk space: while (!done) { MockRAMDirectory dir = new MockRAMDirectory(startDir); dir.SetPreventDoubleWrite(false); IndexWriter modifier = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer()); modifier.SetMaxBufferedDocs(1000); // use flush or close modifier.SetMaxBufferedDeleteTerms(1000); // use flush or close // For each disk size, first try to commit against // dir that will hit random IOExceptions & disk // full; after, give it infinite disk space & turn // off random IOExceptions & retry w/ same reader: bool success = false; for (int x = 0; x < 2; x++) { double rate = 0.1; double diskRatio = ((double)diskFree) / diskUsage; long thisDiskFree; System.String testName; if (0 == x) { thisDiskFree = diskFree; if (diskRatio >= 2.0) { rate /= 2; } if (diskRatio >= 4.0) { rate /= 2; } if (diskRatio >= 6.0) { rate = 0.0; } if (debug) { System.Console.Out.WriteLine("\ncycle: " + diskFree + " bytes"); } testName = "disk full during reader.close() @ " + thisDiskFree + " bytes"; } else { thisDiskFree = 0; rate = 0.0; if (debug) { System.Console.Out.WriteLine("\ncycle: same writer: unlimited disk space"); } testName = "reader re-use after disk full"; } dir.SetMaxSizeInBytes(thisDiskFree); dir.SetRandomIOExceptionRate(rate, diskFree); try { if (0 == x) { int docId = 12; for (int i = 0; i < 13; i++) { if (updates) { Document d = new Document(); d.Add(new Field("id", System.Convert.ToString(i), Field.Store.YES, Field.Index.NOT_ANALYZED)); d.Add(new Field("content", "bbb " + i, Field.Store.NO, Field.Index.ANALYZED)); modifier.UpdateDocument(new Term("id", System.Convert.ToString(docId)), d); } else { // deletes modifier.DeleteDocuments(new Term("id", System.Convert.ToString(docId))); // modifier.setNorm(docId, "contents", (float)2.0); } docId += 12; } } modifier.Close(); success = true; if (0 == x) { done = true; } } catch (System.IO.IOException e) { if (debug) { System.Console.Out.WriteLine(" hit IOException: " + e); System.Console.Out.WriteLine(e.StackTrace); } err = e; if (1 == x) { System.Console.Error.WriteLine(e.StackTrace); Assert.Fail(testName + " hit IOException after disk space was freed up"); } } // If the close() succeeded, make sure there are // no unreferenced files. 
if (success) { TestIndexWriter.AssertNoUnreferencedFiles(dir, "after writer.close"); } // Finally, verify index is not corrupt, and, if // we succeeded, we see all docs changed, and if // we failed, we see either all docs or no docs // changed (transactional semantics): IndexReader newReader = null; try { newReader = IndexReader.Open(dir); } catch (System.IO.IOException e) { System.Console.Error.WriteLine(e.StackTrace); Assert.Fail(testName + ":exception when creating IndexReader after disk full during close: " + e); } IndexSearcher searcher = new IndexSearcher(newReader); ScoreDoc[] hits = null; try { hits = searcher.Search(new TermQuery(searchTerm), null, 1000).scoreDocs; } catch (System.IO.IOException e) { System.Console.Error.WriteLine(e.StackTrace); Assert.Fail(testName + ": exception when searching: " + e); } int result2 = hits.Length; if (success) { if (x == 0 && result2 != END_COUNT) { Assert.Fail(testName + ": method did not throw exception but hits.length for search on term 'aaa' is " + result2 + " instead of expected " + END_COUNT); } else if (x == 1 && result2 != START_COUNT && result2 != END_COUNT) { // It's possible that the first exception was // "recoverable" wrt pending deletes, in which // case the pending deletes are retained and // then re-flushing (with plenty of disk // space) will succeed in flushing the // deletes: Assert.Fail(testName + ": method did not throw exception but hits.length for search on term 'aaa' is " + result2 + " instead of expected " + START_COUNT + " or " + END_COUNT); } } else { // On hitting exception we still may have added // all docs: if (result2 != START_COUNT && result2 != END_COUNT) { System.Console.Error.WriteLine(err.StackTrace); Assert.Fail(testName + ": method did throw exception but hits.length for search on term 'aaa' is " + result2 + " instead of expected " + START_COUNT + " or " + END_COUNT); } } searcher.Close(); newReader.Close(); if (result2 == END_COUNT) { break; } } dir.Close(); // Try again with 10 more bytes of free space: diskFree += 10; } } }
public virtual void TestDeletedDocs() { Directory dir = NewDirectory(); IndexWriter writer = new IndexWriter(dir, (IndexWriterConfig)NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMaxBufferedDocs(2)); for (int i = 0; i < 19; i++) { Document doc = new Document(); FieldType customType = new FieldType(TextField.TYPE_STORED); customType.StoreTermVectors = true; customType.StoreTermVectorPositions = true; customType.StoreTermVectorOffsets = true; doc.Add(NewField("field", "aaa" + i, customType)); writer.AddDocument(doc); } writer.ForceMerge(1); writer.Commit(); writer.DeleteDocuments(new Term("field", "aaa5")); writer.Dispose(); ByteArrayOutputStream bos = new ByteArrayOutputStream(1024); CheckIndex checker = new CheckIndex(dir); checker.InfoStream = new StreamWriter(bos, Encoding.UTF8); if (VERBOSE) { checker.InfoStream = Console.Out; } CheckIndex.Status indexStatus = checker.DoCheckIndex(); if (indexStatus.Clean == false) { Console.WriteLine("CheckIndex failed"); checker.FlushInfoStream(); Console.WriteLine(bos.ToString()); Assert.Fail(); } CheckIndex.Status.SegmentInfoStatus seg = indexStatus.SegmentInfos[0]; Assert.IsTrue(seg.OpenReaderPassed); Assert.IsNotNull(seg.Diagnostics); Assert.IsNotNull(seg.FieldNormStatus); Assert.IsNull(seg.FieldNormStatus.Error); Assert.AreEqual(1, seg.FieldNormStatus.TotFields); Assert.IsNotNull(seg.TermIndexStatus); Assert.IsNull(seg.TermIndexStatus.Error); Assert.AreEqual(18, seg.TermIndexStatus.TermCount); Assert.AreEqual(18, seg.TermIndexStatus.TotFreq); Assert.AreEqual(18, seg.TermIndexStatus.TotPos); Assert.IsNotNull(seg.StoredFieldStatus); Assert.IsNull(seg.StoredFieldStatus.Error); Assert.AreEqual(18, seg.StoredFieldStatus.DocCount); Assert.AreEqual(18, seg.StoredFieldStatus.TotFields); Assert.IsNotNull(seg.TermVectorStatus); Assert.IsNull(seg.TermVectorStatus.Error); Assert.AreEqual(18, seg.TermVectorStatus.DocCount); Assert.AreEqual(18, seg.TermVectorStatus.TotVectors); Assert.IsTrue(seg.Diagnostics.Count > 0); IList <string> onlySegments = new List <string>(); onlySegments.Add("_0"); Assert.IsTrue(checker.DoCheckIndex(onlySegments).Clean == true); dir.Dispose(); }
protected internal virtual void DeleteDocuments(Term term) { Writer.DeleteDocuments(term); }
/// <summary> /// Deletes the documents for objects of the given type matching the given selection. /// </summary> /// <param name="writer"> /// The IndexWriter to delete the documents from. /// </param> /// <param name="type"> /// The type of the object to delete documents for. /// </param> /// <param name="selection"> /// The Query which selects the documents to delete. /// </param> public static void DeleteDocuments(this IndexWriter writer, Type type, Query selection) { Query deleteQuery = new FilteredQuery(selection, ObjectMapping.GetTypeFilter(type)); writer.DeleteDocuments(deleteQuery); }
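A hedged usage sketch for the extension method above; the Product type and the "Discontinued" field are illustrative placeholders, and only the DeleteDocuments(Type, Query) signature and the ObjectMapping type filter come from the snippet:

// Remove every indexed object of type Product matching the selection query;
// the extension method combines the query with the type filter for Product.
Query selection = new TermQuery(new Term("Discontinued", "true"));
writer.DeleteDocuments(typeof(Product), selection);
writer.Commit();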
public virtual void TestKeepLastNDeletionPolicyWithCreates() { const int N = 10; for (int pass = 0; pass < 2; pass++) { bool useCompoundFile = (pass % 2) != 0; Directory dir = NewDirectory(); IndexWriterConfig conf = (IndexWriterConfig)NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetOpenMode(OpenMode.CREATE).SetIndexDeletionPolicy(new KeepLastNDeletionPolicy(this, N)).SetMaxBufferedDocs(10); MergePolicy mp = conf.MergePolicy; mp.NoCFSRatio = useCompoundFile ? 1.0 : 0.0; IndexWriter writer = new IndexWriter(dir, conf); KeepLastNDeletionPolicy policy = (KeepLastNDeletionPolicy)writer.Config.IndexDeletionPolicy; writer.Dispose(); Term searchTerm = new Term("content", "aaa"); Query query = new TermQuery(searchTerm); for (int i = 0; i < N + 1; i++) { conf = (IndexWriterConfig)NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetOpenMode(OpenMode.APPEND).SetIndexDeletionPolicy(policy).SetMaxBufferedDocs(10); mp = conf.MergePolicy; mp.NoCFSRatio = useCompoundFile ? 1.0 : 0.0; writer = new IndexWriter(dir, conf); policy = (KeepLastNDeletionPolicy)writer.Config.IndexDeletionPolicy; for (int j = 0; j < 17; j++) { AddDocWithID(writer, i * (N + 1) + j); } // this is a commit writer.Dispose(); conf = (new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))).SetIndexDeletionPolicy(policy).SetMergePolicy(NoMergePolicy.COMPOUND_FILES); writer = new IndexWriter(dir, conf); policy = (KeepLastNDeletionPolicy)writer.Config.IndexDeletionPolicy; writer.DeleteDocuments(new Term("id", "" + (i * (N + 1) + 3))); // this is a commit writer.Dispose(); IndexReader reader = DirectoryReader.Open(dir); IndexSearcher searcher = NewSearcher(reader); ScoreDoc[] hits = searcher.Search(query, null, 1000).ScoreDocs; Assert.AreEqual(16, hits.Length); reader.Dispose(); writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetOpenMode(OpenMode.CREATE).SetIndexDeletionPolicy(policy)); policy = (KeepLastNDeletionPolicy)writer.Config.IndexDeletionPolicy; // this will not commit: there are no changes // pending because we opened for "create": writer.Dispose(); } Assert.AreEqual(3 * (N + 1) + 1, policy.NumOnInit); Assert.AreEqual(3 * (N + 1) + 1, policy.NumOnCommit); IndexReader rwReader = DirectoryReader.Open(dir); IndexSearcher searcher_ = NewSearcher(rwReader); ScoreDoc[] hits_ = searcher_.Search(query, null, 1000).ScoreDocs; Assert.AreEqual(0, hits_.Length); // Simplistic check: just verify only the past N segments_N's still // exist, and, I can open a reader on each: long gen = SegmentInfos.GetLastCommitGeneration(dir); dir.DeleteFile(IndexFileNames.SEGMENTS_GEN); int expectedCount = 0; rwReader.Dispose(); for (int i = 0; i < N + 1; i++) { try { IndexReader reader = DirectoryReader.Open(dir); // Work backwards in commits on what the expected // count should be. 
searcher_ = NewSearcher(reader); hits_ = searcher_.Search(query, null, 1000).ScoreDocs; Assert.AreEqual(expectedCount, hits_.Length); if (expectedCount == 0) { expectedCount = 16; } else if (expectedCount == 16) { expectedCount = 17; } else if (expectedCount == 17) { expectedCount = 0; } reader.Dispose(); if (i == N) { Assert.Fail("should have failed on commits before last " + N); } } catch (IOException /*e*/) { if (i != N) { throw; // LUCENENET: CA2200: Rethrow to preserve stack details (https://docs.microsoft.com/en-us/visualstudio/code-quality/ca2200-rethrow-to-preserve-stack-details) } } if (i < N) { dir.DeleteFile(IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen)); } gen--; } dir.Dispose(); } }
public override void Run() { DirectoryReader currentReader = null; Random random = LuceneTestCase.Random; try { Document doc = new Document(); doc.Add(new TextField("id", "1", Field.Store.NO)); writer.AddDocument(doc); holder.reader = currentReader = writer.GetReader(true); Term term = new Term("id"); for (int i = 0; i < numOps && !holder.stop; i++) { float nextOp = (float)random.NextDouble(); if (nextOp < 0.3) { term.Set("id", new BytesRef("1")); writer.UpdateDocument(term, doc); } else if (nextOp < 0.5) { writer.AddDocument(doc); } else { term.Set("id", new BytesRef("1")); writer.DeleteDocuments(term); } if (holder.reader != currentReader) { holder.reader = currentReader; if (countdown) { countdown = false; latch.Signal(); } } if (random.NextBoolean()) { writer.Commit(); DirectoryReader newReader = DirectoryReader.OpenIfChanged(currentReader); if (newReader != null) { currentReader.DecRef(); currentReader = newReader; } if (currentReader.NumDocs == 0) { writer.AddDocument(doc); } } } } catch (Exception e) when(e.IsThrowable()) { failed = e; } finally { holder.reader = null; if (countdown) { latch.Signal(); } if (currentReader != null) { try { currentReader.DecRef(); } catch (Exception e) when(e.IsIOException()) { } } } if (Verbose) { Console.WriteLine("writer stopped - forced by reader: " + holder.stop); } }
public virtual void TestForceMergeDeletes2() { Directory dir = NewDirectory(); IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetMaxBufferedDocs(2).SetRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH).SetMergePolicy(NewLogMergePolicy(50))); Document document = new Document(); FieldType customType = new FieldType(); customType.IsStored = true; FieldType customType1 = new FieldType(TextField.TYPE_NOT_STORED); customType1.IsTokenized = false; customType1.StoreTermVectors = true; customType1.StoreTermVectorPositions = true; customType1.StoreTermVectorOffsets = true; Field storedField = NewField("stored", "stored", customType); document.Add(storedField); Field termVectorField = NewField("termVector", "termVector", customType1); document.Add(termVectorField); Field idField = NewStringField("id", "", Field.Store.NO); document.Add(idField); for (int i = 0; i < 98; i++) { idField.SetStringValue("" + i); writer.AddDocument(document); } writer.Dispose(); IndexReader ir = DirectoryReader.Open(dir); Assert.AreEqual(98, ir.MaxDoc); Assert.AreEqual(98, ir.NumDocs); ir.Dispose(); IndexWriterConfig dontMergeConfig = (new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random))).SetMergePolicy(NoMergePolicy.COMPOUND_FILES); writer = new IndexWriter(dir, dontMergeConfig); for (int i = 0; i < 98; i += 2) { writer.DeleteDocuments(new Term("id", "" + i)); } writer.Dispose(); ir = DirectoryReader.Open(dir); Assert.AreEqual(49, ir.NumDocs); ir.Dispose(); writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetMergePolicy(NewLogMergePolicy(3))); Assert.AreEqual(49, writer.NumDocs); writer.ForceMergeDeletes(); writer.Dispose(); ir = DirectoryReader.Open(dir); Assert.AreEqual(49, ir.MaxDoc); Assert.AreEqual(49, ir.NumDocs); ir.Dispose(); dir.Dispose(); }
public override void Run() { DirectoryReader reader = null; bool success = false; try { Random random = Random(); while (NumUpdates.GetAndDecrement() > 0) { double group = random.NextDouble(); Term t; if (group < 0.1) { t = new Term("updKey", "g0"); } else if (group < 0.5) { t = new Term("updKey", "g1"); } else if (group < 0.8) { t = new Term("updKey", "g2"); } else { t = new Term("updKey", "g3"); } // System.out.println("[" + Thread.currentThread().getName() + "] numUpdates=" + numUpdates + " updateTerm=" + t); if (random.NextBoolean()) // sometimes unset a value { // System.err.println("[" + Thread.currentThread().getName() + "] t=" + t + ", f=" + f + ", updValue=UNSET"); Writer.UpdateBinaryDocValue(t, f, null); Writer.UpdateNumericDocValue(t, Cf, null); } else { long updValue = random.Next(); // System.err.println("[" + Thread.currentThread().getName() + "] t=" + t + ", f=" + f + ", updValue=" + updValue); Writer.UpdateBinaryDocValue(t, f, TestBinaryDocValuesUpdates.ToBytes(updValue)); Writer.UpdateNumericDocValue(t, Cf, updValue * 2); } if (random.NextDouble() < 0.2) { // delete a random document int doc = random.Next(NumDocs); // System.out.println("[" + Thread.currentThread().getName() + "] deleteDoc=doc" + doc); Writer.DeleteDocuments(new Term("id", "doc" + doc)); } if (random.NextDouble() < 0.05) // commit every 20 updates on average { // System.out.println("[" + Thread.currentThread().getName() + "] commit"); Writer.Commit(); } if (random.NextDouble() < 0.1) // reopen NRT reader (apply updates), on average once every 10 updates { if (reader == null) { // System.out.println("[" + Thread.currentThread().getName() + "] open NRT"); reader = DirectoryReader.Open(Writer, true); } else { // System.out.println("[" + Thread.currentThread().getName() + "] reopen NRT"); DirectoryReader r2 = DirectoryReader.OpenIfChanged(reader, Writer, true); if (r2 != null) { reader.Dispose(); reader = r2; } } } } // System.out.println("[" + Thread.currentThread().getName() + "] DONE"); success = true; } catch (IOException e) { throw new Exception(e.ToString(), e); } finally { if (reader != null) { try { reader.Dispose(); } catch (IOException e) { if (success) // suppress this exception only if there was another exception { throw new Exception(e.ToString(), e); } } } Done.Signal(); } }
public virtual void TestNoWaitClose() { Directory directory = NewDirectory(); Document doc = new Document(); FieldType customType = new FieldType(TextField.TYPE_STORED); customType.IsTokenized = false; Field idField = NewField("id", "", customType); doc.Add(idField); for (int pass = 0; pass < 2; pass++) { if (Verbose) { Console.WriteLine("TEST: pass="******"TEST: iter=" + iter); } for (int j = 0; j < 199; j++) { idField.SetStringValue(Convert.ToString(iter * 201 + j)); writer.AddDocument(doc); } int delID = iter * 199; for (int j = 0; j < 20; j++) { writer.DeleteDocuments(new Term("id", Convert.ToString(delID))); delID += 5; } // Force a bunch of merge threads to kick off so we // stress out aborting them on close: ((LogMergePolicy)writer.Config.MergePolicy).MergeFactor = 2; IndexWriter finalWriter = writer; IList <Exception> failure = new JCG.List <Exception>(); ThreadJob t1 = new ThreadAnonymousClass(this, doc, finalWriter, failure); if (failure.Count > 0) { ExceptionDispatchInfo.Capture(failure[0]).Throw(); // LUCENENET: Rethrow to preserve stack details from the original throw } t1.Start(); writer.Dispose(false); t1.Join(); // Make sure reader can read IndexReader reader = DirectoryReader.Open(directory); reader.Dispose(); // Reopen writer = new IndexWriter(directory, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetOpenMode(OpenMode.APPEND).SetMergePolicy(NewLogMergePolicy())); } writer.Dispose(); } directory.Dispose(); }
public virtual void TestDeleteLeftoverFiles()
{
    Directory dir = NewDirectory();
    if (dir is MockDirectoryWrapper)
    {
        ((MockDirectoryWrapper)dir).PreventDoubleWrite = false;
    }

    MergePolicy mergePolicy = NewLogMergePolicy(true, 10);

    // this test expects all of its segments to be in CFS
    mergePolicy.NoCFSRatio = 1.0;
    mergePolicy.MaxCFSSegmentSizeMB = double.PositiveInfinity;

    IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetMaxBufferedDocs(10).SetMergePolicy(mergePolicy).SetUseCompoundFile(true));
    int i;
    for (i = 0; i < 35; i++)
    {
        AddDoc(writer, i);
    }
    writer.Config.MergePolicy.NoCFSRatio = 0.0;
    writer.Config.SetUseCompoundFile(false);
    for (; i < 45; i++)
    {
        AddDoc(writer, i);
    }
    writer.Dispose();

    // Delete one doc so we get a .del file:
    writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetMergePolicy(NoMergePolicy.NO_COMPOUND_FILES).SetUseCompoundFile(true));
    Term searchTerm = new Term("id", "7");
    writer.DeleteDocuments(searchTerm);
    writer.Dispose();

    // Now, artificially create an extra .del file & extra
    // .s0 file:
    string[] files = dir.ListAll();

    /*
    for(int j=0;j<files.Length;j++) {
        System.out.println(j + ": " + files[j]);
    }
    */

    // TODO: fix this test better
    string ext = Codec.Default.Name.Equals("SimpleText", StringComparison.Ordinal) ? ".liv" : ".del";

    // Create a bogus separate del file for a
    // segment that already has a separate del file:
    CopyFile(dir, "_0_1" + ext, "_0_2" + ext);

    // Create a bogus separate del file for a
    // segment that does not yet have a separate del file:
    CopyFile(dir, "_0_1" + ext, "_1_1" + ext);

    // Create a bogus separate del file for a
    // non-existent segment:
    CopyFile(dir, "_0_1" + ext, "_188_1" + ext);

    // Create a bogus segment file:
    CopyFile(dir, "_0.cfs", "_188.cfs");

    // Create a bogus fnm file when the CFS already exists:
    CopyFile(dir, "_0.cfs", "_0.fnm");

    // Create some old segments file:
    CopyFile(dir, "segments_2", "segments");
    CopyFile(dir, "segments_2", "segments_1");

    // Create a bogus cfs file shadowing a non-cfs segment:
    // TODO: assert is bogus (relies upon codec-specific filenames)
    Assert.IsTrue(SlowFileExists(dir, "_3.fdt") || SlowFileExists(dir, "_3.fld"));
    Assert.IsTrue(!SlowFileExists(dir, "_3.cfs"));
    CopyFile(dir, "_1.cfs", "_3.cfs");

    string[] filesPre = dir.ListAll();

    // Open & close a writer: it should delete the above 4
    // files and nothing more:
    writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetOpenMode(OpenMode.APPEND));
    writer.Dispose();

    string[] files2 = dir.ListAll();
    dir.Dispose();

    Array.Sort(files);
    Array.Sort(files2);

    ISet<string> dif = DifFiles(files, files2);

    if (!Arrays.Equals(files, files2))
    {
        Assert.Fail("IndexFileDeleter failed to delete unreferenced extra files: should have deleted " + (filesPre.Length - files.Length) + " files but only deleted " + (filesPre.Length - files2.Length) + "; expected files:\n " + AsString(files) + "\n actual files:\n " + AsString(files2) + "\ndiff: " + dif);
    }
}
public virtual void TestDeletes1()
{
    //IndexWriter.debug2 = System.out;
    Directory dir = new MockDirectoryWrapper(new Random(Random().Next()), new RAMDirectory());
    IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
    iwc.SetMergeScheduler(new SerialMergeScheduler());
    iwc.SetMaxBufferedDocs(5000);
    iwc.SetRAMBufferSizeMB(100);
    RangeMergePolicy fsmp = new RangeMergePolicy(this, false);
    iwc.SetMergePolicy(fsmp);
    IndexWriter writer = new IndexWriter(dir, iwc);

    for (int x = 0; x < 5; x++)
    {
        writer.AddDocument(DocHelper.CreateDocument(x, "1", 2));
        //System.out.println("numRamDocs(" + x + ")" + writer.numRamDocs());
    }
    //System.out.println("commit1");
    writer.Commit();
    Assert.AreEqual(1, writer.SegmentCount);

    for (int x = 5; x < 10; x++)
    {
        writer.AddDocument(DocHelper.CreateDocument(x, "2", 2));
        //System.out.println("numRamDocs(" + x + ")" + writer.numRamDocs());
    }
    //System.out.println("commit2");
    writer.Commit();
    Assert.AreEqual(2, writer.SegmentCount);

    for (int x = 10; x < 15; x++)
    {
        writer.AddDocument(DocHelper.CreateDocument(x, "3", 2));
        //System.out.println("numRamDocs(" + x + ")" + writer.numRamDocs());
    }

    writer.DeleteDocuments(new Term("id", "1"));
    writer.DeleteDocuments(new Term("id", "11"));

    // flushing without applying deletes means
    // there will still be deletes in the segment infos
    writer.Flush(false, false);
    Assert.IsTrue(writer.BufferedUpdatesStreamAny);

    // get reader flushes pending deletes
    // so there should not be anymore
    IndexReader r1 = writer.Reader;
    Assert.IsFalse(writer.BufferedUpdatesStreamAny);
    r1.Dispose();

    // delete id:2 from the first segment
    // merge segments 0 and 1
    // which should apply the delete id:2
    writer.DeleteDocuments(new Term("id", "2"));
    writer.Flush(false, false);
    fsmp = (RangeMergePolicy)writer.Config.MergePolicy;
    fsmp.DoMerge = true;
    fsmp.Start = 0;
    fsmp.Length = 2;
    writer.MaybeMerge();

    Assert.AreEqual(2, writer.SegmentCount);

    // id:2 shouldn't exist anymore because
    // it's been applied in the merge and now it's gone
    IndexReader r2 = writer.Reader;
    int[] id2docs = ToDocsArray(new Term("id", "2"), null, r2);
    Assert.IsTrue(id2docs == null);
    r2.Dispose();

    /*
    // added docs are in the ram buffer
    for (int x = 15; x < 20; x++) {
        writer.AddDocument(TestIndexWriterReader.CreateDocument(x, "4", 2));
        System.out.println("numRamDocs(" + x + ")" + writer.numRamDocs());
    }
    Assert.IsTrue(writer.numRamDocs() > 0);

    // delete from the ram buffer
    writer.DeleteDocuments(new Term("id", Integer.toString(13)));

    Term id3 = new Term("id", Integer.toString(3));

    // delete from the 1st segment
    writer.DeleteDocuments(id3);

    Assert.IsTrue(writer.numRamDocs() > 0);

    //System.out
    //    .println("segdels1:" + writer.docWriter.deletesToString());

    //Assert.IsTrue(writer.docWriter.segmentDeletes.Size() > 0);

    // we cause a merge to happen
    fsmp.doMerge = true;
    fsmp.start = 0;
    fsmp.Length = 2;
    System.out.println("maybeMerge " + writer.SegmentInfos);

    SegmentInfo info0 = writer.SegmentInfos.Info(0);
    SegmentInfo info1 = writer.SegmentInfos.Info(1);

    writer.MaybeMerge();
    System.out.println("maybeMerge after " + writer.SegmentInfos);

    // there should be docs in RAM
    Assert.IsTrue(writer.numRamDocs() > 0);

    // assert we've merged the 1 and 2 segments
    // and still have a segment leftover == 2
    Assert.AreEqual(2, writer.SegmentInfos.Size());
    Assert.IsFalse(segThere(info0, writer.SegmentInfos));
    Assert.IsFalse(segThere(info1, writer.SegmentInfos));

    //System.out.println("segdels2:" + writer.docWriter.deletesToString());

    //Assert.IsTrue(writer.docWriter.segmentDeletes.Size() > 0);

    IndexReader r = writer.GetReader();
    IndexReader r1 = r.getSequentialSubReaders()[0];
    printDelDocs(r1.GetLiveDocs());
    int[] docs = toDocsArray(id3, null, r);
    System.out.println("id3 docs:" + Arrays.toString(docs));
    // there shouldn't be any docs for id:3
    Assert.IsTrue(docs == null);
    r.Dispose();

    part2(writer, fsmp);
    */

    // System.out.println("segdels2:"+writer.docWriter.segmentDeletes.toString());
    //System.out.println("close");
    writer.Dispose();
    dir.Dispose();
}
/// <summary>
/// Deletes the documents for objects of the given type matching the given selection.
/// </summary>
/// <typeparam name="TObject">
/// The type of the object to delete documents for.
/// </typeparam>
/// <param name="writer">
/// The IndexWriter to delete the documents from.
/// </param>
/// <param name="kind">
/// The kind of type to restrict the search to.
/// </param>
/// <param name="selection">
/// The Query which selects the documents to delete.
/// </param>
public static void DeleteDocuments<TObject>(this IndexWriter writer, DocumentObjectTypeKind kind, Query selection)
{
    Query deleteQuery = new FilteredQuery(selection, ObjectMapping.GetTypeFilter<TObject>(kind));
    writer.DeleteDocuments(deleteQuery);
}
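For context, here is a minimal usage sketch of this extension method. It assumes the object-mapping helpers it relies on (ObjectMapping, DocumentObjectTypeKind, such as those in the Lucene.Net.ObjectMapping package) are available; the BlogPost class, the "IsDraft" field and the enum value used below are illustrative assumptions, not part of the snippet above.
// Illustrative sketch only: BlogPost, "IsDraft" and DocumentObjectTypeKind.Static are assumptions.
using Lucene.Net.Index;
using Lucene.Net.Search;

public class BlogPost
{
    public string Title { get; set; }
    public bool IsDraft { get; set; }
}

public static class BlogPostCleanup
{
    public static void DeleteDraftPosts(IndexWriter writer)
    {
        // Select the documents to delete; the extension method wraps this in a
        // type filter so only documents mapped from BlogPost objects are affected.
        Query selection = new TermQuery(new Term("IsDraft", "true"));
        writer.DeleteDocuments<BlogPost>(DocumentObjectTypeKind.Static, selection);
        writer.Commit();
    }
}
Because the selection query is combined with a type filter, other document types stored in the same index are left untouched by the delete.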
public virtual void TestErrorAfterApplyDeletes()
{
    MockRAMDirectory.Failure failure = new AnonymousClassFailure(this);

    // create a couple of files
    System.String[] keywords = new System.String[] { "1", "2" };
    System.String[] unindexed = new System.String[] { "Netherlands", "Italy" };
    System.String[] unstored = new System.String[] { "Amsterdam has lots of bridges", "Venice has lots of canals" };
    System.String[] text = new System.String[] { "Amsterdam", "Venice" };

    for (int pass = 0; pass < 2; pass++)
    {
        bool autoCommit = (0 == pass);
        MockRAMDirectory dir = new MockRAMDirectory();
        IndexWriter modifier = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), true);
        modifier.SetUseCompoundFile(true);
        modifier.SetMaxBufferedDeleteTerms(2);

        dir.FailOn(failure.Reset());

        for (int i = 0; i < keywords.Length; i++)
        {
            Document doc = new Document();
            doc.Add(new Field("id", keywords[i], Field.Store.YES, Field.Index.NOT_ANALYZED));
            doc.Add(new Field("country", unindexed[i], Field.Store.YES, Field.Index.NO));
            doc.Add(new Field("contents", unstored[i], Field.Store.NO, Field.Index.ANALYZED));
            doc.Add(new Field("city", text[i], Field.Store.YES, Field.Index.ANALYZED));
            modifier.AddDocument(doc);
        }

        // flush (and commit if ac)
        modifier.Optimize();
        modifier.Commit();

        // one of the two files hits
        Term term = new Term("city", "Amsterdam");
        int hitCount = GetHitCount(dir, term);
        Assert.AreEqual(1, hitCount);

        // open the writer again (closed above)

        // delete the doc
        // max buf del terms is two, so this is buffered
        modifier.DeleteDocuments(term);

        // add a doc (needed for the !ac case; see below)
        // doc remains buffered
        Document doc2 = new Document();
        modifier.AddDocument(doc2);

        // commit the changes, the buffered deletes, and the new doc

        // The failure object will fail on the first write after the del
        // file gets created when processing the buffered delete

        // in the ac case, this will be when writing the new segments
        // files so we really don't need the new doc, but it's harmless

        // in the !ac case, a new segments file won't be created but in
        // this case, creation of the cfs file happens next so we need
        // the doc (to test that it's okay that we don't lose deletes if
        // failing while creating the cfs file)
        bool failed = false;
        try
        {
            modifier.Commit();
        }
        catch (System.IO.IOException)
        {
            failed = true;
        }

        Assert.IsTrue(failed);

        // The commit above failed, so we need to retry it (which will
        // succeed, because the failure is a one-shot)
        modifier.Commit();

        hitCount = GetHitCount(dir, term);

        // Make sure the delete was successfully flushed:
        Assert.AreEqual(0, hitCount);

        modifier.Close();
        dir.Close();
    }
}
/// <summary>
/// Deletes all documents containing the given term by delegating to the wrapped <see cref="IndexWriter"/>.
/// </summary>
protected virtual void DeleteDocuments(Term term)
{
    m_writer.DeleteDocuments(term);
}
public virtual void TestReopenOnCommit()
{
    Directory dir = NewDirectory();
    IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetIndexDeletionPolicy(new KeepAllCommits()).SetMaxBufferedDocs(-1).SetMergePolicy(NewLogMergePolicy(10)));
    for (int i = 0; i < 4; i++)
    {
        Document doc = new Document();
        doc.Add(NewStringField("id", "" + i, Field.Store.NO));
        writer.AddDocument(doc);
        IDictionary<string, string> data = new Dictionary<string, string>();
        data["index"] = i + "";
        writer.SetCommitData(data);
        writer.Commit();
    }
    for (int i = 0; i < 4; i++)
    {
        writer.DeleteDocuments(new Term("id", "" + i));
        IDictionary<string, string> data = new Dictionary<string, string>();
        data["index"] = (4 + i) + "";
        writer.SetCommitData(data);
        writer.Commit();
    }
    writer.Dispose();

    DirectoryReader r = DirectoryReader.Open(dir);
    Assert.AreEqual(0, r.NumDocs);

    ICollection<IndexCommit> commits = DirectoryReader.ListCommits(dir);
    foreach (IndexCommit commit in commits)
    {
        DirectoryReader r2 = DirectoryReader.OpenIfChanged(r, commit);
        Assert.IsNotNull(r2);
        Assert.IsTrue(r2 != r);

        IDictionary<string, string> s = commit.UserData;
        int v;
        if (s.Count == 0)
        {
            // First commit created by IW
            v = -1;
        }
        else
        {
            v = Convert.ToInt32(s["index"]);
        }
        if (v < 4)
        {
            Assert.AreEqual(1 + v, r2.NumDocs);
        }
        else
        {
            Assert.AreEqual(7 - v, r2.NumDocs);
        }
        r.Dispose();
        r = r2;
    }
    r.Dispose();
    dir.Dispose();
}
public virtual void TestManyReopensAndFields()
{
    Directory dir = NewDirectory();
    Random random = Random();
    IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random));
    LogMergePolicy lmp = NewLogMergePolicy();
    lmp.MergeFactor = 3; // merge often
    conf.SetMergePolicy(lmp);
    IndexWriter writer = new IndexWriter(dir, conf);

    bool isNRT = random.NextBoolean();
    DirectoryReader reader;
    if (isNRT)
    {
        reader = DirectoryReader.Open(writer, true);
    }
    else
    {
        writer.Commit();
        reader = DirectoryReader.Open(dir);
    }

    int numFields = random.Next(4) + 3; // 3-7
    int numNDVFields = random.Next(numFields / 2) + 1; // 1-3
    long[] fieldValues = new long[numFields];
    bool[] fieldHasValue = new bool[numFields];
    Arrays.Fill(fieldHasValue, true);
    for (int i = 0; i < fieldValues.Length; i++)
    {
        fieldValues[i] = 1;
    }

    int numRounds = AtLeast(15);
    int docID = 0;
    for (int i = 0; i < numRounds; i++)
    {
        int numDocs = AtLeast(5);
        // System.out.println("[" + Thread.currentThread().getName() + "]: round=" + i + ", numDocs=" + numDocs);
        for (int j = 0; j < numDocs; j++)
        {
            Document doc = new Document();
            doc.Add(new StringField("id", "doc-" + docID, Store.NO));
            doc.Add(new StringField("key", "all", Store.NO)); // update key
            // add all fields with their current value
            for (int f = 0; f < fieldValues.Length; f++)
            {
                if (f < numNDVFields)
                {
                    doc.Add(new NumericDocValuesField("f" + f, fieldValues[f]));
                }
                else
                {
                    doc.Add(new BinaryDocValuesField("f" + f, TestBinaryDocValuesUpdates.ToBytes(fieldValues[f])));
                }
            }
            writer.AddDocument(doc);
            ++docID;
        }

        // if field's value was unset before, unset it from all new added documents too
        for (int field = 0; field < fieldHasValue.Length; field++)
        {
            if (!fieldHasValue[field])
            {
                if (field < numNDVFields)
                {
                    writer.UpdateNumericDocValue(new Term("key", "all"), "f" + field, null);
                }
                else
                {
                    writer.UpdateBinaryDocValue(new Term("key", "all"), "f" + field, null);
                }
            }
        }

        int fieldIdx = random.Next(fieldValues.Length);
        string updateField = "f" + fieldIdx;
        if (random.NextBoolean())
        {
            // System.out.println("[" + Thread.currentThread().getName() + "]: unset field '" + updateField + "'");
            fieldHasValue[fieldIdx] = false;
            if (fieldIdx < numNDVFields)
            {
                writer.UpdateNumericDocValue(new Term("key", "all"), updateField, null);
            }
            else
            {
                writer.UpdateBinaryDocValue(new Term("key", "all"), updateField, null);
            }
        }
        else
        {
            fieldHasValue[fieldIdx] = true;
            if (fieldIdx < numNDVFields)
            {
                writer.UpdateNumericDocValue(new Term("key", "all"), updateField, ++fieldValues[fieldIdx]);
            }
            else
            {
                writer.UpdateBinaryDocValue(new Term("key", "all"), updateField, TestBinaryDocValuesUpdates.ToBytes(++fieldValues[fieldIdx]));
            }
            // System.out.println("[" + Thread.currentThread().getName() + "]: updated field '" + updateField + "' to value " + fieldValues[fieldIdx]);
        }

        if (random.NextDouble() < 0.2)
        {
            int deleteDoc = random.Next(docID); // might also delete an already deleted document, ok!
            writer.DeleteDocuments(new Term("id", "doc-" + deleteDoc));
            // System.out.println("[" + Thread.currentThread().getName() + "]: deleted document: doc-" + deleteDoc);
        }

        // verify reader
        if (!isNRT)
        {
            writer.Commit();
        }

        // System.out.println("[" + Thread.currentThread().getName() + "]: reopen reader: " + reader);
        DirectoryReader newReader = DirectoryReader.OpenIfChanged(reader);
        Assert.IsNotNull(newReader);
        reader.Dispose();
        reader = newReader;
        // System.out.println("[" + Thread.currentThread().getName() + "]: reopened reader: " + reader);
        Assert.IsTrue(reader.NumDocs > 0); // we delete at most one document per round
        BytesRef scratch = new BytesRef();
        foreach (AtomicReaderContext context in reader.Leaves)
        {
            AtomicReader r = context.AtomicReader;
            // System.out.println(((SegmentReader) r).getSegmentName());
            IBits liveDocs = r.LiveDocs;
            for (int field = 0; field < fieldValues.Length; field++)
            {
                string f = "f" + field;
                BinaryDocValues bdv = r.GetBinaryDocValues(f);
                NumericDocValues ndv = r.GetNumericDocValues(f);
                IBits docsWithField = r.GetDocsWithField(f);
                if (field < numNDVFields)
                {
                    Assert.IsNotNull(ndv);
                    Assert.IsNull(bdv);
                }
                else
                {
                    Assert.IsNull(ndv);
                    Assert.IsNotNull(bdv);
                }
                int maxDoc = r.MaxDoc;
                for (int doc = 0; doc < maxDoc; doc++)
                {
                    if (liveDocs == null || liveDocs.Get(doc))
                    {
                        // System.out.println("doc=" + (doc + context.DocBase) + " f='" + f + "' value=" + getValue(bdv, doc, scratch));
                        if (fieldHasValue[field])
                        {
                            Assert.IsTrue(docsWithField.Get(doc));
                            if (field < numNDVFields)
                            {
                                Assert.AreEqual(fieldValues[field], ndv.Get(doc), "invalid value for doc=" + doc + ", field=" + f + ", reader=" + r);
                            }
                            else
                            {
                                Assert.AreEqual(fieldValues[field], TestBinaryDocValuesUpdates.GetValue(bdv, doc, scratch), "invalid value for doc=" + doc + ", field=" + f + ", reader=" + r);
                            }
                        }
                        else
                        {
                            Assert.IsFalse(docsWithField.Get(doc));
                        }
                    }
                }
            }
        }
        // System.out.println();
    }

    IOUtils.Dispose(writer, reader, dir);
}
public virtual void TestFieldNumberGaps()
{
    int numIters = AtLeast(13);
    for (int i = 0; i < numIters; i++)
    {
        Directory dir = NewDirectory();
        {
            IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NoMergePolicy.NO_COMPOUND_FILES));
            Document d = new Document();
            d.Add(new TextField("f1", "d1 first field", Field.Store.YES));
            d.Add(new TextField("f2", "d1 second field", Field.Store.YES));
            writer.AddDocument(d);
            writer.Dispose();
            SegmentInfos sis = new SegmentInfos();
            sis.Read(dir);
            Assert.AreEqual(1, sis.Count);
            FieldInfos fis1 = SegmentReader.ReadFieldInfos(sis.Info(0));
            Assert.AreEqual("f1", fis1.FieldInfo(0).Name);
            Assert.AreEqual("f2", fis1.FieldInfo(1).Name);
        }

        {
            IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(Random().NextBoolean() ? NoMergePolicy.NO_COMPOUND_FILES : NoMergePolicy.COMPOUND_FILES));
            Document d = new Document();
            d.Add(new TextField("f1", "d2 first field", Field.Store.YES));
            d.Add(new StoredField("f3", new byte[] { 1, 2, 3 }));
            writer.AddDocument(d);
            writer.Dispose();
            SegmentInfos sis = new SegmentInfos();
            sis.Read(dir);
            Assert.AreEqual(2, sis.Count);
            FieldInfos fis1 = SegmentReader.ReadFieldInfos(sis.Info(0));
            FieldInfos fis2 = SegmentReader.ReadFieldInfos(sis.Info(1));
            Assert.AreEqual("f1", fis1.FieldInfo(0).Name);
            Assert.AreEqual("f2", fis1.FieldInfo(1).Name);
            Assert.AreEqual("f1", fis2.FieldInfo(0).Name);
            Assert.IsNull(fis2.FieldInfo(1));
            Assert.AreEqual("f3", fis2.FieldInfo(2).Name);
        }

        {
            IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(Random().NextBoolean() ? NoMergePolicy.NO_COMPOUND_FILES : NoMergePolicy.COMPOUND_FILES));
            Document d = new Document();
            d.Add(new TextField("f1", "d3 first field", Field.Store.YES));
            d.Add(new TextField("f2", "d3 second field", Field.Store.YES));
            d.Add(new StoredField("f3", new byte[] { 1, 2, 3, 4, 5 }));
            writer.AddDocument(d);
            writer.Dispose();
            SegmentInfos sis = new SegmentInfos();
            sis.Read(dir);
            Assert.AreEqual(3, sis.Count);
            FieldInfos fis1 = SegmentReader.ReadFieldInfos(sis.Info(0));
            FieldInfos fis2 = SegmentReader.ReadFieldInfos(sis.Info(1));
            FieldInfos fis3 = SegmentReader.ReadFieldInfos(sis.Info(2));
            Assert.AreEqual("f1", fis1.FieldInfo(0).Name);
            Assert.AreEqual("f2", fis1.FieldInfo(1).Name);
            Assert.AreEqual("f1", fis2.FieldInfo(0).Name);
            Assert.IsNull(fis2.FieldInfo(1));
            Assert.AreEqual("f3", fis2.FieldInfo(2).Name);
            Assert.AreEqual("f1", fis3.FieldInfo(0).Name);
            Assert.AreEqual("f2", fis3.FieldInfo(1).Name);
            Assert.AreEqual("f3", fis3.FieldInfo(2).Name);
        }

        {
            IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(Random().NextBoolean() ? NoMergePolicy.NO_COMPOUND_FILES : NoMergePolicy.COMPOUND_FILES));
            writer.DeleteDocuments(new Term("f1", "d1"));
            // nuke the first segment entirely so that the segment with gaps is
            // loaded first!
            writer.ForceMergeDeletes();
            writer.Dispose();
        }

        IndexWriter writer_ = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(new LogByteSizeMergePolicy()).SetInfoStream(new FailOnNonBulkMergesInfoStream()));
        writer_.ForceMerge(1);
        writer_.Dispose();

        SegmentInfos sis_ = new SegmentInfos();
        sis_.Read(dir);
        Assert.AreEqual(1, sis_.Count);
        FieldInfos fis1_ = SegmentReader.ReadFieldInfos(sis_.Info(0));
        Assert.AreEqual("f1", fis1_.FieldInfo(0).Name);
        Assert.AreEqual("f2", fis1_.FieldInfo(1).Name);
        Assert.AreEqual("f3", fis1_.FieldInfo(2).Name);
        dir.Dispose();
    }
}
/// <summary>
/// Calls <see cref="IndexWriter.DeleteDocuments(Term)"/> and
/// returns the generation that reflects this change.
/// </summary>
public virtual long DeleteDocuments(Term t)
{
    writer.DeleteDocuments(t);
    // Return gen as of when indexing finished:
    return indexingGen.Get();
}
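The generation returned by a wrapper like this exists so that a caller can wait until the delete becomes visible to searches. A minimal sketch of that pattern follows, using the NRT reopen helpers that ship with Lucene.NET 4.8 (TrackingIndexWriter, SearcherManager, ControlledRealTimeReopenThread); exact constructor signatures may vary slightly between versions, so treat this as an approximation rather than the implementation behind the snippet above.
// Sketch only: ties a delete to the generation needed to make it searchable.
using Lucene.Net.Index;
using Lucene.Net.Search;

public sealed class NrtDeleteExample
{
    private readonly TrackingIndexWriter trackingWriter;
    private readonly SearcherManager searcherManager;
    private readonly ControlledRealTimeReopenThread<IndexSearcher> reopenThread;

    public NrtDeleteExample(IndexWriter writer)
    {
        trackingWriter = new TrackingIndexWriter(writer);
        searcherManager = new SearcherManager(writer, true, new SearcherFactory());
        // Reopen at most every 5s, and within 100ms when someone is waiting on a generation.
        reopenThread = new ControlledRealTimeReopenThread<IndexSearcher>(trackingWriter, searcherManager, 5.0, 0.1);
        reopenThread.Start();
    }

    public void DeleteAndWaitUntilSearchable(Term term)
    {
        // The generation returned here identifies this particular change...
        long gen = trackingWriter.DeleteDocuments(term);
        // ...and blocking on it guarantees the next acquired searcher no longer matches the term.
        reopenThread.WaitForGeneration(gen);
    }
}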
public virtual void TestRandom()
{
    int num = AtLeast(2);
    for (int iter = 0; iter < num; iter++)
    {
        if (VERBOSE)
        {
            Console.WriteLine("TEST: iter=" + iter);
        }

        Directory dir = NewDirectory();

        IndexWriter w = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NoMergePolicy.COMPOUND_FILES));

        // we can do this because we use NoMergePolicy (and dont merge to "nothing")
        w.KeepFullyDeletedSegments = true;

        IDictionary<BytesRef, IList<int?>> docs = new Dictionary<BytesRef, IList<int?>>();
        HashSet<int?> deleted = new HashSet<int?>();
        IList<BytesRef> terms = new List<BytesRef>();

        int numDocs = TestUtil.NextInt(Random(), 1, 100 * RANDOM_MULTIPLIER);
        Documents.Document doc = new Documents.Document();
        Field f = NewStringField("field", "", Field.Store.NO);
        doc.Add(f);
        Field id = NewStringField("id", "", Field.Store.NO);
        doc.Add(id);

        bool onlyUniqueTerms = Random().NextBoolean();
        if (VERBOSE)
        {
            Console.WriteLine("TEST: onlyUniqueTerms=" + onlyUniqueTerms + " numDocs=" + numDocs);
        }
        HashSet<BytesRef> uniqueTerms = new HashSet<BytesRef>();
        for (int i = 0; i < numDocs; i++)
        {
            if (!onlyUniqueTerms && Random().NextBoolean() && terms.Count > 0)
            {
                // re-use existing term
                BytesRef term = terms[Random().Next(terms.Count)];
                docs[term].Add(i);
                f.StringValue = term.Utf8ToString();
            }
            else
            {
                string s = TestUtil.RandomUnicodeString(Random(), 10);
                BytesRef term = new BytesRef(s);
                if (!docs.ContainsKey(term))
                {
                    docs[term] = new List<int?>();
                }
                docs[term].Add(i);
                terms.Add(term);
                uniqueTerms.Add(term);
                f.StringValue = s;
            }
            id.StringValue = "" + i;
            w.AddDocument(doc);
            if (Random().Next(4) == 1)
            {
                w.Commit();
            }
            if (i > 0 && Random().Next(20) == 1)
            {
                int delID = Random().Next(i);
                deleted.Add(delID);
                w.DeleteDocuments(new Term("id", "" + delID));
                if (VERBOSE)
                {
                    Console.WriteLine("TEST: delete " + delID);
                }
            }
        }

        if (VERBOSE)
        {
            List<BytesRef> termsList = new List<BytesRef>(uniqueTerms);
            termsList.Sort(BytesRef.UTF8SortedAsUTF16Comparer);
            Console.WriteLine("TEST: terms in UTF16 order:");
            foreach (BytesRef b in termsList)
            {
                Console.WriteLine(" " + UnicodeUtil.ToHexString(b.Utf8ToString()) + " " + b);
                foreach (int docID in docs[b])
                {
                    if (deleted.Contains(docID))
                    {
                        Console.WriteLine(" " + docID + " (deleted)");
                    }
                    else
                    {
                        Console.WriteLine(" " + docID);
                    }
                }
            }
        }

        IndexReader reader = w.Reader;
        w.Dispose();
        if (VERBOSE)
        {
            Console.WriteLine("TEST: reader=" + reader);
        }

        Bits liveDocs = MultiFields.GetLiveDocs(reader);
        foreach (int delDoc in deleted)
        {
            Assert.IsFalse(liveDocs.Get(delDoc));
        }

        for (int i = 0; i < 100; i++)
        {
            BytesRef term = terms[Random().Next(terms.Count)];
            if (VERBOSE)
            {
                Console.WriteLine("TEST: seek term=" + UnicodeUtil.ToHexString(term.Utf8ToString()) + " " + term);
            }

            DocsEnum docsEnum = TestUtil.Docs(Random(), reader, "field", term, liveDocs, null, DocsEnum.FLAG_NONE);
            Assert.IsNotNull(docsEnum);

            foreach (int docID in docs[term])
            {
                if (!deleted.Contains(docID))
                {
                    Assert.AreEqual(docID, docsEnum.NextDoc());
                }
            }
            Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, docsEnum.NextDoc());
        }

        reader.Dispose();
        dir.Dispose();
    }
}
/// <summary>
/// Deletes all documents containing the given term by delegating to the underlying <see cref="IndexWriter"/>.
/// </summary>
public virtual void DeleteDocuments(Term term) => IndexWriter.DeleteDocuments(term);
public virtual void TestEmptyIndexWithVectors()
{
    Directory rd1 = NewDirectory();
    {
        if (VERBOSE)
        {
            Console.WriteLine("\nTEST: make 1st writer");
        }
        IndexWriter iw = new IndexWriter(rd1, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())));
        Document doc = new Document();
        Field idField = NewTextField("id", "", Field.Store.NO);
        doc.Add(idField);
        FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
        customType.StoreTermVectors = true;
        doc.Add(NewField("test", "", customType));
        idField.StringValue = "1";
        iw.AddDocument(doc);
        doc.Add(NewTextField("test", "", Field.Store.NO));
        idField.StringValue = "2";
        iw.AddDocument(doc);
        iw.Dispose();

        IndexWriterConfig dontMergeConfig = (new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))).SetMergePolicy(NoMergePolicy.COMPOUND_FILES);
        if (VERBOSE)
        {
            Console.WriteLine("\nTEST: make 2nd writer");
        }
        IndexWriter writer = new IndexWriter(rd1, dontMergeConfig);

        writer.DeleteDocuments(new Term("id", "1"));
        writer.Dispose();

        IndexReader ir = DirectoryReader.Open(rd1);
        Assert.AreEqual(2, ir.MaxDoc);
        Assert.AreEqual(1, ir.NumDocs);
        ir.Dispose();

        iw = new IndexWriter(rd1, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetOpenMode(OpenMode_e.APPEND));
        iw.ForceMerge(1);
        iw.Dispose();
    }

    Directory rd2 = NewDirectory();
    {
        IndexWriter iw = new IndexWriter(rd2, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())));
        Document doc = new Document();
        iw.AddDocument(doc);
        iw.Dispose();
    }

    Directory rdOut = NewDirectory();

    IndexWriter iwOut = new IndexWriter(rdOut, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())));

    DirectoryReader reader1, reader2;
    ParallelAtomicReader pr = new ParallelAtomicReader(SlowCompositeReaderWrapper.Wrap(reader1 = DirectoryReader.Open(rd1)), SlowCompositeReaderWrapper.Wrap(reader2 = DirectoryReader.Open(rd2)));

    // When unpatched, Lucene crashes here with an ArrayIndexOutOfBoundsException (caused by TermVectorsWriter)
    iwOut.AddIndexes(pr);

    // ParallelReader closes any IndexReader you added to it:
    pr.Dispose();

    // assert subreaders were closed
    Assert.AreEqual(0, reader1.RefCount);
    Assert.AreEqual(0, reader2.RefCount);

    rd1.Dispose();
    rd2.Dispose();

    iwOut.ForceMerge(1);
    iwOut.Dispose();
    rdOut.Dispose();
}