public override void SetUp() {
    base.SetUp();
    Lucene.Net.Documents.Document doc;

    RAMDirectory rd1 = new RAMDirectory();
    IndexWriter iw1 = new IndexWriter(rd1, new SimpleAnalyzer(), true);
    doc = new Lucene.Net.Documents.Document();
    doc.Add(new Field("field1", "the quick brown fox jumps", Field.Store.YES, Field.Index.TOKENIZED));
    doc.Add(new Field("field2", "the quick brown fox jumps", Field.Store.YES, Field.Index.TOKENIZED));
    doc.Add(new Field("field4", "", Field.Store.NO, Field.Index.TOKENIZED));
    iw1.AddDocument(doc);
    iw1.Close();

    RAMDirectory rd2 = new RAMDirectory();
    IndexWriter iw2 = new IndexWriter(rd2, new SimpleAnalyzer(), true);
    doc = new Lucene.Net.Documents.Document();
    doc.Add(new Field("field0", "", Field.Store.NO, Field.Index.TOKENIZED));
    doc.Add(new Field("field1", "the fox jumps over the lazy dog", Field.Store.YES, Field.Index.TOKENIZED));
    doc.Add(new Field("field3", "the fox jumps over the lazy dog", Field.Store.YES, Field.Index.TOKENIZED));
    iw2.AddDocument(doc);
    iw2.Close();

    this.ir1 = IndexReader.Open(rd1);
    this.ir2 = IndexReader.Open(rd2);
}
public override void SetUp() {
    directory = new RAMDirectory();
    IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true);

    Lucene.Net.Documents.Document doc = new Lucene.Net.Documents.Document();
    doc.Add(new Field("field", "one two three four five", Field.Store.YES, Field.Index.TOKENIZED));
    doc.Add(new Field("sorter", "b", Field.Store.YES, Field.Index.TOKENIZED));
    writer.AddDocument(doc);

    doc = new Lucene.Net.Documents.Document();
    doc.Add(new Field("field", "one two three four", Field.Store.YES, Field.Index.TOKENIZED));
    doc.Add(new Field("sorter", "d", Field.Store.YES, Field.Index.TOKENIZED));
    writer.AddDocument(doc);

    doc = new Lucene.Net.Documents.Document();
    doc.Add(new Field("field", "one two three y", Field.Store.YES, Field.Index.TOKENIZED));
    doc.Add(new Field("sorter", "a", Field.Store.YES, Field.Index.TOKENIZED));
    writer.AddDocument(doc);

    doc = new Lucene.Net.Documents.Document();
    doc.Add(new Field("field", "one two x", Field.Store.YES, Field.Index.TOKENIZED));
    doc.Add(new Field("sorter", "c", Field.Store.YES, Field.Index.TOKENIZED));
    writer.AddDocument(doc);

    writer.Optimize();
    writer.Close();

    searcher = new IndexSearcher(directory);
    query = new TermQuery(new Term("field", "three"));
    filter = new AnonymousClassFilter();
}
public virtual void TestBefore() {
    // create an index
    RAMDirectory indexStore = new RAMDirectory();
    IndexWriter writer = new IndexWriter(indexStore, new SimpleAnalyzer(), true);

    long now = (long) (DateTime.UtcNow - new DateTime(1970, 1, 1)).TotalMilliseconds;

    Lucene.Net.Documents.Document doc = new Lucene.Net.Documents.Document();
    // add time that is in the past
    doc.Add(new Field("datefield", Lucene.Net.Documents.DateTools.TimeToString(now - 1000 * 100000, Lucene.Net.Documents.DateTools.Resolution.MILLISECOND), Field.Store.YES, Field.Index.UN_TOKENIZED));
    doc.Add(new Field("body", "Today is a very sunny day in New York City", Field.Store.YES, Field.Index.TOKENIZED));
    writer.AddDocument(doc);
    writer.Optimize();
    writer.Close();

    IndexSearcher searcher = new IndexSearcher(indexStore);

    // filter that should preserve matches
    //DateFilter df1 = DateFilter.Before("datefield", now);
    RangeFilter df1 = new RangeFilter("datefield", Lucene.Net.Documents.DateTools.TimeToString(now - 2000 * 100000, Lucene.Net.Documents.DateTools.Resolution.MILLISECOND), Lucene.Net.Documents.DateTools.TimeToString(now, Lucene.Net.Documents.DateTools.Resolution.MILLISECOND), false, true);
    // filter that should discard matches
    //DateFilter df2 = DateFilter.Before("datefield", now - 999999);
    RangeFilter df2 = new RangeFilter("datefield", Lucene.Net.Documents.DateTools.TimeToString(0, Lucene.Net.Documents.DateTools.Resolution.MILLISECOND), Lucene.Net.Documents.DateTools.TimeToString(now - 2000 * 100000, Lucene.Net.Documents.DateTools.Resolution.MILLISECOND), true, false);

    // search for something that doesn't exist with DateFilter
    Query query1 = new TermQuery(new Term("body", "NoMatchForThis"));
    // search for something that does exist
    Query query2 = new TermQuery(new Term("body", "sunny"));

    Hits result;

    // ensure that queries return expected results without DateFilter first
    result = searcher.Search(query1);
    Assert.AreEqual(0, result.Length());
    result = searcher.Search(query2);
    Assert.AreEqual(1, result.Length());

    // run queries with DateFilter
    result = searcher.Search(query1, df1);
    Assert.AreEqual(0, result.Length());
    result = searcher.Search(query1, df2);
    Assert.AreEqual(0, result.Length());
    result = searcher.Search(query2, df1);
    Assert.AreEqual(1, result.Length());
    result = searcher.Search(query2, df2);
    Assert.AreEqual(0, result.Length());
}
public virtual void TestMethod() {
    RAMDirectory directory = new RAMDirectory();
    System.String[] values = new System.String[] {"1", "2", "3", "4"};

    try {
        IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true);
        for (int i = 0; i < values.Length; i++) {
            Lucene.Net.Documents.Document doc = new Lucene.Net.Documents.Document();
            doc.Add(new Field(FIELD, values[i], Field.Store.YES, Field.Index.UN_TOKENIZED));
            writer.AddDocument(doc);
        }
        writer.Close();

        BooleanQuery booleanQuery1 = new BooleanQuery();
        booleanQuery1.Add(new TermQuery(new Term(FIELD, "1")), BooleanClause.Occur.SHOULD);
        booleanQuery1.Add(new TermQuery(new Term(FIELD, "2")), BooleanClause.Occur.SHOULD);

        BooleanQuery query = new BooleanQuery();
        query.Add(booleanQuery1, BooleanClause.Occur.MUST);
        query.Add(new TermQuery(new Term(FIELD, "9")), BooleanClause.Occur.MUST_NOT);

        IndexSearcher indexSearcher = new IndexSearcher(directory);
        Hits hits = indexSearcher.Search(query);
        Assert.AreEqual(2, hits.Length(), "Number of matched documents");
    }
    catch (System.IO.IOException e) {
        Assert.Fail(e.Message);
    }
}
public override void SetUp() {
    base.SetUp();
    IndexWriter writer = new IndexWriter(directory, new SimpleAnalyzer(), true);
    //writer.setUseCompoundFile(true);
    //writer.infoStream = System.out;
    for (int i = 0; i < 1000; i++) {
        Lucene.Net.Documents.Document doc = new Lucene.Net.Documents.Document();
        Field.TermVector termVector;
        int mod3 = i % 3;
        int mod2 = i % 2;
        if (mod2 == 0 && mod3 == 0) {
            termVector = Field.TermVector.WITH_POSITIONS_OFFSETS;
        } else if (mod2 == 0) {
            termVector = Field.TermVector.WITH_POSITIONS;
        } else if (mod3 == 0) {
            termVector = Field.TermVector.WITH_OFFSETS;
        } else {
            termVector = Field.TermVector.YES;
        }
        doc.Add(new Field("field", English.IntToEnglish(i), Field.Store.YES, Field.Index.TOKENIZED, termVector));
        writer.AddDocument(doc);
    }
    writer.Close();
    searcher = new IndexSearcher(directory);
}
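// Exercises MultiPhraseQuery (the successor to PhrasePrefixQuery): the "pi" prefix is
// expanded by hand via a TermEnum into concrete terms, so "blueberry pi*" should hit the
// "blueberry pie" and "blueberry pizza" documents (2 hits) while "strawberry pi*" hits none.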
public virtual void TestPhrasePrefix() {
    RAMDirectory indexStore = new RAMDirectory();
    IndexWriter writer = new IndexWriter(indexStore, new SimpleAnalyzer(), true);
    Lucene.Net.Documents.Document doc1 = new Lucene.Net.Documents.Document();
    Lucene.Net.Documents.Document doc2 = new Lucene.Net.Documents.Document();
    Lucene.Net.Documents.Document doc3 = new Lucene.Net.Documents.Document();
    Lucene.Net.Documents.Document doc4 = new Lucene.Net.Documents.Document();
    Lucene.Net.Documents.Document doc5 = new Lucene.Net.Documents.Document();
    doc1.Add(new Field("body", "blueberry pie", Field.Store.YES, Field.Index.TOKENIZED));
    doc2.Add(new Field("body", "blueberry strudel", Field.Store.YES, Field.Index.TOKENIZED));
    doc3.Add(new Field("body", "blueberry pizza", Field.Store.YES, Field.Index.TOKENIZED));
    doc4.Add(new Field("body", "blueberry chewing gum", Field.Store.YES, Field.Index.TOKENIZED));
    doc5.Add(new Field("body", "piccadilly circus", Field.Store.YES, Field.Index.TOKENIZED));
    writer.AddDocument(doc1);
    writer.AddDocument(doc2);
    writer.AddDocument(doc3);
    writer.AddDocument(doc4);
    writer.AddDocument(doc5);
    writer.Optimize();
    writer.Close();

    IndexSearcher searcher = new IndexSearcher(indexStore);

    //PhrasePrefixQuery query1 = new PhrasePrefixQuery();
    MultiPhraseQuery query1 = new MultiPhraseQuery();
    //PhrasePrefixQuery query2 = new PhrasePrefixQuery();
    MultiPhraseQuery query2 = new MultiPhraseQuery();
    query1.Add(new Term("body", "blueberry"));
    query2.Add(new Term("body", "strawberry"));

    System.Collections.ArrayList termsWithPrefix = new System.Collections.ArrayList();
    IndexReader ir = IndexReader.Open(indexStore);

    // this TermEnum gives "piccadilly", "pie" and "pizza".
    System.String prefix = "pi";
    TermEnum te = ir.Terms(new Term("body", prefix + "*"));
    do {
        if (te.Term().Text().StartsWith(prefix)) {
            termsWithPrefix.Add(te.Term());
        }
    } while (te.Next());

    query1.Add((Term[]) termsWithPrefix.ToArray(typeof(Term)));
    query2.Add((Term[]) termsWithPrefix.ToArray(typeof(Term)));

    Hits result;
    result = searcher.Search(query1);
    Assert.AreEqual(2, result.Length());
    result = searcher.Search(query2);
    Assert.AreEqual(0, result.Length());
}
public virtual void TestMethod() {
    RAMDirectory directory = new RAMDirectory();
    System.String[] categories = new System.String[] {"food", "foodanddrink", "foodanddrinkandgoodtimes", "food and drink"};

    Query rw1 = null;
    Query rw2 = null;
    try {
        IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true);
        for (int i = 0; i < categories.Length; i++) {
            Lucene.Net.Documents.Document doc = new Lucene.Net.Documents.Document();
            doc.Add(new Field("category", categories[i], Field.Store.YES, Field.Index.UN_TOKENIZED));
            writer.AddDocument(doc);
        }
        writer.Close();

        IndexReader reader = IndexReader.Open(directory);
        PrefixQuery query = new PrefixQuery(new Term("category", "foo"));
        rw1 = query.Rewrite(reader);

        BooleanQuery bq = new BooleanQuery();
        bq.Add(query, BooleanClause.Occur.MUST);
        rw2 = bq.Rewrite(reader);
    }
    catch (System.IO.IOException e) {
        Assert.Fail(e.Message);
    }

    BooleanQuery bq1 = null;
    if (rw1 is BooleanQuery) {
        bq1 = (BooleanQuery) rw1;
    } else {
        // fail here too, rather than hitting a NullReferenceException on bq1 below
        Assert.Fail("Rewrite");
    }

    BooleanQuery bq2 = null;
    if (rw2 is BooleanQuery) {
        bq2 = (BooleanQuery) rw2;
    } else {
        Assert.Fail("Rewrite");
    }

    Assert.AreEqual(bq1.GetClauses().Length, bq2.GetClauses().Length, "Number of Clauses Mismatch");
}
public override void SetUp() {
    base.SetUp();
    RAMDirectory directory = new RAMDirectory();
    IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true);
    for (int i = 0; i < docFields.Length; i++) {
        Lucene.Net.Documents.Document doc = new Lucene.Net.Documents.Document();
        doc.Add(new Field(field, docFields[i], Field.Store.NO, Field.Index.TOKENIZED));
        writer.AddDocument(doc);
    }
    writer.Close();
    searcher = new IndexSearcher(directory);
}
public override void SetUp() {
    base.SetUp();
    RAMDirectory directory = new RAMDirectory();
    IndexWriter writer = new IndexWriter(directory, new SimpleAnalyzer(), true);
    //writer.infoStream = System.out;
    for (int i = 0; i < 1000; i++) {
        Lucene.Net.Documents.Document doc = new Lucene.Net.Documents.Document();
        doc.Add(new Field("field", English.IntToEnglish(i), Field.Store.YES, Field.Index.TOKENIZED));
        writer.AddDocument(doc);
    }
    writer.Close();
    searcher = new IndexSearcher(directory);
}
public virtual void TestNot_Renamed_Method() {
    RAMDirectory store = new RAMDirectory();
    IndexWriter writer = new IndexWriter(store, new SimpleAnalyzer(), true);

    Lucene.Net.Documents.Document d1 = new Lucene.Net.Documents.Document();
    d1.Add(new Field("field", "a b", Field.Store.YES, Field.Index.TOKENIZED));

    writer.AddDocument(d1);
    writer.Optimize();
    writer.Close();

    Searcher searcher = new IndexSearcher(store);
    Lucene.Net.QueryParsers.QueryParser parser = new Lucene.Net.QueryParsers.QueryParser("field", new SimpleAnalyzer());
    Lucene.Net.Search.Query query = parser.Parse("a NOT b");
    //System.out.println(query);
    Hits hits = searcher.Search(query);
    Assert.AreEqual(0, hits.Length());
}
public virtual void TestIterator() {
    RAMDirectory directory = new RAMDirectory();

    IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true);
    Lucene.Net.Documents.Document doc = new Lucene.Net.Documents.Document();
    doc.Add(new Field("field", "iterator test doc 1", Field.Store.YES, Field.Index.TOKENIZED));
    writer.AddDocument(doc);

    doc = new Lucene.Net.Documents.Document();
    doc.Add(new Field("field", "iterator test doc 2", Field.Store.YES, Field.Index.TOKENIZED));
    writer.AddDocument(doc);

    writer.Close();

    IndexSearcher searcher = new IndexSearcher(directory);
    Hits hits = searcher.Search(new TermQuery(new Term("field", "iterator")));

    HitIterator iterator = (HitIterator) hits.Iterator();
    Assert.AreEqual(2, iterator.Length());

    Assert.IsTrue(iterator.MoveNext());
    Hit hit = (Hit) iterator.Current;
    Assert.AreEqual("iterator test doc 1", hit.Get("field"));

    Assert.IsTrue(iterator.MoveNext());
    hit = (Hit) iterator.Current;
    Assert.AreEqual("iterator test doc 2", hit.GetDocument().Get("field"));

    Assert.IsFalse(iterator.MoveNext());

    // reading Current past the end of the iteration must throw
    bool caughtException = false;
    try {
        System.Object generatedAux = iterator.Current;
    }
    catch (System.ArgumentOutOfRangeException) {
        caughtException = true;
    }
    Assert.IsTrue(caughtException);
}
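// Builds a single-segment index in which every (numDocs / numHits)-th document matches the
// phrase "term1 term2"; the segment's prox stream is then wrapped in SeeksCountingStream so
// the test can count calls to Seek() against the positions file.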
private void CreateIndex(int numHits) {
    int numDocs = 500;

    Directory directory = new RAMDirectory();
    IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true);
    writer.SetMaxBufferedDocs(10);
    for (int i = 0; i < numDocs; i++) {
        Lucene.Net.Documents.Document doc = new Lucene.Net.Documents.Document();
        System.String content;
        if (i % (numDocs / numHits) == 0) {
            // add a document that matches the query "term1 term2"
            content = this.term1 + " " + this.term2;
        } else if (i % 15 == 0) {
            // add a document that only contains term1
            content = this.term1 + " " + this.term1;
        } else {
            // add a document that contains term2 but not term1
            content = this.term3 + " " + this.term2;
        }
        doc.Add(new Field(this.field, content, Field.Store.YES, Field.Index.TOKENIZED));
        writer.AddDocument(doc);
    }

    // make sure the index has only a single segment
    writer.Optimize();
    writer.Close();

    // the index is a single segment, thus IndexReader.Open() returns an instance of SegmentReader
    SegmentReader reader = (SegmentReader) IndexReader.Open(directory);

    // we decorate the proxStream with a wrapper class that allows us to count the number of calls to Seek()
    reader.ProxStream_ForNUnitTest = new SeeksCountingStream(this, reader.ProxStream_ForNUnitTest);

    this.searcher = new IndexSearcher(reader);
}
public override void Run() {
    for (int j = 1; j < Enclosing_Instance.docsPerThread; j++) {
        Lucene.Net.Documents.Document doc = new Lucene.Net.Documents.Document();
        doc.Add(new Field("sizeContent", English.IntToEnglish(num * Enclosing_Instance.docsPerThread + j).Trim(), Field.Store.YES, Field.Index.UN_TOKENIZED));
        try {
            writer.AddDocument(doc);
        }
        catch (System.IO.IOException e) {
            throw new System.SystemException("", e);
        }
        lock (ramDir) {
            Assert.AreEqual(ramDir.SizeInBytes(), ramDir.GetRecomputedSizeInBytes());
        }
    }
}
public virtual void TestPrefixQuery_Renamed_Method() {
    RAMDirectory directory = new RAMDirectory();

    System.String[] categories = new System.String[] {"/Computers", "/Computers/Mac", "/Computers/Windows"};
    IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true);
    for (int i = 0; i < categories.Length; i++) {
        Lucene.Net.Documents.Document doc = new Lucene.Net.Documents.Document();
        doc.Add(new Field("category", categories[i], Field.Store.YES, Field.Index.UN_TOKENIZED));
        writer.AddDocument(doc);
    }
    writer.Close();

    PrefixQuery query = new PrefixQuery(new Term("category", "/Computers"));
    IndexSearcher searcher = new IndexSearcher(directory);
    Hits hits = searcher.Search(query);
    Assert.AreEqual(3, hits.Length(), "All documents in /Computers category and below");

    query = new PrefixQuery(new Term("category", "/Computers/Mac"));
    hits = searcher.Search(query);
    Assert.AreEqual(1, hits.Length(), "One in /Computers/Mac");
}
private void AddDocumentWithDifferentFields(IndexWriter writer) {
    Lucene.Net.Documents.Document doc = new Lucene.Net.Documents.Document();
    doc.Add(new Field("keyword2", "test1", Field.Store.YES, Field.Index.UN_TOKENIZED));
    doc.Add(new Field("text2", "test1", Field.Store.YES, Field.Index.TOKENIZED));
    doc.Add(new Field("unindexed2", "test1", Field.Store.YES, Field.Index.NO));
    doc.Add(new Field("unstored2", "test1", Field.Store.NO, Field.Index.TOKENIZED));
    writer.AddDocument(doc);
}

private void AddDoc(IndexWriter writer, System.String value_Renamed) {
    Lucene.Net.Documents.Document doc = new Lucene.Net.Documents.Document();
    doc.Add(new Field("content", value_Renamed, Field.Store.NO, Field.Index.TOKENIZED));
    writer.AddDocument(doc);
}

private void AddDoc(System.String text, IndexWriter iw) {
    Lucene.Net.Documents.Document doc = new Lucene.Net.Documents.Document();
    doc.Add(new Field("key", text, Field.Store.YES, Field.Index.TOKENIZED));
    iw.AddDocument(doc);
}
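// Adds ndocs copies of a one-field document. The AnonymousClassAnalyzer (defined elsewhere in
// this fixture) appears to use random/percentDocs and RepeatingTokenStream/maxTF to control how
// many documents repeat `val` and with what term frequency; that behavior is assumed, not shown here.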
internal virtual void AddDocs(Directory dir, int ndocs, System.String field, System.String val, int maxTF, float percentDocs) {
    System.Random random = new System.Random((System.Int32) 0);
    RepeatingTokenStream ts = new RepeatingTokenStream(val);
    Analyzer analyzer = new AnonymousClassAnalyzer(random, percentDocs, ts, maxTF, this);

    Lucene.Net.Documents.Document doc = new Lucene.Net.Documents.Document();
    doc.Add(new Field(field, val, Field.Store.NO, Field.Index.NO_NORMS));

    IndexWriter writer = new IndexWriter(dir, analyzer, true);
    writer.SetMaxBufferedDocs(100);
    writer.SetMergeFactor(100);

    for (int i = 0; i < ndocs; i++) {
        writer.AddDocument(doc);
    }

    writer.Optimize();
    writer.Close();
}
private void Add(System.String value_Renamed, IndexWriter iw) {
    Lucene.Net.Documents.Document d = new Lucene.Net.Documents.Document();
    d.Add(new Field(FIELD_NAME, value_Renamed, Field.Store.YES, Field.Index.TOKENIZED));
    iw.AddDocument(d);
}

private void AddDoc(IndexWriter writer, int id) {
    Lucene.Net.Documents.Document doc = new Lucene.Net.Documents.Document();
    doc.Add(new Field("content", "aaa", Field.Store.NO, Field.Index.TOKENIZED));
    doc.Add(new Field("id", System.Convert.ToString(id), Field.Store.YES, Field.Index.UN_TOKENIZED));
    writer.AddDocument(doc);
}

private void AddDoc(IndexWriter writer) {
    Lucene.Net.Documents.Document doc = new Lucene.Net.Documents.Document();
    doc.Add(new Field("content", "aaa", Field.Store.NO, Field.Index.TOKENIZED));
    writer.AddDocument(doc);
}
private void Build() {
    try {
        /* build an index */
        IndexWriter writer = new IndexWriter(index, new SimpleAnalyzer(), T);

        for (int d = minId; d <= maxId; d++) {
            Lucene.Net.Documents.Document doc = new Lucene.Net.Documents.Document();
            doc.Add(new Field("id", Pad(d), Field.Store.YES, Field.Index.UN_TOKENIZED));
            int r = rand.Next();
            if (maxR < r) {
                maxR = r;
            }
            if (r < minR) {
                minR = r;
            }
            doc.Add(new Field("rand", Pad(r), Field.Store.YES, Field.Index.UN_TOKENIZED));
            doc.Add(new Field("body", "body", Field.Store.YES, Field.Index.UN_TOKENIZED));
            writer.AddDocument(doc);
        }

        writer.Optimize();
        writer.Close();
    }
    catch (System.Exception e) {
        throw new System.Exception("can't build index", e);
    }
}
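// Fixture for the classic "albino elephant" ranking scenario (presumably the
// DisjunctionMaxQuery tests): d1 through d4 are increasingly good matches, depending on
// whether both query terms land in the primary "hed" field or only in the secondary "dek" field.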
public override void SetUp() {
    base.SetUp();

    index = new RAMDirectory();
    IndexWriter writer = new IndexWriter(index, new WhitespaceAnalyzer(), true);
    writer.SetSimilarity(sim);

    // hed is the most important field, dek is secondary

    // d1 is an "ok" match for: albino elephant
    {
        Lucene.Net.Documents.Document d1 = new Lucene.Net.Documents.Document();
        d1.Add(new Field("id", "d1", Field.Store.YES, Field.Index.UN_TOKENIZED)); //Field.Keyword("id", "d1"));
        d1.Add(new Field("hed", "elephant", Field.Store.YES, Field.Index.TOKENIZED)); //Field.Text("hed", "elephant"));
        d1.Add(new Field("dek", "elephant", Field.Store.YES, Field.Index.TOKENIZED)); //Field.Text("dek", "elephant"));
        writer.AddDocument(d1);
    }

    // d2 is a "good" match for: albino elephant
    {
        Lucene.Net.Documents.Document d2 = new Lucene.Net.Documents.Document();
        d2.Add(new Field("id", "d2", Field.Store.YES, Field.Index.UN_TOKENIZED)); //Field.Keyword("id", "d2"));
        d2.Add(new Field("hed", "elephant", Field.Store.YES, Field.Index.TOKENIZED)); //Field.Text("hed", "elephant"));
        d2.Add(new Field("dek", "albino", Field.Store.YES, Field.Index.TOKENIZED)); //Field.Text("dek", "albino"));
        d2.Add(new Field("dek", "elephant", Field.Store.YES, Field.Index.TOKENIZED)); //Field.Text("dek", "elephant"));
        writer.AddDocument(d2);
    }

    // d3 is a "better" match for: albino elephant
    {
        Lucene.Net.Documents.Document d3 = new Lucene.Net.Documents.Document();
        d3.Add(new Field("id", "d3", Field.Store.YES, Field.Index.UN_TOKENIZED)); //Field.Keyword("id", "d3"));
        d3.Add(new Field("hed", "albino", Field.Store.YES, Field.Index.TOKENIZED)); //Field.Text("hed", "albino"));
        d3.Add(new Field("hed", "elephant", Field.Store.YES, Field.Index.TOKENIZED)); //Field.Text("hed", "elephant"));
        writer.AddDocument(d3);
    }

    // d4 is the "best" match for: albino elephant
    {
        Lucene.Net.Documents.Document d4 = new Lucene.Net.Documents.Document();
        d4.Add(new Field("id", "d4", Field.Store.YES, Field.Index.UN_TOKENIZED)); //Field.Keyword("id", "d4"));
        d4.Add(new Field("hed", "albino", Field.Store.YES, Field.Index.TOKENIZED)); //Field.Text("hed", "albino"));
        d4.Add(new Field("hed", "elephant", Field.Store.YES, Field.Index.TOKENIZED)); //Field.Text("hed", "elephant"));
        d4.Add(new Field("dek", "albino", Field.Store.YES, Field.Index.TOKENIZED)); //Field.Text("dek", "albino"));
        writer.AddDocument(d4);
    }

    writer.Close();

    r = IndexReader.Open(index);
    s = new IndexSearcher(r);
    s.SetSimilarity(sim);
}
public override void SetUp() {
    //base.SetUp();
    base.SetUp();

    RAMDirectory rd = new RAMDirectory();
    IndexWriter writer = new IndexWriter(rd, new StandardAnalyzer(), true);

    Lucene.Net.Documents.Document d = new Lucene.Net.Documents.Document();
    d.Add(new Field(FIELD_T, "Optimize not deleting all files", Field.Store.YES, Field.Index.TOKENIZED));
    d.Add(new Field(FIELD_C, "Deleted When I run an optimize in our production environment.", Field.Store.YES, Field.Index.TOKENIZED));

    writer.AddDocument(d);
    writer.Close();

    searcher = new IndexSearcher(rd);
}
// create an index of all the documents, or just the x, or just the y documents
private Searcher GetIndex(bool even, bool odd) {
    RAMDirectory indexStore = new RAMDirectory();
    IndexWriter writer = new IndexWriter(indexStore, new SimpleAnalyzer(), true);
    for (int i = 0; i < data.Length; ++i) {
        if (((i % 2) == 0 && even) || ((i % 2) == 1 && odd)) {
            Lucene.Net.Documents.Document doc = new Lucene.Net.Documents.Document();
            doc.Add(new Field("tracer", data[i][0], Field.Store.YES, Field.Index.NO));
            doc.Add(new Field("contents", data[i][1], Field.Store.NO, Field.Index.TOKENIZED));
            if (data[i][2] != null) doc.Add(new Field("int", data[i][2], Field.Store.NO, Field.Index.UN_TOKENIZED));
            if (data[i][3] != null) doc.Add(new Field("float", data[i][3], Field.Store.NO, Field.Index.UN_TOKENIZED));
            if (data[i][4] != null) doc.Add(new Field("string", data[i][4], Field.Store.NO, Field.Index.UN_TOKENIZED));
            if (data[i][5] != null) doc.Add(new Field("custom", data[i][5], Field.Store.NO, Field.Index.UN_TOKENIZED));
            if (data[i][6] != null) doc.Add(new Field("i18n", data[i][6], Field.Store.NO, Field.Index.UN_TOKENIZED));
            if (data[i][7] != null) doc.Add(new Field("long", data[i][7], Field.Store.NO, Field.Index.UN_TOKENIZED));
            if (data[i][8] != null) doc.Add(new Field("double", data[i][8], Field.Store.NO, Field.Index.UN_TOKENIZED));
            doc.SetBoost(2); // produce some scores above 1.0
            writer.AddDocument(doc);
        }
    }
    writer.Optimize();
    writer.Close();
    return new IndexSearcher(indexStore);
}
private void InsertDoc(IndexWriter writer, System.String content) {
    Lucene.Net.Documents.Document doc = new Lucene.Net.Documents.Document();
    doc.Add(new Field("id", "id" + docCount, Field.Store.YES, Field.Index.UN_TOKENIZED));
    doc.Add(new Field("content", content, Field.Store.NO, Field.Index.TOKENIZED));
    writer.AddDocument(doc);
    docCount++;
}
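// Simulates running out of disk space while an IndexReader commits deletes and norm updates.
// Each pass grants the MockRAMDirectory slightly more space (plus random IOExceptions), then
// verifies that no unreferenced files are left behind and that the index reflects either all
// or none of the changes (transactional semantics).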
public virtual void TestDiskFull() {
    bool debug = false;
    Term searchTerm = new Term("content", "aaa");
    int START_COUNT = 157;
    int END_COUNT = 144;

    // First build up a starting index:
    RAMDirectory startDir = new MockRAMDirectory();
    IndexWriter writer = new IndexWriter(startDir, new WhitespaceAnalyzer(), true);
    for (int i = 0; i < 157; i++) {
        Lucene.Net.Documents.Document d = new Lucene.Net.Documents.Document();
        d.Add(new Field("id", System.Convert.ToString(i), Field.Store.YES, Field.Index.UN_TOKENIZED));
        d.Add(new Field("content", "aaa " + i, Field.Store.NO, Field.Index.TOKENIZED));
        writer.AddDocument(d);
    }
    writer.Close();

    long diskUsage = startDir.SizeInBytes();
    long diskFree = diskUsage + 100;

    System.IO.IOException err = null;
    bool done = false;

    // Iterate w/ ever increasing free disk space:
    while (!done) {
        MockRAMDirectory dir = new MockRAMDirectory(startDir);
        IndexReader reader = IndexReader.Open(dir);

        // For each disk size, first try to commit against
        // dir that will hit random IOExceptions & disk
        // full; after, give it infinite disk space & turn
        // off random IOExceptions & retry w/ same reader:
        bool success = false;

        for (int x = 0; x < 2; x++) {
            double rate = 0.05;
            double diskRatio = ((double) diskFree) / diskUsage;
            long thisDiskFree;
            System.String testName;

            if (0 == x) {
                thisDiskFree = diskFree;
                if (diskRatio >= 2.0) {
                    rate /= 2;
                }
                if (diskRatio >= 4.0) {
                    rate /= 2;
                }
                if (diskRatio >= 6.0) {
                    rate = 0.0;
                }
                if (debug) {
                    System.Console.Out.WriteLine("\ncycle: " + diskFree + " bytes");
                }
                testName = "disk full during reader.Close() @ " + thisDiskFree + " bytes";
            } else {
                thisDiskFree = 0;
                rate = 0.0;
                if (debug) {
                    System.Console.Out.WriteLine("\ncycle: same writer: unlimited disk space");
                }
                testName = "reader re-use after disk full";
            }

            dir.SetMaxSizeInBytes(thisDiskFree);
            dir.SetRandomIOExceptionRate(rate, diskFree);

            try {
                if (0 == x) {
                    int docId = 12;
                    for (int i = 0; i < 13; i++) {
                        reader.DeleteDocument(docId);
                        reader.SetNorm(docId, "contents", (float) 2.0);
                        docId += 12;
                    }
                }
                reader.Close();
                success = true;
                if (0 == x) {
                    done = true;
                }
            }
            catch (System.IO.IOException e) {
                if (debug) {
                    System.Console.Out.WriteLine("  hit IOException: " + e);
                }
                err = e;
                if (1 == x) {
                    System.Console.Error.WriteLine(e.StackTrace);
                    Assert.Fail(testName + " hit IOException after disk space was freed up");
                }
            }

            // Whether we succeeded or failed, check that all
            // un-referenced files were in fact deleted (ie,
            // we did not create garbage).  Just create a
            // new IndexFileDeleter, have it delete
            // unreferenced files, then verify that in fact
            // no files were deleted:
            System.String[] startFiles = dir.List();
            SegmentInfos infos = new SegmentInfos();
            infos.Read(dir);
            IndexFileDeleter d = new IndexFileDeleter(dir, new KeepOnlyLastCommitDeletionPolicy(), infos, null, null);
            System.String[] endFiles = dir.List();

            System.Array.Sort(startFiles);
            System.Array.Sort(endFiles);

            //for(int i=0;i<startFiles.length;i++) {
            //    System.out.println("  startFiles: " + i + ": " + startFiles[i]);
            //}

            if (SupportClass.Compare.CompareStringArrays(startFiles, endFiles) == false) {
                System.String successStr;
                if (success) {
                    successStr = "success";
                } else {
                    successStr = "IOException";
                    System.Console.Error.WriteLine(err.StackTrace);
                }
                Assert.Fail("reader.Close() failed to delete unreferenced files after " + successStr + " (" + diskFree + " bytes): before delete:\n    " + ArrayToString(startFiles) + "\n  after delete:\n    " + ArrayToString(endFiles));
            }

            // Finally, verify index is not corrupt, and, if
            // we succeeded, we see all docs changed, and if
            // we failed, we see either all docs or no docs
            // changed (transactional semantics):
            IndexReader newReader = null;
            try {
                newReader = IndexReader.Open(dir);
            }
            catch (System.IO.IOException e) {
                System.Console.Error.WriteLine(e.StackTrace);
                Assert.Fail(testName + ":exception when creating IndexReader after disk full during Close: " + e);
            }

            /*
            int result = newReader.docFreq(searchTerm);
            if (success) {
                if (result != END_COUNT) {
                    fail(testName + ": method did not throw exception but docFreq('aaa') is " + result + " instead of expected " + END_COUNT);
                }
            } else {
                // On hitting exception we still may have added
                // all docs:
                if (result != START_COUNT && result != END_COUNT) {
                    err.printStackTrace();
                    fail(testName + ": method did throw exception but docFreq('aaa') is " + result + " instead of expected " + START_COUNT + " or " + END_COUNT);
                }
            }
            */

            IndexSearcher searcher = new IndexSearcher(newReader);
            Hits hits = null;
            try {
                hits = searcher.Search(new TermQuery(searchTerm));
            }
            catch (System.IO.IOException e) {
                System.Console.Error.WriteLine(e.StackTrace);
                Assert.Fail(testName + ": exception when searching: " + e);
            }
            int result2 = hits.Length();
            if (success) {
                if (result2 != END_COUNT) {
                    Assert.Fail(testName + ": method did not throw exception but hits.length for search on term 'aaa' is " + result2 + " instead of expected " + END_COUNT);
                }
            } else {
                // On hitting exception we still may have added
                // all docs:
                if (result2 != START_COUNT && result2 != END_COUNT) {
                    System.Console.Error.WriteLine(err.StackTrace);
                    Assert.Fail(testName + ": method did throw exception but hits.length for search on term 'aaa' is " + result2 + " instead of expected " + START_COUNT);
                }
            }

            searcher.Close();
            newReader.Close();

            if (result2 == END_COUNT) {
                break;
            }
        }

        dir.Close();

        // Try again with 10 more bytes of free space:
        diskFree += 10;
    }

    startDir.Close();
}
private void AddDocumentWithTermVectorFields(IndexWriter writer) {
    Lucene.Net.Documents.Document doc = new Lucene.Net.Documents.Document();
    doc.Add(new Field("tvnot", "tvnot", Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.NO));
    doc.Add(new Field("termvector", "termvector", Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.YES));
    doc.Add(new Field("tvoffset", "tvoffset", Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.WITH_OFFSETS));
    doc.Add(new Field("tvposition", "tvposition", Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.WITH_POSITIONS));
    doc.Add(new Field("tvpositionoffset", "tvpositionoffset", Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
    writer.AddDocument(doc);
}
public override void SetUp() {
    base.SetUp();
    System.String tempDir = System.IO.Path.GetTempPath();
    if (tempDir == null)
        throw new System.IO.IOException("java.io.tmpdir undefined, cannot run test");
    indexDir = new System.IO.FileInfo(System.IO.Path.Combine(tempDir, "RAMDirIndex"));

    IndexWriter writer = new IndexWriter(indexDir, new WhitespaceAnalyzer(), true);
    // add some documents
    Lucene.Net.Documents.Document doc = null;
    for (int i = 0; i < docsToAdd; i++) {
        doc = new Lucene.Net.Documents.Document();
        doc.Add(new Field("content", English.IntToEnglish(i).Trim(), Field.Store.YES, Field.Index.UN_TOKENIZED));
        writer.AddDocument(doc);
    }
    Assert.AreEqual(docsToAdd, writer.DocCount());
    writer.Close();
}
private void FillIndex(Directory dir, int start, int numDocs) {
    IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(), true);
    writer.SetMergeFactor(2);
    writer.SetMaxBufferedDocs(2);

    for (int i = start; i < (start + numDocs); i++) {
        Lucene.Net.Documents.Document temp = new Lucene.Net.Documents.Document();
        temp.Add(new Field("count", ("" + i), Field.Store.YES, Field.Index.UN_TOKENIZED));
        writer.AddDocument(temp);
    }

    writer.Close();
}