/// <summary>
/// Demo round trip: indexes a single document in memory, searches it with a
/// parsed query, and verifies that the stored field comes back unchanged.
/// </summary>
public virtual void TestDemo_Renamed_Method()
{
    Analyzer standardAnalyzer = new StandardAnalyzer();

    // The index lives in memory. To store an index on disk, use
    // FSDirectory.getDirectory("/tmp/testindex", true) instead (the 'true'
    // parameter overwrites the index in that directory if one exists).
    Directory ramDir = new RAMDirectory();

    IndexWriter indexWriter = new IndexWriter(ramDir, standardAnalyzer, true);
    indexWriter.SetMaxFieldLength(25000);

    System.String indexedText = "This is the text to be indexed.";
    Document source = new Document();
    source.Add(new Field("fieldname", indexedText, Field.Store.YES, Field.Index.TOKENIZED));
    indexWriter.AddDocument(source);
    indexWriter.Close();

    // Now search the index with a simple parsed query for "text".
    IndexSearcher indexSearcher = new IndexSearcher(ramDir);
    Lucene.Net.QueryParsers.QueryParser queryParser =
        new Lucene.Net.QueryParsers.QueryParser("fieldname", standardAnalyzer);
    Query parsed = queryParser.Parse("text");
    Hits results = indexSearcher.Search(parsed);
    Assert.AreEqual(1, results.Length());

    // Every hit must carry the stored text verbatim.
    for (int hit = 0; hit < results.Length(); hit++)
    {
        Document found = results.Doc(hit);
        Assert.AreEqual("This is the text to be indexed.", found.Get("fieldname"));
    }

    indexSearcher.Close();
    ramDir.Close();
}
/// <summary>
/// Populates <paramref name="dir"/> with <paramref name="nDocs"/> documents of
/// random content drawn from the shared <c>words</c> table using the shared
/// random source <c>r</c>.
/// </summary>
/// <param name="dir">Directory that receives the newly created index.</param>
/// <param name="nDocs">Number of documents to add.</param>
/// <param name="maxFields">Exclusive upper bound on the number of fields per document.</param>
/// <param name="maxFieldLen">Exclusive upper bound on the target length used when growing each field's text.</param>
internal virtual void BuildDir(Directory dir, int nDocs, int maxFields, int maxFieldLen)
{
    IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    writer.SetMaxBufferedDocs(10);

    for (int docNo = 0; docNo < nDocs; docNo++)
    {
        Document document = new Document();
        int fieldCount = r.Next(maxFields);

        for (int fieldNo = 0; fieldNo < fieldCount; fieldNo++)
        {
            int targetLength = r.Next(maxFieldLen);

            // Field text is "^ <random words> $"; words are appended until
            // the builder reaches the target length.
            System.Text.StringBuilder text = new System.Text.StringBuilder("^ ");
            while (text.Length < targetLength)
            {
                text.Append(' ');
                text.Append(words[r.Next(words.Length)]);
            }
            text.Append(" $");

            // Store/index modes are fixed for now (original note: "make random later").
            document.Add(new Field("f" + fieldNo, text.ToString(), Field.Store.YES, Field.Index.ANALYZED));
        }

        writer.AddDocument(document);
    }

    writer.Close();
}
/// <summary>
/// Creates the in-memory index used by the tests: five documents added from
/// the oldest timestamp to the latest, then optimized.
/// </summary>
public virtual void SetUp()
{
    directory = new RAMDirectory();
    IndexWriter indexWriter = new IndexWriter(directory, new WhitespaceAnalyzer(), true);

    // Document text paired with its dateTime value, oldest first:
    //   Document 1 = Oct 10 03:25:22 EDT 2007
    //   Document 2 = Oct 10 03:25:26 EDT 2007
    //   Document 3 = Oct 11 07:12:13 EDT 2007
    //   Document 4 = Oct 11 08:02:09 EDT 2007
    //   Document 5 = Oct 12 13:25:43 EDT 2007 (latest)
    // NOTE(review): a previously commented-out variant passed
    // 1192001122000L, 1192001126000L, 1192101133000L, 1192104129000L and
    // 1192209943000L instead; those look like epoch milliseconds while the
    // active values look like .NET ticks -- confirm against CreateDocument.
    System.String[] texts = new System.String[]
    {
        "Document 1", "Document 2", "Document 3", "Document 4", "Document 5"
    };
    long[] timestamps = new long[]
    {
        633275835220000000L,
        633275835260000000L,
        633276835330000000L,
        633276865290000000L,
        633277923430000000L
    };

    for (int n = 0; n < texts.Length; n++)
    {
        indexWriter.AddDocument(CreateDocument(texts[n], timestamps[n]));
    }

    indexWriter.Optimize();
    indexWriter.Close();
}
/// <summary>
/// Writes four single-field documents ("a b c d", "a b a d", "a b e f",
/// "x y z") into <c>dir</c>. The field is analyzed but not stored.
/// </summary>
void Index()
{
    Lucene.Net.Index.IndexWriter indexWriter = new Lucene.Net.Index.IndexWriter(
        dir,
        new Lucene.Net.Analysis.WhitespaceAnalyzer(),
        Lucene.Net.Index.IndexWriter.MaxFieldLength.UNLIMITED);

    System.String[] contents = new System.String[] { "a b c d", "a b a d", "a b e f", "x y z" };

    foreach (System.String content in contents)
    {
        Lucene.Net.Documents.Document document = new Lucene.Net.Documents.Document();
        document.Add(new Lucene.Net.Documents.Field(
            "field",
            content,
            Lucene.Net.Documents.Field.Store.NO,
            Lucene.Net.Documents.Field.Index.ANALYZED));
        indexWriter.AddDocument(document);
    }

    indexWriter.Close();
}
/// <summary>
/// Builds an index of filler documents ("meaninglessnames") with a
/// "tangfulin" document inserted after each of the two filler runs.
/// </summary>
public override void SetUp()
{
    base.SetUp();
    IndexWriter indexWriter = new IndexWriter(directory, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);

    // First filler run: loop indices 0..5136.
    int position = 0;
    while (position < 5137)
    {
        Document filler = new Document();
        filler.Add(new Field(FIELD, "meaninglessnames", Field.Store.YES, Field.Index.NOT_ANALYZED));
        indexWriter.AddDocument(filler);
        ++position;
    }

    // First document of interest.
    Document firstTarget = new Document();
    firstTarget.Add(new Field(FIELD, "tangfulin", Field.Store.YES, Field.Index.NOT_ANALYZED));
    indexWriter.AddDocument(firstTarget);

    // Second filler run: loop indices 5138..11376.
    position = 5138;
    while (position < 11377)
    {
        Document filler = new Document();
        filler.Add(new Field(FIELD, "meaninglessnames", Field.Store.YES, Field.Index.NOT_ANALYZED));
        indexWriter.AddDocument(filler);
        ++position;
    }

    // Second document of interest.
    Document secondTarget = new Document();
    secondTarget.Add(new Field(FIELD, "tangfulin", Field.Store.YES, Field.Index.NOT_ANALYZED));
    indexWriter.AddDocument(secondTarget);

    indexWriter.Close();
}
/// <summary>
/// Indexes the values "1".."4" and checks that the query
/// +(FIELD:1 FIELD:2) -FIELD:9 matches exactly two documents.
/// An IOException is reported as a test failure.
/// </summary>
public virtual void TestMethod()
{
    RAMDirectory ramDir = new RAMDirectory();
    System.String[] terms = new System.String[]{"1", "2", "3", "4"};

    try
    {
        IndexWriter indexWriter = new IndexWriter(ramDir, new WhitespaceAnalyzer(), true);
        foreach (System.String term in terms)
        {
            Lucene.Net.Documents.Document document = new Lucene.Net.Documents.Document();
            document.Add(new Field(FIELD, term, Field.Store.YES, Field.Index.UN_TOKENIZED));
            indexWriter.AddDocument(document);
        }
        indexWriter.Close();

        // Inner disjunction: FIELD:1 OR FIELD:2.
        BooleanQuery oneOrTwo = new BooleanQuery();
        oneOrTwo.Add(new TermQuery(new Term(FIELD, "1")), BooleanClause.Occur.SHOULD);
        oneOrTwo.Add(new TermQuery(new Term(FIELD, "2")), BooleanClause.Occur.SHOULD);

        // Outer query: the disjunction is required, FIELD:9 is prohibited.
        BooleanQuery combined = new BooleanQuery();
        combined.Add(oneOrTwo, BooleanClause.Occur.MUST);
        combined.Add(new TermQuery(new Term(FIELD, "9")), BooleanClause.Occur.MUST_NOT);

        IndexSearcher indexSearcher = new IndexSearcher(ramDir);
        Hits found = indexSearcher.Search(combined);
        Assert.AreEqual(2, found.Length(), "Number of matched documents");
    }
    catch (System.IO.IOException ioe)
    {
        Assert.Fail(ioe.Message);
    }
}
/// <summary>
/// Runs fuzzy queries (minimum similarity 0.9) of several term lengths
/// against an index holding "12345678911" and "segment", and checks the
/// hit count for each probe term.
/// </summary>
public virtual void TestTokenLengthOpt()
{
    RAMDirectory ramDir = new RAMDirectory();
    IndexWriter indexWriter = new IndexWriter(ramDir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    AddDoc("12345678911", indexWriter);
    AddDoc("segment", indexWriter);
    indexWriter.Optimize();
    indexWriter.Close();

    IndexSearcher indexSearcher = new IndexSearcher(ramDir);

    System.String[] probes = new System.String[]
    {
        "1234569",       // term not over 10 chars, so optimization shortcuts
        "1234567891",    // 10 chars, so no optimization
        "12345678911",   // over 10 chars, so no optimization
        "sdfsdfsdfsdf"   // over 10 chars, no match
    };
    int[] expectedHits = new int[] { 0, 0, 1, 0 };

    for (int n = 0; n < probes.Length; n++)
    {
        Query fuzzy = new FuzzyQuery(new Term("field", probes[n]), 0.9f);
        ScoreDoc[] found = indexSearcher.Search(fuzzy, null, 1000).ScoreDocs;
        Assert.AreEqual(expectedHits[n], found.Length);
    }
}
/// <summary>
/// Exercises MatchAllDocsQuery on a three-document index: alone, combined
/// in boolean conjunctions, and again after deleting a document.
/// </summary>
public virtual void TestQuery()
{
    RAMDirectory ramDir = new RAMDirectory();
    IndexWriter indexWriter = new IndexWriter(ramDir, new StandardAnalyzer(), true);
    AddDoc("one", indexWriter);
    AddDoc("two", indexWriter);
    AddDoc("three four", indexWriter);
    indexWriter.Close();

    IndexSearcher indexSearcher = new IndexSearcher(ramDir);

    Hits found = indexSearcher.Search(new MatchAllDocsQuery());
    Assert.AreEqual(3, found.Length());

    // Some artificial queries to trigger the use of skipTo():
    // two required match-all clauses ...
    BooleanQuery bothMatchAll = new BooleanQuery();
    bothMatchAll.Add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
    bothMatchAll.Add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
    found = indexSearcher.Search(bothMatchAll);
    Assert.AreEqual(3, found.Length());

    // ... and a required match-all clause plus a required term clause.
    BooleanQuery matchAllAndTerm = new BooleanQuery();
    matchAllAndTerm.Add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
    matchAllAndTerm.Add(new TermQuery(new Term("key", "three")), BooleanClause.Occur.MUST);
    found = indexSearcher.Search(matchAllAndTerm);
    Assert.AreEqual(1, found.Length());

    // Delete a document; match-all must then report one hit fewer.
    indexSearcher.GetIndexReader().DeleteDocument(0);
    found = indexSearcher.Search(new MatchAllDocsQuery());
    Assert.AreEqual(2, found.Length());

    indexSearcher.Close();
}
/// <summary>
/// Runs one writer thread and one searcher thread against the same
/// file-system directory, then asserts that neither recorded an exception.
/// </summary>
/// <param name="lockFactory">Lock factory handed to the opened FSDirectory.</param>
/// <param name="indexDir">Location of the index; removed at the end of the test.</param>
public virtual void _testStressLocks(LockFactory lockFactory, System.IO.DirectoryInfo indexDir)
{
    FSDirectory sharedDir = FSDirectory.Open(new System.IO.DirectoryInfo(indexDir.FullName), lockFactory);

    // Seed the directory with a one-document index.
    IndexWriter seedWriter = new IndexWriter(sharedDir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    AddDoc(seedWriter);
    seedWriter.Close();

    WriterThread writerThread = new WriterThread(this, 100, sharedDir);
    SearcherThread searcherThread = new SearcherThread(this, 100, sharedDir);
    writerThread.Start();
    searcherThread.Start();

    // Poll until both threads are done. 10000 * 1000 ticks of 100 ns each
    // is a one-second pause between checks.
    System.TimeSpan pollInterval = new System.TimeSpan((System.Int64) 10000 * 1000);
    while (writerThread.IsAlive || searcherThread.IsAlive)
    {
        System.Threading.Thread.Sleep(pollInterval);
    }

    Assert.IsTrue(!writerThread.hitException, "IndexWriter hit unexpected exceptions");
    Assert.IsTrue(!searcherThread.hitException, "IndexSearcher hit unexpected exceptions");

    // Cleanup
    _TestUtil.RmDir(indexDir);
}
/// <summary>
/// Demo round trip: indexes one document in memory, searches it with a
/// parsed query for "text", and verifies the stored field value of each hit.
/// </summary>
public virtual void TestDemo_Renamed()
{
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT);

    // Store the index in memory.
    // To store an index on disk, use this instead:
    //   Directory directory = FSDirectory.open("/tmp/testindex");
    Directory directory = new RAMDirectory();

    IndexWriter iwriter = new IndexWriter(directory, analyzer, true, new IndexWriter.MaxFieldLength(25000));
    Document doc = new Document();
    System.String text = "This is the text to be indexed.";
    doc.Add(new Field("fieldname", text, Field.Store.YES, Field.Index.ANALYZED));
    iwriter.AddDocument(doc);
    iwriter.Close();

    // Now search the index (second argument: read-only = true).
    IndexSearcher isearcher = new IndexSearcher(directory, true);

    // Parse a simple query that searches for "text".
    QueryParser parser = new QueryParser("fieldname", analyzer);
    Query query = parser.Parse("text");
    ScoreDoc[] hits = isearcher.Search(query, null, 1000).scoreDocs;
    Assert.AreEqual(1, hits.Length);

    // Iterate through the results.
    for (int i = 0; i < hits.Length; i++)
    {
        Document hitDoc = isearcher.Doc(hits[i].doc);
        // Expected value first, actual second, so a failure message
        // reports the two the right way round.
        Assert.AreEqual("This is the text to be indexed.", hitDoc.Get("fieldname"));
    }

    isearcher.Close();
    directory.Close();
}
/*public TestCustomSearcherSort(System.String name):base(name)
* {
* }*/

/*[STAThread]
* public static void Main(System.String[] argv)
* {
* // TestRunner.run(suite()); // {{Aroush-2.9}} how is this done in NUnit?
* }*/

/*public static Test suite()
* {
* return new TestSuite(typeof(TestCustomSearcherSort));
* }*/

/// <summary>
/// Creates the in-memory index used for testing, containing INDEX_SIZE documents.
/// </summary>
/// <returns>The directory holding the optimized index.</returns>
private Directory GetIndex()
{
    RAMDirectory store = new RAMDirectory();
    IndexWriter indexWriter = new IndexWriter(
        store,
        new StandardAnalyzer(Util.Version.LUCENE_CURRENT),
        true,
        IndexWriter.MaxFieldLength.LIMITED);
    RandomGen dateSource = new RandomGen(this, NewRandom());

    // Don't decrease INDEX_SIZE; if too low the problem doesn't show up.
    for (int docNo = 0; docNo < INDEX_SIZE; ++docNo)
    {
        Document document = new Document();

        // Some documents must not have an entry in the first sort field.
        bool hasPublicationDate = (docNo % 5) != 0;
        if (hasPublicationDate)
        {
            document.Add(new Field("publicationDate_", dateSource.GetLuceneDate(), Field.Store.YES, Field.Index.NOT_ANALYZED));
        }

        // Some documents carry the "content" field the test query looks for.
        bool matchesQuery = (docNo % 7) == 0;
        if (matchesQuery)
        {
            document.Add(new Field("content", "test", Field.Store.YES, Field.Index.ANALYZED));
        }

        // Every document has a defined 'mandant' field.
        document.Add(new Field("mandant", System.Convert.ToString(docNo % 3), Field.Store.YES, Field.Index.NOT_ANALYZED));

        indexWriter.AddDocument(document);
    }

    indexWriter.Optimize();
    indexWriter.Close();
    return store;
}
/// <summary>
/// Indexes "the stop words are here" with a LUCENE_24 StopAnalyzer and
/// checks that the phrases "stop words" and "words here" each match once.
/// </summary>
public virtual void TestPhraseQueryWithStopAnalyzer()
{
    RAMDirectory ramDir = new RAMDirectory();
    StopAnalyzer analyzer = new StopAnalyzer(Util.Version.LUCENE_24);

    IndexWriter indexWriter = new IndexWriter(ramDir, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
    Document source = new Document();
    source.Add(new Field("field", "the stop words are here", Field.Store.YES, Field.Index.ANALYZED));
    indexWriter.AddDocument(source);
    indexWriter.Close();

    IndexSearcher indexSearcher = new IndexSearcher(ramDir, true);

    // Valid exact phrase query.
    PhraseQuery stopWords = new PhraseQuery();
    stopWords.Add(new Term("field", "stop"));
    stopWords.Add(new Term("field", "words"));
    ScoreDoc[] found = indexSearcher.Search(stopWords, null, 1000).ScoreDocs;
    Assert.AreEqual(1, found.Length);
    QueryUtils.Check(stopWords, indexSearcher);

    // StopAnalyzer as of 2.4 does not leave "holes", so this matches.
    PhraseQuery wordsHere = new PhraseQuery();
    wordsHere.Add(new Term("field", "words"));
    wordsHere.Add(new Term("field", "here"));
    found = indexSearcher.Search(wordsHere, null, 1000).ScoreDocs;
    Assert.AreEqual(1, found.Length);
    QueryUtils.Check(wordsHere, indexSearcher);

    indexSearcher.Close();
}
/// <summary>
/// Demo round trip: indexes one document in memory, searches it with a
/// parsed query for "text", and verifies the stored field value of each hit.
/// </summary>
public virtual void TestDemo_Renamed()
{
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT);

    // Store the index in memory.
    // To store an index on disk, use this instead:
    //   Directory directory = FSDirectory.open("/tmp/testindex");
    Directory directory = new RAMDirectory();

    IndexWriter iwriter = new IndexWriter(directory, analyzer, true, new IndexWriter.MaxFieldLength(25000));
    Document doc = new Document();
    System.String text = "This is the text to be indexed.";
    doc.Add(new Field("fieldname", text, Field.Store.YES, Field.Index.ANALYZED));
    iwriter.AddDocument(doc);
    iwriter.Close();

    // Now search the index (second argument: read-only = true).
    IndexSearcher isearcher = new IndexSearcher(directory, true);

    // Parse a simple query that searches for "text".
    QueryParser parser = new QueryParser(Util.Version.LUCENE_CURRENT, "fieldname", analyzer);
    Query query = parser.Parse("text");
    ScoreDoc[] hits = isearcher.Search(query, null, 1000).ScoreDocs;
    Assert.AreEqual(1, hits.Length);

    // Iterate through the results.
    for (int i = 0; i < hits.Length; i++)
    {
        Document hitDoc = isearcher.Doc(hits[i].Doc);
        // Expected value first, actual second, so a failure message
        // reports the two the right way round.
        Assert.AreEqual("This is the text to be indexed.", hitDoc.Get("fieldname"));
    }

    isearcher.Close();
    directory.Close();
}
/// <summary>
/// Regression test for #35626: a MultiPhraseQuery nested as a required
/// clause of a BooleanQuery must run without throwing and return no hits.
/// </summary>
public virtual void TestPhrasePrefixWithBooleanQuery()
{
    RAMDirectory store = new RAMDirectory();
    StandardAnalyzer analyzer = new StandardAnalyzer(
        Util.Version.LUCENE_CURRENT,
        Support.Compatibility.SetFactory.CreateHashSet <string>());

    IndexWriter indexWriter = new IndexWriter(store, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
    Add("This is a test", "object", indexWriter);
    Add("a note", "note", indexWriter);
    indexWriter.Close();

    IndexSearcher indexSearcher = new IndexSearcher(store, true);

    // Phrase part: "a" followed by either "test" or "this".
    MultiPhraseQuery phrasePart = new MultiPhraseQuery();
    phrasePart.Add(new Term("body", "a"));
    phrasePart.Add(new Term[] { new Term("body", "test"), new Term("body", "this") });

    // The combined query is equivalent to +type:note +body:"a t*".
    BooleanQuery combined = new BooleanQuery();
    combined.Add(new TermQuery(new Term("type", "note")), Occur.MUST);
    combined.Add(phrasePart, Occur.MUST);

    // An exception would be thrown here without the fix for #35626.
    ScoreDoc[] found = indexSearcher.Search(combined, null, 1000).ScoreDocs;
    Assert.AreEqual(0, found.Length, "Wrong number of hits");

    indexSearcher.Close();
}
/// <summary>
/// Entry point: creates a fresh index in INDEX_DIR, feeds it through
/// <c>parse_email_messages</c>, optimizes it, and prints elapsed times.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
public static void Main(System.String[] args)
{
    System.DateTime start = System.DateTime.Now;
    try
    {
        var writer = new Lucene.Net.Index.IndexWriter(
            Lucene.Net.Store.FSDirectory.Open(INDEX_DIR),
            new Lucene.Net.Analysis.Standard.StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_CURRENT),
            true,
            Lucene.Net.Index.IndexWriter.MaxFieldLength.LIMITED);
        System.Console.Out.WriteLine("Indexing to directory '" + INDEX_DIR + "'...");

        // Want to clean up if indexing gets "cancelled": report the failure
        // but still optimize and close the writer below.
        try
        {
            parse_email_messages(writer);
        }
        catch (Exception e)
        {
            System.Console.Out.WriteLine(" caught a " + e.GetType() + "\n with message: " + e.Message);
        }

        // Elapsed time is the difference between two timestamps. Subtracting
        // the Millisecond properties only compares the 0-999 sub-second
        // components and can even be negative.
        long insertMs = (long) (System.DateTime.Now - start).TotalMilliseconds;
        System.Console.Out.WriteLine(insertMs + "ms inserting docs");

        System.Console.Out.WriteLine("Optimizing...");
        writer.Optimize();
        writer.Close();

        long totalMs = (long) (System.DateTime.Now - start).TotalMilliseconds;
        System.Console.Out.WriteLine(totalMs + "ms total");
    }
    catch (System.IO.IOException e)
    {
        System.Console.Out.WriteLine(" caught a " + e.GetType() + "\n with message: " + e.Message);
    }
}
/// <summary>
/// Verifies that CachingWrapperFilter calls the wrapped filter on first use
/// and not again for the same reader.
/// </summary>
public virtual void TestCachingWorks()
{
    Directory ramDir = new RAMDirectory();

    // Create and immediately close a writer so there is an index to open.
    IndexWriter indexWriter = new IndexWriter(ramDir, new StandardAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    indexWriter.Close();

    IndexReader indexReader = IndexReader.Open(ramDir);

    MockFilter inner = new MockFilter();
    CachingWrapperFilter caching = new CachingWrapperFilter(inner);

    // First time, the nested filter is called.
    caching.GetDocIdSet(indexReader);
    Assert.IsTrue(inner.WasCalled(), "first time");

    // Make sure there is no exception if the cache is holding the wrong bitset.
    caching.Bits(indexReader);
    caching.GetDocIdSet(indexReader);

    // Second time, the nested filter should not be called.
    inner.Clear();
    caching.GetDocIdSet(indexReader);
    Assert.IsFalse(inner.WasCalled(), "second time");

    indexReader.Close();
}
/// <summary>
/// Builds the index held by <paramref name="index"/>: one document per id
/// in [minId, maxId] with a padded id, a padded random value and a constant
/// body. Also records the smallest and largest random value on the index.
/// </summary>
/// <param name="index">Test index to populate; its minR/maxR fields are updated.</param>
/// <exception cref="System.SystemException">Wraps any exception raised while building.</exception>
private void Build(TestIndex index)
{
    try
    {
        IndexWriter indexWriter = new IndexWriter(index.index, new SimpleAnalyzer(), T, IndexWriter.MaxFieldLength.LIMITED);

        for (int id = minId; id <= maxId; id++)
        {
            Document document = new Document();
            document.Add(new Field("id", Pad(id), Field.Store.YES, Field.Index.NOT_ANALYZED));

            int randomValue;
            if (index.allowNegativeRandomInts)
            {
                randomValue = rand.Next();
            }
            else
            {
                randomValue = rand.Next(System.Int32.MaxValue);
            }

            // Track the observed range of random values.
            if (randomValue > index.maxR)
            {
                index.maxR = randomValue;
            }
            if (randomValue < index.minR)
            {
                index.minR = randomValue;
            }

            document.Add(new Field("rand", Pad(randomValue), Field.Store.YES, Field.Index.NOT_ANALYZED));
            document.Add(new Field("body", "body", Field.Store.YES, Field.Index.NOT_ANALYZED));
            indexWriter.AddDocument(document);
        }

        indexWriter.Optimize();
        indexWriter.Close();
    }
    catch (System.Exception cause)
    {
        throw new System.SystemException("can't build index", cause);
    }
}
/// <summary>
/// Checks that QueryWrapperFilter works without throwing for a primitive
/// query, a complex (boolean) primitive query, and a non-primitive query,
/// each time filtering a match-all search down to the single indexed document.
/// </summary>
public virtual void TestBasic()
{
    Directory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    Document doc = new Document();
    doc.Add(new Field("field", "value", Field.Store.NO, Field.Index.ANALYZED));
    writer.AddDocument(doc);
    writer.Close();

    TermQuery termQuery = new TermQuery(new Term("field", "value"));

    // Should not throw exception with primitive query.
    QueryWrapperFilter qwf = new QueryWrapperFilter(termQuery);

    IndexSearcher searcher = new IndexSearcher(dir, true);
    TopDocs hits = searcher.Search(new MatchAllDocsQuery(), qwf, 10);
    Assert.AreEqual(1, hits.totalHits);

    // Should not throw exception with complex primitive query.
    BooleanQuery booleanQuery = new BooleanQuery();
    booleanQuery.Add(termQuery, Occur.MUST);
    booleanQuery.Add(new TermQuery(new Term("field", "missing")), Occur.MUST_NOT);
    // Wrap the boolean query itself; wrapping termQuery again would just
    // repeat the first case and leave the boolean query untested.
    qwf = new QueryWrapperFilter(booleanQuery);

    hits = searcher.Search(new MatchAllDocsQuery(), qwf, 10);
    Assert.AreEqual(1, hits.totalHits);

    // Should not throw exception with non primitive Query (doesn't implement
    // Query#createWeight).
    qwf = new QueryWrapperFilter(new FuzzyQuery(new Term("field", "valu")));

    hits = searcher.Search(new MatchAllDocsQuery(), qwf, 10);
    Assert.AreEqual(1, hits.totalHits);

    searcher.Close();
}
/// <summary>
/// Installs a MockLockFactory on a RAMDirectory and verifies that an
/// IndexWriter uses it: the lock prefix is set, one unique lock is created,
/// and every created lock was attempted at least once.
/// </summary>
public virtual void TestCustomLockFactory()
{
    Directory ramDir = new RAMDirectory();
    MockLockFactory mockFactory = new MockLockFactory(this);
    ramDir.SetLockFactory(mockFactory);

    // Lock prefix should have been set:
    Assert.IsTrue(mockFactory.lockPrefixSet, "lock prefix was not set by the RAMDirectory");

    IndexWriter indexWriter = new IndexWriter(ramDir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);

    // Add 100 documents (so that commit lock is used).
    int added = 0;
    while (added < 100)
    {
        AddDoc(indexWriter);
        added++;
    }

    // The factory is expected to have produced exactly one unique lock,
    // through at least one makeLock call.
    Assert.AreEqual(1, mockFactory.locksCreated.Count, "# of unique locks created (after instantiating IndexWriter)");
    Assert.IsTrue(mockFactory.makeLockCount >= 1, "# calls to makeLock is 0 (after instantiating IndexWriter)");

    // Each created lock must have seen at least one obtain attempt.
    foreach (System.String lockName in mockFactory.locksCreated.Keys)
    {
        MockLockFactory.MockLock mockLock = (MockLockFactory.MockLock) mockFactory.locksCreated[lockName];
        Assert.IsTrue(mockLock.lockAttempts > 0, "# calls to Lock.obtain is 0 (after instantiating IndexWriter)");
    }

    indexWriter.Close();
}
/// <summary>
/// Writes 1000 random-field documents to an MMapDirectory-backed index.
/// Currently skipped: the leading Assert.Ignore is the first statement.
/// </summary>
public virtual void TestMmapIndex()
{
    Assert.Ignore("Need to port tests, but we don't really support MMapDirectories anyway");

    FSDirectory mmapStore = new MMapDirectory(new System.IO.DirectoryInfo(storePathname), null);

    // Plan to add a set of useful stopwords, consider changing some of the
    // interior filters.
    StandardAnalyzer standardAnalyzer = new StandardAnalyzer(
        Util.Version.LUCENE_CURRENT,
        Support.Compatibility.SetFactory.CreateHashSet <string>());

    // TODO: something about lock timeouts and leftover locks.
    IndexWriter indexWriter = new IndexWriter(mmapStore, standardAnalyzer, true, IndexWriter.MaxFieldLength.LIMITED, null);
    IndexSearcher indexSearcher = new IndexSearcher(mmapStore, true, null);

    for (int docNo = 0; docNo < 1000; docNo++)
    {
        System.String randomText = RandomField();
        Document document = new Document();
        document.Add(new Field("data", randomText, Field.Store.YES, Field.Index.ANALYZED));
        indexWriter.AddDocument(document, null);
    }

    indexSearcher.Close();
    indexWriter.Close();
    RmDir(new System.IO.FileInfo(storePathname));
}
/// <summary>
/// Indexes 500 documents of the form "N equals N" (N spelled out in
/// English) and checks that a SpanQueryFilter for the spelling of 10 yields
/// a doc-id set containing doc 10, with two positions per matching document.
/// </summary>
public virtual void TestFilterWorks()
{
    Directory ramDir = new RAMDirectory();
    IndexWriter indexWriter = new IndexWriter(ramDir, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    for (int number = 0; number < 500; number++)
    {
        System.String content = English.IntToEnglish(number) + " equals " + English.IntToEnglish(number);
        Document source = new Document();
        source.Add(new Field("field", content, Field.Store.NO, Field.Index.ANALYZED));
        indexWriter.AddDocument(source);
    }
    indexWriter.Close();

    IndexReader indexReader = IndexReader.Open(ramDir, true);

    SpanTermQuery spanQuery = new SpanTermQuery(new Term("field", English.IntToEnglish(10).Trim()));
    SpanQueryFilter spanFilter = new SpanQueryFilter(spanQuery);
    SpanFilterResult filterResult = spanFilter.BitSpans(indexReader);

    DocIdSet matchedDocs = filterResult.DocIdSet;
    Assert.IsTrue(matchedDocs != null, "docIdSet is null and it shouldn't be");
    AssertContainsDocId("docIdSet doesn't contain docId 10", matchedDocs, 10);

    var positions = filterResult.Positions;
    Assert.IsTrue(positions != null, "spans is null and it shouldn't be");

    int matchedCount = GetDocIdSetSize(matchedDocs);
    Assert.IsTrue(positions.Count == matchedCount, "spans Size: " + positions.Count + " is not: " + matchedCount);

    foreach (SpanFilterResult.PositionInfo info in positions)
    {
        Assert.IsTrue(info != null, "info is null and it shouldn't be");

        // The doc should indicate the bit is on.
        AssertContainsDocId("docIdSet doesn't contain docId " + info.Doc, matchedDocs, info.Doc);

        // There should be two positions in each.
        Assert.IsTrue(info.Positions.Count == 2, "info.getPositions() Size: " + info.Positions.Count + " is not: " + 2);
    }

    indexReader.Close();
}
/// <summary>
/// Regression test: a nested span query over a two-document index must
/// run without throwing and report exactly one hit.
/// </summary>
public virtual void TestNPESpanQuery()
{
    Directory mockDir = new MockRAMDirectory();
    IndexWriter indexWriter = new IndexWriter(
        mockDir,
        new StandardAnalyzer(new System.Collections.Hashtable(0)),
        IndexWriter.MaxFieldLength.LIMITED);

    // Add documents
    AddDoc(indexWriter, "1", "the big dogs went running to the market");
    AddDoc(indexWriter, "2", "the cat chased the mouse, then the cat ate the mouse quickly");

    // Commit
    indexWriter.Close();

    // Get searcher
    IndexReader indexReader = IndexReader.Open(mockDir);
    IndexSearcher indexSearcher = new IndexSearcher(indexReader);

    // Control (make sure docs indexed)
    Assert.AreEqual(2, HitCount(indexSearcher, "the"));
    Assert.AreEqual(1, HitCount(indexSearcher, "cat"));
    Assert.AreEqual(1, HitCount(indexSearcher, "dogs"));
    Assert.AreEqual(0, HitCount(indexSearcher, "rabbit"));

    // This search used to throw an exception (it shouldn't).
    SpanQuery[] clauses = new SpanQuery[]
    {
        CreateSpan(4, false, "chased", "cat"),
        CreateSpan("ate")
    };
    Assert.AreEqual(1, indexSearcher.Search(CreateSpan(0, true, clauses), 10).totalHits);

    indexReader.Close();
    mockDir.Close();
}
/// <summary>
/// Builds the shared in-memory index: one document per entry of the data
/// table, each with "id" and "all" keyword fields, plus an analyzed "data"
/// field when the entry is not null. Opens the reader <c>r</c> and the
/// searcher <c>s</c> over the result.
/// </summary>
public override void SetUp()
{
    base.SetUp();

    // null entries produce documents without a "data" field.
    System.String[] rows = new System.String[]
    {
        "A 1 2 3 4 5 6",
        "Z 4 5 6",
        null,
        "B 2 4 5 6",
        "Y 3 5 6",
        null,
        "C 3 6",
        "X 4 5 6"
    };

    index = new RAMDirectory();
    IndexWriter indexWriter = new IndexWriter(index, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);

    for (int rowNo = 0; rowNo < rows.Length; rowNo++)
    {
        System.String row = rows[rowNo];
        Document document = new Document();

        // Keyword-style (stored, not analyzed) fields.
        document.Add(new Field("id", System.Convert.ToString(rowNo), Field.Store.YES, Field.Index.NOT_ANALYZED));
        document.Add(new Field("all", "all", Field.Store.YES, Field.Index.NOT_ANALYZED));

        // Text-style (stored, analyzed) field, only when the row has data.
        if (row != null)
        {
            document.Add(new Field("data", row, Field.Store.YES, Field.Index.ANALYZED));
        }

        indexWriter.AddDocument(document);
    }

    indexWriter.Optimize();
    indexWriter.Close();

    r = IndexReader.Open(index);
    s = new IndexSearcher(r);
}
/// <summary>
/// Creates the phrase-query fixture: one document with "field", a twice
/// added "repeated" field and "palindrome", followed by two identical
/// documents carrying only "nonexist". Initializes <c>searcher</c> and an
/// empty <c>query</c>.
/// </summary>
public override void SetUp()
{
    base.SetUp();
    directory = new RAMDirectory();

    Analyzer customAnalyzer = new AnonymousClassAnalyzer(this);
    IndexWriter indexWriter = new IndexWriter(directory, customAnalyzer, true, IndexWriter.MaxFieldLength.LIMITED);

    // Main document; "repeated" is added twice on purpose.
    Document mainDoc = new Document();
    mainDoc.Add(new Field("field", "one two three four five", Field.Store.YES, Field.Index.ANALYZED));
    mainDoc.Add(new Field("repeated", "this is a repeated field - first part", Field.Store.YES, Field.Index.ANALYZED));
    IFieldable secondPart = new Field("repeated", "second part of a repeated field", Field.Store.YES, Field.Index.ANALYZED);
    mainDoc.Add(secondPart);
    mainDoc.Add(new Field("palindrome", "one two three two one", Field.Store.YES, Field.Index.ANALYZED));
    indexWriter.AddDocument(mainDoc);

    // Two further documents with the same "nonexist" content.
    for (int copy = 0; copy < 2; copy++)
    {
        Document extra = new Document();
        extra.Add(new Field("nonexist", "phrase exist notexist exist found", Field.Store.YES, Field.Index.ANALYZED));
        indexWriter.AddDocument(extra);
    }

    indexWriter.Optimize();
    indexWriter.Close();

    searcher = new IndexSearcher(directory, true);
    query = new PhraseQuery();
}
/// <summary>
/// Appends one document with the given content to the existing index in
/// <c>dir</c> (the writer is opened with create = false).
/// </summary>
/// <param name="content">Content passed on to <c>InsertDoc</c>.</param>
private void AddDoc(System.String content)
{
    IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false);
    try
    {
        InsertDoc(writer, content);
    }
    finally
    {
        // Always close the writer, even when InsertDoc throws, so a failed
        // insert does not leave the writer open for later calls.
        writer.Close();
    }
}
/// <summary>
/// Forms a Lucene index based on the 2 maps.
/// </summary>
/// <param name="indexDir">the directory where the index should be created</param>
/// <param name="word2Nums">word2Nums; its keys are the words to index</param>
/// <param name="num2Words">num2Words; passed through to the per-word Index overload</param>
private static void Index(String indexDir, System.Collections.IDictionary word2Nums, System.Collections.IDictionary num2Words)
{
    var row = 0;
    var mod = 1;

    using (var dir = FSDirectory.Open(new DirectoryInfo(indexDir)))
    {
        var writer = new IndexWriter(dir, ana, true, IndexWriter.MaxFieldLength.LIMITED);
        try
        {
            writer.UseCompoundFile = true; // why?

            foreach (String g in word2Nums.Keys)
            {
                var doc = new Document();

                var n = Index(word2Nums, num2Words, g, doc);
                if (n > 0)
                {
                    doc.Add(new Field(F_WORD, g, Field.Store.YES, Field.Index.NOT_ANALYZED));

                    // Progress output at rows 1, 2, 4, 8, ...: the modulus
                    // doubles after every line printed.
                    if ((++row % mod) == 0)
                    {
                        o.WriteLine("\trow=" + row + "/" + word2Nums.Count + " doc= " + doc);
                        mod *= 2;
                    }
                    writer.AddDocument(doc);
                }
            }

            o.WriteLine("Optimizing..");
            writer.Optimize();
        }
        finally
        {
            // Close the writer even if indexing fails; otherwise the
            // directory is disposed while the writer is still open.
            writer.Close();
        }
    }
}
/// <summary>
/// Creates a four-document index (a "field" text plus a one-letter
/// "sorter" value each) and initializes <c>searcher</c>, <c>query</c>
/// (field:three) and <c>filter</c>.
/// </summary>
public override void SetUp()
{
    base.SetUp();
    directory = new RAMDirectory();
    IndexWriter indexWriter = new IndexWriter(directory, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);

    // Each row: content of "field", then content of "sorter".
    System.String[][] rows = new System.String[][]
    {
        new System.String[] { "one two three four five", "b" },
        new System.String[] { "one two three four", "d" },
        new System.String[] { "one two three y", "a" },
        new System.String[] { "one two x", "c" }
    };

    foreach (System.String[] row in rows)
    {
        Document document = new Document();
        document.Add(new Field("field", row[0], Field.Store.YES, Field.Index.ANALYZED));
        document.Add(new Field("sorter", row[1], Field.Store.YES, Field.Index.ANALYZED));
        indexWriter.AddDocument(document);
    }

    indexWriter.Optimize();
    indexWriter.Close();

    searcher = new IndexSearcher(directory);
    query = new TermQuery(new Term("field", "three"));
    filter = NewStaticFilterB();
}
/// <summary>
/// Installs a MockLockFactory on a RAMDirectory and verifies that an
/// IndexWriter uses it: the lock prefix is set, one unique lock is created,
/// and every created lock was attempted at least once.
/// </summary>
public virtual void TestCustomLockFactory()
{
    Directory ramDir = new RAMDirectory();
    MockLockFactory mockFactory = new MockLockFactory(this);
    ramDir.SetLockFactory(mockFactory);

    // Lock prefix should have been set:
    Assert.IsTrue(mockFactory.lockPrefixSet, "lock prefix was not set by the RAMDirectory");

    IndexWriter indexWriter = new IndexWriter(ramDir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);

    // Add 100 documents (so that commit lock is used).
    for (int remaining = 100; remaining > 0; remaining--)
    {
        AddDoc(indexWriter);
    }

    // The factory is expected to have produced exactly one unique lock,
    // through at least one makeLock call.
    Assert.AreEqual(1, mockFactory.locksCreated.Count, "# of unique locks created (after instantiating IndexWriter)");
    Assert.IsTrue(mockFactory.makeLockCount >= 1, "# calls to makeLock is 0 (after instantiating IndexWriter)");

    // Each created lock must have seen at least one obtain attempt.
    foreach (System.String lockName in mockFactory.locksCreated.Keys)
    {
        MockLockFactory.MockLock mockLock = (MockLockFactory.MockLock)mockFactory.locksCreated[lockName];
        Assert.IsTrue(mockLock.lockAttempts > 0, "# calls to Lock.obtain is 0 (after instantiating IndexWriter)");
    }

    indexWriter.Close();
}
/// <summary>
/// Regression test for #35626: a MultiPhraseQuery nested as a required
/// clause of a BooleanQuery must run without throwing and return no hits.
/// </summary>
public virtual void TestPhrasePrefixWithBooleanQuery()
{
    RAMDirectory store = new RAMDirectory();
    StandardAnalyzer analyzer = new StandardAnalyzer(new System.Collections.Hashtable(0));

    IndexWriter indexWriter = new IndexWriter(store, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
    Add("This is a test", "object", indexWriter);
    Add("a note", "note", indexWriter);
    indexWriter.Close();

    IndexSearcher indexSearcher = new IndexSearcher(store);

    // Phrase part: "a" followed by either "test" or "this".
    MultiPhraseQuery phrasePart = new MultiPhraseQuery();
    phrasePart.Add(new Term("body", "a"));
    phrasePart.Add(new Term[] { new Term("body", "test"), new Term("body", "this") });

    // The combined query is equivalent to +type:note +body:"a t*".
    BooleanQuery combined = new BooleanQuery();
    combined.Add(new TermQuery(new Term("type", "note")), BooleanClause.Occur.MUST);
    combined.Add(phrasePart, BooleanClause.Occur.MUST);

    // An exception would be thrown here without the fix for #35626.
    ScoreDoc[] found = indexSearcher.Search(combined, null, 1000).scoreDocs;
    Assert.AreEqual(0, found.Length, "Wrong number of hits");

    indexSearcher.Close();
}
/// <summary>
/// Rebuilds the search index from scratch: every Idea, Issue, User,
/// Organization and Problem returned by the NoSQL session is added as one
/// document, then the index is optimized.
/// </summary>
public void BuildIndex()
{
    Lucene.Net.Index.IndexWriter writer = new Lucene.Net.Index.IndexWriter(CustomAppSettings.SearchIndexFolder, analyzer, true);
    try
    {
        using (var noSqlSession = noSqlSessionFactory())
        {
            foreach (var idea in noSqlSession.GetAll <Idea>())
            {
                writer.AddDocument(CreateDoc(idea.Id, CreateSearchText(idea), idea.Subject, EntryTypes.Idea));
            }

            foreach (var issue in noSqlSession.GetAll <Issue>())
            {
                writer.AddDocument(CreateDoc(issue.Id, CreateSearchText(issue), issue.Subject, EntryTypes.Issue));
            }

            foreach (var user in noSqlSession.GetAll <User>())
            {
                writer.AddDocument(CreateDoc(user.Id, user.FullName, user.FullName, EntryTypes.User));
            }

            foreach (var org in noSqlSession.GetAll <Organization>())
            {
                writer.AddDocument(CreateDoc(org.Id, org.Name, org.Name, EntryTypes.Organization));
            }

            foreach (var prob in noSqlSession.GetAll <Problem>())
            {
                writer.AddDocument(CreateDoc(prob.Id, CreateSearchText(prob), prob.Text.LimitLength(100), EntryTypes.Problem));
            }
        }

        writer.Optimize();
    }
    finally
    {
        // Close the writer even when reading the data store or adding a
        // document fails, so the writer is not left open on the index folder.
        writer.Close();
    }
}
/// <summary>
/// Prepares two equivalent views of the same data: <c>multiSearcher</c>
/// over two separate indexes (collection 1 and collection 2), and
/// <c>singleSearcher</c> over one index containing both collections.
/// </summary>
public override void SetUp()
{
    base.SetUp();

    // First separate index: collection 1 only.
    Directory firstDir = new RAMDirectory();
    IndexWriter firstWriter = new IndexWriter(firstDir, new StandardAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    AddCollection1(firstWriter);
    firstWriter.Close();

    // Second separate index: collection 2 only.
    Directory secondDir = new RAMDirectory();
    IndexWriter secondWriter = new IndexWriter(secondDir, new StandardAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    AddCollection2(secondWriter);
    secondWriter.Close();

    // MultiSearcher spanning the two separate searchers.
    Searchable[] parts = new Searchable[]
    {
        new IndexSearcher(firstDir),
        new IndexSearcher(secondDir)
    };
    multiSearcher = new MultiSearcher(parts);

    // Single index that contains all documents.
    Directory combinedDir = new RAMDirectory();
    IndexWriter combinedWriter = new IndexWriter(combinedDir, new StandardAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    AddCollection1(combinedWriter);
    AddCollection2(combinedWriter);
    combinedWriter.Close();
    singleSearcher = new IndexSearcher(combinedDir);
}
/// <summary>
/// Builds an in-memory index of <c>N_DOCS</c> documents cycling through a fixed
/// set of texts, opens a read-only searcher on it, parses a large query into
/// <c>query</c> and runs it once to warm the searcher.
/// </summary>
public override void SetUp()
{
    base.SetUp();

    System.String[] docText = new System.String[]
    {
        "docThatNeverMatchesSoWeCanRequireLastDocCollectedToBeGreaterThanZero",
        "one blah three",
        "one foo three multiOne",
        "one foobar three multiThree",
        "blueberry pancakes",
        "blueberry pie",
        "blueberry strudel",
        "blueberry pizza"
    };

    Directory directory = new RAMDirectory();
    IndexWriter iw = new IndexWriter(directory, new WhitespaceAnalyzer(), true, MaxFieldLength.UNLIMITED);
    for (int i = 0; i < N_DOCS; i++)
    {
        Add(docText[i % docText.Length], iw);
    }
    iw.Close();
    searcher = new IndexSearcher(directory, true);

    // Large query so that the search takes longer.
    System.Text.StringBuilder qtxt = new System.Text.StringBuilder("one");
    // Start from 1 so that the text of the 0th doc is NOT part of the query and
    // that doc never matches. (The loop previously started at 0, which put the
    // "never matches" term into the query and contradicted this intent.)
    for (int i = 1; i < docText.Length; i++)
    {
        qtxt.Append(' ').Append(docText[i]);
    }

    QueryParser queryParser = new QueryParser(Util.Version.LUCENE_CURRENT, FIELD_NAME, new WhitespaceAnalyzer());
    query = queryParser.Parse(qtxt.ToString());

    // warm the searcher
    searcher.Search(query, null, 1000);
}
/// <summary>
/// Indexes three category paths as keyword fields and checks how many of them
/// each PrefixQuery matches.
/// </summary>
public virtual void TestPrefixQuery()
{
    RAMDirectory ramDir = new RAMDirectory();
    System.String[] categoryPaths = new System.String[] { "/Computers", "/Computers/Mac", "/Computers/Windows" };

    // One document per category path.
    IndexWriter indexWriter = new IndexWriter(ramDir, new WhitespaceAnalyzer(), true);
    foreach (System.String path in categoryPaths)
    {
        Document categoryDoc = new Document();
        categoryDoc.Add(Field.Keyword("category", path));
        indexWriter.AddDocument(categoryDoc);
    }
    indexWriter.Close();

    // The top-level prefix matches all three paths.
    PrefixQuery prefixQuery = new PrefixQuery(new Term("category", "/Computers"));
    IndexSearcher indexSearcher = new IndexSearcher(ramDir);
    Hits matches = indexSearcher.Search(prefixQuery);
    Assert.AreEqual(3, matches.Length(), "All documents in /Computers category and below");

    // The narrower prefix matches only the Mac path.
    prefixQuery = new PrefixQuery(new Term("category", "/Computers/Mac"));
    matches = indexSearcher.Search(prefixQuery);
    Assert.AreEqual(1, matches.Length(), "One in /Computers/Mac");
}
/// <summary>
/// Regression test for bug 33161 (now fixed). To trigger it the outer query
/// needs more than one term, all required, and the contained MultiPhraseQuery
/// must hold exactly one term array.
/// </summary>
public virtual void TestBooleanQueryContainingSingleTermPrefixQuery()
{
    // Three documents, added through the Add helper, then optimized.
    RAMDirectory ramDir = new RAMDirectory();
    IndexWriter indexWriter = new IndexWriter(ramDir, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    Add("blueberry pie", indexWriter);
    Add("blueberry chewing gum", indexWriter);
    Add("blue raspberry pie", indexWriter);
    indexWriter.Optimize();
    indexWriter.Close();

    IndexSearcher indexSearcher = new IndexSearcher(ramDir, true);

    // A MultiPhraseQuery made of a single term array.
    Term[] blueVariants = new Term[] { new Term("body", "blueberry"), new Term("body", "blue") };
    MultiPhraseQuery phraseClause = new MultiPhraseQuery();
    phraseClause.Add(blueVariants);

    // The combined query is equivalent to +body:pie +body:"blue*".
    BooleanQuery combined = new BooleanQuery();
    combined.Add(new TermQuery(new Term("body", "pie")), Occur.MUST);
    combined.Add(phraseClause, Occur.MUST);

    // Without the fix an exception is thrown by this search.
    ScoreDoc[] matches = indexSearcher.Search(combined, null, 1000).ScoreDocs;
    Assert.AreEqual(2, matches.Length, "Wrong number of hits");

    indexSearcher.Close();
}
/// <summary>
/// Writes <c>NUM_DOCS</c> documents, each with one unstored, non-analyzed field
/// per numeric type, and opens a read-only <c>reader</c> on the result. Every
/// value starts at its type's maximum and is decremented once per document.
/// </summary>
public override void SetUp()
{
    base.SetUp();

    RAMDirectory ramDir = new RAMDirectory();
    IndexWriter indexWriter = new IndexWriter(ramDir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);

    // Running values, one per field.
    long longValue = System.Int64.MaxValue;
    double doubleValue = System.Double.MaxValue;
    sbyte byteValue = (sbyte)System.SByte.MaxValue;
    short shortValue = System.Int16.MaxValue;
    int intValue = System.Int32.MaxValue;
    float floatValue = System.Single.MaxValue;

    int written = 0;
    while (written < NUM_DOCS)
    {
        Document numericDoc = new Document();
        numericDoc.Add(new Field("theLong", System.Convert.ToString(longValue), Field.Store.NO, Field.Index.NOT_ANALYZED));
        numericDoc.Add(new Field("theDouble", doubleValue.ToString("E16"), Field.Store.NO, Field.Index.NOT_ANALYZED));
        numericDoc.Add(new Field("theByte", System.Convert.ToString((sbyte)byteValue), Field.Store.NO, Field.Index.NOT_ANALYZED));
        numericDoc.Add(new Field("theShort", System.Convert.ToString(shortValue), Field.Store.NO, Field.Index.NOT_ANALYZED));
        numericDoc.Add(new Field("theInt", System.Convert.ToString(intValue), Field.Store.NO, Field.Index.NOT_ANALYZED));
        numericDoc.Add(new Field("theFloat", floatValue.ToString("E8"), Field.Store.NO, Field.Index.NOT_ANALYZED));
        indexWriter.AddDocument(numericDoc);

        // Step every value down after it has been written.
        longValue--;
        doubleValue--;
        byteValue--;
        shortValue--;
        intValue--;
        floatValue--;
        written++;
    }

    indexWriter.Close();
    reader = IndexReader.Open(ramDir, true);
}
/// <summary>
/// Opens a writer on the existing index in <c>dir</c> (create flag is false),
/// inserts one document for <paramref name="content"/> via InsertDoc and
/// closes the writer again.
/// </summary>
/// <param name="content">Value handed to InsertDoc for the new document.</param>
private void AddDoc(System.String content)
{
    IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false, IndexWriter.MaxFieldLength.LIMITED);
    try
    {
        InsertDoc(writer, content);
    }
    finally
    {
        // Close the writer even if InsertDoc throws, so it is not left open
        // on the shared directory for later calls.
        writer.Close();
    }
}
/// <summary>
/// Installs NoLockFactory on a RAMDirectory and checks that a second
/// IndexWriter can be opened on it while the first is still open.
/// </summary>
public virtual void TestRAMDirectoryNoLocking()
{
    Directory ramDir = new RAMDirectory();
    ramDir.SetLockFactory(NoLockFactory.Instance);

    bool usesNoLockFactory = typeof(NoLockFactory).IsInstanceOfType(ramDir.LockFactory);
    Assert.IsTrue(usesNoLockFactory, "RAMDirectory.setLockFactory did not take");

    IndexWriter firstWriter = new IndexWriter(ramDir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);

    // A second writer on the same directory is normally not allowed, but it
    // should go through here because no locks are in use.
    IndexWriter secondWriter = null;
    try
    {
        secondWriter = new IndexWriter(ramDir, new WhitespaceAnalyzer(), false, IndexWriter.MaxFieldLength.LIMITED);
    }
    catch (System.Exception ex)
    {
        System.Console.Out.WriteLine(ex.StackTrace);
        Assert.Fail("Should not have hit an IOException with no locking");
    }

    firstWriter.Close();
    if (secondWriter == null)
    {
        return;
    }
    secondWriter.Close();
}
/// <summary>
/// Fills an in-memory index with <c>NUM_DOCS</c> documents carrying one
/// unstored, non-analyzed field per numeric type and opens <c>reader</c> on it.
/// Each value begins at the maximum of its type and drops by one per document.
/// </summary>
public override void SetUp()
{
    base.SetUp();

    RAMDirectory indexDir = new RAMDirectory();
    IndexWriter docWriter = new IndexWriter(indexDir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);

    // Current value for each typed field.
    long currentLong = System.Int64.MaxValue;
    double currentDouble = System.Double.MaxValue;
    sbyte currentByte = (sbyte) System.SByte.MaxValue;
    short currentShort = System.Int16.MaxValue;
    int currentInt = System.Int32.MaxValue;
    float currentFloat = System.Single.MaxValue;

    for (int docIndex = 0; docIndex < NUM_DOCS; docIndex++)
    {
        // Render all values for this document first, in field order.
        System.String longText = System.Convert.ToString(currentLong);
        System.String doubleText = currentDouble.ToString("E16");
        System.String byteText = System.Convert.ToString((sbyte) currentByte);
        System.String shortText = System.Convert.ToString(currentShort);
        System.String intText = System.Convert.ToString(currentInt);
        System.String floatText = currentFloat.ToString("E8");

        Document typedDoc = new Document();
        typedDoc.Add(new Field("theLong", longText, Field.Store.NO, Field.Index.NOT_ANALYZED));
        typedDoc.Add(new Field("theDouble", doubleText, Field.Store.NO, Field.Index.NOT_ANALYZED));
        typedDoc.Add(new Field("theByte", byteText, Field.Store.NO, Field.Index.NOT_ANALYZED));
        typedDoc.Add(new Field("theShort", shortText, Field.Store.NO, Field.Index.NOT_ANALYZED));
        typedDoc.Add(new Field("theInt", intText, Field.Store.NO, Field.Index.NOT_ANALYZED));
        typedDoc.Add(new Field("theFloat", floatText, Field.Store.NO, Field.Index.NOT_ANALYZED));
        docWriter.AddDocument(typedDoc);

        // Move every value one step down for the next document.
        currentLong--;
        currentDouble--;
        currentByte--;
        currentShort--;
        currentInt--;
        currentFloat--;
    }

    docWriter.Close();
    reader = IndexReader.Open(indexDir);
}
/// <summary>
/// Adds 60 single-field documents (values "0" to "59") through
/// <paramref name="writer"/>, closes it, then searches for the value "36" with
/// <paramref name="filter"/> applied and expects exactly one hit.
/// </summary>
/// <param name="writer">Writer used to add the documents; closed by this method.</param>
/// <param name="directory">Directory the searcher is opened on.</param>
/// <param name="filter">Filter passed to the search.</param>
/// <param name="optimize">When true, the index is optimized before the writer is closed.</param>
public void SearchFiltered(IndexWriter writer, Directory directory, Filter filter, bool optimize)
{
    try
    {
        // Simple docs: one stored, non-analyzed field holding the loop counter.
        for (int i = 0; i < 60; i++)
        {
            Document doc = new Document();
            doc.Add(new Field(FIELD, i.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED));
            writer.AddDocument(doc);
        }
        if (optimize)
        {
            writer.Optimize();
        }
        writer.Close();

        BooleanQuery booleanQuery = new BooleanQuery();
        booleanQuery.Add(new TermQuery(new Term(FIELD, "36")), Occur.SHOULD);

        IndexSearcher indexSearcher = new IndexSearcher(directory);
        try
        {
            ScoreDoc[] hits = indexSearcher.Search(booleanQuery, filter, 1000).ScoreDocs;
            Assert.AreEqual(1, hits.Length, "Number of matched documents");
        }
        finally
        {
            // The searcher was previously never closed; release it whether or
            // not the assertion passes.
            indexSearcher.Close();
        }
    }
    catch (System.IO.IOException e)
    {
        Assert.Fail(e.Message);
    }
}
/// <summary>
/// Command-line entry point: indexes the path given as the first argument into
/// an index named "index", optimizes it and prints the elapsed time.
/// </summary>
/// <param name="args">Command-line arguments; <c>args[0]</c> is the root directory to index.</param>
public static void Main(System.String[] args)
{
    System.String usage = typeof(IndexFiles) + " <root_directory>";
    if (args.Length == 0)
    {
        System.Console.Error.WriteLine("Usage: " + usage);
        System.Environment.Exit(1);
    }

    System.DateTime start = System.DateTime.UtcNow;
    try
    {
        IndexWriter writer = new IndexWriter("index", new StandardAnalyzer(), true);
        IndexDocs(writer, new System.IO.FileInfo(args[0]));

        writer.Optimize();
        writer.Close();

        // Report real elapsed milliseconds. The previous code printed the raw
        // difference of DateTime.Ticks (100-nanosecond units) under a
        // "milliseconds" label, overstating the time by a factor of 10,000.
        System.TimeSpan elapsed = System.DateTime.UtcNow - start;
        System.Console.Out.Write((long) elapsed.TotalMilliseconds);
        System.Console.Out.WriteLine(" total milliseconds");
    }
    catch (System.IO.IOException e)
    {
        System.Console.Out.WriteLine(" caught a " + e.GetType() + "\n with message: " + e.Message);
    }
}
/// <summary>
/// Indexes 1000 documents whose three fields hold the English spelling of the
/// document number, using a PayloadAnalyzer and the fixture's
/// <c>similarity</c>, then opens <c>searcher</c> with the same similarity.
/// </summary>
public override void SetUp()
{
    base.SetUp();

    RAMDirectory ramDir = new RAMDirectory();
    PayloadAnalyzer payloadAnalyzer = new PayloadAnalyzer(this);
    IndexWriter indexWriter = new IndexWriter(ramDir, payloadAnalyzer, true, IndexWriter.MaxFieldLength.LIMITED);
    indexWriter.SetSimilarity(similarity);

    int number = 0;
    while (number < 1000)
    {
        Document numberDoc = new Document();

        // Field registered under the "no payload" field name.
        Field plainField = new Field(PayloadHelper.NO_PAYLOAD_FIELD, English.IntToEnglish(number), Field.Store.YES, Field.Index.ANALYZED);
        numberDoc.Add(plainField);

        // "field" holds the spelling once, "multiField" holds it twice.
        Field singleField = new Field("field", English.IntToEnglish(number), Field.Store.YES, Field.Index.ANALYZED);
        numberDoc.Add(singleField);
        Field doubledField = new Field("multiField", English.IntToEnglish(number) + " " + English.IntToEnglish(number), Field.Store.YES, Field.Index.ANALYZED);
        numberDoc.Add(doubledField);

        indexWriter.AddDocument(numberDoc);
        number++;
    }

    indexWriter.Optimize();
    indexWriter.Close();

    searcher = new IndexSearcher(ramDir);
    searcher.SetSimilarity(similarity);
}
/// <summary>
/// Checks collated range queries with Danish ordering: the three indexed words
/// are listed in Danish collation order, which differs from Unicode order.
/// </summary>
public virtual void TestDanish()
{
    // Build the index: one document per word, fields stored and untokenized.
    RAMDirectory ramDir = new RAMDirectory();
    IndexWriter indexWriter = new IndexWriter(ramDir, new SimpleAnalyzer(), T, IndexWriter.MaxFieldLength.LIMITED);

    // Danish collation orders the words below in the given order
    // (example taken from TestSort.testInternationalSort()).
    System.String[] danishWords = new System.String[] { "H\u00D8T", "H\u00C5T", "MAND" };
    foreach (System.String word in danishWords)
    {
        Document wordDoc = new Document();
        wordDoc.Add(new Field("content", word, Field.Store.YES, Field.Index.UN_TOKENIZED));
        wordDoc.Add(new Field("body", "body", Field.Store.YES, Field.Index.UN_TOKENIZED));
        indexWriter.AddDocument(wordDoc);
    }
    indexWriter.Optimize();
    indexWriter.Close();

    IndexReader indexReader = IndexReader.Open(ramDir);
    IndexSearcher indexSearcher = new IndexSearcher(indexReader);

    System.Globalization.CompareInfo danish = new System.Globalization.CultureInfo("da" + "-" + "dk").CompareInfo;

    // Unicode order would not include "H\u00C5T" in [ "H\u00D8T", "MAND" ],
    // but Danish collation does.
    ScoreDoc[] matches = indexSearcher.Search(Csrq("content", "H\u00D8T", "MAND", F, F, danish), null, 1000).scoreDocs;
    AssertEquals("The index Term should be included.", 1, matches.Length);

    matches = indexSearcher.Search(Csrq("content", "H\u00C5T", "MAND", F, F, danish), null, 1000).scoreDocs;
    AssertEquals("The index Term should not be included.", 0, matches.Length);

    indexSearcher.Close();
}
/// <summary>
/// Checks collated range queries against a single Farsi term, using the Arabic
/// culture's comparison rules as the collator.
/// </summary>
public virtual void TestFarsi()
{
    // Build the index: a single document with a non-analyzed Farsi term.
    RAMDirectory ramDir = new RAMDirectory();
    IndexWriter indexWriter = new IndexWriter(ramDir, new SimpleAnalyzer(), T, IndexWriter.MaxFieldLength.LIMITED);
    Document farsiDoc = new Document();
    farsiDoc.Add(new Field("content", "\u0633\u0627\u0628", Field.Store.YES, Field.Index.NOT_ANALYZED));
    farsiDoc.Add(new Field("body", "body", Field.Store.YES, Field.Index.NOT_ANALYZED));
    indexWriter.AddDocument(farsiDoc);
    indexWriter.Optimize();
    indexWriter.Close();

    IndexReader indexReader = IndexReader.Open(ramDir);
    IndexSearcher indexSearcher = new IndexSearcher(indexReader);

    // Neither Java 1.4.2 nor 1.5.0 offers Farsi Locale collation in
    // RuleBasedCollator, but the Arabic Locale seems to order the Farsi
    // characters properly, so the Arabic culture is used here.
    System.Globalization.CompareInfo arabic = new System.Globalization.CultureInfo("ar").CompareInfo;

    // Unicode order would include U+0633 in [ U+062F - U+0698 ], but Farsi
    // orders U+0698 before U+0633, so the single index Term should NOT be
    // returned by a ConstantScoreRangeQuery with a Farsi Collator (or an
    // Arabic one when Farsi is not supported).
    ScoreDoc[] matches = indexSearcher.Search(Csrq("content", "\u062F", "\u0698", T, T, arabic), null, 1000).scoreDocs;
    AssertEquals("The index Term should not be included.", 0, matches.Length);

    matches = indexSearcher.Search(Csrq("content", "\u0633", "\u0638", T, T, arabic), null, 1000).scoreDocs;
    AssertEquals("The index Term should be included.", 1, matches.Length);

    indexSearcher.Close();
}
/// <summary>
/// Prepares a small index of <c>N_DOCS</c> documents in <c>dir</c>, analyzed
/// with <c>anlzr</c>. Documents are added in steps of 4 (modulo
/// <c>N_DOCS</c>) rather than in natural ID order.
/// </summary>
/// <exception cref="System.Exception">
/// Thrown when the stepping revisits a document before all have been added.
/// </exception>
public override void SetUp()
{
    // base.SetUp() was previously invoked twice in a row; once is enough.
    base.SetUp();

    // prepare a small index with just a few documents.
    dir = new RAMDirectory();
    anlzr = new StandardAnalyzer();
    IndexWriter iw = new IndexWriter(dir, anlzr, IndexWriter.MaxFieldLength.LIMITED);

    // add docs not exactly in natural ID order, to verify we do check the order of docs by scores
    int remaining = N_DOCS;
    bool[] done = new bool[N_DOCS];
    int i = 0;
    while (remaining > 0)
    {
        if (done[i])
        {
            // Landing on an already-added doc means the step of 4 does not
            // cycle through every index for this N_DOCS.
            throw new System.Exception("to set this test correctly N_DOCS=" + N_DOCS + " must be primary and greater than 2!");
        }
        AddDoc(iw, i);
        done[i] = true;
        i = (i + 4) % N_DOCS;
        // Optionally commit every third step when doMultiSegment is set.
        if (doMultiSegment && remaining % 3 == 0)
        {
            iw.Commit();
        }
        remaining--;
    }
    iw.Close();
}
/// <summary>
/// Fills <c>terms</c> with single-letter terms ('A', 'B', ...) in field "f"
/// and writes <paramref name="nDocs"/> documents to <paramref name="dir"/>,
/// adding each term to a document only when a random draw bounded by that
/// term's computed value comes out as zero.
/// </summary>
/// <param name="nDocs">Number of documents to write.</param>
/// <param name="nTerms">Number of distinct terms to create.</param>
/// <param name="power">Exponent applied when computing each term's draw bound.</param>
/// <param name="dir">Directory that receives the new index.</param>
public virtual void CreateRandomTerms(int nDocs, int nTerms, double power, Directory dir)
{
    int[] drawBound = new int[nTerms];
    for (int t = 0; t < nTerms; t++)
    {
        // The base shrinks as t grows; the original note says this is meant
        // to make the first terms less frequent.
        int weight = nTerms + 1 - t;
        drawBound[t] = (int) System.Math.Ceiling(System.Math.Pow(weight, power));

        char letter = (char) ('A' + t);
        terms[t] = new Term("f", System.Convert.ToString(letter));
    }

    IndexWriter indexWriter = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
    for (int docIndex = 0; docIndex < nDocs; docIndex++)
    {
        Document randomDoc = new Document();
        for (int t = 0; t < nTerms; t++)
        {
            if (r.Next(drawBound[t]) != 0)
            {
                continue;
            }
            randomDoc.Add(new Field("f", terms[t].Text(), Field.Store.NO, Field.Index.UN_TOKENIZED));
        }
        indexWriter.AddDocument(randomDoc);
    }

    indexWriter.Optimize();
    indexWriter.Close();
}
/// <summary>
/// Points <c>s</c> at a searcher over a freshly created index that contains no
/// documents.
/// </summary>
public virtual void CreateDummySearcher()
{
    // A dummy index with nothing in it. This could possibly fail if Lucene
    // starts checking for docid ranges...
    RAMDirectory emptyDir = new RAMDirectory();
    new IndexWriter(emptyDir, new WhitespaceAnalyzer(), true).Close();

    s = new IndexSearcher(emptyDir);
}
/// <summary>
/// Builds two MultiPhraseQuery instances whose second position is the set of
/// indexed terms starting with "pi", and checks that the one led by
/// "blueberry" finds two documents while the one led by "strawberry" finds none.
/// </summary>
public virtual void TestPhrasePrefix()
{
    RAMDirectory ramDir = new RAMDirectory();
    IndexWriter indexWriter = new IndexWriter(ramDir, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);

    // Five documents, each with a single analyzed "body" field.
    Document pieDoc = new Document();
    Document strudelDoc = new Document();
    Document pizzaDoc = new Document();
    Document gumDoc = new Document();
    Document circusDoc = new Document();
    pieDoc.Add(new Field("body", "blueberry pie", Field.Store.YES, Field.Index.ANALYZED));
    strudelDoc.Add(new Field("body", "blueberry strudel", Field.Store.YES, Field.Index.ANALYZED));
    pizzaDoc.Add(new Field("body", "blueberry pizza", Field.Store.YES, Field.Index.ANALYZED));
    gumDoc.Add(new Field("body", "blueberry chewing gum", Field.Store.YES, Field.Index.ANALYZED));
    circusDoc.Add(new Field("body", "piccadilly circus", Field.Store.YES, Field.Index.ANALYZED));
    indexWriter.AddDocument(pieDoc);
    indexWriter.AddDocument(strudelDoc);
    indexWriter.AddDocument(pizzaDoc);
    indexWriter.AddDocument(gumDoc);
    indexWriter.AddDocument(circusDoc);
    indexWriter.Optimize();
    indexWriter.Close();

    IndexSearcher indexSearcher = new IndexSearcher(ramDir);

    // First position of each phrase (MultiPhraseQuery replaces the older
    // PhrasePrefixQuery here).
    MultiPhraseQuery blueberryQuery = new MultiPhraseQuery();
    MultiPhraseQuery strawberryQuery = new MultiPhraseQuery();
    blueberryQuery.Add(new Term("body", "blueberry"));
    strawberryQuery.Add(new Term("body", "strawberry"));

    // Collect the indexed terms that start with the prefix; per the original
    // note the enumeration yields "piccadilly", "pie" and "pizza".
    System.Collections.ArrayList prefixedTerms = new System.Collections.ArrayList();
    IndexReader indexReader = IndexReader.Open(ramDir);
    System.String prefix = "pi";
    TermEnum termEnum = indexReader.Terms(new Term("body", prefix + "*"));
    bool hasMore = true;
    while (hasMore)
    {
        if (termEnum.Term().Text().StartsWith(prefix))
        {
            prefixedTerms.Add(termEnum.Term());
        }
        hasMore = termEnum.Next();
    }

    // Second position of each phrase: any of the collected terms.
    blueberryQuery.Add((Term[]) prefixedTerms.ToArray(typeof(Term)));
    strawberryQuery.Add((Term[]) prefixedTerms.ToArray(typeof(Term)));

    ScoreDoc[] matches;
    matches = indexSearcher.Search(blueberryQuery, null, 1000).scoreDocs;
    Assert.AreEqual(2, matches.Length);
    matches = indexSearcher.Search(strawberryQuery, null, 1000).scoreDocs;
    Assert.AreEqual(0, matches.Length);
}
/// <summary>
/// Indexes one document dated one second in the past and checks two
/// TermRangeFilter instances on "datefield": one whose range contains that
/// date and one whose range ends before it.
/// </summary>
public virtual void TestBefore()
{
    RAMDirectory ramDir = new RAMDirectory();
    IndexWriter indexWriter = new IndexWriter(ramDir, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);

    long now = (DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond);

    // The document's date lies 1000 ms before "now".
    Document datedDoc = new Document();
    System.String docDate = DateTools.TimeToString(now - 1000, DateTools.Resolution.MILLISECOND);
    datedDoc.Add(new Field("datefield", docDate, Field.Store.YES, Field.Index.NOT_ANALYZED));
    datedDoc.Add(new Field("body", "Today is a very sunny day in New York City", Field.Store.YES, Field.Index.ANALYZED));
    indexWriter.AddDocument(datedDoc);
    indexWriter.Optimize();
    indexWriter.Close();

    IndexSearcher indexSearcher = new IndexSearcher(ramDir, true);

    // Filter that should preserve matches: (now - 2000, now].
    System.String keepLower = DateTools.TimeToString(now - 2000, DateTools.Resolution.MILLISECOND);
    System.String keepUpper = DateTools.TimeToString(now, DateTools.Resolution.MILLISECOND);
    TermRangeFilter keepFilter = new TermRangeFilter("datefield", keepLower, keepUpper, false, true);

    // Filter that should discard matches: [0, now - 2000).
    System.String dropLower = DateTools.TimeToString(0, DateTools.Resolution.MILLISECOND);
    System.String dropUpper = DateTools.TimeToString(now - 2000, DateTools.Resolution.MILLISECOND);
    TermRangeFilter dropFilter = new TermRangeFilter("datefield", dropLower, dropUpper, true, false);

    // One query for a term that is not indexed and one for a term that is.
    Query missingQuery = new TermQuery(new Term("body", "NoMatchForThis"));
    Query sunnyQuery = new TermQuery(new Term("body", "sunny"));

    ScoreDoc[] matches;

    // Baseline without any filter.
    matches = indexSearcher.Search(missingQuery, null, 1000).ScoreDocs;
    Assert.AreEqual(0, matches.Length);
    matches = indexSearcher.Search(sunnyQuery, null, 1000).ScoreDocs;
    Assert.AreEqual(1, matches.Length);

    // The same queries with each filter applied.
    matches = indexSearcher.Search(missingQuery, keepFilter, 1000).ScoreDocs;
    Assert.AreEqual(0, matches.Length);
    matches = indexSearcher.Search(missingQuery, dropFilter, 1000).ScoreDocs;
    Assert.AreEqual(0, matches.Length);
    matches = indexSearcher.Search(sunnyQuery, keepFilter, 1000).ScoreDocs;
    Assert.AreEqual(1, matches.Length);
    matches = indexSearcher.Search(sunnyQuery, dropFilter, 1000).ScoreDocs;
    Assert.AreEqual(0, matches.Length);
}
/// <summary>
/// Checks which TopScoreDocCollector implementation <c>create</c> returns for
/// each value of the in-order flag, and that collecting the top 3 of 10 empty
/// documents yields doc ids 0, 1 and 2 in that order.
/// </summary>
public virtual void TestOutOfOrderCollection()
{
    // Ten empty documents; the writer is created without an analyzer.
    Directory ramDir = new RAMDirectory();
    IndexWriter indexWriter = new IndexWriter(ramDir, null, MaxFieldLength.UNLIMITED);
    int added = 0;
    while (added < 10)
    {
        indexWriter.AddDocument(new Document());
        added++;
    }
    indexWriter.Commit();
    indexWriter.Close();

    // Parallel arrays: in-order flag and the collector class expected for it.
    bool[] inOrderFlags = new bool[] { false, true };
    System.String[] expectedCollector = new System.String[] { "OutOfOrderTopScoreDocCollector", "InOrderTopScoreDocCollector" };

    // Remember the global setting so it can be restored afterwards.
    bool savedAllowOutOfOrder = BooleanQuery.GetAllowDocsOutOfOrder();
    BooleanQuery.SetAllowDocsOutOfOrder(true);

    BooleanQuery bq = new BooleanQuery();
    // Add a Query with SHOULD, since bw.scorer() returns BooleanScorer2
    // which delegates to BS if there are no mandatory clauses.
    bq.Add(new MatchAllDocsQuery(), Occur.SHOULD);
    // minNrShouldMatch = 1 keeps BQ from rewriting itself to the single clause.
    bq.SetMinimumNumberShouldMatch(1);

    try
    {
        IndexSearcher indexSearcher = new IndexSearcher(ramDir);
        for (int mode = 0; mode < inOrderFlags.Length; mode++)
        {
            TopDocsCollector collector = TopScoreDocCollector.create(3, inOrderFlags[mode]);
            Assert.AreEqual("Lucene.Net.Search.TopScoreDocCollector+" + expectedCollector[mode], collector.GetType().FullName);

            indexSearcher.Search(new MatchAllDocsQuery(), collector);

            ScoreDoc[] top = collector.TopDocs().scoreDocs;
            Assert.AreEqual(3, top.Length);
            for (int rank = 0; rank < top.Length; rank++)
            {
                Assert.AreEqual(rank, top[rank].doc, "expected doc Id " + rank + " found " + top[rank].doc);
            }
        }
    }
    finally
    {
        // Whatever happens, put BooleanQuery.allowDocsOutOfOrder back to its
        // original value rather than to false, in case the implementation in
        // BQ changes some day.
        BooleanQuery.SetAllowDocsOutOfOrder(savedAllowOutOfOrder);
    }
}
/// <summary>
/// Indexes one document dated in the past and checks two RangeFilter instances
/// on "datefield": one whose range contains that date and one whose range ends
/// before it.
/// </summary>
public virtual void TestBefore()
{
    RAMDirectory ramDir = new RAMDirectory();
    IndexWriter indexWriter = new IndexWriter(ramDir, new SimpleAnalyzer(), true);

    // Milliseconds elapsed since 1970-01-01, taken from the UTC clock.
    long now = (long) (DateTime.UtcNow - new DateTime(1970, 1, 1)).TotalMilliseconds;

    // The document's date lies 1000 * 100000 ms before "now".
    Lucene.Net.Documents.Document datedDoc = new Lucene.Net.Documents.Document();
    System.String docDate = Lucene.Net.Documents.DateTools.TimeToString(now - 1000 * 100000, Lucene.Net.Documents.DateTools.Resolution.MILLISECOND);
    datedDoc.Add(new Field("datefield", docDate, Field.Store.YES, Field.Index.UN_TOKENIZED));
    datedDoc.Add(new Field("body", "Today is a very sunny day in New York City", Field.Store.YES, Field.Index.TOKENIZED));
    indexWriter.AddDocument(datedDoc);
    indexWriter.Optimize();
    indexWriter.Close();

    IndexSearcher indexSearcher = new IndexSearcher(ramDir);

    // Filter that should preserve matches: (now - 2000 * 100000, now].
    System.String keepLower = Lucene.Net.Documents.DateTools.TimeToString(now - 2000 * 100000, Lucene.Net.Documents.DateTools.Resolution.MILLISECOND);
    System.String keepUpper = Lucene.Net.Documents.DateTools.TimeToString(now, Lucene.Net.Documents.DateTools.Resolution.MILLISECOND);
    RangeFilter keepFilter = new RangeFilter("datefield", keepLower, keepUpper, false, true);

    // Filter that should discard matches: [0, now - 2000 * 100000).
    System.String dropLower = Lucene.Net.Documents.DateTools.TimeToString(0, Lucene.Net.Documents.DateTools.Resolution.MILLISECOND);
    System.String dropUpper = Lucene.Net.Documents.DateTools.TimeToString(now - 2000 * 100000, Lucene.Net.Documents.DateTools.Resolution.MILLISECOND);
    RangeFilter dropFilter = new RangeFilter("datefield", dropLower, dropUpper, true, false);

    // One query for a term that is not indexed and one for a term that is.
    Query missingQuery = new TermQuery(new Term("body", "NoMatchForThis"));
    Query sunnyQuery = new TermQuery(new Term("body", "sunny"));

    Hits matches;

    // Baseline without any filter.
    matches = indexSearcher.Search(missingQuery);
    Assert.AreEqual(0, matches.Length());
    matches = indexSearcher.Search(sunnyQuery);
    Assert.AreEqual(1, matches.Length());

    // The same queries with each filter applied.
    matches = indexSearcher.Search(missingQuery, keepFilter);
    Assert.AreEqual(0, matches.Length());
    matches = indexSearcher.Search(missingQuery, dropFilter);
    Assert.AreEqual(0, matches.Length());
    matches = indexSearcher.Search(sunnyQuery, keepFilter);
    Assert.AreEqual(1, matches.Length());
    matches = indexSearcher.Search(sunnyQuery, dropFilter);
    Assert.AreEqual(0, matches.Length());
}
/// <summary>
/// Creates a fresh in-memory <c>directory</c> and fills it with <c>N</c>
/// documents produced by CreateDocument, then optimizes the index.
/// </summary>
public virtual void SetUp()
{
    directory = new RAMDirectory();
    IndexWriter indexWriter = new IndexWriter(directory, new WhitespaceAnalyzer(), true);

    int docNumber = 0;
    while (docNumber < N)
    {
        indexWriter.AddDocument(CreateDocument(docNumber));
        docNumber++;
    }

    indexWriter.Optimize();
    indexWriter.Close();
}
/// <summary>
/// Indexes one document dated one second in the past and checks two
/// DateFilter.Before filters on "datefield": one bounded by "now" and one
/// bounded by a much earlier time.
/// </summary>
public virtual void TestBefore()
{
    RAMDirectory ramDir = new RAMDirectory();
    IndexWriter indexWriter = new IndexWriter(ramDir, new SimpleAnalyzer(), true);

    // Local-clock ticks shifted by 621355968000000000 and divided by 10000.
    long now = (System.DateTime.Now.Ticks - 621355968000000000) / 10000;

    // The document's date lies 1000 before "now".
    Document datedDoc = new Document();
    datedDoc.Add(Field.Keyword("datefield", DateField.TimeToString(now - 1000)));
    datedDoc.Add(Field.Text("body", "Today is a very sunny day in New York City"));
    indexWriter.AddDocument(datedDoc);
    indexWriter.Optimize();
    indexWriter.Close();

    IndexSearcher indexSearcher = new IndexSearcher(ramDir);

    // Filter that should preserve matches.
    DateFilter keepFilter = DateFilter.Before("datefield", now);
    // Filter that should discard matches.
    DateFilter dropFilter = DateFilter.Before("datefield", now - 999999);

    // One query for a term that is not indexed and one for a term that is.
    Query missingQuery = new TermQuery(new Term("body", "NoMatchForThis"));
    Query sunnyQuery = new TermQuery(new Term("body", "sunny"));

    Hits matches;

    // Baseline without any filter.
    matches = indexSearcher.Search(missingQuery);
    Assert.AreEqual(0, matches.Length());
    matches = indexSearcher.Search(sunnyQuery);
    Assert.AreEqual(1, matches.Length());

    // The same queries with each filter applied.
    matches = indexSearcher.Search(missingQuery, keepFilter);
    Assert.AreEqual(0, matches.Length());
    matches = indexSearcher.Search(missingQuery, dropFilter);
    Assert.AreEqual(0, matches.Length());
    matches = indexSearcher.Search(sunnyQuery, keepFilter);
    Assert.AreEqual(1, matches.Length());
    matches = indexSearcher.Search(sunnyQuery, dropFilter);
    Assert.AreEqual(0, matches.Length());
}
/// <summary>
/// Rewrites a PrefixQuery on its own and wrapped in a BooleanQuery against the
/// same index, and checks that both rewrites are BooleanQuery instances with
/// the same number of clauses.
/// </summary>
public virtual void TestMethod()
{
    RAMDirectory directory = new RAMDirectory();

    System.String[] categories = new System.String[] { "food", "foodanddrink", "foodanddrinkandgoodtimes", "food and drink" };

    Query rw1 = null;
    Query rw2 = null;
    try
    {
        IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true);
        for (int i = 0; i < categories.Length; i++)
        {
            Document doc = new Document();
            doc.Add(Field.Keyword("category", categories[i]));
            writer.AddDocument(doc);
        }
        writer.Close();

        IndexReader reader = IndexReader.Open(directory);
        try
        {
            PrefixQuery query = new PrefixQuery(new Term("category", "foo"));

            rw1 = query.Rewrite(reader);

            BooleanQuery bq = new BooleanQuery();
            bq.Add(query, true, false);

            rw2 = bq.Rewrite(reader);
        }
        finally
        {
            // The reader was previously left open; it is only needed for the
            // two Rewrite calls above.
            reader.Close();
        }
    }
    catch (System.IO.IOException e)
    {
        Assert.Fail(e.Message);
    }

    // Both rewrites must be BooleanQuery instances. The first one used to be
    // left null when the type did not match, which surfaced as a
    // NullReferenceException in the final assertion instead of a test failure.
    BooleanQuery bq1 = rw1 as BooleanQuery;
    if (bq1 == null)
    {
        Assert.Fail("Rewrite of the bare PrefixQuery did not produce a BooleanQuery");
    }

    BooleanQuery bq2 = rw2 as BooleanQuery;
    if (bq2 == null)
    {
        Assert.Fail("Rewrite");
    }

    Assert.AreEqual(bq1.GetClauses().Length, bq2.GetClauses().Length, "Number of Clauses Mismatch");
}
/// <summary>
/// Runs the base set-up, then fills a fresh in-memory <c>directory</c> with
/// <c>N</c> documents produced by CreateDocument and optimizes the index.
/// </summary>
public override void SetUp()
{
    base.SetUp();

    directory = new RAMDirectory();
    IndexWriter indexWriter = new IndexWriter(directory, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);

    int docNumber = 0;
    while (docNumber < N)
    {
        Document next = CreateDocument(docNumber);
        indexWriter.AddDocument(next);
        docNumber++;
    }

    indexWriter.Optimize();
    indexWriter.Close();
}
/// <summary>
/// Indexes every entry of <c>docFields</c> as an unstored, analyzed value of
/// <c>field</c> (one document per entry) and opens <c>searcher</c> on the result.
/// </summary>
public override void SetUp()
{
    base.SetUp();

    RAMDirectory ramDir = new RAMDirectory();
    IndexWriter indexWriter = new IndexWriter(ramDir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);

    int entry = 0;
    while (entry < docFields.Length)
    {
        Field content = new Field(field, docFields[entry], Field.Store.NO, Field.Index.ANALYZED);
        Document entryDoc = new Document();
        entryDoc.Add(content);
        indexWriter.AddDocument(entryDoc);
        entry++;
    }
    indexWriter.Close();

    searcher = new IndexSearcher(ramDir);
}
/// <summary>
/// Indexes every entry of <c>docFields</c> as a text value of <c>field</c>
/// (one document per entry) and opens <c>searcher</c> on the result.
/// </summary>
public virtual void SetUp()
{
    RAMDirectory directory = new RAMDirectory();
    IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true);

    // An unused StringBuilder local was removed from here; it was never read.
    for (int i = 0; i < docFields.Length; i++)
    {
        Document doc = new Document();
        doc.Add(Field.Text(field, docFields[i]));
        writer.AddDocument(doc);
    }
    writer.Close();

    searcher = new IndexSearcher(directory);
}
/// <summary>
/// Command-line entry point: indexes the path given as the first argument into
/// <c>INDEX_DIR</c>, optimizes the index and prints the elapsed time. Exits
/// with code 1 when no argument is given, when <c>INDEX_DIR</c> already
/// exists, or when the document path does not exist.
/// </summary>
/// <param name="args">Command-line arguments; <c>args[0]</c> is the root directory to index.</param>
public static void Main(System.String[] args)
{
    System.String usage = typeof(IndexFiles) + " <root_directory>";
    if (args.Length == 0)
    {
        System.Console.Error.WriteLine("Usage: " + usage);
        System.Environment.Exit(1);
    }

    // Refuse to write into an index location that exists as a file or directory.
    bool indexDirExists = System.IO.File.Exists(INDEX_DIR.FullName) || System.IO.Directory.Exists(INDEX_DIR.FullName);
    if (indexDirExists)
    {
        System.Console.Out.WriteLine("Cannot save index to '" + INDEX_DIR + "' directory, please delete it first");
        System.Environment.Exit(1);
    }

    // The source path may be a file or a directory. Only existence is checked;
    // the readability check of the Java original has no counterpart here.
    System.IO.FileInfo docDir = new System.IO.FileInfo(args[0]);
    bool docDirExists = System.IO.File.Exists(docDir.FullName) || System.IO.Directory.Exists(docDir.FullName);
    if (!docDirExists)
    {
        System.Console.Out.WriteLine("Document directory '" + docDir.FullName + "' does not exist or is not readable, please check the path");
        System.Environment.Exit(1);
    }

    System.DateTime start = System.DateTime.UtcNow;
    try
    {
        IndexWriter writer = new IndexWriter(FSDirectory.Open(INDEX_DIR), new StandardAnalyzer(Version.LUCENE_CURRENT), true, IndexWriter.MaxFieldLength.LIMITED);
        System.Console.Out.WriteLine("Indexing to directory '" + INDEX_DIR + "'...");
        IndexDocs(writer, docDir);
        System.Console.Out.WriteLine("Optimizing...");
        writer.Optimize();
        writer.Close();

        // Report real elapsed milliseconds. The previous code subtracted the
        // DateTime.Millisecond components (each 0-999), which is not the
        // elapsed time and can even be negative.
        System.TimeSpan elapsed = System.DateTime.UtcNow - start;
        System.Console.Out.WriteLine((long) elapsed.TotalMilliseconds + " total milliseconds");
    }
    catch (System.IO.IOException e)
    {
        System.Console.Out.WriteLine(" caught a " + e.GetType() + "\n with message: " + e.Message);
    }
}
/// <summary>
/// Indexes every entry of <c>docFields</c> as an unstored, tokenized value of
/// <c>field</c> (one document per entry) and opens <c>searcher</c> on the result.
/// </summary>
public override void SetUp()
{
    base.SetUp();

    RAMDirectory ramDir = new RAMDirectory();
    IndexWriter indexWriter = new IndexWriter(ramDir, new WhitespaceAnalyzer(), true);

    int entry = 0;
    while (entry < docFields.Length)
    {
        Lucene.Net.Documents.Document entryDoc = new Lucene.Net.Documents.Document();
        Field content = new Field(field, docFields[entry], Field.Store.NO, Field.Index.TOKENIZED);
        entryDoc.Add(content);
        indexWriter.AddDocument(entryDoc);
        entry++;
    }
    indexWriter.Close();

    searcher = new IndexSearcher(ramDir);
}
/// <summary>
/// Creates an in-memory index holding a single document with an untokenized
/// "partnum" field and a tokenized "description" field, and opens
/// <c>searcher</c> on it.
/// </summary>
public override void SetUp()
{
    base.SetUp();

    directory = new RAMDirectory();
    IndexWriter indexWriter = new IndexWriter(directory, new SimpleAnalyzer(), true);

    // "partnum" is indexed as one exact term; "description" goes through the analyzer.
    Field partNumber = new Field("partnum", "Q36", Field.Store.YES, Field.Index.UN_TOKENIZED);
    Field description = new Field("description", "Illidium Space Modulator", Field.Store.YES, Field.Index.TOKENIZED);

    Document partDoc = new Document();
    partDoc.Add(partNumber);
    partDoc.Add(description);
    indexWriter.AddDocument(partDoc);
    indexWriter.Close();

    searcher = new IndexSearcher(directory);
}