/// <summary>
/// Indexes one document through a <c>StopAnalyzer</c> and runs two exact
/// phrase queries against it, expecting exactly one hit for each.
/// </summary>
public virtual void TestPhraseQueryWithStopAnalyzer()
{
    RAMDirectory ramDir = new RAMDirectory();

    // Build a one-document index analyzed with the stop analyzer.
    IndexWriter indexWriter = new IndexWriter(ramDir, new StopAnalyzer(), true);
    Document document = new Document();
    document.Add(Field.Text("Field", "the stop words are here"));
    indexWriter.AddDocument(document);
    indexWriter.Close();

    IndexSearcher indexSearcher = new IndexSearcher(ramDir);

    // valid exact phrase query
    PhraseQuery stopThenWords = new PhraseQuery();
    stopThenWords.Add(new Term("Field", "stop"));
    stopThenWords.Add(new Term("Field", "words"));
    Hits firstHits = indexSearcher.Search(stopThenWords);
    Assert.AreEqual(1, firstHits.Length());

    // currently StopAnalyzer does not leave "holes", so this matches.
    PhraseQuery wordsThenHere = new PhraseQuery();
    wordsThenHere.Add(new Term("Field", "words"));
    wordsThenHere.Add(new Term("Field", "here"));
    Hits secondHits = indexSearcher.Search(wordsThenHere);
    Assert.AreEqual(1, secondHits.Length());

    indexSearcher.Close();
}
/// <summary>
/// Builds the shared fixture: a four-document in-memory index (each document
/// has a "Field" text and a "sorter" key), a searcher over it, a term query
/// for "three" and the filter instance used by the tests.
/// </summary>
public virtual void SetUp()
{
    directory = new RAMDirectory();
    IndexWriter indexWriter = new IndexWriter(directory, new WhitespaceAnalyzer(), true);

    // Row i of both tables describes document i, in insertion order.
    System.String[] bodies = new System.String[] { "one two three four five", "one two three four", "one two three y", "one two x" };
    System.String[] sortKeys = new System.String[] { "b", "d", "a", "c" };

    for (int row = 0; row < bodies.Length; row++)
    {
        Document entry = new Document();
        entry.Add(Field.Text("Field", bodies[row]));
        entry.Add(Field.Text("sorter", sortKeys[row]));
        indexWriter.AddDocument(entry);
    }

    indexWriter.Optimize();
    indexWriter.Close();

    searcher = new IndexSearcher(directory);
    query = new TermQuery(new Term("Field", "three"));
    filter = new AnonymousClassFilter(this);
}
/// <summary>
/// Indexes a single document through <c>AnonymousClassAnalyzer</c> and checks
/// which two-term phrase queries over the terms "1" to "5" match it.
/// </summary>
/// <remarks>
/// The expected hit counts depend on the tokens and positions emitted by
/// <c>AnonymousClassAnalyzer</c>, which is declared outside this method.
/// </remarks>
public virtual void TestSetPosition()
{
    Analyzer analyzer = new AnonymousClassAnalyzer(this);
    RAMDirectory store = new RAMDirectory();
    IndexWriter writer = new IndexWriter(store, analyzer, true);
    Document d = new Document();
    d.Add(Field.Text("Field", "bogus"));
    writer.AddDocument(d);
    writer.Optimize();
    writer.Close();

    // Row i: the two phrase terms (in order) and the number of hits expected for them.
    System.String[][] phrases = new System.String[][]
    {
        new System.String[] { "1", "2" },
        new System.String[] { "2", "3" },
        new System.String[] { "3", "4" },
        new System.String[] { "2", "4" },
        new System.String[] { "3", "5" },
        new System.String[] { "4", "5" },
        new System.String[] { "2", "5" }
    };
    int[] expectedHits = new int[] { 0, 1, 0, 1, 1, 1, 0 };

    IndexSearcher searcher = new IndexSearcher(store);
    try
    {
        for (int i = 0; i < phrases.Length; i++)
        {
            PhraseQuery q = new PhraseQuery();
            q.Add(new Term("Field", phrases[i][0]));
            q.Add(new Term("Field", phrases[i][1]));

            Hits hits = searcher.Search(q);

            // The message identifies the failing phrase, since all cases share one assert line.
            Assert.AreEqual(expectedHits[i], hits.Length(), "phrase \"" + phrases[i][0] + " " + phrases[i][1] + "\"");
        }
    }
    finally
    {
        // Release the searcher even when an assertion fails.
        searcher.Close();
    }
}
/// <summary>
/// Builds a five-document index and checks <c>PhrasePrefixQuery</c>: the
/// phrase "blueberry" followed by any indexed term starting with "pi" must
/// match two documents, while "strawberry" followed by the same terms must
/// match none.
/// </summary>
public virtual void TestPhrasePrefix()
{
    RAMDirectory indexStore = new RAMDirectory();
    IndexWriter writer = new IndexWriter(indexStore, new SimpleAnalyzer(), true);
    Document doc1 = new Document();
    Document doc2 = new Document();
    Document doc3 = new Document();
    Document doc4 = new Document();
    Document doc5 = new Document();
    doc1.Add(Field.Text("body", "blueberry pie"));
    doc2.Add(Field.Text("body", "blueberry strudel"));
    doc3.Add(Field.Text("body", "blueberry pizza"));
    doc4.Add(Field.Text("body", "blueberry chewing gum"));
    doc5.Add(Field.Text("body", "piccadilly circus"));
    writer.AddDocument(doc1);
    writer.AddDocument(doc2);
    writer.AddDocument(doc3);
    writer.AddDocument(doc4);
    writer.AddDocument(doc5);
    writer.Optimize();
    writer.Close();

    PhrasePrefixQuery query1 = new PhrasePrefixQuery();
    PhrasePrefixQuery query2 = new PhrasePrefixQuery();
    query1.Add(new Term("body", "blueberry"));
    query2.Add(new Term("body", "strawberry"));

    // Collect every indexed term whose text starts with the prefix.
    System.Collections.ArrayList termsWithPrefix = new System.Collections.ArrayList();
    System.String prefix = "pi";
    IndexReader ir = IndexReader.Open(indexStore);
    try
    {
        // this TermEnum gives "piccadilly", "pie" and "pizza".
        TermEnum te = ir.Terms(new Term("body", prefix + "*"));
        try
        {
            do
            {
                // Guard against an enumeration that has no current term.
                Term current = te.Term();
                if (current != null && current.Text().StartsWith(prefix))
                {
                    termsWithPrefix.Add(current);
                }
            }
            while (te.Next());
        }
        finally
        {
            te.Close();
        }
    }
    finally
    {
        ir.Close();
    }

    Term[] alternatives = (Term[]) termsWithPrefix.ToArray(typeof(Term));
    query1.Add(alternatives);
    query2.Add(alternatives);

    IndexSearcher searcher = new IndexSearcher(indexStore);
    try
    {
        Hits result;
        result = searcher.Search(query1);
        Assert.AreEqual(2, result.Length());

        result = searcher.Search(query2);
        Assert.AreEqual(0, result.Length());
    }
    finally
    {
        // Release the searcher even when an assertion fails.
        searcher.Close();
    }
}
/// <summary>
/// Adds to <paramref name="writer"/> one document carrying a keyword, a text,
/// an unindexed and an unstored field, all with the value "test1" and with
/// field names ending in "2".
/// </summary>
/// <param name="writer">The index writer that receives the document.</param>
private void AddDocumentWithDifferentFields(IndexWriter writer)
{
    Field[] fields = new Field[]
    {
        Field.Keyword("keyword2", "test1"),
        Field.Text("text2", "test1"),
        Field.UnIndexed("unindexed2", "test1"),
        Field.UnStored("unstored2", "test1")
    };

    Document document = new Document();
    foreach (Field entry in fields)
    {
        document.Add(entry);
    }

    writer.AddDocument(document);
}
/// <summary>
/// Checks <c>DateFilter.After</c>: a document dated slightly in the future
/// must pass a filter starting at "now" and be rejected by a filter starting
/// after the document's date.
/// </summary>
public virtual void TestAfter()
{
    // create an index
    RAMDirectory indexStore = new RAMDirectory();
    IndexWriter writer = new IndexWriter(indexStore, new SimpleAnalyzer(), true);

    // Ticks are 100 ns units; 621355968000000000 is the tick count of
    // 1970-01-01, so this is the current local time in milliseconds since then.
    long now = (System.DateTime.Now.Ticks - 621355968000000000) / 10000;

    Document doc = new Document();
    // add time that is in the future
    doc.Add(Field.Keyword("datefield", DateField.TimeToString(now + 888888)));
    doc.Add(Field.Text("body", "Today is a very sunny day in New York City"));
    writer.AddDocument(doc);
    writer.Optimize();
    writer.Close();

    // filter that should preserve matches
    DateFilter df1 = DateFilter.After("datefield", now);

    // filter that should discard matches
    DateFilter df2 = DateFilter.After("datefield", now + 999999);

    // search something that doesn't exist with DateFilter
    Query query1 = new TermQuery(new Term("body", "NoMatchForThis"));

    // search for something that does exists
    Query query2 = new TermQuery(new Term("body", "sunny"));

    IndexSearcher searcher = new IndexSearcher(indexStore);
    try
    {
        Hits result;

        // ensure that queries return expected results without DateFilter first
        result = searcher.Search(query1);
        Assert.AreEqual(0, result.Length());

        result = searcher.Search(query2);
        Assert.AreEqual(1, result.Length());

        // run queries with DateFilter
        result = searcher.Search(query1, df1);
        Assert.AreEqual(0, result.Length());

        result = searcher.Search(query1, df2);
        Assert.AreEqual(0, result.Length());

        result = searcher.Search(query2, df1);
        Assert.AreEqual(1, result.Length());

        result = searcher.Search(query2, df2);
        Assert.AreEqual(0, result.Length());
    }
    finally
    {
        // Release the searcher even when an assertion fails.
        searcher.Close();
    }
}
/// <summary>
/// Indexes two documents with <c>SimpleSimilarity</c> installed on both the
/// writer and the searcher, then runs a term query, a boolean query, an exact
/// phrase query and a sloppy phrase query, each with its own hit collector.
/// </summary>
/// <remarks>
/// The collectors (<c>AnonymousClassHitCollector</c> and its numbered
/// siblings) are declared outside this method; any score checks live in them.
/// </remarks>
public virtual void TestSimilarity_()
{
    RAMDirectory store = new RAMDirectory();
    IndexWriter writer = new IndexWriter(store, new SimpleAnalyzer(), true);
    writer.SetSimilarity(new SimpleSimilarity());

    Document d1 = new Document();
    d1.Add(Field.Text("Field", "a c"));

    Document d2 = new Document();
    d2.Add(Field.Text("Field", "a b c"));

    writer.AddDocument(d1);
    writer.AddDocument(d2);
    writer.Optimize();
    writer.Close();

    Term a = new Term("Field", "a");
    Term b = new Term("Field", "b");
    Term c = new Term("Field", "c");

    Searcher searcher = new IndexSearcher(store);
    try
    {
        searcher.SetSimilarity(new SimpleSimilarity());

        // Single-term query.
        searcher.Search(new TermQuery(b), new AnonymousClassHitCollector(this));

        // Boolean query with two optional clauses.
        BooleanQuery bq = new BooleanQuery();
        bq.Add(new TermQuery(a), false, false);
        bq.Add(new TermQuery(b), false, false);
        searcher.Search(bq, new AnonymousClassHitCollector1(this));

        // Exact phrase "a c".
        PhraseQuery pq = new PhraseQuery();
        pq.Add(a);
        pq.Add(c);
        searcher.Search(pq, new AnonymousClassHitCollector2(this));

        // Same phrase with a slop of 2.
        pq.SetSlop(2);
        searcher.Search(pq, new AnonymousClassHitCollector3(this));
    }
    finally
    {
        // Release the searcher even when a collector throws.
        searcher.Close();
    }
}
/// <summary>
/// Creates a document in which each of the four field kinds (keyword, text,
/// unindexed, unstored) appears twice, once with "test1" and once with "test2".
/// </summary>
/// <returns>The populated document.</returns>
private Document MakeDocumentWithFields()
{
    System.String[] values = new System.String[] { "test1", "test2" };
    Document result = new Document();

    // Fields are added grouped by kind, each kind in value order.
    foreach (System.String value in values)
    {
        result.Add(Field.Keyword("keyword", value));
    }
    foreach (System.String value in values)
    {
        result.Add(Field.Text("text", value));
    }
    foreach (System.String value in values)
    {
        result.Add(Field.UnIndexed("unindexed", value));
    }
    foreach (System.String value in values)
    {
        result.Add(Field.UnStored("unstored", value));
    }

    return result;
}
/// <summary>
/// Builds the fixture: an in-memory index holding one document per entry of
/// <c>docFields</c> (stored under the field named by <c>field</c>), and a
/// searcher over that index.
/// </summary>
public virtual void SetUp()
{
    RAMDirectory directory = new RAMDirectory();
    IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true);

    for (int i = 0; i < docFields.Length; i++)
    {
        Document doc = new Document();
        doc.Add(Field.Text(field, docFields[i]));
        writer.AddDocument(doc);
    }

    writer.Close();
    searcher = new IndexSearcher(directory);
}
/// <summary>
/// Creates an optimized in-memory index with one document per element of
/// <paramref name="contents"/>.
/// </summary>
/// <param name="field">Name of the text field each document receives.</param>
/// <param name="contents">Field values, one per document, in insertion order.</param>
/// <returns>The directory holding the closed index.</returns>
private RAMDirectory GetIndexStore(System.String field, System.String[] contents)
{
    RAMDirectory store = new RAMDirectory();
    IndexWriter indexWriter = new IndexWriter(store, new SimpleAnalyzer(), true);

    foreach (System.String text in contents)
    {
        Document entry = new Document();
        entry.Add(Field.Text(field, text));
        indexWriter.AddDocument(entry);
    }

    indexWriter.Optimize();
    indexWriter.Close();
    return store;
}
/// <summary>
/// Builds the fixture: writes 1000 documents into the class-level
/// <c>directory</c>, each holding the English spelling of its index in
/// "Field", then opens a searcher over it.
/// </summary>
public virtual void SetUp()
{
    IndexWriter writer = new IndexWriter(directory, new SimpleAnalyzer(), true);
    //writer.setUseCompoundFile(true);
    //writer.infoStream = System.out;

    for (int i = 0; i < 1000; i++)
    {
        Document doc = new Document();
        // The three-argument Field.Text overload is used here with 'true' as its last argument.
        doc.Add(Field.Text("Field", English.IntToEnglish(i), true));
        writer.AddDocument(doc);
    }

    writer.Close();
    searcher = new IndexSearcher(directory);
}
/// <summary>
/// Builds the fixture: a one-document in-memory index containing
/// "one two three four five", a searcher over it and an empty phrase query.
/// </summary>
public virtual void SetUp()
{
    directory = new RAMDirectory();

    IndexWriter indexWriter = new IndexWriter(directory, new WhitespaceAnalyzer(), true);
    Document onlyDocument = new Document();
    Field body = Field.Text("Field", "one two three four five");
    onlyDocument.Add(body);
    indexWriter.AddDocument(onlyDocument);
    indexWriter.Optimize();
    indexWriter.Close();

    searcher = new IndexSearcher(directory);
    query = new PhraseQuery();
}
/// <summary>
/// Builds the fixture: an in-memory index of 1000 documents, each holding the
/// English spelling of its index in "Field", and a searcher over that index.
/// </summary>
public virtual void SetUp()
{
    RAMDirectory ramDir = new RAMDirectory();
    IndexWriter indexWriter = new IndexWriter(ramDir, new SimpleAnalyzer(), true);
    //writer.infoStream = System.out;

    int number = 0;
    while (number < 1000)
    {
        Document entry = new Document();
        entry.Add(Field.Text("Field", English.IntToEnglish(number)));
        indexWriter.AddDocument(entry);
        number++;
    }

    indexWriter.Close();
    searcher = new IndexSearcher(ramDir);
}
/// <summary>
/// Static initializer: creates the shared sample fields and fills
/// <c>nameValues</c> with a field-name to field-text mapping for all of them.
/// </summary>
static DocHelper()
{
    // Sample fields; the two-variant kinds differ only in their boolean flag.
    textField1 = Field.Text(TEXT_FIELD_1_KEY, FIELD_1_TEXT, false);
    textField2 = Field.Text(TEXT_FIELD_2_KEY, FIELD_2_TEXT, true);
    keyField = Field.Keyword(KEYWORD_FIELD_KEY, KEYWORD_TEXT);
    unIndField = Field.UnIndexed(UNINDEXED_FIELD_KEY, UNINDEXED_FIELD_TEXT);
    unStoredField1 = Field.UnStored(UNSTORED_FIELD_1_KEY, UNSTORED_1_FIELD_TEXT, false);
    unStoredField2 = Field.UnStored(UNSTORED_FIELD_2_KEY, UNSTORED_2_FIELD_TEXT, true);

    // Build the lookup table locally, then publish it in one assignment.
    System.Collections.Hashtable table = new System.Collections.Hashtable();
    table[TEXT_FIELD_1_KEY] = FIELD_1_TEXT;
    table[TEXT_FIELD_2_KEY] = FIELD_2_TEXT;
    table[KEYWORD_FIELD_KEY] = KEYWORD_TEXT;
    table[UNINDEXED_FIELD_KEY] = UNINDEXED_FIELD_TEXT;
    table[UNSTORED_FIELD_1_KEY] = UNSTORED_1_FIELD_TEXT;
    table[UNSTORED_FIELD_2_KEY] = UNSTORED_2_FIELD_TEXT;
    nameValues = table;
}
/// <summary>
/// Builds a one-document in-memory index, wraps a searcher over it in a
/// <c>RemoteSearchable</c> and marshals that object under the URI
/// "http://localhost/Searchable".
/// </summary>
private static void StartServer()
{
    // construct an index
    RAMDirectory store = new RAMDirectory();
    IndexWriter indexWriter = new IndexWriter(store, new SimpleAnalyzer(), true);
    Document sample = new Document();
    sample.Add(Field.Text("test", "test text"));
    indexWriter.AddDocument(sample);
    indexWriter.Optimize();
    indexWriter.Close();

    // publish it
    //// LocateRegistry.CreateRegistry(1099);	// {{Aroush}}
    Lucene.Net.Search.Searchable localSearchable = new IndexSearcher(store);
    RemoteSearchable published = new RemoteSearchable(localSearchable);
    System.Runtime.Remoting.RemotingServices.Marshal(published, "http://localhost/Searchable");
}
/// <summary>
/// Indexes four documents whose combined field and document boosts are
/// 1, 2, 3 and 4, and checks that the collected scores are strictly increasing
/// in that order.
/// </summary>
/// <remarks>
/// The <c>scores</c> array is filled by <c>AnonymousClassHitCollector</c>,
/// which is declared outside this method — presumably indexed by document
/// number; confirm against the collector.
/// </remarks>
public virtual void TestDocBoost_()
{
    RAMDirectory store = new RAMDirectory();
    IndexWriter writer = new IndexWriter(store, new SimpleAnalyzer(), true);

    Field f1 = Field.Text("Field", "word");
    Field f2 = Field.Text("Field", "word");
    f2.SetBoost(2.0f);

    Document d1 = new Document();
    Document d2 = new Document();
    Document d3 = new Document();
    Document d4 = new Document();
    d3.SetBoost(3.0f);
    d4.SetBoost(2.0f);

    d1.Add(f1); // boost = 1
    d2.Add(f2); // boost = 2
    d3.Add(f1); // boost = 3
    d4.Add(f2); // boost = 4

    writer.AddDocument(d1);
    writer.AddDocument(d2);
    writer.AddDocument(d3);
    writer.AddDocument(d4);
    writer.Optimize();
    writer.Close();

    float[] scores = new float[4];

    IndexSearcher searcher = new IndexSearcher(store);
    try
    {
        searcher.Search(new TermQuery(new Term("Field", "word")), new AnonymousClassHitCollector(scores, this));
    }
    finally
    {
        // Release the searcher even when the search throws.
        searcher.Close();
    }

    // Each score must be strictly greater than the one before it.
    float lastScore = 0.0f;
    for (int i = 0; i < 4; i++)
    {
        Assert.IsTrue(scores[i] > lastScore);
        lastScore = scores[i];
    }
}
/// <summary>
/// Indexes one document containing "a b" and checks that the parsed query
/// "a NOT b" returns no hits.
/// </summary>
public virtual void TestNot_()
{
    RAMDirectory store = new RAMDirectory();
    IndexWriter writer = new IndexWriter(store, new SimpleAnalyzer(), true);

    Document d1 = new Document();
    d1.Add(Field.Text("Field", "a b"));

    writer.AddDocument(d1);
    writer.Optimize();
    writer.Close();

    Query query = Lucene.Net.QueryParsers.QueryParser.Parse("a NOT b", "Field", new SimpleAnalyzer());

    Searcher searcher = new IndexSearcher(store);
    try
    {
        Hits hits = searcher.Search(query);
        Assert.AreEqual(0, hits.Length());
    }
    finally
    {
        // Release the searcher even when the assertion fails.
        searcher.Close();
    }
}
/// <summary>
/// Indexes three documents, wraps the index reader in <c>TestReader</c> and
/// checks that every enumerated term contains the letter 'e' and that every
/// position entry for the term "one" belongs to an odd document number.
/// </summary>
/// <remarks>
/// <c>TestReader</c> is declared outside this method; the assertions describe
/// what it is expected to expose.
/// </remarks>
public virtual void TestFilterIndexReader_()
{
    RAMDirectory directory = new RAMDirectory();
    IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true);

    Document d1 = new Document();
    d1.Add(Field.Text("default", "one two"));
    writer.AddDocument(d1);

    Document d2 = new Document();
    d2.Add(Field.Text("default", "one three"));
    writer.AddDocument(d2);

    Document d3 = new Document();
    d3.Add(Field.Text("default", "two four"));
    writer.AddDocument(d3);

    writer.Close();

    IndexReader reader = new TestReader(IndexReader.Open(directory));
    try
    {
        TermEnum terms = reader.Terms();
        try
        {
            while (terms.Next())
            {
                Assert.IsTrue(terms.Term().Text().IndexOf('e') != -1);
            }
        }
        finally
        {
            terms.Close();
        }

        TermPositions positions = reader.TermPositions(new Term("default", "one"));
        try
        {
            while (positions.Next())
            {
                Assert.IsTrue((positions.Doc() % 2) == 1);
            }
        }
        finally
        {
            positions.Close();
        }
    }
    finally
    {
        // Release the reader even when an assertion fails.
        reader.Close();
    }
}
/// <summary>
/// Indexes the same document four times, overwrites the norm of "Field" for
/// each copy with increasing values (1, 2, 4, 16) and checks that the
/// collected scores are strictly increasing in that order.
/// </summary>
/// <remarks>
/// The <c>scores</c> array is filled by <c>AnonymousClassHitCollector</c>,
/// which is declared outside this method — presumably indexed by document
/// number; confirm against the collector.
/// </remarks>
public virtual void TestSetNorm_()
{
    RAMDirectory store = new RAMDirectory();
    IndexWriter writer = new IndexWriter(store, new SimpleAnalyzer(), true);

    // add the same document four times
    Field f1 = Field.Text("Field", "word");
    Document d1 = new Document();
    d1.Add(f1);
    writer.AddDocument(d1);
    writer.AddDocument(d1);
    writer.AddDocument(d1);
    writer.AddDocument(d1);
    writer.Close();

    // reset the boost of each instance of this document
    IndexReader reader = IndexReader.Open(store);
    try
    {
        reader.SetNorm(0, "Field", 1.0f);
        reader.SetNorm(1, "Field", 2.0f);
        reader.SetNorm(2, "Field", 4.0f);
        reader.SetNorm(3, "Field", 16.0f);
    }
    finally
    {
        reader.Close();
    }

    // check that searches are ordered by this boost
    float[] scores = new float[4];

    IndexSearcher searcher = new IndexSearcher(store);
    try
    {
        searcher.Search(new TermQuery(new Term("Field", "word")), new AnonymousClassHitCollector(scores, this));
    }
    finally
    {
        // Release the searcher even when the search throws.
        searcher.Close();
    }

    // Each score must be strictly greater than the one before it.
    float lastScore = 0.0f;
    for (int i = 0; i < 4; i++)
    {
        Assert.IsTrue(scores[i] > lastScore);
        lastScore = scores[i];
    }
}
/// <summary>Makes a document for a File.
/// <para>The document has three fields:</para>
/// <list type="bullet">
/// <item><description><c>path</c>--containing the pathname of the file, as a stored,
/// tokenized Field;</description></item>
/// <item><description><c>modified</c>--containing the last modified date of the file as
/// a keyword Field as encoded by <c>DateField</c>; and</description></item>
/// <item><description><c>contents</c>--containing the full contents of the file, as a
/// Reader Field.</description></item>
/// </list>
/// </summary>
/// <param name="f">The file to describe; it is opened for reading.</param>
/// <returns>
/// The new document. The reader backing the <c>contents</c> field is left
/// open by this method, since the field still has to read from it.
/// </returns>
public static Document Document(System.IO.FileInfo f)
{
    // make a new, empty document
    Document doc = new Document();

    // Add the path of the file as a Field named "path".  Use a Text Field, so
    // that the index stores the path, and so that the path is searchable
    doc.Add(Field.Text("path", f.FullName));

    // Add the last modified date of the file a Field named "modified".  Use a
    // Keyword Field, so that it's searchable, but so that no attempt is made
    // to tokenize the Field into words.
    // Ticks are 100 ns units; 621355968000000000 is the tick count of
    // 1970-01-01, so the value passed on is in milliseconds since then.
    long modifiedMillis = (f.LastWriteTime.Ticks - 621355968000000000) / 10000;
    doc.Add(Field.Keyword("modified", DateField.TimeToString(modifiedMillis)));

    // Add the contents of the file a Field named "contents".  Use a Text
    // Field, specifying a Reader, so that the text of the file is tokenized.
    System.IO.FileStream input = new System.IO.FileStream(f.FullName, System.IO.FileMode.Open, System.IO.FileAccess.Read);
    System.IO.StreamReader reader = new System.IO.StreamReader(input, System.Text.Encoding.Default);
    doc.Add(Field.Text("contents", reader));

    // return the document
    return doc;
}
/// <summary>
/// Adds a text field named "Field" with the given text to
/// <paramref name="doc"/>, passing <c>true</c> as the third
/// <c>Field.Text</c> argument.
/// </summary>
/// <param name="doc">The document to extend.</param>
/// <param name="text">The field's value.</param>
private void SetupDoc(Document doc, System.String text)
{
    Field textField = Field.Text("Field", text, true);
    doc.Add(textField);
    //System.out.println("Document: " + doc);
}
/// <summary>
/// Exercises a multi-searcher whose first sub-index is empty (scenario 1),
/// then holds one document (scenario 2), then has that document deleted and
/// the index optimized again (scenario 3). In every scenario all hits for
/// "handle:1" must be counted correctly and be loadable without an exception.
/// </summary>
/// <remarks>
/// The multi-searcher is obtained from <c>GetMultiSearcherInstance</c>, which
/// is declared outside this method.
/// </remarks>
public virtual void TestEmptyIndex()
{
    // creating two directories for indices
    Directory indexStoreA = new RAMDirectory();
    Directory indexStoreB = new RAMDirectory();

    // creating a document to store
    Document lDoc = new Document();
    lDoc.Add(Field.Text("fulltext", "Once upon a time....."));
    lDoc.Add(Field.Keyword("id", "doc1"));
    lDoc.Add(Field.Keyword("handle", "1"));

    // creating a document to store
    Document lDoc2 = new Document();
    lDoc2.Add(Field.Text("fulltext", "in a galaxy far far away....."));
    lDoc2.Add(Field.Keyword("id", "doc2"));
    lDoc2.Add(Field.Keyword("handle", "1"));

    // creating a document to store
    Document lDoc3 = new Document();
    lDoc3.Add(Field.Text("fulltext", "a bizarre bug manifested itself...."));
    lDoc3.Add(Field.Keyword("id", "doc3"));
    lDoc3.Add(Field.Keyword("handle", "1"));

    // creating an index writer for the first index
    IndexWriter writerA = new IndexWriter(indexStoreA, new StandardAnalyzer(), true);
    // creating an index writer for the second index, but writing nothing
    IndexWriter writerB = new IndexWriter(indexStoreB, new StandardAnalyzer(), true);

    //--------------------------------------------------------------------
    // scenario 1
    //--------------------------------------------------------------------

    // writing the documents to the first index
    writerA.AddDocument(lDoc);
    writerA.AddDocument(lDoc2);
    writerA.AddDocument(lDoc3);
    writerA.Optimize();
    writerA.Close();

    // closing the second index
    writerB.Close();

    // creating the query
    Query query = Lucene.Net.QueryParsers.QueryParser.Parse("handle:1", "fulltext", new StandardAnalyzer());

    // building the searchables
    Searcher[] searchers = new Searcher[2];
    // VITAL STEP:adding the searcher for the empty index first, before the searcher for the populated index
    searchers[0] = new IndexSearcher(indexStoreB);
    searchers[1] = new IndexSearcher(indexStoreA);
    // creating the multiSearcher
    Searcher mSearcher = GetMultiSearcherInstance(searchers);
    // performing the search
    Hits hits = mSearcher.Search(query);

    Assert.AreEqual(3, hits.Length());

    try
    {
        // iterating over the hit documents
        for (int i = 0; i < hits.Length(); i++)
        {
            hits.Doc(i);
        }
    }
    catch (System.IndexOutOfRangeException e)
    {
        // Emit the diagnostic first: Assert.Fail throws, so nothing after it runs.
        System.Console.Error.WriteLine(e.Source);
        Assert.Fail("ArrayIndexOutOfBoundsException thrown: " + e.Message);
    }
    finally
    {
        mSearcher.Close();
    }

    //--------------------------------------------------------------------
    // scenario 2
    //--------------------------------------------------------------------

    // adding one document to the empty index
    writerB = new IndexWriter(indexStoreB, new StandardAnalyzer(), false);
    writerB.AddDocument(lDoc);
    writerB.Optimize();
    writerB.Close();

    // building the searchables
    Searcher[] searchers2 = new Searcher[2];
    // VITAL STEP:adding the searcher for the empty index first, before the searcher for the populated index
    searchers2[0] = new IndexSearcher(indexStoreB);
    searchers2[1] = new IndexSearcher(indexStoreA);
    // creating the multiSearcher
    Searcher mSearcher2 = GetMultiSearcherInstance(searchers2);
    // performing the same search
    Hits hits2 = mSearcher2.Search(query);

    Assert.AreEqual(4, hits2.Length());

    try
    {
        // iterating over the hit documents
        for (int i = 0; i < hits2.Length(); i++)
        {
            // no exception should happen at this point
            hits2.Doc(i);
        }
    }
    catch (System.Exception e)
    {
        // Emit the diagnostic first: Assert.Fail throws, so nothing after it runs.
        System.Console.Error.WriteLine(e.Source);
        Assert.Fail("Exception thrown: " + e.Message);
    }
    finally
    {
        mSearcher2.Close();
    }

    //--------------------------------------------------------------------
    // scenario 3
    //--------------------------------------------------------------------

    // deleting the document just added, this will cause a different exception to take place
    Term term = new Term("id", "doc1");
    IndexReader readerB = IndexReader.Open(indexStoreB);
    readerB.Delete(term);
    readerB.Close();

    // optimizing the index with the writer
    writerB = new IndexWriter(indexStoreB, new StandardAnalyzer(), false);
    writerB.Optimize();
    writerB.Close();

    // building the searchables
    Searcher[] searchers3 = new Searcher[2];

    searchers3[0] = new IndexSearcher(indexStoreB);
    searchers3[1] = new IndexSearcher(indexStoreA);
    // creating the multiSearcher
    Searcher mSearcher3 = GetMultiSearcherInstance(searchers3);
    // performing the same search
    Hits hits3 = mSearcher3.Search(query);

    Assert.AreEqual(3, hits3.Length());

    try
    {
        // iterating over the hit documents
        for (int i = 0; i < hits3.Length(); i++)
        {
            hits3.Doc(i);
        }
    }
    catch (System.IO.IOException e)
    {
        // Emit the diagnostic first: Assert.Fail throws, so nothing after it runs.
        System.Console.Error.WriteLine(e.Source);
        Assert.Fail("IOException thrown: " + e.Message);
    }
    finally
    {
        mSearcher3.Close();
    }
}