public override void SetUp() { base.SetUp(); directory = new RAMDirectory(); IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED); Document doc = new Document(); doc.Add(new Field("field", "one two three four five", Field.Store.YES, Field.Index.ANALYZED)); doc.Add(new Field("sorter", "b", Field.Store.YES, Field.Index.ANALYZED)); writer.AddDocument(doc); doc = new Document(); doc.Add(new Field("field", "one two three four", Field.Store.YES, Field.Index.ANALYZED)); doc.Add(new Field("sorter", "d", Field.Store.YES, Field.Index.ANALYZED)); writer.AddDocument(doc); doc = new Document(); doc.Add(new Field("field", "one two three y", Field.Store.YES, Field.Index.ANALYZED)); doc.Add(new Field("sorter", "a", Field.Store.YES, Field.Index.ANALYZED)); writer.AddDocument(doc); doc = new Document(); doc.Add(new Field("field", "one two x", Field.Store.YES, Field.Index.ANALYZED)); doc.Add(new Field("sorter", "c", Field.Store.YES, Field.Index.ANALYZED)); writer.AddDocument(doc); writer.Optimize(); writer.Close(); searcher = new IndexSearcher(directory, true); query = new TermQuery(new Term("field", "three")); filter = NewStaticFilterB(); }
public virtual void TestAddSameDocTwice() { // LUCENE-5367: this was a problem with the previous code, making sure it // works with the new code. Directory indexDir = NewDirectory(), taxoDir = NewDirectory(); IndexWriter indexWriter = new IndexWriter(indexDir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))); DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir); FacetsConfig facetsConfig = new FacetsConfig(); Document doc = new Document(); doc.Add(new FacetField("a", "b")); doc = facetsConfig.Build(taxoWriter, doc); // these two addDocument() used to fail indexWriter.AddDocument(doc); indexWriter.AddDocument(doc); IOUtils.Close(indexWriter, taxoWriter); DirectoryReader indexReader = DirectoryReader.Open(indexDir); DirectoryTaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir); IndexSearcher searcher = NewSearcher(indexReader); FacetsCollector fc = new FacetsCollector(); searcher.Search(new MatchAllDocsQuery(), fc); Facets facets = GetTaxonomyFacetCounts(taxoReader, facetsConfig, fc); FacetResult res = facets.GetTopChildren(10, "a"); Assert.AreEqual(1, res.LabelValues.Length); Assert.AreEqual(2, res.LabelValues[0].value); IOUtils.Close(indexReader, taxoReader); IOUtils.Close(indexDir, taxoDir); }
public virtual void SetUp() { // Create an index writer. directory = new RAMDirectory(); IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true); // oldest doc: // Add the first document. text = "Document 1" dateTime = Oct 10 03:25:22 EDT 2007 writer.AddDocument(CreateDocument("Document 1", 633275835220000000L)); // Add the second document. text = "Document 2" dateTime = Oct 10 03:25:26 EDT 2007 writer.AddDocument(CreateDocument("Document 2", 633275835260000000L)); // Add the third document. text = "Document 3" dateTime = Oct 11 07:12:13 EDT 2007 writer.AddDocument(CreateDocument("Document 3", 633276835330000000L)); // Add the fourth document. text = "Document 4" dateTime = Oct 11 08:02:09 EDT 2007 writer.AddDocument(CreateDocument("Document 4", 633276865290000000L)); // latest doc: // Add the fifth document. text = "Document 5" dateTime = Oct 12 13:25:43 EDT 2007 writer.AddDocument(CreateDocument("Document 5", 633277923430000000L)); //// oldest doc: //// Add the first document. text = "Document 1" dateTime = Oct 10 03:25:22 EDT 2007 //writer.AddDocument(CreateDocument("Document 1", 1192001122000L)); //// Add the second document. text = "Document 2" dateTime = Oct 10 03:25:26 EDT 2007 //writer.AddDocument(CreateDocument("Document 2", 1192001126000L)); //// Add the third document. text = "Document 3" dateTime = Oct 11 07:12:13 EDT 2007 //writer.AddDocument(CreateDocument("Document 3", 1192101133000L)); //// Add the fourth document. text = "Document 4" dateTime = Oct 11 08:02:09 EDT 2007 //writer.AddDocument(CreateDocument("Document 4", 1192104129000L)); //// latest doc: //// Add the fifth document. text = "Document 5" dateTime = Oct 12 13:25:43 EDT 2007 //writer.AddDocument(CreateDocument("Document 5", 1192209943000L)); writer.Optimize(); writer.Close(); }
public override void SetUp() { base.SetUp(); IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED); for (int i = 0; i < 5137; ++i) { Document doc = new Document(); doc.Add(new Field(FIELD, "meaninglessnames", Field.Store.YES, Field.Index.NOT_ANALYZED)); writer.AddDocument(doc); } { Document doc = new Document(); doc.Add(new Field(FIELD, "tangfulin", Field.Store.YES, Field.Index.NOT_ANALYZED)); writer.AddDocument(doc); } for (int i = 5138; i < 11377; ++i) { Document doc = new Document(); doc.Add(new Field(FIELD, "meaninglessnames", Field.Store.YES, Field.Index.NOT_ANALYZED)); writer.AddDocument(doc); } { Document doc = new Document(); doc.Add(new Field(FIELD, "tangfulin", Field.Store.YES, Field.Index.NOT_ANALYZED)); writer.AddDocument(doc); } writer.Close(); }
void Index() { Lucene.Net.Index.IndexWriter wr = new Lucene.Net.Index.IndexWriter(dir, new Lucene.Net.Analysis.WhitespaceAnalyzer(), Lucene.Net.Index.IndexWriter.MaxFieldLength.UNLIMITED); Lucene.Net.Documents.Document doc = null; Lucene.Net.Documents.Field f = null; doc = new Lucene.Net.Documents.Document(); f = new Lucene.Net.Documents.Field("field", "a b c d", Lucene.Net.Documents.Field.Store.NO, Lucene.Net.Documents.Field.Index.ANALYZED); doc.Add(f); wr.AddDocument(doc); doc = new Lucene.Net.Documents.Document(); f = new Lucene.Net.Documents.Field("field", "a b a d", Lucene.Net.Documents.Field.Store.NO, Lucene.Net.Documents.Field.Index.ANALYZED); doc.Add(f); wr.AddDocument(doc); doc = new Lucene.Net.Documents.Document(); f = new Lucene.Net.Documents.Field("field", "a b e f", Lucene.Net.Documents.Field.Store.NO, Lucene.Net.Documents.Field.Index.ANALYZED); doc.Add(f); wr.AddDocument(doc); doc = new Lucene.Net.Documents.Document(); f = new Lucene.Net.Documents.Field("field", "x y z", Lucene.Net.Documents.Field.Store.NO, Lucene.Net.Documents.Field.Index.ANALYZED); doc.Add(f); wr.AddDocument(doc); wr.Close(); }
public virtual void TestPhrasePrefix() { RAMDirectory indexStore = new RAMDirectory(); IndexWriter writer = new IndexWriter(indexStore, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED); Document doc1 = new Document(); Document doc2 = new Document(); Document doc3 = new Document(); Document doc4 = new Document(); Document doc5 = new Document(); doc1.Add(new Field("body", "blueberry pie", Field.Store.YES, Field.Index.ANALYZED)); doc2.Add(new Field("body", "blueberry strudel", Field.Store.YES, Field.Index.ANALYZED)); doc3.Add(new Field("body", "blueberry pizza", Field.Store.YES, Field.Index.ANALYZED)); doc4.Add(new Field("body", "blueberry chewing gum", Field.Store.YES, Field.Index.ANALYZED)); doc5.Add(new Field("body", "piccadilly circus", Field.Store.YES, Field.Index.ANALYZED)); writer.AddDocument(doc1); writer.AddDocument(doc2); writer.AddDocument(doc3); writer.AddDocument(doc4); writer.AddDocument(doc5); writer.Optimize(); writer.Close(); IndexSearcher searcher = new IndexSearcher(indexStore); //PhrasePrefixQuery query1 = new PhrasePrefixQuery(); MultiPhraseQuery query1 = new MultiPhraseQuery(); //PhrasePrefixQuery query2 = new PhrasePrefixQuery(); MultiPhraseQuery query2 = new MultiPhraseQuery(); query1.Add(new Term("body", "blueberry")); query2.Add(new Term("body", "strawberry")); System.Collections.ArrayList termsWithPrefix = new System.Collections.ArrayList(); IndexReader ir = IndexReader.Open(indexStore); // this TermEnum gives "piccadilly", "pie" and "pizza". System.String prefix = "pi"; TermEnum te = ir.Terms(new Term("body", prefix + "*")); do { if (te.Term().Text().StartsWith(prefix)) { termsWithPrefix.Add(te.Term()); } } while (te.Next()); query1.Add((Term[]) termsWithPrefix.ToArray(typeof(Term))); query2.Add((Term[]) termsWithPrefix.ToArray(typeof(Term))); ScoreDoc[] result; result = searcher.Search(query1, null, 1000).scoreDocs; Assert.AreEqual(2, result.Length); result = searcher.Search(query2, null, 1000).scoreDocs; Assert.AreEqual(0, result.Length); }
public virtual void TestDemo_Renamed_Method() { Analyzer analyzer = new StandardAnalyzer(); // Store the index in memory: Directory directory = new RAMDirectory(); // To store an index on disk, use this instead (note that the // parameter true will overwrite the index in that directory // if one exists): //Directory directory = FSDirectory.getDirectory("/tmp/testindex", true); IndexWriter iwriter = new IndexWriter(directory, analyzer, true); iwriter.SetMaxFieldLength(25000); Document doc = new Document(); System.String text = "This is the text to be indexed."; doc.Add(new Field("fieldname", text, Field.Store.YES, Field.Index.TOKENIZED)); iwriter.AddDocument(doc); iwriter.Close(); // Now search the index: IndexSearcher isearcher = new IndexSearcher(directory); // Parse a simple query that searches for "text": Lucene.Net.QueryParsers.QueryParser parser = new Lucene.Net.QueryParsers.QueryParser("fieldname", analyzer); Query query = parser.Parse("text"); Hits hits = isearcher.Search(query); Assert.AreEqual(1, hits.Length()); // Iterate through the results: for (int i = 0; i < hits.Length(); i++) { Document hitDoc = hits.Doc(i); Assert.AreEqual("This is the text to be indexed.", hitDoc.Get("fieldname")); } isearcher.Close(); directory.Close(); }
public virtual void TestFilterWorks() { Directory dir = new RAMDirectory(); IndexWriter writer = new IndexWriter(dir, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED); for (int i = 0; i < 500; i++) { Document document = new Document(); document.Add(new Field("field", English.IntToEnglish(i) + " equals " + English.IntToEnglish(i), Field.Store.NO, Field.Index.ANALYZED)); writer.AddDocument(document); } writer.Close(); IndexReader reader = IndexReader.Open(dir, true); SpanTermQuery query = new SpanTermQuery(new Term("field", English.IntToEnglish(10).Trim())); SpanQueryFilter filter = new SpanQueryFilter(query); SpanFilterResult result = filter.BitSpans(reader); DocIdSet docIdSet = result.DocIdSet; Assert.IsTrue(docIdSet != null, "docIdSet is null and it shouldn't be"); AssertContainsDocId("docIdSet doesn't contain docId 10", docIdSet, 10); var spans = result.Positions; Assert.IsTrue(spans != null, "spans is null and it shouldn't be"); int size = GetDocIdSetSize(docIdSet); Assert.IsTrue(spans.Count == size, "spans Size: " + spans.Count + " is not: " + size); for (System.Collections.IEnumerator iterator = spans.GetEnumerator(); iterator.MoveNext(); ) { SpanFilterResult.PositionInfo info = (SpanFilterResult.PositionInfo) iterator.Current; Assert.IsTrue(info != null, "info is null and it shouldn't be"); //The doc should indicate the bit is on AssertContainsDocId("docIdSet doesn't contain docId " + info.Doc, docIdSet, info.Doc); //There should be two positions in each Assert.IsTrue(info.Positions.Count == 2, "info.getPositions() Size: " + info.Positions.Count + " is not: " + 2); } reader.Close(); }
public virtual void TestBasic() { Directory dir = new RAMDirectory(); IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED); Document doc = new Document(); doc.Add(new Field("field", "value", Field.Store.NO, Field.Index.ANALYZED)); writer.AddDocument(doc); writer.Close(); TermQuery termQuery = new TermQuery(new Term("field", "value")); // should not throw exception with primitive query QueryWrapperFilter qwf = new QueryWrapperFilter(termQuery); IndexSearcher searcher = new IndexSearcher(dir, true); TopDocs hits = searcher.Search(new MatchAllDocsQuery(), qwf, 10); Assert.AreEqual(1, hits.totalHits); // should not throw exception with complex primitive query BooleanQuery booleanQuery = new BooleanQuery(); booleanQuery.Add(termQuery, Occur.MUST); booleanQuery.Add(new TermQuery(new Term("field", "missing")), Occur.MUST_NOT); qwf = new QueryWrapperFilter(termQuery); hits = searcher.Search(new MatchAllDocsQuery(), qwf, 10); Assert.AreEqual(1, hits.totalHits); // should not throw exception with non primitive Query (doesn't implement // Query#createWeight) qwf = new QueryWrapperFilter(new FuzzyQuery(new Term("field", "valu"))); hits = searcher.Search(new MatchAllDocsQuery(), qwf, 10); Assert.AreEqual(1, hits.totalHits); }
public void SearchFiltered(IndexWriter writer, Directory directory, Filter filter, bool optimize) { try { for (int i = 0; i < 60; i++) {//Simple docs Document doc = new Document(); doc.Add(new Field(FIELD, i.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED)); writer.AddDocument(doc); } if (optimize) writer.Optimize(); writer.Close(); BooleanQuery booleanQuery = new BooleanQuery(); booleanQuery.Add(new TermQuery(new Term(FIELD, "36")), Occur.SHOULD); IndexSearcher indexSearcher = new IndexSearcher(directory); ScoreDoc[] hits = indexSearcher.Search(booleanQuery, filter, 1000).ScoreDocs; Assert.AreEqual(1, hits.Length, "Number of matched documents"); } catch (System.IO.IOException e) { Assert.Fail(e.Message); } }
public override void SetUp() { base.SetUp(); directory = new RAMDirectory(); PayloadAnalyzer analyzer = new PayloadAnalyzer(this); IndexWriter writer = new IndexWriter(directory, analyzer, true, IndexWriter.MaxFieldLength.LIMITED); writer.SetSimilarity(similarity); //writer.infoStream = System.out; for (int i = 0; i < 1000; i++) { Document doc = new Document(); Field noPayloadField = new Field(PayloadHelper.NO_PAYLOAD_FIELD, English.IntToEnglish(i), Field.Store.YES, Field.Index.ANALYZED); //noPayloadField.setBoost(0); doc.Add(noPayloadField); doc.Add(new Field("field", English.IntToEnglish(i), Field.Store.YES, Field.Index.ANALYZED)); doc.Add(new Field("multiField", English.IntToEnglish(i) + " " + English.IntToEnglish(i), Field.Store.YES, Field.Index.ANALYZED)); writer.AddDocument(doc); } writer.Optimize(); writer.Close(); searcher = new IndexSearcher(directory, true); searcher.SetSimilarity(similarity); }
public virtual void TestDanish() { /* build an index */ RAMDirectory danishIndex = new RAMDirectory(); IndexWriter writer = new IndexWriter(danishIndex, new SimpleAnalyzer(), T, IndexWriter.MaxFieldLength.LIMITED); // Danish collation orders the words below in the given order // (example taken from TestSort.testInternationalSort() ). System.String[] words = new System.String[] { "H\u00D8T", "H\u00C5T", "MAND" }; for (int docnum = 0; docnum < words.Length; ++docnum) { Document doc = new Document(); doc.Add(new Field("content", words[docnum], Field.Store.YES, Field.Index.UN_TOKENIZED)); doc.Add(new Field("body", "body", Field.Store.YES, Field.Index.UN_TOKENIZED)); writer.AddDocument(doc); } writer.Optimize(); writer.Close(); IndexReader reader = IndexReader.Open(danishIndex); IndexSearcher search = new IndexSearcher(reader); System.Globalization.CompareInfo c = new System.Globalization.CultureInfo("da" + "-" + "dk").CompareInfo; // Unicode order would not include "H\u00C5T" in [ "H\u00D8T", "MAND" ], // but Danish collation does. ScoreDoc[] result = search.Search(Csrq("content", "H\u00D8T", "MAND", F, F, c), null, 1000).scoreDocs; AssertEquals("The index Term should be included.", 1, result.Length); result = search.Search(Csrq("content", "H\u00C5T", "MAND", F, F, c), null, 1000).scoreDocs; AssertEquals("The index Term should not be included.", 0, result.Length); search.Close(); }
public override void SetUp() { base.SetUp(); RAMDirectory directory = new RAMDirectory(); IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED); long theLong = System.Int64.MaxValue; double theDouble = System.Double.MaxValue; sbyte theByte = (sbyte)System.SByte.MaxValue; short theShort = System.Int16.MaxValue; int theInt = System.Int32.MaxValue; float theFloat = System.Single.MaxValue; for (int i = 0; i < NUM_DOCS; i++) { Document doc = new Document(); doc.Add(new Field("theLong", System.Convert.ToString(theLong--), Field.Store.NO, Field.Index.NOT_ANALYZED)); doc.Add(new Field("theDouble", (theDouble--).ToString("E16"), Field.Store.NO, Field.Index.NOT_ANALYZED)); doc.Add(new Field("theByte", System.Convert.ToString((sbyte)theByte--), Field.Store.NO, Field.Index.NOT_ANALYZED)); doc.Add(new Field("theShort", System.Convert.ToString(theShort--), Field.Store.NO, Field.Index.NOT_ANALYZED)); doc.Add(new Field("theInt", System.Convert.ToString(theInt--), Field.Store.NO, Field.Index.NOT_ANALYZED)); doc.Add(new Field("theFloat", (theFloat--).ToString("E8"), Field.Store.NO, Field.Index.NOT_ANALYZED)); writer.AddDocument(doc); } writer.Close(); reader = IndexReader.Open(directory); }
public void SearchFiltered(IndexWriter writer, Directory directory, Filter filter, bool optimize) { try { for (int i = 0; i < 60; i++) {//Simple docs Document doc = new Document(); doc.Add(new Field(FIELD, i.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED)); writer.AddDocument(doc); } if (optimize) { writer.Optimize(); } writer.Close(); BooleanQuery booleanQuery = new BooleanQuery(); booleanQuery.Add(new TermQuery(new Term(FIELD, "36")), BooleanClause.Occur.SHOULD); IndexSearcher indexSearcher = new IndexSearcher(directory); ScoreDoc[] hits = indexSearcher.Search(booleanQuery, filter, 1000).scoreDocs; Assert.AreEqual(1, hits.Length, "Number of matched documents"); } catch (System.IO.IOException e) { Assert.Fail(e.Message); } }
private void AddDoc(IndexWriter iw, int i) { Document d = new Document(); IFieldable f; int scoreAndID = i + 1; f = new Field(ID_FIELD, Id2String(scoreAndID), Field.Store.YES, Field.Index.NOT_ANALYZED); // for debug purposes f.OmitNorms = true; d.Add(f); f = new Field(TEXT_FIELD, "text of doc" + scoreAndID + TextLine(i), Field.Store.NO, Field.Index.ANALYZED); // for regular search f.OmitNorms = true; d.Add(f); f = new Field(INT_FIELD, "" + scoreAndID, Field.Store.NO, Field.Index.NOT_ANALYZED); // for function scoring f.OmitNorms = true; d.Add(f); f = new Field(FLOAT_FIELD, scoreAndID + ".000", Field.Store.NO, Field.Index.NOT_ANALYZED); // for function scoring f.OmitNorms = true; d.Add(f); iw.AddDocument(d); Log("added: " + d); }
public virtual void TestNullOrSubScorer() { Directory dir = new MockRAMDirectory(); IndexWriter w = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED); Document doc = new Document(); doc.Add(new Field("field", "a b c d", Field.Store.NO, Field.Index.ANALYZED)); w.AddDocument(doc); IndexReader r = w.GetReader(); IndexSearcher s = new IndexSearcher(r); BooleanQuery q = new BooleanQuery(); q.Add(new TermQuery(new Term("field", "a")), Occur.SHOULD); // LUCENE-2617: make sure that a term not in the index still contributes to the score via coord factor float score = s.Search(q, 10).MaxScore; Query subQuery = new TermQuery(new Term("field", "not_in_index")); subQuery.Boost = 0; q.Add(subQuery, Occur.SHOULD); float score2 = s.Search(q, 10).MaxScore; Assert.AreEqual(score * .5, score2, 1e-6); // LUCENE-2617: make sure that a clause not in the index still contributes to the score via coord factor BooleanQuery qq = (BooleanQuery)q.Clone(); PhraseQuery phrase = new PhraseQuery(); phrase.Add(new Term("field", "not_in_index")); phrase.Add(new Term("field", "another_not_in_index")); phrase.Boost = 0; qq.Add(phrase, Occur.SHOULD); score2 = s.Search(qq, 10).MaxScore; Assert.AreEqual(score * (1.0 / 3), score2, 1e-6); // now test BooleanScorer2 subQuery = new TermQuery(new Term("field", "b")); subQuery.Boost = 0; q.Add(subQuery, Occur.MUST); score2 = s.Search(q, 10).MaxScore; Assert.AreEqual(score * (2.0 / 3), score2, 1e-6); // PhraseQuery w/ no terms added returns a null scorer PhraseQuery pq = new PhraseQuery(); q.Add(pq, Occur.SHOULD); Assert.AreEqual(1, s.Search(q, 10).TotalHits); // A required clause which returns null scorer should return null scorer to // IndexSearcher. q = new BooleanQuery(); pq = new PhraseQuery(); q.Add(new TermQuery(new Term("field", "a")), Occur.SHOULD); q.Add(pq, Occur.MUST); Assert.AreEqual(0, s.Search(q, 10).TotalHits); DisjunctionMaxQuery dmq = new DisjunctionMaxQuery(1.0f); dmq.Add(new TermQuery(new Term("field", "a"))); dmq.Add(pq); Assert.AreEqual(1, s.Search(dmq, 10).TotalHits); r.Close(); w.Close(); dir.Close(); }
public virtual void TestFarsi() { /* build an index */ RAMDirectory farsiIndex = new RAMDirectory(); IndexWriter writer = new IndexWriter(farsiIndex, new SimpleAnalyzer(), T, IndexWriter.MaxFieldLength.LIMITED); Document doc = new Document(); doc.Add(new Field("content", "\u0633\u0627\u0628", Field.Store.YES, Field.Index.UN_TOKENIZED)); doc.Add(new Field("body", "body", Field.Store.YES, Field.Index.UN_TOKENIZED)); writer.AddDocument(doc); writer.Optimize(); writer.Close(); IndexReader reader = IndexReader.Open(farsiIndex); IndexSearcher search = new IndexSearcher(reader); Query q = new TermQuery(new Term("body", "body")); // Neither Java 1.4.2 nor 1.5.0 has Farsi Locale collation available in // RuleBasedCollator. However, the Arabic Locale seems to order the Farsi // characters properly. System.Globalization.CompareInfo collator = new System.Globalization.CultureInfo("ar").CompareInfo; // Unicode order would include U+0633 in [ U+062F - U+0698 ], but Farsi // orders the U+0698 character before the U+0633 character, so the single // index Term below should NOT be returned by a TermRangeFilter with a Farsi // Collator (or an Arabic one for the case when Farsi is not supported). Hits result = search.Search(q, new TermRangeFilter("content", "\u062F", "\u0698", T, T, collator)); Assert.AreEqual(0, result.Length(), "The index Term should not be included."); result = search.Search(q, new TermRangeFilter("content", "\u0633", "\u0638", T, T, collator)); Assert.AreEqual(1, result.Length(), "The index Term should be included."); search.Close(); }
public virtual void TestNullOrSubScorer() { Directory dir = new MockRAMDirectory(); IndexWriter w = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED); Document doc = new Document(); doc.Add(new Field("field", "a b c d", Field.Store.NO, Field.Index.ANALYZED)); w.AddDocument(doc); IndexReader r = w.GetReader(); IndexSearcher s = new IndexSearcher(r); BooleanQuery q = new BooleanQuery(); q.Add(new TermQuery(new Term("field", "a")), BooleanClause.Occur.SHOULD); // PhraseQuery w/ no terms added returns a null scorer PhraseQuery pq = new PhraseQuery(); q.Add(pq, BooleanClause.Occur.SHOULD); Assert.AreEqual(1, s.Search(q, 10).TotalHits); // A required clause which returns null scorer should return null scorer to // IndexSearcher. q = new BooleanQuery(); pq = new PhraseQuery(); q.Add(new TermQuery(new Term("field", "a")), BooleanClause.Occur.SHOULD); q.Add(pq, BooleanClause.Occur.MUST); Assert.AreEqual(0, s.Search(q, 10).TotalHits); DisjunctionMaxQuery dmq = new DisjunctionMaxQuery(1.0f); dmq.Add(new TermQuery(new Term("field", "a"))); dmq.Add(pq); Assert.AreEqual(1, s.Search(dmq, 10).TotalHits); r.Close(); w.Close(); dir.Close(); }
public override void SetUp() { base.SetUp(); RAMDirectory directory = new RAMDirectory(); IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED); long theLong = System.Int64.MaxValue; double theDouble = System.Double.MaxValue; sbyte theByte = (sbyte) System.SByte.MaxValue; short theShort = System.Int16.MaxValue; int theInt = System.Int32.MaxValue; float theFloat = System.Single.MaxValue; for (int i = 0; i < NUM_DOCS; i++) { Document doc = new Document(); doc.Add(new Field("theLong", System.Convert.ToString(theLong--), Field.Store.NO, Field.Index.NOT_ANALYZED)); doc.Add(new Field("theDouble", (theDouble--).ToString("E16"), Field.Store.NO, Field.Index.NOT_ANALYZED)); doc.Add(new Field("theByte", System.Convert.ToString((sbyte) theByte--), Field.Store.NO, Field.Index.NOT_ANALYZED)); doc.Add(new Field("theShort", System.Convert.ToString(theShort--), Field.Store.NO, Field.Index.NOT_ANALYZED)); doc.Add(new Field("theInt", System.Convert.ToString(theInt--), Field.Store.NO, Field.Index.NOT_ANALYZED)); doc.Add(new Field("theFloat", (theFloat--).ToString("E8"), Field.Store.NO, Field.Index.NOT_ANALYZED)); writer.AddDocument(doc); } writer.Close(); reader = IndexReader.Open(directory); }
public virtual void TestDemo_Renamed() { Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT); // Store the index in memory: Directory directory = new RAMDirectory(); // To store an index on disk, use this instead: //Directory directory = FSDirectory.open("/tmp/testindex"); IndexWriter iwriter = new IndexWriter(directory, analyzer, true, new IndexWriter.MaxFieldLength(25000)); Document doc = new Document(); System.String text = "This is the text to be indexed."; doc.Add(new Field("fieldname", text, Field.Store.YES, Field.Index.ANALYZED)); iwriter.AddDocument(doc); iwriter.Close(); // Now search the index: IndexSearcher isearcher = new IndexSearcher(directory, true); // read-only=true // Parse a simple query that searches for "text": QueryParser parser = new QueryParser("fieldname", analyzer); Query query = parser.Parse("text"); ScoreDoc[] hits = isearcher.Search(query, null, 1000).scoreDocs; Assert.AreEqual(1, hits.Length); // Iterate through the results: for (int i = 0; i < hits.Length; i++) { Document hitDoc = isearcher.Doc(hits[i].doc); Assert.AreEqual(hitDoc.Get("fieldname"), "This is the text to be indexed."); } isearcher.Close(); directory.Close(); }
public virtual void TestMmapIndex() { FSDirectory storeDirectory; storeDirectory = new MMapDirectory(new System.IO.FileInfo(storePathname), null); // plan to add a set of useful stopwords, consider changing some of the // interior filters. StandardAnalyzer analyzer = new StandardAnalyzer(new System.Collections.Hashtable()); // TODO: something about lock timeouts and leftover locks. IndexWriter writer = new IndexWriter(storeDirectory, analyzer, true, IndexWriter.MaxFieldLength.LIMITED); IndexSearcher searcher = new IndexSearcher(storePathname); for (int dx = 0; dx < 1000; dx++) { System.String f = RandomField(); Document doc = new Document(); doc.Add(new Field("data", f, Field.Store.YES, Field.Index.ANALYZED)); writer.AddDocument(doc); } searcher.Close(); writer.Close(); RmDir(new System.IO.FileInfo(storePathname)); }
public override void SetUp() { base.SetUp(); System.String tempDir = System.IO.Path.GetTempPath(); if (tempDir == null) { throw new System.IO.IOException("java.io.tmpdir undefined, cannot run test"); } indexDir = new System.IO.DirectoryInfo(Path.Combine(tempDir, "RAMDirIndex")); Directory dir = FSDirectory.Open(indexDir); IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED); // add some documents Document doc = null; for (int i = 0; i < docsToAdd; i++) { doc = new Document(); doc.Add(new Field("content", English.IntToEnglish(i).Trim(), Field.Store.YES, Field.Index.NOT_ANALYZED)); writer.AddDocument(doc); } Assert.AreEqual(docsToAdd, writer.MaxDoc()); writer.Close(); dir.Close(); }
public virtual void CreateRandomTerms(int nDocs, int nTerms, double power, Directory dir) { int[] freq = new int[nTerms]; for (int i = 0; i < nTerms; i++) { int f = (nTerms + 1) - i; // make first terms less frequent freq[i] = (int) System.Math.Ceiling(System.Math.Pow(f, power)); terms[i] = new Term("f", System.Convert.ToString((char) ('A' + i))); } IndexWriter iw = new IndexWriter(dir, new WhitespaceAnalyzer(), true); for (int i = 0; i < nDocs; i++) { Document d = new Document(); for (int j = 0; j < nTerms; j++) { if (r.Next(freq[j]) == 0) { d.Add(new Field("f", terms[j].Text(), Field.Store.NO, Field.Index.UN_TOKENIZED)); //System.out.println(d); } } iw.AddDocument(d); } iw.Optimize(); iw.Close(); }
public virtual void SetUp() { directory = new RAMDirectory(); IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true); Document doc = new Document(); doc.Add(Field.Text("Field", "one two three four five")); doc.Add(Field.Text("sorter", "b")); writer.AddDocument(doc); doc = new Document(); doc.Add(Field.Text("Field", "one two three four")); doc.Add(Field.Text("sorter", "d")); writer.AddDocument(doc); doc = new Document(); doc.Add(Field.Text("Field", "one two three y")); doc.Add(Field.Text("sorter", "a")); writer.AddDocument(doc); doc = new Document(); doc.Add(Field.Text("Field", "one two x")); doc.Add(Field.Text("sorter", "c")); writer.AddDocument(doc); writer.Optimize(); writer.Close(); searcher = new IndexSearcher(directory); query = new TermQuery(new Term("Field", "three")); filter = new AnonymousClassFilter(this); }
public static void IndexDocs(IndexWriter writer, System.IO.FileInfo file) { if (System.IO.Directory.Exists(file.FullName)) { System.String[] files = System.IO.Directory.GetFileSystemEntries(file.FullName); // an IO error could occur if (files != null) { for (int i = 0; i < files.Length; i++) { IndexDocs(writer, new System.IO.FileInfo(files[i])); } } } else { System.Console.Out.WriteLine("adding " + file); try { writer.AddDocument(FileDocument.Document(file)); } // at least on windows, some temporary files raise this exception with an "access denied" message // checking if the file can be read doesn't help catch (System.IO.FileNotFoundException fnfe) { ; } } }
public virtual void TestMethod() { RAMDirectory directory = new RAMDirectory(); System.String[] values = new System.String[]{"1", "2", "3", "4"}; try { IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true); for (int i = 0; i < values.Length; i++) { Lucene.Net.Documents.Document doc = new Lucene.Net.Documents.Document(); doc.Add(new Field(FIELD, values[i], Field.Store.YES, Field.Index.UN_TOKENIZED)); writer.AddDocument(doc); } writer.Close(); BooleanQuery booleanQuery1 = new BooleanQuery(); booleanQuery1.Add(new TermQuery(new Term(FIELD, "1")), BooleanClause.Occur.SHOULD); booleanQuery1.Add(new TermQuery(new Term(FIELD, "2")), BooleanClause.Occur.SHOULD); BooleanQuery query = new BooleanQuery(); query.Add(booleanQuery1, BooleanClause.Occur.MUST); query.Add(new TermQuery(new Term(FIELD, "9")), BooleanClause.Occur.MUST_NOT); IndexSearcher indexSearcher = new IndexSearcher(directory); Hits hits = indexSearcher.Search(query); Assert.AreEqual(2, hits.Length(), "Number of matched documents"); } catch (System.IO.IOException e) { Assert.Fail(e.Message); } }
/// <summary> Adds the document to the index. /// /// </summary> /// <param name="writer">the Lucene index writer /// </param> /// <param name="id">the unique id of the document /// </param> /// <param name="text">the text of the document /// </param> /// <throws> IOException </throws> protected internal virtual void addDocument(IndexWriter writer, System.String id, System.String text) { Document document = new Document(); document.Add(new Field(FIELD_ID, id, Field.Store.YES, Field.Index.NOT_ANALYZED)); document.Add(new Field(FIELD_TEXT, text, Field.Store.YES, Field.Index.ANALYZED)); writer.AddDocument(document, null); }
public void TestCustomMergeScheduler() { // we don't really need to execute anything, just to make sure the custom MS // compiles. But ensure that it can be used as well, e.g., no other hidden // dependencies or something. Therefore, don't use any random API ! Directory dir = new RAMDirectory(); IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, null); conf.SetMergeScheduler(new ReportingMergeScheduler()); IndexWriter writer = new IndexWriter(dir, conf); writer.AddDocument(new Document()); writer.Commit(); // trigger flush writer.AddDocument(new Document()); writer.Commit(); // trigger flush writer.ForceMerge(1); writer.Dispose(); dir.Dispose(); }
private void Add(System.String s, System.String type, IndexWriter writer) { Document doc = new Document(); doc.Add(new Field("body", s, Field.Store.YES, Field.Index.ANALYZED)); doc.Add(new Field("type", type, Field.Store.YES, Field.Index.NOT_ANALYZED)); writer.AddDocument(doc); }
// LUCENE-1404 private void AddDoc(IndexWriter writer, System.String id, System.String text) { Document doc = new Document(); doc.Add(new Field("id", id, Field.Store.YES, Field.Index.UN_TOKENIZED)); doc.Add(new Field("text", text, Field.Store.YES, Field.Index.TOKENIZED)); writer.AddDocument(doc); }
public virtual void TestSetPosition() { Analyzer analyzer = new AnonymousClassAnalyzer(this); RAMDirectory store = new RAMDirectory(); IndexWriter writer = new IndexWriter(store, analyzer, true); Document d = new Document(); d.Add(Field.Text("Field", "bogus")); writer.AddDocument(d); writer.Optimize(); writer.Close(); IndexSearcher searcher = new IndexSearcher(store); PhraseQuery q; Hits hits; q = new PhraseQuery(); q.Add(new Term("Field", "1")); q.Add(new Term("Field", "2")); hits = searcher.Search(q); Assert.AreEqual(0, hits.Length()); q = new PhraseQuery(); q.Add(new Term("Field", "2")); q.Add(new Term("Field", "3")); hits = searcher.Search(q); Assert.AreEqual(1, hits.Length()); q = new PhraseQuery(); q.Add(new Term("Field", "3")); q.Add(new Term("Field", "4")); hits = searcher.Search(q); Assert.AreEqual(0, hits.Length()); q = new PhraseQuery(); q.Add(new Term("Field", "2")); q.Add(new Term("Field", "4")); hits = searcher.Search(q); Assert.AreEqual(1, hits.Length()); q = new PhraseQuery(); q.Add(new Term("Field", "3")); q.Add(new Term("Field", "5")); hits = searcher.Search(q); Assert.AreEqual(1, hits.Length()); q = new PhraseQuery(); q.Add(new Term("Field", "4")); q.Add(new Term("Field", "5")); hits = searcher.Search(q); Assert.AreEqual(1, hits.Length()); q = new PhraseQuery(); q.Add(new Term("Field", "2")); q.Add(new Term("Field", "5")); hits = searcher.Search(q); Assert.AreEqual(0, hits.Length()); }
public virtual void TestIterator() { RAMDirectory directory = new RAMDirectory(); IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED); Document doc = new Document(); doc.Add(new Field("field", "iterator test doc 1", Field.Store.YES, Field.Index.ANALYZED)); writer.AddDocument(doc); doc = new Document(); doc.Add(new Field("field", "iterator test doc 2", Field.Store.YES, Field.Index.ANALYZED)); writer.AddDocument(doc); writer.Close(); _TestUtil.CheckIndex(directory); IndexSearcher searcher = new IndexSearcher(directory); Hits hits = searcher.Search(new TermQuery(new Term("field", "iterator"))); HitIterator iterator = (HitIterator) hits.Iterator(); Assert.AreEqual(2, iterator.Length()); Assert.IsTrue(iterator.MoveNext()); Hit hit = (Hit) iterator.Current; Assert.AreEqual("iterator test doc 1", hit.Get("field")); Assert.IsTrue(iterator.MoveNext()); hit = (Hit) iterator.Current; Assert.AreEqual("iterator test doc 2", hit.GetDocument().Get("field")); Assert.IsFalse(iterator.MoveNext()); bool caughtException = false; try { System.Object generatedAux = iterator.Current; } catch (System.ArgumentOutOfRangeException e) { Assert.IsTrue(true); caughtException = true; } Assert.IsTrue(caughtException); }
private void AddDoc(System.String text, IndexWriter iw, float boost) { Document doc = new Document(); Field f = new Field("key", text, Field.Store.YES, Field.Index.ANALYZED); f.SetBoost(boost); doc.Add(f); iw.AddDocument(doc); }
public virtual void TestMutipleDocument() { RAMDirectory dir = new RAMDirectory(); IndexWriter writer = new IndexWriter(dir, new KeywordAnalyzer(), true); Document doc = new Document(); doc.Add(new Field("partnum", "Q36", Field.Store.YES, Field.Index.TOKENIZED)); writer.AddDocument(doc); doc = new Document(); doc.Add(new Field("partnum", "Q37", Field.Store.YES, Field.Index.TOKENIZED)); writer.AddDocument(doc); writer.Close(); IndexReader reader = IndexReader.Open(dir); TermDocs td = reader.TermDocs(new Term("partnum", "Q36")); Assert.IsTrue(td.Next()); td = reader.TermDocs(new Term("partnum", "Q37")); Assert.IsTrue(td.Next()); }
private void InsertDoc(IndexWriter writer, System.String content) { Document doc = new Document(); doc.Add(Field.Keyword("id", "id" + docCount)); doc.Add(Field.UnStored("content", content)); writer.AddDocument(doc); docCount++; }
public static void IndexingArticle(Article article) { Lucene.Net.Index.IndexWriter lwriter = new Lucene.Net.Index.IndexWriter(HttpContext.Current.Server.MapPath(ConfigurationManager.AppSettings["IndexingArticle"]), new Lucene.Net.Analysis.Standard.StandardAnalyzer(), false); Lucene.Net.Documents.Document doc = new Lucene.Net.Documents.Document(); doc.Add(new Lucene.Net.Documents.Field("ArticleId", article.Id.ToString(), Lucene.Net.Documents.Field.Store.YES, Lucene.Net.Documents.Field.Index.UN_TOKENIZED)); doc.Add(new Lucene.Net.Documents.Field("ArticleTitle", article.Title, Lucene.Net.Documents.Field.Store.YES, Lucene.Net.Documents.Field.Index.TOKENIZED)); doc.Add(new Lucene.Net.Documents.Field("ArticleDetail", ParseHTML(Article.GetContent(article.Id)), Lucene.Net.Documents.Field.Store.YES, Lucene.Net.Documents.Field.Index.TOKENIZED)); lwriter.AddDocument(doc); lwriter.Close(); }
public override void SetUp() { base.SetUp(); RAMDirectory directory = new RAMDirectory(); IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED); writer.AddDocument(Doc(new Field[] { Field("id", "0"), Field("gender", "male"), Field("first", "james"), Field("last", "jones") })); writer.AddDocument(Doc(new Field[] { Field("id", "1"), Field("gender", "male"), Field("first", "james"), Field("last", "smith"), Field("gender", "female"), Field("first", "sally"), Field("last", "jones") })); writer.AddDocument(Doc(new Field[] { Field("id", "2"), Field("gender", "female"), Field("first", "greta"), Field("last", "jones"), Field("gender", "female"), Field("first", "sally"), Field("last", "smith"), Field("gender", "male"), Field("first", "james"), Field("last", "jones") })); writer.AddDocument(Doc(new Field[] { Field("id", "3"), Field("gender", "female"), Field("first", "lisa"), Field("last", "jones"), Field("gender", "male"), Field("first", "bob"), Field("last", "costas") })); writer.AddDocument(Doc(new Field[] { Field("id", "4"), Field("gender", "female"), Field("first", "sally"), Field("last", "smith"), Field("gender", "female"), Field("first", "linda"), Field("last", "dixit"), Field("gender", "male"), Field("first", "bubba"), Field("last", "jones") })); writer.Close(); searcher = new IndexSearcher(directory); }
public virtual void CreateDummySearcher() { // Create a dummy index with nothing in it. // This could possibly fail if Lucene starts checking for docid ranges... RAMDirectory rd = new RAMDirectory(); IndexWriter iw = new IndexWriter(rd, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED); iw.AddDocument(new Document()); iw.Close(); s = new IndexSearcher(rd); }
private void InsertDoc(IndexWriter writer, System.String content) { Document doc = new Document(); doc.Add(new Field("id", "id" + docCount, Field.Store.YES, Field.Index.NOT_ANALYZED)); doc.Add(new Field("content", content, Field.Store.NO, Field.Index.ANALYZED)); writer.AddDocument(doc); docCount++; }
public virtual void TestBefore() { // create an index RAMDirectory indexStore = new RAMDirectory(); IndexWriter writer = new IndexWriter(indexStore, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED, null); long now = (DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond); Document doc = new Document(); // add time that is in the past doc.Add(new Field("datefield", DateTools.TimeToString(now - 1000, DateTools.Resolution.MILLISECOND), Field.Store.YES, Field.Index.NOT_ANALYZED)); doc.Add(new Field("body", "Today is a very sunny day in New York City", Field.Store.YES, Field.Index.ANALYZED)); writer.AddDocument(doc, null); writer.Optimize(null); writer.Close(); IndexSearcher searcher = new IndexSearcher(indexStore, true, null); // filter that should preserve matches //DateFilter df1 = DateFilter.Before("datefield", now); TermRangeFilter df1 = new TermRangeFilter("datefield", DateTools.TimeToString(now - 2000, DateTools.Resolution.MILLISECOND), DateTools.TimeToString(now, DateTools.Resolution.MILLISECOND), false, true); // filter that should discard matches //DateFilter df2 = DateFilter.Before("datefield", now - 999999); TermRangeFilter df2 = new TermRangeFilter("datefield", DateTools.TimeToString(0, DateTools.Resolution.MILLISECOND), DateTools.TimeToString(now - 2000, DateTools.Resolution.MILLISECOND), true, false); // search something that doesn't exist with DateFilter Query query1 = new TermQuery(new Term("body", "NoMatchForThis")); // search for something that does exists Query query2 = new TermQuery(new Term("body", "sunny")); ScoreDoc[] result; // ensure that queries return expected results without DateFilter first result = searcher.Search(query1, null, 1000, null).ScoreDocs; Assert.AreEqual(0, result.Length); result = searcher.Search(query2, null, 1000, null).ScoreDocs; Assert.AreEqual(1, result.Length); // run queries with DateFilter result = searcher.Search(query1, df1, 1000, null).ScoreDocs; Assert.AreEqual(0, result.Length); result = searcher.Search(query1, df2, 1000, null).ScoreDocs; Assert.AreEqual(0, result.Length); result = searcher.Search(query2, df1, 1000, null).ScoreDocs; Assert.AreEqual(1, result.Length); result = searcher.Search(query2, df2, 1000, null).ScoreDocs; Assert.AreEqual(0, result.Length); }
public virtual void TestMethod() { RAMDirectory directory = new RAMDirectory(); System.String[] categories = new System.String[] { "food", "foodanddrink", "foodanddrinkandgoodtimes", "food and drink" }; Query rw1 = null; Query rw2 = null; try { IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true); for (int i = 0; i < categories.Length; i++) { Document doc = new Document(); doc.Add(Field.Keyword("category", categories[i])); writer.AddDocument(doc); } writer.Close(); IndexReader reader = IndexReader.Open(directory); PrefixQuery query = new PrefixQuery(new Term("category", "foo")); rw1 = query.Rewrite(reader); BooleanQuery bq = new BooleanQuery(); bq.Add(query, true, false); rw2 = bq.Rewrite(reader); } catch (System.IO.IOException e) { Assert.Fail(e.Message); } BooleanQuery bq1 = null; if (rw1 is BooleanQuery) { bq1 = (BooleanQuery)rw1; } BooleanQuery bq2 = null; if (rw2 is BooleanQuery) { bq2 = (BooleanQuery)rw2; } else { Assert.Fail("Rewrite"); } Assert.AreEqual(bq1.GetClauses().Length, bq2.GetClauses().Length, "Number of Clauses Mismatch"); }
public virtual void TestDocBoost_() { RAMDirectory store = new RAMDirectory(); IndexWriter writer = new IndexWriter(store, new SimpleAnalyzer(), true); Field f1 = Field.Text("Field", "word"); Field f2 = Field.Text("Field", "word"); f2.SetBoost(2.0f); Document d1 = new Document(); Document d2 = new Document(); Document d3 = new Document(); Document d4 = new Document(); d3.SetBoost(3.0f); d4.SetBoost(2.0f); d1.Add(f1); // boost = 1 d2.Add(f2); // boost = 2 d3.Add(f1); // boost = 3 d4.Add(f2); // boost = 4 writer.AddDocument(d1); writer.AddDocument(d2); writer.AddDocument(d3); writer.AddDocument(d4); writer.Optimize(); writer.Close(); float[] scores = new float[4]; new IndexSearcher(store).Search(new TermQuery(new Term("Field", "word")), new AnonymousClassHitCollector(scores, this)); float lastScore = 0.0f; for (int i = 0; i < 4; i++) { Assert.IsTrue(scores[i] > lastScore); lastScore = scores[i]; } }
public virtual void TestWithThreads() { // LUCENE-5303: OrdinalsCache used the ThreadLocal BinaryDV instead of reader.getCoreCacheKey(). Store.Directory indexDir = NewDirectory(); Store.Directory taxoDir = NewDirectory(); IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())); IndexWriter writer = new IndexWriter(indexDir, conf); var taxoWriter = new DirectoryTaxonomyWriter(taxoDir); FacetsConfig config = new FacetsConfig(); Document doc = new Document(); doc.Add(new FacetField("A", "1")); writer.AddDocument(config.Build(taxoWriter, doc)); doc = new Document(); doc.Add(new FacetField("A", "2")); writer.AddDocument(config.Build(taxoWriter, doc)); var reader = DirectoryReader.Open(writer, true); CachedOrdinalsReader ordsReader = new CachedOrdinalsReader(new DocValuesOrdinalsReader(FacetsConfig.DEFAULT_INDEX_FIELD_NAME)); ThreadClass[] threads = new ThreadClass[3]; for (int i = 0; i < threads.Length; i++) { threads[i] = new ThreadAnonymousInnerClassHelper(this, "CachedOrdsThread-" + i, reader, ordsReader); } long ramBytesUsed = 0; foreach (ThreadClass t in threads) { t.Start(); t.Join(); if (ramBytesUsed == 0) { ramBytesUsed = ordsReader.RamBytesUsed(); } else { Assert.AreEqual(ramBytesUsed, ordsReader.RamBytesUsed()); } } IOUtils.Close(writer, taxoWriter, reader, indexDir, taxoDir); }
public virtual void TestDocBoost_Renamed() { RAMDirectory store = new RAMDirectory(); IndexWriter writer = new IndexWriter(store, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED); Fieldable f1 = new Field("field", "word", Field.Store.YES, Field.Index.ANALYZED); Fieldable f2 = new Field("field", "word", Field.Store.YES, Field.Index.ANALYZED); f2.SetBoost(2.0f); Document d1 = new Document(); Document d2 = new Document(); Document d3 = new Document(); Document d4 = new Document(); d3.SetBoost(3.0f); d4.SetBoost(2.0f); d1.Add(f1); // boost = 1 d2.Add(f2); // boost = 2 d3.Add(f1); // boost = 3 d4.Add(f2); // boost = 4 writer.AddDocument(d1); writer.AddDocument(d2); writer.AddDocument(d3); writer.AddDocument(d4); writer.Optimize(); writer.Close(); float[] scores = new float[4]; new IndexSearcher(store).Search(new TermQuery(new Term("field", "word")), new AnonymousClassCollector(scores, this)); float lastScore = 0.0f; for (int i = 0; i < 4; i++) { Assert.IsTrue(scores[i] > lastScore); lastScore = scores[i]; } }
public virtual void TestMutipleDocument() { RAMDirectory dir = new RAMDirectory(); IndexWriter writer = new IndexWriter(dir, new KeywordAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED); Document doc = new Document(); doc.Add(new Field("partnum", "Q36", Field.Store.YES, Field.Index.ANALYZED)); writer.AddDocument(doc); doc = new Document(); doc.Add(new Field("partnum", "Q37", Field.Store.YES, Field.Index.ANALYZED)); writer.AddDocument(doc); writer.Close(); IndexReader reader = IndexReader.Open(dir); TermDocs td = reader.TermDocs(new Term("partnum", "Q36")); Assert.IsTrue(td.Next()); td = reader.TermDocs(new Term("partnum", "Q37")); Assert.IsTrue(td.Next()); }
public virtual void TestAfter() { // create an index RAMDirectory indexStore = new RAMDirectory(); IndexWriter writer = new IndexWriter(indexStore, new SimpleAnalyzer(), true); long now = (System.DateTime.Now.Ticks - 621355968000000000) / 10000; Document doc = new Document(); // add time that is in the future doc.Add(Field.Keyword("datefield", DateField.TimeToString(now + 888888))); doc.Add(Field.Text("body", "Today is a very sunny day in New York City")); writer.AddDocument(doc); writer.Optimize(); writer.Close(); IndexSearcher searcher = new IndexSearcher(indexStore); // filter that should preserve matches DateFilter df1 = DateFilter.After("datefield", now); // filter that should discard matches DateFilter df2 = DateFilter.After("datefield", now + 999999); // search something that doesn't exist with DateFilter Query query1 = new TermQuery(new Term("body", "NoMatchForThis")); // search for something that does exists Query query2 = new TermQuery(new Term("body", "sunny")); Hits result; // ensure that queries return expected results without DateFilter first result = searcher.Search(query1); Assert.AreEqual(0, result.Length()); result = searcher.Search(query2); Assert.AreEqual(1, result.Length()); // run queries with DateFilter result = searcher.Search(query1, df1); Assert.AreEqual(0, result.Length()); result = searcher.Search(query1, df2); Assert.AreEqual(0, result.Length()); result = searcher.Search(query2, df1); Assert.AreEqual(1, result.Length()); result = searcher.Search(query2, df2); Assert.AreEqual(0, result.Length()); }
public virtual void TestOutOfOrderCollection() { Directory dir = new RAMDirectory(); IndexWriter writer = new IndexWriter(dir, null, MaxFieldLength.UNLIMITED); for (int i = 0; i < 10; i++) { writer.AddDocument(new Document()); } writer.Commit(); writer.Close(); bool[] inOrder = new bool[]{false, true}; System.String[] actualTSDCClass = new System.String[]{"OutOfOrderTopScoreDocCollector", "InOrderTopScoreDocCollector"}; // Save the original value to set later. bool origVal = BooleanQuery.GetAllowDocsOutOfOrder(); BooleanQuery.SetAllowDocsOutOfOrder(true); BooleanQuery bq = new BooleanQuery(); // Add a Query with SHOULD, since bw.scorer() returns BooleanScorer2 // which delegates to BS if there are no mandatory clauses. bq.Add(new MatchAllDocsQuery(), Occur.SHOULD); // Set minNrShouldMatch to 1 so that BQ will not optimize rewrite to return // the clause instead of BQ. bq.SetMinimumNumberShouldMatch(1); try { IndexSearcher searcher = new IndexSearcher(dir); for (int i = 0; i < inOrder.Length; i++) { TopDocsCollector tdc = TopScoreDocCollector.create(3, inOrder[i]); Assert.AreEqual("Lucene.Net.Search.TopScoreDocCollector+" + actualTSDCClass[i], tdc.GetType().FullName); searcher.Search(new MatchAllDocsQuery(), tdc); ScoreDoc[] sd = tdc.TopDocs().scoreDocs; Assert.AreEqual(3, sd.Length); for (int j = 0; j < sd.Length; j++) { Assert.AreEqual(j, sd[j].doc, "expected doc Id " + j + " found " + sd[j].doc); } } } finally { // Whatever happens, reset BooleanQuery.allowDocsOutOfOrder to the // original value. Don't set it to false in case the implementation in BQ // will change some day. BooleanQuery.SetAllowDocsOutOfOrder(origVal); } }
public virtual void TestOutOfOrderCollection() { Directory dir = new RAMDirectory(); IndexWriter writer = new IndexWriter(dir, null, MaxFieldLength.UNLIMITED); for (int i = 0; i < 10; i++) { writer.AddDocument(new Document()); } writer.Commit(); writer.Close(); bool[] inOrder = new bool[] { false, true }; System.String[] actualTSDCClass = new System.String[] { "OutOfOrderTopScoreDocCollector", "InOrderTopScoreDocCollector" }; // Save the original value to set later. bool origVal = BooleanQuery.GetAllowDocsOutOfOrder(); BooleanQuery.SetAllowDocsOutOfOrder(true); BooleanQuery bq = new BooleanQuery(); // Add a Query with SHOULD, since bw.scorer() returns BooleanScorer2 // which delegates to BS if there are no mandatory clauses. bq.Add(new MatchAllDocsQuery(), Occur.SHOULD); // Set minNrShouldMatch to 1 so that BQ will not optimize rewrite to return // the clause instead of BQ. bq.SetMinimumNumberShouldMatch(1); try { IndexSearcher searcher = new IndexSearcher(dir); for (int i = 0; i < inOrder.Length; i++) { TopDocsCollector tdc = TopScoreDocCollector.create(3, inOrder[i]); Assert.AreEqual("Lucene.Net.Search.TopScoreDocCollector+" + actualTSDCClass[i], tdc.GetType().FullName); searcher.Search(new MatchAllDocsQuery(), tdc); ScoreDoc[] sd = tdc.TopDocs().ScoreDocs; Assert.AreEqual(3, sd.Length); for (int j = 0; j < sd.Length; j++) { Assert.AreEqual(j, sd[j].doc, "expected doc Id " + j + " found " + sd[j].doc); } } } finally { // Whatever happens, reset BooleanQuery.allowDocsOutOfOrder to the // original value. Don't set it to false in case the implementation in BQ // will change some day. BooleanQuery.SetAllowDocsOutOfOrder(origVal); } }
private static void IndexDocsNoFacets(IndexWriter indexWriter) { int numDocs = AtLeast(2); for (int i = 0; i < numDocs; i++) { Document doc = new Document(); AddField(doc); indexWriter.AddDocument(doc); } indexWriter.Commit(); // flush a segment }
void LUCENENET_100_CreateIndex() { Lucene.Net.Index.IndexWriter w = new Lucene.Net.Index.IndexWriter(LUCENENET_100_Dir, new Lucene.Net.Analysis.Standard.StandardAnalyzer(), true); Lucene.Net.Documents.Field f1 = new Lucene.Net.Documents.Field("field1", "dark side of the moon", Lucene.Net.Documents.Field.Store.YES, Lucene.Net.Documents.Field.Index.TOKENIZED); Lucene.Net.Documents.Field f2 = new Lucene.Net.Documents.Field("field2", "123", Lucene.Net.Documents.Field.Store.YES, Lucene.Net.Documents.Field.Index.UN_TOKENIZED); Lucene.Net.Documents.Document d = new Lucene.Net.Documents.Document(); d.Add(f1); d.Add(f2); w.AddDocument(d); f1 = new Lucene.Net.Documents.Field("field1", "Fly me to the moon", Lucene.Net.Documents.Field.Store.YES, Lucene.Net.Documents.Field.Index.TOKENIZED); f2 = new Lucene.Net.Documents.Field("field2", "456", Lucene.Net.Documents.Field.Store.YES, Lucene.Net.Documents.Field.Index.UN_TOKENIZED); d = new Lucene.Net.Documents.Document(); d.Add(f1); d.Add(f2); w.AddDocument(d); w.Close(); }
public virtual void TestBefore() { // create an index RAMDirectory indexStore = new RAMDirectory(); IndexWriter writer = new IndexWriter(indexStore, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED); long now = (DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond); Document doc = new Document(); // add time that is in the past doc.Add(new Field("datefield", DateTools.TimeToString(now - 1000, DateTools.Resolution.MILLISECOND), Field.Store.YES, Field.Index.NOT_ANALYZED)); doc.Add(new Field("body", "Today is a very sunny day in New York City", Field.Store.YES, Field.Index.ANALYZED)); writer.AddDocument(doc); writer.Optimize(); writer.Close(); IndexSearcher searcher = new IndexSearcher(indexStore, true); // filter that should preserve matches //DateFilter df1 = DateFilter.Before("datefield", now); TermRangeFilter df1 = new TermRangeFilter("datefield", DateTools.TimeToString(now - 2000, DateTools.Resolution.MILLISECOND), DateTools.TimeToString(now, DateTools.Resolution.MILLISECOND), false, true); // filter that should discard matches //DateFilter df2 = DateFilter.Before("datefield", now - 999999); TermRangeFilter df2 = new TermRangeFilter("datefield", DateTools.TimeToString(0, DateTools.Resolution.MILLISECOND), DateTools.TimeToString(now - 2000, DateTools.Resolution.MILLISECOND), true, false); // search something that doesn't exist with DateFilter Query query1 = new TermQuery(new Term("body", "NoMatchForThis")); // search for something that does exists Query query2 = new TermQuery(new Term("body", "sunny")); ScoreDoc[] result; // ensure that queries return expected results without DateFilter first result = searcher.Search(query1, null, 1000).ScoreDocs; Assert.AreEqual(0, result.Length); result = searcher.Search(query2, null, 1000).ScoreDocs; Assert.AreEqual(1, result.Length); // run queries with DateFilter result = searcher.Search(query1, df1, 1000).ScoreDocs; Assert.AreEqual(0, result.Length); result = searcher.Search(query1, df2, 1000).ScoreDocs; Assert.AreEqual(0, result.Length); result = searcher.Search(query2, df1, 1000).ScoreDocs; Assert.AreEqual(1, result.Length); result = searcher.Search(query2, df2, 1000).ScoreDocs; Assert.AreEqual(0, result.Length); }
public void Test_Index_ReusableStringReader() { Lucene.Net.Index.IndexWriter wr = new Lucene.Net.Index.IndexWriter(new Lucene.Net.Store.RAMDirectory(), new TestAnalyzer(), true); Lucene.Net.Documents.Document doc = new Lucene.Net.Documents.Document(); Lucene.Net.Documents.Field f1 = new Lucene.Net.Documents.Field("f1", TEST_STRING, Lucene.Net.Documents.Field.Store.YES, Lucene.Net.Documents.Field.Index.TOKENIZED); doc.Add(f1); wr.AddDocument(doc); wr.Close(); }
internal bool Validate = true; // set to false when doing performance testing #endregion Fields #region Methods // TODO: this should be setUp().... public virtual void CreateDummySearcher() { // Create a dummy index with nothing in it. // this could possibly fail if Lucene starts checking for docid ranges... d = NewDirectory(); IndexWriter iw = new IndexWriter(d, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))); iw.AddDocument(new Document()); iw.Dispose(); r = DirectoryReader.Open(d); s = NewSearcher(r); }
public virtual void TestBefore() { // create an index RAMDirectory indexStore = new RAMDirectory(); IndexWriter writer = new IndexWriter(indexStore, new SimpleAnalyzer(), true); long now = (long) (DateTime.UtcNow - new DateTime(1970, 1, 1)).TotalMilliseconds; Lucene.Net.Documents.Document doc = new Lucene.Net.Documents.Document(); // add time that is in the past doc.Add(new Field("datefield", Lucene.Net.Documents.DateTools.TimeToString(now - 1000 * 100000, Lucene.Net.Documents.DateTools.Resolution.MILLISECOND), Field.Store.YES, Field.Index.UN_TOKENIZED)); doc.Add(new Field("body", "Today is a very sunny day in New York City", Field.Store.YES, Field.Index.TOKENIZED)); writer.AddDocument(doc); writer.Optimize(); writer.Close(); IndexSearcher searcher = new IndexSearcher(indexStore); // filter that should preserve matches //DateFilter df1 = DateFilter.Before("datefield", now); RangeFilter df1 = new RangeFilter("datefield", Lucene.Net.Documents.DateTools.TimeToString(now - 2000 * 100000, Lucene.Net.Documents.DateTools.Resolution.MILLISECOND), Lucene.Net.Documents.DateTools.TimeToString(now, Lucene.Net.Documents.DateTools.Resolution.MILLISECOND), false, true); // filter that should discard matches //DateFilter df2 = DateFilter.Before("datefield", now - 999999); RangeFilter df2 = new RangeFilter("datefield", Lucene.Net.Documents.DateTools.TimeToString(0, Lucene.Net.Documents.DateTools.Resolution.MILLISECOND), Lucene.Net.Documents.DateTools.TimeToString(now - 2000 * 100000, Lucene.Net.Documents.DateTools.Resolution.MILLISECOND), true, false); // search something that doesn't exist with DateFilter Query query1 = new TermQuery(new Term("body", "NoMatchForThis")); // search for something that does exists Query query2 = new TermQuery(new Term("body", "sunny")); Hits result; // ensure that queries return expected results without DateFilter first result = searcher.Search(query1); Assert.AreEqual(0, result.Length()); result = searcher.Search(query2); Assert.AreEqual(1, result.Length()); // run queries with DateFilter result = searcher.Search(query1, df1); Assert.AreEqual(0, result.Length()); result = searcher.Search(query1, df2); Assert.AreEqual(0, result.Length()); result = searcher.Search(query2, df1); Assert.AreEqual(1, result.Length()); result = searcher.Search(query2, df2); Assert.AreEqual(0, result.Length()); }
public virtual void TestSlopScoring() { Directory directory = new RAMDirectory(); IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED); Document doc = new Document(); doc.Add(new Field("field", "foo firstname lastname foo", Field.Store.YES, Field.Index.ANALYZED)); writer.AddDocument(doc); Document doc2 = new Document(); doc2.Add(new Field("field", "foo firstname xxx lastname foo", Field.Store.YES, Field.Index.ANALYZED)); writer.AddDocument(doc2); Document doc3 = new Document(); doc3.Add(new Field("field", "foo firstname xxx yyy lastname foo", Field.Store.YES, Field.Index.ANALYZED)); writer.AddDocument(doc3); writer.Optimize(); writer.Close(); Searcher searcher = new IndexSearcher(directory, true); PhraseQuery query = new PhraseQuery(); query.Add(new Term("field", "firstname")); query.Add(new Term("field", "lastname")); query.Slop = System.Int32.MaxValue; ScoreDoc[] hits = searcher.Search(query, null, 1000).ScoreDocs; Assert.AreEqual(3, hits.Length); // Make sure that those matches where the terms appear closer to // each other get a higher score: Assert.AreEqual(0.71, hits[0].Score, 0.01); Assert.AreEqual(0, hits[0].Doc); Assert.AreEqual(0.44, hits[1].Score, 0.01); Assert.AreEqual(1, hits[1].Doc); Assert.AreEqual(0.31, hits[2].Score, 0.01); Assert.AreEqual(2, hits[2].Doc); QueryUtils.Check(query, searcher); }
public virtual void TestBefore() { // create an index RAMDirectory indexStore = new RAMDirectory(); IndexWriter writer = new IndexWriter(indexStore, new SimpleAnalyzer(), true); long now = (System.DateTime.Now.Ticks - 621355968000000000) / 10000; Document doc = new Document(); // add time that is in the past doc.Add(Field.Keyword("datefield", DateField.TimeToString(now - 1000))); doc.Add(Field.Text("body", "Today is a very sunny day in New York City")); writer.AddDocument(doc); writer.Optimize(); writer.Close(); IndexSearcher searcher = new IndexSearcher(indexStore); // filter that should preserve matches DateFilter df1 = DateFilter.Before("datefield", now); // filter that should discard matches DateFilter df2 = DateFilter.Before("datefield", now - 999999); // search something that doesn't exist with DateFilter Query query1 = new TermQuery(new Term("body", "NoMatchForThis")); // search for something that does exists Query query2 = new TermQuery(new Term("body", "sunny")); Hits result; // ensure that queries return expected results without DateFilter first result = searcher.Search(query1); Assert.AreEqual(0, result.Length()); result = searcher.Search(query2); Assert.AreEqual(1, result.Length()); // run queries with DateFilter result = searcher.Search(query1, df1); Assert.AreEqual(0, result.Length()); result = searcher.Search(query1, df2); Assert.AreEqual(0, result.Length()); result = searcher.Search(query2, df1); Assert.AreEqual(1, result.Length()); result = searcher.Search(query2, df2); Assert.AreEqual(0, result.Length()); }
public virtual void SetUp() { // Create an index writer. directory = new RAMDirectory(); IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true); for (int i = 0; i < N; i++) { writer.AddDocument(CreateDocument(i)); } writer.Optimize(); writer.Close(); }
public virtual void TestMethod() { RAMDirectory directory = new RAMDirectory(); System.String[] categories = new System.String[]{"food", "foodanddrink", "foodanddrinkandgoodtimes", "food and drink"}; Query rw1 = null; Query rw2 = null; try { IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true); for (int i = 0; i < categories.Length; i++) { Document doc = new Document(); doc.Add(Field.Keyword("category", categories[i])); writer.AddDocument(doc); } writer.Close(); IndexReader reader = IndexReader.Open(directory); PrefixQuery query = new PrefixQuery(new Term("category", "foo")); rw1 = query.Rewrite(reader); BooleanQuery bq = new BooleanQuery(); bq.Add(query, true, false); rw2 = bq.Rewrite(reader); } catch (System.IO.IOException e) { Assert.Fail(e.Message); } BooleanQuery bq1 = null; if (rw1 is BooleanQuery) { bq1 = (BooleanQuery) rw1; } BooleanQuery bq2 = null; if (rw2 is BooleanQuery) { bq2 = (BooleanQuery) rw2; } else { Assert.Fail("Rewrite"); } Assert.AreEqual(bq1.GetClauses().Length, bq2.GetClauses().Length, "Number of Clauses Mismatch"); }
private static void IndexDocsWithFacetsNoTerms(IndexWriter indexWriter, TaxonomyWriter taxoWriter, IDictionary<string, int?> expectedCounts) { Random random = Random(); int numDocs = AtLeast(random, 2); FacetsConfig config = Config; for (int i = 0; i < numDocs; i++) { Document doc = new Document(); AddFacets(doc, config, false); indexWriter.AddDocument(config.Build(taxoWriter, doc)); } indexWriter.Commit(); // flush a segment }