public virtual void SetUp()
{
    directory = new RAMDirectory();
    IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true);
    Document doc = new Document();
    doc.Add(Field.Text("Field", "one two three four five"));
    doc.Add(Field.Text("sorter", "b"));
    writer.AddDocument(doc);
    doc = new Document();
    doc.Add(Field.Text("Field", "one two three four"));
    doc.Add(Field.Text("sorter", "d"));
    writer.AddDocument(doc);
    doc = new Document();
    doc.Add(Field.Text("Field", "one two three y"));
    doc.Add(Field.Text("sorter", "a"));
    writer.AddDocument(doc);
    doc = new Document();
    doc.Add(Field.Text("Field", "one two x"));
    doc.Add(Field.Text("sorter", "c"));
    writer.AddDocument(doc);
    writer.Optimize();
    writer.Close();
    searcher = new IndexSearcher(directory);
    query = new TermQuery(new Term("Field", "three"));
    filter = new AnonymousClassFilter(this);
}
override public void Run()
{
    try
    {
        for (int j = 0; j < Lucene.Net.Index.TestThreadedOptimize.NUM_ITER2; j++)
        {
            writerFinal.Optimize(false);
            for (int k = 0; k < 17 * (1 + iFinal); k++)
            {
                Document d = new Document();
                d.Add(new Field("id", iterFinal + "_" + iFinal + "_" + j + "_" + k, Field.Store.YES, Field.Index.NOT_ANALYZED));
                d.Add(new Field("contents", English.IntToEnglish(iFinal + k), Field.Store.NO, Field.Index.ANALYZED));
                writerFinal.AddDocument(d);
            }
            for (int k = 0; k < 9 * (1 + iFinal); k++)
                writerFinal.DeleteDocuments(new Term("id", iterFinal + "_" + iFinal + "_" + j + "_" + k));
            writerFinal.Optimize();
        }
    }
    catch (System.Exception t)
    {
        Enclosing_Instance.setFailed();
        System.Console.Out.WriteLine(SupportClass.ThreadClass.Current().Name + ": hit exception");
        System.Console.Out.WriteLine(t.StackTrace);
    }
}
public static LDocument Convert(Document doc, Schema schema)
{
    var ldoc = new LDocument();
    foreach (var sf in schema.Fields)
    {
        foreach (var lf in Convert(sf, doc))
        {
            ldoc.Add(lf);
        }
    }
    ldoc.Add(new LField(SchemaNameField, schema.Name, ConvertToStore(true, false), ConvertToIndexFlag(false, false)));
    ldoc.Add(new LField(SchemaVersionField, schema.Version, ConvertToStore(true, false), ConvertToIndexFlag(false, false)));
    return ldoc;
}
public override void SetUp()
{
    base.SetUp();
    Document doc;
    RAMDirectory rd1 = new RAMDirectory();
    IndexWriter iw1 = new IndexWriter(rd1, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    doc = new Document();
    doc.Add(new Field("field1", "the quick brown fox jumps", Field.Store.YES, Field.Index.ANALYZED));
    doc.Add(new Field("field2", "the quick brown fox jumps", Field.Store.YES, Field.Index.ANALYZED));
    doc.Add(new Field("field4", "", Field.Store.NO, Field.Index.ANALYZED));
    iw1.AddDocument(doc);
    iw1.Close();
    RAMDirectory rd2 = new RAMDirectory();
    IndexWriter iw2 = new IndexWriter(rd2, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    doc = new Document();
    doc.Add(new Field("field0", "", Field.Store.NO, Field.Index.ANALYZED));
    doc.Add(new Field("field1", "the fox jumps over the lazy dog", Field.Store.YES, Field.Index.ANALYZED));
    doc.Add(new Field("field3", "the fox jumps over the lazy dog", Field.Store.YES, Field.Index.ANALYZED));
    iw2.AddDocument(doc);
    iw2.Close();
    this.ir1 = IndexReader.Open(rd1, true);
    this.ir2 = IndexReader.Open(rd2, true);
}
public override void SetUp()
{
    base.SetUp();
    directory = new RAMDirectory();
    IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    Document doc = new Document();
    doc.Add(new Field("field", "one two three four five", Field.Store.YES, Field.Index.ANALYZED));
    doc.Add(new Field("sorter", "b", Field.Store.YES, Field.Index.ANALYZED));
    writer.AddDocument(doc);
    doc = new Document();
    doc.Add(new Field("field", "one two three four", Field.Store.YES, Field.Index.ANALYZED));
    doc.Add(new Field("sorter", "d", Field.Store.YES, Field.Index.ANALYZED));
    writer.AddDocument(doc);
    doc = new Document();
    doc.Add(new Field("field", "one two three y", Field.Store.YES, Field.Index.ANALYZED));
    doc.Add(new Field("sorter", "a", Field.Store.YES, Field.Index.ANALYZED));
    writer.AddDocument(doc);
    doc = new Document();
    doc.Add(new Field("field", "one two x", Field.Store.YES, Field.Index.ANALYZED));
    doc.Add(new Field("sorter", "c", Field.Store.YES, Field.Index.ANALYZED));
    writer.AddDocument(doc);
    writer.Optimize();
    writer.Close();
    searcher = new IndexSearcher(directory);
    query = new TermQuery(new Term("field", "three"));
    filter = NewStaticFilterB();
}
/*public TestCustomSearcherSort(System.String name) : base(name) { }*/
/*[STAThread]
public static void Main(System.String[] argv)
{
    // TestRunner.run(suite()); // {{Aroush-2.9}} how is this done in NUnit?
}*/
/*public static Test suite() { return new TestSuite(typeof(TestCustomSearcherSort)); }*/

// create an index for testing
private Directory GetIndex()
{
    RAMDirectory indexStore = new RAMDirectory();
    IndexWriter writer = new IndexWriter(indexStore, new StandardAnalyzer(Util.Version.LUCENE_CURRENT), true, IndexWriter.MaxFieldLength.LIMITED);
    RandomGen random = new RandomGen(this, NewRandom());
    for (int i = 0; i < INDEX_SIZE; ++i) // don't decrease; if too low the problem doesn't show up
    {
        Document doc = new Document();
        if ((i % 5) != 0)
        {
            // some documents must not have an entry in the first sort field
            doc.Add(new Field("publicationDate_", random.GetLuceneDate(), Field.Store.YES, Field.Index.NOT_ANALYZED));
        }
        if ((i % 7) == 0)
        {
            // some documents to match the query (see below)
            doc.Add(new Field("content", "test", Field.Store.YES, Field.Index.ANALYZED));
        }
        // every document has a defined 'mandant' field
        doc.Add(new Field("mandant", System.Convert.ToString(i % 3), Field.Store.YES, Field.Index.NOT_ANALYZED));
        writer.AddDocument(doc);
    }
    writer.Optimize();
    writer.Close();
    return indexStore;
}
/// <summary>Makes a Lucene Document for a file.
/// <para>The document has three fields:</para>
/// <list type="bullet">
/// <item><c>path</c> -- the pathname of the file, as a stored, tokenized field;</item>
/// <item><c>modified</c> -- the last modified date of the file, as a keyword field encoded by <see cref="DateField"/>;</item>
/// <item><c>contents</c> -- the full contents of the file, as a Reader field.</item>
/// </list>
/// </summary>
public static Document Document(System.IO.FileInfo f)
{
    // make a new, empty document
    Document doc = new Document();

    // Add the path of the file as a field named "path". Use a Text field, so
    // that the index stores the path, and so that the path is searchable.
    doc.Add(Field.Text("path", f.FullName));

    // Add the last modified date of the file as a field named "modified". Use a
    // Keyword field, so that it's searchable, but no attempt is made to tokenize it.
    // (621355968000000000 is the tick count at 1970-01-01, so this converts
    // .NET ticks to milliseconds since the Unix epoch.)
    doc.Add(Field.Keyword("modified", DateField.TimeToString((f.LastWriteTime.Ticks - 621355968000000000) / 10000)));

    // Add the contents of the file as a field named "contents". Use a Text
    // field with a Reader, so that the text of the file is tokenized.
    System.IO.FileStream is_Renamed = new System.IO.FileStream(f.FullName, System.IO.FileMode.Open, System.IO.FileAccess.Read);
    System.IO.StreamReader reader = new System.IO.StreamReader(is_Renamed, System.Text.Encoding.Default);
    doc.Add(Field.Text("contents", reader));

    // return the document
    return doc;
}
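// Usage sketch (illustrative, not part of the original sources): one way the
// Document(FileInfo) factory above would typically be driven. The method name
// IndexDirectory and the WhitespaceAnalyzer choice are assumptions for this
// example; any Analyzer and index location would work the same way.
public static void IndexDirectory(System.IO.DirectoryInfo dirInfo, Lucene.Net.Store.Directory indexDir)
{
    IndexWriter writer = new IndexWriter(indexDir, new WhitespaceAnalyzer(), true);
    foreach (System.IO.FileInfo f in dirInfo.GetFiles())
    {
        writer.AddDocument(Document(f)); // build a path/modified/contents document per file
    }
    writer.Optimize();
    writer.Close();
}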
public void IndexFile(string filePath)
{
    Lucene.Net.Documents.Document document = new Lucene.Net.Documents.Document();
    System.IO.StreamReader reader = new System.IO.StreamReader(filePath);
    document.Add(new Lucene.Net.Documents.Field("contents", reader));
    document.Add(new Lucene.Net.Documents.Field("filepath", filePath, Lucene.Net.Documents.Field.Store.YES, Lucene.Net.Documents.Field.Index.NOT_ANALYZED));
    this.IndexWriter.AddDocument(document);
    // the Reader field is consumed during AddDocument, so the reader can be
    // closed here to avoid leaking the file handle
    reader.Close();
}
public virtual void TestBefore()
{
    // create an index
    RAMDirectory indexStore = new RAMDirectory();
    IndexWriter writer = new IndexWriter(indexStore, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    long now = (DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond);
    Document doc = new Document();
    // add time that is in the past
    doc.Add(new Field("datefield", DateTools.TimeToString(now - 1000, DateTools.Resolution.MILLISECOND), Field.Store.YES, Field.Index.NOT_ANALYZED));
    doc.Add(new Field("body", "Today is a very sunny day in New York City", Field.Store.YES, Field.Index.ANALYZED));
    writer.AddDocument(doc);
    writer.Optimize();
    writer.Close();
    IndexSearcher searcher = new IndexSearcher(indexStore, true);
    // filter that should preserve matches
    //DateFilter df1 = DateFilter.Before("datefield", now);
    TermRangeFilter df1 = new TermRangeFilter("datefield", DateTools.TimeToString(now - 2000, DateTools.Resolution.MILLISECOND), DateTools.TimeToString(now, DateTools.Resolution.MILLISECOND), false, true);
    // filter that should discard matches
    //DateFilter df2 = DateFilter.Before("datefield", now - 999999);
    TermRangeFilter df2 = new TermRangeFilter("datefield", DateTools.TimeToString(0, DateTools.Resolution.MILLISECOND), DateTools.TimeToString(now - 2000, DateTools.Resolution.MILLISECOND), true, false);
    // search for something that doesn't exist with DateFilter
    Query query1 = new TermQuery(new Term("body", "NoMatchForThis"));
    // search for something that does exist
    Query query2 = new TermQuery(new Term("body", "sunny"));
    ScoreDoc[] result;
    // ensure that queries return expected results without DateFilter first
    result = searcher.Search(query1, null, 1000).ScoreDocs;
    Assert.AreEqual(0, result.Length);
    result = searcher.Search(query2, null, 1000).ScoreDocs;
    Assert.AreEqual(1, result.Length);
    // run queries with DateFilter
    result = searcher.Search(query1, df1, 1000).ScoreDocs;
    Assert.AreEqual(0, result.Length);
    result = searcher.Search(query1, df2, 1000).ScoreDocs;
    Assert.AreEqual(0, result.Length);
    result = searcher.Search(query2, df1, 1000).ScoreDocs;
    Assert.AreEqual(1, result.Length);
    result = searcher.Search(query2, df2, 1000).ScoreDocs;
    Assert.AreEqual(0, result.Length);
}
public virtual void TestBefore()
{
    // create an index
    RAMDirectory indexStore = new RAMDirectory();
    IndexWriter writer = new IndexWriter(indexStore, new SimpleAnalyzer(), true);
    // (Ticks - 621355968000000000) / 10000 converts .NET ticks to milliseconds since the Unix epoch
    long now = (System.DateTime.Now.Ticks - 621355968000000000) / 10000;
    Document doc = new Document();
    // add time that is in the past
    doc.Add(Field.Keyword("datefield", DateField.TimeToString(now - 1000)));
    doc.Add(Field.Text("body", "Today is a very sunny day in New York City"));
    writer.AddDocument(doc);
    writer.Optimize();
    writer.Close();
    IndexSearcher searcher = new IndexSearcher(indexStore);
    // filter that should preserve matches
    DateFilter df1 = DateFilter.Before("datefield", now);
    // filter that should discard matches
    DateFilter df2 = DateFilter.Before("datefield", now - 999999);
    // search for something that doesn't exist with DateFilter
    Query query1 = new TermQuery(new Term("body", "NoMatchForThis"));
    // search for something that does exist
    Query query2 = new TermQuery(new Term("body", "sunny"));
    Hits result;
    // ensure that queries return expected results without DateFilter first
    result = searcher.Search(query1);
    Assert.AreEqual(0, result.Length());
    result = searcher.Search(query2);
    Assert.AreEqual(1, result.Length());
    // run queries with DateFilter
    result = searcher.Search(query1, df1);
    Assert.AreEqual(0, result.Length());
    result = searcher.Search(query1, df2);
    Assert.AreEqual(0, result.Length());
    result = searcher.Search(query2, df1);
    Assert.AreEqual(1, result.Length());
    result = searcher.Search(query2, df2);
    Assert.AreEqual(0, result.Length());
}
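// Illustrative helper (not part of the original sources): several snippets in this
// file repeat the expression (Ticks - 621355968000000000) / 10000. 621355968000000000
// is new DateTime(1970, 1, 1).Ticks and one millisecond is 10,000 ticks, so the
// expression yields milliseconds since the Unix epoch, which is what the Java-derived
// DateField/DateTools APIs expect. A small named helper makes the intent explicit.
public static class EpochTime
{
    private const long UnixEpochTicks = 621355968000000000; // new DateTime(1970, 1, 1).Ticks

    // milliseconds elapsed between the Unix epoch and the given DateTime
    public static long ToUnixMilliseconds(System.DateTime t)
    {
        return (t.Ticks - UnixEpochTicks) / System.TimeSpan.TicksPerMillisecond;
    }
}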
public override void SetUp()
{
    base.SetUp();
    RAMDirectory directory = new RAMDirectory();
    IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    for (int i = 0; i < docFields.Length; i++)
    {
        Document doc = new Document();
        doc.Add(new Field(KEY, "" + i, Field.Store.NO, Field.Index.NOT_ANALYZED));
        doc.Add(new Field(FIELD, docFields[i], Field.Store.NO, Field.Index.ANALYZED));
        writer.AddDocument(doc);
    }
    writer.Close();
    searcher = new IndexSearcher(directory);
}
public override void SetUp()
{
    base.SetUp();
    directory = new RAMDirectory();
    IndexWriter writer = new IndexWriter(directory, new SimpleAnalyzer(), true);
    Document doc = new Document();
    doc.Add(new Field("partnum", "Q36", Field.Store.YES, Field.Index.UN_TOKENIZED));
    doc.Add(new Field("description", "Illidium Space Modulator", Field.Store.YES, Field.Index.TOKENIZED));
    writer.AddDocument(doc);
    writer.Close();
    searcher = new IndexSearcher(directory);
}
public virtual void TestBasic()
{
    AssumeTrue("Test requires SortedSetDV support", DefaultCodecSupportsSortedSet());
    Directory dir = NewDirectory();
    FacetsConfig config = new FacetsConfig();
    config.SetMultiValued("a", true);
    RandomIndexWriter writer = new RandomIndexWriter(Random(), dir);
    Document doc = new Document();
    doc.Add(new SortedSetDocValuesFacetField("a", "foo"));
    doc.Add(new SortedSetDocValuesFacetField("a", "bar"));
    doc.Add(new SortedSetDocValuesFacetField("a", "zoo"));
    doc.Add(new SortedSetDocValuesFacetField("b", "baz"));
    writer.AddDocument(config.Build(doc));
    if (Random().NextBoolean())
    {
        writer.Commit();
    }
    doc = new Document();
    doc.Add(new SortedSetDocValuesFacetField("a", "foo"));
    writer.AddDocument(config.Build(doc));
    // NRT open
    IndexSearcher searcher = NewSearcher(writer.Reader);
    // Per-top-reader state:
    SortedSetDocValuesReaderState state = new DefaultSortedSetDocValuesReaderState(searcher.IndexReader);
    FacetsCollector c = new FacetsCollector();
    searcher.Search(new MatchAllDocsQuery(), c);
    SortedSetDocValuesFacetCounts facets = new SortedSetDocValuesFacetCounts(state, c);
    Assert.AreEqual("dim=a path=[] value=4 childCount=3\n foo (2)\n bar (1)\n zoo (1)\n", facets.GetTopChildren(10, "a").ToString());
    Assert.AreEqual("dim=b path=[] value=1 childCount=1\n baz (1)\n", facets.GetTopChildren(10, "b").ToString());
    // DrillDown:
    DrillDownQuery q = new DrillDownQuery(config);
    q.Add("a", "foo");
    q.Add("b", "baz");
    TopDocs hits = searcher.Search(q, 1);
    Assert.AreEqual(1, hits.TotalHits);
    IOUtils.Close(writer, searcher.IndexReader, dir);
}
public virtual void TestNullOrSubScorer()
{
    Directory dir = new MockRAMDirectory();
    IndexWriter w = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED);
    Document doc = new Document();
    doc.Add(new Field("field", "a b c d", Field.Store.NO, Field.Index.ANALYZED));
    w.AddDocument(doc);
    IndexReader r = w.GetReader();
    IndexSearcher s = new IndexSearcher(r);
    BooleanQuery q = new BooleanQuery();
    q.Add(new TermQuery(new Term("field", "a")), Occur.SHOULD);
    // LUCENE-2617: make sure that a term not in the index still contributes to the score via coord factor
    float score = s.Search(q, 10).MaxScore;
    Query subQuery = new TermQuery(new Term("field", "not_in_index"));
    subQuery.Boost = 0;
    q.Add(subQuery, Occur.SHOULD);
    float score2 = s.Search(q, 10).MaxScore;
    Assert.AreEqual(score * .5, score2, 1e-6);
    // LUCENE-2617: make sure that a clause not in the index still contributes to the score via coord factor
    BooleanQuery qq = (BooleanQuery) q.Clone();
    PhraseQuery phrase = new PhraseQuery();
    phrase.Add(new Term("field", "not_in_index"));
    phrase.Add(new Term("field", "another_not_in_index"));
    phrase.Boost = 0;
    qq.Add(phrase, Occur.SHOULD);
    score2 = s.Search(qq, 10).MaxScore;
    Assert.AreEqual(score * (1.0 / 3), score2, 1e-6);
    // now test BooleanScorer2
    subQuery = new TermQuery(new Term("field", "b"));
    subQuery.Boost = 0;
    q.Add(subQuery, Occur.MUST);
    score2 = s.Search(q, 10).MaxScore;
    Assert.AreEqual(score * (2.0 / 3), score2, 1e-6);
    // PhraseQuery w/ no terms added returns a null scorer
    PhraseQuery pq = new PhraseQuery();
    q.Add(pq, Occur.SHOULD);
    Assert.AreEqual(1, s.Search(q, 10).TotalHits);
    // A required clause which returns null scorer should return null scorer to
    // IndexSearcher.
    q = new BooleanQuery();
    pq = new PhraseQuery();
    q.Add(new TermQuery(new Term("field", "a")), Occur.SHOULD);
    q.Add(pq, Occur.MUST);
    Assert.AreEqual(0, s.Search(q, 10).TotalHits);
    DisjunctionMaxQuery dmq = new DisjunctionMaxQuery(1.0f);
    dmq.Add(new TermQuery(new Term("field", "a")));
    dmq.Add(pq);
    Assert.AreEqual(1, s.Search(dmq, 10).TotalHits);
    r.Close();
    w.Close();
    dir.Close();
}
public virtual void TestDemo_Renamed_Method()
{
    Analyzer analyzer = new StandardAnalyzer();
    // Store the index in memory:
    Directory directory = new RAMDirectory();
    // To store an index on disk, use this instead (note that the
    // parameter true will overwrite the index in that directory
    // if one exists):
    //Directory directory = FSDirectory.getDirectory("/tmp/testindex", true);
    IndexWriter iwriter = new IndexWriter(directory, analyzer, true);
    iwriter.SetMaxFieldLength(25000);
    Document doc = new Document();
    System.String text = "This is the text to be indexed.";
    doc.Add(new Field("fieldname", text, Field.Store.YES, Field.Index.TOKENIZED));
    iwriter.AddDocument(doc);
    iwriter.Close();
    // Now search the index:
    IndexSearcher isearcher = new IndexSearcher(directory);
    // Parse a simple query that searches for "text":
    Lucene.Net.QueryParsers.QueryParser parser = new Lucene.Net.QueryParsers.QueryParser("fieldname", analyzer);
    Query query = parser.Parse("text");
    Hits hits = isearcher.Search(query);
    Assert.AreEqual(1, hits.Length());
    // Iterate through the results:
    for (int i = 0; i < hits.Length(); i++)
    {
        Document hitDoc = hits.Doc(i);
        Assert.AreEqual("This is the text to be indexed.", hitDoc.Get("fieldname"));
    }
    isearcher.Close();
    directory.Close();
}
/// <summary>
/// Indexes the given document.
/// </summary>
/// <param name="doc">the document whose fields should be added to the index</param>
/// <returns>true if the document had fields and was handed to the index writer; otherwise false</returns>
public bool AddDocument(Document doc)
{
    var status = true;
    var fields = doc.GetFields();
    if (fields == null || fields.Length == 0)
    {
        status = false;
    }
    else
    {
        var luceneDoc = new Lucene.Net.Documents.Document();
        foreach (var field in fields)
        {
            luceneDoc.Add(new Lucene.Net.Documents.Field(
                field.FieldName,              // field name
                field.FieldValue.ToString(),  // field value
                field.Store ? Lucene.Net.Documents.Field.Store.YES : Lucene.Net.Documents.Field.Store.NO,
                field.Analyse ? Lucene.Net.Documents.Field.Index.ANALYZED : Lucene.Net.Documents.Field.Index.NOT_ANALYZED));
        }
        this.indexWriter.AddDocument(luceneDoc);
    }
    return status;
}
public virtual void TestAddSameDocTwice()
{
    // LUCENE-5367: this was a problem with the previous code, making sure it
    // works with the new code.
    Directory indexDir = NewDirectory(), taxoDir = NewDirectory();
    IndexWriter indexWriter = new IndexWriter(indexDir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())));
    DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);
    FacetsConfig facetsConfig = new FacetsConfig();
    Document doc = new Document();
    doc.Add(new FacetField("a", "b"));
    doc = facetsConfig.Build(taxoWriter, doc);
    // these two addDocument() used to fail
    indexWriter.AddDocument(doc);
    indexWriter.AddDocument(doc);
    IOUtils.Close(indexWriter, taxoWriter);
    DirectoryReader indexReader = DirectoryReader.Open(indexDir);
    DirectoryTaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir);
    IndexSearcher searcher = NewSearcher(indexReader);
    FacetsCollector fc = new FacetsCollector();
    searcher.Search(new MatchAllDocsQuery(), fc);
    Facets facets = GetTaxonomyFacetCounts(taxoReader, facetsConfig, fc);
    FacetResult res = facets.GetTopChildren(10, "a");
    Assert.AreEqual(1, res.LabelValues.Length);
    Assert.AreEqual(2, res.LabelValues[0].value);
    IOUtils.Close(indexReader, taxoReader);
    IOUtils.Close(indexDir, taxoDir);
}
public override void SetUp()
{
    base.SetUp();
    IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    for (int i = 0; i < 5137; ++i)
    {
        Document doc = new Document();
        doc.Add(new Field(FIELD, "meaninglessnames", Field.Store.YES, Field.Index.NOT_ANALYZED));
        writer.AddDocument(doc);
    }
    {
        Document doc = new Document();
        doc.Add(new Field(FIELD, "tangfulin", Field.Store.YES, Field.Index.NOT_ANALYZED));
        writer.AddDocument(doc);
    }
    for (int i = 5138; i < 11377; ++i)
    {
        Document doc = new Document();
        doc.Add(new Field(FIELD, "meaninglessnames", Field.Store.YES, Field.Index.NOT_ANALYZED));
        writer.AddDocument(doc);
    }
    {
        Document doc = new Document();
        doc.Add(new Field(FIELD, "tangfulin", Field.Store.YES, Field.Index.NOT_ANALYZED));
        writer.AddDocument(doc);
    }
    writer.Close();
}
public virtual void TestBasic()
{
    Directory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    Document doc = new Document();
    doc.Add(new Field("field", "value", Field.Store.NO, Field.Index.ANALYZED));
    writer.AddDocument(doc);
    writer.Close();
    TermQuery termQuery = new TermQuery(new Term("field", "value"));
    // should not throw exception with primitive query
    QueryWrapperFilter qwf = new QueryWrapperFilter(termQuery);
    IndexSearcher searcher = new IndexSearcher(dir, true);
    TopDocs hits = searcher.Search(new MatchAllDocsQuery(), qwf, 10);
    Assert.AreEqual(1, hits.totalHits);
    // should not throw exception with complex primitive query
    BooleanQuery booleanQuery = new BooleanQuery();
    booleanQuery.Add(termQuery, Occur.MUST);
    booleanQuery.Add(new TermQuery(new Term("field", "missing")), Occur.MUST_NOT);
    qwf = new QueryWrapperFilter(booleanQuery); // wrap the boolean query, not the plain term query
    hits = searcher.Search(new MatchAllDocsQuery(), qwf, 10);
    Assert.AreEqual(1, hits.totalHits);
    // should not throw exception with non primitive Query (doesn't implement Query#createWeight)
    qwf = new QueryWrapperFilter(new FuzzyQuery(new Term("field", "valu")));
    hits = searcher.Search(new MatchAllDocsQuery(), qwf, 10);
    Assert.AreEqual(1, hits.totalHits);
}
public virtual void CreateRandomTerms(int nDocs, int nTerms, double power, Directory dir)
{
    int[] freq = new int[nTerms];
    for (int i = 0; i < nTerms; i++)
    {
        int f = (nTerms + 1) - i; // make first terms less frequent
        freq[i] = (int) System.Math.Ceiling(System.Math.Pow(f, power));
        terms[i] = new Term("f", System.Convert.ToString((char) ('A' + i)));
    }
    IndexWriter iw = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
    for (int i = 0; i < nDocs; i++)
    {
        Document d = new Document();
        for (int j = 0; j < nTerms; j++)
        {
            if (r.Next(freq[j]) == 0)
            {
                d.Add(new Field("f", terms[j].Text(), Field.Store.NO, Field.Index.UN_TOKENIZED));
                //System.out.println(d);
            }
        }
        iw.AddDocument(d);
    }
    iw.Optimize();
    iw.Close();
}
public void SearchFiltered(IndexWriter writer, Directory directory, Filter filter, bool optimize)
{
    try
    {
        for (int i = 0; i < 60; i++)
        {
            // simple docs
            Document doc = new Document();
            doc.Add(new Field(FIELD, i.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED));
            writer.AddDocument(doc);
        }
        if (optimize)
            writer.Optimize();
        writer.Close();
        BooleanQuery booleanQuery = new BooleanQuery();
        booleanQuery.Add(new TermQuery(new Term(FIELD, "36")), Occur.SHOULD);
        IndexSearcher indexSearcher = new IndexSearcher(directory);
        ScoreDoc[] hits = indexSearcher.Search(booleanQuery, filter, 1000).ScoreDocs;
        Assert.AreEqual(1, hits.Length, "Number of matched documents");
    }
    catch (System.IO.IOException e)
    {
        Assert.Fail(e.Message);
    }
}
public virtual void TestNullOrSubScorer()
{
    Directory dir = new MockRAMDirectory();
    IndexWriter w = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED);
    Document doc = new Document();
    doc.Add(new Field("field", "a b c d", Field.Store.NO, Field.Index.ANALYZED));
    w.AddDocument(doc);
    IndexReader r = w.GetReader();
    IndexSearcher s = new IndexSearcher(r);
    BooleanQuery q = new BooleanQuery();
    q.Add(new TermQuery(new Term("field", "a")), BooleanClause.Occur.SHOULD);
    // PhraseQuery w/ no terms added returns a null scorer
    PhraseQuery pq = new PhraseQuery();
    q.Add(pq, BooleanClause.Occur.SHOULD);
    Assert.AreEqual(1, s.Search(q, 10).TotalHits);
    // A required clause which returns null scorer should return null scorer to
    // IndexSearcher.
    q = new BooleanQuery();
    pq = new PhraseQuery();
    q.Add(new TermQuery(new Term("field", "a")), BooleanClause.Occur.SHOULD);
    q.Add(pq, BooleanClause.Occur.MUST);
    Assert.AreEqual(0, s.Search(q, 10).TotalHits);
    DisjunctionMaxQuery dmq = new DisjunctionMaxQuery(1.0f);
    dmq.Add(new TermQuery(new Term("field", "a")));
    dmq.Add(pq);
    Assert.AreEqual(1, s.Search(dmq, 10).TotalHits);
    r.Close();
    w.Close();
    dir.Close();
}
public virtual void TestFilterWorks()
{
    Directory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    for (int i = 0; i < 500; i++)
    {
        Document document = new Document();
        document.Add(new Field("field", English.IntToEnglish(i) + " equals " + English.IntToEnglish(i), Field.Store.NO, Field.Index.ANALYZED));
        writer.AddDocument(document);
    }
    writer.Close();
    IndexReader reader = IndexReader.Open(dir, true);
    SpanTermQuery query = new SpanTermQuery(new Term("field", English.IntToEnglish(10).Trim()));
    SpanQueryFilter filter = new SpanQueryFilter(query);
    SpanFilterResult result = filter.BitSpans(reader);
    DocIdSet docIdSet = result.DocIdSet;
    Assert.IsTrue(docIdSet != null, "docIdSet is null and it shouldn't be");
    AssertContainsDocId("docIdSet doesn't contain docId 10", docIdSet, 10);
    var spans = result.Positions;
    Assert.IsTrue(spans != null, "spans is null and it shouldn't be");
    int size = GetDocIdSetSize(docIdSet);
    Assert.IsTrue(spans.Count == size, "spans Size: " + spans.Count + " is not: " + size);
    for (System.Collections.IEnumerator iterator = spans.GetEnumerator(); iterator.MoveNext(); )
    {
        SpanFilterResult.PositionInfo info = (SpanFilterResult.PositionInfo) iterator.Current;
        Assert.IsTrue(info != null, "info is null and it shouldn't be");
        // the doc should indicate the bit is on
        AssertContainsDocId("docIdSet doesn't contain docId " + info.Doc, docIdSet, info.Doc);
        // there should be two positions in each
        Assert.IsTrue(info.Positions.Count == 2, "info.getPositions() Size: " + info.Positions.Count + " is not: " + 2);
    }
    reader.Close();
}
public override void SetUp()
{
    base.SetUp();
    IndexWriter writer = new IndexWriter(directory, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    //writer.setUseCompoundFile(true);
    //writer.infoStream = System.out;
    for (int i = 0; i < 1000; i++)
    {
        Document doc = new Document();
        Field.TermVector termVector;
        int mod3 = i % 3;
        int mod2 = i % 2;
        if (mod2 == 0 && mod3 == 0)
        {
            termVector = Field.TermVector.WITH_POSITIONS_OFFSETS;
        }
        else if (mod2 == 0)
        {
            termVector = Field.TermVector.WITH_POSITIONS;
        }
        else if (mod3 == 0)
        {
            termVector = Field.TermVector.WITH_OFFSETS;
        }
        else
        {
            termVector = Field.TermVector.YES;
        }
        doc.Add(new Field("field", English.IntToEnglish(i), Field.Store.YES, Field.Index.ANALYZED, termVector));
        writer.AddDocument(doc);
    }
    writer.Close();
    searcher = new IndexSearcher(directory, true);
}
override public void Run()
{
    Document doc = new Document();
    doc.Add(new Field("content", "aaa", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
    while ((DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond) < stopTime)
    {
        for (int i = 0; i < 27; i++)
        {
            try
            {
                writer.AddDocument(doc);
            }
            catch (System.Exception t)
            {
                System.Console.Out.WriteLine(t.StackTrace);
                Assert.Fail("addDocument failed");
            }
        }
        try
        {
            // 10,000 ticks = 1 ms
            System.Threading.Thread.Sleep(new System.TimeSpan((System.Int64) 10000 * 1));
        }
        catch (System.Threading.ThreadInterruptedException ie)
        {
            SupportClass.ThreadClass.Current().Interrupt();
            throw new System.SystemException("", ie);
        }
    }
}
override public void Run()
{
    Document doc = new Document();
    doc.Add(new Field("content", "aaa", Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
    // (Ticks - 621355968000000000) / 10000 converts .NET ticks to milliseconds since the Unix epoch
    while ((System.DateTime.Now.Ticks - 621355968000000000) / 10000 < stopTime)
    {
        for (int i = 0; i < 27; i++)
        {
            try
            {
                writer.AddDocument(doc);
            }
            catch (System.IO.IOException cie)
            {
                System.SystemException re = new System.SystemException("addDocument failed", cie);
                throw re;
            }
        }
        try
        {
            // 10,000 ticks = 1 ms
            System.Threading.Thread.Sleep(new System.TimeSpan((System.Int64) 10000 * 1));
        }
        catch (System.Threading.ThreadInterruptedException)
        {
            SupportClass.ThreadClass.Current().Interrupt();
        }
    }
}
public virtual void TestSimpleSkip()
{
    RAMDirectory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new PayloadAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    Term term = new Term("test", "a");
    for (int i = 0; i < 5000; i++)
    {
        Document d1 = new Document();
        d1.Add(new Field(term.Field(), term.Text(), Field.Store.NO, Field.Index.ANALYZED));
        writer.AddDocument(d1);
    }
    writer.Flush();
    writer.Optimize();
    writer.Close();
    IndexReader reader = SegmentReader.GetOnlySegmentReader(dir);
    SegmentTermPositions tp = (SegmentTermPositions) reader.TermPositions();
    tp.freqStream_ForNUnit = new CountingStream(this, tp.freqStream_ForNUnit);
    for (int i = 0; i < 2; i++)
    {
        counter = 0;
        tp.Seek(term);
        CheckSkipTo(tp, 14, 185);   // no skips
        CheckSkipTo(tp, 17, 190);   // one skip on level 0
        CheckSkipTo(tp, 287, 200);  // one skip on level 1, two on level 0
        // this test would fail if we had only one skip level,
        // because then more bytes would be read from the freqStream
        CheckSkipTo(tp, 4800, 250); // one skip on level 2
    }
}
void Index()
{
    Lucene.Net.Index.IndexWriter wr = new Lucene.Net.Index.IndexWriter(dir, new Lucene.Net.Analysis.WhitespaceAnalyzer(), Lucene.Net.Index.IndexWriter.MaxFieldLength.UNLIMITED);
    Lucene.Net.Documents.Document doc = null;
    Lucene.Net.Documents.Field f = null;
    doc = new Lucene.Net.Documents.Document();
    f = new Lucene.Net.Documents.Field("field", "a b c d", Lucene.Net.Documents.Field.Store.NO, Lucene.Net.Documents.Field.Index.ANALYZED);
    doc.Add(f);
    wr.AddDocument(doc);
    doc = new Lucene.Net.Documents.Document();
    f = new Lucene.Net.Documents.Field("field", "a b a d", Lucene.Net.Documents.Field.Store.NO, Lucene.Net.Documents.Field.Index.ANALYZED);
    doc.Add(f);
    wr.AddDocument(doc);
    doc = new Lucene.Net.Documents.Document();
    f = new Lucene.Net.Documents.Field("field", "a b e f", Lucene.Net.Documents.Field.Store.NO, Lucene.Net.Documents.Field.Index.ANALYZED);
    doc.Add(f);
    wr.AddDocument(doc);
    doc = new Lucene.Net.Documents.Document();
    f = new Lucene.Net.Documents.Field("field", "x y z", Lucene.Net.Documents.Field.Store.NO, Lucene.Net.Documents.Field.Index.ANALYZED);
    doc.Add(f);
    wr.AddDocument(doc);
    wr.Close();
}
public virtual void TestDemo_Renamed()
{
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT);
    // Store the index in memory:
    Directory directory = new RAMDirectory();
    // To store an index on disk, use this instead:
    //Directory directory = FSDirectory.open("/tmp/testindex");
    IndexWriter iwriter = new IndexWriter(directory, analyzer, true, new IndexWriter.MaxFieldLength(25000));
    Document doc = new Document();
    System.String text = "This is the text to be indexed.";
    doc.Add(new Field("fieldname", text, Field.Store.YES, Field.Index.ANALYZED));
    iwriter.AddDocument(doc);
    iwriter.Close();
    // Now search the index:
    IndexSearcher isearcher = new IndexSearcher(directory, true); // read-only=true
    // Parse a simple query that searches for "text":
    QueryParser parser = new QueryParser("fieldname", analyzer);
    Query query = parser.Parse("text");
    ScoreDoc[] hits = isearcher.Search(query, null, 1000).scoreDocs;
    Assert.AreEqual(1, hits.Length);
    // Iterate through the results:
    for (int i = 0; i < hits.Length; i++)
    {
        Document hitDoc = isearcher.Doc(hits[i].doc);
        Assert.AreEqual(hitDoc.Get("fieldname"), "This is the text to be indexed.");
    }
    isearcher.Close();
    directory.Close();
}
private IndexWriter InitIndex(MockRAMDirectory dir)
{
    dir.SetLockFactory(NoLockFactory.Instance);
    IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED);
    //writer.setMaxBufferedDocs(2);
    writer.SetMaxBufferedDocs(10);
    ((ConcurrentMergeScheduler) writer.MergeScheduler).SetSuppressExceptions();
    Document doc = new Document();
    doc.Add(new Field("content", "aaa", Field.Store.YES, Field.Index.ANALYZED));
    doc.Add(new Field("id", "0", Field.Store.YES, Field.Index.ANALYZED));
    for (int i = 0; i < 157; i++)
        writer.AddDocument(doc);
    return writer;
}
public override void SetUp()
{
    base.SetUp();
    RAMDirectory directory = new RAMDirectory();
    IndexWriter writer = new IndexWriter(directory, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    //writer.infoStream = System.out;
    for (int i = 0; i < 1000; i++)
    {
        Document doc = new Document();
        doc.Add(new Field("field", English.IntToEnglish(i), Field.Store.YES, Field.Index.ANALYZED));
        writer.AddDocument(doc);
    }
    writer.Close();
    searcher = new IndexSearcher(directory);
}
public override void TestInitialize()
{
    base.TestInitialize();
    Directory = new SqlServerDirectory(Connection, new Options());
    IndexWriter writer = new IndexWriter(Directory, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    for (int i = 0; i < Values.Length; i++)
    {
        Document doc = new Document();
        doc.Add(new Field(FIELD, Values[i], Field.Store.YES, Field.Index.ANALYZED));
        writer.AddDocument(doc);
    }
    writer.Close();
    IndexSearcher = new IndexSearcher(Directory, false);
    indexReader = IndexSearcher.IndexReader;
}
public override void SetUp()
{
    base.SetUp();
    directory = new RAMDirectory();
    IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    for (int i = 0; i < values.Length; i++)
    {
        Document doc = new Document();
        doc.Add(new Field(FIELD, values[i], Field.Store.YES, Field.Index.ANALYZED));
        writer.AddDocument(doc);
    }
    writer.Close();
    indexSearcher = new IndexSearcher(directory, false);
    indexReader = indexSearcher.IndexReader;
}
public virtual void TestCloseStoredFields()
{
    Directory dir = new MockRAMDirectory();
    IndexWriter w = new IndexWriter(dir, new SimpleAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED);
    w.SetUseCompoundFile(false);
    Document doc = new Document();
    doc.Add(new Field("field", "yes it's stored", Field.Store.YES, Field.Index.ANALYZED));
    w.AddDocument(doc);
    w.Close();
    IndexReader r1 = IndexReader.Open(dir);
    IndexReader r2 = r1.Clone(false);
    r1.Close();
    r2.Close();
    dir.Close();
}
private Lucene.Net.Documents.Document CreateDocument(LocationModel location)
{
    Lucene.Net.Documents.Document doc = new Lucene.Net.Documents.Document();
    doc.Add(new Lucene.Net.Documents.Field("ID", location.ID.ToString(), Lucene.Net.Documents.Field.Store.YES, Lucene.Net.Documents.Field.Index.ANALYZED));
    doc.Add(new Lucene.Net.Documents.Field("PLC_ID", location.PLC_ID, Lucene.Net.Documents.Field.Store.YES, Lucene.Net.Documents.Field.Index.ANALYZED));
    doc.Add(new Lucene.Net.Documents.Field("ZIP_CD", location.ZIP_CD, Lucene.Net.Documents.Field.Store.YES, Lucene.Net.Documents.Field.Index.ANALYZED));
    doc.Add(new Lucene.Net.Documents.Field("FRMTD_ADDR", location.FRMTD_ADDR, Lucene.Net.Documents.Field.Store.YES, Lucene.Net.Documents.Field.Index.ANALYZED));
    doc.Add(new Lucene.Net.Documents.Field("NELAT", location.NELAT, Lucene.Net.Documents.Field.Store.YES, Lucene.Net.Documents.Field.Index.ANALYZED));
    doc.Add(new Lucene.Net.Documents.Field("NELON", location.NELON, Lucene.Net.Documents.Field.Store.YES, Lucene.Net.Documents.Field.Index.ANALYZED));
    doc.Add(new Lucene.Net.Documents.Field("SWLAT", location.SWLAT, Lucene.Net.Documents.Field.Store.YES, Lucene.Net.Documents.Field.Index.ANALYZED));
    doc.Add(new Lucene.Net.Documents.Field("SWLON", location.SWLON, Lucene.Net.Documents.Field.Store.YES, Lucene.Net.Documents.Field.Index.ANALYZED));
    return doc;
}
public virtual void TestCaching()
{
    Directory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new SimpleAnalyzer(), IndexWriter.MaxFieldLength.LIMITED);
    Document doc = new Document();
    TokenStream stream = new AnonymousClassTokenStream(this);
    stream = new CachingTokenFilter(stream);
    doc.Add(new Field("preanalyzed", stream, TermVector.NO));
    // 1) we consume all tokens twice before we add the doc to the index
    checkTokens(stream);
    stream.Reset();
    checkTokens(stream);
    // 2) now add the document to the index and verify if all tokens are indexed;
    //    don't reset the stream here, the DocumentWriter should do that implicitly
    writer.AddDocument(doc);
    writer.Close();
    IndexReader reader = IndexReader.Open(dir);
    TermPositions termPositions = reader.TermPositions(new Term("preanalyzed", "term1"));
    Assert.IsTrue(termPositions.Next());
    Assert.AreEqual(1, termPositions.Freq());
    Assert.AreEqual(0, termPositions.NextPosition());
    termPositions.Seek(new Term("preanalyzed", "term2"));
    Assert.IsTrue(termPositions.Next());
    Assert.AreEqual(2, termPositions.Freq());
    Assert.AreEqual(1, termPositions.NextPosition());
    Assert.AreEqual(3, termPositions.NextPosition());
    termPositions.Seek(new Term("preanalyzed", "term3"));
    Assert.IsTrue(termPositions.Next());
    Assert.AreEqual(1, termPositions.Freq());
    Assert.AreEqual(2, termPositions.NextPosition());
    reader.Close();
    // 3) reset stream and consume tokens again
    stream.Reset();
    checkTokens(stream);
}
public virtual void TestReuseAcrossWriters()
{
    Directory dir = new MockRAMDirectory();
    SnapshotDeletionPolicy dp = new SnapshotDeletionPolicy(new KeepOnlyLastCommitDeletionPolicy());
    IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(Util.Version.LUCENE_CURRENT), dp, IndexWriter.MaxFieldLength.UNLIMITED);
    // Force frequent flushes
    writer.SetMaxBufferedDocs(2);
    Document doc = new Document();
    doc.Add(new Field("content", "aaa", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
    for (int i = 0; i < 7; i++)
    {
        writer.AddDocument(doc);
        if (i % 2 == 0)
        {
            writer.Commit();
        }
    }
    IndexCommit cp = dp.Snapshot();
    CopyFiles(dir, cp);
    writer.Close();
    CopyFiles(dir, cp);
    writer = new IndexWriter(dir, new StandardAnalyzer(Util.Version.LUCENE_CURRENT), dp, IndexWriter.MaxFieldLength.UNLIMITED);
    CopyFiles(dir, cp);
    for (int i = 0; i < 7; i++)
    {
        writer.AddDocument(doc);
        if (i % 2 == 0)
        {
            writer.Commit();
        }
    }
    CopyFiles(dir, cp);
    writer.Close();
    CopyFiles(dir, cp);
    dp.Release();
    writer = new IndexWriter(dir, new StandardAnalyzer(Util.Version.LUCENE_CURRENT), dp, IndexWriter.MaxFieldLength.UNLIMITED);
    writer.Close();
    Assert.Throws<System.IO.FileNotFoundException>(() => CopyFiles(dir, cp), "did not hit expected IOException");
    dir.Close();
}
private static void StartServer()
{
    // construct an index
    RAMDirectory indexStore = new RAMDirectory();
    IndexWriter writer = new IndexWriter(indexStore, new SimpleAnalyzer(), true);
    Document doc = new Document();
    doc.Add(Field.Text("test", "test text"));
    writer.AddDocument(doc);
    writer.Optimize();
    writer.Close();
    // publish it
    //// LocateRegistry.CreateRegistry(1099); // {{Aroush}}
    Lucene.Net.Search.Searchable local = new IndexSearcher(indexStore);
    RemoteSearchable impl = new RemoteSearchable(local);
    System.Runtime.Remoting.RemotingServices.Marshal(impl, "http://localhost/Searchable");
}
static TestNumericRangeQuery32()
{
    try
    {
        // set the theoretical maximum term count for 8bit (see docs for the number)
        BooleanQuery.MaxClauseCount = 3 * 255 * 2 + 255;
        directory = new RAMDirectory();
        IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true, MaxFieldLength.UNLIMITED, null);
        NumericField field8 = new NumericField("field8", 8, Field.Store.YES, true),
            field4 = new NumericField("field4", 4, Field.Store.YES, true),
            field2 = new NumericField("field2", 2, Field.Store.YES, true),
            fieldNoTrie = new NumericField("field" + System.Int32.MaxValue, System.Int32.MaxValue, Field.Store.YES, true),
            ascfield8 = new NumericField("ascfield8", 8, Field.Store.NO, true),
            ascfield4 = new NumericField("ascfield4", 4, Field.Store.NO, true),
            ascfield2 = new NumericField("ascfield2", 2, Field.Store.NO, true);
        Document doc = new Document();
        // add fields that have a distance to test general functionality
        doc.Add(field8);
        doc.Add(field4);
        doc.Add(field2);
        doc.Add(fieldNoTrie);
        // add ascending fields with a distance of 1, beginning at -noDocs/2,
        // to test the correct splitting of range and inclusive/exclusive
        doc.Add(ascfield8);
        doc.Add(ascfield4);
        doc.Add(ascfield2);
        // Add a series of noDocs docs with increasing int values
        for (int l = 0; l < noDocs; l++)
        {
            int val = distance * l + startOffset;
            field8.SetIntValue(val);
            field4.SetIntValue(val);
            field2.SetIntValue(val);
            fieldNoTrie.SetIntValue(val);
            val = l - (noDocs / 2);
            ascfield8.SetIntValue(val);
            ascfield4.SetIntValue(val);
            ascfield2.SetIntValue(val);
            writer.AddDocument(doc, null);
        }
        writer.Optimize(null);
        writer.Close();
        searcher = new IndexSearcher(directory, true, null);
    }
    catch (System.Exception e)
    {
        throw new System.SystemException("", e);
    }
}
private void RunTest(Directory dir)
{
    // Run for ~7 seconds
    long stopTime = (DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond) + 7000;
    SnapshotDeletionPolicy dp = new SnapshotDeletionPolicy(new KeepOnlyLastCommitDeletionPolicy());
    IndexWriter writer = new IndexWriter(dir, true, new StandardAnalyzer(), dp);
    // Force frequent commits
    writer.SetMaxBufferedDocs(2);
    SupportClass.ThreadClass t = new AnonymousClassThread(stopTime, writer, this);
    t.Start();
    // While the above indexing thread is running, take many backups:
    while ((DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond) < stopTime)
    {
        BackupIndex(dir, dp);
        System.Threading.Thread.Sleep(new System.TimeSpan((System.Int64) 10000 * 20)); // 20 ms
        if (!t.IsAlive)
        {
            break;
        }
    }
    t.Join();
    // Add one more document to force writer to commit a final segment,
    // so deletion policy has a chance to delete again:
    Document doc = new Document();
    doc.Add(new Field("content", "aaa", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
    writer.AddDocument(doc);
    // Make sure we don't have any leftover files in the directory:
    writer.Close();
    TestIndexWriter.AssertNoUnreferencedFiles(dir, "some files were not deleted but should have been");
}
public virtual void TestDocBoost_()
{
    RAMDirectory store = new RAMDirectory();
    IndexWriter writer = new IndexWriter(store, new SimpleAnalyzer(), true);
    Field f1 = Field.Text("Field", "word");
    Field f2 = Field.Text("Field", "word");
    f2.SetBoost(2.0f);
    Document d1 = new Document();
    Document d2 = new Document();
    Document d3 = new Document();
    Document d4 = new Document();
    d3.SetBoost(3.0f);
    d4.SetBoost(2.0f);
    d1.Add(f1); // boost = 1
    d2.Add(f2); // boost = 2
    d3.Add(f1); // boost = 3
    d4.Add(f2); // boost = 4
    writer.AddDocument(d1);
    writer.AddDocument(d2);
    writer.AddDocument(d3);
    writer.AddDocument(d4);
    writer.Optimize();
    writer.Close();
    float[] scores = new float[4];
    new IndexSearcher(store).Search(new TermQuery(new Term("Field", "word")), new AnonymousClassHitCollector(scores, this));
    float lastScore = 0.0f;
    for (int i = 0; i < 4; i++)
    {
        Assert.IsTrue(scores[i] > lastScore);
        lastScore = scores[i];
    }
}
public void TestSegmentWarmer()
{
    Directory dir = new MockRAMDirectory();
    IndexWriter w = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED, null);
    w.SetMaxBufferedDocs(2);
    w.GetReader(null).Close();
    w.MergedSegmentWarmer = new AnonymousIndexReaderWarmer();
    Document doc = new Document();
    doc.Add(new Field("foo", "bar", Field.Store.YES, Field.Index.NOT_ANALYZED));
    for (int i = 0; i < 20; i++)
    {
        w.AddDocument(doc, null);
    }
    w.WaitForMerges();
    w.Close();
    dir.Close();
}
public virtual void TestNot_()
{
    RAMDirectory store = new RAMDirectory();
    IndexWriter writer = new IndexWriter(store, new SimpleAnalyzer(), true);
    Document d1 = new Document();
    d1.Add(Field.Text("Field", "a b"));
    writer.AddDocument(d1);
    writer.Optimize();
    writer.Close();
    Searcher searcher = new IndexSearcher(store);
    Query query = Lucene.Net.QueryParsers.QueryParser.Parse("a NOT b", "Field", new SimpleAnalyzer());
    //System.out.println(query);
    Hits hits = searcher.Search(query);
    Assert.AreEqual(0, hits.Length());
}
public virtual void TestMutipleDocument()
{
    RAMDirectory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new KeywordAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED, null);
    Document doc = new Document();
    doc.Add(new Field("partnum", "Q36", Field.Store.YES, Field.Index.ANALYZED));
    writer.AddDocument(doc, null);
    doc = new Document();
    doc.Add(new Field("partnum", "Q37", Field.Store.YES, Field.Index.ANALYZED));
    writer.AddDocument(doc, null);
    writer.Close();
    IndexReader reader = IndexReader.Open((Directory) dir, true, null);
    TermDocs td = reader.TermDocs(new Term("partnum", "Q36"), null);
    Assert.IsTrue(td.Next(null));
    td = reader.TermDocs(new Term("partnum", "Q37"), null);
    Assert.IsTrue(td.Next(null));
}
public virtual IndexSearcher GetSpanNotSearcher()
{
    RAMDirectory directory = new RAMDirectory();
    PayloadAnalyzer analyzer = new PayloadAnalyzer(this);
    IndexWriter writer = new IndexWriter(directory, analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED);
    writer.SetSimilarity(similarity);
    Document doc = new Document();
    doc.Add(new Field(PayloadHelper.FIELD, "one two three one four three", Field.Store.YES, Field.Index.ANALYZED));
    writer.AddDocument(doc);
    writer.Close();
    IndexSearcher searcher = new IndexSearcher(directory, true);
    searcher.Similarity = similarity;
    return searcher;
}
private void CreateIndex(int numHits)
{
    int numDocs = 500;
    Directory directory = new SeekCountingDirectory(this);
    IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    writer.SetUseCompoundFile(false);
    writer.SetMaxBufferedDocs(10);
    for (int i = 0; i < numDocs; i++)
    {
        Document doc = new Document();
        System.String content;
        if (i % (numDocs / numHits) == 0)
        {
            // add a document that matches the query "term1 term2"
            content = this.term1 + " " + this.term2;
        }
        else if (i % 15 == 0)
        {
            // add a document that only contains term1
            content = this.term1 + " " + this.term1;
        }
        else
        {
            // add a document that contains term2 but not term1
            content = this.term3 + " " + this.term2;
        }
        doc.Add(new Field(this.field, content, Field.Store.YES, Field.Index.ANALYZED));
        writer.AddDocument(doc);
    }
    // make sure the index has only a single segment
    writer.Optimize();
    writer.Close();
    SegmentReader reader = SegmentReader.GetOnlySegmentReader(directory);
    this.searcher = new IndexSearcher(reader);
}
public virtual void TestFilterIndexReader_()
{
    RAMDirectory directory = new RAMDirectory();
    IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true);
    Document d1 = new Document();
    d1.Add(Field.Text("default", "one two"));
    writer.AddDocument(d1);
    Document d2 = new Document();
    d2.Add(Field.Text("default", "one three"));
    writer.AddDocument(d2);
    Document d3 = new Document();
    d3.Add(Field.Text("default", "two four"));
    writer.AddDocument(d3);
    writer.Close();
    IndexReader reader = new TestReader(IndexReader.Open(directory));
    TermEnum terms = reader.Terms();
    while (terms.Next())
    {
        Assert.IsTrue(terms.Term().Text().IndexOf((System.Char) 'e') != -1);
    }
    terms.Close();
    TermPositions positions = reader.TermPositions(new Term("default", "one"));
    while (positions.Next())
    {
        Assert.IsTrue((positions.Doc() % 2) == 1);
    }
    reader.Close();
}
public virtual void TestMissingTerms()
{
    System.String fieldName = "field1";
    MockRAMDirectory rd = new MockRAMDirectory();
    IndexWriter w = new IndexWriter(rd, new KeywordAnalyzer(), MaxFieldLength.UNLIMITED, null);
    for (int i = 0; i < 100; i++)
    {
        Document doc = new Document();
        int term = i * 10; // terms are units of 10
        doc.Add(new Field(fieldName, "" + term, Field.Store.YES, Field.Index.NOT_ANALYZED));
        w.AddDocument(doc, null);
    }
    w.Close();
    IndexReader reader = IndexReader.Open((Directory) rd, true, null);
    IndexSearcher searcher = new IndexSearcher(reader);
    int numDocs = reader.NumDocs();
    ScoreDoc[] results;
    MatchAllDocsQuery q = new MatchAllDocsQuery();
    System.Collections.ArrayList terms = new System.Collections.ArrayList();
    terms.Add("5");
    results = searcher.Search(q, new FieldCacheTermsFilter(fieldName, (System.String[]) terms.ToArray(typeof(System.String))), numDocs, null).ScoreDocs;
    Assert.AreEqual(0, results.Length, "Must match nothing");
    terms = new System.Collections.ArrayList();
    terms.Add("10");
    results = searcher.Search(q, new FieldCacheTermsFilter(fieldName, (System.String[]) terms.ToArray(typeof(System.String))), numDocs, null).ScoreDocs;
    Assert.AreEqual(1, results.Length, "Must match 1");
    terms = new System.Collections.ArrayList();
    terms.Add("10");
    terms.Add("20");
    results = searcher.Search(q, new FieldCacheTermsFilter(fieldName, (System.String[]) terms.ToArray(typeof(System.String))), numDocs, null).ScoreDocs;
    Assert.AreEqual(2, results.Length, "Must match 2");
    reader.Close();
    rd.Close();
}
public virtual void TestNot_Renamed()
{
    RAMDirectory store = new RAMDirectory();
    IndexWriter writer = new IndexWriter(store, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    Document d1 = new Document();
    d1.Add(new Field("field", "a b", Field.Store.YES, Field.Index.ANALYZED));
    writer.AddDocument(d1);
    writer.Optimize();
    writer.Close();
    Searcher searcher = new IndexSearcher(store);
    QueryParser parser = new QueryParser("field", new SimpleAnalyzer());
    Query query = parser.Parse("a NOT b");
    //System.out.println(query);
    ScoreDoc[] hits = searcher.Search(query, null, 1000).scoreDocs;
    Assert.AreEqual(0, hits.Length);
}
public virtual void TestStopWordSearching()
{
    Analyzer analyzer = new StandardAnalyzer();
    Directory ramDir = new RAMDirectory();
    IndexWriter iw = new IndexWriter(ramDir, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
    Document doc = new Document();
    doc.Add(new Field("body", "blah the footest blah", Field.Store.NO, Field.Index.ANALYZED));
    iw.AddDocument(doc);
    iw.Close();
    MultiFieldQueryParser mfqp = new MultiFieldQueryParser(new System.String[] { "body" }, analyzer);
    mfqp.SetDefaultOperator(QueryParser.Operator.AND);
    Query q = mfqp.Parse("the footest");
    IndexSearcher is_Renamed = new IndexSearcher(ramDir);
    ScoreDoc[] hits = is_Renamed.Search(q, null, 1000).ScoreDocs;
    Assert.AreEqual(1, hits.Length);
    is_Renamed.Close();
}
public virtual void TestIncompatibleIndexes()
{
    // two documents:
    Directory dir1 = GetDir1();
    // one document only:
    Directory dir2 = new MockRAMDirectory();
    IndexWriter w2 = new IndexWriter(dir2, new StandardAnalyzer(Util.Version.LUCENE_CURRENT), true, IndexWriter.MaxFieldLength.LIMITED);
    Document d3 = new Document();
    d3.Add(new Field("f3", "v1", Field.Store.YES, Field.Index.ANALYZED));
    w2.AddDocument(d3);
    w2.Close();
    ParallelReader pr = new ParallelReader();
    pr.Add(IndexReader.Open(dir1, false));
    Assert.Throws<ArgumentException>(() => pr.Add(IndexReader.Open(dir2, false)), "didn't get expected exception: indexes don't have same number of documents");
}
public virtual void TestStopWordSearching()
{
    Analyzer analyzer = new StandardAnalyzer(Util.Version.LUCENE_CURRENT);
    Directory ramDir = new RAMDirectory();
    var iw = new IndexWriter(ramDir, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
    var doc = new Document();
    doc.Add(new Field("body", "blah the footest blah", Field.Store.NO, Field.Index.ANALYZED));
    iw.AddDocument(doc);
    iw.Close();
    var mfqp = new MultiFieldQueryParser(Util.Version.LUCENE_CURRENT, new[] { "body" }, analyzer);
    mfqp.DefaultOperator = QueryParser.Operator.AND;
    var q = mfqp.Parse("the footest");
    var is_Renamed = new IndexSearcher(ramDir, true);
    var hits = is_Renamed.Search(q, null, 1000).ScoreDocs;
    Assert.AreEqual(1, hits.Length);
    is_Renamed.Close();
}
override public void Run()
{
    Document doc = new Document();
    doc.Add(new Field("content", "aaa", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
    do
    {
        for (int i = 0; i < 27; i++)
        {
            try
            {
                writer.AddDocument(doc, null);
            }
            catch (System.Exception t)
            {
                System.Console.Out.WriteLine(t.StackTrace);
                Assert.Fail("addDocument failed");
            }
            if (i % 2 == 0)
            {
                try
                {
                    writer.Commit(null);
                }
                catch (Exception e)
                {
                    throw new SystemException("", e);
                }
            }
        }
        try
        {
            // 10,000 ticks = 1 ms
            System.Threading.Thread.Sleep(new System.TimeSpan((System.Int64) 10000 * 1));
        }
        catch (System.Threading.ThreadInterruptedException)
        {
            throw;
        }
    }
    while ((DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond) < stopTime);
}
public void Test_Store_RAMDirectory()
{
    Lucene.Net.Store.RAMDirectory ramDIR = new Lucene.Net.Store.RAMDirectory();
    // index one doc
    Lucene.Net.Index.IndexWriter wr = new Lucene.Net.Index.IndexWriter(ramDIR, new Lucene.Net.Analysis.WhitespaceAnalyzer(), true);
    Lucene.Net.Documents.Document doc = new Lucene.Net.Documents.Document();
    doc.Add(new Lucene.Net.Documents.Field("field1", "value1 value11", Lucene.Net.Documents.Field.Store.YES, Lucene.Net.Documents.Field.Index.TOKENIZED));
    wr.AddDocument(doc);
    wr.Close();
    // now serialize it
    System.Runtime.Serialization.Formatters.Binary.BinaryFormatter serializer = new System.Runtime.Serialization.Formatters.Binary.BinaryFormatter();
    System.IO.MemoryStream memoryStream = new System.IO.MemoryStream();
    serializer.Serialize(memoryStream, ramDIR);
    // close the directory
    ramDIR.Close();
    ramDIR = null;
    // now deserialize it
    memoryStream.Seek(0, System.IO.SeekOrigin.Begin);
    Lucene.Net.Store.RAMDirectory ramDIR2 = (Lucene.Net.Store.RAMDirectory) serializer.Deserialize(memoryStream);
    // add one more doc
    wr = new Lucene.Net.Index.IndexWriter(ramDIR2, new Lucene.Net.Analysis.WhitespaceAnalyzer(), false);
    doc = new Lucene.Net.Documents.Document();
    doc.Add(new Lucene.Net.Documents.Field("field1", "value1 value11", Lucene.Net.Documents.Field.Store.YES, Lucene.Net.Documents.Field.Index.TOKENIZED));
    wr.AddDocument(doc);
    wr.Close();
    // search
    Lucene.Net.Search.IndexSearcher s = new Lucene.Net.Search.IndexSearcher(ramDIR2);
    Lucene.Net.QueryParsers.QueryParser qp = new Lucene.Net.QueryParsers.QueryParser("field1", new Lucene.Net.Analysis.Standard.StandardAnalyzer());
    Lucene.Net.Search.Query q = qp.Parse("value1");
    Lucene.Net.Search.TopDocs topDocs = s.Search(q, 100);
    s.Close();
    Assert.AreEqual(2, topDocs.totalHits, "See the issue: LUCENENET-174");
}
public virtual void TestSubclassConcurrentMergeScheduler()
{
    MockRAMDirectory dir = new MockRAMDirectory();
    dir.FailOn(new FailOnlyOnMerge());
    Document doc = new Document();
    Field idField = new Field("id", "", Field.Store.YES, Field.Index.NOT_ANALYZED);
    doc.Add(idField);
    IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    MyMergeScheduler ms = new MyMergeScheduler(this);
    writer.SetMergeScheduler(ms);
    writer.SetMaxBufferedDocs(2);
    writer.SetRAMBufferSizeMB(Lucene.Net.Index.IndexWriter.DISABLE_AUTO_FLUSH);
    for (int i = 0; i < 20; i++)
    {
        writer.AddDocument(doc);
    }
    ms.Sync();
    writer.Close();
    Console.WriteLine("merge thread");
    Assert.IsTrue(mergeThreadCreated);
    Console.WriteLine("merge called");
    Assert.IsTrue(mergeCalled);
    Console.WriteLine("exec called");
    Assert.IsTrue(excCalled);
    Console.WriteLine("exec true");
    dir.Close();
    Console.WriteLine("Last");
    Assert.IsTrue(ConcurrentMergeScheduler.AnyUnhandledExceptions());
}
static Lucene.Net.Store.FSDirectory LuceneIndex()
{
    //var indexFileLocation = @"C:\Users\Kieran\Documents\Visual Studio 2015\Projects\LuceneTest\LuceneTest\LuceneIndex";
    var luceneDirectory = Lucene.Net.Store.FSDirectory.Open(indexFileLocation);
    var luceneAnalyser = new Lucene.Net.Analysis.Standard.StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);
    var luceneIndexWriter = new Lucene.Net.Index.IndexWriter(luceneDirectory, luceneAnalyser, true, Lucene.Net.Index.IndexWriter.MaxFieldLength.UNLIMITED);
    var data = Builder<TestData>.CreateListOfSize(50).All().With(x => x.Data = Faker.Name.FullName()).Build();
    foreach (var testdata in data)
    {
        var luceneDocument = new Lucene.Net.Documents.Document();
        luceneDocument.Add(new Lucene.Net.Documents.Field("Data", testdata.Data, Lucene.Net.Documents.Field.Store.YES, Lucene.Net.Documents.Field.Index.ANALYZED, Lucene.Net.Documents.Field.TermVector.YES));
        luceneIndexWriter.AddDocument(luceneDocument);
    }
    luceneIndexWriter.Optimize();
    luceneIndexWriter.Close();
    return luceneDirectory;
}
public virtual void TestFilterWorks()
{
    Directory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED, null);
    for (int i = 0; i < 500; i++)
    {
        Document document = new Document();
        document.Add(new Field("field", English.IntToEnglish(i) + " equals " + English.IntToEnglish(i), Field.Store.NO, Field.Index.ANALYZED));
        writer.AddDocument(document, null);
    }
    writer.Close();
    IndexReader reader = IndexReader.Open(dir, true, null);
    SpanTermQuery query = new SpanTermQuery(new Term("field", English.IntToEnglish(10).Trim()));
    SpanQueryFilter filter = new SpanQueryFilter(query);
    SpanFilterResult result = filter.BitSpans(reader, null);
    DocIdSet docIdSet = result.DocIdSet;
    Assert.IsTrue(docIdSet != null, "docIdSet is null and it shouldn't be");
    AssertContainsDocId("docIdSet doesn't contain docId 10", docIdSet, 10);
    var spans = result.Positions;
    Assert.IsTrue(spans != null, "spans is null and it shouldn't be");
    int size = GetDocIdSetSize(docIdSet);
    Assert.IsTrue(spans.Count == size, "spans Size: " + spans.Count + " is not: " + size);
    for (System.Collections.IEnumerator iterator = spans.GetEnumerator(); iterator.MoveNext(); )
    {
        SpanFilterResult.PositionInfo info = (SpanFilterResult.PositionInfo) iterator.Current;
        Assert.IsTrue(info != null, "info is null and it shouldn't be");
        // the doc should indicate the bit is on
        AssertContainsDocId("docIdSet doesn't contain docId " + info.Doc, docIdSet, info.Doc);
        // there should be two positions in each
        Assert.IsTrue(info.Positions.Count == 2, "info.getPositions() Size: " + info.Positions.Count + " is not: " + 2);
    }
    reader.Close();
}
public virtual void TestShrinkToAfterShortestMatch()
{
    RAMDirectory directory = new RAMDirectory();
    IndexWriter writer = new IndexWriter(directory, new TestPayloadAnalyzer(this), IndexWriter.MaxFieldLength.LIMITED);
    Document doc = new Document();
    doc.Add(new Field("content", new System.IO.StreamReader(new System.IO.MemoryStream(System.Text.Encoding.ASCII.GetBytes("a b c d e f g h i j a k")))));
    writer.AddDocument(doc);
    writer.Close();
    IndexSearcher is_Renamed = new IndexSearcher(directory, true);
    SpanTermQuery stq1 = new SpanTermQuery(new Term("content", "a"));
    SpanTermQuery stq2 = new SpanTermQuery(new Term("content", "k"));
    SpanQuery[] sqs = new SpanQuery[] { stq1, stq2 };
    SpanNearQuery snq = new SpanNearQuery(sqs, 1, true);
    Spans spans = snq.GetSpans(is_Renamed.IndexReader);
    TopDocs topDocs = is_Renamed.Search(snq, 1);
    System.Collections.Hashtable payloadSet = new System.Collections.Hashtable();
    for (int i = 0; i < topDocs.ScoreDocs.Length; i++)
    {
        while (spans.Next())
        {
            System.Collections.Generic.ICollection<byte[]> payloads = spans.GetPayload();
            for (System.Collections.IEnumerator it = payloads.GetEnumerator(); it.MoveNext(); )
            {
                CollectionsHelper.AddIfNotContains(payloadSet, new System.String(System.Text.UTF8Encoding.UTF8.GetChars((byte[]) it.Current)));
            }
        }
    }
    Assert.AreEqual(2, payloadSet.Count);
    Assert.IsTrue(payloadSet.Contains("a:Noise:10"));
    Assert.IsTrue(payloadSet.Contains("k:Noise:11"));
}
public virtual void TestNullOrSubScorer()
{
    Directory dir = new MockRAMDirectory();
    IndexWriter w = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED);
    Document doc = new Document();
    doc.Add(new Field("field", "a b c d", Field.Store.NO, Field.Index.ANALYZED));
    w.AddDocument(doc);
    IndexReader r = w.GetReader();
    IndexSearcher s = new IndexSearcher(r);
    BooleanQuery q = new BooleanQuery();
    q.Add(new TermQuery(new Term("field", "a")), BooleanClause.Occur.SHOULD);
    // PhraseQuery w/ no terms added returns a null scorer
    PhraseQuery pq = new PhraseQuery();
    q.Add(pq, BooleanClause.Occur.SHOULD);
    Assert.AreEqual(1, s.Search(q, 10).totalHits);
    // A required clause which returns null scorer should return null scorer to
    // IndexSearcher.
    q = new BooleanQuery();
    pq = new PhraseQuery();
    q.Add(new TermQuery(new Term("field", "a")), BooleanClause.Occur.SHOULD);
    q.Add(pq, BooleanClause.Occur.MUST);
    Assert.AreEqual(0, s.Search(q, 10).totalHits);
    DisjunctionMaxQuery dmq = new DisjunctionMaxQuery(1.0f);
    dmq.Add(new TermQuery(new Term("field", "a")));
    dmq.Add(pq);
    Assert.AreEqual(1, s.Search(dmq, 10).totalHits);
    r.Close();
    w.Close();
    dir.Close();
}
public virtual void TestSetNorm_()
{
    RAMDirectory store = new RAMDirectory();
    IndexWriter writer = new IndexWriter(store, new SimpleAnalyzer(), true);
    // add the same document four times
    Field f1 = Field.Text("Field", "word");
    Document d1 = new Document();
    d1.Add(f1);
    writer.AddDocument(d1);
    writer.AddDocument(d1);
    writer.AddDocument(d1);
    writer.AddDocument(d1);
    writer.Close();
    // reset the boost of each instance of this document
    IndexReader reader = IndexReader.Open(store);
    reader.SetNorm(0, "Field", 1.0f);
    reader.SetNorm(1, "Field", 2.0f);
    reader.SetNorm(2, "Field", 4.0f);
    reader.SetNorm(3, "Field", 16.0f);
    reader.Close();
    // check that searches are ordered by this boost
    float[] scores = new float[4];
    new IndexSearcher(store).Search(new TermQuery(new Term("Field", "word")), new AnonymousClassHitCollector(scores, this));
    float lastScore = 0.0f;
    for (int i = 0; i < 4; i++)
    {
        Assert.IsTrue(scores[i] > lastScore);
        lastScore = scores[i];
    }
}