void Index() { Lucene.Net.Index.IndexWriter wr = new Lucene.Net.Index.IndexWriter(dir, new Lucene.Net.Analysis.WhitespaceAnalyzer(), Lucene.Net.Index.IndexWriter.MaxFieldLength.UNLIMITED); Lucene.Net.Documents.Document doc = null; Lucene.Net.Documents.Field f = null; doc = new Lucene.Net.Documents.Document(); f = new Lucene.Net.Documents.Field("field", "a b c d", Lucene.Net.Documents.Field.Store.NO, Lucene.Net.Documents.Field.Index.ANALYZED); doc.Add(f); wr.AddDocument(doc); doc = new Lucene.Net.Documents.Document(); f = new Lucene.Net.Documents.Field("field", "a b a d", Lucene.Net.Documents.Field.Store.NO, Lucene.Net.Documents.Field.Index.ANALYZED); doc.Add(f); wr.AddDocument(doc); doc = new Lucene.Net.Documents.Document(); f = new Lucene.Net.Documents.Field("field", "a b e f", Lucene.Net.Documents.Field.Store.NO, Lucene.Net.Documents.Field.Index.ANALYZED); doc.Add(f); wr.AddDocument(doc); doc = new Lucene.Net.Documents.Document(); f = new Lucene.Net.Documents.Field("field", "x y z", Lucene.Net.Documents.Field.Store.NO, Lucene.Net.Documents.Field.Index.ANALYZED); doc.Add(f); wr.AddDocument(doc); wr.Close(); }
public virtual void TestFilterWorks() { Directory dir = new RAMDirectory(); IndexWriter writer = new IndexWriter(dir, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED); for (int i = 0; i < 500; i++) { Document document = new Document(); document.Add(new Field("field", English.IntToEnglish(i) + " equals " + English.IntToEnglish(i), Field.Store.NO, Field.Index.ANALYZED)); writer.AddDocument(document); } writer.Close(); IndexReader reader = IndexReader.Open(dir, true); SpanTermQuery query = new SpanTermQuery(new Term("field", English.IntToEnglish(10).Trim())); SpanQueryFilter filter = new SpanQueryFilter(query); SpanFilterResult result = filter.BitSpans(reader); DocIdSet docIdSet = result.DocIdSet; Assert.IsTrue(docIdSet != null, "docIdSet is null and it shouldn't be"); AssertContainsDocId("docIdSet doesn't contain docId 10", docIdSet, 10); var spans = result.Positions; Assert.IsTrue(spans != null, "spans is null and it shouldn't be"); int size = GetDocIdSetSize(docIdSet); Assert.IsTrue(spans.Count == size, "spans Size: " + spans.Count + " is not: " + size); for (System.Collections.IEnumerator iterator = spans.GetEnumerator(); iterator.MoveNext(); ) { SpanFilterResult.PositionInfo info = (SpanFilterResult.PositionInfo) iterator.Current; Assert.IsTrue(info != null, "info is null and it shouldn't be"); //The doc should indicate the bit is on AssertContainsDocId("docIdSet doesn't contain docId " + info.Doc, docIdSet, info.Doc); //There should be two positions in each Assert.IsTrue(info.Positions.Count == 2, "info.getPositions() Size: " + info.Positions.Count + " is not: " + 2); } reader.Close(); }
public virtual void CreateRandomTerms(int nDocs, int nTerms, double power, Directory dir) { int[] freq = new int[nTerms]; for (int i = 0; i < nTerms; i++) { int f = (nTerms + 1) - i; // make first terms less frequent freq[i] = (int) System.Math.Ceiling(System.Math.Pow(f, power)); terms[i] = new Term("f", System.Convert.ToString((char) ('A' + i))); } IndexWriter iw = new IndexWriter(dir, new WhitespaceAnalyzer(), true); for (int i = 0; i < nDocs; i++) { Document d = new Document(); for (int j = 0; j < nTerms; j++) { if (r.Next(freq[j]) == 0) { d.Add(new Field("f", terms[j].Text(), Field.Store.NO, Field.Index.UN_TOKENIZED)); //System.out.println(d); } } iw.AddDocument(d); } iw.Optimize(); iw.Close(); }
public virtual void TestDemo_Renamed_Method() { Analyzer analyzer = new StandardAnalyzer(); // Store the index in memory: Directory directory = new RAMDirectory(); // To store an index on disk, use this instead (note that the // parameter true will overwrite the index in that directory // if one exists): //Directory directory = FSDirectory.getDirectory("/tmp/testindex", true); IndexWriter iwriter = new IndexWriter(directory, analyzer, true); iwriter.SetMaxFieldLength(25000); Document doc = new Document(); System.String text = "This is the text to be indexed."; doc.Add(new Field("fieldname", text, Field.Store.YES, Field.Index.TOKENIZED)); iwriter.AddDocument(doc); iwriter.Close(); // Now search the index: IndexSearcher isearcher = new IndexSearcher(directory); // Parse a simple query that searches for "text": Lucene.Net.QueryParsers.QueryParser parser = new Lucene.Net.QueryParsers.QueryParser("fieldname", analyzer); Query query = parser.Parse("text"); Hits hits = isearcher.Search(query); Assert.AreEqual(1, hits.Length()); // Iterate through the results: for (int i = 0; i < hits.Length(); i++) { Document hitDoc = hits.Doc(i); Assert.AreEqual("This is the text to be indexed.", hitDoc.Get("fieldname")); } isearcher.Close(); directory.Close(); }
public override void SetUp() { base.SetUp(); Document doc; RAMDirectory rd1 = new RAMDirectory(); IndexWriter iw1 = new IndexWriter(rd1, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED); doc = new Document(); doc.Add(new Field("field1", "the quick brown fox jumps", Field.Store.YES, Field.Index.ANALYZED)); doc.Add(new Field("field2", "the quick brown fox jumps", Field.Store.YES, Field.Index.ANALYZED)); doc.Add(new Field("field4", "", Field.Store.NO, Field.Index.ANALYZED)); iw1.AddDocument(doc); iw1.Close(); RAMDirectory rd2 = new RAMDirectory(); IndexWriter iw2 = new IndexWriter(rd2, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED); doc = new Document(); doc.Add(new Field("field0", "", Field.Store.NO, Field.Index.ANALYZED)); doc.Add(new Field("field1", "the fox jumps over the lazy dog", Field.Store.YES, Field.Index.ANALYZED)); doc.Add(new Field("field3", "the fox jumps over the lazy dog", Field.Store.YES, Field.Index.ANALYZED)); iw2.AddDocument(doc); iw2.Close(); this.ir1 = IndexReader.Open(rd1, true); this.ir2 = IndexReader.Open(rd2, true); }
public static LDocument Convert(Document doc, Schema schema) { var ldoc = new LDocument(); foreach (var sf in schema.Fields) { foreach (var lf in Convert(sf, doc)) { ldoc.Add(lf); } } ldoc.Add( new LField( SchemaNameField, schema.Name, ConvertToStore(true, false), ConvertToIndexFlag(false, false) ) ); ldoc.Add( new LField( SchemaVersionField, schema.Version, ConvertToStore(true, false), ConvertToIndexFlag(false, false) ) ); return ldoc; }
public virtual void TestBasic() { Directory dir = new RAMDirectory(); IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED); Document doc = new Document(); doc.Add(new Field("field", "value", Field.Store.NO, Field.Index.ANALYZED)); writer.AddDocument(doc); writer.Close(); TermQuery termQuery = new TermQuery(new Term("field", "value")); // should not throw exception with primitive query QueryWrapperFilter qwf = new QueryWrapperFilter(termQuery); IndexSearcher searcher = new IndexSearcher(dir, true); TopDocs hits = searcher.Search(new MatchAllDocsQuery(), qwf, 10); Assert.AreEqual(1, hits.totalHits); // should not throw exception with complex primitive query; wrap the boolean query built here, not the bare term query again BooleanQuery booleanQuery = new BooleanQuery(); booleanQuery.Add(termQuery, Occur.MUST); booleanQuery.Add(new TermQuery(new Term("field", "missing")), Occur.MUST_NOT); qwf = new QueryWrapperFilter(booleanQuery); hits = searcher.Search(new MatchAllDocsQuery(), qwf, 10); Assert.AreEqual(1, hits.totalHits); // should not throw exception with non primitive Query (doesn't implement Query#createWeight) qwf = new QueryWrapperFilter(new FuzzyQuery(new Term("field", "valu"))); hits = searcher.Search(new MatchAllDocsQuery(), qwf, 10); Assert.AreEqual(1, hits.totalHits); }
/// <summary>Adds field info for a Document. </summary> public void Add(Document doc) { foreach(Field field in doc.Fields()) { Add(field.Name(), field.IsIndexed(), field.IsTermVectorStored(), field.IsStorePositionWithTermVector(), field.IsStoreOffsetWithTermVector(), field.GetOmitNorms()); } }
public override void SetUp() { base.SetUp(); RAMDirectory directory = new RAMDirectory(); IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED); long theLong = System.Int64.MaxValue; double theDouble = System.Double.MaxValue; sbyte theByte = (sbyte) System.SByte.MaxValue; short theShort = System.Int16.MaxValue; int theInt = System.Int32.MaxValue; float theFloat = System.Single.MaxValue; for (int i = 0; i < NUM_DOCS; i++) { Document doc = new Document(); doc.Add(new Field("theLong", System.Convert.ToString(theLong--), Field.Store.NO, Field.Index.NOT_ANALYZED)); doc.Add(new Field("theDouble", (theDouble--).ToString("E16"), Field.Store.NO, Field.Index.NOT_ANALYZED)); doc.Add(new Field("theByte", System.Convert.ToString((sbyte) theByte--), Field.Store.NO, Field.Index.NOT_ANALYZED)); doc.Add(new Field("theShort", System.Convert.ToString(theShort--), Field.Store.NO, Field.Index.NOT_ANALYZED)); doc.Add(new Field("theInt", System.Convert.ToString(theInt--), Field.Store.NO, Field.Index.NOT_ANALYZED)); doc.Add(new Field("theFloat", (theFloat--).ToString("E8"), Field.Store.NO, Field.Index.NOT_ANALYZED)); writer.AddDocument(doc); } writer.Close(); reader = IndexReader.Open(directory); }
override public void Run() { try { for (int j = 0; j < Lucene.Net.Index.TestThreadedOptimize.NUM_ITER2; j++) { writerFinal.Optimize(false); for (int k = 0; k < 17 * (1 + iFinal); k++) { Document d = new Document(); d.Add(new Field("id", iterFinal + "_" + iFinal + "_" + j + "_" + k, Field.Store.YES, Field.Index.NOT_ANALYZED)); d.Add(new Field("contents", English.IntToEnglish(iFinal + k), Field.Store.NO, Field.Index.ANALYZED)); writerFinal.AddDocument(d); } for (int k = 0; k < 9 * (1 + iFinal); k++) writerFinal.DeleteDocuments(new Term("id", iterFinal + "_" + iFinal + "_" + j + "_" + k)); writerFinal.Optimize(); } } catch (System.Exception t) { Enclosing_Instance.setFailed(); System.Console.Out.WriteLine(SupportClass.ThreadClass.Current().Name + ": hit exception"); System.Console.Out.WriteLine(t.StackTrace); } }
public virtual void TestNullOrSubScorer() { Directory dir = new MockRAMDirectory(); IndexWriter w = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED); Document doc = new Document(); doc.Add(new Field("field", "a b c d", Field.Store.NO, Field.Index.ANALYZED)); w.AddDocument(doc); IndexReader r = w.GetReader(); IndexSearcher s = new IndexSearcher(r); BooleanQuery q = new BooleanQuery(); q.Add(new TermQuery(new Term("field", "a")), BooleanClause.Occur.SHOULD); // PhraseQuery w/ no terms added returns a null scorer PhraseQuery pq = new PhraseQuery(); q.Add(pq, BooleanClause.Occur.SHOULD); Assert.AreEqual(1, s.Search(q, 10).TotalHits); // A required clause which returns null scorer should return null scorer to // IndexSearcher. q = new BooleanQuery(); pq = new PhraseQuery(); q.Add(new TermQuery(new Term("field", "a")), BooleanClause.Occur.SHOULD); q.Add(pq, BooleanClause.Occur.MUST); Assert.AreEqual(0, s.Search(q, 10).TotalHits); DisjunctionMaxQuery dmq = new DisjunctionMaxQuery(1.0f); dmq.Add(new TermQuery(new Term("field", "a"))); dmq.Add(pq); Assert.AreEqual(1, s.Search(dmq, 10).TotalHits); r.Close(); w.Close(); dir.Close(); }
override public void Run() { Document doc = new Document(); doc.Add(new Field("content", "aaa", Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); while ((System.DateTime.Now.Ticks - 621355968000000000) / 10000 < stopTime) { for (int i = 0; i < 27; i++) { try { writer.AddDocument(doc); } catch (System.IO.IOException cie) { System.SystemException re = new System.SystemException("addDocument failed", cie); throw re; } } try { System.Threading.Thread.Sleep(new System.TimeSpan((System.Int64) 10000 * 1)); } catch (System.Threading.ThreadInterruptedException) { SupportClass.ThreadClass.Current().Interrupt(); } } }
public static void AddDocuments(this IndexWriter wrtr, LDocument[] docs) { foreach (var d in docs) { wrtr.AddDocument(d); } }
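// Hedged usage sketch for the AddDocuments extension above; not part of the original source. The directory, analyzer, and document array are hypothetical stand-ins for values an application would already have.
static void BulkIndex(Lucene.Net.Store.Directory dir, Lucene.Net.Analysis.Analyzer analyzer, LDocument[] docs) { var wrtr = new IndexWriter(dir, analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED); wrtr.AddDocuments(docs); wrtr.Close(); }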
public virtual void SetUp() { directory = new RAMDirectory(); IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true); Document doc = new Document(); doc.Add(Field.Text("Field", "one two three four five")); doc.Add(Field.Text("sorter", "b")); writer.AddDocument(doc); doc = new Document(); doc.Add(Field.Text("Field", "one two three four")); doc.Add(Field.Text("sorter", "d")); writer.AddDocument(doc); doc = new Document(); doc.Add(Field.Text("Field", "one two three y")); doc.Add(Field.Text("sorter", "a")); writer.AddDocument(doc); doc = new Document(); doc.Add(Field.Text("Field", "one two x")); doc.Add(Field.Text("sorter", "c")); writer.AddDocument(doc); writer.Optimize(); writer.Close(); searcher = new IndexSearcher(directory); query = new TermQuery(new Term("Field", "three")); filter = new AnonymousClassFilter(this); }
/// <summary>Makes a document for a File. /// <p> /// The document has three fields: /// <ul> /// <li><code>path</code>--containing the pathname of the file, as a stored, /// tokenized Field;</li> /// <li><code>modified</code>--containing the last modified date of the file as /// a keyword Field as encoded by <a /// href="lucene.document.DateField.html">DateField</a>;</li> /// <li><code>contents</code>--containing the full contents of the file, as a /// Reader Field.</li> /// </ul> /// </summary> public static Document Document(System.IO.FileInfo f) { // make a new, empty document Document doc = new Document(); // Add the path of the file as a Field named "path". Use a Text Field, so // that the index stores the path, and so that the path is searchable doc.Add(Field.Text("path", f.FullName)); // Add the last modified date of the file as a Field named "modified". Use a // Keyword Field, so that it's searchable, but so that no attempt is made // to tokenize the Field into words. The tick arithmetic converts .NET ticks to Java-epoch milliseconds. doc.Add(Field.Keyword("modified", DateField.TimeToString((f.LastWriteTime.Ticks - 621355968000000000) / 10000))); // Add the contents of the file as a Field named "contents". Use a Text // Field with a Reader, so that the text of the file is tokenized; a StreamReader // in the system default encoding plays the role of Java's FileReader. System.IO.FileStream fileStream = new System.IO.FileStream(f.FullName, System.IO.FileMode.Open, System.IO.FileAccess.Read); System.IO.StreamReader reader = new System.IO.StreamReader(fileStream, System.Text.Encoding.Default); doc.Add(Field.Text("contents", reader)); // return the document return doc; }
override public void Run() { Document doc = new Document(); doc.Add(new Field("content", "aaa", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); while ((DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond) < stopTime) { for (int i = 0; i < 27; i++) { try { writer.AddDocument(doc); } catch (System.Exception t) { System.Console.Out.WriteLine(t.StackTrace); Assert.Fail("addDocument failed"); } } try { System.Threading.Thread.Sleep(new System.TimeSpan((System.Int64) 10000 * 1)); } catch (System.Threading.ThreadInterruptedException ie) { SupportClass.ThreadClass.Current().Interrupt(); throw new System.SystemException("", ie); } } }
/// <summary> /// Index the current document. /// </summary> /// <param name="doc">The document whose fields should be indexed.</param> /// <returns>True if the document contained at least one field and was passed to the index writer; otherwise false.</returns> public bool AddDocument(Document doc) { var status = true; var fields = doc.GetFields(); if (fields == null || fields.Length == 0) { status = false; } else { var luceneDoc = new Lucene.Net.Documents.Document(); foreach (var field in fields) { luceneDoc.Add(new Lucene.Net.Documents.Field( field.FieldName, // field name field.FieldValue.ToString(), // field value field.Store ? Lucene.Net.Documents.Field.Store.YES : Lucene.Net.Documents.Field.Store.NO, field.Analyse ? Lucene.Net.Documents.Field.Index.ANALYZED : Lucene.Net.Documents.Field.Index.NOT_ANALYZED)); } this.indexWriter.AddDocument(luceneDoc); } return status; }
public virtual void TestNullOrSubScorer() { Directory dir = new MockRAMDirectory(); IndexWriter w = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED); Document doc = new Document(); doc.Add(new Field("field", "a b c d", Field.Store.NO, Field.Index.ANALYZED)); w.AddDocument(doc); IndexReader r = w.GetReader(); IndexSearcher s = new IndexSearcher(r); BooleanQuery q = new BooleanQuery(); q.Add(new TermQuery(new Term("field", "a")), Occur.SHOULD); // LUCENE-2617: make sure that a term not in the index still contributes to the score via coord factor float score = s.Search(q, 10).MaxScore; Query subQuery = new TermQuery(new Term("field", "not_in_index")); subQuery.Boost = 0; q.Add(subQuery, Occur.SHOULD); float score2 = s.Search(q, 10).MaxScore; Assert.AreEqual(score * .5, score2, 1e-6); // LUCENE-2617: make sure that a clause not in the index still contributes to the score via coord factor BooleanQuery qq = (BooleanQuery)q.Clone(); PhraseQuery phrase = new PhraseQuery(); phrase.Add(new Term("field", "not_in_index")); phrase.Add(new Term("field", "another_not_in_index")); phrase.Boost = 0; qq.Add(phrase, Occur.SHOULD); score2 = s.Search(qq, 10).MaxScore; Assert.AreEqual(score * (1.0 / 3), score2, 1e-6); // now test BooleanScorer2 subQuery = new TermQuery(new Term("field", "b")); subQuery.Boost = 0; q.Add(subQuery, Occur.MUST); score2 = s.Search(q, 10).MaxScore; Assert.AreEqual(score * (2.0 / 3), score2, 1e-6); // PhraseQuery w/ no terms added returns a null scorer PhraseQuery pq = new PhraseQuery(); q.Add(pq, Occur.SHOULD); Assert.AreEqual(1, s.Search(q, 10).TotalHits); // A required clause which returns null scorer should return null scorer to // IndexSearcher. q = new BooleanQuery(); pq = new PhraseQuery(); q.Add(new TermQuery(new Term("field", "a")), Occur.SHOULD); q.Add(pq, Occur.MUST); Assert.AreEqual(0, s.Search(q, 10).TotalHits); DisjunctionMaxQuery dmq = new DisjunctionMaxQuery(1.0f); dmq.Add(new TermQuery(new Term("field", "a"))); dmq.Add(pq); Assert.AreEqual(1, s.Search(dmq, 10).TotalHits); r.Close(); w.Close(); dir.Close(); }
public virtual void TestAddSameDocTwice() { // LUCENE-5367: this was a problem with the previous code, making sure it // works with the new code. Directory indexDir = NewDirectory(), taxoDir = NewDirectory(); IndexWriter indexWriter = new IndexWriter(indexDir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))); DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir); FacetsConfig facetsConfig = new FacetsConfig(); Document doc = new Document(); doc.Add(new FacetField("a", "b")); doc = facetsConfig.Build(taxoWriter, doc); // these two addDocument() used to fail indexWriter.AddDocument(doc); indexWriter.AddDocument(doc); IOUtils.Close(indexWriter, taxoWriter); DirectoryReader indexReader = DirectoryReader.Open(indexDir); DirectoryTaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir); IndexSearcher searcher = NewSearcher(indexReader); FacetsCollector fc = new FacetsCollector(); searcher.Search(new MatchAllDocsQuery(), fc); Facets facets = GetTaxonomyFacetCounts(taxoReader, facetsConfig, fc); FacetResult res = facets.GetTopChildren(10, "a"); Assert.AreEqual(1, res.LabelValues.Length); Assert.AreEqual(2, res.LabelValues[0].Value); IOUtils.Close(indexReader, taxoReader); IOUtils.Close(indexDir, taxoDir); }
// create an index for testing private Directory GetIndex() { RAMDirectory indexStore = new RAMDirectory(); IndexWriter writer = new IndexWriter(indexStore, new StandardAnalyzer(Util.Version.LUCENE_CURRENT), true, IndexWriter.MaxFieldLength.LIMITED); RandomGen random = new RandomGen(this, NewRandom()); for (int i = 0; i < INDEX_SIZE; ++i) { // don't decrease; if too low the problem doesn't show up Document doc = new Document(); if ((i % 5) != 0) { // some documents must not have an entry in the first sort field doc.Add(new Field("publicationDate_", random.GetLuceneDate(), Field.Store.YES, Field.Index.NOT_ANALYZED)); } if ((i % 7) == 0) { // some documents to match the query (see below) doc.Add(new Field("content", "test", Field.Store.YES, Field.Index.ANALYZED)); } // every document has a defined 'mandant' field doc.Add(new Field("mandant", System.Convert.ToString(i % 3), Field.Store.YES, Field.Index.NOT_ANALYZED)); writer.AddDocument(doc); } writer.Optimize(); writer.Close(); return indexStore; }
public void SearchFiltered(IndexWriter writer, Directory directory, Filter filter, bool optimize) { try { for (int i = 0; i < 60; i++) {//Simple docs Document doc = new Document(); doc.Add(new Field(FIELD, i.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED)); writer.AddDocument(doc); } if (optimize) writer.Optimize(); writer.Close(); BooleanQuery booleanQuery = new BooleanQuery(); booleanQuery.Add(new TermQuery(new Term(FIELD, "36")), Occur.SHOULD); IndexSearcher indexSearcher = new IndexSearcher(directory); ScoreDoc[] hits = indexSearcher.Search(booleanQuery, filter, 1000).ScoreDocs; Assert.AreEqual(1, hits.Length, "Number of matched documents"); } catch (System.IO.IOException e) { Assert.Fail(e.Message); } }
public override void SetUp() { base.SetUp(); IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED); for (int i = 0; i < 5137; ++i) { Document doc = new Document(); doc.Add(new Field(FIELD, "meaninglessnames", Field.Store.YES, Field.Index.NOT_ANALYZED)); writer.AddDocument(doc); } { Document doc = new Document(); doc.Add(new Field(FIELD, "tangfulin", Field.Store.YES, Field.Index.NOT_ANALYZED)); writer.AddDocument(doc); } for (int i = 5138; i < 11377; ++i) { Document doc = new Document(); doc.Add(new Field(FIELD, "meaninglessnames", Field.Store.YES, Field.Index.NOT_ANALYZED)); writer.AddDocument(doc); } { Document doc = new Document(); doc.Add(new Field(FIELD, "tangfulin", Field.Store.YES, Field.Index.NOT_ANALYZED)); writer.AddDocument(doc); } writer.Close(); }
public override void SetUp() { base.SetUp(); IndexWriter writer = new IndexWriter(directory, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED); //writer.setUseCompoundFile(true); //writer.infoStream = System.out; for (int i = 0; i < 1000; i++) { Document doc = new Document(); Field.TermVector termVector; int mod3 = i % 3; int mod2 = i % 2; if (mod2 == 0 && mod3 == 0) { termVector = Field.TermVector.WITH_POSITIONS_OFFSETS; } else if (mod2 == 0) { termVector = Field.TermVector.WITH_POSITIONS; } else if (mod3 == 0) { termVector = Field.TermVector.WITH_OFFSETS; } else { termVector = Field.TermVector.YES; } doc.Add(new Field("field", English.IntToEnglish(i), Field.Store.YES, Field.Index.ANALYZED, termVector)); writer.AddDocument(doc); } writer.Close(); searcher = new IndexSearcher(directory, true); }
public override void SetUp() { base.SetUp(); directory = new RAMDirectory(); IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED); Document doc = new Document(); doc.Add(new Field("field", "one two three four five", Field.Store.YES, Field.Index.ANALYZED)); doc.Add(new Field("sorter", "b", Field.Store.YES, Field.Index.ANALYZED)); writer.AddDocument(doc); doc = new Document(); doc.Add(new Field("field", "one two three four", Field.Store.YES, Field.Index.ANALYZED)); doc.Add(new Field("sorter", "d", Field.Store.YES, Field.Index.ANALYZED)); writer.AddDocument(doc); doc = new Document(); doc.Add(new Field("field", "one two three y", Field.Store.YES, Field.Index.ANALYZED)); doc.Add(new Field("sorter", "a", Field.Store.YES, Field.Index.ANALYZED)); writer.AddDocument(doc); doc = new Document(); doc.Add(new Field("field", "one two x", Field.Store.YES, Field.Index.ANALYZED)); doc.Add(new Field("sorter", "c", Field.Store.YES, Field.Index.ANALYZED)); writer.AddDocument(doc); writer.Optimize(); writer.Close(); searcher = new IndexSearcher(directory); query = new TermQuery(new Term("field", "three")); filter = NewStaticFilterB(); }
protected override void Context() { var builder = new DocumentBuilder(); var mapping = LuceneMapper.GetMappingForType(typeof (TestObject)); var item = new TestObject() { Id = Guid.NewGuid(), IgnoredProperty = "Property", LongId = 123456, ValidProperty = "Valid property", Text = "Abc def ghi ijkl mno pqr stuv"}; _document = builder.BuildDocumentForMapping(item, mapping); }
public virtual void TestDemo_Renamed() { Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT); // Store the index in memory: Directory directory = new RAMDirectory(); // To store an index on disk, use this instead: //Directory directory = FSDirectory.open("/tmp/testindex"); IndexWriter iwriter = new IndexWriter(directory, analyzer, true, new IndexWriter.MaxFieldLength(25000)); Document doc = new Document(); System.String text = "This is the text to be indexed."; doc.Add(new Field("fieldname", text, Field.Store.YES, Field.Index.ANALYZED)); iwriter.AddDocument(doc); iwriter.Close(); // Now search the index: IndexSearcher isearcher = new IndexSearcher(directory, true); // read-only=true // Parse a simple query that searches for "text": QueryParser parser = new QueryParser("fieldname", analyzer); Query query = parser.Parse("text"); ScoreDoc[] hits = isearcher.Search(query, null, 1000).scoreDocs; Assert.AreEqual(1, hits.Length); // Iterate through the results: for (int i = 0; i < hits.Length; i++) { Document hitDoc = isearcher.Doc(hits[i].doc); Assert.AreEqual(hitDoc.Get("fieldname"), "This is the text to be indexed."); } isearcher.Close(); directory.Close(); }
public virtual void TestSimpleSkip() { RAMDirectory dir = new RAMDirectory(); IndexWriter writer = new IndexWriter(dir, new PayloadAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED); Term term = new Term("test", "a"); for (int i = 0; i < 5000; i++) { Document d1 = new Document(); d1.Add(new Field(term.Field(), term.Text(), Field.Store.NO, Field.Index.ANALYZED)); writer.AddDocument(d1); } writer.Flush(); writer.Optimize(); writer.Close(); IndexReader reader = SegmentReader.GetOnlySegmentReader(dir); SegmentTermPositions tp = (SegmentTermPositions) reader.TermPositions(); tp.freqStream_ForNUnit = new CountingStream(this, tp.freqStream_ForNUnit); for (int i = 0; i < 2; i++) { counter = 0; tp.Seek(term); CheckSkipTo(tp, 14, 185); // no skips CheckSkipTo(tp, 17, 190); // one skip on level 0 CheckSkipTo(tp, 287, 200); // one skip on level 1, two on level 0 // this test would fail if we had only one skip level, // because then more bytes would be read from the freqStream CheckSkipTo(tp, 4800, 250); // one skip on level 2 } }
public void TearDown() { mergedDir = new RAMDirectory(); merge1Dir = new RAMDirectory(); doc1 = new Document(); merge2Dir = new RAMDirectory(); doc2 = new Document(); }
/// <summary> Adds the fields above to a document </summary> /// <param name="doc">The document to write /// </param> public static void SetupDoc(Document doc) { doc.Add(textField1); doc.Add(textField2); doc.Add(keyField); doc.Add(unIndField); doc.Add(unStoredField1); doc.Add(unStoredField2); }
protected internal static Document Doc(Field[] fields) { Document doc = new Document(); for (int i = 0; i < fields.Length; i++) { doc.Add(fields[i]); } return doc; }
protected override unsafe Document DirectGet(Lucene.Net.Documents.Document input, string id, DocumentFields fields, IState state) { var reduceValue = input.GetField(Constants.Documents.Indexing.Fields.ReduceKeyValueFieldName).GetBinaryValue(state); var allocation = _context.GetMemory(reduceValue.Length); UnmanagedWriteBuffer buffer = new UnmanagedWriteBuffer(_context, allocation); buffer.Write(reduceValue, 0, reduceValue.Length); var result = new BlittableJsonReaderObject(allocation.Address, reduceValue.Length, _context, buffer); return(new Document { Data = result }); }
public virtual void TestFlushExceptions() { MockRAMDirectory directory = new MockRAMDirectory(); FailOnlyOnFlush failure = new FailOnlyOnFlush(); directory.FailOn(failure); IndexWriter writer = new IndexWriter(directory, true, ANALYZER, true); ConcurrentMergeScheduler cms = new ConcurrentMergeScheduler(); writer.SetMergeScheduler(cms); writer.SetMaxBufferedDocs(2); Document doc = new Document(); Field idField = new Field("id", "", Field.Store.YES, Field.Index.NOT_ANALYZED); doc.Add(idField); for (int i = 0; i < 10; i++) { for (int j = 0; j < 20; j++) { idField.SetValue(System.Convert.ToString(i * 20 + j)); writer.AddDocument(doc); } writer.AddDocument(doc); failure.SetDoFail(); try { writer.Flush(); Assert.Fail("failed to hit IOException"); } catch (System.IO.IOException ioe) { failure.ClearDoFail(); } } writer.Close(); IndexReader reader = IndexReader.Open(directory); Assert.AreEqual(200, reader.NumDocs()); reader.Close(); directory.Close(); }
public virtual void TestReuseAcrossWriters() { Directory dir = new MockRAMDirectory(); SnapshotDeletionPolicy dp = new SnapshotDeletionPolicy(new KeepOnlyLastCommitDeletionPolicy()); IndexWriter writer = new IndexWriter(dir, true, new StandardAnalyzer(), dp); // Force frequent commits writer.SetMaxBufferedDocs(2); Document doc = new Document(); doc.Add(new Field("content", "aaa", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); for (int i = 0; i < 7; i++) { writer.AddDocument(doc); } IndexCommit cp = (IndexCommit)dp.Snapshot(); CopyFiles(dir, cp); writer.Close(); CopyFiles(dir, cp); writer = new IndexWriter(dir, true, new StandardAnalyzer(), dp); CopyFiles(dir, cp); for (int i = 0; i < 7; i++) { writer.AddDocument(doc); } CopyFiles(dir, cp); writer.Close(); CopyFiles(dir, cp); dp.Release(); writer = new IndexWriter(dir, true, new StandardAnalyzer(), dp); writer.Close(); try { CopyFiles(dir, cp); Assert.Fail("did not hit expected IOException"); } catch (System.IO.IOException ioe) { // expected } dir.Close(); }
static TestNumericRangeQuery32() { { try { // set the theoretical maximum term count for 8bit (see docs for the number) BooleanQuery.MaxClauseCount = 3 * 255 * 2 + 255; directory = new RAMDirectory(); IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true, MaxFieldLength.UNLIMITED, null); NumericField field8 = new NumericField("field8", 8, Field.Store.YES, true), field4 = new NumericField("field4", 4, Field.Store.YES, true), field2 = new NumericField("field2", 2, Field.Store.YES, true), fieldNoTrie = new NumericField("field" + System.Int32.MaxValue, System.Int32.MaxValue, Field.Store.YES, true), ascfield8 = new NumericField("ascfield8", 8, Field.Store.NO, true), ascfield4 = new NumericField("ascfield4", 4, Field.Store.NO, true), ascfield2 = new NumericField("ascfield2", 2, Field.Store.NO, true); Document doc = new Document(); // add fields, that have a distance to test general functionality doc.Add(field8); doc.Add(field4); doc.Add(field2); doc.Add(fieldNoTrie); // add ascending fields with a distance of 1, beginning at -noDocs/2 to test the correct splitting of range and inclusive/exclusive doc.Add(ascfield8); doc.Add(ascfield4); doc.Add(ascfield2); // Add a series of noDocs docs with increasing int values for (int l = 0; l < noDocs; l++) { int val = distance * l + startOffset; field8.SetIntValue(val); field4.SetIntValue(val); field2.SetIntValue(val); fieldNoTrie.SetIntValue(val); val = l - (noDocs / 2); ascfield8.SetIntValue(val); ascfield4.SetIntValue(val); ascfield2.SetIntValue(val); writer.AddDocument(doc, null); } writer.Optimize(null); writer.Close(); searcher = new IndexSearcher(directory, true, null); } catch (System.Exception e) { throw new System.SystemException("", e); } } }
public virtual void TestMixedVectorsVectors() { IndexWriter writer = new IndexWriter(directory, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED); Document doc = new Document(); doc.Add(new Field("field", "one", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO)); doc.Add(new Field("field", "one", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.YES)); doc.Add(new Field("field", "one", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS)); doc.Add(new Field("field", "one", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_OFFSETS)); doc.Add(new Field("field", "one", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); writer.AddDocument(doc); writer.Close(); searcher = new IndexSearcher(directory); Query query = new TermQuery(new Term("field", "one")); ScoreDoc[] hits = searcher.Search(query, null, 1000).scoreDocs; Assert.AreEqual(1, hits.Length); TermFreqVector[] vector = searcher.reader_ForNUnit.GetTermFreqVectors(hits[0].doc); Assert.IsTrue(vector != null); Assert.IsTrue(vector.Length == 1); TermPositionVector tfv = (TermPositionVector)vector[0]; Assert.IsTrue(tfv.GetField().Equals("field")); System.String[] terms = tfv.GetTerms(); Assert.AreEqual(1, terms.Length); Assert.AreEqual(terms[0], "one"); Assert.AreEqual(5, tfv.GetTermFrequencies()[0]); int[] positions = tfv.GetTermPositions(0); Assert.AreEqual(5, positions.Length); for (int i = 0; i < 5; i++) { Assert.AreEqual(i, positions[i]); } TermVectorOffsetInfo[] offsets = tfv.GetOffsets(0); Assert.AreEqual(5, offsets.Length); for (int i = 0; i < 5; i++) { Assert.AreEqual(4 * i, offsets[i].GetStartOffset()); Assert.AreEqual(4 * i + 3, offsets[i].GetEndOffset()); } }
public virtual void TestHangOnClose() { Directory dir = new MockRAMDirectory(); IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED); writer.SetMergePolicy(new LogByteSizeMergePolicy(writer)); writer.SetMaxBufferedDocs(5); writer.SetUseCompoundFile(false); writer.SetMergeFactor(100); Document doc = new Document(); doc.Add(new Field("content", "aaa bbb ccc ddd eee fff ggg hhh iii", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); for (int i = 0; i < 60; i++) { writer.AddDocument(doc); } writer.SetMaxBufferedDocs(200); Document doc2 = new Document(); doc2.Add(new Field("content", "aaa bbb ccc ddd eee fff ggg hhh iii", Field.Store.YES, Field.Index.NO)); doc2.Add(new Field("content", "aaa bbb ccc ddd eee fff ggg hhh iii", Field.Store.YES, Field.Index.NO)); doc2.Add(new Field("content", "aaa bbb ccc ddd eee fff ggg hhh iii", Field.Store.YES, Field.Index.NO)); doc2.Add(new Field("content", "aaa bbb ccc ddd eee fff ggg hhh iii", Field.Store.YES, Field.Index.NO)); for (int i = 0; i < 10; i++) { writer.AddDocument(doc2); } writer.Close(); Directory dir2 = new MockRAMDirectory(); writer = new IndexWriter(dir2, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED); LogByteSizeMergePolicy lmp = new LogByteSizeMergePolicy(writer); lmp.SetMinMergeMB(0.0001); writer.SetMergePolicy(lmp); writer.SetMergeFactor(4); writer.SetUseCompoundFile(false); writer.SetMergeScheduler(new SerialMergeScheduler()); writer.AddIndexesNoOptimize(new Directory[] { dir }); writer.Close(); dir.Close(); dir2.Close(); }
private void TestRightOpenRange(int precisionStep) { System.String field = "field" + precisionStep; int count = 3000; int lower = (count - 1) * distance + (distance / 3) + startOffset; NumericRangeQuery<int> q = NumericRangeQuery.NewIntRange(field, precisionStep, lower, null, true, true); TopDocs topDocs = searcher.Search(q, null, noDocs, Sort.INDEXORDER, null); System.Console.Out.WriteLine("Found " + q.TotalNumberOfTerms + " distinct terms in right open range for field '" + field + "'."); ScoreDoc[] sd = topDocs.ScoreDocs; Assert.IsNotNull(sd); Assert.AreEqual(noDocs - count, sd.Length, "Score doc count"); Document doc = searcher.Doc(sd[0].Doc, null); Assert.AreEqual(count * distance + startOffset, System.Int32.Parse(doc.Get(field, null)), "First doc"); doc = searcher.Doc(sd[sd.Length - 1].Doc, null); Assert.AreEqual((noDocs - 1) * distance + startOffset, System.Int32.Parse(doc.Get(field, null)), "Last doc"); }
// make sure the documents returned by the search match the expected list private void AssertMatches(Searcher searcher, Query query, Sort sort, System.String expectedResult) { Hits result = searcher.Search(query, sort); System.Text.StringBuilder buff = new System.Text.StringBuilder(10); int n = result.Length(); for (int i = 0; i < n; ++i) { Document doc = result.Doc(i); System.String[] v = doc.GetValues("tracer"); for (int j = 0; j < v.Length; ++j) { buff.Append(v[j]); } } Assert.AreEqual(expectedResult, buff.ToString()); }
private static void IndexDocsWithFacetsAndSomeTerms(IndexWriter indexWriter, TaxonomyWriter taxoWriter, IDictionary<string, int?> expectedCounts) { Random random = Random(); int numDocs = AtLeast(random, 2); FacetsConfig config = Config; for (int i = 0; i < numDocs; i++) { Document doc = new Document(); bool hasContent = random.NextBoolean(); if (hasContent) { AddField(doc); } AddFacets(doc, config, hasContent); indexWriter.AddDocument(config.Build(taxoWriter, doc)); } indexWriter.Commit(); // flush a segment }
public virtual void TestCloseStoredFields() { Directory dir = new MockRAMDirectory(); IndexWriter w = new IndexWriter(dir, new SimpleAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED); w.UseCompoundFile = false; Document doc = new Document(); doc.Add(new Field("field", "yes it's stored", Field.Store.YES, Field.Index.ANALYZED)); w.AddDocument(doc); w.Close(); IndexReader r1 = IndexReader.Open(dir, false); IndexReader r2 = r1.Clone(false); r1.Close(); r2.Close(); dir.Close(); }
public override void SetUp() { base.SetUp(); RAMDirectory directory = new RAMDirectory(); IndexWriter writer = new IndexWriter(directory, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED, null); //writer.infoStream = System.out; for (int i = 0; i < 1000; i++) { Document doc = new Document(); doc.Add(new Field("field", English.IntToEnglish(i), Field.Store.YES, Field.Index.ANALYZED)); writer.AddDocument(doc, null); } writer.Close(); searcher = new IndexSearcher(directory, true, null); }
public virtual void TestDocument() { Assert.IsTrue(reader.NumDocs() == 1); Assert.IsTrue(reader.MaxDoc() >= 1); Document result = reader.Document(0); Assert.IsTrue(result != null); //There are 2 unstored fields on the document that are not preserved across writing Assert.IsTrue(DocHelper.NumFields(result) == DocHelper.NumFields(testDoc) - DocHelper.unstored.Count); System.Collections.IList fields = result.GetFields(); for (System.Collections.IEnumerator iter = fields.GetEnumerator(); iter.MoveNext();) { Fieldable field = (Fieldable)iter.Current; Assert.IsTrue(field != null); Assert.IsTrue(DocHelper.nameValues.Contains(field.Name())); } }
public override void SetUp() { base.SetUp(); directory = new RAMDirectory(); IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED); for (int i = 0; i < values.Length; i++) { Document doc = new Document(); doc.Add(new Field(FIELD, values[i], Field.Store.YES, Field.Index.ANALYZED)); writer.AddDocument(doc); } writer.Close(); indexSearcher = new IndexSearcher(directory); indexReader = indexSearcher.GetIndexReader(); }
public static Document CreateDocument(int n, System.String indexName, int numFields) { System.Text.StringBuilder sb = new System.Text.StringBuilder(); Document doc = new Document(); doc.Add(new Field("id", System.Convert.ToString(n), Field.Store.YES, Field.Index.NOT_ANALYZED, TermVector.WITH_POSITIONS_OFFSETS)); doc.Add(new Field("indexname", indexName, Field.Store.YES, Field.Index.NOT_ANALYZED, TermVector.WITH_POSITIONS_OFFSETS)); sb.Append("a"); sb.Append(n); doc.Add(new Field("field1", sb.ToString(), Field.Store.YES, Field.Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS)); sb.Append(" b"); sb.Append(n); for (int i = 1; i < numFields; i++) { doc.Add(new Field("field" + (i + 1), sb.ToString(), Field.Store.YES, Field.Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS)); } return(doc); }
public override void TestInitialize() { base.TestInitialize(); Directory = new SqlServerDirectory(Connection, new Options()); IndexWriter writer = new IndexWriter(Directory, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED); for (int i = 0; i < Values.Length; i++) { Document doc = new Document(); doc.Add(new Field(FIELD, Values[i], Field.Store.YES, Field.Index.ANALYZED)); writer.AddDocument(doc); } writer.Close(); IndexSearcher = new IndexSearcher(Directory, false); indexReader = IndexSearcher.IndexReader; }
/// <summary> /// Add the given document to the index. /// </summary> /// <param name="id">The id of the document.</param> /// <param name="fileName">The fileName of the document.</param> /// <param name="type">The type of document.</param> /// <param name="name">The name of the document.</param> /// <param name="body">The body of the document.</param> public void Add( string id, string fileName, string type, string name, string body) { if (!IsWriteMode) { throw new Exception("You can't modify the index when not in write mode."); } Lucene.Net.Documents.Document document = new Lucene.Net.Documents.Document(); document.Add(new Lucene.Net.Documents.Field( "id", id, Lucene.Net.Documents.Field.Store.YES, Lucene.Net.Documents.Field.Index.NOT_ANALYZED)); document.Add(new Lucene.Net.Documents.Field( "fileName", fileName, Lucene.Net.Documents.Field.Store.YES, Lucene.Net.Documents.Field.Index.NOT_ANALYZED)); document.Add(new Lucene.Net.Documents.Field( "type", type, Lucene.Net.Documents.Field.Store.YES, Lucene.Net.Documents.Field.Index.NO)); document.Add(new Lucene.Net.Documents.Field( "name", name, Lucene.Net.Documents.Field.Store.YES, Lucene.Net.Documents.Field.Index.NO)); document.Add(new Lucene.Net.Documents.Field( "body", body, Lucene.Net.Documents.Field.Store.YES, Lucene.Net.Documents.Field.Index.ANALYZED)); _writer.UpdateDocument( new Lucene.Net.Index.Term("fileName", fileName), document); }
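// Hedged usage sketch for Add above; the `indexer` instance and argument values are hypothetical. Because Add calls UpdateDocument keyed on the "fileName" term, re-adding the same file replaces its existing entry instead of duplicating it.
indexer.Add("42", "docs/readme.txt", "text", "Read Me", "Full body text of the file goes here.");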
/// <summary>Makes a document for a File. /// <p> /// The document has three fields: /// <ul> /// <li><code>path</code>--containing the pathname of the file, as a stored, /// tokenized field;</li> /// <li><code>modified</code>--containing the last modified date of the file as /// a field as created by <a /// href="lucene.document.DateTools.html">DateTools</a>;</li> /// <li><code>contents</code>--containing the full contents of the file, as a /// Reader field.</li> /// </ul> /// </summary> public static Document Document(string fullName, UInt64 lastWriteTime) { // make a new, empty document Document doc = new Document(); // Add the path of the file as a field named "path". Use a field that is // stored, indexed, and tokenized, so that individual path components are searchable. doc.Add(new Field("path", fullName, Field.Store.YES, Field.Index.ANALYZED)); // Add the last modified date of the file as a field named "modified". Use // a field that is indexed (i.e. searchable), but don't tokenize the field // into words. doc.Add(new Field("modified", DateTools.TimeToString((long)lastWriteTime, DateTools.Resolution.MINUTE), Field.Store.YES, Field.Index.NOT_ANALYZED)); if (System.IO.Path.GetExtension(fullName).Equals(".bab", StringComparison.OrdinalIgnoreCase)) { try { doc.Add(new Field("contents", new XMLTokenStream(fullName))); } catch (System.IO.IOException) { // skip files that cannot be read } } else { // Add the contents of the file to a field named "contents". Specify a Reader, // so that the text of the file is tokenized and indexed, but not stored. // Note that the StreamReader uses the system's default encoding here; if that's // not the file's actual encoding, searching for special characters will fail. try { System.IO.StreamReader io = new System.IO.StreamReader(fullName, System.Text.Encoding.Default); doc.Add(new Field("contents", io)); } catch (System.IO.IOException) { // skip files that cannot be read } } // return the document return(doc); }
public virtual void TestMultiValuedNRQ() { System.Random rnd = NewRandom(); RAMDirectory directory = new RAMDirectory(); IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true, MaxFieldLength.UNLIMITED); for (int l = 0; l < 5000; l++) { Document doc = new Document(); for (int m = 0, c = rnd.Next(10); m <= c; m++) { int value = rnd.Next(System.Int32.MaxValue); doc.Add(new Field("asc", value.ToString().PadLeft(11, '0'), Field.Store.NO, Field.Index.NOT_ANALYZED)); doc.Add(new NumericField("trie", Field.Store.NO, true).SetIntValue(value)); } writer.AddDocument(doc); } writer.Close(); Searcher searcher = new IndexSearcher(directory, true); for (int i = 0; i < 50; i++) { int lower = rnd.Next(System.Int32.MaxValue); int upper = rnd.Next(System.Int32.MaxValue); if (lower > upper) { int a = lower; lower = upper; upper = a; } TermRangeQuery cq = new TermRangeQuery("asc", lower.ToString().PadLeft(11, '0'), upper.ToString().PadLeft(11, '0'), true, true); NumericRangeQuery tq = NumericRangeQuery.NewIntRange("trie", lower, upper, true, true); TopDocs trTopDocs = searcher.Search(cq, 1); TopDocs nrTopDocs = searcher.Search(tq, 1); Assert.AreEqual(trTopDocs.TotalHits, nrTopDocs.TotalHits, "Returned count for NumericRangeQuery and TermRangeQuery must be equal"); } searcher.Close(); directory.Close(); }
public virtual void TestCaching() { Directory dir = new RAMDirectory(); IndexWriter writer = new IndexWriter(dir, new SimpleAnalyzer(), IndexWriter.MaxFieldLength.LIMITED); Document doc = new Document(); TokenStream stream = new AnonymousClassTokenStream(this); stream = new CachingTokenFilter(stream); doc.Add(new Field("preanalyzed", stream, TermVector.NO)); // 1) we consume all tokens twice before we add the doc to the index checkTokens(stream); stream.Reset(); checkTokens(stream); // 2) now add the document to the index and verify if all tokens are indexed // don't reset the stream here, the DocumentWriter should do that implicitly writer.AddDocument(doc); writer.Close(); IndexReader reader = IndexReader.Open(dir); TermPositions termPositions = reader.TermPositions(new Term("preanalyzed", "term1")); Assert.IsTrue(termPositions.Next()); Assert.AreEqual(1, termPositions.Freq()); Assert.AreEqual(0, termPositions.NextPosition()); termPositions.Seek(new Term("preanalyzed", "term2")); Assert.IsTrue(termPositions.Next()); Assert.AreEqual(2, termPositions.Freq()); Assert.AreEqual(1, termPositions.NextPosition()); Assert.AreEqual(3, termPositions.NextPosition()); termPositions.Seek(new Term("preanalyzed", "term3")); Assert.IsTrue(termPositions.Next()); Assert.AreEqual(1, termPositions.Freq()); Assert.AreEqual(2, termPositions.NextPosition()); reader.Close(); // 3) reset stream and consume tokens again stream.Reset(); checkTokens(stream); }
public virtual void TestReuseAcrossWriters() { Directory dir = new MockRAMDirectory(); SnapshotDeletionPolicy dp = new SnapshotDeletionPolicy(new KeepOnlyLastCommitDeletionPolicy()); IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(Util.Version.LUCENE_CURRENT), dp, IndexWriter.MaxFieldLength.UNLIMITED); // Force frequent flushes writer.SetMaxBufferedDocs(2); Document doc = new Document(); doc.Add(new Field("content", "aaa", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); for (int i = 0; i < 7; i++) { writer.AddDocument(doc); if (i % 2 == 0) { writer.Commit(); } } IndexCommit cp = dp.Snapshot(); CopyFiles(dir, cp); writer.Close(); CopyFiles(dir, cp); writer = new IndexWriter(dir, new StandardAnalyzer(Util.Version.LUCENE_CURRENT), dp, IndexWriter.MaxFieldLength.UNLIMITED); CopyFiles(dir, cp); for (int i = 0; i < 7; i++) { writer.AddDocument(doc); if (i % 2 == 0) { writer.Commit(); } } CopyFiles(dir, cp); writer.Close(); CopyFiles(dir, cp); dp.Release(); writer = new IndexWriter(dir, new StandardAnalyzer(Util.Version.LUCENE_CURRENT), dp, IndexWriter.MaxFieldLength.UNLIMITED); writer.Close(); Assert.Throws<System.IO.FileNotFoundException>(() => CopyFiles(dir, cp), "did not hit expected IOException"); dir.Close(); }
public override Document Get(Lucene.Net.Documents.Document input, float score, IState state) { if (FieldsToFetch.IsProjection) { return(GetProjection(input, score, null, state)); } using (_storageScope = _storageScope?.Start() ?? RetrieverScope?.For(nameof(QueryTimingsScope.Names.Storage))) { var doc = DirectGet(input, null, DocumentFields.All, state); if (doc != null) { doc.IndexScore = score; } return(doc); } }
private static void BuildIndex(string indexPath, System.Collections.Generic.IEnumerable<string> dataToIndex) { Lucene.Net.Util.LuceneVersion version = Lucene.Net.Util.LuceneVersion.LUCENE_48; Lucene.Net.Store.Directory luceneIndexDirectory = Lucene.Net.Store.FSDirectory.Open(indexPath); // Lucene.Net.Analysis.Analyzer analyzer = new Lucene.Net.Analysis.Core.WhitespaceAnalyzer(version); // Lucene.Net.Analysis.Analyzer analyzer = new Lucene.Net.Analysis.Standard.StandardAnalyzer(version); // Lucene.Net.Analysis.Analyzer analyzer = new Lucene.Net.Analysis.Core.KeywordAnalyzer(); Lucene.Net.Analysis.Analyzer analyzer = GetWrappedAnalyzer(); Lucene.Net.Index.IndexWriterConfig writerConfig = new Lucene.Net.Index.IndexWriterConfig(version, analyzer); writerConfig.OpenMode = Lucene.Net.Index.OpenMode.CREATE; // Overwrite, if exists using (Lucene.Net.Index.IndexWriter writer = new Lucene.Net.Index.IndexWriter(luceneIndexDirectory, writerConfig)) { foreach (string thisValue in dataToIndex) { Lucene.Net.Documents.Document doc = new Lucene.Net.Documents.Document(); string directory_name = System.IO.Path.GetDirectoryName(thisValue); string file_name = System.IO.Path.GetFileName(thisValue); string filename_no_extension = System.IO.Path.GetFileNameWithoutExtension(thisValue); string extension = System.IO.Path.GetExtension(thisValue); // StringField indexes but doesn't tokenize doc.Add(new Lucene.Net.Documents.StringField("full_name", thisValue, Lucene.Net.Documents.Field.Store.YES)); doc.Add(new Lucene.Net.Documents.StringField("directory_name", directory_name, Lucene.Net.Documents.Field.Store.YES)); doc.Add(new Lucene.Net.Documents.StringField("file_name", file_name, Lucene.Net.Documents.Field.Store.YES)); doc.Add(new Lucene.Net.Documents.StringField("filename_no_extension", filename_no_extension, Lucene.Net.Documents.Field.Store.YES)); doc.Add(new Lucene.Net.Documents.StringField("extension", extension, Lucene.Net.Documents.Field.Store.YES)); // doc.Add( new Lucene.Net.Documents.TextField("favoritePhrase", thisValue, Lucene.Net.Documents.Field.Store.YES) ); writer.AddDocument(doc); } // Next thisValue // writer.Optimize(); writer.Flush(true, true); } // Dispose needs to be called, otherwise the index cannot be read ... } // End Sub BuildIndex
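// Hedged companion sketch, not in the original: reading back the index that BuildIndex writes, using the same Lucene.Net 4.8 API; the field name and extension value are examples. StringField values are indexed verbatim, so an exact-match TermQuery (e.g. ".txt", dot included) is the right probe.
private static void SearchIndex(string indexPath, string extension) { using (Lucene.Net.Index.DirectoryReader reader = Lucene.Net.Index.DirectoryReader.Open(Lucene.Net.Store.FSDirectory.Open(indexPath))) { Lucene.Net.Search.IndexSearcher searcher = new Lucene.Net.Search.IndexSearcher(reader); Lucene.Net.Search.TopDocs hits = searcher.Search(new Lucene.Net.Search.TermQuery(new Lucene.Net.Index.Term("extension", extension)), 10); foreach (Lucene.Net.Search.ScoreDoc sd in hits.ScoreDocs) { System.Console.WriteLine(searcher.Doc(sd.Doc).Get("full_name")); } } }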
private static void StartServer() { // construct an index RAMDirectory indexStore = new RAMDirectory(); IndexWriter writer = new IndexWriter(indexStore, new SimpleAnalyzer(), true); Document doc = new Document(); doc.Add(Field.Text("test", "test text")); writer.AddDocument(doc); writer.Optimize(); writer.Close(); // publish it Lucene.Net.Search.Searchable local = new IndexSearcher(indexStore); RemoteSearchable impl = new RemoteSearchable(local); System.Runtime.Remoting.RemotingServices.Marshal(impl, "http://localhost/Searchable"); }
public virtual void DoTestDocument() { sis.Read(dir); IndexReader reader = OpenReader(); Assert.IsTrue(reader != null); Document newDoc1 = reader.Document(0); Assert.IsTrue(newDoc1 != null); Assert.IsTrue(DocHelper.NumFields(newDoc1) == DocHelper.NumFields(doc1) - DocHelper.unstored.Count); Document newDoc2 = reader.Document(1); Assert.IsTrue(newDoc2 != null); Assert.IsTrue(DocHelper.NumFields(newDoc2) == DocHelper.NumFields(doc2) - DocHelper.unstored.Count); TermFreqVector vector = reader.GetTermFreqVector(0, DocHelper.TEXT_FIELD_2_KEY); Assert.IsTrue(vector != null); TestSegmentReader.CheckNorms(reader); }
public virtual void SetUp() { //create a user index userindex = new RAMDirectory(); IndexWriter writer = new IndexWriter(userindex, new SimpleAnalyzer(), true); for (int i = 0; i < 1000; i++) { Document doc = new Document(); doc.Add(new Field("field1", English.IntToEnglish(i), Field.Store.YES, Field.Index.TOKENIZED)); doc.Add(new Field("field2", English.IntToEnglish(i + 1), Field.Store.YES, Field.Index.TOKENIZED)); // + word thousand writer.AddDocument(doc); } writer.Close(); // create the spellChecker spellindex = new RAMDirectory(); spellChecker = new SpellChecker.Net.Search.Spell.SpellChecker(spellindex); }
private void RunTest(Directory dir) { // Run for ~7 seconds long stopTime = (DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond) + 7000; SnapshotDeletionPolicy dp = new SnapshotDeletionPolicy(new KeepOnlyLastCommitDeletionPolicy()); IndexWriter writer = new IndexWriter(dir, true, new StandardAnalyzer(), dp); // Force frequent commits writer.SetMaxBufferedDocs(2); SupportClass.ThreadClass t = new AnonymousClassThread(stopTime, writer, this); t.Start(); // While the above indexing thread is running, take many // backups: while ((DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond) < stopTime) { BackupIndex(dir, dp); System.Threading.Thread.Sleep(new System.TimeSpan((System.Int64) 10000 * 20)); if (!t.IsAlive) { break; } } t.Join(); // Add one more document to force writer to commit a // final segment, so deletion policy has a chance to // delete again: Document doc = new Document(); doc.Add(new Field("content", "aaa", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); writer.AddDocument(doc); // Make sure we don't have any leftover files in the // directory: writer.Close(); TestIndexWriter.AssertNoUnreferencedFiles(dir, "some files were not deleted but should have been"); }
public virtual void TestDocBoost_() { RAMDirectory store = new RAMDirectory(); IndexWriter writer = new IndexWriter(store, new SimpleAnalyzer(), true); Field f1 = Field.Text("Field", "word"); Field f2 = Field.Text("Field", "word"); f2.SetBoost(2.0f); Document d1 = new Document(); Document d2 = new Document(); Document d3 = new Document(); Document d4 = new Document(); d3.SetBoost(3.0f); d4.SetBoost(2.0f); d1.Add(f1); // boost = 1 d2.Add(f2); // boost = 2 d3.Add(f1); // boost = 3 d4.Add(f2); // boost = 4 writer.AddDocument(d1); writer.AddDocument(d2); writer.AddDocument(d3); writer.AddDocument(d4); writer.Optimize(); writer.Close(); float[] scores = new float[4]; new IndexSearcher(store).Search(new TermQuery(new Term("Field", "word")), new AnonymousClassHitCollector(scores, this)); float lastScore = 0.0f; for (int i = 0; i < 4; i++) { Assert.IsTrue(scores[i] > lastScore); lastScore = scores[i]; } }
/// <summary> Test stored fields for a segment.</summary> private Status.StoredFieldStatus TestStoredFields(SegmentInfo info, SegmentReader reader, System.Globalization.NumberFormatInfo format) { Status.StoredFieldStatus status = new Status.StoredFieldStatus(); try { if (infoStream != null) { infoStream.Write("    test: stored fields......."); } // Scan stored fields for all documents for (int j = 0; j < info.docCount; ++j) { if (!reader.IsDeleted(j)) { status.docCount++; Document doc = reader.Document(j); status.totFields += doc.GetFields().Count; } } // Validate docCount if (status.docCount != reader.NumDocs()) { throw new System.SystemException("docCount=" + status.docCount + " but saw " + reader.NumDocs() + " undeleted docs"); } Msg(string.Format(format, "OK [{0:d} total field count; avg {1:f} fields per doc]", new object[] { status.totFields, (((float)status.totFields) / status.docCount) })); } catch (System.Exception e) { Msg("ERROR [" + System.Convert.ToString(e.Message) + "]"); status.error = e; if (infoStream != null) { infoStream.WriteLine(e.StackTrace); } } return(status); }
public virtual IndexSearcher GetSpanNotSearcher() { RAMDirectory directory = new RAMDirectory(); PayloadAnalyzer analyzer = new PayloadAnalyzer(this); IndexWriter writer = new IndexWriter(directory, analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED); writer.SetSimilarity(similarity); Document doc = new Document(); doc.Add(new Field(PayloadHelper.FIELD, "one two three one four three", Field.Store.YES, Field.Index.ANALYZED)); writer.AddDocument(doc); writer.Close(); IndexSearcher searcher = new IndexSearcher(directory, true); searcher.Similarity = similarity; return(searcher); }
public virtual void SetUp() { //create a user index userindex = new RAMDirectory(); IndexWriter writer = new IndexWriter(userindex, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.UNLIMITED); for (int i = 0; i < 1000; i++) { Document doc = new Document(); doc.Add(new Field("field1", English.IntToEnglish(i), Field.Store.YES, Field.Index.ANALYZED)); doc.Add(new Field("field2", English.IntToEnglish(i + 1), Field.Store.YES, Field.Index.ANALYZED)); // + word thousand writer.AddDocument(doc); } writer.Close(); // create the spellChecker spellindex = new RAMDirectory(); searchers = ArrayList.Synchronized(new ArrayList()); spellChecker = new SpellCheckerMock(spellindex, this); }