/// <summary>
/// Rewrites this query by expanding a <see cref="RegexQuery"/> over <c>term</c> and
/// converting each resulting term clause into a <see cref="SpanTermQuery"/>, all wrapped
/// in a single <see cref="SpanOrQuery"/>. Boosts are carried over from the rewritten query
/// and from each of its clauses.
/// </summary>
/// <param name="reader">the index reader the regex query is expanded against</param>
/// <returns>a <see cref="SpanOrQuery"/> over the expanded terms</returns>
public override Query Rewrite(IndexReader reader)
{
    // RegexQuery (via MultiTermQuery) rewrite always returns a BooleanQuery
    Query rewritten = new RegexQuery(term).Rewrite(reader);
    BooleanClause[] booleanClauses = ((BooleanQuery)rewritten).GetClauses();

    SpanQuery[] spans = new SpanQuery[booleanClauses.Length];
    int position = 0;
    foreach (BooleanClause booleanClause in booleanClauses)
    {
        // Clauses coming out of the RegexQuery rewrite are always TermQuery instances
        TermQuery termQuery = (TermQuery)booleanClause.GetQuery();
        SpanTermQuery spanTerm = new SpanTermQuery(termQuery.GetTerm());
        spanTerm.SetBoost(termQuery.GetBoost());
        spans[position++] = spanTerm;
    }

    SpanOrQuery combined = new SpanOrQuery(spans);
    combined.SetBoost(rewritten.GetBoost());
    return combined;
}
/// <summary>
/// Indexes the document built by <c>MakeDocumentWithFields</c>, finds it again through a
/// term query on the "keyword" field and checks the retrieved document with <c>DoAssert</c>.
/// </summary>
public virtual void TestGetValuesForIndexedDocument()
{
    RAMDirectory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(), true);
    writer.AddDocument(MakeDocumentWithFields());
    writer.Close();

    Searcher searcher = new IndexSearcher(dir);
    try
    {
        // search for something that does exist
        Query query = new TermQuery(new Term("keyword", "test1"));

        // ensure that queries return expected results without DateFilter first
        Hits hits = searcher.Search(query);
        Assert.AreEqual(1, hits.Length());

        // No catch block here on purpose: a failure inside DoAssert (including a failed
        // assertion) must propagate so that the test actually fails.
        DoAssert(hits.Doc(0), true);
    }
    finally
    {
        searcher.Close();
    }
}
/// <summary>
/// Appends an optional (SHOULD) boosted term clause to a boolean query.
/// </summary>
/// <param name="q">the boolean query that receives the clause</param>
/// <param name="k">the field name of the term</param>
/// <param name="v">the text of the term</param>
/// <param name="boost">the boost applied to the term query</param>
private static void Add(BooleanQuery q, System.String k, System.String v, float boost)
{
    Term term = new Term(k, v);
    Query boostedTerm = new TermQuery(term);
    boostedTerm.SetBoost(boost);

    BooleanClause optionalClause = new BooleanClause(boostedTerm, BooleanClause.Occur.SHOULD);
    q.Add(optionalClause);
}
/// <summary>
/// Checks that a reader obtained from an <see cref="IndexWriter"/> stays usable after the
/// writer has been closed, and that reopening it then throws
/// <see cref="AlreadyClosedException"/>.
/// </summary>
public virtual void TestAfterClose()
{
    Directory dir1 = new MockRAMDirectory();
    IndexWriter writer = new IndexWriter(dir1, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED);
    writer.SetInfoStream(infoStream);

    // Populate the index, take a reader from the writer, then close the writer.
    CreateIndexNoClose(false, "test", writer);
    IndexReader r = writer.GetReader();
    writer.Close();

    _TestUtil.CheckIndex(dir1);

    // The reader should remain usable even after the IndexWriter is closed.
    Assert.AreEqual(100, r.NumDocs());
    Term indexNameTerm = new Term("indexname", "test");
    Query q = new TermQuery(indexNameTerm);
    IndexSearcher searcher = new IndexSearcher(r);
    Assert.AreEqual(100, searcher.Search(q, 10).totalHits);

    try
    {
        r.Reopen();
        Assert.Fail("failed to hit AlreadyClosedException");
    }
    catch (AlreadyClosedException)
    {
        // expected
    }

    r.Close();
    dir1.Close();
}
/// <summary>
/// Runs the facets collector for a single term query and checks the top children of two
/// dimensions (<c>CP_A</c> and <c>CP_B</c>), each requested with its own child limit,
/// against <c>termExpectedCounts</c>.
/// </summary>
public virtual void TestDifferentNumResults()
{
    // test the collector w/ FacetRequests and different numResults
    DirectoryReader indexReader = DirectoryReader.Open(indexDir);
    var taxoReader = new DirectoryTaxonomyReader(taxoDir);
    IndexSearcher searcher = NewSearcher(indexReader);

    FacetsCollector sfc = new FacetsCollector();
    TermQuery termQuery = new TermQuery(A);
    searcher.Search(termQuery, sfc);
    Facets facets = GetTaxonomyFacetCounts(taxoReader, Config, sfc);

    // First dimension: the aggregate value is expected to be -1, children are checked individually.
    FacetResult topOfA = facets.GetTopChildren(NUM_CHILDREN_CP_A, CP_A);
    Assert.AreEqual(-1, (int)topOfA.Value);
    foreach (LabelAndValue child in topOfA.LabelValues)
    {
        Assert.AreEqual(termExpectedCounts[CP_A + "/" + child.label], child.value);
    }

    // Second dimension: both the aggregate value and every child are checked.
    FacetResult topOfB = facets.GetTopChildren(NUM_CHILDREN_CP_B, CP_B);
    Assert.AreEqual(termExpectedCounts[CP_B], topOfB.Value);
    foreach (LabelAndValue child in topOfB.LabelValues)
    {
        Assert.AreEqual(termExpectedCounts[CP_B + "/" + child.label], child.value);
    }

    IOUtils.Close(indexReader, taxoReader);
}
/// <summary>
/// Repeatedly reopens a near-real-time reader and searches it while background worker
/// threads (<c>AnonymousClassThread1</c>; presumably adding and deleting documents, per the
/// test name) operate on the same writer for a fixed time window. The test requires that
/// the searches found hits and that the workers recorded no exceptions.
/// </summary>
public virtual void TestDuringAddDelete()
{
    Directory dir1 = new MockRAMDirectory();
    IndexWriter writer = new IndexWriter(dir1, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED);
    writer.SetInfoStream(infoStream);
    writer.SetMergeFactor(2);

    // create the index
    CreateIndexNoClose(false, "test", writer);
    writer.Commit();

    IndexReader r = writer.GetReader();

    int threadCount = 5;
    float runSeconds = 3;

    // Deadline in milliseconds, on the same clock the polling loop below reads.
    long endTime = (long)((DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond) + 1000.0 * runSeconds);

    // Synchronized list the workers use to report exceptions back to this thread.
    System.Collections.IList excs = System.Collections.ArrayList.Synchronized(new System.Collections.ArrayList());

    SupportClass.ThreadClass[] threads = new SupportClass.ThreadClass[threadCount];
    for (int t = 0; t < threads.Length; t++)
    {
        SupportClass.ThreadClass worker = new AnonymousClassThread1(endTime, writer, excs, this);
        worker.IsBackground = true;
        threads[t] = worker;
        worker.Start();
    }

    int sum = 0;
    while ((DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond) < endTime)
    {
        // Swap to the reopened reader only when Reopen actually produced a new instance.
        IndexReader reopened = r.Reopen();
        if (reopened != r)
        {
            r.Close();
            r = reopened;
        }

        Query q = new TermQuery(new Term("indexname", "test"));
        sum += new IndexSearcher(r).Search(q, 10).totalHits;
    }

    foreach (SupportClass.ThreadClass worker in threads)
    {
        worker.Join();
    }

    Assert.IsTrue(sum > 0);
    Assert.AreEqual(0, excs.Count);

    writer.Close();
    _TestUtil.CheckIndex(dir1);
    r.Close();
    dir1.Close();
}
/// <summary>
/// Adds the same <see cref="Document"/> three times, changing the value of its "id" field
/// between additions, and checks that all three ids ("id1", "id2", "id3") come back from
/// the index.
/// </summary>
public virtual void TestFieldSetValue()
{
    Field idField = new Field("id", "id1", Field.Store.YES, Field.Index.NOT_ANALYZED);
    Document doc = new Document();
    doc.Add(idField);
    doc.Add(new Field("keyword", "test", Field.Store.YES, Field.Index.NOT_ANALYZED));

    RAMDirectory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);

    // First addition uses the initial value; the following ones reuse the same field instance.
    writer.AddDocument(doc);
    foreach (System.String nextId in new System.String[] { "id2", "id3" })
    {
        idField.SetValue(nextId);
        writer.AddDocument(doc);
    }
    writer.Close();

    Searcher searcher = new IndexSearcher(dir);
    Query query = new TermQuery(new Term("keyword", "test"));

    // ensure that queries return expected results without DateFilter first
    ScoreDoc[] hits = searcher.Search(query, null, 1000).ScoreDocs;
    Assert.AreEqual(3, hits.Length);

    // One bit per expected id; all three bits set means every id was seen.
    int seenMask = 0;
    for (int i = 0; i < 3; i++)
    {
        Document stored = searcher.Doc(hits[i].doc);
        System.String id = stored.GetField("id").StringValue();
        if (id.Equals("id1"))
        {
            seenMask |= 1;
        }
        else if (id.Equals("id2"))
        {
            seenMask |= 2;
        }
        else if (id.Equals("id3"))
        {
            seenMask |= 4;
        }
        else
        {
            Assert.Fail("unexpected id field");
        }
    }

    searcher.Close();
    dir.Close();
    Assert.AreEqual(7, seenMask, "did not see all IDs");
}
/// <summary>
/// Tells whether the index contains a document whose "FileName" and "LastWriteTime" terms
/// both match the given office data.
/// </summary>
/// <param name="officeData">supplies the file name and last-write time to look up</param>
/// <returns><c>true</c> when at least one document matches both terms; otherwise <c>false</c></returns>
public bool CheckDocExist(OfficeData officeData)
{
    var fileNameTerm = new Lucene.Net.Index.Term("FileName", officeData.FileName);
    var lastWriteTerm = new Lucene.Net.Index.Term("LastWriteTime", officeData.LastWriteTime);

    // Both term clauses are required (MUST).
    var bothRequired = new Lucene.Net.Search.BooleanQuery();
    bothRequired.Add(new Lucene.Net.Search.TermQuery(fileNameTerm), Lucene.Net.Search.Occur.MUST);
    bothRequired.Add(new Lucene.Net.Search.TermQuery(lastWriteTerm), Lucene.Net.Search.Occur.MUST);

    Lucene.Net.Search.TopDocs topDocs = searcher.Search(bothRequired, 2);
    return topDocs.TotalHits != 0;
}
/// <summary>
/// Stress loop: for each iteration opens an <see cref="IndexSearcher"/> on <c>dir</c>, runs
/// a term query for content:aaa and closes the searcher again. The first unexpected
/// exception sets <c>hitException</c>, is written to standard output, and ends the loop.
/// </summary>
public override void Run()
{
    IndexSearcher searcher = null;
    Query query = new TermQuery(new Term("content", "aaa"));

    for (int iteration = 0; iteration < this.numIteration; iteration++)
    {
        try
        {
            searcher = new IndexSearcher(dir);
        }
        catch (System.Exception e)
        {
            ReportUnexpectedException("Stress Test Index Searcher: create hit unexpected exception: ", e);
            break;
        }

        if (searcher == null)
        {
            continue;
        }

        ScoreDoc[] hits = null;
        try
        {
            hits = searcher.Search(query, null, 1000).scoreDocs;
        }
        catch (System.IO.IOException e)
        {
            ReportUnexpectedException("Stress Test Index Searcher: search hit unexpected exception: ", e);
            break;
        }

        try
        {
            searcher.Close();
        }
        catch (System.IO.IOException e)
        {
            ReportUnexpectedException("Stress Test Index Searcher: close hit unexpected exception: ", e);
            break;
        }

        searcher = null;
    }
}

// Flags the failure and writes the message prefix, the exception and its stack trace to standard output.
private void ReportUnexpectedException(System.String messagePrefix, System.Exception e)
{
    hitException = true;
    System.Console.Out.WriteLine(messagePrefix + e.ToString());
    System.Console.Out.WriteLine(e.StackTrace);
}
/// <summary>
/// Tells whether the index already holds a document whose "bctid" term equals
/// <paramref name="id"/>.
/// </summary>
/// <param name="id">the value to look up in the "bctid" field</param>
/// <param name="index">the directory to search; it is not closed by this method</param>
/// <returns><c>true</c> when at least one document matches; otherwise <c>false</c></returns>
private bool videoExistsInIndex(string id, Lucene.Net.Store.Directory index)
{
    Lucene.Net.Search.TermQuery termQuery = new Lucene.Net.Search.TermQuery(new Lucene.Net.Index.Term("bctid", id));

    // A searcher opened on a Directory owns the reader it creates, so it has to be
    // closed here; otherwise every call leaks the reader and its open index files.
    Lucene.Net.Search.Searcher termSearcher = new Lucene.Net.Search.IndexSearcher(index, true);
    try
    {
        // Only the total hit count is needed, so a single-entry collector is enough.
        Lucene.Net.Search.TopScoreDocCollector termCollector = Lucene.Net.Search.TopScoreDocCollector.Create(1, true);
        termSearcher.Search(termQuery, termCollector);
        return termCollector.TopDocs().TotalHits > 0;
    }
    finally
    {
        termSearcher.Close();
    }
}
/// <summary>
/// Create the More like query from a PriorityQueue.
/// Each queue entry is read as an <c>object[]</c> holding the term text at index 0, the
/// field name at index 1 and a <c>float</c> score at index 2. Entries are added as optional
/// (SHOULD) clauses until the queue is empty, <c>maxQueryTerms</c> is reached (when
/// positive) or the boolean query refuses further clauses.
/// </summary>
/// <param name="q">the queue of scored terms; it is drained by this method</param>
/// <returns>the boolean query built from the popped terms</returns>
private Query CreateQuery(PriorityQueue q)
{
    BooleanQuery result = new BooleanQuery();
    int termsAdded = 0;
    float topScore = 0;

    for (System.Object entry = q.Pop(); entry != null; entry = q.Pop())
    {
        System.Object[] parts = (System.Object[])entry;
        TermQuery termQuery = new TermQuery(new Term((System.String)parts[1], (System.String)parts[0]));

        if (boost)
        {
            // The first popped term sets the reference score; boosts are relative to it.
            float score = (float)parts[2];
            if (termsAdded == 0)
            {
                topScore = score;
            }
            termQuery.SetBoost(score / topScore);
        }

        try
        {
            result.Add(termQuery, BooleanClause.Occur.SHOULD);
        }
        catch (BooleanQuery.TooManyClauses)
        {
            // The query is full; return what has been collected so far.
            break;
        }

        termsAdded++;
        if (maxQueryTerms > 0 && termsAdded >= maxQueryTerms)
        {
            break;
        }
    }

    return result;
}
/// <summary>
/// Tells whether the index at <c>indexDir</c> already contains a document whose "url" term
/// equals <paramref name="url"/>.
/// </summary>
/// <param name="url">the URL to look up</param>
/// <returns><c>true</c> when at least one document matches; otherwise <c>false</c></returns>
/// <remarks>
/// A failing search now surfaces as the original exception. Previously it was swallowed and
/// the method then failed with a <see cref="System.NullReferenceException"/> on the missing
/// result.
/// </remarks>
public static bool PreviouslyIndexed(string url)
{
    string indexFileLocation = indexDir;
    Lucene.Net.Store.Directory dir = Lucene.Net.Store.FSDirectory.GetDirectory(indexFileLocation, false);
    try
    {
        Lucene.Net.Search.IndexSearcher searcher = new Lucene.Net.Search.IndexSearcher(dir);
        try
        {
            Lucene.Net.Search.Query query = new Lucene.Net.Search.TermQuery(new Lucene.Net.Index.Term("url", url));
            Lucene.Net.Search.Hits hits = searcher.Search(query);

            // Read the count while the searcher is still open.
            return hits.Length() > 0;
        }
        finally
        {
            searcher.Close();
        }
    }
    finally
    {
        // Release the directory obtained above; the searcher does not close it.
        dir.Close();
    }
}
/// <summary>
/// Indexes the document built by <c>MakeDocumentWithFields</c>, finds it again through a
/// term query on the "keyword" field and checks the retrieved document with <c>DoAssert</c>.
/// </summary>
public virtual void TestGetValuesForIndexedDocument()
{
    RAMDirectory dir = new RAMDirectory();

    IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    writer.AddDocument(MakeDocumentWithFields());
    writer.Close();

    Searcher searcher = new IndexSearcher(dir);

    // Look up a keyword value that is known to be in the indexed document.
    Term keywordTerm = new Term("keyword", "test1");
    Query query = new TermQuery(keywordTerm);

    // ensure that queries return expected results without DateFilter first
    ScoreDoc[] hits = searcher.Search(query, null, 1000).ScoreDocs;
    Assert.AreEqual(1, hits.Length);

    int matchedDocId = hits[0].doc;
    DoAssert(searcher.Doc(matchedDocId), true);

    searcher.Close();
}
/// <summary>
/// Searches the index at <c>indexDir</c> for documents matching <paramref name="s"/> in the
/// "content" or "title" field, combined with a "url" term taken from <c>fromUrl</c>, and
/// returns the distinct matches.
/// </summary>
/// <param name="s">the term text to look for in "content" and "title"</param>
/// <returns>
/// The matching items. The search is best-effort: if it fails part-way, the failure is
/// written to standard error and whatever was collected up to that point is returned.
/// </returns>
public static List<IndexedItem> SearchProjects(string s)
{
    List<IndexedItem> retVal = new List<IndexedItem>();
    string indexFileLocation = indexDir;
    Lucene.Net.Store.Directory dir = Lucene.Net.Store.FSDirectory.GetDirectory(indexFileLocation, false);
    try
    {
        Lucene.Net.Search.IndexSearcher searcher = new Lucene.Net.Search.IndexSearcher(dir);
        try
        {
            Lucene.Net.Search.Query query = new Lucene.Net.Search.TermQuery(new Lucene.Net.Index.Term("content", s));
            query = query.Combine(new Lucene.Net.Search.Query[] { query, new Lucene.Net.Search.TermQuery(new Lucene.Net.Index.Term("url", fromUrl)) });
            query = query.Combine(new Lucene.Net.Search.Query[] { query, new Lucene.Net.Search.TermQuery(new Lucene.Net.Index.Term("title", s)) });

            // execute the query
            Lucene.Net.Search.Hits hits = searcher.Search(query);

            // iterate over the results
            for (int i = 0; i < hits.Length(); i++)
            {
                Lucene.Net.Documents.Document doc = hits.Doc(i);
                retVal.Add(new IndexedItem
                {
                    Article = doc.Get("content"),
                    Href = doc.Get("url"),
                    Title = doc.Get("title")
                });
            }

            foreach (IndexedItem ind in retVal)
            {
                Console.WriteLine(ind.Href);
            }

            retVal = retVal.Distinct().ToList();
        }
        catch (Exception ex)
        {
            // Stay best-effort as before, but make the failure visible rather than
            // silently returning a partial result.
            Console.Error.WriteLine("SearchProjects failed: " + ex);
        }
        finally
        {
            searcher.Close();
        }
    }
    finally
    {
        // Release the directory obtained above; the searcher does not close it.
        dir.Close();
    }

    return retVal;
}
/// <summary>
/// Client side of the LUCENENET-100 scenario: obtains a remote <c>Searchable</c> from
/// tcp://localhost:38087/Searcher, wraps it in a <c>MultiSearcher</c> and runs a sorted term
/// search. Any exception is stored in <c>LUCENENET_100_Exception</c>, and
/// <c>LUCENENET_100_testFinished</c> is set once the attempt is over.
/// </summary>
void LUCENENET_100_ClientSearch()
{
    try
    {
        object remoteObject = Activator.GetObject(typeof(Lucene.Net.Search.Searchable), @"tcp://localhost:38087/Searcher");
        Lucene.Net.Search.Searchable s = (Lucene.Net.Search.Searchable)remoteObject;
        Lucene.Net.Search.MultiSearcher searcher = new Lucene.Net.Search.MultiSearcher(new Lucene.Net.Search.Searchable[] { s });

        Lucene.Net.Index.Term moonTerm = new Lucene.Net.Index.Term("field1", "moon");
        Lucene.Net.Search.Query q = new Lucene.Net.Search.TermQuery(moonTerm);

        // Sort on "field2" as an integer field.
        Lucene.Net.Search.Sort sort = new Lucene.Net.Search.Sort();
        Lucene.Net.Search.SortField byField2 = new Lucene.Net.Search.SortField("field2", Lucene.Net.Search.SortField.INT);
        sort.SetSort(byField2);

        // Only the fact that the sorted search completes matters; the result is not inspected.
        Lucene.Net.Search.TopDocs h = searcher.Search(q, null, 100, sort);
    }
    catch (Exception ex)
    {
        LUCENENET_100_Exception = ex;
    }
    finally
    {
        LUCENENET_100_testFinished = true;
    }
}
/// <summary>
/// Translates a Lucene term query (City:TelAviv) through <c>LuceneVisitor</c>, visits the
/// resulting clause with the visitor from <c>VisitorTestsUtils.CreateAndVisitRootVisitor</c>
/// and returns the generated KustoQL.
/// </summary>
/// <returns>the <c>KustoQL</c> text of the translated query-string clause</returns>
public string Visit_WithValidTermQuery_ReturnsValidReponse()
{
    var cityTerm = new Lucene.Net.Index.Term("City", "TelAviv");
    var luceneTermQuery = new LuceneTermQuery
    {
        LuceneQuery = new Lucene.Net.Search.TermQuery(cityTerm),
    };

    luceneTermQuery.Accept(new LuceneVisitor());

    var es = luceneTermQuery.ESQuery;
    Assert.NotNull(es);

    var visitor = VisitorTestsUtils.CreateAndVisitRootVisitor();
    var clause = (QueryStringClause)es;
    visitor.Visit(clause);

    return clause.KustoQL;
}
/// <summary>
/// Translates a Lucene term query (City:TelAviv) through <c>LuceneVisitor</c>, visits the
/// resulting clause with an <c>ElasticSearchDSLVisitor</c> built on a mock schema retriever
/// and returns the generated KustoQL.
/// </summary>
/// <returns>the <c>KustoQL</c> text of the translated query-string clause</returns>
public string Visit_WithValidTermQuery_ReturnsValidReponse()
{
    var cityTerm = new Lucene.Net.Index.Term("City", "TelAviv");
    var luceneTermQuery = new LuceneTermQuery
    {
        LuceneQuery = new Lucene.Net.Search.TermQuery(cityTerm),
    };

    luceneTermQuery.Accept(new LuceneVisitor());

    var es = luceneTermQuery.ESQuery;
    Assert.NotNull(es);

    var schemaRetriever = SchemaRetrieverMock.CreateMockSchemaRetriever();
    var visitor = new ElasticSearchDSLVisitor(schemaRetriever);
    var clause = (QueryStringClause)es;
    visitor.Visit(clause);

    return clause.KustoQL;
}
/// <summary>
/// Runs a term query for <paramref name="searchText"/> on the "content" field of the index
/// stored at <c>_Location</c> and walks over the (at most 100) returned hits.
/// </summary>
/// <param name="searchText">the term text to look for in the "content" field</param>
public void Query(string searchText)
{
    // state the file location of the index
    DirectoryInfo directoryInfo = new DirectoryInfo(_Location);
    Lucene.Net.Store.Directory dir = Lucene.Net.Store.FSDirectory.Open(directoryInfo);
    try
    {
        // create an index searcher that will perform the search
        Lucene.Net.Search.IndexSearcher searcher = new Lucene.Net.Search.IndexSearcher(dir, true);
        try
        {
            // build a query object
            Lucene.Net.Index.Term searchTerm = new Lucene.Net.Index.Term("content", searchText);
            Lucene.Net.Search.Query query = new Lucene.Net.Search.TermQuery(searchTerm);

            // execute the query
            Lucene.Net.Search.TopDocs hits = searcher.Search(query, null, 100);

            // Iterate over the returned hits. TotalHits counts every match in the index,
            // while ScoreDocs holds only the requested top 100, so the array length is
            // the safe bound here.
            for (int i = 0; i < hits.ScoreDocs.Length; i++)
            {
                Lucene.Net.Search.ScoreDoc doc = hits.ScoreDocs[i];
            }
        }
        finally
        {
            searcher.Close();
        }
    }
    finally
    {
        dir.Close();
    }
}
/// <summary> Simple similarity query generators.
/// Takes every unique word and forms a boolean query where all words are optional.
/// After you get this you'll use it to query your <see cref="IndexSearcher"/> for similar docs.
/// The only caveat is the first hit returned <b>should be</b> your source document - you'll
/// need to then ignore that.
///
/// <p/>
///
/// So, if you have a code fragment like this:
/// <br/>
/// <code>
/// Query q = FormSimilarQuery( "I use Lucene to search fast. Fast searchers are good", new StandardAnalyzer(), "contents", null);
/// </code>
///
/// <p/>
/// The query returned, in string form, will be <c>'(i use lucene to search fast searchers are good')</c>.
///
/// <p/>
/// The philosophy behind this method is "two documents are similar if they share lots of words".
/// Note that behind the scenes, Lucene's scoring algorithm will tend to give two documents a higher
/// similarity score if they share more uncommon words.
///
/// <p/>
/// This method is fail-safe in that if a long 'body' is passed in and
/// <see cref="BooleanQuery.Add(BooleanClause)"/> (used internally)
/// throws
/// <see cref="BooleanQuery.TooManyClauses"/>, the
/// query as it is will be returned.
/// </summary>
/// <param name="body">the body of the document you want to find similar documents to
/// </param>
/// <param name="a">the analyzer to use to parse the body
/// </param>
/// <param name="field">the field you want to search on, probably something like "contents" or "body"
/// </param>
/// <param name="stop">optional set of stop words to ignore
/// </param>
/// <returns> a query with all unique words in 'body'
/// </returns>
/// <exception cref="System.IO.IOException">this can't happen...</exception>
public static Query FormSimilarQuery(System.String body, Analyzer a, System.String field, System.Collections.Hashtable stop)
{
    TokenStream ts = a.TokenStream(field, new System.IO.StringReader(body));
    Lucene.Net.Analysis.Token t;
    BooleanQuery tmp = new BooleanQuery();

    // Words already added to the query, so duplicates can be skipped.
    System.Collections.Hashtable already = new System.Collections.Hashtable();

    while ((t = ts.Next()) != null)
    {
        System.String word = t.TermText();

        // ignore optional stop words
        if (stop != null && stop.Contains(word))
        {
            continue;
        }

        // ignore dups
        if (already.Contains(word))
        {
            continue;
        }
        already.Add(word, word);

        // add to query
        TermQuery tq = new TermQuery(new Term(field, word));
        try
        {
            tmp.Add(tq, BooleanClause.Occur.SHOULD);
        }
        catch (BooleanQuery.TooManyClauses)
        {
            // fail-safe, just return what we have, not the end of the world
            break;
        }
    }

    return tmp;
}
/// <summary>
/// Indexes the document built by <c>MakeDocumentWithFields</c>, finds it again through a
/// term query on the "keyword" field and checks the retrieved document with <c>DoAssert</c>.
/// </summary>
public virtual void TestGetValuesForIndexedDocument()
{
    RAMDirectory dir = new RAMDirectory();

    IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    writer.AddDocument(MakeDocumentWithFields());
    writer.Close();

    Searcher searcher = new IndexSearcher(dir);

    // Look up a keyword value that is known to be in the indexed document.
    Term keywordTerm = new Term("keyword", "test1");
    Query query = new TermQuery(keywordTerm);

    // ensure that queries return expected results without DateFilter first
    ScoreDoc[] hits = searcher.Search(query, null, 1000).scoreDocs;
    Assert.AreEqual(1, hits.Length);

    int matchedDocId = hits[0].doc;
    DoAssert(searcher.Doc(matchedDocId), true);

    searcher.Close();
}
/// <summary>
/// Opens fresh near-real-time readers from the writer in a loop while background worker
/// threads (<c>AnonymousClassThread</c>; presumably adding indexes from the copied
/// directories, per the test name) run for a fixed time window. The hit count seen by
/// successive readers must never decrease and the workers must record no exceptions.
/// </summary>
public virtual void TestDuringAddIndexes_LuceneNet()
{
    MockRAMDirectory dir1 = new MockRAMDirectory();
    IndexWriter writer = new IndexWriter(dir1, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED);
    writer.SetInfoStream(infoStream);
    writer.SetMergeFactor(2);

    // create the index
    CreateIndexNoClose(false, "test", writer);
    writer.Commit();

    // Ten in-memory copies of the committed index, handed to the worker threads.
    Directory[] dirs = new Directory[10];
    for (int d = 0; d < 10; d++)
    {
        dirs[d] = new MockRAMDirectory(dir1);
    }

    IndexReader r = writer.GetReader();

    int threadCount = 5;
    float runSeconds = 3;

    // Deadline in milliseconds, on the same clock the polling loop below reads.
    long endTime = (long)((DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond) + 1000.0 * runSeconds);

    // Synchronized list the workers use to report exceptions back to this thread.
    System.Collections.IList excs = System.Collections.ArrayList.Synchronized(new System.Collections.ArrayList());

    SupportClass.ThreadClass[] threads = new SupportClass.ThreadClass[threadCount];
    for (int t = 0; t < threads.Length; t++)
    {
        SupportClass.ThreadClass worker = new AnonymousClassThread(endTime, writer, dirs, excs, this);
        worker.IsBackground = true;
        threads[t] = worker;
        worker.Start();
    }

    int lastCount = 0;
    while ((DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond) < endTime)
    {
        using (IndexReader r2 = writer.GetReader())
        {
            Query q = new TermQuery(new Term("indexname", "test"));
            int count = new IndexSearcher(r2).Search(q, 10).TotalHits;
            Assert.IsTrue(count >= lastCount);
            lastCount = count;
        }
    }

    foreach (SupportClass.ThreadClass worker in threads)
    {
        worker.Join();
    }

    Assert.AreEqual(0, excs.Count);
    r.Close();
    Assert.AreEqual(0, dir1.GetOpenDeletedFiles().Count);

    writer.Close();
    _TestUtil.CheckIndex(dir1);
    dir1.Close();
}
/// <summary>
/// Checks that a reader obtained from an <see cref="IndexWriter"/> stays usable after the
/// writer has been disposed, and that <c>DirectoryReader.OpenIfChanged</c> on it then throws
/// <see cref="AlreadyClosedException"/>.
/// </summary>
public virtual void TestAfterClose()
{
    Directory dir1 = GetAssertNoDeletesDirectory(NewDirectory());
    IndexWriterConfig writerConfig = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
    IndexWriter writer = new IndexWriter(dir1, writerConfig);

    // Populate the index, take a reader from the writer, then dispose the writer.
    CreateIndexNoClose(false, "test", writer);
    DirectoryReader r = writer.Reader;
    writer.Dispose();

    TestUtil.CheckIndex(dir1);

    // The reader should remain usable even after the IndexWriter is closed.
    Assert.AreEqual(100, r.NumDocs);
    Term indexNameTerm = new Term("indexname", "test");
    Query q = new TermQuery(indexNameTerm);
    IndexSearcher searcher = NewSearcher(r);
    Assert.AreEqual(100, searcher.Search(q, 10).TotalHits);

    try
    {
        DirectoryReader.OpenIfChanged(r);
        Assert.Fail("failed to hit AlreadyClosedException");
    }
    catch (AlreadyClosedException)
    {
        // expected
    }

    r.Dispose();
    dir1.Dispose();
}
/// <summary>
/// Checks that the highlighter honours its max-bytes-to-analyze limit: with the limit set to
/// 100, the best fragment of a text made of one matching token followed by 10000 stop words
/// must be shorter than the limit, also after another matching token is appended far beyond
/// the analyzed range.
/// </summary>
public virtual void TestMaxSizeHighlightTruncates()
{
    System.String goodWord = "goodtoken";
    System.String[] stopWords = new System.String[] { "stoppedtoken" };

    TermQuery query = new TermQuery(new Term("data", goodWord));
    SimpleHTMLFormatter formatter = new SimpleHTMLFormatter();
    Highlighter hg = new Highlighter(formatter, new QueryScorer(query));
    hg.SetTextFragmenter(new NullFragmenter());

    // One matching token followed by a long run of stop words.
    System.Text.StringBuilder text = new System.Text.StringBuilder();
    text.Append(goodWord);
    for (int i = 0; i < 10000; i++)
    {
        text.Append(" ").Append(stopWords[0]);
    }

    hg.SetMaxDocBytesToAnalyze(100);
    System.String match = hg.GetBestFragment(new StandardAnalyzer(stopWords), "data", text.ToString());
    Assert.IsTrue(match.Length < hg.GetMaxDocBytesToAnalyze(), "Matched text should be no more than 100 chars in length ");

    // Add another tokenized word to the overall length - but set way beyond the length of
    // text under consideration (after a large slug of stop words + whitespace).
    text.Append(" ").Append(goodWord);
    match = hg.GetBestFragment(new StandardAnalyzer(stopWords), "data", text.ToString());
    Assert.IsTrue(match.Length < hg.GetMaxDocBytesToAnalyze(), "Matched text should be no more than 100 chars in length ");
}
/// <summary>
/// Indexes a single document whose "content" field holds a Farsi term and checks that a
/// <see cref="TermRangeFilter"/> over the first range excludes it while one over the second
/// range includes it.
/// </summary>
/// <param name="analyzer">the (collating) analyzer used to index the document</param>
/// <param name="firstBeg">lower bound of the range expected to exclude the term</param>
/// <param name="firstEnd">upper bound of the range expected to exclude the term</param>
/// <param name="secondBeg">lower bound of the range expected to include the term</param>
/// <param name="secondEnd">upper bound of the range expected to include the term</param>
public virtual void TestFarsiRangeFilterCollating(Analyzer analyzer, BytesRef firstBeg, BytesRef firstEnd, BytesRef secondBeg, BytesRef secondEnd)
{
    Directory dir = NewDirectory();
    IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, analyzer));

    Document doc = new Document();
    doc.Add(new TextField("content", "\u0633\u0627\u0628", Field.Store.YES));
    doc.Add(new StringField("body", "body", Field.Store.YES));
    writer.AddDocument(doc);
    writer.Dispose();

    IndexReader reader = DirectoryReader.Open(dir);
    IndexSearcher searcher = new IndexSearcher(reader);
    Query query = new TermQuery(new Term("body", "body"));

    // Unicode order would include U+0633 in [ U+062F - U+0698 ], but Farsi
    // orders the U+0698 character before the U+0633 character, so the single
    // index Term below should NOT be returned by a TermRangeFilter with a Farsi
    // Collator (or an Arabic one for the case when Farsi searcher not
    // supported).
    TermRangeFilter excludingRange = new TermRangeFilter("content", firstBeg, firstEnd, true, true);
    ScoreDoc[] result = searcher.Search(query, excludingRange, 1).ScoreDocs;
    Assert.AreEqual(0, result.Length, "The index Term should not be included.");

    TermRangeFilter includingRange = new TermRangeFilter("content", secondBeg, secondEnd, true, true);
    result = searcher.Search(query, includingRange, 1).ScoreDocs;
    Assert.AreEqual(1, result.Length, "The index Term should be included.");

    reader.Dispose();
    dir.Dispose();
}
/// <summary>
/// Refreshes a near-real-time reader in a loop while a background worker thread
/// (<c>ThreadAnonymousInnerClassHelper</c>; presumably adding indexes from the copied
/// directories, per the test name) runs for a fixed time window. The hit count must never
/// decrease between refreshes, the worker must record no exceptions, and no deleted files
/// may remain open afterwards.
/// </summary>
public virtual void TestDuringAddIndexes()
{
    Directory dir1 = GetAssertNoDeletesDirectory(NewDirectory());
    IndexWriter writer = new IndexWriter(
        dir1,
        NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NewLogMergePolicy(2)));

    // create the index
    CreateIndexNoClose(false, "test", writer);
    writer.Commit();

    // Ten wrapped in-memory copies of the committed index, handed to the worker.
    Directory[] dirs = new Directory[10];
    for (int d = 0; d < 10; d++)
    {
        dirs[d] = new MockDirectoryWrapper(Random(), new RAMDirectory(dir1, NewIOContext(Random())));
    }

    DirectoryReader r = writer.Reader;

    const float runSeconds = 0.5f;

    // Deadline on the Environment.TickCount clock, in milliseconds.
    long endTime = (long)(Environment.TickCount + 1000.0 * runSeconds);
    IList<Exception> excs = new SynchronizedCollection<Exception>();

    // Only one thread can addIndexes at a time, because
    // IndexWriter acquires a write lock in each directory:
    var threads = new ThreadClass[1];
    for (int t = 0; t < threads.Length; t++)
    {
        ThreadClass worker = new ThreadAnonymousInnerClassHelper(writer, dirs, endTime, excs);
        threads[t] = worker;
        worker.SetDaemon(true);
        worker.Start();
    }

    int lastCount = 0;
    while (Environment.TickCount < endTime)
    {
        // OpenIfChanged returns null when there is nothing newer than the current reader.
        DirectoryReader refreshed = DirectoryReader.OpenIfChanged(r);
        if (refreshed != null)
        {
            r.Dispose();
            r = refreshed;
        }

        Query q = new TermQuery(new Term("indexname", "test"));
        IndexSearcher searcher = NewSearcher(r);
        int count = searcher.Search(q, 10).TotalHits;
        Assert.IsTrue(count >= lastCount);
        lastCount = count;
    }

    foreach (ThreadClass worker in threads)
    {
        worker.Join();
    }

    // final check
    DirectoryReader finalReader = DirectoryReader.OpenIfChanged(r);
    if (finalReader != null)
    {
        r.Dispose();
        r = finalReader;
    }

    Query finalQuery = new TermQuery(new Term("indexname", "test"));
    IndexSearcher finalSearcher = NewSearcher(r);
    int finalCount = finalSearcher.Search(finalQuery, 10).TotalHits;
    Assert.IsTrue(finalCount >= lastCount);

    Assert.AreEqual(0, excs.Count);
    r.Dispose();

    MockDirectoryWrapper mockDir = dir1 as MockDirectoryWrapper;
    if (mockDir != null)
    {
        ICollection<string> openDeletedFiles = mockDir.OpenDeletedFiles;
        Assert.AreEqual(0, openDeletedFiles.Count, "openDeleted=" + openDeletedFiles);
    }

    writer.Dispose();

    dir1.Dispose();
}
/// <summary>
/// Checks that a reader obtained from an <see cref="IndexWriter"/> stays usable after the
/// writer has been closed, and that reopening it then throws
/// <see cref="AlreadyClosedException"/>.
/// </summary>
public virtual void TestAfterClose()
{
    Directory dir1 = new MockRAMDirectory();
    IndexWriter writer = new IndexWriter(dir1, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED);
    writer.SetInfoStream(infoStream);

    // Populate the index, take a reader from the writer, then close the writer.
    CreateIndexNoClose(false, "test", writer);
    IndexReader r = writer.GetReader();
    writer.Close();

    _TestUtil.CheckIndex(dir1);

    // The reader should remain usable even after the IndexWriter is closed.
    Assert.AreEqual(100, r.NumDocs());
    Term indexNameTerm = new Term("indexname", "test");
    Query q = new TermQuery(indexNameTerm);
    IndexSearcher searcher = new IndexSearcher(r);
    Assert.AreEqual(100, searcher.Search(q, 10).TotalHits);

    Assert.Throws<AlreadyClosedException>(() => r.Reopen(), "failed to hit AlreadyClosedException");

    r.Close();
    dir1.Close();
}
/// <summary>
/// Builds a 30-document index with a "noTf" field (term frequencies and positions omitted)
/// and a regular "tf" field, then runs four term queries and one conjunction through the
/// counting hit collectors. Even-numbered documents carry the extra token "tf" in the "tf"
/// field and odd-numbered ones carry "notf" in the "noTf" field, so the final conjunction
/// is expected to count 15 documents.
/// </summary>
public virtual void TestBasic()
{
    Directory dir = new MockRAMDirectory();
    Analyzer analyzer = new StandardAnalyzer();
    IndexWriter writer = new IndexWriter(dir, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
    writer.SetMergeFactor(2);
    writer.SetMaxBufferedDocs(2);
    writer.SetSimilarity(new SimpleSimilarity());

    System.Text.StringBuilder sb = new System.Text.StringBuilder(265);
    System.String term = "term";
    for (int i = 0; i < 30; i++)
    {
        Document doc = new Document();

        // Every document holds one more repetition of the term than the previous one.
        sb.Append(term).Append(" ");
        System.String content = sb.ToString();
        bool evenDoc = i % 2 == 0;

        Field noTf = new Field("noTf", content + (evenDoc ? "" : " notf"), Field.Store.NO, Field.Index.ANALYZED);
        noTf.SetOmitTermFreqAndPositions(true);
        doc.Add(noTf);

        Field tf = new Field("tf", content + (evenDoc ? " tf" : ""), Field.Store.NO, Field.Index.ANALYZED);
        doc.Add(tf);

        writer.AddDocument(doc);
    }

    writer.Optimize();
    // flush
    writer.Close();
    _TestUtil.CheckIndex(dir);

    /*
     * Verify the index
     */
    Searcher searcher = new IndexSearcher(dir);
    searcher.SetSimilarity(new SimpleSimilarity());

    TermQuery q1 = new TermQuery(new Term("noTf", term));
    TermQuery q2 = new TermQuery(new Term("tf", term));
    TermQuery q3 = new TermQuery(new Term("noTf", "notf"));
    TermQuery q4 = new TermQuery(new Term("tf", "tf"));

    searcher.Search(q1, new AnonymousClassCountingHitCollector(this));
    searcher.Search(q2, new AnonymousClassCountingHitCollector1(this));
    searcher.Search(q3, new AnonymousClassCountingHitCollector2(this));
    searcher.Search(q4, new AnonymousClassCountingHitCollector3(this));

    // Documents that contain the term in "noTf" AND the marker "tf" in "tf".
    BooleanQuery bq = new BooleanQuery();
    bq.Add(q1, Occur.MUST);
    bq.Add(q4, Occur.MUST);

    searcher.Search(bq, new AnonymousClassCountingHitCollector4(this));
    Assert.IsTrue(15 == CountingHitCollector.GetCount());

    searcher.Close();
    dir.Close();
}
/// <summary>
/// Opens fresh near-real-time readers from the writer in a loop while background worker
/// threads (<c>AnonymousClassThread</c>; presumably adding indexes from the copied
/// directories, per the test name) run for a fixed time window. The hit count seen by
/// successive readers must never decrease and the workers must record no exceptions.
/// </summary>
public virtual void TestDuringAddIndexes_LuceneNet()
{
    MockRAMDirectory dir1 = new MockRAMDirectory();
    IndexWriter writer = new IndexWriter(dir1, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED);
    writer.SetInfoStream(infoStream);
    writer.MergeFactor = 2;

    // create the index
    CreateIndexNoClose(false, "test", writer);
    writer.Commit();

    // Ten in-memory copies of the committed index, handed to the worker threads.
    Directory[] dirs = new Directory[10];
    for (int d = 0; d < 10; d++)
    {
        dirs[d] = new MockRAMDirectory(dir1);
    }

    IndexReader r = writer.GetReader();

    int threadCount = 5;
    float runSeconds = 3;

    // Deadline in milliseconds, on the same clock the polling loop below reads.
    long endTime = (long)((DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond) + 1000.0 * runSeconds);

    // Synchronized list the workers use to report exceptions back to this thread.
    System.Collections.IList excs = System.Collections.ArrayList.Synchronized(new System.Collections.ArrayList());

    ThreadClass[] threads = new ThreadClass[threadCount];
    for (int t = 0; t < threads.Length; t++)
    {
        ThreadClass worker = new AnonymousClassThread(endTime, writer, dirs, excs, this);
        worker.IsBackground = true;
        threads[t] = worker;
        worker.Start();
    }

    int lastCount = 0;
    while ((DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond) < endTime)
    {
        using (IndexReader r2 = writer.GetReader())
        {
            Query q = new TermQuery(new Term("indexname", "test"));
            int count = new IndexSearcher(r2).Search(q, 10).TotalHits;
            Assert.IsTrue(count >= lastCount);
            lastCount = count;
        }
    }

    foreach (ThreadClass worker in threads)
    {
        worker.Join();
    }

    Assert.AreEqual(0, excs.Count);
    r.Close();
    Assert.AreEqual(0, dir1.GetOpenDeletedFiles().Count);

    writer.Close();
    _TestUtil.CheckIndex(dir1);
    dir1.Close();
}
/// <summary>
/// Repeatedly reopens a near-real-time reader and searches it while background worker
/// threads (<c>AnonymousClassThread1</c>; presumably adding and deleting documents, per the
/// test name) operate on the same writer for a fixed time window. The test requires that
/// the searches found hits and that the workers recorded no exceptions.
/// </summary>
public virtual void TestDuringAddDelete()
{
    Directory dir1 = new MockRAMDirectory();
    IndexWriter writer = new IndexWriter(dir1, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED);
    writer.SetInfoStream(infoStream);
    writer.MergeFactor = 2;

    // create the index
    CreateIndexNoClose(false, "test", writer);
    writer.Commit();

    IndexReader r = writer.GetReader();

    int threadCount = 5;
    float runSeconds = 3;

    // Deadline in milliseconds, on the same clock the polling loop below reads.
    long endTime = (long)((DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond) + 1000.0 * runSeconds);

    // Synchronized list the workers use to report exceptions back to this thread.
    System.Collections.IList excs = System.Collections.ArrayList.Synchronized(new System.Collections.ArrayList());

    ThreadClass[] threads = new ThreadClass[threadCount];
    for (int t = 0; t < threads.Length; t++)
    {
        ThreadClass worker = new AnonymousClassThread1(endTime, writer, excs, this);
        worker.IsBackground = true;
        threads[t] = worker;
        worker.Start();
    }

    int sum = 0;
    while ((DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond) < endTime)
    {
        // Swap to the reopened reader only when Reopen actually produced a new instance.
        IndexReader reopened = r.Reopen();
        if (reopened != r)
        {
            r.Close();
            r = reopened;
        }

        Query q = new TermQuery(new Term("indexname", "test"));
        sum += new IndexSearcher(r).Search(q, 10).TotalHits;
    }

    foreach (ThreadClass worker in threads)
    {
        worker.Join();
    }

    Assert.IsTrue(sum > 0);
    Assert.AreEqual(0, excs.Count);

    writer.Close();
    _TestUtil.CheckIndex(dir1);
    r.Close();
    dir1.Close();
}
/// <summary>
/// Builds an index through seven <c>indexTwoDocs</c> calls that mix documents with and
/// without content and with and without categories, then checks that facet counting for
/// the query f:a yields exactly two children under "A", each with a count of 2.
/// </summary>
public virtual void TestSegmentsWithoutCategoriesOrResults()
{
    // tests the accumulator when there are segments with no results
    var indexDir = NewDirectory();
    var taxoDir = NewDirectory();

    IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
    IndexWriter indexWriter = new IndexWriter(indexDir, iwc);
    var taxoWriter = new DirectoryTaxonomyWriter(taxoDir);
    FacetsConfig config = new FacetsConfig();

    indexTwoDocs(taxoWriter, indexWriter, config, false); // 1st segment, no content, with categories
    indexTwoDocs(taxoWriter, indexWriter, null, true);    // 2nd segment, with content, no categories
    indexTwoDocs(taxoWriter, indexWriter, config, true);  // 3rd segment ok
    indexTwoDocs(taxoWriter, indexWriter, null, false);   // 4th segment, no content, or categories
    indexTwoDocs(taxoWriter, indexWriter, null, true);    // 5th segment, with content, no categories
    indexTwoDocs(taxoWriter, indexWriter, config, true);  // 6th segment, with content, with categories
    indexTwoDocs(taxoWriter, indexWriter, null, true);    // 7th segment, with content, no categories
    IOUtils.Close(indexWriter, taxoWriter);

    DirectoryReader indexReader = DirectoryReader.Open(indexDir);
    var taxoReader = new DirectoryTaxonomyReader(taxoDir);
    IndexSearcher indexSearcher = NewSearcher(indexReader);

    // search for "f:a", only segments 1 and 3 should match results
    Term contentTerm = new Term("f", "a");
    Query q = new TermQuery(contentTerm);
    FacetsCollector sfc = new FacetsCollector();
    indexSearcher.Search(q, sfc);

    Facets facets = GetTaxonomyFacetCounts(taxoReader, config, sfc);
    FacetResult result = facets.GetTopChildren(10, "A");
    Assert.AreEqual(2, result.LabelValues.Length, "wrong number of children");
    foreach (LabelAndValue child in result.LabelValues)
    {
        Assert.AreEqual(2, (int)child.value, "wrong weight for child " + child.label);
    }

    IOUtils.Close(indexReader, taxoReader, indexDir, taxoDir);
}
/// <summary>
/// Verifies that wrapping a query in a <see cref="DrillDownQuery"/> restricted to
/// category "a" leaves the score of every returned document unchanged.
/// </summary>
public virtual void TestScoring()
{
    // verify that drill-down queries do not modify scores
    IndexSearcher searcher = NewSearcher(reader);

    // Baseline: score of every document matched by the plain query, indexed by doc id.
    float[] baselineScores = new float[reader.MaxDoc];
    Query q = new TermQuery(new Term("content", "foo"));
    TopDocs baseline = searcher.Search(q, reader.MaxDoc); // fetch all available docs to this query
    foreach (ScoreDoc hit in baseline.ScoreDocs)
    {
        baselineScores[hit.Doc] = hit.Score;
    }

    // create a drill-down query with category "a", scores should not change
    DrillDownQuery drillDown = new DrillDownQuery(config, q);
    drillDown.Add("a");
    TopDocs drilled = searcher.Search(drillDown, reader.MaxDoc); // fetch all available docs to this query
    foreach (ScoreDoc hit in drilled.ScoreDocs)
    {
        Assert.AreEqual(baselineScores[hit.Doc], hit.Score, 0f, "score of doc=" + hit.Doc + " modified");
    }
}
/// <summary>
/// Exercises <c>KeepLastNDeletionPolicy</c> while commits are produced both by
/// an IndexWriter and by an IndexReader (deletes / norm changes), then walks
/// backwards through the segments_N generations checking the hit count of each.
/// Runs two passes: without and with compound files.
/// </summary>
public virtual void TestKeepLastNDeletionPolicyWithReader()
{
    int N = 10;

    for (int pass = 0; pass < 2; pass++)
    {
        bool useCompoundFile = (pass % 2) != 0;

        KeepLastNDeletionPolicy policy = new KeepLastNDeletionPolicy(this, N);

        Directory dir = new RAMDirectory();
        IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, policy, IndexWriter.MaxFieldLength.UNLIMITED);
        writer.UseCompoundFile = useCompoundFile;
        writer.Close();

        Term searchTerm = new Term("content", "aaa");
        Query query = new TermQuery(searchTerm);

        for (int i = 0; i < N + 1; i++)
        {
            writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false, policy, IndexWriter.MaxFieldLength.UNLIMITED);
            writer.UseCompoundFile = useCompoundFile;
            for (int j = 0; j < 17; j++)
            {
                AddDoc(writer);
            }
            // this is a commit
            writer.Close();

            IndexReader reader = IndexReader.Open(dir, policy, false);
            reader.DeleteDocument(3 * i + 1);
            reader.SetNorm(4 * i + 1, "content", 2.0F);
            IndexSearcher searcher = new IndexSearcher(reader);
            ScoreDoc[] hits = searcher.Search(query, null, 1000).ScoreDocs;
            // 17 docs are added and 1 is deleted per iteration.
            Assert.AreEqual(16 * (1 + i), hits.Length);
            // this is a commit
            reader.Close();
            searcher.Close();
        }

        writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false, policy, IndexWriter.MaxFieldLength.UNLIMITED);
        writer.UseCompoundFile = useCompoundFile;
        writer.Optimize();
        // this is a commit
        writer.Close();

        Assert.AreEqual(2 * (N + 2), policy.numOnInit);
        Assert.AreEqual(2 * (N + 2) - 1, policy.numOnCommit);

        IndexSearcher searcher2 = new IndexSearcher(dir, false);
        ScoreDoc[] hits2 = searcher2.Search(query, null, 1000).ScoreDocs;
        Assert.AreEqual(176, hits2.Length);

        // Simplistic check: just verify only the past N segments_N's still
        // exist, and, I can open a reader on each:
        long gen = SegmentInfos.GetCurrentSegmentGeneration(dir);

        dir.DeleteFile(IndexFileNames.SEGMENTS_GEN);
        int expectedCount = 176;

        for (int i = 0; i < N + 1; i++)
        {
            try
            {
                IndexReader reader = IndexReader.Open(dir, true);

                // Work backwards in commits on what the expected
                // count should be.
                searcher2 = new IndexSearcher(reader);
                hits2 = searcher2.Search(query, null, 1000).ScoreDocs;
                if (i > 1)
                {
                    if (i % 2 == 0)
                    {
                        expectedCount += 1;
                    }
                    else
                    {
                        expectedCount -= 17;
                    }
                }
                Assert.AreEqual(expectedCount, hits2.Length);
                searcher2.Close();
                reader.Close();
                if (i == N)
                {
                    // Message now reports the actual N instead of a stale hard-coded "5".
                    Assert.Fail("should have failed on commits before last " + N);
                }
            }
            catch (System.IO.IOException)
            {
                // Only the (N+1)-th commit back is expected to be gone; anything
                // else is a real failure. Rethrow with "throw;" so the original
                // stack trace is preserved.
                if (i != N)
                {
                    throw;
                }
            }
            if (i < N)
            {
                dir.DeleteFile(IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen));
            }
            gen--;
        }

        dir.Close();
    }
}
/// <summary>
/// Repeatedly reopens a near-real-time reader while background threads work
/// against the same writer, asserting that the hit count for the indexed term
/// never decreases and that no background thread recorded an exception.
/// </summary>
public virtual void TestDuringAddIndexes()
{
    MockRAMDirectory dir1 = new MockRAMDirectory();
    IndexWriter writer = new IndexWriter(dir1, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED);
    writer.SetInfoStream(infoStream);
    writer.MergeFactor = 2;

    // create the index
    CreateIndexNoClose(false, "test", writer);
    writer.Commit();

    // Ten in-memory copies of the freshly committed index, handed to the worker threads.
    Directory[] dirs = new Directory[10];
    for (int d = 0; d < dirs.Length; d++)
    {
        dirs[d] = new MockRAMDirectory(dir1);
    }

    IndexReader r = writer.GetReader();

    int NUM_THREAD = 5;
    float SECONDS = 3;

    // Deadline in milliseconds, on the same clock the loop below polls.
    long endTime = (long)((DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond) + 1000.0 * SECONDS);

    System.Collections.ArrayList backingList = new System.Collections.ArrayList(new System.Collections.ArrayList());
    System.Collections.IList excs = (System.Collections.IList)System.Collections.ArrayList.Synchronized(backingList);

    ThreadClass[] threads = new ThreadClass[NUM_THREAD];
    for (int t = 0; t < NUM_THREAD; t++)
    {
        ThreadClass worker = new AnonymousClassThread(endTime, writer, dirs, excs, this);
        threads[t] = worker;
        worker.IsBackground = true;
        worker.Start();
    }

    int lastCount = 0;
    while ((DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond) < endTime)
    {
        IndexReader reopened = r.Reopen();
        if (reopened != r)
        {
            r.Close();
            r = reopened;
        }
        Query q = new TermQuery(new Term("indexname", "test"));
        int count = new IndexSearcher(r).Search(q, 10).TotalHits;
        Assert.IsTrue(count >= lastCount);
        lastCount = count;
    }

    for (int t = 0; t < NUM_THREAD; t++)
    {
        threads[t].Join();
    }

    Assert.AreEqual(0, excs.Count);
    r.Close();

    try
    {
        Assert.AreEqual(0, dir1.GetOpenDeletedFiles().Count);
    }
    catch
    {
        //DIGY:
        //I think this is an expected behaviour.
        //There isn't any pending files to be deleted after "writer.Close()".
        //But, since lucene.java's test case is designed that way
        //and I might be wrong, I will add a warning

        // Assert only in debug mode, so that CheckIndex is called during release.
#if DEBUG
        Assert.Inconclusive("", 0, dir1.GetOpenDeletedFiles().Count);
#endif
    }

    writer.Close();

    _TestUtil.CheckIndex(dir1);

    dir1.Close();
}
/// <summary>
/// Adds the term of <paramref name="query"/>, weighted by the query's boost,
/// to <paramref name="terms"/>.
/// </summary>
/// <param name="query">the term query to extract from</param>
/// <param name="terms">the set receiving the resulting <c>WeightedTerm</c></param>
/// <param name="fieldName">when non-null, the term is added only if its field equals this name</param>
private static void GetTermsFromTermQuery(TermQuery query, HashSetSupport terms, string fieldName)
{
    // A null field name means "accept every field".
    bool fieldAccepted = fieldName == null || query.GetTerm().Field() == fieldName;
    if (!fieldAccepted)
    {
        return;
    }

    terms.Add(new WeightedTerm(query.GetBoost(), query.GetTerm().Text()));
}
/// <summary>
/// Exercises <c>KeepLastNDeletionPolicy</c> when the index is repeatedly
/// re-opened with create=true between regular writer and reader commits, then
/// walks backwards through the segments_N generations. Runs four passes
/// covering autoCommit on/off and compound files off/on.
/// </summary>
public virtual void TestKeepLastNDeletionPolicyWithCreates()
{
    int N = 10;

    for (int pass = 0; pass < 4; pass++)
    {
        bool autoCommit = pass < 2;
        bool useCompoundFile = (pass % 2) > 0;

        KeepLastNDeletionPolicy policy = new KeepLastNDeletionPolicy(this, N);

        Directory dir = new RAMDirectory();
        IndexWriter writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), true, policy);
        writer.SetMaxBufferedDocs(10);
        writer.SetUseCompoundFile(useCompoundFile);
        writer.Close();

        Term searchTerm = new Term("content", "aaa");
        Query query = new TermQuery(searchTerm);

        for (int i = 0; i < N + 1; i++)
        {
            writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), false, policy);
            writer.SetMaxBufferedDocs(10);
            writer.SetUseCompoundFile(useCompoundFile);
            for (int j = 0; j < 17; j++)
            {
                AddDoc(writer);
            }
            // this is a commit when autoCommit=false:
            writer.Close();

            IndexReader reader = IndexReader.Open(dir, policy);
            reader.DeleteDocument(3);
            reader.SetNorm(5, "content", 2.0F);
            IndexSearcher searcher = new IndexSearcher(reader);
            ScoreDoc[] hits = searcher.Search(query, null, 1000).scoreDocs;
            Assert.AreEqual(16, hits.Length);
            // this is a commit when autoCommit=false:
            reader.Close();
            searcher.Close();

            writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), true, policy);
            // This will not commit: there are no changes
            // pending because we opened for "create":
            writer.Close();
        }

        Assert.AreEqual(1 + 3 * (N + 1), policy.numOnInit);
        if (!autoCommit)
        {
            Assert.AreEqual(3 * (N + 1), policy.numOnCommit);
        }

        IndexSearcher searcher2 = new IndexSearcher(dir);
        ScoreDoc[] hits2 = searcher2.Search(query, null, 1000).scoreDocs;
        Assert.AreEqual(0, hits2.Length);

        // Simplistic check: just verify only the past N segments_N's still
        // exist, and, I can open a reader on each:
        long gen = SegmentInfos.GetCurrentSegmentGeneration(dir);

        dir.DeleteFile(IndexFileNames.SEGMENTS_GEN);
        int expectedCount = 0;

        for (int i = 0; i < N + 1; i++)
        {
            try
            {
                IndexReader reader = IndexReader.Open(dir);

                // Work backwards in commits on what the expected
                // count should be. Only check this in the
                // autoCommit false case:
                if (!autoCommit)
                {
                    searcher2 = new IndexSearcher(reader);
                    hits2 = searcher2.Search(query, null, 1000).scoreDocs;
                    Assert.AreEqual(expectedCount, hits2.Length);
                    searcher2.Close();
                    // Going backwards the counts cycle 0 -> 16 -> 17 -> 0.
                    if (expectedCount == 0)
                    {
                        expectedCount = 16;
                    }
                    else if (expectedCount == 16)
                    {
                        expectedCount = 17;
                    }
                    else if (expectedCount == 17)
                    {
                        expectedCount = 0;
                    }
                }
                reader.Close();
                if (i == N)
                {
                    Assert.Fail("should have failed on commits before last " + N);
                }
            }
            catch (System.IO.IOException)
            {
                // Only the (N+1)-th commit back is expected to be gone; anything
                // else is a real failure. Rethrow with "throw;" so the original
                // stack trace is preserved.
                if (i != N)
                {
                    throw;
                }
            }
            if (i < N)
            {
                dir.DeleteFile(IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen));
            }
            gen--;
        }

        dir.Close();
    }
}
/// <summary>
/// Checks that a term query for "seventy" on "field" hits exactly the
/// documents x70..x79 for every hundred x in 0..9.
/// </summary>
public virtual void TestTerm()
{
    Query query = new TermQuery(new Term("field", "seventy"));

    // Expected ids: 70-79, 170-179, ..., 970-979 (100 ids, ascending).
    int[] expectedDocs = new int[100];
    int next = 0;
    for (int hundreds = 0; hundreds < 10; hundreds++)
    {
        for (int units = 0; units < 10; units++)
        {
            expectedDocs[next++] = hundreds * 100 + 70 + units;
        }
    }

    CheckHits(query, expectedDocs);
}
/// <summary>
/// Searches a continuously reopened reader while background threads operate
/// on the same writer, then asserts that at least one document was found and
/// that no background thread recorded an exception.
/// </summary>
public virtual void TestDuringAddDelete()
{
    Directory dir1 = NewDirectory();
    var writer = new IndexWriter(
        dir1,
        NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NewLogMergePolicy(2)));

    // create the index
    CreateIndexNoClose(false, "test", writer);
    writer.Commit();

    DirectoryReader r = writer.Reader;

    const float SECONDS = 0.5f;

    // Deadline in milliseconds on the Environment.TickCount clock.
    long endTime = (long)(Environment.TickCount + 1000.0 * SECONDS);
    IList<Exception> excs = new SynchronizedCollection<Exception>();

    var threads = new ThreadClass[NumThreads];
    for (int t = 0; t < NumThreads; t++)
    {
        ThreadClass worker = new ThreadAnonymousInnerClassHelper2(writer, r, endTime, excs);
        threads[t] = worker;
        worker.SetDaemon(true);
        worker.Start();
    }

    int totalHits = 0;
    while (Environment.TickCount < endTime)
    {
        DirectoryReader changed = DirectoryReader.OpenIfChanged(r);
        if (changed != null)
        {
            r.Dispose();
            r = changed;
        }
        Query loopQuery = new TermQuery(new Term("indexname", "test"));
        IndexSearcher loopSearcher = NewSearcher(r);
        totalHits += loopSearcher.Search(loopQuery, 10).TotalHits;
    }

    foreach (ThreadClass worker in threads)
    {
        worker.Join();
    }

    // at least search once
    DirectoryReader finalReader = DirectoryReader.OpenIfChanged(r);
    if (finalReader != null)
    {
        r.Dispose();
        r = finalReader;
    }
    Query finalQuery = new TermQuery(new Term("indexname", "test"));
    IndexSearcher finalSearcher = NewSearcher(r);
    totalHits += finalSearcher.Search(finalQuery, 10).TotalHits;

    Assert.IsTrue(totalHits > 0, "no documents found at all");
    Assert.AreEqual(0, excs.Count);

    writer.Dispose();
    r.Dispose();
    dir1.Dispose();
}
/// <summary>
/// Checks that a term query for "seventish" on "field" produces no hits.
/// </summary>
public virtual void TestTerm2()
{
    Term term = new Term("field", "seventish");
    Query query = new TermQuery(term);
    int[] noHits = new int[0];
    CheckHits(query, noHits);
}
/// <summary>
/// Highlights "help" in the text "help me [54-65]" using a NullFragmenter and
/// checks that the bracketed remainder of the text is returned untouched.
/// </summary>
public virtual void TestOffByOne()
{
    TermQuery query = new TermQuery(new Term("data", "help"));
    QueryScorer scorer = new QueryScorer(query);
    Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter(), scorer);
    highlighter.SetTextFragmenter(new NullFragmenter());

    System.String match = highlighter.GetBestFragment(new StandardAnalyzer(), "data", "help me [54-65]");

    Assert.AreEqual("<B>help</B> me [54-65]", match);
}
/// <summary>
/// Search loop of the stress test: for <c>NumIteration</c> rounds, opens a
/// reader on <c>Dir</c>, runs a term query and disposes the reader. On the
/// first failure, sets <c>HitException</c>, prints the exception and stops.
/// </summary>
public override void Run()
{
    Query query = new TermQuery(new Term("content", "aaa"));

    for (int round = 0; round < this.NumIteration; round++)
    {
        IndexReader reader = null;
        IndexSearcher searcher = null;

        // Phase 1: open reader and searcher.
        try
        {
            reader = DirectoryReader.Open(Dir);
            searcher = NewSearcher(reader);
        }
        catch (Exception createFailure)
        {
            HitException = true;
            Console.WriteLine("Stress Test Index Searcher: create hit unexpected exception: " + createFailure.ToString());
            Console.Out.Write(createFailure.StackTrace);
            break;
        }

        // Phase 2: run the query; the result is not inspected.
        try
        {
            searcher.Search(query, null, 1000);
        }
        catch (IOException searchFailure)
        {
            HitException = true;
            Console.WriteLine("Stress Test Index Searcher: search hit unexpected exception: " + searchFailure.ToString());
            Console.Out.Write(searchFailure.StackTrace);
            break;
        }

        // Phase 3: release the reader.
        try
        {
            reader.Dispose();
        }
        catch (IOException closeFailure)
        {
            HitException = true;
            Console.WriteLine("Stress Test Index Searcher: close hit unexpected exception: " + closeFailure.ToString());
            Console.Out.Write(closeFailure.StackTrace);
            break;
        }
    }
}
/// <summary>
/// Simple similarity query generator.
/// Takes every unique word in <paramref name="body"/> and forms a boolean query in which all words are optional.
/// The resulting query can then be run against an index searcher to find similar documents.
/// The only caveat is that the first hit returned <b>should be</b> the source document itself, which the
/// caller then needs to ignore.
/// </summary>
/// <remarks>
/// <para>
/// So, if you have a code fragment like this:
/// <code>
/// Query q = FormSimilarQuery("I use Lucene to search fast. Fast searchers are good", new StandardAnalyzer(), "contents", null);
/// </code>
/// the query returned, in string form, will be <c>'(i use lucene to search fast searchers are good')</c>.
/// </para>
/// <para>
/// The philosophy behind this method is "two documents are similar if they share lots of words".
/// Note that behind the scenes, Lucene's scoring algorithm will tend to give two documents a higher
/// similarity score if they share more uncommon words.
/// </para>
/// <para>
/// This method is fail-safe in that if a long 'body' is passed in and <c>BooleanQuery.Add()</c>
/// (used internally) throws <c>BooleanQuery.TooManyClauses</c>, the query as built so far is returned.
/// </para>
/// </remarks>
/// <param name="body">the body of the document you want to find similar documents to</param>
/// <param name="a">the analyzer to use to parse the body</param>
/// <param name="field">the field you want to search on, probably something like "contents" or "body"</param>
/// <param name="stop">optional set of stop words to ignore; may be null</param>
/// <returns>a query with all unique words in 'body'</returns>
public static Query FormSimilarQuery(System.String body, Analyzer a, System.String field, System.Collections.Hashtable stop)
{
    TokenStream ts = a.TokenStream(field, new System.IO.StringReader(body));
    TermAttribute termAtt = (TermAttribute)ts.AddAttribute(typeof(TermAttribute));

    BooleanQuery tmp = new BooleanQuery();
    // Words already added to the query, used to ignore duplicates.
    System.Collections.Hashtable already = new System.Collections.Hashtable();
    while (ts.IncrementToken())
    {
        String word = termAtt.Term();

        // ignore opt stop words
        if (stop != null && stop.Contains(word))
        {
            continue;
        }

        // ignore dups
        if (already.Contains(word))
        {
            continue;
        }
        already.Add(word, word);

        // add to query
        TermQuery tq = new TermQuery(new Term(field, word));
        try
        {
            tmp.Add(tq, BooleanClause.Occur.SHOULD);
        }
        catch (BooleanQuery.TooManyClauses)
        {
            // fail-safe, just return what we have, not the end of the world
            break;
        }
    }
    return tmp;
}
/// <summary>
/// Indexes the same Document three times, changing the value of its "id"
/// Field between additions, and checks that all three values (id1, id2, id3)
/// come back from the index.
/// </summary>
public virtual void TestFieldSetValue()
{
    Field idField = new Field("id", "id1", Field.Store.YES, Field.Index.NOT_ANALYZED);
    Document doc = new Document();
    doc.Add(idField);
    doc.Add(new Field("keyword", "test", Field.Store.YES, Field.Index.NOT_ANALYZED));

    RAMDirectory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    writer.AddDocument(doc);
    idField.SetValue("id2");
    writer.AddDocument(doc);
    idField.SetValue("id3");
    writer.AddDocument(doc);
    writer.Close();

    Searcher searcher = new IndexSearcher(dir);

    Query query = new TermQuery(new Term("keyword", "test"));

    // All three documents share the same keyword, so the query should hit them all.
    ScoreDoc[] hits = searcher.Search(query, null, 1000).scoreDocs;
    Assert.AreEqual(3, hits.Length);

    // One bit per expected id; 7 means all three were seen.
    int seenMask = 0;
    for (int h = 0; h < 3; h++)
    {
        Document stored = searcher.Doc(hits[h].doc);
        Field storedId = stored.GetField("id");
        if (storedId.StringValue().Equals("id1"))
        {
            seenMask |= 1;
        }
        else if (storedId.StringValue().Equals("id2"))
        {
            seenMask |= 2;
        }
        else if (storedId.StringValue().Equals("id3"))
        {
            seenMask |= 4;
        }
        else
        {
            Assert.Fail("unexpected id field");
        }
    }
    searcher.Close();
    dir.Close();
    Assert.AreEqual(7, seenMask, "did not see all IDs");
}
/// <summary>
/// Converts a query object of this library into the equivalent
/// <c>Lucene.Net.Search.Query</c>.
/// </summary>
/// <param name="query">the query to convert; must not be null</param>
/// <returns>the Lucene query, with the boost applied when <c>query.Boost</c> has a value</returns>
/// <exception cref="ArgumentNullException"><paramref name="query"/> is null</exception>
/// <exception cref="ArgumentException"><paramref name="query"/> is of an unsupported type</exception>
/// <exception cref="InvalidOperationException">a boolean clause has an unhandled <c>Occur</c> value</exception>
public static Lucene.Net.Search.Query ConvertQueryToLuceneQuery(Query query)
{
    if (query == null)
    {
        throw new ArgumentNullException("query");
    }

    Lucene.Net.Search.Query lQuery;

    // NOTE: the order of the type tests below is kept exactly as it was.
    if (query is MatchAllDocsQuery)
    {
        lQuery = new Lucene.Net.Search.MatchAllDocsQuery();
    }
    else if (query is TermQuery)
    {
        var source = (TermQuery)query;
        lQuery = new Lucene.Net.Search.TermQuery(Term.ConvertToLuceneTerm(source.Term));
    }
    else if (query is TermRangeQuery)
    {
        var source = (TermRangeQuery)query;
        lQuery = new Lucene.Net.Search.TermRangeQuery(
            source.FieldName,
            source.LowerTerm,
            source.UpperTerm,
            source.LowerInclusive,
            source.UpperInclusive);
    }
    else if (query is PhraseQuery)
    {
        var source = (PhraseQuery)query;
        var lPhrase = new Lucene.Net.Search.PhraseQuery();
        foreach (var phraseTerm in source.Terms)
        {
            lPhrase.Add(Term.ConvertToLuceneTerm(phraseTerm));
        }

        if (source.Slop.HasValue)
        {
            lPhrase.Slop = source.Slop.Value;
        }

        lQuery = lPhrase;
    }
    else if (query is PrefixQuery)
    {
        var source = (PrefixQuery)query;
        lQuery = new Lucene.Net.Search.PrefixQuery(Term.ConvertToLuceneTerm(source.Term));
    }
    else if (query is RegexQuery)
    {
        var source = (RegexQuery)query;
        lQuery = new Contrib.Regex.RegexQuery(Term.ConvertToLuceneTerm(source.Term));
    }
    else if (query is FuzzyQuery)
    {
        var source = (FuzzyQuery)query;
        lQuery = new Lucene.Net.Search.FuzzyQuery(Term.ConvertToLuceneTerm(source.Term));
    }
    else if (query is BooleanQuery)
    {
        var source = (BooleanQuery)query;
        var lBoolean = new Lucene.Net.Search.BooleanQuery();
        foreach (var clause in source.Clauses)
        {
            // Nested queries are converted recursively.
            var lNested = Query.ConvertQueryToLuceneQuery(clause.Query);

            Lucene.Net.Search.Occur lOccur;
            switch (clause.Occur)
            {
                case Occur.Must:
                    lOccur = Lucene.Net.Search.Occur.MUST;
                    break;
                case Occur.MustNot:
                    lOccur = Lucene.Net.Search.Occur.MUST_NOT;
                    break;
                case Occur.Should:
                    lOccur = Lucene.Net.Search.Occur.SHOULD;
                    break;
                default:
                    throw new InvalidOperationException("Occur not implemented or defined.");
            }

            lBoolean.Add(new Lucene.Net.Search.BooleanClause(lNested, lOccur));
        }

        if (source.MinimumNumberShouldMatch.HasValue)
        {
            lBoolean.MinimumNumberShouldMatch = source.MinimumNumberShouldMatch.Value;
        }

        lQuery = lBoolean;
    }
    else if (query is WildcardQuery)
    {
        var source = (WildcardQuery)query;
        lQuery = new Lucene.Net.Search.WildcardQuery(Term.ConvertToLuceneTerm(source.Term));
    }
    else if (query is DoubleNumericRangeQuery)
    {
        var source = (DoubleNumericRangeQuery)query;
        lQuery = Lucene.Net.Search.NumericRangeQuery.NewDoubleRange(
            source.FieldName,
            source.Min,
            source.Max,
            source.MinInclusive,
            source.MaxInclusive);
    }
    else if (query is FloatNumericRangeQuery)
    {
        var source = (FloatNumericRangeQuery)query;
        lQuery = Lucene.Net.Search.NumericRangeQuery.NewFloatRange(
            source.FieldName,
            source.Min,
            source.Max,
            source.MinInclusive,
            source.MaxInclusive);
    }
    else if (query is IntNumericRangeQuery)
    {
        var source = (IntNumericRangeQuery)query;
        lQuery = Lucene.Net.Search.NumericRangeQuery.NewIntRange(
            source.FieldName,
            source.Min,
            source.Max,
            source.MinInclusive,
            source.MaxInclusive);
    }
    else if (query is LongNumericRangeQuery)
    {
        var source = (LongNumericRangeQuery)query;
        lQuery = Lucene.Net.Search.NumericRangeQuery.NewLongRange(
            source.FieldName,
            source.Min,
            source.Max,
            source.MinInclusive,
            source.MaxInclusive);
    }
    else if (query is QueryParserQuery)
    {
        var source = (QueryParserQuery)query;
        var parser = new Lucene.Net.QueryParsers.QueryParser(
            Version.LUCENE_30,
            source.DefaultField,
            new StandardAnalyzer(Version.LUCENE_30));
        parser.AllowLeadingWildcard = source.AllowLeadingWildcard;
        lQuery = parser.Parse(source.Query);
    }
    else if (query is MultiFieldQueryParserQuery)
    {
        var source = (MultiFieldQueryParserQuery)query;

        // A missing field list is replaced on the input object with an empty one.
        if (source.FieldNames == null)
        {
            source.FieldNames = new List<string>();
        }

        var parser = new Lucene.Net.QueryParsers.MultiFieldQueryParser(
            Lucene.Net.Util.Version.LUCENE_30,
            source.FieldNames.ToArray(),
            new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30));
        lQuery = parser.Parse(source.Query);
    }
    else
    {
        throw new ArgumentException(@"Unknown or invalid query object", "query");
    }

    if (query.Boost.HasValue)
    {
        lQuery.Boost = query.Boost.Value;
    }

    return lQuery;
}
/// <summary>
/// Builds the "more like this" query from a PriorityQueue of scored terms.
/// Each popped entry is read as an object array holding the term text at
/// index 0, the field name at index 1 and a float score at index 2. Every
/// term becomes an optional (SHOULD) clause; when boosting is enabled each
/// clause is boosted by its score relative to the first popped score.
/// </summary>
/// <param name="q">queue of term entries; it is drained by this call until a stop condition is hit</param>
/// <returns>the assembled boolean query</returns>
private Query CreateQuery(PriorityQueue q)
{
    BooleanQuery result = new BooleanQuery();
    int termCount = 0;
    float firstScore = 0;

    for (System.Object entry = q.Pop(); entry != null; entry = q.Pop())
    {
        System.Object[] parts = (System.Object[]) entry;
        TermQuery clauseQuery = new TermQuery(new Term((System.String) parts[1], (System.String) parts[0]));

        if (boost)
        {
            float score = (float) parts[2];
            if (termCount == 0)
            {
                // The first popped score is the reference for all later boosts.
                firstScore = score;
            }
            clauseQuery.SetBoost(score / firstScore);
        }

        try
        {
            result.Add(clauseQuery, BooleanClause.Occur.SHOULD);
        }
        catch (BooleanQuery.TooManyClauses)
        {
            // Clause limit reached: return what has been collected so far.
            break;
        }

        termCount++;
        if (maxQueryTerms > 0 && termCount >= maxQueryTerms)
        {
            break;
        }
    }

    return result;
}
/// <summary>
/// Indexes one document carrying a text field, a numeric doc-values field and
/// a binary doc-values field through a per-field codec, then verifies that the
/// document is searchable and that both doc values read back correctly.
/// </summary>
public virtual void TestTwoFieldsTwoFormats()
{
    Analyzer analyzer = new MockAnalyzer(Random());

    Directory directory = NewDirectory();
    // we don't use RandomIndexWriter because it might add more docvalues than we expect !!!!1
    IndexWriterConfig writerConfig = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
    DocValuesFormat fast = DocValuesFormat.ForName("Lucene45");
    DocValuesFormat slow = DocValuesFormat.ForName("Lucene45");
    writerConfig.SetCodec(new Lucene46CodecAnonymousInnerClassHelper(this, fast, slow));
    var iwriter = new IndexWriter(directory, writerConfig);

    var doc = new Document();
    string longTerm = "longtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongterm";
    string text = "this is the text to be indexed. " + longTerm;
    doc.Add(NewTextField("fieldname", text, Field.Store.YES));
    doc.Add(new NumericDocValuesField("dv1", 5));
    doc.Add(new BinaryDocValuesField("dv2", new BytesRef("hello world")));
    iwriter.AddDocument(doc);
    iwriter.Dispose();

    // Now search the index:
    IndexReader ireader = DirectoryReader.Open(directory); // read-only=true
    IndexSearcher isearcher = NewSearcher(ireader);

    Query longTermQuery = new TermQuery(new Term("fieldname", longTerm));
    Assert.AreEqual(1, isearcher.Search(longTermQuery, 1).TotalHits);

    Query query = new TermQuery(new Term("fieldname", "text"));
    TopDocs hits = isearcher.Search(query, null, 1);
    Assert.AreEqual(1, hits.TotalHits);

    var scratch = new BytesRef();
    // Iterate through the results:
    foreach (ScoreDoc hit in hits.ScoreDocs)
    {
        Document hitDoc = isearcher.Doc(hit.Doc);
        Assert.AreEqual(text, hitDoc.Get("fieldname"));
        Debug.Assert(ireader.Leaves.Count == 1);

        NumericDocValues numericValues = ((AtomicReader)ireader.Leaves[0].Reader).GetNumericDocValues("dv1");
        Assert.AreEqual(5, numericValues.Get(hit.Doc));

        BinaryDocValues binaryValues = ((AtomicReader)ireader.Leaves[0].Reader).GetBinaryDocValues("dv2");
        binaryValues.Get(hit.Doc, scratch);
        Assert.AreEqual(new BytesRef("hello world"), scratch);
    }

    ireader.Dispose();
    directory.Dispose();
}
/// <summary>
/// Checks the hit counts of drill-down queries built on top of another
/// drill-down query and on top of a plain term query.
/// </summary>
public virtual void TestQueryImplicitDefaultParams()
{
    IndexSearcher searcher = NewSearcher(reader);

    // Create the base query to start with
    var facetA = new DrillDownQuery(config);
    facetA.Add("a");

    // Making sure the query yields 5 documents with the facet "b" and the
    // previous (facet "a") query as a base query
    var facetAThenB = new DrillDownQuery(config, facetA);
    facetAThenB.Add("b");
    TopDocs facetDocs = searcher.Search(facetAThenB, 100);
    Assert.AreEqual(5, facetDocs.TotalHits);

    // Check that content:foo (which yields 50% results) and facet/b (which yields 20%)
    // would gather together 10 results (10%..)
    Query fooQuery = new TermQuery(new Term("content", "foo"));
    var fooThenB = new DrillDownQuery(config, fooQuery);
    fooThenB.Add("b");
    TopDocs fooDocs = searcher.Search(fooThenB, 100);
    Assert.AreEqual(10, fooDocs.TotalHits);
}
/// <summary>
/// Exercises <c>RandomSamplingFacetsCollector</c>: first with a query that
/// matches nothing, then with a query matching the "even" half of the index,
/// comparing an unsampled collector against exact counts and sanity-checking
/// the statistics of a 10%-sampled collector.
/// </summary>
public virtual void TestRandomSampling()
{
    Directory dir = NewDirectory();
    Directory taxoDir = NewDirectory();

    var taxoWriter = new DirectoryTaxonomyWriter(taxoDir);
    var writer = new RandomIndexWriter(Random(), dir);

    var config = new FacetsConfig();

    int numDocs = AtLeast(10000);
    for (int docIndex = 0; docIndex < numDocs; docIndex++)
    {
        var indexed = new Document();
        string parity = (docIndex % 2 == 0) ? "even" : "odd";
        indexed.Add(new StringField("EvenOdd", parity, Store.NO));
        indexed.Add(new FacetField("iMod10", Convert.ToString(docIndex % 10)));
        writer.AddDocument(config.Build(taxoWriter, indexed));
    }
    Random random = Random();

    // NRT open
    IndexSearcher searcher = NewSearcher(writer.Reader);
    var taxoReader = new DirectoryTaxonomyReader(taxoWriter);
    IOUtils.Close(writer, taxoWriter);

    // Test empty results
    var collectRandomZeroResults = new RandomSamplingFacetsCollector(numDocs / 10, random.NextLong());

    // There should be no divisions by zero
    searcher.Search(new TermQuery(new Term("EvenOdd", "NeverMatches")), collectRandomZeroResults);

    // There should be no divisions by zero and no null result
    Assert.NotNull(collectRandomZeroResults.GetMatchingDocs);

    // There should be no results at all
    foreach (MatchingDocs matching in collectRandomZeroResults.GetMatchingDocs)
    {
        Assert.AreEqual(0, matching.totalHits);
    }

    // Now start searching and retrieve results.

    // Use a query to select half of the documents.
    var query = new TermQuery(new Term("EvenOdd", "even"));

    // there will be 5 facet values (0, 2, 4, 6 and 8), as only the even (i %
    // 10) are hits.
    // there is a REAL small chance that one of the 5 values will be missed when
    // sampling.
    // but is that 0.8 (chance not to take a value) ^ 2000 * 5 (any can be
    // missing) ~ 10^-193
    // so that is probably not going to happen.
    int maxNumChildren = 5;

    var random100Percent = new RandomSamplingFacetsCollector(numDocs, random.NextLong()); // no sampling
    var random10Percent = new RandomSamplingFacetsCollector(numDocs / 10, random.NextLong()); // 10 % of total docs, 20% of the hits

    var fc = new FacetsCollector();

    searcher.Search(query, MultiCollector.Wrap(fc, random100Percent, random10Percent));

    var random10FacetCounts = new FastTaxonomyFacetCounts(taxoReader, config, random10Percent);
    var random100FacetCounts = new FastTaxonomyFacetCounts(taxoReader, config, random100Percent);
    var exactFacetCounts = new FastTaxonomyFacetCounts(taxoReader, config, fc);

    FacetResult sampledTop = random10FacetCounts.GetTopChildren(10, "iMod10");
    FacetResult random10Result = random10Percent.AmortizeFacetCounts(sampledTop, config, searcher);
    FacetResult random100Result = random100FacetCounts.GetTopChildren(10, "iMod10");
    FacetResult exactResult = exactFacetCounts.GetTopChildren(10, "iMod10");

    Assert.AreEqual(random100Result, exactResult);

    // we should have five children, but there is a small chance we have less.
    // (see above).
    Assert.True(random10Result.ChildCount <= maxNumChildren);
    // there should be one child at least.
    Assert.True(random10Result.ChildCount >= 1);

    // now calculate some statistics to determine if the sampled result is 'ok'.
    // because random sampling is used, the results will vary each time.
    int sum = 0;
    foreach (LabelAndValue child in random10Result.LabelValues)
    {
        sum += (int)child.value;
    }
    float mu = (float)sum / (float)maxNumChildren;

    float variance = 0;
    foreach (LabelAndValue child in random10Result.LabelValues)
    {
        variance += (float)Math.Pow((mu - (int)child.value), 2);
    }
    variance = variance / maxNumChildren;
    float sigma = (float)Math.Sqrt(variance);

    // we query only half the documents and have 5 categories. The average
    // number of docs in a category will thus be the total divided by 5*2
    float targetMu = numDocs / (5.0f * 2.0f);

    // the average should be in the range and the standard deviation should not
    // be too great
    Assert.True(sigma < 200);
    Assert.True(targetMu - 3 * sigma < mu && mu < targetMu + 3 * sigma);

    IOUtils.Close(searcher.IndexReader, taxoReader, dir, taxoDir);
}
/// <summary>
/// Runs a term query for every entry of <c>SampleTerms</c> and asserts that
/// each one produces at least one hit.
/// </summary>
public virtual void TestRandomSearchPerformance()
{
    IndexSearcher searcher = new IndexSearcher(Reader);

    foreach (Term sampleTerm in SampleTerms)
    {
        TermQuery termQuery = new TermQuery(sampleTerm);
        int totalHits = searcher.Search(termQuery, 10).TotalHits;
        Assert.IsTrue(totalHits > 0);
    }
}
/// <summary>
/// Exercises <c>KeepLastNDeletionPolicy</c> when the index is repeatedly
/// re-opened with create=true between regular writer and reader commits, then
/// walks backwards through the segments_N generations. Runs four passes
/// covering autoCommit on/off and compound files off/on.
/// </summary>
public virtual void TestKeepLastNDeletionPolicyWithCreates()
{
    int N = 10;

    for (int pass = 0; pass < 4; pass++)
    {
        bool autoCommit = pass < 2;
        bool useCompoundFile = (pass % 2) > 0;

        KeepLastNDeletionPolicy policy = new KeepLastNDeletionPolicy(this, N);

        Directory dir = new RAMDirectory();
        IndexWriter writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), true, policy);
        writer.SetMaxBufferedDocs(10);
        writer.SetUseCompoundFile(useCompoundFile);
        writer.Close();

        Term searchTerm = new Term("content", "aaa");
        Query query = new TermQuery(searchTerm);

        for (int i = 0; i < N + 1; i++)
        {
            writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), false, policy);
            writer.SetMaxBufferedDocs(10);
            writer.SetUseCompoundFile(useCompoundFile);
            for (int j = 0; j < 17; j++)
            {
                AddDoc(writer);
            }
            // this is a commit when autoCommit=false:
            writer.Close();

            IndexReader reader = IndexReader.Open(dir, policy);
            reader.DeleteDocument(3);
            reader.SetNorm(5, "content", 2.0F);
            IndexSearcher searcher = new IndexSearcher(reader);
            Hits hits = searcher.Search(query);
            Assert.AreEqual(16, hits.Length());
            // this is a commit when autoCommit=false:
            reader.Close();
            searcher.Close();

            writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), true, policy);
            // This will not commit: there are no changes
            // pending because we opened for "create":
            writer.Close();
        }

        Assert.AreEqual(1 + 3 * (N + 1), policy.numOnInit);
        if (autoCommit)
        {
            Assert.IsTrue(policy.numOnCommit > 3 * (N + 1) - 1);
        }
        else
        {
            Assert.AreEqual(2 * (N + 1), policy.numOnCommit);
        }

        IndexSearcher searcher2 = new IndexSearcher(dir);
        Hits hits2 = searcher2.Search(query);
        Assert.AreEqual(0, hits2.Length());

        // Simplistic check: just verify only the past N segments_N's still
        // exist, and, I can open a reader on each:
        long gen = SegmentInfos.GetCurrentSegmentGeneration(dir);

        dir.DeleteFile(IndexFileNames.SEGMENTS_GEN);
        int expectedCount = 0;

        for (int i = 0; i < N + 1; i++)
        {
            try
            {
                IndexReader reader = IndexReader.Open(dir);

                // Work backwards in commits on what the expected
                // count should be. Only check this in the
                // autoCommit false case:
                if (!autoCommit)
                {
                    searcher2 = new IndexSearcher(reader);
                    hits2 = searcher2.Search(query);
                    Assert.AreEqual(expectedCount, hits2.Length());
                    searcher2.Close();
                    // Going backwards the counts cycle 0 -> 16 -> 17 -> 0.
                    if (expectedCount == 0)
                    {
                        expectedCount = 16;
                    }
                    else if (expectedCount == 16)
                    {
                        expectedCount = 17;
                    }
                    else if (expectedCount == 17)
                    {
                        expectedCount = 0;
                    }
                }
                reader.Close();
                if (i == N)
                {
                    Assert.Fail("should have failed on commits before last " + N);
                }
            }
            catch (System.IO.IOException)
            {
                // Only the (N+1)-th commit back is expected to be gone; anything
                // else is a real failure. Rethrow with "throw;" so the original
                // stack trace is preserved.
                if (i != N)
                {
                    throw;
                }
            }
            if (i < N)
            {
                dir.DeleteFile(IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen));
            }
            gen--;
        }

        dir.Close();
    }
}
/// <summary>
/// Repeatedly reopens a near-real-time reader while background threads work
/// against the same writer, asserting that the hit count for the indexed term
/// never decreases and that no background thread recorded an exception.
/// </summary>
public virtual void TestDuringAddIndexes()
{
    MockRAMDirectory dir1 = new MockRAMDirectory();
    IndexWriter writer = new IndexWriter(dir1, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED);
    writer.SetInfoStream(infoStream);
    writer.MergeFactor = 2;

    // create the index
    CreateIndexNoClose(false, "test", writer);
    writer.Commit();

    // Ten in-memory copies of the freshly committed index, handed to the worker threads.
    Directory[] dirs = new Directory[10];
    for (int d = 0; d < dirs.Length; d++)
    {
        dirs[d] = new MockRAMDirectory(dir1);
    }

    IndexReader r = writer.GetReader();

    int NUM_THREAD = 5;
    float SECONDS = 3;

    // Deadline in milliseconds, on the same clock the loop below polls.
    long endTime = (long) ((DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond) + 1000.0 * SECONDS);

    System.Collections.ArrayList backingList = new System.Collections.ArrayList(new System.Collections.ArrayList());
    System.Collections.IList excs = (System.Collections.IList) System.Collections.ArrayList.Synchronized(backingList);

    ThreadClass[] threads = new ThreadClass[NUM_THREAD];
    for (int t = 0; t < NUM_THREAD; t++)
    {
        ThreadClass worker = new AnonymousClassThread(endTime, writer, dirs, excs, this);
        threads[t] = worker;
        worker.IsBackground = true;
        worker.Start();
    }

    int lastCount = 0;
    while ((DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond) < endTime)
    {
        IndexReader reopened = r.Reopen();
        if (reopened != r)
        {
            r.Close();
            r = reopened;
        }
        Query q = new TermQuery(new Term("indexname", "test"));
        int count = new IndexSearcher(r).Search(q, 10).TotalHits;
        Assert.IsTrue(count >= lastCount);
        lastCount = count;
    }

    for (int t = 0; t < NUM_THREAD; t++)
    {
        threads[t].Join();
    }

    Assert.AreEqual(0, excs.Count);
    r.Close();

    try
    {
        Assert.AreEqual(0, dir1.GetOpenDeletedFiles().Count);
    }
    catch
    {
        //DIGY:
        //I think this is an expected behaviour.
        //There isn't any pending files to be deleted after "writer.Close()".
        //But, since lucene.java's test case is designed that way
        //and I might be wrong, I will add a warning

        // Assert only in debug mode, so that CheckIndex is called during release.
#if DEBUG
        Assert.Inconclusive("", 0, dir1.GetOpenDeletedFiles().Count);
#endif
    }

    writer.Close();

    _TestUtil.CheckIndex(dir1);

    dir1.Close();
}
/// <summary>
/// Test using various international locales with accented characters (which
/// sort differently depending on locale).
/// <para/>
/// Copied (and slightly modified) from
/// Lucene.Net.Search.TestSort.testInternationalSort()
/// <para/>
/// TODO: this test is really fragile. there are already 3 different cases,
/// depending upon unicode version.
/// </summary>
/// <param name="usAnalyzer">Analyzer that produces the token stream indexed into the "US" field.</param>
/// <param name="franceAnalyzer">Analyzer that produces the token stream indexed into the "France" field.</param>
/// <param name="swedenAnalyzer">Analyzer that produces the token stream indexed into the "Sweden" field.</param>
/// <param name="denmarkAnalyzer">Analyzer that produces the token stream indexed into the "Denmark" field.</param>
/// <param name="usResult">Expected value handed to <c>AssertMatches</c> for the "US" sort (run against the "y" documents).</param>
/// <param name="frResult">Expected value handed to <c>AssertMatches</c> for the "France" sort (run against the "x" documents).</param>
/// <param name="svResult">Expected value handed to <c>AssertMatches</c> for the "Sweden" sort (run against the "y" documents).</param>
/// <param name="dkResult">Expected value handed to <c>AssertMatches</c> for the "Denmark" sort (run against the "y" documents).</param>
public virtual void TestCollationKeySort(Analyzer usAnalyzer, Analyzer franceAnalyzer, Analyzer swedenAnalyzer, Analyzer denmarkAnalyzer, string usResult, string frResult, string svResult, string dkResult)
{
    // The using blocks guarantee that the writer, the reader and the directory
    // are released even when indexing throws or an assertion below fails.
    using (Directory indexStore = NewDirectory())
    {
        using (IndexWriter writer = new IndexWriter(indexStore, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random(), MockTokenizer.WHITESPACE, false))))
        {
            // document data:
            // the tracer field is used to determine which document was hit
            // columns: tracer, contents, US, France, Sweden, Denmark
            string[][] sortData = new string[][]
            {
                new string[] { "A", "x", "p\u00EAche", "p\u00EAche", "p\u00EAche", "p\u00EAche" },
                new string[] { "B", "y", "HAT", "HAT", "HAT", "HAT" },
                new string[] { "C", "x", "p\u00E9ch\u00E9", "p\u00E9ch\u00E9", "p\u00E9ch\u00E9", "p\u00E9ch\u00E9" },
                new string[] { "D", "y", "HUT", "HUT", "HUT", "HUT" },
                new string[] { "E", "x", "peach", "peach", "peach", "peach" },
                new string[] { "F", "y", "H\u00C5T", "H\u00C5T", "H\u00C5T", "H\u00C5T" },
                new string[] { "G", "x", "sin", "sin", "sin", "sin" },
                new string[] { "H", "y", "H\u00D8T", "H\u00D8T", "H\u00D8T", "H\u00D8T" },
                new string[] { "I", "x", "s\u00EDn", "s\u00EDn", "s\u00EDn", "s\u00EDn" },
                new string[] { "J", "y", "HOT", "HOT", "HOT", "HOT" }
            };

            // The tracer field is stored so it can be read back from a hit.
            FieldType customType = new FieldType();
            customType.Stored = true;

            for (int i = 0; i < sortData.Length; ++i)
            {
                string[] row = sortData[i];
                Document doc = new Document();
                doc.Add(new Field("tracer", row[0], customType));
                doc.Add(new TextField("contents", row[1], Field.Store.NO));
                if (row[2] != null)
                {
                    doc.Add(new TextField("US", usAnalyzer.TokenStream("US", new StringReader(row[2]))));
                }
                if (row[3] != null)
                {
                    doc.Add(new TextField("France", franceAnalyzer.TokenStream("France", new StringReader(row[3]))));
                }
                if (row[4] != null)
                {
                    doc.Add(new TextField("Sweden", swedenAnalyzer.TokenStream("Sweden", new StringReader(row[4]))));
                }
                if (row[5] != null)
                {
                    doc.Add(new TextField("Denmark", denmarkAnalyzer.TokenStream("Denmark", new StringReader(row[5]))));
                }
                writer.AddDocument(doc);
            }

            writer.ForceMerge(1);
        } // writer is disposed here, before the reader is opened

        using (IndexReader reader = DirectoryReader.Open(indexStore))
        {
            IndexSearcher searcher = new IndexSearcher(reader);
            Sort sort = new Sort();
            Query queryX = new TermQuery(new Term("contents", "x"));
            Query queryY = new TermQuery(new Term("contents", "y"));

            sort.SetSort(new SortField("US", SortField.Type_e.STRING));
            AssertMatches(searcher, queryY, sort, usResult);

            // France is the only locale checked against the "x" documents.
            sort.SetSort(new SortField("France", SortField.Type_e.STRING));
            AssertMatches(searcher, queryX, sort, frResult);

            sort.SetSort(new SortField("Sweden", SortField.Type_e.STRING));
            AssertMatches(searcher, queryY, sort, svResult);

            sort.SetSort(new SortField("Denmark", SortField.Type_e.STRING));
            AssertMatches(searcher, queryY, sort, dkResult);
        }
    }
}