private void CreateRandomIndexes(int maxSegments) { dir = NewDirectory(); numDocs = AtLeast(150); int numTerms = TestUtil.NextInt(Random(), 1, numDocs / 5); ISet<string> randomTerms = new HashSet<string>(); while (randomTerms.Count < numTerms) { randomTerms.Add(TestUtil.RandomSimpleString(Random())); } terms = new List<string>(randomTerms); int seed = Random().Next(); IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(new Random(seed))); iwc.SetMergePolicy(TestSortingMergePolicy.NewSortingMergePolicy(sort)); iw = new RandomIndexWriter(new Random(seed), dir, iwc); for (int i = 0; i < numDocs; ++i) { Document doc = RandomDocument(); iw.AddDocument(doc); if (i == numDocs / 2 || (i != numDocs - 1 && Random().Next(8) == 0)) { iw.Commit(); } if (Random().Next(15) == 0) { string term = RandomInts.RandomFrom(Random(), terms); iw.DeleteDocuments(new Term("s", term)); } } reader = iw.Reader; }
public override void SetUp() { base.SetUp(); PayloadHelper helper = new PayloadHelper(); Searcher_Renamed = helper.SetUp(Random(), Similarity, 1000); IndexReader = Searcher_Renamed.IndexReader; }
public void BeforeClass() { Dir = NewDirectory(); Sdir1 = NewDirectory(); Sdir2 = NewDirectory(); RandomIndexWriter writer = new RandomIndexWriter(Random(), Dir, new MockAnalyzer(Random()), Similarity, TimeZone); RandomIndexWriter swriter1 = new RandomIndexWriter(Random(), Sdir1, new MockAnalyzer(Random()), Similarity, TimeZone); RandomIndexWriter swriter2 = new RandomIndexWriter(Random(), Sdir2, new MockAnalyzer(Random()), Similarity, TimeZone); for (int i = 0; i < 10; i++) { Document doc = new Document(); doc.Add(NewStringField("data", Convert.ToString(i), Field.Store.NO)); writer.AddDocument(doc); ((i % 2 == 0) ? swriter1 : swriter2).AddDocument(doc); } writer.ForceMerge(1); swriter1.ForceMerge(1); swriter2.ForceMerge(1); writer.Dispose(); swriter1.Dispose(); swriter2.Dispose(); Reader = DirectoryReader.Open(Dir); Searcher = NewSearcher(Reader); MultiReader = new MultiReader(new IndexReader[] { DirectoryReader.Open(Sdir1), DirectoryReader.Open(Sdir2) }, true); MultiSearcher = NewSearcher(MultiReader); MultiReaderDupls = new MultiReader(new IndexReader[] { DirectoryReader.Open(Sdir1), DirectoryReader.Open(Dir) }, true); MultiSearcherDupls = NewSearcher(MultiReaderDupls); }
public override void SetUp() { base.SetUp(); Dir = NewDirectory(); RandomIndexWriter writer = new RandomIndexWriter(Random(), Dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMaxBufferedDocs(TestUtil.NextInt(Random(), 50, 1000))); Document doc = new Document(); Field field = NewStringField("field", "", Field.Store.NO); doc.Add(field); NumberFormatInfo df = new NumberFormatInfo(); df.NumberDecimalDigits = 0; //NumberFormat df = new DecimalFormat("000", new DecimalFormatSymbols(Locale.ROOT)); for (int i = 0; i < 1000; i++) { field.StringValue = i.ToString(df); writer.AddDocument(doc); } Reader = writer.Reader; Searcher = NewSearcher(Reader); writer.Dispose(); if (VERBOSE) { Console.WriteLine("TEST: setUp searcher=" + Searcher); } }
public override void SetUp() { base.SetUp(); Dir = NewDirectory(); RandomIndexWriter writer = new RandomIndexWriter(Random(), Dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMaxBufferedDocs(TestUtil.NextInt(Random(), 50, 1000))); Document doc = new Document(); FieldType customType = new FieldType(TextField.TYPE_STORED); customType.OmitNorms = true; Field field = NewField("field", "", customType); doc.Add(field); NumberFormatInfo df = new NumberFormatInfo(); df.NumberDecimalDigits = 0; //NumberFormat df = new DecimalFormat("000", new DecimalFormatSymbols(Locale.ROOT)); for (int i = 0; i < 1000; i++) { field.StringValue = i.ToString(df); writer.AddDocument(doc); } Reader = writer.Reader; writer.Dispose(); Searcher = NewSearcher(Reader); }
public void BeforeClass() { Directory = NewDirectory(); RandomIndexWriter writer = new RandomIndexWriter(Random(), Directory, Similarity, TimeZone); Document doc = new Document(); Field field = NewStringField(FIELD, "meaninglessnames", Field.Store.NO); doc.Add(field); for (int i = 0; i < 5137; ++i) { writer.AddDocument(doc); } field.StringValue = "tangfulin"; writer.AddDocument(doc); field.StringValue = "meaninglessnames"; for (int i = 5138; i < 11377; ++i) { writer.AddDocument(doc); } field.StringValue = "tangfulin"; writer.AddDocument(doc); Reader = writer.Reader; Searcher = NewSearcher(Reader); writer.Dispose(); }
public override void SetUp() { base.SetUp(); INDEX_SIZE = AtLeast(2000); Index = NewDirectory(); RandomIndexWriter writer = new RandomIndexWriter(Random(), Index); RandomGen random = new RandomGen(this, Random()); for (int i = 0; i < INDEX_SIZE; ++i) // don't decrease; if too low the { // problem doesn't show up Document doc = new Document(); if ((i % 5) != 0) // some documents must not have an entry in the first { // sort field doc.Add(NewStringField("publicationDate_", random.LuceneDate, Field.Store.YES)); } if ((i % 7) == 0) // some documents to match the query (see below) { doc.Add(NewTextField("content", "test", Field.Store.YES)); } // every document has a defined 'mandant' field doc.Add(NewStringField("mandant", Convert.ToString(i % 3), Field.Store.YES)); writer.AddDocument(doc); } Reader = writer.Reader; writer.Dispose(); Query = new TermQuery(new Term("content", "test")); }
/// <summary> /// Initializes a new instance of the <see cref="LuceneSearchResults" /> class. /// </summary> /// <param name="searcher">The searcher.</param> /// <param name="reader">The reader.</param> /// <param name="docs">The hits.</param> /// <param name="criteria">The criteria.</param> /// <param name="query">The query.</param> public LuceneSearchResults( Searcher searcher, IndexReader reader, TopDocs docs, ISearchCriteria criteria, Query query) { Results = new SearchResults(criteria, null); CreateDocuments(searcher, docs); CreateFacets(reader, query); CreateSuggestions(reader, criteria); }
public static void AfterClass() { s = null; r.Dispose(); r = null; Index.Dispose(); Index = null; }
internal MatchAllScorer(MatchAllDocsQuery outerInstance, IndexReader reader, Bits liveDocs, Weight w, float score) : base(w) { this.OuterInstance = outerInstance; this.LiveDocs = liveDocs; this.Score_Renamed = score; MaxDoc = reader.MaxDoc; }
public static void AfterClass() { Searcher = null; Reader.Dispose(); Reader = null; Directory.Dispose(); Directory = null; }
public static void BeforeClass() { Directory = NewDirectory(); RandomIndexWriter writer = new RandomIndexWriter(Random(), Directory, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NewLogMergePolicy())); for (int i = 0; i < DocFields.Length; i++) { Document doc = new Document(); doc.Add(NewTextField(field, DocFields[i], Field.Store.NO)); writer.AddDocument(doc); } writer.Dispose(); LittleReader = DirectoryReader.Open(Directory); Searcher = NewSearcher(LittleReader); // this is intentionally using the baseline sim, because it compares against bigSearcher (which uses a random one) Searcher.Similarity = new DefaultSimilarity(); // Make big index Dir2 = new MockDirectoryWrapper(Random(), new RAMDirectory(Directory, IOContext.DEFAULT)); // First multiply small test index: MulFactor = 1; int docCount = 0; if (VERBOSE) { Console.WriteLine("\nTEST: now copy index..."); } do { if (VERBOSE) { Console.WriteLine("\nTEST: cycle..."); } Directory copy = new MockDirectoryWrapper(Random(), new RAMDirectory(Dir2, IOContext.DEFAULT)); RandomIndexWriter w = new RandomIndexWriter(Random(), Dir2); w.AddIndexes(copy); docCount = w.MaxDoc(); w.Dispose(); MulFactor *= 2; } while (docCount < 3000); RandomIndexWriter riw = new RandomIndexWriter(Random(), Dir2, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMaxBufferedDocs(TestUtil.NextInt(Random(), 50, 1000))); Document doc_ = new Document(); doc_.Add(NewTextField("field2", "xxx", Field.Store.NO)); for (int i = 0; i < NUM_EXTRA_DOCS / 2; i++) { riw.AddDocument(doc_); } doc_ = new Document(); doc_.Add(NewTextField("field2", "big bad bug", Field.Store.NO)); for (int i = 0; i < NUM_EXTRA_DOCS / 2; i++) { riw.AddDocument(doc_); } Reader = riw.Reader; BigSearcher = NewSearcher(Reader); riw.Dispose(); }
public static void AfterClassDrillDownQueryTest() { IOUtils.Close(reader, taxo, dir, taxoDir); reader = null; taxo = null; dir = null; taxoDir = null; config = null; }
// TODO: this should be setUp().... public virtual void CreateDummySearcher() { // Create a dummy index with nothing in it. // this could possibly fail if Lucene starts checking for docid ranges... d = NewDirectory(); IndexWriter iw = new IndexWriter(d, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))); iw.AddDocument(new Document()); iw.Dispose(); r = DirectoryReader.Open(d); s = NewSearcher(r); }
public override void SetUp() { base.SetUp(); Directory = NewDirectory(); RandomIndexWriter writer = new RandomIndexWriter(Random(), Directory, Similarity, TimeZone); Document doc = new Document(); doc.Add(NewTextField(FN, "the quick brown fox jumps over the lazy ??? dog 493432 49344", Field.Store.NO)); writer.AddDocument(doc); Reader = writer.Reader; writer.Dispose(); Searcher = NewSearcher(Reader); }
public override void SetUp() { base.SetUp(); // create test index MDirectory = NewDirectory(); RandomIndexWriter writer = new RandomIndexWriter(Random(), MDirectory, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET)).SetMergePolicy(NewLogMergePolicy()).SetSimilarity(new DefaultSimilarity())); AddDocument(writer, "1", "I think it should work."); AddDocument(writer, "2", "I think it should work."); AddDocument(writer, "3", "I think it should work."); AddDocument(writer, "4", "I think it should work."); Reader = writer.Reader; writer.Dispose(); Searcher = NewSearcher(Reader); Searcher.Similarity = new DefaultSimilarity(); }
public TaxonomyIndexArrays(IndexReader reader) { parents = new int[reader.MaxDoc]; if (parents.Length > 0) { InitParents(reader, 0); // Starting Lucene 2.9, following the change LUCENE-1542, we can // no longer reliably read the parent "-1" (see comment in // LuceneTaxonomyWriter.SinglePositionTokenStream). We have no way // to fix this in indexing without breaking backward-compatibility // with existing indexes, so what we'll do instead is just // hard-code the parent of ordinal 0 to be -1, and assume (as is // indeed the case) that no other parent can be -1. parents[0] = TaxonomyReader.INVALID_ORDINAL; } }
public void BeforeClass() { Directory = NewDirectory(); RandomIndexWriter writer = new RandomIndexWriter(Random(), Directory, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NewLogMergePolicy())); writer.AddDocument(Doc(new Field[] { GetField("id", "0"), GetField("gender", "male"), GetField("first", "james"), GetField("last", "jones") })); writer.AddDocument(Doc(new Field[] { GetField("id", "1"), GetField("gender", "male"), GetField("first", "james"), GetField("last", "smith"), GetField("gender", "female"), GetField("first", "sally"), GetField("last", "jones") })); writer.AddDocument(Doc(new Field[] { GetField("id", "2"), GetField("gender", "female"), GetField("first", "greta"), GetField("last", "jones"), GetField("gender", "female"), GetField("first", "sally"), GetField("last", "smith"), GetField("gender", "male"), GetField("first", "james"), GetField("last", "jones") })); writer.AddDocument(Doc(new Field[] { GetField("id", "3"), GetField("gender", "female"), GetField("first", "lisa"), GetField("last", "jones"), GetField("gender", "male"), GetField("first", "bob"), GetField("last", "costas") })); writer.AddDocument(Doc(new Field[] { GetField("id", "4"), GetField("gender", "female"), GetField("first", "sally"), GetField("last", "smith"), GetField("gender", "female"), GetField("first", "linda"), GetField("last", "dixit"), GetField("gender", "male"), GetField("first", "bubba"), GetField("last", "jones") })); Reader = writer.Reader; writer.Dispose(); Searcher = NewSearcher(Reader); }
public TaxonomyIndexArrays(IndexReader reader, TaxonomyIndexArrays copyFrom) { Debug.Assert(copyFrom != null); // note that copyParents.length may be equal to reader.maxDoc(). this is not a bug // it may be caused if e.g. the taxonomy segments were merged, and so an updated // NRT reader was obtained, even though nothing was changed. this is not very likely // to happen. int[] copyParents = copyFrom.Parents; this.parents = new int[reader.MaxDoc]; Array.Copy(copyParents, 0, parents, 0, copyParents.Length); InitParents(reader, copyParents.Length); if (copyFrom.initializedChildren) { InitChildrenSiblings(copyFrom); } }
public override void SetUp() { base.SetUp(); Directory = NewDirectory(); RandomIndexWriter iw = new RandomIndexWriter(Random(), Directory, Similarity, TimeZone); Document doc = new Document(); Field field = NewTextField("field", "", Field.Store.NO); doc.Add(field); field.StringValue = "quick brown fox"; iw.AddDocument(doc); field.StringValue = "jumps over lazy broun dog"; iw.AddDocument(doc); field.StringValue = "jumps over extremely very lazy broxn dog"; iw.AddDocument(doc); Reader = iw.Reader; iw.Dispose(); Searcher = NewSearcher(Reader); }
public override void SetUp() { base.SetUp(); Directory = NewDirectory(); RandomIndexWriter writer = new RandomIndexWriter(Random(), Directory, Similarity, TimeZone); Document doc = new Document(); Field titleField = NewTextField("title", "some title", Field.Store.NO); Field field = NewTextField(FN, "", Field.Store.NO); Field footerField = NewTextField("footer", "a footer", Field.Store.NO); doc.Add(titleField); doc.Add(field); doc.Add(footerField); field.StringValue = "\uD866\uDF05abcdef"; writer.AddDocument(doc); field.StringValue = "\uD866\uDF06ghijkl"; writer.AddDocument(doc); // this sorts before the previous two in UTF-8/UTF-32, but after in UTF-16!!! field.StringValue = "\uFB94mnopqr"; writer.AddDocument(doc); field.StringValue = "\uFB95stuvwx"; // this one too. writer.AddDocument(doc); field.StringValue = "a\uFFFCbc"; writer.AddDocument(doc); field.StringValue = "a\uFFFDbc"; writer.AddDocument(doc); field.StringValue = "a\uFFFEbc"; writer.AddDocument(doc); field.StringValue = "a\uFB94bc"; writer.AddDocument(doc); field.StringValue = "bacadaba"; writer.AddDocument(doc); field.StringValue = "\uFFFD"; writer.AddDocument(doc); field.StringValue = "\uFFFD\uD866\uDF05"; writer.AddDocument(doc); field.StringValue = "\uFFFD\uFFFD"; writer.AddDocument(doc); Reader = writer.Reader; Searcher = NewSearcher(Reader); writer.Dispose(); }
public override Query Rewrite(IndexReader reader) { Query newQuery = baseQuery; while (true) { Query rewrittenQuery = newQuery.Rewrite(reader); if (rewrittenQuery == newQuery) { break; } newQuery = rewrittenQuery; } if (newQuery == baseQuery) { return this; } else { return new DrillSidewaysQuery(newQuery, drillDownCollector, drillSidewaysCollectors, drillDownQueries, scoreSubDocsAtOnce); } }
public override void SetUp() { base.SetUp(); Directory = NewDirectory(); RandomIndexWriter writer = new RandomIndexWriter(Random(), Directory, Similarity, TimeZone); Document doc = new Document(); Field titleField = NewTextField("title", "some title", Field.Store.NO); Field field = NewTextField(FN, "this is document one 2345", Field.Store.NO); Field footerField = NewTextField("footer", "a footer", Field.Store.NO); doc.Add(titleField); doc.Add(field); doc.Add(footerField); writer.AddDocument(doc); field.StringValue = "some text from doc two a short piece 5678.91"; writer.AddDocument(doc); field.StringValue = "doc three has some different stuff" + " with numbers 1234 5678.9 and letter b"; writer.AddDocument(doc); Reader = writer.Reader; Searcher = NewSearcher(Reader); writer.Dispose(); }
public override void SetUp() { base.SetUp(); Dir = NewDirectory(); RandomIndexWriter writer = new RandomIndexWriter(Random(), Dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random(), MockTokenizer.KEYWORD, false)).SetMaxBufferedDocs(TestUtil.NextInt(Random(), 50, 1000))); Document doc = new Document(); Field field = NewStringField("field", "", Field.Store.NO); doc.Add(field); // we generate awful prefixes: good for testing. // but for preflex codec, the test can be very slow, so use fewer iterations. string codec = Codec.Default.Name; int num = codec.Equals("Lucene3x") ? 200 * RANDOM_MULTIPLIER : AtLeast(1000); for (int i = 0; i < num; i++) { field.StringValue = TestUtil.RandomUnicodeString(Random(), 10); writer.AddDocument(doc); } Reader = writer.Reader; Searcher = NewSearcher(Reader); writer.Dispose(); }
public override void SetUp() { base.SetUp(); // Create an index writer. Directory = NewDirectory(); RandomIndexWriter writer = new RandomIndexWriter(Random(), Directory, Similarity, TimeZone); // oldest doc: // Add the first document. text = "Document 1" dateTime = Oct 10 03:25:22 EDT 2007 writer.AddDocument(CreateDocument("Document 1", 1192001122000L)); // Add the second document. text = "Document 2" dateTime = Oct 10 03:25:26 EDT 2007 writer.AddDocument(CreateDocument("Document 2", 1192001126000L)); // Add the third document. text = "Document 3" dateTime = Oct 11 07:12:13 EDT 2007 writer.AddDocument(CreateDocument("Document 3", 1192101133000L)); // Add the fourth document. text = "Document 4" dateTime = Oct 11 08:02:09 EDT 2007 writer.AddDocument(CreateDocument("Document 4", 1192104129000L)); // latest doc: // Add the fifth document. text = "Document 5" dateTime = Oct 12 13:25:43 EDT 2007 writer.AddDocument(CreateDocument("Document 5", 1192209943000L)); Reader = writer.Reader; writer.Dispose(); }
public void BeforeClass() { string[] data = new string[] { "A 1 2 3 4 5 6", "Z 4 5 6", null, "B 2 4 5 6", "Y 3 5 6", null, "C 3 6", "X 4 5 6" }; Small = NewDirectory(); RandomIndexWriter writer = new RandomIndexWriter(Random(), Small, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random(), MockTokenizer.WHITESPACE, false)).SetMergePolicy(NewLogMergePolicy())); FieldType customType = new FieldType(TextField.TYPE_STORED); customType.Tokenized = false; for (int i = 0; i < data.Length; i++) { Document doc = new Document(); doc.Add(NewField("id", Convert.ToString(i), customType)); // Field.Keyword("id",String.valueOf(i))); doc.Add(NewField("all", "all", customType)); // Field.Keyword("all","all")); if (null != data[i]) { doc.Add(NewTextField("data", data[i], Field.Store.YES)); // Field.Text("data",data[i])); } writer.AddDocument(doc); } Reader = writer.Reader; writer.Dispose(); }
public void BeforeClass() { string[] data = new string[] { "A 1 2 3 4 5 6", "Z 4 5 6", null, "B 2 4 5 6", "Y 3 5 6", null, "C 3 6", "X 4 5 6" }; Index = NewDirectory(); RandomIndexWriter w = new RandomIndexWriter(Random(), Index, Similarity, TimeZone); for (int i = 0; i < data.Length; i++) { Document doc = new Document(); doc.Add(NewStringField("id", Convert.ToString(i), Field.Store.YES)); //Field.Keyword("id",String.valueOf(i))); doc.Add(NewStringField("all", "all", Field.Store.YES)); //Field.Keyword("all","all")); if (null != data[i]) { doc.Add(NewTextField("data", data[i], Field.Store.YES)); //Field.Text("data",data[i])); } w.AddDocument(doc); } r = w.Reader; s = NewSearcher(r); w.Dispose(); //System.out.println("Set up " + getName()); }
public override void SetNextReader(IndexReader reader, int docBase) { currentReaderValues = FieldCache_Fields.DEFAULT.GetLongs(reader, field); }
public ShardIndexSearcher(ShardSearchingTestBase.NodeState nodeState, long[] nodeVersions, IndexReader localReader, int nodeID) : base(localReader) { this.outerInstance = nodeState; this.nodeVersions = nodeVersions; MyNodeID = nodeID; Debug.Assert(MyNodeID == nodeState.MyNodeID, "myNodeID=" + nodeID + " NodeState.this.myNodeID=" + nodeState.MyNodeID); }
public void BeforeClass() { // NOTE: turn off compound file, this test will open some index files directly. OLD_FORMAT_IMPERSONATION_IS_ACTIVE = true; IndexWriterConfig config = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random(), MockTokenizer.KEYWORD, false)).SetUseCompoundFile(false); TermIndexInterval = config.TermIndexInterval; IndexDivisor = TestUtil.NextInt(Random(), 1, 10); NUMBER_OF_DOCUMENTS = AtLeast(100); NUMBER_OF_FIELDS = AtLeast(Math.Max(10, 3 * TermIndexInterval * IndexDivisor / NUMBER_OF_DOCUMENTS)); Directory = NewDirectory(); config.SetCodec(new PreFlexRWCodec(OLD_FORMAT_IMPERSONATION_IS_ACTIVE)); LogMergePolicy mp = NewLogMergePolicy(); // NOTE: turn off compound file, this test will open some index files directly. mp.NoCFSRatio = 0.0; config.SetMergePolicy(mp); Populate(Directory, config); DirectoryReader r0 = IndexReader.Open(Directory); SegmentReader r = LuceneTestCase.GetOnlySegmentReader(r0); string segment = r.SegmentName; r.Dispose(); FieldInfosReader infosReader = (new PreFlexRWCodec(OLD_FORMAT_IMPERSONATION_IS_ACTIVE)).FieldInfosFormat().FieldInfosReader; FieldInfos fieldInfos = infosReader.Read(Directory, segment, "", IOContext.READONCE); string segmentFileName = IndexFileNames.SegmentFileName(segment, "", Lucene3xPostingsFormat.TERMS_INDEX_EXTENSION); long tiiFileLength = Directory.FileLength(segmentFileName); IndexInput input = Directory.OpenInput(segmentFileName, NewIOContext(Random())); TermEnum = new SegmentTermEnum(Directory.OpenInput(IndexFileNames.SegmentFileName(segment, "", Lucene3xPostingsFormat.TERMS_EXTENSION), NewIOContext(Random())), fieldInfos, false); int totalIndexInterval = TermEnum.IndexInterval * IndexDivisor; SegmentTermEnum indexEnum = new SegmentTermEnum(input, fieldInfos, true); Index = new TermInfosReaderIndex(indexEnum, IndexDivisor, tiiFileLength, totalIndexInterval); indexEnum.Dispose(); input.Dispose(); Reader = IndexReader.Open(Directory); SampleTerms = Sample(Random(), Reader, 1000); }
public LuceneDictionary(IndexReader reader, System.String field) { this.reader = reader; this.field = field; }
public void TestWhichMTQMatched() { Directory dir = NewDirectory(); // use simpleanalyzer for more natural tokenization (else "test." is a token) Analyzer analyzer = new MockAnalyzer(Random(), MockTokenizer.SIMPLE, true); IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer); iwc.SetMergePolicy(NewLogMergePolicy()); RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, iwc); FieldType offsetsType = new FieldType(TextField.TYPE_STORED); offsetsType.IndexOptions = (IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS); Field body = new Field("body", "", offsetsType); Document doc = new Document(); doc.Add(body); body.SetStringValue("Test a one sentence document."); iw.AddDocument(doc); IndexReader ir = iw.Reader; iw.Dispose(); IndexSearcher searcher = NewSearcher(ir); PostingsHighlighter highlighter = new PostingsHighlighterAnalyzerHelper(analyzer); //PostingsHighlighter highlighter = new PostingsHighlighter() { // @Override // protected Analyzer getIndexAnalyzer(String field) //{ // return analyzer; //} // }; BooleanQuery query = new BooleanQuery(); query.Add(new WildcardQuery(new Term("body", "te*")), Occur.SHOULD); query.Add(new WildcardQuery(new Term("body", "one")), Occur.SHOULD); query.Add(new WildcardQuery(new Term("body", "se*")), Occur.SHOULD); TopDocs topDocs = searcher.Search(query, null, 10, Sort.INDEXORDER); assertEquals(1, topDocs.TotalHits); String[] snippets = highlighter.Highlight("body", query, searcher, topDocs); assertEquals(1, snippets.Length); // Default formatter just bolds each hit: assertEquals("<b>Test</b> a <b>one</b> <b>sentence</b> document.", snippets[0]); // Now use our own formatter, that also stuffs the // matching term's text into the result: highlighter = new PostingsHighlighterAnalyzerAndFormatterHelper(analyzer, new PassageFormatterHelper()); //highlighter = new PostingsHighlighter() //{ // @Override // protected Analyzer getIndexAnalyzer(String field) //{ // return analyzer; //} //@Override // protected PassageFormatter getFormatter(String field) //{ // return new PassageFormatter() { // @Override // public Object format(Passage passages[], String content) //{ // // Copied from DefaultPassageFormatter, but // // tweaked to include the matched term: // StringBuilder sb = new StringBuilder(); // int pos = 0; // for (Passage passage : passages) // { // // don't add ellipsis if its the first one, or if its connected. // if (passage.startOffset > pos && pos > 0) // { // sb.append("... "); // } // pos = passage.startOffset; // for (int i = 0; i < passage.numMatches; i++) // { // int start = passage.matchStarts[i]; // int end = passage.matchEnds[i]; // // its possible to have overlapping terms // if (start > pos) // { // sb.append(content, pos, start); // } // if (end > pos) // { // sb.append("<b>"); // sb.append(content, Math.max(pos, start), end); // sb.append('('); // sb.append(passage.getMatchTerms()[i].utf8ToString()); // sb.append(')'); // sb.append("</b>"); // pos = end; // } // } // // its possible a "term" from the analyzer could span a sentence boundary. // sb.append(content, pos, Math.max(pos, passage.endOffset)); // pos = passage.endOffset; // } // return sb.toString(); //} // }; // } // }; assertEquals(1, topDocs.TotalHits); snippets = highlighter.Highlight("body", query, searcher, topDocs); assertEquals(1, snippets.Length); // Default formatter bolds each hit: assertEquals("<b>Test(body:te*)</b> a <b>one(body:one)</b> <b>sentence(body:se*)</b> document.", snippets[0]); ir.Dispose(); dir.Dispose(); }
public SearcherFactoryAnonymousInnerClassHelper3(TestSearcherManager outerInstance, IndexReader other) { this.OuterInstance = outerInstance; this.Other = other; }
private static IndexSearcher GetIndexSearcher(IndexReader reader) { return new IndexSearcher(reader); }
public override void BeforeClass() { base.BeforeClass(); NUM_DOCS = AtLeast(500); NUM_ORDS = AtLeast(2); directory = NewDirectory(); RandomIndexWriter writer = new RandomIndexWriter(Random, directory, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetMergePolicy(NewLogMergePolicy())); long theLong = long.MaxValue; double theDouble = double.MaxValue; sbyte theByte = sbyte.MaxValue; short theShort = short.MaxValue; int theInt = int.MaxValue; float theFloat = float.MaxValue; unicodeStrings = new string[NUM_DOCS]; //MultiValued = new BytesRef[NUM_DOCS, NUM_ORDS]; multiValued = RectangularArrays.ReturnRectangularArray <BytesRef>(NUM_DOCS, NUM_ORDS); if (Verbose) { Console.WriteLine("TEST: setUp"); } for (int i = 0; i < NUM_DOCS; i++) { Document doc = new Document(); doc.Add(NewStringField("theLong", (theLong--).ToString(CultureInfo.InvariantCulture), Field.Store.NO)); doc.Add(NewStringField("theDouble", (theDouble--).ToString("R", CultureInfo.InvariantCulture), Field.Store.NO)); doc.Add(NewStringField("theByte", (theByte--).ToString(CultureInfo.InvariantCulture), Field.Store.NO)); doc.Add(NewStringField("theShort", (theShort--).ToString(CultureInfo.InvariantCulture), Field.Store.NO)); doc.Add(NewStringField("theInt", (theInt--).ToString(CultureInfo.InvariantCulture), Field.Store.NO)); doc.Add(NewStringField("theFloat", (theFloat--).ToString("R", CultureInfo.InvariantCulture), Field.Store.NO)); if (i % 2 == 0) { doc.Add(NewStringField("sparse", (i).ToString(CultureInfo.InvariantCulture), Field.Store.NO)); } if (i % 2 == 0) { doc.Add(new Int32Field("numInt", i, Field.Store.NO)); } // sometimes skip the field: if (Random.Next(40) != 17) { unicodeStrings[i] = GenerateString(i); doc.Add(NewStringField("theRandomUnicodeString", unicodeStrings[i], Field.Store.YES)); } // sometimes skip the field: if (Random.Next(10) != 8) { for (int j = 0; j < NUM_ORDS; j++) { string newValue = GenerateString(i); multiValued[i][j] = new BytesRef(newValue); doc.Add(NewStringField("theRandomUnicodeMultiValuedField", newValue, Field.Store.YES)); } Array.Sort(multiValued[i]); } writer.AddDocument(doc); } IndexReader r = writer.GetReader(); reader = SlowCompositeReaderWrapper.Wrap(r); writer.Dispose(); }
private SuggestionQueryResult QueryOverMultipleWords(SuggestionQuery suggestionQuery, IndexReader indexReader, string queryText) { var individualTerms = queryText.Split(new[] { ' ', '\t', '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries); var result = new HashSet <string>(); var maxSuggestions = suggestionQuery.MaxSuggestions; foreach (var term in individualTerms) { if (maxSuggestions <= 0) { break; } foreach (var suggestion in spellChecker.SuggestSimilar(term, suggestionQuery.MaxSuggestions, // we can filter out duplicates, so taking more indexReader, suggestionQuery.Field, suggestionQuery.Popularity)) { if (result.Add(suggestion) == false) { continue; } maxSuggestions--; if (maxSuggestions <= 0) { break; } } } return(new SuggestionQueryResult { Suggestions = result.ToArray() }); }
public virtual void TestWickedLongTerm() { using (RAMDirectory dir = new RAMDirectory()) { char[] chars = new char[IndexWriter.MAX_TERM_LENGTH]; Arrays.Fill(chars, 'x'); string bigTerm = new string(chars); Document doc = new Document(); using (IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new ClassicAnalyzer(TEST_VERSION_CURRENT)))) { // This produces a too-long term: string contents = "abc xyz x" + bigTerm + " another term"; doc.Add(new TextField("content", contents, Field.Store.NO)); writer.AddDocument(doc); // Make sure we can add another normal document doc = new Document(); doc.Add(new TextField("content", "abc bbb ccc", Field.Store.NO)); writer.AddDocument(doc); } #pragma warning disable 612, 618 using (IndexReader reader = IndexReader.Open(dir)) #pragma warning restore 612, 618 { // Make sure all terms < max size were indexed assertEquals(2, reader.DocFreq(new Term("content", "abc"))); assertEquals(1, reader.DocFreq(new Term("content", "bbb"))); assertEquals(1, reader.DocFreq(new Term("content", "term"))); assertEquals(1, reader.DocFreq(new Term("content", "another"))); // Make sure position is still incremented when // massive term is skipped: DocsAndPositionsEnum tps = MultiFields.GetTermPositionsEnum(reader, MultiFields.GetLiveDocs(reader), "content", new BytesRef("another")); assertTrue(tps.NextDoc() != DocIdSetIterator.NO_MORE_DOCS); assertEquals(1, tps.Freq); assertEquals(3, tps.NextPosition()); // Make sure the doc that has the massive term is in // the index: assertEquals("document with wicked long term is not in the index!", 2, reader.NumDocs); } // Make sure we can add a document with exactly the // maximum length term, and search on that term: doc = new Document(); doc.Add(new TextField("content", bigTerm, Field.Store.NO)); ClassicAnalyzer sa = new ClassicAnalyzer(TEST_VERSION_CURRENT); sa.MaxTokenLength = 100000; using (var writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, sa))) { writer.AddDocument(doc); } #pragma warning disable 612, 618 using (var reader = IndexReader.Open(dir)) #pragma warning restore 612, 618 { assertEquals(1, reader.DocFreq(new Term("content", bigTerm))); } } }
/// <summary> /// Expert: highlights the top-N passages from multiple fields, /// for the provided int[] docids, to custom object as /// returned by the <see cref="PassageFormatter"/>. Use /// this API to render to something other than <see cref="string"/>. /// </summary> /// <param name="fieldsIn">field names to highlight. Must have a stored string value and also be indexed with offsets.</param> /// <param name="query">query to highlight.</param> /// <param name="searcher">searcher that was previously used to execute the query.</param> /// <param name="docidsIn">containing the document IDs to highlight.</param> /// <param name="maxPassagesIn">The maximum number of top-N ranked passages per-field used to form the highlighted snippets.</param> /// <returns> /// <see cref="T:IDictionary{string, object[]}"/> keyed on field name, containing the array of formatted snippets /// corresponding to the documents in <paramref name="docidsIn"/>. /// If no highlights were found for a document, the /// first <paramref name="maxPassagesIn"/> from the field will /// be returned. /// </returns> /// <exception cref="IOException">if an I/O error occurred during processing</exception> /// <exception cref="ArgumentException">if <c>field</c> was indexed without <see cref="IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS"/></exception> protected internal virtual IDictionary <string, object[]> HighlightFieldsAsObjects(string[] fieldsIn, Query query, IndexSearcher searcher, int[] docidsIn, int[] maxPassagesIn) { if (fieldsIn.Length < 1) { throw new ArgumentException("fieldsIn must not be empty"); } if (fieldsIn.Length != maxPassagesIn.Length) { throw new ArgumentException("invalid number of maxPassagesIn"); } IndexReader reader = searcher.IndexReader; Query rewritten = Rewrite(query); JCG.SortedSet <Term> queryTerms = new JCG.SortedSet <Term>(); rewritten.ExtractTerms(queryTerms); IndexReaderContext readerContext = reader.Context; IList <AtomicReaderContext> leaves = readerContext.Leaves; // Make our own copies because we sort in-place: int[] docids = new int[docidsIn.Length]; System.Array.Copy(docidsIn, 0, docids, 0, docidsIn.Length); string[] fields = new string[fieldsIn.Length]; System.Array.Copy(fieldsIn, 0, fields, 0, fieldsIn.Length); int[] maxPassages = new int[maxPassagesIn.Length]; System.Array.Copy(maxPassagesIn, 0, maxPassages, 0, maxPassagesIn.Length); // sort for sequential io ArrayUtil.TimSort(docids); new InPlaceMergeSorterAnonymousHelper(fields, maxPassages).Sort(0, fields.Length); // pull stored data: IList <string[]> contents = LoadFieldValues(searcher, fields, docids, maxLength); IDictionary <string, object[]> highlights = new Dictionary <string, object[]>(); for (int i = 0; i < fields.Length; i++) { string field = fields[i]; int numPassages = maxPassages[i]; Term floor = new Term(field, ""); Term ceiling = new Term(field, UnicodeUtil.BIG_TERM); // LUCENENET: Call custom GetViewBetween overload to mimic Java's exclusive upper bound behavior. var fieldTerms = queryTerms.GetViewBetween(floor, lowerValueInclusive: true, ceiling, upperValueInclusive: false); // TODO: should we have some reasonable defaults for term pruning? (e.g. 
stopwords) // Strip off the redundant field: BytesRef[] terms = new BytesRef[fieldTerms.Count]; int termUpto = 0; foreach (Term term in fieldTerms) { terms[termUpto++] = term.Bytes; } IDictionary <int, object> fieldHighlights = HighlightField(field, contents[i], GetBreakIterator(field), terms, docids, leaves, numPassages, query); object[] result = new object[docids.Length]; for (int j = 0; j < docidsIn.Length; j++) { fieldHighlights.TryGetValue(docidsIn[j], out result[j]); } highlights[field] = result; } return(highlights); }
public ExplanationAssertingSearcher(IndexReader r) : base(r) { }
public static TermInfo[] GetHighFreqTerms(Directory dir, Hashtable junkWords, int numTerms, String[] fields) { if (dir == null || fields == null) { return(new TermInfo[0]); } IndexReader reader = IndexReader.Open(dir, true); TermInfoQueue tiq = new TermInfoQueue(numTerms); TermEnum terms = reader.Terms(); int minFreq = 0; while (terms.Next()) { String field = terms.Term().Field(); if (fields != null && fields.Length > 0) { bool skip = true; for (int i = 0; i < fields.Length; i++) { if (field.Equals(fields[i])) { skip = false; break; } } if (skip) { continue; } } if (junkWords != null && junkWords[terms.Term().Text()] != null) { continue; } if (terms.DocFreq() > minFreq) { TermInfo top = (TermInfo)tiq.Add(new TermInfo(terms.Term(), terms.DocFreq())); if (tiq.Size() >= numTerms) // if tiq overfull { tiq.Pop(); // remove lowest in tiq minFreq = top.DocFreq; // reset minFreq } } } TermInfo[] res = new TermInfo[tiq.Size()]; for (int i = 0; i < res.Length; i++) { res[res.Length - i - 1] = (TermInfo)tiq.Pop(); } reader.Close(); return(res); }
public virtual void TestPerFieldCodec() { int NUM_DOCS = AtLeast(173); if (Verbose) { Console.WriteLine("TEST: NUM_DOCS=" + NUM_DOCS); } using BaseDirectoryWrapper dir = NewDirectory(); dir.CheckIndexOnDispose = false; // we use a custom codec provider using IndexWriter w = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetCodec(new CustomPerFieldCodec()).SetMergePolicy(NewLogMergePolicy(3))); Documents.Document doc = new Documents.Document(); // uses default codec: doc.Add(NewTextField("field1", "this field uses the standard codec as the test", Field.Store.NO)); // uses pulsing codec: Field field2 = NewTextField("field2", "this field uses the pulsing codec as the test", Field.Store.NO); doc.Add(field2); Field idField = NewStringField("id", "", Field.Store.NO); doc.Add(idField); for (int i = 0; i < NUM_DOCS; i++) { idField.SetStringValue("" + i); w.AddDocument(doc); if ((i + 1) % 10 == 0) { w.Commit(); } } if (Verbose) { Console.WriteLine("TEST: now delete id=77"); } w.DeleteDocuments(new Term("id", "77")); using (IndexReader r = DirectoryReader.Open(w, true)) { Assert.AreEqual(NUM_DOCS - 1, r.NumDocs); IndexSearcher s = NewSearcher(r); Assert.AreEqual(NUM_DOCS - 1, s.Search(new TermQuery(new Term("field1", "standard")), 1).TotalHits); Assert.AreEqual(NUM_DOCS - 1, s.Search(new TermQuery(new Term("field2", "pulsing")), 1).TotalHits); } if (Verbose) { Console.WriteLine("\nTEST: now delete 2nd doc"); } w.DeleteDocuments(new Term("id", "44")); if (Verbose) { Console.WriteLine("\nTEST: now force merge"); } w.ForceMerge(1); if (Verbose) { Console.WriteLine("\nTEST: now open reader"); } using (IndexReader r = DirectoryReader.Open(w, true)) { Assert.AreEqual(NUM_DOCS - 2, r.MaxDoc); Assert.AreEqual(NUM_DOCS - 2, r.NumDocs); IndexSearcher s = NewSearcher(r); Assert.AreEqual(NUM_DOCS - 2, s.Search(new TermQuery(new Term("field1", "standard")), 1).TotalHits); Assert.AreEqual(NUM_DOCS - 2, s.Search(new TermQuery(new Term("field2", "pulsing")), 1).TotalHits); Assert.AreEqual(1, s.Search(new TermQuery(new Term("id", "76")), 1).TotalHits); Assert.AreEqual(0, s.Search(new TermQuery(new Term("id", "77")), 1).TotalHits); Assert.AreEqual(0, s.Search(new TermQuery(new Term("id", "44")), 1).TotalHits); if (Verbose) { Console.WriteLine("\nTEST: now close NRT reader"); } } }
public CustomSearcher(TestCustomSearcherSort outerInstance, IndexReader r, int switcher) : base(r) { this.OuterInstance = outerInstance; this.Switcher = switcher; }
private IndexReader GetIndexReader() { return IndexReader.Open(IndexDirectory, true); }
public virtual void TestTransitionAPI() { Directory dir = NewDirectory(); RandomIndexWriter w = new RandomIndexWriter(Random(), dir, Similarity, TimeZone); Documents.Document doc = new Documents.Document(); doc.Add(new Field("stored", "abc", Field.Store.YES, Field.Index.NO)); doc.Add(new Field("stored_indexed", "abc xyz", Field.Store.YES, Field.Index.NOT_ANALYZED)); doc.Add(new Field("stored_tokenized", "abc xyz", Field.Store.YES, Field.Index.ANALYZED)); doc.Add(new Field("indexed", "abc xyz", Field.Store.NO, Field.Index.NOT_ANALYZED)); doc.Add(new Field("tokenized", "abc xyz", Field.Store.NO, Field.Index.ANALYZED)); doc.Add(new Field("tokenized_reader", new StringReader("abc xyz"))); doc.Add(new Field("tokenized_tokenstream", w.w.Analyzer.TokenStream("tokenized_tokenstream", new StringReader("abc xyz")))); doc.Add(new Field("binary", new byte[10])); doc.Add(new Field("tv", "abc xyz", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.YES)); doc.Add(new Field("tv_pos", "abc xyz", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS)); doc.Add(new Field("tv_off", "abc xyz", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_OFFSETS)); doc.Add(new Field("tv_pos_off", "abc xyz", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); w.AddDocument(doc); IndexReader r = w.Reader; w.Dispose(); doc = r.Document(0); // 4 stored fields Assert.AreEqual(4, doc.Fields.Count); Assert.AreEqual("abc", doc.Get("stored")); Assert.AreEqual("abc xyz", doc.Get("stored_indexed")); Assert.AreEqual("abc xyz", doc.Get("stored_tokenized")); BytesRef br = doc.GetBinaryValue("binary"); Assert.IsNotNull(br); Assert.AreEqual(10, br.Length); IndexSearcher s = new IndexSearcher(r); Assert.AreEqual(1, s.Search(new TermQuery(new Term("stored_indexed", "abc xyz")), 1).TotalHits); Assert.AreEqual(1, s.Search(new TermQuery(new Term("stored_tokenized", "abc")), 1).TotalHits); Assert.AreEqual(1, s.Search(new TermQuery(new Term("stored_tokenized", "xyz")), 1).TotalHits); Assert.AreEqual(1, s.Search(new TermQuery(new Term("indexed", "abc xyz")), 1).TotalHits); Assert.AreEqual(1, s.Search(new TermQuery(new Term("tokenized", "abc")), 1).TotalHits); Assert.AreEqual(1, s.Search(new TermQuery(new Term("tokenized", "xyz")), 1).TotalHits); Assert.AreEqual(1, s.Search(new TermQuery(new Term("tokenized_reader", "abc")), 1).TotalHits); Assert.AreEqual(1, s.Search(new TermQuery(new Term("tokenized_reader", "xyz")), 1).TotalHits); Assert.AreEqual(1, s.Search(new TermQuery(new Term("tokenized_tokenstream", "abc")), 1).TotalHits); Assert.AreEqual(1, s.Search(new TermQuery(new Term("tokenized_tokenstream", "xyz")), 1).TotalHits); foreach (string field in new string[] { "tv", "tv_pos", "tv_off", "tv_pos_off" }) { Fields tvFields = r.GetTermVectors(0); Terms tvs = tvFields.Terms(field); Assert.IsNotNull(tvs); Assert.AreEqual(2, tvs.Size()); TermsEnum tvsEnum = tvs.Iterator(null); Assert.AreEqual(new BytesRef("abc"), tvsEnum.Next()); DocsAndPositionsEnum dpEnum = tvsEnum.DocsAndPositions(null, null); if (field.Equals("tv")) { Assert.IsNull(dpEnum); } else { Assert.IsNotNull(dpEnum); } Assert.AreEqual(new BytesRef("xyz"), tvsEnum.Next()); Assert.IsNull(tvsEnum.Next()); } r.Dispose(); dir.Dispose(); }
public override IndexSearcher NewSearcher(IndexReader ignored) { return(LuceneTestCase.NewSearcher(Other)); }
public override Query Rewrite(IndexReader reader, MultiTermQuery query) { return(@delegate.Rewrite(reader, query)); }
/// <summary> /// Build the suggest index, using up to the specified /// amount of temporary RAM while building. Note that /// the weights for the suggestions are ignored. /// </summary> public virtual void Build(IInputIterator iterator, double ramBufferSizeMB) { if (iterator.HasPayloads) { throw new ArgumentException("this suggester doesn't support payloads"); } if (iterator.HasContexts) { throw new ArgumentException("this suggester doesn't support contexts"); } string prefix = this.GetType().Name; var directory = OfflineSorter.DefaultTempDir(); // LUCENENET specific - using GetRandomFileName() instead of picking a random int DirectoryInfo tempIndexPath = null; while (true) { tempIndexPath = new DirectoryInfo(Path.Combine(directory.FullName, prefix + ".index." + Path.GetFileNameWithoutExtension(Path.GetRandomFileName()))); tempIndexPath.Create(); if (System.IO.Directory.Exists(tempIndexPath.FullName)) { break; } } Directory dir = FSDirectory.Open(tempIndexPath); try { #pragma warning disable 612, 618 IndexWriterConfig iwc = new IndexWriterConfig(LuceneVersion.LUCENE_CURRENT, indexAnalyzer); #pragma warning restore 612, 618 iwc.SetOpenMode(OpenMode.CREATE); iwc.SetRAMBufferSizeMB(ramBufferSizeMB); IndexWriter writer = new IndexWriter(dir, iwc); var ft = new FieldType(TextField.TYPE_NOT_STORED); // TODO: if only we had IndexOptions.TERMS_ONLY... ft.IndexOptions = IndexOptions.DOCS_AND_FREQS; ft.OmitNorms = true; ft.Freeze(); Document doc = new Document(); Field field = new Field("body", "", ft); doc.Add(field); totTokens = 0; IndexReader reader = null; bool success = false; count = 0; try { while (true) { BytesRef surfaceForm = iterator.Next(); if (surfaceForm == null) { break; } field.SetStringValue(surfaceForm.Utf8ToString()); writer.AddDocument(doc); count++; } reader = DirectoryReader.Open(writer, false); Terms terms = MultiFields.GetTerms(reader, "body"); if (terms == null) { throw new ArgumentException("need at least one suggestion"); } // Move all ngrams into an FST: TermsEnum termsEnum = terms.GetIterator(null); Outputs <long?> outputs = PositiveInt32Outputs.Singleton; Builder <long?> builder = new Builder <long?>(FST.INPUT_TYPE.BYTE1, outputs); Int32sRef scratchInts = new Int32sRef(); while (true) { BytesRef term = termsEnum.Next(); if (term == null) { break; } int ngramCount = CountGrams(term); if (ngramCount > grams) { throw new ArgumentException("tokens must not contain separator byte; got token=" + term + " but gramCount=" + ngramCount + ", which is greater than expected max ngram size=" + grams); } if (ngramCount == 1) { totTokens += termsEnum.TotalTermFreq; } builder.Add(Lucene.Net.Util.Fst.Util.ToInt32sRef(term, scratchInts), EncodeWeight(termsEnum.TotalTermFreq)); } fst = builder.Finish(); if (fst == null) { throw new ArgumentException("need at least one suggestion"); } //System.out.println("FST: " + fst.getNodeCount() + " nodes"); /* * PrintWriter pw = new PrintWriter("/x/tmp/out.dot"); * Util.toDot(fst, pw, true, true); * pw.close(); */ success = true; } finally { if (success) { IOUtils.Dispose(writer, reader); } else { IOUtils.DisposeWhileHandlingException(writer, reader); } } } finally { try { IOUtils.Dispose(dir); } finally { // LUCENENET specific - since we are removing the entire directory anyway, // it doesn't make sense to first do a loop in order remove the files. // Let the System.IO.Directory.Delete() method handle that. // We also need to dispose the Directory instance first before deleting from disk. 
try { System.IO.Directory.Delete(tempIndexPath.FullName, true); } catch (Exception e) { throw new InvalidOperationException("failed to remove " + tempIndexPath, e); } } } }
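// The Build method above appears to be the FreeTextSuggester-style n-gram/FST build (weights are
// ignored, tokens are checked against the separator byte). Assuming that, here is a minimal, hedged
// usage sketch; the analyzer, field name, and lookup key are illustrative, not taken from the code
// above, and the Lucene.Net.Search.Suggest.Analyzing / Lucene.Net.Search.Spell namespaces are assumed.
var suggester = new FreeTextSuggester(new StandardAnalyzer(LuceneVersion.LUCENE_48));
// Feed terms from an already-open IndexReader via LuceneDictionary (no payloads or contexts,
// so the suggester accepts it); a DocumentDictionary over stored text would also work.
suggester.Build(new LuceneDictionary(reader, "body"));
// Ask for up to 5 completions of a partially typed phrase:
var completions = suggester.DoLookup("foo b", false, 5);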
/// <summary> /// Constructor requiring an <see cref="IndexReader"/>. /// </summary> public MoreLikeThis(IndexReader ir) : this(ir, new DefaultSimilarity()) { }
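// The constructor above only wires the reader to a DefaultSimilarity. A minimal, hedged usage
// sketch follows, assuming the Lucene.Net 4.8-style MoreLikeThis API (property setters plus
// Like(docId)); the field name, thresholds, and docId below are illustrative.
var mlt = new MoreLikeThis(reader)
{
    Analyzer = new StandardAnalyzer(LuceneVersion.LUCENE_48),
    FieldNames = new[] { "body" }, // which fields to mine for interesting terms
    MinTermFreq = 1,               // lower the defaults so small documents still yield terms
    MinDocFreq = 1
};
Query likeThisQuery = mlt.Like(42);                          // 42 = docId of the "seed" document
TopDocs similar = new IndexSearcher(reader).Search(likeThisQuery, 10);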
private void ExecuteRandomJoin(bool multipleValuesPerDocument, int maxIndexIter, int maxSearchIter, int numberOfDocumentsToIndex) { for (int indexIter = 1; indexIter <= maxIndexIter; indexIter++) { if (VERBOSE) { Console.WriteLine("indexIter=" + indexIter); } Directory dir = NewDirectory(); RandomIndexWriter w = new RandomIndexWriter(Random(), dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random(), MockTokenizer.KEYWORD, false)) .SetMergePolicy(NewLogMergePolicy())); bool scoreDocsInOrder = TestJoinUtil.Random().NextBoolean(); IndexIterationContext context = CreateContext(numberOfDocumentsToIndex, w, multipleValuesPerDocument, scoreDocsInOrder); IndexReader topLevelReader = w.Reader; w.Dispose(); for (int searchIter = 1; searchIter <= maxSearchIter; searchIter++) { if (VERBOSE) { Console.WriteLine("searchIter=" + searchIter); } IndexSearcher indexSearcher = NewSearcher(topLevelReader); int r = Random().Next(context.RandomUniqueValues.Length); bool from = context.RandomFrom[r]; string randomValue = context.RandomUniqueValues[r]; FixedBitSet expectedResult = CreateExpectedResult(randomValue, from, indexSearcher.IndexReader, context); Query actualQuery = new TermQuery(new Term("value", randomValue)); if (VERBOSE) { Console.WriteLine("actualQuery=" + actualQuery); } var scoreModeLength = Enum.GetNames(typeof(ScoreMode)).Length; ScoreMode scoreMode = (ScoreMode)Random().Next(scoreModeLength); if (VERBOSE) { Console.WriteLine("scoreMode=" + scoreMode); } Query joinQuery; if (from) { joinQuery = JoinUtil.CreateJoinQuery("from", multipleValuesPerDocument, "to", actualQuery, indexSearcher, scoreMode); } else { joinQuery = JoinUtil.CreateJoinQuery("to", multipleValuesPerDocument, "from", actualQuery, indexSearcher, scoreMode); } if (VERBOSE) { Console.WriteLine("joinQuery=" + joinQuery); } // Need to know all documents that have matches. TopDocs doesn't give me that and then I'd be also testing TopDocsCollector... FixedBitSet actualResult = new FixedBitSet(indexSearcher.IndexReader.MaxDoc); TopScoreDocCollector topScoreDocCollector = TopScoreDocCollector.Create(10, false); indexSearcher.Search(joinQuery, new CollectorAnonymousInnerClassHelper2(this, scoreDocsInOrder, context, actualResult, topScoreDocCollector)); // Asserting bit set... if (VERBOSE) { Console.WriteLine("expected cardinality:" + expectedResult.Cardinality()); DocIdSetIterator iterator = expectedResult.GetIterator(); for (int doc = iterator.NextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = iterator.NextDoc()) { Console.WriteLine(string.Format("Expected doc[{0}] with id value {1}", doc, indexSearcher.Doc(doc).Get("id"))); } Console.WriteLine("actual cardinality:" + actualResult.Cardinality()); iterator = actualResult.GetIterator(); for (int doc = iterator.NextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = iterator.NextDoc()) { Console.WriteLine(string.Format("Actual doc[{0}] with id value {1}", doc, indexSearcher.Doc(doc).Get("id"))); } } assertEquals(expectedResult, actualResult); // Asserting TopDocs... 
TopDocs expectedTopDocs = CreateExpectedTopDocs(randomValue, from, scoreMode, context); TopDocs actualTopDocs = topScoreDocCollector.GetTopDocs(); assertEquals(expectedTopDocs.TotalHits, actualTopDocs.TotalHits); assertEquals(expectedTopDocs.ScoreDocs.Length, actualTopDocs.ScoreDocs.Length); if (scoreMode == ScoreMode.None) { continue; } assertEquals(expectedTopDocs.MaxScore, actualTopDocs.MaxScore, 0.0f); for (int i = 0; i < expectedTopDocs.ScoreDocs.Length; i++) { if (VERBOSE) { Console.Write(string.Format("Expected doc: {0} | Actual doc: {1}\n", expectedTopDocs.ScoreDocs[i].Doc, actualTopDocs.ScoreDocs[i].Doc)); Console.Write(string.Format("Expected score: {0} | Actual score: {1}\n", expectedTopDocs.ScoreDocs[i].Score, actualTopDocs.ScoreDocs[i].Score)); } assertEquals(expectedTopDocs.ScoreDocs[i].Doc, actualTopDocs.ScoreDocs[i].Doc); assertEquals(expectedTopDocs.ScoreDocs[i].Score, actualTopDocs.ScoreDocs[i].Score, 0.0f); Explanation explanation = indexSearcher.Explain(joinQuery, expectedTopDocs.ScoreDocs[i].Doc); assertEquals(expectedTopDocs.ScoreDocs[i].Score, explanation.Value, 0.0f); } } topLevelReader.Dispose(); dir.Dispose(); } }
public void Init(IndexReader reader) { spellChecker.IndexDictionary(new LuceneDictionary(reader, field), workContext.CancellationToken); }
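// The Init above is a thin wrapper around the contrib SpellChecker; the IndexDictionary overload
// taking a CancellationToken is specific to the hosting code base. A minimal, hedged sketch against
// the stock Lucene.Net 3.x SpellChecker.Net.Search.Spell API; the directory, field name, and
// misspelled word below are illustrative.
var spellIndexDir = new RAMDirectory();
var spellChecker = new SpellChecker.Net.Search.Spell.SpellChecker(spellIndexDir);
spellChecker.IndexDictionary(new LuceneDictionary(reader, "title")); // harvest terms from an existing field
string[] suggestions = spellChecker.SuggestSimilar("documnet", 5);   // suggestions for a misspelled word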
public void Search(Analyzer analyzer, string keyword) { if (string.IsNullOrEmpty(keyword)) { throw new ArgumentException("keyword"); } // timing Stopwatch watch = new Stopwatch(); watch.Start(); // settings int numHits = 10; bool docsScoredInOrder = true; bool isReadOnly = true; // create the searcher FSDirectory fsDir = new SimpleFSDirectory(new DirectoryInfo(_indexerFolder)); IndexSearcher indexSearcher = new IndexSearcher(IndexReader.Open(fsDir, isReadOnly)); TopScoreDocCollector collector = TopScoreDocCollector.Create(numHits, docsScoredInOrder); QueryParser parser = new QueryParser(Lucene.Net.Util.Version.LUCENE_30, Config.Field_Content, analyzer); Query query = parser.Parse(keyword); indexSearcher.Search(query, collector); //Console.WriteLine(collector.TotalHits); var result = collector.TopDocs().ScoreDocs; watch.Stop(); Console.WriteLine("Total time: {0} ms", watch.ElapsedMilliseconds); Console.WriteLine("Found {0} files in total", result.Count()); foreach (var docs in result) { Document doc = indexSearcher.Doc(docs.Doc); Console.WriteLine("Score: {0}, file name: {1}", docs.Score, doc.Get(Config.Field_Name)); } indexSearcher.Dispose(); //BooleanQuery booleanQuery = new BooleanQuery(); //QueryParser parser1 = new QueryParser(Lucene.Net.Util.Version.LUCENE_30, Config.Field_Path, analyzer); //Query query1 = parser1.Parse(path); //parser1.DefaultOperator = QueryParser.Operator.AND; //booleanQuery.Add(query1, Occur.MUST); ////QueryParser parser = new QueryParser(Lucene.Net.Util.Version.LUCENE_30, Config.Field_Content, analyzer); ////Query query = parser.Parse(content); ////parser.DefaultOperator = QueryParser.Operator.AND; ////booleanQuery.Add(query, Occur.MUST); ////var queryParserFilePath = new MultiFieldQueryParser(Lucene.Net.Util.Version.LUCENE_30, new string[] { Config.Field_FilePath }, analyzer); ////query.Add(queryParserFilePath.Parse(path), Occur.SHOULD); ////var queryParserContent = new MultiFieldQueryParser(Lucene.Net.Util.Version.LUCENE_30, new string[] { Config.Field_Content}, analyzer); ////query.Add(queryParserContent.Parse(content), Occur.MUST); //FSDirectory fsDir = new SimpleFSDirectory(new DirectoryInfo(_indexerFolder)); //bool isReadOnly = true; //IndexSearcher indexSearcher = new IndexSearcher(IndexReader.Open(fsDir, isReadOnly)); ////TopDocs result = indexSearcher.Search(booleanQuery, 10); //Sort sort = new Sort(new SortField(Config.Field_Path, SortField.STRING, true)); //var result = indexSearcher.Search(booleanQuery, (Filter)null, 10 * 1, sort); }
public override void SetUp() { base.SetUp(); Dir = NewDirectory(); FieldName = Random().NextBoolean() ? "field" : ""; // sometimes use an empty string as field name RandomIndexWriter writer = new RandomIndexWriter(Random(), Dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random(), MockTokenizer.KEYWORD, false)).SetMaxBufferedDocs(TestUtil.NextInt(Random(), 50, 1000))); Document doc = new Document(); Field field = NewStringField(FieldName, "", Field.Store.NO); doc.Add(field); List<string> terms = new List<string>(); int num = AtLeast(200); for (int i = 0; i < num; i++) { string s = TestUtil.RandomUnicodeString(Random()); field.StringValue = s; terms.Add(s); writer.AddDocument(doc); } if (VERBOSE) { // utf16 order terms.Sort(); Console.WriteLine("UTF16 order:"); foreach (string s in terms) { Console.WriteLine(" " + UnicodeUtil.ToHexString(s)); } } Reader = writer.Reader; Searcher1 = NewSearcher(Reader); Searcher2 = NewSearcher(Reader); writer.Dispose(); }
public override IndexSearcher NewSearcher(IndexReader r) { return(new IndexSearcher(r)); }
public void TestWildcardInBoolean() { Directory dir = NewDirectory(); // use simpleanalyzer for more natural tokenization (else "test." is a token) Analyzer analyzer = new MockAnalyzer(Random(), MockTokenizer.SIMPLE, true); IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer); iwc.SetMergePolicy(NewLogMergePolicy()); RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, iwc); FieldType offsetsType = new FieldType(TextField.TYPE_STORED); offsetsType.IndexOptions = (IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS); Field body = new Field("body", "", offsetsType); Document doc = new Document(); doc.Add(body); body.SetStringValue("This is a test."); iw.AddDocument(doc); body.SetStringValue("Test a one sentence document."); iw.AddDocument(doc); IndexReader ir = iw.Reader; iw.Dispose(); IndexSearcher searcher = NewSearcher(ir); PostingsHighlighter highlighter = new PostingsHighlighterAnalyzerHelper(analyzer); //PostingsHighlighter highlighter = new PostingsHighlighter() { // @Override // protected Analyzer getIndexAnalyzer(String field) //{ // return analyzer; //} // }; BooleanQuery query = new BooleanQuery(); query.Add(new WildcardQuery(new Term("body", "te*")), Occur.SHOULD); TopDocs topDocs = searcher.Search(query, null, 10, Sort.INDEXORDER); assertEquals(2, topDocs.TotalHits); String[] snippets = highlighter.Highlight("body", query, searcher, topDocs); assertEquals(2, snippets.Length); assertEquals("This is a <b>test</b>.", snippets[0]); assertEquals("<b>Test</b> a one sentence document.", snippets[1]); // must not query = new BooleanQuery(); query.Add(new MatchAllDocsQuery(), Occur.SHOULD); query.Add(new WildcardQuery(new Term("bogus", "te*")), Occur.MUST_NOT); topDocs = searcher.Search(query, null, 10, Sort.INDEXORDER); assertEquals(2, topDocs.TotalHits); snippets = highlighter.Highlight("body", query, searcher, topDocs); assertEquals(2, snippets.Length); assertEquals("This is a test.", snippets[0]); assertEquals("Test a one sentence document.", snippets[1]); ir.Dispose(); dir.Dispose(); }
public static void AfterClass() { Reader.Dispose(); MultiReader.Dispose(); MultiReaderDupls.Dispose(); Dir.Dispose(); Sdir1.Dispose(); Sdir2.Dispose(); Reader = MultiReader = MultiReaderDupls = null; Searcher = MultiSearcher = MultiSearcherDupls = null; Dir = Sdir1 = Sdir2 = null; }
public virtual void TestRangeQueryId() { // NOTE: uses index build in *super* setUp IndexReader reader = SignedIndexReader; IndexSearcher search = NewSearcher(reader); if (VERBOSE) { Console.WriteLine("TEST: reader=" + reader); } int medId = ((MaxId - MinId) / 2); string minIP = Pad(MinId); string maxIP = Pad(MaxId); string medIP = Pad(medId); int numDocs = reader.NumDocs; AssertEquals("num of docs", numDocs, 1 + MaxId - MinId); ScoreDoc[] result; // test id, bounded on both ends result = search.Search(Csrq("id", minIP, maxIP, T, T), null, numDocs).ScoreDocs; AssertEquals("find all", numDocs, result.Length); result = search.Search(Csrq("id", minIP, maxIP, T, T, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).ScoreDocs; AssertEquals("find all", numDocs, result.Length); result = search.Search(Csrq("id", minIP, maxIP, T, F), null, numDocs).ScoreDocs; AssertEquals("all but last", numDocs - 1, result.Length); result = search.Search(Csrq("id", minIP, maxIP, T, F, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).ScoreDocs; AssertEquals("all but last", numDocs - 1, result.Length); result = search.Search(Csrq("id", minIP, maxIP, F, T), null, numDocs).ScoreDocs; AssertEquals("all but first", numDocs - 1, result.Length); result = search.Search(Csrq("id", minIP, maxIP, F, T, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).ScoreDocs; AssertEquals("all but first", numDocs - 1, result.Length); result = search.Search(Csrq("id", minIP, maxIP, F, F), null, numDocs).ScoreDocs; AssertEquals("all but ends", numDocs - 2, result.Length); result = search.Search(Csrq("id", minIP, maxIP, F, F, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).ScoreDocs; AssertEquals("all but ends", numDocs - 2, result.Length); result = search.Search(Csrq("id", medIP, maxIP, T, T), null, numDocs).ScoreDocs; AssertEquals("med and up", 1 + MaxId - medId, result.Length); result = search.Search(Csrq("id", medIP, maxIP, T, T, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).ScoreDocs; AssertEquals("med and up", 1 + MaxId - medId, result.Length); result = search.Search(Csrq("id", minIP, medIP, T, T), null, numDocs).ScoreDocs; AssertEquals("up to med", 1 + medId - MinId, result.Length); result = search.Search(Csrq("id", minIP, medIP, T, T, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).ScoreDocs; AssertEquals("up to med", 1 + medId - MinId, result.Length); // unbounded id result = search.Search(Csrq("id", minIP, null, T, F), null, numDocs).ScoreDocs; AssertEquals("min and up", numDocs, result.Length); result = search.Search(Csrq("id", null, maxIP, F, T), null, numDocs).ScoreDocs; AssertEquals("max and down", numDocs, result.Length); result = search.Search(Csrq("id", minIP, null, F, F), null, numDocs).ScoreDocs; AssertEquals("not min, but up", numDocs - 1, result.Length); result = search.Search(Csrq("id", null, maxIP, F, F), null, numDocs).ScoreDocs; AssertEquals("not max, but down", numDocs - 1, result.Length); result = search.Search(Csrq("id", medIP, maxIP, T, F), null, numDocs).ScoreDocs; AssertEquals("med and up, not max", MaxId - medId, result.Length); result = search.Search(Csrq("id", minIP, medIP, F, T), null, numDocs).ScoreDocs; AssertEquals("not min, up to med", medId - MinId, result.Length); // very small sets result = search.Search(Csrq("id", minIP, minIP, F, F), null, numDocs).ScoreDocs; AssertEquals("min,min,F,F", 0, result.Length); result = search.Search(Csrq("id", minIP, minIP, F, F, 
MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).ScoreDocs; AssertEquals("min,min,F,F", 0, result.Length); result = search.Search(Csrq("id", medIP, medIP, F, F), null, numDocs).ScoreDocs; AssertEquals("med,med,F,F", 0, result.Length); result = search.Search(Csrq("id", medIP, medIP, F, F, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).ScoreDocs; AssertEquals("med,med,F,F", 0, result.Length); result = search.Search(Csrq("id", maxIP, maxIP, F, F), null, numDocs).ScoreDocs; AssertEquals("max,max,F,F", 0, result.Length); result = search.Search(Csrq("id", maxIP, maxIP, F, F, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).ScoreDocs; AssertEquals("max,max,F,F", 0, result.Length); result = search.Search(Csrq("id", minIP, minIP, T, T), null, numDocs).ScoreDocs; AssertEquals("min,min,T,T", 1, result.Length); result = search.Search(Csrq("id", minIP, minIP, T, T, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).ScoreDocs; AssertEquals("min,min,T,T", 1, result.Length); result = search.Search(Csrq("id", null, minIP, F, T), null, numDocs).ScoreDocs; AssertEquals("nul,min,F,T", 1, result.Length); result = search.Search(Csrq("id", null, minIP, F, T, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).ScoreDocs; AssertEquals("nul,min,F,T", 1, result.Length); result = search.Search(Csrq("id", maxIP, maxIP, T, T), null, numDocs).ScoreDocs; AssertEquals("max,max,T,T", 1, result.Length); result = search.Search(Csrq("id", maxIP, maxIP, T, T, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).ScoreDocs; AssertEquals("max,max,T,T", 1, result.Length); result = search.Search(Csrq("id", maxIP, null, T, F), null, numDocs).ScoreDocs; AssertEquals("max,nul,T,F", 1, result.Length); result = search.Search(Csrq("id", maxIP, null, T, F, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).ScoreDocs; AssertEquals("max,nul,T,F", 1, result.Length); result = search.Search(Csrq("id", medIP, medIP, T, T), null, numDocs).ScoreDocs; AssertEquals("med,med,T,T", 1, result.Length); result = search.Search(Csrq("id", medIP, medIP, T, T, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).ScoreDocs; AssertEquals("med,med,T,T", 1, result.Length); }
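// The Csrq(...) helper exercised above (and in TestRangeQueryRand below) is defined in the
// test's base class and is not shown in this file; T and F are assumed to be its true/false
// constants. As an illustrative aid only, a minimal sketch of such a helper is given here,
// assuming it wraps TermRangeQuery.NewStringRange. The real helper is also expected to force a
// constant-score rewrite (the extra overload above passes a MultiTermQuery.RewriteMethod such as
// CONSTANT_SCORE_AUTO_REWRITE_DEFAULT explicitly). The name CsrqSketch is hypothetical.
private static Query CsrqSketch(string field, string lowerTerm, string upperTerm, bool includeLower, bool includeUpper)
{
    // Inclusive/exclusive string range over a single field; null bounds mean open-ended.
    return TermRangeQuery.NewStringRange(field, lowerTerm, upperTerm, includeLower, includeUpper);
}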
public virtual void TestRangeQueryRand() { // NOTE: uses the index built in *super* SetUp IndexReader reader = SignedIndexReader; IndexSearcher search = NewSearcher(reader); string minRP = Pad(SignedIndexDir.MinR); string maxRP = Pad(SignedIndexDir.MaxR); int numDocs = reader.NumDocs; AssertEquals("num of docs", numDocs, 1 + MaxId - MinId); ScoreDoc[] result; // test extremes, bounded on both ends result = search.Search(Csrq("rand", minRP, maxRP, T, T), null, numDocs).ScoreDocs; AssertEquals("find all", numDocs, result.Length); result = search.Search(Csrq("rand", minRP, maxRP, T, F), null, numDocs).ScoreDocs; AssertEquals("all but biggest", numDocs - 1, result.Length); result = search.Search(Csrq("rand", minRP, maxRP, F, T), null, numDocs).ScoreDocs; AssertEquals("all but smallest", numDocs - 1, result.Length); result = search.Search(Csrq("rand", minRP, maxRP, F, F), null, numDocs).ScoreDocs; AssertEquals("all but extremes", numDocs - 2, result.Length); // unbounded result = search.Search(Csrq("rand", minRP, null, T, F), null, numDocs).ScoreDocs; AssertEquals("smallest and up", numDocs, result.Length); result = search.Search(Csrq("rand", null, maxRP, F, T), null, numDocs).ScoreDocs; AssertEquals("biggest and down", numDocs, result.Length); result = search.Search(Csrq("rand", minRP, null, F, F), null, numDocs).ScoreDocs; AssertEquals("not smallest, but up", numDocs - 1, result.Length); result = search.Search(Csrq("rand", null, maxRP, F, F), null, numDocs).ScoreDocs; AssertEquals("not biggest, but down", numDocs - 1, result.Length); // very small sets result = search.Search(Csrq("rand", minRP, minRP, F, F), null, numDocs).ScoreDocs; AssertEquals("min,min,F,F", 0, result.Length); result = search.Search(Csrq("rand", maxRP, maxRP, F, F), null, numDocs).ScoreDocs; AssertEquals("max,max,F,F", 0, result.Length); result = search.Search(Csrq("rand", minRP, minRP, T, T), null, numDocs).ScoreDocs; AssertEquals("min,min,T,T", 1, result.Length); result = search.Search(Csrq("rand", null, minRP, F, T), null, numDocs).ScoreDocs; AssertEquals("nul,min,F,T", 1, result.Length); result = search.Search(Csrq("rand", maxRP, maxRP, T, T), null, numDocs).ScoreDocs; AssertEquals("max,max,T,T", 1, result.Length); result = search.Search(Csrq("rand", maxRP, null, T, F), null, numDocs).ScoreDocs; AssertEquals("max,nul,T,F", 1, result.Length); }
/// <param name="query"> /// a Lucene query (ideally rewritten using <see cref="Query.Rewrite(IndexReader)"/> before /// being passed to this class and the searcher) /// </param> /// <param name="reader"> /// used to compute IDF, which can be used to (a) score selected /// fragments better and (b) produce graded highlights, e.g. by varying /// font color intensity /// </param> /// <param name="fieldName"> /// the field on which Inverse Document Frequency (IDF) /// calculations are based /// </param> public QueryTermScorer(Query query, IndexReader reader, string fieldName) : this(QueryTermExtractor.GetIdfWeightedTerms(query, reader, fieldName)) { }
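// Illustrative usage sketch for the constructor above (not part of the original source). It
// assumes the Lucene.Net.Search.Highlight Highlighter API mirrors the Java original: the query
// is rewritten against the reader first, as the doc comment recommends, and the scorer is then
// handed to a Highlighter. The method name, the "contents" field, and the analyzer/text
// arguments are hypothetical placeholders.
public static string HighlightBestFragmentSketch(Query query, IndexReader reader, Analyzer analyzer, string storedText)
{
    Query rewritten = query.Rewrite(reader); // expand wildcard/fuzzy/range terms before extracting weighted terms
    QueryTermScorer scorer = new QueryTermScorer(rewritten, reader, "contents");
    Highlighter highlighter = new Highlighter(scorer);
    return highlighter.GetBestFragment(analyzer, "contents", storedText);
}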
private void CreateRandomIndexes() { dir1 = NewDirectory(); dir2 = NewDirectory(); int numDocs = AtLeast(150); int numTerms = TestUtil.NextInt(Random(), 1, numDocs / 5); ISet<string> randomTerms = new HashSet<string>(); while (randomTerms.Count < numTerms) { randomTerms.Add(TestUtil.RandomSimpleString(Random())); } terms = new List<string>(randomTerms); long seed = Random().NextLong(); IndexWriterConfig iwc1 = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(new Random((int)seed))); IndexWriterConfig iwc2 = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(new Random((int)seed))); iwc2.SetMergePolicy(NewSortingMergePolicy(sort)); RandomIndexWriter iw1 = new RandomIndexWriter(new Random((int)seed), dir1, iwc1); RandomIndexWriter iw2 = new RandomIndexWriter(new Random((int)seed), dir2, iwc2); for (int i = 0; i < numDocs; ++i) { if (Random().Next(5) == 0 && i != numDocs - 1) { string term = RandomInts.RandomFrom(Random(), terms); iw1.DeleteDocuments(new Term("s", term)); iw2.DeleteDocuments(new Term("s", term)); } Document doc = RandomDocument(); iw1.AddDocument(doc); iw2.AddDocument(doc); if (Random().Next(8) == 0) { iw1.Commit(); iw2.Commit(); } } // Make sure we have something to merge iw1.Commit(); iw2.Commit(); Document doc2 = RandomDocument(); // NOTE: don't use RIW.AddDocument directly, since it sometimes commits, // which may trigger a merge, in which case ForceMerge may not do anything. // With field updates this is a problem, since the updates can go into the // single segment in the index, and therefore the index won't be sorted. // This would break the test's later assumption that the index is sorted by // SortingMP. iw1.w.AddDocument(doc2); iw2.w.AddDocument(doc2); if (DefaultCodecSupportsFieldUpdates()) { // update NDV of docs belonging to one term (covers many documents) long value = Random().NextLong(); string term = RandomInts.RandomFrom(Random(), terms); iw1.w.UpdateNumericDocValue(new Term("s", term), "ndv", value); iw2.w.UpdateNumericDocValue(new Term("s", term), "ndv", value); } iw1.ForceMerge(1); iw2.ForceMerge(1); iw1.Dispose(); iw2.Dispose(); reader = DirectoryReader.Open(dir1); sortedReader = DirectoryReader.Open(dir2); }
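// RandomDocument() above is defined elsewhere in this test class. Based only on how its fields
// are used here (deletes by Term("s", ...) and numeric doc-values updates of "ndv"), it is
// assumed to produce roughly the document sketched below; the exact field options are guesses
// and the name RandomDocumentSketch is hypothetical.
private Document RandomDocumentSketch()
{
    Document doc = new Document();
    doc.Add(new StringField("s", RandomInts.RandomFrom(Random(), terms), Field.Store.YES)); // term used for deletes
    doc.Add(new NumericDocValuesField("ndv", Random().Next()));                             // value later changed via UpdateNumericDocValue
    return doc;
}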
public override void SetNextReader(IndexReader reader, int docBase) { _docBase = docBase; }
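// The doc base stored above is typically consumed in Collect: per-segment document ids are
// translated to top-level ids by adding _docBase. The Collect override and the _hits field
// below are an illustrative assumption, not part of the original collector.
private readonly IList<int> _hits = new List<int>(); // hypothetical storage for collected top-level ids

public override void Collect(int doc)
{
    _hits.Add(_docBase + doc); // doc is segment-relative; _docBase maps it into the composite reader's id space
}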